From fdc1e9fbd3ff99eba845cb97b1ad8ffe025788a0 Mon Sep 17 00:00:00 2001
From: stswangzhiping <59632378+stswangzhiping@users.noreply.github.com>
Date: Sat, 28 Mar 2026 14:49:07 +0800
Subject: [PATCH] fix(systemd): pet watchdog when WATCHDOG_USEC missing;
 StartLimitInterval for old systemd

Embedded units may set WatchdogSec but omit WATCHDOG_USEC; SD_NOTIFY_INTERVAL
was 0 so no WATCHDOG=1 was sent and systemd killed the main process after ~60s.

Replace StartLimitIntervalSec with StartLimitInterval= for older systemd
(journal warning).

Made-with: Cursor
---
 install.sh    |  3 ++-
 lib/client.js | 27 +++++++++++++++------------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/install.sh b/install.sh
index fe9922b..262ab98 100644
--- a/install.sh
+++ b/install.sh
@@ -195,7 +195,8 @@ WorkingDirectory=$INSTALL_DIR
 # 重启策略
 Restart=always
 RestartSec=5
-StartLimitIntervalSec=300
+# 旧版 systemd 不认 StartLimitIntervalSec,用 StartLimitInterval=(秒)
+StartLimitInterval=300
 StartLimitBurst=10
 
 # 优雅停止(10s 内 SIGTERM,超时 SIGKILL)
diff --git a/lib/client.js b/lib/client.js
index 8515620..191e7a7 100644
--- a/lib/client.js
+++ b/lib/client.js
@@ -24,12 +24,6 @@ const AP_NET_FAIL_MAX = 3;
 const HEARTBEAT_INTERVAL_MS = 10_000; // 心跳间隔:10 秒,用于快速感知网络状态
 const METRICS_EVERY_N = 3; // 每 N 次心跳采集一次指标(= 30 秒)
 
-// systemd watchdog: 如果 WatchdogSec 存在,定期发 WATCHDOG=1
-const SD_WATCHDOG_USEC = parseInt(process.env.WATCHDOG_USEC || '0', 10);
-const SD_NOTIFY_INTERVAL = SD_WATCHDOG_USEC > 0
-  ? Math.floor(SD_WATCHDOG_USEC / 2 / 1000) // 半周期通知(μs → ms)
-  : 0;
-
 class ClawClient {
   constructor() {
     this._cfg = config.load();
@@ -569,25 +563,34 @@ class ClawClient {
   // ── systemd Watchdog ────────────────────────────────────────────────────────
 
   _startSdNotify() {
-    if (!SD_NOTIFY_INTERVAL) return;
-
     const raw = getNotifySocket();
     if (!raw) return;
 
+    // 部分嵌入式 systemd 有 WatchdogSec 但未注入 WATCHDOG_USEC;若此时不喂狗,约 1min 会 SIGABRT
+    let usec = parseInt(process.env.WATCHDOG_USEC || '0', 10);
+    if (usec <= 0) {
+      usec = 60_000_000;
+      log.info('clawd', 'WATCHDOG_USEC 未设置,按 60s 周期向 systemd 发送 WATCHDOG=1(与 unit WatchdogSec=60 一致)');
+    }
+    const intervalMs = Math.max(1000, Math.floor(usec / 2 / 1000));
+
     // 抽象套接字:NOTIFY_SOCKET 以 @ 开头,内核地址首字节为 \0(与 sd_notify 一致)
     this._sdNotifyAddr = raw.startsWith('@') ? `\0${raw.slice(1)}` : raw;
 
     try {
       this._sdDgram = dgram.createSocket('unix_dgram');
-      this._sdDgram.on('error', () => { /* 忽略,避免未处理 error 崩溃 */ });
-    } catch (_) {
+      this._sdDgram.on('error', (err) => {
+        log.warn('clawd', 'systemd notify socket:', err.message);
+      });
+    } catch (err) {
       this._sdNotifyAddr = null;
+      log.warn('clawd', 'systemd notify dgram 创建失败:', err.message);
       return;
     }
 
-    log.debug('clawd', `systemd watchdog 启用,通知间隔 ${SD_NOTIFY_INTERVAL}ms`);
+    log.debug('clawd', `systemd watchdog 启用,通知间隔 ${intervalMs}ms`);
     this._sdNotify('READY=1');
-    this._sdTimer = setInterval(() => this._sdNotify('WATCHDOG=1'), SD_NOTIFY_INTERVAL);
+    this._sdTimer = setInterval(() => this._sdNotify('WATCHDOG=1'), intervalMs);
   }
 
   /**