fix(systemd): pet watchdog when WATCHDOG_USEC missing; StartLimitInterval for old systemd

Some embedded systemd builds enforce WatchdogSec= but do not inject WATCHDOG_USEC into the
service environment. SD_NOTIFY_INTERVAL was then 0, so WATCHDOG=1 was never sent and systemd
killed the main process after ~60s.

Replace StartLimitIntervalSec= with StartLimitInterval=, which older systemd versions recognize (the former produced a journal warning).

Made-with: Cursor
This commit is contained in:
stswangzhiping
2026-03-28 14:49:07 +08:00
parent 04dd1017bb
commit fdc1e9fbd3
2 changed files with 17 additions and 13 deletions

View File

@@ -195,7 +195,8 @@ WorkingDirectory=$INSTALL_DIR
# 重启策略
Restart=always
RestartSec=5
StartLimitIntervalSec=300
# 旧版 systemd 不认 StartLimitIntervalSec,用 StartLimitInterval=(秒)
StartLimitInterval=300
StartLimitBurst=10
# 优雅停止10s 内 SIGTERM超时 SIGKILL

View File

@@ -24,12 +24,6 @@ const AP_NET_FAIL_MAX = 3;
// Heartbeat period: 10 seconds, short enough to notice network state changes quickly.
const HEARTBEAT_INTERVAL_MS = 10 * 1000;
// Collect metrics once every N heartbeats (N = 3, i.e. every 30 seconds).
const METRICS_EVERY_N = 3;
// systemd watchdog: when WatchdogSec is configured, WATCHDOG=1 is sent periodically.
// WATCHDOG_USEC is the watchdog period in microseconds; a missing variable parses to 0.
const SD_WATCHDOG_USEC = Number.parseInt(process.env.WATCHDOG_USEC || '0', 10);
// Notify at half the watchdog period, converted from µs to ms; 0 means "no notifications".
const SD_NOTIFY_INTERVAL = !(SD_WATCHDOG_USEC > 0)
  ? 0
  : Math.floor(SD_WATCHDOG_USEC / 2 / 1000);
class ClawClient {
constructor() {
this._cfg = config.load();
@@ -569,25 +563,34 @@ class ClawClient {
// ── systemd Watchdog ────────────────────────────────────────────────────────
/**
 * Starts systemd notifications: sends READY=1 once, then WATCHDOG=1 on a repeating timer.
 *
 * Stores the datagram socket in `this._sdDgram`, the target address in `this._sdNotifyAddr`,
 * and the timer handle in `this._sdTimer`. Returns nothing.
 *
 * NOTE(review): this span appears to interleave the removed and the added lines of a diff
 * hunk (markers stripped) — it contains two `catch` clauses, two 'error' handlers, two debug
 * logs and two `setInterval` calls. Confirm against the real file before relying on it.
 */
_startSdNotify() {
// Bail out when the module-level notify interval is falsy (0).
if (!SD_NOTIFY_INTERVAL) return;
// presumably the NOTIFY_SOCKET value — getNotifySocket() is defined outside this view; verify.
const raw = getNotifySocket();
if (!raw) return;
// Some embedded systemd builds have WatchdogSec set but do not inject WATCHDOG_USEC;
// if the watchdog is not petted in that case, the process gets SIGABRT after about 1 min.
let usec = parseInt(process.env.WATCHDOG_USEC || '0', 10);
if (usec <= 0) {
// Fall back to a 60 s watchdog period (in microseconds).
usec = 60_000_000;
log.info('clawd', 'WATCHDOG_USEC 未设置,按 60s 周期向 systemd 发送 WATCHDOG=1与 unit WatchdogSec=60 一致)');
}
// Notify at half the watchdog period (µs → ms), but never more often than once per second.
const intervalMs = Math.max(1000, Math.floor(usec / 2 / 1000));
// Abstract socket: NOTIFY_SOCKET starts with '@', and the kernel address's first byte is \0
// (consistent with sd_notify).
this._sdNotifyAddr = raw.startsWith('@') ? `\0${raw.slice(1)}` : raw;
try {
// NOTE(review): 'unix_dgram' is not a socket type of Node's built-in dgram module —
// presumably `dgram` here is a third-party Unix datagram package; confirm against the imports.
this._sdDgram = dgram.createSocket('unix_dgram');
this._sdDgram.on('error', () => { /* ignore, to avoid crashing on an unhandled 'error' event */ });
} catch (_) {
// Log socket errors as warnings.
this._sdDgram.on('error', (err) => {
log.warn('clawd', 'systemd notify socket:', err.message);
});
} catch (err) {
// Socket creation failed: clear the address, warn, and give up on notifications.
this._sdNotifyAddr = null;
log.warn('clawd', 'systemd notify dgram 创建失败:', err.message);
return;
}
log.debug('clawd', `systemd watchdog 启用,通知间隔 ${SD_NOTIFY_INTERVAL}ms`);
log.debug('clawd', `systemd watchdog 启用,通知间隔 ${intervalMs}ms`);
// Send READY=1 once before the periodic WATCHDOG=1 keep-alives start.
this._sdNotify('READY=1');
this._sdTimer = setInterval(() => this._sdNotify('WATCHDOG=1'), SD_NOTIFY_INTERVAL);
this._sdTimer = setInterval(() => this._sdNotify('WATCHDOG=1'), intervalMs);
}
/**