fix(systemd): pet watchdog when WATCHDOG_USEC missing; StartLimitInterval for old systemd
On some embedded systems the unit sets WatchdogSec= but systemd does not inject WATCHDOG_USEC into the service environment; SD_NOTIFY_INTERVAL was then 0, so no WATCHDOG=1 was ever sent and systemd killed the main process after ~60s. Also replace StartLimitIntervalSec= with StartLimitInterval= for older systemd (which logged a journal warning). Made-with: Cursor
This commit is contained in:
@@ -24,12 +24,6 @@ const AP_NET_FAIL_MAX = 3;
|
||||
const HEARTBEAT_INTERVAL_MS = 10_000; // Heartbeat interval: 10 seconds, so network state changes are noticed quickly
|
||||
const METRICS_EVERY_N = 3; // Collect metrics once every N heartbeats (= 30 seconds)
|
||||
|
||||
// systemd watchdog: when WatchdogSec is configured, send WATCHDOG=1 periodically.
// SD_NOTIFY_INTERVAL evaluates to 0 when WATCHDOG_USEC is unset or not a positive number.
|
||||
const SD_WATCHDOG_USEC = parseInt(process.env.WATCHDOG_USEC || '0', 10);
|
||||
const SD_NOTIFY_INTERVAL = SD_WATCHDOG_USEC > 0
|
||||
? Math.floor(SD_WATCHDOG_USEC / 2 / 1000) // notify every half period (μs → ms)
|
||||
: 0;
|
||||
|
||||
class ClawClient {
|
||||
constructor() {
|
||||
this._cfg = config.load();
|
||||
@@ -569,25 +563,34 @@ class ClawClient {
|
||||
// ── systemd Watchdog ────────────────────────────────────────────────────────
|
||||
|
||||
_startSdNotify() {
|
||||
if (!SD_NOTIFY_INTERVAL) return;
|
||||
|
||||
const raw = getNotifySocket();
|
||||
if (!raw) return;
|
||||
|
||||
// 部分嵌入式 systemd 有 WatchdogSec 但未注入 WATCHDOG_USEC;若此时不喂狗,约 1min 会 SIGABRT
|
||||
let usec = parseInt(process.env.WATCHDOG_USEC || '0', 10);
|
||||
if (usec <= 0) {
|
||||
usec = 60_000_000;
|
||||
log.info('clawd', 'WATCHDOG_USEC 未设置,按 60s 周期向 systemd 发送 WATCHDOG=1(与 unit WatchdogSec=60 一致)');
|
||||
}
|
||||
const intervalMs = Math.max(1000, Math.floor(usec / 2 / 1000));
|
||||
|
||||
// 抽象套接字:NOTIFY_SOCKET 以 @ 开头,内核地址首字节为 \0(与 sd_notify 一致)
|
||||
this._sdNotifyAddr = raw.startsWith('@') ? `\0${raw.slice(1)}` : raw;
|
||||
|
||||
try {
|
||||
this._sdDgram = dgram.createSocket('unix_dgram');
|
||||
this._sdDgram.on('error', () => { /* 忽略,避免未处理 error 崩溃 */ });
|
||||
} catch (_) {
|
||||
this._sdDgram.on('error', (err) => {
|
||||
log.warn('clawd', 'systemd notify socket:', err.message);
|
||||
});
|
||||
} catch (err) {
|
||||
this._sdNotifyAddr = null;
|
||||
log.warn('clawd', 'systemd notify dgram 创建失败:', err.message);
|
||||
return;
|
||||
}
|
||||
|
||||
log.debug('clawd', `systemd watchdog 启用,通知间隔 ${SD_NOTIFY_INTERVAL}ms`);
|
||||
log.debug('clawd', `systemd watchdog 启用,通知间隔 ${intervalMs}ms`);
|
||||
this._sdNotify('READY=1');
|
||||
this._sdTimer = setInterval(() => this._sdNotify('WATCHDOG=1'), SD_NOTIFY_INTERVAL);
|
||||
this._sdTimer = setInterval(() => this._sdNotify('WATCHDOG=1'), intervalMs);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user