fix(systemd): use systemd-notify + NotifyAccess=all for watchdog

Sending to NOTIFY_SOCKET over a Node unix_dgram socket failed on rk3528 (embedded
Node), so WATCHDOG=1 never reached systemd despite the WATCHDOG_USEC fallback.
Switch to executing systemd-notify with NOTIFY_SOCKET set in the child's environment;
the unit now sets NotifyAccess=all so that processes in the service's cgroup other
than the main PID may notify.

Existing installs must update and reload the unit file (re-run install.sh, or add NotifyAccess=all manually).

Made-with: Cursor
This commit is contained in:
stswangzhiping
2026-03-28 14:54:56 +08:00
parent fdc1e9fbd3
commit 06d06fdd1e
2 changed files with 22 additions and 31 deletions

View File

@@ -188,6 +188,8 @@ Wants=NetworkManager.service
[Service] [Service]
Type=simple Type=simple
# systemd-notify 由子进程执行,默认 NotifyAccess=main 会拒收;需 all 才能喂 WatchdogSec
NotifyAccess=all
EnvironmentFile=$ENV_FILE EnvironmentFile=$ENV_FILE
ExecStart=$NODE_BIN $INSTALL_DIR/bin/clawd.js ExecStart=$NODE_BIN $INSTALL_DIR/bin/clawd.js
WorkingDirectory=$INSTALL_DIR WorkingDirectory=$INSTALL_DIR

View File

@@ -2,8 +2,7 @@
const { getNotifySocket } = require('./systemd-env'); const { getNotifySocket } = require('./systemd-env');
const WebSocket = require('ws'); const WebSocket = require('ws');
const dgram = require('dgram'); const { execSync, execFileSync } = require('child_process');
const { execSync } = require('child_process');
const config = require('./config'); const config = require('./config');
const log = require('./logger'); const log = require('./logger');
const { getBoxId } = require('./fingerprint'); const { getBoxId } = require('./fingerprint');
@@ -56,10 +55,8 @@ class ClawClient {
this._certTimeError = false; this._certTimeError = false;
// systemd watchdog进程 unix_dgram → NOTIFY_SOCKET // systemd watchdogsystemd-notify 子进程 + unit 里 NotifyAccess=all
this._sdTimer = null; this._sdTimer = null;
this._sdDgram = null;
this._sdNotifyAddr = null;
this._setupGlobalHandlers(); this._setupGlobalHandlers();
} }
@@ -182,11 +179,6 @@ class ClawClient {
if (this._ws) this._ws.terminate(); if (this._ws) this._ws.terminate();
led.status.off(); // 进程退出,两灯全灭 led.status.off(); // 进程退出,两灯全灭
this._sdNotify('STOPPING=1'); this._sdNotify('STOPPING=1');
if (this._sdDgram) {
try { this._sdDgram.close(); } catch (_) {}
this._sdDgram = null;
}
this._sdNotifyAddr = null;
log.info('clawd', '已停止'); log.info('clawd', '已停止');
log.close(); log.close();
} }
@@ -574,35 +566,32 @@ class ClawClient {
} }
const intervalMs = Math.max(1000, Math.floor(usec / 2 / 1000)); const intervalMs = Math.max(1000, Math.floor(usec / 2 / 1000));
// 抽象套接字NOTIFY_SOCKET 以 @ 开头,内核地址首字节为 \0与 sd_notify 一致)
this._sdNotifyAddr = raw.startsWith('@') ? `\0${raw.slice(1)}` : raw;
try {
this._sdDgram = dgram.createSocket('unix_dgram');
this._sdDgram.on('error', (err) => {
log.warn('clawd', 'systemd notify socket:', err.message);
});
} catch (err) {
this._sdNotifyAddr = null;
log.warn('clawd', 'systemd notify dgram 创建失败:', err.message);
return;
}
log.debug('clawd', `systemd watchdog 启用,通知间隔 ${intervalMs}ms`); log.debug('clawd', `systemd watchdog 启用,通知间隔 ${intervalMs}ms`);
this._sdNotify('READY=1'); this._sdNotify('READY=1');
this._sdTimer = setInterval(() => this._sdNotify('WATCHDOG=1'), intervalMs); this._sdTimer = setInterval(() => this._sdNotify('WATCHDOG=1'), intervalMs);
} }
/** /**
* 必须由本进程(主 PID发往 NOTIFY_SOCKETexec systemd-notify 会换 PID * 通过 systemd-notify 写入 NOTIFY_SOCKET。嵌入式上 Node unix_dgram 对抽象套接字常无效;
* systemd 在 NotifyAccess=main 下会拒绝并刷屏 journal * 子进程发 notify 需在 unit 中设置 NotifyAccess=allinstall.sh 已写)
*/ */
_sdNotify(msg) { _sdNotify(msg) {
if (!this._sdDgram || !this._sdNotifyAddr) return; const sock = getNotifySocket();
const payload = Buffer.from(msg.endsWith('\n') ? msg : `${msg}\n`); if (!sock) return;
const arg = String(msg).replace(/\n+$/, '');
if (!arg) return;
try { try {
this._sdDgram.send(payload, 0, payload.length, this._sdNotifyAddr, () => {}); execFileSync('systemd-notify', [arg], {
} catch (_) { /* ignore */ } timeout: 3000,
encoding: 'utf8',
env: {
PATH: process.env.PATH || '/usr/bin:/bin',
NOTIFY_SOCKET: sock,
},
});
} catch (e) {
log.warn('clawd', 'systemd-notify 失败:', e.message);
}
} }
} }