fix(network): wired ping probe, AP/WS and systemd notify hardening

- Add hasWiredInternetProbe and export; AP mode uses it with hasInternet
- systemd-env: strip NOTIFY_SOCKET from env early; client uses unix_dgram
- Strip NOTIFY_SOCKET from frpc/ttyd spawn env in watchdog and frpc
- WS: pong miss debounce; AP net monitor consecutive-fail debounce

Made-with: Cursor
This commit is contained in:
stswangzhiping
2026-03-28 14:37:56 +08:00
parent 402440aadc
commit 04dd1017bb
6 changed files with 116 additions and 24 deletions

View File

@@ -1,7 +1,9 @@
'use strict';
const { getNotifySocket } = require('./systemd-env');
const WebSocket = require('ws');
const { execFileSync, execSync } = require('child_process');
const dgram = require('dgram');
const { execSync } = require('child_process');
const config = require('./config');
const log = require('./logger');
const { getBoxId } = require('./fingerprint');
@@ -9,13 +11,16 @@ const { collect } = require('./metrics');
const { getDashboardInfo, resolveOpenclawConfigFile, startTtyd, FrpcManager } = require('./frpc'); // getDashboardInfo 也用于心跳中定期刷新
const { ProvisionManager } = require('./provisioning');
const { BtMonitor } = require('./bt-monitor');
const { hasInternet, getLocalIps } = require('./network');
const { hasInternet, hasWiredInternetProbe, getLocalIps } = require('./network');
const led = require('./led');
const MAX_BACKOFF_MS = 60_000;
const PONG_TIMEOUT_MS = 8_000;
const PING_INTERVAL_MS = 10_000;
/** 连续若干轮 ping 后仍无 pong 才判定死链(单轮易因调度/弱网误判) */
const PONG_MISS_MAX = 3;
const PING_INTERVAL_MS = 15_000;
const NET_MONITOR_MS = 5_000; // AP 模式网络监视间隔
/** AP 下 nmcli/ping 易抖动:连续多轮无上行再关 WS避免误杀仍通的长连接 */
const AP_NET_FAIL_MAX = 3;
const HEARTBEAT_INTERVAL_MS = 10_000; // 心跳间隔10 秒,用于快速感知网络状态
const METRICS_EVERY_N = 3; // 每 N 次心跳采集一次指标(= 30 秒)
@@ -43,9 +48,11 @@ class ClawClient {
// WS 层活性检测
this._pingTimer = null;
this._awaitingPong = false;
this._pongMissCount = 0;
// AP 模式网络监视WS 连通后每 5s 检查,断网立即 terminate
// AP 模式网络监视WS 连通后每 5s 检查;连续多轮无上行才 terminate
this._netMonitorTimer = null;
this._apNetFailStreak = 0;
// WS 连续失败计数open 时清零)
this._wsFailCount = 0;
@@ -55,8 +62,10 @@ class ClawClient {
this._certTimeError = false;
// systemd watchdog
this._sdTimer = null;
// systemd watchdog(主进程 unix_dgram → NOTIFY_SOCKET
this._sdTimer = null;
this._sdDgram = null;
this._sdNotifyAddr = null;
this._setupGlobalHandlers();
}
@@ -179,6 +188,11 @@ class ClawClient {
if (this._ws) this._ws.terminate();
led.status.off(); // 进程退出,两灯全灭
this._sdNotify('STOPPING=1');
if (this._sdDgram) {
try { this._sdDgram.close(); } catch (_) {}
this._sdDgram = null;
}
this._sdNotifyAddr = null;
log.info('clawd', '已停止');
log.close();
}
@@ -188,8 +202,8 @@ class ClawClient {
_connect() {
if (this._stopped) return;
// AP 模式 + 无网:不建立 WS5s 后重新检查网络
if (this._provisionMgr && this._provisionMgr.isApMode() && !hasInternet()) {
// AP 模式 + 无网:不建立 WS5s 后重新检查(有线经 -I ping 仍通则建立,避免热点误挡 WS
if (this._provisionMgr && this._provisionMgr.isApMode() && !hasInternet() && !hasWiredInternetProbe()) {
led.display.showAP();
log.info('clawd', 'AP 模式无网络5s 后重新检查...');
this._backoff = 1_000; // 有网时立即快速重连
@@ -226,6 +240,7 @@ class ClawClient {
ws.on('pong', () => {
this._awaitingPong = false;
this._pongMissCount = 0;
});
ws.on('close', (code, reason) => {
@@ -237,8 +252,8 @@ class ClawClient {
log.warn('clawd', `连接断开 (${code}),失败次数=${this._wsFailCount}${this._backoff / 1000}s 后重连...`);
if (this._hasEverConnected && this._wsFailCount >= 3) {
const inAp = this._provisionMgr && this._provisionMgr.isApMode();
if (inAp || !hasInternet()) {
led.display.showAP(); // AP 模式 或 无网
if (inAp || (!hasInternet() && !hasWiredInternetProbe())) {
led.display.showAP(); // AP 模式 或 无网(有线探测也无则视为无上行)
} else {
led.display.showErr0(); // STA 模式 + 有网 但 VPS 不可达
}
@@ -273,9 +288,13 @@ class ClawClient {
if (!this._ws || this._ws.readyState !== WebSocket.OPEN) return;
if (this._awaitingPong) {
log.warn('clawd', 'Pong 超时,连接可能已死,主动关闭重连');
this._ws.terminate();
return;
this._pongMissCount++;
if (this._pongMissCount >= PONG_MISS_MAX) {
log.warn('clawd', `Pong 连续 ${PONG_MISS_MAX} 次未响应,主动关闭重连`);
this._ws.terminate();
return;
}
log.warn('clawd', `Pong 超时 (${this._pongMissCount}/${PONG_MISS_MAX}),继续探测...`);
}
this._awaitingPong = true;
@@ -289,18 +308,31 @@ class ClawClient {
this._pingTimer = null;
}
this._awaitingPong = false;
this._pongMissCount = 0;
}
// ── AP 模式网络监视(拔网线后 ≤5s 感知)────────────────────────────────────
_startNetMonitor() {
this._clearNetMonitor();
this._apNetFailStreak = 0;
this._netMonitorTimer = setInterval(() => {
if (!this._provisionMgr || !this._provisionMgr.isApMode()) return;
if (hasInternet()) return;
// AP 模式 + 无网,但 WS 还"活着" → 立即终止,触发 close → _connect() 进入 5s 轮询
log.warn('clawd', 'AP 模式检测到网络断开,主动关闭 WS');
if (!this._provisionMgr || !this._provisionMgr.isApMode()) {
this._apNetFailStreak = 0;
return;
}
if (hasInternet() || hasWiredInternetProbe()) {
this._apNetFailStreak = 0;
return;
}
this._apNetFailStreak++;
if (this._apNetFailStreak < AP_NET_FAIL_MAX) {
log.info('clawd', `AP 网监:无上行 (${this._apNetFailStreak}/${AP_NET_FAIL_MAX}),累计后再判定`);
return;
}
log.warn('clawd', 'AP 模式检测到网络断开(已连续多次无上行),主动关闭 WS');
led.display.showAP();
this._apNetFailStreak = 0;
if (this._ws) this._ws.terminate();
}, NET_MONITOR_MS);
}
@@ -310,6 +342,7 @@ class ClawClient {
clearInterval(this._netMonitorTimer);
this._netMonitorTimer = null;
}
this._apNetFailStreak = 0;
}
// ── 发送 connect ─────────────────────────────────────────────────────────────
@@ -538,18 +571,35 @@ class ClawClient {
_startSdNotify() {
if (!SD_NOTIFY_INTERVAL) return;
const raw = getNotifySocket();
if (!raw) return;
// 抽象套接字NOTIFY_SOCKET 以 @ 开头,内核地址首字节为 \0与 sd_notify 一致)
this._sdNotifyAddr = raw.startsWith('@') ? `\0${raw.slice(1)}` : raw;
try {
this._sdDgram = dgram.createSocket('unix_dgram');
this._sdDgram.on('error', () => { /* 忽略,避免未处理 error 崩溃 */ });
} catch (_) {
this._sdNotifyAddr = null;
return;
}
log.debug('clawd', `systemd watchdog 启用,通知间隔 ${SD_NOTIFY_INTERVAL}ms`);
this._sdNotify('READY=1');
this._sdTimer = setInterval(() => this._sdNotify('WATCHDOG=1'), SD_NOTIFY_INTERVAL);
}
/**
 * 必须由本进程(主 PID)发往 NOTIFY_SOCKET;exec systemd-notify 会换 PID,
 * systemd 在 NotifyAccess=main 下会拒绝并刷屏 journal。
 */
_sdNotify(msg) {
if (!process.env.NOTIFY_SOCKET) return;
if (!this._sdDgram || !this._sdNotifyAddr) return;
const payload = Buffer.from(msg.endsWith('\n') ? msg : `${msg}\n`);
try {
execFileSync('systemd-notify', ['--pid=' + process.pid, msg], { timeout: 2000 });
} catch (_) {
// systemd-notify 不可用时静默忽略
}
this._sdDgram.send(payload, 0, payload.length, this._sdNotifyAddr, () => {});
} catch (_) { /* ignore */ }
}
}

View File

@@ -124,9 +124,12 @@ async function startTtyd() {
const shell = fs.existsSync('/bin/bash') ? '/bin/bash' : '/bin/sh';
// 以普通用户身份启动 shell与 SSH 登录一致)
const ttydUser = process.env.CLAWD_TTY_USER || 'sts';
const ttyEnv = { ...process.env };
delete ttyEnv.NOTIFY_SOCKET;
const proc = spawn(TTYD_BIN, ['-p', String(TTYD_PORT), '-i', '127.0.0.1', '-W', '-t', 'cursorBlink=true', '/bin/su', '-', ttydUser], {
stdio: 'ignore',
detached: true,
env: ttyEnv,
});
proc.unref();
log.info('ttyd', `已启动,端口 ${TTYD_PORT},用户=${ttydUser}`);

View File

@@ -79,6 +79,20 @@ function _tryPingInternet() {
return false;
}
/**
 * Ping a public address through the wired interface only, so the result does
 * not depend on the default route.
 *
 * hasInternet() tends to misjudge while the AP is enabled; this probe is the
 * fallback used when keeping the WS connection / network monitor alive.
 *
 * @returns {boolean} true when a wired interface with carrier exists and the
 *   ping command completes without throwing; false otherwise.
 */
function hasWiredInternetProbe() {
  const iface = getWiredIfaceWithCarrier();
  if (iface) {
    // One echo request (-c 1), 3 second wait (-W 3), bound to the wired interface (-I).
    const pingCmd = `ping -c 1 -W 3 -I ${iface} 8.8.8.8`;
    try {
      run(pingCmd);
      return true;
    } catch (_) {
      // A throwing run() is treated as "no uplink"; fall through to false.
    }
  }
  return false;
}
/**
* 检测是否有互联网连接nmcli 连通性 + ping 兜底)
*/
@@ -298,6 +312,7 @@ function getLocalIps() {
module.exports = {
hasInternet,
hasWiredCarrier,
hasWiredInternetProbe,
getWiredIfaceWithCarrier,
hasSavedWifiConnection,
isWifiStaConnected,

17
lib/systemd-env.js Normal file
View File

@@ -0,0 +1,17 @@
'use strict';
/**
 * Remove NOTIFY_SOCKET from process.env before any child process (nmcli,
 * pkill, frpc, dependency libraries) is started.
 * Otherwise children inherit it and may send sd_notify messages to systemd,
 * flooding the journal with "only the main PID is accepted" rejections.
 * The main process retrieves the original value through getNotifySocket()
 * and sends its own unix_dgram notifications.
 */
const capturedNotifySocket = (() => {
  const value = process.env.NOTIFY_SOCKET;
  // Only a non-empty value is removed from the environment.
  if (value) {
    delete process.env.NOTIFY_SOCKET;
  }
  return value;
})();

/**
 * @returns {string|undefined} the NOTIFY_SOCKET value captured at module load.
 */
function getNotifySocket() {
  return capturedNotifySocket;
}
module.exports = { getNotifySocket };

View File

@@ -67,9 +67,13 @@ class Watchdog {
if (this._stopped) return;
log.info(this._name, '启动进程...');
const { env: optsEnv, ...restSpawn } = this._spawnOpts;
const env = { ...process.env, ...optsEnv };
delete env.NOTIFY_SOCKET; // 避免 frpc 等子进程向 systemd 发 notify触发非主 PID 拒收
const proc = spawn(this._bin, this._args, {
stdio: ['ignore', 'pipe', 'pipe'],
...this._spawnOpts,
...restSpawn,
env,
});
this._proc = proc;