feat: add structured logging, process watchdog, and systemd hardening

- Add lib/logger.js: timestamped structured logging with 5MB x 5 file rotation
- Add lib/watchdog.js: generic child process supervisor with rate-limited restarts
- Enhance client.js: WS ping/pong liveness detection, uncaughtException/unhandledRejection handlers, systemd sd-notify integration
- Refactor frpc.js: FrpcManager now delegates to Watchdog instead of manual spawn/exit
- Enhance install.sh: environment file, log directory, systemd resource limits, security hardening, WatchdogSec=60
- Replace all console.log/warn/error with structured logger across modules

Made-with: Cursor
This commit is contained in:
stswangzhiping
2026-03-16 07:31:19 +08:00
parent 42d1d361dc
commit b3770d21d4
9 changed files with 545 additions and 149 deletions

View File

@@ -1,85 +1,156 @@
'use strict';
const WebSocket = require('ws');
const { execFileSync } = require('child_process');
const config = require('./config');
const log = require('./logger');
const { getBoxId } = require('./fingerprint');
const { collect } = require('./metrics');
const { getDashboardInfo, startTtyd, FrpcManager } = require('./frpc');
// Upper bound (ms) for the reconnect delay, which doubles after each dropped connection.
const MAX_BACKOFF_MS = 60_000;
// NOTE(review): presumably the time allowed for a pong reply; confirm where it is consumed.
const PONG_TIMEOUT_MS = 15_000;
// Period (ms) of the WebSocket-level ping timer.
const PING_INTERVAL_MS = 30_000;
// systemd watchdog: when WatchdogSec is configured, WATCHDOG=1 is sent periodically.
// WATCHDOG_USEC is read from the environment; a missing value is treated as 0 (disabled).
const SD_WATCHDOG_USEC = parseInt(process.env.WATCHDOG_USEC || '0', 10);
const SD_NOTIFY_INTERVAL = SD_WATCHDOG_USEC > 0
  ? Math.floor(SD_WATCHDOG_USEC / 2 / 1000) // notify every half period; μs → ms
  : 0;
class ClawClient {
constructor() {
this._cfg = config.load();
this._boxId = getBoxId();
this._ws = null;
this._hbTimer = null; // 心跳定时器
this._backoff = 1_000; // 重连等待ms
this._hbTimer = null;
this._backoff = 1_000;
this._stopped = false;
this._frpc = new FrpcManager();
this._dashInfo = {}; // { dashboard_token, dashboard_port }
this._dashInfo = {};
// WS 层活性检测
this._pingTimer = null;
this._awaitingPong = false;
// systemd watchdog
this._sdTimer = null;
this._setupGlobalHandlers();
}
// ── 全局异常兜底 ─────────────────────────────────────────────────────────────
_setupGlobalHandlers() {
process.on('uncaughtException', (err) => {
log.error('process', '未捕获异常:', err);
// 给日志写盘的时间,然后退出让 systemd 重启
setTimeout(() => process.exit(1), 1000);
});
process.on('unhandledRejection', (reason) => {
log.error('process', '未处理的 Promise 拒绝:', reason);
});
}
// ── 生命周期 ─────────────────────────────────────────────────────────────────
async start() {
console.log(`[clawd] 启动中... 服务器 = ${this._cfg.server}`);
// 并行:获取 openclaw dashboard 信息 + 启动 ttyd
log.info('clawd', `启动中... 服务器 = ${this._cfg.server}`);
const [dashInfo] = await Promise.all([
getDashboardInfo(),
startTtyd().catch(e => console.warn('[ttyd] 启动失败:', e.message)),
startTtyd().catch(e => log.warn('ttyd', '启动失败:', e.message)),
]);
this._dashInfo = dashInfo || {};
this._startSdNotify();
this._connect();
}
stop() {
this._stopped = true;
this._clearHeartbeat();
this._clearPing();
if (this._sdTimer) { clearInterval(this._sdTimer); this._sdTimer = null; }
this._frpc.stop();
if (this._ws) this._ws.terminate();
console.log('[clawd] 已停止');
this._sdNotify('STOPPING=1');
log.info('clawd', '已停止');
log.close();
}
// ── 连接 ──────────────────────────────────────────────────────────────────
// ── WebSocket 连接 ──────────────────────────────────────────────────────────
_connect() {
if (this._stopped) return;
console.log(`[clawd] 正在连接 ${this._cfg.server} ...`);
log.info('clawd', `正在连接 ${this._cfg.server} ...`);
const ws = new WebSocket(this._cfg.server, {
handshakeTimeout: 10_000,
});
this._ws = ws;
ws.on('open', () => {
console.log('[clawd] WebSocket 已连接');
log.info('clawd', 'WebSocket 已连接');
this._backoff = 1_000;
this._sendConnect();
this._startPing();
});
ws.on('message', (data) => {
try {
this._handleMessage(JSON.parse(data.toString()));
} catch (e) {
console.error('[clawd] 消息解析失败:', e.message);
log.error('clawd', '消息解析失败:', e.message);
}
});
ws.on('pong', () => {
this._awaitingPong = false;
});
ws.on('close', (code, reason) => {
this._clearHeartbeat();
this._clearPing();
if (!this._stopped) {
console.warn(`[clawd] 连接断开 (${code})${this._backoff / 1000}s 后重连...`);
log.warn('clawd', `连接断开 (${code})${this._backoff / 1000}s 后重连...`);
setTimeout(() => this._connect(), this._backoff);
this._backoff = Math.min(this._backoff * 2, MAX_BACKOFF_MS);
}
});
ws.on('error', (err) => {
console.error('[clawd] 连接错误:', err.message);
// close 事件会在 error 之后触发,重连逻辑在 close 里处理
log.error('clawd', '连接错误:', err.message);
});
}
// ── 发送 connect ──────────────────────────────────────────────────────────
// ── WS 层 Ping/Pong 活性检测 ──────────────────────────────────────────────
_startPing() {
this._clearPing();
this._pingTimer = setInterval(() => {
if (!this._ws || this._ws.readyState !== WebSocket.OPEN) return;
if (this._awaitingPong) {
log.warn('clawd', 'Pong 超时,连接可能已死,主动关闭重连');
this._ws.terminate();
return;
}
this._awaitingPong = true;
try { this._ws.ping(); } catch (_) {}
}, PING_INTERVAL_MS);
}
_clearPing() {
if (this._pingTimer) {
clearInterval(this._pingTimer);
this._pingTimer = null;
}
this._awaitingPong = false;
}
// ── 发送 connect ─────────────────────────────────────────────────────────────
_sendConnect() {
const msg = {
@@ -87,13 +158,12 @@ class ClawClient {
box_id: this._boxId,
claw_id: this._cfg.claw_id ?? null,
token: this._cfg.token ?? null,
// dashboard 信息可选openclaw 未安装时为空)
...this._dashInfo,
};
this._send(msg);
}
// ── 消息处理 ──────────────────────────────────────────────────────────────
// ── 消息处理 ─────────────────────────────────────────────────────────────────
_handleMessage(msg) {
switch (msg.type) {
@@ -101,76 +171,67 @@ class ClawClient {
this._onConnected(msg);
break;
case 'heartbeat_ack':
// 正常回包,静默处理
break;
case 'error':
console.error(`[clawd] 服务器错误: ${msg.msg}`);
log.error('clawd', `服务器错误: ${msg.msg}`);
if (msg.msg === 'hardware_mismatch') {
// box_id 与库中不符:硬件变更或凭证泄露
// 清空本地凭证,下次重连走全新注册流程
console.warn('[clawd] 硬件指纹与服务器不符(硬件变更或凭证泄露),清除本地凭证重新注册...');
log.warn('clawd', '硬件指纹不符,清除凭证重新注册...');
this._cfg.claw_id = null;
this._cfg.token = null;
config.save(this._cfg);
} else if (msg.msg && msg.msg.includes('invalid')) {
console.warn('[clawd] 凭证无效,清除本地凭证重新注册...');
log.warn('clawd', '凭证无效,清除凭证重新注册...');
this._cfg.claw_id = null;
this._cfg.token = null;
config.save(this._cfg);
}
break;
default:
console.warn('[clawd] 未知消息类型:', msg.type);
log.warn('clawd', '未知消息类型:', msg.type);
}
}
_onConnected(msg) {
const isNew = !this._cfg.claw_id;
// 保存 claw_id + token
this._cfg.claw_id = msg.claw_id;
this._cfg.token = msg.token;
config.save(this._cfg);
if (isNew) {
console.log(`[clawd] 注册成功claw_id = ${msg.claw_id}`);
log.info('clawd', `注册成功claw_id = ${msg.claw_id}`);
}
if (msg.status === 'inactive') {
const id = String(msg.claw_id).padEnd(6);
const pin = String(msg.pin);
console.log('');
console.log('╔════════════════════════════════════╗');
console.log(`║ Claw ID : ${id}`);
console.log(`║ PIN 码 : ${pin}`);
console.log('║ 请在网页前台「添加设备」中输入 ║');
console.log('╚════════════════════════════════════╝');
console.log('');
console.log('[clawd] 等待激活,心跳正常运行...');
log.info('clawd', '');
log.info('clawd', '╔════════════════════════════════════╗');
log.info('clawd', `║ Claw ID : ${id}`);
log.info('clawd', `║ PIN 码 : ${pin}`);
log.info('clawd', '║ 请在网页前台「添加设备」中输入 ║');
log.info('clawd', '╚════════════════════════════════════╝');
log.info('clawd', '');
log.info('clawd', '等待激活,心跳正常运行...');
} else {
console.log(`[clawd] 已激活 claw_id = ${msg.claw_id}`);
log.info('clawd', `已激活 claw_id = ${msg.claw_id}`);
}
// 启动 frpc如果 VPS 下发了 frp 配置)
if (msg.frp && msg.frp.server && msg.frp.auth_token) {
this._frpc.start(msg.claw_id, msg.frp).catch(e => {
console.error('[frpc] 启动失败:', e.message);
log.error('frpc', '启动失败:', e.message);
});
}
// 开始心跳
this._startHeartbeat();
}
// ── 心跳 ─────────────────────────────────────────────────────────────────
// ── 心跳 ────────────────────────────────────────────────────────────────────
_startHeartbeat() {
this._clearHeartbeat();
const interval = (this._cfg.heartbeat_interval || 30) * 1000;
// 立即发一次
this._sendHeartbeat();
this._hbTimer = setInterval(() => this._sendHeartbeat(), interval);
}
@@ -185,7 +246,7 @@ class ClawClient {
metrics,
});
} catch (e) {
console.error('[clawd] 心跳发送失败:', e.message);
log.error('clawd', '心跳发送失败:', e.message);
}
}
@@ -196,13 +257,32 @@ class ClawClient {
}
}
// ── 工具 ──────────────────────────────────────────────────────────────────
// ── 工具 ────────────────────────────────────────────────────────────────────
_send(obj) {
if (this._ws && this._ws.readyState === WebSocket.OPEN) {
this._ws.send(JSON.stringify(obj));
}
}
// ── systemd Watchdog ────────────────────────────────────────────────────────
_startSdNotify() {
if (!SD_NOTIFY_INTERVAL) return;
log.debug('clawd', `systemd watchdog 启用,通知间隔 ${SD_NOTIFY_INTERVAL}ms`);
this._sdNotify('READY=1');
this._sdTimer = setInterval(() => this._sdNotify('WATCHDOG=1'), SD_NOTIFY_INTERVAL);
}
_sdNotify(msg) {
if (!process.env.NOTIFY_SOCKET) return;
try {
execFileSync('systemd-notify', ['--pid=' + process.pid, msg], { timeout: 2000 });
} catch (_) {
// systemd-notify 不可用时静默忽略
}
}
}
module.exports = { ClawClient };