feat: add structured logging, process watchdog, and systemd hardening

- Add lib/logger.js: timestamped structured logging with 5MB x 5 file rotation
- Add lib/watchdog.js: generic child process supervisor with rate-limited restarts
- Enhance client.js: WS ping/pong liveness detection, uncaughtException/unhandledRejection handlers, systemd sd-notify integration
- Refactor frpc.js: FrpcManager now delegates to Watchdog instead of manual spawn/exit
- Enhance install.sh: environment file, log directory, systemd resource limits, security hardening, WatchdogSec=60
- Replace all console.log/warn/error with structured logger across modules

Made-with: Cursor
This commit is contained in:
stswangzhiping
2026-03-16 07:31:19 +08:00
parent 42d1d361dc
commit b3770d21d4
9 changed files with 545 additions and 149 deletions

View File

@@ -1,85 +1,156 @@
'use strict';
const WebSocket = require('ws');
const { execFileSync } = require('child_process');
const config = require('./config');
const log = require('./logger');
const { getBoxId } = require('./fingerprint');
const { collect } = require('./metrics');
const { getDashboardInfo, startTtyd, FrpcManager } = require('./frpc');
const MAX_BACKOFF_MS = 60_000;
const MAX_BACKOFF_MS = 60_000;
const PONG_TIMEOUT_MS = 15_000;
const PING_INTERVAL_MS = 30_000;
// systemd watchdog: 如果 WatchdogSec 存在,定期发 WATCHDOG=1
const SD_WATCHDOG_USEC = parseInt(process.env.WATCHDOG_USEC || '0', 10);
const SD_NOTIFY_INTERVAL = SD_WATCHDOG_USEC > 0
? Math.floor(SD_WATCHDOG_USEC / 2 / 1000) // 半周期通知μs → ms
: 0;
class ClawClient {
constructor() {
this._cfg = config.load();
this._boxId = getBoxId();
this._ws = null;
this._hbTimer = null; // 心跳定时器
this._backoff = 1_000; // 重连等待ms
this._hbTimer = null;
this._backoff = 1_000;
this._stopped = false;
this._frpc = new FrpcManager();
this._dashInfo = {}; // { dashboard_token, dashboard_port }
this._dashInfo = {};
// WS 层活性检测
this._pingTimer = null;
this._awaitingPong = false;
// systemd watchdog
this._sdTimer = null;
this._setupGlobalHandlers();
}
// ── 全局异常兜底 ─────────────────────────────────────────────────────────────
_setupGlobalHandlers() {
process.on('uncaughtException', (err) => {
log.error('process', '未捕获异常:', err);
// 给日志写盘的时间,然后退出让 systemd 重启
setTimeout(() => process.exit(1), 1000);
});
process.on('unhandledRejection', (reason) => {
log.error('process', '未处理的 Promise 拒绝:', reason);
});
}
// ── 生命周期 ─────────────────────────────────────────────────────────────────
async start() {
console.log(`[clawd] 启动中... 服务器 = ${this._cfg.server}`);
// 并行:获取 openclaw dashboard 信息 + 启动 ttyd
log.info('clawd', `启动中... 服务器 = ${this._cfg.server}`);
const [dashInfo] = await Promise.all([
getDashboardInfo(),
startTtyd().catch(e => console.warn('[ttyd] 启动失败:', e.message)),
startTtyd().catch(e => log.warn('ttyd', '启动失败:', e.message)),
]);
this._dashInfo = dashInfo || {};
this._startSdNotify();
this._connect();
}
stop() {
this._stopped = true;
this._clearHeartbeat();
this._clearPing();
if (this._sdTimer) { clearInterval(this._sdTimer); this._sdTimer = null; }
this._frpc.stop();
if (this._ws) this._ws.terminate();
console.log('[clawd] 已停止');
this._sdNotify('STOPPING=1');
log.info('clawd', '已停止');
log.close();
}
// ── 连接 ──────────────────────────────────────────────────────────────────
// ── WebSocket 连接 ──────────────────────────────────────────────────────────
_connect() {
if (this._stopped) return;
console.log(`[clawd] 正在连接 ${this._cfg.server} ...`);
log.info('clawd', `正在连接 ${this._cfg.server} ...`);
const ws = new WebSocket(this._cfg.server, {
handshakeTimeout: 10_000,
});
this._ws = ws;
ws.on('open', () => {
console.log('[clawd] WebSocket 已连接');
log.info('clawd', 'WebSocket 已连接');
this._backoff = 1_000;
this._sendConnect();
this._startPing();
});
ws.on('message', (data) => {
try {
this._handleMessage(JSON.parse(data.toString()));
} catch (e) {
console.error('[clawd] 消息解析失败:', e.message);
log.error('clawd', '消息解析失败:', e.message);
}
});
ws.on('pong', () => {
this._awaitingPong = false;
});
ws.on('close', (code, reason) => {
this._clearHeartbeat();
this._clearPing();
if (!this._stopped) {
console.warn(`[clawd] 连接断开 (${code})${this._backoff / 1000}s 后重连...`);
log.warn('clawd', `连接断开 (${code})${this._backoff / 1000}s 后重连...`);
setTimeout(() => this._connect(), this._backoff);
this._backoff = Math.min(this._backoff * 2, MAX_BACKOFF_MS);
}
});
ws.on('error', (err) => {
console.error('[clawd] 连接错误:', err.message);
// close 事件会在 error 之后触发,重连逻辑在 close 里处理
log.error('clawd', '连接错误:', err.message);
});
}
// ── 发送 connect ──────────────────────────────────────────────────────────
// ── WS 层 Ping/Pong 活性检测 ──────────────────────────────────────────────
_startPing() {
this._clearPing();
this._pingTimer = setInterval(() => {
if (!this._ws || this._ws.readyState !== WebSocket.OPEN) return;
if (this._awaitingPong) {
log.warn('clawd', 'Pong 超时,连接可能已死,主动关闭重连');
this._ws.terminate();
return;
}
this._awaitingPong = true;
try { this._ws.ping(); } catch (_) {}
}, PING_INTERVAL_MS);
}
_clearPing() {
if (this._pingTimer) {
clearInterval(this._pingTimer);
this._pingTimer = null;
}
this._awaitingPong = false;
}
// ── 发送 connect ─────────────────────────────────────────────────────────────
_sendConnect() {
const msg = {
@@ -87,13 +158,12 @@ class ClawClient {
box_id: this._boxId,
claw_id: this._cfg.claw_id ?? null,
token: this._cfg.token ?? null,
// dashboard 信息可选openclaw 未安装时为空)
...this._dashInfo,
};
this._send(msg);
}
// ── 消息处理 ──────────────────────────────────────────────────────────────
// ── 消息处理 ─────────────────────────────────────────────────────────────────
_handleMessage(msg) {
switch (msg.type) {
@@ -101,76 +171,67 @@ class ClawClient {
this._onConnected(msg);
break;
case 'heartbeat_ack':
// 正常回包,静默处理
break;
case 'error':
console.error(`[clawd] 服务器错误: ${msg.msg}`);
log.error('clawd', `服务器错误: ${msg.msg}`);
if (msg.msg === 'hardware_mismatch') {
// box_id 与库中不符:硬件变更或凭证泄露
// 清空本地凭证,下次重连走全新注册流程
console.warn('[clawd] 硬件指纹与服务器不符(硬件变更或凭证泄露),清除本地凭证重新注册...');
log.warn('clawd', '硬件指纹不符,清除凭证重新注册...');
this._cfg.claw_id = null;
this._cfg.token = null;
config.save(this._cfg);
} else if (msg.msg && msg.msg.includes('invalid')) {
console.warn('[clawd] 凭证无效,清除本地凭证重新注册...');
log.warn('clawd', '凭证无效,清除凭证重新注册...');
this._cfg.claw_id = null;
this._cfg.token = null;
config.save(this._cfg);
}
break;
default:
console.warn('[clawd] 未知消息类型:', msg.type);
log.warn('clawd', '未知消息类型:', msg.type);
}
}
_onConnected(msg) {
const isNew = !this._cfg.claw_id;
// 保存 claw_id + token
this._cfg.claw_id = msg.claw_id;
this._cfg.token = msg.token;
config.save(this._cfg);
if (isNew) {
console.log(`[clawd] 注册成功claw_id = ${msg.claw_id}`);
log.info('clawd', `注册成功claw_id = ${msg.claw_id}`);
}
if (msg.status === 'inactive') {
const id = String(msg.claw_id).padEnd(6);
const pin = String(msg.pin);
console.log('');
console.log('╔════════════════════════════════════╗');
console.log(`║ Claw ID : ${id}`);
console.log(`║ PIN 码 : ${pin}`);
console.log('║ 请在网页前台「添加设备」中输入 ║');
console.log('╚════════════════════════════════════╝');
console.log('');
console.log('[clawd] 等待激活,心跳正常运行...');
log.info('clawd', '');
log.info('clawd', '╔════════════════════════════════════╗');
log.info('clawd', `║ Claw ID : ${id}`);
log.info('clawd', `║ PIN 码 : ${pin}`);
log.info('clawd', '║ 请在网页前台「添加设备」中输入 ║');
log.info('clawd', '╚════════════════════════════════════╝');
log.info('clawd', '');
log.info('clawd', '等待激活,心跳正常运行...');
} else {
console.log(`[clawd] 已激活 claw_id = ${msg.claw_id}`);
log.info('clawd', `已激活 claw_id = ${msg.claw_id}`);
}
// 启动 frpc如果 VPS 下发了 frp 配置)
if (msg.frp && msg.frp.server && msg.frp.auth_token) {
this._frpc.start(msg.claw_id, msg.frp).catch(e => {
console.error('[frpc] 启动失败:', e.message);
log.error('frpc', '启动失败:', e.message);
});
}
// 开始心跳
this._startHeartbeat();
}
// ── 心跳 ─────────────────────────────────────────────────────────────────
// ── 心跳 ────────────────────────────────────────────────────────────────────
_startHeartbeat() {
this._clearHeartbeat();
const interval = (this._cfg.heartbeat_interval || 30) * 1000;
// 立即发一次
this._sendHeartbeat();
this._hbTimer = setInterval(() => this._sendHeartbeat(), interval);
}
@@ -185,7 +246,7 @@ class ClawClient {
metrics,
});
} catch (e) {
console.error('[clawd] 心跳发送失败:', e.message);
log.error('clawd', '心跳发送失败:', e.message);
}
}
@@ -196,13 +257,32 @@ class ClawClient {
}
}
// ── 工具 ──────────────────────────────────────────────────────────────────
// ── 工具 ────────────────────────────────────────────────────────────────────
_send(obj) {
if (this._ws && this._ws.readyState === WebSocket.OPEN) {
this._ws.send(JSON.stringify(obj));
}
}
// ── systemd Watchdog ────────────────────────────────────────────────────────
_startSdNotify() {
if (!SD_NOTIFY_INTERVAL) return;
log.debug('clawd', `systemd watchdog 启用,通知间隔 ${SD_NOTIFY_INTERVAL}ms`);
this._sdNotify('READY=1');
this._sdTimer = setInterval(() => this._sdNotify('WATCHDOG=1'), SD_NOTIFY_INTERVAL);
}
_sdNotify(msg) {
if (!process.env.NOTIFY_SOCKET) return;
try {
execFileSync('systemd-notify', ['--pid=' + process.pid, msg], { timeout: 2000 });
} catch (_) {
// systemd-notify 不可用时静默忽略
}
}
}
module.exports = { ClawClient };

View File

@@ -26,7 +26,8 @@ function load() {
return Object.assign({}, DEFAULTS, JSON.parse(raw));
}
} catch (e) {
console.error('[config] 读取配置失败,使用默认值:', e.message);
const log = require('./logger');
log.error('config', '读取配置失败,使用默认值:', e.message);
}
return Object.assign({}, DEFAULTS);
}
@@ -36,7 +37,8 @@ function save(data) {
fs.mkdirSync(CONFIG_DIR, { recursive: true });
fs.writeFileSync(CONFIG_FILE, JSON.stringify(data, null, 2), 'utf8');
} catch (e) {
console.error('[config] 写入配置失败:', e.message);
const log = require('./logger');
log.error('config', '写入配置失败:', e.message);
}
}

View File

@@ -104,7 +104,8 @@ function getPersistentUUID() {
fs.writeFileSync(PERSIST_FILE, id, 'utf8');
} catch (e) {
// 写不进去也没关系,本次用内存值(重启后会变,但这是最后兜底)
console.warn('[fingerprint] 无法持久化 box_id:', e.message);
const log = require('./logger');
log.warn('fingerprint', '无法持久化 box_id:', e.message);
}
return id;
}

View File

@@ -5,15 +5,15 @@ const fs = require('fs');
const os = require('os');
const path = require('path');
const https = require('https');
const log = require('./logger');
const { Watchdog } = require('./watchdog');
// frpc 配置目录(与 clawd config 同目录)
const CONFIG_DIR = process.env.CLAWD_CONFIG_DIR
|| (process.getuid && process.getuid() === 0 ? '/etc/clawd' : path.join(os.homedir(), '.clawd'));
const FRPC_BIN = path.join(CONFIG_DIR, 'frpc');
const FRPC_CONFIG = path.join(CONFIG_DIR, 'frpc.toml');
const TTYD_BIN = path.join(CONFIG_DIR, 'ttyd');
// frp / ttyd 版本
const FRP_VERSION = '0.62.0';
const TTYD_VERSION = '1.7.7';
const TTYD_PORT = 7681;
@@ -27,14 +27,12 @@ function getDashboardInfo() {
return new Promise((resolve) => {
const tmpLog = '/tmp/clawd-dashboard.log';
// 后台启动 dashboard输出重定向到日志文件
try {
execSync(`openclaw dashboard > ${tmpLog} 2>&1 &`, { shell: true, timeout: 3000 });
} catch (e) {
// 已在运行或命令不存在,继续轮询
}
// 每秒读一次日志文件,最多等 10 秒
let attempts = 0;
const interval = setInterval(() => {
attempts++;
@@ -45,7 +43,7 @@ function getDashboardInfo() {
clearInterval(interval);
const port = parseInt(match[1], 10);
const token = match[2];
console.log(`[frpc] openclaw dashboard: port=${port}, token=${token.substring(0, 8)}...`);
log.info('dashboard', `openclaw dashboard: port=${port}, token=${token.substring(0, 8)}...`);
resolve({ dashboard_port: port, dashboard_token: token });
return;
}
@@ -53,18 +51,16 @@ function getDashboardInfo() {
if (attempts >= 10) {
clearInterval(interval);
log.debug('dashboard', 'openclaw dashboard 未检测到,跳过');
resolve({});
}
}, 1000);
});
}
/**
* 根据当前系统架构下载对应的 frpc 二进制。
*/
async function downloadFrpc() {
const arch = os.arch(); // 'x64', 'arm64', 'arm', ...
const platform = os.platform(); // 'linux'
const arch = os.arch();
const platform = os.platform();
const archMap = {
x64: 'amd64', arm64: 'arm64',
@@ -76,56 +72,50 @@ async function downloadFrpc() {
const url = `https://github.com/fatedier/frp/releases/download/v${FRP_VERSION}/${filename}`;
const tmpFile = `/tmp/${filename}`;
console.log(`[frpc] 下载 frpc ${FRP_VERSION} (${platform}/${frpArch})...`);
log.info('frpc', `下载 frpc ${FRP_VERSION} (${platform}/${frpArch})...`);
await downloadFile(url, tmpFile);
// 解压并复制 frpc
fs.mkdirSync(CONFIG_DIR, { recursive: true });
execSync(`tar -xzf ${tmpFile} -C /tmp && cp /tmp/frp_${FRP_VERSION}_${platform}_${frpArch}/frpc ${FRPC_BIN}`, {
stdio: 'inherit'
});
fs.chmodSync(FRPC_BIN, 0o755);
console.log(`[frpc] frpc 已安装到 ${FRPC_BIN}`);
log.info('frpc', `frpc 已安装到 ${FRPC_BIN}`);
}
/**
* 下载 ttyd 静态二进制。
*/
async function downloadTtyd() {
const arch = os.arch();
const archMap = { arm64: 'aarch64', x64: 'x86_64', arm: 'armv7l', ia32: 'i686' };
const ttydArch = archMap[arch] || 'x86_64';
const url = `https://github.com/tsl0922/ttyd/releases/download/${TTYD_VERSION}/ttyd.${ttydArch}`;
console.log(`[ttyd] 下载 ttyd ${TTYD_VERSION} (${ttydArch})...`);
log.info('ttyd', `下载 ttyd ${TTYD_VERSION} (${ttydArch})...`);
fs.mkdirSync(CONFIG_DIR, { recursive: true });
await downloadFile(url, TTYD_BIN);
fs.chmodSync(TTYD_BIN, 0o755);
console.log(`[ttyd] ttyd 已安装到 ${TTYD_BIN}`);
log.info('ttyd', `ttyd 已安装到 ${TTYD_BIN}`);
}
/**
* 启动 ttyd如未安装先下载
* ttyd 绑定 127.0.0.1:7681供 frpc 代理。
* 返回 true 表示启动成功false 表示失败。
*/
async function startTtyd() {
if (!fs.existsSync(TTYD_BIN)) {
try {
await downloadTtyd();
} catch (e) {
console.warn('[ttyd] 下载失败:', e.message);
log.warn('ttyd', '下载失败:', e.message);
return false;
}
}
// 终止旧进程(重启 clawd 时可能残留)
// 终止旧进程
try {
execSync(`pkill -f "${TTYD_BIN}"`, { timeout: 3000 });
// 稍等旧进程退出
await new Promise(r => setTimeout(r, 500));
} catch (_) { /* 无进程可杀,忽略 */ }
} catch (_) {}
try {
const shell = fs.existsSync('/bin/bash') ? '/bin/bash' : '/bin/sh';
@@ -134,10 +124,10 @@ async function startTtyd() {
detached: true,
});
proc.unref();
console.log(`[ttyd] 已启动,端口 ${TTYD_PORT}shell=${shell}`);
log.info('ttyd', `已启动,端口 ${TTYD_PORT}shell=${shell}`);
return true;
} catch (e) {
console.warn('[ttyd] 启动失败:', e.message);
log.warn('ttyd', '启动失败:', e.message);
return false;
}
}
@@ -156,12 +146,6 @@ function downloadFile(url, dest) {
});
}
/**
* 生成 frpc.toml 配置文件。
* 包含两条代理:
* - dashboard-{clawId} → openclaw dashboard
* - tty-{clawId} → ttyd 终端
*/
function writeFrpcConfig(clawId, frpConfig) {
const { server, port, auth_token, dashboard_local_port = 18789 } = frpConfig;
const ttyRemotePort = 10000 + Number(clawId);
@@ -187,66 +171,44 @@ remotePort = ${ttyRemotePort}
`;
fs.mkdirSync(CONFIG_DIR, { recursive: true });
fs.writeFileSync(FRPC_CONFIG, toml, 'utf8');
console.log(`[frpc] frpc.toml 已写入: dashboard subdomain=${clawId}, tty tcp-port=${ttyRemotePort}`);
log.info('frpc', `frpc.toml 已写入: dashboard subdomain=${clawId}, tty tcp-port=${ttyRemotePort}`);
}
/**
* FrpcManager —— 基于 Watchdog 的 frpc 进程管理器。
* 崩溃自动重启5 分钟内最多重启 10 次。
*/
class FrpcManager {
constructor() {
this._proc = null;
this._stopped = false;
this._restartTimer = null;
this._watchdog = null;
}
/**
* 启动 frpc如未安装先下载写配置然后 spawn。
*/
async start(clawId, frpConfig) {
this._stopped = false;
this.stop();
// 下载 frpc如果不存在
if (!fs.existsSync(FRPC_BIN)) {
try {
await downloadFrpc();
} catch (e) {
console.error('[frpc] 下载 frpc 失败:', e.message);
log.error('frpc', '下载 frpc 失败:', e.message);
return;
}
}
writeFrpcConfig(clawId, frpConfig);
this._spawn();
}
_spawn() {
if (this._stopped) return;
console.log('[frpc] 启动 frpc...');
this._proc = spawn(FRPC_BIN, ['-c', FRPC_CONFIG], {
stdio: ['ignore', 'pipe', 'pipe'],
});
this._proc.stdout.on('data', d => {
const line = d.toString().trim();
if (line) console.log(`[frpc] ${line}`);
});
this._proc.stderr.on('data', d => {
const line = d.toString().trim();
if (line) console.warn(`[frpc] ${line}`);
});
this._proc.on('exit', (code) => {
console.warn(`[frpc] 进程退出 (code=${code})`);
if (!this._stopped) {
this._restartTimer = setTimeout(() => this._spawn(), 5000);
}
this._watchdog = new Watchdog('frpc', FRPC_BIN, ['-c', FRPC_CONFIG], {
maxRestarts: 10,
windowMs: 300_000,
restartDelay: 5_000,
});
this._watchdog.start();
}
stop() {
this._stopped = true;
if (this._restartTimer) clearTimeout(this._restartTimer);
if (this._proc) {
this._proc.kill('SIGTERM');
this._proc = null;
if (this._watchdog) {
this._watchdog.stop();
this._watchdog = null;
}
}
}

118
lib/logger.js Normal file
View File

@@ -0,0 +1,118 @@
'use strict';
const fs = require('fs');
const path = require('path');
const os = require('os');
// Numeric severity per level name; Logger drops messages whose value is below its threshold.
const LEVELS = { debug: 0, info: 1, warn: 2, error: 3 };
// Base directory: CLAWD_CONFIG_DIR if set, otherwise /etc/clawd when running as uid 0, else ~/.clawd.
const CONFIG_DIR = process.env.CLAWD_CONFIG_DIR
|| (process.getuid && process.getuid() === 0 ? '/etc/clawd' : path.join(os.homedir(), '.clawd'));
// Log directory: CLAWD_LOG_DIR if set, otherwise the "logs" folder under CONFIG_DIR.
const LOG_DIR = process.env.CLAWD_LOG_DIR || path.join(CONFIG_DIR, 'logs');
const MAX_FILE_SIZE = 5 * 1024 * 1024; // 5 MB; the active log file is rotated once it reaches this size
const MAX_FILES = 5; // rotation depth used by Logger._rotate
/**
 * Small structured logger.
 *
 * Every message is rendered as `<ISO timestamp> <LEVEL> [<tag>] <body>` and
 * written to the console (console.error / console.warn / console.log by
 * level). Unless disabled, the same line is appended to `clawd.log` inside
 * LOG_DIR, which is rotated once it reaches MAX_FILE_SIZE bytes.
 */
class Logger {
  /**
   * @param {object} [opts]
   * @param {string} [opts.level] - Minimum level name (`debug`, `info`, `warn`, `error`);
   *   falls back to CLAWD_LOG_LEVEL, then `info`. Matching is case-insensitive and
   *   unknown names fall back to `info`.
   * @param {boolean} [opts.logToFile] - Enable file output; defaults to true unless
   *   CLAWD_LOG_FILE is `'0'`.
   */
  constructor(opts = {}) {
    const levelName = String(opts.level || process.env.CLAWD_LOG_LEVEL || 'info').toLowerCase();
    // Own-property check so inherited keys such as "toString" cannot become the threshold.
    this._level = Object.prototype.hasOwnProperty.call(LEVELS, levelName)
      ? LEVELS[levelName]
      : LEVELS.info;
    this._logToFile = opts.logToFile ?? (process.env.CLAWD_LOG_FILE !== '0');
    this._stream = null;
    this._filePath = null;
    this._fileSize = 0;

    if (this._logToFile) {
      this._ensureLogDir();
    }
  }

  debug(tag, ...args) { this._log('debug', tag, args); }
  info(tag, ...args) { this._log('info', tag, args); }
  warn(tag, ...args) { this._log('warn', tag, args); }
  error(tag, ...args) { this._log('error', tag, args); }

  /**
   * Formats and emits one message if `level` passes the configured threshold.
   *
   * @param {string} level - One of the LEVELS keys.
   * @param {string} tag - Short component name shown in brackets.
   * @param {Array<*>} args - Values joined with single spaces to form the body.
   */
  _log(level, tag, args) {
    if (LEVELS[level] < this._level) return;

    const ts = new Date().toISOString();
    const lvl = level.toUpperCase().padEnd(5);
    const body = args.map((a) => this._format(a)).join(' ');
    const line = `${ts} ${lvl} [${tag}] ${body}`;

    const consoleFn = level === 'error' ? console.error
      : level === 'warn' ? console.warn
        : console.log;
    consoleFn(line);

    if (this._logToFile) this._writeToFile(line + '\n');
  }

  /**
   * Renders one log argument as text. Errors become their stack (or message);
   * other objects are JSON-encoded so their content is visible instead of
   * "[object Object]"; everything else goes through String(). Never throws.
   *
   * @param {*} value
   * @returns {string}
   */
  _format(value) {
    if (value instanceof Error) return value.stack || value.message;

    if (value !== null && typeof value === 'object') {
      try {
        const json = JSON.stringify(value);
        if (typeof json === 'string') return json;
      } catch (_) {
        // Circular structure or BigInt member: fall through to String().
      }
    }

    try {
      return String(value);
    } catch (_) {
      // e.g. a null-prototype object with no toString.
      return Object.prototype.toString.call(value);
    }
  }

  /** Creates LOG_DIR if needed; disables file logging when that fails. */
  _ensureLogDir() {
    try {
      fs.mkdirSync(LOG_DIR, { recursive: true });
    } catch (_) {
      this._logToFile = false;
    }
  }

  /**
   * Appends one already-terminated line to the log file, opening the file on
   * first use and rotating once the tracked size reaches MAX_FILE_SIZE.
   *
   * @param {string} line
   */
  _writeToFile(line) {
    if (!this._stream) this._openFile();
    if (!this._stream) return;

    this._stream.write(line);
    this._fileSize += Buffer.byteLength(line);

    if (this._fileSize >= MAX_FILE_SIZE) this._rotate();
  }

  /**
   * Opens `clawd.log` in append mode and seeds the size counter from the
   * existing file. Any failure disables file logging.
   */
  _openFile() {
    try {
      this._filePath = path.join(LOG_DIR, 'clawd.log');
      try {
        this._fileSize = fs.statSync(this._filePath).size;
      } catch (_) {
        this._fileSize = 0;
      }

      const stream = fs.createWriteStream(this._filePath, { flags: 'a' });
      stream.on('error', () => {
        // Ignore late errors from a stream that was already replaced by rotation.
        if (this._stream !== stream) return;
        this._logToFile = false;
        this._stream = null;
      });
      this._stream = stream;
    } catch (_) {
      this._logToFile = false;
    }
  }

  /**
   * Rotates the log files so that at most MAX_FILES exist: the active
   * `clawd.log` plus `clawd.log.1` .. `clawd.log.<MAX_FILES - 1>`.
   * The oldest generation is deleted, the others shift up by one, and a fresh
   * `clawd.log` is opened. Every filesystem step is best-effort.
   */
  _rotate() {
    if (this._stream) {
      this._stream.end();
      this._stream = null;
    }

    const generation = (i) => path.join(LOG_DIR, `clawd.log.${i}`);

    // Drop the oldest generation first.
    try { fs.unlinkSync(generation(MAX_FILES - 1)); } catch (_) {}

    // clawd.log.<n> -> clawd.log.<n + 1>, oldest first so nothing is overwritten.
    for (let i = MAX_FILES - 2; i >= 1; i--) {
      try { fs.renameSync(generation(i), generation(i + 1)); } catch (_) {}
    }

    try {
      fs.renameSync(this._filePath, generation(1));
    } catch (_) {}

    this._fileSize = 0;
    this._openFile();
  }

  /** Ends the current file stream, if one is open. */
  close() {
    if (this._stream) {
      this._stream.end();
      this._stream = null;
    }
  }
}
// The module exports a single Logger instance, created with default options when this file is loaded.
const logger = new Logger();
module.exports = logger;

121
lib/watchdog.js Normal file
View File

@@ -0,0 +1,121 @@
'use strict';
const { spawn } = require('child_process');
const log = require('./logger');
// Defaults applied by the Watchdog constructor when the matching option is omitted.
const DEFAULT_MAX_RESTARTS = 10; // restarts tolerated inside one window before supervision stops
const DEFAULT_WINDOW_MS = 300_000; // 5 min
const DEFAULT_RESTART_DELAY = 3_000; // base delay in ms; multiplied by the recent-restart count, capped at 5x
/**
 * Generic child-process supervisor: restarts the child when it exits or fails
 * to spawn, rate-limits restarts, and forwards output line by line.
 *
 * Usage:
 *   const wd = new Watchdog('frpc', '/path/to/frpc', ['-c', 'frpc.toml'], {
 *     maxRestarts: 10,
 *     windowMs: 300_000,
 *     onStdout: (line) => { ... },
 *   });
 *   wd.start();
 *   wd.stop();
 */
class Watchdog {
  /**
   * @param {string} name - Tag used for log lines.
   * @param {string} bin - Executable to spawn.
   * @param {string[]} [args] - Arguments passed to the executable.
   * @param {object} [opts]
   * @param {function(string)} [opts.onStdout] - Receives each non-empty stdout line (logged at info when omitted).
   * @param {function(string)} [opts.onStderr] - Receives each non-empty stderr line (logged at warn when omitted).
   * @param {function(?number, ?string)} [opts.onExit] - Called with (code, signal) on every child exit.
   * @param {object} [opts.spawnOpts] - Extra options merged into the spawn() options.
   * @param {number} [opts.maxRestarts] - Restarts allowed inside one window.
   * @param {number} [opts.windowMs] - Length of the rate-limit window in ms.
   * @param {number} [opts.restartDelay] - Base restart delay in ms.
   */
  constructor(name, bin, args = [], opts = {}) {
    this._name = name;
    this._bin = bin;
    this._args = args;
    this._proc = null;
    this._stopped = false;
    this._restartTimer = null;

    this._onStdout = opts.onStdout || null;
    this._onStderr = opts.onStderr || null;
    this._onExit = opts.onExit || null;
    this._spawnOpts = opts.spawnOpts || {};

    this._maxRestarts = opts.maxRestarts ?? DEFAULT_MAX_RESTARTS;
    this._windowMs = opts.windowMs ?? DEFAULT_WINDOW_MS;
    this._restartDelay = opts.restartDelay ?? DEFAULT_RESTART_DELAY;
    this._restartTimes = []; // timestamps of recent restarts
  }

  /** True while a child is being supervised and has not been sent a kill signal. */
  get running() {
    return !!(this._proc && !this._proc.killed);
  }

  /** Starts supervision. Does nothing extra when a child is already being supervised. */
  start() {
    this._stopped = false;
    // A pending restart plus an immediate spawn would yield two children.
    this._cancelRestart();
    if (this._proc) return;
    this._spawn();
  }

  /**
   * Stops supervision: cancels any pending restart and sends SIGTERM to the
   * child, followed by SIGKILL after 5 seconds if it has not exited by then.
   */
  stop() {
    this._stopped = true;
    this._cancelRestart();

    const proc = this._proc;
    if (!proc) return;
    this._proc = null;

    proc.kill('SIGTERM');
    // Hard-kill fallback; cancelled as soon as the child actually exits.
    const killTimer = setTimeout(() => {
      try { proc.kill('SIGKILL'); } catch (_) {}
    }, 5000);
    proc.once('exit', () => clearTimeout(killTimer));
  }

  /** Cancels the pending restart timer, if any. */
  _cancelRestart() {
    if (this._restartTimer) clearTimeout(this._restartTimer);
    this._restartTimer = null;
  }

  /** Spawns the child and wires its output, error and exit handling. */
  _spawn() {
    if (this._stopped) return;
    this._restartTimer = null;
    log.info(this._name, '启动进程...');

    let proc;
    try {
      proc = spawn(this._bin, this._args, {
        stdio: ['ignore', 'pipe', 'pipe'],
        ...this._spawnOpts,
      });
    } catch (err) {
      // spawn() can throw synchronously; inside a restart timer that would be uncaught.
      log.error(this._name, '进程启动失败:', err.message);
      this._scheduleRestart();
      return;
    }
    this._proc = proc;

    // stdout/stderr are null when spawnOpts overrides stdio (e.g. 'inherit').
    if (proc.stdout) {
      proc.stdout.on('data', (chunk) => this._emitLines(chunk, this._onStdout, 'info'));
    }
    if (proc.stderr) {
      proc.stderr.on('data', (chunk) => this._emitLines(chunk, this._onStderr, 'warn'));
    }

    proc.on('error', (err) => {
      log.error(this._name, '进程启动失败:', err.message);
      // A defined pid means the child is running and the error came from
      // elsewhere (e.g. a failed kill). An undefined pid means the spawn itself
      // failed, in which case no 'exit' event follows and we must restart here.
      if (proc.pid !== undefined) return;
      if (this._proc !== proc) return;
      this._proc = null;
      if (!this._stopped) this._scheduleRestart();
    });

    proc.on('exit', (code, signal) => {
      log.warn(this._name, `进程退出 code=${code} signal=${signal}`);
      // Ignore bookkeeping for a child that stop() already detached, so a late
      // exit cannot clobber (or duplicate) a newer child.
      const wasCurrent = this._proc === proc;
      if (wasCurrent) this._proc = null;
      if (this._onExit) this._onExit(code, signal);
      if (wasCurrent && !this._stopped) this._scheduleRestart();
    });
  }

  /**
   * Splits one output chunk into lines and hands each non-empty, trimmed line
   * to `handler`, or to the logger at `level` when no handler is set.
   *
   * @param {Buffer|string} chunk
   * @param {?function(string)} handler
   * @param {string} level - Logger method name used as fallback.
   */
  _emitLines(chunk, handler, level) {
    for (const raw of chunk.toString().split(/\r?\n/)) {
      const line = raw.trim();
      if (!line) continue;
      if (handler) handler(line);
      else log[level](this._name, line);
    }
  }

  /**
   * Records a restart and schedules the next spawn, unless more than
   * `maxRestarts` restarts happened inside the last `windowMs` milliseconds,
   * in which case supervision stops. The delay grows with the number of recent
   * restarts, capped at five times the base delay.
   */
  _scheduleRestart() {
    this._cancelRestart();

    const now = Date.now();
    this._restartTimes.push(now);
    // Keep only the entries inside the window.
    this._restartTimes = this._restartTimes.filter((t) => now - t < this._windowMs);

    if (this._restartTimes.length > this._maxRestarts) {
      log.error(this._name,
        `${this._windowMs / 1000}s 内重启 ${this._restartTimes.length} 次,超过上限 ${this._maxRestarts},停止守护`);
      return;
    }

    const delay = this._restartDelay * Math.min(this._restartTimes.length, 5);
    log.info(this._name, `${delay / 1000}s 后重启... (窗口内第 ${this._restartTimes.length} 次)`);
    this._restartTimer = setTimeout(() => this._spawn(), delay);
  }
}
module.exports = { Watchdog };