From b3770d21d4852a9abdd1ebf14600a19ea7841652 Mon Sep 17 00:00:00 2001 From: stswangzhiping <59632378+stswangzhiping@users.noreply.github.com> Date: Mon, 16 Mar 2026 07:31:19 +0800 Subject: [PATCH] feat: add structured logging, process watchdog, and systemd hardening - Add lib/logger.js: timestamped structured logging with 5MB x 5 file rotation - Add lib/watchdog.js: generic child process supervisor with rate-limited restarts - Enhance client.js: WS ping/pong liveness detection, uncaughtException/unhandledRejection handlers, systemd sd-notify integration - Refactor frpc.js: FrpcManager now delegates to Watchdog instead of manual spawn/exit - Enhance install.sh: environment file, log directory, systemd resource limits, security hardening, WatchdogSec=60 - Replace all console.log/warn/error with structured logger across modules Made-with: Cursor --- README.md | 70 ++++++++++++++----- bin/clawd.js | 16 ++++- install.sh | 90 ++++++++++++++++++++---- lib/client.js | 170 +++++++++++++++++++++++++++++++++------------ lib/config.js | 6 +- lib/fingerprint.js | 3 +- lib/frpc.js | 100 +++++++++----------------- lib/logger.js | 118 +++++++++++++++++++++++++++++++ lib/watchdog.js | 121 ++++++++++++++++++++++++++++++++ 9 files changed, 545 insertions(+), 149 deletions(-) create mode 100644 lib/logger.js create mode 100644 lib/watchdog.js diff --git a/README.md b/README.md index b64e01e..c1d9e2a 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,11 @@ Claw Box 守护进程,将本地 Linux 设备通过 WebSocket 长连接接入 [ - 首次连接自动注册,获取 `claw_id` + `token` 并持久化 - 每 30 秒上报系统指标(CPU、内存、磁盘、温度、负载、运行时间) - 断线自动重连(指数退避,最大 60 秒) -- systemd 管理,开机自启 +- WS 层 Ping/Pong 活性检测,连接假死自动重连 +- frpc / ttyd 子进程 Watchdog 守护,崩溃自动重启(速率限制) +- 结构化日志 + 文件轮转(5MB × 5 份) +- systemd 集成:Watchdog、资源限制、优雅停止 +- 全局异常兜底(uncaughtException / unhandledRejection) ## 快速安装(Linux,需要 root) @@ -32,18 +36,17 @@ node bin/clawd.js ## 首次启动输出示例 ``` -[clawd] 启动中... -[clawd] box_id = a1b2c3d4e5f6... 
-[clawd] 服务器 = wss://claw.cutos.ai/ws -[clawd] WebSocket 已连接 -[clawd] 注册成功!claw_id = 1000 - -╔══════════════════════════════════╗ -║ 激活 PIN 码: 779413 ║ -║ 请在管理后台或前台输入此 PIN 码 ║ -╚══════════════════════════════════╝ - -[clawd] 等待激活中,心跳正常运行... +2026-03-16T10:00:00.000Z INFO [clawd] 启动中... 服务器 = wss://claw.cutos.ai/ws +2026-03-16T10:00:01.000Z INFO [clawd] WebSocket 已连接 +2026-03-16T10:00:01.100Z INFO [clawd] 注册成功!claw_id = 1000 +2026-03-16T10:00:01.100Z INFO [clawd] +2026-03-16T10:00:01.100Z INFO [clawd] ╔════════════════════════════════════╗ +2026-03-16T10:00:01.100Z INFO [clawd] ║ Claw ID : 1000 ║ +2026-03-16T10:00:01.100Z INFO [clawd] ║ PIN 码 : 779413 ║ +2026-03-16T10:00:01.100Z INFO [clawd] ║ 请在网页前台「添加设备」中输入 ║ +2026-03-16T10:00:01.100Z INFO [clawd] ╚════════════════════════════════════╝ +2026-03-16T10:00:01.100Z INFO [clawd] +2026-03-16T10:00:01.100Z INFO [clawd] 等待激活,心跳正常运行... ``` ## 配置文件 @@ -59,27 +62,60 @@ node bin/clawd.js } ``` +## 环境变量 + +| 变量 | 默认值 | 说明 | +|------|--------|------| +| `CLAWD_LOG_LEVEL` | `info` | 日志级别:debug / info / warn / error | +| `CLAWD_LOG_FILE` | `1` | 是否写日志文件(`0` = 仅 stdout/journald) | +| `CLAWD_LOG_DIR` | `~/.clawd/logs` | 日志文件目录 | +| `CLAWD_CONFIG_DIR` | `~/.clawd` | 配置目录 | + +systemd 安装后环境变量文件位于 `/etc/clawd/env`。 + ## 服务管理 ```bash systemctl status clawd # 查看状态 journalctl -u clawd -f # 实时日志 -systemctl restart clawd # 重启 -systemctl stop clawd # 停止 -systemctl disable clawd # 取消开机自启 +systemctl restart clawd # 重启 +systemctl stop clawd # 停止 +systemctl disable clawd # 取消开机自启 ``` +## 日志 + +- **stdout/journald**:所有日志同时输出到标准输出(systemd 自动采集到 journald) +- **文件日志**:`/etc/clawd/logs/clawd.log`,单文件 5MB,保留 5 份轮转 + ## 心跳上报字段 | 字段 | 说明 | 单位 | |------|------|------| | `cpu` | CPU 使用率 | % | | `mem_total` / `mem_used` | 内存总量 / 已用 | KB | -| `disk_total` / `disk_used` | 根分区总量 / 已用 | KB | +| `disk_total` / `disk_used` | 磁盘总量 / 已用 | KB | | `temperature` | CPU 温度 | °C | | `load_1m` / `load_5m` / `load_15m` | 系统负载 | — | | `uptime` | 运行时间 | 秒 | +## 架构 + +``` 
+clawd/ +├── bin/clawd.js ← 入口,优雅停止 +├── lib/ +│ ├── client.js ← 核心:WS 连接、心跳、Ping/Pong、sd-notify +│ ├── config.js ← 配置读写 +│ ├── fingerprint.js ← 硬件指纹生成 +│ ├── frpc.js ← frpc/ttyd/dashboard 管理(Watchdog 守护) +│ ├── logger.js ← 结构化日志 + 文件轮转 +│ ├── metrics.js ← 系统指标采集 +│ └── watchdog.js ← 通用子进程守护(速率限制重启) +├── install.sh ← 一键安装(含 systemd) +└── package.json +``` + ## License MIT diff --git a/bin/clawd.js b/bin/clawd.js index 56829c3..6c06d83 100644 --- a/bin/clawd.js +++ b/bin/clawd.js @@ -2,10 +2,20 @@ 'use strict'; const { ClawClient } = require('../lib/client'); +const log = require('../lib/logger'); const client = new ClawClient(); client.start(); -// 优雅退出 -process.on('SIGINT', () => { client.stop(); process.exit(0); }); -process.on('SIGTERM', () => { client.stop(); process.exit(0); }); +let stopping = false; + +function shutdown(signal) { + if (stopping) return; + stopping = true; + log.info('clawd', `收到 ${signal},正在停止...`); + client.stop(); + setTimeout(() => process.exit(0), 500); +} + +process.on('SIGINT', () => shutdown('SIGINT')); +process.on('SIGTERM', () => shutdown('SIGTERM')); diff --git a/install.sh b/install.sh index e8360d2..d337ffc 100644 --- a/install.sh +++ b/install.sh @@ -30,6 +30,8 @@ info "Node.js $NODE_VER ✓" # ── 安装 clawd ─────────────────────────────────────────────────────────────── INSTALL_DIR="/opt/clawd" +CONFIG_DIR="/etc/clawd" +ENV_FILE="$CONFIG_DIR/env" info "安装到 $INSTALL_DIR ..." mkdir -p "$INSTALL_DIR" @@ -43,7 +45,6 @@ if command -v git &>/dev/null; then git clone --depth=1 https://github.com/stswangzhiping/clawd.git . fi else - # 无 git 时用 curl 下载 tarball TARBALL_URL="https://github.com/stswangzhiping/clawd/archive/refs/heads/main.tar.gz" curl -fsSL "$TARBALL_URL" | tar -xz --strip-components=1 fi @@ -58,10 +59,11 @@ chmod +x "$INSTALL_DIR/bin/clawd.js" info "clawd 已安装到 /usr/local/bin/clawd ✓" -# ── 创建配置目录 ────────────────────────────────────────────────────────────── -mkdir -p /etc/clawd -if [ ! 
-f /etc/clawd/config.json ]; then - cat > /etc/clawd/config.json < "$CONFIG_DIR/config.json" < "$ENV_FILE" < "$JOURNAL_CONF" </dev/null || true + info "journald 日志限制已配置 ✓" +fi + +# ── 启用并启动 ────────────────────────────────────────────────────────────── systemctl daemon-reload systemctl enable clawd systemctl restart clawd @@ -105,10 +167,14 @@ sleep 2 if systemctl is-active --quiet clawd; then info "clawd 服务运行中 ✓" echo "" - echo " 查看日志:journalctl -u clawd -f" - echo " 查看状态:systemctl status clawd" - echo " 停止服务:systemctl stop clawd" + echo " 查看日志: journalctl -u clawd -f" + echo " 查看状态: systemctl status clawd" + echo " 停止服务: systemctl stop clawd" + echo " 配置文件: $CONFIG_DIR/config.json" + echo " 环境变量: $ENV_FILE" + echo " 文件日志: $CONFIG_DIR/logs/clawd.log" echo "" else - warn "服务启动失败,请检查日志:journalctl -u clawd -n 30" + warn "服务启动失败,请检查日志:" + echo " journalctl -u clawd -n 50 --no-pager" fi diff --git a/lib/client.js b/lib/client.js index 32e502d..f0969f6 100644 --- a/lib/client.js +++ b/lib/client.js @@ -1,85 +1,156 @@ 'use strict'; const WebSocket = require('ws'); +const { execFileSync } = require('child_process'); const config = require('./config'); +const log = require('./logger'); const { getBoxId } = require('./fingerprint'); const { collect } = require('./metrics'); const { getDashboardInfo, startTtyd, FrpcManager } = require('./frpc'); -const MAX_BACKOFF_MS = 60_000; +const MAX_BACKOFF_MS = 60_000; +const PONG_TIMEOUT_MS = 15_000; +const PING_INTERVAL_MS = 30_000; + +// systemd watchdog: 如果 WatchdogSec 存在,定期发 WATCHDOG=1 +const SD_WATCHDOG_USEC = parseInt(process.env.WATCHDOG_USEC || '0', 10); +const SD_NOTIFY_INTERVAL = SD_WATCHDOG_USEC > 0 + ? 
Math.floor(SD_WATCHDOG_USEC / 2 / 1000) // 半周期通知(μs → ms) + : 0; class ClawClient { constructor() { this._cfg = config.load(); this._boxId = getBoxId(); this._ws = null; - this._hbTimer = null; // 心跳定时器 - this._backoff = 1_000; // 重连等待(ms) + this._hbTimer = null; + this._backoff = 1_000; this._stopped = false; this._frpc = new FrpcManager(); - this._dashInfo = {}; // { dashboard_token, dashboard_port } + this._dashInfo = {}; + + // WS 层活性检测 + this._pingTimer = null; + this._awaitingPong = false; + + // systemd watchdog + this._sdTimer = null; + + this._setupGlobalHandlers(); } + // ── 全局异常兜底 ───────────────────────────────────────────────────────────── + + _setupGlobalHandlers() { + process.on('uncaughtException', (err) => { + log.error('process', '未捕获异常:', err); + // 给日志写盘的时间,然后退出让 systemd 重启 + setTimeout(() => process.exit(1), 1000); + }); + + process.on('unhandledRejection', (reason) => { + log.error('process', '未处理的 Promise 拒绝:', reason); + }); + } + + // ── 生命周期 ───────────────────────────────────────────────────────────────── + async start() { - console.log(`[clawd] 启动中... 服务器 = ${this._cfg.server}`); - // 并行:获取 openclaw dashboard 信息 + 启动 ttyd + log.info('clawd', `启动中... 
服务器 = ${this._cfg.server}`); + const [dashInfo] = await Promise.all([ getDashboardInfo(), - startTtyd().catch(e => console.warn('[ttyd] 启动失败:', e.message)), + startTtyd().catch(e => log.warn('ttyd', '启动失败:', e.message)), ]); this._dashInfo = dashInfo || {}; + this._startSdNotify(); this._connect(); } stop() { this._stopped = true; this._clearHeartbeat(); + this._clearPing(); + if (this._sdTimer) { clearInterval(this._sdTimer); this._sdTimer = null; } this._frpc.stop(); if (this._ws) this._ws.terminate(); - console.log('[clawd] 已停止'); + this._sdNotify('STOPPING=1'); + log.info('clawd', '已停止'); + log.close(); } - // ── 连接 ────────────────────────────────────────────────────────────────── + // ── WebSocket 连接 ────────────────────────────────────────────────────────── _connect() { if (this._stopped) return; - console.log(`[clawd] 正在连接 ${this._cfg.server} ...`); + log.info('clawd', `正在连接 ${this._cfg.server} ...`); const ws = new WebSocket(this._cfg.server, { handshakeTimeout: 10_000, }); this._ws = ws; ws.on('open', () => { - console.log('[clawd] WebSocket 已连接'); + log.info('clawd', 'WebSocket 已连接'); this._backoff = 1_000; this._sendConnect(); + this._startPing(); }); ws.on('message', (data) => { try { this._handleMessage(JSON.parse(data.toString())); } catch (e) { - console.error('[clawd] 消息解析失败:', e.message); + log.error('clawd', '消息解析失败:', e.message); } }); + ws.on('pong', () => { + this._awaitingPong = false; + }); + ws.on('close', (code, reason) => { this._clearHeartbeat(); + this._clearPing(); if (!this._stopped) { - console.warn(`[clawd] 连接断开 (${code}),${this._backoff / 1000}s 后重连...`); + log.warn('clawd', `连接断开 (${code}),${this._backoff / 1000}s 后重连...`); setTimeout(() => this._connect(), this._backoff); this._backoff = Math.min(this._backoff * 2, MAX_BACKOFF_MS); } }); ws.on('error', (err) => { - console.error('[clawd] 连接错误:', err.message); - // close 事件会在 error 之后触发,重连逻辑在 close 里处理 + log.error('clawd', '连接错误:', err.message); }); } - // ── 发送 connect 
────────────────────────────────────────────────────────── + // ── WS 层 Ping/Pong 活性检测 ────────────────────────────────────────────── + + _startPing() { + this._clearPing(); + this._pingTimer = setInterval(() => { + if (!this._ws || this._ws.readyState !== WebSocket.OPEN) return; + + if (this._awaitingPong) { + log.warn('clawd', 'Pong 超时,连接可能已死,主动关闭重连'); + this._ws.terminate(); + return; + } + + this._awaitingPong = true; + try { this._ws.ping(); } catch (_) {} + }, PING_INTERVAL_MS); + } + + _clearPing() { + if (this._pingTimer) { + clearInterval(this._pingTimer); + this._pingTimer = null; + } + this._awaitingPong = false; + } + + // ── 发送 connect ───────────────────────────────────────────────────────────── _sendConnect() { const msg = { @@ -87,13 +158,12 @@ class ClawClient { box_id: this._boxId, claw_id: this._cfg.claw_id ?? null, token: this._cfg.token ?? null, - // dashboard 信息(可选,openclaw 未安装时为空) ...this._dashInfo, }; this._send(msg); } - // ── 消息处理 ────────────────────────────────────────────────────────────── + // ── 消息处理 ───────────────────────────────────────────────────────────────── _handleMessage(msg) { switch (msg.type) { @@ -101,76 +171,67 @@ class ClawClient { this._onConnected(msg); break; case 'heartbeat_ack': - // 正常回包,静默处理 break; case 'error': - console.error(`[clawd] 服务器错误: ${msg.msg}`); + log.error('clawd', `服务器错误: ${msg.msg}`); if (msg.msg === 'hardware_mismatch') { - // box_id 与库中不符:硬件变更或凭证泄露 - // 清空本地凭证,下次重连走全新注册流程 - console.warn('[clawd] 硬件指纹与服务器不符(硬件变更或凭证泄露),清除本地凭证重新注册...'); + log.warn('clawd', '硬件指纹不符,清除凭证重新注册...'); this._cfg.claw_id = null; this._cfg.token = null; config.save(this._cfg); } else if (msg.msg && msg.msg.includes('invalid')) { - console.warn('[clawd] 凭证无效,清除本地凭证并重新注册...'); + log.warn('clawd', '凭证无效,清除凭证重新注册...'); this._cfg.claw_id = null; this._cfg.token = null; config.save(this._cfg); } break; default: - console.warn('[clawd] 未知消息类型:', msg.type); + log.warn('clawd', '未知消息类型:', msg.type); } } _onConnected(msg) { const 
isNew = !this._cfg.claw_id; - // 保存 claw_id + token this._cfg.claw_id = msg.claw_id; this._cfg.token = msg.token; config.save(this._cfg); if (isNew) { - console.log(`[clawd] 注册成功!claw_id = ${msg.claw_id}`); + log.info('clawd', `注册成功!claw_id = ${msg.claw_id}`); } if (msg.status === 'inactive') { const id = String(msg.claw_id).padEnd(6); const pin = String(msg.pin); - console.log(''); - console.log('╔════════════════════════════════════╗'); - console.log(`║ Claw ID : ${id} ║`); - console.log(`║ PIN 码 : ${pin} ║`); - console.log('║ 请在网页前台「添加设备」中输入 ║'); - console.log('╚════════════════════════════════════╝'); - console.log(''); - console.log('[clawd] 等待激活,心跳正常运行...'); + log.info('clawd', ''); + log.info('clawd', '╔════════════════════════════════════╗'); + log.info('clawd', `║ Claw ID : ${id} ║`); + log.info('clawd', `║ PIN 码 : ${pin} ║`); + log.info('clawd', '║ 请在网页前台「添加设备」中输入 ║'); + log.info('clawd', '╚════════════════════════════════════╝'); + log.info('clawd', ''); + log.info('clawd', '等待激活,心跳正常运行...'); } else { - console.log(`[clawd] 已激活 claw_id = ${msg.claw_id}`); + log.info('clawd', `已激活 claw_id = ${msg.claw_id}`); } - // 启动 frpc(如果 VPS 下发了 frp 配置) if (msg.frp && msg.frp.server && msg.frp.auth_token) { this._frpc.start(msg.claw_id, msg.frp).catch(e => { - console.error('[frpc] 启动失败:', e.message); + log.error('frpc', '启动失败:', e.message); }); } - // 开始心跳 this._startHeartbeat(); } - // ── 心跳 ───────────────────────────────────────────────────────────────── + // ── 心跳 ──────────────────────────────────────────────────────────────────── _startHeartbeat() { this._clearHeartbeat(); const interval = (this._cfg.heartbeat_interval || 30) * 1000; - - // 立即发一次 this._sendHeartbeat(); - this._hbTimer = setInterval(() => this._sendHeartbeat(), interval); } @@ -185,7 +246,7 @@ class ClawClient { metrics, }); } catch (e) { - console.error('[clawd] 心跳发送失败:', e.message); + log.error('clawd', '心跳发送失败:', e.message); } } @@ -196,13 +257,32 @@ class ClawClient { } } - // ── 工具 
────────────────────────────────────────────────────────────────── + // ── 工具 ──────────────────────────────────────────────────────────────────── _send(obj) { if (this._ws && this._ws.readyState === WebSocket.OPEN) { this._ws.send(JSON.stringify(obj)); } } + + // ── systemd Watchdog ──────────────────────────────────────────────────────── + + _startSdNotify() { + if (!SD_NOTIFY_INTERVAL) return; + + log.debug('clawd', `systemd watchdog 启用,通知间隔 ${SD_NOTIFY_INTERVAL}ms`); + this._sdNotify('READY=1'); + this._sdTimer = setInterval(() => this._sdNotify('WATCHDOG=1'), SD_NOTIFY_INTERVAL); + } + + _sdNotify(msg) { + if (!process.env.NOTIFY_SOCKET) return; + try { + execFileSync('systemd-notify', ['--pid=' + process.pid, msg], { timeout: 2000 }); + } catch (_) { + // systemd-notify 不可用时静默忽略 + } + } } module.exports = { ClawClient }; diff --git a/lib/config.js b/lib/config.js index fed6b78..7ceea14 100644 --- a/lib/config.js +++ b/lib/config.js @@ -26,7 +26,8 @@ function load() { return Object.assign({}, DEFAULTS, JSON.parse(raw)); } } catch (e) { - console.error('[config] 读取配置失败,使用默认值:', e.message); + const log = require('./logger'); + log.error('config', '读取配置失败,使用默认值:', e.message); } return Object.assign({}, DEFAULTS); } @@ -36,7 +37,8 @@ function save(data) { fs.mkdirSync(CONFIG_DIR, { recursive: true }); fs.writeFileSync(CONFIG_FILE, JSON.stringify(data, null, 2), 'utf8'); } catch (e) { - console.error('[config] 写入配置失败:', e.message); + const log = require('./logger'); + log.error('config', '写入配置失败:', e.message); } } diff --git a/lib/fingerprint.js b/lib/fingerprint.js index d996669..5a9c902 100644 --- a/lib/fingerprint.js +++ b/lib/fingerprint.js @@ -104,7 +104,8 @@ function getPersistentUUID() { fs.writeFileSync(PERSIST_FILE, id, 'utf8'); } catch (e) { // 写不进去也没关系,本次用内存值(重启后会变,但这是最后兜底) - console.warn('[fingerprint] 无法持久化 box_id:', e.message); + const log = require('./logger'); + log.warn('fingerprint', '无法持久化 box_id:', e.message); } return id; } diff --git 
a/lib/frpc.js b/lib/frpc.js index 5173c26..cdd61c8 100644 --- a/lib/frpc.js +++ b/lib/frpc.js @@ -5,15 +5,15 @@ const fs = require('fs'); const os = require('os'); const path = require('path'); const https = require('https'); +const log = require('./logger'); +const { Watchdog } = require('./watchdog'); -// frpc 配置目录(与 clawd config 同目录) const CONFIG_DIR = process.env.CLAWD_CONFIG_DIR || (process.getuid && process.getuid() === 0 ? '/etc/clawd' : path.join(os.homedir(), '.clawd')); const FRPC_BIN = path.join(CONFIG_DIR, 'frpc'); const FRPC_CONFIG = path.join(CONFIG_DIR, 'frpc.toml'); const TTYD_BIN = path.join(CONFIG_DIR, 'ttyd'); -// frp / ttyd 版本 const FRP_VERSION = '0.62.0'; const TTYD_VERSION = '1.7.7'; const TTYD_PORT = 7681; @@ -27,14 +27,12 @@ function getDashboardInfo() { return new Promise((resolve) => { const tmpLog = '/tmp/clawd-dashboard.log'; - // 后台启动 dashboard,输出重定向到日志文件 try { execSync(`openclaw dashboard > ${tmpLog} 2>&1 &`, { shell: true, timeout: 3000 }); } catch (e) { // 已在运行或命令不存在,继续轮询 } - // 每秒读一次日志文件,最多等 10 秒 let attempts = 0; const interval = setInterval(() => { attempts++; @@ -45,7 +43,7 @@ function getDashboardInfo() { clearInterval(interval); const port = parseInt(match[1], 10); const token = match[2]; - console.log(`[frpc] openclaw dashboard: port=${port}, token=${token.substring(0, 8)}...`); + log.info('dashboard', `openclaw dashboard: port=${port}, token=${token.substring(0, 8)}...`); resolve({ dashboard_port: port, dashboard_token: token }); return; } @@ -53,18 +51,16 @@ function getDashboardInfo() { if (attempts >= 10) { clearInterval(interval); + log.debug('dashboard', 'openclaw dashboard 未检测到,跳过'); resolve({}); } }, 1000); }); } -/** - * 根据当前系统架构下载对应的 frpc 二进制。 - */ async function downloadFrpc() { - const arch = os.arch(); // 'x64', 'arm64', 'arm', ... 
- const platform = os.platform(); // 'linux' + const arch = os.arch(); + const platform = os.platform(); const archMap = { x64: 'amd64', arm64: 'arm64', @@ -76,56 +72,50 @@ async function downloadFrpc() { const url = `https://github.com/fatedier/frp/releases/download/v${FRP_VERSION}/${filename}`; const tmpFile = `/tmp/${filename}`; - console.log(`[frpc] 下载 frpc ${FRP_VERSION} (${platform}/${frpArch})...`); + log.info('frpc', `下载 frpc ${FRP_VERSION} (${platform}/${frpArch})...`); await downloadFile(url, tmpFile); - // 解压并复制 frpc fs.mkdirSync(CONFIG_DIR, { recursive: true }); execSync(`tar -xzf ${tmpFile} -C /tmp && cp /tmp/frp_${FRP_VERSION}_${platform}_${frpArch}/frpc ${FRPC_BIN}`, { stdio: 'inherit' }); fs.chmodSync(FRPC_BIN, 0o755); - console.log(`[frpc] frpc 已安装到 ${FRPC_BIN}`); + log.info('frpc', `frpc 已安装到 ${FRPC_BIN}`); } -/** - * 下载 ttyd 静态二进制。 - */ async function downloadTtyd() { const arch = os.arch(); const archMap = { arm64: 'aarch64', x64: 'x86_64', arm: 'armv7l', ia32: 'i686' }; const ttydArch = archMap[arch] || 'x86_64'; const url = `https://github.com/tsl0922/ttyd/releases/download/${TTYD_VERSION}/ttyd.${ttydArch}`; - console.log(`[ttyd] 下载 ttyd ${TTYD_VERSION} (${ttydArch})...`); + log.info('ttyd', `下载 ttyd ${TTYD_VERSION} (${ttydArch})...`); fs.mkdirSync(CONFIG_DIR, { recursive: true }); await downloadFile(url, TTYD_BIN); fs.chmodSync(TTYD_BIN, 0o755); - console.log(`[ttyd] ttyd 已安装到 ${TTYD_BIN}`); + log.info('ttyd', `ttyd 已安装到 ${TTYD_BIN}`); } /** * 启动 ttyd(如未安装先下载)。 * ttyd 绑定 127.0.0.1:7681,供 frpc 代理。 - * 返回 true 表示启动成功,false 表示失败。 */ async function startTtyd() { if (!fs.existsSync(TTYD_BIN)) { try { await downloadTtyd(); } catch (e) { - console.warn('[ttyd] 下载失败:', e.message); + log.warn('ttyd', '下载失败:', e.message); return false; } } - // 终止旧进程(重启 clawd 时可能残留) + // 终止旧进程 try { execSync(`pkill -f "${TTYD_BIN}"`, { timeout: 3000 }); - // 稍等旧进程退出 await new Promise(r => setTimeout(r, 500)); - } catch (_) { /* 无进程可杀,忽略 */ } + } catch (_) {} try { 
const shell = fs.existsSync('/bin/bash') ? '/bin/bash' : '/bin/sh'; @@ -134,10 +124,10 @@ async function startTtyd() { detached: true, }); proc.unref(); - console.log(`[ttyd] 已启动,端口 ${TTYD_PORT},shell=${shell}`); + log.info('ttyd', `已启动,端口 ${TTYD_PORT},shell=${shell}`); return true; } catch (e) { - console.warn('[ttyd] 启动失败:', e.message); + log.warn('ttyd', '启动失败:', e.message); return false; } } @@ -156,12 +146,6 @@ function downloadFile(url, dest) { }); } -/** - * 生成 frpc.toml 配置文件。 - * 包含两条代理: - * - dashboard-{clawId} → openclaw dashboard - * - tty-{clawId} → ttyd 终端 - */ function writeFrpcConfig(clawId, frpConfig) { const { server, port, auth_token, dashboard_local_port = 18789 } = frpConfig; const ttyRemotePort = 10000 + Number(clawId); @@ -187,66 +171,44 @@ remotePort = ${ttyRemotePort} `; fs.mkdirSync(CONFIG_DIR, { recursive: true }); fs.writeFileSync(FRPC_CONFIG, toml, 'utf8'); - console.log(`[frpc] frpc.toml 已写入: dashboard subdomain=${clawId}, tty tcp-port=${ttyRemotePort}`); + log.info('frpc', `frpc.toml 已写入: dashboard subdomain=${clawId}, tty tcp-port=${ttyRemotePort}`); } +/** + * FrpcManager —— 基于 Watchdog 的 frpc 进程管理器。 + * 崩溃自动重启,5 分钟内最多重启 10 次。 + */ class FrpcManager { constructor() { - this._proc = null; - this._stopped = false; - this._restartTimer = null; + this._watchdog = null; } - /** - * 启动 frpc:如未安装先下载,写配置,然后 spawn。 - */ async start(clawId, frpConfig) { - this._stopped = false; + this.stop(); - // 下载 frpc(如果不存在) if (!fs.existsSync(FRPC_BIN)) { try { await downloadFrpc(); } catch (e) { - console.error('[frpc] 下载 frpc 失败:', e.message); + log.error('frpc', '下载 frpc 失败:', e.message); return; } } writeFrpcConfig(clawId, frpConfig); - this._spawn(); - } - _spawn() { - if (this._stopped) return; - - console.log('[frpc] 启动 frpc...'); - this._proc = spawn(FRPC_BIN, ['-c', FRPC_CONFIG], { - stdio: ['ignore', 'pipe', 'pipe'], - }); - - this._proc.stdout.on('data', d => { - const line = d.toString().trim(); - if (line) console.log(`[frpc] ${line}`); - }); 
- this._proc.stderr.on('data', d => { - const line = d.toString().trim(); - if (line) console.warn(`[frpc] ${line}`); - }); - this._proc.on('exit', (code) => { - console.warn(`[frpc] 进程退出 (code=${code})`); - if (!this._stopped) { - this._restartTimer = setTimeout(() => this._spawn(), 5000); - } + this._watchdog = new Watchdog('frpc', FRPC_BIN, ['-c', FRPC_CONFIG], { + maxRestarts: 10, + windowMs: 300_000, + restartDelay: 5_000, }); + this._watchdog.start(); } stop() { - this._stopped = true; - if (this._restartTimer) clearTimeout(this._restartTimer); - if (this._proc) { - this._proc.kill('SIGTERM'); - this._proc = null; + if (this._watchdog) { + this._watchdog.stop(); + this._watchdog = null; } } } diff --git a/lib/logger.js b/lib/logger.js new file mode 100644 index 0000000..1cc9498 --- /dev/null +++ b/lib/logger.js @@ -0,0 +1,118 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); +const os = require('os'); + +const LEVELS = { debug: 0, info: 1, warn: 2, error: 3 }; + +const CONFIG_DIR = process.env.CLAWD_CONFIG_DIR + || (process.getuid && process.getuid() === 0 ? '/etc/clawd' : path.join(os.homedir(), '.clawd')); + +const LOG_DIR = process.env.CLAWD_LOG_DIR || path.join(CONFIG_DIR, 'logs'); +const MAX_FILE_SIZE = 5 * 1024 * 1024; // 5 MB +const MAX_FILES = 5; + +class Logger { + constructor(opts = {}) { + this._level = LEVELS[opts.level || process.env.CLAWD_LOG_LEVEL || 'info'] ?? LEVELS.info; + this._logToFile = opts.logToFile ?? 
(process.env.CLAWD_LOG_FILE !== '0'); + this._stream = null; + this._filePath = null; + this._fileSize = 0; + + if (this._logToFile) { + this._ensureLogDir(); + } + } + + debug(tag, ...args) { this._log('debug', tag, args); } + info(tag, ...args) { this._log('info', tag, args); } + warn(tag, ...args) { this._log('warn', tag, args); } + error(tag, ...args) { this._log('error', tag, args); } + + _log(level, tag, args) { + if (LEVELS[level] < this._level) return; + + const ts = new Date().toISOString(); + const lvl = level.toUpperCase().padEnd(5); + const body = args.map(a => (a instanceof Error ? a.stack || a.message : String(a))).join(' '); + const line = `${ts} ${lvl} [${tag}] ${body}`; + + const consoleFn = level === 'error' ? console.error + : level === 'warn' ? console.warn + : console.log; + consoleFn(line); + + if (this._logToFile) this._writeToFile(line + '\n'); + } + + _ensureLogDir() { + try { fs.mkdirSync(LOG_DIR, { recursive: true }); } + catch (_) { this._logToFile = false; } + } + + _writeToFile(line) { + if (!this._stream) this._openFile(); + if (!this._stream) return; + + this._stream.write(line); + this._fileSize += Buffer.byteLength(line); + + if (this._fileSize >= MAX_FILE_SIZE) this._rotate(); + } + + _openFile() { + try { + this._filePath = path.join(LOG_DIR, 'clawd.log'); + try { + const stat = fs.statSync(this._filePath); + this._fileSize = stat.size; + } catch (_) { this._fileSize = 0; } + + this._stream = fs.createWriteStream(this._filePath, { flags: 'a' }); + this._stream.on('error', () => { + this._logToFile = false; + this._stream = null; + }); + } catch (_) { + this._logToFile = false; + } + } + + _rotate() { + if (this._stream) { + this._stream.end(); + this._stream = null; + } + + // clawd.log.5 is overwritten, clawd.log.4 → .5, ... 
clawd.log → .1 + for (let i = MAX_FILES - 1; i >= 1; i--) { + const from = path.join(LOG_DIR, `clawd.log.${i}`); + const to = path.join(LOG_DIR, `clawd.log.${i + 1}`); + try { fs.renameSync(from, to); } catch (_) {} + } + try { + fs.renameSync(this._filePath, path.join(LOG_DIR, 'clawd.log.1')); + } catch (_) {} + + // 删除超出上限的文件 + try { + fs.unlinkSync(path.join(LOG_DIR, `clawd.log.${MAX_FILES + 1}`)); + } catch (_) {} + + this._fileSize = 0; + this._openFile(); + } + + close() { + if (this._stream) { + this._stream.end(); + this._stream = null; + } + } +} + +const logger = new Logger(); + +module.exports = logger; diff --git a/lib/watchdog.js b/lib/watchdog.js new file mode 100644 index 0000000..dc9c2bd --- /dev/null +++ b/lib/watchdog.js @@ -0,0 +1,121 @@ +'use strict'; + +const { spawn } = require('child_process'); +const log = require('./logger'); + +const DEFAULT_MAX_RESTARTS = 10; +const DEFAULT_WINDOW_MS = 300_000; // 5 min +const DEFAULT_RESTART_DELAY = 3_000; + +/** + * 通用子进程守护:崩溃自动重启、速率限制、健康回调。 + * + * 用法: + * const wd = new Watchdog('frpc', '/path/to/frpc', ['-c', 'frpc.toml'], { + * maxRestarts: 10, + * windowMs: 300_000, + * onStdout: (line) => { ... }, + * }); + * wd.start(); + * wd.stop(); + */ +class Watchdog { + constructor(name, bin, args = [], opts = {}) { + this._name = name; + this._bin = bin; + this._args = args; + this._proc = null; + this._stopped = false; + this._restartTimer = null; + this._onStdout = opts.onStdout || null; + this._onStderr = opts.onStderr || null; + this._onExit = opts.onExit || null; + this._spawnOpts = opts.spawnOpts || {}; + + this._maxRestarts = opts.maxRestarts ?? DEFAULT_MAX_RESTARTS; + this._windowMs = opts.windowMs ?? DEFAULT_WINDOW_MS; + this._restartDelay = opts.restartDelay ?? 
DEFAULT_RESTART_DELAY; + + this._restartTimes = []; // timestamps of recent restarts + } + + get running() { + return !!(this._proc && !this._proc.killed); + } + + start() { + this._stopped = false; + this._spawn(); + } + + stop() { + this._stopped = true; + if (this._restartTimer) { + clearTimeout(this._restartTimer); + this._restartTimer = null; + } + if (this._proc) { + this._proc.kill('SIGTERM'); + // 强杀兜底 + const p = this._proc; + setTimeout(() => { try { p.kill('SIGKILL'); } catch (_) {} }, 5000); + this._proc = null; + } + } + + _spawn() { + if (this._stopped) return; + + log.info(this._name, '启动进程...'); + const proc = spawn(this._bin, this._args, { + stdio: ['ignore', 'pipe', 'pipe'], + ...this._spawnOpts, + }); + this._proc = proc; + + proc.stdout.on('data', (d) => { + const line = d.toString().trim(); + if (!line) return; + if (this._onStdout) this._onStdout(line); + else log.info(this._name, line); + }); + + proc.stderr.on('data', (d) => { + const line = d.toString().trim(); + if (!line) return; + if (this._onStderr) this._onStderr(line); + else log.warn(this._name, line); + }); + + proc.on('error', (err) => { + log.error(this._name, '进程启动失败:', err.message); + }); + + proc.on('exit', (code, signal) => { + log.warn(this._name, `进程退出 code=${code} signal=${signal}`); + this._proc = null; + if (this._onExit) this._onExit(code, signal); + if (!this._stopped) this._scheduleRestart(); + }); + } + + _scheduleRestart() { + const now = Date.now(); + this._restartTimes.push(now); + + // 只保留窗口内的记录 + this._restartTimes = this._restartTimes.filter(t => now - t < this._windowMs); + + if (this._restartTimes.length > this._maxRestarts) { + log.error(this._name, + `${this._windowMs / 1000}s 内重启 ${this._restartTimes.length} 次,超过上限 ${this._maxRestarts},停止守护`); + return; + } + + const delay = this._restartDelay * Math.min(this._restartTimes.length, 5); + log.info(this._name, `${delay / 1000}s 后重启... 
(窗口内第 ${this._restartTimes.length} 次)`); + this._restartTimer = setTimeout(() => this._spawn(), delay); + } +} + +module.exports = { Watchdog };