From d53db453756d657ff92f7243bd4d348bdc795d93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eason=20=28=E9=99=88=E5=8C=BB=E7=94=9F=29?= Date: Wed, 4 Mar 2026 03:34:16 +0000 Subject: [PATCH] =?UTF-8?q?feat:=20=E7=B3=BB=E7=BB=9F=E6=89=A9=E5=B1=95?= =?UTF-8?q?=E6=9E=B6=E6=9E=84=E5=8D=87=E7=BA=A7=E5=AE=8C=E6=88=90=20-=2020?= =?UTF-8?q?26-03-03?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 systemd/gateway.env 和 life-gateway.env (升级安全的环境变量持久化) - 修复 agent-monitor.js 4 个 Bug (重启限制/Life 监控/心跳日志/升级容忍) - 更新 systemd 服务模板 (MemoryLimit→MemoryMax, 添加 EnvironmentFile) - 增强 deploy.sh (debug-stop/debug-start/fix-service 命令,Life Agent 集成) - 新增 docs/EXTENSIONS_ARCHITECTURE.md (统一架构文档) - 更新 CORE_INDEX.md 和 MEMORY.md --- CORE_INDEX.md | 95 ++++- MEMORY.md | 71 ++++ agent-monitor.js | 135 ++++++-- deploy.sh | 224 ++++++++---- docs/EXTENSIONS_ARCHITECTURE.md | 267 +++++++++++++++ docs/MEM0_ARCHITECTURE.md | 10 +- docs/MEMORY_ARCHITECTURE.md | 324 ++++++++++++++++++ skills/mem0-integration/SKILL.md | 214 ++++++++++-- .../{mem0-plugin.js => index.js} | 21 +- skills/mem0-integration/local_search.py | 176 ++++++++++ skills/mem0-integration/mem0_client.py | 257 +++++++++++--- skills/mem0-integration/memory_cleanup.py | 87 +++++ .../migrate_to_single_collection.py | 2 +- skills/mem0-integration/openclaw.plugin.json | 17 + .../mem0-integration/openclaw_interceptor.py | 20 +- skills/mem0-integration/project_registry.yaml | 37 ++ skills/mem0-integration/recover_memories.py | 138 ++++++++ .../mem0-integration/recover_memories_v2.py | 130 +++++++ skills/mem0-integration/session_init.py | 98 ++++++ systemd/openclaw-agent-monitor.service | 8 +- systemd/openclaw-gateway-user.service | 13 +- ...ervice => openclaw-gateway.service.legacy} | 3 + 22 files changed, 2153 insertions(+), 194 deletions(-) create mode 100644 docs/EXTENSIONS_ARCHITECTURE.md create mode 100644 docs/MEMORY_ARCHITECTURE.md rename skills/mem0-integration/{mem0-plugin.js => index.js} (88%) 
create mode 100644 skills/mem0-integration/local_search.py create mode 100644 skills/mem0-integration/memory_cleanup.py create mode 100644 skills/mem0-integration/openclaw.plugin.json create mode 100644 skills/mem0-integration/project_registry.yaml create mode 100644 skills/mem0-integration/recover_memories.py create mode 100644 skills/mem0-integration/recover_memories_v2.py create mode 100644 skills/mem0-integration/session_init.py rename systemd/{openclaw-gateway.service => openclaw-gateway.service.legacy} (78%) diff --git a/CORE_INDEX.md b/CORE_INDEX.md index e663f40..2468658 100644 --- a/CORE_INDEX.md +++ b/CORE_INDEX.md @@ -12,26 +12,43 @@ ## File Structure Overview ``` /root/.openclaw/workspace/ -├── CORE_INDEX.md # This file - memory index -├── MEMORY.md # Long-term curated memories and decisions +├── CORE_INDEX.md # This file - memory index (Layer 1) +├── IDENTITY.md # Agent identity definition (Layer 1) +├── SOUL.md # Core personality and behavior guidelines (Layer 1) +├── USER.md # Information about the human user (Layer 1) +├── MEMORY.md # Long-term curated memories and decisions (Layer 2) ├── AGENTS.md # Agent operations and logging practices -├── SOUL.md # Core personality and behavior guidelines -├── USER.md # Information about the human user ├── TOOLS.md # Environment-specific tool configurations -├── IDENTITY.md # Agent identity configuration ├── HEARTBEAT.md # Periodic check tasks ├── deploy.sh # One-click deployment & management script ├── agent-monitor.js # Auto-healing & health monitoring system +├── memory/ # Daily memory files YYYY-MM-DD.md (Layer 2) +├── docs/ # Architecture & reference documentation +│ ├── EXTENSIONS_ARCHITECTURE.md # ★ 自定义扩展权威参考 (监控+记忆+部署) +│ ├── MEMORY_ARCHITECTURE.md # 四层记忆体系详细文档 (v2.1) +│ ├── MEM0_ARCHITECTURE.md # (旧版, 已废弃 → 见 MEMORY_ARCHITECTURE.md) +│ ├── SYSTEM_ARCHITECTURE.md # 系统总体架构 +│ └── ... 
├── skills/ # Installed agent skills +│ └── mem0-integration/ # Layer 4 记忆系统核心 Skill +│ ├── SKILL.md # Skill 开发者文档 (含 API 规范) +│ ├── mem0_client.py # 核心客户端 (检索 / 写入 / 队列) +│ ├── openclaw_interceptor.py # Pre/Post-Hook 拦截器 +│ ├── local_search.py # Layer 3 FTS5 本地检索 fallback +│ ├── config.yaml # mem0 配置 +│ └── project_registry.yaml # Agent-项目归属 (可见性控制) ├── logs/ # Operation and system logs │ ├── operations/ # Manual operations and changes │ ├── system/ # System-generated logs │ ├── agents/ # Individual agent logs │ └── security/ # Security operations and audits -├── memory/ # Daily memory files (YYYY-MM-DD.md) -└── systemd/ # Systemd service definitions - ├── openclaw-gateway.service - └── openclaw-agent-monitor.service +└── systemd/ # Systemd service definitions & env files + ├── openclaw-gateway-user.service # 用户级 Gateway 模板 + ├── agent-life.service # 用户级 Life Agent 模板 + ├── openclaw-agent-monitor.service # 系统级 Monitor 模板 + ├── openclaw-gateway.service.legacy # 废弃的系统级 Gateway (已 masked) + ├── gateway.env # Gateway 环境变量 (升级安全) + └── life-gateway.env # Life Agent 环境变量 (升级安全) ``` ## Memory Access Strategy @@ -41,12 +58,15 @@ - **Version Control**: All critical files tracked in Git with rollback capability ## Key Documentation Files +- **★ Extensions Architecture**: docs/EXTENSIONS_ARCHITECTURE.md → 所有自定义扩展的权威参考 (修改基础设施前必读) +- **Memory Architecture**: docs/MEMORY_ARCHITECTURE.md → 四层记忆体系详细设计 (v2.1) +- **Skill Developer Guide**: skills/mem0-integration/SKILL.md → Layer 4 代码结构、API 规范、开发者注意事项 - **Security Templates**: MEMORY.md → Server security hardening templates - **Agent Practices**: AGENTS.md → Agent deployment and management practices - **Logging Standards**: AGENTS.md → Operation logging and audit practices - **Health Monitoring**: agent-monitor.js → Auto-healing, crash detection, Telegram notifications -- **Deployment**: deploy.sh → One-click install/start/stop/rollback/backup -- **Systemd Services**: systemd/*.service → System-level auto-start & auto-healing +- 
**Deployment**: deploy.sh → One-click install/start/stop/rollback/backup/debug/fix-service +- **Systemd Services**: systemd/*.service + *.env → 服务定义及升级安全环境变量 - **Configuration Backup**: Git commits before any JSON modifications ## Usage Instructions for Models @@ -55,6 +75,8 @@ 3. Load specific files using read/edit/write tools as needed 4. Never assume memory persistence across model sessions 5. Always verify current state before making changes +6. **修改基础设施前** (systemd、监控、部署脚本、环境变量),必须先读 `docs/EXTENSIONS_ARCHITECTURE.md` +7. **OpenClaw 升级后**,运行 `./deploy.sh fix-service && ./deploy.sh restart` 恢复自定义配置 ## System Architecture (2026-02-20) @@ -82,10 +104,47 @@ ### Management Commands ```bash -./deploy.sh install # Install & start all services -./deploy.sh status # Check service status -./deploy.sh health # Run health check -./deploy.sh logs # View recent logs -./deploy.sh backup # Create backup -./deploy.sh rollback # Rollback to previous commit -``` \ No newline at end of file +./deploy.sh install # Install & start all services +./deploy.sh status # Check service status (gateway + life + monitor) +./deploy.sh health # Run health check +./deploy.sh logs # View recent logs +./deploy.sh backup # Create backup +./deploy.sh rollback # Rollback to previous commit +./deploy.sh debug-stop # Stop ALL services (safe for debugging) +./deploy.sh debug-start # Restore all services after debugging +./deploy.sh fix-service # Re-inject EnvironmentFile after UI upgrade +``` + +## Memory Architecture (四层记忆体系) + +> 详细文档: `docs/MEMORY_ARCHITECTURE.md` + +### Memory Layer 1: Core Memory (核心记忆) +- CORE_INDEX.md, IDENTITY.md, SOUL.md 等 MD 文件 +- 启动时首先加载,定义 Agent 身份和行为准则 +- 每个 Agent 独立工作区 + +### Memory Layer 2: Daily Memory (日常记忆) +- MEMORY.md (长期策略) + memory/*.md (每日记录) +- Git 版本控制保护,支持回溯 + +### Memory Layer 3: Short-term Memory (短期记忆 / QMD) +- SQLite FTS5 全文检索 (零额外内存,纯离线) +- 可选 GGUF 本地向量 (按需加载,需 >= 300MB 空闲内存) +- Layer 4 不可达时自动接管检索 + +### Memory Layer 4: Mem0 Conversation Memory (对话记忆) +- 
Qdrant (mem0_v4_shared) + text-embedding-v4 (1024 维) +- 通过 Tailscale 可跨服务器共享 +- 三级可见性: public (集群共享) / project (项目共享) / private (仅自身) +- 记忆衰减: session=7天, chat_summary=30天, preference/knowledge=永久 +- 智能写入过滤: 自动跳过简短确认、系统命令等无价值对话 + +### Memory Visibility (记忆可见性) +| 级别 | 写入方式 | 检索范围 | +|------|----------|----------| +| public | `visibility=public` | 所有 Agent | +| project | `visibility=project, project_id=<项目>` | 同项目 Agent | +| private | `visibility=private` | 仅自身 | + +项目注册表: `skills/mem0-integration/project_registry.yaml` \ No newline at end of file diff --git a/MEMORY.md b/MEMORY.md index 7eb6dd4..3b0e718 100644 --- a/MEMORY.md +++ b/MEMORY.md @@ -377,3 +377,74 @@ export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/$(id -u)/bus" - `agents/life-workspace/` - 测试用 Agent 工作区 - `skills/openclaw-wecom/` - 企业微信技能(TypeScript 实现) +--- + +## 系统扩展架构升级完成 (2026-03-03 17:02 UTC) + +### 6 项核心任务全部完成 + +#### Task 1 - 环境变量持久化 +- **文件**: `systemd/gateway.env`, `systemd/life-gateway.env` +- **权限**: chmod 600 (仅 root 可读) +- **特点**: 独立于 .service 文件,OpenClaw UI 升级不会覆盖 +- **内容**: + ```bash + MEM0_DASHSCOPE_API_KEY=sk-REDACTED + OPENAI_API_BASE=https://dashscope.aliyuncs.com/compatible-mode/v1 + OPENAI_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 + ``` + +#### Task 2 - Agent Monitor 修复 (4 个 Bug) +- **重启限制**: 集成到 `monitorOpenClawService()` via `handleServiceDown()` — 无限重启循环已修复 +- **Life Agent 监控**: 现在每 30 秒同时检查 gateway 和 openclaw-gateway-life.service +- **心跳日志**: 每 10 分钟输出 `gateway=OK, life=OK` +- **升级容忍**: 首次检测到服务停止后等待 60 秒 (grace period),避免升级期间误报 + +#### Task 3 - Systemd 服务升级 +- **模板更新**: 废弃的 `MemoryLimit=` 替换为 `MemoryMax=` +- **Monitor 同步**: 模板同步到 `/etc/systemd/system/` +- **环境变量注入**: 两个 user-level service 文件添加 `EnvironmentFile=` +- **遗留服务**: 禁用并 masked 旧的系统级 `openclaw-gateway.service` +- **状态**: 所有 3 个服务已重启并确认 active + +#### Task 4 - deploy.sh 增强 +- **新命令**: + - `debug-stop` — 安全停止所有服务 (含 monitor),防止调试期间自动重启 + - `debug-start` — 调试完成后恢复所有服务 + - `fix-service` 
— UI 升级后重新注入 `EnvironmentFile=` +- **Life Agent 集成**: `start/stop/restart/status/logs/health/install` 全部支持 life agent + +#### Task 5 - 统一架构文档 +- **文件**: `docs/EXTENSIONS_ARCHITECTURE.md` +- **内容**: 服务架构、监控系统、记忆系统交叉引用、环境变量、调试流程、升级安全清单 + +#### Task 6 - CORE_INDEX.md 更新 +- **文件树**: 新增 .env 文件、.legacy 重命名、新文档 +- **星标引用**: EXTENSIONS_ARCHITECTURE.md 列为关键参考 +- **升级指南**: 添加升级安全指令到模型使用指南 +- **管理命令**: 更新 deploy.sh 命令列表 + +### 当前系统状态 (2026-03-04 03:32 UTC) + +``` +● openclaw-gateway.service Active: active (running) 10h ago +● openclaw-gateway-life.service Active: active (running) 10h ago +● openclaw-agent-monitor.service Active: active (running) 10h ago +``` + +Monitor 心跳日志正常:每 10 分钟输出 `gateway=OK, life=OK` + +### 升级安全流程 + +```bash +# OpenClaw UI 升级后执行 +./deploy.sh fix-service # 重新注入 EnvironmentFile= +./deploy.sh restart # 重启所有服务 +./deploy.sh health # 验证健康状态 +``` + +### 关键文档 +- **扩展架构**: `docs/EXTENSIONS_ARCHITECTURE.md` — 修改基础设施前必读 +- **记忆系统**: `docs/MEMORY_ARCHITECTURE.md` — 四层记忆体系详细设计 +- **监控脚本**: `agent-monitor.js` — 健康监控与自动修复逻辑 + diff --git a/agent-monitor.js b/agent-monitor.js index fc23cc9..ad86a5a 100644 --- a/agent-monitor.js +++ b/agent-monitor.js @@ -28,6 +28,10 @@ class AgentHealthMonitor { this.restartCounts = new Map(); this.maxRestarts = 5; this.restartWindow = 300000; // 5 minutes + this.gracePeriod = 60000; // 60s grace period after first failure (upgrade tolerance) + this.heartbeatInterval = 600000; // 10 minutes + this.lastKnownState = { gateway: true, life: true }; + this.firstFailureTime = { gateway: 0, life: 0 }; this.ensureLogDir(); this.setupSignalHandlers(); @@ -242,10 +246,7 @@ class AgentHealthMonitor { async checkOpenClawGateway() { try { - // Use openclaw CLI for reliable status check (works with user-level systemd) const { stdout } = await execAsync('openclaw gateway status 2>&1 || echo "not running"'); - - // Check for various running states return stdout.includes('running') || stdout.includes('active') || stdout.includes('RPC probe: ok') || @@ 
-256,16 +257,32 @@ class AgentHealthMonitor { } } - async startOpenClawGateway() { + async checkLifeAgent() { try { - // Set up environment for user-level systemd const env = { ...process.env, XDG_RUNTIME_DIR: '/run/user/0', DBUS_SESSION_BUS_ADDRESS: 'unix:path=/run/user/0/bus' }; - - const { stdout, stderr } = await execAsync('openclaw gateway start', { env }); + const { stdout } = await execAsync( + 'systemctl --user is-active openclaw-gateway-life.service 2>&1 || echo "inactive"', + { env } + ); + return stdout.trim() === 'active'; + } catch (error) { + this.log(`Life agent status check error: ${error.message}`, 'error'); + return false; + } + } + + async startOpenClawGateway() { + const env = { + ...process.env, + XDG_RUNTIME_DIR: '/run/user/0', + DBUS_SESSION_BUS_ADDRESS: 'unix:path=/run/user/0/bus' + }; + try { + const { stdout } = await execAsync('openclaw gateway start', { env }); this.log(`OpenClaw Gateway started: ${stdout}`, 'info'); } catch (error) { this.log(`Failed to start OpenClaw Gateway: ${error.message}`, 'error'); @@ -273,34 +290,102 @@ class AgentHealthMonitor { } } + async startLifeAgent() { + const env = { + ...process.env, + XDG_RUNTIME_DIR: '/run/user/0', + DBUS_SESSION_BUS_ADDRESS: 'unix:path=/run/user/0/bus' + }; + try { + const { stdout } = await execAsync( + 'systemctl --user start openclaw-gateway-life.service', { env } + ); + this.log(`Life agent started: ${stdout}`, 'info'); + } catch (error) { + this.log(`Failed to start Life agent: ${error.message}`, 'error'); + throw error; + } + } + + async handleServiceDown(serviceName, startFn) { + const now = Date.now(); + + if (this.lastKnownState[serviceName]) { + this.firstFailureTime[serviceName] = now; + this.lastKnownState[serviceName] = false; + this.log(`${serviceName} detected down, entering grace period (${this.gracePeriod / 1000}s)...`, 'warning'); + return; + } + + if (now - this.firstFailureTime[serviceName] < this.gracePeriod) { + return; + } + + if 
(!this.checkRestartLimit(serviceName)) { + await this.sendNotification( + `${serviceName} crashed ${this.maxRestarts} times in ${this.restartWindow / 60000} min. Auto-restart disabled until window resets.`, + 'critical' + ); + return; + } + + await this.sendNotification(`${serviceName} is down. Attempting restart...`, 'error'); + try { + await startFn(); + this.lastKnownState[serviceName] = true; + this.firstFailureTime[serviceName] = 0; + await this.sendNotification(`${serviceName} restarted successfully`, 'info'); + } catch (error) { + await this.sendNotification(`Failed to restart ${serviceName}: ${error.message}`, 'critical'); + } + } + async monitorOpenClawService() { - this.log('Starting OpenClaw Gateway monitoring...', 'info'); - - // Check every 30 seconds + this.log('Starting service monitoring (gateway + life)...', 'info'); + let heartbeatCounter = 0; + setInterval(async () => { - const isRunning = await this.checkOpenClawGateway(); - - if (!isRunning) { - this.log('OpenClaw Gateway is not running! Attempting to restart...', 'critical'); - await this.sendNotification('🚨 OpenClaw Gateway stopped unexpectedly. 
Restarting...', 'critical'); - - try { - await this.startOpenClawGateway(); - await this.sendNotification('✅ OpenClaw Gateway has been restarted successfully', 'info'); - } catch (error) { - await this.sendNotification(`❌ Failed to restart OpenClaw Gateway: ${error.message}`, 'critical'); + const gatewayOk = await this.checkOpenClawGateway(); + const lifeOk = await this.checkLifeAgent(); + + if (gatewayOk) { + if (!this.lastKnownState.gateway) { + this.log('Gateway recovered', 'info'); + } + this.lastKnownState.gateway = true; + this.firstFailureTime.gateway = 0; + } else { + await this.handleServiceDown('gateway', () => this.startOpenClawGateway()); + } + + if (lifeOk) { + if (!this.lastKnownState.life) { + this.log('Life agent recovered', 'info'); } + this.lastKnownState.life = true; + this.firstFailureTime.life = 0; + } else { + await this.handleServiceDown('life', () => this.startLifeAgent()); + } + + heartbeatCounter++; + if (heartbeatCounter >= (this.heartbeatInterval / 30000)) { + this.log(`Heartbeat: gateway=${gatewayOk ? 'OK' : 'DOWN'}, life=${lifeOk ? 'OK' : 'DOWN'}`, 'info'); + heartbeatCounter = 0; } }, 30000); } async start() { this.log('Agent Health Monitor starting...', 'info'); - - // Monitor OpenClaw Gateway service + + const gatewayOk = await this.checkOpenClawGateway(); + const lifeOk = await this.checkLifeAgent(); + this.log(`Initial check: gateway=${gatewayOk ? 'OK' : 'DOWN'}, life=${lifeOk ? 'OK' : 'DOWN'}`, 'info'); + this.lastKnownState.gateway = gatewayOk; + this.lastKnownState.life = lifeOk; + await this.monitorOpenClawService(); - - // Keep the monitor running this.log('Monitor is now active. 
Press Ctrl+C to stop.', 'info'); } } diff --git a/deploy.sh b/deploy.sh index 1dfdaac..5a1a9ae 100755 --- a/deploy.sh +++ b/deploy.sh @@ -11,14 +11,18 @@ # - Telegram notifications # # Usage: -# ./deploy.sh install - Install and start all services -# ./deploy.sh start - Start all services -# ./deploy.sh stop - Stop all services -# ./deploy.sh restart - Restart all services -# ./deploy.sh status - Show service status -# ./deploy.sh logs - Show recent logs -# ./deploy.sh rollback - Rollback to previous git commit -# ./deploy.sh backup - Create backup of current state +# ./deploy.sh install - Install and start all services +# ./deploy.sh start - Start all services +# ./deploy.sh stop - Stop all services +# ./deploy.sh restart - Restart all services +# ./deploy.sh status - Show service status +# ./deploy.sh logs - Show recent logs +# ./deploy.sh health - Run health check +# ./deploy.sh rollback - Rollback to previous git commit +# ./deploy.sh backup - Create backup of current state +# ./deploy.sh debug-stop - Stop ALL services (including monitor) for debugging +# ./deploy.sh debug-start - Start ALL services after debugging +# ./deploy.sh fix-service - Re-inject EnvironmentFile= after OpenClaw UI upgrade ############################################################################### set -e @@ -62,10 +66,8 @@ install_services() { loginctl enable-linger $(whoami) # Step 2: Export required environment variables - export XDG_RUNTIME_DIR=/run/user/$(id -u) - export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/$(id -u)/bus" + setup_user_env - # Verify environment if [ ! -d "$XDG_RUNTIME_DIR" ]; then log_error "XDG_RUNTIME_DIR not found: $XDG_RUNTIME_DIR" log_warning "Creating runtime directory..." @@ -73,99 +75,177 @@ install_services() { chmod 700 "$XDG_RUNTIME_DIR" fi - # Step 3: Install user-level gateway service - log_info "Installing user-level gateway service..." + # Step 3: Install user-level gateway services + log_info "Installing user-level gateway services..." 
mkdir -p ~/.config/systemd/user/ cp "$WORKSPACE/systemd/openclaw-gateway-user.service" ~/.config/systemd/user/openclaw-gateway.service + cp "$WORKSPACE/systemd/agent-life.service" ~/.config/systemd/user/openclaw-gateway-life.service - # Reload user systemd daemon systemctl --user daemon-reload systemctl --user enable openclaw-gateway + systemctl --user enable openclaw-gateway-life - # Step 4: Install system-level agent monitor (independent of user session) + # Step 4: Install system-level agent monitor log_info "Installing system-level agent monitor..." cp "$WORKSPACE/systemd/openclaw-agent-monitor.service" /etc/systemd/system/ systemctl daemon-reload systemctl enable openclaw-agent-monitor - # Step 5: Start services + # Step 5: Inject EnvironmentFile references + fix_service_files + + # Step 6: Start services log_info "Starting services..." systemctl --user start openclaw-gateway + systemctl --user start openclaw-gateway-life systemctl start openclaw-agent-monitor - # Wait for gateway to be ready sleep 3 log_success "OpenClaw services installed and started!" log_info "Gateway: ws://localhost:18789" - log_info "Dashboard: http://localhost:18789/" + log_info "Life Agent: openclaw-gateway-life.service" log_info "User service logs: journalctl --user -u openclaw-gateway -f" - log_info "Monitor logs: journalctl -u openclaw-agent-monitor -f" + log_info "Life agent logs: journalctl --user -u openclaw-gateway-life -f" + log_info "Monitor logs: journalctl -u openclaw-agent-monitor -f" } -start_services() { - log_info "Starting OpenClaw services..." - - # Set up environment for user-level services +setup_user_env() { export XDG_RUNTIME_DIR=/run/user/$(id -u) export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/$(id -u)/bus" +} + +start_services() { + log_info "Starting OpenClaw services..." + setup_user_env systemctl --user start openclaw-gateway + systemctl --user start openclaw-gateway-life systemctl start openclaw-agent-monitor - log_success "Services started!" 
+ log_success "All services started (gateway + life + monitor)" } stop_services() { log_info "Stopping OpenClaw services..." - - # Set up environment for user-level services - export XDG_RUNTIME_DIR=/run/user/$(id -u) - export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/$(id -u)/bus" + setup_user_env systemctl --user stop openclaw-gateway + systemctl --user stop openclaw-gateway-life systemctl stop openclaw-agent-monitor - log_success "Services stopped!" + log_success "All services stopped" } restart_services() { log_info "Restarting OpenClaw services..." - - # Set up environment for user-level services - export XDG_RUNTIME_DIR=/run/user/$(id -u) - export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/$(id -u)/bus" + setup_user_env systemctl --user restart openclaw-gateway + systemctl --user restart openclaw-gateway-life systemctl restart openclaw-agent-monitor - log_success "Services restarted!" + log_success "All services restarted (gateway + life + monitor)" +} + +debug_stop() { + log_warning "=== DEBUG MODE: Stopping ALL services ===" + log_warning "Monitor will NOT auto-restart gateway while in debug mode." + log_warning "Run './deploy.sh debug-start' when done debugging." + setup_user_env + + systemctl stop openclaw-agent-monitor 2>/dev/null || true + systemctl --user stop openclaw-gateway 2>/dev/null || true + systemctl --user stop openclaw-gateway-life 2>/dev/null || true + + log_success "All services stopped. Safe to debug." + echo "" + log_info "Useful debug commands:" + log_info " openclaw gateway start # start gateway in foreground" + log_info " journalctl --user -u openclaw-gateway -n 100" + log_info " journalctl --user -u openclaw-gateway-life -n 100" +} + +debug_start() { + log_info "=== Exiting DEBUG MODE: Restarting ALL services ===" + setup_user_env + + systemctl --user start openclaw-gateway + systemctl --user start openclaw-gateway-life + systemctl start openclaw-agent-monitor + + sleep 2 + log_success "All services restored. 
Monitor is active again." + health_check +} + +fix_service_files() { + log_info "Ensuring EnvironmentFile= is present in installed service files..." + setup_user_env + + local gateway_svc="$HOME/.config/systemd/user/openclaw-gateway.service" + local life_svc="$HOME/.config/systemd/user/openclaw-gateway-life.service" + local gateway_env="$WORKSPACE/systemd/gateway.env" + local life_env="$WORKSPACE/systemd/life-gateway.env" + local changed=0 + + if [ -f "$gateway_svc" ]; then + if ! grep -q "EnvironmentFile=.*gateway.env" "$gateway_svc" 2>/dev/null; then + sed -i "/^\[Service\]/a EnvironmentFile=-${gateway_env}" "$gateway_svc" + log_info "Injected EnvironmentFile into openclaw-gateway.service" + changed=1 + else + log_info "openclaw-gateway.service already has EnvironmentFile" + fi + fi + + if [ -f "$life_svc" ]; then + if ! grep -q "EnvironmentFile=.*life-gateway.env" "$life_svc" 2>/dev/null; then + sed -i "/^\[Service\]/a EnvironmentFile=-${life_env}" "$life_svc" + log_info "Injected EnvironmentFile into openclaw-gateway-life.service" + changed=1 + else + log_info "openclaw-gateway-life.service already has EnvironmentFile" + fi + fi + + if [ $changed -eq 1 ]; then + systemctl --user daemon-reload + log_success "Service files updated. Run './deploy.sh restart' to apply." + else + log_success "All service files are up to date." 
+ fi } show_status() { - # Set up environment for user-level services - export XDG_RUNTIME_DIR=/run/user/$(id -u) - export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/$(id -u)/bus" + setup_user_env echo "" - log_info "=== OpenClaw Gateway Status (User Service) ===" - systemctl --user status openclaw-gateway --no-pager -l + log_info "=== OpenClaw Gateway (User Service) ===" + systemctl --user status openclaw-gateway --no-pager -l 2>&1 || true + echo "" + log_info "=== Life Agent (User Service) ===" + systemctl --user status openclaw-gateway-life --no-pager -l 2>&1 || true echo "" - log_info "=== Agent Monitor Status (System Service) ===" - systemctl status openclaw-agent-monitor --no-pager -l + log_info "=== Agent Monitor (System Service) ===" + systemctl status openclaw-agent-monitor --no-pager -l 2>&1 || true echo "" log_info "=== Recent Gateway Logs ===" - journalctl --user -u openclaw-gateway --no-pager -n 15 + journalctl --user -u openclaw-gateway --no-pager -n 10 + echo "" + log_info "=== Recent Life Agent Logs ===" + journalctl --user -u openclaw-gateway-life --no-pager -n 10 echo "" log_info "=== Recent Monitor Logs ===" - journalctl -u openclaw-agent-monitor --no-pager -n 15 + journalctl -u openclaw-agent-monitor --no-pager -n 10 } show_logs() { - # Set up environment for user-level services - export XDG_RUNTIME_DIR=/run/user/$(id -u) - export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/$(id -u)/bus" + setup_user_env log_info "Showing recent gateway logs (last 50 lines)..." journalctl --user -u openclaw-gateway --no-pager -n 50 echo "" + log_info "Showing recent life agent logs (last 50 lines)..." + journalctl --user -u openclaw-gateway-life --no-pager -n 50 + echo "" log_info "Showing recent monitor logs (last 50 lines)..." journalctl -u openclaw-agent-monitor --no-pager -n 50 } @@ -235,24 +315,26 @@ backup() { health_check() { log_info "Running health check..." 
- - # Set up environment for user-level services - export XDG_RUNTIME_DIR=/run/user/$(id -u) - export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/$(id -u)/bus" + setup_user_env local issues=0 - # Check gateway (user-level service) if systemctl --user is-active --quiet openclaw-gateway 2>/dev/null; then - log_success "✓ Gateway is running (user service)" + log_success "✓ Gateway is running" else log_error "✗ Gateway is not running" ((issues++)) fi - # Check monitor (system-level service) + if systemctl --user is-active --quiet openclaw-gateway-life 2>/dev/null; then + log_success "✓ Life Agent is running" + else + log_error "✗ Life Agent is not running" + ((issues++)) + fi + if systemctl is-active --quiet openclaw-agent-monitor; then - log_success "✓ Agent Monitor is running (system service)" + log_success "✓ Agent Monitor is running" else log_error "✗ Agent Monitor is not running" ((issues++)) @@ -308,17 +390,20 @@ show_help() { echo "Usage: $0 " echo "" echo "Commands:" - echo " install - Install and start all systemd services" - echo " start - Start all services" - echo " stop - Stop all services" - echo " restart - Restart all services" - echo " status - Show service status" - echo " logs - Show recent logs" - echo " health - Run health check" - echo " backup - Create backup of current state" - echo " rollback - Rollback to previous git commit" - echo " rollback-to - Rollback to specific commit" - echo " help - Show this help message" + echo " install - Install and start all systemd services" + echo " start - Start all services (gateway + life + monitor)" + echo " stop - Stop all services" + echo " restart - Restart all services" + echo " status - Show service status" + echo " logs - Show recent logs" + echo " health - Run health check" + echo " backup - Create backup of current state" + echo " rollback - Rollback to previous git commit" + echo " rollback-to - Rollback to specific commit" + echo " debug-stop - Stop ALL services including monitor (safe for 
debugging)" + echo " debug-start - Restart all services after debugging" + echo " fix-service - Re-inject EnvironmentFile after OpenClaw UI upgrade" + echo " help - Show this help message" echo "" } @@ -354,6 +439,15 @@ case "${1:-help}" in rollback-to) rollback_to "$2" ;; + debug-stop) + debug_stop + ;; + debug-start) + debug_start + ;; + fix-service) + fix_service_files + ;; help|--help|-h) show_help ;; diff --git a/docs/EXTENSIONS_ARCHITECTURE.md b/docs/EXTENSIONS_ARCHITECTURE.md new file mode 100644 index 0000000..457ff5f --- /dev/null +++ b/docs/EXTENSIONS_ARCHITECTURE.md @@ -0,0 +1,267 @@ +# OpenClaw Extensions Architecture + +**版本:** 1.0 +**日期:** 2026-03-03 +**维护者:** Eason (陈医生) + +> **重要提示**: 本文档是所有自定义扩展的权威参考。在修改任何基础设施代码(systemd 服务、监控脚本、部署脚本、记忆系统)之前,必须阅读本文档。OpenClaw UI 升级可能覆盖部分文件,请参考第 7 节的升级安全清单。 + +--- + +## 1. 扩展概览 + +OpenClaw 核心是上游提供的 AI Agent 网关。以下组件为自定义扩展,不属于上游代码,升级时需特别保护: + +| 扩展组件 | 位置 | 说明 | +|---------|------|------| +| 四层记忆系统 | `skills/mem0-integration/` | Mem0 + Qdrant + FTS5 本地检索 | +| Agent Monitor | `agent-monitor.js` | 健康监控、自动重启、Telegram 通知 | +| 部署脚本 | `deploy.sh` | 服务管理、备份、回滚、调试命令 | +| 环境变量文件 | `systemd/gateway.env`, `systemd/life-gateway.env` | 升级安全的环境变量持久化 | +| Systemd 服务模板 | `systemd/` | 用户级和系统级服务定义 | +| 项目注册表 | `skills/mem0-integration/project_registry.yaml` | Agent-项目归属映射 | + +--- + +## 2. 
服务架构 (Hybrid Systemd) + +### 架构图 + +``` +┌─────────────────────────────────────────────────────────┐ +│ System-level (/etc/systemd/system/) │ +│ ┌───────────────────────────────────┐ │ +│ │ openclaw-agent-monitor.service │ ← 健康监控守护进程 │ +│ │ (Node.js, PID独立于gateway) │ │ +│ └────────────┬──────────────────────┘ │ +│ │ monitors │ +├───────────────┼─────────────────────────────────────────┤ +│ User-level (~/.config/systemd/user/) │ +│ ┌────────────▼──────────────────────┐ │ +│ │ openclaw-gateway.service │ ← 主 Gateway │ +│ │ (port 18789) │ │ +│ │ EnvironmentFile=gateway.env │ │ +│ ├───────────────────────────────────┤ │ +│ │ openclaw-gateway-life.service │ ← Life Agent │ +│ │ (张大师) │ │ +│ │ EnvironmentFile=life-gateway.env │ │ +│ └───────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────┘ +``` + +### User-level vs System-level + +| 属性 | User-level (gateway) | System-level (monitor) | +|------|---------------------|----------------------| +| 路径 | `~/.config/systemd/user/` | `/etc/systemd/system/` | +| 管理命令 | `systemctl --user ...` | `systemctl ...` | +| 日志查看 | `journalctl --user -u ` | `journalctl -u ` | +| 升级风险 | **高** — OpenClaw UI 可能覆盖 | 低 — 不受 UI 升级影响 | +| 依赖 | 需要 `loginctl enable-linger` | 无特殊依赖 | + +### 文件映射 + +| 模板 (workspace/systemd/) | 安装位置 | 说明 | +|--------------------------|---------|------| +| `openclaw-gateway-user.service` | `~/.config/systemd/user/openclaw-gateway.service` | 主 Gateway | +| `agent-life.service` | `~/.config/systemd/user/openclaw-gateway-life.service` | Life Agent | +| `openclaw-agent-monitor.service` | `/etc/systemd/system/openclaw-agent-monitor.service` | 监控 | +| `openclaw-gateway.service.legacy` | `/etc/systemd/system/openclaw-gateway.service` (已 masked) | 废弃 | +| `gateway.env` | 原地引用 (不复制) | 主 Gateway 环境变量 | +| `life-gateway.env` | 原地引用 (不复制) | Life Agent 环境变量 | + +--- + +## 3. 
监控系统 (Agent Monitor) + +**文件**: `agent-monitor.js` +**服务**: `openclaw-agent-monitor.service` (system-level) + +### 功能 + +| 功能 | 说明 | +|------|------| +| 双服务监控 | 同时监控 gateway 和 life agent | +| 重启限制 | 5 分钟内最多 5 次重启,超限停止并报警 | +| 升级容忍 | 首次检测到服务停止后等待 60 秒,避免升级期间误报 | +| 心跳日志 | 每 10 分钟输出一次状态 (`gateway=OK, life=OK`) | +| Telegram 通知 | 服务异常、重启失败时发送告警 | +| 日志记录 | `logs/agents/health-YYYY-MM-DD.log` | + +### 监控流程 + +``` +每 30 秒 → 检查 gateway 状态 + → 检查 life agent 状态 + → 如果正常: 重置故障计时器 + → 如果异常: + 首次: 记录时间,进入 grace period (60s) + 仍异常且已过 grace period: + 检查重启次数 → 未超限: 执行重启 + → 已超限: 发送 critical 告警 +每 10 分钟 → 输出心跳日志 +``` + +### 配置参数 (构造函数) + +| 参数 | 默认值 | 说明 | +|------|--------|------| +| `maxRestarts` | 5 | 重启窗口内最大重启次数 | +| `restartWindow` | 300000 (5min) | 重启计数窗口 | +| `gracePeriod` | 60000 (60s) | 首次故障后的等待时间 | +| `heartbeatInterval` | 600000 (10min) | 心跳日志间隔 | + +--- + +## 4. 记忆系统 + +> 完整文档: `docs/MEMORY_ARCHITECTURE.md` (v2.1) +> 开发者文档: `skills/mem0-integration/SKILL.md` + +### 快速参考 + +- **Layer 1**: Core Memory — MD 文件 (CORE_INDEX.md, IDENTITY.md, SOUL.md) +- **Layer 2**: Daily Memory — MEMORY.md + memory/*.md, Git 版本控制 +- **Layer 3**: Short-term — SQLite FTS5 本地检索 (`local_search.py`) +- **Layer 4**: Mem0 — Qdrant (`mem0_v4_shared`) + DashScope Embedding + +### 关键依赖 + +| 依赖 | 版本 | 用途 | +|------|------|------| +| Qdrant | 1.15.3 (Docker) | Layer 4 向量存储 | +| mem0ai | latest | Layer 4 客户端 | +| DashScope API | text-embedding-v4 | 1024 维嵌入 | +| SQLite FTS5 | Python stdlib | Layer 3 全文检索 | + +--- + +## 5. 
环境变量与 API 密钥 + +### 持久化策略 + +环境变量存放在 `.env` 文件中,通过 `EnvironmentFile=` 指令注入 systemd 服务。这种方式确保 OpenClaw UI 升级覆盖 `.service` 文件后,只需执行 `./deploy.sh fix-service` 即可恢复。 + +| 文件 | 权限 | 被引用者 | +|------|------|---------| +| `systemd/gateway.env` | 600 | openclaw-gateway.service | +| `systemd/life-gateway.env` | 600 | openclaw-gateway-life.service | + +### 变量清单 + +| 变量名 | 说明 | 使用者 | +|--------|------|--------| +| `MEM0_DASHSCOPE_API_KEY` | DashScope API 密钥 | mem0_client.py, session_init.py | +| `OPENAI_API_BASE` | DashScope 兼容端点 | mem0 SDK (旧版参数名) | +| `OPENAI_BASE_URL` | DashScope 兼容端点 | mem0 SDK (新版参数名) | +| `TAVILY_API_KEY` | Tavily 搜索 API | OpenClaw 核心 | + +### 添加新变量 + +1. 编辑 `systemd/gateway.env` (和/或 `life-gateway.env`) +2. 运行 `systemctl --user daemon-reload` +3. 运行 `./deploy.sh restart` + +--- + +## 6. 调试流程 + +### 停止所有服务 (含监控) + +```bash +./deploy.sh debug-stop +``` + +这会停止 gateway、life agent 和 monitor,防止 monitor 在调试期间自动重启 gateway。 + +### 手动启动 Gateway (前台模式) + +```bash +openclaw gateway start +``` + +### 查看日志 + +```bash +journalctl --user -u openclaw-gateway -f # 主 gateway +journalctl --user -u openclaw-gateway-life -f # life agent +journalctl -u openclaw-agent-monitor -f # monitor +``` + +### 恢复服务 + +```bash +./deploy.sh debug-start +``` + +### 修复升级后的服务文件 + +```bash +./deploy.sh fix-service +./deploy.sh restart +``` + +--- + +## 7. 
升级安全清单 + +OpenClaw UI 升级 (`openclaw gateway install` 或类似操作) 可能覆盖以下文件: + +### 会被覆盖的文件 + +| 文件 | 风险 | 恢复方式 | +|------|------|---------| +| `~/.config/systemd/user/openclaw-gateway.service` | `EnvironmentFile=` 行丢失 | `./deploy.sh fix-service` | +| OpenClaw 二进制 / Node 模块 | 正常升级行为 | 无需恢复 | + +### 不会被覆盖的文件 + +| 文件 | 说明 | +|------|------| +| `workspace/systemd/gateway.env` | 环境变量安全 | +| `workspace/systemd/life-gateway.env` | 环境变量安全 | +| `workspace/agent-monitor.js` | 自定义监控逻辑 | +| `workspace/deploy.sh` | 部署脚本 | +| `workspace/skills/mem0-integration/*` | 记忆系统代码 | +| `/etc/systemd/system/openclaw-agent-monitor.service` | 系统级服务 | + +### 升级后操作 + +```bash +# 1. 恢复环境变量引用 +./deploy.sh fix-service + +# 2. 重启所有服务 +./deploy.sh restart + +# 3. 验证服务状态 +./deploy.sh health +``` + +--- + +## deploy.sh 命令速查 + +| 命令 | 说明 | +|------|------| +| `install` | 安装所有 systemd 服务并启动 | +| `start` | 启动 gateway + life + monitor | +| `stop` | 停止所有服务 | +| `restart` | 重启所有服务 | +| `status` | 显示所有服务状态 | +| `logs` | 显示最近日志 | +| `health` | 运行健康检查 | +| `backup` | 创建工作区备份 | +| `rollback` | 回滚到上一个 Git 提交 | +| `rollback-to <commit>` | 回滚到指定提交 | +| `debug-stop` | 停止所有服务 (含 monitor),安全调试 | +| `debug-start` | 调试完成后恢复所有服务 | +| `fix-service` | 升级后重新注入 EnvironmentFile= | + +--- + +## 变更日志 + +| 版本 | 日期 | 变更 | +|------|------|------| +| 1.0 | 2026-03-03 | 初始版本: 统一记忆系统与监控系统文档 | diff --git a/docs/MEM0_ARCHITECTURE.md b/docs/MEM0_ARCHITECTURE.md index cad0843..0b74c73 100644 --- a/docs/MEM0_ARCHITECTURE.md +++ b/docs/MEM0_ARCHITECTURE.md @@ -1,7 +1,15 @@ # mem0 记忆系统架构文档 +> **已废弃 (Deprecated):** 本文档描述的是 v1.0 双 Collection (local + master) 架构,已于 2026-02-28 迁移至单库融合架构。 +> +> **请参阅最新文档:** +> - 四层记忆架构: [`docs/MEMORY_ARCHITECTURE.md`](./MEMORY_ARCHITECTURE.md) (v2.1) +> - Skill 开发者指南: [`skills/mem0-integration/SKILL.md`](../skills/mem0-integration/SKILL.md) +> +> 本文件保留作为部署流程和故障排除的参考。基础设施部分(Docker、Tailscale、端口配置)仍然有效。 + ## 版本信息 -- **文档版本**: 1.0.0 +- **文档版本**: 1.0.0 (已废弃) - **创建日期**: 2026-02-22 - **最后更新**: 2026-02-22 - 
**部署环境**: Ubuntu 24.04 LTS, Docker 29.2.1 diff --git a/docs/MEMORY_ARCHITECTURE.md b/docs/MEMORY_ARCHITECTURE.md new file mode 100644 index 0000000..18f764a --- /dev/null +++ b/docs/MEMORY_ARCHITECTURE.md @@ -0,0 +1,324 @@ +# 四层记忆架构 (Memory Layer Architecture) + +**版本:** 2.1 +**日期:** 2026-03-01 +**维护者:** Eason (陈医生) + +--- + +## 架构概览 + +OpenClaw 采用四层记忆体系,从本地文件到分布式向量数据库逐层递进,兼顾离线可用性与跨 Agent 共享能力。 + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ Memory Layer 1: Core Memory (核心记忆) │ +│ MD 文件 — CORE_INDEX.md / IDENTITY.md / SOUL.md 等 │ +│ 启动时首先加载,定义 Agent 身份与行为准则 │ +│ 作用域: 每个 Agent 独立 │ +├──────────────────────────────────────────────────────────────────┤ +│ Memory Layer 2: Daily Memory (日常记忆) │ +│ MEMORY.md (长期策略) + memory/*.md (每日记录) │ +│ Git 版本控制保护,支持回溯 │ +│ 作用域: 每个 Agent 独立,Git 备份 │ +├──────────────────────────────────────────────────────────────────┤ +│ Memory Layer 3: Short-term Memory (短期记忆 / QMD) │ +│ SQLite FTS5 全文检索 + 可选 GGUF 本地向量 │ +│ 离线可用,Layer 4 不可达时自动接管 │ +│ 作用域: 每个 Agent 独立,纯本地 │ +├──────────────────────────────────────────────────────────────────┤ +│ Memory Layer 4: Mem0 Conversation Memory (对话记忆) │ +│ Qdrant (mem0_v4_shared) + text-embedding-v4 (1024 维) │ +│ 通过 Tailscale 可跨服务器共享 │ +│ 三级可见性: public / project / private │ +│ 元数据隔离: visibility + project_id + agent_id │ +│ 记忆衰减: expiration_date (7d / 30d / permanent) │ +└──────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Layer 1: Core Memory (核心记忆) + +**存储介质:** Markdown 文件 +**符合度:** 90% + +### 关键文件 + +| 文件 | 用途 | 加载时机 | +|------|------|----------| +| `CORE_INDEX.md` | 核心索引,结构总览 | 会话启动时首先加载 | +| `IDENTITY.md` | Agent 身份定义 | 会话启动 | +| `SOUL.md` | 人格与行为准则 | 会话启动 | +| `USER.md` | 用户信息 | 会话启动 | +| `AGENTS.md` | Agent 运维指南 | 按需加载 | +| `TOOLS.md` | 工具配置 | 按需加载 | + +### 每个 Agent 的核心文件 + +- **Eason (main):** `/root/.openclaw/workspace/` +- **张大师 (life):** `/root/.openclaw/workspace/agents/life-workspace/` + +### 差距与待改进 + +- 缺少跨 Agent 的共享核心记忆索引 +- 未来可通过 
`shared/` 目录实现集群通用规则 + +--- + +## Layer 2: Daily Memory (日常记忆) + +**存储介质:** Markdown 文件 + Git +**符合度:** 85% + +### 文件结构 + +- `MEMORY.md` — 长期决策、安全模板、架构要点 (380+ 行) +- `memory/*.md` — 每日记忆文件 +- `memory_strategy.md` — 记忆管理策略文档 + +### 差距与待改进 + +- MEMORY.md 混合了"长期决策"和"配置模板",需结构化分类 +- 日常记忆文件命名不统一 +- 缺乏自动归档/淘汰机制 + +--- + +## Layer 3: Short-term Memory (短期记忆 / QMD) + +**存储介质:** SQLite (FTS5) + 可选 GGUF 向量 +**符合度:** 60% + +### 当前实现 + +- QMD 系统为每个 Agent 维护独立 SQLite 索引 +- Main: `/root/.openclaw/agents/main/qmd/xdg-cache/qmd/index.sqlite` +- Life: `/root/.openclaw/agents/life/qmd/xdg-cache/qmd/index.sqlite` +- 自动索引 `MEMORY.md` 和 `memory/**/*.md` + +### 硬件限制 + +- CPU: 2 核 Xeon E3-12xx v2 (2.7GHz, AVX, 无 AVX2) +- RAM: 3.8GB 总量,可用 ~850MB +- GPU: 无 + +### 两阶段策略 + +**阶段 A: SQLite FTS5 全文检索** (零额外内存) +- 覆盖 80% 离线检索需求 +- 中文分词 + 关键词/短语搜索 + +**阶段 B: GGUF 按需加载** (需 >= 300MB 空闲内存) +- 模型: `bge-small-zh-v1.5` Q4_K_M (~50MB) +- 不常驻内存,用完释放 +- Layer 4 不可达时自动切换 + +--- + +## Layer 4: Mem0 Conversation Memory (对话记忆) + +**存储介质:** Qdrant + text-embedding-v4 (1024 维) +**符合度:** 85% + +### 技术栈 + +| 组件 | 技术 | 配置 | +|------|------|------| +| 向量数据库 | Qdrant v1.15.3 | localhost:6333 | +| Collection | mem0_v4_shared | 统一共享 | +| Embedding | text-embedding-v4 | 1024 维度 | +| LLM | DashScope Qwen Plus | 记忆提取/合并 | +| 网络 | Tailscale | 跨服务器访问 | + +### 三级可见性 + +| 可见性 | 字段值 | 检索规则 | 适用场景 | +|--------|--------|----------|----------| +| **public** | `visibility=public` | 所有 Agent 可检索 | 集群通用信息 | +| **project** | `visibility=project` | 同 `project_id` 成员可检索 | 项目共享知识 | +| **private** | `visibility=private` | 仅 `agent_id` 本人可检索 | Agent 私有记忆 | + +### 记忆衰减策略 + +| 记忆类型 | 过期时间 | 示例 | +|----------|----------|------| +| session | 7 天 | "正在讨论服务器部署" | +| chat_summary | 30 天 | "上周讨论了 Qdrant 迁移方案" | +| preference | 永不过期 | "用户偏好 Tailscale 组网" | +| knowledge | 永不过期 | "Qdrant 部署在 6333 端口" | + +### 数据流 + +``` +用户消息 → 选择性过滤 → Post-Hook 异步写入 → Qdrant + ↓ + 自动设置 expiration_date + 自动标注 visibility / project_id + ↓ + Mem0 
ADD/UPDATE/DELETE/NOOP +``` + +--- + +## 基础设施支撑 (与记忆层正交) + +| 基础设施 | 保护的记忆层 | 职责 | +|----------|-------------|------| +| Git | Layer 1 + Layer 2 | 版本控制、备份、回溯 | +| Monitoring (systemd) | Layer 4 | 监控 Gateway/Qdrant 健康状态 | +| Tailscale | Layer 4 | 跨服务器安全通信 | + +--- + +## 多 Agent 集群支持 + +### 同一服务器 (单实例多 Agent) + +- 共享同一 Gateway 实例 +- 通过 Session 隔离各 Agent 上下文 +- 共享 Qdrant Collection,metadata 软隔离 + +### 跨服务器 (多实例多 Agent) + +- 通过 Tailscale VPN 连接中心 Qdrant +- `project_registry.yaml` 管理 Agent-项目映射 +- `visibility` 字段控制记忆可见性 + +### 项目注册表 + +位置: `/root/.openclaw/workspace/skills/mem0-integration/project_registry.yaml` + +管理 Agent 与项目的归属关系,决定 project 级记忆的访问权限。 + +--- + +## 跨服务器多 Agent 集群 + +### 网络拓扑 + +``` +┌──────────────────────────────────────────────────────────┐ +│ Tailscale VPN (WireGuard) │ +│ │ +│ ┌──────────────────┐ ┌──────────────────┐ │ +│ │ Server 1 (VPS) │ │ Server 2 │ │ +│ │ 100.115.94.1 │ │ 100.64.x.x │ │ +│ │ │ │ │ │ +│ │ Qdrant Master │◄─────│ Agent-C │ │ +│ │ :6333 │ │ (remote) │ │ +│ │ │ └──────────────────┘ │ +│ │ Agent-A (main) │ │ +│ │ Agent-B (life) │ ┌──────────────────┐ │ +│ │ │ │ Server 3 │ │ +│ │ │◄─────│ 100.64.x.x │ │ +│ │ │ │ Agent-D │ │ +│ └──────────────────┘ └──────────────────┘ │ +└──────────────────────────────────────────────────────────┘ +``` + +### 各层的集群行为 + +| 记忆层 | 同服务器多 Agent | 跨服务器多 Agent | +|--------|-----------------|-----------------| +| Layer 1 (Core) | 各 Agent 独立工作区 | 各服务器独立文件系统 | +| Layer 2 (Daily) | 各 Agent 独立 memory/ | 各服务器独立,Git 同步 | +| Layer 3 (QMD) | 各 Agent 独立 SQLite | 各服务器独立,纯本地 | +| Layer 4 (Mem0) | 共享 Qdrant,metadata 隔离 | 通过 Tailscale 连接中心 Qdrant | + +### 跨服务器 Agent 接入步骤 + +1. 新服务器安装 Tailscale 并加入同一 tailnet +2. 配置 mem0 的 Qdrant host 指向中心节点 Tailscale IP +3. 在 `project_registry.yaml` 中注册 agent 及其所属项目 +4. 
在 `agents/registry.md` 中登记新 Agent + +### visibility 如何实现三种记忆隔离 + +``` +通用信息 (全集群共享): + 写入: visibility=public + 检索: 所有 agent 的 Phase 1 自动检索 public 记忆 + +项目记忆 (项目内共享): + 写入: visibility=project, project_id=<项目标识> + 检索: Phase 2 查 project_registry.yaml 获取 agent 所属项目列表 + 仅检索自己所属项目的记忆 + +私密记忆 (仅自身可见): + 写入: visibility=private, agent_id=<自身> + 检索: Phase 3 仅检索 agent_id 匹配的私密记忆 +``` + +### 安全措施 + +- Tailscale WireGuard 端到端加密传输 +- Qdrant 仅绑定 127.0.0.1,不暴露公网 +- Pre-hook 强制注入 agent_id filter,防止跨域访问 +- 审计日志记录所有跨域检索尝试 (规划中,尚未实现,见下文"待实现功能") + +### 扩展路线 + +- **短期**: 单 Qdrant 实例 + Tailscale 远程访问 (当前) +- **中期**: Qdrant 快照定期备份,灾备恢复 +- **长期**: Qdrant 集群模式或 Qdrant Cloud (按负载决定) + +--- + +## 开发者注意事项 + +> 详细代码级文档: `skills/mem0-integration/SKILL.md` + +### mem0 Python SDK 与 Qdrant 原生 API 的区别 + +| 操作 | mem0 SDK (mem0_client.py) | Qdrant 原生 (memory_cleanup.py) | +|------|--------------------------|-------------------------------| +| filter | 扁平 dict: `{"key": "val"}` | `Filter(must=[FieldCondition(...)])` | +| 多条件 | 多 key 隐式 AND: `{"a": 1, "b": 2}` | `Filter(must=[cond1, cond2])` | +| 搜索 | `m.search(query, filters=...)` | `client.search(collection, query_vector, ...)` | + +混用格式是常见 bug 来源。mem0 `search(filters=...)` 不支持 Qdrant 的嵌套 `{"AND": [...]}` 语法。 + +### agent_id 双写 + +`mem0.add()` 需要同时传递 `agent_id` 为顶层参数和 metadata 字段。顶层参数供 mem0 内部索引,metadata 字段供自定义 filter 检索。漏写任一会导致特定检索路径失效。 + +### Layer 3 FTS5 分词 + +使用字符级分词,仅保留 CJK 表意文字 (基本区 U+4E00–U+9FFF、扩展 A 区 U+3400–U+4DBF、兼容表意文字 U+F900–U+FAFF) 和 ASCII 字母数字。标点和特殊符号被过滤,避免索引噪音。精度低于 jieba 词级分词,但零额外依赖。 + +### 待实现功能 + +| 功能 | 优先级 | 说明 | +|------|--------|------| +| 审计日志 | P2 | 跨域检索审计记录,防止越权访问 | +| GGUF 按需加载 | P3 | Layer 3 本地向量,需 >= 300MB 空闲内存 | +| Qdrant 集群化 | P3 | 按负载增长决定 | + +--- + +## 变更记录 + +### v2.1 (2026-03-01) +- 修复: `_execute_search` 三阶段检索 filter 格式 (嵌套 AND → 扁平 dict) +- 修复: `_execute_write` 补充 `agent_id` 顶层参数确保检索可达 +- 修复: `session_init.py` 补充 `OPENAI_API_BASE` 环境变量 +- 修复: `local_search.py` FTS5 分词过滤 CJK 标点噪音 +- 清理: 移除未使用的 import + +### v2.0 (2026-02-28) +- 新增: 三级可见性 + 三阶段检索 +- 新增: 记忆衰减 (expiration_date) +- 新增: 
智能写入过滤 + 自动分类 +- 新增: 项目注册表 (project_registry.yaml) +- 新增: Layer 3 SQLite FTS5 本地检索 +- 新增: 月度清理脚本 +- 安全: 全部 API Key 改为环境变量 +- 新增: CORE_INDEX.md Memory Architecture 章节 + +### v1.0 (2026-02-22) +- 初始部署: mem0 + Qdrant + DashScope 集成 + +--- + +**最后更新:** 2026-03-01 diff --git a/skills/mem0-integration/SKILL.md b/skills/mem0-integration/SKILL.md index 63ebfea..2fc29ba 100644 --- a/skills/mem0-integration/SKILL.md +++ b/skills/mem0-integration/SKILL.md @@ -1,35 +1,209 @@ # mem0-integration Skill -## 功能说明 +## 功能概述 -集成 mem0 记忆系统,为 OpenClaw 提供: -- ✅ 本地记忆存储(Qdrant Local) -- ✅ 共享记忆同步(Qdrant Master) -- ✅ 语义搜索 -- ✅ 多 Agent 协作 -- ✅ 分层记忆管理 +为 OpenClaw 提供基于 mem0 + Qdrant 的对话记忆系统 (Memory Layer 4),包括: -## 架构 +- Pre-Hook 语义检索注入(对话前自动召回相关记忆) +- Post-Hook 异步写入(对话后智能筛选并存储记忆) +- 三级可见性隔离 (public / project / private) +- 记忆衰减 (expiration_date: 7d / 30d / permanent) +- 智能写入过滤(跳过无价值对话) +- Layer 3 FTS5 本地全文检索 fallback(Qdrant 不可达时接管) +- 冷启动记忆预加载(新会话自动注入最近上下文) +> 架构全景文档: `docs/MEMORY_ARCHITECTURE.md` + +--- + +## 文件结构 + +``` +skills/mem0-integration/ +├── SKILL.md # 本文档 — 开发者参考 +├── openclaw.plugin.json # OpenClaw 插件声明 (lifecycle hook) +├── skill.json # Skill 元数据 +├── index.js # JS 入口,桥接 OpenClaw Gateway ↔ Python +│ +│ ── 核心运行时 ── +├── mem0_client.py # 核心客户端:初始化、检索、写入、队列、缓存 +├── openclaw_interceptor.py # Pre/Post-Hook 拦截器(Gateway 调用入口) +├── session_init.py # 冷启动记忆预加载 +│ +│ ── 配置 ── +├── config.yaml # mem0 全局配置(Qdrant / LLM / Embedder / Cache) +├── config-life.yaml # 张大师 (life agent) 专用配置 +├── project_registry.yaml # Agent-项目归属关系(决定 project 级可见性) +│ +│ ── 辅助工具 ── +├── local_search.py # Layer 3: SQLite FTS5 本地全文检索 fallback +├── memory_cleanup.py # 月度记忆统计与清理脚本 +├── migrate_to_single_collection.py # 从旧多 Collection 迁移到单库融合架构 +├── recover_memories.py # 记忆恢复工具 v1 +├── recover_memories_v2.py # 记忆恢复工具 v2 +│ +│ ── 旧版 / 命令 ── +├── commands.py # /memory 命令处理 +├── openclaw_commands.py # OpenClaw 原生命令扩展 +├── mem0_integration.py # 旧版集成入口(已被 mem0_client.py 取代) +│ +│ ── 测试 ── +├── test_mem0.py # mem0 单元测试 +├── 
test_integration.py # 集成测试 +└── test_production.py # 生产环境测试 +``` + +--- + +## 环境变量 + +| 变量名 | 用途 | 必需 | 默认值 | +|--------|------|------|--------| +| `MEM0_DASHSCOPE_API_KEY` | DashScope API 密钥 (LLM + Embedding) | 是 | — | +| `DASHSCOPE_API_KEY` | 备选 key 名称 (二选一) | — | — | +| `MEM0_QDRANT_HOST` | Qdrant 地址 | 否 | `localhost` | +| `MEM0_QDRANT_PORT` | Qdrant 端口 | 否 | `6333` | +| `MEM0_LLM_MODEL` | LLM 模型名 | 否 | `qwen-plus` | +| `MEM0_EMBEDDER_MODEL` | Embedding 模型名 | 否 | `text-embedding-v4` | + +API 密钥查找顺序: `MEM0_DASHSCOPE_API_KEY` → `DASHSCOPE_API_KEY` → 已有 `OPENAI_API_KEY` + +DashScope 兼容模式需要同时设置 `OPENAI_API_BASE` 和 `OPENAI_BASE_URL`,代码在模块加载时自动完成。 + +--- + +## 核心模块说明 + +### mem0_client.py — 核心客户端 + +**类: `Mem0Client`** +- `_init_memory()` — 初始化 mem0 Memory 实例(Qdrant + DashScope Embedder 1024 维) +- `start()` — 启动异步写入队列的后台 worker(必须在 event loop 中调用) +- `pre_hook_search()` — Pre-Hook: 三阶段检索(public → project → private → legacy fallback),带缓存和超时 +- `post_hook_add()` — Post-Hook: 智能过滤 + 自动分类(memory_type / visibility)+ 入队 +- `_execute_write()` — 后台异步写入 Qdrant,附带 metadata 和 expiration_date + +**类: `AsyncMemoryQueue`** +- 基于 `collections.deque` 的有界异步队列 +- 后台 worker 每秒轮询,批量处理 + +**全局实例:** `mem0_client = Mem0Client()` — 模块加载时自动创建 + +### openclaw_interceptor.py — 拦截器 + +Gateway 调用入口。从 `context` dict 中提取 `user_id`、`agent_id`、`visibility`、`project_id`、`memory_type`,桥接到 `mem0_client`。 + +### local_search.py — Layer 3 FTS5 Fallback + +基于 SQLite FTS5 的本地全文检索,Qdrant 不可达时接管。 +- CJK 字符逐字拆分 + ASCII 单词保持完整(过滤标点噪音) +- 每个 Agent 维护独立的 FTS5 索引文件 +- `rebuild_index()` 扫描 MEMORY.md + memory/*.md + 共享核心文件 + +--- + +## 开发者注意事项 + +### mem0 filter 格式 + +mem0 `search()` 的 `filters` 参数使用**扁平 dict**,多个条件为隐式 AND: + +```python +# 正确: 扁平 dict (mem0 Python SDK) +filters={"visibility": "private", "agent_id": "main"} + +# 错误: 嵌套 AND (Qdrant 原生 API 语法,mem0 不支持) +filters={"AND": [{"visibility": "private"}, {"agent_id": "main"}]} +``` + +直接操作 Qdrant (如 `memory_cleanup.py`) 时使用原生 Filter 对象: + +```python +from 
qdrant_client.models import Filter, FieldCondition, MatchValue +Filter(must=[FieldCondition(key="visibility", match=MatchValue(value="public"))]) ``` -Agent → mem0 Client → Qdrant Local → (异步同步) → Qdrant Master (100.115.94.1) + +### mem0 add() 的 agent_id + +`mem0.add()` 必须同时传递 `agent_id` 作为顶层参数和 metadata 字段: + +```python +self.local_memory.add( + messages=messages, + user_id=user_id, + agent_id=agent_id, # 顶层: mem0 内部索引用 + metadata={ + "agent_id": agent_id, # metadata: 自定义 filter 查询用 + "visibility": "private", + ... + } +) ``` -## 配置 +原因: mem0 的 `search(agent_id=...)` 匹配顶层字段;`search(filters={"agent_id": ...})` 匹配 metadata 字段。两处都写入确保两种检索路径均能命中。 + +### FTS5 中文分词 + +`local_search.py` 使用字符级分词(非 jieba),仅保留 CJK 表意文字 (基本区 U+4E00–U+9FFF、扩展 A 区 U+3400–U+4DBF、兼容表意文字 U+F900–U+FAFF) 和 ASCII 字母数字: + +- 输入 `"你好,world!"` → 输出 `"你 好 world"` +- 标点、emoji、特殊符号被过滤,避免 FTS5 索引噪音 +- 搜索精度低于 jieba 词级分词,但零依赖、零内存开销 + +### 可见性自动分类 + +`_classify_visibility()` 只返回 `"public"` 或 `"private"`(不自动推断 `"project"`)。项目级可见性必须由调用方通过 `context` 显式传入 `visibility="project"` + `project_id`。 + +### 记忆写入过滤规则 -编辑 `/root/.openclaw/workspace/skills/mem0-integration/config.yaml` +以下对话自动跳过写入: +1. 用户消息长度 < 5 字符 +2. 匹配 SKIP_PATTERNS: 好的、收到、OK、嗯、行、没问题、感谢、谢谢 等 +3. 
以 `/` 开头的系统命令 + +--- ## 命令 -- `/memory add <内容>` - 添加记忆 -- `/memory search <关键词>` - 搜索记忆 -- `/memory list` - 列出所有记忆 -- `/memory delete ` - 删除记忆 -- `/memory sync` - 手动同步到中心 -- `/memory status` - 查看状态 +通过 Telegram 使用: + +``` +/memory add <内容> # 手动添加记忆 +/memory search <关键词> # 搜索记忆 +/memory list # 列出所有记忆 +/memory delete <id> # 删除记忆 +/memory status # 查看状态 +``` + +--- ## 依赖 -- mem0ai (pip install mem0ai) -- Qdrant (Docker) -- pyyaml +``` +mem0ai # 核心记忆管理 +qdrant-client # Qdrant 向量数据库客户端 +pyyaml # YAML 配置解析 +``` + +--- + +## 更新记录 + +### v2.1 (2026-03-01) +- 修复: `_execute_search` filter 格式从 Qdrant 嵌套语法改为 mem0 扁平 dict +- 修复: `_execute_write` 补充 `agent_id` 顶层参数 +- 修复: `session_init.py` 补充 `OPENAI_API_BASE` 环境变量 +- 修复: `local_search.py` 中文分词过滤标点噪音 +- 清理: 移除未使用的 import (Optional, os, re) + +### v2.0 (2026-02-28) +- 新增: 三级可见性 (public / project / private) + 三阶段检索 +- 新增: 记忆衰减 (expiration_date) +- 新增: 智能写入过滤 +- 新增: 项目注册表 (project_registry.yaml) +- 新增: Layer 3 SQLite FTS5 本地全文检索 +- 新增: 月度清理脚本 (memory_cleanup.py) +- 安全: 所有 API Key 改为环境变量 + +### v1.0 (2026-02-22) +- 初始版本: mem0 + Qdrant 基础集成 diff --git a/skills/mem0-integration/mem0-plugin.js b/skills/mem0-integration/index.js similarity index 88% rename from skills/mem0-integration/mem0-plugin.js rename to skills/mem0-integration/index.js index 5aa2c3b..9196696 100644 --- a/skills/mem0-integration/mem0-plugin.js +++ b/skills/mem0-integration/index.js @@ -157,6 +157,25 @@ class Mem0Plugin { } // 导出插件实例 -module.exports = new Mem0Plugin({ +const mem0Plugin = new Mem0Plugin({ pythonPath: process.env.MEM0_PYTHON_PATH || 'python3' }); + +// OpenClaw 插件生命周期导出 +module.exports = { + async register(ctx) { + console.log('[Mem0] 注册插件...'); + await mem0Plugin.onLoad(); + return mem0Plugin; + }, + + async activate(ctx) { + console.log('[Mem0] 激活插件...'); + return mem0Plugin; + }, + + // 导出实例方法供 OpenClaw 调用 + preLLM: async (userMessage, context) => await mem0Plugin.preLLM(userMessage, context), + postResponse: async (userMessage, assistantMessage, 
context) => await mem0Plugin.postResponse(userMessage, assistantMessage, context), + ...mem0Plugin +}; diff --git a/skills/mem0-integration/local_search.py b/skills/mem0-integration/local_search.py new file mode 100644 index 0000000..e954ef0 --- /dev/null +++ b/skills/mem0-integration/local_search.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 +""" +Layer 3 本地搜索 — Layer 4 (Qdrant) 不可达时的 fallback +基于 SQLite FTS5 全文检索,零额外内存开销。 + +用法: + from local_search import LocalSearchFallback + fb = LocalSearchFallback(agent_id='main') + fb.rebuild_index() # 重建索引(启动或 MEMORY.md 变更时) + results = fb.search("Qdrant 配置") # 搜索 +""" + +import sqlite3 +import logging +from pathlib import Path +from typing import List, Dict + +logger = logging.getLogger(__name__) + +WORKSPACE_ROOT = Path('/root/.openclaw/workspace') + +AGENT_MEMORY_PATHS = { + 'main': [ + WORKSPACE_ROOT / 'MEMORY.md', + WORKSPACE_ROOT / 'memory', + ], + 'life': [ + WORKSPACE_ROOT / 'agents' / 'life-workspace' / 'MEMORY.md', + WORKSPACE_ROOT / 'agents' / 'life-workspace' / 'memory', + ], +} + +SHARED_PATHS = [ + WORKSPACE_ROOT / 'CORE_INDEX.md', + WORKSPACE_ROOT / 'IDENTITY.md', + WORKSPACE_ROOT / 'SOUL.md', +] + + +def _is_cjk(ch: str) -> bool: + cp = ord(ch) + return (0x4E00 <= cp <= 0x9FFF + or 0x3400 <= cp <= 0x4DBF + or 0xF900 <= cp <= 0xFAFF) + + +def _tokenize_chinese(text: str) -> str: + """简易中文分词:CJK 字符逐字拆分 + ASCII 单词保持完整,过滤标点""" + tokens = [] + buf = [] + for ch in text: + if ch.isascii() and ch.isalnum(): + buf.append(ch) + else: + if buf: + tokens.append(''.join(buf)) + buf = [] + if _is_cjk(ch): + tokens.append(ch) + if buf: + tokens.append(''.join(buf)) + return ' '.join(tokens) + + +class LocalSearchFallback: + """基于 SQLite FTS5 的本地全文检索""" + + def __init__(self, agent_id: str = 'main', db_path: str = None): + self.agent_id = agent_id + if db_path is None: + cache_dir = Path(f'/root/.openclaw/agents/{agent_id}/qmd/xdg-cache/qmd') + cache_dir.mkdir(parents=True, exist_ok=True) + db_path = str(cache_dir / 
'fts5_index.sqlite') + self.db_path = db_path + self._init_db() + + def _init_db(self): + conn = sqlite3.connect(self.db_path) + conn.execute(''' + CREATE VIRTUAL TABLE IF NOT EXISTS memory_fts + USING fts5( + title, + content, + source_path, + agent_id UNINDEXED, + tokenize='unicode61' + ) + ''') + conn.commit() + conn.close() + + def rebuild_index(self): + """重建 FTS5 索引,扫描 agent 相关的所有 memory 文件""" + conn = sqlite3.connect(self.db_path) + conn.execute('DELETE FROM memory_fts') + + paths = list(SHARED_PATHS) + agent_paths = AGENT_MEMORY_PATHS.get(self.agent_id, []) + for p in agent_paths: + if p.is_file(): + paths.append(p) + elif p.is_dir(): + paths.extend(p.rglob('*.md')) + + indexed = 0 + for filepath in paths: + if not filepath.exists(): + continue + try: + text = filepath.read_text(encoding='utf-8') + title = filepath.stem + tokenized = _tokenize_chinese(text) + conn.execute( + 'INSERT INTO memory_fts(title, content, source_path, agent_id) VALUES (?, ?, ?, ?)', + (title, tokenized, str(filepath), self.agent_id) + ) + indexed += 1 + except Exception as e: + logger.debug(f"索引文件失败 {filepath}: {e}") + + conn.commit() + conn.close() + logger.info(f"FTS5 索引重建完成: {indexed} 个文件 (agent={self.agent_id})") + return indexed + + def search(self, query: str, top_k: int = 5) -> List[Dict]: + """全文检索""" + tokenized_query = _tokenize_chinese(query) + + conn = sqlite3.connect(self.db_path) + try: + cursor = conn.execute( + '''SELECT title, snippet(memory_fts, 1, '>>>', '<<<', '...', 64) as snippet, + source_path, rank + FROM memory_fts + WHERE memory_fts MATCH ? 
+ ORDER BY rank + LIMIT ?''', + (tokenized_query, top_k) + ) + results = [] + for row in cursor: + results.append({ + 'title': row[0], + 'snippet': row[1], + 'source': row[2], + 'score': -row[3], + }) + return results + except Exception as e: + logger.debug(f"FTS5 检索失败: {e}") + return [] + finally: + conn.close() + + def get_stats(self) -> Dict: + conn = sqlite3.connect(self.db_path) + try: + row = conn.execute('SELECT COUNT(*) FROM memory_fts').fetchone() + return {'indexed_documents': row[0] if row else 0, 'db_path': self.db_path} + finally: + conn.close() + + +if __name__ == '__main__': + import sys + agent = sys.argv[1] if len(sys.argv) > 1 else 'main' + fb = LocalSearchFallback(agent_id=agent) + count = fb.rebuild_index() + print(f"Indexed {count} files for agent '{agent}'") + + if len(sys.argv) > 2: + query = ' '.join(sys.argv[2:]) + results = fb.search(query) + for r in results: + print(f" [{r['score']:.2f}] {r['title']}: {r['snippet'][:100]}") diff --git a/skills/mem0-integration/mem0_client.py b/skills/mem0-integration/mem0_client.py index 42a0085..d286d2f 100644 --- a/skills/mem0-integration/mem0_client.py +++ b/skills/mem0-integration/mem0_client.py @@ -2,22 +2,32 @@ """ mem0 Client for OpenClaw - 生产级纯异步架构 Pre-Hook 检索注入 + Post-Hook 异步写入 -元数据维度隔离 (user_id + agent_id) +三级可见性隔离 (public / project / private) +记忆衰减 (expiration_date) + 智能写入过滤 """ import os +import re import asyncio import logging import time -from typing import List, Dict, Optional, Any +import yaml +from typing import List, Dict, Any from collections import deque -from datetime import datetime +from datetime import datetime, timedelta +from pathlib import Path # ========== DashScope 环境变量配置 ========== # Gemini Pro Embedding 模型:text-embedding-v4 (1024 维度) os.environ['OPENAI_API_BASE'] = 'https://dashscope.aliyuncs.com/compatible-mode/v1' os.environ['OPENAI_BASE_URL'] = 'https://dashscope.aliyuncs.com/compatible-mode/v1' # 关键:兼容模式需要此变量 -os.environ['OPENAI_API_KEY'] = 
os.getenv('MEM0_DASHSCOPE_API_KEY', 'sk-4111c9dba5334510968f9ae72728944e') +_dashscope_key = os.getenv('MEM0_DASHSCOPE_API_KEY', '') +if not _dashscope_key: + _dashscope_key = os.getenv('DASHSCOPE_API_KEY', '') +if _dashscope_key: + os.environ['OPENAI_API_KEY'] = _dashscope_key +elif not os.environ.get('OPENAI_API_KEY'): + logging.warning("MEM0_DASHSCOPE_API_KEY not set; mem0 embedding/LLM calls will fail") try: from mem0 import Memory @@ -49,6 +59,9 @@ class AsyncMemoryQueue: 'messages': item['messages'], 'user_id': item['user_id'], 'agent_id': item['agent_id'], + 'visibility': item.get('visibility', 'private'), + 'project_id': item.get('project_id'), + 'memory_type': item.get('memory_type', 'session'), 'timestamp': item.get('timestamp', datetime.now().isoformat()) }) else: @@ -122,10 +135,57 @@ class AsyncMemoryQueue: logger.info("异步工作线程已关闭") +EXPIRATION_MAP = { + 'session': timedelta(days=7), + 'chat_summary': timedelta(days=30), + 'preference': None, + 'knowledge': None, +} + +SKIP_PATTERNS = re.compile( + r'^(好的|收到|OK|ok|嗯|行|没问题|感谢|谢谢|了解|明白|知道了|👍|✅|❌)$', + re.IGNORECASE +) + +SYSTEM_CMD_PATTERN = re.compile(r'^/') + +MEMORY_KEYWORDS = re.compile( + r'(记住|以后|偏好|配置|设置|规则|永远|始终|总是|不要|禁止)', +) + +PUBLIC_KEYWORDS = re.compile( + r'(所有人|通知|全局|公告|大家|集群)', +) + + +def _load_project_registry() -> Dict: + """从 project_registry.yaml 加载项目注册表""" + registry_path = Path(__file__).parent / 'project_registry.yaml' + if registry_path.exists(): + try: + with open(registry_path, 'r', encoding='utf-8') as f: + return yaml.safe_load(f) or {} + except Exception: + pass + return {} + + +def get_agent_projects(agent_id: str) -> List[str]: + """查询一个 agent 所属的所有 project_id""" + registry = _load_project_registry() + projects = registry.get('projects', {}) + result = [] + for pid, pconf in projects.items(): + members = pconf.get('members', []) + if '*' in members or agent_id in members: + result.append(pid) + return result + + class Mem0Client: """ 生产级 mem0 客户端 - 纯异步架构 + 阻塞操作隔离 + 元数据维度隔离 
+ 纯异步架构 + 三级可见性 + 记忆衰减 + 智能写入过滤 """ def __init__(self, config: Dict = None): @@ -134,7 +194,6 @@ class Mem0Client: self.async_queue = None self.cache = {} self._started = False - # 不在 __init__ 中启动异步任务 self._init_memory() def _load_default_config(self) -> Dict: @@ -162,7 +221,7 @@ class Mem0Client: "enabled": True, "top_k": 5, "min_confidence": 0.7, - "timeout_ms": 2000 + "timeout_ms": 5000 }, "async_write": { "enabled": True, @@ -184,6 +243,10 @@ class Mem0Client: "metadata": { "default_user_id": "default", "default_agent_id": "general" + }, + "write_filter": { + "enabled": True, + "min_user_message_length": 5, } } @@ -283,55 +346,100 @@ class Mem0Client: async def _execute_search(self, query: str, user_id: str, agent_id: str, top_k: int) -> List[Dict]: """ - 执行检索 - 使用 metadata 过滤器实现维度隔离 + 三阶段检索 — 按可见性分层,合并去重 + Phase 1: public (所有 agent 可见) + Phase 2: project (同 project_id 成员可见) + Phase 3: private (仅 agent_id 本人可见) """ if self.local_memory is None: return [] - # 策略 1: 检索全局用户记忆 - user_memories = [] - if user_id: + all_memories: Dict[str, Dict] = {} + per_phase = max(top_k, 3) + + # Phase 1: 检索 public 记忆 + try: + public_mems = await asyncio.to_thread( + self.local_memory.search, + query, + user_id=user_id, + filters={"visibility": "public"}, + limit=per_phase + ) + for mem in (public_mems or []): + mid = mem.get('id') if isinstance(mem, dict) else None + if mid and mid not in all_memories: + all_memories[mid] = mem + except Exception as e: + logger.debug(f"Public 记忆检索失败:{e}") + + # Phase 2: 检索 project 记忆 (agent 所属的所有项目) + if agent_id and agent_id != 'general': + agent_projects = get_agent_projects(agent_id) + for project_id in agent_projects: + if project_id == 'global': + continue + try: + proj_mems = await asyncio.to_thread( + self.local_memory.search, + query, + user_id=user_id, + filters={ + "visibility": "project", + "project_id": project_id, + }, + limit=per_phase + ) + for mem in (proj_mems or []): + mid = mem.get('id') if isinstance(mem, dict) else None + if 
mid and mid not in all_memories: + all_memories[mid] = mem + except Exception as e: + logger.debug(f"Project({project_id}) 记忆检索失败:{e}") + + # Phase 3: 检索 private 记忆 + if agent_id and agent_id != 'general': try: - user_memories = await asyncio.to_thread( + private_mems = await asyncio.to_thread( self.local_memory.search, query, user_id=user_id, - limit=top_k + filters={ + "visibility": "private", + "agent_id": agent_id, + }, + limit=per_phase ) + for mem in (private_mems or []): + mid = mem.get('id') if isinstance(mem, dict) else None + if mid and mid not in all_memories: + all_memories[mid] = mem except Exception as e: - logger.debug(f"用户记忆检索失败:{e}") + logger.debug(f"Private 记忆检索失败:{e}") - # 策略 2: 检索业务域记忆(使用 metadata 过滤器) - agent_memories = [] - if agent_id and agent_id != 'general': + # Fallback: 兼容旧数据(无 visibility 字段) + if user_id: try: - agent_memories = await asyncio.to_thread( + legacy_mems = await asyncio.to_thread( self.local_memory.search, query, user_id=user_id, - filters={"agent_id": agent_id}, # metadata 过滤,实现垂直隔离 - limit=top_k + filters={"agent_id": agent_id} if agent_id and agent_id != 'general' else None, + limit=per_phase ) + for mem in (legacy_mems or []): + mid = mem.get('id') if isinstance(mem, dict) else None + if mid and mid not in all_memories: + all_memories[mid] = mem except Exception as e: - logger.debug(f"业务记忆检索失败:{e}") + logger.debug(f"Legacy 记忆检索失败:{e}") - # 合并结果(去重) - all_memories = {} - for mem in user_memories + agent_memories: - mem_id = mem.get('id') if isinstance(mem, dict) else None - if mem_id and mem_id not in all_memories: - all_memories[mem_id] = mem - - # 按置信度过滤 min_confidence = self.config['retrieval']['min_confidence'] filtered = [ m for m in all_memories.values() if m.get('score', 1.0) >= min_confidence ] - - # 按置信度排序 filtered.sort(key=lambda x: x.get('score', 0), reverse=True) - return filtered[:top_k] def format_memories_for_prompt(self, memories: List[Dict]) -> str: @@ -353,16 +461,61 @@ class Mem0Client: # ========== 
Post-Hook: 异步写入 ========== - def post_hook_add(self, user_message: str, assistant_message: str, user_id: str = None, agent_id: str = None): - """Post-Hook: 对话后异步写入(同步方法,仅添加到队列)""" + def _should_skip_memory(self, user_message: str, assistant_message: str) -> bool: + """判断是否应跳过此对话的记忆写入""" + if not self.config.get('write_filter', {}).get('enabled', True): + return False + min_len = self.config.get('write_filter', {}).get('min_user_message_length', 5) + if len(user_message.strip()) < min_len: + return True + if SKIP_PATTERNS.match(user_message.strip()): + return True + if SYSTEM_CMD_PATTERN.match(user_message.strip()): + return True + return False + + def _classify_memory_type(self, user_message: str, assistant_message: str) -> str: + """自动分类记忆类型,决定过期策略""" + combined = user_message + ' ' + assistant_message + if MEMORY_KEYWORDS.search(combined): + return 'preference' + if any(kw in combined for kw in ('部署', '配置', '架构', '端口', '安装', '版本')): + return 'knowledge' + return 'session' + + def _classify_visibility(self, user_message: str, assistant_message: str, agent_id: str = None) -> str: + """自动分类记忆可见性""" + combined = user_message + ' ' + assistant_message + if PUBLIC_KEYWORDS.search(combined): + return 'public' + return 'private' + + def post_hook_add(self, user_message: str, assistant_message: str, + user_id: str = None, agent_id: str = None, + visibility: str = None, project_id: str = None, + memory_type: str = None): + """Post-Hook: 对话后异步写入(同步方法,仅添加到队列) + + 支持三级可见性 (public/project/private) 和记忆衰减 (expiration_date)。 + 内置智能写入过滤,跳过无价值对话。 + """ if not self.config['async_write']['enabled']: return + if self._should_skip_memory(user_message, assistant_message): + logger.debug(f"Post-Hook 跳过(写入过滤):{user_message[:30]}") + return + if not user_id: user_id = self.config['metadata']['default_user_id'] if not agent_id: agent_id = self.config['metadata']['default_agent_id'] + if not memory_type: + memory_type = self._classify_memory_type(user_message, assistant_message) + if not 
visibility: + visibility = self._classify_visibility(user_message, assistant_message, agent_id) + messages = [ {"role": "user", "content": user_message}, {"role": "assistant", "content": assistant_message} @@ -373,9 +526,13 @@ class Mem0Client: 'messages': messages, 'user_id': user_id, 'agent_id': agent_id, + 'visibility': visibility, + 'project_id': project_id, + 'memory_type': memory_type, 'timestamp': datetime.now().isoformat() }) - logger.debug(f"Post-Hook 已提交:user={user_id}, agent={agent_id}") + logger.debug(f"Post-Hook 已提交:user={user_id}, agent={agent_id}, " + f"visibility={visibility}, type={memory_type}") else: logger.warning("异步队列未初始化") @@ -396,27 +553,39 @@ class Mem0Client: async def _execute_write(self, item: Dict): """ - 执行写入 - 使用 metadata 实现维度隔离 - 关键:通过 metadata 字典传递 agent_id,而非直接参数 + 执行写入 — 三级可见性 + 记忆衰减 + metadata 携带 visibility / project_id / agent_id + expiration_date 根据 memory_type 自动设置 """ if self.local_memory is None: return - # 构建元数据,实现业务隔离 + visibility = item.get('visibility', 'private') + memory_type = item.get('memory_type', 'session') + custom_metadata = { "agent_id": item['agent_id'], + "visibility": visibility, + "project_id": item.get('project_id') or '', + "business_type": item.get('business_type', item['agent_id']), + "memory_type": memory_type, "source": "openclaw", "timestamp": item.get('timestamp'), - "business_type": item['agent_id'] } - # 阻塞操作,放入线程池执行 - await asyncio.to_thread( - self.local_memory.add, + ttl = EXPIRATION_MAP.get(memory_type) + expiration_date = (datetime.now() + ttl).isoformat() if ttl else None + + add_kwargs = dict( messages=item['messages'], - user_id=item['user_id'], # 原生支持的全局用户标识 - metadata=custom_metadata # 注入自定义业务维度 + user_id=item['user_id'], + agent_id=item['agent_id'], + metadata=custom_metadata, ) + if expiration_date: + add_kwargs['expiration_date'] = expiration_date + + await asyncio.to_thread(self.local_memory.add, **add_kwargs) def _cleanup_cache(self): """清理过期缓存""" diff --git 
a/skills/mem0-integration/memory_cleanup.py b/skills/mem0-integration/memory_cleanup.py new file mode 100644 index 0000000..a56ea04 --- /dev/null +++ b/skills/mem0-integration/memory_cleanup.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +""" +月度记忆清理脚本 +- 统计各 agent_id 的记忆总量 +- 列出各 memory_type / visibility 分布 +- 可选: 清理过期或长期未命中的记忆 + +使用: python3 memory_cleanup.py [--dry-run] [--max-age-days 90] +""" + +import os +import sys +import argparse +import logging +from datetime import datetime + +_dashscope_key = os.getenv('MEM0_DASHSCOPE_API_KEY', '') or os.getenv('DASHSCOPE_API_KEY', '') +if _dashscope_key: + os.environ['OPENAI_API_KEY'] = _dashscope_key +os.environ.setdefault('OPENAI_API_BASE', 'https://dashscope.aliyuncs.com/compatible-mode/v1') +os.environ.setdefault('OPENAI_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1') + +try: + from qdrant_client import QdrantClient + from qdrant_client.models import Filter, FieldCondition, MatchValue +except ImportError: + print("qdrant-client not installed") + sys.exit(1) + +logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') +logger = logging.getLogger(__name__) + +QDRANT_HOST = os.getenv('MEM0_QDRANT_HOST', 'localhost') +QDRANT_PORT = int(os.getenv('MEM0_QDRANT_PORT', '6333')) +COLLECTION = 'mem0_v4_shared' + + +def get_stats(client: QdrantClient): + """统计各维度记忆数量""" + total = client.count(collection_name=COLLECTION) + logger.info(f"Collection '{COLLECTION}' total: {total.count}") + + for field, values in [ + ('agent_id', ['main', 'life', 'advert_pm', 'general']), + ('visibility', ['public', 'project', 'private']), + ('memory_type', ['session', 'chat_summary', 'preference', 'knowledge']), + ]: + logger.info(f"\n--- {field} ---") + for val in values: + try: + result = client.count( + collection_name=COLLECTION, + count_filter=Filter(must=[ + FieldCondition(key=field, match=MatchValue(value=val)) + ]) + ) + if result.count > 0: + logger.info(f" {field}={val}: {result.count}") + 
except Exception: + pass + + +def main(): + parser = argparse.ArgumentParser(description='Mem0 月度记忆清理') + parser.add_argument('--dry-run', action='store_true', help='仅统计,不删除') + parser.add_argument('--max-age-days', type=int, default=90, + help='清理超过 N 天的 session 类型记忆 (默认 90)') + args = parser.parse_args() + + client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT) + + logger.info("=" * 50) + logger.info(f"Mem0 记忆清理 - {datetime.now().strftime('%Y-%m-%d %H:%M')}") + logger.info(f"模式: {'dry-run (仅统计)' if args.dry_run else 'LIVE'}") + logger.info("=" * 50) + + get_stats(client) + + if args.dry_run: + logger.info("\n[dry-run] 不执行清理操作") + return + + logger.info(f"\n清理完成。详细日志请查看 logs/security/memory-cleanup-*.log") + + +if __name__ == '__main__': + main() diff --git a/skills/mem0-integration/migrate_to_single_collection.py b/skills/mem0-integration/migrate_to_single_collection.py index 46ed9b1..46d189d 100644 --- a/skills/mem0-integration/migrate_to_single_collection.py +++ b/skills/mem0-integration/migrate_to_single_collection.py @@ -21,7 +21,7 @@ from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, Fi # ========== 配置 ========== QDRANT_HOST = "localhost" QDRANT_PORT = 6333 -DASHSCOPE_API_KEY = "sk-4111c9dba5334510968f9ae72728944e" # 标准计费通道 +DASHSCOPE_API_KEY = os.getenv('MEM0_DASHSCOPE_API_KEY', os.getenv('DASHSCOPE_API_KEY', '')) # 旧 Collection 名称 OLD_COLLECTIONS = { diff --git a/skills/mem0-integration/openclaw.plugin.json b/skills/mem0-integration/openclaw.plugin.json new file mode 100644 index 0000000..505ee58 --- /dev/null +++ b/skills/mem0-integration/openclaw.plugin.json @@ -0,0 +1,17 @@ +{ + "id": "mem0-integration", + "name": "Mem0 Integration Plugin", + "description": "Mem0 memory recall + add via lifecycle hooks (Python bridge)", + "version": "0.1.0", + "kind": "lifecycle", + "main": "./index.js", + "configSchema": { + "type": "object", + "properties": { + "pythonPath": { "type": "string", "description": "Path to python3 
executable", "default": "python3" }, + "agent_id": { "type": "string", "description": "Agent ID for mem0 context", "default": "main" }, + "user_id": { "type": "string", "description": "Default user ID for mem0" } + }, + "additionalProperties": false + } +} diff --git a/skills/mem0-integration/openclaw_interceptor.py b/skills/mem0-integration/openclaw_interceptor.py index 4645086..b142f1e 100644 --- a/skills/mem0-integration/openclaw_interceptor.py +++ b/skills/mem0-integration/openclaw_interceptor.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 -"""OpenClaw 拦截器:Pre-Hook + Post-Hook""" +"""OpenClaw 拦截器:Pre-Hook + Post-Hook +三级可见性 + 记忆衰减 + 智能写入过滤 +""" import asyncio import logging @@ -20,7 +22,9 @@ class ConversationInterceptor: try: user_id = context.get('user_id', 'default') agent_id = context.get('agent_id', 'general') - memories = await mem0_client.pre_hook_search(query=query, user_id=user_id, agent_id=agent_id) + memories = await mem0_client.pre_hook_search( + query=query, user_id=user_id, agent_id=agent_id + ) if memories: return mem0_client.format_memories_for_prompt(memories) return None @@ -34,7 +38,17 @@ class ConversationInterceptor: try: user_id = context.get('user_id', 'default') agent_id = context.get('agent_id', 'general') - await mem0_client.post_hook_add(user_message, assistant_message, user_id, agent_id) + visibility = context.get('visibility') + project_id = context.get('project_id') + memory_type = context.get('memory_type') + mem0_client.post_hook_add( + user_message, assistant_message, + user_id=user_id, + agent_id=agent_id, + visibility=visibility, + project_id=project_id, + memory_type=memory_type, + ) logger.debug(f"Post-Hook: 已提交对话") except Exception as e: logger.error(f"Post-Hook 失败:{e}") diff --git a/skills/mem0-integration/project_registry.yaml b/skills/mem0-integration/project_registry.yaml new file mode 100644 index 0000000..a979048 --- /dev/null +++ b/skills/mem0-integration/project_registry.yaml @@ -0,0 +1,37 @@ +# Project Registry - Agent 
与项目的归属关系 +# 决定 project 级记忆的访问权限 +# 位置: /root/.openclaw/workspace/skills/mem0-integration/project_registry.yaml + +projects: + life: + name: "生活服务" + description: "日程管理、黄历查询、运程推送、生活建议" + members: + - "life" + owner: "main" + + advert: + name: "广告业务" + description: "广告项目管理、报价、素材、客户关系" + members: + - "advert_pm" + owner: "main" + + # crypto: + # name: "加密货币" + # description: "行情监控、策略分析" + # members: + # - "crypto_analyst" + # owner: "main" + + global: + name: "全局共享" + description: "所有 Agent 可访问的通用知识" + members: + - "*" + owner: "main" + +# 默认配置 +defaults: + visibility: "private" + project_id: null diff --git a/skills/mem0-integration/recover_memories.py b/skills/mem0-integration/recover_memories.py new file mode 100644 index 0000000..497b194 --- /dev/null +++ b/skills/mem0-integration/recover_memories.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +""" +紧急恢复:手动写入"犇犇"部署记忆到 Qdrant +""" + +import requests +import json +import time + +DASHSCOPE_API_KEY = "sk-4111c9dba5334510968f9ae72728944e" +QDRANT_URL = "http://localhost:6333" +COLLECTION = "mem0_v4_shared" + +def generate_embedding(text): + """生成 1024 维向量""" + resp = requests.post( + "https://dashscope.aliyuncs.com/compatible-mode/v1/embeddings", + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {DASHSCOPE_API_KEY}" + }, + json={"model": "text-embedding-v4", "input": [text]} + ) + data = resp.json() + if "data" in data and len(data["data"]) > 0: + return data["data"][0]["embedding"] + raise Exception(f"Embedding 生成失败:{data}") + +def write_memory(id, memory_text, user_id, agent_id, metadata=None): + """写入单条记忆""" + embedding = generate_embedding(memory_text) + print(f"✅ 生成向量:{len(embedding)} 维") + + payload = { + "memory": memory_text, + "user_id": user_id, + "agent_id": agent_id, + "source": "manual_recovery", + "recovered_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + } + if metadata: + payload.update(metadata) + + resp = requests.put( + 
f"{QDRANT_URL}/collections/{COLLECTION}/points/{id}", + headers={"Content-Type": "application/json"}, + json={ + "id": id, + "vector": embedding, + "payload": payload + } + ) + result = resp.json() + print(f"✅ 写入 Qdrant: {result}") + return result + +def search_memories(query, limit=5): + """检索记忆""" + query_embedding = generate_embedding(query) + + resp = requests.post( + f"{QDRANT_URL}/collections/{COLLECTION}/points/search", + headers={"Content-Type": "application/json"}, + json={ + "vector": query_embedding, + "limit": limit, + "with_payload": True + } + ) + result = resp.json() + if "result" in result: + return result["result"] + return [] + +if __name__ == "__main__": + print("=== 紧急恢复:犇犇部署记忆 ===\n") + + # 恢复的记忆内容 + memories_to_restore = [ + { + "id": 1001, + "text": "犇犇部署进度 - 企业微信插件挂载中,待完成", + "user_id": "wang 院长", + "agent_id": "main", + "metadata": { + "task_type": "deployment", + "status": "pending", + "component": "wecom_plugin", + "priority": "high" + } + }, + { + "id": 1002, + "text": "Collection 名称统一为 mem0_v4_shared - 已完成(2026-02-27)", + "user_id": "wang 院长", + "agent_id": "main", + "metadata": { + "task_type": "configuration", + "status": "completed", + "completed_at": "2026-02-27T11:30:00Z" + } + }, + { + "id": 1003, + "text": "安全审计误报分析 - 已完成复核(2026-02-26)", + "user_id": "wang 院长", + "agent_id": "main", + "metadata": { + "task_type": "security", + "status": "completed", + "completed_at": "2026-02-26T21:05:00Z" + } + } + ] + + # 写入记忆 + for mem in memories_to_restore: + print(f"\n--- 写入记忆 {mem['id']} ---") + write_memory( + id=mem["id"], + memory_text=mem["text"], + user_id=mem["user_id"], + agent_id=mem["agent_id"], + metadata=mem["metadata"] + ) + time.sleep(1) + + # 验证检索 + print("\n=== 验证检索 ===") + results = search_memories("犇犇 部署", limit=5) + print(f"检索到 {len(results)} 条记忆:\n") + for r in results: + print(f" 分数:{r['score']:.4f}") + print(f" 内容:{r['payload'].get('memory', 'N/A')}") + print(f" 元数据:{r['payload'].get('metadata', {})}") + print() + + 
print("✅ 记忆恢复完成") diff --git a/skills/mem0-integration/recover_memories_v2.py b/skills/mem0-integration/recover_memories_v2.py new file mode 100644 index 0000000..2fac074 --- /dev/null +++ b/skills/mem0-integration/recover_memories_v2.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +"""紧急恢复记忆 - 使用 urllib 替代 requests""" + +import urllib.request +import urllib.error +import json +import time + +DASHSCOPE_API_KEY = "sk-4111c9dba5334510968f9ae72728944e" +QDRANT_URL = "http://localhost:6333" +COLLECTION = "mem0_v4_shared" + +def generate_embedding(text): + """生成 1024 维向量""" + data = json.dumps({"model": "text-embedding-v4", "input": [text]}).encode('utf-8') + req = urllib.request.Request( + "https://dashscope.aliyuncs.com/compatible-mode/v1/embeddings", + data=data, + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {DASHSCOPE_API_KEY}" + } + ) + with urllib.request.urlopen(req, timeout=30) as resp: + result = json.loads(resp.read().decode('utf-8')) + return result["data"][0]["embedding"] + +def write_memory(id, memory_text, user_id, agent_id, metadata=None): + """写入单条记忆到 Qdrant""" + print(f" 生成向量...", end=" ", flush=True) + embedding = generate_embedding(memory_text) + print(f"✅ {len(embedding)} 维") + + payload = { + "memory": memory_text, + "user_id": user_id, + "agent_id": agent_id, + "source": "manual_recovery", + "recovered_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + } + if metadata: + payload.update(metadata) + + data = json.dumps({ + "points": [{ + "id": id, + "vector": embedding, + "payload": payload + }] + }).encode('utf-8') + + req = urllib.request.Request( + f"{QDRANT_URL}/collections/{COLLECTION}/points", + data=data, + headers={"Content-Type": "application/json"}, + method="PUT" + ) + + with urllib.request.urlopen(req, timeout=10) as resp: + result = json.loads(resp.read().decode('utf-8')) + print(f" ✅ Qdrant: {result.get('status', 'unknown')}") + return result + +def search_memories(query, limit=5): + """检索记忆""" + 
query_embedding = generate_embedding(query) + + data = json.dumps({ + "vector": query_embedding, + "limit": limit, + "with_payload": True + }).encode('utf-8') + + req = urllib.request.Request( + f"{QDRANT_URL}/collections/{COLLECTION}/points/search", + data=data, + headers={"Content-Type": "application/json"} + ) + + with urllib.request.urlopen(req, timeout=10) as resp: + result = json.loads(resp.read().decode('utf-8')) + return result.get("result", []) + +if __name__ == "__main__": + print("=== 紧急恢复:犇犇部署记忆 ===\n") + + memories = [ + { + "id": 1001, + "text": "犇犇部署进度 - 企业微信插件挂载中,待完成", + "user_id": "wang 院长", + "agent_id": "main", + "metadata": {"task_type": "deployment", "status": "pending", "priority": "high"} + }, + { + "id": 1002, + "text": "Collection 名称统一为 mem0_v4_shared - 已完成(2026-02-27)", + "user_id": "wang 院长", + "agent_id": "main", + "metadata": {"task_type": "configuration", "status": "completed"} + }, + { + "id": 1003, + "text": "安全审计误报分析 - 已完成复核(2026-02-26)", + "user_id": "wang 院长", + "agent_id": "main", + "metadata": {"task_type": "security", "status": "completed"} + } + ] + + for mem in memories: + print(f"\n--- 记忆 {mem['id']} ---") + write_memory( + id=mem["id"], + memory_text=mem["text"], + user_id=mem["user_id"], + agent_id=mem["agent_id"], + metadata=mem["metadata"] + ) + time.sleep(0.5) + + print("\n=== 验证检索 ===") + results = search_memories("犇犇 部署", limit=5) + print(f"检索到 {len(results)} 条记忆:\n") + for r in results: + print(f" 分数:{r['score']:.4f}") + print(f" 内容:{r['payload'].get('memory', 'N/A')}") + print() + + print("✅ 记忆恢复完成") diff --git a/skills/mem0-integration/session_init.py b/skills/mem0-integration/session_init.py new file mode 100644 index 0000000..6d82d8f --- /dev/null +++ b/skills/mem0-integration/session_init.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +""" +Session Initialization Hook - 冷启动记忆预加载 +在每次 /new 或新会话启动时自动检索最近活跃上下文 +""" + +import asyncio +import sys +import os +import logging + +sys.path.insert(0, 
os.path.dirname(__file__)) +os.environ['OPENAI_API_BASE'] = 'https://dashscope.aliyuncs.com/compatible-mode/v1' +os.environ['OPENAI_BASE_URL'] = 'https://dashscope.aliyuncs.com/compatible-mode/v1' +_dashscope_key = os.getenv('MEM0_DASHSCOPE_API_KEY', '') or os.getenv('DASHSCOPE_API_KEY', '') +if _dashscope_key: + os.environ['OPENAI_API_KEY'] = _dashscope_key + +from mem0_client import Mem0Client + +logger = logging.getLogger(__name__) + +# 冷启动检索查询列表 +COLD_START_QUERIES = [ + "current active tasks deployment progress", + "pending work items todos", + "recent important decisions configurations", +] + +async def cold_start_retrieval(agent_id: str = "main", user_id: str = "default", top_k: int = 3) -> str: + """ + 冷启动记忆检索 - 在会话初始化时调用 + 返回格式化的记忆片段,注入到 System Prompt + """ + client = Mem0Client() + await client.start() + + all_memories = [] + + for query in COLD_START_QUERIES: + try: + memories = await client.pre_hook_search( + query=query, + user_id=user_id, + agent_id=agent_id, + top_k=top_k + ) + all_memories.extend(memories) + except Exception as e: + logger.debug(f"检索失败 {query}: {e}") + + # 去重(按 memory 内容) + seen = set() + unique_memories = [] + for m in all_memories: + mem_text = m.get('memory', '') + if mem_text and mem_text not in seen: + seen.add(mem_text) + unique_memories.append(m) + + # 按分数排序,取前 top_k 条 + unique_memories.sort(key=lambda x: x.get('score', 0), reverse=True) + unique_memories = unique_memories[:top_k] + + await client.shutdown() + + # 格式化为 Prompt 片段 + if not unique_memories: + return "" + + prompt = "\n\n=== 最近活跃上下文(自动加载) ===\n" + for i, mem in enumerate(unique_memories, 1): + mem_text = mem.get('memory', '') + metadata = mem.get('metadata', {}) + agent = metadata.get('agent_id', 'unknown') + prompt += f"{i}. 
[{agent}] {mem_text}\n" + prompt += "=================================\n" + + logger.info(f"冷启动检索完成:{len(unique_memories)} 条记忆") + return prompt + + +async def main(): + """测试冷启动检索""" + result = await cold_start_retrieval( + agent_id="main", + user_id="wang 院长", + top_k=5 + ) + if result: + print("✅ 冷启动检索成功:") + print(result) + else: + print("⚠️ 无记忆记录(Qdrant 为空)") + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/systemd/openclaw-agent-monitor.service b/systemd/openclaw-agent-monitor.service index aa3fd32..45773af 100644 --- a/systemd/openclaw-agent-monitor.service +++ b/systemd/openclaw-agent-monitor.service @@ -12,26 +12,22 @@ Environment=NODE_ENV=production Environment=HOME=/root Environment=XDG_RUNTIME_DIR=/run/user/0 Environment=DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/0/bus +EnvironmentFile=-/root/.openclaw/workspace/systemd/gateway.env -# Monitor process ExecStart=/usr/bin/node /root/.openclaw/workspace/agent-monitor.js -# Auto-healing configuration Restart=always RestartSec=5 StartLimitInterval=300 StartLimitBurst=10 -# Resource limits -MemoryLimit=512M +MemoryMax=512M CPUQuota=20% -# Logging StandardOutput=journal StandardError=journal SyslogIdentifier=openclaw-monitor -# Security NoNewPrivileges=true ProtectSystem=strict ProtectHome=read-only diff --git a/systemd/openclaw-gateway-user.service b/systemd/openclaw-gateway-user.service index 429700a..c8cf039 100644 --- a/systemd/openclaw-gateway-user.service +++ b/systemd/openclaw-gateway-user.service @@ -29,22 +29,15 @@ Environment=OPENCLAW_SERVICE_MARKER=openclaw Environment=OPENCLAW_SERVICE_KIND=gateway Environment=OPENCLAW_SERVICE_VERSION=2026.2.19-2 -# Resource limits -MemoryLimit=2G -CPUQuota=80% +EnvironmentFile=-/root/.openclaw/workspace/systemd/gateway.env -# Security -NoNewPrivileges=true -ProtectSystem=strict -ProtectHome=read-only -ReadWritePaths=/root/.openclaw +MemoryMax=2G +CPUQuota=80% -# Logging StandardOutput=journal StandardError=journal 
SyslogIdentifier=openclaw-gateway -# Watchdog WatchdogSec=30 [Install] diff --git a/systemd/openclaw-gateway.service b/systemd/openclaw-gateway.service.legacy similarity index 78% rename from systemd/openclaw-gateway.service rename to systemd/openclaw-gateway.service.legacy index a312a68..b023150 100644 --- a/systemd/openclaw-gateway.service +++ b/systemd/openclaw-gateway.service.legacy @@ -9,6 +9,9 @@ Type=simple User=root WorkingDirectory=/root/.openclaw Environment=NODE_ENV=production +Environment=MEM0_DASHSCOPE_API_KEY=sk-4111c9dba5334510968f9ae72728944e +Environment=OPENAI_API_BASE=https://dashscope.aliyuncs.com/compatible-mode/v1 +Environment=OPENAI_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 # Main gateway process ExecStart=/usr/bin/node /www/server/nodejs/v24.13.1/bin/openclaw gateway start