#!/bin/bash
###############################################################################
# OpenClaw System Deployment & Management Script
#
# Config-driven: reads agent list from agents.yaml via parse_agents.py
# No hardcoded agent references -- add/remove agents by editing agents.yaml.
#
# Usage:
#   ./deploy.sh install                - Install and start all services
#   ./deploy.sh start                  - Start all services
#   ./deploy.sh stop                   - Stop all services
#   ./deploy.sh restart                - Restart all services
#   ./deploy.sh status                 - Show service status
#   ./deploy.sh logs                   - Show recent logs
#   ./deploy.sh health                 - Run health check
#   ./deploy.sh rollback               - Rollback to previous git commit
#   ./deploy.sh rollback-to <ref>      - Rollback to a specific commit or tag
#   ./deploy.sh backup                 - Full backup (workspace + Qdrant snapshot + agent profiles)
#   ./deploy.sh backup quick           - Quick backup (workspace files only, no Qdrant)
#   ./deploy.sh restore <dir>          - Restore workspace + config from backup directory
#   ./deploy.sh restore-qdrant <file>  - Restore Qdrant collection from snapshot file
#   ./deploy.sh debug-stop             - Stop ALL services (including monitor) for debugging
#   ./deploy.sh debug-start            - Start ALL services after debugging
#   ./deploy.sh fix-service            - Re-inject EnvironmentFile after OpenClaw UI upgrade
###############################################################################

# Abort on the first unhandled command failure.
set -e

# Workspace checkout that holds agents.yaml tooling, unit templates and logs.
WORKSPACE="/root/.openclaw/workspace"
LOG_DIR="$WORKSPACE/logs/system"
# Captured once per invocation; used to name the backup directory.
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
# Expanded unquoted by callers so it splits into "python3" + script path.
PARSE_AGENTS="python3 $WORKSPACE/scripts/parse_agents.py"

# ANSI colour escape sequences used by the log_* helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
log_info() {
    # Print $1 on stdout behind a blue [INFO] tag; escapes are interpreted.
    printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
log_success() {
    # Print $1 on stdout behind a green [SUCCESS] tag; escapes are interpreted.
    printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}
log_warning() {
    # Print $1 on stdout behind a yellow [WARNING] tag; escapes are interpreted.
    printf '%b\n' "${YELLOW}[WARNING]${NC} $1"
}
log_error() {
    # Print $1 on stdout behind a red [ERROR] tag; escapes are interpreted.
    printf '%b\n' "${RED}[ERROR]${NC} $1"
}
ensure_log_dir() {
    # Create $LOG_DIR together with any missing parent directories.
    mkdir -p "$LOG_DIR"
}
# Export the environment `systemctl --user` / `journalctl --user` need to
# reach the current user's session bus.
# Globals: XDG_RUNTIME_DIR, DBUS_SESSION_BUS_ADDRESS (exported)
setup_user_env() {
    local uid
    uid=$(id -u)
    export XDG_RUNTIME_DIR="/run/user/${uid}"
    export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/${uid}/bus"
}
# Iterate over agents from agents.yaml and perform an action per type.
# Usage: for_each_agent <start|stop|restart|enable|disable|status|logs>
#
# Reads one tab-separated record per agent from `$PARSE_AGENTS services`:
#   <id> <type> <f3> <f4> <f5>
#   local-cli     : f3 = status command, f4 = start command
#   local-systemd : f3 = systemd user unit name
#   remote-http   : nothing is managed locally; the action is only reported
# Actions a type does not implement (e.g. enable/disable for local-cli) are
# ignored, as are unknown agent types.
for_each_agent() {
    local action="$1"
    local aid atype f3 f4 f5
    local check_cmd start_cmd stop_cmd unit
    setup_user_env

    # Records arrive on fd 3, not stdin: an agent command that reads stdin
    # would otherwise swallow the remaining agent records.
    while IFS=$'\t' read -r -u 3 aid atype f3 f4 f5; do
        case "$atype" in
            local-cli)
                check_cmd="$f3"
                start_cmd="$f4"
                # The stop command is the start command with its first
                # "start" replaced by "stop".
                stop_cmd="${start_cmd/start/stop}"
                case "$action" in
                    start)
                        if eval "$start_cmd" 2>/dev/null; then
                            log_info "Started $aid"
                        else
                            log_warning "$aid start failed"
                        fi
                        ;;
                    stop)
                        eval "$stop_cmd" 2>/dev/null || true
                        log_info "Stopped $aid"
                        ;;
                    restart)
                        eval "$stop_cmd" 2>/dev/null || true
                        sleep 1
                        if eval "$start_cmd" 2>/dev/null; then
                            log_info "Restarted $aid"
                        else
                            log_warning "$aid restart failed"
                        fi
                        ;;
                    status)
                        echo ""
                        log_info "=== $aid (local-cli) ==="
                        eval "$check_cmd" || true
                        ;;
                    logs)
                        log_info "=== $aid logs ==="
                        journalctl --user -u openclaw-gateway --no-pager -n 50 2>/dev/null || true
                        ;;
                esac
                ;;
            local-systemd)
                unit="$f3"
                case "$action" in
                    start)
                        if systemctl --user start "$unit" 2>/dev/null; then
                            log_info "Started $aid ($unit)"
                        else
                            log_warning "$aid start failed"
                        fi
                        ;;
                    stop)
                        systemctl --user stop "$unit" 2>/dev/null || true
                        log_info "Stopped $aid"
                        ;;
                    restart)
                        if systemctl --user restart "$unit" 2>/dev/null; then
                            log_info "Restarted $aid ($unit)"
                        else
                            log_warning "$aid restart failed"
                        fi
                        ;;
                    enable) systemctl --user enable "$unit" 2>/dev/null ;;
                    disable) systemctl --user disable "$unit" 2>/dev/null ;;
                    status)
                        echo ""
                        log_info "=== $aid (systemd: $unit) ==="
                        systemctl --user status "$unit" --no-pager -l 2>&1 || true
                        ;;
                    logs)
                        log_info "=== $aid logs ==="
                        journalctl --user -u "$unit" --no-pager -n 50 2>/dev/null || true
                        ;;
                esac
                ;;
            remote-http)
                case "$action" in
                    status)
                        log_info "=== $aid (remote) ==="
                        echo " Remote agent -- check via health URL"
                        ;;
                    *) log_info "$aid is remote; skipping $action" ;;
                esac
                ;;
        esac
    done 3< <($PARSE_AGENTS services)
}
# Install the gateway user unit, every local-systemd agent unit listed by
# `$PARSE_AGENTS services`, and the system-level agent monitor; then start
# everything.
# Returns: 1 if an agent unit cannot be enabled. Other failures abort the
#          script through `set -e`.
install_services() {
    local user_unit_dir="$HOME/.config/systemd/user"
    local aid atype f3 f4 f5
    local unit svc_template agent_names

    log_info "Installing OpenClaw systemd services..."

    loginctl enable-linger "$(whoami)"
    setup_user_env

    if [ ! -d "$XDG_RUNTIME_DIR" ]; then
        log_warning "Creating runtime directory..."
        mkdir -p "$XDG_RUNTIME_DIR"
        chmod 700 "$XDG_RUNTIME_DIR"
    fi

    mkdir -p "$user_unit_dir"

    # Install main gateway service
    cp "$WORKSPACE/systemd/openclaw-gateway-user.service" "$user_unit_dir/openclaw-gateway.service"

    # Install any local-systemd agents from agents.yaml
    while IFS=$'\t' read -r -u 3 aid atype f3 f4 f5; do
        [ "$atype" = "local-systemd" ] || continue
        unit="$f3"
        svc_template="$WORKSPACE/systemd/$unit"
        if [ ! -f "$svc_template" ]; then
            log_warning "No unit template for $aid at $svc_template; skipping $unit"
            continue
        fi
        cp "$svc_template" "$user_unit_dir/$unit"
        # stderr stays suppressed as before, so report the failure here
        # instead of letting `set -e` end the script without a message.
        if ! systemctl --user enable "$unit" 2>/dev/null; then
            log_error "Failed to enable $unit"
            return 1
        fi
        log_info "Installed $unit"
    done 3< <($PARSE_AGENTS services)

    systemctl --user daemon-reload
    systemctl --user enable openclaw-gateway

    # Install system-level agent monitor
    log_info "Installing system-level agent monitor..."
    cp "$WORKSPACE/systemd/openclaw-agent-monitor.service" /etc/systemd/system/
    systemctl daemon-reload
    systemctl enable openclaw-agent-monitor

    fix_service_files

    log_info "Starting services..."
    for_each_agent start
    systemctl start openclaw-agent-monitor

    sleep 3

    log_success "OpenClaw services installed and started!"
    # Informational only: a failing lookup must not fail the install.
    agent_names=$($PARSE_AGENTS ids) || agent_names="unknown"
    log_info "Active agents: $agent_names"
    log_info "Gateway logs: journalctl --user -u openclaw-gateway -f"
    log_info "Monitor logs: journalctl -u openclaw-agent-monitor -f"
}
# Start every registered agent, then the system-level agent monitor.
start_services() {
    log_info "Starting OpenClaw services..."
    for_each_agent start
    systemctl start openclaw-agent-monitor
    log_success "All services started"
}
# Stop the agent monitor and every registered agent.
# The monitor goes down first (same order as debug_stop) so it cannot
# auto-restart an agent that was just stopped.
# Returns: the monitor's stop status when non-zero; agents are stopped
#          either way.
stop_services() {
    local monitor_rc=0

    log_info "Stopping OpenClaw services..."
    systemctl stop openclaw-agent-monitor || monitor_rc=$?
    for_each_agent stop
    if [ "$monitor_rc" -ne 0 ]; then
        return "$monitor_rc"
    fi
    log_success "All services stopped"
}
# Restart every registered agent, then the system-level agent monitor.
restart_services() {
    log_info "Restarting OpenClaw services..."
    for_each_agent restart
    systemctl restart openclaw-agent-monitor
    log_success "All services restarted"
}
# Enter debug mode: stop the monitor first, so nothing gets auto-restarted,
# then stop every agent and print a few debugging hints.
debug_stop() {
    log_warning "=== DEBUG MODE: Stopping ALL services ==="
    log_warning "Monitor will NOT auto-restart gateway while in debug mode."
    log_warning "Run './deploy.sh debug-start' when done debugging."

    # Best effort: the monitor may already be stopped or not installed.
    systemctl stop openclaw-agent-monitor 2>/dev/null || true
    for_each_agent stop

    log_success "All services stopped. Safe to debug."
    echo ""
    log_info "Useful debug commands:"
    log_info " openclaw gateway start # start gateway in foreground"
    log_info " journalctl --user -u openclaw-gateway -n 100"
}
# Leave debug mode: start every agent and the monitor, then run the health
# check. The function's status is the health check's status.
debug_start() {
    log_info "=== Exiting DEBUG MODE: Restarting ALL services ==="
    for_each_agent start
    systemctl start openclaw-agent-monitor

    # Short pause before health_check probes the freshly started services.
    sleep 2
    log_success "All services restored. Monitor is active again."
    health_check
}
# Ensure every installed user unit that has an env file carries an
# `EnvironmentFile=-<path>` line directly below its [Service] header.
#
# For each agent from `$PARSE_AGENTS services`, the ENV_FILE= and
# AGENT_TYPE= lines of `$PARSE_AGENTS info <id>` are evaluated as shell
# assignments. local-cli agents map to the gateway unit; local-systemd
# agents map to the unit named in field 3. Units or env files that do not
# exist on disk are skipped.
fix_service_files() {
    local changed=0
    local aid atype f3 f4 f5
    # Targets of the eval below; local so they cannot leak into the script.
    local ENV_FILE AGENT_TYPE
    local env_path svc_file svc_name

    log_info "Ensuring EnvironmentFile= is present in installed service files..."
    setup_user_env

    while IFS=$'\t' read -r -u 3 aid atype f3 f4 f5; do
        # Reset per agent: otherwise an agent with no ENV_FILE would
        # inherit the previous agent's value.
        ENV_FILE=""
        AGENT_TYPE=""
        # NOTE(review): eval executes parse_agents.py output as shell code;
        # this assumes that output is trusted.
        eval "$($PARSE_AGENTS info "$aid" 2>/dev/null | grep -E '^(ENV_FILE|AGENT_TYPE)=')"
        if [ -z "$ENV_FILE" ]; then continue; fi

        env_path="$WORKSPACE/systemd/$ENV_FILE"
        svc_file=""
        if [ "$AGENT_TYPE" = "local-cli" ]; then
            svc_file="$HOME/.config/systemd/user/openclaw-gateway.service"
        elif [ "$AGENT_TYPE" = "local-systemd" ]; then
            svc_file="$HOME/.config/systemd/user/$f3"
        fi

        if [ -z "$svc_file" ] || [ ! -f "$svc_file" ] || [ ! -f "$env_path" ]; then
            continue
        fi

        svc_name=$(basename "$svc_file")
        if grep -q "EnvironmentFile=.*${ENV_FILE}" "$svc_file" 2>/dev/null; then
            log_info "$svc_name already has EnvironmentFile"
        else
            sed -i "/^\[Service\]/a EnvironmentFile=-${env_path}" "$svc_file"
            log_info "Injected EnvironmentFile into $svc_name"
            changed=1
        fi
    done 3< <($PARSE_AGENTS services)

    if [ "$changed" -eq 1 ]; then
        systemctl --user daemon-reload
        log_success "Service files updated. Run './deploy.sh restart' to apply."
    else
        log_success "All service files are up to date."
    fi
}
# Print the status of every agent, followed by the system-level monitor.
# Always succeeds: a non-zero `systemctl status` is expected for stopped
# units and is ignored.
show_status() {
    for_each_agent status
    echo ""
    log_info "=== Agent Monitor (System Service) ==="
    systemctl status openclaw-agent-monitor --no-pager -l 2>&1 || true
}
# Print recent logs for every agent, then the last 50 lines of the
# system-level monitor's journal.
show_logs() {
    setup_user_env
    for_each_agent logs
    echo ""
    log_info "=== Monitor logs (last 50 lines) ==="
    journalctl -u openclaw-agent-monitor --no-pager -n 50
}
# Roll the workspace back by one git commit (after confirmation), taking a
# full backup first and restarting services afterwards.
# Anything other than a single "y"/"Y" answer cancels, including EOF on
# stdin.
rollback() {
    local confirm=""

    log_warning "This will rollback the workspace to the previous git commit!"
    # `|| confirm=""`: a failed read must cancel, not kill the script
    # silently under `set -e`.
    read -r -p "Are you sure? (y/N): " confirm || confirm=""

    if [[ ! $confirm =~ ^[Yy]$ ]]; then
        log_info "Rollback cancelled."
        return 0
    fi

    cd "$WORKSPACE"
    backup
    log_info "Current commit:"
    git log -1 --oneline
    git reset --hard HEAD~1
    log_success "Rolled back to previous commit!"
    log_info "Restarting services to apply changes..."
    restart_services
}
# Roll the workspace back to a specific commit or tag (after confirmation),
# taking a full backup first and restarting services afterwards.
# Arguments: $1 - commit hash or tag
# Exits 1 when $1 is missing or does not name a commit in $WORKSPACE.
rollback_to() {
    local target="$1"
    local confirm=""

    if [ -z "$target" ]; then
        log_error "Please specify a commit hash or tag"
        exit 1
    fi

    # Validate the ref before anything else, so a typo cannot trigger a
    # full backup and only then fail at `git reset`.
    if ! git -C "$WORKSPACE" rev-parse --verify --quiet "${target}^{commit}" >/dev/null; then
        log_error "Unknown commit or tag: $target"
        exit 1
    fi

    log_warning "This will rollback the workspace to commit: $target"
    # `|| confirm=""`: a failed read must cancel, not kill the script
    # silently under `set -e`.
    read -r -p "Are you sure? (y/N): " confirm || confirm=""

    if [[ ! $confirm =~ ^[Yy]$ ]]; then
        log_info "Rollback cancelled."
        return 0
    fi

    cd "$WORKSPACE"
    backup
    git reset --hard "$target"
    log_success "Rolled back to commit: $target"
    restart_services
}
# Create a timestamped backup and prune all but the 10 newest backups.
# Arguments: $1 - "full" (default) or "quick"
#   quick: workspace archive, agent profiles, docker-compose.yml
#   full : quick + Qdrant snapshot of mem0_v4_shared + its point count
# Paths are derived from $WORKSPACE; with the stock value the backup lands
# in /root/.openclaw/backups/$TIMESTAMP.
# Qdrant problems are logged as warnings and never fail the backup.
# Returns: 1 for an unknown mode.
backup() {
    local mode="${1:-full}"
    local base_dir root_parent ws_name backup_root backup_dir
    local profile rel snapshot_saved=0
    local snap_response snap_name snap_src mem_count
    local -a existing=()
    local entry excess i

    case "$mode" in
        full | quick) ;;
        *)
            log_error "Unknown backup mode: $mode (expected 'full' or 'quick')"
            return 1
            ;;
    esac

    base_dir=$(dirname "$WORKSPACE")
    root_parent=$(dirname "$base_dir")
    ws_name=$(basename "$WORKSPACE")
    backup_root="$base_dir/backups"
    backup_dir="$backup_root/$TIMESTAMP"
    mkdir -p "$backup_dir"

    log_info "Creating $mode backup -> $backup_dir"

    # --- Layer 1+2: workspace files ---
    log_info "Backing up workspace (Layer 1+2)..."
    tar -czf "$backup_dir/workspace.tar.gz" \
        --exclude='.git' \
        --exclude='logs' \
        -C "$base_dir" "$ws_name"

    # --- Config: all agent openclaw.json profiles ---
    # Stored flat: path relative to $root_parent with "/" turned into "__"
    # (e.g. .openclaw__openclaw.json).
    log_info "Backing up agent profiles..."
    for profile in "$base_dir/openclaw.json" "$base_dir"-*/openclaw.json; do
        if [ -f "$profile" ]; then
            rel="${profile#"${root_parent%/}"/}"
            cp "$profile" "$backup_dir/${rel//\//__}" 2>/dev/null || true
        fi
    done

    # --- Config: docker-compose ---
    cp /opt/mem0-center/docker-compose.yml "$backup_dir/" 2>/dev/null || true

    if [ "$mode" = "full" ]; then
        # --- Layer 4: Qdrant snapshot ---
        log_info "Creating Qdrant snapshot (mem0_v4_shared)..."
        # curl is tested inside the `if`: as a bare assignment its failure
        # would end the script under `set -e` before any warning is printed.
        if snap_response=$(curl -sf -X POST "http://localhost:6333/collections/mem0_v4_shared/snapshots" 2>/dev/null) \
            && [ -n "$snap_response" ]; then
            snap_name=$(echo "$snap_response" | python3 -c "import sys,json; print(json.load(sys.stdin).get('result',{}).get('name',''))" 2>/dev/null) || snap_name=""
            if [ -n "$snap_name" ]; then
                snap_src="/opt/mem0-center/snapshots/mem0_v4_shared/$snap_name"
                if [ -f "$snap_src" ]; then
                    cp "$snap_src" "$backup_dir/qdrant-mem0_v4_shared.snapshot"
                    snapshot_saved=1
                    log_success "Qdrant snapshot saved: $snap_name"
                else
                    log_warning "Snapshot file not found at $snap_src"
                fi
            else
                log_warning "Could not parse snapshot name from response"
            fi
        else
            log_warning "Qdrant snapshot failed (is Qdrant running?)"
        fi

        # --- Layer 4: pre-backup memory count ---
        mem_count=$(curl -sf "http://localhost:6333/collections/mem0_v4_shared" 2>/dev/null |
            python3 -c "import sys,json; print(json.load(sys.stdin).get('result',{}).get('points_count',0))" 2>/dev/null || echo "unknown")
        echo "$mem_count" > "$backup_dir/qdrant-point-count.txt"
        log_info "Qdrant point count: $mem_count"
    fi

    # --- Manifest ---
    {
        printf 'OpenClaw Backup - %s\n' "$TIMESTAMP"
        printf 'Mode: %s\n' "$mode"
        printf 'Date: %s\n' "$(date -Iseconds)"
        printf 'Agents: %s\n' "$($PARSE_AGENTS ids 2>/dev/null || echo "unknown")"
        printf 'Contents:\n'
        printf ' workspace.tar.gz - Layer 1+2 workspace files\n'
        printf ' .openclaw__openclaw.json - main agent profile\n'
        printf ' docker-compose.yml - Qdrant docker config\n'
        # List the snapshot only when it was actually copied.
        if [ "$snapshot_saved" -eq 1 ]; then
            printf ' qdrant-mem0_v4_shared.snapshot - Layer 4 vector data\n'
        fi
    } > "$backup_dir/manifest.txt"

    log_success "Backup complete: $backup_dir"

    # --- Retention: keep last 10 backups ---
    # Glob order is lexicographic, which is oldest-first for timestamp names.
    for entry in "$backup_root"/[0-9]*; do
        if [ -e "$entry" ]; then
            existing+=("$entry")
        fi
    done
    excess=$((${#existing[@]} - 10))
    for ((i = 0; i < excess; i++)); do
        rm -rf -- "${existing[i]:?}"
        log_info "Pruned old backup: $(basename "${existing[i]}")"
    done
    return 0
}
restore_workspace() {
|
|
|
|
|
local restore_dir="$1"
|
|
|
|
|
if [ -z "$restore_dir" ] || [ ! -d "$restore_dir" ]; then
|
|
|
|
|
log_error "Usage: $0 restore <backup-directory>"
|
|
|
|
|
log_info "Available backups:"
|
|
|
|
|
ls -1d /root/.openclaw/backups/[0-9]* 2>/dev/null | while read -r d; do
|
|
|
|
|
echo " $d"
|
|
|
|
|
done
|
|
|
|
|
exit 1
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
log_warning "This will restore workspace from: $restore_dir"
|
|
|
|
|
log_warning "Current workspace will be overwritten!"
|
|
|
|
|
read -p "Are you sure? (y/N): " confirm
|
|
|
|
|
if [[ ! $confirm =~ ^[Yy]$ ]]; then
|
|
|
|
|
log_info "Restore cancelled."
|
|
|
|
|
return
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# Pre-restore backup
|
|
|
|
|
log_info "Creating pre-restore backup..."
|
|
|
|
|
backup quick
|
|
|
|
|
|
|
|
|
|
if [ -f "$restore_dir/workspace.tar.gz" ]; then
|
|
|
|
|
log_info "Restoring workspace files..."
|
|
|
|
|
tar -xzf "$restore_dir/workspace.tar.gz" -C /root/.openclaw/
|
|
|
|
|
log_success "Workspace restored"
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
# Restore agent profiles
|
|
|
|
|
for f in "$restore_dir"/.openclaw__openclaw.json "$restore_dir"/.openclaw-*__openclaw.json; do
|
|
|
|
|
[ -f "$f" ] || continue
|
|
|
|
|
local target="/root/$(basename "$f" | sed 's|__|/|g')"
|
|
|
|
|
local target_dir="$(dirname "$target")"
|
|
|
|
|
mkdir -p "$target_dir"
|
|
|
|
|
cp "$f" "$target"
|
|
|
|
|
log_info "Restored: $target"
|
|
|
|
|
done
|
|
|
|
|
|
|
|
|
|
log_success "Restore complete. Run './deploy.sh restart' to apply."
|
|
|
|
|
}
# Replace the Qdrant collection mem0_v4_shared with the contents of a
# snapshot file (after confirmation).
# Arguments: $1 - path to a snapshot file
# Exits 1 when $1 is missing or is not a file.
# Returns: 1 when the recover request fails.
restore_qdrant() {
    local snap_file="$1"
    local confirm="" qdrant_snap_dir snap_name result count

    if [ -z "$snap_file" ]; then
        log_error "Usage: $0 restore-qdrant <snapshot-file>"
        log_info "Example: $0 restore-qdrant /root/.openclaw/backups/20260306-120000/qdrant-mem0_v4_shared.snapshot"
        exit 1
    fi
    if [ ! -f "$snap_file" ]; then
        log_error "Snapshot file not found: $snap_file"
        exit 1
    fi

    log_warning "This will REPLACE collection mem0_v4_shared with snapshot data!"
    log_warning "Snapshot: $snap_file"
    # `|| confirm=""`: a failed read must cancel, not kill the script
    # silently under `set -e`.
    read -r -p "Are you sure? (y/N): " confirm || confirm=""
    if [[ ! $confirm =~ ^[Yy]$ ]]; then
        log_info "Restore cancelled."
        return
    fi

    # Copy snapshot into Qdrant snapshots directory
    qdrant_snap_dir="/opt/mem0-center/snapshots/mem0_v4_shared"
    mkdir -p "$qdrant_snap_dir"
    snap_name="$(basename "$snap_file")"
    # cp fails when source and destination are the same file, which would
    # abort the script if the snapshot already lives in that directory.
    if [ ! "$snap_file" -ef "$qdrant_snap_dir/$snap_name" ]; then
        cp "$snap_file" "$qdrant_snap_dir/$snap_name"
    fi

    log_info "Recovering Qdrant snapshot..."
    # NOTE(review): the "location" sent is /qdrant/snapshots/...; presumably
    # the host directory above is mounted there for Qdrant -- confirm.
    # curl is tested inside the `if`: as a bare assignment its failure would
    # end the script under `set -e` before the error is logged.
    if result=$(curl -sf -X PUT "http://localhost:6333/collections/mem0_v4_shared/snapshots/recover" \
        -H "Content-Type: application/json" \
        -d "{\"location\":\"/qdrant/snapshots/mem0_v4_shared/$snap_name\"}" 2>&1); then
        log_success "Qdrant snapshot recovered: $snap_name"
        count=$(curl -sf "http://localhost:6333/collections/mem0_v4_shared" 2>/dev/null |
            python3 -c "import sys,json; print(json.load(sys.stdin).get('result',{}).get('points_count',0))" 2>/dev/null || echo "unknown")
        log_info "Collection point count after restore: $count"
    else
        log_error "Qdrant snapshot recovery failed: $result"
        return 1
    fi
}
# Probe every agent plus the monitor, disk, memory, runtime directory and
# user linger, printing one line per check.
#
# Agent records come from `$PARSE_AGENTS services` (tab-separated):
#   local-cli     : f3 = check command, f5 = extended regex its output
#                   must match
#   local-systemd : f3 = user unit that must be active
#   remote-http   : f3 = health URL, f4 = timeout
#                   (NOTE(review): f4 is presumed to be in seconds; it is
#                   used only when numeric, otherwise 5 s applies -- confirm
#                   against parse_agents.py)
# Returns: 0 when every check passes, 1 otherwise.
health_check() {
    local issues=0
    local aid atype f3 f4 f5
    local check_cmd check_pattern output unit health_url timeout
    local disk_usage mem_usage

    log_info "Running health check..."
    setup_user_env

    # Records arrive on fd 3 so a check command cannot eat the agent list.
    while IFS=$'\t' read -r -u 3 aid atype f3 f4 f5; do
        case "$atype" in
            local-cli)
                check_cmd="$f3"
                check_pattern="$f5"
                # `|| true`: a non-zero check must be reported below rather
                # than end the script under `set -e`.
                output=$(eval "$check_cmd" 2>&1) || true
                if grep -qE -- "$check_pattern" <<< "$output"; then
                    log_success "✓ $aid is running"
                else
                    log_error "✗ $aid is not running"
                    issues=$((issues + 1))
                fi
                ;;
            local-systemd)
                unit="$f3"
                if systemctl --user is-active --quiet "$unit" 2>/dev/null; then
                    log_success "✓ $aid is running ($unit)"
                else
                    log_error "✗ $aid is not running ($unit)"
                    issues=$((issues + 1))
                fi
                ;;
            remote-http)
                health_url="$f3"
                timeout="$f4"
                [[ $timeout =~ ^[0-9]+([.][0-9]+)?$ ]] || timeout=5
                if curl -sf --max-time "$timeout" "$health_url" >/dev/null 2>&1; then
                    log_success "✓ $aid is reachable"
                else
                    log_warning "⚠ $aid is unreachable ($health_url)"
                    issues=$((issues + 1))
                fi
                ;;
        esac
    done 3< <($PARSE_AGENTS services)

    if systemctl is-active --quiet openclaw-agent-monitor; then
        log_success "✓ Agent Monitor is running"
    else
        log_error "✗ Agent Monitor is not running"
        issues=$((issues + 1))
    fi

    # Use% column of the filesystem holding /root; -P keeps it on one line.
    disk_usage=$(df -P /root 2>/dev/null | awk 'NR == 2 { sub(/%/, "", $5); print $5 }') || disk_usage=""
    if [[ $disk_usage =~ ^[0-9]+$ ]] && ((disk_usage < 80)); then
        log_success "✓ Disk usage: ${disk_usage}%"
    else
        log_warning "⚠ Disk usage: ${disk_usage}%"
        issues=$((issues + 1))
    fi

    # used / total from the "Mem:" row of free, as a whole percentage.
    mem_usage=$(free 2>/dev/null | awk '/^Mem/ && $2 > 0 { printf("%.0f", $3 / $2 * 100.0) }') || mem_usage=""
    if [[ $mem_usage =~ ^[0-9]+$ ]] && ((mem_usage < 80)); then
        log_success "✓ Memory usage: ${mem_usage}%"
    else
        log_warning "⚠ Memory usage: ${mem_usage}%"
        issues=$((issues + 1))
    fi

    if [ -d "$XDG_RUNTIME_DIR" ]; then
        log_success "✓ XDG_RUNTIME_DIR exists"
    else
        log_warning "⚠ XDG_RUNTIME_DIR not found"
        issues=$((issues + 1))
    fi

    if loginctl show-user "$(whoami)" -p Linger | grep -q "yes"; then
        log_success "✓ User linger enabled"
    else
        log_warning "⚠ User linger NOT enabled"
        issues=$((issues + 1))
    fi

    echo ""
    if [ "$issues" -eq 0 ]; then
        log_success "All health checks passed!"
        return 0
    else
        log_error "$issues health check(s) failed!"
        return 1
    fi
}
# Print the command reference followed by the agents registered in
# agents.yaml (one "<id> (<type>) - <name>" line per agent, taken from
# `$PARSE_AGENTS list`).
show_help() {
    local agent_id agent_type agent_name

    printf '%s\n' \
        "OpenClaw System Management Script (config-driven via agents.yaml)" \
        "" \
        "Usage: $0 <command>" \
        "" \
        "Commands:" \
        " install - Install and start all systemd services" \
        " start - Start all registered agent services + monitor" \
        " stop - Stop all services" \
        " restart - Restart all services" \
        " status - Show service status" \
        " logs - Show recent logs" \
        " health - Run health check" \
        " backup - Full backup (workspace + Qdrant snapshot + agent profiles)" \
        " backup quick - Quick backup (workspace files only, no Qdrant)" \
        " restore <dir> - Restore workspace + config from backup directory" \
        " restore-qdrant <file> - Restore Qdrant from snapshot file" \
        " rollback - Rollback to previous git commit" \
        " rollback-to - Rollback to specific commit" \
        " debug-stop - Stop ALL services including monitor (safe for debugging)" \
        " debug-start - Restart all services after debugging" \
        " fix-service - Re-inject EnvironmentFile after OpenClaw UI upgrade" \
        " help - Show this help message" \
        "" \
        "Registered agents:"

    $PARSE_AGENTS list | while IFS=$'\t' read -r agent_id agent_type agent_name; do
        echo " $agent_id ($agent_type) - $agent_name"
    done
    echo ""
}
# Main: dispatch the first CLI argument to its handler (default: help).
case "${1:-help}" in
    install) install_services ;;
    start) start_services ;;
    stop) stop_services ;;
    restart) restart_services ;;
    status) show_status ;;
    logs) show_logs ;;
    health) health_check ;;
    backup) backup "$2" ;;
    restore) restore_workspace "$2" ;;
    restore-qdrant) restore_qdrant "$2" ;;
    rollback) rollback ;;
    rollback-to) rollback_to "$2" ;;
    debug-stop) debug_stop ;;
    debug-start) debug_start ;;
    fix-service) fix_service_files ;;
    help | --help | -h) show_help ;;
    *)
        log_error "Unknown command: $1"
        show_help
        exit 1
        ;;
esac