You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

234 lines
8.2 KiB

#!/usr/bin/env python3
"""
Memory cleanup and audit script.

Stats mode (default / --dry-run):
    python3 memory_cleanup.py --dry-run

Cleanup mode (requires --execute):
    python3 memory_cleanup.py --execute --max-age-days 90

Retention policy (aligned with EXPIRATION_MAP in mem0_client.py):
    session      -> 7 days (written with expiration_date, but Qdrant does NOT auto-delete)
    chat_summary -> 30 days
    preference   -> never auto-delete
    knowledge    -> never auto-delete

The --max-age-days flag is a hard ceiling: any session or chat_summary older
than that threshold is removed regardless of its expiration_date.
"""
import os
import sys
import argparse
import logging
import yaml
from pathlib import Path
from datetime import datetime, timedelta, timezone
# Pick up a DashScope API key, preferring the MEM0-specific variable over the
# generic one, and expose it under the OpenAI variable names (presumably so
# that OpenAI-compatible client libraries talk to DashScope -- nothing in this
# script reads these variables directly).
_dashscope_key = os.getenv('MEM0_DASHSCOPE_API_KEY', '') or os.getenv('DASHSCOPE_API_KEY', '')
if _dashscope_key:
    os.environ['OPENAI_API_KEY'] = _dashscope_key
# NOTE(review): the indentation of this file was lost; the two setdefault()
# calls below are assumed to run unconditionally (only the key assignment is
# guarded by the `if`). Confirm against the original layout.
# setdefault() leaves any base URL already present in the environment alone.
os.environ.setdefault('OPENAI_API_BASE', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
os.environ.setdefault('OPENAI_BASE_URL', 'https://dashscope.aliyuncs.com/compatible-mode/v1')
try:
from qdrant_client import QdrantClient
from qdrant_client.models import (
Filter, FieldCondition, MatchValue, FilterSelector,
)
except ImportError:
print("qdrant-client not installed")
sys.exit(1)
# Emit INFO and above, prefixing every record with a timestamp and its level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
logger = logging.getLogger(__name__)
# Qdrant connection settings; both can be overridden through the environment.
QDRANT_HOST = os.getenv('MEM0_QDRANT_HOST', 'localhost')
QDRANT_PORT = int(os.getenv('MEM0_QDRANT_PORT', '6333'))
# Name of the Qdrant collection this script counts and cleans.
COLLECTION = 'mem0_v4_shared'
# Nominal retention per memory_type, in days. The keys decide which memory
# types cleanup_expired() scans. The day counts are printed in the start-up
# banner, but the deletion cutoff itself comes from --max-age-days.
RETENTION_DAYS = {
    'session': 7,
    'chat_summary': 30,
}
# Directory that receives the per-day cleanup audit log files.
AUDIT_LOG_DIR = Path('/root/.openclaw/workspace/logs/security')
def _load_agent_ids():
    """Return the agent IDs to report statistics for.

    Reads the top-level ``agents`` mapping of the workspace ``agents.yaml``
    and returns its keys, followed by ``'general'`` unless the file already
    declares an agent with that name (so it is never listed twice).

    Loading is best-effort: if the file is missing, unreadable, malformed, or
    ``agents`` is not a mapping, a warning is logged and the fallback
    ``['main', 'general']`` is returned instead of raising.

    Returns:
        list: agent identifiers, in file order, ending with ``'general'``
        when it was not already present.
    """
    agents_yaml = Path('/root/.openclaw/workspace/agents.yaml')
    try:
        with open(agents_yaml, 'r', encoding='utf-8') as f:
            # An empty YAML document loads as None; treat it as "no agents".
            data = yaml.safe_load(f) or {}
        agent_ids = list(data.get('agents', {}).keys())
    except Exception as exc:
        # Deliberately broad: statistics must still run without agents.yaml,
        # but say why the fallback list is being used.
        logger.warning("Could not load agent IDs from %s (%s); using defaults",
                       agents_yaml, exc)
        return ['main', 'general']
    if 'general' not in agent_ids:
        agent_ids.append('general')
    return agent_ids
def get_stats(client: QdrantClient):
    """Log memory counts for the collection and return the overall total.

    Logs the total number of points in ``COLLECTION``, then, for each of the
    payload fields ``agent_id``, ``visibility`` and ``memory_type``, the
    number of points matching every known value of that field. Values with a
    count of zero are not logged.

    Per-value counting is best-effort: a failing count query is reported as a
    warning and the remaining values are still processed. A failure of the
    initial total count is not caught and propagates to the caller.

    Args:
        client: Qdrant client connected to the server holding ``COLLECTION``.

    Returns:
        int: total number of points in the collection.
    """
    total = client.count(collection_name=COLLECTION)
    logger.info(f"Collection '{COLLECTION}' total: {total.count}")

    breakdowns = [
        ('agent_id', _load_agent_ids()),
        ('visibility', ['public', 'project', 'private']),
        ('memory_type', ['session', 'chat_summary', 'preference', 'knowledge']),
    ]
    for field, values in breakdowns:
        logger.info(f"\n--- {field} ---")
        for val in values:
            try:
                result = client.count(
                    collection_name=COLLECTION,
                    count_filter=Filter(must=[
                        FieldCondition(key=field, match=MatchValue(value=val))
                    ]),
                )
            except Exception as exc:
                # One failing breakdown must not abort the whole report, but
                # it should not vanish silently either.
                logger.warning(f" {field}={val}: count failed ({exc})")
                continue
            if result.count > 0:
                logger.info(f" {field}={val}: {result.count}")
    return total.count
def _parse_payload_timestamp(raw):
    """Parse a payload timestamp into a timezone-aware ``datetime``.

    Args:
        raw: value read from a point payload.

    Returns:
        datetime | None: the parsed instant, or ``None`` when ``raw`` is not
        a string or is not a valid ISO 8601 timestamp.
    """
    if not isinstance(raw, str):
        return None
    text = raw.strip()
    # datetime.fromisoformat() only accepts a trailing 'Z' from Python 3.11
    # on, so spell it as an explicit UTC offset for older interpreters.
    if text.endswith(('Z', 'z')):
        text = text[:-1] + '+00:00'
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return None
    if parsed.tzinfo is None:
        # NOTE(review): timestamps without an offset are assumed to be UTC --
        # confirm against whatever writes these payloads.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def _find_expired_points(client: QdrantClient, memory_type: str, max_age_days: int):
    """Return the IDs of points of ``memory_type`` older than ``max_age_days``.

    Scrolls through every point in ``COLLECTION`` whose ``memory_type``
    payload field equals ``memory_type`` (500 per page, payload only) and
    collects the IDs of those whose timestamp lies before
    ``now (UTC) - max_age_days``. The timestamp is read from the payload key
    ``timestamp``, falling back to ``created_at``.

    Timestamps are parsed and compared as instants rather than as strings, so
    values carrying a non-UTC offset or a ``Z`` suffix are ordered correctly.
    Points whose timestamp is missing, not a string, or not parseable are
    skipped, i.e. never reported as expired.

    Args:
        client: Qdrant client connected to the server holding ``COLLECTION``.
        memory_type: value of the ``memory_type`` payload field to scan.
        max_age_days: age threshold in days.

    Returns:
        list: IDs of the expired points, in scroll order.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(days=max_age_days)
    type_filter = Filter(must=[
        FieldCondition(key="memory_type", match=MatchValue(value=memory_type))
    ])
    expired_ids = []
    offset = None
    while True:
        results, next_offset = client.scroll(
            collection_name=COLLECTION,
            scroll_filter=type_filter,
            limit=500,
            offset=offset,
            with_payload=True,
            with_vectors=False,
        )
        if not results:
            break
        for point in results:
            payload = point.payload or {}
            created = _parse_payload_timestamp(
                payload.get('timestamp') or payload.get('created_at')
            )
            if created is not None and created < cutoff:
                expired_ids.append(point.id)
        # A missing next offset means the last page has been read.
        if next_offset is None:
            break
        offset = next_offset
    return expired_ids
def cleanup_expired(client: QdrantClient, max_age_days: int, execute: bool):
    """Identify and optionally delete expired session/chat_summary memories.

    Every memory type listed in ``RETENTION_DAYS`` is scanned with
    ``_find_expired_points`` using ``max_age_days`` as the age threshold; the
    per-type day counts in ``RETENTION_DAYS`` do not influence the cutoff.
    When ``execute`` is true the expired points are deleted in batches of
    100; otherwise the function only logs what would be deleted.

    Args:
        client: Qdrant client connected to the server holding ``COLLECTION``.
        max_age_days: points older than this many days count as expired.
        execute: delete the expired points when true, dry-run when false.

    Returns:
        tuple: ``(total_deleted, results_summary)`` where ``total_deleted``
        is the number of points deleted (always 0 in a dry run) and
        ``results_summary`` is a list of ``(memory_type, expired_count)``
        pairs, one per scanned memory type.
    """
    batch_size = 100
    total_deleted = 0
    results_summary = []
    for memory_type in RETENTION_DAYS:
        logger.info(f"\nScanning memory_type={memory_type} (cutoff: {max_age_days} days)...")
        expired_ids = _find_expired_points(client, memory_type, max_age_days)
        results_summary.append((memory_type, len(expired_ids)))
        if not expired_ids:
            logger.info(f" No expired {memory_type} memories found.")
            continue
        logger.info(f" Found {len(expired_ids)} expired {memory_type} memories")
        if not execute:
            logger.info(f" [dry-run] Would delete {len(expired_ids)} {memory_type} memories")
            continue
        # Delete in fixed-size batches to keep each request small.
        for start in range(0, len(expired_ids), batch_size):
            client.delete(
                collection_name=COLLECTION,
                points_selector=expired_ids[start:start + batch_size],
            )
        logger.info(f" DELETED {len(expired_ids)} {memory_type} memories")
        total_deleted += len(expired_ids)
    return total_deleted, results_summary
def _write_audit_log(total_before, total_deleted, results_summary, max_age_days, execute):
    """Append a record of this run to the audit log file for the current day.

    The log directory ``AUDIT_LOG_DIR`` is created if needed. The record
    holds the run time, the mode, the age threshold, the point totals before
    and after, and one line per ``(memory_type, count)`` pair in
    ``results_summary``. The path of the log file is logged afterwards.

    Args:
        total_before: number of points in the collection before cleanup.
        total_deleted: number of points deleted by this run.
        results_summary: iterable of ``(memory_type, expired_count)`` pairs.
        max_age_days: age threshold, in days, used for this run.
        execute: true for a real cleanup, false for a dry run.
    """
    AUDIT_LOG_DIR.mkdir(parents=True, exist_ok=True)
    day_stamp = datetime.now().strftime('%Y-%m-%d')
    log_file = AUDIT_LOG_DIR / f"memory-cleanup-{day_stamp}.log"

    # Assemble the whole record first, then append it in one go.
    entry = [
        f"\n{'='*60}\n",
        f"Memory Cleanup - {datetime.now().isoformat()}\n",
        f"Mode: {'EXECUTE' if execute else 'DRY-RUN'}\n",
        f"Max age: {max_age_days} days\n",
        f"Total before: {total_before}\n",
    ]
    entry.extend(f" {mtype}: {count} expired\n" for mtype, count in results_summary)
    entry.append(f"Total deleted: {total_deleted}\n")
    entry.append(f"Total after: {total_before - total_deleted}\n")

    with open(log_file, 'a', encoding='utf-8') as handle:
        handle.writelines(entry)
    logger.info(f"Audit log: {log_file}")
def main():
    """Command-line entry point: report statistics and clean up old memories.

    Parses ``--dry-run``, ``--execute`` and ``--max-age-days``, connects to
    Qdrant, logs collection statistics, finds (and with ``--execute``
    deletes) expired memories, logs the statistics again if anything was
    deleted, and appends a record to the audit log.

    Without ``--execute`` the run is a dry run and nothing is deleted.
    The process exits with status 1 when ``--execute`` and ``--dry-run`` are
    combined or when ``--max-age-days`` is negative.
    """
    parser = argparse.ArgumentParser(description='Mem0 memory cleanup and audit')
    parser.add_argument('--dry-run', action='store_true',
                        help='Show stats and expired counts without deleting (default behavior)')
    parser.add_argument('--execute', action='store_true',
                        help='Actually delete expired memories (requires this flag)')
    parser.add_argument('--max-age-days', type=int, default=90,
                        help='Delete session/chat_summary older than N days (default: 90)')
    args = parser.parse_args()

    if args.execute and args.dry_run:
        logger.error("Cannot use --execute and --dry-run together")
        sys.exit(1)
    if args.max_age_days < 0:
        # A negative age puts the cutoff in the future, which would mark
        # every session/chat_summary memory as expired.
        logger.error("--max-age-days must be >= 0")
        sys.exit(1)

    # Deleting is opt-in: anything other than --execute is a dry run.
    execute = args.execute

    client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)

    logger.info("=" * 60)
    logger.info(f"Memory Cleanup - {datetime.now().strftime('%Y-%m-%d %H:%M')}")
    logger.info(f"Mode: {'EXECUTE' if execute else 'DRY-RUN (use --execute to delete)'}")
    logger.info(f"Max age: {args.max_age_days} days")
    logger.info(f"Retention: session={RETENTION_DAYS['session']}d, "
                f"chat_summary={RETENTION_DAYS['chat_summary']}d, "
                f"preference=permanent, knowledge=permanent")
    logger.info("=" * 60)

    total_before = get_stats(client)
    total_deleted, results_summary = cleanup_expired(
        client, args.max_age_days, execute
    )

    # Only re-query the statistics when the collection actually changed.
    if execute and total_deleted > 0:
        logger.info("\nPost-cleanup stats:")
        get_stats(client)

    _write_audit_log(total_before, total_deleted, results_summary, args.max_age_days, execute)
    logger.info(f"\nSummary: {total_deleted} memories "
                f"{'deleted' if execute else 'would be deleted (dry-run)'}.")
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()