You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
234 lines
8.2 KiB
234 lines
8.2 KiB
#!/usr/bin/env python3 |
|
""" |
|
Memory cleanup and audit script. |
|
|
|
Stats mode (default / --dry-run): |
|
python3 memory_cleanup.py --dry-run |
|
|
|
Cleanup mode (requires --execute): |
|
python3 memory_cleanup.py --execute --max-age-days 90 |
|
|
|
Retention policy (aligned with EXPIRATION_MAP in mem0_client.py): |
|
session -> 7 days (written with expiration_date, but Qdrant does NOT auto-delete) |
|
chat_summary -> 30 days |
|
preference -> never auto-delete |
|
knowledge -> never auto-delete |
|
|
|
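The --max-age-days flag is a hard ceiling: any session or chat_summary older
than that threshold is removed regardless of its expiration_date. This script
applies only that threshold; the per-type retention periods listed above are
logged for reference and are not enforced here.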
|
""" |
|
|
|
import os |
|
import sys |
|
import argparse |
|
import logging |
|
import yaml |
|
from pathlib import Path |
|
from datetime import datetime, timedelta, timezone |
|
|
|
# Expose the DashScope key through the OPENAI_* environment variables
# (presumably so OpenAI-compatible client libraries talk to DashScope).
# The mem0-specific variable wins over the generic one; an empty value counts
# as "not set".
_dashscope_key = (
    os.environ.get('MEM0_DASHSCOPE_API_KEY')
    or os.environ.get('DASHSCOPE_API_KEY')
    or ''
)
if _dashscope_key:
    # The API key is always overwritten ...
    os.environ['OPENAI_API_KEY'] = _dashscope_key
    # ... but an endpoint that is already exported is left untouched.
    if 'OPENAI_API_BASE' not in os.environ:
        os.environ['OPENAI_API_BASE'] = 'https://dashscope.aliyuncs.com/compatible-mode/v1'
    if 'OPENAI_BASE_URL' not in os.environ:
        os.environ['OPENAI_BASE_URL'] = 'https://dashscope.aliyuncs.com/compatible-mode/v1'
|
|
|
try: |
|
from qdrant_client import QdrantClient |
|
from qdrant_client.models import ( |
|
Filter, FieldCondition, MatchValue, FilterSelector, |
|
) |
|
except ImportError: |
|
print("qdrant-client not installed") |
|
sys.exit(1) |
|
|
|
# Root logging setup: timestamped records at INFO level and above.
logging.basicConfig(
    format='%(asctime)s %(levelname)s %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Qdrant connection settings; host and port may be overridden via environment.
QDRANT_HOST = os.environ.get('MEM0_QDRANT_HOST', 'localhost')
QDRANT_PORT = int(os.environ.get('MEM0_QDRANT_PORT', '6333'))
COLLECTION = 'mem0_v4_shared'

# Nominal retention period in days for each memory type handled by the cleanup.
RETENTION_DAYS = {'session': 7, 'chat_summary': 30}

# Directory that receives the per-day cleanup audit logs.
AUDIT_LOG_DIR = Path('/root/.openclaw/workspace/logs/security')
|
|
|
|
|
def _load_agent_ids(): |
|
try: |
|
agents_yaml = Path('/root/.openclaw/workspace/agents.yaml') |
|
with open(agents_yaml, 'r', encoding='utf-8') as f: |
|
data = yaml.safe_load(f) or {} |
|
return list(data.get('agents', {}).keys()) + ['general'] |
|
except Exception: |
|
return ['main', 'general'] |
|
|
|
|
|
def get_stats(client: QdrantClient):
    """Log memory counts by agent_id, visibility, and memory_type.

    Args:
        client: Qdrant client used to run the count queries against COLLECTION.

    Returns:
        The total number of points in the collection.

    The per-value breakdown is best effort: a failed count query is logged as
    a warning and skipped. The initial total count is not guarded, so a
    failure there propagates to the caller.
    """
    total = client.count(collection_name=COLLECTION)
    logger.info(f"Collection '{COLLECTION}' total: {total.count}")

    breakdowns = [
        ('agent_id', _load_agent_ids()),
        ('visibility', ['public', 'project', 'private']),
        ('memory_type', ['session', 'chat_summary', 'preference', 'knowledge']),
    ]

    for field, values in breakdowns:
        logger.info(f"\n--- {field} ---")
        for val in values:
            try:
                result = client.count(
                    collection_name=COLLECTION,
                    count_filter=Filter(must=[
                        FieldCondition(key=field, match=MatchValue(value=val))
                    ])
                )
            except Exception as exc:
                # One failing query must not abort the report, but it must be
                # visible: otherwise the breakdown looks complete when it is not.
                logger.warning(f" {field}={val}: count failed ({exc})")
                continue
            # Values with no matching points are omitted to keep the report short.
            if result.count > 0:
                logger.info(f" {field}={val}: {result.count}")

    return total.count
|
|
|
|
|
def _parse_timestamp(value):
    """Parse an ISO-8601 payload timestamp into an aware datetime, else None."""
    if not isinstance(value, str):
        return None
    text = value.strip()
    # datetime.fromisoformat() only accepts a trailing 'Z' from Python 3.11 on.
    if text.endswith(('Z', 'z')):
        text = text[:-1] + '+00:00'
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return None
    if parsed.tzinfo is None:
        # NOTE(review): naive timestamps are assumed to be UTC - confirm
        # against whatever writes these payloads.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def _find_expired_points(client: QdrantClient, memory_type: str, max_age_days: int):
    """Return the ids of points of ``memory_type`` older than ``max_age_days``.

    Scrolls through every point whose ``memory_type`` payload field matches and
    reads its age from the ``timestamp`` payload field, falling back to
    ``created_at``.

    Args:
        client: Qdrant client used to scroll COLLECTION.
        memory_type: Value of the ``memory_type`` payload field to scan.
        max_age_days: Points created more than this many days ago are expired.

    Returns:
        A list of point ids whose timestamp lies before the cutoff. Points
        without a timestamp, or with one that cannot be parsed, are never
        returned, so they are never deleted.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(days=max_age_days)
    type_filter = Filter(must=[
        FieldCondition(key="memory_type", match=MatchValue(value=memory_type))
    ])

    expired_ids = []
    unparsed = 0
    offset = None
    while True:
        results, next_offset = client.scroll(
            collection_name=COLLECTION,
            scroll_filter=type_filter,
            limit=500,
            offset=offset,
            with_payload=True,
            with_vectors=False,
        )
        for point in results:
            payload = point.payload or {}
            ts = payload.get('timestamp') or payload.get('created_at', '')
            if not ts:
                continue
            created = _parse_timestamp(ts)
            if created is None:
                unparsed += 1
                continue
            # Compare instants, not strings: ISO strings with different UTC
            # offsets or precision do not sort chronologically as text.
            if created < cutoff:
                expired_ids.append(point.id)
        # An empty page or a missing next offset both mean the scan is done.
        if not results or next_offset is None:
            break
        offset = next_offset

    if unparsed:
        logger.warning(
            f" Skipped {unparsed} {memory_type} memories with unparseable timestamps"
        )

    return expired_ids
|
|
|
|
|
def cleanup_expired(client: QdrantClient, max_age_days: int, execute: bool):
    """Identify and optionally delete expired session/chat_summary memories.

    Every memory type listed in RETENTION_DAYS is scanned with the same cutoff,
    ``max_age_days``; the per-type values in RETENTION_DAYS are not used as
    cutoffs here.

    Args:
        client: Qdrant client used to scan and delete points.
        max_age_days: Age threshold in days; must not be negative.
        execute: When true, expired points are deleted; otherwise they are
            only counted and reported.

    Returns:
        A tuple ``(total_deleted, results_summary)``. ``total_deleted`` is the
        number of points deleted (always 0 when ``execute`` is false) and
        ``results_summary`` is a list of ``(memory_type, expired_count)``.

    Raises:
        ValueError: If ``max_age_days`` is negative. A negative age puts the
            cutoff in the future and would select every timestamped memory.
    """
    if max_age_days < 0:
        raise ValueError(f"max_age_days must not be negative, got {max_age_days}")

    batch_size = 100
    total_deleted = 0
    results_summary = []

    for memory_type in RETENTION_DAYS:
        logger.info(f"\nScanning memory_type={memory_type} (cutoff: {max_age_days} days)...")
        expired_ids = _find_expired_points(client, memory_type, max_age_days)

        if not expired_ids:
            logger.info(f" No expired {memory_type} memories found.")
            results_summary.append((memory_type, 0))
            continue

        logger.info(f" Found {len(expired_ids)} expired {memory_type} memories")

        if execute:
            # Delete in fixed-size batches to keep each request small.
            for i in range(0, len(expired_ids), batch_size):
                batch = expired_ids[i:i + batch_size]
                client.delete(
                    collection_name=COLLECTION,
                    points_selector=batch,
                )
            logger.info(f" DELETED {len(expired_ids)} {memory_type} memories")
            total_deleted += len(expired_ids)
        else:
            logger.info(f" [dry-run] Would delete {len(expired_ids)} {memory_type} memories")

        results_summary.append((memory_type, len(expired_ids)))

    return total_deleted, results_summary
|
|
|
|
|
def _write_audit_log(total_before, total_deleted, results_summary, max_age_days, execute):
    """Append a summary of this run to the per-day audit log in AUDIT_LOG_DIR.

    Args:
        total_before: Number of points in the collection before the cleanup.
        total_deleted: Number of points deleted by this run.
        results_summary: Iterable of ``(memory_type, expired_count)`` pairs.
        max_age_days: Age threshold, in days, used for this run.
        execute: Whether the run deleted data (true) or was a dry run (false).
    """
    # Read the clock once: the file name and the entry header must agree even
    # when the run straddles midnight.
    now = datetime.now()

    AUDIT_LOG_DIR.mkdir(parents=True, exist_ok=True)
    log_file = AUDIT_LOG_DIR / f"memory-cleanup-{now.strftime('%Y-%m-%d')}.log"

    lines = [
        f"\n{'='*60}\n",
        f"Memory Cleanup - {now.isoformat()}\n",
        f"Mode: {'EXECUTE' if execute else 'DRY-RUN'}\n",
        f"Max age: {max_age_days} days\n",
        f"Total before: {total_before}\n",
    ]
    lines.extend(f" {mtype}: {count} expired\n" for mtype, count in results_summary)
    lines.append(f"Total deleted: {total_deleted}\n")
    lines.append(f"Total after: {total_before - total_deleted}\n")

    # Append mode keeps every run of the day in the same file.
    with open(log_file, 'a', encoding='utf-8') as f:
        f.writelines(lines)

    logger.info(f"Audit log: {log_file}")
|
|
|
|
|
def main():
    """Command-line entry point: report stats, find expired memories, audit.

    Without ``--execute`` the run is a dry run: expired memories are counted
    and reported but nothing is deleted. Exits with status 1 when the options
    are contradictory or ``--max-age-days`` is negative.
    """
    parser = argparse.ArgumentParser(description='Mem0 memory cleanup and audit')
    parser.add_argument('--dry-run', action='store_true',
                        help='Show stats and expired counts without deleting (default behavior)')
    parser.add_argument('--execute', action='store_true',
                        help='Actually delete expired memories (requires this flag)')
    parser.add_argument('--max-age-days', type=int, default=90,
                        help='Delete session/chat_summary older than N days (default: 90)')
    args = parser.parse_args()

    if args.execute and args.dry_run:
        logger.error("Cannot use --execute and --dry-run together")
        sys.exit(1)

    # A negative age would move the cutoff into the future and select every
    # timestamped session/chat_summary memory for deletion.
    if args.max_age_days < 0:
        logger.error("--max-age-days must not be negative")
        sys.exit(1)

    execute = args.execute

    client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)

    logger.info("=" * 60)
    logger.info(f"Memory Cleanup - {datetime.now().strftime('%Y-%m-%d %H:%M')}")
    logger.info(f"Mode: {'EXECUTE' if execute else 'DRY-RUN (use --execute to delete)'}")
    logger.info(f"Max age: {args.max_age_days} days")
    logger.info(f"Retention: session={RETENTION_DAYS['session']}d, "
                f"chat_summary={RETENTION_DAYS['chat_summary']}d, "
                f"preference=permanent, knowledge=permanent")
    logger.info("=" * 60)

    total_before = get_stats(client)

    total_deleted, results_summary = cleanup_expired(
        client, args.max_age_days, execute
    )

    if execute and total_deleted > 0:
        logger.info("\nPost-cleanup stats:")
        get_stats(client)

    _write_audit_log(total_before, total_deleted, results_summary, args.max_age_days, execute)

    # total_deleted is always 0 in a dry run, so report the number of memories
    # that were identified as expired instead.
    if execute:
        affected = total_deleted
    else:
        affected = sum(count for _, count in results_summary)

    logger.info(f"\nSummary: {affected} memories "
                f"{'deleted' if execute else 'would be deleted (dry-run)'}.")
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
|