feat(CF-2502): proper E2E encryption with cross-signing and device lifecycle

Replace insecure auto-trust-all-devices with cross-signed-only trust policy.
Extract cross-signing manager into reusable module with vault backup/recovery.
Add device cleanup script and automatic old device pruning on startup.

- device_trust.py: CrossSignedOnlyPolicy (only trust cross-signed devices)
- cross_signing.py: Extracted from bot.py, adds vault seed backup + recovery
- scripts/matrix_device_cleanup.py: Synapse Admin API bulk device cleanup CLI
- bot.py: Use new modules, add _cleanup_own_devices() on startup

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-03-23 19:05:48 +02:00
parent bfc717372c
commit 7fd3aae176
4 changed files with 609 additions and 114 deletions

227
scripts/matrix_device_cleanup.py Executable file
View File

@@ -0,0 +1,227 @@
#!/usr/bin/env python3
"""Clean up stale Matrix devices via Synapse Admin API.
Usage:
python matrix_device_cleanup.py --user @admin:agiliton.eu --keep 1 --dry-run
python matrix_device_cleanup.py --user @admin:agiliton.eu --keep 1
python matrix_device_cleanup.py --user @admin:agiliton.eu --auto --max-age-days 30 --keep 3
"""
import argparse
import asyncio
import json
import logging
import os
import subprocess
import sys
import time
from urllib.parse import quote

import httpx
# Module-level logger; main() installs the handler and level via basicConfig.
logger = logging.getLogger(__name__)
# Maximum number of device IDs sent in a single bulk-delete request.
BATCH_SIZE = 100
BATCH_DELAY = 1.0 # seconds between batch deletions
async def get_admin_token(homeserver: str) -> str:
    """Return the Synapse admin token from the environment or the vault CLI.

    The ``SYNAPSE_ADMIN_TOKEN`` environment variable wins when it is set and
    non-empty. Otherwise ``vault get matrix.agiliton.admin_token`` is run and
    its trimmed stdout is used.

    Args:
        homeserver: Currently unused; kept so existing callers keep working.

    Returns:
        The admin access token.

    Raises:
        RuntimeError: If neither source yields a token. When the vault CLI
            could not be started or timed out, that error is chained as the
            cause.
    """
    token = os.environ.get("SYNAPSE_ADMIN_TOKEN")
    if token:
        return token

    vault_error = None
    try:
        # subprocess.run blocks for up to 10 s; run it in a worker thread so
        # the event loop stays responsive while we wait for the CLI.
        result = await asyncio.to_thread(
            subprocess.run,
            ["vault", "get", "matrix.agiliton.admin_token"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except (OSError, subprocess.TimeoutExpired) as err:
        # Missing/non-executable binary or a hung CLI: fall through to the
        # uniform "no token" error below, keeping the cause for debugging.
        vault_error = err
    else:
        secret = result.stdout.strip()
        if result.returncode == 0 and secret:
            return secret

    raise RuntimeError(
        "No admin token found. Set SYNAPSE_ADMIN_TOKEN or store in vault "
        "as matrix.agiliton.admin_token"
    ) from vault_error
async def list_devices(
    client: httpx.AsyncClient, homeserver: str, headers: dict, user_id: str,
) -> list[dict]:
    """List all devices of a user via the Synapse Admin API.

    Args:
        client: HTTP client used to issue the GET request.
        homeserver: Base URL of the homeserver; a trailing slash is tolerated.
        headers: Request headers (the caller supplies the admin bearer token).
        user_id: Fully-qualified Matrix user ID, e.g. ``@admin:agiliton.eu``.

    Returns:
        The ``devices`` array of the JSON response, or an empty list when the
        response has no such key.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
    """
    # Matrix localparts may contain "/", which would otherwise be read as a
    # path separator. "@" and ":" stay literal so ordinary IDs produce the
    # same URL as before.
    encoded_user = quote(user_id, safe="@:")
    url = f"{homeserver.rstrip('/')}/_synapse/admin/v2/users/{encoded_user}/devices"
    resp = await client.get(url, headers=headers)
    resp.raise_for_status()
    return resp.json().get("devices", [])
async def delete_devices_batch(
    client: httpx.AsyncClient,
    homeserver: str,
    headers: dict,
    user_id: str,
    device_ids: list[str],
) -> int:
    """Bulk-delete devices of a user via the Synapse Admin API.

    Args:
        client: HTTP client used to issue the POST request.
        homeserver: Base URL of the homeserver; a trailing slash is tolerated.
        headers: Request headers (the caller supplies the admin bearer token).
        user_id: Fully-qualified Matrix user ID owning the devices.
        device_ids: Device IDs to delete in this single request.

    Returns:
        The number of device IDs submitted for deletion; 0 for an empty list,
        in which case no request is sent.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
    """
    if not device_ids:
        # Nothing to do; avoid a pointless round-trip to the server.
        return 0
    # Escape path-breaking characters (e.g. "/") while keeping "@" and ":"
    # literal so ordinary IDs produce the same URL as before.
    encoded_user = quote(user_id, safe="@:")
    url = (
        f"{homeserver.rstrip('/')}/_synapse/admin/v2/users/"
        f"{encoded_user}/delete_devices"
    )
    resp = await client.post(url, headers=headers, json={"devices": device_ids})
    resp.raise_for_status()
    return len(device_ids)
_MS_PER_DAY = 86400 * 1000


def _partition_devices(
    devices: list[dict],
    keep: int,
    max_age_days: int | None,
    skip: set[str],
    now_ms: float,
) -> tuple[list[dict], list[dict]]:
    """Split devices (sorted most-recent first) into (to_keep, to_delete)."""
    to_keep: list[dict] = []
    to_delete: list[dict] = []
    for position, dev in enumerate(devices):
        last_seen = dev.get("last_seen_ts") or 0
        # Explicitly protected devices and the `keep` most recent always stay.
        protected = dev["device_id"] in skip or position < keep
        # With an age threshold, devices seen more recently than that stay
        # too. Devices with no last-seen timestamp get no such grace period.
        recent_enough = (
            max_age_days is not None
            and last_seen > 0
            and (now_ms - last_seen) / _MS_PER_DAY < max_age_days
        )
        if protected or recent_enough:
            to_keep.append(dev)
        else:
            to_delete.append(dev)
    return to_keep, to_delete


async def cleanup_devices(
    homeserver: str,
    user_id: str,
    keep: int = 1,
    max_age_days: int | None = None,
    dry_run: bool = False,
    skip_device_ids: list[str] | None = None,
) -> dict:
    """Remove stale devices, keeping the N most recently active.

    Args:
        homeserver: Base URL of the homeserver.
        user_id: Matrix user ID whose devices are cleaned up.
        keep: Number of most recently seen devices that are always kept.
        max_age_days: When set, only devices last seen at least this many
            days ago (or never seen) are deleted.
        dry_run: Log what would be deleted without deleting anything.
        skip_device_ids: Device IDs that must never be deleted.

    Returns:
        Summary dict with ``total``, ``kept``, ``deleted`` and ``failed``
        counts; dry runs additionally report ``would_delete``. ``failed``
        counts devices in batches the server rejected.

    Raises:
        RuntimeError: If no admin token can be obtained.
        httpx.HTTPStatusError: If listing the devices fails.
    """
    token = await get_admin_token(homeserver)
    headers = {"Authorization": f"Bearer {token}"}
    skip = set(skip_device_ids or [])

    async with httpx.AsyncClient(timeout=30.0) as client:
        devices = await list_devices(client, homeserver, headers, user_id)
        if not devices:
            logger.info("No devices found for %s", user_id)
            return {"total": 0, "kept": 0, "deleted": 0, "failed": 0}

        # Most recent first; a missing/None timestamp sorts as oldest.
        devices.sort(key=lambda d: d.get("last_seen_ts") or 0, reverse=True)

        # One timestamp for the whole run so every device is judged alike.
        now_ms = time.time() * 1000
        to_keep, to_delete = _partition_devices(
            devices, keep, max_age_days, skip, now_ms,
        )

        logger.info(
            "User %s: %d total devices, keeping %d, deleting %d%s",
            user_id, len(devices), len(to_keep), len(to_delete),
            " (DRY RUN)" if dry_run else "",
        )

        if dry_run:
            for dev in to_delete[:10]:
                last_seen = dev.get("last_seen_ts") or 0
                if last_seen:
                    seen = f"{(now_ms - last_seen) / _MS_PER_DAY:.1f}d ago"
                else:
                    seen = "never"
                logger.info(
                    " Would delete: %s (display: %s, last seen: %s)",
                    dev["device_id"],
                    dev.get("display_name", ""),
                    seen,
                )
            if len(to_delete) > 10:
                logger.info(" ... and %d more", len(to_delete) - 10)
            return {
                "total": len(devices),
                "kept": len(to_keep),
                "deleted": 0,
                "failed": 0,
                "would_delete": len(to_delete),
            }

        # Delete in batches, pausing between requests.
        delete_ids = [d["device_id"] for d in to_delete]
        deleted = 0
        failed = 0
        for start in range(0, len(delete_ids), BATCH_SIZE):
            batch = delete_ids[start : start + BATCH_SIZE]
            try:
                count = await delete_devices_batch(
                    client, homeserver, headers, user_id, batch,
                )
            except httpx.HTTPStatusError as e:
                # Best effort: record the failure and carry on with the
                # remaining batches instead of aborting the whole cleanup.
                failed += len(batch)
                logger.error(
                    " Batch %d-%d failed: %d %s",
                    start, start + len(batch),
                    e.response.status_code, e.response.text,
                )
            else:
                deleted += count
                logger.info(
                    " Deleted batch %d-%d (%d devices)",
                    start, start + len(batch), count,
                )
            if start + BATCH_SIZE < len(delete_ids):
                await asyncio.sleep(BATCH_DELAY)

    logger.info("Cleanup complete: deleted %d of %d devices", deleted, len(devices))
    if failed:
        logger.warning("%d devices could not be deleted", failed)
    return {
        "total": len(devices),
        "kept": len(to_keep),
        "deleted": deleted,
        "failed": failed,
    }
def main():
    """CLI entry point: parse arguments, run the cleanup, print a JSON summary.

    ``--auto`` fills in ``--max-age-days 30`` and ``--keep 3`` for whichever
    of the two options was not given explicitly. Without ``--auto`` the
    default for ``--keep`` is 1.

    Exits with status 1 when a real (non dry-run) cleanup deleted fewer
    devices than it targeted, and with status 0 otherwise.
    """
    parser = argparse.ArgumentParser(description="Clean up stale Matrix devices")
    parser.add_argument("--user", required=True, help="Matrix user ID (e.g. @admin:agiliton.eu)")
    parser.add_argument(
        "--homeserver",
        default=os.environ.get("MATRIX_HOMESERVER", "https://matrix.agiliton.eu"),
        help="Homeserver URL",
    )
    # default=None lets us tell "not given" apart from an explicit "--keep 1",
    # so --auto never overrides a value the user asked for.
    parser.add_argument(
        "--keep",
        type=int,
        default=None,
        help="Number of most recent devices to keep (default: 1, or 3 with --auto)",
    )
    parser.add_argument("--max-age-days", type=int, default=None, help="Only delete devices older than N days")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be deleted without deleting")
    parser.add_argument("--skip", nargs="*", default=[], help="Device IDs to never delete")
    parser.add_argument("--auto", action="store_true", help="Auto mode: --max-age-days 30 --keep 3")
    parser.add_argument("-v", "--verbose", action="store_true")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(levelname)s %(message)s",
    )

    if args.auto and args.max_age_days is None:
        args.max_age_days = 30
    if args.keep is None:
        args.keep = 3 if args.auto else 1

    result = asyncio.run(
        cleanup_devices(
            homeserver=args.homeserver,
            user_id=args.user,
            keep=args.keep,
            max_age_days=args.max_age_days,
            dry_run=args.dry_run,
            skip_device_ids=args.skip,
        )
    )
    print(json.dumps(result, indent=2))

    # Every device that was not kept was targeted for deletion; anything short
    # of that in a real run means at least one batch failed.
    targeted = result.get("total", 0) - result.get("kept", 0)
    incomplete = not args.dry_run and result.get("deleted", 0) < targeted
    sys.exit(1 if incomplete else 0)


if __name__ == "__main__":
    main()