fix(e2ee): extend key rotation wait to 10s, debug late key events

EC rotates encryption key when bot joins LiveKit room. The rotated
key arrives via Matrix sync 3-5s later. Previous 2s wait was too
short - DEC_FAILED before new key arrived.

Extended wait to 10s. Added logging to bot.py to trace why late
key events were not being processed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-22 21:54:27 +02:00
parent 230c083b7b
commit 4b4a150fbf
2 changed files with 39 additions and 12 deletions

16
bot.py
View File

@@ -376,17 +376,25 @@ class Bot:
if event.sender == BOT_USER:
return # ignore our own key events
room_id = room.room_id
logger.info("Got encryption_keys timeline event from %s in %s", event.sender, room_id)
content = event.source.get("content", {})
device_id = content.get("device_id", "")
keys_list = content.get("keys", [])
logger.info("Got encryption_keys timeline event from %s in %s (device=%s, keys=%d, content_keys=%s)",
event.sender, room_id, device_id, len(keys_list), list(content.keys()))
vs = self.voice_sessions.get(room_id)
if vs:
content = event.source.get("content", {})
device_id = content.get("device_id", "")
for k in content.get("keys", []):
for k in keys_list:
if "key" in k and "index" in k:
key_b64 = k["key"]
key_b64 += "=" * (-len(key_b64) % 4)
key_bytes = base64.urlsafe_b64decode(key_b64)
vs.on_encryption_key(event.sender, device_id, key_bytes, k["index"])
else:
logger.warning("encryption_keys event missing key/index: %s", k)
if not keys_list:
logger.warning("encryption_keys event has empty keys list, full content: %s", content)
else:
logger.warning("No voice session for room %s to deliver encryption key", room_id)
return
if event.type != CALL_MEMBER_TYPE:

View File

@@ -520,19 +520,38 @@ class VoiceSession:
# and call on_encryption_key(), which updates self._caller_all_keys.
# NOTE: HTTP fetch is useless here — keys are Matrix-E2EE encrypted (m.room.encrypted).
pre_max_idx = max(self._caller_all_keys.keys()) if self._caller_all_keys else -1
logger.info("Waiting for EC key rotation via nio sync (current max_idx=%d)...", pre_max_idx)
for _attempt in range(4): # up to 2s (4 × 0.5s)
pre_key_count = len(self._caller_all_keys)
logger.info("Waiting for EC key rotation via nio sync (current max_idx=%d, keys=%d)...",
pre_max_idx, pre_key_count)
for _attempt in range(20): # up to 10s (20 × 0.5s)
await asyncio.sleep(0.5)
new_max = max(self._caller_all_keys.keys()) if self._caller_all_keys else -1
if new_max > pre_max_idx:
new_count = len(self._caller_all_keys)
# Detect rotation: new index OR new key bytes at existing index
if new_max > pre_max_idx or new_count > pre_key_count:
self._caller_key = self._caller_all_keys[new_max]
logger.info("Key rotated: index %d%d (%d bytes)",
pre_max_idx, new_max, len(self._caller_key))
logger.info("Key rotated: index %d%d, count %d%d (%d bytes, raw=%s)",
pre_max_idx, new_max, pre_key_count, new_count,
len(self._caller_key), self._caller_key.hex())
# Re-set all keys on the frame cryptor
if self._caller_identity:
for p in self.lk_room.remote_participants.values():
if p.identity == self._caller_identity:
try:
kp_rot = self.lk_room.e2ee_manager.key_provider
for idx, k in sorted(self._caller_all_keys.items()):
kp_rot.set_key(p.identity, k, idx)
logger.info("Rotation set_key[%d] for %s (%d bytes, raw=%s)",
idx, p.identity, len(k), k.hex())
except Exception as e:
logger.warning("Rotation set_key failed: %s", e)
break
break
if _attempt % 2 == 1: # log every 1s
logger.info("Key rotation wait %0.1fs: max_idx still %d", (_attempt + 1) * 0.5, new_max)
if _attempt % 4 == 3: # log every 2s
logger.info("Key rotation wait %0.1fs: max_idx still %d, keys=%d",
(_attempt + 1) * 0.5, new_max, new_count)
else:
logger.warning("No key rotation after 2s — using pre-join key[%d]", pre_max_idx)
logger.warning("No key rotation after 10s — using pre-join key[%d]", pre_max_idx)
# Find the remote participant, wait up to 10s if not yet connected
remote_identity = None