From 4b4a150fbff3802cebec6d1401d795b44433e83c Mon Sep 17 00:00:00 2001 From: Christian Gick Date: Sun, 22 Feb 2026 21:54:27 +0200 Subject: [PATCH] fix(e2ee): extend key rotation wait to 10s, debug late key events EC rotates encryption key when bot joins LiveKit room. The rotated key arrives via Matrix sync 3-5s later. Previous 2s wait was too short - DEC_FAILED before new key arrived. Extended wait to 10s. Added logging to bot.py to trace why late key events were not being processed. Co-Authored-By: Claude Opus 4.6 --- bot.py | 16 ++++++++++++---- voice.py | 35 +++++++++++++++++++++++++++-------- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/bot.py b/bot.py index 29a4a0e..8f5dd1a 100644 --- a/bot.py +++ b/bot.py @@ -376,17 +376,25 @@ class Bot: if event.sender == BOT_USER: return # ignore our own key events room_id = room.room_id - logger.info("Got encryption_keys timeline event from %s in %s", event.sender, room_id) + content = event.source.get("content", {}) + device_id = content.get("device_id", "") + keys_list = content.get("keys", []) + logger.info("Got encryption_keys timeline event from %s in %s (device=%s, keys=%d, content_keys=%s)", + event.sender, room_id, device_id, len(keys_list), list(content.keys())) vs = self.voice_sessions.get(room_id) if vs: - content = event.source.get("content", {}) - device_id = content.get("device_id", "") - for k in content.get("keys", []): + for k in keys_list: if "key" in k and "index" in k: key_b64 = k["key"] key_b64 += "=" * (-len(key_b64) % 4) key_bytes = base64.urlsafe_b64decode(key_b64) vs.on_encryption_key(event.sender, device_id, key_bytes, k["index"]) + else: + logger.warning("encryption_keys event missing key/index: %s", k) + if not keys_list: + logger.warning("encryption_keys event has empty keys list, full content: %s", content) + else: + logger.warning("No voice session for room %s to deliver encryption key", room_id) return if event.type != CALL_MEMBER_TYPE: diff --git a/voice.py b/voice.py index c89ad64..533d205 100644 --- a/voice.py +++ b/voice.py @@ -520,19 +520,38 @@ class VoiceSession: # and call on_encryption_key(), which updates self._caller_all_keys. # NOTE: HTTP fetch is useless here — keys are Matrix-E2EE encrypted (m.room.encrypted). pre_max_idx = max(self._caller_all_keys.keys()) if self._caller_all_keys else -1 - logger.info("Waiting for EC key rotation via nio sync (current max_idx=%d)...", pre_max_idx) - for _attempt in range(4): # up to 2s (4 × 0.5s) + pre_key_count = len(self._caller_all_keys) + logger.info("Waiting for EC key rotation via nio sync (current max_idx=%d, keys=%d)...", + pre_max_idx, pre_key_count) + for _attempt in range(20): # up to 10s (20 × 0.5s) await asyncio.sleep(0.5) new_max = max(self._caller_all_keys.keys()) if self._caller_all_keys else -1 - if new_max > pre_max_idx: + new_count = len(self._caller_all_keys) + # Detect rotation: new index OR new key bytes at existing index + if new_max > pre_max_idx or new_count > pre_key_count: self._caller_key = self._caller_all_keys[new_max] - logger.info("Key rotated: index %d→%d (%d bytes)", - pre_max_idx, new_max, len(self._caller_key)) + logger.info("Key rotated: index %d→%d, count %d→%d (%d bytes, raw=%s)", + pre_max_idx, new_max, pre_key_count, new_count, + len(self._caller_key), self._caller_key.hex()) + # Re-set all keys on the frame cryptor + if self._caller_identity: + for p in self.lk_room.remote_participants.values(): + if p.identity == self._caller_identity: + try: + kp_rot = self.lk_room.e2ee_manager.key_provider + for idx, k in sorted(self._caller_all_keys.items()): + kp_rot.set_key(p.identity, k, idx) + logger.info("Rotation set_key[%d] for %s (%d bytes, raw=%s)", + idx, p.identity, len(k), k.hex()) + except Exception as e: + logger.warning("Rotation set_key failed: %s", e) + break break - if _attempt % 2 == 1: # log every 1s - logger.info("Key rotation wait %0.1fs: max_idx still %d", (_attempt + 1) * 0.5, new_max) + if _attempt % 4 == 3: # log every 2s + logger.info("Key rotation wait %0.1fs: max_idx still %d, keys=%d", + (_attempt + 1) * 0.5, new_max, new_count) else: - logger.warning("No key rotation after 2s — using pre-join key[%d]", pre_max_idx) + logger.warning("No key rotation after 10s — using pre-join key[%d]", pre_max_idx) # Find the remote participant, wait up to 10s if not yet connected remote_identity = None