fix: E2EE key re-fetch now triggers on DEC_FAILED before cooldown

The re-fetch check was placed after the 5s cooldown early return, so it
never executed. It now triggers after 3+ DEC_FAILED events regardless of
the cooldown. Also relaxed the stale-key age filter from 60s to 300s to
handle key rotation during ongoing calls.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-03-10 13:51:10 +02:00
parent 488e50e73c
commit 1a0a2ec305

View File

@@ -575,7 +575,7 @@ class VoiceSession:
age_s = (now_ms - sent_ts) / 1000 if sent_ts else 999
logger.info("Found encryption_keys timeline event: sender=%s device=%s age=%.0fs",
sender, device, age_s)
if age_s > 60:
if age_s > 300: # 5 min — covers key rotation during long calls
logger.info("Skipping stale encryption_keys event (%.0fs old)", age_s)
continue
all_keys = {}
@@ -752,9 +752,36 @@ class VoiceSession:
# When remote participant needs key: NEW, MISSING_KEY, or DEC_FAILED (with cooldown)
if participant and p_id != bot_identity and int(state) in (0, 3, 4):
now = time.monotonic()
# DEC_FAILED: only re-key every 5s to avoid tight loops
if int(state) == 3:
_dec_failed_count[p_id] = _dec_failed_count.get(p_id, 0) + 1
# After 3+ DEC_FAILED: re-fetch key from timeline (key may have rotated)
if _dec_failed_count[p_id] >= 3 and not _refetch_in_progress:
_refetch_in_progress = True
_p_id_copy = p_id # capture for closure
async def _refetch_key():
nonlocal _refetch_in_progress
try:
logger.info("DEC_FAILED x%d — re-fetching key from timeline",
_dec_failed_count.get(_p_id_copy, 0))
new_key = await self._fetch_encryption_key_http()
if new_key and new_key != self._caller_key:
logger.info("Got NEW key from timeline re-fetch (%s)",
new_key.hex()[:8])
self._caller_key = new_key
kp_r = self.lk_room.e2ee_manager.key_provider
for idx, base_k in sorted(self._caller_all_keys.items()):
_derive_and_set_key(kp_r, _p_id_copy, base_k, idx)
_dec_failed_count[_p_id_copy] = 0
elif new_key:
logger.info("Re-fetch returned same key — no rotation")
else:
logger.info("Re-fetch returned no fresh key")
except Exception as exc:
logger.warning("Key re-fetch failed: %s", exc)
finally:
_refetch_in_progress = False
asyncio.ensure_future(_refetch_key())
# Cooldown: only re-key every 5s to avoid tight loops
last = _last_rekey_time.get(p_id, 0)
if (now - last) < 5.0:
return
@@ -768,32 +795,6 @@ class VoiceSession:
idx, p_id, state_name)
except Exception as exc:
logger.warning("e2ee_state set_key failed: %s", exc)
# After 3+ DEC_FAILED: re-fetch key from timeline (might have rotated)
if _dec_failed_count.get(p_id, 0) >= 3 and not _refetch_in_progress:
_refetch_in_progress = True
async def _refetch_key():
nonlocal _refetch_in_progress
try:
logger.info("DEC_FAILED x%d — re-fetching key from timeline",
_dec_failed_count.get(p_id, 0))
new_key = await self._fetch_encryption_key_http()
if new_key and new_key != self._caller_key:
logger.info("Got NEW key from timeline re-fetch (%s)",
new_key.hex()[:8])
self._caller_key = new_key
kp_r = self.lk_room.e2ee_manager.key_provider
for idx, base_k in sorted(self._caller_all_keys.items()):
_derive_and_set_key(kp_r, p_id, base_k, idx)
_dec_failed_count[p_id] = 0
elif new_key:
logger.info("Re-fetch returned same key — no rotation")
else:
logger.info("Re-fetch returned no fresh key")
except Exception as exc:
logger.warning("Key re-fetch failed: %s", exc)
finally:
_refetch_in_progress = False
asyncio.ensure_future(_refetch_key())
await self.lk_room.connect(self.lk_url, jwt, options=room_opts)
logger.info("Connected (E2EE=HKDF), remote=%d",