fix(voice): set caller key in on_track_subscribed — frame cryptor must exist for HKDF to apply

Root cause: C++ set_key() only applies HKDF when impl_->GetKey(pid) returns a valid
handler, which requires the frame cryptor for that participant to be initialized.
Frame cryptors are created at track subscription time, not at connect time.

Calling set_key(caller_identity, key) immediately after connect() therefore skips HKDF
derivation: impl_->GetKey returns null, so the raw key is stored and decryption fails with DEC_FAILED.

Fix: move caller key setting to on_track_subscribed, where the frame cryptor is guaranteed to exist.
Also update on_encryption_key to call set_key() when a rotated key arrives.
This commit is contained in:
Christian Gick
2026-02-22 14:05:54 +02:00
parent 190b35945c
commit c379064f80

120
voice.py
View File

@@ -144,18 +144,20 @@ class VoiceSession:
self._caller_all_keys[index] = key self._caller_all_keys[index] = key
logger.info("E2EE key received from %s:%s (index=%d, %d bytes)", logger.info("E2EE key received from %s:%s (index=%d, %d bytes)",
sender, device_id, index, len(key)) sender, device_id, index, len(key))
# Live-update shared_key with pre-derived AES key on rotation. # Live-update key on rotation — use set_key() which applies HKDF via Rust FFI.
# set_shared_key() bypasses Rust FFI KDF — we pre-derive with Python HKDF. # At this point the track is usually already subscribed so frame cryptor exists.
# Per-participant set_key() is NOT used for caller (Rust HKDF may differ from JS). if self.lk_room and self._caller_identity:
if self.lk_room and hasattr(self.lk_room, 'e2ee_manager'): caller_lk_id = self._caller_identity # e.g. "@user:server.eu:DEVICEID"
try: for p in self.lk_room.remote_participants.values():
kp = self.lk_room.e2ee_manager.key_provider if p.identity == caller_lk_id:
derived = _hkdf_derive(key) try:
kp.set_shared_key(derived, index) kp = self.lk_room.e2ee_manager.key_provider
logger.info("Live-updated shared_key (pre-derived)[%d] for caller (%d bytes)", ok = kp.set_key(p.identity, index, key)
index, len(derived)) logger.info("Live-updated set_key[%d] for %s (ok=%s, %d bytes)",
except Exception as e: index, p.identity, ok, len(key))
logger.warning("Failed to live-update caller shared_key: %s", e) except Exception as e:
logger.warning("Failed to live-update caller key: %s", e)
break
async def _fetch_encryption_key_http(self) -> bytes | None: async def _fetch_encryption_key_http(self) -> bytes | None:
"""Fetch encryption key from room timeline (NOT state) via Matrix HTTP API. """Fetch encryption key from room timeline (NOT state) via Matrix HTTP API.
@@ -256,28 +258,29 @@ class VoiceSession:
break break
await asyncio.sleep(0.1) await asyncio.sleep(0.1)
# DIAGNOSTIC: disable E2EE entirely to check if EC sends encrypted or plaintext. # E2EE: re-enabled after diagnostic confirmed EC encrypts audio.
# If VAD triggers → EC sends plaintext (E2EE disabled on EC side). # Root cause found: set_key() only applies HKDF if the frame cryptor for that
# If VAD silent → EC sends encrypted (frame format or key issue). # participant already exists. Must call set_key() in on_track_subscribed, not at connect time.
e2ee_opts = None # TODO: re-enable after diagnosis key_opts = rtc.KeyProviderOptions(
shared_key=b"", # per-participant mode
ratchet_window_size=16,
ratchet_salt=b"LKFrameEncryptionKey",
failure_tolerance=-1,
key_ring_size=16,
key_derivation_function=KDF_HKDF, # Rust applies HKDF matching EC JS SDK
)
e2ee_opts = rtc.E2EEOptions(
encryption_type=rtc.EncryptionType.GCM,
key_provider_options=key_opts,
)
room_opts = rtc.RoomOptions(e2ee=e2ee_opts) room_opts = rtc.RoomOptions(e2ee=e2ee_opts)
self.lk_room = rtc.Room() self.lk_room = rtc.Room()
@self.lk_room.on("participant_connected") @self.lk_room.on("participant_connected")
def on_p(p): def on_p(p):
logger.info("Participant connected: %s", p.identity) logger.info("Participant connected: %s", p.identity)
# Apply any already-received caller keys to the new participant's LK identity. # Note: do NOT set keys here — frame cryptor not initialized yet at participant_connected.
# This handles the case where key arrives before the participant joins LiveKit. # Keys are set in on_track_subscribed where the frame cryptor definitely exists.
if self._caller_all_keys:
try:
kp_local = self.lk_room.e2ee_manager.key_provider
# Use shared_key with pre-derived AES key (not set_key which applies Rust HKDF)
for idx, base_k in sorted(self._caller_all_keys.items()):
kp_local.set_shared_key(_hkdf_derive(base_k), idx)
logger.info("on_p: set shared_key (pre-derived) for %d caller key(s)",
len(self._caller_all_keys))
except Exception as exc:
logger.warning("on_p: failed to set caller shared_key: %s", exc)
@self.lk_room.on("track_published") @self.lk_room.on("track_published")
def on_tp(pub, p): def on_tp(pub, p):
@@ -286,6 +289,23 @@ class VoiceSession:
@self.lk_room.on("track_subscribed") @self.lk_room.on("track_subscribed")
def on_ts(t, pub, p): def on_ts(t, pub, p):
logger.info("Track sub: %s %s kind=%s", p.identity, pub.sid, t.kind) logger.info("Track sub: %s %s kind=%s", p.identity, pub.sid, t.kind)
# *** KEY FIX: set_key() with KDF_HKDF only applies HKDF when the frame cryptor
# for this participant already exists. The frame cryptor is created at track
# subscription time. Calling set_key() BEFORE track subscription (at connect)
# skips HKDF derivation → raw key stored → DEC_FAILED.
# Solution: set caller key HERE, after frame cryptor is initialized.
if int(t.kind) == 1 and e2ee_opts is not None: # audio track only
try:
kp_local = self.lk_room.e2ee_manager.key_provider
if self._caller_all_keys:
for idx, base_k in sorted(self._caller_all_keys.items()):
ok = kp_local.set_key(p.identity, idx, base_k)
logger.info("on_ts: set_key[%d] for %s (ok=%s, %d bytes)",
idx, p.identity, ok, len(base_k))
else:
logger.warning("on_ts: no caller keys available yet — will set on key receipt")
except Exception as exc:
logger.warning("on_ts: set_key failed: %s", exc)
_e2ee_state_names = {0:"NEW",1:"OK",2:"ENC_FAILED",3:"DEC_FAILED",4:"MISSING_KEY",5:"RATCHETED",6:"INTERNAL_ERR"} _e2ee_state_names = {0:"NEW",1:"OK",2:"ENC_FAILED",3:"DEC_FAILED",4:"MISSING_KEY",5:"RATCHETED",6:"INTERNAL_ERR"}
@self.lk_room.on("e2ee_state_changed") @self.lk_room.on("e2ee_state_changed")
@@ -298,27 +318,12 @@ class VoiceSession:
logger.info("Connected (E2EE=HKDF), remote=%d", logger.info("Connected (E2EE=HKDF), remote=%d",
len(self.lk_room.remote_participants)) len(self.lk_room.remote_participants))
# *** FIX: Set keys immediately after connect — BEFORE the rotation wait. # Set bot's own key immediately after connect — local frame cryptor exists at connect time.
# The caller's track is subscribed during the wait; if no key is set when # CALLER keys are set in on_track_subscribed (NOT here) because the caller's frame cryptor
# the frame cryptor is first created it enters DEC_FAILED and drops all frames # is only created when their track arrives. Calling set_key() before that skips HKDF.
# even after the key is set later. kp = self.lk_room.e2ee_manager.key_provider
kp = None ok = kp.set_key(bot_identity, 0, self._bot_key)
if e2ee_opts is not None: logger.info("Set bot key for %s (ok=%s, %d bytes)", bot_identity, ok, len(self._bot_key))
kp = self.lk_room.e2ee_manager.key_provider
kp.set_key(bot_identity, self._bot_key, 0)
logger.info("Set bot raw key for %s (%d bytes)", bot_identity, len(self._bot_key))
# Caller keys: use set_shared_key with pre-derived AES (bypasses Rust HKDF).
# Per-participant set_key is NOT called for caller — Rust HKDF may not match EC's JS HKDF.
if self._caller_all_keys:
for idx, base_k in sorted(self._caller_all_keys.items()):
kp.set_shared_key(_hkdf_derive(base_k), idx)
logger.info("Early-set shared_key (pre-derived) for caller indices %s",
list(self._caller_all_keys.keys()))
elif self._caller_key:
kp.set_shared_key(_hkdf_derive(self._caller_key), 0)
logger.info("Early-set shared_key (pre-derived) caller key[0] (%d bytes)", 16)
else:
logger.info("E2EE disabled (diagnostic mode) — skipping key setup")
# Element Call rotates its encryption key when bot joins the LiveKit room. # Element Call rotates its encryption key when bot joins the LiveKit room.
# EC sends the new key via Matrix (Megolm-encrypted); nio sync will decrypt it # EC sends the new key via Matrix (Megolm-encrypted); nio sync will decrypt it
@@ -357,16 +362,19 @@ class VoiceSession:
# Set shared_key with pre-derived AES key for caller decryption. # Set shared_key with pre-derived AES key for caller decryption.
# NOT using set_key() for caller — Rust HKDF may produce different result than EC's JS HKDF. # NOT using set_key() for caller — Rust HKDF may produce different result than EC's JS HKDF.
# set_shared_key() stores key raw (no KDF applied) — we pre-derive in Python. # set_shared_key() stores key raw (no KDF applied) — we pre-derive in Python.
if kp is not None and self._caller_all_keys: # After key rotation wait: if track already subscribed, set rotated key.
# (Usually on_track_subscribed handles this, but if track arrived before rotation,
# the rotated key needs to be set here for the already-subscribed participant.)
if self._caller_all_keys and remote_identity:
try: try:
for idx, base_k in sorted(self._caller_all_keys.items()): for idx, base_k in sorted(self._caller_all_keys.items()):
derived = _hkdf_derive(base_k) ok = kp.set_key(remote_identity, idx, base_k)
kp.set_shared_key(derived, idx) logger.info("Post-rotation set_key[%d] for %s (ok=%s)",
logger.info("Set shared_key (pre-derived)[%d] (%d bytes)", idx, len(derived)) idx, remote_identity, ok)
except Exception as e: except Exception as e:
logger.warning("Failed to set caller shared_key: %s", e) logger.warning("Post-rotation set_key failed: %s", e)
elif e2ee_opts is not None and not self._caller_all_keys: elif not self._caller_all_keys:
logger.warning("No caller E2EE keys — incoming audio will be silence") logger.warning("No caller E2EE keys received — incoming audio will be silence")
if remote_identity: if remote_identity:
logger.info("Linking to remote participant: %s", remote_identity) logger.info("Linking to remote participant: %s", remote_identity)