fix(e2ee): revert to per-participant mode with proper rotation handling

The shared-key mode uses HKDF with empty info, but Element Call JS uses
participant identity as HKDF info. Per-participant mode (set_key with
identity) matches EC's derivation.

Previous per-participant attempt (b65d043) failed because key rotation
(index 0→1 when bot joins) wasn't handled. Now on_encryption_key calls
set_key(caller_id, key, index) on rotation, so the bot stays in sync.

Changes:
- _build_e2ee_options(): remove the caller_key param; always pass shared_key=b"" (per-participant mode)
- _run(): set_key(remote_identity, caller_key, 0) for incoming decryption
- on_encryption_key: on rotation, call only set_key() (set_shared_key is no longer called)
This commit is contained in:
Christian Gick
2026-02-22 08:10:27 +02:00
parent 295c0ed5cb
commit 630a0de970

View File

@@ -69,16 +69,16 @@ def _generate_lk_jwt(room_id, user_id, device_id):
KDF_HKDF = 1
def _build_e2ee_options(caller_key: bytes = b"") -> rtc.E2EEOptions:
def _build_e2ee_options() -> rtc.E2EEOptions:
"""Build HKDF E2EE options matching Element Call's key derivation.
Pass caller_key as shared_key to initialize in true shared-key mode.
This ensures the Rust FFI decrypts incoming frames using caller's key.
Outgoing encryption is overridden via set_key(bot_identity, bot_key) after connect.
Use per-participant key mode (no shared_key) so the Rust FFI uses the
participant identity as HKDF info — matching Element Call's JS SFrame.
Keys are set post-connect via set_key(participant_identity, key, index).
Element Call uses: ratchetWindowSize=16, keyringSize=256, salt="LKFrameEncryptionKey"
"""
key_opts = rtc.KeyProviderOptions(
shared_key=caller_key,
shared_key=b"", # empty = per-participant mode; keys set via set_key() after connect
ratchet_window_size=16,
ratchet_salt=b"LKFrameEncryptionKey",
failure_tolerance=-1,
@@ -116,16 +116,15 @@ class VoiceSession:
self._caller_all_keys[index] = key
logger.info("E2EE key received from %s:%s (index=%d, %d bytes)",
sender, device_id, index, len(key))
# Live-update both shared key and per-participant key on rotation.
# Live-update per-participant key on rotation (Element Call rotates on bot join).
# Use only set_key() (per-participant mode) — matching EC's HKDF info=identity.
if self.lk_room and hasattr(self.lk_room, 'e2ee_manager'):
try:
kp = self.lk_room.e2ee_manager.key_provider
kp.set_shared_key(key, index)
logger.info("Live-updated shared key[%d] (%d bytes)", index, len(key))
# Also update per-participant key (belt+suspenders for rotations)
caller_id = self._caller_identity or f"{sender}:{device_id}"
kp.set_key(caller_id, key, index)
logger.info("Live-updated per-participant key[%d] for %s", index, caller_id)
logger.info("Live-updated per-participant key[%d] for %s (%d bytes)",
index, caller_id, len(key))
except Exception as e:
logger.warning("Failed to live-update caller key: %s", e)
@@ -228,10 +227,10 @@ class VoiceSession:
break
await asyncio.sleep(0.1)
# Connect with caller_key as shared_key so Rust FFI decrypts
# incoming audio in true shared-key mode. Outgoing encryption
# is overridden to bot_key via set_key(bot_identity) after connect.
e2ee_opts = _build_e2ee_options(self._caller_key or b"")
# Connect in per-participant mode (empty shared_key) so Rust FFI uses
# identity-based HKDF — matching Element Call's JS SFrame key derivation.
# Keys are set post-connect via set_key(identity, key, index).
e2ee_opts = _build_e2ee_options()
room_opts = rtc.RoomOptions(e2ee=e2ee_opts)
self.lk_room = rtc.Room()
@@ -280,30 +279,21 @@ class VoiceSession:
if remote_identity:
break
# Caller key was passed as shared_key at connect time — verify it's stored.
if self._caller_key:
logger.info("Caller key active as shared_key (%d bytes, index 0)", len(self._caller_key))
try:
stored = kp.export_shared_key(0)
if stored == self._caller_key:
logger.info("VERIFIED: shared key[0] matches caller key (%d bytes)", len(stored))
else:
logger.warning("MISMATCH: stored shared key[0] (%d bytes) != caller key (%d bytes)",
len(stored), len(self._caller_key))
logger.warning("stored=%s", stored.hex())
logger.warning("caller=%s", self._caller_key.hex())
except Exception as e:
logger.warning("Could not export shared key: %s", e)
else:
logger.warning("No caller E2EE key — incoming audio will be silence")
# Also set caller key as per-participant key (belt+suspenders: both modes)
# Set caller's per-participant key (HKDF info=caller_identity = matching EC JS).
if self._caller_key and remote_identity:
try:
kp.set_key(remote_identity, self._caller_key, 0)
logger.info("Also set per-participant key for %s (%d bytes)", remote_identity, len(self._caller_key))
logger.info("Set caller key for %s (index=0, %d bytes)", remote_identity, len(self._caller_key))
# Also set via caller_identity (belt+suspenders if identities differ)
if self._caller_identity and self._caller_identity != remote_identity:
kp.set_key(self._caller_identity, self._caller_key, 0)
logger.info("Also set caller key via identity %s", self._caller_identity)
except Exception as e:
logger.warning("Failed to set per-participant caller key: %s", e)
logger.warning("Failed to set caller per-participant key: %s", e)
elif not self._caller_key:
logger.warning("No caller E2EE key — incoming audio will be silence")
elif not remote_identity:
logger.warning("No remote participant found — caller key not set")
if remote_identity:
logger.info("Linking to remote participant: %s", remote_identity)