fix: Switch E2EE from shared key to per-participant key mode

Element Call uses per-participant keys via MatrixKeyProvider.onSetEncryptionKey(),
not shared key mode. This key-mode mismatch was causing silence with E2EE enabled.

- Set bot's own key and caller's key separately via e2ee_manager.key_provider.set_key()
- Live-update caller key when received after connect
- Fall back to set_shared_key if the per-participant API is unavailable

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-21 18:50:19 +02:00
parent 9aef846619
commit 533847c952
2 changed files with 89 additions and 30 deletions

16
bot.py
View File

@@ -448,12 +448,13 @@ class Bot:
model = self.room_models.get(room_id, DEFAULT_MODEL) model = self.room_models.get(room_id, DEFAULT_MODEL)
caller_device_id = content.get("device_id", "") caller_device_id = content.get("device_id", "")
# Publish a placeholder key first to trigger Element Call # Generate bot's E2EE key and publish it so Element Call
# to share its key with us. We'll republish the real shared # can decrypt our audio. This also triggers Element Call
# key once we receive the caller's key. # to share its key with us.
import secrets import secrets
placeholder_key = secrets.token_bytes(16) bot_key = secrets.token_bytes(16)
await self._publish_encryption_key(room_id, placeholder_key) # Publish bot's key early so Element Call can decrypt our audio
await self._publish_encryption_key(room_id, bot_key)
vs = VoiceSession( vs = VoiceSession(
nio_client=self.client, nio_client=self.client,
@@ -461,8 +462,9 @@ class Bot:
device_id=BOT_DEVICE_ID, device_id=BOT_DEVICE_ID,
lk_url=LK_URL, lk_url=LK_URL,
model=model, model=model,
publish_key_cb=lambda key: asyncio.ensure_future( publish_key_cb=lambda key, rid=room_id: asyncio.ensure_future(
self._publish_encryption_key(room_id, key)), self._publish_encryption_key(rid, key)),
bot_key=bot_key,
) )
# Check timeline for caller's key # Check timeline for caller's key

103
voice.py
View File

@@ -87,7 +87,7 @@ def _build_e2ee_options(shared_key: bytes) -> rtc.E2EEOptions:
class VoiceSession: class VoiceSession:
def __init__(self, nio_client, room_id, device_id, lk_url, model="claude-sonnet", def __init__(self, nio_client, room_id, device_id, lk_url, model="claude-sonnet",
publish_key_cb=None): publish_key_cb=None, bot_key: bytes | None = None):
self.nio_client = nio_client self.nio_client = nio_client
self.room_id = room_id self.room_id = room_id
self.device_id = device_id self.device_id = device_id
@@ -97,20 +97,39 @@ class VoiceSession:
self.session = None self.session = None
self._task = None self._task = None
self._http_session = None self._http_session = None
self._e2ee_key: bytes | None = None self._caller_key: bytes | None = None
self._caller_identity: str | None = None # "sender:device_id" format
self._bot_key: bytes = bot_key or os.urandom(16)
self._publish_key_cb = publish_key_cb self._publish_key_cb = publish_key_cb
def on_encryption_key(self, sender, device_id, key, index): def on_encryption_key(self, sender, device_id, key, index):
"""Receive E2EE key from Element Call participant.""" """Receive E2EE key from Element Call participant.
if key and not self._e2ee_key:
self._e2ee_key = key If the room is already connected, immediately set the key on the
logger.info("E2EE key received from %s:%s (index=%d, %d bytes)", key provider so we can decrypt the caller's audio.
sender, device_id, index, len(key)) """
if not key:
return
identity = _make_lk_identity(sender, device_id)
self._caller_key = key
self._caller_identity = identity
logger.info("E2EE key received from %s:%s (identity=%s, index=%d, %d bytes)",
sender, device_id, identity, index, len(key))
# If already connected, set key on the key provider immediately
if self.lk_room:
try:
kp = self.lk_room.e2ee_manager.key_provider
kp.set_key(identity, key, key_index=index)
logger.info("Live-updated caller E2EE key for %s", identity)
except Exception:
logger.warning("Could not live-update caller E2EE key", exc_info=True)
async def _fetch_encryption_key_http(self) -> bytes | None: async def _fetch_encryption_key_http(self) -> bytes | None:
"""Fetch encryption key from room timeline (NOT state) via Matrix HTTP API. """Fetch encryption key from room timeline (NOT state) via Matrix HTTP API.
Element Call distributes encryption keys as timeline events, not state. Element Call distributes encryption keys as timeline events, not state.
Also sets self._caller_identity from the event sender + device_id.
""" """
import httpx import httpx
homeserver = str(self.nio_client.homeserver) homeserver = str(self.nio_client.homeserver)
@@ -134,14 +153,16 @@ class VoiceSession:
if sender == user_id: if sender == user_id:
continue # skip our own key continue # skip our own key
content = evt.get("content", {}) content = evt.get("content", {})
logger.info("Found encryption_keys timeline event: sender=%s content=%s", caller_device = content.get("device_id", "")
sender, content) logger.info("Found encryption_keys timeline event: sender=%s device=%s",
sender, caller_device)
if caller_device:
self._caller_identity = _make_lk_identity(sender, caller_device)
for k in content.get("keys", []): for k in content.get("keys", []):
key_b64 = k.get("key", "") key_b64 = k.get("key", "")
if key_b64: if key_b64:
key_b64 += "=" * (-len(key_b64) % 4) key_b64 += "=" * (-len(key_b64) % 4)
import base64 as b64 return base64.urlsafe_b64decode(key_b64)
return b64.urlsafe_b64decode(key_b64)
logger.info("No encryption_keys events in last %d timeline events", len(events)) logger.info("No encryption_keys events in last %d timeline events", len(events))
except Exception as e: except Exception as e:
logger.warning("HTTP encryption key fetch failed: %s", e) logger.warning("HTTP encryption key fetch failed: %s", e)
@@ -177,26 +198,25 @@ class VoiceSession:
# Check timeline for caller's encryption key # Check timeline for caller's encryption key
caller_key = await self._fetch_encryption_key_http() caller_key = await self._fetch_encryption_key_http()
if caller_key: if caller_key:
self._e2ee_key = caller_key self._caller_key = caller_key
logger.info("Got caller E2EE key via timeline (%d bytes)", len(caller_key)) logger.info("Got caller E2EE key via timeline (%d bytes)", len(caller_key))
if not self._e2ee_key: if not self._caller_key:
# Wait up to 15s for key via sync handler (bot.py forwards # Wait up to 15s for key via sync handler (bot.py forwards
# encryption_keys timeline events to on_encryption_key) # encryption_keys timeline events to on_encryption_key)
logger.info("No key in timeline yet, waiting for sync...") logger.info("No key in timeline yet, waiting for sync...")
for _ in range(150): for _ in range(150):
if self._e2ee_key: if self._caller_key:
break break
await asyncio.sleep(0.1) await asyncio.sleep(0.1)
# E2EE disabled — Element Call key derivation mismatch not yet resolved. # Publish bot's own key so caller can decrypt our audio
# Audio pipeline confirmed working without E2EE. if self._publish_key_cb:
if self._e2ee_key: self._publish_key_cb(self._bot_key)
logger.info("Caller E2EE key available (%d bytes) — E2EE disabled pending fix",
len(self._e2ee_key)) # Build E2EE options with empty shared key — we set per-participant
if self._publish_key_cb: # keys after connect via e2ee_manager.key_provider.set_key()
self._publish_key_cb(self._e2ee_key) e2ee_opts = _build_e2ee_options(b"")
e2ee_opts = None
room_opts = rtc.RoomOptions(e2ee=e2ee_opts) room_opts = rtc.RoomOptions(e2ee=e2ee_opts)
self.lk_room = rtc.Room() self.lk_room = rtc.Room()
@@ -214,9 +234,46 @@ class VoiceSession:
logger.info("Track sub: %s %s kind=%s", p.identity, pub.sid, t.kind) logger.info("Track sub: %s %s kind=%s", p.identity, pub.sid, t.kind)
await self.lk_room.connect(self.lk_url, jwt, options=room_opts) await self.lk_room.connect(self.lk_url, jwt, options=room_opts)
logger.info("Connected (E2EE=HKDF), remote=%d", logger.info("Connected (E2EE=per-participant), remote=%d",
len(self.lk_room.remote_participants)) len(self.lk_room.remote_participants))
# Set per-participant E2EE keys via key provider
bot_identity = _make_lk_identity(user_id, self.device_id)
try:
kp = self.lk_room.e2ee_manager.key_provider
# Set bot's own key (encrypts outgoing audio)
kp.set_key(bot_identity, self._bot_key, key_index=0)
logger.info("Set bot E2EE key for identity=%s (%d bytes)",
bot_identity, len(self._bot_key))
# Set caller's key (decrypts incoming audio)
if self._caller_key and self._caller_identity:
kp.set_key(self._caller_identity, self._caller_key, key_index=0)
logger.info("Set caller E2EE key for identity=%s (%d bytes)",
self._caller_identity, len(self._caller_key))
elif self._caller_key:
# Caller identity not yet known — try to get from remote participants
for p in self.lk_room.remote_participants.values():
kp.set_key(p.identity, self._caller_key, key_index=0)
logger.info("Set caller E2EE key for identity=%s (%d bytes)",
p.identity, len(self._caller_key))
break
else:
logger.warning("No caller E2EE key available — caller audio will be silent")
except AttributeError:
logger.warning("e2ee_manager.key_provider not available — "
"falling back to shared key mode")
# Fallback: set shared key after connect if per-participant isn't supported
if self._caller_key:
try:
kp = self.lk_room.e2ee_manager.key_provider
kp.set_shared_key(self._caller_key, key_index=0)
logger.info("Fallback: set shared E2EE key (%d bytes)",
len(self._caller_key))
except Exception:
logger.exception("Fallback shared key also failed")
# Find the remote participant, wait up to 10s if not yet connected # Find the remote participant, wait up to 10s if not yet connected
remote_identity = None remote_identity = None
for p in self.lk_room.remote_participants.values(): for p in self.lk_room.remote_participants.values():