fix: Switch E2EE from shared key to per-participant key mode

Element Call uses per-participant keys via MatrixKeyProvider.onSetEncryptionKey(),
not shared key mode. This mismatch was causing silence with E2EE enabled.

- Set bot's own key and caller's key separately via e2ee_manager.key_provider.set_key()
- Live-update caller key when received after connect
- Fall back to set_shared_key if the per-participant API is unavailable

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-21 18:50:19 +02:00
parent 9aef846619
commit 533847c952
2 changed files with 89 additions and 30 deletions

16
bot.py
View File

@@ -448,12 +448,13 @@ class Bot:
model = self.room_models.get(room_id, DEFAULT_MODEL)
caller_device_id = content.get("device_id", "")
# Publish a placeholder key first to trigger Element Call
# to share its key with us. We'll republish the real shared
# key once we receive the caller's key.
# Generate bot's E2EE key and publish it so Element Call
# can decrypt our audio. This also triggers Element Call
# to share its key with us.
import secrets
placeholder_key = secrets.token_bytes(16)
await self._publish_encryption_key(room_id, placeholder_key)
bot_key = secrets.token_bytes(16)
# Publish bot's key early so Element Call can decrypt our audio
await self._publish_encryption_key(room_id, bot_key)
vs = VoiceSession(
nio_client=self.client,
@@ -461,8 +462,9 @@ class Bot:
device_id=BOT_DEVICE_ID,
lk_url=LK_URL,
model=model,
publish_key_cb=lambda key: asyncio.ensure_future(
self._publish_encryption_key(room_id, key)),
publish_key_cb=lambda key, rid=room_id: asyncio.ensure_future(
self._publish_encryption_key(rid, key)),
bot_key=bot_key,
)
# Check timeline for caller's key

103
voice.py
View File

@@ -87,7 +87,7 @@ def _build_e2ee_options(shared_key: bytes) -> rtc.E2EEOptions:
class VoiceSession:
def __init__(self, nio_client, room_id, device_id, lk_url, model="claude-sonnet",
publish_key_cb=None):
publish_key_cb=None, bot_key: bytes | None = None):
self.nio_client = nio_client
self.room_id = room_id
self.device_id = device_id
@@ -97,20 +97,39 @@ class VoiceSession:
self.session = None
self._task = None
self._http_session = None
self._e2ee_key: bytes | None = None
self._caller_key: bytes | None = None
self._caller_identity: str | None = None # "sender:device_id" format
self._bot_key: bytes = bot_key or os.urandom(16)
self._publish_key_cb = publish_key_cb
def on_encryption_key(self, sender, device_id, key, index):
"""Receive E2EE key from Element Call participant."""
if key and not self._e2ee_key:
self._e2ee_key = key
logger.info("E2EE key received from %s:%s (index=%d, %d bytes)",
sender, device_id, index, len(key))
"""Receive E2EE key from Element Call participant.
If the room is already connected, immediately set the key on the
key provider so we can decrypt the caller's audio.
"""
if not key:
return
identity = _make_lk_identity(sender, device_id)
self._caller_key = key
self._caller_identity = identity
logger.info("E2EE key received from %s:%s (identity=%s, index=%d, %d bytes)",
sender, device_id, identity, index, len(key))
# If already connected, set key on the key provider immediately
if self.lk_room:
try:
kp = self.lk_room.e2ee_manager.key_provider
kp.set_key(identity, key, key_index=index)
logger.info("Live-updated caller E2EE key for %s", identity)
except Exception:
logger.warning("Could not live-update caller E2EE key", exc_info=True)
async def _fetch_encryption_key_http(self) -> bytes | None:
"""Fetch encryption key from room timeline (NOT state) via Matrix HTTP API.
Element Call distributes encryption keys as timeline events, not state.
Also sets self._caller_identity from the event sender + device_id.
"""
import httpx
homeserver = str(self.nio_client.homeserver)
@@ -134,14 +153,16 @@ class VoiceSession:
if sender == user_id:
continue # skip our own key
content = evt.get("content", {})
logger.info("Found encryption_keys timeline event: sender=%s content=%s",
sender, content)
caller_device = content.get("device_id", "")
logger.info("Found encryption_keys timeline event: sender=%s device=%s",
sender, caller_device)
if caller_device:
self._caller_identity = _make_lk_identity(sender, caller_device)
for k in content.get("keys", []):
key_b64 = k.get("key", "")
if key_b64:
key_b64 += "=" * (-len(key_b64) % 4)
import base64 as b64
return b64.urlsafe_b64decode(key_b64)
return base64.urlsafe_b64decode(key_b64)
logger.info("No encryption_keys events in last %d timeline events", len(events))
except Exception as e:
logger.warning("HTTP encryption key fetch failed: %s", e)
@@ -177,26 +198,25 @@ class VoiceSession:
# Check timeline for caller's encryption key
caller_key = await self._fetch_encryption_key_http()
if caller_key:
self._e2ee_key = caller_key
self._caller_key = caller_key
logger.info("Got caller E2EE key via timeline (%d bytes)", len(caller_key))
if not self._e2ee_key:
if not self._caller_key:
# Wait up to 15s for key via sync handler (bot.py forwards
# encryption_keys timeline events to on_encryption_key)
logger.info("No key in timeline yet, waiting for sync...")
for _ in range(150):
if self._e2ee_key:
if self._caller_key:
break
await asyncio.sleep(0.1)
# E2EE disabled — Element Call key derivation mismatch not yet resolved.
# Audio pipeline confirmed working without E2EE.
if self._e2ee_key:
logger.info("Caller E2EE key available (%d bytes) — E2EE disabled pending fix",
len(self._e2ee_key))
if self._publish_key_cb:
self._publish_key_cb(self._e2ee_key)
e2ee_opts = None
# Publish bot's own key so caller can decrypt our audio
if self._publish_key_cb:
self._publish_key_cb(self._bot_key)
# Build E2EE options with empty shared key — we set per-participant
# keys after connect via e2ee_manager.key_provider.set_key()
e2ee_opts = _build_e2ee_options(b"")
room_opts = rtc.RoomOptions(e2ee=e2ee_opts)
self.lk_room = rtc.Room()
@@ -214,9 +234,46 @@ class VoiceSession:
logger.info("Track sub: %s %s kind=%s", p.identity, pub.sid, t.kind)
await self.lk_room.connect(self.lk_url, jwt, options=room_opts)
logger.info("Connected (E2EE=HKDF), remote=%d",
logger.info("Connected (E2EE=per-participant), remote=%d",
len(self.lk_room.remote_participants))
# Set per-participant E2EE keys via key provider
bot_identity = _make_lk_identity(user_id, self.device_id)
try:
kp = self.lk_room.e2ee_manager.key_provider
# Set bot's own key (encrypts outgoing audio)
kp.set_key(bot_identity, self._bot_key, key_index=0)
logger.info("Set bot E2EE key for identity=%s (%d bytes)",
bot_identity, len(self._bot_key))
# Set caller's key (decrypts incoming audio)
if self._caller_key and self._caller_identity:
kp.set_key(self._caller_identity, self._caller_key, key_index=0)
logger.info("Set caller E2EE key for identity=%s (%d bytes)",
self._caller_identity, len(self._caller_key))
elif self._caller_key:
# Caller identity not yet known — try to get from remote participants
for p in self.lk_room.remote_participants.values():
kp.set_key(p.identity, self._caller_key, key_index=0)
logger.info("Set caller E2EE key for identity=%s (%d bytes)",
p.identity, len(self._caller_key))
break
else:
logger.warning("No caller E2EE key available — caller audio will be silent")
except AttributeError:
logger.warning("e2ee_manager.key_provider not available — "
"falling back to shared key mode")
# Fallback: set shared key after connect if per-participant isn't supported
if self._caller_key:
try:
kp = self.lk_room.e2ee_manager.key_provider
kp.set_shared_key(self._caller_key, key_index=0)
logger.info("Fallback: set shared E2EE key (%d bytes)",
len(self._caller_key))
except Exception:
logger.exception("Fallback shared key also failed")
# Find the remote participant, wait up to 10s if not yet connected
remote_identity = None
for p in self.lk_room.remote_participants.values():