fix: Switch E2EE from shared key to per-participant key mode
Element Call uses per-participant keys via MatrixKeyProvider.onSetEncryptionKey(), not shared-key mode. Using shared-key mode on the bot side was causing silence with E2EE enabled.

- Set the bot's own key and the caller's key separately via e2ee_manager.key_provider.set_key()
- Live-update the caller's key when it is received after connect
- Fall back to set_shared_key if the per-participant API is unavailable

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
16
bot.py
16
bot.py
@@ -448,12 +448,13 @@ class Bot:
|
|||||||
model = self.room_models.get(room_id, DEFAULT_MODEL)
|
model = self.room_models.get(room_id, DEFAULT_MODEL)
|
||||||
caller_device_id = content.get("device_id", "")
|
caller_device_id = content.get("device_id", "")
|
||||||
|
|
||||||
# Publish a placeholder key first to trigger Element Call
|
# Generate bot's E2EE key and publish it so Element Call
|
||||||
# to share its key with us. We'll republish the real shared
|
# can decrypt our audio. This also triggers Element Call
|
||||||
# key once we receive the caller's key.
|
# to share its key with us.
|
||||||
import secrets
|
import secrets
|
||||||
placeholder_key = secrets.token_bytes(16)
|
bot_key = secrets.token_bytes(16)
|
||||||
await self._publish_encryption_key(room_id, placeholder_key)
|
# Publish bot's key early so Element Call can decrypt our audio
|
||||||
|
await self._publish_encryption_key(room_id, bot_key)
|
||||||
|
|
||||||
vs = VoiceSession(
|
vs = VoiceSession(
|
||||||
nio_client=self.client,
|
nio_client=self.client,
|
||||||
@@ -461,8 +462,9 @@ class Bot:
|
|||||||
device_id=BOT_DEVICE_ID,
|
device_id=BOT_DEVICE_ID,
|
||||||
lk_url=LK_URL,
|
lk_url=LK_URL,
|
||||||
model=model,
|
model=model,
|
||||||
publish_key_cb=lambda key: asyncio.ensure_future(
|
publish_key_cb=lambda key, rid=room_id: asyncio.ensure_future(
|
||||||
self._publish_encryption_key(room_id, key)),
|
self._publish_encryption_key(rid, key)),
|
||||||
|
bot_key=bot_key,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check timeline for caller's key
|
# Check timeline for caller's key
|
||||||
|
|||||||
103
voice.py
103
voice.py
@@ -87,7 +87,7 @@ def _build_e2ee_options(shared_key: bytes) -> rtc.E2EEOptions:
|
|||||||
|
|
||||||
class VoiceSession:
|
class VoiceSession:
|
||||||
def __init__(self, nio_client, room_id, device_id, lk_url, model="claude-sonnet",
|
def __init__(self, nio_client, room_id, device_id, lk_url, model="claude-sonnet",
|
||||||
publish_key_cb=None):
|
publish_key_cb=None, bot_key: bytes | None = None):
|
||||||
self.nio_client = nio_client
|
self.nio_client = nio_client
|
||||||
self.room_id = room_id
|
self.room_id = room_id
|
||||||
self.device_id = device_id
|
self.device_id = device_id
|
||||||
@@ -97,20 +97,39 @@ class VoiceSession:
|
|||||||
self.session = None
|
self.session = None
|
||||||
self._task = None
|
self._task = None
|
||||||
self._http_session = None
|
self._http_session = None
|
||||||
self._e2ee_key: bytes | None = None
|
self._caller_key: bytes | None = None
|
||||||
|
self._caller_identity: str | None = None # "sender:device_id" format
|
||||||
|
self._bot_key: bytes = bot_key or os.urandom(16)
|
||||||
self._publish_key_cb = publish_key_cb
|
self._publish_key_cb = publish_key_cb
|
||||||
|
|
||||||
def on_encryption_key(self, sender, device_id, key, index):
|
def on_encryption_key(self, sender, device_id, key, index):
|
||||||
"""Receive E2EE key from Element Call participant."""
|
"""Receive E2EE key from Element Call participant.
|
||||||
if key and not self._e2ee_key:
|
|
||||||
self._e2ee_key = key
|
If the room is already connected, immediately set the key on the
|
||||||
logger.info("E2EE key received from %s:%s (index=%d, %d bytes)",
|
key provider so we can decrypt the caller's audio.
|
||||||
sender, device_id, index, len(key))
|
"""
|
||||||
|
if not key:
|
||||||
|
return
|
||||||
|
identity = _make_lk_identity(sender, device_id)
|
||||||
|
self._caller_key = key
|
||||||
|
self._caller_identity = identity
|
||||||
|
logger.info("E2EE key received from %s:%s (identity=%s, index=%d, %d bytes)",
|
||||||
|
sender, device_id, identity, index, len(key))
|
||||||
|
|
||||||
|
# If already connected, set key on the key provider immediately
|
||||||
|
if self.lk_room:
|
||||||
|
try:
|
||||||
|
kp = self.lk_room.e2ee_manager.key_provider
|
||||||
|
kp.set_key(identity, key, key_index=index)
|
||||||
|
logger.info("Live-updated caller E2EE key for %s", identity)
|
||||||
|
except Exception:
|
||||||
|
logger.warning("Could not live-update caller E2EE key", exc_info=True)
|
||||||
|
|
||||||
async def _fetch_encryption_key_http(self) -> bytes | None:
|
async def _fetch_encryption_key_http(self) -> bytes | None:
|
||||||
"""Fetch encryption key from room timeline (NOT state) via Matrix HTTP API.
|
"""Fetch encryption key from room timeline (NOT state) via Matrix HTTP API.
|
||||||
|
|
||||||
Element Call distributes encryption keys as timeline events, not state.
|
Element Call distributes encryption keys as timeline events, not state.
|
||||||
|
Also sets self._caller_identity from the event sender + device_id.
|
||||||
"""
|
"""
|
||||||
import httpx
|
import httpx
|
||||||
homeserver = str(self.nio_client.homeserver)
|
homeserver = str(self.nio_client.homeserver)
|
||||||
@@ -134,14 +153,16 @@ class VoiceSession:
|
|||||||
if sender == user_id:
|
if sender == user_id:
|
||||||
continue # skip our own key
|
continue # skip our own key
|
||||||
content = evt.get("content", {})
|
content = evt.get("content", {})
|
||||||
logger.info("Found encryption_keys timeline event: sender=%s content=%s",
|
caller_device = content.get("device_id", "")
|
||||||
sender, content)
|
logger.info("Found encryption_keys timeline event: sender=%s device=%s",
|
||||||
|
sender, caller_device)
|
||||||
|
if caller_device:
|
||||||
|
self._caller_identity = _make_lk_identity(sender, caller_device)
|
||||||
for k in content.get("keys", []):
|
for k in content.get("keys", []):
|
||||||
key_b64 = k.get("key", "")
|
key_b64 = k.get("key", "")
|
||||||
if key_b64:
|
if key_b64:
|
||||||
key_b64 += "=" * (-len(key_b64) % 4)
|
key_b64 += "=" * (-len(key_b64) % 4)
|
||||||
import base64 as b64
|
return base64.urlsafe_b64decode(key_b64)
|
||||||
return b64.urlsafe_b64decode(key_b64)
|
|
||||||
logger.info("No encryption_keys events in last %d timeline events", len(events))
|
logger.info("No encryption_keys events in last %d timeline events", len(events))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("HTTP encryption key fetch failed: %s", e)
|
logger.warning("HTTP encryption key fetch failed: %s", e)
|
||||||
@@ -177,26 +198,25 @@ class VoiceSession:
|
|||||||
# Check timeline for caller's encryption key
|
# Check timeline for caller's encryption key
|
||||||
caller_key = await self._fetch_encryption_key_http()
|
caller_key = await self._fetch_encryption_key_http()
|
||||||
if caller_key:
|
if caller_key:
|
||||||
self._e2ee_key = caller_key
|
self._caller_key = caller_key
|
||||||
logger.info("Got caller E2EE key via timeline (%d bytes)", len(caller_key))
|
logger.info("Got caller E2EE key via timeline (%d bytes)", len(caller_key))
|
||||||
|
|
||||||
if not self._e2ee_key:
|
if not self._caller_key:
|
||||||
# Wait up to 15s for key via sync handler (bot.py forwards
|
# Wait up to 15s for key via sync handler (bot.py forwards
|
||||||
# encryption_keys timeline events to on_encryption_key)
|
# encryption_keys timeline events to on_encryption_key)
|
||||||
logger.info("No key in timeline yet, waiting for sync...")
|
logger.info("No key in timeline yet, waiting for sync...")
|
||||||
for _ in range(150):
|
for _ in range(150):
|
||||||
if self._e2ee_key:
|
if self._caller_key:
|
||||||
break
|
break
|
||||||
await asyncio.sleep(0.1)
|
await asyncio.sleep(0.1)
|
||||||
|
|
||||||
# E2EE disabled — Element Call key derivation mismatch not yet resolved.
|
# Publish bot's own key so caller can decrypt our audio
|
||||||
# Audio pipeline confirmed working without E2EE.
|
if self._publish_key_cb:
|
||||||
if self._e2ee_key:
|
self._publish_key_cb(self._bot_key)
|
||||||
logger.info("Caller E2EE key available (%d bytes) — E2EE disabled pending fix",
|
|
||||||
len(self._e2ee_key))
|
# Build E2EE options with empty shared key — we set per-participant
|
||||||
if self._publish_key_cb:
|
# keys after connect via e2ee_manager.key_provider.set_key()
|
||||||
self._publish_key_cb(self._e2ee_key)
|
e2ee_opts = _build_e2ee_options(b"")
|
||||||
e2ee_opts = None
|
|
||||||
|
|
||||||
room_opts = rtc.RoomOptions(e2ee=e2ee_opts)
|
room_opts = rtc.RoomOptions(e2ee=e2ee_opts)
|
||||||
self.lk_room = rtc.Room()
|
self.lk_room = rtc.Room()
|
||||||
@@ -214,9 +234,46 @@ class VoiceSession:
|
|||||||
logger.info("Track sub: %s %s kind=%s", p.identity, pub.sid, t.kind)
|
logger.info("Track sub: %s %s kind=%s", p.identity, pub.sid, t.kind)
|
||||||
|
|
||||||
await self.lk_room.connect(self.lk_url, jwt, options=room_opts)
|
await self.lk_room.connect(self.lk_url, jwt, options=room_opts)
|
||||||
logger.info("Connected (E2EE=HKDF), remote=%d",
|
logger.info("Connected (E2EE=per-participant), remote=%d",
|
||||||
len(self.lk_room.remote_participants))
|
len(self.lk_room.remote_participants))
|
||||||
|
|
||||||
|
# Set per-participant E2EE keys via key provider
|
||||||
|
bot_identity = _make_lk_identity(user_id, self.device_id)
|
||||||
|
try:
|
||||||
|
kp = self.lk_room.e2ee_manager.key_provider
|
||||||
|
|
||||||
|
# Set bot's own key (encrypts outgoing audio)
|
||||||
|
kp.set_key(bot_identity, self._bot_key, key_index=0)
|
||||||
|
logger.info("Set bot E2EE key for identity=%s (%d bytes)",
|
||||||
|
bot_identity, len(self._bot_key))
|
||||||
|
|
||||||
|
# Set caller's key (decrypts incoming audio)
|
||||||
|
if self._caller_key and self._caller_identity:
|
||||||
|
kp.set_key(self._caller_identity, self._caller_key, key_index=0)
|
||||||
|
logger.info("Set caller E2EE key for identity=%s (%d bytes)",
|
||||||
|
self._caller_identity, len(self._caller_key))
|
||||||
|
elif self._caller_key:
|
||||||
|
# Caller identity not yet known — try to get from remote participants
|
||||||
|
for p in self.lk_room.remote_participants.values():
|
||||||
|
kp.set_key(p.identity, self._caller_key, key_index=0)
|
||||||
|
logger.info("Set caller E2EE key for identity=%s (%d bytes)",
|
||||||
|
p.identity, len(self._caller_key))
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
logger.warning("No caller E2EE key available — caller audio will be silent")
|
||||||
|
except AttributeError:
|
||||||
|
logger.warning("e2ee_manager.key_provider not available — "
|
||||||
|
"falling back to shared key mode")
|
||||||
|
# Fallback: set shared key after connect if per-participant isn't supported
|
||||||
|
if self._caller_key:
|
||||||
|
try:
|
||||||
|
kp = self.lk_room.e2ee_manager.key_provider
|
||||||
|
kp.set_shared_key(self._caller_key, key_index=0)
|
||||||
|
logger.info("Fallback: set shared E2EE key (%d bytes)",
|
||||||
|
len(self._caller_key))
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Fallback shared key also failed")
|
||||||
|
|
||||||
# Find the remote participant, wait up to 10s if not yet connected
|
# Find the remote participant, wait up to 10s if not yet connected
|
||||||
remote_identity = None
|
remote_identity = None
|
||||||
for p in self.lk_room.remote_participants.values():
|
for p in self.lk_room.remote_participants.values():
|
||||||
|
|||||||
Reference in New Issue
Block a user