fix: E2EE key lookup for Element Call voice sessions

- Fix state_key format: try @user:domain:DEVICE_ID (Element Call format),
  then @user:domain, then scan all room state as fallback
- Publish bot E2EE key to room so Element shows encrypted status
- Extract caller device_id from call member event content
- Also fix pipecat-poc pipeline with context aggregators (CF-1579)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-21 14:51:26 +02:00
parent c60dfc0cef
commit 80582860b9
2 changed files with 196 additions and 18 deletions

81
bot.py
View File

@@ -424,14 +424,19 @@ class Bot:
model=model, model=model,
) )
# Read existing encryption keys from room state before starting # Read existing encryption keys from room state before starting
caller_key = await self._get_call_encryption_key(room_id, event.sender) caller_device_id = content.get("device_id", "")
caller_key = await self._get_call_encryption_key(room_id, event.sender, caller_device_id)
if caller_key: if caller_key:
vs.on_encryption_key(event.sender, "", caller_key, 0) vs.on_encryption_key(event.sender, caller_device_id, caller_key, 0)
await vs.start() await vs.start()
self.voice_sessions[room_id] = vs self.voice_sessions[room_id] = vs
logger.info("Voice session started for room %s (e2ee_key=%s)", logger.info("Voice session started for room %s (e2ee_key=%s)",
room_id, "yes" if caller_key else "no") room_id, "yes" if caller_key else "no")
# Publish our E2EE key so Element Call sees us as encrypted
if caller_key:
await self._publish_encryption_key(room_id, caller_key)
except Exception: except Exception:
logger.exception("Voice session start failed for %s", room_id) logger.exception("Voice session start failed for %s", room_id)
@@ -1430,24 +1435,64 @@ class Bot:
}, },
) )
async def _get_call_encryption_key(self, room_id: str, sender: str) -> bytes | None: async def _get_call_encryption_key(self, room_id: str, sender: str, caller_device_id: str = "") -> bytes | None:
"""Read E2EE encryption key from io.element.call.encryption_keys state events.""" """Read E2EE encryption key from io.element.call.encryption_keys state events.
Element Call uses state_key format: @user:domain:DEVICE_ID
Falls back to trying just @user:domain and scanning all room state.
"""
# Try state_key formats: @user:domain:device_id, then @user:domain
state_keys_to_try = []
if caller_device_id:
state_keys_to_try.append(f"{sender}:{caller_device_id}")
state_keys_to_try.append(sender)
for state_key in state_keys_to_try:
try:
resp = await self.client.room_get_state_event(
room_id, ENCRYPTION_KEYS_TYPE, state_key,
)
key = self._extract_e2ee_key(resp, sender, state_key)
if key:
return key
except Exception as e:
logger.debug("No encryption key at state_key=%s: %s", state_key, e)
# Fallback: scan all room state for any encryption_keys event
try: try:
resp = await self.client.room_get_state_event( resp = await self.client.room_get_state(room_id)
room_id, ENCRYPTION_KEYS_TYPE, sender, if hasattr(resp, "events"):
) for evt in resp.events:
if hasattr(resp, "content") and resp.content: if evt.get("type") == ENCRYPTION_KEYS_TYPE and evt.get("sender") != BOT_USER:
keys = resp.content.get("keys", []) content = evt.get("content", {})
if keys: keys = content.get("keys", [])
key_b64 = keys[0].get("key", "") for k in keys:
if key_b64: key_b64 = k.get("key", "")
# Element Call uses base64url encoding if key_b64:
key_b64 += "=" * (-len(key_b64) % 4) # pad key_b64 += "=" * (-len(key_b64) % 4)
key = base64.urlsafe_b64decode(key_b64) key = base64.urlsafe_b64decode(key_b64)
logger.info("Got E2EE key from %s (%d bytes)", sender, len(key)) logger.info("Got E2EE key from room state scan (%s, %d bytes)",
return key evt.get("state_key", "?"), len(key))
return key
except Exception as e: except Exception as e:
logger.debug("No encryption key from %s in %s: %s", sender, room_id, e) logger.debug("Room state scan for encryption keys failed: %s", e)
logger.warning("No E2EE encryption key found for %s in %s", sender, room_id)
return None
@staticmethod
def _extract_e2ee_key(resp, sender: str, state_key: str) -> bytes | None:
"""Extract E2EE key bytes from a state event response."""
if not hasattr(resp, "content") or not resp.content:
return None
keys = resp.content.get("keys", [])
for k in keys:
key_b64 = k.get("key", "")
if key_b64:
key_b64 += "=" * (-len(key_b64) % 4)
key = base64.urlsafe_b64decode(key_b64)
logger.info("Got E2EE key from %s (state_key=%s, %d bytes)", sender, state_key, len(key))
return key
return None return None
async def _publish_encryption_key(self, room_id: str, key: bytes): async def _publish_encryption_key(self, room_id: str, key: bytes):

133
pipecat-poc/pipeline.py Normal file
View File

@@ -0,0 +1,133 @@
"""Configurable voice pipeline using Pipecat.
Builds a STT → LLM → TTS pipeline with context aggregators.
Providers are selected via config (STT_PROVIDER, TTS_PROVIDER env vars).
LLM uses OpenAI-compatible API via LiteLLM.
"""
import asyncio
import logging
import aiohttp
from pipecat.frames.frames import LLMMessagesAppendFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.livekit.transport import LiveKitParams, LiveKitTransport
from config import Config
from providers import create_stt, create_tts
logger = logging.getLogger("pipecat-poc.pipeline")
def _create_llm(cfg: Config):
"""Create LLM service using OpenAI-compatible API (LiteLLM)."""
return OpenAILLMService(
base_url=cfg.llm.base_url,
api_key=cfg.llm.api_key,
model=cfg.llm.model,
)
async def run_voice_pipeline(token: str, lk_room_name: str, cfg: Config):
"""Run the Pipecat voice pipeline.
Args:
token: LiveKit JWT used to connect
lk_room_name: LiveKit room name
cfg: Application configuration
"""
http_session = aiohttp.ClientSession()
stt = await create_stt(cfg.stt, http_session)
tts = await create_tts(cfg.tts, http_session)
llm = _create_llm(cfg)
transport = LiveKitTransport(
url=cfg.livekit.url,
token=token,
room_name=lk_room_name,
params=LiveKitParams(
audio_in_enabled=True,
audio_out_enabled=True,
),
)
# Create context with system prompt and context aggregators
context = OpenAILLMContext(
messages=[{"role": "system", "content": cfg.voice.system_prompt}],
)
context_aggregator = llm.create_context_aggregator(context)
pipeline = Pipeline([
transport.input(),
stt,
context_aggregator.user(), # TranscriptionFrame → user message in context
llm,
tts,
transport.output(),
context_aggregator.assistant(), # LLM output → assistant message in context
])
task = PipelineTask(
pipeline,
params=PipelineParams(
allow_interruptions=True,
enable_metrics=True,
),
)
@transport.event_handler("on_first_participant_joined")
async def on_first_participant(transport_obj, participant):
logger.info("First participant joined: %s", participant)
await task.queue_frames([
LLMMessagesAppendFrame(
[{"role": "user", "content": cfg.voice.greeting}],
run_llm=True,
),
])
runner = PipelineRunner()
logger.info("Starting Pipecat pipeline (stt=%s, tts=%s, llm=%s)",
cfg.stt.provider, cfg.tts.provider, cfg.llm.model)
await runner.run(task)
class VoicePipelineSession:
"""Manages a single voice pipeline session lifecycle.
Wraps the Pipecat pipeline with start/stop semantics.
Used by bot_bridge.py to manage pipeline per Matrix call.
"""
def __init__(self, token: str, lk_room_name: str, cfg: Config):
self.token = token
self.lk_room_name = lk_room_name
self.cfg = cfg
self._task: asyncio.Task | None = None
async def start(self):
"""Start the voice pipeline in a background task."""
self._task = asyncio.create_task(self._run())
logger.info("Voice pipeline session started")
async def stop(self):
"""Stop the voice pipeline."""
if self._task and not self._task.done():
self._task.cancel()
try:
await self._task
except asyncio.CancelledError:
pass
logger.info("Voice pipeline session stopped")
async def _run(self):
try:
await run_voice_pipeline(self.token, self.lk_room_name, self.cfg)
except asyncio.CancelledError:
logger.info("Pipeline cancelled")
except Exception:
logger.exception("Pipeline failed")