diff --git a/bot.py b/bot.py index fb1852b..ab0de19 100644 --- a/bot.py +++ b/bot.py @@ -413,34 +413,8 @@ class Bot: lk_room_name = base64.b64encode(lk_room_hash).decode().rstrip("=") logger.info("LiveKit room name: %s (hashed from %s)", lk_room_name, room_id) - if room_id not in self.voice_sessions: - try: - model = self.room_models.get(room_id, DEFAULT_MODEL) - vs = VoiceSession( - nio_client=self.client, - room_id=room_id, - device_id=BOT_DEVICE_ID, - lk_url=LK_URL, - model=model, - ) - # Read existing encryption keys from room state before starting - caller_device_id = content.get("device_id", "") - caller_key = await self._get_call_encryption_key(room_id, event.sender, caller_device_id) - if caller_key: - vs.on_encryption_key(event.sender, caller_device_id, caller_key, 0) - - await vs.start() - self.voice_sessions[room_id] = vs - logger.info("Voice session started for room %s (e2ee_key=%s)", - room_id, "yes" if caller_key else "no") - - # Publish our E2EE key so Element Call sees us as encrypted - if caller_key: - await self._publish_encryption_key(room_id, caller_key) - except Exception: - logger.exception("Voice session start failed for %s", room_id) - - # Send our own call member state event + # Send our own call member state event FIRST so Element Call + # sends encryption_keys in response (before we start VoiceSession) call_content = { "application": "m.call", "call_id": "", @@ -464,6 +438,37 @@ class Bot: except Exception: logger.exception("Failed to send call member event in %s", room_id) + # Now create VoiceSession — encryption_keys may arrive via sync + # while VoiceSession waits for key (up to 10s) + if room_id not in self.voice_sessions: + try: + model = self.room_models.get(room_id, DEFAULT_MODEL) + caller_device_id = content.get("device_id", "") + vs = VoiceSession( + nio_client=self.client, + room_id=room_id, + device_id=BOT_DEVICE_ID, + lk_url=LK_URL, + model=model, + ) + # Try reading encryption key from room state + caller_key = await self._get_call_encryption_key(room_id, event.sender, caller_device_id) + if caller_key: + vs.on_encryption_key(event.sender, caller_device_id, caller_key, 0) + + # Store BEFORE start so on_unknown handler can forward keys + self.voice_sessions[room_id] = vs + await vs.start() + logger.info("Voice session started for room %s (e2ee_key=%s)", + room_id, "yes" if caller_key else "no") + + # Publish our E2EE key so Element Call sees us as encrypted + if caller_key: + await self._publish_encryption_key(room_id, caller_key) + except Exception: + logger.exception("Voice session start failed for %s", room_id) + self.voice_sessions.pop(room_id, None) + else: # Empty content = someone left the call, check if anyone is still calling room_id = room.room_id @@ -1452,18 +1457,29 @@ class Bot: resp = await self.client.room_get_state_event( room_id, ENCRYPTION_KEYS_TYPE, state_key, ) + logger.info("E2EE key lookup state_key=%s → resp type=%s", state_key, type(resp).__name__) key = self._extract_e2ee_key(resp, sender, state_key) if key: return key except Exception as e: - logger.debug("No encryption key at state_key=%s: %s", state_key, e) + logger.info("E2EE key lookup state_key=%s failed: %s", state_key, e) - # Fallback: scan all room state for any encryption_keys event + # Fallback: scan all room state via HTTP API for any encryption_keys event try: - resp = await self.client.room_get_state(room_id) - if hasattr(resp, "events"): - for evt in resp.events: - if evt.get("type") == ENCRYPTION_KEYS_TYPE and evt.get("sender") != BOT_USER: + import httpx + token = self.client.access_token + url = f"{HOMESERVER}/_matrix/client/v3/rooms/{room_id}/state" + async with httpx.AsyncClient(timeout=10.0) as http: + resp = await http.get(url, headers={"Authorization": f"Bearer {token}"}) + resp.raise_for_status() + events = resp.json() + logger.info("Room state scan: %d events total", len(events)) + for evt in events: + evt_type = evt.get("type", "") + if "call" in evt_type or "encryption" in evt_type: + logger.info(" state event: type=%s state_key=%s content_keys=%s", + evt_type, evt.get("state_key", ""), list(evt.get("content", {}).keys())[:5]) + if evt_type == ENCRYPTION_KEYS_TYPE and evt.get("sender") != BOT_USER: content = evt.get("content", {}) keys = content.get("keys", []) for k in keys: @@ -1475,7 +1491,7 @@ class Bot: evt.get("state_key", "?"), len(key)) return key except Exception as e: - logger.debug("Room state scan for encryption keys failed: %s", e) + logger.warning("Room state scan for encryption keys failed: %s", e) logger.warning("No E2EE encryption key found for %s in %s", sender, room_id) return None diff --git a/voice.py b/voice.py index e0c78e1..2d84fc8 100644 --- a/voice.py +++ b/voice.py @@ -23,10 +23,14 @@ ELEVENLABS_KEY = os.environ.get("ELEVENLABS_API_KEY", "") DEFAULT_VOICE_ID = "onwK4e9ZLuTAKqWW03F9" # Daniel - male, free tier VOICE_PROMPT = """Du bist ein hilfreicher Sprachassistent in einem Matrix-Anruf. -Regeln: -- Halte Antworten KURZ - 1-3 Saetze maximal -- Sei direkt, keine Fuellwoerter -- Antworte immer auf Deutsch""" + +STRIKTE Regeln: +- Antworte IMMER auf Deutsch +- Halte JEDE Antwort auf MAXIMAL 1-2 kurze Saetze +- Sei direkt und praezise, keine Fuellwoerter +- Erfinde NICHTS - keine Geschichten, keine Musik, keine Fantasie +- Beantworte nur was gefragt wird +- Wenn niemand etwas fragt, sage nur kurz Hallo""" _vad = None def _get_vad(): @@ -121,13 +125,13 @@ class VoiceSession: user_id = self.nio_client.user_id jwt = _generate_lk_jwt(self.room_id, user_id, self.device_id) - # Wait up to 3s for E2EE encryption key from Element Call - for _ in range(30): + # Wait up to 10s for E2EE encryption key from Element Call + for _ in range(100): if self._e2ee_key: break await asyncio.sleep(0.1) if not self._e2ee_key: - logger.warning("No E2EE key received after 3s, connecting without encryption") + logger.warning("No E2EE key received after 10s, connecting without encryption") # Connect with E2EE if key available e2ee_opts = None