fix: link AgentSession to remote participant + debug speech events

- Pass participant_identity via RoomOptions so AgentSession knows which audio track to consume (was silently ignoring user audio) - Add USER_SPEECH and AGENT_SPEECH event handlers for debugging - Simplify greeting to exact text to prevent hallucination - Use httpx for room state scan (nio API was unreliable) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 15:03:24 +02:00
parent 85df4b295f
commit 75970fc06b
1 changed files with 28 additions and 4 deletions
--- a/voice.py
+++ b/voice.py
@@ -10,7 +10,7 @@ import os

 import aiohttp
 from livekit import rtc, api as lkapi
-from livekit.agents import Agent, AgentSession
+from livekit.agents import Agent, AgentSession, room_io
 from livekit.plugins import openai as lk_openai, elevenlabs, silero

 logger = logging.getLogger("matrix-ai-voice")
@@ -159,6 +159,13 @@ class VoiceSession:
                        "HKDF" if self._e2ee_key else "off",
                        len(self.lk_room.remote_participants))

+            # Find the remote participant to link to
+            remote_identity = None
+            for p in self.lk_room.remote_participants.values():
+                remote_identity = p.identity
+                logger.info("Linking to remote participant: %s", remote_identity)
+                break
+
            # Voice pipeline — German male voice (Daniel)
            self._http_session = aiohttp.ClientSession()
            voice_id = os.environ.get("ELEVENLABS_VOICE_ID", DEFAULT_VOICE_ID)
@@ -169,14 +176,31 @@ class VoiceSession:
                                   api_key=ELEVENLABS_KEY, http_session=self._http_session),
                vad=_get_vad(),
            )
+
+            # Debug: log speech events
+            @self.session.on("user_speech_committed")
+            def _on_user_speech(msg):
+                logger.info("USER_SPEECH: %s", msg.text_content)
+
+            @self.session.on("agent_speech_committed")
+            def _on_agent_speech(msg):
+                logger.info("AGENT_SPEECH: %s", msg.text_content)
+
            agent = Agent(instructions=VOICE_PROMPT)
-            await self.session.start(agent=agent, room=self.lk_room)
-            logger.info("Voice pipeline started (voice=%s)", voice_id)
+            room_opts = room_io.RoomOptions(
+                participant_identity=remote_identity,
+            ) if remote_identity else None
+            await self.session.start(
+                agent=agent,
+                room=self.lk_room,
+                room_options=room_opts,
+            )
+            logger.info("Voice pipeline started (voice=%s, linked_to=%s)", voice_id, remote_identity)

            try:
                await asyncio.wait_for(
                    self.session.generate_reply(
-                        instructions="Begruesse den Nutzer kurz auf Deutsch."),
+                        instructions="Sage nur: Hallo, wie kann ich helfen?"),
                    timeout=30.0)
                logger.info("Greeting sent")
            except asyncio.TimeoutError: