diff --git a/voice.py b/voice.py
index 2d84fc8..3b45dc9 100644
--- a/voice.py
+++ b/voice.py
@@ -10,7 +10,7 @@ import os
 
 import aiohttp
 from livekit import rtc, api as lkapi
-from livekit.agents import Agent, AgentSession
+from livekit.agents import Agent, AgentSession, room_io
 from livekit.plugins import openai as lk_openai, elevenlabs, silero
 
 logger = logging.getLogger("matrix-ai-voice")
@@ -159,6 +159,14 @@ class VoiceSession:
                     "HKDF" if self._e2ee_key else "off",
                     len(self.lk_room.remote_participants))
 
+        # Pick the first remote participant already in the room (if any) as
+        # the one to link the session to; None means no explicit link.
+        remote_identity = next(
+            (p.identity for p in self.lk_room.remote_participants.values()),
+            None)
+        if remote_identity is not None:
+            logger.info("Linking to remote participant: %s", remote_identity)
+
         # Voice pipeline — German male voice (Daniel)
         self._http_session = aiohttp.ClientSession()
         voice_id = os.environ.get("ELEVENLABS_VOICE_ID", DEFAULT_VOICE_ID)
@@ -169,14 +177,41 @@ class VoiceSession:
                 api_key=ELEVENLABS_KEY, http_session=self._http_session),
             vad=_get_vad(),
         )
+
+        # Debug: log committed conversation turns. AgentSession reports both
+        # user and agent messages through "conversation_item_added"; the
+        # legacy "*_speech_committed" names are not AgentSession events, so
+        # handlers registered under them would never run.
+        @self.session.on("conversation_item_added")
+        def _on_conversation_item(ev):
+            item = ev.item
+            role = getattr(item, "role", None)
+            if role == "user":
+                logger.info("USER_SPEECH: %s", item.text_content)
+            elif role == "assistant":
+                logger.info("AGENT_SPEECH: %s", item.text_content)
+
         agent = Agent(instructions=VOICE_PROMPT)
-        await self.session.start(agent=agent, room=self.lk_room)
-        logger.info("Voice pipeline started (voice=%s)", voice_id)
+        # Pass room_options only when there is a participant to link to, so
+        # start() otherwise uses its own default instead of receiving an
+        # explicit None.
+        start_kwargs = {}
+        if remote_identity:
+            start_kwargs["room_options"] = room_io.RoomOptions(
+                participant_identity=remote_identity,
+            )
+        await self.session.start(
+            agent=agent,
+            room=self.lk_room,
+            **start_kwargs,
+        )
+        logger.info("Voice pipeline started (voice=%s, linked_to=%s)",
+                    voice_id, remote_identity)
 
         try:
             await asyncio.wait_for(
                 self.session.generate_reply(
-                    instructions="Begruesse den Nutzer kurz auf Deutsch."),
+                    instructions="Sage nur: Hallo, wie kann ich helfen?"),
                 timeout=30.0)
             logger.info("Greeting sent")
         except asyncio.TimeoutError: