diff --git a/agent.py b/agent.py index 684cee1..f980b45 100644 --- a/agent.py +++ b/agent.py @@ -1,21 +1,14 @@ import os -import asyncio import logging -from livekit import rtc -from livekit.api import AccessToken, VideoGrants -from livekit.agents import Agent, AgentSession, AgentServer, JobContext, JobProcess, cli, room_io +from livekit.agents import Agent, AgentSession, AgentServer, JobContext, JobProcess, cli from livekit.plugins import openai as lk_openai, elevenlabs, silero logger = logging.getLogger("matrix-ai-agent") -logging.basicConfig(level=logging.INFO) +logging.basicConfig(level=logging.DEBUG) LITELLM_URL = os.environ["LITELLM_BASE_URL"] LITELLM_KEY = os.environ.get("LITELLM_API_KEY", "not-needed") -LK_API_KEY = os.environ["LIVEKIT_API_KEY"] -LK_API_SECRET = os.environ["LIVEKIT_API_SECRET"] -LK_URL = os.environ["LIVEKIT_URL"] -BOT_IDENTITY = os.environ.get("BOT_IDENTITY", "@ai:agiliton.eu:AIBOT") SYSTEM_PROMPT = """You are a helpful voice assistant in a Matrix call. Rules: @@ -36,46 +29,12 @@ server.setup_fnc = prewarm @server.rtc_session() async def entrypoint(ctx: JobContext): - room_name = ctx.job.room.name - logger.info("Job received for room %s, connecting with Matrix identity...", room_name) + logger.info("Job received for room %s", ctx.job.room.name) - # Generate a token with the correct Matrix user identity - token = ( - AccessToken(LK_API_KEY, LK_API_SECRET) - .with_identity(BOT_IDENTITY) - .with_grants(VideoGrants( - room=room_name, - room_join=True, - can_publish=True, - can_subscribe=True, - )) - .to_jwt() - ) - - # Connect our own room with the Matrix identity (NOT ctx.connect()) - custom_room = rtc.Room() - await custom_room.connect(LK_URL, token) - logger.info("Connected to room as %s", BOT_IDENTITY) - - # Wait for a real (non-agent) participant - def has_real_participant(): - return any( - not p.identity.startswith("agent-") - for p in custom_room.remote_participants.values() - ) - - if not has_real_participant(): - logger.info("Waiting for real participant...") - fut = asyncio.get_event_loop().create_future() - - def on_participant(p: rtc.RemoteParticipant): - if not p.identity.startswith("agent-") and not fut.done(): - fut.set_result(p) - - custom_room.on("participant_connected", on_participant) - await fut - - logger.info("Participants: %s", list(custom_room.remote_participants.keys())) + # Standard framework connection (handles audio pipeline properly) + await ctx.connect() + logger.info("Connected to room, local identity: %s", ctx.room.local_participant.identity) + logger.info("Remote participants: %s", list(ctx.room.remote_participants.keys())) model = os.environ.get("LITELLM_MODEL", "claude-sonnet") voice_id = os.environ.get("ELEVENLABS_VOICE_ID", "21m00Tcm4TlvDq8ikWAM") @@ -94,46 +53,24 @@ async def entrypoint(ctx: JobContext): vad=ctx.proc.userdata["vad"], ) - # Find the real user's identity for targeted audio input - real_identity = next( - (p.identity for p in custom_room.remote_participants.values() - if not p.identity.startswith("agent-")), - None, - ) - logger.info("Starting agent session, targeting participant: %s", real_identity) + # Debug: log pipeline events + @session.on("user_speech_committed") + def on_speech(msg): + logger.info("USER_SPEECH_COMMITTED: %s", msg.text_content) + + @session.on("agent_speech_committed") + def on_agent_speech(msg): + logger.info("AGENT_SPEECH_COMMITTED: %s", msg.text_content) agent = Agent(instructions=SYSTEM_PROMPT) - input_opts = room_io.RoomInputOptions( - participant_identity=real_identity, - ) await session.start( agent=agent, - room=custom_room, - room_input_options=input_opts, + room=ctx.room, ) logger.info("Session started, generating greeting...") await session.generate_reply(instructions="Greet the user briefly.") logger.info("Greeting generated.") - # Wait for all real participants to leave, then disconnect cleanly - # so the LiveKit room gets deleted and auto-dispatch fires on next call - left_fut = asyncio.get_event_loop().create_future() - - def on_participant_left(p: rtc.RemoteParticipant): - # Check if any real (non-agent) participants remain - remaining = [ - pid for pid in custom_room.remote_participants - if not pid.startswith("agent-") - ] - if not remaining and not left_fut.done(): - left_fut.set_result(True) - - custom_room.on("participant_disconnected", on_participant_left) - await left_fut - logger.info("All participants left, disconnecting custom room...") - await custom_room.disconnect() - logger.info("Room disconnected, agent exiting.") - if __name__ == "__main__": cli.run_app(server)