feat: activity video track (pulsing orb) for voice sessions

- ActivityVideoPublisher renders an animated orb on a 160x120 canvas
- Integrated into both agent.py and voice.py
- Updates the confluence-collab submodule
2026-03-06 15:58:51 +00:00
parent 947699c988
commit efb976a27c
4 changed files with 199 additions and 1 deletions
--- a/voice.py
+++ b/voice.py
@@ -18,6 +18,7 @@ import aiohttp
 import httpx
 from livekit import rtc, api as lkapi
 from livekit.agents import Agent, AgentSession, StopResponse, function_tool, room_io, llm
+from activity_video import ActivityVideoPublisher
 from livekit.plugins import openai as lk_openai, elevenlabs, silero
 from openai import AsyncOpenAI

@@ -463,6 +464,8 @@ class VoiceSession:
        self.lk_room = None
        self.session = None
        self._task = None
+        self._activity_video = None
+        self._activity_task = None
        self._http_session = None
        self._caller_key: bytes | None = None
        self._caller_identity: str | None = None
@@ -575,6 +578,10 @@ class VoiceSession:
                        await obj.close()
                except Exception:
                    pass
+        if self._activity_video:
+            self._activity_video.stop()
+        if self._activity_task and not self._activity_task.done():
+            self._activity_task.cancel()
        if self._task and not self._task.done():
            self._task.cancel()
            try:
@@ -779,6 +786,17 @@ class VoiceSession:
            if remote_identity:
                logger.info("Linking to remote participant: %s", remote_identity)

+            # Publish activity video track (animated pulsing orb)
+            try:
+                self._activity_video = ActivityVideoPublisher()
+                video_track = rtc.LocalVideoTrack.create_video_track("activity", self._activity_video.source)
+                pub_opts = rtc.TrackPublishOptions(source=rtc.TrackSource.SOURCE_CAMERA)
+                await self.lk_room.local_participant.publish_track(video_track, pub_opts)
+                self._activity_task = asyncio.create_task(self._activity_video.run())
+                logger.info("Activity video track published")
+            except Exception as e:
+                logger.warning("Failed to publish activity video: %s", e)
+
            # Load memories and user preferences for this caller
            memory_section = ""
            user_timezone = None
@@ -1217,6 +1235,12 @@ class VoiceSession:
            )
            logger.info("Voice pipeline started (voice=%s, linked_to=%s)", voice_id, remote_identity)

+            # Wire agent state to activity video animation
+            if self._activity_video:
+                @self.session.on("agent_state_changed")
+                def _on_state_changed(ev):
+                    self._activity_video.set_state(ev.new_state)
+
            try:
                await asyncio.wait_for(
                    self.session.generate_reply(