feat(voice): add Confluence read/write tools for voice sessions

Enable realtime Confluence page editing during Element Call voice sessions.

- Add read_confluence_page and update_confluence_page function tools
- Detect Confluence URLs shared in Matrix rooms, store page ID for voice context
- Section-level updates via heading match + version-incremented PUT

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 13:09:34 +02:00
parent e81aa79396
commit b275e7cb88
2 changed files with 145 additions and 2 deletions
--- a/voice.py
+++ b/voice.py
@@ -30,6 +30,9 @@ LK_API_SECRET = os.environ.get("LIVEKIT_API_SECRET", "")
 ELEVENLABS_KEY = os.environ.get("ELEVENLABS_API_KEY", "")
 BRAVE_API_KEY = os.environ.get("BRAVE_API_KEY", "")
 MEMORY_SERVICE_URL = os.environ.get("MEMORY_SERVICE_URL", "http://memory-service:8090")
+# Confluence REST settings. All three must be non-empty for the Confluence
+# helpers to run; USER/TOKEN are sent as the httpx `auth=` pair on each request.
+# REST paths are appended as f"{CONFLUENCE_URL}/rest/api/...", so the base URL
+# is expected without a trailing slash.
+CONFLUENCE_URL = os.environ.get("CONFLUENCE_BASE_URL", "")
+CONFLUENCE_USER = os.environ.get("CONFLUENCE_USER", "")
+CONFLUENCE_TOKEN = os.environ.get("CONFLUENCE_TOKEN", "")
 DEFAULT_VOICE_ID = "ML23UVoFL5mI6APbRAeR"  # Robert Ranger - Cool Storyteller, native German

 _VOICE_PROMPT_TEMPLATE = """Du bist ein hilfreicher Sprachassistent in einem Matrix-Anruf.
@@ -47,7 +50,8 @@ STRIKTE Regeln:
 - Schreibe Zahlen und Jahreszahlen IMMER als Woerter aus (z.B. "zweitausendundzwanzig" statt "2026", "zweiundzwanzigsten Februar" statt "22. Februar")
 - Bei zeitrelevanten Fragen (Uhrzeit, Termine, Geschaeftszeiten): frage kurz nach ob der Nutzer noch in seiner gespeicherten Zeitzone ist, bevor du antwortest. Nutze set_user_timezone wenn sich der Standort geaendert hat.
 - Wenn der Nutzer seinen Standort oder seine Stadt erwaehnt, nutze set_user_timezone um die Zeitzone zu speichern.
- IGNORIERE alle Texte in Sternchen wie *Störgeräusche*, *Schlechte Qualität*, *Fernsehgeräusche*, *Schrei* usw. — das sind KEINE echten Nutzereingaben sondern technische Annotationen. Antworte NIEMALS darauf und tue so als haette niemand etwas gesagt."""
+- IGNORIERE alle Texte in Sternchen wie *Störgeräusche*, *Schlechte Qualität*, *Fernsehgeräusche*, *Schrei* usw. — das sind KEINE echten Nutzereingaben sondern technische Annotationen. Antworte NIEMALS darauf und tue so als haette niemand etwas gesagt.
+- Du kannst Confluence-Seiten lesen und bearbeiten. Nutze read_confluence_page und update_confluence_page wenn der Nutzer Dokumente besprechen oder aendern moechte."""


 def _build_voice_prompt(model: str = "claude-sonnet",
@@ -232,6 +236,95 @@ async def _store_voice_exchange(user_text: str, agent_text: str,
        logger.warning("Voice memory store failed: %s", exc)


+async def _confluence_read_page(page_id: str) -> tuple[str, str, int]:
+    """Read a Confluence page and return (title, plain_text, version_number).
+
+    Fetches the page's storage-format body over the REST API and flattens it
+    to plain text for voice readback: tags become spaces, HTML entities are
+    decoded, and whitespace runs are collapsed to single spaces.
+
+    Raises:
+        RuntimeError: if the base URL, user, or token is not configured.
+        httpx.HTTPStatusError: if the server responds with an error status.
+    """
+    import html  # local import so this block does not depend on file-level imports
+
+    if not CONFLUENCE_URL or not CONFLUENCE_USER or not CONFLUENCE_TOKEN:
+        raise RuntimeError("Confluence credentials not configured")
+    url = f"{CONFLUENCE_URL}/rest/api/content/{page_id}"
+    params = {"expand": "body.storage,version,title"}
+    async with httpx.AsyncClient(timeout=15.0) as client:
+        resp = await client.get(
+            url,
+            params=params,
+            auth=(CONFLUENCE_USER, CONFLUENCE_TOKEN),
+        )
+        resp.raise_for_status()
+        data = resp.json()
+    title = data.get("title", "")
+    version = data.get("version", {}).get("number", 1)
+    storage_html = data.get("body", {}).get("storage", {}).get("value", "")
+    # Strip HTML tags to plain text for voice readback
+    plain = re.sub(r"<[^>]+>", " ", storage_html)
+    # Decode entities (&amp;, &nbsp;, &uuml; ...) so they are not read aloud
+    # literally. Done after tag stripping so an escaped "&lt;b&gt;" in the
+    # page text is kept as text rather than being stripped as markup.
+    plain = html.unescape(plain)
+    # \s also matches the non-breaking spaces produced by &nbsp;
+    plain = re.sub(r"\s+", " ", plain).strip()
+    return title, plain, version
+
+
+async def _confluence_update_section(page_id: str, section_heading: str, new_html: str) -> str:
+    """Update a section of a Confluence page by heading.
+
+    Finds the first heading element (h1-h6) whose own text contains
+    ``section_heading`` (case-insensitive), replaces everything between that
+    heading and the next heading of the same or higher level (or the end of
+    the page) with ``new_html``, and PUTs the page back with an incremented
+    version number.
+
+    Returns a human-readable status string (success, missing credentials,
+    empty heading, or section not found). HTTP failures are not caught here:
+    ``raise_for_status`` exceptions propagate to the caller.
+    """
+    if not CONFLUENCE_URL or not CONFLUENCE_USER or not CONFLUENCE_TOKEN:
+        return "Confluence credentials not configured."
+    needle = section_heading.strip()
+    if not needle:
+        # An empty needle would match the first heading on the page and
+        # silently overwrite that section.
+        return "Section heading must not be empty."
+
+    url = f"{CONFLUENCE_URL}/rest/api/content/{page_id}"
+    params = {"expand": "body.storage,version,title"}
+    auth = (CONFLUENCE_USER, CONFLUENCE_TOKEN)
+
+    # One client for the read-modify-write cycle (GET, then PUT).
+    async with httpx.AsyncClient(timeout=15.0) as client:
+        # Read current page
+        resp = await client.get(url, params=params, auth=auth)
+        resp.raise_for_status()
+        data = resp.json()
+
+        title = data["title"]
+        version = data["version"]["number"]
+        body_html = data["body"]["storage"]["value"]
+
+        # Match the needle only INSIDE a single heading element. The tempered
+        # token refuses to step over any opening/closing heading tag, so the
+        # match cannot start at an earlier heading and run across body text
+        # or other headings until it finds the needle.
+        within_heading = r'(?:(?!</?h[1-6]\b).)*?'
+        heading_pattern = re.compile(
+            r'<h([1-6])\b[^>]*>'
+            + within_heading + re.escape(needle) + within_heading
+            + r'</h\1>',
+            re.IGNORECASE | re.DOTALL,
+        )
+        match = heading_pattern.search(body_html)
+        if not match:
+            return f"Section '{section_heading}' not found on page."
+
+        heading_level = match.group(1)
+        section_start = match.end()
+
+        # Find next heading of same or higher level (h1..h<level>)
+        next_heading = re.compile(
+            rf'<h[1-{heading_level}]\b[^>]*>',
+            re.IGNORECASE,
+        )
+        next_match = next_heading.search(body_html, section_start)
+        section_end = next_match.start() if next_match else len(body_html)
+
+        # Replace section content; the heading itself is kept.
+        new_body = body_html[:section_start] + new_html + body_html[section_end:]
+
+        # PUT updated page with the version bumped by one
+        put_data = {
+            "version": {"number": version + 1},
+            "title": title,
+            "type": "page",
+            "body": {
+                "storage": {
+                    "value": new_body,
+                    "representation": "storage",
+                }
+            },
+        }
+        resp = await client.put(url, json=put_data, auth=auth)
+        resp.raise_for_status()
+    return f"Section '{section_heading}' updated successfully."
+
+
 def _build_e2ee_options() -> rtc.E2EEOptions:
    """Build E2EE options — let Rust FFI apply HKDF internally (KDF_HKDF=1).

@@ -698,12 +791,48 @@ class VoiceSession:
                    await _store_user_pref(caller_uid, "timezone", iana_timezone)
                return f"Timezone set to {iana_timezone}"

+            @function_tool
+            async def read_confluence_page(page_id: str) -> str:
+                """Read a Confluence page. Use when user asks to read, review,
+                or check a document. Returns page title and content as text."""
+                # NOTE(review): docstring left verbatim; it is presumably what
+                # @function_tool exposes to the model as the tool description.
+                logger.info("CONFLUENCE_READ: page_id=%s", page_id)
+                try:
+                    page_title, page_text, _ = await _confluence_read_page(page_id)
+                except Exception as err:
+                    # Report the failure to the model as text instead of raising.
+                    logger.warning("CONFLUENCE_READ_FAIL: %s", err)
+                    return f"Failed to read page: {err}"
+                logger.info("CONFLUENCE_READ_OK: %s (%d chars)", page_title, len(page_text))
+                return f"Page: {page_title}\n\n{page_text}"
+
+            @function_tool
+            async def update_confluence_page(page_id: str, section_heading: str, new_content: str) -> str:
+                """Update a section of a Confluence page. Use when user asks to
+                change, update, or rewrite part of a document.
+                - page_id: Confluence page ID
+                - section_heading: heading text of the section to update
+                - new_content: new plain text for the section (will be wrapped in <p> tags)
+                Human sees changes instantly in their browser via Live Docs."""
+                # NOTE(review): docstring left verbatim; it is presumably what
+                # @function_tool exposes to the model as the tool description.
+                import html  # local import so this block does not depend on file-level imports
+
+                logger.info("CONFLUENCE_UPDATE: page=%s section='%s'", page_id, section_heading)
+                try:
+                    # new_content is plain text: escape &, <, > so the result is
+                    # well-formed storage-format markup and spoken text cannot
+                    # inject tags or break the page body.
+                    new_html = f"<p>{html.escape(new_content, quote=False)}</p>"
+                    result = await _confluence_update_section(page_id, section_heading, new_html)
+                    logger.info("CONFLUENCE_UPDATE_OK: %s", result)
+                    return result
+                except Exception as exc:
+                    # Report the failure to the model as text instead of raising.
+                    logger.warning("CONFLUENCE_UPDATE_FAIL: %s", exc)
+                    return f"Failed to update page: {exc}"
+
            instructions = _build_voice_prompt(model=self.model, timezone=user_timezone) + memory_section
            if self._document_context:
                instructions += f"\n\nDokument-Kontext (im Raum hochgeladen):\n{self._document_context}"
+                # Extract Confluence page IDs from document context for tool use
+                conf_ids = re.findall(r'confluence_page_id:(\d+)', self._document_context)
+                if conf_ids:
+                    instructions += f"\n\nAktive Confluence-Seite(n): {', '.join(conf_ids)}. Nutze diese page_id fuer read_confluence_page und update_confluence_page."
            agent = _NoiseFilterAgent(
                instructions=instructions,
-                tools=[search_web, set_user_timezone],
+                tools=[search_web, set_user_timezone, read_confluence_page, update_confluence_page],
            )
            io_opts = room_io.RoomOptions(
                participant_identity=remote_identity,