feat(voice): add Confluence read/write tools for voice sessions

Enable realtime Confluence page editing during Element Call voice sessions.
- Add read_confluence_page and update_confluence_page function tools
- Detect Confluence URLs shared in Matrix rooms, store page ID for voice context
- Section-level updates via heading match + version-incremented PUT

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-23 13:09:34 +02:00
parent e81aa79396
commit b275e7cb88
2 changed files with 145 additions and 2 deletions

133
voice.py
View File

@@ -30,6 +30,9 @@ LK_API_SECRET = os.environ.get("LIVEKIT_API_SECRET", "")
# Service credentials and endpoints taken from the process environment.
# An unset variable falls back to the default passed as second argument.
ELEVENLABS_KEY = os.getenv("ELEVENLABS_API_KEY", "")
BRAVE_API_KEY = os.getenv("BRAVE_API_KEY", "")
MEMORY_SERVICE_URL = os.getenv("MEMORY_SERVICE_URL", "http://memory-service:8090")

# Confluence access for the voice tools. Note that the base URL is read from
# CONFLUENCE_BASE_URL, not CONFLUENCE_URL. The Confluence helpers in this file
# refuse to run unless all three values are non-empty.
CONFLUENCE_URL = os.getenv("CONFLUENCE_BASE_URL", "")
CONFLUENCE_USER = os.getenv("CONFLUENCE_USER", "")
CONFLUENCE_TOKEN = os.getenv("CONFLUENCE_TOKEN", "")

# Robert Ranger - Cool Storyteller, native German
DEFAULT_VOICE_ID = "ML23UVoFL5mI6APbRAeR"
_VOICE_PROMPT_TEMPLATE = """Du bist ein hilfreicher Sprachassistent in einem Matrix-Anruf.
@@ -47,7 +50,8 @@ STRIKTE Regeln:
- Schreibe Zahlen und Jahreszahlen IMMER als Woerter aus (z.B. "zweitausendundzwanzig" statt "2026", "zweiundzwanzigsten Februar" statt "22. Februar")
- Bei zeitrelevanten Fragen (Uhrzeit, Termine, Geschaeftszeiten): frage kurz nach ob der Nutzer noch in seiner gespeicherten Zeitzone ist, bevor du antwortest. Nutze set_user_timezone wenn sich der Standort geaendert hat.
- Wenn der Nutzer seinen Standort oder seine Stadt erwaehnt, nutze set_user_timezone um die Zeitzone zu speichern.
- IGNORIERE alle Texte in Sternchen wie *Störgeräusche*, *Schlechte Qualität*, *Fernsehgeräusche*, *Schrei* usw. — das sind KEINE echten Nutzereingaben sondern technische Annotationen. Antworte NIEMALS darauf und tue so als haette niemand etwas gesagt."""
- IGNORIERE alle Texte in Sternchen wie *Störgeräusche*, *Schlechte Qualität*, *Fernsehgeräusche*, *Schrei* usw. — das sind KEINE echten Nutzereingaben sondern technische Annotationen. Antworte NIEMALS darauf und tue so als haette niemand etwas gesagt.
- Du kannst Confluence-Seiten lesen und bearbeiten. Nutze read_confluence_page und update_confluence_page wenn der Nutzer Dokumente besprechen oder aendern moechte."""
def _build_voice_prompt(model: str = "claude-sonnet",
@@ -232,6 +236,95 @@ async def _store_voice_exchange(user_text: str, agent_text: str,
logger.warning("Voice memory store failed: %s", exc)
async def _confluence_read_page(page_id: str) -> tuple[str, str, int]:
    """Read a Confluence page and return (title, plain_text, version_number).

    The page is fetched from ``{CONFLUENCE_URL}/rest/api/content/{page_id}``
    with the storage-format body, version and title expanded. The markup is
    reduced to plain text for voice readback: tags are replaced by spaces,
    character references (``&amp;``, ``&nbsp;``, ...) are decoded, and runs
    of whitespace are collapsed to a single space.

    Args:
        page_id: Confluence content ID of the page to read.

    Returns:
        ``(title, plain_text, version_number)``. Fields missing from the
        response fall back to ``""``, ``""`` and ``1`` respectively.

    Raises:
        RuntimeError: if the Confluence URL, user or token is not configured.
        httpx.HTTPStatusError: if Confluence answers with an error status
            (raised by ``raise_for_status``).
    """
    # Function-scope imports keep this helper self-contained.
    from html import unescape
    from urllib.parse import quote

    if not (CONFLUENCE_URL and CONFLUENCE_USER and CONFLUENCE_TOKEN):
        raise RuntimeError("Confluence credentials not configured")

    # page_id is supplied by a tool call, so percent-encode it: a value such
    # as "123/child/attachment?x=1" must not be able to add path segments or
    # a query string to the request. Numeric IDs pass through unchanged.
    safe_page_id = quote(str(page_id), safe="")
    url = f"{CONFLUENCE_URL}/rest/api/content/{safe_page_id}"
    params = {"expand": "body.storage,version,title"}

    async with httpx.AsyncClient(timeout=15.0) as client:
        resp = await client.get(
            url,
            params=params,
            auth=(CONFLUENCE_USER, CONFLUENCE_TOKEN),
        )
        resp.raise_for_status()
        data = resp.json()

    title = data.get("title", "")
    version = data.get("version", {}).get("number", 1)
    storage_html = data.get("body", {}).get("storage", {}).get("value", "")

    # Strip tags first and decode entities second, so an escaped "&lt;b&gt;"
    # in the page text survives as literal "<b>" instead of being removed.
    plain = re.sub(r"<[^>]+>", " ", storage_html)
    plain = unescape(plain)
    # Collapse whitespace last: decoded "&nbsp;" (U+00A0) is matched by \s.
    plain = re.sub(r"\s+", " ", plain).strip()
    return title, plain, version
async def _confluence_update_section(page_id: str, section_heading: str, new_html: str) -> str:
    """Replace the content below one heading of a Confluence page.

    Reads the page in storage format, locates the first ``<h1>``..``<h6>``
    element whose content contains ``section_heading`` (case-insensitive),
    replaces everything between that heading and the next heading of the
    same or a higher level (or the end of the page) with ``new_html``, and
    PUTs the page back with the version number incremented by one.

    Args:
        page_id: Confluence content ID of the page to modify.
        section_heading: Text to look for inside a heading element.
        new_html: Storage-format markup that becomes the new section body.
            It is inserted verbatim; the caller is responsible for escaping.

    Returns:
        A human-readable status message: success, section not found, empty
        heading, or credentials not configured.

    Raises:
        httpx.HTTPStatusError: if the GET or PUT is answered with an error
            status (raised by ``raise_for_status``).
        KeyError: if the GET response lacks title, version or body fields.
    """
    # Function-scope import keeps this helper self-contained.
    from urllib.parse import quote

    if not (CONFLUENCE_URL and CONFLUENCE_USER and CONFLUENCE_TOKEN):
        return "Confluence credentials not configured."

    # An empty needle would match the first heading on the page and silently
    # overwrite that section, so refuse it up front.
    heading_text = section_heading.strip()
    if not heading_text:
        return "No section heading given."

    # page_id is supplied by a tool call; percent-encode it so it cannot add
    # path segments or a query string. Numeric IDs pass through unchanged.
    safe_page_id = quote(str(page_id), safe="")
    url = f"{CONFLUENCE_URL}/rest/api/content/{safe_page_id}"
    auth = (CONFLUENCE_USER, CONFLUENCE_TOKEN)

    # One client serves both the read and the write of this update.
    async with httpx.AsyncClient(timeout=15.0) as client:
        # Read current page
        resp = await client.get(
            url,
            params={"expand": "body.storage,version,title"},
            auth=auth,
        )
        resp.raise_for_status()
        data = resp.json()
        title = data["title"]
        version = data["version"]["number"]
        body_html = data["body"]["storage"]["value"]

        # Match the heading text only INSIDE a single heading element. The
        # tempered ".": "any character that does not start a closing heading
        # tag" stops the lazy match from running out of one heading, through
        # body text, into a later heading. Without it the search text could
        # be found in a paragraph and the wrong section would be replaced.
        within_heading = r"(?:(?!</h[1-6]\s*>).)*?"
        heading_pattern = re.compile(
            r"<h([1-6])\b[^>]*>"
            + within_heading
            + re.escape(heading_text)
            + within_heading
            + r"</h\1\s*>",
            re.IGNORECASE | re.DOTALL,
        )
        match = heading_pattern.search(body_html)
        if not match:
            return f"Section '{section_heading}' not found on page."

        heading_level = match.group(1)
        section_start = match.end()

        # The section ends at the next heading of the same or a higher level
        # (h1..h<level>); deeper sub-headings belong to the section itself.
        next_heading = re.compile(
            rf"<h[1-{heading_level}]\b[^>]*>",
            re.IGNORECASE,
        )
        next_match = next_heading.search(body_html, section_start)
        section_end = next_match.start() if next_match else len(body_html)

        # Replace section content, keeping the heading and everything after.
        new_body = body_html[:section_start] + new_html + body_html[section_end:]

        # PUT updated page; the payload carries the version read above + 1.
        put_data = {
            "version": {"number": version + 1},
            "title": title,
            "type": "page",
            "body": {
                "storage": {
                    "value": new_body,
                    "representation": "storage",
                }
            },
        }
        resp = await client.put(url, json=put_data, auth=auth)
        resp.raise_for_status()

    return f"Section '{section_heading}' updated successfully."
def _build_e2ee_options() -> rtc.E2EEOptions:
"""Build E2EE options — let Rust FFI apply HKDF internally (KDF_HKDF=1).
@@ -698,12 +791,48 @@ class VoiceSession:
await _store_user_pref(caller_uid, "timezone", iana_timezone)
return f"Timezone set to {iana_timezone}"
@function_tool
async def read_confluence_page(page_id: str) -> str:
    """Read a Confluence page. Use when user asks to read, review,
    or check a document. Returns page title and content as text."""
    # NOTE(review): the docstring above is presumably what @function_tool
    # exposes to the model as the tool description, so its wording is kept.
    #
    # Never raises: any failure is logged and handed back as a text message.
    logger.info("CONFLUENCE_READ: page_id=%s", page_id)
    try:
        page_title, page_text, _ = await _confluence_read_page(page_id)
    except Exception as exc:
        logger.warning("CONFLUENCE_READ_FAIL: %s", exc)
        return f"Failed to read page: {exc}"

    logger.info("CONFLUENCE_READ_OK: %s (%d chars)", page_title, len(page_text))
    return f"Page: {page_title}\n\n{page_text}"
@function_tool
async def update_confluence_page(page_id: str, section_heading: str, new_content: str) -> str:
    """Update a section of a Confluence page. Use when user asks to
    change, update, or rewrite part of a document.
    - page_id: Confluence page ID
    - section_heading: heading text of the section to update
    - new_content: new plain text for the section (will be wrapped in <p> tags)
    Human sees changes instantly in their browser via Live Docs."""
    # NOTE(review): the docstring above is presumably what @function_tool
    # exposes to the model as the tool description, so its wording is kept.
    #
    # Never raises: any failure is logged and handed back as a text message.
    from html import escape  # function-scope import keeps the tool self-contained

    logger.info("CONFLUENCE_UPDATE: page=%s section='%s'", page_id, section_heading)
    try:
        # new_content is plain text, but the page body is XHTML-based storage
        # format. Escape &, < and > so text such as "R&D" or "a < b" cannot
        # produce malformed markup or inject tags. Quotes are left alone
        # because the text lands in element content, not in an attribute.
        new_html = f"<p>{escape(new_content, quote=False)}</p>"
        result = await _confluence_update_section(page_id, section_heading, new_html)
        logger.info("CONFLUENCE_UPDATE_OK: %s", result)
        return result
    except Exception as exc:
        logger.warning("CONFLUENCE_UPDATE_FAIL: %s", exc)
        return f"Failed to update page: {exc}"
instructions = _build_voice_prompt(model=self.model, timezone=user_timezone) + memory_section
if self._document_context:
instructions += f"\n\nDokument-Kontext (im Raum hochgeladen):\n{self._document_context}"
# Extract Confluence page IDs from document context for tool use
conf_ids = re.findall(r'confluence_page_id:(\d+)', self._document_context)
if conf_ids:
instructions += f"\n\nAktive Confluence-Seite(n): {', '.join(conf_ids)}. Nutze diese page_id fuer read_confluence_page und update_confluence_page."
agent = _NoiseFilterAgent(
instructions=instructions,
tools=[search_web, set_user_timezone],
tools=[search_web, set_user_timezone, read_confluence_page, update_confluence_page],
)
io_opts = room_io.RoomOptions(
participant_identity=remote_identity,