feat(MAT-46): Extract and post document annotations after voice calls

When a voice call ends and a document was loaded in the room, the bot now analyzes the transcript for document-specific changes and corrections and posts them as a structured "Dokument-Aenderungen" message. Nothing is posted if no document changes were discussed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 20:18:00 +02:00
parent a4b5c5da86
commit de66ba5eea
2 changed files with 74 additions and 0 deletions
--- a/bot.py
+++ b/bot.py
@@ -529,6 +529,8 @@ class Bot:
                vs = self.voice_sessions.pop(room_id, None)
                if vs:
                    transcript = vs.get_transcript()
                    doc_context = vs.get_document_context()
                    confluence_page_id = vs.get_confluence_page_id()
                    try:
                        await vs.stop()
                        logger.info("Voice session stopped for %s", room_id)
@@ -541,6 +543,16 @@ class Bot:
                            await self._send_text(room_id, f"**Anruf-Zusammenfassung:**\n\n{summary}")
                        except Exception:
                            logger.exception("Failed to post call summary for %s", room_id)
                        # Extract and post document annotations if a document was discussed
                        if doc_context:
                            try:
                                annotations = await self._extract_document_annotations(
                                    transcript, doc_context, room_id)
                                if annotations:
                                    await self._send_text(room_id,
                                        f"**Dokument-Aenderungen:**\n\n{annotations}")
                            except Exception:
                                logger.exception("Failed to post document annotations for %s", room_id)
                # Leave the call too
                self.active_calls.discard(room_id)
@@ -1735,6 +1747,57 @@ class Bot:
            logger.warning("Call summary LLM failed, falling back to raw transcript", exc_info=True)
            return "\n".join(lines[-20:])
    async def _extract_document_annotations(
        self, transcript: list[dict], doc_context: str, room_id: str
    ) -> str | None:
        """Extract document changes discussed during a voice call via the LLM.

        Sends the tail of the call transcript together with an excerpt of the
        document to the chat-completion client and asks for a bullet list of
        the changes the user requested.

        Args:
            transcript: Call transcript entries; each entry must provide
                ``"role"`` and ``"text"`` keys. Only the last 30 entries
                are put into the prompt.
            doc_context: Text of the document loaded for the call. Only the
                first 8000 characters are put into the prompt.
            room_id: Room the call took place in; used to pick the model
                from ``self.room_models`` and in log messages.

        Returns:
            The model's list of changes, or ``None`` when no LLM client is
            configured, the transcript is empty, the model answers with the
            ``KEINE_AENDERUNGEN`` sentinel or with no text, or the request
            fails.
        """
        # Cheap guards first: without a client or any spoken turns there is
        # nothing to analyse, so skip prompt building and the LLM round trip.
        if not self.llm or not transcript:
            return None

        # Only the most recent turns are sent to keep the prompt bounded.
        transcript_text = "\n".join(
            f"{'Nutzer' if entry['role'] == 'user' else 'Assistent'}: {entry['text']}"
            for entry in transcript[-30:]
        )
        # Truncate document context for the prompt
        doc_excerpt = doc_context[:8000]

        try:
            model = self.room_models.get(room_id, DEFAULT_MODEL)
            resp = await self.llm.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": (
                        "Analysiere das Anruf-Transkript im Kontext des besprochenen Dokuments. "
                        "Extrahiere ALLE konkreten Aenderungen, Korrekturen, Ergaenzungen oder "
                        "Anmerkungen die der Nutzer zum Dokument gemacht hat.\n\n"
                        "Format:\n"
                        "- Jede Aenderung als Bullet Point\n"
                        "- Zitiere den betroffenen Abschnitt/Satz wenn moeglich\n"
                        "- Beschreibe was geaendert werden soll\n\n"
                        "Wenn KEINE Dokument-Aenderungen besprochen wurden, antworte NUR mit: KEINE_AENDERUNGEN\n\n"
                        "Antworte in der Sprache des Gespraechs."
                    )},
                    {"role": "user", "content": (
                        f"Dokument:\n{doc_excerpt}\n\n---\n\nTranskript:\n{transcript_text}"
                    )},
                ],
                max_tokens=800,
            )
            # NOTE(review): assuming an OpenAI-style client, ``content`` may be
            # None; treat that as "no text" instead of as a request failure.
            result = (resp.choices[0].message.content or "").strip()
        except Exception:
            # Best effort: annotation extraction must never break call teardown.
            logger.warning(
                "Document annotation extraction failed for %s", room_id, exc_info=True)
            return None

        # An empty reply carries no annotations either, so normalise it to None
        # just like the explicit sentinel.
        if not result or "KEINE_AENDERUNGEN" in result:
            logger.info("No document annotations found in call for %s", room_id)
            return None
        logger.info("Extracted document annotations for %s: %s", room_id, result[:200])
        return result
    async def _send_text(self, room_id: str, text: str):
        await self.client.room_send(
            room_id,
--- a/voice.py
+++ b/voice.py
@@ -498,6 +498,17 @@ class VoiceSession:
        """Return the call transcript as a list of {role, text} dicts."""
        return list(self._transcript)
    def get_document_context(self) -> str | None:
        """Return the document context loaded for this call, if any."""
        return self._document_context
    def get_confluence_page_id(self) -> str | None:
        """Return the active Confluence page ID, if any."""
        if not self._document_context:
            return None
        ids = re.findall(r'confluence_page_id:(\d+)', self._document_context)
        return ids[0] if ids else None
    async def _run(self):
        try:
            user_id = self.nio_client.user_id