feat(MAT-46): Add think_deeper tool for Opus escalation in voice calls

Sonnet can now escalate complex questions to Opus via a function tool, following the same pattern as search_web and read_confluence_page. Context (the last 10 transcript entries plus up to 12,000 characters of the document) is passed automatically. The tool is triggered by user phrases like "denk genauer nach" / "think harder", or when Sonnet is unsure about a complex analysis.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-24 06:13:44 +02:00
parent de66ba5eea
commit 6081f9a7ec
1 changed files with 58 additions and 1 deletions
--- a/voice.py
+++ b/voice.py
@@ -864,6 +864,63 @@ class VoiceSession:
                    logger.warning("CONFLUENCE_UPDATE_FAIL: %s", exc)
                    return f"Failed to update page: {exc}"

+            # Deep thinking tool — escalates to Opus for complex questions
+            _transcript_ref = self._transcript  # bound once at setup; NOTE(review): if self._transcript is later rebound (not mutated in place) the tool keeps the old object — confirm
+            _doc_context_ref = self._document_context  # bound once at setup; a later rebinding of self._document_context is not seen by the tool
+
+            @function_tool  # NOTE(review): the docstring below is presumably consumed as the tool description for the model, so it is runtime text — TODO confirm before rewording
+            async def think_deeper(question: str) -> str:  # returns the escalated model's answer, or a German failure message if the request fails
+                """Denke intensiver ueber eine komplexe Frage nach mit einem staerkeren Modell.
+                Nutze dieses Tool wenn:
+                - Der Nutzer sagt "denk genauer nach", "think harder", "nimm opus", "use opus",
+                  "ueberleg nochmal", "analysier das genauer"
+                - Du dir bei einer komplexen Analyse, Code-Review oder Dokumentinterpretation unsicher bist
+                - Eine Frage mehrere Schritte logisches Denken erfordert
+
+                Beschreibe die Frage so praezise wie moeglich — der Kontext (Transkript + Dokument)
+                wird automatisch mitgeliefert."""
+                # Assemble the prompt in order: document context, recent transcript, then the question
+                context_parts = []
+                if _doc_context_ref:
+                    context_parts.append(f"Dokument-Kontext:\n{_doc_context_ref[:12000]}")  # document context is cut to its first 12000 characters
+                recent = _transcript_ref[-10:] if _transcript_ref else []  # only the last 10 transcript entries are forwarded
+                if recent:
+                    lines = []
+                    for e in recent:
+                        role = "Nutzer" if e["role"] == "user" else "Assistent"  # every role other than "user" is labelled "Assistent"
+                        lines.append(f"{role}: {e['text']}")  # a missing "role"/"text" key raises KeyError here, outside the try below
+                    context_parts.append(f"Gespraechsverlauf:\n" + "\n".join(lines))
+                context_parts.append(f"Frage: {question}")  # the question always comes last, even when no context is available
+                full_prompt = "\n\n---\n\n".join(context_parts)  # sections are separated by a "---" line
+
+                logger.info("THINK_DEEPER: %s (context=%d chars)", question[:100], len(full_prompt))  # logs only the first 100 chars of the question
+                try:
+                    async with httpx.AsyncClient(timeout=60.0) as client:  # a new client is created and closed for each call
+                        resp = await client.post(
+                            f"{LITELLM_URL}/chat/completions",  # LITELLM_URL and LITELLM_KEY are defined outside this hunk
+                            headers={"Authorization": f"Bearer {LITELLM_KEY}"},
+                            json={
+                                "model": "claude-opus",  # model name as requested from the LITELLM_URL endpoint
+                                "messages": [
+                                    {"role": "system", "content": (
+                                        "Du bist ein Experte fuer tiefgehende Analyse. "
+                                        "Beantworte die Frage praezise und ausfuehrlich basierend auf dem Kontext. "
+                                        "Antworte in der Sprache der Frage."
+                                    )},
+                                    {"role": "user", "content": full_prompt},
+                                ],
+                                "max_tokens": 1500,  # requested cap on the length of the generated answer
+                            },
+                        )
+                        resp.raise_for_status()  # error status codes raise here and are handled by the except below
+                        data = resp.json()
+                        answer = data["choices"][0]["message"]["content"]  # text of the first returned choice
+                        logger.info("THINK_DEEPER_OK: %s", answer[:200])  # logs only the first 200 chars of the answer
+                        return answer
+                except Exception as exc:  # any failure in the request or response parsing becomes a returned message instead of propagating
+                    logger.warning("THINK_DEEPER_FAIL: %s", exc)
+                    return f"Tiefere Analyse fehlgeschlagen: {exc}"  # NOTE(review): raw exception text is handed back to the caller — check it cannot expose internal details
+
            instructions = _build_voice_prompt(model=self.model, timezone=user_timezone) + memory_section
            if self._document_context:
                instructions += f"\n\nDokument-Kontext (im Raum hochgeladen):\n{self._document_context}"
@@ -871,7 +928,7 @@ class VoiceSession:
                    instructions += f"\n\nAktive Confluence-Seite: {_active_conf_id}. Du brauchst den Nutzer NICHT nach der page_id zu fragen — nutze automatisch diese ID fuer read_confluence_page und update_confluence_page."
            agent = _NoiseFilterAgent(
                instructions=instructions,
-                tools=[search_web, set_user_timezone, read_confluence_page, update_confluence_page],
+                tools=[search_web, set_user_timezone, read_confluence_page, update_confluence_page, think_deeper],
            )
            io_opts = room_io.RoomOptions(
                participant_identity=remote_identity,