From 6081f9a7ec0fedf6b26a5e7b9689bd3c66f04f2a Mon Sep 17 00:00:00 2001
From: Christian Gick
Date: Tue, 24 Feb 2026 06:13:44 +0200
Subject: [PATCH] feat(MAT-46): Add think_deeper tool for Opus escalation in
 voice calls

Sonnet can now escalate complex questions to Opus via a function tool,
same pattern as search_web and read_confluence_page. Full context
(transcript + document) is passed automatically.

Triggered by user phrases like "denk genauer nach" / "think harder" or
when Sonnet is unsure about complex analysis.

Co-Authored-By: Claude Opus 4.6
---
 voice.py | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/voice.py b/voice.py
index 375b66c..0953b34 100644
--- a/voice.py
+++ b/voice.py
@@ -864,6 +864,63 @@ class VoiceSession:
             logger.warning("CONFLUENCE_UPDATE_FAIL: %s", exc)
             return f"Failed to update page: {exc}"
 
+        # Deep thinking tool — escalates to Opus for complex questions
+        _transcript_ref = self._transcript
+        _doc_context_ref = self._document_context
+
+        @function_tool
+        async def think_deeper(question: str) -> str:
+            """Denke intensiver ueber eine komplexe Frage nach mit einem staerkeren Modell.
+
+            Nutze dieses Tool wenn:
+            - Der Nutzer sagt "denk genauer nach", "think harder", "nimm opus", "use opus",
+              "ueberleg nochmal", "analysier das genauer"
+            - Du dir bei einer komplexen Analyse, Code-Review oder Dokumentinterpretation unsicher bist
+            - Eine Frage mehrere Schritte logisches Denken erfordert
+
+            Beschreibe die Frage so praezise wie moeglich — der Kontext (Transkript + Dokument)
+            wird automatisch mitgeliefert."""
+            # Build context: recent transcript + document
+            context_parts = []
+            if _doc_context_ref:
+                context_parts.append(f"Dokument-Kontext:\n{_doc_context_ref[:12000]}")
+            recent = _transcript_ref[-10:] if _transcript_ref else []
+            if recent:
+                lines = []
+                for e in recent:
+                    role = "Nutzer" if e["role"] == "user" else "Assistent"
+                    lines.append(f"{role}: {e['text']}")
+                context_parts.append(f"Gespraechsverlauf:\n" + "\n".join(lines))
+            context_parts.append(f"Frage: {question}")
+            full_prompt = "\n\n---\n\n".join(context_parts)
+
+            logger.info("THINK_DEEPER: %s (context=%d chars)", question[:100], len(full_prompt))
+            try:
+                async with httpx.AsyncClient(timeout=60.0) as client:
+                    resp = await client.post(
+                        f"{LITELLM_URL}/chat/completions",
+                        headers={"Authorization": f"Bearer {LITELLM_KEY}"},
+                        json={
+                            "model": "claude-opus",
+                            "messages": [
+                                {"role": "system", "content": (
+                                    "Du bist ein Experte fuer tiefgehende Analyse. "
+                                    "Beantworte die Frage praezise und ausfuehrlich basierend auf dem Kontext. "
+                                    "Antworte in der Sprache der Frage."
+                                )},
+                                {"role": "user", "content": full_prompt},
+                            ],
+                            "max_tokens": 1500,
+                        },
+                    )
+                resp.raise_for_status()
+                data = resp.json()
+                answer = data["choices"][0]["message"]["content"]
+                logger.info("THINK_DEEPER_OK: %s", answer[:200])
+                return answer
+            except Exception as exc:
+                logger.warning("THINK_DEEPER_FAIL: %s", exc)
+                return f"Tiefere Analyse fehlgeschlagen: {exc}"
+
         instructions = _build_voice_prompt(model=self.model, timezone=user_timezone) + memory_section
         if self._document_context:
             instructions += f"\n\nDokument-Kontext (im Raum hochgeladen):\n{self._document_context}"
@@ -871,7 +928,7 @@ class VoiceSession:
             instructions += f"\n\nAktive Confluence-Seite: {_active_conf_id}. Du brauchst den Nutzer NICHT nach der page_id zu fragen — nutze automatisch diese ID fuer read_confluence_page und update_confluence_page."
         agent = _NoiseFilterAgent(
             instructions=instructions,
-            tools=[search_web, set_user_timezone, read_confluence_page, update_confluence_page],
+            tools=[search_web, set_user_timezone, read_confluence_page, update_confluence_page, think_deeper],
         )
         io_opts = room_io.RoomOptions(
             participant_identity=remote_identity,