fix(bot): prevent dangling preamble + force final summary on tool-loop exhaustion
Two compounding streaming bugs caused the bot to render only a 'Gute Frage — lass mich' preamble when claude-haiku spent all MAX_TOOL_ITERATIONS=5 on tool calls without producing final text. 1. Preamble leak: the stream posted the first content delta as soon as it crossed MIN_CHARS_BEFORE_POST=20, before any tool_calls deltas had arrived. Added a 1.2s TOOL_GRACE_SECONDS buffer so the suppression path catches the upcoming tool_calls before we go visible. 2. No final synthesis: when the loop exhausted its iterations while the model was still requesting tools, `reply` was empty and the orphaned preamble stayed on screen. Added a forced tools=None final call to make the model summarize the accumulated tool results before send/edit.
This commit is contained in:
34
bot.py
34
bot.py
@@ -3283,6 +3283,30 @@ class Bot:
|
||||
if iteration > 0:
|
||||
sentry_sdk.set_tag("used_tools", "true")
|
||||
|
||||
# If the loop exhausted MAX_TOOL_ITERATIONS while the model was still
|
||||
# requesting tools, `reply` is empty and tool results sit unsummarized
|
||||
# in `messages`. Force one final text-only turn so the user sees a
|
||||
# synthesis instead of the dangling preamble we already streamed.
|
||||
if not reply and tool_calls:
|
||||
logger.info(
|
||||
"[stream] hit MAX_TOOL_ITERATIONS=%d still requesting tools; forcing final summary",
|
||||
MAX_TOOL_ITERATIONS,
|
||||
)
|
||||
try:
|
||||
final_resp = await self.llm.chat.completions.create(
|
||||
model=model,
|
||||
messages=messages + [{
|
||||
"role": "user",
|
||||
"content": "Bitte fasse jetzt deine Recherche zusammen — keine weiteren Tool-Aufrufe.",
|
||||
}],
|
||||
max_tokens=2048,
|
||||
tools=None,
|
||||
)
|
||||
reply = (final_resp.choices[0].message.content or "").strip()
|
||||
except Exception:
|
||||
logger.warning("[stream] forced final-summary call failed", exc_info=True)
|
||||
reply = "_(Recherche lief in Tool-Schleife — bitte gezielter nachfragen.)_"
|
||||
|
||||
# Send / finalize reply. If we streamed, do a final edit only if
|
||||
# the complete text differs from what was last sent (avoids the
|
||||
# "(bearbeitet)" / "(edited)" indicator for unchanged messages).
|
||||
@@ -3754,8 +3778,10 @@ class Bot:
|
||||
event_id = prior_event_id
|
||||
last_edit = 0.0
|
||||
last_sent_text: str = "" # track what was last sent to Matrix to avoid redundant edits
|
||||
first_content_time: float = 0.0 # monotonic time of first content delta
|
||||
EDIT_THROTTLE = 0.6 # seconds — keep Matrix edit traffic reasonable
|
||||
MIN_CHARS_BEFORE_POST = 20 # avoid posting a single character first
|
||||
TOOL_GRACE_SECONDS = 1.2 # buffer initial content this long; tool_calls deltas usually arrive within ~500ms
|
||||
|
||||
try:
|
||||
stream = await self.llm.chat.completions.create(
|
||||
@@ -3816,7 +3842,13 @@ class Bot:
|
||||
# Suppress visible streaming once we know this turn will end in tool calls
|
||||
if not tool_calls_acc:
|
||||
now = time.monotonic()
|
||||
if now - last_edit >= EDIT_THROTTLE:
|
||||
if first_content_time == 0.0:
|
||||
first_content_time = now
|
||||
# Grace period: hold first post long enough for tool_calls deltas
|
||||
# to start arriving, so we never leak a "Gute Frage — lass mich…"
|
||||
# preamble that the model intends to follow with tool calls.
|
||||
grace_passed = (event_id is not None) or (now - first_content_time >= TOOL_GRACE_SECONDS)
|
||||
if grace_passed and now - last_edit >= EDIT_THROTTLE:
|
||||
text_so_far = "".join(content_parts)
|
||||
if len(text_so_far) >= MIN_CHARS_BEFORE_POST:
|
||||
if event_id is None:
|
||||
|
||||
Reference in New Issue
Block a user