From 7087fbf733a557d9e7bf76367679a88afacd346a Mon Sep 17 00:00:00 2001
From: Christian Gick
Date: Sat, 18 Apr 2026 05:25:12 +0000
Subject: [PATCH] fix(bot): prevent dangling preamble + force final summary on
 tool-loop exhaustion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two compounding streaming bugs caused the bot to render only a
'Gute Frage — lass mich' preamble when claude-haiku spent all
MAX_TOOL_ITERATIONS=5 on tool calls without producing final text.

1. Preamble leak: the stream posted the first content delta as soon as
it crossed MIN_CHARS_BEFORE_POST=20, before tool_calls deltas had
arrived. Added a 1.2s TOOL_GRACE_SECONDS buffer so the suppression path
catches the upcoming tool_calls before we go visible.

2. No final synthesis: when the loop exhausted iterations while still
requesting tools, reply was empty and the orphaned preamble stayed on
screen. Added a forced tools=None final call to make the model
summarize accumulated tool results before send/edit.
---
 bot.py | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/bot.py b/bot.py
index 8e5c8c9..5fa7e38 100644
--- a/bot.py
+++ b/bot.py
@@ -3283,6 +3283,30 @@ class Bot:
         if iteration > 0:
             sentry_sdk.set_tag("used_tools", "true")
 
+        # If the loop exhausted MAX_TOOL_ITERATIONS while the model was still
+        # requesting tools, `reply` is empty and tool results sit unsummarized
+        # in `messages`. Force one final text-only turn so the user sees a
+        # synthesis instead of the dangling preamble we already streamed.
+        if not reply and tool_calls:
+            logger.info(
+                "[stream] hit MAX_TOOL_ITERATIONS=%d still requesting tools; forcing final summary",
+                MAX_TOOL_ITERATIONS,
+            )
+            try:
+                final_resp = await self.llm.chat.completions.create(
+                    model=model,
+                    messages=messages + [{
+                        "role": "user",
+                        "content": "Bitte fasse jetzt deine Recherche zusammen — keine weiteren Tool-Aufrufe.",
+                    }],
+                    max_tokens=2048,
+                    tools=None,
+                )
+                reply = (final_resp.choices[0].message.content or "").strip()
+            except Exception:
+                logger.warning("[stream] forced final-summary call failed", exc_info=True)
+                reply = "_(Recherche lief in Tool-Schleife — bitte gezielter nachfragen.)_"
+
         # Send / finalize reply. If we streamed, do a final edit only if
         # the complete text differs from what was last sent (avoids the
         # "(bearbeitet)" / "(edited)" indicator for unchanged messages).
@@ -3754,8 +3778,10 @@ class Bot:
         event_id = prior_event_id
         last_edit = 0.0
         last_sent_text: str = ""  # track what was last sent to Matrix to avoid redundant edits
+        first_content_time: float = 0.0  # monotonic time of first content delta
         EDIT_THROTTLE = 0.6  # seconds — keep Matrix edit traffic reasonable
         MIN_CHARS_BEFORE_POST = 20  # avoid posting a single character first
+        TOOL_GRACE_SECONDS = 1.2  # buffer initial content this long; tool_calls deltas usually arrive within ~500ms
 
         try:
             stream = await self.llm.chat.completions.create(
@@ -3816,7 +3842,13 @@ class Bot:
                     # Suppress visible streaming once we know this turn will end in tool calls
                     if not tool_calls_acc:
                         now = time.monotonic()
-                        if now - last_edit >= EDIT_THROTTLE:
+                        if first_content_time == 0.0:
+                            first_content_time = now
+                        # Grace period: hold first post long enough for tool_calls deltas
+                        # to start arriving, so we never leak a "Gute Frage — lass mich…"
+                        # preamble that the model intends to follow with tool calls.
+                        grace_passed = (event_id is not None) or (now - first_content_time >= TOOL_GRACE_SECONDS)
+                        if grace_passed and now - last_edit >= EDIT_THROTTLE:
                             text_so_far = "".join(content_parts)
                             if len(text_so_far) >= MIN_CHARS_BEFORE_POST:
                                 if event_id is None: