fix(bot): prevent dangling preamble + force final summary on tool-loop exhaustion
Two compounding streaming bugs caused the bot to render only a 'Gute Frage — lass mich' preamble when claude-haiku spent all MAX_TOOL_ITERATIONS=5 on tool calls without producing final text. 1. Preamble leak: the stream posted the first content delta as soon as it crossed MIN_CHARS_BEFORE_POST=20, before any tool_calls deltas had arrived. Added a 1.2s TOOL_GRACE_SECONDS buffer so the suppression path catches the upcoming tool_calls before we go visible. 2. No final synthesis: when the loop exhausted its iterations while the model was still requesting tools, `reply` was empty and the orphaned preamble stayed on screen. Added a forced tools=None final call to make the model summarize the accumulated tool results before send/edit.
This commit is contained in:
34
bot.py
34
bot.py
@@ -3283,6 +3283,30 @@ class Bot:
|
||||
if iteration > 0:
|
||||
sentry_sdk.set_tag("used_tools", "true")
|
||||
|
||||
# If the loop exhausted MAX_TOOL_ITERATIONS while the model was still
|
||||
# requesting tools, `reply` is empty and tool results sit unsummarized
|
||||
# in `messages`. Force one final text-only turn so the user sees a
|
||||
# synthesis instead of the dangling preamble we already streamed.
|
||||
if not reply and tool_calls:
|
||||
logger.info(
|
||||
"[stream] hit MAX_TOOL_ITERATIONS=%d still requesting tools; forcing final summary",
|
||||
MAX_TOOL_ITERATIONS,
|
||||
)
|
||||
try:
|
||||
final_resp = await self.llm.chat.completions.create(
|
||||
model=model,
|
||||
messages=messages + [{
|
||||
"role": "user",
|
||||
"content": "Bitte fasse jetzt deine Recherche zusammen — keine weiteren Tool-Aufrufe.",
|
||||
}],
|
||||
max_tokens=2048,
|
||||
tools=None,
|
||||
)
|
||||
reply = (final_resp.choices[0].message.content or "").strip()
|
||||
except Exception:
|
||||
logger.warning("[stream] forced final-summary call failed", exc_info=True)
|
||||
reply = "_(Recherche lief in Tool-Schleife — bitte gezielter nachfragen.)_"
|
||||
|
||||
# Send / finalize reply. If we streamed, do a final edit only if
|
||||
# the complete text differs from what was last sent (avoids the
|
||||
# "(bearbeitet)" / "(edited)" indicator for unchanged messages).
|
||||
@@ -3754,8 +3778,10 @@ class Bot:
|
||||
event_id = prior_event_id
|
||||
last_edit = 0.0
|
||||
last_sent_text: str = "" # track what was last sent to Matrix to avoid redundant edits
|
||||
first_content_time: float = 0.0 # monotonic time of first content delta
|
||||
EDIT_THROTTLE = 0.6 # seconds — keep Matrix edit traffic reasonable
|
||||
MIN_CHARS_BEFORE_POST = 20 # avoid posting a single character first
|
||||
TOOL_GRACE_SECONDS = 1.2 # buffer initial content this long; tool_calls deltas usually arrive within ~500ms
|
||||
|
||||
try:
|
||||
stream = await self.llm.chat.completions.create(
|
||||
@@ -3816,7 +3842,13 @@ class Bot:
|
||||
# Suppress visible streaming once we know this turn will end in tool calls
|
||||
if not tool_calls_acc:
|
||||
now = time.monotonic()
|
||||
if now - last_edit >= EDIT_THROTTLE:
|
||||
if first_content_time == 0.0:
|
||||
first_content_time = now
|
||||
# Grace period: hold first post long enough for tool_calls deltas
|
||||
# to start arriving, so we never leak a "Gute Frage — lass mich…"
|
||||
# preamble that the model intends to follow with tool calls.
|
||||
grace_passed = (event_id is not None) or (now - first_content_time >= TOOL_GRACE_SECONDS)
|
||||
if grace_passed and now - last_edit >= EDIT_THROTTLE:
|
||||
text_so_far = "".join(content_parts)
|
||||
if len(text_so_far) >= MIN_CHARS_BEFORE_POST:
|
||||
if event_id is None:
|
||||
|
||||
Reference in New Issue
Block a user