fix(bot): prevent dangling preamble + force final summary on tool-loop exhaustion
Two compounding streaming bugs caused the bot to render only a "Gute Frage — lass mich" preamble when claude-haiku spent all MAX_TOOL_ITERATIONS=5 iterations on tool calls without ever producing final text.

1. Preamble leak: the stream posted the first content delta as soon as it crossed MIN_CHARS_BEFORE_POST=20, before any tool_calls deltas had arrived. Added a 1.2 s TOOL_GRACE_SECONDS buffer so the suppression path can catch the upcoming tool_calls before the message goes visible.

2. No final synthesis: when the loop exhausted its iterations while the model was still requesting tools, `reply` was empty and the orphaned preamble stayed on screen. Added a forced tools=None final call so the model summarizes the accumulated tool results before send/edit.
This commit is contained in:
34
bot.py
34
bot.py
@@ -3283,6 +3283,30 @@ class Bot:
|
|||||||
if iteration > 0:
|
if iteration > 0:
|
||||||
sentry_sdk.set_tag("used_tools", "true")
|
sentry_sdk.set_tag("used_tools", "true")
|
||||||
|
|
||||||
|
# If the loop exhausted MAX_TOOL_ITERATIONS while the model was still
|
||||||
|
# requesting tools, `reply` is empty and tool results sit unsummarized
|
||||||
|
# in `messages`. Force one final text-only turn so the user sees a
|
||||||
|
# synthesis instead of the dangling preamble we already streamed.
|
||||||
|
if not reply and tool_calls:
|
||||||
|
logger.info(
|
||||||
|
"[stream] hit MAX_TOOL_ITERATIONS=%d still requesting tools; forcing final summary",
|
||||||
|
MAX_TOOL_ITERATIONS,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
final_resp = await self.llm.chat.completions.create(
|
||||||
|
model=model,
|
||||||
|
messages=messages + [{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Bitte fasse jetzt deine Recherche zusammen — keine weiteren Tool-Aufrufe.",
|
||||||
|
}],
|
||||||
|
max_tokens=2048,
|
||||||
|
tools=None,
|
||||||
|
)
|
||||||
|
reply = (final_resp.choices[0].message.content or "").strip()
|
||||||
|
except Exception:
|
||||||
|
logger.warning("[stream] forced final-summary call failed", exc_info=True)
|
||||||
|
reply = "_(Recherche lief in Tool-Schleife — bitte gezielter nachfragen.)_"
|
||||||
|
|
||||||
# Send / finalize reply. If we streamed, do a final edit only if
|
# Send / finalize reply. If we streamed, do a final edit only if
|
||||||
# the complete text differs from what was last sent (avoids the
|
# the complete text differs from what was last sent (avoids the
|
||||||
# "(bearbeitet)" / "(edited)" indicator for unchanged messages).
|
# "(bearbeitet)" / "(edited)" indicator for unchanged messages).
|
||||||
@@ -3754,8 +3778,10 @@ class Bot:
|
|||||||
event_id = prior_event_id
|
event_id = prior_event_id
|
||||||
last_edit = 0.0
|
last_edit = 0.0
|
||||||
last_sent_text: str = "" # track what was last sent to Matrix to avoid redundant edits
|
last_sent_text: str = "" # track what was last sent to Matrix to avoid redundant edits
|
||||||
|
first_content_time: float = 0.0 # monotonic time of first content delta
|
||||||
EDIT_THROTTLE = 0.6 # seconds — keep Matrix edit traffic reasonable
|
EDIT_THROTTLE = 0.6 # seconds — keep Matrix edit traffic reasonable
|
||||||
MIN_CHARS_BEFORE_POST = 20 # avoid posting a single character first
|
MIN_CHARS_BEFORE_POST = 20 # avoid posting a single character first
|
||||||
|
TOOL_GRACE_SECONDS = 1.2 # buffer initial content this long; tool_calls deltas usually arrive within ~500ms
|
||||||
|
|
||||||
try:
|
try:
|
||||||
stream = await self.llm.chat.completions.create(
|
stream = await self.llm.chat.completions.create(
|
||||||
@@ -3816,7 +3842,13 @@ class Bot:
|
|||||||
# Suppress visible streaming once we know this turn will end in tool calls
|
# Suppress visible streaming once we know this turn will end in tool calls
|
||||||
if not tool_calls_acc:
|
if not tool_calls_acc:
|
||||||
now = time.monotonic()
|
now = time.monotonic()
|
||||||
if now - last_edit >= EDIT_THROTTLE:
|
if first_content_time == 0.0:
|
||||||
|
first_content_time = now
|
||||||
|
# Grace period: hold first post long enough for tool_calls deltas
|
||||||
|
# to start arriving, so we never leak a "Gute Frage — lass mich…"
|
||||||
|
# preamble that the model intends to follow with tool calls.
|
||||||
|
grace_passed = (event_id is not None) or (now - first_content_time >= TOOL_GRACE_SECONDS)
|
||||||
|
if grace_passed and now - last_edit >= EDIT_THROTTLE:
|
||||||
text_so_far = "".join(content_parts)
|
text_so_far = "".join(content_parts)
|
||||||
if len(text_so_far) >= MIN_CHARS_BEFORE_POST:
|
if len(text_so_far) >= MIN_CHARS_BEFORE_POST:
|
||||||
if event_id is None:
|
if event_id is None:
|
||||||
|
|||||||
Reference in New Issue
Block a user