fix(MAT): skip redundant stream edit + retry empty responses with escalation model
Some checks failed
Build & Deploy / test (push) Successful in 11s
Tests / test (push) Successful in 9s
Build & Deploy / build-and-deploy (push) Failing after 8s

1. Track last-sent text during streaming, skip final m.replace edit when
   content is identical — eliminates spurious '(bearbeitet)' indicator.
2. When base model (haiku) returns empty content + no tool calls, auto-retry
   with escalation model (sonnet) before giving up.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-04-16 16:31:18 +03:00
parent 0c0a424004
commit e41a3bff78

29
bot.py
View File

@@ -3217,11 +3217,12 @@ class Bot:
try: try:
reply = "" reply = ""
last_sent_text = ""
streamed_event_id: str | None = None # set when streaming has already posted a message in Matrix streamed_event_id: str | None = None # set when streaming has already posted a message in Matrix
# Agentic tool-calling loop: iterate up to MAX_TOOL_ITERATIONS # Agentic tool-calling loop: iterate up to MAX_TOOL_ITERATIONS
for iteration in range(MAX_TOOL_ITERATIONS): for iteration in range(MAX_TOOL_ITERATIONS):
content, tool_calls, usage, streamed_event_id = await self._stream_chat_completion( content, tool_calls, usage, streamed_event_id, last_sent_text = await self._stream_chat_completion(
room_id=room.room_id, room_id=room.room_id,
model=model, model=model,
messages=messages, messages=messages,
@@ -3242,6 +3243,12 @@ class Bot:
}, },
) )
# Empty response with no tool calls — retry once with escalation model
if not content and not tool_calls and model != ESCALATION_MODEL:
logger.warning("[empty-response] %s returned nothing, retrying with %s", model, ESCALATION_MODEL)
model = ESCALATION_MODEL
continue
if not tool_calls: if not tool_calls:
# No tool calls — final text response # No tool calls — final text response
break break
@@ -3276,12 +3283,13 @@ class Bot:
if iteration > 0: if iteration > 0:
sentry_sdk.set_tag("used_tools", "true") sentry_sdk.set_tag("used_tools", "true")
# Send / finalize reply. If we streamed, just do a final edit so the # Send / finalize reply. If we streamed, do a final edit only if
# Matrix message reflects the complete text (otherwise progressive # the complete text differs from what was last sent (avoids the
# throttling may have stopped short of the last tokens). # "(bearbeitet)" / "(edited)" indicator for unchanged messages).
if reply: if reply:
if streamed_event_id: if streamed_event_id:
await self._send_stream_edit(room.room_id, streamed_event_id, reply, final=True) if reply != last_sent_text:
await self._send_stream_edit(room.room_id, streamed_event_id, reply, final=True)
else: else:
await self._send_text(room.room_id, reply) await self._send_text(room.room_id, reply)
@@ -3728,22 +3736,24 @@ class Bot:
messages: list[dict], messages: list[dict],
tools: list | None, tools: list | None,
prior_event_id: str | None = None, prior_event_id: str | None = None,
) -> tuple[str, list[dict] | None, dict | None, str | None]: ) -> tuple[str, list[dict] | None, dict | None, str | None, str]:
"""Stream one chat completion turn. """Stream one chat completion turn.
Progressively edits a Matrix message as content tokens arrive (unless Progressively edits a Matrix message as content tokens arrive (unless
tool_calls have started — those suppress visible streaming until the tool_calls have started — those suppress visible streaming until the
model settles on plain text on a later iteration). model settles on plain text on a later iteration).
Returns (content, tool_calls or None, usage dict or None, event_id). Returns (content, tool_calls or None, usage dict or None, event_id, last_sent_text).
`event_id` is the Matrix event we've been streaming into, or None if `event_id` is the Matrix event we've been streaming into, or None if
we didn't (yet) post a visible message this turn. we didn't (yet) post a visible message this turn.
`last_sent_text` is the text last sent/edited to Matrix (for dedup).
""" """
content_parts: list[str] = [] content_parts: list[str] = []
tool_calls_acc: dict[int, dict] = {} tool_calls_acc: dict[int, dict] = {}
usage: dict | None = None usage: dict | None = None
event_id = prior_event_id event_id = prior_event_id
last_edit = 0.0 last_edit = 0.0
last_sent_text: str = "" # track what was last sent to Matrix to avoid redundant edits
EDIT_THROTTLE = 0.6 # seconds — keep Matrix edit traffic reasonable EDIT_THROTTLE = 0.6 # seconds — keep Matrix edit traffic reasonable
MIN_CHARS_BEFORE_POST = 20 # avoid posting a single character first MIN_CHARS_BEFORE_POST = 20 # avoid posting a single character first
@@ -3773,7 +3783,7 @@ class Bot:
"prompt_tokens": getattr(resp.usage, "prompt_tokens", 0), "prompt_tokens": getattr(resp.usage, "prompt_tokens", 0),
"completion_tokens": getattr(resp.usage, "completion_tokens", 0), "completion_tokens": getattr(resp.usage, "completion_tokens", 0),
} }
return choice.message.content or "", tc_list, u, event_id return choice.message.content or "", tc_list, u, event_id, ""
async for chunk in stream: async for chunk in stream:
if not chunk.choices: if not chunk.choices:
@@ -3813,6 +3823,7 @@ class Bot:
event_id = await self._send_stream_start(room_id, text_so_far) event_id = await self._send_stream_start(room_id, text_so_far)
else: else:
await self._send_stream_edit(room_id, event_id, text_so_far) await self._send_stream_edit(room_id, event_id, text_so_far)
last_sent_text = text_so_far
last_edit = now last_edit = now
# Some providers attach usage to the last choice chunk # Some providers attach usage to the last choice chunk
@@ -3874,7 +3885,7 @@ class Bot:
"[stream] model=%s chars=%d tool_calls=%d streamed_to_matrix=%s", "[stream] model=%s chars=%d tool_calls=%d streamed_to_matrix=%s",
model, len(content), len(tc_list or []), event_id is not None, model, len(content), len(tc_list or []), event_id is not None,
) )
return content, tc_list, usage, event_id return content, tc_list, usage, event_id, last_sent_text
async def _get_call_encryption_key(self, room_id: str, sender: str, caller_device_id: str = "") -> bytes | None: async def _get_call_encryption_key(self, room_id: str, sender: str, caller_device_id: str = "") -> bytes | None:
"""Read E2EE encryption key from call.member state (MSC4143) or timeline (legacy). """Read E2EE encryption key from call.member state (MSC4143) or timeline (legacy).