fix(MAT): skip redundant stream edit + retry empty responses with escalation model
1. Track last-sent text during streaming, skip final m.replace edit when content is identical — eliminates spurious '(bearbeitet)' indicator. 2. When base model (haiku) returns empty content + no tool calls, auto-retry with escalation model (sonnet) before giving up. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
bot.py — 27 additions, 27 deletions
@@ -3217,11 +3217,12 @@ class Bot:
|
||||
|
||||
try:
|
||||
reply = ""
|
||||
last_sent_text = ""
|
||||
streamed_event_id: str | None = None # set when streaming has already posted a message in Matrix
|
||||
|
||||
# Agentic tool-calling loop: iterate up to MAX_TOOL_ITERATIONS
|
||||
for iteration in range(MAX_TOOL_ITERATIONS):
|
||||
content, tool_calls, usage, streamed_event_id = await self._stream_chat_completion(
|
||||
content, tool_calls, usage, streamed_event_id, last_sent_text = await self._stream_chat_completion(
|
||||
room_id=room.room_id,
|
||||
model=model,
|
||||
messages=messages,
|
||||
@@ -3242,6 +3243,12 @@ class Bot:
|
||||
},
|
||||
)
|
||||
|
||||
# Empty response with no tool calls — retry once with escalation model
|
||||
if not content and not tool_calls and model != ESCALATION_MODEL:
|
||||
logger.warning("[empty-response] %s returned nothing, retrying with %s", model, ESCALATION_MODEL)
|
||||
model = ESCALATION_MODEL
|
||||
continue
|
||||
|
||||
if not tool_calls:
|
||||
# No tool calls — final text response
|
||||
break
|
||||
@@ -3276,11 +3283,12 @@ class Bot:
|
||||
if iteration > 0:
|
||||
sentry_sdk.set_tag("used_tools", "true")
|
||||
|
||||
# Send / finalize reply. If we streamed, just do a final edit so the
|
||||
# Matrix message reflects the complete text (otherwise progressive
|
||||
# throttling may have stopped short of the last tokens).
|
||||
# Send / finalize reply. If we streamed, do a final edit only if
|
||||
# the complete text differs from what was last sent (avoids the
|
||||
# "(bearbeitet)" / "(edited)" indicator for unchanged messages).
|
||||
if reply:
|
||||
if streamed_event_id:
|
||||
if reply != last_sent_text:
|
||||
await self._send_stream_edit(room.room_id, streamed_event_id, reply, final=True)
|
||||
else:
|
||||
await self._send_text(room.room_id, reply)
|
||||
@@ -3728,22 +3736,24 @@ class Bot:
|
||||
messages: list[dict],
|
||||
tools: list | None,
|
||||
prior_event_id: str | None = None,
|
||||
) -> tuple[str, list[dict] | None, dict | None, str | None]:
|
||||
) -> tuple[str, list[dict] | None, dict | None, str | None, str]:
|
||||
"""Stream one chat completion turn.
|
||||
|
||||
Progressively edits a Matrix message as content tokens arrive (unless
|
||||
tool_calls have started — those suppress visible streaming until the
|
||||
model settles on plain text on a later iteration).
|
||||
|
||||
Returns (content, tool_calls or None, usage dict or None, event_id).
|
||||
Returns (content, tool_calls or None, usage dict or None, event_id, last_sent_text).
|
||||
`event_id` is the Matrix event we've been streaming into, or None if
|
||||
we didn't (yet) post a visible message this turn.
|
||||
`last_sent_text` is the text last sent/edited to Matrix (for dedup).
|
||||
"""
|
||||
content_parts: list[str] = []
|
||||
tool_calls_acc: dict[int, dict] = {}
|
||||
usage: dict | None = None
|
||||
event_id = prior_event_id
|
||||
last_edit = 0.0
|
||||
last_sent_text: str = "" # track what was last sent to Matrix to avoid redundant edits
|
||||
EDIT_THROTTLE = 0.6 # seconds — keep Matrix edit traffic reasonable
|
||||
MIN_CHARS_BEFORE_POST = 20 # avoid posting a single character first
|
||||
|
||||
@@ -3773,7 +3783,7 @@ class Bot:
|
||||
"prompt_tokens": getattr(resp.usage, "prompt_tokens", 0),
|
||||
"completion_tokens": getattr(resp.usage, "completion_tokens", 0),
|
||||
}
|
||||
return choice.message.content or "", tc_list, u, event_id
|
||||
return choice.message.content or "", tc_list, u, event_id, ""
|
||||
|
||||
async for chunk in stream:
|
||||
if not chunk.choices:
|
||||
@@ -3813,6 +3823,7 @@ class Bot:
|
||||
event_id = await self._send_stream_start(room_id, text_so_far)
|
||||
else:
|
||||
await self._send_stream_edit(room_id, event_id, text_so_far)
|
||||
last_sent_text = text_so_far
|
||||
last_edit = now
|
||||
|
||||
# Some providers attach usage to the last choice chunk
|
||||
@@ -3874,7 +3885,7 @@ class Bot:
|
||||
"[stream] model=%s chars=%d tool_calls=%d streamed_to_matrix=%s",
|
||||
model, len(content), len(tc_list or []), event_id is not None,
|
||||
)
|
||||
return content, tc_list, usage, event_id
|
||||
return content, tc_list, usage, event_id, last_sent_text
|
||||
|
||||
async def _get_call_encryption_key(self, room_id: str, sender: str, caller_device_id: str = "") -> bytes | None:
|
||||
"""Read E2EE encryption key from call.member state (MSC4143) or timeline (legacy).
|
||||
|
||||
Reference in New Issue
Block a user