fix(MAT): skip redundant stream edit + retry empty responses with escalation model
1. Track last-sent text during streaming, skip final m.replace edit when content is identical — eliminates spurious '(bearbeitet)' indicator. 2. When base model (haiku) returns empty content + no tool calls, auto-retry with escalation model (sonnet) before giving up. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
29
bot.py
29
bot.py
@@ -3217,11 +3217,12 @@ class Bot:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
reply = ""
|
reply = ""
|
||||||
|
last_sent_text = ""
|
||||||
streamed_event_id: str | None = None # set when streaming has already posted a message in Matrix
|
streamed_event_id: str | None = None # set when streaming has already posted a message in Matrix
|
||||||
|
|
||||||
# Agentic tool-calling loop: iterate up to MAX_TOOL_ITERATIONS
|
# Agentic tool-calling loop: iterate up to MAX_TOOL_ITERATIONS
|
||||||
for iteration in range(MAX_TOOL_ITERATIONS):
|
for iteration in range(MAX_TOOL_ITERATIONS):
|
||||||
content, tool_calls, usage, streamed_event_id = await self._stream_chat_completion(
|
content, tool_calls, usage, streamed_event_id, last_sent_text = await self._stream_chat_completion(
|
||||||
room_id=room.room_id,
|
room_id=room.room_id,
|
||||||
model=model,
|
model=model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
@@ -3242,6 +3243,12 @@ class Bot:
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Empty response with no tool calls — retry once with escalation model
|
||||||
|
if not content and not tool_calls and model != ESCALATION_MODEL:
|
||||||
|
logger.warning("[empty-response] %s returned nothing, retrying with %s", model, ESCALATION_MODEL)
|
||||||
|
model = ESCALATION_MODEL
|
||||||
|
continue
|
||||||
|
|
||||||
if not tool_calls:
|
if not tool_calls:
|
||||||
# No tool calls — final text response
|
# No tool calls — final text response
|
||||||
break
|
break
|
||||||
@@ -3276,12 +3283,13 @@ class Bot:
|
|||||||
if iteration > 0:
|
if iteration > 0:
|
||||||
sentry_sdk.set_tag("used_tools", "true")
|
sentry_sdk.set_tag("used_tools", "true")
|
||||||
|
|
||||||
# Send / finalize reply. If we streamed, just do a final edit so the
|
# Send / finalize reply. If we streamed, do a final edit only if
|
||||||
# Matrix message reflects the complete text (otherwise progressive
|
# the complete text differs from what was last sent (avoids the
|
||||||
# throttling may have stopped short of the last tokens).
|
# "(bearbeitet)" / "(edited)" indicator for unchanged messages).
|
||||||
if reply:
|
if reply:
|
||||||
if streamed_event_id:
|
if streamed_event_id:
|
||||||
await self._send_stream_edit(room.room_id, streamed_event_id, reply, final=True)
|
if reply != last_sent_text:
|
||||||
|
await self._send_stream_edit(room.room_id, streamed_event_id, reply, final=True)
|
||||||
else:
|
else:
|
||||||
await self._send_text(room.room_id, reply)
|
await self._send_text(room.room_id, reply)
|
||||||
|
|
||||||
@@ -3728,22 +3736,24 @@ class Bot:
|
|||||||
messages: list[dict],
|
messages: list[dict],
|
||||||
tools: list | None,
|
tools: list | None,
|
||||||
prior_event_id: str | None = None,
|
prior_event_id: str | None = None,
|
||||||
) -> tuple[str, list[dict] | None, dict | None, str | None]:
|
) -> tuple[str, list[dict] | None, dict | None, str | None, str]:
|
||||||
"""Stream one chat completion turn.
|
"""Stream one chat completion turn.
|
||||||
|
|
||||||
Progressively edits a Matrix message as content tokens arrive (unless
|
Progressively edits a Matrix message as content tokens arrive (unless
|
||||||
tool_calls have started — those suppress visible streaming until the
|
tool_calls have started — those suppress visible streaming until the
|
||||||
model settles on plain text on a later iteration).
|
model settles on plain text on a later iteration).
|
||||||
|
|
||||||
Returns (content, tool_calls or None, usage dict or None, event_id).
|
Returns (content, tool_calls or None, usage dict or None, event_id, last_sent_text).
|
||||||
`event_id` is the Matrix event we've been streaming into, or None if
|
`event_id` is the Matrix event we've been streaming into, or None if
|
||||||
we didn't (yet) post a visible message this turn.
|
we didn't (yet) post a visible message this turn.
|
||||||
|
`last_sent_text` is the text last sent/edited to Matrix (for dedup).
|
||||||
"""
|
"""
|
||||||
content_parts: list[str] = []
|
content_parts: list[str] = []
|
||||||
tool_calls_acc: dict[int, dict] = {}
|
tool_calls_acc: dict[int, dict] = {}
|
||||||
usage: dict | None = None
|
usage: dict | None = None
|
||||||
event_id = prior_event_id
|
event_id = prior_event_id
|
||||||
last_edit = 0.0
|
last_edit = 0.0
|
||||||
|
last_sent_text: str = "" # track what was last sent to Matrix to avoid redundant edits
|
||||||
EDIT_THROTTLE = 0.6 # seconds — keep Matrix edit traffic reasonable
|
EDIT_THROTTLE = 0.6 # seconds — keep Matrix edit traffic reasonable
|
||||||
MIN_CHARS_BEFORE_POST = 20 # avoid posting a single character first
|
MIN_CHARS_BEFORE_POST = 20 # avoid posting a single character first
|
||||||
|
|
||||||
@@ -3773,7 +3783,7 @@ class Bot:
|
|||||||
"prompt_tokens": getattr(resp.usage, "prompt_tokens", 0),
|
"prompt_tokens": getattr(resp.usage, "prompt_tokens", 0),
|
||||||
"completion_tokens": getattr(resp.usage, "completion_tokens", 0),
|
"completion_tokens": getattr(resp.usage, "completion_tokens", 0),
|
||||||
}
|
}
|
||||||
return choice.message.content or "", tc_list, u, event_id
|
return choice.message.content or "", tc_list, u, event_id, ""
|
||||||
|
|
||||||
async for chunk in stream:
|
async for chunk in stream:
|
||||||
if not chunk.choices:
|
if not chunk.choices:
|
||||||
@@ -3813,6 +3823,7 @@ class Bot:
|
|||||||
event_id = await self._send_stream_start(room_id, text_so_far)
|
event_id = await self._send_stream_start(room_id, text_so_far)
|
||||||
else:
|
else:
|
||||||
await self._send_stream_edit(room_id, event_id, text_so_far)
|
await self._send_stream_edit(room_id, event_id, text_so_far)
|
||||||
|
last_sent_text = text_so_far
|
||||||
last_edit = now
|
last_edit = now
|
||||||
|
|
||||||
# Some providers attach usage to the last choice chunk
|
# Some providers attach usage to the last choice chunk
|
||||||
@@ -3874,7 +3885,7 @@ class Bot:
|
|||||||
"[stream] model=%s chars=%d tool_calls=%d streamed_to_matrix=%s",
|
"[stream] model=%s chars=%d tool_calls=%d streamed_to_matrix=%s",
|
||||||
model, len(content), len(tc_list or []), event_id is not None,
|
model, len(content), len(tc_list or []), event_id is not None,
|
||||||
)
|
)
|
||||||
return content, tc_list, usage, event_id
|
return content, tc_list, usage, event_id, last_sent_text
|
||||||
|
|
||||||
async def _get_call_encryption_key(self, room_id: str, sender: str, caller_device_id: str = "") -> bytes | None:
|
async def _get_call_encryption_key(self, room_id: str, sender: str, caller_device_id: str = "") -> bytes | None:
|
||||||
"""Read E2EE encryption key from call.member state (MSC4143) or timeline (legacy).
|
"""Read E2EE encryption key from call.member state (MSC4143) or timeline (legacy).
|
||||||
|
|||||||
Reference in New Issue
Block a user