fix(MAT): skip redundant stream edit + retry empty responses with escalation model
1. Track last-sent text during streaming, skip final m.replace edit when content is identical — eliminates spurious '(bearbeitet)' indicator. 2. When base model (haiku) returns empty content + no tool calls, auto-retry with escalation model (sonnet) before giving up. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
bot.py — 27 additions, 27 deletions
@@ -3217,11 +3217,12 @@ class Bot:
|
||||
|
||||
try:
|
||||
reply = ""
|
||||
last_sent_text = ""
|
||||
streamed_event_id: str | None = None # set when streaming has already posted a message in Matrix
|
||||
|
||||
# Agentic tool-calling loop: iterate up to MAX_TOOL_ITERATIONS
|
||||
for iteration in range(MAX_TOOL_ITERATIONS):
|
||||
content, tool_calls, usage, streamed_event_id = await self._stream_chat_completion(
|
||||
content, tool_calls, usage, streamed_event_id, last_sent_text = await self._stream_chat_completion(
|
||||
room_id=room.room_id,
|
||||
model=model,
|
||||
messages=messages,
|
||||
@@ -3242,6 +3243,12 @@ class Bot:
|
||||
},
|
||||
)
|
||||
|
||||
# Empty response with no tool calls — retry once with escalation model
|
||||
if not content and not tool_calls and model != ESCALATION_MODEL:
|
||||
logger.warning("[empty-response] %s returned nothing, retrying with %s", model, ESCALATION_MODEL)
|
||||
model = ESCALATION_MODEL
|
||||
continue
|
||||
|
||||
if not tool_calls:
|
||||
# No tool calls — final text response
|
||||
break
|
||||
@@ -3276,11 +3283,12 @@ class Bot:
|
||||
if iteration > 0:
|
||||
sentry_sdk.set_tag("used_tools", "true")
|
||||
|
||||
# Send / finalize reply. If we streamed, just do a final edit so the
|
||||
# Matrix message reflects the complete text (otherwise progressive
|
||||
# throttling may have stopped short of the last tokens).
|
||||
# Send / finalize reply. If we streamed, do a final edit only if
|
||||
# the complete text differs from what was last sent (avoids the
|
||||
# "(bearbeitet)" / "(edited)" indicator for unchanged messages).
|
||||
if reply:
|
||||
if streamed_event_id:
|
||||
if reply != last_sent_text:
|
||||
await self._send_stream_edit(room.room_id, streamed_event_id, reply, final=True)
|
||||
else:
|
||||
await self._send_text(room.room_id, reply)
|
||||
@@ -3728,22 +3736,24 @@ class Bot:
|
||||
messages: list[dict],
|
||||
tools: list | None,
|
||||
prior_event_id: str | None = None,
|
||||
) -> tuple[str, list[dict] | None, dict | None, str | None]:
|
||||
) -> tuple[str, list[dict] | None, dict | None, str | None, str]:
|
||||
"""Stream one chat completion turn.
|
||||
|
||||
Progressively edits a Matrix message as content tokens arrive (unless
|
||||
tool_calls have started — those suppress visible streaming until the
|
||||
model settles on plain text on a later iteration).
|
||||
|
||||
Returns (content, tool_calls or None, usage dict or None, event_id).
|
||||
Returns (content, tool_calls or None, usage dict or None, event_id, last_sent_text).
|
||||
`event_id` is the Matrix event we've been streaming into, or None if
|
||||
we didn't (yet) post a visible message this turn.
|
||||
`last_sent_text` is the text last sent/edited to Matrix (for dedup).
|
||||
"""
|
||||
content_parts: list[str] = []
|
||||
tool_calls_acc: dict[int, dict] = {}
|
||||
usage: dict | None = None
|
||||
event_id = prior_event_id
|
||||
last_edit = 0.0
|
||||
last_sent_text: str = "" # track what was last sent to Matrix to avoid redundant edits
|
||||
EDIT_THROTTLE = 0.6 # seconds — keep Matrix edit traffic reasonable
|
||||
MIN_CHARS_BEFORE_POST = 20 # avoid posting a single character first
|
||||
|
||||
@@ -3773,7 +3783,7 @@ class Bot:
|
||||
"prompt_tokens": getattr(resp.usage, "prompt_tokens", 0),
|
||||
"completion_tokens": getattr(resp.usage, "completion_tokens", 0),
|
||||
}
|
||||
return choice.message.content or "", tc_list, u, event_id
|
||||
return choice.message.content or "", tc_list, u, event_id, ""
|
||||
|
||||
async for chunk in stream:
|
||||
if not chunk.choices:
|
||||
@@ -3813,6 +3823,7 @@ class Bot:
|
||||
event_id = await self._send_stream_start(room_id, text_so_far)
|
||||
else:
|
||||
await self._send_stream_edit(room_id, event_id, text_so_far)
|
||||
last_sent_text = text_so_far
|
||||
last_edit = now
|
||||
|
||||
# Some providers attach usage to the last choice chunk
|
||||
@@ -3874,7 +3885,7 @@ class Bot:
|
||||
"[stream] model=%s chars=%d tool_calls=%d streamed_to_matrix=%s",
|
||||
model, len(content), len(tc_list or []), event_id is not None,
|
||||
)
|
||||
return content, tc_list, usage, event_id
|
||||
return content, tc_list, usage, event_id, last_sent_text
|
||||
|
||||
async def _get_call_encryption_key(self, room_id: str, sender: str, caller_device_id: str = "") -> bytes | None:
|
||||
"""Read E2EE encryption key from call.member state (MSC4143) or timeline (legacy).
|
||||
|
||||
Reference in New Issue
Block a user