diff --git a/bot.py b/bot.py index c5b673e..8e5c8c9 100644 --- a/bot.py +++ b/bot.py @@ -3217,11 +3217,12 @@ class Bot: try: reply = "" + last_sent_text = "" streamed_event_id: str | None = None # set when streaming has already posted a message in Matrix # Agentic tool-calling loop: iterate up to MAX_TOOL_ITERATIONS for iteration in range(MAX_TOOL_ITERATIONS): - content, tool_calls, usage, streamed_event_id = await self._stream_chat_completion( + content, tool_calls, usage, streamed_event_id, last_sent_text = await self._stream_chat_completion( room_id=room.room_id, model=model, messages=messages, @@ -3242,6 +3243,12 @@ class Bot: }, ) + # Empty response with no tool calls — retry once with escalation model + if not content and not tool_calls and model != ESCALATION_MODEL: + logger.warning("[empty-response] %s returned nothing, retrying with %s", model, ESCALATION_MODEL) + model = ESCALATION_MODEL + continue + if not tool_calls: # No tool calls — final text response break @@ -3276,12 +3283,13 @@ class Bot: if iteration > 0: sentry_sdk.set_tag("used_tools", "true") - # Send / finalize reply. If we streamed, just do a final edit so the - # Matrix message reflects the complete text (otherwise progressive - # throttling may have stopped short of the last tokens). + # Send / finalize reply. If we streamed, do a final edit only if + # the complete text differs from what was last sent (avoids the + # "(bearbeitet)" / "(edited)" indicator for unchanged messages). if reply: if streamed_event_id: - await self._send_stream_edit(room.room_id, streamed_event_id, reply, final=True) + if reply != last_sent_text: + await self._send_stream_edit(room.room_id, streamed_event_id, reply, final=True) else: await self._send_text(room.room_id, reply) @@ -3728,22 +3736,24 @@ class Bot: messages: list[dict], tools: list | None, prior_event_id: str | None = None, - ) -> tuple[str, list[dict] | None, dict | None, str | None]: + ) -> tuple[str, list[dict] | None, dict | None, str | None, str]: """Stream one chat completion turn. Progressively edits a Matrix message as content tokens arrive (unless tool_calls have started — those suppress visible streaming until the model settles on plain text on a later iteration). - Returns (content, tool_calls or None, usage dict or None, event_id). + Returns (content, tool_calls or None, usage dict or None, event_id, last_sent_text). `event_id` is the Matrix event we've been streaming into, or None if we didn't (yet) post a visible message this turn. + `last_sent_text` is the text last sent/edited to Matrix (for dedup). """ content_parts: list[str] = [] tool_calls_acc: dict[int, dict] = {} usage: dict | None = None event_id = prior_event_id last_edit = 0.0 + last_sent_text: str = "" # track what was last sent to Matrix to avoid redundant edits EDIT_THROTTLE = 0.6 # seconds — keep Matrix edit traffic reasonable MIN_CHARS_BEFORE_POST = 20 # avoid posting a single character first @@ -3773,7 +3783,7 @@ class Bot: "prompt_tokens": getattr(resp.usage, "prompt_tokens", 0), "completion_tokens": getattr(resp.usage, "completion_tokens", 0), } - return choice.message.content or "", tc_list, u, event_id + return choice.message.content or "", tc_list, u, event_id, "" async for chunk in stream: if not chunk.choices: @@ -3813,6 +3823,7 @@ class Bot: event_id = await self._send_stream_start(room_id, text_so_far) else: await self._send_stream_edit(room_id, event_id, text_so_far) + last_sent_text = text_so_far last_edit = now # Some providers attach usage to the last choice chunk @@ -3874,7 +3885,7 @@ class Bot: "[stream] model=%s chars=%d tool_calls=%d streamed_to_matrix=%s", model, len(content), len(tc_list or []), event_id is not None, ) - return content, tc_list, usage, event_id + return content, tc_list, usage, event_id, last_sent_text async def _get_call_encryption_key(self, room_id: str, sender: str, caller_device_id: str = "") -> bytes | None: """Read E2EE encryption key from call.member state (MSC4143) or timeline (legacy).