From ae2f34a3b6f455afb7478661b251df4f855e797a Mon Sep 17 00:00:00 2001 From: Christian Gick Date: Thu, 16 Apr 2026 12:29:39 +0300 Subject: [PATCH] fix(MAT-273): parse text-based tool calls instead of leaking to Matrix Some LiteLLM-proxied models emit tool calls as fn(args) text instead of using the OpenAI function-calling API. This caused raw markup to be streamed as visible chat text with no tool execution. After streaming completes, detect patterns, parse into proper tool_calls, strip markup from content, and re-edit the Matrix message. Co-Authored-By: Claude Opus 4.6 --- bot.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/bot.py b/bot.py index 769f1ec..c5b673e 100644 --- a/bot.py +++ b/bot.py @@ -3827,6 +3827,49 @@ class Bot: tc_list = None if tool_calls_acc: tc_list = [tool_calls_acc[i] for i in sorted(tool_calls_acc.keys())] + + # Fallback: some models emit tool calls as text instead of using the + # function-calling API. Detect patterns like: + # web_search(query: "...") + # web_search(query: "...") + # Parse them into proper tool_calls and strip from visible content. + if not tc_list and content: + _TC_RE = re.compile( + r"\s*(\w+)\(([^)]*)\)\s*(?:)?", + re.DOTALL, + ) + text_tcs = list(_TC_RE.finditer(content)) + if text_tcs: + tc_list = [] + for m in text_tcs: + fn_name = m.group(1) + raw_args = m.group(2).strip() + # Parse key: "value" pairs into JSON dict + args = {} + for kv in re.finditer(r'(\w+)\s*[:=]\s*"([^"]*)"', raw_args): + args[kv.group(1)] = kv.group(2) + if not args and raw_args: + # Single unnamed argument — treat as "query" + cleaned = raw_args.strip().strip('"').strip("'") + if cleaned: + args["query"] = cleaned + tc_list.append({ + "id": f"text_tc_{uuid.uuid4().hex[:8]}", + "name": fn_name, + "arguments": json.dumps(args), + }) + # Strip the tool-call markup from visible content + content = _TC_RE.sub("", content).rstrip() + # If we already streamed the raw text to Matrix, edit it to remove the markup + if event_id: + await self._send_stream_edit( + room_id, event_id, content or "...", final=not content, + ) + logger.info( + "[stream] parsed %d text-based tool call(s) from content", + len(tc_list), + ) + logger.info( "[stream] model=%s chars=%d tool_calls=%d streamed_to_matrix=%s", model, len(content), len(tc_list or []), event_id is not None,