fix(MAT-273): parse text-based tool calls instead of leaking to Matrix
Some checks failed
Build & Deploy / test (push) Failing after 11s
Build & Deploy / build-and-deploy (push) Has been skipped
Tests / test (push) Failing after 9s

Some LiteLLM-proxied models emit tool calls as <tool_call>fn(args)
text instead of using the OpenAI function-calling API. This caused
raw markup to be streamed as visible chat text with no tool execution.

After streaming completes, detect <tool_call> patterns, parse them into
proper tool_calls, strip the markup from the content, and re-edit the
Matrix message.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-04-16 12:29:39 +03:00
parent f16c94b2dc
commit ae2f34a3b6

43
bot.py
View File

@@ -3827,6 +3827,49 @@ class Bot:
tc_list = None
if tool_calls_acc:
tc_list = [tool_calls_acc[i] for i in sorted(tool_calls_acc.keys())]
# Fallback: some models emit tool calls as text instead of using the
# function-calling API. Detect patterns like:
# <tool_call>web_search(query: "...")</tool_call>
# <tool_call>web_search(query: "...")
# Parse them into proper tool_calls and strip from visible content.
if not tc_list and content:
_TC_RE = re.compile(
r"<tool[-_]?call>\s*(\w+)\(([^)]*)\)\s*(?:</tool[-_]?call>)?",
re.DOTALL,
)
text_tcs = list(_TC_RE.finditer(content))
if text_tcs:
tc_list = []
for m in text_tcs:
fn_name = m.group(1)
raw_args = m.group(2).strip()
# Parse key: "value" pairs into JSON dict
args = {}
for kv in re.finditer(r'(\w+)\s*[:=]\s*"([^"]*)"', raw_args):
args[kv.group(1)] = kv.group(2)
if not args and raw_args:
# Single unnamed argument — treat as "query"
cleaned = raw_args.strip().strip('"').strip("'")
if cleaned:
args["query"] = cleaned
tc_list.append({
"id": f"text_tc_{uuid.uuid4().hex[:8]}",
"name": fn_name,
"arguments": json.dumps(args),
})
# Strip the tool-call markup from visible content
content = _TC_RE.sub("", content).rstrip()
# If we already streamed the raw text to Matrix, edit it to remove the markup
if event_id:
await self._send_stream_edit(
room_id, event_id, content or "...", final=not content,
)
logger.info(
"[stream] parsed %d text-based tool call(s) from content",
len(tc_list),
)
logger.info(
"[stream] model=%s chars=%d tool_calls=%d streamed_to_matrix=%s",
model, len(content), len(tc_list or []), event_id is not None,