fix(MAT-174): Robust LLM filter JSON extraction with regex
The LLM sometimes returns extra text around the JSON array. Use a regex to extract the array pattern instead of parsing the full response.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -59,8 +59,13 @@ async def _llm_filter(results: list[dict], criteria: str) -> list[dict]:
|
|||||||
data = resp.json()
|
data = resp.json()
|
||||||
|
|
||||||
reply = data["choices"][0]["message"]["content"].strip()
|
reply = data["choices"][0]["message"]["content"].strip()
|
||||||
# Parse the JSON array of indices
|
# Extract JSON array from response (LLM may include extra text)
|
||||||
indices = json.loads(reply)
|
import re
|
||||||
|
match = re.search(r"\[[\d,\s]*\]", reply)
|
||||||
|
if not match:
|
||||||
|
logger.warning("LLM filter returned no array: %s", reply)
|
||||||
|
return results
|
||||||
|
indices = json.loads(match.group())
|
||||||
if not isinstance(indices, list):
|
if not isinstance(indices, list):
|
||||||
logger.warning("LLM filter returned non-list: %s", reply)
|
logger.warning("LLM filter returned non-list: %s", reply)
|
||||||
return results
|
return results
|
||||||
|
|||||||
Reference in New Issue
Block a user