feat(memory): store full conversation exchanges instead of LLM-extracted facts

- Replace _extract_voice_memories with _store_voice_exchange
- Store raw "User: ... / Assistant: ..." pairs directly
- No LLM call needed — faster, cheaper, no lost context
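- Load stored exchanges into the voice prompt under the heading "Frühere Gespräche mit diesem Nutzer", separated by "---", so the full thread context is preserved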

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-23 10:40:59 +02:00
parent 150df19be1
commit a11cafc1d6

View File

@@ -192,59 +192,21 @@ async def _brave_search(query: str, count: int = 5) -> str:
return f"Search failed: {exc}"
async def _extract_voice_memories(user_text: str, agent_text: str,
user_id: str, room_id: str) -> None:
"""Extract memorable facts from a voice exchange and store them."""
if not LITELLM_URL or not MEMORY_SERVICE_URL:
async def _store_voice_exchange(user_text: str, agent_text: str,
user_id: str, room_id: str) -> None:
"""Store the full conversation exchange as memory (no LLM extraction)."""
if not MEMORY_SERVICE_URL:
return
exchange = f"User: {user_text}\nAssistant: {agent_text}"
try:
# Fetch existing facts to avoid duplicates
async with httpx.AsyncClient(timeout=10.0) as client:
resp = await client.post(
f"{MEMORY_SERVICE_URL}/memories/query",
json={"user_id": user_id, "query": "all facts", "top_k": 20},
await client.post(
f"{MEMORY_SERVICE_URL}/memories/store",
json={"user_id": user_id, "fact": exchange, "source_room": room_id},
)
existing = [m["fact"] for m in resp.json().get("results", [])] if resp.is_success else []
existing_text = "\n".join(f"- {f}" for f in existing) if existing else "(none)"
llm = AsyncOpenAI(base_url=LITELLM_URL, api_key=LITELLM_KEY)
resp = await llm.chat.completions.create(
model="claude-haiku",
messages=[
{"role": "system", "content": (
"Extract memorable facts about the user from this voice conversation snippet. "
"Return a JSON array of concise strings. Include: name, preferences, location, "
"occupation, interests, family, projects. Skip duplicate or temporary info. "
"Return [] if nothing new."
)},
{"role": "user", "content": (
f"Existing memories:\n{existing_text}\n\n"
f"User said: {user_text}\nAssistant replied: {agent_text}\n\n"
"New facts (JSON array):"
)},
],
max_tokens=200,
)
raw = resp.choices[0].message.content.strip()
if raw.startswith("```"):
raw = re.sub(r"^```\w*\n?", "", raw)
raw = re.sub(r"\n?```$", "", raw)
match = re.search(r"\[.*\]", raw, re.DOTALL)
if match:
raw = match.group(0)
new_facts = json.loads(raw)
if not isinstance(new_facts, list):
return
async with httpx.AsyncClient(timeout=10.0) as client:
for fact in new_facts:
if isinstance(fact, str) and fact.strip():
await client.post(
f"{MEMORY_SERVICE_URL}/memories/store",
json={"user_id": user_id, "fact": fact.strip(), "source_room": room_id},
)
logger.info("Memory stored for %s: %s", user_id, fact[:80])
logger.info("Memory stored for %s: %s", user_id, exchange[:120])
except Exception as exc:
logger.warning("Voice memory extraction failed: %s", exc)
logger.warning("Voice memory store failed: %s", exc)
def _build_e2ee_options() -> rtc.E2EEOptions:
@@ -569,8 +531,8 @@ class VoiceSession:
try:
mems = await self._memory.query(self._caller_user_id, "voice call", top_k=10)
if mems:
memory_section = "\n\nKontext aus früheren Gesprächen mit diesem Nutzer:\n" + \
"\n".join(f"- {m['fact']}" for m in mems)
memory_section = "\n\nFrühere Gespräche mit diesem Nutzer:\n" + \
"\n---\n".join(m['fact'] for m in mems)
logger.info("Loaded %d memories for %s", len(mems), self._caller_user_id)
except Exception as exc:
logger.warning("Memory query failed: %s", exc)
@@ -640,8 +602,8 @@ class VoiceSession:
user_text = " ".join(_last_user_speech)
_last_user_speech.clear()
asyncio.ensure_future(
_extract_voice_memories(user_text, text,
self._caller_user_id, self.room_id))
_store_voice_exchange(user_text, text,
self._caller_user_id, self.room_id))
# Brave Search tool — lets the agent answer questions about current events
@function_tool