feat: switch DocumentRAG to MatrixHost API, remove WildFiles dependency

DocumentRAG now calls MatrixHost /api/bot/documents/search instead of
the WildFiles API. Removes device auth flow and legacy org provisioning.
Bot authenticates via existing BOT_API_KEY pattern.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-03-02 10:06:12 +02:00
parent a4f01ca177
commit 4d6cba1f0c

181
bot.py
View File

@@ -325,66 +325,36 @@ HELP_TEXT = """**AI Bot Commands**
class DocumentRAG: class DocumentRAG:
"""Search WildFiles for relevant documents.""" """Search documents via MatrixHost API (replaces WildFiles)."""
def __init__(self, base_url: str, org: str): def __init__(self, portal_url: str, bot_api_key: str):
self.base_url = base_url.rstrip("/") self.portal_url = portal_url.rstrip("/")
self.org = org self.bot_api_key = bot_api_key
self.enabled = bool(base_url) self.enabled = bool(portal_url and bot_api_key)
async def search(self, query: str, top_k: int = 3, api_key: str | None = None, org_slug: str | None = None) -> list[dict]: async def search(self, query: str, top_k: int = 3, api_key: str | None = None, org_slug: str | None = None, matrix_user_id: str | None = None) -> list[dict]:
org = org_slug or self.org if not self.enabled or not matrix_user_id:
if not org and not api_key:
return [] return []
try: try:
headers = {} body = {"query": query, "limit": top_k, "matrix_user_id": matrix_user_id}
if api_key:
headers["X-API-Key"] = api_key
body = {"query": query, "limit": top_k, "organization": org}
async with httpx.AsyncClient(timeout=15.0) as client: async with httpx.AsyncClient(timeout=15.0) as client:
resp = await client.post( resp = await client.post(
f"{self.base_url}/api/v1/rag/search", f"{self.portal_url}/api/bot/documents/search",
json=body, json=body,
headers=headers, headers={"Authorization": f"Bearer {self.bot_api_key}"},
) )
resp.raise_for_status() resp.raise_for_status()
return resp.json().get("results", []) return resp.json().get("results", [])
except Exception: except Exception:
logger.debug("WildFiles search failed", exc_info=True) logger.debug("Document search failed", exc_info=True)
return [] return []
async def validate_key(self, api_key: str) -> dict | None: async def validate_key(self, api_key: str) -> dict | None:
"""Validate an API key against WildFiles. Returns stats dict or None.""" """Legacy: no longer used (keys replaced by portal auth)."""
if not self.base_url:
return None
try:
async with httpx.AsyncClient(timeout=10.0) as client:
resp = await client.get(
f"{self.base_url}/api/v1/rag/stats",
headers={"X-API-Key": api_key},
)
resp.raise_for_status()
data = resp.json()
if data.get("total_documents", 0) >= 0:
return data
except Exception:
logger.debug("WildFiles key validation failed", exc_info=True)
return None return None
async def get_org_stats(self, org_slug: str) -> dict | None: async def get_org_stats(self, org_slug: str) -> dict | None:
"""Get stats for an org by slug. Returns stats dict or None.""" """Legacy: no longer used."""
if not self.base_url:
return None
try:
async with httpx.AsyncClient(timeout=10.0) as client:
resp = await client.get(
f"{self.base_url}/api/v1/rag/stats",
params={"organization": org_slug},
)
resp.raise_for_status()
return resp.json()
except Exception:
logger.debug("WildFiles org stats failed for %s", org_slug, exc_info=True)
return None return None
def format_context(self, results: list[dict]) -> str: def format_context(self, results: list[dict]) -> str:
@@ -903,7 +873,7 @@ class Bot:
self.voice_sessions: dict[str, VoiceSession] = {} self.voice_sessions: dict[str, VoiceSession] = {}
self.active_calls = set() # rooms where we've sent call member event self.active_calls = set() # rooms where we've sent call member event
self.active_callers: dict[str, set[str]] = {} # room_id → set of caller user IDs self.active_callers: dict[str, set[str]] = {} # room_id → set of caller user IDs
self.rag = DocumentRAG(WILDFILES_BASE_URL, WILDFILES_ORG) self.rag = DocumentRAG(PORTAL_URL, BOT_API_KEY)
self.memory = MemoryClient(MEMORY_SERVICE_URL) self.memory = MemoryClient(MEMORY_SERVICE_URL)
self.atlassian = AtlassianClient(PORTAL_URL, BOT_API_KEY) self.atlassian = AtlassianClient(PORTAL_URL, BOT_API_KEY)
self.llm = AsyncOpenAI(base_url=LITELLM_URL, api_key=LITELLM_KEY) if LITELLM_URL else None self.llm = AsyncOpenAI(base_url=LITELLM_URL, api_key=LITELLM_KEY) if LITELLM_URL else None
@@ -937,18 +907,15 @@ class Bot:
except Exception: except Exception:
logger.exception("Failed to save user keys") logger.exception("Failed to save user keys")
async def _get_wildfiles_org(self, matrix_user_id: str) -> str | None: async def _has_documents(self, matrix_user_id: str) -> bool:
"""Get user's WildFiles org slug via MatrixHost portal API. """Check if user has documents via MatrixHost portal API.
Auto-provisions a WildFiles org if the user has a MatrixHost account.
Falls back to legacy user_keys for backward compat.
Results are cached per session. Results are cached per session.
""" """
if matrix_user_id in self._wildfiles_org_cache: if matrix_user_id in self._wildfiles_org_cache:
return self._wildfiles_org_cache[matrix_user_id] return self._wildfiles_org_cache[matrix_user_id] is not None
# Try portal API (auto-provisions org if needed) if self.atlassian.enabled:
if self.atlassian.enabled: # reuses same portal_url + bot_api_key
try: try:
async with httpx.AsyncClient(timeout=10.0) as client: async with httpx.AsyncClient(timeout=10.0) as client:
resp = await client.get( resp = await client.get(
@@ -959,16 +926,13 @@ class Bot:
resp.raise_for_status() resp.raise_for_status()
data = resp.json() data = resp.json()
if data.get("connected"): if data.get("connected"):
org_slug = data["org_slug"] self._wildfiles_org_cache[matrix_user_id] = "connected"
self._wildfiles_org_cache[matrix_user_id] = org_slug return True
logger.debug("Resolved WildFiles org %s for %s via portal", org_slug, matrix_user_id)
return org_slug
except Exception: except Exception:
logger.debug("Portal WildFiles org lookup failed for %s", matrix_user_id, exc_info=True) logger.debug("Portal document check failed for %s", matrix_user_id, exc_info=True)
# No portal result — cache as None to avoid repeated lookups
self._wildfiles_org_cache[matrix_user_id] = None self._wildfiles_org_cache[matrix_user_id] = None
return None return False
async def start(self): async def start(self):
# Restore existing session or create new one # Restore existing session or create new one
@@ -1954,11 +1918,11 @@ class Bot:
return return
sender = event.sender if event else None sender = event.sender if event else None
user_api_key = self.user_keys.get(sender) if sender else None user_api_key = self.user_keys.get(sender) if sender else None
user_org_slug = await self._get_wildfiles_org(sender) if sender else None has_docs = await self._has_documents(sender) if sender else False
if not user_api_key and not user_org_slug: if not has_docs:
await self._send_text(room.room_id, "Documents not available. Manage your documents at [matrixhost.eu/documents](https://matrixhost.eu/documents).") await self._send_text(room.room_id, "Documents not available. Manage your documents at [matrixhost.eu/documents](https://matrixhost.eu/documents).")
return return
results = await self.rag.search(query, top_k=5, api_key=user_api_key, org_slug=user_org_slug) results = await self.rag.search(query, top_k=5, matrix_user_id=sender)
if not results: if not results:
await self._send_text(room.room_id, "No documents found.") await self._send_text(room.room_id, "No documents found.")
return return
@@ -2010,100 +1974,33 @@ class Bot:
logger.info("User %s connected WildFiles key (org: %s)", sender, org_name) logger.info("User %s connected WildFiles key (org: %s)", sender, org_name)
return return
# Check if user already has auto-provisioned org via MatrixHost portal # Documents are managed via MatrixHost portal
if sender: if sender:
org_slug = await self._get_wildfiles_org(sender) has_docs = await self._has_documents(sender)
if org_slug: if has_docs:
stats = await self.rag.get_org_stats(org_slug)
total = stats.get("total_documents", 0) if stats else 0
await self._send_text( await self._send_text(
room.room_id, room.room_id,
f"Documents are already connected via your MatrixHost account (org: **{org_slug}**, {total} documents). " "Documents are connected via your MatrixHost account. "
f"Manage documents at [matrixhost.eu/documents](https://matrixhost.eu/documents).", "Manage documents at [matrixhost.eu/documents](https://matrixhost.eu/documents).",
) )
return return
# SSO device authorization flow (fallback for non-MatrixHost users)
if sender and sender in self._pending_connects:
await self._send_text(room.room_id, "A connect flow is already in progress. Please complete or wait for it to expire.")
return
try:
async with httpx.AsyncClient(timeout=10.0) as client:
resp = await client.post(f"{self.rag.base_url}/api/v1/auth/device/code")
resp.raise_for_status()
data = resp.json()
except Exception:
logger.exception("Failed to start device auth flow")
await self._send_text(room.room_id, "Failed to start connection flow. Please try again later.")
return
device_code = data["device_code"]
user_code = data["user_code"]
verification_url = data["verification_url"]
await self._send_text( await self._send_text(
room.room_id, room.room_id,
f"To connect documents, visit:\n\n" "Upload documents at [matrixhost.eu/documents](https://matrixhost.eu/documents) "
f"**{verification_url}**\n\n" "to enable AI-powered document search.",
f"and enter code: **{user_code}**\n\n"
f"_This link expires in 10 minutes._",
) )
# Track pending connect and start polling
self._pending_connects[sender] = device_code
asyncio.create_task(self._poll_device_auth(room.room_id, sender, device_code))
async def _handle_disconnect(self, room, event=None): async def _handle_disconnect(self, room, event=None):
"""Handle !ai disconnect — remove stored WildFiles API key.""" """Handle !ai disconnect — legacy, documents managed via portal now."""
sender = event.sender if event else None sender = event.sender if event else None
if sender and sender in self.user_keys: if sender and sender in self.user_keys:
del self.user_keys[sender] del self.user_keys[sender]
self._save_user_keys() self._save_user_keys()
await self._send_text(room.room_id, "Custom document key removed. Using default document search.") await self._send_text(room.room_id, "Legacy document key removed.")
logger.info("User %s disconnected WildFiles key", sender) logger.info("User %s removed legacy WildFiles key", sender)
else: else:
await self._send_text(room.room_id, "No custom document key connected.") await self._send_text(room.room_id, "Documents are managed at [matrixhost.eu/documents](https://matrixhost.eu/documents).")
async def _poll_device_auth(self, room_id: str, sender: str, device_code: str):
"""Poll WildFiles for device auth approval (5s interval, 10 min max)."""
poll_url = f"{self.rag.base_url}/api/v1/auth/device/status"
try:
for _ in range(120): # 120 * 5s = 10 min
await asyncio.sleep(5)
try:
async with httpx.AsyncClient(timeout=10.0) as client:
resp = await client.get(poll_url, params={"device_code": device_code})
resp.raise_for_status()
data = resp.json()
except Exception:
logger.debug("Device auth poll failed, retrying", exc_info=True)
continue
if data["status"] == "approved":
api_key = data["api_key"]
org_slug = data.get("organization", "unknown")
self.user_keys[sender] = api_key
self._save_user_keys()
await self._send_text(
room_id,
f"Documents connected (org: **{org_slug}**). Your documents are now searchable.",
)
logger.info("User %s connected via device auth (org: %s)", sender, org_slug)
return
elif data["status"] == "expired":
await self._send_text(room_id, "Connection flow expired. Type `!ai connect` to try again.")
return
# Timeout after 10 minutes
await self._send_text(room_id, "Connection flow timed out. Type `!ai connect` to try again.")
except asyncio.CancelledError:
pass
except Exception:
logger.exception("Device auth polling error")
await self._send_text(room_id, "Connection flow failed. Type `!ai connect` to try again.")
finally:
self._pending_connects.pop(sender, None)
async def _brave_search(self, query: str, count: int = 5) -> str: async def _brave_search(self, query: str, count: int = 5) -> str:
"""Call Brave Search API and return formatted results.""" """Call Brave Search API and return formatted results."""
@@ -2243,10 +2140,8 @@ class Bot:
# Rewrite query using conversation context for better RAG search # Rewrite query using conversation context for better RAG search
search_query = await self._rewrite_query(user_message, history, model) search_query = await self._rewrite_query(user_message, history, model)
# WildFiles document context (portal org auto-provision, legacy API key fallback) # Document context via MatrixHost API
user_api_key = self.user_keys.get(sender) if sender else None doc_results = await self.rag.search(search_query, matrix_user_id=sender) if sender else []
user_org_slug = await self._get_wildfiles_org(sender) if sender else None
doc_results = await self.rag.search(search_query, api_key=user_api_key, org_slug=user_org_slug)
doc_context = self.rag.format_context(doc_results) doc_context = self.rag.format_context(doc_results)
if doc_context: if doc_context:
logger.info("RAG found %d docs for: %s (original: %s)", len(doc_results), search_query[:50], user_message[:50]) logger.info("RAG found %d docs for: %s (original: %s)", len(doc_results), search_query[:50], user_message[:50])