feat: switch DocumentRAG to MatrixHost API, remove WildFiles dependency
DocumentRAG now calls the MatrixHost /api/bot/documents/search endpoint instead of the WildFiles API. This removes the device auth flow and legacy org provisioning; the bot authenticates via the existing BOT_API_KEY pattern.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
181
bot.py
181
bot.py
@@ -325,66 +325,36 @@ HELP_TEXT = """**AI Bot Commands**
|
|||||||
|
|
||||||
|
|
||||||
class DocumentRAG:
|
class DocumentRAG:
|
||||||
"""Search WildFiles for relevant documents."""
|
"""Search documents via MatrixHost API (replaces WildFiles)."""
|
||||||
|
|
||||||
def __init__(self, base_url: str, org: str):
|
def __init__(self, portal_url: str, bot_api_key: str):
|
||||||
self.base_url = base_url.rstrip("/")
|
self.portal_url = portal_url.rstrip("/")
|
||||||
self.org = org
|
self.bot_api_key = bot_api_key
|
||||||
self.enabled = bool(base_url)
|
self.enabled = bool(portal_url and bot_api_key)
|
||||||
|
|
||||||
async def search(self, query: str, top_k: int = 3, api_key: str | None = None, org_slug: str | None = None) -> list[dict]:
|
async def search(self, query: str, top_k: int = 3, api_key: str | None = None, org_slug: str | None = None, matrix_user_id: str | None = None) -> list[dict]:
|
||||||
org = org_slug or self.org
|
if not self.enabled or not matrix_user_id:
|
||||||
if not org and not api_key:
|
|
||||||
return []
|
return []
|
||||||
try:
|
try:
|
||||||
headers = {}
|
body = {"query": query, "limit": top_k, "matrix_user_id": matrix_user_id}
|
||||||
if api_key:
|
|
||||||
headers["X-API-Key"] = api_key
|
|
||||||
body = {"query": query, "limit": top_k, "organization": org}
|
|
||||||
async with httpx.AsyncClient(timeout=15.0) as client:
|
async with httpx.AsyncClient(timeout=15.0) as client:
|
||||||
resp = await client.post(
|
resp = await client.post(
|
||||||
f"{self.base_url}/api/v1/rag/search",
|
f"{self.portal_url}/api/bot/documents/search",
|
||||||
json=body,
|
json=body,
|
||||||
headers=headers,
|
headers={"Authorization": f"Bearer {self.bot_api_key}"},
|
||||||
)
|
)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
return resp.json().get("results", [])
|
return resp.json().get("results", [])
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.debug("WildFiles search failed", exc_info=True)
|
logger.debug("Document search failed", exc_info=True)
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def validate_key(self, api_key: str) -> dict | None:
|
async def validate_key(self, api_key: str) -> dict | None:
|
||||||
"""Validate an API key against WildFiles. Returns stats dict or None."""
|
"""Legacy: no longer used (keys replaced by portal auth)."""
|
||||||
if not self.base_url:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
|
||||||
resp = await client.get(
|
|
||||||
f"{self.base_url}/api/v1/rag/stats",
|
|
||||||
headers={"X-API-Key": api_key},
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
data = resp.json()
|
|
||||||
if data.get("total_documents", 0) >= 0:
|
|
||||||
return data
|
|
||||||
except Exception:
|
|
||||||
logger.debug("WildFiles key validation failed", exc_info=True)
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def get_org_stats(self, org_slug: str) -> dict | None:
|
async def get_org_stats(self, org_slug: str) -> dict | None:
|
||||||
"""Get stats for an org by slug. Returns stats dict or None."""
|
"""Legacy: no longer used."""
|
||||||
if not self.base_url:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
|
||||||
resp = await client.get(
|
|
||||||
f"{self.base_url}/api/v1/rag/stats",
|
|
||||||
params={"organization": org_slug},
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
return resp.json()
|
|
||||||
except Exception:
|
|
||||||
logger.debug("WildFiles org stats failed for %s", org_slug, exc_info=True)
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def format_context(self, results: list[dict]) -> str:
|
def format_context(self, results: list[dict]) -> str:
|
||||||
@@ -903,7 +873,7 @@ class Bot:
|
|||||||
self.voice_sessions: dict[str, VoiceSession] = {}
|
self.voice_sessions: dict[str, VoiceSession] = {}
|
||||||
self.active_calls = set() # rooms where we've sent call member event
|
self.active_calls = set() # rooms where we've sent call member event
|
||||||
self.active_callers: dict[str, set[str]] = {} # room_id → set of caller user IDs
|
self.active_callers: dict[str, set[str]] = {} # room_id → set of caller user IDs
|
||||||
self.rag = DocumentRAG(WILDFILES_BASE_URL, WILDFILES_ORG)
|
self.rag = DocumentRAG(PORTAL_URL, BOT_API_KEY)
|
||||||
self.memory = MemoryClient(MEMORY_SERVICE_URL)
|
self.memory = MemoryClient(MEMORY_SERVICE_URL)
|
||||||
self.atlassian = AtlassianClient(PORTAL_URL, BOT_API_KEY)
|
self.atlassian = AtlassianClient(PORTAL_URL, BOT_API_KEY)
|
||||||
self.llm = AsyncOpenAI(base_url=LITELLM_URL, api_key=LITELLM_KEY) if LITELLM_URL else None
|
self.llm = AsyncOpenAI(base_url=LITELLM_URL, api_key=LITELLM_KEY) if LITELLM_URL else None
|
||||||
@@ -937,18 +907,15 @@ class Bot:
|
|||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Failed to save user keys")
|
logger.exception("Failed to save user keys")
|
||||||
|
|
||||||
async def _get_wildfiles_org(self, matrix_user_id: str) -> str | None:
|
async def _has_documents(self, matrix_user_id: str) -> bool:
|
||||||
"""Get user's WildFiles org slug via MatrixHost portal API.
|
"""Check if user has documents via MatrixHost portal API.
|
||||||
|
|
||||||
Auto-provisions a WildFiles org if the user has a MatrixHost account.
|
|
||||||
Falls back to legacy user_keys for backward compat.
|
|
||||||
Results are cached per session.
|
Results are cached per session.
|
||||||
"""
|
"""
|
||||||
if matrix_user_id in self._wildfiles_org_cache:
|
if matrix_user_id in self._wildfiles_org_cache:
|
||||||
return self._wildfiles_org_cache[matrix_user_id]
|
return self._wildfiles_org_cache[matrix_user_id] is not None
|
||||||
|
|
||||||
# Try portal API (auto-provisions org if needed)
|
if self.atlassian.enabled:
|
||||||
if self.atlassian.enabled: # reuses same portal_url + bot_api_key
|
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
async with httpx.AsyncClient(timeout=10.0) as client:
|
||||||
resp = await client.get(
|
resp = await client.get(
|
||||||
@@ -959,16 +926,13 @@ class Bot:
|
|||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = resp.json()
|
data = resp.json()
|
||||||
if data.get("connected"):
|
if data.get("connected"):
|
||||||
org_slug = data["org_slug"]
|
self._wildfiles_org_cache[matrix_user_id] = "connected"
|
||||||
self._wildfiles_org_cache[matrix_user_id] = org_slug
|
return True
|
||||||
logger.debug("Resolved WildFiles org %s for %s via portal", org_slug, matrix_user_id)
|
|
||||||
return org_slug
|
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.debug("Portal WildFiles org lookup failed for %s", matrix_user_id, exc_info=True)
|
logger.debug("Portal document check failed for %s", matrix_user_id, exc_info=True)
|
||||||
|
|
||||||
# No portal result — cache as None to avoid repeated lookups
|
|
||||||
self._wildfiles_org_cache[matrix_user_id] = None
|
self._wildfiles_org_cache[matrix_user_id] = None
|
||||||
return None
|
return False
|
||||||
|
|
||||||
async def start(self):
|
async def start(self):
|
||||||
# Restore existing session or create new one
|
# Restore existing session or create new one
|
||||||
@@ -1954,11 +1918,11 @@ class Bot:
|
|||||||
return
|
return
|
||||||
sender = event.sender if event else None
|
sender = event.sender if event else None
|
||||||
user_api_key = self.user_keys.get(sender) if sender else None
|
user_api_key = self.user_keys.get(sender) if sender else None
|
||||||
user_org_slug = await self._get_wildfiles_org(sender) if sender else None
|
has_docs = await self._has_documents(sender) if sender else False
|
||||||
if not user_api_key and not user_org_slug:
|
if not has_docs:
|
||||||
await self._send_text(room.room_id, "Documents not available. Manage your documents at [matrixhost.eu/documents](https://matrixhost.eu/documents).")
|
await self._send_text(room.room_id, "Documents not available. Manage your documents at [matrixhost.eu/documents](https://matrixhost.eu/documents).")
|
||||||
return
|
return
|
||||||
results = await self.rag.search(query, top_k=5, api_key=user_api_key, org_slug=user_org_slug)
|
results = await self.rag.search(query, top_k=5, matrix_user_id=sender)
|
||||||
if not results:
|
if not results:
|
||||||
await self._send_text(room.room_id, "No documents found.")
|
await self._send_text(room.room_id, "No documents found.")
|
||||||
return
|
return
|
||||||
@@ -2010,100 +1974,33 @@ class Bot:
|
|||||||
logger.info("User %s connected WildFiles key (org: %s)", sender, org_name)
|
logger.info("User %s connected WildFiles key (org: %s)", sender, org_name)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Check if user already has auto-provisioned org via MatrixHost portal
|
# Documents are managed via MatrixHost portal
|
||||||
if sender:
|
if sender:
|
||||||
org_slug = await self._get_wildfiles_org(sender)
|
has_docs = await self._has_documents(sender)
|
||||||
if org_slug:
|
if has_docs:
|
||||||
stats = await self.rag.get_org_stats(org_slug)
|
|
||||||
total = stats.get("total_documents", 0) if stats else 0
|
|
||||||
await self._send_text(
|
await self._send_text(
|
||||||
room.room_id,
|
room.room_id,
|
||||||
f"Documents are already connected via your MatrixHost account (org: **{org_slug}**, {total} documents). "
|
"Documents are connected via your MatrixHost account. "
|
||||||
f"Manage documents at [matrixhost.eu/documents](https://matrixhost.eu/documents).",
|
"Manage documents at [matrixhost.eu/documents](https://matrixhost.eu/documents).",
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
# SSO device authorization flow (fallback for non-MatrixHost users)
|
|
||||||
if sender and sender in self._pending_connects:
|
|
||||||
await self._send_text(room.room_id, "A connect flow is already in progress. Please complete or wait for it to expire.")
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
|
||||||
resp = await client.post(f"{self.rag.base_url}/api/v1/auth/device/code")
|
|
||||||
resp.raise_for_status()
|
|
||||||
data = resp.json()
|
|
||||||
except Exception:
|
|
||||||
logger.exception("Failed to start device auth flow")
|
|
||||||
await self._send_text(room.room_id, "Failed to start connection flow. Please try again later.")
|
|
||||||
return
|
|
||||||
|
|
||||||
device_code = data["device_code"]
|
|
||||||
user_code = data["user_code"]
|
|
||||||
verification_url = data["verification_url"]
|
|
||||||
|
|
||||||
await self._send_text(
|
await self._send_text(
|
||||||
room.room_id,
|
room.room_id,
|
||||||
f"To connect documents, visit:\n\n"
|
"Upload documents at [matrixhost.eu/documents](https://matrixhost.eu/documents) "
|
||||||
f"**{verification_url}**\n\n"
|
"to enable AI-powered document search.",
|
||||||
f"and enter code: **{user_code}**\n\n"
|
|
||||||
f"_This link expires in 10 minutes._",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Track pending connect and start polling
|
|
||||||
self._pending_connects[sender] = device_code
|
|
||||||
asyncio.create_task(self._poll_device_auth(room.room_id, sender, device_code))
|
|
||||||
|
|
||||||
async def _handle_disconnect(self, room, event=None):
|
async def _handle_disconnect(self, room, event=None):
|
||||||
"""Handle !ai disconnect — remove stored WildFiles API key."""
|
"""Handle !ai disconnect — legacy, documents managed via portal now."""
|
||||||
sender = event.sender if event else None
|
sender = event.sender if event else None
|
||||||
if sender and sender in self.user_keys:
|
if sender and sender in self.user_keys:
|
||||||
del self.user_keys[sender]
|
del self.user_keys[sender]
|
||||||
self._save_user_keys()
|
self._save_user_keys()
|
||||||
await self._send_text(room.room_id, "Custom document key removed. Using default document search.")
|
await self._send_text(room.room_id, "Legacy document key removed.")
|
||||||
logger.info("User %s disconnected WildFiles key", sender)
|
logger.info("User %s removed legacy WildFiles key", sender)
|
||||||
else:
|
else:
|
||||||
await self._send_text(room.room_id, "No custom document key connected.")
|
await self._send_text(room.room_id, "Documents are managed at [matrixhost.eu/documents](https://matrixhost.eu/documents).")
|
||||||
|
|
||||||
async def _poll_device_auth(self, room_id: str, sender: str, device_code: str):
|
|
||||||
"""Poll WildFiles for device auth approval (5s interval, 10 min max)."""
|
|
||||||
poll_url = f"{self.rag.base_url}/api/v1/auth/device/status"
|
|
||||||
try:
|
|
||||||
for _ in range(120): # 120 * 5s = 10 min
|
|
||||||
await asyncio.sleep(5)
|
|
||||||
try:
|
|
||||||
async with httpx.AsyncClient(timeout=10.0) as client:
|
|
||||||
resp = await client.get(poll_url, params={"device_code": device_code})
|
|
||||||
resp.raise_for_status()
|
|
||||||
data = resp.json()
|
|
||||||
except Exception:
|
|
||||||
logger.debug("Device auth poll failed, retrying", exc_info=True)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if data["status"] == "approved":
|
|
||||||
api_key = data["api_key"]
|
|
||||||
org_slug = data.get("organization", "unknown")
|
|
||||||
self.user_keys[sender] = api_key
|
|
||||||
self._save_user_keys()
|
|
||||||
await self._send_text(
|
|
||||||
room_id,
|
|
||||||
f"Documents connected (org: **{org_slug}**). Your documents are now searchable.",
|
|
||||||
)
|
|
||||||
logger.info("User %s connected via device auth (org: %s)", sender, org_slug)
|
|
||||||
return
|
|
||||||
elif data["status"] == "expired":
|
|
||||||
await self._send_text(room_id, "Connection flow expired. Type `!ai connect` to try again.")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Timeout after 10 minutes
|
|
||||||
await self._send_text(room_id, "Connection flow timed out. Type `!ai connect` to try again.")
|
|
||||||
except asyncio.CancelledError:
|
|
||||||
pass
|
|
||||||
except Exception:
|
|
||||||
logger.exception("Device auth polling error")
|
|
||||||
await self._send_text(room_id, "Connection flow failed. Type `!ai connect` to try again.")
|
|
||||||
finally:
|
|
||||||
self._pending_connects.pop(sender, None)
|
|
||||||
|
|
||||||
async def _brave_search(self, query: str, count: int = 5) -> str:
|
async def _brave_search(self, query: str, count: int = 5) -> str:
|
||||||
"""Call Brave Search API and return formatted results."""
|
"""Call Brave Search API and return formatted results."""
|
||||||
@@ -2243,10 +2140,8 @@ class Bot:
|
|||||||
# Rewrite query using conversation context for better RAG search
|
# Rewrite query using conversation context for better RAG search
|
||||||
search_query = await self._rewrite_query(user_message, history, model)
|
search_query = await self._rewrite_query(user_message, history, model)
|
||||||
|
|
||||||
# WildFiles document context (portal org auto-provision, legacy API key fallback)
|
# Document context via MatrixHost API
|
||||||
user_api_key = self.user_keys.get(sender) if sender else None
|
doc_results = await self.rag.search(search_query, matrix_user_id=sender) if sender else []
|
||||||
user_org_slug = await self._get_wildfiles_org(sender) if sender else None
|
|
||||||
doc_results = await self.rag.search(search_query, api_key=user_api_key, org_slug=user_org_slug)
|
|
||||||
doc_context = self.rag.format_context(doc_results)
|
doc_context = self.rag.format_context(doc_results)
|
||||||
if doc_context:
|
if doc_context:
|
||||||
logger.info("RAG found %d docs for: %s (original: %s)", len(doc_results), search_query[:50], user_message[:50])
|
logger.info("RAG found %d docs for: %s (original: %s)", len(doc_results), search_query[:50], user_message[:50])
|
||||||
|
|||||||
Reference in New Issue
Block a user