diff --git a/bot.py b/bot.py index 7512217..6d35487 100644 --- a/bot.py +++ b/bot.py @@ -325,66 +325,36 @@ HELP_TEXT = """**AI Bot Commands** class DocumentRAG: - """Search WildFiles for relevant documents.""" + """Search documents via MatrixHost API (replaces WildFiles).""" - def __init__(self, base_url: str, org: str): - self.base_url = base_url.rstrip("/") - self.org = org - self.enabled = bool(base_url) + def __init__(self, portal_url: str, bot_api_key: str): + self.portal_url = portal_url.rstrip("/") + self.bot_api_key = bot_api_key + self.enabled = bool(portal_url and bot_api_key) - async def search(self, query: str, top_k: int = 3, api_key: str | None = None, org_slug: str | None = None) -> list[dict]: - org = org_slug or self.org - if not org and not api_key: + async def search(self, query: str, top_k: int = 3, api_key: str | None = None, org_slug: str | None = None, matrix_user_id: str | None = None) -> list[dict]: + if not self.enabled or not matrix_user_id: return [] try: - headers = {} - if api_key: - headers["X-API-Key"] = api_key - body = {"query": query, "limit": top_k, "organization": org} + body = {"query": query, "limit": top_k, "matrix_user_id": matrix_user_id} async with httpx.AsyncClient(timeout=15.0) as client: resp = await client.post( - f"{self.base_url}/api/v1/rag/search", + f"{self.portal_url}/api/bot/documents/search", json=body, - headers=headers, + headers={"Authorization": f"Bearer {self.bot_api_key}"}, ) resp.raise_for_status() return resp.json().get("results", []) except Exception: - logger.debug("WildFiles search failed", exc_info=True) + logger.debug("Document search failed", exc_info=True) return [] async def validate_key(self, api_key: str) -> dict | None: - """Validate an API key against WildFiles. Returns stats dict or None.""" - if not self.base_url: - return None - try: - async with httpx.AsyncClient(timeout=10.0) as client: - resp = await client.get( - f"{self.base_url}/api/v1/rag/stats", - headers={"X-API-Key": api_key}, - ) - resp.raise_for_status() - data = resp.json() - if data.get("total_documents", 0) >= 0: - return data - except Exception: - logger.debug("WildFiles key validation failed", exc_info=True) + """Legacy: no longer used (keys replaced by portal auth).""" return None async def get_org_stats(self, org_slug: str) -> dict | None: - """Get stats for an org by slug. Returns stats dict or None.""" - if not self.base_url: - return None - try: - async with httpx.AsyncClient(timeout=10.0) as client: - resp = await client.get( - f"{self.base_url}/api/v1/rag/stats", - params={"organization": org_slug}, - ) - resp.raise_for_status() - return resp.json() - except Exception: - logger.debug("WildFiles org stats failed for %s", org_slug, exc_info=True) + """Legacy: no longer used.""" return None def format_context(self, results: list[dict]) -> str: @@ -903,7 +873,7 @@ class Bot: self.voice_sessions: dict[str, VoiceSession] = {} self.active_calls = set() # rooms where we've sent call member event self.active_callers: dict[str, set[str]] = {} # room_id → set of caller user IDs - self.rag = DocumentRAG(WILDFILES_BASE_URL, WILDFILES_ORG) + self.rag = DocumentRAG(PORTAL_URL, BOT_API_KEY) self.memory = MemoryClient(MEMORY_SERVICE_URL) self.atlassian = AtlassianClient(PORTAL_URL, BOT_API_KEY) self.llm = AsyncOpenAI(base_url=LITELLM_URL, api_key=LITELLM_KEY) if LITELLM_URL else None @@ -937,18 +907,15 @@ class Bot: except Exception: logger.exception("Failed to save user keys") - async def _get_wildfiles_org(self, matrix_user_id: str) -> str | None: - """Get user's WildFiles org slug via MatrixHost portal API. + async def _has_documents(self, matrix_user_id: str) -> bool: + """Check if user has documents via MatrixHost portal API. - Auto-provisions a WildFiles org if the user has a MatrixHost account. - Falls back to legacy user_keys for backward compat. Results are cached per session. """ if matrix_user_id in self._wildfiles_org_cache: - return self._wildfiles_org_cache[matrix_user_id] + return self._wildfiles_org_cache[matrix_user_id] is not None - # Try portal API (auto-provisions org if needed) - if self.atlassian.enabled: # reuses same portal_url + bot_api_key + if self.atlassian.enabled: try: async with httpx.AsyncClient(timeout=10.0) as client: resp = await client.get( @@ -959,16 +926,13 @@ class Bot: resp.raise_for_status() data = resp.json() if data.get("connected"): - org_slug = data["org_slug"] - self._wildfiles_org_cache[matrix_user_id] = org_slug - logger.debug("Resolved WildFiles org %s for %s via portal", org_slug, matrix_user_id) - return org_slug + self._wildfiles_org_cache[matrix_user_id] = "connected" + return True except Exception: - logger.debug("Portal WildFiles org lookup failed for %s", matrix_user_id, exc_info=True) + logger.debug("Portal document check failed for %s", matrix_user_id, exc_info=True) - # No portal result — cache as None to avoid repeated lookups self._wildfiles_org_cache[matrix_user_id] = None - return None + return False async def start(self): # Restore existing session or create new one @@ -1954,11 +1918,11 @@ class Bot: return sender = event.sender if event else None user_api_key = self.user_keys.get(sender) if sender else None - user_org_slug = await self._get_wildfiles_org(sender) if sender else None - if not user_api_key and not user_org_slug: + has_docs = await self._has_documents(sender) if sender else False + if not has_docs: await self._send_text(room.room_id, "Documents not available. Manage your documents at [matrixhost.eu/documents](https://matrixhost.eu/documents).") return - results = await self.rag.search(query, top_k=5, api_key=user_api_key, org_slug=user_org_slug) + results = await self.rag.search(query, top_k=5, matrix_user_id=sender) if not results: await self._send_text(room.room_id, "No documents found.") return @@ -2010,100 +1974,33 @@ class Bot: logger.info("User %s connected WildFiles key (org: %s)", sender, org_name) return - # Check if user already has auto-provisioned org via MatrixHost portal + # Documents are managed via MatrixHost portal if sender: - org_slug = await self._get_wildfiles_org(sender) - if org_slug: - stats = await self.rag.get_org_stats(org_slug) - total = stats.get("total_documents", 0) if stats else 0 + has_docs = await self._has_documents(sender) + if has_docs: await self._send_text( room.room_id, - f"Documents are already connected via your MatrixHost account (org: **{org_slug}**, {total} documents). " - f"Manage documents at [matrixhost.eu/documents](https://matrixhost.eu/documents).", + "Documents are connected via your MatrixHost account. " + "Manage documents at [matrixhost.eu/documents](https://matrixhost.eu/documents).", ) return - # SSO device authorization flow (fallback for non-MatrixHost users) - if sender and sender in self._pending_connects: - await self._send_text(room.room_id, "A connect flow is already in progress. Please complete or wait for it to expire.") - return - - try: - async with httpx.AsyncClient(timeout=10.0) as client: - resp = await client.post(f"{self.rag.base_url}/api/v1/auth/device/code") - resp.raise_for_status() - data = resp.json() - except Exception: - logger.exception("Failed to start device auth flow") - await self._send_text(room.room_id, "Failed to start connection flow. Please try again later.") - return - - device_code = data["device_code"] - user_code = data["user_code"] - verification_url = data["verification_url"] - await self._send_text( room.room_id, - f"To connect documents, visit:\n\n" - f"**{verification_url}**\n\n" - f"and enter code: **{user_code}**\n\n" - f"_This link expires in 10 minutes._", + "Upload documents at [matrixhost.eu/documents](https://matrixhost.eu/documents) " + "to enable AI-powered document search.", ) - # Track pending connect and start polling - self._pending_connects[sender] = device_code - asyncio.create_task(self._poll_device_auth(room.room_id, sender, device_code)) - async def _handle_disconnect(self, room, event=None): - """Handle !ai disconnect — remove stored WildFiles API key.""" + """Handle !ai disconnect — legacy, documents managed via portal now.""" sender = event.sender if event else None if sender and sender in self.user_keys: del self.user_keys[sender] self._save_user_keys() - await self._send_text(room.room_id, "Custom document key removed. Using default document search.") - logger.info("User %s disconnected WildFiles key", sender) + await self._send_text(room.room_id, "Legacy document key removed.") + logger.info("User %s removed legacy WildFiles key", sender) else: - await self._send_text(room.room_id, "No custom document key connected.") - - async def _poll_device_auth(self, room_id: str, sender: str, device_code: str): - """Poll WildFiles for device auth approval (5s interval, 10 min max).""" - poll_url = f"{self.rag.base_url}/api/v1/auth/device/status" - try: - for _ in range(120): # 120 * 5s = 10 min - await asyncio.sleep(5) - try: - async with httpx.AsyncClient(timeout=10.0) as client: - resp = await client.get(poll_url, params={"device_code": device_code}) - resp.raise_for_status() - data = resp.json() - except Exception: - logger.debug("Device auth poll failed, retrying", exc_info=True) - continue - - if data["status"] == "approved": - api_key = data["api_key"] - org_slug = data.get("organization", "unknown") - self.user_keys[sender] = api_key - self._save_user_keys() - await self._send_text( - room_id, - f"Documents connected (org: **{org_slug}**). Your documents are now searchable.", - ) - logger.info("User %s connected via device auth (org: %s)", sender, org_slug) - return - elif data["status"] == "expired": - await self._send_text(room_id, "Connection flow expired. Type `!ai connect` to try again.") - return - - # Timeout after 10 minutes - await self._send_text(room_id, "Connection flow timed out. Type `!ai connect` to try again.") - except asyncio.CancelledError: - pass - except Exception: - logger.exception("Device auth polling error") - await self._send_text(room_id, "Connection flow failed. Type `!ai connect` to try again.") - finally: - self._pending_connects.pop(sender, None) + await self._send_text(room.room_id, "Documents are managed at [matrixhost.eu/documents](https://matrixhost.eu/documents).") async def _brave_search(self, query: str, count: int = 5) -> str: """Call Brave Search API and return formatted results.""" @@ -2243,10 +2140,8 @@ class Bot: # Rewrite query using conversation context for better RAG search search_query = await self._rewrite_query(user_message, history, model) - # WildFiles document context (portal org auto-provision, legacy API key fallback) - user_api_key = self.user_keys.get(sender) if sender else None - user_org_slug = await self._get_wildfiles_org(sender) if sender else None - doc_results = await self.rag.search(search_query, api_key=user_api_key, org_slug=user_org_slug) + # Document context via MatrixHost API + doc_results = await self.rag.search(search_query, matrix_user_id=sender) if sender else [] doc_context = self.rag.format_context(doc_results) if doc_context: logger.info("RAG found %d docs for: %s (original: %s)", len(doc_results), search_query[:50], user_message[:50])