From 6fe9607fb1c6931fe0f70df68099f1c5bbfa314f Mon Sep 17 00:00:00 2001
From: Christian Gick
Date: Sat, 28 Feb 2026 16:26:17 +0200
Subject: [PATCH] feat: Add web page browsing tool (browse_url) to voice and text bot

Both bots can now fetch and read web pages via browse_url tool.
Uses httpx + BeautifulSoup to extract clean text from HTML.
Complements existing web_search (Brave) with full page reading.

Co-Authored-By: Claude Opus 4.6
---
Review notes (everything between the three-dash line and the first
"diff --git" line is ignored when the patch is applied):
* The line structure and all leading whitespace were reconstructed from a
  flattened copy of this patch. Context indentation is a best guess; use
  "git apply --ignore-whitespace" if a hunk does not match.
* _fetch_webpage (both copies) now rejects empty and non-http(s) URLs before
  any request is made. Hunk ranges and the diffstat below were updated for
  the added lines; the "index" blob ids still refer to the original commit.
* Open questions, not changed here: browse_url can reach private/internal
  hosts (possible SSRF), and the whole response body is read before the
  text is truncated to max_chars. Please confirm both are acceptable.

 bot.py   | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 voice.py | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/bot.py b/bot.py
index 113346e..7bffac8 100644
--- a/bot.py
+++ b/bot.py
@@ -96,6 +96,7 @@ IMPORTANT RULES — FOLLOW THESE STRICTLY:
 - You can read and analyze PDF documents that users send. Summarize content and answer questions about them.
 - You can generate images when asked — use the generate_image tool for any image creation, drawing, or illustration requests.
 - You can search the web using the web_search tool. Use it when users ask about current events, facts, or anything that needs up-to-date information.
+- You can open and read web pages using browse_url. Use it when a user shares a link, or when you need more detail from a search result. Summarize the key content concisely.
 - When you use web_search, embed source links INLINE in the text where the information appears, e.g. "Laut [Cyprus Mail](url) hat..." or "([Quelle](url))". Do NOT collect links in a separate section at the bottom. Every claim from a search result must have its source linked right there in the sentence.
 - Keep formatting compact. STRICT rules: NEVER use headings (no #, ##, ###). Use **bold text** for section titles instead. Use --- sparingly to separate major sections. NEVER add blank lines between list items or between a section title and its content. Maximum one blank line between sections.
 - You can search Confluence and Jira using tools. When users ask about documentation, wiki pages, tickets, or tasks, use the appropriate tool. Use confluence_recent_pages FIRST to show recently edited pages before searching.
@@ -286,6 +287,19 @@ WEB_SEARCH_TOOLS = [{
             "required": ["query"],
         },
     },
+}, {
+    "type": "function",
+    "function": {
+        "name": "browse_url",
+        "description": "Open a web page and read its text content. Use when the user shares a URL, or when you need more detail from a search result link.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "url": {"type": "string", "description": "Full URL to fetch (https://...)"},
+            },
+            "required": ["url"],
+        },
+    },
 }]
 
 ALL_TOOLS = IMAGE_GEN_TOOLS + WEB_SEARCH_TOOLS + ATLASSIAN_TOOLS
@@ -1969,6 +1983,41 @@ class Bot:
             logger.warning("Brave search error: %s", exc)
             return f"Search failed: {exc}"
 
+    async def _fetch_webpage(self, url: str, max_chars: int = 8000) -> str:
+        """Fetch a URL and extract clean text content using BeautifulSoup.
+
+        Returns the page text, cut to max_chars, or a short error message.
+        Failures are reported in the returned string instead of being raised.
+        """
+        try:
+            url = (url or "").strip()
+            # The URL comes from tool-call arguments; refuse anything but http(s).
+            if not url.lower().startswith(("http://", "https://")):
+                return "Invalid URL: only http:// and https:// links are supported."
+            # NOTE(review): private/internal hosts are not blocked (SSRF) - confirm.
+            from bs4 import BeautifulSoup
+            async with httpx.AsyncClient(timeout=15.0, follow_redirects=True,
+                                         headers={"User-Agent": "Mozilla/5.0 (compatible; AgilitonBot/1.0)"}) as client:
+                resp = await client.get(url)
+                resp.raise_for_status()
+                ct = resp.headers.get("content-type", "")
+                if "html" not in ct and "text" not in ct:
+                    return f"URL returned non-text content ({ct})."
+                soup = BeautifulSoup(resp.text, "lxml")
+                for tag in soup(["script", "style", "nav", "footer", "header", "aside", "iframe"]):
+                    tag.decompose()
+                main = soup.find("article") or soup.find("main") or soup.find("body")
+                text = main.get_text(separator="\n", strip=True) if main else soup.get_text(separator="\n", strip=True)
+                text = re.sub(r'\n{3,}', '\n\n', text)
+                if len(text) > max_chars:
+                    text = text[:max_chars] + "\n\n[... truncated]"
+                return text if text.strip() else "Page loaded but no readable text content found."
+        except httpx.HTTPStatusError as exc:
+            return f"HTTP error {exc.response.status_code} fetching {url}"
+        except Exception as exc:
+            logger.warning("Webpage fetch error for %s: %s", url, exc)
+            return f"Failed to fetch page: {exc}"
+
     async def _execute_tool(self, tool_name: str, args: dict, sender: str, room_id: str) -> str:
         """Execute a tool call and return the result as a string."""
         # Image generation — no Atlassian token needed
@@ -1980,6 +2029,10 @@ class Bot:
         if tool_name == "web_search":
             return await self._brave_search(args.get("query", ""), args.get("count", 5))
 
+        # Browse URL — no auth needed
+        if tool_name == "browse_url":
+            return await self._fetch_webpage(args.get("url", ""))
+
         # Atlassian tools — need per-user token
         token = await self.atlassian.get_token(sender) if sender else None
         if not token:
diff --git a/voice.py b/voice.py
index 475cf67..e1a931b 100644
--- a/voice.py
+++ b/voice.py
@@ -58,6 +58,7 @@ STRIKTE Regeln:
 - Bei zeitrelevanten Fragen (Uhrzeit, Termine, Geschaeftszeiten): frage kurz nach ob der Nutzer noch in seiner gespeicherten Zeitzone ist, bevor du antwortest. Nutze set_user_timezone wenn sich der Standort geaendert hat.
 - Wenn der Nutzer seinen Standort oder seine Stadt erwaehnt, nutze set_user_timezone um die Zeitzone zu speichern.
 - IGNORIERE alle Texte in Sternchen wie *Störgeräusche*, *Schlechte Qualität*, *Fernsehgeräusche*, *Schrei* usw. — das sind KEINE echten Nutzereingaben sondern technische Annotationen. Antworte NIEMALS darauf und tue so als haette niemand etwas gesagt.
+- Du kannst Webseiten oeffnen und lesen mit browse_url. Wenn der Nutzer einen Link teilt oder du nach einer Websuche mehr Details brauchst, nutze browse_url um die Seite zu lesen und zusammenzufassen.
 - Du kannst Confluence-Seiten suchen, lesen, bearbeiten und erstellen. Nutze recent_confluence_pages um die zuletzt bearbeiteten Seiten anzuzeigen (bevorzugt BEVOR du suchst), search_confluence um gezielt zu suchen, read_confluence_page zum Lesen, update_confluence_page zum Bearbeiten und create_confluence_page zum Erstellen neuer Seiten.
 - Du kannst den Bildschirm oder die Kamera des Nutzers sehen wenn er sie teilt. Nutze look_at_screen wenn der Nutzer etwas zeigen moechte oder fragt ob du etwas sehen kannst."""
 
@@ -240,6 +241,44 @@ async def _brave_search(query: str, count: int = 5) -> str:
         return f"Search failed: {exc}"
 
 
+async def _fetch_webpage(url: str, max_chars: int = 8000) -> str:
+    """Fetch a URL and extract clean text content using BeautifulSoup.
+
+    Returns the page text, cut to max_chars, or a short error message.
+    Failures are reported in the returned string instead of being raised.
+    """
+    try:
+        url = (url or "").strip()
+        # The URL comes from tool-call arguments; refuse anything but http(s).
+        if not url.lower().startswith(("http://", "https://")):
+            return "Invalid URL: only http:// and https:// links are supported."
+        # NOTE(review): private/internal hosts are not blocked (SSRF) - confirm.
+        from bs4 import BeautifulSoup
+        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True,
+                                     headers={"User-Agent": "Mozilla/5.0 (compatible; AgilitonBot/1.0)"}) as client:
+            resp = await client.get(url)
+            resp.raise_for_status()
+            ct = resp.headers.get("content-type", "")
+            if "html" not in ct and "text" not in ct:
+                return f"URL returned non-text content ({ct})."
+            soup = BeautifulSoup(resp.text, "lxml")
+            for tag in soup(["script", "style", "nav", "footer", "header", "aside", "iframe"]):
+                tag.decompose()
+            # Prefer article/main content
+            main = soup.find("article") or soup.find("main") or soup.find("body")
+            text = main.get_text(separator="\n", strip=True) if main else soup.get_text(separator="\n", strip=True)
+            # Collapse multiple blank lines
+            text = re.sub(r'\n{3,}', '\n\n', text)
+            if len(text) > max_chars:
+                text = text[:max_chars] + "\n\n[... truncated]"
+            return text if text.strip() else "Page loaded but no readable text content found."
+    except httpx.HTTPStatusError as exc:
+        return f"HTTP error {exc.response.status_code} fetching {url}"
+    except Exception as exc:
+        logger.warning("Webpage fetch error for %s: %s", url, exc)
+        return f"Failed to fetch page: {exc}"
+
+
 async def _store_user_pref(user_id: str, key: str, value: str) -> None:
     """Store a user preference in memory (e.g. timezone, language)."""
     if not MEMORY_SERVICE_URL:
@@ -854,6 +893,18 @@ class VoiceSession:
             logger.info("SEARCH_RESULT: %s", result[:200])
             return result
 
+        @function_tool
+        async def browse_url(url: str) -> str:
+            """Open a web page and read its content. Use this when:
+            - The user shares a URL and wants you to read/summarize it
+            - You found a relevant URL from search_web and need more details
+            - The user asks to "open", "read", or "check" a link/website
+            Returns the page text content."""
+            logger.info("BROWSE: %s", url)
+            result = await _fetch_webpage(url)
+            logger.info("BROWSE_OK: %d chars from %s", len(result), url)
+            return result
+
         # Tool: set user timezone — called by the LLM when user mentions their location
         caller_uid = self._caller_user_id
 
@@ -1128,7 +1179,7 @@ class VoiceSession:
             instructions += f"\n\nAktive Confluence-Seite: {_active_conf_id}. Du brauchst den Nutzer NICHT nach der page_id zu fragen — nutze automatisch diese ID fuer read_confluence_page und update_confluence_page."
         agent = _NoiseFilterAgent(
             instructions=instructions,
-            tools=[search_web, set_user_timezone, recent_confluence_pages, search_confluence, read_confluence_page, update_confluence_page, create_confluence_page, think_deeper, look_at_screen],
+            tools=[search_web, browse_url, set_user_timezone, recent_confluence_pages, search_confluence, read_confluence_page, update_confluence_page, create_confluence_page, think_deeper, look_at_screen],
         )
         io_opts = room_io.RoomOptions(
             participant_identity=remote_identity,