feat: Add web page browsing tool (browse_url) to voice and text bot

Both bots can now fetch and read web pages via browse_url tool.
Uses httpx + BeautifulSoup to extract clean text from HTML.
Complements existing web_search (Brave) with full page reading.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-28 16:26:17 +02:00
parent 34f403a066
commit 6fe9607fb1
2 changed files with 87 additions and 1 deletions

44
bot.py
View File

@@ -96,6 +96,7 @@ IMPORTANT RULES — FOLLOW THESE STRICTLY:
- You can read and analyze PDF documents that users send. Summarize content and answer questions about them. - You can read and analyze PDF documents that users send. Summarize content and answer questions about them.
- You can generate images when asked — use the generate_image tool for any image creation, drawing, or illustration requests. - You can generate images when asked — use the generate_image tool for any image creation, drawing, or illustration requests.
- You can search the web using the web_search tool. Use it when users ask about current events, facts, or anything that needs up-to-date information. - You can search the web using the web_search tool. Use it when users ask about current events, facts, or anything that needs up-to-date information.
- You can open and read web pages using browse_url. Use it when a user shares a link, or when you need more detail from a search result. Summarize the key content concisely.
- When you use web_search, embed source links INLINE in the text where the information appears, e.g. "Laut [Cyprus Mail](url) hat..." or "([Quelle](url))". Do NOT collect links in a separate section at the bottom. Every claim from a search result must have its source linked right there in the sentence. - When you use web_search, embed source links INLINE in the text where the information appears, e.g. "Laut [Cyprus Mail](url) hat..." or "([Quelle](url))". Do NOT collect links in a separate section at the bottom. Every claim from a search result must have its source linked right there in the sentence.
- Keep formatting compact. STRICT rules: NEVER use headings (no #, ##, ###). Use **bold text** for section titles instead. Use --- sparingly to separate major sections. NEVER add blank lines between list items or between a section title and its content. Maximum one blank line between sections. - Keep formatting compact. STRICT rules: NEVER use headings (no #, ##, ###). Use **bold text** for section titles instead. Use --- sparingly to separate major sections. NEVER add blank lines between list items or between a section title and its content. Maximum one blank line between sections.
- You can search Confluence and Jira using tools. When users ask about documentation, wiki pages, tickets, or tasks, use the appropriate tool. Use confluence_recent_pages FIRST to show recently edited pages before searching. - You can search Confluence and Jira using tools. When users ask about documentation, wiki pages, tickets, or tasks, use the appropriate tool. Use confluence_recent_pages FIRST to show recently edited pages before searching.
@@ -286,6 +287,19 @@ WEB_SEARCH_TOOLS = [{
"required": ["query"], "required": ["query"],
}, },
}, },
}, {
"type": "function",
"function": {
"name": "browse_url",
"description": "Open a web page and read its text content. Use when the user shares a URL, or when you need more detail from a search result link.",
"parameters": {
"type": "object",
"properties": {
"url": {"type": "string", "description": "Full URL to fetch (https://...)"},
},
"required": ["url"],
},
},
}] }]
ALL_TOOLS = IMAGE_GEN_TOOLS + WEB_SEARCH_TOOLS + ATLASSIAN_TOOLS ALL_TOOLS = IMAGE_GEN_TOOLS + WEB_SEARCH_TOOLS + ATLASSIAN_TOOLS
@@ -1969,6 +1983,32 @@ class Bot:
logger.warning("Brave search error: %s", exc) logger.warning("Brave search error: %s", exc)
return f"Search failed: {exc}" return f"Search failed: {exc}"
async def _fetch_webpage(self, url: str, max_chars: int = 8000) -> str:
"""Fetch a URL and extract clean text content using BeautifulSoup."""
try:
from bs4 import BeautifulSoup
async with httpx.AsyncClient(timeout=15.0, follow_redirects=True,
headers={"User-Agent": "Mozilla/5.0 (compatible; AgilitonBot/1.0)"}) as client:
resp = await client.get(url)
resp.raise_for_status()
ct = resp.headers.get("content-type", "")
if "html" not in ct and "text" not in ct:
return f"URL returned non-text content ({ct})."
soup = BeautifulSoup(resp.text, "lxml")
for tag in soup(["script", "style", "nav", "footer", "header", "aside", "iframe"]):
tag.decompose()
main = soup.find("article") or soup.find("main") or soup.find("body")
text = main.get_text(separator="\n", strip=True) if main else soup.get_text(separator="\n", strip=True)
text = re.sub(r'\n{3,}', '\n\n', text)
if len(text) > max_chars:
text = text[:max_chars] + "\n\n[... truncated]"
return text if text.strip() else "Page loaded but no readable text content found."
except httpx.HTTPStatusError as exc:
return f"HTTP error {exc.response.status_code} fetching {url}"
except Exception as exc:
logger.warning("Webpage fetch error for %s: %s", url, exc)
return f"Failed to fetch page: {exc}"
async def _execute_tool(self, tool_name: str, args: dict, sender: str, room_id: str) -> str: async def _execute_tool(self, tool_name: str, args: dict, sender: str, room_id: str) -> str:
"""Execute a tool call and return the result as a string.""" """Execute a tool call and return the result as a string."""
# Image generation — no Atlassian token needed # Image generation — no Atlassian token needed
@@ -1980,6 +2020,10 @@ class Bot:
if tool_name == "web_search": if tool_name == "web_search":
return await self._brave_search(args.get("query", ""), args.get("count", 5)) return await self._brave_search(args.get("query", ""), args.get("count", 5))
# Browse URL — no auth needed
if tool_name == "browse_url":
return await self._fetch_webpage(args.get("url", ""))
# Atlassian tools — need per-user token # Atlassian tools — need per-user token
token = await self.atlassian.get_token(sender) if sender else None token = await self.atlassian.get_token(sender) if sender else None
if not token: if not token:

View File

@@ -58,6 +58,7 @@ STRIKTE Regeln:
- Bei zeitrelevanten Fragen (Uhrzeit, Termine, Geschaeftszeiten): frage kurz nach ob der Nutzer noch in seiner gespeicherten Zeitzone ist, bevor du antwortest. Nutze set_user_timezone wenn sich der Standort geaendert hat. - Bei zeitrelevanten Fragen (Uhrzeit, Termine, Geschaeftszeiten): frage kurz nach ob der Nutzer noch in seiner gespeicherten Zeitzone ist, bevor du antwortest. Nutze set_user_timezone wenn sich der Standort geaendert hat.
- Wenn der Nutzer seinen Standort oder seine Stadt erwaehnt, nutze set_user_timezone um die Zeitzone zu speichern. - Wenn der Nutzer seinen Standort oder seine Stadt erwaehnt, nutze set_user_timezone um die Zeitzone zu speichern.
- IGNORIERE alle Texte in Sternchen wie *Störgeräusche*, *Schlechte Qualität*, *Fernsehgeräusche*, *Schrei* usw. — das sind KEINE echten Nutzereingaben sondern technische Annotationen. Antworte NIEMALS darauf und tue so als haette niemand etwas gesagt. - IGNORIERE alle Texte in Sternchen wie *Störgeräusche*, *Schlechte Qualität*, *Fernsehgeräusche*, *Schrei* usw. — das sind KEINE echten Nutzereingaben sondern technische Annotationen. Antworte NIEMALS darauf und tue so als haette niemand etwas gesagt.
- Du kannst Webseiten oeffnen und lesen mit browse_url. Wenn der Nutzer einen Link teilt oder du nach einer Websuche mehr Details brauchst, nutze browse_url um die Seite zu lesen und zusammenzufassen.
- Du kannst Confluence-Seiten suchen, lesen, bearbeiten und erstellen. Nutze recent_confluence_pages um die zuletzt bearbeiteten Seiten anzuzeigen (bevorzugt BEVOR du suchst), search_confluence um gezielt zu suchen, read_confluence_page zum Lesen, update_confluence_page zum Bearbeiten und create_confluence_page zum Erstellen neuer Seiten. - Du kannst Confluence-Seiten suchen, lesen, bearbeiten und erstellen. Nutze recent_confluence_pages um die zuletzt bearbeiteten Seiten anzuzeigen (bevorzugt BEVOR du suchst), search_confluence um gezielt zu suchen, read_confluence_page zum Lesen, update_confluence_page zum Bearbeiten und create_confluence_page zum Erstellen neuer Seiten.
- Du kannst den Bildschirm oder die Kamera des Nutzers sehen wenn er sie teilt. Nutze look_at_screen wenn der Nutzer etwas zeigen moechte oder fragt ob du etwas sehen kannst.""" - Du kannst den Bildschirm oder die Kamera des Nutzers sehen wenn er sie teilt. Nutze look_at_screen wenn der Nutzer etwas zeigen moechte oder fragt ob du etwas sehen kannst."""
@@ -240,6 +241,35 @@ async def _brave_search(query: str, count: int = 5) -> str:
return f"Search failed: {exc}" return f"Search failed: {exc}"
async def _fetch_webpage(url: str, max_chars: int = 8000) -> str:
"""Fetch a URL and extract clean text content using BeautifulSoup."""
try:
from bs4 import BeautifulSoup
async with httpx.AsyncClient(timeout=15.0, follow_redirects=True,
headers={"User-Agent": "Mozilla/5.0 (compatible; AgilitonBot/1.0)"}) as client:
resp = await client.get(url)
resp.raise_for_status()
ct = resp.headers.get("content-type", "")
if "html" not in ct and "text" not in ct:
return f"URL returned non-text content ({ct})."
soup = BeautifulSoup(resp.text, "lxml")
for tag in soup(["script", "style", "nav", "footer", "header", "aside", "iframe"]):
tag.decompose()
# Prefer article/main content
main = soup.find("article") or soup.find("main") or soup.find("body")
text = main.get_text(separator="\n", strip=True) if main else soup.get_text(separator="\n", strip=True)
# Collapse multiple blank lines
text = re.sub(r'\n{3,}', '\n\n', text)
if len(text) > max_chars:
text = text[:max_chars] + "\n\n[... truncated]"
return text if text.strip() else "Page loaded but no readable text content found."
except httpx.HTTPStatusError as exc:
return f"HTTP error {exc.response.status_code} fetching {url}"
except Exception as exc:
logger.warning("Webpage fetch error for %s: %s", url, exc)
return f"Failed to fetch page: {exc}"
async def _store_user_pref(user_id: str, key: str, value: str) -> None: async def _store_user_pref(user_id: str, key: str, value: str) -> None:
"""Store a user preference in memory (e.g. timezone, language).""" """Store a user preference in memory (e.g. timezone, language)."""
if not MEMORY_SERVICE_URL: if not MEMORY_SERVICE_URL:
@@ -854,6 +884,18 @@ class VoiceSession:
logger.info("SEARCH_RESULT: %s", result[:200]) logger.info("SEARCH_RESULT: %s", result[:200])
return result return result
@function_tool
async def browse_url(url: str) -> str:
    """Open a web page and read its content. Use this when:
    - The user shares a URL and wants you to read/summarize it
    - You found a relevant URL from search_web and need more details
    - The user asks to "open", "read", or "check" a link/website
    Returns the page text content."""
    # NOTE: the docstring above doubles as the tool description exposed to
    # the LLM via @function_tool, so its wording is kept verbatim.
    logger.info("BROWSE: %s", url)
    page_text = await _fetch_webpage(url)
    # Log size rather than content to keep logs compact.
    logger.info("BROWSE_OK: %d chars from %s", len(page_text), url)
    return page_text
# Tool: set user timezone — called by the LLM when user mentions their location # Tool: set user timezone — called by the LLM when user mentions their location
caller_uid = self._caller_user_id caller_uid = self._caller_user_id
@@ -1128,7 +1170,7 @@ class VoiceSession:
instructions += f"\n\nAktive Confluence-Seite: {_active_conf_id}. Du brauchst den Nutzer NICHT nach der page_id zu fragen — nutze automatisch diese ID fuer read_confluence_page und update_confluence_page." instructions += f"\n\nAktive Confluence-Seite: {_active_conf_id}. Du brauchst den Nutzer NICHT nach der page_id zu fragen — nutze automatisch diese ID fuer read_confluence_page und update_confluence_page."
agent = _NoiseFilterAgent( agent = _NoiseFilterAgent(
instructions=instructions, instructions=instructions,
tools=[search_web, set_user_timezone, recent_confluence_pages, search_confluence, read_confluence_page, update_confluence_page, create_confluence_page, think_deeper, look_at_screen], tools=[search_web, browse_url, set_user_timezone, recent_confluence_pages, search_confluence, read_confluence_page, update_confluence_page, create_confluence_page, think_deeper, look_at_screen],
) )
io_opts = room_io.RoomOptions( io_opts = room_io.RoomOptions(
participant_identity=remote_identity, participant_identity=remote_identity,