From 9e146da3b0152bba983e01368393e0e8d10a6296 Mon Sep 17 00:00:00 2001 From: Christian Gick Date: Tue, 24 Feb 2026 11:37:37 +0200 Subject: [PATCH] feat(CF-1812): Use confluence-collab for section-based page editing Replace inline regex section parser in voice.py with confluence_collab library (BS4 parsing, 409 conflict retry). Bot now loads section outline into LLM context when Confluence links are detected. Co-Authored-By: Claude Opus 4.6 --- Dockerfile | 4 +++ Dockerfile.bot | 5 ++++ bot.py | 36 ++++++++++++++++++++----- confluence-collab | 1 + requirements.txt | 2 ++ voice.py | 68 ++++++++--------------------------------------- 6 files changed, 52 insertions(+), 64 deletions(-) create mode 120000 confluence-collab diff --git a/Dockerfile b/Dockerfile index e50fb62..421f20a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -52,6 +52,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt +# Install confluence-collab for section-based editing (CF-1812) +COPY confluence-collab/ /tmp/confluence-collab/ +RUN pip install --no-cache-dir /tmp/confluence-collab/ && rm -rf /tmp/confluence-collab/ + # Overwrite installed FFI binary with patched version (HKDF + key_ring_size support) COPY --from=rust-build /build/livekit-rust-sdks/target/release/liblivekit_ffi.so /patched/ ENV LIVEKIT_LIB_PATH=/patched/liblivekit_ffi.so diff --git a/Dockerfile.bot b/Dockerfile.bot index b8bc728..6aa988a 100644 --- a/Dockerfile.bot +++ b/Dockerfile.bot @@ -3,4 +3,9 @@ WORKDIR /app RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg libolm-dev && rm -rf /var/lib/apt/lists/* COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt + +# Install confluence-collab for section-based editing (CF-1812) +COPY confluence-collab/ /tmp/confluence-collab/ +RUN pip install --no-cache-dir /tmp/confluence-collab/ && rm -rf /tmp/confluence-collab/ + COPY . . diff --git a/bot.py b/bot.py index 406187c..c8e7b17 100644 --- a/bot.py +++ b/bot.py @@ -857,20 +857,42 @@ class Bot: except Exception as exc: logger.warning("Confluence short link resolution failed: %s", exc) if confluence_page_id: - # Fetch actual page content so the text bot can work with it + # Fetch page with section structure for targeted editing conf_text = f"confluence_page_id:{confluence_page_id}" conf_title = f"Confluence page {confluence_page_id}" if CONFLUENCE_URL and CONFLUENCE_USER and CONFLUENCE_TOKEN: try: - from voice import _confluence_read_page - title, plain, _ver = await _confluence_read_page(confluence_page_id) - conf_title = title - conf_text = f"confluence_page_id:{confluence_page_id}\n\nTitle: {title}\n\n{plain}" - logger.info("Fetched Confluence page %s: %s (%d chars)", - confluence_page_id, title, len(plain)) + from confluence_collab.client import Auth, get_page + from confluence_collab.parser import parse_sections + auth = Auth(base_url=CONFLUENCE_URL, username=CONFLUENCE_USER, api_token=CONFLUENCE_TOKEN) + page = await get_page(confluence_page_id, auth) + conf_title = page.title + sections = parse_sections(page.body_html) + section_outline = "\n".join( + f"{' ' * (s.level - 1)}h{s.level}: {s.heading}" for s in sections + ) + # Strip HTML for plain text context + plain = re.sub(r"<[^>]+>", " ", page.body_html) + plain = re.sub(r"\s+", " ", plain).strip() + conf_text = ( + f"confluence_page_id:{confluence_page_id}\n\n" + f"Title: {page.title}\n\n" + f"Sections:\n{section_outline}\n\n" + f"{plain}" + ) + logger.info("Fetched Confluence page %s: %s (%d chars, %d sections)", + confluence_page_id, page.title, len(plain), len(sections)) except Exception as exc: logger.warning("Confluence page fetch failed for %s: %s", confluence_page_id, exc) + # Fallback to voice.py reader + try: + from voice import _confluence_read_page + title, plain, _ver = await _confluence_read_page(confluence_page_id) + conf_title = title + conf_text = f"confluence_page_id:{confluence_page_id}\n\nTitle: {title}\n\n{plain}" + except Exception: + pass docs = self._room_document_context.setdefault(room.room_id, []) docs.append({ "type": "confluence", diff --git a/confluence-collab b/confluence-collab new file mode 120000 index 0000000..b4287a0 --- /dev/null +++ b/confluence-collab @@ -0,0 +1 @@ +/Users/christian.gick/Development/Infrastructure/mcp-servers/confluence-collab \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index d0ebf8a..dfd6a82 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,5 @@ openai>=2.0,<3.0 pymupdf>=1.24,<2.0 python-docx>=1.0,<2.0 Pillow>=10.0,<12.0 +beautifulsoup4>=4.12 +lxml>=5.0 diff --git a/voice.py b/voice.py index 295ae64..d4bd25a 100644 --- a/voice.py +++ b/voice.py @@ -279,67 +279,21 @@ async def _confluence_read_page(page_id: str) -> tuple[str, str, int]: async def _confluence_update_section(page_id: str, section_heading: str, new_html: str) -> str: """Update a section of a Confluence page by heading. - Finds the section by heading, replaces content up to next same-level heading, - PUTs with incremented version. + Uses confluence_collab library for BS4 parsing and 409 conflict retry. """ if not CONFLUENCE_URL or not CONFLUENCE_USER or not CONFLUENCE_TOKEN: return "Confluence credentials not configured." - # Read current page - url = f"{CONFLUENCE_URL}/rest/api/content/{page_id}" - params = {"expand": "body.storage,version,title"} - async with httpx.AsyncClient(timeout=15.0) as client: - resp = await client.get(url, params=params, auth=(CONFLUENCE_USER, CONFLUENCE_TOKEN)) - resp.raise_for_status() - data = resp.json() + from confluence_collab.client import Auth + from confluence_collab.editor import section_update - title = data["title"] - version = data["version"]["number"] - body_html = data["body"]["storage"]["value"] - - # Find section by heading (h1-h6) and replace content up to next same-level heading - heading_pattern = re.compile( - r'(]*>.*?' + re.escape(section_heading) + r'.*?)', - re.IGNORECASE | re.DOTALL, - ) - match = heading_pattern.search(body_html) - if not match: - return f"Section '{section_heading}' not found on page." - - heading_tag = match.group(0) - heading_level = match.group(2) - section_start = match.end() - - # Find next heading of same or higher level - next_heading = re.compile( - rf']*>', - re.IGNORECASE, - ) - next_match = next_heading.search(body_html, section_start) - section_end = next_match.start() if next_match else len(body_html) - - # Replace section content - new_body = body_html[:section_start] + new_html + body_html[section_end:] - - # PUT updated page - put_data = { - "version": {"number": version + 1}, - "title": title, - "type": "page", - "body": { - "storage": { - "value": new_body, - "representation": "storage", - } - }, - } - async with httpx.AsyncClient(timeout=15.0) as client: - resp = await client.put( - url, - json=put_data, - auth=(CONFLUENCE_USER, CONFLUENCE_TOKEN), - ) - resp.raise_for_status() - return f"Section '{section_heading}' updated successfully." + auth = Auth(base_url=CONFLUENCE_URL, username=CONFLUENCE_USER, api_token=CONFLUENCE_TOKEN) + result = await section_update(page_id, section_heading, new_html, auth) + if result.ok: + msg = f"Section '{section_heading}' updated successfully." + if result.retries > 0: + msg += f" ({result.retries} conflict retries)" + return msg + return result.message def _build_e2ee_options() -> rtc.E2EEOptions: