refactor(CF-1812): Replace inline confluence-collab copy with git submodule

Single source of truth at christian/confluence-collab.git — eliminates stale copy drift.
Dockerfile COPY unchanged, works identically with submodule.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-24 12:30:31 +02:00
parent 9958fb9b6b
commit 08a3c4a9cc
11 changed files with 4 additions and 827 deletions

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "confluence-collab"]
path = confluence-collab
url = ssh://git@gitea-ssh.agiliton.internal:2222/christian/confluence-collab.git

1
confluence-collab Submodule

Submodule confluence-collab added at a189fa326b

View File

@@ -1,24 +0,0 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "confluence-collab"
version = "0.1.0"
description = "Section-based Confluence page editing with conflict retry"
requires-python = ">=3.11"
dependencies = [
"httpx>=0.27",
"beautifulsoup4>=4.12",
"lxml>=5.0",
"mcp>=1.0",
]
[project.optional-dependencies]
dev = ["pytest", "pytest-asyncio", "respx"]
[project.scripts]
confluence-collab = "confluence_collab.cli:main"
[tool.pytest.ini_options]
asyncio_mode = "auto"

View File

@@ -1,16 +0,0 @@
"""Confluence section-based collaborative editing library."""
from confluence_collab.parser import Section, parse_sections, find_section, replace_section_content
from confluence_collab.editor import section_list, section_get, section_update, section_append, section_delete
# Names re-exported at package level: the Section model and parsing helpers
# are imported from confluence_collab.parser, the section_* operations from
# confluence_collab.editor (see the imports above this list).
__all__ = [
"Section",
"parse_sections",
"find_section",
"replace_section_content",
"section_list",
"section_get",
"section_update",
"section_append",
"section_delete",
]

View File

@@ -1,4 +0,0 @@
"""Allow running as: python -m confluence_collab <command>"""
from confluence_collab.cli import main
# No __name__ guard: per the module docstring this file is the `python -m`
# entry point, so the CLI is started as soon as the module is executed.
main()

View File

@@ -1,103 +0,0 @@
"""CLI for manual testing of confluence-collab operations."""
from __future__ import annotations
import argparse
import asyncio
import json
import sys
from confluence_collab.client import Auth
from confluence_collab.editor import section_list, section_get, section_update, section_append, section_delete
async def cmd_list(args: argparse.Namespace) -> None:
    """Print the heading outline of a page, one heading per line.

    Reads ``args.page_id``; each line is prefixed with one space per heading
    level below 1 and has the form ``h<level>: <heading>``.
    """
    credentials = Auth.from_env()
    for section in await section_list(args.page_id, credentials):
        padding = " " * (section.level - 1)
        print(f"{padding}h{section.level}: {section.heading}")
async def cmd_get(args: argparse.Namespace) -> None:
    """Print the HTML of the section named by ``args.heading``.

    When the section cannot be found, a message is written to stderr and the
    process exits with status 1.
    """
    credentials = Auth.from_env()
    section_html = await section_get(args.page_id, args.heading, credentials)
    if section_html is None:
        print(f"Section '{args.heading}' not found", file=sys.stderr)
        sys.exit(1)
    print(section_html)
async def cmd_update(args: argparse.Namespace) -> None:
    """Replace a section's content and print the outcome as one JSON object.

    ``args.body`` is the new HTML; the value ``-`` means "read it from stdin".
    Exits with status 1 when the update did not succeed.
    """
    credentials = Auth.from_env()
    new_html = sys.stdin.read() if args.body == "-" else args.body
    outcome = await section_update(args.page_id, args.heading, new_html, credentials)
    report = {
        "ok": outcome.ok,
        "message": outcome.message,
        "version": outcome.version,
        "retries": outcome.retries,
    }
    print(json.dumps(report))
    if not outcome.ok:
        sys.exit(1)
async def cmd_append(args: argparse.Namespace) -> None:
    """Append HTML to a section and print the outcome as one JSON object.

    ``args.body`` is the HTML to append; the value ``-`` means "read it from
    stdin". Exits with status 1 when the append did not succeed.
    """
    credentials = Auth.from_env()
    extra_html = sys.stdin.read() if args.body == "-" else args.body
    outcome = await section_append(args.page_id, args.heading, extra_html, credentials)
    report = {
        "ok": outcome.ok,
        "message": outcome.message,
        "version": outcome.version,
    }
    print(json.dumps(report))
    if not outcome.ok:
        sys.exit(1)
async def cmd_delete(args: argparse.Namespace) -> None:
    """Delete a section and print the outcome as one JSON object.

    Exits with status 1 when the deletion did not succeed.
    """
    credentials = Auth.from_env()
    outcome = await section_delete(args.page_id, args.heading, credentials)
    report = {
        "ok": outcome.ok,
        "message": outcome.message,
        "version": outcome.version,
    }
    print(json.dumps(report))
    if not outcome.ok:
        sys.exit(1)
def main() -> None:
    """Parse the command line and run the coroutine for the chosen sub-command.

    Every sub-command requires ``--page-id``; all but ``list-sections`` also
    require ``--heading``; ``update-section`` and ``append-section``
    additionally require ``--body``.
    """
    parser = argparse.ArgumentParser(prog="confluence-collab", description="Section-based Confluence editing")
    subcommands = parser.add_subparsers(dest="command", required=True)

    # (command, help text, needs --heading, needs --body, coroutine)
    command_table = (
        ("list-sections", "List page sections", False, False, cmd_list),
        ("get-section", "Get section content", True, False, cmd_get),
        ("update-section", "Update section content", True, True, cmd_update),
        ("append-section", "Append to section", True, True, cmd_append),
        ("delete-section", "Delete a section", True, False, cmd_delete),
    )

    dispatch = {}
    for command, summary, wants_heading, wants_body, coroutine in command_table:
        sub = subcommands.add_parser(command, help=summary)
        sub.add_argument("--page-id", required=True)
        if wants_heading:
            sub.add_argument("--heading", required=True)
        if wants_body:
            sub.add_argument("--body", required=True, help="HTML content or '-' for stdin")
        dispatch[command] = coroutine

    args = parser.parse_args()
    asyncio.run(dispatch[args.command](args))


if __name__ == "__main__":
    main()

View File

@@ -1,93 +0,0 @@
"""Confluence REST API v1 client using httpx."""
from __future__ import annotations
import os
from dataclasses import dataclass
import httpx
@dataclass
class PageData:
    """Plain record describing one Confluence page."""

    page_id: str  # page identifier, kept as a string
    title: str  # page title
    body_html: str  # page body markup
    version: int  # page version number
@dataclass
class Auth:
    """Confluence connection settings: site base URL plus basic-auth credentials."""

    base_url: str  # prefix that "/rest/api/..." paths are appended to
    username: str
    api_token: str

    @classmethod
    def from_env(cls) -> Auth:
        """Build an ``Auth`` from environment variables.

        Reads ``CONFLUENCE_URL``, ``CONFLUENCE_USERNAME`` (falling back to
        ``CONFLUENCE_USER``) and ``CONFLUENCE_API_TOKEN``. A variable that is
        not set yields an empty string.

        Surrounding whitespace and trailing slashes are removed from the URL:
        request URLs are built as ``f"{base_url}/rest/api/..."``, so a value
        such as ``https://host/wiki/`` would otherwise produce ``//rest``.
        """
        env = os.environ
        return cls(
            base_url=env.get("CONFLUENCE_URL", "").strip().rstrip("/"),
            username=env.get("CONFLUENCE_USERNAME", env.get("CONFLUENCE_USER", "")),
            api_token=env.get("CONFLUENCE_API_TOKEN", ""),
        )

    @property
    def httpx_auth(self) -> tuple[str, str]:
        """Return ``(username, api_token)``, the pair passed as ``auth=`` to httpx."""
        return (self.username, self.api_token)
async def get_page(page_id: str, auth: Auth) -> PageData:
    """Fetch one page, including its storage-format body and version number.

    Issues ``GET {base_url}/rest/api/content/{page_id}`` with a 15 second
    timeout. ``raise_for_status`` turns an error response into
    ``httpx.HTTPStatusError``.
    """
    endpoint = f"{auth.base_url}/rest/api/content/{page_id}"
    query = {"expand": "body.storage,version,title"}
    async with httpx.AsyncClient(timeout=15.0) as http:
        response = await http.get(endpoint, params=query, auth=auth.httpx_auth)
        response.raise_for_status()
        document = response.json()
        page_version = document["version"]["number"]
        storage_html = document["body"]["storage"]["value"]
        return PageData(
            page_id=str(document["id"]),
            title=document["title"],
            body_html=storage_html,
            version=page_version,
        )
async def put_page(
    page_id: str,
    title: str,
    body_html: str,
    version: int,
    auth: Auth,
) -> PageData:
    """Write a new body for a page under the given version number.

    Sends ``PUT {base_url}/rest/api/content/{page_id}`` with a 15 second
    timeout and returns the page as described by the response.

    Raises httpx.HTTPStatusError on 409 (version conflict) or other errors.
    """
    endpoint = f"{auth.base_url}/rest/api/content/{page_id}"
    storage = {"value": body_html, "representation": "storage"}
    request_body = {
        "version": {"number": version},
        "title": title,
        "type": "page",
        "body": {"storage": storage},
    }
    async with httpx.AsyncClient(timeout=15.0) as http:
        response = await http.put(endpoint, json=request_body, auth=auth.httpx_auth)
        response.raise_for_status()
        document = response.json()
        stored_version = document["version"]["number"]
        stored_html = document["body"]["storage"]["value"]
        return PageData(
            page_id=str(document["id"]),
            title=document["title"],
            body_html=stored_html,
            version=stored_version,
        )

View File

@@ -1,159 +0,0 @@
"""Section-level CRUD operations with 409 conflict retry."""
from __future__ import annotations
import asyncio
from dataclasses import dataclass
import httpx
from confluence_collab.client import Auth, get_page, put_page
from confluence_collab.parser import Section, parse_sections, find_section, replace_section_content
@dataclass
class Result:
    """Outcome record returned by the section write operations."""

    ok: bool  # True when the operation succeeded
    message: str  # human-readable description of what happened
    version: int = 0  # page version after a successful write; 0 otherwise
    retries: int = 0  # number of conflict retries that were performed
async def section_list(page_id: str, auth: Auth) -> list[Section]:
    """Fetch a page and return the sections parsed from its body."""
    fetched = await get_page(page_id, auth)
    body = fetched.body_html
    return parse_sections(body)
async def section_get(page_id: str, heading: str, auth: Auth) -> str | None:
    """Return the HTML content of the section matching ``heading``.

    Returns ``None`` when no section on the page matches.
    """
    fetched = await get_page(page_id, auth)
    match = find_section(parse_sections(fetched.body_html), heading)
    return None if match is None else match.content_html
async def section_update(
    page_id: str,
    heading: str,
    new_content: str,
    auth: Auth,
    *,
    max_retries: int = 3,
) -> Result:
    """Replace one section's content, retrying on version conflicts.

    Every attempt re-fetches the page, re-locates the section and PUTs the
    edited body as ``version + 1``. An HTTP 409 is answered with a sleep of
    ``0.1 * 2**n`` seconds (n = retry number, starting at 1) and a fresh
    attempt, at most ``max_retries`` times. Any other HTTP status error, or a
    409 once the retries are used up, produces a failed ``Result``; a missing
    section produces a failed ``Result`` immediately.
    """
    retries = 0
    while True:
        current = await get_page(page_id, auth)
        target = find_section(parse_sections(current.body_html), heading)
        if target is None:
            return Result(ok=False, message=f"Section '{heading}' not found")

        edited_body = replace_section_content(current.body_html, target, new_content)
        try:
            saved = await put_page(page_id, current.title, edited_body, current.version + 1, auth)
        except httpx.HTTPStatusError as exc:
            is_conflict = exc.response.status_code == 409
            if not is_conflict or retries >= max_retries:
                return Result(
                    ok=False,
                    message=f"Failed after {retries} retries: {exc}",
                    retries=retries,
                )
            retries += 1
            await asyncio.sleep(0.1 * (2 ** retries))
            continue

        return Result(
            ok=True,
            message=f"Section '{heading}' updated",
            version=saved.version,
            retries=retries,
        )
async def section_append(
    page_id: str,
    heading: str,
    append_content: str,
    auth: Auth,
    *,
    max_retries: int = 3,
) -> Result:
    """Add HTML after a section's existing content, retrying on conflicts.

    Every attempt re-fetches the page, so the appended text is joined to the
    section content as it is at that moment. HTTP 409 responses are retried
    up to ``max_retries`` times with a ``0.1 * 2**n`` second pause; other
    failures, or a missing section, produce a failed ``Result``.
    """
    retries = 0
    while True:
        current = await get_page(page_id, auth)
        target = find_section(parse_sections(current.body_html), heading)
        if target is None:
            return Result(ok=False, message=f"Section '{heading}' not found")

        extended = target.content_html + append_content
        edited_body = replace_section_content(current.body_html, target, extended)
        try:
            saved = await put_page(page_id, current.title, edited_body, current.version + 1, auth)
        except httpx.HTTPStatusError as exc:
            is_conflict = exc.response.status_code == 409
            if not is_conflict or retries >= max_retries:
                return Result(
                    ok=False,
                    message=f"Failed after {retries} retries: {exc}",
                    retries=retries,
                )
            retries += 1
            await asyncio.sleep(0.1 * (2 ** retries))
            continue

        return Result(
            ok=True,
            message=f"Content appended to '{heading}'",
            version=saved.version,
            retries=retries,
        )
async def section_delete(
    page_id: str,
    heading: str,
    auth: Auth,
    *,
    max_retries: int = 3,
) -> Result:
    """Remove a section, heading included, retrying on version conflicts.

    The page body is cut from the section's ``start_offset`` to its
    ``end_offset``. HTTP 409 responses are retried up to ``max_retries``
    times with a ``0.1 * 2**n`` second pause; other failures, or a missing
    section, produce a failed ``Result``.
    """
    retries = 0
    while True:
        current = await get_page(page_id, auth)
        target = find_section(parse_sections(current.body_html), heading)
        if target is None:
            return Result(ok=False, message=f"Section '{heading}' not found")

        # Keep everything before the heading tag and everything after the section.
        before = current.body_html[:target.start_offset]
        after = current.body_html[target.end_offset:]
        edited_body = before + after
        try:
            saved = await put_page(page_id, current.title, edited_body, current.version + 1, auth)
        except httpx.HTTPStatusError as exc:
            is_conflict = exc.response.status_code == 409
            if not is_conflict or retries >= max_retries:
                return Result(
                    ok=False,
                    message=f"Failed after {retries} retries: {exc}",
                    retries=retries,
                )
            retries += 1
            await asyncio.sleep(0.1 * (2 ** retries))
            continue

        return Result(
            ok=True,
            message=f"Section '{heading}' deleted",
            version=saved.version,
            retries=retries,
        )

View File

@@ -1,135 +0,0 @@
"""Section parsing for Confluence storage-format HTML using BeautifulSoup."""
from __future__ import annotations
import re
from dataclasses import dataclass
from bs4 import BeautifulSoup, Tag
@dataclass
class Section:
    """One heading and the markup that belongs to it."""

    heading: str  # heading text
    level: int  # heading level, 1-6
    content_html: str  # markup from the end of this heading up to end_offset
    start_offset: int  # index in the original HTML where the heading tag starts
    end_offset: int  # index in the original HTML where the section's content stops
def parse_sections(html: str) -> list[Section]:
    """Parse Confluence storage-format HTML into heading-delimited sections.

    Each section spans from a heading tag (h1-h6) to the next heading of the
    same or higher level (lower number), or to the end of the document, so a
    section includes its sub-sections.

    Headings are discovered with BeautifulSoup, then located in the *original*
    string so that all offsets index into ``html``:

    * the search for each heading starts after the previous located heading,
      so two identical headings get distinct offsets;
    * the heading's end is taken from the raw markup, not from the length of
      the parser's re-serialised tag, which may differ (attribute quoting,
      entities, ...).

    Args:
        html: page body markup.

    Returns:
        Sections in document order; empty when no heading is found. A heading
        that cannot be located in the raw markup is skipped.
    """
    soup = BeautifulSoup(html, "lxml")
    root = soup.body if soup.body else soup

    flags = re.IGNORECASE | re.DOTALL
    # One raw-markup pattern per level; headings cannot nest, so the first
    # closing tag of the same level ends the element.
    raw_heading = {
        n: re.compile(rf"<h{n}\b[^>]*>.*?</h{n}\s*>", flags) for n in range(1, 7)
    }

    # (start of heading tag, end of heading tag, level, text)
    located: list[tuple[int, int, int, str]] = []
    cursor = 0
    for tag in root.find_all(re.compile(r"^h([1-6])$")):
        level = int(tag.name[1])
        text = tag.get_text(strip=True)
        rendered = str(tag)
        for candidate in raw_heading[level].finditer(html, cursor):
            raw = candidate.group(0)
            # Cheap exact comparison first; otherwise compare the visible text
            # as the same parser sees it.
            if raw == rendered or BeautifulSoup(raw, "lxml").get_text(strip=True) == text:
                located.append((candidate.start(), candidate.end(), level, text))
                cursor = candidate.end()
                break

    sections: list[Section] = []
    for index, (start, heading_end, level, text) in enumerate(located):
        # The section ends at the next heading of the same or a higher level.
        end = next(
            (
                later_start
                for later_start, _, later_level, _ in located[index + 1:]
                if later_level <= level
            ),
            len(html),
        )
        sections.append(Section(
            heading=text,
            level=level,
            content_html=html[heading_end:end],
            start_offset=start,
            end_offset=end,
        ))
    return sections
def find_section(
    sections: list[Section],
    heading: str,
    *,
    fuzzy: bool = True,
) -> Section | None:
    """Find a section by heading text.

    With ``fuzzy=False`` only an exact string match counts. With
    ``fuzzy=True`` both sides are stripped and lower-cased; an exact match on
    the normalised text wins, otherwise the first section whose normalised
    heading *contains* the needle is returned.

    An empty or whitespace-only ``heading`` never takes the partial-match
    path: the empty string is contained in every heading, so it would select
    the first section of the page.

    Returns:
        The matching section, or ``None``.
    """
    if not fuzzy:
        return next((sec for sec in sections if sec.heading == heading), None)

    needle = heading.strip().lower()
    normalised = [(sec, sec.heading.strip().lower()) for sec in sections]

    for sec, target in normalised:
        if target == needle:
            return sec

    if not needle:
        return None

    # Partial match as fallback
    for sec, target in normalised:
        if needle in target:
            return sec
    return None
def replace_section_content(html: str, section: Section, new_content: str) -> str:
    """Replace the content of a section in the original HTML.

    Preserves the heading tag itself and replaces only the text between the
    heading's closing tag and ``section.end_offset``.

    The heading is first looked up exactly at ``section.start_offset`` and
    accepted when its visible text (tags removed, entities decoded, whitespace
    ignored) equals ``section.heading``. This works for headings that contain
    entities or inline markup, where the literal heading text does not appear
    in the raw HTML. If that check fails, the heading is searched by its
    literal text from ``start_offset`` onwards.

    Args:
        html: page body the section was parsed from.
        section: the section to rewrite; ``level``, ``heading``,
            ``start_offset`` and ``end_offset`` are read.
        new_content: markup placed between the heading and ``end_offset``.

    Returns:
        The page body with the section content replaced.

    Raises:
        ValueError: if the heading cannot be found, or if it ends after
            ``section.end_offset`` (splicing would then duplicate content).
    """
    # Function-scope import: the ``html`` parameter shadows the module name.
    from html import unescape

    level = section.level
    flags = re.IGNORECASE | re.DOTALL

    content_start = None
    anchored = re.compile(rf"<h{level}\b[^>]*>(.*?)</h{level}\s*>", flags).match(
        html, section.start_offset
    )
    if anchored is not None:
        visible = unescape(re.sub(r"<[^>]*>", "", anchored.group(1)))
        if "".join(visible.split()) == "".join(section.heading.split()):
            content_start = anchored.end()

    if content_start is None:
        # Fallback: locate the heading by its literal text.
        by_text = re.compile(
            rf"<h{level}[^>]*>.*?{re.escape(section.heading)}.*?</h{level}>",
            flags,
        )
        found = by_text.search(html, section.start_offset)
        if not found:
            raise ValueError(f"Cannot find heading '{section.heading}' at expected offset")
        content_start = found.end()

    content_end = section.end_offset
    if content_start > content_end:
        raise ValueError(
            f"Heading '{section.heading}' ends after the section's end offset"
        )
    return html[:content_start] + new_content + html[content_end:]

View File

@@ -1,180 +0,0 @@
"""Composite MCP server: proxies mcp-atlassian + adds section tools.
Spawns mcp-atlassian as a subprocess (stdio), proxies all its tools, and
registers the confluence_section_* tools from this package. Claude Code
sees a single MCP server with all tools under one prefix.
Usage: python -m confluence_collab.proxy
"""
from __future__ import annotations
import asyncio
import json
import logging
import os
import sys
from mcp import types
from mcp.server.lowlevel import Server
from mcp.server.stdio import stdio_server
from mcp.client.session import ClientSession
from mcp.client.stdio import StdioServerParameters, stdio_client
from confluence_collab.client import Auth
from confluence_collab.editor import (
section_list,
section_get,
section_update,
section_append,
section_delete,
)
logger = logging.getLogger("confluence-collab-proxy")


def _section_tool(name: str, description: str, **params: str) -> types.Tool:
    """Build a tool definition whose parameters are all required strings.

    ``params`` maps each parameter name to its description; the keyword order
    is the order of both ``properties`` and ``required``.
    """
    return types.Tool(
        name=name,
        description=description,
        inputSchema={
            "type": "object",
            "properties": {
                key: {"type": "string", "description": doc} for key, doc in params.items()
            },
            "required": list(params),
        },
    )


# Section tool definitions (added alongside proxied mcp-atlassian tools)
SECTION_TOOLS = [
    _section_tool(
        "confluence_section_list",
        "List all sections (headings) on a Confluence page. Returns JSON array of {heading, level}.",
        page_id="Confluence page ID",
    ),
    _section_tool(
        "confluence_section_get",
        "Get the HTML content of a specific section by heading text. Uses fuzzy matching (case-insensitive, partial match).",
        page_id="Confluence page ID",
        heading="Section heading text to find",
    ),
    _section_tool(
        "confluence_section_update",
        "Update the content of a section identified by heading. Replaces only the target section, preserving the rest of the page. Handles version conflicts with automatic retry (exponential backoff).",
        page_id="Confluence page ID",
        heading="Section heading text to find (fuzzy matched)",
        body="New HTML content for the section",
    ),
    _section_tool(
        "confluence_section_append",
        "Append HTML content to the end of a section. Adds content after existing section content, before the next heading.",
        page_id="Confluence page ID",
        heading="Section heading text to find (fuzzy matched)",
        body="HTML content to append",
    ),
    _section_tool(
        "confluence_section_delete",
        "Delete an entire section (heading + content) from a page. Removes the heading tag and all content up to the next same-or-higher-level heading.",
        page_id="Confluence page ID",
        heading="Section heading text to find (fuzzy matched)",
    ),
]

# Used to decide whether a tool call is handled locally.
SECTION_TOOL_NAMES = {tool.name for tool in SECTION_TOOLS}
def _get_auth() -> Auth:
    """Return credentials freshly read from the environment."""
    credentials = Auth.from_env()
    return credentials
async def _handle_section_tool(name: str, arguments: dict) -> list[types.TextContent]:
"""Handle a section tool call and return MCP content."""
auth = _get_auth()
page_id = arguments["page_id"]
if name == "confluence_section_list":
sections = await section_list(page_id, auth)
text = json.dumps([{"heading": s.heading, "level": s.level} for s in sections], indent=2)
elif name == "confluence_section_get":
content = await section_get(page_id, arguments["heading"], auth)
text = content if content is not None else f"Section '{arguments['heading']}' not found on page {page_id}"
elif name == "confluence_section_update":
result = await section_update(page_id, arguments["heading"], arguments["body"], auth)
text = json.dumps({"status": "ok" if result.ok else "error", "message": result.message, "version": result.version, "retries": result.retries})
elif name == "confluence_section_append":
result = await section_append(page_id, arguments["heading"], arguments["body"], auth)
text = json.dumps({"status": "ok" if result.ok else "error", "message": result.message, "version": result.version})
elif name == "confluence_section_delete":
result = await section_delete(page_id, arguments["heading"], auth)
text = json.dumps({"status": "ok" if result.ok else "error", "message": result.message, "version": result.version})
else:
text = f"Unknown section tool: {name}"
return [types.TextContent(type="text", text=text)]
async def run_proxy():
    """Serve the upstream mcp-atlassian tools plus the section tools over stdio.

    Starts ``uvx --python 3.13 mcp-atlassian`` as a child process with the
    current environment, lists its tools once, and then runs a server named
    "atlassian" that answers section tool calls locally and forwards every
    other call to the child.
    """
    server = Server("atlassian")

    # Connect to upstream mcp-atlassian
    upstream_params = StdioServerParameters(
        command="uvx",
        args=["--python", "3.13", "mcp-atlassian"],
        env=dict(os.environ),
    )
    async with (
        stdio_client(upstream_params) as (upstream_read, upstream_write),
        ClientSession(upstream_read, upstream_write) as upstream,
    ):
        await upstream.initialize()

        # Discover upstream tools once; the combined list is fixed afterwards.
        listing = await upstream.list_tools()
        proxied = listing.tools
        logger.info("Proxying %d upstream tools + %d section tools", len(proxied), len(SECTION_TOOLS))
        combined = [*proxied, *SECTION_TOOLS]

        @server.list_tools()
        async def handle_list_tools() -> list[types.Tool]:
            return combined

        @server.call_tool()
        async def handle_call_tool(name: str, arguments: dict | None) -> list[types.TextContent]:
            call_args = arguments or {}
            if name not in SECTION_TOOL_NAMES:
                # Proxy to upstream mcp-atlassian
                forwarded = await upstream.call_tool(name, call_args)
                return forwarded.content
            # Section tools handled locally
            return await _handle_section_tool(name, call_args)

        # Run our server on stdio
        async with stdio_server() as (read, write):
            await server.run(read, write, server.create_initialization_options())
def main():
    """Entry point: run the proxy coroutine until it finishes."""
    proxy = run_proxy()
    asyncio.run(proxy)


if __name__ == "__main__":
    main()

View File

@@ -1,113 +0,0 @@
"""MCP server exposing section-based Confluence editing tools."""
from __future__ import annotations
import json
from mcp.server.fastmcp import FastMCP
from confluence_collab.client import Auth
from confluence_collab.editor import section_list, section_get, section_update, section_append, section_delete
mcp = FastMCP("confluence-collab")


def _get_auth() -> Auth:
    """Return credentials freshly read from the environment."""
    credentials = Auth.from_env()
    return credentials
@mcp.tool()
async def confluence_section_list(page_id: str) -> str:
    """List all sections (headings) on a Confluence page.
    Returns a JSON array of {heading, level} objects showing the page structure.
    """
    found = await section_list(page_id, _get_auth())
    outline = []
    for section in found:
        outline.append({"heading": section.heading, "level": section.level})
    return json.dumps(outline, indent=2)
@mcp.tool()
async def confluence_section_get(page_id: str, heading: str) -> str:
    """Get the HTML content of a specific section by heading text.
    Uses fuzzy matching (case-insensitive, partial match).
    """
    html = await section_get(page_id, heading, _get_auth())
    if html is not None:
        return html
    return f"Section '{heading}' not found on page {page_id}"
@mcp.tool()
async def confluence_section_update(page_id: str, heading: str, body: str) -> str:
    """Update the content of a section identified by heading.
    Replaces only the target section, preserving the rest of the page.
    Handles version conflicts with automatic retry (exponential backoff).
    Args:
    page_id: Confluence page ID
    heading: Section heading text to find (fuzzy matched)
    body: New HTML content for the section (replaces existing content between headings)
    """
    outcome = await section_update(page_id, heading, body, _get_auth())
    status = "ok" if outcome.ok else "error"
    report = {
        "status": status,
        "message": outcome.message,
        "version": outcome.version,
        "retries": outcome.retries,
    }
    return json.dumps(report)
@mcp.tool()
async def confluence_section_append(page_id: str, heading: str, body: str) -> str:
    """Append HTML content to the end of a section.
    Adds content after the existing section content, before the next heading.
    Args:
    page_id: Confluence page ID
    heading: Section heading text to find (fuzzy matched)
    body: HTML content to append
    """
    outcome = await section_append(page_id, heading, body, _get_auth())
    status = "ok" if outcome.ok else "error"
    report = {
        "status": status,
        "message": outcome.message,
        "version": outcome.version,
    }
    return json.dumps(report)
@mcp.tool()
async def confluence_section_delete(page_id: str, heading: str) -> str:
    """Delete an entire section (heading + content) from a page.
    Removes the heading tag and all content up to the next same-or-higher-level heading.
    Args:
    page_id: Confluence page ID
    heading: Section heading text to find (fuzzy matched)
    """
    outcome = await section_delete(page_id, heading, _get_auth())
    status = "ok" if outcome.ok else "error"
    report = {
        "status": status,
        "message": outcome.message,
        "version": outcome.version,
    }
    return json.dumps(report)
def main():
    """Start the MCP server on the stdio transport."""
    transport = "stdio"
    mcp.run(transport=transport)


if __name__ == "__main__":
    main()