feat: add pipeline engine with approval flow and file triggers

Sequential step executor (script, claude_prompt, approval, api_call,
template, skyvern placeholder), reaction-based approvals, file upload
trigger matching, portal API state sync.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-03-18 17:06:07 +02:00
parent f4feb3bfe1
commit bd8d96335e
12 changed files with 755 additions and 1 deletions

120
bot.py
View File

@@ -1180,9 +1180,16 @@ class Bot:
api_key=BOT_API_KEY,
matrix_client=self.client,
send_text_fn=self._send_text,
llm_client=self.llm,
default_model=DEFAULT_MODEL,
escalation_model=ESCALATION_MODEL,
)
else:
self.cron_scheduler = None
# Pipeline approval tracking: event_id -> execution_id
self._pipeline_approval_events: dict[str, str] = {}
if self.cron_scheduler:
self.cron_scheduler.pipeline_engine.on_approval_registered = self._on_pipeline_approval_registered
async def _has_documents(self, matrix_user_id: str) -> bool:
"""Check if user has documents via local RAG or MatrixHost portal API.
@@ -1261,6 +1268,7 @@ class Bot:
self.client.add_event_callback(self.on_file_message, RoomMessageFile)
self.client.add_event_callback(self.on_encrypted_file_message, RoomEncryptedFile)
self.client.add_event_callback(self.on_room_unknown, RoomMessageUnknown)
self.client.add_event_callback(self.on_reaction, UnknownEvent)
self.client.add_response_callback(self.on_sync, SyncResponse)
self.client.add_to_device_callback(self.on_key_verification, KeyVerificationStart)
self.client.add_to_device_callback(self.on_key_verification, KeyVerificationKey)
@@ -1456,6 +1464,57 @@ class Bot:
self.client.verify_device(device)
logger.info("Auto-trusted device %s of %s", device.device_id, user_id)
async def on_reaction(self, room, event: UnknownEvent):
    """Handle reaction events for the pipeline approval flow.

    Only ``m.reaction`` events carrying an ``m.annotation`` relation are
    considered. The reaction key is translated by
    ``pipelines.approval.reaction_to_response``; a falsy result means the
    reaction is ignored. The annotated event ID is then looked up first in
    the local ``_pipeline_approval_events`` cache and, if absent there, in
    the pending approvals fetched from the portal.

    Args:
        room: Room the event was received in (not used by this handler).
        event: Raw event; only ``sender`` and ``source`` are read.
    """
    if event.sender == BOT_USER:
        return
    if not self._sync_token_received:
        return

    source = event.source or {}
    if source.get("type") != "m.reaction":
        return

    # The payload is authored by other users, so its shape cannot be
    # trusted: a non-dict "content" or "m.relates_to" would otherwise raise
    # AttributeError inside the sync callback.
    content = source.get("content")
    if not isinstance(content, dict):
        return
    relates_to = content.get("m.relates_to")
    if not isinstance(relates_to, dict):
        return
    if relates_to.get("rel_type") != "m.annotation":
        return

    event_id = relates_to.get("event_id", "")
    reaction_key = relates_to.get("key", "")
    # Without a usable target event ID there is nothing to resolve. Bailing
    # out here also avoids a pointless portal round-trip and prevents an
    # empty ID from comparing equal to an execution whose "approvalMsgId"
    # is an empty string.
    if not isinstance(event_id, str) or not event_id:
        return
    if not isinstance(reaction_key, str):
        return

    # Check if this reaction is for a pipeline approval
    if not self.cron_scheduler:
        return
    from pipelines.approval import reaction_to_response
    response = reaction_to_response(reaction_key)
    if not response:
        return

    # Look up execution by approval event ID. A locally tracked approval is
    # handled entirely here; the portal is consulted only for event IDs
    # missing from the local cache.
    execution_id = self._pipeline_approval_events.get(event_id)
    if execution_id:
        resolved = self.cron_scheduler.pipeline_engine.resolve_approval(execution_id, response)
        if resolved:
            self._pipeline_approval_events.pop(event_id, None)
            logger.info("Pipeline approval resolved: %s -> %s", execution_id, response)
        return

    # If not in local cache, check pending approvals from portal
    try:
        pending = await self.cron_scheduler._pipeline_state.fetch_pending_approvals()
        for execution in pending:
            if execution.get("approvalMsgId") == event_id:
                eid = execution["id"]
                # Remember the mapping so a later reaction on the same
                # message can skip the portal lookup if this one does not
                # resolve the approval.
                self._pipeline_approval_events[event_id] = eid
                resolved = self.cron_scheduler.pipeline_engine.resolve_approval(eid, response)
                if resolved:
                    self._pipeline_approval_events.pop(event_id, None)
                    logger.info("Pipeline approval resolved (from portal): %s -> %s", eid, response)
                break
    except Exception:
        # Best effort: a failed portal lookup must not break event handling.
        logger.debug("Failed to check pending approvals for reaction", exc_info=True)
async def on_unknown(self, room, event: UnknownEvent):
"""Handle call member state events and in-room verification."""
# Route verification events
@@ -2185,6 +2244,63 @@ class Bot:
".txt", ".md", ".csv", ".json", ".xml", ".html", ".yaml", ".yml", ".log",
})
def _on_pipeline_approval_registered(self, event_id: str, execution_id: str):
"""Track approval event -> execution mapping for reaction handling."""
self._pipeline_approval_events[event_id] = execution_id
async def _check_pipeline_file_trigger(self, room, event, filename: str, mime_type: str):
    """Start every file_upload pipeline whose trigger matches an uploaded file.

    A pipeline matches when its ``targetRoom`` equals this room's ID, its
    optional ``fileMimetype`` is a prefix of ``mime_type``, and its optional
    ``filePattern`` regex matches ``filename`` from the start. Pipelines
    with an invalid regex are skipped.

    Each matching pipeline is started as a background task with trigger
    data containing the filename, MIME type, extracted file text, sender
    and room ID.

    Args:
        room: Room the file was posted in; ``room_id`` is read.
        event: File event; ``sender`` and (if present) ``url`` are read.
        filename: Name of the uploaded file.
        mime_type: MIME type reported for the upload.
    """
    file_text = ""
    # The upload is identical for every pipeline, so it is downloaded and
    # parsed at most once, on the first match.
    file_text_loaded = False

    for pipeline in self.cron_scheduler.get_file_upload_pipelines():
        # Pipeline must target this room
        if pipeline.get("targetRoom", "") != room.room_id:
            continue

        # Check mime type match
        required_mime = pipeline.get("fileMimetype", "")
        if required_mime and not mime_type.startswith(required_mime):
            continue

        # Check filename pattern
        pattern = pipeline.get("filePattern", "")
        if pattern:
            try:
                if not re.match(pattern, filename):
                    continue
            except re.error:
                continue

        if not file_text_loaded:
            file_text = await self._pipeline_trigger_file_text(event, filename)
            file_text_loaded = True

        trigger_data = {
            "filename": filename,
            "mime_type": mime_type,
            "file_content": file_text,
            "sender": event.sender,
            "room_id": room.room_id,
        }
        logger.info("File upload triggered pipeline: %s (file: %s)", pipeline["name"], filename)

        # The event loop keeps only weak references to tasks, so an
        # unreferenced task can be garbage-collected before it finishes.
        # Hold a strong reference until the run completes.
        running = getattr(self, "_pipeline_tasks", None)
        if running is None:
            running = self._pipeline_tasks = set()
        task = asyncio.create_task(self.cron_scheduler.pipeline_engine.run(pipeline, trigger_data))
        running.add(task)
        task.add_done_callback(running.discard)

async def _pipeline_trigger_file_text(self, event, filename: str):
    """Download the uploaded file and return its extracted text for trigger data.

    Returns an empty string when the event has no ``url``, the download
    response has no ``body``, or download/extraction raises. Text longer
    than 50000 characters is truncated to that length.
    """
    mxc_url = getattr(event, "url", None)
    file_text = ""
    if not mxc_url:
        return file_text
    try:
        resp = await self.client.download(mxc=mxc_url)
        if hasattr(resp, "body"):
            file_bytes = resp.body
            ext = os.path.splitext(filename.lower())[1]
            if ext == ".pdf":
                file_text = self._extract_pdf_text(file_bytes)
            elif ext == ".docx":
                file_text = self._extract_docx_text(file_bytes)
            else:
                file_text = self._extract_text_file(file_bytes)
            if file_text and len(file_text) > 50000:
                file_text = file_text[:50000]
    except Exception:
        # Best effort: the pipeline still runs, just without file content.
        logger.debug("Failed to extract file for pipeline trigger", exc_info=True)
    return file_text
async def on_file_message(self, room, event: RoomMessageFile):
"""Handle file messages: extract text from PDFs, docx, and text files."""
if event.sender == BOT_USER:
@@ -2207,6 +2323,10 @@ class Bot:
is_docx = mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" or ext == ".docx"
is_text = ext in self._TEXT_EXTENSIONS or mime_type.startswith("text/")
# Check for pipeline file_upload triggers (before DM/mention check)
if self.cron_scheduler:
await self._check_pipeline_file_trigger(room, event, filename, mime_type)
if not (is_pdf or is_docx or is_text):
return