feat(CF-2411): Pipeline hardening — Sentry, retry, concurrent limits, audit log
- Sentry transactions wrapping pipeline execution with tags
- Retry with exponential backoff for transient failures (connect, timeout, 5xx)
- Concurrent execution limit (3/user) enforced in scheduler
- Audit log events fired at each pipeline lifecycle point
- Resume support: skip already-completed steps on restart

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -13,6 +13,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
SYNC_INTERVAL = 300 # 5 minutes — full job reconciliation
|
||||
PENDING_CHECK_INTERVAL = 15 # 15 seconds — fast check for manual triggers
|
||||
MAX_CONCURRENT_PER_USER = 3 # CF-2411: prevent runaway pipelines — a trigger is skipped when the user's active execution count is >= this limit
|
||||
|
||||
|
||||
class CronScheduler:
|
||||
@@ -145,6 +146,16 @@ class CronScheduler:
|
||||
pipelines = await self._pipeline_state.fetch_active_pipelines()
|
||||
for pipeline in pipelines:
|
||||
if pipeline.get("lastStatus") == "pending":
|
||||
# CF-2411: concurrent limit check
|
||||
user_id = pipeline.get("userId", "")
|
||||
if user_id:
|
||||
active = await self._pipeline_state.count_active_executions(user_id)
|
||||
if active >= MAX_CONCURRENT_PER_USER:
|
||||
logger.warning(
|
||||
"Pipeline %s skipped: user %s has %d active executions (limit %d)",
|
||||
pipeline["name"], user_id, active, MAX_CONCURRENT_PER_USER,
|
||||
)
|
||||
continue
|
||||
logger.info("Pending pipeline trigger: %s", pipeline["name"])
|
||||
asyncio.create_task(self.pipeline_engine.run(pipeline))
|
||||
|
||||
@@ -155,6 +166,16 @@ class CronScheduler:
|
||||
sleep_secs = self._seconds_until_next_run(pipeline)
|
||||
if sleep_secs > 0:
|
||||
await asyncio.sleep(sleep_secs)
|
||||
# CF-2411: concurrent limit check
|
||||
user_id = pipeline.get("userId", "")
|
||||
if user_id:
|
||||
active = await self._pipeline_state.count_active_executions(user_id)
|
||||
if active >= MAX_CONCURRENT_PER_USER:
|
||||
logger.warning(
|
||||
"Pipeline %s cron skipped: user %s at limit (%d/%d)",
|
||||
pipeline["name"], user_id, active, MAX_CONCURRENT_PER_USER,
|
||||
)
|
||||
continue
|
||||
await self.pipeline_engine.run(pipeline)
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user