feat(CF-580): Implement transcript-based session recovery for MCP

Add direct transcript ingestion and orphan recovery using Claude Code's JSONL transcripts instead of relying on daemon-based note synchronization.

Changes:

1. **Database migration** (027_session_transcript_storage.sql):
   - Add transcript_jsonl, transcript_ingested_at, transcript_file_path columns
   - Add indexes for efficient ingestion tracking

2. **Transcript parser utility** (src/utils/transcript-parser.ts):
   - parseTranscriptFile(): Parse JSONL line-by-line, handle corrupt lines
   - encodeWorkingDir(): Convert paths to Claude Code directory encoding
   - findOrphanedTranscripts(): Scan for stale transcript files
   - ingestTranscriptToDatabase(): Main ingestion function for Node.js

3. **Orphan recovery enhancement** (src/tools/sessions.ts):
   - sessionRecoverOrphaned() now tries transcript ingestion first
   - Finds most recently modified JSONL in project directory
   - Falls back to legacy notes.md recovery for backward compatibility
   - Properly handles path encoding (/ and . → -)

Benefits:
- No daemon needed for recovery (Phase 2 will remove LaunchAgent)
- Full transcript audit trail stored in database
- Immediate recovery capability for orphaned sessions
- Cleaner architecture (no markdown parsing complexity)
- Compatible with Claude Code's UUID-based session files

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2026-01-29 17:53:37 +02:00
parent 30650cf47f
commit e04a8ab524
3 changed files with 290 additions and 2 deletions
--- a/migrations/027_session_transcript_storage.sql
+++ b/migrations/027_session_transcript_storage.sql
@@ -0,0 +1,24 @@
+-- Migration 027: Add transcript storage for session recovery
+-- Purpose: Store full JSONL transcripts from Claude Code for session recovery (CF-580)
+-- Context: Replace daemon-based notes syncing with direct transcript ingestion
+-- Re-runnable: columns and indexes are created with IF NOT EXISTS; COMMENT ON overwrites.
+
+-- Add columns to sessions table for transcript storage.
+-- All three columns are nullable and have no default, so existing rows keep NULL
+-- until a transcript is ingested for them.
+ALTER TABLE sessions
+  ADD COLUMN IF NOT EXISTS transcript_jsonl TEXT,
+  ADD COLUMN IF NOT EXISTS transcript_ingested_at TIMESTAMPTZ,
+  ADD COLUMN IF NOT EXISTS transcript_file_path TEXT;
+
+-- Index for efficient querying of ingested sessions.
+-- Partial index: rows without an ingestion timestamp are not indexed.
+CREATE INDEX IF NOT EXISTS idx_sessions_transcript_ingested
+  ON sessions(transcript_ingested_at)
+  WHERE transcript_ingested_at IS NOT NULL;
+
+-- Index for finding sessions by transcript file path (for recovery).
+-- Partial index: rows without a recorded source path are not indexed.
+CREATE INDEX IF NOT EXISTS idx_sessions_transcript_file_path
+  ON sessions(transcript_file_path)
+  WHERE transcript_file_path IS NOT NULL;
+
+-- Column comments stored in the database catalog for documentation
+COMMENT ON COLUMN sessions.transcript_jsonl IS 'Full JSONL transcript from Claude Code for complete session audit trail';
+COMMENT ON COLUMN sessions.transcript_ingested_at IS 'Timestamp when transcript was ingested into database';
+COMMENT ON COLUMN sessions.transcript_file_path IS 'Path to source JSONL file for debugging and recovery';
--- a/src/tools/sessions.ts
+++ b/src/tools/sessions.ts
@@ -488,8 +488,55 @@ export async function sessionRecoverOrphaned(args: { project?: string }): Promis
        `✓ Session ${session.project} #${session.session_number} marked as abandoned`
      );

-      // Attempt to recover notes from temp file
+      // Attempt to recover transcript first (CF-580)
+      let transcriptRecovered = false;
      if (session.working_directory) {
+        // Construct projects path: ~/.claude/projects/{encoded-dir}/
+        // Encoding: / and . → - (Claude Code removes dots from usernames)
+        const home = process.env.HOME || '';
+        const encodedDir = session.working_directory.replace(/[/\.]/g, '-');
+        const projectsDir = `${home}/.claude/projects/${encodedDir}`;
+
+        try {
+          const fs = await import('fs');
+          const path = await import('path');
+
+          if (fs.default.existsSync(projectsDir)) {
+            // Find the most recently modified JSONL file
+            const files = fs.default.readdirSync(projectsDir);
+            const jsonlFiles = files
+              .filter(f => f.endsWith('.jsonl'))
+              .map(f => ({
+                name: f,
+                path: `${projectsDir}/${f}`,
+                mtime: fs.default.statSync(`${projectsDir}/${f}`).mtimeMs,
+              }))
+              .sort((a, b) => b.mtime - a.mtime);
+
+            if (jsonlFiles.length > 0) {
+              const latestFile = jsonlFiles[0];
+              const transcriptContent = fs.default.readFileSync(latestFile.path, 'utf-8');
+              const lineCount = transcriptContent.split('\n').filter(l => l.trim()).length;
+
+              // Update session with transcript
+              await execute(
+                `UPDATE sessions
+                 SET transcript_jsonl = $1, transcript_ingested_at = NOW(), transcript_file_path = $2, updated_at = NOW()
+                 WHERE id = $3`,
+                [transcriptContent, latestFile.path, session.id]
+              );
+
+              results.push(`  → Recovered transcript (${lineCount} lines)`);
+              transcriptRecovered = true;
+            }
+          }
+        } catch (err) {
+          // Silently skip transcript recovery errors
+        }
+      }
+
+      // Fallback: Attempt to recover notes from temp file if transcript not recovered
+      if (!transcriptRecovered && session.working_directory) {
        const tempFilePath = `${session.working_directory}/.claude-session/${session.id}/notes.md`;

        try {
@@ -497,7 +544,7 @@ export async function sessionRecoverOrphaned(args: { project?: string }): Promis
          const recovered = await recoverNotesFromTempFile(session.id, tempFilePath, 'recovered');

          if (recovered > 0) {
-            results.push(`  → Recovered ${recovered} note(s) from temp file`);
+            results.push(`  → Recovered ${recovered} note(s) from temp file (legacy)`);
            totalNotesRecovered += recovered;
          }
        } catch (err) {
--- a/src/utils/transcript-parser.ts
+++ b/src/utils/transcript-parser.ts
@@ -0,0 +1,217 @@
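+/**
+ * Transcript Parser for CF-580
+ *
+ * Parses JSONL transcripts from Claude Code and ingests them into the database.
+ * Replaces daemon-based note syncing with direct transcript ingestion.
+ *
+ * Key functions:
+ * - parseTranscriptFile() - Parse JSONL, extract message count, initial prompt, summary and tool names
+ * - encodeWorkingDir() - Convert a working directory path to its encoded directory name
+ * - getTranscriptPath() - Construct path from session ID + working dir
+ * - ingestTranscriptToDatabase() - Main ingestion function
+ * - findOrphanedTranscripts() - List transcript files whose modification time is older than a given age
+ */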
+
+import { execFileSync, execSync } from 'child_process';
+import * as fs from 'fs';
+import * as readline from 'readline';
+
+/**
+ * Result of parsing one transcript file with parseTranscriptFile().
+ */
+interface ParsedTranscript {
+  /** Number of entries whose `type` is 'user' or 'assistant'. */
+  messageCount: number;
+  /** Content taken from the first 'user' entry that has any; '' when none was found. */
+  initialPrompt: string;
+  /** Value of the last 'summary' entry that carries a summary; '' when none was found. */
+  summary: string;
+  /** The complete, unmodified file content (including any lines that failed to parse). */
+  fullJsonl: string;
+  /** Distinct tool names collected from the entries. */
+  toolsUsed: Set<string>;
+}
+
+/**
+ * Parse a JSONL transcript file and collect summary information about it.
+ *
+ * The whole file is read into memory and kept verbatim in `fullJsonl`. Each
+ * non-blank line is parsed as JSON on its own; a line that is not valid JSON, or
+ * that is not a JSON object/array, is skipped with a warning that names its
+ * 1-based line number in the file.
+ *
+ * Collected per entry:
+ * - `messageCount`: entries with `type` 'user' or 'assistant'.
+ * - `initialPrompt`: text of the first 'user' entry that has any text. Looked up
+ *   in `entry.content` first, then in `entry.message.content`; a content array is
+ *   reduced to the `text` fields of its blocks, joined with newlines.
+ * - `summary`: the `summary` string of the last 'summary' entry.
+ * - `toolsUsed`: string values of `entry.toolUse` / `entry.toolName`, plus the
+ *   `name` of every `tool_use` block inside `entry.message.content`.
+ *
+ * NOTE(review): the nested `message.content` block shape is assumed to match
+ * Claude Code's transcript format — confirm against real transcript files.
+ *
+ * @param filePath Path of the JSONL file to read.
+ * @returns The parsed summary together with the raw file content.
+ * @throws Error when `filePath` does not exist.
+ */
+export async function parseTranscriptFile(filePath: string): Promise<ParsedTranscript> {
+  const result: ParsedTranscript = {
+    messageCount: 0,
+    initialPrompt: '',
+    summary: '',
+    fullJsonl: '',
+    toolsUsed: new Set(),
+  };
+
+  if (!fs.existsSync(filePath)) {
+    throw new Error(`Transcript file not found: ${filePath}`);
+  }
+
+  const fileContent = fs.readFileSync(filePath, 'utf-8');
+  result.fullJsonl = fileContent;
+
+  // Reduce a `content` value to plain text so initialPrompt is always a string.
+  const textOf = (content: unknown): string => {
+    if (typeof content === 'string') return content;
+    if (!Array.isArray(content)) return '';
+    return content
+      .map(block => (typeof block?.text === 'string' ? block.text : ''))
+      .filter(text => text !== '')
+      .join('\n');
+  };
+
+  // Only non-empty strings are valid tool names; anything else is ignored so
+  // consumers of toolsUsed can rely on the declared Set<string> type.
+  const addTool = (name: unknown): void => {
+    if (typeof name === 'string' && name !== '') {
+      result.toolsUsed.add(name);
+    }
+  };
+
+  // Iterate over the unfiltered lines so `index` is the real position in the file.
+  fileContent.split('\n').forEach((line, index) => {
+    if (!line.trim()) {
+      return;
+    }
+
+    try {
+      const entry = JSON.parse(line);
+      if (entry === null || typeof entry !== 'object') {
+        throw new Error('entry is not a JSON object');
+      }
+
+      // Count messages
+      if (entry.type === 'user' || entry.type === 'assistant') {
+        result.messageCount++;
+      }
+
+      // Extract initial prompt from the first user message that has text
+      if (entry.type === 'user' && result.initialPrompt === '') {
+        result.initialPrompt = textOf(entry.content) || textOf(entry.message?.content);
+      }
+
+      // Extract summary if provided
+      if (entry.type === 'summary' && typeof entry.summary === 'string' && entry.summary !== '') {
+        result.summary = entry.summary;
+      }
+
+      // Track tools used: top-level fields and nested tool_use content blocks
+      addTool(entry.toolUse);
+      addTool(entry.toolName);
+      const blocks: unknown = entry.message?.content;
+      if (Array.isArray(blocks)) {
+        for (const block of blocks) {
+          if (block?.type === 'tool_use') {
+            addTool(block.name);
+          }
+        }
+      }
+    } catch {
+      // Skip malformed JSON lines gracefully
+      console.warn(`Skipping malformed JSONL line ${index + 1}`);
+    }
+  });
+
+  return result;
+}
+
+/**
+ * Encode a working directory path as a single directory name.
+ * Every `/` and every `.` is replaced with `-`; all other characters are kept.
+ * Example: /Users/christian.gick/Development/Infrastructure/ClaudeFramework
+ *          → -Users-christian-gick-Development-Infrastructure-ClaudeFramework
+ *
+ * @param workingDir Working directory path to encode.
+ * @returns The encoded directory name.
+ */
+export function encodeWorkingDir(workingDir: string): string {
+  // Cutting at each separator and re-joining with '-' replaces every occurrence.
+  const pieces = workingDir.split(/[/.]/);
+  return pieces.join('-');
+}
+
+/**
+ * Build the transcript file path for a session.
+ * Layout: {HOME}/.claude/projects/{encoded-dir}/{session-id}.jsonl
+ *
+ * `HOME` is read from the environment; when it is unset or empty the path starts
+ * at `/.claude`.
+ *
+ * @param sessionId Session identifier used as the file name (without extension).
+ * @param workingDir Working directory, encoded with encodeWorkingDir().
+ * @returns The transcript file path as a string; the file is not checked for existence.
+ */
+export function getTranscriptPath(sessionId: string, workingDir: string): string {
+  const segments = [
+    process.env.HOME || '',
+    '.claude',
+    'projects',
+    encodeWorkingDir(workingDir),
+    `${sessionId}.jsonl`,
+  ];
+  return segments.join('/');
+}
+
+/**
+ * Ingest a transcript file into the `sessions` row identified by `sessionId`.
+ * Called from session-end script.
+ *
+ * The file is parsed with parseTranscriptFile() and a single UPDATE stores the raw
+ * JSONL, the source path, the message count and the tool names (NULL when no tool
+ * was seen). The statement is executed through the `psql` command-line client.
+ *
+ * How the statement is run:
+ * - psql is started directly (no shell) and receives the SQL on stdin, so `$`,
+ *   backticks, backslashes and double quotes inside the transcript, path or
+ *   password cannot be interpreted by a shell, and the transcript size is not
+ *   bound by command-line length limits.
+ * - The password is handed to psql through the PGPASSWORD variable of the child
+ *   process environment.
+ * - `-X` ignores any psqlrc, `-q -t -A` reduce the output to the bare RETURNING
+ *   value, and ON_ERROR_STOP makes psql exit non-zero on an SQL error.
+ *
+ * This function does not throw: every failure is reported through the returned
+ * `error` field.
+ *
+ * @param sessionId Value matched against `sessions.id`.
+ * @param transcriptPath Path of the JSONL transcript to ingest.
+ * @param pgPassword Database password.
+ * @param pgHost Database host passed to `psql -h`.
+ * @param pgUser Database user passed to `psql -U`.
+ * @param pgDb Database name passed to `psql -d`.
+ * @returns `success: true` with the parsed message count when a row was updated;
+ *          otherwise `success: false`, `messageCount: 0` and an `error` message.
+ */
+export async function ingestTranscriptToDatabase(
+  sessionId: string,
+  transcriptPath: string,
+  pgPassword: string,
+  pgHost: string = 'infra',
+  pgUser: string = 'agiliton',
+  pgDb: string = 'agiliton'
+): Promise<{ success: boolean; messageCount: number; error?: string }> {
+  // Render a value as a SQL string literal. Doubling single quotes is sufficient
+  // because the script below forces standard_conforming_strings = on, which makes
+  // backslashes ordinary characters inside '...' literals.
+  const sqlLiteral = (value: string): string => `'${value.replace(/'/g, "''")}'`;
+
+  try {
+    // Validate file exists
+    if (!fs.existsSync(transcriptPath)) {
+      return {
+        success: false,
+        messageCount: 0,
+        error: `Transcript file not found: ${transcriptPath}`,
+      };
+    }
+
+    // Parse transcript
+    const parsed = await parseTranscriptFile(transcriptPath);
+
+    // Only string tool names can be rendered as SQL literals
+    const toolNames = Array.from(parsed.toolsUsed).filter(
+      (tool): tool is string => typeof tool === 'string'
+    );
+    const toolsSql =
+      toolNames.length > 0 ? `ARRAY[${toolNames.map(sqlLiteral).join(',')}]` : 'NULL';
+
+    // Build SQL script
+    const updateSql = `
+      SET standard_conforming_strings = on;
+      UPDATE sessions SET
+        transcript_jsonl = ${sqlLiteral(parsed.fullJsonl)},
+        transcript_ingested_at = NOW(),
+        transcript_file_path = ${sqlLiteral(transcriptPath)},
+        message_count = ${parsed.messageCount},
+        tools_used = ${toolsSql},
+        updated_at = NOW()
+      WHERE id = ${sqlLiteral(sessionId)}
+      RETURNING TRUE;
+    `;
+
+    // Execute update: arguments are passed as an array (no shell parsing) and the
+    // script is read from stdin via "-f -".
+    const output = execFileSync(
+      'psql',
+      ['-X', '-q', '-t', '-A', '-v', 'ON_ERROR_STOP=1', '-h', pgHost, '-U', pgUser, '-d', pgDb, '-f', '-'],
+      {
+        input: updateSql,
+        env: { ...process.env, PGPASSWORD: pgPassword },
+        encoding: 'utf-8',
+        stdio: ['pipe', 'pipe', 'pipe'],
+      }
+    );
+
+    // The first non-empty output line is the RETURNING value; there is none when
+    // no row matched the session id.
+    const firstLine = output
+      .split('\n')
+      .map(outputLine => outputLine.trim())
+      .find(outputLine => outputLine !== '');
+
+    if (firstLine === 't' || firstLine === 'true') {
+      return {
+        success: true,
+        messageCount: parsed.messageCount,
+      };
+    } else {
+      return {
+        success: false,
+        messageCount: 0,
+        error: 'Failed to update session in database',
+      };
+    }
+  } catch (error) {
+    const errorMsg = error instanceof Error ? error.message : String(error);
+    return {
+      success: false,
+      messageCount: 0,
+      error: `Ingestion failed: ${errorMsg}`,
+    };
+  }
+}
+
+/**
+ * Find transcript files that have not been modified recently.
+ *
+ * Scans every directory directly inside {HOME}/.claude/projects for `.jsonl`
+ * files and returns those whose modification time is more than `maxAgeHours` in
+ * the past. Only the modification time is checked; whether a file has already
+ * been ingested into the database is NOT checked here.
+ *
+ * Errors are isolated: a directory that cannot be listed or a file that cannot be
+ * stat'ed (for example because it disappeared during the scan, or is a dangling
+ * symlink) is logged and skipped, and the scan continues with the remaining
+ * entries.
+ *
+ * @param maxAgeHours Minimum age, in hours since last modification, for a file to be returned.
+ * @returns Paths of the matching files; empty when the projects directory does not exist.
+ */
+export function findOrphanedTranscripts(maxAgeHours: number = 2): string[] {
+  const home = process.env.HOME || '';
+  const projectsDir = `${home}/.claude/projects`;
+
+  if (!fs.existsSync(projectsDir)) {
+    return [];
+  }
+
+  const orphans: string[] = [];
+  const maxAgeMs = maxAgeHours * 60 * 60 * 1000;
+  const now = Date.now();
+
+  let entries: fs.Dirent[];
+  try {
+    entries = fs.readdirSync(projectsDir, { withFileTypes: true });
+  } catch (error) {
+    console.error(`Error scanning for orphaned transcripts: ${error}`);
+    return orphans;
+  }
+
+  for (const entry of entries) {
+    if (!entry.isDirectory()) {
+      continue;
+    }
+
+    const dir = `${projectsDir}/${entry.name}`;
+    let files: string[];
+    try {
+      files = fs.readdirSync(dir);
+    } catch (error) {
+      // One unreadable project directory must not hide orphans in the others
+      console.error(`Error scanning for orphaned transcripts in ${dir}: ${error}`);
+      continue;
+    }
+
+    for (const file of files) {
+      if (!file.endsWith('.jsonl')) {
+        continue;
+      }
+
+      const filePath = `${dir}/${file}`;
+      try {
+        const age = now - fs.statSync(filePath).mtimeMs;
+        if (age > maxAgeMs) {
+          orphans.push(filePath);
+        }
+      } catch (error) {
+        // The file may have been removed since readdirSync, or be a dangling symlink
+        console.error(`Error scanning for orphaned transcripts at ${filePath}: ${error}`);
+      }
+    }
+  }
+
+  return orphans;
+}