From e04a8ab524dcf7475e68a7c5c670af1c23fc7348 Mon Sep 17 00:00:00 2001 From: Christian Gick Date: Thu, 29 Jan 2026 17:53:37 +0200 Subject: [PATCH] feat(CF-580): Implement transcript-based session recovery for MCP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add direct transcript ingestion and orphan recovery using Claude Code's JSONL transcripts instead of relying on daemon-based note synchronization. Changes: 1. **Database migration** (027_session_transcript_storage.sql): - Add transcript_jsonl, transcript_ingested_at, transcript_file_path columns - Add indexes for efficient ingestion tracking 2. **Transcript parser utility** (src/utils/transcript-parser.ts): - parseTranscriptFile(): Parse JSONL line-by-line, handle corrupt lines - encodeWorkingDir(): Convert paths to Claude Code directory encoding - findOrphanedTranscripts(): Scan for stale transcript files - ingestTranscriptToDatabase(): Main ingestion function for Node.js 3. **Orphan recovery enhancement** (src/tools/sessions.ts): - sessionRecoverOrphaned() now tries transcript ingestion first - Finds most recently modified JSONL in project directory - Falls back to legacy notes.md recovery for backward compatibility - Properly handles path encoding (/ and . 
→ -) Benefits: - No daemon needed for recovery (Phase 2 will remove LaunchAgent) - Full transcript audit trail stored in database - Immediate recovery capability for orphaned sessions - Cleaner architecture (no markdown parsing complexity) - Compatible with Claude Code's UUID-based session files Co-Authored-By: Claude Haiku 4.5 --- migrations/027_session_transcript_storage.sql | 24 ++ src/tools/sessions.ts | 51 +++- src/utils/transcript-parser.ts | 217 ++++++++++++++++++ 3 files changed, 290 insertions(+), 2 deletions(-) create mode 100644 migrations/027_session_transcript_storage.sql create mode 100644 src/utils/transcript-parser.ts diff --git a/migrations/027_session_transcript_storage.sql b/migrations/027_session_transcript_storage.sql new file mode 100644 index 0000000..04a3a59 --- /dev/null +++ b/migrations/027_session_transcript_storage.sql @@ -0,0 +1,24 @@ +-- Migration 027: Add transcript storage for session recovery +-- Purpose: Store full JSONL transcripts from Claude Code for session recovery (CF-580) +-- Context: Replace daemon-based notes syncing with direct transcript ingestion + +-- Add columns to sessions table for transcript storage +ALTER TABLE sessions + ADD COLUMN IF NOT EXISTS transcript_jsonl TEXT, + ADD COLUMN IF NOT EXISTS transcript_ingested_at TIMESTAMPTZ, + ADD COLUMN IF NOT EXISTS transcript_file_path TEXT; + +-- Index for efficient querying of ingested sessions +CREATE INDEX IF NOT EXISTS idx_sessions_transcript_ingested + ON sessions(transcript_ingested_at) + WHERE transcript_ingested_at IS NOT NULL; + +-- Index for finding sessions by transcript file path (for recovery) +CREATE INDEX IF NOT EXISTS idx_sessions_transcript_file_path + ON sessions(transcript_file_path) + WHERE transcript_file_path IS NOT NULL; + +-- Comments for documentation +COMMENT ON COLUMN sessions.transcript_jsonl IS 'Full JSONL transcript from Claude Code for complete session audit trail'; +COMMENT ON COLUMN sessions.transcript_ingested_at IS 'Timestamp when 
transcript was ingested into database'; +COMMENT ON COLUMN sessions.transcript_file_path IS 'Path to source JSONL file for debugging and recovery'; diff --git a/src/tools/sessions.ts b/src/tools/sessions.ts index c0facc9..ab59eaa 100644 --- a/src/tools/sessions.ts +++ b/src/tools/sessions.ts @@ -488,8 +488,55 @@ export async function sessionRecoverOrphaned(args: { project?: string }): Promis `✓ Session ${session.project} #${session.session_number} marked as abandoned` ); - // Attempt to recover notes from temp file + // Attempt to recover transcript first (CF-580) + let transcriptRecovered = false; if (session.working_directory) { + // Construct projects path: ~/.claude/projects/{encoded-dir}/ + // Encoding: / and . → - (Claude Code removes dots from usernames) + const home = process.env.HOME || ''; + const encodedDir = session.working_directory.replace(/[/\.]/g, '-'); + const projectsDir = `${home}/.claude/projects/${encodedDir}`; + + try { + const fs = await import('fs'); + const path = await import('path'); + + if (fs.default.existsSync(projectsDir)) { + // Find the most recently modified JSONL file + const files = fs.default.readdirSync(projectsDir); + const jsonlFiles = files + .filter(f => f.endsWith('.jsonl')) + .map(f => ({ + name: f, + path: `${projectsDir}/${f}`, + mtime: fs.default.statSync(`${projectsDir}/${f}`).mtimeMs, + })) + .sort((a, b) => b.mtime - a.mtime); + + if (jsonlFiles.length > 0) { + const latestFile = jsonlFiles[0]; + const transcriptContent = fs.default.readFileSync(latestFile.path, 'utf-8'); + const lineCount = transcriptContent.split('\n').filter(l => l.trim()).length; + + // Update session with transcript + await execute( + `UPDATE sessions + SET transcript_jsonl = $1, transcript_ingested_at = NOW(), transcript_file_path = $2, updated_at = NOW() + WHERE id = $3`, + [transcriptContent, latestFile.path, session.id] + ); + + results.push(` → Recovered transcript (${lineCount} lines)`); + transcriptRecovered = true; + } + } + } catch 
(err) { + // Silently skip transcript recovery errors + } + } + + // Fallback: Attempt to recover notes from temp file if transcript not recovered + if (!transcriptRecovered && session.working_directory) { const tempFilePath = `${session.working_directory}/.claude-session/${session.id}/notes.md`; try { @@ -497,7 +544,7 @@ export async function sessionRecoverOrphaned(args: { project?: string }): Promis const recovered = await recoverNotesFromTempFile(session.id, tempFilePath, 'recovered'); if (recovered > 0) { - results.push(` → Recovered ${recovered} note(s) from temp file`); + results.push(` → Recovered ${recovered} note(s) from temp file (legacy)`); totalNotesRecovered += recovered; } } catch (err) { diff --git a/src/utils/transcript-parser.ts b/src/utils/transcript-parser.ts new file mode 100644 index 0000000..096ca59 --- /dev/null +++ b/src/utils/transcript-parser.ts @@ -0,0 +1,217 @@ +/** + * Transcript Parser for CF-580 + * + * Parses JSONL transcripts from Claude Code and ingests them into the database. + * Replaces daemon-based note syncing with direct transcript ingestion. 
/**
 * Key functions:
 * - parseTranscriptFile() - Parse JSONL, extract metadata and messages
 * - encodeWorkingDir() - Convert a working directory to its projects-folder name
 * - getTranscriptPath() - Construct path from session ID + working dir
 * - ingestTranscriptToDatabase() - Main ingestion function
 * - findOrphanedTranscripts() - List stale .jsonl transcript files
 */

import * as fs from 'fs';
import * as readline from 'readline';
import { execFileSync, execSync } from 'child_process';

/** Metadata extracted from a single JSONL transcript file. */
interface ParsedTranscript {
  /** Number of entries whose `type` is `user` or `assistant`. */
  messageCount: number;
  /** String `content` of the first user entry that has one, else `''`. */
  initialPrompt: string;
  /** `summary` of the last `summary` entry seen, else `''`. */
  summary: string;
  /** Raw, unmodified file content. */
  fullJsonl: string;
  /** Distinct string values found in `toolUse` / `toolName` fields. */
  toolsUsed: Set<string>;
}

/** Narrow an arbitrary JSON value to a non-null object so fields can be read safely. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}

/**
 * Quote a string as a SQL literal by doubling embedded single quotes.
 * Only sufficient while `standard_conforming_strings` is on (backslashes are
 * then ordinary characters); the ingestion script enables it explicitly.
 */
function sqlLiteral(value: string): string {
  return `'${value.replace(/'/g, "''")}'`;
}

/**
 * Parse a JSONL transcript file line by line.
 *
 * Blank lines are ignored. Lines that are not valid JSON objects are skipped
 * with a warning instead of failing the whole parse.
 *
 * @param filePath - Path of the `.jsonl` transcript to read.
 * @returns Counts and metadata extracted from the transcript plus its raw text.
 * @throws Error (as a rejected promise) when `filePath` does not exist.
 */
export async function parseTranscriptFile(filePath: string): Promise<ParsedTranscript> {
  if (!fs.existsSync(filePath)) {
    throw new Error(`Transcript file not found: ${filePath}`);
  }

  const fileContent = fs.readFileSync(filePath, 'utf-8');
  const result: ParsedTranscript = {
    messageCount: 0,
    initialPrompt: '',
    summary: '',
    fullJsonl: fileContent,
    toolsUsed: new Set<string>(),
  };

  const lines = fileContent.split('\n').filter(line => line.trim());

  // Iterate with the index so the warning points at the actual line even when
  // the same malformed text occurs more than once (indexOf would report the
  // first occurrence and cost O(n) per lookup).
  for (const [index, line] of lines.entries()) {
    let entry: unknown;
    try {
      entry = JSON.parse(line);
    } catch {
      entry = undefined;
    }

    if (!isRecord(entry)) {
      console.warn(`Skipping malformed JSONL line at offset ${index}`);
      continue;
    }

    // Count messages
    if (entry.type === 'user' || entry.type === 'assistant') {
      result.messageCount++;
    }

    // Initial prompt: first user entry carrying a non-empty string `content`.
    if (
      entry.type === 'user' &&
      result.initialPrompt === '' &&
      typeof entry.content === 'string'
    ) {
      result.initialPrompt = entry.content;
    }

    // Summary: the last summary entry wins.
    if (entry.type === 'summary' && typeof entry.summary === 'string' && entry.summary) {
      result.summary = entry.summary;
    }

    // Only string tool names are recorded; anything else would break the
    // string-typed set (and the SQL array built from it during ingestion).
    for (const tool of [entry.toolUse, entry.toolName]) {
      if (typeof tool === 'string' && tool) {
        result.toolsUsed.add(tool);
      }
    }
  }

  return result;
}

/**
 * Convert a working directory path to its encoded directory name.
 *
 * Example: /Users/christian.gick/Development/Infrastructure/ClaudeFramework
 *        → -Users-christian-gick-Development-Infrastructure-ClaudeFramework
 *
 * Every `/` and every `.` is replaced with `-`.
 *
 * @param workingDir - Absolute working directory of the session.
 * @returns The encoded directory name.
 */
export function encodeWorkingDir(workingDir: string): string {
  return workingDir.replace(/[/.]/g, '-');
}

/**
 * Build the transcript file path for a session.
 *
 * Path: `$HOME/.claude/projects/{encoded-dir}/{session-id}.jsonl`
 * (`$HOME` is read at call time; an unset HOME yields a path rooted at `/.claude`).
 *
 * @param sessionId - Session identifier used as the file's base name.
 * @param workingDir - Working directory, encoded via {@link encodeWorkingDir}.
 * @returns The expected transcript path; the file is not checked for existence.
 */
export function getTranscriptPath(sessionId: string, workingDir: string): string {
  const home = process.env.HOME || '';
  const encodedDir = encodeWorkingDir(workingDir);
  return `${home}/.claude/projects/${encodedDir}/${sessionId}.jsonl`;
}

/**
 * Ingest a transcript into the `sessions` table by running `psql`.
 *
 * The SQL script is sent to `psql` on stdin and the password through the
 * `PGPASSWORD` environment variable. No shell is involved, so transcript
 * content (which may contain `$(...)`, backticks or quotes) can never be
 * interpreted as a command, large transcripts do not hit the OS argument
 * length limit, and neither the password nor the transcript ends up in the
 * error text of a failed command.
 *
 * @param sessionId - `sessions.id` of the row to update.
 * @param transcriptPath - Path of the JSONL transcript to ingest.
 * @param pgPassword - Database password.
 * @param pgHost - Database host (default `infra`).
 * @param pgUser - Database user (default `agiliton`).
 * @param pgDb - Database name (default `agiliton`).
 * @returns `success: true` with the message count when a row was updated;
 *   otherwise `success: false` with an `error` description. Never rejects.
 */
export async function ingestTranscriptToDatabase(
  sessionId: string,
  transcriptPath: string,
  pgPassword: string,
  pgHost: string = 'infra',
  pgUser: string = 'agiliton',
  pgDb: string = 'agiliton'
): Promise<{ success: boolean; messageCount: number; error?: string }> {
  try {
    // Validate file exists
    if (!fs.existsSync(transcriptPath)) {
      return {
        success: false,
        messageCount: 0,
        error: `Transcript file not found: ${transcriptPath}`,
      };
    }

    // Parse transcript
    const parsed = await parseTranscriptFile(transcriptPath);

    const toolsArray = Array.from(parsed.toolsUsed);
    const toolsSql =
      toolsArray.length > 0 ? `ARRAY[${toolsArray.map(sqlLiteral).join(',')}]` : 'NULL';

    // The leading SET guarantees that doubling single quotes is a complete
    // escape for the literals below, whatever the server default is.
    const updateSql = `
      SET standard_conforming_strings = on;
      UPDATE sessions SET
        transcript_jsonl = ${sqlLiteral(parsed.fullJsonl)},
        transcript_ingested_at = NOW(),
        transcript_file_path = ${sqlLiteral(transcriptPath)},
        message_count = ${parsed.messageCount},
        tools_used = ${toolsSql},
        updated_at = NOW()
      WHERE id = ${sqlLiteral(sessionId)}
      RETURNING TRUE;
    `;

    // -X: ignore ~/.psqlrc; -q: suppress command tags such as "UPDATE 1";
    // -A -t: print the bare RETURNING value; ON_ERROR_STOP: a SQL error makes
    // psql exit non-zero, which execFileSync turns into an exception.
    const output = execFileSync(
      'psql',
      ['-X', '-q', '-A', '-t', '-v', 'ON_ERROR_STOP=1', '-h', pgHost, '-U', pgUser, '-d', pgDb],
      {
        input: updateSql,
        encoding: 'utf-8',
        env: { ...process.env, PGPASSWORD: pgPassword },
        stdio: ['pipe', 'pipe', 'pipe'],
      }
    );

    // A row was updated iff the RETURNING clause produced a true value.
    const updated = output
      .split('\n')
      .map(row => row.trim())
      .some(row => row === 't' || row === 'true');

    if (updated) {
      return {
        success: true,
        messageCount: parsed.messageCount,
      };
    }

    return {
      success: false,
      messageCount: 0,
      error: 'Failed to update session in database',
    };
  } catch (error) {
    const errorMsg = error instanceof Error ? error.message : String(error);
    return {
      success: false,
      messageCount: 0,
      error: `Ingestion failed: ${errorMsg}`,
    };
  }
}

/**
 * Find stale transcript files.
 *
 * Scans each sub-directory of `$HOME/.claude/projects/` (one level deep) for
 * `.jsonl` files whose modification time is older than `maxAgeHours`.
 * NOTE(review): staleness is judged by mtime only; this function does not
 * consult the database, so already-ingested transcripts are returned too.
 *
 * A directory that cannot be read or a file that disappears mid-scan is
 * logged and skipped; the rest of the scan continues.
 *
 * @param maxAgeHours - Minimum age in hours for a file to be reported (default 2).
 * @returns Paths of the stale transcript files, in directory-listing order.
 */
export function findOrphanedTranscripts(maxAgeHours: number = 2): string[] {
  const home = process.env.HOME || '';
  const projectsDir = `${home}/.claude/projects`;

  if (!fs.existsSync(projectsDir)) {
    return [];
  }

  const orphans: string[] = [];
  const maxAgeMs = maxAgeHours * 60 * 60 * 1000;
  const now = Date.now();

  try {
    const entries = fs.readdirSync(projectsDir, { withFileTypes: true });

    for (const entry of entries) {
      if (!entry.isDirectory()) {
        continue;
      }

      const dir = `${projectsDir}/${entry.name}`;

      try {
        for (const file of fs.readdirSync(dir)) {
          if (!file.endsWith('.jsonl')) {
            continue;
          }

          const filePath = `${dir}/${file}`;
          const age = now - fs.statSync(filePath).mtimeMs;

          if (age > maxAgeMs) {
            orphans.push(filePath);
          }
        }
      } catch (error) {
        // One bad project directory must not hide orphans in the others.
        console.error(`Error scanning for orphaned transcripts: ${error}`);
      }
    }
  } catch (error) {
    console.error(`Error scanning for orphaned transcripts: ${error}`);
  }

  return orphans;
}