feat(CF-580): Implement transcript-based session recovery for MCP

Add direct transcript ingestion and orphan recovery using Claude Code's JSONL
transcripts instead of relying on daemon-based note synchronization.

Changes:

1. **Database migration** (027_session_transcript_storage.sql):
   - Add transcript_jsonl, transcript_ingested_at, transcript_file_path columns
   - Add indexes for efficient ingestion tracking

2. **Transcript parser utility** (src/utils/transcript-parser.ts):
   - parseTranscriptFile(): Parse JSONL line-by-line, handle corrupt lines
   - encodeWorkingDir(): Convert paths to Claude Code directory encoding
   - findOrphanedTranscripts(): Scan for stale transcript files
   - ingestTranscriptToDatabase(): Main ingestion function for Node.js

3. **Orphan recovery enhancement** (src/tools/sessions.ts):
   - sessionRecoverOrphaned() now tries transcript ingestion first
   - Finds most recently modified JSONL in project directory
   - Falls back to legacy notes.md recovery for backward compatibility
   - Properly handles path encoding (/ and . → -)

Benefits:
- No daemon needed for recovery (Phase 2 will remove LaunchAgent)
- Full transcript audit trail stored in database
- Immediate recovery capability for orphaned sessions
- Cleaner architecture (no markdown parsing complexity)
- Compatible with Claude Code's UUID-based session files

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-01-29 17:53:37 +02:00
parent 30650cf47f
commit e04a8ab524
3 changed files with 290 additions and 2 deletions

View File

@@ -0,0 +1,24 @@
-- Migration 027: Add transcript storage for session recovery
-- Purpose: Store full JSONL transcripts from Claude Code for session recovery (CF-580)
-- Context: Replace daemon-based notes syncing with direct transcript ingestion
-- Re-run safety: columns and indexes are created with IF NOT EXISTS, and
-- COMMENT ON simply overwrites the previous comment text.
-- Add three columns to the sessions table for transcript storage.
-- None of them has a NOT NULL constraint or default, so existing rows hold NULL.
ALTER TABLE sessions
ADD COLUMN IF NOT EXISTS transcript_jsonl TEXT,
ADD COLUMN IF NOT EXISTS transcript_ingested_at TIMESTAMPTZ,
ADD COLUMN IF NOT EXISTS transcript_file_path TEXT;
-- Partial index for querying ingested sessions by ingestion time;
-- rows without an ingestion timestamp are left out of the index.
CREATE INDEX IF NOT EXISTS idx_sessions_transcript_ingested
ON sessions(transcript_ingested_at)
WHERE transcript_ingested_at IS NOT NULL;
-- Partial index for finding sessions by transcript file path (for recovery);
-- rows without a recorded path are left out of the index.
CREATE INDEX IF NOT EXISTS idx_sessions_transcript_file_path
ON sessions(transcript_file_path)
WHERE transcript_file_path IS NOT NULL;
-- Column comments stored in the database catalog for documentation
COMMENT ON COLUMN sessions.transcript_jsonl IS 'Full JSONL transcript from Claude Code for complete session audit trail';
COMMENT ON COLUMN sessions.transcript_ingested_at IS 'Timestamp when transcript was ingested into database';
COMMENT ON COLUMN sessions.transcript_file_path IS 'Path to source JSONL file for debugging and recovery';

View File

@@ -488,8 +488,55 @@ export async function sessionRecoverOrphaned(args: { project?: string }): Promis
`✓ Session ${session.project} #${session.session_number} marked as abandoned` `✓ Session ${session.project} #${session.session_number} marked as abandoned`
); );
// Attempt to recover notes from temp file // Attempt to recover transcript first (CF-580)
let transcriptRecovered = false;
if (session.working_directory) { if (session.working_directory) {
// Construct projects path: ~/.claude/projects/{encoded-dir}/
// Encoding: / and . → - (Claude Code removes dots from usernames)
const home = process.env.HOME || '';
const encodedDir = session.working_directory.replace(/[/\.]/g, '-');
const projectsDir = `${home}/.claude/projects/${encodedDir}`;
try {
const fs = await import('fs');
const path = await import('path');
if (fs.default.existsSync(projectsDir)) {
// Find the most recently modified JSONL file
const files = fs.default.readdirSync(projectsDir);
const jsonlFiles = files
.filter(f => f.endsWith('.jsonl'))
.map(f => ({
name: f,
path: `${projectsDir}/${f}`,
mtime: fs.default.statSync(`${projectsDir}/${f}`).mtimeMs,
}))
.sort((a, b) => b.mtime - a.mtime);
if (jsonlFiles.length > 0) {
const latestFile = jsonlFiles[0];
const transcriptContent = fs.default.readFileSync(latestFile.path, 'utf-8');
const lineCount = transcriptContent.split('\n').filter(l => l.trim()).length;
// Update session with transcript
await execute(
`UPDATE sessions
SET transcript_jsonl = $1, transcript_ingested_at = NOW(), transcript_file_path = $2, updated_at = NOW()
WHERE id = $3`,
[transcriptContent, latestFile.path, session.id]
);
results.push(` → Recovered transcript (${lineCount} lines)`);
transcriptRecovered = true;
}
}
} catch (err) {
// Silently skip transcript recovery errors
}
}
// Fallback: Attempt to recover notes from temp file if transcript not recovered
if (!transcriptRecovered && session.working_directory) {
const tempFilePath = `${session.working_directory}/.claude-session/${session.id}/notes.md`; const tempFilePath = `${session.working_directory}/.claude-session/${session.id}/notes.md`;
try { try {
@@ -497,7 +544,7 @@ export async function sessionRecoverOrphaned(args: { project?: string }): Promis
const recovered = await recoverNotesFromTempFile(session.id, tempFilePath, 'recovered'); const recovered = await recoverNotesFromTempFile(session.id, tempFilePath, 'recovered');
if (recovered > 0) { if (recovered > 0) {
results.push(` → Recovered ${recovered} note(s) from temp file`); results.push(` → Recovered ${recovered} note(s) from temp file (legacy)`);
totalNotesRecovered += recovered; totalNotesRecovered += recovered;
} }
} catch (err) { } catch (err) {

View File

@@ -0,0 +1,217 @@
/**
* Transcript Parser for CF-580
*
* Parses JSONL transcripts from Claude Code and ingests them into the database.
* Replaces daemon-based note syncing with direct transcript ingestion.
*
* Key functions:
* - parseTranscriptFile() - Parse JSONL; extract message count, initial prompt, summary and tools used
* - ingestTranscriptToDatabase() - Main ingestion function
* - getTranscriptPath() - Construct path from session ID + working dir
*/
import * as fs from 'fs';
import * as readline from 'readline';
import { execSync } from 'child_process';
/** Summary extracted from one JSONL transcript by parseTranscriptFile(). */
interface ParsedTranscript {
  /** Number of entries whose `type` is `user` or `assistant`. */
  messageCount: number;
  /** Text of the first user entry that carries any text, or '' when none does. */
  initialPrompt: string;
  /** `summary` text of the last `summary` entry seen, or '' when there is none. */
  summary: string;
  /** Raw file content exactly as read, including blank and corrupt lines. */
  fullJsonl: string;
  /** Distinct tool names referenced by the transcript. */
  toolsUsed: Set<string>;
}

/** Narrow a parsed JSON value to a plain (non-array, non-null) object. */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Flatten a message `content` value to plain text.
 * Accepts a string, or an array whose items are strings or objects with a string
 * `text` field; anything else contributes nothing.
 */
function contentToText(content: unknown): string {
  if (typeof content === 'string') {
    return content;
  }
  if (!Array.isArray(content)) {
    return '';
  }
  const parts: string[] = [];
  for (const block of content as unknown[]) {
    if (typeof block === 'string') {
      parts.push(block);
    } else if (isJsonObject(block) && typeof block.text === 'string') {
      parts.push(block.text);
    }
  }
  return parts.filter(part => part !== '').join('\n');
}

/**
 * Parse a JSONL transcript file line by line and summarize it.
 *
 * Blank lines are ignored. Lines that are not valid JSON, or whose JSON value is
 * not an object, are skipped with a console warning that names the 1-based line
 * number in the file; they never abort parsing.
 *
 * @param filePath Path of the `.jsonl` transcript to read.
 * @returns Counts and summary fields plus the unmodified file content.
 * @throws Error when `filePath` does not exist.
 */
export async function parseTranscriptFile(filePath: string): Promise<ParsedTranscript> {
  if (!fs.existsSync(filePath)) {
    throw new Error(`Transcript file not found: ${filePath}`);
  }

  const fileContent = fs.readFileSync(filePath, 'utf-8');
  const result: ParsedTranscript = {
    messageCount: 0,
    initialPrompt: '',
    summary: '',
    fullJsonl: fileContent,
    toolsUsed: new Set<string>(),
  };

  // Iterate over the unfiltered lines so the reported number is the real line
  // number in the file, even when the same corrupt text appears more than once.
  for (const [index, rawLine] of fileContent.split('\n').entries()) {
    const line = rawLine.trim();
    if (line === '') {
      continue;
    }

    let entry: unknown;
    try {
      entry = JSON.parse(line);
    } catch {
      console.warn(`Skipping malformed JSONL line ${index + 1} in ${filePath}`);
      continue;
    }
    if (!isJsonObject(entry)) {
      // Valid JSON but not an entry object (e.g. `null` or a bare number).
      console.warn(`Skipping malformed JSONL line ${index + 1} in ${filePath}`);
      continue;
    }

    // NOTE(review): Claude Code transcripts are understood to nest the payload
    // under `message.content` (a string or an array of content blocks) — confirm
    // against the transcript format in use.
    const message = isJsonObject(entry.message) ? entry.message : undefined;

    // Count messages
    if (entry.type === 'user' || entry.type === 'assistant') {
      result.messageCount++;
    }

    // Initial prompt: first user entry that yields text. Top-level `content` is
    // preferred; the nested message content is the fallback.
    if (entry.type === 'user' && result.initialPrompt === '') {
      result.initialPrompt = contentToText(entry.content) || contentToText(message?.content);
    }

    // Summary: only string values are accepted so the field stays a string.
    if (entry.type === 'summary' && typeof entry.summary === 'string' && entry.summary !== '') {
      result.summary = entry.summary;
    }

    // Tools named directly on the entry. Non-string values are ignored so the
    // Set<string> contract holds at runtime too.
    for (const candidate of [entry.toolUse, entry.toolName]) {
      if (typeof candidate === 'string' && candidate !== '') {
        result.toolsUsed.add(candidate);
      }
    }

    // Tools referenced through `tool_use` content blocks inside the message.
    const blocks: unknown = message?.content;
    if (Array.isArray(blocks)) {
      for (const block of blocks as unknown[]) {
        if (
          isJsonObject(block) &&
          block.type === 'tool_use' &&
          typeof block.name === 'string' &&
          block.name !== ''
        ) {
          result.toolsUsed.add(block.name);
        }
      }
    }
  }

  return result;
}
/**
 * Encode a working directory path as a transcript project folder name.
 *
 * Every `/` and every `.` in the input becomes `-`; all other characters are
 * kept unchanged.
 * Example: /Users/christian.gick/Development/Infrastructure/ClaudeFramework
 *        → -Users-christian-gick-Development-Infrastructure-ClaudeFramework
 *
 * @param workingDir Directory path to encode.
 * @returns The encoded directory name.
 */
export function encodeWorkingDir(workingDir: string): string {
  // Cutting the path at each separator and re-joining with '-' substitutes
  // every occurrence.
  const pieces = workingDir.split(/[/\.]/g);
  return pieces.join('-');
}
/**
 * Build the transcript file path for a session.
 * Layout: {HOME}/.claude/projects/{encoded-dir}/{session-id}.jsonl
 *
 * The home directory is read from the HOME environment variable; when it is
 * unset or empty the returned path starts at `/.claude`.
 *
 * @param sessionId Session identifier used as the file's base name.
 * @param workingDir Working directory, encoded with encodeWorkingDir().
 * @returns The transcript path as a string (the file is not checked for existence).
 */
export function getTranscriptPath(sessionId: string, workingDir: string): string {
  const homeDir = process.env.HOME || '';
  const projectFolder = encodeWorkingDir(workingDir);
  const transcriptFile = `${homeDir}/.claude/projects/${projectFolder}/${sessionId}.jsonl`;
  return transcriptFile;
}
/**
 * Quote a value as a PostgreSQL string literal by doubling single quotes.
 * Only safe when the server parses literals with standard_conforming_strings = on,
 * which the generated script sets explicitly.
 */
function sqlStringLiteral(value: string): string {
  return `'${value.replace(/'/g, "''")}'`;
}

/**
 * Ingest a transcript file into the `sessions` row identified by `sessionId`.
 *
 * The transcript is parsed with parseTranscriptFile() and written through the
 * `psql` command-line client. The SQL script is sent on psql's standard input
 * and the connection settings are passed through libpq environment variables
 * (PGHOST, PGUSER, PGDATABASE, PGPASSWORD). The command string itself is
 * constant, so:
 *   - transcript content containing `$`, backticks, backslashes or quotes is
 *     never interpreted by a shell;
 *   - the transcript size is not limited by the maximum command-line length;
 *   - the password and transcript do not appear in the process arguments or in
 *     the "Command failed" error text.
 *
 * This function never throws; failures are reported in the returned object.
 *
 * @param sessionId Value matched against `sessions.id`.
 * @param transcriptPath Path of the JSONL transcript to ingest.
 * @param pgPassword Database password.
 * @param pgHost Database host (default 'infra').
 * @param pgUser Database user (default 'agiliton').
 * @param pgDb Database name (default 'agiliton').
 * @returns `success: true` with the parsed message count when a row was updated;
 *          otherwise `success: false`, `messageCount: 0` and an `error` message.
 */
export async function ingestTranscriptToDatabase(
  sessionId: string,
  transcriptPath: string,
  pgPassword: string,
  pgHost: string = 'infra',
  pgUser: string = 'agiliton',
  pgDb: string = 'agiliton'
): Promise<{ success: boolean; messageCount: number; error?: string }> {
  try {
    // Validate file exists
    if (!fs.existsSync(transcriptPath)) {
      return {
        success: false,
        messageCount: 0,
        error: `Transcript file not found: ${transcriptPath}`,
      };
    }

    // Parse transcript
    const parsed = await parseTranscriptFile(transcriptPath);

    // Build the SQL script
    const toolNames = Array.from(parsed.toolsUsed);
    const toolsSql =
      toolNames.length > 0
        ? `ARRAY[${toolNames.map(name => sqlStringLiteral(name)).join(',')}]`
        : 'NULL';
    const script = [
      // Guarantees backslashes inside the literals below are taken verbatim.
      'SET standard_conforming_strings = on;',
      'UPDATE sessions SET',
      `  transcript_jsonl = ${sqlStringLiteral(parsed.fullJsonl)},`,
      '  transcript_ingested_at = NOW(),',
      `  transcript_file_path = ${sqlStringLiteral(transcriptPath)},`,
      `  message_count = ${parsed.messageCount},`,
      `  tools_used = ${toolsSql},`,
      '  updated_at = NOW()',
      `WHERE id = ${sqlStringLiteral(sessionId)}`,
      'RETURNING TRUE;',
      '',
    ].join('\n');

    // -X: ignore ~/.psqlrc; -q: suppress command tags such as "UPDATE 1";
    // -t -A: print only the bare RETURNING value;
    // ON_ERROR_STOP: exit non-zero when a statement read from stdin fails.
    const output = execSync('psql -X -q -t -A -v ON_ERROR_STOP=1', {
      input: script,
      env: {
        ...process.env,
        PGHOST: pgHost,
        PGUSER: pgUser,
        PGDATABASE: pgDb,
        PGPASSWORD: pgPassword,
      },
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
    });

    // One returned row ("t") means the session existed and was updated;
    // empty output means no row matched the id.
    const firstLine = output
      .split('\n')
      .map(line => line.trim())
      .find(line => line !== '');
    if (firstLine === 't' || firstLine === 'true') {
      return {
        success: true,
        messageCount: parsed.messageCount,
      };
    }
    return {
      success: false,
      messageCount: 0,
      error: 'Failed to update session in database',
    };
  } catch (error) {
    const errorMsg = error instanceof Error ? error.message : String(error);
    return {
      success: false,
      messageCount: 0,
      error: `Ingestion failed: ${errorMsg}`,
    };
  }
}
/**
 * Find stale transcript files under {HOME}/.claude/projects/.
 *
 * Looks one level deep: each sub-directory of the projects directory is scanned
 * for files ending in `.jsonl`, and a file is reported when its modification
 * time is more than `maxAgeHours` old. The database is NOT consulted, so the
 * result may include transcripts that were already ingested; callers that need
 * only un-ingested files must filter the list themselves.
 *
 * The scan is best-effort: a directory that cannot be listed or a file that
 * cannot be stat'ed (removed mid-scan, dangling symlink, permissions) is logged
 * and skipped without affecting the remaining entries.
 *
 * @param maxAgeHours Minimum age in hours for a file to count as stale (default 2).
 * @returns Paths of stale `.jsonl` files; empty when the projects directory
 *          does not exist.
 */
export function findOrphanedTranscripts(maxAgeHours: number = 2): string[] {
  const home = process.env.HOME || '';
  const projectsDir = `${home}/.claude/projects`;
  if (!fs.existsSync(projectsDir)) {
    return [];
  }

  const orphans: string[] = [];
  const maxAgeMs = maxAgeHours * 60 * 60 * 1000;
  const now = Date.now();
  const describe = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  let projectDirs: string[];
  try {
    projectDirs = fs
      .readdirSync(projectsDir, { withFileTypes: true })
      .filter(entry => entry.isDirectory())
      .map(entry => `${projectsDir}/${entry.name}`);
  } catch (error) {
    console.error(`Error scanning for orphaned transcripts: ${describe(error)}`);
    return orphans;
  }

  for (const dir of projectDirs) {
    let files: string[];
    try {
      files = fs.readdirSync(dir);
    } catch (error) {
      // One unreadable project directory must not hide the others.
      console.error(`Error scanning for orphaned transcripts: ${describe(error)}`);
      continue;
    }

    for (const file of files) {
      if (!file.endsWith('.jsonl')) {
        continue;
      }
      const filePath = `${dir}/${file}`;
      try {
        const age = now - fs.statSync(filePath).mtimeMs;
        if (age > maxAgeMs) {
          orphans.push(filePath);
        }
      } catch (error) {
        // The file may have vanished since readdir, or be a dangling symlink.
        console.error(`Error scanning for orphaned transcripts: ${describe(error)}`);
      }
    }
  }

  return orphans;
}