feat(CF-580): Implement transcript-based session recovery for MCP
Add direct transcript ingestion and orphan recovery using Claude Code's JSONL transcripts instead of relying on daemon-based note synchronization. Changes: 1. **Database migration** (027_session_transcript_storage.sql): - Add transcript_jsonl, transcript_ingested_at, transcript_file_path columns - Add indexes for efficient ingestion tracking 2. **Transcript parser utility** (src/utils/transcript-parser.ts): - parseTranscriptFile(): Parse JSONL line-by-line, handle corrupt lines - encodeWorkingDir(): Convert paths to Claude Code directory encoding - findOrphanedTranscripts(): Scan for stale transcript files - ingestTranscriptToDatabase(): Main ingestion function for Node.js 3. **Orphan recovery enhancement** (src/tools/sessions.ts): - sessionRecoverOrphaned() now tries transcript ingestion first - Finds most recently modified JSONL in project directory - Falls back to legacy notes.md recovery for backward compatibility - Properly handles path encoding (/ and . → -) Benefits: - No daemon needed for recovery (Phase 2 will remove LaunchAgent) - Full transcript audit trail stored in database - Immediate recovery capability for orphaned sessions - Cleaner architecture (no markdown parsing complexity) - Compatible with Claude Code's UUID-based session files Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
24
migrations/027_session_transcript_storage.sql
Normal file
24
migrations/027_session_transcript_storage.sql
Normal file
@@ -0,0 +1,24 @@
|
||||
-- Migration 027 (CF-580): transcript storage for session recovery
-- Stores the full Claude Code JSONL transcript on the sessions row so that
-- recovery can ingest transcripts directly instead of relying on the
-- daemon-based notes sync.

-- Three new columns on sessions. No NOT NULL constraint is declared, so
-- existing rows simply hold NULL; IF NOT EXISTS keeps the statement re-runnable.
ALTER TABLE sessions
    ADD COLUMN IF NOT EXISTS transcript_jsonl       TEXT,
    ADD COLUMN IF NOT EXISTS transcript_ingested_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS transcript_file_path   TEXT;

-- Partial index: only rows that have been ingested are indexed.
CREATE INDEX IF NOT EXISTS idx_sessions_transcript_ingested
    ON sessions (transcript_ingested_at)
    WHERE transcript_ingested_at IS NOT NULL;

-- Partial index: look up a session by its source transcript file (recovery).
CREATE INDEX IF NOT EXISTS idx_sessions_transcript_file_path
    ON sessions (transcript_file_path)
    WHERE transcript_file_path IS NOT NULL;

-- Catalog documentation for the new columns
COMMENT ON COLUMN sessions.transcript_jsonl
    IS 'Full JSONL transcript from Claude Code for complete session audit trail';
COMMENT ON COLUMN sessions.transcript_ingested_at
    IS 'Timestamp when transcript was ingested into database';
COMMENT ON COLUMN sessions.transcript_file_path
    IS 'Path to source JSONL file for debugging and recovery';
|
||||
@@ -488,8 +488,55 @@ export async function sessionRecoverOrphaned(args: { project?: string }): Promis
|
||||
`✓ Session ${session.project} #${session.session_number} marked as abandoned`
|
||||
);
|
||||
|
||||
// Attempt to recover notes from temp file
|
||||
// Attempt to recover transcript first (CF-580)
|
||||
let transcriptRecovered = false;
|
||||
if (session.working_directory) {
|
||||
// Construct projects path: ~/.claude/projects/{encoded-dir}/
|
||||
// Encoding: / and . → - (Claude Code removes dots from usernames)
|
||||
const home = process.env.HOME || '';
|
||||
const encodedDir = session.working_directory.replace(/[/\.]/g, '-');
|
||||
const projectsDir = `${home}/.claude/projects/${encodedDir}`;
|
||||
|
||||
try {
|
||||
const fs = await import('fs');
|
||||
const path = await import('path');
|
||||
|
||||
if (fs.default.existsSync(projectsDir)) {
|
||||
// Find the most recently modified JSONL file
|
||||
const files = fs.default.readdirSync(projectsDir);
|
||||
const jsonlFiles = files
|
||||
.filter(f => f.endsWith('.jsonl'))
|
||||
.map(f => ({
|
||||
name: f,
|
||||
path: `${projectsDir}/${f}`,
|
||||
mtime: fs.default.statSync(`${projectsDir}/${f}`).mtimeMs,
|
||||
}))
|
||||
.sort((a, b) => b.mtime - a.mtime);
|
||||
|
||||
if (jsonlFiles.length > 0) {
|
||||
const latestFile = jsonlFiles[0];
|
||||
const transcriptContent = fs.default.readFileSync(latestFile.path, 'utf-8');
|
||||
const lineCount = transcriptContent.split('\n').filter(l => l.trim()).length;
|
||||
|
||||
// Update session with transcript
|
||||
await execute(
|
||||
`UPDATE sessions
|
||||
SET transcript_jsonl = $1, transcript_ingested_at = NOW(), transcript_file_path = $2, updated_at = NOW()
|
||||
WHERE id = $3`,
|
||||
[transcriptContent, latestFile.path, session.id]
|
||||
);
|
||||
|
||||
results.push(` → Recovered transcript (${lineCount} lines)`);
|
||||
transcriptRecovered = true;
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
// Silently skip transcript recovery errors
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: Attempt to recover notes from temp file if transcript not recovered
|
||||
if (!transcriptRecovered && session.working_directory) {
|
||||
const tempFilePath = `${session.working_directory}/.claude-session/${session.id}/notes.md`;
|
||||
|
||||
try {
|
||||
@@ -497,7 +544,7 @@ export async function sessionRecoverOrphaned(args: { project?: string }): Promis
|
||||
const recovered = await recoverNotesFromTempFile(session.id, tempFilePath, 'recovered');
|
||||
|
||||
if (recovered > 0) {
|
||||
results.push(` → Recovered ${recovered} note(s) from temp file`);
|
||||
results.push(` → Recovered ${recovered} note(s) from temp file (legacy)`);
|
||||
totalNotesRecovered += recovered;
|
||||
}
|
||||
} catch (err) {
|
||||
|
||||
217
src/utils/transcript-parser.ts
Normal file
217
src/utils/transcript-parser.ts
Normal file
@@ -0,0 +1,217 @@
|
||||
/**
 * Transcript Parser for CF-580
 *
 * Parses JSONL transcripts from Claude Code and ingests them into the database.
 * Replaces daemon-based note syncing with direct transcript ingestion.
 *
 * Key functions:
 * - parseTranscriptFile() - Parse JSONL; count messages and extract the initial prompt, summary, and tools used
 * - ingestTranscriptToDatabase() - Main ingestion function
 * - getTranscriptPath() - Construct path from session ID + working dir
 * - encodeWorkingDir() - Convert a working directory path to its encoded directory name
 * - findOrphanedTranscripts() - List stale .jsonl files under ~/.claude/projects/
 */
|
||||
|
||||
import * as fs from 'fs';
|
||||
import * as readline from 'readline';
|
||||
import { execSync } from 'child_process';
|
||||
|
||||
/** Aggregate information extracted from one JSONL transcript file. */
interface ParsedTranscript {
  /** Number of entries whose `type` is `user` or `assistant`. */
  messageCount: number;
  /** Text of the first `user` entry that carries any text ('' if none does). */
  initialPrompt: string;
  /** `summary` string of the last `summary` entry seen ('' if none). */
  summary: string;
  /** Raw file content, exactly as read from disk. */
  fullJsonl: string;
  /** Distinct tool names found in the transcript. */
  toolsUsed: Set<string>;
}

/**
 * Parse a JSONL transcript file line by line.
 *
 * Blank lines are ignored. Lines that are not valid JSON, or that do not hold
 * a JSON object, are skipped with a warning rather than failing the parse.
 *
 * Each entry is inspected in two places: at the top level (`content`,
 * `toolUse`, `toolName`) and, when present, under `message.content`, which may
 * be a string or an array of blocks (`{ type: 'text', text }`,
 * `{ type: 'tool_use', name }`).
 * NOTE(review): the nested shape is what Claude Code transcripts are expected
 * to use; confirm against a real transcript.
 *
 * @param filePath Path of the `.jsonl` file to read.
 * @returns Counts and metadata gathered from the file, plus its raw content.
 * @throws Error if `filePath` does not exist.
 */
export async function parseTranscriptFile(filePath: string): Promise<ParsedTranscript> {
  const result: ParsedTranscript = {
    messageCount: 0,
    initialPrompt: '',
    summary: '',
    fullJsonl: '',
    toolsUsed: new Set(),
  };

  if (!fs.existsSync(filePath)) {
    throw new Error(`Transcript file not found: ${filePath}`);
  }

  const fileContent = fs.readFileSync(filePath, 'utf-8');
  result.fullJsonl = fileContent;

  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  // Flatten a `content` value (plain string or array of blocks) into text.
  const textOf = (content: unknown): string => {
    if (typeof content === 'string') {
      return content;
    }
    if (!Array.isArray(content)) {
      return '';
    }
    return content
      .map((block: unknown) => {
        if (typeof block === 'string') {
          return block;
        }
        if (isRecord(block) && block.type === 'text' && typeof block.text === 'string') {
          return block.text;
        }
        return '';
      })
      .filter((text) => text !== '')
      .join('\n');
  };

  // Only string names are recorded so the declared Set<string> type holds at runtime.
  const addTool = (name: unknown): void => {
    if (typeof name === 'string' && name !== '') {
      result.toolsUsed.add(name);
    }
  };

  for (const [index, line] of fileContent.split('\n').entries()) {
    if (!line.trim()) {
      continue;
    }

    let entry: unknown;
    try {
      entry = JSON.parse(line);
    } catch {
      entry = undefined;
    }
    if (!isRecord(entry)) {
      // Report the 1-based line number in the file so the bad line can be found.
      console.warn(`Skipping malformed JSONL line ${index + 1} in ${filePath}`);
      continue;
    }

    const message = isRecord(entry.message) ? entry.message : undefined;

    // Count messages
    if (entry.type === 'user' || entry.type === 'assistant') {
      result.messageCount++;
    }

    // Initial prompt: first user entry that actually carries text.
    if (entry.type === 'user' && result.initialPrompt === '') {
      result.initialPrompt = textOf(entry.content) || textOf(message?.content);
    }

    // Summary: the last summary entry wins.
    if (entry.type === 'summary' && typeof entry.summary === 'string' && entry.summary !== '') {
      result.summary = entry.summary;
    }

    // Tools: top-level fields, plus tool_use blocks nested in message.content.
    addTool(entry.toolUse);
    addTool(entry.toolName);
    if (message !== undefined && Array.isArray(message.content)) {
      for (const block of message.content as unknown[]) {
        if (isRecord(block) && block.type === 'tool_use') {
          addTool(block.name);
        }
      }
    }
  }

  return result;
}
|
||||
|
||||
/**
 * Encode a working directory path as a single directory name by turning
 * every `/` and every `.` into `-`.
 *
 * Example:
 *   /Users/christian.gick/Development/Infrastructure/ClaudeFramework
 *   → -Users-christian-gick-Development-Infrastructure-ClaudeFramework
 *
 * @param workingDir Path to encode.
 * @returns The encoded name; all other characters are left untouched.
 */
export function encodeWorkingDir(workingDir: string): string {
  const separator = /[/.]/;
  const pieces = workingDir.split(separator);
  return pieces.join('-');
}
|
||||
|
||||
/**
 * Build the transcript file path for a session.
 *
 * Layout: {HOME}/.claude/projects/{encoded working dir}/{session id}.jsonl
 * HOME comes from the environment; when it is unset or empty the path starts
 * at `/.claude/...`.
 *
 * @param sessionId  Session identifier used as the file's base name.
 * @param workingDir Working directory, encoded via encodeWorkingDir().
 * @returns The constructed path (the file is not checked for existence).
 */
export function getTranscriptPath(sessionId: string, workingDir: string): string {
  const homeDir = process.env.HOME || '';
  const segments = [
    homeDir,
    '.claude',
    'projects',
    encodeWorkingDir(workingDir),
    `${sessionId}.jsonl`,
  ];
  return segments.join('/');
}
|
||||
|
||||
/**
 * Ingest a transcript file into the `sessions` row identified by `sessionId`.
 * Called from the session-end script.
 *
 * The transcript is parsed with parseTranscriptFile() and written with a single
 * UPDATE executed through the `psql` CLI. The SQL script is sent to psql on
 * stdin and the connection settings are passed as libpq environment variables
 * (PGHOST, PGUSER, PGDATABASE, PGPASSWORD). Nothing caller- or
 * transcript-controlled is interpolated into a shell command line, so shell
 * metacharacters in the transcript (`$`, backticks, quotes, backslashes) are
 * inert and the transcript size is not bounded by the OS argument-length limit.
 *
 * @param sessionId      Value matched against `sessions.id`.
 * @param transcriptPath Path of the JSONL transcript to ingest.
 * @param pgPassword     Database password.
 * @param pgHost         Database host.
 * @param pgUser         Database user.
 * @param pgDb           Database name.
 * @returns `success: true` with the parsed message count when a row was
 *          updated; otherwise `success: false`, `messageCount: 0` and an
 *          `error` description. This function does not throw.
 */
export async function ingestTranscriptToDatabase(
  sessionId: string,
  transcriptPath: string,
  pgPassword: string,
  pgHost: string = 'infra',
  pgUser: string = 'agiliton',
  pgDb: string = 'agiliton'
): Promise<{ success: boolean; messageCount: number; error?: string }> {
  // Quote a value as a SQL string literal. Doubling single quotes is sufficient
  // because the script below forces standard_conforming_strings on, which makes
  // backslashes inside '...' literal characters.
  const sqlLiteral = (value: string): string => `'${value.replace(/'/g, "''")}'`;

  try {
    // Validate file exists
    if (!fs.existsSync(transcriptPath)) {
      return {
        success: false,
        messageCount: 0,
        error: `Transcript file not found: ${transcriptPath}`,
      };
    }

    // Parse transcript
    const parsed = await parseTranscriptFile(transcriptPath);

    // String() guards against a non-string value having slipped into the set.
    const toolNames = Array.from(parsed.toolsUsed, (tool) => String(tool));
    const toolsSql =
      toolNames.length > 0 ? `ARRAY[${toolNames.map(sqlLiteral).join(',')}]` : 'NULL';

    const script = [
      'SET standard_conforming_strings = on;',
      'UPDATE sessions SET',
      `  transcript_jsonl = ${sqlLiteral(parsed.fullJsonl)},`,
      '  transcript_ingested_at = NOW(),',
      `  transcript_file_path = ${sqlLiteral(transcriptPath)},`,
      `  message_count = ${Number(parsed.messageCount)},`,
      `  tools_used = ${toolsSql},`,
      '  updated_at = NOW()',
      `WHERE id = ${sqlLiteral(sessionId)}`,
      'RETURNING TRUE;',
      '',
    ].join('\n');

    // Constant command line: -X skips ~/.psqlrc, -q suppresses command tags,
    // -t/-A print bare tuples, ON_ERROR_STOP turns SQL errors into a non-zero
    // exit status (which execSync raises as an exception).
    const output = execSync('psql -X -q -t -A -v ON_ERROR_STOP=1', {
      input: script,
      encoding: 'utf-8',
      stdio: ['pipe', 'pipe', 'pipe'],
      env: {
        ...process.env,
        PGPASSWORD: pgPassword,
        PGHOST: pgHost,
        PGUSER: pgUser,
        PGDATABASE: pgDb,
      },
    });

    // RETURNING TRUE yields one `t` row per updated session. Scan all lines so a
    // stray status line cannot turn a successful update into a reported failure.
    const updated = output
      .split('\n')
      .map((row) => row.trim())
      .some((row) => row === 't' || row === 'true');

    if (updated) {
      return {
        success: true,
        messageCount: parsed.messageCount,
      };
    }

    // No row came back: no session matched the id.
    return {
      success: false,
      messageCount: 0,
      error: 'Failed to update session in database',
    };
  } catch (error) {
    const errorMsg = error instanceof Error ? error.message : String(error);
    return {
      success: false,
      messageCount: 0,
      error: `Ingestion failed: ${errorMsg}`,
    };
  }
}
|
||||
|
||||
/**
 * Find stale transcript files.
 *
 * Scans each immediate subdirectory of {HOME}/.claude/projects/ for regular
 * `.jsonl` files whose modification time is more than `maxAgeHours` in the
 * past. Only file age is checked here; whether a transcript has already been
 * ingested into the database is not consulted.
 *
 * Errors are handled per directory and per file: an unreadable directory, or
 * a file that cannot be stat'ed (e.g. removed mid-scan, dangling symlink), is
 * logged and skipped without discarding the rest of the scan.
 *
 * @param maxAgeHours Minimum age, in hours, for a file to be reported.
 * @returns Paths of the stale `.jsonl` files (empty if the projects directory
 *          does not exist or cannot be read).
 */
export function findOrphanedTranscripts(maxAgeHours: number = 2): string[] {
  const home = process.env.HOME || '';
  const projectsDir = `${home}/.claude/projects`;

  if (!fs.existsSync(projectsDir)) {
    return [];
  }

  const orphans: string[] = [];
  const maxAgeMs = maxAgeHours * 60 * 60 * 1000;
  const now = Date.now();

  const report = (error: unknown): void => {
    const detail = error instanceof Error ? error.message : String(error);
    console.error(`Error scanning for orphaned transcripts: ${detail}`);
  };

  let entries: fs.Dirent[];
  try {
    entries = fs.readdirSync(projectsDir, { withFileTypes: true });
  } catch (error) {
    report(error);
    return orphans;
  }

  for (const entry of entries) {
    if (!entry.isDirectory()) {
      continue;
    }

    const dir = `${projectsDir}/${entry.name}`;
    let files: string[];
    try {
      files = fs.readdirSync(dir);
    } catch (error) {
      // One unreadable project directory must not hide the others.
      report(error);
      continue;
    }

    for (const file of files) {
      if (!file.endsWith('.jsonl')) {
        continue;
      }

      const filePath = `${dir}/${file}`;
      try {
        const stats = fs.statSync(filePath);
        // A directory that merely happens to be named *.jsonl is not a transcript.
        if (stats.isFile() && now - stats.mtimeMs > maxAgeMs) {
          orphans.push(filePath);
        }
      } catch (error) {
        report(error);
      }
    }
  }

  return orphans;
}
|
||||
Reference in New Issue
Block a user