feat(CF-580): Implement transcript-based session recovery for MCP
Add direct transcript ingestion and orphan recovery using Claude Code's JSONL transcripts instead of relying on daemon-based note synchronization. Changes: 1. **Database migration** (027_session_transcript_storage.sql): - Add transcript_jsonl, transcript_ingested_at, transcript_file_path columns - Add indexes for efficient ingestion tracking 2. **Transcript parser utility** (src/utils/transcript-parser.ts): - parseTranscriptFile(): Parse JSONL line-by-line, handle corrupt lines - encodeWorkingDir(): Convert paths to Claude Code directory encoding - findOrphanedTranscripts(): Scan for stale transcript files - ingestTranscriptToDatabase(): Main ingestion function for Node.js 3. **Orphan recovery enhancement** (src/tools/sessions.ts): - sessionRecoverOrphaned() now tries transcript ingestion first - Finds most recently modified JSONL in project directory - Falls back to legacy notes.md recovery for backward compatibility - Properly handles path encoding (/ and . → -) Benefits: - No daemon needed for recovery (Phase 2 will remove LaunchAgent) - Full transcript audit trail stored in database - Immediate recovery capability for orphaned sessions - Cleaner architecture (no markdown parsing complexity) - Compatible with Claude Code's UUID-based session files Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
24
migrations/027_session_transcript_storage.sql
Normal file
24
migrations/027_session_transcript_storage.sql
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
-- =============================================================================
-- Migration 027 - transcript storage on the sessions table (CF-580)
--
-- Why:  session recovery reads the JSONL transcript written by Claude Code
--       directly, instead of depending on daemon-synchronised notes.
-- Safe to re-run: every statement uses IF NOT EXISTS or is idempotent.
-- =============================================================================

-- 1. New nullable columns: raw transcript, ingestion time, source file path.
ALTER TABLE sessions
    ADD COLUMN IF NOT EXISTS transcript_jsonl       TEXT,
    ADD COLUMN IF NOT EXISTS transcript_ingested_at TIMESTAMPTZ,
    ADD COLUMN IF NOT EXISTS transcript_file_path   TEXT;

-- 2. Partial index over sessions that already have an ingested transcript.
CREATE INDEX IF NOT EXISTS idx_sessions_transcript_ingested
    ON sessions (transcript_ingested_at)
    WHERE transcript_ingested_at IS NOT NULL;

-- 3. Partial index used to look a session up by its transcript file path.
CREATE INDEX IF NOT EXISTS idx_sessions_transcript_file_path
    ON sessions (transcript_file_path)
    WHERE transcript_file_path IS NOT NULL;

-- 4. Catalog documentation for the new columns.
COMMENT ON COLUMN sessions.transcript_jsonl
    IS 'Full JSONL transcript from Claude Code for complete session audit trail';
COMMENT ON COLUMN sessions.transcript_ingested_at
    IS 'Timestamp when transcript was ingested into database';
COMMENT ON COLUMN sessions.transcript_file_path
    IS 'Path to source JSONL file for debugging and recovery';
|
||||||
@@ -488,8 +488,55 @@ export async function sessionRecoverOrphaned(args: { project?: string }): Promis
|
|||||||
`✓ Session ${session.project} #${session.session_number} marked as abandoned`
|
`✓ Session ${session.project} #${session.session_number} marked as abandoned`
|
||||||
);
|
);
|
||||||
|
|
||||||
// Attempt to recover notes from temp file
|
// Attempt to recover transcript first (CF-580)
|
||||||
|
let transcriptRecovered = false;
|
||||||
if (session.working_directory) {
|
if (session.working_directory) {
|
||||||
|
// Construct projects path: ~/.claude/projects/{encoded-dir}/
|
||||||
|
// Encoding: / and . → - (Claude Code removes dots from usernames)
|
||||||
|
const home = process.env.HOME || '';
|
||||||
|
const encodedDir = session.working_directory.replace(/[/\.]/g, '-');
|
||||||
|
const projectsDir = `${home}/.claude/projects/${encodedDir}`;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const fs = await import('fs');
|
||||||
|
const path = await import('path');
|
||||||
|
|
||||||
|
if (fs.default.existsSync(projectsDir)) {
|
||||||
|
// Find the most recently modified JSONL file
|
||||||
|
const files = fs.default.readdirSync(projectsDir);
|
||||||
|
const jsonlFiles = files
|
||||||
|
.filter(f => f.endsWith('.jsonl'))
|
||||||
|
.map(f => ({
|
||||||
|
name: f,
|
||||||
|
path: `${projectsDir}/${f}`,
|
||||||
|
mtime: fs.default.statSync(`${projectsDir}/${f}`).mtimeMs,
|
||||||
|
}))
|
||||||
|
.sort((a, b) => b.mtime - a.mtime);
|
||||||
|
|
||||||
|
if (jsonlFiles.length > 0) {
|
||||||
|
const latestFile = jsonlFiles[0];
|
||||||
|
const transcriptContent = fs.default.readFileSync(latestFile.path, 'utf-8');
|
||||||
|
const lineCount = transcriptContent.split('\n').filter(l => l.trim()).length;
|
||||||
|
|
||||||
|
// Update session with transcript
|
||||||
|
await execute(
|
||||||
|
`UPDATE sessions
|
||||||
|
SET transcript_jsonl = $1, transcript_ingested_at = NOW(), transcript_file_path = $2, updated_at = NOW()
|
||||||
|
WHERE id = $3`,
|
||||||
|
[transcriptContent, latestFile.path, session.id]
|
||||||
|
);
|
||||||
|
|
||||||
|
results.push(` → Recovered transcript (${lineCount} lines)`);
|
||||||
|
transcriptRecovered = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
// Silently skip transcript recovery errors
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: Attempt to recover notes from temp file if transcript not recovered
|
||||||
|
if (!transcriptRecovered && session.working_directory) {
|
||||||
const tempFilePath = `${session.working_directory}/.claude-session/${session.id}/notes.md`;
|
const tempFilePath = `${session.working_directory}/.claude-session/${session.id}/notes.md`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -497,7 +544,7 @@ export async function sessionRecoverOrphaned(args: { project?: string }): Promis
|
|||||||
const recovered = await recoverNotesFromTempFile(session.id, tempFilePath, 'recovered');
|
const recovered = await recoverNotesFromTempFile(session.id, tempFilePath, 'recovered');
|
||||||
|
|
||||||
if (recovered > 0) {
|
if (recovered > 0) {
|
||||||
results.push(` → Recovered ${recovered} note(s) from temp file`);
|
results.push(` → Recovered ${recovered} note(s) from temp file (legacy)`);
|
||||||
totalNotesRecovered += recovered;
|
totalNotesRecovered += recovered;
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
|||||||
217
src/utils/transcript-parser.ts
Normal file
217
src/utils/transcript-parser.ts
Normal file
@@ -0,0 +1,217 @@
|
|||||||
|
/**
|
||||||
|
* Transcript Parser for CF-580
|
||||||
|
*
|
||||||
|
* Parses JSONL transcripts from Claude Code and ingests them into the database.
|
||||||
|
* Replaces daemon-based note syncing with direct transcript ingestion.
|
||||||
|
*
|
||||||
|
* Key functions:
|
||||||
|
* - parseTranscriptFile() - Parse JSONL, extract metadata and messages
|
||||||
|
* - ingestTranscriptToDatabase() - Main ingestion function
|
||||||
|
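* - getTranscriptPath() - Construct path from session ID + working dir
* - encodeWorkingDir() - Convert a working directory into the directory name used under ~/.claude/projects
* - findOrphanedTranscripts() - List .jsonl transcripts that have not been modified recently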
|
||||||
|
*/
|
||||||
|
|
||||||
|
import * as fs from 'fs';
import * as os from 'os';
import * as readline from 'readline';
import { execFileSync, execSync } from 'child_process';
|
||||||
|
|
||||||
|
/** Aggregated view of a single JSONL transcript file. */
interface ParsedTranscript {
  /** Number of entries whose `type` is `user` or `assistant`. */
  messageCount: number;
  /** Text of the first `user` entry that carries any text; empty when none does. */
  initialPrompt: string;
  /** `summary` value of the last `summary` entry seen; empty when there is none. */
  summary: string;
  /** Raw, unmodified content of the transcript file. */
  fullJsonl: string;
  /** Distinct tool names found in the transcript. */
  toolsUsed: Set<string>;
}

/** Narrow an arbitrary JSON value to a plain (non-array) object. */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Parse one JSONL line; returns `undefined` for invalid JSON or a non-object value. */
function parseJsonObject(line: string): Record<string, unknown> | undefined {
  try {
    const value: unknown = JSON.parse(line);
    return isJsonObject(value) ? value : undefined;
  } catch {
    return undefined;
  }
}

/** Flatten message content (a plain string or an array of content blocks) into text. */
function extractContentText(content: unknown): string {
  if (typeof content === 'string') return content;
  if (!Array.isArray(content)) return '';

  const parts: string[] = [];
  for (const block of content) {
    if (typeof block === 'string') {
      parts.push(block);
    } else if (isJsonObject(block) && typeof block.text === 'string') {
      parts.push(block.text);
    }
  }
  return parts.join('\n');
}

/** Collect the `name` of every `tool_use` block inside array-shaped message content. */
function toolUseNames(content: unknown): string[] {
  if (!Array.isArray(content)) return [];

  const names: string[] = [];
  for (const block of content) {
    if (isJsonObject(block) && block.type === 'tool_use' && typeof block.name === 'string') {
      names.push(block.name);
    }
  }
  return names;
}

/**
 * Parse a JSONL transcript file line by line.
 *
 * Blank lines are ignored. Lines that are not valid JSON objects are skipped
 * with a warning that names the 1-based line number in the file, so a corrupt
 * line never aborts the parse.
 *
 * Message content is read from `entry.content` and, when that is absent, from
 * `entry.message.content`; it may be a string or an array of content blocks.
 * Tool names are taken from string-valued `toolUse` / `toolName` fields and
 * from `tool_use` content blocks.
 *
 * @param filePath - Path of the `.jsonl` transcript to read.
 * @returns Counters and extracted metadata plus the raw file content.
 * @throws Error when `filePath` does not exist (surfaces as a rejected promise).
 */
export async function parseTranscriptFile(filePath: string): Promise<ParsedTranscript> {
  if (!fs.existsSync(filePath)) {
    throw new Error(`Transcript file not found: ${filePath}`);
  }

  const fileContent = fs.readFileSync(filePath, 'utf-8');
  const result: ParsedTranscript = {
    messageCount: 0,
    initialPrompt: '',
    summary: '',
    fullJsonl: fileContent,
    toolsUsed: new Set<string>(),
  };

  // Iterate the unfiltered lines so the reported line number matches the file.
  for (const [index, line] of fileContent.split('\n').entries()) {
    if (line.trim() === '') continue;

    const entry = parseJsonObject(line);
    if (entry === undefined) {
      console.warn(`Skipping malformed JSONL line ${index + 1} in ${filePath}`);
      continue;
    }

    const isUser = entry.type === 'user';
    if (isUser || entry.type === 'assistant') {
      result.messageCount++;
    }

    const content =
      entry.content ?? (isJsonObject(entry.message) ? entry.message.content : undefined);

    // Keep looking until a user entry actually yields text.
    if (isUser && result.initialPrompt === '') {
      result.initialPrompt = extractContentText(content);
    }

    if (entry.type === 'summary' && typeof entry.summary === 'string' && entry.summary !== '') {
      result.summary = entry.summary;
    }

    // Only string names are recorded, so the Set<string> contract holds at runtime.
    for (const name of [entry.toolUse, entry.toolName, ...toolUseNames(content)]) {
      if (typeof name === 'string' && name !== '') {
        result.toolsUsed.add(name);
      }
    }
  }

  return result;
}
|
||||||
|
|
||||||
|
/**
 * Convert a working directory path into the encoded directory name used under
 * `~/.claude/projects/`.
 *
 * Every `/` and every `.` is replaced with `-`.
 * Example: /Users/christian.gick/Development/Infrastructure/ClaudeFramework
 *        → -Users-christian-gick-Development-Infrastructure-ClaudeFramework
 *
 * Trailing slashes are dropped first so that `/a/b/` and `/a/b` encode to the
 * same name; a path made only of slashes (such as the root `/`) is left as is.
 *
 * @param workingDir - Absolute working directory path.
 * @returns The encoded directory name.
 */
export function encodeWorkingDir(workingDir: string): string {
  const withoutTrailingSlash = workingDir.replace(/\/+$/, '');
  // An all-slash input would collapse to '', so fall back to the original string.
  const normalized = withoutTrailingSlash === '' ? workingDir : withoutTrailingSlash;
  return normalized.replace(/[/.]/g, '-');
}
|
||||||
|
|
||||||
|
/**
 * Build the transcript file path for a session.
 *
 * Layout: `{home}/.claude/projects/{encoded-dir}/{session-id}.jsonl`, where
 * `{encoded-dir}` comes from `encodeWorkingDir(workingDir)`.
 *
 * The home directory is taken from `$HOME`; when that is unset or empty it
 * falls back to `os.homedir()` rather than producing a path rooted at
 * `/.claude`.
 *
 * @param sessionId - Session identifier used as the file's base name.
 * @param workingDir - Working directory the session ran in.
 * @returns Path of the expected `.jsonl` transcript (existence is not checked).
 */
export function getTranscriptPath(sessionId: string, workingDir: string): string {
  // `||` (not `??`) on purpose: an empty HOME must also trigger the fallback.
  const home = process.env.HOME || os.homedir();
  const encodedDir = encodeWorkingDir(workingDir);
  return `${home}/.claude/projects/${encodedDir}/${sessionId}.jsonl`;
}
|
||||||
|
|
||||||
|
/**
 * Render a string as a PostgreSQL escape-string literal (`E'...'`).
 * Backslashes and single quotes are doubled, so the literal is read the same
 * way whatever the server's `standard_conforming_strings` setting is.
 */
function toSqlLiteral(value: string): string {
  return `E'${value.replace(/\\/g, '\\\\').replace(/'/g, "''")}'`;
}

/**
 * Ingest a transcript file into the `sessions` row identified by `sessionId`.
 *
 * The file is parsed with `parseTranscriptFile()`, then a single UPDATE is run
 * through the `psql` command-line client. The statement is sent on psql's
 * standard input and the password through the `PGPASSWORD` environment
 * variable, so neither passes through a shell or appears in the process
 * arguments, and transcript size is not bounded by the argument-length limit.
 *
 * This function never throws: every failure is reported in the returned
 * object.
 *
 * @param sessionId - Value matched against `sessions.id`.
 * @param transcriptPath - Path of the `.jsonl` transcript to ingest.
 * @param pgPassword - Database password.
 * @param pgHost - Database host (default `infra`).
 * @param pgUser - Database user (default `agiliton`).
 * @param pgDb - Database name (default `agiliton`).
 * @returns `success: true` with the message count when a row was updated;
 *          otherwise `success: false`, `messageCount: 0` and an `error` text.
 */
export async function ingestTranscriptToDatabase(
  sessionId: string,
  transcriptPath: string,
  pgPassword: string,
  pgHost: string = 'infra',
  pgUser: string = 'agiliton',
  pgDb: string = 'agiliton'
): Promise<{ success: boolean; messageCount: number; error?: string }> {
  try {
    if (!fs.existsSync(transcriptPath)) {
      return {
        success: false,
        messageCount: 0,
        error: `Transcript file not found: ${transcriptPath}`,
      };
    }

    const parsed = await parseTranscriptFile(transcriptPath);

    const toolNames = Array.from(parsed.toolsUsed);
    const toolsSql =
      toolNames.length > 0
        ? `ARRAY[${toolNames.map(name => toSqlLiteral(name)).join(',')}]`
        : 'NULL';

    const updateSql = [
      'UPDATE sessions SET',
      `  transcript_jsonl = ${toSqlLiteral(parsed.fullJsonl)},`,
      '  transcript_ingested_at = NOW(),',
      `  transcript_file_path = ${toSqlLiteral(transcriptPath)},`,
      `  message_count = ${parsed.messageCount},`,
      `  tools_used = ${toolsSql},`,
      '  updated_at = NOW()',
      `WHERE id = ${toSqlLiteral(sessionId)}`,
      'RETURNING TRUE;',
      '',
    ].join('\n');

    // -X: ignore ~/.psqlrc   -q: suppress the "UPDATE n" command tag
    // -A -t: unaligned, tuples only, so a matched row prints exactly "t"
    // -w: never prompt for a password   ON_ERROR_STOP: non-zero exit on SQL errors
    const output = execFileSync(
      'psql',
      ['-X', '-q', '-A', '-t', '-w', '-v', 'ON_ERROR_STOP=1', '-h', pgHost, '-U', pgUser, '-d', pgDb],
      {
        input: updateSql,
        encoding: 'utf-8',
        env: { ...process.env, PGPASSWORD: pgPassword },
        stdio: ['pipe', 'pipe', 'pipe'],
      }
    );

    const rowUpdated = output.split('\n').some(row => row.trim() === 't');
    if (rowUpdated) {
      return {
        success: true,
        messageCount: parsed.messageCount,
      };
    }

    // The statement ran but RETURNING produced no row: no session has this id.
    return {
      success: false,
      messageCount: 0,
      error: `Failed to update session in database: no session with id ${sessionId}`,
    };
  } catch (error) {
    const errorMsg = error instanceof Error ? error.message : String(error);
    return {
      success: false,
      messageCount: 0,
      error: `Ingestion failed: ${errorMsg}`,
    };
  }
}
|
||||||
|
|
||||||
|
/**
 * Find stale transcript files.
 *
 * Scans every directory directly under `{home}/.claude/projects` and returns
 * the `.jsonl` files whose modification time is more than `maxAgeHours` old.
 * Staleness is judged by modification time only; this function does not check
 * the database for whether a transcript was already ingested.
 *
 * The home directory is `$HOME`, falling back to `os.homedir()` when unset or
 * empty. A directory or file that cannot be read is logged and skipped, so one
 * bad entry does not hide the remaining results.
 *
 * @param maxAgeHours - Minimum age, in hours, for a file to be reported (default 2).
 * @returns Paths of the stale transcript files; empty when the projects
 *          directory does not exist.
 */
export function findOrphanedTranscripts(maxAgeHours: number = 2): string[] {
  // `||` (not `??`) on purpose: an empty HOME must also trigger the fallback.
  const home = process.env.HOME || os.homedir();
  const projectsDir = `${home}/.claude/projects`;

  if (!fs.existsSync(projectsDir)) {
    return [];
  }

  const orphans: string[] = [];
  const maxAgeMs = maxAgeHours * 60 * 60 * 1000;
  const now = Date.now();

  let projectEntries: fs.Dirent[];
  try {
    projectEntries = fs.readdirSync(projectsDir, { withFileTypes: true });
  } catch (error) {
    console.error(`Error scanning for orphaned transcripts: ${error}`);
    return orphans;
  }

  for (const entry of projectEntries) {
    if (!entry.isDirectory()) continue;

    const dir = `${projectsDir}/${entry.name}`;
    let files: string[];
    try {
      files = fs.readdirSync(dir);
    } catch (error) {
      console.error(`Error scanning for orphaned transcripts: ${error}`);
      continue;
    }

    for (const file of files) {
      if (!file.endsWith('.jsonl')) continue;

      const filePath = `${dir}/${file}`;
      try {
        const stats = fs.statSync(filePath);
        // Only regular files count; a directory that happens to end in .jsonl is not a transcript.
        if (stats.isFile() && now - stats.mtimeMs > maxAgeMs) {
          orphans.push(filePath);
        }
      } catch (error) {
        // The file vanished or is a dangling symlink: skip it and keep scanning.
        console.error(`Error scanning for orphaned transcripts: ${error}`);
      }
    }
  }

  return orphans;
}
|
||||||
Reference in New Issue
Block a user