feat(CF-2394): Add session_transcript_search MCP tool

Hybrid (vector + keyword + rerank) search over indexed session transcripts.
Enables context recovery from past sessions without re-reading JSONL files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-03-18 08:37:19 +02:00
parent 3613e2aa52
commit 0fad29801e
3 changed files with 190 additions and 0 deletions

View File

@@ -81,6 +81,7 @@ import {
sessionPatternDetection,
} from './tools/session-docs.js';
import { archiveAdd, archiveSearch, archiveList, archiveGet } from './tools/archives.js';
import { transcriptSearch } from './tools/transcripts.js';
import { projectArchive } from './tools/project-archive.js';
// Create MCP server
@@ -454,6 +455,17 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
);
break;
// Transcripts (CF-2394)
// Dispatch for the `session_transcript_search` tool: forwards the five search
// arguments to transcriptSearch and stores its awaited return value in `result`.
// NOTE(review): `a` is declared outside this hunk — presumably the tool-call
// arguments of the incoming request; confirm against the enclosing handler.
case 'session_transcript_search':
result = await transcriptSearch({
query: a.query,
project: a.project,
session_issue_key: a.session_issue_key,
limit: a.limit,
search_mode: a.search_mode,
});
break;
// Archives
case 'archive_add':
result = await archiveAdd({

View File

@@ -653,6 +653,23 @@ export const toolDefinitions = [
},
},
// Transcript Tools (CF-2394)
// Definition of the `session_transcript_search` tool: its public name, the
// description text, and the JSON schema describing its arguments.
{
name: 'session_transcript_search',
description: 'Search session transcripts (JSONL) using hybrid (vector + keyword) search. Finds past sessions by content — commands run, decisions made, plans discussed. Use when recovering context from prior sessions.',
inputSchema: {
type: 'object',
properties: {
// Free-text search input; the only property listed under `required` below.
query: { type: 'string', description: 'Search query (e.g., "hetzner disk resize", "auth migration plan")' },
// Optional filters.
project: { type: 'string', description: 'Filter by project key (optional)' },
session_issue_key: { type: 'string', description: 'Filter by session Jira issue key (optional)' },
// Optional tuning; the defaults are stated in the description strings.
limit: { type: 'number', description: 'Max results (default: 10)' },
search_mode: { type: 'string', enum: ['hybrid', 'vector', 'keyword'], description: 'Search mode (default: hybrid)' },
},
required: ['query'],
},
},
// Archive Tools
{
name: 'archive_add',

161
src/tools/transcripts.ts Normal file
View File

@@ -0,0 +1,161 @@
// Session transcript search (CF-2394)
import { query } from '../db.js';
import { getEmbedding, formatEmbedding, rrfMerge, rerank } from '../embeddings.js';
/** Retrieval strategies the transcript search can be asked to use. */
type TranscriptSearchMode = 'hybrid' | 'vector' | 'keyword';

/** Arguments accepted by the session transcript search. */
interface TranscriptSearchArgs {
  /** Free-text search input. */
  query: string;
  /** Restrict hits to rows with this `project_key`. */
  project?: string;
  /** Restrict hits to rows with this `session_issue_key`. */
  session_issue_key?: string;
  /** Maximum number of rows requested from each search query. */
  limit?: number;
  /** Which retrieval strategy to run. */
  search_mode?: TranscriptSearchMode;
}
/** Columns that both transcript search queries select from `session_transcripts`. */
interface TranscriptRowColumns {
  id: number;
  session_uuid: string;
  session_issue_key: string | null;
  project_key: string;
  git_branch: string | null;
  message_count: number;
  tool_names: string[] | null;
  /** Rendered by the queries with `to_char(started_at, 'YYYY-MM-DD HH24:MI')`. */
  started_at: string | null;
}

/** One search hit: the shared columns plus whichever score fields the producing query selected. */
interface TranscriptRow extends TranscriptRowColumns {
  /** `1 - (embedding <=> $1)`; selected by the vector query only. */
  similarity?: number;
  /** `ts_rank(...)` score; selected by the keyword query only. */
  rank?: number;
  /** `ts_headline(...)` excerpt; selected by the keyword query only. */
  snippet?: string;
}
/** Result count used when `limit` is missing or is not a positive number. */
const DEFAULT_TRANSCRIPT_LIMIT = 10;

/**
 * Search the `session_transcripts` table and render the hits as Markdown text.
 *
 * Depending on `search_mode` this runs a vector query (ordered by
 * `embedding <=> $1`), a full-text query (`tsv @@ plainto_tsquery(...)`), or
 * both. In hybrid mode, when both queries return rows, the two id lists are
 * fused with `rrfMerge` and handed to `rerank`; if `rerank` yields nothing the
 * fused order is kept and truncated to `limit`. If only one query produced
 * rows, that query's order is used as-is.
 *
 * @param args - Search arguments. `limit` falls back to 10 when it is absent
 *   or not a number >= 1, and is floored to an integer. `search_mode` defaults
 *   to `'hybrid'`.
 * @returns The formatted result list, `'No matching transcripts found'`, or a
 *   string starting with `'Error:'` when the query is blank or when vector
 *   mode was requested but no embedding could be produced.
 */
export async function transcriptSearch(args: TranscriptSearchArgs): Promise<string> {
  const { query: searchQuery, project, session_issue_key, search_mode = 'hybrid' } = args;

  // A blank query cannot match anything useful; answer before spending an
  // embedding call and a database round-trip on it.
  if (typeof searchQuery !== 'string' || searchQuery.trim() === '') {
    return 'Error: query is required';
  }

  // `limit` is bound directly to SQL LIMIT, so it must be a positive integer.
  const requestedLimit = Number(args.limit);
  const limit =
    Number.isFinite(requestedLimit) && requestedLimit >= 1
      ? Math.floor(requestedLimit)
      : DEFAULT_TRANSCRIPT_LIMIT;

  // Optional filters shared by both statements. $1 (search input) and $2
  // (limit) are already taken, so filter placeholders start at $3.
  const filterParams: unknown[] = [];
  let filterSql = '';
  if (project) {
    filterParams.push(project);
    filterSql += ` AND project_key = $${filterParams.length + 2}`;
  }
  if (session_issue_key) {
    filterParams.push(session_issue_key);
    filterSql += ` AND session_issue_key = $${filterParams.length + 2}`;
  }

  // Vector search
  let vectorIds: number[] = [];
  const vectorRows = new Map<number, TranscriptRow>();
  let embeddingFailed = false;
  if (search_mode !== 'keyword') {
    const embedding = await getEmbedding(searchQuery);
    if (embedding) {
      const rows = await query<TranscriptRow>(
        `SELECT id, session_uuid, session_issue_key, project_key, git_branch,
                message_count, tool_names,
                to_char(started_at, 'YYYY-MM-DD HH24:MI') as started_at,
                1 - (embedding <=> $1) as similarity
         FROM session_transcripts
         WHERE embedding IS NOT NULL${filterSql}
         ORDER BY embedding <=> $1
         LIMIT $2`,
        [formatEmbedding(embedding), limit, ...filterParams]
      );
      vectorIds = rows.map(row => row.id);
      for (const row of rows) vectorRows.set(row.id, row);
    } else if (search_mode === 'vector') {
      return 'Error: Could not generate embedding for vector search';
    } else {
      // Hybrid mode degrades to keyword-only; remembered for the result label.
      embeddingFailed = true;
    }
  }

  // Keyword search
  let keywordIds: number[] = [];
  const keywordRows = new Map<number, TranscriptRow>();
  if (search_mode !== 'vector') {
    const rows = await query<TranscriptRow>(
      `SELECT id, session_uuid, session_issue_key, project_key, git_branch,
              message_count, tool_names,
              to_char(started_at, 'YYYY-MM-DD HH24:MI') as started_at,
              ts_rank(tsv, plainto_tsquery('english', $1)) as rank,
              ts_headline('english', searchable_content,
                plainto_tsquery('english', $1),
                'StartSel=**,StopSel=**,MaxWords=25,MinWords=8') as snippet
       FROM session_transcripts
       WHERE tsv @@ plainto_tsquery('english', $1)${filterSql}
       ORDER BY rank DESC
       LIMIT $2`,
      [searchQuery, limit, ...filterParams]
    );
    keywordIds = rows.map(row => row.id);
    for (const row of rows) keywordRows.set(row.id, row);
  }

  // Merge results
  let finalIds: number[];
  let searchLabel: string;
  if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
    const fusedIds = rrfMerge(vectorIds, keywordIds).map(m => m.id as number);
    searchLabel = 'hybrid';

    // Re-rank on the keyword snippet where one exists; rows found only by the
    // vector query have no snippet, so they fall back to the issue key.
    const docs = fusedIds.map(id => {
      const row = keywordRows.get(id) || vectorRows.get(id);
      return row?.snippet || row?.session_issue_key || '';
    });
    const reranked = await rerank(searchQuery, docs, limit);
    if (reranked) {
      // Ignore any index that does not point at a fused id so the reported
      // result count always matches the rows actually rendered.
      finalIds = reranked
        .map(hit => fusedIds[hit.index])
        .filter((id): id is number => id !== undefined);
      searchLabel = 'hybrid+rerank';
    } else {
      finalIds = fusedIds.slice(0, limit);
    }
  } else if (vectorIds.length > 0) {
    finalIds = vectorIds;
    searchLabel = 'vector';
  } else if (keywordIds.length > 0) {
    finalIds = keywordIds;
    searchLabel = embeddingFailed ? 'keyword (embedding unavailable)' : 'keyword';
  } else {
    return 'No matching transcripts found';
  }

  // Format output
  const lines = [`Session transcripts (${searchLabel}, ${finalIds.length} results):\n`];
  for (const id of finalIds) {
    const vectorHit = vectorRows.get(id);
    const keywordHit = keywordRows.get(id);
    const r = vectorHit ?? keywordHit;
    if (!r) continue;

    const scoreParts: string[] = [];
    const similarity = Number(vectorHit?.similarity);
    if (vectorHit && Number.isFinite(similarity)) {
      scoreParts.push(`${Math.round(similarity * 100)}% semantic`);
    }
    const rank = Number(keywordHit?.rank);
    if (keywordHit && Number.isFinite(rank)) {
      scoreParts.push(`rank: ${rank.toFixed(3)}`);
    }
    const scores = scoreParts.length > 0 ? ` (${scoreParts.join(', ')})` : '';

    const issueLink = r.session_issue_key
      ? `[${r.session_issue_key}](https://agiliton.atlassian.net/browse/${r.session_issue_key})`
      : 'unlinked';
    const tools = r.tool_names?.slice(0, 5).join(', ') || 'none';

    lines.push(`**#${r.id}** ${issueLink} ${r.project_key} (${r.git_branch || 'no-branch'})${scores}`);
    lines.push(` ${r.started_at || 'unknown date'} | ${r.message_count} msgs | Tools: ${tools}`);

    // Only the keyword query selects a snippet, so read it from the keyword
    // row even when the same transcript was also a vector hit.
    const snippet = keywordHit?.snippet || vectorHit?.snippet;
    if (snippet) {
      lines.push(` > ${snippet.replace(/\n/g, ' ').substring(0, 150)}`);
    }
    lines.push('');
  }
  return lines.join('\n');
}