feat(CF-2394): Add session_transcript_search MCP tool
Hybrid (vector + keyword + rerank) search over indexed session transcripts. Enables context recovery from past sessions without re-reading JSONL files. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
12
src/index.ts
12
src/index.ts
@@ -81,6 +81,7 @@ import {
|
||||
sessionPatternDetection,
|
||||
} from './tools/session-docs.js';
|
||||
import { archiveAdd, archiveSearch, archiveList, archiveGet } from './tools/archives.js';
|
||||
import { transcriptSearch } from './tools/transcripts.js';
|
||||
import { projectArchive } from './tools/project-archive.js';
|
||||
|
||||
// Create MCP server
|
||||
@@ -454,6 +455,17 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
||||
);
|
||||
break;
|
||||
|
||||
// Transcripts (CF-2394)
|
||||
case 'session_transcript_search':
|
||||
result = await transcriptSearch({
|
||||
query: a.query,
|
||||
project: a.project,
|
||||
session_issue_key: a.session_issue_key,
|
||||
limit: a.limit,
|
||||
search_mode: a.search_mode,
|
||||
});
|
||||
break;
|
||||
|
||||
// Archives
|
||||
case 'archive_add':
|
||||
result = await archiveAdd({
|
||||
|
||||
@@ -653,6 +653,23 @@ export const toolDefinitions = [
|
||||
},
|
||||
},
|
||||
|
||||
// Transcript Tools (CF-2394)
|
||||
{
|
||||
name: 'session_transcript_search',
|
||||
description: 'Search session transcripts (JSONL) using hybrid (vector + keyword) search. Finds past sessions by content — commands run, decisions made, plans discussed. Use when recovering context from prior sessions.',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
query: { type: 'string', description: 'Search query (e.g., "hetzner disk resize", "auth migration plan")' },
|
||||
project: { type: 'string', description: 'Filter by project key (optional)' },
|
||||
session_issue_key: { type: 'string', description: 'Filter by session Jira issue key (optional)' },
|
||||
limit: { type: 'number', description: 'Max results (default: 10)' },
|
||||
search_mode: { type: 'string', enum: ['hybrid', 'vector', 'keyword'], description: 'Search mode (default: hybrid)' },
|
||||
},
|
||||
required: ['query'],
|
||||
},
|
||||
},
|
||||
|
||||
// Archive Tools
|
||||
{
|
||||
name: 'archive_add',
|
||||
|
||||
161
src/tools/transcripts.ts
Normal file
161
src/tools/transcripts.ts
Normal file
@@ -0,0 +1,161 @@
|
||||
// Session transcript search (CF-2394)
|
||||
|
||||
import { query } from '../db.js';
|
||||
import { getEmbedding, formatEmbedding, rrfMerge, rerank } from '../embeddings.js';
|
||||
|
||||
/**
 * Arguments accepted by `transcriptSearch`.
 *
 * Only `query` is mandatory; every other field narrows or tunes the search.
 */
interface TranscriptSearchArgs {
  /** Free-text search string; used for both the embedding lookup and the full-text query. */
  query: string;

  /** When set, only transcripts whose `project_key` equals this value are returned. */
  project?: string;

  /** When set, only transcripts whose `session_issue_key` equals this value are returned. */
  session_issue_key?: string;

  /** Maximum number of rows fetched per search leg (the function defaults this to 10). */
  limit?: number;

  /** Which search legs to run (the function defaults this to `'hybrid'`). */
  search_mode?: 'hybrid' | 'vector' | 'keyword';
}
|
||||
|
||||
/**
 * One row of `session_transcripts` as selected by the search queries.
 *
 * The three trailing optional fields are computed columns: the vector query
 * selects `similarity`, the keyword query selects `rank` and `snippet`.
 */
interface TranscriptRow {
  // --- stored columns ---
  id: number;
  session_uuid: string;
  session_issue_key: string | null;
  project_key: string;
  git_branch: string | null;
  message_count: number;
  tool_names: string[] | null;
  /** Pre-formatted by SQL `to_char(..., 'YYYY-MM-DD HH24:MI')`. */
  started_at: string | null;

  // --- computed columns ---
  /** `1 - (embedding <=> query embedding)`; present only on vector-search rows. */
  similarity?: number;
  /** `ts_rank` score; present only on keyword-search rows. */
  rank?: number;
  /** `ts_headline` excerpt with matches wrapped in `**`; present only on keyword-search rows. */
  snippet?: string;
}
|
||||
|
||||
/**
 * Search indexed session transcripts and return a Markdown-formatted summary.
 *
 * Depending on `search_mode` this runs a vector (embedding distance) query, a
 * keyword (full-text) query, or both. In hybrid mode, when both legs return
 * rows, the id lists are fused with `rrfMerge` and then passed to `rerank`;
 * if the reranker returns nothing the fused list is truncated to `limit`.
 *
 * @param args - Search text plus optional project / issue-key filters,
 *   result limit (default 10) and search mode (default `'hybrid'`).
 * @returns A human-readable result listing, `'No matching transcripts found'`
 *   when neither leg matched, or an `'Error: ...'` string when vector-only
 *   search was requested but no embedding could be generated.
 */
export async function transcriptSearch(args: TranscriptSearchArgs): Promise<string> {
  const { query: searchQuery, project, session_issue_key, search_mode = 'hybrid' } = args;

  // `limit` is bound directly to SQL LIMIT, which rejects negative or
  // fractional values. Normalise to a positive integer, defaulting to 10.
  const requestedLimit = args.limit ?? 10;
  const limit = Number.isFinite(requestedLimit) ? Math.max(1, Math.trunc(requestedLimit)) : 10;

  // Builds the optional " AND ..." filter clauses. `startIdx` is the first
  // free placeholder number ($1 and $2 are taken by the query/embedding and limit).
  const buildFilter = (startIdx: number) => {
    let where = '';
    const params: unknown[] = [];
    let idx = startIdx;
    if (project) {
      where += ` AND project_key = $${idx++}`;
      params.push(project);
    }
    if (session_issue_key) {
      where += ` AND session_issue_key = $${idx++}`;
      params.push(session_issue_key);
    }
    return { where, params, nextIdx: idx };
  };

  // Score columns are optional on the row type; coerce defensively instead of
  // asserting non-null so an unexpected null/string never throws while formatting.
  const toFiniteNumber = (value: unknown): number | undefined => {
    if (value === null || value === undefined) return undefined;
    const n = Number(value);
    return Number.isFinite(n) ? n : undefined;
  };

  // Vector search
  let vectorIds: number[] = [];
  const vectorRows = new Map<number, TranscriptRow>();
  let embeddingFailed = false;

  if (search_mode !== 'keyword') {
    const embedding = await getEmbedding(searchQuery);
    if (embedding) {
      const embeddingStr = formatEmbedding(embedding);
      const filter = buildFilter(3);
      const params: unknown[] = [embeddingStr, limit, ...filter.params];

      const rows = await query<TranscriptRow>(
        `SELECT id, session_uuid, session_issue_key, project_key, git_branch,
                message_count, tool_names,
                to_char(started_at, 'YYYY-MM-DD HH24:MI') as started_at,
                1 - (embedding <=> $1) as similarity
         FROM session_transcripts
         WHERE embedding IS NOT NULL${filter.where}
         ORDER BY embedding <=> $1
         LIMIT $2`,
        params
      );
      vectorIds = rows.map(r => r.id);
      for (const r of rows) vectorRows.set(r.id, r);
    } else {
      embeddingFailed = true;
      // Vector-only mode has no fallback; hybrid mode degrades to keyword below.
      if (search_mode === 'vector') {
        return 'Error: Could not generate embedding for vector search';
      }
    }
  }

  // Keyword search
  let keywordIds: number[] = [];
  const keywordRows = new Map<number, TranscriptRow>();

  if (search_mode !== 'vector') {
    const filter = buildFilter(3);
    const params: unknown[] = [searchQuery, limit, ...filter.params];

    const rows = await query<TranscriptRow>(
      `SELECT id, session_uuid, session_issue_key, project_key, git_branch,
              message_count, tool_names,
              to_char(started_at, 'YYYY-MM-DD HH24:MI') as started_at,
              ts_rank(tsv, plainto_tsquery('english', $1)) as rank,
              ts_headline('english', searchable_content,
                plainto_tsquery('english', $1),
                'StartSel=**,StopSel=**,MaxWords=25,MinWords=8') as snippet
       FROM session_transcripts
       WHERE tsv @@ plainto_tsquery('english', $1)${filter.where}
       ORDER BY rank DESC
       LIMIT $2`,
      params
    );
    keywordIds = rows.map(r => r.id);
    for (const r of rows) keywordRows.set(r.id, r);
  }

  // Merge results
  let finalIds: number[];
  let searchLabel: string;

  if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
    const merged = rrfMerge(vectorIds, keywordIds);
    finalIds = merged.map(m => m.id as number);
    searchLabel = 'hybrid';

    // Re-rank using snippets; rows found only by the vector leg have no
    // snippet, so fall back to the issue key (or an empty string).
    const docs = finalIds.map(id => {
      const r = keywordRows.get(id) || vectorRows.get(id);
      return r?.snippet || r?.session_issue_key || '';
    });
    const reranked = await rerank(searchQuery, docs, limit);
    if (reranked) {
      const candidates = finalIds;
      // Drop any index the reranker returns that is outside the candidate list.
      finalIds = reranked
        .map(r => candidates[r.index])
        .filter((id): id is number => id !== undefined);
      searchLabel = 'hybrid+rerank';
    } else {
      finalIds = finalIds.slice(0, limit);
    }
  } else if (vectorIds.length > 0) {
    finalIds = vectorIds;
    searchLabel = 'vector';
  } else if (keywordIds.length > 0) {
    finalIds = keywordIds;
    searchLabel = embeddingFailed ? 'keyword (embedding unavailable)' : 'keyword';
  } else {
    return 'No matching transcripts found';
  }

  // Format output
  const lines = [`Session transcripts (${searchLabel}, ${finalIds.length} results):\n`];
  for (const id of finalIds) {
    const vectorRow = vectorRows.get(id);
    const keywordRow = keywordRows.get(id);
    // Metadata columns are identical in both legs, so either row works here.
    const r = vectorRow ?? keywordRow;
    if (!r) continue;

    const scoreParts: string[] = [];
    const similarity = toFiniteNumber(vectorRow?.similarity);
    if (similarity !== undefined) scoreParts.push(`${Math.round(similarity * 100)}% semantic`);
    const rank = toFiniteNumber(keywordRow?.rank);
    if (rank !== undefined) scoreParts.push(`rank: ${rank.toFixed(3)}`);
    const scores = scoreParts.length > 0 ? ` (${scoreParts.join(', ')})` : '';

    const issueLink = r.session_issue_key
      ? `[${r.session_issue_key}](https://agiliton.atlassian.net/browse/${r.session_issue_key})`
      : 'unlinked';
    const tools = r.tool_names?.slice(0, 5).join(', ') || 'none';

    lines.push(`**#${r.id}** ${issueLink} — ${r.project_key} (${r.git_branch || 'no-branch'})${scores}`);
    lines.push(` ${r.started_at || 'unknown date'} | ${r.message_count} msgs | Tools: ${tools}`);

    // The snippet exists only on the keyword row. Reading it from `r` would
    // lose it whenever the same transcript was also found by the vector leg.
    const snippet = keywordRow?.snippet;
    if (snippet) {
      lines.push(` > ${snippet.replace(/\n/g, ' ').substring(0, 150)}`);
    }
    lines.push('');
  }

  return lines.join('\n');
}
|
||||
Reference in New Issue
Block a user