feat(CF-2394): Add session_transcript_search MCP tool
Hybrid (vector + keyword + rerank) search over indexed session transcripts. Enables context recovery from past sessions without re-reading JSONL files. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
12
src/index.ts
12
src/index.ts
@@ -81,6 +81,7 @@ import {
|
|||||||
sessionPatternDetection,
|
sessionPatternDetection,
|
||||||
} from './tools/session-docs.js';
|
} from './tools/session-docs.js';
|
||||||
import { archiveAdd, archiveSearch, archiveList, archiveGet } from './tools/archives.js';
|
import { archiveAdd, archiveSearch, archiveList, archiveGet } from './tools/archives.js';
|
||||||
|
import { transcriptSearch } from './tools/transcripts.js';
|
||||||
import { projectArchive } from './tools/project-archive.js';
|
import { projectArchive } from './tools/project-archive.js';
|
||||||
|
|
||||||
// Create MCP server
|
// Create MCP server
|
||||||
@@ -454,6 +455,17 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|||||||
);
|
);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
// Transcripts (CF-2394)
|
||||||
|
case 'session_transcript_search':
|
||||||
|
result = await transcriptSearch({
|
||||||
|
query: a.query,
|
||||||
|
project: a.project,
|
||||||
|
session_issue_key: a.session_issue_key,
|
||||||
|
limit: a.limit,
|
||||||
|
search_mode: a.search_mode,
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
|
||||||
// Archives
|
// Archives
|
||||||
case 'archive_add':
|
case 'archive_add':
|
||||||
result = await archiveAdd({
|
result = await archiveAdd({
|
||||||
|
|||||||
@@ -653,6 +653,23 @@ export const toolDefinitions = [
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
// Transcript Tools (CF-2394)
|
||||||
|
{
|
||||||
|
name: 'session_transcript_search',
|
||||||
|
description: 'Search session transcripts (JSONL) using hybrid (vector + keyword) search. Finds past sessions by content — commands run, decisions made, plans discussed. Use when recovering context from prior sessions.',
|
||||||
|
inputSchema: {
|
||||||
|
type: 'object',
|
||||||
|
properties: {
|
||||||
|
query: { type: 'string', description: 'Search query (e.g., "hetzner disk resize", "auth migration plan")' },
|
||||||
|
project: { type: 'string', description: 'Filter by project key (optional)' },
|
||||||
|
session_issue_key: { type: 'string', description: 'Filter by session Jira issue key (optional)' },
|
||||||
|
limit: { type: 'number', description: 'Max results (default: 10)' },
|
||||||
|
search_mode: { type: 'string', enum: ['hybrid', 'vector', 'keyword'], description: 'Search mode (default: hybrid)' },
|
||||||
|
},
|
||||||
|
required: ['query'],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
// Archive Tools
|
// Archive Tools
|
||||||
{
|
{
|
||||||
name: 'archive_add',
|
name: 'archive_add',
|
||||||
|
|||||||
161
src/tools/transcripts.ts
Normal file
161
src/tools/transcripts.ts
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
// Session transcript search (CF-2394)
|
||||||
|
|
||||||
|
import { query } from '../db.js';
|
||||||
|
import { getEmbedding, formatEmbedding, rrfMerge, rerank } from '../embeddings.js';
|
||||||
|
|
||||||
|
/** Arguments accepted by the `session_transcript_search` tool. */
interface TranscriptSearchArgs {
  /** Free-text search query; must be a non-empty string. */
  query: string;
  /** When set, only rows whose `project_key` equals this value are returned. */
  project?: string;
  /** When set, only rows whose `session_issue_key` equals this value are returned. */
  session_issue_key?: string;
  /** Maximum number of results; floored to an integer, defaults to 10 when missing or < 1. */
  limit?: number;
  /** Retrieval strategy; defaults to `'hybrid'`. */
  search_mode?: 'hybrid' | 'vector' | 'keyword';
}

/** Columns selected from `session_transcripts` by the queries in this file. */
interface TranscriptRow {
  id: number;
  session_uuid: string;
  session_issue_key: string | null;
  project_key: string;
  git_branch: string | null;
  message_count: number;
  tool_names: string[] | null;
  /** Pre-formatted by SQL as `YYYY-MM-DD HH24:MI`. */
  started_at: string | null;
  /** `1 - (embedding <=> query embedding)`; only selected by the vector query. */
  similarity?: number;
  /** `ts_rank` score; only selected by the keyword query. */
  rank?: number;
  /** `ts_headline` excerpt with `**` highlight markers; only selected by the keyword query. */
  snippet?: string;
}

/** Result count used when the caller supplies no usable `limit`. */
const DEFAULT_TRANSCRIPT_LIMIT = 10;

/** SQL fragment plus the bind values for the optional project / issue-key filters. */
interface TranscriptFilter {
  where: string;
  params: unknown[];
}

/** Coerces a caller-supplied limit to a positive integer, falling back to the default. */
function normalizeTranscriptLimit(limit: unknown): number {
  const floored = Math.floor(Number(limit));
  return Number.isFinite(floored) && floored >= 1 ? floored : DEFAULT_TRANSCRIPT_LIMIT;
}

/** Builds the ` AND ...` clauses for the optional filters, numbering placeholders from `startIdx`. */
function buildTranscriptFilter(
  project: string | undefined,
  sessionIssueKey: string | undefined,
  startIdx: number
): TranscriptFilter {
  const clauses: string[] = [];
  const params: unknown[] = [];
  let idx = startIdx;
  if (project) {
    clauses.push(` AND project_key = $${idx++}`);
    params.push(project);
  }
  if (sessionIssueKey) {
    clauses.push(` AND session_issue_key = $${idx++}`);
    params.push(sessionIssueKey);
  }
  return { where: clauses.join(''), params };
}

/** Runs the embedding-distance query; `$1` is the formatted embedding, `$2` the limit. */
async function queryTranscriptsByVector(
  embeddingStr: string,
  limit: number,
  filter: TranscriptFilter
): Promise<TranscriptRow[]> {
  return query<TranscriptRow>(
    `SELECT id, session_uuid, session_issue_key, project_key, git_branch,
            message_count, tool_names,
            to_char(started_at, 'YYYY-MM-DD HH24:MI') as started_at,
            1 - (embedding <=> $1) as similarity
     FROM session_transcripts
     WHERE embedding IS NOT NULL${filter.where}
     ORDER BY embedding <=> $1
     LIMIT $2`,
    [embeddingStr, limit, ...filter.params]
  );
}

/** Runs the full-text query; `$1` is the raw search text, `$2` the limit. */
async function queryTranscriptsByKeyword(
  searchQuery: string,
  limit: number,
  filter: TranscriptFilter
): Promise<TranscriptRow[]> {
  return query<TranscriptRow>(
    `SELECT id, session_uuid, session_issue_key, project_key, git_branch,
            message_count, tool_names,
            to_char(started_at, 'YYYY-MM-DD HH24:MI') as started_at,
            ts_rank(tsv, plainto_tsquery('english', $1)) as rank,
            ts_headline('english', searchable_content,
              plainto_tsquery('english', $1),
              'StartSel=**,StopSel=**,MaxWords=25,MinWords=8') as snippet
     FROM session_transcripts
     WHERE tsv @@ plainto_tsquery('english', $1)${filter.where}
     ORDER BY rank DESC
     LIMIT $2`,
    [searchQuery, limit, ...filter.params]
  );
}

/**
 * Renders one result as markdown lines. Either row may be absent, but not both;
 * scores and the snippet are taken from whichever query actually produced them.
 */
function formatTranscriptEntry(
  vectorRow: TranscriptRow | undefined,
  keywordRow: TranscriptRow | undefined
): string[] {
  const row = keywordRow ?? vectorRow;
  if (!row) return [];

  const scoreParts: string[] = [];
  if (vectorRow && vectorRow.similarity != null) {
    const similarity = Number(vectorRow.similarity);
    if (Number.isFinite(similarity)) scoreParts.push(`${Math.round(similarity * 100)}% semantic`);
  }
  if (keywordRow && keywordRow.rank != null) {
    // Number() guards against a driver handing the score back as a string.
    const rank = Number(keywordRow.rank);
    if (Number.isFinite(rank)) scoreParts.push(`rank: ${rank.toFixed(3)}`);
  }
  const scores = scoreParts.length > 0 ? ` (${scoreParts.join(', ')})` : '';

  const issueLink = row.session_issue_key
    ? `[${row.session_issue_key}](https://agiliton.atlassian.net/browse/${row.session_issue_key})`
    : 'unlinked';
  const tools = row.tool_names?.slice(0, 5).join(', ') || 'none';

  const entry = [
    `**#${row.id}** ${issueLink} — ${row.project_key} (${row.git_branch || 'no-branch'})${scores}`,
    ` ${row.started_at || 'unknown date'} | ${row.message_count} msgs | Tools: ${tools}`,
  ];
  // Only the keyword query selects a snippet, so read it from that row explicitly.
  const snippet = keywordRow?.snippet;
  if (snippet) {
    entry.push(` > ${snippet.replace(/\n/g, ' ').substring(0, 150)}`);
  }
  entry.push('');
  return entry;
}

/**
 * Searches indexed session transcripts (CF-2394) and returns a markdown summary.
 *
 * Depending on `search_mode` this runs an embedding-distance query, a full-text
 * query, or both. In hybrid mode, when both produce hits, the id lists are fused
 * with `rrfMerge` and then passed to `rerank`; if the reranker returns nothing the
 * fused order is kept and truncated to `limit`.
 *
 * @param args Search text plus optional filters, limit and mode.
 * @returns Markdown listing of the matches, `'No matching transcripts found'`, or
 *   a string starting with `'Error:'` when the query is blank or a pure vector
 *   search cannot obtain an embedding.
 */
export async function transcriptSearch(args: TranscriptSearchArgs): Promise<string> {
  const { query: searchQuery, project, session_issue_key, search_mode = 'hybrid' } = args;

  // Reject blank input before spending an embedding call or a DB round trip on it.
  if (typeof searchQuery !== 'string' || searchQuery.trim() === '') {
    return 'Error: query must be a non-empty string';
  }

  // SQL LIMIT needs a non-negative integer; tool callers may send floats or junk.
  const limit = normalizeTranscriptLimit(args.limit);
  // Both queries bind $1 (search input) and $2 (limit), so filters start at $3.
  const filter = buildTranscriptFilter(project, session_issue_key, 3);

  // Vector search
  let vectorIds: number[] = [];
  const vectorRows = new Map<number, TranscriptRow>();
  let embeddingFailed = false;

  if (search_mode !== 'keyword') {
    const embedding = await getEmbedding(searchQuery);
    if (embedding) {
      const rows = await queryTranscriptsByVector(formatEmbedding(embedding), limit, filter);
      vectorIds = rows.map(r => r.id);
      for (const r of rows) vectorRows.set(r.id, r);
    } else {
      embeddingFailed = true;
      if (search_mode === 'vector') {
        return 'Error: Could not generate embedding for vector search';
      }
    }
  }

  // Keyword search
  let keywordIds: number[] = [];
  const keywordRows = new Map<number, TranscriptRow>();

  if (search_mode !== 'vector') {
    const rows = await queryTranscriptsByKeyword(searchQuery, limit, filter);
    keywordIds = rows.map(r => r.id);
    for (const r of rows) keywordRows.set(r.id, r);
  }

  // Merge results
  let finalIds: number[];
  let searchLabel: string;

  if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
    const mergedIds = rrfMerge(vectorIds, keywordIds).map(m => m.id as number);

    // Re-rank using snippets (falling back to the issue key for vector-only hits).
    const docs = mergedIds.map(id => {
      const r = keywordRows.get(id) ?? vectorRows.get(id);
      return r?.snippet || r?.session_issue_key || '';
    });
    const reranked = await rerank(searchQuery, docs, limit);
    if (reranked) {
      // Drop any index the reranker returns that does not map back to a merged id.
      finalIds = reranked
        .map(r => mergedIds[r.index])
        .filter((id): id is number => id !== undefined);
      searchLabel = 'hybrid+rerank';
    } else {
      finalIds = mergedIds.slice(0, limit);
      searchLabel = 'hybrid';
    }
  } else if (vectorIds.length > 0) {
    finalIds = vectorIds;
    searchLabel = 'vector';
  } else if (keywordIds.length > 0) {
    finalIds = keywordIds;
    searchLabel = embeddingFailed ? 'keyword (embedding unavailable)' : 'keyword';
  } else {
    return 'No matching transcripts found';
  }

  // Format output
  const entries = finalIds
    .map(id => formatTranscriptEntry(vectorRows.get(id), keywordRows.get(id)))
    .filter(entry => entry.length > 0);

  // Count what is actually rendered so the header never disagrees with the body.
  const lines = [`Session transcripts (${searchLabel}, ${entries.length} results):\n`];
  for (const entry of entries) lines.push(...entry);

  return lines.join('\n');
}
|
||||||
Reference in New Issue
Block a user