diff --git a/src/index.ts b/src/index.ts
index 2316be6..4bd05a9 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -81,6 +81,7 @@ import {
   sessionPatternDetection,
 } from './tools/session-docs.js';
 import { archiveAdd, archiveSearch, archiveList, archiveGet } from './tools/archives.js';
+import { transcriptSearch } from './tools/transcripts.js';
 import { projectArchive } from './tools/project-archive.js';
 
 // Create MCP server
@@ -454,6 +455,17 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
         );
         break;
 
+      // Transcripts (CF-2394)
+      case 'session_transcript_search':
+        result = await transcriptSearch({
+          query: a.query,
+          project: a.project,
+          session_issue_key: a.session_issue_key,
+          limit: a.limit,
+          search_mode: a.search_mode,
+        });
+        break;
+
       // Archives
       case 'archive_add':
         result = await archiveAdd({
diff --git a/src/tools/index.ts b/src/tools/index.ts
index 43c5a0e..ac9c7c4 100644
--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -653,6 +653,23 @@ export const toolDefinitions = [
     },
   },
 
+  // Transcript Tools (CF-2394)
+  {
+    name: 'session_transcript_search',
+    description: 'Search session transcripts (JSONL) using hybrid (vector + keyword) search. Finds past sessions by content — commands run, decisions made, plans discussed. Use when recovering context from prior sessions.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        query: { type: 'string', description: 'Search query (e.g., "hetzner disk resize", "auth migration plan")' },
+        project: { type: 'string', description: 'Filter by project key (optional)' },
+        session_issue_key: { type: 'string', description: 'Filter by session Jira issue key (optional)' },
+        limit: { type: 'number', description: 'Max results (default: 10)' },
+        search_mode: { type: 'string', enum: ['hybrid', 'vector', 'keyword'], description: 'Search mode (default: hybrid)' },
+      },
+      required: ['query'],
+    },
+  },
+
   // Archive Tools
   {
     name: 'archive_add',
diff --git a/src/tools/transcripts.ts b/src/tools/transcripts.ts
new file mode 100644
index 0000000..11651d6
--- /dev/null
+++ b/src/tools/transcripts.ts
@@ -0,0 +1,222 @@
+// Session transcript search (CF-2394)
+
+import { query } from '../db.js';
+import { getEmbedding, formatEmbedding, rrfMerge, rerank } from '../embeddings.js';
+
+/** Result count used when the caller supplies no usable `limit`. */
+const DEFAULT_LIMIT = 10;
+
+/** Prefix for turning a session issue key into a browsable link. */
+const ISSUE_BROWSE_URL = 'https://agiliton.atlassian.net/browse/';
+
+/** Arguments accepted by `transcriptSearch`. */
+interface TranscriptSearchArgs {
+  query: string;
+  project?: string;
+  session_issue_key?: string;
+  limit?: number;
+  search_mode?: 'hybrid' | 'vector' | 'keyword';
+}
+
+/**
+ * Row selected from `session_transcripts`. `similarity` is only selected by
+ * the vector query; `rank` and `snippet` only by the keyword query.
+ */
+interface TranscriptRow {
+  id: number;
+  session_uuid: string;
+  session_issue_key: string | null;
+  project_key: string;
+  git_branch: string | null;
+  message_count: number;
+  tool_names: string[] | null;
+  started_at: string | null;
+  similarity?: number;
+  rank?: number;
+  snippet?: string;
+}
+
+/**
+ * Coerce a caller-supplied limit to a positive integer. Missing, null,
+ * non-numeric, zero and negative values fall back to DEFAULT_LIMIT so the
+ * value bound to SQL `LIMIT` is never NULL or invalid.
+ */
+function normalizeLimit(limit: unknown): number {
+  const n = Math.floor(Number(limit));
+  return Number.isFinite(n) && n >= 1 ? n : DEFAULT_LIMIT;
+}
+
+/** Return `value` as a finite number, or undefined when absent or non-numeric. */
+function toFiniteNumber(value: unknown): number | undefined {
+  if (value === null || value === undefined) return undefined;
+  const n = Number(value);
+  return Number.isFinite(n) ? n : undefined;
+}
+
+/**
+ * Search session transcripts by vector similarity, keyword match, or both.
+ *
+ * In `hybrid` mode both queries run. When both return rows, their ids are
+ * merged with `rrfMerge` and handed to `rerank`; when only one returns rows
+ * (or no query embedding could be produced) that query's own ordering is used.
+ *
+ * @param args - Query text, optional project / session issue filters, result
+ *   limit (default 10) and search mode (default `hybrid`).
+ * @returns Formatted result text, `'No matching transcripts found'`, or a
+ *   string starting with `Error:` when the query is blank or vector-only
+ *   search has no embedding to work with.
+ */
+export async function transcriptSearch(args: TranscriptSearchArgs): Promise<string> {
+  const { query: searchQuery, project, session_issue_key, search_mode = 'hybrid' } = args;
+
+  // Arguments come straight from a tool call, so validate before querying.
+  if (typeof searchQuery !== 'string' || searchQuery.trim() === '') {
+    return 'Error: query must be a non-empty string';
+  }
+  const limit = normalizeLimit(args.limit);
+
+  // Optional filters; placeholders start at startIdx because $1 and $2 are taken.
+  const buildFilter = (startIdx: number) => {
+    let where = '';
+    const params: unknown[] = [];
+    let idx = startIdx;
+    if (project) {
+      where += ` AND project_key = $${idx++}`;
+      params.push(project);
+    }
+    if (session_issue_key) {
+      where += ` AND session_issue_key = $${idx++}`;
+      params.push(session_issue_key);
+    }
+    return { where, params, nextIdx: idx };
+  };
+
+  // Vector search
+  let vectorIds: number[] = [];
+  const vectorRows = new Map<number, TranscriptRow>();
+  let embeddingFailed = false;
+
+  if (search_mode !== 'keyword') {
+    const embedding = await getEmbedding(searchQuery);
+    if (embedding) {
+      const embeddingStr = formatEmbedding(embedding);
+      const filter = buildFilter(3);
+      const params: unknown[] = [embeddingStr, limit, ...filter.params];
+
+      const rows = await query<TranscriptRow>(
+        `SELECT id, session_uuid, session_issue_key, project_key, git_branch,
+                message_count, tool_names,
+                to_char(started_at, 'YYYY-MM-DD HH24:MI') as started_at,
+                1 - (embedding <=> $1) as similarity
+         FROM session_transcripts
+         WHERE embedding IS NOT NULL${filter.where}
+         ORDER BY embedding <=> $1
+         LIMIT $2`,
+        params
+      );
+      vectorIds = rows.map(r => r.id);
+      for (const r of rows) vectorRows.set(r.id, r);
+    } else {
+      embeddingFailed = true;
+      if (search_mode === 'vector') {
+        return 'Error: Could not generate embedding for vector search';
+      }
+    }
+  }
+
+  // Keyword search
+  let keywordIds: number[] = [];
+  const keywordRows = new Map<number, TranscriptRow>();
+
+  if (search_mode !== 'vector') {
+    const filter = buildFilter(3);
+    const params: unknown[] = [searchQuery, limit, ...filter.params];
+
+    const rows = await query<TranscriptRow>(
+      `SELECT id, session_uuid, session_issue_key, project_key, git_branch,
+              message_count, tool_names,
+              to_char(started_at, 'YYYY-MM-DD HH24:MI') as started_at,
+              ts_rank(tsv, plainto_tsquery('english', $1)) as rank,
+              ts_headline('english', searchable_content,
+                plainto_tsquery('english', $1),
+                'StartSel=**,StopSel=**,MaxWords=25,MinWords=8') as snippet
+       FROM session_transcripts
+       WHERE tsv @@ plainto_tsquery('english', $1)${filter.where}
+       ORDER BY rank DESC
+       LIMIT $2`,
+      params
+    );
+    keywordIds = rows.map(r => r.id);
+    for (const r of rows) keywordRows.set(r.id, r);
+  }
+
+  // Merge results
+  let finalIds: number[];
+  let searchLabel: string;
+
+  if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
+    const mergedIds = rrfMerge(vectorIds, keywordIds).map(m => m.id as number);
+
+    // Re-rank using snippets; vector-only rows have none, so fall back to the issue key.
+    const docs = mergedIds.map(id => {
+      const r = keywordRows.get(id) || vectorRows.get(id);
+      return r?.snippet || r?.session_issue_key || '';
+    });
+    const reranked = await rerank(searchQuery, docs, limit);
+    // Drop reranker indices that do not point at a merged id.
+    const rerankedIds = reranked
+      ? reranked.map(r => mergedIds[r.index]).filter((id): id is number => id !== undefined)
+      : [];
+
+    if (rerankedIds.length > 0) {
+      finalIds = rerankedIds;
+      searchLabel = 'hybrid+rerank';
+    } else {
+      // Reranker unavailable or returned nothing usable: keep the merged order.
+      finalIds = mergedIds.slice(0, limit);
+      searchLabel = 'hybrid';
+    }
+  } else if (vectorIds.length > 0) {
+    finalIds = vectorIds;
+    searchLabel = 'vector';
+  } else if (keywordIds.length > 0) {
+    finalIds = keywordIds;
+    searchLabel = embeddingFailed ? 'keyword (embedding unavailable)' : 'keyword';
+  } else {
+    return 'No matching transcripts found';
+  }
+
+  // Format output
+  const lines = [`Session transcripts (${searchLabel}, ${finalIds.length} results):\n`];
+  for (const id of finalIds) {
+    const vectorRow = vectorRows.get(id);
+    const keywordRow = keywordRows.get(id);
+    const r = vectorRow ?? keywordRow;
+    if (!r) continue;
+
+    // Scores come from the query that computed them; skip any that are missing.
+    const scoreParts: string[] = [];
+    const similarity = toFiniteNumber(vectorRow?.similarity);
+    if (similarity !== undefined) scoreParts.push(`${Math.round(similarity * 100)}% semantic`);
+    const rank = toFiniteNumber(keywordRow?.rank);
+    if (rank !== undefined) scoreParts.push(`rank: ${rank.toFixed(3)}`);
+    const scores = scoreParts.length > 0 ? ` (${scoreParts.join(', ')})` : '';
+
+    const issueLink = r.session_issue_key
+      ? `[${r.session_issue_key}](${ISSUE_BROWSE_URL}${r.session_issue_key})`
+      : 'unlinked';
+    const tools = r.tool_names?.slice(0, 5).join(', ') || 'none';
+
+    lines.push(`**#${r.id}** ${issueLink} — ${r.project_key} (${r.git_branch || 'no-branch'})${scores}`);
+    lines.push(` ${r.started_at || 'unknown date'} | ${r.message_count} msgs | Tools: ${tools}`);
+    // Only the keyword query selects a snippet, so read it from that row even
+    // when the transcript was also returned by the vector query.
+    const snippet = keywordRow?.snippet;
+    if (snippet) {
+      lines.push(` > ${snippet.replace(/\n/g, ' ').substring(0, 150)}`);
+    }
+    lines.push('');
+  }
+
+  return lines.join('\n');
+}