feat(CF-1315): Hybrid search with tsvector + RRF
Add PostgreSQL full-text search alongside pgvector for exact matches on Jira keys, error messages, and file paths. Merge results with Reciprocal Rank Fusion. Default mode: hybrid, with graceful degradation to keyword-only when embeddings are unavailable.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
// Replaces file-based CLAUDE.md and plan files with database storage
|
||||
|
||||
import { query, queryOne, execute } from '../db.js';
|
||||
import { getEmbedding, formatEmbedding, generateContentHash } from '../embeddings.js';
|
||||
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge } from '../embeddings.js';
|
||||
import { getSessionId } from './session-id.js';
|
||||
|
||||
// ============================================================================
|
||||
@@ -451,10 +451,13 @@ export async function sessionDocumentationGenerate(args: SessionDocumentationGen
|
||||
// SEMANTIC SEARCH & ANALYTICS
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Retrieval strategy for session search.
 * - `hybrid`: run both vector and keyword search and merge the rankings
 * - `vector`: embedding similarity only
 * - `keyword`: PostgreSQL full-text search only
 */
type SearchMode = 'hybrid' | 'vector' | 'keyword';

/** Arguments accepted by the session search tool. */
interface SessionSemanticSearchArgs {
  /** Free-text search query. */
  query: string;
  /** Restrict results to sessions of this project when provided. */
  project?: string;
  /** Maximum number of results to return. */
  limit?: number;
  /** Retrieval strategy; see {@link SearchMode}. */
  search_mode?: SearchMode;
}
|
||||
|
||||
interface SessionSearchResult {
|
||||
@@ -467,60 +470,96 @@ interface SessionSearchResult {
|
||||
}
|
||||
|
||||
/**
 * Search completed sessions with hybrid, vector, or keyword retrieval (CF-1315).
 *
 * - `vector`: ranks sessions by pgvector distance (`<=>`) between the stored
 *   session embedding and the embedding of the query. Returns `[]` when no
 *   embedding can be generated for the query.
 * - `keyword`: ranks sessions by `ts_rank` over `sessions.search_vector`
 *   using `plainto_tsquery('english', ...)`.
 * - `hybrid` (default): runs both searches and merges the two id rankings
 *   with `rrfMerge`. If only one of the searches produced rows (for example
 *   because the embedding could not be generated), that search's ranking is
 *   returned as-is.
 *
 * The `similarity` field of each returned row is the score from the search
 * that produced it (vector score preferred when a session appears in both),
 * so in hybrid mode the values are not on a common scale; only the order of
 * the returned array reflects the merged ranking.
 *
 * @param args - Search query plus optional project filter, result limit
 *   (default 10) and search mode (default `'hybrid'`).
 * @returns Matching sessions, best match first; empty when nothing matched.
 */
export async function sessionSemanticSearch(args: SessionSemanticSearchArgs): Promise<SessionSearchResult[]> {
  const { query: searchQuery, project, limit = 10, search_mode = 'hybrid' } = args;

  // Shared optional filter clause. Both queries bind $1 (embedding or query
  // text) and $2 (limit), so optional filter placeholders start at startIdx.
  const buildFilter = (startIdx: number) => {
    let where = '';
    const params: unknown[] = [];
    let idx = startIdx;
    if (project) {
      where += ` AND s.project = $${idx++}`;
      params.push(project);
    }
    return { where, params, nextIdx: idx };
  };

  // Vector search
  let vectorIds: string[] = [];
  const vectorRows = new Map<string, SessionSearchResult>();

  if (search_mode !== 'keyword') {
    const queryEmbedding = await getEmbedding(searchQuery);
    if (queryEmbedding) {
      const embeddingFormatted = formatEmbedding(queryEmbedding);
      const filter = buildFilter(3);
      const params: unknown[] = [embeddingFormatted, limit, ...filter.params];

      const rows = await query<SessionSearchResult>(
        `SELECT s.id as session_id, s.session_number, s.project, s.summary, s.started_at,
                1 - (s.embedding <=> $1) as similarity
         FROM sessions s
         WHERE s.embedding IS NOT NULL AND s.status = 'completed'${filter.where}
         ORDER BY s.embedding <=> $1
         LIMIT $2`,
        params
      );
      vectorIds = rows.map(r => r.session_id);
      for (const r of rows) vectorRows.set(r.session_id, r);
    } else if (search_mode === 'vector') {
      // No embedding and no keyword fallback requested: nothing to search with.
      return [];
    }
    // In hybrid mode a missing embedding falls through to keyword-only search.
  }

  // Keyword search
  let keywordIds: string[] = [];
  const keywordRows = new Map<string, SessionSearchResult>();

  if (search_mode !== 'vector') {
    const filter = buildFilter(3);
    const params: unknown[] = [searchQuery, limit, ...filter.params];

    const rows = await query<SessionSearchResult>(
      `SELECT s.id as session_id, s.session_number, s.project, s.summary, s.started_at,
              ts_rank(s.search_vector, plainto_tsquery('english', $1)) as similarity
       FROM sessions s
       WHERE s.search_vector @@ plainto_tsquery('english', $1)
         AND s.status = 'completed'${filter.where}
       ORDER BY similarity DESC
       LIMIT $2`,
      params
    );
    keywordIds = rows.map(r => r.session_id);
    for (const r of rows) keywordRows.set(r.session_id, r);
  }

  // Merge results
  let finalIds: string[];

  if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
    // Each search already returned at most `limit` rows; the fused list can be
    // longer than `limit`, so trim it again.
    const merged = rrfMerge(vectorIds, keywordIds);
    finalIds = merged.slice(0, limit).map(m => m.id as string);
  } else if (vectorIds.length > 0) {
    finalIds = vectorIds;
  } else if (keywordIds.length > 0) {
    finalIds = keywordIds;
  } else {
    return [];
  }

  // Build final results in merged order, preserving each row's original score.
  const results: SessionSearchResult[] = [];
  for (const id of finalIds) {
    const r = vectorRows.get(id) || keywordRows.get(id);
    if (r) results.push(r);
  }

  return results;
}
|
||||
|
||||
Reference in New Issue
Block a user