feat(CF-1315): Hybrid search with tsvector + RRF
Add PostgreSQL full-text search alongside pgvector for exact matches on Jira keys, error messages, file paths. Merge results with Reciprocal Rank Fusion. Default mode: hybrid, with graceful degradation to keyword-only when embeddings unavailable. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
53
migrations/034_hybrid_search.sql
Normal file
53
migrations/034_hybrid_search.sql
Normal file
@@ -0,0 +1,53 @@
|
||||
-- CF-1315: Hybrid search - tsvector columns, GIN indexes, triggers
|
||||
|
||||
-- 1. Add search_vector columns
|
||||
ALTER TABLE project_archives ADD COLUMN IF NOT EXISTS search_vector tsvector;
|
||||
ALTER TABLE memories ADD COLUMN IF NOT EXISTS search_vector tsvector;
|
||||
ALTER TABLE sessions ADD COLUMN IF NOT EXISTS search_vector tsvector;
|
||||
|
||||
-- 2. GIN indexes for fast full-text search
|
||||
CREATE INDEX IF NOT EXISTS idx_archives_search_vector ON project_archives USING gin(search_vector);
|
||||
CREATE INDEX IF NOT EXISTS idx_memories_search_vector ON memories USING gin(search_vector);
|
||||
CREATE INDEX IF NOT EXISTS idx_sessions_search_vector ON sessions USING gin(search_vector);
|
||||
|
||||
-- 3. Triggers to auto-populate search_vector on INSERT/UPDATE
|
||||
CREATE OR REPLACE FUNCTION update_archives_search_vector() RETURNS TRIGGER AS $$
|
||||
BEGIN
|
||||
NEW.search_vector := to_tsvector('english', coalesce(NEW.title, '') || ' ' || coalesce(NEW.content, ''));
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION update_memories_search_vector() RETURNS TRIGGER AS $$
|
||||
BEGIN
|
||||
NEW.search_vector := to_tsvector('english', coalesce(NEW.title, '') || ' ' || coalesce(NEW.content, ''));
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION update_sessions_search_vector() RETURNS TRIGGER AS $$
|
||||
BEGIN
|
||||
NEW.search_vector := to_tsvector('english', coalesce(NEW.summary, ''));
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
DROP TRIGGER IF EXISTS trg_archives_search_vector ON project_archives;
|
||||
CREATE TRIGGER trg_archives_search_vector
|
||||
BEFORE INSERT OR UPDATE OF title, content ON project_archives
|
||||
FOR EACH ROW EXECUTE FUNCTION update_archives_search_vector();
|
||||
|
||||
DROP TRIGGER IF EXISTS trg_memories_search_vector ON memories;
|
||||
CREATE TRIGGER trg_memories_search_vector
|
||||
BEFORE INSERT OR UPDATE OF title, content ON memories
|
||||
FOR EACH ROW EXECUTE FUNCTION update_memories_search_vector();
|
||||
|
||||
DROP TRIGGER IF EXISTS trg_sessions_search_vector ON sessions;
|
||||
CREATE TRIGGER trg_sessions_search_vector
|
||||
BEFORE INSERT OR UPDATE OF summary ON sessions
|
||||
FOR EACH ROW EXECUTE FUNCTION update_sessions_search_vector();
|
||||
|
||||
-- 4. Backfill existing rows (no-op if tables empty, safe to re-run)
|
||||
UPDATE project_archives SET search_vector = to_tsvector('english', coalesce(title, '') || ' ' || coalesce(content, '')) WHERE search_vector IS NULL;
|
||||
UPDATE memories SET search_vector = to_tsvector('english', coalesce(title, '') || ' ' || coalesce(content, '')) WHERE search_vector IS NULL;
|
||||
UPDATE sessions SET search_vector = to_tsvector('english', coalesce(summary, '')) WHERE search_vector IS NULL AND summary IS NOT NULL;
|
||||
Reference in New Issue
Block a user