-- SHA-256 hash check before the embedding API call eliminates ~60-80% of
-- redundant embedding requests. Consolidates the dual INSERT paths into a
-- single INSERT with a nullable embedding column.
--
-- Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

-- CF-1314: Content hashing to prevent duplicate embeddings
-- Adds a content_hash column to all embedding tables so duplicates can be
-- detected before the embedding API call.
-- Adds source_id columns for the future CF-1315 hybrid search.

-- Column additions.
-- content_hash and source_id are nullable TEXT with no default, and this
-- script does not backfill them, so existing rows keep NULL in both columns.
-- source_id is added only to project_archives and memories.
-- IF NOT EXISTS lets the migration be re-run without failing on columns
-- that are already present.
-- NOTE(review): content_hash presumably stores a SHA-256 digest of the row's
-- content; the encoding (hex vs. base64) is not fixed here -- confirm
-- against the code that writes it.

-- Tables that receive both columns: one ALTER TABLE per table, carrying
-- both ADD COLUMN actions.
ALTER TABLE project_archives
    ADD COLUMN IF NOT EXISTS content_hash TEXT,
    ADD COLUMN IF NOT EXISTS source_id TEXT;

ALTER TABLE memories
    ADD COLUMN IF NOT EXISTS content_hash TEXT,
    ADD COLUMN IF NOT EXISTS source_id TEXT;

-- Tables that receive content_hash only.
ALTER TABLE session_notes ADD COLUMN IF NOT EXISTS content_hash TEXT;
ALTER TABLE session_plans ADD COLUMN IF NOT EXISTS content_hash TEXT;
ALTER TABLE sessions ADD COLUMN IF NOT EXISTS content_hash TEXT;

-- Lookup indexes on content_hash, one per table that carries the column.
-- These are plain (non-unique) indexes: they speed up equality lookups by
-- hash but do not stop two rows from sharing the same hash.
-- IF NOT EXISTS matches on the index name only; an existing index with the
-- same name but a different definition is left untouched.
CREATE INDEX IF NOT EXISTS idx_archives_content_hash
    ON project_archives (content_hash);

CREATE INDEX IF NOT EXISTS idx_memories_content_hash
    ON memories (content_hash);

CREATE INDEX IF NOT EXISTS idx_session_notes_content_hash
    ON session_notes (content_hash);

CREATE INDEX IF NOT EXISTS idx_session_plans_content_hash
    ON session_plans (content_hash);

CREATE INDEX IF NOT EXISTS idx_sessions_content_hash
    ON sessions (content_hash);

-- Lookup indexes on source_id for the two tables that carry the column.
-- Plain (non-unique) indexes; IF NOT EXISTS matches on the index name only.
CREATE INDEX IF NOT EXISTS idx_archives_source_id
    ON project_archives (source_id);

CREATE INDEX IF NOT EXISTS idx_memories_source_id
    ON memories (source_id);