Files
session-mcp/migrations/033_content_hash_dedup.sql
Christian Gick 1f499bd926 feat(CF-1314): Content hashing to prevent duplicate embeddings
SHA-256 hash check before embedding API call eliminates ~60-80% of
redundant embedding requests. Consolidates dual INSERT paths to single
INSERT with nullable embedding column.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 08:28:11 +02:00

21 lines
1.2 KiB
SQL

-- CF-1314: content hashing so duplicate content can be detected before the
-- embedding API is called.
--
-- Column additions. Every clause uses IF NOT EXISTS, so re-running this
-- migration does not fail on columns that are already present. No NOT NULL
-- or DEFAULT is specified, so the new columns are nullable.
--
--   content_hash : added to each embedding table for the dedup lookup
--   source_id    : added to project_archives and memories only, reserved
--                  for the future CF-1315 hybrid search

-- Tables that receive both content_hash and source_id.
ALTER TABLE project_archives
    ADD COLUMN IF NOT EXISTS content_hash TEXT,
    ADD COLUMN IF NOT EXISTS source_id TEXT;

ALTER TABLE memories
    ADD COLUMN IF NOT EXISTS content_hash TEXT,
    ADD COLUMN IF NOT EXISTS source_id TEXT;

-- Tables that receive content_hash only.
ALTER TABLE session_notes
    ADD COLUMN IF NOT EXISTS content_hash TEXT;

ALTER TABLE session_plans
    ADD COLUMN IF NOT EXISTS content_hash TEXT;

ALTER TABLE sessions
    ADD COLUMN IF NOT EXISTS content_hash TEXT;
-- Non-unique lookup indexes on content_hash, one per table that carries the
-- column. IF NOT EXISTS keeps each statement safe to re-run.
CREATE INDEX IF NOT EXISTS idx_archives_content_hash
    ON project_archives (content_hash);

CREATE INDEX IF NOT EXISTS idx_memories_content_hash
    ON memories (content_hash);

CREATE INDEX IF NOT EXISTS idx_session_notes_content_hash
    ON session_notes (content_hash);

CREATE INDEX IF NOT EXISTS idx_session_plans_content_hash
    ON session_plans (content_hash);

CREATE INDEX IF NOT EXISTS idx_sessions_content_hash
    ON sessions (content_hash);

-- Non-unique lookup indexes on source_id for the two tables that have it.
CREATE INDEX IF NOT EXISTS idx_archives_source_id
    ON project_archives (source_id);

CREATE INDEX IF NOT EXISTS idx_memories_source_id
    ON memories (source_id);