feat(CF-1314): Content hashing to prevent duplicate embeddings

A SHA-256 content-hash check before the embedding API call eliminates
~60-80% of redundant embedding requests. Consolidates the dual INSERT
paths into a single INSERT with a nullable embedding column.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-18 08:28:11 +02:00
parent 77097ac65f
commit 1f499bd926
6 changed files with 127 additions and 81 deletions

View File

@@ -2,7 +2,7 @@
// Sessions auto-create CF Jira issues and post output on close (CF-762)
import { query, queryOne, execute } from '../db.js';
import { getEmbedding, formatEmbedding } from '../embeddings.js';
import { getEmbedding, formatEmbedding, generateContentHash } from '../embeddings.js';
import { createSessionIssue, addComment, transitionToDone, updateIssueDescription } from '../services/jira.js';
interface SessionStartArgs {
@@ -157,32 +157,24 @@ export async function sessionUpdate(args: SessionUpdateArgs): Promise<string> {
export async function sessionEnd(args: SessionEndArgs): Promise<string> {
const { session_id, summary, status = 'completed' } = args;
// CF-1314: Store content hash alongside embedding
const contentHash = generateContentHash(summary);
// Generate embedding for semantic search
const embedding = await getEmbedding(summary);
const embeddingValue = embedding ? formatEmbedding(embedding) : null;
if (embeddingValue) {
await execute(
`UPDATE sessions
SET ended_at = NOW(),
summary = $1,
embedding = $2,
status = $3,
updated_at = NOW()
WHERE id = $4`,
[summary, embeddingValue, status, session_id]
);
} else {
await execute(
`UPDATE sessions
SET ended_at = NOW(),
summary = $1,
status = $2,
updated_at = NOW()
WHERE id = $3`,
[summary, status, session_id]
);
}
await execute(
`UPDATE sessions
SET ended_at = NOW(),
summary = $1,
embedding = $2,
status = $3,
content_hash = $4,
updated_at = NOW()
WHERE id = $5`,
[summary, embeddingValue, status, contentHash, session_id]
);
// Get session details
const session = await queryOne<Session & { jira_issue_key: string | null }>(