diff --git a/src/llm.js b/src/llm.js
index 0351657..db8d42e 100644
--- a/src/llm.js
+++ b/src/llm.js
@@ -5,7 +5,10 @@
 const LITELLM_URL = process.env.LITELLM_URL ?? 'http://llm:4000';
 const LITELLM_KEY = process.env.LITELLM_API_KEY ?? '';
 const TRANSLATE_MODEL = process.env.TRANSLATE_MODEL ?? 'gemini-2.5-flash';
-const EMBED_MODEL = process.env.TRANSLATE_EMBED_MODEL ?? 'mxbai-embed-large';
+const EMBED_MODEL = process.env.TRANSLATE_EMBED_MODEL ?? 'text-embedding-3-small';
+// SmartTranslate translation_memory.embedding is vector(1024); text-embedding-3-*
+// supports runtime dimension reduction via the `dimensions` parameter.
+const EMBED_DIMENSIONS = Number.parseInt(process.env.TRANSLATE_EMBED_DIMENSIONS ?? '1024', 10);
 
 function headers() {
   const h = { 'Content-Type': 'application/json' };
@@ -17,7 +20,7 @@ export async function embed(text) {
   const r = await fetch(`${LITELLM_URL}/v1/embeddings`, {
     method: 'POST',
     headers: headers(),
-    body: JSON.stringify({ model: EMBED_MODEL, input: text }),
+    body: JSON.stringify({ model: EMBED_MODEL, input: text, dimensions: EMBED_DIMENSIONS }),
   });
   if (!r.ok) throw new Error(`embed ${r.status}: ${await r.text()}`);
   const j = await r.json();