embedding.mjs
430 lines 17.5 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
1 /**
2 * Embedding provider abstraction. Ollama, OpenAI, Voyage AI, or DeepInfra from config.
3 * SPEC §4.4: embedding.provider, embedding.model; env for API keys.
4 *
5 * DeepInfra (OpenAI-compatible): same single DEEPINFRA_API_KEY can drive chat
6 * (lib/llm-complete.mjs) and embeddings here. Default model BAAI/bge-large-en-v1.5
7 * (1024 dim). Switching dimension requires a vault re-index.
8 */
9
// Base URL of a local Ollama daemon; used whenever no Ollama URL is configured.
const OLLAMA_DEFAULT_URL = 'http://localhost:11434';

// Hosted embedding endpoints. DeepInfra exposes an OpenAI-compatible route.
const OPENAI_EMBED_URL = 'https://api.openai.com/v1/embeddings';
const VOYAGE_EMBED_URL = 'https://api.voyageai.com/v1/embeddings';
const DEEPINFRA_EMBED_URL = 'https://api.deepinfra.com/v1/openai/embeddings';
14
/**
 * Turn Undici/Node `fetch` failures (often message-only "fetch failed") into an actionable Hub/API error.
 *
 * The detail section joins, with " — ", the error's own message (or its string form) and, when the
 * error carries a `cause`, the cause's `message` and `code=<code>`.
 *
 * When `model` is blank the message names the default model of the *selected* provider
 * (the same defaults `embedWithUsage` applies), not always the Ollama default.
 * Any provider other than ollama / voyage / deepinfra gets the OpenAI-flavoured message.
 *
 * @param {'ollama'|'openai'|'voyage'|'deepinfra'} provider
 * @param {string} endpointDescription - Ollama base URL or short label for hosted providers
 * @param {string} model
 * @param {unknown} err - Value thrown by `fetch`
 * @returns {string} Human-readable failure message with remediation hints
 */
export function formatEmbeddingFetchFailure(provider, endpointDescription, model, err) {
  // True when `holder` is a non-null object exposing a non-nullish `key`.
  const present = (holder, key) =>
    Boolean(holder) && typeof holder === 'object' && key in holder && holder[key] != null;

  const parts = [present(err, 'message') ? String(err.message) : String(err)];
  if (present(err, 'cause')) {
    const { cause } = err;
    if (present(cause, 'message')) parts.push(String(cause.message));
    if (present(cause, 'code')) parts.push(`code=${String(cause.code)}`);
  }
  const detail = parts.filter(Boolean).join(' — ');

  // Provider-aware fallback so a blank model never advertises another provider's model name.
  let fallbackModel = 'text-embedding-3-small';
  if (provider === 'ollama') fallbackModel = 'nomic-embed-text';
  else if (provider === 'voyage') fallbackModel = 'voyage-4-lite';
  else if (provider === 'deepinfra') fallbackModel = 'BAAI/bge-large-en-v1.5';
  const m = String(model || '').trim() || fallbackModel;

  if (provider === 'ollama') {
    return (
      `Ollama embeddings unreachable at ${endpointDescription} (${detail}). ` +
      `For Meaning search, start Ollama (\`ollama serve\`), run \`ollama pull ${m}\`, and confirm the URL in config/env ` +
      `(\`OLLAMA_URL\` / \`embedding.ollama_url\`). If \`localhost\` fails, try \`http://127.0.0.1:11434\` (IPv6 vs IPv4). ` +
      `Alternatively set \`EMBEDDING_PROVIDER=openai\` and \`OPENAI_API_KEY\`, or \`EMBEDDING_PROVIDER=voyage\` and \`VOYAGE_API_KEY\`.`
    );
  }
  if (provider === 'voyage') {
    return (
      `Voyage embeddings unreachable (${detail}). ` +
      `Set \`VOYAGE_API_KEY\`, confirm \`embedding.provider: voyage\` / \`EMBEDDING_PROVIDER=voyage\`, and model (e.g. voyage-4-lite). ` +
      `See https://docs.voyageai.com/docs/embeddings. After switching provider or dimension, re-index the vault.`
    );
  }
  if (provider === 'deepinfra') {
    return (
      `DeepInfra embeddings unreachable (${detail}). ` +
      `Set \`DEEPINFRA_API_KEY\`, confirm \`embedding.provider: deepinfra\` / \`EMBEDDING_PROVIDER=deepinfra\`, and model ` +
      `(e.g. ${JSON.stringify(m)}). See https://deepinfra.com/docs/embeddings. After switching provider or dimension, re-index the vault.`
    );
  }
  return (
    `OpenAI embeddings request failed (${detail}). ` +
    `Check \`OPENAI_API_KEY\`, network access to api.openai.com, and model ${JSON.stringify(m)}.`
  );
}
60
/**
 * Approximate how many input tokens a batch of texts costs, at roughly four characters per token.
 * Used for providers that do not report usage themselves (e.g. Ollama).
 * Each text is rounded up on its own; entries that are not strings contribute nothing.
 * @param {string[]} texts
 * @returns {number} Sum of the per-text estimates
 */
export function estimateEmbeddingInputTokens(texts) {
  const CHARS_PER_TOKEN = 4;
  let total = 0;
  for (const entry of texts) {
    if (typeof entry !== 'string') continue;
    total += Math.ceil(entry.length / CHARS_PER_TOKEN);
  }
  return total;
}
74
/**
 * Validate an Ollama API base URL and return it in canonical form, so `fetch()` is never handed a
 * relative or malformed URL (Undici would throw a context-free TypeError "Invalid URL").
 * @param {string|null|undefined} urlInput - Value from config or env; null or '' selects the default localhost URL.
 * @returns {string} Absolute http(s) base URL with one trailing slash removed
 * @throws {Error} When the value is blank after trimming, lacks an http(s) scheme, or cannot be parsed
 */
export function normalizeOllamaEmbedBaseUrl(urlInput) {
  const hasInput = urlInput != null && urlInput !== '';
  const raw = hasInput ? String(urlInput) : OLLAMA_DEFAULT_URL;
  const candidate = raw.trim();

  if (candidate.length === 0) {
    throw new Error(
      'Ollama embed base URL is empty after trim. Set OLLAMA_URL to an absolute http(s) URL ' +
        '(e.g. https://your-ollama-host:11434). On Netlify/serverless use EMBEDDING_PROVIDER=openai and OPENAI_API_KEY.'
    );
  }

  // `new URL("host:port")` succeeds by treating "host:" as a scheme, so demand http(s):// up front.
  const hasHttpScheme = /^https?:\/\//i.test(candidate);
  if (!hasHttpScheme) {
    throw new Error(
      `Ollama base URL must be an absolute http(s) URL starting with http:// or https://; got ${JSON.stringify(raw)}. ` +
        'Examples: http://localhost:11434 (local Hub only), https://ollama.example.com:11434'
    );
  }

  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    throw new Error(
      `Ollama base URL is not a valid URL; got ${JSON.stringify(raw)}. ` +
        'Examples: http://localhost:11434, https://ollama.example.com:11434'
    );
  }

  const isHttp = parsed.protocol === 'http:' || parsed.protocol === 'https:';
  if (!isHttp) {
    throw new Error(`Ollama base URL must use http or https; got protocol ${parsed.protocol} for ${parsed.href}`);
  }

  const serialized = parsed.toString();
  return serialized.replace(/\/$/, '');
}
111
112 /**
113 * @typedef {{ voyageInputType?: 'query'|'document' }} EmbedOptions
114 * Voyage retrieval: pass `voyageInputType: 'query'` for search queries and `'document'` for index chunks (recommended).
115 */
116
/**
 * Embed one or many texts and return only the vectors, in input order.
 * Thin wrapper over `embedWithUsage` that discards the token count.
 * @param {string[]} texts
 * @param {{ provider: string, model: string, ollama_url?: string }} config - From loadConfig().embedding
 * @param {EmbedOptions} [options]
 * @returns {Promise<number[][]>}
 */
export async function embed(texts, config, options = {}) {
  const result = await embedWithUsage(texts, config, options);
  return result.vectors;
}
128
/**
 * Same as {@link embed}, but also reports **embedding_input_tokens** for billing.
 *
 * Resolves the provider (default `ollama`, trimmed and lower-cased) and the model (configured value,
 * or a per-provider default when blank), then delegates to that provider's implementation.
 * API keys for hosted providers are read from the environment; an empty `texts` short-circuits
 * without contacting any provider.
 * @param {string[]} texts
 * @param {{ provider: string, model: string, ollama_url?: string }} config
 * @param {EmbedOptions} [options] - `voyageInputType` is forwarded only for Voyage, and only when 'query' or 'document'
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 * @throws {Error} When the provider is not one of ollama, openai, voyage, deepinfra
 */
export async function embedWithUsage(texts, config, options = {}) {
  if (!texts.length) return { vectors: [], embedding_input_tokens: 0 };

  const provider = String(config?.provider || 'ollama').trim().toLowerCase();

  // Per-provider default model; anything not listed falls back to the Ollama default.
  const defaultModels = new Map([
    ['openai', 'text-embedding-3-small'],
    ['voyage', 'voyage-4-lite'],
    ['deepinfra', 'BAAI/bge-large-en-v1.5'],
  ]);
  const configuredModel = config?.model != null ? String(config.model).trim() : '';
  const model = configuredModel !== '' ? configuredModel : defaultModels.get(provider) ?? 'nomic-embed-text';

  switch (provider) {
    case 'ollama':
      return embedOllamaWithUsage(texts, { model, url: config?.ollama_url || OLLAMA_DEFAULT_URL });
    case 'openai':
      return embedOpenAIWithUsage(texts, { model, apiKey: process.env.OPENAI_API_KEY });
    case 'voyage': {
      const requested = options?.voyageInputType;
      const inputType = requested === 'query' || requested === 'document' ? requested : undefined;
      return embedVoyageWithUsage(texts, { model, apiKey: process.env.VOYAGE_API_KEY, inputType });
    }
    case 'deepinfra':
      return embedDeepInfraWithUsage(texts, { model, apiKey: process.env.DEEPINFRA_API_KEY });
    default:
      throw new Error(`Unknown embedding provider: ${provider}. Supported: ollama, openai, voyage, deepinfra.`);
  }
}
163
/**
 * Backoff bounds, in milliseconds, for retrying a single HTTP 429.
 *
 * DEFAULT is the wait used when no usable `Retry-After` value is available, and also the floor
 * applied to parsed values; MAX is the ceiling applied to parsed values. Both are exported so
 * tests can reason about the bounds while injecting a faster `sleepFn` into
 * `embedDeepInfraWithUsage`. The retry budget is intentionally small because the bridge index
 * path runs on Netlify Functions, where every retry counts against the 60s sync cap
 * (one retry; a second 429 surfaces as an error).
 */
export const DEEPINFRA_429_BACKOFF_DEFAULT_MS = 1_000;
export const DEEPINFRA_429_BACKOFF_MAX_MS = 5_000;
172
/**
 * Convert a `Retry-After` header value into a wait time in milliseconds.
 *
 * The header may carry whole seconds or an HTTP-date. Usable values are clamped to the range
 * [`defaultMs`, DEEPINFRA_429_BACKOFF_MAX_MS] so that a "1 hour" header cannot strand a function.
 * A missing, empty, unparseable, or already-elapsed value yields `defaultMs` unchanged.
 *
 * @param {string|null|undefined} headerValue
 * @param {number} defaultMs
 * @returns {number} milliseconds to wait before retrying
 */
export function retryAfterHeaderMs(headerValue, defaultMs = DEEPINFRA_429_BACKOFF_DEFAULT_MS) {
  if (headerValue == null || headerValue === '') return defaultMs;

  const value = String(headerValue).trim();
  // Never wait less than the default nor more than the configured ceiling.
  const clamp = (ms) => Math.min(Math.max(ms, defaultMs), DEEPINFRA_429_BACKOFF_MAX_MS);

  // Whole seconds: the dominant form from DeepInfra/OpenAI.
  if (/^\d+$/.test(value)) {
    const seconds = parseInt(value, 10);
    const usable = Number.isFinite(seconds) && seconds >= 0;
    return usable ? clamp(seconds * 1000) : defaultMs;
  }

  // Otherwise treat it as an HTTP-date and wait until that instant.
  const target = Date.parse(value);
  if (!Number.isFinite(target)) return defaultMs;
  const remaining = target - Date.now();
  return Number.isFinite(remaining) && remaining > 0 ? clamp(remaining) : defaultMs;
}
198
/**
 * Embed texts through an Ollama server, issuing one POST to `<base>/api/embed` per text, in order.
 *
 * Token usage is estimated locally via `estimateEmbeddingInputTokens`. If `OLLAMA_API_KEY` is set
 * in the environment it is sent as a Bearer token.
 *
 * @param {string[]} texts
 * @param {{ model: string, url: string }} opts - `url` is validated by `normalizeOllamaEmbedBaseUrl`
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 * @throws {Error} On an invalid base URL, a network failure (original error attached as `cause`),
 *   a non-2xx response, or a response without an embedding
 */
async function embedOllamaWithUsage(texts, { model, url }) {
  const base = normalizeOllamaEmbedBaseUrl(url);
  const apiKey = process.env.OLLAMA_API_KEY;
  const headers = { 'Content-Type': 'application/json' };
  if (apiKey) headers.Authorization = `Bearer ${apiKey}`;

  const vectors = [];
  let embedding_input_tokens = 0;
  // One request per text. NOTE(review): `/api/embed` may accept an array `input`; confirm against
  // the supported Ollama versions before switching to a single batched call.
  for (const text of texts) {
    embedding_input_tokens += estimateEmbeddingInputTokens([text]);
    let res;
    try {
      res = await fetch(`${base}/api/embed`, {
        method: 'POST',
        headers,
        body: JSON.stringify({ model, input: text }),
      });
    } catch (e) {
      // Keep the low-level error reachable for callers and logs via `cause`.
      throw new Error(formatEmbeddingFetchFailure('ollama', base, model, e), { cause: e });
    }
    if (!res.ok) {
      const err = await res.text();
      throw new Error(`Ollama embed failed (${res.status}): ${err}`);
    }
    const data = await res.json();
    // Accept both response shapes: `embeddings` (array of vectors) and `embedding` (single vector).
    if (data.embeddings && data.embeddings[0]) {
      vectors.push(data.embeddings[0]);
    } else if (Array.isArray(data.embedding)) {
      vectors.push(data.embedding);
    } else {
      throw new Error('Ollama embed response missing embeddings');
    }
  }
  return { vectors, embedding_input_tokens };
}
239
/**
 * Embed a batch of texts with the OpenAI embeddings API in a single request.
 *
 * Vectors are returned in input order (the response items are sorted by their `index`).
 * Token usage comes from `usage.prompt_tokens` when the API reports it, otherwise from the
 * local estimate.
 *
 * @param {string[]} texts
 * @param {{ model: string, apiKey?: string }} opts
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 * @throws {Error} When the API key is missing or blank, on a network failure (original error
 *   attached as `cause`), or on a non-2xx response
 */
async function embedOpenAIWithUsage(texts, { model, apiKey }) {
  // Treat a whitespace-only key as missing, like the Voyage and DeepInfra paths do.
  if (!apiKey || !String(apiKey).trim()) {
    throw new Error('OpenAI embeddings require OPENAI_API_KEY environment variable.');
  }
  let res;
  try {
    res = await fetch(OPENAI_EMBED_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({ model, input: texts }),
    });
  } catch (e) {
    // Keep the low-level error reachable for callers and logs via `cause`.
    throw new Error(formatEmbeddingFetchFailure('openai', OPENAI_EMBED_URL, model, e), { cause: e });
  }
  if (!res.ok) {
    const err = await res.text();
    throw new Error(`OpenAI embed failed (${res.status}): ${err}`);
  }
  const data = await res.json();
  // Sort a copy by `index` so the output order matches the input order.
  const byIndex = [...(data.data || [])].sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
  const vectors = byIndex.map((d) => d.embedding);
  const embedding_input_tokens =
    data.usage && typeof data.usage.prompt_tokens === 'number'
      ? data.usage.prompt_tokens
      : estimateEmbeddingInputTokens(texts);
  return { vectors, embedding_input_tokens };
}
277
/**
 * Embed a batch of texts with the Voyage AI embeddings API in a single request.
 *
 * A single text is sent as a bare string, several as an array. `input_type` is included only
 * when `inputType` is provided. Vectors are returned in input order (response items sorted by
 * `index`); token usage comes from `usage.total_tokens` when reported, otherwise from the
 * local estimate.
 *
 * @param {string[]} texts
 * @param {{ model: string, apiKey?: string, inputType?: 'query'|'document' }} opts
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 * @throws {Error} When the API key is missing or blank, on a network failure (original error
 *   attached as `cause`), or on a non-2xx response
 */
async function embedVoyageWithUsage(texts, { model, apiKey, inputType }) {
  if (!apiKey || !String(apiKey).trim()) {
    throw new Error('Voyage embeddings require VOYAGE_API_KEY environment variable.');
  }
  const body = {
    model,
    input: texts.length === 1 ? texts[0] : texts,
    ...(inputType ? { input_type: inputType } : {}),
  };
  let res;
  try {
    res = await fetch(VOYAGE_EMBED_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify(body),
    });
  } catch (e) {
    // Keep the low-level error reachable for callers and logs via `cause`.
    throw new Error(formatEmbeddingFetchFailure('voyage', VOYAGE_EMBED_URL, model, e), { cause: e });
  }
  if (!res.ok) {
    const err = await res.text();
    throw new Error(`Voyage embed failed (${res.status}): ${err}`);
  }
  const data = await res.json();
  // Sort a copy by `index` so the output order matches the input order.
  const byIndex = [...(data.data || [])].sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
  const vectors = byIndex.map((d) => d.embedding);
  const embedding_input_tokens =
    data.usage && typeof data.usage.total_tokens === 'number'
      ? data.usage.total_tokens
      : estimateEmbeddingInputTokens(texts);
  return { vectors, embedding_input_tokens };
}
319
/**
 * DeepInfra OpenAI-compatible embeddings: same wire format as OpenAI, different host + key.
 *
 * 429 handling: bridge index runs concurrent embed calls (`lib/parallel-embed-pool.mjs`).
 * If we accidentally exceed DeepInfra's per-second limit, we want a short backoff + one
 * retry (driven by the `Retry-After` header when present) so a transient burst does not
 * fail an entire vault re-index. Once the retry budget is spent, a further 429 surfaces as an
 * error and gets reported to the user; we deliberately do not retry indefinitely because
 * Netlify's 60s sync-function cap leaves no room for exponential-backoff multi-minute waits.
 *
 * Token usage prefers `usage.prompt_tokens`, then `usage.total_tokens`, then the local estimate.
 *
 * @param {string[]} texts
 * @param {{
 *   model: string,
 *   apiKey?: string,
 *   fetchImpl?: typeof fetch,
 *   sleepFn?: (ms: number) => Promise<void>,
 *   maxRetries?: number,
 * }} opts - `fetchImpl` / `sleepFn` are injection points for tests; `maxRetries` defaults to 1
 *   when absent, negative, or not a finite number.
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 * @throws {Error} When the API key is missing or blank, on a network failure (original error
 *   attached as `cause`), or on a non-2xx response that is not retried
 */
export async function embedDeepInfraWithUsage(
  texts,
  { model, apiKey, fetchImpl, sleepFn, maxRetries } = {},
) {
  if (!apiKey || !String(apiKey).trim()) {
    throw new Error('DeepInfra embeddings require DEEPINFRA_API_KEY environment variable.');
  }
  const doFetch = typeof fetchImpl === 'function' ? fetchImpl : fetch;
  const doSleep =
    typeof sleepFn === 'function' ? sleepFn : (ms) => new Promise((r) => setTimeout(r, ms));
  const retryBudget = Number.isFinite(maxRetries) && maxRetries >= 0 ? Math.floor(maxRetries) : 1;

  // Only a 429 within budget loops again; every other outcome returns or throws.
  for (let attempt = 0; ; attempt++) {
    let res;
    try {
      res = await doFetch(DEEPINFRA_EMBED_URL, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify({ model, input: texts }),
      });
    } catch (e) {
      // Keep the low-level error reachable for callers and logs via `cause`.
      throw new Error(formatEmbeddingFetchFailure('deepinfra', DEEPINFRA_EMBED_URL, model, e), {
        cause: e,
      });
    }
    if (res.status === 429 && attempt < retryBudget) {
      const headerValue =
        typeof res.headers?.get === 'function' ? res.headers.get('retry-after') : null;
      const waitMs = retryAfterHeaderMs(headerValue);
      // Drain body to free the connection so the retry can reuse the keepalive socket.
      try {
        await res.text();
      } catch {
        // Best effort only: a failed drain must not prevent the retry.
      }
      await doSleep(waitMs);
      continue;
    }
    if (!res.ok) {
      const err = await res.text();
      throw new Error(`DeepInfra embed failed (${res.status}): ${err}`);
    }
    const data = await res.json();
    // Sort a copy by `index` so the output order matches the input order.
    const byIndex = [...(data.data || [])].sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
    const vectors = byIndex.map((d) => d.embedding);
    let embedding_input_tokens;
    if (data.usage && typeof data.usage.prompt_tokens === 'number') {
      embedding_input_tokens = data.usage.prompt_tokens;
    } else if (data.usage && typeof data.usage.total_tokens === 'number') {
      embedding_input_tokens = data.usage.total_tokens;
    } else {
      embedding_input_tokens = estimateEmbeddingInputTokens(texts);
    }
    return { vectors, embedding_input_tokens };
  }
}
396
/**
 * Vector dimension for the configured provider/model (used when creating the collection).
 *
 * Matching is by case-insensitive substring of the model name. Unrecognised models fall back to
 * a per-provider default: 1536 for OpenAI, 1024 for Voyage and DeepInfra, and 768 for every
 * other provider (Ollama's nomic-embed-text is 768).
 *
 * @param {{ provider?: string, model?: string }} config
 * @returns {number}
 */
export function embeddingDimension(config) {
  const provider = String(config?.provider || 'ollama').trim().toLowerCase();
  const m = String(config?.model || '').trim().toLowerCase();
  const has = (fragment) => m.includes(fragment);

  if (provider === 'openai') {
    // text-embedding-3-small 1536, text-embedding-3-large 3072, ada 1536
    return has('large') ? 3072 : 1536;
  }
  if (provider === 'voyage') {
    if (has('voyage-3-lite') && !has('3.5')) return 512;
    // voyage-code-2 and voyage-large-2 are 1536; voyage-large-2-instruct is a 1024-dim model
    // and must not be caught by the `large-2` rule.
    const isLarge2 = has('large-2') && !has('instruct') && !has('voyage-3') && !has('voyage-4');
    if (has('code-2') || isLarge2) return 1536;
    return 1024;
  }
  if (provider === 'deepinfra') {
    // Common DeepInfra embedding models. Default BAAI/bge-large-en-v1.5 is 1024.
    // Switching dimension requires a vault re-index — see EMBEDDING_MODEL in .env.example.
    if (has('qwen3-embedding-8b') || has('bge-en-icl')) return 4096;
    if (has('qwen3-embedding-4b')) return 2560;
    if (has('qwen3-embedding-0.6b')) return 1024;
    if (has('multilingual-e5-large') || has('bge-large') || has('bge-m3')) return 1024;
    if (has('bge-base') || has('e5-base')) return 768;
    if (has('bge-small') || has('e5-small')) return 384;
    return 1024; // safe default for the default model BAAI/bge-large-en-v1.5
  }
  // nomic-embed-text and most Ollama embed models
  return 768;
}
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 1 day ago