lib/embedding.mjs · aaronrene/knowtation — MuseHub

aaronrene / knowtation public

embedding.mjs

430 lines 17.5 KB

Raw

sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago

/**
 * Embedding provider abstraction. Ollama, OpenAI, Voyage AI, or DeepInfra, selected from config.
 * SPEC §4.4: embedding.provider, embedding.model; API keys come from the environment.
 *
 * DeepInfra (OpenAI-compatible): the same single DEEPINFRA_API_KEY can drive chat
 * (lib/llm-complete.mjs) and embeddings here. Default model BAAI/bge-large-en-v1.5
 * (1024 dim). Switching dimension requires a vault re-index.
 */
9
/** Base URL used for Ollama when no URL is configured. */
const OLLAMA_DEFAULT_URL = 'http://localhost:11434';
/** OpenAI embeddings endpoint. */
const OPENAI_EMBED_URL = 'https://api.openai.com/v1/embeddings';
/** Voyage AI embeddings endpoint. */
const VOYAGE_EMBED_URL = 'https://api.voyageai.com/v1/embeddings';
/** DeepInfra OpenAI-compatible embeddings endpoint. */
const DEEPINFRA_EMBED_URL = 'https://api.deepinfra.com/v1/openai/embeddings';
14
/**
 * Turn Undici/Node `fetch` failures (often message-only "fetch failed") into an actionable Hub/API error.
 *
 * The detail shown in parentheses is built from `err.message` (or `String(err)` when there is
 * no message), followed by `err.cause.message` and `err.cause.code` when present, joined with " — ".
 * Any provider other than ollama / voyage / deepinfra gets the OpenAI wording.
 *
 * @param {'ollama'|'openai'|'voyage'|'deepinfra'} provider
 * @param {string} endpointDescription - Ollama base URL or short label; only interpolated into the Ollama message.
 * @param {string} model - Model name; when blank, the provider's own default model name is shown instead.
 * @param {unknown} err - The value thrown by `fetch`.
 * @returns {string} Human-readable message with remediation hints.
 */
export function formatEmbeddingFetchFailure(provider, endpointDescription, model, err) {
  const messageOf = (value) =>
    value && typeof value === 'object' && 'message' in value && value.message != null
      ? String(value.message)
      : null;

  const bits = [messageOf(err) ?? String(err)];
  const cause = err && typeof err === 'object' && 'cause' in err ? err.cause : null;
  if (cause && typeof cause === 'object') {
    const causeMessage = messageOf(cause);
    if (causeMessage != null) bits.push(causeMessage);
    if ('code' in cause && cause.code != null) bits.push(`code=${String(cause.code)}`);
  }
  // De-duplicate so a cause that merely repeats the outer message is not printed twice.
  const detail = [...new Set(bits)].filter(Boolean).join(' — ');

  // A blank model falls back to the default of the provider being described, so the hint
  // never suggests an Ollama model name in an OpenAI / Voyage / DeepInfra message.
  let fallbackModel = 'text-embedding-3-small';
  if (provider === 'ollama') fallbackModel = 'nomic-embed-text';
  else if (provider === 'voyage') fallbackModel = 'voyage-4-lite';
  else if (provider === 'deepinfra') fallbackModel = 'BAAI/bge-large-en-v1.5';
  const m = String(model || '').trim() || fallbackModel;

  if (provider === 'ollama') {
    return (
      `Ollama embeddings unreachable at ${endpointDescription} (${detail}). ` +
      `For Meaning search, start Ollama (\`ollama serve\`), run \`ollama pull ${m}\`, and confirm the URL in config/env ` +
      `(\`OLLAMA_URL\` / \`embedding.ollama_url\`). If \`localhost\` fails, try \`http://127.0.0.1:11434\` (IPv6 vs IPv4). ` +
      `Alternatively set \`EMBEDDING_PROVIDER=openai\` and \`OPENAI_API_KEY\`, or \`EMBEDDING_PROVIDER=voyage\` and \`VOYAGE_API_KEY\`.`
    );
  }
  if (provider === 'voyage') {
    return (
      `Voyage embeddings unreachable (${detail}). ` +
      `Set \`VOYAGE_API_KEY\`, confirm \`embedding.provider: voyage\` / \`EMBEDDING_PROVIDER=voyage\`, and model (e.g. voyage-4-lite). ` +
      `See https://docs.voyageai.com/docs/embeddings. After switching provider or dimension, re-index the vault.`
    );
  }
  if (provider === 'deepinfra') {
    return (
      `DeepInfra embeddings unreachable (${detail}). ` +
      `Set \`DEEPINFRA_API_KEY\`, confirm \`embedding.provider: deepinfra\` / \`EMBEDDING_PROVIDER=deepinfra\`, and model ` +
      `(e.g. ${JSON.stringify(m)}). See https://deepinfra.com/docs/embeddings. After switching provider or dimension, re-index the vault.`
    );
  }
  return (
    `OpenAI embeddings request failed (${detail}). ` +
    `Check \`OPENAI_API_KEY\`, network access to api.openai.com, and model ${JSON.stringify(m)}.`
  );
}
60
/**
 * Rough embedding input-token estimate (~4 chars per token) for providers that do not
 * return usage (e.g. Ollama).
 *
 * Each string contributes `ceil(length / 4)`; non-string entries contribute 0.
 * A nullish argument yields 0 and a single bare string is treated as one text
 * rather than being iterated character by character.
 *
 * @param {string[]|string|null|undefined} texts
 * @returns {number} Estimated token count (integer, >= 0).
 */
export function estimateEmbeddingInputTokens(texts) {
  if (texts == null) return 0;
  const list = typeof texts === 'string' ? [texts] : texts;
  let total = 0;
  for (const text of list) {
    if (typeof text === 'string') total += Math.ceil(text.length / 4);
  }
  return total;
}
74
/**
 * Normalize and validate the Ollama API base URL so fetch() never receives a relative or
 * malformed URL (Undici throws TypeError "Invalid URL" with no context).
 *
 * The result is meant to be used as a prefix (`${base}/api/embed`), so any query string or
 * fragment is dropped and all trailing slashes are removed.
 *
 * @param {string|null|undefined} urlInput - From config or env; null/empty uses the default localhost URL.
 * @returns {string} Absolute http(s) base URL without trailing slash, query, or fragment.
 * @throws {Error} When the value is blank after trimming, lacks an http(s) scheme, or does not parse as a URL.
 */
export function normalizeOllamaEmbedBaseUrl(urlInput) {
  const raw = urlInput == null || urlInput === '' ? OLLAMA_DEFAULT_URL : String(urlInput);
  const trimmed = raw.trim();
  if (!trimmed) {
    throw new Error(
      'Ollama embed base URL is empty after trim. Set OLLAMA_URL to an absolute http(s) URL ' +
        '(e.g. https://your-ollama-host:11434). On Netlify/serverless use EMBEDDING_PROVIDER=openai and OPENAI_API_KEY.'
    );
  }
  // Node's URL() accepts "host:port" as a non-http "protocol" — reject missing scheme explicitly.
  if (!/^https?:\/\//i.test(trimmed)) {
    throw new Error(
      `Ollama base URL must be an absolute http(s) URL starting with http:// or https://; got ${JSON.stringify(raw)}. ` +
        'Examples: http://localhost:11434 (local Hub only), https://ollama.example.com:11434'
    );
  }
  let parsed;
  try {
    parsed = new URL(trimmed);
  } catch {
    throw new Error(
      `Ollama base URL is not a valid URL; got ${JSON.stringify(raw)}. ` +
        'Examples: http://localhost:11434, https://ollama.example.com:11434'
    );
  }
  // Defensive: the scheme regex above already guarantees this, but keep the parsed value authoritative.
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    throw new Error(`Ollama base URL must use http or https; got protocol ${parsed.protocol} for ${parsed.href}`);
  }
  // A query or fragment would end up in front of the appended API path and break the request.
  parsed.search = '';
  parsed.hash = '';
  return parsed.toString().replace(/\/+$/, '');
}
111
/**
 * @typedef {{ voyageInputType?: 'query'|'document' }} EmbedOptions
 * Voyage retrieval: pass `voyageInputType: 'query'` for search queries and `'document'`
 * for index chunks (recommended).
 */

/**
 * Embed one or many texts and return only the vectors, in the same order as the input.
 * Thin wrapper over `embedWithUsage` that discards the token count.
 *
 * @param {string[]} texts
 * @param {{ provider: string, model: string, ollama_url?: string }} config - From loadConfig().embedding
 * @param {EmbedOptions} [options]
 * @returns {Promise<number[][]>}
 */
export async function embed(texts, config, options = {}) {
  const result = await embedWithUsage(texts, config, options);
  return result.vectors;
}
128
/**
 * Same as `embed` but also returns `embedding_input_tokens` for billing.
 * The count comes from the provider's reported usage when available (OpenAI / DeepInfra:
 * `usage.prompt_tokens`; Voyage: `usage.total_tokens`) and is estimated otherwise (Ollama).
 *
 * Provider defaults to `ollama`; a blank model falls back to the provider's default model.
 * API keys are read from the environment (`OPENAI_API_KEY`, `VOYAGE_API_KEY`, `DEEPINFRA_API_KEY`).
 * Empty input short-circuits without contacting any provider.
 *
 * @param {string[]} texts - Texts to embed. A single non-empty string is treated as one text.
 * @param {{ provider: string, model: string, ollama_url?: string }} config
 * @param {EmbedOptions} [options] - `voyageInputType` is forwarded to Voyage only when it is 'query' or 'document'.
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 * @throws {Error} When the provider is not one of ollama, openai, voyage, deepinfra.
 */
export async function embedWithUsage(texts, config, options = {}) {
  if (!texts.length) return { vectors: [], embedding_input_tokens: 0 };
  // A bare string would otherwise be iterated character by character on the Ollama path.
  const inputs = typeof texts === 'string' ? [texts] : texts;

  const provider = String(config?.provider || 'ollama').trim().toLowerCase();

  const configuredModel = config?.model != null ? String(config.model).trim() : '';
  let model = configuredModel;
  if (model === '') {
    if (provider === 'openai') model = 'text-embedding-3-small';
    else if (provider === 'voyage') model = 'voyage-4-lite';
    else if (provider === 'deepinfra') model = 'BAAI/bge-large-en-v1.5';
    else model = 'nomic-embed-text';
  }

  switch (provider) {
    case 'ollama':
      return embedOllamaWithUsage(inputs, { model, url: config?.ollama_url || OLLAMA_DEFAULT_URL });
    case 'openai':
      return embedOpenAIWithUsage(inputs, { model, apiKey: process.env.OPENAI_API_KEY });
    case 'voyage': {
      const requested = options?.voyageInputType;
      const inputType = requested === 'query' || requested === 'document' ? requested : undefined;
      return embedVoyageWithUsage(inputs, { model, apiKey: process.env.VOYAGE_API_KEY, inputType });
    }
    case 'deepinfra':
      return embedDeepInfraWithUsage(inputs, { model, apiKey: process.env.DEEPINFRA_API_KEY });
    default:
      throw new Error(`Unknown embedding provider: ${provider}. Supported: ollama, openai, voyage, deepinfra.`);
  }
}
163
/**
 * Default backoff before retrying a single 429, and the floor applied to any parsed
 * `Retry-After` value. Exported so tests can reference it. The bridge index path runs on
 * Netlify Functions where every retry costs against the 60s sync cap, so the retry budget
 * is intentionally small (one retry; a second 429 surfaces as an error).
 */
export const DEEPINFRA_429_BACKOFF_DEFAULT_MS = 1000;
/** Upper bound on any parsed `Retry-After` wait, so a long header cannot strand a function. */
export const DEEPINFRA_429_BACKOFF_MAX_MS = 5000;

/**
 * Parse a fetch-Response `Retry-After` header into a wait in milliseconds.
 * The spec allows delay-seconds or an HTTP-date; both are supported, and fractional
 * seconds (non-standard, but sent by some servers) are accepted as well.
 *
 * Parsed values are clamped to `[defaultMs, DEEPINFRA_429_BACKOFF_MAX_MS]`. An absent,
 * unparseable, or already-elapsed value returns `defaultMs` unchanged.
 *
 * @param {string|null|undefined} headerValue
 * @param {number} defaultMs
 * @returns {number} milliseconds to wait before retrying
 */
export function retryAfterHeaderMs(headerValue, defaultMs = DEEPINFRA_429_BACKOFF_DEFAULT_MS) {
  if (headerValue == null || headerValue === '') return defaultMs;
  const trimmed = String(headerValue).trim();
  const clamp = (ms) => Math.min(Math.max(ms, defaultMs), DEEPINFRA_429_BACKOFF_MAX_MS);

  // Seconds (integer is the dominant case from DeepInfra/OpenAI). Handled before Date.parse
  // because date parsing of bare numeric strings is implementation-defined.
  if (/^\d+(?:\.\d+)?$/.test(trimmed)) {
    const ms = Math.ceil(Number(trimmed) * 1000);
    return Number.isFinite(ms) ? clamp(ms) : defaultMs;
  }

  // HTTP-date fallback. Capped to MAX so a "1 hour" header does not strand a function.
  const target = Date.parse(trimmed);
  if (!Number.isFinite(target)) return defaultMs;
  const remaining = target - Date.now();
  if (!Number.isFinite(remaining) || remaining <= 0) return defaultMs;
  return clamp(remaining);
}
198
/**
 * Embed texts through Ollama's `/api/embed`, one request per text, in input order.
 * Sends `Authorization: Bearer …` when `OLLAMA_API_KEY` is set in the environment.
 * Token usage is estimated locally rather than read from the response.
 *
 * @param {string[]} texts
 * @param {{ model: string, url: string }} opts - `url` is validated via `normalizeOllamaEmbedBaseUrl`.
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 * @throws {Error} On an invalid base URL, a network failure (original error attached as `cause`),
 *   a non-2xx response, or a response without an embedding vector.
 */
async function embedOllamaWithUsage(texts, { model, url }) {
  const base = normalizeOllamaEmbedBaseUrl(url);
  const headers = { 'Content-Type': 'application/json' };
  const apiKey = process.env.OLLAMA_API_KEY;
  if (apiKey) headers.Authorization = 'Bearer ' + apiKey;

  const vectors = [];
  let embedding_input_tokens = 0;
  // Requests are issued sequentially so output order matches input order.
  for (const text of texts) {
    embedding_input_tokens += estimateEmbeddingInputTokens([text]);
    let res;
    try {
      res = await fetch(`${base}/api/embed`, {
        method: 'POST',
        headers,
        body: JSON.stringify({ model, input: text }),
      });
    } catch (e) {
      // Keep the low-level error reachable for logs/debugging.
      throw new Error(formatEmbeddingFetchFailure('ollama', base, model, e), { cause: e });
    }
    if (!res.ok) {
      const detail = await res.text();
      throw new Error(`Ollama embed failed (${res.status}): ${detail}`);
    }
    const data = await res.json();
    // Two response shapes are accepted: `embeddings` (array of vectors) and `embedding` (single vector).
    if (Array.isArray(data?.embeddings?.[0])) {
      vectors.push(data.embeddings[0]);
    } else if (Array.isArray(data?.embedding)) {
      vectors.push(data.embedding);
    } else {
      throw new Error('Ollama embed response missing embeddings');
    }
  }
  return { vectors, embedding_input_tokens };
}
239
/**
 * Embed texts through the OpenAI embeddings endpoint in a single batched request.
 * Response rows are ordered by their `index` field so vectors line up with the input.
 *
 * @param {string[]} texts
 * @param {{ model: string, apiKey?: string }} opts
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 *   `embedding_input_tokens` is `usage.prompt_tokens` when reported, otherwise a local estimate.
 * @throws {Error} When the API key is missing, the request fails (original error attached as
 *   `cause`), the response is non-2xx, or the number of returned vectors does not match the input.
 */
async function embedOpenAIWithUsage(texts, { model, apiKey }) {
  if (!apiKey) {
    throw new Error('OpenAI embeddings require OPENAI_API_KEY environment variable.');
  }
  let res;
  try {
    res = await fetch(OPENAI_EMBED_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({ model, input: texts }),
    });
  } catch (e) {
    throw new Error(formatEmbeddingFetchFailure('openai', OPENAI_EMBED_URL, model, e), { cause: e });
  }
  if (!res.ok) {
    const detail = await res.text();
    throw new Error(`OpenAI embed failed (${res.status}): ${detail}`);
  }
  const data = await res.json();
  const rows = Array.isArray(data?.data) ? data.data : [];
  const vectors = [...rows].sort((a, b) => (a.index ?? 0) - (b.index ?? 0)).map((row) => row.embedding);

  // Callers rely on one vector per input; fail loudly instead of returning a short list.
  const expected = Array.isArray(texts) ? texts.length : 1;
  if (vectors.length !== expected) {
    throw new Error(`OpenAI embed response returned ${vectors.length} embeddings for ${expected} inputs`);
  }

  const reported = data?.usage?.prompt_tokens;
  const embedding_input_tokens = typeof reported === 'number' ? reported : estimateEmbeddingInputTokens(texts);
  return { vectors, embedding_input_tokens };
}
277
/**
 * Embed texts through the Voyage AI embeddings endpoint in a single request.
 * A single text is sent as a plain string, several as an array; `input_type` is included
 * only when `inputType` is provided. Response rows are ordered by their `index` field.
 *
 * @param {string[]} texts
 * @param {{ model: string, apiKey?: string, inputType?: 'query'|'document' }} opts
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 *   `embedding_input_tokens` is `usage.total_tokens` when reported, otherwise a local estimate.
 * @throws {Error} When the API key is missing/blank, the request fails (original error attached
 *   as `cause`), the response is non-2xx, or the number of returned vectors does not match the input.
 */
async function embedVoyageWithUsage(texts, { model, apiKey, inputType }) {
  if (!apiKey || !String(apiKey).trim()) {
    throw new Error('Voyage embeddings require VOYAGE_API_KEY environment variable.');
  }
  const body = {
    model,
    input: texts.length === 1 ? texts[0] : texts,
    ...(inputType ? { input_type: inputType } : {}),
  };
  let res;
  try {
    res = await fetch(VOYAGE_EMBED_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify(body),
    });
  } catch (e) {
    throw new Error(formatEmbeddingFetchFailure('voyage', VOYAGE_EMBED_URL, model, e), { cause: e });
  }
  if (!res.ok) {
    const detail = await res.text();
    throw new Error(`Voyage embed failed (${res.status}): ${detail}`);
  }
  const data = await res.json();
  const rows = Array.isArray(data?.data) ? data.data : [];
  const vectors = [...rows].sort((a, b) => (a.index ?? 0) - (b.index ?? 0)).map((row) => row.embedding);

  // Callers rely on one vector per input; fail loudly instead of returning a short list.
  const expected = Array.isArray(texts) ? texts.length : 1;
  if (vectors.length !== expected) {
    throw new Error(`Voyage embed response returned ${vectors.length} embeddings for ${expected} inputs`);
  }

  const reported = data?.usage?.total_tokens;
  const embedding_input_tokens = typeof reported === 'number' ? reported : estimateEmbeddingInputTokens(texts);
  return { vectors, embedding_input_tokens };
}
319
/**
 * DeepInfra OpenAI-compatible embeddings: same wire format as OpenAI, different host + key.
 *
 * 429 handling: bridge index runs concurrent embed calls (`lib/parallel-embed-pool.mjs`).
 * If we accidentally exceed DeepInfra's per-second limit, we want a short backoff + one
 * retry (driven by the `Retry-After` header when present) so a transient burst does not
 * fail an entire vault re-index. A second 429 surfaces as an error and gets reported to
 * the user; we deliberately do not retry indefinitely because Netlify's 60s sync-function
 * cap leaves no room for exponential-backoff multi-minute waits.
 *
 * @param {string[]} texts
 * @param {{
 *   model: string,
 *   apiKey?: string,
 *   fetchImpl?: typeof fetch,
 *   sleepFn?: (ms: number) => Promise<void>,
 *   maxRetries?: number,
 * }} opts - `fetchImpl` / `sleepFn` are injection points (e.g. for tests); `maxRetries` is the
 *   number of 429 retries and defaults to 1 when not a finite, non-negative number.
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 *   `embedding_input_tokens` is `usage.prompt_tokens`, else `usage.total_tokens`, else a local estimate.
 * @throws {Error} When the API key is missing/blank, the request fails (original error attached
 *   as `cause`), the final response is non-2xx, or the number of returned vectors does not match the input.
 */
export async function embedDeepInfraWithUsage(
  texts,
  { model, apiKey, fetchImpl, sleepFn, maxRetries } = {},
) {
  if (!apiKey || !String(apiKey).trim()) {
    throw new Error('DeepInfra embeddings require DEEPINFRA_API_KEY environment variable.');
  }
  const doFetch = typeof fetchImpl === 'function' ? fetchImpl : fetch;
  const doSleep =
    typeof sleepFn === 'function' ? sleepFn : (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const retryBudget = Number.isFinite(maxRetries) && maxRetries >= 0 ? Math.floor(maxRetries) : 1;

  // Bounded by retryBudget: only a 429 with budget left loops; every other outcome returns or throws.
  for (let attempt = 0; ; attempt++) {
    let res;
    try {
      res = await doFetch(DEEPINFRA_EMBED_URL, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify({ model, input: texts }),
      });
    } catch (e) {
      throw new Error(formatEmbeddingFetchFailure('deepinfra', DEEPINFRA_EMBED_URL, model, e), { cause: e });
    }

    if (res.status === 429 && attempt < retryBudget) {
      const headerValue = typeof res.headers?.get === 'function' ? res.headers.get('retry-after') : null;
      const waitMs = retryAfterHeaderMs(headerValue);
      // Drain body to free the connection so the retry can reuse the keepalive socket.
      try {
        await res.text();
      } catch {
        // Best effort only: a failed drain must not prevent the retry.
      }
      await doSleep(waitMs);
      continue;
    }

    if (!res.ok) {
      const detail = await res.text();
      throw new Error(`DeepInfra embed failed (${res.status}): ${detail}`);
    }

    const data = await res.json();
    const rows = Array.isArray(data?.data) ? data.data : [];
    const vectors = [...rows].sort((a, b) => (a.index ?? 0) - (b.index ?? 0)).map((row) => row.embedding);

    // Callers rely on one vector per input; fail loudly instead of returning a short list.
    const expected = Array.isArray(texts) ? texts.length : 1;
    if (vectors.length !== expected) {
      throw new Error(`DeepInfra embed response returned ${vectors.length} embeddings for ${expected} inputs`);
    }

    const usage = data?.usage;
    let embedding_input_tokens;
    if (typeof usage?.prompt_tokens === 'number') {
      embedding_input_tokens = usage.prompt_tokens;
    } else if (typeof usage?.total_tokens === 'number') {
      embedding_input_tokens = usage.total_tokens;
    } else {
      embedding_input_tokens = estimateEmbeddingInputTokens(texts);
    }
    return { vectors, embedding_input_tokens };
  }
}
396
/**
 * Vector dimension for the configured provider/model (used when creating the collection).
 * Matching is by case-insensitive substring of the model name. Switching to a model with a
 * different dimension requires a vault re-index.
 *
 * @param {{ provider?: string, model?: string }} config - Provider defaults to `ollama`.
 * @returns {number} Embedding vector length.
 */
export function embeddingDimension(config) {
  const provider = String(config?.provider || 'ollama').trim().toLowerCase();
  const m = String(config?.model || '').trim().toLowerCase();

  if (provider === 'openai') {
    // text-embedding-3-small 1536, text-embedding-3-large 3072, ada 1536
    return m.includes('large') ? 3072 : 1536;
  }

  if (provider === 'voyage') {
    if (m.includes('voyage-3-lite') && !m.includes('3.5')) return 512;
    // voyage-large-2-instruct is 1024-dim; it must not fall into the 1536 `large-2` rule below.
    if (m.includes('large-2-instruct')) return 1024;
    if (m.includes('code-2') || (m.includes('large-2') && !m.includes('voyage-3') && !m.includes('voyage-4'))) {
      return 1536;
    }
    return 1024;
  }

  if (provider === 'deepinfra') {
    // Common DeepInfra embedding models. Default BAAI/bge-large-en-v1.5 is 1024.
    // Switching dimension requires a vault re-index — see EMBEDDING_MODEL in .env.example.
    if (m.includes('qwen3-embedding-8b') || m.includes('bge-en-icl')) return 4096;
    if (m.includes('qwen3-embedding-4b')) return 2560;
    if (m.includes('qwen3-embedding-0.6b')) return 1024;
    if (m.includes('multilingual-e5-large') || m.includes('bge-large') || m.includes('bge-m3')) return 1024;
    if (m.includes('bge-base') || m.includes('e5-base')) return 768;
    if (m.includes('bge-small') || m.includes('e5-small')) return 384;
    return 1024; // safe default for the default model BAAI/bge-large-en-v1.5
  }

  // Ollama (and any other provider value): a few widely used models are not 768-dim.
  if (m.includes('mxbai-embed-large') || m.includes('bge-m3')) return 1024;
  if (m.includes('all-minilm')) return 384;
  // nomic-embed-text and most Ollama embed models
  return 768;
}

File History 2 commits

sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ 1 day ago

sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 1 day ago