embedding.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | /** |
| 2 | * Embedding provider abstraction. Ollama, OpenAI, Voyage AI, or DeepInfra from config. |
| 3 | * SPEC §4.4: embedding.provider, embedding.model; env for API keys. |
| 4 | * |
| 5 | * DeepInfra (OpenAI-compatible): same single DEEPINFRA_API_KEY can drive chat |
| 6 | * (lib/llm-complete.mjs) and embeddings here. Default model BAAI/bge-large-en-v1.5 |
| 7 | * (1024 dim). Switching dimension requires a vault re-index. |
| 8 | */ |
| 9 | |
// Base URL used for Ollama when no `ollama_url` is supplied.
const OLLAMA_DEFAULT_URL = 'http://localhost:11434';
// Hosted embeddings endpoints; the matching provider function POSTs its JSON request here.
const OPENAI_EMBED_URL = 'https://api.openai.com/v1/embeddings';
const VOYAGE_EMBED_URL = 'https://api.voyageai.com/v1/embeddings';
// DeepInfra's OpenAI-compatible embeddings route.
const DEEPINFRA_EMBED_URL = 'https://api.deepinfra.com/v1/openai/embeddings';
| 14 | |
/**
 * Turn Undici/Node `fetch` failures (often message-only "fetch failed") into an actionable Hub/API error.
 *
 * The detail part joins the error's own message with `err.cause.message` and `err.cause.code`
 * when those are present, separated by " — ".
 *
 * @param {'ollama'|'openai'|'voyage'|'deepinfra'} provider - Any other value gets the OpenAI wording.
 * @param {string} endpointDescription - Ollama base URL or short label for OpenAI (only echoed in the Ollama message).
 * @param {string} model - Blank or missing falls back to the default model name of `provider`.
 * @param {unknown} err - Whatever `fetch` threw.
 * @returns {string} Message suitable for `new Error(...)`.
 */
export function formatEmbeddingFetchFailure(provider, endpointDescription, model, err) {
  const errIsObject = Boolean(err) && typeof err === 'object';
  const raw = errIsObject && 'message' in err && err.message != null ? String(err.message) : String(err);
  const bits = [raw];
  const cause = errIsObject && 'cause' in err ? err.cause : null;
  if (cause && typeof cause === 'object') {
    if ('message' in cause && cause.message != null) bits.push(String(cause.message));
    if ('code' in cause && cause.code != null) bits.push(`code=${String(cause.code)}`);
  }
  const detail = bits.filter(Boolean).join(' — ');

  // Name the default model of the provider being reported, so a blank model never makes the
  // OpenAI/DeepInfra hints point at Ollama's `nomic-embed-text`. (The Voyage message does not
  // interpolate the model, so it needs no entry here.)
  let fallbackModel = 'text-embedding-3-small';
  if (provider === 'ollama') fallbackModel = 'nomic-embed-text';
  else if (provider === 'deepinfra') fallbackModel = 'BAAI/bge-large-en-v1.5';
  const m = String(model || '').trim() || fallbackModel;

  if (provider === 'ollama') {
    return (
      `Ollama embeddings unreachable at ${endpointDescription} (${detail}). ` +
      `For Meaning search, start Ollama (\`ollama serve\`), run \`ollama pull ${m}\`, and confirm the URL in config/env ` +
      `(\`OLLAMA_URL\` / \`embedding.ollama_url\`). If \`localhost\` fails, try \`http://127.0.0.1:11434\` (IPv6 vs IPv4). ` +
      `Alternatively set \`EMBEDDING_PROVIDER=openai\` and \`OPENAI_API_KEY\`, or \`EMBEDDING_PROVIDER=voyage\` and \`VOYAGE_API_KEY\`.`
    );
  }
  if (provider === 'voyage') {
    return (
      `Voyage embeddings unreachable (${detail}). ` +
      `Set \`VOYAGE_API_KEY\`, confirm \`embedding.provider: voyage\` / \`EMBEDDING_PROVIDER=voyage\`, and model (e.g. voyage-4-lite). ` +
      `See https://docs.voyageai.com/docs/embeddings. After switching provider or dimension, re-index the vault.`
    );
  }
  if (provider === 'deepinfra') {
    return (
      `DeepInfra embeddings unreachable (${detail}). ` +
      `Set \`DEEPINFRA_API_KEY\`, confirm \`embedding.provider: deepinfra\` / \`EMBEDDING_PROVIDER=deepinfra\`, and model ` +
      `(e.g. ${JSON.stringify(m)}). See https://deepinfra.com/docs/embeddings. After switching provider or dimension, re-index the vault.`
    );
  }
  return (
    `OpenAI embeddings request failed (${detail}). ` +
    `Check \`OPENAI_API_KEY\`, network access to api.openai.com, and model ${JSON.stringify(m)}.`
  );
}
| 60 | |
/**
 * Approximate the number of embedding input tokens at roughly four characters per token.
 * Used for providers that do not report usage (e.g. Ollama). Each text is rounded up
 * on its own; entries that are not strings count as zero.
 * @param {string[]} texts
 * @returns {number} Sum of the per-text estimates.
 */
export function estimateEmbeddingInputTokens(texts) {
  let total = 0;
  for (const entry of texts) {
    if (typeof entry !== 'string') continue;
    total += Math.ceil(entry.length / 4);
  }
  return total;
}
| 74 | |
/**
 * Normalize and validate Ollama API base URL so fetch() never receives a relative or malformed URL
 * (Undici throws TypeError "Invalid URL" with no context).
 *
 * Every trailing slash is removed, so callers can always append `/api/...` without
 * producing `//` in the request path.
 *
 * @param {string|null|undefined} urlInput - From config or env; null/empty uses default localhost.
 * @returns {string} Base URL without trailing slash
 * @throws {Error} When the value is blank after trimming, lacks an http(s) scheme, or cannot be parsed.
 */
export function normalizeOllamaEmbedBaseUrl(urlInput) {
  const raw = urlInput == null || urlInput === '' ? OLLAMA_DEFAULT_URL : String(urlInput);
  const trimmed = raw.trim();
  if (!trimmed) {
    throw new Error(
      'Ollama embed base URL is empty after trim. Set OLLAMA_URL to an absolute http(s) URL ' +
        '(e.g. https://your-ollama-host:11434). On Netlify/serverless use EMBEDDING_PROVIDER=openai and OPENAI_API_KEY.'
    );
  }
  // Node's URL() accepts "host:port" as a non-http "protocol" — reject missing scheme explicitly.
  if (!/^https?:\/\//i.test(trimmed)) {
    throw new Error(
      `Ollama base URL must be an absolute http(s) URL starting with http:// or https://; got ${JSON.stringify(raw)}. ` +
        'Examples: http://localhost:11434 (local Hub only), https://ollama.example.com:11434'
    );
  }
  let u;
  try {
    u = new URL(trimmed);
  } catch {
    throw new Error(
      `Ollama base URL is not a valid URL; got ${JSON.stringify(raw)}. ` +
        'Examples: http://localhost:11434, https://ollama.example.com:11434'
    );
  }
  // Defensive: the scheme test above should already guarantee this.
  if (u.protocol !== 'http:' && u.protocol !== 'https:') {
    throw new Error(`Ollama base URL must use http or https; got protocol ${u.protocol} for ${u.href}`);
  }
  // Strip all trailing slashes (not just one) to honour the "no trailing slash" contract.
  return u.toString().replace(/\/+$/, '');
}
| 111 | |
| 112 | /** |
| 113 | * @typedef {{ voyageInputType?: 'query'|'document' }} EmbedOptions |
| 114 | * Voyage retrieval: pass `voyageInputType: 'query'` for search queries and `'document'` for index chunks (recommended). |
| 115 | */ |
| 116 | |
/**
 * Embed one or many texts and return only the vectors, in input order.
 * Thin convenience wrapper over `embedWithUsage` that drops the token count.
 * @param {string[]} texts
 * @param {{ provider: string, model: string, ollama_url?: string }} config - From loadConfig().embedding
 * @param {EmbedOptions} [options]
 * @returns {Promise<number[][]>}
 */
export async function embed(texts, config, options = {}) {
  const result = await embedWithUsage(texts, config, options);
  return result.vectors;
}
| 128 | |
/**
 * Same as {@link embed} but also returns **embedding_input_tokens** for billing
 * (taken from the provider function's result; Ollama's is an estimate).
 *
 * Picks the provider from `config.provider` (default `ollama`, case-insensitive) and, when
 * `config.model` is blank, that provider's default model. API keys are read from the
 * environment: `OPENAI_API_KEY`, `VOYAGE_API_KEY`, `DEEPINFRA_API_KEY`.
 *
 * @param {string[]} texts
 * @param {{ provider: string, model: string, ollama_url?: string }} config
 * @param {EmbedOptions} [options] - `voyageInputType` is forwarded only for Voyage, and only if 'query' or 'document'.
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 * @throws {Error} For an unsupported provider name.
 */
export async function embedWithUsage(texts, config, options = {}) {
  // Nothing to embed: skip provider resolution entirely.
  if (!texts.length) return { vectors: [], embedding_input_tokens: 0 };

  const provider = String(config?.provider || 'ollama').trim().toLowerCase();

  let model = config?.model == null ? '' : String(config.model).trim();
  if (model === '') {
    switch (provider) {
      case 'openai':
        model = 'text-embedding-3-small';
        break;
      case 'voyage':
        model = 'voyage-4-lite';
        break;
      case 'deepinfra':
        model = 'BAAI/bge-large-en-v1.5';
        break;
      default:
        model = 'nomic-embed-text';
    }
  }

  switch (provider) {
    case 'ollama':
      return embedOllamaWithUsage(texts, { model, url: config?.ollama_url || OLLAMA_DEFAULT_URL });
    case 'openai':
      return embedOpenAIWithUsage(texts, { model, apiKey: process.env.OPENAI_API_KEY });
    case 'voyage': {
      const requested = options?.voyageInputType;
      const inputType = requested === 'query' || requested === 'document' ? requested : undefined;
      return embedVoyageWithUsage(texts, { model, apiKey: process.env.VOYAGE_API_KEY, inputType });
    }
    case 'deepinfra':
      return embedDeepInfraWithUsage(texts, { model, apiKey: process.env.DEEPINFRA_API_KEY });
    default:
      throw new Error(`Unknown embedding provider: ${provider}. Supported: ollama, openai, voyage, deepinfra.`);
  }
}
| 163 | |
/**
 * Backoff bounds, in milliseconds, for retrying a single 429. Exported so tests can keep
 * wall time low by calling `embedDeepInfraWithUsage` with a smaller `sleepFn`. The bridge
 * index path runs on Netlify Functions where every retry counts against the 60s sync cap,
 * so the retry budget is kept intentionally small (one retry; a second 429 surfaces as an error).
 */
export const DEEPINFRA_429_BACKOFF_DEFAULT_MS = 1000;
export const DEEPINFRA_429_BACKOFF_MAX_MS = 5000;

/**
 * Convert a fetch-Response `Retry-After` header into a wait in milliseconds.
 * The spec allows delta-seconds (an integer) or an HTTP-date; both are handled. Parsed
 * waits are raised to at least `defaultMs` and capped at DEEPINFRA_429_BACKOFF_MAX_MS.
 * A missing, unparseable, or already-elapsed value yields `defaultMs` unchanged.
 *
 * @param {string|null|undefined} headerValue
 * @param {number} defaultMs
 * @returns {number} milliseconds to wait before retrying
 */
export function retryAfterHeaderMs(headerValue, defaultMs = DEEPINFRA_429_BACKOFF_DEFAULT_MS) {
  if (headerValue == null || headerValue === '') return defaultMs;

  // Floor at defaultMs first, then cap — the cap wins so a "1 hour" header cannot strand a function.
  const clamp = (waitMs) => Math.min(Math.max(waitMs, defaultMs), DEEPINFRA_429_BACKOFF_MAX_MS);

  const value = String(headerValue).trim();

  // Delta-seconds is the dominant form from DeepInfra/OpenAI.
  const isDeltaSeconds = /^\d+$/.test(value);
  if (isDeltaSeconds) {
    const seconds = parseInt(value, 10);
    return Number.isFinite(seconds) && seconds >= 0 ? clamp(seconds * 1000) : defaultMs;
  }

  // HTTP-date form: an unparseable date gives NaN here and falls through to the default.
  const remainingMs = Date.parse(value) - Date.now();
  return Number.isFinite(remainingMs) && remainingMs > 0 ? clamp(remainingMs) : defaultMs;
}
| 198 | |
/**
 * Embed texts through an Ollama server, one `POST {base}/api/embed` request per text.
 * Ollama's response is not consulted for usage, so the token count is the local
 * ~4-chars-per-token estimate. A bearer token is sent when `OLLAMA_API_KEY` is set.
 *
 * @param {string[]} texts
 * @param {{ model: string, url: string }} opts - `url` is validated by `normalizeOllamaEmbedBaseUrl`.
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>} Vectors in input order.
 * @throws {Error} On an invalid base URL, a network failure (original error kept as `cause`),
 *   a non-2xx response, or a response that carries no embedding.
 */
async function embedOllamaWithUsage(texts, { model, url }) {
  const base = normalizeOllamaEmbedBaseUrl(url);
  const apiKey = process.env.OLLAMA_API_KEY;
  const headers = { 'Content-Type': 'application/json' };
  if (apiKey) headers['Authorization'] = 'Bearer ' + apiKey;
  const out = [];
  let embedding_input_tokens = 0;
  // Requests are issued sequentially, one text each.
  // TODO(review): /api/embed is documented to also accept an array `input`; batching would cut round-trips.
  for (const text of texts) {
    embedding_input_tokens += estimateEmbeddingInputTokens([text]);
    let res;
    try {
      res = await fetch(`${base}/api/embed`, {
        method: 'POST',
        headers,
        body: JSON.stringify({ model, input: text }),
      });
    } catch (e) {
      // Keep the underlying fetch error reachable for logging/debugging.
      throw new Error(formatEmbeddingFetchFailure('ollama', base, model, e), { cause: e });
    }
    if (!res.ok) {
      const err = await res.text();
      throw new Error(`Ollama embed failed (${res.status}): ${err}`);
    }
    const data = await res.json();
    if (data.embeddings && data.embeddings[0]) {
      out.push(data.embeddings[0]);
    } else if (Array.isArray(data.embedding)) {
      // Alternate single-vector response shape (`embedding` rather than `embeddings`).
      out.push(data.embedding);
    } else {
      throw new Error('Ollama embed response missing embeddings');
    }
  }
  return { vectors: out, embedding_input_tokens };
}
| 239 | |
/**
 * Embed texts with the OpenAI embeddings API in a single batched request.
 * Result rows are re-ordered by their `index` field so vectors line up with `texts`.
 * Token count comes from `usage.prompt_tokens`, or the local estimate when usage is absent.
 *
 * @param {string[]} texts
 * @param {{ model: string, apiKey?: string }} opts
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 * @throws {Error} When the key is missing/blank, the request fails (original error kept as
 *   `cause`), the response is non-2xx, or it does not contain one embedding per input.
 */
async function embedOpenAIWithUsage(texts, { model, apiKey }) {
  // Same blank-key rule as the Voyage/DeepInfra paths: whitespace-only is treated as missing.
  if (!apiKey || !String(apiKey).trim()) {
    throw new Error('OpenAI embeddings require OPENAI_API_KEY environment variable.');
  }
  let res;
  try {
    res = await fetch(OPENAI_EMBED_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({ model, input: texts }),
    });
  } catch (e) {
    throw new Error(formatEmbeddingFetchFailure('openai', OPENAI_EMBED_URL, model, e), { cause: e });
  }
  if (!res.ok) {
    const err = await res.text();
    throw new Error(`OpenAI embed failed (${res.status}): ${err}`);
  }
  const data = await res.json();
  const byIndex = (data.data || []).slice().sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
  const vectors = byIndex.map((d) => d.embedding);
  // Callers pair vectors with texts by position; a short or malformed result must fail here
  // rather than silently misalign downstream.
  if (vectors.length !== texts.length || !vectors.every((v) => Array.isArray(v))) {
    throw new Error(
      `OpenAI embed response missing embeddings (got ${vectors.length} for ${texts.length} inputs)`
    );
  }
  let embedding_input_tokens = 0;
  if (data.usage && typeof data.usage.prompt_tokens === 'number') {
    embedding_input_tokens = data.usage.prompt_tokens;
  } else {
    embedding_input_tokens = estimateEmbeddingInputTokens(texts);
  }
  return { vectors, embedding_input_tokens };
}
| 277 | |
/**
 * Embed texts with the Voyage AI embeddings API in a single request.
 * A lone text is sent as a plain string, several as an array; `input_type` is included only
 * when `inputType` is given. Result rows are re-ordered by `index`. Token count comes from
 * `usage.total_tokens`, or the local estimate when usage is absent.
 *
 * @param {string[]} texts
 * @param {{ model: string, apiKey?: string, inputType?: 'query'|'document' }} opts
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 * @throws {Error} When the key is missing/blank, the request fails (original error kept as
 *   `cause`), the response is non-2xx, or it does not contain one embedding per input.
 */
async function embedVoyageWithUsage(texts, { model, apiKey, inputType }) {
  if (!apiKey || !String(apiKey).trim()) {
    throw new Error('Voyage embeddings require VOYAGE_API_KEY environment variable.');
  }
  const body = {
    model,
    input: texts.length === 1 ? texts[0] : texts,
    ...(inputType ? { input_type: inputType } : {}),
  };
  let res;
  try {
    res = await fetch(VOYAGE_EMBED_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify(body),
    });
  } catch (e) {
    throw new Error(formatEmbeddingFetchFailure('voyage', VOYAGE_EMBED_URL, model, e), { cause: e });
  }
  if (!res.ok) {
    const err = await res.text();
    throw new Error(`Voyage embed failed (${res.status}): ${err}`);
  }
  const data = await res.json();
  const byIndex = (data.data || []).slice().sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
  const vectors = byIndex.map((d) => d.embedding);
  // Callers pair vectors with texts by position; a short or malformed result must fail here
  // rather than silently misalign downstream.
  if (vectors.length !== texts.length || !vectors.every((v) => Array.isArray(v))) {
    throw new Error(
      `Voyage embed response missing embeddings (got ${vectors.length} for ${texts.length} inputs)`
    );
  }
  let embedding_input_tokens = 0;
  if (data.usage && typeof data.usage.total_tokens === 'number') {
    embedding_input_tokens = data.usage.total_tokens;
  } else {
    embedding_input_tokens = estimateEmbeddingInputTokens(texts);
  }
  return { vectors, embedding_input_tokens };
}
| 319 | |
/**
 * DeepInfra OpenAI-compatible embeddings: same wire format as OpenAI, different host + key.
 *
 * 429 handling: bridge index runs concurrent embed calls (`lib/parallel-embed-pool.mjs`).
 * If we accidentally exceed DeepInfra's per-second limit, we want a short backoff + one
 * retry (driven by the `Retry-After` header when present) so a transient burst does not
 * fail an entire vault re-index. A second 429 surfaces as an error and gets reported to
 * the user; we deliberately do not retry indefinitely because Netlify's 60s sync-function
 * cap leaves no room for exponential-backoff multi-minute waits.
 *
 * Token count comes from `usage.prompt_tokens`, then `usage.total_tokens`, then the local estimate.
 *
 * @param {string[]} texts
 * @param {{
 *   model: string,
 *   apiKey?: string,
 *   fetchImpl?: typeof fetch,
 *   sleepFn?: (ms: number) => Promise<void>,
 *   maxRetries?: number,
 * }} opts - `fetchImpl`/`sleepFn` default to global `fetch` and a `setTimeout` sleep;
 *   `maxRetries` (finite, >= 0, floored) is the number of 429 retries, default 1.
 * @returns {Promise<{ vectors: number[][], embedding_input_tokens: number }>}
 * @throws {Error} When the key is missing/blank, the request fails (original error kept as
 *   `cause`), the response is non-2xx after the retry budget, or it does not contain one
 *   embedding per input.
 */
export async function embedDeepInfraWithUsage(
  texts,
  { model, apiKey, fetchImpl, sleepFn, maxRetries } = {},
) {
  if (!apiKey || !String(apiKey).trim()) {
    throw new Error('DeepInfra embeddings require DEEPINFRA_API_KEY environment variable.');
  }
  const doFetch = typeof fetchImpl === 'function' ? fetchImpl : fetch;
  const doSleep =
    typeof sleepFn === 'function' ? sleepFn : (ms) => new Promise((r) => setTimeout(r, ms));
  const retryBudget = Number.isFinite(maxRetries) && maxRetries >= 0 ? Math.floor(maxRetries) : 1;

  let attempt = 0;
  // Loop bounded by retryBudget; each non-429 outcome (success or other error) returns/throws.
  while (true) {
    let res;
    try {
      res = await doFetch(DEEPINFRA_EMBED_URL, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify({ model, input: texts }),
      });
    } catch (e) {
      throw new Error(formatEmbeddingFetchFailure('deepinfra', DEEPINFRA_EMBED_URL, model, e), { cause: e });
    }
    if (res.status === 429 && attempt < retryBudget) {
      const headerValue =
        typeof res.headers?.get === 'function' ? res.headers.get('retry-after') : null;
      const waitMs = retryAfterHeaderMs(headerValue);
      // Drain body to free the connection so the retry can reuse the keepalive socket.
      try {
        await res.text();
      } catch {
        // Best effort only: a failed drain must not block the retry.
      }
      await doSleep(waitMs);
      attempt++;
      continue;
    }
    if (!res.ok) {
      const err = await res.text();
      throw new Error(`DeepInfra embed failed (${res.status}): ${err}`);
    }
    const data = await res.json();
    const byIndex = (data.data || []).slice().sort((a, b) => (a.index ?? 0) - (b.index ?? 0));
    const vectors = byIndex.map((d) => d.embedding);
    // Callers pair vectors with texts by position; a short or malformed result must fail here
    // rather than silently misalign downstream.
    if (vectors.length !== texts.length || !vectors.every((v) => Array.isArray(v))) {
      throw new Error(
        `DeepInfra embed response missing embeddings (got ${vectors.length} for ${texts.length} inputs)`
      );
    }
    let embedding_input_tokens = 0;
    if (data.usage && typeof data.usage.prompt_tokens === 'number') {
      embedding_input_tokens = data.usage.prompt_tokens;
    } else if (data.usage && typeof data.usage.total_tokens === 'number') {
      embedding_input_tokens = data.usage.total_tokens;
    } else {
      embedding_input_tokens = estimateEmbeddingInputTokens(texts);
    }
    return { vectors, embedding_input_tokens };
  }
}
| 396 | |
/**
 * Vector dimension for the configured provider/model (for creating the collection).
 * Matching is by case-insensitive substring of the model name, so registry prefixes and
 * tags (`BAAI/...`, `:latest`) do not matter. Switching dimension requires a vault re-index.
 *
 * Unknown models fall back to the provider's usual size: 1536 (OpenAI), 1024 (Voyage,
 * DeepInfra), 768 (Ollama and any unrecognised provider — nomic-embed-text).
 *
 * @param {{ provider?: string, model?: string }} config
 * @returns {number}
 */
export function embeddingDimension(config) {
  const provider = String(config?.provider || 'ollama').trim().toLowerCase();
  const m = String(config?.model || '').trim().toLowerCase();

  if (provider === 'openai') {
    // text-embedding-3-small 1536, text-embedding-3-large 3072, ada 1536
    return m.includes('large') ? 3072 : 1536;
  }
  if (provider === 'voyage') {
    if (m.includes('voyage-3-lite') && !m.includes('3.5')) return 512;
    if (m.includes('code-2')) return 1536;
    // voyage-large-2 is 1536, but voyage-large-2-instruct is 1024 — do not let it match here.
    const isLegacyLarge2 =
      m.includes('large-2') && !m.includes('instruct') && !m.includes('voyage-3') && !m.includes('voyage-4');
    if (isLegacyLarge2) return 1536;
    return 1024;
  }
  if (provider === 'deepinfra') {
    // Common DeepInfra embedding models. Default BAAI/bge-large-en-v1.5 is 1024.
    // Switching dimension requires a vault re-index — see EMBEDDING_MODEL in .env.example.
    if (m.includes('qwen3-embedding-8b') || m.includes('bge-en-icl')) return 4096;
    if (m.includes('qwen3-embedding-4b')) return 2560;
    if (m.includes('qwen3-embedding-0.6b')) return 1024;
    if (m.includes('multilingual-e5-large') || m.includes('bge-large') || m.includes('bge-m3')) return 1024;
    if (m.includes('bge-base') || m.includes('e5-base')) return 768;
    if (m.includes('bge-small') || m.includes('e5-small')) return 384;
    return 1024; // safe default for the default model BAAI/bge-large-en-v1.5
  }
  // Ollama (and fall-through): a few well-known non-768 models, otherwise nomic-embed-text's 768.
  if (m.includes('mxbai-embed-large') || m.includes('bge-m3') || m.includes('bge-large')) return 1024;
  if (m.includes('all-minilm')) return 384;
  return 768;
}
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
1 day ago