lib/bridge-index-preflight-estimate.mjs · aaronrene/knowtation — MuseHub

aaronrene / knowtation public

bridge-index-preflight-estimate.mjs

193 lines 8.3 KB

Raw

sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago

1	/**
2	* Pure preflight estimator for `hub/bridge/server.mjs POST /api/v1/index`.
3	*
4	* The bridge runs as a Netlify synchronous function (60 s platform max). After the
5	* OpenAI(1536) → DeepInfra(1024 BAAI/bge-large-en-v1.5) switch, per-batch embed
6	* latency went from ~1.2 s to ~2.5 s median (5–8.5 s tails). With ~50 chunks/batch
7	* and concurrency 5, that means a vault of ~1500+ chunks needing a full re-embed
8	* can blow past 60 s and the gateway returns a 504 mid-request.
9	*
10	* Rather than always paying the latency tax of a background-function kickoff, the
11	* sync handler does a cheap preflight (canister export + chunking + cache lookup
12	* are already happening) and then asks THIS module: "given chunks_to_embed +
13	* concurrency, will it fit in the sync budget?". When it won't, the handler kicks
14	* off a Netlify background function (15 min cap) and returns 202 immediately.
15	*
16	* Pure module: no I/O, no env reads, no time. Tests must be deterministic.
17	*/
18
/**
 * Per-batch embedding latency (median, in milliseconds) used by the estimator.
 * Sourced from `hub/bridge/index-timing.mjs` post-mortem on production logs after
 * the DeepInfra switch (median 2.5 s, p95 ~5 s). We use the median, NOT p95,
 * because we already have a hard ceiling (`SYNC_BUDGET_SECONDS_DEFAULT`) below the
 * platform max — a single tail batch that pushes us 4–5 s over our estimate is
 * still safely under 60 s, but planning every job for p95 would route 70 %+ of
 * jobs to background unnecessarily and cost an extra cold start each time.
 *
 * If you swap providers (e.g. back to OpenAI 1.2 s/batch, or to a faster
 * Anthropic embedding endpoint), update this constant — the rest of the math
 * scales linearly.
 *
 * Used as the default for `msPerBatch` in `estimateEmbedSeconds`.
 *
 * @type {number}
 */
export const DEFAULT_EMBED_MS_PER_BATCH = 2500;

/**
 * Sync budget, in seconds. Netlify's platform max for synchronous functions is
 * 60 s (docs.netlify.com/build/functions/overview); we reserve 30 s as headroom
 * for preflight + post-embed steps (chunk hash compute, ensureCollection
 * migration, upserts, persistVectorsToBlob) so a 30 s embed phase still finishes
 * inside the function timeout.
 *
 * `shouldUseBackgroundIndex` compares with `>=`, so an estimate exactly equal to
 * the budget is routed to background. `parseSyncBudgetSeconds` returns this
 * value when its input is unset or unparseable.
 *
 * @type {number}
 */
export const SYNC_BUDGET_SECONDS_DEFAULT = 30;

/**
 * Hard chunk-count ceiling for the sync path. Even when the time estimate looks
 * safe, indexing >= 500 chunks pulls in a lot of upsert + persist work whose
 * tail-latency is hard to predict (Blob upload contention, sqlite-vec single
 * writer). Routing those to background is cheaper than discovering at 58 s that
 * we're out of budget and the gateway already 504'd.
 *
 * The ceiling is inclusive: `shouldUseBackgroundIndex` routes to background when
 * `chunksToEmbed >= maxSyncChunks`. `parseMaxSyncChunks` returns this value when
 * its input is unset or unparseable.
 *
 * @type {number}
 */
export const MAX_SYNC_CHUNKS_DEFAULT = 500;

/**
 * Per-chunk overhead, in milliseconds, for the upsert + persist phases. ~5 ms is
 * a conservative upper bound observed in `index-timing.mjs` step `upsert_total`
 * for the bridge sqlite-vec backend (most upserts come in well under 2 ms/chunk;
 * we round up).
 *
 * Used as the default for `upsertMsPerChunk` in `estimateEmbedSeconds`, where it
 * is multiplied by `chunksToEmbed`.
 *
 * @type {number}
 */
export const UPSERT_MS_PER_CHUNK = 5;

/**
 * Fixed overhead, in milliseconds, added to every non-zero estimate (canister
 * export already done by the time we reach the estimator, but ensureCollection +
 * chunk hash compute + persistVectorsToBlob still need to run after the embed
 * phase).
 *
 * Used as the default for `fixedOverheadMs` in `estimateEmbedSeconds`.
 *
 * @type {number}
 */
export const FIXED_OVERHEAD_MS = 3000;
65
/**
 * Predict how many wall-clock seconds the embed + upsert + persist phases of a
 * re-index will take for a given amount of work and parallelism.
 *
 * Model:
 *   1. `batches = ceil(chunksToEmbed / batchSize)`
 *   2. `waves   = ceil(batches / concurrency)` — with a bounded worker pool the
 *      batches run in waves of at most `concurrency` at a time (matches
 *      `lib/parallel-embed-pool.mjs:runWithConcurrency`).
 *   3. `totalMs = waves * msPerBatch + chunksToEmbed * upsertMsPerChunk + fixedOverheadMs`
 *   4. The result is `ceil(totalMs / 1000)`.
 *
 * Example with the defaults: 120 chunks → 3 batches → 1 wave →
 * 2500 + 600 + 3000 = 6100 ms → 7 s.
 *
 * Every numeric field goes through `numberOr`, so missing or non-finite values
 * fall back to the defaults shown below.
 *
 * @param {{
 *   chunksToEmbed: number,
 *   batchSize: number,
 *   concurrency: number,
 *   msPerBatch?: number,
 *   upsertMsPerChunk?: number,
 *   fixedOverheadMs?: number,
 * }} input Work size and tuning knobs. `batchSize` defaults to 50, `concurrency`
 *   to 5, `msPerBatch` to `DEFAULT_EMBED_MS_PER_BATCH`, `upsertMsPerChunk` to
 *   `UPSERT_MS_PER_CHUNK`, `fixedOverheadMs` to `FIXED_OVERHEAD_MS`.
 * @returns {number} Estimated whole seconds. Returns 0 if `chunksToEmbed <= 0`
 *   (no fixed overhead is charged when there is nothing to embed).
 * @throws {TypeError} When `input` is null, undefined, or not an object.
 * @throws {RangeError} When `batchSize` or `concurrency` is below 1.
 */
export function estimateEmbedSeconds(input) {
  const hasInput = input != null && typeof input === 'object';
  if (!hasInput) {
    throw new TypeError('estimateEmbedSeconds: input is required');
  }

  // Nothing to embed: short-circuit before validating the tuning knobs.
  const chunkCount = numberOr(input.chunksToEmbed, 0);
  if (chunkCount <= 0) {
    return 0;
  }

  const chunksPerBatch = numberOr(input.batchSize, 50);
  const workerCount = numberOr(input.concurrency, 5);
  if (chunksPerBatch < 1) {
    throw new RangeError('estimateEmbedSeconds: batchSize must be >= 1');
  }
  if (workerCount < 1) {
    throw new RangeError('estimateEmbedSeconds: concurrency must be >= 1');
  }

  const batchLatencyMs = numberOr(input.msPerBatch, DEFAULT_EMBED_MS_PER_BATCH);
  const perChunkUpsertMs = numberOr(input.upsertMsPerChunk, UPSERT_MS_PER_CHUNK);
  const tailOverheadMs = numberOr(input.fixedOverheadMs, FIXED_OVERHEAD_MS);

  // Embed phase: batches are processed in waves of `workerCount`.
  const batchCount = Math.ceil(chunkCount / chunksPerBatch);
  const waveCount = Math.ceil(batchCount / workerCount);
  const embedPhaseMs = waveCount * batchLatencyMs;

  // Upsert phase scales linearly with the number of chunks.
  const upsertPhaseMs = chunkCount * perChunkUpsertMs;

  return Math.ceil((embedPhaseMs + upsertPhaseMs + tailOverheadMs) / 1000);
}
108
/**
 * Decide whether the sync handler should hand the re-index off to the
 * background function instead of running it inline.
 *
 * Rules are checked in this order and the FIRST match wins (a single
 * human-readable cause is all the calling timer + 202 response need):
 *   1. `dim_migration`           — `dimMigrationRequired` and `chunksToEmbed > 0`
 *   2. `first_index`             — `isFirstIndex` and `chunksToEmbed > 0`
 *   3. `chunk_count_exceeds_max` — `chunksToEmbed >= maxSyncChunks`
 *   4. `estimate_exceeds_budget` — `estimatedSeconds >= syncBudgetSeconds`
 * If none match, the job stays on the sync path with reason `fits_in_sync`.
 *
 * Numeric fields go through `numberOr`; the two flags are coerced with
 * `Boolean(...)`.
 *
 * @param {{
 *   chunksToEmbed: number,
 *   estimatedSeconds: number,
 *   syncBudgetSeconds?: number,
 *   maxSyncChunks?: number,
 *   dimMigrationRequired?: boolean,
 *   isFirstIndex?: boolean,
 * }} input `chunksToEmbed` and `estimatedSeconds` default to 0,
 *   `syncBudgetSeconds` to `SYNC_BUDGET_SECONDS_DEFAULT`, `maxSyncChunks` to
 *   `MAX_SYNC_CHUNKS_DEFAULT`.
 * @returns {{ shouldUseBackground: boolean, reason: 'fits_in_sync' | 'estimate_exceeds_budget' | 'chunk_count_exceeds_max' | 'dim_migration' | 'first_index' }}
 * @throws {TypeError} When `input` is null, undefined, or not an object.
 */
export function shouldUseBackgroundIndex(input) {
  const hasInput = input != null && typeof input === 'object';
  if (!hasInput) {
    throw new TypeError('shouldUseBackgroundIndex: input is required');
  }

  const chunkCount = numberOr(input.chunksToEmbed, 0);
  const estimate = numberOr(input.estimatedSeconds, 0);
  const budget = numberOr(input.syncBudgetSeconds, SYNC_BUDGET_SECONDS_DEFAULT);
  const chunkCeiling = numberOr(input.maxSyncChunks, MAX_SYNC_CHUNKS_DEFAULT);
  const migrating = Boolean(input.dimMigrationRequired);
  const firstIndex = Boolean(input.isFirstIndex);

  const hasWork = chunkCount > 0;

  // Ordered [reason, matched] pairs; array order is the precedence order.
  const backgroundRules = [
    ['dim_migration', migrating && hasWork],
    ['first_index', firstIndex && hasWork],
    ['chunk_count_exceeds_max', chunkCount >= chunkCeiling],
    ['estimate_exceeds_budget', estimate >= budget],
  ];

  const firstMatch = backgroundRules.find(([, matched]) => matched);
  if (firstMatch === undefined) {
    return { shouldUseBackground: false, reason: 'fits_in_sync' };
  }
  const [reason] = firstMatch;
  return { shouldUseBackground: true, reason };
}
155
/**
 * Turn a raw `INDEXER_SYNC_BUDGET_SECONDS` value (env string or numeric
 * override) into a usable whole-second budget.
 *
 * - `null`, `undefined`, `''`, or anything that does not parse to a finite
 *   number yields `SYNC_BUDGET_SECONDS_DEFAULT`.
 * - Strings are trimmed and parsed as base-10 integers (leading digits only).
 * - The result is clamped to `[5, 55]` and floored, so a typo can neither push
 *   the budget above the 60 s platform max nor effectively disable sync mode.
 *
 * @param {string|number|null|undefined} raw
 * @returns {number} Integer number of seconds in `[5, 55]`, or the default.
 */
export function parseSyncBudgetSeconds(raw) {
  const MIN_BUDGET_SECONDS = 5;
  const MAX_BUDGET_SECONDS = 55;

  const isUnset = raw === null || raw === undefined || raw === '';
  if (isUnset) {
    return SYNC_BUDGET_SECONDS_DEFAULT;
  }

  let seconds;
  if (typeof raw === 'number') {
    seconds = raw;
  } else {
    seconds = Number.parseInt(String(raw).trim(), 10);
  }
  if (!Number.isFinite(seconds)) {
    return SYNC_BUDGET_SECONDS_DEFAULT;
  }

  const clamped = Math.min(MAX_BUDGET_SECONDS, Math.max(MIN_BUDGET_SECONDS, seconds));
  return Math.floor(clamped);
}
172
/**
 * Turn a raw `INDEXER_MAX_SYNC_CHUNKS` value (env string or numeric override)
 * into a usable chunk-count ceiling.
 *
 * - `null`, `undefined`, `''`, or anything that does not parse to a finite
 *   number yields `MAX_SYNC_CHUNKS_DEFAULT`.
 * - Strings are trimmed and parsed as base-10 integers (leading digits only).
 * - The result is clamped to `[50, 5000]` and floored, so a typo can't disable
 *   the chunk-count safety net.
 *
 * @param {string|number|null|undefined} raw
 * @returns {number} Integer chunk count in `[50, 5000]`, or the default.
 */
export function parseMaxSyncChunks(raw) {
  const MIN_CHUNKS = 50;
  const MAX_CHUNKS = 5000;

  const isUnset = raw === null || raw === undefined || raw === '';
  if (isUnset) {
    return MAX_SYNC_CHUNKS_DEFAULT;
  }

  const chunks =
    typeof raw === 'number' ? raw : Number.parseInt(String(raw).trim(), 10);
  if (!Number.isFinite(chunks)) {
    return MAX_SYNC_CHUNKS_DEFAULT;
  }

  const clamped = Math.min(MAX_CHUNKS, Math.max(MIN_CHUNKS, chunks));
  return Math.floor(clamped);
}
188
/**
 * Coerce an optional numeric setting to a finite number, or return `fallback`.
 *
 * `fallback` is returned when `value` is `null`/`undefined`, is a blank (empty
 * or whitespace-only) string, or does not coerce to a finite number (`NaN`,
 * `±Infinity`, non-numeric strings). A legitimate `0` is preserved.
 *
 * @param {unknown} value Candidate value, typically a number or numeric string.
 * @param {number} fallback Value to use when `value` is missing or not finite.
 * @returns {number}
 */
function numberOr(value, fallback) {
  if (value == null) return fallback;
  // `Number('')` and `Number('   ')` evaluate to 0, which would silently turn an
  // unset string setting into a real zero. Treat blank strings as missing,
  // matching how the `parse*` helpers handle `''`.
  if (typeof value === 'string' && value.trim() === '') return fallback;
  const n = typeof value === 'number' ? value : Number(value);
  return Number.isFinite(n) ? n : fallback;
}

File History 2 commits

sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ 1 day ago

sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 2 days ago