bridge-index-preflight-estimate.mjs
193 lines 8.3 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
1 /**
2 * Pure preflight estimator for `hub/bridge/server.mjs POST /api/v1/index`.
3 *
4 * The bridge runs as a Netlify synchronous function (60 s platform max). After the
5 * OpenAI(1536) → DeepInfra(1024 BAAI/bge-large-en-v1.5) switch, per-batch embed
6 * latency went from ~1.2 s to ~2.5 s median (5–8.5 s tails). With ~50 chunks/batch
7 * and concurrency 5, that means a vault of ~1500+ chunks needing a full re-embed
8 * can blow past 60 s and the gateway returns a 504 mid-request.
9 *
10 * Rather than always paying the latency tax of a background-function kickoff, the
11 * sync handler does a cheap preflight (canister export + chunking + cache lookup
12 * are already happening) and then asks THIS module: "given chunks_to_embed +
13 * concurrency, will it fit in the sync budget?". When it won't, the handler kicks
14 * off a Netlify background function (15 min cap) and returns 202 immediately.
15 *
16 * Pure module: no I/O, no env reads, no time. Tests must be deterministic.
17 */
18
/**
 * Median latency of one embedding batch, in milliseconds, as assumed by the
 * estimator. The figure comes from the `hub/bridge/index-timing.mjs`
 * post-mortem on production logs after the DeepInfra switch (median 2.5 s,
 * p95 ~5 s).
 *
 * The median is used on purpose instead of p95: the sync budget
 * (`SYNC_BUDGET_SECONDS_DEFAULT`) already sits well below the 60 s platform
 * max, so one slow batch that overshoots the estimate by 4–5 s still finishes
 * in time. Planning every job for p95 would send 70 %+ of jobs to the
 * background path for no benefit, each paying an extra cold start.
 *
 * Update this value when the embedding provider changes (e.g. back to OpenAI
 * at ~1.2 s/batch, or to any faster endpoint); the rest of the math scales
 * linearly with it.
 */
export const DEFAULT_EMBED_MS_PER_BATCH = 2.5 * 1000;

/**
 * Default sync budget, in seconds. Netlify caps synchronous functions at 60 s
 * (docs.netlify.com/build/functions/overview); 30 s of that is held back as
 * headroom for preflight and post-embed work (chunk hash compute,
 * ensureCollection migration, upserts, persistVectorsToBlob), so an embed
 * phase of up to 30 s still completes inside the function timeout.
 */
export const SYNC_BUDGET_SECONDS_DEFAULT = 30;

/**
 * Default hard ceiling on the number of chunks the sync path will embed.
 * Even when the time estimate looks safe, indexing >= 500 chunks drags in
 * upsert + persist work whose tail latency is hard to predict (Blob upload
 * contention, sqlite-vec single writer). Sending those jobs to background is
 * cheaper than finding out at 58 s that the budget is gone and the gateway
 * has already answered 504.
 */
export const MAX_SYNC_CHUNKS_DEFAULT = 500;

/**
 * Per-chunk cost, in milliseconds, charged for the upsert + persist phases.
 * ~5 ms is a conservative upper bound taken from the `upsert_total` step in
 * `index-timing.mjs` for the bridge sqlite-vec backend; most upserts land
 * well under 2 ms/chunk and the figure is rounded up.
 */
export const UPSERT_MS_PER_CHUNK = 5;

/**
 * Flat overhead, in milliseconds, added to every estimate. The canister
 * export is already finished by the time the estimator runs, but
 * ensureCollection, chunk hash compute and persistVectorsToBlob still have to
 * run after the embed phase.
 */
export const FIXED_OVERHEAD_MS = 3 * 1000;
65
/**
 * Estimate wall-clock seconds for the embed + upsert + persist phases of a
 * re-index, given how many chunks need re-embedding and the active parallelism
 * settings.
 *
 * Math: `embedBatches = ceil(chunksToEmbed / batchSize)` total embed batches.
 * With bounded concurrency `concurrency`, the wall-clock is
 * `ceil(embedBatches / concurrency) * msPerBatch` (round-robin worker pool;
 * matches `lib/parallel-embed-pool.mjs:runWithConcurrency`). Add per-chunk
 * upsert overhead and a fixed tail for the post-embed steps, divide by 1000,
 * round up.
 *
 * Missing or non-numeric fields fall back to defaults: `batchSize` 50,
 * `concurrency` 5, and the module constants for the three cost fields.
 * Negative cost overrides are clamped to 0 so the result is never negative
 * and a bad override cannot cancel out real work in the estimate.
 *
 * @param {{
 *   chunksToEmbed: number,
 *   batchSize: number,
 *   concurrency: number,
 *   msPerBatch?: number,
 *   upsertMsPerChunk?: number,
 *   fixedOverheadMs?: number,
 * }} input
 * @returns {number} Estimated whole seconds (>= 0). Returns 0 if `chunksToEmbed <= 0`.
 * @throws {TypeError} If `input` is null/undefined or not an object.
 * @throws {RangeError} If `batchSize` or `concurrency` resolves to less than 1
 *   (only checked when there is at least one chunk to embed).
 */
export function estimateEmbedSeconds(input) {
  if (input == null || typeof input !== 'object') {
    throw new TypeError('estimateEmbedSeconds: input is required');
  }

  const chunksToEmbed = numberOr(input.chunksToEmbed, 0);
  // Nothing to embed: no batches, no upserts, and no fixed tail is charged.
  if (chunksToEmbed <= 0) return 0;

  const batchSize = numberOr(input.batchSize, 50);
  const concurrency = numberOr(input.concurrency, 5);
  if (batchSize < 1) throw new RangeError('estimateEmbedSeconds: batchSize must be >= 1');
  if (concurrency < 1) throw new RangeError('estimateEmbedSeconds: concurrency must be >= 1');

  // A negative latency or overhead is meaningless; left unclamped it would
  // yield a negative estimate or hide real cost and wrongly keep a large job
  // on the sync path.
  const msPerBatch = Math.max(0, numberOr(input.msPerBatch, DEFAULT_EMBED_MS_PER_BATCH));
  const upsertMsPerChunk = Math.max(0, numberOr(input.upsertMsPerChunk, UPSERT_MS_PER_CHUNK));
  const fixedOverheadMs = Math.max(0, numberOr(input.fixedOverheadMs, FIXED_OVERHEAD_MS));

  const embedBatches = Math.ceil(chunksToEmbed / batchSize);
  // Batches run `concurrency` at a time, so wall-clock scales with the number
  // of rounds rather than the number of batches.
  const parallelMs = Math.ceil(embedBatches / concurrency) * msPerBatch;
  const upsertMs = chunksToEmbed * upsertMsPerChunk;
  const totalMs = parallelMs + upsertMs + fixedOverheadMs;
  return Math.ceil(totalMs / 1000);
}
108
/**
 * Decide whether the sync handler should hand this index run to the
 * background function. Rules are checked in the order below and the first
 * one that matches supplies `reason` (a single cause, not the union, since
 * the calling timer + 202 response only need one human-readable cause):
 *
 *   1. `dim_migration`           – a dimension migration is required and there
 *                                  is at least one chunk to embed
 *   2. `first_index`             – first-time index and there is at least one
 *                                  chunk to embed
 *   3. `chunk_count_exceeds_max` – chunks to embed >= the chunk ceiling
 *   4. `estimate_exceeds_budget` – estimated seconds >= the sync budget
 *
 * When none match, the job stays on the sync path with reason `fits_in_sync`.
 * Missing or non-numeric fields fall back to 0 (`chunksToEmbed`,
 * `estimatedSeconds`), `SYNC_BUDGET_SECONDS_DEFAULT` and
 * `MAX_SYNC_CHUNKS_DEFAULT`.
 *
 * @param {{
 *   chunksToEmbed: number,
 *   estimatedSeconds: number,
 *   syncBudgetSeconds?: number,
 *   maxSyncChunks?: number,
 *   dimMigrationRequired?: boolean,
 *   isFirstIndex?: boolean,
 * }} input
 * @returns {{ shouldUseBackground: boolean, reason: 'fits_in_sync' | 'estimate_exceeds_budget' | 'chunk_count_exceeds_max' | 'dim_migration' | 'first_index' }}
 * @throws {TypeError} If `input` is null/undefined or not an object.
 */
export function shouldUseBackgroundIndex(input) {
  const isUsableInput = input != null && typeof input === 'object';
  if (!isUsableInput) {
    throw new TypeError('shouldUseBackgroundIndex: input is required');
  }

  const pendingChunks = numberOr(input.chunksToEmbed, 0);
  const estimate = numberOr(input.estimatedSeconds, 0);
  const budget = numberOr(input.syncBudgetSeconds, SYNC_BUDGET_SECONDS_DEFAULT);
  const chunkCeiling = numberOr(input.maxSyncChunks, MAX_SYNC_CHUNKS_DEFAULT);
  const migrating = Boolean(input.dimMigrationRequired);
  const firstIndex = Boolean(input.isFirstIndex);
  const hasWork = pendingChunks > 0;

  // Ordered rule table: earlier entries take precedence over later ones.
  const rules = [
    ['dim_migration', migrating && hasWork],
    ['first_index', firstIndex && hasWork],
    ['chunk_count_exceeds_max', pendingChunks >= chunkCeiling],
    ['estimate_exceeds_budget', estimate >= budget],
  ];

  const matched = rules.find(([, applies]) => applies);
  if (matched === undefined) {
    return { shouldUseBackground: false, reason: 'fits_in_sync' };
  }
  return { shouldUseBackground: true, reason: matched[0] };
}
155
/**
 * Turn an `INDEXER_SYNC_BUDGET_SECONDS` value (env string or numeric
 * override) into a whole number of seconds.
 *
 * - `null`, `undefined`, `''` and anything that does not parse to a finite
 *   number yield `SYNC_BUDGET_SECONDS_DEFAULT`.
 * - Numbers are used as-is; everything else is stringified, trimmed and read
 *   as a base-10 integer.
 * - The result is clamped to `[5, 55]` and floored, so a typo can neither
 *   push the budget above the 60 s platform max nor effectively disable sync
 *   mode.
 *
 * @param {string|number|null|undefined} raw
 * @returns {number}
 */
export function parseSyncBudgetSeconds(raw) {
  const MIN_BUDGET_SECONDS = 5;
  const MAX_BUDGET_SECONDS = 55;

  if (raw == null || raw === '') return SYNC_BUDGET_SECONDS_DEFAULT;

  const seconds =
    typeof raw === 'number' ? raw : Number.parseInt(String(raw).trim(), 10);
  if (!Number.isFinite(seconds)) return SYNC_BUDGET_SECONDS_DEFAULT;

  const clamped = Math.min(MAX_BUDGET_SECONDS, Math.max(MIN_BUDGET_SECONDS, seconds));
  return Math.floor(clamped);
}
172
/**
 * Turn an `INDEXER_MAX_SYNC_CHUNKS` value (env string or numeric override)
 * into a whole chunk count.
 *
 * - `null`, `undefined`, `''` and anything that does not parse to a finite
 *   number yield `MAX_SYNC_CHUNKS_DEFAULT`.
 * - Numbers are used as-is; everything else is stringified, trimmed and read
 *   as a base-10 integer.
 * - The result is clamped to `[50, 5000]` and floored, so a typo can't
 *   disable the chunk-count safety net.
 *
 * @param {string|number|null|undefined} raw
 * @returns {number}
 */
export function parseMaxSyncChunks(raw) {
  const LOWEST_CEILING = 50;
  const HIGHEST_CEILING = 5000;

  const isUnset = raw == null || raw === '';
  if (isUnset) {
    return MAX_SYNC_CHUNKS_DEFAULT;
  }

  let chunks;
  if (typeof raw === 'number') {
    chunks = raw;
  } else {
    chunks = Number.parseInt(String(raw).trim(), 10);
  }

  if (!Number.isFinite(chunks)) {
    return MAX_SYNC_CHUNKS_DEFAULT;
  }
  if (chunks < LOWEST_CEILING) {
    return LOWEST_CEILING;
  }
  return chunks > HIGHEST_CEILING ? HIGHEST_CEILING : Math.floor(chunks);
}
188
/**
 * Coerce `value` to a finite number, or return `fallback` when no usable
 * number is present.
 *
 * `fallback` is returned for `null`/`undefined`, for empty or
 * whitespace-only strings, and for anything that coerces to `NaN` or
 * ±Infinity. Blank strings are treated as "unset" on purpose: `Number('')`
 * is 0, which would otherwise turn a blank setting into a real zero (e.g. a
 * zero-second budget) instead of the intended default.
 *
 * @param {unknown} value Candidate value; numbers are used as-is, other
 *   types go through `Number(value)`.
 * @param {number} fallback Returned when `value` is missing or not finite.
 * @returns {number}
 */
function numberOr(value, fallback) {
  if (value == null) return fallback;
  if (typeof value === 'string' && value.trim() === '') return fallback;
  const n = typeof value === 'number' ? value : Number(value);
  return Number.isFinite(n) ? n : fallback;
}
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 2 days ago