bridge-index-preflight-estimate.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | /** |
| 2 | * Pure preflight estimator for `hub/bridge/server.mjs POST /api/v1/index`. |
| 3 | * |
| 4 | * The bridge runs as a Netlify synchronous function (60 s platform max). After the |
| 5 | * OpenAI(1536) → DeepInfra(1024 BAAI/bge-large-en-v1.5) switch, per-batch embed |
| 6 | * latency went from ~1.2 s to ~2.5 s median (5–8.5 s tails). With ~50 chunks/batch |
| 7 | * and concurrency 5, that means a vault of ~1500+ chunks needing a full re-embed |
| 8 | * can blow past 60 s and the gateway returns a 504 mid-request. |
| 9 | * |
| 10 | * Rather than always paying the latency tax of a background-function kickoff, the |
| 11 | * sync handler does a cheap preflight (canister export + chunking + cache lookup |
| 12 | * are already happening) and then asks THIS module: "given chunks_to_embed + |
| 13 | * concurrency, will it fit in the sync budget?". When it won't, the handler kicks |
| 14 | * off a Netlify background function (15 min cap) and returns 202 immediately. |
| 15 | * |
| 16 | * Pure module: no I/O, no env reads, no time. Tests must be deterministic. |
| 17 | */ |
| 18 | |
/**
 * Per-batch embedding latency (median, in milliseconds) assumed by the
 * estimator. Sourced from the `hub/bridge/index-timing.mjs` post-mortem on
 * production logs after the DeepInfra switch (median 2.5 s, p95 ~5 s).
 *
 * The median is used, NOT p95, because there is already a hard ceiling
 * (`SYNC_BUDGET_SECONDS_DEFAULT`) below the platform max: a single tail batch
 * that runs 4–5 s over the estimate still lands safely under 60 s, whereas
 * planning every job for p95 would route 70 %+ of jobs to background
 * unnecessarily and cost an extra cold start each time.
 *
 * `estimateEmbedSeconds` falls back to this value when the caller does not
 * supply a usable `msPerBatch`. If you swap providers (e.g. back to OpenAI at
 * ~1.2 s/batch, or to a faster embedding endpoint), update this constant — the
 * embed portion of the estimate scales linearly with it.
 */
export const DEFAULT_EMBED_MS_PER_BATCH = 2500;
| 33 | |
/**
 * Default sync budget, in seconds. Netlify's platform max for synchronous
 * functions is 60 s (docs.netlify.com/build/functions/overview); 30 s is
 * reserved as headroom for preflight + post-embed steps (chunk hash compute,
 * ensureCollection migration, upserts, persistVectorsToBlob) so that a 30 s
 * embed phase still finishes inside the function timeout.
 *
 * `shouldUseBackgroundIndex` routes to background as soon as the estimate
 * reaches this value (the comparison is `>=`), and `parseSyncBudgetSeconds`
 * returns it when no usable override is supplied.
 */
export const SYNC_BUDGET_SECONDS_DEFAULT = 30;
| 42 | |
/**
 * Default hard chunk-count ceiling for the sync path. Even when the time
 * estimate looks safe, indexing >= 500 chunks pulls in a lot of upsert +
 * persist work whose tail latency is hard to predict (Blob upload contention,
 * sqlite-vec single writer). Routing those jobs to background is cheaper than
 * discovering at 58 s that the budget is gone and the gateway has already
 * returned a 504.
 *
 * The check in `shouldUseBackgroundIndex` is inclusive: a job with exactly
 * this many chunks already goes to background. `parseMaxSyncChunks` returns
 * this value when no usable override is supplied.
 */
export const MAX_SYNC_CHUNKS_DEFAULT = 500;
| 51 | |
/**
 * Per-chunk overhead (ms) charged for the upsert + persist phases. ~5 ms is a
 * conservative upper bound observed in `index-timing.mjs` step `upsert_total`
 * for the bridge sqlite-vec backend (most upserts come in well under
 * 2 ms/chunk; the figure is rounded up). `estimateEmbedSeconds` falls back to
 * it when the caller does not pass a usable `upsertMsPerChunk`.
 */
export const UPSERT_MS_PER_CHUNK = 5;
| 58 | |
/**
 * Fixed overhead (ms) added to every non-empty estimate. Canister export is
 * already done by the time the estimator runs, but ensureCollection + chunk
 * hash compute + persistVectorsToBlob still need to run after the embed phase.
 * `estimateEmbedSeconds` falls back to it when the caller does not pass a
 * usable `fixedOverheadMs`; an estimate for zero chunks returns 0 before this
 * overhead is applied.
 */
export const FIXED_OVERHEAD_MS = 3000;
| 65 | |
/**
 * Estimate wall-clock seconds for the embed + upsert + persist phases of a
 * re-index, given how many chunks need re-embedding and the active parallelism
 * settings.
 *
 * Math: `embedBatches = ceil(chunksToEmbed / batchSize)` total embed batches.
 * With bounded concurrency, the wall-clock embed time is
 * `ceil(embedBatches / concurrency) * msPerBatch` (round-robin worker pool;
 * matches `lib/parallel-embed-pool.mjs:runWithConcurrency`). Per-chunk upsert
 * overhead and a fixed tail for the post-embed steps are added, then the total
 * is converted to seconds and rounded up.
 *
 * Inputs are normalised so the estimate can only err on the slow side:
 *   - `batchSize` and `concurrency` are floored to whole numbers, because a
 *     fractional batch slot or worker cannot exist and would otherwise make
 *     the job look faster than it is (e.g. 5 batches at concurrency 2.5 would
 *     count as 2 rounds instead of 3);
 *   - negative cost overrides are treated as 0, so the result is never
 *     negative.
 *
 * @param {{
 *   chunksToEmbed: number,
 *   batchSize: number,
 *   concurrency: number,
 *   msPerBatch?: number,
 *   upsertMsPerChunk?: number,
 *   fixedOverheadMs?: number,
 * }} input
 * @returns {number} Estimated whole seconds (>= 0). Returns 0 if `chunksToEmbed <= 0`.
 * @throws {TypeError} If `input` is null, undefined, or not an object.
 * @throws {RangeError} If `batchSize` or `concurrency` is below 1 after flooring.
 */
export function estimateEmbedSeconds(input) {
  if (input == null || typeof input !== 'object') {
    throw new TypeError('estimateEmbedSeconds: input is required');
  }

  const chunksToEmbed = numberOr(input.chunksToEmbed, 0);
  // Nothing to embed: no embed, upsert, or fixed overhead is charged.
  if (chunksToEmbed <= 0) return 0;

  // Whole batch slots / workers only; rounding down keeps the estimate pessimistic.
  const batchSize = Math.floor(numberOr(input.batchSize, 50));
  const concurrency = Math.floor(numberOr(input.concurrency, 5));
  if (batchSize < 1) throw new RangeError('estimateEmbedSeconds: batchSize must be >= 1');
  if (concurrency < 1) throw new RangeError('estimateEmbedSeconds: concurrency must be >= 1');

  // A negative cost is meaningless; clamp so it cannot offset the other terms.
  const msPerBatch = Math.max(0, numberOr(input.msPerBatch, DEFAULT_EMBED_MS_PER_BATCH));
  const upsertMsPerChunk = Math.max(0, numberOr(input.upsertMsPerChunk, UPSERT_MS_PER_CHUNK));
  const fixedOverheadMs = Math.max(0, numberOr(input.fixedOverheadMs, FIXED_OVERHEAD_MS));

  const embedBatches = Math.ceil(chunksToEmbed / batchSize);
  // Batches run `concurrency` at a time, so wall-clock is counted in rounds.
  const parallelMs = Math.ceil(embedBatches / concurrency) * msPerBatch;
  const upsertMs = chunksToEmbed * upsertMsPerChunk;
  const totalMs = parallelMs + upsertMs + fixedOverheadMs;
  return Math.ceil(totalMs / 1000);
}
| 108 | |
/**
 * Routing decision for the sync handler: should this re-index run in the
 * background function instead of inline?
 *
 * Background mode wins as soon as ANY rule below applies, so the sync path
 * never trips the 60 s wall. Rules are checked in this order and only the
 * first hit is reported (not the union), because the calling timer + 202
 * response only need one human-readable cause:
 *   1. `dim_migration`           — a dimension migration is required and there
 *                                  is at least one chunk to embed
 *   2. `first_index`             — first-time index of this vault and there is
 *                                  at least one chunk to embed
 *   3. `chunk_count_exceeds_max` — chunks to embed >= hard chunk ceiling
 *   4. `estimate_exceeds_budget` — estimated seconds >= sync budget
 * When none applies the reason is `fits_in_sync`.
 *
 * @param {{
 *   chunksToEmbed: number,
 *   estimatedSeconds: number,
 *   syncBudgetSeconds?: number,
 *   maxSyncChunks?: number,
 *   dimMigrationRequired?: boolean,
 *   isFirstIndex?: boolean,
 * }} input
 * @returns {{ shouldUseBackground: boolean, reason: 'fits_in_sync' | 'estimate_exceeds_budget' | 'chunk_count_exceeds_max' | 'dim_migration' | 'first_index' }}
 * @throws {TypeError} If `input` is null, undefined, or not an object.
 */
export function shouldUseBackgroundIndex(input) {
  if (typeof input !== 'object' || input === null) {
    throw new TypeError('shouldUseBackgroundIndex: input is required');
  }

  const pending = numberOr(input.chunksToEmbed, 0);
  const estimate = numberOr(input.estimatedSeconds, 0);
  const budget = numberOr(input.syncBudgetSeconds, SYNC_BUDGET_SECONDS_DEFAULT);
  const ceiling = numberOr(input.maxSyncChunks, MAX_SYNC_CHUNKS_DEFAULT);
  const migrating = Boolean(input.dimMigrationRequired);
  const firstRun = Boolean(input.isFirstIndex);
  const hasWork = pending > 0;

  // Precedence table: the earliest entry whose condition holds decides the reason.
  const rules = [
    ['dim_migration', migrating && hasWork],
    ['first_index', firstRun && hasWork],
    ['chunk_count_exceeds_max', pending >= ceiling],
    ['estimate_exceeds_budget', estimate >= budget],
  ];

  const hit = rules.find(([, applies]) => applies);
  if (hit === undefined) {
    return { shouldUseBackground: false, reason: 'fits_in_sync' };
  }
  return { shouldUseBackground: true, reason: hit[0] };
}
| 155 | |
/**
 * Resolve the sync budget from `INDEXER_SYNC_BUDGET_SECONDS` (env value or an
 * explicit override).
 *
 * - `null`, `undefined`, `''`, or anything that does not yield a finite number
 *   resolves to `SYNC_BUDGET_SECONDS_DEFAULT`.
 * - Numbers are used as-is; every other value is stringified, trimmed, and read
 *   as a base-10 integer prefix.
 * - The result is clamped to `[5, 55]` and rounded down, so a typo can neither
 *   push the budget above the platform max (60 s) nor effectively disable sync
 *   mode.
 *
 * @param {string|number|null|undefined} raw
 * @returns {number} Whole seconds in `[5, 55]`, or the default.
 */
export function parseSyncBudgetSeconds(raw) {
  const isUnset = raw === null || raw === undefined || raw === '';
  if (isUnset) return SYNC_BUDGET_SECONDS_DEFAULT;

  let seconds = raw;
  if (typeof raw !== 'number') {
    seconds = Number.parseInt(String(raw).trim(), 10);
  }
  if (!Number.isFinite(seconds)) return SYNC_BUDGET_SECONDS_DEFAULT;

  // Clamp first, then drop any fraction left on an in-range value.
  const clamped = Math.min(55, Math.max(5, seconds));
  return Math.floor(clamped);
}
| 172 | |
/**
 * Resolve the sync chunk ceiling from `INDEXER_MAX_SYNC_CHUNKS` (env value or
 * an explicit override).
 *
 * Missing input (`null`, `undefined`, `''`) and input that does not yield a
 * finite number both resolve to `MAX_SYNC_CHUNKS_DEFAULT`. Numbers are used
 * as-is; any other value is stringified, trimmed, and read as a base-10 integer
 * prefix. The result is clamped to `[50, 5000]` and rounded down, so a typo
 * can't disable the chunk-count safety net.
 *
 * @param {string|number|null|undefined} raw
 * @returns {number} Whole chunk count in `[50, 5000]`, or the default.
 */
export function parseMaxSyncChunks(raw) {
  const FLOOR = 50;
  const CEILING = 5000;

  if (raw === '' || raw === null || raw === undefined) {
    return MAX_SYNC_CHUNKS_DEFAULT;
  }

  const limit = typeof raw === 'number' ? raw : Number.parseInt(String(raw).trim(), 10);
  if (Number.isFinite(limit) === false) {
    return MAX_SYNC_CHUNKS_DEFAULT;
  }

  // Out-of-range values snap to the nearest bound; in-range values lose any fraction.
  return limit < FLOOR ? FLOOR : limit > CEILING ? CEILING : Math.floor(limit);
}
| 188 | |
/**
 * Coerce a loosely-typed option to a finite number, or return `fallback`.
 *
 * `fallback` is returned when `value` is `null`/`undefined`, is an empty or
 * whitespace-only string, or does not convert to a finite number (`NaN`,
 * `±Infinity`, non-numeric text). Blank strings are checked explicitly because
 * `Number('')` and `Number('   ')` evaluate to `0`, which would otherwise turn
 * an unset value into a real zero instead of the intended default.
 *
 * @param {unknown} value Candidate value (number, numeric string, …).
 * @param {number} fallback Value to return when `value` is unusable.
 * @returns {number} The finite numeric value, or `fallback`.
 */
function numberOr(value, fallback) {
  if (value == null) return fallback;
  // Treat blank strings as "not provided" rather than letting them coerce to 0.
  if (typeof value === 'string' && value.trim() === '') return fallback;
  const n = typeof value === 'number' ? value : Number(value);
  return Number.isFinite(n) ? n : fallback;
}
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
2 days ago