embedding-deepinfra-429-backoff.test.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | /** |
| 2 | * Tests for DeepInfra embedding 429 retry. The bridge `POST /api/v1/index` will run |
| 3 | * `embedDeepInfraWithUsage` concurrently via `lib/parallel-embed-pool.mjs`; if a burst |
| 4 | * trips DeepInfra's per-second cap, we want a single bounded retry honoring the |
| 5 | * `Retry-After` header rather than failing the entire vault re-index. |
| 6 | * |
| 7 | * Hermetic: we inject a fake `fetchImpl` and `sleepFn` so no real network or real |
| 8 | * `setTimeout` is involved (test runs in a few ms). |
| 9 | */ |
| 10 | |
| 11 | import { describe, it } from 'node:test'; |
| 12 | import assert from 'node:assert/strict'; |
| 13 | import { |
| 14 | embedDeepInfraWithUsage, |
| 15 | retryAfterHeaderMs, |
| 16 | DEEPINFRA_429_BACKOFF_DEFAULT_MS, |
| 17 | DEEPINFRA_429_BACKOFF_MAX_MS, |
| 18 | } from '../lib/embedding.mjs'; |
| 19 | |
/**
 * Build a minimal stand-in for a fetch `Response`, exposing only the members
 * these tests touch: `ok`, `status`, `headers.get()`, `text()` and `json()`.
 *
 * @param {object} [init]
 * @param {number} [init.status=200] - HTTP status; `ok` is true for 200-299.
 * @param {Record<string, string>} [init.headers={}] - Header map; lookups are case-insensitive.
 * @param {string|object} [init.body={}] - A string is returned as-is by `text()` and
 *   JSON-parsed by `json()`; any other value is stringified by `text()` and returned
 *   unchanged by `json()`.
 * @returns {object} Response-like object.
 */
function makeFakeResponse({ status = 200, headers = {}, body = {} } = {}) {
  const bodyIsString = typeof body === 'string';

  // Case-insensitive lookup; first matching key wins, `null` when absent.
  const lookupHeader = (name) => {
    const wanted = String(name).toLowerCase();
    const matchedKey = Object.keys(headers).find(
      (key) => key.toLowerCase() === wanted,
    );
    return matchedKey === undefined ? null : headers[matchedKey];
  };

  return {
    ok: status >= 200 && status < 300,
    status,
    headers: { get: lookupHeader },
    async text() {
      if (bodyIsString) return body;
      return JSON.stringify(body);
    },
    async json() {
      if (bodyIsString) return JSON.parse(body);
      return body;
    },
  };
}
| 41 | |
/**
 * Build an embeddings-style response payload for the given vectors.
 *
 * @param {number[][]} vectors - One embedding per input, in order.
 * @returns {{ data: { index: number, embedding: number[] }[], usage: { prompt_tokens: number } }}
 *   Each vector is tagged with its position; `usage.prompt_tokens` is fixed at 7.
 */
function fakeEmbeddingsBody(vectors) {
  const toEntry = (embedding, index) => ({ index, embedding });
  const usage = { prompt_tokens: 7 };
  return { data: vectors.map(toEntry), usage };
}
| 48 | |
// Retry behaviour of `embedDeepInfraWithUsage` on HTTP 429. Every test injects a
// fake `fetchImpl` (and usually a `sleepFn` that records the requested delay), so
// nothing touches the network or a real timer.
describe('embedDeepInfraWithUsage — 429 retry', () => {
  it('retries once on 429 then succeeds, honoring Retry-After (seconds)', async () => {
    let calls = 0;
    const sleeps = [];
    // First call is rate-limited with `Retry-After: 2`; the second succeeds.
    const fakeFetch = async () => {
      calls++;
      if (calls === 1) {
        return makeFakeResponse({
          status: 429,
          headers: { 'Retry-After': '2' },
          body: { error: 'rate_limited' },
        });
      }
      return makeFakeResponse({
        status: 200,
        body: fakeEmbeddingsBody([[0.1, 0.2]]),
      });
    };
    const out = await embedDeepInfraWithUsage(['hi'], {
      model: 'BAAI/bge-large-en-v1.5',
      apiKey: 'fake',
      fetchImpl: fakeFetch,
      sleepFn: async (ms) => {
        sleeps.push(ms);
      },
    });
    assert.equal(calls, 2);
    // The header value is in seconds; the sleep is expected in milliseconds.
    assert.deepEqual(sleeps, [2000]);
    assert.deepEqual(out.vectors, [[0.1, 0.2]]);
    assert.equal(out.embedding_input_tokens, 7);
  });

  it('uses default backoff when Retry-After header is missing', async () => {
    let calls = 0;
    const sleeps = [];
    const fakeFetch = async () => {
      calls++;
      if (calls === 1) {
        return makeFakeResponse({ status: 429, body: 'too many' });
      }
      return makeFakeResponse({
        status: 200,
        body: fakeEmbeddingsBody([[0.5]]),
      });
    };
    await embedDeepInfraWithUsage(['hi'], {
      model: 'BAAI/bge-large-en-v1.5',
      apiKey: 'fake',
      fetchImpl: fakeFetch,
      sleepFn: async (ms) => {
        sleeps.push(ms);
      },
    });
    // One rate-limited attempt plus the successful retry.
    assert.equal(calls, 2);
    assert.equal(sleeps.length, 1);
    assert.equal(sleeps[0], DEEPINFRA_429_BACKOFF_DEFAULT_MS);
  });

  it('does NOT retry beyond budget — second 429 surfaces as DeepInfra embed failed (429)', async () => {
    let calls = 0;
    const sleeps = [];
    // Always rate-limited: the call must give up after its single retry.
    const fakeFetch = async () => {
      calls++;
      return makeFakeResponse({
        status: 429,
        headers: { 'Retry-After': '1' },
        body: 'rate limit',
      });
    };
    await assert.rejects(
      () =>
        embedDeepInfraWithUsage(['hi'], {
          model: 'BAAI/bge-large-en-v1.5',
          apiKey: 'fake',
          fetchImpl: fakeFetch,
          sleepFn: async (ms) => {
            sleeps.push(ms);
          },
        }),
      /DeepInfra embed failed \(429\)/,
    );
    assert.equal(calls, 2, 'should attempt original + 1 retry, then surface');
    assert.equal(sleeps.length, 1);
  });

  it('does not retry on non-429 errors (e.g. 500) — fail fast so the user sees the real error', async () => {
    let calls = 0;
    const fakeFetch = async () => {
      calls++;
      return makeFakeResponse({ status: 500, body: 'upstream broke' });
    };
    await assert.rejects(
      () =>
        embedDeepInfraWithUsage(['hi'], {
          model: 'BAAI/bge-large-en-v1.5',
          apiKey: 'fake',
          fetchImpl: fakeFetch,
          // A sleep here would reject with this message instead of the 500 error,
          // failing the pattern match below.
          sleepFn: async () => {
            throw new Error('sleep should not be called for non-429');
          },
        }),
      /DeepInfra embed failed \(500\)/,
    );
    // Guard against an immediate (sleep-less) retry as well.
    assert.equal(calls, 1, 'non-429 errors must not be retried');
  });

  it('maxRetries: 0 disables retry (one attempt only) — useful for hot paths that prefer fail-fast', async () => {
    let calls = 0;
    const fakeFetch = async () => {
      calls++;
      return makeFakeResponse({ status: 429, body: 'rl' });
    };
    await assert.rejects(
      () =>
        embedDeepInfraWithUsage(['hi'], {
          model: 'BAAI/bge-large-en-v1.5',
          apiKey: 'fake',
          fetchImpl: fakeFetch,
          sleepFn: async () => {},
          maxRetries: 0,
        }),
      /DeepInfra embed failed \(429\)/,
    );
    assert.equal(calls, 1);
  });

  it('throws on missing/empty apiKey before any fetch', async () => {
    let called = false;
    const fakeFetch = async () => {
      called = true;
      return makeFakeResponse({ status: 200, body: fakeEmbeddingsBody([[0]]) });
    };
    await assert.rejects(
      () =>
        embedDeepInfraWithUsage(['hi'], {
          model: 'BAAI/bge-large-en-v1.5',
          apiKey: '',
          fetchImpl: fakeFetch,
        }),
      /DeepInfra embeddings require DEEPINFRA_API_KEY/,
    );
    assert.equal(called, false);
  });
});
| 190 | |
// Unit tests for `retryAfterHeaderMs`, which turns a `Retry-After` header value
// into a wait in milliseconds.
describe('retryAfterHeaderMs', () => {
  it('returns default when header is missing', () => {
    for (const absent of [null, undefined, '']) {
      assert.equal(retryAfterHeaderMs(absent), DEEPINFRA_429_BACKOFF_DEFAULT_MS);
    }
  });

  it('parses integer seconds', () => {
    const waitMs = retryAfterHeaderMs('3');
    assert.equal(waitMs, 3000);
  });

  it('clamps to MAX so a huge value cannot strand a Netlify Function past its 60s cap', () => {
    const waitMs = retryAfterHeaderMs('3600');
    assert.equal(waitMs, DEEPINFRA_429_BACKOFF_MAX_MS);
  });

  it('rejects garbage and returns default', () => {
    for (const junk of ['abc', '-1']) {
      assert.equal(retryAfterHeaderMs(junk), DEEPINFRA_429_BACKOFF_DEFAULT_MS);
    }
  });

  it('parses HTTP-date when in the future, capped to MAX', () => {
    const thirtySecondsAhead = Date.now() + 30 * 1000;
    const headerValue = new Date(thirtySecondsAhead).toUTCString();
    const got = retryAfterHeaderMs(headerValue);
    // Only the range is pinned: positive and no greater than the cap.
    assert.ok(got > 0 && got <= DEEPINFRA_429_BACKOFF_MAX_MS);
  });

  it('past HTTP-date falls back to default (no negative wait)', () => {
    const oneMinuteAgo = Date.now() - 60 * 1000;
    const headerValue = new Date(oneMinuteAgo).toUTCString();
    assert.equal(retryAfterHeaderMs(headerValue), DEEPINFRA_429_BACKOFF_DEFAULT_MS);
  });
});
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
1 day ago