embedding-deepinfra-429-backoff.test.mjs
221 lines 6.7 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
/**
 * Tests for DeepInfra embedding 429 retry. The bridge `POST /api/v1/index` will run
 * `embedDeepInfraWithUsage` concurrently via `lib/parallel-embed-pool.mjs`; if a burst
 * trips DeepInfra's per-second cap, we want a single bounded retry that honors the
 * `Retry-After` header rather than failing the entire vault re-index.
 *
 * Hermetic: we inject a fake `fetchImpl` and `sleepFn`, so no real network or real
 * `setTimeout` is involved (the test runs in a few ms).
 */
10
11 import { describe, it } from 'node:test';
12 import assert from 'node:assert/strict';
13 import {
14 embedDeepInfraWithUsage,
15 retryAfterHeaderMs,
16 DEEPINFRA_429_BACKOFF_DEFAULT_MS,
17 DEEPINFRA_429_BACKOFF_MAX_MS,
18 } from '../lib/embedding.mjs';
19
/**
 * Builds a minimal stand-in for a fetch response, exposing only `ok`, `status`,
 * `headers.get`, `text()` and `json()`.
 *
 * @param {object} [options]
 * @param {number} [options.status=200] - HTTP status; `ok` is true for 200-299.
 * @param {Object<string, string>} [options.headers={}] - Header map; lookup via
 *   `headers.get` ignores case and yields `null` when no key matches.
 * @param {object|string} [options.body={}] - A string body is returned verbatim by
 *   `text()` and parsed by `json()`; any other value is stringified by `text()` and
 *   returned as-is by `json()`.
 * @returns {object} The fake response object.
 */
function makeFakeResponse({ status = 200, headers = {}, body = {} } = {}) {
  const bodyIsText = typeof body === 'string';

  // Case-insensitive lookup: the first key that matches after lowercasing wins.
  const lookupHeader = (name) => {
    const wanted = String(name).toLowerCase();
    const matchedKey = Object.keys(headers).find(
      (key) => key.toLowerCase() === wanted,
    );
    return matchedKey === undefined ? null : headers[matchedKey];
  };

  return {
    ok: status >= 200 && status < 300,
    status,
    headers: { get: lookupHeader },
    text: async () => (bodyIsText ? body : JSON.stringify(body)),
    json: async () => (bodyIsText ? JSON.parse(body) : body),
  };
}
41
/**
 * Wraps raw vectors in the response-body shape used by the fake fetch in these tests:
 * one `{ index, embedding }` entry per vector plus a fixed `usage.prompt_tokens` of 7.
 *
 * @param {number[][]} vectors - Embedding vectors, in response order.
 * @returns {{ data: Array<{ index: number, embedding: number[] }>, usage: { prompt_tokens: number } }}
 */
function fakeEmbeddingsBody(vectors) {
  const toEntry = (embedding, index) => ({ index, embedding });
  const data = vectors.map(toEntry);
  const usage = { prompt_tokens: 7 };
  return { data, usage };
}
48
// Exercises embedDeepInfraWithUsage with an injected `fetchImpl` (scripted fake
// responses) and `sleepFn` (records the requested delay instead of waiting), so
// every case is deterministic and never touches the network or real timers.
describe('embedDeepInfraWithUsage — 429 retry', () => {
  it('retries once on 429 then succeeds, honoring Retry-After (seconds)', async () => {
    let calls = 0;
    const sleeps = [];
    // First call is rate-limited with Retry-After: 2; every later call succeeds.
    const fakeFetch = async () => {
      calls++;
      if (calls === 1) {
        return makeFakeResponse({
          status: 429,
          headers: { 'Retry-After': '2' },
          body: { error: 'rate_limited' },
        });
      }
      return makeFakeResponse({
        status: 200,
        body: fakeEmbeddingsBody([[0.1, 0.2]]),
      });
    };
    const out = await embedDeepInfraWithUsage(['hi'], {
      model: 'BAAI/bge-large-en-v1.5',
      apiKey: 'fake',
      fetchImpl: fakeFetch,
      sleepFn: async (ms) => {
        sleeps.push(ms);
      },
    });
    assert.equal(calls, 2);
    // Retry-After is given in seconds; the sleep is requested in milliseconds.
    assert.deepEqual(sleeps, [2000]);
    assert.deepEqual(out.vectors, [[0.1, 0.2]]);
    assert.equal(out.embedding_input_tokens, 7);
  });

  it('uses default backoff when Retry-After header is missing', async () => {
    let calls = 0;
    const sleeps = [];
    // First call is a 429 without any Retry-After header; the retry succeeds.
    const fakeFetch = async () => {
      calls++;
      if (calls === 1) {
        return makeFakeResponse({ status: 429, body: 'too many' });
      }
      return makeFakeResponse({
        status: 200,
        body: fakeEmbeddingsBody([[0.5]]),
      });
    };
    await embedDeepInfraWithUsage(['hi'], {
      model: 'BAAI/bge-large-en-v1.5',
      apiKey: 'fake',
      fetchImpl: fakeFetch,
      sleepFn: async (ms) => {
        sleeps.push(ms);
      },
    });
    // Original attempt plus exactly one retry.
    assert.equal(calls, 2);
    assert.equal(sleeps.length, 1);
    assert.equal(sleeps[0], DEEPINFRA_429_BACKOFF_DEFAULT_MS);
  });

  it('does NOT retry beyond budget — second 429 surfaces as DeepInfra embed failed (429)', async () => {
    let calls = 0;
    const sleeps = [];
    // Every call is rate-limited, so the retry budget is exhausted.
    const fakeFetch = async () => {
      calls++;
      return makeFakeResponse({
        status: 429,
        headers: { 'Retry-After': '1' },
        body: 'rate limit',
      });
    };
    await assert.rejects(
      () =>
        embedDeepInfraWithUsage(['hi'], {
          model: 'BAAI/bge-large-en-v1.5',
          apiKey: 'fake',
          fetchImpl: fakeFetch,
          sleepFn: async (ms) => {
            sleeps.push(ms);
          },
        }),
      /DeepInfra embed failed \(429\)/,
    );
    assert.equal(calls, 2, 'should attempt original + 1 retry, then surface');
    assert.equal(sleeps.length, 1);
  });

  it('does not retry on non-429 errors (e.g. 500) — fail fast so the user sees the real error', async () => {
    let calls = 0;
    const fakeFetch = async () => {
      calls++;
      return makeFakeResponse({ status: 500, body: 'upstream broke' });
    };
    await assert.rejects(
      () =>
        embedDeepInfraWithUsage(['hi'], {
          model: 'BAAI/bge-large-en-v1.5',
          apiKey: 'fake',
          fetchImpl: fakeFetch,
          // A sleep here would reject with a different message and fail the
          // pattern match below.
          sleepFn: async () => {
            throw new Error('sleep should not be called for non-429');
          },
        }),
      /DeepInfra embed failed \(500\)/,
    );
    // Guards against a retry that skips the sleep: exactly one fetch is allowed.
    assert.equal(calls, 1, 'non-429 failures must not be retried');
  });

  it('maxRetries: 0 disables retry (one attempt only) — useful for hot paths that prefer fail-fast', async () => {
    let calls = 0;
    const fakeFetch = async () => {
      calls++;
      return makeFakeResponse({ status: 429, body: 'rl' });
    };
    await assert.rejects(
      () =>
        embedDeepInfraWithUsage(['hi'], {
          model: 'BAAI/bge-large-en-v1.5',
          apiKey: 'fake',
          fetchImpl: fakeFetch,
          sleepFn: async () => {},
          maxRetries: 0,
        }),
      /DeepInfra embed failed \(429\)/,
    );
    assert.equal(calls, 1);
  });

  it('throws on missing/empty apiKey before any fetch', async () => {
    let called = false;
    // Would succeed if reached; the test asserts it is never invoked.
    const fakeFetch = async () => {
      called = true;
      return makeFakeResponse({ status: 200, body: fakeEmbeddingsBody([[0]]) });
    };
    await assert.rejects(
      () =>
        embedDeepInfraWithUsage(['hi'], {
          model: 'BAAI/bge-large-en-v1.5',
          apiKey: '',
          fetchImpl: fakeFetch,
        }),
      /DeepInfra embeddings require DEEPINFRA_API_KEY/,
    );
    assert.equal(called, false);
  });
});
190
// Unit tests for retryAfterHeaderMs: maps a raw Retry-After header value to a
// wait in milliseconds, checked here against the exported default and max constants.
describe('retryAfterHeaderMs', () => {
  it('returns default when header is missing', () => {
    for (const absent of [null, undefined, '']) {
      assert.equal(retryAfterHeaderMs(absent), DEEPINFRA_429_BACKOFF_DEFAULT_MS);
    }
  });

  it('parses integer seconds', () => {
    const waitMs = retryAfterHeaderMs('3');
    assert.equal(waitMs, 3000);
  });

  it('clamps to MAX so a huge value cannot strand a Netlify Function past its 60s cap', () => {
    const waitMs = retryAfterHeaderMs('3600');
    assert.equal(waitMs, DEEPINFRA_429_BACKOFF_MAX_MS);
  });

  it('rejects garbage and returns default', () => {
    for (const junk of ['abc', '-1']) {
      assert.equal(retryAfterHeaderMs(junk), DEEPINFRA_429_BACKOFF_DEFAULT_MS);
    }
  });

  it('parses HTTP-date when in the future, capped to MAX', () => {
    const thirtySecondsMs = 30 * 1000;
    const httpDate = new Date(Date.now() + thirtySecondsMs).toUTCString();
    const waitMs = retryAfterHeaderMs(httpDate);
    // Only the bounds are pinned: the exact value depends on the current clock.
    assert.ok(waitMs > 0);
    assert.ok(waitMs <= DEEPINFRA_429_BACKOFF_MAX_MS);
  });

  it('past HTTP-date falls back to default (no negative wait)', () => {
    const oneMinuteMs = 60 * 1000;
    const httpDate = new Date(Date.now() - oneMinuteMs).toUTCString();
    assert.equal(retryAfterHeaderMs(httpDate), DEEPINFRA_429_BACKOFF_DEFAULT_MS);
  });
});
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 1 day ago