llm-complete.mjs
393 lines 16.1 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
1 /**
2 * Minimal chat completion for MCP summarize (Issue #1 Phase C6) and Hub proposal LLM jobs.
3 *
4 * Provider selection (in order):
5 * 1. KNOWTATION_CHAT_PROVIDER=deepinfra → DeepInfra (OpenAI-compatible). Falls back to OpenAI then
6 * Anthropic if DeepInfra returns an error and those keys are set.
7 * 2. KNOWTATION_CHAT_PROVIDER=openrouter → OpenRouter (OpenAI-compatible, BYO key). Requires
8 * OPENROUTER_API_KEY. No fallback to managed lanes: this is a
9 * "bring your own provider" lane, so a failure must surface
10 * rather than silently re-route note text to a metered/managed
11 * provider (privacy + billing correctness — see
12 * docs/COMPANION-APP-MODEL-ROUTING-AND-ENRICHMENT-ARCHITECTURE.md §4/§6).
13 * 3. KNOWTATION_CHAT_PROVIDER=openai → OpenAI only (no fallback). Requires OPENAI_API_KEY.
14 * 4. KNOWTATION_CHAT_PROVIDER=anthropic → Anthropic only (no fallback). Requires ANTHROPIC_API_KEY.
15 * 5. Implicit DeepInfra: DEEPINFRA_API_KEY set AND neither OPENAI_API_KEY nor ANTHROPIC_API_KEY set.
16 * (Backward compatible — does NOT preempt an existing OpenAI/Anthropic deployment.)
17 * 6. KNOWTATION_CHAT_PREFER_ANTHROPIC=1 (or true): try Anthropic before OpenAI when both keys exist;
18 * OpenAI is used as fallback if Claude fails.
19 * 7. Default: OpenAI when OPENAI_API_KEY; else Anthropic when ANTHROPIC_API_KEY; else Ollama /api/chat.
20 *
21 * OpenRouter is explicit-only: it is never selected implicitly, so adding OPENROUTER_API_KEY to an
22 * environment cannot change the provider for an existing deployment unless KNOWTATION_CHAT_PROVIDER=openrouter
23 * is also set (mirrors the implicit-DeepInfra backward-compatibility guarantee).
24 *
25 * Provider source precedence: `KNOWTATION_CHAT_PROVIDER` env (operator lock) wins; otherwise
26 * `config.llm.provider` (persisted via the Hub Settings UI → config/local.yaml) is honored. This
27 * lets a self-hosted operator pick the chat provider from the UI without an env var, while hosted /
28 * env-locked deploys remain authoritative. Selecting `ollama` forces the local lane regardless of
29 * any cloud keys present.
30 *
31 * Models: OPENAI_CHAT_MODEL (gpt-4o-mini), ANTHROPIC_CHAT_MODEL (claude-3-5-haiku-20241022),
32 * DEEPINFRA_CHAT_MODEL (Qwen/Qwen2.5-72B-Instruct), OPENROUTER_CHAT_MODEL (openai/gpt-4o-mini),
33 * OLLAMA_CHAT_MODEL (llama3.2).
34 *
35 * OpenRouter attribution (optional, OpenRouter best practice for app ranking; sent only when set):
36 * OPENROUTER_SITE_URL → HTTP-Referer header, OPENROUTER_APP_TITLE → X-Title header.
37 */
38
// HTTP endpoints for the hosted chat lanes. Each is used as the POST target by the matching
// `*Chat` helper in this module.

// OpenAI Chat Completions endpoint.
const OPENAI_CHAT_URL = 'https://api.openai.com/v1/chat/completions';
// Anthropic Messages endpoint.
const ANTHROPIC_MESSAGES_URL = 'https://api.anthropic.com/v1/messages';
// DeepInfra's OpenAI-compatible chat completions endpoint.
const DEEPINFRA_CHAT_URL = 'https://api.deepinfra.com/v1/openai/chat/completions';
// OpenRouter's OpenAI-compatible chat completions endpoint.
const OPENROUTER_CHAT_URL = 'https://openrouter.ai/api/v1/chat/completions';
43
/**
 * Run one system + user exchange against the OpenAI Chat Completions endpoint.
 *
 * Model precedence: `config.llm.openai_chat_model`, then `OPENAI_CHAT_MODEL`, then `gpt-4o-mini`.
 *
 * @param {{ llm?: { openai_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts
 * @param {number} maxTokens - sent to the API as `max_tokens`
 * @returns {Promise<string>} the first choice's message content, trimmed and never empty
 * @throws {Error} when OPENAI_API_KEY is unset, the HTTP status is not ok (status and response
 *   body are included in the message), or the reply is missing or whitespace-only
 */
async function openaiChat(config, opts, maxTokens) {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) throw new Error('OpenAI chat: OPENAI_API_KEY is not set');
  const model = config.llm?.openai_chat_model || process.env.OPENAI_CHAT_MODEL || 'gpt-4o-mini';
  const res = await fetch(OPENAI_CHAT_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` },
    body: JSON.stringify({
      model,
      messages: [
        { role: 'system', content: opts.system },
        { role: 'user', content: opts.user },
      ],
      max_tokens: maxTokens,
    }),
  });
  if (!res.ok) {
    const t = await res.text();
    throw new Error(`OpenAI chat failed: ${res.status} ${t}`);
  }
  const data = await res.json();
  const raw = data.choices?.[0]?.message?.content;
  // Trim BEFORE the emptiness check: a whitespace-only reply carries no content and must be
  // reported as empty (as anthropicChat does) instead of being returned to the caller as ''.
  const text = raw ? String(raw).trim() : '';
  if (!text) throw new Error('OpenAI chat: empty response');
  return text;
}
74
/**
 * Run one system + user exchange against the Anthropic Messages endpoint.
 *
 * Model precedence: `config.llm.anthropic_chat_model`, then `ANTHROPIC_CHAT_MODEL`, then
 * `claude-3-5-haiku-20241022`.
 *
 * @param {{ llm?: { anthropic_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts
 * @param {number} maxTokens - sent to the API as `max_tokens`
 * @returns {Promise<string>} the text of all text-bearing content blocks, concatenated in order
 *   and trimmed; never empty
 * @throws {Error} when ANTHROPIC_API_KEY is unset, the HTTP status is not ok (status and response
 *   body are included in the message), or the reply contains no text
 */
async function anthropicChat(config, opts, maxTokens) {
  const anthropicKey = process.env.ANTHROPIC_API_KEY;
  if (!anthropicKey) throw new Error('Anthropic chat: ANTHROPIC_API_KEY is not set');
  const model =
    config.llm?.anthropic_chat_model ||
    process.env.ANTHROPIC_CHAT_MODEL ||
    'claude-3-5-haiku-20241022';
  const res = await fetch(ANTHROPIC_MESSAGES_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': anthropicKey,
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify({
      model,
      max_tokens: maxTokens,
      system: opts.system,
      messages: [{ role: 'user', content: opts.user }],
    }),
  });
  if (!res.ok) {
    const t = await res.text();
    throw new Error(`Anthropic chat failed: ${res.status} ${t}`);
  }
  const data = await res.json();
  const blocks = Array.isArray(data.content) ? data.content : [];
  // `content` is a list of blocks. Reading only blocks[0] loses the reply when the first block
  // carries no text or when the text is split across several blocks, so gather every block that
  // has a `text` field and join them in order.
  const text = blocks
    .filter((block) => block && block.text != null)
    .map((block) => String(block.text))
    .join('')
    .trim();
  if (!text) throw new Error('Anthropic chat: empty response');
  return text;
}
112
/**
 * Report whether KNOWTATION_CHAT_PREFER_ANTHROPIC is switched on.
 * The flag counts as on when it is exactly `1`, or `true` in any letter case.
 * @returns {boolean}
 */
function chatPreferAnthropic() {
  const flag = String(process.env.KNOWTATION_CHAT_PREFER_ANTHROPIC || '');
  if (flag === '1') return true;
  return flag.toLowerCase() === 'true';
}
117
/**
 * Work out which chat provider has been explicitly selected.
 *
 * Sources, highest priority first:
 *   1. The `KNOWTATION_CHAT_PROVIDER` env var — the operator lock, so hosted / env-locked deploys
 *      are never overridden by a persisted UI setting.
 *   2. `config.llm.provider` (persisted via the Hub Settings UI → config/local.yaml).
 *   3. Nothing set → '' so the caller falls through to the default auto-detection chain.
 *
 * Values are trimmed and lowercased; a blank env var is treated as unset.
 *
 * @param {{ llm?: { provider?: string } }} [config]
 * @returns {string} lowercased provider id, or '' when none is selected
 */
function chatProvider(config) {
  const normalize = (raw) => String(raw || '').trim().toLowerCase();
  const locked = normalize(process.env.KNOWTATION_CHAT_PROVIDER);
  return locked !== '' ? locked : normalize(config?.llm?.provider);
}
132
/**
 * Run one system + user exchange against DeepInfra's OpenAI-compatible chat endpoint.
 *
 * Model precedence: `config.llm.deepinfra_chat_model`, then `DEEPINFRA_CHAT_MODEL`, then
 * `Qwen/Qwen2.5-72B-Instruct`.
 *
 * @param {{ llm?: { deepinfra_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts
 * @param {number} maxTokens - sent to the API as `max_tokens`
 * @returns {Promise<string>} the first choice's message content, trimmed and never empty
 * @throws {Error} when DEEPINFRA_API_KEY is unset, the HTTP status is not ok (status and response
 *   body are included in the message), or the reply is missing or whitespace-only
 */
async function deepinfraChat(config, opts, maxTokens) {
  const apiKey = process.env.DEEPINFRA_API_KEY;
  if (!apiKey) throw new Error('DeepInfra chat: DEEPINFRA_API_KEY is not set');
  const model =
    config.llm?.deepinfra_chat_model ||
    process.env.DEEPINFRA_CHAT_MODEL ||
    'Qwen/Qwen2.5-72B-Instruct';
  const res = await fetch(DEEPINFRA_CHAT_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` },
    body: JSON.stringify({
      model,
      messages: [
        { role: 'system', content: opts.system },
        { role: 'user', content: opts.user },
      ],
      max_tokens: maxTokens,
    }),
  });
  if (!res.ok) {
    const t = await res.text();
    throw new Error(`DeepInfra chat failed: ${res.status} ${t}`);
  }
  const data = await res.json();
  const raw = data.choices?.[0]?.message?.content;
  // Trim BEFORE the emptiness check: a whitespace-only reply must surface as an error (and so
  // let a caller's fallback run) rather than be returned as ''. anthropicChat behaves the same.
  const text = raw ? String(raw).trim() : '';
  if (!text) throw new Error('DeepInfra chat: empty response');
  return text;
}
166
/**
 * OpenRouter chat completion (OpenAI-compatible wire format, BYO key).
 *
 * OpenRouter is a provider aggregator: the user supplies their own OPENROUTER_API_KEY and pays
 * OpenRouter directly, so this lane is never metered against Knowtation packs. Because the user owns
 * the contract, a failure here is thrown to the caller and this function performs no re-routing to
 * another lane — that would change both the privacy surface (note text → a different third party)
 * and the billing surface (a metered pack event the user did not choose).
 *
 * Model precedence: `config.llm.openrouter_chat_model`, then `OPENROUTER_CHAT_MODEL`, then
 * `openai/gpt-4o-mini`. Optional attribution headers are sent only when their env vars are
 * non-blank: OPENROUTER_SITE_URL → `HTTP-Referer`, OPENROUTER_APP_TITLE → `X-Title`.
 *
 * @param {{ llm?: { openrouter_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts
 * @param {number} maxTokens - sent to the API as `max_tokens`
 * @returns {Promise<string>} the first choice's message content, trimmed and never empty
 * @throws {Error} when OPENROUTER_API_KEY is unset, the HTTP status is not ok (status and response
 *   body are included in the message), or the reply is missing or whitespace-only
 */
async function openrouterChat(config, opts, maxTokens) {
  const apiKey = process.env.OPENROUTER_API_KEY;
  if (!apiKey) throw new Error('OpenRouter chat: OPENROUTER_API_KEY is not set');
  const model =
    config.llm?.openrouter_chat_model ||
    process.env.OPENROUTER_CHAT_MODEL ||
    'openai/gpt-4o-mini';
  const headers = {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${apiKey}`,
  };
  const siteUrl = process.env.OPENROUTER_SITE_URL;
  if (siteUrl && String(siteUrl).trim()) headers['HTTP-Referer'] = String(siteUrl).trim();
  const appTitle = process.env.OPENROUTER_APP_TITLE;
  if (appTitle && String(appTitle).trim()) headers['X-Title'] = String(appTitle).trim();
  const res = await fetch(OPENROUTER_CHAT_URL, {
    method: 'POST',
    headers,
    body: JSON.stringify({
      model,
      messages: [
        { role: 'system', content: opts.system },
        { role: 'user', content: opts.user },
      ],
      max_tokens: maxTokens,
    }),
  });
  if (!res.ok) {
    const t = await res.text();
    throw new Error(`OpenRouter chat failed: ${res.status} ${t}`);
  }
  const data = await res.json();
  const raw = data.choices?.[0]?.message?.content;
  // Trim BEFORE the emptiness check: a whitespace-only reply carries no content and must be
  // reported as empty (as anthropicChat does) instead of being returned to the caller as ''.
  const text = raw ? String(raw).trim() : '';
  if (!text) throw new Error('OpenRouter chat: empty response');
  return text;
}
216
/**
 * Ollama chat completion (local `/api/chat`). Used both as the default fallback and as the
 * explicit `ollama` lane. Free + private (runs on the user's own Ollama instance).
 *
 * Base URL precedence: `config.embedding.ollama_url`, then `OLLAMA_URL`, then
 * `http://localhost:11434`; one trailing slash is removed before `/api/chat` is appended.
 * Model precedence: `OLLAMA_CHAT_MODEL`, then `config.llm.ollama_chat_model`, then `OLLAMA_MODEL`,
 * then `llama3.2`.
 *
 * @param {{ embedding?: { ollama_url?: string }, llm?: { ollama_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts
 * @param {number} maxTokens - sent to Ollama as `options.num_predict`
 * @returns {Promise<string>} the reply's `message.content`, trimmed and never empty
 * @throws {Error} when the request itself fails (message names the base URL and the underlying
 *   error), the HTTP status is not ok, or the reply is missing or whitespace-only
 */
async function ollamaChat(config, opts, maxTokens) {
  const base = (config.embedding?.ollama_url || process.env.OLLAMA_URL || 'http://localhost:11434').replace(/\/$/, '');
  const model =
    process.env.OLLAMA_CHAT_MODEL ||
    config.llm?.ollama_chat_model ||
    process.env.OLLAMA_MODEL ||
    'llama3.2';
  let ollamaRes;
  try {
    ollamaRes = await fetch(`${base}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: opts.system },
          { role: 'user', content: opts.user },
        ],
        // Ask for one complete JSON reply instead of a stream of chunks.
        stream: false,
        options: { num_predict: maxTokens },
      }),
    });
  } catch (e) {
    // fetch rejected before any HTTP response arrived; rethrow with setup guidance.
    const detail = e?.message || String(e);
    throw new Error(
      `LLM provider not reachable (${base}): ${detail}. Set OPENAI_API_KEY or ANTHROPIC_API_KEY in environment variables, or point OLLAMA_URL at a running Ollama instance.`,
    );
  }
  if (!ollamaRes.ok) {
    const t = await ollamaRes.text();
    throw new Error(`Ollama chat failed (${ollamaRes.status}): ${t}. Set OPENAI_API_KEY or OLLAMA_CHAT_MODEL to a chat-capable model.`);
  }
  const data = await ollamaRes.json();
  const raw = data.message?.content;
  // Trim BEFORE the emptiness check: a whitespace-only reply carries no content and must be
  // reported as empty (as anthropicChat does) instead of being returned to the caller as ''.
  const text = raw ? String(raw).trim() : '';
  if (!text) throw new Error('Ollama chat: empty response');
  return text;
}
262
/**
 * Pick a chat lane and return its completion text.
 *
 * An explicitly selected provider (see `chatProvider`) is handled first:
 *   - `deepinfra`  → DeepInfra; on failure OpenAI and then Anthropic are tried when their keys are
 *                    set. If every lane fails, one error lists each failure; with no fallback keys
 *                    the DeepInfra error is rethrown unchanged.
 *   - `openrouter` → OpenRouter only; BYO-key lane, so a failure is surfaced and never re-routed.
 *   - `openai` / `anthropic` → that lane only; the matching key is required.
 *   - `ollama`     → local Ollama regardless of any cloud keys present.
 * Any other value falls through to auto-detection:
 *   - only DEEPINFRA_API_KEY set (no OpenAI / Anthropic key) → DeepInfra;
 *   - `chatPreferAnthropic()` with both keys → Anthropic, then OpenAI as fallback;
 *   - otherwise OpenAI if keyed, else Anthropic if keyed, else Ollama.
 *
 * A key counts as present only when its env var is non-blank after trimming.
 *
 * @param {{ embedding?: { provider?: string, model?: string, ollama_url?: string }, llm?: { provider?: string, ollama_chat_model?: string, openai_chat_model?: string, anthropic_chat_model?: string, deepinfra_chat_model?: string, openrouter_chat_model?: string } }} config - loadConfig() or mini hub config
 * @param {{ system: string, user: string, maxTokens?: number }} opts - `maxTokens` defaults to 512
 * @returns {Promise<string>}
 * @throws {Error} when an explicitly selected provider lacks its API key, or when the chosen lane
 *   (and every applicable fallback) fails
 */
export async function completeChat(config, opts) {
  const maxTokens = opts.maxTokens ?? 512;
  const keyPresent = (name) => {
    const value = process.env[name];
    return Boolean(value && String(value).trim());
  };
  const describe = (err) => (err instanceof Error ? err.message : String(err));
  const hasOpenai = keyPresent('OPENAI_API_KEY');
  const hasAnthropic = keyPresent('ANTHROPIC_API_KEY');
  const hasDeepinfra = keyPresent('DEEPINFRA_API_KEY');

  switch (chatProvider(config)) {
    // 1. Explicit DeepInfra: try DeepInfra first; fall back to OpenAI then Anthropic if available.
    case 'deepinfra': {
      if (!hasDeepinfra) {
        throw new Error(
          "Chat provider 'deepinfra' selected (KNOWTATION_CHAT_PROVIDER or config llm.provider) but " +
            'DEEPINFRA_API_KEY is not set. Set DEEPINFRA_API_KEY (Netlify deploy env for hosted Hub) or change the provider.',
        );
      }
      let primaryFailure;
      try {
        return await deepinfraChat(config, opts, maxTokens);
      } catch (err) {
        primaryFailure = err;
      }
      const fallbacks = [];
      if (hasOpenai) fallbacks.push({ label: 'OpenAI', run: openaiChat });
      if (hasAnthropic) fallbacks.push({ label: 'Anthropic', run: anthropicChat });
      // Nothing to fall back to: surface the DeepInfra error object itself, unwrapped.
      if (fallbacks.length === 0) throw primaryFailure;
      const failed = [];
      for (const { label, run } of fallbacks) {
        try {
          return await run(config, opts, maxTokens);
        } catch (err) {
          failed.push({ label, err });
        }
      }
      const report = [`DeepInfra chat failed (${describe(primaryFailure)})`];
      for (const { label, err } of failed) {
        report.push(`${label} fallback failed (${describe(err)})`);
      }
      throw new Error(report.join('; '));
    }

    // 2. Explicit OpenRouter: BYO-key lane. Require OPENROUTER_API_KEY and never fall back to a
    //    managed lane — surfacing the failure preserves the user's privacy/billing contract.
    case 'openrouter': {
      if (!keyPresent('OPENROUTER_API_KEY')) {
        throw new Error(
          "Chat provider 'openrouter' selected (KNOWTATION_CHAT_PROVIDER or config llm.provider) but " +
            'OPENROUTER_API_KEY is not set. Set OPENROUTER_API_KEY or change the provider.',
        );
      }
      return openrouterChat(config, opts, maxTokens);
    }

    // 3. Explicit OpenAI / Anthropic: skip auto-detection and require the matching key.
    case 'openai': {
      if (!hasOpenai) {
        throw new Error(
          "Chat provider 'openai' selected (KNOWTATION_CHAT_PROVIDER or config llm.provider) but " +
            'OPENAI_API_KEY is not set. Set OPENAI_API_KEY or change the provider.',
        );
      }
      return openaiChat(config, opts, maxTokens);
    }
    case 'anthropic': {
      if (!hasAnthropic) {
        throw new Error(
          "Chat provider 'anthropic' selected (KNOWTATION_CHAT_PROVIDER or config llm.provider) but " +
            'ANTHROPIC_API_KEY is not set. Set ANTHROPIC_API_KEY or change the provider.',
        );
      }
      return anthropicChat(config, opts, maxTokens);
    }

    // 3b. Explicit Ollama: force the local Ollama lane regardless of any cloud keys present.
    case 'ollama':
      return ollamaChat(config, opts, maxTokens);

    // Unset or unrecognised provider id: continue with auto-detection below.
    default:
      break;
  }

  // 4. Implicit DeepInfra: only the DeepInfra key is set (no OpenAI / no Anthropic).
  //    Backward compatible — never preempts an existing OpenAI/Anthropic deployment.
  if (hasDeepinfra && !hasOpenai && !hasAnthropic) {
    return deepinfraChat(config, opts, maxTokens);
  }

  // 5. Both keys present and Anthropic preferred: Anthropic first, OpenAI as the fallback.
  if (hasAnthropic && hasOpenai && chatPreferAnthropic()) {
    let anthropicFailure;
    try {
      return await anthropicChat(config, opts, maxTokens);
    } catch (err) {
      anthropicFailure = err;
    }
    try {
      return await openaiChat(config, opts, maxTokens);
    } catch (openaiFailure) {
      const a = describe(anthropicFailure);
      const o = describe(openaiFailure);
      throw new Error(`Anthropic chat failed (${a}); OpenAI fallback failed (${o})`);
    }
  }

  // 6. Default chain. With only an Anthropic key the preference flag makes no difference:
  //    Anthropic is the sole keyed lane either way.
  if (hasOpenai) return openaiChat(config, opts, maxTokens);
  if (hasAnthropic) return anthropicChat(config, opts, maxTokens);
  return ollamaChat(config, opts, maxTokens);
}
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 1 day ago