lib/llm-complete.mjs · aaronrene/knowtation — MuseHub

aaronrene / knowtation public

llm-complete.mjs

393 lines 16.1 KB

Raw

sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago

1	/**
2	* Minimal chat completion for MCP summarize (Issue #1 Phase C6) and Hub proposal LLM jobs.
3	*
4	* Provider selection (in order):
5	* 1. KNOWTATION_CHAT_PROVIDER=deepinfra → DeepInfra (OpenAI-compatible). Falls back to OpenAI then
6	* Anthropic if DeepInfra returns an error and those keys are set.
7	* 2. KNOWTATION_CHAT_PROVIDER=openrouter → OpenRouter (OpenAI-compatible, BYO key). Requires
8	* OPENROUTER_API_KEY. No fallback to managed lanes: this is a
9	* "bring your own provider" lane, so a failure must surface
10	* rather than silently re-route note text to a metered/managed
11	* provider (privacy + billing correctness — see
12	* docs/COMPANION-APP-MODEL-ROUTING-AND-ENRICHMENT-ARCHITECTURE.md §4/§6).
13	* 3. KNOWTATION_CHAT_PROVIDER=openai → OpenAI only (no fallback). Requires OPENAI_API_KEY.
14	* 4. KNOWTATION_CHAT_PROVIDER=anthropic → Anthropic only (no fallback). Requires ANTHROPIC_API_KEY.
15	* 5. Implicit DeepInfra: DEEPINFRA_API_KEY set AND neither OPENAI_API_KEY nor ANTHROPIC_API_KEY set.
16	* (Backward compatible — does NOT preempt an existing OpenAI/Anthropic deployment.)
17	* 6. KNOWTATION_CHAT_PREFER_ANTHROPIC=1 (or true): try Anthropic before OpenAI when both keys exist;
18	* OpenAI is used as fallback if Claude fails.
19	* 7. Default: OpenAI when OPENAI_API_KEY; else Anthropic when ANTHROPIC_API_KEY; else Ollama /api/chat.
20	*
21	* OpenRouter is explicit-only: it is never selected implicitly, so adding OPENROUTER_API_KEY to an
22	* environment cannot change the provider for an existing deployment unless KNOWTATION_CHAT_PROVIDER=openrouter
23	* is also set (mirrors the implicit-DeepInfra backward-compatibility guarantee).
24	*
25	* Provider source precedence: `KNOWTATION_CHAT_PROVIDER` env (operator lock) wins; otherwise
26	* `config.llm.provider` (persisted via the Hub Settings UI → config/local.yaml) is honored. This
27	* lets a self-hosted operator pick the chat provider from the UI without an env var, while hosted /
28	* env-locked deploys remain authoritative. Selecting `ollama` forces the local lane regardless of
29	* any cloud keys present.
30	*
31	* Models: OPENAI_CHAT_MODEL (gpt-4o-mini), ANTHROPIC_CHAT_MODEL (claude-3-5-haiku-20241022),
32	* DEEPINFRA_CHAT_MODEL (Qwen/Qwen2.5-72B-Instruct), OPENROUTER_CHAT_MODEL (openai/gpt-4o-mini),
33	* OLLAMA_CHAT_MODEL (llama3.2).
34	*
35	* OpenRouter attribution (optional, OpenRouter best practice for app ranking; sent only when set):
36	* OPENROUTER_SITE_URL → HTTP-Referer header, OPENROUTER_APP_TITLE → X-Title header.
37	*/
38
// Hosted chat endpoints, one per provider lane.

/** OpenAI Chat Completions endpoint. */
const OPENAI_CHAT_URL = 'https://api.openai.com/v1/chat/completions';

/** Anthropic Messages endpoint. */
const ANTHROPIC_MESSAGES_URL = 'https://api.anthropic.com/v1/messages';

/** DeepInfra chat endpoint (OpenAI-compatible wire format). */
const DEEPINFRA_CHAT_URL = 'https://api.deepinfra.com/v1/openai/chat/completions';

/** OpenRouter chat endpoint (OpenAI-compatible wire format). */
const OPENROUTER_CHAT_URL = 'https://openrouter.ai/api/v1/chat/completions';
43
/**
 * OpenAI chat completion: one system message plus one user message.
 *
 * Model precedence: `config.llm.openai_chat_model`, then `OPENAI_CHAT_MODEL`, then `gpt-4o-mini`.
 *
 * @param {{ llm?: { openai_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts
 * @param {number} maxTokens - Sent to the API as `max_tokens`.
 * @returns {Promise<string>} The assistant message text, trimmed and never empty.
 * @throws {Error} When OPENAI_API_KEY is unset or blank, when the HTTP response is not ok
 *   (status and body are included in the message), or when the reply carries no text.
 */
async function openaiChat(config, opts, maxTokens) {
  // Trim so a key padded with whitespace (e.g. a trailing newline from an env file) still yields
  // a valid Authorization header, and a blank key is reported as missing.
  const apiKey = String(process.env.OPENAI_API_KEY || '').trim();
  if (!apiKey) throw new Error('OpenAI chat: OPENAI_API_KEY is not set');
  const model = config.llm?.openai_chat_model || process.env.OPENAI_CHAT_MODEL || 'gpt-4o-mini';
  const res = await fetch(OPENAI_CHAT_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` },
    body: JSON.stringify({
      model,
      messages: [
        { role: 'system', content: opts.system },
        { role: 'user', content: opts.user },
      ],
      max_tokens: maxTokens,
    }),
  });
  if (!res.ok) {
    const t = await res.text();
    throw new Error(`OpenAI chat failed: ${res.status} ${t}`);
  }
  const data = await res.json();
  const raw = data.choices?.[0]?.message?.content;
  // Trim before the emptiness check so a whitespace-only completion is reported as empty
  // instead of being returned as ''.
  const text = raw ? String(raw).trim() : '';
  if (!text) throw new Error('OpenAI chat: empty response');
  return text;
}
74
/**
 * Anthropic Messages completion: the system prompt goes in the top-level `system` field and the
 * user prompt is sent as a single user message.
 *
 * Model precedence: `config.llm.anthropic_chat_model`, then `ANTHROPIC_CHAT_MODEL`, then
 * `claude-3-5-haiku-20241022`.
 *
 * @param {{ llm?: { anthropic_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts
 * @param {number} maxTokens - Sent to the API as `max_tokens`.
 * @returns {Promise<string>} Concatenated text of the reply's text blocks, trimmed and never empty.
 * @throws {Error} When ANTHROPIC_API_KEY is unset or blank, when the HTTP response is not ok
 *   (status and body are included in the message), or when the reply carries no text.
 */
async function anthropicChat(config, opts, maxTokens) {
  // Trim so a key padded with whitespace still yields a valid header, and a blank key is
  // reported as missing.
  const anthropicKey = String(process.env.ANTHROPIC_API_KEY || '').trim();
  if (!anthropicKey) throw new Error('Anthropic chat: ANTHROPIC_API_KEY is not set');
  const model =
    config.llm?.anthropic_chat_model ||
    process.env.ANTHROPIC_CHAT_MODEL ||
    'claude-3-5-haiku-20241022';
  const res = await fetch(ANTHROPIC_MESSAGES_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': anthropicKey,
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify({
      model,
      max_tokens: maxTokens,
      system: opts.system,
      messages: [{ role: 'user', content: opts.user }],
    }),
  });
  if (!res.ok) {
    const t = await res.text();
    throw new Error(`Anthropic chat failed: ${res.status} ${t}`);
  }
  const data = await res.json();
  const blocks = Array.isArray(data.content) ? data.content : [];
  // `content` is a list of blocks and the first one is not guaranteed to carry text, so collect
  // every block that has a `text` field rather than reading only blocks[0].
  const text = blocks
    .filter((block) => block && block.text != null)
    .map((block) => String(block.text))
    .join('')
    .trim();
  if (!text) throw new Error('Anthropic chat: empty response');
  return text;
}
112
/**
 * Whether KNOWTATION_CHAT_PREFER_ANTHROPIC opts in to trying Anthropic before OpenAI.
 * Accepts exactly `1`, or `true` in any letter case; the value is not trimmed.
 * @returns {boolean}
 */
function chatPreferAnthropic() {
  const flag = process.env.KNOWTATION_CHAT_PREFER_ANTHROPIC;
  if (flag === '1') return true;
  const lowered = String(flag || '').toLowerCase();
  return lowered === 'true';
}
117
/**
 * Work out which chat provider was explicitly requested.
 *
 * Sources, highest priority first:
 * 1. The `KNOWTATION_CHAT_PROVIDER` env var — the operator lock, so an env-locked deploy is never
 *    overridden by a persisted UI setting.
 * 2. `config.llm.provider` (persisted via the Hub Settings UI → config/local.yaml).
 *
 * Both values are trimmed and lowercased. An empty string means nothing was requested and the
 * caller should use its default auto-detection chain.
 *
 * @param {{ llm?: { provider?: string } }} [config]
 * @returns {string} Lowercased provider id, or '' when unset.
 */
function chatProvider(config) {
  const normalize = (value) => String(value || '').trim().toLowerCase();
  const fromEnv = normalize(process.env.KNOWTATION_CHAT_PROVIDER);
  return fromEnv !== '' ? fromEnv : normalize(config?.llm?.provider);
}
132
/**
 * DeepInfra chat completion (OpenAI-compatible wire format): one system message plus one user
 * message.
 *
 * Model precedence: `config.llm.deepinfra_chat_model`, then `DEEPINFRA_CHAT_MODEL`, then
 * `Qwen/Qwen2.5-72B-Instruct`.
 *
 * @param {{ llm?: { deepinfra_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts
 * @param {number} maxTokens - Sent to the API as `max_tokens`.
 * @returns {Promise<string>} The assistant message text, trimmed and never empty.
 * @throws {Error} When DEEPINFRA_API_KEY is unset or blank, when the HTTP response is not ok
 *   (status and body are included in the message), or when the reply carries no text.
 */
async function deepinfraChat(config, opts, maxTokens) {
  // Trim so a key padded with whitespace still yields a valid Authorization header, and a blank
  // key is reported as missing.
  const apiKey = String(process.env.DEEPINFRA_API_KEY || '').trim();
  if (!apiKey) throw new Error('DeepInfra chat: DEEPINFRA_API_KEY is not set');
  const model =
    config.llm?.deepinfra_chat_model ||
    process.env.DEEPINFRA_CHAT_MODEL ||
    'Qwen/Qwen2.5-72B-Instruct';
  const res = await fetch(DEEPINFRA_CHAT_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` },
    body: JSON.stringify({
      model,
      messages: [
        { role: 'system', content: opts.system },
        { role: 'user', content: opts.user },
      ],
      max_tokens: maxTokens,
    }),
  });
  if (!res.ok) {
    const t = await res.text();
    throw new Error(`DeepInfra chat failed: ${res.status} ${t}`);
  }
  const data = await res.json();
  const raw = data.choices?.[0]?.message?.content;
  // Trim before the emptiness check so a whitespace-only completion is reported as empty
  // instead of being returned as ''.
  const text = raw ? String(raw).trim() : '';
  if (!text) throw new Error('DeepInfra chat: empty response');
  return text;
}
166
/**
 * OpenRouter chat completion (OpenAI-compatible wire format, BYO key).
 *
 * OpenRouter is a provider aggregator: the user supplies their own OPENROUTER_API_KEY and pays
 * OpenRouter directly, so this lane is never metered against Knowtation packs. Because the user
 * owns the contract, a failure here is thrown to the caller; this function never retries against
 * another provider, since that would change both the privacy surface (note text → a different
 * third party) and the billing surface (a metered pack event the user did not choose).
 *
 * Model precedence: `config.llm.openrouter_chat_model`, then `OPENROUTER_CHAT_MODEL`, then
 * `openai/gpt-4o-mini`. Optional attribution headers are sent only when their env vars are
 * non-blank: OPENROUTER_SITE_URL → `HTTP-Referer`, OPENROUTER_APP_TITLE → `X-Title`.
 *
 * @param {{ llm?: { openrouter_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts
 * @param {number} maxTokens - Sent to the API as `max_tokens`.
 * @returns {Promise<string>} The assistant message text, trimmed and never empty.
 * @throws {Error} When OPENROUTER_API_KEY is unset or blank, when the HTTP response is not ok
 *   (status and body are included in the message), or when the reply carries no text.
 */
async function openrouterChat(config, opts, maxTokens) {
  // Trim so a key padded with whitespace still yields a valid Authorization header, and a blank
  // key is reported as missing.
  const apiKey = String(process.env.OPENROUTER_API_KEY || '').trim();
  if (!apiKey) throw new Error('OpenRouter chat: OPENROUTER_API_KEY is not set');
  const model =
    config.llm?.openrouter_chat_model ||
    process.env.OPENROUTER_CHAT_MODEL ||
    'openai/gpt-4o-mini';
  const headers = {
    'Content-Type': 'application/json',
    Authorization: `Bearer ${apiKey}`,
  };
  const siteUrl = String(process.env.OPENROUTER_SITE_URL || '').trim();
  if (siteUrl) headers['HTTP-Referer'] = siteUrl;
  const appTitle = String(process.env.OPENROUTER_APP_TITLE || '').trim();
  if (appTitle) headers['X-Title'] = appTitle;
  const res = await fetch(OPENROUTER_CHAT_URL, {
    method: 'POST',
    headers,
    body: JSON.stringify({
      model,
      messages: [
        { role: 'system', content: opts.system },
        { role: 'user', content: opts.user },
      ],
      max_tokens: maxTokens,
    }),
  });
  if (!res.ok) {
    const t = await res.text();
    throw new Error(`OpenRouter chat failed: ${res.status} ${t}`);
  }
  const data = await res.json();
  const raw = data.choices?.[0]?.message?.content;
  // Trim before the emptiness check so a whitespace-only completion is reported as empty
  // instead of being returned as ''.
  const text = raw ? String(raw).trim() : '';
  if (!text) throw new Error('OpenRouter chat: empty response');
  return text;
}
216
/**
 * Ollama chat completion via `POST {base}/api/chat` with streaming disabled. Used both as the
 * default fallback and as the explicit `ollama` lane.
 *
 * Base URL precedence: `config.embedding.ollama_url`, then `OLLAMA_URL`, then
 * `http://localhost:11434`; a single trailing slash is stripped.
 * Model precedence: `OLLAMA_CHAT_MODEL`, then `config.llm.ollama_chat_model`, then `OLLAMA_MODEL`,
 * then `llama3.2`.
 *
 * @param {{ embedding?: { ollama_url?: string }, llm?: { ollama_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts
 * @param {number} maxTokens - Sent as `options.num_predict`.
 * @returns {Promise<string>} The assistant message text, trimmed and never empty.
 * @throws {Error} When the request cannot be sent (the original error is attached as `cause`),
 *   when the HTTP response is not ok, or when the reply carries no text.
 */
async function ollamaChat(config, opts, maxTokens) {
  const base = (config.embedding?.ollama_url || process.env.OLLAMA_URL || 'http://localhost:11434').replace(/\/$/, '');
  // NOTE(review): here the env var outranks config, whereas the hosted lanes in this file read
  // config first — confirm this ordering is intended.
  const model =
    process.env.OLLAMA_CHAT_MODEL ||
    config.llm?.ollama_chat_model ||
    process.env.OLLAMA_MODEL ||
    'llama3.2';
  let ollamaRes;
  try {
    ollamaRes = await fetch(`${base}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: opts.system },
          { role: 'user', content: opts.user },
        ],
        stream: false,
        options: { num_predict: maxTokens },
      }),
    });
  } catch (e) {
    const detail = e?.message || String(e);
    // Keep the underlying network error reachable for debugging instead of discarding it.
    throw new Error(
      `LLM provider not reachable (${base}): ${detail}. Set OPENAI_API_KEY or ANTHROPIC_API_KEY in environment variables, or point OLLAMA_URL at a running Ollama instance.`,
      { cause: e },
    );
  }
  if (!ollamaRes.ok) {
    const t = await ollamaRes.text();
    throw new Error(`Ollama chat failed (${ollamaRes.status}): ${t}. Set OPENAI_API_KEY or OLLAMA_CHAT_MODEL to a chat-capable model.`);
  }
  const data = await ollamaRes.json();
  const raw = data.message?.content;
  // Trim before the emptiness check so a whitespace-only reply is reported as empty instead of
  // being returned as ''.
  const text = raw ? String(raw).trim() : '';
  if (!text) throw new Error('Ollama chat: empty response');
  return text;
}
262
/**
 * Run one system + user chat completion against whichever provider lane is active.
 *
 * An explicitly requested provider (see `chatProvider`) is handled first:
 * - `deepinfra`: requires DEEPINFRA_API_KEY; on failure tries OpenAI, then Anthropic, for whichever
 *   of those keys are set. With no fallback key the DeepInfra error is rethrown unchanged;
 *   otherwise a failure of every lane raises one error listing each lane's message.
 * - `openrouter`, `openai`, `anthropic`: require the matching key; no fallback.
 * - `ollama`: always uses the local Ollama lane, whatever cloud keys are present.
 *
 * Any other provider value falls through to auto-detection:
 * - DeepInfra when DEEPINFRA_API_KEY is the only one of the three cloud keys set;
 * - Anthropic first when `chatPreferAnthropic()` is true and its key is set (OpenAI is the
 *   fallback if that key is also set);
 * - otherwise OpenAI, then Anthropic, then Ollama.
 *
 * A key counts as set only when it is non-blank after trimming.
 *
 * @param {{ embedding?: { provider?: string, model?: string, ollama_url?: string }, llm?: { provider?: string, ollama_chat_model?: string, openai_chat_model?: string, anthropic_chat_model?: string, deepinfra_chat_model?: string, openrouter_chat_model?: string } }} config - loadConfig() or mini hub config
 * @param {{ system: string, user: string, maxTokens?: number }} opts - `maxTokens` defaults to 512.
 * @returns {Promise<string>}
 */
export async function completeChat(config, opts) {
  const maxTokens = opts.maxTokens ?? 512;

  const isKeySet = (envName) => {
    const value = process.env[envName];
    return Boolean(value && String(value).trim());
  };
  const messageOf = (err) => (err instanceof Error ? err.message : String(err));
  const missingKeyError = (id, envName, where = '') =>
    new Error(
      `Chat provider '${id}' selected (KNOWTATION_CHAT_PROVIDER or config llm.provider) but ` +
        `${envName} is not set. Set ${envName}${where} or change the provider.`,
    );

  const hasOpenai = isKeySet('OPENAI_API_KEY');
  const hasAnthropic = isKeySet('ANTHROPIC_API_KEY');
  const hasDeepinfra = isKeySet('DEEPINFRA_API_KEY');

  switch (chatProvider(config)) {
    case 'deepinfra': {
      if (!hasDeepinfra) {
        throw missingKeyError('deepinfra', 'DEEPINFRA_API_KEY', ' (Netlify deploy env for hosted Hub)');
      }
      let primaryError;
      try {
        return await deepinfraChat(config, opts, maxTokens);
      } catch (err) {
        primaryError = err;
      }
      // Fallback lanes in priority order, limited to the keys that are actually configured.
      const fallbacks = [];
      if (hasOpenai) fallbacks.push(['OpenAI', openaiChat]);
      if (hasAnthropic) fallbacks.push(['Anthropic', anthropicChat]);
      // Nothing to fall back to: surface the DeepInfra error itself, not a wrapper.
      if (fallbacks.length === 0) throw primaryError;
      const failures = [`DeepInfra chat failed (${messageOf(primaryError)})`];
      for (const [label, runLane] of fallbacks) {
        try {
          return await runLane(config, opts, maxTokens);
        } catch (err) {
          failures.push(`${label} fallback failed (${messageOf(err)})`);
        }
      }
      throw new Error(failures.join('; '));
    }

    case 'openrouter':
      // BYO-key lane: a failure is thrown to the caller and never re-routed to another provider.
      if (!isKeySet('OPENROUTER_API_KEY')) {
        throw missingKeyError('openrouter', 'OPENROUTER_API_KEY');
      }
      return openrouterChat(config, opts, maxTokens);

    case 'openai':
      if (!hasOpenai) throw missingKeyError('openai', 'OPENAI_API_KEY');
      return openaiChat(config, opts, maxTokens);

    case 'anthropic':
      if (!hasAnthropic) throw missingKeyError('anthropic', 'ANTHROPIC_API_KEY');
      return anthropicChat(config, opts, maxTokens);

    case 'ollama':
      // Forced local lane, regardless of any cloud keys present.
      return ollamaChat(config, opts, maxTokens);

    default:
      // No recognised explicit provider: auto-detect below.
      break;
  }

  // Implicit DeepInfra only when it is the sole cloud key, so an existing OpenAI/Anthropic
  // deployment is never preempted.
  if (hasDeepinfra && !hasOpenai && !hasAnthropic) {
    return deepinfraChat(config, opts, maxTokens);
  }

  if (hasAnthropic && chatPreferAnthropic()) {
    if (!hasOpenai) {
      return anthropicChat(config, opts, maxTokens);
    }
    let anthropicError;
    try {
      return await anthropicChat(config, opts, maxTokens);
    } catch (err) {
      anthropicError = err;
    }
    try {
      return await openaiChat(config, opts, maxTokens);
    } catch (openaiError) {
      throw new Error(
        `Anthropic chat failed (${messageOf(anthropicError)}); OpenAI fallback failed (${messageOf(openaiError)})`,
      );
    }
  }

  if (hasOpenai) {
    return openaiChat(config, opts, maxTokens);
  }
  if (hasAnthropic) {
    return anthropicChat(config, opts, maxTokens);
  }
  return ollamaChat(config, opts, maxTokens);
}

File History 2 commits

sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ 1 day ago

sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 1 day ago