llm-complete.mjs
281 lines 10.6 KB
Raw
1 /**
2 * Minimal chat completion for MCP summarize (Issue #1 Phase C6) and Hub proposal LLM jobs.
3 *
4 * Provider selection (in order):
5 * 1. KNOWTATION_CHAT_PROVIDER=deepinfra → DeepInfra (OpenAI-compatible). Falls back to OpenAI then
6 * Anthropic if DeepInfra returns an error and those keys are set.
7 * 2. KNOWTATION_CHAT_PROVIDER=openai → OpenAI only (no fallback). Requires OPENAI_API_KEY.
8 * 3. KNOWTATION_CHAT_PROVIDER=anthropic → Anthropic only (no fallback). Requires ANTHROPIC_API_KEY.
9 * 4. Implicit DeepInfra: DEEPINFRA_API_KEY set AND neither OPENAI_API_KEY nor ANTHROPIC_API_KEY set.
10 * (Backward compatible — does NOT preempt an existing OpenAI/Anthropic deployment.)
11 * 5. KNOWTATION_CHAT_PREFER_ANTHROPIC=1 (or true): try Anthropic before OpenAI when both keys exist;
12 * OpenAI is used as fallback if Claude fails.
13 * 6. Default: OpenAI when OPENAI_API_KEY; else Anthropic when ANTHROPIC_API_KEY; else Ollama /api/chat.
14 *
15 * Models: OPENAI_CHAT_MODEL (gpt-4o-mini), ANTHROPIC_CHAT_MODEL (claude-3-5-haiku-20241022),
16 * DEEPINFRA_CHAT_MODEL (Qwen/Qwen2.5-72B-Instruct), OLLAMA_CHAT_MODEL (llama3.2).
17 */
18
/** Anthropic Messages API endpoint. */
const ANTHROPIC_MESSAGES_URL = 'https://api.anthropic.com/v1/messages';

/** DeepInfra chat completions endpoint (OpenAI-compatible request/response shape). */
const DEEPINFRA_CHAT_URL = 'https://api.deepinfra.com/v1/openai/chat/completions';

/** OpenAI chat completions endpoint. */
const OPENAI_CHAT_URL = 'https://api.openai.com/v1/chat/completions';
22
/**
 * Single-turn chat completion against the OpenAI chat completions endpoint.
 *
 * Model resolution order: `config.llm.openai_chat_model`, then the
 * `OPENAI_CHAT_MODEL` environment variable, then `gpt-4o-mini`.
 *
 * @param {{ llm?: { openai_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts - System prompt and user message.
 * @param {number} maxTokens - Sent as `max_tokens`.
 * @returns {Promise<string>} Trimmed content of the first choice.
 * @throws {Error} When OPENAI_API_KEY is unset, the response status is not OK,
 *   or the first choice has no content.
 */
async function openaiChat(config, opts, maxTokens) {
  const key = process.env.OPENAI_API_KEY;
  if (!key) {
    throw new Error('OpenAI chat: OPENAI_API_KEY is not set');
  }

  const payload = {
    model: config.llm?.openai_chat_model || process.env.OPENAI_CHAT_MODEL || 'gpt-4o-mini',
    messages: [
      { role: 'system', content: opts.system },
      { role: 'user', content: opts.user },
    ],
    max_tokens: maxTokens,
  };

  const response = await fetch(OPENAI_CHAT_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${key}`,
    },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    // Include the raw response body so the caller can see the provider's own error text.
    const detail = await response.text();
    throw new Error(`OpenAI chat failed: ${response.status} ${detail}`);
  }

  const parsed = await response.json();
  const content = parsed.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error('OpenAI chat: empty response');
  }
  return String(content).trim();
}
53
/**
 * Single-turn chat completion against the Anthropic Messages API.
 *
 * Model resolution order: `config.llm.anthropic_chat_model`, then the
 * `ANTHROPIC_CHAT_MODEL` environment variable, then `claude-3-5-haiku-20241022`.
 *
 * @param {{ llm?: { anthropic_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts - System prompt and user message.
 * @param {number} maxTokens - Sent as `max_tokens`.
 * @returns {Promise<string>} Trimmed concatenation of the `text` of every content
 *   block in the response that carries one.
 * @throws {Error} When ANTHROPIC_API_KEY is unset, the response status is not OK,
 *   or the response contains no text.
 */
async function anthropicChat(config, opts, maxTokens) {
  const anthropicKey = process.env.ANTHROPIC_API_KEY;
  if (!anthropicKey) throw new Error('Anthropic chat: ANTHROPIC_API_KEY is not set');
  const model =
    config.llm?.anthropic_chat_model ||
    process.env.ANTHROPIC_CHAT_MODEL ||
    'claude-3-5-haiku-20241022';
  const res = await fetch(ANTHROPIC_MESSAGES_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': anthropicKey,
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify({
      model,
      max_tokens: maxTokens,
      system: opts.system,
      messages: [{ role: 'user', content: opts.user }],
    }),
  });
  if (!res.ok) {
    const t = await res.text();
    throw new Error(`Anthropic chat failed: ${res.status} ${t}`);
  }
  const data = await res.json();
  const blocks = Array.isArray(data.content) ? data.content : [];
  // `content` is a list of blocks. Reading only blocks[0] drops the reply when the
  // first block has no text or when the text is split across blocks, so join them all.
  const text = blocks
    .filter((block) => block && block.text != null)
    .map((block) => String(block.text))
    .join('')
    .trim();
  if (!text) throw new Error('Anthropic chat: empty response');
  return text;
}
91
/**
 * Report whether KNOWTATION_CHAT_PREFER_ANTHROPIC is switched on.
 *
 * The value is trimmed and lower-cased before comparison, so stray whitespace or
 * capitalisation in the environment does not silently disable the preference.
 *
 * @returns {boolean} True when the variable is `1` or `true` (case-insensitive).
 */
function chatPreferAnthropic() {
  const flag = String(process.env.KNOWTATION_CHAT_PREFER_ANTHROPIC ?? '')
    .trim()
    .toLowerCase();
  return flag === '1' || flag === 'true';
}
96
/**
 * Read KNOWTATION_CHAT_PROVIDER in normalised form.
 *
 * @returns {string} The trimmed, lower-cased value, or an empty string when unset.
 */
function chatProvider() {
  const raw = process.env.KNOWTATION_CHAT_PROVIDER || '';
  const normalised = String(raw).trim();
  return normalised.toLowerCase();
}
100
/**
 * Single-turn chat completion against DeepInfra's OpenAI-compatible endpoint.
 *
 * Model resolution order: `config.llm.deepinfra_chat_model`, then the
 * `DEEPINFRA_CHAT_MODEL` environment variable, then `Qwen/Qwen2.5-72B-Instruct`.
 *
 * @param {{ llm?: { deepinfra_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts - System prompt and user message.
 * @param {number} maxTokens - Sent as `max_tokens`.
 * @returns {Promise<string>} Trimmed content of the first choice.
 * @throws {Error} When DEEPINFRA_API_KEY is unset, the response status is not OK,
 *   or the first choice has no content.
 */
async function deepinfraChat(config, opts, maxTokens) {
  const token = process.env.DEEPINFRA_API_KEY;
  if (!token) {
    throw new Error('DeepInfra chat: DEEPINFRA_API_KEY is not set');
  }

  const chosenModel =
    config.llm?.deepinfra_chat_model || process.env.DEEPINFRA_CHAT_MODEL || 'Qwen/Qwen2.5-72B-Instruct';
  const conversation = [
    { role: 'system', content: opts.system },
    { role: 'user', content: opts.user },
  ];

  const response = await fetch(DEEPINFRA_CHAT_URL, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${token}`,
    },
    body: JSON.stringify({ model: chosenModel, messages: conversation, max_tokens: maxTokens }),
  });

  if (!response.ok) {
    // Include the raw response body so the caller can see the provider's own error text.
    const detail = await response.text();
    throw new Error(`DeepInfra chat failed: ${response.status} ${detail}`);
  }

  const parsed = await response.json();
  const content = parsed.choices?.[0]?.message?.content;
  if (!content) {
    throw new Error('DeepInfra chat: empty response');
  }
  return String(content).trim();
}
134
/**
 * Run provider attempts in order and return the first successful completion.
 *
 * @param {Array<{ label: string, run: () => Promise<string> }>} attempts - Non-empty, ordered.
 * @returns {Promise<string>}
 * @throws {Error} With a single attempt, the provider's own error is rethrown untouched.
 *   With several, a combined error of the form
 *   `"<first> chat failed (...); <next> fallback failed (...)"` is thrown, carrying the
 *   last underlying error as `cause`.
 */
async function runChatAttempts(attempts) {
  const failures = [];
  for (const { label, run } of attempts) {
    try {
      return await run();
    } catch (err) {
      failures.push({ label, err });
    }
  }
  if (failures.length === 1) throw failures[0].err;
  const summary = failures
    .map(({ label, err }, index) => {
      const detail = err instanceof Error ? err.message : String(err);
      return `${label} ${index === 0 ? 'chat' : 'fallback'} failed (${detail})`;
    })
    .join('; ');
  throw new Error(summary, { cause: failures[failures.length - 1].err });
}

/**
 * Single-turn chat completion against an Ollama server's `/api/chat` endpoint.
 *
 * Base URL: `config.embedding.ollama_url`, then `OLLAMA_URL`, then `http://localhost:11434`
 * (one trailing slash is stripped). Model: `OLLAMA_CHAT_MODEL`, then
 * `config.llm.ollama_chat_model`, then `OLLAMA_MODEL`, then `llama3.2` — note the
 * environment variable takes precedence over config here.
 *
 * @param {{ embedding?: { ollama_url?: string }, llm?: { ollama_chat_model?: string } }} config
 * @param {{ system: string, user: string }} opts
 * @param {number} maxTokens - Sent as `options.num_predict`.
 * @returns {Promise<string>} Trimmed `message.content` of the response.
 * @throws {Error} When the server cannot be reached, the status is not OK, or the reply is empty.
 */
async function ollamaChat(config, opts, maxTokens) {
  const base = (config.embedding?.ollama_url || process.env.OLLAMA_URL || 'http://localhost:11434').replace(/\/$/, '');
  const model =
    process.env.OLLAMA_CHAT_MODEL ||
    config.llm?.ollama_chat_model ||
    process.env.OLLAMA_MODEL ||
    'llama3.2';
  let ollamaRes;
  try {
    ollamaRes = await fetch(`${base}/api/chat`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        messages: [
          { role: 'system', content: opts.system },
          { role: 'user', content: opts.user },
        ],
        stream: false,
        options: { num_predict: maxTokens },
      }),
    });
  } catch (e) {
    // fetch itself rejected (connection refused, DNS failure, ...): this is the last
    // provider in the chain, so point the operator at the configuration options.
    const detail = e?.message || String(e);
    throw new Error(
      `LLM provider not reachable (${base}): ${detail}. Set OPENAI_API_KEY or ANTHROPIC_API_KEY in environment variables, or point OLLAMA_URL at a running Ollama instance.`,
      { cause: e },
    );
  }
  if (!ollamaRes.ok) {
    const t = await ollamaRes.text();
    throw new Error(`Ollama chat failed (${ollamaRes.status}): ${t}. Set OPENAI_API_KEY or OLLAMA_CHAT_MODEL to a chat-capable model.`);
  }
  const data = await ollamaRes.json();
  const text = data.message?.content;
  if (!text) throw new Error('Ollama chat: empty response');
  return String(text).trim();
}

/**
 * Complete a single-turn chat with whichever provider the environment selects.
 *
 * Selection order (a key counts as present only if it is non-blank after trimming):
 * 1. KNOWTATION_CHAT_PROVIDER=deepinfra: DeepInfra, then OpenAI, then Anthropic as
 *    fallbacks for whichever of those keys are present. DEEPINFRA_API_KEY is required.
 * 2. KNOWTATION_CHAT_PROVIDER=openai / anthropic: that provider only; its key is required.
 * 3. Only DEEPINFRA_API_KEY present: DeepInfra.
 * 4. Anthropic preferred (see chatPreferAnthropic) and ANTHROPIC_API_KEY present:
 *    Anthropic, with OpenAI as fallback when its key is present.
 * 5. OpenAI if its key is present, else Anthropic if its key is present, else Ollama.
 *
 * @param {{ embedding?: { provider?: string, model?: string, ollama_url?: string }, llm?: { ollama_chat_model?: string, openai_chat_model?: string, anthropic_chat_model?: string, deepinfra_chat_model?: string } }} config - loadConfig() or mini hub config
 * @param {{ system: string, user: string, maxTokens?: number }} opts - `maxTokens` defaults to 512.
 * @returns {Promise<string>}
 * @throws {Error} When an explicitly selected provider has no key, or every attempted
 *   provider fails.
 */
export async function completeChat(config, opts) {
  const maxTokens = opts.maxTokens ?? 512;
  const hasKey = (name) => Boolean(String(process.env[name] || '').trim());
  const hasOpenai = hasKey('OPENAI_API_KEY');
  const hasAnthropic = hasKey('ANTHROPIC_API_KEY');
  const hasDeepinfra = hasKey('DEEPINFRA_API_KEY');
  const provider = chatProvider();

  const deepinfra = { label: 'DeepInfra', run: () => deepinfraChat(config, opts, maxTokens) };
  const openai = { label: 'OpenAI', run: () => openaiChat(config, opts, maxTokens) };
  const anthropic = { label: 'Anthropic', run: () => anthropicChat(config, opts, maxTokens) };

  // 1. Explicit DeepInfra: try DeepInfra first; fall back to OpenAI then Anthropic if available.
  if (provider === 'deepinfra') {
    if (!hasDeepinfra) {
      throw new Error(
        'KNOWTATION_CHAT_PROVIDER=deepinfra but DEEPINFRA_API_KEY is not set. ' +
          'Set DEEPINFRA_API_KEY in your environment (Netlify deploy env for hosted Hub) or remove KNOWTATION_CHAT_PROVIDER.',
      );
    }
    const attempts = [deepinfra];
    if (hasOpenai) attempts.push(openai);
    if (hasAnthropic) attempts.push(anthropic);
    return runChatAttempts(attempts);
  }

  // 2. Explicit OpenAI / Anthropic: no fallback; require the matching key.
  if (provider === 'openai') {
    if (!hasOpenai) {
      throw new Error(
        'KNOWTATION_CHAT_PROVIDER=openai but OPENAI_API_KEY is not set. ' +
          'Set OPENAI_API_KEY or remove KNOWTATION_CHAT_PROVIDER.',
      );
    }
    return openaiChat(config, opts, maxTokens);
  }
  if (provider === 'anthropic') {
    if (!hasAnthropic) {
      throw new Error(
        'KNOWTATION_CHAT_PROVIDER=anthropic but ANTHROPIC_API_KEY is not set. ' +
          'Set ANTHROPIC_API_KEY or remove KNOWTATION_CHAT_PROVIDER.',
      );
    }
    return anthropicChat(config, opts, maxTokens);
  }

  // 3. Implicit DeepInfra: only the DeepInfra key is set, so it never preempts an
  //    existing OpenAI/Anthropic deployment.
  if (hasDeepinfra && !hasOpenai && !hasAnthropic) {
    return deepinfraChat(config, opts, maxTokens);
  }

  // 4. Anthropic preferred: OpenAI is only a fallback, and only when its key exists.
  if (chatPreferAnthropic() && hasAnthropic) {
    return runChatAttempts(hasOpenai ? [anthropic, openai] : [anthropic]);
  }

  // 5. Default order: OpenAI, then Anthropic, then a local Ollama server.
  if (hasOpenai) {
    return openaiChat(config, opts, maxTokens);
  }
  if (hasAnthropic) {
    return anthropicChat(config, opts, maxTokens);
  }
  return ollamaChat(config, opts, maxTokens);
}
File History 1 commit