config.mjs
426 lines 18.6 KB
Raw
sha256:8915fe406161f95c1681f9469375e7bae5b28c884f00bedbdef65e4b0cd0738d docs(flow): commit FLOW-V0-SPEC.md hygiene for 7A-INT merge Human 13 hours ago
1 /**
2 * Config loader: config/local.yaml + env overrides. SPEC §4.4.
3 * Env overrides apply after file. vault_path is required.
4 * Multi-vault (Phase 15): when hub_vaults.yaml is absent, single vault "default" from vault_path.
5 */
6
7 import fs from 'fs';
8 import path from 'path';
9 import yaml from 'js-yaml';
10 import { readHubVaults } from './hub-vaults.mjs';
11 import { getRepoRoot } from './repo-root.mjs';
12
// Repository root as reported by getRepoRoot(); used as the default `cwd`
// for readTranscriptionYaml() and loadConfig() below.
const projectRoot = getRepoRoot();

// Names of the environment variables that loadConfig() consults. Each one
// overrides the matching key loaded from config/local.yaml.
const ENV_VAULT = 'KNOWTATION_VAULT_PATH'; // -> vault_path (also wins over hub_setup.yaml)
const ENV_QDRANT = 'QDRANT_URL'; // -> qdrant_url
const ENV_DATA_DIR = 'KNOWTATION_DATA_DIR'; // -> data_dir
const ENV_VECTOR_STORE = 'KNOWTATION_VECTOR_STORE'; // -> vector_store
const ENV_MEMORY_URL = 'KNOWTATION_MEMORY_URL'; // -> memory.url
const ENV_MEMORY_ENABLED = 'KNOWTATION_MEMORY_ENABLED'; // -> memory.enabled; only the exact strings 'true' / 'false' are honoured
const ENV_MEMORY_PROVIDER = 'KNOWTATION_MEMORY_PROVIDER'; // -> memory.provider
const ENV_AIR_ENDPOINT = 'KNOWTATION_AIR_ENDPOINT'; // -> air.endpoint
const ENV_OLLAMA_URL = 'OLLAMA_URL'; // -> embedding.ollama_url
const ENV_EMBEDDING_PROVIDER = 'EMBEDDING_PROVIDER'; // -> embedding.provider (trimmed, lower-cased; blank is ignored)
const ENV_EMBEDDING_MODEL = 'EMBEDDING_MODEL'; // -> embedding.model (trimmed; blank is ignored)
const ENV_TRANSCODE_OVERSIZED = 'KNOWTATION_TRANSCODE_OVERSIZED'; // -> transcription.transcode_oversized; accepts '0'/'false' and '1'/'true'
const ENV_MUSE_URL = 'MUSE_URL'; // -> muse.url (trimmed, trailing slashes removed; blank is ignored)
28
/**
 * Fallback embedding model for a provider, used when neither YAML nor env
 * supplies `model`. The model name has to belong to the chosen provider.
 *
 * @param {unknown} provider - provider name; any falsy value is treated as 'ollama'
 * @returns {string} model name for 'openai' / 'voyage', otherwise the Ollama default
 */
function defaultEmbeddingModelForProvider(provider) {
  // Matching is case-insensitive but NOT whitespace-tolerant.
  const normalized = String(provider || 'ollama').toLowerCase();
  switch (normalized) {
    case 'openai':
      return 'text-embedding-3-small';
    case 'voyage':
      return 'voyage-4-lite';
    default:
      return 'nomic-embed-text';
  }
}
36
// Fallback for the `ignore` key returned by loadConfig() when config/local.yaml
// does not provide one. NOTE(review): presumably these are folder names that
// vault consumers skip — confirm against the code that reads `ignore`.
const DEFAULT_IGNORE = ['templates', 'meta', 'node_modules', '.git'];
38
/**
 * Read only the `transcription` section of config/local.yaml.
 *
 * Unlike loadConfig(), this neither requires `vault_path` nor applies any
 * environment overrides; per the original note it serves lib/transcribe.mjs on
 * the hosted bridge where the full loader may be skipped. A missing file,
 * unparsable YAML, or a non-object `transcription` value all yield the defaults.
 *
 * @param {string} [cwd] - directory that contains `config/local.yaml` (default: project root)
 * @returns {{ provider: string, model: string, transcode_oversized: boolean }}
 */
export function readTranscriptionYaml(cwd = projectRoot) {
  const yamlFile = path.join(cwd, 'config', 'local.yaml');

  const section = (() => {
    if (!fs.existsSync(yamlFile)) return {};
    try {
      const parsed = yaml.load(fs.readFileSync(yamlFile, 'utf8')) || {};
      const candidate = parsed.transcription;
      return candidate && typeof candidate === 'object' ? candidate : {};
    } catch (_) {
      // Best effort: this slice is optional, so a broken file falls back to defaults.
      return {};
    }
  })();

  return {
    provider: section.provider || 'openai',
    model: section.model || 'whisper-1',
    // Transcoding stays on unless it is explicitly set to `false`.
    transcode_oversized: section.transcode_oversized !== false,
  };
}
64
/**
 * Load config from config/local.yaml (if present) then apply env overrides.
 *
 * Precedence, lowest to highest: config/local.yaml, then `<data_dir>/hub_setup.yaml`
 * (only `vault_path` and `vault.git`), then environment variables.
 *
 * **AIR config** (`air` key in local.yaml):
 * ```yaml
 * air:
 *   enabled: true        # master switch; default false
 *   required: true       # hard-fail: throw AttestationRequiredError when endpoint fails; default false
 *   endpoint: https://... # attestation endpoint URL; falls back to KNOWTATION_AIR_ENDPOINT env var
 * ```
 * When `air.required=true` a write or export is rejected if the attestation endpoint is
 * unreachable or returns a non-OK response. Default (`false`) is non-blocking: a placeholder
 * id is logged and the operation proceeds (backward-compatible).
 *
 * @param {string} [cwd] - Working directory (default: project root)
 * @returns {{ vault_path: string, vaultList: Array<{ id: string, path: string, label?: string }>, resolveVaultPath: (vaultId: string) => (string|undefined), qdrant_url?: string, vector_store: string, data_dir: string, embedding: { provider: string, model: string, ollama_url?: string }, indexer: { chunk_size: number, chunk_overlap: number }, transcription: { provider: string, model: string, transcode_oversized: boolean }, memory: object, air: { enabled: boolean, required: boolean, endpoint: string|undefined }, vault_git: { enabled: boolean, remote: string|undefined, auto_commit: boolean, auto_push: boolean }, mcp: { http_port: number, http_host: string }, muse: { url?: string }, daemon: object, llm: object, flow?: { visible_scopes?: string[], capture?: object }, ignore: string[] }}
 *   Fully resolved config. `vault_path` and `data_dir` are absolute; `embedding.ollama_url`
 *   comes from YAML or the OLLAMA_URL env var when set.
 * @throws {Error} if config/local.yaml cannot be parsed or its top level is not a mapping
 * @throws {Error} if vault_path is missing after load, or does not point at an existing directory
 */
export function loadConfig(cwd = projectRoot) {
  const configPath = path.join(cwd, 'config', 'local.yaml');
  let config = {};

  if (fs.existsSync(configPath)) {
    try {
      const raw = fs.readFileSync(configPath, 'utf8');
      config = yaml.load(raw) || {};
    } catch (e) {
      throw new Error(`Invalid config at ${configPath}: ${e.message}`, { cause: e });
    }
    // A YAML document may legally be a bare scalar or a sequence. Everything below assumes
    // a mapping; assigning overrides onto a primitive would throw a cryptic TypeError in
    // strict mode, so reject it here with a message that names the file.
    if (typeof config !== 'object' || Array.isArray(config)) {
      throw new Error(`Invalid config at ${configPath}: top level must be a YAML mapping`);
    }
  }

  // Env overrides (SPEC: env overrides, then config)
  if (process.env[ENV_VAULT]) config.vault_path = process.env[ENV_VAULT];
  if (process.env[ENV_QDRANT]) config.qdrant_url = process.env[ENV_QDRANT];
  if (process.env[ENV_DATA_DIR]) config.data_dir = process.env[ENV_DATA_DIR];
  if (process.env[ENV_VECTOR_STORE]) config.vector_store = process.env[ENV_VECTOR_STORE];
  if (process.env[ENV_MEMORY_URL]) config.memory = { ...(config.memory || {}), url: process.env[ENV_MEMORY_URL] };
  if (process.env[ENV_MEMORY_ENABLED] === 'true') config.memory = { ...(config.memory || {}), enabled: true };
  if (process.env[ENV_MEMORY_ENABLED] === 'false') config.memory = { ...(config.memory || {}), enabled: false };
  if (process.env[ENV_MEMORY_PROVIDER]) config.memory = { ...(config.memory || {}), provider: process.env[ENV_MEMORY_PROVIDER] };
  if (process.env[ENV_AIR_ENDPOINT]) config.air = { ...(config.air || {}), endpoint: process.env[ENV_AIR_ENDPOINT] };
  if (process.env[ENV_TRANSCODE_OVERSIZED] === '0' || process.env[ENV_TRANSCODE_OVERSIZED] === 'false') {
    config.transcription = { ...(config.transcription || {}), transcode_oversized: false };
  }
  if (process.env[ENV_TRANSCODE_OVERSIZED] === '1' || process.env[ENV_TRANSCODE_OVERSIZED] === 'true') {
    config.transcription = { ...(config.transcription || {}), transcode_oversized: true };
  }

  // Hub Setup overrides (optional): data_dir/hub_setup.yaml can set vault_path and vault.git
  const dataDirPath = path.resolve(cwd, config.data_dir || 'data');
  const hubSetupPath = path.join(dataDirPath, 'hub_setup.yaml');
  if (fs.existsSync(hubSetupPath)) {
    try {
      const setupRaw = fs.readFileSync(hubSetupPath, 'utf8');
      const setup = yaml.load(setupRaw) || {};
      // Hub writes vault_path here; operator/tests use KNOWTATION_VAULT_PATH — that must win (SPEC: env overrides).
      if (setup.vault_path != null && !process.env[ENV_VAULT]) {
        config.vault_path = setup.vault_path;
      }
      if (setup.vault?.git && typeof setup.vault.git === 'object') {
        config.vault = config.vault || {};
        config.vault.git = { ...(config.vault.git || {}), ...setup.vault.git };
      }
    } catch (_) {
      /* ignore invalid hub_setup: the file is optional, so loading stays best-effort */
    }
  }

  /** Muse thin bridge: optional `muse.url` in local.yaml; `MUSE_URL` env wins when set. */
  const museRaw = config.muse && typeof config.muse === 'object' ? config.muse : {};
  let museUrlMerged =
    typeof museRaw.url === 'string' ? museRaw.url.trim().replace(/\/+$/, '') : '';
  if (process.env[ENV_MUSE_URL] != null && String(process.env[ENV_MUSE_URL]).trim() !== '') {
    museUrlMerged = String(process.env[ENV_MUSE_URL]).trim().replace(/\/+$/, '');
  }
  config.muse = museUrlMerged ? { url: museUrlMerged } : {};

  /**
   * Flow workspace identity (Phase 7A): the local CLI/MCP operator's authorized Flow scopes.
   * This is the "local config identity" channel the Flow store/projection surfaces resolve scope
   * through (FLOW-STORE-CONTRACT-7A-10 §4) — the CLI passes it as `visibleScopes` into the store;
   * no CLI flag can widen it. Deny-by-default: absent ⇒ `personal` only (resolved downstream in
   * `flow-scope.mjs`). Values are validated against the canonical scope set there; here we only
   * surface a clean string array so a malformed config can never inject a non-string scope.
   */
  const flowRaw = config.flow && typeof config.flow === 'object' ? config.flow : null;
  const flowVisibleScopes =
    flowRaw && Array.isArray(flowRaw.visible_scopes)
      ? flowRaw.visible_scopes.filter((s) => typeof s === 'string' && s.trim() !== '')
      : undefined;

  const flowCaptureRaw =
    flowRaw?.capture && typeof flowRaw.capture === 'object' ? flowRaw.capture : null;
  const flowCapture = flowCaptureRaw
    ? {
        // On unless explicitly disabled; the two opt-in flags are off unless explicitly `true`.
        enabled: flowCaptureRaw.enabled !== false,
        session_extraction_opt_in: flowCaptureRaw.session_extraction_opt_in === true,
        classroom_minor_mode: flowCaptureRaw.classroom_minor_mode === true,
        // Only 'low' and 'high' are accepted verbatim; anything else becomes 'medium'.
        min_confidence_floor:
          flowCaptureRaw.min_confidence_floor === 'low' ||
          flowCaptureRaw.min_confidence_floor === 'high'
            ? flowCaptureRaw.min_confidence_floor
            : 'medium',
      }
    : undefined;

  const vaultPath = config.vault_path;
  if (!vaultPath || typeof vaultPath !== 'string') {
    throw new Error('vault_path is required. Set in config/local.yaml or env KNOWTATION_VAULT_PATH.');
  }

  // Relative vault paths are resolved against `cwd`, not the process working directory.
  const resolvedVault = path.isAbsolute(vaultPath) ? vaultPath : path.resolve(cwd, vaultPath);
  if (!fs.existsSync(resolvedVault) || !fs.statSync(resolvedVault).isDirectory()) {
    throw new Error(`Vault path does not exist or is not a directory: ${resolvedVault}`);
  }

  // Multi-vault: when readHubVaults yields nothing, expose the single vault as "default".
  let vaultList = readHubVaults(dataDirPath, cwd);
  if (vaultList.length === 0) {
    vaultList = [{ id: 'default', path: resolvedVault, label: undefined }];
  }

  /**
   * Resolve vault id to absolute path. Returns undefined if vault id not in list.
   * @param {string} vaultId
   * @returns {string | undefined}
   */
  function resolveVaultPath(vaultId) {
    const v = vaultList.find((e) => e.id === vaultId);
    return v ? v.path : undefined;
  }

  // Embedding: a non-blank env value beats YAML; a missing model falls back to the provider default.
  const embeddingYaml = config.embedding && typeof config.embedding === 'object' ? config.embedding : null;
  let embeddingProvider = embeddingYaml?.provider || 'ollama';
  if (process.env[ENV_EMBEDDING_PROVIDER] != null && String(process.env[ENV_EMBEDDING_PROVIDER]).trim() !== '') {
    embeddingProvider = String(process.env[ENV_EMBEDDING_PROVIDER]).trim().toLowerCase();
  }
  let embeddingModel = embeddingYaml?.model;
  if (process.env[ENV_EMBEDDING_MODEL] != null && String(process.env[ENV_EMBEDDING_MODEL]).trim() !== '') {
    embeddingModel = String(process.env[ENV_EMBEDDING_MODEL]).trim();
  }
  if (embeddingModel == null || String(embeddingModel).trim() === '') {
    embeddingModel = defaultEmbeddingModelForProvider(embeddingProvider);
  }
  const embedding = {
    provider: embeddingProvider,
    model: embeddingModel,
    ollama_url: embeddingYaml?.ollama_url,
  };
  if (process.env[ENV_OLLAMA_URL]) {
    embedding.ollama_url = process.env[ENV_OLLAMA_URL];
  }

  return {
    vault_path: resolvedVault,
    vaultList,
    resolveVaultPath,
    qdrant_url: config.qdrant_url,
    vector_store: config.vector_store || 'qdrant',
    // Same value that was used to locate hub_setup.yaml above.
    data_dir: dataDirPath,
    embedding,
    indexer: config.indexer && typeof config.indexer === 'object'
      ? {
          chunk_size: config.indexer.chunk_size ?? 2048,
          chunk_overlap: config.indexer.chunk_overlap ?? 256,
        }
      : { chunk_size: 2048, chunk_overlap: 256 },
    transcription: config.transcription && typeof config.transcription === 'object'
      ? {
          provider: config.transcription.provider || 'openai',
          model: config.transcription.model || 'whisper-1',
          transcode_oversized: config.transcription.transcode_oversized !== false,
        }
      : { provider: 'openai', model: 'whisper-1', transcode_oversized: true },
    memory: config.memory && typeof config.memory === 'object'
      ? {
          enabled: config.memory.enabled === true,
          provider: config.memory.provider || 'file',
          url: config.memory.url || process.env.KNOWTATION_MEMORY_URL,
          retention_days: config.memory.retention_days ?? null,
          capture: Array.isArray(config.memory.capture) ? config.memory.capture : undefined,
          scope: config.memory.scope === 'global' ? 'global' : 'vault',
          encrypt: config.memory.encrypt === true,
          secret: config.memory.secret || undefined,
          supabase_url: config.memory.supabase_url || process.env.KNOWTATION_SUPABASE_URL || undefined,
          supabase_key: config.memory.supabase_key || process.env.KNOWTATION_SUPABASE_KEY || undefined,
        }
      : { enabled: false, provider: 'file', url: undefined, retention_days: null, capture: undefined, scope: 'vault', encrypt: false, secret: undefined, supabase_url: undefined, supabase_key: undefined },
    air: config.air && typeof config.air === 'object'
      ? {
          enabled: config.air.enabled === true,
          required: config.air.required === true,
          endpoint: config.air.endpoint || process.env.KNOWTATION_AIR_ENDPOINT,
        }
      : { enabled: false, required: false, endpoint: undefined },
    vault_git: config.vault?.git && typeof config.vault.git === 'object'
      ? {
          enabled: config.vault.git.enabled === true,
          remote: config.vault.git.remote || undefined,
          auto_commit: config.vault.git.auto_commit === true,
          auto_push: config.vault.git.auto_push === true,
        }
      : { enabled: false, remote: undefined, auto_commit: false, auto_push: false },
    mcp: (() => {
      const mcpRaw = config.mcp && typeof config.mcp === 'object' ? config.mcp : {};
      const envPort = process.env.KNOWTATION_MCP_HTTP_PORT;
      // A blank env value defers to YAML; a non-blank but unparsable one falls back to 3334.
      const http_port =
        envPort != null && String(envPort).trim() !== ''
          ? parseInt(String(envPort), 10) || 3334
          : mcpRaw.http_port ?? 3334;
      const http_host = mcpRaw.http_host || '127.0.0.1';
      return { http_port, http_host };
    })(),
    // Normalised a second time on purpose: a value such as "http://x /" only loses its
    // inner trailing space on this second trim, so this is not a pure copy of config.muse.
    muse: config.muse && typeof config.muse === 'object' && config.muse.url
      ? { url: String(config.muse.url).trim().replace(/\/+$/, '') }
      : {},
    daemon: loadDaemonConfig(config.daemon),
    llm: loadLlmConfig(config.llm),
    // `flow` is omitted entirely unless there is at least one scope or a capture block.
    flow:
      (flowVisibleScopes && flowVisibleScopes.length > 0) || flowCapture
        ? {
            ...(flowVisibleScopes && flowVisibleScopes.length > 0
              ? { visible_scopes: flowVisibleScopes }
              : {}),
            ...(flowCapture ? { capture: flowCapture } : {}),
          }
        : undefined,
    ignore: config.ignore || DEFAULT_IGNORE,
  };
}
292
/** Providers that may be chosen for the `completeChat` path (Hub Settings UI + config llm.provider). */
export const CHAT_PROVIDERS = Object.freeze(['deepinfra', 'openrouter', 'openai', 'anthropic', 'ollama']);

/**
 * Check a chat-provider value coming from the Hub Settings UI before it gets persisted.
 *
 * Security boundary: the stored provider decides where note text is sent (privacy) and which
 * account is billed. The only accepted inputs are "nothing" (null / undefined / blank string,
 * meaning auto-detect) or an exact member of CHAT_PROVIDERS after trimming and lower-casing.
 * Everything else is refused so a malformed or hostile value can never be written to
 * config/local.yaml or reach completeChat.
 *
 * @param {unknown} value
 * @returns {{ ok: true, provider: string } | { ok: false, error: string }}
 */
export function normalizeChatProviderInput(value) {
  const isAbsent = value == null || value === '';
  if (!isAbsent && typeof value !== 'string') {
    return { ok: false, error: 'provider must be a string' };
  }

  const candidate = isAbsent ? '' : value.trim().toLowerCase();
  if (candidate === '') {
    // Empty means "let the runtime auto-detect".
    return { ok: true, provider: '' };
  }

  if (CHAT_PROVIDERS.includes(candidate)) {
    return { ok: true, provider: candidate };
  }
  return {
    ok: false,
    error: `provider must be one of: ${CHAT_PROVIDERS.join(', ')} (or empty for auto-detect)`,
  };
}
322
/**
 * Build the resolved top-level `llm` block (the `completeChat` path) from local.yaml.
 *
 * This is separate from `daemon.llm` (consolidation/Discovery). `provider` picks the chat lane
 * used by MCP summarize and Hub proposal LLM jobs. A value outside CHAT_PROVIDERS is discarded
 * and becomes '' (auto-detect), so a malformed config cannot force an unknown provider. The
 * env var `KNOWTATION_CHAT_PROVIDER` still takes precedence at resolution time
 * (see lib/llm-complete.mjs).
 *
 * @param {object|undefined} raw — the `llm` key from the YAML file
 * @returns {{ provider: string, openai_chat_model: string|undefined, anthropic_chat_model: string|undefined, deepinfra_chat_model: string|undefined, openrouter_chat_model: string|undefined, ollama_chat_model: string|undefined }}
 */
export function loadLlmConfig(raw) {
  const section = raw && typeof raw === 'object' ? raw : {};

  const requested = String(section.provider || '').trim().toLowerCase();
  const resolved = { provider: CHAT_PROVIDERS.includes(requested) ? requested : '' };

  // Per-provider model overrides; any falsy value is reported as undefined.
  const modelKeys = [
    'openai_chat_model',
    'anthropic_chat_model',
    'deepinfra_chat_model',
    'openrouter_chat_model',
    'ollama_chat_model',
  ];
  for (const key of modelKeys) {
    resolved[key] = section[key] || undefined;
  }
  return resolved;
}
347
// Baseline daemon settings. Nested objects are frozen too, because Object.freeze is shallow
// and these values are shared by every call to loadDaemonConfig().
const DAEMON_DEFAULTS = Object.freeze({
  enabled: false,
  interval_minutes: 120,
  idle_only: true,
  idle_threshold_minutes: 15,
  run_on_start: false,
  lookback_hours: 24,
  max_events_per_pass: 200,
  max_topics_per_pass: 10,
  passes: Object.freeze({ consolidate: true, verify: true, discover: false, rebuild_index: true }),
  llm: Object.freeze({
    provider: null,
    model: null,
    api_key_env: null,
    base_url: null,
    max_tokens: 1024,
    temperature: 0.2,
  }),
  dry_run: false,
  log_file: null,
  max_cost_per_day_usd: null,
});

/**
 * Read an environment override, treating an unset or blank variable as "not provided".
 * Returns the raw (untrimmed) value otherwise.
 *
 * @param {string} name - environment variable name
 * @returns {string|undefined}
 */
function readDaemonEnvOverride(name) {
  const value = process.env[name];
  return value != null && String(value).trim() !== '' ? value : undefined;
}

/**
 * Parse daemon configuration from the raw `daemon` section of local.yaml,
 * applying defaults and environment variable overrides.
 *
 * Environment overrides: KNOWTATION_DAEMON_ENABLED, KNOWTATION_DAEMON_INTERVAL,
 * KNOWTATION_DAEMON_DRY_RUN, KNOWTATION_DAEMON_LLM_PROVIDER, KNOWTATION_DAEMON_LLM_MODEL and
 * KNOWTATION_DAEMON_LLM_BASE_URL. A variable that is set but blank is treated as unset, so an
 * empty export can no longer silently disable the daemon or reset its interval; this matches
 * how loadConfig() handles MUSE_URL, EMBEDDING_* and KNOWTATION_MCP_HTTP_PORT.
 *
 * @param {object|undefined} raw — the `daemon` key from the YAML file
 * @returns {object} fully resolved daemon config with all defaults applied
 */
export function loadDaemonConfig(raw) {
  const d = raw && typeof raw === 'object' ? raw : {};

  // Boolean overrides: only the exact string 'true' enables; any other non-blank value disables.
  const envEnabled = readDaemonEnvOverride('KNOWTATION_DAEMON_ENABLED');
  const enabled = envEnabled !== undefined ? envEnabled === 'true' : d.enabled === true;

  // A non-blank but unparsable (or zero) interval falls back to the built-in default, not YAML.
  const envInterval = readDaemonEnvOverride('KNOWTATION_DAEMON_INTERVAL');
  const interval_minutes = envInterval !== undefined
    ? parseInt(envInterval, 10) || DAEMON_DEFAULTS.interval_minutes
    : d.interval_minutes ?? DAEMON_DEFAULTS.interval_minutes;

  const envDryRun = readDaemonEnvOverride('KNOWTATION_DAEMON_DRY_RUN');
  const dry_run = envDryRun !== undefined ? envDryRun === 'true' : d.dry_run === true;

  // consolidate / verify / rebuild_index are on unless explicitly false; discover is opt-in.
  const rawPasses = d.passes && typeof d.passes === 'object' ? d.passes : {};
  const passes = {
    consolidate: rawPasses.consolidate !== false,
    verify: rawPasses.verify !== false,
    discover: rawPasses.discover === true,
    rebuild_index: rawPasses.rebuild_index !== false,
  };

  const rawLlm = d.llm && typeof d.llm === 'object' ? d.llm : {};
  const llm = {
    provider: process.env.KNOWTATION_DAEMON_LLM_PROVIDER || rawLlm.provider || null,
    model: process.env.KNOWTATION_DAEMON_LLM_MODEL || rawLlm.model || null,
    // Only the NAME of the env var holding the key is configurable here, and only via YAML.
    api_key_env: rawLlm.api_key_env || null,
    base_url: process.env.KNOWTATION_DAEMON_LLM_BASE_URL || rawLlm.base_url || null,
    max_tokens: rawLlm.max_tokens ?? DAEMON_DEFAULTS.llm.max_tokens,
    temperature: rawLlm.temperature ?? DAEMON_DEFAULTS.llm.temperature,
  };

  return {
    enabled,
    interval_minutes,
    idle_only: d.idle_only !== false,
    idle_threshold_minutes: d.idle_threshold_minutes ?? DAEMON_DEFAULTS.idle_threshold_minutes,
    run_on_start: d.run_on_start === true,
    lookback_hours: d.lookback_hours ?? DAEMON_DEFAULTS.lookback_hours,
    max_events_per_pass: d.max_events_per_pass ?? DAEMON_DEFAULTS.max_events_per_pass,
    max_topics_per_pass: d.max_topics_per_pass ?? DAEMON_DEFAULTS.max_topics_per_pass,
    passes,
    llm,
    dry_run,
    log_file: d.log_file || null,
    max_cost_per_day_usd: d.max_cost_per_day_usd ?? null,
  };
}
File History 1 commit
sha256:8915fe406161f95c1681f9469375e7bae5b28c884f00bedbdef65e4b0cd0738d docs(flow): commit FLOW-V0-SPEC.md hygiene for 7A-INT merge Human 13 hours ago