lib/config.mjs · aaronrene/knowtation — MuseHub

aaronrene / knowtation public

config.mjs

426 lines 18.6 KB

Raw

sha256:8915fe406161f95c1681f9469375e7bae5b28c884f00bedbdef65e4b0cd0738d docs(flow): commit FLOW-V0-SPEC.md hygiene for 7A-INT merge Human 13 hours ago

1	/**
2	* Config loader: config/local.yaml + env overrides. SPEC §4.4.
3	* Env overrides apply after file. vault_path is required.
4	* Multi-vault (Phase 15): when hub_vaults.yaml is absent, single vault "default" from vault_path.
5	*/
6
7	import fs from 'fs';
8	import path from 'path';
9	import yaml from 'js-yaml';
10	import { readHubVaults } from './hub-vaults.mjs';
11	import { getRepoRoot } from './repo-root.mjs';
12
// Default `cwd` for the loaders in this module (whatever getRepoRoot() returns).
const projectRoot = getRepoRoot();

// Names of the environment variables that loadConfig() consults.
// Each one, when set, takes precedence over the matching key in config/local.yaml.
const ENV_VAULT = 'KNOWTATION_VAULT_PATH'; // -> vault_path (also beats data_dir/hub_setup.yaml)
const ENV_QDRANT = 'QDRANT_URL'; // -> qdrant_url
const ENV_DATA_DIR = 'KNOWTATION_DATA_DIR'; // -> data_dir
const ENV_VECTOR_STORE = 'KNOWTATION_VECTOR_STORE'; // -> vector_store
const ENV_MEMORY_URL = 'KNOWTATION_MEMORY_URL'; // -> memory.url
const ENV_MEMORY_ENABLED = 'KNOWTATION_MEMORY_ENABLED'; // -> memory.enabled (only the literals 'true' / 'false' are honoured)
const ENV_MEMORY_PROVIDER = 'KNOWTATION_MEMORY_PROVIDER'; // -> memory.provider
const ENV_AIR_ENDPOINT = 'KNOWTATION_AIR_ENDPOINT'; // -> air.endpoint
const ENV_OLLAMA_URL = 'OLLAMA_URL'; // -> embedding.ollama_url
const ENV_EMBEDDING_PROVIDER = 'EMBEDDING_PROVIDER'; // -> embedding.provider (trimmed, lower-cased)
const ENV_EMBEDDING_MODEL = 'EMBEDDING_MODEL'; // -> embedding.model (trimmed)
const ENV_TRANSCODE_OVERSIZED = 'KNOWTATION_TRANSCODE_OVERSIZED'; // -> transcription.transcode_oversized ('0'/'false' or '1'/'true')
const ENV_MUSE_URL = 'MUSE_URL'; // -> muse.url
28
/**
 * Default embedding model name for a provider, used when neither YAML nor env
 * supplies `model` (the model must match the provider).
 *
 * Matching ignores case and surrounding whitespace, so a padded value such as
 * `" OpenAI "` still maps to the OpenAI model instead of falling through to
 * the Ollama default.
 *
 * @param {unknown} provider - Provider name; falsy values are treated as 'ollama'.
 * @returns {string} 'text-embedding-3-small' for openai, 'voyage-4-lite' for voyage,
 *   otherwise 'nomic-embed-text'.
 */
function defaultEmbeddingModelForProvider(provider) {
  const key = String(provider || 'ollama').trim().toLowerCase();
  if (key === 'openai') return 'text-embedding-3-small';
  if (key === 'voyage') return 'voyage-4-lite';
  // Ollama and any unrecognised provider share the same fallback.
  return 'nomic-embed-text';
}
36
// Value returned as `ignore` by loadConfig() when config/local.yaml has no (truthy) `ignore` key.
// NOTE(review): presumably folder names to skip inside the vault — how entries are matched is
// decided by the consumers of `ignore`, not here.
const DEFAULT_IGNORE = ['templates', 'meta', 'node_modules', '.git'];
38
/**
 * Lightweight reader for the `transcription` section of config/local.yaml.
 *
 * Unlike loadConfig() this never requires `vault_path` and never throws on a
 * missing or malformed file: any problem simply yields the defaults. It is used by
 * lib/transcribe.mjs on the hosted bridge, where the full loadConfig may be skipped.
 *
 * @param {string} [cwd] - Directory containing `config/local.yaml` (default: project root).
 * @returns {{ provider: string, model: string, transcode_oversized: boolean }}
 *   `provider` defaults to 'openai', `model` to 'whisper-1', and
 *   `transcode_oversized` is true unless the YAML sets it to exactly `false`.
 */
export function readTranscriptionYaml(cwd = projectRoot) {
  const yamlFile = path.join(cwd, 'config', 'local.yaml');
  let section = {};

  if (fs.existsSync(yamlFile)) {
    try {
      const parsed = yaml.load(fs.readFileSync(yamlFile, 'utf8')) || {};
      const candidate = parsed.transcription;
      if (candidate && typeof candidate === 'object') {
        section = candidate;
      }
    } catch (_) {
      // Deliberate best effort: an unreadable or invalid file means "use defaults".
    }
  }

  const { provider, model, transcode_oversized: transcodeFlag } = section;
  return {
    provider: provider || 'openai',
    model: model || 'whisper-1',
    transcode_oversized: transcodeFlag !== false,
  };
}
64
/**
 * Read and parse config/local.yaml.
 *
 * Returns `{}` when the file is absent or empty. A truthy scalar at the top level
 * (for example a file containing just a string) is rejected here with a clear
 * "Invalid config" error; previously it surfaced later as an opaque TypeError when
 * the loader tried to set a property on the primitive.
 *
 * NOTE(review): js-yaml 4.x `load` uses the safe default schema; on js-yaml 3.x `load`
 * can construct arbitrary JS types — confirm the installed major version.
 *
 * @param {string} configPath - Absolute path to config/local.yaml.
 * @returns {object} Parsed top-level config object (mutated by later steps).
 * @throws {Error} When the file cannot be read or parsed, or its root is a scalar.
 */
function readLocalYaml(configPath) {
  if (!fs.existsSync(configPath)) return {};
  try {
    const parsed = yaml.load(fs.readFileSync(configPath, 'utf8')) || {};
    if (typeof parsed !== 'object') {
      throw new Error(`expected a mapping at the top level, got ${typeof parsed}`);
    }
    return parsed;
  } catch (e) {
    // `cause` is ignored by engines that predate Error options, so this stays compatible.
    throw new Error(`Invalid config at ${configPath}: ${e.message}`, { cause: e });
  }
}

/**
 * Apply environment-variable overrides on top of the parsed YAML (mutates `config`).
 * Env always wins over the file.
 *
 * @param {object} config - Parsed config object.
 */
function applyEnvOverrides(config) {
  const env = process.env;
  if (env[ENV_VAULT]) config.vault_path = env[ENV_VAULT];
  if (env[ENV_QDRANT]) config.qdrant_url = env[ENV_QDRANT];
  if (env[ENV_DATA_DIR]) config.data_dir = env[ENV_DATA_DIR];
  if (env[ENV_VECTOR_STORE]) config.vector_store = env[ENV_VECTOR_STORE];
  if (env[ENV_MEMORY_URL]) config.memory = { ...(config.memory || {}), url: env[ENV_MEMORY_URL] };
  // Only the exact literals 'true' / 'false' toggle memory; anything else leaves YAML untouched.
  if (env[ENV_MEMORY_ENABLED] === 'true') config.memory = { ...(config.memory || {}), enabled: true };
  if (env[ENV_MEMORY_ENABLED] === 'false') config.memory = { ...(config.memory || {}), enabled: false };
  if (env[ENV_MEMORY_PROVIDER]) {
    config.memory = { ...(config.memory || {}), provider: env[ENV_MEMORY_PROVIDER] };
  }
  if (env[ENV_AIR_ENDPOINT]) config.air = { ...config.air, endpoint: env[ENV_AIR_ENDPOINT] };

  const transcode = env[ENV_TRANSCODE_OVERSIZED];
  if (transcode === '0' || transcode === 'false') {
    config.transcription = { ...(config.transcription || {}), transcode_oversized: false };
  }
  if (transcode === '1' || transcode === 'true') {
    config.transcription = { ...(config.transcription || {}), transcode_oversized: true };
  }
}

/**
 * Merge optional Hub Setup overrides from `<data_dir>/hub_setup.yaml` (mutates `config`).
 * The file may set `vault_path` and `vault.git`. An invalid file is ignored (best effort).
 *
 * @param {object} config - Parsed config object.
 * @param {string} dataDirPath - Absolute data directory.
 */
function applyHubSetup(config, dataDirPath) {
  const hubSetupPath = path.join(dataDirPath, 'hub_setup.yaml');
  if (!fs.existsSync(hubSetupPath)) return;
  try {
    const setup = yaml.load(fs.readFileSync(hubSetupPath, 'utf8')) || {};
    // Hub writes vault_path here; operator/tests use KNOWTATION_VAULT_PATH — that must win.
    if (setup.vault_path != null && !process.env[ENV_VAULT]) {
      config.vault_path = setup.vault_path;
    }
    if (setup.vault?.git && typeof setup.vault.git === 'object') {
      config.vault = config.vault || {};
      config.vault.git = { ...(config.vault.git || {}), ...setup.vault.git };
    }
  } catch (_) {
    /* ignore invalid hub_setup */
  }
}

/**
 * Resolve the Muse thin-bridge URL: optional `muse.url` in local.yaml, with a non-blank
 * `MUSE_URL` env value taking precedence. Surrounding whitespace and trailing slashes are
 * stripped once, here, so the result needs no further normalisation.
 *
 * @param {object} config - Parsed config object.
 * @returns {{ url: string } | {}} `{}` when no URL is configured.
 */
function resolveMuse(config) {
  const tidy = (value) => String(value).trim().replace(/[\s/]+$/, '');
  const fromYaml = config.muse && typeof config.muse === 'object' ? config.muse : {};
  let url = typeof fromYaml.url === 'string' ? tidy(fromYaml.url) : '';
  const fromEnv = process.env[ENV_MUSE_URL];
  if (fromEnv != null && String(fromEnv).trim() !== '') {
    url = tidy(fromEnv);
  }
  return url ? { url } : {};
}

/**
 * Resolve the Flow workspace identity (Phase 7A): the local CLI/MCP operator's authorized
 * Flow scopes plus capture settings. Scope values are validated downstream (flow-scope.mjs);
 * here we only surface a clean string array so a malformed config can never inject a
 * non-string scope. Absent scopes are left out so the downstream deny-by-default applies.
 *
 * @param {object} config - Parsed config object.
 * @returns {{ visible_scopes?: string[], capture?: object } | undefined}
 *   undefined when neither usable scopes nor a capture section are configured.
 */
function resolveFlow(config) {
  const flowRaw = config.flow && typeof config.flow === 'object' ? config.flow : null;
  const visibleScopes =
    flowRaw && Array.isArray(flowRaw.visible_scopes)
      ? flowRaw.visible_scopes.filter((s) => typeof s === 'string' && s.trim() !== '')
      : undefined;

  const captureRaw =
    flowRaw?.capture && typeof flowRaw.capture === 'object' ? flowRaw.capture : null;
  const capture = captureRaw
    ? {
        enabled: captureRaw.enabled !== false,
        session_extraction_opt_in: captureRaw.session_extraction_opt_in === true,
        classroom_minor_mode: captureRaw.classroom_minor_mode === true,
        // Only 'low' and 'high' are accepted as explicit floors; anything else means 'medium'.
        min_confidence_floor:
          captureRaw.min_confidence_floor === 'low' || captureRaw.min_confidence_floor === 'high'
            ? captureRaw.min_confidence_floor
            : 'medium',
      }
    : undefined;

  const hasScopes = Boolean(visibleScopes && visibleScopes.length > 0);
  if (!hasScopes && !capture) return undefined;
  return {
    ...(hasScopes ? { visible_scopes: visibleScopes } : {}),
    ...(capture ? { capture } : {}),
  };
}

/**
 * Resolve embedding settings from YAML with env overrides (EMBEDDING_PROVIDER,
 * EMBEDDING_MODEL, OLLAMA_URL). A missing model falls back to the provider's default.
 *
 * @param {object} config - Parsed config object.
 * @returns {{ provider: string, model: string, ollama_url: string|undefined }}
 */
function resolveEmbedding(config) {
  const fromYaml = config.embedding && typeof config.embedding === 'object' ? config.embedding : null;
  const nonBlankEnv = (name) => {
    const value = process.env[name];
    return value != null && String(value).trim() !== '' ? String(value).trim() : undefined;
  };

  let provider = fromYaml?.provider || 'ollama';
  const envProvider = nonBlankEnv(ENV_EMBEDDING_PROVIDER);
  if (envProvider !== undefined) provider = envProvider.toLowerCase();

  let model = fromYaml?.model;
  const envModel = nonBlankEnv(ENV_EMBEDDING_MODEL);
  if (envModel !== undefined) model = envModel;
  if (model == null || String(model).trim() === '') {
    model = defaultEmbeddingModelForProvider(provider);
  }

  const embedding = { provider, model, ollama_url: fromYaml?.ollama_url };
  if (process.env[ENV_OLLAMA_URL]) {
    embedding.ollama_url = process.env[ENV_OLLAMA_URL];
  }
  return embedding;
}

/**
 * Resolve MCP HTTP settings. A non-blank KNOWTATION_MCP_HTTP_PORT wins over `mcp.http_port`;
 * an unparseable env value falls back to 3334.
 *
 * @param {object} config - Parsed config object.
 * @returns {{ http_port: number, http_host: string }}
 */
function resolveMcp(config) {
  const mcpRaw = config.mcp && typeof config.mcp === 'object' ? config.mcp : {};
  const envPort = process.env.KNOWTATION_MCP_HTTP_PORT;
  const http_port =
    envPort != null && String(envPort).trim() !== ''
      ? parseInt(String(envPort), 10) || 3334
      : mcpRaw.http_port ?? 3334;
  const http_host = mcpRaw.http_host || '127.0.0.1';
  return { http_port, http_host };
}

/**
 * Load config from config/local.yaml (if present) then apply env overrides.
 *
 * Resolution order: local.yaml, then environment variables, then the optional
 * `<data_dir>/hub_setup.yaml` (which may set `vault_path` — unless KNOWTATION_VAULT_PATH
 * is set — and `vault.git`).
 *
 * AIR config (`air` key in local.yaml):
 * ```yaml
 * air:
 *   enabled: true    # master switch; default false
 *   required: true   # hard-fail: throw AttestationRequiredError when endpoint fails; default false
 *   endpoint: https://...  # attestation endpoint URL; falls back to KNOWTATION_AIR_ENDPOINT env var
 * ```
 * When `air.required=true` a write or export is rejected if the attestation endpoint is
 * unreachable or returns a non-OK response. Default (`false`) is non-blocking: a placeholder
 * id is logged and the operation proceeds (backward-compatible).
 *
 * Multi-vault: when readHubVaults() returns no entries, a single vault `default` pointing
 * at `vault_path` is used.
 *
 * @param {string} [cwd] - Working directory (default: project root)
 * @returns {{
 *   vault_path: string,
 *   vaultList: Array<{ id: string, path: string, label?: string }>,
 *   resolveVaultPath: (vaultId: string) => string|undefined,
 *   qdrant_url?: string,
 *   vector_store: string,
 *   data_dir: string,
 *   embedding: { provider: string, model: string, ollama_url: string|undefined },
 *   indexer: { chunk_size: number, chunk_overlap: number },
 *   transcription: { provider: string, model: string, transcode_oversized: boolean },
 *   memory: object,
 *   air: { enabled: boolean, required: boolean, endpoint: string|undefined },
 *   vault_git: { enabled: boolean, remote: string|undefined, auto_commit: boolean, auto_push: boolean },
 *   mcp: { http_port: number, http_host: string },
 *   muse: { url?: string },
 *   daemon: object,
 *   llm: object,
 *   flow: object|undefined,
 *   ignore: string[]
 * }} Resolved configuration; `vault_path` and `data_dir` are absolute.
 * @throws {Error} If local.yaml is invalid, `vault_path` is missing after load, or the
 *   resolved vault path is not an existing directory.
 */
export function loadConfig(cwd = projectRoot) {
  const config = readLocalYaml(path.join(cwd, 'config', 'local.yaml'));
  applyEnvOverrides(config);

  // data_dir must be known before hub_setup.yaml can be located.
  const dataDirPath = path.resolve(cwd, config.data_dir || 'data');
  applyHubSetup(config, dataDirPath);

  const vaultPath = config.vault_path;
  if (!vaultPath || typeof vaultPath !== 'string') {
    throw new Error('vault_path is required. Set in config/local.yaml or env KNOWTATION_VAULT_PATH.');
  }

  const resolvedVault = path.isAbsolute(vaultPath) ? vaultPath : path.resolve(cwd, vaultPath);
  if (!fs.existsSync(resolvedVault) || !fs.statSync(resolvedVault).isDirectory()) {
    throw new Error(`Vault path does not exist or is not a directory: ${resolvedVault}`);
  }

  let vaultList = readHubVaults(dataDirPath, cwd);
  if (vaultList.length === 0) {
    vaultList = [{ id: 'default', path: resolvedVault, label: undefined }];
  }

  /**
   * Resolve vault id to absolute path. Returns undefined if vault id not in list.
   * @param {string} vaultId
   * @returns {string | undefined}
   */
  function resolveVaultPath(vaultId) {
    const match = vaultList.find((entry) => entry.id === vaultId);
    return match ? match.path : undefined;
  }

  const isSection = (value) => Boolean(value) && typeof value === 'object';

  return {
    vault_path: resolvedVault,
    vaultList,
    resolveVaultPath,
    qdrant_url: config.qdrant_url,
    vector_store: config.vector_store || 'qdrant',
    data_dir: dataDirPath,
    embedding: resolveEmbedding(config),
    indexer: isSection(config.indexer)
      ? {
          chunk_size: config.indexer.chunk_size ?? 2048,
          chunk_overlap: config.indexer.chunk_overlap ?? 256,
        }
      : { chunk_size: 2048, chunk_overlap: 256 },
    transcription: isSection(config.transcription)
      ? {
          provider: config.transcription.provider || 'openai',
          model: config.transcription.model || 'whisper-1',
          transcode_oversized: config.transcription.transcode_oversized !== false,
        }
      : { provider: 'openai', model: 'whisper-1', transcode_oversized: true },
    memory: isSection(config.memory)
      ? {
          enabled: config.memory.enabled === true,
          provider: config.memory.provider || 'file',
          url: config.memory.url || process.env[ENV_MEMORY_URL],
          retention_days: config.memory.retention_days ?? null,
          capture: Array.isArray(config.memory.capture) ? config.memory.capture : undefined,
          scope: config.memory.scope === 'global' ? 'global' : 'vault',
          encrypt: config.memory.encrypt === true,
          secret: config.memory.secret || undefined,
          supabase_url: config.memory.supabase_url || process.env.KNOWTATION_SUPABASE_URL || undefined,
          supabase_key: config.memory.supabase_key || process.env.KNOWTATION_SUPABASE_KEY || undefined,
        }
      : {
          enabled: false,
          provider: 'file',
          url: undefined,
          retention_days: null,
          capture: undefined,
          scope: 'vault',
          encrypt: false,
          secret: undefined,
          supabase_url: undefined,
          supabase_key: undefined,
        },
    air: isSection(config.air)
      ? {
          enabled: config.air.enabled === true,
          required: config.air.required === true,
          endpoint: config.air.endpoint || process.env[ENV_AIR_ENDPOINT],
        }
      : { enabled: false, required: false, endpoint: undefined },
    vault_git: config.vault?.git && typeof config.vault.git === 'object'
      ? {
          enabled: config.vault.git.enabled === true,
          remote: config.vault.git.remote || undefined,
          auto_commit: config.vault.git.auto_commit === true,
          auto_push: config.vault.git.auto_push === true,
        }
      : { enabled: false, remote: undefined, auto_commit: false, auto_push: false },
    mcp: resolveMcp(config),
    muse: resolveMuse(config),
    daemon: loadDaemonConfig(config.daemon),
    llm: loadLlmConfig(config.llm),
    flow: resolveFlow(config),
    ignore: config.ignore || DEFAULT_IGNORE,
  };
}
292
/**
 * Chat providers selectable for the `completeChat` path (Hub Settings UI + config llm.provider).
 * This frozen list is the whitelist that both normalizeChatProviderInput() and loadLlmConfig()
 * check against; entries are lower-case because both callers lower-case before comparing.
 */
export const CHAT_PROVIDERS = Object.freeze(['deepinfra', 'openrouter', 'openai', 'anthropic', 'ollama']);
295
/**
 * Check a chat-provider value coming from the Hub Settings UI before it is saved.
 *
 * Security boundary: the saved provider decides where note text is sent (privacy) and which
 * account is billed, so the only accepted values are "nothing" (null, undefined, empty or
 * whitespace-only string — meaning auto-detect) or an exact entry of CHAT_PROVIDERS after
 * trimming and lower-casing. Everything else is rejected so a malformed or hostile value can
 * never be written to config/local.yaml or reach completeChat.
 *
 * @param {unknown} value - Raw value submitted by the client.
 * @returns {{ ok: true, provider: string } | { ok: false, error: string }}
 *   On success `provider` is '' (auto-detect) or the normalised provider name.
 */
export function normalizeChatProviderInput(value) {
  const autoDetect = () => ({ ok: true, provider: '' });

  if (value === null || value === undefined || value === '') {
    return autoDetect();
  }
  if (typeof value !== 'string') {
    return { ok: false, error: 'provider must be a string' };
  }

  const candidate = value.trim().toLowerCase();
  if (candidate === '') {
    return autoDetect();
  }
  if (CHAT_PROVIDERS.includes(candidate)) {
    return { ok: true, provider: candidate };
  }
  return {
    ok: false,
    error: `provider must be one of: ${CHAT_PROVIDERS.join(', ')} (or empty for auto-detect)`,
  };
}
322
/**
 * Build the resolved top-level `llm` section (the `completeChat` path) from local.yaml.
 *
 * This is separate from `daemon.llm` (consolidation/Discovery). `provider` picks the chat lane
 * used by MCP summarize and Hub proposal LLM jobs. A value that is not in CHAT_PROVIDERS is
 * dropped and reported as '' (auto-detect), so a malformed config can never force an unknown
 * provider. The env var `KNOWTATION_CHAT_PROVIDER` still takes precedence at resolution time
 * (see lib/llm-complete.mjs).
 *
 * @param {object|undefined} raw — the `llm` key from the YAML file
 * @returns {{ provider: string, openai_chat_model: string|undefined, anthropic_chat_model: string|undefined, deepinfra_chat_model: string|undefined, openrouter_chat_model: string|undefined, ollama_chat_model: string|undefined }}
 *   Each `*_chat_model` is the YAML value when truthy, otherwise undefined.
 */
export function loadLlmConfig(raw) {
  const section = raw && typeof raw === 'object' ? raw : {};
  const requested = String(section.provider || '').trim().toLowerCase();

  const resolved = {
    provider: CHAT_PROVIDERS.includes(requested) ? requested : '',
  };

  // Per-provider model overrides are passed through untouched; falsy values become undefined.
  const modelKeys = [
    'openai_chat_model',
    'anthropic_chat_model',
    'deepinfra_chat_model',
    'openrouter_chat_model',
    'ollama_chat_model',
  ];
  for (const key of modelKeys) {
    resolved[key] = section[key] || undefined;
  }
  return resolved;
}
347
// Baseline daemon settings.
// loadDaemonConfig() reads the numeric entries from here (interval_minutes,
// idle_threshold_minutes, lookback_hours, max_events_per_pass, max_topics_per_pass,
// llm.max_tokens, llm.temperature); the boolean and null entries are mirrored by literal
// comparisons inside that function, so keep the two in sync when changing a default.
// Object.freeze is shallow: the nested `passes` and `llm` objects are not themselves frozen.
const DAEMON_DEFAULTS = Object.freeze({
  enabled: false,
  interval_minutes: 120,
  idle_only: true,
  idle_threshold_minutes: 15,
  run_on_start: false,
  lookback_hours: 24,
  max_events_per_pass: 200,
  max_topics_per_pass: 10,
  // Which passes run by default; `discover` is the only one that is opt-in.
  passes: { consolidate: true, verify: true, discover: false, rebuild_index: true },
  llm: {
    provider: null,
    model: null,
    api_key_env: null,
    base_url: null,
    max_tokens: 1024,
    temperature: 0.2,
  },
  dry_run: false,
  log_file: null,
  max_cost_per_day_usd: null,
});
370
/**
 * Parse daemon configuration from the raw `daemon` section of local.yaml,
 * applying defaults and environment variable overrides.
 *
 * Environment overrides (a blank or whitespace-only value counts as "not set", so an empty
 * variable never silently discards the YAML setting):
 * - KNOWTATION_DAEMON_ENABLED / KNOWTATION_DAEMON_DRY_RUN: true only for the literal 'true'.
 * - KNOWTATION_DAEMON_INTERVAL: minutes; must parse to a positive integer, otherwise the
 *   built-in default interval is used (a zero or negative interval is never returned from env).
 * - KNOWTATION_DAEMON_LLM_PROVIDER / _MODEL / _BASE_URL: override the matching `daemon.llm` keys.
 *
 * @param {object|undefined} raw — the `daemon` key from the YAML file
 * @returns {object} fully resolved daemon config with all defaults applied
 */
export function loadDaemonConfig(raw) {
  const d = raw && typeof raw === 'object' ? raw : {};

  // Trimmed env value, or undefined when the variable is unset or blank.
  const envValue = (name) => {
    const value = process.env[name];
    if (value == null) return undefined;
    const trimmed = String(value).trim();
    return trimmed === '' ? undefined : trimmed;
  };
  // Boolean switch: env (when set) decides, otherwise YAML must be exactly `true`.
  const envFlag = (name, yamlValue) => {
    const value = envValue(name);
    return value !== undefined ? value === 'true' : yamlValue === true;
  };

  const enabled = envFlag('KNOWTATION_DAEMON_ENABLED', d.enabled);
  const dry_run = envFlag('KNOWTATION_DAEMON_DRY_RUN', d.dry_run);

  const intervalEnv = envValue('KNOWTATION_DAEMON_INTERVAL');
  let interval_minutes;
  if (intervalEnv !== undefined) {
    const parsed = Number.parseInt(intervalEnv, 10);
    // Reject NaN, zero and negatives: a non-positive interval would make the scheduler spin.
    interval_minutes = parsed > 0 ? parsed : DAEMON_DEFAULTS.interval_minutes;
  } else {
    interval_minutes = d.interval_minutes ?? DAEMON_DEFAULTS.interval_minutes;
  }

  const rawPasses = d.passes && typeof d.passes === 'object' ? d.passes : {};
  const passes = {
    // Default-on passes are disabled only by an explicit `false`; discover is opt-in.
    consolidate: rawPasses.consolidate !== false,
    verify: rawPasses.verify !== false,
    discover: rawPasses.discover === true,
    rebuild_index: rawPasses.rebuild_index !== false,
  };

  const rawLlm = d.llm && typeof d.llm === 'object' ? d.llm : {};
  const llm = {
    provider: envValue('KNOWTATION_DAEMON_LLM_PROVIDER') || rawLlm.provider || null,
    model: envValue('KNOWTATION_DAEMON_LLM_MODEL') || rawLlm.model || null,
    // Name of the env var holding the key; only configurable from YAML.
    api_key_env: rawLlm.api_key_env || null,
    base_url: envValue('KNOWTATION_DAEMON_LLM_BASE_URL') || rawLlm.base_url || null,
    max_tokens: rawLlm.max_tokens ?? DAEMON_DEFAULTS.llm.max_tokens,
    temperature: rawLlm.temperature ?? DAEMON_DEFAULTS.llm.temperature,
  };

  return {
    enabled,
    interval_minutes,
    idle_only: d.idle_only !== false,
    idle_threshold_minutes: d.idle_threshold_minutes ?? DAEMON_DEFAULTS.idle_threshold_minutes,
    run_on_start: d.run_on_start === true,
    lookback_hours: d.lookback_hours ?? DAEMON_DEFAULTS.lookback_hours,
    max_events_per_pass: d.max_events_per_pass ?? DAEMON_DEFAULTS.max_events_per_pass,
    max_topics_per_pass: d.max_topics_per_pass ?? DAEMON_DEFAULTS.max_topics_per_pass,
    passes,
    llm,
    dry_run,
    log_file: d.log_file || null,
    max_cost_per_day_usd: d.max_cost_per_day_usd ?? null,
  };
}

File History 1 commit

sha256:8915fe406161f95c1681f9469375e7bae5b28c884f00bedbdef65e4b0cd0738d docs(flow): commit FLOW-V0-SPEC.md hygiene for 7A-INT merge Human 13 hours ago