lib/memory-consolidate.mjs · aaronrene/knowtation — MuseHub

aaronrene / knowtation public

memory-consolidate.mjs

551 lines 19.4 KB

Raw

sha256:0d530f9ef27b8b75547d1db7701a74bc77b77aa8f3d7fa3a8672cf2af36e63bb reconcile: import GitHub-direct RBAC/OAuth/companion and ho… Human minor ⚠ breaking 6 hours ago

1	/**
2	* Core consolidation engine: reads recent memory events, groups by topic,
3	* sends each group to an LLM for deduplication/merging, and stores the result
4	* as consolidation events. Phase A of the Daemon Consolidation Spec.
5	*
6	* Phase C adds runVerifyPass (Pass 2: Stale Reference Detection).
7	* Phase D adds runDiscoverPass (Pass 3: Relationship Discovery).
8	*
9	* Phase 6 migration (D6.6.2): runDiscoverPass routes insight persistence through
10	* DerivedArtifactWriter. The direct mm.store('insight', ...) call is removed.
11	*
12	* This module is a pure function library with no daemon lifecycle logic.
13	* It can be invoked manually via CLI or MCP.
14	*/
15
16	import { extractTopicFromEvent } from './memory-event.mjs';
17	import { createMemoryManager, verifyMemoryEvent } from './memory.mjs';
18	import { completeChat } from './llm-complete.mjs';
19	import { createDerivedArtifactWriter } from './companion-artifact-writer.mjs';
20	import { buildConvenienceProvenance } from './companion-provenance-validator.mjs';
21
/**
 * System prompt for the consolidate pass. consolidateMemory sends it as the
 * `system` message for each topic group, and the reply is decoded by
 * parseConsolidationResponse (which expects the JSON array requested below).
 */
const CONSOLIDATION_SYSTEM_PROMPT = `You are a memory consolidation engine for a personal knowledge vault.
You receive a batch of timestamped activity events on a single topic.
Your job:
1. Merge redundant observations into single factual statements.
2. When events contradict each other, keep the most recent fact and discard the older one.
3. Distill the batch into 3-7 concise, factual statements.
4. Each statement must be a complete, standalone fact (no "as mentioned earlier").
5. Preserve note paths and dates when they add context.

Output format: JSON array of strings, one per fact. No commentary.`;
32
/**
 * Build the user prompt for the consolidation LLM call from one topic group.
 *
 * Each event contributes one line: `[ts] type: <JSON of event.data, truncated>`.
 * When options.encrypt is true (memory.encrypt), raw event payloads are omitted
 * from the prompt; only the timestamp and type are sent.
 * Exported for testing.
 *
 * @param {string} topic
 * @param {object[]} events
 * @param {{ encrypt?: boolean, maxSummaryChars?: number }} [options]
 *   maxSummaryChars caps the serialized payload of each event; it must be a
 *   non-negative integer, otherwise the default of 300 is used.
 * @returns {string}
 */
export function buildConsolidationPrompt(topic, events, options = {}) {
  const encrypt = options.encrypt === true;
  const requestedCap = options.maxSummaryChars;
  const maxSummaryChars = Number.isInteger(requestedCap) && requestedCap >= 0 ? requestedCap : 300;

  const lines = events.map((e) => {
    if (encrypt) {
      return `[${e.ts}] ${e.type} (event payload omitted — encrypted memory mode)`;
    }
    // JSON.stringify yields undefined (not a string) for a missing or function
    // payload; fall back to 'null' so one data-less event cannot throw here.
    const serialized = JSON.stringify(e.data) ?? 'null';
    return `[${e.ts}] ${e.type}: ${serialized.slice(0, maxSummaryChars)}`;
  });

  return `Topic: "${topic}"\nEvents (${events.length}):\n${lines.join('\n')}`;
}
54
/**
 * Parse the LLM response into an array of fact strings.
 * Exported for testing.
 *
 * Decoding is attempted in this order:
 *  1. Unwrap the first markdown code fence (``` or ```json), if any.
 *  2. Parse the text as JSON; an array yields its non-blank string items
 *     (trimmed), any other JSON value yields [].
 *  3. If that fails, parse the span from the first '[' to the last ']' so a
 *     valid array surrounded by commentary is still recovered.
 *  4. Otherwise treat each non-empty line as a fact, removing a single leading
 *     list marker ("-", "*", "•", "1.", "1)") and skipping lines that start
 *     with '{' or '['.
 *
 * @param {string} raw — raw LLM output
 * @returns {string[]}
 */
export function parseConsolidationResponse(raw) {
  if (!raw || typeof raw !== 'string') return [];

  let cleaned = raw.trim();
  const fenceMatch = cleaned.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fenceMatch) cleaned = fenceMatch[1].trim();

  const toFacts = (items) => items
    .filter((item) => typeof item === 'string' && item.trim())
    .map((s) => s.trim());

  try {
    const parsed = JSON.parse(cleaned);
    return Array.isArray(parsed) ? toFacts(parsed) : [];
  } catch (_) {
    // Not pure JSON — try the more lenient strategies below.
  }

  // Models often add a sentence before or after the array; recover the array
  // rather than mistaking the commentary for facts.
  const start = cleaned.indexOf('[');
  const end = cleaned.lastIndexOf(']');
  if (start !== -1 && end > start) {
    try {
      const embedded = JSON.parse(cleaned.slice(start, end + 1));
      if (Array.isArray(embedded)) return toFacts(embedded);
    } catch (_) {
      // Bracketed span is not valid JSON either — fall back to line parsing.
    }
  }

  // Strip only a genuine list marker so facts that begin with a date or a
  // number (e.g. "2024-01-05: ...") keep their leading characters.
  return cleaned
    .split('\n')
    .map((line) => line.replace(/^\s*(?:[-*•]|\d+[.)])\s+/, '').trim())
    .filter((line) => line.length > 0 && !line.startsWith('{') && !line.startsWith('['));
}
83
/**
 * System prompt for the discover pass. runDiscoverPass sends it as the `system`
 * message, and the reply is decoded by parseDiscoverResponse (which reads the
 * three array keys named in the output format below).
 */
const DISCOVER_SYSTEM_PROMPT = `You are an insight engine for a personal knowledge vault.
Given topic summaries from the vault's memory, identify:
1. Connections between topics that the user might not have noticed.
2. Contradictions between topics (a fact in one topic conflicts with another).
3. Open questions — things the vault seems to be exploring but hasn't resolved.

Be concise. Each item should be 1-2 sentences.

Output format: JSON object with three arrays: "connections", "contradictions", "open_questions".`;
93
/**
 * Render the user prompt for the relationship-discovery LLM call.
 * Exported for testing.
 *
 * Every entry becomes one block headed by its topic name; entries are read
 * from `entry.data` when present, otherwise from the entry itself. A missing
 * or non-string topic is shown as "unknown".
 *
 * With encrypt false each block also lists the entry's facts (or a
 * "(no facts)" placeholder). With encrypt true only the topic names are sent.
 *
 * @param {object[]} consolidations — consolidation events (each has data.topic, data.facts)
 * @param {boolean} [encrypt]
 * @returns {string}
 */
export function buildDiscoverPrompt(consolidations, encrypt = false) {
  const renderBlock = (entry) => {
    const payload = entry.data ?? entry;
    const name = typeof payload.topic === 'string' ? payload.topic : 'unknown';
    const header = `Topic: "${name}"`;
    if (encrypt) return header;

    const hasFacts = Array.isArray(payload.facts) && payload.facts.length > 0;
    if (!hasFacts) return `${header}\n (no facts)`;

    const bullets = payload.facts.map((fact) => ` - ${fact}`);
    return `${header}\n${bullets.join('\n')}`;
  };

  const sections = consolidations.map((entry) => renderBlock(entry));
  return `Topic summaries:\n${sections.join('\n\n')}`;
}
120
/**
 * Parse the LLM response into { connections, contradictions, open_questions }.
 * Exported for testing.
 *
 * Handles markdown code fences (``` or ```json), a JSON object surrounded by
 * commentary (the span from the first '{' to the last '}' is retried), invalid
 * JSON (all three arrays empty), and partial objects (missing or non-array keys
 * default to empty arrays). Only non-blank string items are kept, trimmed.
 *
 * @param {string} raw — raw LLM output
 * @returns {{ connections: string[], contradictions: string[], open_questions: string[] }}
 */
export function parseDiscoverResponse(raw) {
  const empty = { connections: [], contradictions: [], open_questions: [] };
  if (!raw || typeof raw !== 'string') return empty;

  let cleaned = raw.trim();
  const fenceMatch = cleaned.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (fenceMatch) cleaned = fenceMatch[1].trim();

  // undefined is never a valid JSON.parse result, so it marks "did not parse".
  const tryParse = (text) => {
    try {
      return JSON.parse(text);
    } catch (_) {
      return undefined;
    }
  };

  let parsed = tryParse(cleaned);
  if (parsed === undefined) {
    // Recover an object wrapped in prose, e.g. "Sure! { ... } Hope that helps."
    const start = cleaned.indexOf('{');
    const end = cleaned.lastIndexOf('}');
    if (start !== -1 && end > start) parsed = tryParse(cleaned.slice(start, end + 1));
  }

  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return empty;

  const toStringArray = (v) => {
    if (!Array.isArray(v)) return [];
    return v.filter((item) => typeof item === 'string' && item.trim()).map((s) => s.trim());
  };

  return {
    connections: toStringArray(parsed.connections),
    contradictions: toStringArray(parsed.contradictions),
    open_questions: toStringArray(parsed.open_questions),
  };
}
157
/**
 * Bucket events by the topic slug that extractTopicFromEvent returns for each.
 * Buckets appear in first-seen order and keep events in input order.
 * Exported for testing.
 *
 * @param {object[]} events
 * @returns {Map<string, object[]>}
 */
export function groupEventsByTopic(events) {
  const byTopic = new Map();
  for (const entry of events) {
    const slug = extractTopicFromEvent(entry);
    const bucket = byTopic.get(slug);
    if (bucket) {
      bucket.push(entry);
    } else {
      byTopic.set(slug, [entry]);
    }
  }
  return byTopic;
}
174
/**
 * Collect the unique path references carried by a memory event's data payload.
 *
 * data.path (a single string) is always considered. data.paths (an array) is
 * considered only when encrypt is false. Non-string and blank entries are
 * dropped; the remaining strings are returned untrimmed, first occurrence
 * first. Exported for testing.
 *
 * @param {object} data — event.data
 * @param {boolean} [encrypt] — if true, skip data.paths (content is opaque)
 * @returns {string[]} unique, non-empty path strings
 */
export function extractPathsFromEventData(data, encrypt = false) {
  if (!data || typeof data !== 'object') return [];

  const includeList = !encrypt && Array.isArray(data.paths);
  const candidates = includeList ? [data.path, ...data.paths] : [data.path];

  // A Set keeps insertion order, so it dedupes without reordering.
  const unique = new Set();
  for (const candidate of candidates) {
    if (typeof candidate !== 'string') continue;
    if (!candidate.trim()) continue;
    unique.add(candidate);
  }
  return [...unique];
}
206
/**
 * Work out which passes to run from the caller's opts.passes value and the
 * daemon config.
 *
 * opts.passes may be:
 *  - string[] — explicit pass names, e.g. ['consolidate', 'verify']
 *  - string — comma-separated, e.g. 'consolidate,verify'
 *  - undefined/null — fall back to the daemon config
 *
 * Explicit names are stringified, trimmed and blank entries dropped; they are
 * not validated or de-duplicated. With the config fallback, 'consolidate' and
 * 'verify' are on unless set to false, and 'discover' is on only when set to
 * true.
 *
 * @param {string[]|string|null|undefined} passesOpt
 * @param {object} [daemonPassesCfg] — daemon.passes section from config
 * @returns {string[]}
 */
export function resolvePassNames(passesOpt, daemonPassesCfg) {
  const tidy = (parts) => parts.map((part) => String(part).trim()).filter(Boolean);

  if (Array.isArray(passesOpt)) return tidy(passesOpt);
  if (typeof passesOpt === 'string') return tidy(passesOpt.split(','));

  const cfg = daemonPassesCfg && typeof daemonPassesCfg === 'object' ? daemonPassesCfg : {};
  // Key order here is the order in which pass names are returned.
  const enabled = {
    consolidate: cfg.consolidate !== false,
    verify: cfg.verify !== false,
    discover: cfg.discover === true,
  };
  return Object.keys(enabled).filter((name) => enabled[name]);
}
234
/**
 * Run Pass 2: Stale Reference Detection.
 *
 * For every event that carries at least one path reference (see
 * extractPathsFromEventData), each path is checked individually through
 * verifyMemoryEvent and sorted by the returned confidence:
 *   'verified' — recorded in verified_paths
 *   'stale'    — recorded in stale_paths
 *   anything else (e.g. 'hint') — not recorded; the path cannot be classified
 *
 * checked_count is the number of events that had at least one path, not the
 * number of paths. Unless dryRun is true, the summary is stored as a
 * 'maintenance' event through opts.mm (or a manager created from config).
 *
 * @param {object} config — loadConfig() result
 * @param {object[]} events — memory events to scan (already read by caller)
 * @param {{ dryRun?: boolean, mm?: object }} [opts] — mm: memory manager to
 *   store the maintenance event with; defaults to createMemoryManager(config)
 * @returns {{ stale_paths: string[], verified_paths: string[], checked_count: number, dry_run: boolean }}
 */
export function runVerifyPass(config, events, opts = {}) {
  const dryRun = opts.dryRun ?? false;
  const encrypt = config.memory?.encrypt === true;

  const staleSet = new Set();
  const verifiedSet = new Set();
  let checked_count = 0;

  for (const event of events) {
    const refs = extractPathsFromEventData(event.data, encrypt);
    if (refs.length > 0) {
      checked_count += 1;

      for (const refPath of refs) {
        // Probe one path at a time: replace data with just this path and force
        // status 'success' so verifyMemoryEvent does the filesystem check
        // instead of short-circuiting.
        const probe = { ...event, status: 'success', data: { path: refPath } };
        const verdict = verifyMemoryEvent(config, probe);

        switch (verdict.confidence) {
          case 'stale':
            staleSet.add(refPath);
            break;
          case 'verified':
            verifiedSet.add(refPath);
            break;
          default:
            // 'hint' (no vault_path configured, or filesystem error) — cannot classify.
            break;
        }
      }
    }
  }

  const stale_paths = Array.from(staleSet);
  const verified_paths = Array.from(verifiedSet);

  if (!dryRun) {
    const manager = opts.mm ?? createMemoryManager(config);
    manager.store('maintenance', { stale_paths, verified_paths, checked_count });
  }

  return { stale_paths, verified_paths, checked_count, dry_run: dryRun };
}
292
/**
 * Run Pass 3: Relationship Discovery.
 *
 * Builds a topic-summaries prompt from the consolidation events supplied by the
 * caller, asks the LLM for connections / contradictions / open questions, and
 * persists the result as an 'insight' artifact.
 *
 * Phase 6 migration (D6.6.2): persistence goes through the
 * DerivedArtifactWriter; there is no direct mm.store('insight', ...) call.
 *
 * When config.memory.encrypt is true, only topic names are sent to the LLM (no facts).
 * When dryRun is true, no LLM call or write is made and empty arrays are returned.
 * If the LLM call or response parsing throws, the error is swallowed and an
 * artifact with three empty arrays is still written.
 *
 * @param {object} config — loadConfig() result
 * @param {object[]} consolidations — consolidation events to discover across
 *   (each must have data.topic and data.facts; string `id`s, when present,
 *   become the provenance source event ids)
 * @param {{
 *   dryRun?: boolean,
 *   llmFn?: Function,
 *   mm?: import('./memory.mjs').MemoryManager,
 *   writer?: import('./companion-artifact-writer.mjs').DerivedArtifactWriter,
 *   writerContext?: import('./companion-artifact-writer.mjs').WriteContext,
 *   generatedBy?: string,
 *   model?: string,
 *   modelVersion?: string,
 *   runtimeVersion?: string,
 * }} [opts]
 * @returns {Promise<{ connections: string[], contradictions: string[], open_questions: string[], topic_count: number, dry_run: boolean }>}
 */
export async function runDiscoverPass(config, consolidations, opts = {}) {
  const isDryRun = opts.dryRun ?? false;
  const topicsOnly = config.memory?.encrypt === true;
  const tokenBudget = config.daemon?.llm?.max_tokens ?? 1024;
  const callLlm = opts.llmFn || completeChat;
  const topic_count = consolidations.length;

  if (isDryRun) {
    return { connections: [], contradictions: [], open_questions: [], topic_count, dry_run: true };
  }

  const request = {
    system: DISCOVER_SYSTEM_PROMPT,
    user: buildDiscoverPrompt(consolidations, topicsOnly),
    maxTokens: tokenBudget,
  };

  // Best effort: a failed LLM call or unparseable reply degrades to empty findings.
  let findings;
  try {
    findings = parseDiscoverResponse(await callLlm(config, request));
  } catch (_) {
    findings = { connections: [], contradictions: [], open_questions: [] };
  }
  const { connections, contradictions, open_questions } = findings;

  // Phase 6 (D6.6.2): route through DerivedArtifactWriter — no direct mm.store('insight', ...).
  const mm = opts.mm ?? createMemoryManager(config);

  // Default writer for the convenience/self-partition case.
  const writer = opts.writer ?? createDerivedArtifactWriter({
    writeNoteFn: () => { /* insight artifacts don't use writeNoteFn */ },
    vaultPath: config.vault_path || '.',
    mm,
    vaultRegistryAvailable: false,
  });

  const context = opts.writerContext ?? {
    lane: 'local',
    containsPrivateData: false,
    isDelegate: false,
    delegatedManagedAllowed: false,
    enrichesDelegatedPartition: false,
    delegatedEnrichmentAllowed: false,
  };

  // Prefer the real consolidation event ids; otherwise use a timestamp slug.
  const knownIds = consolidations
    .map((entry) => entry.id)
    .filter((id) => typeof id === 'string' && id);
  const sourceEventId = knownIds.length > 0 ? knownIds : [`discover-${Date.now()}`];

  const provenance = buildConvenienceProvenance({
    generatedBy: opts.generatedBy || config.vault_id || 'system',
    source: 'companion',
    model: opts.model || config.llm?.model || 'unknown',
    // D6.2.1: one of model_version|runtime_version MUST be a concrete value, so
    // an unconfigured model version is reported as 'unknown' rather than null.
    modelVersion: opts.modelVersion || config.llm?.model_version || 'unknown',
    runtimeVersion: opts.runtimeVersion || undefined,
    lane: context.lane,
    artifactType: 'insight',
    sourceNotePath: null,
    sourceEventId,
  });

  await writer.write({ connections, contradictions, open_questions, topic_count }, provenance, context);

  return { connections, contradictions, open_questions, topic_count, dry_run: false };
}
404
/**
 * Run the consolidation engine: read recent events, optionally group by topic
 * and call the LLM (consolidate pass), optionally detect stale path references
 * (verify pass), optionally discover cross-topic relationships (discover pass),
 * store results, and rebuild the pointer index.
 *
 * opts.passes controls which passes run:
 *  - undefined/null → use daemon config (consolidate + verify by default)
 *  - string[] → explicit list, e.g. ['consolidate', 'verify']
 *  - comma-string → e.g. 'consolidate,verify'
 *
 * Events of type 'consolidation', 'maintenance' and 'insight' are excluded from
 * every pass. Topic groups with fewer than two events are skipped. A failed or
 * unparseable LLM reply is reported on that topic's result entry (`error`) and
 * does not abort the run. The discover pass runs only when at least one
 * consolidation was stored during this call.
 *
 * @param {object} config — loadConfig() result
 * @param {{ dryRun?: boolean, passes?: string[]|string, lookbackHours?: number, maxEventsPerPass?: number, maxTopicsPerPass?: number, llmFn?: Function, mm?: object }} [opts]
 *   mm: memory manager to use; defaults to createMemoryManager(config)
 * @returns {Promise<{ topics: Array<{ topic: string, event_count: number, facts: string[], id?: string }>, total_events: number, dry_run: boolean, verify: object|null, discover: object|null }>}
 */
export async function consolidateMemory(config, opts = {}) {
  const daemonCfg = config.daemon || {};
  const dryRun = opts.dryRun ?? daemonCfg.dry_run ?? false;
  const lookbackHours = opts.lookbackHours ?? daemonCfg.lookback_hours ?? 24;
  const maxEventsPerPass = opts.maxEventsPerPass ?? daemonCfg.max_events_per_pass ?? 200;
  const maxTopicsPerPass = opts.maxTopicsPerPass ?? daemonCfg.max_topics_per_pass ?? 10;
  const maxTokens = daemonCfg.llm?.max_tokens ?? 1024;

  const llmFn = opts.llmFn || completeChat;
  const consolidateEncrypt = config.memory?.encrypt === true;

  const passNames = resolvePassNames(opts.passes, daemonCfg.passes);
  const runConsolidate = passNames.includes('consolidate');
  const runVerify = passNames.includes('verify');
  const runDiscover = passNames.includes('discover');

  const mm = opts.mm ?? createMemoryManager(config);
  const since = new Date(Date.now() - lookbackHours * 3_600_000).toISOString();
  const allEvents = mm.list({ since, limit: maxEventsPerPass });

  // Never feed the engine's own output back into itself.
  const nonConsolidationEvents = allEvents.filter(
    (e) => e.type !== 'consolidation' && e.type !== 'maintenance' && e.type !== 'insight',
  );

  if (nonConsolidationEvents.length === 0) {
    return { topics: [], total_events: 0, dry_run: dryRun, verify: null, discover: null };
  }

  const results = [];

  if (runConsolidate) {
    const topicGroups = groupEventsByTopic(nonConsolidationEvents);

    // Busiest topics first, capped at maxTopicsPerPass.
    const sortedTopics = [...topicGroups.entries()]
      .sort((a, b) => b[1].length - a[1].length)
      .slice(0, maxTopicsPerPass);

    for (const [topic, events] of sortedTopics) {
      // A single event has nothing to merge with.
      if (events.length < 2) continue;

      const userPrompt = buildConsolidationPrompt(topic, events, { encrypt: consolidateEncrypt });

      if (dryRun) {
        results.push({
          topic,
          event_count: events.length,
          facts: [],
          dry_run_estimate: `${Math.min(events.length, 7)} facts`,
        });
        continue;
      }

      let facts;
      try {
        const rawResponse = await llmFn(config, {
          system: CONSOLIDATION_SYSTEM_PROMPT,
          user: userPrompt,
          maxTokens,
        });
        facts = parseConsolidationResponse(rawResponse);
      } catch (err) {
        results.push({
          topic,
          event_count: events.length,
          facts: [],
          // Optional chaining: a thrown null/undefined must not raise a second error here.
          error: err?.message || String(err),
        });
        continue;
      }

      if (facts.length === 0) {
        results.push({
          topic,
          event_count: events.length,
          facts: [],
          error: 'LLM returned no parseable facts',
        });
        continue;
      }

      const timestamps = events.map((e) => e.ts).sort();
      const consolidationData = {
        topic,
        facts,
        event_count: events.length,
        since: timestamps[0],
        until: timestamps[timestamps.length - 1],
      };

      const stored = mm.store('consolidation', consolidationData);
      results.push({
        topic,
        event_count: events.length,
        facts,
        id: stored.id,
      });
    }
  }

  let verifyResult = null;
  if (runVerify) {
    verifyResult = runVerifyPass(config, nonConsolidationEvents, { dryRun, mm });
  }

  // Collect consolidation events that were actually stored this pass (non-dry-run, non-error).
  // These are passed to the discover pass so it doesn't re-read from disk.
  const storedConsolidations = results.filter((r) => r.id != null);

  let discoverResult = null;
  // With nothing stored (dry-run, consolidate pass skipped, or no qualifying
  // groups) the discover pass is skipped and its result stays null.
  if (runDiscover && storedConsolidations.length > 0) {
    // Forward the stored event id: runDiscoverPass reads `id` to fill the
    // insight's provenance source ids and otherwise falls back to a synthetic
    // timestamp slug.
    const consolidationEvents = storedConsolidations.map((r) => ({
      id: r.id,
      data: { topic: r.topic, facts: r.facts },
    }));
    discoverResult = await runDiscoverPass(config, consolidationEvents, { dryRun, llmFn, mm });
  }

  if (!dryRun) {
    mm.generateIndex({ force: true });
  }

  return {
    topics: results,
    total_events: nonConsolidationEvents.length,
    dry_run: dryRun,
    verify: verifyResult,
    discover: discoverResult,
  };
}

File History 1 commit

sha256:0d530f9ef27b8b75547d1db7701a74bc77b77aa8f3d7fa3a8672cf2af36e63bb reconcile: import GitHub-direct RBAC/OAuth/companion and ho… Human minor ⚠ 6 hours ago

patch memory-consolidate.mjs 6 hours ago

replace runDiscoverPass 6 hours ago

Pathlib/memory-consolidate.mjs

Lines551

Size19.4 KB

Refsha256:0d530f9ef27b8b75547d1db7701a74bc77b77aa8f3d7fa3a8672cf2af36e63bb

Object ID

sha256:06bfee5aaffd6738759779b785b7bd19ced1b8c16578c7b9ade5451bf85102ca…

Last commit

sha256:0d530f9ef27b8b75547d1db7701a74bc77b77aa8f3d7fa3a8672cf2af36e63bb

reconcile: import GitHub-direct RBAC/OAuth/compan…

6 hours ago

Quick links

Blame History