memory.mjs
582 lines 18.0 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 2 days ago
1 /**
2 * Memory layer: timestamped event log with provider-based storage. Phase 8.
3 *
4 * Providers: file (default), vector (semantic recall), mem0 (external API), supabase (remote, URL + key).
5 * Backward-compatible: storeMemory() and getMemory() still work for callers
6 * that pass (dataDir, key, value) — they delegate to the file provider.
7 */
8
9 import fs from 'fs';
10 import path from 'path';
11 import { createMemoryEvent, DEFAULT_CAPTURE_TYPES } from './memory-event.mjs';
12 import { FileMemoryProvider } from './memory-provider-file.mjs';
13
/**
 * Compute the directory that holds memory data for one vault, or the shared
 * global directory when `opts.scope` is `'global'`.
 *
 * @param {string} dataDir - root data directory
 * @param {string} [vaultId='default'] - vault identifier, used as the leaf directory name
 * @param {{ scope?: 'vault'|'global' }} [opts] - any scope other than `'global'` resolves per vault
 * @returns {string} `<dataDir>/memory/_global` or `<dataDir>/memory/<vaultId>`
 */
export function resolveMemoryDir(dataDir, vaultId = 'default', opts = {}) {
  const leaf = opts.scope === 'global' ? '_global' : vaultId;
  return path.join(dataDir, 'memory', leaf);
}
27
/**
 * The confidence labels a memory verification can produce.
 * A label is derived when an event is read; it is never persisted on the event.
 * @type {readonly string[]}
 */
export const MEMORY_CONFIDENCE_LEVELS = Object.freeze([
  'verified',
  'hint',
  'stale',
]);
34
/**
 * Verify a memory event against the current vault state and return a confidence assessment.
 *
 * Rules:
 * - 'stale'    — the event recorded a failed operation, the referenced path no longer
 *                exists, or the referenced path was modified after the event timestamp
 * - 'verified' — the referenced path exists and was not modified after the event
 *                timestamp (or the event carries no timestamp to compare against)
 * - 'hint'     — the event has no verifiable path reference, `vault_path` is not
 *                configured, the event timestamp cannot be parsed, or a filesystem
 *                error occurred; treat as context only
 *
 * Timestamps are compared as instants (epoch milliseconds), not as strings, so event
 * timestamps with a UTC offset or a different fractional precision are handled correctly.
 *
 * The function never throws.
 *
 * @param {object} config - loadConfig() result (needs config.vault_path)
 * @param {object} event - memory event object (`status`, `ts` and `data` are read)
 * @returns {{ confidence: 'verified'|'hint'|'stale', reason: string }}
 */
export function verifyMemoryEvent(config, event) {
  if (!event || typeof event !== 'object') {
    return { confidence: 'hint', reason: 'invalid event object' };
  }

  if (event.status === 'failed') {
    return { confidence: 'stale', reason: 'event recorded a failed operation' };
  }

  const refPath = _extractPathReference(event.data);
  if (!refPath) {
    return { confidence: 'hint', reason: 'no verifiable path reference in event data' };
  }

  const vaultPath = config?.vault_path;
  if (!vaultPath) {
    return { confidence: 'hint', reason: 'vault_path not configured, cannot verify' };
  }

  try {
    // Relative references are resolved against the vault root; absolute ones are used as-is.
    const absPath = path.isAbsolute(refPath) ? refPath : path.join(vaultPath, refPath);

    if (!fs.existsSync(absPath)) {
      return { confidence: 'stale', reason: `referenced path no longer exists: ${refPath}` };
    }

    const stat = fs.statSync(absPath);
    const eventTs = event.ts;
    if (eventTs) {
      // Compare instants numerically: a lexicographic comparison of ISO strings is only
      // valid when both sides share the exact same format, precision and time zone.
      const eventMs = new Date(eventTs).getTime();
      if (Number.isNaN(eventMs)) {
        return { confidence: 'hint', reason: 'event timestamp is not a parseable date, cannot verify' };
      }
      // getTime() is whole milliseconds, matching the precision of ISO event timestamps.
      if (stat.mtime.getTime() > eventMs) {
        return {
          confidence: 'stale',
          reason: `referenced path modified after event (file: ${stat.mtime.toISOString()}, event: ${eventTs})`,
        };
      }
    }

    return { confidence: 'verified', reason: `path exists and unchanged since event: ${refPath}` };
  } catch (_) {
    return { confidence: 'hint', reason: 'could not verify path — filesystem error' };
  }
}

/**
 * Extract the most meaningful path reference from event data.
 * Checks, in order: `data.path`, the first entry of `data.paths`, and the `path`
 * of the first entry of `data.exported`. Only non-empty strings are accepted, so
 * the result is always usable as a filesystem path.
 * @param {object} data
 * @returns {string|null}
 */
function _extractPathReference(data) {
  if (!data || typeof data !== 'object') return null;

  const candidates = [
    data.path,
    Array.isArray(data.paths) ? data.paths[0] : undefined,
    Array.isArray(data.exported) ? data.exported[0]?.path : undefined,
  ];
  for (const candidate of candidates) {
    if (typeof candidate === 'string' && candidate) return candidate;
  }
  return null;
}
110
// Minimum gap between two retention-pruning passes: one hour.
const PRUNE_THROTTLE_MS = 60 * 60 * 1000;
// Minimum gap between two index rebuilds: ten seconds, so a burst of stores does not rebuild each time.
const INDEX_THROTTLE_MS = 10 * 1000;
// Longest JSON fallback summary, in characters, before it is cut and given an ellipsis.
const INDEX_SUMMARY_CHAR_LIMIT = 120;
// Default number of events requested for the "Recent Activity" section of the index.
const INDEX_RECENT_LIMIT = 10;
115
/**
 * Build a short, human-readable phrase describing an event's `data` payload.
 *
 * The first truthy field among `query`, `path`, `summary_text`, `format`, `source`
 * and `key` (in that order) is used; `format` and `source` are prefixed with their
 * field name. When none is set, the payload's JSON is used, cut to
 * INDEX_SUMMARY_CHAR_LIMIT characters plus an ellipsis.
 *
 * @param {object} event - memory event; only `event.data` is read
 * @returns {string} the summary, or '' when `data` is missing or not an object
 */
function summarizeEventData(event) {
  const payload = event.data;
  if (!payload || typeof payload !== 'object') {
    return '';
  }

  // Ordered by priority: the first field holding a truthy value wins.
  const candidates = [
    ['query', (value) => String(value)],
    ['path', (value) => String(value)],
    ['summary_text', (value) => String(value)],
    ['format', (value) => `format:${value}`],
    ['source', (value) => `source:${value}`],
    ['key', (value) => String(value)],
  ];
  for (const [field, render] of candidates) {
    const value = payload[field];
    if (value) {
      return render(value);
    }
  }

  const serialized = JSON.stringify(payload);
  if (serialized.length > INDEX_SUMMARY_CHAR_LIMIT) {
    return serialized.slice(0, INDEX_SUMMARY_CHAR_LIMIT) + '…';
  }
  return serialized;
}
133
/**
 * Cut a string down to `max` characters and mark the cut with an ellipsis.
 * Strings that already fit are returned unchanged.
 * @param {string} s - text to shorten
 * @param {number} max - number of characters to keep before the ellipsis
 * @returns {string}
 */
function truncate(s, max) {
  return s.length <= max ? s : `${s.slice(0, max)}…`;
}
144
/**
 * Generate a lightweight markdown memory index from a MemoryManager.
 * Designed to be cheap enough (~150 chars/line) for agents to always include in context.
 *
 * Sections: "By Type" (count and latest event per type), "Recent Activity" (the most
 * recent events, with failed ones left out) and, when the manager reports any topics,
 * "Topics". A type whose latest event failed is listed as "(latest failed)".
 *
 * The result always has the same shape: `topics` is an empty array when the store is
 * empty or no topics exist.
 *
 * @param {MemoryManager} mm - uses stats(), getLatest(), list(), listTopics(), topicStats()
 * @param {{ recentLimit?: number }} [opts] - recentLimit defaults to INDEX_RECENT_LIMIT
 * @returns {{ markdown: string, generated_at: string, total_events: number, types: string[], topics: string[] }}
 */
export function generateMemoryIndex(mm, opts = {}) {
  const recentLimit = opts.recentLimit ?? INDEX_RECENT_LIMIT;
  const stats = mm.stats();
  const generatedAt = new Date().toISOString();

  if (stats.total === 0) {
    return {
      markdown: `# Memory Index\nGenerated: ${generatedAt}\n\n(empty — no memory events recorded yet)\n`,
      generated_at: generatedAt,
      total_events: 0,
      types: [],
      // Present even when empty so callers can rely on one result shape.
      topics: [],
    };
  }

  const lines = ['# Memory Index', `Generated: ${generatedAt}`, ''];

  const types = Object.keys(stats.counts_by_type).sort();

  lines.push('## By Type');
  for (const type of types) {
    const count = stats.counts_by_type[type];
    const latest = mm.getLatest(type);
    if (!latest) {
      lines.push(`- ${type}: ${count} events`);
      continue;
    }
    if (latest.status === 'failed') {
      lines.push(`- ${type}: ${count} events (latest failed)`);
      continue;
    }
    // Keep second precision only: "YYYY-MM-DDTHH:MM:SS" + "Z".
    const ts = latest.ts.slice(0, 19) + 'Z';
    const summary = truncate(summarizeEventData(latest), 80);
    lines.push(`- ${type}: ${count} events, last ${ts} — "${summary}"`);
  }

  lines.push('');
  lines.push('## Recent Activity');

  // Failed events are filtered after the limit is applied, so fewer than
  // recentLimit entries may be listed.
  const recent = mm.list({ limit: recentLimit });
  const successRecent = recent.filter((e) => e.status !== 'failed');
  if (successRecent.length === 0) {
    lines.push('(no recent successful events)');
  } else {
    for (const e of successRecent) {
      const ts = e.ts.slice(0, 19) + 'Z';
      const summary = truncate(summarizeEventData(e), 80);
      lines.push(`- ${ts} [${e.type}] ${summary}`);
    }
  }

  const topics = mm.listTopics();
  if (topics.length > 0) {
    lines.push('');
    lines.push('## Topics');
    for (const t of topics) {
      const topicInfo = mm.topicStats(t);
      lines.push(`- ${t}: ${topicInfo.total} events`);
    }
  }

  // Trailing empty entry so the joined markdown ends with a newline.
  lines.push('');

  return {
    markdown: lines.join('\n'),
    generated_at: generatedAt,
    total_events: stats.total,
    types,
    topics,
  };
}
224
/**
 * Facade over a memory storage provider.
 *
 * Adds three things on top of the provider: a capture-type filter, throttled
 * retention pruning, and a cached markdown index that is rebuilt at most once
 * per INDEX_THROTTLE_MS.
 */
export class MemoryManager {
  #provider;
  #config;
  #lastPruneTs;
  #lastIndexTs;
  #cachedIndex;
  // recentLimit the cached index was built with (null = default), so a request
  // for a different limit is never answered from the cache.
  #cachedRecentLimit;

  /**
   * @param {object} provider — must implement storeEvent, getLatest, listEvents, searchEvents, clearEvents, getStats
   * @param {{ capture?: string[], retentionDays?: number|null }} [config]
   */
  constructor(provider, config = {}) {
    this.#provider = provider;
    this.#config = {
      capture: config.capture || [...DEFAULT_CAPTURE_TYPES],
      retentionDays: config.retentionDays ?? null,
    };
    this.#lastPruneTs = 0;
    this.#lastIndexTs = 0;
    this.#cachedIndex = null;
    this.#cachedRecentLimit = null;
  }

  /** The underlying storage provider. */
  get provider() {
    return this.#provider;
  }

  /** Event types configured for auto-capture. */
  get captureTypes() {
    return this.#config.capture;
  }

  /**
   * Whether this event type should be auto-captured based on config.
   * @param {string} type
   * @returns {boolean}
   */
  shouldCapture(type) {
    return this.#config.capture.includes(type);
  }

  /**
   * Run retention pruning if configured and the throttle period has elapsed.
   * Returns null when retention is disabled, the provider has no pruneExpired(),
   * or the last attempt was less than PRUNE_THROTTLE_MS ago.
   * The cached index is dropped when events were actually removed.
   * @returns {{ pruned: number }|null}
   */
  prune() {
    const days = this.#config.retentionDays;
    if (!days || days <= 0) return null;
    if (typeof this.#provider.pruneExpired !== 'function') return null;
    const now = Date.now();
    if (now - this.#lastPruneTs < PRUNE_THROTTLE_MS) return null;
    // Recorded before the call so a failing prune is not retried on every store.
    this.#lastPruneTs = now;
    const result = this.#provider.pruneExpired(days);
    if (result?.pruned > 0) {
      // The cached index may still describe the removed events.
      this.#cachedIndex = null;
    }
    return result;
  }

  /**
   * Store a memory event. Piggybacks retention pruning and index rebuild (both throttled).
   * Both piggybacked steps are best-effort: once the provider has stored the event,
   * a failure in either is not propagated (a prune failure is logged to stderr).
   * @param {string} type
   * @param {object} data
   * @param {{ vaultId?: string, ttl?: string|null, airId?: string, status?: 'success'|'failed' }} [opts]
   * @returns {{ id: string, ts: string }}
   */
  store(type, data, opts = {}) {
    const event = createMemoryEvent(type, data, opts);
    const result = this.#provider.storeEvent(event);
    try {
      this.prune();
    } catch (err) {
      // The event is already persisted; throwing here would make callers treat
      // a successful store as failed (and possibly retry it).
      console.error('knowtation: memory prune failed:', err?.message ?? err);
    }
    this.#maybeRebuildIndex();
    return result;
  }

  /**
   * Get the latest event for a type.
   * @param {string} type
   * @returns {object|null}
   */
  getLatest(type) {
    return this.#provider.getLatest(type);
  }

  /**
   * List events with optional filters.
   * @param {{ type?: string, since?: string, until?: string, limit?: number, topic?: string }} [opts]
   * @returns {object[]}
   */
  list(opts = {}) {
    return this.#provider.listEvents(opts);
  }

  /**
   * List all known topic slugs. Returns [] when the provider has no listTopics().
   * @returns {string[]}
   */
  listTopics() {
    if (typeof this.#provider.listTopics === 'function') {
      return this.#provider.listTopics();
    }
    return [];
  }

  /**
   * Get statistics for a specific topic. Returns a zeroed record when the
   * provider has no getTopicStats().
   * @param {string} slug
   * @returns {{ topic: string, total: number, oldest: string|null, newest: string|null }}
   */
  topicStats(slug) {
    if (typeof this.#provider.getTopicStats === 'function') {
      return this.#provider.getTopicStats(slug);
    }
    return { topic: slug, total: 0, oldest: null, newest: null };
  }

  /**
   * Search memory entries through the provider's searchEvents().
   * Use supportsSearch() to find out whether the provider reports search support.
   * @param {string} query
   * @param {{ limit?: number }} [opts]
   * @returns {object[]}
   */
  search(query, opts = {}) {
    return this.#provider.searchEvents(query, opts);
  }

  /**
   * Whether the underlying provider reports semantic search support.
   * @returns {boolean}
   */
  supportsSearch() {
    return typeof this.#provider.supportsSearch === 'function' && this.#provider.supportsSearch();
  }

  /**
   * Clear events with optional filters. Drops the cached index.
   * @param {{ type?: string, before?: string }} [opts]
   * @returns {{ cleared: number }}
   */
  clear(opts = {}) {
    const result = this.#provider.clearEvents(opts);
    this.#cachedIndex = null;
    return result;
  }

  /**
   * Get memory statistics.
   * @returns {{ counts_by_type: Record<string, number>, total: number, oldest: string|null, newest: string|null, size_bytes: number }}
   */
  stats() {
    return this.#provider.getStats();
  }

  /**
   * Generate the lightweight pointer index. Returns the cached version when it is
   * younger than INDEX_THROTTLE_MS and was built with the same recentLimit;
   * otherwise rebuilds. Pass force=true to bypass the cache.
   * @param {{ force?: boolean, recentLimit?: number }} [opts]
   * @returns {{ markdown: string, generated_at: string, total_events: number, types: string[], topics?: string[] }}
   */
  generateIndex(opts = {}) {
    const recentLimit = opts.recentLimit ?? null;
    const isFresh = Date.now() - this.#lastIndexTs < INDEX_THROTTLE_MS;
    const sameLimit = recentLimit === this.#cachedRecentLimit;
    if (!opts.force && this.#cachedIndex && isFresh && sameLimit) {
      return this.#cachedIndex;
    }
    this.#cachedIndex = generateMemoryIndex(this, opts);
    this.#cachedRecentLimit = recentLimit;
    this.#lastIndexTs = Date.now();
    return this.#cachedIndex;
  }

  /** Rebuild the default index after a store, at most once per INDEX_THROTTLE_MS. */
  #maybeRebuildIndex() {
    const now = Date.now();
    if (now - this.#lastIndexTs < INDEX_THROTTLE_MS) return;
    try {
      this.#cachedIndex = generateMemoryIndex(this);
      this.#cachedRecentLimit = null;
      this.#lastIndexTs = now;
    } catch (_) { /* non-critical */ }
  }
}
395
// Set once the "encryption requested but not active" warning has been printed,
// so repeated manager creation does not flood stderr.
let _encryptionFallbackWarned = false;

/**
 * Create a MemoryManager from a Knowtation config object.
 *
 * This function is synchronous and never imports provider modules itself: the
 * encrypted, vector, mem0 and supabase providers are used only if their module
 * was loaded earlier by createMemoryManagerAsync(). When the requested provider
 * is unavailable or its constructor throws, the file provider is used instead.
 *
 * Selection order: encrypted file provider (when `memory.encrypt === true` and a
 * secret is available from `memory.secret` or KNOWTATION_MEMORY_SECRET), then
 * `memory.provider` ('vector' | 'mem0' | 'supabase'), otherwise the file provider.
 *
 * If `memory.encrypt === true` but encryption could not be applied (no secret,
 * module not loaded, or constructor failure), a warning is written to stderr once
 * per process, because events are then stored without encryption.
 *
 * @param {object} config — result of loadConfig() (reads config.memory and config.data_dir)
 * @param {string} [vaultId]
 * @param {{ scope?: 'vault'|'global' }} [opts] — overrides config.memory.scope
 * @returns {MemoryManager}
 */
export function createMemoryManager(config, vaultId = 'default', opts = {}) {
  const memCfg = config.memory || {};
  const providerName = memCfg.provider || 'file';
  const scope = opts.scope || memCfg.scope || 'vault';
  const baseDir = resolveMemoryDir(config.data_dir, vaultId, { scope });
  const capture = Array.isArray(memCfg.capture) ? memCfg.capture : [...DEFAULT_CAPTURE_TYPES];
  const retentionDays = memCfg.retention_days ?? null;

  const encrypt = memCfg.encrypt === true;
  const secret = memCfg.secret || process.env.KNOWTATION_MEMORY_SECRET || '';

  const fileProviderOpts = { topicPartition: memCfg.topic_partition === true };
  const createFileProvider = () => new FileMemoryProvider(baseDir, fileProviderOpts);

  // Build a provider from an optionally loaded module export; fall back to the
  // file provider when the export is missing or anything in the attempt throws.
  const buildOrFallBack = (getCtor, build) => {
    try {
      const Ctor = getCtor();
      return Ctor ? build(Ctor) : createFileProvider();
    } catch (_) {
      return createFileProvider();
    }
  };

  let provider;
  let encryptionApplied = false;
  if (encrypt && secret) {
    provider = buildOrFallBack(
      () => _encryptedProviderModule?.EncryptedFileMemoryProvider,
      (Ctor) => {
        const encrypted = new Ctor(baseDir, secret);
        encryptionApplied = true;
        return encrypted;
      },
    );
  } else if (providerName === 'vector') {
    provider = buildOrFallBack(
      () => _vectorProviderModule?.VectorMemoryProvider,
      (Ctor) => new Ctor(baseDir, config),
    );
  } else if (providerName === 'mem0') {
    provider = buildOrFallBack(
      () => _mem0ProviderModule?.Mem0MemoryProvider,
      (Ctor) => new Ctor(baseDir, { url: memCfg.url, apiKey: memCfg.api_key }),
    );
  } else if (providerName === 'supabase') {
    provider = buildOrFallBack(
      () => _supabaseProviderModule?.SupabaseMemoryProvider,
      (Ctor) => {
        const sbUrl = memCfg.supabase_url || process.env.KNOWTATION_SUPABASE_URL || '';
        const sbKey = memCfg.supabase_key || process.env.KNOWTATION_SUPABASE_KEY || '';
        return new Ctor(baseDir, { url: sbUrl, key: sbKey, vaultId });
      },
    );
  } else {
    provider = createFileProvider();
  }

  if (encrypt && !encryptionApplied && !_encryptionFallbackWarned) {
    _encryptionFallbackWarned = true;
    // Never include the secret itself in the message.
    console.error(
      'knowtation: memory.encrypt is enabled but encryption is not active '
      + '(missing secret, encrypted provider not loaded, or provider failed to start); '
      + 'memory events will be stored without encryption',
    );
  }

  return new MemoryManager(provider, { capture, retentionDays });
}
471
// Slots for optional provider modules. Each stays null until
// createMemoryManagerAsync() imports the module successfully;
// createMemoryManager() reads them and falls back to the file provider
// while a slot is still null.
let _encryptedProviderModule = null;
let _vectorProviderModule = null;
let _mem0ProviderModule = null;
let _supabaseProviderModule = null;
476
/**
 * Asynchronous factory: imports whichever optional provider modules the config
 * asks for (encrypted, vector, mem0, supabase) and then delegates to
 * createMemoryManager(). A module that is already loaded is not imported again,
 * and a failed import is ignored, leaving that module unavailable.
 *
 * @param {object} config — reads config.memory.encrypt and config.memory.provider
 * @param {string} [vaultId]
 * @param {object} [opts] — passed through to createMemoryManager()
 * @returns {Promise<MemoryManager>}
 */
export async function createMemoryManagerAsync(config, vaultId = 'default', opts = {}) {
  const memCfg = config.memory || {};

  // Resolve to the imported module namespace, or to null when the import fails.
  const importOrNull = async (load) => {
    try {
      return await load();
    } catch (_) {
      return null;
    }
  };

  if (memCfg.encrypt && !_encryptedProviderModule) {
    const loaded = await importOrNull(() => import('./memory-provider-encrypted.mjs'));
    if (loaded) _encryptedProviderModule = loaded;
  }
  if (memCfg.provider === 'vector' && !_vectorProviderModule) {
    const loaded = await importOrNull(() => import('./memory-provider-vector.mjs'));
    if (loaded) _vectorProviderModule = loaded;
  }
  if (memCfg.provider === 'mem0' && !_mem0ProviderModule) {
    const loaded = await importOrNull(() => import('./memory-provider-mem0.mjs'));
    if (loaded) _mem0ProviderModule = loaded;
  }
  if (memCfg.provider === 'supabase' && !_supabaseProviderModule) {
    const loaded = await importOrNull(() => import('./memory-provider-supabase.mjs'));
    if (loaded) _supabaseProviderModule = loaded;
  }

  return createMemoryManager(config, vaultId, opts);
}
507
/**
 * Translate a legacy memory key into its event type.
 * `last_search` becomes `search` and `last_export` becomes `export`;
 * every other key is returned unchanged.
 * @param {string} key
 * @returns {string}
 */
function legacyKeyToType(key) {
  switch (key) {
    case 'last_search':
      return 'search';
    case 'last_export':
      return 'export';
    default:
      return key;
  }
}
518
/**
 * Backward-compatible writer: record `value` under `key`.
 *
 * The value is stored as an event (type derived from the legacy key) through a
 * file provider rooted at the default vault's memory directory, and then
 * mirrored into the legacy memory.json. Any error is logged to stderr and not
 * rethrown; nothing is returned.
 *
 * @param {string} dataDir
 * @param {string} key
 * @param {object} value
 */
export function storeMemory(dataDir, key, value) {
  try {
    const fileProvider = new FileMemoryProvider(resolveMemoryDir(dataDir));
    const legacyEvent = createMemoryEvent(legacyKeyToType(key), value);
    fileProvider.storeEvent(legacyEvent);

    // Mirror into memory.json only after the event itself was stored.
    writeLegacyMemoryJson(dataDir, key, value);
  } catch (e) {
    console.error('knowtation: memory store failed:', e.message);
  }
}
538
/**
 * Backward-compatible reader: fetch the value last stored under `key`.
 *
 * The latest event of the matching type is returned as its data plus an `_at`
 * field holding the event timestamp. When no such event exists, or reading
 * events throws, the legacy memory.json is consulted instead.
 *
 * @param {string} dataDir
 * @param {string} key
 * @returns {object|null} null on miss
 */
export function getMemory(dataDir, key) {
  try {
    const eventType = legacyKeyToType(key);
    const latest = new FileMemoryProvider(resolveMemoryDir(dataDir)).getLatest(eventType);
    return latest
      ? { ...latest.data, _at: latest.ts }
      : readLegacyMemoryJson(dataDir, key);
  } catch (_) {
    return readLegacyMemoryJson(dataDir, key);
  }
}
559
/**
 * Best-effort mirror of a stored value into the legacy `<dataDir>/memory.json`.
 *
 * The file is read, the entry for `key` is replaced with `value` plus an `_at`
 * timestamp, and the file is rewritten (creating the directory if needed).
 * Existing content that is unreadable, corrupt, or not a plain JSON object is
 * discarded and replaced by a fresh object, so the new entry is always written.
 * Errors are swallowed deliberately: this mirror must never fail the caller.
 *
 * @param {string} dataDir
 * @param {string} key
 * @param {object} value
 */
function writeLegacyMemoryJson(dataDir, key, value) {
  try {
    const filePath = path.join(dataDir, 'memory.json');
    let data = {};
    if (fs.existsSync(filePath)) {
      try {
        const parsed = JSON.parse(fs.readFileSync(filePath, 'utf8'));
        // `null`, arrays and primitives are valid JSON but cannot hold keyed
        // entries: assigning to them either throws or is lost on serialization.
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
          data = parsed;
        }
      } catch (_) { /* unreadable or corrupt file: start from an empty object */ }
    }
    data[key] = { ...value, _at: new Date().toISOString() };
    fs.mkdirSync(path.dirname(filePath), { recursive: true });
    fs.writeFileSync(filePath, JSON.stringify(data, null, 2), 'utf8');
  } catch (_) { /* best-effort legacy mirror */ }
}
572
/**
 * Read one entry from the legacy `<dataDir>/memory.json`.
 *
 * Only the file's own top-level keys are considered, so names inherited from
 * Object.prototype (such as `toString` or `constructor`) are reported as a miss.
 * Returns null when the file is missing, unreadable, corrupt, not a plain JSON
 * object, or has no entry for `key`. Never throws.
 *
 * @param {string} dataDir
 * @param {string} key
 * @returns {object|null}
 */
function readLegacyMemoryJson(dataDir, key) {
  try {
    const filePath = path.join(dataDir, 'memory.json');
    if (!fs.existsSync(filePath)) return null;
    const data = JSON.parse(fs.readFileSync(filePath, 'utf8'));
    if (data === null || typeof data !== 'object' || Array.isArray(data)) return null;
    if (!Object.prototype.hasOwnProperty.call(data, key)) return null;
    return data[key] ?? null;
  } catch (_) {
    return null;
  }
}
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 2 days ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 2 days ago