memory-provider-vector.mjs
165 lines 4.8 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 2 days ago
1 /**
2 * Vector-backed memory provider: extends file provider with embedding-based semantic search.
3 * Uses the existing Knowtation embedding + vector store infrastructure.
4 * Phase 8 Memory Augmentation.
5 *
6 * Storage:
7 * - File layer (events.jsonl + state.json) via FileMemoryProvider for durability
8 * - Vector index: embeddings of event data stored in a `_memory` collection/table
9 */
10
11 import { FileMemoryProvider } from './memory-provider-file.mjs';
12
/**
 * Memory provider that layers best-effort semantic search on top of the
 * file-backed provider.
 *
 * Every read/write of the event log is delegated to a FileMemoryProvider, so
 * the file layer stays the source of truth. Each stored event is additionally
 * embedded and upserted into a vector store created from the supplied config
 * (with `data_dir` pointed at this provider's memory directory). Any failure
 * on the vector side is logged and never surfaces to the caller.
 */
export class VectorMemoryProvider {
  /** JSON dumps of `event.data` at or above this length are left out of the embedded text. */
  static #MAX_INLINE_JSON_CHARS = 500;
  /** Hard cap on the text sent to the embedding backend. */
  static #MAX_EMBED_TEXT_CHARS = 2000;
  /** Upper bound on file events loaded to hydrate search hits. */
  static #HYDRATE_EVENT_LIMIT = 10000;

  #fileProvider;
  #config;
  #vectorStore;
  #initPromise;

  /**
   * @param {string} baseDir — memory directory for one vault
   * @param {object} config — full loadConfig() result (needs embedding, vector_store, data_dir)
   */
  constructor(baseDir, config) {
    this.#fileProvider = new FileMemoryProvider(baseDir);
    this.#config = config;
    this.#vectorStore = null;
    this.#initPromise = null;
  }

  /** Memory directory, as reported by the underlying file provider. */
  get baseDir() {
    return this.#fileProvider.baseDir;
  }

  /**
   * Lazily initialise the vector store exactly once.
   *
   * The in-flight promise is memoized so that concurrent callers (storeEvent
   * fires indexing without awaiting it) share a single initialisation instead
   * of each creating their own store.
   * @returns {Promise<object|null>} the vector store, or null when init failed
   */
  #ensureVectorStore() {
    this.#initPromise ??= this.#initVectorStore();
    return this.#initPromise;
  }

  /**
   * Create the vector store and make sure its collection exists.
   * Never rejects: a failure is logged and remembered as `null`, so it is not
   * retried for the lifetime of this instance.
   * @returns {Promise<object|null>}
   */
  async #initVectorStore() {
    try {
      const { createVectorStore } = await import('./vector-store.mjs');
      const { embeddingDimension } = await import('./embedding.mjs');
      const memConfig = {
        ...this.#config,
        // Keep the memory index inside this vault's memory directory.
        data_dir: this.#fileProvider.baseDir,
      };
      const store = await createVectorStore(memConfig);
      await store.ensureCollection(embeddingDimension(this.#config.embedding));
      this.#vectorStore = store;
    } catch (e) {
      console.error('knowtation: memory vector store init failed:', e?.message ?? e);
      this.#vectorStore = null;
    }
    return this.#vectorStore;
  }

  /**
   * Embed a single piece of text.
   * @param {string} text
   * @param {'document'|'query'} [inputType] — forwarded as `voyageInputType`;
   *   stored events are documents, search strings are queries.
   *   NOTE(review): assumes the project's `embed` accepts 'query' — confirm.
   * @returns {Promise<number[]|null>} the vector, or null when embedding failed
   */
  async #embedText(text, inputType = 'document') {
    try {
      const { embed } = await import('./embedding.mjs');
      const [vector] = await embed([text], this.#config.embedding, { voyageInputType: inputType });
      return vector;
    } catch (e) {
      console.error('knowtation: memory embedding failed:', e?.message ?? e);
      return null;
    }
  }

  /**
   * Build a text representation of an event for embedding.
   * @param {object} event
   * @returns {string}
   */
  #eventToText(event) {
    // Events without a data payload are still indexable by their type.
    const d = event.data ?? {};
    const parts = [event.type];
    const fields = [
      d.query,
      d.key,
      d.text,
      d.path,
      d.source_type,
      d.operation,
      d.summary_text,
      d.error_message,
    ];
    for (const value of fields) {
      if (value) parts.push(value);
    }
    // Small payloads are appended verbatim so fields not listed above still
    // contribute to the embedding.
    const extra = JSON.stringify(d);
    if (typeof extra === 'string' && extra.length < VectorMemoryProvider.#MAX_INLINE_JSON_CHARS) {
      parts.push(extra);
    }
    return parts.join(' ').slice(0, VectorMemoryProvider.#MAX_EMBED_TEXT_CHARS);
  }

  /**
   * Store event in file log (sync) and embed + index in vector store (background).
   * Returns synchronously after the file write; vector indexing is best-effort.
   * @param {object} event
   * @returns {{ id: string, ts: string }}
   */
  storeEvent(event) {
    const result = this.#fileProvider.storeEvent(event);
    // Index under the id/ts reported by the file layer (in case it assigned
    // them), so search hits can be matched back to the stored events.
    const indexed = {
      ...event,
      id: result?.id ?? event?.id,
      ts: result?.ts ?? event?.ts,
    };
    // Fire-and-forget: #indexEventAsync handles its own errors.
    this.#indexEventAsync(indexed).catch(() => {});
    return result;
  }

  /**
   * Embed one event and upsert it into the vector store. Never rejects.
   * @param {object} event — must carry the id it is stored under
   * @returns {Promise<void>}
   */
  async #indexEventAsync(event) {
    try {
      // Without an id the vector could never be matched back to an event.
      if (event.id == null) return;
      const vs = await this.#ensureVectorStore();
      if (!vs) return;
      const text = this.#eventToText(event);
      const vector = await this.#embedText(text, 'document');
      if (!vector) return;
      await vs.upsert([{
        id: event.id,
        vector,
        payload: { id: event.id, type: event.type, ts: event.ts, text },
      }]);
    } catch (e) {
      console.error('knowtation: memory event indexing failed:', e?.message ?? e);
    }
  }

  /** Delegates to the file provider. */
  getLatest(type) {
    return this.#fileProvider.getLatest(type);
  }

  /** Delegates to the file provider. */
  listEvents(opts) {
    return this.#fileProvider.listEvents(opts);
  }

  /**
   * Semantic search over memory events using vector similarity.
   *
   * Hits are hydrated from the file log when the event is found there;
   * otherwise a stub built from the vector payload (with empty `data`) is
   * returned. Resolves to an empty list on any failure.
   * @param {string} query
   * @param {{ limit?: number }} [opts]
   * @returns {Promise<object[]>}
   */
  async searchEvents(query, opts = {}) {
    // `opts?.` tolerates an explicit null, which the default parameter does not cover.
    const limit = opts?.limit ?? 10;
    try {
      const vs = await this.#ensureVectorStore();
      if (!vs) return [];
      const queryVector = await this.#embedText(query, 'query');
      if (!queryVector) return [];
      const results = await vs.search(queryVector, { limit });
      // Skip loading the event log when there is nothing to hydrate.
      if (results.length === 0) return [];
      const events = this.#fileProvider.listEvents({
        limit: VectorMemoryProvider.#HYDRATE_EVENT_LIMIT,
      });
      const eventMap = new Map(events.map((e) => [e.id, e]));
      return results
        .map((r) => {
          const hitId = r.id || r.path;
          const event = eventMap.get(hitId);
          if (event) return { ...event, score: r.score };
          return { id: hitId, type: r.payload?.type, ts: r.payload?.ts, data: {}, score: r.score };
        })
        .slice(0, limit);
    } catch (e) {
      console.error('knowtation: memory search failed:', e?.message ?? e);
      return [];
    }
  }

  /** This provider always advertises search support. */
  supportsSearch() {
    return true;
  }

  /** Delegates to the file provider; the vector index is not touched here. */
  clearEvents(opts) {
    return this.#fileProvider.clearEvents(opts);
  }

  /** Delegates to the file provider; the vector index is not touched here. */
  pruneExpired(retentionDays) {
    return this.#fileProvider.pruneExpired(retentionDays);
  }

  /** Delegates to the file provider. */
  getStats() {
    return this.#fileProvider.getStats();
  }
}
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 2 days ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 2 days ago