vector-store-sqlite-content-hash.test.mjs
274 lines 10.2 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
/**
 * Tests for the `+content_hash` cache surface on `lib/vector-store-sqlite.mjs`:
 * - upsert persists `content_hash` and `chunk_id` alongside the integer primary key
 * - getChunkHashes(vaultId) returns Map<chunk_id, content_hash> scoped to the vault
 * - deleteByChunkIds(chunkIds) removes rows by string chunk_id (internally hashed)
 * - cross-vault isolation: getChunkHashes('A') never sees rows from vault B
 * - rows with empty content_hash are skipped (defensive)
 * - ensureCollection migration: legacy table without content_hash → drop + recreate
 * - ensureCollection dimension handling: a mismatch throws by default; with
 *   `allow_dimension_migration: true` it drops + recreates (and console.warns), and it is a
 *   no-op when the dimension is unchanged
 */
10
11 import { describe, it, before, after } from 'node:test';
12 import assert from 'node:assert/strict';
13 import path from 'node:path';
14 import fs from 'node:fs';
15 import { fileURLToPath } from 'node:url';
16 import Database from 'better-sqlite3';
17 import * as sqliteVec from 'sqlite-vec';
18 import { createSqliteVectorStore } from '../lib/vector-store-sqlite.mjs';
19
20 const __dirname = path.dirname(fileURLToPath(import.meta.url));
21 const testRoot = path.join(__dirname, 'fixtures', 'tmp-content-hash');
22
/**
 * Create an empty scratch directory for a single test.
 *
 * Anything left at `<testRoot>/<name>` is removed first, so the caller always
 * starts from an empty directory.
 *
 * @param {string} name - Sub-directory name, relative to `testRoot`.
 * @returns {string} Path of the freshly created directory.
 */
function freshDir(name) {
  const target = path.join(testRoot, name);
  // `force: true` turns removal into a no-op when the directory does not exist yet.
  fs.rmSync(target, { recursive: true, force: true });
  fs.mkdirSync(target, { recursive: true });
  return target;
}
29
/**
 * Build a point object in the shape these tests pass to `store.upsert`.
 *
 * @param {object} spec
 * @param {string} spec.id - Point id (the tests use string chunk ids).
 * @param {string} spec.vault - Stored on the point as `vault_id`.
 * @param {number[]} spec.vector - Embedding values.
 * @param {string} [spec.hash] - Stored on the point as `content_hash`; may be undefined.
 * @param {string} [spec.text='t'] - Chunk text.
 * @param {string} [spec.pathStr='a.md'] - Stored on the point as `path`.
 * @returns {object} A new point object.
 */
function pointFor({ id, vault, vector, hash, text = 't', pathStr = 'a.md' }) {
  // Metadata these tests never vary. Built per call so points never share array instances.
  const blankMetadata = {
    project: null,
    tags: [],
    date: null,
    causal_chain_id: null,
    entity: [],
    episode_id: null,
  };
  const point = {
    id,
    vector,
    text,
    path: pathStr,
    vault_id: vault,
    ...blankMetadata,
    content_hash: hash,
  };
  return point;
}
46
47 describe('vector-store-sqlite — content_hash cache surface', () => {
before(() => {
  // Reset the shared scratch root; `force: true` makes removal a no-op when it is absent.
  fs.rmSync(testRoot, { recursive: true, force: true });
  fs.mkdirSync(testRoot, { recursive: true });
});
52
after(() => {
  // Best-effort cleanup of the scratch root: a failure here must not fail the suite,
  // but it is reported instead of being swallowed so leftover fixtures can be diagnosed.
  try {
    fs.rmSync(testRoot, { recursive: true, force: true });
  } catch (err) {
    console.warn(`content-hash tests: could not remove ${testRoot}: ${err.message}`);
  }
});
60
it('ensureCollection creates a table that includes content_hash + chunk_id columns', async () => {
  const dir = freshDir('ensure');
  const store = createSqliteVectorStore({ data_dir: dir });
  let row;
  try {
    await store.ensureCollection(3);
    // Read the stored CREATE statement through a separate connection to the same file.
    const db = new Database(path.join(dir, 'knowtation_vectors.db'));
    try {
      sqliteVec.load(db);
      row = db
        .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='knowtation_vec'")
        .get();
    } finally {
      // Close the inspection handle even if the query throws.
      db.close();
    }
  } finally {
    store.close();
  }
  assert.ok(row && row.sql);
  assert.match(row.sql, /content_hash/);
  assert.match(row.sql, /chunk_id/);
});
77
it('upsert persists content_hash and chunk_id; getChunkHashes returns them keyed by chunk_id', async () => {
  const dir = freshDir('upsert');
  const store = createSqliteVectorStore({ data_dir: dir });
  try {
    await store.ensureCollection(2);
    await store.upsert([
      pointFor({ id: 'vA::path/a_0', vault: 'vA', vector: [0.1, 0.2], hash: 'v1:abcd' }),
      pointFor({ id: 'vA::path/b_0', vault: 'vA', vector: [0.3, 0.4], hash: 'v1:efgh' }),
    ]);
    const hashes = await store.getChunkHashes('vA');
    assert.equal(hashes.size, 2);
    assert.equal(hashes.get('vA::path/a_0'), 'v1:abcd');
    assert.equal(hashes.get('vA::path/b_0'), 'v1:efgh');
  } finally {
    // Release the store even when an assertion above throws.
    store.close();
  }
});
92
it('getChunkHashes is vault-scoped (no cross-vault leak)', async () => {
  const dir = freshDir('vault-scope');
  const store = createSqliteVectorStore({ data_dir: dir });
  try {
    await store.ensureCollection(2);
    // One row per vault, written in the same upsert call.
    await store.upsert([
      pointFor({ id: 'vA::a_0', vault: 'vA', vector: [1, 0], hash: 'v1:aaaa' }),
      pointFor({ id: 'vB::a_0', vault: 'vB', vector: [0, 1], hash: 'v1:bbbb' }),
    ]);
    const a = await store.getChunkHashes('vA');
    const b = await store.getChunkHashes('vB');
    assert.equal(a.size, 1);
    assert.equal(a.get('vA::a_0'), 'v1:aaaa');
    assert.equal(b.size, 1);
    assert.equal(b.get('vB::a_0'), 'v1:bbbb');
  } finally {
    // Release the store even when an assertion above throws.
    store.close();
  }
});
109
it('getChunkHashes returns empty Map when collection is missing or vaultId is empty', async () => {
  const dir = freshDir('empty');
  const store = createSqliteVectorStore({ data_dir: dir });
  try {
    // Queried before ensureCollection has been called.
    const empty = await store.getChunkHashes('vA');
    assert.equal(empty.size, 0);
    await store.ensureCollection(2);
    // Empty-string and null vault ids after the collection exists.
    const stillEmpty = await store.getChunkHashes('');
    assert.equal(stillEmpty.size, 0);
    const stillEmpty2 = await store.getChunkHashes(null);
    assert.equal(stillEmpty2.size, 0);
  } finally {
    // Release the store even when an assertion above throws.
    store.close();
  }
});
122
it('rows written without content_hash are skipped by getChunkHashes (defensive)', async () => {
  const dir = freshDir('no-hash');
  const store = createSqliteVectorStore({ data_dir: dir });
  try {
    await store.ensureCollection(2);
    // The second point is upserted with `content_hash: undefined`.
    await store.upsert([
      pointFor({ id: 'vA::with_0', vault: 'vA', vector: [1, 0], hash: 'v1:has' }),
      pointFor({ id: 'vA::without_0', vault: 'vA', vector: [0, 1], hash: undefined }),
    ]);
    const hashes = await store.getChunkHashes('vA');
    assert.equal(hashes.size, 1);
    assert.equal(hashes.get('vA::with_0'), 'v1:has');
    assert.equal(hashes.has('vA::without_0'), false);
  } finally {
    // Release the store even when an assertion above throws.
    store.close();
  }
});
137
it('deleteByChunkIds removes the named rows and returns the deleted count', async () => {
  const dir = freshDir('delete');
  const store = createSqliteVectorStore({ data_dir: dir });
  try {
    await store.ensureCollection(2);
    await store.upsert([
      pointFor({ id: 'vA::keep_0', vault: 'vA', vector: [1, 0], hash: 'v1:k' }),
      pointFor({ id: 'vA::drop_0', vault: 'vA', vector: [0, 1], hash: 'v1:d1' }),
      pointFor({ id: 'vA::drop_1', vault: 'vA', vector: [1, 1], hash: 'v1:d2' }),
    ]);
    assert.equal(await store.count(), 3);
    const deleted = await store.deleteByChunkIds(['vA::drop_0', 'vA::drop_1']);
    assert.equal(deleted, 2);
    assert.equal(await store.count(), 1);
    // Only the row that was not named in the delete call should remain.
    const hashes = await store.getChunkHashes('vA');
    assert.equal(hashes.size, 1);
    assert.ok(hashes.has('vA::keep_0'));
  } finally {
    // Release the store even when an assertion above throws.
    store.close();
  }
});
156
it('deleteByChunkIds returns 0 for empty/invalid input and missing collection', async () => {
  const dir = freshDir('delete-empty');
  const store = createSqliteVectorStore({ data_dir: dir });
  try {
    // Called before ensureCollection has been called.
    assert.equal(await store.deleteByChunkIds([]), 0);
    assert.equal(await store.deleteByChunkIds(null), 0);
    await store.ensureCollection(2);
    // Collection exists, but no entry is a usable chunk id.
    assert.equal(await store.deleteByChunkIds(['', null, undefined]), 0);
  } finally {
    // Release the store even when an assertion above throws.
    store.close();
  }
});
166
it('default config still throws on dimension mismatch (CLI safety preserved)', async () => {
  const dir = freshDir('dim-throw');
  const store = createSqliteVectorStore({ data_dir: dir });
  try {
    await store.ensureCollection(3);
    // Re-ensuring at a different dimension must reject when no migration flag is set.
    await assert.rejects(
      () => store.ensureCollection(5),
      /dimension mismatch/,
      'default (no allow_dimension_migration) must throw — protects CLI users from accidental EMBEDDING_PROVIDER swaps',
    );
  } finally {
    // Release the store even when the assertion above throws.
    store.close();
  }
});
178
it('with allow_dimension_migration: true, ensureCollection drops + recreates at the new dimension', async () => {
  const dir = freshDir('dim-migrate');
  const store = createSqliteVectorStore({
    data_dir: dir,
    allow_dimension_migration: true,
  });
  try {
    await store.ensureCollection(3);
    await store.upsert([
      pointFor({ id: 'vA::a_0', vault: 'vA', vector: [1, 0, 0], hash: 'v1:openai:m:abc' }),
    ]);
    assert.equal(await store.count(), 1);

    // Capture the warn so the migration is auditable in production logs.
    const originalWarn = console.warn;
    const warnings = [];
    console.warn = (...args) => warnings.push(args.join(' '));
    try {
      await store.ensureCollection(5);
    } finally {
      // Always restore the real console.warn, even if ensureCollection rejects.
      console.warn = originalWarn;
    }
    assert.equal(await store.count(), 0, 'migration must wipe vectors at the old dimension');
    assert.ok(
      warnings.some((w) => /dimension migration/i.test(w)),
      'must console.warn so the operator can see why all vectors were wiped',
    );

    // New dimension is in effect — upsert at dim=5 succeeds.
    await store.upsert([
      pointFor({ id: 'vA::b_0', vault: 'vA', vector: [1, 0, 0, 0, 0], hash: 'v1:deepinfra:m:def' }),
    ]);
    assert.equal(await store.count(), 1);
  } finally {
    // Release the store even when an assertion above throws.
    store.close();
  }
});
213
it('allow_dimension_migration only triggers when dimensions actually differ (no-op when same)', async () => {
  const dir = freshDir('dim-noop');
  const store = createSqliteVectorStore({
    data_dir: dir,
    allow_dimension_migration: true,
  });
  try {
    await store.ensureCollection(4);
    await store.upsert([
      pointFor({ id: 'vA::keep_0', vault: 'vA', vector: [1, 0, 0, 0], hash: 'v1:p:m:keep' }),
    ]);

    // Record console.warn output while re-ensuring at the same dimension.
    const originalWarn = console.warn;
    const warnings = [];
    console.warn = (...args) => warnings.push(args.join(' '));
    try {
      await store.ensureCollection(4);
    } finally {
      // Always restore the real console.warn, even if ensureCollection rejects.
      console.warn = originalWarn;
    }
    assert.equal(await store.count(), 1, 'same dimension must not wipe rows');
    assert.equal(
      warnings.filter((w) => /dimension migration/i.test(w)).length,
      0,
      'must not warn when dimensions match',
    );
  } finally {
    // Release the store even when an assertion above throws.
    store.close();
  }
});
240
it('ensureCollection migrates a legacy table that lacks content_hash by dropping + recreating', async () => {
  const dir = freshDir('migrate');
  const dbPath = path.join(dir, 'knowtation_vectors.db');
  // Hand-build a legacy table without content_hash/chunk_id (mirrors pre-PR schema).
  const legacyDb = new Database(dbPath);
  try {
    sqliteVec.load(legacyDb);
    legacyDb.exec(`CREATE VIRTUAL TABLE knowtation_vec USING vec0(
      id INTEGER PRIMARY KEY,
      embedding FLOAT[2],
      path TEXT,
      project TEXT,
      date TEXT,
      causal_chain_id TEXT,
      episode_id TEXT,
      +vault_id TEXT,
      +tags TEXT,
      +entity TEXT,
      +chunk_text TEXT
    )`);
  } finally {
    // Close the setup handle even if table creation throws.
    legacyDb.close();
  }

  const store = createSqliteVectorStore({ data_dir: dir });
  try {
    await store.ensureCollection(2);
    // After migration, getChunkHashes works without throwing (column now exists).
    const hashes = await store.getChunkHashes('vA');
    assert.equal(hashes.size, 0);
    // And new upserts populate content_hash correctly.
    await store.upsert([
      pointFor({ id: 'vA::x_0', vault: 'vA', vector: [1, 0], hash: 'v1:xxxx' }),
    ]);
    // Named so it does not shadow the `after` hook imported from node:test.
    const hashesAfterUpsert = await store.getChunkHashes('vA');
    assert.equal(hashesAfterUpsert.get('vA::x_0'), 'v1:xxxx');
  } finally {
    // Release the store even when an assertion above throws.
    store.close();
  }
});
274 });
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 1 day ago