vector-store-sqlite-content-hash.test.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | /** |
| 2 | * Tests for the `+content_hash` cache surface on `lib/vector-store-sqlite.mjs`: |
| 3 | * - upsert persists `content_hash` and `chunk_id` alongside the integer primary key |
| 4 | * - getChunkHashes(vaultId) returns Map<chunk_id, content_hash> scoped to the vault |
| 5 | * - deleteByChunkIds(chunkIds) removes rows by string chunk_id (internally hashed) |
| 6 | * - cross-vault isolation: getChunkHashes('A') never sees rows from vault B |
| 7 | * - rows with empty content_hash are skipped (defensive) |
| 8 | * - ensureCollection migration: legacy table without content_hash → drop + recreate |
| 9 | */ |
| 10 | |
| 11 | import { describe, it, before, after } from 'node:test'; |
| 12 | import assert from 'node:assert/strict'; |
| 13 | import path from 'node:path'; |
| 14 | import fs from 'node:fs'; |
| 15 | import { fileURLToPath } from 'node:url'; |
| 16 | import Database from 'better-sqlite3'; |
| 17 | import * as sqliteVec from 'sqlite-vec'; |
| 18 | import { createSqliteVectorStore } from '../lib/vector-store-sqlite.mjs'; |
| 19 | |
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Scratch directory that holds one sub-directory per test case in this file.
const testRoot = path.join(__dirname, 'fixtures', 'tmp-content-hash');
| 22 | |
/**
 * Create an empty directory `<testRoot>/<name>`, discarding anything a
 * previous run left behind at that path.
 *
 * @param {string} name - Sub-directory name under `testRoot`.
 * @returns {string} Path of the freshly created directory.
 */
function freshDir(name) {
  const dir = path.join(testRoot, name);
  // `force: true` makes rmSync ignore a missing path, so no existence check is needed.
  fs.rmSync(dir, { recursive: true, force: true });
  fs.mkdirSync(dir, { recursive: true });
  return dir;
}
| 29 | |
/**
 * Build a point object in the shape the tests hand to `store.upsert`.
 * Only the fields a test cares about are parameters; every other metadata
 * field is filled with a neutral value (`null` or a fresh empty array).
 *
 * @param {object} spec
 * @param {string} spec.id - Chunk id, stored as `id`.
 * @param {string} spec.vault - Stored as `vault_id`.
 * @param {number[]} spec.vector - Embedding, stored as `vector`.
 * @param {string} [spec.hash] - Stored as `content_hash`; may be `undefined`.
 * @param {string} [spec.text='t'] - Stored as `text`.
 * @param {string} [spec.pathStr='a.md'] - Stored as `path`.
 * @returns {object} The point object.
 */
function pointFor({ id, vault, vector, hash, text = 't', pathStr = 'a.md' }) {
  return {
    id,
    vector,
    text,
    path: pathStr,
    vault_id: vault,
    project: null,
    tags: [],
    date: null,
    causal_chain_id: null,
    entity: [],
    episode_id: null,
    content_hash: hash,
  };
}
| 46 | |
/**
 * Create a store from `config`, run `body(store)`, and always close the store,
 * even when `body` throws (e.g. a failed assertion). This keeps a failing test
 * from leaving an open SQLite handle behind.
 *
 * @param {object} config - Passed verbatim to `createSqliteVectorStore`.
 * @param {(store: object) => unknown} body - Test body; may be async.
 * @returns {Promise<unknown>} Whatever `body` resolves to.
 */
async function withStore(config, body) {
  const store = createSqliteVectorStore(config);
  try {
    return await body(store);
  } finally {
    store.close();
  }
}

/**
 * Run `fn` with `console.warn` replaced by a recorder and restore the
 * original afterwards, whether or not `fn` throws.
 *
 * @param {() => unknown} fn - Code to run while warnings are captured; may be async.
 * @returns {Promise<string[]>} One entry per `console.warn` call (arguments joined by a space).
 */
async function captureWarnings(fn) {
  const originalWarn = console.warn;
  const warnings = [];
  console.warn = (...args) => warnings.push(args.join(' '));
  try {
    await fn();
  } finally {
    console.warn = originalWarn;
  }
  return warnings;
}

describe('vector-store-sqlite — content_hash cache surface', () => {
  before(() => {
    // Start from an empty scratch root; `force: true` tolerates a missing path.
    fs.rmSync(testRoot, { recursive: true, force: true });
    fs.mkdirSync(testRoot, { recursive: true });
  });

  after(() => {
    try {
      fs.rmSync(testRoot, { recursive: true, force: true });
    } catch {
      // Deliberately best-effort: a cleanup failure must not fail the suite.
    }
  });

  it('ensureCollection creates a table that includes content_hash + chunk_id columns', async () => {
    const dir = freshDir('ensure');
    await withStore({ data_dir: dir }, async (store) => {
      await store.ensureCollection(3);

      // Read the schema through a separate connection to the same database file.
      const db = new Database(path.join(dir, 'knowtation_vectors.db'));
      let row;
      try {
        sqliteVec.load(db);
        row = db
          .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='knowtation_vec'")
          .get();
      } finally {
        db.close();
      }

      assert.ok(row && row.sql);
      assert.match(row.sql, /content_hash/);
      assert.match(row.sql, /chunk_id/);
    });
  });

  it('upsert persists content_hash and chunk_id; getChunkHashes returns them keyed by chunk_id', async () => {
    await withStore({ data_dir: freshDir('upsert') }, async (store) => {
      await store.ensureCollection(2);
      await store.upsert([
        pointFor({ id: 'vA::path/a_0', vault: 'vA', vector: [0.1, 0.2], hash: 'v1:abcd' }),
        pointFor({ id: 'vA::path/b_0', vault: 'vA', vector: [0.3, 0.4], hash: 'v1:efgh' }),
      ]);
      const hashes = await store.getChunkHashes('vA');
      assert.equal(hashes.size, 2);
      assert.equal(hashes.get('vA::path/a_0'), 'v1:abcd');
      assert.equal(hashes.get('vA::path/b_0'), 'v1:efgh');
    });
  });

  it('getChunkHashes is vault-scoped (no cross-vault leak)', async () => {
    await withStore({ data_dir: freshDir('vault-scope') }, async (store) => {
      await store.ensureCollection(2);
      await store.upsert([
        pointFor({ id: 'vA::a_0', vault: 'vA', vector: [1, 0], hash: 'v1:aaaa' }),
        pointFor({ id: 'vB::a_0', vault: 'vB', vector: [0, 1], hash: 'v1:bbbb' }),
      ]);
      const a = await store.getChunkHashes('vA');
      const b = await store.getChunkHashes('vB');
      assert.equal(a.size, 1);
      assert.equal(a.get('vA::a_0'), 'v1:aaaa');
      assert.equal(b.size, 1);
      assert.equal(b.get('vB::a_0'), 'v1:bbbb');
    });
  });

  it('getChunkHashes returns empty Map when collection is missing or vaultId is empty', async () => {
    await withStore({ data_dir: freshDir('empty') }, async (store) => {
      // Queried before ensureCollection on purpose: the collection does not exist yet.
      const empty = await store.getChunkHashes('vA');
      assert.equal(empty.size, 0);
      await store.ensureCollection(2);
      const stillEmpty = await store.getChunkHashes('');
      assert.equal(stillEmpty.size, 0);
      const stillEmpty2 = await store.getChunkHashes(null);
      assert.equal(stillEmpty2.size, 0);
    });
  });

  it('rows written without content_hash are skipped by getChunkHashes (defensive)', async () => {
    await withStore({ data_dir: freshDir('no-hash') }, async (store) => {
      await store.ensureCollection(2);
      await store.upsert([
        pointFor({ id: 'vA::with_0', vault: 'vA', vector: [1, 0], hash: 'v1:has' }),
        pointFor({ id: 'vA::without_0', vault: 'vA', vector: [0, 1], hash: undefined }),
      ]);
      const hashes = await store.getChunkHashes('vA');
      assert.equal(hashes.size, 1);
      assert.equal(hashes.get('vA::with_0'), 'v1:has');
      assert.equal(hashes.has('vA::without_0'), false);
    });
  });

  it('deleteByChunkIds removes the named rows and returns the deleted count', async () => {
    await withStore({ data_dir: freshDir('delete') }, async (store) => {
      await store.ensureCollection(2);
      await store.upsert([
        pointFor({ id: 'vA::keep_0', vault: 'vA', vector: [1, 0], hash: 'v1:k' }),
        pointFor({ id: 'vA::drop_0', vault: 'vA', vector: [0, 1], hash: 'v1:d1' }),
        pointFor({ id: 'vA::drop_1', vault: 'vA', vector: [1, 1], hash: 'v1:d2' }),
      ]);
      assert.equal(await store.count(), 3);
      const deleted = await store.deleteByChunkIds(['vA::drop_0', 'vA::drop_1']);
      assert.equal(deleted, 2);
      assert.equal(await store.count(), 1);
      const hashes = await store.getChunkHashes('vA');
      assert.equal(hashes.size, 1);
      assert.ok(hashes.has('vA::keep_0'));
    });
  });

  it('deleteByChunkIds returns 0 for empty/invalid input and missing collection', async () => {
    await withStore({ data_dir: freshDir('delete-empty') }, async (store) => {
      // First two calls run before ensureCollection, i.e. against a missing collection.
      assert.equal(await store.deleteByChunkIds([]), 0);
      assert.equal(await store.deleteByChunkIds(null), 0);
      await store.ensureCollection(2);
      assert.equal(await store.deleteByChunkIds(['', null, undefined]), 0);
    });
  });

  it('default config still throws on dimension mismatch (CLI safety preserved)', async () => {
    await withStore({ data_dir: freshDir('dim-throw') }, async (store) => {
      await store.ensureCollection(3);
      await assert.rejects(
        () => store.ensureCollection(5),
        /dimension mismatch/,
        'default (no allow_dimension_migration) must throw — protects CLI users from accidental EMBEDDING_PROVIDER swaps',
      );
    });
  });

  it('with allow_dimension_migration: true, ensureCollection drops + recreates at the new dimension', async () => {
    const config = { data_dir: freshDir('dim-migrate'), allow_dimension_migration: true };
    await withStore(config, async (store) => {
      await store.ensureCollection(3);
      await store.upsert([
        pointFor({ id: 'vA::a_0', vault: 'vA', vector: [1, 0, 0], hash: 'v1:openai:m:abc' }),
      ]);
      assert.equal(await store.count(), 1);

      // Capture the warn so the migration is auditable in production logs.
      const warnings = await captureWarnings(() => store.ensureCollection(5));
      assert.equal(await store.count(), 0, 'migration must wipe vectors at the old dimension');
      assert.ok(
        warnings.some((w) => /dimension migration/i.test(w)),
        'must console.warn so the operator can see why all vectors were wiped',
      );

      // New dimension is in effect — upsert at dim=5 succeeds.
      await store.upsert([
        pointFor({ id: 'vA::b_0', vault: 'vA', vector: [1, 0, 0, 0, 0], hash: 'v1:deepinfra:m:def' }),
      ]);
      assert.equal(await store.count(), 1);
    });
  });

  it('allow_dimension_migration only triggers when dimensions actually differ (no-op when same)', async () => {
    const config = { data_dir: freshDir('dim-noop'), allow_dimension_migration: true };
    await withStore(config, async (store) => {
      await store.ensureCollection(4);
      await store.upsert([
        pointFor({ id: 'vA::keep_0', vault: 'vA', vector: [1, 0, 0, 0], hash: 'v1:p:m:keep' }),
      ]);
      const warnings = await captureWarnings(() => store.ensureCollection(4));
      assert.equal(await store.count(), 1, 'same dimension must not wipe rows');
      assert.equal(
        warnings.filter((w) => /dimension migration/i.test(w)).length,
        0,
        'must not warn when dimensions match',
      );
    });
  });

  it('ensureCollection migrates a legacy table that lacks content_hash by dropping + recreating', async () => {
    const dir = freshDir('migrate');
    const dbPath = path.join(dir, 'knowtation_vectors.db');
    // Hand-build a legacy table without content_hash/chunk_id (mirrors pre-PR schema).
    const legacyDb = new Database(dbPath);
    try {
      sqliteVec.load(legacyDb);
      legacyDb.exec(`CREATE VIRTUAL TABLE knowtation_vec USING vec0(
        id INTEGER PRIMARY KEY,
        embedding FLOAT[2],
        path TEXT,
        project TEXT,
        date TEXT,
        causal_chain_id TEXT,
        episode_id TEXT,
        +vault_id TEXT,
        +tags TEXT,
        +entity TEXT,
        +chunk_text TEXT
      )`);
    } finally {
      // Release the hand-built connection before the store opens the same file.
      legacyDb.close();
    }

    await withStore({ data_dir: dir }, async (store) => {
      await store.ensureCollection(2);
      // After migration, getChunkHashes works without throwing (column now exists).
      const hashes = await store.getChunkHashes('vA');
      assert.equal(hashes.size, 0);
      // And new upserts populate content_hash correctly.
      await store.upsert([
        pointFor({ id: 'vA::x_0', vault: 'vA', vector: [1, 0], hash: 'v1:xxxx' }),
      ]);
      // Named `afterUpsert` so it does not shadow the imported `after` hook.
      const afterUpsert = await store.getChunkHashes('vA');
      assert.equal(afterUpsert.get('vA::x_0'), 'v1:xxxx');
    });
  });
});
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
1 day ago