index-partition.test.mjs
150 lines 5.8 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 3 days ago
/**
 * Tests for `lib/index-partition.mjs` — the pure inner step the bridge index handler
 * uses to decide which chunks need embedding, which can be skipped (cache hit), and
 * which prior chunk_ids are orphans.
 *
 * Why this matters: skipping wrong chunks → stale vector for changed text → wrong
 * search results. Embedding wrong chunks → wasted DeepInfra spend. Missing orphan
 * detection → search returns paths for notes that were deleted. All three failure
 * modes are silent in production (no error log) so we lock the behavior here.
 */
11
12 import { describe, it } from 'node:test';
13 import assert from 'node:assert/strict';
14 import { partitionChunksForReindex } from '../lib/index-partition.mjs';
15
/**
 * Build one `{ chunk, storeId, contentHash }` fixture entry for the tests below.
 *
 * `chunk.id` is everything after the first `::` in `storeId` (for example
 * `'vA::a_0'` → `'a_0'`); when `storeId` has no `::` part the whole `storeId`
 * is used instead.
 *
 * @param {string} storeId - Store-level chunk id, e.g. `'vA::a_0'`.
 * @param {string} contentHash - Hash string recorded for the chunk.
 * @param {{ text?: string, path?: string }} [extra] - Optional overrides for
 *   `chunk.text` (default `'t'`) and `chunk.path` (default `'a.md'`).
 * @returns {{ chunk: { id: string, text: string, path: string }, storeId: string, contentHash: string }}
 */
function item(storeId, contentHash, extra = {}) {
  // An empty remainder (no '::' in storeId) is falsy, so `||` falls back on purpose.
  const localId = storeId.split('::').slice(1).join('::') || storeId;
  return {
    chunk: {
      id: localId,
      // `??` rather than `||`: an explicitly supplied empty string must be kept,
      // only a missing (null/undefined) override takes the default.
      text: extra.text ?? 't',
      path: extra.path ?? 'a.md',
    },
    storeId,
    contentHash,
  };
}
23
/**
 * Behavioural contract for `partitionChunksForReindex(chunks, cache)`.
 *
 * As exercised below, the result exposes:
 *   - `toEmbed`            — array of input items that must be (re-)embedded,
 *   - `skippedCachedCount` — number of items whose cached hash matched,
 *   - `orphanIds`          — cache keys absent from the current chunk list,
 *   - `presentChunkIds`    — collection (`.size` / `.has`) of the store ids seen.
 */
describe('partitionChunksForReindex', () => {
  // Copy-then-sort so assertions never reorder an array owned by the result.
  const sorted = (ids) => [...ids].sort();

  it('first run (empty cache): every chunk goes into toEmbed, no orphans', () => {
    const chunks = [item('vA::a_0', 'v1:hash-a'), item('vA::b_0', 'v1:hash-b')];
    const out = partitionChunksForReindex(chunks, new Map());
    assert.equal(out.toEmbed.length, 2);
    assert.equal(out.skippedCachedCount, 0);
    assert.deepEqual(out.orphanIds, []);
    assert.equal(out.presentChunkIds.size, 2);
    assert.ok(out.presentChunkIds.has('vA::a_0'));
    assert.ok(out.presentChunkIds.has('vA::b_0'));
  });

  it('all hashes match: every chunk skipped, nothing to embed, no orphans', () => {
    const chunks = [item('vA::a_0', 'v1:H1'), item('vA::b_0', 'v1:H2')];
    const cache = new Map([
      ['vA::a_0', 'v1:H1'],
      ['vA::b_0', 'v1:H2'],
    ]);
    const out = partitionChunksForReindex(chunks, cache);
    assert.equal(out.toEmbed.length, 0);
    assert.equal(out.skippedCachedCount, 2);
    assert.deepEqual(out.orphanIds, []);
  });

  it('changed text → different hash → must embed (NOT skipped)', () => {
    const chunks = [item('vA::a_0', 'v1:NEW-HASH')];
    const cache = new Map([['vA::a_0', 'v1:OLD-HASH']]);
    const out = partitionChunksForReindex(chunks, cache);
    assert.equal(out.toEmbed.length, 1);
    assert.equal(out.toEmbed[0].storeId, 'vA::a_0');
    assert.equal(out.skippedCachedCount, 0);
  });

  it('mixed: some cached, some changed, some new → exact partition', () => {
    const chunks = [
      item('vA::cached_0', 'v1:keep'),
      item('vA::changed_0', 'v1:new'),
      item('vA::brand-new_0', 'v1:fresh'),
    ];
    const cache = new Map([
      ['vA::cached_0', 'v1:keep'],
      ['vA::changed_0', 'v1:OLD'],
    ]);
    const out = partitionChunksForReindex(chunks, cache);
    assert.equal(out.skippedCachedCount, 1);
    // Order of toEmbed is not part of the contract pinned here, so compare sorted ids.
    const toEmbedIds = sorted(out.toEmbed.map((x) => x.storeId));
    assert.deepEqual(toEmbedIds, ['vA::brand-new_0', 'vA::changed_0']);
  });

  it('detects orphans: chunk_ids in cache but not in current export', () => {
    const chunks = [item('vA::keep_0', 'v1:K')];
    const cache = new Map([
      ['vA::keep_0', 'v1:K'],
      ['vA::deleted_0', 'v1:OLD-D'],
      ['vA::renamed-from_0', 'v1:OLD-R'],
    ]);
    const out = partitionChunksForReindex(chunks, cache);
    assert.equal(out.skippedCachedCount, 1);
    assert.equal(out.toEmbed.length, 0);
    assert.deepEqual(sorted(out.orphanIds), ['vA::deleted_0', 'vA::renamed-from_0']);
  });

  it('handles null/undefined cache as empty (e.g. backend lacking getChunkHashes)', () => {
    const chunks = [item('vA::a_0', 'v1:h')];
    const out1 = partitionChunksForReindex(chunks, null);
    const out2 = partitionChunksForReindex(chunks, undefined);
    assert.equal(out1.toEmbed.length, 1);
    assert.equal(out2.toEmbed.length, 1);
    assert.deepEqual(out1.orphanIds, []);
    assert.deepEqual(out2.orphanIds, []);
  });

  it('empty chunks input: skip 0, embed 0, every cache key becomes an orphan', () => {
    const cache = new Map([
      ['vA::a_0', 'v1:h1'],
      ['vA::b_0', 'v1:h2'],
    ]);
    const out = partitionChunksForReindex([], cache);
    assert.equal(out.skippedCachedCount, 0);
    assert.equal(out.toEmbed.length, 0);
    // Sort a copy: sorting `out.orphanIds` in place would mutate the value under test.
    assert.deepEqual(sorted(out.orphanIds), ['vA::a_0', 'vA::b_0']);
  });

  // NOTE(review): the two "throws TypeError" tests pin only the error message; the
  // error class named in their titles is not asserted — confirm against the module.
  it('throws TypeError on non-array input — fail loud, never silently re-embed everything', () => {
    assert.throws(
      () => partitionChunksForReindex(null, new Map()),
      /chunksWithHash must be an array/,
    );
    assert.throws(
      () => partitionChunksForReindex({}, new Map()),
      /chunksWithHash must be an array/,
    );
  });

  it('throws TypeError when an item lacks storeId or contentHash (bridge bug must surface)', () => {
    assert.throws(
      () => partitionChunksForReindex([{ contentHash: 'h' }], new Map()),
      /storeId and contentHash/,
    );
    assert.throws(
      () =>
        partitionChunksForReindex([{ storeId: 'vA::x_0', contentHash: '' }], new Map()),
      /storeId and contentHash/,
    );
  });

  it('cache entry with empty string is treated as cache miss (defensive against bad legacy data)', () => {
    const chunks = [item('vA::a_0', 'v1:h')];
    // Note: an empty string is falsy but a Map.get returns it. partitionChunksForReindex
    // does a strict `prior && prior === item.contentHash` check, so empty string → no skip.
    const cache = new Map([['vA::a_0', '']]);
    const out = partitionChunksForReindex(chunks, cache);
    assert.equal(out.toEmbed.length, 1);
    assert.equal(out.skippedCachedCount, 0);
  });

  it('does not mutate the input cache or chunks arrays', () => {
    const chunks = [item('vA::a_0', 'v1:h')];
    const cache = new Map([['vA::a_0', 'v1:h']]);
    const cacheBefore = new Map(cache);
    // Deep snapshot: a shallow copy (`chunks.slice()`) shares the item objects with
    // `chunks`, so a mutated item would still compare equal to "itself" and the
    // assertion below could never fail for item-level mutation.
    const chunksBefore = structuredClone(chunks);
    partitionChunksForReindex(chunks, cache);
    assert.deepEqual([...cache.entries()], [...cacheBefore.entries()]);
    assert.deepEqual(chunks, chunksBefore);
  });
});
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 3 days ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 3 days ago