index-partition.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
/**
 * Partition step of the incremental cache flow used by
 * `hub/bridge/server.mjs POST /api/v1/index`.
 *
 * Input is (a) the chunks built from the canister export, each already tagged
 * with a versioned content hash, and (b) the `(chunk_id → content_hash)` Map
 * that the sqlite-vec / Qdrant store persisted after the previous successful
 * index. From those two the function works out:
 *
 *   - how many chunks can be **skipped**, because the stored hash equals the
 *     current one (vector + payload are already correct);
 *   - which chunks have to be **embedded**, because they are new or their
 *     text/metadata changed;
 *   - which stored chunk_ids are **orphans**, i.e. present in the store but
 *     missing from the current export (note deleted, path renamed, ...).
 *
 * The function is pure: it only reads its arguments and builds fresh result
 * containers. It lives outside the index handler so the partition contract can
 * be unit-tested without a canister, an embedding provider, or a sqlite-vec
 * backend.
 *
 * Every occurrence in `chunksWithHash` is classified on its own; entries that
 * share a `storeId` are not de-duplicated.
 *
 * @typedef {{
 *   chunk: { id: string, text: string, path: string, [k: string]: any },
 *   storeId: string,
 *   contentHash: string,
 * }} ChunkWithHash
 *
 * @param {ChunkWithHash[]} chunksWithHash - Chunks built for the current export.
 * @param {Map<string, string>|null|undefined} existingHashes - Result of
 *   `store.getChunkHashes(vaultId)`. Anything that is not a `Map` (including
 *   null/undefined, e.g. a backend without that surface) is treated as empty.
 * @returns {{
 *   toEmbed: ChunkWithHash[],
 *   skippedCachedCount: number,
 *   orphanIds: string[],
 *   presentChunkIds: Set<string>,
 * }} `toEmbed` keeps the input order and holds the original item objects;
 *   `orphanIds` follows the iteration order of `existingHashes`.
 * @throws {TypeError} If `chunksWithHash` is not an array, or if any item is
 *   falsy or lacks a non-empty string `storeId` / `contentHash`.
 */
export function partitionChunksForReindex(chunksWithHash, existingHashes) {
  if (!Array.isArray(chunksWithHash)) {
    throw new TypeError('partitionChunksForReindex: chunksWithHash must be an array');
  }

  const isNonEmptyString = (value) => typeof value === 'string' && value !== '';
  const knownHashes = existingHashes instanceof Map ? existingHashes : new Map();

  const presentChunkIds = new Set();
  const toEmbed = [];
  let skippedCachedCount = 0;

  for (const entry of chunksWithHash) {
    const wellFormed =
      Boolean(entry) &&
      isNonEmptyString(entry.storeId) &&
      isNonEmptyString(entry.contentHash);
    if (!wellFormed) {
      throw new TypeError(
        'partitionChunksForReindex: each item must have non-empty storeId and contentHash',
      );
    }

    const { storeId, contentHash } = entry;
    presentChunkIds.add(storeId);

    // contentHash is a non-empty string here, so strict equality can only
    // succeed when the store really holds an identical hash for this id.
    if (knownHashes.get(storeId) === contentHash) {
      skippedCachedCount += 1;
    } else {
      toEmbed.push(entry);
    }
  }

  // Whatever the store remembers but the current export no longer contains.
  const orphanIds = [...knownHashes.keys()].filter((id) => !presentChunkIds.has(id));

  return { toEmbed, skippedCachedCount, orphanIds, presentChunkIds };
}
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
1 day ago