cluster-semantic.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | /** |
| 2 | * Semantic clustering over notes (Issue #1 Phase C8). |
| 3 | * Embeds truncated note text (up to 200 notes) and runs k-means — works with Qdrant or sqlite-vec without reading raw vectors from the store. |
| 4 | */ |
| 5 | |
| 6 | import { loadConfig } from './config.mjs'; |
| 7 | import { listMarkdownFiles, readNote, normalizeSlug } from './vault.mjs'; |
| 8 | import { embed } from './embedding.mjs'; |
| 9 | import { kmeans } from './kmeans.mjs'; |
| 10 | |
/** Upper bound on how many notes are embedded per run. */
const MAX_NOTES = 200;
/** Number of leading body characters of each note that are embedded. */
const TEXT_SLICE = 800;
/** Cluster count used when `n_clusters` is absent or not a number. */
const DEFAULT_CLUSTERS = 5;
/** Inclusive bounds applied to the requested cluster count. */
const MIN_CLUSTERS = 2;
const MAX_CLUSTERS = 15;

/**
 * Turn the caller-supplied cluster count into an integer within
 * [MIN_CLUSTERS, MAX_CLUSTERS].
 *
 * Non-numeric input falls back to DEFAULT_CLUSTERS instead of producing NaN
 * (NaN would make every `length < k` guard false), and fractional input is
 * truncated so the clustering step always receives a whole number.
 *
 * @param {unknown} requested
 * @returns {number}
 */
function resolveClusterCount(requested) {
  const parsed = Math.trunc(Number(requested ?? DEFAULT_CLUSTERS));
  const wanted = Number.isNaN(parsed) ? DEFAULT_CLUSTERS : parsed;
  return Math.max(MIN_CLUSTERS, Math.min(wanted, MAX_CLUSTERS));
}

/**
 * Replace backslash separators with forward slashes.
 *
 * @param {string} p
 * @returns {string}
 */
function toForwardSlashes(p) {
  return p.replace(/\\/g, '/');
}

/**
 * Keep only the paths equal to `folder` or located underneath it.
 * Separators are normalised and trailing slashes on `folder` are ignored.
 *
 * @param {string[]} paths
 * @param {string} folder
 * @returns {string[]}
 */
function filterByFolder(paths, folder) {
  const exact = toForwardSlashes(String(folder)).replace(/\/+$/, '');
  const prefix = `${exact}/`;
  return paths.filter((p) => {
    const normalized = toForwardSlashes(p);
    return normalized === exact || normalized.startsWith(prefix);
  });
}

/**
 * Read notes until MAX_NOTES usable samples are collected.
 *
 * Each sample keeps its text and path together in one record, so a note that
 * fails half-way through processing can never leave the two out of step.
 *
 * @param {object} config - result of loadConfig(); `vault_path` is read here
 * @param {string[]} paths
 * @param {string | null} wantProject - normalised project slug, or null for no filter
 * @returns {{ path: string, text: string }[]}
 */
function collectSamples(config, paths, wantProject) {
  const samples = [];
  for (const p of paths) {
    if (samples.length >= MAX_NOTES) break;
    try {
      const note = readNote(config.vault_path, p);
      if (wantProject && note.project !== wantProject) continue;
      // A note without a usable path cannot be reported in a cluster.
      if (typeof note.path !== 'string') continue;
      const title = note.frontmatter?.title ? `${String(note.frontmatter.title)}\n` : '';
      const text = `${title}${(note.body || '').slice(0, TEXT_SLICE)}`;
      if (!text.trim()) continue;
      samples.push({ path: toForwardSlashes(note.path), text });
    } catch {
      // Deliberate best-effort: a note that cannot be read or parsed is
      // skipped so one bad file does not abort the whole clustering run.
    }
  }
  return samples;
}

/**
 * Group points by their k-means label and shape the public cluster records.
 * Assumes `labels[i]` is the zero-based cluster index assigned to `points[i]`;
 * labels outside 0..k-1 are ignored and empty clusters are omitted.
 *
 * @param {{ path: string, text: string }[]} points
 * @param {number[]} labels
 * @param {number} k
 * @returns {{ label: string, centroid_snippet: string, paths: string[] }[]}
 */
function buildClusters(points, labels, k) {
  const buckets = Array.from({ length: k }, () => []);
  points.forEach((point, i) => {
    const c = labels[i];
    if (Number.isInteger(c) && c >= 0 && c < k) buckets[c].push(point);
  });

  const clusters = [];
  buckets.forEach((members, c) => {
    if (members.length === 0) return;
    // NOTE(review): the snippet comes from the first member of the cluster,
    // not from the member nearest the centroid.
    const centroidSnippet = (members[0].text || '').slice(0, 120).replace(/\s+/g, ' ').trim();
    clusters.push({
      label: `cluster_${c + 1}`,
      centroid_snippet: centroidSnippet,
      paths: [...new Set(members.map((m) => m.path))],
    });
  });
  return clusters;
}

/**
 * Cluster vault notes by embedding similarity.
 *
 * Lists the vault's markdown files, optionally narrows them by folder and
 * project, embeds the title plus the first TEXT_SLICE body characters of up to
 * MAX_NOTES notes, and groups the resulting vectors with k-means.
 *
 * @param {{ project?: string, folder?: string, n_clusters?: number }} options
 *   `n_clusters` is truncated to an integer and clamped to 2..15 (default 5).
 * @returns {Promise<
 *   { clusters: [], note: string } |
 *   { clusters: { label: string, centroid_snippet: string, paths: string[] }[],
 *     notes_sampled: number, max_notes: number }
 * >} The first shape is returned when fewer than k notes (or vectors) are available.
 */
export async function runCluster(options = {}) {
  // The default parameter only covers `undefined`; tolerate an explicit null too.
  const opts = options ?? {};
  const config = loadConfig();
  const k = resolveClusterCount(opts.n_clusters);

  let paths = listMarkdownFiles(config.vault_path, { ignore: config.ignore });
  if (opts.folder) {
    paths = filterByFolder(paths, opts.folder);
  }

  const wantProject = opts.project != null ? normalizeSlug(String(opts.project)) : null;
  const samples = collectSamples(config, paths, wantProject);

  if (samples.length < k) {
    return {
      clusters: [],
      note: `Not enough notes (${samples.length}) for k=${k}. Add notes or lower n_clusters.`,
    };
  }

  const vectors = await embed(
    samples.map((s) => s.text),
    config.embedding || {},
    { voyageInputType: 'document' }
  );

  // Drop samples whose embedding is missing or empty.
  const points = [];
  samples.forEach((sample, i) => {
    const vector = vectors?.[i];
    if (!vector || !vector.length) return;
    points.push({ id: sample.path, vector, path: sample.path, text: sample.text });
  });
  if (points.length < k) {
    return { clusters: [], note: 'Embedding failed for some notes.' };
  }

  const { labels } = kmeans(
    points.map((p) => ({ id: p.id, vector: p.vector })),
    k
  );

  return {
    clusters: buildClusters(points, labels, k),
    notes_sampled: points.length,
    max_notes: MAX_NOTES,
  };
}
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
1 day ago