document-tree.test.mjs
file-level
1
files
1
commits
0
hotspots
0
🧊 dead
0
💥 blast risk
| 1 | /** |
| 2 | * DocumentTree v0 pure builder tests. |
| 3 | * |
| 4 | * Phase 1A is builder-only: no file reads, CLI, MCP, hosted MCP, Hub, search, |
| 5 | * index, vector, memory, persistence, summaries, PageIndex, or OCR behavior. |
| 6 | */ |
| 7 | import { describe, it } from 'node:test'; |
| 8 | import assert from 'node:assert/strict'; |
| 9 | |
| 10 | import { |
| 11 | DOCUMENT_TREE_SCHEMA, |
| 12 | buildDocumentTree, |
| 13 | buildDocumentTreeFromMarkdown, |
| 14 | buildDocumentTreeFromOutline, |
| 15 | } from '../lib/document-tree.mjs'; |
| 16 | |
/**
 * Create an outline fixture tagged with the 'knowtation.note_outline/v1' schema.
 *
 * Each call produces a fresh object (including a fresh empty `headings`
 * array), so a test can never leak state into another through the defaults.
 *
 * @param {object} [overrides] - Properties that replace or extend the defaults.
 * @returns {object} The default fixture with `overrides` layered on top.
 */
function outline(overrides = {}) {
  const defaults = {
    schema: 'knowtation.note_outline/v1',
    path: 'notes/example.md',
    title: 'Example',
    headings: [],
    truncated: false,
  };

  // Later spreads win, so any key present in `overrides` shadows its default.
  return { ...defaults, ...overrides };
}
| 27 | |
/**
 * Unit, data-integrity, security, stress, and timing checks for
 * buildDocumentTreeFromOutline. Every case feeds the builder an in-memory
 * outline fixture; nothing here touches the filesystem.
 */
describe('DocumentTree v0 pure builder', () => {
  it('unit: returns v0 schema, path, title, empty root, and truncation flag', () => {
    const tree = buildDocumentTreeFromOutline(outline());

    assert.equal(tree.schema, DOCUMENT_TREE_SCHEMA);
    assert.equal(tree.path, 'notes/example.md');
    assert.equal(tree.title, 'Example');
    assert.equal(tree.truncated, false);
    // An outline without headings still yields a root, just with no children.
    assert.deepEqual(tree.root, { children: [] });
  });

  it('unit: nests deeper headings and keeps same-level headings as siblings', () => {
    const tree = buildDocumentTreeFromOutline(
      outline({
        headings: [
          { level: 1, text: 'Intro', id: 'h1-intro-0001' },
          { level: 2, text: 'Background', id: 'h2-background-0002' },
          { level: 2, text: 'Method', id: 'h2-method-0003' },
          { level: 3, text: 'Step One', id: 'h3-step-one-0004' },
        ],
      })
    );

    // Full structural comparison: this also pins the exact key set of a node
    // (id, level, text, children) because deepEqual is strict here.
    assert.deepEqual(tree.root.children, [
      {
        id: 'h1-intro-0001',
        level: 1,
        text: 'Intro',
        children: [
          {
            id: 'h2-background-0002',
            level: 2,
            text: 'Background',
            children: [],
          },
          {
            id: 'h2-method-0003',
            level: 2,
            text: 'Method',
            children: [
              {
                id: 'h3-step-one-0004',
                level: 3,
                text: 'Step One',
                children: [],
              },
            ],
          },
        ],
      },
    ]);
  });

  it('unit: closes ancestors on lower-level headings and allows skipped levels', () => {
    const tree = buildDocumentTreeFromOutline(
      outline({
        headings: [
          { level: 1, text: 'A', id: 'h1-a-0001' },
          { level: 3, text: 'B', id: 'h3-b-0002' },
          { level: 2, text: 'C', id: 'h2-c-0003' },
          { level: 1, text: 'D', id: 'h1-d-0004' },
        ],
      })
    );

    // Expected shape: B (h3) hangs directly under A (h1) although h2 was
    // skipped; C (h2) becomes A's second child rather than B's; D (h1) starts
    // a new root-level entry.
    assert.deepEqual(
      tree.root.children.map((node) => ({
        text: node.text,
        children: node.children.map((child) => ({
          text: child.text,
          children: child.children.map((grandchild) => grandchild.text),
        })),
      })),
      [
        {
          text: 'A',
          children: [
            { text: 'B', children: [] },
            { text: 'C', children: [] },
          ],
        },
        { text: 'D', children: [] },
      ]
    );
  });

  it('unit: preserves empty heading text and duplicate heading IDs from NoteOutline', () => {
    const tree = buildDocumentTreeFromOutline(
      outline({
        headings: [
          { level: 2, text: '', id: 'h2-heading-0001' },
          { level: 2, text: 'Install', id: 'h2-install-0002' },
          { level: 2, text: 'Install', id: 'h2-install-0003' },
        ],
      })
    );

    // IDs and text must pass through verbatim: no renaming, no de-duplication.
    assert.deepEqual(
      tree.root.children.map((node) => ({ id: node.id, text: node.text })),
      [
        { id: 'h2-heading-0001', text: '' },
        { id: 'h2-install-0002', text: 'Install' },
        { id: 'h2-install-0003', text: 'Install' },
      ]
    );
  });

  it('unit: rejects invalid heading records', () => {
    // Level 7 is outside the 1-6 range used by every valid fixture in this file.
    assert.throws(
      () =>
        buildDocumentTreeFromOutline(
          outline({ headings: [{ level: 7, text: 'Bad', id: 'h7-bad-0001' }] })
        ),
      /heading.level/
    );

    assert.throws(
      () =>
        buildDocumentTreeFromOutline(
          outline({ headings: [{ level: 1, text: 'Missing id' }] })
        ),
      /heading.id/
    );
  });

  it('data-integrity: does not mutate the input outline or heading records', () => {
    const input = outline({
      headings: [
        { level: 1, text: 'Root', id: 'h1-root-0001' },
        { level: 2, text: 'Child', id: 'h2-child-0002' },
      ],
    });
    // Snapshot before the call; any added, removed, or changed field (or a
    // reordering of keys) shows up as a different serialization afterwards.
    const before = JSON.stringify(input);

    buildDocumentTreeFromOutline(input);

    assert.equal(JSON.stringify(input), before);
  });

  it('security: does not include body, snippets, frontmatter, vectors, or summaries', () => {
    const tree = buildDocumentTreeFromOutline(
      outline({
        path: 'private/secret.md',
        title: 'Private',
        frontmatter: { api_key: 'must-not-appear' },
        body: 'Sensitive body text must not appear.',
        snippet: 'Sensitive snippet must not appear.',
        vectorScore: 0.99,
        summary: 'Sensitive summary must not appear.',
        headings: [{ level: 1, text: '<script> alert(1) </script>', id: 'h1-script-0001' }],
      })
    );
    // Searching the serialized tree catches leaks at any depth, not only on
    // the top-level object.
    const serialized = JSON.stringify(tree);

    assert.equal(Object.hasOwn(tree, 'body'), false);
    assert.equal(Object.hasOwn(tree, 'frontmatter'), false);
    assert.equal(Object.hasOwn(tree, 'snippet'), false);
    assert.equal(serialized.includes('must-not-appear'), false);
    assert.equal(serialized.includes('Sensitive body text'), false);
    assert.equal(serialized.includes('Sensitive snippet'), false);
    assert.equal(serialized.includes('Sensitive summary'), false);
    assert.equal(serialized.includes('vectorScore'), false);
    // Heading text is expected back unchanged, markup characters included.
    assert.equal(tree.root.children[0].text, '<script> alert(1) </script>');
  });

  it('security: rejects absolute and traversal paths before returning a tree', () => {
    // POSIX-style absolute path.
    assert.throws(
      () => buildDocumentTreeFromOutline(outline({ path: '/Users/example/vault/secret.md' })),
      /vault-relative/
    );
    // Parent-directory traversal.
    assert.throws(
      () => buildDocumentTreeFromOutline(outline({ path: '../secret.md' })),
      /escape vault/
    );
    // Backslash-rooted path.
    assert.throws(
      () => buildDocumentTreeFromOutline(outline({ path: '\\Users\\example\\vault\\secret.md' })),
      /vault-relative/
    );
  });

  it('stress: builds a capped heading list deterministically', () => {
    const headings = Array.from({ length: 500 }, (_, index) => {
      // Levels cycle 1..6 so the fixture exercises every depth repeatedly.
      const level = (index % 6) + 1;
      return {
        level,
        text: `Heading ${index + 1}`,
        id: `h${level}-heading-${String(index + 1).padStart(4, '0')}`,
      };
    });
    const input = outline({ headings, truncated: true });

    const first = buildDocumentTreeFromOutline(input);
    const second = buildDocumentTreeFromOutline(input);

    // Same input twice must give structurally equal output.
    assert.deepEqual(first, second);
    assert.equal(first.truncated, true);
  });

  it('stress: caps untrusted direct outline heading input', () => {
    // 501 headings: one more than the 500 the assertions below expect to survive.
    const headings = Array.from({ length: 501 }, (_, index) => ({
      level: 1,
      text: `Heading ${index + 1}`,
      id: `h1-heading-${String(index + 1).padStart(4, '0')}`,
    }));

    const tree = buildDocumentTreeFromOutline(outline({ headings, truncated: false }));

    assert.equal(tree.root.children.length, 500);
    assert.equal(tree.root.children[499].text, 'Heading 500');
    // The input claimed truncated: false; dropping a heading must flip the flag.
    assert.equal(tree.truncated, true);
  });

  it('performance: tree construction is linear for normal heading counts', () => {
    const headings = Array.from({ length: 500 }, (_, index) => {
      // Levels cycle 1..4.
      const level = (index % 4) + 1;
      return {
        level,
        text: `Heading ${index + 1}`,
        id: `h${level}-heading-${String(index + 1).padStart(4, '0')}`,
      };
    });

    // performance.now() is monotonic and sub-millisecond. Date.now() tracks
    // the wall clock, so a clock adjustment during the run could yield a
    // negative or inflated duration and make this test flaky.
    const started = performance.now();
    const tree = buildDocumentTreeFromOutline(outline({ headings }));
    const elapsedMs = performance.now() - started;

    assert.ok(tree.root.children.length > 0, 'expected at least one root-level heading');
    // Coarse wall-time budget; it guards against gross slowdowns rather than
    // proving a complexity class.
    assert.ok(elapsedMs < 200, `expected builder under 200ms, got ${elapsedMs.toFixed(2)}ms`);
  });
});
| 251 | |
/**
 * Integration checks for the Markdown-facing entry points, buildDocumentTree
 * and buildDocumentTreeFromMarkdown, using inline Markdown strings only.
 */
describe('DocumentTree v0 Markdown parser integration', () => {
  it('integration: builds a nested tree from Markdown using NoteOutline parsing semantics', () => {
    const note = {
      path: 'notes/markdown.md',
      frontmatter: { title: 'Markdown Tree' },
      body: '# Intro\n\n### Context\n\n## Method\n\n# Outro',
    };
    const expectedTopLevel = [
      {
        id: 'h1-intro-0001',
        level: 1,
        text: 'Intro',
        children: ['Context', 'Method'],
      },
      {
        id: 'h1-outro-0004',
        level: 1,
        text: 'Outro',
        children: [],
      },
    ];

    const tree = buildDocumentTree(note);

    assert.equal(tree.schema, DOCUMENT_TREE_SCHEMA);
    assert.equal(tree.path, 'notes/markdown.md');
    assert.equal(tree.title, 'Markdown Tree');

    // Flatten each top-level node's children to their text for a compact comparison.
    const topLevel = tree.root.children.map(({ id, level, text, children }) => ({
      id,
      level,
      text,
      children: children.map((child) => child.text),
    }));
    assert.deepEqual(topLevel, expectedTopLevel);
  });

  it('integration: parses raw Markdown frontmatter and derives the display title', () => {
    const notePath = 'projects/tree/frontmatter.md';
    const rawMarkdown =
      '---\ntitle: Frontmatter Tree\napi_key: must-not-appear\n---\n\n# Root\n\n## Child\n';
    const expectedChildren = [
      {
        id: 'h1-root-0001',
        level: 1,
        text: 'Root',
        children: [
          {
            id: 'h2-child-0002',
            level: 2,
            text: 'Child',
            children: [],
          },
        ],
      },
    ];

    const tree = buildDocumentTreeFromMarkdown(notePath, rawMarkdown);
    const serialized = JSON.stringify(tree);

    assert.equal(tree.title, 'Frontmatter Tree');
    assert.deepEqual(tree.root.children, expectedChildren);
    // Neither the frontmatter key nor its value may surface anywhere in the tree.
    assert.equal(serialized.includes('api_key'), false);
    assert.equal(serialized.includes('must-not-appear'), false);
  });

  it('integration: keeps NoteOutline block-awareness for code blocks and Setext headings', () => {
    // Two Setext headings with a fenced code block between them; the '##'
    // line inside the fence must not show up as a heading.
    const bodyLines = [
      'Title',
      '=====',
      '',
      '```',
      '## Not a child',
      '```',
      '',
      'Subtitle',
      '--------',
    ];
    const note = {
      path: 'notes/block-aware.md',
      frontmatter: {},
      body: bodyLines.join('\n'),
    };
    const expectedChildren = [
      {
        id: 'h1-title-0001',
        level: 1,
        text: 'Title',
        children: [
          {
            id: 'h2-subtitle-0002',
            level: 2,
            text: 'Subtitle',
            children: [],
          },
        ],
      },
    ];

    const tree = buildDocumentTree(note);

    assert.deepEqual(tree.root.children, expectedChildren);
  });

  it('security: Markdown integration stays body-free and rejects unsafe paths', () => {
    const rawMarkdown = '# Visible\n\nSensitive body text must not appear.';

    const tree = buildDocumentTreeFromMarkdown('private/secret.md', rawMarkdown);
    const serialized = JSON.stringify(tree);

    assert.equal(Object.hasOwn(tree, 'body'), false);
    assert.equal(Object.hasOwn(tree, 'frontmatter'), false);
    assert.equal(serialized.includes('Sensitive body text'), false);

    // A traversal path must throw instead of producing a tree.
    const buildWithTraversalPath = () => buildDocumentTreeFromMarkdown('../secret.md', '# Secret');
    assert.throws(buildWithTraversalPath, /escape vault/);
  });

  it('stress: Markdown integration preserves truncation from NoteOutline caps', () => {
    // Ten level-2 headings separated by blank lines.
    const headingLines = [];
    for (let number = 1; number <= 10; number += 1) {
      headingLines.push(`## Heading ${number}`);
    }
    const note = {
      path: 'notes/many-markdown.md',
      frontmatter: {},
      body: headingLines.join('\n\n'),
    };
    const options = { maxHeadings: 3 };

    const tree = buildDocumentTree(note, options);

    assert.equal(tree.truncated, true);
    const keptTexts = tree.root.children.map((node) => node.text);
    assert.deepEqual(keptTexts, ['Heading 1', 'Heading 2', 'Heading 3']);
  });
});
| 370 | body: Array.from({ length: 10 }, (_, index) => `## Heading ${index + 1}`).join('\n\n'), |
| 371 | }, |
| 372 | { maxHeadings: 3 } |
| 373 | ); |
| 374 | |
| 375 | assert.equal(tree.truncated, true); |
| 376 | assert.deepEqual( |
| 377 | tree.root.children.map((node) => node.text), |
| 378 | ['Heading 1', 'Heading 2', 'Heading 3'] |
| 379 | ); |
| 380 | }); |
| 381 | }); |