document-tree.test.mjs
381 lines 11.3 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 2 days ago
1 /**
2 * DocumentTree v0 pure builder tests.
3 *
4 * Phase 1A is builder-only: no file reads, CLI, MCP, hosted MCP, Hub, search,
5 * index, vector, memory, persistence, summaries, PageIndex, or OCR behavior.
6 */
7 import { describe, it } from 'node:test';
8 import assert from 'node:assert/strict';
9
10 import {
11 DOCUMENT_TREE_SCHEMA,
12 buildDocumentTree,
13 buildDocumentTreeFromMarkdown,
14 buildDocumentTreeFromOutline,
15 } from '../lib/document-tree.mjs';
16
/**
 * Creates an outline fixture carrying the `knowtation.note_outline/v1` schema.
 *
 * A new defaults object (including a new empty `headings` array) is created on
 * every call, so fixtures never share state between tests.
 *
 * @param {object} [overrides={}] - Properties spread over the defaults; any
 *   key present here wins, and extra keys are carried through unchanged.
 * @returns {object} The defaults merged with `overrides`.
 */
function outline(overrides = {}) {
  const defaults = {
    schema: 'knowtation.note_outline/v1',
    path: 'notes/example.md',
    title: 'Example',
    headings: [],
    truncated: false,
  };

  return { ...defaults, ...overrides };
}
27
/**
 * Unit, data-integrity, security, stress, and performance checks for
 * `buildDocumentTreeFromOutline`, driven by in-memory fixtures produced by the
 * `outline()` helper in this file.
 */
describe('DocumentTree v0 pure builder', () => {
  // An outline without headings must still produce the full envelope and an
  // empty root node.
  it('unit: returns v0 schema, path, title, empty root, and truncation flag', () => {
    const tree = buildDocumentTreeFromOutline(outline());

    assert.equal(tree.schema, DOCUMENT_TREE_SCHEMA);
    assert.equal(tree.path, 'notes/example.md');
    assert.equal(tree.title, 'Example');
    assert.equal(tree.truncated, false);
    assert.deepEqual(tree.root, { children: [] });
  });

  // Levels 1 > 2, 2 > 3: deeper headings become children, equal levels become
  // siblings under the same parent.
  it('unit: nests deeper headings and keeps same-level headings as siblings', () => {
    const tree = buildDocumentTreeFromOutline(
      outline({
        headings: [
          { level: 1, text: 'Intro', id: 'h1-intro-0001' },
          { level: 2, text: 'Background', id: 'h2-background-0002' },
          { level: 2, text: 'Method', id: 'h2-method-0003' },
          { level: 3, text: 'Step One', id: 'h3-step-one-0004' },
        ],
      })
    );

    assert.deepEqual(tree.root.children, [
      {
        id: 'h1-intro-0001',
        level: 1,
        text: 'Intro',
        children: [
          {
            id: 'h2-background-0002',
            level: 2,
            text: 'Background',
            children: [],
          },
          {
            id: 'h2-method-0003',
            level: 2,
            text: 'Method',
            children: [
              {
                id: 'h3-step-one-0004',
                level: 3,
                text: 'Step One',
                children: [],
              },
            ],
          },
        ],
      },
    ]);
  });

  // Sequence 1, 3, 2, 1: the level-3 heading attaches directly to the level-1
  // heading (skipped level), the level-2 heading becomes its sibling rather
  // than its child, and the final level-1 heading returns to the root.
  it('unit: closes ancestors on lower-level headings and allows skipped levels', () => {
    const tree = buildDocumentTreeFromOutline(
      outline({
        headings: [
          { level: 1, text: 'A', id: 'h1-a-0001' },
          { level: 3, text: 'B', id: 'h3-b-0002' },
          { level: 2, text: 'C', id: 'h2-c-0003' },
          { level: 1, text: 'D', id: 'h1-d-0004' },
        ],
      })
    );

    // Project the tree down to heading text so only the shape is compared.
    assert.deepEqual(
      tree.root.children.map((node) => ({
        text: node.text,
        children: node.children.map((child) => ({
          text: child.text,
          children: child.children.map((grandchild) => grandchild.text),
        })),
      })),
      [
        {
          text: 'A',
          children: [
            { text: 'B', children: [] },
            { text: 'C', children: [] },
          ],
        },
        { text: 'D', children: [] },
      ]
    );
  });

  // Empty text and repeated heading text are carried through with the ids
  // supplied by the outline; the builder is not expected to rewrite them.
  it('unit: preserves empty heading text and duplicate heading IDs from NoteOutline', () => {
    const tree = buildDocumentTreeFromOutline(
      outline({
        headings: [
          { level: 2, text: '', id: 'h2-heading-0001' },
          { level: 2, text: 'Install', id: 'h2-install-0002' },
          { level: 2, text: 'Install', id: 'h2-install-0003' },
        ],
      })
    );

    assert.deepEqual(
      tree.root.children.map((node) => ({ id: node.id, text: node.text })),
      [
        { id: 'h2-heading-0001', text: '' },
        { id: 'h2-install-0002', text: 'Install' },
        { id: 'h2-install-0003', text: 'Install' },
      ]
    );
  });

  // A level of 7 and a record without an id must both throw, with a message
  // that names the offending field.
  it('unit: rejects invalid heading records', () => {
    assert.throws(
      () =>
        buildDocumentTreeFromOutline(
          outline({ headings: [{ level: 7, text: 'Bad', id: 'h7-bad-0001' }] })
        ),
      /heading.level/
    );

    assert.throws(
      () =>
        buildDocumentTreeFromOutline(
          outline({ headings: [{ level: 1, text: 'Missing id' }] })
        ),
      /heading.id/
    );
  });

  // Snapshot the input as JSON before the call and compare afterwards to
  // detect any in-place mutation of the outline or its heading records.
  it('data-integrity: does not mutate the input outline or heading records', () => {
    const input = outline({
      headings: [
        { level: 1, text: 'Root', id: 'h1-root-0001' },
        { level: 2, text: 'Child', id: 'h2-child-0002' },
      ],
    });
    const before = JSON.stringify(input);

    buildDocumentTreeFromOutline(input);

    assert.equal(JSON.stringify(input), before);
  });

  // Extra sensitive-looking fields on the outline must not surface anywhere in
  // the serialized tree; heading text itself is expected back verbatim.
  it('security: does not include body, snippets, frontmatter, vectors, or summaries', () => {
    const tree = buildDocumentTreeFromOutline(
      outline({
        path: 'private/secret.md',
        title: 'Private',
        frontmatter: { api_key: 'must-not-appear' },
        body: 'Sensitive body text must not appear.',
        snippet: 'Sensitive snippet must not appear.',
        vectorScore: 0.99,
        summary: 'Sensitive summary must not appear.',
        headings: [{ level: 1, text: '<script> alert(1) </script>', id: 'h1-script-0001' }],
      })
    );
    const serialized = JSON.stringify(tree);

    assert.equal(Object.hasOwn(tree, 'body'), false);
    assert.equal(Object.hasOwn(tree, 'frontmatter'), false);
    assert.equal(Object.hasOwn(tree, 'snippet'), false);
    assert.equal(serialized.includes('must-not-appear'), false);
    assert.equal(serialized.includes('Sensitive body text'), false);
    assert.equal(serialized.includes('Sensitive snippet'), false);
    assert.equal(serialized.includes('Sensitive summary'), false);
    assert.equal(serialized.includes('vectorScore'), false);
    assert.equal(tree.root.children[0].text, '<script> alert(1) </script>');
  });

  // POSIX-absolute, parent-traversal, and backslash-rooted paths must all
  // throw instead of producing a tree.
  it('security: rejects absolute and traversal paths before returning a tree', () => {
    assert.throws(
      () => buildDocumentTreeFromOutline(outline({ path: '/Users/example/vault/secret.md' })),
      /vault-relative/
    );
    assert.throws(
      () => buildDocumentTreeFromOutline(outline({ path: '../secret.md' })),
      /escape vault/
    );
    assert.throws(
      () => buildDocumentTreeFromOutline(outline({ path: '\\Users\\example\\vault\\secret.md' })),
      /vault-relative/
    );
  });

  // 500 headings cycling through levels 1-6: two builds of the same input must
  // be deeply equal, and the incoming truncation flag must be kept.
  it('stress: builds a capped heading list deterministically', () => {
    const headings = Array.from({ length: 500 }, (_, index) => ({
      level: (index % 6) + 1,
      text: `Heading ${index + 1}`,
      id: `h${(index % 6) + 1}-heading-${String(index + 1).padStart(4, '0')}`,
    }));
    const input = outline({ headings, truncated: true });

    const first = buildDocumentTreeFromOutline(input);
    const second = buildDocumentTreeFromOutline(input);

    assert.deepEqual(first, second);
    assert.equal(first.truncated, true);
  });

  // 501 level-1 headings with `truncated: false`: the test expects only the
  // first 500 to be kept and the result to be flagged as truncated.
  it('stress: caps untrusted direct outline heading input', () => {
    const headings = Array.from({ length: 501 }, (_, index) => ({
      level: 1,
      text: `Heading ${index + 1}`,
      id: `h1-heading-${String(index + 1).padStart(4, '0')}`,
    }));

    const tree = buildDocumentTreeFromOutline(outline({ headings, truncated: false }));

    assert.equal(tree.root.children.length, 500);
    assert.equal(tree.root.children[499].text, 'Heading 500');
    assert.equal(tree.truncated, true);
  });

  // Coarse wall-time guard: a single 500-heading build must finish in < 200ms.
  it('performance: tree construction is linear for normal heading counts', () => {
    const headings = Array.from({ length: 500 }, (_, index) => ({
      level: Math.min(6, (index % 4) + 1),
      text: `Heading ${index + 1}`,
      id: `h${Math.min(6, (index % 4) + 1)}-heading-${String(index + 1).padStart(4, '0')}`,
    }));
    // performance.now() is monotonic and sub-millisecond, so the measured
    // interval cannot be skewed (or go negative) when the system clock is
    // adjusted mid-test, unlike Date.now().
    const started = performance.now();
    const tree = buildDocumentTreeFromOutline(outline({ headings }));
    const elapsedMs = performance.now() - started;

    assert.ok(tree.root.children.length > 0, 'expected at least one top-level node');
    assert.ok(elapsedMs < 200, `expected builder under 200ms, got ${elapsedMs}ms`);
  });
});
251
/**
 * Integration checks that feed Markdown (pre-split or raw) through
 * `buildDocumentTree` / `buildDocumentTreeFromMarkdown` and inspect the
 * resulting tree.
 */
describe('DocumentTree v0 Markdown parser integration', () => {
  // ATX headings at levels 1, 3, 2, 1 with the title taken from frontmatter.
  it('integration: builds a nested tree from Markdown using NoteOutline parsing semantics', () => {
    const expectedTopLevel = [
      {
        id: 'h1-intro-0001',
        level: 1,
        text: 'Intro',
        children: ['Context', 'Method'],
      },
      {
        id: 'h1-outro-0004',
        level: 1,
        text: 'Outro',
        children: [],
      },
    ];

    const result = buildDocumentTree({
      path: 'notes/markdown.md',
      frontmatter: { title: 'Markdown Tree' },
      body: '# Intro\n\n### Context\n\n## Method\n\n# Outro',
    });

    assert.equal(result.schema, DOCUMENT_TREE_SCHEMA);
    assert.equal(result.path, 'notes/markdown.md');
    assert.equal(result.title, 'Markdown Tree');

    // Reduce each top-level node to its own fields plus its children's text.
    const topLevel = result.root.children.map(({ id, level, text, children }) => ({
      id,
      level,
      text,
      children: children.map((child) => child.text),
    }));
    assert.deepEqual(topLevel, expectedTopLevel);
  });

  // Raw Markdown with a YAML frontmatter fence: the title comes from the
  // frontmatter, and no other frontmatter key or value may leak into the tree.
  it('integration: parses raw Markdown frontmatter and derives the display title', () => {
    const rawMarkdown =
      '---\ntitle: Frontmatter Tree\napi_key: must-not-appear\n---\n\n# Root\n\n## Child\n';
    const expectedChildren = [
      {
        id: 'h1-root-0001',
        level: 1,
        text: 'Root',
        children: [
          {
            id: 'h2-child-0002',
            level: 2,
            text: 'Child',
            children: [],
          },
        ],
      },
    ];

    const result = buildDocumentTreeFromMarkdown('projects/tree/frontmatter.md', rawMarkdown);
    const asJson = JSON.stringify(result);

    assert.equal(result.title, 'Frontmatter Tree');
    assert.deepEqual(result.root.children, expectedChildren);
    for (const leaked of ['api_key', 'must-not-appear']) {
      assert.equal(asJson.includes(leaked), false);
    }
  });

  // Setext headings are recognised, while a `##` line inside a fenced code
  // block must not become a heading.
  it('integration: keeps NoteOutline block-awareness for code blocks and Setext headings', () => {
    const bodyLines = [
      'Title',
      '=====',
      '',
      '```',
      '## Not a child',
      '```',
      '',
      'Subtitle',
      '--------',
    ];
    const expectedChildren = [
      {
        id: 'h1-title-0001',
        level: 1,
        text: 'Title',
        children: [
          {
            id: 'h2-subtitle-0002',
            level: 2,
            text: 'Subtitle',
            children: [],
          },
        ],
      },
    ];

    const result = buildDocumentTree({
      path: 'notes/block-aware.md',
      frontmatter: {},
      body: bodyLines.join('\n'),
    });

    assert.deepEqual(result.root.children, expectedChildren);
  });

  // The raw-Markdown entry point must not expose body or frontmatter, and must
  // refuse a path that climbs out of the vault.
  it('security: Markdown integration stays body-free and rejects unsafe paths', () => {
    const result = buildDocumentTreeFromMarkdown(
      'private/secret.md',
      '# Visible\n\nSensitive body text must not appear.'
    );
    const asJson = JSON.stringify(result);

    for (const forbiddenKey of ['body', 'frontmatter']) {
      assert.equal(Object.hasOwn(result, forbiddenKey), false);
    }
    assert.equal(asJson.includes('Sensitive body text'), false);

    const buildWithTraversalPath = () => buildDocumentTreeFromMarkdown('../secret.md', '# Secret');
    assert.throws(buildWithTraversalPath, /escape vault/);
  });

  // Ten level-2 headings with `maxHeadings: 3`: only the first three survive
  // and the tree is flagged as truncated.
  it('stress: Markdown integration preserves truncation from NoteOutline caps', () => {
    const manyHeadings = Array.from({ length: 10 }, (_, i) => `## Heading ${i + 1}`);
    const note = {
      path: 'notes/many-markdown.md',
      frontmatter: {},
      body: manyHeadings.join('\n\n'),
    };

    const result = buildDocumentTree(note, { maxHeadings: 3 });

    assert.equal(result.truncated, true);
    const keptTexts = result.root.children.map(({ text }) => text);
    assert.deepEqual(keptTexts, ['Heading 1', 'Heading 2', 'Heading 3']);
  });
});
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 2 days ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 2 days ago