section-source.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | /** |
| 2 | * Body-free section source metadata for one Markdown note. |
| 3 | * |
| 4 | * This module is intentionally pure: no file reads, writes, CLI, MCP, hosted |
| 5 | * transport, Hub, search, indexes, vectors, memory, summaries, PageIndex, OCR, |
| 6 | * LLM calls, provider routing, snippets, or section body output. |
| 7 | */ |
| 8 | |
| 9 | import { unified } from 'unified'; |
| 10 | import remarkParse from 'remark-parse'; |
| 11 | import { buildDocumentTreeFromOutline } from './document-tree.mjs'; |
| 12 | import { buildNoteOutline, buildNoteOutlineFromMarkdown } from './note-outline.mjs'; |
| 13 | import { normalizeSlug, parseFrontmatterAndBody } from './vault.mjs'; |
| 14 | |
/** Schema identifier placed in the `schema` field of every SectionSource object built here. */
export const SECTION_SOURCE_SCHEMA = 'knowtation.section_source/v0';

/**
 * Module-wide unified processor with remark-parse attached. It is only used to
 * turn a Markdown body into an AST; nothing from that AST is returned verbatim.
 */
const parser = unified().use(remarkParse);
| 18 | |
/**
 * Derive body-free SectionSource metadata directly from the raw text of a
 * Markdown file. The text is split into frontmatter and body, and the resulting
 * note is handed to `buildSectionSource` together with the untouched options.
 * @param {string} notePath Path recorded on the note passed downstream.
 * @param {string} markdown Raw Markdown file contents (frontmatter included).
 * @param {{ maxInputChars?: number, maxHeadings?: number }} [options]
 * @returns {{
 *   schema: string,
 *   path: string,
 *   title: string|null,
 *   sections: {
 *     section_id: string,
 *     heading_id: string,
 *     level: number,
 *     heading_path: string[],
 *     heading_text: string,
 *     child_section_ids: string[],
 *     body_available: boolean,
 *     body_returned: false,
 *     snippet_returned: false
 *   }[],
 *   truncated: boolean
 * }}
 * @throws {TypeError} When `markdown` is not a string.
 */
export function buildSectionSourceFromMarkdown(notePath, markdown, options = {}) {
  const isText = typeof markdown === 'string';
  if (!isText) {
    throw new TypeError('buildSectionSourceFromMarkdown: markdown must be a string');
  }

  const parsed = parseFrontmatterAndBody(markdown);
  const note = {
    path: notePath,
    frontmatter: parsed.frontmatter,
    body: parsed.body,
  };
  return buildSectionSource(note, options);
}
| 49 | |
/**
 * Derive body-free SectionSource metadata from an already-parsed vault note.
 * An outline is built from the note first; the outline and the note body are
 * then combined by `buildSectionSourceFromOutlineAndBody`.
 * @param {{ path: string, frontmatter?: Record<string, unknown>, body: string }} note
 * @param {{ maxInputChars?: number, maxHeadings?: number }} [options]
 *   Passed unchanged to both the outline builder and the section builder.
 * @returns {{
 *   schema: string,
 *   path: string,
 *   title: string|null,
 *   sections: {
 *     section_id: string,
 *     heading_id: string,
 *     level: number,
 *     heading_path: string[],
 *     heading_text: string,
 *     child_section_ids: string[],
 *     body_available: boolean,
 *     body_returned: false,
 *     snippet_returned: false
 *   }[],
 *   truncated: boolean
 * }}
 * @throws {TypeError} When `note` is nullish or not an object, or when
 *   `note.body` is not a string.
 */
export function buildSectionSource(note, options = {}) {
  const isObjectLike = note != null && typeof note === 'object';
  if (!isObjectLike) {
    throw new TypeError('buildSectionSource: note is required');
  }

  const bodyIsText = typeof note.body === 'string';
  if (!bodyIsText) {
    throw new TypeError('buildSectionSource: note.body must be a string');
  }

  const noteOutline = buildNoteOutline(note, options);
  return buildSectionSourceFromOutlineAndBody(noteOutline, note.body, options);
}
| 83 | |
/**
 * Combine a NoteOutline-compatible object with the note's current Markdown body
 * into SectionSource metadata. The body is parsed solely to decide, per
 * heading, whether body content exists; no body text is placed in the result.
 *
 * Section ids have the form `<path slug>:<heading id>`, where the slug comes
 * from `normalizeSlug(tree.path)` and falls back to `'note'` when that value is
 * falsy.
 * @param {{ path: string, title: string|null, headings: { level: number, text: string, id: string }[], truncated: boolean }} outline
 * @param {string} markdownBody
 * @param {{ maxHeadings?: number }} [options] Forwarded to the document-tree builder.
 * @returns {{
 *   schema: string,
 *   path: string,
 *   title: string|null,
 *   sections: {
 *     section_id: string,
 *     heading_id: string,
 *     level: number,
 *     heading_path: string[],
 *     heading_text: string,
 *     child_section_ids: string[],
 *     body_available: boolean,
 *     body_returned: false,
 *     snippet_returned: false
 *   }[],
 *   truncated: boolean
 * }}
 * @throws {TypeError} When `markdownBody` is not a string.
 */
export function buildSectionSourceFromOutlineAndBody(outline, markdownBody, options = {}) {
  const bodyIsText = typeof markdownBody === 'string';
  if (!bodyIsText) {
    throw new TypeError('buildSectionSourceFromOutlineAndBody: markdownBody must be a string');
  }

  const tree = buildDocumentTreeFromOutline(outline, options);
  const topLevelNodes = tree.root.children;

  // Availability is computed from the parsed body before any ids are minted.
  const availability = bodyAvailabilityByHeadingId(markdownBody, topLevelNodes);

  const slug = normalizeSlug(tree.path);
  const idPrefix = slug ? slug : 'note';
  const idsByHeading = new Map();
  assignSectionIds(topLevelNodes, idPrefix, idsByHeading);

  const sections = flattenSections(topLevelNodes, [], idsByHeading, availability);

  return {
    schema: SECTION_SOURCE_SCHEMA,
    path: tree.path,
    title: tree.title,
    sections,
    truncated: tree.truncated,
  };
}
| 129 | |
/**
 * Record a section id for every node of the tree, visiting parents before
 * their children. Each id is written into `sectionIds` under the node's own
 * heading id as `<pathSlug>:<heading id>`.
 * @param {{ id: string, children: unknown[] }[]} nodes
 * @param {string} pathSlug
 * @param {Map<string, string>} sectionIds Mutated in place; nothing is returned.
 */
function assignSectionIds(nodes, pathSlug, sectionIds) {
  const register = (node) => {
    sectionIds.set(node.id, `${pathSlug}:${node.id}`);
    for (const child of node.children) {
      register(child);
    }
  };

  for (const topLevel of nodes) {
    register(topLevel);
  }
}
| 142 | |
/**
 * Flatten the heading tree into a pre-order list of section records (each
 * parent immediately followed by its descendants).
 *
 * All records are appended to one shared array. Re-spreading each subtree's
 * result into its parent's array would copy every record once per ancestor
 * level, and spreading a very large subtree as call arguments can exceed the
 * engine's argument limit and throw a RangeError.
 * @param {{ id: string, level: number, text: string, children: unknown[] }[]} nodes
 * @param {string[]} ancestors Heading texts of the enclosing sections, outermost first.
 * @param {Map<string, string>} sectionIds Heading id -> section id lookup.
 * @param {Map<string, boolean>} bodyAvailability Heading id -> body availability;
 *   only a stored `true` yields `body_available: true`.
 * @returns {{
 *   section_id: string,
 *   heading_id: string,
 *   level: number,
 *   heading_path: string[],
 *   heading_text: string,
 *   child_section_ids: string[],
 *   body_available: boolean,
 *   body_returned: false,
 *   snippet_returned: false
 * }[]}
 */
function flattenSections(nodes, ancestors, sectionIds, bodyAvailability) {
  const out = [];

  const visit = (levelNodes, trail) => {
    for (const node of levelNodes) {
      const headingPath = [...trail, node.text];
      out.push({
        section_id: sectionIds.get(node.id),
        heading_id: node.id,
        level: node.level,
        heading_path: headingPath,
        heading_text: node.text,
        child_section_ids: node.children.map((child) => sectionIds.get(child.id)),
        body_available: bodyAvailability.get(node.id) === true,
        // This module never emits section bodies or snippets.
        body_returned: false,
        snippet_returned: false,
      });
      visit(node.children, headingPath);
    }
  };

  visit(nodes, ancestors);
  return out;
}
| 179 | |
/**
 * Decide, for every visible heading id, whether the Markdown body holds
 * substantive content inside that heading's section.
 *
 * Heading ids (flattened from the tree) are paired with the heading nodes found
 * in the parsed body purely by position: the i-th id is matched with the i-th
 * heading node. An id with no heading node at its position is recorded as
 * `false`.
 * NOTE(review): this presumes the outline lists headings in the same order, and
 * with the same inclusion rules, as the parser's heading nodes — confirm
 * against the outline builder.
 * @param {string} markdownBody
 * @param {{ id: string }[]} visibleTreeNodes
 * @returns {Map<string, boolean>}
 */
function bodyAvailabilityByHeadingId(markdownBody, visibleTreeNodes) {
  const headingIds = flattenTreeHeadingIds(visibleTreeNodes);

  const root = parser.parse(markdownBody);
  const astHeadings = [];
  collectHeadingNodes(root, astHeadings);
  const blocks = Array.isArray(root.children) ? root.children : [];

  const availability = new Map();
  for (let position = 0; position < headingIds.length; position += 1) {
    const astHeading = astHeadings[position];
    const available = astHeading
      ? hasBodyContentInBoundary(astHeading, astHeadings, position, blocks)
      : false;
    availability.set(headingIds[position], available);
  }
  return availability;
}
| 204 | |
/**
 * List the ids of all nodes in the tree in pre-order (each parent before its
 * descendants). A `children` value that is not an array is treated as "no
 * children".
 *
 * Ids are appended to one shared array rather than spreading each subtree's
 * result into its parent's array: the spread form copies every id once per
 * ancestor level and can throw a RangeError when a subtree is large enough to
 * exceed the engine's call-argument limit.
 * @param {{ id: string, children?: unknown[] }[]} nodes
 * @returns {string[]}
 */
function flattenTreeHeadingIds(nodes) {
  const out = [];

  const visit = (levelNodes) => {
    for (const node of levelNodes) {
      out.push(node.id);
      if (Array.isArray(node.children)) {
        visit(node.children);
      }
    }
  };

  visit(nodes);
  return out;
}
| 217 | |
/**
 * Walk an AST depth-first and append every node whose `type` is `'heading'` to
 * `out`, in traversal order. The walk does not descend into a heading once it
 * has been collected, and silently ignores anything that is not an object.
 * @param {unknown} node
 * @param {unknown[]} out Mutated in place; nothing is returned.
 */
function collectHeadingNodes(node, out) {
  const isObjectLike = node != null && typeof node === 'object';
  if (!isObjectLike) {
    return;
  }

  if (node.type === 'heading') {
    out.push(node);
    return;
  }

  const { children } = node;
  if (!Array.isArray(children)) {
    return;
  }
  children.forEach((child) => {
    collectHeadingNodes(child, out);
  });
}
| 234 | |
/**
 * Report whether any top-level, non-heading block with substantive content
 * starts inside the section opened by `headingNode`.
 *
 * The section runs from the end offset of the heading up to (but excluding)
 * the start offset of the next heading of the same or shallower depth, so
 * content under deeper sub-headings counts towards this heading as well.
 * Blocks without an integer start offset are skipped.
 * @param {unknown} headingNode
 * @param {unknown[]} headingNodes All heading nodes of the document, in order.
 * @param {number} headingIndex Position of `headingNode` within `headingNodes`.
 * @param {unknown[]} topLevelBlocks Direct children of the AST root.
 * @returns {boolean}
 */
function hasBodyContentInBoundary(headingNode, headingNodes, headingIndex, topLevelBlocks) {
  const sectionStart = endOffset(headingNode);
  const sectionEnd = nextPeerOrAncestorStartOffset(headingNode, headingNodes, headingIndex);

  for (const block of topLevelBlocks) {
    if (block == null || typeof block !== 'object') continue;
    if (block.type === 'heading') continue;

    const blockStart = startOffsetOf(block);
    if (blockStart == null) continue;

    const insideSection = blockStart >= sectionStart && blockStart < sectionEnd;
    if (insideSection && hasSubstantiveContent(block)) {
      return true;
    }
  }
  return false;
}
| 253 | |
/**
 * Find where the section opened by `headingNode` ends: the start offset of the
 * first later heading whose depth is less than or equal to this heading's
 * depth. A qualifying heading that has no integer start offset is passed over
 * and the search continues. When nothing qualifies the section is unbounded.
 * @param {unknown} headingNode
 * @param {unknown[]} headingNodes All heading nodes of the document, in order.
 * @param {number} headingIndex Position of `headingNode` within `headingNodes`.
 * @returns {number} A start offset, or `Number.POSITIVE_INFINITY`.
 */
function nextPeerOrAncestorStartOffset(headingNode, headingNodes, headingIndex) {
  const ownDepth = Number(headingNode.depth);

  let cursor = headingIndex + 1;
  while (cursor < headingNodes.length) {
    const later = headingNodes[cursor];
    cursor += 1;

    const closesSection = Number(later.depth) <= ownDepth;
    if (!closesSection) continue;

    const boundary = startOffsetOf(later);
    if (boundary != null) {
      return boundary;
    }
  }
  return Number.POSITIVE_INFINITY;
}
| 271 | |
/**
 * Report whether an AST node carries content worth counting as section body.
 * A node qualifies when, checked in this order:
 *   1. its `value` is a string with non-whitespace characters, or
 *   2. its `alt` is a string with non-whitespace characters, or
 *   3. it has a `children` array and any child qualifies, or
 *   4. it has no `children` array and its type is `'thematicBreak'`.
 * Non-object inputs never qualify.
 * @param {unknown} node
 * @returns {boolean}
 */
function hasSubstantiveContent(node) {
  if (node == null || typeof node !== 'object') {
    return false;
  }

  const isNonBlank = (candidate) => typeof candidate === 'string' && candidate.trim() !== '';
  if (isNonBlank(node.value) || isNonBlank(node.alt)) {
    return true;
  }

  if (!Array.isArray(node.children)) {
    return node.type === 'thematicBreak';
  }

  for (const child of node.children) {
    if (hasSubstantiveContent(child)) {
      return true;
    }
  }
  return false;
}
| 285 | |
/**
 * Read `node.position.start.offset`, tolerating any missing link in that chain.
 * @param {unknown} node
 * @returns {number|null} The offset when it is an integer, otherwise `null`.
 */
function startOffsetOf(node) {
  const candidate = node?.position?.start?.offset;
  if (!Number.isInteger(candidate)) {
    return null;
  }
  return candidate;
}
| 294 | |
/**
 * Read `node.position.end.offset`, tolerating any missing link in that chain.
 * Unlike `startOffsetOf`, a missing or non-integer offset yields `0` rather
 * than `null`.
 * @param {unknown} node
 * @returns {number} The offset when it is an integer, otherwise `0`.
 */
function endOffset(node) {
  const candidate = node?.position?.end?.offset;
  if (!Number.isInteger(candidate)) {
    return 0;
  }
  return candidate;
}
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
2 days ago