vault.mjs
file-level
1
files
1
commits
0
hotspots
0
🧊 dead
0
💥 blast risk
| 1 | /** |
| 2 | * Vault utilities: list Markdown files, parse frontmatter + body, normalize project/tags. SPEC §1-2. |
| 3 | */ |
| 4 | |
| 5 | import fs from 'fs'; |
| 6 | import path from 'path'; |
| 7 | import yaml from 'js-yaml'; |
| 8 | |
/**
 * Build a project slug or tag from free-form text (SPEC §1).
 *
 * The input is lowercased, every character outside `a-z`, `0-9` and `-` is
 * replaced by a hyphen, runs of hyphens are collapsed to one, and a single
 * leading/trailing hyphen is stripped.
 *
 * @param {string} s - Raw project name or tag.
 * @returns {string} The slug, or `''` when `s` is not a string.
 */
export function normalizeSlug(s) {
  if (typeof s !== 'string') {
    return '';
  }
  const lowered = s.toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9-]/g, '-');
  // Collapsing first guarantees at most one hyphen at each end for the final strip.
  const collapsed = hyphenated.replace(/-+/g, '-');
  return collapsed.replace(/^-|-$/g, '');
}
| 22 | |
/**
 * Normalize tags into an array of slugs.
 *
 * Accepts either a comma-separated string or an array. Array items that are
 * primitives other than strings (numbers, booleans, bigints — e.g. a YAML tag
 * written as `2024`) are coerced with `String()` before slugging so they are
 * not silently lost. `null`/`undefined` items, objects and functions are
 * skipped, as are items whose slug is empty.
 *
 * @param {string|Array<string|number|boolean>|null|undefined} tags
 * @returns {string[]} Normalized, non-empty tag slugs in input order.
 */
export function normalizeTags(tags) {
  if (tags == null) return [];
  const rawItems = Array.isArray(tags) ? tags : String(tags).split(',');
  const slugs = [];
  for (const item of rawItems) {
    if (item == null) continue;
    const kind = typeof item;
    // Objects (including Date) and functions have no meaningful tag text; skip them.
    if (kind === 'object' || kind === 'function' || kind === 'symbol') continue;
    // Surrounding whitespace becomes hyphens and is stripped by normalizeSlug.
    const slug = normalizeSlug(String(item));
    if (slug) slugs.push(slug);
  }
  return slugs;
}
| 33 | |
/** Schema identifier stamped on every MetadataFacets object produced by this module. */
export const METADATA_FACETS_SCHEMA = 'knowtation.metadata_facets/v0';

/** Upper bound on the number of raw entries read from a list-valued facet (tags, entity). */
export const METADATA_FACET_MAX_VALUES = 100;

/** Upper bound on the length (in UTF-16 code units) of any single facet value. */
export const METADATA_FACET_MAX_VALUE_CHARS = 256;
| 37 | |
/**
 * Build the body-free MetadataFacets v0 record for one note from its parsed frontmatter.
 *
 * Pure by design: it performs no file reads or writes, touches no index, and
 * never echoes the note body or the raw frontmatter back to the caller. Every
 * facet value is capped; `truncated` is set when any value or list was cut.
 *
 * @param {string} relativePath - Vault-relative note path.
 * @param {object} [frontmatter] - Parsed YAML/JSON frontmatter for the note;
 *   anything that is not a plain (non-array) object is treated as empty.
 * @returns {{
 *   schema: string,
 *   path: string,
 *   facets: {
 *     project: string|null,
 *     tags: string[],
 *     date: string|null,
 *     updated: string|null,
 *     causal_chain_id: string|null,
 *     entity: string[],
 *     episode_id: string|null,
 *   },
 *   inferred: {
 *     folder: string|null,
 *     source_type: null,
 *   },
 *   truncated: boolean,
 * }}
 * @throws {Error} When `relativePath` is not a valid vault-relative path.
 */
export function normalizeMetadataFacets(relativePath, frontmatter = {}) {
  const safePath = normalizeFacetRelativePath(relativePath);
  const isRecord = frontmatter && typeof frontmatter === 'object' && !Array.isArray(frontmatter);
  const fm = isRecord ? frontmatter : {};

  // Single flag shared by all helpers below: flips to true on the first truncation.
  let anyTruncated = false;
  const remember = (result) => {
    if (result.truncated) {
      anyTruncated = true;
    }
    return result;
  };
  const slugOrNull = (raw) => remember(capFacetSlug(String(raw))).value || null;
  const optionalSlug = (raw) => (raw == null ? null : slugOrNull(raw));
  const optionalDate = (raw) =>
    raw == null ? null : remember(capFacetValue(String(dateFacetString(raw)))).value;
  const slugList = (raw) => remember(normalizeFacetSlugArray(raw)).values;

  // Explicit frontmatter project wins; otherwise fall back to path-based inference.
  const projectSlug =
    fm.project != null ? normalizeSlug(String(fm.project)) : effectiveProjectSlug(safePath, fm);

  const facets = {
    project: projectSlug ? slugOrNull(projectSlug) : null,
    tags: slugList(fm.tags),
    date: optionalDate(fm.date),
    updated: optionalDate(fm.updated),
    causal_chain_id: optionalSlug(fm.causal_chain_id),
    // slugList yields a fresh empty array for a missing entity field.
    entity: slugList(fm.entity),
    episode_id: optionalSlug(fm.episode_id),
  };
  const inferred = {
    folder: inferredFolderForFacetPath(safePath),
    source_type: null,
  };

  return {
    schema: METADATA_FACETS_SCHEMA,
    path: safePath,
    facets,
    inferred,
    truncated: anyTruncated,
  };
}
| 113 | |
/**
 * Render a frontmatter date value as a string.
 *
 * `Date` instances are serialized with `toISOString()`; everything else is
 * passed through `String()`.
 *
 * @param {unknown} value
 * @returns {string}
 */
function dateFacetString(value) {
  if (value instanceof Date) {
    return value.toISOString();
  }
  return String(value);
}
| 121 | |
/**
 * Validate a vault-relative path and return it in normalized POSIX form.
 *
 * Backslashes are treated as separators. The path is rejected when it is
 * empty, contains a NUL byte, is absolute (POSIX, drive-letter or `//` prefixed),
 * or normalizes to the vault root itself or to something above it.
 *
 * @param {string} relativePath
 * @returns {string} Normalized forward-slash path.
 * @throws {Error} With an `Invalid path: …` message on any violation.
 */
function normalizeFacetRelativePath(relativePath) {
  const isNonBlankString = typeof relativePath === 'string' && relativePath.trim() !== '';
  if (!isNonBlankString) {
    throw new Error('Invalid path: path must be a non-empty vault-relative string');
  }
  if (relativePath.includes('\0')) {
    throw new Error('Invalid path: path contains a null byte');
  }

  const slashed = relativePath.replace(/\\/g, '/');
  const looksAbsolute =
    path.isAbsolute(relativePath) ||
    path.posix.isAbsolute(slashed) ||
    /^[a-zA-Z]:\//.test(slashed) ||
    slashed.startsWith('//');
  if (looksAbsolute) {
    throw new Error('Invalid path: path must be vault-relative');
  }

  const collapsed = path.posix.normalize(slashed);
  // '.' means "the vault itself"; '..' or a '../' prefix means "above the vault".
  const leavesVault = collapsed === '.' || collapsed === '..' || collapsed.startsWith('../');
  if (leavesVault) {
    throw new Error('Invalid path: path must be vault-relative and cannot escape vault');
  }
  return collapsed;
}
| 148 | |
/**
 * Derive the containing folder of a vault-relative (forward-slash) note path.
 *
 * @param {string} relativePath
 * @returns {string|null} The POSIX dirname, or `null` when that dirname is `.`
 *   (i.e. the path has no folder component).
 */
function inferredFolderForFacetPath(relativePath) {
  const parent = path.posix.dirname(relativePath);
  if (parent === '.') {
    return null;
  }
  return parent;
}
| 157 | |
/**
 * Cap a facet value at a maximum length without producing ill-formed text.
 *
 * Length is measured in UTF-16 code units. When the cut would fall between
 * the two halves of a surrogate pair (e.g. in the middle of an emoji), the
 * dangling high surrogate is dropped too, so the result never ends in a lone
 * surrogate.
 *
 * @param {string} value - Value to cap.
 * @param {number} [maxChars=METADATA_FACET_MAX_VALUE_CHARS] - Maximum length in
 *   UTF-16 code units.
 * @returns {{ value: string, truncated: boolean }} The (possibly shortened)
 *   value and whether anything was cut.
 */
function capFacetValue(value, maxChars = METADATA_FACET_MAX_VALUE_CHARS) {
  if (value.length <= maxChars) {
    return { value, truncated: false };
  }
  let end = maxChars;
  if (end > 0) {
    const lastKept = value.charCodeAt(end - 1);
    const firstDropped = value.charCodeAt(end);
    const lastIsHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
    const nextIsLowSurrogate = firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    // Cutting here would split one code point in two; back off by one unit.
    if (lastIsHighSurrogate && nextIsLowSurrogate) {
      end -= 1;
    }
  }
  return { value: value.slice(0, end), truncated: true };
}
| 168 | |
/**
 * Slug-normalize a value and cap its length.
 *
 * Trailing hyphens are stripped after capping, because the cut may land
 * right after a hyphen.
 *
 * @param {string} value
 * @returns {{ value: string, truncated: boolean }} The capped slug and whether
 *   the slug had to be shortened.
 */
function capFacetSlug(value) {
  const { value: cappedSlug, truncated } = capFacetValue(normalizeSlug(value));
  const withoutTrailingHyphens = cappedSlug.replace(/-+$/g, '');
  return { value: withoutTrailingHyphens, truncated };
}
| 181 | |
/**
 * Normalize a list-valued facet (array or comma-separated string) into capped slugs.
 *
 * Only the first METADATA_FACET_MAX_VALUES raw entries are considered; entries
 * whose slug is empty are dropped. `truncated` is true when the raw list was
 * longer than the limit or any individual slug had to be shortened.
 *
 * @param {unknown} value
 * @returns {{ values: string[], truncated: boolean }}
 */
function normalizeFacetSlugArray(value) {
  if (value == null) {
    return { values: [], truncated: false };
  }

  let candidates;
  if (Array.isArray(value)) {
    candidates = value;
  } else {
    candidates = String(value).split(',').map((item) => item.trim());
  }

  const values = [];
  let truncated = candidates.length > METADATA_FACET_MAX_VALUES;
  const limit = Math.min(candidates.length, METADATA_FACET_MAX_VALUES);
  for (let index = 0; index < limit; index += 1) {
    const slug = capFacetSlug(String(candidates[index]));
    truncated = truncated || slug.truncated;
    if (slug.value) {
      values.push(slug.value);
    }
  }
  return { values, truncated };
}
| 200 | |
/**
 * Split Markdown content into YAML frontmatter and body.
 *
 * Frontmatter is recognised only when the content starts with a `---` line and
 * a later line consisting of `---` closes it. The closing delimiter may be
 * followed by a newline and the body, or may be the very end of the content
 * (a frontmatter-only note without a trailing newline). Both LF and CRLF line
 * endings are accepted.
 *
 * The returned `frontmatter` is always a plain object: invalid YAML, an empty
 * block, or YAML whose top level is not a mapping (scalar, list, timestamp)
 * all yield `{}`. The body has trailing whitespace trimmed.
 *
 * @param {string} content - Raw file content.
 * @returns {{ frontmatter: object, body: string }}
 */
export function parseFrontmatterAndBody(content) {
  const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n([\s\S]*))?$/);
  if (!match) {
    return { frontmatter: {}, body: content.trimEnd() };
  }
  // `rest` is undefined when the closing delimiter is the last thing in the file.
  const [, yamlSource, rest = ''] = match;

  let frontmatter = {};
  try {
    // NOTE(review): js-yaml >= 4 `load` uses a safe schema by default; on js-yaml 3.x
    // `load` can construct arbitrary JS types and `safeLoad` should be used — confirm version.
    const parsed = yaml.load(yamlSource);
    const isMapping =
      parsed !== null &&
      typeof parsed === 'object' &&
      !Array.isArray(parsed) &&
      !(parsed instanceof Date);
    if (isMapping) {
      frontmatter = parsed;
    }
  } catch {
    // Deliberate best effort: invalid YAML is treated as empty frontmatter.
  }
  return { frontmatter, body: rest.trimEnd() };
}
| 219 | |
/**
 * Recursively collect every `.md` file below the vault root.
 *
 * Directories whose name (case-insensitively) appears in `options.ignore` are
 * skipped at any depth. Only entries reported as regular files are returned.
 *
 * @param {string} vaultPath - Absolute path to vault root.
 * @param {{ ignore?: string[] }} options - Folder names to skip (e.g. templates, meta).
 * @returns {string[]} Vault-relative paths using forward slashes, in directory-walk order.
 */
export function listMarkdownFiles(vaultPath, options = {}) {
  const ignoredFolders = new Set((options.ignore || []).map((name) => name.toLowerCase()));
  const found = [];

  const visit = (absDir, relDir) => {
    for (const entry of fs.readdirSync(absDir, { withFileTypes: true })) {
      const relPath = relDir ? `${relDir}/${entry.name}` : entry.name;

      if (entry.isDirectory()) {
        if (!ignoredFolders.has(entry.name.toLowerCase())) {
          visit(path.join(absDir, entry.name), relPath);
        }
        continue;
      }

      if (entry.isFile() && entry.name.endsWith('.md')) {
        found.push(relPath.replace(/\\/g, '/'));
      }
    }
  };

  visit(vaultPath, '');
  return found;
}
| 246 | |
/**
 * Load a single note from the vault and derive its normalized metadata.
 *
 * The path is validated as vault-relative and the resolved real file must stay
 * inside the vault. Optional metadata fields are `undefined` when the
 * corresponding frontmatter key is absent (null/undefined).
 *
 * @param {string} vaultPath - Absolute path to vault root.
 * @param {string} relativePath - Vault-relative path (e.g. inbox/foo.md).
 * @returns {{ path: string, frontmatter: object, body: string, project?: string, tags?: string[], date?: string, updated?: string, causal_chain_id?: string, entity?: string[], episode_id?: string }}
 * @throws if path escapes vault or file not found
 */
export function readNote(vaultPath, relativePath) {
  const safe = resolveVaultRelativePath(vaultPath, relativePath);
  const fullPath = resolveExistingVaultFilePath(vaultPath, safe, relativePath);
  const { frontmatter, body } = parseFrontmatterAndBody(fs.readFileSync(fullPath, 'utf8'));

  // Dates parsed by YAML arrive as Date objects; anything else is stringified as-is.
  const dateText = (raw) => {
    if (raw == null) return undefined;
    return raw instanceof Date ? raw.toISOString() : String(raw);
  };
  const optionalSlug = (raw) => (raw == null ? undefined : normalizeSlug(String(raw)));

  let entity;
  if (frontmatter.entity != null) {
    // A single scalar entity is treated as a one-element list.
    const entityList = Array.isArray(frontmatter.entity) ? frontmatter.entity : [frontmatter.entity];
    entity = entityList.map((item) => normalizeSlug(String(item))).filter(Boolean);
  }

  return {
    path: safe.replace(/\\/g, '/'),
    frontmatter,
    body,
    project: effectiveProjectSlug(safe, frontmatter),
    tags: normalizeTags(frontmatter.tags),
    date: dateText(frontmatter.date),
    updated: dateText(frontmatter.updated),
    causal_chain_id: optionalSlug(frontmatter.causal_chain_id),
    entity,
    episode_id: optionalSlug(frontmatter.episode_id),
  };
}
| 297 | |
/**
 * Validate a user-supplied path as vault-relative and return it normalized.
 *
 * Backslashes are treated as separators. A path is rejected when it is
 * absolute, when it normalizes to `..` or begins with a `../` segment, or when
 * resolving it against the vault root lands outside that root. Containment is
 * checked with `path.relative`, so a sibling directory that merely shares the
 * vault's name as a prefix (`/data/vault-old` vs `/data/vault`) is correctly
 * treated as outside. File names that merely begin with two dots
 * (`..drafts/a.md`) are allowed.
 *
 * This is a lexical check only; it does not touch the filesystem and does not
 * follow symlinks.
 *
 * @param {string} vaultPath - Absolute vault root.
 * @param {string} relativePath - User-provided path.
 * @returns {string} Safe vault-relative path using forward slashes.
 * @throws {Error} `Invalid path: …` if the path is absolute or escapes the vault.
 */
export function resolveVaultRelativePath(vaultPath, relativePath) {
  const normalized = path.normalize(relativePath).replace(/\\/g, '/');
  const climbsOut = normalized === '..' || normalized.startsWith('../');
  if (climbsOut || path.isAbsolute(relativePath)) {
    throw new Error(`Invalid path: path must be vault-relative and cannot escape vault (${relativePath})`);
  }

  const root = path.resolve(vaultPath);
  const full = path.resolve(root, normalized);
  const relative = path.relative(root, full);
  // An absolute result can occur on Windows when the target is on another drive.
  const escapes =
    relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapes) {
    throw new Error(`Invalid path: path escapes vault (${relativePath})`);
  }
  return relative.replace(/\\/g, '/');
}
| 316 | |
/**
 * Report whether a vault-relative path is safe and points at an existing regular
 * file inside the vault. Never throws: any validation or filesystem error
 * yields `false`.
 *
 * Used by the Hub to drop semantic-search hits that are not on disk for the active vault.
 *
 * @param {string} vaultPath - Absolute vault root.
 * @param {string} relativePath - Vault-relative path.
 * @returns {boolean}
 */
export function noteFileExistsInVault(vaultPath, relativePath) {
  const isNonBlankString = typeof relativePath === 'string' && relativePath.trim() !== '';
  if (!isNonBlankString) {
    return false;
  }
  try {
    const safe = resolveVaultRelativePath(vaultPath, relativePath);
    resolveExistingVaultFilePath(vaultPath, safe, relativePath);
  } catch {
    return false;
  }
  return true;
}
| 334 | |
/**
 * Locate an existing note on disk and confirm its real location is inside the vault.
 *
 * Both the vault root and the note are passed through `realpathSync` before the
 * containment check, so a symlink inside the vault cannot expose a file that
 * lives outside it.
 *
 * @param {string} vaultPath - Vault root.
 * @param {string} safeRelativePath - Already-validated vault-relative path.
 * @param {string} originalRelativePath - Path as supplied by the caller; used only in error messages.
 * @returns {string} Real (symlink-resolved) absolute path of the note file.
 * @throws {Error} `Note not found: …` when the path does not exist or is not a
 *   regular file; `Invalid path: …` when the real path lies outside the vault.
 */
function resolveExistingVaultFilePath(vaultPath, safeRelativePath, originalRelativePath) {
  const candidate = path.join(vaultPath, safeRelativePath);
  const notFound = () => new Error(`Note not found: ${originalRelativePath}`);

  if (!fs.existsSync(candidate)) {
    throw notFound();
  }

  const realVaultRoot = fs.realpathSync(vaultPath);
  const realFile = fs.realpathSync(candidate);
  if (!isPathInside(realVaultRoot, realFile)) {
    throw new Error(`Invalid path: path escapes vault (${originalRelativePath})`);
  }

  const isRegularFile = fs.statSync(realFile).isFile();
  if (!isRegularFile) {
    throw notFound();
  }
  return realFile;
}
| 359 | |
/**
 * Check whether `childPath` is `parentPath` itself or lies beneath it.
 *
 * Both paths are resolved to absolute form and compared via `path.relative`,
 * which keeps the check correct when the parent is a filesystem root (`/` or a
 * Windows drive root): appending a separator to such a root would yield a
 * doubled separator that no child path starts with.
 *
 * The check is purely lexical; callers that care about symlinks must pass
 * already-resolved real paths.
 *
 * @param {string} parentPath
 * @param {string} childPath
 * @returns {boolean}
 */
function isPathInside(parentPath, childPath) {
  const parent = path.resolve(parentPath);
  const child = path.resolve(childPath);
  const relative = path.relative(parent, child);
  if (relative === '') {
    return true;
  }
  // Absolute result: on Windows, the child is on a different drive than the parent.
  const climbsOut =
    relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  return !climbsOut;
}
| 370 | |
/**
 * List folder choices for the Hub “New note” picker.
 *
 * Returns every non-hidden top-level directory of the vault plus each non-hidden
 * immediate child of `projects/` as `projects/<name>`; empty directories are
 * included. `inbox` is always the first element, whether or not it exists on
 * disk, so the UI can default to it; the remaining entries are sorted with
 * `localeCompare`. An unusable or unreadable vault path yields `['inbox']`.
 *
 * @param {string} vaultPath - Absolute vault root.
 * @returns {string[]} Vault-relative folder prefixes (no trailing slash).
 */
export function listVaultFolderOptions(vaultPath) {
  const vaultUsable = Boolean(vaultPath) && typeof vaultPath === 'string' && fs.existsSync(vaultPath);
  if (!vaultUsable) {
    return ['inbox'];
  }

  // Names of non-hidden subdirectories directly under `dir`.
  const visibleDirNames = (dir) =>
    fs
      .readdirSync(dir, { withFileTypes: true })
      .filter((entry) => entry.isDirectory() && !entry.name.startsWith('.'))
      .map((entry) => entry.name.replace(/\\/g, '/'));

  let topLevel;
  try {
    topLevel = visibleDirNames(vaultPath);
  } catch {
    return ['inbox'];
  }

  const folders = new Set(topLevel);
  if (folders.has('projects')) {
    try {
      for (const name of visibleDirNames(path.join(vaultPath, 'projects'))) {
        folders.add('projects/' + name);
      }
    } catch {
      /* ignore */
    }
  }

  const others = [...folders].filter((name) => name !== 'inbox');
  others.sort((a, b) => a.localeCompare(b));
  return ['inbox', ...others];
}
| 409 | |
/**
 * Infer a project slug from a path of the form `projects/<name>/…`.
 *
 * @param {string} relPath - Vault-relative path; backslashes are treated as separators.
 * @returns {string|undefined} Normalized slug of the first segment after a
 *   leading `projects/`, or `undefined` when the path does not start that way.
 */
function inferProjectFromPath(relPath) {
  const forwardPath = String(relPath).replace(/\\/g, '/');
  const match = /^projects\/([^/]+)/.exec(forwardPath);
  if (!match) {
    return undefined;
  }
  return normalizeSlug(match[1]);
}
| 414 | |
/**
 * Resolve the project a note belongs to for list/search/bulk operations.
 *
 * A `project` key in the frontmatter (anything other than null/undefined) takes
 * precedence and is returned slug-normalized; otherwise the project is inferred
 * from a `projects/<slug>/` path prefix. Matches readNote / GET ?project=
 * (see SPEC, docs/HUB-METADATA-BULK-OPS.md).
 *
 * @param {string} relativePath - vault-relative path
 * @param {object} [frontmatter] - Parsed frontmatter; non-object or array values are ignored.
 * @returns {string|undefined} The slug, or `undefined` when neither source provides one.
 */
export function effectiveProjectSlug(relativePath, frontmatter) {
  const forwardPath = String(relativePath ?? '').replace(/\\/g, '/');
  const isRecord = frontmatter && typeof frontmatter === 'object' && !Array.isArray(frontmatter);
  const declaredProject = isRecord ? frontmatter.project : undefined;

  if (declaredProject == null) {
    return inferProjectFromPath(forwardPath);
  }
  return normalizeSlug(String(declaredProject));
}