vault.mjs file-level

at main · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:4 fix(security): pin patched transitive deps to clear Dependabot moderate… · aaronrene · Jun 11, 2026
1 /**
2 * Vault utilities: list Markdown files, parse frontmatter + body, normalize project/tags. SPEC §1-2.
3 */
4
5 import fs from 'fs';
6 import path from 'path';
7 import yaml from 'js-yaml';
8
/**
 * Turn a project name or tag into its canonical slug form (SPEC §1).
 *
 * The input is lowercased, every character outside `a-z`, `0-9` and `-` becomes a
 * hyphen, runs of hyphens collapse to one, and a leading/trailing hyphen is removed.
 * @param {string} s - Raw project name or tag.
 * @returns {string} The slug, or '' when `s` is not a string.
 */
export function normalizeSlug(s) {
  if (typeof s !== 'string') {
    return '';
  }
  const lowered = s.toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9-]/g, '-');
  const collapsed = hyphenated.replace(/-+/g, '-');
  // After collapsing there is at most one hyphen at each end, so one pass suffices.
  return collapsed.replace(/^-|-$/g, '');
}
22
/**
 * Produce a clean tag list from frontmatter input.
 *
 * An array is used as-is; any other non-nullish value is stringified and split on
 * commas (each piece trimmed). Every candidate goes through `normalizeSlug`, and
 * candidates that normalize to an empty string are dropped.
 * @param {string|string[]} tags - Comma-separated string or array of tags.
 * @returns {string[]} Normalized, non-empty tag slugs ([] for null/undefined).
 */
export function normalizeTags(tags) {
  if (tags == null) {
    return [];
  }
  let candidates;
  if (Array.isArray(tags)) {
    candidates = tags;
  } else {
    candidates = String(tags)
      .split(',')
      .map((piece) => piece.trim());
  }
  const slugs = [];
  for (const candidate of candidates) {
    const slug = normalizeSlug(candidate);
    if (slug) {
      slugs.push(slug);
    }
  }
  return slugs;
}
33
/** Schema identifier reported in the `schema` field of `normalizeMetadataFacets` results. */
export const METADATA_FACETS_SCHEMA = 'knowtation.metadata_facets/v0';

/** Upper bound on how many raw entries of an array facet are considered; extra entries are dropped and flagged as truncated. */
export const METADATA_FACET_MAX_VALUES = 100;

/** Upper bound on the length (in UTF-16 code units) of a single facet value; longer values are cut and flagged as truncated. */
export const METADATA_FACET_MAX_VALUE_CHARS = 256;
37
/**
 * Build the body-free MetadataFacets v0 record for one note from its parsed frontmatter.
 *
 * The function does no file reads or writes; it only validates the path, normalizes
 * the known facet fields, and reports whether any value had to be cut to fit the
 * facet size limits. Frontmatter that is not a plain (non-array) object is treated
 * as empty.
 * @param {string} relativePath - Vault-relative note path.
 * @param {object} [frontmatter] - Parsed YAML/JSON frontmatter for the note.
 * @returns {{
 *   schema: string,
 *   path: string,
 *   facets: {
 *     project: string|null,
 *     tags: string[],
 *     date: string|null,
 *     updated: string|null,
 *     causal_chain_id: string|null,
 *     entity: string[],
 *     episode_id: string|null,
 *   },
 *   inferred: {
 *     folder: string|null,
 *     source_type: null,
 *   },
 *   truncated: boolean,
 * }}
 * @throws {Error} When `relativePath` is empty, absolute, contains a null byte, or escapes the vault.
 */
export function normalizeMetadataFacets(relativePath, frontmatter = {}) {
  const safePath = normalizeFacetRelativePath(relativePath);
  const isPlainRecord = frontmatter && typeof frontmatter === 'object' && !Array.isArray(frontmatter);
  const fm = isPlainRecord ? frontmatter : {};

  let truncated = false;
  // Every cap helper returns a `{ ..., truncated }` result; remember if any of them cut data.
  const track = (result) => {
    if (result.truncated) {
      truncated = true;
    }
    return result;
  };
  const scalarFacet = (raw) => track(capFacetValue(String(raw))).value;
  const slugFacet = (raw) => track(capFacetSlug(String(raw))).value || null;
  const slugListFacet = (raw) => track(normalizeFacetSlugArray(raw)).values;

  // Explicit frontmatter project wins; otherwise fall back to the shared project resolution.
  let projectSlug;
  if (fm.project != null) {
    projectSlug = normalizeSlug(String(fm.project));
  } else {
    projectSlug = effectiveProjectSlug(safePath, fm);
  }

  const facets = {
    project: projectSlug ? slugFacet(projectSlug) : null,
    tags: slugListFacet(fm.tags),
    date: fm.date != null ? scalarFacet(dateFacetString(fm.date)) : null,
    updated: fm.updated != null ? scalarFacet(dateFacetString(fm.updated)) : null,
    causal_chain_id: fm.causal_chain_id != null ? slugFacet(fm.causal_chain_id) : null,
    entity: fm.entity != null ? slugListFacet(fm.entity) : [],
    episode_id: fm.episode_id != null ? slugFacet(fm.episode_id) : null,
  };
  const inferred = {
    folder: inferredFolderForFacetPath(safePath),
    source_type: null,
  };

  return {
    schema: METADATA_FACETS_SCHEMA,
    path: safePath,
    facets,
    inferred,
    truncated,
  };
}
113
/**
 * Render a frontmatter date-like value as a string.
 * @param {unknown} value - A `Date` instance or any other value.
 * @returns {string} ISO 8601 text for `Date` instances, otherwise `String(value)`.
 */
function dateFacetString(value) {
  if (value instanceof Date) {
    return value.toISOString();
  }
  return String(value);
}
121
/**
 * Validate a vault-relative note path and return it in normalized forward-slash form.
 *
 * Backslashes are treated as separators. The path is rejected when it is not a
 * non-blank string, contains a null byte, is absolute (POSIX, drive-letter or
 * `//`-prefixed), or normalizes to `.`, `..` or something starting with `../`.
 * @param {string} relativePath - Candidate vault-relative path.
 * @returns {string} The POSIX-normalized relative path.
 * @throws {Error} With an `Invalid path: ...` message when any check fails.
 */
function normalizeFacetRelativePath(relativePath) {
  const isUsableString = typeof relativePath === 'string' && relativePath.trim() !== '';
  if (!isUsableString) {
    throw new Error('Invalid path: path must be a non-empty vault-relative string');
  }
  if (relativePath.includes('\0')) {
    throw new Error('Invalid path: path contains a null byte');
  }

  const slashed = relativePath.replace(/\\/g, '/');
  const absoluteSignals = [
    path.isAbsolute(relativePath),
    path.posix.isAbsolute(slashed),
    /^[a-zA-Z]:\//.test(slashed),
    slashed.startsWith('//'),
  ];
  if (absoluteSignals.some(Boolean)) {
    throw new Error('Invalid path: path must be vault-relative');
  }

  const cleaned = path.posix.normalize(slashed);
  const leavesVault = cleaned === '.' || cleaned === '..' || cleaned.startsWith('../');
  if (leavesVault) {
    throw new Error('Invalid path: path must be vault-relative and cannot escape vault');
  }
  return cleaned;
}
148
/**
 * Derive the containing folder of a vault-relative (forward-slash) note path.
 * @param {string} relativePath - Vault-relative note path.
 * @returns {string|null} The POSIX dirname, or null when the note has no parent folder.
 */
function inferredFolderForFacetPath(relativePath) {
  const parent = path.posix.dirname(relativePath);
  if (parent === '.') {
    return null;
  }
  return parent;
}
157
/**
 * Limit a facet string to a maximum length, reporting whether it was cut.
 *
 * Length is measured in UTF-16 code units. When the cut would land between the two
 * halves of a surrogate pair, the cut moves back one unit so the result never ends
 * in a lone high surrogate (which cannot be encoded as valid UTF-8).
 * @param {string} value - Facet text to cap.
 * @param {number} [maxChars=METADATA_FACET_MAX_VALUE_CHARS] - Maximum length in UTF-16 code units.
 * @returns {{ value: string, truncated: boolean }} The (possibly shortened) text and a truncation flag.
 */
function capFacetValue(value, maxChars = METADATA_FACET_MAX_VALUE_CHARS) {
  if (value.length <= maxChars) {
    return { value, truncated: false };
  }
  let end = maxChars;
  const lastKept = value.charCodeAt(end - 1);
  const firstDropped = value.charCodeAt(end);
  const lastIsHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
  const nextIsLowSurrogate = firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  // Only back off for a well-formed pair; a surrogate that was already unpaired in the input is left alone.
  if (lastIsHighSurrogate && nextIsLowSurrogate) {
    end -= 1;
  }
  return { value: value.slice(0, end), truncated: true };
}
168
/**
 * Slugify a facet value and cap its length.
 *
 * The value is normalized with `normalizeSlug`, capped with `capFacetValue`, and any
 * hyphens left dangling at the end by the cut are removed.
 * @param {string} value - Raw facet text.
 * @returns {{ value: string, truncated: boolean }} The capped slug and whether capping shortened it.
 */
function capFacetSlug(value) {
  const { value: cappedSlug, truncated } = capFacetValue(normalizeSlug(value));
  // Cutting can expose a hyphen at the end even though the full slug had none.
  const tidied = cappedSlug.replace(/-+$/g, '');
  return { value: tidied, truncated };
}
181
/**
 * Normalize a list-valued facet (array or comma-separated scalar) into capped slugs.
 *
 * Only the first METADATA_FACET_MAX_VALUES raw entries are considered. Entries whose
 * slug is empty are dropped. `truncated` is true when entries were cut off or any
 * single slug had to be shortened.
 * @param {unknown} value - Raw frontmatter value.
 * @returns {{ values: string[], truncated: boolean }}
 */
function normalizeFacetSlugArray(value) {
  if (value == null) {
    return { values: [], truncated: false };
  }

  let candidates;
  if (Array.isArray(value)) {
    candidates = value;
  } else {
    candidates = String(value)
      .split(',')
      .map((piece) => piece.trim());
  }

  let truncated = candidates.length > METADATA_FACET_MAX_VALUES;
  const limit = Math.min(candidates.length, METADATA_FACET_MAX_VALUES);
  const values = [];
  for (let index = 0; index < limit; index += 1) {
    const slugResult = capFacetSlug(String(candidates[index]));
    truncated = truncated || slugResult.truncated;
    if (slugResult.value) {
      values.push(slugResult.value);
    }
  }
  return { values, truncated };
}
200
/**
 * Split Markdown content into YAML frontmatter and body.
 *
 * Frontmatter is recognised only when the content starts with a `---` line and a
 * later line consisting of `---` closes it; the closing fence may be followed by a
 * newline and the body, or may be the very end of the content. Content without such
 * a block is returned entirely as body. Trailing whitespace is trimmed from the body.
 *
 * The returned frontmatter is always a plain (non-array) object: invalid YAML, an
 * empty block, or YAML that parses to a scalar or a list all yield `{}`.
 * @param {string} content - Raw file content.
 * @returns {{ frontmatter: object, body: string }}
 */
export function parseFrontmatterAndBody(content) {
  // The optional trailing group lets a closing fence at end-of-file (no newline) still count.
  const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n([\s\S]*))?$/);
  if (!match) {
    return { frontmatter: {}, body: content.trimEnd() };
  }
  let frontmatter = {};
  try {
    // NOTE(review): js-yaml 4.x `load` uses the safe default schema; on js-yaml 3.x this
    // should be `safeLoad` for untrusted notes — confirm the installed version.
    const parsed = yaml.load(match[1]);
    // yaml.load can return a string, number, boolean, array or null; only a mapping is frontmatter.
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      frontmatter = parsed;
    }
  } catch (_) {
    // invalid YAML → treat as empty
  }
  return { frontmatter, body: (match[2] ?? '').trimEnd() };
}
219
/**
 * Recursively collect every `.md` file below the vault root.
 *
 * Directories whose name matches an entry of `options.ignore` (compared
 * case-insensitively) are skipped together with everything beneath them.
 * @param {string} vaultPath - Absolute path to vault root
 * @param {{ ignore?: string[] }} options - Folder names to skip (e.g. templates, meta)
 * @returns {string[]} Vault-relative paths (forward slashes), in directory-walk order
 */
export function listMarkdownFiles(vaultPath, options = {}) {
  const loweredIgnores = (options.ignore || []).map((name) => name.toLowerCase());
  const skippedFolders = new Set(loweredIgnores);
  const found = [];

  const visit = (absDir, relPrefix) => {
    for (const entry of fs.readdirSync(absDir, { withFileTypes: true })) {
      const relPath = relPrefix ? `${relPrefix}/${entry.name}` : entry.name;
      if (entry.isDirectory()) {
        if (!skippedFolders.has(entry.name.toLowerCase())) {
          visit(path.join(absDir, entry.name), relPath);
        }
        continue;
      }
      if (entry.isFile() && entry.name.endsWith('.md')) {
        found.push(relPath.replace(/\\/g, '/'));
      }
    }
  };

  visit(vaultPath, '');
  return found;
}
246
/**
 * Load a single note from the vault and return its parsed content plus normalized metadata.
 *
 * The path is validated as vault-relative, then resolved to an existing regular file
 * whose real location is inside the vault. Optional metadata fields that are absent
 * from the frontmatter come back as `undefined`; `tags` is always an array.
 * @param {string} vaultPath - Absolute path to vault root
 * @param {string} relativePath - Vault-relative path (e.g. inbox/foo.md)
 * @returns {{ path: string, frontmatter: object, body: string, project?: string, tags?: string[], date?: string, updated?: string, causal_chain_id?: string, entity?: string[], episode_id?: string }}
 * @throws if path escapes vault or file not found
 */
export function readNote(vaultPath, relativePath) {
  const safe = resolveVaultRelativePath(vaultPath, relativePath);
  const fullPath = resolveExistingVaultFilePath(vaultPath, safe, relativePath);
  const { frontmatter, body } = parseFrontmatterAndBody(fs.readFileSync(fullPath, 'utf8'));

  // Dates parsed by YAML arrive as Date objects; anything else is stringified verbatim.
  const toDateText = (raw) => {
    if (raw == null) {
      return undefined;
    }
    return raw instanceof Date ? raw.toISOString() : String(raw);
  };
  const toOptionalSlug = (raw) => (raw == null ? undefined : normalizeSlug(String(raw)));

  const project = effectiveProjectSlug(safe, frontmatter);
  const tags = normalizeTags(frontmatter.tags);
  const date = toDateText(frontmatter.date);
  const updated = toDateText(frontmatter.updated);
  const causal_chain_id = toOptionalSlug(frontmatter.causal_chain_id);

  let entity;
  const entityRaw = frontmatter.entity;
  if (entityRaw != null) {
    // A single scalar entity is treated as a one-element list.
    const entityList = Array.isArray(entityRaw) ? entityRaw : [entityRaw];
    entity = entityList.map((item) => normalizeSlug(String(item))).filter(Boolean);
  }

  const episode_id = toOptionalSlug(frontmatter.episode_id);

  return {
    path: safe.replace(/\\/g, '/'),
    frontmatter,
    body,
    project,
    tags,
    date,
    updated,
    causal_chain_id,
    entity,
    episode_id,
  };
}
297
/**
 * Ensure a user-supplied path is vault-relative and stays inside the vault.
 *
 * Backslashes are treated as separators. The path is rejected when it is absolute,
 * when it normalizes to `..` or begins with a `../` segment, or when resolving it
 * against the vault root lands outside that root. Containment is decided per path
 * segment (via `path.relative`), so a sibling directory that merely shares the
 * vault's name as a prefix (e.g. `vault-evil` next to `vault`) is not accepted, and
 * file names that merely begin with two dots (e.g. `..notes.md`) are.
 * No filesystem access is performed.
 * @param {string} vaultPath - Absolute vault root
 * @param {string} relativePath - User-provided path
 * @returns {string} Safe vault-relative path (forward slashes)
 * @throws {Error} if path is absolute or escapes vault
 */
export function resolveVaultRelativePath(vaultPath, relativePath) {
  const normalized = path.normalize(relativePath).replace(/\\/g, '/');
  const climbsOut = normalized === '..' || normalized.startsWith('../');
  if (climbsOut || path.isAbsolute(relativePath)) {
    throw new Error(`Invalid path: path must be vault-relative and cannot escape vault (${relativePath})`);
  }

  const root = path.resolve(vaultPath);
  const full = path.resolve(root, normalized);
  // Converting backslashes above can introduce new `..` segments after normalization,
  // so the resolved location is re-checked segment-wise rather than by string prefix.
  const fromRoot = path.relative(root, full);
  const outsideRoot =
    fromRoot === '..' || fromRoot.startsWith(`..${path.sep}`) || path.isAbsolute(fromRoot);
  if (outsideRoot) {
    throw new Error(`Invalid path: path escapes vault (${relativePath})`);
  }
  return fromRoot.replace(/\\/g, '/');
}
316
/**
 * Report whether a vault-relative path is safe and points at an existing regular file in the vault.
 * Used by the Hub to drop semantic-search hits that are not on disk for the active vault.
 *
 * Never throws: any validation or filesystem failure is reported as `false`.
 * @param {string} vaultPath - Absolute vault root
 * @param {string} relativePath - Vault-relative path
 * @returns {boolean}
 */
export function noteFileExistsInVault(vaultPath, relativePath) {
  const isUsablePath = typeof relativePath === 'string' && relativePath.trim() !== '';
  if (!isUsablePath) {
    return false;
  }
  try {
    const safe = resolveVaultRelativePath(vaultPath, relativePath);
    resolveExistingVaultFilePath(vaultPath, safe, relativePath);
  } catch {
    return false;
  }
  return true;
}
334
/**
 * Locate an existing note on disk and confirm its real location is inside the vault.
 *
 * Both the vault root and the target are passed through `realpath`, so a symlink
 * stored inside the vault cannot be used to reach a file outside it.
 * @param {string} vaultPath - Vault root directory.
 * @param {string} safeRelativePath - Already-validated vault-relative path.
 * @param {string} originalRelativePath - Path as supplied by the caller; used only in error messages.
 * @returns {string} Real (symlink-resolved) absolute path of the note file.
 * @throws {Error} `Note not found` when the path is missing or not a regular file;
 *   `Invalid path` when the real target lies outside the vault.
 */
function resolveExistingVaultFilePath(vaultPath, safeRelativePath, originalRelativePath) {
  const notFound = () => new Error(`Note not found: ${originalRelativePath}`);

  const candidate = path.join(vaultPath, safeRelativePath);
  if (!fs.existsSync(candidate)) {
    throw notFound();
  }

  const realVault = fs.realpathSync(vaultPath);
  const realTarget = fs.realpathSync(candidate);
  if (!isPathInside(realVault, realTarget)) {
    throw new Error(`Invalid path: path escapes vault (${originalRelativePath})`);
  }

  const targetStats = fs.statSync(realTarget);
  if (!targetStats.isFile()) {
    throw notFound();
  }
  return realTarget;
}
359
/**
 * Decide whether `childPath` is `parentPath` itself or lies somewhere beneath it.
 *
 * Both paths are resolved to absolute form and compared segment-wise through
 * `path.relative`, which also gives the right answer when the parent is a filesystem
 * root (where appending a separator to the parent would produce a doubled separator).
 * Symlinks are not resolved here; callers pass real paths when that matters.
 * @param {string} parentPath - Directory expected to contain the child.
 * @param {string} childPath - Path to test.
 * @returns {boolean} True when the child equals the parent or is nested inside it.
 */
function isPathInside(parentPath, childPath) {
  const fromParent = path.relative(path.resolve(parentPath), path.resolve(childPath));
  if (fromParent === '') {
    return true;
  }
  const climbsOut = fromParent === '..' || fromParent.startsWith(`..${path.sep}`);
  // An absolute result means there is no relative route at all (e.g. another Windows drive).
  return !climbsOut && !path.isAbsolute(fromParent);
}
370
/**
 * Folder choices for the Hub “New note” picker.
 *
 * Lists the non-hidden top-level directories of the vault plus each non-hidden
 * immediate child of `projects/` as `projects/<name>`; empty directories are included.
 * `inbox` is always the first entry, whether or not it exists on disk; the remaining
 * entries are sorted with `localeCompare`. If the vault path is missing, not a string,
 * or unreadable, the result is just `['inbox']`.
 * @param {string} vaultPath - Absolute vault root
 * @returns {string[]} Vault-relative folder prefixes (no trailing slash)
 */
export function listVaultFolderOptions(vaultPath) {
  const hasUsableRoot = Boolean(vaultPath) && typeof vaultPath === 'string' && fs.existsSync(vaultPath);
  if (!hasUsableRoot) {
    return ['inbox'];
  }

  // Names of the non-hidden directories directly inside `dir`.
  const visibleDirNames = (dir) =>
    fs
      .readdirSync(dir, { withFileTypes: true })
      .filter((entry) => entry.isDirectory() && !entry.name.startsWith('.'))
      .map((entry) => entry.name.replace(/\\/g, '/'));

  let topLevelNames;
  try {
    topLevelNames = visibleDirNames(vaultPath);
  } catch {
    return ['inbox'];
  }

  const folders = new Set(topLevelNames);
  if (folders.has('projects')) {
    try {
      for (const projectName of visibleDirNames(path.join(vaultPath, 'projects'))) {
        folders.add(`projects/${projectName}`);
      }
    } catch {
      /* ignore */
    }
  }

  folders.delete('inbox');
  const others = Array.from(folders);
  others.sort((left, right) => left.localeCompare(right));
  return ['inbox', ...others];
}
409
/**
 * Infer a project slug from a path of the form `projects/<name>/...`.
 * Backslashes are treated as separators before matching.
 * @param {string} relPath - Vault-relative path.
 * @returns {string|undefined} Normalized slug of the first segment under `projects/`, or undefined when the path is not under `projects/`.
 */
function inferProjectFromPath(relPath) {
  const forwardPath = String(relPath).replace(/\\/g, '/');
  const hit = /^projects\/([^/]+)/.exec(forwardPath);
  if (hit === null) {
    return undefined;
  }
  return normalizeSlug(hit[1]);
}
414
/**
 * Resolve the project a note belongs to, for list/search/bulk operations.
 *
 * A non-nullish `frontmatter.project` wins and is returned in normalized form; otherwise
 * the slug is inferred from a `projects/<slug>/` path prefix. Frontmatter that is not a
 * plain (non-array) object is ignored.
 * Matches readNote / GET ?project= (see SPEC, docs/HUB-METADATA-BULK-OPS.md).
 * @param {string} relativePath - vault-relative path
 * @param {object} [frontmatter]
 * @returns {string|undefined} Project slug, or undefined when neither source provides one.
 */
export function effectiveProjectSlug(relativePath, frontmatter) {
  const isPlainRecord = frontmatter && typeof frontmatter === 'object' && !Array.isArray(frontmatter);
  const declaredProject = isPlainRecord ? frontmatter.project : undefined;
  if (declaredProject != null) {
    return normalizeSlug(String(declaredProject));
  }
  const forwardPath = String(relativePath ?? '').replace(/\\/g, '/');
  return inferProjectFromPath(forwardPath);
}