tabular-import.mjs
156 lines 5.8 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
1 /**
2 * Shared: tabular data (header row + data rows) → one Markdown note per row.
3 * Used by generic-csv, excel-xlsx, google-sheets.
4 * Each note has frontmatter `title` (source file or spreadsheet id + optional label from a `title|name|subject|summary|label` column) and body H1 matching that title.
5 */
6
7 import crypto from 'crypto';
8 import path from 'path';
9 import { writeNote } from '../write.mjs';
10 import { normalizeSlug } from '../vault.mjs';
11
/** Maximum number of data rows (header row excluded) accepted by one import. */
const MAX_ROWS = 10_000;
/** Maximum characters kept per cell; longer values are cut and suffixed with `…`. */
const MAX_FIELD_LEN = 32_000;
/** Maximum characters of the pretty-printed JSON block embedded in each note body. */
const MAX_JSON_BLOCK_CHARS = 512_000;
/**
 * Headers (normalized to lowercase) checked in order for a short human label for `title` frontmatter.
 * Frozen so that no code path can accidentally reorder or extend the shared list at runtime.
 */
const PRIMARY_LABEL_HEADER_ORDER = Object.freeze(['title', 'name', 'subject', 'summary', 'label']);
17
/**
 * Build a plain object mapping each column header to the row's cell text, suitable for `JSON.stringify`.
 *
 * - Headers are trimmed; an empty header becomes `column_<index>`.
 * - Duplicate keys are disambiguated with a `__2`, `__3`, … suffix.
 * - Missing or nullish cells become `''`; values longer than `MAX_FIELD_LEN` are cut and suffixed with `…`.
 *
 * @param {string[]} headers
 * @param {string[]} cells
 * @returns {Record<string, string>}
 */
export function buildRowObjectForJson(headers, cells) {
  /** @type {Record<string, string>} */
  const rowObject = {};
  for (let c = 0; c < headers.length; c++) {
    const base = (headers[c] && String(headers[c]).trim()) || `column_${c}`;
    let key = base;
    let suffix = 2;
    while (Object.hasOwn(rowObject, key)) {
      key = `${base}__${suffix}`;
      suffix++;
    }
    let value = c < cells.length ? String(cells[c] ?? '') : '';
    if (value.length > MAX_FIELD_LEN) value = `${value.slice(0, MAX_FIELD_LEN)}…`;
    // Define an own data property instead of assigning: for a header literally named
    // `__proto__`, `obj[key] = value` would hit the legacy prototype setter and the
    // column would silently vanish from the JSON output.
    Object.defineProperty(rowObject, key, {
      value,
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  return rowObject;
}
40
/**
 * Pick a short human-readable label for a row from the first "primary" column that has a value.
 *
 * Candidate header names come from `PRIMARY_LABEL_HEADER_ORDER` and are tried in that order.
 * Headers are compared trimmed and case-insensitively. If several columns share a candidate
 * name, each is checked left to right so an empty first duplicate does not hide a later value.
 * CRLF in the value is normalized to LF, the value is trimmed, and anything longer than
 * 200 characters is cut and suffixed with `…`.
 *
 * @param {string[]} headers
 * @param {string[]} cells
 * @returns {string | null} First non-empty cell for a "primary" column, or null
 */
function findPrimaryLabelValue(headers, cells) {
  const normalized = headers.map((h) => String(h ?? '').trim().toLowerCase());
  for (const want of PRIMARY_LABEL_HEADER_ORDER) {
    for (let idx = 0; idx < normalized.length; idx++) {
      if (normalized[idx] !== want) continue;
      const value = String(cells[idx] ?? '').replace(/\r\n/g, '\n').trim();
      if (value) {
        return value.length > 200 ? `${value.slice(0, 200)}…` : value;
      }
    }
  }
  return null;
}
58
/**
 * Human-readable `title` frontmatter: always includes the source file/sheet id; includes row when no label column.
 *
 * The result is always a single line: every whitespace run (including LF, CR and tabs) in both
 * the file label and the primary label is collapsed to one space, because the title is also
 * used as the note's `# H1` heading. A blank file label falls back to `tabular`; a blank
 * primary label is treated as absent.
 *
 * @param {string} fileLabel
 * @param {number} rowNum
 * @param {string | null} primary
 * @returns {string} `<file> · <label>` (at most 218 chars) or `<file> (row <n>)` (at most 220 chars)
 */
function buildNoteTitleForRow(fileLabel, rowNum, primary) {
  const file = String(fileLabel || '').replace(/\s+/g, ' ').trim().slice(0, 100) || 'tabular';
  const label = primary ? String(primary).replace(/\s+/g, ' ').trim().slice(0, 120) : '';
  if (label) {
    const combined = `${file} · ${label}`;
    return combined.length > 220 ? `${combined.slice(0, 217)}…` : combined;
  }
  return `${file} (row ${rowNum})`.slice(0, 220);
}
74
/**
 * Turn a tabular matrix (header row + data rows) into one Markdown note per data row.
 *
 * For every data row this builds:
 * - a `source_id`: the trimmed value of the first `id` / `uuid` / `key` / `source_id` / `source id`
 *   column (max 200 chars) when present and non-empty, otherwise the first 32 hex chars of a
 *   SHA-256 over the tab-joined cells, the file label and the row number;
 * - a body with an H1 title, one bullet per column, and the full row as a fenced JSON block;
 * - an output path `<outputBase>/imports/<subdir>/row-<NNNNN>-<12 hex>.md` (forward slashes).
 *
 * The row limit is validated before anything is written, so an oversized input fails without
 * leaving a partial import behind.
 *
 * @param {(string|number|boolean|null|undefined)[][]} matrix - row0 = headers, rest = data
 * @param {{ vaultPath: string, outputBase: string, project?: string, tags: string[], dryRun: boolean }} ctx
 * @param {{ source: string, fileLabel: string, subdir: string, fileKey: string }} meta - fileKey = frontmatter key for file id (e.g. csv_file, xlsx_file)
 * @returns {Promise<{ imported: { path: string, source_id?: string }[], count: number }>}
 * @throws {Error} When the matrix holds more than `MAX_ROWS` data rows.
 */
export async function importStringMatrixToNotes(matrix, ctx, meta) {
  const { vaultPath, outputBase, project, tags, dryRun } = ctx;
  const { source, fileLabel, subdir, fileKey } = meta;
  if (!matrix || matrix.length < 2) {
    return { imported: [], count: 0 };
  }
  // Fail fast: checking inside the loop would write MAX_ROWS notes and only then throw.
  if (matrix.length - 1 > MAX_ROWS) {
    throw new Error(`tabular import: row limit exceeded (max ${MAX_ROWS} data rows).`);
  }

  // Blank headers get the placeholder name `column`, so every header is a non-empty string.
  const headers = matrix[0].map((c) => String(c ?? '').trim() || 'column');
  const idColIdx = headers.findIndex(
    (h) => /^(id|uuid|key|source_id)$/i.test(h) || /^source[\s_]?id$/i.test(h),
  );

  const outSub = path.join(outputBase, 'imports', subdir).replace(/\\/g, '/');
  const hasTags = Array.isArray(tags) && tags.length > 0;
  const imported = [];
  const now = new Date().toISOString().slice(0, 10);

  for (let rowNum = 1; rowNum < matrix.length; rowNum++) {
    const row = matrix[rowNum] || [];
    // One cell per header: pad short rows with '', drop extras, cap each value's length.
    const cells = headers.map((_, j) => {
      const text = j < row.length ? String(row[j] ?? '') : '';
      return text.length > MAX_FIELD_LEN ? `${text.slice(0, MAX_FIELD_LEN)}…` : text;
    });

    // Single digest per row; prefixes of it serve as fallback source_id and as file slug.
    const rowHash = crypto
      .createHash('sha256')
      .update(`${cells.join('\t')}${fileLabel}${rowNum}`)
      .digest('hex');
    const idValue = idColIdx >= 0 ? cells[idColIdx].trim() : '';
    const sourceId = idValue ? idValue.slice(0, 200) : rowHash.slice(0, 32);

    const primaryLabel = findPrimaryLabelValue(headers, cells);
    const noteTitle = buildNoteTitleForRow(fileLabel, rowNum, primaryLabel);

    const bodyLines = [`# ${noteTitle}`, ''];
    headers.forEach((label, c) => {
      const val = cells[c].replace(/\r\n/g, '\n');
      bodyLines.push(`- **${label}:** ${val || '—'}`);
    });
    let jsonBlock = JSON.stringify(buildRowObjectForJson(headers, cells), null, 2);
    if (jsonBlock.length > MAX_JSON_BLOCK_CHARS) {
      jsonBlock = `${jsonBlock.slice(0, MAX_JSON_BLOCK_CHARS)}\n…(truncated)`;
    }
    bodyLines.push('', '## Full row (JSON)', '', '```json', jsonBlock, '```');
    const body = bodyLines.join('\n');

    const fileName = `row-${String(rowNum).padStart(5, '0')}-${rowHash.slice(0, 12)}.md`;
    const outputRel = path.join(outSub, fileName).replace(/\\/g, '/');

    const frontmatter = {
      source,
      title: noteTitle,
      source_id: sourceId,
      date: now,
      [fileKey]: fileLabel,
      row_index: rowNum,
      import_column_headers: JSON.stringify(headers),
      ...(project && { project: normalizeSlug(project) }),
      ...(hasTags && { tags }),
    };

    if (!dryRun) {
      // NOTE(review): writeNote's return type is not visible here; awaiting is a no-op if it
      // is synchronous and prevents a floating promise if it is async.
      await writeNote(vaultPath, outputRel, { body, frontmatter });
    }
    imported.push({ path: outputRel, source_id: sourceId });
  }

  return { imported, count: imported.length };
}
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 1 day ago