tabular-import.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | /** |
| 2 | * Shared: tabular data (header row + data rows) → one Markdown note per row. |
| 3 | * Used by generic-csv, excel-xlsx, google-sheets. |
| 4 | * Each note has frontmatter `title` (source file or spreadsheet id + optional label from a `title|name|subject|summary|label` column) and body H1 matching that title. |
| 5 | */ |
| 6 | |
| 7 | import crypto from 'crypto'; |
| 8 | import path from 'path'; |
| 9 | import { writeNote } from '../write.mjs'; |
| 10 | import { normalizeSlug } from '../vault.mjs'; |
| 11 | |
/** Maximum number of data rows (header excluded) a single import may contain; exceeding it raises an error. */
const MAX_ROWS = 10_000;
/** Cell values longer than this many UTF-16 code units are cut to this length and suffixed with `…`. */
const MAX_FIELD_LEN = 32_000;
/** Cap on the pretty-printed per-row JSON block; longer output is cut and marked `…(truncated)`. */
const MAX_JSON_BLOCK_CHARS = 512_000;
/** Headers (normalized to lowercase) checked in order for a short human label for `title` frontmatter. */
const PRIMARY_LABEL_HEADER_ORDER = ['title', 'name', 'subject', 'summary', 'label'];
| 17 | |
/**
 * Build a plain object mapping each header to its cell value, for the per-row JSON dump.
 *
 * - Blank headers fall back to `column_<index>`.
 * - Repeated headers receive a `__2`, `__3`, … suffix so no column overwrites another.
 * - Values are stringified (`null`/`undefined` → `''`), missing cells become `''`, and values
 *   longer than `MAX_FIELD_LEN` are cut to that length and suffixed with `…`.
 *
 * Keys are collected in a Map and materialized with `Object.fromEntries`, so a header such as
 * `__proto__` becomes an ordinary own property. Assigning it on a `{}` literal would hit the
 * prototype setter, which silently discards string values and would drop the column.
 *
 * @param {string[]} headers
 * @param {string[]} cells
 * @returns {Record<string, string>}
 */
export function buildRowObjectForJson(headers, cells) {
  /** @type {Map<string, string>} */
  const entries = new Map();
  for (let c = 0; c < headers.length; c++) {
    const base = (headers[c] && String(headers[c]).trim()) || `column_${c}`;
    let key = base;
    // Probe base__2, base__3, … until the key is unused.
    for (let n = 2; entries.has(key); n++) {
      key = `${base}__${n}`;
    }
    const raw = c < cells.length ? String(cells[c] ?? '') : '';
    entries.set(key, raw.length > MAX_FIELD_LEN ? `${raw.slice(0, MAX_FIELD_LEN)}…` : raw);
  }
  return Object.fromEntries(entries);
}
| 40 | |
/**
 * Pick a short human-readable label for a row from its "primary" column.
 *
 * Candidate header names are tried in `PRIMARY_LABEL_HEADER_ORDER` priority; headers are
 * compared case-insensitively and only the first column carrying a given name is consulted.
 * The cell has CRLF converted to LF and is trimmed; values over 200 characters are cut to
 * 200 and suffixed with `…`.
 *
 * @param {string[]} headers
 * @param {string[]} cells
 * @returns {string | null} The label from the highest-priority non-empty column, or null
 */
function findPrimaryLabelValue(headers, cells) {
  const normalizedHeaders = headers.map((header) => header.toLowerCase());
  for (const candidate of PRIMARY_LABEL_HEADER_ORDER) {
    const column = normalizedHeaders.indexOf(candidate);
    if (column === -1) continue;
    const text = (cells[column] || '').replace(/\r\n/g, '\n').trim();
    if (text === '') continue;
    if (text.length <= 200) return text;
    return text.slice(0, 200) + '…';
  }
  return null;
}
| 58 | |
/**
 * Human-readable `title` frontmatter: always includes the source file/sheet id; includes the
 * row number when there is no usable label.
 *
 * The result is always a single line: every whitespace run (including bare `\n`, `\r` and
 * tabs) in both the file label and the primary label is collapsed to one space, because the
 * title is also emitted as a Markdown `# H1`, which cannot span lines.
 *
 * @param {string} fileLabel Source file name or spreadsheet id; falls back to `tabular` when empty.
 * @param {number} rowNum Row index, used only when no label is available.
 * @param {string | null} primary Label taken from the row, if any.
 * @returns {string} `<file> · <label>` (at most 218 chars) or `<file> (row <n>)`.
 */
function buildNoteTitleForRow(fileLabel, rowNum, primary) {
  const file = String(fileLabel || 'tabular').replace(/\s+/g, ' ').trim().slice(0, 100);
  if (primary) {
    const label = String(primary).replace(/\s+/g, ' ').trim().slice(0, 120);
    // A label that is blank after normalization falls through to the row form.
    if (label) {
      const combined = `${file} · ${label}`;
      return combined.length > 220 ? combined.slice(0, 217) + '…' : combined;
    }
  }
  return `${file} (row ${rowNum})`.slice(0, 220);
}
| 74 | |
/**
 * Write one Markdown note per data row of a tabular matrix.
 *
 * Row 0 of `matrix` supplies the column headers (trimmed; blank headers become `column`).
 * Every later row becomes a note at
 * `<outputBase>/imports/<subdir>/row-<rowNum zero-padded to 5>-<12 hex chars>.md`.
 * The note body has an H1 title, one `- **header:** value` bullet per column (`—` for empty
 * values) and a fenced JSON dump of the row. Frontmatter carries `source`, `title`,
 * `source_id`, `date` (UTC `YYYY-MM-DD`), `[fileKey]`, `row_index`, `import_column_headers`
 * and, when provided, `project` and `tags`.
 *
 * `source_id` is the trimmed value (max 200 chars) of the first column whose header is `id`,
 * `uuid`, `key` or `source_id` / `sourceid` / `source id` (case-insensitive). When no such
 * column exists or its cell is empty, it is the first 32 hex chars of a SHA-256 over the row
 * content, file label and row number.
 *
 * @param {(string|number|boolean|null|undefined)[][]} matrix - row0 = headers, rest = data
 * @param {{ vaultPath: string, outputBase: string, project?: string, tags: string[], dryRun: boolean }} ctx
 * @param {{ source: string, fileLabel: string, subdir: string, fileKey: string }} meta - fileKey = frontmatter key for file id (e.g. csv_file, xlsx_file)
 * @returns {Promise<{ imported: { path: string, source_id?: string }[], count: number }>}
 * @throws {Error} When the matrix holds more than `MAX_ROWS` data rows. The check runs before
 *   any note is written, so an oversized import does not leave a partial set of notes behind.
 */
export async function importStringMatrixToNotes(matrix, ctx, meta) {
  const { vaultPath, outputBase, project, tags, dryRun } = ctx;
  const { source, fileLabel, subdir, fileKey } = meta;
  if (!matrix || matrix.length < 2) {
    return { imported: [], count: 0 };
  }

  // Reject oversized input up front instead of after MAX_ROWS notes have already been written.
  if (matrix.length - 1 > MAX_ROWS) {
    throw new Error(`tabular import: row limit exceeded (max ${MAX_ROWS} data rows).`);
  }

  const headerRow = matrix[0].map((c) => String(c ?? '').trim());
  const headers = headerRow.map((h) => h || 'column');
  const idColIdx = headers.findIndex(
    (h) => /^(id|uuid|key|source_id)$/i.test(h) || /^source[\s_]?id$/i.test(h),
  );

  const outSub = path.join(outputBase, 'imports', subdir).replace(/\\/g, '/');
  const imported = [];
  const now = new Date().toISOString().slice(0, 10);

  for (let rowNum = 1; rowNum < matrix.length; rowNum++) {
    const row = matrix[rowNum] || [];
    // Pad or trim the row to the header width and cap each cell's length.
    const cells = headers.map((_, j) => {
      const c = j < row.length ? String(row[j] ?? '') : '';
      return c.length > MAX_FIELD_LEN ? c.slice(0, MAX_FIELD_LEN) + '…' : c;
    });
    const rowLine = cells.join('\t');

    // One digest serves both the fallback source_id and the file-name slug. The hashed input
    // is kept exactly as before so ids and paths of existing imports stay stable.
    const rowHash = crypto
      .createHash('sha256')
      .update(String(rowLine) + fileLabel + String(rowNum))
      .digest('hex');
    const explicitId = idColIdx >= 0 ? (cells[idColIdx] || '').trim() : '';
    const sourceId = explicitId ? explicitId.slice(0, 200) : rowHash.slice(0, 32);

    const primaryLabel = findPrimaryLabelValue(headers, cells);
    const noteTitle = buildNoteTitleForRow(fileLabel, rowNum, primaryLabel);

    const bodyLines = [`# ${noteTitle}`, ''];
    for (let c = 0; c < headers.length; c++) {
      const val = (cells[c] || '').replace(/\r\n/g, '\n');
      bodyLines.push(`- **${headers[c]}:** ${val || '—'}`);
    }
    const rowObj = buildRowObjectForJson(headers, cells);
    let jsonBlock = JSON.stringify(rowObj, null, 2);
    if (jsonBlock.length > MAX_JSON_BLOCK_CHARS) {
      jsonBlock = jsonBlock.slice(0, MAX_JSON_BLOCK_CHARS) + '\n…(truncated)';
    }
    bodyLines.push('', '## Full row (JSON)', '', '```json', jsonBlock, '```');
    const body = bodyLines.join('\n');

    const fileSlug = rowHash.slice(0, 12);
    const outputRel = path
      .join(outSub, `row-${String(rowNum).padStart(5, '0')}-${fileSlug}.md`)
      .replace(/\\/g, '/');

    const frontmatter = {
      source,
      title: noteTitle,
      source_id: sourceId,
      date: now,
      [fileKey]: fileLabel,
      row_index: rowNum,
      import_column_headers: JSON.stringify(headers),
      ...(project ? { project: normalizeSlug(project) } : {}),
      ...(tags?.length ? { tags } : {}),
    };

    if (!dryRun) {
      // NOTE(review): the result of writeNote is not awaited; confirm it is synchronous.
      writeNote(vaultPath, outputRel, { body, frontmatter });
    }
    imported.push({ path: outputRel, source_id: sourceId });
  }

  return { imported, count: imported.length };
}
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
1 day ago