excel-xlsx.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | /** |
| 2 | * Microsoft Excel .xlsx (first sheet) → one note per data row (same as generic-csv). |
| 3 | * Uses `exceljs` (not the unmaintained `xlsx` / SheetJS community) for secure parsing. |
| 4 | */ |
| 5 | |
| 6 | import fs from 'fs'; |
| 7 | import path from 'path'; |
| 8 | import ExcelJS from 'exceljs'; |
| 9 | import { importStringMatrixToNotes } from './tabular-import.mjs'; |
| 10 | |
| 11 | const MAX_XLSX_BYTES = 50 * 1024 * 1024; |
| 12 | const MAX_FIELD = 32_000; |
| 13 | |
/**
 * Convert a raw ExcelJS cell value into plain text.
 *
 * Handled shapes, in order: null/undefined, Date, primitives, rich text,
 * hyperlink, formula (regular or shared), error value. Any other object is
 * JSON-encoded.
 *
 * @param {unknown} val Raw cell value (primitive, Date, or a value object).
 * @returns {string} Text for the value. Empty string for null/undefined,
 *   for an invalid Date, and for a formula without a result.
 */
function valueToString(val) {
  if (val == null) return '';
  if (val instanceof Date) {
    // toISOString() throws RangeError on an invalid Date; emit '' instead.
    return Number.isNaN(val.getTime()) ? '' : val.toISOString().slice(0, 10);
  }
  if (typeof val !== 'object') return String(val);

  if ('richText' in val && Array.isArray(val.richText)) {
    return val.richText.map((run) => (run && run.text) || '').join('');
  }
  if ('hyperlink' in val) {
    // The label can itself be a rich-text object, so recurse rather than
    // calling String(), which would yield "[object Object]".
    return valueToString(val.text);
  }
  if ('formula' in val || 'sharedFormula' in val) {
    // Recursing keeps Date and error results consistent with plain cells.
    return valueToString(val.result);
  }
  if ('error' in val) {
    // Error values carry their display text (e.g. "#N/A") in `error`.
    return String(val.error);
  }
  return JSON.stringify(val);
}
| 39 | |
/**
 * Text for a single worksheet cell.
 *
 * Uses the cell's `text` when it is non-empty and otherwise converts the raw
 * `value` with `valueToString`. Valid Date values bypass `text` so that dates
 * are always formatted by `valueToString`.
 *
 * NOTE(review): ExcelJS appears to build `text` for date cells with
 * `Date#toString()`, which depends on locale and timezone. Confirm against
 * the pinned exceljs version.
 *
 * @param {import('exceljs').Cell} cell Cell to read; may be null/undefined.
 * @returns {string} Cell text, or '' when `cell` is falsy.
 */
function stringFromCell(cell) {
  if (!cell) return '';

  const { value } = cell;
  // Invalid dates stay on the original `text`-first path.
  if (value instanceof Date && !Number.isNaN(value.getTime())) {
    return valueToString(value);
  }

  const { text } = cell;
  if (text != null) {
    const display = String(text);
    if (display.length > 0) return display;
  }
  return valueToString(value);
}
| 51 | |
/**
 * Flatten a worksheet row into a dense array of cell strings.
 *
 * Index 0 holds column 1. Columns the iterator did not report are filled with
 * ''. Strings longer than MAX_FIELD are cut to MAX_FIELD and get a trailing '…'.
 *
 * @param {import('exceljs').Row} row Row to read; may be null/undefined.
 * @returns {string[]} One entry per column up to the highest reported column,
 *   or [] when the row is falsy or reports no cells.
 */
function rowToStringArray(row) {
  if (!row) return [];

  const textByColumn = new Map();
  let lastColumn = 0;

  row.eachCell({ includeEmpty: true }, (cell, colNumber) => {
    if (colNumber < 1) return;
    lastColumn = Math.max(lastColumn, colNumber);
    const text = stringFromCell(cell);
    const clipped = text.length > MAX_FIELD ? text.slice(0, MAX_FIELD) + '…' : text;
    textByColumn.set(colNumber, clipped);
  });

  if (lastColumn < 1) return [];
  return Array.from({ length: lastColumn }, (_, index) => {
    const column = index + 1;
    return textByColumn.has(column) ? textByColumn.get(column) : '';
  });
}
| 74 | |
/**
 * Import the first worksheet of an .xlsx workbook. The rows are collected as
 * a rectangular string matrix and passed to `importStringMatrixToNotes`.
 *
 * @param {string} input Path to the .xlsx file; relative paths resolve
 *   against `process.cwd()`.
 * @param {{ vaultPath: string, outputBase: string, project?: string, tags: string[], dryRun: boolean }} ctx
 *   Import context, passed through unchanged to `importStringMatrixToNotes`.
 * @returns {Promise<any>} `{ imported: [], count: 0 }` when the workbook has
 *   no worksheets or the first sheet has fewer than two rows; otherwise the
 *   result of `importStringMatrixToNotes`.
 * @throws {Error} When the path is not a readable regular file, does not end
 *   in `.xlsx`, or is larger than MAX_XLSX_BYTES.
 */
export async function importExcelXlsx(input, ctx) {
  const notAFileMessage = 'excel-xlsx import expects a path to an .xlsx file.';
  const absInput = path.isAbsolute(input) ? input : path.resolve(process.cwd(), input);

  // A single stat serves both the is-a-file and the size check. A file that
  // vanishes mid-check now yields the same friendly error, not a raw ENOENT.
  let stat;
  try {
    stat = await fs.promises.stat(absInput);
  } catch (err) {
    throw new Error(notAFileMessage, { cause: err });
  }
  if (!stat.isFile()) {
    throw new Error(notAFileMessage);
  }
  if (!absInput.toLowerCase().endsWith('.xlsx')) {
    throw new Error('excel-xlsx import requires a .xlsx file (Office Open XML). Legacy .xls is not supported.');
  }
  if (stat.size > MAX_XLSX_BYTES) {
    throw new Error(`Excel file too large (max ${MAX_XLSX_BYTES} bytes).`);
  }

  // Async read: the file may be up to MAX_XLSX_BYTES, so do not block here.
  const buf = await fs.promises.readFile(absInput);
  const wb = new ExcelJS.Workbook();
  await wb.xlsx.load(buf);
  if (!wb.worksheets || wb.worksheets.length === 0) {
    return { imported: [], count: 0 };
  }

  const ws = wb.worksheets[0];
  /** @type {string[][]} */
  const rawMatrix = [];
  let maxCol = 0;
  ws.eachRow({ includeEmpty: true }, (row) => {
    const cells = rowToStringArray(row);
    maxCol = Math.max(maxCol, cells.length);
    rawMatrix.push(cells);
  });
  if (rawMatrix.length < 2) {
    return { imported: [], count: 0 };
  }

  // Right-pad every row with '' so the matrix is rectangular.
  /** @type {string[][]} */
  const matrix = rawMatrix.map((cells) =>
    Array.from({ length: maxCol }, (_, i) => (i < cells.length ? cells[i] : '')),
  );

  return importStringMatrixToNotes(matrix, ctx, {
    source: 'xlsx-import',
    fileLabel: path.basename(absInput),
    subdir: 'xlsx',
    fileKey: 'xlsx_file',
  });
}
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
2 days ago