excel-xlsx.mjs
124 lines 3.5 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
/**
 * Microsoft Excel .xlsx (first worksheet only) → one note per data row (same as generic-csv).
 * Parsed with `exceljs` rather than the unmaintained npm `xlsx` package (SheetJS Community Edition), for safer parsing.
 */
5
6 import fs from 'fs';
7 import path from 'path';
8 import ExcelJS from 'exceljs';
9 import { importStringMatrixToNotes } from './tabular-import.mjs';
10
11 const MAX_XLSX_BYTES = 50 * 1024 * 1024;
12 const MAX_FIELD = 32_000;
13
/**
 * Convert a raw cell value into plain text.
 *
 * Recognised shapes, in the order they are checked:
 *  - null / undefined        → ''
 *  - primitives              → `String(val)`
 *  - Date                    → `YYYY-MM-DD` taken from the UTC ISO string ('' if the Date is invalid)
 *  - `{ richText: [...] }`   → the runs' `text` joined together
 *  - `{ hyperlink, text }`   → the converted `text`
 *  - `{ formula, result }`   → the converted `result`
 *  - `{ error: string }`     → the error code itself
 *  - any other object        → `JSON.stringify(val)`
 *
 * Nested values (a hyperlink whose text is rich text, a formula whose result
 * is a Date or an error object) are converted recursively rather than with
 * `String()`, which would produce "[object Object]" or a locale-style date.
 *
 * @param {unknown} val
 * @returns {string}
 */
function valueToString(val) {
  if (val == null) return '';
  if (typeof val !== 'object') return String(val);

  if (val instanceof Date) {
    // toISOString() throws a RangeError on an invalid Date, so guard first.
    return Number.isNaN(val.getTime()) ? '' : val.toISOString().slice(0, 10);
  }
  if ('richText' in val && Array.isArray(val.richText)) {
    return val.richText.map((run) => (run && run.text) || '').join('');
  }
  if ('hyperlink' in val) {
    // The visible text may itself be a rich-text object.
    return valueToString(val.text);
  }
  if ('formula' in val && 'result' in val) {
    // The cached result may be a Date or an error object.
    return valueToString(val.result);
  }
  if ('error' in val && typeof val.error === 'string') {
    return val.error;
  }
  return JSON.stringify(val);
}
39
/**
 * Best-effort display text for a single worksheet cell.
 *
 * Returns '' for a missing cell. Date values are always formatted by
 * `valueToString`; otherwise a non-empty `cell.text` wins, and
 * `valueToString(cell.value)` is the fallback.
 *
 * @param {import('exceljs').Cell} cell
 * @returns {string}
 */
function stringFromCell(cell) {
  if (!cell) return '';

  const raw = cell.value;
  if (raw instanceof Date) {
    // NOTE(review): ExcelJS appears to render date cells' `text` via
    // Date#toString (long, locale-style), which would make the ISO-date
    // branch of valueToString unreachable — confirm against the library.
    return valueToString(raw);
  }

  let text = null;
  try {
    text = cell.text;
  } catch {
    // NOTE(review): the `text` getter is believed to throw for some cells
    // (e.g. a merged cell whose master is empty) — confirm. One bad cell
    // should not abort the import, so fall through to the raw value.
  }
  if (text != null) {
    const display = String(text);
    if (display.length > 0) return display;
  }
  return valueToString(raw);
}
51
/**
 * Flatten one worksheet row into a dense array of cell strings.
 *
 * Index 0 of the result corresponds to column 1. Columns the row iterator
 * did not visit are filled with ''. Text longer than MAX_FIELD characters is
 * cut to MAX_FIELD and suffixed with '…'. A missing row, or one with no
 * cells, yields [].
 *
 * @param {import('exceljs').Row} row
 * @returns {string[]}
 */
function rowToStringArray(row) {
  if (!row) return [];

  const textByColumn = new Map();
  let lastColumn = 0;

  row.eachCell({ includeEmpty: true }, (cell, colNumber) => {
    if (colNumber < 1) return;
    lastColumn = Math.max(lastColumn, colNumber);
    const text = stringFromCell(cell);
    const clipped = text.length > MAX_FIELD ? `${text.slice(0, MAX_FIELD)}…` : text;
    textByColumn.set(colNumber, clipped);
  });

  if (lastColumn < 1) return [];

  // Column numbers are 1-based; array indices are 0-based.
  return Array.from({ length: lastColumn }, (_, idx) => textByColumn.get(idx + 1) ?? '');
}
74
/**
 * Import the first worksheet of an .xlsx file through `importStringMatrixToNotes`.
 *
 * Every row of the sheet (empty ones included) is converted to strings and
 * right-padded with '' so all rows share the widest row's length, then the
 * matrix is handed to `importStringMatrixToNotes` together with `ctx`.
 *
 * @param {string} input Path to the .xlsx file; a relative path is resolved against `process.cwd()`.
 * @param {{ vaultPath: string, outputBase: string, project?: string, tags: string[], dryRun: boolean }} ctx
 *   Passed through unchanged to `importStringMatrixToNotes`.
 * @returns The result of `importStringMatrixToNotes`, or `{ imported: [], count: 0 }`
 *   when the workbook has no worksheet or the sheet has fewer than two rows.
 * @throws {Error} If the path is not an existing regular file, does not end in
 *   `.xlsx`, is larger than MAX_XLSX_BYTES, or cannot be read/parsed as a workbook.
 */
export async function importExcelXlsx(input, ctx) {
  const absInput = path.isAbsolute(input) ? input : path.resolve(process.cwd(), input);

  // Stat once and reuse the result: separate existsSync/statSync calls leave a
  // window in which the file can vanish and surface a raw ENOENT instead of
  // the message below.
  let stat = null;
  try {
    stat = fs.statSync(absInput);
  } catch {
    // Missing or inaccessible path: reported exactly like "not a file" below.
  }
  if (!stat || !stat.isFile()) {
    throw new Error('excel-xlsx import expects a path to an .xlsx file.');
  }
  if (!absInput.toLowerCase().endsWith('.xlsx')) {
    throw new Error('excel-xlsx import requires a .xlsx file (Office Open XML). Legacy .xls is not supported.');
  }
  if (stat.size > MAX_XLSX_BYTES) {
    throw new Error(`Excel file too large (max ${MAX_XLSX_BYTES} bytes).`);
  }

  const wb = new ExcelJS.Workbook();
  try {
    await wb.xlsx.load(fs.readFileSync(absInput));
  } catch (err) {
    // Name the offending file; the underlying error stays available as `cause`.
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to read Excel workbook "${path.basename(absInput)}": ${reason}`, { cause: err });
  }

  const ws = wb.worksheets?.[0];
  if (!ws) {
    return { imported: [], count: 0 };
  }

  /** @type {string[][]} */
  const rows = [];
  let maxCol = 0;
  ws.eachRow({ includeEmpty: true }, (row) => {
    const cells = rowToStringArray(row);
    maxCol = Math.max(maxCol, cells.length);
    rows.push(cells);
  });
  if (rows.length < 2) {
    return { imported: [], count: 0 };
  }

  // Right-pad every row to the widest row so the matrix is rectangular.
  /** @type {(string|number)[][]} */
  const matrix = rows.map((cells) => [
    ...cells,
    ...Array.from({ length: maxCol - cells.length }, () => ''),
  ]);

  return importStringMatrixToNotes(matrix, ctx, {
    source: 'xlsx-import',
    fileLabel: path.basename(absInput),
    subdir: 'xlsx',
    fileKey: 'xlsx_file',
  });
}
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 2 days ago