hub-client-import-zip.mjs
224 lines 6.4 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
1 /**
2 * Hub import: in-browser ZIP for folder-shaped sources (Phase 4A₂).
3 * Shared by `hub-import-zip-shim.mjs` (browser) and unit tests (Node + jszip).
4 */
5
6 /** @typedef {{ maxZipBytes: number, maxUncompressedBytes: number, maxFiles: number }} HubImportZipLimits */
7
/**
 * Default caps for a client-built import ZIP.
 * Both byte caps are 100 MiB, aligned with the 100MB upload limit of `hub/server.mjs` and the bridge multer.
 * The object is frozen so callers cannot mutate the shared defaults.
 */
export const DEFAULT_HUB_IMPORT_ZIP_LIMITS = Object.freeze({
  maxZipBytes: 100 * 2 ** 20,
  maxUncompressedBytes: 100 * 2 ** 20,
  maxFiles: 5000,
});
14
/**
 * Source types uploaded with one POST per file (Phase 4B): each import consumes a single
 * file / CSV row file at a time.
 * Jira/Wallet: a directory input only uses the first .csv (see importers), so several CSVs
 * must go through the sequential path.
 */
export const HUB_IMPORT_SEQUENTIAL_MULTI_SOURCE_TYPES = new Set([
  'pdf', 'docx', 'mem0-export', 'linear-export',
  'audio', 'jira-export', 'wallet-csv', 'supabase-memory',
  'generic-csv', 'json-rows', 'excel-xlsx', 'vcf',
]);
33
/**
 * Source types for which a **directory** (what the server sees after extracting a ZIP) is a
 * valid `runImport` input, so several local files are merged into one client-built ZIP
 * (markdown trees, mif, exports, etc.).
 * ChatGPT/Claude get additional handling in `getHubImportFileMode`.
 */
export const HUB_IMPORT_ZIP_BULK_SOURCE_TYPES = new Set([
  'markdown', 'mif', 'gdrive',
  'notebooklm', 'claude-export', 'chatgpt-export',
]);
47
/**
 * Choose how a file selection is uploaded for the given source type.
 *
 * Rules, in order:
 *  1. empty / missing selection, or the `url` source type -> `direct`;
 *  2. exactly one file whose name ends in `.zip` (case-insensitive) -> `direct`;
 *  3. several files of a one-file-at-a-time type -> `sequential`;
 *  4. `chatgpt-export` -> always `client_zip` (a lone `.zip` was already handled by rule 2);
 *  5. `claude-export` with several files -> `client_zip` only when every file is `.md`/`.markdown`,
 *     otherwise `sequential`;
 *  6. several files of a bulk (directory-shaped) type -> `client_zip`;
 *  7. anything else -> `direct`.
 *
 * @param {string} sourceType
 * @param {ArrayLike<File> | Iterable<File> | null | undefined} files Array, FileList-like, or nothing.
 * @returns {'direct' | 'client_zip' | 'sequential'} `direct` = one POST; `client_zip` = 4A₂; `sequential` = 4B.
 */
export function getHubImportFileMode(sourceType, files) {
  // A missing selection is treated like an empty one instead of crashing in Array.from.
  const list = files == null ? [] : Array.isArray(files) ? files : Array.from(files);
  const count = list.length;
  if (count === 0) return 'direct';

  if (sourceType === 'url') return 'direct';

  // A single pre-built archive is uploaded as-is, whatever the source type.
  const first = list[0];
  if (count === 1 && first && first.name && first.name.toLowerCase().endsWith('.zip')) {
    return 'direct';
  }

  if (count > 1 && HUB_IMPORT_SEQUENTIAL_MULTI_SOURCE_TYPES.has(sourceType)) {
    return 'sequential';
  }

  // Loose ChatGPT export files (even a single one) are always wrapped in a client-built ZIP.
  if (sourceType === 'chatgpt-export') {
    return 'client_zip';
  }

  if (sourceType === 'claude-export' && count > 1) {
    // Null entries or entries without a name count as "not markdown".
    const allMarkdown = list.every((f) => Boolean(f && f.name) && /\.(md|markdown)$/i.test(f.name));
    return allMarkdown ? 'client_zip' : 'sequential';
  }

  if (count > 1 && HUB_IMPORT_ZIP_BULK_SOURCE_TYPES.has(sourceType)) {
    return 'client_zip';
  }

  return 'direct';
}
84
/**
 * Tell whether `rel` may be used as an entry path inside the import ZIP.
 * A path is rejected when it is empty, contains `..` anywhere, or begins with `/` or `\`.
 *
 * @param {string} rel
 * @returns {boolean}
 */
function isSafeRelativeZipPath(rel) {
  if (!rel) return false;
  // Any ".." substring is refused, not only whole ".." segments.
  if (rel.includes('..')) return false;
  const rooted = ['/', '\\'].some((lead) => rel.startsWith(lead));
  return !rooted;
}
93
/**
 * Default ZIP entry path for a selected file: its non-empty string `webkitRelativePath` when
 * present, otherwise its `name`, with every backslash turned into a forward slash.
 *
 * @param {File} f
 * @returns {string}
 */
function defaultRelativePathForFile(f) {
  const rel = f.webkitRelativePath;
  const picked = typeof rel === 'string' && rel.length > 0 ? rel : f.name;
  return picked.replace(/\\/g, '/');
}
102
103 /**
104 * @param {File[]} files
105 * @param {{ warn?: (s: string) => void }} [opt]
106 * @returns {string[]}
107 */
108 function dedupePaths(names, opt) {
109 const seen = new Set();
110 const out = [];
111 for (const raw of names) {
112 const base = raw.split('\\').join('/');
113 if (!isSafeRelativeZipPath(base)) {
114 throw new Error('Unsafe path in selection: ' + raw);
115 }
116 let name = base;
117 let n = 0;
118 while (seen.has(name)) {
119 n++;
120 const dot = base.lastIndexOf('.');
121 if (dot > 0) {
122 name = `${base.slice(0, dot)}(${n})${base.slice(dot)}`;
123 } else {
124 name = `${base}(${n})`;
125 }
126 if (opt && typeof opt.warn === 'function' && n === 1) {
127 opt.warn(`Renamed duplicate path: ${base} → ${name}`);
128 }
129 }
130 seen.add(name);
131 out.push(name);
132 }
133 return out;
134 }
135
/**
 * Throw when the batch collected so far breaks a cap; the file-count cap is tested first.
 *
 * @param {HubImportZipLimits} limits
 * @param {number} uncompressedDelta Uncompressed byte total to compare against `limits.maxUncompressedBytes`.
 * @param {number} fileCount Number of files to compare against `limits.maxFiles`.
 */
function enforceRunningLimits(limits, uncompressedDelta, fileCount) {
  const tooManyFiles = fileCount > limits.maxFiles;
  if (tooManyFiles) {
    const message = `Too many files (${fileCount}). Max ${limits.maxFiles} in one ZIP. Split the batch or use the CLI.`;
    throw new Error(message);
  }

  const tooManyBytes = uncompressedDelta > limits.maxUncompressedBytes;
  if (tooManyBytes) {
    const message = `Uncompressed total exceeds limit (${limits.maxUncompressedBytes} bytes). Choose fewer or smaller files.`;
    throw new Error(message);
  }
}
153
154 /**
155 * @param {import('jszip').default} JSZipCtor
156 * @param {File[]} fileList
157 * @param {HubImportZipLimits} limits
158 * @param {{ signal?: AbortSignal, warn?: (s: string) => void, pathForFile?: (f: File) => string }} [opts]
159 * @returns {Promise<Blob>}
160 */
161 export async function buildImportZipBlobWithJsZip(JSZipCtor, fileList, limits, opts = {}) {
162 const { signal, warn, pathForFile } = opts;
163 const list = Array.isArray(fileList) ? fileList : Array.from(fileList);
164 if (list.length === 0) {
165 throw new Error('No files to zip.');
166 }
167
168 const nameFn = pathForFile || defaultRelativePathForFile;
169 const names = list.map((f) => nameFn(f));
170 const paths = dedupePaths(names, { warn });
171
172 const zip = new JSZipCtor();
173 let uncompressed = 0;
174 for (let i = 0; i < list.length; i++) {
175 if (signal && signal.aborted) {
176 const err = new Error('aborted');
177 err.name = 'AbortError';
178 throw err;
179 }
180 const f = list[i];
181 const p = paths[i];
182 if (!f.size && p.endsWith('/')) {
183 continue;
184 }
185 uncompressed += f.size || 0;
186 enforceRunningLimits(limits, uncompressed, i + 1);
187 const buf = await f.arrayBuffer();
188 if (uncompressed > limits.maxUncompressedBytes) {
189 throw new Error('Uncompressed total exceeds limit after reading files.');
190 }
191 zip.file(p, buf);
192 }
193
194 enforceRunningLimits(limits, uncompressed, list.length);
195 if (typeof zip.generateAsync !== 'function') {
196 throw new Error('Invalid JSZip instance; generateAsync missing');
197 }
198 const blob = await zip.generateAsync({
199 type: 'blob',
200 compression: 'DEFLATE',
201 streamFiles: true,
202 });
203 if (blob && blob.size > limits.maxZipBytes) {
204 throw new Error(
205 `ZIP is ${blob.size} bytes; max ${limits.maxZipBytes} (matches server upload limit).`,
206 );
207 }
208 return blob;
209 }
210
/**
 * 4B: the server accepts one file at a time; reject a file larger than `limits.maxZipBytes`.
 * A missing file (`null`/`undefined`) passes silently.
 *
 * @param {File} f
 * @param {HubImportZipLimits} limits
 * @throws {Error} When `f.size` exceeds `limits.maxZipBytes`.
 */
export function assertSingleFileWithinLimit(f, limits) {
  if (!f || !(f.size > limits.maxZipBytes)) return;
  // Derive the human-readable hint from the limit actually in force rather than hard-coding
  // 100MB; it is omitted when the limit rounds to 0MB.
  const approxMb = Number((limits.maxZipBytes / (1024 * 1024)).toFixed(2));
  const hint = approxMb > 0 ? ` (~${approxMb}MB)` : '';
  throw new Error(
    `File is ${f.size} bytes; max per upload is ${limits.maxZipBytes} bytes${hint}.`,
  );
}
223
// Named exports for the two path helpers, which are declared above without the `export` keyword.
export { isSafeRelativeZipPath, defaultRelativePathForFile };
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 1 day ago