ics-normalizer.mjs
464 lines 13.3 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
1 /**
2 * Pure ICS (iCalendar RFC 5545 subset) parser and event normalizer.
3 *
4 * No network, no OAuth, no filesystem I/O — callers pass ICS text only.
5 * Output shape matches Calendar Events v0 `CalendarEvent` fields except
6 * `event_id` and `source_calendar_id` (assigned by the store layer in 1B).
7 *
8 * @see docs/CALENDAR-EVENTS-V0-SPEC.md — Phase 1A
9 */
10
11 /** @typedef {'confirmed' | 'cancelled' | 'tentative'} CalendarEventStatus */
12
13 /**
14 * @typedef {Object} NormalizedCalendarEvent
15 * @property {string} external_uid — Provider UID for dedup
16 * @property {string} start — UTC ISO8601 instant
17 * @property {string} end — UTC ISO8601 instant
18 * @property {string} timezone — IANA timezone id used for display semantics
19 * @property {string|null} summary
20 * @property {boolean} busy — false when TRANSP=TRANSPARENT
21 * @property {CalendarEventStatus} status
22 * @property {string|null} recurrence_rule — Raw RRULE string; expansion deferred
23 */
24
25 /**
26 * @typedef {Object} ParseIcsOptions
27 * @property {string} [defaultTimezone='UTC'] — Used for floating DATE-TIME values
28 * @property {number} [maxEvents=5000] — Hard cap to bound hostile payloads
29 */
30
/** Largest accepted `icsText.length` (5 Mi; the parser compares it against string length). */
const MAX_ICS_BYTES = 5 * 2 ** 20;
/** Largest accepted length of a single raw (still folded) line, in characters. */
const MAX_LINE_LENGTH = 8 * 1024;
/** Largest accepted length of one unescaped property value, in characters. */
const MAX_PROPERTY_VALUE_LENGTH = 4 * 1024;
34
/**
 * Turn raw ICS text into a list of normalized calendar events.
 *
 * The text is unfolded, split into VEVENT components, and each component is
 * normalized; components that normalize to a falsy value are left out.
 *
 * @param {string} icsText
 * @param {ParseIcsOptions} [options]
 * @returns {NormalizedCalendarEvent[]}
 * @throws {TypeError} When `icsText` is not a string.
 * @throws {RangeError} When the payload or the VEVENT count exceeds its limit.
 */
export function parseIcsToEvents(icsText, options = {}) {
  if (typeof icsText !== 'string') {
    throw new TypeError('icsText must be a string');
  }
  if (icsText.length > MAX_ICS_BYTES) {
    throw new RangeError(`ICS payload exceeds ${MAX_ICS_BYTES} bytes`);
  }

  // Resolve options first so a bad default timezone fails before any parsing work.
  const fallbackZone = normalizeTimezoneId(options.defaultTimezone ?? 'UTC');
  const eventCap = options.maxEvents ?? 5000;

  const vevents = extractVevents(unfoldIcsLines(icsText));
  if (vevents.length > eventCap) {
    throw new RangeError(`ICS contains ${vevents.length} VEVENT components; max is ${eventCap}`);
  }

  return vevents
    .map((props) => normalizeVevent(props, fallbackZone))
    .filter(Boolean);
}
68
69 /**
70 * RFC 5545 line unfolding: CRLF + single space/tab continues the prior line.
71 *
72 * @param {string} text
73 * @returns {string[]}
74 */
75 export function unfoldIcsLines(text) {
76 const rawLines = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n').split('\n');
77 /** @type {string[]} */
78 const out = [];
79 for (const line of rawLines) {
80 if (line.length > MAX_LINE_LENGTH) {
81 throw new RangeError(`ICS line exceeds ${MAX_LINE_LENGTH} characters`);
82 }
83 if ((line.startsWith(' ') || line.startsWith('\t')) && out.length > 0) {
84 out[out.length - 1] += line.slice(1);
85 } else {
86 out.push(line);
87 }
88 }
89 return out;
90 }
91
/**
 * Collect the parsed property lines of every VEVENT component.
 *
 * Only properties that sit directly inside a VEVENT are collected. Lines that
 * belong to a sub-component nested in the VEVENT (for example a VALARM, which
 * may carry its own DURATION / SUMMARY / DESCRIPTION) are skipped so they
 * cannot be mistaken for properties of the event itself. Lines outside any
 * VEVENT are ignored.
 *
 * @param {string[]} lines — Unfolded content lines.
 * @returns {{ name: string, value: string, params: Record<string, string> }[][]}
 *   One property list per VEVENT, in document order.
 * @throws {SyntaxError} On a VEVENT inside a VEVENT, an END:VEVENT with no open
 *   VEVENT, or a VEVENT that is never closed.
 */
function extractVevents(lines) {
  /** @type {{ name: string, value: string, params: Record<string, string> }[][]} */
  const events = [];
  /** @type {{ name: string, value: string, params: Record<string, string> }[] | null} */
  let current = null;
  // 0 = outside any VEVENT, 1 = directly inside a VEVENT, >1 = inside a nested sub-component.
  let depth = 0;

  for (const line of lines) {
    if (line === 'BEGIN:VEVENT') {
      if (current) {
        throw new SyntaxError('Nested VEVENT components are not supported');
      }
      current = [];
      depth = 1;
      continue;
    }
    if (line === 'END:VEVENT') {
      if (!current) {
        throw new SyntaxError('END:VEVENT without matching BEGIN:VEVENT');
      }
      // Close the event even if a nested sub-component was left open, so a
      // sloppy feed with an unterminated VALARM still yields its events.
      events.push(current);
      current = null;
      depth = 0;
      continue;
    }
    if (!current) {
      continue;
    }
    if (line.startsWith('BEGIN:')) {
      depth += 1;
      continue;
    }
    if (line.startsWith('END:')) {
      // A stray END with nothing nested open is ignored rather than closing the VEVENT.
      if (depth > 1) {
        depth -= 1;
      }
      continue;
    }
    if (depth > 1) {
      continue;
    }
    const parsed = parsePropertyLine(line);
    if (parsed) {
      current.push(parsed);
    }
  }

  if (current) {
    throw new SyntaxError('Unclosed VEVENT component');
  }
  return events;
}
135
/**
 * Index of the first `target` character that is not inside a double-quoted
 * section, or -1 when there is none (including when a quote is left open).
 *
 * @param {string} text
 * @param {string} target — Single character to look for.
 * @returns {number}
 */
function findUnquotedIndex(text, target) {
  let inQuotes = false;
  for (let i = 0; i < text.length; i += 1) {
    const ch = text[i];
    if (ch === '"') {
      inQuotes = !inQuotes;
    } else if (!inQuotes && ch === target) {
      return i;
    }
  }
  return -1;
}

/**
 * Split on `separator`, ignoring separators inside double-quoted sections.
 * When the quotes are unbalanced the text is split on every separator instead.
 *
 * @param {string} text
 * @param {string} separator — Single character.
 * @returns {string[]}
 */
function splitUnquoted(text, separator) {
  /** @type {string[]} */
  const pieces = [];
  let inQuotes = false;
  let start = 0;
  for (let i = 0; i < text.length; i += 1) {
    const ch = text[i];
    if (ch === '"') {
      inQuotes = !inQuotes;
    } else if (!inQuotes && ch === separator) {
      pieces.push(text.slice(start, i));
      start = i + 1;
    }
  }
  if (inQuotes) {
    return text.split(separator);
  }
  pieces.push(text.slice(start));
  return pieces;
}

/**
 * Parse one unfolded content line (`NAME;PARAM=value;...:value`).
 *
 * The name/value delimiter is the first ':' outside double quotes, and
 * parameters are separated by ';' outside double quotes, so quoted parameter
 * values such as `CN="Doe: John"` or `TZID="America/New_York"` are handled.
 * A parameter value that is a single quoted string is returned without its
 * surrounding quotes. Property and parameter names are upper-cased; the
 * property value is passed through `unescapeIcsText`.
 *
 * @param {string} line
 * @returns {{ name: string, value: string, params: Record<string, string> } | null}
 *   `null` for empty lines, BEGIN:/END: markers, and lines with no usable ':'.
 * @throws {RangeError} When the unescaped value exceeds MAX_PROPERTY_VALUE_LENGTH.
 */
export function parsePropertyLine(line) {
  if (!line || line.startsWith('BEGIN:') || line.startsWith('END:')) {
    return null;
  }
  let colon = findUnquotedIndex(line, ':');
  if (colon === -1) {
    // An unterminated quote hides every ':'; fall back to the first one so
    // malformed lines are still accepted on a best-effort basis.
    colon = line.indexOf(':');
  }
  if (colon <= 0) {
    return null;
  }

  const left = line.slice(0, colon);
  const value = unescapeIcsText(line.slice(colon + 1));
  if (value.length > MAX_PROPERTY_VALUE_LENGTH) {
    throw new RangeError(`ICS property value exceeds ${MAX_PROPERTY_VALUE_LENGTH} characters`);
  }

  const [rawName, ...rawParams] = splitUnquoted(left, ';');
  const name = rawName.toUpperCase();
  /** @type {Record<string, string>} */
  const params = {};
  for (const chunk of rawParams) {
    const eq = chunk.indexOf('=');
    if (eq === -1) {
      continue;
    }
    const key = chunk.slice(0, eq).toUpperCase();
    const rawValue = chunk.slice(eq + 1);
    // Strip the quotes only from a single quoted string; multi-valued lists
    // like "a","b" are left untouched.
    params[key] = /^"[^"]*"$/.test(rawValue) ? rawValue.slice(1, -1) : rawValue;
  }
  return { name, value, params };
}
172
/**
 * Decode RFC 5545 TEXT escapes: `\\` → backslash, `\;` → `;`, `\,` → `,`,
 * and `\n` / `\N` → newline.
 *
 * Decoding is done in a single left-to-right pass so that an escaped
 * backslash followed by a literal `n` (as in `C:\\new`) yields a backslash
 * and the letter `n`, not a newline. Unknown escapes are left unchanged.
 *
 * @param {string} value
 * @returns {string}
 */
export function unescapeIcsText(value) {
  return value.replace(/\\([\\;,nN])/g, (_match, escaped) => (
    escaped === 'n' || escaped === 'N' ? '\n' : escaped
  ));
}
185
/**
 * Build a normalized event from the parsed properties of one VEVENT.
 *
 * The end instant comes from DTEND when present, otherwise from DURATION,
 * otherwise from a default length (one day for date-only starts, 60 minutes
 * for timed starts). An end that is not after the start is replaced by that
 * same default.
 *
 * @param {{ name: string, value: string, params: Record<string, string> }[]} props
 * @param {string} defaultTimezone
 * @returns {NormalizedCalendarEvent | null} `null` when UID or DTSTART is missing.
 */
function normalizeVevent(props, defaultTimezone) {
  const externalUid = getProperty(props, 'UID')?.value;
  if (!externalUid) {
    return null;
  }
  const startProp = getProperty(props, 'DTSTART');
  if (!startProp) {
    return null;
  }

  const start = parseIcsDateTimeProp('DTSTART', startProp, defaultTimezone);
  const defaultEnd = () => (
    start.isDate ? addDaysUtc(start.instant, 1) : addMinutesUtc(start.instant, 60)
  );

  const endProp = getProperty(props, 'DTEND');
  const durationProp = getProperty(props, 'DURATION');

  let end;
  if (endProp) {
    // DTEND is exclusive for all-day events, so the parsed instant is used as-is.
    end = parseIcsDateTimeProp('DTEND', endProp, defaultTimezone).instant;
  } else if (durationProp) {
    end = addDuration(start.instant, durationProp.value);
  } else {
    end = defaultEnd();
  }
  if (end <= start.instant) {
    end = defaultEnd();
  }

  const transparency = (getProperty(props, 'TRANSP')?.value ?? 'OPAQUE').toUpperCase();

  return {
    external_uid: externalUid,
    start: start.instant.toISOString(),
    end: end.toISOString(),
    timezone: start.timezone,
    summary: getProperty(props, 'SUMMARY')?.value ?? null,
    busy: transparency !== 'TRANSPARENT',
    status: normalizeStatus(getProperty(props, 'STATUS')?.value),
    recurrence_rule: getProperty(props, 'RRULE')?.value ?? null,
  };
}
244
/**
 * Look up the first property whose name equals `name` upper-cased.
 *
 * @param {{ name: string, value: string, params: Record<string, string> }[]} props
 * @param {string} name
 * @returns {{ name: string, value: string, params: Record<string, string> } | undefined}
 *   The first match in list order, or `undefined` when there is none.
 */
function getProperty(props, name) {
  const wanted = name.toUpperCase();
  for (const candidate of props) {
    if (candidate.name === wanted) {
      return candidate;
    }
  }
  return undefined;
}
254
/**
 * Parse a DTSTART/DTEND-style property, deciding whether it is date-only.
 *
 * A value counts as date-only when the VALUE parameter is DATE (any case) or
 * the trimmed value is exactly eight digits. The TZID parameter, if any, is
 * forwarded unchanged.
 *
 * @param {string} propName
 * @param {{ value: string, params: Record<string, string> }} prop
 * @param {string} defaultTimezone
 * @returns {{ instant: Date, timezone: string, isDate: boolean }}
 */
function parseIcsDateTimeProp(propName, prop, defaultTimezone) {
  const declaredDate = (prop.params.VALUE ?? '').toUpperCase() === 'DATE';
  const zoneParam = prop.params.TZID;
  const text = prop.value.trim();
  const looksLikeDate = /^\d{8}$/.test(text);
  return parseIcsDateTime(propName, text, zoneParam, defaultTimezone, declaredDate || looksLikeDate);
}
268
/**
 * Parse an ICS DATE or DATE-TIME value into a UTC instant.
 *
 * - Date-only values (`isDate`, or exactly eight digits) resolve to midnight
 *   of that date in the TZID parameter's zone, or the default zone.
 * - `...Z` values are UTC; their components are validated the same way as
 *   zoned values so an out-of-range field (e.g. month 13) is rejected instead
 *   of silently rolling over into another date.
 * - Other values are wall-clock times in the TZID zone, or the default zone.
 *
 * @param {string} propName — Used only in the error message.
 * @param {string} raw
 * @param {string|undefined} tzidFromParam
 * @param {string} defaultTimezone
 * @param {boolean} [isDate=false]
 * @returns {{ instant: Date, timezone: string, isDate: boolean }}
 * @throws {SyntaxError} When a non-date value is not `YYYYMMDDTHHMMSS[Z]`.
 * @throws {RangeError} When components or the timezone fail validation.
 */
function parseIcsDateTime(propName, raw, tzidFromParam, defaultTimezone, isDate = false) {
  const trimmed = raw.trim();
  const dateOnly = isDate || /^\d{8}$/.test(trimmed);

  if (dateOnly) {
    const year = Number(trimmed.slice(0, 4));
    const month = Number(trimmed.slice(4, 6));
    const day = Number(trimmed.slice(6, 8));
    const tz = normalizeTimezoneId(tzidFromParam ?? defaultTimezone);
    const instant = zonedLocalToUtc({ year, month, day, hour: 0, minute: 0, second: 0 }, tz);
    return { instant, timezone: tz, isDate: true };
  }

  const match = trimmed.match(/^(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})(\d{2})(Z)?$/);
  if (!match) {
    throw new SyntaxError(`${propName} has unsupported datetime format`);
  }

  const [year, month, day, hour, minute, second] = match.slice(1, 7).map(Number);
  const components = { year, month, day, hour, minute, second };

  if (match[7] === 'Z') {
    // Date.UTC would quietly normalize out-of-range fields; reject them instead.
    validateLocalComponents(components);
    const instant = new Date(Date.UTC(year, month - 1, day, hour, minute, second));
    return { instant, timezone: 'UTC', isDate: false };
  }

  const tz = normalizeTimezoneId(tzidFromParam ?? defaultTimezone);
  const instant = zonedLocalToUtc(components, tz);
  return { instant, timezone: tz, isDate: false };
}
321
/**
 * Convert wall-clock components in an IANA zone to a UTC instant.
 *
 * The zone's UTC offset is sampled one day before and one day after the
 * target, and each resulting candidate instant is kept only if the zone
 * really displays the requested wall-clock time at that instant.
 *
 * - Normal times have exactly one valid candidate.
 * - Repeated times (clocks set back) have two; the earlier instant is
 *   returned, i.e. the first occurrence, per RFC 5545 §3.3.5.
 * - Skipped times (clocks set forward) have none; the offset in effect before
 *   the gap is used, per RFC 5545 §3.3.5.
 *
 * This gives a deterministic result around DST transitions, where a plain
 * fixed-point iteration can oscillate between two instants.
 *
 * @param {{ year: number, month: number, day: number, hour: number, minute: number, second: number }} local
 * @param {string} timeZone
 * @returns {Date}
 * @throws {RangeError} When the components fail validation.
 */
export function zonedLocalToUtc(local, timeZone) {
  validateLocalComponents(local);
  const DAY_MS = 86_400_000;
  // The requested wall-clock reading, encoded as if it were a UTC timestamp.
  const targetMs = Date.UTC(local.year, local.month - 1, local.day, local.hour, local.minute, local.second);

  /** Wall-clock reading the zone shows at `utcMs`, encoded like `targetMs`. */
  const wallClockMs = (utcMs) => {
    const parts = getZonedParts(new Date(utcMs), timeZone);
    return Date.UTC(parts.year, parts.month - 1, parts.day, parts.hour, parts.minute, parts.second);
  };

  const beforeMs = targetMs - DAY_MS;
  const afterMs = targetMs + DAY_MS;
  const offsetBefore = wallClockMs(beforeMs) - beforeMs;
  const offsetAfter = wallClockMs(afterMs) - afterMs;

  const candidates = [...new Set([targetMs - offsetBefore, targetMs - offsetAfter])]
    .filter((utcMs) => wallClockMs(utcMs) === targetMs);

  if (candidates.length > 0) {
    return new Date(Math.min(...candidates));
  }
  // No instant shows this wall-clock time: it falls inside a gap.
  return new Date(targetMs - offsetBefore);
}
346
/**
 * Read the calendar fields that `timeZone` displays for the given instant.
 *
 * Formats the date with `Intl.DateTimeFormat` (en-US, 24-hour `h23` cycle)
 * and converts each non-literal part to a number.
 *
 * @param {Date} date
 * @param {string} timeZone
 * @returns {{ year: number, month: number, day: number, hour: number, minute: number, second: number }}
 */
function getZonedParts(date, timeZone) {
  const formatter = new Intl.DateTimeFormat('en-US', {
    timeZone,
    year: 'numeric',
    month: '2-digit',
    day: '2-digit',
    hour: '2-digit',
    minute: '2-digit',
    second: '2-digit',
    hourCycle: 'h23',
  });
  const fields = Object.fromEntries(
    formatter
      .formatToParts(date)
      .filter(({ type }) => type !== 'literal')
      .map(({ type, value }) => [type, value]),
  );
  const numeric = (field) => Number(fields[field]);
  return {
    year: numeric('year'),
    month: numeric('month'),
    day: numeric('day'),
    hour: numeric('hour'),
    minute: numeric('minute'),
    second: numeric('second'),
  };
}
379
/**
 * Reject wall-clock components that do not describe a real date and time.
 *
 * Every supplied component must be an integer. The month must be 1-12 and the
 * day must exist in that month (leap years included), so impossible dates
 * such as February 30 are refused rather than being rolled into the next
 * month by `Date.UTC`. The time must lie within 00:00:00-23:59:60; second 60
 * is allowed because RFC 5545 permits it for leap seconds.
 *
 * @param {{ year: number, month: number, day: number, hour: number, minute: number, second: number }} local
 * @throws {RangeError} When any component is not an integer or is out of range.
 */
function validateLocalComponents(local) {
  for (const [key, val] of Object.entries(local)) {
    if (!Number.isInteger(val)) {
      throw new RangeError(`Invalid calendar component ${key}`);
    }
  }

  const { year, month, day, hour, minute, second } = local;
  if (month < 1 || month > 12 || day < 1) {
    throw new RangeError('Invalid calendar date');
  }
  const isLeapYear = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0;
  const daysInMonth = [31, isLeapYear ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31][month - 1];
  if (day > daysInMonth) {
    throw new RangeError('Invalid calendar date');
  }
  if (hour < 0 || hour > 23 || minute < 0 || minute > 59 || second < 0 || second > 60) {
    throw new RangeError('Invalid calendar time');
  }
}
393
/**
 * Trim a timezone id and confirm the runtime's `Intl` accepts it.
 *
 * @param {string} tz
 * @returns {string} The trimmed id, or `'UTC'` when it is blank.
 * @throws {RangeError} When `Intl.DateTimeFormat` rejects the id.
 */
export function normalizeTimezoneId(tz) {
  const zoneId = String(tz).trim();
  if (zoneId === '') {
    return 'UTC';
  }

  let accepted = true;
  try {
    // Constructing a formatter is the validity probe; the instance is discarded.
    new Intl.DateTimeFormat('en-US', { timeZone: zoneId });
  } catch {
    accepted = false;
  }
  if (!accepted) {
    throw new RangeError(`Unknown IANA timezone: ${zoneId}`);
  }
  return zoneId;
}
410
/**
 * Map a raw STATUS value onto the supported event statuses.
 *
 * Matching ignores case. A missing value (`null`/`undefined`) and any value
 * other than CANCELLED or TENTATIVE map to `'confirmed'`.
 *
 * @param {string|undefined|null} statusRaw
 * @returns {CalendarEventStatus}
 */
export function normalizeStatus(statusRaw) {
  switch ((statusRaw ?? 'CONFIRMED').toUpperCase()) {
    case 'CANCELLED':
      return 'cancelled';
    case 'TENTATIVE':
      return 'tentative';
    default:
      return 'confirmed';
  }
}
425
/**
 * Return a new Date shifted by `days` UTC calendar days; the input is not modified.
 *
 * @param {Date} instant
 * @param {number} days
 * @returns {Date}
 */
function addDaysUtc(instant, days) {
  const shifted = new Date(instant.valueOf());
  const targetDayOfMonth = shifted.getUTCDate() + days;
  // setUTCDate normalizes overflow, carrying into the following month/year.
  shifted.setUTCDate(targetDayOfMonth);
  return shifted;
}
436
/**
 * Return a new Date that is `minutes` minutes after `instant`; the input is not modified.
 *
 * @param {Date} instant
 * @param {number} minutes
 * @returns {Date}
 */
function addMinutesUtc(instant, minutes) {
  const offsetMs = minutes * 60_000;
  const shifted = new Date(instant.getTime());
  shifted.setTime(shifted.getTime() + offsetMs);
  return shifted;
}
445
/**
 * Add an RFC 5545 DURATION value to an instant.
 *
 * Accepts an optional sign, weeks, days, and a time part:
 * `[+|-]P[nW][nD][T[nH][nM][nS]]` — e.g. `PT1H30M`, `P1D`, `P2W`, `-PT15M`.
 * Surrounding whitespace is ignored. Days and weeks are treated as exact
 * multiples of 24 hours.
 *
 * @param {Date} start
 * @param {string} duration
 * @returns {Date} A new Date; `start` is not modified.
 * @throws {SyntaxError} When `duration` does not match the supported grammar.
 */
export function addDuration(start, duration) {
  const match = duration
    .trim()
    .match(/^([+-])?P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?$/);
  if (!match) {
    throw new SyntaxError('Unsupported DURATION format');
  }
  const [, sign, weeks, days, hours, minutes, seconds] = match;
  const totalDays = Number(weeks ?? 0) * 7 + Number(days ?? 0);
  const totalSeconds = ((totalDays * 24 + Number(hours ?? 0)) * 60 + Number(minutes ?? 0)) * 60
    + Number(seconds ?? 0);
  const direction = sign === '-' ? -1 : 1;
  return new Date(start.getTime() + direction * totalSeconds * 1000);
}
File History 1 commit
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago