ics-normalizer.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | /** |
| 2 | * Pure ICS (iCalendar RFC 5545 subset) parser and event normalizer. |
| 3 | * |
| 4 | * No network, no OAuth, no filesystem I/O — callers pass ICS text only. |
| 5 | * Output shape matches Calendar Events v0 `CalendarEvent` fields except |
| 6 | * `event_id` and `source_calendar_id` (assigned by the store layer in 1B). |
| 7 | * |
| 8 | * @see docs/CALENDAR-EVENTS-V0-SPEC.md — Phase 1A |
| 9 | */ |
| 10 | |
| 11 | /** @typedef {'confirmed' | 'cancelled' | 'tentative'} CalendarEventStatus */ |
| 12 | |
| 13 | /** |
| 14 | * @typedef {Object} NormalizedCalendarEvent |
| 15 | * @property {string} external_uid — Provider UID for dedup |
| 16 | * @property {string} start — UTC ISO8601 instant |
| 17 | * @property {string} end — UTC ISO8601 instant |
| 18 | * @property {string} timezone — IANA timezone id used for display semantics |
| 19 | * @property {string|null} summary |
| 20 | * @property {boolean} busy — false when TRANSP=TRANSPARENT |
| 21 | * @property {CalendarEventStatus} status |
| 22 | * @property {string|null} recurrence_rule — Raw RRULE string; expansion deferred |
| 23 | */ |
| 24 | |
| 25 | /** |
| 26 | * @typedef {Object} ParseIcsOptions |
| 27 | * @property {string} [defaultTimezone='UTC'] — Used for floating DATE-TIME values |
| 28 | * @property {number} [maxEvents=5000] — Hard cap to bound hostile payloads |
| 29 | */ |
| 30 | |
/** Upper bound on the ICS payload accepted by `parseIcsToEvents` (5 MiB). */
const MAX_ICS_BYTES = 5 * 1024 ** 2;
/** Longest physical (not yet unfolded) line accepted by `unfoldIcsLines`, in characters. */
const MAX_LINE_LENGTH = 8_192;
/** Longest unescaped property value accepted by `parsePropertyLine`, in characters. */
const MAX_PROPERTY_VALUE_LENGTH = 4_096;
| 34 | |
/**
 * Turn raw ICS text into a list of normalized calendar events.
 *
 * Pipeline: type/size guard → line unfolding → VEVENT extraction →
 * per-event normalization. VEVENTs for which normalization yields nothing
 * (no UID or no DTSTART) are left out of the result.
 *
 * @param {string} icsText — Full ICS document; its `length` is compared against MAX_ICS_BYTES
 * @param {ParseIcsOptions} [options]
 * @returns {NormalizedCalendarEvent[]} Normalized events in document order
 * @throws {TypeError} When `icsText` is not a string
 * @throws {RangeError} When the payload or the number of VEVENTs exceeds its limit
 */
export function parseIcsToEvents(icsText, options = {}) {
  if (typeof icsText !== 'string') {
    throw new TypeError('icsText must be a string');
  }
  if (icsText.length > MAX_ICS_BYTES) {
    throw new RangeError(`ICS payload exceeds ${MAX_ICS_BYTES} bytes`);
  }

  // Resolve (and validate) the fallback zone before doing any parsing work.
  const fallbackZone = normalizeTimezoneId(options.defaultTimezone ?? 'UTC');
  const eventLimit = options.maxEvents ?? 5000;

  const vevents = extractVevents(unfoldIcsLines(icsText));
  if (vevents.length > eventLimit) {
    throw new RangeError(`ICS contains ${vevents.length} VEVENT components; max is ${eventLimit}`);
  }

  return vevents
    .map((props) => normalizeVevent(props, fallbackZone))
    .filter((event) => Boolean(event));
}
| 68 | |
/**
 * Undo RFC 5545 line folding.
 *
 * CRLF, lone CR and lone LF are all treated as line breaks. A physical line
 * that begins with a space or tab continues the previous logical line: its
 * first character is dropped and the remainder is appended. A continuation
 * line with nothing before it is kept unchanged.
 *
 * @param {string} text
 * @returns {string[]} Logical (unfolded) lines
 * @throws {RangeError} When a physical line is longer than MAX_LINE_LENGTH
 */
export function unfoldIcsLines(text) {
  /** @type {string[]} */
  const logicalLines = [];

  for (const physical of text.split(/\r\n|\r|\n/)) {
    if (physical.length > MAX_LINE_LENGTH) {
      throw new RangeError(`ICS line exceeds ${MAX_LINE_LENGTH} characters`);
    }

    const lead = physical[0];
    const continuesPrevious = logicalLines.length > 0 && (lead === ' ' || lead === '\t');
    if (!continuesPrevious) {
      logicalLines.push(physical);
      continue;
    }

    const last = logicalLines.length - 1;
    logicalLines[last] = logicalLines[last] + physical.slice(1);
  }

  return logicalLines;
}
| 91 | |
/**
 * Collect the properties of every VEVENT in an unfolded ICS document.
 *
 * Lines outside a VEVENT are ignored. Components nested inside a VEVENT
 * (for example VALARM) are tracked by depth and their properties are skipped,
 * so an alarm's DURATION / DESCRIPTION / SUMMARY is never mistaken for the
 * event's own. BEGIN/END markers are matched case-insensitively and with
 * trailing whitespace ignored.
 *
 * @param {string[]} lines — Unfolded content lines
 * @returns {{ name: string, value: string, params: Record<string, string> }[][]}
 *   One property list per VEVENT, in document order
 * @throws {SyntaxError} On a VEVENT opened inside a VEVENT, an END:VEVENT with
 *   no open VEVENT or with a nested component still open, or an unclosed VEVENT
 */
function extractVevents(lines) {
  /** @type {{ name: string, value: string, params: Record<string, string> }[][]} */
  const events = [];
  /** @type {{ name: string, value: string, params: Record<string, string> }[] | null} */
  let current = null;
  // 0 = outside any VEVENT, 1 = directly inside a VEVENT, >1 = inside a nested component.
  let depth = 0;

  for (const line of lines) {
    const marker = line.trimEnd().toUpperCase();

    if (marker === 'BEGIN:VEVENT') {
      if (current) {
        throw new SyntaxError('Nested VEVENT components are not supported');
      }
      current = [];
      depth = 1;
      continue;
    }

    if (marker === 'END:VEVENT') {
      if (!current) {
        throw new SyntaxError('END:VEVENT without matching BEGIN:VEVENT');
      }
      if (depth !== 1) {
        throw new SyntaxError('END:VEVENT reached before a nested component was closed');
      }
      events.push(current);
      current = null;
      depth = 0;
      continue;
    }

    if (!current) {
      continue;
    }

    if (marker.startsWith('BEGIN:')) {
      depth += 1;
      continue;
    }
    if (marker.startsWith('END:')) {
      // A stray END at VEVENT level is tolerated; never drop below depth 1 here.
      if (depth > 1) {
        depth -= 1;
      }
      continue;
    }
    if (depth > 1) {
      // Property of a nested component (e.g. VALARM) — not part of the event itself.
      continue;
    }

    const parsed = parsePropertyLine(line);
    if (parsed) {
      current.push(parsed);
    }
  }

  if (current) {
    throw new SyntaxError('Unclosed VEVENT component');
  }
  return events;
}
| 135 | |
/**
 * Index of the first `delimiter` in `text` at or after `from` that is not
 * inside a double-quoted section, or -1 when there is none.
 *
 * @param {string} text
 * @param {string} delimiter — Single character
 * @param {number} [from=0] — Must point outside any quoted section
 * @returns {number}
 */
function findUnquotedIndex(text, delimiter, from = 0) {
  let inQuotes = false;
  for (let i = from; i < text.length; i += 1) {
    const ch = text[i];
    if (ch === '"') {
      inQuotes = !inQuotes;
    } else if (ch === delimiter && !inQuotes) {
      return i;
    }
  }
  return -1;
}

/**
 * Split `text` on `delimiter`, ignoring delimiters inside double quotes.
 *
 * @param {string} text
 * @param {string} delimiter — Single character
 * @returns {string[]}
 */
function splitOutsideQuotes(text, delimiter) {
  const pieces = [];
  let start = 0;
  let at = findUnquotedIndex(text, delimiter, start);
  while (at !== -1) {
    pieces.push(text.slice(start, at));
    start = at + 1;
    at = findUnquotedIndex(text, delimiter, start);
  }
  pieces.push(text.slice(start));
  return pieces;
}

/**
 * Remove one pair of enclosing double quotes from a parameter value.
 * Values with quotes elsewhere (e.g. a quoted list `"a","b"`) are left as-is.
 *
 * @param {string} raw
 * @returns {string}
 */
function stripParamQuotes(raw) {
  const quoted = /^"([^"]*)"$/.exec(raw);
  return quoted ? quoted[1] : raw;
}

/**
 * Parse one unfolded content line (`NAME;PARAM=value;…:VALUE`).
 *
 * The name/parameter section ends at the first `:` that is not inside a
 * double-quoted parameter value, and parameters are separated on `;` outside
 * quotes, so `TZID="America/New_York"` or `ALTREP="cid:a;b"` parse correctly
 * and are returned without their enclosing quotes. If the quotes before the
 * value are unbalanced, the line falls back to splitting at the first `:` and
 * on every `;`.
 *
 * The property name and parameter names are upper-cased; the value is passed
 * through `unescapeIcsText`. Parameters without `=` are ignored.
 *
 * @param {string} line
 * @returns {{ name: string, value: string, params: Record<string, string> } | null}
 *   `null` for empty lines, `BEGIN:`/`END:` lines and lines with no `:` after a name
 * @throws {RangeError} When the unescaped value is longer than MAX_PROPERTY_VALUE_LENGTH
 */
export function parsePropertyLine(line) {
  if (!line || line.startsWith('BEGIN:') || line.startsWith('END:')) {
    return null;
  }

  let colon = findUnquotedIndex(line, ':');
  const quoteAware = colon !== -1;
  if (!quoteAware) {
    // Unbalanced quotes (or no colon at all): fall back to the plain first colon.
    colon = line.indexOf(':');
  }
  if (colon <= 0) {
    return null;
  }

  const head = line.slice(0, colon);
  const value = unescapeIcsText(line.slice(colon + 1));
  if (value.length > MAX_PROPERTY_VALUE_LENGTH) {
    throw new RangeError(`ICS property value exceeds ${MAX_PROPERTY_VALUE_LENGTH} characters`);
  }

  const [rawName, ...paramChunks] = quoteAware ? splitOutsideQuotes(head, ';') : head.split(';');

  /** @type {Record<string, string>} */
  const params = {};
  for (const chunk of paramChunks) {
    const eq = chunk.indexOf('=');
    if (eq === -1) {
      continue;
    }
    params[chunk.slice(0, eq).toUpperCase()] = stripParamQuotes(chunk.slice(eq + 1));
  }

  return { name: rawName.toUpperCase(), value, params };
}
| 172 | |
/**
 * Decode RFC 5545 TEXT escapes: `\n` / `\N` → newline, `\,` → `,`,
 * `\;` → `;`, `\\` → `\`.
 *
 * Decoding is done in a single left-to-right pass so that an escaped
 * backslash is consumed before the character after it is looked at
 * (`\\n` decodes to a backslash followed by `n`, not to a newline).
 * Any other backslash sequence is left untouched.
 *
 * @param {string} value
 * @returns {string}
 */
export function unescapeIcsText(value) {
  return value.replace(/\\([\\;,nN])/g, (_whole, escaped) => (
    escaped === 'n' || escaped === 'N' ? '\n' : escaped
  ));
}
| 185 | |
/**
 * Build a normalized event from the parsed properties of one VEVENT.
 *
 * End time resolution, in order: DTEND, then DTSTART + DURATION, then a
 * default span (one day for DATE starts, 60 minutes otherwise). An end that
 * is not after the start is replaced by that same default span.
 * For every property the first occurrence in `props` is used.
 *
 * @param {{ name: string, value: string, params: Record<string, string> }[]} props
 * @param {string} defaultTimezone — Zone applied when a date value carries no TZID
 * @returns {NormalizedCalendarEvent | null} `null` when UID or DTSTART is missing
 */
function normalizeVevent(props, defaultTimezone) {
  const lookup = (name) => getProperty(props, name);

  const uid = lookup('UID')?.value;
  const startProp = lookup('DTSTART');
  if (!uid || !startProp) {
    return null;
  }

  const start = parseIcsDateTimeProp('DTSTART', startProp, defaultTimezone);
  const defaultEnd = () => (
    start.isDate ? addDaysUtc(start.instant, 1) : addMinutesUtc(start.instant, 60)
  );

  const endProp = lookup('DTEND');
  const durationProp = lookup('DURATION');

  let end;
  if (endProp) {
    // For all-day events DTEND is exclusive and is used as parsed.
    end = parseIcsDateTimeProp('DTEND', endProp, defaultTimezone).instant;
  } else if (durationProp) {
    end = addDuration(start.instant, durationProp.value);
  } else {
    end = defaultEnd();
  }
  if (end <= start.instant) {
    end = defaultEnd();
  }

  const status = normalizeStatus(lookup('STATUS')?.value);
  const transparency = (lookup('TRANSP')?.value ?? 'OPAQUE').toUpperCase();
  const summary = lookup('SUMMARY')?.value ?? null;
  const recurrenceRule = lookup('RRULE')?.value ?? null;

  return {
    external_uid: uid,
    start: start.instant.toISOString(),
    end: end.toISOString(),
    timezone: start.timezone,
    summary,
    busy: transparency !== 'TRANSPARENT',
    status,
    recurrence_rule: recurrenceRule,
  };
}
| 244 | |
/**
 * Find the first property with the given name.
 *
 * `name` is upper-cased before comparison; entries in `props` are compared
 * as stored.
 *
 * @param {{ name: string, value: string, params: Record<string, string> }[]} props
 * @param {string} name
 * @returns {{ name: string, value: string, params: Record<string, string> } | undefined}
 */
function getProperty(props, name) {
  const wanted = name.toUpperCase();
  for (const candidate of props) {
    if (candidate.name === wanted) {
      return candidate;
    }
  }
  return undefined;
}
| 254 | |
/**
 * Parse a DTSTART/DTEND-style property, deciding from its parameters and
 * value whether it is a DATE or a DATE-TIME.
 *
 * The value is treated as a DATE when `VALUE=DATE` is present (any case) or
 * when the trimmed value is exactly eight digits.
 *
 * @param {string} propName — Used in error messages
 * @param {{ value: string, params: Record<string, string> }} prop
 * @param {string} defaultTimezone
 * @returns {{ instant: Date, timezone: string, isDate: boolean }}
 */
function parseIcsDateTimeProp(propName, prop, defaultTimezone) {
  const { params, value } = prop;
  const declaredAsDate = (params.VALUE ?? '').toUpperCase() === 'DATE';
  const rawValue = value.trim();
  const treatAsDate = declaredAsDate || /^\d{8}$/.test(rawValue);
  return parseIcsDateTime(propName, rawValue, params.TZID, defaultTimezone, treatAsDate);
}
| 268 | |
/**
 * Parse one ICS DATE or DATE-TIME value into a UTC instant.
 *
 * Accepted forms:
 *  - DATE — eight digits, or any value when `isDate` is true (its first eight
 *    characters are read as `YYYYMMDD`): midnight of that day in the TZID /
 *    default zone.
 *  - UTC DATE-TIME — `YYYYMMDDTHHMMSSZ`: the TZID parameter is ignored and the
 *    reported timezone is `UTC`.
 *  - Local DATE-TIME — `YYYYMMDDTHHMMSS`: wall-clock time in the TZID zone,
 *    or in `defaultTimezone` when no TZID is given.
 *
 * All three forms have their calendar components validated; the UTC form
 * previously skipped this and let out-of-range values roll over silently.
 *
 * @param {string} propName — Used in error messages
 * @param {string} raw
 * @param {string|undefined} tzidFromParam
 * @param {string} defaultTimezone
 * @param {boolean} [isDate=false]
 * @returns {{ instant: Date, timezone: string, isDate: boolean }}
 * @throws {SyntaxError} When a DATE-TIME value does not match a supported form
 * @throws {RangeError} When components are out of range or the zone is unknown
 */
function parseIcsDateTime(propName, raw, tzidFromParam, defaultTimezone, isDate = false) {
  const trimmed = raw.trim();
  const dateOnly = isDate || /^\d{8}$/.test(trimmed);

  if (dateOnly) {
    const tz = normalizeTimezoneId(tzidFromParam ?? defaultTimezone);
    const midnight = {
      year: Number(trimmed.slice(0, 4)),
      month: Number(trimmed.slice(4, 6)),
      day: Number(trimmed.slice(6, 8)),
      hour: 0,
      minute: 0,
      second: 0,
    };
    return { instant: zonedLocalToUtc(midnight, tz), timezone: tz, isDate: true };
  }

  const match = trimmed.match(/^(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})(\d{2})(Z)?$/);
  if (!match) {
    throw new SyntaxError(`${propName} has unsupported datetime format`);
  }

  const [, ys, ms, ds, hs, mins, ss, zulu] = match;
  const components = {
    year: Number(ys),
    month: Number(ms),
    day: Number(ds),
    hour: Number(hs),
    minute: Number(mins),
    second: Number(ss),
  };

  if (zulu === 'Z') {
    // Date.UTC would silently roll month 13 / day 32 into the next period — reject instead,
    // exactly as the zoned path does via zonedLocalToUtc.
    validateLocalComponents(components);
    const instant = new Date(Date.UTC(
      components.year,
      components.month - 1,
      components.day,
      components.hour,
      components.minute,
      components.second,
    ));
    return { instant, timezone: 'UTC', isDate: false };
  }

  const tz = normalizeTimezoneId(tzidFromParam ?? defaultTimezone);
  return { instant: zonedLocalToUtc(components, tz), timezone: tz, isDate: false };
}
| 321 | |
/**
 * Convert wall-clock components in an IANA zone to a UTC instant.
 *
 * Works by successive correction: start from the instant whose UTC fields
 * equal the wall-clock fields, ask what the zone displays for that instant,
 * and shift the guess by the difference. At most four passes are made; the
 * last guess is returned even if the difference never reached zero
 * (NOTE(review): presumably possible for wall-clock times skipped by a DST
 * transition — confirm the desired result for that case).
 *
 * @param {{ year: number, month: number, day: number, hour: number, minute: number, second: number }} local
 * @param {string} timeZone
 * @returns {Date}
 */
export function zonedLocalToUtc(local, timeZone) {
  validateLocalComponents(local);

  const wallClockAsUtc = Date.UTC(
    local.year,
    local.month - 1,
    local.day,
    local.hour,
    local.minute,
    local.second,
  );

  let guess = wallClockAsUtc;
  let passesLeft = 4;
  while (passesLeft > 0) {
    passesLeft -= 1;
    const { year, month, day, hour, minute, second } = getZonedParts(new Date(guess), timeZone);
    const displayed = Date.UTC(year, month - 1, day, hour, minute, second);
    const correction = wallClockAsUtc - displayed;
    if (correction === 0) {
      break;
    }
    guess += correction;
  }

  return new Date(guess);
}
| 346 | |
/**
 * Formatters already built by `getZonedParts`, keyed by the `timeZone`
 * argument, so repeated conversions in the same zone reuse one formatter
 * instead of constructing a new `Intl.DateTimeFormat` on every call.
 *
 * @type {Map<string, Intl.DateTimeFormat>}
 */
const ZONED_PARTS_FORMATTERS = new Map();

/** Cache size at which `ZONED_PARTS_FORMATTERS` is emptied, to keep it bounded. */
const MAX_ZONED_PARTS_FORMATTERS = 64;

/**
 * Read the calendar fields that `timeZone` displays for the instant `date`.
 *
 * @param {Date} date
 * @param {string} timeZone
 * @returns {{ year: number, month: number, day: number, hour: number, minute: number, second: number }}
 * @throws {RangeError} When the zone is not accepted by `Intl.DateTimeFormat`
 *   or `date` is an invalid Date
 */
function getZonedParts(date, timeZone) {
  let fmt = ZONED_PARTS_FORMATTERS.get(timeZone);
  if (!fmt) {
    // Constructed before touching the cache so an invalid zone is never stored.
    fmt = new Intl.DateTimeFormat('en-US', {
      timeZone,
      year: 'numeric',
      month: '2-digit',
      day: '2-digit',
      hour: '2-digit',
      minute: '2-digit',
      second: '2-digit',
      hourCycle: 'h23',
    });
    if (ZONED_PARTS_FORMATTERS.size >= MAX_ZONED_PARTS_FORMATTERS) {
      ZONED_PARTS_FORMATTERS.clear();
    }
    ZONED_PARTS_FORMATTERS.set(timeZone, fmt);
  }

  /** @type {Record<string, string>} */
  const bag = {};
  for (const part of fmt.formatToParts(date)) {
    if (part.type !== 'literal') {
      bag[part.type] = part.value;
    }
  }

  return {
    year: Number(bag.year),
    month: Number(bag.month),
    day: Number(bag.day),
    // Defensive: fold a "24" midnight (seen from some engines) back to 0; a no-op for h23 output.
    hour: Number(bag.hour) % 24,
    minute: Number(bag.minute),
    second: Number(bag.second),
  };
}
| 379 | |
/**
 * Reject wall-clock components that do not describe a real date and time.
 *
 * Checks, in order: every supplied field is an integer; month is 1–12 and
 * day exists in that month of that year (leap years included); hour is 0–23,
 * minute 0–59 and second 0–60 (60 is allowed for a leap second, as in
 * RFC 5545). Without the day-of-month and time checks, `Date.UTC` would
 * silently roll values such as Feb 30 or hour 25 into the next period.
 *
 * @param {{ year: number, month: number, day: number, hour: number, minute: number, second: number }} local
 * @throws {RangeError} When any component is non-integer or out of range
 */
function validateLocalComponents(local) {
  for (const [key, val] of Object.entries(local)) {
    if (!Number.isInteger(val)) {
      throw new RangeError(`Invalid calendar component ${key}`);
    }
  }

  const { year, month, day, hour, minute, second } = local;

  if (month < 1 || month > 12) {
    throw new RangeError('Invalid calendar date');
  }
  const isLeapYear = (year % 4 === 0 && year % 100 !== 0) || year % 400 === 0;
  const monthLengths = [31, isLeapYear ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
  if (day < 1 || day > monthLengths[month - 1]) {
    throw new RangeError('Invalid calendar date');
  }

  if (hour < 0 || hour > 23 || minute < 0 || minute > 59 || second < 0 || second > 60) {
    throw new RangeError('Invalid calendar time');
  }
}
| 393 | |
/**
 * Validate a timezone id and return it trimmed.
 *
 * The input is stringified and trimmed; an empty result means `UTC`.
 * Anything else must be accepted by `Intl.DateTimeFormat` as a `timeZone`,
 * and is returned exactly as trimmed (no case or alias canonicalisation).
 *
 * @param {string} tz
 * @returns {string}
 * @throws {RangeError} When the runtime does not recognise the zone
 */
export function normalizeTimezoneId(tz) {
  const candidate = String(tz).trim();
  if (candidate.length === 0) {
    return 'UTC';
  }

  let recognised = true;
  try {
    // Construction alone is the validity probe; the formatter itself is discarded.
    new Intl.DateTimeFormat('en-US', { timeZone: candidate });
  } catch {
    recognised = false;
  }
  if (!recognised) {
    throw new RangeError(`Unknown IANA timezone: ${candidate}`);
  }
  return candidate;
}
| 410 | |
/**
 * Map an ICS STATUS value onto the event status enum.
 *
 * Matching is case-insensitive. `CANCELLED` and `TENTATIVE` map to their
 * lower-case forms; a missing value or any other value yields `confirmed`.
 *
 * @param {string|undefined|null} statusRaw
 * @returns {CalendarEventStatus}
 */
export function normalizeStatus(statusRaw) {
  switch ((statusRaw ?? 'CONFIRMED').toUpperCase()) {
    case 'CANCELLED':
      return 'cancelled';
    case 'TENTATIVE':
      return 'tentative';
    default:
      return 'confirmed';
  }
}
| 425 | |
/**
 * Return a new Date shifted by `days` using the UTC day-of-month field.
 * The input Date is not modified.
 *
 * @param {Date} instant
 * @param {number} days
 * @returns {Date}
 */
function addDaysUtc(instant, days) {
  const shifted = new Date(instant.getTime());
  const targetDayOfMonth = shifted.getUTCDate() + days;
  // setUTCDate normalises overflow/underflow into the neighbouring month.
  shifted.setUTCDate(targetDayOfMonth);
  return shifted;
}
| 436 | |
/**
 * Return a new Date that is `minutes` minutes after `instant`
 * (before it when `minutes` is negative).
 *
 * @param {Date} instant
 * @param {number} minutes
 * @returns {Date}
 */
function addMinutesUtc(instant, minutes) {
  const MS_PER_MINUTE = 60_000;
  const shiftedMs = instant.getTime() + minutes * MS_PER_MINUTE;
  return new Date(shiftedMs);
}
| 445 | |
/**
 * Add an ICS DURATION value to an instant.
 *
 * Supported grammar: an optional `+` or `-` sign, `P`, then any of
 * `nW`, `nD`, and a `T` section with `nH`, `nM`, `nS` (for example
 * `P1W`, `P1DT2H`, `PT30M`, `-PT15M`). Weeks and days are counted as fixed
 * 7 × 24 h and 24 h spans. Surrounding whitespace is ignored. A duration
 * with no fields (`P`, `PT`) is accepted and adds nothing.
 *
 * @param {Date} start
 * @param {string} duration
 * @returns {Date} New Date; `start` is not modified
 * @throws {SyntaxError} When `duration` does not match the supported grammar
 */
export function addDuration(start, duration) {
  const match = duration
    .trim()
    .match(/^([+-])?P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?$/);
  if (!match) {
    throw new SyntaxError('Unsupported DURATION format');
  }

  // Unmatched groups are undefined, so the destructuring defaults supply zeroes.
  const [, sign, weeks = 0, days = 0, hours = 0, minutes = 0, seconds = 0] = match;
  const totalDays = Number(weeks) * 7 + Number(days);
  const magnitudeMs =
    (((totalDays * 24 + Number(hours)) * 60 + Number(minutes)) * 60 + Number(seconds)) * 1000;

  const offsetMs = sign === '-' ? -magnitudeMs : magnitudeMs;
  return new Date(start.getTime() + offsetMs);
}
File History
1 commit
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago