url-fetch-safe.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | /** |
| 2 | * SSRF-hardened HTTP fetch for URL import: HTTPS only, DNS re-check each redirect hop, |
| 3 | * response size cap, timeout. |
| 4 | */ |
| 5 | |
| 6 | import dns from 'node:dns/promises'; |
| 7 | |
/**
 * IPv4 ranges that must never be fetched, matched against dotted-quad text.
 * @type {readonly RegExp[]}
 */
const PRIVATE_IPV4 = [
  /^0\./, // 0.0.0.0/8 "this network" (includes 0.0.0.0)
  /^10\./, // 10.0.0.0/8 private
  /^100\.(6[4-9]|[7-9]\d|1[01]\d|12[0-7])\./, // 100.64.0.0/10 carrier-grade NAT (second octet 64-127 only)
  /^127\./, // 127.0.0.0/8 loopback
  /^169\.254\./, // 169.254.0.0/16 link-local (cloud metadata endpoints live here)
  /^172\.(1[6-9]|2\d|3[01])\./, // 172.16.0.0/12 private
  /^192\.168\./, // 192.168.0.0/16 private
  /^198\.1[89]\./, // 198.18.0.0/15 benchmarking
  /^(22[4-9]|2[3-5]\d)\./, // 224.0.0.0/3 multicast, reserved and broadcast
];

/**
 * Test a dotted-quad IPv4 string against the blocked ranges.
 * @param {string} dotted
 * @returns {boolean}
 */
function isBlockedIpv4(dotted) {
  return dotted === '0.0.0.0' || PRIVATE_IPV4.some((r) => r.test(dotted));
}

/**
 * Expand a lower-cased textual IPv6 address (optionally ending in a dotted quad)
 * into its eight 16-bit groups.
 * @param {string} addr
 * @returns {number[] | null} eight integers, or null when the text is not valid IPv6
 */
function parseIpv6Groups(addr) {
  let text = addr;

  // Rewrite a trailing dotted quad ("::ffff:127.0.0.1") as two hex groups.
  const lastColon = text.lastIndexOf(':');
  const tail = text.slice(lastColon + 1);
  if (tail.includes('.')) {
    const octets = tail.split('.');
    const valid = octets.length === 4 && octets.every((o) => /^\d{1,3}$/.test(o) && Number(o) <= 255);
    if (!valid) return null;
    const [a, b, c, d] = octets.map(Number);
    text = `${text.slice(0, lastColon + 1)}${(a * 256 + b).toString(16)}:${(c * 256 + d).toString(16)}`;
  }

  const halves = text.split('::');
  if (halves.length > 2) return null; // "::" may appear at most once

  const toGroups = (part) => (part === '' ? [] : part.split(':'));
  const head = toGroups(halves[0]);
  const rest = halves.length === 2 ? toGroups(halves[1]) : [];
  if (![...head, ...rest].every((g) => /^[0-9a-f]{1,4}$/.test(g))) return null;

  // "::" stands for at least one zero group; without it all eight must be present.
  const missing = 8 - head.length - rest.length;
  if (halves.length === 2 ? missing < 1 : missing !== 0) return null;

  const zeros = Array.from({ length: missing }, () => '0');
  return [...head, ...zeros, ...rest].map((g) => Number.parseInt(g, 16));
}

/**
 * Decide whether an IP address (as text) points at a loopback, private, link-local,
 * multicast or otherwise non-public destination. Fails closed: empty, non-string and
 * unparseable IPv6 input is reported as blocked.
 *
 * IPv6 addresses that embed an IPv4 address (IPv4-mapped `::ffff:a.b.c.d`,
 * IPv4-compatible `::a.b.c.d`, NAT64 `64:ff9b::a.b.c.d`) are judged by the embedded
 * IPv4 address, so `::ffff:127.0.0.1` is blocked just like `127.0.0.1`.
 *
 * @param {string} ip
 * @returns {boolean} true when the address must not be contacted
 */
export function isPrivateOrBlockedIp(ip) {
  if (!ip || typeof ip !== 'string') return true;

  let addr = ip.trim().toLowerCase();
  if (addr.startsWith('[') && addr.endsWith(']')) addr = addr.slice(1, -1);
  const zoneAt = addr.indexOf('%'); // drop an IPv6 zone id such as "%eth0"
  if (zoneAt !== -1) addr = addr.slice(0, zoneAt);
  if (!addr) return true;

  if (!addr.includes(':')) return isBlockedIpv4(addr);

  const groups = parseIpv6Groups(addr);
  if (groups === null) return true;

  const isZero = (from, to) => groups.slice(from, to).every((g) => g === 0);
  const embedsIpv4 =
    (isZero(0, 5) && groups[5] === 0xffff) || // ::ffff:0:0/96 IPv4-mapped
    isZero(0, 6) || // ::/96 IPv4-compatible; also covers "::" and "::1" (0.0.0.0 / 0.0.0.1)
    (groups[0] === 0x64 && groups[1] === 0xff9b && isZero(2, 6)); // 64:ff9b::/96 NAT64
  if (embedsIpv4) {
    const dotted = `${groups[6] >> 8}.${groups[6] & 0xff}.${groups[7] >> 8}.${groups[7] & 0xff}`;
    return isBlockedIpv4(dotted);
  }

  const [first] = groups;
  if ((first & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
  if ((first & 0xfe00) === 0xfc00) return true; // fc00::/7 unique local
  if ((first & 0xff00) === 0xff00) return true; // ff00::/8 multicast
  return false;
}
| 31 | |
/**
 * Reject hostnames that name, or resolve to, a non-public destination.
 *
 * Every address returned by the resolver is checked, not just the first one, so a
 * name that publishes both a public and a private record is rejected.
 *
 * @param {string} hostname
 * @returns {Promise<void>} resolves when every resolved address is public
 * @throws {Error} for an invalid hostname, localhost / .local names, DNS failure,
 *   or when any resolved address is private or blocked
 */
async function assertHostnameResolvesToPublicIp(hostname) {
  if (!hostname || typeof hostname !== 'string') throw new Error('Invalid hostname');
  const h = hostname.trim().toLowerCase();

  // "localhost." is the fully-qualified spelling of "localhost"; compare without the root dot.
  const bare = h.endsWith('.') ? h.slice(0, -1) : h;
  if (bare === 'localhost' || h === '[::1]') {
    throw new Error('Requests to localhost are blocked (SSRF protection)');
  }
  if (bare.endsWith('.local')) {
    throw new Error('Requests to .local hosts are blocked (SSRF protection)');
  }

  // Keep the resolver call alone in the try block so that only resolver failures
  // are reported as DNS errors.
  let records;
  try {
    records = await dns.lookup(h, { all: true });
  } catch (e) {
    throw new Error(`DNS resolution failed for ${h}: ${e?.message || e}`, { cause: e });
  }
  if (!Array.isArray(records) || records.length === 0) {
    throw new Error(`DNS resolution failed for ${h}: no addresses returned`);
  }

  for (const { address } of records) {
    if (isPrivateOrBlockedIp(address)) {
      throw new Error(`Requests to private IP ranges are blocked (resolved ${h} -> ${address})`);
    }
  }
}
| 51 | |
/**
 * Parse user-supplied text into a URL and enforce the import policy:
 * absolute, https scheme, has a hostname, carries no userinfo.
 *
 * @param {string} urlString raw input; surrounding whitespace is ignored
 * @returns {URL} the parsed URL
 * @throws {Error} when the input is missing, unparseable, or violates the policy
 */
function parseHttpsUrl(urlString) {
  const hasText = typeof urlString === 'string' && urlString.trim() !== '';
  if (!hasText) {
    throw new Error('URL is required');
  }

  let parsed = null;
  try {
    parsed = new URL(urlString.trim());
  } catch {
    parsed = null;
  }
  if (parsed === null) {
    throw new Error('Invalid URL');
  }

  // Policy checks in priority order; the first violated rule decides the message.
  const rules = [
    [parsed.protocol !== 'https:', 'Only https:// URLs are allowed'],
    [!parsed.hostname, 'URL must include a hostname'],
    [Boolean(parsed.username || parsed.password), 'URLs with embedded credentials are not allowed'],
  ];
  const violated = rules.find(([isViolated]) => isViolated);
  if (violated) {
    throw new Error(violated[1]);
  }
  return parsed;
}
| 69 | |
/**
 * Fetch a URL for import with SSRF guards: https only, redirects followed manually
 * with the target hostname re-validated on every hop, a cap on the response body
 * size, and one overall timeout covering all hops and the body read.
 *
 * NOTE(review): `fetch` resolves the hostname again after the hostname check, so a
 * DNS answer that changes between the two lookups is not caught here.
 *
 * @param {string} urlString
 * @param {{ maxBytes?: number, timeoutMs?: number, maxRedirects?: number, userAgent?: string }} [opts]
 *   maxBytes defaults to 5 MiB, timeoutMs to 15000, maxRedirects to 8.
 * @returns {Promise<{ finalUrl: string, status: number, contentType: string, text: string }>}
 * @throws {Error} on policy violations, redirect problems, non-2xx status,
 *   oversized bodies, or timeout
 */
export async function fetchUrlForImport(urlString, opts = {}) {
  const maxBytes = opts.maxBytes ?? 5 * 1024 * 1024;
  const timeoutMs = opts.timeoutMs ?? 15_000;
  const maxRedirects = opts.maxRedirects ?? 8;
  const userAgent = opts.userAgent ?? 'Knowtation-UrlImport/1.0';

  let current = parseHttpsUrl(urlString);
  await assertHostnameResolvesToPublicIp(current.hostname);

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);

  try {
    for (let hop = 0; hop <= maxRedirects; hop++) {
      const res = await fetch(current.href, {
        method: 'GET',
        redirect: 'manual', // follow redirects ourselves so each hop can be validated
        signal: controller.signal,
        headers: {
          'User-Agent': userAgent,
          Accept: 'text/html,application/xhtml+xml,text/plain;q=0.9,*/*;q=0.8',
        },
      });

      if (res.status >= 300 && res.status < 400) {
        // The redirect body is never used; release it so the connection is not held open.
        await discardResponseBody(res);

        const loc = res.headers.get('location');
        if (!loc || hop === maxRedirects) {
          throw new Error(loc ? 'Too many redirects' : `HTTP ${res.status} without Location`);
        }
        let next;
        try {
          next = new URL(loc, current.href);
        } catch {
          throw new Error('Invalid redirect Location');
        }
        if (next.protocol !== 'https:') throw new Error('Redirect to non-https URL is not allowed');
        if (next.username || next.password) throw new Error('Redirect URL must not contain credentials');
        current = next;
        await assertHostnameResolvesToPublicIp(current.hostname);
        continue;
      }

      if (!res.ok) {
        // Only a short excerpt is reported, so never read an unbounded error body.
        const errText = await readErrorSnippet(res);
        throw new Error(`HTTP ${res.status} ${res.statusText}${errText ? `: ${errText.slice(0, 200)}` : ''}`);
      }

      const contentType = (res.headers.get('content-type') || '').split(';')[0].trim().toLowerCase();
      const text = await readTextBodyLimited(res, maxBytes);
      return {
        finalUrl: current.href,
        status: res.status,
        contentType,
        text,
      };
    }
    throw new Error('Too many redirects');
  } catch (e) {
    // The controller is only ever aborted by the timer above.
    if (e?.name === 'AbortError') {
      throw new Error(`URL fetch timed out after ${timeoutMs}ms`, { cause: e });
    }
    throw e;
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Cancel an unread response body. Best effort: a failure to cancel is ignored
 * because there is nothing further to release.
 * @param {Response} res
 * @returns {Promise<void>}
 */
async function discardResponseBody(res) {
  try {
    await res.body?.cancel();
  } catch {
    // Deliberately ignored: the body is being thrown away either way.
  }
}

/**
 * Read at most roughly `limitBytes` of a response body as text for use in an error
 * message, then cancel the rest. Best effort: read failures yield whatever was
 * decoded so far (possibly the empty string) instead of throwing.
 * @param {Response} res
 * @param {number} [limitBytes]
 * @returns {Promise<string>}
 */
async function readErrorSnippet(res, limitBytes = 1024) {
  if (!res.body) return '';
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let snippet = '';
  let seen = 0;
  try {
    while (seen < limitBytes) {
      const { done, value } = await reader.read();
      if (done) break;
      if (!value) continue;
      seen += value.byteLength;
      snippet += decoder.decode(value, { stream: true });
    }
  } catch {
    // Deliberately ignored: the excerpt only decorates the HTTP status error.
  } finally {
    await reader.cancel().catch(() => {});
  }
  return snippet;
}
| 139 | |
/**
 * Collect a response body as UTF-8 text, refusing bodies larger than `maxBytes`.
 * Streams are read chunk by chunk and cancelled as soon as the running total
 * passes the limit; responses without a stream are buffered and then size-checked.
 *
 * @param {Response} res
 * @param {number} maxBytes largest accepted body size in bytes (inclusive)
 * @returns {Promise<string>} the decoded body
 * @throws {Error} when the body is larger than `maxBytes`
 */
async function readTextBodyLimited(res, maxBytes) {
  const tooLarge = () => new Error(`Response body exceeds ${maxBytes} bytes`);

  const stream = res.body;
  if (!stream) {
    const whole = await res.arrayBuffer();
    if (whole.byteLength > maxBytes) {
      throw tooLarge();
    }
    return Buffer.from(whole).toString('utf8');
  }

  const reader = stream.getReader();
  const pieces = [];
  let received = 0;

  for (let step = await reader.read(); !step.done; step = await reader.read()) {
    const piece = step.value;
    if (!piece) {
      continue;
    }
    received += piece.byteLength;
    if (received <= maxBytes) {
      pieces.push(Buffer.from(piece));
      continue;
    }
    // Over the cap: stop the download; a failing cancel must not mask the size error.
    try {
      await reader.cancel();
    } catch (_) {}
    throw tooLarge();
  }

  return Buffer.concat(pieces).toString('utf8');
}
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
1 day ago