url-fetch-safe.mjs
168 lines 5.4 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
1 /**
2 * SSRF-hardened HTTP fetch for URL import: HTTPS only, DNS re-check each redirect hop,
3 * response size cap, timeout.
4 */
5
6 import dns from 'node:dns/promises';
7
/**
 * Dotted-quad prefixes of IPv4 ranges that must never be fetched.
 * Each pattern is anchored at the start of the address string.
 *
 * @type {readonly RegExp[]}
 */
const PRIVATE_IPV4 = [
  /^127\./, // 127.0.0.0/8 loopback
  /^10\./, // 10.0.0.0/8 private
  /^172\.(1[6-9]|2\d|3[01])\./, // 172.16.0.0/12 private
  /^192\.168\./, // 192.168.0.0/16 private
  /^169\.254\./, // 169.254.0.0/16 link-local
  /^0\./, // 0.0.0.0/8
  // 100.64.0.0/10 shared address space: second octet 64-127 only.
  // (100.128.x and above are ordinary public addresses.)
  /^100\.(6[4-9]|[7-9]\d|1[01]\d|12[0-7])\./,
];

/**
 * Matches an IPv4-mapped IPv6 address (compressed `::ffff:` or fully
 * expanded `0:0:0:0:0:ffff:` prefix) and captures the trailing 32 bits.
 */
const IPV4_MAPPED_IPV6 = /^(?:::|(?:0{1,4}:){5})ffff:(.+)$/;

/**
 * Extracts the embedded IPv4 address from an IPv4-mapped IPv6 literal.
 *
 * @param {string} lower lower-cased IP literal
 * @returns {string | null} dotted-quad IPv4, or null when `lower` is not IPv4-mapped
 */
function ipv4FromMappedIpv6(lower) {
  const mapped = IPV4_MAPPED_IPV6.exec(lower);
  if (!mapped) return null;
  const tail = mapped[1];
  // Mixed notation, e.g. ::ffff:127.0.0.1
  if (/^\d{1,3}(\.\d{1,3}){3}$/.test(tail)) return tail;
  // Pure hex notation, e.g. ::ffff:7f00:1
  const groups = /^([0-9a-f]{1,4}):([0-9a-f]{1,4})$/.exec(tail);
  if (!groups) return null;
  const high = Number.parseInt(groups[1], 16);
  const low = Number.parseInt(groups[2], 16);
  return `${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`;
}

/**
 * Reports whether an IP literal is loopback, private, link-local, unspecified,
 * or otherwise in a range this module refuses to connect to.
 *
 * Empty / missing input is treated as blocked. IPv4-mapped IPv6 addresses are
 * unwrapped and judged by their embedded IPv4 address, so `::ffff:127.0.0.1`
 * cannot be used to side-step the IPv4 rules.
 *
 * @param {string} ip IPv4 or IPv6 address literal (no brackets)
 * @returns {boolean} true when the address must not be fetched
 */
export function isPrivateOrBlockedIp(ip) {
  if (!ip || ip === '0.0.0.0') return true;
  const lower = ip.toLowerCase();
  // IPv6 loopback and unspecified address.
  if (lower === '::1' || lower === '::') return true;
  // fe80::/10 link-local spans fe80: through febf:.
  if (/^fe[89ab][0-9a-f]:/.test(lower)) return true;
  // fc00::/7 unique local addresses.
  if (lower.startsWith('fc') || lower.startsWith('fd')) return true;
  const candidate = ipv4FromMappedIpv6(lower) ?? ip;
  return PRIVATE_IPV4.some((r) => r.test(candidate));
}
31
/**
 * Rejects unless `hostname` is acceptable as a fetch target.
 *
 * The name is trimmed and lower-cased, then refused outright when it is
 * `localhost`, `[::1]`, or ends in `.local`. Otherwise it is resolved with
 * `dns.lookup` and EVERY returned address is checked with
 * `isPrivateOrBlockedIp`; a single blocked address fails the whole host, so a
 * name that answers with both a public and a private record cannot pass.
 *
 * NOTE(review): this resolves independently of whatever later opens the
 * connection, so on its own it does not pin the address that is actually
 * dialed (DNS rebinding) - confirm whether that is handled elsewhere.
 *
 * @param {string} hostname host component of the URL being fetched
 * @returns {Promise<void>} resolves when every resolved address is allowed
 * @throws {Error} on an invalid hostname, a blocked name or address, or a DNS failure
 */
async function assertHostnameResolvesToPublicIp(hostname) {
  if (!hostname || typeof hostname !== 'string') throw new Error('Invalid hostname');
  const h = hostname.trim().toLowerCase();
  if (h === 'localhost' || h === '[::1]') throw new Error('Requests to localhost are blocked (SSRF protection)');
  if (h.endsWith('.local')) throw new Error('Requests to .local hosts are blocked (SSRF protection)');

  // Resolution failures and policy failures are raised from separate places
  // so neither has to be recognised by sniffing the other's message text.
  let records;
  try {
    records = await dns.lookup(h, { all: true });
  } catch (e) {
    throw new Error(`DNS resolution failed for ${h}: ${e?.message || e}`, { cause: e });
  }
  if (!Array.isArray(records) || records.length === 0) {
    throw new Error(`DNS resolution failed for ${h}: no addresses returned`);
  }

  const offending = records.find(({ address }) => isPrivateOrBlockedIp(address));
  if (offending) {
    throw new Error(`Requests to private IP ranges are blocked (resolved ${h} -> ${offending.address})`);
  }
}
51
/**
 * Parses and validates a user-supplied URL for import.
 *
 * The input is trimmed before parsing. Only absolute `https:` URLs that carry
 * a hostname and no embedded username/password are accepted.
 *
 * @param {string} urlString raw URL text
 * @returns {URL} the parsed URL
 * @throws {Error} when the input is missing, unparseable, not https, has no
 *   hostname, or contains credentials
 */
function parseHttpsUrl(urlString) {
  const hasText = typeof urlString === 'string' && urlString.trim() !== '';
  if (!hasText) {
    throw new Error('URL is required');
  }

  let parsed;
  try {
    parsed = new URL(urlString.trim());
  } catch {
    throw new Error('Invalid URL');
  }

  const { protocol, hostname, username, password } = parsed;
  if (protocol !== 'https:') {
    throw new Error('Only https:// URLs are allowed');
  }
  if (!hostname) {
    throw new Error('URL must include a hostname');
  }
  if (username || password) {
    throw new Error('URLs with embedded credentials are not allowed');
  }
  return parsed;
}
69
/**
 * Fetches a URL for import while following redirects by hand.
 *
 * The starting URL is validated with `parseHttpsUrl`, and the hostname of the
 * starting URL and of every redirect target is checked with
 * `assertHostnameResolvesToPublicIp` before it is requested. Redirect targets
 * must be https and must not carry credentials. A single abort timer, started
 * after the first hostname check, covers all hops and the body read. Both the
 * success body and any error body are read through `readTextBodyLimited`, so
 * `maxBytes` bounds every body this function buffers.
 *
 * @param {string} urlString URL to fetch
 * @param {{ maxBytes?: number, timeoutMs?: number, maxRedirects?: number, userAgent?: string }} [opts]
 *   maxBytes: body size cap (default 5 MiB); timeoutMs: overall timeout
 *   (default 15000); maxRedirects: redirects allowed (default 8); userAgent:
 *   User-Agent header value
 * @returns {Promise<{ finalUrl: string, status: number, contentType: string, text: string }>}
 *   finalUrl is the URL that produced the response; contentType is the
 *   lower-cased media type without parameters
 * @throws {Error} on validation failure, blocked host, bad or excessive
 *   redirects, non-2xx status, oversized body, or timeout
 */
export async function fetchUrlForImport(urlString, opts = {}) {
  const maxBytes = opts.maxBytes ?? 5 * 1024 * 1024;
  const timeoutMs = opts.timeoutMs ?? 15_000;
  const maxRedirects = opts.maxRedirects ?? 8;
  const userAgent = opts.userAgent ?? 'Knowtation-UrlImport/1.0';

  let current = parseHttpsUrl(urlString);
  await assertHostnameResolvesToPublicIp(current.hostname);

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);

  try {
    for (let hop = 0; hop <= maxRedirects; hop++) {
      const res = await fetch(current.href, {
        method: 'GET',
        // Redirects are followed manually so each target can be vetted first.
        redirect: 'manual',
        signal: controller.signal,
        headers: {
          'User-Agent': userAgent,
          Accept: 'text/html,application/xhtml+xml,text/plain;q=0.9,*/*;q=0.8',
        },
      });

      if (res.status >= 300 && res.status < 400) {
        // The redirect body is never used; cancel it so the underlying
        // connection is released instead of waiting on an unread stream.
        await res.body?.cancel().catch(() => {});

        const loc = res.headers.get('location');
        if (!loc || hop === maxRedirects) {
          throw new Error(loc ? 'Too many redirects' : `HTTP ${res.status} without Location`);
        }
        let next;
        try {
          // Location may be relative; resolve it against the current URL.
          next = new URL(loc, current.href);
        } catch {
          throw new Error('Invalid redirect Location');
        }
        if (next.protocol !== 'https:') throw new Error('Redirect to non-https URL is not allowed');
        if (next.username || next.password) throw new Error('Redirect URL must not contain credentials');
        current = next;
        await assertHostnameResolvesToPublicIp(current.hostname);
        continue;
      }

      if (!res.ok) {
        // Best-effort snippet for the error message. Read through the capped
        // reader so a hostile server cannot force an unbounded buffer here.
        const errText = await readTextBodyLimited(res, maxBytes).catch(() => '');
        throw new Error(`HTTP ${res.status} ${res.statusText}${errText ? `: ${errText.slice(0, 200)}` : ''}`);
      }

      const contentType = (res.headers.get('content-type') || '').split(';')[0].trim().toLowerCase();
      const text = await readTextBodyLimited(res, maxBytes);
      return {
        finalUrl: current.href,
        status: res.status,
        contentType,
        text,
      };
    }
    // Only reachable when maxRedirects is negative and the loop never runs.
    throw new Error('Too many redirects');
  } catch (e) {
    if (e?.name === 'AbortError') {
      throw new Error(`URL fetch timed out after ${timeoutMs}ms`, { cause: e });
    }
    throw e;
  } finally {
    // Single cleanup point for every exit path.
    clearTimeout(timer);
  }
}
139
/**
 * Reads a response body as UTF-8 text, refusing to buffer more than
 * `maxBytes` bytes.
 *
 * When the response exposes a body stream it is consumed chunk by chunk and
 * cancelled as soon as the running total passes the limit. When there is no
 * stream, the body is taken from `arrayBuffer()` and its length is checked.
 *
 * @param {Response} res response whose body should be read
 * @param {number} maxBytes maximum number of body bytes to accept
 * @returns {Promise<string>} the body decoded as UTF-8
 * @throws {Error} when the body is larger than `maxBytes`
 */
async function readTextBodyLimited(res, maxBytes) {
  const tooLarge = () => new Error(`Response body exceeds ${maxBytes} bytes`);

  const stream = res.body;
  if (!stream) {
    const whole = await res.arrayBuffer();
    if (whole.byteLength > maxBytes) throw tooLarge();
    return Buffer.from(whole).toString('utf8');
  }

  const reader = stream.getReader();
  const pieces = [];
  let received = 0;
  for (let step = await reader.read(); !step.done; step = await reader.read()) {
    const piece = step.value;
    if (!piece) continue;
    received += piece.byteLength;
    if (received > maxBytes) {
      // Stop the transfer; a failure to cancel must not mask the size error.
      try {
        await reader.cancel();
      } catch (_) {}
      throw tooLarge();
    }
    pieces.push(Buffer.from(piece));
  }
  return Buffer.concat(pieces).toString('utf8');
}
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 1 day ago