lib/media-url-extract.mjs · aaronrene/knowtation — MuseHub

aaronrene / knowtation public

media-url-extract.mjs

143 lines 3.7 KB

Raw

sha256:8915fe406161f95c1681f9469375e7bae5b28c884f00bedbdef65e4b0cd0738d docs(flow): commit FLOW-V0-SPEC.md hygiene for 7A-INT merge Human 12 hours ago

1	/**
2	* Extract image and video URLs from markdown note bodies.
3	* Foundation for Phase 18 MCP image/video resources and Hub rendering.
4	*/
5
/** Maximum number of URLs an extractor returns for a single note body. */
const MAX_URLS_PER_NOTE = 50;

/** Image file extension (lowercase, no dot) mapped to its MIME type. */
const IMAGE_EXT_MIME = {
  jpg: 'image/jpeg',
  jpeg: 'image/jpeg',
  png: 'image/png',
  gif: 'image/gif',
  webp: 'image/webp',
  svg: 'image/svg+xml',
};

/** Video file extension (lowercase, no dot) mapped to its MIME type. */
const VIDEO_EXT_MIME = {
  mp4: 'video/mp4',
  webm: 'video/webm',
  mov: 'video/quicktime',
};

// Extension lists derived once from the tables so membership checks and
// MIME lookups cannot drift apart.
const IMAGE_EXTENSIONS = Object.keys(IMAGE_EXT_MIME);
const VIDEO_EXTENSIONS = Object.keys(VIDEO_EXT_MIME);
25
/**
 * Return the file extension of a URL's path, ignoring any query string
 * and fragment.
 *
 * Only the final path segment is inspected, so a dot in a directory name
 * (e.g. `/v1.2/image`) is not mistaken for an extension.
 *
 * @param {string} url - Absolute URL. Strings that `new URL` rejects
 *   (relative paths, malformed input) fall back to plain string splitting.
 * @returns {string} extension without dot, lowercased; `''` when the last
 *   path segment contains no dot
 */
function extractExtension(url) {
  let path;
  try {
    path = new URL(url).pathname;
  } catch {
    // Not parseable as an absolute URL: drop the query string and fragment by hand.
    path = url.split('?')[0].split('#')[0];
  }

  // lastIndexOf returns -1 when there is no slash, so the slice keeps the whole string.
  const segment = path.slice(path.lastIndexOf('/') + 1);
  const dot = segment.lastIndexOf('.');
  return dot === -1 ? '' : segment.slice(dot + 1).toLowerCase();
}
45
/**
 * Markdown image syntax: ![alt](url)
 * Captures: group 1 = alt text, group 2 = URL (http/https only; the URL ends
 * at the first whitespace or closing parenthesis).
 * Global regex: `lastIndex` is stateful, so reset it to 0 before each scan.
 */
const MD_IMAGE_RE = /!\[([^\]]*)\]\((https?:\/\/[^)\s]+)\)/gi;

/**
 * Bare URL on its own line (not inside markdown link/image syntax).
 * Matches lines that consist solely of an http/https URL, optionally
 * surrounded by spaces or tabs. Captures: group 1 = URL.
 * Global + multiline regex: `^`/`$` anchor at line boundaries and
 * `lastIndex` is stateful, so reset it to 0 before each scan.
 */
const BARE_URL_LINE_RE = /^[ \t]*(https?:\/\/[^\s]+)[ \t]*$/gm;
57
/**
 * Extract image URLs from a markdown body.
 * Finds both `![alt](url)` syntax and bare image URLs on their own line.
 * Markdown-image matches are collected first, then bare-line matches.
 * A URL counts as an image when its path extension is a key of
 * IMAGE_EXT_MIME; duplicates (compared after trimming) are dropped and at
 * most MAX_URLS_PER_NOTE entries are returned.
 * @param {string} body
 * @returns {Array<{ alt: string, url: string, mimeType: string }>} empty
 *   array when `body` is empty or not a string
 */
export function extractImageUrls(body) {
  if (!body || typeof body !== 'string') return [];

  const seen = new Set();
  const results = [];

  // Record `url` when it is a new, non-data: URL with an image extension.
  const addIfImage = (url, alt) => {
    const trimmed = url.trim();
    if (seen.has(trimmed)) return;
    if (/^data:/i.test(trimmed)) return;
    const ext = extractExtension(trimmed);
    // The image and video tables share no keys, so this membership test
    // alone already excludes video URLs.
    if (!IMAGE_EXTENSIONS.includes(ext)) return;
    seen.add(trimmed);
    results.push({
      alt: alt || '',
      url: trimmed,
      mimeType: IMAGE_EXT_MIME[ext] || 'image/png',
    });
  };

  // matchAll starts from the regex's current lastIndex, so reset the shared
  // global regex first; the iteration itself does not mutate it.
  MD_IMAGE_RE.lastIndex = 0;
  for (const match of body.matchAll(MD_IMAGE_RE)) {
    if (results.length >= MAX_URLS_PER_NOTE) return results;
    addIfImage(match[2], match[1]);
  }

  BARE_URL_LINE_RE.lastIndex = 0;
  for (const match of body.matchAll(BARE_URL_LINE_RE)) {
    if (results.length >= MAX_URLS_PER_NOTE) break;
    addIfImage(match[1], '');
  }

  return results;
}
102
/**
 * Extract video URLs from a markdown body.
 * Finds bare video URLs and video URLs inside `![alt](url)` syntax.
 * Markdown-image matches are collected first, then bare-line matches.
 * A URL counts as a video when its path extension is a key of
 * VIDEO_EXT_MIME; duplicates (compared after trimming) are dropped and at
 * most MAX_URLS_PER_NOTE entries are returned.
 * @param {string} body
 * @returns {Array<{ url: string, mimeType: string }>} empty array when
 *   `body` is empty or not a string
 */
export function extractVideoUrls(body) {
  if (!body || typeof body !== 'string') return [];

  const seen = new Set();
  const results = [];

  // Record `url` when it is a new, non-data: URL with a video extension.
  const addIfVideo = (url) => {
    const trimmed = url.trim();
    if (seen.has(trimmed)) return;
    if (/^data:/i.test(trimmed)) return;
    const ext = extractExtension(trimmed);
    if (!VIDEO_EXTENSIONS.includes(ext)) return;
    seen.add(trimmed);
    results.push({
      url: trimmed,
      mimeType: VIDEO_EXT_MIME[ext] || 'video/mp4',
    });
  };

  // matchAll starts from the regex's current lastIndex, so reset the shared
  // global regex first; the iteration itself does not mutate it.
  MD_IMAGE_RE.lastIndex = 0;
  for (const match of body.matchAll(MD_IMAGE_RE)) {
    if (results.length >= MAX_URLS_PER_NOTE) return results;
    addIfVideo(match[2]);
  }

  BARE_URL_LINE_RE.lastIndex = 0;
  for (const match of body.matchAll(BARE_URL_LINE_RE)) {
    if (results.length >= MAX_URLS_PER_NOTE) break;
    addIfVideo(match[1]);
  }

  return results;
}
142
143	export { MAX_URLS_PER_NOTE, IMAGE_EXT_MIME, VIDEO_EXT_MIME };

File History 1 commit

sha256:8915fe406161f95c1681f9469375e7bae5b28c884f00bedbdef65e4b0cd0738d docs(flow): commit FLOW-V0-SPEC.md hygiene for 7A-INT merge Human 12 hours ago