/**
 * Extract image and video URLs from markdown note bodies.
 * Foundation for Phase 18 MCP image/video resources and Hub rendering.
 */

// Upper bound on the number of entries each extractor returns for one body.
const MAX_URLS_PER_NOTE = 50;

// Image file extensions (lowercase, no leading dot) mapped to MIME types.
const IMAGE_EXT_MIME = {
  jpg: 'image/jpeg',
  jpeg: 'image/jpeg',
  png: 'image/png',
  gif: 'image/gif',
  webp: 'image/webp',
  svg: 'image/svg+xml',
};

// Video file extensions (lowercase, no leading dot) mapped to MIME types.
const VIDEO_EXT_MIME = {
  mp4: 'video/mp4',
  webm: 'video/webm',
  mov: 'video/quicktime',
};

// Extension lists derived from the tables above, used for membership checks.
const IMAGE_EXTENSIONS = Object.keys(IMAGE_EXT_MIME);
const VIDEO_EXTENSIONS = Object.keys(VIDEO_EXT_MIME);

/**
 * Return the file extension of a URL's last path segment.
 *
 * The query string and fragment are ignored. Only the final path segment is
 * inspected, so a dot inside a directory name (e.g. `/v1.2/file`) is never
 * mistaken for an extension separator.
 * @param {string} url - absolute URL, or any path-like string
 * @returns {string} extension without the dot, lowercased; '' when there is none
 */
function extractExtension(url) {
  let path;
  try {
    path = new URL(url).pathname;
  } catch {
    // Not parseable as an absolute URL: strip the query string and fragment
    // by hand and treat what is left as the path.
    path = url.split('?')[0].split('#')[0];
  }
  // lastIndexOf returns -1 when there is no '/', so the slice starts at 0 and
  // the whole string is treated as the final segment.
  const segment = path.slice(path.lastIndexOf('/') + 1);
  const dot = segment.lastIndexOf('.');
  if (dot === -1) return '';
  return segment.slice(dot + 1).toLowerCase();
}

/**
 * Markdown image syntax: ![alt](url) or ![alt](url "title")
 * Captures: group 1 = alt text, group 2 = URL
 *
 * An optional title ("...", '...' or (...)) and optional spaces or tabs
 * before the closing parenthesis are accepted but not captured, so images
 * written with a title are no longer skipped.
 * The `g` flag makes `exec` stateful: reset `lastIndex` to 0 before scanning.
 */
const MD_IMAGE_RE =
  /!\[([^\]]*)\]\((https?:\/\/[^)\s]+)(?:[ \t]+(?:"[^"\n]*"|'[^'\n]*'|\([^)\n]*\)))?[ \t]*\)/gi;

/**
 * Bare URL on its own line (not inside markdown link/image syntax).
 * Matches lines that consist solely of a URL, with optional surrounding
 * spaces or tabs. Captures: group 1 = URL
 * The `g` flag makes `exec` stateful: reset `lastIndex` to 0 before scanning.
 */
const BARE_URL_LINE_RE = /^[ \t]*(https?:\/\/[^\s]+)[ \t]*$/gm;

/**
 * Collect the image URLs referenced by a markdown body.
 *
 * Markdown image syntax (`![alt](url)`) is scanned first, followed by bare
 * URLs that occupy a line of their own. Each distinct URL is reported once,
 * and no more than MAX_URLS_PER_NOTE entries are returned.
 * @param {string} body - markdown text; empty or non-string input yields []
 * @returns {Array<{ alt: string, url: string, mimeType: string }>}
 */
export function extractImageUrls(body) {
  if (typeof body !== 'string' || body === '') return [];

  const images = [];
  const knownUrls = new Set();

  // Records one candidate when it is a new, non-data image URL and the cap
  // has not been reached; otherwise does nothing.
  const collect = (rawUrl, altText) => {
    if (images.length >= MAX_URLS_PER_NOTE) return;
    const url = rawUrl.trim();
    if (knownUrls.has(url) || /^data:/i.test(url)) return;
    const ext = extractExtension(url);
    const isImage =
      IMAGE_EXTENSIONS.includes(ext) && !VIDEO_EXTENSIONS.includes(ext);
    if (!isImage) return;
    knownUrls.add(url);
    images.push({
      alt: altText || '',
      url,
      mimeType: IMAGE_EXT_MIME[ext] || 'image/png',
    });
  };

  // The patterns are shared and global; start each scan from the beginning.
  MD_IMAGE_RE.lastIndex = 0;
  for (const [, altText, url] of body.matchAll(MD_IMAGE_RE)) {
    collect(url, altText);
  }

  BARE_URL_LINE_RE.lastIndex = 0;
  for (const [, url] of body.matchAll(BARE_URL_LINE_RE)) {
    collect(url, '');
  }

  return images;
}

/**
 * Collect the video URLs referenced by a markdown body.
 *
 * URLs inside `![alt](url)` syntax are scanned first, followed by bare URLs
 * that occupy a line of their own. Each distinct URL is reported once, and
 * no more than MAX_URLS_PER_NOTE entries are returned.
 * @param {string} body - markdown text; empty or non-string input yields []
 * @returns {Array<{ url: string, mimeType: string }>}
 */
export function extractVideoUrls(body) {
  if (typeof body !== 'string' || body === '') return [];

  const videos = [];
  const knownUrls = new Set();

  // Records one candidate when it is a new, non-data video URL and the cap
  // has not been reached; otherwise does nothing.
  const collect = (rawUrl) => {
    if (videos.length >= MAX_URLS_PER_NOTE) return;
    const url = rawUrl.trim();
    if (knownUrls.has(url) || /^data:/i.test(url)) return;
    const ext = extractExtension(url);
    if (!VIDEO_EXTENSIONS.includes(ext)) return;
    knownUrls.add(url);
    videos.push({ url, mimeType: VIDEO_EXT_MIME[ext] || 'video/mp4' });
  };

  // The patterns are shared and global; start each scan from the beginning.
  MD_IMAGE_RE.lastIndex = 0;
  for (const [, , url] of body.matchAll(MD_IMAGE_RE)) {
    collect(url);
  }

  BARE_URL_LINE_RE.lastIndex = 0;
  for (const [, url] of body.matchAll(BARE_URL_LINE_RE)) {
    collect(url);
  }

  return videos;
}

export { MAX_URLS_PER_NOTE, IMAGE_EXT_MIME, VIDEO_EXT_MIME };