"""TDD tests for _fetch_file_history performance fix.

Problem: _fetch_file_history loads up to 300 commits and calls
get_snapshot_manifest() once per commit — 300 individual DB queries +
300 full msgpack deserializations per file page view.

Fix: batch-fetch all snapshot manifests with a single IN query using
get_snapshot_manifests_batch(), then look up the file path in the
resulting dict.

Covers:
  _fetch_file_history — query count
  - test_file_history_does_not_call_per_commit_manifest_fetch
  - test_file_history_calls_batch_fetch_once

  _fetch_file_history — correctness
  - test_file_history_returns_only_commits_where_file_changed
  - test_file_history_returns_empty_when_file_not_in_head
  - test_file_history_returns_empty_when_no_commits
  - test_file_history_respects_limit
  - test_file_history_unchanged_file_returns_one_entry
"""
from __future__ import annotations

import secrets
from datetime import datetime, timezone, timedelta
from contextlib import asynccontextmanager
from typing import AsyncGenerator

import msgpack
import pytest
from sqlalchemy.ext.asyncio import AsyncSession

from muse.core.types import long_id, now_utc_iso
from musehub.api.routes.musehub.ui_blob import _fetch_file_history
from musehub.core.genesis import compute_identity_id, compute_repo_id
from musehub.db import database as _database
from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef
from musehub.types.json_types import JSONObject, StrDict

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Identity id derived from the fixed seed b"perf-tester"; used as the repo
# owner in _make_repo and as an input to _repo_id.
_OWNER_ID = compute_identity_id(b"perf-tester")
# Path whose history the tests request from _fetch_file_history.
_FILE = "musehub/services/billing.py"
# A different path, used to build a head snapshot that does NOT contain _FILE.
_OTHER_FILE = "musehub/services/auth.py"


def _uid() -> str:
    """Return a fresh random id: ``long_id`` applied to 64 random hex characters."""
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)


def _repo_id() -> str:
    """Compute a repo id for ``_OWNER_ID`` using a randomly suffixed ``perf-test-*`` name."""
    unique_name = f"perf-test-{secrets.token_hex(4)}"
    stamp = now_utc_iso()
    return compute_repo_id(_OWNER_ID, unique_name, "code", stamp)


def _snap_id() -> str:
    """Return a fresh random snapshot id: ``long_id`` applied to 64 random hex characters."""
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)


def _obj_id(tag: str) -> str:
    """Derive a deterministic object id from *tag*.

    The tag's encoded bytes are rendered as hex, right-padded with ``"0"`` up
    to 64 characters (longer values are left untouched), and handed to
    ``long_id``.
    """
    hex_tag = tag.encode().hex()
    # Fill "0", left-align, minimum width 64 — same result as ljust(64, "0").
    return long_id(f"{hex_tag:0<64}")


def _manifest_blob(path_oid: StrDict) -> bytes:
    """Serialize a path → object-id mapping to msgpack bytes."""
    packed: bytes = msgpack.packb(path_oid, use_bin_type=True)
    return packed


async def _make_repo(session: AsyncSession) -> str:
    """Insert and commit a public ``perf-test`` repo row and return its repo id."""
    new_repo_id = _repo_id()
    # One timestamp shared by created_at and updated_at.
    stamp = datetime.now(tz=timezone.utc)
    repo_row = MusehubRepo(
        repo_id=new_repo_id,
        name="perf-test",
        owner="perf-tester",
        slug="perf-test",
        visibility="public",
        owner_user_id=_OWNER_ID,
        created_at=stamp,
        updated_at=stamp,
    )
    session.add(repo_row)
    await session.commit()
    return new_repo_id


async def _add_snapshot(session: AsyncSession, repo_id: str, manifest: StrDict) -> str:
    """Stage a snapshot for *repo_id* and return the new snapshot id.

    Adds a ``MusehubSnapshot`` row holding the msgpack-encoded *manifest* and a
    ``MusehubSnapshotRef`` row linking it to the repo, then flushes (the caller
    is responsible for committing).
    """
    new_snapshot_id = _snap_id()
    snapshot_row = MusehubSnapshot(
        snapshot_id=new_snapshot_id,
        directories=[],
        manifest_blob=_manifest_blob(manifest),
        entry_count=len(manifest),
    )
    ref_row = MusehubSnapshotRef(repo_id=repo_id, snapshot_id=new_snapshot_id)
    session.add_all([snapshot_row, ref_row])
    await session.flush()
    return new_snapshot_id


@asynccontextmanager
async def _fresh_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield a brand-new session from the (test-overridden) session factory.

    Reads go through this helper instead of the shared ``db_session``:
    using that one session for both writes and reads leaves asyncpg in an
    unexpected state on teardown, so each read gets its own short-lived
    session that ends when the ``async with`` block exits.
    """
    make_session = _database._async_session_factory
    async with make_session() as fresh:
        yield fresh


async def _add_commit(
    session: AsyncSession,
    repo_id: str,
    snapshot_id: str,
    branch: str = "main",
    ts_offset_seconds: int = 0,
    message: str = "feat: change",
) -> str:
    """Stage a parentless commit pointing at *snapshot_id* and return its id.

    The commit timestamp is the current UTC time shifted by
    *ts_offset_seconds*, which lets callers control the relative ordering of
    commits.  A ``MusehubCommitRef`` row links the commit to *repo_id*.  The
    rows are flushed but not committed.
    """
    new_commit_id = _uid()
    offset = timedelta(seconds=ts_offset_seconds)
    committed_at = datetime.now(tz=timezone.utc) + offset
    commit_row = MusehubCommit(
        commit_id=new_commit_id,
        branch=branch,
        parent_ids=[],
        message=message,
        author="tester",
        timestamp=committed_at,
        snapshot_id=snapshot_id,
    )
    ref_row = MusehubCommitRef(repo_id=repo_id, commit_id=new_commit_id)
    session.add_all([commit_row, ref_row])
    await session.flush()
    return new_commit_id


# ---------------------------------------------------------------------------
# Query-count tests — these fail until the N+1 is fixed
# ---------------------------------------------------------------------------


@pytest.mark.anyio
async def test_file_history_does_not_call_per_commit_manifest_fetch(
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The single-snapshot ``get_snapshot_manifest`` must never be invoked.

    Before the fix it was called once per commit inside the 300-row loop.
    The module-level name is replaced with a recording stub; after fetching
    the history the recording must be empty.
    """
    import musehub.api.routes.musehub.ui_blob as ui_blob_module

    recorded_ids: list[str] = []

    async def _record_single_fetch(session: AsyncSession, snapshot_id: str) -> JSONObject:  # type: ignore[override]
        recorded_ids.append(snapshot_id)
        return {}

    monkeypatch.setattr(ui_blob_module, "get_snapshot_manifest", _record_single_fetch)

    # Arrange: one repo with a single HEAD commit that contains the file.
    repo_id = await _make_repo(db_session)
    snapshot_id = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v2")})
    head_commit_id = await _add_commit(db_session, repo_id, snapshot_id, ts_offset_seconds=10)
    await db_session.commit()

    # Act: read through an isolated session.
    async with _fresh_session() as reader:
        await _fetch_file_history(reader, repo_id, _FILE, head_commit_id)

    assert not recorded_ids, f"get_snapshot_manifest was called {len(recorded_ids)} time(s) — N+1 not fixed"


@pytest.mark.anyio
async def test_file_history_calls_batch_fetch_once(
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``get_snapshot_manifests_batch`` must be used in place of per-commit fetches.

    The batch function is wrapped with a recording pass-through.  Note that,
    despite "once" in the test name, the assertions only require at least one
    batch call and that the HEAD snapshot id is among the ids requested.
    """
    import musehub.api.routes.musehub.ui_blob as ui_blob_module
    from musehub.services import musehub_snapshot as snapshot_service

    recorded_batches: list[list[str]] = []
    original_batch = snapshot_service.get_snapshot_manifests_batch

    async def _record_batch(session: AsyncSession, snapshot_ids: list[str]) -> JSONObject:  # type: ignore[override]
        recorded_batches.append(list(snapshot_ids))
        return await original_batch(session, snapshot_ids)

    monkeypatch.setattr(ui_blob_module, "get_snapshot_manifests_batch", _record_batch)

    repo_id = await _make_repo(db_session)
    # Five commits, each with a distinct file version; the values left after
    # the final iteration belong to HEAD.
    for version in range(5):
        head_snap_id = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id(f"v{version}")})
        head_commit_id = await _add_commit(
            db_session, repo_id, head_snap_id, ts_offset_seconds=version * 10
        )

    await db_session.commit()

    async with _fresh_session() as reader:
        await _fetch_file_history(reader, repo_id, _FILE, head_commit_id)

    assert recorded_batches, "get_snapshot_manifests_batch was never called"
    head_was_fetched = any(head_snap_id in batch for batch in recorded_batches)
    assert head_was_fetched, "head snapshot_id must be included in batch fetch"


# ---------------------------------------------------------------------------
# Correctness tests
# ---------------------------------------------------------------------------


@pytest.mark.anyio
async def test_file_history_returns_only_commits_where_file_changed(
    db_session: AsyncSession,
) -> None:
    """Only commits where the file's object_id changes between adjacent snapshots are returned.

    Timeline (oldest → newest): c1(v1), c2(v1), c3(v2), c4(v3).  The history
    is expected to be built walking backward from HEAD, keeping a commit
    whenever its object id differs from the newer neighbour's: c4 (HEAD),
    c3 and c2 are kept, while c1 repeats c2's object id and is collapsed.
    """
    repo_id = await _make_repo(db_session)

    # Commit 1: file = v1  (oldest — same oid as c2, collapsed, should NOT appear)
    s1 = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v1")})
    c1 = await _add_commit(db_session, repo_id, s1, ts_offset_seconds=0, message="init")

    # Commit 2: file = v1  (differs from the newer c3 — should appear)
    s2 = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v1")})
    c2 = await _add_commit(db_session, repo_id, s2, ts_offset_seconds=10, message="no-op")

    # Commit 3: file = v2  (changed — should appear)
    s3 = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v2")})
    c3 = await _add_commit(db_session, repo_id, s3, ts_offset_seconds=20, message="feat: v2")

    # Commit 4: file = v3  (changed — HEAD, should appear)
    s4 = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v3")})
    c4 = await _add_commit(db_session, repo_id, s4, ts_offset_seconds=30, message="feat: v3")

    await db_session.commit()

    async with _fresh_session() as read_session:
        history = await _fetch_file_history(read_session, repo_id, _FILE, c4)

    commit_ids = {h["commit_id_full"] for h in history}
    assert c4 in commit_ids, "HEAD commit (v3) should be in history"
    assert c3 in commit_ids, "commit that introduced v2 should be in history"
    # Walking backward: c4(v3)→c3(v2)→c2(v1)→c1(v1)
    # c2 appears because the file changed from v2→v1 between c3 and c2.
    assert c2 in commit_ids, "c2 (v1) differs from c3 (v2) — should be in history"
    # c1 is skipped because c1 and c2 have the same oid — consecutive duplicates are collapsed.
    assert c1 not in commit_ids, "c1 has same oid as c2 — consecutive duplicate, should be skipped"


@pytest.mark.anyio
async def test_file_history_returns_empty_when_file_not_in_head(
    db_session: AsyncSession,
) -> None:
    """An empty list comes back when the head snapshot lacks the requested path."""
    repo_id = await _make_repo(db_session)
    # HEAD's manifest only contains _OTHER_FILE, never _FILE.
    manifest_without_file = {_OTHER_FILE: _obj_id("v1")}
    snapshot_id = await _add_snapshot(db_session, repo_id, manifest_without_file)
    head_commit_id = await _add_commit(db_session, repo_id, snapshot_id)
    await db_session.commit()

    async with _fresh_session() as reader:
        result = await _fetch_file_history(reader, repo_id, _FILE, head_commit_id)

    assert result == []


@pytest.mark.anyio
async def test_file_history_returns_empty_when_no_commits(
    db_session: AsyncSession,
) -> None:
    """An empty list comes back when the head commit id is not present in the DB."""
    repo_id = await _make_repo(db_session)
    await db_session.commit()

    async with _fresh_session() as reader:
        # A freshly generated id that was never inserted as a commit.
        unknown_head = _uid()
        result = await _fetch_file_history(reader, repo_id, _FILE, unknown_head)

    assert result == []


@pytest.mark.anyio
async def test_file_history_respects_limit(
    db_session: AsyncSession,
) -> None:
    """History is capped at the requested limit even when more changes exist.

    Twenty-five commits are created, each with a distinct object id for the
    file, and the history is requested with ``limit=5``.  The result must be
    non-empty (the file exists in HEAD) and must not exceed the limit — a bare
    ``<= 5`` check would also pass for an empty result.
    """
    repo_id = await _make_repo(db_session)

    # The id left after the final iteration is the newest commit, i.e. HEAD.
    head_cid = ""
    for i in range(25):
        snap = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id(f"v{i}")})
        head_cid = await _add_commit(db_session, repo_id, snap, ts_offset_seconds=i * 10)

    await db_session.commit()

    async with _fresh_session() as read_session:
        history = await _fetch_file_history(read_session, repo_id, _FILE, head_cid, limit=5)

    assert history, "history must not be empty — the file exists in HEAD"
    assert len(history) <= 5


@pytest.mark.anyio
async def test_file_history_unchanged_file_returns_one_entry(
    db_session: AsyncSession,
) -> None:
    """A file whose object id is identical in every snapshot yields exactly one entry."""
    repo_id = await _make_repo(db_session)

    # Four commits, all carrying the same "v1" object id; the id left after
    # the final iteration is HEAD.
    for step in range(4):
        snapshot_id = await _add_snapshot(db_session, repo_id, {_FILE: _obj_id("v1")})
        head_commit_id = await _add_commit(
            db_session, repo_id, snapshot_id, ts_offset_seconds=step * 10
        )

    await db_session.commit()

    async with _fresh_session() as reader:
        result = await _fetch_file_history(reader, repo_id, _FILE, head_commit_id)

    assert len(result) == 1