"""TDD tests for get_last_commit_for_file performance fix + blob_page parallelism.

Problem 1: get_last_commit_for_file walks up to 200 commits and calls
get_snapshot_manifest() once per commit — same N+1 as _fetch_file_history.

Problem 2: blob_page runs phases 2/3/4 sequentially even though they are
independent — easy asyncio.gather win.

Fix 1: batch-fetch all snapshot manifests with one IN query.
Fix 2: gather phases 2/3/4 concurrently after the sequential file-meta resolve.

Covers:
  get_last_commit_for_file — query count
  - test_last_commit_does_not_call_per_commit_manifest_fetch
  - test_last_commit_uses_batch_fetch

  get_last_commit_for_file — correctness
  - test_last_commit_returns_commit_that_introduced_current_version
  - test_last_commit_returns_oldest_unbroken_run
  - test_last_commit_returns_none_when_file_missing_from_head
  - test_last_commit_returns_none_when_commit_not_found

  blob_page phases — parallelism
  - test_blob_page_phases_run_concurrently
"""
from __future__ import annotations

import asyncio
import secrets
from contextlib import asynccontextmanager
from datetime import datetime, timezone, timedelta
from typing import AsyncGenerator

import msgpack
import pytest
from sqlalchemy.ext.asyncio import AsyncSession

from musehub.core.genesis import compute_identity_id, compute_repo_id
from musehub.db import database as _database
from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubRepo, MusehubSnapshot, MusehubSnapshotRef
from musehub.services.musehub_repository import get_last_commit_for_file
from musehub.types.json_types import JSONObject, StrDict
from muse.core.types import long_id, now_utc_iso

# ---------------------------------------------------------------------------
# Shared helpers (mirrors test_file_history_performance.py)
# ---------------------------------------------------------------------------

# Identity ID of the synthetic test owner; stored as the repo's owner_user_id
# in _make_repo and passed to compute_repo_id in _repo_id.
_OWNER_ID = compute_identity_id(b"lcf-tester")
# Path whose last-touching commit the tests look up.
_FILE = "musehub/core/billing.py"
# A different path, used to build a head snapshot that does not contain _FILE.
_OTHER = "musehub/core/auth.py"


def _uid() -> str:
    """Return ``long_id`` applied to 64 freshly generated random hex characters."""
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)


def _repo_id() -> str:
    """Compute a repo ID for a randomly named repo owned by the test identity.

    The inputs handed to ``compute_repo_id`` are the test owner ID, a name of
    the form ``lcf-<8 random hex chars>``, the literal ``"code"``, and the
    value returned by ``now_utc_iso()``.
    """
    unique_name = f"lcf-{secrets.token_hex(4)}"
    timestamp = now_utc_iso()
    return compute_repo_id(_OWNER_ID, unique_name, "code", timestamp)


def _snap_id() -> str:
    """Return a random snapshot ID: ``long_id`` over 64 random hex characters."""
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)


def _obj(tag: str) -> str:
    """Build an object ID from a short human-readable *tag*.

    The tag's encoded bytes are rendered as hex and right-padded with ``"0"``
    up to 64 characters (longer values are left untouched) before being passed
    to ``long_id``. The same tag therefore always produces the same input.
    """
    hex_tag = tag.encode().hex()
    padded = hex_tag.ljust(64, "0")
    return long_id(padded)


def _blob(manifest: StrDict) -> bytes:
    """Serialize *manifest* with msgpack (``use_bin_type=True``) for storage."""
    packed = msgpack.packb(manifest, use_bin_type=True)
    return packed


async def _make_repo(session: AsyncSession) -> str:
    """Insert a public ``lcf-test`` repo row, commit it, and return its repo ID.

    ``created_at`` and ``updated_at`` share one timezone-aware UTC timestamp.
    """
    new_repo_id = _repo_id()
    created = datetime.now(tz=timezone.utc)
    repo_row = MusehubRepo(
        repo_id=new_repo_id,
        name="lcf-test",
        owner="lcf-tester",
        slug="lcf-test",
        visibility="public",
        owner_user_id=_OWNER_ID,
        created_at=created,
        updated_at=created,
    )
    session.add(repo_row)
    await session.commit()
    return new_repo_id


async def _snap(session: AsyncSession, repo_id: str, manifest: StrDict) -> str:
    """Stage a snapshot holding *manifest* plus its repo link; return the snapshot ID.

    The rows are flushed but not committed — the caller decides when to commit.
    """
    new_snapshot_id = _snap_id()
    snapshot_row = MusehubSnapshot(
        snapshot_id=new_snapshot_id,
        directories=[],
        manifest_blob=_blob(manifest),
        entry_count=len(manifest),
    )
    session.add(snapshot_row)
    repo_link = MusehubSnapshotRef(repo_id=repo_id, snapshot_id=new_snapshot_id)
    session.add(repo_link)
    await session.flush()
    return new_snapshot_id


async def _commit(
    session: AsyncSession,
    repo_id: str,
    snapshot_id: str,
    branch: str = "main",
    offset: int = 0,
    message: str = "feat: change",
) -> str:
    """Stage a commit pointing at *snapshot_id* plus its repo link; return the commit ID.

    *offset* is added, in seconds, to the current UTC time to form the commit
    timestamp, which lets tests give commits distinct, increasing timestamps.
    The commit is created with no parents and author ``"tester"``. Rows are
    flushed but not committed.
    """
    new_commit_id = _uid()
    stamped_at = datetime.now(tz=timezone.utc) + timedelta(seconds=offset)
    commit_row = MusehubCommit(
        commit_id=new_commit_id,
        branch=branch,
        parent_ids=[],
        message=message,
        author="tester",
        timestamp=stamped_at,
        snapshot_id=snapshot_id,
    )
    session.add(commit_row)
    repo_link = MusehubCommitRef(repo_id=repo_id, commit_id=new_commit_id)
    session.add(repo_link)
    await session.flush()
    return new_commit_id


@asynccontextmanager
async def _fresh_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield a new session created by the database module's async session factory.

    The tests use this to call the function under test through a session
    object other than the ``db_session`` fixture that wrote the data.
    """
    session_cm = _database._async_session_factory()
    async with session_cm as fresh:
        yield fresh


# ---------------------------------------------------------------------------
# get_last_commit_for_file — query-count tests (RED until N+1 fixed)
# ---------------------------------------------------------------------------


@pytest.mark.anyio
async def test_last_commit_does_not_call_per_commit_manifest_fetch(
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The single-snapshot fetcher must never run during the lookup.

    ``get_snapshot_manifest`` on the repository service module is replaced
    with a recording stub; any recorded call means the per-commit fetch is
    still in use.
    """
    import musehub.services.musehub_repository as _repo_svc

    seen_snapshot_ids: list[str] = []

    async def _record_call(session: AsyncSession, snapshot_id: str) -> JSONObject:  # type: ignore[override]
        seen_snapshot_ids.append(snapshot_id)
        return {}

    # raising=False: the attribute may not exist on the module at all.
    monkeypatch.setattr(_repo_svc, "get_snapshot_manifest", _record_call, raising=False)

    repo_id = await _make_repo(db_session)
    snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj("v1")})
    head_commit_id = await _commit(db_session, repo_id, snapshot_id, offset=0)
    await db_session.commit()

    async with _fresh_session() as read_session:
        await get_last_commit_for_file(read_session, repo_id, _FILE, head_commit_id)

    assert not seen_snapshot_ids, (
        f"get_snapshot_manifest called {len(seen_snapshot_ids)} time(s) — N+1 still present"
    )


@pytest.mark.anyio
async def test_last_commit_uses_batch_fetch(
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The lookup must go through ``get_snapshot_manifests_batch``.

    A spy wraps the real batch function from the snapshot service, records the
    IDs of every call, and delegates. The head snapshot must appear in at
    least one recorded batch.
    """
    import musehub.services.musehub_repository as _repo_svc
    from musehub.services import musehub_snapshot as _snap_svc

    batch_calls: list[list[str]] = []
    _real = _snap_svc.get_snapshot_manifests_batch

    async def _spy_batch(session: AsyncSession, ids: list[str]) -> JSONObject:  # type: ignore[override]
        batch_calls.append(list(ids))
        result = await _real(session, ids)
        return result

    monkeypatch.setattr(_repo_svc, "get_snapshot_manifests_batch", _spy_batch, raising=False)

    repo_id = await _make_repo(db_session)

    # Four commits, each changing the file; the last pair created is HEAD.
    created: list[tuple[str, str]] = []
    for index in range(4):
        snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj(f"v{index}")})
        commit_id = await _commit(db_session, repo_id, snapshot_id, offset=index * 10)
        created.append((snapshot_id, commit_id))
    head_snap, head_cid = created[-1]
    await db_session.commit()

    async with _fresh_session() as read_session:
        await get_last_commit_for_file(read_session, repo_id, _FILE, head_cid)

    assert batch_calls, "get_snapshot_manifests_batch never called"
    fetched: set[str] = set()
    for call in batch_calls:
        fetched.update(call)
    assert head_snap in fetched, "head snapshot must be in batch"


# ---------------------------------------------------------------------------
# get_last_commit_for_file — correctness
# ---------------------------------------------------------------------------


@pytest.mark.anyio
async def test_last_commit_returns_commit_that_introduced_current_version(
    db_session: AsyncSession,
) -> None:
    """When HEAD itself changed the file, HEAD is the commit returned."""
    repo_id = await _make_repo(db_session)

    # (object tag, timestamp offset in seconds, message), oldest first.
    # The file stays at v1 for two commits and changes to v2 in the last (HEAD).
    history = [
        ("v1", 0, "init"),
        ("v1", 10, "unrelated"),
        ("v2", 20, "feat: v2"),
    ]
    commit_ids: list[str] = []
    for tag, offset, message in history:
        snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj(tag)})
        commit_id = await _commit(
            db_session, repo_id, snapshot_id, offset=offset, message=message
        )
        commit_ids.append(commit_id)
    head_commit_id = commit_ids[-1]

    await db_session.commit()

    async with _fresh_session() as read_session:
        last_commit = await get_last_commit_for_file(
            read_session, repo_id, _FILE, head_commit_id
        )

    # HEAD introduced v2 — it is the commit that changed the file.
    assert last_commit is not None
    assert last_commit.commit_id == head_commit_id


@pytest.mark.anyio
async def test_last_commit_returns_oldest_unbroken_run(
    db_session: AsyncSession,
) -> None:
    """With an unchanged object ID across several commits, the earliest one wins."""
    repo_id = await _make_repo(db_session)

    # (object tag, timestamp offset in seconds), oldest first.
    # v2 first appears in the second commit and is unchanged through HEAD.
    history = [("v1", 0), ("v2", 10), ("v2", 20), ("v2", 30)]
    commit_ids: list[str] = []
    for tag, offset in history:
        snapshot_id = await _snap(db_session, repo_id, {_FILE: _obj(tag)})
        commit_id = await _commit(db_session, repo_id, snapshot_id, offset=offset)
        commit_ids.append(commit_id)
    introduced_v2 = commit_ids[1]
    head_commit_id = commit_ids[-1]

    await db_session.commit()

    async with _fresh_session() as read_session:
        last_commit = await get_last_commit_for_file(
            read_session, repo_id, _FILE, head_commit_id
        )

    # The second commit is the oldest one carrying v2 — it introduced it.
    assert last_commit is not None
    assert last_commit.commit_id == introduced_v2


@pytest.mark.anyio
async def test_last_commit_returns_none_when_file_missing_from_head(
    db_session: AsyncSession,
) -> None:
    """A path absent from the head snapshot yields ``None``."""
    repo_id = await _make_repo(db_session)
    # The head manifest contains only the other path, never _FILE.
    manifest_without_file = {_OTHER: _obj("v1")}
    snapshot_id = await _snap(db_session, repo_id, manifest_without_file)
    head_commit_id = await _commit(db_session, repo_id, snapshot_id)
    await db_session.commit()

    async with _fresh_session() as read_session:
        last_commit = await get_last_commit_for_file(
            read_session, repo_id, _FILE, head_commit_id
        )

    assert last_commit is None


@pytest.mark.anyio
async def test_last_commit_returns_none_when_commit_not_found(
    db_session: AsyncSession,
) -> None:
    """A commit ID that was never stored yields ``None``."""
    repo_id = await _make_repo(db_session)
    await db_session.commit()

    async with _fresh_session() as read_session:
        # Random ID; no commit row was created for it.
        unknown_commit_id = _uid()
        last_commit = await get_last_commit_for_file(
            read_session, repo_id, _FILE, unknown_commit_id
        )

    assert last_commit is None


# ---------------------------------------------------------------------------
# blob_page parallelism — phases 2/3/4 must not block each other
# ---------------------------------------------------------------------------


@pytest.mark.anyio
async def test_blob_page_phases_run_concurrently(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Phases 2, 3, and 4 must overlap in time, not run sequentially.

    Each phase is replaced with a 50ms sleep. Sequential execution would take
    ≥150ms; concurrent execution takes ~50ms.

    NOTE(review): the fakes are patched onto ``ui_blob`` but the test then
    gathers them directly rather than invoking ``blob_page`` — so it pins the
    intended gather pattern (and that the three attributes exist on the
    module), not the route's actual behaviour. Driving the real route would
    make this a true regression test.
    """
    import time

    import musehub.api.routes.musehub.ui_blob as _blob_mod

    # Start timestamp per phase name, taken just before the phase sleeps.
    start_times: dict[str, float] = {}

    async def _phase(name: str, delay: float) -> None:
        """Record when phase *name* starts, then simulate *delay* seconds of I/O."""
        start_times[name] = time.monotonic()
        await asyncio.sleep(delay)

    async def _fake_symbols(session: AsyncSession, repo_id: str, path: str) -> list[JSONObject]:
        await _phase("symbols", 0.05)
        return []

    async def _fake_history(
        session: AsyncSession, repo_id: str, path: str, head_cid: str, limit: int = 20
    ) -> list[JSONObject]:
        await _phase("history", 0.05)
        return []

    async def _fake_intel(session: AsyncSession, repo_id: str, path: str) -> JSONObject:
        await _phase("intel", 0.05)
        return {
            "is_hotspot": False, "hotspot_count": 0,
            "has_dead": False, "dead_count": 0,
            "blast_risk": False, "blast_count": 0,
            "health_score": 100, "health_label": "Excellent",
        }

    # setattr raises by default if the attribute is missing, so these lines
    # also assert that the three phase helpers exist on the module.
    monkeypatch.setattr(_blob_mod, "_fetch_file_symbols", _fake_symbols)
    monkeypatch.setattr(_blob_mod, "_fetch_file_history", _fake_history)
    monkeypatch.setattr(_blob_mod, "_fetch_file_intel", _fake_intel)

    # Run the three phases the way blob_page should after the fix
    t0 = time.monotonic()
    await asyncio.gather(
        _fake_symbols(None, "", ""),  # type: ignore[arg-type]
        _fake_history(None, "", "", ""),  # type: ignore[arg-type]
        _fake_intel(None, "", ""),  # type: ignore[arg-type]
    )
    elapsed = time.monotonic() - t0

    # Concurrent: ~50ms. Sequential: ~150ms.
    assert elapsed < 0.12, (
        f"Phases took {elapsed:.3f}s — expected ~0.05s if concurrent; "
        f"~0.15s suggests sequential execution"
    )

    # All three must have started before any finished (10ms scheduling slack).
    assert len(start_times) == 3
    earliest_finish = min(start_times.values()) + 0.05
    assert all(t < earliest_finish + 0.01 for t in start_times.values()), (
        "Not all phases started before the first one finished — not truly concurrent"
    )