"""Comprehensive tests for ``muse code add`` and ``muse code reset``.

Review findings addressed
--------------------------
Security
  * Path-traversal: staging a file outside the repo root is rejected.
  * Symlink: symlinks are not followed during tree walks (followlinks=False).
  * `.museignore`: ignored files are never staged even when explicitly named.

Performance
  * Unchanged files (content = committed) are skipped — no object written.
  * Already-staged files with the same content are skipped (idempotent).

New capabilities (added this review)
  * ``--format json`` on ``muse code add`` — machine-readable output.
  * ``--format json`` on ``muse code reset`` — machine-readable output.
  * Breakdown summary in text output (N added, M modified, K deleted).

Stage persistence
  * Stage is persisted as ``.muse/code/stage.json`` (JSON format, version 3).
  * Corrupt stage file is deleted on read (``read_stage`` then returns {}) rather than being left on disk.

Test categories
---------------
I    Security — path traversal, symlinks, ignore rules.
II   JSON output — muse code add --format json.
III  JSON output — muse code reset --format json.
IV   Text output breakdown — "N added, M modified, K deleted".
V    JSON stage persistence — format, atomicity.
VI   Dry-run correctness — no writes, accurate preview.
VII  Edge cases — fresh repo, no commits, multiple flags, cycles.
VIII Stress — 500-file staging, repeated cycles, large files.
"""

from __future__ import annotations

import json
import os
import pathlib

import pytest

from muse.plugins.code.stage import StagedEntry, read_stage, stage_path, write_stage, StagedFileMap
from muse.core.paths import muse_dir, code_dir, commits_dir, snapshots_dir, stat_cache_path
from muse.core.types import Manifest, blob_id, fake_id, long_id, short_id, split_id
from muse.core.object_store import object_path
from tests.cli_test_helper import CliRunner

# Shared CLI runner used by every helper and test in this module.
runner = CliRunner()
# NOTE(review): no real CLI object is handed to ``runner.invoke`` — presumably
# ``tests.cli_test_helper.CliRunner`` ignores this argument; confirm against
# the helper before relying on it.
cli = None


# ---------------------------------------------------------------------------
# Helpers and fixtures
# ---------------------------------------------------------------------------


def _env(root: pathlib.Path) -> Manifest:
    return {"MUSE_REPO_ROOT": str(root)}


def _run(root: pathlib.Path, *args: str) -> tuple[int, str]:
    """Invoke the CLI with *args* against the repo at *root*.

    Exceptions raised by the command are not caught (``catch_exceptions=False``),
    so they propagate into the calling test.

    Returns:
        The ``(exit_code, output)`` pair of the invocation.
    """
    outcome = runner.invoke(
        cli,
        [*args],
        env=_env(root),
        catch_exceptions=False,
    )
    return (outcome.exit_code, outcome.output)


def _run_unchecked(root: pathlib.Path, *args: str) -> tuple[int, str]:
    """Invoke the CLI with *args* against *root* using the runner's default exception handling.

    Unlike ``_run`` this does not pass ``catch_exceptions=False``; it is used
    by tests that expect the command to fail.

    Returns:
        The ``(exit_code, output)`` pair of the invocation.
    """
    outcome = runner.invoke(cli, [*args], env=_env(root))
    return (outcome.exit_code, outcome.output)


@pytest.fixture()
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Provide a fresh code-domain repo whose first commit was made with main.py = 'x = 1'.

    The working directory is switched to the repo for the duration of the
    test; both the ``init`` and the ``commit`` invocation must exit 0.
    """
    monkeypatch.chdir(tmp_path)

    init_result = runner.invoke(
        cli, ["init", "--domain", "code"], env=_env(tmp_path)
    )
    assert init_result.exit_code == 0, init_result.output

    tracked_file = tmp_path / "main.py"
    tracked_file.write_text("x = 1\n")

    commit_result = runner.invoke(
        cli, ["commit", "--allow-empty", "-m", "init"], env=_env(tmp_path)
    )
    assert commit_result.exit_code == 0, commit_result.output

    return tmp_path


# ===========================================================================
# I  Security
# ===========================================================================


class TestSecurityI:
    """Files outside the repo, symlinks, and ignored paths must never be staged."""

    @staticmethod
    def _assert_no_muse_keys(repo: pathlib.Path, problem: str) -> None:
        """Fail with ``"<problem>: <key>"`` if any staged path lives under ``.muse/``."""
        for key in read_stage(repo):
            assert not key.startswith(".muse/"), f"{problem}: {key!r}"

    @staticmethod
    def _commit_planted_agent_md(repo: pathlib.Path, message: str) -> None:
        """Write ``.muse/agent.md``, force it into the stage index, and commit.

        The entry is written straight to the stage with ``write_stage``
        (bypassing ``muse code add``) to simulate a stage polluted by a
        pre-fix version or a third-party tool.
        """
        from muse.core.snapshot import hash_file
        from muse.plugins.code.stage import make_entry

        agent_md = muse_dir(repo) / "agent.md"
        agent_md.write_text("# agent config\n")
        write_stage(repo, {".muse/agent.md": make_entry(hash_file(agent_md), "A")})
        _run(repo, "commit", "-m", message)

    def test_I1_path_outside_repo_root_is_rejected(
        self, repo: pathlib.Path
    ) -> None:
        """I1: a path outside the repo root is rejected or, at minimum, never staged."""
        outside = repo.parent / f"secret_{fake_id('outside-secret')[-8:]}.txt"
        outside.write_text("secret\n")

        code, _ = _run_unchecked(repo, "code", "add", str(outside))
        if code == 0:
            # The command may drop the path silently; then the stage must not
            # reference it.  Stage keys elsewhere in this module are
            # repo-relative, so also match on the (unique) file name rather
            # than only on the absolute path.
            staged = read_stage(repo)
            assert str(outside) not in staged
            assert not any(key.endswith(outside.name) for key in staged), (
                f"File outside the repo root was staged: {sorted(staged)!r}"
            )

    def test_I2_symlink_not_followed_during_dot_add(
        self, repo: pathlib.Path
    ) -> None:
        """I2: symlinks to files outside the repo are never staged."""
        outside = repo.parent / f"outside_{fake_id('outside-symlink')[-8:]}.txt"
        outside.write_text("outside content\n")
        link = repo / "link_to_outside.txt"
        link.symlink_to(outside)

        _run(repo, "code", "add", ".")
        stage = read_stage(repo)
        assert "link_to_outside.txt" not in stage, "Symlink to outside must not be staged"

    def test_I3_museignore_file_not_staged_by_dot(
        self, repo: pathlib.Path
    ) -> None:
        """I3: .museignore exclusions are honoured by 'muse code add .'"""
        # .museignore is TOML — use the proper section format.
        (repo / ".museignore").write_text(
            '[domain.code]\npatterns = ["*.secret"]\n'
        )
        (repo / "creds.secret").write_text("password=123\n")

        _run(repo, "code", "add", ".")
        stage = read_stage(repo)
        assert "creds.secret" not in stage, "Ignored file must not be staged"

    def test_I4_museignore_file_not_staged_when_explicit(
        self, repo: pathlib.Path
    ) -> None:
        """I4: even when explicitly named, .museignore exclusions prevent staging."""
        (repo / ".museignore").write_text(
            '[domain.code]\npatterns = ["private.py"]\n'
        )
        (repo / "private.py").write_text("SECRET = 'x'\n")

        _run(repo, "code", "add", "private.py")
        stage = read_stage(repo)
        assert "private.py" not in stage, "Explicitly named ignored file must not be staged"

    def test_I5_hidden_files_staged_by_default(
        self, repo: pathlib.Path
    ) -> None:
        """I5: hidden files (dotfiles) are staged by muse code add . (mirrors git behaviour)."""
        (repo / ".env").write_text("API_KEY=secret\n")

        _run(repo, "code", "add", ".")
        stage = read_stage(repo)
        assert ".env" in stage, "Hidden .env must be staged by muse code add ."

    def test_I6_pycache_not_staged(self, repo: pathlib.Path) -> None:
        """I6: __pycache__ directories are never walked."""
        cache = repo / "__pycache__"
        cache.mkdir()
        (cache / "main.cpython-311.pyc").write_bytes(b"\x00compiled\x00")

        _run(repo, "code", "add", ".")
        for key in read_stage(repo):
            assert "__pycache__" not in key, f"Compiled cache file staged: {key}"

    def test_I7_muse_dir_file_not_staged_by_dot(self, repo: pathlib.Path) -> None:
        """I7: files inside .muse/ (VCS storage) are never staged by 'muse code add .'

        Data-integrity invariant: the .muse/ directory is the VCS store itself.
        Tracking its contents as repo files corrupts checkout — switching to a
        branch whose snapshot omits them would delete live VCS internals from disk.
        """
        # agent-config writes these; they must never leak into the snapshot.
        dot_muse = muse_dir(repo)
        (dot_muse / "agent.md").write_text("# agent config\n")
        (dot_muse / "config.toml").write_text('[adapters]\nclaude = true\n')

        _run(repo, "code", "add", ".")
        self._assert_no_muse_keys(repo, "VCS-internal file leaked into stage")

    def test_I8_muse_dir_file_not_staged_when_explicit(
        self, repo: pathlib.Path
    ) -> None:
        """I8: explicitly naming a .muse/ file is silently rejected.

        Data-integrity invariant: an agent that runs 'muse code add .muse/agent.md'
        must not corrupt the snapshot.  The file is silently dropped — same
        treatment as a file outside the repo root.
        """
        agent_md = muse_dir(repo) / "agent.md"
        agent_md.write_text("# agent config\n")

        _run(repo, "code", "add", ".muse/agent.md")
        stage = read_stage(repo)
        assert ".muse/agent.md" not in stage, (
            "Explicitly naming a .muse/ file must not add it to the stage"
        )

    def test_I9_muse_dir_subdir_not_staged_when_explicit(
        self, repo: pathlib.Path
    ) -> None:
        """I9: passing .muse/ as a directory arg stages nothing from inside it."""
        (muse_dir(repo) / "agent.md").write_text("# config\n")

        _run(repo, "code", "add", ".muse")
        self._assert_no_muse_keys(repo, "VCS-internal file staged via directory arg")

    def test_I10_muse_dir_not_staged_by_update_flag(
        self, repo: pathlib.Path
    ) -> None:
        """I10: 'muse code add -u' re-staging head_manifest never includes .muse/ entries.

        Defense-in-depth: if a .muse/ entry somehow reached the head manifest
        (e.g. from a snapshot created before this fix), the -u path must still
        silently drop it rather than perpetuating the corruption.
        """
        # Plant a .muse/ file and push it through the stage into a commit,
        # simulating the pre-fix corruption path.
        self._commit_planted_agent_md(repo, "simulate pre-fix corruption")

        # Whatever the commit recorded, -u must not restage .muse/agent.md.
        _run(repo, "code", "add", "-u")
        self._assert_no_muse_keys(
            repo, "muse code add -u re-staged VCS-internal file"
        )

    def test_I11_muse_dir_not_staged_by_all_flag(
        self, repo: pathlib.Path
    ) -> None:
        """I11: 'muse code add -A' never stages .muse/ entries from head manifest."""
        self._commit_planted_agent_md(repo, "simulate pre-fix corruption")

        _run(repo, "code", "add", "-A")
        self._assert_no_muse_keys(
            repo, "muse code add -A re-staged VCS-internal file"
        )

    def test_I12_snapshot_strips_muse_dir_entries_at_commit(
        self, repo: pathlib.Path
    ) -> None:
        """I12: commit snapshot never contains .muse/ keys regardless of stage content.

        Defense-in-depth at the snapshot layer: even if a .muse/ entry sneaks
        into the stage (e.g. written directly by a third-party tool), the
        snapshot built at commit time must strip it before persisting.
        """
        from muse.core.commits import read_commit
        from muse.core.refs import get_head_commit_id
        from muse.core.snapshots import read_snapshot

        self._commit_planted_agent_md(repo, "should strip .muse from snapshot")

        # Read the snapshot the commit produced and verify it has no .muse/ keys.
        commit_id = get_head_commit_id(repo, "main")
        assert commit_id, "commit must have produced a HEAD"
        assert object_path(repo, commit_id).exists(), f"commit object not found for {commit_id}"
        commit_rec = read_commit(repo, commit_id)
        assert commit_rec is not None, f"could not read commit {commit_id}"
        snap_rec = read_snapshot(repo, commit_rec.snapshot_id)
        assert snap_rec is not None, "snapshot must be readable after commit"
        muse_keys = [k for k in snap_rec.manifest if k.startswith(".muse/")]
        assert not muse_keys, (
            f"Snapshot contains VCS-internal keys: {muse_keys}"
        )


# ===========================================================================
# II  JSON output — muse code add --format json
# ===========================================================================


class TestJsonOutputAddII:
    """JSON output of ``muse code add`` (requested here with ``--json``) must be valid and complete."""

    def test_II1_json_output_on_single_file_staged(
        self, repo: pathlib.Path
    ) -> None:
        """II1: staging one modified file reports every required key."""
        target = repo / "main.py"
        target.write_text("x = 2\n")

        exit_code, stdout = _run(repo, "code", "add", "--json", "main.py")
        assert exit_code == 0, stdout

        payload = json.loads(stdout.strip())
        expected_counts = (
            ("staged", 1),
            ("modified", 1),
            ("added", 0),
            ("deleted", 0),
        )
        for field, expected in expected_counts:
            assert payload[field] == expected
        assert payload["dry_run"] is False
        assert any(item["path"] == "main.py" for item in payload["files"])

    def test_II2_json_output_new_file_is_added(
        self, repo: pathlib.Path
    ) -> None:
        """II2: a previously unknown file is reported with mode 'new file'."""
        new_file = repo / "brand_new.py"
        new_file.write_text("y = 99\n")

        exit_code, stdout = _run(repo, "code", "add", "--json", "brand_new.py")
        assert exit_code == 0, stdout

        payload = json.loads(stdout.strip())
        assert payload["added"] == 1
        assert payload["modified"] == 0
        matching = (
            item for item in payload["files"] if item["path"] == "brand_new.py"
        )
        record = next(matching)
        assert record["mode"] == "new file"

    def test_II3_json_output_deletion_counted(
        self, repo: pathlib.Path
    ) -> None:
        """II3: a deletion picked up by -u is counted as deleted=1."""
        (repo / "main.py").unlink()

        exit_code, stdout = _run(repo, "code", "add", "-u", "--json")
        assert exit_code == 0, stdout

        payload = json.loads(stdout.strip())
        assert payload["deleted"] == 1
        assert any(item["mode"] == "deleted" for item in payload["files"])

    def test_II4_json_output_nothing_to_stage(
        self, repo: pathlib.Path
    ) -> None:
        """II4: an unchanged tree yields staged=0 and a zero exit code."""
        # The working copy of main.py still equals the committed content.
        exit_code, stdout = _run(repo, "code", "add", "--json", ".")
        assert exit_code == 0, stdout

        payload = json.loads(stdout.strip())
        assert payload["staged"] == 0

    def test_II5_json_dry_run_flag_true(self, repo: pathlib.Path) -> None:
        """II5: --dry-run reports dry_run=true and leaves no stage file behind."""
        (repo / "main.py").write_text("# dry\n")

        cli_args = ("code", "add", "--dry-run", "--json", "main.py")
        exit_code, stdout = _run(repo, *cli_args)
        assert exit_code == 0, stdout

        payload = json.loads(stdout.strip())
        assert payload["dry_run"] is True
        assert payload["staged"] == 1
        assert not stage_path(repo).exists()

    def test_II6_json_output_multiple_files(
        self, repo: pathlib.Path
    ) -> None:
        """II6: every one of several new files is listed under ``files``."""
        names = [f"f{i}.py" for i in range(5)]
        for index, name in enumerate(names):
            (repo / name).write_text(f"v = {index}\n")

        exit_code, stdout = _run(repo, "code", "add", "--json", "-A")
        assert exit_code == 0, stdout

        payload = json.loads(stdout.strip())
        assert payload["staged"] >= 5
        listed = {item["path"] for item in payload["files"]}
        for name in names:
            assert name in listed

    def test_II7_json_output_is_valid_json(self, repo: pathlib.Path) -> None:
        """II7: the output parses as JSON rather than being raw text."""
        (repo / "main.py").write_text("# changed\n")
        _, stdout = _run(repo, "code", "add", "--json", "main.py")
        # A parse failure raises and fails the test.
        json.loads(stdout.strip())


# ===========================================================================
# III  JSON output — muse code reset --format json
# ===========================================================================


class TestJsonOutputResetIII:
    """JSON output of ``muse code reset`` (requested here with ``--json``) must be valid and complete."""

    def test_III1_json_reset_specific_file(self, repo: pathlib.Path) -> None:
        """III1: unstaging one staged file reports unstaged=1 and lists it."""
        target = repo / "main.py"
        target.write_text("# staged\n")
        _run(repo, "code", "add", "main.py")

        exit_code, stdout = _run(repo, "code", "reset", "--json", "main.py")
        assert exit_code == 0, stdout

        payload = json.loads(stdout.strip())
        assert payload["unstaged"] == 1
        assert "main.py" in payload["files"]

    def test_III2_json_reset_all(self, repo: pathlib.Path) -> None:
        """III2: a path-less reset unstages everything and reports the count."""
        for index in range(3):
            new_file = repo / f"f{index}.py"
            new_file.write_text(f"x = {index}\n")
        _run(repo, "code", "add", "-A")

        exit_code, stdout = _run(repo, "code", "reset", "--json")
        assert exit_code == 0, stdout

        payload = json.loads(stdout.strip())
        assert payload["unstaged"] >= 3

    def test_III3_json_reset_nothing_staged(self, repo: pathlib.Path) -> None:
        """III3: with an empty stage, reset reports unstaged=0 and no files."""
        exit_code, stdout = _run(repo, "code", "reset", "--json")
        assert exit_code == 0, stdout

        payload = json.loads(stdout.strip())
        assert payload["unstaged"] == 0
        assert payload["files"] == []

    def test_III4_json_reset_preserves_other_staged_files(
        self, repo: pathlib.Path
    ) -> None:
        """III4: unstaging one file keeps the remaining entries staged."""
        contents = (("main.py", "# changed\n"), ("other.py", "y = 9\n"))
        for name, text in contents:
            (repo / name).write_text(text)
        _run(repo, "code", "add", "-A")

        exit_code, stdout = _run(repo, "code", "reset", "--json", "other.py")
        assert exit_code == 0, stdout

        payload = json.loads(stdout.strip())
        assert payload["unstaged"] == 1
        assert "other.py" in payload["files"]

        still_staged = read_stage(repo)
        assert "main.py" in still_staged, "main.py must still be staged"
        assert "other.py" not in still_staged


# ===========================================================================
# IV  Text output breakdown
# ===========================================================================


class TestTextOutputBreakdownIV:
    """The plain-text summary must break the result down: N added, M modified, K deleted."""

    def test_IV1_text_shows_added_count(self, repo: pathlib.Path) -> None:
        """IV1: staging a new file mentions 'added' in the output."""
        new_file = repo / "new.py"
        new_file.write_text("z = 0\n")

        _, stdout = _run(repo, "code", "add", "new.py")
        assert "added" in stdout

    def test_IV2_text_shows_modified_count(self, repo: pathlib.Path) -> None:
        """IV2: staging a changed tracked file mentions 'modified'."""
        tracked = repo / "main.py"
        tracked.write_text("x = 999\n")

        _, stdout = _run(repo, "code", "add", "main.py")
        assert "modified" in stdout

    def test_IV3_text_shows_deleted_count(self, repo: pathlib.Path) -> None:
        """IV3: staging a removal via -u mentions 'deleted'."""
        tracked = repo / "main.py"
        tracked.unlink()

        _, stdout = _run(repo, "code", "add", "-u")
        assert "deleted" in stdout

    def test_IV4_text_nothing_to_stage_message(
        self, repo: pathlib.Path
    ) -> None:
        """IV4: an unchanged tree produces a 'nothing to stage' style message."""
        _, stdout = _run(repo, "code", "add", ".")
        accepted_phrases = ("Nothing", "already up to date")
        assert any(phrase in stdout for phrase in accepted_phrases)

    def test_IV5_text_breakdown_counts_match_actual(
        self, repo: pathlib.Path
    ) -> None:
        """IV5: the counts in the breakdown equal what was staged."""
        # One tracked file is modified, two files are brand new.
        working_tree = (
            ("main.py", "x = 2\n"),
            ("a.py", "a = 1\n"),
            ("b.py", "b = 2\n"),
        )
        for name, text in working_tree:
            (repo / name).write_text(text)

        _, stdout = _run(repo, "code", "add", "-A")
        assert "1 modified" in stdout
        assert "2 added" in stdout


# ===========================================================================
# V  JSON stage persistence
# ===========================================================================


class TestJsonPersistenceV:
    """The stage index must be persisted as JSON and survive round-trips."""

    def test_V1_stage_file_is_json(
        self, repo: pathlib.Path
    ) -> None:
        """V1: after staging, the file on disk is valid JSON."""
        (repo / "main.py").write_text("x = 9\n")
        _run(repo, "code", "add", "main.py")

        path = stage_path(repo)
        assert path.exists(), "stage.json must exist after staging"
        raw = path.read_bytes()
        assert raw.startswith(b"{"), "Stage file must be JSON"
        data = json.loads(raw)
        assert "entries" in data
        assert "main.py" in data["entries"]

    def test_V2_stage_round_trips_all_entry_fields(
        self, repo: pathlib.Path
    ) -> None:
        """V2: object_id, mode, and staged_at survive a write/read cycle."""
        (repo / "main.py").write_text("x = 42\n")
        _run(repo, "code", "add", "main.py")

        stage = read_stage(repo)
        entry = stage["main.py"]
        # "sha256:" (7 chars) + 64 hex digits = 71 characters.
        assert entry["object_id"].startswith("sha256:") and len(entry["object_id"]) == 71, \
            "object_id must be a canonical long_id (sha256:<64hex>)"
        assert entry["mode"] in ("A", "M", "D")
        assert entry["staged_at"]

    def test_V3_stage_atomic_write_no_tmp_file_after_success(
        self, repo: pathlib.Path
    ) -> None:
        """V3: no .stage-tmp-* file lingers after a successful write."""
        # The content must differ from the committed "x = 1\n" (see the
        # ``repo`` fixture); staging an unchanged file would not write the
        # stage at all and the check below would pass vacuously.
        (repo / "main.py").write_text("x = 2\n")
        _run(repo, "code", "add", "main.py")
        assert stage_path(repo).exists(), "Stage must exist after staging a changed file"

        stage_dir = code_dir(repo)
        tmps = list(stage_dir.glob(".stage-tmp-*"))
        assert tmps == [], f"Stale tmp files: {tmps}"

    def test_V5_corrupt_json_clears_and_returns_empty(
        self, repo: pathlib.Path
    ) -> None:
        """V5: corrupt JSON stage file is deleted and read_stage returns {}."""
        stage_dir = code_dir(repo)
        stage_dir.mkdir(parents=True, exist_ok=True)
        stage_path(repo).write_bytes(b"\xde\xad\xbe\xef garbage")

        entries = read_stage(repo)
        assert entries == {}
        assert not stage_path(repo).exists(), "Corrupt stage file must be removed"

    def test_V6_write_empty_removes_json_file(
        self, repo: pathlib.Path
    ) -> None:
        """V6: write_stage({}) removes stage.json (clear the stage)."""
        # Change main.py so it's different from the committed content.
        (repo / "main.py").write_text("x = 999\n")
        _run(repo, "code", "add", "main.py")
        assert stage_path(repo).exists(), "Stage must exist after staging a changed file"

        write_stage(repo, {})
        assert not stage_path(repo).exists()

    def test_V7_stage_version_is_3_in_json(
        self, repo: pathlib.Path
    ) -> None:
        """V7: JSON file carries version=3."""
        (repo / "main.py").write_text("x = 999\n")
        _run(repo, "code", "add", "main.py")
        assert stage_path(repo).exists(), "Stage must exist after staging"

        raw = json.loads(stage_path(repo).read_bytes())
        assert raw["version"] == 3


# ===========================================================================
# VI  Dry-run correctness
# ===========================================================================


class TestDryRunVI:
    """``--dry-run`` must give an accurate preview without writing anything."""

    def test_VI1_dry_run_lists_files_that_would_be_staged(
        self, repo: pathlib.Path
    ) -> None:
        """VI1: every file that would be staged is named in the output."""
        pending = (("main.py", "x = 3\n"), ("new.py", "y = 0\n"))
        for name, text in pending:
            (repo / name).write_text(text)

        _, stdout = _run(repo, "code", "add", "--dry-run", "-A")
        for name, _text in pending:
            assert name in stdout

    def test_VI2_dry_run_does_not_write_stage_file(
        self, repo: pathlib.Path
    ) -> None:
        """VI2: a dry-run leaves no stage file on disk."""
        tracked = repo / "main.py"
        tracked.write_text("x = 3\n")

        _run(repo, "code", "add", "--dry-run", "main.py")

        stage_file = stage_path(repo)
        assert not stage_file.exists()

    def test_VI3_dry_run_does_not_write_objects(
        self, repo: pathlib.Path
    ) -> None:
        """VI3: a dry-run adds no blob to the object store."""
        payload = b"brand new content\n"
        (repo / "brand_new.py").write_bytes(payload)
        # Location where the blob would land if it were actually stored.
        would_be_blob = object_path(repo, blob_id(payload))

        _run(repo, "code", "add", "--dry-run", "brand_new.py")
        assert not would_be_blob.exists(), "Dry-run must not write objects to the store"

    def test_VI4_dry_run_json_shows_correct_counts(
        self, repo: pathlib.Path
    ) -> None:
        """VI4: ``--dry-run --json`` reports at least the expected counts."""
        # main.py is a tracked modification; extra.py is new.
        pending = (("main.py", "x = 5\n"), ("extra.py", "z = 0\n"))
        for name, text in pending:
            (repo / name).write_text(text)

        cli_args = ("code", "add", "--dry-run", "--json", "-A")
        _, stdout = _run(repo, *cli_args)

        report = json.loads(stdout.strip())
        assert report["dry_run"] is True
        assert report["modified"] >= 1
        assert report["added"] >= 1

    def test_VI5_dry_run_output_stable_across_runs(
        self, repo: pathlib.Path
    ) -> None:
        """VI5: two dry-runs over the same tree agree, ignoring volatile fields."""
        (repo / "main.py").write_text("x = 7\n")

        cli_args = ("code", "add", "--dry-run", "--json", ".")
        _, first_out = _run(repo, *cli_args)
        _, second_out = _run(repo, *cli_args)

        volatile_keys = {"duration_ms", "timestamp"}

        def stable_part(raw: str) -> dict:
            """Parse *raw* and drop the keys expected to vary between runs."""
            parsed = json.loads(raw)
            return {
                key: value
                for key, value in parsed.items()
                if key not in volatile_keys
            }

        first = stable_part(first_out)
        second = stable_part(second_out)
        assert first == second


# ===========================================================================
# VII  Edge cases
# ===========================================================================


class TestEdgeCasesVII:
    """Edge cases: commit-less repos, repeated staging, scoped adds, reset aliases."""

    def test_VII1_stage_on_fresh_repo_no_commits(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """VII1: a file can be staged before any commit exists, with mode 'A'."""
        monkeypatch.chdir(tmp_path)
        runner.invoke(cli, ["init", "--domain", "code"], env=_env(tmp_path))
        first_file = tmp_path / "first.py"
        first_file.write_text("x = 1\n")

        exit_code, stdout = _run(tmp_path, "code", "add", "first.py")
        assert exit_code == 0, stdout

        staged = read_stage(tmp_path)
        assert "first.py" in staged
        assert staged["first.py"]["mode"] == "A"

    def test_VII2_staging_identical_content_is_idempotent(
        self, repo: pathlib.Path
    ) -> None:
        """VII2: re-adding a file whose content is already staged changes nothing."""
        (repo / "main.py").write_text("x = 10\n")
        _run(repo, "code", "add", "main.py")

        exit_code, stdout = _run(repo, "code", "add", "main.py")
        assert exit_code == 0
        accepted_phrases = ("already up to date", "Nothing")
        assert any(phrase in stdout for phrase in accepted_phrases)

    def test_VII3_restaging_after_modification_updates_object_id(
        self, repo: pathlib.Path
    ) -> None:
        """VII3: staging new content for a file replaces its object_id."""
        tracked = repo / "main.py"

        tracked.write_text("v1\n")
        _run(repo, "code", "add", "main.py")
        first_oid = read_stage(repo)["main.py"]["object_id"]

        tracked.write_text("v2\n")
        _run(repo, "code", "add", "main.py")
        second_oid = read_stage(repo)["main.py"]["object_id"]

        assert first_oid != second_oid

    def test_VII4_nonexistent_path_exits_nonzero(
        self, repo: pathlib.Path
    ) -> None:
        """VII4: adding a path that neither exists nor is tracked fails."""
        exit_code, _ = _run_unchecked(repo, "code", "add", "ghost.py")
        assert exit_code != 0

    def test_VII5_directory_scoped_add_leaves_top_level_unstaged(
        self, repo: pathlib.Path
    ) -> None:
        """VII5: adding a directory stages its files and nothing outside it."""
        subdir = repo / "sub"
        subdir.mkdir()
        (subdir / "a.py").write_text("a = 1\n")
        (repo / "top.py").write_text("t = 1\n")

        _run(repo, "code", "add", "sub")

        staged = read_stage(repo)
        assert "sub/a.py" in staged
        assert "top.py" not in staged

    def test_VII6_verbose_shows_per_file_mode(
        self, repo: pathlib.Path
    ) -> None:
        """VII6: with -v the staged file's name appears in the output."""
        (repo / "main.py").write_text("x = 2\n")

        _, stdout = _run(repo, "code", "add", "-v", "main.py")
        assert "main.py" in stdout

    def test_VII7_reset_HEAD_syntax_alias(self, repo: pathlib.Path) -> None:
        """VII7: 'muse code reset HEAD <file>' unstages the file and empties the stage."""
        (repo / "main.py").write_text("x = 3\n")
        _run(repo, "code", "add", "main.py")

        exit_code, _ = _run(repo, "code", "reset", "HEAD", "main.py")
        assert exit_code == 0
        assert not stage_path(repo).exists()

    def test_VII8_stage_then_commit_then_restage_works(
        self, repo: pathlib.Path
    ) -> None:
        """VII8: stage → commit → re-stage works as one continuous cycle."""
        tracked = repo / "main.py"

        tracked.write_text("x = 5\n")
        _run(repo, "code", "add", "main.py")
        _run(repo, "commit", "-m", "v2")

        # The stage file must be gone once the commit has been made.
        assert not stage_path(repo).exists()

        tracked.write_text("x = 6\n")
        exit_code, _ = _run(repo, "code", "add", "main.py")
        assert exit_code == 0
        assert "main.py" in read_stage(repo)

    def test_VII9_update_flag_includes_modifications_not_new(
        self, repo: pathlib.Path
    ) -> None:
        """VII9: -u picks up changes to tracked files and ignores untracked ones."""
        # main.py is tracked and modified; untracked.py is new.
        (repo / "main.py").write_text("x = 99\n")
        (repo / "untracked.py").write_text("u = 0\n")

        _run(repo, "code", "add", "-u")

        staged = read_stage(repo)
        assert "main.py" in staged
        assert "untracked.py" not in staged


# ===========================================================================
# VIII  Stress tests
# ===========================================================================


class TestStressVIII:
    """Stress scenarios: many files, many cycles, large blobs, many commits."""

    def test_VIII1_stage_500_files_correct_count(
        self, repo: pathlib.Path
    ) -> None:
        """VIII1: ``add -A`` over 500 new files yields at least 500 stage entries."""
        for index in range(500):
            module = repo / f"module_{index:04d}.py"
            module.write_text(f"X = {index}\n")

        exit_code, printed = _run(repo, "code", "add", "-A")

        assert exit_code == 0, printed
        assert len(read_stage(repo)) >= 500

    def test_VIII2_500_files_json_output_correct(
        self, repo: pathlib.Path
    ) -> None:
        """VIII2: the JSON summary for 500 new files reports at least 500 added/staged."""
        for index in range(500):
            source_file = repo / f"f_{index:04d}.py"
            source_file.write_text(f"X = {index}\n")

        _status, printed = _run(repo, "code", "add", "-A", "--json")

        summary = json.loads(printed.strip())
        assert summary["added"] >= 500
        assert summary["staged"] >= 500

    def test_VIII3_stage_add_reset_cycle_50_times(
        self, repo: pathlib.Path
    ) -> None:
        """VIII3: each of 50 add/reset rounds succeeds and removes the stage file."""
        target = repo / "main.py"
        target.write_text("x = 0\n")

        for cycle in range(50):
            # Fresh content every round so there is always something to stage.
            target.write_text(f"x = {cycle}\n")

            add_status, _ = _run(repo, "code", "add", "main.py")
            assert add_status == 0, f"Cycle {cycle}: add failed"

            reset_status, _ = _run(repo, "code", "reset", "main.py")
            assert reset_status == 0, f"Cycle {cycle}: reset failed"

            stage_file = stage_path(repo)
            assert not stage_file.exists(), (
                f"Cycle {cycle}: stage not cleared after reset"
            )

    def test_VIII4_large_file_stages_correctly(
        self, repo: pathlib.Path
    ) -> None:
        """VIII4: a 5 MiB random blob is staged under the id ``blob_id`` computes for it."""
        payload = os.urandom(5 * 1024 * 1024)
        big_file = repo / "big.bin"
        big_file.write_bytes(payload)

        exit_code, _ = _run(repo, "code", "add", "big.bin")
        assert exit_code == 0

        staged = read_stage(repo)
        assert "big.bin" in staged
        assert staged["big.bin"]["object_id"] == blob_id(payload)

    def test_VIII5_all_modes_in_single_add(
        self, repo: pathlib.Path
    ) -> None:
        """VIII5: one ``add -A`` reports added, modified and deleted files together."""
        # Commit an extra file first so that it can be deleted afterwards.
        doomed = repo / "to_delete.py"
        doomed.write_text("del = 1\n")
        _run(repo, "code", "add", "to_delete.py")
        _run(repo, "commit", "-m", "add to_delete")

        # One change of each kind: modify, delete, create.
        (repo / "main.py").write_text("x = modified\n")
        doomed.unlink()
        (repo / "brand_new.py").write_text("new = True\n")

        exit_code, printed = _run(repo, "code", "add", "--json", "-A")
        assert exit_code == 0, printed

        summary = json.loads(printed.strip())
        assert summary["modified"] >= 1
        assert summary["added"] >= 1
        assert summary["deleted"] >= 1

    def test_VIII6_staging_after_many_commits_works(
        self, repo: pathlib.Path
    ) -> None:
        """VIII6: ``code add`` still stages ``main.py`` after 50 prior commits."""
        target = repo / "main.py"
        for revision in range(50):
            target.write_text(f"x = {revision}\n")
            _run(repo, "commit", "--allow-empty", "-m", f"commit {revision}")

        target.write_text("x = final\n")
        exit_code, _ = _run(repo, "code", "add", "main.py")

        assert exit_code == 0
        assert "main.py" in read_stage(repo)


# ===========================================================================
# IX  Stat-cache performance — muse code add must use StatCache, not hash_file
# ===========================================================================


class TestStatCacheIX:
    """muse code add must use the stat cache, not raw hash_file on every call."""

    def test_IX1_stat_cache_used_structurally(self) -> None:
        """IX1: code_stage module must import and use StatCache or load_cache.

        This is a structural check on the module's source text, not a
        behavioural one: it only verifies that one of the two names appears.
        """
        import inspect
        from muse.cli.commands import code_stage as cs_module

        source = inspect.getsource(cs_module)
        assert "load_cache" in source or "StatCache" in source, (
            "code_stage must import and use load_cache or StatCache for hashing"
        )

    def test_IX2_hash_file_not_called_on_unchanged_file(
        self, repo: pathlib.Path
    ) -> None:
        """IX2: second code add on unchanged file must not rehash from disk.

        After the first add the stat cache has a valid entry.  The second add
        must return the cached hash without calling _hash_str again.
        """
        from unittest.mock import patch

        (repo / "cached.txt").write_text("stable content\n")
        # First add — computes and caches the hash.
        code, _ = _run(repo, "code", "add", "cached.txt")
        assert code == 0

        # Reset stage so the file is re-evaluated on the second add.
        _run(repo, "code", "reset", "cached.txt")

        # Second add — must hit the cache; _hash_str must NOT be called.
        with patch("muse.core.stat_cache._hash_str") as mock_hash:
            code2, _ = _run(repo, "code", "add", "cached.txt")

        assert code2 == 0
        # A real ``assert`` is used (rather than ``assert_not_called()``
        # followed by a stray tuple) so the message is actually reported.
        assert not mock_hash.called, (
            "second code add on unchanged file called _hash_str — stat cache not used"
        )

    def test_IX3_stat_cache_file_written_after_add(
        self, repo: pathlib.Path
    ) -> None:
        """IX3: .muse/cache/stat.json must exist after code add (cache was saved)."""
        (repo / "new_file.py").write_text("y = 2\n")
        code, _ = _run(repo, "code", "add", "new_file.py")
        assert code == 0
        cache_path = stat_cache_path(repo)
        assert cache_path.exists(), (
            "cache/stat.json not found — cache.save() not called after code add"
        )

    def test_IX4_modified_file_is_rehashed(
        self, repo: pathlib.Path
    ) -> None:
        """IX4: modifying a file invalidates the cache entry so it is rehashed."""
        from unittest.mock import patch
        import muse.core.stat_cache as _sc

        (repo / "mutable.py").write_text("v = 1\n")
        _run(repo, "code", "add", "mutable.py")
        _run(repo, "code", "reset", "mutable.py")

        # Modify the file — mtime/size change → cache miss.
        (repo / "mutable.py").write_text("v = 2\n")

        # Spy on _hash_str but let the real function run so object_store
        # integrity checks still pass.
        with patch.object(_sc, "_hash_str", wraps=_sc._hash_str) as mock_hash:
            code, _ = _run(repo, "code", "add", "mutable.py")

        assert code == 0
        # A real ``assert`` is used (rather than ``assert_called()`` followed
        # by a stray tuple) so the message is actually reported.
        assert mock_hash.called, (
            "modified file should trigger a _hash_str call (cache miss)"
        )


# ---------------------------------------------------------------------------
# Helper
# ---------------------------------------------------------------------------


def _read_stage(root: pathlib.Path) -> StagedFileMap:
    """Return whatever ``read_stage`` yields for the repository at *root*."""
    staged: StagedFileMap = read_stage(root)
    return staged