"""Tests for ``muse snapshot-diff``.

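Verifies categorisation of added/modified/deleted paths, resolution of
snapshot IDs and branch names (commit IDs are only exercised indirectly, via
the branch tip written by ``_commit``), text-format and ``--raw`` output,
``--stdin`` batch mode, argparse flag registration, and error handling for
unresolvable refs.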
"""

from __future__ import annotations

import datetime
import json
import pathlib

from tests.cli_test_helper import CliRunner

# Placeholder handed to ``runner.invoke`` as its first positional argument.
# It is a leftover of the argparse migration: CliRunner ignores this argument,
# so ``None`` is sufficient.
cli = None
from muse.core.errors import ExitCode
from muse.core.object_store import write_object
from muse.core.ids import hash_commit, hash_snapshot
from muse.core.commits import (
    CommitRecord,
    write_commit,
)
from muse.core.snapshots import (
    SnapshotRecord,
    write_snapshot,
)
from muse.core.types import Manifest, blob_id
from muse.core.paths import head_path, muse_dir, ref_path

# Single module-level runner shared by every test below to invoke the CLI.
runner = CliRunner()


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal repository skeleton for *path* and return *path*.

    Under the directory returned by ``muse_dir(path)`` this creates the
    ``commits``, ``snapshots``, ``objects`` and ``refs/heads`` directories,
    a ``HEAD`` file pointing at ``refs/heads/main``, and a ``repo.json``
    carrying a fixed ``repo_id`` and ``domain``.

    The directories are created without ``exist_ok``, so calling this twice
    on the same *path* raises ``FileExistsError``.
    """
    store = muse_dir(path)
    required_dirs = (
        store / "commits",
        store / "snapshots",
        store / "objects",
        store / "refs" / "heads",
    )
    for directory in required_dirs:
        directory.mkdir(parents=True)

    head_file = store / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = {"repo_id": "test-repo", "domain": "midi"}
    meta_file = store / "repo.json"
    meta_file.write_text(json.dumps(repo_meta), encoding="utf-8")
    return path


def _env(repo: pathlib.Path) -> dict[str, str]:
    """Return the environment overrides that point the CLI at *repo*.

    The result is a process-environment mapping (variable name -> value),
    not a snapshot manifest, hence the plain ``dict[str, str]`` return type.

    Args:
        repo: Root directory of the test repository.

    Returns:
        A one-entry mapping setting ``MUSE_REPO_ROOT`` to ``str(repo)``.
    """
    return {"MUSE_REPO_ROOT": str(repo)}


def _obj(repo: pathlib.Path, content: bytes) -> str:
    """Write *content* into *repo*'s object store and return its object ID.

    The ID is whatever ``blob_id(content)`` yields; the same value is used as
    the key passed to ``write_object``.
    """
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id


def _snap(repo: pathlib.Path, manifest: Manifest) -> str:
    """Persist a snapshot record for *manifest* and return its snapshot ID.

    The ID comes from ``hash_snapshot(manifest)``. ``created_at`` is pinned to
    2026-01-01 UTC so the stored record does not depend on the wall clock.
    """
    snapshot_id = hash_snapshot(manifest)
    fixed_timestamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    record = SnapshotRecord(
        snapshot_id=snapshot_id,
        manifest=manifest,
        created_at=fixed_timestamp,
    )
    write_snapshot(repo, record)
    return snapshot_id


def _commit(repo: pathlib.Path, tag: str, sid: str, branch: str = "main") -> str:
    """Write a parentless commit for snapshot *sid* and point *branch* at it.

    *tag* is used as the commit message. The author is always ``"tester"``
    and the commit time is pinned to 2026-01-01 UTC. After the commit record
    is written, the ref file for *branch* is overwritten with the new commit
    ID.

    Returns:
        The commit ID computed by ``hash_commit``.
    """
    author = "tester"
    when = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)

    commit_id = hash_commit(
        parent_ids=[],
        snapshot_id=sid,
        message=tag,
        committed_at_iso=when.isoformat(),
        author=author,
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=sid,
        message=tag,
        committed_at=when,
        author=author,
        parent_commit_id=None,
    )
    write_commit(repo, record)

    # Move the branch tip so the branch name resolves to this commit.
    ref_path(repo, branch).write_text(commit_id, encoding="utf-8")
    return commit_id


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------


class TestSnapshotDiff:
    """Core behaviour of ``snapshot-diff`` when two refs are passed as arguments."""

    def test_added_deleted_categorised_correctly(self, tmp_path: pathlib.Path) -> None:
        """A path only in B is reported as added, a path only in A as deleted."""
        repo = _init_repo(tmp_path)
        shared = _obj(repo, b"shared")
        new_obj = _obj(repo, b"new")
        sid_a = _snap(repo, {"shared.mid": shared, "old.mid": shared})
        sid_b = _snap(repo, {"shared.mid": shared, "new.mid": new_obj})
        result = runner.invoke(cli, ["snapshot-diff", "--json", sid_a, sid_b], env=_env(repo))
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        assert [e["path"] for e in data["added"]] == ["new.mid"]
        assert [e["path"] for e in data["deleted"]] == ["old.mid"]
        # "shared.mid" maps to the same object on both sides, so it is unchanged.
        assert data["modified"] == []
        assert data["total_changes"] == 2

    def test_modified_entry_contains_both_object_ids(self, tmp_path: pathlib.Path) -> None:
        """A path whose object differs is reported once, with both object IDs."""
        repo = _init_repo(tmp_path)
        v1 = _obj(repo, b"v1")
        v2 = _obj(repo, b"v2")
        sid_a = _snap(repo, {"track.mid": v1})
        sid_b = _snap(repo, {"track.mid": v2})
        result = runner.invoke(cli, ["snapshot-diff", "--json", sid_a, sid_b], env=_env(repo))
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        assert len(data["modified"]) == 1
        mod = data["modified"][0]
        assert mod["path"] == "track.mid"
        assert mod["object_id_a"] == v1
        assert mod["object_id_b"] == v2

    def test_zero_changes_when_snapshots_identical(self, tmp_path: pathlib.Path) -> None:
        """Diffing a snapshot against itself reports zero changes."""
        repo = _init_repo(tmp_path)
        obj = _obj(repo, b"same")
        sid = _snap(repo, {"f.mid": obj})
        result = runner.invoke(cli, ["snapshot-diff", "--json", sid, sid], env=_env(repo))
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        assert data["total_changes"] == 0

    def test_resolves_by_branch_name(self, tmp_path: pathlib.Path) -> None:
        """Branch names are accepted as refs and resolve to their tip snapshots."""
        repo = _init_repo(tmp_path)
        obj_a = _obj(repo, b"a")
        obj_b = _obj(repo, b"b")
        _commit(repo, "cmt-main", _snap(repo, {"a.mid": obj_a}), branch="main")
        _commit(repo, "cmt-dev", _snap(repo, {"b.mid": obj_b}), branch="dev")
        head_path(repo).write_text("ref: refs/heads/main", encoding="utf-8")
        result = runner.invoke(cli, ["snapshot-diff", "--json", "main", "dev"], env=_env(repo))
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        # main has only a.mid, dev has only b.mid: one deletion plus one addition.
        assert data["total_changes"] == 2

    def test_text_format_shows_status_letters(self, tmp_path: pathlib.Path) -> None:
        """Default text output prefixes each path with its status letter."""
        repo = _init_repo(tmp_path)
        shared = _obj(repo, b"s")
        new_obj = _obj(repo, b"n")
        sid_a = _snap(repo, {"gone.mid": shared})
        sid_b = _snap(repo, {"new.mid": new_obj})
        result = runner.invoke(cli, ["snapshot-diff", sid_a, sid_b], env=_env(repo))
        assert result.exit_code == 0, result.output
        assert "A  new.mid" in result.stdout
        assert "D  gone.mid" in result.stdout

    def test_stat_flag_appends_summary(self, tmp_path: pathlib.Path) -> None:
        """``--stat`` output mentions both the added and the deleted counts."""
        repo = _init_repo(tmp_path)
        sid_a = _snap(repo, {"gone.mid": _obj(repo, b"g")})
        sid_b = _snap(repo, {"new.mid": _obj(repo, b"n")})
        result = runner.invoke(
            cli,
            ["snapshot-diff", "--stat", sid_a, sid_b],
            env=_env(repo),
        )
        assert result.exit_code == 0, result.output
        assert "added" in result.stdout
        assert "deleted" in result.stdout

    def test_unresolvable_ref_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """Unknown refs exit with USER_ERROR and emit a JSON error on stderr."""
        repo = _init_repo(tmp_path)
        result = runner.invoke(
            cli, ["snapshot-diff", "no-such-thing", "also-missing", "--json"], env=_env(repo)
        )
        assert result.exit_code == ExitCode.USER_ERROR
        assert "error" in json.loads(result.stderr)

    def test_results_sorted_lexicographically(self, tmp_path: pathlib.Path) -> None:
        """Added paths are emitted in lexicographic order, not manifest order."""
        repo = _init_repo(tmp_path)
        sid_a = _snap(repo, {})
        # Deliberately insert the paths out of order.
        sid_b = _snap(
            repo, {"z.mid": _obj(repo, b"z"), "a.mid": _obj(repo, b"a"), "m.mid": _obj(repo, b"m")}
        )
        result = runner.invoke(cli, ["snapshot-diff", "--json", sid_a, sid_b], env=_env(repo))
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        added_paths = [e["path"] for e in data["added"]]
        # Pin the exact list: comparing only against sorted(added_paths) would
        # pass vacuously if the command reported no (or too few) additions.
        assert added_paths == ["a.mid", "m.mid", "z.mid"]


class TestSnapshotDiffStdin:
    """Tests for ``--stdin`` batch mode.

    Each stdin line carries one whitespace-separated ref pair. In JSON mode
    the tests expect one JSON document per processed line (NDJSON), with
    failures reported inline instead of through the exit code.
    """

    def test_single_pair_via_stdin_json(self, tmp_path: pathlib.Path) -> None:
        """One pair on stdin yields exactly one JSON result line."""
        repo = _init_repo(tmp_path)
        oid_a = _obj(repo, b"a")
        oid_b = _obj(repo, b"b")
        sid_a = _snap(repo, {"a.mid": oid_a})
        sid_b = _snap(repo, {"b.mid": oid_b})
        stdin = f"{sid_a} {sid_b}\n"
        result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin)
        assert result.exit_code == 0, result.output
        lines = [ln for ln in result.stdout.strip().splitlines() if ln]
        assert len(lines) == 1
        data = json.loads(lines[0])
        assert data["snapshot_a"] == sid_a
        assert data["snapshot_b"] == sid_b
        assert len(data["added"]) == 1
        assert len(data["deleted"]) == 1
        assert data["total_changes"] == 2

    def test_multiple_pairs_emit_ndjson(self, tmp_path: pathlib.Path) -> None:
        """Several pairs yield one JSON line each, in input order."""
        repo = _init_repo(tmp_path)
        oid = _obj(repo, b"x")
        sid1 = _snap(repo, {"x.mid": oid})
        sid2 = _snap(repo, {})
        sid3 = _snap(repo, {"x.mid": oid, "y.mid": _obj(repo, b"y")})
        stdin = f"{sid1} {sid2}\n{sid2} {sid3}\n"
        result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin)
        assert result.exit_code == 0, result.output
        lines = [ln for ln in result.stdout.strip().splitlines() if ln]
        assert len(lines) == 2
        first = json.loads(lines[0])
        second = json.loads(lines[1])
        assert first["snapshot_a"] == sid1
        assert first["snapshot_b"] == sid2
        assert second["snapshot_a"] == sid2
        assert second["snapshot_b"] == sid3

    def test_invalid_ref_reported_inline_not_exit_error(self, tmp_path: pathlib.Path) -> None:
        """An unknown ref produces an inline error and later pairs still run."""
        repo = _init_repo(tmp_path)
        oid = _obj(repo, b"ok")
        sid_a = _snap(repo, {"f.mid": oid})
        sid_b = _snap(repo, {})
        # First line is bad ref, second is valid
        bad_ref = "a" * 64  # valid OID format but not in store
        stdin = f"{bad_ref} {bad_ref}\n{sid_a} {sid_b}\n"
        result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin)
        assert result.exit_code == 0  # batch mode always exits 0
        lines = [ln for ln in result.stdout.strip().splitlines() if ln]
        assert len(lines) == 2
        first = json.loads(lines[0])
        assert "error" in first
        second = json.loads(lines[1])
        assert "error" not in second
        assert second["total_changes"] == 1

    def test_empty_lines_and_comments_skipped(self, tmp_path: pathlib.Path) -> None:
        """Blank lines and ``#`` comment lines produce no output of their own."""
        repo = _init_repo(tmp_path)
        sid = _snap(repo, {})
        stdin = f"\n# this is a comment\n\n{sid} {sid}\n\n"
        result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin)
        assert result.exit_code == 0, result.output
        lines = [ln for ln in result.stdout.strip().splitlines() if ln]
        assert len(lines) == 1
        data = json.loads(lines[0])
        assert data["total_changes"] == 0

    def test_malformed_line_single_token_reported_inline(self, tmp_path: pathlib.Path) -> None:
        """A line with a single token is an inline error, not a fatal one."""
        repo = _init_repo(tmp_path)
        sid = _snap(repo, {})
        stdin = f"only-one-token\n{sid} {sid}\n"
        result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin)
        assert result.exit_code == 0
        lines = [ln for ln in result.stdout.strip().splitlines() if ln]
        assert len(lines) == 2
        first = json.loads(lines[0])
        assert "error" in first
        second = json.loads(lines[1])
        assert "error" not in second

    def test_empty_stdin_produces_no_output(self, tmp_path: pathlib.Path) -> None:
        """Empty stdin exits 0 and prints nothing."""
        repo = _init_repo(tmp_path)
        result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input="")
        assert result.exit_code == 0
        assert result.stdout.strip() == ""

    def test_stdin_text_format_blank_line_separated(self, tmp_path: pathlib.Path) -> None:
        """In text mode, the diffs of consecutive pairs are separated by a blank line."""
        repo = _init_repo(tmp_path)
        oid_a = _obj(repo, b"a")
        oid_b = _obj(repo, b"b")
        sid1 = _snap(repo, {"a.mid": oid_a})
        sid2 = _snap(repo, {"b.mid": oid_b})
        sid3 = _snap(repo, {})
        stdin = f"{sid1} {sid2}\n{sid2} {sid3}\n"
        result = runner.invoke(
            cli, ["snapshot-diff", "--stdin"], env=_env(repo), input=stdin
        )
        assert result.exit_code == 0, result.output
        output = result.stdout
        # Two diffs separated by a blank line
        assert "A  b.mid" in output or "D  a.mid" in output
        # There should be a blank-line separator between the two pairs
        blocks = [b.strip() for b in output.split("\n\n") if b.strip()]
        assert len(blocks) == 2

    def test_stdin_all_errors_still_exits_0(self, tmp_path: pathlib.Path) -> None:
        """A batch consisting only of failing pairs still exits 0."""
        repo = _init_repo(tmp_path)
        bad = "b" * 64  # valid format, not in store
        stdin = f"{bad} {bad}\n{bad} {bad}\n"
        result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin)
        assert result.exit_code == 0
        lines = [ln for ln in result.stdout.strip().splitlines() if ln]
        # Require one result line per input pair first: all() over an empty
        # list is True, so the check below would pass if nothing was emitted.
        assert len(lines) == 2
        assert all("error" in json.loads(ln) for ln in lines)

    def test_stdin_zero_change_pair_included(self, tmp_path: pathlib.Path) -> None:
        """A pair with no differences still produces a result line."""
        repo = _init_repo(tmp_path)
        sid = _snap(repo, {"f.mid": _obj(repo, b"f")})
        stdin = f"{sid} {sid}\n"
        result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin)
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout.strip())
        assert data["total_changes"] == 0


class TestSnapshotDiffEdgeCases:
    """Edge cases not covered by the primary test classes."""

    def test_bad_format_value_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """An invalid ``--only xml`` invocation must not succeed."""
        repo = _init_repo(tmp_path)
        sid = _snap(repo, {})
        result = runner.invoke(
            cli, ["snapshot-diff", "--only", "xml", sid, sid], env=_env(repo)
        )
        # NOTE(review): the test name says "user error" but only a non-zero exit
        # is required here — confirm whether ExitCode.USER_ERROR should be pinned.
        assert result.exit_code != 0

    def test_ref_a_provided_ref_b_missing_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """Supplying only one of the two refs is a user error."""
        repo = _init_repo(tmp_path)
        sid = _snap(repo, {})
        result = runner.invoke(cli, ["snapshot-diff", "--json", sid], env=_env(repo))
        assert result.exit_code == ExitCode.USER_ERROR

    def test_raw_with_zero_changes_produces_no_diff_lines(self, tmp_path: pathlib.Path) -> None:
        """``--raw`` on identical snapshots prints no status lines."""
        repo = _init_repo(tmp_path)
        sid = _snap(repo, {"f.mid": _obj(repo, b"same")})
        result = runner.invoke(
            cli, ["snapshot-diff", "--raw", sid, sid], env=_env(repo)
        )
        assert result.exit_code == 0, result.output
        # No A/M/D lines when there are no changes.
        for line in result.stdout.splitlines():
            assert not line.startswith(("A ", "M ", "D "))

    def test_json_shorthand_flag_accepted(self, tmp_path: pathlib.Path) -> None:
        """The ``-j`` shorthand produces the same JSON document as ``--json``."""
        repo = _init_repo(tmp_path)
        sid = _snap(repo, {"f.mid": _obj(repo, b"x")})
        # Use the short form on purpose: the long ``--json`` spelling is already
        # exercised by the other tests, so passing it here would test nothing new.
        result = runner.invoke(cli, ["snapshot-diff", "-j", sid, sid], env=_env(repo))
        assert result.exit_code == 0, result.output
        data = json.loads(result.stdout)
        assert data["total_changes"] == 0

    def test_no_args_no_stdin_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """With neither refs nor ``--stdin`` the command exits with USER_ERROR."""
        repo = _init_repo(tmp_path)
        result = runner.invoke(cli, ["snapshot-diff"], env=_env(repo))
        assert result.exit_code == ExitCode.USER_ERROR


class TestSnapshotDiffRaw:
    """Checks for the ``--raw`` flag, which adds object IDs to text output."""

    def test_raw_added_includes_object_id(self, tmp_path: pathlib.Path) -> None:
        """An added path is printed together with its object ID."""
        repo = _init_repo(tmp_path)
        added_oid = _obj(repo, b"new-content")
        before = _snap(repo, {})
        after = _snap(repo, {"new.mid": added_oid})
        argv = ["snapshot-diff", "--raw", before, after]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, result.output
        text = result.stdout
        assert added_oid in text
        assert "A" in text
        assert "new.mid" in text

    def test_raw_deleted_includes_object_id(self, tmp_path: pathlib.Path) -> None:
        """A deleted path is printed together with its former object ID."""
        repo = _init_repo(tmp_path)
        removed_oid = _obj(repo, b"old-content")
        before = _snap(repo, {"gone.mid": removed_oid})
        after = _snap(repo, {})
        argv = ["snapshot-diff", "--raw", before, after]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, result.output
        text = result.stdout
        assert removed_oid in text
        assert "D" in text
        assert "gone.mid" in text

    def test_raw_modified_includes_both_object_ids(self, tmp_path: pathlib.Path) -> None:
        """A modified path is printed with the object IDs of both sides."""
        repo = _init_repo(tmp_path)
        old_oid = _obj(repo, b"version-1")
        new_oid = _obj(repo, b"version-2")
        before = _snap(repo, {"track.mid": old_oid})
        after = _snap(repo, {"track.mid": new_oid})
        argv = ["snapshot-diff", "--raw", before, after]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, result.output
        text = result.stdout
        assert old_oid in text
        assert new_oid in text
        assert "M" in text
        assert "track.mid" in text

    def test_text_without_raw_omits_object_ids(self, tmp_path: pathlib.Path) -> None:
        """Plain text output shows the status line but never the object ID."""
        repo = _init_repo(tmp_path)
        hidden_oid = _obj(repo, b"some-content")
        before = _snap(repo, {})
        after = _snap(repo, {"file.mid": hidden_oid})
        argv = ["snapshot-diff", before, after]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, result.output
        text = result.stdout
        # Without --raw the object ID must stay out of the text output.
        assert hidden_oid not in text
        assert "A  file.mid" in text

    def test_raw_has_no_effect_on_json_output(self, tmp_path: pathlib.Path) -> None:
        """``--json`` and ``--json --raw`` agree on every stable field."""
        repo = _init_repo(tmp_path)
        old_oid = _obj(repo, b"va")
        new_oid = _obj(repo, b"vb")
        before = _snap(repo, {"t.mid": old_oid})
        after = _snap(repo, {"t.mid": new_oid})
        env = _env(repo)
        # JSON output always carries object IDs, so --raw is documented as a no-op.
        plain_run = runner.invoke(cli, ["snapshot-diff", "--json", before, after], env=env)
        raw_run = runner.invoke(cli, ["snapshot-diff", "--json", "--raw", before, after], env=env)
        assert plain_run.exit_code == 0
        assert raw_run.exit_code == 0
        plain_doc = json.loads(plain_run.stdout)
        raw_doc = json.loads(raw_run.stdout)
        # duration_ms varies from one invocation to the next, so it is left out.
        stable_keys = ("snapshot_a", "snapshot_b", "added", "modified", "deleted", "total_changes")
        for field in stable_keys:
            assert plain_doc[field] == raw_doc[field]

    def test_raw_stdin_batch_text_includes_oids(self, tmp_path: pathlib.Path) -> None:
        """``--raw`` also applies to text output in ``--stdin`` batch mode."""
        repo = _init_repo(tmp_path)
        added_oid = _obj(repo, b"batch-raw")
        before = _snap(repo, {})
        after = _snap(repo, {"r.mid": added_oid})
        batch_input = f"{before} {after}\n"
        argv = ["snapshot-diff", "--stdin", "--raw"]
        result = runner.invoke(cli, argv, env=_env(repo), input=batch_input)
        assert result.exit_code == 0, result.output
        text = result.stdout
        assert added_oid in text
        assert "A" in text


# ---------------------------------------------------------------------------
# Flag registration tests
# ---------------------------------------------------------------------------


class TestRegisterFlags:
    """Parser-level checks on the flags that ``register`` adds for ``snapshot-diff``."""

    def _parser(self) -> "argparse.ArgumentParser":
        """Build a fresh parser with only the ``snapshot-diff`` subcommand registered."""
        import argparse
        from muse.cli.commands.snapshot_diff import register

        root_parser = argparse.ArgumentParser()
        subcommands = root_parser.add_subparsers()
        register(subcommands)
        return root_parser

    def test_default_json_out_is_false(self) -> None:
        """Without any JSON flag, ``json_out`` is ``False``."""
        argv = ["snapshot-diff", "main", "dev"]
        namespace = self._parser().parse_args(argv)
        assert namespace.json_out is False

    def test_json_flag_sets_json_out(self) -> None:
        """``--json`` sets ``json_out`` to ``True``."""
        argv = ["snapshot-diff", "--json", "main", "dev"]
        namespace = self._parser().parse_args(argv)
        assert namespace.json_out is True

    def test_j_shorthand_sets_json_out(self) -> None:
        """The ``-j`` shorthand sets ``json_out`` to ``True``."""
        argv = ["snapshot-diff", "-j", "main", "dev"]
        namespace = self._parser().parse_args(argv)
        assert namespace.json_out is True