"""Tests for the canonical ``muse status --json`` schema.

Every code path that produces ``muse status --json`` output must emit the
*same* shape.  Agents rely on this stability — a schema that changes
depending on whether a stage index is present is a latent bug.

Canonical schema
----------------
::

    {
      "branch":             str,
      "head_commit":        str | null,
      "upstream":           str | null,
      "ahead":              int | null,
      "behind":             int | null,
      "clean":              bool,
      "dirty":              bool,
      "total_changes":      int,
      "untracked_count":    int,

      // Flat view — always populated, union of staged + unstaged.
      // Primary interface: agents that only need "what changed" use these.
      "added":              [str, ...],
      "modified":           [str, ...],
      "deleted":            [str, ...],
      "renamed":            {str: str, ...},

      // Staging detail — null when domain has no staging concept.
      // When non-null, partitions the flat view.
      "staged": {
        "added":            [str, ...],
        "modified":         [str, ...],
        "deleted":          [str, ...]
      } | null,
      "unstaged": {
        "added":            [str, ...],
        "modified":         [str, ...],
        "deleted":          [str, ...]
      } | null,

      // Files on disk but not tracked by Muse.  Always [] for non-code domains.
      "untracked":          [str, ...],

      // Merge state — always present.
      "conflict_paths":     [str, ...],
      "merge_in_progress":  bool,
      "merge_from":         str | null,
      "conflict_count":     int,
      "checkout_interrupted": bool,
      "checkout_target":    str | null
    }

Coverage matrix
---------------
I   Schema invariants (always-present keys, correct types)
    I1  Clean repo — all present, all empty/false
    I2  Code domain with staged changes — same keys, staged non-null
    I3  Code domain with unstaged changes — staged sub-obj still present
    I4  Code domain with both staged and unstaged — both sub-objs populated
    I5  Code domain with untracked files — untracked list populated

II  Flat view correctness
    II1  added = staged.added ∪ unstaged.added
    II2  modified = staged.modified ∪ unstaged.modified
    II3  deleted = staged.deleted ∪ unstaged.deleted
    II4  total_changes = len(added) + len(modified) + len(deleted) + len(renamed)
    II5  File in both staged and unstaged appears once in flat view

III Stage-domain vs no-stage-domain
    III1  Code domain (stage): staged and unstaged are dicts, not null
    III2  No stage index present: staged and unstaged are null (non-staged run)

IV  Specific field values
    IV1  branch matches current branch
    IV2  head_commit is sha256:-prefixed
    IV3  clean=True only when no changes
    IV4  dirty = not clean, always

V   untracked_count
    V1  untracked_count always present as int
    V2  untracked_count == len(untracked)
    V3  untracked_count == 0 for a clean repo
    V4  untracked_count == 0 when total_changes > 0 but no untracked files
    V5  untracked_count > 0 with only untracked files (total_changes stays 0)
    V6  untracked_count and total_changes both nonzero when both kinds present
"""

from __future__ import annotations
from collections.abc import Mapping

import json
import pathlib

import pytest

from tests.cli_test_helper import CliRunner

# Value passed as the first positional argument of every ``runner.invoke`` call
# in this module.
# NOTE(review): presumably the project ``CliRunner`` helper does not need a real
# app object here — confirm against ``tests.cli_test_helper``.
cli = None
# Single runner instance shared by the helpers, the fixture and all tests below.
runner = CliRunner()

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Top-level keys the tests below require in a ``status --json`` payload.
# Checked with ``issubset``, so additional keys in the payload are tolerated.
_REQUIRED_TOP_KEYS = {
    "branch", "head_commit", "upstream", "ahead", "behind",
    "clean", "dirty", "total_changes", "untracked_count",
    "added", "modified", "deleted", "renamed",
    "staged", "unstaged", "untracked",
    "conflict_paths", "merge_in_progress", "merge_from",
    "conflict_count", "checkout_interrupted", "checkout_target",
}

# Exact key set expected inside the ``staged`` and ``unstaged`` sub-objects.
# Compared with ``==``, so both missing and extra keys make a test fail.
# NOTE(review): the canonical schema in the module docstring lists only
# added/modified/deleted for these sub-objects, while this set also requires
# "renamed" — confirm which of the two is authoritative and align the other.
_STAGED_BUCKET_KEYS = {"added", "modified", "deleted", "renamed"}


def _env(root: pathlib.Path) -> Mapping[str, str]:
    """Return the ``env=`` mapping for ``runner.invoke``.

    The mapping has a single entry: ``MUSE_REPO_ROOT`` set to *root*
    converted to a string.
    """
    overrides = dict(MUSE_REPO_ROOT=str(root))
    return overrides


def _status_json(root: pathlib.Path) -> Mapping[str, object]:
    """Invoke ``status --json`` for the repo at *root* and decode its output.

    The command is run through the shared ``runner`` with the environment
    produced by ``_env(root)``.  A non-zero exit code fails the calling test
    with the command output in the assertion message.

    Returns:
        The value produced by ``json.loads`` on the stripped command output.
    """
    argv = ["status", "--json"]
    result = runner.invoke(cli, argv, env=_env(root))
    assert result.exit_code == 0, f"status --json failed: {result.output}"
    raw_output = result.output.strip()
    return json.loads(raw_output)


@pytest.fixture()
def code_repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Create a minimal code-domain repo with one committed file.

    Steps: change the working directory to ``tmp_path``, run
    ``init --domain code``, write ``main.py``, stage it with ``code add .``
    and commit it.  Every CLI step is asserted so that a broken setup fails
    inside the fixture instead of surfacing as a confusing failure (or a
    vacuous pass) in the test body.

    Returns:
        The repository root, i.e. ``tmp_path``.
    """
    monkeypatch.chdir(tmp_path)

    result = runner.invoke(cli, ["init", "--domain", "code"], env=_env(tmp_path))
    assert result.exit_code == 0, result.output

    (tmp_path / "main.py").write_text("x = 1\n")

    # The staging step used to be the only unchecked command in this fixture.
    result = runner.invoke(cli, ["code", "add", "."], env=_env(tmp_path))
    assert result.exit_code == 0, result.output

    result = runner.invoke(cli, ["commit", "-m", "initial"], env=_env(tmp_path))
    assert result.exit_code == 0, result.output
    return tmp_path


# ---------------------------------------------------------------------------
# I  Schema invariants
# ---------------------------------------------------------------------------


class TestSchemaInvariantsI:
    """Section I: the payload carries the same keys in every working-tree state.

    Besides checking key presence, each test also asserts that the change it
    set up is actually visible in the payload, so a silently failing setup
    step cannot turn the test into a vacuous pass.
    """

    def test_I1_clean_repo_all_required_keys_present(self, code_repo: pathlib.Path) -> None:
        """I1: Clean repo — every required key is present, all values empty/false."""
        root = code_repo
        data = _status_json(root)

        assert _REQUIRED_TOP_KEYS.issubset(data.keys()), (
            f"Missing keys: {_REQUIRED_TOP_KEYS - data.keys()}"
        )
        assert data["clean"] is True
        assert data["dirty"] is False
        assert data["added"] == []
        assert data["modified"] == []
        assert data["deleted"] == []
        assert data["renamed"] == {}
        assert data["untracked"] == []
        assert data["total_changes"] == 0
        assert data["untracked_count"] == 0
        assert data["conflict_paths"] == []
        assert data["merge_in_progress"] is False
        assert data["merge_from"] is None
        assert data["conflict_count"] == 0
        # The checkout fields belong to the always-present schema as well;
        # nothing has been checked out or interrupted in a fresh repo.
        assert data["checkout_interrupted"] is False
        assert data["checkout_target"] is None

    def test_I2_staged_file_schema_unchanged(self, code_repo: pathlib.Path) -> None:
        """I2: Staged changes — all top-level keys present, staged is a dict not null."""
        root = code_repo
        (root / "main.py").write_text("x = 2\n")
        runner.invoke(cli, ["code", "add", "main.py"], env=_env(root))

        data = _status_json(root)

        assert _REQUIRED_TOP_KEYS.issubset(data.keys()), (
            f"Missing keys: {_REQUIRED_TOP_KEYS - data.keys()}"
        )
        assert data["staged"] is not None
        assert data["unstaged"] is not None
        assert _STAGED_BUCKET_KEYS == set(data["staged"].keys()), (
            f"staged sub-object has wrong keys: {data['staged'].keys()}"
        )
        assert _STAGED_BUCKET_KEYS == set(data["unstaged"].keys()), (
            f"unstaged sub-object has wrong keys: {data['unstaged'].keys()}"
        )
        # Without this the test would also pass if the staging step did nothing.
        assert "main.py" in data["staged"]["modified"], (
            "staged modification of main.py must be reported in staged.modified"
        )

    def test_I3_unstaged_file_schema_unchanged(self, code_repo: pathlib.Path) -> None:
        """I3: Unstaged changes only (nothing staged) — staged sub-obj still present."""
        root = code_repo
        # Modify file but do NOT stage it
        (root / "main.py").write_text("x = 3\n")

        data = _status_json(root)

        assert _REQUIRED_TOP_KEYS.issubset(data.keys())
        # staged and unstaged must be present even when nothing is staged
        assert data["staged"] is not None
        assert data["unstaged"] is not None
        assert _STAGED_BUCKET_KEYS == set(data["staged"].keys())
        assert _STAGED_BUCKET_KEYS == set(data["unstaged"].keys())
        # The scenario under test must really be "unstaged change present".
        assert "main.py" in data["unstaged"]["modified"], (
            "unstaged modification of main.py must be reported in unstaged.modified"
        )

    def test_I4_both_staged_and_unstaged(self, code_repo: pathlib.Path) -> None:
        """I4: Both staged and unstaged changes — both sub-objs populated."""
        root = code_repo
        # Stage main.py modification
        (root / "main.py").write_text("x = 2\n")
        runner.invoke(cli, ["code", "add", "main.py"], env=_env(root))
        # Then modify it again (now staged M + unstaged M)
        (root / "main.py").write_text("x = 3\n")
        # Also add a new file unstaged
        (root / "other.py").write_text("y = 1\n")

        data = _status_json(root)

        assert _REQUIRED_TOP_KEYS.issubset(data.keys())
        assert data["staged"] is not None
        assert data["unstaged"] is not None
        assert data["dirty"] is True
        # "Populated" means at least one bucket in each sub-object is non-empty;
        # checking only for non-null would not distinguish this from a clean repo.
        assert any(data["staged"].values()), "staged sub-object must not be empty"
        assert any(data["unstaged"].values()), "unstaged sub-object must not be empty"

    def test_I5_untracked_files_in_list(self, code_repo: pathlib.Path) -> None:
        """I5: Untracked files appear in untracked list, not in added."""
        root = code_repo
        (root / "brand_new.py").write_text("# not staged\n")

        data = _status_json(root)

        assert "brand_new.py" in data["untracked"]
        # Untracked (not staged) must NOT appear in flat added
        assert "brand_new.py" not in data["added"]


# ---------------------------------------------------------------------------
# II  Flat view correctness
# ---------------------------------------------------------------------------


class TestFlatViewCorrectnessII:
    """Section II: the flat lists are the union of the staged and unstaged buckets.

    Each union check is paired with a membership or non-zero assertion so the
    equality cannot be satisfied trivially by empty lists.
    """

    def test_II1_flat_added_is_union_of_staged_and_unstaged(
        self, code_repo: pathlib.Path
    ) -> None:
        """II1: flat added = staged.added ∪ unstaged.added."""
        root = code_repo
        (root / "new_a.py").write_text("a\n")
        (root / "new_b.py").write_text("b\n")
        runner.invoke(cli, ["code", "add", "new_a.py"], env=_env(root))
        # new_b.py is untracked, not in either bucket

        data = _status_json(root)

        flat_added = set(data["added"])
        staged_added = set(data["staged"]["added"])
        unstaged_added = set(data["unstaged"]["added"])
        assert flat_added == staged_added | unstaged_added
        # Empty sets on both sides would satisfy the equality above, so pin
        # the one file that was staged and the one that was left untracked.
        assert "new_a.py" in flat_added, "staged new file must appear in flat added"
        assert "new_b.py" not in flat_added, "untracked file must not appear in flat added"

    def test_II2_flat_modified_is_union_of_staged_and_unstaged(
        self, code_repo: pathlib.Path
    ) -> None:
        """II2: flat modified = staged.modified ∪ unstaged.modified."""
        root = code_repo
        (root / "extra.py").write_text("e = 1\n")
        runner.invoke(cli, ["code", "add", "extra.py"], env=_env(root))
        committed = runner.invoke(cli, ["commit", "-m", "add extra"], env=_env(root))
        # extra.py must be part of HEAD for the later edit to count as "modified".
        assert committed.exit_code == 0, committed.output

        # Stage main.py modification
        (root / "main.py").write_text("x = 2\n")
        runner.invoke(cli, ["code", "add", "main.py"], env=_env(root))
        # Unstaged: modify extra.py
        (root / "extra.py").write_text("e = 99\n")

        data = _status_json(root)

        flat_modified = set(data["modified"])
        staged_modified = set(data["staged"]["modified"])
        unstaged_modified = set(data["unstaged"]["modified"])
        assert flat_modified == staged_modified | unstaged_modified
        assert "main.py" in staged_modified
        assert "extra.py" in unstaged_modified

    def test_II3_flat_deleted_is_union_of_staged_and_unstaged(
        self, code_repo: pathlib.Path
    ) -> None:
        """II3: flat deleted = staged.deleted ∪ unstaged.deleted."""
        root = code_repo
        (root / "to_delete.py").write_text("d = 1\n")
        runner.invoke(cli, ["code", "add", "to_delete.py"], env=_env(root))
        committed = runner.invoke(cli, ["commit", "-m", "add to_delete"], env=_env(root))
        # The file must be part of HEAD for its removal to count as "deleted".
        assert committed.exit_code == 0, committed.output

        # Delete and stage the deletion
        (root / "to_delete.py").unlink()
        runner.invoke(cli, ["code", "add", "to_delete.py"], env=_env(root))

        data = _status_json(root)

        flat_deleted = set(data["deleted"])
        staged_deleted = set(data["staged"]["deleted"])
        unstaged_deleted = set(data["unstaged"]["deleted"])
        assert flat_deleted == staged_deleted | unstaged_deleted
        assert "to_delete.py" in flat_deleted

    def test_II4_total_changes_is_sum_of_flat(self, code_repo: pathlib.Path) -> None:
        """II4: total_changes = len(added) + len(modified) + len(deleted) + len(renamed)."""
        root = code_repo
        (root / "main.py").write_text("x = 2\n")
        (root / "new.py").write_text("n = 1\n")
        runner.invoke(cli, ["code", "add", "main.py", "new.py"], env=_env(root))

        data = _status_json(root)

        expected = (
            len(data["added"]) + len(data["modified"])
            + len(data["deleted"]) + len(data["renamed"])
        )
        assert data["total_changes"] == expected
        # 0 == 0 would satisfy the check above; changes were made, so the
        # total must be positive.
        assert data["total_changes"] > 0, "staged changes must be counted"

    def test_II5_file_in_both_staged_and_unstaged_appears_once_flat(
        self, code_repo: pathlib.Path
    ) -> None:
        """II5: A file staged then modified again appears once in flat modified."""
        root = code_repo
        (root / "main.py").write_text("x = 2\n")
        runner.invoke(cli, ["code", "add", "main.py"], env=_env(root))
        (root / "main.py").write_text("x = 3\n")  # modify again after staging

        data = _status_json(root)

        assert data["modified"].count("main.py") == 1, (
            "main.py must appear exactly once in flat modified"
        )


# ---------------------------------------------------------------------------
# III  Stage-domain vs no-stage path
# ---------------------------------------------------------------------------


class TestStageDomainVsNonStageIII:
    """Section III: ``staged``/``unstaged`` are dicts for the code domain, null for mist."""

    def test_III1_code_domain_staged_and_unstaged_are_dicts(
        self, code_repo: pathlib.Path
    ) -> None:
        """III1: In a code-domain repo both sub-objects are dicts, never null."""
        # No changes are made on purpose: the expectation is that the
        # sub-objects exist even when the working tree is clean.
        data = _status_json(code_repo)

        buckets = ("staged", "unstaged")
        for bucket in buckets:
            assert data[bucket] is not None, f"{bucket} must not be null for code domain"
        for bucket in buckets:
            assert isinstance(data[bucket], dict)

    def test_III2_no_stage_index_staged_and_unstaged_are_null(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """III2: In a repo initialised with the mist domain both sub-objects are null."""
        monkeypatch.chdir(tmp_path)
        # mist is chosen because, unlike the code domain, it has no StagePlugin.
        init_result = runner.invoke(
            cli, ["init", "--domain", "mist"], env=_env(tmp_path)
        )
        assert init_result.exit_code == 0, init_result.output

        data = _status_json(tmp_path)

        for bucket in ("staged", "unstaged"):
            value = data[bucket]
            assert value is None, (
                f"{bucket} must be null for non-stage domain, got {value}"
            )


# ---------------------------------------------------------------------------
# IV  Specific field values
# ---------------------------------------------------------------------------


class TestSpecificFieldValuesIV:
    """Section IV: values of individual fields (branch, head_commit, clean, dirty)."""

    def test_IV1_branch_matches_current_branch(self, code_repo: pathlib.Path) -> None:
        """IV1: branch field matches the actual current branch."""
        root = code_repo
        data = _status_json(root)
        assert data["branch"] == "main"

    def test_IV2_head_commit_is_sha256_prefixed(self, code_repo: pathlib.Path) -> None:
        """IV2: head_commit is sha256:-prefixed (not bare hex, not null after first commit)."""
        root = code_repo
        data = _status_json(root)
        assert data["head_commit"] is not None
        assert data["head_commit"].startswith("sha256:"), (
            f"head_commit must be sha256:-prefixed, got {data['head_commit']!r}"
        )

    def test_IV3_clean_true_only_when_no_changes(self, code_repo: pathlib.Path) -> None:
        """IV3: clean=True only when working tree matches HEAD exactly."""
        root = code_repo
        assert _status_json(root)["clean"] is True

        (root / "main.py").write_text("x = 99\n")
        assert _status_json(root)["clean"] is False

    def test_IV4_dirty_is_not_clean(self, code_repo: pathlib.Path) -> None:
        """IV4: dirty = not clean, always — both are always present.

        The two fields are compared logically and type-checked.  An identity
        comparison (``dirty is not clean``) would hold for any two distinct
        objects, e.g. a string and a bool, and so would not verify the
        relationship.
        """
        root = code_repo

        data_clean = _status_json(root)
        (root / "main.py").write_text("x = 99\n")
        data_dirty = _status_json(root)

        for data in (data_clean, data_dirty):
            assert isinstance(data["clean"], bool), (
                f"clean must be a bool, got {type(data['clean'])}"
            )
            assert isinstance(data["dirty"], bool), (
                f"dirty must be a bool, got {type(data['dirty'])}"
            )
            assert data["dirty"] == (not data["clean"])

        assert data_dirty["dirty"] is True


# ---------------------------------------------------------------------------
# V  untracked_count
# ---------------------------------------------------------------------------


class TestUntrackedCountV:
    """Section V: ``untracked_count`` mirrors ``len(untracked)`` and is separate from ``total_changes``."""

    def test_V1_untracked_count_always_present_as_int(
        self, code_repo: pathlib.Path
    ) -> None:
        """V1: untracked_count is always present and is an int (not a bool)."""
        data = _status_json(code_repo)
        assert "untracked_count" in data, "untracked_count must always be present"
        count = data["untracked_count"]
        # bool is a subclass of int, so isinstance(count, int) alone would
        # accept a JSON true/false; exclude it explicitly.
        assert isinstance(count, int) and not isinstance(count, bool), (
            f"untracked_count must be int, got {type(count)}"
        )

    def test_V2_untracked_count_equals_len_untracked(
        self, code_repo: pathlib.Path
    ) -> None:
        """V2: untracked_count == len(untracked) — it is a redundant convenience field."""
        root = code_repo
        (root / "foo.txt").write_text("foo\n")
        (root / "bar.txt").write_text("bar\n")

        data = _status_json(root)

        assert data["untracked_count"] == len(data["untracked"]), (
            f"untracked_count {data['untracked_count']} != len(untracked) {len(data['untracked'])}"
        )
        # Two untracked files were created; 0 == 0 must not satisfy this test.
        assert data["untracked_count"] >= 2, (
            "both untracked files must be counted"
        )

    def test_V3_untracked_count_zero_for_clean_repo(
        self, code_repo: pathlib.Path
    ) -> None:
        """V3: untracked_count == 0 when the working tree is clean."""
        data = _status_json(code_repo)
        assert data["untracked_count"] == 0

    def test_V4_untracked_count_zero_when_only_tracked_changes(
        self, code_repo: pathlib.Path
    ) -> None:
        """V4: untracked_count == 0 when total_changes > 0 but no untracked files."""
        root = code_repo
        (root / "main.py").write_text("x = 2\n")
        runner.invoke(cli, ["code", "add", "main.py"], env=_env(root))

        data = _status_json(root)

        assert data["total_changes"] > 0
        assert data["untracked_count"] == 0

    def test_V5_untracked_count_nonzero_total_changes_stays_zero(
        self, code_repo: pathlib.Path
    ) -> None:
        """V5: Only untracked files — untracked_count > 0, total_changes remains 0."""
        root = code_repo
        (root / "new_file.txt").write_text("hello\n")

        data = _status_json(root)

        assert data["total_changes"] == 0, (
            "total_changes must not count untracked files"
        )
        assert data["untracked_count"] > 0, (
            "untracked_count must reflect the untracked file"
        )
        assert data["dirty"] is True

    def test_V6_both_tracked_changes_and_untracked_files(
        self, code_repo: pathlib.Path
    ) -> None:
        """V6: When both tracked changes and untracked files exist, both counts are nonzero."""
        root = code_repo
        # Tracked modification
        (root / "main.py").write_text("x = 2\n")
        runner.invoke(cli, ["code", "add", "main.py"], env=_env(root))
        # Untracked
        (root / "scratch.txt").write_text("scratch\n")

        data = _status_json(root)

        assert data["total_changes"] > 0, "total_changes must reflect tracked modification"
        assert data["untracked_count"] > 0, "untracked_count must reflect untracked file"
        assert data["dirty"] is True