"""Tests for the canonical ``muse read --json`` schema.

``muse read`` is how agents inspect individual commits — metadata, delta,
and provenance in one shot.  The JSON schema must be complete and stable.

Schema (with --stat, default)
------------------------------
::

    {
      "commit_id":          "sha256:<64-hex>",
      "repo_id":            str,
      "branch":             str,
      "snapshot_id":        str,
      "message":            str,
      "committed_at":       str,            // ISO 8601 with timezone
      "parent_commit_id":   str | null,
      "parent2_commit_id":  str | null,
      "author":             str,
      "metadata":           dict,
      "structured_delta":   dict | null,    // absent with --no-delta
      "sem_ver_bump":       str,            // "none" | "patch" | "minor" | "major"
      "breaking_changes":   [str, ...],
      "agent_id":           str,            // "" for human commits
      "model_id":           str,            // "" for human commits
      "toolchain_id":       str,
      "prompt_hash":        str,
      "signature":          str,
      "signer_public_key":  str,
      "signer_key_id":      str,
      "reviewed_by":        [str, ...],
      "test_runs":          int,
      "files_added":        [str, ...],     // absent with --no-stat
      "files_removed":      [str, ...],     // absent with --no-stat
      "files_modified":     [str, ...],     // absent with --no-stat
      "total_changes":      int             // absent with --no-stat
    }

Coverage
--------
I   Schema invariants
    I1  All required keys present (full provenance set)
    I2  commit_id is sha256:-prefixed
    I3  committed_at is ISO 8601 with timezone
    I4  sem_ver_bump is a valid enum value
    I5  breaking_changes is always a list
    I6  reviewed_by is always a list
    I7  test_runs is always an int

II  Agent provenance
    II1  agent_id populated from --agent-id flag
    II2  model_id populated from --model-id flag
    II3  agent_id is empty string (not null) for human commits
    II4  model_id is empty string (not null) for human commits
    II5  toolchain_id is a string (never null)

III  File stats
    III1  total_changes present with --stat (default)
    III2  total_changes = len(files_added)+len(files_modified)+len(files_removed)
    III3  total_changes absent with --no-stat
    III4  files_added/removed/modified absent with --no-stat

IV  Error handling (agent-friendly)
    IV1  Non-existent commit exits 1 cleanly (no traceback)
    IV2  --json + non-existent ref → stdout has JSON {"error": ...}
    IV3  JSON error has "error", "ref", "message" keys
    IV4  Invalid sha256: hex digits → exit 1 cleanly (no traceback)
    IV5  One-hex-digit prefix → exit 0, or exit 1 with "commit_not_found" / "ambiguous_ref"; never a traceback

V   Structured delta
    V1  structured_delta present on non-initial commit
    V2  structured_delta is null on initial commit (no parent to diff against)
    V3  --no-delta omits structured_delta key entirely
"""

from __future__ import annotations
from collections.abc import Mapping

import json
import pathlib

import pytest

from tests.cli_test_helper import CliRunner, InvokeResult
from muse.core.types import long_id

# Every ``runner.invoke`` call in this module passes ``cli`` (i.e. ``None``)
# as its first argument rather than an application object.
# NOTE(review): presumably the project's CliRunner locates the muse entry
# point itself and ignores this argument — confirm in tests.cli_test_helper.
cli = None
# Single runner instance shared by the helpers, fixtures, and tests below.
runner = CliRunner()

# Keys that test I1 requires in the default ``read --json`` output.
# NOTE(review): the schema in the module docstring also lists "repo_id",
# which is not enforced here — confirm whether that omission is intentional.
_REQUIRED_KEYS = {
    # Identity
    "commit_id",
    "branch",
    "snapshot_id",
    # Content
    "message",
    "committed_at",
    "parent_commit_id",
    "parent2_commit_id",
    "author",
    "metadata",
    "structured_delta",
    # Semantic versioning
    "sem_ver_bump",
    "breaking_changes",
    # Agent provenance (all must be present, empty string for humans)
    "agent_id",
    "model_id",
    "toolchain_id",
    "prompt_hash",
    "signature",
    "signer_public_key",
    "signer_key_id",
    # CRDT annotation fields
    "reviewed_by",
    "test_runs",
    # File stat fields (present with default --stat)
    "files_added",
    "files_removed",
    "files_modified",
    "total_changes",
}

# The closed set of values test I4 accepts for "sem_ver_bump".
_VALID_SEM_VER_BUMPS = {
    "none",
    "patch",
    "minor",
    "major",
}


def _env(root: pathlib.Path) -> Mapping[str, str]:
    """Return the environment mapping handed to ``runner.invoke`` for *root*.

    The mapping holds a single entry, ``MUSE_REPO_ROOT``, set to ``str(root)``.
    """
    repo_root = str(root)
    return {"MUSE_REPO_ROOT": repo_root}


def _show(root: pathlib.Path, *flags: str) -> Mapping[str, object]:
    """Run ``read --json`` against *root* and return the parsed payload.

    Args:
        root: Repository root, exported to the CLI via ``MUSE_REPO_ROOT``.
        *flags: Extra command-line arguments appended after ``--json``.

    Returns:
        The decoded JSON document printed by the command.

    Raises:
        AssertionError: If the command exits non-zero; the message carries
            the captured output.
    """
    # Reuse the raw helper so the argv/env construction lives in one place.
    result = _show_raw(root, *flags)
    # Name the command that actually ran (``read``) so a failure is not
    # mistaken for a problem in some other subcommand.
    assert result.exit_code == 0, f"read --json failed:\n{result.output}"
    return json.loads(result.output.strip())


def _show_raw(root: pathlib.Path, *args: str) -> InvokeResult:
    """Invoke ``read --json`` with *args* and hand back the unparsed result.

    No exit-code check is made here, so error-path tests can inspect the
    exit code and output themselves.
    """
    argv = ["read", "--json", *args]
    return runner.invoke(cli, argv, env=_env(root))


@pytest.fixture()
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Code-domain repo with one committed file, no agent provenance.

    Changes the working directory to *tmp_path*, runs ``init --domain code``,
    writes ``module.py``, stages it with ``code add`` and commits it with the
    message ``initial``.  Every CLI step must exit 0.

    Returns:
        *tmp_path*, now the root of the initialised repository.
    """
    monkeypatch.chdir(tmp_path)
    env = _env(tmp_path)
    result = runner.invoke(cli, ["init", "--domain", "code"], env=env)
    assert result.exit_code == 0, result.output
    (tmp_path / "module.py").write_text("def greet():\n    return 'hello'\n")
    # Check staging as well: an ignored failure here would only show up as a
    # less obvious error from the commit step below.
    result = runner.invoke(cli, ["code", "add", "module.py"], env=env)
    assert result.exit_code == 0, result.output
    result = runner.invoke(cli, ["commit", "-m", "initial"], env=env)
    assert result.exit_code == 0, result.output
    return tmp_path


@pytest.fixture()
def repo_with_two_commits(
    repo: pathlib.Path,
    monkeypatch: pytest.MonkeyPatch,
) -> pathlib.Path:
    """Extends repo fixture with a second commit that modifies module.py.

    ``module.py`` is rewritten to add a ``farewell`` function, staged with
    ``code add`` and committed with the message ``add farewell``.  Both CLI
    steps must exit 0.  *monkeypatch* is requested but not used in this body.

    Returns:
        The same path as *repo*, now holding two commits.
    """
    env = _env(repo)
    (repo / "module.py").write_text(
        "def greet():\n    return 'hello'\n\ndef farewell():\n    return 'bye'\n"
    )
    # Check staging as well: an ignored failure here would only show up as a
    # less obvious error from the commit step below.
    result = runner.invoke(cli, ["code", "add", "module.py"], env=env)
    assert result.exit_code == 0, result.output
    result = runner.invoke(cli, ["commit", "-m", "add farewell"], env=env)
    assert result.exit_code == 0, result.output
    return repo


# ---------------------------------------------------------------------------
# I  Schema invariants
# ---------------------------------------------------------------------------


class TestSchemaInvariantsI:
    """I: type and presence invariants of the default ``read --json`` output."""

    def test_I1_all_required_keys_present(
        self, repo_with_two_commits: pathlib.Path
    ) -> None:
        """I1: Every required key must be present in the default show --json output."""
        data = _show(repo_with_two_commits)
        missing = _REQUIRED_KEYS - data.keys()
        assert not missing, f"Missing required keys in show --json: {missing}"

    def test_I2_commit_id_sha256_prefixed(self, repo: pathlib.Path) -> None:
        """I2: commit_id must start with 'sha256:'."""
        data = _show(repo)
        assert data["commit_id"].startswith("sha256:"), (
            f"commit_id must be sha256:-prefixed, got {data['commit_id']!r}"
        )

    def test_I3_committed_at_is_iso8601_with_tz(self, repo: pathlib.Path) -> None:
        """I3: committed_at must parse as ISO 8601 with timezone info."""
        import datetime
        data = _show(repo)
        # fromisoformat raises ValueError on an unparseable timestamp, which
        # fails the test before the timezone check is reached.
        dt = datetime.datetime.fromisoformat(data["committed_at"])
        assert dt.tzinfo is not None, (
            f"committed_at lacks timezone: {data['committed_at']!r}"
        )

    def test_I4_sem_ver_bump_valid_enum(self, repo: pathlib.Path) -> None:
        """I4: sem_ver_bump must be one of the four valid values."""
        data = _show(repo)
        assert data["sem_ver_bump"] in _VALID_SEM_VER_BUMPS, (
            f"sem_ver_bump {data['sem_ver_bump']!r} not in {_VALID_SEM_VER_BUMPS}"
        )

    def test_I5_breaking_changes_always_list(self, repo: pathlib.Path) -> None:
        """I5: breaking_changes is always a list (never null or absent)."""
        data = _show(repo)
        assert isinstance(data["breaking_changes"], list), (
            f"breaking_changes must be list, got {type(data['breaking_changes'])}"
        )

    def test_I6_reviewed_by_always_list(self, repo: pathlib.Path) -> None:
        """I6: reviewed_by is always a list (CRDT ORSet)."""
        data = _show(repo)
        assert isinstance(data["reviewed_by"], list), (
            f"reviewed_by must be list, got {type(data['reviewed_by'])}"
        )

    def test_I7_test_runs_always_int(self, repo: pathlib.Path) -> None:
        """I7: test_runs is always an int (CRDT GCounter)."""
        data = _show(repo)
        runs = data["test_runs"]
        # bool is a subclass of int, so a JSON true/false would slip through
        # a plain isinstance(..., int) check; reject it explicitly.
        assert isinstance(runs, int) and not isinstance(runs, bool), (
            f"test_runs must be int, got {type(runs)}"
        )


# ---------------------------------------------------------------------------
# II  Agent provenance
# ---------------------------------------------------------------------------


class TestAgentProvenanceII:
    """II: agent/model provenance fields in ``read --json`` output."""

    def test_II1_agent_id_populated_from_flag(
        self, repo: pathlib.Path
    ) -> None:
        """II1: --agent-id value appears in agent_id field."""
        env = _env(repo)
        (repo / "helper.py").write_text("x = 1\n")
        staged = runner.invoke(cli, ["code", "add", "helper.py"], env=env)
        assert staged.exit_code == 0, staged.output
        committed = runner.invoke(
            cli,
            ["commit", "-m", "agent commit", "--agent-id", "test-bot"],
            env=env,
        )
        # If the commit were rejected, the read below would see the fixture's
        # earlier commit and fail with a misleading agent_id mismatch.
        assert committed.exit_code == 0, committed.output
        data = _show(repo)
        assert data["agent_id"] == "test-bot", (
            f"Expected agent_id='test-bot', got {data['agent_id']!r}"
        )

    def test_II2_model_id_populated_from_flag(
        self, repo: pathlib.Path
    ) -> None:
        """II2: --model-id value appears in model_id field."""
        env = _env(repo)
        (repo / "helper2.py").write_text("y = 2\n")
        staged = runner.invoke(cli, ["code", "add", "helper2.py"], env=env)
        assert staged.exit_code == 0, staged.output
        committed = runner.invoke(
            cli,
            ["commit", "-m", "model commit", "--model-id", "claude-opus-4"],
            env=env,
        )
        # Same reasoning as II1: surface a rejected commit directly.
        assert committed.exit_code == 0, committed.output
        data = _show(repo)
        assert data["model_id"] == "claude-opus-4", (
            f"Expected model_id='claude-opus-4', got {data['model_id']!r}"
        )

    def test_II3_agent_id_empty_string_for_human_commit(
        self, repo: pathlib.Path
    ) -> None:
        """II3: agent_id is empty string (not null) for human commits."""
        data = _show(repo)
        assert data["agent_id"] == "", (
            f"agent_id must be '' for human commit, got {data['agent_id']!r}"
        )

    def test_II4_model_id_empty_string_for_human_commit(
        self, repo: pathlib.Path
    ) -> None:
        """II4: model_id is empty string (not null) for human commits."""
        data = _show(repo)
        assert data["model_id"] == "", (
            f"model_id must be '' for human commit, got {data['model_id']!r}"
        )

    def test_II5_toolchain_id_is_string_not_null(self, repo: pathlib.Path) -> None:
        """II5: toolchain_id is always a string (empty for human commits)."""
        data = _show(repo)
        assert isinstance(data["toolchain_id"], str), (
            f"toolchain_id must be str (never null), got {type(data['toolchain_id'])}"
        )


# ---------------------------------------------------------------------------
# III  File stats
# ---------------------------------------------------------------------------


class TestFileStatsIII:
    """III: file-stat fields, present by default and absent with --no-stat."""

    def test_III1_total_changes_present_by_default(
        self, repo_with_two_commits: pathlib.Path
    ) -> None:
        """III1: total_changes is present in default JSON output."""
        data = _show(repo_with_two_commits)
        assert "total_changes" in data, (
            "total_changes missing from show --json output"
        )

    def test_III2_total_changes_equals_sum_of_buckets(
        self, repo_with_two_commits: pathlib.Path
    ) -> None:
        """III2: total_changes = len(files_added) + len(files_modified) + len(files_removed)."""
        data = _show(repo_with_two_commits)
        expected = (
            len(data["files_added"])
            + len(data["files_modified"])
            + len(data["files_removed"])
        )
        assert data["total_changes"] == expected, (
            f"total_changes {data['total_changes']} != "
            f"len(added={data['files_added']}) + len(modified={data['files_modified']}) "
            f"+ len(removed={data['files_removed']}) = {expected}"
        )

    def test_III3_total_changes_absent_with_no_stat(
        self, repo: pathlib.Path
    ) -> None:
        """III3: total_changes is absent when --no-stat is used."""
        # _show asserts exit code 0 and includes the CLI output on failure.
        data = _show(repo, "--no-stat")
        assert "total_changes" not in data, (
            "total_changes must not appear with --no-stat"
        )

    def test_III4_file_buckets_absent_with_no_stat(self, repo: pathlib.Path) -> None:
        """III4: files_added/removed/modified absent with --no-stat."""
        data = _show(repo, "--no-stat")
        assert "files_added" not in data
        assert "files_removed" not in data
        assert "files_modified" not in data


# ---------------------------------------------------------------------------
# IV  Error handling
# ---------------------------------------------------------------------------


class TestErrorHandlingIV:
    """IV: error paths of ``read --json`` must fail cleanly."""

    @staticmethod
    def _json_lines(output: str) -> list[str]:
        """Return the lines of *output* that start with ``{``, in order."""
        return [line for line in output.strip().splitlines() if line.startswith("{")]

    def test_IV1_nonexistent_ref_exits_1(self, repo: pathlib.Path) -> None:
        """IV1: Non-existent commit ref exits 1 without traceback."""
        result = _show_raw(repo, long_id("a" * 64))
        assert result.exit_code == 1, (
            f"Expected exit code 1 for nonexistent ref, got {result.exit_code}"
        )
        # Enforce the "without traceback" half of the contract as well.
        assert "Traceback" not in result.output

    def test_IV2_json_error_on_nonexistent_ref(self, repo: pathlib.Path) -> None:
        """IV2: --json with nonexistent ref emits JSON on stdout (not a crash)."""
        result = _show_raw(repo, long_id("a" * 64))
        # Find the JSON line (stdout) — the ❌ text goes to stderr and may appear
        # interleaved in the combined output captured by CliRunner.
        candidates = self._json_lines(result.output)
        json_line = candidates[0] if candidates else None
        assert json_line is not None, (
            f"No JSON line found in output for nonexistent ref: {result.output!r}"
        )
        try:
            data = json.loads(json_line)
        except json.JSONDecodeError as exc:
            pytest.fail(f"JSON line is not valid JSON: {json_line!r} — {exc}")
        assert "error" in data

    def test_IV3_json_error_has_required_keys(self, repo: pathlib.Path) -> None:
        """IV3: JSON error payload has 'error', 'ref', 'message' keys."""
        result = _show_raw(repo, long_id("b" * 64))
        # Parse the last JSON-looking line
        candidates = self._json_lines(result.output)
        json_line = candidates[-1] if candidates else None
        assert json_line is not None, f"No JSON line in output: {result.output!r}"
        data = json.loads(json_line)
        assert "error" in data, f"'error' key missing from error JSON: {data}"
        assert "ref" in data, f"'ref' key missing from error JSON: {data}"
        assert "message" in data, f"'message' key missing from error JSON: {data}"

    def test_IV4_invalid_sha256_hex_exits_1(self, repo: pathlib.Path) -> None:
        """IV4: sha256: prefix with non-hex chars exits 1 cleanly."""
        result = _show_raw(repo, "sha256:notvalidhex")
        assert result.exit_code == 1
        # Output must not contain a Python traceback
        assert "Traceback" not in result.output
        assert "Traceback" not in (result.stderr or "")

    def test_IV5_ambiguous_prefix_returns_json_error(
        self, repo: pathlib.Path
    ) -> None:
        """IV5: A one-hex-digit prefix resolves or fails cleanly, never crashes.

        The ``repo`` fixture holds a single commit, so ambiguity is not forced
        here.  The test accepts exit 0, or exit 1 with an error key of
        ``commit_not_found`` or ``ambiguous_ref``.
        """
        env = _env(repo)
        result = runner.invoke(
            cli,
            ["log", "--json", "-n", "1"],
            env=env,
        )
        assert result.exit_code == 0
        log_data = json.loads(result.output.strip())
        head_id = log_data["commits"][0]["commit_id"]
        # Use a 1-char hex prefix with sha256: prefix retained
        short_prefix = head_id[:len("sha256:") + 1]
        result2 = _show_raw(repo, short_prefix)
        # Either found (1 match) or ambiguous (>1 match) — must NOT crash
        assert result2.exit_code in (0, 1), (
            f"Unexpected exit code {result2.exit_code} for prefix {short_prefix!r}"
        )
        assert "Traceback" not in result2.output
        if result2.exit_code == 1:
            # Should produce JSON with either "commit_not_found" or "ambiguous_ref"
            candidates = self._json_lines(result2.output)
            # NOTE(review): a missing JSON line on exit 1 is tolerated here,
            # unlike IV2 — confirm that leniency is intended.
            if candidates:
                data = json.loads(candidates[-1])
                assert data["error"] in ("commit_not_found", "ambiguous_ref"), (
                    f"Expected error key to be 'commit_not_found' or 'ambiguous_ref', "
                    f"got {data['error']!r}"
                )


# ---------------------------------------------------------------------------
# V  Structured delta
# ---------------------------------------------------------------------------


class TestStructuredDeltaV:
    """V: presence and nullability of the structured_delta field."""

    def test_V1_structured_delta_present_on_second_commit(
        self, repo_with_two_commits: pathlib.Path
    ) -> None:
        """V1: structured_delta is non-null on a commit with a parent."""
        data = _show(repo_with_two_commits)
        # .get() folds "key absent" and "key null" into the same failure.
        assert data.get("structured_delta") is not None, (
            "structured_delta must be non-null on a commit with a parent"
        )

    def test_V2_structured_delta_null_on_initial_commit(
        self, repo: pathlib.Path
    ) -> None:
        """V2: structured_delta is null on the initial commit (no parent to diff)."""
        data = _show(repo)
        # initial commit has no parent — structured_delta should be null
        assert data["structured_delta"] is None, (
            f"Initial commit structured_delta must be null, got {data['structured_delta']!r}"
        )

    def test_V3_no_delta_omits_key(self, repo_with_two_commits: pathlib.Path) -> None:
        """V3: --no-delta removes the structured_delta key entirely."""
        # _show asserts exit code 0 and includes the CLI output on failure.
        data = _show(repo_with_two_commits, "--no-delta")
        assert "structured_delta" not in data, (
            "structured_delta must not appear with --no-delta"
        )