"""Tests for the canonical ``muse commit --json`` schema.

``muse commit`` is the core write operation — every agent pipeline ends here.
The JSON output must expose all provenance fields so downstream consumers
(hub, orchestrators, other agents) never need a follow-up ``muse read`` just
to discover what model produced a commit.

Canonical schema (success path)
---------------------------------
::

    {
      "dry_run":            false,
      "commit_id":          "sha256:<64-hex>",
      "branch":             str,
      "snapshot_id":        str,
      "message":            str,
      "parent_commit_id":   str | null,
      "parent2_commit_id":  str | null,
      "committed_at":       str,          // ISO 8601 with timezone
      "author":             str,
      "agent_id":           str,          // "" for human commits
      "model_id":           str,          // "" for human commits
      "toolchain_id":       str,
      "sem_ver_bump":       str,          // "none" | "patch" | "minor" | "major"
      "breaking_changes":   [str, ...],
      "files_changed": {
        "added":    int,
        "modified": int,
        "deleted":  int,
        "total":    int                   // added + modified + deleted
      }
    }

Dry-run schema is identical except ``dry_run`` is ``true`` and ``clean`` may
appear when the working tree has no changes.

Coverage
--------
I   Schema invariants
    I1  All required keys present on a normal commit
    I2  commit_id is sha256:-prefixed
    I3  committed_at is ISO 8601 with timezone
    I4  sem_ver_bump is a valid enum value
    I5  breaking_changes is always a list
    I6  files_changed has added, modified, deleted, total keys
    I7  files_changed.total = added + modified + deleted

II  Agent provenance in commit output
    II1  agent_id populated from --agent-id flag
    II2  model_id populated from --model-id flag
    II3  toolchain_id populated from --toolchain-id flag
    II4  agent_id empty string (not null) for human commits
    II5  model_id empty string (not null) for human commits
    II6  model_id from MUSE_MODEL_ID env when flag absent
    II7  toolchain_id from MUSE_TOOLCHAIN_ID env when flag absent
    II8  --agent-id flag overrides MUSE_AGENT_ID env

III  Dry-run schema parity
    III1  dry_run schema has same required keys as success path (minus clean)
    III2  dry_run: true in dry-run output
    III3  dry_run: false in normal commit output
    III4  dry-run output has model_id and toolchain_id
    III5  dry-run clean tree exits 1 with clean=true JSON

IV  File change accounting
    IV1  Initial commit files_changed.added >= 1
    IV2  Modification increments modified, not added
    IV3  Deletion increments deleted
    IV4  files_changed.total = added + modified + deleted always

V   Error paths (JSON mode)
    V1  Missing -m exits 1 with JSON {"error": "no_message", ...}
    V2  Clean tree (no --dry-run) exits 0 with JSON {"clean": true, ...}

    Documented but without a numbered test in this list: an empty workdir
    exits 1 with JSON {"error": "empty_workdir", ...}.
"""

from __future__ import annotations
from collections.abc import Mapping

import json
import os
import pathlib

import pytest

from tests.cli_test_helper import CliRunner, InvokeResult

# Placeholder for the CLI application object; it is passed as the first
# argument to every ``runner.invoke`` call in this module.
# NOTE(review): presumably the project's CliRunner does not need a real app
# object here — confirm in tests.cli_test_helper.
cli = None
# Single runner instance shared by the helpers, fixtures, and tests below.
runner = CliRunner()

# Top-level keys the tests require in ``commit --json`` output
# (checked for both real commits and dry runs).
_REQUIRED_KEYS = {
    # run mode
    "dry_run",
    # commit identity
    "commit_id",
    "branch",
    "snapshot_id",
    # message and ancestry
    "message",
    "parent_commit_id",
    "parent2_commit_id",
    # when / who
    "committed_at",
    "author",
    # agent provenance
    "agent_id",
    "model_id",
    "toolchain_id",
    # semantic-version metadata
    "sem_ver_bump",
    "breaking_changes",
    # change accounting
    "files_changed",
}

# Keys required inside the nested ``files_changed`` object.
_FILES_CHANGED_KEYS = {
    "added",
    "modified",
    "deleted",
    "total",
}

# Accepted values for ``sem_ver_bump``.
_VALID_SEM_VER_BUMPS = {
    "none",
    "patch",
    "minor",
    "major",
}


def _env(root: pathlib.Path) -> Mapping[str, str]:
    """Return a fresh base environment: ``MUSE_REPO_ROOT`` set to ``str(root)``."""
    environment = dict(MUSE_REPO_ROOT=str(root))
    return environment


def _commit(root: pathlib.Path, *flags: str, env: Mapping[str, str] | None = None) -> Mapping[str, object]:
    """Run ``commit --json`` with *flags* and return the decoded JSON payload.

    The invocation environment is ``_env(root)`` overlaid with *env*; keys in
    *env* win on conflict. The calling test fails if the command exits
    non-zero, and the whole stripped output is parsed as one JSON document.
    """
    merged_env = dict(_env(root))
    if env:
        merged_env.update(env)
    argv = ["commit", "--json", *flags]
    outcome = runner.invoke(cli, argv, env=merged_env)
    assert outcome.exit_code == 0, f"commit --json failed (exit {outcome.exit_code}):\n{outcome.output}"
    return json.loads(outcome.output.strip())


def _commit_raw(root: pathlib.Path, *args: str, env: Mapping[str, str] | None = None) -> InvokeResult:
    """Run ``commit --json`` with *args* and return the raw invocation result.

    Unlike ``_commit`` this neither checks the exit code nor decodes the
    output, so callers can inspect non-zero exits. The environment is
    ``_env(root)`` overlaid with *env* (keys in *env* win).
    """
    overrides = env or {}
    combined = {**_env(root), **overrides}
    return runner.invoke(cli, ["commit", "--json", *args], env=combined)


@pytest.fixture()
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Fresh ``--domain code`` repo in *tmp_path* with ``module.py`` written; no commits.

    The process working directory is switched to *tmp_path* for the duration
    of the test, and *tmp_path* itself is returned.
    """
    monkeypatch.chdir(tmp_path)
    base_env = _env(tmp_path)

    init_result = runner.invoke(cli, ["init", "--domain", "code"], env=base_env)
    assert init_result.exit_code == 0, init_result.output

    source_file = tmp_path / "module.py"
    source_file.write_text("def greet():\n    return 'hello'\n")
    # The result of the add step is not inspected.
    runner.invoke(cli, ["code", "add", "module.py"], env=base_env)
    return tmp_path


@pytest.fixture()
def committed_repo(
    repo: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> pathlib.Path:
    """The ``repo`` fixture after one successful ``commit -m initial``."""
    first_commit = runner.invoke(cli, ["commit", "-m", "initial"], env=_env(repo))
    assert first_commit.exit_code == 0, first_commit.output
    return repo


# ---------------------------------------------------------------------------
# I  Schema invariants
# ---------------------------------------------------------------------------


class TestSchemaInvariantsI:
    """Section I: structural invariants of a successful ``commit --json`` payload."""

    def test_I1_all_required_keys_present(self, repo: pathlib.Path) -> None:
        """I1: no key listed in ``_REQUIRED_KEYS`` may be absent from the payload."""
        payload = _commit(repo, "-m", "initial commit")
        missing = _REQUIRED_KEYS - payload.keys()
        assert not missing, f"Missing required keys in commit --json: {missing}"

    def test_I2_commit_id_sha256_prefixed(self, repo: pathlib.Path) -> None:
        """I2: ``commit_id`` begins with the literal prefix ``sha256:``."""
        commit_id = _commit(repo, "-m", "initial commit")["commit_id"]
        assert commit_id.startswith("sha256:"), (
            f"commit_id must be sha256:-prefixed, got {commit_id!r}"
        )

    def test_I3_committed_at_is_iso8601_with_tz(self, repo: pathlib.Path) -> None:
        """I3: ``committed_at`` parses via ``fromisoformat`` and is timezone-aware."""
        import datetime
        stamp = _commit(repo, "-m", "initial")["committed_at"]
        parsed = datetime.datetime.fromisoformat(stamp)
        assert parsed.tzinfo is not None, (
            f"committed_at lacks timezone: {stamp!r}"
        )

    def test_I4_sem_ver_bump_valid_enum(self, repo: pathlib.Path) -> None:
        """I4: ``sem_ver_bump`` is a member of ``_VALID_SEM_VER_BUMPS``."""
        bump = _commit(repo, "-m", "initial")["sem_ver_bump"]
        assert bump in _VALID_SEM_VER_BUMPS, (
            f"sem_ver_bump {bump!r} not in {_VALID_SEM_VER_BUMPS}"
        )

    def test_I5_breaking_changes_always_list(self, repo: pathlib.Path) -> None:
        """I5: ``breaking_changes`` is present and is a list, never null."""
        breaking = _commit(repo, "-m", "initial")["breaking_changes"]
        assert isinstance(breaking, list), (
            f"breaking_changes must be list, got {type(breaking)}"
        )

    def test_I6_files_changed_has_all_keys(self, repo: pathlib.Path) -> None:
        """I6: ``files_changed`` carries every key in ``_FILES_CHANGED_KEYS``."""
        counts = _commit(repo, "-m", "initial")["files_changed"]
        missing = _FILES_CHANGED_KEYS - counts.keys()
        assert not missing, (
            f"files_changed missing keys: {missing}. Got: {counts}"
        )

    def test_I7_files_changed_total_is_sum(self, repo: pathlib.Path) -> None:
        """I7: ``files_changed.total`` equals added + modified + deleted."""
        counts = _commit(repo, "-m", "initial")["files_changed"]
        added, modified, deleted = counts["added"], counts["modified"], counts["deleted"]
        expected = added + modified + deleted
        assert counts["total"] == expected, (
            f"files_changed.total {counts['total']} != "
            f"added({added}) + modified({modified}) + deleted({deleted}) = {expected}"
        )


# ---------------------------------------------------------------------------
# II  Agent provenance in commit output
# ---------------------------------------------------------------------------


class TestAgentProvenanceII:
    """Section II: provenance fields (agent, model, toolchain) in ``commit --json``."""

    def test_II1_agent_id_in_output(self, repo: pathlib.Path) -> None:
        """II1: the value given to ``--agent-id`` is echoed as ``agent_id``."""
        agent_id = _commit(repo, "-m", "bot commit", "--agent-id", "test-bot")["agent_id"]
        assert agent_id == "test-bot", (
            f"Expected agent_id='test-bot', got {agent_id!r}"
        )

    def test_II2_model_id_in_output(self, repo: pathlib.Path) -> None:
        """II2: the value given to ``--model-id`` is echoed as ``model_id``."""
        model_id = _commit(repo, "-m", "model commit", "--model-id", "claude-opus-4")["model_id"]
        assert model_id == "claude-opus-4", (
            f"Expected model_id='claude-opus-4', got {model_id!r}"
        )

    def test_II3_toolchain_id_in_output(self, repo: pathlib.Path) -> None:
        """II3: the value given to ``--toolchain-id`` is echoed as ``toolchain_id``."""
        toolchain_id = _commit(repo, "-m", "tc commit", "--toolchain-id", "cursor-v2")["toolchain_id"]
        assert toolchain_id == "cursor-v2", (
            f"Expected toolchain_id='cursor-v2', got {toolchain_id!r}"
        )

    def test_II4_agent_id_empty_string_for_human(self, repo: pathlib.Path) -> None:
        """II4: with no agent flag, ``agent_id`` is the empty string rather than null."""
        agent_id = _commit(repo, "-m", "human commit")["agent_id"]
        assert agent_id == "", (
            f"agent_id must be '' for human commit, got {agent_id!r}"
        )

    def test_II5_model_id_empty_string_for_human(self, repo: pathlib.Path) -> None:
        """II5: with no model flag, ``model_id`` is the empty string rather than null."""
        model_id = _commit(repo, "-m", "human commit")["model_id"]
        assert model_id == "", (
            f"model_id must be '' for human commit, got {model_id!r}"
        )

    def test_II6_model_id_from_env(
        self, repo: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """II6: ``MUSE_MODEL_ID`` supplies ``model_id`` when ``--model-id`` is not given."""
        env_with_model = dict(_env(repo), MUSE_MODEL_ID="claude-haiku-4")
        model_id = _commit(repo, "-m", "env model", env=env_with_model)["model_id"]
        assert model_id == "claude-haiku-4", (
            f"Expected model_id='claude-haiku-4' from env, got {model_id!r}"
        )

    def test_II7_toolchain_id_from_env(
        self, repo: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """II7: ``MUSE_TOOLCHAIN_ID`` supplies ``toolchain_id`` when the flag is not given."""
        env_with_toolchain = dict(_env(repo), MUSE_TOOLCHAIN_ID="agentic-v3")
        toolchain_id = _commit(repo, "-m", "env tc", env=env_with_toolchain)["toolchain_id"]
        assert toolchain_id == "agentic-v3", (
            f"Expected toolchain_id='agentic-v3' from env, got {toolchain_id!r}"
        )

    def test_II8_flag_overrides_env_for_agent_id(
        self, repo: pathlib.Path
    ) -> None:
        """II8: when both are set, ``--agent-id`` wins over ``MUSE_AGENT_ID``."""
        env_with_agent = dict(_env(repo), MUSE_AGENT_ID="env-bot")
        agent_id = _commit(
            repo, "-m", "override", "--agent-id", "flag-bot", env=env_with_agent
        )["agent_id"]
        assert agent_id == "flag-bot", (
            f"Expected flag-bot to override env-bot, got {agent_id!r}"
        )


# ---------------------------------------------------------------------------
# III  Dry-run schema parity
# ---------------------------------------------------------------------------


class TestDryRunSchemaIII:
    """Section III: ``commit --json --dry-run`` output mirrors the real-commit schema."""

    def test_III1_dry_run_has_same_required_keys(self, repo: pathlib.Path) -> None:
        """III1: dry-run output has the same required keys as the success path."""
        result = _commit_raw(repo, "-m", "check", "--dry-run")
        assert result.exit_code == 0, f"dry-run failed:\n{result.output}"
        data = json.loads(result.output.strip())
        missing = _REQUIRED_KEYS - data.keys()
        assert not missing, f"dry-run missing required keys: {missing}"

    def test_III2_dry_run_flag_is_true(self, repo: pathlib.Path) -> None:
        """III2: dry_run=true in dry-run output."""
        result = _commit_raw(repo, "-m", "check", "--dry-run")
        # Include the CLI output so a failure is diagnosable without a re-run.
        assert result.exit_code == 0, f"dry-run failed:\n{result.output}"
        data = json.loads(result.output.strip())
        assert data["dry_run"] is True, f"Expected dry_run=true: {data}"

    def test_III3_dry_run_false_on_real_commit(self, repo: pathlib.Path) -> None:
        """III3: dry_run=false in normal commit output."""
        data = _commit(repo, "-m", "real commit")
        assert data["dry_run"] is False, f"Expected dry_run=false: {data}"

    def test_III4_dry_run_has_model_id_and_toolchain_id(
        self, repo: pathlib.Path
    ) -> None:
        """III4: dry-run output includes model_id and toolchain_id."""
        result = _commit_raw(
            repo, "-m", "preflight",
            "--dry-run", "--model-id", "claude-opus-4", "--toolchain-id", "cursor",
        )
        assert result.exit_code == 0, f"dry-run failed:\n{result.output}"
        data = json.loads(result.output.strip())
        assert data["model_id"] == "claude-opus-4", (
            f"model_id missing from dry-run output: {data}"
        )
        assert data["toolchain_id"] == "cursor", (
            f"toolchain_id missing from dry-run output: {data}"
        )

    def test_III5_dry_run_clean_tree_exits_1(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """III5: dry-run on a clean tree exits 1 with clean=true in JSON.

        Uses its own repo to ensure a truly clean workdir (all files committed).
        muse init --domain code creates .museattributes/.museignore, so we commit
        everything once first to establish HEAD == workdir, then dry-run.
        """
        monkeypatch.chdir(tmp_path)
        env = _env(tmp_path)
        # Fail fast if setup breaks, mirroring the check in the ``repo`` fixture;
        # otherwise a broken init surfaces as a misleading commit failure below.
        init_result = runner.invoke(cli, ["init", "--domain", "code"], env=env)
        assert init_result.exit_code == 0, init_result.output
        (tmp_path / "module.py").write_text("x = 1\n")
        # Commit everything so HEAD == workdir (includes init-created files)
        result = runner.invoke(cli, ["commit", "-m", "initial"], env=env)
        assert result.exit_code == 0, result.output
        # Now dry-run should detect nothing to commit
        result = _commit_raw(tmp_path, "-m", "nothing", "--dry-run", env=env)
        assert result.exit_code == 1, (
            f"Expected exit 1 for dry-run on clean tree, got {result.exit_code}. "
            f"Output: {result.output}"
        )
        data = json.loads(result.output.strip())
        assert data.get("clean") is True, (
            f"Expected clean=true in dry-run clean-tree JSON: {data}"
        )
        assert data.get("dry_run") is True, (
            f"Expected dry_run=true in dry-run clean-tree JSON: {data}"
        )


# ---------------------------------------------------------------------------
# IV  File change accounting
# ---------------------------------------------------------------------------


class TestFileChangeAccountingIV:
    """Section IV: ``files_changed`` counters reflect adds, edits, and deletions."""

    def test_IV1_initial_commit_added_gte_1(self, repo: pathlib.Path) -> None:
        """IV1: Initial commit adds at least the tracked file."""
        data = _commit(repo, "-m", "initial")
        assert data["files_changed"]["added"] >= 1, (
            f"Initial commit should add >=1 file: {data['files_changed']}"
        )

    def test_IV2_modification_increments_modified(
        self, committed_repo: pathlib.Path
    ) -> None:
        """IV2: Editing an existing file increments modified, not added."""
        env = _env(committed_repo)
        (committed_repo / "module.py").write_text("def greet():\n    return 'hi'\n")
        runner.invoke(cli, ["code", "add", "module.py"], env=env)
        data = _commit(committed_repo, "-m", "modify")
        fc = data["files_changed"]
        assert fc["modified"] == 1, f"Expected modified=1 after editing module.py: {fc}"
        assert fc["added"] == 0, f"Expected added=0 after editing module.py: {fc}"

    def test_IV3_deletion_increments_deleted(
        self, committed_repo: pathlib.Path
    ) -> None:
        """IV3: Removing a tracked file increments deleted.

        Uses a second file so deleting one doesn't leave an empty workdir
        (an empty manifest triggers "empty workdir" rather than a deletion).
        """
        env = _env(committed_repo)
        # Add a second file so there's still something tracked after the deletion.
        (committed_repo / "extra.py").write_text("y = 2\n")
        runner.invoke(cli, ["code", "add", "extra.py"], env=env)
        # The deletion below is only meaningful if extra.py really reached HEAD,
        # so fail here with the CLI output rather than later with "deleted != 1".
        setup_commit = runner.invoke(cli, ["commit", "-m", "add extra"], env=env)
        assert setup_commit.exit_code == 0, setup_commit.output
        # Now delete extra.py — module.py remains, so workdir is non-empty.
        (committed_repo / "extra.py").unlink()
        runner.invoke(cli, ["code", "add", "extra.py"], env=env)
        data = _commit(committed_repo, "-m", "remove extra")
        fc = data["files_changed"]
        assert fc["deleted"] == 1, f"Expected deleted=1 after removing extra.py: {fc}"
        assert fc["added"] == 0, f"Expected added=0 after removing extra.py: {fc}"

    def test_IV4_total_always_matches_sum(
        self, committed_repo: pathlib.Path
    ) -> None:
        """IV4: files_changed.total = added + modified + deleted, always."""
        env = _env(committed_repo)
        # One brand-new file plus one edit to an already-committed file.
        (committed_repo / "new.py").write_text("x = 1\n")
        (committed_repo / "module.py").write_text("def greet():\n    return 'hi'\n")
        runner.invoke(cli, ["code", "add", "new.py"], env=env)
        runner.invoke(cli, ["code", "add", "module.py"], env=env)
        data = _commit(committed_repo, "-m", "mixed")
        fc = data["files_changed"]
        expected = fc["added"] + fc["modified"] + fc["deleted"]
        assert fc["total"] == expected, (
            f"total {fc['total']} != sum {expected}: {fc}"
        )


# ---------------------------------------------------------------------------
# V  Error paths
# ---------------------------------------------------------------------------


class TestErrorPathsV:
    """Section V: machine-readable responses on non-success paths of ``commit --json``."""

    @staticmethod
    def _first_json_line(output: str) -> str | None:
        """Return the first line of *output* that starts with ``{``, or ``None``.

        The output may contain non-JSON lines around the JSON object, so the
        tests pick out the JSON line instead of parsing the whole output.
        """
        return next(
            (line for line in output.strip().splitlines() if line.startswith("{")),
            None,
        )

    def test_V1_missing_message_exits_1_with_json_error(
        self, repo: pathlib.Path
    ) -> None:
        """V1: Missing -m exits 1 with JSON error {"error": "no_message"}."""
        result = _commit_raw(repo)  # no -m
        assert result.exit_code == 1, (
            f"Expected exit 1 when -m is missing, got {result.exit_code}. "
            f"Output: {result.output}"
        )
        json_line = self._first_json_line(result.output)
        assert json_line is not None, f"No JSON in output: {result.output!r}"
        data = json.loads(json_line)
        # .get() keeps a missing "error" key on the assertion path (with the
        # message below) instead of raising KeyError.
        assert data.get("error") == "no_message", (
            f"Expected error='no_message', got {data.get('error')!r}"
        )

    def test_V2_clean_tree_json_response(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """V2: --json on a clean tree (no --dry-run) exits 0 with clean=true JSON.

        An agent using ``muse commit --json -m "msg"`` on a clean repo must get
        a machine-readable response — not a silent text-only "Nothing to commit".
        """
        monkeypatch.chdir(tmp_path)
        env = _env(tmp_path)
        # Fail fast if setup breaks, mirroring the check in the ``repo`` fixture.
        init_result = runner.invoke(cli, ["init", "--domain", "code"], env=env)
        assert init_result.exit_code == 0, init_result.output
        (tmp_path / "module.py").write_text("x = 1\n")
        # Commit everything to establish HEAD == workdir
        result = runner.invoke(cli, ["commit", "-m", "initial"], env=env)
        assert result.exit_code == 0, result.output
        # Second commit on clean tree — must return JSON
        result = _commit_raw(tmp_path, "-m", "nothing", env=env)
        assert result.exit_code == 0, (
            f"Expected exit 0 for --json commit on clean tree, got {result.exit_code}. "
            f"Output: {result.output}"
        )
        json_line = self._first_json_line(result.output)
        assert json_line is not None, (
            f"No JSON on stdout for clean-tree --json commit: {result.output!r}"
        )
        data = json.loads(json_line)
        assert data.get("clean") is True, (
            f"Expected clean=true in clean-tree commit JSON: {data}"
        )