"""Tests for the canonical ``muse commit --json`` schema. ``muse commit`` is the core write operation — every agent pipeline ends here. The JSON output must expose all provenance fields so downstream consumers (hub, orchestrators, other agents) never need a follow-up ``muse read`` just to discover what model produced a commit. Canonical schema (success path) --------------------------------- :: { "dry_run": false, "commit_id": "sha256:<64-hex>", "branch": str, "snapshot_id": str, "message": str, "parent_commit_id": str | null, "parent2_commit_id": str | null, "committed_at": str, // ISO 8601 with timezone "author": str, "agent_id": str, // "" for human commits "model_id": str, // "" for human commits "toolchain_id": str, "sem_ver_bump": str, // "none" | "patch" | "minor" | "major" "breaking_changes": [str, ...], "files_changed": { "added": int, "modified": int, "deleted": int, "total": int // added + modified + deleted } } Dry-run schema is identical except ``dry_run`` is ``true`` and ``clean`` may appear when the working tree has no changes. Coverage -------- I Schema invariants I1 All required keys present on a normal commit I2 commit_id is sha256:-prefixed I3 committed_at is ISO 8601 with timezone I4 sem_ver_bump is a valid enum value I5 breaking_changes is always a list I6 files_changed has added, modified, deleted, total keys I7 files_changed.total = added + modified + deleted II Agent provenance in commit output II1 agent_id populated from --agent-id flag II2 model_id populated from --model-id flag II3 toolchain_id populated from --toolchain-id flag II4 agent_id empty string (not null) for human commits II5 model_id empty string (not null) for human commits II6 model_id from MUSE_MODEL_ID env when flag absent II7 toolchain_id from MUSE_TOOLCHAIN_ID env when flag absent II8 --agent-id flag overrides MUSE_AGENT_ID env III Dry-run schema parity III1 dry_run schema has same required keys as success path (minus clean) III2 dry_run: true in dry-run output III3 dry_run: false in normal commit output III4 dry-run output has model_id and toolchain_id III5 dry-run clean tree exits 1 with clean=true JSON IV File change accounting IV1 Initial commit files_changed.added >= 1 IV2 Modification increments modified, not added IV3 Deletion increments deleted IV4 files_changed.total = added + modified + deleted always V Error paths (JSON mode) V1 Missing -m exits 1 with JSON {"error": "no_message", ...} V2 Empty workdir exits 1 with JSON {"error": "empty_workdir", ...} V3 Clean tree (no --dry-run) exits 0, no JSON output (text mode behaviour) """ from __future__ import annotations from collections.abc import Mapping import json import os import pathlib import pytest from tests.cli_test_helper import CliRunner, InvokeResult cli = None runner = CliRunner() _REQUIRED_KEYS = { "dry_run", "commit_id", "branch", "snapshot_id", "message", "parent_commit_id", "parent2_commit_id", "committed_at", "author", "agent_id", "model_id", "toolchain_id", "sem_ver_bump", "breaking_changes", "files_changed", } _FILES_CHANGED_KEYS = {"added", "modified", "deleted", "total"} _VALID_SEM_VER_BUMPS = {"none", "patch", "minor", "major"} def _env(root: pathlib.Path) -> Mapping[str, str]: return {"MUSE_REPO_ROOT": str(root)} def _commit(root: pathlib.Path, *flags: str, env: Mapping[str, str] | None = None) -> Mapping[str, object]: e = {**_env(root), **(env or {})} result = runner.invoke(cli, ["commit", "--json"] + list(flags), env=e) assert result.exit_code == 0, f"commit --json failed (exit {result.exit_code}):\n{result.output}" return json.loads(result.output.strip()) def _commit_raw(root: pathlib.Path, *args: str, env: Mapping[str, str] | None = None) -> InvokeResult: e = {**_env(root), **(env or {})} return runner.invoke(cli, ["commit", "--json"] + list(args), env=e) @pytest.fixture() def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path: """Code-domain repo initialised but with nothing committed yet.""" monkeypatch.chdir(tmp_path) env = _env(tmp_path) result = runner.invoke(cli, ["init", "--domain", "code"], env=env) assert result.exit_code == 0, result.output (tmp_path / "module.py").write_text("def greet():\n return 'hello'\n") runner.invoke(cli, ["code", "add", "module.py"], env=env) return tmp_path @pytest.fixture() def committed_repo( repo: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> pathlib.Path: """Code-domain repo with one commit already made.""" env = _env(repo) result = runner.invoke(cli, ["commit", "-m", "initial"], env=env) assert result.exit_code == 0, result.output return repo # --------------------------------------------------------------------------- # I Schema invariants # --------------------------------------------------------------------------- class TestSchemaInvariantsI: def test_I1_all_required_keys_present(self, repo: pathlib.Path) -> None: """I1: Every required key must be present in commit --json output.""" data = _commit(repo, "-m", "initial commit") missing = _REQUIRED_KEYS - data.keys() assert not missing, f"Missing required keys in commit --json: {missing}" def test_I2_commit_id_sha256_prefixed(self, repo: pathlib.Path) -> None: """I2: commit_id must start with 'sha256:'.""" data = _commit(repo, "-m", "initial commit") assert data["commit_id"].startswith("sha256:"), ( f"commit_id must be sha256:-prefixed, got {data['commit_id']!r}" ) def test_I3_committed_at_is_iso8601_with_tz(self, repo: pathlib.Path) -> None: """I3: committed_at must parse as ISO 8601 with timezone info.""" import datetime data = _commit(repo, "-m", "initial") dt = datetime.datetime.fromisoformat(data["committed_at"]) assert dt.tzinfo is not None, ( f"committed_at lacks timezone: {data['committed_at']!r}" ) def test_I4_sem_ver_bump_valid_enum(self, repo: pathlib.Path) -> None: """I4: sem_ver_bump must be one of the four valid values.""" data = _commit(repo, "-m", "initial") assert data["sem_ver_bump"] in _VALID_SEM_VER_BUMPS, ( f"sem_ver_bump {data['sem_ver_bump']!r} not in {_VALID_SEM_VER_BUMPS}" ) def test_I5_breaking_changes_always_list(self, repo: pathlib.Path) -> None: """I5: breaking_changes is always a list (never null or absent).""" data = _commit(repo, "-m", "initial") assert isinstance(data["breaking_changes"], list), ( f"breaking_changes must be list, got {type(data['breaking_changes'])}" ) def test_I6_files_changed_has_all_keys(self, repo: pathlib.Path) -> None: """I6: files_changed must have added, modified, deleted, and total keys.""" data = _commit(repo, "-m", "initial") fc = data["files_changed"] missing = _FILES_CHANGED_KEYS - fc.keys() assert not missing, ( f"files_changed missing keys: {missing}. Got: {fc}" ) def test_I7_files_changed_total_is_sum(self, repo: pathlib.Path) -> None: """I7: files_changed.total = added + modified + deleted.""" data = _commit(repo, "-m", "initial") fc = data["files_changed"] expected = fc["added"] + fc["modified"] + fc["deleted"] assert fc["total"] == expected, ( f"files_changed.total {fc['total']} != " f"added({fc['added']}) + modified({fc['modified']}) + deleted({fc['deleted']}) = {expected}" ) # --------------------------------------------------------------------------- # II Agent provenance in commit output # --------------------------------------------------------------------------- class TestAgentProvenanceII: def test_II1_agent_id_in_output(self, repo: pathlib.Path) -> None: """II1: agent_id from --agent-id appears in JSON output.""" data = _commit(repo, "-m", "bot commit", "--agent-id", "test-bot") assert data["agent_id"] == "test-bot", ( f"Expected agent_id='test-bot', got {data['agent_id']!r}" ) def test_II2_model_id_in_output(self, repo: pathlib.Path) -> None: """II2: model_id from --model-id appears in JSON output.""" data = _commit(repo, "-m", "model commit", "--model-id", "claude-opus-4") assert data["model_id"] == "claude-opus-4", ( f"Expected model_id='claude-opus-4', got {data['model_id']!r}" ) def test_II3_toolchain_id_in_output(self, repo: pathlib.Path) -> None: """II3: toolchain_id from --toolchain-id appears in JSON output.""" data = _commit(repo, "-m", "tc commit", "--toolchain-id", "cursor-v2") assert data["toolchain_id"] == "cursor-v2", ( f"Expected toolchain_id='cursor-v2', got {data['toolchain_id']!r}" ) def test_II4_agent_id_empty_string_for_human(self, repo: pathlib.Path) -> None: """II4: agent_id is '' (not null) for human commits.""" data = _commit(repo, "-m", "human commit") assert data["agent_id"] == "", ( f"agent_id must be '' for human commit, got {data['agent_id']!r}" ) def test_II5_model_id_empty_string_for_human(self, repo: pathlib.Path) -> None: """II5: model_id is '' (not null) for human commits.""" data = _commit(repo, "-m", "human commit") assert data["model_id"] == "", ( f"model_id must be '' for human commit, got {data['model_id']!r}" ) def test_II6_model_id_from_env( self, repo: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> None: """II6: model_id picked up from MUSE_MODEL_ID env when --model-id absent.""" env = {**_env(repo), "MUSE_MODEL_ID": "claude-haiku-4"} data = _commit(repo, "-m", "env model", env=env) assert data["model_id"] == "claude-haiku-4", ( f"Expected model_id='claude-haiku-4' from env, got {data['model_id']!r}" ) def test_II7_toolchain_id_from_env( self, repo: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> None: """II7: toolchain_id from MUSE_TOOLCHAIN_ID when --toolchain-id absent.""" env = {**_env(repo), "MUSE_TOOLCHAIN_ID": "agentic-v3"} data = _commit(repo, "-m", "env tc", env=env) assert data["toolchain_id"] == "agentic-v3", ( f"Expected toolchain_id='agentic-v3' from env, got {data['toolchain_id']!r}" ) def test_II8_flag_overrides_env_for_agent_id( self, repo: pathlib.Path ) -> None: """II8: --agent-id flag takes priority over MUSE_AGENT_ID env.""" env = {**_env(repo), "MUSE_AGENT_ID": "env-bot"} data = _commit(repo, "-m", "override", "--agent-id", "flag-bot", env=env) assert data["agent_id"] == "flag-bot", ( f"Expected flag-bot to override env-bot, got {data['agent_id']!r}" ) # --------------------------------------------------------------------------- # III Dry-run schema parity # --------------------------------------------------------------------------- class TestDryRunSchemaIII: def test_III1_dry_run_has_same_required_keys(self, repo: pathlib.Path) -> None: """III1: dry-run output has the same required keys as the success path.""" result = _commit_raw(repo, "-m", "check", "--dry-run") assert result.exit_code == 0, f"dry-run failed:\n{result.output}" data = json.loads(result.output.strip()) missing = _REQUIRED_KEYS - data.keys() assert not missing, f"dry-run missing required keys: {missing}" def test_III2_dry_run_flag_is_true(self, repo: pathlib.Path) -> None: """III2: dry_run=true in dry-run output.""" result = _commit_raw(repo, "-m", "check", "--dry-run") assert result.exit_code == 0 data = json.loads(result.output.strip()) assert data["dry_run"] is True def test_III3_dry_run_false_on_real_commit(self, repo: pathlib.Path) -> None: """III3: dry_run=false in normal commit output.""" data = _commit(repo, "-m", "real commit") assert data["dry_run"] is False def test_III4_dry_run_has_model_id_and_toolchain_id( self, repo: pathlib.Path ) -> None: """III4: dry-run output includes model_id and toolchain_id.""" result = _commit_raw( repo, "-m", "preflight", "--dry-run", "--model-id", "claude-opus-4", "--toolchain-id", "cursor", ) assert result.exit_code == 0 data = json.loads(result.output.strip()) assert data["model_id"] == "claude-opus-4", ( f"model_id missing from dry-run output: {data}" ) assert data["toolchain_id"] == "cursor", ( f"toolchain_id missing from dry-run output: {data}" ) def test_III5_dry_run_clean_tree_exits_1( self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> None: """III5: dry-run on a clean tree exits 1 with clean=true in JSON. Uses its own repo to ensure a truly clean workdir (all files committed). muse init --domain code creates .museattributes/.museignore, so we commit everything once first to establish HEAD == workdir, then dry-run. """ monkeypatch.chdir(tmp_path) env = _env(tmp_path) runner.invoke(cli, ["init", "--domain", "code"], env=env) (tmp_path / "module.py").write_text("x = 1\n") # Commit everything so HEAD == workdir (includes init-created files) result = runner.invoke(cli, ["commit", "-m", "initial"], env=env) assert result.exit_code == 0, result.output # Now dry-run should detect nothing to commit result = _commit_raw(tmp_path, "-m", "nothing", "--dry-run", env=env) assert result.exit_code == 1, ( f"Expected exit 1 for dry-run on clean tree, got {result.exit_code}. " f"Output: {result.output}" ) data = json.loads(result.output.strip()) assert data.get("clean") is True, ( f"Expected clean=true in dry-run clean-tree JSON: {data}" ) assert data.get("dry_run") is True # --------------------------------------------------------------------------- # IV File change accounting # --------------------------------------------------------------------------- class TestFileChangeAccountingIV: def test_IV1_initial_commit_added_gte_1(self, repo: pathlib.Path) -> None: """IV1: Initial commit adds at least the tracked file.""" data = _commit(repo, "-m", "initial") assert data["files_changed"]["added"] >= 1, ( f"Initial commit should add >=1 file: {data['files_changed']}" ) def test_IV2_modification_increments_modified( self, committed_repo: pathlib.Path ) -> None: """IV2: Editing an existing file increments modified, not added.""" env = _env(committed_repo) (committed_repo / "module.py").write_text("def greet():\n return 'hi'\n") runner.invoke(cli, ["code", "add", "module.py"], env=env) data = _commit(committed_repo, "-m", "modify") assert data["files_changed"]["modified"] == 1 assert data["files_changed"]["added"] == 0 def test_IV3_deletion_increments_deleted( self, committed_repo: pathlib.Path ) -> None: """IV3: Removing a tracked file increments deleted. Uses a second file so deleting one doesn't leave an empty workdir (an empty manifest triggers "empty workdir" rather than a deletion). """ env = _env(committed_repo) # Add a second file so there's still something tracked after the deletion. (committed_repo / "extra.py").write_text("y = 2\n") runner.invoke(cli, ["code", "add", "extra.py"], env=env) runner.invoke(cli, ["commit", "-m", "add extra"], env=env) # Now delete extra.py — module.py remains, so workdir is non-empty. (committed_repo / "extra.py").unlink() runner.invoke(cli, ["code", "add", "extra.py"], env=env) data = _commit(committed_repo, "-m", "remove extra") assert data["files_changed"]["deleted"] == 1 assert data["files_changed"]["added"] == 0 def test_IV4_total_always_matches_sum( self, committed_repo: pathlib.Path ) -> None: """IV4: files_changed.total = added + modified + deleted, always.""" env = _env(committed_repo) (committed_repo / "new.py").write_text("x = 1\n") (committed_repo / "module.py").write_text("def greet():\n return 'hi'\n") runner.invoke(cli, ["code", "add", "new.py"], env=env) runner.invoke(cli, ["code", "add", "module.py"], env=env) data = _commit(committed_repo, "-m", "mixed") fc = data["files_changed"] expected = fc["added"] + fc["modified"] + fc["deleted"] assert fc["total"] == expected, ( f"total {fc['total']} != sum {expected}: {fc}" ) # --------------------------------------------------------------------------- # V Error paths # --------------------------------------------------------------------------- class TestErrorPathsV: def test_V1_missing_message_exits_1_with_json_error( self, repo: pathlib.Path ) -> None: """V1: Missing -m exits 1 with JSON error {"error": "no_message"}.""" result = _commit_raw(repo) # no -m assert result.exit_code == 1 json_line = next( (l for l in result.output.strip().splitlines() if l.startswith("{")), None, ) assert json_line is not None, f"No JSON in output: {result.output!r}" data = json.loads(json_line) assert data["error"] == "no_message", ( f"Expected error='no_message', got {data.get('error')!r}" ) def test_V2_clean_tree_json_response( self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> None: """V2: --json on a clean tree (no --dry-run) exits 0 with clean=true JSON. An agent using ``muse commit --json -m "msg"`` on a clean repo must get a machine-readable response — not a silent text-only "Nothing to commit". """ monkeypatch.chdir(tmp_path) env = _env(tmp_path) runner.invoke(cli, ["init", "--domain", "code"], env=env) (tmp_path / "module.py").write_text("x = 1\n") # Commit everything to establish HEAD == workdir result = runner.invoke(cli, ["commit", "-m", "initial"], env=env) assert result.exit_code == 0, result.output # Second commit on clean tree — must return JSON result = _commit_raw(tmp_path, "-m", "nothing", env=env) assert result.exit_code == 0 json_line = next( (l for l in result.output.strip().splitlines() if l.startswith("{")), None, ) assert json_line is not None, ( f"No JSON on stdout for clean-tree --json commit: {result.output!r}" ) data = json.loads(json_line) assert data.get("clean") is True, ( f"Expected clean=true in clean-tree commit JSON: {data}" )