"""Tests for the canonical ``muse read --json`` schema. ``muse read`` is how agents inspect individual commits — metadata, delta, and provenance in one shot. The JSON schema must be complete and stable. Schema (with --stat, default) ------------------------------ :: { "commit_id": "sha256:<64-hex>", "repo_id": str, "branch": str, "snapshot_id": str, "message": str, "committed_at": str, // ISO 8601 with timezone "parent_commit_id": str | null, "parent2_commit_id": str | null, "author": str, "metadata": dict, "structured_delta": dict | null, // absent with --no-delta "sem_ver_bump": str, // "none" | "patch" | "minor" | "major" "breaking_changes": [str, ...], "agent_id": str, // "" for human commits "model_id": str, // "" for human commits "toolchain_id": str, "prompt_hash": str, "signature": str, "signer_public_key": str, "signer_key_id": str, "reviewed_by": [str, ...], "test_runs": int, "files_added": [str, ...], // absent with --no-stat "files_removed": [str, ...], // absent with --no-stat "files_modified": [str, ...], // absent with --no-stat "total_changes": int // absent with --no-stat } Coverage -------- I Schema invariants I1 All required keys present (full provenance set) I2 commit_id is sha256:-prefixed I3 committed_at is ISO 8601 with timezone I4 sem_ver_bump is a valid enum value I5 breaking_changes is always a list I6 reviewed_by is always a list I7 test_runs is always an int II Agent provenance II1 agent_id populated from --agent-id flag II2 model_id populated from --model-id flag II3 agent_id is empty string (not null) for human commits II4 model_id is empty string (not null) for human commits II5 toolchain_id is a string (never null) III File stats III1 total_changes present with --stat (default) III2 total_changes = len(files_added)+len(files_modified)+len(files_removed) III3 total_changes absent with --no-stat III4 files_added/removed/modified absent with --no-stat IV Error handling (agent-friendly) IV1 Non-existent commit exits 1 cleanly (no traceback) IV2 --json + non-existent ref → stdout has JSON {"error": ...} IV3 JSON error has "error", "ref", "message" keys IV4 Invalid sha256: hex digits → same clean JSON error, exit 1 IV5 Ambiguous prefix → JSON error with "ambiguous_ref" error key V Structured delta V1 structured_delta present on non-initial commit V2 structured_delta is null on initial commit (no parent to diff against) V3 --no-delta omits structured_delta key entirely """ from __future__ import annotations from collections.abc import Mapping import json import pathlib import pytest from tests.cli_test_helper import CliRunner, InvokeResult from muse.core.types import long_id cli = None runner = CliRunner() _REQUIRED_KEYS = { # Identity "commit_id", "branch", "snapshot_id", # Content "message", "committed_at", "parent_commit_id", "parent2_commit_id", "author", "metadata", "structured_delta", # Semantic versioning "sem_ver_bump", "breaking_changes", # Agent provenance (all must be present, empty string for humans) "agent_id", "model_id", "toolchain_id", "prompt_hash", "signature", "signer_public_key", "signer_key_id", # CRDT annotation fields "reviewed_by", "test_runs", # File stat fields (present with default --stat) "files_added", "files_removed", "files_modified", "total_changes", } _VALID_SEM_VER_BUMPS = {"none", "patch", "minor", "major"} def _env(root: pathlib.Path) -> Mapping[str, str]: return {"MUSE_REPO_ROOT": str(root)} def _show(root: pathlib.Path, *flags: str) -> Mapping[str, object]: result = runner.invoke(cli, ["read", "--json"] + list(flags), env=_env(root)) assert result.exit_code == 0, f"show --json failed:\n{result.output}" return json.loads(result.output.strip()) def _show_raw(root: pathlib.Path, *args: str) -> InvokeResult: """Return the raw InvokeResult (not parsed) for error-path tests.""" return runner.invoke(cli, ["read", "--json"] + list(args), env=_env(root)) @pytest.fixture() def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path: """Code-domain repo with one committed file, no agent provenance.""" monkeypatch.chdir(tmp_path) env = _env(tmp_path) result = runner.invoke(cli, ["init", "--domain", "code"], env=env) assert result.exit_code == 0, result.output (tmp_path / "module.py").write_text("def greet():\n return 'hello'\n") runner.invoke(cli, ["code", "add", "module.py"], env=env) result = runner.invoke(cli, ["commit", "-m", "initial"], env=env) assert result.exit_code == 0, result.output return tmp_path @pytest.fixture() def repo_with_two_commits( repo: pathlib.Path, monkeypatch: pytest.MonkeyPatch, ) -> pathlib.Path: """Extends repo fixture with a second commit that modifies module.py.""" env = _env(repo) (repo / "module.py").write_text( "def greet():\n return 'hello'\n\ndef farewell():\n return 'bye'\n" ) runner.invoke(cli, ["code", "add", "module.py"], env=env) result = runner.invoke(cli, ["commit", "-m", "add farewell"], env=env) assert result.exit_code == 0, result.output return repo # --------------------------------------------------------------------------- # I Schema invariants # --------------------------------------------------------------------------- class TestSchemaInvariantsI: def test_I1_all_required_keys_present( self, repo_with_two_commits: pathlib.Path ) -> None: """I1: Every required key must be present in the default show --json output.""" data = _show(repo_with_two_commits) missing = _REQUIRED_KEYS - data.keys() assert not missing, f"Missing required keys in show --json: {missing}" def test_I2_commit_id_sha256_prefixed(self, repo: pathlib.Path) -> None: """I2: commit_id must start with 'sha256:'.""" data = _show(repo) assert data["commit_id"].startswith("sha256:"), ( f"commit_id must be sha256:-prefixed, got {data['commit_id']!r}" ) def test_I3_committed_at_is_iso8601_with_tz(self, repo: pathlib.Path) -> None: """I3: committed_at must parse as ISO 8601 with timezone info.""" import datetime data = _show(repo) dt = datetime.datetime.fromisoformat(data["committed_at"]) assert dt.tzinfo is not None, ( f"committed_at lacks timezone: {data['committed_at']!r}" ) def test_I4_sem_ver_bump_valid_enum(self, repo: pathlib.Path) -> None: """I4: sem_ver_bump must be one of the four valid values.""" data = _show(repo) assert data["sem_ver_bump"] in _VALID_SEM_VER_BUMPS, ( f"sem_ver_bump {data['sem_ver_bump']!r} not in {_VALID_SEM_VER_BUMPS}" ) def test_I5_breaking_changes_always_list(self, repo: pathlib.Path) -> None: """I5: breaking_changes is always a list (never null or absent).""" data = _show(repo) assert isinstance(data["breaking_changes"], list), ( f"breaking_changes must be list, got {type(data['breaking_changes'])}" ) def test_I6_reviewed_by_always_list(self, repo: pathlib.Path) -> None: """I6: reviewed_by is always a list (CRDT ORSet).""" data = _show(repo) assert isinstance(data["reviewed_by"], list), ( f"reviewed_by must be list, got {type(data['reviewed_by'])}" ) def test_I7_test_runs_always_int(self, repo: pathlib.Path) -> None: """I7: test_runs is always an int (CRDT GCounter).""" data = _show(repo) assert isinstance(data["test_runs"], int), ( f"test_runs must be int, got {type(data['test_runs'])}" ) # --------------------------------------------------------------------------- # II Agent provenance # --------------------------------------------------------------------------- class TestAgentProvenanceII: def test_II1_agent_id_populated_from_flag( self, repo: pathlib.Path ) -> None: """II1: --agent-id value appears in agent_id field.""" env = _env(repo) (repo / "helper.py").write_text("x = 1\n") runner.invoke(cli, ["code", "add", "helper.py"], env=env) runner.invoke( cli, ["commit", "-m", "agent commit", "--agent-id", "test-bot"], env=env, ) data = _show(repo) assert data["agent_id"] == "test-bot", ( f"Expected agent_id='test-bot', got {data['agent_id']!r}" ) def test_II2_model_id_populated_from_flag( self, repo: pathlib.Path ) -> None: """II2: --model-id value appears in model_id field.""" env = _env(repo) (repo / "helper2.py").write_text("y = 2\n") runner.invoke(cli, ["code", "add", "helper2.py"], env=env) runner.invoke( cli, ["commit", "-m", "model commit", "--model-id", "claude-opus-4"], env=env, ) data = _show(repo) assert data["model_id"] == "claude-opus-4", ( f"Expected model_id='claude-opus-4', got {data['model_id']!r}" ) def test_II3_agent_id_empty_string_for_human_commit( self, repo: pathlib.Path ) -> None: """II3: agent_id is empty string (not null) for human commits.""" data = _show(repo) assert data["agent_id"] == "", ( f"agent_id must be '' for human commit, got {data['agent_id']!r}" ) def test_II4_model_id_empty_string_for_human_commit( self, repo: pathlib.Path ) -> None: """II4: model_id is empty string (not null) for human commits.""" data = _show(repo) assert data["model_id"] == "", ( f"model_id must be '' for human commit, got {data['model_id']!r}" ) def test_II5_toolchain_id_is_string_not_null(self, repo: pathlib.Path) -> None: """II5: toolchain_id is always a string (empty for human commits).""" data = _show(repo) assert isinstance(data["toolchain_id"], str), ( f"toolchain_id must be str (never null), got {type(data['toolchain_id'])}" ) # --------------------------------------------------------------------------- # III File stats # --------------------------------------------------------------------------- class TestFileStatsIII: def test_III1_total_changes_present_by_default( self, repo_with_two_commits: pathlib.Path ) -> None: """III1: total_changes is present in default JSON output.""" data = _show(repo_with_two_commits) assert "total_changes" in data, ( f"total_changes missing from show --json output" ) def test_III2_total_changes_equals_sum_of_buckets( self, repo_with_two_commits: pathlib.Path ) -> None: """III2: total_changes = len(files_added) + len(files_modified) + len(files_removed).""" data = _show(repo_with_two_commits) expected = ( len(data["files_added"]) + len(data["files_modified"]) + len(data["files_removed"]) ) assert data["total_changes"] == expected, ( f"total_changes {data['total_changes']} != " f"len(added={data['files_added']}) + len(modified={data['files_modified']}) " f"+ len(removed={data['files_removed']}) = {expected}" ) def test_III3_total_changes_absent_with_no_stat( self, repo: pathlib.Path ) -> None: """III3: total_changes is absent when --no-stat is used.""" result = runner.invoke( cli, ["read", "--json", "--no-stat"], env=_env(repo) ) assert result.exit_code == 0 data = json.loads(result.output.strip()) assert "total_changes" not in data, ( "total_changes must not appear with --no-stat" ) def test_III4_file_buckets_absent_with_no_stat(self, repo: pathlib.Path) -> None: """III4: files_added/removed/modified absent with --no-stat.""" result = runner.invoke( cli, ["read", "--json", "--no-stat"], env=_env(repo) ) assert result.exit_code == 0 data = json.loads(result.output.strip()) assert "files_added" not in data assert "files_removed" not in data assert "files_modified" not in data # --------------------------------------------------------------------------- # IV Error handling # --------------------------------------------------------------------------- class TestErrorHandlingIV: def test_IV1_nonexistent_ref_exits_1(self, repo: pathlib.Path) -> None: """IV1: Non-existent commit ref exits 1 without traceback.""" result = _show_raw(repo, long_id("a" * 64)) assert result.exit_code == 1, ( f"Expected exit code 1 for nonexistent ref, got {result.exit_code}" ) def test_IV2_json_error_on_nonexistent_ref(self, repo: pathlib.Path) -> None: """IV2: --json with nonexistent ref emits JSON on stdout (not a crash).""" result = _show_raw(repo, long_id("a" * 64)) # Find the JSON line (stdout) — the ❌ text goes to stderr and may appear # interleaved in the combined output captured by CliRunner. json_line = next( (l for l in result.output.strip().splitlines() if l.startswith("{")), None, ) assert json_line is not None, ( f"No JSON line found in output for nonexistent ref: {result.output!r}" ) try: data = json.loads(json_line) except json.JSONDecodeError as exc: pytest.fail(f"JSON line is not valid JSON: {json_line!r} — {exc}") assert "error" in data def test_IV3_json_error_has_required_keys(self, repo: pathlib.Path) -> None: """IV3: JSON error payload has 'error', 'ref', 'message' keys.""" result = _show_raw(repo, long_id("b" * 64)) # Parse the last JSON-looking line json_line = next( (l for l in reversed(result.output.strip().splitlines()) if l.startswith("{")), None, ) assert json_line is not None, f"No JSON line in output: {result.output!r}" data = json.loads(json_line) assert "error" in data, f"'error' key missing from error JSON: {data}" assert "ref" in data, f"'ref' key missing from error JSON: {data}" assert "message" in data, f"'message' key missing from error JSON: {data}" def test_IV4_invalid_sha256_hex_exits_1(self, repo: pathlib.Path) -> None: """IV4: sha256: prefix with non-hex chars exits 1 cleanly.""" result = _show_raw(repo, "sha256:notvalidhex") assert result.exit_code == 1 # Output must not contain a Python traceback assert "Traceback" not in result.output assert "Traceback" not in (result.stderr or "") def test_IV5_ambiguous_prefix_returns_json_error( self, repo: pathlib.Path ) -> None: """IV5: When multiple commits match a prefix, return ambiguous_ref error.""" env = _env(repo) # Create enough commits that there's guaranteed to be a short common prefix # We simulate this by checking the behavior — even a single commit should # handle a 1-char prefix that might match multiple commits gracefully. # The key invariant: ambiguous_ref must NOT return "commit_not_found". result = runner.invoke( cli, ["log", "--json", "-n", "1"], env=env, ) assert result.exit_code == 0 log_data = json.loads(result.output.strip()) head_id = log_data["commits"][0]["commit_id"] # Use a 1-char hex prefix with sha256: prefix retained short_prefix = head_id[:len("sha256:") + 1] result2 = _show_raw(repo, short_prefix) # Either found (1 match) or ambiguous (>1 match) — must NOT crash assert result2.exit_code in (0, 1), ( f"Unexpected exit code {result2.exit_code} for prefix {short_prefix!r}" ) assert "Traceback" not in result2.output if result2.exit_code == 1: # Should produce JSON with either "commit_not_found" or "ambiguous_ref" json_line = next( (l for l in reversed(result2.output.strip().splitlines()) if l.startswith("{")), None, ) if json_line: data = json.loads(json_line) assert data["error"] in ("commit_not_found", "ambiguous_ref"), ( f"Expected error key to be 'commit_not_found' or 'ambiguous_ref', " f"got {data['error']!r}" ) # --------------------------------------------------------------------------- # V Structured delta # --------------------------------------------------------------------------- class TestStructuredDeltaV: def test_V1_structured_delta_present_on_second_commit( self, repo_with_two_commits: pathlib.Path ) -> None: """V1: structured_delta is non-null on a commit with a parent.""" data = _show(repo_with_two_commits) assert data.get("structured_delta") is not None, ( "structured_delta must be non-null on a commit with a parent" ) def test_V2_structured_delta_null_on_initial_commit( self, repo: pathlib.Path ) -> None: """V2: structured_delta is null on the initial commit (no parent to diff).""" data = _show(repo) # initial commit has no parent — structured_delta should be null assert data["structured_delta"] is None, ( f"Initial commit structured_delta must be null, got {data['structured_delta']!r}" ) def test_V3_no_delta_omits_key(self, repo_with_two_commits: pathlib.Path) -> None: """V3: --no-delta removes the structured_delta key entirely.""" result = runner.invoke( cli, ["read", "--json", "--no-delta"], env=_env(repo_with_two_commits) ) assert result.exit_code == 0 data = json.loads(result.output.strip()) assert "structured_delta" not in data, ( "structured_delta must not appear with --no-delta" )