"""Tests for the canonical ``muse diff --json`` schema. Muse is a symbol-aware VCS. Its diff engine works at the symbol level, not just the file level. The JSON output must expose that — otherwise agents lose the very information that makes Muse different from a file-hashing VCS. Canonical schema ---------------- :: { "from_ref": str, // "HEAD", branch, or commit id "to_ref": str, // "working tree", "staged", or commit id "from_commit_id": str | null, // sha256:-prefixed or null "to_commit_id": str | null, // sha256:-prefixed or null "has_changes": bool, "added": [str, ...], // file paths added "deleted": [str, ...], // file paths deleted "modified": [str, ...], // file paths modified in-place "renamed": {str: str}, // {old_path: new_path} "total_changes": int, // len(added)+len(modified)+len(deleted)+len(renamed) "symbols": { // per-file symbol-level changes "": { "added": [str, ...], // symbol names inserted "deleted": [str, ...], // symbol names deleted "modified": [str, ...] // symbol names replaced / patched } }, "sem_ver_bump": str, // "none" | "patch" | "minor" | "major" "breaking_changes": [str, ...] // addresses of breaking symbol changes } Coverage matrix --------------- I Schema invariants I1 All required keys present on clean repo (no changes) I2 All required keys present when changes exist I3 from_commit_id is sha256:-prefixed I4 has_changes=false when clean, true when dirty II File-level categorisation II1 Added file appears in added, not modified or deleted II2 Deleted file appears in deleted, not modified or added II3 Modified file appears in modified II4 total_changes = len(added) + len(modified) + len(deleted) + len(renamed) II5 Renamed file appears in renamed dict, NOT in modified or added/deleted III Symbol-level output (the Muse differentiator) III1 symbols dict present even when empty (clean diff → {}) III2 New function in a modified file appears in symbols[file].added III3 Deleted function in a modified file appears in symbols[file].deleted III4 File-only add (no symbols) does not appear in symbols (or appears with empty buckets) IV Semantic fields IV1 sem_ver_bump always present (at least "none") IV2 breaking_changes always present (at least []) IV3 sem_ver_bump reflects the bump level of the changes V Diff modes V1 --staged shows staged vs HEAD (to_ref == "staged") V2 --staged no_changes=false when staged changes exist V3 Default (no flag) shows working tree vs HEAD (to_ref == "working tree") V4 Commit-to-commit diff uses sha256:-prefixed to_commit_id """ from __future__ import annotations from collections.abc import Mapping import json import pathlib import pytest from tests.cli_test_helper import CliRunner cli = None runner = CliRunner() _REQUIRED_KEYS = { "from_ref", "to_ref", "from_commit_id", "to_commit_id", "has_changes", "added", "deleted", "modified", "renamed", "total_changes", "symbols", "sem_ver_bump", "breaking_changes", } _SYMBOL_BUCKET_KEYS = {"added", "deleted", "modified"} def _env(root: pathlib.Path) -> Mapping[str, str]: return {"MUSE_REPO_ROOT": str(root)} def _diff_json(root: pathlib.Path, *extra_args: str) -> Mapping[str, object]: result = runner.invoke(cli, ["diff", "--json"] + list(extra_args), env=_env(root)) assert result.exit_code == 0, f"diff --json failed: {result.output}" return json.loads(result.output.strip()) @pytest.fixture() def code_repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path: """Code-domain repo with one committed Python file.""" monkeypatch.chdir(tmp_path) result = runner.invoke(cli, ["init", "--domain", "code"], env=_env(tmp_path)) assert result.exit_code == 0, result.output (tmp_path / "module.py").write_text("def greet():\n return 'hello'\n") runner.invoke(cli, ["code", "add", "module.py"], env=_env(tmp_path)) result = runner.invoke(cli, ["commit", "-m", "initial"], env=_env(tmp_path)) assert result.exit_code == 0, result.output return tmp_path # --------------------------------------------------------------------------- # I Schema invariants # --------------------------------------------------------------------------- class TestSchemaInvariantsI: def test_I1_clean_repo_all_keys_present(self, code_repo: pathlib.Path) -> None: """I1: All required keys present even when there are no changes.""" data = _diff_json(code_repo) missing = _REQUIRED_KEYS - data.keys() assert not missing, f"Missing keys on clean diff: {missing}" def test_I2_dirty_repo_all_keys_present(self, code_repo: pathlib.Path) -> None: """I2: All required keys present when changes exist.""" (code_repo / "module.py").write_text( "def greet():\n return 'hello'\n\ndef farewell():\n return 'bye'\n" ) data = _diff_json(code_repo) missing = _REQUIRED_KEYS - data.keys() assert not missing, f"Missing keys on dirty diff: {missing}" def test_I3_from_commit_id_is_sha256_prefixed(self, code_repo: pathlib.Path) -> None: """I3: from_commit_id is sha256:-prefixed.""" data = _diff_json(code_repo) assert data["from_commit_id"] is not None assert data["from_commit_id"].startswith("sha256:"), ( f"from_commit_id must be sha256:-prefixed, got {data['from_commit_id']!r}" ) def test_I4_has_changes_reflects_dirty_state(self, code_repo: pathlib.Path) -> None: """I4: has_changes=false when nothing staged, true when staged changes exist. Uses --staged rather than the working-tree diff because muse init creates .museattributes/.museignore in the working tree without committing them, so the working-tree diff is never truly clean after init. The staged view is clean after a commit with nothing staged. """ assert _diff_json(code_repo, "--staged")["has_changes"] is False (code_repo / "module.py").write_text("def greet():\n return 'hi'\n") runner.invoke(cli, ["code", "add", "module.py"], env=_env(code_repo)) assert _diff_json(code_repo, "--staged")["has_changes"] is True # --------------------------------------------------------------------------- # II File-level categorisation # --------------------------------------------------------------------------- class TestFileLevelCategorizationII: def test_II1_added_file_in_added(self, code_repo: pathlib.Path) -> None: """II1: A newly staged file appears in added, not modified or deleted.""" (code_repo / "new.py").write_text("x = 1\n") runner.invoke(cli, ["code", "add", "new.py"], env=_env(code_repo)) data = _diff_json(code_repo, "--staged") assert "new.py" in data["added"], f"new.py not in added: {data}" assert "new.py" not in data["modified"] assert "new.py" not in data["deleted"] def test_II2_deleted_file_in_deleted(self, code_repo: pathlib.Path) -> None: """II2: A staged deletion appears in deleted, not modified or added.""" (code_repo / "module.py").unlink() runner.invoke(cli, ["code", "add", "module.py"], env=_env(code_repo)) data = _diff_json(code_repo, "--staged") assert "module.py" in data["deleted"], f"module.py not in deleted: {data}" assert "module.py" not in data["modified"] assert "module.py" not in data["added"] def test_II3_modified_file_in_modified(self, code_repo: pathlib.Path) -> None: """II3: An in-place edit appears in modified.""" (code_repo / "module.py").write_text("def greet():\n return 'hi'\n") data = _diff_json(code_repo) assert "module.py" in data["modified"], f"module.py not in modified: {data}" def test_II4_total_changes_formula(self, code_repo: pathlib.Path) -> None: """II4: total_changes = len(added) + len(modified) + len(deleted) + len(renamed).""" (code_repo / "module.py").write_text("def greet():\n return 'hi'\n") (code_repo / "extra.py").write_text("y = 2\n") runner.invoke(cli, ["code", "add", "extra.py"], env=_env(code_repo)) data = _diff_json(code_repo) expected = ( len(data["added"]) + len(data["modified"]) + len(data["deleted"]) + len(data["renamed"]) ) assert data["total_changes"] == expected, ( f"total_changes {data['total_changes']} != formula {expected}" ) def test_II5_renamed_file_in_renamed_not_modified(self, code_repo: pathlib.Path) -> None: """II5: A renamed file appears in renamed dict, not in modified or added/deleted.""" runner.invoke( cli, ["mv", "module.py", "utils.py"], env=_env(code_repo) ) data = _diff_json(code_repo, "--staged") assert "module.py" in data["renamed"], ( f"module.py not a rename source. renamed={data['renamed']}, " f"modified={data['modified']}, added={data['added']}, deleted={data['deleted']}" ) assert data["renamed"]["module.py"] == "utils.py", ( f"Expected renamed['module.py']='utils.py', got {data['renamed']}" ) assert "utils.py" not in data["added"], "rename target must not appear in added" assert "module.py" not in data["deleted"], "rename source must not appear in deleted" assert "module.py" not in data["modified"], "rename source must not appear in modified" # --------------------------------------------------------------------------- # III Symbol-level output # --------------------------------------------------------------------------- class TestSymbolLevelOutputIII: def test_III1_symbols_always_present(self, code_repo: pathlib.Path) -> None: """III1: symbols dict is always present, even on a clean diff.""" data = _diff_json(code_repo) assert "symbols" in data assert isinstance(data["symbols"], dict) assert data["symbols"] == {} def test_III2_new_function_in_symbols_added(self, code_repo: pathlib.Path) -> None: """III2: Adding a new function appears in symbols[file].added.""" (code_repo / "module.py").write_text( "def greet():\n return 'hello'\n\ndef farewell():\n return 'bye'\n" ) data = _diff_json(code_repo) assert "module.py" in data["symbols"], ( f"module.py not in symbols: {data['symbols']}" ) sym = data["symbols"]["module.py"] assert _SYMBOL_BUCKET_KEYS == set(sym.keys()), ( f"Symbol bucket has wrong keys: {sym.keys()}" ) assert "farewell" in sym["added"], ( f"Expected 'farewell' in symbols.module.py.added, got {sym['added']}" ) def test_III3_deleted_function_in_symbols_deleted(self, code_repo: pathlib.Path) -> None: """III3: Removing a function appears in symbols[file].deleted.""" # First add a second function (code_repo / "module.py").write_text( "def greet():\n return 'hello'\n\ndef farewell():\n return 'bye'\n" ) runner.invoke(cli, ["code", "add", "module.py"], env=_env(code_repo)) runner.invoke(cli, ["commit", "-m", "add farewell"], env=_env(code_repo)) # Now delete it (code_repo / "module.py").write_text("def greet():\n return 'hello'\n") data = _diff_json(code_repo) assert "module.py" in data["symbols"] sym = data["symbols"]["module.py"] assert "farewell" in sym["deleted"], ( f"Expected 'farewell' in symbols.module.py.deleted, got {sym['deleted']}" ) def test_III4_added_file_symbols_in_symbols_or_omitted( self, code_repo: pathlib.Path ) -> None: """III4: Newly added file's symbols appear in symbols[file].added or file omitted.""" (code_repo / "fresh.py").write_text("def new_func():\n pass\n") runner.invoke(cli, ["code", "add", "fresh.py"], env=_env(code_repo)) data = _diff_json(code_repo, "--staged") assert "fresh.py" in data["added"] # If symbols present for the new file, all symbols should be in added if "fresh.py" in data["symbols"]: assert "new_func" in data["symbols"]["fresh.py"]["added"], ( f"Expected new_func in symbols for new file: {data['symbols']['fresh.py']}" ) # --------------------------------------------------------------------------- # IV Semantic fields # --------------------------------------------------------------------------- class TestSemanticFieldsIV: def test_IV1_sem_ver_bump_always_present(self, code_repo: pathlib.Path) -> None: """IV1: sem_ver_bump always present, at least 'none'.""" data = _diff_json(code_repo) assert "sem_ver_bump" in data assert isinstance(data["sem_ver_bump"], str) assert data["sem_ver_bump"] == "none" # clean repo def test_IV2_breaking_changes_always_present(self, code_repo: pathlib.Path) -> None: """IV2: breaking_changes always present, at least [].""" data = _diff_json(code_repo) assert "breaking_changes" in data assert isinstance(data["breaking_changes"], list) def test_IV3_sem_ver_bump_reflects_changes(self, code_repo: pathlib.Path) -> None: """IV3: sem_ver_bump is 'none' when clean, non-'none' when changes exist.""" # Clean → "none" assert _diff_json(code_repo)["sem_ver_bump"] == "none" # Any change should produce a non-"none" bump (code_repo / "module.py").write_text( "def greet():\n return 'hello'\n\ndef farewell():\n return 'bye'\n" ) data = _diff_json(code_repo) assert data["sem_ver_bump"] != "none", ( f"Expected non-none sem_ver_bump for dirty diff, got {data['sem_ver_bump']!r}" ) # --------------------------------------------------------------------------- # V Diff modes # --------------------------------------------------------------------------- class TestDiffModesV: def test_V1_staged_flag_sets_to_ref(self, code_repo: pathlib.Path) -> None: """V1: --staged sets to_ref to 'staged'.""" data = _diff_json(code_repo, "--staged") assert data["to_ref"] == "staged", ( f"Expected to_ref='staged', got {data['to_ref']!r}" ) def test_V2_staged_flag_shows_staged_changes(self, code_repo: pathlib.Path) -> None: """V2: --staged shows staged changes as has_changes=true.""" (code_repo / "module.py").write_text("def greet():\n return 'hi'\n") runner.invoke(cli, ["code", "add", "module.py"], env=_env(code_repo)) assert _diff_json(code_repo, "--staged")["has_changes"] is True def test_V3_default_shows_working_tree(self, code_repo: pathlib.Path) -> None: """V3: Default diff (no flags) uses to_ref='working tree'.""" data = _diff_json(code_repo) assert data["to_ref"] == "working tree", ( f"Expected to_ref='working tree', got {data['to_ref']!r}" ) def test_V4_commit_to_commit_diff_has_sha256_to_commit_id( self, code_repo: pathlib.Path ) -> None: """V4: Commit-to-commit diff populates to_commit_id with sha256:-prefixed ID.""" import json as _json log_out = runner.invoke(cli, ["log", "--json", "-n", "1"], env=_env(code_repo)) head_id = _json.loads(log_out.output)["commits"][0]["commit_id"] data = _diff_json(code_repo, head_id, head_id) assert data["to_commit_id"] is not None assert data["to_commit_id"].startswith("sha256:"), ( f"to_commit_id must be sha256:-prefixed, got {data['to_commit_id']!r}" )