test_diff_json_schema.py
python
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
6 days ago
| 1 | """Tests for the canonical ``muse diff --json`` schema. |
| 2 | |
| 3 | Muse is a symbol-aware VCS. Its diff engine works at the symbol level, not just |
| 4 | the file level. The JSON output must expose that — otherwise agents lose the very |
| 5 | information that makes Muse different from a file-hashing VCS. |
| 6 | |
| 7 | Canonical schema |
| 8 | ---------------- |
| 9 | :: |
| 10 | |
| 11 | { |
| 12 | "from_ref": str, // "HEAD", branch, or commit id |
| 13 | "to_ref": str, // "working tree", "staged", or commit id |
| 14 | "from_commit_id": str | null, // sha256:-prefixed or null |
| 15 | "to_commit_id": str | null, // sha256:-prefixed or null |
| 16 | "has_changes": bool, |
| 17 | "added": [str, ...], // file paths added |
| 18 | "deleted": [str, ...], // file paths deleted |
| 19 | "modified": [str, ...], // file paths modified in-place |
| 20 | "renamed": {str: str}, // {old_path: new_path} |
| 21 | "total_changes": int, // len(added)+len(modified)+len(deleted)+len(renamed) |
| 22 | "symbols": { // per-file symbol-level changes |
| 23 | "<file_path>": { |
| 24 | "added": [str, ...], // symbol names inserted |
| 25 | "deleted": [str, ...], // symbol names deleted |
| 26 | "modified": [str, ...] // symbol names replaced / patched |
| 27 | } |
| 28 | }, |
| 29 | "sem_ver_bump": str, // "none" | "patch" | "minor" | "major" |
| 30 | "breaking_changes": [str, ...] // addresses of breaking symbol changes |
| 31 | } |
| 32 | |
| 33 | Coverage matrix |
| 34 | --------------- |
| 35 | I Schema invariants |
| 36 | I1 All required keys present on clean repo (no changes) |
| 37 | I2 All required keys present when changes exist |
| 38 | I3 from_commit_id is sha256:-prefixed |
| 39 | I4 has_changes=false when clean, true when dirty |
| 40 | |
| 41 | II File-level categorisation |
| 42 | II1 Added file appears in added, not modified or deleted |
| 43 | II2 Deleted file appears in deleted, not modified or added |
| 44 | II3 Modified file appears in modified |
| 45 | II4 total_changes = len(added) + len(modified) + len(deleted) + len(renamed) |
| 46 | II5 Renamed file appears in renamed dict, NOT in modified or added/deleted |
| 47 | |
| 48 | III Symbol-level output (the Muse differentiator) |
| 49 | III1 symbols dict present even when empty (clean diff → {}) |
| 50 | III2 New function in a modified file appears in symbols[file].added |
| 51 | III3 Deleted function in a modified file appears in symbols[file].deleted |
| 52 | III4 Added file's symbols appear in symbols[file].added, or the file is omitted from symbols |
| 53 | |
| 54 | IV Semantic fields |
| 55 | IV1 sem_ver_bump always present (at least "none") |
| 56 | IV2 breaking_changes always present (at least []) |
| 57 | IV3 sem_ver_bump reflects the bump level of the changes |
| 58 | |
| 59 | V Diff modes |
| 60 | V1 --staged shows staged vs HEAD (to_ref == "staged") |
| 61 | V2 --staged has_changes=true when staged changes exist |
| 62 | V3 Default (no flag) shows working tree vs HEAD (to_ref == "working tree") |
| 63 | V4 Commit-to-commit diff uses sha256:-prefixed to_commit_id |
| 64 | """ |
| 65 | |
| 66 | from __future__ import annotations |
| 67 | from collections.abc import Mapping |
| 68 | |
| 69 | import json |
| 70 | import pathlib |
| 71 | |
| 72 | import pytest |
| 73 | |
| 74 | from tests.cli_test_helper import CliRunner |
| 75 | |
# Passed as the first positional argument to every ``runner.invoke`` call in
# this module.
# NOTE(review): presumably the CliRunner from tests.cli_test_helper ignores
# this placeholder and dispatches to the real CLI itself — confirm there.
cli = None
# Single runner instance shared by the helpers, the fixture, and all tests.
runner = CliRunner()
| 78 | |
# Top-level keys that the schema tests require in every ``diff --json``
# payload (checked by I1 and I2).
_REQUIRED_KEYS = {
    # refs and commit ids
    "from_ref",
    "to_ref",
    "from_commit_id",
    "to_commit_id",
    # summary
    "has_changes",
    "total_changes",
    # file-level buckets
    "added",
    "deleted",
    "modified",
    "renamed",
    # symbol-level and semantic fields
    "symbols",
    "sem_ver_bump",
    "breaking_changes",
}

# Exact key set expected on each per-file entry under ``symbols`` (see III2).
_SYMBOL_BUCKET_KEYS = {
    "added",
    "deleted",
    "modified",
}
| 89 | |
| 90 | |
def _env(root: pathlib.Path) -> Mapping[str, str]:
    """Return the ``env`` mapping handed to ``runner.invoke`` for *root*.

    The mapping has a single entry: ``MUSE_REPO_ROOT`` set to ``str(root)``.
    """
    repo_root = str(root)
    return dict(MUSE_REPO_ROOT=repo_root)
| 93 | |
| 94 | |
def _diff_json(root: pathlib.Path, *extra_args: str) -> Mapping[str, object]:
    """Invoke ``diff --json`` for the repo at *root* and decode its output.

    Args:
        root: Repository root; forwarded to the CLI through ``_env``.
        *extra_args: Additional CLI arguments appended after ``--json``.

    Returns:
        The JSON document printed by the command, parsed with ``json.loads``.

    Raises:
        AssertionError: If the command exits with a non-zero code.
    """
    argv = ["diff", "--json", *extra_args]
    outcome = runner.invoke(cli, argv, env=_env(root))
    assert outcome.exit_code == 0, f"diff --json failed: {outcome.output}"
    raw = outcome.output.strip()
    return json.loads(raw)
| 99 | |
| 100 | |
@pytest.fixture()
def code_repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Code-domain repo with one committed Python file.

    Changes the working directory to ``tmp_path``, runs ``init --domain code``,
    writes ``module.py`` containing a single ``greet`` function, stages it with
    ``code add`` and commits it with the message ``initial``.

    Every setup command is asserted to exit with code 0 so that a broken
    setup step is reported where it happens rather than as a confusing
    failure further down.

    Returns:
        ``tmp_path``, the root of the freshly initialised repository.
    """
    monkeypatch.chdir(tmp_path)
    env = _env(tmp_path)

    result = runner.invoke(cli, ["init", "--domain", "code"], env=env)
    assert result.exit_code == 0, result.output

    (tmp_path / "module.py").write_text("def greet():\n return 'hello'\n")

    # Previously unchecked: a failed staging step only showed up indirectly.
    result = runner.invoke(cli, ["code", "add", "module.py"], env=env)
    assert result.exit_code == 0, result.output

    result = runner.invoke(cli, ["commit", "-m", "initial"], env=env)
    assert result.exit_code == 0, result.output
    return tmp_path
| 112 | |
| 113 | |
| 114 | # --------------------------------------------------------------------------- |
| 115 | # I Schema invariants |
| 116 | # --------------------------------------------------------------------------- |
| 117 | |
| 118 | |
class TestSchemaInvariantsI:
    """Group I: keys and basic invariants of the ``diff --json`` payload."""

    def test_I1_clean_repo_all_keys_present(self, code_repo: pathlib.Path) -> None:
        """I1: the payload carries every required key when nothing was edited."""
        payload = _diff_json(code_repo)
        missing = _REQUIRED_KEYS.difference(payload)
        assert not missing, f"Missing keys on clean diff: {missing}"

    def test_I2_dirty_repo_all_keys_present(self, code_repo: pathlib.Path) -> None:
        """I2: the payload carries every required key after editing a file."""
        module = code_repo / "module.py"
        module.write_text(
            "def greet():\n return 'hello'\n\ndef farewell():\n return 'bye'\n"
        )
        payload = _diff_json(code_repo)
        missing = _REQUIRED_KEYS.difference(payload)
        assert not missing, f"Missing keys on dirty diff: {missing}"

    def test_I3_from_commit_id_is_sha256_prefixed(self, code_repo: pathlib.Path) -> None:
        """I3: ``from_commit_id`` is set and starts with ``sha256:``."""
        commit_id = _diff_json(code_repo)["from_commit_id"]
        assert commit_id is not None
        assert commit_id.startswith("sha256:"), (
            f"from_commit_id must be sha256:-prefixed, got {commit_id!r}"
        )

    def test_I4_has_changes_reflects_dirty_state(self, code_repo: pathlib.Path) -> None:
        """I4: ``has_changes`` is false with nothing staged, true once an edit is staged.

        The staged view is used instead of the working-tree diff: muse init
        leaves .museattributes/.museignore uncommitted in the working tree,
        so the working-tree diff is never truly clean after init, whereas the
        staged view is clean right after a commit with nothing staged.
        """
        staged = "--staged"
        before = _diff_json(code_repo, staged)
        assert before["has_changes"] is False

        (code_repo / "module.py").write_text("def greet():\n return 'hi'\n")
        runner.invoke(cli, ["code", "add", "module.py"], env=_env(code_repo))

        after = _diff_json(code_repo, staged)
        assert after["has_changes"] is True
| 155 | |
| 156 | |
| 157 | # --------------------------------------------------------------------------- |
| 158 | # II File-level categorisation |
| 159 | # --------------------------------------------------------------------------- |
| 160 | |
| 161 | |
class TestFileLevelCategorizationII:
    """Group II: which file-level bucket each kind of change lands in."""

    def test_II1_added_file_in_added(self, code_repo: pathlib.Path) -> None:
        """II1: a newly staged file is listed under added only."""
        new_file = code_repo / "new.py"
        new_file.write_text("x = 1\n")
        runner.invoke(cli, ["code", "add", "new.py"], env=_env(code_repo))

        payload = _diff_json(code_repo, "--staged")
        assert "new.py" in payload["added"], f"new.py not in added: {payload}"
        for bucket in ("modified", "deleted"):
            assert "new.py" not in payload[bucket]

    def test_II2_deleted_file_in_deleted(self, code_repo: pathlib.Path) -> None:
        """II2: a staged deletion is listed under deleted only."""
        (code_repo / "module.py").unlink()
        runner.invoke(cli, ["code", "add", "module.py"], env=_env(code_repo))

        payload = _diff_json(code_repo, "--staged")
        assert "module.py" in payload["deleted"], f"module.py not in deleted: {payload}"
        for bucket in ("modified", "added"):
            assert "module.py" not in payload[bucket]

    def test_II3_modified_file_in_modified(self, code_repo: pathlib.Path) -> None:
        """II3: editing a committed file in place lists it under modified."""
        (code_repo / "module.py").write_text("def greet():\n return 'hi'\n")
        payload = _diff_json(code_repo)
        assert "module.py" in payload["modified"], f"module.py not in modified: {payload}"

    def test_II4_total_changes_formula(self, code_repo: pathlib.Path) -> None:
        """II4: total_changes equals the summed sizes of the four file buckets."""
        (code_repo / "module.py").write_text("def greet():\n return 'hi'\n")
        (code_repo / "extra.py").write_text("y = 2\n")
        runner.invoke(cli, ["code", "add", "extra.py"], env=_env(code_repo))

        payload = _diff_json(code_repo)
        buckets = ("added", "modified", "deleted", "renamed")
        expected = sum(len(payload[name]) for name in buckets)
        assert payload["total_changes"] == expected, (
            f"total_changes {payload['total_changes']} != formula {expected}"
        )

    def test_II5_renamed_file_in_renamed_not_modified(self, code_repo: pathlib.Path) -> None:
        """II5: a rename shows up only in the renamed mapping (old path -> new path)."""
        mv_args = ["mv", "module.py", "utils.py"]
        runner.invoke(cli, mv_args, env=_env(code_repo))

        payload = _diff_json(code_repo, "--staged")
        renamed = payload["renamed"]
        assert "module.py" in renamed, (
            f"module.py not a rename source. renamed={renamed}, "
            f"modified={payload['modified']}, added={payload['added']}, deleted={payload['deleted']}"
        )
        assert renamed["module.py"] == "utils.py", (
            f"Expected renamed['module.py']='utils.py', got {renamed}"
        )
        assert "utils.py" not in payload["added"], "rename target must not appear in added"
        assert "module.py" not in payload["deleted"], "rename source must not appear in deleted"
        assert "module.py" not in payload["modified"], "rename source must not appear in modified"
| 221 | |
| 222 | |
| 223 | # --------------------------------------------------------------------------- |
| 224 | # III Symbol-level output |
| 225 | # --------------------------------------------------------------------------- |
| 226 | |
| 227 | |
class TestSymbolLevelOutputIII:
    """Group III: per-file symbol buckets under the ``symbols`` key."""

    def test_III1_symbols_always_present(self, code_repo: pathlib.Path) -> None:
        """III1: symbols dict is always present, even on a clean diff."""
        data = _diff_json(code_repo)
        assert "symbols" in data
        assert isinstance(data["symbols"], dict)
        assert data["symbols"] == {}

    def test_III2_new_function_in_symbols_added(self, code_repo: pathlib.Path) -> None:
        """III2: Adding a new function appears in symbols[file].added."""
        (code_repo / "module.py").write_text(
            "def greet():\n return 'hello'\n\ndef farewell():\n return 'bye'\n"
        )
        data = _diff_json(code_repo)

        assert "module.py" in data["symbols"], (
            f"module.py not in symbols: {data['symbols']}"
        )
        sym = data["symbols"]["module.py"]
        # The entry must expose exactly the three buckets, no more and no less.
        assert _SYMBOL_BUCKET_KEYS == set(sym.keys()), (
            f"Symbol bucket has wrong keys: {sym.keys()}"
        )
        assert "farewell" in sym["added"], (
            f"Expected 'farewell' in symbols.module.py.added, got {sym['added']}"
        )

    def test_III3_deleted_function_in_symbols_deleted(self, code_repo: pathlib.Path) -> None:
        """III3: Removing a function appears in symbols[file].deleted."""
        env = _env(code_repo)

        # First commit a version of the module that has a second function.
        (code_repo / "module.py").write_text(
            "def greet():\n return 'hello'\n\ndef farewell():\n return 'bye'\n"
        )
        # Check the setup commands: if either fails silently, HEAD still holds
        # the one-function module and the symbol assertions below fail for a
        # reason unrelated to what this test is about.
        staged = runner.invoke(cli, ["code", "add", "module.py"], env=env)
        assert staged.exit_code == 0, staged.output
        committed = runner.invoke(cli, ["commit", "-m", "add farewell"], env=env)
        assert committed.exit_code == 0, committed.output

        # Now delete it in the working tree and diff against that commit.
        (code_repo / "module.py").write_text("def greet():\n return 'hello'\n")
        data = _diff_json(code_repo)

        assert "module.py" in data["symbols"]
        sym = data["symbols"]["module.py"]
        assert "farewell" in sym["deleted"], (
            f"Expected 'farewell' in symbols.module.py.deleted, got {sym['deleted']}"
        )

    def test_III4_added_file_symbols_in_symbols_or_omitted(
        self, code_repo: pathlib.Path
    ) -> None:
        """III4: Newly added file's symbols appear in symbols[file].added or file omitted."""
        (code_repo / "fresh.py").write_text("def new_func():\n pass\n")
        staged = runner.invoke(cli, ["code", "add", "fresh.py"], env=_env(code_repo))
        assert staged.exit_code == 0, staged.output

        data = _diff_json(code_repo, "--staged")
        assert "fresh.py" in data["added"]
        # Omitting the new file from symbols is accepted; if it is present,
        # its function must be reported in the added bucket.
        if "fresh.py" in data["symbols"]:
            assert "new_func" in data["symbols"]["fresh.py"]["added"], (
                f"Expected new_func in symbols for new file: {data['symbols']['fresh.py']}"
            )
| 287 | |
| 288 | |
| 289 | # --------------------------------------------------------------------------- |
| 290 | # IV Semantic fields |
| 291 | # --------------------------------------------------------------------------- |
| 292 | |
| 293 | |
class TestSemanticFieldsIV:
    """Group IV: the ``sem_ver_bump`` and ``breaking_changes`` fields."""

    def test_IV1_sem_ver_bump_always_present(self, code_repo: pathlib.Path) -> None:
        """IV1: sem_ver_bump is a string and equals 'none' when nothing was edited."""
        payload = _diff_json(code_repo)
        assert "sem_ver_bump" in payload
        bump = payload["sem_ver_bump"]
        assert isinstance(bump, str)
        assert bump == "none"  # no edits since the fixture's commit

    def test_IV2_breaking_changes_always_present(self, code_repo: pathlib.Path) -> None:
        """IV2: breaking_changes is present and is a list."""
        payload = _diff_json(code_repo)
        assert "breaking_changes" in payload
        assert isinstance(payload["breaking_changes"], list)

    def test_IV3_sem_ver_bump_reflects_changes(self, code_repo: pathlib.Path) -> None:
        """IV3: sem_ver_bump is 'none' before editing and something else afterwards."""
        # Baseline: nothing edited yet.
        baseline = _diff_json(code_repo)
        assert baseline["sem_ver_bump"] == "none"

        # Adding a second function must move the bump off "none".
        module = code_repo / "module.py"
        module.write_text(
            "def greet():\n return 'hello'\n\ndef farewell():\n return 'bye'\n"
        )
        bump = _diff_json(code_repo)["sem_ver_bump"]
        assert bump != "none", (
            f"Expected non-none sem_ver_bump for dirty diff, got {bump!r}"
        )
| 321 | |
| 322 | |
| 323 | # --------------------------------------------------------------------------- |
| 324 | # V Diff modes |
| 325 | # --------------------------------------------------------------------------- |
| 326 | |
| 327 | |
class TestDiffModesV:
    """Group V: diff modes selected by flags or positional commit ids."""

    def test_V1_staged_flag_sets_to_ref(self, code_repo: pathlib.Path) -> None:
        """V1: --staged sets to_ref to 'staged'."""
        data = _diff_json(code_repo, "--staged")
        assert data["to_ref"] == "staged", (
            f"Expected to_ref='staged', got {data['to_ref']!r}"
        )

    def test_V2_staged_flag_shows_staged_changes(self, code_repo: pathlib.Path) -> None:
        """V2: --staged shows staged changes as has_changes=true."""
        (code_repo / "module.py").write_text("def greet():\n return 'hi'\n")
        runner.invoke(cli, ["code", "add", "module.py"], env=_env(code_repo))

        assert _diff_json(code_repo, "--staged")["has_changes"] is True

    def test_V3_default_shows_working_tree(self, code_repo: pathlib.Path) -> None:
        """V3: Default diff (no flags) uses to_ref='working tree'."""
        data = _diff_json(code_repo)
        assert data["to_ref"] == "working tree", (
            f"Expected to_ref='working tree', got {data['to_ref']!r}"
        )

    def test_V4_commit_to_commit_diff_has_sha256_to_commit_id(
        self, code_repo: pathlib.Path
    ) -> None:
        """V4: Commit-to-commit diff populates to_commit_id with sha256:-prefixed ID."""
        # Look up the id of the most recent commit via ``log --json -n 1``.
        log_out = runner.invoke(cli, ["log", "--json", "-n", "1"], env=_env(code_repo))
        # Fail here with the command output rather than with a JSON decode
        # error if the log command itself did not succeed.
        assert log_out.exit_code == 0, f"log --json failed: {log_out.output}"
        # Uses the module-level ``json`` import; no local re-import is needed.
        head_id = json.loads(log_out.output)["commits"][0]["commit_id"]

        # Diff the commit against itself: only the id fields matter here.
        data = _diff_json(code_repo, head_id, head_id)
        assert data["to_commit_id"] is not None
        assert data["to_commit_id"].startswith("sha256:"), (
            f"to_commit_id must be sha256:-prefixed, got {data['to_commit_id']!r}"
        )
File History
1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
6 days ago