gabriel / muse public
test_diff_json_schema.py python
362 lines 15.8 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
1 """Tests for the canonical ``muse diff --json`` schema.
2
3 Muse is a symbol-aware VCS. Its diff engine works at the symbol level, not just
4 the file level. The JSON output must expose that — otherwise agents lose the very
5 information that makes Muse different from a file-hashing VCS.
6
7 Canonical schema
8 ----------------
9 ::
10
11 {
12 "from_ref": str, // "HEAD", branch, or commit id
13 "to_ref": str, // "working tree", "staged", or commit id
14 "from_commit_id": str | null, // sha256:-prefixed or null
15 "to_commit_id": str | null, // sha256:-prefixed or null
16 "has_changes": bool,
17 "added": [str, ...], // file paths added
18 "deleted": [str, ...], // file paths deleted
19 "modified": [str, ...], // file paths modified in-place
20 "renamed": {str: str}, // {old_path: new_path}
21 "total_changes": int, // len(added)+len(modified)+len(deleted)+len(renamed)
22 "symbols": { // per-file symbol-level changes
23 "<file_path>": {
24 "added": [str, ...], // symbol names inserted
25 "deleted": [str, ...], // symbol names deleted
26 "modified": [str, ...] // symbol names replaced / patched
27 }
28 },
29 "sem_ver_bump": str, // "none" | "patch" | "minor" | "major"
30 "breaking_changes": [str, ...] // addresses of breaking symbol changes
31 }
32
33 Coverage matrix
34 ---------------
35 I Schema invariants
36 I1 All required keys present on clean repo (no changes)
37 I2 All required keys present when changes exist
38 I3 from_commit_id is sha256:-prefixed
39 I4 has_changes=false when clean, true when dirty
40
41 II File-level categorisation
42 II1 Added file appears in added, not modified or deleted
43 II2 Deleted file appears in deleted, not modified or added
44 II3 Modified file appears in modified
45 II4 total_changes = len(added) + len(modified) + len(deleted) + len(renamed)
46 II5 Renamed file appears in renamed dict, NOT in modified or added/deleted
47
48 III Symbol-level output (the Muse differentiator)
49 III1 symbols dict present even when empty (clean diff → {})
50 III2 New function in a modified file appears in symbols[file].added
51 III3 Deleted function in a modified file appears in symbols[file].deleted
52 III4 Newly added file's symbols appear in symbols[file].added, or the file is omitted from symbols
53
54 IV Semantic fields
55 IV1 sem_ver_bump always present (at least "none")
56 IV2 breaking_changes always present (at least [])
57 IV3 sem_ver_bump reflects the bump level of the changes
58
59 V Diff modes
60 V1 --staged shows staged vs HEAD (to_ref == "staged")
61 V2 --staged has_changes=true when staged changes exist
62 V3 Default (no flag) shows working tree vs HEAD (to_ref == "working tree")
63 V4 Commit-to-commit diff uses sha256:-prefixed to_commit_id
64 """
65
66 from __future__ import annotations
67 from collections.abc import Mapping
68
69 import json
70 import pathlib
71
72 import pytest
73
74 from tests.cli_test_helper import CliRunner
75
# First positional argument handed to every ``runner.invoke`` call in this module.
# NOTE(review): it is ``None`` here, so the helper runner presumably does not
# need a CLI object — confirm against tests.cli_test_helper.
cli = None
# Shared runner instance used by the fixture, the helpers and the tests in this module.
runner = CliRunner()
78
79 _REQUIRED_KEYS = {
80 "from_ref", "to_ref", "from_commit_id", "to_commit_id",
81 "has_changes",
82 "added", "deleted", "modified", "renamed",
83 "total_changes",
84 "symbols",
85 "sem_ver_bump", "breaking_changes",
86 }
87
88 _SYMBOL_BUCKET_KEYS = {"added", "deleted", "modified"}
89
90
91 def _env(root: pathlib.Path) -> Mapping[str, str]:
92 return {"MUSE_REPO_ROOT": str(root)}
93
94
def _diff_json(root: pathlib.Path, *extra_args: str) -> Mapping[str, object]:
    """Run ``diff --json`` for the repo at *root* and return the decoded payload.

    Any ``extra_args`` are appended after ``--json``.  The command must exit
    with status 0; otherwise the assertion fails and reports the command
    output.  The output is stripped of surrounding whitespace before it is
    parsed as JSON.
    """
    argv = ["diff", "--json", *extra_args]
    outcome = runner.invoke(cli, argv, env=_env(root))
    assert outcome.exit_code == 0, f"diff --json failed: {outcome.output}"
    raw_payload = outcome.output.strip()
    return json.loads(raw_payload)
99
100
@pytest.fixture()
def code_repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Code-domain repo with one committed Python file.

    Changes the working directory to ``tmp_path``, runs ``init --domain code``,
    writes ``module.py`` containing a single ``greet`` function, stages it with
    ``code add`` and commits it with the message ``initial``.  Every CLI step
    must exit with status 0, so a broken setup fails here with the command
    output instead of surfacing later inside an unrelated test assertion.

    Returns:
        The repository root (``tmp_path``).
    """
    monkeypatch.chdir(tmp_path)

    result = runner.invoke(cli, ["init", "--domain", "code"], env=_env(tmp_path))
    assert result.exit_code == 0, result.output

    (tmp_path / "module.py").write_text("def greet():\n return 'hello'\n")

    # Check the staging step like the other two steps: if it failed silently,
    # the error would only show up as a confusing commit or diff failure.
    result = runner.invoke(cli, ["code", "add", "module.py"], env=_env(tmp_path))
    assert result.exit_code == 0, result.output

    result = runner.invoke(cli, ["commit", "-m", "initial"], env=_env(tmp_path))
    assert result.exit_code == 0, result.output
    return tmp_path
112
113
114 # ---------------------------------------------------------------------------
115 # I Schema invariants
116 # ---------------------------------------------------------------------------
117
118
class TestSchemaInvariantsI:
    """Section I of the coverage matrix: schema invariants of ``diff --json``."""

    def test_I1_clean_repo_all_keys_present(self, code_repo: pathlib.Path) -> None:
        """I1: every required key is emitted when no edits have been made."""
        payload = _diff_json(code_repo)
        absent = _REQUIRED_KEYS.difference(payload.keys())
        assert not absent, f"Missing keys on clean diff: {absent}"

    def test_I2_dirty_repo_all_keys_present(self, code_repo: pathlib.Path) -> None:
        """I2: every required key is emitted when the working tree has edits."""
        two_function_source = (
            "def greet():\n return 'hello'\n\ndef farewell():\n return 'bye'\n"
        )
        module = code_repo / "module.py"
        module.write_text(two_function_source)

        payload = _diff_json(code_repo)
        absent = _REQUIRED_KEYS.difference(payload.keys())
        assert not absent, f"Missing keys on dirty diff: {absent}"

    def test_I3_from_commit_id_is_sha256_prefixed(self, code_repo: pathlib.Path) -> None:
        """I3: from_commit_id is set and carries the ``sha256:`` prefix."""
        commit_id = _diff_json(code_repo)["from_commit_id"]
        assert commit_id is not None
        assert commit_id.startswith("sha256:"), (
            f"from_commit_id must be sha256:-prefixed, got {commit_id!r}"
        )

    def test_I4_has_changes_reflects_dirty_state(self, code_repo: pathlib.Path) -> None:
        """I4: has_changes is false with nothing staged and true once a change is staged.

        The staged view is used instead of the working-tree diff: muse init
        leaves .museattributes/.museignore uncommitted in the working tree,
        so the working-tree diff is never truly clean after init, whereas the
        staged view is clean after a commit with nothing staged.
        """
        before = _diff_json(code_repo, "--staged")
        assert before["has_changes"] is False

        module = code_repo / "module.py"
        module.write_text("def greet():\n return 'hi'\n")
        runner.invoke(cli, ["code", "add", "module.py"], env=_env(code_repo))

        after = _diff_json(code_repo, "--staged")
        assert after["has_changes"] is True
155
156
157 # ---------------------------------------------------------------------------
158 # II File-level categorisation
159 # ---------------------------------------------------------------------------
160
161
class TestFileLevelCategorizationII:
    """Section II of the coverage matrix: file-level buckets of ``diff --json``."""

    def test_II1_added_file_in_added(self, code_repo: pathlib.Path) -> None:
        """II1: a newly staged file is listed under added only."""
        new_file = code_repo / "new.py"
        new_file.write_text("x = 1\n")
        runner.invoke(cli, ["code", "add", "new.py"], env=_env(code_repo))

        staged = _diff_json(code_repo, "--staged")
        assert "new.py" in staged["added"], f"new.py not in added: {staged}"
        for other_bucket in ("modified", "deleted"):
            assert "new.py" not in staged[other_bucket]

    def test_II2_deleted_file_in_deleted(self, code_repo: pathlib.Path) -> None:
        """II2: a staged deletion is listed under deleted only."""
        target = code_repo / "module.py"
        target.unlink()
        runner.invoke(cli, ["code", "add", "module.py"], env=_env(code_repo))

        staged = _diff_json(code_repo, "--staged")
        assert "module.py" in staged["deleted"], f"module.py not in deleted: {staged}"
        for other_bucket in ("modified", "added"):
            assert "module.py" not in staged[other_bucket]

    def test_II3_modified_file_in_modified(self, code_repo: pathlib.Path) -> None:
        """II3: editing a committed file in place lists it under modified."""
        module = code_repo / "module.py"
        module.write_text("def greet():\n return 'hi'\n")

        working = _diff_json(code_repo)
        assert "module.py" in working["modified"], f"module.py not in modified: {working}"

    def test_II4_total_changes_formula(self, code_repo: pathlib.Path) -> None:
        """II4: total_changes equals the summed sizes of the four file buckets."""
        module = code_repo / "module.py"
        module.write_text("def greet():\n return 'hi'\n")
        extra = code_repo / "extra.py"
        extra.write_text("y = 2\n")
        runner.invoke(cli, ["code", "add", "extra.py"], env=_env(code_repo))

        working = _diff_json(code_repo)
        bucket_names = ("added", "modified", "deleted", "renamed")
        expected = sum(len(working[name]) for name in bucket_names)
        assert working["total_changes"] == expected, (
            f"total_changes {working['total_changes']} != formula {expected}"
        )

    def test_II5_renamed_file_in_renamed_not_modified(self, code_repo: pathlib.Path) -> None:
        """II5: a rename shows up only in the renamed dict as {old_path: new_path}."""
        mv_args = ["mv", "module.py", "utils.py"]
        runner.invoke(cli, mv_args, env=_env(code_repo))

        staged = _diff_json(code_repo, "--staged")
        assert "module.py" in staged["renamed"], (
            f"module.py not a rename source. renamed={staged['renamed']}, "
            f"modified={staged['modified']}, added={staged['added']}, deleted={staged['deleted']}"
        )
        assert staged["renamed"]["module.py"] == "utils.py", (
            f"Expected renamed['module.py']='utils.py', got {staged['renamed']}"
        )
        # Neither side of the rename may leak into the other file-level buckets.
        assert "utils.py" not in staged["added"], "rename target must not appear in added"
        assert "module.py" not in staged["deleted"], "rename source must not appear in deleted"
        assert "module.py" not in staged["modified"], "rename source must not appear in modified"
221
222
223 # ---------------------------------------------------------------------------
224 # III Symbol-level output
225 # ---------------------------------------------------------------------------
226
227
class TestSymbolLevelOutputIII:
    """Section III of the coverage matrix: per-file symbol buckets under ``symbols``."""

    def test_III1_symbols_always_present(self, code_repo: pathlib.Path) -> None:
        """III1: symbols dict is always present, even on a clean diff."""
        data = _diff_json(code_repo)
        assert "symbols" in data
        assert isinstance(data["symbols"], dict)
        assert data["symbols"] == {}

    def test_III2_new_function_in_symbols_added(self, code_repo: pathlib.Path) -> None:
        """III2: Adding a new function appears in symbols[file].added."""
        (code_repo / "module.py").write_text(
            "def greet():\n return 'hello'\n\ndef farewell():\n return 'bye'\n"
        )
        data = _diff_json(code_repo)

        assert "module.py" in data["symbols"], (
            f"module.py not in symbols: {data['symbols']}"
        )
        sym = data["symbols"]["module.py"]
        # The per-file entry must expose exactly the three symbol buckets.
        assert _SYMBOL_BUCKET_KEYS == set(sym.keys()), (
            f"Symbol bucket has wrong keys: {sym.keys()}"
        )
        assert "farewell" in sym["added"], (
            f"Expected 'farewell' in symbols.module.py.added, got {sym['added']}"
        )

    def test_III3_deleted_function_in_symbols_deleted(self, code_repo: pathlib.Path) -> None:
        """III3: Removing a function appears in symbols[file].deleted."""
        # First commit a second function so there is something to delete.
        (code_repo / "module.py").write_text(
            "def greet():\n return 'hello'\n\ndef farewell():\n return 'bye'\n"
        )
        # The setup steps are asserted explicitly: if the commit did not land,
        # the diff below would compare against the initial commit and fail
        # with a misleading "farewell not in deleted" message.
        staged = runner.invoke(cli, ["code", "add", "module.py"], env=_env(code_repo))
        assert staged.exit_code == 0, staged.output
        committed = runner.invoke(cli, ["commit", "-m", "add farewell"], env=_env(code_repo))
        assert committed.exit_code == 0, committed.output

        # Now delete it from the working tree.
        (code_repo / "module.py").write_text("def greet():\n return 'hello'\n")
        data = _diff_json(code_repo)

        assert "module.py" in data["symbols"]
        sym = data["symbols"]["module.py"]
        assert "farewell" in sym["deleted"], (
            f"Expected 'farewell' in symbols.module.py.deleted, got {sym['deleted']}"
        )

    def test_III4_added_file_symbols_in_symbols_or_omitted(
        self, code_repo: pathlib.Path
    ) -> None:
        """III4: Newly added file's symbols appear in symbols[file].added or file omitted."""
        (code_repo / "fresh.py").write_text("def new_func():\n pass\n")
        runner.invoke(cli, ["code", "add", "fresh.py"], env=_env(code_repo))

        data = _diff_json(code_repo, "--staged")
        # This assertion also confirms that the staging step above took effect.
        assert "fresh.py" in data["added"]
        # If symbols are present for the new file, its function must be in added.
        if "fresh.py" in data["symbols"]:
            assert "new_func" in data["symbols"]["fresh.py"]["added"], (
                f"Expected new_func in symbols for new file: {data['symbols']['fresh.py']}"
            )
287
288
289 # ---------------------------------------------------------------------------
290 # IV Semantic fields
291 # ---------------------------------------------------------------------------
292
293
class TestSemanticFieldsIV:
    """Section IV of the coverage matrix: ``sem_ver_bump`` and ``breaking_changes``."""

    def test_IV1_sem_ver_bump_always_present(self, code_repo: pathlib.Path) -> None:
        """IV1: sem_ver_bump is always emitted as a string, 'none' with no edits."""
        payload = _diff_json(code_repo)
        assert "sem_ver_bump" in payload
        bump = payload["sem_ver_bump"]
        assert isinstance(bump, str)
        assert bump == "none"  # clean repo

    def test_IV2_breaking_changes_always_present(self, code_repo: pathlib.Path) -> None:
        """IV2: breaking_changes is always emitted as a list."""
        payload = _diff_json(code_repo)
        assert "breaking_changes" in payload
        breaking = payload["breaking_changes"]
        assert isinstance(breaking, list)

    def test_IV3_sem_ver_bump_reflects_changes(self, code_repo: pathlib.Path) -> None:
        """IV3: sem_ver_bump is 'none' before edits and something else after."""
        # Before any edit the bump is "none".
        clean_bump = _diff_json(code_repo)["sem_ver_bump"]
        assert clean_bump == "none"

        # Adding a function must move the bump away from "none".
        two_function_source = (
            "def greet():\n return 'hello'\n\ndef farewell():\n return 'bye'\n"
        )
        (code_repo / "module.py").write_text(two_function_source)

        dirty = _diff_json(code_repo)
        assert dirty["sem_ver_bump"] != "none", (
            f"Expected non-none sem_ver_bump for dirty diff, got {dirty['sem_ver_bump']!r}"
        )
321
322
323 # ---------------------------------------------------------------------------
324 # V Diff modes
325 # ---------------------------------------------------------------------------
326
327
class TestDiffModesV:
    """Section V of the coverage matrix: staged, working-tree and commit-to-commit modes."""

    def test_V1_staged_flag_sets_to_ref(self, code_repo: pathlib.Path) -> None:
        """V1: --staged sets to_ref to 'staged'."""
        data = _diff_json(code_repo, "--staged")
        assert data["to_ref"] == "staged", (
            f"Expected to_ref='staged', got {data['to_ref']!r}"
        )

    def test_V2_staged_flag_shows_staged_changes(self, code_repo: pathlib.Path) -> None:
        """V2: --staged shows staged changes as has_changes=true."""
        (code_repo / "module.py").write_text("def greet():\n return 'hi'\n")
        runner.invoke(cli, ["code", "add", "module.py"], env=_env(code_repo))

        assert _diff_json(code_repo, "--staged")["has_changes"] is True

    def test_V3_default_shows_working_tree(self, code_repo: pathlib.Path) -> None:
        """V3: Default diff (no flags) uses to_ref='working tree'."""
        data = _diff_json(code_repo)
        assert data["to_ref"] == "working tree", (
            f"Expected to_ref='working tree', got {data['to_ref']!r}"
        )

    def test_V4_commit_to_commit_diff_has_sha256_to_commit_id(
        self, code_repo: pathlib.Path
    ) -> None:
        """V4: Commit-to-commit diff populates to_commit_id with sha256:-prefixed ID.

        The newest commit id is read from ``log --json -n 1`` and passed as
        both positional refs, so the diff compares that commit with itself.
        """
        log_out = runner.invoke(cli, ["log", "--json", "-n", "1"], env=_env(code_repo))
        # Fail with the command output rather than a JSON decoding error if
        # the log command itself did not succeed.
        assert log_out.exit_code == 0, f"log --json failed: {log_out.output}"
        head_id = json.loads(log_out.output)["commits"][0]["commit_id"]

        data = _diff_json(code_repo, head_id, head_id)
        assert data["to_commit_id"] is not None
        assert data["to_commit_id"].startswith("sha256:"), (
            f"to_commit_id must be sha256:-prefixed, got {data['to_commit_id']!r}"
        )
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 28 days ago