gabriel / muse public
test_commit_json_schema.py python
462 lines 19.3 KB
Raw
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago
1 """Tests for the canonical ``muse commit --json`` schema.
2
3 ``muse commit`` is the core write operation — every agent pipeline ends here.
4 The JSON output must expose all provenance fields so downstream consumers
5 (hub, orchestrators, other agents) never need a follow-up ``muse read`` just
6 to discover what model produced a commit.
7
8 Canonical schema (success path)
9 ---------------------------------
10 ::
11
12 {
13 "dry_run": false,
14 "commit_id": "sha256:<64-hex>",
15 "branch": str,
16 "snapshot_id": str,
17 "message": str,
18 "parent_commit_id": str | null,
19 "parent2_commit_id": str | null,
20 "committed_at": str, // ISO 8601 with timezone
21 "author": str,
22 "agent_id": str, // "" for human commits
23 "model_id": str, // "" for human commits
24 "toolchain_id": str,
25 "sem_ver_bump": str, // "none" | "patch" | "minor" | "major"
26 "breaking_changes": [str, ...],
27 "files_changed": {
28 "added": int,
29 "modified": int,
30 "deleted": int,
31 "total": int // added + modified + deleted
32 }
33 }
34
35 Dry-run schema is identical except ``dry_run`` is ``true`` and ``clean`` may
36 appear when the working tree has no changes.
37
38 Coverage
39 --------
40 I Schema invariants
41 I1 All required keys present on a normal commit
42 I2 commit_id is sha256:-prefixed
43 I3 committed_at is ISO 8601 with timezone
44 I4 sem_ver_bump is a valid enum value
45 I5 breaking_changes is always a list
46 I6 files_changed has added, modified, deleted, total keys
47 I7 files_changed.total = added + modified + deleted
48
49 II Agent provenance in commit output
50 II1 agent_id populated from --agent-id flag
51 II2 model_id populated from --model-id flag
52 II3 toolchain_id populated from --toolchain-id flag
53 II4 agent_id empty string (not null) for human commits
54 II5 model_id empty string (not null) for human commits
55 II6 model_id from MUSE_MODEL_ID env when flag absent
56 II7 toolchain_id from MUSE_TOOLCHAIN_ID env when flag absent
57 II8 --agent-id flag overrides MUSE_AGENT_ID env
58
59 III Dry-run schema parity
60 III1 dry_run schema has same required keys as success path (minus clean)
61 III2 dry_run: true in dry-run output
62 III3 dry_run: false in normal commit output
63 III4 dry-run output has model_id and toolchain_id
64 III5 dry-run clean tree exits 1 with clean=true JSON
65
66 IV File change accounting
67 IV1 Initial commit files_changed.added >= 1
68 IV2 Modification increments modified, not added
69 IV3 Deletion increments deleted
70 IV4 files_changed.total = added + modified + deleted always
71
72 V Error paths (JSON mode)
73 V1 Missing -m exits 1 with JSON {"error": "no_message", ...}
74 V2 Clean tree (no --dry-run) exits 0 with JSON {"clean": true, ...}
75 (not yet covered here: empty workdir exits 1 with JSON {"error": "empty_workdir", ...})
76 """
77
78 from __future__ import annotations
79 from collections.abc import Mapping
80
81 import json
82 import os
83 import pathlib
84
85 import pytest
86
87 from tests.cli_test_helper import CliRunner, InvokeResult
88
# ``cli`` is a ``None`` placeholder passed as the first argument of every
# ``runner.invoke(cli, ...)`` call in this module.
# NOTE(review): presumably tests.cli_test_helper.CliRunner ignores this
# argument and resolves the real entry point itself — confirm against the helper.
cli = None
# Single runner instance shared by all fixtures and tests in this module.
runner = CliRunner()
91
# Top-level keys that every ``commit --json`` payload must carry, on both the
# real-commit and the dry-run path (see the schema in the module docstring).
_REQUIRED_KEYS = {
    # mode flag
    "dry_run",
    # commit identity
    "commit_id",
    "branch",
    "snapshot_id",
    # message and ancestry
    "message",
    "parent_commit_id",
    "parent2_commit_id",
    # when / who
    "committed_at",
    "author",
    # agent provenance
    "agent_id",
    "model_id",
    "toolchain_id",
    # semantic-versioning metadata
    "sem_ver_bump",
    "breaking_changes",
    # change accounting
    "files_changed",
}

# Keys expected inside the nested ``files_changed`` object.
_FILES_CHANGED_KEYS = {
    "added",
    "modified",
    "deleted",
    "total",
}

# Allowed values of ``sem_ver_bump``.
_VALID_SEM_VER_BUMPS = {
    "none",
    "patch",
    "minor",
    "major",
}
104
105
def _env(root: pathlib.Path) -> Mapping[str, str]:
    """Build the base environment mapping that points the CLI at *root*.

    Returns a fresh single-entry dict whose ``MUSE_REPO_ROOT`` value is the
    string form of *root*.
    """
    repo_root = str(root)
    return dict(MUSE_REPO_ROOT=repo_root)
108
109
def _commit(root: pathlib.Path, *flags: str, env: Mapping[str, str] | None = None) -> Mapping[str, object]:
    """Run ``commit --json`` against *root* and return the decoded payload.

    Args:
        root: Repository root; exported to the CLI through ``_env``.
        *flags: Extra command-line arguments appended after ``commit --json``.
        env: Optional extra environment entries; they win over the base
            ``_env(root)`` mapping on key collisions.

    Returns:
        The JSON document parsed from the stripped command output.

    Raises:
        AssertionError: If the command exits with a non-zero status.
    """
    merged_env = dict(_env(root))
    if env:
        merged_env.update(env)
    argv = ["commit", "--json", *flags]
    result = runner.invoke(cli, argv, env=merged_env)
    assert result.exit_code == 0, f"commit --json failed (exit {result.exit_code}):\n{result.output}"
    payload = result.output.strip()
    return json.loads(payload)
115
116
def _commit_raw(root: pathlib.Path, *args: str, env: Mapping[str, str] | None = None) -> InvokeResult:
    """Run ``commit --json`` against *root* and return the raw invoke result.

    Unlike ``_commit`` this neither checks the exit code nor parses the
    output, so it is the helper to use for error-path and dry-run tests.

    Args:
        root: Repository root; exported to the CLI through ``_env``.
        *args: Extra command-line arguments appended after ``commit --json``.
        env: Optional extra environment entries; they win over the base
            ``_env(root)`` mapping on key collisions.
    """
    merged_env = dict(_env(root))
    if env:
        merged_env.update(env)
    argv = ["commit", "--json", *args]
    return runner.invoke(cli, argv, env=merged_env)
120
121
@pytest.fixture()
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Provide an initialised code-domain repo with no commits yet.

    Changes the working directory to ``tmp_path``, runs ``init --domain code``
    (asserting it succeeds), writes ``module.py`` and runs ``code add`` on it.
    The result of ``code add`` is not checked.
    """
    monkeypatch.chdir(tmp_path)
    cli_env = _env(tmp_path)

    init_result = runner.invoke(cli, ["init", "--domain", "code"], env=cli_env)
    assert init_result.exit_code == 0, init_result.output

    source_file = tmp_path / "module.py"
    source_file.write_text("def greet():\n return 'hello'\n")
    runner.invoke(cli, ["code", "add", "module.py"], env=cli_env)

    return tmp_path
132
133
@pytest.fixture()
def committed_repo(
    repo: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> pathlib.Path:
    """Provide the ``repo`` fixture's repository after one commit.

    Runs a plain (non-JSON) ``commit -m initial`` on top of ``repo`` and
    asserts that it succeeds before handing the path to the test.
    """
    initial_commit = runner.invoke(cli, ["commit", "-m", "initial"], env=_env(repo))
    assert initial_commit.exit_code == 0, initial_commit.output
    return repo
143
144
145 # ---------------------------------------------------------------------------
146 # I Schema invariants
147 # ---------------------------------------------------------------------------
148
149
class TestSchemaInvariantsI:
    """Section I: structural invariants of the ``commit --json`` payload."""

    def test_I1_all_required_keys_present(self, repo: pathlib.Path) -> None:
        """I1: Every required key must be present in commit --json output."""
        data = _commit(repo, "-m", "initial commit")
        missing = _REQUIRED_KEYS - data.keys()
        assert not missing, f"Missing required keys in commit --json: {missing}"

    def test_I2_commit_id_sha256_prefixed(self, repo: pathlib.Path) -> None:
        """I2: commit_id must start with 'sha256:'."""
        data = _commit(repo, "-m", "initial commit")
        commit_id = data["commit_id"]
        # Check the type first so a null/non-string value is reported as a
        # schema failure instead of an AttributeError inside the test.
        assert isinstance(commit_id, str) and commit_id.startswith("sha256:"), (
            f"commit_id must be sha256:-prefixed, got {commit_id!r}"
        )

    def test_I3_committed_at_is_iso8601_with_tz(self, repo: pathlib.Path) -> None:
        """I3: committed_at must parse as ISO 8601 with timezone info."""
        import datetime

        data = _commit(repo, "-m", "initial")
        committed_at = data["committed_at"]
        assert isinstance(committed_at, str), (
            f"committed_at must be a string, got {committed_at!r}"
        )
        # datetime.fromisoformat() only accepts the "Z" UTC designator from
        # Python 3.11 on; rewrite it as an explicit offset so a valid UTC
        # timestamp does not error on older interpreters.
        if committed_at.endswith("Z"):
            parseable = f"{committed_at[:-1]}+00:00"
        else:
            parseable = committed_at
        dt = datetime.datetime.fromisoformat(parseable)
        assert dt.tzinfo is not None, (
            f"committed_at lacks timezone: {committed_at!r}"
        )

    def test_I4_sem_ver_bump_valid_enum(self, repo: pathlib.Path) -> None:
        """I4: sem_ver_bump must be one of the four valid values."""
        data = _commit(repo, "-m", "initial")
        assert data["sem_ver_bump"] in _VALID_SEM_VER_BUMPS, (
            f"sem_ver_bump {data['sem_ver_bump']!r} not in {_VALID_SEM_VER_BUMPS}"
        )

    def test_I5_breaking_changes_always_list(self, repo: pathlib.Path) -> None:
        """I5: breaking_changes is always a list (never null or absent)."""
        data = _commit(repo, "-m", "initial")
        assert isinstance(data["breaking_changes"], list), (
            f"breaking_changes must be list, got {type(data['breaking_changes'])}"
        )

    def test_I6_files_changed_has_all_keys(self, repo: pathlib.Path) -> None:
        """I6: files_changed must have added, modified, deleted, and total keys."""
        data = _commit(repo, "-m", "initial")
        fc = data["files_changed"]
        missing = _FILES_CHANGED_KEYS - fc.keys()
        assert not missing, (
            f"files_changed missing keys: {missing}. Got: {fc}"
        )

    def test_I7_files_changed_total_is_sum(self, repo: pathlib.Path) -> None:
        """I7: files_changed.total = added + modified + deleted."""
        data = _commit(repo, "-m", "initial")
        fc = data["files_changed"]
        expected = fc["added"] + fc["modified"] + fc["deleted"]
        assert fc["total"] == expected, (
            f"files_changed.total {fc['total']} != "
            f"added({fc['added']}) + modified({fc['modified']}) + deleted({fc['deleted']}) = {expected}"
        )
205
206
207 # ---------------------------------------------------------------------------
208 # II Agent provenance in commit output
209 # ---------------------------------------------------------------------------
210
211
class TestAgentProvenanceII:
    """Section II: agent provenance fields (agent_id, model_id, toolchain_id)."""

    def test_II1_agent_id_in_output(self, repo: pathlib.Path) -> None:
        """II1: agent_id from --agent-id appears in JSON output."""
        data = _commit(repo, "-m", "bot commit", "--agent-id", "test-bot")
        assert data["agent_id"] == "test-bot", (
            f"Expected agent_id='test-bot', got {data['agent_id']!r}"
        )

    def test_II2_model_id_in_output(self, repo: pathlib.Path) -> None:
        """II2: model_id from --model-id appears in JSON output."""
        data = _commit(repo, "-m", "model commit", "--model-id", "claude-opus-4")
        assert data["model_id"] == "claude-opus-4", (
            f"Expected model_id='claude-opus-4', got {data['model_id']!r}"
        )

    def test_II3_toolchain_id_in_output(self, repo: pathlib.Path) -> None:
        """II3: toolchain_id from --toolchain-id appears in JSON output."""
        data = _commit(repo, "-m", "tc commit", "--toolchain-id", "cursor-v2")
        assert data["toolchain_id"] == "cursor-v2", (
            f"Expected toolchain_id='cursor-v2', got {data['toolchain_id']!r}"
        )

    def test_II4_agent_id_empty_string_for_human(
        self, repo: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """II4: agent_id is '' (not null) for human commits."""
        # A "human" commit means no agent identity from any source. Clear the
        # variable in case the runner inherits the ambient environment, e.g.
        # when the suite itself is executed by an agent that exports it.
        monkeypatch.delenv("MUSE_AGENT_ID", raising=False)
        data = _commit(repo, "-m", "human commit")
        assert data["agent_id"] == "", (
            f"agent_id must be '' for human commit, got {data['agent_id']!r}"
        )

    def test_II5_model_id_empty_string_for_human(
        self, repo: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """II5: model_id is '' (not null) for human commits."""
        # Same isolation as II4: an inherited MUSE_MODEL_ID must not leak in.
        monkeypatch.delenv("MUSE_MODEL_ID", raising=False)
        data = _commit(repo, "-m", "human commit")
        assert data["model_id"] == "", (
            f"model_id must be '' for human commit, got {data['model_id']!r}"
        )

    def test_II6_model_id_from_env(self, repo: pathlib.Path) -> None:
        """II6: model_id picked up from MUSE_MODEL_ID env when --model-id absent."""
        # _commit already merges the repo-root entry into the environment, so
        # only the variable under test needs to be supplied here.
        data = _commit(repo, "-m", "env model", env={"MUSE_MODEL_ID": "claude-haiku-4"})
        assert data["model_id"] == "claude-haiku-4", (
            f"Expected model_id='claude-haiku-4' from env, got {data['model_id']!r}"
        )

    def test_II7_toolchain_id_from_env(self, repo: pathlib.Path) -> None:
        """II7: toolchain_id from MUSE_TOOLCHAIN_ID when --toolchain-id absent."""
        data = _commit(repo, "-m", "env tc", env={"MUSE_TOOLCHAIN_ID": "agentic-v3"})
        assert data["toolchain_id"] == "agentic-v3", (
            f"Expected toolchain_id='agentic-v3' from env, got {data['toolchain_id']!r}"
        )

    def test_II8_flag_overrides_env_for_agent_id(
        self, repo: pathlib.Path
    ) -> None:
        """II8: --agent-id flag takes priority over MUSE_AGENT_ID env."""
        data = _commit(
            repo, "-m", "override", "--agent-id", "flag-bot",
            env={"MUSE_AGENT_ID": "env-bot"},
        )
        assert data["agent_id"] == "flag-bot", (
            f"Expected flag-bot to override env-bot, got {data['agent_id']!r}"
        )
277
278
279 # ---------------------------------------------------------------------------
280 # III Dry-run schema parity
281 # ---------------------------------------------------------------------------
282
283
class TestDryRunSchemaIII:
    """Section III: ``commit --json --dry-run`` mirrors the real-commit schema."""

    def test_III1_dry_run_has_same_required_keys(self, repo: pathlib.Path) -> None:
        """III1: dry-run output has the same required keys as the success path."""
        result = _commit_raw(repo, "-m", "check", "--dry-run")
        assert result.exit_code == 0, f"dry-run failed:\n{result.output}"
        data = json.loads(result.output.strip())
        missing = _REQUIRED_KEYS - data.keys()
        assert not missing, f"dry-run missing required keys: {missing}"

    def test_III2_dry_run_flag_is_true(self, repo: pathlib.Path) -> None:
        """III2: dry_run=true in dry-run output."""
        result = _commit_raw(repo, "-m", "check", "--dry-run")
        assert result.exit_code == 0, f"dry-run failed:\n{result.output}"
        data = json.loads(result.output.strip())
        assert data["dry_run"] is True

    def test_III3_dry_run_false_on_real_commit(self, repo: pathlib.Path) -> None:
        """III3: dry_run=false in normal commit output."""
        data = _commit(repo, "-m", "real commit")
        assert data["dry_run"] is False

    def test_III4_dry_run_has_model_id_and_toolchain_id(
        self, repo: pathlib.Path
    ) -> None:
        """III4: dry-run output includes model_id and toolchain_id."""
        result = _commit_raw(
            repo, "-m", "preflight",
            "--dry-run", "--model-id", "claude-opus-4", "--toolchain-id", "cursor",
        )
        assert result.exit_code == 0, f"dry-run failed:\n{result.output}"
        data = json.loads(result.output.strip())
        assert data["model_id"] == "claude-opus-4", (
            f"model_id missing from dry-run output: {data}"
        )
        assert data["toolchain_id"] == "cursor", (
            f"toolchain_id missing from dry-run output: {data}"
        )

    def test_III5_dry_run_clean_tree_exits_1(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """III5: dry-run on a clean tree exits 1 with clean=true in JSON.

        Uses its own repo to ensure a truly clean workdir (all files committed).
        muse init --domain code creates .museattributes/.museignore, so we commit
        everything once first to establish HEAD == workdir, then dry-run.
        """
        monkeypatch.chdir(tmp_path)
        env = _env(tmp_path)
        # Check init like the ``repo`` fixture does, so a broken setup is
        # reported here rather than as a confusing commit failure below.
        result = runner.invoke(cli, ["init", "--domain", "code"], env=env)
        assert result.exit_code == 0, result.output
        (tmp_path / "module.py").write_text("x = 1\n")
        # Commit everything so HEAD == workdir (includes init-created files)
        result = runner.invoke(cli, ["commit", "-m", "initial"], env=env)
        assert result.exit_code == 0, result.output
        # Now dry-run should detect nothing to commit
        result = _commit_raw(tmp_path, "-m", "nothing", "--dry-run", env=env)
        assert result.exit_code == 1, (
            f"Expected exit 1 for dry-run on clean tree, got {result.exit_code}. "
            f"Output: {result.output}"
        )
        data = json.loads(result.output.strip())
        assert data.get("clean") is True, (
            f"Expected clean=true in dry-run clean-tree JSON: {data}"
        )
        assert data.get("dry_run") is True
349
350
351 # ---------------------------------------------------------------------------
352 # IV File change accounting
353 # ---------------------------------------------------------------------------
354
355
class TestFileChangeAccountingIV:
    """Section IV: ``files_changed`` counters reflect what the commit touched."""

    def test_IV1_initial_commit_added_gte_1(self, repo: pathlib.Path) -> None:
        """IV1: Initial commit adds at least the tracked file."""
        data = _commit(repo, "-m", "initial")
        assert data["files_changed"]["added"] >= 1, (
            f"Initial commit should add >=1 file: {data['files_changed']}"
        )

    def test_IV2_modification_increments_modified(
        self, committed_repo: pathlib.Path
    ) -> None:
        """IV2: Editing an existing file increments modified, not added."""
        env = _env(committed_repo)
        (committed_repo / "module.py").write_text("def greet():\n return 'hi'\n")
        runner.invoke(cli, ["code", "add", "module.py"], env=env)
        data = _commit(committed_repo, "-m", "modify")
        fc = data["files_changed"]
        assert fc["modified"] == 1, f"Expected modified=1 after an edit: {fc}"
        assert fc["added"] == 0, f"Expected added=0 after an edit: {fc}"

    def test_IV3_deletion_increments_deleted(
        self, committed_repo: pathlib.Path
    ) -> None:
        """IV3: Removing a tracked file increments deleted.

        Uses a second file so deleting one doesn't leave an empty workdir
        (an empty manifest triggers "empty workdir" rather than a deletion).
        """
        env = _env(committed_repo)
        # Add a second file so there's still something tracked after the deletion.
        (committed_repo / "extra.py").write_text("y = 2\n")
        runner.invoke(cli, ["code", "add", "extra.py"], env=env)
        # The deletion is only measurable if extra.py really was committed, so
        # fail here if this setup commit did not succeed.
        setup = runner.invoke(cli, ["commit", "-m", "add extra"], env=env)
        assert setup.exit_code == 0, setup.output
        # Now delete extra.py — module.py remains, so workdir is non-empty.
        (committed_repo / "extra.py").unlink()
        runner.invoke(cli, ["code", "add", "extra.py"], env=env)
        data = _commit(committed_repo, "-m", "remove extra")
        fc = data["files_changed"]
        assert fc["deleted"] == 1, f"Expected deleted=1 after a removal: {fc}"
        assert fc["added"] == 0, f"Expected added=0 after a removal: {fc}"

    def test_IV4_total_always_matches_sum(
        self, committed_repo: pathlib.Path
    ) -> None:
        """IV4: files_changed.total = added + modified + deleted, always."""
        env = _env(committed_repo)
        (committed_repo / "new.py").write_text("x = 1\n")
        (committed_repo / "module.py").write_text("def greet():\n return 'hi'\n")
        runner.invoke(cli, ["code", "add", "new.py"], env=env)
        runner.invoke(cli, ["code", "add", "module.py"], env=env)
        data = _commit(committed_repo, "-m", "mixed")
        fc = data["files_changed"]
        expected = fc["added"] + fc["modified"] + fc["deleted"]
        assert fc["total"] == expected, (
            f"total {fc['total']} != sum {expected}: {fc}"
        )
410
411
412 # ---------------------------------------------------------------------------
413 # V Error paths
414 # ---------------------------------------------------------------------------
415
416
class TestErrorPathsV:
    """Section V: ``commit --json`` stays machine-readable on non-success paths."""

    @staticmethod
    def _first_json_line(output: str) -> str | None:
        """Return the first line of *output* that starts with ``{``, or ``None``.

        Lets the tests find the JSON document even when other lines are
        printed around it.
        """
        return next(
            (line for line in output.strip().splitlines() if line.startswith("{")),
            None,
        )

    def test_V1_missing_message_exits_1_with_json_error(
        self, repo: pathlib.Path
    ) -> None:
        """V1: Missing -m exits 1 with JSON error {"error": "no_message"}."""
        result = _commit_raw(repo)  # no -m
        assert result.exit_code == 1, (
            f"Expected exit 1 without -m, got {result.exit_code}. "
            f"Output: {result.output!r}"
        )
        json_line = self._first_json_line(result.output)
        assert json_line is not None, f"No JSON in output: {result.output!r}"
        data = json.loads(json_line)
        # .get() keeps a missing "error" key an assertion failure with the
        # message below rather than a KeyError.
        assert data.get("error") == "no_message", (
            f"Expected error='no_message', got {data.get('error')!r}"
        )

    def test_V2_clean_tree_json_response(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """V2: --json on a clean tree (no --dry-run) exits 0 with clean=true JSON.

        An agent using ``muse commit --json -m "msg"`` on a clean repo must get
        a machine-readable response — not a silent text-only "Nothing to commit".
        """
        monkeypatch.chdir(tmp_path)
        env = _env(tmp_path)
        # Check init like the ``repo`` fixture does, so a broken setup is
        # reported here rather than as a confusing commit failure below.
        result = runner.invoke(cli, ["init", "--domain", "code"], env=env)
        assert result.exit_code == 0, result.output
        (tmp_path / "module.py").write_text("x = 1\n")
        # Commit everything to establish HEAD == workdir
        result = runner.invoke(cli, ["commit", "-m", "initial"], env=env)
        assert result.exit_code == 0, result.output
        # Second commit on clean tree — must return JSON
        result = _commit_raw(tmp_path, "-m", "nothing", env=env)
        assert result.exit_code == 0, (
            f"Expected exit 0 for clean-tree --json commit, got {result.exit_code}. "
            f"Output: {result.output!r}"
        )
        json_line = self._first_json_line(result.output)
        assert json_line is not None, (
            f"No JSON on stdout for clean-tree --json commit: {result.output!r}"
        )
        data = json.loads(json_line)
        assert data.get("clean") is True, (
            f"Expected clean=true in clean-tree commit JSON: {data}"
        )
File History 1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago