tests/test_commit_json_schema.py · gabriel/muse

test_commit_json_schema.py python

462 lines 19.3 KB

sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago

1	"""Tests for the canonical ``muse commit --json`` schema.
2
3	``muse commit`` is the core write operation — every agent pipeline ends here.
4	The JSON output must expose all provenance fields so downstream consumers
5	(hub, orchestrators, other agents) never need a follow-up ``muse read`` just
6	to discover what model produced a commit.
7
8	Canonical schema (success path)
9	---------------------------------
10	::
11
12	{
13	"dry_run": false,
14	"commit_id": "sha256:<64-hex>",
15	"branch": str,
16	"snapshot_id": str,
17	"message": str,
18	"parent_commit_id": str | null,
19	"parent2_commit_id": str | null,
20	"committed_at": str, // ISO 8601 with timezone
21	"author": str,
22	"agent_id": str, // "" for human commits
23	"model_id": str, // "" for human commits
24	"toolchain_id": str,
25	"sem_ver_bump": str, // "none" | "patch" | "minor" | "major"
26	"breaking_changes": [str, ...],
27	"files_changed": {
28	"added": int,
29	"modified": int,
30	"deleted": int,
31	"total": int // added + modified + deleted
32	}
33	}
34
35	Dry-run schema is identical except ``dry_run`` is ``true`` and ``clean`` may
36	appear when the working tree has no changes.
37
38	Coverage
39	--------
40	I Schema invariants
41	I1 All required keys present on a normal commit
42	I2 commit_id is sha256:-prefixed
43	I3 committed_at is ISO 8601 with timezone
44	I4 sem_ver_bump is a valid enum value
45	I5 breaking_changes is always a list
46	I6 files_changed has added, modified, deleted, total keys
47	I7 files_changed.total = added + modified + deleted
48
49	II Agent provenance in commit output
50	II1 agent_id populated from --agent-id flag
51	II2 model_id populated from --model-id flag
52	II3 toolchain_id populated from --toolchain-id flag
53	II4 agent_id empty string (not null) for human commits
54	II5 model_id empty string (not null) for human commits
55	II6 model_id from MUSE_MODEL_ID env when flag absent
56	II7 toolchain_id from MUSE_TOOLCHAIN_ID env when flag absent
57	II8 --agent-id flag overrides MUSE_AGENT_ID env
58
59	III Dry-run schema parity
60	III1 dry_run schema has same required keys as success path (minus clean)
61	III2 dry_run: true in dry-run output
62	III3 dry_run: false in normal commit output
63	III4 dry-run output has model_id and toolchain_id
64	III5 dry-run clean tree exits 1 with clean=true JSON
65
66	IV File change accounting
67	IV1 Initial commit files_changed.added >= 1
68	IV2 Modification increments modified, not added
69	IV3 Deletion increments deleted
70	IV4 files_changed.total = added + modified + deleted always
71
72	V Error paths (JSON mode)
73	V1 Missing -m exits 1 with JSON {"error": "no_message", ...}
74	V2 Clean tree (no --dry-run) exits 0 with JSON {"clean": true, ...}
75	Not covered here: empty workdir, expected to exit 1 with JSON {"error": "empty_workdir", ...}
76	"""
77
78	from __future__ import annotations
79	from collections.abc import Mapping
80
81	import json
82	import os
83	import pathlib
84
85	import pytest
86
87	from tests.cli_test_helper import CliRunner, InvokeResult
88
# ``cli`` is forwarded as-is to every ``runner.invoke`` call in this module.
cli = None
# One shared runner instance drives all CLI invocations below.
runner = CliRunner()
91
92	_REQUIRED_KEYS = {
93	"dry_run",
94	"commit_id", "branch", "snapshot_id",
95	"message", "parent_commit_id", "parent2_commit_id",
96	"committed_at", "author",
97	"agent_id", "model_id", "toolchain_id",
98	"sem_ver_bump", "breaking_changes",
99	"files_changed",
100	}
101
102	_FILES_CHANGED_KEYS = {"added", "modified", "deleted", "total"}
103	_VALID_SEM_VER_BUMPS = {"none", "patch", "minor", "major"}
104
105
106	def _env(root: pathlib.Path) -> Mapping[str, str]:
107	return {"MUSE_REPO_ROOT": str(root)}
108
109
def _commit(root: pathlib.Path, *flags: str, env: Mapping[str, str] | None = None) -> Mapping[str, object]:
    """Run ``commit --json`` with *flags* and return the parsed JSON payload.

    Args:
        root: Repository root; exported to the CLI as ``MUSE_REPO_ROOT``.
        *flags: Extra command-line arguments appended after ``commit --json``.
        env: Optional extra environment variables; on a key clash these
            override the ``MUSE_REPO_ROOT`` entry derived from *root*.

    Returns:
        The object decoded from the command's (stripped) output.

    Raises:
        AssertionError: If the command exits with a non-zero code.
    """
    # Merge via ``**`` unpacking.  A bare ``{a, b}`` literal would build a
    # *set* of two dicts and fail with ``TypeError: unhashable type: 'dict'``.
    merged_env = {**_env(root), **(env or {})}
    result = runner.invoke(cli, ["commit", "--json", *flags], env=merged_env)
    assert result.exit_code == 0, f"commit --json failed (exit {result.exit_code}):\n{result.output}"
    return json.loads(result.output.strip())
115
116
def _commit_raw(root: pathlib.Path, *args: str, env: Mapping[str, str] | None = None) -> InvokeResult:
    """Run ``commit --json`` with *args* and return the raw invocation result.

    Unlike ``_commit`` this helper makes no assertion about the exit code, so
    it can be used for error paths and dry runs.

    Args:
        root: Repository root; exported to the CLI as ``MUSE_REPO_ROOT``.
        *args: Extra command-line arguments appended after ``commit --json``.
        env: Optional extra environment variables; on a key clash these
            override the ``MUSE_REPO_ROOT`` entry derived from *root*.

    Returns:
        Whatever ``runner.invoke`` returns for the command.
    """
    # Merge via ``**`` unpacking.  A bare ``{a, b}`` literal would build a
    # *set* of two dicts and fail with ``TypeError: unhashable type: 'dict'``.
    merged_env = {**_env(root), **(env or {})}
    return runner.invoke(cli, ["commit", "--json", *args], env=merged_env)
120
121
@pytest.fixture()
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Provide an initialised code-domain repo that has no commits yet.

    The working directory is switched to ``tmp_path``, ``init --domain code``
    is run (and must succeed), and a small ``module.py`` is written and passed
    to ``code add``.  Nothing is committed.
    """
    monkeypatch.chdir(tmp_path)
    cli_env = _env(tmp_path)

    init_result = runner.invoke(cli, ["init", "--domain", "code"], env=cli_env)
    assert init_result.exit_code == 0, init_result.output

    module_file = tmp_path / "module.py"
    module_file.write_text("def greet():\n return 'hello'\n")
    # The outcome of ``code add`` is intentionally not inspected here.
    runner.invoke(cli, ["code", "add", "module.py"], env=cli_env)
    return tmp_path
132
133
@pytest.fixture()
def committed_repo(
    repo: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> pathlib.Path:
    """Provide the ``repo`` fixture's repository after one successful commit."""
    first_commit = runner.invoke(cli, ["commit", "-m", "initial"], env=_env(repo))
    assert first_commit.exit_code == 0, first_commit.output
    return repo
143
144
145	# ---------------------------------------------------------------------------
146	# I Schema invariants
147	# ---------------------------------------------------------------------------
148
149
class TestSchemaInvariantsI:
    """Section I: structural invariants of the ``commit --json`` payload."""

    def test_I1_all_required_keys_present(self, repo: pathlib.Path) -> None:
        """I1: the payload carries every key listed in ``_REQUIRED_KEYS``."""
        payload = _commit(repo, "-m", "initial commit")
        missing = _REQUIRED_KEYS.difference(payload.keys())
        assert not missing, f"Missing required keys in commit --json: {missing}"

    def test_I2_commit_id_sha256_prefixed(self, repo: pathlib.Path) -> None:
        """I2: ``commit_id`` begins with the ``sha256:`` prefix."""
        commit_id = _commit(repo, "-m", "initial commit")["commit_id"]
        assert commit_id.startswith("sha256:"), (
            f"commit_id must be sha256:-prefixed, got {commit_id!r}"
        )

    def test_I3_committed_at_is_iso8601_with_tz(self, repo: pathlib.Path) -> None:
        """I3: ``committed_at`` parses with ``fromisoformat`` and is tz-aware."""
        from datetime import datetime

        stamp = _commit(repo, "-m", "initial")["committed_at"]
        parsed = datetime.fromisoformat(stamp)
        assert parsed.tzinfo is not None, (
            f"committed_at lacks timezone: {stamp!r}"
        )

    def test_I4_sem_ver_bump_valid_enum(self, repo: pathlib.Path) -> None:
        """I4: ``sem_ver_bump`` is a member of ``_VALID_SEM_VER_BUMPS``."""
        bump = _commit(repo, "-m", "initial")["sem_ver_bump"]
        assert bump in _VALID_SEM_VER_BUMPS, (
            f"sem_ver_bump {bump!r} not in {_VALID_SEM_VER_BUMPS}"
        )

    def test_I5_breaking_changes_always_list(self, repo: pathlib.Path) -> None:
        """I5: ``breaking_changes`` is present and is a list."""
        changes = _commit(repo, "-m", "initial")["breaking_changes"]
        assert isinstance(changes, list), (
            f"breaking_changes must be list, got {type(changes)}"
        )

    def test_I6_files_changed_has_all_keys(self, repo: pathlib.Path) -> None:
        """I6: ``files_changed`` contains added, modified, deleted and total."""
        counts = _commit(repo, "-m", "initial")["files_changed"]
        missing = _FILES_CHANGED_KEYS.difference(counts.keys())
        assert not missing, (
            f"files_changed missing keys: {missing}. Got: {counts}"
        )

    def test_I7_files_changed_total_is_sum(self, repo: pathlib.Path) -> None:
        """I7: ``files_changed.total`` equals added + modified + deleted."""
        counts = _commit(repo, "-m", "initial")["files_changed"]
        added = counts["added"]
        modified = counts["modified"]
        deleted = counts["deleted"]
        expected = added + modified + deleted
        assert counts["total"] == expected, (
            f"files_changed.total {counts['total']} != "
            f"added({added}) + modified({modified}) + deleted({deleted}) = {expected}"
        )
205
206
207	# ---------------------------------------------------------------------------
208	# II Agent provenance in commit output
209	# ---------------------------------------------------------------------------
210
211
class TestAgentProvenanceII:
    """Section II: agent/model/toolchain provenance fields in the payload."""

    def test_II1_agent_id_in_output(self, repo: pathlib.Path) -> None:
        """II1: the value given to ``--agent-id`` is echoed as ``agent_id``."""
        agent_id = _commit(repo, "-m", "bot commit", "--agent-id", "test-bot")["agent_id"]
        assert agent_id == "test-bot", (
            f"Expected agent_id='test-bot', got {agent_id!r}"
        )

    def test_II2_model_id_in_output(self, repo: pathlib.Path) -> None:
        """II2: the value given to ``--model-id`` is echoed as ``model_id``."""
        model_id = _commit(repo, "-m", "model commit", "--model-id", "claude-opus-4")["model_id"]
        assert model_id == "claude-opus-4", (
            f"Expected model_id='claude-opus-4', got {model_id!r}"
        )

    def test_II3_toolchain_id_in_output(self, repo: pathlib.Path) -> None:
        """II3: the value given to ``--toolchain-id`` is echoed as ``toolchain_id``."""
        toolchain_id = _commit(repo, "-m", "tc commit", "--toolchain-id", "cursor-v2")["toolchain_id"]
        assert toolchain_id == "cursor-v2", (
            f"Expected toolchain_id='cursor-v2', got {toolchain_id!r}"
        )

    def test_II4_agent_id_empty_string_for_human(self, repo: pathlib.Path) -> None:
        """II4: without ``--agent-id`` the field is the empty string."""
        agent_id = _commit(repo, "-m", "human commit")["agent_id"]
        assert agent_id == "", (
            f"agent_id must be '' for human commit, got {agent_id!r}"
        )

    def test_II5_model_id_empty_string_for_human(self, repo: pathlib.Path) -> None:
        """II5: without ``--model-id`` the field is the empty string."""
        model_id = _commit(repo, "-m", "human commit")["model_id"]
        assert model_id == "", (
            f"model_id must be '' for human commit, got {model_id!r}"
        )

    def test_II6_model_id_from_env(
        self, repo: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """II6: ``MUSE_MODEL_ID`` supplies ``model_id`` when no flag is given."""
        extra_env = dict(_env(repo), MUSE_MODEL_ID="claude-haiku-4")
        model_id = _commit(repo, "-m", "env model", env=extra_env)["model_id"]
        assert model_id == "claude-haiku-4", (
            f"Expected model_id='claude-haiku-4' from env, got {model_id!r}"
        )

    def test_II7_toolchain_id_from_env(
        self, repo: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """II7: ``MUSE_TOOLCHAIN_ID`` supplies ``toolchain_id`` when no flag is given."""
        extra_env = dict(_env(repo), MUSE_TOOLCHAIN_ID="agentic-v3")
        toolchain_id = _commit(repo, "-m", "env tc", env=extra_env)["toolchain_id"]
        assert toolchain_id == "agentic-v3", (
            f"Expected toolchain_id='agentic-v3' from env, got {toolchain_id!r}"
        )

    def test_II8_flag_overrides_env_for_agent_id(
        self, repo: pathlib.Path
    ) -> None:
        """II8: ``--agent-id`` wins over a ``MUSE_AGENT_ID`` environment value."""
        extra_env = dict(_env(repo), MUSE_AGENT_ID="env-bot")
        agent_id = _commit(
            repo, "-m", "override", "--agent-id", "flag-bot", env=extra_env
        )["agent_id"]
        assert agent_id == "flag-bot", (
            f"Expected flag-bot to override env-bot, got {agent_id!r}"
        )
277
278
279	# ---------------------------------------------------------------------------
280	# III Dry-run schema parity
281	# ---------------------------------------------------------------------------
282
283
class TestDryRunSchemaIII:
    """Section III: ``--dry-run`` output mirrors the real commit schema."""

    def test_III1_dry_run_has_same_required_keys(self, repo: pathlib.Path) -> None:
        """III1: a dry run emits every key in ``_REQUIRED_KEYS``."""
        outcome = _commit_raw(repo, "-m", "check", "--dry-run")
        assert outcome.exit_code == 0, f"dry-run failed:\n{outcome.output}"
        payload = json.loads(outcome.output.strip())
        missing = _REQUIRED_KEYS.difference(payload.keys())
        assert not missing, f"dry-run missing required keys: {missing}"

    def test_III2_dry_run_flag_is_true(self, repo: pathlib.Path) -> None:
        """III2: ``dry_run`` is ``True`` in dry-run output."""
        outcome = _commit_raw(repo, "-m", "check", "--dry-run")
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output.strip())
        assert payload["dry_run"] is True

    def test_III3_dry_run_false_on_real_commit(self, repo: pathlib.Path) -> None:
        """III3: ``dry_run`` is ``False`` for a real commit."""
        payload = _commit(repo, "-m", "real commit")
        assert payload["dry_run"] is False

    def test_III4_dry_run_has_model_id_and_toolchain_id(
        self, repo: pathlib.Path
    ) -> None:
        """III4: provenance flags are reflected in dry-run output too."""
        cli_args = [
            "-m", "preflight",
            "--dry-run",
            "--model-id", "claude-opus-4",
            "--toolchain-id", "cursor",
        ]
        outcome = _commit_raw(repo, *cli_args)
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output.strip())
        assert payload["model_id"] == "claude-opus-4", (
            f"model_id missing from dry-run output: {payload}"
        )
        assert payload["toolchain_id"] == "cursor", (
            f"toolchain_id missing from dry-run output: {payload}"
        )

    def test_III5_dry_run_clean_tree_exits_1(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """III5: a dry run on a clean tree exits 1 and reports ``clean=true``.

        A dedicated repo is built here rather than using the ``repo`` fixture:
        everything in the workdir (including whatever ``init`` created) is
        committed once so HEAD matches the workdir before the dry run.
        """
        monkeypatch.chdir(tmp_path)
        cli_env = _env(tmp_path)
        runner.invoke(cli, ["init", "--domain", "code"], env=cli_env)
        (tmp_path / "module.py").write_text("x = 1\n")

        # Bring HEAD in line with the workdir.
        setup = runner.invoke(cli, ["commit", "-m", "initial"], env=cli_env)
        assert setup.exit_code == 0, setup.output

        # With nothing left to commit the dry run must signal a clean tree.
        outcome = _commit_raw(tmp_path, "-m", "nothing", "--dry-run", env=cli_env)
        assert outcome.exit_code == 1, (
            f"Expected exit 1 for dry-run on clean tree, got {outcome.exit_code}. "
            f"Output: {outcome.output}"
        )
        payload = json.loads(outcome.output.strip())
        assert payload.get("clean") is True, (
            f"Expected clean=true in dry-run clean-tree JSON: {payload}"
        )
        assert payload.get("dry_run") is True
349
350
351	# ---------------------------------------------------------------------------
352	# IV File change accounting
353	# ---------------------------------------------------------------------------
354
355
class TestFileChangeAccountingIV:
    """Section IV: the ``files_changed`` counters reflect what was committed."""

    def test_IV1_initial_commit_added_gte_1(self, repo: pathlib.Path) -> None:
        """IV1: the first commit reports at least one added file."""
        counts = _commit(repo, "-m", "initial")["files_changed"]
        assert counts["added"] >= 1, (
            f"Initial commit should add >=1 file: {counts}"
        )

    def test_IV2_modification_increments_modified(
        self, committed_repo: pathlib.Path
    ) -> None:
        """IV2: rewriting a committed file counts as modified, not added."""
        cli_env = _env(committed_repo)
        target = committed_repo / "module.py"
        target.write_text("def greet():\n return 'hi'\n")
        runner.invoke(cli, ["code", "add", "module.py"], env=cli_env)

        counts = _commit(committed_repo, "-m", "modify")["files_changed"]
        assert counts["modified"] == 1
        assert counts["added"] == 0

    def test_IV3_deletion_increments_deleted(
        self, committed_repo: pathlib.Path
    ) -> None:
        """IV3: removing a committed file counts as deleted.

        A second file is committed first so that the deletion does not leave
        the workdir empty (an empty manifest triggers "empty workdir" rather
        than a deletion).
        """
        cli_env = _env(committed_repo)
        extra = committed_repo / "extra.py"

        # Commit a second file so something remains after the deletion.
        extra.write_text("y = 2\n")
        runner.invoke(cli, ["code", "add", "extra.py"], env=cli_env)
        runner.invoke(cli, ["commit", "-m", "add extra"], env=cli_env)

        # Delete it again; module.py is still present.
        extra.unlink()
        runner.invoke(cli, ["code", "add", "extra.py"], env=cli_env)

        counts = _commit(committed_repo, "-m", "remove extra")["files_changed"]
        assert counts["deleted"] == 1
        assert counts["added"] == 0

    def test_IV4_total_always_matches_sum(
        self, committed_repo: pathlib.Path
    ) -> None:
        """IV4: ``total`` equals added + modified + deleted for a mixed commit."""
        cli_env = _env(committed_repo)
        (committed_repo / "new.py").write_text("x = 1\n")
        (committed_repo / "module.py").write_text("def greet():\n return 'hi'\n")
        for name in ("new.py", "module.py"):
            runner.invoke(cli, ["code", "add", name], env=cli_env)

        counts = _commit(committed_repo, "-m", "mixed")["files_changed"]
        expected = counts["added"] + counts["modified"] + counts["deleted"]
        assert counts["total"] == expected, (
            f"total {counts['total']} != sum {expected}: {counts}"
        )
410
411
412	# ---------------------------------------------------------------------------
413	# V Error paths
414	# ---------------------------------------------------------------------------
415
416
class TestErrorPathsV:
    """Section V: error and no-op paths still answer in JSON."""

    @staticmethod
    def _first_json_line(output: str) -> str | None:
        """Return the first line of *output* starting with ``{``, else ``None``."""
        for line in output.strip().splitlines():
            if line.startswith("{"):
                return line
        return None

    def test_V1_missing_message_exits_1_with_json_error(
        self, repo: pathlib.Path
    ) -> None:
        """V1: omitting ``-m`` exits 1 and emits ``{"error": "no_message"}``."""
        outcome = _commit_raw(repo)  # deliberately no -m
        assert outcome.exit_code == 1

        json_line = self._first_json_line(outcome.output)
        assert json_line is not None, f"No JSON in output: {outcome.output!r}"

        payload = json.loads(json_line)
        assert payload["error"] == "no_message", (
            f"Expected error='no_message', got {payload.get('error')!r}"
        )

    def test_V2_clean_tree_json_response(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """V2: ``--json`` on a clean tree (no ``--dry-run``) exits 0 with clean=true.

        A caller running ``muse commit --json -m "msg"`` against a repo with
        nothing to commit must receive a machine-readable answer rather than
        only a textual "Nothing to commit".
        """
        monkeypatch.chdir(tmp_path)
        cli_env = _env(tmp_path)
        runner.invoke(cli, ["init", "--domain", "code"], env=cli_env)
        (tmp_path / "module.py").write_text("x = 1\n")

        # First commit brings HEAD in line with the workdir.
        setup = runner.invoke(cli, ["commit", "-m", "initial"], env=cli_env)
        assert setup.exit_code == 0, setup.output

        # The second commit has nothing to record but must still emit JSON.
        outcome = _commit_raw(tmp_path, "-m", "nothing", env=cli_env)
        assert outcome.exit_code == 0

        json_line = self._first_json_line(outcome.output)
        assert json_line is not None, (
            f"No JSON on stdout for clean-tree --json commit: {outcome.output!r}"
        )

        payload = json.loads(json_line)
        assert payload.get("clean") is True, (
            f"Expected clean=true in clean-tree commit JSON: {payload}"
        )

File History 1 commit

sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago

function _env

function _commit

function _commit_raw

function repo

function committed_repo

class TestSchemaInvariantsI

function test_I1_all_required_keys_present

function test_I2_commit_id_sha256_prefixed

function test_I3_committed_at_is_iso8601_with_tz

function test_I4_sem_ver_bump_valid_enum

function test_I5_breaking_changes_always_list

function test_I6_files_changed_has_all_keys

function test_I7_files_changed_total_is_sum

class TestAgentProvenanceII

function test_II1_agent_id_in_output

function test_II2_model_id_in_output

function test_II3_toolchain_id_in_output

function test_II4_agent_id_empty_string_for_human

function test_II5_model_id_empty_string_for_human

function test_II6_model_id_from_env

function test_II7_toolchain_id_from_env

function test_II8_flag_overrides_env_for_agent_id

class TestDryRunSchemaIII

function test_III1_dry_run_has_same_required_keys

function test_III2_dry_run_flag_is_true

function test_III3_dry_run_false_on_real_commit

function test_III4_dry_run_has_model_id_and_toolchain_id

function test_III5_dry_run_clean_tree_exits_1

class TestFileChangeAccountingIV

function test_IV1_initial_commit_added_gte_1

function test_IV2_modification_increments_modified

function test_IV3_deletion_increments_deleted

function test_IV4_total_always_matches_sum

class TestErrorPathsV

function test_V1_missing_message_exits_1_with_json_error

function test_V2_clean_tree_json_response

Pathtests/test_commit_json_schema.py

Lines462

Size19.3 KB

LangPython

Refsha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b

Object ID

sha256:1a25395180f7379ac253b909e4dbd711787a6b63696ea7d550a8c36a6ea61844…

Last commit

sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b

fix: try fetch/presign before fetch/mpack to avoi…

7 days ago

Quick links

Blame History