gabriel / muse public
test_log_json_schema.py python
456 lines 18.8 KB
Raw
sha256:5c98ba9dd33607ba1557d7c03c64020e71c27c1e7bbaa984e7a91f23d5297b14 feat: add signer_public_key to muse log --json output (VII … Sonnet 4.6 20 days ago
1 """Tests for the canonical ``muse log --json`` schema.
2
3 Every commit object in the commits array must emit the same shape.
4 Agents rely on this stability — missing fields break provenance tracking
5 and force fragile ``dict.get`` guards.
6
7 Canonical schema
8 ----------------
9 ::
10
11 {
12 "truncated": bool,
13 "commits": [
14 {
15 "commit_id": str, // sha256:-prefixed
16 "branch": str,
17 "message": str,
18 "author": str, // "" when user.handle not configured
19 "agent_id": str, // "" when not an agent commit
20 "model_id": str, // "" when not an agent commit
21 "committed_at": str, // ISO-8601
22 "parent_commit_id": str | null, // sha256:-prefixed or null
23 "parent2_commit_id": str | null, // sha256:-prefixed or null (merge)
24 "snapshot_id": str | null, // sha256:-prefixed
25 "sem_ver_bump": str | null,
26 "breaking_changes": [str, ...],
27 "metadata": {str: ...},
28 "files_added": [str, ...], // always populated in --json mode
29 "files_removed": [str, ...], // always populated in --json mode
30 "files_modified": [str, ...], // always populated in --json mode
31 "structured_delta": dict | null // symbol-level diff; null for non-code commits
32 }
33 ]
34 }
35
36 Coverage matrix
37 ---------------
38 I Schema invariants (top-level shape)
39 I1 Top-level keys: truncated + commits always present
40 I2 Each commit has all required keys
41 I3 commit_id is sha256:-prefixed
42 I4 parent_commit_id is sha256:-prefixed or null
43 I5 snapshot_id is sha256:-prefixed
44
45 II File lists — always populated in --json mode, no --stat needed
46 II1 files_added populated for a commit that added a file
47 II2 files_modified populated for a commit that modified a file
48 II3 files_removed populated for a commit that deleted a file
49 II4 Initial commit: files_added non-empty, files_removed/modified empty
50
51 III Agent provenance fields
52 III1 agent_id present (empty string for non-agent commits)
53 III2 model_id present (empty string for non-agent commits)
54 III3 agent_id populated when --agent-id passed to commit
55 III4 model_id populated when --model-id passed to commit
56
57 IV Filters
58 IV1 --author filter returns only matching commits
59 IV2 --author filter is case-insensitive substring match
60 IV3 -n / --limit caps the number of commits returned
61 IV4 truncated=true when limit is hit
62 IV5 truncated=false when all commits fit
63
64 V Edge cases
65 V1 Single commit (initial): parent_commit_id is null
66 V2 Merge commit: parent2_commit_id is sha256:-prefixed (not null)
67
68 VI structured_delta
69 VI1 structured_delta key always present (never absent from commit object)
70 VI2 structured_delta is a dict with an "ops" key for a code-file commit
  VI3  structured_delta is always a dict or None — never another type
72 """
73
74 from __future__ import annotations
75 from collections.abc import Mapping
76
77 import json
78 import pathlib
79
80 import pytest
81
82 from tests.cli_test_helper import CliRunner
83
# NOTE(review): ``cli`` is handed to every ``runner.invoke`` call in this
# module as the first argument, yet it is None here — presumably the project's
# CliRunner helper does not need an app object; confirm against
# tests/cli_test_helper.py.
cli = None
# Shared runner instance used by all fixtures and tests in this module.
runner = CliRunner()
86
87 _REQUIRED_COMMIT_KEYS = {
88 "commit_id", "branch", "message", "author",
89 "agent_id", "model_id",
90 "committed_at", "parent_commit_id", "parent2_commit_id",
91 "snapshot_id", "sem_ver_bump", "breaking_changes", "metadata",
92 "files_added", "files_removed", "files_modified",
93 "structured_delta",
94 }
95
96 _REQUIRED_TOP_KEYS = {"truncated", "commits"}
97
98
99 def _env(root: pathlib.Path) -> Mapping[str, str]:
100 return {"MUSE_REPO_ROOT": str(root)}
101
102
def _log_json(root: pathlib.Path, *extra_args: str) -> Mapping[str, object]:
    """Invoke ``log --json`` (plus *extra_args*) for *root* and return the parsed output.

    Fails the calling test via ``assert`` when the command exits non-zero.
    """
    argv = ["log", "--json", *extra_args]
    outcome = runner.invoke(cli, argv, env=_env(root))
    assert outcome.exit_code == 0, f"log --json failed: {outcome.output}"
    raw_json = outcome.output.strip()
    return json.loads(raw_json)
107
108
@pytest.fixture()
def single_commit_repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Code-domain repo holding exactly one commit, which adds ``main.py``.

    The working directory is switched to *tmp_path* via ``monkeypatch`` and the
    repo root is returned.
    """
    monkeypatch.chdir(tmp_path)
    repo_env = _env(tmp_path)

    init_result = runner.invoke(cli, ["init", "--domain", "code"], env=repo_env)
    assert init_result.exit_code == 0, init_result.output

    source_file = tmp_path / "main.py"
    source_file.write_text("x = 1\n")
    runner.invoke(cli, ["code", "add", "main.py"], env=repo_env)

    commit_result = runner.invoke(cli, ["commit", "-m", "initial"], env=repo_env)
    assert commit_result.exit_code == 0, commit_result.output
    return tmp_path
120
121
@pytest.fixture()
def multi_commit_repo(single_commit_repo: pathlib.Path) -> pathlib.Path:
    """Repo with 3 commits: add, modify, delete.

    Extends ``single_commit_repo`` with two further commits.  Each commit's
    exit code is asserted so that a broken setup fails here, at the fixture,
    rather than surfacing later as a confusing index or assertion error
    inside a test that expected three commits.
    """
    root = single_commit_repo
    env = _env(root)

    # Commit 2: modify main.py + add extra.py
    (root / "main.py").write_text("x = 2\n")
    (root / "extra.py").write_text("e = 1\n")
    runner.invoke(cli, ["code", "add", "main.py", "extra.py"], env=env)
    result = runner.invoke(cli, ["commit", "-m", "modify and add"], env=env)
    assert result.exit_code == 0, result.output

    # Commit 3: delete extra.py
    (root / "extra.py").unlink()
    runner.invoke(cli, ["code", "add", "extra.py"], env=env)
    result = runner.invoke(cli, ["commit", "-m", "delete extra"], env=env)
    assert result.exit_code == 0, result.output

    return root
140
141
142 # ---------------------------------------------------------------------------
143 # I Schema invariants
144 # ---------------------------------------------------------------------------
145
146
class TestSchemaInvariantsI:
    """I — shape invariants of the ``log --json`` payload."""

    def test_I1_top_level_keys(self, single_commit_repo: pathlib.Path) -> None:
        """I1: the payload carries ``truncated`` (bool) and ``commits`` (list)."""
        payload = _log_json(single_commit_repo)
        assert _REQUIRED_TOP_KEYS.issubset(payload.keys()), (
            f"Missing top-level keys: {_REQUIRED_TOP_KEYS - payload.keys()}"
        )
        truncated_flag = payload["truncated"]
        commit_list = payload["commits"]
        assert isinstance(truncated_flag, bool)
        assert isinstance(commit_list, list)

    def test_I2_each_commit_has_all_required_keys(self, single_commit_repo: pathlib.Path) -> None:
        """I2: no commit object lacks any of the required keys."""
        payload = _log_json(single_commit_repo)
        assert len(payload["commits"]) >= 1
        for entry in payload["commits"]:
            missing = _REQUIRED_COMMIT_KEYS - entry.keys()
            assert not missing, f"Commit missing keys: {missing}"

    def test_I3_commit_id_is_sha256_prefixed(self, single_commit_repo: pathlib.Path) -> None:
        """I3: every commit_id starts with ``sha256:``."""
        payload = _log_json(single_commit_repo)
        for entry in payload["commits"]:
            commit_id = entry["commit_id"]
            assert commit_id.startswith("sha256:"), (
                f"commit_id must be sha256:-prefixed, got {commit_id!r}"
            )

    def test_I4_parent_commit_id_is_sha256_prefixed_or_null(
        self, multi_commit_repo: pathlib.Path
    ) -> None:
        """I4: parent_commit_id is ``sha256:``-prefixed, except null on the initial commit."""
        history = _log_json(multi_commit_repo)["commits"]
        # The log is newest-first, so everything but the last entry has a parent.
        for entry in history[:-1]:
            parent_id = entry["parent_commit_id"]
            assert parent_id is not None
            assert parent_id.startswith("sha256:"), (
                f"parent_commit_id must be sha256:-prefixed, got {parent_id!r}"
            )
        # The oldest entry is the initial commit and has no parent.
        oldest = history[-1]
        assert oldest["parent_commit_id"] is None

    def test_I5_snapshot_id_is_sha256_prefixed(self, single_commit_repo: pathlib.Path) -> None:
        """I5: a non-null snapshot_id starts with ``sha256:``."""
        payload = _log_json(single_commit_repo)
        for entry in payload["commits"]:
            snapshot_id = entry["snapshot_id"]
            if snapshot_id is None:
                continue
            assert snapshot_id.startswith("sha256:"), (
                f"snapshot_id must be sha256:-prefixed, got {snapshot_id!r}"
            )
197
198
199 # ---------------------------------------------------------------------------
200 # II File lists — always populated in --json mode
201 # ---------------------------------------------------------------------------
202
203
class TestFileListsII:
    """II — file lists are populated in ``--json`` mode without ``--stat``."""

    def test_II1_files_added_populated_no_stat_flag(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """II1: files_added populated in --json mode without --stat."""
        data = _log_json(single_commit_repo)
        # The initial commit added main.py
        initial = data["commits"][-1]
        assert "main.py" in initial["files_added"], (
            f"Expected main.py in files_added, got {initial['files_added']}"
        )

    def test_II2_files_modified_populated(self, multi_commit_repo: pathlib.Path) -> None:
        """II2: files_modified populated for a modify commit."""
        data = _log_json(multi_commit_repo)
        commits = data["commits"]
        # Second-most-recent commit modified main.py (and added extra.py)
        modify_commit = commits[1]  # commits are newest-first
        assert "main.py" in modify_commit["files_modified"], (
            f"Expected main.py in files_modified, got {modify_commit}"
        )

    def test_II3_files_removed_populated(self, multi_commit_repo: pathlib.Path) -> None:
        """II3: files_removed populated for a delete commit."""
        data = _log_json(multi_commit_repo)
        # Most recent commit deleted extra.py
        delete_commit = data["commits"][0]
        assert "extra.py" in delete_commit["files_removed"], (
            f"Expected extra.py in files_removed, got {delete_commit}"
        )

    def test_II4_initial_commit_files_removed_and_modified_empty(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """II4: Initial commit: files_added non-empty, files_removed/modified empty."""
        data = _log_json(single_commit_repo)
        initial = data["commits"][-1]
        # The module's coverage matrix lists "files_added non-empty" under II4;
        # check it here so the three lists are validated together.
        assert initial["files_added"], (
            f"Initial commit must list its added files, got {initial['files_added']!r}"
        )
        assert initial["files_removed"] == []
        assert initial["files_modified"] == []
243
244
245 # ---------------------------------------------------------------------------
246 # III Agent provenance fields
247 # ---------------------------------------------------------------------------
248
249
class TestAgentProvenanceIII:
    """III — ``agent_id`` / ``model_id`` provenance fields."""

    def test_III1_agent_id_present_empty_for_non_agent_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """III1: every commit carries an ``agent_id`` key holding a string."""
        payload = _log_json(single_commit_repo)
        for entry in payload["commits"]:
            assert "agent_id" in entry, "agent_id must always be present"
            agent_id = entry["agent_id"]
            assert isinstance(agent_id, str)

    def test_III2_model_id_present_empty_for_non_agent_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """III2: every commit carries a ``model_id`` key holding a string."""
        payload = _log_json(single_commit_repo)
        for entry in payload["commits"]:
            assert "model_id" in entry, "model_id must always be present"
            model_id = entry["model_id"]
            assert isinstance(model_id, str)

    def test_III3_agent_id_populated_when_passed_to_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """III3: the newest commit reports the ``--agent-id`` given at commit time."""
        repo = single_commit_repo
        repo_env = _env(repo)

        new_file = repo / "agent_file.py"
        new_file.write_text("a = 1\n")
        runner.invoke(cli, ["code", "add", "agent_file.py"], env=repo_env)

        commit_args = ["commit", "-m", "agent commit", "--agent-id", "test-agent-42"]
        commit_result = runner.invoke(cli, commit_args, env=repo_env)
        assert commit_result.exit_code == 0, commit_result.output

        # -n 1 restricts the log to the commit just created.
        newest = _log_json(repo, "-n", "1")["commits"][0]
        assert newest["agent_id"] == "test-agent-42", (
            f"Expected agent_id='test-agent-42', got {newest['agent_id']!r}"
        )

    def test_III4_model_id_populated_when_passed_to_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """III4: the newest commit reports the ``--model-id`` given at commit time."""
        repo = single_commit_repo
        repo_env = _env(repo)

        new_file = repo / "model_file.py"
        new_file.write_text("m = 1\n")
        runner.invoke(cli, ["code", "add", "model_file.py"], env=repo_env)

        commit_args = ["commit", "-m", "model commit", "--model-id", "claude-sonnet-4-6"]
        commit_result = runner.invoke(cli, commit_args, env=repo_env)
        assert commit_result.exit_code == 0, commit_result.output

        # -n 1 restricts the log to the commit just created.
        newest = _log_json(repo, "-n", "1")["commits"][0]
        assert newest["model_id"] == "claude-sonnet-4-6", (
            f"Expected model_id='claude-sonnet-4-6', got {newest['model_id']!r}"
        )
310
311
312 # ---------------------------------------------------------------------------
313 # IV Filters
314 # ---------------------------------------------------------------------------
315
316
class TestFiltersIV:
    """IV — ``--author`` and ``-n`` filters, and the ``truncated`` flag."""

    def test_IV1_author_filter_matches_commits(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """IV1: --author filter returns only commits matching the author."""
        monkeypatch.chdir(tmp_path)
        env = _env(tmp_path)
        runner.invoke(cli, ["init", "--domain", "code"], env=env)
        (tmp_path / "a.py").write_text("a\n")
        runner.invoke(cli, ["code", "add", "a.py"], env=env)
        result = runner.invoke(
            cli, ["commit", "-m", "gabriel commit", "--author", "gabriel"], env=env
        )
        assert result.exit_code == 0, result.output

        (tmp_path / "b.py").write_text("b\n")
        runner.invoke(cli, ["code", "add", "b.py"], env=env)
        result = runner.invoke(
            cli, ["commit", "-m", "agent commit", "--author", "bot-agent"], env=env
        )
        assert result.exit_code == 0, result.output

        data = _log_json(tmp_path, "--author", "gabriel")
        commits = data["commits"]
        # all() / any() succeed vacuously on an empty list, so first make sure
        # the filter actually returned the commit it is supposed to match.
        assert commits, "--author filter returned no commits; expected the 'gabriel' commit"
        assert all("gabriel" in c["author"].lower() for c in commits), (
            f"--author filter returned non-matching commits: {[c['author'] for c in commits]}"
        )
        assert not any(c["author"] == "bot-agent" for c in commits)

    def test_IV2_author_filter_is_case_insensitive(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """IV2: --author filter is a case-insensitive substring match."""
        monkeypatch.chdir(tmp_path)
        env = _env(tmp_path)
        runner.invoke(cli, ["init", "--domain", "code"], env=env)
        (tmp_path / "x.py").write_text("x\n")
        runner.invoke(cli, ["code", "add", "x.py"], env=env)
        result = runner.invoke(cli, ["commit", "-m", "msg", "--author", "Gabriel"], env=env)
        assert result.exit_code == 0, result.output

        data_lower = _log_json(tmp_path, "--author", "gabriel")
        data_upper = _log_json(tmp_path, "--author", "GABRIEL")
        # Two empty results would compare equal, so require a real match first.
        assert data_lower["commits"], "--author 'gabriel' did not match the 'Gabriel' commit"
        assert len(data_lower["commits"]) == len(data_upper["commits"])

    def test_IV3_limit_caps_commits(self, multi_commit_repo: pathlib.Path) -> None:
        """IV3: -n caps the number of commits returned."""
        data = _log_json(multi_commit_repo, "-n", "1")
        assert len(data["commits"]) == 1

    def test_IV4_truncated_true_when_limit_hit(self, multi_commit_repo: pathlib.Path) -> None:
        """IV4: truncated=true when -n limit is reached before exhausting history."""
        data = _log_json(multi_commit_repo, "-n", "1")
        assert data["truncated"] is True

    def test_IV5_truncated_false_when_all_fit(self, single_commit_repo: pathlib.Path) -> None:
        """IV5: truncated=false when limit is not reached (all commits returned)."""
        data = _log_json(single_commit_repo)
        assert data["truncated"] is False
368
369
370 # ---------------------------------------------------------------------------
371 # V Edge cases
372 # ---------------------------------------------------------------------------
373
374
class TestEdgeCasesV:
    """V — edge cases: the parentless initial commit and the two-parent merge commit."""

    def test_V1_initial_commit_parent_is_null(self, single_commit_repo: pathlib.Path) -> None:
        """V1: Initial commit has parent_commit_id=null and parent2_commit_id=null."""
        data = _log_json(single_commit_repo)
        initial = data["commits"][-1]
        assert initial["parent_commit_id"] is None
        assert initial["parent2_commit_id"] is None

    def test_V2_merge_commit_has_two_parents(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """V2: Merge commit has both parent_commit_id and parent2_commit_id set."""
        root = single_commit_repo
        env = _env(root)

        # Create and commit on a feature branch
        runner.invoke(cli, ["checkout", "-b", "feat/test"], env=env)
        (root / "feat.py").write_text("f = 1\n")
        runner.invoke(cli, ["code", "add", "feat.py"], env=env)
        runner.invoke(cli, ["commit", "-m", "feat commit"], env=env)

        # Merge back into main — use --no-ff to force a merge commit
        # (a fast-forward would just move the pointer, creating no merge commit).
        runner.invoke(cli, ["checkout", "main"], env=env)
        merge_result = runner.invoke(cli, ["merge", "--no-ff", "feat/test"], env=env)
        assert merge_result.exit_code == 0, merge_result.output

        data = _log_json(root, "-n", "1")
        merge_commit = data["commits"][0]
        # "Both parents set" includes the first parent, so check it as well
        # as the second one.
        assert merge_commit["parent_commit_id"] is not None, (
            "Merge commit must have parent_commit_id set"
        )
        assert merge_commit["parent_commit_id"].startswith("sha256:"), (
            f"parent_commit_id must be sha256:-prefixed, got {merge_commit['parent_commit_id']!r}"
        )
        assert merge_commit["parent2_commit_id"] is not None, (
            "Merge commit must have parent2_commit_id set"
        )
        assert merge_commit["parent2_commit_id"].startswith("sha256:"), (
            f"parent2_commit_id must be sha256:-prefixed, got {merge_commit['parent2_commit_id']!r}"
        )
410
411
412 # ---------------------------------------------------------------------------
413 # VI structured_delta
414 # ---------------------------------------------------------------------------
415
416
class TestStructuredDeltaVI:
    """VI — presence and type of the ``structured_delta`` field."""

    def test_VI1_structured_delta_key_always_present(
        self, multi_commit_repo: pathlib.Path
    ) -> None:
        """VI1: no commit object omits the ``structured_delta`` key."""
        payload = _log_json(multi_commit_repo)
        assert len(payload["commits"]) >= 1
        for entry in payload["commits"]:
            assert "structured_delta" in entry, (
                f"structured_delta missing from commit {entry.get('commit_id', '?')!r}"
            )

    def test_VI2_structured_delta_is_dict_with_ops_for_code_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """VI2: the commit that added ``main.py`` has a dict delta whose ``ops`` is a list."""
        oldest = _log_json(single_commit_repo)["commits"][-1]
        delta = oldest["structured_delta"]
        assert isinstance(delta, dict), (
            f"Expected structured_delta to be a dict for a code commit, got {type(delta).__name__}"
        )
        assert "ops" in delta, (
            f"structured_delta dict must have an 'ops' key, got keys: {list(delta.keys())}"
        )
        ops = delta["ops"]
        assert isinstance(ops, list), (
            f"structured_delta['ops'] must be a list, got {type(ops).__name__}"
        )

    def test_VI3_structured_delta_type_is_dict_or_none(
        self, multi_commit_repo: pathlib.Path
    ) -> None:
        """VI3: ``structured_delta`` is a dict or None on every commit, never another type."""
        payload = _log_json(multi_commit_repo)
        for entry in payload["commits"]:
            delta = entry["structured_delta"]
            acceptable = delta is None or isinstance(delta, dict)
            assert acceptable, (
                f"structured_delta must be dict or None, got {type(delta).__name__} "
                f"on commit {entry.get('commit_id', '?')!r}"
            )
File History 1 commit
sha256:5c98ba9dd33607ba1557d7c03c64020e71c27c1e7bbaa984e7a91f23d5297b14 feat: add signer_public_key to muse log --json output (VII … Sonnet 4.6 20 days ago