gabriel / muse public
test_log_json_schema.py python
531 lines 21.9 KB
Raw
sha256:5c98ba9dd33607ba1557d7c03c64020e71c27c1e7bbaa984e7a91f23d5297b14 feat: add signer_public_key to muse log --json output (VII … Sonnet 4.6 21 days ago
1 """Tests for the canonical ``muse log --json`` schema.
2
3 Every commit object in the commits array must emit the same shape.
4 Agents rely on this stability — missing fields break provenance tracking
5 and force fragile ``dict.get`` guards.
6
7 Canonical schema
8 ----------------
9 ::
10
11 {
12 "truncated": bool,
13 "commits": [
14 {
15 "commit_id": str, // sha256:-prefixed
16 "branch": str,
17 "message": str,
18 "author": str, // "" when user.handle not configured
19 "agent_id": str, // "" when not an agent commit
20 "model_id": str, // "" when not an agent commit
21 "committed_at": str, // ISO-8601
22 "parent_commit_id": str | null, // sha256:-prefixed or null
23 "parent2_commit_id": str | null, // sha256:-prefixed or null (merge)
24 "snapshot_id": str | null, // sha256:-prefixed
25 "sem_ver_bump": str | null,
26 "breaking_changes": [str, ...],
27 "metadata": {str: ...},
28 "files_added": [str, ...], // always populated in --json mode
29 "files_removed": [str, ...], // always populated in --json mode
30 "files_modified": [str, ...], // always populated in --json mode
31 "structured_delta": dict | null, // symbol-level diff; null for non-code commits
32 "signer_public_key": str // "" for unsigned; "ed25519:<b64url>" when --sign
33 }
34 ]
35 }
36
37 Coverage matrix
38 ---------------
39 I Schema invariants (top-level shape)
40 I1 Top-level keys: truncated + commits always present
41 I2 Each commit has all required keys
42 I3 commit_id is sha256:-prefixed
43 I4 parent_commit_id is sha256:-prefixed or null
44 I5 snapshot_id is sha256:-prefixed
45
46 II File lists — always populated in --json mode, no --stat needed
47 II1 files_added populated for a commit that added a file
48 II2 files_modified populated for a commit that modified a file
49 II3 files_removed populated for a commit that deleted a file
50 II4 Initial commit: files_added non-empty, files_removed/modified empty
51
52 III Agent provenance fields
53 III1 agent_id present (empty string for non-agent commits)
54 III2 model_id present (empty string for non-agent commits)
55 III3 agent_id populated when --agent-id passed to commit
56 III4 model_id populated when --model-id passed to commit
57
58 IV Filters
59 IV1 --author filter returns only matching commits
60 IV2 --author filter is case-insensitive substring match
61 IV3 -n / --limit caps the number of commits returned
62 IV4 truncated=true when limit is hit
63 IV5 truncated=false when all commits fit
64
65 V Edge cases
66 V1 Single commit (initial): parent_commit_id is null
67 V2 Merge commit: parent2_commit_id is sha256:-prefixed (not null)
68
69 VI structured_delta
70 VI1 structured_delta key always present (never absent from commit object)
71 VI2 structured_delta is a dict with an "ops" key for a code-file commit
72 VI3 structured_delta is always a dict or None (None when the commit produces no code-intelligence ops)
73
74 VII Signing provenance fields
75 VII1 signer_public_key present (empty string for unsigned commits)
76 VII2 signer_public_key round-trips correctly through CommitRecord → log --json
77 """
78
79 from __future__ import annotations
80 from collections.abc import Mapping
81
82 import json
83 import pathlib
84
85 import pytest
86
87 from tests.cli_test_helper import CliRunner
88
# ``cli`` is deliberately ``None``: it is only ever forwarded as the first
# argument of ``runner.invoke`` in this module.
# NOTE(review): presumably the project CliRunner resolves the app itself —
# confirm against tests.cli_test_helper.
cli = None
runner = CliRunner()

# Keys every commit object in the ``commits`` array must carry.
_REQUIRED_COMMIT_KEYS = {
    # identity and message
    "commit_id",
    "branch",
    "message",
    "author",
    # agent provenance
    "agent_id",
    "model_id",
    # history
    "committed_at",
    "parent_commit_id",
    "parent2_commit_id",
    # content
    "snapshot_id",
    "sem_ver_bump",
    "breaking_changes",
    "metadata",
    # file lists
    "files_added",
    "files_removed",
    "files_modified",
    # symbol-level diff
    "structured_delta",
    # signing provenance
    "signer_public_key",
}

# Keys that must be present on the top-level payload.
_REQUIRED_TOP_KEYS = {"truncated", "commits"}
103
104
def _env(root: pathlib.Path) -> Mapping[str, str]:
    """Return environment overrides with ``MUSE_REPO_ROOT`` set to *root*."""
    overrides = {"MUSE_REPO_ROOT": str(root)}
    return overrides
107
108
def _log_json(root: pathlib.Path, *extra_args: str) -> Mapping[str, object]:
    """Run ``log --json`` against *root* and return the decoded JSON payload.

    Any *extra_args* are appended after ``--json``.  The helper asserts that
    the command exited with code 0 before parsing its output.
    """
    argv = ["log", "--json", *extra_args]
    result = runner.invoke(cli, argv, env=_env(root))
    assert result.exit_code == 0, f"log --json failed: {result.output}"
    raw_payload = result.output.strip()
    return json.loads(raw_payload)
113
114
@pytest.fixture()
def single_commit_repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Code-domain repo with exactly one commit.

    The repo is initialised in ``tmp_path`` (which also becomes the working
    directory), ``main.py`` is written and added, and a single commit with the
    message ``initial`` is created.  Returns ``tmp_path``.
    """
    env = _env(tmp_path)
    monkeypatch.chdir(tmp_path)

    init_result = runner.invoke(cli, ["init", "--domain", "code"], env=env)
    assert init_result.exit_code == 0, init_result.output

    source_file = tmp_path / "main.py"
    source_file.write_text("x = 1\n")
    runner.invoke(cli, ["code", "add", "main.py"], env=env)

    commit_result = runner.invoke(cli, ["commit", "-m", "initial"], env=env)
    assert commit_result.exit_code == 0, commit_result.output
    return tmp_path
126
127
@pytest.fixture()
def multi_commit_repo(single_commit_repo: pathlib.Path) -> pathlib.Path:
    """Repo with 3 commits: add, modify, delete.

    Builds on ``single_commit_repo`` (commit 1 adds ``main.py``) and layers
    two more commits on top.  Each ``commit`` invocation is asserted to
    succeed, mirroring ``single_commit_repo``, so a broken setup fails here
    with the CLI output instead of surfacing later as an unrelated
    IndexError/assertion inside the test that consumes the fixture.
    """
    root = single_commit_repo
    env = _env(root)

    # Commit 2: modify main.py + add extra.py
    (root / "main.py").write_text("x = 2\n")
    (root / "extra.py").write_text("e = 1\n")
    runner.invoke(cli, ["code", "add", "main.py", "extra.py"], env=env)
    result = runner.invoke(cli, ["commit", "-m", "modify and add"], env=env)
    assert result.exit_code == 0, result.output

    # Commit 3: delete extra.py
    (root / "extra.py").unlink()
    runner.invoke(cli, ["code", "add", "extra.py"], env=env)
    result = runner.invoke(cli, ["commit", "-m", "delete extra"], env=env)
    assert result.exit_code == 0, result.output

    return root
146
147
148 # ---------------------------------------------------------------------------
149 # I Schema invariants
150 # ---------------------------------------------------------------------------
151
152
class TestSchemaInvariantsI:
    """I: shape invariants of the ``log --json`` payload and its commit objects.

    Every per-commit check first asserts that the history is long enough for
    the check to be meaningful, so no test can pass vacuously on an empty
    ``commits`` array.
    """

    def test_I1_top_level_keys(self, single_commit_repo: pathlib.Path) -> None:
        """I1: Top-level always has truncated + commits."""
        data = _log_json(single_commit_repo)
        assert _REQUIRED_TOP_KEYS.issubset(data.keys()), (
            f"Missing top-level keys: {_REQUIRED_TOP_KEYS - data.keys()}"
        )
        assert isinstance(data["truncated"], bool)
        assert isinstance(data["commits"], list)

    def test_I2_each_commit_has_all_required_keys(self, single_commit_repo: pathlib.Path) -> None:
        """I2: Every commit object has all required keys."""
        data = _log_json(single_commit_repo)
        assert len(data["commits"]) >= 1
        for c in data["commits"]:
            missing = _REQUIRED_COMMIT_KEYS - c.keys()
            assert not missing, f"Commit missing keys: {missing}"

    def test_I3_commit_id_is_sha256_prefixed(self, single_commit_repo: pathlib.Path) -> None:
        """I3: commit_id is sha256:-prefixed."""
        data = _log_json(single_commit_repo)
        # Guard against a vacuous pass: the loop below proves nothing if empty.
        assert len(data["commits"]) >= 1
        for c in data["commits"]:
            assert c["commit_id"].startswith("sha256:"), (
                f"commit_id must be sha256:-prefixed, got {c['commit_id']!r}"
            )

    def test_I4_parent_commit_id_is_sha256_prefixed_or_null(
        self, multi_commit_repo: pathlib.Path
    ) -> None:
        """I4: parent_commit_id is sha256:-prefixed (non-null) or null (initial commit)."""
        data = _log_json(multi_commit_repo)
        commits = data["commits"]
        # At least one non-initial commit is needed, otherwise ``commits[:-1]``
        # is empty and the prefix check is never exercised.
        assert len(commits) >= 2, (
            f"Expected at least 2 commits in multi-commit history, got {len(commits)}"
        )
        # Most recent commits (non-initial) must have sha256:-prefixed parent
        for c in commits[:-1]:
            assert c["parent_commit_id"] is not None
            assert c["parent_commit_id"].startswith("sha256:"), (
                f"parent_commit_id must be sha256:-prefixed, got {c['parent_commit_id']!r}"
            )
        # Initial commit: parent is null
        initial = commits[-1]
        assert initial["parent_commit_id"] is None

    def test_I5_snapshot_id_is_sha256_prefixed(self, single_commit_repo: pathlib.Path) -> None:
        """I5: snapshot_id is sha256:-prefixed when present."""
        data = _log_json(single_commit_repo)
        # Guard against a vacuous pass on an empty commits array.
        assert len(data["commits"]) >= 1
        for c in data["commits"]:
            # The schema allows null here; only non-null values are checked.
            if c["snapshot_id"] is not None:
                assert c["snapshot_id"].startswith("sha256:"), (
                    f"snapshot_id must be sha256:-prefixed, got {c['snapshot_id']!r}"
                )
203
204
205 # ---------------------------------------------------------------------------
206 # II File lists — always populated in --json mode
207 # ---------------------------------------------------------------------------
208
209
class TestFileListsII:
    """II: file lists are populated in ``--json`` mode without passing ``--stat``."""

    def test_II1_files_added_populated_no_stat_flag(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """II1: files_added populated in --json mode without --stat."""
        data = _log_json(single_commit_repo)
        # The initial commit added main.py
        initial = data["commits"][-1]
        assert "main.py" in initial["files_added"], (
            f"Expected main.py in files_added, got {initial['files_added']}"
        )

    def test_II2_files_modified_populated(self, multi_commit_repo: pathlib.Path) -> None:
        """II2: files_modified populated for a modify commit."""
        data = _log_json(multi_commit_repo)
        commits = data["commits"]
        # Second-most-recent commit modified main.py (and added extra.py)
        modify_commit = commits[1]  # commits are newest-first
        assert "main.py" in modify_commit["files_modified"], (
            f"Expected main.py in files_modified, got {modify_commit}"
        )

    def test_II3_files_removed_populated(self, multi_commit_repo: pathlib.Path) -> None:
        """II3: files_removed populated for a delete commit."""
        data = _log_json(multi_commit_repo)
        # Most recent commit deleted extra.py
        delete_commit = data["commits"][0]
        assert "extra.py" in delete_commit["files_removed"], (
            f"Expected extra.py in files_removed, got {delete_commit}"
        )

    def test_II4_initial_commit_files_removed_and_modified_empty(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """II4: Initial commit: files_added non-empty, files_removed/modified empty."""
        data = _log_json(single_commit_repo)
        initial = data["commits"][-1]
        # The coverage matrix promises all three conditions for II4; the
        # files_added half was previously left unchecked.
        assert initial["files_added"], (
            f"Initial commit must list at least one added file, got {initial['files_added']}"
        )
        assert initial["files_removed"] == []
        assert initial["files_modified"] == []
249
250
251 # ---------------------------------------------------------------------------
252 # III Agent provenance fields
253 # ---------------------------------------------------------------------------
254
255
class TestAgentProvenanceIII:
    """III: ``agent_id`` / ``model_id`` are always emitted and reflect commit flags."""

    def test_III1_agent_id_present_empty_for_non_agent_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """III1: agent_id is always present; empty string for non-agent commits."""
        data = _log_json(single_commit_repo)
        for c in data["commits"]:
            assert "agent_id" in c, "agent_id must always be present"
            assert isinstance(c["agent_id"], str)
            # The fixture commit is made without --agent-id, so the documented
            # schema value is the empty string (same style as the VII1 check).
            assert c["agent_id"] == "", (
                f"non-agent commit must have agent_id='', got {c['agent_id']!r}"
            )

    def test_III2_model_id_present_empty_for_non_agent_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """III2: model_id is always present; empty string for non-agent commits."""
        data = _log_json(single_commit_repo)
        for c in data["commits"]:
            assert "model_id" in c, "model_id must always be present"
            assert isinstance(c["model_id"], str)
            # The fixture commit is made without --model-id.
            assert c["model_id"] == "", (
                f"non-agent commit must have model_id='', got {c['model_id']!r}"
            )

    def test_III3_agent_id_populated_when_passed_to_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """III3: agent_id reflects --agent-id passed at commit time."""
        root = single_commit_repo
        env = _env(root)
        (root / "agent_file.py").write_text("a = 1\n")
        runner.invoke(cli, ["code", "add", "agent_file.py"], env=env)
        result = runner.invoke(
            cli,
            ["commit", "-m", "agent commit", "--agent-id", "test-agent-42"],
            env=env,
        )
        assert result.exit_code == 0, result.output

        # -n 1 restricts the log to the commit just created.
        data = _log_json(root, "-n", "1")
        c = data["commits"][0]
        assert c["agent_id"] == "test-agent-42", (
            f"Expected agent_id='test-agent-42', got {c['agent_id']!r}"
        )

    def test_III4_model_id_populated_when_passed_to_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """III4: model_id reflects --model-id passed at commit time."""
        root = single_commit_repo
        env = _env(root)
        (root / "model_file.py").write_text("m = 1\n")
        runner.invoke(cli, ["code", "add", "model_file.py"], env=env)
        result = runner.invoke(
            cli,
            ["commit", "-m", "model commit", "--model-id", "claude-sonnet-4-6"],
            env=env,
        )
        assert result.exit_code == 0, result.output

        # -n 1 restricts the log to the commit just created.
        data = _log_json(root, "-n", "1")
        c = data["commits"][0]
        assert c["model_id"] == "claude-sonnet-4-6", (
            f"Expected model_id='claude-sonnet-4-6', got {c['model_id']!r}"
        )
316
317
318 # ---------------------------------------------------------------------------
319 # IV Filters
320 # ---------------------------------------------------------------------------
321
322
class TestFiltersIV:
    """IV: ``--author`` filtering and ``-n`` limiting of ``log --json`` output."""

    def test_IV1_author_filter_matches_commits(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """IV1: --author filter returns only commits matching the author."""
        monkeypatch.chdir(tmp_path)
        env = _env(tmp_path)
        runner.invoke(cli, ["init", "--domain", "code"], env=env)
        (tmp_path / "a.py").write_text("a\n")
        runner.invoke(cli, ["code", "add", "a.py"], env=env)
        runner.invoke(cli, ["commit", "-m", "gabriel commit", "--author", "gabriel"], env=env)

        (tmp_path / "b.py").write_text("b\n")
        runner.invoke(cli, ["code", "add", "b.py"], env=env)
        runner.invoke(cli, ["commit", "-m", "agent commit", "--author", "bot-agent"], env=env)

        data = _log_json(tmp_path, "--author", "gabriel")
        # Without this guard the all()/any() checks below pass vacuously when
        # the filter (or the setup above) yields no commits at all.
        assert data["commits"], "--author filter returned no commits; expected the gabriel commit"
        assert all("gabriel" in c["author"].lower() for c in data["commits"]), (
            f"--author filter returned non-matching commits: {[c['author'] for c in data['commits']]}"
        )
        assert not any(c["author"] == "bot-agent" for c in data["commits"])

    def test_IV2_author_filter_is_case_insensitive(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """IV2: --author filter is a case-insensitive substring match."""
        monkeypatch.chdir(tmp_path)
        env = _env(tmp_path)
        runner.invoke(cli, ["init", "--domain", "code"], env=env)
        (tmp_path / "x.py").write_text("x\n")
        runner.invoke(cli, ["code", "add", "x.py"], env=env)
        runner.invoke(cli, ["commit", "-m", "msg", "--author", "Gabriel"], env=env)

        data_lower = _log_json(tmp_path, "--author", "gabriel")
        data_upper = _log_json(tmp_path, "--author", "GABRIEL")
        # Comparing the two lengths alone would also pass for a case-sensitive
        # filter (neither spelling equals "Gabriel", so 0 == 0).  The single
        # commit made above must be returned for both spellings.
        assert len(data_lower["commits"]) == 1, (
            f"--author gabriel should match the 'Gabriel' commit, got {len(data_lower['commits'])} commits"
        )
        assert len(data_upper["commits"]) == 1, (
            f"--author GABRIEL should match the 'Gabriel' commit, got {len(data_upper['commits'])} commits"
        )

    def test_IV3_limit_caps_commits(self, multi_commit_repo: pathlib.Path) -> None:
        """IV3: -n caps the number of commits returned."""
        data = _log_json(multi_commit_repo, "-n", "1")
        assert len(data["commits"]) == 1

    def test_IV4_truncated_true_when_limit_hit(self, multi_commit_repo: pathlib.Path) -> None:
        """IV4: truncated=true when -n limit is reached before exhausting history."""
        data = _log_json(multi_commit_repo, "-n", "1")
        assert data["truncated"] is True

    def test_IV5_truncated_false_when_all_fit(self, single_commit_repo: pathlib.Path) -> None:
        """IV5: truncated=false when limit is not reached (all commits returned)."""
        data = _log_json(single_commit_repo)
        assert data["truncated"] is False
374
375
376 # ---------------------------------------------------------------------------
377 # V Edge cases
378 # ---------------------------------------------------------------------------
379
380
class TestEdgeCasesV:
    """V: parent fields on the initial commit and on a merge commit."""

    def test_V1_initial_commit_parent_is_null(self, single_commit_repo: pathlib.Path) -> None:
        """V1: Initial commit has parent_commit_id=null and parent2_commit_id=null."""
        data = _log_json(single_commit_repo)
        initial = data["commits"][-1]
        assert initial["parent_commit_id"] is None
        assert initial["parent2_commit_id"] is None

    def test_V2_merge_commit_has_two_parents(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """V2: Merge commit has both parent_commit_id and parent2_commit_id set."""
        root = single_commit_repo
        env = _env(root)

        # Create and commit on a feature branch
        runner.invoke(cli, ["checkout", "-b", "feat/test"], env=env)
        (root / "feat.py").write_text("f = 1\n")
        runner.invoke(cli, ["code", "add", "feat.py"], env=env)
        runner.invoke(cli, ["commit", "-m", "feat commit"], env=env)

        # Merge back into main — use --no-ff to force a merge commit
        # (a fast-forward would just move the pointer, creating no merge commit).
        runner.invoke(cli, ["checkout", "main"], env=env)
        merge_result = runner.invoke(cli, ["merge", "--no-ff", "feat/test"], env=env)
        assert merge_result.exit_code == 0, merge_result.output

        data = _log_json(root, "-n", "1")
        merge_commit = data["commits"][0]
        # First parent: the docstring promises both parents are set, so check
        # parent_commit_id as well as parent2_commit_id.
        assert merge_commit["parent_commit_id"] is not None, (
            "Merge commit must have parent_commit_id set"
        )
        assert merge_commit["parent_commit_id"].startswith("sha256:"), (
            f"parent_commit_id must be sha256:-prefixed, got {merge_commit['parent_commit_id']!r}"
        )
        # Second parent
        assert merge_commit["parent2_commit_id"] is not None, (
            "Merge commit must have parent2_commit_id set"
        )
        assert merge_commit["parent2_commit_id"].startswith("sha256:"), (
            f"parent2_commit_id must be sha256:-prefixed, got {merge_commit['parent2_commit_id']!r}"
        )
416
417
418 # ---------------------------------------------------------------------------
419 # VI structured_delta
420 # ---------------------------------------------------------------------------
421
422
class TestStructuredDeltaVI:
    """VI: presence and type of the ``structured_delta`` field."""

    def test_VI1_structured_delta_key_always_present(
        self, multi_commit_repo: pathlib.Path
    ) -> None:
        """VI1: every commit object carries a structured_delta key."""
        commits = _log_json(multi_commit_repo)["commits"]
        assert len(commits) >= 1
        for c in commits:
            assert "structured_delta" in c, (
                f"structured_delta missing from commit {c.get('commit_id', '?')!r}"
            )

    def test_VI2_structured_delta_is_dict_with_ops_for_code_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """VI2: for a Python file commit, structured_delta is a dict holding an 'ops' list."""
        commits = _log_json(single_commit_repo)["commits"]
        # Oldest commit is last; it is the one that added main.py.
        delta = commits[-1]["structured_delta"]
        assert isinstance(delta, dict), (
            f"Expected structured_delta to be a dict for a code commit, got {type(delta).__name__}"
        )
        assert "ops" in delta, (
            f"structured_delta dict must have an 'ops' key, got keys: {list(delta.keys())}"
        )
        assert isinstance(delta["ops"], list), (
            f"structured_delta['ops'] must be a list, got {type(delta['ops']).__name__}"
        )

    def test_VI3_structured_delta_type_is_dict_or_none(
        self, multi_commit_repo: pathlib.Path
    ) -> None:
        """VI3: structured_delta is a dict or None on every commit, nothing else."""
        for c in _log_json(multi_commit_repo)["commits"]:
            delta = c["structured_delta"]
            assert isinstance(delta, dict) or delta is None, (
                f"structured_delta must be dict or None, got {type(delta).__name__} "
                f"on commit {c.get('commit_id', '?')!r}"
            )
463
464
465 # ---------------------------------------------------------------------------
466 # VII Signing provenance fields
467 # ---------------------------------------------------------------------------
468
469
class TestSigningProvenanceVII:
    """VII: the ``signer_public_key`` field in ``log --json`` output."""

    def test_VII1_signer_public_key_present_empty_for_unsigned(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """VII1: signer_public_key is present on every commit and is '' when unsigned."""
        commits = _log_json(single_commit_repo)["commits"]
        for c in commits:
            assert "signer_public_key" in c, "signer_public_key must always be present"
            assert isinstance(c["signer_public_key"], str), (
                f"signer_public_key must be str, got {type(c['signer_public_key']).__name__}"
            )
            assert c["signer_public_key"] == "", (
                f"unsigned commit must have signer_public_key='', got {c['signer_public_key']!r}"
            )

    def test_VII2_signer_public_key_round_trips_in_log(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """VII2: a signer_public_key stored on a CommitRecord is echoed by log --json.

        The commit is written directly through ``muse.core`` (no CLI ``commit``),
        reusing the current head's snapshot, and ``main`` is then advanced to it.
        """
        import datetime as dt

        from muse.core.commits import (
            CommitRecord,
            get_head_commit_id,
            read_commit,
            write_commit,
        )
        from muse.core.refs import write_branch_ref
        from muse.core.snapshot import compute_commit_id

        root = single_commit_repo
        fake_key = "ed25519:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"

        head = get_head_commit_id(root, "main")
        assert head is not None

        # The new commit reuses the snapshot of the current head.
        parent = read_commit(root, head)
        assert parent is not None
        snapshot_id = parent.snapshot_id or ""

        message = "signed-looking commit"
        author = "gabriel"
        committed_at = dt.datetime(2026, 1, 1, tzinfo=dt.timezone.utc)

        commit_id = compute_commit_id(
            parent_ids=[head],
            snapshot_id=snapshot_id,
            message=message,
            committed_at_iso=committed_at.isoformat(),
            author=author,
            signer_public_key=fake_key,
        )
        rec = CommitRecord(
            commit_id=commit_id,
            branch="main",
            snapshot_id=snapshot_id,
            message=message,
            committed_at=committed_at,
            parent_commit_id=head,
            author=author,
            signer_public_key=fake_key,
        )
        write_commit(root, rec)
        write_branch_ref(root, "main", commit_id, expected_id=head)

        # -n 1 returns only the newest commit, i.e. the record written above.
        c = _log_json(root, "-n", "1")["commits"][0]
        assert c["signer_public_key"] == fake_key, (
            f"Expected signer_public_key={fake_key!r}, got {c['signer_public_key']!r}"
        )
File History 1 commit
sha256:5c98ba9dd33607ba1557d7c03c64020e71c27c1e7bbaa984e7a91f23d5297b14 feat: add signer_public_key to muse log --json output (VII … Sonnet 4.6 21 days ago