test_log_json_schema.py
python
sha256:5c98ba9dd33607ba1557d7c03c64020e71c27c1e7bbaa984e7a91f23d5297b14
feat: add signer_public_key to muse log --json output (VII …
Sonnet 4.6
20 days ago
| 1 | """Tests for the canonical ``muse log --json`` schema. |
| 2 | |
| 3 | Every commit object in the commits array must emit the same shape. |
| 4 | Agents rely on this stability — missing fields break provenance tracking |
| 5 | and force fragile ``dict.get`` guards. |
| 6 | |
| 7 | Canonical schema |
| 8 | ---------------- |
| 9 | :: |
| 10 | |
| 11 | { |
| 12 | "truncated": bool, |
| 13 | "commits": [ |
| 14 | { |
| 15 | "commit_id": str, // sha256:-prefixed |
| 16 | "branch": str, |
| 17 | "message": str, |
| 18 | "author": str, // "" when user.handle not configured |
| 19 | "agent_id": str, // "" when not an agent commit |
| 20 | "model_id": str, // "" when not an agent commit |
| 21 | "committed_at": str, // ISO-8601 |
| 22 | "parent_commit_id": str | null, // sha256:-prefixed or null |
| 23 | "parent2_commit_id": str | null, // sha256:-prefixed or null (merge) |
| 24 | "snapshot_id": str | null, // sha256:-prefixed |
| 25 | "sem_ver_bump": str | null, |
| 26 | "breaking_changes": [str, ...], |
| 27 | "metadata": {str: ...}, |
| 28 | "files_added": [str, ...], // always populated in --json mode |
| 29 | "files_removed": [str, ...], // always populated in --json mode |
| 30 | "files_modified": [str, ...], // always populated in --json mode |
| 31 | "structured_delta": dict | null // symbol-level diff; null for non-code commits |
| 32 | } |
| 33 | ] |
| 34 | } |
| 35 | |
| 36 | Coverage matrix |
| 37 | --------------- |
| 38 | I Schema invariants (top-level shape) |
| 39 | I1 Top-level keys: truncated + commits always present |
| 40 | I2 Each commit has all required keys |
| 41 | I3 commit_id is sha256:-prefixed |
| 42 | I4 parent_commit_id is sha256:-prefixed or null |
| 43 | I5 snapshot_id is sha256:-prefixed |
| 44 | |
| 45 | II File lists — always populated in --json mode, no --stat needed |
| 46 | II1 files_added populated for a commit that added a file |
| 47 | II2 files_modified populated for a commit that modified a file |
| 48 | II3 files_removed populated for a commit that deleted a file |
| 49 | II4 Initial commit: files_added non-empty, files_removed/modified empty |
| 50 | |
| 51 | III Agent provenance fields |
| 52 | III1 agent_id present (empty string for non-agent commits) |
| 53 | III2 model_id present (empty string for non-agent commits) |
| 54 | III3 agent_id populated when --agent-id passed to commit |
| 55 | III4 model_id populated when --model-id passed to commit |
| 56 | |
| 57 | IV Filters |
| 58 | IV1 --author filter returns only matching commits |
| 59 | IV2 --author filter is case-insensitive substring match |
| 60 | IV3 -n / --limit caps the number of commits returned |
| 61 | IV4 truncated=true when limit is hit |
| 62 | IV5 truncated=false when all commits fit |
| 63 | |
| 64 | V Edge cases |
| 65 | V1 Single commit (initial): parent_commit_id is null |
| 66 | V2 Merge commit: parent2_commit_id is sha256:-prefixed (not null) |
| 67 | |
| 68 | VI structured_delta |
| 69 | VI1 structured_delta key always present (never absent from commit object) |
| 70 | VI2 structured_delta is a dict with an "ops" key for a code-file commit |
| 71 | VI3 structured_delta is always a dict or None, never another type (None when the commit produces no code-intelligence ops) |
| 72 | """ |
| 73 | |
| 74 | from __future__ import annotations |
| 75 | from collections.abc import Mapping |
| 76 | |
| 77 | import json |
| 78 | import pathlib |
| 79 | |
| 80 | import pytest |
| 81 | |
| 82 | from tests.cli_test_helper import CliRunner |
| 83 | |
# Application handle passed as the first argument of every ``runner.invoke``
# call in this module.
# NOTE(review): presumably the project's CliRunner locates the real CLI entry
# point itself and ignores this value — confirm in tests.cli_test_helper.
cli = None
# Single runner instance shared by the helpers, fixtures and tests below.
runner = CliRunner()
| 86 | |
# Keys that every object in the ``commits`` array must carry.
_REQUIRED_COMMIT_KEYS = {
    # identity and authorship
    "commit_id",
    "branch",
    "message",
    "author",
    # agent provenance
    "agent_id",
    "model_id",
    # timestamp and history linkage
    "committed_at",
    "parent_commit_id",
    "parent2_commit_id",
    # snapshot and release metadata
    "snapshot_id",
    "sem_ver_bump",
    "breaking_changes",
    "metadata",
    # file-level and symbol-level change summaries
    "files_added",
    "files_removed",
    "files_modified",
    "structured_delta",
}

# Keys that the top-level ``muse log --json`` document must carry.
_REQUIRED_TOP_KEYS = {"truncated", "commits"}
| 97 | |
| 98 | |
def _env(root: pathlib.Path) -> Mapping[str, str]:
    """Build the environment mapping that points the CLI at *root*.

    Args:
        root: Repository root directory.

    Returns:
        A fresh single-entry dict mapping ``MUSE_REPO_ROOT`` to ``str(root)``.
    """
    return dict(MUSE_REPO_ROOT=str(root))
| 101 | |
| 102 | |
def _log_json(root: pathlib.Path, *extra_args: str) -> Mapping[str, object]:
    """Run ``log --json`` against *root* and return the decoded JSON document.

    Args:
        root: Repository root, handed to the CLI through ``_env``.
        *extra_args: Extra CLI arguments appended after ``log --json``.

    Returns:
        The object produced by ``json.loads`` on the stripped command output.

    Raises:
        AssertionError: If the command exits with a non-zero status.
    """
    argv = ["log", "--json", *extra_args]
    outcome = runner.invoke(cli, argv, env=_env(root))
    assert outcome.exit_code == 0, f"log --json failed: {outcome.output}"
    payload = outcome.output.strip()
    return json.loads(payload)
| 107 | |
| 108 | |
@pytest.fixture()
def single_commit_repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Create a code-domain repo containing exactly one commit.

    The working directory is switched to ``tmp_path``, the repo is initialised
    with ``--domain code``, and ``main.py`` is written, staged and committed
    with the message ``initial``.

    Returns:
        ``tmp_path``, which doubles as the repository root.
    """
    repo_env = _env(tmp_path)
    monkeypatch.chdir(tmp_path)

    init_result = runner.invoke(cli, ["init", "--domain", "code"], env=repo_env)
    assert init_result.exit_code == 0, init_result.output

    source_file = tmp_path / "main.py"
    source_file.write_text("x = 1\n")
    # The staging result is not inspected here; only init and commit are asserted.
    runner.invoke(cli, ["code", "add", "main.py"], env=repo_env)

    commit_result = runner.invoke(cli, ["commit", "-m", "initial"], env=repo_env)
    assert commit_result.exit_code == 0, commit_result.output
    return tmp_path
| 120 | |
| 121 | |
@pytest.fixture()
def multi_commit_repo(single_commit_repo: pathlib.Path) -> pathlib.Path:
    """Repo with 3 commits: add, modify, delete.

    Builds on ``single_commit_repo`` (commit 1 adds ``main.py``) and appends:

    * commit 2 — modifies ``main.py`` and adds ``extra.py``;
    * commit 3 — deletes ``extra.py``.

    Each commit's exit code is asserted so that a broken setup fails inside
    the fixture with the CLI output, rather than surfacing later as an
    unrelated ``IndexError`` or assertion in a dependent test.

    Returns:
        The repository root (the same path ``single_commit_repo`` returned).
    """
    root = single_commit_repo
    env = _env(root)

    # Commit 2: modify main.py + add extra.py
    (root / "main.py").write_text("x = 2\n")
    (root / "extra.py").write_text("e = 1\n")
    runner.invoke(cli, ["code", "add", "main.py", "extra.py"], env=env)
    result = runner.invoke(cli, ["commit", "-m", "modify and add"], env=env)
    assert result.exit_code == 0, result.output

    # Commit 3: delete extra.py
    (root / "extra.py").unlink()
    # Staging results are left unchecked, as in single_commit_repo; the commit
    # assertion below is what guarantees the history exists.
    runner.invoke(cli, ["code", "add", "extra.py"], env=env)
    result = runner.invoke(cli, ["commit", "-m", "delete extra"], env=env)
    assert result.exit_code == 0, result.output

    return root
| 140 | |
| 141 | |
| 142 | # --------------------------------------------------------------------------- |
| 143 | # I Schema invariants |
| 144 | # --------------------------------------------------------------------------- |
| 145 | |
| 146 | |
class TestSchemaInvariantsI:
    """Section I: top-level document shape and identifier formats."""

    def test_I1_top_level_keys(self, single_commit_repo: pathlib.Path) -> None:
        """I1: the document carries ``truncated`` (bool) and ``commits`` (list)."""
        data = _log_json(single_commit_repo)
        absent = _REQUIRED_TOP_KEYS - data.keys()
        assert not absent, f"Missing top-level keys: {absent}"
        assert isinstance(data["truncated"], bool)
        assert isinstance(data["commits"], list)

    def test_I2_each_commit_has_all_required_keys(self, single_commit_repo: pathlib.Path) -> None:
        """I2: no commit object omits any of the required keys."""
        commits = _log_json(single_commit_repo)["commits"]
        assert len(commits) >= 1
        for commit in commits:
            missing = _REQUIRED_COMMIT_KEYS - commit.keys()
            assert not missing, f"Commit missing keys: {missing}"

    def test_I3_commit_id_is_sha256_prefixed(self, single_commit_repo: pathlib.Path) -> None:
        """I3: every ``commit_id`` starts with ``sha256:``."""
        for commit in _log_json(single_commit_repo)["commits"]:
            commit_id = commit["commit_id"]
            assert commit_id.startswith("sha256:"), (
                f"commit_id must be sha256:-prefixed, got {commit_id!r}"
            )

    def test_I4_parent_commit_id_is_sha256_prefixed_or_null(
        self, multi_commit_repo: pathlib.Path
    ) -> None:
        """I4: non-initial commits have a sha256:-prefixed parent; the initial one has null."""
        history = _log_json(multi_commit_repo)["commits"]
        # Every entry except the last one listed is a non-initial commit.
        for commit in history[:-1]:
            parent = commit["parent_commit_id"]
            assert parent is not None
            assert parent.startswith("sha256:"), (
                f"parent_commit_id must be sha256:-prefixed, got {parent!r}"
            )
        # The last entry is the initial commit, which has no parent.
        assert history[-1]["parent_commit_id"] is None

    def test_I5_snapshot_id_is_sha256_prefixed(self, single_commit_repo: pathlib.Path) -> None:
        """I5: a non-null ``snapshot_id`` starts with ``sha256:``."""
        for commit in _log_json(single_commit_repo)["commits"]:
            snapshot_id = commit["snapshot_id"]
            if snapshot_id is None:
                continue
            assert snapshot_id.startswith("sha256:"), (
                f"snapshot_id must be sha256:-prefixed, got {snapshot_id!r}"
            )
| 197 | |
| 198 | |
| 199 | # --------------------------------------------------------------------------- |
| 200 | # II File lists — always populated in --json mode |
| 201 | # --------------------------------------------------------------------------- |
| 202 | |
| 203 | |
class TestFileListsII:
    """Section II: file lists are filled in ``--json`` mode without ``--stat``."""

    def test_II1_files_added_populated_no_stat_flag(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """II1: the initial commit lists ``main.py`` under ``files_added``."""
        # The last entry is the initial commit, which added main.py.
        initial = _log_json(single_commit_repo)["commits"][-1]
        added = initial["files_added"]
        assert "main.py" in added, f"Expected main.py in files_added, got {added}"

    def test_II2_files_modified_populated(self, multi_commit_repo: pathlib.Path) -> None:
        """II2: the modify commit lists ``main.py`` under ``files_modified``."""
        history = _log_json(multi_commit_repo)["commits"]
        # Newest-first ordering: index 1 is the commit that modified main.py
        # (and added extra.py).
        modify_commit = history[1]
        assert "main.py" in modify_commit["files_modified"], (
            f"Expected main.py in files_modified, got {modify_commit}"
        )

    def test_II3_files_removed_populated(self, multi_commit_repo: pathlib.Path) -> None:
        """II3: the delete commit lists ``extra.py`` under ``files_removed``."""
        # Index 0 is the most recent commit, which deleted extra.py.
        delete_commit = _log_json(multi_commit_repo)["commits"][0]
        assert "extra.py" in delete_commit["files_removed"], (
            f"Expected extra.py in files_removed, got {delete_commit}"
        )

    def test_II4_initial_commit_files_removed_and_modified_empty(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """II4: the initial commit has empty ``files_removed`` and ``files_modified``."""
        initial = _log_json(single_commit_repo)["commits"][-1]
        for key in ("files_removed", "files_modified"):
            assert initial[key] == []
| 243 | |
| 244 | |
| 245 | # --------------------------------------------------------------------------- |
| 246 | # III Agent provenance fields |
| 247 | # --------------------------------------------------------------------------- |
| 248 | |
| 249 | |
class TestAgentProvenanceIII:
    """Section III: ``agent_id`` / ``model_id`` provenance fields."""

    def test_III1_agent_id_present_empty_for_non_agent_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """III1: ``agent_id`` is present on every commit and is a string.

        The schema documents ``""`` for non-agent commits; this test asserts
        presence and type only.
        """
        for commit in _log_json(single_commit_repo)["commits"]:
            assert "agent_id" in commit, "agent_id must always be present"
            assert isinstance(commit["agent_id"], str)

    def test_III2_model_id_present_empty_for_non_agent_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """III2: ``model_id`` is present on every commit and is a string.

        The schema documents ``""`` for non-agent commits; this test asserts
        presence and type only.
        """
        for commit in _log_json(single_commit_repo)["commits"]:
            assert "model_id" in commit, "model_id must always be present"
            assert isinstance(commit["model_id"], str)

    def test_III3_agent_id_populated_when_passed_to_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """III3: ``--agent-id`` given at commit time is echoed in ``agent_id``."""
        repo = single_commit_repo
        repo_env = _env(repo)
        new_file = repo / "agent_file.py"
        new_file.write_text("a = 1\n")
        runner.invoke(cli, ["code", "add", "agent_file.py"], env=repo_env)
        commit_argv = ["commit", "-m", "agent commit", "--agent-id", "test-agent-42"]
        commit_result = runner.invoke(cli, commit_argv, env=repo_env)
        assert commit_result.exit_code == 0, commit_result.output

        # -n 1 limits the log to the commit that was just created.
        newest = _log_json(repo, "-n", "1")["commits"][0]
        assert newest["agent_id"] == "test-agent-42", (
            f"Expected agent_id='test-agent-42', got {newest['agent_id']!r}"
        )

    def test_III4_model_id_populated_when_passed_to_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """III4: ``--model-id`` given at commit time is echoed in ``model_id``."""
        repo = single_commit_repo
        repo_env = _env(repo)
        new_file = repo / "model_file.py"
        new_file.write_text("m = 1\n")
        runner.invoke(cli, ["code", "add", "model_file.py"], env=repo_env)
        commit_argv = ["commit", "-m", "model commit", "--model-id", "claude-sonnet-4-6"]
        commit_result = runner.invoke(cli, commit_argv, env=repo_env)
        assert commit_result.exit_code == 0, commit_result.output

        # -n 1 limits the log to the commit that was just created.
        newest = _log_json(repo, "-n", "1")["commits"][0]
        assert newest["model_id"] == "claude-sonnet-4-6", (
            f"Expected model_id='claude-sonnet-4-6', got {newest['model_id']!r}"
        )
| 310 | |
| 311 | |
| 312 | # --------------------------------------------------------------------------- |
| 313 | # IV Filters |
| 314 | # --------------------------------------------------------------------------- |
| 315 | |
| 316 | |
class TestFiltersIV:
    """Section IV: ``--author`` and ``-n`` filters plus the ``truncated`` flag."""

    def test_IV1_author_filter_matches_commits(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """IV1: --author filter returns only commits matching the author.

        Two commits are created with different ``--author`` values; filtering
        on ``gabriel`` must return at least one commit and none by
        ``bot-agent``.
        """
        monkeypatch.chdir(tmp_path)
        env = _env(tmp_path)
        runner.invoke(cli, ["init", "--domain", "code"], env=env)
        (tmp_path / "a.py").write_text("a\n")
        runner.invoke(cli, ["code", "add", "a.py"], env=env)
        runner.invoke(cli, ["commit", "-m", "gabriel commit", "--author", "gabriel"], env=env)

        (tmp_path / "b.py").write_text("b\n")
        runner.invoke(cli, ["code", "add", "b.py"], env=env)
        runner.invoke(cli, ["commit", "-m", "agent commit", "--author", "bot-agent"], env=env)

        data = _log_json(tmp_path, "--author", "gabriel")
        commits = data["commits"]
        # all()/any() over an empty list are vacuously satisfied, so an empty
        # result must be rejected explicitly or a filter that drops every
        # commit would still pass.
        assert commits, "--author filter returned no commits; expected the 'gabriel' commit"
        assert all("gabriel" in c["author"].lower() for c in commits), (
            f"--author filter returned non-matching commits: {[c['author'] for c in commits]}"
        )
        assert not any(c["author"] == "bot-agent" for c in commits)

    def test_IV2_author_filter_is_case_insensitive(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """IV2: --author filter is a case-insensitive substring match.

        The commit is authored as ``Gabriel``; queries for ``gabriel`` and
        ``GABRIEL`` must both find it and return the same commits.
        """
        monkeypatch.chdir(tmp_path)
        env = _env(tmp_path)
        runner.invoke(cli, ["init", "--domain", "code"], env=env)
        (tmp_path / "x.py").write_text("x\n")
        runner.invoke(cli, ["code", "add", "x.py"], env=env)
        runner.invoke(cli, ["commit", "-m", "msg", "--author", "Gabriel"], env=env)

        data_lower = _log_json(tmp_path, "--author", "gabriel")
        data_upper = _log_json(tmp_path, "--author", "GABRIEL")
        lower_ids = [c["commit_id"] for c in data_lower["commits"]]
        upper_ids = [c["commit_id"] for c in data_upper["commits"]]
        # Comparing lengths alone passes when both queries match nothing
        # (0 == 0), which is exactly what a case-sensitive filter would produce.
        assert lower_ids, "--author 'gabriel' did not match the commit authored by 'Gabriel'"
        assert upper_ids, "--author 'GABRIEL' did not match the commit authored by 'Gabriel'"
        assert lower_ids == upper_ids

    def test_IV3_limit_caps_commits(self, multi_commit_repo: pathlib.Path) -> None:
        """IV3: -n caps the number of commits returned."""
        data = _log_json(multi_commit_repo, "-n", "1")
        assert len(data["commits"]) == 1

    def test_IV4_truncated_true_when_limit_hit(self, multi_commit_repo: pathlib.Path) -> None:
        """IV4: truncated=true when -n limit is reached before exhausting history."""
        data = _log_json(multi_commit_repo, "-n", "1")
        assert data["truncated"] is True

    def test_IV5_truncated_false_when_all_fit(self, single_commit_repo: pathlib.Path) -> None:
        """IV5: truncated=false when limit is not reached (all commits returned)."""
        data = _log_json(single_commit_repo)
        assert data["truncated"] is False
| 368 | |
| 369 | |
| 370 | # --------------------------------------------------------------------------- |
| 371 | # V Edge cases |
| 372 | # --------------------------------------------------------------------------- |
| 373 | |
| 374 | |
class TestEdgeCasesV:
    """Section V: parent fields on the initial commit and on a merge commit."""

    def test_V1_initial_commit_parent_is_null(self, single_commit_repo: pathlib.Path) -> None:
        """V1: Initial commit has parent_commit_id=null and parent2_commit_id=null."""
        data = _log_json(single_commit_repo)
        initial = data["commits"][-1]
        assert initial["parent_commit_id"] is None
        assert initial["parent2_commit_id"] is None

    def test_V2_merge_commit_has_two_parents(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """V2: Merge commit has both parent_commit_id and parent2_commit_id set.

        Both parent fields are asserted to be non-null and sha256:-prefixed.
        """
        root = single_commit_repo
        env = _env(root)

        # Create and commit on a feature branch
        runner.invoke(cli, ["checkout", "-b", "feat/test"], env=env)
        (root / "feat.py").write_text("f = 1\n")
        runner.invoke(cli, ["code", "add", "feat.py"], env=env)
        feat_result = runner.invoke(cli, ["commit", "-m", "feat commit"], env=env)
        # Without this commit there is nothing to merge, so fail here with the
        # CLI output instead of at the merge step.
        assert feat_result.exit_code == 0, feat_result.output

        # Merge back into main — use --no-ff to force a merge commit
        # (a fast-forward would just move the pointer, creating no merge commit).
        runner.invoke(cli, ["checkout", "main"], env=env)
        merge_result = runner.invoke(cli, ["merge", "--no-ff", "feat/test"], env=env)
        assert merge_result.exit_code == 0, merge_result.output

        data = _log_json(root, "-n", "1")
        merge_commit = data["commits"][0]
        # First parent: the test name and docstring promise both parents.
        assert merge_commit["parent_commit_id"] is not None, (
            "Merge commit must have parent_commit_id set"
        )
        assert merge_commit["parent_commit_id"].startswith("sha256:"), (
            f"parent_commit_id must be sha256:-prefixed, got {merge_commit['parent_commit_id']!r}"
        )
        # Second parent: the merged-in branch tip.
        assert merge_commit["parent2_commit_id"] is not None, (
            "Merge commit must have parent2_commit_id set"
        )
        assert merge_commit["parent2_commit_id"].startswith("sha256:"), (
            f"parent2_commit_id must be sha256:-prefixed, got {merge_commit['parent2_commit_id']!r}"
        )
| 410 | |
| 411 | |
| 412 | # --------------------------------------------------------------------------- |
| 413 | # VI structured_delta |
| 414 | # --------------------------------------------------------------------------- |
| 415 | |
| 416 | |
class TestStructuredDeltaVI:
    """Section VI: presence and type of the ``structured_delta`` field."""

    def test_VI1_structured_delta_key_always_present(
        self, multi_commit_repo: pathlib.Path
    ) -> None:
        """VI1: no commit object omits the ``structured_delta`` key."""
        commits = _log_json(multi_commit_repo)["commits"]
        assert len(commits) >= 1
        for commit in commits:
            assert "structured_delta" in commit, (
                f"structured_delta missing from commit {commit.get('commit_id', '?')!r}"
            )

    def test_VI2_structured_delta_is_dict_with_ops_for_code_commit(
        self, single_commit_repo: pathlib.Path
    ) -> None:
        """VI2: a Python-file commit yields a dict whose ``ops`` entry is a list."""
        initial = _log_json(single_commit_repo)["commits"][-1]
        delta = initial["structured_delta"]
        assert isinstance(delta, dict), (
            f"Expected structured_delta to be a dict for a code commit, got {type(delta).__name__}"
        )
        assert "ops" in delta, (
            f"structured_delta dict must have an 'ops' key, got keys: {list(delta.keys())}"
        )
        ops = delta["ops"]
        assert isinstance(ops, list), (
            f"structured_delta['ops'] must be a list, got {type(ops).__name__}"
        )

    def test_VI3_structured_delta_type_is_dict_or_none(
        self, multi_commit_repo: pathlib.Path
    ) -> None:
        """VI3: ``structured_delta`` is a dict or None on every commit, nothing else."""
        for commit in _log_json(multi_commit_repo)["commits"]:
            delta = commit["structured_delta"]
            assert isinstance(delta, (dict, type(None))), (
                f"structured_delta must be dict or None, got {type(delta).__name__} "
                f"on commit {commit.get('commit_id', '?')!r}"
            )
File History
1 commit
sha256:5c98ba9dd33607ba1557d7c03c64020e71c27c1e7bbaa984e7a91f23d5297b14
feat: add signer_public_key to muse log --json output (VII …
Sonnet 4.6
20 days ago