"""Tests for the canonical ``muse log --json`` schema. Every commit object in the commits array must emit the same shape. Agents rely on this stability — missing fields break provenance tracking and force fragile ``dict.get`` guards. Canonical schema ---------------- :: { "truncated": bool, "commits": [ { "commit_id": str, // sha256:-prefixed "branch": str, "message": str, "author": str, // "" when user.handle not configured "agent_id": str, // "" when not an agent commit "model_id": str, // "" when not an agent commit "committed_at": str, // ISO-8601 "parent_commit_id": str | null, // sha256:-prefixed or null "parent2_commit_id": str | null, // sha256:-prefixed or null (merge) "snapshot_id": str | null, // sha256:-prefixed "sem_ver_bump": str | null, "breaking_changes": [str, ...], "metadata": {str: ...}, "files_added": [str, ...], // always populated in --json mode "files_removed": [str, ...], // always populated in --json mode "files_modified": [str, ...], // always populated in --json mode "structured_delta": dict | null // symbol-level diff; null for non-code commits } ] } Coverage matrix --------------- I Schema invariants (top-level shape) I1 Top-level keys: truncated + commits always present I2 Each commit has all required keys I3 commit_id is sha256:-prefixed I4 parent_commit_id is sha256:-prefixed or null I5 snapshot_id is sha256:-prefixed II File lists — always populated in --json mode, no --stat needed II1 files_added populated for a commit that added a file II2 files_modified populated for a commit that modified a file II3 files_removed populated for a commit that deleted a file II4 Initial commit: files_added non-empty, files_removed/modified empty III Agent provenance fields III1 agent_id present (empty string for non-agent commits) III2 model_id present (empty string for non-agent commits) III3 agent_id populated when --agent-id passed to commit III4 model_id populated when --model-id passed to commit IV Filters IV1 --author filter returns only matching commits IV2 --author filter is case-insensitive substring match IV3 -n / --limit caps the number of commits returned IV4 truncated=true when limit is hit IV5 truncated=false when all commits fit V Edge cases V1 Single commit (initial): parent_commit_id is null V2 Merge commit: parent2_commit_id is sha256:-prefixed (not null) VI structured_delta VI1 structured_delta key always present (never absent from commit object) VI2 structured_delta is a dict with an "ops" key for a code-file commit VI3 structured_delta is None when the commit produces no code-intelligence ops """ from __future__ import annotations from collections.abc import Mapping import json import pathlib import pytest from tests.cli_test_helper import CliRunner cli = None runner = CliRunner() _REQUIRED_COMMIT_KEYS = { "commit_id", "branch", "message", "author", "agent_id", "model_id", "committed_at", "parent_commit_id", "parent2_commit_id", "snapshot_id", "sem_ver_bump", "breaking_changes", "metadata", "files_added", "files_removed", "files_modified", "structured_delta", } _REQUIRED_TOP_KEYS = {"truncated", "commits"} def _env(root: pathlib.Path) -> Mapping[str, str]: return {"MUSE_REPO_ROOT": str(root)} def _log_json(root: pathlib.Path, *extra_args: str) -> Mapping[str, object]: result = runner.invoke(cli, ["log", "--json"] + list(extra_args), env=_env(root)) assert result.exit_code == 0, f"log --json failed: {result.output}" return json.loads(result.output.strip()) @pytest.fixture() def single_commit_repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path: """Code-domain repo with exactly one commit.""" monkeypatch.chdir(tmp_path) result = runner.invoke(cli, ["init", "--domain", "code"], env=_env(tmp_path)) assert result.exit_code == 0, result.output (tmp_path / "main.py").write_text("x = 1\n") runner.invoke(cli, ["code", "add", "main.py"], env=_env(tmp_path)) result = runner.invoke(cli, ["commit", "-m", "initial"], env=_env(tmp_path)) assert result.exit_code == 0, result.output return tmp_path @pytest.fixture() def multi_commit_repo(single_commit_repo: pathlib.Path) -> pathlib.Path: """Repo with 3 commits: add, modify, delete.""" root = single_commit_repo env = _env(root) # Commit 2: modify main.py + add extra.py (root / "main.py").write_text("x = 2\n") (root / "extra.py").write_text("e = 1\n") runner.invoke(cli, ["code", "add", "main.py", "extra.py"], env=env) runner.invoke(cli, ["commit", "-m", "modify and add"], env=env) # Commit 3: delete extra.py (root / "extra.py").unlink() runner.invoke(cli, ["code", "add", "extra.py"], env=env) runner.invoke(cli, ["commit", "-m", "delete extra"], env=env) return root # --------------------------------------------------------------------------- # I Schema invariants # --------------------------------------------------------------------------- class TestSchemaInvariantsI: def test_I1_top_level_keys(self, single_commit_repo: pathlib.Path) -> None: """I1: Top-level always has truncated + commits.""" data = _log_json(single_commit_repo) assert _REQUIRED_TOP_KEYS.issubset(data.keys()), ( f"Missing top-level keys: {_REQUIRED_TOP_KEYS - data.keys()}" ) assert isinstance(data["truncated"], bool) assert isinstance(data["commits"], list) def test_I2_each_commit_has_all_required_keys(self, single_commit_repo: pathlib.Path) -> None: """I2: Every commit object has all required keys.""" data = _log_json(single_commit_repo) assert len(data["commits"]) >= 1 for c in data["commits"]: missing = _REQUIRED_COMMIT_KEYS - c.keys() assert not missing, f"Commit missing keys: {missing}" def test_I3_commit_id_is_sha256_prefixed(self, single_commit_repo: pathlib.Path) -> None: """I3: commit_id is sha256:-prefixed.""" data = _log_json(single_commit_repo) for c in data["commits"]: assert c["commit_id"].startswith("sha256:"), ( f"commit_id must be sha256:-prefixed, got {c['commit_id']!r}" ) def test_I4_parent_commit_id_is_sha256_prefixed_or_null( self, multi_commit_repo: pathlib.Path ) -> None: """I4: parent_commit_id is sha256:-prefixed (non-null) or null (initial commit).""" data = _log_json(multi_commit_repo) commits = data["commits"] # Most recent commits (non-initial) must have sha256:-prefixed parent for c in commits[:-1]: assert c["parent_commit_id"] is not None assert c["parent_commit_id"].startswith("sha256:"), ( f"parent_commit_id must be sha256:-prefixed, got {c['parent_commit_id']!r}" ) # Initial commit: parent is null initial = commits[-1] assert initial["parent_commit_id"] is None def test_I5_snapshot_id_is_sha256_prefixed(self, single_commit_repo: pathlib.Path) -> None: """I5: snapshot_id is sha256:-prefixed when present.""" data = _log_json(single_commit_repo) for c in data["commits"]: if c["snapshot_id"] is not None: assert c["snapshot_id"].startswith("sha256:"), ( f"snapshot_id must be sha256:-prefixed, got {c['snapshot_id']!r}" ) # --------------------------------------------------------------------------- # II File lists — always populated in --json mode # --------------------------------------------------------------------------- class TestFileListsII: def test_II1_files_added_populated_no_stat_flag( self, single_commit_repo: pathlib.Path ) -> None: """II1: files_added populated in --json mode without --stat.""" data = _log_json(single_commit_repo) # The initial commit added main.py initial = data["commits"][-1] assert "main.py" in initial["files_added"], ( f"Expected main.py in files_added, got {initial['files_added']}" ) def test_II2_files_modified_populated(self, multi_commit_repo: pathlib.Path) -> None: """II2: files_modified populated for a modify commit.""" data = _log_json(multi_commit_repo) commits = data["commits"] # Second-most-recent commit modified main.py (and added extra.py) modify_commit = commits[1] # commits are newest-first assert "main.py" in modify_commit["files_modified"], ( f"Expected main.py in files_modified, got {modify_commit}" ) def test_II3_files_removed_populated(self, multi_commit_repo: pathlib.Path) -> None: """II3: files_removed populated for a delete commit.""" data = _log_json(multi_commit_repo) # Most recent commit deleted extra.py delete_commit = data["commits"][0] assert "extra.py" in delete_commit["files_removed"], ( f"Expected extra.py in files_removed, got {delete_commit}" ) def test_II4_initial_commit_files_removed_and_modified_empty( self, single_commit_repo: pathlib.Path ) -> None: """II4: Initial commit: files_removed and files_modified are empty lists.""" data = _log_json(single_commit_repo) initial = data["commits"][-1] assert initial["files_removed"] == [] assert initial["files_modified"] == [] # --------------------------------------------------------------------------- # III Agent provenance fields # --------------------------------------------------------------------------- class TestAgentProvenanceIII: def test_III1_agent_id_present_empty_for_non_agent_commit( self, single_commit_repo: pathlib.Path ) -> None: """III1: agent_id is always present; empty string for non-agent commits.""" data = _log_json(single_commit_repo) for c in data["commits"]: assert "agent_id" in c, "agent_id must always be present" assert isinstance(c["agent_id"], str) def test_III2_model_id_present_empty_for_non_agent_commit( self, single_commit_repo: pathlib.Path ) -> None: """III2: model_id is always present; empty string for non-agent commits.""" data = _log_json(single_commit_repo) for c in data["commits"]: assert "model_id" in c, "model_id must always be present" assert isinstance(c["model_id"], str) def test_III3_agent_id_populated_when_passed_to_commit( self, single_commit_repo: pathlib.Path ) -> None: """III3: agent_id reflects --agent-id passed at commit time.""" root = single_commit_repo env = _env(root) (root / "agent_file.py").write_text("a = 1\n") runner.invoke(cli, ["code", "add", "agent_file.py"], env=env) result = runner.invoke( cli, ["commit", "-m", "agent commit", "--agent-id", "test-agent-42"], env=env, ) assert result.exit_code == 0, result.output data = _log_json(root, "-n", "1") c = data["commits"][0] assert c["agent_id"] == "test-agent-42", ( f"Expected agent_id='test-agent-42', got {c['agent_id']!r}" ) def test_III4_model_id_populated_when_passed_to_commit( self, single_commit_repo: pathlib.Path ) -> None: """III4: model_id reflects --model-id passed at commit time.""" root = single_commit_repo env = _env(root) (root / "model_file.py").write_text("m = 1\n") runner.invoke(cli, ["code", "add", "model_file.py"], env=env) result = runner.invoke( cli, ["commit", "-m", "model commit", "--model-id", "claude-sonnet-4-6"], env=env, ) assert result.exit_code == 0, result.output data = _log_json(root, "-n", "1") c = data["commits"][0] assert c["model_id"] == "claude-sonnet-4-6", ( f"Expected model_id='claude-sonnet-4-6', got {c['model_id']!r}" ) # --------------------------------------------------------------------------- # IV Filters # --------------------------------------------------------------------------- class TestFiltersIV: def test_IV1_author_filter_matches_commits( self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> None: """IV1: --author filter returns only commits matching the author.""" monkeypatch.chdir(tmp_path) env = _env(tmp_path) runner.invoke(cli, ["init", "--domain", "code"], env=env) (tmp_path / "a.py").write_text("a\n") runner.invoke(cli, ["code", "add", "a.py"], env=env) runner.invoke(cli, ["commit", "-m", "gabriel commit", "--author", "gabriel"], env=env) (tmp_path / "b.py").write_text("b\n") runner.invoke(cli, ["code", "add", "b.py"], env=env) runner.invoke(cli, ["commit", "-m", "agent commit", "--author", "bot-agent"], env=env) data = _log_json(tmp_path, "--author", "gabriel") assert all("gabriel" in c["author"].lower() for c in data["commits"]), ( f"--author filter returned non-matching commits: {[c['author'] for c in data['commits']]}" ) assert not any(c["author"] == "bot-agent" for c in data["commits"]) def test_IV2_author_filter_is_case_insensitive( self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> None: """IV2: --author filter is a case-insensitive substring match.""" monkeypatch.chdir(tmp_path) env = _env(tmp_path) runner.invoke(cli, ["init", "--domain", "code"], env=env) (tmp_path / "x.py").write_text("x\n") runner.invoke(cli, ["code", "add", "x.py"], env=env) runner.invoke(cli, ["commit", "-m", "msg", "--author", "Gabriel"], env=env) data_lower = _log_json(tmp_path, "--author", "gabriel") data_upper = _log_json(tmp_path, "--author", "GABRIEL") assert len(data_lower["commits"]) == len(data_upper["commits"]) def test_IV3_limit_caps_commits(self, multi_commit_repo: pathlib.Path) -> None: """IV3: -n caps the number of commits returned.""" data = _log_json(multi_commit_repo, "-n", "1") assert len(data["commits"]) == 1 def test_IV4_truncated_true_when_limit_hit(self, multi_commit_repo: pathlib.Path) -> None: """IV4: truncated=true when -n limit is reached before exhausting history.""" data = _log_json(multi_commit_repo, "-n", "1") assert data["truncated"] is True def test_IV5_truncated_false_when_all_fit(self, single_commit_repo: pathlib.Path) -> None: """IV5: truncated=false when limit is not reached (all commits returned).""" data = _log_json(single_commit_repo) assert data["truncated"] is False # --------------------------------------------------------------------------- # V Edge cases # --------------------------------------------------------------------------- class TestEdgeCasesV: def test_V1_initial_commit_parent_is_null(self, single_commit_repo: pathlib.Path) -> None: """V1: Initial commit has parent_commit_id=null and parent2_commit_id=null.""" data = _log_json(single_commit_repo) initial = data["commits"][-1] assert initial["parent_commit_id"] is None assert initial["parent2_commit_id"] is None def test_V2_merge_commit_has_two_parents( self, single_commit_repo: pathlib.Path ) -> None: """V2: Merge commit has both parent_commit_id and parent2_commit_id set.""" root = single_commit_repo env = _env(root) # Create and commit on a feature branch runner.invoke(cli, ["checkout", "-b", "feat/test"], env=env) (root / "feat.py").write_text("f = 1\n") runner.invoke(cli, ["code", "add", "feat.py"], env=env) runner.invoke(cli, ["commit", "-m", "feat commit"], env=env) # Merge back into main — use --no-ff to force a merge commit # (a fast-forward would just move the pointer, creating no merge commit). runner.invoke(cli, ["checkout", "main"], env=env) merge_result = runner.invoke(cli, ["merge", "--no-ff", "feat/test"], env=env) assert merge_result.exit_code == 0, merge_result.output data = _log_json(root, "-n", "1") merge_commit = data["commits"][0] assert merge_commit["parent2_commit_id"] is not None, ( "Merge commit must have parent2_commit_id set" ) assert merge_commit["parent2_commit_id"].startswith("sha256:"), ( f"parent2_commit_id must be sha256:-prefixed, got {merge_commit['parent2_commit_id']!r}" ) # --------------------------------------------------------------------------- # VI structured_delta # --------------------------------------------------------------------------- class TestStructuredDeltaVI: def test_VI1_structured_delta_key_always_present( self, multi_commit_repo: pathlib.Path ) -> None: """VI1: structured_delta is always present in every commit object (never absent).""" data = _log_json(multi_commit_repo) assert len(data["commits"]) >= 1 for c in data["commits"]: assert "structured_delta" in c, ( f"structured_delta missing from commit {c.get('commit_id', '?')!r}" ) def test_VI2_structured_delta_is_dict_with_ops_for_code_commit( self, single_commit_repo: pathlib.Path ) -> None: """VI2: structured_delta is a dict with an 'ops' list for a Python file commit.""" data = _log_json(single_commit_repo) initial = data["commits"][-1] delta = initial["structured_delta"] assert isinstance(delta, dict), ( f"Expected structured_delta to be a dict for a code commit, got {type(delta).__name__}" ) assert "ops" in delta, ( f"structured_delta dict must have an 'ops' key, got keys: {list(delta.keys())}" ) assert isinstance(delta["ops"], list), ( f"structured_delta['ops'] must be a list, got {type(delta['ops']).__name__}" ) def test_VI3_structured_delta_type_is_dict_or_none( self, multi_commit_repo: pathlib.Path ) -> None: """VI3: structured_delta is always a dict or None — never another type.""" data = _log_json(multi_commit_repo) for c in data["commits"]: delta = c["structured_delta"] assert delta is None or isinstance(delta, dict), ( f"structured_delta must be dict or None, got {type(delta).__name__} " f"on commit {c.get('commit_id', '?')!r}" )