"""Comprehensive tests for ``muse read-commit``. Coverage tiers -------------- - Unit: _ALL_FIELDS completeness - Integration: JSON/text format, prefix resolution, --fields filter, parent chain - Security: ANSI in branch/author/message stripped in text mode - Stress: 200 sequential reads, --fields on large schema """ from __future__ import annotations import datetime import json import pathlib from muse.core.errors import ExitCode from muse.core.ids import hash_commit, hash_snapshot from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from tests.cli_test_helper import CliRunner, InvokeResult from muse.core.types import fake_id, long_id, short_id from muse.core.paths import heads_dir, muse_dir runner = CliRunner() # Module-level constants so every test uses the same deterministic inputs. _SNAP_ID: str = hash_snapshot({}) _COMMITTED_AT: datetime.datetime = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: repo = tmp_path / "repo" muse = muse_dir(repo) for sub in ("objects", "commits", "snapshots", "refs/heads"): (muse / sub).mkdir(parents=True) (muse / "HEAD").write_text("ref: refs/heads/main") (muse / "repo.json").write_text(json.dumps({"repo_id": "test-repo", "domain": "code"})) return repo def _commit( repo: pathlib.Path, *, branch: str = "main", message: str = "test commit", author: str = "tester", parent: str | None = None, agent_id: str = "", model_id: str = "", ) -> str: """Write a commit with a real content-addressed ID; return the commit_id.""" parent_ids: list[str] = [parent] if parent else [] commit_id = hash_commit( parent_ids=parent_ids, snapshot_id=_SNAP_ID, message=message, committed_at_iso=_COMMITTED_AT.isoformat(), author=author, ) write_snapshot(repo, SnapshotRecord( snapshot_id=_SNAP_ID, manifest={}, created_at=_COMMITTED_AT, )) rec = CommitRecord( commit_id=commit_id, branch=branch, snapshot_id=_SNAP_ID, message=message, committed_at=_COMMITTED_AT, author=author, parent_commit_id=parent, agent_id=agent_id, model_id=model_id, ) write_commit(repo, rec) return commit_id def _rc(repo: pathlib.Path, *args: str) -> InvokeResult: from muse.cli.app import main as cli return runner.invoke( cli, ["read-commit", *args], env={"MUSE_REPO_ROOT": str(repo)}, ) def _rcj(repo: pathlib.Path, *args: str) -> InvokeResult: """Like _rc but always passes --json.""" return _rc(repo, "--json", *args) # --------------------------------------------------------------------------- # Unit — _ALL_FIELDS # --------------------------------------------------------------------------- class TestAllFields: def test_all_fields_matches_commitdict_annotations(self) -> None: """_ALL_FIELDS must be exactly the keys in CommitDict.__annotations__.""" from muse.cli.commands.read_commit import _ALL_FIELDS from muse.core.commits import CommitDict assert _ALL_FIELDS == frozenset(CommitDict.__annotations__.keys()) def test_required_fields_present(self) -> None: from muse.cli.commands.read_commit import _ALL_FIELDS for field in ("commit_id", "branch", "message", "committed_at", "agent_id", "model_id", "reviewed_by"): assert field in _ALL_FIELDS # --------------------------------------------------------------------------- # Integration — JSON format # --------------------------------------------------------------------------- class TestJsonFormat: def test_full_schema_returned(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, message="hello world") result = _rcj(repo, cid) assert result.exit_code == 0 data = json.loads(result.output) assert data["commit_id"] == cid assert data["message"] == "hello world" assert data["branch"] == "main" def test_json_flag_shorthand(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, message="shorthand test") result = _rc(repo, "--json", cid) assert result.exit_code == 0 assert "commit_id" in json.loads(result.output) def test_agent_provenance_fields_present(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, agent_id="my-agent", model_id="claude-4") data = json.loads(_rcj(repo, cid).output) assert data["agent_id"] == "my-agent" assert data["model_id"] == "claude-4" def test_parent_commit_id_null_for_root(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, message="root commit") data = json.loads(_rcj(repo, cid).output) assert data["parent_commit_id"] is None def test_parent_commit_id_set_for_child(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) parent = _commit(repo, message="parent commit") child = _commit(repo, message="child commit", parent=parent) data = json.loads(_rcj(repo, child).output) assert data["parent_commit_id"] == parent def test_committed_at_is_iso8601(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, message="iso date test") data = json.loads(_rcj(repo, cid).output) # Should parse without error datetime.datetime.fromisoformat(data["committed_at"]) def test_snapshot_id_in_output(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, message="snapshot test") data = json.loads(_rcj(repo, cid).output) import re assert re.fullmatch(r"sha256:[0-9a-f]{64}", data["snapshot_id"]) # --------------------------------------------------------------------------- # Integration — text format # --------------------------------------------------------------------------- class TestTextFormat: def test_text_format_contains_commit_prefix(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, message="text test") result = _rc(repo, cid) assert result.exit_code == 0 line = result.output.strip() assert short_id(cid) in line def test_text_format_contains_branch(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, branch="main", message="branch test") result = _rc(repo, cid) assert "main" in result.output def test_text_format_contains_message(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, message="my commit message") result = _rc(repo, cid) assert "my commit message" in result.output def test_text_multiline_message_flattened(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, message="line one\nline two") result = _rc(repo, cid) # Newline replaced with space — output stays on one line assert "\n" not in result.output.strip() assert "line one" in result.output # --------------------------------------------------------------------------- # Integration — prefix resolution # --------------------------------------------------------------------------- class TestPrefixResolution: def test_sha256_short_prefix_resolves(self, tmp_path: pathlib.Path) -> None: """sha256:<8-hex> prefix form resolves to the full commit.""" repo = _make_repo(tmp_path) cid = _commit(repo, message="prefix resolve test") # cid is "sha256:<64-hex>"; take long_id(first 8 hex chars = 15 chars) short_ref = cid[:15] result = _rcj(repo, short_ref) assert result.exit_code == 0 assert json.loads(result.output)["commit_id"] == cid def test_sha256_full_id_resolves(self, tmp_path: pathlib.Path) -> None: """Full sha256:<64-hex> canonical form resolves directly.""" repo = _make_repo(tmp_path) cid = _commit(repo, message="full id resolve test") result = _rcj(repo, cid) assert result.exit_code == 0 assert json.loads(result.output)["commit_id"] == cid def test_ambiguous_prefix_errors(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) # "msg 205" and "msg 321" produce commit IDs sharing the "990f" 4-char # hex prefix under the unified-object-store formula (author="tester", # empty manifest, 2026-01-01T00:00:00+00:00). # Verified by precomputation; changing _SNAP_ID, _COMMITTED_AT, or # author requires updating these message strings. cid1 = _commit(repo, message="msg 205") cid2 = _commit(repo, message="msg 321") result = _rc(repo, "sha256:990f") assert result.exit_code == ExitCode.USER_ERROR data = json.loads(result.stderr) assert "ambiguous" in data["error"] assert set(data["candidates"]) == {cid1, cid2} def test_missing_commit_errors(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) # Valid canonical ID that doesn't exist in the store result = _rc(repo, long_id(f"dead{'beef' * 15}")) assert result.exit_code == ExitCode.USER_ERROR data = json.loads(result.stderr) assert "not found" in data["error"] def test_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None: """Bare hex without sha256: prefix is rejected with a clear error.""" repo = _make_repo(tmp_path) result = _rc(repo, "a" * 64) assert result.exit_code == ExitCode.USER_ERROR data = json.loads(result.stderr) assert "sha256:" in data["error"] def test_bare_short_hex_rejected(self, tmp_path: pathlib.Path) -> None: """Short bare hex prefix is rejected — sha256: form required.""" repo = _make_repo(tmp_path) result = _rc(repo, "deadbeef") assert result.exit_code == ExitCode.USER_ERROR data = json.loads(result.stderr) assert "sha256:" in data["error"] def test_invalid_commit_id_errors(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _rc(repo, f"ZZZZ{'a' * 60}") assert result.exit_code == ExitCode.USER_ERROR class TestSymbolicRefResolution: def test_head_resolves(self, tmp_path: pathlib.Path) -> None: """HEAD resolves to the tip of the current branch.""" repo = _make_repo(tmp_path) cid = _commit(repo, branch="main", message="head test") # Write branch ref so HEAD resolves (heads_dir(repo) / "main").write_text(cid, encoding="utf-8") result = _rcj(repo, "HEAD") assert result.exit_code == 0 assert json.loads(result.output)["commit_id"] == cid def test_branch_name_resolves(self, tmp_path: pathlib.Path) -> None: """A branch name resolves to the tip commit of that branch.""" repo = _make_repo(tmp_path) cid = _commit(repo, branch="dev", message="branch ref test") (heads_dir(repo) / "dev").write_text(cid, encoding="utf-8") result = _rcj(repo, "dev") assert result.exit_code == 0 assert json.loads(result.output)["commit_id"] == cid def test_tilde_notation_resolves(self, tmp_path: pathlib.Path) -> None: """HEAD~1 resolves to the parent of the HEAD commit.""" repo = _make_repo(tmp_path) parent_cid = _commit(repo, branch="main", message="parent") child_cid = _commit(repo, branch="main", message="child", parent=parent_cid) (heads_dir(repo) / "main").write_text(child_cid, encoding="utf-8") result = _rcj(repo, "HEAD~1") assert result.exit_code == 0 assert json.loads(result.output)["commit_id"] == parent_cid # --------------------------------------------------------------------------- # Integration — --fields filter # --------------------------------------------------------------------------- class TestFieldsFilter: def test_single_field(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, message="filtered") result = _rcj(repo, "--fields", "message", cid) assert result.exit_code == 0 data = json.loads(result.output) # duration_ms and exit_code are always-present metadata fields — not commit fields. commit_keys = set(data.keys()) - {"duration_ms", "exit_code", "muse_version", "schema", "timestamp", "warnings"} assert commit_keys == {"message"} assert data["message"] == "filtered" def test_multiple_fields(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, branch="dev", message="multi field test") result = _rcj(repo, "--fields", "commit_id,branch,message", cid) data = json.loads(result.output) commit_keys = set(data.keys()) - {"duration_ms", "exit_code", "muse_version", "schema", "timestamp", "warnings"} assert commit_keys == {"commit_id", "branch", "message"} assert data["commit_id"] == cid assert data["branch"] == "dev" def test_agent_fields_filter(self, tmp_path: pathlib.Path) -> None: """Agents extracting provenance fields should get exactly what they asked for.""" repo = _make_repo(tmp_path) cid = _commit(repo, agent_id="audit-bot", model_id="claude-4") result = _rcj(repo, "--fields", "agent_id,model_id,toolchain_id", cid) data = json.loads(result.output) commit_keys = set(data.keys()) - {"duration_ms", "exit_code", "muse_version", "schema", "timestamp", "warnings"} assert commit_keys == {"agent_id", "model_id", "toolchain_id"} assert data["agent_id"] == "audit-bot" assert data["model_id"] == "claude-4" def test_unknown_field_errors(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, message="unknown field test") result = _rc(repo, "--fields", "nonexistent_field", cid) assert result.exit_code == ExitCode.USER_ERROR def test_fields_with_text_format_errors(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, message="fields text error test") result = _rc(repo, "--fields", "commit_id", cid) assert result.exit_code == ExitCode.USER_ERROR def test_fields_whitespace_trimmed(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, message="whitespace trim test") result = _rcj(repo, "--fields", " commit_id , message ", cid) assert result.exit_code == 0 data = json.loads(result.output) assert "commit_id" in data assert "message" in data # --------------------------------------------------------------------------- # Security # --------------------------------------------------------------------------- class TestSecurity: def test_ansi_in_branch_stripped_in_text(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) _commit(repo, branch="main") # Write an malicious commit directly, bypassing the normal helper. # The commit_id must be a real hash of the stored fields for read_commit # to pass content-hash verification. from muse.core.commits import write_commit as _wc malicious_message = "test" malicious_cid = hash_commit( parent_ids=[], snapshot_id=_SNAP_ID, message=malicious_message, committed_at_iso=_COMMITTED_AT.isoformat(), ) malicious_rec = CommitRecord( commit_id=malicious_cid, branch="\x1b[31mmalicious\x1b[0m", snapshot_id=_SNAP_ID, message=malicious_message, committed_at=_COMMITTED_AT, ) _wc(repo, malicious_rec) result = _rc(repo, malicious_cid) assert result.exit_code == 0 assert "\x1b" not in result.output def test_ansi_in_message_stripped_in_text(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) malicious_snap_id = fake_id("malicious-snap") malicious_message = "\x1b[31mmalicious\x1b[0m" malicious_committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) malicious_cid = hash_commit( parent_ids=[], snapshot_id=malicious_snap_id, message=malicious_message, committed_at_iso=malicious_committed_at.isoformat(), ) malicious_rec = CommitRecord( commit_id=malicious_cid, branch="main", snapshot_id=malicious_snap_id, message=malicious_message, committed_at=malicious_committed_at, ) write_commit(repo, malicious_rec) result = _rc(repo, malicious_cid) assert result.exit_code == 0 assert "\x1b" not in result.output def test_ansi_in_commit_id_rejected(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _rc(repo, f"\x1b[31m{'a' * 64}") assert result.exit_code == ExitCode.USER_ERROR def test_no_traceback_on_bad_input(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _rc(repo, "not-valid") assert "Traceback" not in result.output # --------------------------------------------------------------------------- # Stress # --------------------------------------------------------------------------- class TestStress: def test_200_sequential_reads(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, message="stable") for i in range(200): result = _rcj(repo, cid) assert result.exit_code == 0, f"failed at iteration {i}" assert json.loads(result.output)["message"] == "stable" def test_fields_filter_200_iterations(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _commit(repo, agent_id="bot") for i in range(200): result = _rcj(repo, "--fields", "commit_id,agent_id", cid) assert result.exit_code == 0, f"failed at iteration {i}" data = json.loads(result.output) assert data["agent_id"] == "bot" class TestRegisterFlags: def _parse(self, *args: str) -> "argparse.Namespace": import argparse from muse.cli.commands.read_commit import register p = argparse.ArgumentParser() subs = p.add_subparsers() register(subs) return p.parse_args(["read-commit", fake_id("a"), *args]) def test_json_short_flag(self) -> None: args = self._parse("-j") assert args.json_out is True def test_json_long_flag(self) -> None: args = self._parse("--json") assert args.json_out is True def test_default_no_json(self) -> None: args = self._parse() assert args.json_out is False