"""Tests for ``muse snapshot-diff``. Verifies categorisation of added/modified/deleted paths, resolution of snapshot IDs, commit IDs, and branch names, text-format output, and error handling for unresolvable refs. """ from __future__ import annotations import datetime import json import pathlib from tests.cli_test_helper import CliRunner cli = None # argparse migration — CliRunner ignores this arg from muse.core.errors import ExitCode from muse.core.object_store import write_object from muse.core.ids import hash_commit, hash_snapshot from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from muse.core.types import Manifest, blob_id from muse.core.paths import head_path, muse_dir, ref_path runner = CliRunner() # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _init_repo(path: pathlib.Path) -> pathlib.Path: muse = muse_dir(path) (muse / "commits").mkdir(parents=True) (muse / "snapshots").mkdir(parents=True) (muse / "objects").mkdir(parents=True) (muse / "refs" / "heads").mkdir(parents=True) (muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8") (muse / "repo.json").write_text( json.dumps({"repo_id": "test-repo", "domain": "midi"}), encoding="utf-8" ) return path def _env(repo: pathlib.Path) -> Manifest: return {"MUSE_REPO_ROOT": str(repo)} def _obj(repo: pathlib.Path, content: bytes) -> str: oid = blob_id(content) write_object(repo, oid, content) return oid def _snap(repo: pathlib.Path, manifest: Manifest) -> str: sid = hash_snapshot(manifest) write_snapshot( repo, SnapshotRecord( snapshot_id=sid, manifest=manifest, created_at=datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc), ), ) return sid def _commit(repo: pathlib.Path, tag: str, sid: str, branch: str = "main") -> str: committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) cid = hash_commit( parent_ids=[], snapshot_id=sid, message=tag, committed_at_iso=committed_at.isoformat(), author="tester", ) write_commit( repo, CommitRecord( commit_id=cid, branch=branch, snapshot_id=sid, message=tag, committed_at=committed_at, author="tester", parent_commit_id=None, ), ) ref = ref_path(repo, branch) ref.write_text(cid, encoding="utf-8") return cid # --------------------------------------------------------------------------- # Tests # --------------------------------------------------------------------------- class TestSnapshotDiff: def test_added_deleted_categorised_correctly(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) shared = _obj(repo, b"shared") new_obj = _obj(repo, b"new") sid_a = _snap(repo, {"shared.mid": shared, "old.mid": shared}) sid_b = _snap(repo, {"shared.mid": shared, "new.mid": new_obj}) result = runner.invoke(cli, ["snapshot-diff", "--json", sid_a, sid_b], env=_env(repo)) assert result.exit_code == 0, result.output data = json.loads(result.stdout) assert [e["path"] for e in data["added"]] == ["new.mid"] assert [e["path"] for e in data["deleted"]] == ["old.mid"] assert data["modified"] == [] assert data["total_changes"] == 2 def test_modified_entry_contains_both_object_ids(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) v1 = _obj(repo, b"v1") v2 = _obj(repo, b"v2") sid_a = _snap(repo, {"track.mid": v1}) sid_b = _snap(repo, {"track.mid": v2}) result = runner.invoke(cli, ["snapshot-diff", "--json", sid_a, sid_b], env=_env(repo)) assert result.exit_code == 0, result.output data = json.loads(result.stdout) assert len(data["modified"]) == 1 mod = data["modified"][0] assert mod["path"] == "track.mid" assert mod["object_id_a"] == v1 assert mod["object_id_b"] == v2 def test_zero_changes_when_snapshots_identical(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) obj = _obj(repo, b"same") sid = _snap(repo, {"f.mid": obj}) result = runner.invoke(cli, ["snapshot-diff", "--json", sid, sid], env=_env(repo)) assert result.exit_code == 0, result.output data = json.loads(result.stdout) assert data["total_changes"] == 0 def test_resolves_by_branch_name(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) obj_a = _obj(repo, b"a") obj_b = _obj(repo, b"b") _commit(repo, "cmt-main", _snap(repo, {"a.mid": obj_a}), branch="main") _commit(repo, "cmt-dev", _snap(repo, {"b.mid": obj_b}), branch="dev") (head_path(repo)).write_text("ref: refs/heads/main", encoding="utf-8") result = runner.invoke(cli, ["snapshot-diff", "--json", "main", "dev"], env=_env(repo)) assert result.exit_code == 0, result.output data = json.loads(result.stdout) assert data["total_changes"] == 2 def test_text_format_shows_status_letters(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) shared = _obj(repo, b"s") new_obj = _obj(repo, b"n") sid_a = _snap(repo, {"gone.mid": shared}) sid_b = _snap(repo, {"new.mid": new_obj}) result = runner.invoke( cli, ["snapshot-diff", sid_a, sid_b], env=_env(repo) ) assert result.exit_code == 0, result.output assert "A new.mid" in result.stdout assert "D gone.mid" in result.stdout def test_stat_flag_appends_summary(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) sid_a = _snap(repo, {"gone.mid": _obj(repo, b"g")}) sid_b = _snap(repo, {"new.mid": _obj(repo, b"n")}) result = runner.invoke( cli, ["snapshot-diff", "--stat", sid_a, sid_b], env=_env(repo), ) assert result.exit_code == 0, result.output assert "added" in result.stdout assert "deleted" in result.stdout def test_unresolvable_ref_exits_user_error(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) result = runner.invoke( cli, ["snapshot-diff", "no-such-thing", "also-missing", "--json"], env=_env(repo) ) assert result.exit_code == ExitCode.USER_ERROR assert "error" in json.loads(result.stderr) def test_results_sorted_lexicographically(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) sid_a = _snap(repo, {}) sid_b = _snap( repo, {"z.mid": _obj(repo, b"z"), "a.mid": _obj(repo, b"a"), "m.mid": _obj(repo, b"m")} ) result = runner.invoke(cli, ["snapshot-diff", "--json", sid_a, sid_b], env=_env(repo)) assert result.exit_code == 0, result.output data = json.loads(result.stdout) added_paths = [e["path"] for e in data["added"]] assert added_paths == sorted(added_paths) class TestSnapshotDiffStdin: """Tests for ``--stdin`` batch mode.""" def test_single_pair_via_stdin_json(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) oid_a = _obj(repo, b"a") oid_b = _obj(repo, b"b") sid_a = _snap(repo, {"a.mid": oid_a}) sid_b = _snap(repo, {"b.mid": oid_b}) stdin = f"{sid_a} {sid_b}\n" result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin) assert result.exit_code == 0, result.output lines = [ln for ln in result.stdout.strip().splitlines() if ln] assert len(lines) == 1 data = json.loads(lines[0]) assert data["snapshot_a"] == sid_a assert data["snapshot_b"] == sid_b assert len(data["added"]) == 1 assert len(data["deleted"]) == 1 assert data["total_changes"] == 2 def test_multiple_pairs_emit_ndjson(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) oid = _obj(repo, b"x") sid1 = _snap(repo, {"x.mid": oid}) sid2 = _snap(repo, {}) sid3 = _snap(repo, {"x.mid": oid, "y.mid": _obj(repo, b"y")}) stdin = f"{sid1} {sid2}\n{sid2} {sid3}\n" result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin) assert result.exit_code == 0, result.output lines = [ln for ln in result.stdout.strip().splitlines() if ln] assert len(lines) == 2 first = json.loads(lines[0]) second = json.loads(lines[1]) assert first["snapshot_a"] == sid1 assert first["snapshot_b"] == sid2 assert second["snapshot_a"] == sid2 assert second["snapshot_b"] == sid3 def test_invalid_ref_reported_inline_not_exit_error(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) oid = _obj(repo, b"ok") sid_a = _snap(repo, {"f.mid": oid}) sid_b = _snap(repo, {}) # First line is bad ref, second is valid bad_ref = "a" * 64 # valid OID format but not in store stdin = f"{bad_ref} {bad_ref}\n{sid_a} {sid_b}\n" result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin) assert result.exit_code == 0 # batch mode always exits 0 lines = [ln for ln in result.stdout.strip().splitlines() if ln] assert len(lines) == 2 first = json.loads(lines[0]) assert "error" in first second = json.loads(lines[1]) assert "error" not in second assert second["total_changes"] == 1 def test_empty_lines_and_comments_skipped(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) sid = _snap(repo, {}) stdin = f"\n# this is a comment\n\n{sid} {sid}\n\n" result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin) assert result.exit_code == 0, result.output lines = [ln for ln in result.stdout.strip().splitlines() if ln] assert len(lines) == 1 data = json.loads(lines[0]) assert data["total_changes"] == 0 def test_malformed_line_single_token_reported_inline(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) sid = _snap(repo, {}) stdin = f"only-one-token\n{sid} {sid}\n" result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin) assert result.exit_code == 0 lines = [ln for ln in result.stdout.strip().splitlines() if ln] assert len(lines) == 2 first = json.loads(lines[0]) assert "error" in first second = json.loads(lines[1]) assert "error" not in second def test_empty_stdin_produces_no_output(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input="") assert result.exit_code == 0 assert result.stdout.strip() == "" def test_stdin_text_format_blank_line_separated(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) oid_a = _obj(repo, b"a") oid_b = _obj(repo, b"b") sid1 = _snap(repo, {"a.mid": oid_a}) sid2 = _snap(repo, {"b.mid": oid_b}) sid3 = _snap(repo, {}) stdin = f"{sid1} {sid2}\n{sid2} {sid3}\n" result = runner.invoke( cli, ["snapshot-diff", "--stdin", ], env=_env(repo), input=stdin ) assert result.exit_code == 0, result.output output = result.stdout # Two diffs separated by a blank line assert "A b.mid" in output or "D a.mid" in output # There should be a blank-line separator between the two pairs blocks = [b.strip() for b in output.split("\n\n") if b.strip()] assert len(blocks) == 2 def test_stdin_all_errors_still_exits_0(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) bad = "b" * 64 # valid format, not in store stdin = f"{bad} {bad}\n{bad} {bad}\n" result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin) assert result.exit_code == 0 lines = [ln for ln in result.stdout.strip().splitlines() if ln] assert all("error" in json.loads(ln) for ln in lines) def test_stdin_zero_change_pair_included(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) sid = _snap(repo, {"f.mid": _obj(repo, b"f")}) stdin = f"{sid} {sid}\n" result = runner.invoke(cli, ["snapshot-diff", "--json", "--stdin"], env=_env(repo), input=stdin) assert result.exit_code == 0, result.output data = json.loads(result.stdout.strip()) assert data["total_changes"] == 0 class TestSnapshotDiffEdgeCases: """Edge cases not covered by the primary test classes.""" def test_bad_format_value_exits_user_error(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) sid = _snap(repo, {}) result = runner.invoke( cli, ["snapshot-diff", "--only", "xml", sid, sid], env=_env(repo) ) assert result.exit_code != 0 def test_ref_a_provided_ref_b_missing_exits_user_error(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) sid = _snap(repo, {}) result = runner.invoke(cli, ["snapshot-diff", "--json", sid], env=_env(repo)) assert result.exit_code == ExitCode.USER_ERROR def test_raw_with_zero_changes_produces_no_diff_lines(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) sid = _snap(repo, {"f.mid": _obj(repo, b"same")}) result = runner.invoke( cli, ["snapshot-diff", "--raw", sid, sid], env=_env(repo) ) assert result.exit_code == 0, result.output # No A/M/D lines when there are no changes. for line in result.stdout.splitlines(): assert not line.startswith(("A ", "M ", "D ")) def test_json_shorthand_flag_accepted(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) sid = _snap(repo, {"f.mid": _obj(repo, b"x")}) result = runner.invoke(cli, ["snapshot-diff", "--json", sid, sid], env=_env(repo)) assert result.exit_code == 0, result.output data = json.loads(result.stdout) assert data["total_changes"] == 0 def test_no_args_no_stdin_exits_user_error(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) result = runner.invoke(cli, ["snapshot-diff"], env=_env(repo)) assert result.exit_code == ExitCode.USER_ERROR class TestSnapshotDiffRaw: """Tests for ``--raw`` flag (OIDs included in text output).""" def test_raw_added_includes_object_id(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) oid = _obj(repo, b"new-content") sid_a = _snap(repo, {}) sid_b = _snap(repo, {"new.mid": oid}) result = runner.invoke( cli, ["snapshot-diff", "--raw", sid_a, sid_b], env=_env(repo) ) assert result.exit_code == 0, result.output assert oid in result.stdout assert "A" in result.stdout assert "new.mid" in result.stdout def test_raw_deleted_includes_object_id(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) oid = _obj(repo, b"old-content") sid_a = _snap(repo, {"gone.mid": oid}) sid_b = _snap(repo, {}) result = runner.invoke( cli, ["snapshot-diff", "--raw", sid_a, sid_b], env=_env(repo) ) assert result.exit_code == 0, result.output assert oid in result.stdout assert "D" in result.stdout assert "gone.mid" in result.stdout def test_raw_modified_includes_both_object_ids(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) oid_a = _obj(repo, b"version-1") oid_b = _obj(repo, b"version-2") sid_a = _snap(repo, {"track.mid": oid_a}) sid_b = _snap(repo, {"track.mid": oid_b}) result = runner.invoke( cli, ["snapshot-diff", "--raw", sid_a, sid_b], env=_env(repo) ) assert result.exit_code == 0, result.output assert oid_a in result.stdout assert oid_b in result.stdout assert "M" in result.stdout assert "track.mid" in result.stdout def test_text_without_raw_omits_object_ids(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) oid = _obj(repo, b"some-content") sid_a = _snap(repo, {}) sid_b = _snap(repo, {"file.mid": oid}) result = runner.invoke( cli, ["snapshot-diff", sid_a, sid_b], env=_env(repo) ) assert result.exit_code == 0, result.output # OID should NOT appear in non-raw text output assert oid not in result.stdout assert "A file.mid" in result.stdout def test_raw_has_no_effect_on_json_output(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) oid_a = _obj(repo, b"va") oid_b = _obj(repo, b"vb") sid_a = _snap(repo, {"t.mid": oid_a}) sid_b = _snap(repo, {"t.mid": oid_b}) # JSON always includes OIDs; --raw flag is documented as no-op for JSON result_plain = runner.invoke(cli, ["snapshot-diff", "--json", sid_a, sid_b], env=_env(repo)) result_raw = runner.invoke(cli, ["snapshot-diff", "--json", "--raw", sid_a, sid_b], env=_env(repo)) assert result_plain.exit_code == 0 assert result_raw.exit_code == 0 data_plain = json.loads(result_plain.stdout) data_raw = json.loads(result_raw.stdout) # duration_ms will differ between two separate invocations — compare everything else. for key in ("snapshot_a", "snapshot_b", "added", "modified", "deleted", "total_changes"): assert data_plain[key] == data_raw[key] def test_raw_stdin_batch_text_includes_oids(self, tmp_path: pathlib.Path) -> None: repo = _init_repo(tmp_path) oid = _obj(repo, b"batch-raw") sid_a = _snap(repo, {}) sid_b = _snap(repo, {"r.mid": oid}) stdin = f"{sid_a} {sid_b}\n" result = runner.invoke( cli, ["snapshot-diff", "--stdin", "--raw"], env=_env(repo), input=stdin, ) assert result.exit_code == 0, result.output assert oid in result.stdout assert "A" in result.stdout # --------------------------------------------------------------------------- # Flag registration tests # --------------------------------------------------------------------------- class TestRegisterFlags: def _parser(self) -> "argparse.ArgumentParser": import argparse from muse.cli.commands.snapshot_diff import register p = argparse.ArgumentParser() subs = p.add_subparsers() register(subs) return p def test_default_json_out_is_false(self) -> None: args = self._parser().parse_args(["snapshot-diff", "main", "dev"]) assert args.json_out is False def test_json_flag_sets_json_out(self) -> None: args = self._parser().parse_args(["snapshot-diff", "--json", "main", "dev"]) assert args.json_out is True def test_j_shorthand_sets_json_out(self) -> None: args = self._parser().parse_args(["snapshot-diff", "-j", "main", "dev"]) assert args.json_out is True