"""Tests for GC path-helper correctness and stale remote tracking ref pruning. Coverage -------- Path helpers ~~~~~~~~~~~~ - _collect_shelf_objects reads shelf at _shelf_json_path (canonical location) - _collect_reachable_commits reads tags from _tags_dir (canonical location) Stale remote tracking ref pruning (prune_stale_remote_refs) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Stale remote dir (not in configured names) → files deleted, dir removed - Configured remote dir → preserved - Multiple remotes: only stale ones removed - dry_run=True → counted but not deleted - Symlinked remote dir → skipped (not deleted) - Empty stale dir → removed without error - Nested ref layout (remote/branch subdirs) → all files counted and removed - GcResult fields updated correctly (stale_remote_refs_collected, stale_remote_refs_bytes) CLI integration ~~~~~~~~~~~~~~~ - muse gc --full removes stale remote tracking refs - muse gc --full --json includes stale_remote_refs_collected / stale_remote_refs_bytes - muse gc --full --dry-run counts but does not delete - muse gc (no --full) does NOT remove stale remote refs - muse gc --full --json schema: new fields present even when nothing collected """ from __future__ import annotations import json import pathlib import msgpack import pytest from muse.core.gc import GcResult, prune_stale_remote_refs, run_gc type _EnvDict = dict[str, str] from muse.core.paths import heads_dir, muse_dir, remotes_dir as _remotes_dir, shelf_dir as _shelf_dir, tags_dir as _tags_dir from muse.core.types import blob_id, fake_id, long_id, split_id from muse.core.object_store import write_object as _write_obj from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from tests.cli_test_helper import CliRunner, InvokeResult cli = None runner = CliRunner() # --------------------------------------------------------------------------- # Repo fixture helpers # --------------------------------------------------------------------------- def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: muse = muse_dir(tmp_path) for sub in ("objects", "commits", "snapshots", "refs/heads", "remotes", "tags"): (muse / sub).mkdir(parents=True, exist_ok=True) (muse / "repo.json").write_text( json.dumps({"repo_id": fake_id("repo"), "domain": "code"}), encoding="utf-8", ) (muse / "HEAD").write_text("ref: refs/heads/main\n", encoding="utf-8") return tmp_path def _env(root: pathlib.Path) -> _EnvDict: return {"MUSE_REPO_ROOT": str(root)} def _write_remote_ref(root: pathlib.Path, remote: str, branch: str, commit_id: str) -> pathlib.Path: """Write a tracking ref file under .muse/remotes//.""" ref_dir = _remotes_dir(root) / remote ref_dir.mkdir(parents=True, exist_ok=True) ref_file = ref_dir / branch ref_file.write_text(commit_id, encoding="utf-8") return ref_file def _make_one_commit(root: pathlib.Path) -> str: """Write a minimal commit and return its commit_id.""" import datetime content = b"hello" oid = blob_id(content) _write_obj(root, oid, content) manifest = {"a.py": oid} snap_id = compute_snapshot_id(manifest) write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) commit_id = compute_commit_id( parent_ids=[], snapshot_id=snap_id, message="base", committed_at_iso=committed_at.isoformat(), ) write_commit(root, CommitRecord( commit_id=commit_id, parent_commit_id=None, parent2_commit_id=None, snapshot_id=snap_id, message="base", committed_at=committed_at, branch="main", )) (heads_dir(root) / "main").write_text(commit_id, encoding="utf-8") return commit_id # --------------------------------------------------------------------------- # Unit — path helper correctness # --------------------------------------------------------------------------- class TestPathHelpers: def test_shelf_dir_path_is_canonical(self, tmp_path: pathlib.Path) -> None: """_collect_shelf_objects reads entries from _shelf_dir, not a hardcoded path.""" root = _make_repo(tmp_path) content = b"shelf-object" oid = blob_id(content) _write_obj(root, oid, content) entry = {"snapshot": {"a.txt": oid}, "branch": "main", "created_at": "2026-01-01T00:00:00+00:00"} packed = msgpack.packb(entry, use_bin_type=True) _, hex_id = split_id(blob_id(packed)) shelf_entry_dir = _shelf_dir(root) / "sha256" shelf_entry_dir.mkdir(parents=True, exist_ok=True) (shelf_entry_dir / f"{hex_id}.msgpack").write_bytes(packed) from muse.core.gc import _collect_reachable_objects reachable = _collect_reachable_objects(root) assert oid in reachable, "Object referenced in shelf entry must be reachable" def test_tags_dir_is_canonical(self, tmp_path: pathlib.Path) -> None: """_collect_reachable_commits finds tags under _tags_dir, not a hardcoded path.""" root = _make_repo(tmp_path) commit_id = _make_one_commit(root) # Remove the branch ref so the commit is only reachable via the tag. (heads_dir(root) / "main").write_text("", encoding="utf-8") # Write a tag at the canonical tags dir location. tag_path = _tags_dir(root) / "2026" / "01" / "01" / "my-tag.msgpack" tag_path.parent.mkdir(parents=True, exist_ok=True) tag_path.write_bytes(msgpack.packb({"commit_id": commit_id}, use_bin_type=True)) from muse.core.gc import _collect_reachable_commits reachable = _collect_reachable_commits(root) assert commit_id in reachable, "Tag-referenced commit must be reachable via _tags_dir" # --------------------------------------------------------------------------- # Unit — prune_stale_remote_refs # --------------------------------------------------------------------------- class TestPruneStaleRemoteRefs: def test_stale_remote_dir_deleted(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) ref_file = _write_remote_ref(root, "old-remote", "main", long_id("a" * 64)) result = GcResult() prune_stale_remote_refs(root, configured_remote_names=set(), result=result, dry_run=False) assert not ref_file.exists() assert not (_remotes_dir(root) / "old-remote").exists() assert result.stale_remote_refs_collected == 1 assert result.stale_remote_refs_bytes > 0 def test_configured_remote_preserved(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) ref_file = _write_remote_ref(root, "local", "dev", long_id("b" * 64)) result = GcResult() prune_stale_remote_refs(root, configured_remote_names={"local"}, result=result, dry_run=False) assert ref_file.exists(), "Configured remote's tracking ref must be preserved" assert result.stale_remote_refs_collected == 0 def test_only_stale_remotes_removed(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) _write_remote_ref(root, "local", "main", long_id("a" * 64)) # configured _write_remote_ref(root, "staging", "main", long_id("b" * 64)) # configured stale_ref = _write_remote_ref(root, "old", "main", long_id("c" * 64)) # stale result = GcResult() prune_stale_remote_refs( root, configured_remote_names={"local", "staging"}, result=result, dry_run=False, ) assert stale_ref.exists() is False assert (_remotes_dir(root) / "local" / "main").exists() assert (_remotes_dir(root) / "staging" / "main").exists() assert result.stale_remote_refs_collected == 1 def test_dry_run_counts_but_does_not_delete(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) ref_file = _write_remote_ref(root, "gone", "main", long_id("d" * 64)) result = GcResult() prune_stale_remote_refs(root, configured_remote_names=set(), result=result, dry_run=True) assert ref_file.exists(), "dry_run must not delete files" assert result.stale_remote_refs_collected == 1 def test_symlinked_remote_dir_skipped(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) real_dir = tmp_path / "real-remote-dir" real_dir.mkdir() (real_dir / "main").write_text(long_id("e" * 64), encoding="utf-8") symlink = _remotes_dir(root) / "linked-remote" symlink.symlink_to(real_dir) result = GcResult() prune_stale_remote_refs(root, configured_remote_names=set(), result=result, dry_run=False) assert symlink.exists(), "Symlinked dir must not be followed or deleted" assert result.stale_remote_refs_collected == 0 def test_empty_stale_dir_removed(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) empty_dir = _remotes_dir(root) / "empty-remote" empty_dir.mkdir() result = GcResult() prune_stale_remote_refs(root, configured_remote_names=set(), result=result, dry_run=False) assert not empty_dir.exists() assert result.stale_remote_refs_collected == 0 def test_nested_refs_all_counted(self, tmp_path: pathlib.Path) -> None: """Remote with multiple branches (nested layout) — all ref files counted.""" root = _make_repo(tmp_path) remote_dir = _remotes_dir(root) / "old-remote" remote_dir.mkdir() for branch in ("main", "dev", "feat/x"): ref = remote_dir / branch ref.parent.mkdir(parents=True, exist_ok=True) ref.write_text(long_id("f" * 64), encoding="utf-8") result = GcResult() prune_stale_remote_refs(root, configured_remote_names=set(), result=result, dry_run=False) assert result.stale_remote_refs_collected == 3 assert not remote_dir.exists() def test_no_remotes_dir_is_noop(self, tmp_path: pathlib.Path) -> None: """Repo with no .muse/remotes/ — prune is a no-op.""" root = _make_repo(tmp_path) remotes_root = _remotes_dir(root) if remotes_root.exists(): remotes_root.rmdir() result = GcResult() prune_stale_remote_refs(root, configured_remote_names=set(), result=result, dry_run=False) assert result.stale_remote_refs_collected == 0 assert result.stale_remote_refs_bytes == 0 def test_bytes_counted_correctly(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) content = long_id("a" * 64) # known length ref_file = _write_remote_ref(root, "gone", "main", content) expected_bytes = ref_file.stat().st_size result = GcResult() prune_stale_remote_refs(root, configured_remote_names=set(), result=result, dry_run=False) assert result.stale_remote_refs_bytes == expected_bytes # --------------------------------------------------------------------------- # CLI integration # --------------------------------------------------------------------------- class TestCliStaleRemoteRefs: def test_full_removes_stale_remote_refs( self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> None: monkeypatch.chdir(tmp_path) monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path)) from unittest.mock import patch with patch("muse.cli.commands.init.resolve_default_handle", return_value=None): runner.invoke(cli, ["init"], env=_env(tmp_path), catch_exceptions=False) runner.invoke(cli, ["commit", "-m", "base", "--allow-empty"], env=_env(tmp_path)) # Manually plant a stale remote dir (not in config). _write_remote_ref(tmp_path, "deleted-remote", "main", long_id("a" * 64)) r = runner.invoke(cli, ["gc", "--full", "--grace-period", "0"], env=_env(tmp_path)) assert r.exit_code == 0 assert not (_remotes_dir(tmp_path) / "deleted-remote").exists() def test_full_json_includes_stale_remote_refs_fields( self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> None: monkeypatch.chdir(tmp_path) monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path)) from unittest.mock import patch with patch("muse.cli.commands.init.resolve_default_handle", return_value=None): runner.invoke(cli, ["init"], env=_env(tmp_path), catch_exceptions=False) runner.invoke(cli, ["commit", "-m", "base", "--allow-empty"], env=_env(tmp_path)) _write_remote_ref(tmp_path, "gone", "main", long_id("b" * 64)) r = runner.invoke( cli, ["gc", "--full", "--json", "--grace-period", "0"], env=_env(tmp_path) ) assert r.exit_code == 0 data = json.loads(r.output) assert "stale_remote_refs_collected" in data assert "stale_remote_refs_bytes" in data assert data["stale_remote_refs_collected"] == 1 def test_full_dry_run_counts_stale_refs( self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> None: monkeypatch.chdir(tmp_path) monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path)) from unittest.mock import patch with patch("muse.cli.commands.init.resolve_default_handle", return_value=None): runner.invoke(cli, ["init"], env=_env(tmp_path), catch_exceptions=False) runner.invoke(cli, ["commit", "-m", "base", "--allow-empty"], env=_env(tmp_path)) ref_file = _write_remote_ref(tmp_path, "stale", "main", long_id("c" * 64)) r = runner.invoke( cli, ["gc", "--full", "--dry-run", "--json", "--grace-period", "0"], env=_env(tmp_path), ) assert r.exit_code == 0 data = json.loads(r.output) assert data["stale_remote_refs_collected"] == 1 assert ref_file.exists(), "dry_run must not delete files" def test_no_full_does_not_prune_stale_refs( self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> None: monkeypatch.chdir(tmp_path) monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path)) from unittest.mock import patch with patch("muse.cli.commands.init.resolve_default_handle", return_value=None): runner.invoke(cli, ["init"], env=_env(tmp_path), catch_exceptions=False) runner.invoke(cli, ["commit", "-m", "base", "--allow-empty"], env=_env(tmp_path)) ref_file = _write_remote_ref(tmp_path, "stale", "main", long_id("d" * 64)) r = runner.invoke( cli, ["gc", "--json", "--grace-period", "0"], env=_env(tmp_path) ) assert r.exit_code == 0 assert ref_file.exists(), "Without --full, stale refs must not be removed" data = json.loads(r.output) # Fields are present but zero (default GcResult values are not emitted # without --full, so they may be absent — just verify no deletion occurred). assert ref_file.exists() def test_full_json_schema_zero_when_nothing_stale( self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> None: monkeypatch.chdir(tmp_path) monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path)) from unittest.mock import patch with patch("muse.cli.commands.init.resolve_default_handle", return_value=None): runner.invoke(cli, ["init"], env=_env(tmp_path), catch_exceptions=False) runner.invoke(cli, ["commit", "-m", "base", "--allow-empty"], env=_env(tmp_path)) r = runner.invoke( cli, ["gc", "--full", "--json", "--grace-period", "0"], env=_env(tmp_path) ) assert r.exit_code == 0 data = json.loads(r.output) assert data["stale_remote_refs_collected"] == 0 assert data["stale_remote_refs_bytes"] == 0