"""Tests for muse/core/gc.py — garbage collection.""" from __future__ import annotations import json import pathlib from collections.abc import Mapping from typing import TypedDict import pytest from muse.core.gc import GcResult, run_gc from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from muse.core.shelf import write_shelf_entry from muse.core.types import Manifest, blob_id, split_id from muse.core.object_store import object_path from muse.core.paths import heads_dir, muse_dir, objects_dir, shelf_dir # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: """Create a minimal .muse repo structure.""" muse = muse_dir(tmp_path) for d in ("objects", "commits", "snapshots", "refs/heads"): (muse / d).mkdir(parents=True, exist_ok=True) (muse / "repo.json").write_text(json.dumps({"repo_id": "test-repo"})) (muse / "HEAD").write_text("ref: refs/heads/main\n") return tmp_path class _ShelfEntryData(TypedDict, total=False): id: str snapshot: dict[str, str] branch: str created_at: str def _write_shelf_entry(repo: pathlib.Path, snapshot: Mapping[str, str]) -> None: """Write a shelf entry in the current format under .muse/shelf/sha256/.""" import json as _json entry: _ShelfEntryData = { "snapshot": dict(snapshot), "branch": "main", "created_at": "2026-01-01T00:00:00+00:00", } raw_bytes = _json.dumps(entry, sort_keys=True).encode() _, hex_id = split_id(blob_id(raw_bytes)) entry["id"] = f"sha256:{hex_id}" write_shelf_entry(repo, entry) def _write_object(repo: pathlib.Path, content: bytes) -> str: from muse.core.object_store import write_object oid = blob_id(content) write_object(repo, oid, content) return oid def _write_snapshot(repo: pathlib.Path, manifest: Manifest) -> str: """Write a snapshot with a valid content-hash snapshot_id. Returns the snapshot_id.""" snap_id = compute_snapshot_id(manifest) write_snapshot(repo, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) return snap_id def _write_commit(repo: pathlib.Path, snapshot_id: str) -> str: """Write a commit record with a valid content-hash commit_id. Returns the commit_id.""" import datetime committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) commit_id = compute_commit_id( parent_ids=[], snapshot_id=snapshot_id, message="test", committed_at_iso=committed_at.isoformat(), ) write_commit(repo, CommitRecord( commit_id=commit_id, branch="main", snapshot_id=snapshot_id, message="test", committed_at=committed_at, )) r_path = heads_dir(repo) / "main" r_path.parent.mkdir(parents=True, exist_ok=True) r_path.write_text(commit_id) return commit_id # --------------------------------------------------------------------------- # Tests # --------------------------------------------------------------------------- def test_gc_empty_repo(tmp_path: pathlib.Path) -> None: """GC on an empty repo should report 0 collected.""" repo = _make_repo(tmp_path) result = run_gc(repo, grace_period_seconds=0) assert isinstance(result, GcResult) assert result.collected_count == 0 def test_gc_removes_unreachable_object(tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) # Write an object but don't reference it in any commit. orphan_id = _write_object(repo, b"orphan data") obj_path = object_path(repo, orphan_id) assert obj_path.exists() result = run_gc(repo, grace_period_seconds=0) assert result.collected_count == 1 assert orphan_id in result.collected_ids assert not obj_path.exists() def test_gc_preserves_reachable_object(tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) content = b"reachable file content" obj_id = _write_object(repo, content) snap_id = _write_snapshot(repo, {"file.txt": obj_id}) _write_commit(repo, snap_id) result = run_gc(repo, grace_period_seconds=0) assert result.collected_count == 0 obj_path = object_path(repo, obj_id) assert obj_path.exists() def test_gc_dry_run_does_not_delete(tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) orphan_id = _write_object(repo, b"orphan") obj_path = object_path(repo, orphan_id) result = run_gc(repo, dry_run=True, grace_period_seconds=0) assert result.dry_run is True assert result.collected_count == 1 # File should still exist. assert obj_path.exists() def test_gc_collected_bytes(tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) content = b"x" * 1000 _write_object(repo, content) result = run_gc(repo, grace_period_seconds=0) assert result.collected_bytes >= 1000 def test_gc_multiple_orphans(tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) for i in range(5): _write_object(repo, f"orphan {i}".encode()) result = run_gc(repo, grace_period_seconds=0) assert result.collected_count == 5 def test_gc_mixed_reachable_and_orphans(tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) # One reachable object. reachable_id = _write_object(repo, b"reachable") snap_id = _write_snapshot(repo, {"file.txt": reachable_id}) _write_commit(repo, snap_id) # Two orphans. _write_object(repo, b"orphan A") _write_object(repo, b"orphan B") result = run_gc(repo, grace_period_seconds=0) assert result.collected_count == 2 assert result.reachable_count == 3 # blob + snapshot + commit all live in unified store def test_gc_elapsed_time_positive(tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = run_gc(repo, grace_period_seconds=0) assert result.duration_ms >= 0.0 # --------------------------------------------------------------------------- # Stress test # --------------------------------------------------------------------------- def test_gc_preserves_shelf_objects(tmp_path: pathlib.Path) -> None: """Objects referenced only by shelf.json must NOT be GCed. This is the critical safety case: `muse shelf save` writes file blobs to the object store and records their IDs in shelf.json. Without walking the shelf, a subsequent `muse gc` would delete those blobs and make `muse shelf pop` fail with missing objects. """ repo = _make_repo(tmp_path) # Simulate shelf save writing two objects. shelf_obj_a = _write_object(repo, b"shelved file A") shelf_obj_b = _write_object(repo, b"shelved file B") _write_shelf_entry(repo, {"a.py": shelf_obj_a, "b.py": shelf_obj_b}) result = run_gc(repo, grace_period_seconds=0) assert result.collected_count == 0, "Shelf objects must not be GCed" # The blobs must still exist. assert object_path(repo, shelf_obj_a).exists() assert object_path(repo, shelf_obj_b).exists() def test_gc_collects_objects_not_on_shelf(tmp_path: pathlib.Path) -> None: """Objects that are neither committed nor shelved ARE unreachable and must be GCed.""" repo = _make_repo(tmp_path) shelf_obj = _write_object(repo, b"shelved") orphan_obj = _write_object(repo, b"truly orphaned") _write_shelf_entry(repo, {"a.py": shelf_obj}) result = run_gc(repo, grace_period_seconds=0) assert result.collected_count == 1 assert orphan_obj in result.collected_ids assert shelf_obj not in result.collected_ids def test_gc_ignores_stray_non_hex_files_in_objects_dir(tmp_path: pathlib.Path) -> None: """Non-hex filenames in .muse/objects/ are skipped, not mistakenly deleted.""" repo = _make_repo(tmp_path) # Create a stray file that should be ignored. stray_dir = objects_dir(repo) / "ab" stray_dir.mkdir(parents=True, exist_ok=True) stray = stray_dir / ".DS_Store" stray.write_bytes(b"stray") result = run_gc(repo, grace_period_seconds=0) assert result.collected_count == 0 assert stray.exists(), ".DS_Store should survive GC" def test_gc_stress_many_orphans(tmp_path: pathlib.Path) -> None: """GC should handle 200 orphaned objects efficiently.""" repo = _make_repo(tmp_path) for i in range(200): _write_object(repo, f"orphan-{i:04d}".encode()) result = run_gc(repo, grace_period_seconds=0) assert result.collected_count == 200 # Verify the objects directory is clean. obj_dir = objects_dir(repo) remaining = list(obj_dir.rglob("*")) remaining_files = [p for p in remaining if p.is_file()] assert remaining_files == []