"""Comprehensive hardening tests for ``muse gc``. Coverage dimensions: Unit ~~~~ - ``_is_hex`` edge cases (empty string, uppercase, mixed, valid) - ``_list_stored_objects`` symlink guard for prefix dirs - ``_list_stored_objects`` symlink guard for object files - ``_list_stored_objects`` grace period filters recent files - ``_list_stored_objects`` grace_period=0 includes all files - ``_collect_reachable_objects`` symlink guard on shelf.json - ``_collect_reachable_objects`` size cap on shelf.json - ``_collect_reachable_objects`` malformed shelf.json is skipped gracefully - ``run_gc`` grace_period_seconds stored in GcResult - ``_fmt_bytes`` all size ranges - ``run_gc`` negative grace period rejected by CLI Security ~~~~~~~~ - Symlink in .muse/objects/ prefix dir not deleted or followed - Symlink object file not deleted or followed - Symlink shelf.json skipped during reachability walk - ANSI escape sequences in object IDs sanitized in text output - Invalid --format rejected with error to stderr - Negative --grace-period rejected with non-zero exit Integration (CLI) ~~~~~~~~~~~~~~~~~ - ``--json`` output schema matches ``_GcJson`` TypedDict - ``--json`` includes ``grace_period_seconds`` field - ``--grace-period`` value propagated to GcResult - ``--dry-run`` combined with ``--json`` reports correctly - ``--verbose`` combined with ``--json`` shows IDs in JSON - ``--format text`` is the default - Repeated GC runs are idempotent (JSON) E2E ~~~ - Full lifecycle: orphan accumulates across branches, GC reclaims - GC after shelf save does NOT delete shelved objects - GC with corrupt shelf.json succeeds (skips shelf walk) - ``--grace-period 0`` collects freshly-written orphan - ``--grace-period 9999`` protects freshly-written orphan Stress ~~~~~~ - 500 orphaned objects across 256 prefix dirs collected correctly - Concurrent read-only GC (dry-run) on same repo is safe """ from __future__ import annotations import json import os import pathlib import stat import threading import time from collections.abc import Mapping from typing import TypedDict import pytest from tests.cli_test_helper import CliRunner, InvokeResult from muse.core.types import fake_id, long_id from muse.core.object_store import object_path from muse.core.paths import heads_dir, merge_state_path, muse_dir, objects_dir, shelf_dir cli = None # argparse bridge — CliRunner ignores this runner = CliRunner() # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _env(root: pathlib.Path) -> Manifest: return {"MUSE_REPO_ROOT": str(root)} def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: muse = muse_dir(tmp_path) for sub in ("objects", "commits", "snapshots", "refs/heads"): (muse / sub).mkdir(parents=True, exist_ok=True) repo_id = fake_id("repo") (muse / "repo.json").write_text(json.dumps({ "repo_id": repo_id, "domain": "code", "default_branch": "main", "created_at": "2026-01-01T00:00:00+00:00", }), encoding="utf-8") (muse / "HEAD").write_text("ref: refs/heads/main\n", encoding="utf-8") return tmp_path def _write_object(root: pathlib.Path, content: bytes) -> str: from muse.core.types import blob_id from muse.core.object_store import write_object oid = blob_id(content) write_object(root, oid, content) return oid def _write_shelf_entry(root: pathlib.Path, snapshot: Mapping[str, str]) -> pathlib.Path: """Write a shelf entry in git-header+JSON format under .muse/shelf/sha256/.""" import json as _json from muse.core.types import blob_id, split_id from muse.core.shelf import write_shelf_entry entry_data: dict[str, object] = { "snapshot": dict(snapshot), "branch": "main", "created_at": "2026-01-01T00:00:00+00:00", } raw_bytes = _json.dumps(entry_data, sort_keys=True).encode() _, hex_id = split_id(blob_id(raw_bytes)) entry_id = f"sha256:{hex_id}" entry_data["id"] = entry_id write_shelf_entry(root, entry_data) return shelf_dir(root) / "sha256" / hex_id def _make_commit(root: pathlib.Path, manifest: Manifest | None = None) -> str: import datetime from muse.core.ids import hash_commit, hash_snapshot from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) mfst: Manifest = manifest or {} snap_id = hash_snapshot(mfst) committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) commit_id = hash_commit( parent_ids=[], snapshot_id=snap_id, message="test", committed_at_iso=committed_at.isoformat(), ) write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=mfst)) write_commit(root, CommitRecord( commit_id=commit_id, branch="main", snapshot_id=snap_id, message="test", committed_at=committed_at, )) ref_path = heads_dir(root) / "main" ref_path.parent.mkdir(parents=True, exist_ok=True) ref_path.write_text(commit_id, encoding="utf-8") return commit_id def _invoke_gc(root: pathlib.Path, *extra_args: str) -> InvokeResult: """Invoke ``muse gc`` with ``--grace-period 0`` unless caller overrides.""" args = list(extra_args) if "--grace-period" not in args: args = ["--grace-period", "0"] + args return runner.invoke(cli, ["gc"] + args, env=_env(root), catch_exceptions=False) # --------------------------------------------------------------------------- # _GcJson TypedDict for test assertions # --------------------------------------------------------------------------- class _GcJson(TypedDict): collected_count: int collected_bytes: int reachable_count: int duration_ms: float grace_period_seconds: int dry_run: bool collected_ids: list[str] def _parse_gc_json(output: str) -> _GcJson: """Extract and parse the JSON blob from CliRunner output.""" for line in output.splitlines(): line = line.strip() if line.startswith("{"): raw = json.loads(line) return _GcJson( collected_count=int(raw["collected_count"]), collected_bytes=int(raw["collected_bytes"]), reachable_count=int(raw["reachable_count"]), duration_ms=float(raw["duration_ms"]), grace_period_seconds=int(raw["grace_period_seconds"]), dry_run=bool(raw["dry_run"]), collected_ids=[str(x) for x in raw["collected_ids"]], ) raise AssertionError(f"No JSON object found in output:\n{output}") # --------------------------------------------------------------------------- # Unit — _is_hex # --------------------------------------------------------------------------- class TestIsHex: def test_empty_string_is_not_hex(self) -> None: from muse.core.gc import _is_hex assert not _is_hex("") def test_valid_lowercase_hex(self) -> None: from muse.core.gc import _is_hex assert _is_hex("0123456789abcdef") def test_uppercase_rejected(self) -> None: from muse.core.gc import _is_hex assert not _is_hex("ABCDEF") def test_mixed_case_rejected(self) -> None: from muse.core.gc import _is_hex assert not _is_hex("0aF") def test_non_hex_chars_rejected(self) -> None: from muse.core.gc import _is_hex assert not _is_hex("xyz") def test_single_valid_char(self) -> None: from muse.core.gc import _is_hex assert _is_hex("a") def test_64_char_sha256(self) -> None: from muse.core.gc import _is_hex sha = "a" * 64 assert _is_hex(sha) # --------------------------------------------------------------------------- # Unit — _fmt_bytes # --------------------------------------------------------------------------- class TestFmtBytes: def test_bytes_range(self) -> None: from muse.cli.commands.gc import _fmt_bytes assert _fmt_bytes(0) == "0 B" assert _fmt_bytes(1023) == "1023 B" def test_kib_range(self) -> None: from muse.cli.commands.gc import _fmt_bytes assert "KiB" in _fmt_bytes(1024) assert "KiB" in _fmt_bytes(1024 * 1024 - 1) def test_mib_range(self) -> None: from muse.cli.commands.gc import _fmt_bytes assert "MiB" in _fmt_bytes(1024 * 1024) assert "MiB" in _fmt_bytes(1024 * 1024 * 100) # --------------------------------------------------------------------------- # Unit — _list_stored_objects # --------------------------------------------------------------------------- class TestListStoredObjects: def test_symlink_prefix_dir_is_skipped(self, tmp_path: pathlib.Path) -> None: """A symlinked prefix directory must not be entered.""" from muse.core.gc import _list_stored_objects root = _make_repo(tmp_path) real_dir = tmp_path / "external_objects" real_dir.mkdir() sha = "a" * 64 real_file = real_dir / sha[2:] real_file.write_bytes(b"content") # Create a symlink at .muse/objects/sha256/ → external dir algo_dir = objects_dir(root) / "sha256" algo_dir.mkdir(parents=True, exist_ok=True) link = algo_dir / sha[:2] link.symlink_to(real_dir) pairs = _list_stored_objects(root, grace_period_seconds=0) found_ids = {oid for oid, _ in pairs} assert sha not in found_ids, "Symlinked prefix dir must not be entered" def test_symlink_object_file_is_skipped(self, tmp_path: pathlib.Path) -> None: """A symlinked object file must not be listed or ever unlinked.""" from muse.core.gc import _list_stored_objects root = _make_repo(tmp_path) # Write a real file outside the repo. external = tmp_path / "external_secret" external.write_bytes(b"secret content") sha = "b" * 64 link = object_path(root, long_id(sha)) link.parent.mkdir(parents=True, exist_ok=True) link.symlink_to(external) pairs = _list_stored_objects(root, grace_period_seconds=0) found_ids = {oid for oid, _ in pairs} assert sha not in found_ids, "Symlinked object file must not be listed" # The external file must be untouched. assert external.exists() def test_grace_period_filters_recent_files(self, tmp_path: pathlib.Path) -> None: """Objects written within the grace window are excluded.""" from muse.core.gc import _list_stored_objects root = _make_repo(tmp_path) _write_object(root, b"new orphan") # Grace period of 60 s — the object was written <1 s ago. pairs = _list_stored_objects(root, grace_period_seconds=60) assert len(pairs) == 0 def test_grace_period_zero_includes_all_files(self, tmp_path: pathlib.Path) -> None: """grace_period_seconds=0 bypasses the mtime check.""" from muse.core.gc import _list_stored_objects root = _make_repo(tmp_path) _write_object(root, b"orphan") pairs = _list_stored_objects(root, grace_period_seconds=0) assert len(pairs) == 1 def test_non_hex_prefix_dir_skipped(self, tmp_path: pathlib.Path) -> None: from muse.core.gc import _list_stored_objects root = _make_repo(tmp_path) (objects_dir(root) / "sha256" / "zz").mkdir(parents=True) pairs = _list_stored_objects(root, grace_period_seconds=0) assert len(pairs) == 0 def test_non_hex_object_file_skipped(self, tmp_path: pathlib.Path) -> None: from muse.core.gc import _list_stored_objects root = _make_repo(tmp_path) prefix = objects_dir(root) / "sha256" / "ab" prefix.mkdir(parents=True) (prefix / "not-valid-hex!").write_bytes(b"x") pairs = _list_stored_objects(root, grace_period_seconds=0) assert len(pairs) == 0 def test_valid_object_included(self, tmp_path: pathlib.Path) -> None: from muse.core.gc import _list_stored_objects root = _make_repo(tmp_path) oid = _write_object(root, b"valid object") pairs = _list_stored_objects(root, grace_period_seconds=0) found_ids = {o for o, _ in pairs} assert oid in found_ids # --------------------------------------------------------------------------- # Unit — _collect_reachable_objects # --------------------------------------------------------------------------- class TestCollectReachableObjects: def test_shelf_symlink_skipped(self, tmp_path: pathlib.Path) -> None: """A symlinked shelf.json is ignored during the reachability walk.""" from muse.core.gc import _collect_reachable_objects root = _make_repo(tmp_path) # Write a real object and make it look shelved via a symlink. obj_id = _write_object(root, b"shelved content") external = tmp_path / "real_shelf.json" external.write_text(json.dumps([{ "snapshot_id": "s" * 64, "branch": "main", "created_at": "2026-01-01T00:00:00+00:00", "snapshot": {"a.py": obj_id}, }])) link = muse_dir(root) / "shelf.json" link.symlink_to(external) reachable = _collect_reachable_objects(root) # The object should NOT be marked reachable (symlink was skipped). assert obj_id not in reachable def test_shelf_oversized_file_skipped(self, tmp_path: pathlib.Path) -> None: """A shelf entry exceeding the size cap is skipped, not OOM-killed.""" from muse.core.gc import _collect_reachable_objects, _MAX_SHELF_BYTES import unittest.mock as mock root = _make_repo(tmp_path) obj_id = _write_object(root, b"shelved content") entry_path = _write_shelf_entry(root, {"a.py": obj_id}) fake_stat = os.stat_result(( stat.S_IFREG | 0o644, 0, 0, 1, 0, 0, _MAX_SHELF_BYTES + 1, 0, 0, 0, )) with mock.patch.object(pathlib.Path, "stat", return_value=fake_stat): reachable = _collect_reachable_objects(root) assert obj_id not in reachable def test_malformed_shelf_json_skipped(self, tmp_path: pathlib.Path) -> None: from muse.core.gc import _collect_reachable_objects root = _make_repo(tmp_path) (muse_dir(root) / "shelf.json").write_text("not valid json{{{}}", encoding="utf-8") # Should not raise. reachable = _collect_reachable_objects(root) assert isinstance(reachable, set) def test_valid_shelf_objects_marked_reachable(self, tmp_path: pathlib.Path) -> None: from muse.core.gc import _collect_reachable_objects root = _make_repo(tmp_path) obj_id = _write_object(root, b"shelved content") _write_shelf_entry(root, {"a.py": obj_id}) reachable = _collect_reachable_objects(root) assert obj_id in reachable # --------------------------------------------------------------------------- # Unit — run_gc result fields # --------------------------------------------------------------------------- class TestRunGcResult: def test_grace_period_stored_in_result(self, tmp_path: pathlib.Path) -> None: from muse.core.gc import run_gc root = _make_repo(tmp_path) result = run_gc(root, grace_period_seconds=42) assert result.grace_period_seconds == 42 def test_dry_run_flag_stored_in_result(self, tmp_path: pathlib.Path) -> None: from muse.core.gc import run_gc root = _make_repo(tmp_path) result = run_gc(root, dry_run=True, grace_period_seconds=0) assert result.dry_run is True def test_duration_ms_is_non_negative(self, tmp_path: pathlib.Path) -> None: from muse.core.gc import run_gc root = _make_repo(tmp_path) result = run_gc(root, grace_period_seconds=0) assert result.duration_ms >= 0.0 # --------------------------------------------------------------------------- # Security — CLI # --------------------------------------------------------------------------- class TestSecurity: def test_symlink_in_objects_not_deleted(self, tmp_path: pathlib.Path) -> None: """GC must never delete a file outside the repo via a symlink.""" root = _make_repo(tmp_path) _make_commit(root) external = tmp_path / "precious_file" external.write_bytes(b"important data") sha = "c" * 64 link = object_path(root, long_id(sha)) link.parent.mkdir(parents=True, exist_ok=True) link.symlink_to(external) _invoke_gc(root) assert external.exists(), "External file must not be deleted via symlink" def test_ansi_in_object_id_sanitized(self, tmp_path: pathlib.Path) -> None: """sanitize_display must strip ANSI sequences from object IDs in verbose output.""" root = _make_repo(tmp_path) _make_commit(root) # Write a real orphan (we can't control its SHA, but we test the path is taken). _write_object(root, b"orphan for sanitize test") result = _invoke_gc(root, "--verbose") assert result.exit_code == 0 # The output must not contain raw ESC bytes. assert "\x1b" not in result.output def test_invalid_format_exits_nonzero_and_writes_stderr( self, tmp_path: pathlib.Path ) -> None: root = _make_repo(tmp_path) # argparse now uses choices= so invalid format triggers argparse error. result = runner.invoke(cli, ["gc", "--format", "csv"], env=_env(root)) assert result.exit_code != 0 def test_negative_grace_period_rejected(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) result = runner.invoke(cli, ["gc", "--grace-period", "-1"], env=_env(root)) assert result.exit_code != 0 # --------------------------------------------------------------------------- # Integration — JSON output schema # --------------------------------------------------------------------------- class TestJsonSchema: def test_json_schema_all_fields_present(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) _make_commit(root) _write_object(root, b"orphan for json test") result = _invoke_gc(root, "--json") assert result.exit_code == 0 payload = _parse_gc_json(result.output) assert payload["collected_count"] == 1 assert payload["collected_bytes"] > 0 # commit + snapshot now live in the unified object store, so reachable_count >= 2 assert payload["reachable_count"] >= 2 assert payload["duration_ms"] >= 0.0 assert payload["grace_period_seconds"] == 0 assert payload["dry_run"] is False assert len(payload["collected_ids"]) == 1 def test_json_dry_run_does_not_delete(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) _make_commit(root) orphan_id = _write_object(root, b"dry run orphan") result = _invoke_gc(root, "--dry-run", "--json") assert result.exit_code == 0 payload = _parse_gc_json(result.output) assert payload["dry_run"] is True assert payload["collected_count"] == 1 # File must still exist. from muse.core.object_store import has_object assert has_object(root, orphan_id) def test_json_grace_period_field_reflects_flag(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) result = runner.invoke( cli, ["gc", "--grace-period", "99", "--json"], env=_env(root), catch_exceptions=False, ) assert result.exit_code == 0 payload = _parse_gc_json(result.output) assert payload["grace_period_seconds"] == 99 def test_json_collected_ids_sorted(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) for i in range(5): _write_object(root, f"sort test {i}".encode()) result = _invoke_gc(root, "--json") assert result.exit_code == 0 payload = _parse_gc_json(result.output) assert payload["collected_ids"] == sorted(payload["collected_ids"]) def test_json_clean_repo_shows_zero_counts(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) _make_commit(root) result = _invoke_gc(root, "--json") assert result.exit_code == 0 payload = _parse_gc_json(result.output) assert payload["collected_count"] == 0 assert payload["collected_bytes"] == 0 assert payload["collected_ids"] == [] def test_shorthand_json_flag(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) result = _invoke_gc(root, "--json") assert result.exit_code == 0 _parse_gc_json(result.output) # must not raise # --------------------------------------------------------------------------- # E2E — full lifecycle # --------------------------------------------------------------------------- class TestE2E: def test_orphan_from_abandoned_branch_reclaimed(self, tmp_path: pathlib.Path) -> None: """Objects written for a branch that was never committed are reclaimed.""" root = _make_repo(tmp_path) # Write objects that were staged but never committed. orphan_a = _write_object(root, b"branch work A") orphan_b = _write_object(root, b"branch work B") # Now run GC. result = _invoke_gc(root, "--json") assert result.exit_code == 0 payload = _parse_gc_json(result.output) assert orphan_a in payload["collected_ids"] assert orphan_b in payload["collected_ids"] def test_gc_after_shelf_save_preserves_shelf_objects(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) shelf_obj = _write_object(root, b"shelved file content") _write_shelf_entry(root, {"file.py": shelf_obj}) result = _invoke_gc(root, "--json") assert result.exit_code == 0 payload = _parse_gc_json(result.output) assert shelf_obj not in payload["collected_ids"] # Blob must still be on disk. from muse.core.object_store import has_object assert has_object(root, shelf_obj) def test_gc_with_corrupt_shelf_json_succeeds(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) orphan = _write_object(root, b"orphan despite corrupt shelf") (muse_dir(root) / "shelf.json").write_text("{not json", encoding="utf-8") result = _invoke_gc(root, "--json") assert result.exit_code == 0 payload = _parse_gc_json(result.output) # Orphan is still collected even though shelf was corrupt. assert orphan in payload["collected_ids"] def test_grace_period_zero_collects_fresh_orphan(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) orphan = _write_object(root, b"fresh orphan") result = _invoke_gc(root, "--grace-period", "0", "--json") assert result.exit_code == 0 payload = _parse_gc_json(result.output) assert orphan in payload["collected_ids"] def test_grace_period_large_protects_fresh_orphan(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) _write_object(root, b"fresh orphan protected") result = runner.invoke( cli, ["gc", "--grace-period", "9999", "--json"], env=_env(root), catch_exceptions=False, ) assert result.exit_code == 0 payload = _parse_gc_json(result.output) assert payload["collected_count"] == 0 def test_repeated_gc_is_idempotent(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) _write_object(root, b"first orphan") _invoke_gc(root) result2 = _invoke_gc(root, "--json") assert result2.exit_code == 0 payload = _parse_gc_json(result2.output) assert payload["collected_count"] == 0 def test_gc_removes_empty_prefix_dirs(self, tmp_path: pathlib.Path) -> None: """After GC, empty prefix dirs under .muse/objects/sha256/ are cleaned up.""" root = _make_repo(tmp_path) sha = _write_object(root, b"sole object in prefix") from muse.core.object_store import object_path prefix_dir = object_path(root, sha).parent assert prefix_dir.exists() _invoke_gc(root) # Directory should be removed since it's empty now. assert not prefix_dir.exists() def test_verbose_lists_full_sha256_ids(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) orphan = _write_object(root, b"verbose test object") result = _invoke_gc(root, "--verbose") assert result.exit_code == 0 assert orphan in result.output def test_dry_run_verbose_lists_without_deleting(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) orphan = _write_object(root, b"dry verbose test") result = _invoke_gc(root, "--dry-run", "--verbose") assert result.exit_code == 0 assert orphan in result.output from muse.core.object_store import object_path assert object_path(root, orphan).exists() def test_dry_run_prefix_present_in_text_output(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) result = _invoke_gc(root, "--dry-run") assert result.exit_code == 0 assert "[dry-run]" in result.output def test_reachable_count_reflects_committed_objects(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) obj = _write_object(root, b"committed content") _make_commit(root, manifest={"file.txt": obj}) result = _invoke_gc(root, "--json") payload = _parse_gc_json(result.output) # commit + snapshot + 1 blob = 3 reachable in the unified object store assert payload["reachable_count"] == 3 assert payload["collected_count"] == 0 # --------------------------------------------------------------------------- # Stress # --------------------------------------------------------------------------- class TestStress: def test_500_orphans_all_collected(self, tmp_path: pathlib.Path) -> None: root = _make_repo(tmp_path) _make_commit(root) orphan_ids = [_write_object(root, f"stress-{i:04d}".encode()) for i in range(500)] result = _invoke_gc(root, "--json") assert result.exit_code == 0 payload = _parse_gc_json(result.output) assert payload["collected_count"] == 500 assert set(payload["collected_ids"]) == set(orphan_ids) # All orphan blobs must be gone; commit + snapshot remain (reachable). from muse.core.object_store import has_object for oid in orphan_ids: assert not has_object(root, oid), f"orphan {oid} was not collected" # Only the commit and snapshot objects remain in the store. obj_dir = objects_dir(root) remaining_files = [p for p in obj_dir.rglob("*") if p.is_file()] assert len(remaining_files) == 2 def test_concurrent_dry_run_does_not_crash(self, tmp_path: pathlib.Path) -> None: """Multiple concurrent dry-run GCs on the same repo must not crash.""" root = _make_repo(tmp_path) _make_commit(root) for i in range(20): _write_object(root, f"concurrent-orphan-{i}".encode()) errors: list[str] = [] def _run_dry() -> None: try: from muse.core.gc import run_gc run_gc(root, dry_run=True, grace_period_seconds=0) except Exception as exc: errors.append(str(exc)) threads = [threading.Thread(target=_run_dry) for _ in range(8)] for t in threads: t.start() for t in threads: t.join() assert not errors, f"Concurrent dry-run GC failures: {errors}" def test_gc_across_many_prefix_dirs(self, tmp_path: pathlib.Path) -> None: """Objects spread across many prefix dirs are all found and collected.""" root = _make_repo(tmp_path) # Force objects into many distinct prefix dirs by varying content. ids: list[str] = [] for i in range(100): ids.append(_write_object(root, f"spread-{i:08d}".encode())) # Verify we have multiple prefix dirs. algo_dir = objects_dir(root) / "sha256" prefix_dirs = [d for d in algo_dir.iterdir() if d.is_dir()] assert len(prefix_dirs) > 1, "Test needs objects in multiple prefix dirs" result = _invoke_gc(root, "--json") payload = _parse_gc_json(result.output) assert payload["collected_count"] == 100 assert set(payload["collected_ids"]) == set(ids)