gabriel / muse public
test_core_gc.py python
257 lines 8.7 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
1 """Tests for muse/core/gc.py — garbage collection."""
2
3 from __future__ import annotations
4
5 import json
6 import pathlib
7 from collections.abc import Mapping
8 from typing import TypedDict
9
10 import pytest
11
12 from muse.core.gc import GcResult, run_gc
13 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
14 from muse.core.commits import (
15 CommitRecord,
16 write_commit,
17 )
18 from muse.core.snapshots import (
19 SnapshotRecord,
20 write_snapshot,
21 )
22 from muse.core.shelf import write_shelf_entry
23 from muse.core.types import Manifest, blob_id, split_id
24 from muse.core.object_store import object_path
25 from muse.core.paths import heads_dir, muse_dir, objects_dir, shelf_dir
26
27
28 # ---------------------------------------------------------------------------
29 # Helpers
30 # ---------------------------------------------------------------------------
31
32
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a bare-bones muse repo skeleton rooted at *tmp_path*.

    Inside the directory returned by ``muse_dir(tmp_path)`` this creates the
    ``objects``, ``commits``, ``snapshots`` and ``refs/heads`` directories,
    a ``repo.json`` with a fixed repo id, and a ``HEAD`` file that points at
    ``refs/heads/main``.

    Returns *tmp_path* itself so callers can use it as the repo root.
    """
    dot_muse = muse_dir(tmp_path)
    subdirs = ("objects", "commits", "snapshots", "refs/heads")
    for name in subdirs:
        (dot_muse / name).mkdir(parents=True, exist_ok=True)

    repo_meta = json.dumps({"repo_id": "test-repo"})
    (dot_muse / "repo.json").write_text(repo_meta)
    (dot_muse / "HEAD").write_text("ref: refs/heads/main\n")
    return tmp_path
41
42
class _ShelfEntryData(TypedDict, total=False):
    """Dict shape used by the tests when building a shelf entry.

    ``total=False`` makes every key optional, so an entry can be assembled
    first and have its ``id`` attached afterwards.
    """

    # Identifier string for the entry.
    id: str
    # String-to-string mapping stored under the "snapshot" key.
    snapshot: dict[str, str]
    # Branch name recorded with the entry.
    branch: str
    # Creation timestamp, kept as a string.
    created_at: str
48
49
def _write_shelf_entry(repo: pathlib.Path, snapshot: Mapping[str, str]) -> None:
    """Build a shelf entry for *snapshot* and hand it to ``write_shelf_entry``.

    The entry holds a plain-dict copy of *snapshot*, the fixed branch
    ``"main"`` and a fixed creation timestamp.  Its ``id`` is ``"sha256:"``
    followed by the hex part of ``blob_id`` computed over the entry's
    key-sorted JSON encoding, taken before the ``id`` key is added.

    NOTE(review): the on-disk location (expected to be
    ``.muse/shelf/sha256/<hex>``) is decided by ``write_shelf_entry`` and is
    not visible from this file.
    """
    entry: _ShelfEntryData = {
        "snapshot": dict(snapshot),
        "branch": "main",
        "created_at": "2026-01-01T00:00:00+00:00",
    }
    # Serialize before attaching the id so the id never feeds its own digest.
    # Uses the module-level ``json`` import; no function-local re-import needed.
    payload = json.dumps(entry, sort_keys=True).encode()
    _, hex_id = split_id(blob_id(payload))
    entry["id"] = f"sha256:{hex_id}"
    write_shelf_entry(repo, entry)
62
63
def _write_object(repo: pathlib.Path, content: bytes) -> str:
    """Store *content* through ``write_object`` and return its object id.

    The id is whatever ``blob_id`` computes for the raw bytes.
    """
    from muse.core.object_store import write_object

    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id
69
70
def _write_snapshot(repo: pathlib.Path, manifest: Manifest) -> str:
    """Persist a snapshot record for *manifest* and return its snapshot id.

    The id is computed from the manifest with ``compute_snapshot_id``, so the
    stored record carries an id derived from its own content.
    """
    snapshot_id = compute_snapshot_id(manifest)
    record = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest)
    write_snapshot(repo, record)
    return snapshot_id
76
77
def _write_commit(repo: pathlib.Path, snapshot_id: str) -> str:
    """Write a commit for *snapshot_id* and point the ``main`` head at it.

    The commit id is computed with ``compute_commit_id`` from an empty parent
    list, *snapshot_id*, the message ``"test"`` and a fixed UTC timestamp; the
    stored record uses the same snapshot id, message and timestamp.  After the
    record is written, the ``main`` file under ``heads_dir(repo)`` is
    overwritten with the new id.

    Returns the commit id.
    """
    import datetime

    when = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    new_id = compute_commit_id(
        parent_ids=[],
        snapshot_id=snapshot_id,
        message="test",
        committed_at_iso=when.isoformat(),
    )
    record = CommitRecord(
        commit_id=new_id,
        branch="main",
        snapshot_id=snapshot_id,
        message="test",
        committed_at=when,
    )
    write_commit(repo, record)

    # Make the branch head refer to the commit just written.
    main_ref = heads_dir(repo) / "main"
    main_ref.parent.mkdir(parents=True, exist_ok=True)
    main_ref.write_text(new_id)
    return new_id
100
101
102 # ---------------------------------------------------------------------------
103 # Tests
104 # ---------------------------------------------------------------------------
105
106
def test_gc_empty_repo(tmp_path: pathlib.Path) -> None:
    """A freshly created repo yields a ``GcResult`` with nothing collected."""
    outcome = run_gc(_make_repo(tmp_path), grace_period_seconds=0)
    assert isinstance(outcome, GcResult)
    assert outcome.collected_count == 0
113
114
def test_gc_removes_unreachable_object(tmp_path: pathlib.Path) -> None:
    """An object that no commit refers to is collected and deleted from disk."""
    repo = _make_repo(tmp_path)
    # Stored, but never referenced by any snapshot or commit.
    orphan_id = _write_object(repo, b"orphan data")
    orphan_file = object_path(repo, orphan_id)
    assert orphan_file.exists()

    outcome = run_gc(repo, grace_period_seconds=0)

    assert outcome.collected_count == 1
    assert orphan_id in outcome.collected_ids
    assert not orphan_file.exists()
126
127
def test_gc_preserves_reachable_object(tmp_path: pathlib.Path) -> None:
    """An object referenced by a committed snapshot survives GC."""
    repo = _make_repo(tmp_path)
    blob = _write_object(repo, b"reachable file content")
    # Make the blob reachable: snapshot -> commit -> main head.
    _write_commit(repo, _write_snapshot(repo, {"file.txt": blob}))

    outcome = run_gc(repo, grace_period_seconds=0)

    assert outcome.collected_count == 0
    assert object_path(repo, blob).exists()
139
140
def test_gc_dry_run_does_not_delete(tmp_path: pathlib.Path) -> None:
    """With ``dry_run=True`` the orphan is counted but its file is kept."""
    repo = _make_repo(tmp_path)
    orphan_file = object_path(repo, _write_object(repo, b"orphan"))

    outcome = run_gc(repo, dry_run=True, grace_period_seconds=0)

    assert outcome.dry_run is True
    assert outcome.collected_count == 1
    # Dry run must leave the object on disk.
    assert orphan_file.exists()
151
152
def test_gc_collected_bytes(tmp_path: pathlib.Path) -> None:
    """Collecting a 1000-byte orphan reports at least 1000 collected bytes."""
    repo = _make_repo(tmp_path)
    _write_object(repo, b"x" * 1000)

    outcome = run_gc(repo, grace_period_seconds=0)

    assert outcome.collected_bytes >= 1000
159
160
def test_gc_multiple_orphans(tmp_path: pathlib.Path) -> None:
    """Five distinct orphaned objects are all collected in one run."""
    repo = _make_repo(tmp_path)
    payloads = [f"orphan {i}".encode() for i in range(5)]
    for payload in payloads:
        _write_object(repo, payload)

    outcome = run_gc(repo, grace_period_seconds=0)

    assert outcome.collected_count == 5
167
168
def test_gc_mixed_reachable_and_orphans(tmp_path: pathlib.Path) -> None:
    """Only the orphans are collected when a committed object is also present."""
    repo = _make_repo(tmp_path)

    # A single blob kept alive through a snapshot and a commit.
    live_blob = _write_object(repo, b"reachable")
    live_snapshot = _write_snapshot(repo, {"file.txt": live_blob})
    _write_commit(repo, live_snapshot)

    # Two blobs that nothing refers to.
    for payload in (b"orphan A", b"orphan B"):
        _write_object(repo, payload)

    outcome = run_gc(repo, grace_period_seconds=0)

    assert outcome.collected_count == 2
    assert outcome.reachable_count == 3  # blob + snapshot + commit all live in unified store
182
183
def test_gc_elapsed_time_positive(tmp_path: pathlib.Path) -> None:
    """The reported ``duration_ms`` is never negative."""
    outcome = run_gc(_make_repo(tmp_path), grace_period_seconds=0)
    assert outcome.duration_ms >= 0.0
188
189
# ---------------------------------------------------------------------------
# Shelf safety, stray files, and stress tests
# ---------------------------------------------------------------------------
193
194
def test_gc_preserves_shelf_objects(tmp_path: pathlib.Path) -> None:
    """Objects referenced only by a shelf entry must NOT be GCed.

    This is the critical safety case: `muse shelf save` stores file blobs in
    the object store and records their IDs on the shelf.  If GC did not walk
    the shelf, a later `muse gc` would delete those blobs and `muse shelf pop`
    would fail with missing objects.
    """
    repo = _make_repo(tmp_path)

    # Stand-in for what a shelf save leaves behind: two blobs plus one entry
    # that refers to them.
    blob_a = _write_object(repo, b"shelved file A")
    blob_b = _write_object(repo, b"shelved file B")
    _write_shelf_entry(repo, {"a.py": blob_a, "b.py": blob_b})

    outcome = run_gc(repo, grace_period_seconds=0)
    assert outcome.collected_count == 0, "Shelf objects must not be GCed"

    # Both blobs are still on disk.
    assert object_path(repo, blob_a).exists()
    assert object_path(repo, blob_b).exists()
216
217
def test_gc_collects_objects_not_on_shelf(tmp_path: pathlib.Path) -> None:
    """An object that is neither committed nor shelved is collected; the shelved one is not."""
    repo = _make_repo(tmp_path)
    shelved = _write_object(repo, b"shelved")
    orphan = _write_object(repo, b"truly orphaned")
    # Only the first blob is referenced from the shelf.
    _write_shelf_entry(repo, {"a.py": shelved})

    outcome = run_gc(repo, grace_period_seconds=0)

    assert outcome.collected_count == 1
    assert orphan in outcome.collected_ids
    assert shelved not in outcome.collected_ids
230
231
def test_gc_ignores_stray_non_hex_files_in_objects_dir(tmp_path: pathlib.Path) -> None:
    """A non-hex filename inside the objects directory is neither counted nor deleted."""
    repo = _make_repo(tmp_path)

    # Drop a file that is not an object into an "ab" subdirectory.
    bucket = objects_dir(repo) / "ab"
    bucket.mkdir(parents=True, exist_ok=True)
    ds_store = bucket / ".DS_Store"
    ds_store.write_bytes(b"stray")

    outcome = run_gc(repo, grace_period_seconds=0)

    assert outcome.collected_count == 0
    assert ds_store.exists(), ".DS_Store should survive GC"
244
245
def test_gc_stress_many_orphans(tmp_path: pathlib.Path) -> None:
    """GC collects all 200 orphaned objects and leaves no files behind."""
    repo = _make_repo(tmp_path)
    for index in range(200):
        _write_object(repo, f"orphan-{index:04d}".encode())

    outcome = run_gc(repo, grace_period_seconds=0)
    assert outcome.collected_count == 200

    # No regular file may remain anywhere under the objects directory.
    leftover_files = [p for p in objects_dir(repo).rglob("*") if p.is_file()]
    assert leftover_files == []
File History 5 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:09656d1b0772ea4c96f8911d7bf8042b33eb0596992c6546dfab3d21e9dee330 fix: align muse read --json schema and test contracts Sonnet 4.6 minor 23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago