gabriel / muse public
test_object_store_write_taxonomy.py python
717 lines 27.7 KB
Raw
1 """Object store write taxonomy — exhaustive correctness and safety tests.
2
3 Every path that writes OR deletes objects is enumerated here. Each test
4 targets one invariant. If a test fails, it means a write or delete path is
5 broken; fix the production code, not the test.
6
7 Write paths covered
8 -------------------
9 W-1 write_object() — primary low-level write
10 W-2 write_object_from_path() — write from filesystem file
11 W-3 commit workflow — muse commit writes blobs then snapshot
12 W-4 shelf save — blobs written before shelf entry
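13 W-5 fetch / pull _on_object — objects written on receive (enumerated only; no test in this file yet)
14 W-6 apply_mpack — mpack unbundle writes objects (enumerated only; no test in this file yet)
15 W-7 domain merge — plugin merge writes merged blob (enumerated only; no test in this file yet)
16 W-8 hash_object --write — explicit low-level write (enumerated only; no test in this file yet)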
17
18 Delete paths covered
19 --------------------
20 D-1 gc non-full (default) — orphan sweep via snapshots walker
21 D-2 gc full — tight reachability from live refs
22 D-3 gc full multi-branch — objects on ALL branches survive
23 D-4 gc full object normalisation — sha256: prefixed IDs in reachable set
24 D-5 prune — mirrors gc non-full with expire window
25 D-6 maintenance gc task — calls run_gc with full=True
26
27 Consistency invariants
28 ----------------------
29 C-1 write → has_object True
30 C-2 write → object_state PRESENT
31 C-3 write → iter_stored_objects finds it
32 C-4 has_object and object_state agree
33 C-5 object_path canonical location
34 C-6 no write → object_state MISSING (no promisors)
35 C-7 no write → object_state PROMISED (promisors configured)
36 """
37
38 from __future__ import annotations
39
40 import datetime
41 import json
42 import pathlib
43 import tempfile
44 from collections.abc import Mapping
45 from typing import TypedDict
46
47 import pytest
48
49 from muse.core.types import Manifest, blob_id, long_id, split_id
50 from muse.core.gc import run_gc, _collect_reachable_snapshots, _collect_reachable_commits
51 from muse.core.object_availability import ObjectState, load_promisor_remotes, object_state
52 from muse.core.object_store import (
53 has_object,
54 iter_stored_objects,
55 object_path,
56 read_object,
57 write_object,
58 write_object_from_path,
59 )
60 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
61 from muse.core.commits import (
62 CommitRecord,
63 write_commit,
64 )
65 from muse.core.snapshots import (
66 SnapshotRecord,
67 write_snapshot,
68 )
69 from muse.core.shelf import write_shelf_entry
70 from muse.core.paths import muse_dir, objects_dir, ref_path, shelf_dir
71
72
73 # ---------------------------------------------------------------------------
74 # Shared helpers
75 # ---------------------------------------------------------------------------
76
77
def _repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal ``.muse`` directory tree under *tmp_path*.

    Creates the object, commit, snapshot and branch-ref directories, a
    ``repo.json`` carrying a fixed repo ID, and a ``HEAD`` file that points
    at ``refs/heads/main``.

    Returns:
        *tmp_path* itself, so callers can use it as the repository root.
    """
    dot_muse = muse_dir(tmp_path)
    subdirs = ("objects/sha256", "commits/sha256", "snapshots/sha256", "refs/heads")
    for relative in subdirs:
        target = dot_muse / relative
        target.mkdir(parents=True, exist_ok=True)

    repo_meta = json.dumps({"repo_id": "test-repo"})
    (dot_muse / "repo.json").write_text(repo_meta)
    (dot_muse / "HEAD").write_text("ref: refs/heads/main\n")
    return tmp_path
86
87
def _write_blob(repo: pathlib.Path, content: bytes) -> str:
    """Write *content* into the object store of *repo* and return its object ID.

    The ID is derived from the content via ``blob_id`` before the write, so
    the returned value is always the ID the object was stored under.
    """
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id
92
93
94 class _ShelfEntryData(TypedDict):
95 snapshot: dict[str, str]
96 branch: str
97 created_at: str
98
99
def _write_shelf_entry(repo: pathlib.Path, snapshot: Mapping[str, str]) -> None:
    """Persist a shelf entry for *snapshot* on branch ``main``.

    The entry ID is the ``sha256:``-prefixed hash of the entry's canonical
    (key-sorted) JSON encoding, computed *before* the ``id`` field is added.

    Args:
        repo: Repository root as returned by ``_repo``.
        snapshot: Mapping copied into the entry's ``snapshot`` field; the
            tests pass path -> object ID pairs.
    """
    entry: _ShelfEntryData = {
        "snapshot": dict(snapshot),
        "branch": "main",
        "created_at": "2026-01-01T00:00:00+00:00",
    }
    # Hash the payload without the "id" key so the ID is a pure function of
    # the entry's content.
    raw_bytes = json.dumps(entry, sort_keys=True).encode()
    _, hex_id = split_id(blob_id(raw_bytes))
    # Build a separate plain dict for the stored record: "id" is not a
    # declared key of _ShelfEntryData, so it must not be assigned onto the
    # typed payload itself.
    stored_entry = {**entry, "id": f"sha256:{hex_id}"}
    write_shelf_entry(repo, stored_entry)
111
112
def _write_snap(repo: pathlib.Path, manifest: Manifest) -> str:
    """Write a snapshot record for *manifest* and return the snapshot ID.

    The ID is computed from the manifest with ``compute_snapshot_id``.
    """
    snapshot_id = compute_snapshot_id(manifest)
    record = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest)
    write_snapshot(repo, record)
    return snapshot_id
117
118
def _write_commit_on_branch(
    repo: pathlib.Path,
    snap_id: str,
    branch: str = "main",
    parent_id: str | None = None,
    message: str = "test",
) -> str:
    """Write a commit for *snap_id* and point *branch* at it.

    The commit uses a fixed timestamp (2026-01-01 UTC), so the commit ID
    depends only on the parent, the snapshot and the message.

    Args:
        repo: Repository root.
        snap_id: Snapshot the commit records.
        branch: Branch whose ref file is (over)written with the new commit ID.
        parent_id: Optional single parent commit ID.
        message: Commit message.

    Returns:
        The ID of the commit that was written.
    """
    timestamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    parents = [] if not parent_id else [parent_id]
    new_commit_id = compute_commit_id(
        parent_ids=parents,
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=timestamp.isoformat(),
    )

    record = CommitRecord(
        commit_id=new_commit_id,
        branch=branch,
        snapshot_id=snap_id,
        message=message,
        committed_at=timestamp,
        parent_commit_id=parent_id,
    )
    write_commit(repo, record)

    # Branch names may contain "/" (e.g. "feat/x"), so make sure the ref's
    # parent directory exists before writing the ref file.
    branch_ref = ref_path(repo, branch)
    branch_ref.parent.mkdir(parents=True, exist_ok=True)
    branch_ref.write_text(new_commit_id)
    return new_commit_id
149
150
151 # ---------------------------------------------------------------------------
152 # W-1 write_object — canonical path
153 # ---------------------------------------------------------------------------
154
155
class TestWriteObject:
    """W-1: ``write_object()`` stores every object at its canonical sha256/ location."""

    def test_lands_under_sha256_dir(self, tmp_path: pathlib.Path) -> None:
        """The stored file sits two levels below a directory named ``sha256``."""
        repo = _repo(tmp_path)
        payload = b"hello"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        stored = object_path(repo, oid)
        assert stored.exists()
        assert stored.parent.parent.name == "sha256"

    def test_shard_prefix_is_first_two_hex_chars(self, tmp_path: pathlib.Path) -> None:
        """The shard directory is named after the first two hex digits of the ID."""
        repo = _repo(tmp_path)
        payload = b"shard-check"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        _, hex_part = split_id(oid)
        shard_dir = object_path(repo, oid).parent
        assert shard_dir.name == hex_part[:2]

    def test_filename_is_remaining_62_hex_chars(self, tmp_path: pathlib.Path) -> None:
        """The file name is the hex digest with the two shard digits removed."""
        repo = _repo(tmp_path)
        payload = b"filename-check"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        _, hex_part = split_id(oid)
        assert object_path(repo, oid).name == hex_part[2:]

    def test_idempotent_returns_false_on_second_write(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The first write reports True; repeating the same write reports False."""
        repo = _repo(tmp_path)
        payload = b"idempotent"
        oid = blob_id(payload)
        first = write_object(repo, oid, payload)
        second = write_object(repo, oid, payload)
        assert first is True
        assert second is False

    def test_content_verifiable_after_write(self, tmp_path: pathlib.Path) -> None:
        """Reading an object back yields exactly the bytes that were written."""
        repo = _repo(tmp_path)
        payload = b"verifiable content"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        round_tripped = read_object(repo, oid)
        assert round_tripped == payload

    def test_rejects_wrong_content(self, tmp_path: pathlib.Path) -> None:
        """Writing bytes that do not hash to the given ID raises ValueError."""
        repo = _repo(tmp_path)
        mismatched_oid = blob_id(b"correct")
        with pytest.raises(ValueError):
            write_object(repo, mismatched_oid, b"wrong content")

    def test_rejects_bare_hex_object_id(self, tmp_path: pathlib.Path) -> None:
        """An object ID without its algorithm prefix is refused."""
        repo = _repo(tmp_path)
        _, bare_hex = split_id(blob_id(b"bare"))
        # NOTE(review): the accepted exception type is deliberately broad here
        # (any Exception). Narrow it once the exact error raised by
        # write_object() for unprefixed IDs is confirmed.
        with pytest.raises(Exception):
            write_object(repo, bare_hex, b"bare")
211
212
213 # ---------------------------------------------------------------------------
214 # W-2 write_object_from_path — canonical path
215 # ---------------------------------------------------------------------------
216
217
class TestWriteObjectFromPath:
    """W-2: write_object_from_path() writes from a file and lands at canonical path."""

    def test_writes_to_sha256_dir(self, tmp_path: pathlib.Path) -> None:
        """The object copied from a source file ends up under ``sha256/``."""
        repo = _repo(tmp_path)
        src = tmp_path / "source.txt"
        content = b"from-path content"
        src.write_bytes(content)
        oid = blob_id(content)
        write_object_from_path(repo, oid, src)
        p = object_path(repo, oid)
        assert p.exists()
        assert p.parent.parent.name == "sha256"

    def test_oid_matches_blob_id(self, tmp_path: pathlib.Path) -> None:
        """The bytes stored under *oid* must hash back to that same *oid*."""
        repo = _repo(tmp_path)
        content = b"oid must match blob_id"
        src = tmp_path / "f.txt"
        src.write_bytes(content)
        oid = blob_id(content)
        write_object_from_path(repo, oid, src)
        # Re-hash what was actually stored. Comparing ``oid`` with
        # ``blob_id(content)`` alone would be a tautology, because ``oid`` was
        # computed from that very expression.
        assert has_object(repo, oid)
        assert blob_id(read_object(repo, oid)) == oid

    def test_content_readable_after_write(self, tmp_path: pathlib.Path) -> None:
        """Reading the object back returns the source file's bytes."""
        repo = _repo(tmp_path)
        content = b"readable after write"
        src = tmp_path / "r.txt"
        src.write_bytes(content)
        oid = blob_id(content)
        write_object_from_path(repo, oid, src)
        assert read_object(repo, oid) == content
249
250
251 # ---------------------------------------------------------------------------
252 # C-1 … C-7 Consistency invariants
253 # ---------------------------------------------------------------------------
254
255
class TestConsistencyInvariants:
    """C-1 … C-7: write, has_object, object_state and iteration must agree."""

    def test_c1_has_object_true_after_write(self, tmp_path: pathlib.Path) -> None:
        """C-1: a written object is reported by ``has_object``."""
        repo = _repo(tmp_path)
        written = _write_blob(repo, b"c1")
        assert has_object(repo, written)

    def test_c2_object_state_present_after_write(self, tmp_path: pathlib.Path) -> None:
        """C-2: a written object has state PRESENT."""
        repo = _repo(tmp_path)
        written = _write_blob(repo, b"c2")
        assert object_state(repo, written, []) == ObjectState.PRESENT

    def test_c3_iter_stored_objects_finds_written(
        self, tmp_path: pathlib.Path
    ) -> None:
        """C-3: a written object shows up when iterating the store."""
        repo = _repo(tmp_path)
        written = _write_blob(repo, b"c3")
        stored_ids = set()
        for stored_id, _ in iter_stored_objects(repo):
            stored_ids.add(stored_id)
        assert written in stored_ids

    def test_c4_has_object_and_object_state_agree_present(
        self, tmp_path: pathlib.Path
    ) -> None:
        """C-4: both APIs agree that a written object exists."""
        repo = _repo(tmp_path)
        written = _write_blob(repo, b"c4-present")
        assert has_object(repo, written)
        assert object_state(repo, written, []) == ObjectState.PRESENT

    def test_c4_has_object_and_object_state_agree_absent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """C-4: both APIs agree that an unwritten object does not exist."""
        repo = _repo(tmp_path)
        unwritten = blob_id(b"never written")
        assert not has_object(repo, unwritten)
        assert object_state(repo, unwritten, []) == ObjectState.MISSING

    def test_c5_object_path_canonical_location(self, tmp_path: pathlib.Path) -> None:
        """C-5: ``object_path`` equals objects/sha256/<2 hex>/<remaining hex>."""
        repo = _repo(tmp_path)
        payload = b"canonical"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        _, hex_part = split_id(oid)
        shard, leaf = hex_part[:2], hex_part[2:]
        canonical = objects_dir(repo) / "sha256" / shard / leaf
        actual = object_path(repo, oid)
        assert actual == canonical
        assert actual.exists()

    def test_c6_object_state_missing_when_absent_no_promisors(
        self, tmp_path: pathlib.Path
    ) -> None:
        """C-6: an absent object with no promisor remotes is MISSING."""
        repo = _repo(tmp_path)
        absent = blob_id(b"missing")
        assert object_state(repo, absent, promisor_remotes=[]) == ObjectState.MISSING

    def test_c7_object_state_promised_when_absent_with_promisor(
        self, tmp_path: pathlib.Path
    ) -> None:
        """C-7: an absent object with a promisor remote configured is PROMISED."""
        repo = _repo(tmp_path)
        absent = blob_id(b"promised")
        observed = object_state(repo, absent, promisor_remotes=["staging"])
        assert observed == ObjectState.PROMISED

    def test_c7_object_state_present_beats_promisor(
        self, tmp_path: pathlib.Path
    ) -> None:
        """C-7: a locally stored object stays PRESENT despite promisor remotes."""
        repo = _repo(tmp_path)
        written = _write_blob(repo, b"present beats promisor")
        observed = object_state(repo, written, promisor_remotes=["staging"])
        assert observed == ObjectState.PRESENT
329
330
331 # ---------------------------------------------------------------------------
332 # D-1 GC non-full — orphan sweep
333 # ---------------------------------------------------------------------------
334
335
class TestGcNonFull:
    """D-1: the default (non-full) GC removes orphans and keeps reachable objects."""

    def test_orphan_collected(self, tmp_path: pathlib.Path) -> None:
        """An object referenced by nothing is deleted when no grace period applies."""
        repo = _repo(tmp_path)
        orphan = _write_blob(repo, b"orphan")
        run_gc(repo, grace_period_seconds=0)
        assert not object_path(repo, orphan).exists()

    def test_reachable_via_snapshot_survives(self, tmp_path: pathlib.Path) -> None:
        """An object in a committed snapshot on ``main`` is kept."""
        repo = _repo(tmp_path)
        kept = _write_blob(repo, b"reachable")
        _write_commit_on_branch(repo, _write_snap(repo, {"f.txt": kept}))
        run_gc(repo, grace_period_seconds=0)
        assert object_path(repo, kept).exists()

    def test_reachable_on_non_default_branch_survives(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An object committed only on ``dev`` is kept."""
        repo = _repo(tmp_path)
        kept = _write_blob(repo, b"non-default branch")
        snapshot = _write_snap(repo, {"g.txt": kept})
        _write_commit_on_branch(repo, snapshot, branch="dev")
        run_gc(repo, grace_period_seconds=0)
        assert object_path(repo, kept).exists()

    def test_multiple_orphans_all_collected(self, tmp_path: pathlib.Path) -> None:
        """Five orphans are all collected and the result counts five."""
        repo = _repo(tmp_path)
        orphans = []
        for index in range(5):
            orphans.append(_write_blob(repo, f"o{index}".encode()))
        outcome = run_gc(repo, grace_period_seconds=0)
        assert outcome.collected_count == 5
        for orphan in orphans:
            assert not object_path(repo, orphan).exists()

    def test_grace_period_protects_recent_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A freshly written orphan is spared while inside the grace period."""
        repo = _repo(tmp_path)
        fresh = _write_blob(repo, b"fresh orphan")
        outcome = run_gc(repo, grace_period_seconds=3600)
        assert outcome.collected_count == 0
        assert object_path(repo, fresh).exists()
379
380
381 # ---------------------------------------------------------------------------
382 # D-2 GC full — tight reachability
383 # ---------------------------------------------------------------------------
384
385
class TestGcFull:
    """D-2: full GC applies tight reachability yet must keep every live object."""

    def test_reachable_object_survives_full_gc(self, tmp_path: pathlib.Path) -> None:
        """A committed object is neither counted as collected nor removed."""
        repo = _repo(tmp_path)
        live = _write_blob(repo, b"live object")
        _write_commit_on_branch(repo, _write_snap(repo, {"live.txt": live}))
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 0
        assert object_path(repo, live).exists()

    def test_orphan_collected_by_full_gc(self, tmp_path: pathlib.Path) -> None:
        """With one live and one orphan object, only the orphan is removed."""
        repo = _repo(tmp_path)
        # Reachable object first, committed on main ...
        live = _write_blob(repo, b"live")
        _write_commit_on_branch(repo, _write_snap(repo, {"f.txt": live}))
        # ... then an object nothing refers to.
        orphan = _write_blob(repo, b"orphan")
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 1
        assert not object_path(repo, orphan).exists()
        assert object_path(repo, live).exists()

    def test_full_gc_dry_run_does_not_delete(self, tmp_path: pathlib.Path) -> None:
        """A dry run reports ``dry_run`` and leaves the orphan on disk."""
        repo = _repo(tmp_path)
        orphan = _write_blob(repo, b"dry-run orphan")
        outcome = run_gc(repo, full=True, dry_run=True, grace_period_seconds=0)
        assert outcome.dry_run is True
        assert object_path(repo, orphan).exists()
416
417
418 # ---------------------------------------------------------------------------
419 # D-3 GC full multi-branch — objects on ALL live branches survive
420 # ---------------------------------------------------------------------------
421
422
class TestGcFullMultiBranch:
    """D-3: full GC keeps objects reachable from any live branch, not only HEAD."""

    def test_object_on_secondary_branch_survives_full_gc(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Objects committed separately on ``main`` and ``dev`` both survive."""
        repo = _repo(tmp_path)
        # Object committed on main.
        main_oid = _write_blob(repo, b"main content")
        _write_commit_on_branch(
            repo, _write_snap(repo, {"main.txt": main_oid}), branch="main"
        )
        # Object with different content committed on dev.
        dev_oid = _write_blob(repo, b"dev content")
        _write_commit_on_branch(
            repo, _write_snap(repo, {"dev.txt": dev_oid}), branch="dev"
        )
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 0
        assert object_path(repo, main_oid).exists(), "main branch object deleted!"
        assert object_path(repo, dev_oid).exists(), "dev branch object deleted!"

    def test_object_on_three_branches_all_survive(
        self, tmp_path: pathlib.Path
    ) -> None:
        """One distinct object per branch, three branches, nothing collected."""
        repo = _repo(tmp_path)
        per_branch = []
        for name in ("main", "dev", "feat/x"):
            written = _write_blob(repo, f"content on {name}".encode())
            snapshot = _write_snap(repo, {f"{name}.txt": written})
            _write_commit_on_branch(repo, snapshot, branch=name)
            per_branch.append(written)
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 0
        for written in per_branch:
            assert object_path(repo, written).exists()

    def test_shared_object_referenced_by_two_branches_survives(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An object referenced from both main and dev must be kept by full GC."""
        repo = _repo(tmp_path)
        shared = _write_blob(repo, b"shared content")
        for name in ("main", "dev"):
            snapshot = _write_snap(repo, {"shared.txt": shared})
            _write_commit_on_branch(repo, snapshot, branch=name)
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 0
        assert object_path(repo, shared).exists()
470
471
472 # ---------------------------------------------------------------------------
473 # D-4 GC full object ID normalisation
474 # ---------------------------------------------------------------------------
475
476
class TestGcFullObjectNormalisation:
    """D-4: full GC reachability set uses sha256:-prefixed IDs throughout.

    This is the critical invariant that ensures the reachable-objects set
    (built from snapshot manifests) matches the stored-objects set
    (built from iter_stored_objects). A mismatch would cause live objects
    to be incorrectly classified as unreachable and deleted.
    """

    def test_reachable_set_uses_prefixed_ids(self, tmp_path: pathlib.Path) -> None:
        """_collect_reachable_snapshots returns sha256:-prefixed object IDs."""
        repo = _repo(tmp_path)
        oid = _write_blob(repo, b"normalisation check")
        snap_id = _write_snap(repo, {"f.txt": oid})
        _write_commit_on_branch(repo, snap_id)
        reachable_commits = _collect_reachable_commits(repo)
        _, reachable_objs = _collect_reachable_snapshots(repo, reachable_commits)
        # Guard against a vacuous pass: the loop below proves nothing if the
        # reachable set came back empty.
        assert oid in reachable_objs, f"{oid} missing from reachable set"
        # Every entry must carry the sha256: prefix.
        for obj_id in reachable_objs:
            assert obj_id.startswith("sha256:"), (
                f"Reachable object ID missing sha256: prefix: {obj_id!r}"
            )

    def test_iter_stored_objects_uses_prefixed_ids(
        self, tmp_path: pathlib.Path
    ) -> None:
        """iter_stored_objects returns sha256:-prefixed object IDs."""
        repo = _repo(tmp_path)
        _write_blob(repo, b"stored check")
        stored_ids = [oid for oid, _ in iter_stored_objects(repo)]
        # Guard against a vacuous pass when nothing is yielded at all.
        assert stored_ids, "iter_stored_objects yielded nothing after a write"
        for oid in stored_ids:
            assert oid.startswith("sha256:"), (
                f"iter_stored_objects returned unprefixed ID: {oid!r}"
            )

    def test_reachable_set_matches_stored_set_for_live_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Every live object must appear in both sets with the same ID form."""
        repo = _repo(tmp_path)
        # A list (not a set) keeps the path -> object mapping, and therefore
        # the snapshot and commit IDs, identical from run to run.
        oids = [_write_blob(repo, f"live {i}".encode()) for i in range(3)]
        manifest = {f"f{i}.txt": oid for i, oid in enumerate(oids)}
        snap_id = _write_snap(repo, manifest)
        _write_commit_on_branch(repo, snap_id)
        reachable_commits = _collect_reachable_commits(repo)
        _, reachable_objs = _collect_reachable_snapshots(repo, reachable_commits)
        stored = {o for o, _ in iter_stored_objects(repo)}
        # All live objects must be in both sets.
        for oid in oids:
            assert oid in reachable_objs, f"{oid} missing from reachable set"
            assert oid in stored, f"{oid} missing from stored set"

    def test_full_gc_does_not_delete_prefixed_manifest_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Regression: full GC must not delete objects whose IDs use sha256: prefix in the manifest."""
        repo = _repo(tmp_path)
        contents = [f"file {i} content".encode() for i in range(5)]
        manifest = {}
        for i, c in enumerate(contents):
            oid = _write_blob(repo, c)
            manifest[f"file{i}.py"] = oid
            # Confirm the manifest value is prefixed.
            assert oid.startswith("sha256:"), f"blob_id returned unprefixed: {oid}"
        snap_id = _write_snap(repo, manifest)
        _write_commit_on_branch(repo, snap_id)
        result = run_gc(repo, full=True, grace_period_seconds=0)
        assert result.collected_count == 0, (
            f"Full GC deleted {result.collected_count} live objects: {result.collected_ids}"
        )
        for oid in manifest.values():
            assert object_path(repo, oid).exists(), f"Full GC deleted live object {oid}"

    def test_full_gc_retains_large_manifest(self, tmp_path: pathlib.Path) -> None:
        """Full GC must not delete any of N live objects in a large snapshot."""
        repo = _repo(tmp_path)
        n = 50
        manifest = {
            f"src/file_{i:03d}.py": _write_blob(
                repo, f"large manifest entry {i}".encode()
            )
            for i in range(n)
        }
        snap_id = _write_snap(repo, manifest)
        _write_commit_on_branch(repo, snap_id)
        result = run_gc(repo, full=True, grace_period_seconds=0)
        assert result.collected_count == 0, (
            f"Full GC deleted objects from large manifest: {result.collected_ids[:5]}"
        )
        # Check the files too, not only the counter GC reports about itself.
        for oid in manifest.values():
            assert object_path(repo, oid).exists(), f"Full GC deleted live object {oid}"
565
566
567 # ---------------------------------------------------------------------------
568 # D-5 Prune — mirrors non-full GC with expire window
569 # ---------------------------------------------------------------------------
570
571
class TestPruneSafety:
    """D-5: muse prune must never delete reachable objects."""

    def test_prune_does_not_remove_committed_object(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Objects referenced by commits must survive prune.

        Prune delegates to gc, so this runs the non-full ``run_gc`` (already
        imported at module level) rather than the prune command itself.
        """
        repo = _repo(tmp_path)
        oid = _write_blob(repo, b"committed object")
        snap_id = _write_snap(repo, {"f.txt": oid})
        _write_commit_on_branch(repo, snap_id)
        # Non-full GC is what prune uses.
        result = run_gc(repo, grace_period_seconds=0)
        assert result.collected_count == 0
        assert object_path(repo, oid).exists()
588
589
590 # ---------------------------------------------------------------------------
591 # D-6 Maintenance gc task passes full=True
592 # ---------------------------------------------------------------------------
593
594
class TestMaintenanceGcUsesFull:
    """D-6: the maintenance 'gc' task has to call run_gc with full=True."""

    def test_maintenance_gc_task_calls_run_gc_with_full_true(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """The maintenance task ``_run_gc`` forwards ``full=True`` to ``run_gc``."""
        from muse.cli.commands import maintenance as maint_mod

        recorded: list[dict[str, bool]] = []

        def _capture_run_gc(
            root: pathlib.Path,
            *,
            dry_run: bool,
            grace_period_seconds: float,
            full: bool,
        ) -> "GcResult":
            # Stand-in for run_gc: remember the flags, return an empty result.
            from muse.core.gc import GcResult

            recorded.append({"full": full, "dry_run": dry_run})
            return GcResult(
                dry_run=dry_run,
                grace_period_seconds=grace_period_seconds,
                full=full,
            )

        monkeypatch.setattr(maint_mod, "run_gc", _capture_run_gc)
        repo = _repo(tmp_path)
        maint_mod._run_gc(repo)

        assert recorded, "run_gc was never called by maintenance _run_gc"
        first_call = recorded[0]
        assert first_call["full"] is True, (
            f"Maintenance gc must pass full=True, got full={first_call['full']}"
        )

    def test_maintenance_gc_retains_all_reachable_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """End-to-end: the maintenance gc task leaves committed objects in place."""
        from muse.cli.commands.maintenance import _run_gc as maintenance_run_gc

        repo = _repo(tmp_path)
        payloads = {"main": b"main obj", "dev": b"dev obj"}
        # One committed object per branch.
        for name, payload in payloads.items():
            written = _write_blob(repo, payload)
            snapshot = _write_snap(repo, {f"{name}.py": written})
            _write_commit_on_branch(repo, snapshot, branch=name)

        maintenance_run_gc(repo, dry_run=False)

        # Re-derive each ID from its content and check the file is still there.
        for payload in payloads.values():
            oid = blob_id(payload)
            assert object_path(repo, oid).exists(), (
                f"Maintenance gc deleted live object {oid}"
            )
640
641
642 # ---------------------------------------------------------------------------
643 # W-3 Commit workflow — objects written before commit record
644 # ---------------------------------------------------------------------------
645
646
class TestCommitWritePath:
    """W-3: blobs for a commit are stored at the canonical path before the
    commit record is created.

    Exercised at the store level rather than through the CLI, because the
    CLI needs a full working-tree environment.
    """

    def test_snapshot_manifest_objects_at_canonical_path(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Every object written for a commit sits under the ``sha256/`` directory."""
        repo = _repo(tmp_path)
        manifest = {}
        for index in range(3):
            payload = f"content {index}".encode()
            oid = blob_id(payload)
            write_object(repo, oid, payload)
            manifest[f"src/file{index}.py"] = oid
        _write_commit_on_branch(repo, _write_snap(repo, manifest))
        # Each manifest object exists at the expected location.
        for oid in manifest.values():
            stored = object_path(repo, oid)
            assert stored.exists()
            assert stored.parent.parent.name == "sha256"

    def test_all_manifest_objects_survive_full_gc(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Full GC collects nothing from a committed ten-object snapshot."""
        repo = _repo(tmp_path)
        manifest = {}
        for index in range(10):
            payload = f"committed file {index}".encode()
            oid = blob_id(payload)
            write_object(repo, oid, payload)
            manifest[f"file{index}.py"] = oid
        _write_commit_on_branch(repo, _write_snap(repo, manifest))
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 0
        for oid in manifest.values():
            assert object_path(repo, oid).exists()
691
692
693 # ---------------------------------------------------------------------------
694 # W-4 Shelf save — blobs written before shelf entry
695 # ---------------------------------------------------------------------------
696
697
class TestShelfWritePath:
    """W-4: objects referenced by a shelf entry survive GC before being committed."""

    def test_shelved_objects_survive_non_full_gc(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Default GC collects nothing when the only object is shelved."""
        repo = _repo(tmp_path)
        shelved = _write_blob(repo, b"shelved work")
        _write_shelf_entry(repo, {"work.py": shelved})
        outcome = run_gc(repo, grace_period_seconds=0)
        assert outcome.collected_count == 0
        assert object_path(repo, shelved).exists()

    def test_shelved_objects_survive_full_gc(self, tmp_path: pathlib.Path) -> None:
        """Full GC collects nothing when the only object is shelved."""
        repo = _repo(tmp_path)
        shelved = _write_blob(repo, b"shelved full gc")
        _write_shelf_entry(repo, {"wip.py": shelved})
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 0
        assert object_path(repo, shelved).exists()
File History 1 commit