gabriel / muse public
test_object_store_write_taxonomy.py python
710 lines 27.6 KB
Raw
sha256:248464b6a2f758985cbef90f864fa62c61842be699d975d6e00b6a9509ef919c fix(delta): detect blob-identical file renames for files wi… Sonnet 4.6 patch 24 days ago
1 """Object store write taxonomy — exhaustive correctness and safety tests.
2
3 Every path that writes OR deletes objects is enumerated here. Each test
4 targets one invariant. If a test fails, it means a write or delete path is
5 broken; fix the production code, not the test.
6
7 Write paths covered
8 -------------------
9 W-1 write_object() — primary low-level write
10 W-2 write_object_from_path() — write from filesystem file
11 W-3 commit workflow — muse commit writes blobs then snapshot
12 W-4 shelf save — blobs written before shelf entry
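13 W-5 fetch / pull _on_object — objects written on receive (enumerated only; no test in this module yet)
14 W-6 apply_mpack — mpack unbundle writes objects (enumerated only; no test in this module yet)
15 W-7 domain merge — plugin merge writes merged blob (enumerated only; no test in this module yet)
16 W-8 hash_object --write — explicit low-level write (enumerated only; no test in this module yet)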
17
18 Delete paths covered
19 --------------------
20 D-1 gc non-full (default) — orphan sweep via snapshots walker
21 D-2 gc full — tight reachability from live refs
22 D-3 gc full multi-branch — objects on ALL branches survive
23 D-4 gc full object normalisation — sha256: prefixed IDs in reachable set
24 D-5 prune — mirrors gc non-full with expire window
25 D-6 maintenance gc task — calls run_gc with full=True
26
27 Consistency invariants
28 ----------------------
29 C-1 write → has_object True
30 C-2 write → object_state PRESENT
31 C-3 write → iter_stored_objects finds it
32 C-4 has_object and object_state agree
33 C-5 object_path canonical location
34 C-6 no write → object_state MISSING (no promisors)
35 C-7 no write → object_state PROMISED (promisors configured)
36 """
37
38 from __future__ import annotations
39
40 import datetime
41 import json
42 import pathlib
43 import tempfile
44 from collections.abc import Mapping
45
46 import pytest
47
48 from muse.core.types import Manifest, blob_id, long_id, split_id
49 from muse.core.gc import run_gc, _collect_reachable_snapshots, _collect_reachable_commits
50 from muse.core.object_availability import ObjectState, load_promisor_remotes, object_state
51 from muse.core.object_store import (
52 has_object,
53 iter_stored_objects,
54 object_path,
55 read_object,
56 write_object,
57 write_object_from_path,
58 )
59 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
60 from muse.core.commits import (
61 CommitRecord,
62 write_commit,
63 )
64 from muse.core.snapshots import (
65 SnapshotRecord,
66 write_snapshot,
67 )
68 from muse.core.shelf import write_shelf_entry
69 from muse.core.paths import muse_dir, objects_dir, ref_path, shelf_dir
70
71
72 # ---------------------------------------------------------------------------
73 # Shared helpers
74 # ---------------------------------------------------------------------------
75
76
def _repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a bare-bones repository skeleton under *tmp_path*.

    Creates the object, commit, snapshot and branch-ref directories inside
    the directory returned by ``muse_dir``, writes a ``repo.json`` carrying a
    fixed repo ID and a ``HEAD`` pointing at ``refs/heads/main``.

    Returns:
        *tmp_path* itself, so callers can use it as the repo root.
    """
    root = muse_dir(tmp_path)
    skeleton_dirs = (
        "objects/sha256",
        "commits/sha256",
        "snapshots/sha256",
        "refs/heads",
    )
    for relative in skeleton_dirs:
        root.joinpath(relative).mkdir(parents=True, exist_ok=True)
    repo_meta = json.dumps({"repo_id": "test-repo"})
    root.joinpath("repo.json").write_text(repo_meta)
    root.joinpath("HEAD").write_text("ref: refs/heads/main\n")
    return tmp_path
85
86
def _write_blob(repo: pathlib.Path, content: bytes) -> str:
    """Store *content* in the repo's object store and return its object ID.

    The ID is derived from the content with ``blob_id`` before the write.
    """
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id
91
92
def _write_shelf_entry(repo: pathlib.Path, snapshot: Mapping[str, str]) -> None:
    """Persist a shelf entry on branch ``main`` that references *snapshot*.

    Args:
        repo: Repository root.
        snapshot: Mapping copied into the entry's ``"snapshot"`` field.

    The entry ID is the hex digest of the entry's canonical (sorted-key)
    JSON form, computed *before* the ``"id"`` field is added, and stored
    with a ``sha256:`` prefix.
    """
    entry: dict[str, object] = {
        "snapshot": dict(snapshot),
        "branch": "main",
        "created_at": "2026-01-01T00:00:00+00:00",
    }
    # Uses the module-level ``json`` import; the former function-local
    # ``import json as _json`` alias was redundant.
    raw_bytes = json.dumps(entry, sort_keys=True).encode()
    _, hex_id = split_id(blob_id(raw_bytes))
    entry["id"] = f"sha256:{hex_id}"
    write_shelf_entry(repo, entry)
104
105
def _write_snap(repo: pathlib.Path, manifest: Manifest) -> str:
    """Write a snapshot record for *manifest* and return the snapshot ID.

    The ID is computed from the manifest via ``compute_snapshot_id``.
    """
    snapshot_id = compute_snapshot_id(manifest)
    record = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest)
    write_snapshot(repo, record)
    return snapshot_id
110
111
def _write_commit_on_branch(
    repo: pathlib.Path,
    snap_id: str,
    branch: str = "main",
    parent_id: str | None = None,
    message: str = "test",
) -> str:
    """Record a commit for *snap_id* and point *branch*'s ref at it.

    Args:
        repo: Repository root.
        snap_id: Snapshot the commit refers to.
        branch: Branch whose ref file is (over)written with the commit ID.
        parent_id: Optional single parent commit ID.
        message: Commit message.

    Returns:
        The commit ID computed from the parent list, snapshot ID, message
        and a fixed timestamp (2026-01-01 UTC).
    """
    timestamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)

    # A falsy parent_id (None or empty string) means a root commit.
    if parent_id:
        parent_list = [parent_id]
    else:
        parent_list = []

    commit_id = compute_commit_id(
        parent_ids=parent_list,
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=timestamp.isoformat(),
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=snap_id,
        message=message,
        committed_at=timestamp,
        parent_commit_id=parent_id,
    )
    write_commit(repo, record)

    # Branch names may contain slashes, so make sure the ref's directory exists.
    branch_ref = ref_path(repo, branch)
    branch_ref.parent.mkdir(parents=True, exist_ok=True)
    branch_ref.write_text(commit_id)
    return commit_id
142
143
144 # ---------------------------------------------------------------------------
145 # W-1 write_object — canonical path
146 # ---------------------------------------------------------------------------
147
148
class TestWriteObject:
    """W-1: write_object() stores objects at the canonical sha256/ location."""

    def test_lands_under_sha256_dir(self, tmp_path: pathlib.Path) -> None:
        """The stored file sits two levels below a directory named ``sha256``."""
        repo = _repo(tmp_path)
        payload = b"hello"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        stored_at = object_path(repo, oid)
        assert stored_at.exists()
        assert stored_at.parent.parent.name == "sha256"

    def test_shard_prefix_is_first_two_hex_chars(self, tmp_path: pathlib.Path) -> None:
        """The shard directory is named after the first two hex characters."""
        repo = _repo(tmp_path)
        payload = b"shard-check"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        digest = split_id(oid)[1]
        shard_dir = object_path(repo, oid).parent
        assert shard_dir.name == digest[:2]

    def test_filename_is_remaining_62_hex_chars(self, tmp_path: pathlib.Path) -> None:
        """The file name is the hex digest minus its two-character shard prefix."""
        repo = _repo(tmp_path)
        payload = b"filename-check"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        digest = split_id(oid)[1]
        stored_at = object_path(repo, oid)
        assert stored_at.name == digest[2:]

    def test_idempotent_returns_false_on_second_write(
        self, tmp_path: pathlib.Path
    ) -> None:
        """First write reports True, a repeated identical write reports False."""
        repo = _repo(tmp_path)
        payload = b"idempotent"
        oid = blob_id(payload)
        first_result = write_object(repo, oid, payload)
        assert first_result is True
        second_result = write_object(repo, oid, payload)
        assert second_result is False

    def test_content_verifiable_after_write(self, tmp_path: pathlib.Path) -> None:
        """Bytes read back equal the bytes written."""
        repo = _repo(tmp_path)
        payload = b"verifiable content"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        round_tripped = read_object(repo, oid)
        assert round_tripped == payload

    def test_rejects_wrong_content(self, tmp_path: pathlib.Path) -> None:
        """Writing bytes that do not match the object ID raises ValueError."""
        repo = _repo(tmp_path)
        oid_for_other_content = blob_id(b"correct")
        with pytest.raises(ValueError):
            write_object(repo, oid_for_other_content, b"wrong content")

    def test_rejects_bare_hex_object_id(self, tmp_path: pathlib.Path) -> None:
        """An object ID without its algorithm prefix is refused."""
        repo = _repo(tmp_path)
        unprefixed = split_id(blob_id(b"bare"))[1]
        # NOTE(review): Exception already subsumes ValueError, so any exception
        # satisfies this check — consider narrowing once the concrete exception
        # type raised by write_object for bare IDs is confirmed.
        with pytest.raises((ValueError, Exception)):
            write_object(repo, unprefixed, b"bare")
204
205
206 # ---------------------------------------------------------------------------
207 # W-2 write_object_from_path — canonical path
208 # ---------------------------------------------------------------------------
209
210
class TestWriteObjectFromPath:
    """W-2: write_object_from_path() writes from a file and lands at canonical path."""

    def test_writes_to_sha256_dir(self, tmp_path: pathlib.Path) -> None:
        """The object written from a file sits under the ``sha256`` directory."""
        repo = _repo(tmp_path)
        src = tmp_path / "source.txt"
        content = b"from-path content"
        src.write_bytes(content)
        oid = blob_id(content)
        write_object_from_path(repo, oid, src)
        p = object_path(repo, oid)
        assert p.exists()
        assert p.parent.parent.name == "sha256"

    def test_oid_matches_blob_id(self, tmp_path: pathlib.Path) -> None:
        """The stored object is addressable by, and hashes back to, blob_id(content).

        The previous assertion compared ``oid`` with the very expression that
        produced it, so it could never fail. The check now goes through the
        object store.
        """
        repo = _repo(tmp_path)
        content = b"oid must match blob_id"
        src = tmp_path / "f.txt"
        src.write_bytes(content)
        oid = blob_id(content)
        write_object_from_path(repo, oid, src)
        assert has_object(repo, oid)
        assert blob_id(read_object(repo, oid)) == oid

    def test_content_readable_after_write(self, tmp_path: pathlib.Path) -> None:
        """Bytes read back from the store equal the source file's bytes."""
        repo = _repo(tmp_path)
        content = b"readable after write"
        src = tmp_path / "r.txt"
        src.write_bytes(content)
        oid = blob_id(content)
        write_object_from_path(repo, oid, src)
        assert read_object(repo, oid) == content
242
243
244 # ---------------------------------------------------------------------------
245 # C-1 … C-7 Consistency invariants
246 # ---------------------------------------------------------------------------
247
248
class TestConsistencyInvariants:
    """C-1 … C-7: write, has_object, object_state and iteration must agree."""

    def test_c1_has_object_true_after_write(self, tmp_path: pathlib.Path) -> None:
        """C-1: a written object is reported by has_object."""
        repo = _repo(tmp_path)
        written = _write_blob(repo, b"c1")
        assert has_object(repo, written)

    def test_c2_object_state_present_after_write(self, tmp_path: pathlib.Path) -> None:
        """C-2: a written object has state PRESENT."""
        repo = _repo(tmp_path)
        written = _write_blob(repo, b"c2")
        assert object_state(repo, written, []) == ObjectState.PRESENT

    def test_c3_iter_stored_objects_finds_written(
        self, tmp_path: pathlib.Path
    ) -> None:
        """C-3: a written object shows up in iter_stored_objects."""
        repo = _repo(tmp_path)
        written = _write_blob(repo, b"c3")
        stored_ids = set()
        for stored_id, _ in iter_stored_objects(repo):
            stored_ids.add(stored_id)
        assert written in stored_ids

    def test_c4_has_object_and_object_state_agree_present(
        self, tmp_path: pathlib.Path
    ) -> None:
        """C-4: both views report a written object as available."""
        repo = _repo(tmp_path)
        written = _write_blob(repo, b"c4-present")
        assert has_object(repo, written)
        assert object_state(repo, written, []) == ObjectState.PRESENT

    def test_c4_has_object_and_object_state_agree_absent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """C-4: both views report a never-written object as unavailable."""
        repo = _repo(tmp_path)
        unwritten = blob_id(b"never written")
        assert not has_object(repo, unwritten)
        assert object_state(repo, unwritten, []) == ObjectState.MISSING

    def test_c5_object_path_canonical_location(self, tmp_path: pathlib.Path) -> None:
        """C-5: object_path is <objects_dir>/sha256/<2 hex>/<remaining hex>."""
        repo = _repo(tmp_path)
        payload = b"canonical"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        digest = split_id(oid)[1]
        shard, remainder = digest[:2], digest[2:]
        canonical = objects_dir(repo) / "sha256" / shard / remainder
        actual = object_path(repo, oid)
        assert actual == canonical
        assert actual.exists()

    def test_c6_object_state_missing_when_absent_no_promisors(
        self, tmp_path: pathlib.Path
    ) -> None:
        """C-6: an absent object with no promisor remotes is MISSING."""
        repo = _repo(tmp_path)
        unwritten = blob_id(b"missing")
        assert object_state(repo, unwritten, promisor_remotes=[]) == ObjectState.MISSING

    def test_c7_object_state_promised_when_absent_with_promisor(
        self, tmp_path: pathlib.Path
    ) -> None:
        """C-7: an absent object with a promisor remote is PROMISED."""
        repo = _repo(tmp_path)
        unwritten = blob_id(b"promised")
        observed = object_state(repo, unwritten, promisor_remotes=["staging"])
        assert observed == ObjectState.PROMISED

    def test_c7_object_state_present_beats_promisor(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A locally stored object stays PRESENT even with promisors configured."""
        repo = _repo(tmp_path)
        written = _write_blob(repo, b"present beats promisor")
        observed = object_state(repo, written, promisor_remotes=["staging"])
        assert observed == ObjectState.PRESENT
322
323
324 # ---------------------------------------------------------------------------
325 # D-1 GC non-full — orphan sweep
326 # ---------------------------------------------------------------------------
327
328
class TestGcNonFull:
    """D-1: default (non-full) GC removes orphans and keeps reachable objects."""

    def test_orphan_collected(self, tmp_path: pathlib.Path) -> None:
        """An unreferenced blob is gone after GC with no grace period."""
        repo = _repo(tmp_path)
        orphan = _write_blob(repo, b"orphan")
        run_gc(repo, grace_period_seconds=0)
        assert not object_path(repo, orphan).exists()

    def test_reachable_via_snapshot_survives(self, tmp_path: pathlib.Path) -> None:
        """A blob referenced by a committed snapshot on main is kept."""
        repo = _repo(tmp_path)
        live = _write_blob(repo, b"reachable")
        snapshot = _write_snap(repo, {"f.txt": live})
        _write_commit_on_branch(repo, snapshot)
        run_gc(repo, grace_period_seconds=0)
        assert object_path(repo, live).exists()

    def test_reachable_on_non_default_branch_survives(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A blob referenced only from the ``dev`` branch is kept."""
        repo = _repo(tmp_path)
        live = _write_blob(repo, b"non-default branch")
        snapshot = _write_snap(repo, {"g.txt": live})
        _write_commit_on_branch(repo, snapshot, branch="dev")
        run_gc(repo, grace_period_seconds=0)
        assert object_path(repo, live).exists()

    def test_multiple_orphans_all_collected(self, tmp_path: pathlib.Path) -> None:
        """Five orphans yield a collected count of five and no files left."""
        repo = _repo(tmp_path)
        orphans = []
        for index in range(5):
            orphans.append(_write_blob(repo, f"o{index}".encode()))
        outcome = run_gc(repo, grace_period_seconds=0)
        assert outcome.collected_count == 5
        for orphan in orphans:
            assert not object_path(repo, orphan).exists()

    def test_grace_period_protects_recent_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A freshly written orphan is kept under a one-hour grace period."""
        repo = _repo(tmp_path)
        fresh = _write_blob(repo, b"fresh orphan")
        outcome = run_gc(repo, grace_period_seconds=3600)
        assert outcome.collected_count == 0
        assert object_path(repo, fresh).exists()
372
373
374 # ---------------------------------------------------------------------------
375 # D-2 GC full — tight reachability
376 # ---------------------------------------------------------------------------
377
378
class TestGcFull:
    """D-2: full GC applies tight reachability yet keeps every live object."""

    def test_reachable_object_survives_full_gc(self, tmp_path: pathlib.Path) -> None:
        """A committed blob is not collected by full GC."""
        repo = _repo(tmp_path)
        live = _write_blob(repo, b"live object")
        snapshot = _write_snap(repo, {"live.txt": live})
        _write_commit_on_branch(repo, snapshot)
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 0
        assert object_path(repo, live).exists()

    def test_orphan_collected_by_full_gc(self, tmp_path: pathlib.Path) -> None:
        """With one live and one orphan blob, only the orphan is removed."""
        repo = _repo(tmp_path)
        # Committed blob first, then an unreferenced one.
        live = _write_blob(repo, b"live")
        snapshot = _write_snap(repo, {"f.txt": live})
        _write_commit_on_branch(repo, snapshot)
        orphan = _write_blob(repo, b"orphan")
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 1
        assert not object_path(repo, orphan).exists()
        assert object_path(repo, live).exists()

    def test_full_gc_dry_run_does_not_delete(self, tmp_path: pathlib.Path) -> None:
        """A dry run reports dry_run=True and leaves the orphan on disk."""
        repo = _repo(tmp_path)
        orphan = _write_blob(repo, b"dry-run orphan")
        outcome = run_gc(repo, full=True, dry_run=True, grace_period_seconds=0)
        assert outcome.dry_run is True
        assert object_path(repo, orphan).exists()
409
410
411 # ---------------------------------------------------------------------------
412 # D-3 GC full multi-branch — objects on ALL live branches survive
413 # ---------------------------------------------------------------------------
414
415
class TestGcFullMultiBranch:
    """D-3: full GC keeps objects reachable from every live branch, not just HEAD."""

    def test_object_on_secondary_branch_survives_full_gc(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Distinct blobs committed on main and dev both survive."""
        repo = _repo(tmp_path)
        # Blob committed on main.
        main_oid = _write_blob(repo, b"main content")
        _write_commit_on_branch(
            repo, _write_snap(repo, {"main.txt": main_oid}), branch="main"
        )
        # Different blob committed on dev.
        dev_oid = _write_blob(repo, b"dev content")
        _write_commit_on_branch(
            repo, _write_snap(repo, {"dev.txt": dev_oid}), branch="dev"
        )
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 0
        assert object_path(repo, main_oid).exists(), "main branch object deleted!"
        assert object_path(repo, dev_oid).exists(), "dev branch object deleted!"

    def test_object_on_three_branches_all_survive(
        self, tmp_path: pathlib.Path
    ) -> None:
        """One blob per branch on three branches; none is collected."""
        repo = _repo(tmp_path)
        written = []
        for branch_name in ("main", "dev", "feat/x"):
            blob = _write_blob(repo, f"content on {branch_name}".encode())
            snapshot = _write_snap(repo, {f"{branch_name}.txt": blob})
            _write_commit_on_branch(repo, snapshot, branch=branch_name)
            written.append(blob)
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 0
        for blob in written:
            assert object_path(repo, blob).exists()

    def test_shared_object_referenced_by_two_branches_survives(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A blob referenced from both main and dev is kept by full GC."""
        repo = _repo(tmp_path)
        shared = _write_blob(repo, b"shared content")
        for branch_name in ("main", "dev"):
            snapshot = _write_snap(repo, {"shared.txt": shared})
            _write_commit_on_branch(repo, snapshot, branch=branch_name)
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 0
        assert object_path(repo, shared).exists()
463
464
465 # ---------------------------------------------------------------------------
466 # D-4 GC full object ID normalisation
467 # ---------------------------------------------------------------------------
468
469
class TestGcFullObjectNormalisation:
    """D-4: full GC reachability set uses sha256:-prefixed IDs throughout.

    This is the critical invariant that ensures the reachable-objects set
    (built from snapshot manifests) matches the stored-objects set
    (built from iter_stored_objects). A mismatch would cause live objects
    to be incorrectly classified as unreachable and deleted.
    """

    def test_reachable_set_uses_prefixed_ids(self, tmp_path: pathlib.Path) -> None:
        """_collect_reachable_snapshots returns sha256:-prefixed object IDs."""
        repo = _repo(tmp_path)
        oid = _write_blob(repo, b"normalisation check")
        snap_id = _write_snap(repo, {"f.txt": oid})
        _write_commit_on_branch(repo, snap_id)
        reachable_commits = _collect_reachable_commits(repo)
        _, reachable_objs = _collect_reachable_snapshots(repo, reachable_commits)
        # Guard against a vacuous pass: an empty reachable set would satisfy
        # the prefix loop below without checking anything.
        assert oid in reachable_objs, f"{oid} missing from reachable set"
        # Every entry must carry the sha256: prefix.
        for obj_id in reachable_objs:
            assert obj_id.startswith("sha256:"), (
                f"Reachable object ID missing sha256: prefix: {obj_id!r}"
            )

    def test_iter_stored_objects_uses_prefixed_ids(
        self, tmp_path: pathlib.Path
    ) -> None:
        """iter_stored_objects returns sha256:-prefixed object IDs."""
        repo = _repo(tmp_path)
        written = _write_blob(repo, b"stored check")
        stored_ids = [oid for oid, _ in iter_stored_objects(repo)]
        # Guard against a vacuous pass when nothing is yielded.
        assert written in stored_ids, f"{written} missing from stored set"
        for oid in stored_ids:
            assert oid.startswith("sha256:"), (
                f"iter_stored_objects returned unprefixed ID: {oid!r}"
            )

    def test_reachable_set_matches_stored_set_for_live_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Every live object must appear in both sets with the same ID form."""
        repo = _repo(tmp_path)
        # A list (not a set) keeps the path -> oid mapping deterministic, and
        # distinct variable names avoid shadowing the loop index.
        oids = [_write_blob(repo, f"live {i}".encode()) for i in range(3)]
        manifest = {f"f{idx}.txt": oid for idx, oid in enumerate(oids)}
        snap_id = _write_snap(repo, manifest)
        _write_commit_on_branch(repo, snap_id)
        reachable_commits = _collect_reachable_commits(repo)
        _, reachable_objs = _collect_reachable_snapshots(repo, reachable_commits)
        stored = {o for o, _ in iter_stored_objects(repo)}
        # All live objects must be in both sets.
        for oid in oids:
            assert oid in reachable_objs, f"{oid} missing from reachable set"
            assert oid in stored, f"{oid} missing from stored set"

    def test_full_gc_does_not_delete_prefixed_manifest_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Regression: full GC must not delete objects whose IDs use sha256: prefix in the manifest."""
        repo = _repo(tmp_path)
        contents = [f"file {i} content".encode() for i in range(5)]
        manifest: dict[str, str] = {}
        for i, c in enumerate(contents):
            oid = _write_blob(repo, c)
            manifest[f"file{i}.py"] = oid
            # Confirm the manifest value is prefixed.
            assert oid.startswith("sha256:"), f"blob_id returned unprefixed: {oid}"
        snap_id = _write_snap(repo, manifest)
        _write_commit_on_branch(repo, snap_id)
        result = run_gc(repo, full=True, grace_period_seconds=0)
        assert result.collected_count == 0, (
            f"Full GC deleted {result.collected_count} live objects: {result.collected_ids}"
        )
        for oid in manifest.values():
            assert object_path(repo, oid).exists(), f"Full GC deleted live object {oid}"

    def test_full_gc_retains_large_manifest(self, tmp_path: pathlib.Path) -> None:
        """Full GC must not delete any of N live objects in a large snapshot."""
        repo = _repo(tmp_path)
        n = 50
        manifest: dict[str, str] = {}
        for i in range(n):
            oid = _write_blob(repo, f"large manifest entry {i}".encode())
            manifest[f"src/file_{i:03d}.py"] = oid
        snap_id = _write_snap(repo, manifest)
        _write_commit_on_branch(repo, snap_id)
        result = run_gc(repo, full=True, grace_period_seconds=0)
        assert result.collected_count == 0, (
            f"Full GC deleted objects from large manifest: {result.collected_ids[:5]}"
        )
558
559
560 # ---------------------------------------------------------------------------
561 # D-5 Prune — mirrors non-full GC with expire window
562 # ---------------------------------------------------------------------------
563
564
class TestPruneSafety:
    """D-5: muse prune must never delete reachable objects."""

    def test_prune_does_not_remove_committed_object(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Objects referenced by commits must survive prune.

        The check runs non-full ``run_gc`` directly, which (per the original
        author's note) is what prune delegates to. ``run_gc`` comes from the
        module-level import; the former function-local re-import was redundant.
        """
        repo = _repo(tmp_path)
        oid = _write_blob(repo, b"committed object")
        snap_id = _write_snap(repo, {"f.txt": oid})
        _write_commit_on_branch(repo, snap_id)
        # Non-full GC is what prune uses.
        result = run_gc(repo, grace_period_seconds=0)
        assert result.collected_count == 0
        assert object_path(repo, oid).exists()
581
582
583 # ---------------------------------------------------------------------------
584 # D-6 Maintenance gc task passes full=True
585 # ---------------------------------------------------------------------------
586
587
class TestMaintenanceGcUsesFull:
    """D-6: the maintenance 'gc' task has to call run_gc with full=True."""

    def test_maintenance_gc_task_calls_run_gc_with_full_true(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """The maintenance module's _run_gc forwards full=True to run_gc."""
        from muse.cli.commands import maintenance as maint_mod

        recorded: list[dict] = []

        def _capture_run_gc(
            root: pathlib.Path,
            *,
            dry_run: bool,
            grace_period_seconds: float,
            full: bool,
        ) -> "GcResult":
            # Stand-in for run_gc: remember the flags, return an empty result.
            recorded.append({"full": full, "dry_run": dry_run})
            from muse.core.gc import GcResult

            return GcResult(
                dry_run=dry_run,
                grace_period_seconds=grace_period_seconds,
                full=full,
            )

        # Replace the run_gc name as seen from inside the maintenance module.
        monkeypatch.setattr(maint_mod, "run_gc", _capture_run_gc)
        repo = _repo(tmp_path)
        maint_mod._run_gc(repo)

        assert recorded, "run_gc was never called by maintenance _run_gc"
        first_call = recorded[0]
        assert first_call["full"] is True, (
            f"Maintenance gc must pass full=True, got full={first_call['full']}"
        )

    def test_maintenance_gc_retains_all_reachable_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """End-to-end: the maintenance gc task leaves committed objects in place."""
        from muse.cli.commands.maintenance import _run_gc as maintenance_run_gc

        repo = _repo(tmp_path)
        branch_payloads = (("main", b"main obj"), ("dev", b"dev obj"))

        # One committed blob per branch.
        for branch_name, payload in branch_payloads:
            blob = _write_blob(repo, payload)
            snapshot = _write_snap(repo, {f"{branch_name}.py": blob})
            _write_commit_on_branch(repo, snapshot, branch=branch_name)

        maintenance_run_gc(repo, dry_run=False)

        # Each blob must still be on disk afterwards.
        for _, payload in branch_payloads:
            oid = blob_id(payload)
            assert object_path(repo, oid).exists(), (
                f"Maintenance gc deleted live object {oid}"
            )
633
634
635 # ---------------------------------------------------------------------------
636 # W-3 Commit workflow — objects written before commit record
637 # ---------------------------------------------------------------------------
638
639
class TestCommitWritePath:
    """W-3: blobs for a commit are stored at the canonical path before the
    commit record is created.

    Exercised at the store level rather than through the CLI, which (per the
    original author's note) needs a full working-tree environment.
    """

    def test_snapshot_manifest_objects_at_canonical_path(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Every blob in a committed manifest exists under the sha256/ directory."""
        repo = _repo(tmp_path)
        manifest = {}
        for index in range(3):
            # _write_blob derives the ID with blob_id and stores via write_object.
            manifest[f"src/file{index}.py"] = _write_blob(
                repo, f"content {index}".encode()
            )
        snapshot = _write_snap(repo, manifest)
        _write_commit_on_branch(repo, snapshot)
        # Each referenced object is present at the expected location.
        for oid in manifest.values():
            stored_at = object_path(repo, oid)
            assert stored_at.exists()
            assert stored_at.parent.parent.name == "sha256"

    def test_all_manifest_objects_survive_full_gc(
        self, tmp_path: pathlib.Path
    ) -> None:
        """All ten blobs of a committed snapshot are kept by full GC."""
        repo = _repo(tmp_path)
        manifest = {}
        for index in range(10):
            manifest[f"file{index}.py"] = _write_blob(
                repo, f"committed file {index}".encode()
            )
        snapshot = _write_snap(repo, manifest)
        _write_commit_on_branch(repo, snapshot)
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 0
        for oid in manifest.values():
            assert object_path(repo, oid).exists()
684
685
686 # ---------------------------------------------------------------------------
687 # W-4 Shelf save — blobs written before shelf entry
688 # ---------------------------------------------------------------------------
689
690
class TestShelfWritePath:
    """W-4: blobs referenced only by a shelf entry must not be collected."""

    def test_shelved_objects_survive_non_full_gc(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Default GC keeps a blob that only a shelf entry references."""
        repo = _repo(tmp_path)
        shelved = _write_blob(repo, b"shelved work")
        _write_shelf_entry(repo, {"work.py": shelved})
        outcome = run_gc(repo, grace_period_seconds=0)
        assert outcome.collected_count == 0
        assert object_path(repo, shelved).exists()

    def test_shelved_objects_survive_full_gc(self, tmp_path: pathlib.Path) -> None:
        """Full GC keeps a blob that only a shelf entry references."""
        repo = _repo(tmp_path)
        shelved = _write_blob(repo, b"shelved full gc")
        _write_shelf_entry(repo, {"wip.py": shelved})
        outcome = run_gc(repo, full=True, grace_period_seconds=0)
        assert outcome.collected_count == 0
        assert object_path(repo, shelved).exists()
File History 1 commit
sha256:248464b6a2f758985cbef90f864fa62c61842be699d975d6e00b6a9509ef919c fix(delta): detect blob-identical file renames for files wi… Sonnet 4.6 patch 24 days ago