gabriel / muse public
test_cmd_gc_hardening.py python
727 lines 28.4 KB
Raw
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago
1 """Comprehensive hardening tests for ``muse gc``.
2
3 Coverage dimensions:
4
5 Unit
6 ~~~~
7 - ``_is_hex`` edge cases (empty string, uppercase, mixed, valid)
8 - ``_list_stored_objects`` symlink guard for prefix dirs
9 - ``_list_stored_objects`` symlink guard for object files
10 - ``_list_stored_objects`` grace period filters recent files
11 - ``_list_stored_objects`` grace_period=0 includes all files
12 - ``_collect_reachable_objects`` symlink guard on shelf.json
13 - ``_collect_reachable_objects`` size cap on shelf.json
14 - ``_collect_reachable_objects`` malformed shelf.json is skipped gracefully
15 - ``run_gc`` grace_period_seconds stored in GcResult
16 - ``_fmt_bytes`` all size ranges
17 - ``run_gc`` negative grace period rejected by CLI
18
19 Security
20 ~~~~~~~~
21 - Symlink in .muse/objects/ prefix dir not deleted or followed
22 - Symlink object file not deleted or followed
23 - Symlink shelf.json skipped during reachability walk
24 - ANSI escape sequences in object IDs sanitized in text output
25 - Invalid --format rejected with error to stderr
26 - Negative --grace-period rejected with non-zero exit
27
28 Integration (CLI)
29 ~~~~~~~~~~~~~~~~~
30 - ``--json`` output schema matches ``_GcJson`` TypedDict
31 - ``--json`` includes ``grace_period_seconds`` field
32 - ``--grace-period`` value propagated to GcResult
33 - ``--dry-run`` combined with ``--json`` reports correctly
34 - ``--verbose`` combined with ``--json`` shows IDs in JSON
35 - ``--format text`` is the default
36 - Repeated GC runs are idempotent (JSON)
37
38 E2E
39 ~~~
40 - Full lifecycle: orphan accumulates across branches, GC reclaims
41 - GC after shelf save does NOT delete shelved objects
42 - GC with corrupt shelf.json succeeds (skips shelf walk)
43 - ``--grace-period 0`` collects freshly-written orphan
44 - ``--grace-period 9999`` protects freshly-written orphan
45
46 Stress
47 ~~~~~~
48 - 500 orphaned objects across 256 prefix dirs collected correctly
49 - Concurrent read-only GC (dry-run) on same repo is safe
50 """
51
52 from __future__ import annotations
53
54 import json
55 import os
56 import pathlib
57 import stat
58 import threading
59 import time
60 from collections.abc import Mapping
61 from typing import TypedDict
62
63
64 import pytest
65 from tests.cli_test_helper import CliRunner, InvokeResult
66 from muse.core.types import fake_id, long_id
67 from muse.core.object_store import object_path
68 from muse.core.paths import heads_dir, merge_state_path, muse_dir, objects_dir, shelf_dir
69
# Placeholder passed as the first argument of ``runner.invoke`` throughout
# this module; the value itself is never used (see trailing note).
cli = None  # argparse bridge — CliRunner ignores this
# Single CliRunner instance shared by every CLI invocation in this module.
runner = CliRunner()
72
73
74 # ---------------------------------------------------------------------------
75 # Helpers
76 # ---------------------------------------------------------------------------
77
78
def _env(root: pathlib.Path) -> dict[str, str]:
    """Build the environment overrides that point the CLI at *root*.

    The result is passed as ``env=`` to ``runner.invoke`` by the tests in
    this module.

    Args:
        root: Repository root directory.

    Returns:
        A one-entry mapping with ``MUSE_REPO_ROOT`` set to ``str(root)``.
    """
    return {"MUSE_REPO_ROOT": str(root)}
81
82
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *tmp_path*.

    Creates the ``objects``, ``commits``, ``snapshots`` and ``refs/heads``
    directories inside the directory returned by ``muse_dir``, writes a
    ``repo.json`` describing the repo, and a ``HEAD`` file that references
    ``refs/heads/main``.

    Returns:
        *tmp_path* itself, which the tests use as the repository root.
    """
    dot_muse = muse_dir(tmp_path)
    for relative in ("objects", "commits", "snapshots", "refs/heads"):
        dot_muse.joinpath(relative).mkdir(parents=True, exist_ok=True)

    repo_meta = {
        "repo_id": fake_id("repo"),
        "domain": "code",
        "default_branch": "main",
        "created_at": "2026-01-01T00:00:00+00:00",
    }
    dot_muse.joinpath("repo.json").write_text(json.dumps(repo_meta), encoding="utf-8")
    dot_muse.joinpath("HEAD").write_text("ref: refs/heads/main\n", encoding="utf-8")
    return tmp_path
96
97
def _write_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* in the repo's object store and return its object ID.

    The ID is computed from the content with ``blob_id`` and the bytes are
    written with ``write_object``.
    """
    from muse.core.object_store import write_object
    from muse.core.types import blob_id

    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
104
105
class _ShelfEntryRequired(TypedDict):
    """Keys a shelf entry carries from the moment it is constructed."""

    # Snapshot mapping stored in the entry (built from the caller's mapping).
    snapshot: dict[str, str]
    # Branch name recorded in the entry.
    branch: str
    # Creation timestamp string recorded in the entry.
    created_at: str


class _ShelfEntryData(_ShelfEntryRequired, total=False):
    """Shelf entry payload used by ``_write_shelf_entry``.

    ``id`` is optional because the helper first serialises the entry without
    it (to derive the ID from the content) and only then assigns
    ``entry_data["id"]``.  Declaring the key here keeps that assignment valid
    for a TypedDict; the required keys are unchanged.
    """

    # Entry identifier, assigned after the entry has been hashed.
    id: str
110
111
def _write_shelf_entry(root: pathlib.Path, snapshot: Mapping[str, str]) -> pathlib.Path:
    """Write a shelf entry in git-header+JSON format under .muse/shelf/sha256/.

    The entry ID is derived by hashing the key-sorted JSON of the entry
    *before* the ``id`` field is added; the ID is then stored in the entry
    and the entry is handed to ``write_shelf_entry``.

    Returns:
        ``shelf_dir(root) / "sha256" / <hex id>``.
    """
    from muse.core.shelf import write_shelf_entry
    from muse.core.types import blob_id, split_id

    entry_data: _ShelfEntryData = {
        "snapshot": dict(snapshot),
        "branch": "main",
        "created_at": "2026-01-01T00:00:00+00:00",
    }
    # Hash the entry as it stands now, i.e. without its own ID.
    serialised = json.dumps(entry_data, sort_keys=True).encode()
    _, hex_id = split_id(blob_id(serialised))
    entry_data["id"] = f"sha256:{hex_id}"
    write_shelf_entry(root, entry_data)
    return shelf_dir(root).joinpath("sha256", hex_id)
128
129
def _make_commit(root: pathlib.Path, manifest: dict[str, str] | None = None) -> str:
    """Write a snapshot and a parentless commit, and point ``main`` at it.

    Args:
        root: Repository root.
        manifest: Snapshot manifest to commit; ``None`` (or an empty mapping)
            commits an empty manifest.

    Returns:
        The ID of the commit that was written.
    """
    import datetime
    from muse.core.ids import hash_commit, hash_snapshot
    from muse.core.commits import (
        CommitRecord,
        write_commit,
    )
    from muse.core.snapshots import (
        SnapshotRecord,
        write_snapshot,
    )

    mfst: dict[str, str] = manifest or {}
    snap_id = hash_snapshot(mfst)
    # Fixed timestamp so the same manifest always produces the same commit ID.
    committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    commit_id = hash_commit(
        parent_ids=[],
        snapshot_id=snap_id,
        message="test",
        committed_at_iso=committed_at.isoformat(),
    )
    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=mfst))
    write_commit(root, CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snap_id,
        message="test",
        committed_at=committed_at,
    ))
    # Advance the ``main`` branch ref to the new commit.
    ref_path = heads_dir(root) / "main"
    ref_path.parent.mkdir(parents=True, exist_ok=True)
    ref_path.write_text(commit_id, encoding="utf-8")
    return commit_id
162
163
def _invoke_gc(root: pathlib.Path, *extra_args: str) -> InvokeResult:
    """Invoke ``muse gc`` with ``--grace-period 0`` unless caller overrides.

    The default is inserted ahead of *extra_args* only when the literal
    ``--grace-period`` token is absent from them.  Exceptions raised by the
    command propagate (``catch_exceptions=False``).
    """
    argv = ["gc"]
    if "--grace-period" not in extra_args:
        argv += ["--grace-period", "0"]
    argv.extend(extra_args)
    return runner.invoke(cli, argv, env=_env(root), catch_exceptions=False)
170
171
172 # ---------------------------------------------------------------------------
173 # _GcJson TypedDict for test assertions
174 # ---------------------------------------------------------------------------
175
176
class _GcJson(TypedDict):
    """Typed view of the ``muse gc --json`` payload as asserted on by the tests.

    ``_parse_gc_json`` builds instances of this type, coercing each field of
    the decoded JSON to the type declared here.
    """

    # Counters reported by the command.
    collected_count: int
    collected_bytes: int
    reachable_count: int
    # Elapsed-time figure reported by the command.
    duration_ms: float
    # Echo of the options the run was invoked with.
    grace_period_seconds: int
    dry_run: bool
    # Object IDs reported as collected.
    collected_ids: list[str]
185
186
def _parse_gc_json(output: str) -> _GcJson:
    """Extract and parse the JSON blob from CliRunner output.

    The first line that, once stripped, begins with ``{`` is decoded as
    JSON and its fields are coerced into a ``_GcJson``.

    Raises:
        AssertionError: If no line of *output* starts with ``{``.
    """
    stripped_lines = (text.strip() for text in output.splitlines())
    json_line = next((text for text in stripped_lines if text.startswith("{")), None)
    if json_line is None:
        raise AssertionError(f"No JSON object found in output:\n{output}")

    raw = json.loads(json_line)
    return _GcJson(
        collected_count=int(raw["collected_count"]),
        collected_bytes=int(raw["collected_bytes"]),
        reachable_count=int(raw["reachable_count"]),
        duration_ms=float(raw["duration_ms"]),
        grace_period_seconds=int(raw["grace_period_seconds"]),
        dry_run=bool(raw["dry_run"]),
        collected_ids=[str(item) for item in raw["collected_ids"]],
    )
203
204
205 # ---------------------------------------------------------------------------
206 # Unit — _is_hex
207 # ---------------------------------------------------------------------------
208
209
class TestIsHex:
    """Edge cases for ``muse.core.gc._is_hex``."""

    def test_empty_string_is_not_hex(self) -> None:
        """The empty string is rejected."""
        from muse.core.gc import _is_hex

        verdict = _is_hex("")
        assert not verdict

    def test_valid_lowercase_hex(self) -> None:
        """All sixteen lowercase hex digits are accepted."""
        from muse.core.gc import _is_hex

        verdict = _is_hex("0123456789abcdef")
        assert verdict

    def test_uppercase_rejected(self) -> None:
        """Uppercase hex digits are rejected."""
        from muse.core.gc import _is_hex

        verdict = _is_hex("ABCDEF")
        assert not verdict

    def test_mixed_case_rejected(self) -> None:
        """A single uppercase digit is enough to reject the string."""
        from muse.core.gc import _is_hex

        verdict = _is_hex("0aF")
        assert not verdict

    def test_non_hex_chars_rejected(self) -> None:
        """Letters outside a-f are rejected."""
        from muse.core.gc import _is_hex

        verdict = _is_hex("xyz")
        assert not verdict

    def test_single_valid_char(self) -> None:
        """A one-character hex string is accepted."""
        from muse.core.gc import _is_hex

        verdict = _is_hex("a")
        assert verdict

    def test_64_char_sha256(self) -> None:
        """A 64-character lowercase hex string is accepted."""
        from muse.core.gc import _is_hex

        digest = 64 * "a"
        assert _is_hex(digest)
239
240
241 # ---------------------------------------------------------------------------
242 # Unit — _fmt_bytes
243 # ---------------------------------------------------------------------------
244
245
class TestFmtBytes:
    """Unit checks for ``muse.cli.commands.gc._fmt_bytes`` (B, KiB, MiB)."""

    def test_bytes_range(self) -> None:
        """Values below 1024 are rendered as a plain byte count."""
        from muse.cli.commands.gc import _fmt_bytes

        for size, rendered in ((0, "0 B"), (1023, "1023 B")):
            assert _fmt_bytes(size) == rendered

    def test_kib_range(self) -> None:
        """Both ends of [1024, 1024**2 - 1] mention ``KiB``."""
        from muse.cli.commands.gc import _fmt_bytes

        for size in (1024, 1024 * 1024 - 1):
            assert "KiB" in _fmt_bytes(size)

    def test_mib_range(self) -> None:
        """1 MiB and 100 MiB both mention ``MiB``."""
        from muse.cli.commands.gc import _fmt_bytes

        for size in (1024 * 1024, 1024 * 1024 * 100):
            assert "MiB" in _fmt_bytes(size)
261
262
263 # ---------------------------------------------------------------------------
264 # Unit — _list_stored_objects
265 # ---------------------------------------------------------------------------
266
267
class TestListStoredObjects:
    """Unit tests for ``muse.core.gc._list_stored_objects``.

    Each test builds a fresh repo with ``_make_repo`` and inspects the pairs
    the function returns; only the first element of each pair (the object
    ID) is examined.
    """

    def test_symlink_prefix_dir_is_skipped(self, tmp_path: pathlib.Path) -> None:
        """A symlinked prefix directory must not be entered."""
        from muse.core.gc import _list_stored_objects

        root = _make_repo(tmp_path)
        sha = "a" * 64

        # A real directory outside the object store holding a file whose name
        # would complete a valid object ID if the prefix link were followed.
        real_dir = tmp_path / "external_objects"
        real_dir.mkdir()
        (real_dir / sha[2:]).write_bytes(b"content")

        # Create a symlink at <objects>/sha256/<prefix> → external dir.
        algo_dir = objects_dir(root) / "sha256"
        algo_dir.mkdir(parents=True, exist_ok=True)
        (algo_dir / sha[:2]).symlink_to(real_dir)

        pairs = _list_stored_objects(root, grace_period_seconds=0)
        found_ids = {oid for oid, _ in pairs}
        # Listed IDs have the same form ``_write_object`` returns (see
        # ``test_valid_object_included``), which ``object_path`` accepts
        # directly, whereas a bare hex digest must go through ``long_id``.
        # Checking only the bare digest could therefore never fail, so both
        # spellings are checked.
        assert found_ids.isdisjoint({sha, long_id(sha)}), (
            "Symlinked prefix dir must not be entered"
        )

    def test_symlink_object_file_is_skipped(self, tmp_path: pathlib.Path) -> None:
        """A symlinked object file must not be listed or ever unlinked."""
        from muse.core.gc import _list_stored_objects

        root = _make_repo(tmp_path)
        # Write a real file outside the object store.
        external = tmp_path / "external_secret"
        external.write_bytes(b"secret content")

        sha = "b" * 64
        link = object_path(root, long_id(sha))
        link.parent.mkdir(parents=True, exist_ok=True)
        link.symlink_to(external)

        pairs = _list_stored_objects(root, grace_period_seconds=0)
        found_ids = {oid for oid, _ in pairs}
        # Both ID spellings are checked; a bare-hex-only check cannot match
        # the ID form the listing uses for real objects.
        assert found_ids.isdisjoint({sha, long_id(sha)}), (
            "Symlinked object file must not be listed"
        )
        # The external file must be untouched.
        assert external.exists()

    def test_grace_period_filters_recent_files(self, tmp_path: pathlib.Path) -> None:
        """Objects written within the grace window are excluded."""
        from muse.core.gc import _list_stored_objects

        root = _make_repo(tmp_path)
        _write_object(root, b"new orphan")
        # Grace period of 60 s — the object was written <1 s ago.
        pairs = _list_stored_objects(root, grace_period_seconds=60)
        assert len(pairs) == 0

    def test_grace_period_zero_includes_all_files(self, tmp_path: pathlib.Path) -> None:
        """grace_period_seconds=0 bypasses the mtime check."""
        from muse.core.gc import _list_stored_objects

        root = _make_repo(tmp_path)
        _write_object(root, b"orphan")
        pairs = _list_stored_objects(root, grace_period_seconds=0)
        assert len(pairs) == 1

    def test_non_hex_prefix_dir_skipped(self, tmp_path: pathlib.Path) -> None:
        """A prefix directory whose name is not hex contributes nothing."""
        from muse.core.gc import _list_stored_objects

        root = _make_repo(tmp_path)
        (objects_dir(root) / "sha256" / "zz").mkdir(parents=True)
        pairs = _list_stored_objects(root, grace_period_seconds=0)
        assert len(pairs) == 0

    def test_non_hex_object_file_skipped(self, tmp_path: pathlib.Path) -> None:
        """A file with a non-hex name inside a valid prefix dir is not listed."""
        from muse.core.gc import _list_stored_objects

        root = _make_repo(tmp_path)
        prefix = objects_dir(root) / "sha256" / "ab"
        prefix.mkdir(parents=True)
        (prefix / "not-valid-hex!").write_bytes(b"x")
        pairs = _list_stored_objects(root, grace_period_seconds=0)
        assert len(pairs) == 0

    def test_valid_object_included(self, tmp_path: pathlib.Path) -> None:
        """An object written through the object store is listed under its ID."""
        from muse.core.gc import _list_stored_objects

        root = _make_repo(tmp_path)
        oid = _write_object(root, b"valid object")
        pairs = _list_stored_objects(root, grace_period_seconds=0)
        found_ids = {o for o, _ in pairs}
        assert oid in found_ids
348
349
350 # ---------------------------------------------------------------------------
351 # Unit — _collect_reachable_objects
352 # ---------------------------------------------------------------------------
353
354
class TestCollectReachableObjects:
    """Unit tests for ``muse.core.gc._collect_reachable_objects`` and the shelf."""

    def test_shelf_symlink_skipped(self, tmp_path: pathlib.Path) -> None:
        """A symlinked shelf.json is ignored during the reachability walk."""
        from muse.core.gc import _collect_reachable_objects

        root = _make_repo(tmp_path)
        # Write a real object and make it look shelved via a symlink.
        obj_id = _write_object(root, b"shelved content")
        shelf_payload = [{
            "snapshot_id": "s" * 64,
            "branch": "main",
            "created_at": "2026-01-01T00:00:00+00:00",
            "snapshot": {"a.py": obj_id},
        }]
        external = tmp_path / "real_shelf.json"
        external.write_text(json.dumps(shelf_payload))
        (muse_dir(root) / "shelf.json").symlink_to(external)

        # NOTE(review): other tests here store shelf entries via
        # ``_write_shelf_entry`` under ``shelf_dir(root) / "sha256"``; confirm
        # gc still reads ``shelf.json``, otherwise this passes vacuously.
        # The object should NOT be marked reachable (symlink was skipped).
        assert obj_id not in _collect_reachable_objects(root)

    def test_shelf_oversized_file_skipped(self, tmp_path: pathlib.Path) -> None:
        """A shelf entry exceeding the size cap is skipped, not OOM-killed."""
        import unittest.mock as mock
        from muse.core.gc import _MAX_SHELF_BYTES, _collect_reachable_objects

        root = _make_repo(tmp_path)
        obj_id = _write_object(root, b"shelved content")
        _write_shelf_entry(root, {"a.py": obj_id})

        # Regular-file stat result whose size field is one byte over the cap.
        oversized = os.stat_result((
            stat.S_IFREG | 0o644, 0, 0, 1, 0, 0,
            _MAX_SHELF_BYTES + 1, 0, 0, 0,
        ))
        # NOTE(review): this replaces ``Path.stat`` for *every* path, not only
        # the shelf entry — consider scoping the fake to the path returned by
        # ``_write_shelf_entry`` so unrelated stat calls keep real results.
        with mock.patch.object(pathlib.Path, "stat", return_value=oversized):
            reachable = _collect_reachable_objects(root)
        assert obj_id not in reachable

    def test_malformed_shelf_json_skipped(self, tmp_path: pathlib.Path) -> None:
        """Unparseable shelf.json content does not make the walk raise."""
        from muse.core.gc import _collect_reachable_objects

        root = _make_repo(tmp_path)
        broken_shelf = muse_dir(root) / "shelf.json"
        broken_shelf.write_text("not valid json{{{}}", encoding="utf-8")
        # Should not raise.
        outcome = _collect_reachable_objects(root)
        assert isinstance(outcome, set)

    def test_valid_shelf_objects_marked_reachable(self, tmp_path: pathlib.Path) -> None:
        """An object referenced by a written shelf entry is reported reachable."""
        from muse.core.gc import _collect_reachable_objects

        root = _make_repo(tmp_path)
        obj_id = _write_object(root, b"shelved content")
        _write_shelf_entry(root, {"a.py": obj_id})
        assert obj_id in _collect_reachable_objects(root)
406
407
408 # ---------------------------------------------------------------------------
409 # Unit — run_gc result fields
410 # ---------------------------------------------------------------------------
411
412
class TestRunGcResult:
    """Fields of the result object returned by ``muse.core.gc.run_gc``."""

    def test_grace_period_stored_in_result(self, tmp_path: pathlib.Path) -> None:
        """The grace period passed in is echoed on the result."""
        from muse.core.gc import run_gc

        outcome = run_gc(_make_repo(tmp_path), grace_period_seconds=42)
        assert outcome.grace_period_seconds == 42

    def test_dry_run_flag_stored_in_result(self, tmp_path: pathlib.Path) -> None:
        """``dry_run=True`` is echoed on the result."""
        from muse.core.gc import run_gc

        outcome = run_gc(_make_repo(tmp_path), dry_run=True, grace_period_seconds=0)
        assert outcome.dry_run is True

    def test_duration_ms_is_non_negative(self, tmp_path: pathlib.Path) -> None:
        """The reported duration is never negative."""
        from muse.core.gc import run_gc

        outcome = run_gc(_make_repo(tmp_path), grace_period_seconds=0)
        assert outcome.duration_ms >= 0.0
431
432
433 # ---------------------------------------------------------------------------
434 # Security — CLI
435 # ---------------------------------------------------------------------------
436
437
class TestSecurity:
    """CLI-level security checks for ``muse gc``."""

    def test_symlink_in_objects_not_deleted(self, tmp_path: pathlib.Path) -> None:
        """GC must never delete a file outside the repo via a symlink."""
        root = _make_repo(tmp_path)
        _make_commit(root)

        precious = tmp_path / "precious_file"
        precious.write_bytes(b"important data")

        # Plant a symlink where an object file would live, pointing at the
        # precious file.
        planted = object_path(root, long_id("c" * 64))
        planted.parent.mkdir(parents=True, exist_ok=True)
        planted.symlink_to(precious)

        _invoke_gc(root)

        assert precious.exists(), "External file must not be deleted via symlink"

    def test_ansi_in_object_id_sanitized(self, tmp_path: pathlib.Path) -> None:
        """sanitize_display must strip ANSI sequences from object IDs in verbose output."""
        root = _make_repo(tmp_path)
        _make_commit(root)
        # Write a real orphan (we can't control its SHA, but we test the path is taken).
        _write_object(root, b"orphan for sanitize test")

        outcome = _invoke_gc(root, "--verbose")

        assert outcome.exit_code == 0
        # The output must not contain raw ESC bytes.
        # NOTE(review): no escape sequence is ever injected by this test, so
        # this only shows that ordinary verbose output is ESC-free.
        assert "\x1b" not in outcome.output

    def test_invalid_format_exits_nonzero_and_writes_stderr(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An unsupported ``--format`` value makes the command exit non-zero."""
        root = _make_repo(tmp_path)
        # argparse now uses choices= so invalid format triggers argparse error.
        bad_format_args = ["gc", "--format", "csv"]
        outcome = runner.invoke(cli, bad_format_args, env=_env(root))
        # NOTE(review): despite the test name, stderr is not inspected here.
        assert outcome.exit_code != 0

    def test_negative_grace_period_rejected(self, tmp_path: pathlib.Path) -> None:
        """A negative ``--grace-period`` makes the command exit non-zero."""
        root = _make_repo(tmp_path)
        negative_args = ["gc", "--grace-period", "-1"]
        outcome = runner.invoke(cli, negative_args, env=_env(root))
        assert outcome.exit_code != 0
477
478
479 # ---------------------------------------------------------------------------
480 # Integration — JSON output schema
481 # ---------------------------------------------------------------------------
482
483
class TestJsonSchema:
    """Integration tests for the ``muse gc --json`` payload."""

    def test_json_schema_all_fields_present(self, tmp_path: pathlib.Path) -> None:
        """One orphan next to one commit yields a fully populated payload."""
        root = _make_repo(tmp_path)
        _make_commit(root)
        _write_object(root, b"orphan for json test")

        outcome = _invoke_gc(root, "--json")
        assert outcome.exit_code == 0

        report = _parse_gc_json(outcome.output)
        assert report["collected_count"] == 1
        assert report["collected_bytes"] > 0
        # commit + snapshot now live in the unified object store, so reachable_count >= 2
        assert report["reachable_count"] >= 2
        assert report["duration_ms"] >= 0.0
        assert report["grace_period_seconds"] == 0
        assert report["dry_run"] is False
        assert len(report["collected_ids"]) == 1

    def test_json_dry_run_does_not_delete(self, tmp_path: pathlib.Path) -> None:
        """``--dry-run --json`` reports the orphan but leaves it on disk."""
        from muse.core.object_store import has_object

        root = _make_repo(tmp_path)
        _make_commit(root)
        orphan_id = _write_object(root, b"dry run orphan")

        outcome = _invoke_gc(root, "--dry-run", "--json")
        assert outcome.exit_code == 0

        report = _parse_gc_json(outcome.output)
        assert report["dry_run"] is True
        assert report["collected_count"] == 1
        # File must still exist.
        assert has_object(root, orphan_id)

    def test_json_grace_period_field_reflects_flag(self, tmp_path: pathlib.Path) -> None:
        """The ``--grace-period`` value is echoed in the payload."""
        root = _make_repo(tmp_path)
        # An explicit --grace-period stops the helper from adding its default.
        outcome = _invoke_gc(root, "--grace-period", "99", "--json")
        assert outcome.exit_code == 0
        report = _parse_gc_json(outcome.output)
        assert report["grace_period_seconds"] == 99

    def test_json_collected_ids_sorted(self, tmp_path: pathlib.Path) -> None:
        """``collected_ids`` is emitted in sorted order."""
        root = _make_repo(tmp_path)
        for index in range(5):
            _write_object(root, f"sort test {index}".encode())

        outcome = _invoke_gc(root, "--json")
        assert outcome.exit_code == 0

        reported_ids = _parse_gc_json(outcome.output)["collected_ids"]
        assert reported_ids == sorted(reported_ids)

    def test_json_clean_repo_shows_zero_counts(self, tmp_path: pathlib.Path) -> None:
        """A repo with only a commit reports nothing collected."""
        root = _make_repo(tmp_path)
        _make_commit(root)

        outcome = _invoke_gc(root, "--json")
        assert outcome.exit_code == 0

        report = _parse_gc_json(outcome.output)
        assert report["collected_count"] == 0
        assert report["collected_bytes"] == 0
        assert report["collected_ids"] == []

    def test_shorthand_json_flag(self, tmp_path: pathlib.Path) -> None:
        """``--json`` on an otherwise empty repo produces a parseable payload."""
        root = _make_repo(tmp_path)
        outcome = _invoke_gc(root, "--json")
        assert outcome.exit_code == 0
        _parse_gc_json(outcome.output)  # must not raise
548
549
550 # ---------------------------------------------------------------------------
551 # E2E — full lifecycle
552 # ---------------------------------------------------------------------------
553
554
class TestE2E:
    """End-to-end ``muse gc`` scenarios driven through the CLI.

    Every CLI invocation asserts ``exit_code == 0`` before inspecting output
    or disk state, so a failing command is reported as such rather than as a
    confusing downstream assertion.
    """

    def test_orphan_from_abandoned_branch_reclaimed(self, tmp_path: pathlib.Path) -> None:
        """Objects written for a branch that was never committed are reclaimed."""
        root = _make_repo(tmp_path)
        # Write objects that were staged but never committed.
        orphan_a = _write_object(root, b"branch work A")
        orphan_b = _write_object(root, b"branch work B")
        # Now run GC.
        result = _invoke_gc(root, "--json")
        assert result.exit_code == 0
        payload = _parse_gc_json(result.output)
        assert orphan_a in payload["collected_ids"]
        assert orphan_b in payload["collected_ids"]

    def test_gc_after_shelf_save_preserves_shelf_objects(self, tmp_path: pathlib.Path) -> None:
        """An object referenced by a shelf entry survives GC."""
        from muse.core.object_store import has_object

        root = _make_repo(tmp_path)
        shelf_obj = _write_object(root, b"shelved file content")
        _write_shelf_entry(root, {"file.py": shelf_obj})

        result = _invoke_gc(root, "--json")
        assert result.exit_code == 0
        payload = _parse_gc_json(result.output)
        assert shelf_obj not in payload["collected_ids"]
        # Blob must still be on disk.
        assert has_object(root, shelf_obj)

    def test_gc_with_corrupt_shelf_json_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A corrupt shelf.json does not stop GC from collecting orphans."""
        root = _make_repo(tmp_path)
        orphan = _write_object(root, b"orphan despite corrupt shelf")
        (muse_dir(root) / "shelf.json").write_text("{not json", encoding="utf-8")
        result = _invoke_gc(root, "--json")
        assert result.exit_code == 0
        payload = _parse_gc_json(result.output)
        # Orphan is still collected even though shelf was corrupt.
        assert orphan in payload["collected_ids"]

    def test_grace_period_zero_collects_fresh_orphan(self, tmp_path: pathlib.Path) -> None:
        """``--grace-period 0`` collects an orphan written moments ago."""
        root = _make_repo(tmp_path)
        orphan = _write_object(root, b"fresh orphan")
        result = _invoke_gc(root, "--grace-period", "0", "--json")
        assert result.exit_code == 0
        payload = _parse_gc_json(result.output)
        assert orphan in payload["collected_ids"]

    def test_grace_period_large_protects_fresh_orphan(self, tmp_path: pathlib.Path) -> None:
        """``--grace-period 9999`` leaves a freshly written orphan alone."""
        root = _make_repo(tmp_path)
        _write_object(root, b"fresh orphan protected")
        # An explicit --grace-period stops the helper from adding its default.
        result = _invoke_gc(root, "--grace-period", "9999", "--json")
        assert result.exit_code == 0
        payload = _parse_gc_json(result.output)
        assert payload["collected_count"] == 0

    def test_repeated_gc_is_idempotent(self, tmp_path: pathlib.Path) -> None:
        """A second GC run right after the first finds nothing to collect."""
        root = _make_repo(tmp_path)
        _write_object(root, b"first orphan")
        first = _invoke_gc(root)
        assert first.exit_code == 0
        result2 = _invoke_gc(root, "--json")
        assert result2.exit_code == 0
        payload = _parse_gc_json(result2.output)
        assert payload["collected_count"] == 0

    def test_gc_removes_empty_prefix_dirs(self, tmp_path: pathlib.Path) -> None:
        """After GC, empty prefix dirs under .muse/objects/sha256/ are cleaned up."""
        root = _make_repo(tmp_path)
        sha = _write_object(root, b"sole object in prefix")
        prefix_dir = object_path(root, sha).parent
        assert prefix_dir.exists()
        result = _invoke_gc(root)
        assert result.exit_code == 0
        # Directory should be removed since it's empty now.
        assert not prefix_dir.exists()

    def test_verbose_lists_full_sha256_ids(self, tmp_path: pathlib.Path) -> None:
        """``--verbose`` prints the full ID of each collected object."""
        root = _make_repo(tmp_path)
        orphan = _write_object(root, b"verbose test object")
        result = _invoke_gc(root, "--verbose")
        assert result.exit_code == 0
        assert orphan in result.output

    def test_dry_run_verbose_lists_without_deleting(self, tmp_path: pathlib.Path) -> None:
        """``--dry-run --verbose`` prints the orphan's ID but keeps the file."""
        root = _make_repo(tmp_path)
        orphan = _write_object(root, b"dry verbose test")
        result = _invoke_gc(root, "--dry-run", "--verbose")
        assert result.exit_code == 0
        assert orphan in result.output
        assert object_path(root, orphan).exists()

    def test_dry_run_prefix_present_in_text_output(self, tmp_path: pathlib.Path) -> None:
        """Text output of a dry run carries the ``[dry-run]`` marker."""
        root = _make_repo(tmp_path)
        result = _invoke_gc(root, "--dry-run")
        assert result.exit_code == 0
        assert "[dry-run]" in result.output

    def test_reachable_count_reflects_committed_objects(self, tmp_path: pathlib.Path) -> None:
        """A commit with one blob yields exactly three reachable objects."""
        root = _make_repo(tmp_path)
        obj = _write_object(root, b"committed content")
        _make_commit(root, manifest={"file.txt": obj})
        result = _invoke_gc(root, "--json")
        assert result.exit_code == 0
        payload = _parse_gc_json(result.output)
        # commit + snapshot + 1 blob = 3 reachable in the unified object store
        assert payload["reachable_count"] == 3
        assert payload["collected_count"] == 0
662
663
664 # ---------------------------------------------------------------------------
665 # Stress
666 # ---------------------------------------------------------------------------
667
668
class TestStress:
    """Volume and concurrency checks for ``muse gc``."""

    def test_500_orphans_all_collected(self, tmp_path: pathlib.Path) -> None:
        """500 orphans are all collected; only commit + snapshot remain."""
        from muse.core.object_store import has_object

        root = _make_repo(tmp_path)
        _make_commit(root)
        orphan_ids = [_write_object(root, f"stress-{i:04d}".encode()) for i in range(500)]
        result = _invoke_gc(root, "--json")
        assert result.exit_code == 0
        payload = _parse_gc_json(result.output)
        assert payload["collected_count"] == 500
        assert set(payload["collected_ids"]) == set(orphan_ids)
        # All orphan blobs must be gone; commit + snapshot remain (reachable).
        for oid in orphan_ids:
            assert not has_object(root, oid), f"orphan {oid} was not collected"
        # Only the commit and snapshot objects remain in the store.
        remaining_files = [p for p in objects_dir(root).rglob("*") if p.is_file()]
        assert len(remaining_files) == 2

    def test_concurrent_dry_run_does_not_crash(self, tmp_path: pathlib.Path) -> None:
        """Multiple concurrent dry-run GCs on the same repo must not crash."""
        # Imported before the threads start so that each worker exercises
        # only ``run_gc`` itself.
        from muse.core.gc import run_gc

        root = _make_repo(tmp_path)
        _make_commit(root)
        for i in range(20):
            _write_object(root, f"concurrent-orphan-{i}".encode())

        errors: list[str] = []

        def _run_dry() -> None:
            try:
                run_gc(root, dry_run=True, grace_period_seconds=0)
            except Exception as exc:
                # repr() keeps the exception type, and is never empty even
                # when the exception carries no message.
                errors.append(repr(exc))

        threads = [threading.Thread(target=_run_dry) for _ in range(8)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        assert not errors, f"Concurrent dry-run GC failures: {errors}"

    def test_gc_across_many_prefix_dirs(self, tmp_path: pathlib.Path) -> None:
        """Objects spread across many prefix dirs are all found and collected."""
        root = _make_repo(tmp_path)
        # Force objects into many distinct prefix dirs by varying content.
        ids = [_write_object(root, f"spread-{i:08d}".encode()) for i in range(100)]
        # Verify we have multiple prefix dirs.
        algo_dir = objects_dir(root) / "sha256"
        prefix_dirs = [d for d in algo_dir.iterdir() if d.is_dir()]
        assert len(prefix_dirs) > 1, "Test needs objects in multiple prefix dirs"

        result = _invoke_gc(root, "--json")
        # Same exit-code check as the other CLI tests in this module.
        assert result.exit_code == 0
        payload = _parse_gc_json(result.output)
        assert payload["collected_count"] == 100
        assert set(payload["collected_ids"]) == set(ids)
File History 1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago