gabriel / muse public
test_cmd_gc_hardening.py python
721 lines 28.3 KB
Raw
sha256:84df9126d09aeec0b8f1b908f0b06c10913feec28f3514b382efb1ba6d619385 refactor: rename StructuredMergePlugin to AddressedMergePlu… Sonnet 4.6 minor ⚠ breaking 23 days ago
1 """Comprehensive hardening tests for ``muse gc``.
2
3 Coverage dimensions:
4
5 Unit
6 ~~~~
7 - ``_is_hex`` edge cases (empty string, uppercase, mixed, valid)
8 - ``_list_stored_objects`` symlink guard for prefix dirs
9 - ``_list_stored_objects`` symlink guard for object files
10 - ``_list_stored_objects`` grace period filters recent files
11 - ``_list_stored_objects`` grace_period=0 includes all files
12 - ``_collect_reachable_objects`` symlink guard on shelf.json
13 - ``_collect_reachable_objects`` size cap on shelf.json
14 - ``_collect_reachable_objects`` malformed shelf.json is skipped gracefully
15 - ``run_gc`` grace_period_seconds stored in GcResult
16 - ``_fmt_bytes`` all size ranges
17 - ``run_gc`` negative grace period rejected by CLI
18
19 Security
20 ~~~~~~~~
21 - Symlink in .muse/objects/ prefix dir not deleted or followed
22 - Symlink object file not deleted or followed
23 - Symlink shelf.json skipped during reachability walk
24 - ANSI escape sequences in object IDs sanitized in text output
25 - Invalid --format rejected with error to stderr
26 - Negative --grace-period rejected with non-zero exit
27
28 Integration (CLI)
29 ~~~~~~~~~~~~~~~~~
30 - ``--json`` output schema matches ``_GcJson`` TypedDict
31 - ``--json`` includes ``grace_period_seconds`` field
32 - ``--grace-period`` value propagated to GcResult
33 - ``--dry-run`` combined with ``--json`` reports correctly
34 - ``--verbose`` combined with ``--json`` shows IDs in JSON
35 - ``--format text`` is the default
36 - Repeated GC runs are idempotent (JSON)
37
38 E2E
39 ~~~
40 - Full lifecycle: orphan accumulates across branches, GC reclaims
41 - GC after shelf save does NOT delete shelved objects
42 - GC with corrupt shelf.json succeeds (skips shelf walk)
43 - ``--grace-period 0`` collects freshly-written orphan
44 - ``--grace-period 9999`` protects freshly-written orphan
45
46 Stress
47 ~~~~~~
48 - 500 orphaned objects collected in a single run; 100 objects spread across multiple prefix dirs
49 - Concurrent read-only GC (dry-run) on same repo is safe
50 """
51
52 from __future__ import annotations
53
54 import json
55 import os
56 import pathlib
57 import stat
58 import threading
59 import time
60 from collections.abc import Mapping
61 from typing import TypedDict
62
63
64 import pytest
65 from tests.cli_test_helper import CliRunner, InvokeResult
66 from muse.core.types import fake_id, long_id
67 from muse.core.object_store import object_path
68 from muse.core.paths import heads_dir, merge_state_path, muse_dir, objects_dir, shelf_dir
69
# ``cli`` is passed as the first argument of every ``runner.invoke`` call in
# this module; it is deliberately ``None``.
cli = None # argparse bridge — CliRunner ignores this
# Single runner instance shared by all CLI-level tests below.
runner = CliRunner()
72
73
74 # ---------------------------------------------------------------------------
75 # Helpers
76 # ---------------------------------------------------------------------------
77
78
def _env(root: pathlib.Path) -> dict[str, str]:
    """Build the environment overrides that point a CLI invocation at *root*.

    Args:
        root: Repository root directory to run the command against.

    Returns:
        A dict with the single key ``MUSE_REPO_ROOT`` set to ``str(root)``.
    """
    # The result is a plain str -> str mapping, so it is annotated as such
    # rather than with a project alias that this module does not import.
    return {"MUSE_REPO_ROOT": str(root)}
81
82
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *tmp_path*.

    Creates the ``objects``, ``commits``, ``snapshots`` and ``refs/heads``
    directories inside ``muse_dir(tmp_path)``, then writes ``repo.json`` and
    a ``HEAD`` file that points at ``refs/heads/main``.

    Args:
        tmp_path: Directory that becomes the repository root.

    Returns:
        *tmp_path* itself, so callers can write ``root = _make_repo(tmp_path)``.
    """
    dot_muse = muse_dir(tmp_path)
    for name in ("objects", "commits", "snapshots", "refs/heads"):
        dot_muse.joinpath(name).mkdir(parents=True, exist_ok=True)

    repo_meta = {
        "repo_id": fake_id("repo"),
        "domain": "code",
        "default_branch": "main",
        "created_at": "2026-01-01T00:00:00+00:00",
    }
    dot_muse.joinpath("repo.json").write_text(json.dumps(repo_meta), encoding="utf-8")
    dot_muse.joinpath("HEAD").write_text("ref: refs/heads/main\n", encoding="utf-8")
    return tmp_path
96
97
def _write_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* in the repo's object store and return its ID.

    Args:
        root: Repository root.
        content: Raw bytes to store.

    Returns:
        The ID computed by ``blob_id(content)``, which is also the ID the
        bytes were written under.
    """
    from muse.core.types import blob_id
    from muse.core.object_store import write_object

    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
104
105
def _write_shelf_entry(root: pathlib.Path, snapshot: Mapping[str, str]) -> pathlib.Path:
    """Persist a shelf entry for *snapshot* via ``write_shelf_entry``.

    The entry ID is ``sha256:<hex>``, where ``<hex>`` comes from hashing the
    sorted-key JSON of the entry *before* its ``"id"`` field is added.

    Args:
        root: Repository root.
        snapshot: Mapping copied into the entry's ``"snapshot"`` field.

    Returns:
        ``shelf_dir(root) / "sha256" / <hex>``.
    """
    from muse.core.types import blob_id, split_id
    from muse.core.shelf import write_shelf_entry

    payload: dict[str, object] = {
        "snapshot": dict(snapshot),
        "branch": "main",
        "created_at": "2026-01-01T00:00:00+00:00",
    }
    # Hash first, add the id afterwards: the id must not feed into its own hash.
    serialized = json.dumps(payload, sort_keys=True).encode()
    _algo, hex_digest = split_id(blob_id(serialized))
    payload["id"] = f"sha256:{hex_digest}"
    write_shelf_entry(root, payload)
    return shelf_dir(root) / "sha256" / hex_digest
122
123
def _make_commit(root: pathlib.Path, manifest: dict[str, str] | None = None) -> str:
    """Write one parentless commit (and its snapshot) and point ``main`` at it.

    Args:
        root: Repository root.
        manifest: Snapshot manifest for the commit; ``None`` (or an empty
            dict) yields an empty manifest. The tests in this module pass a
            ``{path: object_id}`` dict.

    Returns:
        The commit ID produced by ``hash_commit``.
    """
    import datetime
    from muse.core.ids import hash_commit, hash_snapshot
    from muse.core.commits import (
        CommitRecord,
        write_commit,
    )
    from muse.core.snapshots import (
        SnapshotRecord,
        write_snapshot,
    )

    # Annotated as a plain dict: no ``Manifest`` name is imported or defined
    # in this module, so the annotation must not refer to one.
    mfst: dict[str, str] = manifest or {}
    snap_id = hash_snapshot(mfst)
    # Fixed timestamp, so the same manifest always yields the same commit ID.
    committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    commit_id = hash_commit(
        parent_ids=[],
        snapshot_id=snap_id,
        message="test",
        committed_at_iso=committed_at.isoformat(),
    )
    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=mfst))
    write_commit(root, CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snap_id,
        message="test",
        committed_at=committed_at,
    ))
    # Point refs/heads/main at the new commit.
    ref_path = heads_dir(root) / "main"
    ref_path.parent.mkdir(parents=True, exist_ok=True)
    ref_path.write_text(commit_id, encoding="utf-8")
    return commit_id
156
157
def _invoke_gc(root: pathlib.Path, *extra_args: str) -> InvokeResult:
    """Run ``muse gc`` against *root*, defaulting to ``--grace-period 0``.

    The default is prepended only when the caller has not supplied the
    literal ``--grace-period`` token. Exceptions raised by the command
    propagate to the test (``catch_exceptions=False``).

    Args:
        root: Repository root passed to the command through the environment.
        *extra_args: Additional command-line arguments for ``gc``.

    Returns:
        The ``InvokeResult`` produced by the shared runner.
    """
    argv = list(extra_args)
    if "--grace-period" not in argv:
        argv = ["--grace-period", "0", *argv]
    return runner.invoke(cli, ["gc", *argv], env=_env(root), catch_exceptions=False)
164
165
166 # ---------------------------------------------------------------------------
167 # _GcJson TypedDict for test assertions
168 # ---------------------------------------------------------------------------
169
170
class _GcJson(TypedDict):
    """Typed view of the ``muse gc --json`` payload used by these tests.

    The keys are exactly the ones ``_parse_gc_json`` reads from the CLI
    output.
    """

    # Number of objects the run reported as collected.
    collected_count: int
    # Byte total reported for the collected objects.
    collected_bytes: int
    # Number of objects the run reported as reachable.
    reachable_count: int
    # Reported run duration; presumably milliseconds, going by the key name.
    duration_ms: float
    # Grace period in effect; the tests assert it matches ``--grace-period``.
    grace_period_seconds: int
    # Whether the run was a dry run; the tests assert True under ``--dry-run``.
    dry_run: bool
    # IDs of the collected objects; the tests assert the list is sorted.
    collected_ids: list[str]
179
180
def _parse_gc_json(output: str) -> _GcJson:
    """Parse the first JSON-object line of *output* into a ``_GcJson``.

    Lines are stripped before inspection; the first one that starts with
    ``{`` is decoded and each field is coerced to its declared type.

    Args:
        output: Captured CLI output.

    Returns:
        The coerced payload.

    Raises:
        AssertionError: If no line of *output* starts with ``{``.
    """
    stripped_lines = (text.strip() for text in output.splitlines())
    json_line = next((text for text in stripped_lines if text.startswith("{")), None)
    if json_line is None:
        raise AssertionError(f"No JSON object found in output:\n{output}")

    data = json.loads(json_line)
    return _GcJson(
        collected_count=int(data["collected_count"]),
        collected_bytes=int(data["collected_bytes"]),
        reachable_count=int(data["reachable_count"]),
        duration_ms=float(data["duration_ms"]),
        grace_period_seconds=int(data["grace_period_seconds"]),
        dry_run=bool(data["dry_run"]),
        collected_ids=[str(item) for item in data["collected_ids"]],
    )
197
198
199 # ---------------------------------------------------------------------------
200 # Unit — _is_hex
201 # ---------------------------------------------------------------------------
202
203
class TestIsHex:
    """Edge cases for ``muse.core.gc._is_hex``."""

    def test_empty_string_is_not_hex(self) -> None:
        """The empty string is rejected."""
        from muse.core.gc import _is_hex
        candidate = ""
        assert not _is_hex(candidate)

    def test_valid_lowercase_hex(self) -> None:
        """Every lowercase hex digit is accepted."""
        from muse.core.gc import _is_hex
        candidate = "0123456789abcdef"
        assert _is_hex(candidate)

    def test_uppercase_rejected(self) -> None:
        """All-uppercase hex digits are rejected."""
        from muse.core.gc import _is_hex
        candidate = "ABCDEF"
        assert not _is_hex(candidate)

    def test_mixed_case_rejected(self) -> None:
        """A single uppercase digit is enough to reject the string."""
        from muse.core.gc import _is_hex
        candidate = "0aF"
        assert not _is_hex(candidate)

    def test_non_hex_chars_rejected(self) -> None:
        """Letters outside ``a``-``f`` are rejected."""
        from muse.core.gc import _is_hex
        candidate = "xyz"
        assert not _is_hex(candidate)

    def test_single_valid_char(self) -> None:
        """A one-character hex string is accepted."""
        from muse.core.gc import _is_hex
        candidate = "a"
        assert _is_hex(candidate)

    def test_64_char_sha256(self) -> None:
        """A 64-character lowercase hex string is accepted."""
        from muse.core.gc import _is_hex
        assert _is_hex("a" * 64)
233
234
235 # ---------------------------------------------------------------------------
236 # Unit — _fmt_bytes
237 # ---------------------------------------------------------------------------
238
239
class TestFmtBytes:
    """Unit selection in ``muse.cli.commands.gc._fmt_bytes``."""

    def test_bytes_range(self) -> None:
        """Sizes below 1024 are rendered exactly, with a ``B`` suffix."""
        from muse.cli.commands.gc import _fmt_bytes
        for size, rendered in ((0, "0 B"), (1023, "1023 B")):
            assert _fmt_bytes(size) == rendered

    def test_kib_range(self) -> None:
        """Both ends of the 1 KiB .. 1 MiB - 1 range use ``KiB``."""
        from muse.cli.commands.gc import _fmt_bytes
        for size in (1024, 1024 * 1024 - 1):
            assert "KiB" in _fmt_bytes(size)

    def test_mib_range(self) -> None:
        """1 MiB and 100 MiB both use ``MiB``."""
        from muse.cli.commands.gc import _fmt_bytes
        for size in (1024 * 1024, 1024 * 1024 * 100):
            assert "MiB" in _fmt_bytes(size)
255
256
257 # ---------------------------------------------------------------------------
258 # Unit — _list_stored_objects
259 # ---------------------------------------------------------------------------
260
261
class TestListStoredObjects:
    """Unit tests for ``muse.core.gc._list_stored_objects``.

    Each test builds a throwaway repo with ``_make_repo`` and inspects the
    first element (the object ID) of every pair the function returns.
    """

    def test_symlink_prefix_dir_is_skipped(self, tmp_path: pathlib.Path) -> None:
        """A symlinked prefix directory must not be entered."""
        from muse.core.gc import _list_stored_objects
        root = _make_repo(tmp_path)
        real_dir = tmp_path / "external_objects"
        real_dir.mkdir()
        sha = "a" * 64
        real_file = real_dir / sha[2:]
        real_file.write_bytes(b"content")

        # Create a symlink at .muse/objects/sha256/<prefix> → external dir
        algo_dir = objects_dir(root) / "sha256"
        algo_dir.mkdir(parents=True, exist_ok=True)
        link = algo_dir / sha[:2]
        link.symlink_to(real_dir)

        pairs = _list_stored_objects(root, grace_period_seconds=0)
        found_ids = {oid for oid, _ in pairs}
        assert sha not in found_ids, "Symlinked prefix dir must not be entered"
        # Elsewhere in this file IDs are handed to ``object_path`` in the form
        # ``long_id`` produces, so the bare hex digest alone would presumably
        # never match a listed ID. Check the long form as well so the
        # assertion cannot pass vacuously.
        assert long_id(sha) not in found_ids, "Symlinked prefix dir must not be entered"

    def test_symlink_object_file_is_skipped(self, tmp_path: pathlib.Path) -> None:
        """A symlinked object file must not be listed or ever unlinked."""
        from muse.core.gc import _list_stored_objects
        root = _make_repo(tmp_path)
        # Write a real file outside the repo.
        external = tmp_path / "external_secret"
        external.write_bytes(b"secret content")

        sha = "b" * 64
        link = object_path(root, long_id(sha))
        link.parent.mkdir(parents=True, exist_ok=True)
        link.symlink_to(external)

        pairs = _list_stored_objects(root, grace_period_seconds=0)
        found_ids = {oid for oid, _ in pairs}
        assert sha not in found_ids, "Symlinked object file must not be listed"
        # Same ID-form concern as in the prefix-dir test: the symlink was
        # created under ``long_id(sha)``, so check that form too.
        assert long_id(sha) not in found_ids, "Symlinked object file must not be listed"
        # The external file must be untouched.
        assert external.exists()

    def test_grace_period_filters_recent_files(self, tmp_path: pathlib.Path) -> None:
        """Objects written within the grace window are excluded."""
        from muse.core.gc import _list_stored_objects
        root = _make_repo(tmp_path)
        _write_object(root, b"new orphan")
        # Grace period of 60 s — the object was written <1 s ago.
        pairs = _list_stored_objects(root, grace_period_seconds=60)
        assert len(pairs) == 0

    def test_grace_period_zero_includes_all_files(self, tmp_path: pathlib.Path) -> None:
        """grace_period_seconds=0 bypasses the mtime check."""
        from muse.core.gc import _list_stored_objects
        root = _make_repo(tmp_path)
        _write_object(root, b"orphan")
        pairs = _list_stored_objects(root, grace_period_seconds=0)
        assert len(pairs) == 1

    def test_non_hex_prefix_dir_skipped(self, tmp_path: pathlib.Path) -> None:
        """A prefix directory whose name is not hex contributes no objects."""
        from muse.core.gc import _list_stored_objects
        root = _make_repo(tmp_path)
        (objects_dir(root) / "sha256" / "zz").mkdir(parents=True)
        pairs = _list_stored_objects(root, grace_period_seconds=0)
        assert len(pairs) == 0

    def test_non_hex_object_file_skipped(self, tmp_path: pathlib.Path) -> None:
        """A file with a non-hex name inside a valid prefix dir is not listed."""
        from muse.core.gc import _list_stored_objects
        root = _make_repo(tmp_path)
        prefix = objects_dir(root) / "sha256" / "ab"
        prefix.mkdir(parents=True)
        (prefix / "not-valid-hex!").write_bytes(b"x")
        pairs = _list_stored_objects(root, grace_period_seconds=0)
        assert len(pairs) == 0

    def test_valid_object_included(self, tmp_path: pathlib.Path) -> None:
        """An object written through the object store is listed under its ID."""
        from muse.core.gc import _list_stored_objects
        root = _make_repo(tmp_path)
        oid = _write_object(root, b"valid object")
        pairs = _list_stored_objects(root, grace_period_seconds=0)
        found_ids = {o for o, _ in pairs}
        assert oid in found_ids
342
343
344 # ---------------------------------------------------------------------------
345 # Unit — _collect_reachable_objects
346 # ---------------------------------------------------------------------------
347
348
class TestCollectReachableObjects:
    """How ``muse.core.gc._collect_reachable_objects`` treats shelf data."""

    def test_shelf_symlink_skipped(self, tmp_path: pathlib.Path) -> None:
        """An object referenced only via a symlinked ``shelf.json`` is not reachable."""
        from muse.core.gc import _collect_reachable_objects
        repo = _make_repo(tmp_path)
        # A real object, referenced only from a shelf file that lives outside
        # the repo and is linked in as .muse/shelf.json.
        blob = _write_object(repo, b"shelved content")
        shelf_payload = [{
            "snapshot_id": "s" * 64,
            "branch": "main",
            "created_at": "2026-01-01T00:00:00+00:00",
            "snapshot": {"a.py": blob},
        }]
        target = tmp_path / "real_shelf.json"
        target.write_text(json.dumps(shelf_payload))
        (muse_dir(repo) / "shelf.json").symlink_to(target)

        # The symlink must be skipped, so the object stays unreachable.
        assert blob not in _collect_reachable_objects(repo)

    def test_shelf_oversized_file_skipped(self, tmp_path: pathlib.Path) -> None:
        """A shelf entry reporting a size above the cap is skipped."""
        from muse.core.gc import _collect_reachable_objects, _MAX_SHELF_BYTES
        import unittest.mock as mock
        repo = _make_repo(tmp_path)
        blob = _write_object(repo, b"shelved content")
        _write_shelf_entry(repo, {"a.py": blob})
        # Regular-file stat result whose st_size (index 6) is one byte over
        # the cap; every ``Path.stat`` call returns it while the patch is active.
        oversized = os.stat_result((
            stat.S_IFREG | 0o644, 0, 0, 1, 0, 0,
            _MAX_SHELF_BYTES + 1, 0, 0, 0,
        ))
        with mock.patch.object(pathlib.Path, "stat", return_value=oversized):
            reachable = _collect_reachable_objects(repo)
        assert blob not in reachable

    def test_malformed_shelf_json_skipped(self, tmp_path: pathlib.Path) -> None:
        """An unparseable ``shelf.json`` does not make the walk raise."""
        from muse.core.gc import _collect_reachable_objects
        repo = _make_repo(tmp_path)
        broken = muse_dir(repo) / "shelf.json"
        broken.write_text("not valid json{{{}}", encoding="utf-8")
        # Reaching the assertion at all proves the call did not raise.
        assert isinstance(_collect_reachable_objects(repo), set)

    def test_valid_shelf_objects_marked_reachable(self, tmp_path: pathlib.Path) -> None:
        """An object named by a well-formed shelf entry is reachable."""
        from muse.core.gc import _collect_reachable_objects
        repo = _make_repo(tmp_path)
        blob = _write_object(repo, b"shelved content")
        _write_shelf_entry(repo, {"a.py": blob})
        assert blob in _collect_reachable_objects(repo)
400
401
402 # ---------------------------------------------------------------------------
403 # Unit — run_gc result fields
404 # ---------------------------------------------------------------------------
405
406
class TestRunGcResult:
    """Fields of the result object returned by ``muse.core.gc.run_gc``."""

    def test_grace_period_stored_in_result(self, tmp_path: pathlib.Path) -> None:
        """The grace period passed in is echoed on the result."""
        from muse.core.gc import run_gc
        repo = _make_repo(tmp_path)
        outcome = run_gc(repo, grace_period_seconds=42)
        assert outcome.grace_period_seconds == 42

    def test_dry_run_flag_stored_in_result(self, tmp_path: pathlib.Path) -> None:
        """``dry_run=True`` is echoed on the result as the literal ``True``."""
        from muse.core.gc import run_gc
        repo = _make_repo(tmp_path)
        outcome = run_gc(repo, dry_run=True, grace_period_seconds=0)
        assert outcome.dry_run is True

    def test_duration_ms_is_non_negative(self, tmp_path: pathlib.Path) -> None:
        """The reported duration is never negative."""
        from muse.core.gc import run_gc
        repo = _make_repo(tmp_path)
        outcome = run_gc(repo, grace_period_seconds=0)
        assert outcome.duration_ms >= 0.0
425
426
427 # ---------------------------------------------------------------------------
428 # Security — CLI
429 # ---------------------------------------------------------------------------
430
431
class TestSecurity:
    """CLI-level safety checks: symlinks, output sanitizing, argument validation."""

    def test_symlink_in_objects_not_deleted(self, tmp_path: pathlib.Path) -> None:
        """GC must never delete a file outside the repo via a symlink."""
        repo = _make_repo(tmp_path)
        _make_commit(repo)
        precious = tmp_path / "precious_file"
        precious.write_bytes(b"important data")
        # Plant a symlink where an object file would live, aimed at the
        # external file.
        planted = object_path(repo, long_id("c" * 64))
        planted.parent.mkdir(parents=True, exist_ok=True)
        planted.symlink_to(precious)

        _invoke_gc(repo)

        assert precious.exists(), "External file must not be deleted via symlink"

    def test_ansi_in_object_id_sanitized(self, tmp_path: pathlib.Path) -> None:
        """Verbose text output must not contain a raw ESC character."""
        repo = _make_repo(tmp_path)
        _make_commit(repo)
        # The orphan's ID cannot be chosen here; the point is to exercise the
        # verbose listing path.
        _write_object(repo, b"orphan for sanitize test")
        outcome = _invoke_gc(repo, "--verbose")
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_invalid_format_exits_nonzero_and_writes_stderr(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An unsupported ``--format`` value makes the command exit non-zero."""
        repo = _make_repo(tmp_path)
        # argparse now uses choices= so invalid format triggers argparse error.
        argv = ["gc", "--format", "csv"]
        outcome = runner.invoke(cli, argv, env=_env(repo))
        assert outcome.exit_code != 0

    def test_negative_grace_period_rejected(self, tmp_path: pathlib.Path) -> None:
        """``--grace-period -1`` makes the command exit non-zero."""
        repo = _make_repo(tmp_path)
        argv = ["gc", "--grace-period", "-1"]
        outcome = runner.invoke(cli, argv, env=_env(repo))
        assert outcome.exit_code != 0
471
472
473 # ---------------------------------------------------------------------------
474 # Integration — JSON output schema
475 # ---------------------------------------------------------------------------
476
477
class TestJsonSchema:
    """Shape and content of the ``muse gc --json`` payload."""

    def test_json_schema_all_fields_present(self, tmp_path: pathlib.Path) -> None:
        """One orphan next to one commit yields a fully populated payload."""
        repo = _make_repo(tmp_path)
        _make_commit(repo)
        _write_object(repo, b"orphan for json test")
        outcome = _invoke_gc(repo, "--json")
        assert outcome.exit_code == 0
        report = _parse_gc_json(outcome.output)
        assert report["collected_count"] == 1
        assert report["collected_bytes"] > 0
        # commit + snapshot now live in the unified object store, so reachable_count >= 2
        assert report["reachable_count"] >= 2
        assert report["duration_ms"] >= 0.0
        assert report["grace_period_seconds"] == 0
        assert report["dry_run"] is False
        assert len(report["collected_ids"]) == 1

    def test_json_dry_run_does_not_delete(self, tmp_path: pathlib.Path) -> None:
        """``--dry-run --json`` reports the orphan but leaves it on disk."""
        from muse.core.object_store import has_object
        repo = _make_repo(tmp_path)
        _make_commit(repo)
        orphan = _write_object(repo, b"dry run orphan")
        outcome = _invoke_gc(repo, "--dry-run", "--json")
        assert outcome.exit_code == 0
        report = _parse_gc_json(outcome.output)
        assert report["dry_run"] is True
        assert report["collected_count"] == 1
        # The object must survive a dry run.
        assert has_object(repo, orphan)

    def test_json_grace_period_field_reflects_flag(self, tmp_path: pathlib.Path) -> None:
        """The payload echoes the ``--grace-period`` value given on the command line."""
        repo = _make_repo(tmp_path)
        argv = ["gc", "--grace-period", "99", "--json"]
        outcome = runner.invoke(cli, argv, env=_env(repo), catch_exceptions=False)
        assert outcome.exit_code == 0
        report = _parse_gc_json(outcome.output)
        assert report["grace_period_seconds"] == 99

    def test_json_collected_ids_sorted(self, tmp_path: pathlib.Path) -> None:
        """``collected_ids`` is emitted in sorted order."""
        repo = _make_repo(tmp_path)
        for index in range(5):
            _write_object(repo, f"sort test {index}".encode())
        outcome = _invoke_gc(repo, "--json")
        assert outcome.exit_code == 0
        reported_ids = _parse_gc_json(outcome.output)["collected_ids"]
        assert reported_ids == sorted(reported_ids)

    def test_json_clean_repo_shows_zero_counts(self, tmp_path: pathlib.Path) -> None:
        """A repo with no orphans reports zero counts and an empty ID list."""
        repo = _make_repo(tmp_path)
        _make_commit(repo)
        outcome = _invoke_gc(repo, "--json")
        assert outcome.exit_code == 0
        report = _parse_gc_json(outcome.output)
        assert report["collected_count"] == 0
        assert report["collected_bytes"] == 0
        assert report["collected_ids"] == []

    def test_shorthand_json_flag(self, tmp_path: pathlib.Path) -> None:
        """``--json`` on a bare repo succeeds and emits a parseable payload."""
        repo = _make_repo(tmp_path)
        outcome = _invoke_gc(repo, "--json")
        assert outcome.exit_code == 0
        # Parsing raises AssertionError when no JSON line is present.
        _parse_gc_json(outcome.output)
542
543
544 # ---------------------------------------------------------------------------
545 # E2E — full lifecycle
546 # ---------------------------------------------------------------------------
547
548
class TestE2E:
    """End-to-end ``muse gc`` scenarios driven through the CLI runner."""

    def test_orphan_from_abandoned_branch_reclaimed(self, tmp_path: pathlib.Path) -> None:
        """Objects written for a branch that was never committed are reclaimed."""
        repo = _make_repo(tmp_path)
        # Two objects that no commit references.
        first = _write_object(repo, b"branch work A")
        second = _write_object(repo, b"branch work B")
        outcome = _invoke_gc(repo, "--json")
        assert outcome.exit_code == 0
        collected = _parse_gc_json(outcome.output)["collected_ids"]
        assert first in collected
        assert second in collected

    def test_gc_after_shelf_save_preserves_shelf_objects(self, tmp_path: pathlib.Path) -> None:
        """An object referenced by a shelf entry is neither reported nor deleted."""
        from muse.core.object_store import has_object
        repo = _make_repo(tmp_path)
        shelved = _write_object(repo, b"shelved file content")
        _write_shelf_entry(repo, {"file.py": shelved})

        outcome = _invoke_gc(repo, "--json")
        assert outcome.exit_code == 0
        assert shelved not in _parse_gc_json(outcome.output)["collected_ids"]
        # The object must still be in the store afterwards.
        assert has_object(repo, shelved)

    def test_gc_with_corrupt_shelf_json_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A corrupt ``shelf.json`` does not stop orphans from being collected."""
        repo = _make_repo(tmp_path)
        orphan = _write_object(repo, b"orphan despite corrupt shelf")
        corrupt = muse_dir(repo) / "shelf.json"
        corrupt.write_text("{not json", encoding="utf-8")
        outcome = _invoke_gc(repo, "--json")
        assert outcome.exit_code == 0
        assert orphan in _parse_gc_json(outcome.output)["collected_ids"]

    def test_grace_period_zero_collects_fresh_orphan(self, tmp_path: pathlib.Path) -> None:
        """With ``--grace-period 0`` a just-written orphan is collected."""
        repo = _make_repo(tmp_path)
        orphan = _write_object(repo, b"fresh orphan")
        outcome = _invoke_gc(repo, "--grace-period", "0", "--json")
        assert outcome.exit_code == 0
        assert orphan in _parse_gc_json(outcome.output)["collected_ids"]

    def test_grace_period_large_protects_fresh_orphan(self, tmp_path: pathlib.Path) -> None:
        """With ``--grace-period 9999`` a just-written orphan is left alone."""
        repo = _make_repo(tmp_path)
        _write_object(repo, b"fresh orphan protected")
        argv = ["gc", "--grace-period", "9999", "--json"]
        outcome = runner.invoke(cli, argv, env=_env(repo), catch_exceptions=False)
        assert outcome.exit_code == 0
        assert _parse_gc_json(outcome.output)["collected_count"] == 0

    def test_repeated_gc_is_idempotent(self, tmp_path: pathlib.Path) -> None:
        """A second run right after a first one finds nothing to collect."""
        repo = _make_repo(tmp_path)
        _write_object(repo, b"first orphan")
        _invoke_gc(repo)
        second_run = _invoke_gc(repo, "--json")
        assert second_run.exit_code == 0
        assert _parse_gc_json(second_run.output)["collected_count"] == 0

    def test_gc_removes_empty_prefix_dirs(self, tmp_path: pathlib.Path) -> None:
        """After GC, empty prefix dirs under .muse/objects/sha256/ are cleaned up."""
        repo = _make_repo(tmp_path)
        sole = _write_object(repo, b"sole object in prefix")
        holder = object_path(repo, sole).parent
        assert holder.exists()
        _invoke_gc(repo)
        # Its only object was collected, so the directory itself must be gone.
        assert not holder.exists()

    def test_verbose_lists_full_sha256_ids(self, tmp_path: pathlib.Path) -> None:
        """``--verbose`` text output contains the orphan's full ID."""
        repo = _make_repo(tmp_path)
        orphan = _write_object(repo, b"verbose test object")
        outcome = _invoke_gc(repo, "--verbose")
        assert outcome.exit_code == 0
        assert orphan in outcome.output

    def test_dry_run_verbose_lists_without_deleting(self, tmp_path: pathlib.Path) -> None:
        """``--dry-run --verbose`` prints the orphan's ID but keeps its file."""
        repo = _make_repo(tmp_path)
        orphan = _write_object(repo, b"dry verbose test")
        outcome = _invoke_gc(repo, "--dry-run", "--verbose")
        assert outcome.exit_code == 0
        assert orphan in outcome.output
        assert object_path(repo, orphan).exists()

    def test_dry_run_prefix_present_in_text_output(self, tmp_path: pathlib.Path) -> None:
        """Dry-run text output is tagged with ``[dry-run]``."""
        repo = _make_repo(tmp_path)
        outcome = _invoke_gc(repo, "--dry-run")
        assert outcome.exit_code == 0
        assert "[dry-run]" in outcome.output

    def test_reachable_count_reflects_committed_objects(self, tmp_path: pathlib.Path) -> None:
        """A commit with one object in its manifest yields three reachable objects."""
        repo = _make_repo(tmp_path)
        committed = _write_object(repo, b"committed content")
        _make_commit(repo, manifest={"file.txt": committed})
        outcome = _invoke_gc(repo, "--json")
        report = _parse_gc_json(outcome.output)
        # commit + snapshot + 1 blob = 3 reachable in the unified object store
        assert report["reachable_count"] == 3
        assert report["collected_count"] == 0
656
657
658 # ---------------------------------------------------------------------------
659 # Stress
660 # ---------------------------------------------------------------------------
661
662
class TestStress:
    """Higher-volume and concurrent GC scenarios."""

    def test_500_orphans_all_collected(self, tmp_path: pathlib.Path) -> None:
        """500 orphans next to one commit are all collected; two files remain."""
        from muse.core.object_store import has_object
        repo = _make_repo(tmp_path)
        _make_commit(repo)
        orphans = [
            _write_object(repo, f"stress-{index:04d}".encode())
            for index in range(500)
        ]
        outcome = _invoke_gc(repo, "--json")
        assert outcome.exit_code == 0
        report = _parse_gc_json(outcome.output)
        assert report["collected_count"] == 500
        assert set(report["collected_ids"]) == set(orphans)
        # Every orphan must be gone from the store.
        for oid in orphans:
            assert not has_object(repo, oid), f"orphan {oid} was not collected"
        # What is left on disk is just the commit and the snapshot.
        survivors = sum(1 for entry in objects_dir(repo).rglob("*") if entry.is_file())
        assert survivors == 2

    def test_concurrent_dry_run_does_not_crash(self, tmp_path: pathlib.Path) -> None:
        """Multiple concurrent dry-run GCs on the same repo must not crash."""
        repo = _make_repo(tmp_path)
        _make_commit(repo)
        for index in range(20):
            _write_object(repo, f"concurrent-orphan-{index}".encode())

        failures: list[str] = []

        def _worker() -> None:
            # Record the failure instead of letting the thread die silently.
            try:
                from muse.core.gc import run_gc
                run_gc(repo, dry_run=True, grace_period_seconds=0)
            except Exception as exc:
                failures.append(str(exc))

        workers = [threading.Thread(target=_worker) for _ in range(8)]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        assert not failures, f"Concurrent dry-run GC failures: {failures}"

    def test_gc_across_many_prefix_dirs(self, tmp_path: pathlib.Path) -> None:
        """Objects spread across many prefix dirs are all found and collected."""
        repo = _make_repo(tmp_path)
        # Varying the content varies the ID, and with it the prefix directory.
        written = [
            _write_object(repo, f"spread-{index:08d}".encode())
            for index in range(100)
        ]
        # Sanity check: the objects really did land in more than one prefix dir.
        sha256_dir = objects_dir(repo) / "sha256"
        prefixes = [entry for entry in sha256_dir.iterdir() if entry.is_dir()]
        assert len(prefixes) > 1, "Test needs objects in multiple prefix dirs"

        outcome = _invoke_gc(repo, "--json")
        report = _parse_gc_json(outcome.output)
        assert report["collected_count"] == 100
        assert set(report["collected_ids"]) == set(written)
File History 1 commit
sha256:84df9126d09aeec0b8f1b908f0b06c10913feec28f3514b382efb1ba6d619385 refactor: rename StructuredMergePlugin to AddressedMergePlu… Sonnet 4.6 minor 23 days ago