gabriel / muse public

test_gc_supercharge.py file-level

at sha256:d · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:4 Merge branch 'dev' into main · gabriel · Jun 17, 2026
1 """GC JSON schema: agent-ready output fields.
2
3 Tests for ``muse gc --json``:
4
5 status "ok" | "error"
6 error empty string on success; message on bad args
7 warnings list of warning strings (symlink skips, etc.)
8 mode "conservative" (default) | "tight" (--full)
9 collected_commit_ids list[str] — pruned commit IDs (--full only)
10 collected_snapshot_ids list[str] — pruned snapshot IDs (--full only)
11 duration_ms float — milliseconds
12 exit_code int — 0 on success, 1 on error
13
14 Also covers:
15 - structured JSON error for --grace-period < 0 in --json mode
16
17 Test categories
18 ---------------
19 TestGcJsonSchema — every field present and typed correctly
20 TestGcJsonDurationMs — duration_ms is present and non-negative
21 TestGcJsonMode — mode field reflects --full flag
22 TestGcJsonCollectedIds — collected_commit_ids / collected_snapshot_ids
23 TestGcJsonBadArgs — structured error when --grace-period < 0
24 TestGcJsonWarnings — warnings list populated on symlink skip
25 TestGcJsonExitCode — exit_code field in JSON output
26 """
27
28 from __future__ import annotations
29 from collections.abc import Mapping
30
31 import datetime
32 import json
33 import pathlib
34
35 import pytest
36
37 from tests.cli_test_helper import CliRunner
38 from muse.core.types import blob_id, fake_id
39 from muse.core.object_store import object_path
40 from muse.core.paths import heads_dir, muse_dir, objects_dir, snapshots_dir
41
# Module-level CLI runner shared by the helpers and tests below.
runner = CliRunner()
# Placeholder handed to ``runner.invoke`` as its first positional argument.
cli = None  # argparse migration — CliRunner ignores this arg
44
45
46 # ---------------------------------------------------------------------------
47 # Helpers
48 # ---------------------------------------------------------------------------
49
def _init_repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]:
    """Lay out a minimal on-disk repository under *tmp_path*.

    Creates the metadata directory resolved by ``muse_dir``, writes
    ``repo.json`` and ``HEAD`` (pointing at ``refs/heads/main``), and creates
    the ``refs/heads``, ``snapshots``, ``commits`` and ``objects``
    sub-directories inside it.

    Args:
        tmp_path: Directory that becomes the repository root.

    Returns:
        ``(tmp_path, repo_id)`` where *repo_id* is the identifier written to
        ``repo.json``.
    """
    meta_dir = muse_dir(tmp_path)
    meta_dir.mkdir()

    repo_id = fake_id("repo")
    repo_config = {
        "repo_id": repo_id,
        "domain": "code",
        "default_branch": "main",
        "created_at": "2025-01-01T00:00:00+00:00",
    }
    (meta_dir / "repo.json").write_text(json.dumps(repo_config), encoding="utf-8")
    (meta_dir / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")

    # ``refs/heads`` needs ``parents=True`` because ``refs`` does not exist yet.
    (meta_dir / "refs" / "heads").mkdir(parents=True)
    for store_name in ("snapshots", "commits", "objects"):
        (meta_dir / store_name).mkdir()

    return tmp_path, repo_id
66
67
def _write_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* as an object file and return its object ID.

    The ID comes from ``blob_id(content)`` and the destination from
    ``object_path(root, <id>)``; missing parent directories are created.

    Args:
        root: Repository root.
        content: Raw bytes to write.

    Returns:
        The object ID computed for *content*.
    """
    object_id = blob_id(content)
    destination = object_path(root, object_id)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_bytes(content)
    return object_id
74
75
def _make_commit(root: pathlib.Path, repo_id: str, message: str = "init") -> str:
    """Write an empty-manifest commit on ``main`` and advance the branch ref.

    The current content of the ``main`` ref file (if any) becomes the parent
    of the new commit. A snapshot record with an empty manifest and a commit
    record are written, then the ref file is overwritten with the new commit
    ID.

    Args:
        root: Repository root.
        repo_id: Accepted for call-site symmetry with ``_init_repo``; not used
            by this helper.
        message: Commit message.

    Returns:
        The ID of the newly written commit.
    """
    from muse.core.commits import (
        CommitRecord,
        write_commit,
    )
    from muse.core.snapshots import (
        SnapshotRecord,
        write_snapshot,
    )
    from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id

    ref_file = heads_dir(root) / "main"
    # Read with the same explicit encoding used when the ref is written below.
    # An empty ref file is normalised to "no parent" so that ``parent_ids``
    # and ``parent_commit_id`` can never disagree.
    parent_id = None
    if ref_file.exists():
        parent_id = ref_file.read_text(encoding="utf-8").strip() or None

    manifest = {}
    snap_id = compute_snapshot_id(manifest)
    committed_at = datetime.datetime.now(datetime.timezone.utc)
    commit_id = compute_commit_id(
        parent_ids=[parent_id] if parent_id else [],
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=committed_at.isoformat(),
    )

    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))
    write_commit(root, CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snap_id,
        message=message,
        committed_at=committed_at,
        parent_commit_id=parent_id,
    ))

    ref_file.parent.mkdir(parents=True, exist_ok=True)
    ref_file.write_text(commit_id, encoding="utf-8")
    return commit_id
105
106
def _gc_json(root: pathlib.Path, extra_args: list[str] | None = None) -> Mapping[str, object]:
    """Run ``muse gc --json --grace-period 0`` and parse output.

    Args:
        root: Repository root, passed to the CLI via ``MUSE_REPO_ROOT``.
        extra_args: Additional command-line arguments appended after the
            fixed ones (for example ``["--full"]``).

    Returns:
        The decoded JSON document printed by the command.

    Raises:
        json.JSONDecodeError: If the command output is not valid JSON. The
            message includes the command line, the exit code and the raw
            output so that a crashing CLI is easy to diagnose.
    """
    args = ["gc", "--json", "--grace-period", "0", *(extra_args or [])]
    result = runner.invoke(cli, args, env={"MUSE_REPO_ROOT": str(root)})
    try:
        return json.loads(result.output)
    except json.JSONDecodeError as exc:
        # Keep the exception type but surface what the CLI actually printed;
        # the bare decode error says nothing about the failing command.
        raise json.JSONDecodeError(
            f"`muse {' '.join(args)}` did not emit valid JSON "
            f"(exit code {result.exit_code}, output {result.output!r}): {exc.msg}",
            exc.doc,
            exc.pos,
        ) from exc
112
113
114 # ---------------------------------------------------------------------------
115 # TestGcJsonSchema
116 # ---------------------------------------------------------------------------
117
class TestGcJsonSchema:
    """Every new agent-ready field must be present and correctly typed."""

    @staticmethod
    def _report(tmp_path: pathlib.Path, *flags: str) -> Mapping[str, object]:
        """Create a one-commit repo, run GC with *flags*, return the JSON."""
        root, repo_id = _init_repo(tmp_path)
        _make_commit(root, repo_id)
        return _gc_json(root, list(flags))

    # -- status / error ----------------------------------------------------

    def test_status_field_present(self, tmp_path: pathlib.Path) -> None:
        report = self._report(tmp_path)
        assert "status" in report, "JSON output must include 'status' field"

    def test_status_ok_on_success(self, tmp_path: pathlib.Path) -> None:
        report = self._report(tmp_path)
        assert report["status"] == "ok"

    def test_error_field_present(self, tmp_path: pathlib.Path) -> None:
        report = self._report(tmp_path)
        assert "error" in report, "JSON output must include 'error' field"

    def test_error_empty_on_success(self, tmp_path: pathlib.Path) -> None:
        report = self._report(tmp_path)
        assert report["error"] == ""

    # -- warnings ----------------------------------------------------------

    def test_warnings_field_present(self, tmp_path: pathlib.Path) -> None:
        report = self._report(tmp_path)
        assert "warnings" in report, "JSON output must include 'warnings' field"

    def test_warnings_is_list(self, tmp_path: pathlib.Path) -> None:
        report = self._report(tmp_path)
        assert isinstance(report["warnings"], list)

    def test_warnings_empty_on_clean_run(self, tmp_path: pathlib.Path) -> None:
        report = self._report(tmp_path)
        assert report["warnings"] == []

    # -- mode / exit_code --------------------------------------------------

    def test_mode_field_present(self, tmp_path: pathlib.Path) -> None:
        report = self._report(tmp_path)
        assert "mode" in report, "JSON output must include 'mode' field"

    def test_exit_code_field_present(self, tmp_path: pathlib.Path) -> None:
        report = self._report(tmp_path)
        assert "exit_code" in report, "JSON output must include 'exit_code' field"

    # -- collected IDs (run with --full) -----------------------------------

    def test_collected_commit_ids_present(self, tmp_path: pathlib.Path) -> None:
        report = self._report(tmp_path, "--full")
        assert "collected_commit_ids" in report, "JSON must include collected_commit_ids"

    def test_collected_snapshot_ids_present(self, tmp_path: pathlib.Path) -> None:
        report = self._report(tmp_path, "--full")
        assert "collected_snapshot_ids" in report, "JSON must include collected_snapshot_ids"

    def test_collected_commit_ids_is_list(self, tmp_path: pathlib.Path) -> None:
        report = self._report(tmp_path, "--full")
        assert isinstance(report["collected_commit_ids"], list)

    def test_collected_snapshot_ids_is_list(self, tmp_path: pathlib.Path) -> None:
        report = self._report(tmp_path, "--full")
        assert isinstance(report["collected_snapshot_ids"], list)
198
199
200 # ---------------------------------------------------------------------------
201 # TestGcJsonDurationMs
202 # ---------------------------------------------------------------------------
203
class TestGcJsonDurationMs:
    """duration_ms field is present, numeric, and non-negative."""

    def test_duration_ms_present(self, tmp_path: pathlib.Path) -> None:
        root, repo_id = _init_repo(tmp_path)
        _make_commit(root, repo_id)
        data = _gc_json(root)
        assert "duration_ms" in data, "JSON must include 'duration_ms' field"

    def test_duration_ms_is_float(self, tmp_path: pathlib.Path) -> None:
        """The value must be a real number (JSON int or float), never a bool."""
        root, repo_id = _init_repo(tmp_path)
        _make_commit(root, repo_id)
        data = _gc_json(root)
        duration = data["duration_ms"]
        # ``bool`` is a subclass of ``int``, so a JSON ``true``/``false`` would
        # slip through a plain ``isinstance(..., (int, float))`` check.
        assert not isinstance(duration, bool)
        assert isinstance(duration, (int, float))

    def test_duration_ms_non_negative(self, tmp_path: pathlib.Path) -> None:
        root, repo_id = _init_repo(tmp_path)
        _make_commit(root, repo_id)
        data = _gc_json(root)
        assert data["duration_ms"] >= 0

    def test_no_elapsed_key(self, tmp_path: pathlib.Path) -> None:
        """The ``elapsed_ms`` and ``elapsed`` keys must not be emitted."""
        root, repo_id = _init_repo(tmp_path)
        _make_commit(root, repo_id)
        data = _gc_json(root)
        assert "elapsed_ms" not in data
        assert "elapsed" not in data
231
232
233 # ---------------------------------------------------------------------------
234 # TestGcJsonMode
235 # ---------------------------------------------------------------------------
236
class TestGcJsonMode:
    """mode field reflects which reachability strategy was used."""

    @staticmethod
    def _mode_for(tmp_path: pathlib.Path, flags: list[str] | None) -> object:
        """Run GC on a fresh one-commit repo and return the ``mode`` value."""
        root, repo_id = _init_repo(tmp_path)
        _make_commit(root, repo_id)
        if flags is None:
            report = _gc_json(root)
        else:
            report = _gc_json(root, flags)
        return report["mode"]

    def test_mode_conservative_by_default(self, tmp_path: pathlib.Path) -> None:
        assert self._mode_for(tmp_path, None) == "conservative"

    def test_mode_tight_with_full_flag(self, tmp_path: pathlib.Path) -> None:
        assert self._mode_for(tmp_path, ["--full"]) == "tight"
251
252
253 # ---------------------------------------------------------------------------
254 # TestGcJsonCollectedIds
255 # ---------------------------------------------------------------------------
256
class TestGcJsonCollectedIds:
    """collected_commit_ids and collected_snapshot_ids populated in --full mode."""

    @staticmethod
    def _collected(
        tmp_path: pathlib.Path, key: str, *, full: bool
    ) -> object:
        """Run GC on a fresh one-commit repo and return ``report[key]``."""
        root, repo_id = _init_repo(tmp_path)
        _make_commit(root, repo_id)
        report = _gc_json(root, ["--full"]) if full else _gc_json(root)
        return report[key]

    def test_collected_commit_ids_empty_when_all_reachable(
        self, tmp_path: pathlib.Path
    ) -> None:
        pruned = self._collected(tmp_path, "collected_commit_ids", full=True)
        assert pruned == []

    def test_collected_snapshot_ids_empty_when_all_reachable(
        self, tmp_path: pathlib.Path
    ) -> None:
        pruned = self._collected(tmp_path, "collected_snapshot_ids", full=True)
        assert pruned == []

    def test_collected_commit_ids_conservative_mode_always_empty(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Conservative mode doesn't prune commits — list must be empty."""
        # no --full
        pruned = self._collected(tmp_path, "collected_commit_ids", full=False)
        assert pruned == []

    def test_collected_snapshot_ids_conservative_mode_always_empty(
        self, tmp_path: pathlib.Path
    ) -> None:
        # no --full
        pruned = self._collected(tmp_path, "collected_snapshot_ids", full=False)
        assert pruned == []
292
293
294 # ---------------------------------------------------------------------------
295 # TestGcJsonBadArgs
296 # ---------------------------------------------------------------------------
297
class TestGcJsonBadArgs:
    """--grace-period < 0 with --json must emit structured JSON error, not crash."""

    @staticmethod
    def _invoke_negative_grace(tmp_path: pathlib.Path):
        """Run ``gc --json --grace-period -1`` in a fresh repo; return the result."""
        root, _ = _init_repo(tmp_path)
        bad_args = ["gc", "--json", "--grace-period", "-1"]
        return runner.invoke(cli, bad_args, env={"MUSE_REPO_ROOT": str(root)})

    def test_bad_grace_period_json_mode_exit_code_1(
        self, tmp_path: pathlib.Path
    ) -> None:
        outcome = self._invoke_negative_grace(tmp_path)
        assert outcome.exit_code == 1

    def test_bad_grace_period_json_mode_emits_json(
        self, tmp_path: pathlib.Path
    ) -> None:
        outcome = self._invoke_negative_grace(tmp_path)
        # Output must be valid JSON (not just a stderr print)
        payload = json.loads(outcome.output)
        assert payload["status"] == "error"

    def test_bad_grace_period_json_error_field_non_empty(
        self, tmp_path: pathlib.Path
    ) -> None:
        outcome = self._invoke_negative_grace(tmp_path)
        payload = json.loads(outcome.output)
        assert payload["error"] != "", "error field must contain a message on bad args"

    def test_bad_grace_period_json_error_mentions_grace_period(
        self, tmp_path: pathlib.Path
    ) -> None:
        outcome = self._invoke_negative_grace(tmp_path)
        payload = json.loads(outcome.output)
        assert "grace" in payload["error"].lower() or "-1" in payload["error"], (
            "error message must mention the problematic argument"
        )

    def test_bad_grace_period_json_has_exit_code(
        self, tmp_path: pathlib.Path
    ) -> None:
        outcome = self._invoke_negative_grace(tmp_path)
        payload = json.loads(outcome.output)
        assert "exit_code" in payload
        assert payload["exit_code"] == 1
363
364
365 # ---------------------------------------------------------------------------
366 # TestGcJsonWarnings
367 # ---------------------------------------------------------------------------
368
class TestGcJsonWarnings:
    """warnings list is populated when symlinks are skipped during GC walk."""

    def test_symlink_object_file_skip_adds_warning(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A symlink inside .muse/objects/ triggers a warning in JSON output."""
        root, repo_id = _init_repo(tmp_path)
        _make_commit(root, repo_id)

        # Plant a symlink disguised as an object file
        shard_dir = objects_dir(root) / "aa"
        shard_dir.mkdir(parents=True, exist_ok=True)
        fake_object_link = objects_dir(root) / "aa" / ("a" * 62)
        fake_object_link.symlink_to("/etc/passwd")

        report = _gc_json(root)
        # The symlink should have been skipped — there may or may not be a
        # warning depending on implementation, but the field must exist and
        # be a list. (Symlink in object files currently silently skips —
        # warning is the new behavior.)
        assert isinstance(report["warnings"], list)

    def test_symlink_snapshot_file_warning(self, tmp_path: pathlib.Path) -> None:
        """A symlink inside the unified objects store triggers a warning in warnings list."""
        root, repo_id = _init_repo(tmp_path)
        _make_commit(root, repo_id)

        # Plant a symlink in the unified object store (snapshots now live here).
        # The walker descends objects/sha256/<shard>/<rest>.
        shard_dir = objects_dir(root) / "sha256" / "de"
        shard_dir.mkdir(parents=True, exist_ok=True)
        snapshot_link = shard_dir / ("e" * 62)
        snapshot_link.symlink_to("/etc/passwd")

        report = _gc_json(root)
        assert isinstance(report["warnings"], list)

        # The symlink warning from the reachability walk must appear
        symlink_warnings = [
            text for text in report["warnings"] if "symlink" in text.lower()
        ]
        assert len(symlink_warnings) >= 1, (
            "symlink snapshot file must produce a warning in JSON output"
        )
410
411
412 # ---------------------------------------------------------------------------
413 # TestGcJsonExitCode
414 # ---------------------------------------------------------------------------
415
class TestGcJsonExitCode:
    """exit_code field matches actual process exit code."""

    @staticmethod
    def _committed_repo(tmp_path: pathlib.Path) -> pathlib.Path:
        """Create a repo with a single commit and return its root."""
        root, repo_id = _init_repo(tmp_path)
        _make_commit(root, repo_id)
        return root

    def test_exit_code_zero_on_success(self, tmp_path: pathlib.Path) -> None:
        root = self._committed_repo(tmp_path)
        report = _gc_json(root)
        assert report["exit_code"] == 0

    def test_exit_code_zero_with_dry_run(self, tmp_path: pathlib.Path) -> None:
        root = self._committed_repo(tmp_path)
        # An object that no commit references, so the dry run has a candidate.
        _write_object(root, b"orphan")
        report = _gc_json(root, ["--dry-run"])
        assert report["exit_code"] == 0

    def test_exit_code_zero_with_full(self, tmp_path: pathlib.Path) -> None:
        root = self._committed_repo(tmp_path)
        report = _gc_json(root, ["--full"])
        assert report["exit_code"] == 0