gabriel / muse public

test_cmd_verify.py file-level

at sha256:2 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:b adding issues docs to bust staging mpack prebuild cache. · gabriel · Jun 20, 2026
1 """Tests for ``muse verify`` and ``muse/core/verify.py``.
2
3 Covers: empty repo, healthy repo, missing commit, missing snapshot,
4 missing object, corrupted object (hash mismatch), --no-objects flag,
5 --quiet flag, --json output, stress: 100-commit chain.
6 """
7
8 from __future__ import annotations
9
10 import datetime
11 import json
12 import pathlib
13
14 import pytest
15 from tests.cli_test_helper import CliRunner
16
17 cli = None # argparse migration — CliRunner ignores this arg
18 import os
19
20 from muse.core.object_store import object_path, write_object
21 from muse.core.ids import hash_commit, hash_snapshot
22 from muse.core.commits import (
23 CommitRecord,
24 write_commit,
25 )
26 from muse.core.snapshots import (
27 SnapshotRecord,
28 write_snapshot,
29 )
30 from muse.core.verify import run_verify
31 from muse.core.types import Manifest, blob_id, long_id, fake_id
32 from muse.core.paths import muse_dir, heads_dir, ref_path
33
34 runner = CliRunner()
35
36 _REPO_ID = "verify-test"
37
38
39 # ---------------------------------------------------------------------------
40 # Helpers
41 # ---------------------------------------------------------------------------
42
43
44
45
def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Create a bare repository skeleton under *path* and return *path*.

    Inside the directory returned by ``muse_dir(path)`` this creates the
    ``commits``, ``snapshots``, ``objects`` and ``refs/heads`` directories,
    a ``HEAD`` file pointing at ``refs/heads/main``, and a ``repo.json``
    carrying the test repo ID and the ``midi`` domain.
    """
    store = muse_dir(path)
    for subdir in ("commits", "snapshots", "objects", "refs/heads"):
        target = store / subdir
        target.mkdir(parents=True, exist_ok=True)

    head_file = store / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = json.dumps({"repo_id": _REPO_ID, "domain": "midi"})
    (store / "repo.json").write_text(repo_meta, encoding="utf-8")
    return path
55
56
def _env(repo: pathlib.Path) -> Manifest:
    """Return the environment mapping that points the CLI at *repo*.

    The mapping has a single key, ``MUSE_REPO_ROOT``, holding the string
    form of *repo*.
    """
    environment = dict(MUSE_REPO_ROOT=str(repo))
    return environment
59
60
def _make_commit(
    root: pathlib.Path,
    parent_id: str | None = None,
    content: bytes = b"data",
    branch: str = "main",
    idx: int = 0,
) -> str:
    """Write one single-file commit into the store at *root* and return its ID.

    The stored blob is *content* with the decimal digits of *idx* appended,
    recorded in the snapshot manifest as ``file_<idx>.txt``. The commit
    timestamp is 2026-01-01 UTC plus *idx* hours, and the message is
    ``"commit <idx>"``. After the commit record is written, the ref for
    *branch* is overwritten with the new commit ID.

    Args:
        root: Repository root to write into.
        parent_id: ID of the parent commit; a falsy value yields an empty
            parent list when hashing.
        content: Base bytes for the blob.
        branch: Branch name recorded on the commit and used for the ref.
        idx: Sequence number used for the blob suffix, file name, message
            and timestamp offset.

    Returns:
        The ID of the newly written commit.
    """
    # Blob: payload is made unique per idx by suffixing the index.
    payload = content + str(idx).encode()
    blob = blob_id(payload)
    write_object(root, blob, payload)

    # Snapshot: exactly one file pointing at that blob.
    files = {f"file_{idx}.txt": blob}
    snapshot_id = hash_snapshot(files)
    write_snapshot(root, SnapshotRecord(snapshot_id=snapshot_id, manifest=files))

    # Commit: timestamp advances one hour per idx.
    epoch = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    when = epoch + datetime.timedelta(hours=idx)
    message = f"commit {idx}"
    parents = [parent_id] if parent_id else []
    new_commit_id = hash_commit(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=when.isoformat(),
    )
    record = CommitRecord(
        commit_id=new_commit_id,
        branch=branch,
        snapshot_id=snapshot_id,
        message=message,
        committed_at=when,
        parent_commit_id=parent_id,
    )
    write_commit(root, record)

    # Move the branch tip to the new commit.
    branch_ref = ref_path(root, branch)
    branch_ref.write_text(new_commit_id, encoding="utf-8")
    return new_commit_id
92
93
94 # ---------------------------------------------------------------------------
95 # Unit: core run_verify
96 # ---------------------------------------------------------------------------
97
98
def test_verify_empty_repo_no_failures(tmp_path: pathlib.Path) -> None:
    """A freshly initialised repo verifies cleanly and reports nothing checked."""
    repo = _init_repo(tmp_path)
    report = run_verify(repo)
    assert report["nothing_checked"] is True
    assert report["failures"] == []
    assert report["all_ok"] is True
105
106
107 # ---------------------------------------------------------------------------
108 # Supercharged verify — snapshot sweep, nothing_checked, zero-byte detection
109 # ---------------------------------------------------------------------------
110
111
class TestVerifySupercharged:
    """Tests for the three supercharged verify capabilities:

    1. Snapshot store sweep — finds missing objects even when branch refs are absent.
    2. nothing_checked flag — distinguishes "empty repo" from "all healthy".
    3. Truncated objects are caught by the hash check (check_objects=True);
       existence-only mode (check_objects=False) does not hash-verify content.
    """

    # ------------------------------------------------------------------
    # Shared setup helpers (not collected by pytest: no ``test`` prefix)
    # ------------------------------------------------------------------

    @staticmethod
    def _commit_single_file(
        root: pathlib.Path,
        filename: str,
        content: bytes,
        message: str,
        committed_at: datetime.datetime,
    ) -> str:
        """Write a one-file root commit on ``main`` and return the object ID.

        Stores *content* as an object, a snapshot whose manifest maps
        *filename* to that object, a parentless commit with *message* and
        *committed_at*, and finally the ``main`` head ref pointing at the
        commit.
        """
        obj_id = blob_id(content)
        write_object(root, obj_id, content)
        manifest = {filename: obj_id}
        snap_id = hash_snapshot(manifest)
        write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))
        commit_id = hash_commit(
            parent_ids=[],
            snapshot_id=snap_id,
            message=message,
            committed_at_iso=committed_at.isoformat(),
        )
        write_commit(root, CommitRecord(
            commit_id=commit_id,
            branch="main",
            snapshot_id=snap_id,
            message=message,
            committed_at=committed_at,
        ))
        (heads_dir(root) / "main").write_text(commit_id, encoding="utf-8")
        return obj_id

    @staticmethod
    def _truncate_object(root: pathlib.Path, obj_id: str) -> None:
        """Overwrite the stored object file for *obj_id* with zero bytes."""
        obj_file = object_path(root, obj_id)
        # Make the file writable before overwriting it.
        os.chmod(obj_file, 0o644)
        obj_file.write_bytes(b"")

    # ------------------------------------------------------------------
    # nothing_checked flag
    # ------------------------------------------------------------------

    def test_nothing_checked_false_when_commits_exist(self, tmp_path: pathlib.Path) -> None:
        """With at least one commit on a branch, nothing_checked is False."""
        _init_repo(tmp_path)
        _make_commit(tmp_path, content=b"data", idx=0)
        result = run_verify(tmp_path)
        assert result["nothing_checked"] is False

    def test_nothing_checked_true_when_no_refs_and_no_snapshots(self, tmp_path: pathlib.Path) -> None:
        """An initialised but otherwise empty repo reports nothing_checked."""
        _init_repo(tmp_path)
        result = run_verify(tmp_path)
        assert result["nothing_checked"] is True

    # ------------------------------------------------------------------
    # Snapshot store sweep
    # ------------------------------------------------------------------

    def test_orphan_snapshot_with_missing_object_detected(self, tmp_path: pathlib.Path) -> None:
        """Snapshot exists in .muse/snapshots/ but no commit or branch ref points to it.
        Its objects are missing. Verify should catch this via the snapshot store sweep."""
        _init_repo(tmp_path)
        obj_id = long_id("a" * 64)  # non-existent object
        manifest = {"orphan.py": obj_id}
        snap_id = hash_snapshot(manifest)
        write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))
        # No branch ref written, no commit written.

        result = run_verify(tmp_path)
        assert result["all_ok"] is False
        assert any(f["kind"] == "object" and f["id"] == obj_id for f in result["failures"])
        assert result["nothing_checked"] is False  # sweep found something to check

    def test_orphan_snapshot_with_present_object_passes(self, tmp_path: pathlib.Path) -> None:
        """Orphan snapshot whose object IS present should not cause failures."""
        _init_repo(tmp_path)
        content = b"orphan content"
        obj_id = blob_id(content)
        write_object(tmp_path, obj_id, content)
        manifest = {"file.py": obj_id}
        snap_id = hash_snapshot(manifest)
        write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))

        result = run_verify(tmp_path)
        assert result["all_ok"] is True
        assert result["nothing_checked"] is False  # sweep found the snapshot

    def test_partial_clone_missing_objects_detected(self, tmp_path: pathlib.Path) -> None:
        """Simulate a failed clone: commits and snapshots written to store,
        but the branch ref file was never created and objects are absent.
        Verify must detect the missing objects via the snapshot sweep."""
        _init_repo(tmp_path)
        obj_id = blob_id(b"important file content")
        manifest = {"src/main.py": obj_id}
        snap_id = hash_snapshot(manifest)
        write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))
        committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        commit_id = hash_commit(
            parent_ids=[],
            snapshot_id=snap_id,
            message="partial clone",
            committed_at_iso=committed_at.isoformat(),
        )
        write_commit(tmp_path, CommitRecord(
            commit_id=commit_id,
            branch="main",
            snapshot_id=snap_id,
            message="partial clone",
            committed_at=committed_at,
        ))
        # Critically: the branch ref file is NOT written (simulates clone crash).
        # The object is also NOT written (simulates R2 gap).

        result = run_verify(tmp_path)
        assert result["all_ok"] is False
        object_failures = [f for f in result["failures"] if f["kind"] == "object"]
        assert any(f["id"] == obj_id for f in object_failures)

    # ------------------------------------------------------------------
    # Truncated / empty objects
    # ------------------------------------------------------------------

    def test_truncated_object_caught_by_hash_check(self, tmp_path: pathlib.Path) -> None:
        """An object file truncated to empty bytes is caught as a hash mismatch
        when check_objects=True. Empty bytes have OID sha256:e3b0c44… which
        differs from the stored OID unless the file was always empty."""
        _init_repo(tmp_path)
        obj_id = self._commit_single_file(
            tmp_path,
            "real.py",
            b"real content here",
            "truncated test",
            datetime.datetime(2026, 4, 1, tzinfo=datetime.timezone.utc),
        )

        # Simulate truncation (e.g. R2 serving empty body for a non-empty OID).
        self._truncate_object(tmp_path, obj_id)

        # Hash check catches the mismatch.
        result = run_verify(tmp_path, check_objects=True)
        assert result["all_ok"] is False
        assert any(f["kind"] == "object" and f["id"] == obj_id for f in result["failures"])

    def test_truncated_object_passes_existence_check(self, tmp_path: pathlib.Path) -> None:
        """check_objects=False only verifies the object file exists — it does not
        re-hash. A truncated file passes existence-only mode."""
        _init_repo(tmp_path)
        obj_id = self._commit_single_file(
            tmp_path,
            "real.py",
            b"real content here",
            "existence test",
            datetime.datetime(2026, 4, 1, tzinfo=datetime.timezone.utc),
        )
        self._truncate_object(tmp_path, obj_id)

        result = run_verify(tmp_path, check_objects=False)
        assert result["all_ok"] is True

    def test_genuinely_empty_file_passes_hash_check(self, tmp_path: pathlib.Path) -> None:
        """A file whose content is genuinely empty bytes has OID sha256:e3b0c44…
        The object file is zero bytes and the hash check must pass — empty is valid."""
        _init_repo(tmp_path)
        self._commit_single_file(
            tmp_path,
            "__init__.py",
            b"",  # OID is sha256:e3b0c44...
            "empty file test",
            datetime.datetime(2026, 4, 3, tzinfo=datetime.timezone.utc),
        )

        result = run_verify(tmp_path, check_objects=True)
        assert result["all_ok"] is True, f"Failures: {result['failures']}"

    def test_truncated_object_reported_exactly_once(self, tmp_path: pathlib.Path) -> None:
        """A truncated object should appear exactly once in failures — the hash
        mismatch check, not duplicated by any secondary check."""
        _init_repo(tmp_path)
        obj_id = self._commit_single_file(
            tmp_path,
            "f.py",
            b"will be truncated",
            "dup test",
            datetime.datetime(2026, 4, 2, tzinfo=datetime.timezone.utc),
        )
        self._truncate_object(tmp_path, obj_id)

        result = run_verify(tmp_path, check_objects=True)
        matching = [f for f in result["failures"] if f["id"] == obj_id]
        assert len(matching) == 1, f"Expected 1 failure for {obj_id[:12]}, got {len(matching)}"

    # ------------------------------------------------------------------
    # Sweep accounting and CLI output
    # ------------------------------------------------------------------

    def test_snapshot_sweep_does_not_recheck_already_verified(self, tmp_path: pathlib.Path) -> None:
        """Snapshots reachable from branch refs should not be double-counted
        by the orphan sweep pass."""
        _init_repo(tmp_path)
        _make_commit(tmp_path, content=b"data", idx=0)
        result = run_verify(tmp_path)
        assert result["snapshots_checked"] == 1  # not 2

    def test_json_output_includes_nothing_checked(self, tmp_path: pathlib.Path) -> None:
        """The --json output must include nothing_checked so scripts can distinguish
        empty repos from healthy ones."""
        _init_repo(tmp_path)
        result = runner.invoke(cli, ["verify", "--json"], env=_env(tmp_path))
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert "nothing_checked" in data
        assert data["nothing_checked"] is True
334
335
def test_verify_healthy_repo(tmp_path: pathlib.Path) -> None:
    """A repo with one intact commit verifies OK and reports its counts."""
    repo = _init_repo(tmp_path)
    _make_commit(repo, content=b"healthy", idx=0)

    report = run_verify(repo)

    assert report["all_ok"] is True
    assert report["commits_checked"] == 1
    assert report["objects_checked"] >= 1
343
344
def test_verify_missing_commit_fails(tmp_path: pathlib.Path) -> None:
    """A branch ref naming a commit that was never written is a commit failure."""
    _init_repo(tmp_path)

    # Point ``main`` at an ID for which no commit record exists.
    dangling = fake_id("nonexistent-commit")
    main_ref = heads_dir(tmp_path) / "main"
    main_ref.write_text(dangling, encoding="utf-8")

    report = run_verify(tmp_path)

    assert report["all_ok"] is False
    failure_kinds = [failure["kind"] for failure in report["failures"]]
    assert "commit" in failure_kinds
354
355
def test_verify_corrupted_object_detected(tmp_path: pathlib.Path) -> None:
    """Overwriting a stored object's bytes is reported as an object failure
    when verification runs with ``check_objects=True``."""
    _init_repo(tmp_path)

    # Store a blob and a snapshot that references it.
    payload = b"original content"
    blob = blob_id(payload)
    write_object(tmp_path, blob, payload)
    files = {"file.txt": blob}
    snapshot_id = hash_snapshot(files)
    write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snapshot_id, manifest=files))

    # Root commit on main, with the head ref pointing at it.
    when = datetime.datetime(2026, 3, 1, tzinfo=datetime.timezone.utc)
    message = "corrupt test"
    tip = hash_commit(
        parent_ids=[],
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=when.isoformat(),
    )
    record = CommitRecord(
        commit_id=tip,
        branch="main",
        snapshot_id=snapshot_id,
        message=message,
        committed_at=when,
    )
    write_commit(tmp_path, record)
    (heads_dir(tmp_path) / "main").write_text(tip, encoding="utf-8")

    # Object store writes files as 0o444 (immutable) — chmod before corrupting.
    stored = object_path(tmp_path, blob)
    os.chmod(stored, 0o644)
    stored.write_bytes(b"tampered data!")

    report = run_verify(tmp_path, check_objects=True)

    assert report["all_ok"] is False
    failure_kinds = [failure["kind"] for failure in report["failures"]]
    assert "object" in failure_kinds
389
390
def test_verify_no_objects_flag_skips_rehash(tmp_path: pathlib.Path) -> None:
    """With ``check_objects=False`` a corrupted-but-present object is not flagged."""
    _init_repo(tmp_path)

    # Store a blob and a snapshot that references it.
    payload = b"clean"
    blob = blob_id(payload)
    write_object(tmp_path, blob, payload)
    files = {"f.txt": blob}
    snapshot_id = hash_snapshot(files)
    write_snapshot(tmp_path, SnapshotRecord(snapshot_id=snapshot_id, manifest=files))

    # Root commit on main, with the head ref pointing at it.
    when = datetime.datetime(2026, 3, 2, tzinfo=datetime.timezone.utc)
    tip = hash_commit(
        parent_ids=[],
        snapshot_id=snapshot_id,
        message="test",
        committed_at_iso=when.isoformat(),
    )
    record = CommitRecord(
        commit_id=tip,
        branch="main",
        snapshot_id=snapshot_id,
        message="test",
        committed_at=when,
    )
    write_commit(tmp_path, record)
    (heads_dir(tmp_path) / "main").write_text(tip, encoding="utf-8")

    # Object store writes files as 0o444 (immutable) — chmod before corrupting.
    stored = object_path(tmp_path, blob)
    os.chmod(stored, 0o644)
    stored.write_bytes(b"corrupted!")

    report = run_verify(tmp_path, check_objects=False)

    # Should not flag the corruption since we skipped re-hashing.
    assert report["all_ok"] is True
420
421
422 # ---------------------------------------------------------------------------
423 # CLI: muse verify
424 # ---------------------------------------------------------------------------
425
426
def test_verify_cli_help() -> None:
    """``muse verify --help`` exits 0 and mentions the no-objects option."""
    outcome = runner.invoke(cli, ["verify", "--help"])
    assert outcome.exit_code == 0

    # Rich injects ANSI codes between '--' dashes; the short flag '-O' is reliable.
    help_text = outcome.output
    assert "--no-objects" in help_text or "-O" in help_text
432
433
def test_verify_cli_healthy(tmp_path: pathlib.Path) -> None:
    """``muse verify`` on an intact repo exits 0 and prints "healthy"."""
    repo = _init_repo(tmp_path)
    _make_commit(repo, content=b"cli healthy", idx=99)

    outcome = runner.invoke(cli, ["verify"], env=_env(repo))

    assert outcome.exit_code == 0
    printed = outcome.output.lower()
    assert "healthy" in printed
440
441
def test_verify_cli_json(tmp_path: pathlib.Path) -> None:
    """``muse verify --json`` emits a JSON report with all_ok and no failures."""
    repo = _init_repo(tmp_path)
    _make_commit(repo, content=b"json verify", idx=88)

    outcome = runner.invoke(cli, ["verify", "--json"], env=_env(repo))
    assert outcome.exit_code == 0

    report = json.loads(outcome.output)
    assert report["all_ok"] is True
    assert report["failures"] == []
450
451
def test_verify_cli_quiet_exit_zero_when_clean(tmp_path: pathlib.Path) -> None:
    """``muse verify --quiet`` exits 0 when the repo is intact."""
    repo = _init_repo(tmp_path)
    _make_commit(repo, content=b"quiet clean", idx=77)

    outcome = runner.invoke(cli, ["verify", "--quiet"], env=_env(repo))

    assert outcome.exit_code == 0
457
458
def test_verify_cli_quiet_exit_one_when_broken(tmp_path: pathlib.Path) -> None:
    """``muse verify -q`` exits non-zero when ``main`` points at a missing commit."""
    _init_repo(tmp_path)
    # Named so it does not shadow the ``fake_id`` helper imported at file level.
    dangling_commit = "b" * 64
    (heads_dir(tmp_path) / "main").write_text(dangling_commit, encoding="utf-8")
    result = runner.invoke(cli, ["verify", "-q"], env=_env(tmp_path))
    assert result.exit_code != 0
465
466
def test_verify_cli_no_objects_flag(tmp_path: pathlib.Path) -> None:
    """``muse verify --no-objects`` exits 0 on an intact repo."""
    repo = _init_repo(tmp_path)
    _make_commit(repo, content=b"no-obj flag", idx=66)

    outcome = runner.invoke(cli, ["verify", "--no-objects"], env=_env(repo))

    assert outcome.exit_code == 0
472
473
474 # ---------------------------------------------------------------------------
475 # Stress: 100-commit chain
476 # ---------------------------------------------------------------------------
477
478
def test_verify_stress_100_commit_chain(tmp_path: pathlib.Path) -> None:
    """A linear chain of 100 commits verifies cleanly with object hashing on."""
    _init_repo(tmp_path)

    # Each new commit takes the previous one as its parent.
    tip: str | None = None
    for step in range(100):
        tip = _make_commit(tmp_path, parent_id=tip, content=b"chain", idx=step)

    report = run_verify(tmp_path, check_objects=True)

    assert report["all_ok"] is True
    assert report["commits_checked"] == 100