gabriel / muse public
test_cmd_verify_object.py python
793 lines 33.0 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
1 """Comprehensive tests for ``muse verify-object``.
2
3 Coverage tiers
4 --------------
5 - Unit: _iter_all_object_ids, _verify_one (all paths), schema, constants
6 - Integration: JSON/text/quiet, --all, --stdin, --fail-fast, ordering, counts
7 - Data integrity: truncated file, zero-byte blob, large-object streaming
8 - Security: stderr routing, ANSI stripping, path traversal, unicode, CRLF,
9 symlink shard directory
10 - Stress: 100-object --all, 1000-object --all, 200 sequential verifies,
11 stdin 200 ids, duration bounded for small ops
12 """
13 from __future__ import annotations
14
15 import json
16 import os
17 import pathlib
18
19 import pytest
20
21 from muse.core.types import blob_id, fake_id
22 from muse.core.errors import ExitCode
23 from muse.core.object_store import object_path, write_object
24 from muse.core.paths import muse_dir, objects_dir
25 from tests.cli_test_helper import CliRunner, InvokeResult
26
27 runner = CliRunner()
28
29 # ---------------------------------------------------------------------------
30 # Helpers
31 # ---------------------------------------------------------------------------
32
33 _FAKE_CONTENT = b"hello muse"
34 _GOOD_OID = blob_id(_FAKE_CONTENT)
35
36
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create a minimal repository skeleton under *tmp_path* and return its root.

    The skeleton consists of the ``objects``, ``commits``, ``snapshots`` and
    ``refs/heads`` directories inside the repo's muse directory, a ``HEAD``
    file pointing at ``refs/heads/main`` and a ``repo.json`` descriptor.
    """
    root = tmp_path / "repo"
    meta = muse_dir(root)
    for parts in (("objects",), ("commits",), ("snapshots",), ("refs", "heads")):
        meta.joinpath(*parts).mkdir(parents=True)
    descriptor = {"repo_id": "r1", "domain": "code"}
    (meta / "HEAD").write_text("ref: refs/heads/main")
    (meta / "repo.json").write_text(json.dumps(descriptor))
    return root
47
48
def _write_object(repo: pathlib.Path, content: bytes) -> str:
    """Store *content* in the repo's object store and return its object ID.

    The ID is computed from the content with ``blob_id`` before the write.
    """
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id
54
55
def _corrupt_object(repo: pathlib.Path, oid: str) -> None:
    """Replace the stored object's bytes with garbage to simulate bit-rot.

    Object files are written read-only (0o444) by the store to enforce
    immutability, so the file is made writable first.
    """
    garbage = b"corrupted data that does not hash to the oid"
    target = object_path(repo, oid)
    target.chmod(0o644)
    target.write_bytes(garbage)
65
66
def _truncate_object(repo: pathlib.Path, oid: str, keep_bytes: int = 0) -> None:
    """Cut the stored object file down to its first ``keep_bytes`` bytes.

    The file is made writable first, then rewritten with the kept prefix.
    """
    target = object_path(repo, oid)
    target.chmod(0o644)
    kept_prefix = target.read_bytes()[:keep_bytes]
    target.write_bytes(kept_prefix)
73
74
def _vo(repo: pathlib.Path, *args: str, stdin: str | None = None) -> InvokeResult:
    """Run ``muse verify-object`` with *args* against *repo*.

    The repository is selected through the ``MUSE_REPO_ROOT`` environment
    variable; *stdin*, when given, is fed to the command as its input.
    """
    from muse.cli.app import main as cli

    argv = ["verify-object"]
    argv.extend(args)
    environment = {"MUSE_REPO_ROOT": str(repo)}
    return runner.invoke(cli, argv, env=environment, input=stdin)
83
84
85 # ---------------------------------------------------------------------------
86 # Unit — _iter_all_object_ids
87 # ---------------------------------------------------------------------------
88
89
class TestIterAllObjectIds:
    """Unit tests for ``_iter_all_object_ids``, the object-store enumerator."""

    def test_empty_store(self, tmp_path: pathlib.Path) -> None:
        """A repo with no written objects enumerates to an empty list."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        store_root = _make_repo(tmp_path)
        listed = _iter_all_object_ids(store_root)
        assert listed == []

    def test_missing_objects_dir(self, tmp_path: pathlib.Path) -> None:
        """Removing the objects directory entirely still yields an empty list."""
        import shutil

        from muse.cli.commands.verify_object import _iter_all_object_ids

        store_root = _make_repo(tmp_path)
        shutil.rmtree(objects_dir(store_root))
        listed = _iter_all_object_ids(store_root)
        assert listed == []

    def test_finds_written_object(self, tmp_path: pathlib.Path) -> None:
        """An object written to the store appears in the enumeration."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        store_root = _make_repo(tmp_path)
        written = _write_object(store_root, b"test content")
        listed = _iter_all_object_ids(store_root)
        assert written in listed

    def test_multiple_objects_sorted(self, tmp_path: pathlib.Path) -> None:
        """Every written object is listed and the listing is sorted."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        store_root = _make_repo(tmp_path)
        written = []
        for i in range(5):
            written.append(_write_object(store_root, f"content {i}".encode()))
        listed = _iter_all_object_ids(store_root)
        assert set(written) == set(listed)
        assert listed == sorted(listed)

    def test_symlinks_in_shard_skipped(self, tmp_path: pathlib.Path) -> None:
        """A symlink inside a shard must not produce a second entry for the object."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        store_root = _make_repo(tmp_path)
        written = _write_object(store_root, b"real content")
        real_file = object_path(store_root, written)
        link = real_file.parent / "symlink_file"
        link.symlink_to(real_file)
        listed = _iter_all_object_ids(store_root)
        assert listed.count(written) == 1

    def test_short_shard_dir_names_ignored(self, tmp_path: pathlib.Path) -> None:
        """A three-character directory under ``sha256`` contributes no IDs."""
        from muse.cli.commands.verify_object import _iter_all_object_ids
        from muse.core.object_store import objects_dir

        store_root = _make_repo(tmp_path)
        bogus_shard = objects_dir(store_root) / "sha256" / "abc"
        bogus_shard.mkdir(parents=True, exist_ok=True)
        listed = _iter_all_object_ids(store_root)
        assert listed == []

    def test_returns_sha256_prefixed_ids(self, tmp_path: pathlib.Path) -> None:
        """Every enumerated ID carries the ``sha256:`` prefix."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        store_root = _make_repo(tmp_path)
        _write_object(store_root, b"prefix check")
        listed = _iter_all_object_ids(store_root)
        unprefixed = [entry for entry in listed if not entry.startswith("sha256:")]
        assert not unprefixed
140
141
142 # ---------------------------------------------------------------------------
143 # Unit — _verify_one
144 # ---------------------------------------------------------------------------
145
146
class TestVerifyOne:
    """Unit tests for ``_verify_one``: success, missing, corrupt, invalid-ID and I/O paths."""

    def test_valid_object_ok(self, tmp_path: pathlib.Path) -> None:
        """An intact object verifies with its size and no error."""
        from muse.cli.commands.verify_object import _verify_one
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"hello world")
        result = _verify_one(repo, oid)
        assert result["ok"] is True
        assert result["size_bytes"] == len(b"hello world")
        assert result["error"] is None

    def test_ok_result_preserves_object_id(self, tmp_path: pathlib.Path) -> None:
        """The result echoes back the object ID that was verified."""
        from muse.cli.commands.verify_object import _verify_one
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"id check")
        result = _verify_one(repo, oid)
        assert result["object_id"] == oid

    def test_error_is_none_when_ok(self, tmp_path: pathlib.Path) -> None:
        """A passing result carries ``error`` set to ``None``."""
        from muse.cli.commands.verify_object import _verify_one
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"clean")
        result = _verify_one(repo, oid)
        assert result["ok"] is True
        assert result["error"] is None

    def test_size_counted_during_hash(self, tmp_path: pathlib.Path) -> None:
        """``size_bytes`` equals the number of bytes in the stored content."""
        from muse.cli.commands.verify_object import _verify_one
        repo = _make_repo(tmp_path)
        content = b"x" * 12345
        oid = _write_object(repo, content)
        result = _verify_one(repo, oid)
        assert result["size_bytes"] == 12345

    def test_zero_byte_object_ok(self, tmp_path: pathlib.Path) -> None:
        """An empty object verifies clean with a size of zero."""
        from muse.cli.commands.verify_object import _verify_one
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"")
        result = _verify_one(repo, oid)
        assert result["ok"] is True
        assert result["size_bytes"] == 0

    def test_missing_object_not_ok(self, tmp_path: pathlib.Path) -> None:
        """A well-formed ID with no stored file reports "not found" and no size."""
        from muse.cli.commands.verify_object import _verify_one
        repo = _make_repo(tmp_path)
        result = _verify_one(repo, blob_id(b"nonexistent object"))
        assert result["ok"] is False
        assert "not found" in (result["error"] or "")
        assert result["size_bytes"] is None

    def test_corrupt_object_mismatch(self, tmp_path: pathlib.Path) -> None:
        """Overwritten object bytes are reported as a hash mismatch."""
        from muse.cli.commands.verify_object import _verify_one
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"original content")
        _corrupt_object(repo, oid)
        result = _verify_one(repo, oid)
        assert result["ok"] is False
        assert "mismatch" in (result["error"] or "")

    def test_corrupt_object_has_size_bytes(self, tmp_path: pathlib.Path) -> None:
        """Even on hash mismatch, size_bytes is populated (bytes were read)."""
        from muse.cli.commands.verify_object import _verify_one
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"original content")
        _corrupt_object(repo, oid)
        result = _verify_one(repo, oid)
        assert result["size_bytes"] is not None
        assert result["size_bytes"] > 0

    def test_truncated_object_mismatch(self, tmp_path: pathlib.Path) -> None:
        """A file cut down to four bytes is reported as a hash mismatch."""
        from muse.cli.commands.verify_object import _verify_one
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"original content that will be truncated")
        _truncate_object(repo, oid, keep_bytes=4)
        result = _verify_one(repo, oid)
        assert result["ok"] is False
        assert "mismatch" in (result["error"] or "")

    def test_empty_truncated_object_mismatch(self, tmp_path: pathlib.Path) -> None:
        """A non-empty object whose file was emptied does not verify."""
        from muse.cli.commands.verify_object import _verify_one
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"will be emptied")
        _truncate_object(repo, oid, keep_bytes=0)
        result = _verify_one(repo, oid)
        assert result["ok"] is False

    def test_invalid_object_id_format(self, tmp_path: pathlib.Path) -> None:
        """A malformed ID yields a failing result with an error message."""
        from muse.cli.commands.verify_object import _verify_one
        repo = _make_repo(tmp_path)
        result = _verify_one(repo, "not-a-sha256")
        assert result["ok"] is False
        assert result["error"] is not None

    def test_invalid_object_id_never_raises(self, tmp_path: pathlib.Path) -> None:
        """An ID made of NUL characters returns a failing dict instead of raising."""
        from muse.cli.commands.verify_object import _verify_one
        repo = _make_repo(tmp_path)
        result = _verify_one(repo, "\x00" * 64)
        assert isinstance(result, dict)
        assert result["ok"] is False

    def test_io_error_returns_error_dict(self, tmp_path: pathlib.Path) -> None:
        """OSError during read returns an error result, never raises.

        The read failure is provoked by clearing every permission bit. That
        has no effect for root or on platforms that ignore the mode bits, so
        the test is skipped when the file is still readable afterwards.
        """
        from muse.cli.commands.verify_object import _verify_one
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"to be made unreadable")
        obj_file = object_path(repo, oid)
        obj_file.chmod(0o000)
        try:
            if os.access(obj_file, os.R_OK):
                # pytest.skip raises, so the finally clause still restores the mode.
                pytest.skip("file stays readable after chmod(0o000) (root or non-POSIX permissions)")
            result = _verify_one(repo, oid)
            assert result["ok"] is False
            assert result["error"] is not None
            assert "I/O error" in (result["error"] or "")
        finally:
            # Restore write access so tmp_path cleanup can delete the file.
            obj_file.chmod(0o644)
260
261
class TestObjectResultSchema:
    """Pins the set of fields declared on ``_ObjectResult``."""

    def test_fields(self) -> None:
        """``_ObjectResult`` declares exactly the four expected annotations."""
        from muse.cli.commands.verify_object import _ObjectResult

        expected_fields = {"object_id", "ok", "size_bytes", "error"}
        declared_fields = set(_ObjectResult.__annotations__)
        assert declared_fields == expected_fields
266
267
class TestChunkConstant:
    """Sanity checks on the ``_CHUNK`` read-size constant."""

    def test_chunk_is_power_of_two(self) -> None:
        """``_CHUNK`` is positive and has exactly one bit set."""
        from muse.cli.commands.verify_object import _CHUNK

        assert _CHUNK > 0
        # A power of two shares no bits with its predecessor.
        shared_bits = _CHUNK & (_CHUNK - 1)
        assert shared_bits == 0
273
274
275 # ---------------------------------------------------------------------------
276 # Integration — JSON output
277 # ---------------------------------------------------------------------------
278
279
class TestJsonOutput:
    """Integration tests for the ``--json`` report of ``verify-object``."""

    def test_valid_object_all_ok(self, tmp_path: pathlib.Path) -> None:
        """One intact object: exit 0 and a fully passing report."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, _FAKE_CONTENT)
        outcome = _vo(repo, "--json", object_id)
        assert outcome.exit_code == 0
        report = json.loads(outcome.output)
        first = report["results"][0]
        assert report["all_ok"] is True
        assert report["checked"] == 1
        assert report["failed"] == 0
        assert first["ok"] is True
        assert first["size_bytes"] == len(_FAKE_CONTENT)

    def test_missing_object_fails(self, tmp_path: pathlib.Path) -> None:
        """An ID with no stored object exits USER_ERROR and counts one failure."""
        repo = _make_repo(tmp_path)
        absent_id = blob_id(b"nonexistent object")
        outcome = _vo(repo, "--json", absent_id)
        assert outcome.exit_code == ExitCode.USER_ERROR
        report = json.loads(outcome.output)
        assert report["all_ok"] is False
        assert report["failed"] == 1

    def test_corrupt_object_fails(self, tmp_path: pathlib.Path) -> None:
        """A corrupted object is reported as a mismatch with a USER_ERROR exit."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, b"good content")
        _corrupt_object(repo, object_id)
        outcome = _vo(repo, "--json", object_id)
        assert outcome.exit_code == ExitCode.USER_ERROR
        first = json.loads(outcome.output)["results"][0]
        assert first["ok"] is False
        assert "mismatch" in first["error"]

    def test_mixed_pass_fail(self, tmp_path: pathlib.Path) -> None:
        """One good and one missing ID: two checked, one failed."""
        repo = _make_repo(tmp_path)
        present_id = _write_object(repo, b"good")
        absent_id = blob_id(b"nonexistent object b")
        outcome = _vo(repo, "--json", present_id, absent_id)
        assert outcome.exit_code == ExitCode.USER_ERROR
        report = json.loads(outcome.output)
        assert report["checked"] == 2
        assert report["failed"] == 1

    def test_json_shorthand(self, tmp_path: pathlib.Path) -> None:
        """``--json`` alone produces a report containing ``all_ok``."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, b"data")
        outcome = _vo(repo, "--json", object_id)
        assert outcome.exit_code == 0
        report = json.loads(outcome.output)
        assert "all_ok" in report

    def test_duration_ms_and_exit_code_present(self, tmp_path: pathlib.Path) -> None:
        """The report carries a non-negative float ``duration_ms`` and ``exit_code`` 0."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, _FAKE_CONTENT)
        outcome = _vo(repo, "--json", object_id)
        report = json.loads(outcome.output)
        assert "duration_ms" in report
        elapsed = report["duration_ms"]
        assert isinstance(elapsed, float)
        assert elapsed >= 0.0
        assert report["exit_code"] == 0

    def test_exit_code_nonzero_on_failure(self, tmp_path: pathlib.Path) -> None:
        """A failing run reports a non-zero ``exit_code`` inside the JSON."""
        repo = _make_repo(tmp_path)
        absent_id = blob_id(b"nonexistent object")
        outcome = _vo(repo, "--json", absent_id)
        report = json.loads(outcome.output)
        assert report["exit_code"] != 0
        assert report["duration_ms"] >= 0.0

    def test_results_order_matches_input(self, tmp_path: pathlib.Path) -> None:
        """Results must appear in the same order as the positional arguments."""
        repo = _make_repo(tmp_path)
        requested = []
        for i in range(5):
            requested.append(_write_object(repo, f"ordered {i}".encode()))
        outcome = _vo(repo, "--json", *requested)
        report = json.loads(outcome.output)
        reported_ids = [entry["object_id"] for entry in report["results"]]
        assert reported_ids == requested

    def test_checked_equals_len_results(self, tmp_path: pathlib.Path) -> None:
        """``checked`` equals the number of entries in ``results``."""
        repo = _make_repo(tmp_path)
        requested = []
        for i in range(3):
            requested.append(_write_object(repo, f"cnt {i}".encode()))
        outcome = _vo(repo, "--json", *requested)
        report = json.loads(outcome.output)
        assert report["checked"] == len(report["results"])

    def test_failed_count_matches_failed_results(self, tmp_path: pathlib.Path) -> None:
        """``failed`` equals the number of non-ok entries, here two."""
        repo = _make_repo(tmp_path)
        present_id = _write_object(repo, b"ok")
        absent_first = blob_id(b"missing a")
        absent_second = blob_id(b"missing b")
        outcome = _vo(repo, "--json", present_id, absent_first, absent_second)
        report = json.loads(outcome.output)
        failing_entries = [entry for entry in report["results"] if not entry["ok"]]
        assert report["failed"] == len(failing_entries)
        assert report["failed"] == 2

    def test_error_null_when_ok(self, tmp_path: pathlib.Path) -> None:
        """A passing entry serialises ``error`` as JSON null."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, b"clean object")
        outcome = _vo(repo, "--json", object_id)
        report = json.loads(outcome.output)
        assert report["results"][0]["error"] is None

    def test_duplicate_id_verified_twice(self, tmp_path: pathlib.Path) -> None:
        """Passing the same OID twice verifies it twice — no implicit dedup."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, b"dedup test")
        outcome = _vo(repo, "--json", object_id, object_id)
        report = json.loads(outcome.output)
        assert report["checked"] == 2
        assert report["all_ok"] is True
379
380
381 # ---------------------------------------------------------------------------
382 # Integration — text output
383 # ---------------------------------------------------------------------------
384
385
class TestTextOutput:
    """Integration tests for the default (non-JSON) output."""

    def test_ok_label_and_size(self, tmp_path: pathlib.Path) -> None:
        """A passing object prints ``OK`` and its byte size, and exits 0."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, _FAKE_CONTENT)
        outcome = _vo(repo, object_id)
        size_text = str(len(_FAKE_CONTENT))
        assert outcome.exit_code == 0
        assert "OK" in outcome.output
        assert size_text in outcome.output

    def test_fail_label_on_missing(self, tmp_path: pathlib.Path) -> None:
        """A missing object prints ``FAIL`` and exits with USER_ERROR."""
        repo = _make_repo(tmp_path)
        absent_id = blob_id(b"nonexistent object c")
        outcome = _vo(repo, absent_id)
        assert "FAIL" in outcome.output
        assert outcome.exit_code == ExitCode.USER_ERROR

    def test_summary_line_present(self, tmp_path: pathlib.Path) -> None:
        """Text mode always ends with a Checked/Failed summary line."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, b"summary test")
        printed = _vo(repo, object_id).output
        assert "Checked:" in printed
        assert "Failed:" in printed

    def test_summary_reflects_counts(self, tmp_path: pathlib.Path) -> None:
        """The summary shows two checked and one failed for a good/missing pair."""
        repo = _make_repo(tmp_path)
        present_id = _write_object(repo, b"good")
        absent_id = blob_id(b"missing for summary")
        printed = _vo(repo, present_id, absent_id).output
        assert "Checked: 2" in printed
        assert "Failed: 1" in printed

    def test_summary_all_pass(self, tmp_path: pathlib.Path) -> None:
        """``--all`` over three intact objects reports three checked, none failed."""
        repo = _make_repo(tmp_path)
        index = 0
        while index < 3:
            _write_object(repo, f"text pass {index}".encode())
            index += 1
        printed = _vo(repo, "--all").output
        assert "Checked: 3" in printed
        assert "Failed: 0" in printed
424
425
426 # ---------------------------------------------------------------------------
427 # Integration — --quiet mode
428 # ---------------------------------------------------------------------------
429
430
class TestQuietMode:
    """Integration tests for ``--quiet``: exit code only, no output."""

    def test_all_ok_exits_0(self, tmp_path: pathlib.Path) -> None:
        """A passing object exits 0 and prints nothing."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, _FAKE_CONTENT)
        outcome = _vo(repo, "--quiet", object_id)
        assert outcome.exit_code == 0
        assert outcome.output.strip() == ""

    def test_failure_exits_1(self, tmp_path: pathlib.Path) -> None:
        """A missing object exits USER_ERROR and still prints nothing."""
        repo = _make_repo(tmp_path)
        absent_id = blob_id(b"nonexistent object d")
        outcome = _vo(repo, "--quiet", absent_id)
        assert outcome.exit_code == ExitCode.USER_ERROR
        assert outcome.output.strip() == ""

    def test_quiet_with_text_format_no_output(self, tmp_path: pathlib.Path) -> None:
        """--quiet suppresses output regardless of --format."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, b"quiet text")
        printed = _vo(repo, "--quiet", object_id).output
        assert printed.strip() == ""
451
452
453 # ---------------------------------------------------------------------------
454 # Integration — --all (fsck mode)
455 # ---------------------------------------------------------------------------
456
457
class TestAllMode:
    """Integration tests for ``--all``, which verifies every stored object."""

    def test_empty_store_all_ok(self, tmp_path: pathlib.Path) -> None:
        """An empty store passes with zero objects checked."""
        repo = _make_repo(tmp_path)
        outcome = _vo(repo, "--all", "--json")
        report = json.loads(outcome.output)
        assert report["all_ok"] is True
        assert report["checked"] == 0

    def test_all_finds_written_objects(self, tmp_path: pathlib.Path) -> None:
        """Five written objects are all found and all pass."""
        repo = _make_repo(tmp_path)
        index = 0
        while index < 5:
            _write_object(repo, f"content {index}".encode())
            index += 1
        outcome = _vo(repo, "--all", "--json")
        report = json.loads(outcome.output)
        assert report["checked"] == 5
        assert report["all_ok"] is True

    def test_all_detects_corruption(self, tmp_path: pathlib.Path) -> None:
        """A corrupted object in the store is counted as one failure."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, b"good data")
        _corrupt_object(repo, object_id)
        outcome = _vo(repo, "--all", "--json")
        report = json.loads(outcome.output)
        assert report["failed"] == 1

    def test_all_plus_explicit_ids_rejected(self, tmp_path: pathlib.Path) -> None:
        """``--all`` with a positional ID exits USER_ERROR and writes nothing to stdout."""
        repo = _make_repo(tmp_path)
        extra_id = blob_id(b"explicit id arg")
        outcome = _vo(repo, "--all", extra_id)
        assert outcome.exit_code == ExitCode.USER_ERROR
        assert outcome.stdout_bytes == b""

    def test_all_plus_stdin_rejected(self, tmp_path: pathlib.Path) -> None:
        """--all + --stdin is rejected for consistency with --all + positional."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, b"stdin data")
        piped = f"{object_id}\n"
        outcome = _vo(repo, "--all", "--stdin", stdin=piped)
        assert outcome.exit_code == ExitCode.USER_ERROR
        assert outcome.stdout_bytes == b""

    def test_all_quiet(self, tmp_path: pathlib.Path) -> None:
        """``--all --quiet`` on a healthy store exits 0 with no output."""
        repo = _make_repo(tmp_path)
        _write_object(repo, b"content")
        outcome = _vo(repo, "--all", "--quiet")
        assert outcome.exit_code == 0
        assert outcome.output.strip() == ""
500
501
502 # ---------------------------------------------------------------------------
503 # Integration — --stdin
504 # ---------------------------------------------------------------------------
505
506
class TestStdinMode:
    """Integration tests for ``--stdin``, which reads object IDs line by line."""

    def test_reads_ids_from_stdin(self, tmp_path: pathlib.Path) -> None:
        """A single ID piped on stdin is checked and passes."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, _FAKE_CONTENT)
        piped = f"{object_id}\n"
        outcome = _vo(repo, "--stdin", "--json", stdin=piped)
        report = json.loads(outcome.output)
        assert report["checked"] == 1
        assert report["all_ok"] is True

    def test_comments_and_blank_lines_skipped(self, tmp_path: pathlib.Path) -> None:
        """Blank lines and ``#`` comment lines are not counted as IDs."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, _FAKE_CONTENT)
        piped = f"\n# comment\n{object_id}\n\n"
        outcome = _vo(repo, "--stdin", "--json", stdin=piped)
        report = json.loads(outcome.output)
        assert report["checked"] == 1

    def test_stdin_combines_with_positional(self, tmp_path: pathlib.Path) -> None:
        """A positional ID and a stdin ID are both checked."""
        repo = _make_repo(tmp_path)
        positional_id = _write_object(repo, b"one")
        piped_id = _write_object(repo, b"two")
        piped = f"{piped_id}\n"
        outcome = _vo(repo, "--stdin", "--json", positional_id, stdin=piped)
        report = json.loads(outcome.output)
        assert report["checked"] == 2

    def test_empty_stdin_no_explicit_errors(self, tmp_path: pathlib.Path) -> None:
        """Empty stdin with no positional IDs exits with USER_ERROR."""
        repo = _make_repo(tmp_path)
        outcome = _vo(repo, "--stdin", "--json", stdin="")
        assert outcome.exit_code == ExitCode.USER_ERROR

    def test_crlf_line_endings_stripped(self, tmp_path: pathlib.Path) -> None:
        """Windows CRLF line endings must not corrupt the object ID."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, b"crlf test")
        piped = f"{object_id}\r\n"
        outcome = _vo(repo, "--stdin", "--json", stdin=piped)
        report = json.loads(outcome.output)
        assert report["all_ok"] is True
        assert report["results"][0]["object_id"] == object_id
540
541
542 # ---------------------------------------------------------------------------
543 # Integration — --fail-fast
544 # ---------------------------------------------------------------------------
545
546
class TestFailFast:
    """Integration tests for ``--fail-fast``, which stops at the first failure."""

    def test_stops_after_first_failure(self, tmp_path: pathlib.Path) -> None:
        """With --fail-fast, only the first failing result appears in output."""
        repo = _make_repo(tmp_path)
        absent_first = blob_id(b"missing ff a")
        absent_second = blob_id(b"missing ff b")
        present_id = _write_object(repo, b"good after bad")
        # Order is missing, missing, present: the run should end after the first.
        outcome = _vo(repo, "--fail-fast", "--json", absent_first, absent_second, present_id)
        report = json.loads(outcome.output)
        assert report["checked"] == 1
        assert report["failed"] == 1
        assert report["all_ok"] is False

    def test_no_effect_when_all_pass(self, tmp_path: pathlib.Path) -> None:
        """--fail-fast is a no-op when every object passes."""
        repo = _make_repo(tmp_path)
        requested = []
        for i in range(5):
            requested.append(_write_object(repo, f"ff pass {i}".encode()))
        outcome = _vo(repo, "--fail-fast", "--json", *requested)
        report = json.loads(outcome.output)
        assert report["checked"] == 5
        assert report["all_ok"] is True

    def test_fail_fast_exits_nonzero(self, tmp_path: pathlib.Path) -> None:
        """A failing run under --fail-fast exits with USER_ERROR."""
        repo = _make_repo(tmp_path)
        absent_id = blob_id(b"missing ff c")
        outcome = _vo(repo, "--fail-fast", "--json", absent_id)
        assert outcome.exit_code == ExitCode.USER_ERROR

    def test_fail_fast_with_all(self, tmp_path: pathlib.Path) -> None:
        """--fail-fast + --all stops the scan on the first corrupt object."""
        repo = _make_repo(tmp_path)
        index = 0
        while index < 10:
            _write_object(repo, f"store {index}".encode())
            index += 1
        # Corrupt the first object in enumeration order.
        from muse.cli.commands.verify_object import _iter_all_object_ids

        stored_ids = _iter_all_object_ids(repo)
        _corrupt_object(repo, stored_ids[0])
        outcome = _vo(repo, "--all", "--fail-fast", "--json")
        report = json.loads(outcome.output)
        # The scan ended early, so fewer objects were checked than exist.
        assert report["checked"] < len(stored_ids)
        assert report["failed"] == 1

    def test_fail_fast_duration_ms_present(self, tmp_path: pathlib.Path) -> None:
        """An early-terminated run still reports a non-negative ``duration_ms``."""
        repo = _make_repo(tmp_path)
        absent_id = blob_id(b"missing ff d")
        outcome = _vo(repo, "--fail-fast", "--json", absent_id)
        report = json.loads(outcome.output)
        assert "duration_ms" in report
        assert report["duration_ms"] >= 0.0
592
593
594 # ---------------------------------------------------------------------------
595 # Security
596 # ---------------------------------------------------------------------------
597
598
class TestSecurity:
    """Security-oriented tests: error routing, hostile IDs, symlinks and CRLF input."""

    def test_format_error_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """A ``fake_id`` argument exits USER_ERROR without a traceback in the output."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, fake_id("a"))
        assert result.exit_code == ExitCode.USER_ERROR
        assert "Traceback" not in result.output

    def test_no_traceback_on_bad_format(self, tmp_path: pathlib.Path) -> None:
        """No Python traceback is printed for a ``fake_id`` argument."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, fake_id("b"))
        assert "Traceback" not in result.output

    def test_ansi_in_error_message_stripped_text(self, tmp_path: pathlib.Path) -> None:
        """Text output for a missing object contains no ESC (``\\x1b``) characters."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, blob_id(b"nonexistent"))
        assert "\x1b" not in result.output

    def test_invalid_id_returns_error_not_crash(self, tmp_path: pathlib.Path) -> None:
        """A malformed ID exits USER_ERROR and does not print a traceback."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, "not-a-sha256")
        assert result.exit_code == ExitCode.USER_ERROR
        assert "Traceback" not in result.output

    def test_no_ids_errors_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """Running with no IDs exits USER_ERROR and writes an error to stderr."""
        repo = _make_repo(tmp_path)
        result = _vo(repo)
        assert result.exit_code == ExitCode.USER_ERROR
        assert "error" in result.stderr.lower()

    def test_path_traversal_in_object_id_rejected(self, tmp_path: pathlib.Path) -> None:
        """Path-traversal-looking IDs must be rejected by validation before any I/O."""
        repo = _make_repo(tmp_path)
        traversal = f"sha256:../../etc/passwd{'a' * 50}"
        result = _vo(repo, "--json", traversal)
        # Validation must reject it — never attempts to open a path.
        assert result.exit_code == ExitCode.USER_ERROR
        data = json.loads(result.output)
        # The error message explains the format violation, not an fs operation.
        assert data["results"][0]["ok"] is False
        assert "expected" in data["results"][0]["error"]

    def test_unicode_in_object_id_rejected(self, tmp_path: pathlib.Path) -> None:
        """An ID containing a non-ASCII character exits with USER_ERROR."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, f"sha256:café{'a' * 60}")
        assert result.exit_code == ExitCode.USER_ERROR

    def test_symlink_shard_directory_skipped(self, tmp_path: pathlib.Path) -> None:
        """A symlinked shard directory must not be followed during --all."""
        from muse.cli.commands.verify_object import _iter_all_object_ids
        from muse.core.object_store import objects_dir
        repo = _make_repo(tmp_path)
        # Write a real object so the algo dir exists.
        real_oid = _write_object(repo, b"real")
        algo_dir = objects_dir(repo) / "sha256"
        # Plant a decoy outside the repo so that following the symlink would
        # surface an extra entry. Its name copies the length of a real object
        # file name — assumes shard entries share one name length; confirm
        # against the store layout.
        decoy = tmp_path / ("0" * len(object_path(repo, real_oid).name))
        decoy.write_bytes(b"decoy outside the repo")
        # Add a symlink that points outside the repo.
        sym_shard = algo_dir / "ff"
        sym_shard.symlink_to(tmp_path)
        ids = _iter_all_object_ids(repo)
        assert all(found.startswith("sha256:") for found in ids)
        # The symlinked shard's entries must not appear: only the real object is listed.
        assert ids == [real_oid]

    def test_crlf_injection_in_stdin_does_not_corrupt_id(self, tmp_path: pathlib.Path) -> None:
        r"""A \r embedded in a stdin line must not be part of the stored OID."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"crlf injection")
        # Feed oid with embedded \r before the newline.
        data = json.loads(_vo(repo, "--stdin", "--json", stdin=f"{oid}\r\n").output)
        assert data["all_ok"] is True

    def test_all_error_goes_to_stderr_not_stdout(self, tmp_path: pathlib.Path) -> None:
        """Argument errors for --all always land on stderr, stdout stays empty."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, "--all", "--stdin", stdin="")
        assert result.stdout_bytes == b""
        assert len(result.stderr) > 0
674
675
676 # ---------------------------------------------------------------------------
677 # Data integrity
678 # ---------------------------------------------------------------------------
679
680
class TestDataIntegrity:
    """End-to-end integrity checks: empty, truncated, large and corrupted objects."""

    def test_zero_byte_blob_round_trips(self, tmp_path: pathlib.Path) -> None:
        """A zero-byte object has a well-defined SHA-256 and must verify clean."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, b"")
        outcome = _vo(repo, "--json", object_id)
        report = json.loads(outcome.output)
        assert report["all_ok"] is True
        assert report["results"][0]["size_bytes"] == 0

    def test_truncated_file_is_hash_mismatch(self, tmp_path: pathlib.Path) -> None:
        """A file cut down to three bytes is reported as a hash mismatch."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, b"file that will be truncated")
        _truncate_object(repo, object_id, keep_bytes=3)
        outcome = _vo(repo, "--json", object_id)
        first = json.loads(outcome.output)["results"][0]
        assert first["ok"] is False
        assert "mismatch" in first["error"]

    def test_completely_emptied_file_is_hash_mismatch(self, tmp_path: pathlib.Path) -> None:
        """A non-empty object whose file was emptied fails verification."""
        repo = _make_repo(tmp_path)
        object_id = _write_object(repo, b"non-empty content")
        _truncate_object(repo, object_id, keep_bytes=0)
        outcome = _vo(repo, "--json", object_id)
        first = json.loads(outcome.output)["results"][0]
        assert first["ok"] is False

    def test_large_object_streams_without_loading_all(self, tmp_path: pathlib.Path) -> None:
        """A 4 MiB object must verify correctly via streaming (no heap spike)."""
        repo = _make_repo(tmp_path)
        payload = b"a" * (4 * 1024 * 1024)
        object_id = _write_object(repo, payload)
        outcome = _vo(repo, "--json", object_id)
        report = json.loads(outcome.output)
        assert report["all_ok"] is True
        assert report["results"][0]["size_bytes"] == len(payload)

    def test_multiple_corrupt_objects_all_reported(self, tmp_path: pathlib.Path) -> None:
        """All corruptions are reported — not just the first one."""
        repo = _make_repo(tmp_path)
        damaged = []
        for i in range(3):
            damaged.append(_write_object(repo, f"corrupt me {i}".encode()))
        for object_id in damaged:
            _corrupt_object(repo, object_id)
        outcome = _vo(repo, "--json", *damaged)
        report = json.loads(outcome.output)
        assert report["failed"] == 3
        assert report["all_ok"] is False
723
724
725 # ---------------------------------------------------------------------------
726 # Stress
727 # ---------------------------------------------------------------------------
728
729
class TestStress:
    """Volume tests: large stores, repeated invocations and long stdin lists."""

    def test_100_object_store_all_pass(self, tmp_path: pathlib.Path) -> None:
        """``--all`` over 100 intact objects checks all of them and passes."""
        repo = _make_repo(tmp_path)
        for i in range(100):
            _write_object(repo, f"stress content {i}".encode())
        data = json.loads(_vo(repo, "--all", "--json").output)
        assert data["checked"] == 100
        assert data["all_ok"] is True

    def test_1000_object_store_all_pass(self, tmp_path: pathlib.Path) -> None:
        """``--all`` over 1000 intact objects checks all of them and passes."""
        repo = _make_repo(tmp_path)
        for i in range(1000):
            _write_object(repo, f"large stress {i}".encode())
        data = json.loads(_vo(repo, "--all", "--json").output)
        assert data["checked"] == 1000
        assert data["all_ok"] is True

    def test_200_sequential_verifies(self, tmp_path: pathlib.Path) -> None:
        """Verifying the same object 200 times in a row succeeds every time."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, _FAKE_CONTENT)
        for i in range(200):
            result = _vo(repo, oid)
            assert result.exit_code == 0, f"failed at iteration {i}"

    def test_stdin_200_ids(self, tmp_path: pathlib.Path) -> None:
        """200 IDs piped on stdin are all checked and all pass."""
        repo = _make_repo(tmp_path)
        oids = [_write_object(repo, f"content_{i}".encode()) for i in range(200)]
        # Built with join/concatenation rather than an f-string: a backslash
        # inside an f-string replacement field is a SyntaxError before 3.12.
        stdin_text = "\n".join(oids) + "\n"
        data = json.loads(_vo(repo, "--stdin", "--json", stdin=stdin_text).output)
        assert data["checked"] == 200
        assert data["all_ok"] is True

    def test_duration_ms_bounded_for_small_op(self, tmp_path: pathlib.Path) -> None:
        """Verifying one small object should complete in well under 5 seconds."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"small")
        data = json.loads(_vo(repo, "--json", oid).output)
        assert data["duration_ms"] < 5_000
767
768
769 # ---------------------------------------------------------------------------
770 # Flag registration
771 # ---------------------------------------------------------------------------
772
773
class TestRegisterFlags:
    """Checks the argparse flags that ``register`` attaches to ``verify-object``."""

    def _parse(self, *args: str) -> "argparse.Namespace":
        """Parse ``verify-object`` plus *args* with a parser built by ``register``."""
        import argparse

        from muse.cli.commands.verify_object import register

        parser = argparse.ArgumentParser()
        register(parser.add_subparsers())
        argv = ["verify-object", *args]
        return parser.parse_args(argv)

    def test_default_json_out_is_false(self) -> None:
        """Without any JSON flag, ``json_out`` is ``False``."""
        parsed = self._parse(fake_id("a"))
        assert parsed.json_out is False

    def test_json_flag_sets_json_out(self) -> None:
        """``--json`` turns ``json_out`` on."""
        parsed = self._parse("--json", fake_id("a"))
        assert parsed.json_out is True

    def test_j_shorthand_sets_json_out(self) -> None:
        """``-j`` turns ``json_out`` on."""
        parsed = self._parse("-j", fake_id("a"))
        assert parsed.json_out is True
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago