tests/test_cmd_verify_object.py · gabriel/muse

test_cmd_verify_object.py python

793 lines 33.0 KB

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago

1	"""Comprehensive tests for ``muse verify-object``.
2
3	Coverage tiers
4	--------------
5	- Unit: _iter_all_object_ids, _verify_one (all paths), schema, constants
6	- Integration: JSON/text/quiet, --all, --stdin, --fail-fast, ordering, counts
7	- Data integrity: truncated file, zero-byte blob, large-object streaming
8	- Security: stderr routing, ANSI stripping, path traversal, unicode, CRLF,
9	symlink shard directory
10	- Stress: 100-object --all, 1000-object --all, 200 sequential verifies,
11	stdin 200 ids, duration bounded for small ops
12	"""
13	from __future__ import annotations
14
15	import json
16	import os
17	import pathlib
18
19	import pytest
20
21	from muse.core.types import blob_id, fake_id
22	from muse.core.errors import ExitCode
23	from muse.core.object_store import object_path, write_object
24	from muse.core.paths import muse_dir, objects_dir
25	from tests.cli_test_helper import CliRunner, InvokeResult
26
# Single CLI runner instance; ``_vo`` below routes every CLI invocation through it.
runner = CliRunner()

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Default payload written by several tests, and the ID ``blob_id`` derives from it.
_FAKE_CONTENT = b"hello muse"
_GOOD_OID = blob_id(_FAKE_CONTENT)
35
36
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create a minimal repository skeleton under ``tmp_path`` and return its root.

    The muse directory receives ``objects``, ``commits``, ``snapshots`` and
    ``refs/heads`` sub-directories plus a ``HEAD`` file and a ``repo.json``.
    """
    root = tmp_path / "repo"
    meta = muse_dir(root)
    layout = (("objects",), ("commits",), ("snapshots",), ("refs", "heads"))
    for parts in layout:
        meta.joinpath(*parts).mkdir(parents=True)
    meta.joinpath("HEAD").write_text("ref: refs/heads/main")
    repo_config = json.dumps({"repo_id": "r1", "domain": "code"})
    meta.joinpath("repo.json").write_text(repo_config)
    return root
47
48
def _write_object(repo: pathlib.Path, content: bytes) -> str:
    """Store ``content`` in the repo's object store and return the ID it was stored under.

    The ID is whatever ``blob_id`` computes for ``content``.
    """
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id
54
55
def _corrupt_object(repo: pathlib.Path, oid: str) -> None:
    """Replace the stored bytes of ``oid`` with garbage to simulate bit-rot.

    The object store writes files as 0o444 (read-only) to enforce
    immutability, so the file is made writable first.
    """
    target = object_path(repo, oid)
    target.chmod(0o644)
    garbage = b"corrupted data that does not hash to the oid"
    target.write_bytes(garbage)
65
66
def _truncate_object(repo: pathlib.Path, oid: str, keep_bytes: int = 0) -> None:
    """Cut the stored file for ``oid`` down to its first ``keep_bytes`` bytes."""
    target = object_path(repo, oid)
    # Make the file writable before rewriting it.
    target.chmod(0o644)
    prefix = target.read_bytes()[:keep_bytes]
    target.write_bytes(prefix)
73
74
def _vo(repo: pathlib.Path, *args: str, stdin: str | None = None) -> InvokeResult:
    """Invoke ``muse verify-object`` through the shared CLI runner.

    Args:
        repo: Repository root; exported to the command as ``MUSE_REPO_ROOT``.
        *args: Extra command-line arguments placed after ``verify-object``.
        stdin: Text fed to the command's standard input, or ``None`` for none.

    Returns:
        The runner's ``InvokeResult`` for the invocation.
    """
    # Imported lazily so that merely importing this test module does not
    # import the CLI application.
    from muse.cli.app import main as cli

    return runner.invoke(
        cli,
        ["verify-object", *args],
        env={"MUSE_REPO_ROOT": str(repo)},
        input=stdin,
    )
83
84
85	# ---------------------------------------------------------------------------
86	# Unit — _iter_all_object_ids
87	# ---------------------------------------------------------------------------
88
89
class TestIterAllObjectIds:
    """Unit tests for ``_iter_all_object_ids``, the object-store enumerator."""

    def test_empty_store(self, tmp_path: pathlib.Path) -> None:
        """A freshly created repo with no objects yields an empty list."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        repo = _make_repo(tmp_path)
        assert _iter_all_object_ids(repo) == []

    def test_missing_objects_dir(self, tmp_path: pathlib.Path) -> None:
        """Removing the objects directory yields an empty list, not an error."""
        from muse.cli.commands.verify_object import _iter_all_object_ids
        import shutil

        repo = _make_repo(tmp_path)
        shutil.rmtree(objects_dir(repo))
        assert _iter_all_object_ids(repo) == []

    def test_finds_written_object(self, tmp_path: pathlib.Path) -> None:
        """An object written to the store appears in the enumeration."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"test content")
        assert oid in _iter_all_object_ids(repo)

    def test_multiple_objects_sorted(self, tmp_path: pathlib.Path) -> None:
        """All written objects are returned, and the list is in sorted order."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        repo = _make_repo(tmp_path)
        oids = [_write_object(repo, f"content {i}".encode()) for i in range(5)]
        found = _iter_all_object_ids(repo)
        assert set(oids) == set(found)
        assert found == sorted(found)

    def test_symlinks_in_shard_skipped(self, tmp_path: pathlib.Path) -> None:
        """A symlink placed beside a real object does not list that object twice."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"real content")
        shard = object_path(repo, oid).parent
        sym = shard / "symlink_file"
        sym.symlink_to(object_path(repo, oid))
        ids = _iter_all_object_ids(repo)
        assert ids.count(oid) == 1

    def test_short_shard_dir_names_ignored(self, tmp_path: pathlib.Path) -> None:
        """A directory named ``abc`` under ``sha256`` contributes no IDs."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        repo = _make_repo(tmp_path)
        # Uses the module-level ``objects_dir`` (from ``muse.core.paths``) so
        # this test resolves the store the same way as the rest of the file.
        (objects_dir(repo) / "sha256" / "abc").mkdir(parents=True, exist_ok=True)
        assert _iter_all_object_ids(repo) == []

    def test_returns_sha256_prefixed_ids(self, tmp_path: pathlib.Path) -> None:
        """Every returned ID carries the ``sha256:`` prefix."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        repo = _make_repo(tmp_path)
        _write_object(repo, b"prefix check")
        ids = _iter_all_object_ids(repo)
        assert all(oid.startswith("sha256:") for oid in ids)
140
141
142	# ---------------------------------------------------------------------------
143	# Unit — _verify_one
144	# ---------------------------------------------------------------------------
145
146
class TestVerifyOne:
    """Unit tests for ``_verify_one`` across success, missing, corrupt and I/O-error paths."""

    def test_valid_object_ok(self, tmp_path: pathlib.Path) -> None:
        """An intact object verifies OK, with its size reported and no error."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"hello world")
        result = _verify_one(repo, oid)
        assert result["ok"] is True
        assert result["size_bytes"] == len(b"hello world")
        assert result["error"] is None

    def test_ok_result_preserves_object_id(self, tmp_path: pathlib.Path) -> None:
        """The result echoes back the object ID that was passed in."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"id check")
        result = _verify_one(repo, oid)
        assert result["object_id"] == oid

    def test_error_is_none_when_ok(self, tmp_path: pathlib.Path) -> None:
        """A successful verification carries ``error`` set to ``None``."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"clean")
        result = _verify_one(repo, oid)
        assert result["ok"] is True
        assert result["error"] is None

    def test_size_counted_during_hash(self, tmp_path: pathlib.Path) -> None:
        """``size_bytes`` equals the number of bytes stored (12345 here)."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        content = b"x" * 12345
        oid = _write_object(repo, content)
        result = _verify_one(repo, oid)
        assert result["size_bytes"] == 12345

    def test_zero_byte_object_ok(self, tmp_path: pathlib.Path) -> None:
        """An empty object verifies OK with a size of zero."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"")
        result = _verify_one(repo, oid)
        assert result["ok"] is True
        assert result["size_bytes"] == 0

    def test_missing_object_not_ok(self, tmp_path: pathlib.Path) -> None:
        """A well-formed ID with no stored file reports "not found" and no size."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        result = _verify_one(repo, blob_id(b"nonexistent object"))
        assert result["ok"] is False
        assert "not found" in (result["error"] or "")
        assert result["size_bytes"] is None

    def test_corrupt_object_mismatch(self, tmp_path: pathlib.Path) -> None:
        """Overwritten object bytes are reported as a hash mismatch."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"original content")
        _corrupt_object(repo, oid)
        result = _verify_one(repo, oid)
        assert result["ok"] is False
        assert "mismatch" in (result["error"] or "")

    def test_corrupt_object_has_size_bytes(self, tmp_path: pathlib.Path) -> None:
        """Even on hash mismatch, size_bytes is populated (bytes were read)."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"original content")
        _corrupt_object(repo, oid)
        result = _verify_one(repo, oid)
        assert result["size_bytes"] is not None
        assert result["size_bytes"] > 0

    def test_truncated_object_mismatch(self, tmp_path: pathlib.Path) -> None:
        """An object cut down to four bytes is reported as a hash mismatch."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"original content that will be truncated")
        _truncate_object(repo, oid, keep_bytes=4)
        result = _verify_one(repo, oid)
        assert result["ok"] is False
        assert "mismatch" in (result["error"] or "")

    def test_empty_truncated_object_mismatch(self, tmp_path: pathlib.Path) -> None:
        """An object truncated to zero bytes no longer verifies."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"will be emptied")
        _truncate_object(repo, oid, keep_bytes=0)
        result = _verify_one(repo, oid)
        assert result["ok"] is False

    def test_invalid_object_id_format(self, tmp_path: pathlib.Path) -> None:
        """A string that is not a valid object ID yields a failed result with an error."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        result = _verify_one(repo, "not-a-sha256")
        assert result["ok"] is False
        assert result["error"] is not None

    def test_invalid_object_id_never_raises(self, tmp_path: pathlib.Path) -> None:
        """An ID made of NUL characters returns a failed dict instead of raising."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        result = _verify_one(repo, "\x00" * 64)
        assert isinstance(result, dict)
        assert result["ok"] is False

    def test_io_error_returns_error_dict(self, tmp_path: pathlib.Path) -> None:
        """OSError during read returns an error result, never raises.

        The read failure is provoked by stripping every permission bit from
        the object file.  Where that does not actually block reading (for
        example when running as root), the precondition cannot be set up and
        the test is skipped rather than reported as a failure.
        """
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"to be made unreadable")
        obj_file = object_path(repo, oid)
        obj_file.chmod(0o000)
        try:
            if os.access(obj_file, os.R_OK):
                pytest.skip("file permissions are not enforced for this user/platform")
            result = _verify_one(repo, oid)
            assert result["ok"] is False
            assert result["error"] is not None
            assert "I/O error" in (result["error"] or "")
        finally:
            # Restore access so pytest can clean up the temporary directory.
            obj_file.chmod(0o644)
260
261
class TestObjectResultSchema:
    """Pin the set of annotated fields declared on ``_ObjectResult``."""

    def test_fields(self) -> None:
        """``_ObjectResult`` annotates exactly the four expected field names."""
        from muse.cli.commands.verify_object import _ObjectResult

        expected_fields = {"object_id", "ok", "size_bytes", "error"}
        declared_fields = set(_ObjectResult.__annotations__)
        assert declared_fields == expected_fields
266
267
class TestChunkConstant:
    """Sanity checks on the ``_CHUNK`` constant."""

    def test_chunk_is_power_of_two(self) -> None:
        """``_CHUNK`` is positive and has exactly one bit set."""
        from muse.cli.commands.verify_object import _CHUNK

        assert _CHUNK > 0
        # Clearing the lowest set bit of a power of two leaves zero.
        without_lowest_bit = _CHUNK & (_CHUNK - 1)
        assert without_lowest_bit == 0
273
274
275	# ---------------------------------------------------------------------------
276	# Integration — JSON output
277	# ---------------------------------------------------------------------------
278
279
class TestJsonOutput:
    """Integration tests for the JSON document emitted by ``verify-object --json``."""

    def test_valid_object_all_ok(self, tmp_path: pathlib.Path) -> None:
        """One intact object: exit 0, ``all_ok`` true, counts and size correct."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, _FAKE_CONTENT)
        result = _vo(repo, "--json", oid)
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert data["all_ok"] is True
        assert data["checked"] == 1
        assert data["failed"] == 0
        assert data["results"][0]["ok"] is True
        assert data["results"][0]["size_bytes"] == len(_FAKE_CONTENT)

    def test_missing_object_fails(self, tmp_path: pathlib.Path) -> None:
        """An ID with no stored object exits with USER_ERROR and counts one failure."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, "--json", blob_id(b"nonexistent object"))
        assert result.exit_code == ExitCode.USER_ERROR
        data = json.loads(result.output)
        assert data["all_ok"] is False
        assert data["failed"] == 1

    def test_corrupt_object_fails(self, tmp_path: pathlib.Path) -> None:
        """A corrupted object exits with USER_ERROR and reports a mismatch."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"good content")
        _corrupt_object(repo, oid)
        result = _vo(repo, "--json", oid)
        assert result.exit_code == ExitCode.USER_ERROR
        data = json.loads(result.output)
        assert data["results"][0]["ok"] is False
        assert "mismatch" in data["results"][0]["error"]

    def test_mixed_pass_fail(self, tmp_path: pathlib.Path) -> None:
        """One good and one missing ID: both are checked, one is counted failed."""
        repo = _make_repo(tmp_path)
        good = _write_object(repo, b"good")
        bad = blob_id(b"nonexistent object b")
        result = _vo(repo, "--json", good, bad)
        assert result.exit_code == ExitCode.USER_ERROR
        data = json.loads(result.output)
        assert data["checked"] == 2
        assert data["failed"] == 1

    def test_json_shorthand(self, tmp_path: pathlib.Path) -> None:
        """The ``-j`` short flag produces the same JSON document as ``--json``."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"data")
        # Pass the short form; ``--json`` itself is covered by the other tests
        # in this class.
        result = _vo(repo, "-j", oid)
        assert result.exit_code == 0
        assert "all_ok" in json.loads(result.output)

    def test_duration_ms_and_exit_code_present(self, tmp_path: pathlib.Path) -> None:
        """The document carries a non-negative float ``duration_ms`` and ``exit_code`` 0."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, _FAKE_CONTENT)
        data = json.loads(_vo(repo, "--json", oid).output)
        assert "duration_ms" in data
        assert isinstance(data["duration_ms"], float)
        assert data["duration_ms"] >= 0.0
        assert data["exit_code"] == 0

    def test_exit_code_nonzero_on_failure(self, tmp_path: pathlib.Path) -> None:
        """On failure the embedded ``exit_code`` is non-zero and the duration is still valid."""
        repo = _make_repo(tmp_path)
        data = json.loads(_vo(repo, "--json", blob_id(b"nonexistent object")).output)
        assert data["exit_code"] != 0
        assert data["duration_ms"] >= 0.0

    def test_results_order_matches_input(self, tmp_path: pathlib.Path) -> None:
        """Results must appear in the same order as the positional arguments."""
        repo = _make_repo(tmp_path)
        oids = [_write_object(repo, f"ordered {i}".encode()) for i in range(5)]
        data = json.loads(_vo(repo, "--json", *oids).output)
        returned = [r["object_id"] for r in data["results"]]
        assert returned == oids

    def test_checked_equals_len_results(self, tmp_path: pathlib.Path) -> None:
        """``checked`` equals the number of entries in ``results``."""
        repo = _make_repo(tmp_path)
        oids = [_write_object(repo, f"cnt {i}".encode()) for i in range(3)]
        data = json.loads(_vo(repo, "--json", *oids).output)
        assert data["checked"] == len(data["results"])

    def test_failed_count_matches_failed_results(self, tmp_path: pathlib.Path) -> None:
        """``failed`` equals the number of results whose ``ok`` is false (two here)."""
        repo = _make_repo(tmp_path)
        good = _write_object(repo, b"ok")
        bad1 = blob_id(b"missing a")
        bad2 = blob_id(b"missing b")
        data = json.loads(_vo(repo, "--json", good, bad1, bad2).output)
        assert data["failed"] == sum(1 for r in data["results"] if not r["ok"])
        assert data["failed"] == 2

    def test_error_null_when_ok(self, tmp_path: pathlib.Path) -> None:
        """A passing result serialises ``error`` as JSON ``null``."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"clean object")
        data = json.loads(_vo(repo, "--json", oid).output)
        assert data["results"][0]["error"] is None

    def test_duplicate_id_verified_twice(self, tmp_path: pathlib.Path) -> None:
        """Passing the same OID twice verifies it twice — no implicit dedup."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"dedup test")
        data = json.loads(_vo(repo, "--json", oid, oid).output)
        assert data["checked"] == 2
        assert data["all_ok"] is True
379
380
381	# ---------------------------------------------------------------------------
382	# Integration — text output
383	# ---------------------------------------------------------------------------
384
385
class TestTextOutput:
    """Integration tests for the default (text) output of ``verify-object``."""

    def test_ok_label_and_size(self, tmp_path: pathlib.Path) -> None:
        """A passing object prints an ``OK`` label and its byte count."""
        root = _make_repo(tmp_path)
        object_id = _write_object(root, _FAKE_CONTENT)
        run = _vo(root, object_id)
        assert run.exit_code == 0
        printed = run.output
        assert "OK" in printed
        expected_size = str(len(_FAKE_CONTENT))
        assert expected_size in printed

    def test_fail_label_on_missing(self, tmp_path: pathlib.Path) -> None:
        """A missing object prints ``FAIL`` and exits with USER_ERROR."""
        root = _make_repo(tmp_path)
        missing_id = blob_id(b"nonexistent object c")
        run = _vo(root, missing_id)
        assert "FAIL" in run.output
        assert run.exit_code == ExitCode.USER_ERROR

    def test_summary_line_present(self, tmp_path: pathlib.Path) -> None:
        """Text mode always ends with a Checked/Failed summary line."""
        root = _make_repo(tmp_path)
        object_id = _write_object(root, b"summary test")
        printed = _vo(root, object_id).output
        assert "Checked:" in printed
        assert "Failed:" in printed

    def test_summary_reflects_counts(self, tmp_path: pathlib.Path) -> None:
        """The summary shows two checked and one failed for a good/missing pair."""
        root = _make_repo(tmp_path)
        present = _write_object(root, b"good")
        absent = blob_id(b"missing for summary")
        printed = _vo(root, present, absent).output
        assert "Checked: 2" in printed
        assert "Failed: 1" in printed

    def test_summary_all_pass(self, tmp_path: pathlib.Path) -> None:
        """With ``--all`` over three intact objects the summary shows 3 / 0."""
        root = _make_repo(tmp_path)
        for index in range(3):
            _write_object(root, f"text pass {index}".encode())
        printed = _vo(root, "--all").output
        assert "Checked: 3" in printed
        assert "Failed: 0" in printed
424
425
426	# ---------------------------------------------------------------------------
427	# Integration — --quiet mode
428	# ---------------------------------------------------------------------------
429
430
class TestQuietMode:
    """Integration tests for ``--quiet``: exit code only, no output."""

    def test_all_ok_exits_0(self, tmp_path: pathlib.Path) -> None:
        """A passing object in quiet mode exits 0 and prints nothing."""
        root = _make_repo(tmp_path)
        object_id = _write_object(root, _FAKE_CONTENT)
        run = _vo(root, "--quiet", object_id)
        assert run.exit_code == 0
        assert run.output.strip() == ""

    def test_failure_exits_1(self, tmp_path: pathlib.Path) -> None:
        """A missing object in quiet mode exits with USER_ERROR and prints nothing."""
        root = _make_repo(tmp_path)
        missing_id = blob_id(b"nonexistent object d")
        run = _vo(root, "--quiet", missing_id)
        assert run.exit_code == ExitCode.USER_ERROR
        assert run.output.strip() == ""

    def test_quiet_with_text_format_no_output(self, tmp_path: pathlib.Path) -> None:
        """--quiet suppresses output regardless of --format."""
        # NOTE(review): no format flag is actually passed here, so only the
        # default output mode is exercised — confirm whether that is intended.
        root = _make_repo(tmp_path)
        object_id = _write_object(root, b"quiet text")
        printed = _vo(root, "--quiet", object_id).output
        assert printed.strip() == ""
451
452
453	# ---------------------------------------------------------------------------
454	# Integration — --all (fsck mode)
455	# ---------------------------------------------------------------------------
456
457
class TestAllMode:
    """Integration tests for ``--all``, which verifies every object in the store."""

    def test_empty_store_all_ok(self, tmp_path: pathlib.Path) -> None:
        """An empty store reports ``all_ok`` with zero objects checked."""
        root = _make_repo(tmp_path)
        run = _vo(root, "--all", "--json")
        report = json.loads(run.output)
        assert report["all_ok"] is True
        assert report["checked"] == 0

    def test_all_finds_written_objects(self, tmp_path: pathlib.Path) -> None:
        """Five written objects are all found and all pass."""
        root = _make_repo(tmp_path)
        for index in range(5):
            _write_object(root, f"content {index}".encode())
        run = _vo(root, "--all", "--json")
        report = json.loads(run.output)
        assert report["checked"] == 5
        assert report["all_ok"] is True

    def test_all_detects_corruption(self, tmp_path: pathlib.Path) -> None:
        """A corrupted object in the store is counted as one failure."""
        root = _make_repo(tmp_path)
        object_id = _write_object(root, b"good data")
        _corrupt_object(root, object_id)
        run = _vo(root, "--all", "--json")
        report = json.loads(run.output)
        assert report["failed"] == 1

    def test_all_plus_explicit_ids_rejected(self, tmp_path: pathlib.Path) -> None:
        """``--all`` combined with a positional ID is a usage error with empty stdout."""
        root = _make_repo(tmp_path)
        explicit_id = blob_id(b"explicit id arg")
        run = _vo(root, "--all", explicit_id)
        assert run.exit_code == ExitCode.USER_ERROR
        assert run.stdout_bytes == b""

    def test_all_plus_stdin_rejected(self, tmp_path: pathlib.Path) -> None:
        """--all + --stdin is rejected for consistency with --all + positional."""
        root = _make_repo(tmp_path)
        object_id = _write_object(root, b"stdin data")
        piped = f"{object_id}\n"
        run = _vo(root, "--all", "--stdin", stdin=piped)
        assert run.exit_code == ExitCode.USER_ERROR
        assert run.stdout_bytes == b""

    def test_all_quiet(self, tmp_path: pathlib.Path) -> None:
        """``--all --quiet`` over an intact store exits 0 and prints nothing."""
        root = _make_repo(tmp_path)
        _write_object(root, b"content")
        run = _vo(root, "--all", "--quiet")
        assert run.exit_code == 0
        assert run.output.strip() == ""
500
501
502	# ---------------------------------------------------------------------------
503	# Integration — --stdin
504	# ---------------------------------------------------------------------------
505
506
class TestStdinMode:
    """Integration tests for ``--stdin``, which reads object IDs from standard input."""

    def test_reads_ids_from_stdin(self, tmp_path: pathlib.Path) -> None:
        """A single ID supplied on stdin is checked and passes."""
        root = _make_repo(tmp_path)
        object_id = _write_object(root, _FAKE_CONTENT)
        run = _vo(root, "--stdin", "--json", stdin=f"{object_id}\n")
        report = json.loads(run.output)
        assert report["checked"] == 1
        assert report["all_ok"] is True

    def test_comments_and_blank_lines_skipped(self, tmp_path: pathlib.Path) -> None:
        """Blank lines and ``#`` comment lines on stdin are not counted as IDs."""
        root = _make_repo(tmp_path)
        object_id = _write_object(root, _FAKE_CONTENT)
        piped = f"\n# comment\n{object_id}\n\n"
        run = _vo(root, "--stdin", "--json", stdin=piped)
        report = json.loads(run.output)
        assert report["checked"] == 1

    def test_stdin_combines_with_positional(self, tmp_path: pathlib.Path) -> None:
        """A positional ID and a stdin ID are both checked in one run."""
        root = _make_repo(tmp_path)
        from_argv = _write_object(root, b"one")
        from_stdin = _write_object(root, b"two")
        run = _vo(root, "--stdin", "--json", from_argv, stdin=f"{from_stdin}\n")
        report = json.loads(run.output)
        assert report["checked"] == 2

    def test_empty_stdin_no_explicit_errors(self, tmp_path: pathlib.Path) -> None:
        """Empty stdin with no positional IDs exits with USER_ERROR."""
        root = _make_repo(tmp_path)
        run = _vo(root, "--stdin", "--json", stdin="")
        assert run.exit_code == ExitCode.USER_ERROR

    def test_crlf_line_endings_stripped(self, tmp_path: pathlib.Path) -> None:
        """Windows CRLF line endings must not corrupt the object ID."""
        root = _make_repo(tmp_path)
        object_id = _write_object(root, b"crlf test")
        run = _vo(root, "--stdin", "--json", stdin=f"{object_id}\r\n")
        report = json.loads(run.output)
        assert report["all_ok"] is True
        first = report["results"][0]
        assert first["object_id"] == object_id
540
541
542	# ---------------------------------------------------------------------------
543	# Integration — --fail-fast
544	# ---------------------------------------------------------------------------
545
546
class TestFailFast:
    """Integration tests for ``--fail-fast``, which stops at the first failure."""

    def test_stops_after_first_failure(self, tmp_path: pathlib.Path) -> None:
        """With --fail-fast, only the first failing result appears in output."""
        root = _make_repo(tmp_path)
        first_missing = blob_id(b"missing ff a")
        second_missing = blob_id(b"missing ff b")
        present = _write_object(root, b"good after bad")
        # Argument order is missing, missing, present — the run should stop
        # after the first missing ID.
        run = _vo(root, "--fail-fast", "--json", first_missing, second_missing, present)
        report = json.loads(run.output)
        assert report["checked"] == 1
        assert report["failed"] == 1
        assert report["all_ok"] is False

    def test_no_effect_when_all_pass(self, tmp_path: pathlib.Path) -> None:
        """--fail-fast is a no-op when every object passes."""
        root = _make_repo(tmp_path)
        object_ids = [_write_object(root, f"ff pass {i}".encode()) for i in range(5)]
        run = _vo(root, "--fail-fast", "--json", *object_ids)
        report = json.loads(run.output)
        assert report["checked"] == 5
        assert report["all_ok"] is True

    def test_fail_fast_exits_nonzero(self, tmp_path: pathlib.Path) -> None:
        """A failing run under ``--fail-fast`` exits with USER_ERROR."""
        root = _make_repo(tmp_path)
        missing_id = blob_id(b"missing ff c")
        run = _vo(root, "--fail-fast", "--json", missing_id)
        assert run.exit_code == ExitCode.USER_ERROR

    def test_fail_fast_with_all(self, tmp_path: pathlib.Path) -> None:
        """--fail-fast + --all stops the scan on the first corrupt object."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        root = _make_repo(tmp_path)
        for index in range(10):
            _write_object(root, f"store {index}".encode())
        # Corrupt the first object in enumeration order.
        stored_ids = _iter_all_object_ids(root)
        _corrupt_object(root, stored_ids[0])
        run = _vo(root, "--all", "--fail-fast", "--json")
        report = json.loads(run.output)
        # Should have stopped early — fewer objects checked than are stored.
        assert report["checked"] < len(stored_ids)
        assert report["failed"] == 1

    def test_fail_fast_duration_ms_present(self, tmp_path: pathlib.Path) -> None:
        """The JSON document still carries a non-negative ``duration_ms``."""
        root = _make_repo(tmp_path)
        missing_id = blob_id(b"missing ff d")
        run = _vo(root, "--fail-fast", "--json", missing_id)
        report = json.loads(run.output)
        assert "duration_ms" in report
        assert report["duration_ms"] >= 0.0
592
593
594	# ---------------------------------------------------------------------------
595	# Security
596	# ---------------------------------------------------------------------------
597
598
class TestSecurity:
    """Security-oriented tests: error routing, hostile IDs, symlinks and CR handling."""

    def test_format_error_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """An ID from ``fake_id`` fails with USER_ERROR and no traceback in the output."""
        # NOTE(review): despite the name, nothing here inspects stderr —
        # confirm whether an stderr assertion was intended.
        repo = _make_repo(tmp_path)
        result = _vo(repo, fake_id("a"))
        assert result.exit_code == ExitCode.USER_ERROR
        assert "Traceback" not in result.output

    def test_no_traceback_on_bad_format(self, tmp_path: pathlib.Path) -> None:
        """An ID from ``fake_id`` never leaks a Python traceback into the output."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, fake_id("b"))
        assert "Traceback" not in result.output

    def test_ansi_in_error_message_stripped_text(self, tmp_path: pathlib.Path) -> None:
        """Text output for a missing object contains no ESC (``\\x1b``) bytes."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, blob_id(b"nonexistent"))
        assert "\x1b" not in result.output

    def test_invalid_id_returns_error_not_crash(self, tmp_path: pathlib.Path) -> None:
        """A malformed ID exits with USER_ERROR and prints no traceback."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, "not-a-sha256")
        assert result.exit_code == ExitCode.USER_ERROR
        assert "Traceback" not in result.output

    def test_no_ids_errors_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """Running with no IDs at all is a usage error reported on stderr."""
        repo = _make_repo(tmp_path)
        result = _vo(repo)
        assert result.exit_code == ExitCode.USER_ERROR
        assert "error" in result.stderr.lower()

    def test_path_traversal_in_object_id_rejected(self, tmp_path: pathlib.Path) -> None:
        """Path-traversal-looking IDs must be rejected by validation before any I/O."""
        repo = _make_repo(tmp_path)
        traversal = f"sha256:../../etc/passwd{'a' * 50}"
        result = _vo(repo, "--json", traversal)
        # Validation must reject it — never attempts to open a path.
        assert result.exit_code == ExitCode.USER_ERROR
        data = json.loads(result.output)
        # The error message explains the format violation, not an fs operation.
        assert data["results"][0]["ok"] is False
        assert "expected" in data["results"][0]["error"]

    def test_unicode_in_object_id_rejected(self, tmp_path: pathlib.Path) -> None:
        """An ID containing a non-ASCII character exits with USER_ERROR."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, f"sha256:café{'a' * 60}")
        assert result.exit_code == ExitCode.USER_ERROR

    def test_symlink_shard_directory_skipped(self, tmp_path: pathlib.Path) -> None:
        """A symlinked shard directory must not be followed during --all."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        repo = _make_repo(tmp_path)
        # Write a real object so the algo dir exists.
        real_oid = _write_object(repo, b"real")
        algo_dir = objects_dir(repo) / "sha256"
        # Directory outside the repo holding a decoy file.  The 62-character
        # name presumably matches the store's per-shard file naming — TODO
        # confirm against the object-store layout.
        outside = tmp_path / "outside"
        outside.mkdir()
        (outside / ("a" * 62)).write_bytes(b"not a stored object")
        # Add a symlink shard that points outside the repo.
        sym_shard = algo_dir / "ff"
        sym_shard.symlink_to(outside, target_is_directory=True)
        ids = _iter_all_object_ids(repo)
        # The symlinked shard's entries must not appear: only the real object
        # may be listed.
        assert all(oid.startswith("sha256:") for oid in ids)
        assert ids == [real_oid]

    def test_crlf_injection_in_stdin_does_not_corrupt_id(self, tmp_path: pathlib.Path) -> None:
        r"""A ``\r`` embedded in a stdin line must not be part of the stored OID."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"crlf injection")
        # Feed oid with embedded \r before the newline.
        data = json.loads(_vo(repo, "--stdin", "--json", stdin=f"{oid}\r\n").output)
        assert data["all_ok"] is True
        # The reported ID must be the clean one, without the carriage return.
        assert data["results"][0]["object_id"] == oid

    def test_all_error_goes_to_stderr_not_stdout(self, tmp_path: pathlib.Path) -> None:
        """Argument errors for --all always land on stderr, stdout stays empty."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, "--all", "--stdin", stdin="")
        assert result.stdout_bytes == b""
        assert len(result.stderr) > 0
674
675
676	# ---------------------------------------------------------------------------
677	# Data integrity
678	# ---------------------------------------------------------------------------
679
680
class TestDataIntegrity:
    """Data-integrity tests: empty blobs, truncation, large objects, multiple corruptions."""

    def test_zero_byte_blob_round_trips(self, tmp_path: pathlib.Path) -> None:
        """A zero-byte object has a well-defined SHA-256 and must verify clean."""
        root = _make_repo(tmp_path)
        object_id = _write_object(root, b"")
        run = _vo(root, "--json", object_id)
        report = json.loads(run.output)
        assert report["all_ok"] is True
        assert report["results"][0]["size_bytes"] == 0

    def test_truncated_file_is_hash_mismatch(self, tmp_path: pathlib.Path) -> None:
        """A file cut down to three bytes is reported as a hash mismatch."""
        root = _make_repo(tmp_path)
        object_id = _write_object(root, b"file that will be truncated")
        _truncate_object(root, object_id, keep_bytes=3)
        run = _vo(root, "--json", object_id)
        first = json.loads(run.output)["results"][0]
        assert first["ok"] is False
        assert "mismatch" in first["error"]

    def test_completely_emptied_file_is_hash_mismatch(self, tmp_path: pathlib.Path) -> None:
        """A file truncated to zero bytes no longer verifies."""
        root = _make_repo(tmp_path)
        object_id = _write_object(root, b"non-empty content")
        _truncate_object(root, object_id, keep_bytes=0)
        run = _vo(root, "--json", object_id)
        first = json.loads(run.output)["results"][0]
        assert first["ok"] is False

    def test_large_object_streams_without_loading_all(self, tmp_path: pathlib.Path) -> None:
        """A 4 MiB object must verify correctly via streaming (no heap spike)."""
        root = _make_repo(tmp_path)
        four_mib = 4 * 1024 * 1024
        payload = b"a" * four_mib
        object_id = _write_object(root, payload)
        run = _vo(root, "--json", object_id)
        report = json.loads(run.output)
        assert report["all_ok"] is True
        assert report["results"][0]["size_bytes"] == len(payload)

    def test_multiple_corrupt_objects_all_reported(self, tmp_path: pathlib.Path) -> None:
        """All corruptions are reported — not just the first one."""
        root = _make_repo(tmp_path)
        object_ids = [_write_object(root, f"corrupt me {i}".encode()) for i in range(3)]
        for object_id in object_ids:
            _corrupt_object(root, object_id)
        run = _vo(root, "--json", *object_ids)
        report = json.loads(run.output)
        assert report["failed"] == 3
        assert report["all_ok"] is False
723
724
725	# ---------------------------------------------------------------------------
726	# Stress
727	# ---------------------------------------------------------------------------
728
729
class TestStress:
    """Stress tests: larger stores, repeated invocations and long stdin lists."""

    def test_100_object_store_all_pass(self, tmp_path: pathlib.Path) -> None:
        """``--all`` over 100 intact objects checks all of them and passes."""
        repo = _make_repo(tmp_path)
        for i in range(100):
            _write_object(repo, f"stress content {i}".encode())
        data = json.loads(_vo(repo, "--all", "--json").output)
        assert data["checked"] == 100
        assert data["all_ok"] is True

    def test_1000_object_store_all_pass(self, tmp_path: pathlib.Path) -> None:
        """``--all`` over 1000 intact objects checks all of them and passes."""
        repo = _make_repo(tmp_path)
        for i in range(1000):
            _write_object(repo, f"large stress {i}".encode())
        data = json.loads(_vo(repo, "--all", "--json").output)
        assert data["checked"] == 1000
        assert data["all_ok"] is True

    def test_200_sequential_verifies(self, tmp_path: pathlib.Path) -> None:
        """Verifying the same object 200 times in a row succeeds every time."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, _FAKE_CONTENT)
        for i in range(200):
            result = _vo(repo, oid)
            assert result.exit_code == 0, f"failed at iteration {i}"

    def test_stdin_200_ids(self, tmp_path: pathlib.Path) -> None:
        """200 IDs supplied on stdin are all checked and all pass."""
        repo = _make_repo(tmp_path)
        oids = [_write_object(repo, f"content_{i}".encode()) for i in range(200)]
        # Built outside an f-string: a backslash inside an f-string expression
        # is a SyntaxError on Python versions before 3.12.
        stdin_text = "\n".join(oids) + "\n"
        data = json.loads(_vo(repo, "--stdin", "--json", stdin=stdin_text).output)
        assert data["checked"] == 200
        assert data["all_ok"] is True

    def test_duration_ms_bounded_for_small_op(self, tmp_path: pathlib.Path) -> None:
        """Verifying one small object should complete in well under 5 seconds."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"small")
        data = json.loads(_vo(repo, "--json", oid).output)
        assert data["duration_ms"] < 5_000
767
768
769	# ---------------------------------------------------------------------------
770	# Flag registration
771	# ---------------------------------------------------------------------------
772
773
class TestRegisterFlags:
    """Argument-parser tests for the flags that ``register`` adds to ``verify-object``."""

    def _parse(self, *args: str) -> "argparse.Namespace":
        """Parse ``verify-object`` followed by ``args`` with a throwaway parser."""
        import argparse
        from muse.cli.commands.verify_object import register

        parser = argparse.ArgumentParser()
        register(parser.add_subparsers())
        argv = ["verify-object", *args]
        return parser.parse_args(argv)

    def test_default_json_out_is_false(self) -> None:
        """Without a JSON flag, ``json_out`` is ``False``."""
        parsed = self._parse(fake_id("a"))
        assert parsed.json_out is False

    def test_json_flag_sets_json_out(self) -> None:
        """``--json`` sets ``json_out`` to ``True``."""
        parsed = self._parse("--json", fake_id("a"))
        assert parsed.json_out is True

    def test_j_shorthand_sets_json_out(self) -> None:
        """``-j`` sets ``json_out`` to ``True``."""
        parsed = self._parse("-j", fake_id("a"))
        assert parsed.json_out is True

File History 4 commits

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago

sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago

sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago

sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor ⚠ 29 days ago

function _make_repo

function _write_object

function _corrupt_object

function _truncate_object

function _vo

class TestIterAllObjectIds

function test_empty_store

function test_missing_objects_dir

function test_finds_written_object

function test_multiple_objects_sorted

function test_symlinks_in_shard_skipped

function test_short_shard_dir_names_ignored

function test_returns_sha256_prefixed_ids

class TestVerifyOne

function test_valid_object_ok

function test_ok_result_preserves_object_id

function test_error_is_none_when_ok

function test_size_counted_during_hash

function test_zero_byte_object_ok

function test_missing_object_not_ok

function test_corrupt_object_mismatch

function test_corrupt_object_has_size_bytes

function test_truncated_object_mismatch

function test_empty_truncated_object_mismatch

function test_invalid_object_id_format

function test_invalid_object_id_never_raises

function test_io_error_returns_error_dict

class TestObjectResultSchema

function test_fields

class TestChunkConstant

function test_chunk_is_power_of_two

class TestJsonOutput

function test_valid_object_all_ok

function test_missing_object_fails

function test_corrupt_object_fails

function test_mixed_pass_fail

function test_json_shorthand

function test_duration_ms_and_exit_code_present

function test_exit_code_nonzero_on_failure

function test_results_order_matches_input

function test_checked_equals_len_results

function test_failed_count_matches_failed_results

function test_error_null_when_ok

function test_duplicate_id_verified_twice

class TestTextOutput

function test_ok_label_and_size

function test_fail_label_on_missing

function test_summary_line_present

function test_summary_reflects_counts

function test_summary_all_pass

class TestQuietMode

function test_all_ok_exits_0

function test_failure_exits_1

function test_quiet_with_text_format_no_output

class TestAllMode

function test_empty_store_all_ok

function test_all_finds_written_objects

function test_all_detects_corruption

function test_all_plus_explicit_ids_rejected

function test_all_plus_stdin_rejected

function test_all_quiet

class TestStdinMode

function test_reads_ids_from_stdin

function test_comments_and_blank_lines_skipped

function test_stdin_combines_with_positional

function test_empty_stdin_no_explicit_errors

function test_crlf_line_endings_stripped

class TestFailFast

function test_stops_after_first_failure

function test_no_effect_when_all_pass

function test_fail_fast_exits_nonzero

function test_fail_fast_with_all

function test_fail_fast_duration_ms_present

class TestSecurity

function test_format_error_goes_to_stderr

function test_no_traceback_on_bad_format

function test_ansi_in_error_message_stripped_text

function test_invalid_id_returns_error_not_crash

function test_no_ids_errors_to_stderr

function test_path_traversal_in_object_id_rejected

function test_unicode_in_object_id_rejected

function test_symlink_shard_directory_skipped

function test_crlf_injection_in_stdin_does_not_corrupt_id

function test_all_error_goes_to_stderr_not_stdout

class TestDataIntegrity

function test_zero_byte_blob_round_trips

function test_truncated_file_is_hash_mismatch

function test_completely_emptied_file_is_hash_mismatch

function test_large_object_streams_without_loading_all

function test_multiple_corrupt_objects_all_reported

class TestStress

function test_100_object_store_all_pass

function test_1000_object_store_all_pass

function test_200_sequential_verifies

function test_stdin_200_ids

function test_duration_ms_bounded_for_small_op

class TestRegisterFlags

function _parse

function test_default_json_out_is_false

function test_json_flag_sets_json_out

function test_j_shorthand_sets_json_out

Path: tests/test_cmd_verify_object.py

Lines: 793

Size: 33.0 KB

Lang: Python

Refsha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2

Object ID

sha256:4fc0b731aa42c1635e283df65954d1d27075de30b26a18759a500f019ee73e25…

Last commit

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2

fix: remove commit_exists filter from have anchor…

21 days ago

Quick links

Blame History