"""Comprehensive tests for ``muse verify-object``. Coverage tiers -------------- - Unit: _iter_all_object_ids, _verify_one (all paths), schema, constants - Integration: JSON/text/quiet, --all, --stdin, --fail-fast, ordering, counts - Data integrity: truncated file, zero-byte blob, large-object streaming - Security: stderr routing, ANSI stripping, path traversal, unicode, CRLF, symlink shard directory - Stress: 100-object --all, 1000-object --all, 200 sequential verifies, stdin 200 ids, duration bounded for small ops """ from __future__ import annotations import json import os import pathlib import pytest from muse.core.types import blob_id, fake_id from muse.core.errors import ExitCode from muse.core.object_store import object_path, write_object from muse.core.paths import muse_dir, objects_dir from tests.cli_test_helper import CliRunner, InvokeResult runner = CliRunner() # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- _FAKE_CONTENT = b"hello muse" _GOOD_OID = blob_id(_FAKE_CONTENT) def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: repo = tmp_path / "repo" dot_muse = muse_dir(repo) (dot_muse / "objects").mkdir(parents=True) (dot_muse / "commits").mkdir(parents=True) (dot_muse / "snapshots").mkdir(parents=True) (dot_muse / "refs" / "heads").mkdir(parents=True) (dot_muse / "HEAD").write_text("ref: refs/heads/main") (dot_muse / "repo.json").write_text(json.dumps({"repo_id": "r1", "domain": "code"})) return repo def _write_object(repo: pathlib.Path, content: bytes) -> str: """Write real content into the store and return its sha256:-prefixed ID.""" oid = blob_id(content) write_object(repo, oid, content) return oid def _corrupt_object(repo: pathlib.Path, oid: str) -> None: """Overwrite the object file with garbage (simulates bit-rot). The object store writes files as 0o444 (read-only) to enforce immutability. We must make the file writable before overwriting it in tests. """ obj_file = object_path(repo, oid) os.chmod(obj_file, 0o644) obj_file.write_bytes(b"corrupted data that does not hash to the oid") def _truncate_object(repo: pathlib.Path, oid: str, keep_bytes: int = 0) -> None: """Truncate the object file to ``keep_bytes`` bytes.""" obj_file = object_path(repo, oid) os.chmod(obj_file, 0o644) data = obj_file.read_bytes() obj_file.write_bytes(data[:keep_bytes]) def _vo(repo: pathlib.Path, *args: str, stdin: str | None = None) -> InvokeResult: from muse.cli.app import main as cli return runner.invoke( cli, ["verify-object", *args], env={"MUSE_REPO_ROOT": str(repo)}, input=stdin, ) # --------------------------------------------------------------------------- # Unit — _iter_all_object_ids # --------------------------------------------------------------------------- class TestIterAllObjectIds: def test_empty_store(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _iter_all_object_ids repo = _make_repo(tmp_path) assert _iter_all_object_ids(repo) == [] def test_missing_objects_dir(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _iter_all_object_ids import shutil repo = _make_repo(tmp_path) shutil.rmtree(objects_dir(repo)) assert _iter_all_object_ids(repo) == [] def test_finds_written_object(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _iter_all_object_ids repo = _make_repo(tmp_path) oid = _write_object(repo, b"test content") assert oid in _iter_all_object_ids(repo) def test_multiple_objects_sorted(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _iter_all_object_ids repo = _make_repo(tmp_path) oids = [_write_object(repo, f"content {i}".encode()) for i in range(5)] found = _iter_all_object_ids(repo) assert set(oids) == set(found) assert found == sorted(found) def test_symlinks_in_shard_skipped(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _iter_all_object_ids repo = _make_repo(tmp_path) oid = _write_object(repo, b"real content") shard = object_path(repo, oid).parent sym = shard / "symlink_file" sym.symlink_to(object_path(repo, oid)) ids = _iter_all_object_ids(repo) assert ids.count(oid) == 1 def test_short_shard_dir_names_ignored(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _iter_all_object_ids from muse.core.object_store import objects_dir repo = _make_repo(tmp_path) (objects_dir(repo) / "sha256" / "abc").mkdir(parents=True, exist_ok=True) assert _iter_all_object_ids(repo) == [] def test_returns_sha256_prefixed_ids(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _iter_all_object_ids repo = _make_repo(tmp_path) _write_object(repo, b"prefix check") ids = _iter_all_object_ids(repo) assert all(oid.startswith("sha256:") for oid in ids) # --------------------------------------------------------------------------- # Unit — _verify_one # --------------------------------------------------------------------------- class TestVerifyOne: def test_valid_object_ok(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _verify_one repo = _make_repo(tmp_path) oid = _write_object(repo, b"hello world") result = _verify_one(repo, oid) assert result["ok"] is True assert result["size_bytes"] == len(b"hello world") assert result["error"] is None def test_ok_result_preserves_object_id(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _verify_one repo = _make_repo(tmp_path) oid = _write_object(repo, b"id check") result = _verify_one(repo, oid) assert result["object_id"] == oid def test_error_is_none_when_ok(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _verify_one repo = _make_repo(tmp_path) oid = _write_object(repo, b"clean") result = _verify_one(repo, oid) assert result["ok"] is True assert result["error"] is None def test_size_counted_during_hash(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _verify_one repo = _make_repo(tmp_path) content = b"x" * 12345 oid = _write_object(repo, content) result = _verify_one(repo, oid) assert result["size_bytes"] == 12345 def test_zero_byte_object_ok(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _verify_one repo = _make_repo(tmp_path) oid = _write_object(repo, b"") result = _verify_one(repo, oid) assert result["ok"] is True assert result["size_bytes"] == 0 def test_missing_object_not_ok(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _verify_one repo = _make_repo(tmp_path) result = _verify_one(repo, blob_id(b"nonexistent object")) assert result["ok"] is False assert "not found" in (result["error"] or "") assert result["size_bytes"] is None def test_corrupt_object_mismatch(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _verify_one repo = _make_repo(tmp_path) oid = _write_object(repo, b"original content") _corrupt_object(repo, oid) result = _verify_one(repo, oid) assert result["ok"] is False assert "mismatch" in (result["error"] or "") def test_corrupt_object_has_size_bytes(self, tmp_path: pathlib.Path) -> None: """Even on hash mismatch, size_bytes is populated (bytes were read).""" from muse.cli.commands.verify_object import _verify_one repo = _make_repo(tmp_path) oid = _write_object(repo, b"original content") _corrupt_object(repo, oid) result = _verify_one(repo, oid) assert result["size_bytes"] is not None assert result["size_bytes"] > 0 def test_truncated_object_mismatch(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _verify_one repo = _make_repo(tmp_path) oid = _write_object(repo, b"original content that will be truncated") _truncate_object(repo, oid, keep_bytes=4) result = _verify_one(repo, oid) assert result["ok"] is False assert "mismatch" in (result["error"] or "") def test_empty_truncated_object_mismatch(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _verify_one repo = _make_repo(tmp_path) oid = _write_object(repo, b"will be emptied") _truncate_object(repo, oid, keep_bytes=0) result = _verify_one(repo, oid) assert result["ok"] is False def test_invalid_object_id_format(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _verify_one repo = _make_repo(tmp_path) result = _verify_one(repo, "not-a-sha256") assert result["ok"] is False assert result["error"] is not None def test_invalid_object_id_never_raises(self, tmp_path: pathlib.Path) -> None: from muse.cli.commands.verify_object import _verify_one repo = _make_repo(tmp_path) result = _verify_one(repo, "\x00" * 64) assert isinstance(result, dict) assert result["ok"] is False def test_io_error_returns_error_dict(self, tmp_path: pathlib.Path) -> None: """OSError during read returns an error result, never raises.""" from muse.cli.commands.verify_object import _verify_one repo = _make_repo(tmp_path) oid = _write_object(repo, b"to be made unreadable") obj_file = object_path(repo, oid) obj_file.chmod(0o000) try: result = _verify_one(repo, oid) assert result["ok"] is False assert result["error"] is not None assert "I/O error" in (result["error"] or "") finally: obj_file.chmod(0o644) class TestObjectResultSchema: def test_fields(self) -> None: from muse.cli.commands.verify_object import _ObjectResult assert set(_ObjectResult.__annotations__) == {"object_id", "ok", "size_bytes", "error"} class TestChunkConstant: def test_chunk_is_power_of_two(self) -> None: from muse.cli.commands.verify_object import _CHUNK assert _CHUNK > 0 assert (_CHUNK & (_CHUNK - 1)) == 0 # --------------------------------------------------------------------------- # Integration — JSON output # --------------------------------------------------------------------------- class TestJsonOutput: def test_valid_object_all_ok(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid = _write_object(repo, _FAKE_CONTENT) result = _vo(repo, "--json", oid) assert result.exit_code == 0 data = json.loads(result.output) assert data["all_ok"] is True assert data["checked"] == 1 assert data["failed"] == 0 assert data["results"][0]["ok"] is True assert data["results"][0]["size_bytes"] == len(_FAKE_CONTENT) def test_missing_object_fails(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _vo(repo, "--json", blob_id(b"nonexistent object")) assert result.exit_code == ExitCode.USER_ERROR data = json.loads(result.output) assert data["all_ok"] is False assert data["failed"] == 1 def test_corrupt_object_fails(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid = _write_object(repo, b"good content") _corrupt_object(repo, oid) result = _vo(repo, "--json", oid) assert result.exit_code == ExitCode.USER_ERROR data = json.loads(result.output) assert data["results"][0]["ok"] is False assert "mismatch" in data["results"][0]["error"] def test_mixed_pass_fail(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) good = _write_object(repo, b"good") bad = blob_id(b"nonexistent object b") result = _vo(repo, "--json", good, bad) assert result.exit_code == ExitCode.USER_ERROR data = json.loads(result.output) assert data["checked"] == 2 assert data["failed"] == 1 def test_json_shorthand(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid = _write_object(repo, b"data") result = _vo(repo, "--json", oid) assert result.exit_code == 0 assert "all_ok" in json.loads(result.output) def test_duration_ms_and_exit_code_present(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid = _write_object(repo, _FAKE_CONTENT) data = json.loads(_vo(repo, "--json", oid).output) assert "duration_ms" in data assert isinstance(data["duration_ms"], float) assert data["duration_ms"] >= 0.0 assert data["exit_code"] == 0 def test_exit_code_nonzero_on_failure(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) data = json.loads(_vo(repo, "--json", blob_id(b"nonexistent object")).output) assert data["exit_code"] != 0 assert data["duration_ms"] >= 0.0 def test_results_order_matches_input(self, tmp_path: pathlib.Path) -> None: """Results must appear in the same order as the positional arguments.""" repo = _make_repo(tmp_path) oids = [_write_object(repo, f"ordered {i}".encode()) for i in range(5)] data = json.loads(_vo(repo, "--json", *oids).output) returned = [r["object_id"] for r in data["results"]] assert returned == oids def test_checked_equals_len_results(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oids = [_write_object(repo, f"cnt {i}".encode()) for i in range(3)] data = json.loads(_vo(repo, "--json", *oids).output) assert data["checked"] == len(data["results"]) def test_failed_count_matches_failed_results(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) good = _write_object(repo, b"ok") bad1 = blob_id(b"missing a") bad2 = blob_id(b"missing b") data = json.loads(_vo(repo, "--json", good, bad1, bad2).output) assert data["failed"] == sum(1 for r in data["results"] if not r["ok"]) assert data["failed"] == 2 def test_error_null_when_ok(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid = _write_object(repo, b"clean object") data = json.loads(_vo(repo, "--json", oid).output) assert data["results"][0]["error"] is None def test_duplicate_id_verified_twice(self, tmp_path: pathlib.Path) -> None: """Passing the same OID twice verifies it twice — no implicit dedup.""" repo = _make_repo(tmp_path) oid = _write_object(repo, b"dedup test") data = json.loads(_vo(repo, "--json", oid, oid).output) assert data["checked"] == 2 assert data["all_ok"] is True # --------------------------------------------------------------------------- # Integration — text output # --------------------------------------------------------------------------- class TestTextOutput: def test_ok_label_and_size(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid = _write_object(repo, _FAKE_CONTENT) result = _vo(repo, oid) assert result.exit_code == 0 assert "OK" in result.output assert str(len(_FAKE_CONTENT)) in result.output def test_fail_label_on_missing(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _vo(repo, blob_id(b"nonexistent object c")) assert "FAIL" in result.output assert result.exit_code == ExitCode.USER_ERROR def test_summary_line_present(self, tmp_path: pathlib.Path) -> None: """Text mode always ends with a Checked/Failed summary line.""" repo = _make_repo(tmp_path) oid = _write_object(repo, b"summary test") result = _vo(repo, oid) assert "Checked:" in result.output assert "Failed:" in result.output def test_summary_reflects_counts(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) good = _write_object(repo, b"good") bad = blob_id(b"missing for summary") result = _vo(repo, good, bad) assert "Checked: 2" in result.output assert "Failed: 1" in result.output def test_summary_all_pass(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) for i in range(3): _write_object(repo, f"text pass {i}".encode()) result = _vo(repo, "--all") assert "Checked: 3" in result.output assert "Failed: 0" in result.output # --------------------------------------------------------------------------- # Integration — --quiet mode # --------------------------------------------------------------------------- class TestQuietMode: def test_all_ok_exits_0(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid = _write_object(repo, _FAKE_CONTENT) result = _vo(repo, "--quiet", oid) assert result.exit_code == 0 assert result.output.strip() == "" def test_failure_exits_1(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _vo(repo, "--quiet", blob_id(b"nonexistent object d")) assert result.exit_code == ExitCode.USER_ERROR assert result.output.strip() == "" def test_quiet_with_text_format_no_output(self, tmp_path: pathlib.Path) -> None: """--quiet suppresses output regardless of --format.""" repo = _make_repo(tmp_path) oid = _write_object(repo, b"quiet text") result = _vo(repo, "--quiet", oid) assert result.output.strip() == "" # --------------------------------------------------------------------------- # Integration — --all (fsck mode) # --------------------------------------------------------------------------- class TestAllMode: def test_empty_store_all_ok(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) data = json.loads(_vo(repo, "--all", "--json").output) assert data["all_ok"] is True assert data["checked"] == 0 def test_all_finds_written_objects(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) for i in range(5): _write_object(repo, f"content {i}".encode()) data = json.loads(_vo(repo, "--all", "--json").output) assert data["checked"] == 5 assert data["all_ok"] is True def test_all_detects_corruption(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid = _write_object(repo, b"good data") _corrupt_object(repo, oid) data = json.loads(_vo(repo, "--all", "--json").output) assert data["failed"] == 1 def test_all_plus_explicit_ids_rejected(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _vo(repo, "--all", blob_id(b"explicit id arg")) assert result.exit_code == ExitCode.USER_ERROR assert result.stdout_bytes == b"" def test_all_plus_stdin_rejected(self, tmp_path: pathlib.Path) -> None: """--all + --stdin is rejected for consistency with --all + positional.""" repo = _make_repo(tmp_path) oid = _write_object(repo, b"stdin data") result = _vo(repo, "--all", "--stdin", stdin=f"{oid}\n") assert result.exit_code == ExitCode.USER_ERROR assert result.stdout_bytes == b"" def test_all_quiet(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) _write_object(repo, b"content") result = _vo(repo, "--all", "--quiet") assert result.exit_code == 0 assert result.output.strip() == "" # --------------------------------------------------------------------------- # Integration — --stdin # --------------------------------------------------------------------------- class TestStdinMode: def test_reads_ids_from_stdin(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid = _write_object(repo, _FAKE_CONTENT) data = json.loads(_vo(repo, "--stdin", "--json", stdin=f"{oid}\n").output) assert data["checked"] == 1 assert data["all_ok"] is True def test_comments_and_blank_lines_skipped(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid = _write_object(repo, _FAKE_CONTENT) data = json.loads(_vo(repo, "--stdin", "--json", stdin=f"\n# comment\n{oid}\n\n").output) assert data["checked"] == 1 def test_stdin_combines_with_positional(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid1 = _write_object(repo, b"one") oid2 = _write_object(repo, b"two") data = json.loads(_vo(repo, "--stdin", "--json", oid1, stdin=f"{oid2}\n").output) assert data["checked"] == 2 def test_empty_stdin_no_explicit_errors(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _vo(repo, "--stdin", "--json", stdin="") assert result.exit_code == ExitCode.USER_ERROR def test_crlf_line_endings_stripped(self, tmp_path: pathlib.Path) -> None: """Windows CRLF line endings must not corrupt the object ID.""" repo = _make_repo(tmp_path) oid = _write_object(repo, b"crlf test") data = json.loads(_vo(repo, "--stdin", "--json", stdin=f"{oid}\r\n").output) assert data["all_ok"] is True assert data["results"][0]["object_id"] == oid # --------------------------------------------------------------------------- # Integration — --fail-fast # --------------------------------------------------------------------------- class TestFailFast: def test_stops_after_first_failure(self, tmp_path: pathlib.Path) -> None: """With --fail-fast, only the first failing result appears in output.""" repo = _make_repo(tmp_path) bad1 = blob_id(b"missing ff a") bad2 = blob_id(b"missing ff b") good = _write_object(repo, b"good after bad") # bad1, bad2, good — should stop after bad1 data = json.loads(_vo(repo, "--fail-fast", "--json", bad1, bad2, good).output) assert data["checked"] == 1 assert data["failed"] == 1 assert data["all_ok"] is False def test_no_effect_when_all_pass(self, tmp_path: pathlib.Path) -> None: """--fail-fast is a no-op when every object passes.""" repo = _make_repo(tmp_path) oids = [_write_object(repo, f"ff pass {i}".encode()) for i in range(5)] data = json.loads(_vo(repo, "--fail-fast", "--json", *oids).output) assert data["checked"] == 5 assert data["all_ok"] is True def test_fail_fast_exits_nonzero(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _vo(repo, "--fail-fast", "--json", blob_id(b"missing ff c")) assert result.exit_code == ExitCode.USER_ERROR def test_fail_fast_with_all(self, tmp_path: pathlib.Path) -> None: """--fail-fast + --all stops the scan on the first corrupt object.""" repo = _make_repo(tmp_path) for i in range(10): _write_object(repo, f"store {i}".encode()) # Corrupt one object somewhere in the store. from muse.cli.commands.verify_object import _iter_all_object_ids all_ids = _iter_all_object_ids(repo) _corrupt_object(repo, all_ids[0]) data = json.loads(_vo(repo, "--all", "--fail-fast", "--json").output) # Should have stopped early — checked < 10. assert data["checked"] < len(all_ids) assert data["failed"] == 1 def test_fail_fast_duration_ms_present(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) data = json.loads(_vo(repo, "--fail-fast", "--json", blob_id(b"missing ff d")).output) assert "duration_ms" in data assert data["duration_ms"] >= 0.0 # --------------------------------------------------------------------------- # Security # --------------------------------------------------------------------------- class TestSecurity: def test_format_error_goes_to_stderr(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _vo(repo, fake_id("a")) assert result.exit_code == ExitCode.USER_ERROR assert "Traceback" not in result.output def test_no_traceback_on_bad_format(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _vo(repo, fake_id("b")) assert "Traceback" not in result.output def test_ansi_in_error_message_stripped_text(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _vo(repo, blob_id(b"nonexistent")) assert "\x1b" not in result.output def test_invalid_id_returns_error_not_crash(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _vo(repo, "not-a-sha256") assert result.exit_code == ExitCode.USER_ERROR assert "Traceback" not in result.output def test_no_ids_errors_to_stderr(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _vo(repo) assert result.exit_code == ExitCode.USER_ERROR assert "error" in result.stderr.lower() def test_path_traversal_in_object_id_rejected(self, tmp_path: pathlib.Path) -> None: """Path-traversal-looking IDs must be rejected by validation before any I/O.""" repo = _make_repo(tmp_path) traversal = f"sha256:../../etc/passwd{'a' * 50}" result = _vo(repo, "--json", traversal) # Validation must reject it — never attempts to open a path. assert result.exit_code == ExitCode.USER_ERROR data = json.loads(result.output) # The error message explains the format violation, not an fs operation. assert data["results"][0]["ok"] is False assert "expected" in data["results"][0]["error"] def test_unicode_in_object_id_rejected(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _vo(repo, f"sha256:café{'a' * 60}") assert result.exit_code == ExitCode.USER_ERROR def test_symlink_shard_directory_skipped(self, tmp_path: pathlib.Path) -> None: """A symlinked shard directory must not be followed during --all.""" from muse.cli.commands.verify_object import _iter_all_object_ids from muse.core.object_store import objects_dir repo = _make_repo(tmp_path) # Write a real object so the algo dir exists. _write_object(repo, b"real") algo_dir = objects_dir(repo) / "sha256" # Add a symlink that points outside the repo. sym_shard = algo_dir / "ff" sym_shard.symlink_to(tmp_path) ids = _iter_all_object_ids(repo) # The symlinked shard's entries must not appear. assert all(oid.startswith("sha256:") for oid in ids) def test_crlf_injection_in_stdin_does_not_corrupt_id(self, tmp_path: pathlib.Path) -> None: """A \r embedded in a stdin line must not be part of the stored OID.""" repo = _make_repo(tmp_path) oid = _write_object(repo, b"crlf injection") # Feed oid with embedded \r before the newline. data = json.loads(_vo(repo, "--stdin", "--json", stdin=f"{oid}\r\n").output) assert data["all_ok"] is True def test_all_error_goes_to_stderr_not_stdout(self, tmp_path: pathlib.Path) -> None: """Argument errors for --all always land on stderr, stdout stays empty.""" repo = _make_repo(tmp_path) result = _vo(repo, "--all", "--stdin", stdin="") assert result.stdout_bytes == b"" assert len(result.stderr) > 0 # --------------------------------------------------------------------------- # Data integrity # --------------------------------------------------------------------------- class TestDataIntegrity: def test_zero_byte_blob_round_trips(self, tmp_path: pathlib.Path) -> None: """A zero-byte object has a well-defined SHA-256 and must verify clean.""" repo = _make_repo(tmp_path) oid = _write_object(repo, b"") data = json.loads(_vo(repo, "--json", oid).output) assert data["all_ok"] is True assert data["results"][0]["size_bytes"] == 0 def test_truncated_file_is_hash_mismatch(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid = _write_object(repo, b"file that will be truncated") _truncate_object(repo, oid, keep_bytes=3) data = json.loads(_vo(repo, "--json", oid).output) assert data["results"][0]["ok"] is False assert "mismatch" in data["results"][0]["error"] def test_completely_emptied_file_is_hash_mismatch(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid = _write_object(repo, b"non-empty content") _truncate_object(repo, oid, keep_bytes=0) data = json.loads(_vo(repo, "--json", oid).output) assert data["results"][0]["ok"] is False def test_large_object_streams_without_loading_all(self, tmp_path: pathlib.Path) -> None: """A 4 MiB object must verify correctly via streaming (no heap spike).""" repo = _make_repo(tmp_path) content = b"a" * (4 * 1024 * 1024) oid = _write_object(repo, content) data = json.loads(_vo(repo, "--json", oid).output) assert data["all_ok"] is True assert data["results"][0]["size_bytes"] == len(content) def test_multiple_corrupt_objects_all_reported(self, tmp_path: pathlib.Path) -> None: """All corruptions are reported — not just the first one.""" repo = _make_repo(tmp_path) oids = [_write_object(repo, f"corrupt me {i}".encode()) for i in range(3)] for oid in oids: _corrupt_object(repo, oid) data = json.loads(_vo(repo, "--json", *oids).output) assert data["failed"] == 3 assert data["all_ok"] is False # --------------------------------------------------------------------------- # Stress # --------------------------------------------------------------------------- class TestStress: def test_100_object_store_all_pass(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) for i in range(100): _write_object(repo, f"stress content {i}".encode()) data = json.loads(_vo(repo, "--all", "--json").output) assert data["checked"] == 100 assert data["all_ok"] is True def test_1000_object_store_all_pass(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) for i in range(1000): _write_object(repo, f"large stress {i}".encode()) data = json.loads(_vo(repo, "--all", "--json").output) assert data["checked"] == 1000 assert data["all_ok"] is True def test_200_sequential_verifies(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oid = _write_object(repo, _FAKE_CONTENT) for i in range(200): result = _vo(repo, oid) assert result.exit_code == 0, f"failed at iteration {i}" def test_stdin_200_ids(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) oids = [_write_object(repo, f"content_{i}".encode()) for i in range(200)] data = json.loads(_vo(repo, "--stdin", "--json", stdin=f"{'\n'.join(oids)}\n").output) assert data["checked"] == 200 assert data["all_ok"] is True def test_duration_ms_bounded_for_small_op(self, tmp_path: pathlib.Path) -> None: """Verifying one small object should complete in well under 5 seconds.""" repo = _make_repo(tmp_path) oid = _write_object(repo, b"small") data = json.loads(_vo(repo, "--json", oid).output) assert data["duration_ms"] < 5_000 # --------------------------------------------------------------------------- # Flag registration # --------------------------------------------------------------------------- class TestRegisterFlags: def _parse(self, *args: str) -> "argparse.Namespace": import argparse from muse.cli.commands.verify_object import register p = argparse.ArgumentParser() sub = p.add_subparsers() register(sub) return p.parse_args(["verify-object", *args]) def test_default_json_out_is_false(self) -> None: ns = self._parse(fake_id("a")) assert ns.json_out is False def test_json_flag_sets_json_out(self) -> None: ns = self._parse("--json", fake_id("a")) assert ns.json_out is True def test_j_shorthand_sets_json_out(self) -> None: ns = self._parse("-j", fake_id("a")) assert ns.json_out is True