test_cmd_verify_object.py
python
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
21 days ago
| 1 | """Comprehensive tests for ``muse verify-object``. |
| 2 | |
| 3 | Coverage tiers |
| 4 | -------------- |
| 5 | - Unit: _iter_all_object_ids, _verify_one (all paths), schema, constants |
| 6 | - Integration: JSON/text/quiet, --all, --stdin, --fail-fast, ordering, counts |
| 7 | - Data integrity: truncated file, zero-byte blob, large-object streaming |
| 8 | - Security: stderr routing, ANSI stripping, path traversal, unicode, CRLF, |
| 9 | symlink shard directory |
| 10 | - Stress: 100-object --all, 1000-object --all, 200 sequential verifies, |
| 11 | stdin 200 ids, duration bounded for small ops |
| 12 | """ |
| 13 | from __future__ import annotations |
| 14 | |
| 15 | import json |
| 16 | import os |
| 17 | import pathlib |
| 18 | |
| 19 | import pytest |
| 20 | |
| 21 | from muse.core.types import blob_id, fake_id |
| 22 | from muse.core.errors import ExitCode |
| 23 | from muse.core.object_store import object_path, write_object |
| 24 | from muse.core.paths import muse_dir, objects_dir |
| 25 | from tests.cli_test_helper import CliRunner, InvokeResult |
| 26 | |
# Module-level CLI runner instance; the ``_vo`` helper invokes commands through it.
runner = CliRunner()
| 28 | |
| 29 | # --------------------------------------------------------------------------- |
| 30 | # Helpers |
| 31 | # --------------------------------------------------------------------------- |
| 32 | |
# Fixed payload reused by tests that need an object of known content and size.
_FAKE_CONTENT = b"hello muse"
# Object ID that ``blob_id`` derives from ``_FAKE_CONTENT``.
_GOOD_OID = blob_id(_FAKE_CONTENT)
| 35 | |
| 36 | |
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create a minimal repository skeleton under ``tmp_path`` and return its root.

    Inside the directory reported by ``muse_dir`` this creates the
    ``objects``, ``commits``, ``snapshots`` and ``refs/heads`` directories,
    a ``HEAD`` file pointing at ``refs/heads/main`` and a ``repo.json`` file.
    """
    root = tmp_path / "repo"
    meta = muse_dir(root)

    # Each tuple is one directory, expressed as path components below ``meta``.
    layout = (("objects",), ("commits",), ("snapshots",), ("refs", "heads"))
    for parts in layout:
        meta.joinpath(*parts).mkdir(parents=True)

    head_file = meta / "HEAD"
    head_file.write_text("ref: refs/heads/main")

    config = {"repo_id": "r1", "domain": "code"}
    config_file = meta / "repo.json"
    config_file.write_text(json.dumps(config))
    return root
| 47 | |
| 48 | |
def _write_object(repo: pathlib.Path, content: bytes) -> str:
    """Store ``content`` in the repo's object store and return its object ID.

    The ID is computed with ``blob_id`` and the bytes are persisted with
    ``write_object``.
    """
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id
| 54 | |
| 55 | |
def _corrupt_object(repo: pathlib.Path, oid: str) -> None:
    """Replace the stored bytes of ``oid`` with unrelated data (simulated bit-rot).

    Object files are written read-only (0o444) by the store, so the file is
    made writable first; otherwise the overwrite would be refused.
    """
    target = object_path(repo, oid)
    target.chmod(0o644)
    garbage = b"corrupted data that does not hash to the oid"
    target.write_bytes(garbage)
| 65 | |
| 66 | |
def _truncate_object(repo: pathlib.Path, oid: str, keep_bytes: int = 0) -> None:
    """Rewrite the object file for ``oid`` keeping only its first ``keep_bytes`` bytes.

    The file is made writable before it is rewritten. With the default
    ``keep_bytes=0`` the file ends up empty.
    """
    target = object_path(repo, oid)
    target.chmod(0o644)
    prefix = target.read_bytes()[:keep_bytes]
    target.write_bytes(prefix)
| 73 | |
| 74 | |
def _vo(repo: pathlib.Path, *args: str, stdin: str | None = None) -> InvokeResult:
    """Run ``verify-object`` with ``args`` against ``repo`` and return the result.

    The repository is selected through the ``MUSE_REPO_ROOT`` environment
    variable; ``stdin``, when given, is passed to the runner as input.
    """
    # Imported lazily, exactly as the tests do for the command internals.
    from muse.cli.app import main as cli

    argv = ["verify-object"]
    argv.extend(args)
    environment = {"MUSE_REPO_ROOT": str(repo)}
    return runner.invoke(cli, argv, env=environment, input=stdin)
| 83 | |
| 84 | |
| 85 | # --------------------------------------------------------------------------- |
| 86 | # Unit — _iter_all_object_ids |
| 87 | # --------------------------------------------------------------------------- |
| 88 | |
| 89 | |
class TestIterAllObjectIds:
    """Unit tests for ``_iter_all_object_ids``."""

    def test_empty_store(self, tmp_path: pathlib.Path) -> None:
        """A freshly created repository yields an empty list."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        store = _make_repo(tmp_path)
        found = _iter_all_object_ids(store)
        assert found == []

    def test_missing_objects_dir(self, tmp_path: pathlib.Path) -> None:
        """Removing the objects directory still yields an empty list."""
        from muse.cli.commands.verify_object import _iter_all_object_ids
        import shutil

        store = _make_repo(tmp_path)
        shutil.rmtree(objects_dir(store))
        found = _iter_all_object_ids(store)
        assert found == []

    def test_finds_written_object(self, tmp_path: pathlib.Path) -> None:
        """An object written to the store is listed."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        store = _make_repo(tmp_path)
        written = _write_object(store, b"test content")
        listing = _iter_all_object_ids(store)
        assert written in listing

    def test_multiple_objects_sorted(self, tmp_path: pathlib.Path) -> None:
        """Five written objects are all listed, in sorted order."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        store = _make_repo(tmp_path)
        written = []
        for i in range(5):
            written.append(_write_object(store, f"content {i}".encode()))
        listing = _iter_all_object_ids(store)
        assert set(written) == set(listing)
        assert listing == sorted(listing)

    def test_symlinks_in_shard_skipped(self, tmp_path: pathlib.Path) -> None:
        """A symlink placed next to a real object does not make it appear twice."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        store = _make_repo(tmp_path)
        written = _write_object(store, b"real content")
        real_file = object_path(store, written)
        link = real_file.parent / "symlink_file"
        link.symlink_to(object_path(store, written))
        listing = _iter_all_object_ids(store)
        assert listing.count(written) == 1

    def test_short_shard_dir_names_ignored(self, tmp_path: pathlib.Path) -> None:
        """A directory named ``abc`` under ``sha256`` contributes no IDs."""
        from muse.cli.commands.verify_object import _iter_all_object_ids
        from muse.core.object_store import objects_dir

        store = _make_repo(tmp_path)
        odd_dir = objects_dir(store) / "sha256" / "abc"
        odd_dir.mkdir(parents=True, exist_ok=True)
        found = _iter_all_object_ids(store)
        assert found == []

    def test_returns_sha256_prefixed_ids(self, tmp_path: pathlib.Path) -> None:
        """Every listed ID starts with ``sha256:``."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        store = _make_repo(tmp_path)
        _write_object(store, b"prefix check")
        listing = _iter_all_object_ids(store)
        unprefixed = [entry for entry in listing if not entry.startswith("sha256:")]
        assert unprefixed == []
| 140 | |
| 141 | |
| 142 | # --------------------------------------------------------------------------- |
| 143 | # Unit — _verify_one |
| 144 | # --------------------------------------------------------------------------- |
| 145 | |
| 146 | |
class TestVerifyOne:
    """Unit tests for ``_verify_one`` covering ok, missing, corrupt and error paths."""

    def test_valid_object_ok(self, tmp_path: pathlib.Path) -> None:
        """An intact object reports ok, its byte size, and no error."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"hello world")
        result = _verify_one(repo, oid)
        assert result["ok"] is True
        assert result["size_bytes"] == len(b"hello world")
        assert result["error"] is None

    def test_ok_result_preserves_object_id(self, tmp_path: pathlib.Path) -> None:
        """The result echoes back the ID that was verified."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"id check")
        result = _verify_one(repo, oid)
        assert result["object_id"] == oid

    def test_error_is_none_when_ok(self, tmp_path: pathlib.Path) -> None:
        """A passing result carries ``error`` set to ``None``."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"clean")
        result = _verify_one(repo, oid)
        assert result["ok"] is True
        assert result["error"] is None

    def test_size_counted_during_hash(self, tmp_path: pathlib.Path) -> None:
        """``size_bytes`` equals the length of the stored content."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        content = b"x" * 12345
        oid = _write_object(repo, content)
        result = _verify_one(repo, oid)
        assert result["size_bytes"] == 12345

    def test_zero_byte_object_ok(self, tmp_path: pathlib.Path) -> None:
        """An empty object verifies clean with ``size_bytes`` of zero."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"")
        result = _verify_one(repo, oid)
        assert result["ok"] is True
        assert result["size_bytes"] == 0

    def test_missing_object_not_ok(self, tmp_path: pathlib.Path) -> None:
        """An ID with no stored file fails with a "not found" error and no size."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        result = _verify_one(repo, blob_id(b"nonexistent object"))
        assert result["ok"] is False
        assert "not found" in (result["error"] or "")
        assert result["size_bytes"] is None

    def test_corrupt_object_mismatch(self, tmp_path: pathlib.Path) -> None:
        """Overwritten content is reported as a mismatch."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"original content")
        _corrupt_object(repo, oid)
        result = _verify_one(repo, oid)
        assert result["ok"] is False
        assert "mismatch" in (result["error"] or "")

    def test_corrupt_object_has_size_bytes(self, tmp_path: pathlib.Path) -> None:
        """Even on hash mismatch, size_bytes is populated (bytes were read)."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"original content")
        _corrupt_object(repo, oid)
        result = _verify_one(repo, oid)
        assert result["size_bytes"] is not None
        assert result["size_bytes"] > 0

    def test_truncated_object_mismatch(self, tmp_path: pathlib.Path) -> None:
        """A file cut down to four bytes is reported as a mismatch."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"original content that will be truncated")
        _truncate_object(repo, oid, keep_bytes=4)
        result = _verify_one(repo, oid)
        assert result["ok"] is False
        assert "mismatch" in (result["error"] or "")

    def test_empty_truncated_object_mismatch(self, tmp_path: pathlib.Path) -> None:
        """A file emptied completely does not verify."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"will be emptied")
        _truncate_object(repo, oid, keep_bytes=0)
        result = _verify_one(repo, oid)
        assert result["ok"] is False

    def test_invalid_object_id_format(self, tmp_path: pathlib.Path) -> None:
        """A string that is not an object ID yields a failing result with an error."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        result = _verify_one(repo, "not-a-sha256")
        assert result["ok"] is False
        assert result["error"] is not None

    def test_invalid_object_id_never_raises(self, tmp_path: pathlib.Path) -> None:
        """An ID made of NUL characters returns a failing dict instead of raising."""
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        result = _verify_one(repo, "\x00" * 64)
        assert isinstance(result, dict)
        assert result["ok"] is False

    def test_io_error_returns_error_dict(self, tmp_path: pathlib.Path) -> None:
        """OSError during read returns an error result, never raises.

        The read failure is provoked by stripping all permission bits from the
        object file. That has no effect for a privileged user (e.g. root in a
        container) or on platforms without POSIX permissions, so the test is
        skipped when the file is still readable after the chmod.
        """
        from muse.cli.commands.verify_object import _verify_one

        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"to be made unreadable")
        obj_file = object_path(repo, oid)
        obj_file.chmod(0o000)
        try:
            # Without this guard the assertions below fail spuriously whenever
            # the permission change cannot actually block the read.
            if os.access(obj_file, os.R_OK):
                pytest.skip("object file is still readable after chmod(0o000)")
            result = _verify_one(repo, oid)
            assert result["ok"] is False
            assert result["error"] is not None
            assert "I/O error" in (result["error"] or "")
        finally:
            # Restore permissions so pytest can clean up ``tmp_path``.
            obj_file.chmod(0o644)
| 260 | |
| 261 | |
class TestObjectResultSchema:
    """Pins the set of fields declared on ``_ObjectResult``."""

    def test_fields(self) -> None:
        """The annotations are exactly object_id, ok, size_bytes and error."""
        from muse.cli.commands.verify_object import _ObjectResult

        expected = {"object_id", "ok", "size_bytes", "error"}
        declared = set(_ObjectResult.__annotations__)
        assert declared == expected
| 266 | |
| 267 | |
class TestChunkConstant:
    """Sanity check on the ``_CHUNK`` constant."""

    def test_chunk_is_power_of_two(self) -> None:
        """``_CHUNK`` is positive and has exactly one bit set."""
        from muse.cli.commands.verify_object import _CHUNK

        assert _CHUNK > 0
        # n & (n - 1) clears the lowest set bit; zero means only one bit was set.
        remaining_bits = _CHUNK & (_CHUNK - 1)
        assert remaining_bits == 0
| 273 | |
| 274 | |
| 275 | # --------------------------------------------------------------------------- |
| 276 | # Integration — JSON output |
| 277 | # --------------------------------------------------------------------------- |
| 278 | |
| 279 | |
class TestJsonOutput:
    """Integration tests for the ``--json`` report of ``verify-object``."""

    def test_valid_object_all_ok(self, tmp_path: pathlib.Path) -> None:
        """A stored object gives exit code 0 and a single passing entry."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, _FAKE_CONTENT)
        outcome = _vo(store, "--json", object_id)
        assert outcome.exit_code == 0
        report = json.loads(outcome.output)
        assert report["all_ok"] is True
        assert report["checked"] == 1
        assert report["failed"] == 0
        entry = report["results"][0]
        assert entry["ok"] is True
        assert entry["size_bytes"] == len(_FAKE_CONTENT)

    def test_missing_object_fails(self, tmp_path: pathlib.Path) -> None:
        """An ID that was never stored gives USER_ERROR and one failure."""
        store = _make_repo(tmp_path)
        absent = blob_id(b"nonexistent object")
        outcome = _vo(store, "--json", absent)
        assert outcome.exit_code == ExitCode.USER_ERROR
        report = json.loads(outcome.output)
        assert report["all_ok"] is False
        assert report["failed"] == 1

    def test_corrupt_object_fails(self, tmp_path: pathlib.Path) -> None:
        """Overwritten content gives USER_ERROR and a mismatch error."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, b"good content")
        _corrupt_object(store, object_id)
        outcome = _vo(store, "--json", object_id)
        assert outcome.exit_code == ExitCode.USER_ERROR
        entry = json.loads(outcome.output)["results"][0]
        assert entry["ok"] is False
        assert "mismatch" in entry["error"]

    def test_mixed_pass_fail(self, tmp_path: pathlib.Path) -> None:
        """One stored and one absent ID: two checked, one failed."""
        store = _make_repo(tmp_path)
        present = _write_object(store, b"good")
        absent = blob_id(b"nonexistent object b")
        outcome = _vo(store, "--json", present, absent)
        assert outcome.exit_code == ExitCode.USER_ERROR
        report = json.loads(outcome.output)
        assert report["checked"] == 2
        assert report["failed"] == 1

    def test_json_shorthand(self, tmp_path: pathlib.Path) -> None:
        """``--json`` produces a document containing ``all_ok``."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, b"data")
        outcome = _vo(store, "--json", object_id)
        assert outcome.exit_code == 0
        report = json.loads(outcome.output)
        assert "all_ok" in report

    def test_duration_ms_and_exit_code_present(self, tmp_path: pathlib.Path) -> None:
        """The report carries a non-negative float ``duration_ms`` and ``exit_code`` 0."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, _FAKE_CONTENT)
        outcome = _vo(store, "--json", object_id)
        report = json.loads(outcome.output)
        assert "duration_ms" in report
        elapsed = report["duration_ms"]
        assert isinstance(elapsed, float)
        assert elapsed >= 0.0
        assert report["exit_code"] == 0

    def test_exit_code_nonzero_on_failure(self, tmp_path: pathlib.Path) -> None:
        """A failing run reports a non-zero ``exit_code`` in the JSON body."""
        store = _make_repo(tmp_path)
        absent = blob_id(b"nonexistent object")
        outcome = _vo(store, "--json", absent)
        report = json.loads(outcome.output)
        assert report["exit_code"] != 0
        assert report["duration_ms"] >= 0.0

    def test_results_order_matches_input(self, tmp_path: pathlib.Path) -> None:
        """Results must appear in the same order as the positional arguments."""
        store = _make_repo(tmp_path)
        requested = []
        for i in range(5):
            requested.append(_write_object(store, f"ordered {i}".encode()))
        outcome = _vo(store, "--json", *requested)
        report = json.loads(outcome.output)
        reported = [entry["object_id"] for entry in report["results"]]
        assert reported == requested

    def test_checked_equals_len_results(self, tmp_path: pathlib.Path) -> None:
        """``checked`` equals the number of entries in ``results``."""
        store = _make_repo(tmp_path)
        requested = []
        for i in range(3):
            requested.append(_write_object(store, f"cnt {i}".encode()))
        outcome = _vo(store, "--json", *requested)
        report = json.loads(outcome.output)
        assert report["checked"] == len(report["results"])

    def test_failed_count_matches_failed_results(self, tmp_path: pathlib.Path) -> None:
        """``failed`` equals the number of entries whose ``ok`` is falsy."""
        store = _make_repo(tmp_path)
        present = _write_object(store, b"ok")
        absent_a = blob_id(b"missing a")
        absent_b = blob_id(b"missing b")
        outcome = _vo(store, "--json", present, absent_a, absent_b)
        report = json.loads(outcome.output)
        failing = [entry for entry in report["results"] if not entry["ok"]]
        assert report["failed"] == len(failing)
        assert report["failed"] == 2

    def test_error_null_when_ok(self, tmp_path: pathlib.Path) -> None:
        """A passing entry serialises ``error`` as null."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, b"clean object")
        outcome = _vo(store, "--json", object_id)
        entry = json.loads(outcome.output)["results"][0]
        assert entry["error"] is None

    def test_duplicate_id_verified_twice(self, tmp_path: pathlib.Path) -> None:
        """Passing the same OID twice verifies it twice — no implicit dedup."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, b"dedup test")
        outcome = _vo(store, "--json", object_id, object_id)
        report = json.loads(outcome.output)
        assert report["checked"] == 2
        assert report["all_ok"] is True
| 379 | |
| 380 | |
| 381 | # --------------------------------------------------------------------------- |
| 382 | # Integration — text output |
| 383 | # --------------------------------------------------------------------------- |
| 384 | |
| 385 | |
class TestTextOutput:
    """Integration tests for the default (non-JSON) output of ``verify-object``."""

    def test_ok_label_and_size(self, tmp_path: pathlib.Path) -> None:
        """A passing object prints ``OK`` and its size; exit code is 0."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, _FAKE_CONTENT)
        outcome = _vo(store, object_id)
        assert outcome.exit_code == 0
        printed = outcome.output
        assert "OK" in printed
        assert str(len(_FAKE_CONTENT)) in printed

    def test_fail_label_on_missing(self, tmp_path: pathlib.Path) -> None:
        """An absent object prints ``FAIL`` and exits with USER_ERROR."""
        store = _make_repo(tmp_path)
        absent = blob_id(b"nonexistent object c")
        outcome = _vo(store, absent)
        assert "FAIL" in outcome.output
        assert outcome.exit_code == ExitCode.USER_ERROR

    def test_summary_line_present(self, tmp_path: pathlib.Path) -> None:
        """The text output contains both ``Checked:`` and ``Failed:``."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, b"summary test")
        printed = _vo(store, object_id).output
        assert "Checked:" in printed
        assert "Failed:" in printed

    def test_summary_reflects_counts(self, tmp_path: pathlib.Path) -> None:
        """One stored plus one absent ID is summarised as 2 checked, 1 failed."""
        store = _make_repo(tmp_path)
        present = _write_object(store, b"good")
        absent = blob_id(b"missing for summary")
        printed = _vo(store, present, absent).output
        assert "Checked: 2" in printed
        assert "Failed: 1" in printed

    def test_summary_all_pass(self, tmp_path: pathlib.Path) -> None:
        """``--all`` over three intact objects is summarised as 3 checked, 0 failed."""
        store = _make_repo(tmp_path)
        payloads = [f"text pass {i}".encode() for i in range(3)]
        for payload in payloads:
            _write_object(store, payload)
        printed = _vo(store, "--all").output
        assert "Checked: 3" in printed
        assert "Failed: 0" in printed
| 424 | |
| 425 | |
| 426 | # --------------------------------------------------------------------------- |
| 427 | # Integration — --quiet mode |
| 428 | # --------------------------------------------------------------------------- |
| 429 | |
| 430 | |
class TestQuietMode:
    """Integration tests for ``--quiet``: the exit code is the only signal."""

    def test_all_ok_exits_0(self, tmp_path: pathlib.Path) -> None:
        """A passing object exits 0 and prints nothing."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, _FAKE_CONTENT)
        outcome = _vo(store, "--quiet", object_id)
        assert outcome.exit_code == 0
        assert outcome.output.strip() == ""

    def test_failure_exits_1(self, tmp_path: pathlib.Path) -> None:
        """An absent object exits with USER_ERROR and prints nothing."""
        store = _make_repo(tmp_path)
        absent = blob_id(b"nonexistent object d")
        outcome = _vo(store, "--quiet", absent)
        assert outcome.exit_code == ExitCode.USER_ERROR
        assert outcome.output.strip() == ""

    def test_quiet_with_text_format_no_output(self, tmp_path: pathlib.Path) -> None:
        """``--quiet`` without ``--json`` (text mode) prints nothing."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, b"quiet text")
        printed = _vo(store, "--quiet", object_id).output
        assert printed.strip() == ""
| 451 | |
| 452 | |
| 453 | # --------------------------------------------------------------------------- |
| 454 | # Integration — --all (fsck mode) |
| 455 | # --------------------------------------------------------------------------- |
| 456 | |
| 457 | |
class TestAllMode:
    """Integration tests for ``--all``, which verifies every object in the store."""

    def test_empty_store_all_ok(self, tmp_path: pathlib.Path) -> None:
        """An empty store reports success with zero objects checked."""
        store = _make_repo(tmp_path)
        outcome = _vo(store, "--all", "--json")
        report = json.loads(outcome.output)
        assert report["all_ok"] is True
        assert report["checked"] == 0

    def test_all_finds_written_objects(self, tmp_path: pathlib.Path) -> None:
        """Five written objects are all checked and all pass."""
        store = _make_repo(tmp_path)
        payloads = [f"content {i}".encode() for i in range(5)]
        for payload in payloads:
            _write_object(store, payload)
        outcome = _vo(store, "--all", "--json")
        report = json.loads(outcome.output)
        assert report["checked"] == 5
        assert report["all_ok"] is True

    def test_all_detects_corruption(self, tmp_path: pathlib.Path) -> None:
        """A corrupted object is counted as one failure."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, b"good data")
        _corrupt_object(store, object_id)
        outcome = _vo(store, "--all", "--json")
        report = json.loads(outcome.output)
        assert report["failed"] == 1

    def test_all_plus_explicit_ids_rejected(self, tmp_path: pathlib.Path) -> None:
        """``--all`` with a positional ID exits USER_ERROR and writes nothing to stdout."""
        store = _make_repo(tmp_path)
        explicit = blob_id(b"explicit id arg")
        outcome = _vo(store, "--all", explicit)
        assert outcome.exit_code == ExitCode.USER_ERROR
        assert outcome.stdout_bytes == b""

    def test_all_plus_stdin_rejected(self, tmp_path: pathlib.Path) -> None:
        """--all + --stdin is rejected for consistency with --all + positional."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, b"stdin data")
        piped = f"{object_id}\n"
        outcome = _vo(store, "--all", "--stdin", stdin=piped)
        assert outcome.exit_code == ExitCode.USER_ERROR
        assert outcome.stdout_bytes == b""

    def test_all_quiet(self, tmp_path: pathlib.Path) -> None:
        """``--all --quiet`` on an intact store exits 0 and prints nothing."""
        store = _make_repo(tmp_path)
        _write_object(store, b"content")
        outcome = _vo(store, "--all", "--quiet")
        assert outcome.exit_code == 0
        assert outcome.output.strip() == ""
| 500 | |
| 501 | |
| 502 | # --------------------------------------------------------------------------- |
| 503 | # Integration — --stdin |
| 504 | # --------------------------------------------------------------------------- |
| 505 | |
| 506 | |
class TestStdinMode:
    """Integration tests for ``--stdin``, which reads object IDs from standard input."""

    def test_reads_ids_from_stdin(self, tmp_path: pathlib.Path) -> None:
        """A single ID supplied on stdin is checked and passes."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, _FAKE_CONTENT)
        outcome = _vo(store, "--stdin", "--json", stdin=f"{object_id}\n")
        report = json.loads(outcome.output)
        assert report["checked"] == 1
        assert report["all_ok"] is True

    def test_comments_and_blank_lines_skipped(self, tmp_path: pathlib.Path) -> None:
        """Blank lines and a ``# comment`` line do not count as IDs."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, _FAKE_CONTENT)
        piped = f"\n# comment\n{object_id}\n\n"
        outcome = _vo(store, "--stdin", "--json", stdin=piped)
        report = json.loads(outcome.output)
        assert report["checked"] == 1

    def test_stdin_combines_with_positional(self, tmp_path: pathlib.Path) -> None:
        """One positional ID plus one stdin ID gives two checked objects."""
        store = _make_repo(tmp_path)
        positional = _write_object(store, b"one")
        from_stdin = _write_object(store, b"two")
        outcome = _vo(store, "--stdin", "--json", positional, stdin=f"{from_stdin}\n")
        report = json.loads(outcome.output)
        assert report["checked"] == 2

    def test_empty_stdin_no_explicit_errors(self, tmp_path: pathlib.Path) -> None:
        """Empty stdin with no positional IDs exits with USER_ERROR."""
        store = _make_repo(tmp_path)
        outcome = _vo(store, "--stdin", "--json", stdin="")
        assert outcome.exit_code == ExitCode.USER_ERROR

    def test_crlf_line_endings_stripped(self, tmp_path: pathlib.Path) -> None:
        """Windows CRLF line endings must not corrupt the object ID."""
        store = _make_repo(tmp_path)
        object_id = _write_object(store, b"crlf test")
        outcome = _vo(store, "--stdin", "--json", stdin=f"{object_id}\r\n")
        report = json.loads(outcome.output)
        assert report["all_ok"] is True
        first = report["results"][0]
        assert first["object_id"] == object_id
| 540 | |
| 541 | |
| 542 | # --------------------------------------------------------------------------- |
| 543 | # Integration — --fail-fast |
| 544 | # --------------------------------------------------------------------------- |
| 545 | |
| 546 | |
class TestFailFast:
    """Integration tests for ``--fail-fast``, which stops at the first failure."""

    def test_stops_after_first_failure(self, tmp_path: pathlib.Path) -> None:
        """With --fail-fast, only the first failing result appears in output."""
        store = _make_repo(tmp_path)
        absent_a = blob_id(b"missing ff a")
        absent_b = blob_id(b"missing ff b")
        present = _write_object(store, b"good after bad")
        # Argument order is absent, absent, present: the run ends at the first one.
        outcome = _vo(store, "--fail-fast", "--json", absent_a, absent_b, present)
        report = json.loads(outcome.output)
        assert report["checked"] == 1
        assert report["failed"] == 1
        assert report["all_ok"] is False

    def test_no_effect_when_all_pass(self, tmp_path: pathlib.Path) -> None:
        """--fail-fast is a no-op when every object passes."""
        store = _make_repo(tmp_path)
        requested = []
        for i in range(5):
            requested.append(_write_object(store, f"ff pass {i}".encode()))
        outcome = _vo(store, "--fail-fast", "--json", *requested)
        report = json.loads(outcome.output)
        assert report["checked"] == 5
        assert report["all_ok"] is True

    def test_fail_fast_exits_nonzero(self, tmp_path: pathlib.Path) -> None:
        """A failing run under ``--fail-fast`` exits with USER_ERROR."""
        store = _make_repo(tmp_path)
        absent = blob_id(b"missing ff c")
        outcome = _vo(store, "--fail-fast", "--json", absent)
        assert outcome.exit_code == ExitCode.USER_ERROR

    def test_fail_fast_with_all(self, tmp_path: pathlib.Path) -> None:
        """--fail-fast + --all stops the scan on the first corrupt object."""
        from muse.cli.commands.verify_object import _iter_all_object_ids

        store = _make_repo(tmp_path)
        payloads = [f"store {i}".encode() for i in range(10)]
        for payload in payloads:
            _write_object(store, payload)
        # Corrupt the first ID in the listing returned by the command's own iterator.
        listing = _iter_all_object_ids(store)
        _corrupt_object(store, listing[0])
        outcome = _vo(store, "--all", "--fail-fast", "--json")
        report = json.loads(outcome.output)
        # Stopping early means fewer objects were checked than exist.
        assert report["checked"] < len(listing)
        assert report["failed"] == 1

    def test_fail_fast_duration_ms_present(self, tmp_path: pathlib.Path) -> None:
        """``duration_ms`` is still reported when the run stops early."""
        store = _make_repo(tmp_path)
        absent = blob_id(b"missing ff d")
        outcome = _vo(store, "--fail-fast", "--json", absent)
        report = json.loads(outcome.output)
        assert "duration_ms" in report
        assert report["duration_ms"] >= 0.0
| 592 | |
| 593 | |
| 594 | # --------------------------------------------------------------------------- |
| 595 | # Security |
| 596 | # --------------------------------------------------------------------------- |
| 597 | |
| 598 | |
class TestSecurity:
    """Security-oriented tests: error routing, hostile IDs, symlinks, CRLF input."""

    def test_format_error_goes_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """A ``fake_id`` argument exits USER_ERROR without a traceback in the output.

        NOTE(review): despite the name, stderr content is not asserted here.
        """
        repo = _make_repo(tmp_path)
        result = _vo(repo, fake_id("a"))
        assert result.exit_code == ExitCode.USER_ERROR
        assert "Traceback" not in result.output

    def test_no_traceback_on_bad_format(self, tmp_path: pathlib.Path) -> None:
        """A ``fake_id`` argument never produces a Python traceback."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, fake_id("b"))
        assert "Traceback" not in result.output

    def test_ansi_in_error_message_stripped_text(self, tmp_path: pathlib.Path) -> None:
        """Text output for a missing object contains no ESC character."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, blob_id(b"nonexistent"))
        assert "\x1b" not in result.output

    def test_invalid_id_returns_error_not_crash(self, tmp_path: pathlib.Path) -> None:
        """A malformed ID exits USER_ERROR without a traceback."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, "not-a-sha256")
        assert result.exit_code == ExitCode.USER_ERROR
        assert "Traceback" not in result.output

    def test_no_ids_errors_to_stderr(self, tmp_path: pathlib.Path) -> None:
        """Running with no IDs exits USER_ERROR and mentions "error" on stderr."""
        repo = _make_repo(tmp_path)
        result = _vo(repo)
        assert result.exit_code == ExitCode.USER_ERROR
        assert "error" in result.stderr.lower()

    def test_path_traversal_in_object_id_rejected(self, tmp_path: pathlib.Path) -> None:
        """Path-traversal-looking IDs must be rejected by validation before any I/O."""
        repo = _make_repo(tmp_path)
        traversal = f"sha256:../../etc/passwd{'a' * 50}"
        result = _vo(repo, "--json", traversal)
        # Validation must reject it — never attempts to open a path.
        assert result.exit_code == ExitCode.USER_ERROR
        data = json.loads(result.output)
        # The error message explains the format violation, not an fs operation.
        assert data["results"][0]["ok"] is False
        assert "expected" in data["results"][0]["error"]

    def test_unicode_in_object_id_rejected(self, tmp_path: pathlib.Path) -> None:
        """An ID containing a non-ASCII character exits USER_ERROR."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, f"sha256:café{'a' * 60}")
        assert result.exit_code == ExitCode.USER_ERROR

    def test_symlink_shard_directory_skipped(self, tmp_path: pathlib.Path) -> None:
        """A symlinked shard directory must not be followed during --all.

        A decoy file is planted in the symlink target so that following the
        link would surface an extra entry; without it the test could not tell
        a skipped link from a followed one.
        """
        from muse.cli.commands.verify_object import _iter_all_object_ids
        from muse.core.object_store import objects_dir

        repo = _make_repo(tmp_path)
        # Write a real object so the algo dir exists.
        real_oid = _write_object(repo, b"real")
        algo_dir = objects_dir(repo) / "sha256"
        # Decoy in the link target; the name is 62 hex characters so that,
        # assuming a 2-char shard + 62-char file layout (TODO confirm), it
        # would be indistinguishable from a genuine object file if reached.
        decoy_name = "a" * 62
        (tmp_path / decoy_name).write_bytes(b"decoy outside the repo")
        # Add a symlink that points outside the repo.
        sym_shard = algo_dir / "ff"
        sym_shard.symlink_to(tmp_path)
        ids = _iter_all_object_ids(repo)
        assert all(oid.startswith("sha256:") for oid in ids)
        # The genuine object is still listed...
        assert real_oid in ids
        # ...and the symlinked shard's entries must not appear.
        assert not any(decoy_name in oid for oid in ids)

    def test_crlf_injection_in_stdin_does_not_corrupt_id(self, tmp_path: pathlib.Path) -> None:
        """A carriage return before the newline must not become part of the OID."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"crlf injection")
        # Feed oid with an embedded carriage return before the newline.
        data = json.loads(_vo(repo, "--stdin", "--json", stdin=f"{oid}\r\n").output)
        assert data["all_ok"] is True
        # The reported ID must be exactly the clean ID, with no stray "\r".
        assert data["results"][0]["object_id"] == oid

    def test_all_error_goes_to_stderr_not_stdout(self, tmp_path: pathlib.Path) -> None:
        """Argument errors for --all always land on stderr, stdout stays empty."""
        repo = _make_repo(tmp_path)
        result = _vo(repo, "--all", "--stdin", stdin="")
        assert result.stdout_bytes == b""
        assert len(result.stderr) > 0
| 674 | |
| 675 | |
| 676 | # --------------------------------------------------------------------------- |
| 677 | # Data integrity |
| 678 | # --------------------------------------------------------------------------- |
| 679 | |
| 680 | |
class TestDataIntegrity:
    """Checks on empty, truncated, large, and multiply-corrupted objects."""

    def test_zero_byte_blob_round_trips(self, tmp_path: pathlib.Path) -> None:
        """An empty blob verifies cleanly and is reported with a size of zero."""
        repo = _make_repo(tmp_path)
        empty_oid = _write_object(repo, b"")
        outcome = _vo(repo, "--json", empty_oid)
        report = json.loads(outcome.output)
        assert report["all_ok"] is True
        first_entry = report["results"][0]
        assert first_entry["size_bytes"] == 0

    def test_truncated_file_is_hash_mismatch(self, tmp_path: pathlib.Path) -> None:
        """An object cut down to three bytes is reported as a mismatch."""
        repo = _make_repo(tmp_path)
        victim_oid = _write_object(repo, b"file that will be truncated")
        _truncate_object(repo, victim_oid, keep_bytes=3)
        outcome = _vo(repo, "--json", victim_oid)
        first_entry = json.loads(outcome.output)["results"][0]
        assert first_entry["ok"] is False
        assert "mismatch" in first_entry["error"]

    def test_completely_emptied_file_is_hash_mismatch(self, tmp_path: pathlib.Path) -> None:
        """An object truncated to zero bytes no longer verifies."""
        repo = _make_repo(tmp_path)
        victim_oid = _write_object(repo, b"non-empty content")
        _truncate_object(repo, victim_oid, keep_bytes=0)
        outcome = _vo(repo, "--json", victim_oid)
        first_entry = json.loads(outcome.output)["results"][0]
        assert first_entry["ok"] is False

    def test_large_object_streams_without_loading_all(self, tmp_path: pathlib.Path) -> None:
        """A 4 MiB object verifies cleanly and reports its full size."""
        repo = _make_repo(tmp_path)
        four_mib = 4 * 1024 * 1024
        payload = b"a" * four_mib
        big_oid = _write_object(repo, payload)
        outcome = _vo(repo, "--json", big_oid)
        report = json.loads(outcome.output)
        assert report["all_ok"] is True
        first_entry = report["results"][0]
        assert first_entry["size_bytes"] == len(payload)

    def test_multiple_corrupt_objects_all_reported(self, tmp_path: pathlib.Path) -> None:
        """Every corrupted object is counted as failed, not only the first."""
        repo = _make_repo(tmp_path)
        # Write all three objects first, then corrupt them in a second pass.
        written = []
        for index in range(3):
            written.append(_write_object(repo, f"corrupt me {index}".encode()))
        for target_oid in written:
            _corrupt_object(repo, target_oid)
        outcome = _vo(repo, "--json", *written)
        report = json.loads(outcome.output)
        assert report["failed"] == 3
        assert report["all_ok"] is False
| 723 | |
| 724 | |
| 725 | # --------------------------------------------------------------------------- |
| 726 | # Stress |
| 727 | # --------------------------------------------------------------------------- |
| 728 | |
| 729 | |
class TestStress:
    """Volume tests: large stores, repeated invocations, and long stdin lists."""

    def test_100_object_store_all_pass(self, tmp_path: pathlib.Path) -> None:
        """``--all`` over 100 freshly written objects checks every one cleanly."""
        repo = _make_repo(tmp_path)
        for i in range(100):
            _write_object(repo, f"stress content {i}".encode())
        data = json.loads(_vo(repo, "--all", "--json").output)
        assert data["checked"] == 100
        assert data["all_ok"] is True

    def test_1000_object_store_all_pass(self, tmp_path: pathlib.Path) -> None:
        """``--all`` over 1000 freshly written objects checks every one cleanly."""
        repo = _make_repo(tmp_path)
        for i in range(1000):
            _write_object(repo, f"large stress {i}".encode())
        data = json.loads(_vo(repo, "--all", "--json").output)
        assert data["checked"] == 1000
        assert data["all_ok"] is True

    def test_200_sequential_verifies(self, tmp_path: pathlib.Path) -> None:
        """Verifying the same object 200 times in a row succeeds every time."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, _FAKE_CONTENT)
        for i in range(200):
            result = _vo(repo, oid)
            # The message pinpoints which repetition broke, if any.
            assert result.exit_code == 0, f"failed at iteration {i}"

    def test_stdin_200_ids(self, tmp_path: pathlib.Path) -> None:
        """200 newline-separated ids fed through ``--stdin`` are all checked."""
        repo = _make_repo(tmp_path)
        oids = [_write_object(repo, f"content_{i}".encode()) for i in range(200)]
        # Build the payload with plain string operations: a backslash inside an
        # f-string replacement field is a SyntaxError before Python 3.12, which
        # would prevent this whole module from being imported there.
        stdin_payload = "\n".join(oids) + "\n"
        data = json.loads(_vo(repo, "--stdin", "--json", stdin=stdin_payload).output)
        assert data["checked"] == 200
        assert data["all_ok"] is True

    def test_duration_ms_bounded_for_small_op(self, tmp_path: pathlib.Path) -> None:
        """Verifying one small object should complete in well under 5 seconds."""
        repo = _make_repo(tmp_path)
        oid = _write_object(repo, b"small")
        data = json.loads(_vo(repo, "--json", oid).output)
        # ``duration_ms`` is the value reported in the JSON output.
        assert data["duration_ms"] < 5_000
| 767 | |
| 768 | |
| 769 | # --------------------------------------------------------------------------- |
| 770 | # Flag registration |
| 771 | # --------------------------------------------------------------------------- |
| 772 | |
| 773 | |
class TestRegisterFlags:
    """Checks how ``register`` wires the JSON output flag into argparse."""

    def _parse(self, *args: str) -> "argparse.Namespace":
        """Parse ``verify-object`` plus *args* with a freshly built parser."""
        import argparse
        from muse.cli.commands.verify_object import register
        parser = argparse.ArgumentParser()
        subparsers = parser.add_subparsers()
        register(subparsers)
        argv = ["verify-object", *args]
        return parser.parse_args(argv)

    def test_default_json_out_is_false(self) -> None:
        """Without any JSON flag, ``json_out`` is ``False``."""
        parsed = self._parse(fake_id("a"))
        assert parsed.json_out is False

    def test_json_flag_sets_json_out(self) -> None:
        """``--json`` sets ``json_out`` to ``True``."""
        parsed = self._parse("--json", fake_id("a"))
        assert parsed.json_out is True

    def test_j_shorthand_sets_json_out(self) -> None:
        """``-j`` sets ``json_out`` to ``True``, like ``--json``."""
        parsed = self._parse("-j", fake_id("a"))
        assert parsed.json_out is True
File History
4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e
fix: rename objects→blobs in push client and all stale test…
Sonnet 4.6
patch
22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a
fix: repair four test failures from post-migration audit
Sonnet 4.6
patch
28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠
29 days ago