test_security_msgpack_hardening.py
python
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
20 days ago
| 1 | """Phase 2.4 — Malicious msgpack payload hardening tests. |
| 2 | |
| 3 | Attack surface covered |
| 4 | ---------------------- |
| 5 | * ``safe_unpackb`` — the new canonical deserialization primitive: |
| 6 | - Size-bomb (payload larger than max_bytes cap) |
| 7 | - Billion-laughs via enormous maps / arrays |
| 8 | - String length bomb |
| 9 | - Binary blob injection (allow_binary=False default) |
| 10 | - ``strict_map_key=False`` passthrough for legacy staging index |
| 11 | - Clean inputs still deserialise correctly |
| 12 | - 10 000-entry random fuzz with no unhandled exceptions |
| 13 | - Concurrent deserialization stress (50 threads) |
| 14 | |
| 15 | * ``read_msgpack_file`` — file-based wrapper: |
| 16 | - Stat check fires before read_bytes (no OOM on 4 GiB file placeholder) |
| 17 | - Per-value limits enforced after stat |
| 18 | |
| 19 | * ``MAX_PACK_MSGPACK_BYTES`` — new pack/mpack constant: |
| 20 | - Exported and larger than ``MAX_MSGPACK_BYTES`` |
| 21 | - Verified against the 512 MiB specification |
| 22 | |
| 23 | * Callsite hardening (end-to-end via CliRunner / direct call): |
| 24 | - ``bundle._load_bundle`` rejects oversized mpack files |
| 25 | - ``unpack_objects`` stdin rejects size-bomb payloads |
| 26 | - ``verify_pack`` stdin rejects size-bomb payloads |
| 27 | - ``symbol_cache.SymbolCache.load`` rejects oversized cache files |
| 28 | - ``test_history.load_history`` rejects oversized history files |
| 29 | - ``transport._decode`` enforces per-value limits on server responses |
| 30 | - ``_invariants._FileCache.load`` rejects oversized cache files |
| 31 | - ``stage.read_stage`` rejects oversized staging index files |
| 32 | |
| 33 | * TypeGuard narrowing (``_is_commit_dict``, ``_is_snapshot_dict``) — |
| 34 | non-dict entries in a wire mpack are silently dropped, not propagated. |
| 35 | """ |
| 36 | from __future__ import annotations |
| 37 | |
| 38 | import os |
| 39 | import pathlib |
| 40 | import random |
| 41 | import struct |
| 42 | import tempfile |
| 43 | import threading |
| 44 | import time |
| 45 | from unittest.mock import MagicMock, patch |
| 46 | |
| 47 | import msgpack |
| 48 | import pytest |
| 49 | |
| 50 | from muse.core.io import ( |
| 51 | MAX_MSGPACK_BYTES, |
| 52 | MAX_PACK_MSGPACK_BYTES, |
| 53 | read_msgpack_file, |
| 54 | safe_unpackb, |
| 55 | ) |
| 56 | from muse.core.types import MsgpackValue |
| 57 | from muse.core.paths import muse_dir |
| 58 | |
| 59 | |
| 60 | # --------------------------------------------------------------------------- |
| 61 | # Helpers |
| 62 | # --------------------------------------------------------------------------- |
| 63 | |
def _pack(obj: MsgpackValue) -> bytes:
    """Serialise *obj* to msgpack bytes with ``use_bin_type=True``.

    The ``isinstance`` assertion narrows the result of ``msgpack.packb`` to
    ``bytes`` before it is returned.
    """
    encoded = msgpack.packb(obj, use_bin_type=True)
    assert isinstance(encoded, bytes)
    return encoded
| 68 | |
| 69 | |
def _nested_map(depth: int) -> dict[str, MsgpackValue]:
    """Build a dict nested *depth* levels deep.

    Every level is a single-key mapping ``{"x": <next level>}`` and the
    innermost level is ``{"x": None}``.  A *depth* of 1 or less yields just
    the innermost mapping.

    The annotation is spelled out as ``dict[str, MsgpackValue]`` because
    ``MsgpackValue`` is the only name this module imports from
    ``muse.core.types``.
    """
    result: dict[str, MsgpackValue] = {"x": None}
    # Wrap the current mapping once per additional level requested.
    for _ in range(depth - 1):
        result = {"x": result}
    return result
| 76 | |
| 77 | |
def _nested_list(depth: int) -> list[MsgpackValue]:
    """Return a list wrapped inside itself until it is *depth* levels deep.

    The innermost list is ``[None]``; a *depth* of 1 or less returns it
    unwrapped.
    """
    nested: list[MsgpackValue] = [None]
    wraps_left = depth - 1
    while wraps_left > 0:
        nested = [nested]
        wraps_left -= 1
    return nested
| 84 | |
| 85 | |
| 86 | # --------------------------------------------------------------------------- |
| 87 | # 1. safe_unpackb — unit tests |
| 88 | # --------------------------------------------------------------------------- |
| 89 | |
class TestSafeUnpackbSizeBomb:
    """Boundary and error-message checks for the ``max_bytes`` cap of ``safe_unpackb``."""

    def test_exact_limit_is_accepted(self) -> None:
        """A payload whose length equals ``max_bytes`` decodes normally."""
        raw = _pack("x")
        decoded = safe_unpackb(raw, max_bytes=len(raw))
        assert decoded == "x"

    def test_one_byte_over_raises(self) -> None:
        """A cap one byte shorter than the payload raises ``ValueError``."""
        raw = _pack("x")
        cap = len(raw) - 1
        with pytest.raises(ValueError, match="safety cap"):
            safe_unpackb(raw, max_bytes=cap)

    def test_default_limit_is_max_msgpack_bytes(self) -> None:
        """A small payload decodes when ``max_bytes`` is left at its default."""
        decoded = safe_unpackb(_pack({"k": "v"}))
        assert decoded == {"k": "v"}

    def test_size_error_includes_context_label(self) -> None:
        """The ``context`` label appears in the size-cap error message."""
        raw = _pack("big")
        with pytest.raises(ValueError, match="stdin"):
            safe_unpackb(raw, context="stdin", max_bytes=0)

    def test_empty_bytes_accepted(self) -> None:
        """A packed ``None`` (msgpack nil) decodes back to ``None``."""
        decoded = safe_unpackb(_pack(None))
        assert decoded is None
| 118 | |
| 119 | |
class TestSafeUnpackbPerValueLimits:
    """Per-value limits of ``safe_unpackb``: strings, containers, binary, map keys.

    The rejection tests accept any ``Exception`` subclass; they pin only that
    the payload is refused, not which error type is raised.
    """

    def test_string_over_1_mib_raises(self) -> None:
        """A string of 1 MiB + 1 characters is refused even when ``max_bytes`` allows it."""
        big_str = "A" * 1_048_577  # 1 MiB + 1 byte
        payload = _pack(big_str)
        with pytest.raises(Exception):
            safe_unpackb(payload, max_bytes=len(payload) + 100)

    def test_string_exactly_at_1_mib_accepted(self) -> None:
        """A string of exactly 1 MiB characters round-trips unchanged."""
        ok_str = "A" * 1_048_576
        payload = _pack(ok_str)
        result = safe_unpackb(payload, max_bytes=len(payload) + 100)
        assert result == ok_str

    def test_huge_map_rejected(self) -> None:
        """A map32 header declaring 1_000_001 entries is refused."""
        # Build the msgpack bytes by hand rather than packing a real
        # 1M-entry Python dict in the test process.
        count = 1_000_001
        # msgpack map32: 0xdf + uint32 BE count, then `count` ("x", "y") pairs.
        raw = struct.pack(">BI", 0xDF, count) + b"\xa1x\xa1y" * count
        with pytest.raises(Exception):
            safe_unpackb(raw, max_bytes=len(raw) + 1000)

    def test_huge_array_rejected(self) -> None:
        """An array32 header declaring 1_000_001 entries is refused."""
        count = 1_000_001
        # msgpack array32: 0xdd + uint32 BE count, then `count` nil bytes.
        raw = struct.pack(">BI", 0xDD, count) + b"\xc0" * count
        with pytest.raises(Exception):
            safe_unpackb(raw, max_bytes=len(raw) + 1000)

    def test_binary_blob_rejected_by_default(self) -> None:
        """Without ``allow_binary``, a packed ``bytes`` value is refused."""
        payload = _pack(b"\x00\xff" * 10)
        with pytest.raises(Exception):
            safe_unpackb(payload, max_bytes=len(payload) + 100)

    def test_binary_blob_accepted_with_allow_binary(self) -> None:
        """``allow_binary=True`` lets a packed ``bytes`` value round-trip."""
        blob = b"\xde\xad\xbe\xef" * 4
        payload = _pack(blob)
        result = safe_unpackb(payload, max_bytes=len(payload) + 100, allow_binary=True)
        assert result == blob

    def test_strict_map_key_true_rejects_integer_keys(self) -> None:
        """With default arguments, a map with an integer key is refused."""
        # Hand-crafted msgpack: fixmap of one entry, key 1, value "v".
        payload = struct.pack(">BB", 0x81, 0x01) + b"\xa1v"  # {1: "v"}
        with pytest.raises(Exception):
            safe_unpackb(payload, max_bytes=100)

    def test_strict_map_key_false_allows_integer_keys(self) -> None:
        """``strict_map_key=False`` permits integer keys such as ``{1: 'v'}``."""
        payload = struct.pack(">BB", 0x81, 0x01) + b"\xa1v"  # {1: "v"}
        result = safe_unpackb(payload, max_bytes=100, strict_map_key=False)
        # Only the value is pinned; the decoded key representation is not.
        assert isinstance(result, dict) and list(result.values()) == ["v"]

    def test_invalid_msgpack_raises(self) -> None:
        """Bytes that are not a valid msgpack document are refused."""
        with pytest.raises(Exception):
            safe_unpackb(b"\xff\xfe\xfd\xfc", max_bytes=100)

    def test_clean_dict_roundtrip(self) -> None:
        """A small dict of str / int / bool values round-trips unchanged."""
        # Spelled-out type: MsgpackValue is the only muse.core.types name
        # imported by this module.
        original: dict[str, MsgpackValue] = {"key": "value", "n": 42, "flag": True}
        result = safe_unpackb(_pack(original))
        assert result == original

    def test_clean_list_roundtrip(self) -> None:
        """A small heterogeneous list round-trips unchanged."""
        original: list[MsgpackValue] = ["a", 1, None, True]
        result = safe_unpackb(_pack(original))
        assert result == original
| 193 | |
| 194 | |
class TestSafeUnpackbNestingBomb:
    """Decoding 500-level nesting must finish promptly, whether it raises or returns."""

    @staticmethod
    def _assert_finishes_quickly(nested: MsgpackValue, failure_label: str) -> None:
        """Pack *nested*, decode it, and assert the decode took under one second.

        Any ``Exception`` raised by ``safe_unpackb`` is swallowed on purpose:
        only the elapsed time is checked.  The interpreter recursion limit is
        raised to at least 5000 for the duration and restored afterwards.
        """
        import sys

        saved_limit = sys.getrecursionlimit()
        sys.setrecursionlimit(max(saved_limit, 5000))
        try:
            raw = _pack(nested)
            began = time.monotonic()
            try:
                safe_unpackb(raw, max_bytes=len(raw) + 10_000)
            except Exception:
                pass
            elapsed = time.monotonic() - began
            assert elapsed < 1.0, f"{failure_label} ({elapsed:.2f}s)"
        finally:
            sys.setrecursionlimit(saved_limit)

    def test_500_nested_dicts_terminates_quickly(self) -> None:
        """500 nested dicts must raise or decode within one second."""
        self._assert_finishes_quickly(
            _nested_map(500), "Nested dict deserialization hung"
        )

    def test_500_nested_lists_terminates_quickly(self) -> None:
        """500 nested lists must raise or decode within one second."""
        self._assert_finishes_quickly(
            _nested_list(500), "Nested list deserialization hung"
        )
| 230 | |
| 231 | |
| 232 | # --------------------------------------------------------------------------- |
| 233 | # 2. read_msgpack_file — unit tests |
| 234 | # --------------------------------------------------------------------------- |
| 235 | |
class TestReadMsgpackFile:
    """``read_msgpack_file``: size cap (driven through a mocked ``Path.stat``) and options."""

    def test_normal_file_roundtrips(self, tmp_path: pathlib.Path) -> None:
        """A small packed dict written to disk reads back equal."""
        target = tmp_path / "ok.msgpack"
        target.write_bytes(_pack({"a": 1}))
        assert read_msgpack_file(target) == {"a": 1}

    def test_oversized_file_raises_os_error_before_read(self, tmp_path: pathlib.Path) -> None:
        """A reported size above ``MAX_MSGPACK_BYTES`` raises ``OSError``.

        The file on disk is tiny; only ``Path.stat`` is mocked so that it
        reports an oversized ``st_size``.
        """
        target = tmp_path / "big.msgpack"
        target.write_bytes(_pack("tiny"))
        oversized_stat = MagicMock(st_size=MAX_MSGPACK_BYTES + 1)
        with patch.object(pathlib.Path, "stat", return_value=oversized_stat):
            with pytest.raises(OSError, match="safety cap"):
                read_msgpack_file(target)

    def test_error_message_includes_filename(self, tmp_path: pathlib.Path) -> None:
        """The size-cap ``OSError`` message mentions the offending file name."""
        target = tmp_path / "corrupt.msgpack"
        target.write_bytes(_pack("x"))
        oversized_stat = MagicMock(st_size=MAX_MSGPACK_BYTES + 1)
        with patch.object(pathlib.Path, "stat", return_value=oversized_stat):
            with pytest.raises(OSError, match="corrupt.msgpack"):
                read_msgpack_file(target)

    def test_custom_max_bytes_respected(self, tmp_path: pathlib.Path) -> None:
        """A caller-supplied ``max_bytes`` replaces the default cap."""
        target = tmp_path / "small.msgpack"
        target.write_bytes(_pack("x"))
        on_disk = target.stat().st_size
        # Cap one byte above the file size: accepted.
        assert read_msgpack_file(target, max_bytes=on_disk + 1) == "x"
        # Cap one byte below the file size: refused.
        with pytest.raises(OSError):
            read_msgpack_file(target, max_bytes=on_disk - 1)

    def test_strict_map_key_false_passed_through(self, tmp_path: pathlib.Path) -> None:
        """With ``strict_map_key=False`` a file holding ``{1: "v"}`` decodes to a dict."""
        target = tmp_path / "int_keys.msgpack"
        # Hand-crafted msgpack: fixmap of one entry, int key 1 -> "v".
        int_key_map = struct.pack(">BB", 0x81, 0x01) + b"\xa1v"
        target.write_bytes(int_key_map)
        decoded = read_msgpack_file(target, strict_map_key=False)
        assert isinstance(decoded, dict) and list(decoded.values()) == ["v"]

    def test_per_value_limits_apply_after_stat(self, tmp_path: pathlib.Path) -> None:
        """A file within the size cap is still refused when it holds a huge string."""
        target = tmp_path / "big_str.msgpack"
        target.write_bytes(_pack("Z" * 1_048_577))
        on_disk = target.stat().st_size
        with pytest.raises(Exception):
            read_msgpack_file(target, max_bytes=on_disk + 1000)
| 290 | |
| 291 | |
| 292 | # --------------------------------------------------------------------------- |
| 293 | # 3. MAX_PACK_MSGPACK_BYTES — constant tests |
| 294 | # --------------------------------------------------------------------------- |
| 295 | |
class TestMaxPackMsgpackBytes:
    """Sanity checks on the ``MAX_PACK_MSGPACK_BYTES`` constant."""

    def test_exported(self) -> None:
        """The constant is importable and not ``None``."""
        assert MAX_PACK_MSGPACK_BYTES is not None

    def test_is_int(self) -> None:
        """The constant is an ``int``."""
        assert isinstance(MAX_PACK_MSGPACK_BYTES, int)

    def test_larger_than_max_msgpack_bytes(self) -> None:
        """The pack cap is strictly greater than ``MAX_MSGPACK_BYTES``."""
        assert MAX_MSGPACK_BYTES < MAX_PACK_MSGPACK_BYTES

    def test_is_512_mib(self) -> None:
        """The pack cap equals 512 MiB."""
        expected = 512 * 1024 * 1024
        assert MAX_PACK_MSGPACK_BYTES == expected
| 308 | |
| 309 | |
| 310 | # --------------------------------------------------------------------------- |
| 311 | # 4. safe_unpackb fuzzing — 10 000 random inputs, no unhandled exceptions |
| 312 | # --------------------------------------------------------------------------- |
| 313 | |
class TestSafeUnpackbFuzz10k:
    """Seeded fuzzing of ``safe_unpackb`` with 10 000 inputs per test.

    Each call must either return or raise one of the tolerated exception
    types; any other exception fails the test with the offending iteration.
    """

    @staticmethod
    def _random_scalar(rng: random.Random) -> MsgpackValue:
        """Draw one small scalar: int32, float, printable-ASCII str, None or bool."""
        kind = rng.randint(0, 4)
        if kind == 0:
            return rng.randint(-(2**31), 2**31 - 1)
        if kind == 1:
            return rng.random()
        if kind == 2:
            # Length is drawn before the characters; this order keeps the
            # seeded sequence of generated values stable.
            length = rng.randint(0, 32)
            return "".join(chr(rng.randint(32, 126)) for _ in range(length))
        if kind == 3:
            return None
        return bool(rng.randint(0, 1))

    def test_fuzz_10k_random_bytes(self) -> None:
        """Random byte strings of 0-64 bytes only ever raise tolerated errors."""
        rng = random.Random(0xDEADBEEF)
        tolerated = (
            ValueError,  # size cap
            msgpack.UnpackException,
            msgpack.ExtraData,
            msgpack.FormatError,
            RecursionError,
            UnicodeDecodeError,
            MemoryError,
        )
        for i in range(10_000):
            size = rng.randint(0, 64)
            payload = bytes([rng.randint(0, 255) for _ in range(size)])
            try:
                safe_unpackb(payload, max_bytes=256)
            except tolerated:
                continue
            except Exception as exc:
                pytest.fail(
                    f"Unexpected exception on iteration {i} "
                    f"(payload={payload.hex()!r}): {type(exc).__name__}: {exc}"
                )

    def test_fuzz_10k_valid_msgpack(self) -> None:
        """Packed random scalars decode, or raise only ValueError / UnpackException."""
        rng = random.Random(0xCAFEBABE)
        tolerated = (
            ValueError,  # size cap or value limit
            msgpack.UnpackException,
        )
        for i in range(10_000):
            payload = _pack(self._random_scalar(rng))
            try:
                safe_unpackb(payload, max_bytes=len(payload) + 10)
            except tolerated:
                continue
            except Exception as exc:
                pytest.fail(
                    f"Unexpected exception on iteration {i}: "
                    f"{type(exc).__name__}: {exc}"
                )
| 375 | |
| 376 | |
| 377 | # --------------------------------------------------------------------------- |
| 378 | # 5. Concurrent deserialization stress |
| 379 | # --------------------------------------------------------------------------- |
| 380 | |
class TestSafeUnpackbConcurrent:
    """50 threads call ``safe_unpackb`` at once; every worker must finish cleanly.

    Workers record failures in a shared list instead of raising, because an
    exception inside a thread would not fail the test on its own.
    """

    @staticmethod
    def _start_and_join(
        threads: list[threading.Thread], timeout: float = 5.0
    ) -> list[str]:
        """Start every thread, join each with *timeout*, and report stragglers.

        ``Thread.join(timeout=...)`` returns normally when the timeout expires,
        so liveness has to be checked explicitly afterwards.

        Returns:
            Names of the threads that are still alive after their join.
        """
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join(timeout=timeout)
        return [thread.name for thread in threads if thread.is_alive()]

    def test_50_threads_concurrent_safe_unpackb(self) -> None:
        """All 50 workers decode the same payload to the same dict."""
        payload = _pack({"key": "value", "n": 42})
        errors: list[Exception] = []

        def _worker() -> None:
            try:
                result = safe_unpackb(payload)
                assert result == {"key": "value", "n": 42}
            except Exception as exc:
                errors.append(exc)

        # daemon=True: a worker that hangs must not block interpreter exit.
        threads = [threading.Thread(target=_worker, daemon=True) for _ in range(50)]
        hung = self._start_and_join(threads)

        assert not hung, f"Threads still running after join timeout: {hung}"
        assert not errors, f"Concurrent errors: {errors}"

    def test_50_threads_size_bomb_rejected_concurrently(self) -> None:
        """All 50 workers get ``ValueError`` when ``max_bytes=0``."""
        payload = _pack("tiny")
        errors: list[str] = []

        def _worker() -> None:
            try:
                safe_unpackb(payload, max_bytes=0)
                errors.append("Expected ValueError, got success")
            except ValueError:
                pass  # correct
            except Exception as exc:
                errors.append(f"Wrong exception: {type(exc).__name__}: {exc}")

        # daemon=True: a worker that hangs must not block interpreter exit.
        threads = [threading.Thread(target=_worker, daemon=True) for _ in range(50)]
        hung = self._start_and_join(threads)

        assert not hung, f"Threads still running after join timeout: {hung}"
        assert not errors, f"Errors: {errors}"
| 423 | |
| 424 | |
| 425 | # --------------------------------------------------------------------------- |
| 426 | # 6. Callsite hardening — bundle._load_bundle |
| 427 | # --------------------------------------------------------------------------- |
| 428 | |
class TestBundleLoadHardening:
    """``_load_bundle``: size cap, payload shape and entry filtering."""

    def test_oversized_bundle_file_exits_cleanly(self, tmp_path: pathlib.Path) -> None:
        """A reported size above ``MAX_PACK_MSGPACK_BYTES`` ends in ``SystemExit``."""
        from muse.cli.commands.bundle import _load_bundle

        target = tmp_path / "huge.mpack"
        target.write_bytes(_pack({"commits": [], "snapshots": [], "blobs": []}))
        oversized_stat = MagicMock(st_size=MAX_PACK_MSGPACK_BYTES + 1)
        with patch.object(pathlib.Path, "stat", return_value=oversized_stat):
            with pytest.raises(SystemExit):
                _load_bundle(target)

    def test_bundle_size_check_fires_before_read(self, tmp_path: pathlib.Path) -> None:
        """``Path.read_bytes`` is never invoked when ``stat`` reports an oversized file."""
        from muse.cli.commands.bundle import _load_bundle

        target = tmp_path / "fake_huge.mpack"
        target.write_bytes(_pack({}))

        real_read_bytes = pathlib.Path.read_bytes
        read_count = 0

        def counting_read_bytes(self: pathlib.Path) -> bytes:
            # Count the call, then delegate to the genuine implementation.
            nonlocal read_count
            read_count += 1
            return real_read_bytes(self)

        oversized_stat = MagicMock(st_size=MAX_PACK_MSGPACK_BYTES + 1)
        with patch.object(pathlib.Path, "stat", return_value=oversized_stat):
            with patch.object(pathlib.Path, "read_bytes", counting_read_bytes):
                with pytest.raises(SystemExit):
                    _load_bundle(target)

        # The size check must have aborted the load before any read happened.
        assert read_count == 0, "read_bytes was called despite oversized stat"

    def test_valid_bundle_loads_correctly(self, tmp_path: pathlib.Path) -> None:
        """A well-formed bundle loads and yields a dict."""
        from muse.cli.commands.bundle import _load_bundle

        target = tmp_path / "valid.mpack"
        target.write_bytes(_pack({"commits": [], "snapshots": [], "blobs": []}))
        loaded = _load_bundle(target)
        assert isinstance(loaded, dict)

    def test_non_dict_bundle_payload_rejected(self, tmp_path: pathlib.Path) -> None:
        """A top-level list payload ends in ``SystemExit``."""
        from muse.cli.commands.bundle import _load_bundle

        target = tmp_path / "list.mpack"
        target.write_bytes(_pack([1, 2, 3]))
        with pytest.raises(SystemExit):
            _load_bundle(target)

    def test_non_dict_commits_entries_silently_dropped(self, tmp_path: pathlib.Path) -> None:
        """Only dict entries of the ``commits`` list survive loading."""
        from muse.cli.commands.bundle import _load_bundle

        surviving_entry = {"real_key": "real_value"}
        mixed_bundle = {
            "commits": ["string_entry", 42, None, {"real_key": "real_value"}],
            "snapshots": [],
            "blobs": [],
        }
        target = tmp_path / "mixed.mpack"
        target.write_bytes(_pack(mixed_bundle))

        loaded_commits = _load_bundle(target).get("commits", [])
        assert len(loaded_commits) == 1
        assert loaded_commits[0] == surviving_entry
| 496 | |
| 497 | |
| 498 | # --------------------------------------------------------------------------- |
| 499 | # 7. Callsite hardening — unpack_objects (stdin) |
| 500 | # --------------------------------------------------------------------------- |
| 501 | |
class TestUnpackObjectsStdinHardening:
    """``unpack_objects.run`` exits via ``SystemExit`` on hostile stdin payloads.

    Both tests replace the command module's ``sys`` with a mock whose
    ``stdin.buffer.read`` returns the crafted bytes, and patch ``require_repo``
    to return ``tmp_path``.
    """

    def test_size_bomb_stdin_rejected(self, tmp_path: pathlib.Path) -> None:
        """A stdin payload exceeding MAX_PACK_MSGPACK_BYTES is rejected."""
        import sys

        import muse.cli.commands.unpack_objects as _mod

        with patch.object(_mod, "sys") as mock_sys:
            with patch(
                "muse.cli.commands.unpack_objects.require_repo",
                return_value=tmp_path,
            ):
                # Create the repo metadata directory under tmp_path
                # (require_repo itself is patched to return tmp_path).
                muse_dir(tmp_path).mkdir(exist_ok=True)
                mock_sys.stdin = MagicMock()
                mock_sys.stdin.buffer = MagicMock()

                # Simulate a payload one byte larger than MAX_PACK_MSGPACK_BYTES.
                oversized = b"X" * (MAX_PACK_MSGPACK_BYTES + 1)
                mock_sys.stdin.buffer.read.return_value = oversized
                # Keep the real output streams reachable through the mocked sys.
                mock_sys.stderr = sys.stderr
                mock_sys.stdout = sys.stdout

                args = MagicMock()
                args.fmt = "json"
                with pytest.raises(SystemExit):
                    _mod.run(args)

    def test_invalid_msgpack_stdin_rejected(self, tmp_path: pathlib.Path) -> None:
        """Garbage stdin bytes end in ``SystemExit`` with a non-zero code."""
        import sys

        import muse.cli.commands.unpack_objects as _mod

        with patch.object(_mod, "sys") as mock_sys:
            with patch(
                "muse.cli.commands.unpack_objects.require_repo",
                return_value=tmp_path,
            ):
                muse_dir(tmp_path).mkdir(exist_ok=True)
                mock_sys.stdin = MagicMock()
                mock_sys.stdin.buffer = MagicMock()
                mock_sys.stdin.buffer.read.return_value = b"\xff\xfe\xfd garbage"
                # Keep the real output streams reachable through the mocked sys.
                mock_sys.stderr = sys.stderr
                mock_sys.stdout = sys.stdout

                args = MagicMock()
                args.fmt = "json"
                with pytest.raises(SystemExit) as exc_info:
                    _mod.run(args)

                assert exc_info.value.code != 0
| 554 | |
| 555 | |
| 556 | # --------------------------------------------------------------------------- |
| 557 | # 8. Callsite hardening — verify_pack (stdin / file) |
| 558 | # --------------------------------------------------------------------------- |
| 559 | |
class TestVerifyPackHardening:
    """``verify_pack.run`` exits via ``SystemExit`` on an oversized stdin payload."""

    def test_size_bomb_raises_system_exit(self) -> None:
        """Stdin bytes one larger than ``MAX_PACK_MSGPACK_BYTES`` end in ``SystemExit``."""
        import sys
        import muse.cli.commands.verify_pack as _mod

        bomb = b"X" * (MAX_PACK_MSGPACK_BYTES + 1)

        # No file argument is given, so the mocked stdin supplies the payload.
        cli_args = MagicMock()
        cli_args.format = "json"
        cli_args.file = None

        with patch.object(_mod, "sys") as fake_sys:
            fake_sys.stdin = MagicMock()
            fake_sys.stdin.buffer = MagicMock()
            fake_sys.stdin.buffer.read.return_value = bomb
            # Keep the real output streams reachable through the mocked sys.
            fake_sys.stderr = sys.stderr
            fake_sys.stdout = sys.stdout

            with pytest.raises(SystemExit):
                _mod.run(cli_args)
| 582 | |
| 583 | |
| 584 | # --------------------------------------------------------------------------- |
| 585 | # 9. Callsite hardening — symbol_cache.SymbolCache.load |
| 586 | # --------------------------------------------------------------------------- |
| 587 | |
class TestSymbolCacheHardening:
    """``SymbolCache.load`` yields a zero-size cache for oversized, corrupt or empty files."""

    @staticmethod
    def _seed_cache(root: pathlib.Path, raw: bytes) -> pathlib.Path:
        """Write *raw* to the symbol-cache path under *root*; return the muse dir."""
        from muse.core.paths import symbol_cache_path

        dot_muse = muse_dir(root)
        (dot_muse / "cache").mkdir(parents=True)
        symbol_cache_path(root).write_bytes(raw)
        return dot_muse

    def test_oversized_cache_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A reported size above ``MAX_MSGPACK_BYTES`` loads as an empty cache."""
        from muse.core.symbol_cache import SymbolCache

        dot_muse = self._seed_cache(tmp_path, _pack({"version": 1, "entries": {}}))

        oversized_stat = MagicMock(st_size=MAX_MSGPACK_BYTES + 1)
        with patch.object(pathlib.Path, "stat", return_value=oversized_stat):
            cache = SymbolCache.load(dot_muse)

        assert cache.size == 0

    def test_corrupt_cache_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Garbage bytes in the cache file load as an empty cache."""
        from muse.core.symbol_cache import SymbolCache

        dot_muse = self._seed_cache(tmp_path, b"\xff\xfe garbage")

        cache = SymbolCache.load(dot_muse)
        assert cache.size == 0

    def test_valid_cache_still_loads(self, tmp_path: pathlib.Path) -> None:
        """A well-formed cache file with no entries loads with size 0."""
        from muse.core.symbol_cache import SymbolCache

        dot_muse = self._seed_cache(tmp_path, _pack({"version": 1, "entries": {}}))

        cache = SymbolCache.load(dot_muse)
        assert cache.size == 0
| 626 | |
| 627 | |
| 628 | # --------------------------------------------------------------------------- |
| 629 | # 10. Callsite hardening — test_history.load_history |
| 630 | # --------------------------------------------------------------------------- |
| 631 | |
class TestTestHistoryHardening:
    """``load_history`` yields ``[]`` for oversized or corrupt history files."""

    @staticmethod
    def _seed_history(root: pathlib.Path, raw: bytes) -> None:
        """Write *raw* to the test-history path under *root*, creating parents."""
        from muse.core.paths import test_history_path

        history_file = test_history_path(root)
        history_file.parent.mkdir(parents=True, exist_ok=True)
        history_file.write_bytes(raw)

    def test_oversized_history_returns_empty_list(self, tmp_path: pathlib.Path) -> None:
        """A reported size above ``MAX_MSGPACK_BYTES`` loads as an empty list."""
        from muse.core.test_history import load_history

        self._seed_history(tmp_path, _pack({"version": 1, "runs": []}))

        oversized_stat = MagicMock(st_size=MAX_MSGPACK_BYTES + 1)
        with patch.object(pathlib.Path, "stat", return_value=oversized_stat):
            runs = load_history(tmp_path)

        assert runs == []

    def test_corrupt_history_returns_empty_list(self, tmp_path: pathlib.Path) -> None:
        """Garbage bytes in the history file load as an empty list."""
        from muse.core.test_history import load_history

        self._seed_history(tmp_path, b"\xde\xad\xbe\xef garbage")

        assert load_history(tmp_path) == []
| 659 | |
| 660 | |
| 661 | # --------------------------------------------------------------------------- |
| 662 | # 11. Callsite hardening — transport._decode |
| 663 | # --------------------------------------------------------------------------- |
| 664 | |
class TestTransportDecodeHardening:
    """``HttpTransport._decode``: empty, valid, oversized-value, non-dict and garbage bodies."""

    def test_empty_response_returns_empty_dict(self) -> None:
        """An empty body decodes to ``{}``."""
        from muse.core.transport import HttpTransport

        assert HttpTransport._decode(b"") == {}

    def test_valid_msgpack_decodes(self) -> None:
        """A packed dict decodes back to an equal dict."""
        from muse.core.transport import HttpTransport

        expected = {"status": "ok", "count": 42}
        decoded = HttpTransport._decode(_pack({"status": "ok", "count": 42}))
        assert decoded == expected

    def test_string_over_limit_raises_transport_error(self) -> None:
        """A value holding a 1 MiB + 1 character string raises ``TransportError``."""
        from muse.core.transport import HttpTransport, TransportError

        body = _pack({"msg": "B" * 1_048_577})
        with pytest.raises(TransportError, match="invalid msgpack"):
            HttpTransport._decode(body)

    def test_non_dict_top_level_returns_empty_dict(self) -> None:
        """A top-level list decodes to ``{}``."""
        from muse.core.transport import HttpTransport

        decoded = HttpTransport._decode(_pack([1, 2, 3]))
        assert decoded == {}

    def test_invalid_msgpack_raises_transport_error(self) -> None:
        """Garbage bytes raise ``TransportError``."""
        from muse.core.transport import HttpTransport, TransportError

        garbage = b"\xff\xfe garbage"
        with pytest.raises(TransportError, match="invalid msgpack"):
            HttpTransport._decode(garbage)
| 696 | |
| 697 | |
| 698 | # --------------------------------------------------------------------------- |
| 699 | # 12. Callsite hardening — stage.read_stage |
| 700 | # --------------------------------------------------------------------------- |
| 701 | |
class TestReadStageHardening:
    """``read_stage`` yields ``{}`` for oversized, corrupt or non-dict staging files."""

    @staticmethod
    def _seed_stage(root: pathlib.Path, raw: bytes) -> None:
        """Write *raw* to the staging-index path under *root*, creating parents."""
        from muse.plugins.code.stage import stage_path

        index_file = stage_path(root)
        index_file.parent.mkdir(parents=True, exist_ok=True)
        index_file.write_bytes(raw)

    def test_oversized_stage_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A reported size above ``MAX_MSGPACK_BYTES`` reads as ``{}``."""
        from muse.plugins.code.stage import read_stage

        self._seed_stage(tmp_path, _pack({"version": 2, "entries": {}}))

        oversized_stat = MagicMock(st_size=MAX_MSGPACK_BYTES + 1)
        with patch.object(pathlib.Path, "stat", return_value=oversized_stat):
            staged = read_stage(tmp_path)

        assert staged == {}

    def test_corrupt_stage_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Garbage bytes in the staging file read as ``{}``."""
        from muse.plugins.code.stage import read_stage

        self._seed_stage(tmp_path, b"\xfe\xed garbage")

        assert read_stage(tmp_path) == {}

    def test_non_dict_stage_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A staging file whose top-level value is a list reads as ``{}``."""
        from muse.plugins.code.stage import read_stage

        self._seed_stage(tmp_path, _pack([1, 2, 3]))  # list, not dict

        assert read_stage(tmp_path) == {}
| 740 | |
| 741 | |
| 742 | # --------------------------------------------------------------------------- |
| 743 | # 13. TypeGuard narrowing — bundle and unpack_objects |
| 744 | # --------------------------------------------------------------------------- |
| 745 | |
class TestTypeGuardNarrowing:
    """Guard helpers in ``bundle`` and ``unpack_objects`` accept dicts and reject the rest."""

    def test_is_commit_dict_rejects_non_dicts(self) -> None:
        """``bundle._is_commit_dict`` is True for dicts and False for other values."""
        from muse.cli.commands.bundle import _is_commit_dict

        for accepted in ({}, {"commit_id": "abc"}):
            assert _is_commit_dict(accepted) is True
        for rejected in ("string", 42, None, []):
            assert _is_commit_dict(rejected) is False

    def test_is_snapshot_dict_rejects_non_dicts(self) -> None:
        """``bundle._is_snapshot_dict`` is True for dicts and False for other values."""
        from muse.cli.commands.bundle import _is_snapshot_dict

        for accepted in ({}, {"snapshot_id": "abc"}):
            assert _is_snapshot_dict(accepted) is True
        for rejected in ("string", 42, None):
            assert _is_snapshot_dict(rejected) is False

    def test_unpack_objects_is_commit_dict(self) -> None:
        """``unpack_objects._is_commit_dict`` accepts a dict and rejects a str."""
        from muse.cli.commands.unpack_objects import _is_commit_dict

        assert _is_commit_dict({"commit_id": "abc"}) is True
        assert _is_commit_dict("not a dict") is False

    def test_unpack_objects_is_snapshot_dict(self) -> None:
        """``unpack_objects._is_snapshot_dict`` accepts a dict and rejects an int."""
        from muse.cli.commands.unpack_objects import _is_snapshot_dict

        assert _is_snapshot_dict({"snapshot_id": "abc"}) is True
        assert _is_snapshot_dict(99) is False

    def test_as_branch_heads_filters_non_str_values(self) -> None:
        """Entries whose value is not a ``str`` are dropped from the result."""
        from muse.cli.commands.unpack_objects import _as_branch_heads

        raw_heads = {"main": "abc123", "bad": 42, "ok": "def456"}
        expected = {"main": "abc123", "ok": "def456"}
        assert _as_branch_heads(raw_heads) == expected

    def test_as_branch_heads_non_dict_input(self) -> None:
        """Non-dict inputs produce an empty mapping."""
        from muse.cli.commands.unpack_objects import _as_branch_heads

        for not_a_dict in (None, "string", []):
            assert _as_branch_heads(not_a_dict) == {}
File History
5 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e
fix: rename objects→blobs in push client and all stale test…
Sonnet 4.6
patch
22 days ago
sha256:0313c134f0ef4518a9c3a0ec359ffdc42546dc720010730374edfe0857caf7ef
rename: delta_add → delta_upsert across wire format, source…
Sonnet 4.6
minor
⚠
22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a
fix: repair four test failures from post-migration audit
Sonnet 4.6
patch
28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠
29 days ago