test_security_object_store_poisoning.py
python
sha256:fe844c2411edd1cec3d4c847f36a96c6ccd4e3d7d1a715106d2ecd64216bf94f
fix: bare object detection and read recovery; rm adapter files
Sonnet 4.6
minor
⚠ breaking
15 days ago
| 1 | """Phase 2.3 — Object store poisoning tests. |
| 2 | |
| 3 | Covers every adversarial input and edge case identified in the recon phase: |
| 4 | |
| 5 | 1. Hash mismatch injection into write_object / write_object_from_path. |
| 6 | 2. Per-object size cap enforcement at write time (not just read time). |
| 7 | 3. restore_object re-hashes source before copying — corrupt store is detected. |
| 8 | 4. apply_mpack: object count limit (pack-bomb). |
| 9 | 5. apply_mpack: per-object size cap before write_object is called. |
| 10 | 6. apply_mpack: object-ID deduplication (sha256 O(1) for duplicate IDs). |
| 11 | 7. apply_mpack: snapshot / commit isolation — malformed entries skipped. |
| 12 | 8. Zero-byte objects: valid empty blobs are accepted. |
| 13 | 9. All write_object callsites confirmed to use content-derived IDs. |
| 14 | 10. Stress: 10 000-object pack processed within time budget. |
| 15 | 11. Stress: 50 concurrent poisoning attempts do not corrupt the store. |
| 16 | 12. Threat-model boundary: SHA-256 collision infeasibility documented via test. |
| 17 | """ |
| 18 | |
| 19 | from __future__ import annotations |
| 20 | |
import os
import pathlib
import tempfile
import threading
import time
from collections.abc import Iterator
from unittest.mock import patch

import pytest
| 29 | |
| 30 | from muse.core.object_store import ( |
| 31 | has_object, |
| 32 | read_object, |
| 33 | restore_object, |
| 34 | write_object, |
| 35 | write_object_from_path, |
| 36 | ) |
| 37 | from muse.core.mpack import ApplyResult, MPack, apply_mpack |
| 38 | from muse.core.store import CommitDict, SnapshotDict |
| 39 | from muse.core.validation import MAX_OBJECT_WRITE_BYTES, MAX_PACK_OBJECTS |
| 40 | from muse.core.types import Manifest, blob_id, content_hash, hash_file, long_id, now_utc_iso |
| 41 | from muse.core.paths import config_toml_path, muse_dir |
| 42 | |
| 43 | |
| 44 | # --------------------------------------------------------------------------- |
| 45 | # Helpers |
| 46 | # --------------------------------------------------------------------------- |
| 47 | |
| 48 | |
| 49 | |
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Build a minimal repository skeleton under *tmp_path* and return its root.

    Creates ``tmp_path / "repo"``, the sub-directories the tests rely on
    inside the directory reported by ``muse_dir``, a ``HEAD`` file pointing
    at ``refs/heads/main`` and a stub ``repo.json``.
    """
    root = tmp_path / "repo"
    root.mkdir()
    meta = muse_dir(root)
    layout = ("objects", "commits", "snapshots", "refs", "refs/heads", "tags")
    for name in layout:
        # parents=True covers the case where the metadata directory itself
        # does not exist yet.
        (meta / name).mkdir(parents=True)
    head_file = meta / "HEAD"
    head_file.write_text("ref: refs/heads/main\n")
    repo_json = meta / "repo.json"
    repo_json.write_text('{"repo_id": "test-repo"}')
    return root
| 59 | |
| 60 | |
def _stored_object(repo: pathlib.Path, content: bytes) -> str:
    """Store *content* under the ID computed by ``blob_id`` and return that ID."""
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id
| 66 | |
| 67 | |
def _minimal_commit_dict(snap_id: str) -> CommitDict:
    """Return a parentless ``CommitDict`` on ``main`` that references *snap_id*.

    The commit ID is a fixed placeholder (64 ``a`` characters), the repo ID
    is produced by ``content_hash`` from *snap_id*, and the timestamp comes
    from ``now_utc_iso`` at call time.
    """
    return CommitDict(
        commit_id="a" * 64,
        repo_id=content_hash({"role": "repo", "snap_id": snap_id}),
        branch="main",
        parent_commit_id=None,
        parent2_commit_id=None,
        snapshot_id=snap_id,
        message="test",
        author="test",
        committed_at=now_utc_iso(),
        metadata={},
    )
| 83 | |
| 84 | |
def _minimal_snapshot_dict(manifest: Manifest) -> SnapshotDict:
    """Wrap *manifest* in a ``SnapshotDict`` whose ID is ``hash_snapshot(manifest)``.

    ``created_at`` is taken from ``now_utc_iso`` at call time.
    """
    # Imported lazily, as in the rest of this module's helpers.
    from muse.core.ids import hash_snapshot

    return SnapshotDict(
        snapshot_id=hash_snapshot(manifest),
        manifest=manifest,
        created_at=now_utc_iso(),
    )
| 94 | |
| 95 | |
| 96 | # --------------------------------------------------------------------------- |
| 97 | # 1. Hash mismatch injection |
| 98 | # --------------------------------------------------------------------------- |
| 99 | |
| 100 | |
class TestHashMismatch:
    """The write entry points must only accept content matching the declared ID."""

    def test_write_object_wrong_content_raises(self, tmp_path: pathlib.Path) -> None:
        """Bytes that do not hash to the declared ID are refused and not stored."""
        repo = _make_repo(tmp_path)
        genuine = b"legitimate content"
        forged = b"poisoned content"
        declared_id = blob_id(genuine)
        with pytest.raises(ValueError, match="Content integrity failure"):
            write_object(repo, declared_id, forged)
        assert not has_object(repo, declared_id), "Poisoned object must not be stored"

    def test_write_object_correct_content_succeeds(self, tmp_path: pathlib.Path) -> None:
        """Matching ID and content: write returns True and the bytes read back."""
        repo = _make_repo(tmp_path)
        payload = b"valid content"
        payload_id = blob_id(payload)
        stored = write_object(repo, payload_id, payload)
        assert stored is True
        assert read_object(repo, payload_id) == payload

    def test_write_object_from_path_wrong_id_raises(self, tmp_path: pathlib.Path) -> None:
        """A source file whose bytes do not match the declared ID is refused."""
        repo = _make_repo(tmp_path)
        source_file = tmp_path / "real.bin"
        source_file.write_bytes(b"real file content")
        mismatched_id = blob_id(b"different content entirely")
        with pytest.raises(ValueError, match="Content integrity failure"):
            write_object_from_path(repo, mismatched_id, source_file)
        assert not has_object(repo, mismatched_id)

    def test_write_object_from_path_correct_id_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A source file stored under its own content ID is accepted."""
        repo = _make_repo(tmp_path)
        payload = b"file content"
        source_file = tmp_path / "file.bin"
        source_file.write_bytes(payload)
        payload_id = blob_id(payload)
        stored = write_object_from_path(repo, payload_id, source_file)
        assert stored is True
        assert has_object(repo, payload_id)

    def test_all_ones_id_mismatch_raises(self, tmp_path: pathlib.Path) -> None:
        """A well-formed but fabricated ID (64 ``f`` characters) is still refused."""
        repo = _make_repo(tmp_path)
        fabricated_id = "f" * 64
        payload = b"something"
        with pytest.raises(ValueError):
            write_object(repo, fabricated_id, payload)

    def test_empty_object_valid(self, tmp_path: pathlib.Path) -> None:
        """Zero-byte content is a legitimate object and round-trips as ``b""``."""
        repo = _make_repo(tmp_path)
        empty_id = blob_id(b"")
        stored = write_object(repo, empty_id, b"")
        assert stored is True
        assert read_object(repo, empty_id) == b""

    def test_invalid_object_id_format_raises(self, tmp_path: pathlib.Path) -> None:
        """Structurally invalid IDs are rejected with ValueError or TypeError."""
        repo = _make_repo(tmp_path)
        malformed_ids = (
            "not-a-hex-id",  # wrong alphabet and wrong length
            "a" * 63,        # one character short
            "G" * 64,        # right length, but "G" is not a hex digit
        )
        for bad_id in malformed_ids:
            with pytest.raises((ValueError, TypeError)):
                write_object(repo, bad_id, b"content")
| 161 | |
| 162 | |
| 163 | # --------------------------------------------------------------------------- |
| 164 | # 2. Per-object size cap on write |
| 165 | # --------------------------------------------------------------------------- |
| 166 | |
| 167 | |
class TestObjectSizeCap:
    """Write-time enforcement of the MAX_OBJECT_WRITE_BYTES ceiling."""

    def test_oversized_content_rejected_at_write(self, tmp_path: pathlib.Path) -> None:
        """A blob one byte over the cap is refused by write_object and not stored."""
        repo = _make_repo(tmp_path)
        too_big = b"x" * (MAX_OBJECT_WRITE_BYTES + 1)
        too_big_id = blob_id(too_big)
        with pytest.raises(ValueError, match="exceeding the"):
            write_object(repo, too_big_id, too_big)
        assert not has_object(repo, too_big_id), "Oversized object must not be stored"

    def test_exactly_at_limit_is_rejected(self, tmp_path: pathlib.Path) -> None:
        """A blob of MAX_OBJECT_WRITE_BYTES + 1 bytes raises ValueError.

        NOTE(review): despite the test name, the payload is one byte *over*
        the limit, not exactly at it.
        """
        repo = _make_repo(tmp_path)
        too_big = b"y" * (MAX_OBJECT_WRITE_BYTES + 1)
        too_big_id = blob_id(too_big)
        with pytest.raises(ValueError):
            write_object(repo, too_big_id, too_big)

    def test_write_object_from_path_oversized_raises(self, tmp_path: pathlib.Path) -> None:
        """A source file one byte over the cap is refused by write_object_from_path."""
        repo = _make_repo(tmp_path)
        huge_file = tmp_path / "big.bin"
        # Seek to the cap and write a single byte: the file reports
        # MAX_OBJECT_WRITE_BYTES + 1 bytes and may be sparse on disk.
        with huge_file.open("wb") as handle:
            handle.seek(MAX_OBJECT_WRITE_BYTES)
            handle.write(b"\x00")
        huge_id = hash_file(huge_file)
        with pytest.raises(ValueError, match="exceeding the"):
            write_object_from_path(repo, huge_id, huge_file)
        assert not has_object(repo, huge_id)

    def test_just_under_limit_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A small (16-byte) blob is accepted.

        The exact boundary is deliberately not allocated, to keep memory use
        low in CI.
        """
        repo = _make_repo(tmp_path)
        small = b"t" * 16
        small_id = blob_id(small)
        stored = write_object(repo, small_id, small)
        assert stored is True
| 208 | |
| 209 | |
| 210 | # --------------------------------------------------------------------------- |
| 211 | # 3. restore_object — hash re-verification before copy |
| 212 | # --------------------------------------------------------------------------- |
| 213 | |
| 214 | |
class TestRestoreObjectIntegrity:
    """restore_object must only hand verified content to the working tree."""

    def test_restore_clean_object_succeeds(self, tmp_path: pathlib.Path) -> None:
        """An intact object is restored byte-for-byte and True is returned."""
        repo = _make_repo(tmp_path)
        payload = b"data to restore"
        object_id = _stored_object(repo, payload)
        target = tmp_path / "restored.bin"
        restored = restore_object(repo, object_id, target)
        assert restored is True
        assert target.read_bytes() == payload

    def test_restore_missing_object_returns_false(self, tmp_path: pathlib.Path) -> None:
        """Restoring an unknown ID returns False and creates no destination file."""
        repo = _make_repo(tmp_path)
        missing_id = blob_id(b"ghost")
        target = tmp_path / "ghost.bin"
        restored = restore_object(repo, missing_id, target)
        assert restored is False
        assert not target.exists()

    def test_restore_detects_corrupted_store_object(self, tmp_path: pathlib.Path) -> None:
        """A tampered on-disk object makes restore_object raise OSError."""
        from muse.core.object_store import _object_path_with_fallback

        repo = _make_repo(tmp_path)
        payload = b"important file content"
        object_id = _stored_object(repo, payload)

        # Overwrite the stored file in place. It is made writable first and
        # put back to read-only afterwards.
        stored_file = _object_path_with_fallback(repo, object_id)
        os.chmod(stored_file, 0o644)
        stored_file.write_bytes(b"corrupted bytes that do not match the declared hash")
        os.chmod(stored_file, 0o444)

        target = tmp_path / "should-not-exist.bin"
        with pytest.raises(OSError, match="failed SHA-256 integrity check"):
            restore_object(repo, object_id, target)
        assert not target.exists(), "No corrupted data must reach the working tree"

    def test_restore_dest_is_writable(self, tmp_path: pathlib.Path) -> None:
        """The restored file can be overwritten by its owner."""
        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"editable file")
        target = tmp_path / "editable.txt"
        restore_object(repo, object_id, target)
        # A PermissionError here would mean the store's read-only mode
        # leaked onto the restored file.
        target.write_bytes(b"new content")

    def test_restore_is_atomic(self, tmp_path: pathlib.Path) -> None:
        """A restored ~1 KiB object has its complete content.

        NOTE(review): despite the name, no concurrent reader is involved;
        only the final content of the destination is checked.
        """
        repo = _make_repo(tmp_path)
        payload = b"atomic restore test " + b"x" * 1000
        object_id = _stored_object(repo, payload)
        target = tmp_path / "atomic.bin"
        restore_object(repo, object_id, target)
        assert target.read_bytes() == payload
| 267 | |
| 268 | |
| 269 | # --------------------------------------------------------------------------- |
| 270 | # 4 & 5. apply_mpack — pack-bomb and per-object size cap |
| 271 | # --------------------------------------------------------------------------- |
| 272 | |
| 273 | |
class TestApplyMPackBomb:
    """apply_mpack limits on item count and per-object size."""

    def _build_mpack(
        self,
        *,
        n_objects: int = 0,
        n_snapshots: int = 0,
        n_commits: int = 0,
        object_size: int = 1,
    ) -> MPack:
        """Return an MPack with *n_objects* distinct, correctly-hashed objects.

        Each payload is ``object-<i>`` followed by *object_size* NUL bytes.
        NOTE(review): *n_snapshots* and *n_commits* are accepted but not
        used; the pack always has empty commit and snapshot lists.
        """
        payloads = (
            f"object-{i}".encode() + b"\x00" * object_size
            for i in range(n_objects)
        )
        entries = [{"object_id": blob_id(blob), "content": blob} for blob in payloads]
        return MPack(commits=[], snapshots=[], objects=entries)

    def test_pack_at_limit_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A pack within the limit is fully written.

        Uses ``min(10, MAX_PACK_OBJECTS)`` objects rather than the exact
        limit, keeping the test small.
        """
        repo = _make_repo(tmp_path)
        count = min(10, MAX_PACK_OBJECTS)
        pack = self._build_mpack(n_objects=count)
        outcome = apply_mpack(repo, pack)
        assert outcome["objects_written"] == count

    def test_pack_exceeds_limit_raises(self, tmp_path: pathlib.Path) -> None:
        """A pack holding MAX_PACK_OBJECTS + 1 entries is rejected with ValueError."""
        repo = _make_repo(tmp_path)
        # The same placeholder entry is repeated: the entries do not need to
        # be valid because the count check is expected to fire first.
        placeholder = {"object_id": "a" * 64, "content": b"x"}
        too_many = [placeholder] * (MAX_PACK_OBJECTS + 1)
        bomb: MPack = MPack(commits=[], snapshots=[], objects=too_many)
        with pytest.raises(ValueError, match="exceeds the"):
            apply_mpack(repo, bomb)

    def test_oversized_object_in_pack_is_skipped(self, tmp_path: pathlib.Path) -> None:
        """An over-cap object is left out while a valid sibling is still written."""
        repo = _make_repo(tmp_path)
        huge = b"B" * (MAX_OBJECT_WRITE_BYTES + 1)
        huge_id = blob_id(huge)
        small = b"tiny object"
        small_id = blob_id(small)
        entries = [
            {"object_id": huge_id, "content": huge},
            {"object_id": small_id, "content": small},
        ]
        pack: MPack = MPack(commits=[], snapshots=[], objects=entries)
        outcome = apply_mpack(repo, pack)
        assert not has_object(repo, huge_id), "Oversized object must not be stored"
        assert has_object(repo, small_id), "Valid object must be stored"
        assert outcome["objects_written"] == 1

    def test_zero_item_pack_is_accepted(self, tmp_path: pathlib.Path) -> None:
        """An empty pack yields an all-zero ApplyResult."""
        repo = _make_repo(tmp_path)
        nothing: MPack = MPack(commits=[], snapshots=[], objects=[])
        expected = ApplyResult(
            commits_written=0,
            snapshots_written=0,
            objects_written=0,
            objects_skipped=0,
            tags_written=0,
            failed_objects=[],
            skipped_snapshots=[],
        )
        outcome = apply_mpack(repo, nothing)
        assert outcome == expected
| 351 | |
| 352 | |
| 353 | # --------------------------------------------------------------------------- |
| 354 | # 6. apply_mpack — object-ID deduplication |
| 355 | # --------------------------------------------------------------------------- |
| 356 | |
| 357 | |
class TestApplyPackDeduplication:
    """Repeated object IDs inside one pack are written once and counted as skipped."""

    def test_duplicate_object_ids_not_hashed_twice(self, tmp_path: pathlib.Path) -> None:
        """One object sent 100 times: one write, 99 skips."""
        repo = _make_repo(tmp_path)
        payload = b"dedup test object"
        payload_id = blob_id(payload)
        repeated = [{"object_id": payload_id, "content": payload}] * 100
        pack: MPack = MPack(commits=[], snapshots=[], objects=repeated)
        outcome = apply_mpack(repo, pack)
        assert outcome["objects_written"] == 1
        assert outcome["objects_skipped"] == 99
        assert has_object(repo, payload_id)

    def test_duplicate_then_different_both_processed(self, tmp_path: pathlib.Path) -> None:
        """A duplicate does not stop a later, different object from being written."""
        repo = _make_repo(tmp_path)
        first = b"first object"
        second = b"second object"
        first_id = blob_id(first)
        second_id = blob_id(second)
        first_entry = {"object_id": first_id, "content": first}
        duplicate_entry = {"object_id": first_id, "content": first}
        second_entry = {"object_id": second_id, "content": second}
        pack: MPack = MPack(
            commits=[],
            snapshots=[],
            objects=[first_entry, duplicate_entry, second_entry],
        )
        outcome = apply_mpack(repo, pack)
        assert outcome["objects_written"] == 2
        assert outcome["objects_skipped"] == 1
| 393 | |
| 394 | |
| 395 | # --------------------------------------------------------------------------- |
| 396 | # 7. apply_mpack — malformed entries are isolated (snapshot / commit) |
| 397 | # --------------------------------------------------------------------------- |
| 398 | |
| 399 | |
class TestApplyPackMalformedEntries:
    """Bad object entries are skipped without aborting the rest of the pack."""

    def test_malformed_object_entry_does_not_abort_pack(self, tmp_path: pathlib.Path) -> None:
        """A malformed ID and a hash mismatch are skipped; a later valid entry is written.

        The good and bad entries use distinct IDs on purpose. Each ID is
        attempted only once per apply_mpack call, so a valid entry that
        re-used a failed ID would not be retried.
        """
        repo = _make_repo(tmp_path)
        content_a = b"good object A"
        oid_a = blob_id(content_a)
        content_b = b"good object B"
        oid_b = blob_id(content_b)
        entries = [
            {"object_id": "not-hex", "content": b"bad"},       # malformed ID
            {"object_id": oid_a, "content": b"wrong bytes"},   # hash mismatch
            {"object_id": oid_b, "content": content_b},        # valid, different ID
        ]
        pack: MPack = MPack(commits=[], snapshots=[], objects=entries)
        outcome = apply_mpack(repo, pack)
        assert not has_object(repo, oid_a), "Hash-mismatched entry must not be stored"
        assert has_object(repo, oid_b), "Valid entry after bad ones must be stored"
        assert outcome["objects_written"] == 1

    def test_missing_object_id_in_pack_entry_skipped(self, tmp_path: pathlib.Path) -> None:
        """An entry with an empty object_id results in zero writes."""
        repo = _make_repo(tmp_path)
        blank_id_entry = {"object_id": "", "content": b"anything"}
        pack: MPack = MPack(commits=[], snapshots=[], objects=[blank_id_entry])
        outcome = apply_mpack(repo, pack)
        assert outcome["objects_written"] == 0

    def test_empty_content_in_pack_entry_skipped(self, tmp_path: pathlib.Path) -> None:
        """An ObjectPayload with empty object_id and empty content results in zero writes."""
        from muse.core.mpack import ObjectPayload

        repo = _make_repo(tmp_path)
        blank_entry = ObjectPayload(object_id="", content=b"")
        pack: MPack = MPack(commits=[], snapshots=[], objects=[blank_entry])
        outcome = apply_mpack(repo, pack)
        assert outcome["objects_written"] == 0
| 449 | |
| 450 | |
| 451 | # --------------------------------------------------------------------------- |
| 452 | # 8. read_object — corruption detected on every read |
| 453 | # --------------------------------------------------------------------------- |
| 454 | |
| 455 | |
class TestReadObjectIntegrity:
    """read_object returns intact content, flags tampering, and reports absence."""

    def test_read_clean_object_succeeds(self, tmp_path: pathlib.Path) -> None:
        """An untouched stored object reads back verbatim."""
        repo = _make_repo(tmp_path)
        payload = b"clean read test"
        object_id = _stored_object(repo, payload)
        assert read_object(repo, object_id) == payload

    def test_read_corrupted_object_raises(self, tmp_path: pathlib.Path) -> None:
        """Reading an object whose on-disk bytes were replaced raises OSError."""
        from muse.core.object_store import _object_path_with_fallback

        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"will be corrupted")
        stored_file = _object_path_with_fallback(repo, object_id)
        # Make the file writable, overwrite it, then set it back to read-only.
        os.chmod(stored_file, 0o644)
        stored_file.write_bytes(b"corrupted bytes")
        os.chmod(stored_file, 0o444)
        with pytest.raises(OSError, match="integrity check"):
            read_object(repo, object_id)

    def test_read_absent_object_returns_none(self, tmp_path: pathlib.Path) -> None:
        """An ID that was never stored reads as None."""
        repo = _make_repo(tmp_path)
        never_stored = blob_id(b"absent")
        assert read_object(repo, never_stored) is None
| 478 | |
| 479 | |
| 480 | # --------------------------------------------------------------------------- |
| 481 | # 9. Confirmed: all write_object callsites use content-derived IDs |
| 482 | # --------------------------------------------------------------------------- |
| 483 | |
| 484 | |
class TestCallsiteIntegrity:
    """Callers of the object store must derive object IDs from the actual content."""

    def test_hash_object_stdin_derives_id_from_content(self, tmp_path: pathlib.Path) -> None:
        """``hash-object --stdin --write`` stores stdin bytes under their content ID."""
        from tests.cli_test_helper import CliRunner

        repo = _make_repo(tmp_path)
        config_toml_path(repo).write_text('[core]\nauthor = "test"\n')
        content = b"stdin content for hashing"
        expected_oid = blob_id(content)
        result = CliRunner().invoke(
            None,
            ["hash-object", "--stdin", "--write"],
            input=content,
            env={"MUSE_REPO_ROOT": str(repo)},
        )
        assert result.exit_code == 0, result.output
        assert expected_oid in result.output
        assert has_object(repo, expected_oid)

    def test_hash_object_file_derives_id_from_file_content(self, tmp_path: pathlib.Path) -> None:
        """``hash-object <path> --write`` stores the file bytes under their content ID."""
        from tests.cli_test_helper import CliRunner

        repo = _make_repo(tmp_path)
        config_toml_path(repo).write_text('[core]\nauthor = "test"\n')
        content = b"file content for hashing"
        target = tmp_path / "target.bin"
        target.write_bytes(content)
        expected_oid = blob_id(content)
        result = CliRunner().invoke(
            None,
            ["hash-object", str(target), "--write"],
            env={"MUSE_REPO_ROOT": str(repo)},
        )
        assert result.exit_code == 0, result.output
        assert expected_oid in result.output
        assert has_object(repo, expected_oid)

    def test_unpack_objects_hash_mismatch_rejected(self, tmp_path: pathlib.Path) -> None:
        """A pack object whose bytes do not match its declared ID never enters the store.

        This exercises ``apply_mpack`` directly rather than the CLI. The CLI
        runner import and config file previously set up here were never
        used, so they have been dropped.
        """
        repo = _make_repo(tmp_path)
        legit_oid = blob_id(b"legitimate")
        mpack: MPack = MPack(
            commits=[],
            snapshots=[],
            objects=[{"object_id": legit_oid, "content": b"malicious bytes"}],
        )
        result = apply_mpack(repo, mpack)
        assert not has_object(repo, legit_oid), "Poisoned object must not enter the store"
        assert result["objects_written"] == 0
| 540 | |
| 541 | |
| 542 | # --------------------------------------------------------------------------- |
| 543 | # 10. Stress: 10 000-object pack processed within time budget |
| 544 | # --------------------------------------------------------------------------- |
| 545 | |
| 546 | |
class TestStress:
    """Volume tests for apply_mpack: many unique objects, re-application, many duplicates."""

    @pytest.fixture(autouse=True)
    def no_fsync(self) -> Iterator[None]:
        """Stub out fsync-related calls for every test in this class.

        This is a generator fixture: the patches are active while the test
        runs and are undone when the ``with`` block exits after the yield.
        The aim is for the time budget to measure algorithmic cost rather
        than I/O latency.
        """
        with patch("muse.core.object_store._fsync_fd", return_value=None), \
                patch("muse.core.store.os.fsync", return_value=None), \
                patch("muse.core.store.fcntl.fcntl", return_value=0):
            yield

    @pytest.mark.perf
    def test_10k_object_pack_within_budget(self, tmp_path: pathlib.Path) -> None:
        """10 000 unique objects are written through apply_mpack in under 30 seconds."""
        repo = _make_repo(tmp_path)
        n = 10_000
        payloads = [f"stress-object-{i:06d}".encode() for i in range(n)]
        objects = [{"object_id": blob_id(p), "content": p} for p in payloads]
        mpack: MPack = MPack(commits=[], snapshots=[], objects=objects)

        # Only the apply_mpack call is timed; building the pack is excluded.
        start = time.monotonic()
        result = apply_mpack(repo, mpack)
        elapsed = time.monotonic() - start

        assert result["objects_written"] == n
        assert elapsed < 30.0, f"10k-object pack took {elapsed:.1f}s — too slow"

    def test_idempotent_10k_pack_fast(self, tmp_path: pathlib.Path) -> None:
        """Re-applying a pack writes nothing new and reports every object as skipped.

        Despite the test name, 1 000 objects are used and no timing is
        asserted.
        """
        repo = _make_repo(tmp_path)
        n = 1_000
        payloads = [f"idem-object-{i:06d}".encode() for i in range(n)]
        objects = [{"object_id": blob_id(p), "content": p} for p in payloads]
        mpack: MPack = MPack(commits=[], snapshots=[], objects=objects)

        apply_mpack(repo, mpack)            # first application populates the store
        result2 = apply_mpack(repo, mpack)  # second application must be a no-op
        assert result2["objects_written"] == 0
        assert result2["objects_skipped"] == n

    def test_10k_duplicate_ids_deduplicated(self, tmp_path: pathlib.Path) -> None:
        """10 000 entries sharing one object_id collapse to a single write."""
        repo = _make_repo(tmp_path)
        content = b"one true object"
        oid = blob_id(content)
        mpack: MPack = MPack(
            commits=[],
            snapshots=[],
            objects=[{"object_id": oid, "content": content}] * 10_000,
        )
        result = apply_mpack(repo, mpack)
        assert result["objects_written"] == 1
        assert result["objects_skipped"] == 9_999
| 604 | |
| 605 | |
| 606 | # --------------------------------------------------------------------------- |
| 607 | # 11. Concurrent poisoning stress |
| 608 | # --------------------------------------------------------------------------- |
| 609 | |
| 610 | |
class TestConcurrentPoisoning:
    """Multi-threaded writes must neither poison nor corrupt the store."""

    def test_concurrent_hash_mismatch_attempts_do_not_corrupt(
        self, tmp_path: pathlib.Path
    ) -> None:
        """50 threads try to overwrite a stored object with wrong bytes; none succeeds.

        Every attempt must raise ValueError. A write that returns normally,
        any other exception, or a thread still running after the join
        timeout fails the test.
        """
        repo = _make_repo(tmp_path)
        legit_content = b"the one true content"
        legit_oid = blob_id(legit_content)

        # The legitimate object is in place before the attack starts.
        write_object(repo, legit_oid, legit_content)

        errors: list[str] = []
        lock = threading.Lock()

        def poison_attempt(idx: int) -> None:
            malicious_content = f"malicious-{idx}".encode()
            try:
                write_object(repo, legit_oid, malicious_content)
            except ValueError:
                return  # expected: the integrity check rejected the write
            except Exception as exc:
                # An exception escaping a worker thread would otherwise be
                # lost and the test could pass vacuously; record it instead.
                with lock:
                    errors.append(f"Thread {idx}: unexpected {type(exc).__name__}: {exc}")
                return
            with lock:
                errors.append(f"Thread {idx}: poisoning succeeded!")

        # Daemon threads cannot block interpreter exit if one hangs.
        threads = [
            threading.Thread(target=poison_attempt, args=(i,), daemon=True)
            for i in range(50)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=5.0)

        # join() returns silently on timeout, so check for stragglers.
        stuck = [t.name for t in threads if t.is_alive()]
        assert not stuck, f"Threads still running after timeout: {stuck}"
        assert not errors, "\n".join(errors)
        # The stored object must still be the legitimate one.
        assert read_object(repo, legit_oid) == legit_content

    def test_concurrent_writes_of_same_object_idempotent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """50 threads write the same valid object; all finish and the content stays intact.

        At least one thread must report having written. The test does not
        require that exactly one does.
        """
        repo = _make_repo(tmp_path)
        content = b"concurrent valid object"
        oid = blob_id(content)
        results: list[bool] = []
        failures: list[str] = []
        lock = threading.Lock()

        def write_it() -> None:
            try:
                wrote = write_object(repo, oid, content)
            except Exception as exc:
                # Surface worker failures to the main thread rather than
                # letting them disappear with the thread.
                with lock:
                    failures.append(f"{type(exc).__name__}: {exc}")
                return
            with lock:
                results.append(wrote)

        threads = [threading.Thread(target=write_it, daemon=True) for _ in range(50)]
        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=5.0)

        stuck = [t.name for t in threads if t.is_alive()]
        assert not stuck, f"Threads still running after timeout: {stuck}"
        assert not failures, "\n".join(failures)
        assert len(results) == len(threads), "Every thread must report a result"
        assert results.count(True) >= 1, "At least one thread must have written"
        assert read_object(repo, oid) == content
| 666 | |
| 667 | |
| 668 | # --------------------------------------------------------------------------- |
| 669 | # 12. SHA-256 threat model documentation test |
| 670 | # --------------------------------------------------------------------------- |
| 671 | |
| 672 | |
class TestSHA256ThreatModel:
    """Executable notes on the threat model the object store relies on."""

    def test_sha256_preimage_resistance_documented(self) -> None:
        """Record that second-preimage resistance of SHA-256 is the security boundary.

        The other tests in this module check that the write, read and
        restore paths reject content that does not match its declared ID.
        Given that, poisoning the store would need different content with
        the same digest as a legitimate object, i.e. a second preimage.
        The original author's note puts the best known attack at 2^256
        operations as of 2026, which is treated as infeasible.

        This is a living specification, not a cryptographic proof. The
        only thing asserted is that two different sample messages get
        different IDs.
        """
        first_id = blob_id(b"message A")
        second_id = blob_id(b"message B")
        # Distinct inputs are expected to yield distinct IDs. A collision
        # is not theoretically impossible, only computationally infeasible.
        assert first_id != second_id

    def test_write_then_read_roundtrip_preserves_content(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Twenty payloads of growing length each read back exactly as written."""
        repo = _make_repo(tmp_path)
        for i in range(20):
            payload = f"stress-content-{i}".encode() * (i + 1)
            payload_id = blob_id(payload)
            write_object(repo, payload_id, payload)
            assert read_object(repo, payload_id) == payload

    def test_object_mode_is_immutable(self, tmp_path: pathlib.Path) -> None:
        """A stored object's permission bits are 0o444 (read-only for everyone)."""
        from muse.core.object_store import _object_path_with_fallback

        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"immutable object")
        stored_file = _object_path_with_fallback(repo, object_id)
        permission_bits = stored_file.stat().st_mode & 0o777
        mode = oct(permission_bits)
        assert mode == oct(0o444), f"Expected 0o444, got {mode}"
File History
1 commit
sha256:fe844c2411edd1cec3d4c847f36a96c6ccd4e3d7d1a715106d2ecd64216bf94f
fix: bare object detection and read recovery; rm adapter files
Sonnet 4.6
minor
⚠
15 days ago