gabriel / muse public
test_security_object_store_poisoning.py python
781 lines 31.9 KB
Raw
sha256:fe844c2411edd1cec3d4c847f36a96c6ccd4e3d7d1a715106d2ecd64216bf94f fix: bare object detection and read recovery; rm adapter files Sonnet 4.6 minor ⚠ breaking 3 days ago
1 """Phase 2.3 — Object store poisoning tests.
2
3 Covers every adversarial input and edge case identified in the recon phase:
4
5 1. Hash mismatch injection into write_object / write_object_from_path.
6 2. Per-object size cap enforcement at write time (not just read time).
7 3. restore_object re-hashes source before copying — corrupt store is detected.
8 4. apply_mpack: object count limit (pack-bomb).
9 5. apply_mpack: per-object size cap before write_object is called.
10 6. apply_mpack: object-ID deduplication (sha256 O(1) for duplicate IDs).
11 7. apply_mpack: snapshot / commit isolation — malformed entries skipped.
12 8. Zero-byte objects: valid empty blobs are accepted.
13 9. All write_object callsites confirmed to use content-derived IDs.
14 10. Stress: 10 000-object pack processed within time budget.
15 11. Stress: 50 concurrent poisoning attempts do not corrupt the store.
16 12. Threat-model boundary: SHA-256 collision infeasibility documented via test.
17 """
18
from __future__ import annotations

import os
import pathlib
import tempfile
import threading
import time
from collections.abc import Iterator
from unittest.mock import patch

import pytest

from muse.core.object_store import (
    has_object,
    read_object,
    restore_object,
    write_object,
    write_object_from_path,
)
from muse.core.mpack import ApplyResult, MPack, apply_mpack
from muse.core.commits import CommitDict
from muse.core.snapshots import SnapshotDict
from muse.core.validation import MAX_OBJECT_WRITE_BYTES, MAX_PACK_OBJECTS
from muse.core.types import Manifest, blob_id, content_hash, hash_file, long_id, now_utc_iso
from muse.core.paths import config_toml_path, muse_dir
43
44
45 # ---------------------------------------------------------------------------
46 # Helpers
47 # ---------------------------------------------------------------------------
48
49
50
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create a hand-built repository skeleton under *tmp_path*.

    A ``repo`` directory is created, then the directory returned by
    ``muse_dir`` is populated with the expected subdirectories, a ``HEAD``
    file pointing at ``refs/heads/main`` and a minimal ``repo.json``.

    Returns:
        The path of the new ``repo`` directory.
    """
    root = tmp_path / "repo"
    root.mkdir()
    dot_muse = muse_dir(root)
    layout = ("objects", "commits", "snapshots", "refs", "refs/heads", "tags")
    for name in layout:
        dot_muse.joinpath(name).mkdir(parents=True)
    dot_muse.joinpath("HEAD").write_text("ref: refs/heads/main\n")
    dot_muse.joinpath("repo.json").write_text('{"repo_id": "test-repo"}')
    return root
60
61
def _stored_object(repo: pathlib.Path, content: bytes) -> str:
    """Store *content* via ``write_object`` and return the ID it was stored under.

    The ID is derived from the content itself with ``blob_id``.
    """
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id
67
68
def _minimal_commit_dict(snap_id: str) -> CommitDict:
    """Build a parentless ``CommitDict`` on branch ``main`` referencing *snap_id*.

    The commit ID is a fixed placeholder (64 ``a`` characters); the repo ID is
    derived from *snap_id* via ``content_hash`` and the timestamp is "now".
    """
    derived_repo_id = content_hash({"role": "repo", "snap_id": snap_id})
    timestamp = now_utc_iso()
    placeholder_commit_id = "a" * 64
    return CommitDict(
        commit_id=placeholder_commit_id,
        repo_id=derived_repo_id,
        branch="main",
        parent_commit_id=None,
        parent2_commit_id=None,
        snapshot_id=snap_id,
        message="test",
        author="test",
        committed_at=timestamp,
        metadata={},
    )
84
85
def _minimal_snapshot_dict(manifest: Manifest) -> SnapshotDict:
    """Wrap *manifest* in a ``SnapshotDict`` whose ID is computed from the manifest.

    The snapshot ID comes from ``muse.core.ids.hash_snapshot`` and the
    creation time is "now".
    """
    # Imported locally, exactly as the test module has always done.
    from muse.core.ids import hash_snapshot as compute_snapshot_id

    derived_id = compute_snapshot_id(manifest)
    timestamp = now_utc_iso()
    return SnapshotDict(
        snapshot_id=derived_id,
        manifest=manifest,
        created_at=timestamp,
    )
95
96
97 # ---------------------------------------------------------------------------
98 # 1. Hash mismatch injection
99 # ---------------------------------------------------------------------------
100
101
class TestHashMismatch:
    """write_object / write_object_from_path must tie the object ID to the content."""

    def test_write_object_wrong_content_raises(self, tmp_path: pathlib.Path) -> None:
        """write_object must reject content whose sha256 ≠ object_id."""
        repo = _make_repo(tmp_path)
        genuine = b"legitimate content"
        poisoned = b"poisoned content"
        genuine_id = blob_id(genuine)
        with pytest.raises(ValueError, match="Content integrity failure"):
            write_object(repo, genuine_id, poisoned)
        stored = has_object(repo, genuine_id)
        assert not stored, "Poisoned object must not be stored"

    def test_write_object_correct_content_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A matching ID/content pair is written and can be read back."""
        repo = _make_repo(tmp_path)
        payload = b"valid content"
        payload_id = blob_id(payload)
        assert write_object(repo, payload_id, payload) is True
        assert read_object(repo, payload_id) == payload

    def test_write_object_from_path_wrong_id_raises(self, tmp_path: pathlib.Path) -> None:
        """write_object_from_path rejects when declared object_id ≠ file hash."""
        repo = _make_repo(tmp_path)
        source_file = tmp_path / "real.bin"
        source_file.write_bytes(b"real file content")
        unrelated_id = blob_id(b"different content entirely")
        with pytest.raises(ValueError, match="Content integrity failure"):
            write_object_from_path(repo, unrelated_id, source_file)
        assert not has_object(repo, unrelated_id)

    def test_write_object_from_path_correct_id_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A file whose bytes hash to the declared ID is stored."""
        repo = _make_repo(tmp_path)
        payload = b"file content"
        source_file = tmp_path / "file.bin"
        source_file.write_bytes(payload)
        payload_id = blob_id(payload)
        assert write_object_from_path(repo, payload_id, source_file) is True
        assert has_object(repo, payload_id)

    def test_all_ones_id_mismatch_raises(self, tmp_path: pathlib.Path) -> None:
        """A crafted ID of 64 ``f`` characters is still rejected with ValueError."""
        repo = _make_repo(tmp_path)
        crafted_id = "f" * 64
        with pytest.raises(ValueError):
            write_object(repo, crafted_id, b"something")

    def test_empty_object_valid(self, tmp_path: pathlib.Path) -> None:
        """Zero-byte content is a valid object when the ID is blob_id(b"")."""
        repo = _make_repo(tmp_path)
        empty_payload = b""
        empty_id = blob_id(empty_payload)
        assert write_object(repo, empty_id, empty_payload) is True
        assert read_object(repo, empty_id) == empty_payload

    def test_invalid_object_id_format_raises(self, tmp_path: pathlib.Path) -> None:
        """Syntactically invalid IDs raise ValueError or TypeError."""
        repo = _make_repo(tmp_path)
        malformed_ids = (
            "not-a-hex-id",  # wrong length, contains non-hex characters
            "a" * 63,  # one character short of 64
            "G" * 64,  # right length, but "G" is not a hex digit
        )
        for bad_id in malformed_ids:
            with pytest.raises((ValueError, TypeError)):
                write_object(repo, bad_id, b"content")
162
163
164 # ---------------------------------------------------------------------------
165 # 2. Per-object size cap on write
166 # ---------------------------------------------------------------------------
167
168
class TestObjectSizeCap:
    """Size-cap checks on the write path, keyed on MAX_OBJECT_WRITE_BYTES."""

    def test_oversized_content_rejected_at_write(self, tmp_path: pathlib.Path) -> None:
        """write_object must reject blobs above MAX_OBJECT_WRITE_BYTES."""
        repo = _make_repo(tmp_path)
        # One byte past the limit is the smallest payload that must be refused.
        too_big = b"x" * (MAX_OBJECT_WRITE_BYTES + 1)
        too_big_id = blob_id(too_big)
        with pytest.raises(ValueError, match="exceeding the"):
            write_object(repo, too_big_id, too_big)
        assert not has_object(repo, too_big_id), "Oversized object must not be stored"

    def test_exactly_at_limit_is_rejected(self, tmp_path: pathlib.Path) -> None:
        """A payload of MAX_OBJECT_WRITE_BYTES + 1 bytes raises ValueError.

        NOTE(review): despite the test name, the payload is one byte *over*
        the limit, not exactly at it.
        """
        repo = _make_repo(tmp_path)
        too_big = b"y" * (MAX_OBJECT_WRITE_BYTES + 1)
        too_big_id = blob_id(too_big)
        with pytest.raises(ValueError):
            write_object(repo, too_big_id, too_big)

    def test_write_object_from_path_oversized_raises(self, tmp_path: pathlib.Path) -> None:
        """write_object_from_path must reject a source file larger than the cap."""
        repo = _make_repo(tmp_path)
        large_source = tmp_path / "big.bin"
        # Seek to the limit and write one byte: the file is limit + 1 bytes long
        # without the test having to materialise that much data itself.
        with large_source.open("wb") as handle:
            handle.seek(MAX_OBJECT_WRITE_BYTES)
            handle.write(b"\x00")
        large_id = hash_file(large_source)
        with pytest.raises(ValueError, match="exceeding the"):
            write_object_from_path(repo, large_id, large_source)
        assert not has_object(repo, large_id)

    def test_just_under_limit_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A small (16-byte) blob is accepted by write_object.

        NOTE(review): the payload is deliberately tiny to keep CI memory low,
        so the exact boundary value is not exercised here.
        """
        repo = _make_repo(tmp_path)
        small = b"t" * 16
        small_id = blob_id(small)
        assert write_object(repo, small_id, small) is True
209
210
211 # ---------------------------------------------------------------------------
212 # 3. restore_object — hash re-verification before copy
213 # ---------------------------------------------------------------------------
214
215
class TestRestoreObjectIntegrity:
    """restore_object must verify store contents before writing to the destination."""

    def test_restore_clean_object_succeeds(self, tmp_path: pathlib.Path) -> None:
        """An intact object is restored byte-for-byte and True is returned."""
        repo = _make_repo(tmp_path)
        payload = b"data to restore"
        payload_id = _stored_object(repo, payload)
        target = tmp_path / "restored.bin"
        assert restore_object(repo, payload_id, target) is True
        assert target.read_bytes() == payload

    def test_restore_missing_object_returns_false(self, tmp_path: pathlib.Path) -> None:
        """Restoring an ID that was never stored returns False and creates nothing."""
        repo = _make_repo(tmp_path)
        absent_id = blob_id(b"ghost")
        target = tmp_path / "ghost.bin"
        assert restore_object(repo, absent_id, target) is False
        assert not target.exists()

    def test_restore_detects_corrupted_store_object(self, tmp_path: pathlib.Path) -> None:
        """If the on-disk object file is corrupted, restore_object must raise OSError."""
        from muse.core.object_store import _object_path_with_fallback

        repo = _make_repo(tmp_path)
        payload = b"important file content"
        payload_id = _stored_object(repo, payload)

        # Overwrite the stored file in place; it has to be made writable first
        # and is put back to read-only afterwards.
        stored_file = _object_path_with_fallback(repo, payload_id)
        os.chmod(stored_file, 0o644)
        stored_file.write_bytes(b"corrupted bytes that do not match the declared hash")
        os.chmod(stored_file, 0o444)

        target = tmp_path / "should-not-exist.bin"
        with pytest.raises(OSError, match="failed SHA-256 integrity check"):
            restore_object(repo, payload_id, target)
        assert not target.exists(), "No corrupted data must reach the working tree"

    def test_restore_dest_is_writable(self, tmp_path: pathlib.Path) -> None:
        """Restored files must be writable (0o444 object mode must not propagate)."""
        repo = _make_repo(tmp_path)
        payload_id = _stored_object(repo, b"editable file")
        target = tmp_path / "editable.txt"
        restore_object(repo, payload_id, target)
        # Overwriting the restored file must not raise PermissionError.
        target.write_bytes(b"new content")

    def test_restore_is_atomic(self, tmp_path: pathlib.Path) -> None:
        """After restore_object returns, the destination holds the full content.

        NOTE(review): no concurrent reader is started here, so this only
        checks the end state, not atomicity under concurrency.
        """
        repo = _make_repo(tmp_path)
        payload = b"atomic restore test " + b"x" * 1000
        payload_id = _stored_object(repo, payload)
        target = tmp_path / "atomic.bin"
        restore_object(repo, payload_id, target)
        assert target.read_bytes() == payload
268
269
270 # ---------------------------------------------------------------------------
271 # 4 & 5. apply_mpack — pack-bomb and per-object size cap
272 # ---------------------------------------------------------------------------
273
274
class TestApplyMPackBomb:
    """apply_mpack limits: total item count and per-object size."""

    def _build_mpack(
        self,
        *,
        n_objects: int = 0,
        n_snapshots: int = 0,
        n_commits: int = 0,
        object_size: int = 1,
    ) -> MPack:
        """Return an MPack holding *n_objects* distinct, correctly-hashed blobs.

        Each blob is ``object-<i>`` followed by *object_size* NUL bytes.
        ``n_snapshots`` and ``n_commits`` are accepted but currently ignored:
        the returned pack always has empty commit and snapshot lists.
        """
        payloads = [
            f"object-{index}".encode() + b"\x00" * object_size
            for index in range(n_objects)
        ]
        blobs = [{"object_id": blob_id(data), "content": data} for data in payloads]
        return MPack(
            commits=[],
            snapshots=[],
            blobs=blobs,
        )

    def test_pack_at_limit_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A pack whose item count is within MAX_PACK_OBJECTS is fully written.

        At most 10 objects are used, so the limit itself is only reached
        when MAX_PACK_OBJECTS is 10 or lower.
        """
        repo = _make_repo(tmp_path)
        count = min(10, MAX_PACK_OBJECTS)
        outcome = apply_mpack(repo, self._build_mpack(n_objects=count))
        assert outcome["blobs_written"] == count

    def test_pack_exceeds_limit_raises(self, tmp_path: pathlib.Path) -> None:
        """A pack with total items > MAX_PACK_OBJECTS must be rejected."""
        repo = _make_repo(tmp_path)
        # The entries are not valid objects; the test expects the count check
        # to raise before any of them would be written.
        bogus_entry = {"object_id": "a" * 64, "content": b"x"}
        too_many = [bogus_entry] * (MAX_PACK_OBJECTS + 1)
        bomb: MPack = MPack(commits=[], snapshots=[], blobs=too_many)
        with pytest.raises(ValueError, match="exceeds the"):
            apply_mpack(repo, bomb)

    def test_oversized_object_in_pack_is_skipped(self, tmp_path: pathlib.Path) -> None:
        """An over-cap object in a pack is not stored; the valid one next to it is."""
        repo = _make_repo(tmp_path)
        huge = b"B" * (MAX_OBJECT_WRITE_BYTES + 1)
        huge_id = blob_id(huge)
        small = b"tiny object"
        small_id = blob_id(small)
        pack: MPack = MPack(
            commits=[],
            snapshots=[],
            blobs=[
                {"object_id": huge_id, "content": huge},
                {"object_id": small_id, "content": small},
            ],
        )
        outcome = apply_mpack(repo, pack)
        assert not has_object(repo, huge_id), "Oversized object must not be stored"
        assert has_object(repo, small_id), "Valid object must be stored"
        assert outcome["blobs_written"] == 1

    def test_zero_item_pack_is_accepted(self, tmp_path: pathlib.Path) -> None:
        """An empty pack yields an all-zero / all-empty ApplyResult."""
        repo = _make_repo(tmp_path)
        nothing: MPack = MPack(commits=[], snapshots=[], blobs=[])
        expected = ApplyResult(
            commits_written=0,
            snapshots_written=0,
            blobs_written=0,
            blobs_skipped=0,
            tags_written=0,
            failed_blobs=[],
            skipped_snapshots=[],
        )
        assert apply_mpack(repo, nothing) == expected
352
353
354 # ---------------------------------------------------------------------------
355 # 6. apply_mpack — object-ID deduplication
356 # ---------------------------------------------------------------------------
357
358
class TestApplyPackDeduplication:
    """Repeated object IDs inside one pack are written once and counted as skipped."""

    def test_duplicate_object_ids_not_hashed_twice(self, tmp_path: pathlib.Path) -> None:
        """100 copies of one entry give one write and 99 skips.

        Only the counters are checked; whether sha256 is recomputed for the
        duplicates is not observed by this test.
        """
        repo = _make_repo(tmp_path)
        payload = b"dedup test object"
        payload_id = blob_id(payload)
        repeated = [{"object_id": payload_id, "content": payload}] * 100
        pack: MPack = MPack(commits=[], snapshots=[], blobs=repeated)
        outcome = apply_mpack(repo, pack)
        assert outcome["blobs_written"] == 1
        assert outcome["blobs_skipped"] == 99
        assert has_object(repo, payload_id)

    def test_duplicate_then_different_both_processed(self, tmp_path: pathlib.Path) -> None:
        """A duplicate is skipped, but a later distinct object is still written."""
        repo = _make_repo(tmp_path)
        first, second = b"first object", b"second object"
        first_id = blob_id(first)
        second_id = blob_id(second)
        first_entry = {"object_id": first_id, "content": first}
        first_again = {"object_id": first_id, "content": first}  # duplicate
        second_entry = {"object_id": second_id, "content": second}
        pack: MPack = MPack(
            commits=[],
            snapshots=[],
            blobs=[first_entry, first_again, second_entry],
        )
        outcome = apply_mpack(repo, pack)
        assert outcome["blobs_written"] == 2
        assert outcome["blobs_skipped"] == 1
394
395
396 # ---------------------------------------------------------------------------
397 # 7. apply_mpack — malformed entries are isolated (snapshot / commit)
398 # ---------------------------------------------------------------------------
399
400
class TestApplyPackMalformedEntries:
    """Bad blob entries in a pack must not stop good entries from being written."""

    def test_malformed_object_entry_does_not_abort_pack(self, tmp_path: pathlib.Path) -> None:
        """Bad entries are not stored; a valid entry after them still is.

        Distinct object IDs are used on purpose: each object_id is only
        attempted once per apply_mpack call, so a second entry that reuses
        the ID of a failed one would be deduplicated rather than retried.
        """
        repo = _make_repo(tmp_path)
        payload_a = b"good object A"
        id_a = blob_id(payload_a)
        payload_b = b"good object B"
        id_b = blob_id(payload_b)
        entries = [
            {"object_id": "not-hex", "content": b"bad"},  # malformed ID
            {"object_id": id_a, "content": b"wrong bytes"},  # hash mismatch
            {"object_id": id_b, "content": payload_b},  # valid, different ID
        ]
        pack: MPack = MPack(commits=[], snapshots=[], blobs=entries)
        outcome = apply_mpack(repo, pack)
        assert not has_object(repo, id_a), "Hash-mismatched entry must not be stored"
        assert has_object(repo, id_b), "Valid entry after bad ones must be stored"
        assert outcome["blobs_written"] == 1

    def test_missing_object_id_in_pack_entry_skipped(self, tmp_path: pathlib.Path) -> None:
        """An entry whose object_id is the empty string results in no write."""
        repo = _make_repo(tmp_path)
        anonymous_entry = {"object_id": "", "content": b"anything"}
        pack: MPack = MPack(commits=[], snapshots=[], blobs=[anonymous_entry])
        outcome = apply_mpack(repo, pack)
        assert outcome["blobs_written"] == 0

    def test_empty_content_in_pack_entry_skipped(self, tmp_path: pathlib.Path) -> None:
        """A BlobPayload with empty object_id and empty content results in no write."""
        from muse.core.mpack import BlobPayload

        repo = _make_repo(tmp_path)
        blank = BlobPayload(object_id="", content=b"")
        pack: MPack = MPack(commits=[], snapshots=[], blobs=[blank])
        outcome = apply_mpack(repo, pack)
        assert outcome["blobs_written"] == 0
450
451
452 # ---------------------------------------------------------------------------
453 # 8. read_object — corruption detected on every read
454 # ---------------------------------------------------------------------------
455
456
class TestReadObjectIntegrity:
    """read_object returns stored bytes, detects tampering, and reports absence."""

    def test_read_clean_object_succeeds(self, tmp_path: pathlib.Path) -> None:
        """An untouched object reads back as the bytes that were written."""
        repo = _make_repo(tmp_path)
        payload = b"clean read test"
        payload_id = _stored_object(repo, payload)
        assert read_object(repo, payload_id) == payload

    def test_read_corrupted_object_raises(self, tmp_path: pathlib.Path) -> None:
        """Reading an object whose file was overwritten raises OSError."""
        from muse.core.object_store import _object_path_with_fallback

        repo = _make_repo(tmp_path)
        payload_id = _stored_object(repo, b"will be corrupted")
        stored_file = _object_path_with_fallback(repo, payload_id)
        # Make the file writable, tamper with it, then restore read-only mode.
        os.chmod(stored_file, 0o644)
        stored_file.write_bytes(b"corrupted bytes")
        os.chmod(stored_file, 0o444)
        with pytest.raises(OSError, match="integrity check"):
            read_object(repo, payload_id)

    def test_read_absent_object_returns_none(self, tmp_path: pathlib.Path) -> None:
        """Reading an ID that was never stored returns None."""
        repo = _make_repo(tmp_path)
        never_stored = blob_id(b"absent")
        assert read_object(repo, never_stored) is None
479
480
481 # ---------------------------------------------------------------------------
482 # 9. Confirmed: all write_object callsites use content-derived IDs
483 # ---------------------------------------------------------------------------
484
485
class TestCallsiteIntegrity:
    """Entry points that feed the object store must use content-derived IDs."""

    @staticmethod
    def _configured_repo(tmp_path: pathlib.Path) -> pathlib.Path:
        """Create a test repo and give it a minimal config file with an author."""
        repo = _make_repo(tmp_path)
        config_toml_path(repo).write_text('[core]\nauthor = "test"\n')
        return repo

    def test_hash_object_stdin_derives_id_from_content(self, tmp_path: pathlib.Path) -> None:
        """hash-object with --write derives object_id from actual stdin bytes."""
        from tests.cli_test_helper import CliRunner

        repo = self._configured_repo(tmp_path)
        content = b"stdin content for hashing"
        expected_oid = blob_id(content)
        runner = CliRunner()
        result = runner.invoke(
            None,
            ["hash-object", "--stdin", "--write"],
            input=content,
            env={"MUSE_REPO_ROOT": str(repo)},
        )
        assert result.exit_code == 0, result.output
        assert expected_oid in result.output
        assert has_object(repo, expected_oid)

    def test_hash_object_file_derives_id_from_file_content(self, tmp_path: pathlib.Path) -> None:
        """hash-object with a file path derives object_id from actual file bytes."""
        from tests.cli_test_helper import CliRunner

        repo = self._configured_repo(tmp_path)
        content = b"file content for hashing"
        target = tmp_path / "target.bin"
        target.write_bytes(content)
        expected_oid = blob_id(content)
        runner = CliRunner()
        result = runner.invoke(
            None,
            ["hash-object", str(target), "--write"],
            env={"MUSE_REPO_ROOT": str(repo)},
        )
        assert result.exit_code == 0, result.output
        assert expected_oid in result.output
        assert has_object(repo, expected_oid)

    def test_unpack_objects_hash_mismatch_rejected(self, tmp_path: pathlib.Path) -> None:
        """A pack blob whose content does not hash to its declared ID is not stored.

        This exercises ``apply_mpack`` directly rather than going through the
        CLI, so no CLI runner is needed here.
        """
        repo = self._configured_repo(tmp_path)
        legit_content = b"legitimate"
        legit_oid = blob_id(legit_content)

        mpack: MPack = MPack(
            commits=[],
            snapshots=[],
            blobs=[{"object_id": legit_oid, "content": b"malicious bytes"}],
        )
        result = apply_mpack(repo, mpack)
        # The mismatching entry must be dropped, not written.
        assert not has_object(repo, legit_oid), "Poisoned object must not enter the store"
        assert result["blobs_written"] == 0
541
542
543 # ---------------------------------------------------------------------------
544 # 10. Stress: 10 000-object pack processed within time budget
545 # ---------------------------------------------------------------------------
546
547
class TestStress:
    """Volume tests for apply_mpack, run with fsync-related calls patched out."""

    @pytest.fixture(autouse=True)
    def no_fsync(self) -> Iterator[None]:
        """Mock fsync so the budget test measures algorithmic cost, not I/O latency.

        This is a generator fixture: the patches stay active while the test
        runs and are undone when the ``with`` block exits after the yield.
        """
        with patch("muse.core.object_store._fsync_fd", return_value=None), \
                patch("muse.core.commits.os.fsync", return_value=None), \
                patch("muse.core.io.os.fsync", return_value=None), \
                patch("muse.core.io.fcntl.fcntl", return_value=0):
            yield

    @staticmethod
    def _unique_blobs(prefix: str, count: int) -> list[dict[str, object]]:
        """Return *count* distinct, correctly-hashed blob entries named ``<prefix>-NNNNNN``."""
        payloads = [f"{prefix}-{i:06d}".encode() for i in range(count)]
        return [{"object_id": blob_id(data), "content": data} for data in payloads]

    @pytest.mark.perf
    def test_10k_object_pack_within_budget(self, tmp_path: pathlib.Path) -> None:
        """10 000 unique objects written through apply_mpack in under 30 seconds."""
        repo = _make_repo(tmp_path)
        n = 10_000
        mpack: MPack = MPack(
            commits=[], snapshots=[], blobs=self._unique_blobs("stress-object", n)
        )

        # Only the apply_mpack call is timed; building the pack is excluded.
        start = time.monotonic()
        result = apply_mpack(repo, mpack)
        elapsed = time.monotonic() - start

        assert result["blobs_written"] == n
        assert elapsed < 30.0, f"10k-object pack took {elapsed:.1f}s — too slow"

    def test_idempotent_10k_pack_fast(self, tmp_path: pathlib.Path) -> None:
        """Re-applying a pack writes nothing and reports every blob as skipped.

        A 1 000-object pack is used (smaller than the name suggests) and no
        timing is asserted; only the written/skipped counters are checked.
        """
        repo = _make_repo(tmp_path)
        n = 1_000
        mpack: MPack = MPack(
            commits=[], snapshots=[], blobs=self._unique_blobs("idem-object", n)
        )
        apply_mpack(repo, mpack)  # first application populates the store
        result2 = apply_mpack(repo, mpack)  # second application must be a no-op
        assert result2["blobs_written"] == 0
        assert result2["blobs_skipped"] == n

    def test_10k_duplicate_ids_deduplicated(self, tmp_path: pathlib.Path) -> None:
        """10 000 entries with the same object_id are deduplicated to one write."""
        repo = _make_repo(tmp_path)
        content = b"one true object"
        oid = blob_id(content)
        mpack: MPack = MPack(
            commits=[],
            snapshots=[],
            blobs=[{"object_id": oid, "content": content}] * 10_000,
        )
        result = apply_mpack(repo, mpack)
        assert result["blobs_written"] == 1
        assert result["blobs_skipped"] == 9_999
606
607
608 # ---------------------------------------------------------------------------
609 # 11. Concurrent poisoning stress
610 # ---------------------------------------------------------------------------
611
612
class TestConcurrentPoisoning:
    """Concurrent writers must neither poison nor corrupt the object store."""

    @staticmethod
    def _run_all(threads: list[threading.Thread]) -> None:
        """Start every thread, join each with a 5 s timeout, and fail if any is still running.

        ``Thread.join(timeout=...)`` returns silently on timeout, so liveness
        has to be checked explicitly; otherwise a hung worker would go
        unnoticed.
        """
        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=5.0)
        stuck = [t.name for t in threads if t.is_alive()]
        assert not stuck, f"Worker threads did not finish in time: {stuck}"

    def test_concurrent_hash_mismatch_attempts_do_not_corrupt(
        self, tmp_path: pathlib.Path
    ) -> None:
        """50 threads simultaneously trying to poison the store — none succeeds."""
        repo = _make_repo(tmp_path)
        legit_content = b"the one true content"
        legit_oid = blob_id(legit_content)

        # Write the legitimate object first.
        write_object(repo, legit_oid, legit_content)

        errors: list[str] = []
        lock = threading.Lock()

        def poison_attempt(idx: int) -> None:
            malicious_content = f"malicious-{idx}".encode()
            try:
                write_object(repo, legit_oid, malicious_content)
            except ValueError:
                return  # expected: the write was rejected
            except Exception as exc:  # surfaced below instead of dying with the thread
                with lock:
                    errors.append(f"Thread {idx}: unexpected {type(exc).__name__}: {exc}")
                return
            with lock:
                errors.append(f"Thread {idx}: poisoning succeeded!")

        threads = [threading.Thread(target=poison_attempt, args=(i,)) for i in range(50)]
        self._run_all(threads)

        assert not errors, "\n".join(errors)
        # The stored object must still be the legitimate one.
        assert read_object(repo, legit_oid) == legit_content

    def test_concurrent_writes_of_same_object_idempotent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """50 threads writing the same valid object — at least one write, no corruption."""
        repo = _make_repo(tmp_path)
        content = b"concurrent valid object"
        oid = blob_id(content)
        results: list[bool] = []
        failures: list[str] = []
        lock = threading.Lock()

        def write_it() -> None:
            try:
                wrote = write_object(repo, oid, content)
            except Exception as exc:  # surfaced below instead of dying with the thread
                with lock:
                    failures.append(f"{type(exc).__name__}: {exc}")
                return
            with lock:
                results.append(wrote)

        threads = [threading.Thread(target=write_it) for _ in range(50)]
        self._run_all(threads)

        assert not failures, "\n".join(failures)
        assert len(results) == len(threads), "Every thread must report a result"
        assert results.count(True) >= 1, "At least one thread must have written"
        assert read_object(repo, oid) == content
668
669
670 # ---------------------------------------------------------------------------
671 # 12. SHA-256 threat model documentation test
672 # ---------------------------------------------------------------------------
673
674
class TestSHA256ThreatModel:
    """Executable notes on the threat model behind the content-addressed store."""

    def test_sha256_preimage_resistance_documented(self) -> None:
        """Document that SHA-256 preimage resistance is the security boundary.

        Muse's object store is secure against hash-mismatch injection because:
        1. write_object computes sha256(content) and rejects any mismatch.
        2. read_object recomputes sha256 on every read.
        3. restore_object recomputes sha256 before copying to working tree.

        A successful poisoning attack would require finding a second preimage:
        a different content M' such that sha256(M') == sha256(M).

        As of 2026, the best known second-preimage attack on SHA-256 requires
        2^256 operations — computationally infeasible for any adversary.

        This test is a living specification of the threat model, not a
        cryptographic proof. The only thing it executes is a check that two
        different messages get different blob IDs.
        """
        first_message = b"message A"
        second_message = b"message B"
        # Distinct inputs are expected to produce distinct digests; a collision
        # is computationally infeasible, though not theoretically impossible.
        first_id = blob_id(first_message)
        second_id = blob_id(second_message)
        assert first_id != second_id

    def test_write_then_read_roundtrip_preserves_content(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Content written to the store is always returned verbatim on read."""
        repo = _make_repo(tmp_path)
        for repeat in range(1, 21):
            # Payload number i (0-based) is its label repeated i + 1 times.
            payload = f"stress-content-{repeat - 1}".encode() * repeat
            payload_id = blob_id(payload)
            write_object(repo, payload_id, payload)
            assert read_object(repo, payload_id) == payload

    def test_object_mode_is_immutable(self, tmp_path: pathlib.Path) -> None:
        """Stored objects have mode 0o444 — expressing immutability at OS level."""
        from muse.core.object_store import _object_path_with_fallback

        repo = _make_repo(tmp_path)
        payload_id = _stored_object(repo, b"immutable object")
        stored_file = _object_path_with_fallback(repo, payload_id)
        permission_bits = stored_file.stat().st_mode & 0o777
        mode = oct(permission_bits)
        assert mode == oct(0o444), f"Expected 0o444, got {mode}"
720
721
class TestWriteObjectFromPathRoundTrip:
    """write_object_from_path must produce objects readable by read_object."""

    def test_read_returns_exact_content(self, tmp_path: pathlib.Path) -> None:
        """read_object after write_object_from_path returns the original bytes."""
        repo = _make_repo(tmp_path)
        payload = b"hello world, this is a blob"
        source_file = tmp_path / "blob.txt"
        source_file.write_bytes(payload)
        payload_id = blob_id(payload)
        write_object_from_path(repo, payload_id, source_file)
        assert read_object(repo, payload_id) == payload

    def test_write_from_path_and_write_object_are_equivalent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Both write paths yield objects that read back as the same bytes."""
        bases = [tmp_path / "r1", tmp_path / "r2"]
        for base in bases:
            base.mkdir()
        repo_bytes, repo_file = (_make_repo(base) for base in bases)
        payload = b"equivalent content"
        source_file = tmp_path / "src.bin"
        source_file.write_bytes(payload)
        payload_id = blob_id(payload)
        write_object(repo_bytes, payload_id, payload)
        write_object_from_path(repo_file, payload_id, source_file)
        read_via_bytes = read_object(repo_bytes, payload_id)
        read_via_file = read_object(repo_file, payload_id)
        assert read_via_bytes == read_via_file == payload

    def test_get_all_commits_does_not_flag_blob_as_corrupt(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An object written from a path is stored with a leading ``blob `` header.

        The check is made on the raw file found under ``sha256/*/*`` in the
        objects directory; no commit scan is run here.
        """
        from muse.core.object_store import objects_dir

        repo = _make_repo(tmp_path)
        payload = b"I am a Python source file\ndef foo(): pass\n"
        source_file = tmp_path / "foo.py"
        source_file.write_bytes(payload)
        payload_id = blob_id(payload)
        write_object_from_path(repo, payload_id, source_file)
        candidates = objects_dir(repo).glob("sha256/*/*")
        stored_file = next(candidates, None)
        assert stored_file is not None
        raw = stored_file.read_bytes()
        assert raw.startswith(b"blob "), (
            "Stored object must begin with 'blob ' header"
        )

    def test_bare_objects_readable_after_migration(
        self, tmp_path: pathlib.Path
    ) -> None:
        """read_object can recover bare (no-header) objects written by old code."""
        from muse.core.object_store import object_path

        repo = _make_repo(tmp_path)
        payload = b"legacy blob without header"
        payload_id = blob_id(payload)
        # Place the raw bytes at the object's path by hand, with no header,
        # and mark the file read-only.
        legacy_file = object_path(repo, payload_id)
        legacy_file.parent.mkdir(parents=True, exist_ok=True)
        legacy_file.write_bytes(payload)
        legacy_file.chmod(0o444)
        assert read_object(repo, payload_id) == payload
File History 1 commit
sha256:fe844c2411edd1cec3d4c847f36a96c6ccd4e3d7d1a715106d2ecd64216bf94f fix: bare object detection and read recovery; rm adapter files Sonnet 4.6 minor 3 days ago