gabriel / muse public
test_security_object_store_poisoning.py python
717 lines 29.3 KB
Raw
sha256:fe844c2411edd1cec3d4c847f36a96c6ccd4e3d7d1a715106d2ecd64216bf94f fix: bare object detection and read recovery; rm adapter files Sonnet 4.6 minor ⚠ breaking 15 days ago
1 """Phase 2.3 — Object store poisoning tests.
2
3 Covers every adversarial input and edge case identified in the recon phase:
4
5 1. Hash mismatch injection into write_object / write_object_from_path.
6 2. Per-object size cap enforcement at write time (not just read time).
7 3. restore_object re-hashes source before copying — corrupt store is detected.
8 4. apply_mpack: object count limit (pack-bomb).
9 5. apply_mpack: per-object size cap before write_object is called.
10 6. apply_mpack: object-ID deduplication (duplicate IDs are skipped without re-hashing).
11 7. apply_mpack: snapshot / commit isolation — malformed entries skipped.
12 8. read_object: corruption is detected on every read; zero-byte objects are accepted as valid empty blobs.
13 9. All write_object callsites confirmed to use content-derived IDs.
14 10. Stress: 10 000-object pack processed within time budget.
15 11. Stress: 50 concurrent poisoning attempts do not corrupt the store.
16 12. Threat-model boundary: SHA-256 collision infeasibility documented via test.
17 """
18
from __future__ import annotations

import os
import pathlib
import tempfile
import threading
import time
from collections.abc import Iterator
from unittest.mock import patch

import pytest

from muse.core.object_store import (
    has_object,
    read_object,
    restore_object,
    write_object,
    write_object_from_path,
)
from muse.core.mpack import ApplyResult, MPack, apply_mpack
from muse.core.store import CommitDict, SnapshotDict
from muse.core.validation import MAX_OBJECT_WRITE_BYTES, MAX_PACK_OBJECTS
from muse.core.types import Manifest, blob_id, content_hash, hash_file, long_id, now_utc_iso
from muse.core.paths import config_toml_path, muse_dir
42
43
44 # ---------------------------------------------------------------------------
45 # Helpers
46 # ---------------------------------------------------------------------------
47
48
49
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *tmp_path* and return its root.

    The metadata directory (as located by ``muse_dir``) receives the standard
    sub-directories, a ``HEAD`` pointing at ``refs/heads/main`` and a
    ``repo.json`` carrying a fixed repo ID.
    """
    root = tmp_path / "repo"
    root.mkdir()
    meta = muse_dir(root)
    # Order matters: "refs" is created before "refs/heads" because mkdir is
    # called without exist_ok.
    layout = ("objects", "commits", "snapshots", "refs", "refs/heads", "tags")
    for name in layout:
        meta.joinpath(name).mkdir(parents=True)
    meta.joinpath("HEAD").write_text("ref: refs/heads/main\n")
    meta.joinpath("repo.json").write_text('{"repo_id": "test-repo"}')
    return root
59
60
def _stored_object(repo: pathlib.Path, content: bytes) -> str:
    """Store *content* under its content-derived ID and hand that ID back."""
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id
66
67
def _minimal_commit_dict(snap_id: str) -> CommitDict:
    """Build a parentless commit record on ``main`` that points at *snap_id*.

    The commit ID is a fixed placeholder (64 ``a`` characters); the repo ID is
    derived from *snap_id* via ``content_hash`` and the timestamp is "now".
    """
    return CommitDict(
        commit_id="a" * 64,
        repo_id=content_hash({"role": "repo", "snap_id": snap_id}),
        branch="main",
        parent_commit_id=None,
        parent2_commit_id=None,
        snapshot_id=snap_id,
        message="test",
        author="test",
        committed_at=now_utc_iso(),
        metadata={},
    )
83
84
def _minimal_snapshot_dict(manifest: Manifest) -> SnapshotDict:
    """Wrap *manifest* in a snapshot record whose ID is computed from the manifest."""
    from muse.core.ids import hash_snapshot

    return SnapshotDict(
        snapshot_id=hash_snapshot(manifest),
        manifest=manifest,
        created_at=now_utc_iso(),
    )
94
95
96 # ---------------------------------------------------------------------------
97 # 1. Hash mismatch injection
98 # ---------------------------------------------------------------------------
99
100
class TestHashMismatch:
    """Writes whose declared ID disagrees with the payload hash must be refused."""

    def test_write_object_wrong_content_raises(self, tmp_path: pathlib.Path) -> None:
        """Bytes that do not hash to the declared ID are rejected and never stored."""
        repo = _make_repo(tmp_path)
        # The ID belongs to the legitimate payload; the bytes handed over do not.
        declared_id = blob_id(b"legitimate content")
        with pytest.raises(ValueError, match="Content integrity failure"):
            write_object(repo, declared_id, b"poisoned content")
        assert not has_object(repo, declared_id), "Poisoned object must not be stored"

    def test_write_object_correct_content_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A matching ID/payload pair is written and can be read back verbatim."""
        repo = _make_repo(tmp_path)
        payload = b"valid content"
        object_id = blob_id(payload)
        wrote = write_object(repo, object_id, payload)
        assert wrote is True
        assert read_object(repo, object_id) == payload

    def test_write_object_from_path_wrong_id_raises(self, tmp_path: pathlib.Path) -> None:
        """The path-based writer refuses a declared ID that is not the file's hash."""
        repo = _make_repo(tmp_path)
        source = tmp_path / "real.bin"
        source.write_bytes(b"real file content")
        mismatched_id = blob_id(b"different content entirely")
        with pytest.raises(ValueError, match="Content integrity failure"):
            write_object_from_path(repo, mismatched_id, source)
        assert not has_object(repo, mismatched_id)

    def test_write_object_from_path_correct_id_succeeds(self, tmp_path: pathlib.Path) -> None:
        """The path-based writer accepts a file whose hash equals the declared ID."""
        repo = _make_repo(tmp_path)
        payload = b"file content"
        source = tmp_path / "file.bin"
        source.write_bytes(payload)
        object_id = blob_id(payload)
        assert write_object_from_path(repo, object_id, source) is True
        assert has_object(repo, object_id)

    def test_all_ones_id_mismatch_raises(self, tmp_path: pathlib.Path) -> None:
        """A well-formed but fabricated ID (64 ``f`` characters) is still rejected."""
        repo = _make_repo(tmp_path)
        fabricated_id = "f" * 64
        with pytest.raises(ValueError):
            write_object(repo, fabricated_id, b"something")

    def test_empty_object_valid(self, tmp_path: pathlib.Path) -> None:
        """The empty blob is a legitimate object and round-trips as ``b""``."""
        repo = _make_repo(tmp_path)
        nothing = b""
        empty_id = blob_id(nothing)
        assert write_object(repo, empty_id, nothing) is True
        assert read_object(repo, empty_id) == nothing

    def test_invalid_object_id_format_raises(self, tmp_path: pathlib.Path) -> None:
        """Syntactically invalid object IDs are refused before anything is written."""
        repo = _make_repo(tmp_path)
        malformed_ids = (
            "not-a-hex-id",  # wrong length and non-hex characters
            "a" * 63,  # one character short
            "G" * 64,  # right length, but "G" is not a hexadecimal digit
        )
        for bad_id in malformed_ids:
            with pytest.raises((ValueError, TypeError)):
                write_object(repo, bad_id, b"content")
161
162
163 # ---------------------------------------------------------------------------
164 # 2. Per-object size cap on write
165 # ---------------------------------------------------------------------------
166
167
class TestObjectSizeCap:
    """Write-time enforcement of the per-object size ceiling."""

    def test_oversized_content_rejected_at_write(self, tmp_path: pathlib.Path) -> None:
        """write_object must reject blobs above MAX_OBJECT_WRITE_BYTES."""
        repo = _make_repo(tmp_path)
        # One byte past the ceiling is the smallest payload that must be refused.
        oversized = b"x" * (MAX_OBJECT_WRITE_BYTES + 1)
        oid = blob_id(oversized)
        with pytest.raises(ValueError, match="exceeding the"):
            write_object(repo, oid, oversized)
        assert not has_object(repo, oid), "Oversized object must not be stored"

    def test_exactly_at_limit_is_rejected(self, tmp_path: pathlib.Path) -> None:
        """A blob of MAX_OBJECT_WRITE_BYTES + 1 bytes is rejected and not stored.

        Despite the test name, the payload is one byte *over* the limit: the
        limit itself is the ceiling and only sizes strictly above it are
        refused. The name is kept so existing test selections keep working.
        """
        repo = _make_repo(tmp_path)
        oversized = b"y" * (MAX_OBJECT_WRITE_BYTES + 1)
        oid = blob_id(oversized)
        # Pin the size-cap error specifically, as the sibling test above does,
        # so an unrelated ValueError cannot make this test pass.
        with pytest.raises(ValueError, match="exceeding the"):
            write_object(repo, oid, oversized)
        assert not has_object(repo, oid), "Oversized object must not be stored"

    def test_write_object_from_path_oversized_raises(self, tmp_path: pathlib.Path) -> None:
        """write_object_from_path must reject a source file larger than the cap."""
        repo = _make_repo(tmp_path)
        big_file = tmp_path / "big.bin"
        # Seek to the limit and write one byte: the file is then
        # MAX_OBJECT_WRITE_BYTES + 1 bytes long (sparse where the filesystem
        # supports it, so little disk space is actually used).
        with big_file.open("wb") as fh:
            fh.seek(MAX_OBJECT_WRITE_BYTES)
            fh.write(b"\x00")
        oid = hash_file(big_file)
        with pytest.raises(ValueError, match="exceeding the"):
            write_object_from_path(repo, oid, big_file)
        assert not has_object(repo, oid)

    def test_just_under_limit_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A small blob, far below the cap, is accepted.

        This does not exercise the exact boundary: a 16-byte payload is used
        so the test stays cheap in CI. It only confirms that the size check
        does not reject ordinary, in-range content.
        """
        repo = _make_repo(tmp_path)
        tiny = b"t" * 16
        oid = blob_id(tiny)
        assert write_object(repo, oid, tiny) is True
208
209
210 # ---------------------------------------------------------------------------
211 # 3. restore_object — hash re-verification before copy
212 # ---------------------------------------------------------------------------
213
214
class TestRestoreObjectIntegrity:
    """restore_object must verify the stored bytes before they reach the working tree."""

    def test_restore_clean_object_succeeds(self, tmp_path: pathlib.Path) -> None:
        """An intact object is restored and the destination holds its exact bytes."""
        repo = _make_repo(tmp_path)
        payload = b"data to restore"
        object_id = _stored_object(repo, payload)
        target = tmp_path / "restored.bin"
        assert restore_object(repo, object_id, target) is True
        assert target.read_bytes() == payload

    def test_restore_missing_object_returns_false(self, tmp_path: pathlib.Path) -> None:
        """Restoring an ID that was never stored reports False and creates no file."""
        repo = _make_repo(tmp_path)
        target = tmp_path / "ghost.bin"
        restored = restore_object(repo, blob_id(b"ghost"), target)
        assert restored is False
        assert not target.exists()

    def test_restore_detects_corrupted_store_object(self, tmp_path: pathlib.Path) -> None:
        """A tampered on-disk object makes restore_object raise OSError."""
        from muse.core.object_store import _object_path_with_fallback

        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"important file content")

        # Tamper with the stored file in place: lift the read-only mode,
        # overwrite the bytes, then put the read-only mode back.
        stored_path = _object_path_with_fallback(repo, object_id)
        os.chmod(stored_path, 0o644)
        stored_path.write_bytes(b"corrupted bytes that do not match the declared hash")
        os.chmod(stored_path, 0o444)

        target = tmp_path / "should-not-exist.bin"
        with pytest.raises(OSError, match="failed SHA-256 integrity check"):
            restore_object(repo, object_id, target)
        assert not target.exists(), "No corrupted data must reach the working tree"

    def test_restore_dest_is_writable(self, tmp_path: pathlib.Path) -> None:
        """A restored file can be overwritten; the store's read-only mode is not inherited."""
        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"editable file")
        target = tmp_path / "editable.txt"
        restore_object(repo, object_id, target)
        # A PermissionError here would mean the read-only mode leaked through.
        target.write_bytes(b"new content")

    def test_restore_is_atomic(self, tmp_path: pathlib.Path) -> None:
        """After a restore the destination holds the complete payload.

        Only the final content is checked; no concurrent reader is involved.
        """
        repo = _make_repo(tmp_path)
        payload = b"atomic restore test " + b"x" * 1000
        object_id = _stored_object(repo, payload)
        target = tmp_path / "atomic.bin"
        restore_object(repo, object_id, target)
        assert target.read_bytes() == payload
267
268
269 # ---------------------------------------------------------------------------
270 # 4 & 5. apply_mpack — pack-bomb and per-object size cap
271 # ---------------------------------------------------------------------------
272
273
class TestApplyMPackBomb:
    """apply_mpack limits: total item count and per-object size."""

    def _build_mpack(
        self,
        *,
        n_objects: int = 0,
        n_snapshots: int = 0,
        n_commits: int = 0,
        object_size: int = 1,
    ) -> MPack:
        """Return a pack holding *n_objects* distinct, correctly hashed objects.

        Each payload is ``object-<i>`` followed by *object_size* NUL bytes.
        *n_snapshots* and *n_commits* are accepted but currently ignored: the
        pack's commit and snapshot lists are always empty.
        """
        payloads = (
            f"object-{i}".encode() + b"\x00" * object_size for i in range(n_objects)
        )
        entries = [{"object_id": blob_id(data), "content": data} for data in payloads]
        return MPack(commits=[], snapshots=[], objects=entries)

    def test_pack_at_limit_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A pack within the item limit is applied and every object is written.

        The count is capped at 10 to keep the test small, so the exact
        MAX_PACK_OBJECTS boundary is only hit when the limit is below 10.
        """
        repo = _make_repo(tmp_path)
        count = min(10, MAX_PACK_OBJECTS)
        outcome = apply_mpack(repo, self._build_mpack(n_objects=count))
        assert outcome["objects_written"] == count

    def test_pack_exceeds_limit_raises(self, tmp_path: pathlib.Path) -> None:
        """A pack carrying more than MAX_PACK_OBJECTS items is rejected outright."""
        repo = _make_repo(tmp_path)
        # The entry need not be a valid object: the test expects the count
        # check to raise before any entry is examined.
        placeholder = {"object_id": "a" * 64, "content": b"x"}
        too_many = [placeholder] * (MAX_PACK_OBJECTS + 1)
        bomb: MPack = MPack(commits=[], snapshots=[], objects=too_many)
        with pytest.raises(ValueError, match="exceeds the"):
            apply_mpack(repo, bomb)

    def test_oversized_object_in_pack_is_skipped(self, tmp_path: pathlib.Path) -> None:
        """An over-cap object is left out while the rest of the pack is applied."""
        repo = _make_repo(tmp_path)
        huge = b"B" * (MAX_OBJECT_WRITE_BYTES + 1)
        small = b"tiny object"
        huge_id = blob_id(huge)
        small_id = blob_id(small)
        pack: MPack = MPack(
            commits=[],
            snapshots=[],
            objects=[
                {"object_id": huge_id, "content": huge},
                {"object_id": small_id, "content": small},
            ],
        )
        outcome = apply_mpack(repo, pack)
        # Only the small object may end up in the store.
        assert not has_object(repo, huge_id), "Oversized object must not be stored"
        assert has_object(repo, small_id), "Valid object must be stored"
        assert outcome["objects_written"] == 1

    def test_zero_item_pack_is_accepted(self, tmp_path: pathlib.Path) -> None:
        """An empty pack is a no-op that yields an all-zero result."""
        repo = _make_repo(tmp_path)
        outcome = apply_mpack(repo, MPack(commits=[], snapshots=[], objects=[]))
        nothing_done = ApplyResult(
            commits_written=0,
            snapshots_written=0,
            objects_written=0,
            objects_skipped=0,
            tags_written=0,
            failed_objects=[],
            skipped_snapshots=[],
        )
        assert outcome == nothing_done
351
352
353 # ---------------------------------------------------------------------------
354 # 6. apply_mpack — object-ID deduplication
355 # ---------------------------------------------------------------------------
356
357
class TestApplyPackDeduplication:
    """Repeated object IDs inside one pack are written once and counted as skipped."""

    def test_duplicate_object_ids_not_hashed_twice(self, tmp_path: pathlib.Path) -> None:
        """One hundred copies of the same entry yield one write and 99 skips."""
        repo = _make_repo(tmp_path)
        payload = b"dedup test object"
        object_id = blob_id(payload)
        entry = {"object_id": object_id, "content": payload}
        pack: MPack = MPack(commits=[], snapshots=[], objects=[entry] * 100)
        outcome = apply_mpack(repo, pack)
        assert outcome["objects_written"] == 1
        assert outcome["objects_skipped"] == 99
        assert has_object(repo, object_id)

    def test_duplicate_then_different_both_processed(self, tmp_path: pathlib.Path) -> None:
        """A duplicate is skipped without blocking a later, distinct object."""
        repo = _make_repo(tmp_path)
        first = b"first object"
        second = b"second object"
        first_id = blob_id(first)
        second_id = blob_id(second)
        entries = [
            {"object_id": first_id, "content": first},
            {"object_id": first_id, "content": first},  # repeat of the first entry
            {"object_id": second_id, "content": second},
        ]
        outcome = apply_mpack(repo, MPack(commits=[], snapshots=[], objects=entries))
        assert outcome["objects_written"] == 2
        assert outcome["objects_skipped"] == 1
393
394
395 # ---------------------------------------------------------------------------
396 # 7. apply_mpack — malformed entries are isolated (snapshot / commit)
397 # ---------------------------------------------------------------------------
398
399
class TestApplyPackMalformedEntries:
    """Bad object entries are isolated: they are dropped without aborting the pack."""

    def test_malformed_object_entry_does_not_abort_pack(self, tmp_path: pathlib.Path) -> None:
        """Invalid entries are skipped and a later valid entry is still written.

        Distinct object IDs are used on purpose. Each ID is attempted only
        once per apply_mpack call, so a second entry reusing an ID whose first
        attempt failed would be deduplicated rather than retried.
        """
        repo = _make_repo(tmp_path)
        payload_a = b"good object A"
        payload_b = b"good object B"
        id_a = blob_id(payload_a)
        id_b = blob_id(payload_b)
        entries = [
            {"object_id": "not-hex", "content": b"bad"},  # ID is not valid hex
            {"object_id": id_a, "content": b"wrong bytes"},  # bytes do not hash to the ID
            {"object_id": id_b, "content": payload_b},  # well-formed, different ID
        ]
        outcome = apply_mpack(repo, MPack(commits=[], snapshots=[], objects=entries))
        assert not has_object(repo, id_a), "Hash-mismatched entry must not be stored"
        assert has_object(repo, id_b), "Valid entry after bad ones must be stored"
        assert outcome["objects_written"] == 1

    def test_missing_object_id_in_pack_entry_skipped(self, tmp_path: pathlib.Path) -> None:
        """An entry whose object ID is the empty string writes nothing."""
        repo = _make_repo(tmp_path)
        anonymous = {"object_id": "", "content": b"anything"}
        outcome = apply_mpack(repo, MPack(commits=[], snapshots=[], objects=[anonymous]))
        assert outcome["objects_written"] == 0

    def test_empty_content_in_pack_entry_skipped(self, tmp_path: pathlib.Path) -> None:
        """An entry with an empty object ID and empty content writes nothing."""
        from muse.core.mpack import ObjectPayload

        repo = _make_repo(tmp_path)
        # Neither an ID nor any bytes: there is nothing to address this entry by.
        blank = ObjectPayload(object_id="", content=b"")
        outcome = apply_mpack(repo, MPack(commits=[], snapshots=[], objects=[blank]))
        assert outcome["objects_written"] == 0
449
450
451 # ---------------------------------------------------------------------------
452 # 8. read_object — corruption detected on every read
453 # ---------------------------------------------------------------------------
454
455
class TestReadObjectIntegrity:
    """read_object returns intact bytes, flags tampering, and reports absence."""

    def test_read_clean_object_succeeds(self, tmp_path: pathlib.Path) -> None:
        """An untouched object reads back as exactly the bytes that were stored."""
        repo = _make_repo(tmp_path)
        payload = b"clean read test"
        object_id = _stored_object(repo, payload)
        assert read_object(repo, object_id) == payload

    def test_read_corrupted_object_raises(self, tmp_path: pathlib.Path) -> None:
        """Reading an object whose file was overwritten raises OSError."""
        from muse.core.object_store import _object_path_with_fallback

        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"will be corrupted")
        # Overwrite the stored file behind the store's back, then put the
        # read-only mode back.
        stored_path = _object_path_with_fallback(repo, object_id)
        os.chmod(stored_path, 0o644)
        stored_path.write_bytes(b"corrupted bytes")
        os.chmod(stored_path, 0o444)
        with pytest.raises(OSError, match="integrity check"):
            read_object(repo, object_id)

    def test_read_absent_object_returns_none(self, tmp_path: pathlib.Path) -> None:
        """An ID that was never written reads as None."""
        repo = _make_repo(tmp_path)
        never_stored = blob_id(b"absent")
        assert read_object(repo, never_stored) is None
478
479
480 # ---------------------------------------------------------------------------
481 # 9. Confirmed: all write_object callsites use content-derived IDs
482 # ---------------------------------------------------------------------------
483
484
class TestCallsiteIntegrity:
    """Entry points that write objects must derive the ID from the actual bytes."""

    @staticmethod
    def _repo_with_author(tmp_path: pathlib.Path) -> pathlib.Path:
        """Create a test repo and write a config file containing an author entry."""
        repo = _make_repo(tmp_path)
        config_toml_path(repo).write_text("[core]\nauthor = \"test\"\n")
        return repo

    def test_hash_object_stdin_derives_id_from_content(self, tmp_path: pathlib.Path) -> None:
        """``hash-object --stdin --write`` stores stdin under the hash of its bytes."""
        from tests.cli_test_helper import CliRunner

        repo = self._repo_with_author(tmp_path)
        payload = b"stdin content for hashing"
        expected_id = blob_id(payload)
        outcome = CliRunner().invoke(
            None,
            ["hash-object", "--stdin", "--write"],
            input=payload,
            env={"MUSE_REPO_ROOT": str(repo)},
        )
        assert outcome.exit_code == 0, outcome.output
        assert expected_id in outcome.output
        assert has_object(repo, expected_id)

    def test_hash_object_file_derives_id_from_file_content(self, tmp_path: pathlib.Path) -> None:
        """``hash-object <path> --write`` stores the file under the hash of its bytes."""
        from tests.cli_test_helper import CliRunner

        repo = self._repo_with_author(tmp_path)
        payload = b"file content for hashing"
        source = tmp_path / "target.bin"
        source.write_bytes(payload)
        expected_id = blob_id(payload)
        outcome = CliRunner().invoke(
            None,
            ["hash-object", str(source), "--write"],
            env={"MUSE_REPO_ROOT": str(repo)},
        )
        assert outcome.exit_code == 0, outcome.output
        assert expected_id in outcome.output
        assert has_object(repo, expected_id)

    def test_unpack_objects_hash_mismatch_rejected(self, tmp_path: pathlib.Path) -> None:
        """A pack entry whose bytes do not match its ID never enters the store.

        The check goes through apply_mpack directly rather than the CLI.
        """
        from tests.cli_test_helper import CliRunner

        repo = self._repo_with_author(tmp_path)
        claimed_id = blob_id(b"legitimate")
        forged = {"object_id": claimed_id, "content": b"malicious bytes"}
        outcome = apply_mpack(repo, MPack(commits=[], snapshots=[], objects=[forged]))
        # The forged entry must be dropped, not written.
        assert not has_object(repo, claimed_id), "Poisoned object must not enter the store"
        assert outcome["objects_written"] == 0
540
541
542 # ---------------------------------------------------------------------------
543 # 10. Stress: 10 000-object pack processed within time budget
544 # ---------------------------------------------------------------------------
545
546
class TestStress:
    """Volume tests for apply_mpack with fsync-related calls stubbed out."""

    @pytest.fixture(autouse=True)
    def no_fsync(self) -> Iterator[None]:
        """Stub out fsync-related calls for the duration of each test.

        The patches keep the timing test focused on algorithmic cost rather
        than disk-flush latency. This is a generator fixture, so the return
        annotation is ``Iterator[None]`` rather than ``None``.
        """
        with patch("muse.core.object_store._fsync_fd", return_value=None), \
             patch("muse.core.store.os.fsync", return_value=None), \
             patch("muse.core.store.fcntl.fcntl", return_value=0):
            yield

    @pytest.mark.perf
    def test_10k_object_pack_within_budget(self, tmp_path: pathlib.Path) -> None:
        """10 000 unique objects written through apply_mpack in under 30 seconds."""
        repo = _make_repo(tmp_path)
        n = 10_000
        payloads = [f"stress-object-{i:06d}".encode() for i in range(n)]
        objects = [{"object_id": blob_id(data), "content": data} for data in payloads]

        mpack: MPack = MPack(commits=[], snapshots=[], objects=objects)
        # Only the apply call is timed; building the pack is excluded.
        start = time.monotonic()
        result = apply_mpack(repo, mpack)
        elapsed = time.monotonic() - start

        assert result["objects_written"] == n
        assert elapsed < 30.0, f"10k-object pack took {elapsed:.1f}s — too slow"

    def test_idempotent_10k_pack_fast(self, tmp_path: pathlib.Path) -> None:
        """Re-applying a pack writes nothing: every object is reported as skipped.

        Despite the test name, 1 000 objects are used (not 10 000) and no
        timing is asserted; only the written/skipped counts are checked.
        """
        repo = _make_repo(tmp_path)
        n = 1_000
        payloads = [f"idem-object-{i:06d}".encode() for i in range(n)]
        objects = [{"object_id": blob_id(data), "content": data} for data in payloads]

        mpack: MPack = MPack(commits=[], snapshots=[], objects=objects)
        apply_mpack(repo, mpack)  # first application populates the store
        result2 = apply_mpack(repo, mpack)  # second application must be a no-op
        assert result2["objects_written"] == 0
        assert result2["objects_skipped"] == n

    def test_10k_duplicate_ids_deduplicated(self, tmp_path: pathlib.Path) -> None:
        """10 000 entries with the same object_id are deduplicated to one write."""
        repo = _make_repo(tmp_path)
        content = b"one true object"
        oid = blob_id(content)
        mpack: MPack = MPack(
            commits=[],
            snapshots=[],
            objects=[{"object_id": oid, "content": content}] * 10_000,
        )
        result = apply_mpack(repo, mpack)
        assert result["objects_written"] == 1
        assert result["objects_skipped"] == 9_999
604
605
606 # ---------------------------------------------------------------------------
607 # 11. Concurrent poisoning stress
608 # ---------------------------------------------------------------------------
609
610
class TestConcurrentPoisoning:
    """Concurrent writers must neither poison nor corrupt the store."""

    def test_concurrent_hash_mismatch_attempts_do_not_corrupt(
        self, tmp_path: pathlib.Path
    ) -> None:
        """50 threads simultaneously trying to poison the store — none succeeds."""
        repo = _make_repo(tmp_path)
        legit_content = b"the one true content"
        legit_oid = blob_id(legit_content)

        # Write the legitimate object first.
        write_object(repo, legit_oid, legit_content)

        errors: list[str] = []
        lock = threading.Lock()

        def poison_attempt(idx: int) -> None:
            malicious_content = f"malicious-{idx}".encode()
            try:
                write_object(repo, legit_oid, malicious_content)
            except ValueError:
                return  # expected: the hash mismatch was rejected
            except Exception as exc:
                # An exception raised in a worker thread never reaches the
                # main thread on its own, so record it to fail the test.
                with lock:
                    errors.append(f"Thread {idx}: unexpected {type(exc).__name__}: {exc}")
                return
            with lock:
                errors.append(f"Thread {idx}: poisoning succeeded!")

        # Daemon threads cannot keep the interpreter alive if one of them hangs.
        threads = [
            threading.Thread(target=poison_attempt, args=(i,), daemon=True)
            for i in range(50)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=5.0)

        # join() returns None even on timeout; is_alive() is the only way to
        # tell whether a worker is still running.
        hung = [t.name for t in threads if t.is_alive()]
        assert not hung, f"Threads still running after 5s: {hung}"
        assert not errors, "\n".join(errors)
        # The stored object must still be the legitimate one.
        assert read_object(repo, legit_oid) == legit_content

    def test_concurrent_writes_of_same_object_idempotent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """50 threads writing the same valid object — no failures, no corruption.

        At least one thread must report that it performed the write; how many
        report True under contention is not pinned by this test.
        """
        repo = _make_repo(tmp_path)
        content = b"concurrent valid object"
        oid = blob_id(content)
        results: list[bool] = []
        failures: list[str] = []
        lock = threading.Lock()

        def write_it() -> None:
            try:
                wrote = write_object(repo, oid, content)
            except Exception as exc:
                # Record the failure: it would otherwise be lost in the thread.
                with lock:
                    failures.append(f"{type(exc).__name__}: {exc}")
                return
            with lock:
                results.append(wrote)

        threads = [threading.Thread(target=write_it, daemon=True) for _ in range(50)]
        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=5.0)

        hung = [t.name for t in threads if t.is_alive()]
        assert not hung, f"Threads still running after 5s: {hung}"
        assert not failures, "\n".join(failures)
        assert results.count(True) >= 1, "At least one thread must have written"
        assert read_object(repo, oid) == content
666
667
668 # ---------------------------------------------------------------------------
669 # 12. SHA-256 threat model documentation test
670 # ---------------------------------------------------------------------------
671
672
class TestSHA256ThreatModel:
    """Executable statement of the object store's threat model."""

    def test_sha256_preimage_resistance_documented(self) -> None:
        """Record that SHA-256 second-preimage resistance is the security boundary.

        The store resists hash-mismatch injection because, per the file's own
        tests and documentation:

        1. write_object hashes the content and rejects any mismatch.
        2. read_object re-hashes on every read.
        3. restore_object re-hashes before copying into the working tree.

        Poisoning the store would therefore need a second preimage: different
        content M' with sha256(M') == sha256(M). As of 2026 the best known
        second-preimage attack on SHA-256 costs 2^256 operations, which is
        computationally infeasible for any adversary.

        This is a living specification of the threat model, not a
        cryptographic proof; it only checks that two distinct messages get
        distinct IDs.
        """
        id_a = blob_id(b"message A")
        id_b = blob_id(b"message B")
        # Distinct inputs give distinct digests with overwhelming probability;
        # a collision is infeasible to find, not logically impossible.
        assert id_a != id_b

    def test_write_then_read_roundtrip_preserves_content(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Twenty payloads of growing length each read back byte-for-byte."""
        repo = _make_repo(tmp_path)
        for index in range(20):
            repeats = index + 1
            payload = f"stress-content-{index}".encode() * repeats
            object_id = blob_id(payload)
            write_object(repo, object_id, payload)
            assert read_object(repo, object_id) == payload

    def test_object_mode_is_immutable(self, tmp_path: pathlib.Path) -> None:
        """A stored object's permission bits are 0o444 (read-only for everyone)."""
        from muse.core.object_store import _object_path_with_fallback

        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"immutable object")
        stored_path = _object_path_with_fallback(repo, object_id)
        # Mask off the file-type bits so only the permission bits are compared.
        mode = oct(stored_path.stat().st_mode & 0o777)
        assert mode == oct(0o444), f"Expected 0o444, got {mode}"
File History 1 commit
sha256:fe844c2411edd1cec3d4c847f36a96c6ccd4e3d7d1a715106d2ecd64216bf94f fix: bare object detection and read recovery; rm adapter files Sonnet 4.6 minor 15 days ago