tests/test_security_object_store_poisoning.py · gabriel/muse

1

"""Phase 2.3 — Object store poisoning tests.

Covers every adversarial input and edge case identified in the recon phase:

1. Hash mismatch injection into write_object / write_object_from_path.
2. Per-object size cap enforcement at write time (not just read time).
3. restore_object re-hashes source before copying — corrupt store is detected.
4. apply_mpack: object count limit (pack-bomb).
5. apply_mpack: per-object size cap before write_object is called.
6. apply_mpack: object-ID deduplication (duplicate IDs are skipped in O(1), without re-hashing).
7. apply_mpack: snapshot / commit isolation — malformed entries skipped.
8. Zero-byte objects: valid empty blobs are accepted.
9. All write_object callsites confirmed to use content-derived IDs.
10. Stress: 10 000-object pack processed within time budget.
11. Stress: 50 concurrent poisoning attempts do not corrupt the store.
12. Threat-model boundary: SHA-256 collision infeasibility documented via test.

"""

18

19

from __future__ import annotations

import os
import pathlib
import tempfile
import threading
import time
from collections.abc import Iterator
from unittest.mock import patch

import pytest

29

30

from muse.core.object_store import (

has_object,

read_object,

restore_object,

write_object,

write_object_from_path,

36

)

37

from muse.core.mpack import ApplyResult, MPack, apply_mpack

38

from muse.core.store import CommitDict, SnapshotDict

39

from muse.core.validation import MAX_OBJECT_WRITE_BYTES, MAX_PACK_OBJECTS

40

from muse.core.types import Manifest, blob_id, content_hash, hash_file, long_id, now_utc_iso

41

from muse.core.paths import config_toml_path, muse_dir

42

43

44

# ---------------------------------------------------------------------------

45

# Helpers

46

# ---------------------------------------------------------------------------

def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Build a minimal on-disk repository under *tmp_path* and return its root.

    The directory returned by ``muse_dir`` is populated with the
    sub-directories the tests rely on, a ``HEAD`` file pointing at
    ``refs/heads/main`` and a ``repo.json`` carrying a fixed repo id.
    """
    root = tmp_path / "repo"
    root.mkdir()

    store_dir = muse_dir(root)
    layout = ("objects", "commits", "snapshots", "refs", "refs/heads", "tags")
    for name in layout:
        # parents=True creates any missing parent directories on the way.
        target = store_dir / name
        target.mkdir(parents=True)

    head_file = store_dir / "HEAD"
    head_file.write_text("ref: refs/heads/main\n")
    repo_json = store_dir / "repo.json"
    repo_json.write_text('{"repo_id": "test-repo"}')
    return root

def _stored_object(repo: pathlib.Path, content: bytes) -> str:
    """Store *content* in *repo* under its ``blob_id`` and hand that ID back."""
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id

def _minimal_commit_dict(snap_id: str) -> CommitDict:
    """Return a parentless commit record on ``main`` that points at *snap_id*.

    The commit id is a fixed placeholder (64 ``a`` characters). The repo id is
    derived from *snap_id* through ``content_hash`` and the timestamp comes
    from ``now_utc_iso``.
    """
    return CommitDict(
        commit_id="a" * 64,
        repo_id=content_hash({"role": "repo", "snap_id": snap_id}),
        branch="main",
        parent_commit_id=None,
        parent2_commit_id=None,
        snapshot_id=snap_id,
        message="test",
        author="test",
        committed_at=now_utc_iso(),
        metadata={},
    )

def _minimal_snapshot_dict(manifest: Manifest) -> SnapshotDict:
    """Wrap *manifest* in a snapshot record whose ID is computed by ``hash_snapshot``."""
    # Imported lazily, exactly as the original helper did.
    from muse.core.ids import hash_snapshot

    return SnapshotDict(
        snapshot_id=hash_snapshot(manifest),
        manifest=manifest,
        created_at=now_utc_iso(),
    )

# ---------------------------------------------------------------------------

97

# 1. Hash mismatch injection

98

# ---------------------------------------------------------------------------

99

100

101

class TestHashMismatch:
    """write_object / write_object_from_path must refuse IDs that do not match the content."""

    def test_write_object_wrong_content_raises(self, tmp_path: pathlib.Path) -> None:
        """write_object must reject content whose hash differs from object_id."""
        repo = _make_repo(tmp_path)
        legit = b"legitimate content"
        malicious = b"poisoned content"
        correct_id = blob_id(legit)
        with pytest.raises(ValueError, match="Content integrity failure"):
            write_object(repo, correct_id, malicious)
        assert not has_object(repo, correct_id), "Poisoned object must not be stored"

    def test_write_object_correct_content_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A matching (id, content) pair is written and reads back unchanged."""
        repo = _make_repo(tmp_path)
        content = b"valid content"
        oid = blob_id(content)
        assert write_object(repo, oid, content) is True
        assert read_object(repo, oid) == content

    def test_write_object_from_path_wrong_id_raises(self, tmp_path: pathlib.Path) -> None:
        """write_object_from_path rejects when declared object_id ≠ file hash."""
        repo = _make_repo(tmp_path)
        real = tmp_path / "real.bin"
        real.write_bytes(b"real file content")
        wrong_id = blob_id(b"different content entirely")
        with pytest.raises(ValueError, match="Content integrity failure"):
            write_object_from_path(repo, wrong_id, real)
        assert not has_object(repo, wrong_id)

    def test_write_object_from_path_correct_id_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A file whose bytes match the declared ID is accepted and becomes visible."""
        repo = _make_repo(tmp_path)
        content = b"file content"
        src = tmp_path / "file.bin"
        src.write_bytes(content)
        oid = blob_id(content)
        assert write_object_from_path(repo, oid, src) is True
        assert has_object(repo, oid)

    def test_all_ones_id_mismatch_raises(self, tmp_path: pathlib.Path) -> None:
        """A crafted all-``f`` object_id is still caught, and nothing is stored."""
        repo = _make_repo(tmp_path)
        content = b"something"
        fake_id = "f" * 64
        with pytest.raises(ValueError):
            write_object(repo, fake_id, content)
        # Consistent with the other rejection tests: a refused write must
        # leave no object behind under the crafted ID.
        assert not has_object(repo, fake_id)

    def test_empty_object_valid(self, tmp_path: pathlib.Path) -> None:
        """Zero-byte content is a valid object when stored under blob_id(b"")."""
        repo = _make_repo(tmp_path)
        empty_id = blob_id(b"")
        assert write_object(repo, empty_id, b"") is True
        assert read_object(repo, empty_id) == b""

    def test_invalid_object_id_format_raises(self, tmp_path: pathlib.Path) -> None:
        """IDs that are not 64 lowercase-hex characters never result in a write."""
        repo = _make_repo(tmp_path)
        malformed_ids = (
            "not-a-hex-id",  # wrong alphabet and wrong length
            "a" * 63,  # one char short
            "G" * 64,  # right length, but 'G' is not a hex digit
            # Genuine uppercase hex. Whether this is refused by format
            # validation or by the hash comparison, it must raise: the
            # content below does not hash to this ID.
            "A" * 64,
        )
        for bad_id in malformed_ids:
            with pytest.raises((ValueError, TypeError)):
                write_object(repo, bad_id, b"content")

161

162

163

# ---------------------------------------------------------------------------

164

# 2. Per-object size cap on write

165

# ---------------------------------------------------------------------------

166

167

168

class TestObjectSizeCap:
    """Size-limit enforcement on the write paths of the object store."""

    def test_oversized_content_rejected_at_write(self, tmp_path: pathlib.Path) -> None:
        """A blob one byte larger than MAX_OBJECT_WRITE_BYTES is refused and not stored."""
        repo = _make_repo(tmp_path)
        too_big = b"x" * (MAX_OBJECT_WRITE_BYTES + 1)
        too_big_id = blob_id(too_big)
        with pytest.raises(ValueError, match="exceeding the"):
            write_object(repo, too_big_id, too_big)
        stored = has_object(repo, too_big_id)
        assert not stored, "Oversized object must not be stored"

    def test_exactly_at_limit_is_rejected(self, tmp_path: pathlib.Path) -> None:
        """A blob of MAX_OBJECT_WRITE_BYTES + 1 bytes raises ValueError.

        NOTE(review): despite the test name, the payload is one byte *over*
        the limit; a payload of exactly MAX_OBJECT_WRITE_BYTES is not tried.
        """
        repo = _make_repo(tmp_path)
        payload = b"y" * (MAX_OBJECT_WRITE_BYTES + 1)
        payload_id = blob_id(payload)
        with pytest.raises(ValueError):
            write_object(repo, payload_id, payload)

    def test_write_object_from_path_oversized_raises(self, tmp_path: pathlib.Path) -> None:
        """write_object_from_path refuses a source file larger than the limit."""
        repo = _make_repo(tmp_path)
        source = tmp_path / "big.bin"
        # Seek to the limit and write one byte: the file is
        # MAX_OBJECT_WRITE_BYTES + 1 bytes long. On filesystems with sparse
        # file support this should take little real disk space.
        with source.open("wb") as handle:
            handle.seek(MAX_OBJECT_WRITE_BYTES)
            handle.write(b"\x00")
        source_id = hash_file(source)
        with pytest.raises(ValueError, match="exceeding the"):
            write_object_from_path(repo, source_id, source)
        assert not has_object(repo, source_id)

    def test_just_under_limit_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A small (16-byte) blob, well under the limit, is accepted.

        The real boundary is deliberately not exercised so CI does not have
        to allocate a limit-sized buffer.
        """
        repo = _make_repo(tmp_path)
        small = b"t" * 16
        small_id = blob_id(small)
        assert write_object(repo, small_id, small) is True

208

209

210

# ---------------------------------------------------------------------------

211

# 3. restore_object — hash re-verification before copy

212

# ---------------------------------------------------------------------------

213

214

215

class TestRestoreObjectIntegrity:
    """restore_object behaviour for clean, missing and tampered store entries."""

    def test_restore_clean_object_succeeds(self, tmp_path: pathlib.Path) -> None:
        """An intact object is restored to the destination byte-for-byte."""
        repo = _make_repo(tmp_path)
        payload = b"data to restore"
        object_id = _stored_object(repo, payload)
        target = tmp_path / "restored.bin"
        assert restore_object(repo, object_id, target) is True
        assert target.read_bytes() == payload

    def test_restore_missing_object_returns_false(self, tmp_path: pathlib.Path) -> None:
        """Restoring an ID that was never stored returns False and creates no file."""
        repo = _make_repo(tmp_path)
        target = tmp_path / "ghost.bin"
        missing_id = blob_id(b"ghost")
        assert restore_object(repo, missing_id, target) is False
        assert not target.exists()

    def test_restore_detects_corrupted_store_object(self, tmp_path: pathlib.Path) -> None:
        """If the on-disk object file is corrupted, restore_object must raise OSError."""
        repo = _make_repo(tmp_path)
        payload = b"important file content"
        object_id = _stored_object(repo, payload)

        # Tamper with the stored file directly: make it writable, overwrite
        # it, then put the read-only mode back.
        from muse.core.object_store import _object_path_with_fallback

        stored_file = _object_path_with_fallback(repo, object_id)
        os.chmod(stored_file, 0o644)
        stored_file.write_bytes(b"corrupted bytes that do not match the declared hash")
        os.chmod(stored_file, 0o444)

        target = tmp_path / "should-not-exist.bin"
        with pytest.raises(OSError, match="failed SHA-256 integrity check"):
            restore_object(repo, object_id, target)
        assert not target.exists(), "No corrupted data must reach the working tree"

    def test_restore_dest_is_writable(self, tmp_path: pathlib.Path) -> None:
        """Restored files must be writable (0o444 object mode must not propagate)."""
        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"editable file")
        target = tmp_path / "editable.txt"
        restore_object(repo, object_id, target)
        # Overwriting the restored file must not raise PermissionError.
        target.write_bytes(b"new content")

    def test_restore_is_atomic(self, tmp_path: pathlib.Path) -> None:
        """After restore_object returns, the destination holds the complete content.

        NOTE(review): no concurrent reader is involved here; only the final
        state of the destination file is checked.
        """
        repo = _make_repo(tmp_path)
        payload = b"atomic restore test " + b"x" * 1000
        object_id = _stored_object(repo, payload)
        target = tmp_path / "atomic.bin"
        restore_object(repo, object_id, target)
        assert target.read_bytes() == payload

267

268

269

# ---------------------------------------------------------------------------

270

# 4 & 5. apply_mpack — pack-bomb and per-object size cap

271

# ---------------------------------------------------------------------------

272

273

274

class TestApplyMPackBomb:
    """apply_mpack limits: total item count and per-object size."""

    def _build_mpack(
        self,
        *,
        n_objects: int = 0,
        n_snapshots: int = 0,
        n_commits: int = 0,
        object_size: int = 1,
    ) -> MPack:
        """Build a pack holding *n_objects* distinct, correctly-addressed objects.

        Each object is ``object-<i>`` followed by *object_size* NUL bytes.

        NOTE(review): ``n_snapshots`` and ``n_commits`` are accepted but not
        used; the returned pack always has empty commit and snapshot lists.
        """
        padding = b"\x00" * object_size
        bodies = [f"object-{i}".encode() + padding for i in range(n_objects)]
        payloads = [{"object_id": blob_id(body), "content": body} for body in bodies]
        return MPack(commits=[], snapshots=[], objects=payloads)

    def test_pack_at_limit_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A small pack within MAX_PACK_OBJECTS is applied and every object is written.

        The pack holds at most 10 objects, so the exact limit is not reached
        unless MAX_PACK_OBJECTS is itself 10 or less.
        """
        repo = _make_repo(tmp_path)
        count = min(10, MAX_PACK_OBJECTS)
        pack = self._build_mpack(n_objects=count)
        outcome = apply_mpack(repo, pack)
        assert outcome["objects_written"] == count

    def test_pack_exceeds_limit_raises(self, tmp_path: pathlib.Path) -> None:
        """A pack with total items > MAX_PACK_OBJECTS must be rejected."""
        repo = _make_repo(tmp_path)
        # The entries need not be valid: per the original comment, the count
        # check is expected to fire before any entry is examined.
        placeholder = {"object_id": "a" * 64, "content": b"x"}
        too_many = [placeholder] * (MAX_PACK_OBJECTS + 1)
        oversized_bundle: MPack = MPack(commits=[], snapshots=[], objects=too_many)
        with pytest.raises(ValueError, match="exceeds the"):
            apply_mpack(repo, oversized_bundle)

    def test_oversized_object_in_pack_is_skipped(self, tmp_path: pathlib.Path) -> None:
        """An over-limit object in a pack is skipped; the valid one beside it is written."""
        repo = _make_repo(tmp_path)
        huge = b"B" * (MAX_OBJECT_WRITE_BYTES + 1)
        small = b"tiny object"
        huge_id = blob_id(huge)
        small_id = blob_id(small)
        pack: MPack = MPack(
            commits=[],
            snapshots=[],
            objects=[
                {"object_id": huge_id, "content": huge},
                {"object_id": small_id, "content": small},
            ],
        )
        outcome = apply_mpack(repo, pack)
        assert not has_object(repo, huge_id), "Oversized object must not be stored"
        assert has_object(repo, small_id), "Valid object must be stored"
        assert outcome["objects_written"] == 1

    def test_zero_item_pack_is_accepted(self, tmp_path: pathlib.Path) -> None:
        """An empty pack is applied without error and reports all-zero counters."""
        repo = _make_repo(tmp_path)
        nothing: MPack = MPack(commits=[], snapshots=[], objects=[])
        expected = ApplyResult(
            commits_written=0,
            snapshots_written=0,
            objects_written=0,
            objects_skipped=0,
            tags_written=0,
            failed_objects=[],
            skipped_snapshots=[],
        )
        outcome = apply_mpack(repo, nothing)
        assert outcome == expected

# ---------------------------------------------------------------------------

354

# 6. apply_mpack — object-ID deduplication

355

# ---------------------------------------------------------------------------

356

357

358

class TestApplyPackDeduplication:
    """apply_mpack counts repeated object IDs as skipped rather than rewriting them."""

    def test_duplicate_object_ids_not_hashed_twice(self, tmp_path: pathlib.Path) -> None:
        """The same entry sent 100 times yields one write and 99 skips."""
        repo = _make_repo(tmp_path)
        body = b"dedup test object"
        body_id = blob_id(body)
        repeated = [{"object_id": body_id, "content": body}] * 100
        pack: MPack = MPack(commits=[], snapshots=[], objects=repeated)
        outcome = apply_mpack(repo, pack)
        assert outcome["objects_written"] == 1
        assert outcome["objects_skipped"] == 99
        assert has_object(repo, body_id)

    def test_duplicate_then_different_both_processed(self, tmp_path: pathlib.Path) -> None:
        """A duplicate in the middle of a pack does not stop a later distinct object."""
        repo = _make_repo(tmp_path)
        first = b"first object"
        second = b"second object"
        first_id = blob_id(first)
        second_id = blob_id(second)
        entries = [
            {"object_id": first_id, "content": first},
            {"object_id": first_id, "content": first},  # duplicate of the entry above
            {"object_id": second_id, "content": second},
        ]
        pack: MPack = MPack(commits=[], snapshots=[], objects=entries)
        outcome = apply_mpack(repo, pack)
        assert outcome["objects_written"] == 2
        assert outcome["objects_skipped"] == 1

393

394

395

# ---------------------------------------------------------------------------

396

# 7. apply_mpack — malformed entries are isolated (snapshot / commit)

397

# ---------------------------------------------------------------------------

398

399

400

class TestApplyPackMalformedEntries:
    """Bad object entries in a pack are isolated instead of aborting the whole pack."""

    def test_malformed_object_entry_does_not_abort_pack(self, tmp_path: pathlib.Path) -> None:
        """A bad object entry is skipped; a later valid entry is still written.

        Distinct IDs are used on purpose. Because of deduplication, an ID is
        attempted only once per apply_mpack call, so a good entry that reused
        the ID of an earlier failed entry would be deduplicated away and could
        not show that bad entries leave good ones unaffected.
        """
        repo = _make_repo(tmp_path)
        body_a = b"good object A"
        body_b = b"good object B"
        id_a = blob_id(body_a)
        id_b = blob_id(body_b)
        entries = [
            {"object_id": "not-hex", "content": b"bad"},  # malformed ID
            {"object_id": id_a, "content": b"wrong bytes"},  # hash mismatch
            {"object_id": id_b, "content": body_b},  # valid, different ID
        ]
        pack: MPack = MPack(commits=[], snapshots=[], objects=entries)
        outcome = apply_mpack(repo, pack)
        assert not has_object(repo, id_a), "Hash-mismatched entry must not be stored"
        assert has_object(repo, id_b), "Valid entry after bad ones must be stored"
        assert outcome["objects_written"] == 1

    def test_missing_object_id_in_pack_entry_skipped(self, tmp_path: pathlib.Path) -> None:
        """An entry whose object_id is the empty string results in no write."""
        repo = _make_repo(tmp_path)
        entries = [{"object_id": "", "content": b"anything"}]
        pack: MPack = MPack(commits=[], snapshots=[], objects=entries)
        outcome = apply_mpack(repo, pack)
        assert outcome["objects_written"] == 0

    def test_empty_content_in_pack_entry_skipped(self, tmp_path: pathlib.Path) -> None:
        """An ObjectPayload with empty object_id and empty content results in no write."""
        repo = _make_repo(tmp_path)
        from muse.core.mpack import ObjectPayload

        blank = ObjectPayload(object_id="", content=b"")
        pack: MPack = MPack(commits=[], snapshots=[], objects=[blank])
        outcome = apply_mpack(repo, pack)
        assert outcome["objects_written"] == 0

449

450

451

# ---------------------------------------------------------------------------

452

# 8. read_object — corruption detected on every read

453

# ---------------------------------------------------------------------------

454

455

456

class TestReadObjectIntegrity:
    """read_object behaviour for intact, tampered and absent objects."""

    def test_read_clean_object_succeeds(self, tmp_path: pathlib.Path) -> None:
        """An untouched object reads back exactly as written."""
        repo = _make_repo(tmp_path)
        payload = b"clean read test"
        object_id = _stored_object(repo, payload)
        assert read_object(repo, object_id) == payload

    def test_read_corrupted_object_raises(self, tmp_path: pathlib.Path) -> None:
        """Reading an object whose file was overwritten on disk raises OSError."""
        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"will be corrupted")
        from muse.core.object_store import _object_path_with_fallback

        stored_file = _object_path_with_fallback(repo, object_id)
        # Make the file writable, overwrite it, then restore read-only mode.
        os.chmod(stored_file, 0o644)
        stored_file.write_bytes(b"corrupted bytes")
        os.chmod(stored_file, 0o444)
        with pytest.raises(OSError, match="integrity check"):
            read_object(repo, object_id)

    def test_read_absent_object_returns_none(self, tmp_path: pathlib.Path) -> None:
        """An ID that was never stored reads as None."""
        repo = _make_repo(tmp_path)
        never_stored = blob_id(b"absent")
        assert read_object(repo, never_stored) is None

478

479

480

# ---------------------------------------------------------------------------

481

# 9. Confirmed: all write_object callsites use content-derived IDs

482

# ---------------------------------------------------------------------------

483

484

485

class TestCallsiteIntegrity:
    """Callers of write_object must derive the object ID from the actual bytes."""

    @staticmethod
    def _configured_repo(tmp_path: pathlib.Path) -> pathlib.Path:
        """Create a test repo and write a config file with a ``[core] author`` entry."""
        repo = _make_repo(tmp_path)
        config_toml_path(repo).write_text("[core]\nauthor = \"test\"\n")
        return repo

    def test_hash_object_stdin_derives_id_from_content(self, tmp_path: pathlib.Path) -> None:
        """hash-object with --write derives object_id from actual stdin bytes."""
        from tests.cli_test_helper import CliRunner

        repo = self._configured_repo(tmp_path)
        content = b"stdin content for hashing"
        expected_oid = blob_id(content)
        runner = CliRunner()
        result = runner.invoke(
            None,
            ["hash-object", "--stdin", "--write"],
            input=content,
            env={"MUSE_REPO_ROOT": str(repo)},
        )
        assert result.exit_code == 0, result.output
        assert expected_oid in result.output
        assert has_object(repo, expected_oid)

    def test_hash_object_file_derives_id_from_file_content(self, tmp_path: pathlib.Path) -> None:
        """hash-object with a file path derives object_id from actual file bytes."""
        from tests.cli_test_helper import CliRunner

        repo = self._configured_repo(tmp_path)
        content = b"file content for hashing"
        target = tmp_path / "target.bin"
        target.write_bytes(content)
        expected_oid = blob_id(content)
        runner = CliRunner()
        result = runner.invoke(
            None,
            ["hash-object", str(target), "--write"],
            env={"MUSE_REPO_ROOT": str(repo)},
        )
        assert result.exit_code == 0, result.output
        assert expected_oid in result.output
        assert has_object(repo, expected_oid)

    def test_unpack_objects_hash_mismatch_rejected(self, tmp_path: pathlib.Path) -> None:
        """A pack object with the wrong hash is rejected by apply_mpack.

        This calls apply_mpack directly rather than going through the CLI, so
        no CliRunner is needed; the unused import was removed.
        """
        repo = self._configured_repo(tmp_path)
        legit_content = b"legitimate"
        legit_oid = blob_id(legit_content)

        mpack: MPack = MPack(
            commits=[],
            snapshots=[],
            objects=[{"object_id": legit_oid, "content": b"malicious bytes"}],
        )
        result = apply_mpack(repo, mpack)
        # The mismatching entry must be skipped, not stored under the legit ID.
        assert not has_object(repo, legit_oid), "Poisoned object must not enter the store"
        assert result["objects_written"] == 0

540

541

542

# ---------------------------------------------------------------------------

543

# 10. Stress: 10 000-object pack processed within time budget

544

# ---------------------------------------------------------------------------

class TestStress:
    """Large-pack behaviour of apply_mpack: throughput, idempotency, deduplication."""

    @pytest.fixture(autouse=True)
    def no_fsync(self) -> Iterator[None]:
        """Mock fsync so the budget test measures algorithmic cost, not I/O latency.

        This is a generator fixture, hence the ``Iterator[None]`` return type:
        the patches stay active while the test body runs and are undone when
        the ``with`` block exits after the ``yield``.
        """
        with patch("muse.core.object_store._fsync_fd", return_value=None), \
                patch("muse.core.store.os.fsync", return_value=None), \
                patch("muse.core.store.fcntl.fcntl", return_value=0):
            yield

    @pytest.mark.perf
    def test_10k_object_pack_within_budget(self, tmp_path: pathlib.Path) -> None:
        """10 000 unique objects written through apply_mpack in under 30 seconds."""
        repo = _make_repo(tmp_path)
        n = 10_000
        contents = [f"stress-object-{i:06d}".encode() for i in range(n)]
        objects = [{"object_id": blob_id(c), "content": c} for c in contents]

        mpack: MPack = MPack(commits=[], snapshots=[], objects=objects)
        # Only the apply call is timed; pack construction is excluded.
        start = time.monotonic()
        result = apply_mpack(repo, mpack)
        elapsed = time.monotonic() - start

        assert result["objects_written"] == n
        assert elapsed < 30.0, f"10k-object pack took {elapsed:.1f}s — too slow"

    def test_idempotent_10k_pack_fast(self, tmp_path: pathlib.Path) -> None:
        """Re-applying a 1 000-object pack writes nothing and skips every object.

        Despite the test name, the pack has 1 000 objects (not 10 000) and no
        timing is measured; only the written/skipped counters are checked.
        """
        repo = _make_repo(tmp_path)
        n = 1_000
        contents = [f"idem-object-{i:06d}".encode() for i in range(n)]
        objects = [{"object_id": blob_id(c), "content": c} for c in contents]

        mpack: MPack = MPack(commits=[], snapshots=[], objects=objects)
        apply_mpack(repo, mpack)  # first application populates the store
        result2 = apply_mpack(repo, mpack)  # second application must be a no-op
        assert result2["objects_written"] == 0
        assert result2["objects_skipped"] == n

    def test_10k_duplicate_ids_deduplicated(self, tmp_path: pathlib.Path) -> None:
        """10 000 entries with the same object_id are deduplicated to one write."""
        repo = _make_repo(tmp_path)
        content = b"one true object"
        oid = blob_id(content)
        mpack: MPack = MPack(
            commits=[],
            snapshots=[],
            objects=[{"object_id": oid, "content": content}] * 10_000,
        )
        result = apply_mpack(repo, mpack)
        assert result["objects_written"] == 1
        assert result["objects_skipped"] == 9_999

604

605

606

# ---------------------------------------------------------------------------

607

# 11. Concurrent poisoning stress

608

# ---------------------------------------------------------------------------

609

610

611

class TestConcurrentPoisoning:
    """Concurrent writers must neither poison nor corrupt the object store."""

    @staticmethod
    def _run_threads(threads: list[threading.Thread], timeout: float = 5.0) -> None:
        """Start every thread, join each with *timeout*, and fail if any is still alive.

        ``Thread.join(timeout=...)`` returns silently when the timeout expires,
        so liveness is checked explicitly; otherwise a hung writer would go
        unnoticed.
        """
        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=timeout)
        stuck = [t.name for t in threads if t.is_alive()]
        assert not stuck, f"Threads did not finish within {timeout}s: {stuck}"

    def test_concurrent_hash_mismatch_attempts_do_not_corrupt(
        self, tmp_path: pathlib.Path
    ) -> None:
        """50 threads simultaneously trying to poison the store — none succeeds."""
        repo = _make_repo(tmp_path)
        legit_content = b"the one true content"
        legit_oid = blob_id(legit_content)

        # Write the legitimate object first.
        write_object(repo, legit_oid, legit_content)

        errors: list[str] = []
        lock = threading.Lock()

        def poison_attempt(idx: int) -> None:
            malicious_content = f"malicious-{idx}".encode()
            try:
                write_object(repo, legit_oid, malicious_content)
            except ValueError:
                return  # expected: the mismatching write is rejected
            except Exception as exc:  # thread boundary: record, never lose it
                with lock:
                    errors.append(f"Thread {idx}: unexpected {type(exc).__name__}: {exc}")
                return
            with lock:
                errors.append(f"Thread {idx}: poisoning succeeded!")

        # daemon=True so a stuck writer cannot block interpreter shutdown
        # after the liveness assertion has already failed the test.
        threads = [
            threading.Thread(target=poison_attempt, args=(i,), daemon=True)
            for i in range(50)
        ]
        self._run_threads(threads)
        assert not errors, "\n".join(errors)
        # The stored object must still be the legitimate one.
        assert read_object(repo, legit_oid) == legit_content

    def test_concurrent_writes_of_same_object_idempotent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """50 threads write the same valid object: no errors, at least one write, no corruption."""
        repo = _make_repo(tmp_path)
        content = b"concurrent valid object"
        oid = blob_id(content)
        results: list[bool] = []
        failures: list[str] = []
        lock = threading.Lock()

        def write_it() -> None:
            try:
                wrote = write_object(repo, oid, content)
            except Exception as exc:  # thread boundary: surface it in the main thread
                with lock:
                    failures.append(f"{type(exc).__name__}: {exc}")
                return
            with lock:
                results.append(wrote)

        threads = [threading.Thread(target=write_it, daemon=True) for _ in range(50)]
        self._run_threads(threads)
        assert not failures, "\n".join(failures)
        assert results.count(True) >= 1, "At least one thread must have written"
        assert read_object(repo, oid) == content

666

667

668

# ---------------------------------------------------------------------------

669

# 12. SHA-256 threat model documentation test

670

# ---------------------------------------------------------------------------

671

672

673

class TestSHA256ThreatModel:
    """Executable notes on the hash-based threat model of the object store."""

    def test_sha256_preimage_resistance_documented(self) -> None:
        """Record SHA-256 second-preimage resistance as the security boundary.

        The object store resists hash-mismatch injection because:
          1. write_object hashes the content and rejects any mismatch.
          2. read_object re-hashes on every read.
          3. restore_object re-hashes before copying to the working tree.

        Poisoning the store would therefore need a second preimage: a
        different content M' with sha256(M') == sha256(M). As of 2026 the
        best known second-preimage attack on SHA-256 costs on the order of
        2^256 operations, which is computationally infeasible.

        This is a living specification of the threat model, not a
        cryptographic proof.
        """
        first = b"message A"
        second = b"message B"
        # Distinct messages get distinct IDs with overwhelming probability;
        # a collision is computationally infeasible, not theoretically impossible.
        first_id = blob_id(first)
        second_id = blob_id(second)
        assert first_id != second_id

    def test_write_then_read_roundtrip_preserves_content(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Content written to the store is always returned verbatim on read."""
        repo = _make_repo(tmp_path)
        for i in range(20):
            # Payload length grows with i: the tag is repeated i + 1 times.
            payload = f"stress-content-{i}".encode() * (i + 1)
            payload_id = blob_id(payload)
            write_object(repo, payload_id, payload)
            assert read_object(repo, payload_id) == payload

    def test_object_mode_is_immutable(self, tmp_path: pathlib.Path) -> None:
        """Stored object files carry permission bits 0o444 (read-only for everyone)."""
        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"immutable object")
        from muse.core.object_store import _object_path_with_fallback

        stored_file = _object_path_with_fallback(repo, object_id)
        permission_bits = stored_file.stat().st_mode & 0o777
        mode = oct(permission_bits)
        assert mode == oct(0o444), f"Expected 0o444, got {mode}"