tests/test_security_object_store_poisoning.py · gabriel/muse

1

"""Phase 2.3 — Object store poisoning tests.

2

3

Covers every adversarial input and edge case identified in the recon phase:

1. Hash mismatch injection into write_object / write_object_from_path.
2. Per-object size cap enforcement at write time (not just read time).
3. restore_object re-hashes source before copying — corrupt store is detected.
4. apply_mpack: object count limit (pack-bomb).
5. apply_mpack: per-object size cap before write_object is called.
6. apply_mpack: object-ID deduplication (duplicate IDs are skipped without re-hashing).
7. apply_mpack: snapshot / commit isolation — malformed entries skipped.
8. Zero-byte objects: valid empty blobs are accepted.
9. All write_object callsites confirmed to use content-derived IDs.
10. Stress: 10 000-object pack processed within time budget.
11. Stress: 50 concurrent poisoning attempts do not corrupt the store.
12. Threat-model boundary: SHA-256 collision infeasibility documented via test.

17

"""

18

19

from __future__ import annotations

import os
import pathlib
import tempfile
import threading
import time
from collections.abc import Iterator
from unittest.mock import patch

import pytest

29

30

from muse.core.object_store import (

has_object,

read_object,

restore_object,

write_object,

write_object_from_path,

36

)

37

from muse.core.mpack import ApplyResult, MPack, apply_mpack

38

from muse.core.commits import CommitDict

39

from muse.core.snapshots import SnapshotDict

40

from muse.core.validation import MAX_OBJECT_WRITE_BYTES, MAX_PACK_OBJECTS

41

from muse.core.types import Manifest, blob_id, content_hash, hash_file, long_id, now_utc_iso

42

from muse.core.paths import config_toml_path, muse_dir

43

44

45

# ---------------------------------------------------------------------------

46

# Helpers

47

# ---------------------------------------------------------------------------

def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Build a minimal repository skeleton under *tmp_path* and return its root.

    The root is ``tmp_path / "repo"``.  Inside the directory reported by
    ``muse_dir`` the standard sub-directories are created, followed by a
    ``HEAD`` file pointing at ``refs/heads/main`` and a ``repo.json`` stub.
    """
    root = tmp_path / "repo"
    root.mkdir()
    meta = muse_dir(root)
    layout = ("objects", "commits", "snapshots", "refs", "refs/heads", "tags")
    for name in layout:
        # parents=True creates any missing ancestors on the way down.
        meta.joinpath(name).mkdir(parents=True)
    meta.joinpath("HEAD").write_text("ref: refs/heads/main\n")
    meta.joinpath("repo.json").write_text('{"repo_id": "test-repo"}')
    return root

def _stored_object(repo: pathlib.Path, content: bytes) -> str:
    """Persist *content* in the object store of *repo* and return its object ID.

    The ID is derived from the bytes themselves via ``blob_id``.
    """
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id

def _minimal_commit_dict(snap_id: str) -> CommitDict:
    """Return a root-commit record on ``main`` that points at *snap_id*.

    The commit ID is a fixed placeholder (64 ``a`` characters), the repo ID
    is hashed from *snap_id*, and the timestamp is the current UTC time.
    """
    return CommitDict(
        commit_id="a" * 64,
        repo_id=content_hash({"role": "repo", "snap_id": snap_id}),
        branch="main",
        parent_commit_id=None,
        parent2_commit_id=None,
        snapshot_id=snap_id,
        message="test",
        author="test",
        committed_at=now_utc_iso(),
        metadata={},
    )

def _minimal_snapshot_dict(manifest: Manifest) -> SnapshotDict:
    """Wrap *manifest* in a snapshot record whose ID is computed from the manifest."""
    # Imported lazily, exactly as the original helper did.
    from muse.core.ids import hash_snapshot

    return SnapshotDict(
        snapshot_id=hash_snapshot(manifest),
        manifest=manifest,
        created_at=now_utc_iso(),
    )

# ---------------------------------------------------------------------------

98

# 1. Hash mismatch injection

99

# ---------------------------------------------------------------------------

100

101

102

class TestHashMismatch:
    """Writes must be refused unless the declared object ID matches the content."""

    def test_write_object_wrong_content_raises(self, tmp_path: pathlib.Path) -> None:
        """write_object rejects bytes that do not hash to the declared object_id."""
        repo = _make_repo(tmp_path)
        declared_id = blob_id(b"legitimate content")
        poisoned = b"poisoned content"
        with pytest.raises(ValueError, match="Content integrity failure"):
            write_object(repo, declared_id, poisoned)
        assert not has_object(repo, declared_id), "Poisoned object must not be stored"

    def test_write_object_correct_content_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A matching ID/content pair is written and can be read back unchanged."""
        repo = _make_repo(tmp_path)
        payload = b"valid content"
        object_id = blob_id(payload)
        assert write_object(repo, object_id, payload) is True
        assert read_object(repo, object_id) == payload

    def test_write_object_from_path_wrong_id_raises(self, tmp_path: pathlib.Path) -> None:
        """write_object_from_path rejects a declared object_id that differs from the file hash."""
        repo = _make_repo(tmp_path)
        source = tmp_path / "real.bin"
        source.write_bytes(b"real file content")
        mismatched_id = blob_id(b"different content entirely")
        with pytest.raises(ValueError, match="Content integrity failure"):
            write_object_from_path(repo, mismatched_id, source)
        assert not has_object(repo, mismatched_id)

    def test_write_object_from_path_correct_id_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A file whose bytes match the declared ID is stored."""
        repo = _make_repo(tmp_path)
        payload = b"file content"
        source = tmp_path / "file.bin"
        source.write_bytes(payload)
        object_id = blob_id(payload)
        assert write_object_from_path(repo, object_id, source) is True
        assert has_object(repo, object_id)

    def test_all_ones_id_mismatch_raises(self, tmp_path: pathlib.Path) -> None:
        """A crafted, well-formed ID (64 ``f`` characters) is still caught by the hash check."""
        repo = _make_repo(tmp_path)
        crafted_id = "f" * 64
        with pytest.raises(ValueError):
            write_object(repo, crafted_id, b"something")

    def test_empty_object_valid(self, tmp_path: pathlib.Path) -> None:
        """Zero-byte content is a valid object and round-trips as ``b""``."""
        repo = _make_repo(tmp_path)
        nothing = b""
        empty_id = blob_id(nothing)
        assert write_object(repo, empty_id, nothing) is True
        assert read_object(repo, empty_id) == nothing

    def test_invalid_object_id_format_raises(self, tmp_path: pathlib.Path) -> None:
        """Syntactically invalid object IDs are rejected with ValueError or TypeError."""
        repo = _make_repo(tmp_path)
        malformed_ids = (
            "not-a-hex-id",  # wrong alphabet and wrong length
            "a" * 63,        # one character short
            "G" * 64,        # right length, but 'G' is not a hex digit
        )
        for candidate in malformed_ids:
            with pytest.raises((ValueError, TypeError)):
                write_object(repo, candidate, b"content")

162

163

164

# ---------------------------------------------------------------------------

165

# 2. Per-object size cap on write

166

# ---------------------------------------------------------------------------

167

168

169

class TestObjectSizeCap:
    """The per-object size ceiling (MAX_OBJECT_WRITE_BYTES) is enforced on write."""

    def test_oversized_content_rejected_at_write(self, tmp_path: pathlib.Path) -> None:
        """write_object must reject blobs above MAX_OBJECT_WRITE_BYTES."""
        repo = _make_repo(tmp_path)
        # One byte past the ceiling is the smallest payload that must fail.
        too_big = b"x" * (MAX_OBJECT_WRITE_BYTES + 1)
        object_id = blob_id(too_big)
        with pytest.raises(ValueError, match="exceeding the"):
            write_object(repo, object_id, too_big)
        assert not has_object(repo, object_id), "Oversized object must not be stored"

    def test_exactly_at_limit_is_rejected(self, tmp_path: pathlib.Path) -> None:
        """An object of MAX_OBJECT_WRITE_BYTES + 1 bytes is rejected.

        Despite the test name, the payload is one byte *over* the limit;
        only the error type (not the message) is checked here.
        """
        repo = _make_repo(tmp_path)
        too_big = b"y" * (MAX_OBJECT_WRITE_BYTES + 1)
        object_id = blob_id(too_big)
        with pytest.raises(ValueError):
            write_object(repo, object_id, too_big)

    def test_write_object_from_path_oversized_raises(self, tmp_path: pathlib.Path) -> None:
        """write_object_from_path must reject a source file larger than the cap."""
        repo = _make_repo(tmp_path)
        source = tmp_path / "big.bin"
        # Seek to the limit and write one byte: the file is limit + 1 bytes
        # long, and on filesystems with sparse-file support uses little disk.
        with source.open("wb") as handle:
            handle.seek(MAX_OBJECT_WRITE_BYTES)
            handle.write(b"\x00")
        object_id = hash_file(source)
        with pytest.raises(ValueError, match="exceeding the"):
            write_object_from_path(repo, object_id, source)
        assert not has_object(repo, object_id)

    def test_just_under_limit_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A small (16-byte) object, well under the cap, is accepted.

        A payload of exactly MAX_OBJECT_WRITE_BYTES is deliberately not
        allocated, to avoid exhausting memory in CI.
        """
        repo = _make_repo(tmp_path)
        small = b"t" * 16
        object_id = blob_id(small)
        assert write_object(repo, object_id, small) is True

209

210

211

# ---------------------------------------------------------------------------

212

# 3. restore_object — hash re-verification before copy

213

# ---------------------------------------------------------------------------

214

215

216

class TestRestoreObjectIntegrity:
    """restore_object copies only verified content into the working tree."""

    def test_restore_clean_object_succeeds(self, tmp_path: pathlib.Path) -> None:
        """An intact stored object is restored byte-for-byte."""
        repo = _make_repo(tmp_path)
        payload = b"data to restore"
        object_id = _stored_object(repo, payload)
        target = tmp_path / "restored.bin"
        assert restore_object(repo, object_id, target) is True
        assert target.read_bytes() == payload

    def test_restore_missing_object_returns_false(self, tmp_path: pathlib.Path) -> None:
        """Restoring an ID that was never stored returns False and creates no file."""
        repo = _make_repo(tmp_path)
        absent_id = blob_id(b"ghost")
        target = tmp_path / "ghost.bin"
        assert restore_object(repo, absent_id, target) is False
        assert not target.exists()

    def test_restore_detects_corrupted_store_object(self, tmp_path: pathlib.Path) -> None:
        """If the on-disk object file is corrupted, restore_object must raise OSError."""
        from muse.core.object_store import _object_path_with_fallback

        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"important file content")

        # Tamper with the stored file directly: make it writable, overwrite
        # it, then put the read-only mode back.
        stored = _object_path_with_fallback(repo, object_id)
        os.chmod(stored, 0o644)
        stored.write_bytes(b"corrupted bytes that do not match the declared hash")
        os.chmod(stored, 0o444)

        target = tmp_path / "should-not-exist.bin"
        with pytest.raises(OSError, match="failed SHA-256 integrity check"):
            restore_object(repo, object_id, target)
        assert not target.exists(), "No corrupted data must reach the working tree"

    def test_restore_dest_is_writable(self, tmp_path: pathlib.Path) -> None:
        """Restored files must be writable (0o444 object mode must not propagate)."""
        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"editable file")
        target = tmp_path / "editable.txt"
        restore_object(repo, object_id, target)
        # Overwriting would raise PermissionError if the file were read-only.
        target.write_bytes(b"new content")

    def test_restore_is_atomic(self, tmp_path: pathlib.Path) -> None:
        """A completed restore yields the full content.

        NOTE(review): this is a sequential round trip; no concurrent reader
        is exercised, so atomicity itself is not observed here.
        """
        repo = _make_repo(tmp_path)
        payload = b"atomic restore test " + b"x" * 1000
        object_id = _stored_object(repo, payload)
        target = tmp_path / "atomic.bin"
        restore_object(repo, object_id, target)
        assert target.read_bytes() == payload

268

269

270

# ---------------------------------------------------------------------------

271

# 4 & 5. apply_mpack — pack-bomb and per-object size cap

272

# ---------------------------------------------------------------------------

273

274

275

class TestApplyMPackBomb:
    """apply_mpack enforces the pack-wide object count and the per-object size cap."""

    def _build_mpack(
        self,
        *,
        n_objects: int = 0,
        n_snapshots: int = 0,
        n_commits: int = 0,
        object_size: int = 1,
    ) -> MPack:
        """Return an MPack holding *n_objects* unique, correctly-hashed blobs.

        Each blob is ``b"object-<i>"`` followed by *object_size* NUL bytes.
        *n_snapshots* and *n_commits* are accepted but not used: the pack
        always has empty commit and snapshot lists.
        """
        payloads = [
            f"object-{index}".encode() + b"\x00" * object_size
            for index in range(n_objects)
        ]
        blobs = [{"object_id": blob_id(data), "content": data} for data in payloads]
        return MPack(commits=[], snapshots=[], blobs=blobs)

    def test_pack_at_limit_succeeds(self, tmp_path: pathlib.Path) -> None:
        """A pack within MAX_PACK_OBJECTS is accepted and every blob is written.

        The pack holds at most 10 objects, not the full limit, to stay fast.
        """
        repo = _make_repo(tmp_path)
        count = min(10, MAX_PACK_OBJECTS)
        outcome = apply_mpack(repo, self._build_mpack(n_objects=count))
        assert outcome["blobs_written"] == count

    def test_pack_exceeds_limit_raises(self, tmp_path: pathlib.Path) -> None:
        """A pack with total items > MAX_PACK_OBJECTS must be rejected."""
        repo = _make_repo(tmp_path)
        # The entries are not valid objects; the test relies on the count
        # check firing before any per-entry validation.
        placeholder = {"object_id": "a" * 64, "content": b"x"}
        bomb: MPack = MPack(
            commits=[],
            snapshots=[],
            blobs=[placeholder] * (MAX_PACK_OBJECTS + 1),
        )
        with pytest.raises(ValueError, match="exceeds the"):
            apply_mpack(repo, bomb)

    def test_oversized_object_in_pack_is_skipped(self, tmp_path: pathlib.Path) -> None:
        """An object above MAX_OBJECT_WRITE_BYTES is skipped; the rest of the pack is applied."""
        repo = _make_repo(tmp_path)
        huge = b"B" * (MAX_OBJECT_WRITE_BYTES + 1)
        huge_id = blob_id(huge)
        small = b"tiny object"
        small_id = blob_id(small)
        pack: MPack = MPack(
            commits=[],
            snapshots=[],
            blobs=[
                {"object_id": huge_id, "content": huge},
                {"object_id": small_id, "content": small},
            ],
        )
        outcome = apply_mpack(repo, pack)
        # Only the small blob may end up in the store.
        assert not has_object(repo, huge_id), "Oversized object must not be stored"
        assert has_object(repo, small_id), "Valid object must be stored"
        assert outcome["blobs_written"] == 1

    def test_zero_item_pack_is_accepted(self, tmp_path: pathlib.Path) -> None:
        """An entirely empty pack applies cleanly and reports all-zero counters."""
        repo = _make_repo(tmp_path)
        nothing: MPack = MPack(commits=[], snapshots=[], blobs=[])
        expected = ApplyResult(
            commits_written=0,
            snapshots_written=0,
            blobs_written=0,
            blobs_skipped=0,
            tags_written=0,
            failed_blobs=[],
            skipped_snapshots=[],
        )
        outcome = apply_mpack(repo, nothing)
        assert outcome == expected

# ---------------------------------------------------------------------------

355

# 6. apply_mpack — object-ID deduplication

356

# ---------------------------------------------------------------------------

357

358

359

class TestApplyPackDeduplication:
    """Repeated object IDs inside one pack are written once and counted as skipped."""

    def test_duplicate_object_ids_not_hashed_twice(self, tmp_path: pathlib.Path) -> None:
        """The same blob sent 100 times yields one write and 99 skips."""
        repo = _make_repo(tmp_path)
        payload = b"dedup test object"
        object_id = blob_id(payload)
        entry = {"object_id": object_id, "content": payload}
        pack: MPack = MPack(commits=[], snapshots=[], blobs=[entry] * 100)
        outcome = apply_mpack(repo, pack)
        assert outcome["blobs_written"] == 1
        assert outcome["blobs_skipped"] == 99
        assert has_object(repo, object_id)

    def test_duplicate_then_different_both_processed(self, tmp_path: pathlib.Path) -> None:
        """A duplicate entry does not stop a later, distinct blob from being written."""
        repo = _make_repo(tmp_path)
        first = b"first object"
        second = b"second object"
        first_id = blob_id(first)
        second_id = blob_id(second)
        pack: MPack = MPack(
            commits=[],
            snapshots=[],
            blobs=[
                {"object_id": first_id, "content": first},
                {"object_id": first_id, "content": first},  # repeat of the entry above
                {"object_id": second_id, "content": second},
            ],
        )
        outcome = apply_mpack(repo, pack)
        assert outcome["blobs_written"] == 2
        assert outcome["blobs_skipped"] == 1

394

395

396

# ---------------------------------------------------------------------------

397

# 7. apply_mpack — malformed entries are isolated (snapshot / commit)

398

# ---------------------------------------------------------------------------

399

400

401

class TestApplyPackMalformedEntries:
    """Bad blob entries are skipped individually instead of aborting the pack."""

    def test_malformed_object_entry_does_not_abort_pack(self, tmp_path: pathlib.Path) -> None:
        """Bad entries are skipped; a later valid entry is still written.

        Distinct object IDs are used on purpose: the pack is expected to
        deduplicate by ID, so a failed entry and a valid entry sharing one ID
        would not both be attempted.
        """
        repo = _make_repo(tmp_path)
        id_a = blob_id(b"good object A")
        payload_b = b"good object B"
        id_b = blob_id(payload_b)
        pack: MPack = MPack(
            commits=[],
            snapshots=[],
            blobs=[
                {"object_id": "not-hex", "content": b"bad"},      # malformed ID
                {"object_id": id_a, "content": b"wrong bytes"},   # hash mismatch
                {"object_id": id_b, "content": payload_b},        # valid, different ID
            ],
        )
        outcome = apply_mpack(repo, pack)
        assert not has_object(repo, id_a), "Hash-mismatched entry must not be stored"
        assert has_object(repo, id_b), "Valid entry after bad ones must be stored"
        assert outcome["blobs_written"] == 1

    def test_missing_object_id_in_pack_entry_skipped(self, tmp_path: pathlib.Path) -> None:
        """An entry whose object_id is the empty string writes nothing."""
        repo = _make_repo(tmp_path)
        pack: MPack = MPack(
            commits=[],
            snapshots=[],
            blobs=[{"object_id": "", "content": b"anything"}],
        )
        outcome = apply_mpack(repo, pack)
        assert outcome["blobs_written"] == 0

    def test_empty_content_in_pack_entry_skipped(self, tmp_path: pathlib.Path) -> None:
        """An entry with an empty object_id and empty content writes nothing."""
        from muse.core.mpack import BlobPayload

        repo = _make_repo(tmp_path)
        blank = BlobPayload(object_id="", content=b"")
        pack: MPack = MPack(commits=[], snapshots=[], blobs=[blank])
        outcome = apply_mpack(repo, pack)
        assert outcome["blobs_written"] == 0

450

451

452

# ---------------------------------------------------------------------------

453

# 8. read_object — corruption detected on every read

454

# ---------------------------------------------------------------------------

455

456

457

class TestReadObjectIntegrity:
    """read_object returns verified content, raises on corruption, and yields None when absent."""

    def test_read_clean_object_succeeds(self, tmp_path: pathlib.Path) -> None:
        """An intact object reads back as the bytes that were stored."""
        repo = _make_repo(tmp_path)
        payload = b"clean read test"
        object_id = _stored_object(repo, payload)
        assert read_object(repo, object_id) == payload

    def test_read_corrupted_object_raises(self, tmp_path: pathlib.Path) -> None:
        """Reading an object whose on-disk bytes were overwritten raises OSError."""
        from muse.core.object_store import _object_path_with_fallback

        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"will be corrupted")
        stored = _object_path_with_fallback(repo, object_id)
        # Lift the read-only mode, overwrite the file, then restore the mode.
        os.chmod(stored, 0o644)
        stored.write_bytes(b"corrupted bytes")
        os.chmod(stored, 0o444)
        with pytest.raises(OSError, match="integrity check"):
            read_object(repo, object_id)

    def test_read_absent_object_returns_none(self, tmp_path: pathlib.Path) -> None:
        """An ID that was never written reads as None."""
        repo = _make_repo(tmp_path)
        never_stored = blob_id(b"absent")
        assert read_object(repo, never_stored) is None

479

480

481

# ---------------------------------------------------------------------------

482

# 9. Confirmed: all write_object callsites use content-derived IDs

483

# ---------------------------------------------------------------------------

484

485

486

class TestCallsiteIntegrity:
    """Entry points that write objects derive the object ID from the actual content."""

    def test_hash_object_stdin_derives_id_from_content(self, tmp_path: pathlib.Path) -> None:
        """hash-object with --write derives object_id from actual stdin bytes."""
        from tests.cli_test_helper import CliRunner

        repo = _make_repo(tmp_path)
        config_toml_path(repo).write_text("[core]\nauthor = \"test\"\n")
        content = b"stdin content for hashing"
        expected_oid = blob_id(content)
        runner = CliRunner()
        result = runner.invoke(
            None,
            ["hash-object", "--stdin", "--write"],
            input=content,
            env={"MUSE_REPO_ROOT": str(repo)},
        )
        assert result.exit_code == 0, result.output
        assert expected_oid in result.output
        assert has_object(repo, expected_oid)

    def test_hash_object_file_derives_id_from_file_content(self, tmp_path: pathlib.Path) -> None:
        """hash-object with a file path derives object_id from actual file bytes."""
        from tests.cli_test_helper import CliRunner

        repo = _make_repo(tmp_path)
        config_toml_path(repo).write_text("[core]\nauthor = \"test\"\n")
        content = b"file content for hashing"
        target = tmp_path / "target.bin"
        target.write_bytes(content)
        expected_oid = blob_id(content)
        runner = CliRunner()
        result = runner.invoke(
            None,
            ["hash-object", str(target), "--write"],
            env={"MUSE_REPO_ROOT": str(repo)},
        )
        assert result.exit_code == 0, result.output
        assert expected_oid in result.output
        assert has_object(repo, expected_oid)

    def test_unpack_objects_hash_mismatch_rejected(self, tmp_path: pathlib.Path) -> None:
        """A pack blob whose bytes do not match its declared ID is not stored.

        The CLI is not invoked here: apply_mpack is called directly to
        exercise the core logic.  (The previously unused ``CliRunner`` import
        has been removed.)
        """
        repo = _make_repo(tmp_path)
        config_toml_path(repo).write_text("[core]\nauthor = \"test\"\n")
        legit_oid = blob_id(b"legitimate")

        mpack: MPack = MPack(
            commits=[],
            snapshots=[],
            blobs=[{"object_id": legit_oid, "content": b"malicious bytes"}],
        )
        result = apply_mpack(repo, mpack)
        # The poisoned entry must be dropped rather than written.
        assert not has_object(repo, legit_oid), "Poisoned object must not enter the store"
        assert result["blobs_written"] == 0

541

542

543

# ---------------------------------------------------------------------------

544

# 10. Stress: 10 000-object pack processed within time budget

545

# ---------------------------------------------------------------------------

class TestStress:
    """Volume tests for apply_mpack: many unique blobs, re-application, mass duplicates."""

    @pytest.fixture(autouse=True)
    def no_fsync(self) -> Iterator[None]:
        """Patch out fsync/fcntl calls for the duration of each test in this class.

        This keeps the timing test focused on algorithmic cost rather than
        disk latency.  The function yields, so it is annotated as an
        iterator rather than ``None``.
        """
        with patch("muse.core.object_store._fsync_fd", return_value=None), \
             patch("muse.core.commits.os.fsync", return_value=None), \
             patch("muse.core.io.os.fsync", return_value=None), \
             patch("muse.core.io.fcntl.fcntl", return_value=0):
            yield

    @pytest.mark.perf
    def test_10k_object_pack_within_budget(self, tmp_path: pathlib.Path) -> None:
        """10 000 unique objects written through apply_mpack in under 30 seconds."""
        repo = _make_repo(tmp_path)
        n = 10_000
        payloads = [f"stress-object-{i:06d}".encode() for i in range(n)]
        objects = [{"object_id": blob_id(data), "content": data} for data in payloads]

        mpack: MPack = MPack(commits=[], snapshots=[], blobs=objects)
        # Only the apply_mpack call is timed; building the pack is excluded.
        start = time.monotonic()
        result = apply_mpack(repo, mpack)
        elapsed = time.monotonic() - start

        assert result["blobs_written"] == n
        assert elapsed < 30.0, f"10k-object pack took {elapsed:.1f}s — too slow"

    def test_idempotent_10k_pack_fast(self, tmp_path: pathlib.Path) -> None:
        """Re-applying a pack writes nothing and reports every blob as skipped.

        Despite the test name, the pack holds 1 000 objects and no timing is
        measured; only the written/skipped counters are checked.
        """
        repo = _make_repo(tmp_path)
        n = 1_000
        payloads = [f"idem-object-{i:06d}".encode() for i in range(n)]
        objects = [{"object_id": blob_id(data), "content": data} for data in payloads]

        mpack: MPack = MPack(commits=[], snapshots=[], blobs=objects)
        apply_mpack(repo, mpack)  # first application populates the store
        result2 = apply_mpack(repo, mpack)  # second application must be a no-op
        assert result2["blobs_written"] == 0
        assert result2["blobs_skipped"] == n

    def test_10k_duplicate_ids_deduplicated(self, tmp_path: pathlib.Path) -> None:
        """10 000 entries with the same object_id are deduplicated to one write."""
        repo = _make_repo(tmp_path)
        content = b"one true object"
        oid = blob_id(content)
        mpack: MPack = MPack(
            commits=[],
            snapshots=[],
            blobs=[{"object_id": oid, "content": content}] * 10_000,
        )
        result = apply_mpack(repo, mpack)
        assert result["blobs_written"] == 1
        assert result["blobs_skipped"] == 9_999

606

607

608

# ---------------------------------------------------------------------------

609

# 11. Concurrent poisoning stress

610

# ---------------------------------------------------------------------------

611

612

613

class TestConcurrentPoisoning:
    """Many threads hitting the same object ID must neither poison nor corrupt the store.

    Exceptions raised inside a worker thread do not propagate to the test
    thread, and ``Thread.join(timeout=...)`` returns silently when the
    timeout expires.  Both tests therefore record unexpected worker errors
    and assert that every worker actually finished.
    """

    def test_concurrent_hash_mismatch_attempts_do_not_corrupt(
        self, tmp_path: pathlib.Path
    ) -> None:
        """50 threads simultaneously trying to poison the store — none succeeds."""
        repo = _make_repo(tmp_path)
        legit_content = b"the one true content"
        legit_oid = blob_id(legit_content)

        # Write the legitimate object first.
        write_object(repo, legit_oid, legit_content)

        errors: list[str] = []

        def poison_attempt(idx: int) -> None:
            malicious_content = f"malicious-{idx}".encode()
            try:
                write_object(repo, legit_oid, malicious_content)
            except ValueError:
                return  # expected: the mismatch was rejected
            except Exception as exc:
                # Any other failure would otherwise vanish with the thread.
                errors.append(f"Thread {idx}: unexpected {exc!r}")
                return
            errors.append(f"Thread {idx}: poisoning succeeded!")

        threads = [threading.Thread(target=poison_attempt, args=(i,)) for i in range(50)]
        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=5.0)

        # join() with a timeout does not report whether the thread ended.
        assert not any(t.is_alive() for t in threads), "Worker threads did not finish"
        assert not errors, "\n".join(errors)
        # The stored object must still be the legitimate one.
        assert read_object(repo, legit_oid) == legit_content

    def test_concurrent_writes_of_same_object_idempotent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """50 threads writing the same valid object — at least one write, no corruption."""
        repo = _make_repo(tmp_path)
        content = b"concurrent valid object"
        oid = blob_id(content)
        results: list[bool] = []
        failures: list[str] = []
        lock = threading.Lock()

        def write_it() -> None:
            try:
                wrote = write_object(repo, oid, content)
            except Exception as exc:
                # Surface worker failures to the main thread instead of losing them.
                with lock:
                    failures.append(repr(exc))
                return
            with lock:
                results.append(wrote)

        threads = [threading.Thread(target=write_it) for _ in range(50)]
        for t in threads:
            t.start()
        for t in threads:
            t.join(timeout=5.0)

        assert not any(t.is_alive() for t in threads), "Worker threads did not finish"
        assert not failures, "\n".join(failures)
        assert results.count(True) >= 1, "At least one thread must have written"
        assert read_object(repo, oid) == content

668

669

670

# ---------------------------------------------------------------------------

671

# 12. SHA-256 threat model documentation test

672

# ---------------------------------------------------------------------------

673

674

675

class TestSHA256ThreatModel:
    """Executable notes on the threat model behind the content-addressed store."""

    def test_sha256_preimage_resistance_documented(self) -> None:
        """Record that SHA-256 second-preimage resistance is the security boundary.

        The store resists hash-mismatch injection because:

        1. write_object hashes the content and rejects any mismatch.
        2. read_object re-hashes on every read.
        3. restore_object re-hashes before copying into the working tree.

        Poisoning would therefore need a second preimage: different content
        M' with sha256(M') == sha256(M).  As of 2026 the best known
        second-preimage attack on SHA-256 requires 2^256 operations, which is
        computationally infeasible.

        This is a living specification of the threat model, not a
        cryptographic proof.
        """
        first = b"message A"
        second = b"message B"
        # Distinct messages get distinct IDs (a collision is infeasible in
        # practice, though not theoretically impossible).
        assert blob_id(first) != blob_id(second)

    def test_write_then_read_roundtrip_preserves_content(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Content written to the store is always returned verbatim on read."""
        repo = _make_repo(tmp_path)
        for repeat in range(1, 21):
            # Payload i is the tag for index i repeated i + 1 times.
            payload = f"stress-content-{repeat - 1}".encode() * repeat
            object_id = blob_id(payload)
            write_object(repo, object_id, payload)
            assert read_object(repo, object_id) == payload

    def test_object_mode_is_immutable(self, tmp_path: pathlib.Path) -> None:
        """Stored objects have mode 0o444 — expressing immutability at OS level."""
        from muse.core.object_store import _object_path_with_fallback

        repo = _make_repo(tmp_path)
        object_id = _stored_object(repo, b"immutable object")
        stored = _object_path_with_fallback(repo, object_id)
        # Mask down to the permission bits before comparing.
        mode = oct(stored.stat().st_mode & 0o777)
        assert mode == oct(0o444), f"Expected 0o444, got {mode}"

720

721

722

class TestWriteObjectFromPathRoundTrip:
    """write_object_from_path must produce objects readable by read_object."""

    def test_read_returns_exact_content(self, tmp_path: pathlib.Path) -> None:
        """read_object after write_object_from_path returns the original bytes."""
        repo = _make_repo(tmp_path)
        payload = b"hello world, this is a blob"
        source = tmp_path / "blob.txt"
        source.write_bytes(payload)
        object_id = blob_id(payload)
        write_object_from_path(repo, object_id, source)
        assert read_object(repo, object_id) == payload

    def test_write_from_path_and_write_object_are_equivalent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """write_object_from_path produces the same result as write_object."""
        # Two independent repositories, one per write path.
        base_one = tmp_path / "r1"
        base_two = tmp_path / "r2"
        base_one.mkdir()
        base_two.mkdir()
        repo_bytes = _make_repo(base_one)
        repo_file = _make_repo(base_two)

        payload = b"equivalent content"
        source = tmp_path / "src.bin"
        source.write_bytes(payload)
        object_id = blob_id(payload)

        write_object(repo_bytes, object_id, payload)
        write_object_from_path(repo_file, object_id, source)
        assert read_object(repo_bytes, object_id) == read_object(repo_file, object_id) == payload

    def test_get_all_commits_does_not_flag_blob_as_corrupt(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A blob written via write_object_from_path is stored with a ``blob `` header.

        NOTE(review): no commit scan is run here; the test only inspects the
        first bytes of the stored file.
        """
        from muse.core.object_store import objects_dir

        repo = _make_repo(tmp_path)
        payload = b"I am a Python source file\ndef foo(): pass\n"
        source = tmp_path / "foo.py"
        source.write_bytes(payload)
        write_object_from_path(repo, blob_id(payload), source)

        # The repository is fresh, so the first match is the object just written.
        candidates = objects_dir(repo).glob("sha256/*/*")
        stored_path = next(candidates, None)
        assert stored_path is not None
        assert stored_path.read_bytes().startswith(b"blob "), (
            "Stored object must begin with 'blob ' header"
        )

    def test_bare_objects_readable_after_migration(
        self, tmp_path: pathlib.Path
    ) -> None:
        """read_object can recover bare (no-header) objects written by old code."""
        from muse.core.object_store import object_path

        repo = _make_repo(tmp_path)
        payload = b"legacy blob without header"
        object_id = blob_id(payload)
        # Place the raw bytes at the object's path by hand, bypassing write_object.
        legacy_file = object_path(repo, object_id)
        legacy_file.parent.mkdir(parents=True, exist_ok=True)
        legacy_file.write_bytes(payload)
        legacy_file.chmod(0o444)
        assert read_object(repo, object_id) == payload