"""Tests for Bug 13: apply_mpack propagates unhandled OSError from write_commit. Root cause: apply_mpack's commit loop catches (KeyError, ValueError, TypeError) but NOT OSError. write_commit raises OSError("Store integrity violation") when an existing commit file contains a DIFFERENT commit_id than the incoming one — i.e. an impostor file. When this condition exists in the local store, apply_mpack propagates the unhandled OSError and the entire pull/push/clone call stack crashes with an unhandled exception rather than logging CRITICAL and continuing. Concrete attack / failure scenario: 1. Local store has commit file abc.msgpack containing impostor bytes (different commit_id inside the file than the filename implies). 2. A mpack arrives containing the legitimate abc commit. 3. write_commit reads the existing file, detects commit_id mismatch, raises OSError("Store integrity violation"). 4. apply_mpack (before fix) propagates the OSError → muse pull / muse clone crashes with an unhandled exception. Scope of tests -------------- - apply_mpack with impostor commit file does not crash (no uncaught OSError) - apply_mpack logs CRITICAL when OSError is raised by write_commit - apply_mpack continues processing remaining commits after OSError on one - apply_mpack correctly reports commits_written for non-affected commits - The impostor file is NOT overwritten by the mpack commit (safety) - apply_mpack raises ValueError (not OSError) for commits with wrong hash """ from __future__ import annotations import datetime import logging import pathlib import msgpack import pytest from muse.core.mpack import MPack, apply_mpack from muse.core.types import Manifest, NULL_COMMIT_ID from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id from muse.core.object_store import object_path from muse.core.commits import ( CommitDict, CommitRecord, read_commit, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from muse.core.paths import commits_dir, muse_dir, snapshots_dir _TS = datetime.datetime(2024, 6, 15, 10, 0, 0, tzinfo=datetime.timezone.utc) def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: repo = tmp_path / "repo" repo.mkdir() muse_dir(repo).mkdir() (commits_dir(repo)).mkdir() (snapshots_dir(repo)).mkdir() return repo def _manifest_for(message: str) -> Manifest: # Use a sha256:-prefixed ID so validate_object_id passes inside apply_mpack. return {f"{message}.py": "sha256:" + "a" * 64} def _make_commit_record(message: str, parent: str | None = None) -> CommitRecord: manifest = _manifest_for(message) snap_id = compute_snapshot_id(manifest) parent_ids = [parent] if parent else [] cid = compute_commit_id( parent_ids=parent_ids, snapshot_id=snap_id, message=message, committed_at_iso=_TS.isoformat(), author="tester", ) return CommitRecord( commit_id=cid, branch="main", snapshot_id=snap_id, message=message, committed_at=_TS, author="tester", parent_commit_id=parent, parent2_commit_id=None, ) def _bundle_with_commits(commits: list[CommitRecord]) -> MPack: # Include snapshots so apply_mpack's missing-snapshot guard passes and # commits reach write_commit (where impostor OSError fires). snapshots = [ { "snapshot_id": c.snapshot_id, "parent_snapshot_id": None, "delta_upsert": _manifest_for(c.message), "delta_remove": [], "directories": [], } for c in commits ] return MPack(blobs=[], snapshots=snapshots, commits=[c.to_dict() for c in commits], tags=[]) def _write_impostor_file(repo: pathlib.Path, path_commit_id: str, content_commit_id: str) -> None: """Write a commit object at path_commit_id's object_path that contains content_commit_id inside.""" import json as _json impostor_data = { "commit_id": content_commit_id, "repo_id": "impostor", "branch": "main", "snapshot_id": NULL_COMMIT_ID, "message": "impostor", "committed_at": _TS.isoformat(), "parent_commit_id": None, "parent2_commit_id": None, "author": "attacker", } payload = _json.dumps(impostor_data, separators=(",", ":")).encode() path = object_path(repo, path_commit_id) path.parent.mkdir(parents=True, exist_ok=True) path.write_bytes(f"commit {len(payload)}\0".encode() + payload) # ────────────────────────────────────────────────────────────────────────────── # Bug 13: unhandled OSError from write_commit # ────────────────────────────────────────────────────────────────────────────── class TestApplyPackOsErrorIntegrity: def test_apply_pack_does_not_crash_on_store_integrity_violation(self, tmp_path: pathlib.Path) -> None: """Bug 13: apply_mpack must not propagate OSError from write_commit.""" repo = _make_repo(tmp_path) c = _make_commit_record("good-commit") # Poison the store: write an impostor at c.commit_id's path _write_impostor_file(repo, c.commit_id, "f" * 64) mpack = _bundle_with_commits([c]) # Before the fix: apply_mpack would raise OSError("Store integrity violation") # After the fix: apply_mpack must return normally (logs CRITICAL, skips commit) result = apply_mpack(repo, mpack) # must not raise assert result is not None def test_apply_pack_impostor_commit_not_readable( self, tmp_path: pathlib.Path, caplog: pytest.LogCaptureFixture ) -> None: """An impostor at the commit's object path blocks the legitimate commit from being read.""" repo = _make_repo(tmp_path) c = _make_commit_record("critical-log-commit") _write_impostor_file(repo, c.commit_id, "e" * 64) mpack = _bundle_with_commits([c]) result = apply_mpack(repo, mpack) # must not raise assert result is not None # Impostor blocks the legitimate commit — read_commit returns None (hash mismatch) assert read_commit(repo, c.commit_id) is None def test_apply_pack_continues_after_integrity_violation(self, tmp_path: pathlib.Path) -> None: """Remaining commits must be processed after an integrity-violation skip.""" repo = _make_repo(tmp_path) bad = _make_commit_record("bad-commit") good = _make_commit_record("good-commit-after") # Poison only the bad commit's file _write_impostor_file(repo, bad.commit_id, "d" * 64) mpack = _bundle_with_commits([bad, good]) result = apply_mpack(repo, mpack) # good commit must be written despite the preceding integrity violation assert read_commit(repo, good.commit_id) is not None, ( "apply_mpack must continue processing commits after an integrity violation" ) def test_apply_pack_commits_written_excludes_impostor_blocked(self, tmp_path: pathlib.Path) -> None: """commits_written must not include commits whose object_path is already occupied.""" repo = _make_repo(tmp_path) bad = _make_commit_record("integrity-violation") good = _make_commit_record("normal-commit") _write_impostor_file(repo, bad.commit_id, "c" * 64) mpack = _bundle_with_commits([bad, good]) result = apply_mpack(repo, mpack) # bad commit's path is occupied by impostor — skipped (not counted as written) # good commit written normally assert result["commits_written"] == 1, ( f"commits_written should be 1 (only good), got {result['commits_written']}" ) def test_impostor_file_not_overwritten_by_apply_mpack(self, tmp_path: pathlib.Path) -> None: """apply_mpack must NOT replace the impostor file — write_commit is idempotent.""" import json as _json repo = _make_repo(tmp_path) c = _make_commit_record("legit-commit") impostor_id = "b" * 64 _write_impostor_file(repo, c.commit_id, impostor_id) mpack = _bundle_with_commits([c]) apply_mpack(repo, mpack) # The object file still contains the impostor — write_commit skips existing objects path = object_path(repo, c.commit_id) raw_bytes = path.read_bytes() null_idx = raw_bytes.index(b"\0") data = _json.loads(raw_bytes[null_idx + 1:]) assert data["commit_id"] == impostor_id, ( "apply_mpack must not overwrite an impostor file — that would hide the " "integrity violation from the user" ) def test_apply_pack_valid_commit_no_integrity_violation(self, tmp_path: pathlib.Path) -> None: """Regression: valid commits must still be written normally.""" repo = _make_repo(tmp_path) c = _make_commit_record("clean-commit") mpack = _bundle_with_commits([c]) result = apply_mpack(repo, mpack) assert result["commits_written"] == 1 assert read_commit(repo, c.commit_id) is not None def test_apply_pack_wrong_hash_raises_valuerror_not_oserror(self, tmp_path: pathlib.Path) -> None: """Commits with mismatched commit_id hash raise ValueError (caught separately).""" repo = _make_repo(tmp_path) # Bad record: commit_id doesn't match content hash bad_dict = CommitDict( commit_id="f" * 64, # wrong hash branch="main", snapshot_id="a" * 64, message="bad", committed_at=_TS.isoformat(), parent_commit_id=None, parent2_commit_id=None, author="tester", ) mpack = MPack(blobs=[], snapshots=[], commits=[bad_dict], tags=[]) result = apply_mpack(repo, mpack) # must not raise assert result["commits_written"] == 0