"""Tests for Bug 13: apply_pack propagates unhandled OSError from write_commit. Root cause: apply_pack's commit loop catches (KeyError, ValueError, TypeError) but NOT OSError. write_commit raises OSError("Store integrity violation") when an existing commit file contains a DIFFERENT commit_id than the incoming one — i.e. an impostor file. When this condition exists in the local store, apply_pack propagates the unhandled OSError and the entire pull/push/clone call stack crashes with an unhandled exception rather than logging CRITICAL and continuing. Concrete attack / failure scenario: 1. Local store has commit file abc.msgpack containing impostor bytes (different commit_id inside the file than the filename implies). 2. A bundle arrives containing the legitimate abc commit. 3. write_commit reads the existing file, detects commit_id mismatch, raises OSError("Store integrity violation"). 4. apply_pack (before fix) propagates the OSError → muse pull / muse clone crashes with an unhandled exception. Scope of tests -------------- - apply_pack with impostor commit file does not crash (no uncaught OSError) - apply_pack logs CRITICAL when OSError is raised by write_commit - apply_pack continues processing remaining commits after OSError on one - apply_pack correctly reports commits_written for non-affected commits - The impostor file is NOT overwritten by the bundle commit (safety) - apply_pack raises ValueError (not OSError) for commits with wrong hash """ from __future__ import annotations import datetime import logging import pathlib import msgpack import pytest from muse.core.pack import PackBundle, apply_pack from muse.core.snapshot import compute_commit_id, compute_snapshot_id from muse.core.store import ( CommitDict, CommitRecord, SnapshotRecord, read_commit, write_commit, write_snapshot, ) _TS = datetime.datetime(2024, 6, 15, 10, 0, 0, tzinfo=datetime.timezone.utc) def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: repo = tmp_path / "repo" repo.mkdir() (repo / ".muse").mkdir() (repo / ".muse" / "commits").mkdir() (repo / ".muse" / "snapshots").mkdir() return repo def _make_commit_record(message: str, parent: str | None = None) -> CommitRecord: manifest = {f"{message}.py": "a" * 64} snap_id = compute_snapshot_id(manifest) parent_ids = [parent] if parent else [] cid = compute_commit_id(parent_ids, snap_id, message, _TS.isoformat()) return CommitRecord( commit_id=cid, repo_id="test-repo", branch="main", snapshot_id=snap_id, message=message, committed_at=_TS, author="tester", parent_commit_id=parent, parent2_commit_id=None, ) def _bundle_with_commits(commits: list[CommitRecord]) -> PackBundle: return PackBundle(objects=[], snapshots=[], commits=[c.to_dict() for c in commits], tags=[]) def _write_impostor_file(repo: pathlib.Path, path_commit_id: str, content_commit_id: str) -> None: """Write a commit file at path_commit_id.msgpack that contains content_commit_id inside.""" impostor_data = { "commit_id": content_commit_id, "repo_id": "impostor", "branch": "main", "snapshot_id": "0" * 64, "message": "impostor", "committed_at": _TS.isoformat(), "parent_commit_id": None, "parent2_commit_id": None, "author": "attacker", } path = repo / ".muse" / "commits" / f"{path_commit_id}.msgpack" path.write_bytes(msgpack.packb(impostor_data, use_bin_type=True)) # ────────────────────────────────────────────────────────────────────────────── # Bug 13: unhandled OSError from write_commit # ────────────────────────────────────────────────────────────────────────────── class TestApplyPackOsErrorIntegrity: def test_apply_pack_does_not_crash_on_store_integrity_violation(self, tmp_path: pathlib.Path) -> None: """Bug 13: apply_pack must not propagate OSError from write_commit.""" repo = _make_repo(tmp_path) c = _make_commit_record("good-commit") # Poison the store: write an impostor at c.commit_id's path _write_impostor_file(repo, c.commit_id, "f" * 64) bundle = _bundle_with_commits([c]) # Before the fix: apply_pack would raise OSError("Store integrity violation") # After the fix: apply_pack must return normally (logs CRITICAL, skips commit) result = apply_pack(repo, bundle) # must not raise assert result is not None def test_apply_pack_logs_critical_on_store_integrity_violation( self, tmp_path: pathlib.Path, caplog: pytest.LogCaptureFixture ) -> None: """apply_pack must log CRITICAL (not swallow silently) when OSError is raised.""" repo = _make_repo(tmp_path) c = _make_commit_record("critical-log-commit") _write_impostor_file(repo, c.commit_id, "e" * 64) bundle = _bundle_with_commits([c]) with caplog.at_level(logging.CRITICAL, logger="muse.core.pack"): apply_pack(repo, bundle) crits = [r for r in caplog.records if r.levelno >= logging.CRITICAL] assert crits, "apply_pack must log CRITICAL when write_commit raises OSError" assert any("integrity" in r.message.lower() or "violation" in r.message.lower() for r in crits) def test_apply_pack_continues_after_integrity_violation(self, tmp_path: pathlib.Path) -> None: """Remaining commits must be processed after an integrity-violation skip.""" repo = _make_repo(tmp_path) bad = _make_commit_record("bad-commit") good = _make_commit_record("good-commit-after") # Poison only the bad commit's file _write_impostor_file(repo, bad.commit_id, "d" * 64) bundle = _bundle_with_commits([bad, good]) result = apply_pack(repo, bundle) # good commit must be written despite the preceding integrity violation assert read_commit(repo, good.commit_id) is not None, ( "apply_pack must continue processing commits after an integrity violation" ) def test_apply_pack_commits_written_excludes_integrity_violation(self, tmp_path: pathlib.Path) -> None: """commits_written must not include commits that triggered OSError.""" repo = _make_repo(tmp_path) bad = _make_commit_record("integrity-violation") good = _make_commit_record("normal-commit") _write_impostor_file(repo, bad.commit_id, "c" * 64) bundle = _bundle_with_commits([bad, good]) result = apply_pack(repo, bundle) # Only the good commit should be counted as written assert result["commits_written"] == 1, ( f"commits_written should be 1 (only good), got {result['commits_written']}" ) def test_impostor_file_not_overwritten_by_apply_pack(self, tmp_path: pathlib.Path) -> None: """apply_pack must NOT replace the impostor file — that would hide the violation.""" repo = _make_repo(tmp_path) c = _make_commit_record("legit-commit") impostor_id = "b" * 64 _write_impostor_file(repo, c.commit_id, impostor_id) bundle = _bundle_with_commits([c]) apply_pack(repo, bundle) # The file still contains the impostor — write_commit's OSError branch # does NOT overwrite (only write_commit with a valid incoming record # and a corrupt EXISTING file overwrites via the except Exception branch). # apply_pack catches the OSError and skips — no write happens. path = repo / ".muse" / "commits" / f"{c.commit_id}.msgpack" raw = msgpack.unpackb(path.read_bytes(), raw=False) assert raw["commit_id"] == impostor_id, ( "apply_pack must not overwrite an impostor file — that would hide the " "integrity violation from the user" ) def test_apply_pack_valid_commit_no_integrity_violation(self, tmp_path: pathlib.Path) -> None: """Regression: valid commits must still be written normally.""" repo = _make_repo(tmp_path) c = _make_commit_record("clean-commit") bundle = _bundle_with_commits([c]) result = apply_pack(repo, bundle) assert result["commits_written"] == 1 assert read_commit(repo, c.commit_id) is not None def test_apply_pack_wrong_hash_raises_valuerror_not_oserror(self, tmp_path: pathlib.Path) -> None: """Commits with mismatched commit_id hash raise ValueError (caught separately).""" repo = _make_repo(tmp_path) # Bad record: commit_id doesn't match content hash bad_dict = CommitDict( commit_id="f" * 64, # wrong hash repo_id="test-repo", branch="main", snapshot_id="a" * 64, message="bad", committed_at=_TS.isoformat(), parent_commit_id=None, parent2_commit_id=None, author="tester", ) bundle = PackBundle(objects=[], snapshots=[], commits=[bad_dict], tags=[]) result = apply_pack(repo, bundle) # must not raise assert result["commits_written"] == 0