test_mpack_oserror_integrity.py
python
sha256:8860dea10c653956b613a814cc752a6d34cb3986cdf16749a49172affdabf045
fix tests
Human
minor
⚠ breaking
4 days ago
| 1 | """Tests for Bug 13: apply_mpack propagates unhandled OSError from write_commit. |
| 2 | |
| 3 | Root cause: apply_mpack's commit loop catches (KeyError, ValueError, TypeError) |
| 4 | but NOT OSError. write_commit raises OSError("Store integrity violation") when |
| 5 | an existing commit file contains a DIFFERENT commit_id than the incoming one — |
| 6 | i.e. an impostor file. When this condition exists in the local store, apply_mpack |
| 7 | propagates the unhandled OSError and the entire pull/push/clone call stack crashes |
| 8 | with an unhandled exception rather than logging CRITICAL and continuing. |
| 9 | |
| 10 | Concrete attack / failure scenario: |
| 11 | 1. Local store has commit file abc.msgpack containing impostor bytes |
| 12 | (different commit_id inside the file than the filename implies). |
| 13 | 2. A mpack arrives containing the legitimate abc commit. |
| 14 | 3. write_commit reads the existing file, detects commit_id mismatch, raises |
| 15 | OSError("Store integrity violation"). |
| 16 | 4. apply_mpack (before fix) propagates the OSError → muse pull / muse clone |
| 17 | crashes with an unhandled exception. |
| 18 | |
| 19 | Scope of tests |
| 20 | -------------- |
| 21 | - apply_mpack with impostor commit file does not crash (no uncaught OSError) |
| 22 | - apply_mpack logs CRITICAL when OSError is raised by write_commit |
| 23 | - apply_mpack continues processing remaining commits after OSError on one |
| 24 | - apply_mpack correctly reports commits_written for non-affected commits |
| 25 | - The impostor file is NOT overwritten by the mpack commit (safety) |
| 26 | - apply_mpack raises ValueError (not OSError) for commits with wrong hash |
| 27 | """ |
| 28 | from __future__ import annotations |
| 29 | |
| 30 | import datetime |
| 31 | import logging |
| 32 | import pathlib |
| 33 | |
| 34 | import msgpack |
| 35 | import pytest |
| 36 | |
| 37 | from muse.core.mpack import MPack, apply_mpack |
| 38 | from muse.core.types import Manifest, NULL_COMMIT_ID |
| 39 | from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id |
| 40 | from muse.core.object_store import object_path |
| 41 | from muse.core.commits import ( |
| 42 | CommitDict, |
| 43 | CommitRecord, |
| 44 | read_commit, |
| 45 | write_commit, |
| 46 | ) |
| 47 | from muse.core.snapshots import ( |
| 48 | SnapshotRecord, |
| 49 | write_snapshot, |
| 50 | ) |
| 51 | from muse.core.paths import commits_dir, muse_dir, snapshots_dir |
| 52 | |
| 53 | _TS = datetime.datetime(2024, 6, 15, 10, 0, 0, tzinfo=datetime.timezone.utc) |
| 54 | |
| 55 | |
| 56 | def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: |
| 57 | repo = tmp_path / "repo" |
| 58 | repo.mkdir() |
| 59 | muse_dir(repo).mkdir() |
| 60 | (commits_dir(repo)).mkdir() |
| 61 | (snapshots_dir(repo)).mkdir() |
| 62 | return repo |
| 63 | |
| 64 | |
| 65 | def _manifest_for(message: str) -> Manifest: |
| 66 | # Use a sha256:-prefixed ID so validate_object_id passes inside apply_mpack. |
| 67 | return {f"{message}.py": "sha256:" + "a" * 64} |
| 68 | |
| 69 | |
| 70 | def _make_commit_record(message: str, parent: str | None = None) -> CommitRecord: |
| 71 | manifest = _manifest_for(message) |
| 72 | snap_id = compute_snapshot_id(manifest) |
| 73 | parent_ids = [parent] if parent else [] |
| 74 | cid = compute_commit_id( |
| 75 | parent_ids=parent_ids, |
| 76 | snapshot_id=snap_id, |
| 77 | message=message, |
| 78 | committed_at_iso=_TS.isoformat(), |
| 79 | author="tester", |
| 80 | ) |
| 81 | return CommitRecord( |
| 82 | commit_id=cid, |
| 83 | branch="main", |
| 84 | snapshot_id=snap_id, |
| 85 | message=message, |
| 86 | committed_at=_TS, |
| 87 | author="tester", |
| 88 | parent_commit_id=parent, |
| 89 | parent2_commit_id=None, |
| 90 | ) |
| 91 | |
| 92 | |
| 93 | def _bundle_with_commits(commits: list[CommitRecord]) -> MPack: |
| 94 | # Include snapshots so apply_mpack's missing-snapshot guard passes and |
| 95 | # commits reach write_commit (where impostor OSError fires). |
| 96 | snapshots = [ |
| 97 | { |
| 98 | "snapshot_id": c.snapshot_id, |
| 99 | "parent_snapshot_id": None, |
| 100 | "delta_upsert": _manifest_for(c.message), |
| 101 | "delta_remove": [], |
| 102 | "directories": [], |
| 103 | } |
| 104 | for c in commits |
| 105 | ] |
| 106 | return MPack(blobs=[], snapshots=snapshots, commits=[c.to_dict() for c in commits], tags=[]) |
| 107 | |
| 108 | |
| 109 | def _write_impostor_file(repo: pathlib.Path, path_commit_id: str, content_commit_id: str) -> None: |
| 110 | """Write a commit object at path_commit_id's object_path that contains content_commit_id inside.""" |
| 111 | import json as _json |
| 112 | impostor_data = { |
| 113 | "commit_id": content_commit_id, |
| 114 | "repo_id": "impostor", |
| 115 | "branch": "main", |
| 116 | "snapshot_id": NULL_COMMIT_ID, |
| 117 | "message": "impostor", |
| 118 | "committed_at": _TS.isoformat(), |
| 119 | "parent_commit_id": None, |
| 120 | "parent2_commit_id": None, |
| 121 | "author": "attacker", |
| 122 | } |
| 123 | payload = _json.dumps(impostor_data, separators=(",", ":")).encode() |
| 124 | path = object_path(repo, path_commit_id) |
| 125 | path.parent.mkdir(parents=True, exist_ok=True) |
| 126 | path.write_bytes(f"commit {len(payload)}\0".encode() + payload) |
| 127 | |
| 128 | |
| 129 | # ────────────────────────────────────────────────────────────────────────────── |
| 130 | # Bug 13: unhandled OSError from write_commit |
| 131 | # ────────────────────────────────────────────────────────────────────────────── |
| 132 | |
| 133 | class TestApplyPackOsErrorIntegrity: |
| 134 | |
| 135 | def test_apply_pack_does_not_crash_on_store_integrity_violation(self, tmp_path: pathlib.Path) -> None: |
| 136 | """Bug 13: apply_mpack must not propagate OSError from write_commit.""" |
| 137 | repo = _make_repo(tmp_path) |
| 138 | c = _make_commit_record("good-commit") |
| 139 | |
| 140 | # Poison the store: write an impostor at c.commit_id's path |
| 141 | _write_impostor_file(repo, c.commit_id, "f" * 64) |
| 142 | |
| 143 | mpack = _bundle_with_commits([c]) |
| 144 | # Before the fix: apply_mpack would raise OSError("Store integrity violation") |
| 145 | # After the fix: apply_mpack must return normally (logs CRITICAL, skips commit) |
| 146 | result = apply_mpack(repo, mpack) # must not raise |
| 147 | assert result is not None |
| 148 | |
| 149 | def test_apply_pack_impostor_commit_not_readable( |
| 150 | self, tmp_path: pathlib.Path, caplog: pytest.LogCaptureFixture |
| 151 | ) -> None: |
| 152 | """An impostor at the commit's object path blocks the legitimate commit from being read.""" |
| 153 | repo = _make_repo(tmp_path) |
| 154 | c = _make_commit_record("critical-log-commit") |
| 155 | _write_impostor_file(repo, c.commit_id, "e" * 64) |
| 156 | |
| 157 | mpack = _bundle_with_commits([c]) |
| 158 | result = apply_mpack(repo, mpack) # must not raise |
| 159 | assert result is not None |
| 160 | # Impostor blocks the legitimate commit — read_commit returns None (hash mismatch) |
| 161 | assert read_commit(repo, c.commit_id) is None |
| 162 | |
| 163 | def test_apply_pack_continues_after_integrity_violation(self, tmp_path: pathlib.Path) -> None: |
| 164 | """Remaining commits must be processed after an integrity-violation skip.""" |
| 165 | repo = _make_repo(tmp_path) |
| 166 | bad = _make_commit_record("bad-commit") |
| 167 | good = _make_commit_record("good-commit-after") |
| 168 | |
| 169 | # Poison only the bad commit's file |
| 170 | _write_impostor_file(repo, bad.commit_id, "d" * 64) |
| 171 | |
| 172 | mpack = _bundle_with_commits([bad, good]) |
| 173 | result = apply_mpack(repo, mpack) |
| 174 | |
| 175 | # good commit must be written despite the preceding integrity violation |
| 176 | assert read_commit(repo, good.commit_id) is not None, ( |
| 177 | "apply_mpack must continue processing commits after an integrity violation" |
| 178 | ) |
| 179 | |
| 180 | def test_apply_pack_commits_written_excludes_impostor_blocked(self, tmp_path: pathlib.Path) -> None: |
| 181 | """commits_written must not include commits whose object_path is already occupied.""" |
| 182 | repo = _make_repo(tmp_path) |
| 183 | bad = _make_commit_record("integrity-violation") |
| 184 | good = _make_commit_record("normal-commit") |
| 185 | |
| 186 | _write_impostor_file(repo, bad.commit_id, "c" * 64) |
| 187 | |
| 188 | mpack = _bundle_with_commits([bad, good]) |
| 189 | result = apply_mpack(repo, mpack) |
| 190 | |
| 191 | # bad commit's path is occupied by impostor — skipped (not counted as written) |
| 192 | # good commit written normally |
| 193 | assert result["commits_written"] == 1, ( |
| 194 | f"commits_written should be 1 (only good), got {result['commits_written']}" |
| 195 | ) |
| 196 | |
| 197 | def test_impostor_file_not_overwritten_by_apply_mpack(self, tmp_path: pathlib.Path) -> None: |
| 198 | """apply_mpack must NOT replace the impostor file — write_commit is idempotent.""" |
| 199 | import json as _json |
| 200 | repo = _make_repo(tmp_path) |
| 201 | c = _make_commit_record("legit-commit") |
| 202 | impostor_id = "b" * 64 |
| 203 | _write_impostor_file(repo, c.commit_id, impostor_id) |
| 204 | |
| 205 | mpack = _bundle_with_commits([c]) |
| 206 | apply_mpack(repo, mpack) |
| 207 | |
| 208 | # The object file still contains the impostor — write_commit skips existing objects |
| 209 | path = object_path(repo, c.commit_id) |
| 210 | raw_bytes = path.read_bytes() |
| 211 | null_idx = raw_bytes.index(b"\0") |
| 212 | data = _json.loads(raw_bytes[null_idx + 1:]) |
| 213 | assert data["commit_id"] == impostor_id, ( |
| 214 | "apply_mpack must not overwrite an impostor file — that would hide the " |
| 215 | "integrity violation from the user" |
| 216 | ) |
| 217 | |
| 218 | def test_apply_pack_valid_commit_no_integrity_violation(self, tmp_path: pathlib.Path) -> None: |
| 219 | """Regression: valid commits must still be written normally.""" |
| 220 | repo = _make_repo(tmp_path) |
| 221 | c = _make_commit_record("clean-commit") |
| 222 | mpack = _bundle_with_commits([c]) |
| 223 | result = apply_mpack(repo, mpack) |
| 224 | |
| 225 | assert result["commits_written"] == 1 |
| 226 | assert read_commit(repo, c.commit_id) is not None |
| 227 | |
| 228 | def test_apply_pack_wrong_hash_raises_valuerror_not_oserror(self, tmp_path: pathlib.Path) -> None: |
| 229 | """Commits with mismatched commit_id hash raise ValueError (caught separately).""" |
| 230 | repo = _make_repo(tmp_path) |
| 231 | # Bad record: commit_id doesn't match content hash |
| 232 | bad_dict = CommitDict( |
| 233 | commit_id="f" * 64, # wrong hash |
| 234 | branch="main", |
| 235 | snapshot_id="a" * 64, |
| 236 | message="bad", |
| 237 | committed_at=_TS.isoformat(), |
| 238 | parent_commit_id=None, |
| 239 | parent2_commit_id=None, |
| 240 | author="tester", |
| 241 | ) |
| 242 | mpack = MPack(blobs=[], snapshots=[], commits=[bad_dict], tags=[]) |
| 243 | result = apply_mpack(repo, mpack) # must not raise |
| 244 | assert result["commits_written"] == 0 |
File History
1 commit
sha256:8860dea10c653956b613a814cc752a6d34cb3986cdf16749a49172affdabf045
fix tests
Human
minor
⚠
4 days ago