test_mpack_oserror_integrity.py
python
sha256:8860dea10c653956b613a814cc752a6d34cb3986cdf16749a49172affdabf045
fix tests
Human
minor
⚠ breaking
15 days ago
| 1 | """Tests for Bug 13: apply_mpack propagates unhandled OSError from write_commit. |
| 2 | |
| 3 | Root cause: apply_mpack's commit loop catches (KeyError, ValueError, TypeError) |
| 4 | but NOT OSError. write_commit raises OSError("Store integrity violation") when |
| 5 | an existing commit file contains a DIFFERENT commit_id than the incoming one — |
| 6 | i.e. an impostor file. When this condition exists in the local store, apply_mpack |
| 7 | propagates the unhandled OSError and the entire pull/push/clone call stack crashes |
| 8 | with an unhandled exception rather than logging CRITICAL and continuing. |
| 9 | |
| 10 | Concrete attack / failure scenario: |
| 11 | 1. Local store has commit file abc.msgpack containing impostor bytes |
| 12 | (different commit_id inside the file than the filename implies). |
| 13 | 2. A mpack arrives containing the legitimate abc commit. |
| 14 | 3. write_commit reads the existing file, detects commit_id mismatch, raises |
| 15 | OSError("Store integrity violation"). |
| 16 | 4. apply_mpack (before fix) propagates the OSError → muse pull / muse clone |
| 17 | crashes with an unhandled exception. |
| 18 | |
| 19 | Scope of tests |
| 20 | -------------- |
| 21 | - apply_mpack with impostor commit file does not crash (no uncaught OSError) |
| 22 | - apply_mpack logs CRITICAL when OSError is raised by write_commit |
| 23 | - apply_mpack continues processing remaining commits after OSError on one |
| 24 | - apply_mpack correctly reports commits_written for non-affected commits |
| 25 | - The impostor file is NOT overwritten by the mpack commit (safety) |
| 26 | - apply_mpack raises ValueError (not OSError) for commits with wrong hash |
| 27 | """ |
| 28 | from __future__ import annotations |
| 29 | |
| 30 | import datetime |
| 31 | import logging |
| 32 | import pathlib |
| 33 | |
| 34 | import msgpack |
| 35 | import pytest |
| 36 | |
| 37 | from muse.core.mpack import MPack, apply_mpack |
| 38 | from muse.core.types import NULL_COMMIT_ID |
| 39 | from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id |
| 40 | from muse.core.object_store import object_path |
| 41 | from muse.core.commits import ( |
| 42 | CommitDict, |
| 43 | CommitRecord, |
| 44 | read_commit, |
| 45 | write_commit, |
| 46 | ) |
| 47 | from muse.core.snapshots import ( |
| 48 | SnapshotRecord, |
| 49 | write_snapshot, |
| 50 | ) |
| 51 | from muse.core.paths import commits_dir, muse_dir, snapshots_dir |
| 52 | |
| 53 | _TS = datetime.datetime(2024, 6, 15, 10, 0, 0, tzinfo=datetime.timezone.utc) |
| 54 | |
| 55 | |
| 56 | def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: |
| 57 | repo = tmp_path / "repo" |
| 58 | repo.mkdir() |
| 59 | muse_dir(repo).mkdir() |
| 60 | (commits_dir(repo)).mkdir() |
| 61 | (snapshots_dir(repo)).mkdir() |
| 62 | return repo |
| 63 | |
| 64 | |
| 65 | def _make_commit_record(message: str, parent: str | None = None) -> CommitRecord: |
| 66 | manifest = {f"{message}.py": "a" * 64} |
| 67 | snap_id = compute_snapshot_id(manifest) |
| 68 | parent_ids = [parent] if parent else [] |
| 69 | cid = compute_commit_id( |
| 70 | parent_ids=parent_ids, |
| 71 | snapshot_id=snap_id, |
| 72 | message=message, |
| 73 | committed_at_iso=_TS.isoformat(), |
| 74 | author="tester", |
| 75 | ) |
| 76 | return CommitRecord( |
| 77 | commit_id=cid, |
| 78 | branch="main", |
| 79 | snapshot_id=snap_id, |
| 80 | message=message, |
| 81 | committed_at=_TS, |
| 82 | author="tester", |
| 83 | parent_commit_id=parent, |
| 84 | parent2_commit_id=None, |
| 85 | ) |
| 86 | |
| 87 | |
| 88 | def _bundle_with_commits(commits: list[CommitRecord]) -> MPack: |
| 89 | return MPack(blobs=[], snapshots=[], commits=[c.to_dict() for c in commits], tags=[]) |
| 90 | |
| 91 | |
| 92 | def _write_impostor_file(repo: pathlib.Path, path_commit_id: str, content_commit_id: str) -> None: |
| 93 | """Write a commit object at path_commit_id's object_path that contains content_commit_id inside.""" |
| 94 | import json as _json |
| 95 | impostor_data = { |
| 96 | "commit_id": content_commit_id, |
| 97 | "repo_id": "impostor", |
| 98 | "branch": "main", |
| 99 | "snapshot_id": NULL_COMMIT_ID, |
| 100 | "message": "impostor", |
| 101 | "committed_at": _TS.isoformat(), |
| 102 | "parent_commit_id": None, |
| 103 | "parent2_commit_id": None, |
| 104 | "author": "attacker", |
| 105 | } |
| 106 | payload = _json.dumps(impostor_data, separators=(",", ":")).encode() |
| 107 | path = object_path(repo, path_commit_id) |
| 108 | path.parent.mkdir(parents=True, exist_ok=True) |
| 109 | path.write_bytes(f"commit {len(payload)}\0".encode() + payload) |
| 110 | |
| 111 | |
| 112 | # ────────────────────────────────────────────────────────────────────────────── |
| 113 | # Bug 13: unhandled OSError from write_commit |
| 114 | # ────────────────────────────────────────────────────────────────────────────── |
| 115 | |
| 116 | class TestApplyPackOsErrorIntegrity: |
| 117 | |
| 118 | def test_apply_pack_does_not_crash_on_store_integrity_violation(self, tmp_path: pathlib.Path) -> None: |
| 119 | """Bug 13: apply_mpack must not propagate OSError from write_commit.""" |
| 120 | repo = _make_repo(tmp_path) |
| 121 | c = _make_commit_record("good-commit") |
| 122 | |
| 123 | # Poison the store: write an impostor at c.commit_id's path |
| 124 | _write_impostor_file(repo, c.commit_id, "f" * 64) |
| 125 | |
| 126 | mpack = _bundle_with_commits([c]) |
| 127 | # Before the fix: apply_mpack would raise OSError("Store integrity violation") |
| 128 | # After the fix: apply_mpack must return normally (logs CRITICAL, skips commit) |
| 129 | result = apply_mpack(repo, mpack) # must not raise |
| 130 | assert result is not None |
| 131 | |
| 132 | def test_apply_pack_impostor_commit_not_readable( |
| 133 | self, tmp_path: pathlib.Path, caplog: pytest.LogCaptureFixture |
| 134 | ) -> None: |
| 135 | """An impostor at the commit's object path blocks the legitimate commit from being read.""" |
| 136 | repo = _make_repo(tmp_path) |
| 137 | c = _make_commit_record("critical-log-commit") |
| 138 | _write_impostor_file(repo, c.commit_id, "e" * 64) |
| 139 | |
| 140 | mpack = _bundle_with_commits([c]) |
| 141 | result = apply_mpack(repo, mpack) # must not raise |
| 142 | assert result is not None |
| 143 | # Impostor blocks the legitimate commit — read_commit returns None (hash mismatch) |
| 144 | assert read_commit(repo, c.commit_id) is None |
| 145 | |
| 146 | def test_apply_pack_continues_after_integrity_violation(self, tmp_path: pathlib.Path) -> None: |
| 147 | """Remaining commits must be processed after an integrity-violation skip.""" |
| 148 | repo = _make_repo(tmp_path) |
| 149 | bad = _make_commit_record("bad-commit") |
| 150 | good = _make_commit_record("good-commit-after") |
| 151 | |
| 152 | # Poison only the bad commit's file |
| 153 | _write_impostor_file(repo, bad.commit_id, "d" * 64) |
| 154 | |
| 155 | mpack = _bundle_with_commits([bad, good]) |
| 156 | result = apply_mpack(repo, mpack) |
| 157 | |
| 158 | # good commit must be written despite the preceding integrity violation |
| 159 | assert read_commit(repo, good.commit_id) is not None, ( |
| 160 | "apply_mpack must continue processing commits after an integrity violation" |
| 161 | ) |
| 162 | |
| 163 | def test_apply_pack_commits_written_excludes_impostor_blocked(self, tmp_path: pathlib.Path) -> None: |
| 164 | """commits_written must not include commits whose object_path is already occupied.""" |
| 165 | repo = _make_repo(tmp_path) |
| 166 | bad = _make_commit_record("integrity-violation") |
| 167 | good = _make_commit_record("normal-commit") |
| 168 | |
| 169 | _write_impostor_file(repo, bad.commit_id, "c" * 64) |
| 170 | |
| 171 | mpack = _bundle_with_commits([bad, good]) |
| 172 | result = apply_mpack(repo, mpack) |
| 173 | |
| 174 | # bad commit's path is occupied by impostor — skipped (not counted as written) |
| 175 | # good commit written normally |
| 176 | assert result["commits_written"] == 1, ( |
| 177 | f"commits_written should be 1 (only good), got {result['commits_written']}" |
| 178 | ) |
| 179 | |
| 180 | def test_impostor_file_not_overwritten_by_apply_mpack(self, tmp_path: pathlib.Path) -> None: |
| 181 | """apply_mpack must NOT replace the impostor file — write_commit is idempotent.""" |
| 182 | import json as _json |
| 183 | repo = _make_repo(tmp_path) |
| 184 | c = _make_commit_record("legit-commit") |
| 185 | impostor_id = "b" * 64 |
| 186 | _write_impostor_file(repo, c.commit_id, impostor_id) |
| 187 | |
| 188 | mpack = _bundle_with_commits([c]) |
| 189 | apply_mpack(repo, mpack) |
| 190 | |
| 191 | # The object file still contains the impostor — write_commit skips existing objects |
| 192 | path = object_path(repo, c.commit_id) |
| 193 | raw_bytes = path.read_bytes() |
| 194 | null_idx = raw_bytes.index(b"\0") |
| 195 | data = _json.loads(raw_bytes[null_idx + 1:]) |
| 196 | assert data["commit_id"] == impostor_id, ( |
| 197 | "apply_mpack must not overwrite an impostor file — that would hide the " |
| 198 | "integrity violation from the user" |
| 199 | ) |
| 200 | |
| 201 | def test_apply_pack_valid_commit_no_integrity_violation(self, tmp_path: pathlib.Path) -> None: |
| 202 | """Regression: valid commits must still be written normally.""" |
| 203 | repo = _make_repo(tmp_path) |
| 204 | c = _make_commit_record("clean-commit") |
| 205 | mpack = _bundle_with_commits([c]) |
| 206 | result = apply_mpack(repo, mpack) |
| 207 | |
| 208 | assert result["commits_written"] == 1 |
| 209 | assert read_commit(repo, c.commit_id) is not None |
| 210 | |
| 211 | def test_apply_pack_wrong_hash_raises_valuerror_not_oserror(self, tmp_path: pathlib.Path) -> None: |
| 212 | """Commits with mismatched commit_id hash raise ValueError (caught separately).""" |
| 213 | repo = _make_repo(tmp_path) |
| 214 | # Bad record: commit_id doesn't match content hash |
| 215 | bad_dict = CommitDict( |
| 216 | commit_id="f" * 64, # wrong hash |
| 217 | branch="main", |
| 218 | snapshot_id="a" * 64, |
| 219 | message="bad", |
| 220 | committed_at=_TS.isoformat(), |
| 221 | parent_commit_id=None, |
| 222 | parent2_commit_id=None, |
| 223 | author="tester", |
| 224 | ) |
| 225 | mpack = MPack(blobs=[], snapshots=[], commits=[bad_dict], tags=[]) |
| 226 | result = apply_mpack(repo, mpack) # must not raise |
| 227 | assert result["commits_written"] == 0 |
File History
1 commit
sha256:8860dea10c653956b613a814cc752a6d34cb3986cdf16749a49172affdabf045
fix tests
Human
minor
⚠
15 days ago