gabriel / muse public
test_mpack_oserror_integrity.py python
227 lines 9.4 KB
Raw
sha256:8860dea10c653956b613a814cc752a6d34cb3986cdf16749a49172affdabf045 fix tests Human minor ⚠ breaking 15 days ago
1 """Tests for Bug 13: apply_mpack propagates unhandled OSError from write_commit.
2
3 Root cause: apply_mpack's commit loop catches (KeyError, ValueError, TypeError)
4 but NOT OSError. write_commit raises OSError("Store integrity violation") when
5 an existing commit file contains a DIFFERENT commit_id than the incoming one —
6 i.e. an impostor file. When this condition exists in the local store, apply_mpack
7 propagates the unhandled OSError and the entire pull/push/clone call stack crashes
8 with an unhandled exception rather than logging CRITICAL and continuing.
9
10 Concrete attack / failure scenario:
11 1. Local store has commit file abc.msgpack containing impostor bytes
12 (different commit_id inside the file than the filename implies).
13 2. A mpack arrives containing the legitimate abc commit.
14 3. write_commit reads the existing file, detects commit_id mismatch, raises
15 OSError("Store integrity violation").
16 4. apply_mpack (before fix) propagates the OSError → muse pull / muse clone
17 crashes with an unhandled exception.
18
19 Scope of tests
20 --------------
21 - apply_mpack with impostor commit file does not crash (no uncaught OSError)
22 - apply_mpack logs CRITICAL when OSError is raised by write_commit
23 - apply_mpack continues processing remaining commits after OSError on one
24 - apply_mpack correctly reports commits_written for non-affected commits
25 - The impostor file is NOT overwritten by the mpack commit (safety)
26 - apply_mpack raises ValueError (not OSError) for commits with wrong hash
27 """
28 from __future__ import annotations
29
30 import datetime
31 import logging
32 import pathlib
33
34 import msgpack
35 import pytest
36
37 from muse.core.mpack import MPack, apply_mpack
38 from muse.core.types import NULL_COMMIT_ID
39 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
40 from muse.core.object_store import object_path
41 from muse.core.commits import (
42 CommitDict,
43 CommitRecord,
44 read_commit,
45 write_commit,
46 )
47 from muse.core.snapshots import (
48 SnapshotRecord,
49 write_snapshot,
50 )
51 from muse.core.paths import commits_dir, muse_dir, snapshots_dir
52
53 _TS = datetime.datetime(2024, 6, 15, 10, 0, 0, tzinfo=datetime.timezone.utc)
54
55
56 def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
57 repo = tmp_path / "repo"
58 repo.mkdir()
59 muse_dir(repo).mkdir()
60 (commits_dir(repo)).mkdir()
61 (snapshots_dir(repo)).mkdir()
62 return repo
63
64
65 def _make_commit_record(message: str, parent: str | None = None) -> CommitRecord:
66 manifest = {f"{message}.py": "a" * 64}
67 snap_id = compute_snapshot_id(manifest)
68 parent_ids = [parent] if parent else []
69 cid = compute_commit_id(
70 parent_ids=parent_ids,
71 snapshot_id=snap_id,
72 message=message,
73 committed_at_iso=_TS.isoformat(),
74 author="tester",
75 )
76 return CommitRecord(
77 commit_id=cid,
78 branch="main",
79 snapshot_id=snap_id,
80 message=message,
81 committed_at=_TS,
82 author="tester",
83 parent_commit_id=parent,
84 parent2_commit_id=None,
85 )
86
87
88 def _bundle_with_commits(commits: list[CommitRecord]) -> MPack:
89 return MPack(blobs=[], snapshots=[], commits=[c.to_dict() for c in commits], tags=[])
90
91
92 def _write_impostor_file(repo: pathlib.Path, path_commit_id: str, content_commit_id: str) -> None:
93 """Write a commit object at path_commit_id's object_path that contains content_commit_id inside."""
94 import json as _json
95 impostor_data = {
96 "commit_id": content_commit_id,
97 "repo_id": "impostor",
98 "branch": "main",
99 "snapshot_id": NULL_COMMIT_ID,
100 "message": "impostor",
101 "committed_at": _TS.isoformat(),
102 "parent_commit_id": None,
103 "parent2_commit_id": None,
104 "author": "attacker",
105 }
106 payload = _json.dumps(impostor_data, separators=(",", ":")).encode()
107 path = object_path(repo, path_commit_id)
108 path.parent.mkdir(parents=True, exist_ok=True)
109 path.write_bytes(f"commit {len(payload)}\0".encode() + payload)
110
111
112 # ──────────────────────────────────────────────────────────────────────────────
113 # Bug 13: unhandled OSError from write_commit
114 # ──────────────────────────────────────────────────────────────────────────────
115
116 class TestApplyPackOsErrorIntegrity:
117
118 def test_apply_pack_does_not_crash_on_store_integrity_violation(self, tmp_path: pathlib.Path) -> None:
119 """Bug 13: apply_mpack must not propagate OSError from write_commit."""
120 repo = _make_repo(tmp_path)
121 c = _make_commit_record("good-commit")
122
123 # Poison the store: write an impostor at c.commit_id's path
124 _write_impostor_file(repo, c.commit_id, "f" * 64)
125
126 mpack = _bundle_with_commits([c])
127 # Before the fix: apply_mpack would raise OSError("Store integrity violation")
128 # After the fix: apply_mpack must return normally (logs CRITICAL, skips commit)
129 result = apply_mpack(repo, mpack) # must not raise
130 assert result is not None
131
132 def test_apply_pack_impostor_commit_not_readable(
133 self, tmp_path: pathlib.Path, caplog: pytest.LogCaptureFixture
134 ) -> None:
135 """An impostor at the commit's object path blocks the legitimate commit from being read."""
136 repo = _make_repo(tmp_path)
137 c = _make_commit_record("critical-log-commit")
138 _write_impostor_file(repo, c.commit_id, "e" * 64)
139
140 mpack = _bundle_with_commits([c])
141 result = apply_mpack(repo, mpack) # must not raise
142 assert result is not None
143 # Impostor blocks the legitimate commit — read_commit returns None (hash mismatch)
144 assert read_commit(repo, c.commit_id) is None
145
146 def test_apply_pack_continues_after_integrity_violation(self, tmp_path: pathlib.Path) -> None:
147 """Remaining commits must be processed after an integrity-violation skip."""
148 repo = _make_repo(tmp_path)
149 bad = _make_commit_record("bad-commit")
150 good = _make_commit_record("good-commit-after")
151
152 # Poison only the bad commit's file
153 _write_impostor_file(repo, bad.commit_id, "d" * 64)
154
155 mpack = _bundle_with_commits([bad, good])
156 result = apply_mpack(repo, mpack)
157
158 # good commit must be written despite the preceding integrity violation
159 assert read_commit(repo, good.commit_id) is not None, (
160 "apply_mpack must continue processing commits after an integrity violation"
161 )
162
163 def test_apply_pack_commits_written_excludes_impostor_blocked(self, tmp_path: pathlib.Path) -> None:
164 """commits_written must not include commits whose object_path is already occupied."""
165 repo = _make_repo(tmp_path)
166 bad = _make_commit_record("integrity-violation")
167 good = _make_commit_record("normal-commit")
168
169 _write_impostor_file(repo, bad.commit_id, "c" * 64)
170
171 mpack = _bundle_with_commits([bad, good])
172 result = apply_mpack(repo, mpack)
173
174 # bad commit's path is occupied by impostor — skipped (not counted as written)
175 # good commit written normally
176 assert result["commits_written"] == 1, (
177 f"commits_written should be 1 (only good), got {result['commits_written']}"
178 )
179
180 def test_impostor_file_not_overwritten_by_apply_mpack(self, tmp_path: pathlib.Path) -> None:
181 """apply_mpack must NOT replace the impostor file — write_commit is idempotent."""
182 import json as _json
183 repo = _make_repo(tmp_path)
184 c = _make_commit_record("legit-commit")
185 impostor_id = "b" * 64
186 _write_impostor_file(repo, c.commit_id, impostor_id)
187
188 mpack = _bundle_with_commits([c])
189 apply_mpack(repo, mpack)
190
191 # The object file still contains the impostor — write_commit skips existing objects
192 path = object_path(repo, c.commit_id)
193 raw_bytes = path.read_bytes()
194 null_idx = raw_bytes.index(b"\0")
195 data = _json.loads(raw_bytes[null_idx + 1:])
196 assert data["commit_id"] == impostor_id, (
197 "apply_mpack must not overwrite an impostor file — that would hide the "
198 "integrity violation from the user"
199 )
200
201 def test_apply_pack_valid_commit_no_integrity_violation(self, tmp_path: pathlib.Path) -> None:
202 """Regression: valid commits must still be written normally."""
203 repo = _make_repo(tmp_path)
204 c = _make_commit_record("clean-commit")
205 mpack = _bundle_with_commits([c])
206 result = apply_mpack(repo, mpack)
207
208 assert result["commits_written"] == 1
209 assert read_commit(repo, c.commit_id) is not None
210
211 def test_apply_pack_wrong_hash_raises_valuerror_not_oserror(self, tmp_path: pathlib.Path) -> None:
212 """Commits with mismatched commit_id hash raise ValueError (caught separately)."""
213 repo = _make_repo(tmp_path)
214 # Bad record: commit_id doesn't match content hash
215 bad_dict = CommitDict(
216 commit_id="f" * 64, # wrong hash
217 branch="main",
218 snapshot_id="a" * 64,
219 message="bad",
220 committed_at=_TS.isoformat(),
221 parent_commit_id=None,
222 parent2_commit_id=None,
223 author="tester",
224 )
225 mpack = MPack(blobs=[], snapshots=[], commits=[bad_dict], tags=[])
226 result = apply_mpack(repo, mpack) # must not raise
227 assert result["commits_written"] == 0
File History 1 commit