gabriel / muse public
test_mpack_oserror_integrity.py python
244 lines 10.0 KB
Raw
sha256:8860dea10c653956b613a814cc752a6d34cb3986cdf16749a49172affdabf045 fix tests Human minor ⚠ breaking 4 days ago
1 """Tests for Bug 13: apply_mpack propagates unhandled OSError from write_commit.
2
3 Root cause: apply_mpack's commit loop catches (KeyError, ValueError, TypeError)
4 but NOT OSError. write_commit raises OSError("Store integrity violation") when
5 an existing commit file contains a DIFFERENT commit_id than the incoming one —
6 i.e. an impostor file. When this condition exists in the local store, apply_mpack
7 propagates the unhandled OSError and the entire pull/push/clone call stack crashes
8 with an unhandled exception rather than logging CRITICAL and continuing.
9
10 Concrete attack / failure scenario:
11 1. Local store has commit file abc.msgpack containing impostor bytes
12 (different commit_id inside the file than the filename implies).
13 2. A mpack arrives containing the legitimate abc commit.
14 3. write_commit reads the existing file, detects commit_id mismatch, raises
15 OSError("Store integrity violation").
16 4. apply_mpack (before fix) propagates the OSError → muse pull / muse clone
17 crashes with an unhandled exception.
18
19 Scope of tests
20 --------------
21 - apply_mpack with impostor commit file does not crash (no uncaught OSError)
22 - apply_mpack logs CRITICAL when OSError is raised by write_commit
23 - apply_mpack continues processing remaining commits after OSError on one
24 - apply_mpack correctly reports commits_written for non-affected commits
25 - The impostor file is NOT overwritten by the mpack commit (safety)
26 - apply_mpack raises ValueError (not OSError) for commits with wrong hash
27 """
28 from __future__ import annotations
29
30 import datetime
31 import logging
32 import pathlib
33
34 import msgpack
35 import pytest
36
37 from muse.core.mpack import MPack, apply_mpack
38 from muse.core.types import Manifest, NULL_COMMIT_ID
39 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
40 from muse.core.object_store import object_path
41 from muse.core.commits import (
42 CommitDict,
43 CommitRecord,
44 read_commit,
45 write_commit,
46 )
47 from muse.core.snapshots import (
48 SnapshotRecord,
49 write_snapshot,
50 )
51 from muse.core.paths import commits_dir, muse_dir, snapshots_dir
52
53 _TS = datetime.datetime(2024, 6, 15, 10, 0, 0, tzinfo=datetime.timezone.utc)
54
55
56 def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
57 repo = tmp_path / "repo"
58 repo.mkdir()
59 muse_dir(repo).mkdir()
60 (commits_dir(repo)).mkdir()
61 (snapshots_dir(repo)).mkdir()
62 return repo
63
64
65 def _manifest_for(message: str) -> Manifest:
66 # Use a sha256:-prefixed ID so validate_object_id passes inside apply_mpack.
67 return {f"{message}.py": "sha256:" + "a" * 64}
68
69
70 def _make_commit_record(message: str, parent: str | None = None) -> CommitRecord:
71 manifest = _manifest_for(message)
72 snap_id = compute_snapshot_id(manifest)
73 parent_ids = [parent] if parent else []
74 cid = compute_commit_id(
75 parent_ids=parent_ids,
76 snapshot_id=snap_id,
77 message=message,
78 committed_at_iso=_TS.isoformat(),
79 author="tester",
80 )
81 return CommitRecord(
82 commit_id=cid,
83 branch="main",
84 snapshot_id=snap_id,
85 message=message,
86 committed_at=_TS,
87 author="tester",
88 parent_commit_id=parent,
89 parent2_commit_id=None,
90 )
91
92
93 def _bundle_with_commits(commits: list[CommitRecord]) -> MPack:
94 # Include snapshots so apply_mpack's missing-snapshot guard passes and
95 # commits reach write_commit (where impostor OSError fires).
96 snapshots = [
97 {
98 "snapshot_id": c.snapshot_id,
99 "parent_snapshot_id": None,
100 "delta_upsert": _manifest_for(c.message),
101 "delta_remove": [],
102 "directories": [],
103 }
104 for c in commits
105 ]
106 return MPack(blobs=[], snapshots=snapshots, commits=[c.to_dict() for c in commits], tags=[])
107
108
109 def _write_impostor_file(repo: pathlib.Path, path_commit_id: str, content_commit_id: str) -> None:
110 """Write a commit object at path_commit_id's object_path that contains content_commit_id inside."""
111 import json as _json
112 impostor_data = {
113 "commit_id": content_commit_id,
114 "repo_id": "impostor",
115 "branch": "main",
116 "snapshot_id": NULL_COMMIT_ID,
117 "message": "impostor",
118 "committed_at": _TS.isoformat(),
119 "parent_commit_id": None,
120 "parent2_commit_id": None,
121 "author": "attacker",
122 }
123 payload = _json.dumps(impostor_data, separators=(",", ":")).encode()
124 path = object_path(repo, path_commit_id)
125 path.parent.mkdir(parents=True, exist_ok=True)
126 path.write_bytes(f"commit {len(payload)}\0".encode() + payload)
127
128
129 # ──────────────────────────────────────────────────────────────────────────────
130 # Bug 13: unhandled OSError from write_commit
131 # ──────────────────────────────────────────────────────────────────────────────
132
133 class TestApplyPackOsErrorIntegrity:
134
135 def test_apply_pack_does_not_crash_on_store_integrity_violation(self, tmp_path: pathlib.Path) -> None:
136 """Bug 13: apply_mpack must not propagate OSError from write_commit."""
137 repo = _make_repo(tmp_path)
138 c = _make_commit_record("good-commit")
139
140 # Poison the store: write an impostor at c.commit_id's path
141 _write_impostor_file(repo, c.commit_id, "f" * 64)
142
143 mpack = _bundle_with_commits([c])
144 # Before the fix: apply_mpack would raise OSError("Store integrity violation")
145 # After the fix: apply_mpack must return normally (logs CRITICAL, skips commit)
146 result = apply_mpack(repo, mpack) # must not raise
147 assert result is not None
148
149 def test_apply_pack_impostor_commit_not_readable(
150 self, tmp_path: pathlib.Path, caplog: pytest.LogCaptureFixture
151 ) -> None:
152 """An impostor at the commit's object path blocks the legitimate commit from being read."""
153 repo = _make_repo(tmp_path)
154 c = _make_commit_record("critical-log-commit")
155 _write_impostor_file(repo, c.commit_id, "e" * 64)
156
157 mpack = _bundle_with_commits([c])
158 result = apply_mpack(repo, mpack) # must not raise
159 assert result is not None
160 # Impostor blocks the legitimate commit — read_commit returns None (hash mismatch)
161 assert read_commit(repo, c.commit_id) is None
162
163 def test_apply_pack_continues_after_integrity_violation(self, tmp_path: pathlib.Path) -> None:
164 """Remaining commits must be processed after an integrity-violation skip."""
165 repo = _make_repo(tmp_path)
166 bad = _make_commit_record("bad-commit")
167 good = _make_commit_record("good-commit-after")
168
169 # Poison only the bad commit's file
170 _write_impostor_file(repo, bad.commit_id, "d" * 64)
171
172 mpack = _bundle_with_commits([bad, good])
173 result = apply_mpack(repo, mpack)
174
175 # good commit must be written despite the preceding integrity violation
176 assert read_commit(repo, good.commit_id) is not None, (
177 "apply_mpack must continue processing commits after an integrity violation"
178 )
179
180 def test_apply_pack_commits_written_excludes_impostor_blocked(self, tmp_path: pathlib.Path) -> None:
181 """commits_written must not include commits whose object_path is already occupied."""
182 repo = _make_repo(tmp_path)
183 bad = _make_commit_record("integrity-violation")
184 good = _make_commit_record("normal-commit")
185
186 _write_impostor_file(repo, bad.commit_id, "c" * 64)
187
188 mpack = _bundle_with_commits([bad, good])
189 result = apply_mpack(repo, mpack)
190
191 # bad commit's path is occupied by impostor — skipped (not counted as written)
192 # good commit written normally
193 assert result["commits_written"] == 1, (
194 f"commits_written should be 1 (only good), got {result['commits_written']}"
195 )
196
197 def test_impostor_file_not_overwritten_by_apply_mpack(self, tmp_path: pathlib.Path) -> None:
198 """apply_mpack must NOT replace the impostor file — write_commit is idempotent."""
199 import json as _json
200 repo = _make_repo(tmp_path)
201 c = _make_commit_record("legit-commit")
202 impostor_id = "b" * 64
203 _write_impostor_file(repo, c.commit_id, impostor_id)
204
205 mpack = _bundle_with_commits([c])
206 apply_mpack(repo, mpack)
207
208 # The object file still contains the impostor — write_commit skips existing objects
209 path = object_path(repo, c.commit_id)
210 raw_bytes = path.read_bytes()
211 null_idx = raw_bytes.index(b"\0")
212 data = _json.loads(raw_bytes[null_idx + 1:])
213 assert data["commit_id"] == impostor_id, (
214 "apply_mpack must not overwrite an impostor file — that would hide the "
215 "integrity violation from the user"
216 )
217
218 def test_apply_pack_valid_commit_no_integrity_violation(self, tmp_path: pathlib.Path) -> None:
219 """Regression: valid commits must still be written normally."""
220 repo = _make_repo(tmp_path)
221 c = _make_commit_record("clean-commit")
222 mpack = _bundle_with_commits([c])
223 result = apply_mpack(repo, mpack)
224
225 assert result["commits_written"] == 1
226 assert read_commit(repo, c.commit_id) is not None
227
228 def test_apply_pack_wrong_hash_raises_valuerror_not_oserror(self, tmp_path: pathlib.Path) -> None:
229 """Commits with mismatched commit_id hash raise ValueError (caught separately)."""
230 repo = _make_repo(tmp_path)
231 # Bad record: commit_id doesn't match content hash
232 bad_dict = CommitDict(
233 commit_id="f" * 64, # wrong hash
234 branch="main",
235 snapshot_id="a" * 64,
236 message="bad",
237 committed_at=_TS.isoformat(),
238 parent_commit_id=None,
239 parent2_commit_id=None,
240 author="tester",
241 )
242 mpack = MPack(blobs=[], snapshots=[], commits=[bad_dict], tags=[])
243 result = apply_mpack(repo, mpack) # must not raise
244 assert result["commits_written"] == 0
File History 1 commit