gabriel / muse public
test_transport_snapshot_directories_dropped.py python
281 lines 11.2 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
1 """Tests for Bug 11: _coerce_snapshot_dict silently drops the ``directories``
2 (and ``note``) fields from incoming snapshot data.
3
4 Root cause: the original implementation returned a SnapshotDict with only
5 ``snapshot_id``, ``manifest``, and ``created_at`` — omitting ``directories``
6 and ``note``. Since ``directories`` feeds into ``compute_snapshot_id``, any
7 snapshot with non-empty directories would:
8
9 (before Bug-9 fix) be written with directories=[] → stored snapshot_id
10 doesn't match recomputed hash → read_snapshot returns None forever.
11
12 (after Bug-9 fix) be rejected by write_snapshot's incoming hash verification
13 → apply_mpack logs a warning and skips it → the snapshot never reaches disk.
14
15 Either way, snapshots with non-empty directories could never be received via
16 the HTTP transport layer.
17
18 Scope of tests
19 --------------
20 Unit (_coerce_snapshot_dict):
21 - directories list is preserved (was silently dropped)
22 - note string is preserved (was silently dropped)
23 - missing directories defaults to empty list (no crash)
24 - empty string note preserved
25 - extra unknown keys in raw dict are safely ignored
26
27 Integration (_parse_mpack round-trip):
28 - mpack with snapshot directories survives parse
29 - mpack with snapshot note survives parse
30 - mpack without directories key produces empty list (not KeyError)
31
32 End-to-end (parse → write_snapshot → read_snapshot):
33 - snapshot with directories written from parsed mpack is readable
34 - snapshot_id matches after parse + write + read
35 - snapshot without directories still works after fix
36
37 Security:
38 - directories entries that are not strings are filtered out
39 - non-list directories value is treated as empty list
40 """
41 from __future__ import annotations
42
43 import datetime
44 import pathlib
45
46 import msgpack
47 import pytest
48
49 from muse.core.ids import hash_snapshot as compute_snapshot_id
50 from muse.core.snapshots import (
51 SnapshotDict,
52 SnapshotRecord,
53 read_snapshot,
54 write_snapshot,
55 )
56 from muse.core.paths import muse_dir, snapshots_dir
57 from muse.core.types import Manifest, MsgpackDict
58 from muse.core.transport import _coerce_snapshot_dict, _parse_mpack
59
60 _TS = "2024-06-15T10:00:00+00:00"
61
62
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create a bare repository skeleton under *tmp_path* and return its root.

    The ``repo`` root is created first, followed by the directories that
    ``muse_dir`` and ``snapshots_dir`` report for that root, in that order.
    """
    root = tmp_path / "repo"
    root.mkdir()
    # Resolve and create one directory at a time so the muse directory is
    # already on disk when the snapshots directory is made.
    for locate in (muse_dir, snapshots_dir):
        locate(root).mkdir()
    return root
69
70
def _snap_wire(
    manifest: Manifest,
    directories: list[str],
    note: str = "",
) -> SnapshotDict:
    """Build a wire-format snapshot dict for *manifest* and *directories*.

    The ``snapshot_id`` is computed from the manifest and directories via
    ``compute_snapshot_id``; ``created_at`` is always the module-level
    ``_TS`` timestamp.
    """
    return SnapshotDict(
        snapshot_id=compute_snapshot_id(manifest, directories),
        manifest=manifest,
        directories=directories,
        created_at=_TS,
        note=note,
    )
84
85
86 # ──────────────────────────────────────────────────────────────────────────────
87 # Unit: _coerce_snapshot_dict
88 # ──────────────────────────────────────────────────────────────────────────────
89
class TestCoerceSnapshotDict:
    """Unit tests for ``_coerce_snapshot_dict`` field handling (Bug 11)."""

    def test_directories_preserved(self) -> None:
        """Bug 11: directories must not be dropped by _coerce_snapshot_dict."""
        raw = {
            "snapshot_id": "a" * 64,
            "manifest": {"src/main.py": "b" * 64},
            "directories": ["src", "tests"],
            "created_at": _TS,
            "note": "",
        }
        result = _coerce_snapshot_dict(raw)
        assert result["directories"] == ["src", "tests"], (
            "BUG 11: directories were silently dropped during snapshot coercion"
        )

    def test_note_preserved(self) -> None:
        """Bug 11: note must not be dropped by _coerce_snapshot_dict."""
        raw = {
            "snapshot_id": "a" * 64,
            "manifest": {},
            "directories": [],
            "created_at": _TS,
            "note": "initial snapshot",
        }
        result = _coerce_snapshot_dict(raw)
        assert result["note"] == "initial snapshot"

    def test_missing_directories_defaults_to_empty_list(self) -> None:
        """A raw dict without a ``directories`` key must yield an empty list."""
        raw = {
            "snapshot_id": "a" * 64,
            "manifest": {},
            "created_at": _TS,
        }
        result = _coerce_snapshot_dict(raw)
        assert result["directories"] == []

    def test_empty_string_note_preserved(self) -> None:
        """An empty-string note must come through unchanged."""
        raw = {
            "snapshot_id": "a" * 64,
            "manifest": {},
            "directories": [],
            "created_at": _TS,
            "note": "",
        }
        result = _coerce_snapshot_dict(raw)
        assert result["note"] == ""

    def test_non_list_directories_treated_as_empty(self) -> None:
        """A non-list directories value must not crash — default to []."""
        raw = {
            "snapshot_id": "a" * 64,
            "manifest": {},
            "directories": "not-a-list",
            "created_at": _TS,
            "note": "",
        }
        result = _coerce_snapshot_dict(raw)
        assert result["directories"] == []

    def test_non_string_directory_entries_filtered(self) -> None:
        """Non-string entries in the directories list must be filtered out."""
        raw = {
            "snapshot_id": "a" * 64,
            "manifest": {},
            "directories": ["src", 42, None, "tests", b"bytes"],
            "created_at": _TS,
            "note": "",
        }
        result = _coerce_snapshot_dict(raw)
        assert result["directories"] == ["src", "tests"]

    def test_extra_unknown_keys_do_not_break_coercion(self) -> None:
        """Unknown keys in the raw dict must not disturb the known fields.

        The module docstring lists this case in the unit-test scope. The
        check is deliberately limited to "coercion succeeds and the known
        fields come through", which is what the other tests here rely on.
        """
        raw = {
            "snapshot_id": "a" * 64,
            "manifest": {},
            "directories": ["src"],
            "created_at": _TS,
            "note": "kept",
            "unexpected_key": "ignored",
        }
        result = _coerce_snapshot_dict(raw)  # must not raise
        assert result["directories"] == ["src"]
        assert result["note"] == "kept"
161
162
163 # ──────────────────────────────────────────────────────────────────────────────
164 # Integration: _parse_mpack round-trip
165 # ──────────────────────────────────────────────────────────────────────────────
166
class TestParseBundleSnapshotDirectories:
    """Integration tests: snapshot fields must survive a ``_parse_mpack`` round-trip."""

    def _make_bundle_bytes(
        self, snapshots: list[SnapshotDict | MsgpackDict]
    ) -> bytes:
        """Pack *snapshots* into an mpack payload with empty commits/blobs/tags.

        Plain dicts are accepted alongside ``SnapshotDict`` so tests can send
        deliberately incomplete snapshot entries.
        """
        return msgpack.packb(
            {"snapshots": snapshots, "commits": [], "blobs": [], "tags": []},
            use_bin_type=True,
        )

    def test_bundle_snapshot_directories_survive_parse(self) -> None:
        """Snapshot directories must be present after _parse_mpack."""
        wire = _snap_wire({"src/main.py": "a" * 64}, ["src", "tests"])
        bundle_bytes = self._make_bundle_bytes([wire])
        mpack = _parse_mpack(bundle_bytes)

        snaps = mpack.get("snapshots") or []
        assert len(snaps) == 1
        assert snaps[0]["directories"] == ["src", "tests"], (
            "BUG 11: directories were dropped during _parse_mpack"
        )

    def test_bundle_snapshot_note_survives_parse(self) -> None:
        """Snapshot note must be present after _parse_mpack."""
        wire = _snap_wire({"src/main.py": "a" * 64}, [], note="feature snapshot")
        bundle_bytes = self._make_bundle_bytes([wire])
        mpack = _parse_mpack(bundle_bytes)

        snaps = mpack.get("snapshots") or []
        # Check the count first so a dropped snapshot fails as an assertion
        # rather than as an IndexError on the next line.
        assert len(snaps) == 1
        assert snaps[0]["note"] == "feature snapshot"

    def test_bundle_snapshot_without_directories_key_no_keyerror(self) -> None:
        """A snapshot with no directories key must not raise KeyError."""
        wire = {
            "snapshot_id": compute_snapshot_id({"f.py": "a" * 64}, []),
            "manifest": {"f.py": "a" * 64},
            "created_at": _TS,
            # deliberately omit "directories" and "note"
        }
        bundle_bytes = self._make_bundle_bytes([wire])
        mpack = _parse_mpack(bundle_bytes)  # must not raise
        snaps = mpack.get("snapshots") or []
        # Check the count first so a dropped snapshot fails as an assertion
        # rather than as an IndexError on the next line.
        assert len(snaps) == 1
        assert snaps[0]["directories"] == []
204
205
206 # ──────────────────────────────────────────────────────────────────────────────
207 # End-to-end: parse → write_snapshot → read_snapshot
208 # ──────────────────────────────────────────────────────────────────────────────
209
class TestSnapshotDirectoriesE2E:
    """End-to-end tests: parse an mpack, write each snapshot, read it back."""

    def _apply_snapshot_from_bundle(self, repo: pathlib.Path, wire: MsgpackDict) -> None:
        """Simulate apply_mpack for a single snapshot via _parse_mpack."""
        payload = {"snapshots": [wire], "commits": [], "blobs": [], "tags": []}
        parsed = _parse_mpack(msgpack.packb(payload, use_bin_type=True))
        for entry in parsed.get("snapshots") or []:
            write_snapshot(repo, SnapshotRecord.from_dict(entry))

    def test_snapshot_with_directories_readable_after_parse_and_write(self, tmp_path: pathlib.Path) -> None:
        """Bug 11: snapshot with directories must be readable after HTTP parse + write."""
        repo = _make_repo(tmp_path)
        files = {"src/main.py": "a" * 64, "tests/test_main.py": "b" * 64}
        dirs = ["src", "tests"]
        wire = _snap_wire(files, dirs)

        self._apply_snapshot_from_bundle(repo, wire)

        loaded = read_snapshot(repo, wire["snapshot_id"])
        assert loaded is not None, (
            "BUG 11: snapshot with directories is unreadable after HTTP parse → write"
        )
        assert loaded.snapshot_id == wire["snapshot_id"]
        assert loaded.directories == dirs
        assert loaded.manifest == files

    def test_snapshot_id_matches_after_parse_write_read(self, tmp_path: pathlib.Path) -> None:
        """The snapshot_id must be the same at every stage of the pipeline."""
        repo = _make_repo(tmp_path)
        wire = _snap_wire({"app/main.py": "c" * 64}, ["app"])
        # Capture the id before the round-trip; it is the lookup key afterwards.
        expected_id = wire["snapshot_id"]

        self._apply_snapshot_from_bundle(repo, wire)

        loaded = read_snapshot(repo, expected_id)
        assert loaded is not None
        assert loaded.snapshot_id == expected_id

    def test_snapshot_without_directories_still_works_after_fix(self, tmp_path: pathlib.Path) -> None:
        """Regression: snapshots without directories must still be stored after the fix."""
        repo = _make_repo(tmp_path)
        wire = _snap_wire({"README.md": "d" * 64}, directories=[])

        self._apply_snapshot_from_bundle(repo, wire)

        loaded = read_snapshot(repo, wire["snapshot_id"])
        assert loaded is not None
        assert loaded.directories == []

    def test_snapshot_with_multiple_directories_round_trips(self, tmp_path: pathlib.Path) -> None:
        """Snapshot with several directories must survive the full HTTP transport pipeline."""
        repo = _make_repo(tmp_path)
        files = {
            "src/a.py": "a" * 64,
            "lib/b.py": "b" * 64,
            "docs/c.md": "c" * 64,
        }
        dirs = ["docs", "lib", "src"]
        wire = _snap_wire(files, dirs)

        self._apply_snapshot_from_bundle(repo, wire)

        loaded = read_snapshot(repo, wire["snapshot_id"])
        assert loaded is not None
        # Compare order-insensitively, as the stored ordering is not asserted here.
        assert sorted(loaded.directories) == sorted(dirs)
File History 5 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:0313c134f0ef4518a9c3a0ec359ffdc42546dc720010730374edfe0857caf7ef rename: delta_add → delta_upsert across wire format, source… Sonnet 4.6 minor 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 28 days ago