gabriel / muse public
test_push_mpack_e2e.py python
485 lines 19.9 KB
Raw
sha256:fe844c2411edd1cec3d4c847f36a96c6ccd4e3d7d1a715106d2ecd64216bf94f fix: bare object detection and read recovery; rm adapter files Sonnet 4.6 minor ⚠ breaking 4 days ago
1 """TDD — mpack push path end-to-end tests.
2
3 Gap 1: _push_mpack() is the production push path but was never exercised
4 in any test. These tests drive the full mpack protocol (presign → PUT
5 → unpack-mpack) with a fake urllib layer.
6
7 Gap 6: The merge-commit P2 ancestor optimisation: when the local tip is a
8 merge commit, _push_mpack adds ALL remote branch heads to branch_have
9 so the BFS stops at any already-remote ancestor on the P2 chain rather
10 than walking back to the repo root.
11
12 Ported from deleted transport tests:
13 V1 Second push sends commits_count=1 (have filter works)
14 V2 First push (empty remote) sends all commits
15 V3 Live GET /refs anchors the BFS even without a tracking ref
16 VII1 Two-push sequence: commits_count in second push equals 1
17
18 New tests:
19 E1 presign → PUT → unpack-mpack sequence is called in order
20 E2 mpack_key in presign body == sha256 of the actual wire bytes
21 E6 Non-merge commit: branch_have contains only the direct remote head
22 E7 Merge commit: branch_have includes all remote branch heads (P2 opt)
23 """
24 from __future__ import annotations
25
26 import datetime
27 import json
28 import pathlib
29 from collections.abc import Mapping
30 from typing import TYPE_CHECKING
31 from unittest.mock import MagicMock, patch
32
33 import msgpack
34 import pytest
35
36 from muse._version import __version__
37 from muse.core.mpack import PushResult, RemoteInfo
38 from muse.core.transport import SigningIdentity
39 from muse.core.object_store import write_object
40 from muse.core.paths import heads_dir, muse_dir, remotes_dir
41
42 if TYPE_CHECKING:
43 from muse.core.mpack import _WalkResult
44
# Type aliases shared by the fake transport and the tests in this module.
_Headers = dict[str, str]  # HTTP header map
_JsonDict = dict[str, str | int | float | bool | None | list[str]]  # JSON object
_RemoteHeads = dict[str, str]  # branch → commit_id
_WalkCall = dict[str, list[str] | None]  # {"tips": [...], "have": [...] | None}
49 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
50 from muse.core.commits import (
51 CommitRecord,
52 write_commit,
53 )
54 from muse.core.snapshots import (
55 SnapshotRecord,
56 write_snapshot,
57 )
58 from muse.core.types import Manifest, blob_id
59 from tests.cli_test_helper import CliRunner
60
61
# Passed as the first argument to ``runner.invoke`` by ``_run_push_cli``.
# NOTE(review): presumably the test CliRunner resolves the real app when
# given ``None`` — confirm against tests.cli_test_helper.
cli = None
runner = CliRunner()

# Upload URL handed back by the fake presign step.
_UPLOAD_URL = "https://minio.example.com/mpacks/put?sig=x"
66
67
68 # ---------------------------------------------------------------------------
69 # Helpers
70 # ---------------------------------------------------------------------------
71
def _bare_repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *tmp_path* and return it.

    Creates the store sub-directories, then writes ``HEAD`` (pointing at
    ``refs/heads/main``), ``repo.json`` and a ``config.toml`` that declares
    the ``origin`` remote.  Finally sets ``MUSE_REPO_ROOT`` and changes the
    working directory to *tmp_path* through *monkeypatch*.
    """
    store = muse_dir(tmp_path)
    for subdir in ("commits", "snapshots", "objects", "refs/heads", "remotes"):
        (store / subdir).mkdir(parents=True, exist_ok=True)

    repo_meta = {"repo_id": "test-repo", "schema_version": __version__, "domain": "code"}
    seed_files = (
        ("HEAD", "ref: refs/heads/main\n"),
        ("repo.json", json.dumps(repo_meta)),
        ("config.toml", '[remotes.origin]\nurl = "https://hub.example.com/r"\n'),
    )
    for filename, text in seed_files:
        (store / filename).write_text(text)

    monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
    monkeypatch.chdir(tmp_path)
    return tmp_path
84
85
def _make_commit(
    root: pathlib.Path,
    label: str,
    parent_id: str | None = None,
    parent2_id: str | None = None,
    content: bytes | None = None,
) -> CommitRecord:
    """Write a blob, a one-file snapshot and a commit into *root*; return the commit.

    The blob payload is *content* when given, otherwise ``content-<label>``
    encoded to bytes.  The snapshot manifest maps ``file.txt`` to that blob.
    The commit is recorded on branch ``main`` with a fixed timestamp and
    message ``commit <label>``; *parent_id* / *parent2_id* become its first
    and second parents (falsy values are left out of the commit-id hash).
    """
    message = f"commit {label}"
    payload = f"content-{label}".encode() if content is None else content

    object_id = blob_id(payload)
    write_object(root, object_id, payload)

    manifest: Manifest = {"file.txt": object_id}
    snapshot_id = compute_snapshot_id(manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest))

    timestamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    commit_id = compute_commit_id(
        parent_ids=list(filter(None, (parent_id, parent2_id))),
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=timestamp.isoformat(),
    )

    record = CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snapshot_id,
        message=message,
        committed_at=timestamp,
        parent_commit_id=parent_id,
        parent2_commit_id=parent2_id,
    )
    write_commit(root, record)
    return record
118
119
class _CapturingClient:
    """Fake urllib layer that records all requests and returns success responses."""
    # NOTE(review): no methods are visible on this class in this view, and
    # nothing in this module instantiates it — confirm whether it is still
    # needed or is a leftover from the pre-transport fake.
122 def _fake_resp(body: bytes, status: int = 200) -> MagicMock:
123 r = MagicMock()
124 r.status_code = status
125 r.content = body
126 r.headers = {"content-type": "application/x-msgpack"}
127 r.text = ""
128 return r
129
130
131 class _CapturingTransport:
132 """Fake transport that records calls to push_mpack_presign/put/unpack."""
133
134 def __init__(self, remote_heads: _RemoteHeads | None = None, result_head: str = "") -> None:
135 self._remote_heads = remote_heads or {}
136 self._result_head = result_head
137 self.presign_calls: list[tuple[str, "SigningIdentity | None", bytes]] = []
138 self.put_calls: list[tuple[str, bytes, str]] = []
139 self.unpack_calls: list[tuple[str, "SigningIdentity | None", str, dict[str, str | int | bool]]] = []
140 self.events: list[str] = []
141
142 def fake_post(self, url: str, data: bytes, headers: Mapping[str, str],
143 *, verify: bool = True) -> tuple[int, bytes]:
144 self.posts.append((url, data))
145 self.events.append((url, "post"))
146 if "mpack-presign" in url:
147 return 200, json.dumps({"upload_url": _UPLOAD_URL}).encode()
148 return 200, json.dumps(
149 {"job_id": "job-e2e", "head": self._result_head, "branch": "main",
150 "blobs_in_mpack": 0, "commits_in_mpack": 0}
151 ).encode()
152 def fetch_remote_info(self, url: str, signing: "SigningIdentity | None") -> RemoteInfo:
153 return RemoteInfo(domain="code", default_branch="main", branch_heads=self._remote_heads)
154
155 def _build_request(self, method: str, url: str, signing: "SigningIdentity | None", body: bytes | None = None, **kw: "str | bytes | None") -> MagicMock:
156 req = MagicMock()
157 req.headers = {"Authorization": "MSign stub", "Content-Type": "application/x-msgpack"}
158 return req
159
160 def push_mpack_presign(self, url: str, signing: "SigningIdentity | None", mpack_bytes: bytes, ttl_seconds: int = 3600) -> "dict[str, str]":
161 self.presign_calls.append((url, signing, mpack_bytes))
162 self.events.append("presign")
163 return {"upload_url": _UPLOAD_URL, "mpack_key": blob_id(mpack_bytes)}
164
165 def fake_put(self, url: str, data: bytes, **kw: int | bool) -> tuple[int, bytes]:
166 self.puts.append((url, data))
167 self.events.append((url, "put"))
168 return 200, b""
169 def push_mpack_put(self, upload_url: str, mpack_bytes: bytes, mpack_key: str = "") -> None:
170 self.put_calls.append((upload_url, mpack_bytes, mpack_key))
171 self.events.append("put")
172
173 def push_mpack_unpack(self, url: str, signing: "SigningIdentity | None", mpack_key: str, **kwargs: "str | int | bool") -> "dict[str, str | int]":
174 self.unpack_calls.append((url, signing, mpack_key, kwargs)) # type: ignore[arg-type]
175 self.events.append("unpack")
176 return {
177 "job_id": "job-e2e",
178 "head": self._result_head,
179 "branch": str(kwargs.get("branch", "main")),
180 "blobs_in_mpack": 0,
181 "commits_in_mpack": int(kwargs.get("commits_count", 0)),
182 }
183
184
def _run_push_cli(
    monkeypatch: pytest.MonkeyPatch,
    transport: "_CapturingTransport",
    client: "_CapturingTransport | None" = None,
    args: list[str] | None = None,
) -> "object":
    """Invoke the push command with the transport factory and urllib helpers patched.

    ``make_transport`` is patched to return *transport*.  ``_urllib_post`` and
    ``_urllib_put`` delegate to ``client.fake_post`` / ``client.fake_put`` when
    a *client* is supplied, and otherwise to a stub returning ``(200, b"")``.
    *args* defaults to ``["push", "origin"]``.  Exceptions raised by the
    command propagate to the caller.
    """
    if client:
        post_effect, put_effect = client.fake_post, client.fake_put
    else:
        def _always_ok(*a, **kw):
            return (200, b"")

        post_effect = put_effect = _always_ok

    argv = args or ["push", "origin"]
    factory_patch = patch("muse.cli.commands.push.make_transport", return_value=transport)
    post_patch = patch("muse.cli.commands.push._urllib_post", side_effect=post_effect)
    put_patch = patch("muse.cli.commands.push._urllib_put", side_effect=put_effect)
    with factory_patch, post_patch, put_patch:
        return runner.invoke(cli, argv, catch_exceptions=False)
197
198
199 def _unpack_kwargs(transport: "_CapturingTransport") -> _JsonDict:
200 """Return the kwargs dict from the first push_mpack_unpack call."""
201 assert transport.unpack_calls, "push_mpack_unpack was not called"
202 return dict(transport.unpack_calls[0][3])
203
204
205 # ===========================================================================
206 # E1 — presign → PUT → unpack sequence
207 # ===========================================================================
208
def test_e1_three_step_sequence_in_order(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """_run_mpack_path must call presign, then PUT, then unpack in that order."""
    root = _bare_repo(tmp_path, monkeypatch)
    tip = _make_commit(root, "e1")
    (heads_dir(root) / "main").write_text(tip.commit_id)

    transport = _CapturingTransport(result_head=tip.commit_id)
    result = _run_push_cli(monkeypatch, transport)
    assert result.exit_code == 0, result.output

    # Each of the three protocol steps must have been recorded at least once.
    assert transport.presign_calls, "push_mpack_presign not called"
    assert transport.put_calls, "push_mpack_put not called"
    assert transport.unpack_calls, "push_mpack_unpack not called"

    # Position of the first occurrence of each step in the shared event log.
    presign_idx, put_idx, unpack_idx = (
        transport.events.index(step) for step in ("presign", "put", "unpack")
    )
    assert presign_idx < put_idx < unpack_idx, (
        f"Sequence wrong: presign@{presign_idx} put@{put_idx} unpack@{unpack_idx}"
    )
233
234
235 # ===========================================================================
236 # E2 — mpack_key in presign body matches sha256 of wire bytes
237 # ===========================================================================
238
def test_e2_presign_key_matches_put_bytes(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """The mpack_key in the presign request must equal blob_id(wire_bytes PUT to MinIO)."""
    root = _bare_repo(tmp_path, monkeypatch)
    commit = _make_commit(root, "e2", content=b"key integrity test")
    (heads_dir(root) / "main").write_text(commit.commit_id)

    transport = _CapturingTransport(result_head=commit.commit_id)
    result = _run_push_cli(monkeypatch, transport)
    # Include the CLI output so a failure is diagnosable.
    assert result.exit_code == 0, result.output

    # Fail with a readable message instead of an IndexError if a step was skipped.
    assert transport.presign_calls, "push_mpack_presign not called"
    assert transport.put_calls, "push_mpack_put not called"

    presign_mpack_bytes = transport.presign_calls[0][2]
    put_mpack_bytes = transport.put_calls[0][1]

    assert presign_mpack_bytes == put_mpack_bytes, (
        "mpack bytes sent to presign and put must be identical"
    )
    # Implied by the byte equality above; kept as the key-level statement of
    # the contract.  ``blob_id`` is the module-level import.
    assert blob_id(put_mpack_bytes) == blob_id(presign_mpack_bytes)
260
261
262 # ===========================================================================
263 # V1-port — second push sends commits_count=1 (have filter works)
264 # ===========================================================================
265
def test_v1_second_push_commits_count_is_one(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Second push: remote has c1, local has c2 → unpack body has commits_count=1."""
    root = _bare_repo(tmp_path, monkeypatch)
    first = _make_commit(root, "v1-first", content=b"v1")
    second = _make_commit(root, "v1-second", parent_id=first.commit_id, content=b"v2")
    (heads_dir(root) / "main").write_text(second.commit_id)

    # The fake remote reports the first commit as its current main head.
    transport = _CapturingTransport(
        remote_heads={"main": first.commit_id},
        result_head=second.commit_id,
    )
    result = _run_push_cli(monkeypatch, transport)
    assert result.exit_code == 0, result.output

    kwargs = _unpack_kwargs(transport)
    assert kwargs["commits_count"] == 1, (
        f"Second push must send commits_count=1, got {kwargs['commits_count']}. "
        "Bug: have filter dropped the ancestor anchor so BFS walked all commits."
    )
285
286
287 # ===========================================================================
288 # V2-port — first push (empty remote) sends all commits
289 # ===========================================================================
290
def test_v2_first_push_sends_all_commits(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """When the remote is empty, unpack body has commits_count equal to full history."""
    root = _bare_repo(tmp_path, monkeypatch)
    older = _make_commit(root, "v2-a", content=b"a")
    newer = _make_commit(root, "v2-b", parent_id=older.commit_id, content=b"b")
    (heads_dir(root) / "main").write_text(newer.commit_id)

    # An empty head map stands for a remote with no branches yet.
    transport = _CapturingTransport(remote_heads={}, result_head=newer.commit_id)
    result = _run_push_cli(monkeypatch, transport)
    assert result.exit_code == 0, result.output

    kwargs = _unpack_kwargs(transport)
    assert kwargs["commits_count"] == 2, (
        f"Full history push must send commits_count=2, got {kwargs['commits_count']}"
    )
308
309
310 # ===========================================================================
311 # V3-port — live GET /refs anchors BFS even without tracking ref
312 # ===========================================================================
313
def test_v3_live_refs_anchors_bfs(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Even without a local tracking ref, GET /refs result anchors the BFS."""
    root = _bare_repo(tmp_path, monkeypatch)
    base = _make_commit(root, "v3-base", content=b"base")
    fresh = _make_commit(root, "v3-new", parent_id=base.commit_id, content=b"new")
    (heads_dir(root) / "main").write_text(fresh.commit_id)

    # Nothing is written under the remotes directory here: the only record
    # of the base commit being remote is the fake transport's head map.
    transport = _CapturingTransport(
        remote_heads={"main": base.commit_id},
        result_head=fresh.commit_id,
    )
    result = _run_push_cli(monkeypatch, transport)
    assert result.exit_code == 0, result.output

    kwargs = _unpack_kwargs(transport)
    assert kwargs["commits_count"] == 1, (
        f"Live /refs anchor must limit BFS to 1 new commit, got {kwargs['commits_count']}"
    )
332
333
334 # ===========================================================================
335 # VII1-port — two sequential pushes; second has commits_count=1
336 # ===========================================================================
337
def test_vii1_push_twice_second_sends_one_commit(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Full two-push sequence: first sends N commits; second sends exactly 1."""
    root = _bare_repo(tmp_path, monkeypatch)
    n = 5

    # Build a linear chain of n commits; each one's parent is the previous.
    ids: list[str] = []
    for i in range(n):
        link = _make_commit(
            root,
            f"chain-{i}",
            parent_id=ids[-1] if ids else None,
            content=f"v{i}".encode(),
        )
        ids.append(link.commit_id)
    chain_tip = ids[-1]
    main_ref = heads_dir(root) / "main"
    main_ref.write_text(chain_tip)

    # Push #1: the remote reports no branches at all.
    t1 = _CapturingTransport(remote_heads={}, result_head=chain_tip)
    r1 = _run_push_cli(monkeypatch, t1)
    assert r1.exit_code == 0, r1.output
    kwargs1 = _unpack_kwargs(t1)
    assert kwargs1["commits_count"] == n, (
        f"First push must send all {n} commits, got {kwargs1['commits_count']}"
    )

    # Write the tracking ref for origin/main by hand, pointing at the pushed tip.
    origin_dir = remotes_dir(root) / "origin"
    origin_dir.mkdir(parents=True, exist_ok=True)
    (origin_dir / "main").write_text(chain_tip)

    # Extend local history by a single commit on top of the pushed tip.
    c_new = _make_commit(root, "one-more", parent_id=chain_tip, content=b"new")
    main_ref.write_text(c_new.commit_id)

    # Push #2: the remote now reports the previous tip as main.
    t2 = _CapturingTransport(remote_heads={"main": chain_tip}, result_head=c_new.commit_id)
    r2 = _run_push_cli(monkeypatch, t2)
    assert r2.exit_code == 0, r2.output

    kwargs2 = _unpack_kwargs(t2)
    assert kwargs2["commits_count"] == 1, (
        f"Second push must send commits_count=1, got {kwargs2['commits_count']}. "
        "Regression: have filter dropped the ancestor anchor."
    )
380
381
382 # ===========================================================================
383 # E6 — non-merge commit: branch_have is single remote head
384 # ===========================================================================
385
def test_e6_non_merge_branch_have_is_single_head(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A non-merge commit: the walk from the pushed tip has the remote main head in ``have``.

    The fake remote's ``main`` and ``dev`` heads are the same commit here, so
    this test cannot tell "main head only" apart from "all remote heads"; it
    only pins that the remote main head anchors the walk.
    """
    from muse.cli.commands import push as _push_mod

    root = _bare_repo(tmp_path, monkeypatch)
    c1 = _make_commit(root, "e6-base", content=b"base")
    c2 = _make_commit(root, "e6-tip", parent_id=c1.commit_id, content=b"tip")
    (heads_dir(root) / "main").write_text(c2.commit_id)

    # Remote has both main and dev; local tip is a simple (non-merge) commit
    transport = _CapturingTransport(
        remote_heads={"main": c1.commit_id, "dev": c1.commit_id},
        result_head=c2.commit_id,
    )

    walk_calls: list[_WalkCall] = []
    original_walk = _push_mod.walk_commits

    def _capture_walk(
        root_arg: pathlib.Path,
        tips: list[str],
        *,
        have: list[str] | None,
    ) -> "_WalkResult":
        # Snapshot the arguments so later mutation by the caller cannot
        # change what was recorded.
        walk_calls.append({"tips": list(tips), "have": list(have) if have else have})
        return original_walk(root_arg, tips, have=have)

    with patch.object(_push_mod, "walk_commits", side_effect=_capture_walk):
        result = _run_push_cli(monkeypatch, transport)
    # A failed push must fail this test rather than pass on partial walks.
    assert result.exit_code == 0, result.output

    # Only walks that start exactly at the pushed tip are of interest.
    branch_walks = [w for w in walk_calls if w["tips"] == [c2.commit_id]]
    assert branch_walks, f"Expected a walk with tip={c2.commit_id[:16]}"

    main_have = branch_walks[0]["have"]
    # Guard against ``have=None`` so the failure is an assertion, not a TypeError.
    assert main_have is not None and c1.commit_id in main_have, (
        "Remote main head must be in have"
    )
426
427
428 # ===========================================================================
429 # E7 — merge commit: branch_have includes ALL remote branch heads (P2 opt)
430 # ===========================================================================
431
def test_e7_merge_commit_branch_have_includes_all_remote_heads(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Gap 6: when the local tip is a merge commit, branch_have must include all
    remote branch heads so the BFS stops at any already-remote ancestor on P2's chain."""
    from muse.cli.commands import push as _push_mod

    root = _bare_repo(tmp_path, monkeypatch)
    # Build: main-base → feature-tip; merge commit has both as parents
    base = _make_commit(root, "e7-base", content=b"base")
    feature = _make_commit(root, "e7-feature", parent_id=base.commit_id, content=b"feature")
    merge = _make_commit(
        root, "e7-merge",
        parent_id=base.commit_id,
        parent2_id=feature.commit_id,
        content=b"merge",
    )
    (heads_dir(root) / "main").write_text(merge.commit_id)

    remote_feature_head = feature.commit_id
    remote_main_head = base.commit_id

    transport = _CapturingTransport(
        remote_heads={"main": remote_main_head, "feature": remote_feature_head},
        result_head=merge.commit_id,
    )

    walk_calls: list[_WalkCall] = []
    original_walk = _push_mod.walk_commits

    def _capture_walk(
        root_arg: pathlib.Path,
        tips: list[str],
        *,
        have: list[str] | None,
    ) -> "_WalkResult":
        # Record copies so later mutation by the caller cannot change the log.
        walk_calls.append({"tips": list(tips), "have": list(have) if have else have})
        return original_walk(root_arg, tips, have=have)

    with patch.object(_push_mod, "walk_commits", side_effect=_capture_walk):
        result = _run_push_cli(monkeypatch, transport)
    # A failed push must fail this test rather than pass on partial walks.
    assert result.exit_code == 0, result.output

    # Inspect the first recorded walk whose tips include the merge commit.
    merge_tip_walks = [w for w in walk_calls if merge.commit_id in w["tips"]]
    assert merge_tip_walks, "Expected a walk with merge commit tip"

    main_walk_have = merge_tip_walks[0]["have"]
    # Guard against ``have=None`` so the failure is an assertion, not a TypeError.
    assert main_walk_have is not None and remote_main_head in main_walk_have, (
        "branch_have must include remote main head for merge commit"
    )
    assert remote_feature_head in main_walk_have, (
        "branch_have must include feature branch head (P2 optimisation) for merge commit. "
        "Fix: add all remote_branch_heads.values() to branch_have when local tip is a merge."
    )
File History 5 commits
sha256:fe844c2411edd1cec3d4c847f36a96c6ccd4e3d7d1a715106d2ecd64216bf94f fix: bare object detection and read recovery; rm adapter files Sonnet 4.6 minor 4 days ago
sha256:79ffe87f5fe2ec146e35f05521218bbf54dffdb0440c07f970bad05f16efb89f chore: merge main — carry all urllib/typing/test fixes from dev Sonnet 4.6 minor 9 days ago
sha256:0bea7600d1eee83e87950be49933b1006fa9dc2c71e7c4ee748d324f61138156 chore: bump version to 0.2.0rc11; fix typing audit violatio… Sonnet 4.6 minor 9 days ago
sha256:7e95b29f2d502ad5eccf2a57af4092763a2e705f1bf1569a8cb7e063b6e6d5bd refactor: replace httpx with stdlib urllib in push path Sonnet 4.6 minor 11 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 12 days ago