gabriel / muse public
test_push_mpack_e2e.py python
493 lines 19.5 KB
Raw
sha256:fe844c2411edd1cec3d4c847f36a96c6ccd4e3d7d1a715106d2ecd64216bf94f fix: bare object detection and read recovery; rm adapter files Sonnet 4.6 minor ⚠ breaking 15 days ago
1 """TDD — mpack push path end-to-end tests.
2
3 Gap 1: _run_mpack_path() is the production push path but was never exercised
4 in any test. These tests drive the full mpack protocol (presign → PUT
5 → unpack-mpack) with a fake httpx layer.
6
7 Gap 6: The merge-commit P2 ancestor optimisation: when the local tip is a
8 merge commit, _push_mpack adds ALL remote branch heads to branch_have
9 so the BFS stops at any already-remote ancestor on the P2 chain rather
10 than walking back to the repo root.
11
12 Ported from deleted transport tests:
13 V1 Second push sends commits_count=1 (have filter works)
14 V2 First push (empty remote) sends all commits
15 V3 Live GET /refs anchors the BFS even without a tracking ref
16 VII1 Two-push sequence: commits_count in second push equals 1
17
18 New tests:
19 E1 presign → PUT → unpack-mpack sequence is called in order
20 E2 mpack_key in presign body == sha256 of the actual wire bytes
21 E6 Non-merge commit: branch_have contains only the direct remote head
22 E7 Merge commit: branch_have includes all remote branch heads (P2 opt)
23 """
24 from __future__ import annotations
25
26 import asyncio
27 import datetime
28 import json
29 import pathlib
30 from typing import TYPE_CHECKING
31 from unittest.mock import AsyncMock, MagicMock, patch
32
33 import msgpack
34 import pytest
35
36 from muse._version import __version__
37 from muse.core.mpack import PushResult, RemoteInfo
38 from muse.core.object_store import write_object
39 from muse.core.paths import heads_dir, muse_dir, remotes_dir
40
41 if TYPE_CHECKING:
42 from muse.core.mpack import _WalkResult
43
# Type aliases shared by the fakes and helpers in this module.
_Headers = dict[str, str]  # HTTP header map (header name -> value)
_JsonDict = dict[str, str | int | float | bool | None | list[str]]  # decoded JSON object
_RemoteHeads = dict[str, str]  # branch name -> commit_id
_WalkCall = dict[str, list[str] | None]  # one recorded walk: {"tips": [...], "have": [...] | None}
48 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
49 from muse.core.commits import (
50 CommitRecord,
51 write_commit,
52 )
53 from muse.core.snapshots import (
54 SnapshotRecord,
55 write_snapshot,
56 )
57 from muse.core.types import Manifest, blob_id
58 from tests.cli_test_helper import CliRunner
59
60
# Application handle forwarded to ``runner.invoke`` by ``_run_push_cli``.
# NOTE(review): it is None here — presumably the project's CliRunner resolves
# the real CLI entry point itself; confirm against tests.cli_test_helper.
cli = None
# Shared runner used by every test in this module to invoke the CLI.
runner = CliRunner()

# Upload URL that the fake presign endpoint hands back to the push client.
_UPLOAD_URL = "https://minio.example.com/mpacks/put?sig=x"
65
66
67 # ---------------------------------------------------------------------------
68 # Helpers
69 # ---------------------------------------------------------------------------
70
def _bare_repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Lay out a minimal repository under *tmp_path* and make it the active repo.

    Creates the store sub-directories, a ``HEAD`` pointing at ``refs/heads/main``,
    a ``repo.json`` and a ``config.toml`` declaring the ``origin`` remote, then
    sets ``MUSE_REPO_ROOT`` and the working directory to *tmp_path*.

    Returns:
        *tmp_path*, unchanged, so callers can use it as the repo root.
    """
    store = muse_dir(tmp_path)

    subdirs = ("commits", "snapshots", "objects", "refs/heads", "remotes")
    for name in subdirs:
        (store / name).mkdir(parents=True, exist_ok=True)

    repo_meta = {"repo_id": "test-repo", "schema_version": __version__, "domain": "code"}
    seed_files = {
        "HEAD": "ref: refs/heads/main\n",
        "repo.json": json.dumps(repo_meta),
        "config.toml": '[remotes.origin]\nurl = "https://hub.example.com/r"\n',
    }
    # dicts preserve insertion order, so files are written HEAD, repo.json, config.toml.
    for filename, text in seed_files.items():
        (store / filename).write_text(text)

    monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
    monkeypatch.chdir(tmp_path)
    return tmp_path
83
84
def _make_commit(
    root: pathlib.Path,
    label: str,
    parent_id: str | None = None,
    parent2_id: str | None = None,
    content: bytes | None = None,
) -> CommitRecord:
    """Write a blob, a one-file snapshot and a commit under *root*.

    Args:
        root: Repository root the records are written into.
        label: Used in the commit message and, when *content* is None, in the
            default blob payload (``content-<label>``).
        parent_id: First parent commit id, if any.
        parent2_id: Second parent commit id; supplying it produces a merge commit.
        content: Blob payload stored as ``file.txt``; defaults to a label-derived value.

    Returns:
        The ``CommitRecord`` that was written (branch ``main``, fixed timestamp
        2026-01-01 UTC).
    """
    payload = f"content-{label}".encode() if content is None else content
    object_id = blob_id(payload)
    write_object(root, object_id, payload)

    files: Manifest = {"file.txt": object_id}
    snapshot_id = compute_snapshot_id(files)
    write_snapshot(root, SnapshotRecord(snapshot_id=snapshot_id, manifest=files))

    message = f"commit {label}"
    timestamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    # Only truthy parents take part in the commit-id hash; the record below
    # still receives both parent arguments as given.
    commit_id = compute_commit_id(
        parent_ids=list(filter(None, (parent_id, parent2_id))),
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=timestamp.isoformat(),
    )

    record = CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snapshot_id,
        message=message,
        committed_at=timestamp,
        parent_commit_id=parent_id,
        parent2_commit_id=parent2_id,
    )
    write_commit(root, record)
    return record
117
118
def _fake_resp(body: bytes, status: int = 200) -> MagicMock:
    """Build a stand-in HTTP response object.

    Args:
        body: Raw bytes exposed as ``.content``.
        status: Value exposed as ``.status_code`` (default 200).

    Returns:
        A ``MagicMock`` with ``status_code``, ``content``, a msgpack
        ``content-type`` header map and an empty ``text``.
    """
    # Keyword arguments to MagicMock are applied as attributes on the new mock.
    return MagicMock(
        status_code=status,
        content=body,
        headers={"content-type": "application/x-msgpack"},
        text="",
    )
126
127
class _CapturingClient:
    """Async-client fake that logs every request and always answers with success.

    Attributes are plain lists so tests can inspect them directly:
    ``posts`` and ``puts`` hold ``(url, body)`` pairs per verb, while ``events``
    holds ``(url, kind)`` pairs for both verbs in the order the calls happened.
    """

    def __init__(self, result_head: str = "") -> None:
        # Commit id echoed back as "head" in the non-presign POST response.
        self._result_head = result_head
        self.posts: list[tuple[str, bytes]] = []
        self.puts: list[tuple[str, bytes]] = []
        # Cross-verb log in true call order, used for sequence assertions.
        self.events: list[tuple[str, str]] = []

    async def __aenter__(self) -> "_CapturingClient":
        return self

    async def __aexit__(self, *_: object) -> None:
        return None

    async def post(self, url: str, *, content: bytes, headers: _Headers) -> MagicMock:
        """Record the POST and reply with a presign or unpack-style payload."""
        self.posts.append((url, content))
        self.events.append((url, "post"))

        if "mpack-presign" in url:
            payload = {"upload_url": _UPLOAD_URL}
        else:
            payload = {
                "job_id": "job-e2e",
                "head": self._result_head,
                "branch": "main",
                "objects_in_mpack": 0,
                "commits_in_mpack": 0,
            }
        return _fake_resp(msgpack.packb(payload, use_bin_type=True))

    async def put(self, url: str, *, content: bytes) -> MagicMock:
        """Record the PUT and reply with an empty 200 response."""
        self.puts.append((url, content))
        self.events.append((url, "put"))
        return _fake_resp(b"", 200)
161
162
def _push_transport(remote_heads: _RemoteHeads | None = None) -> MagicMock:
    """Return a mock transport whose remote advertises *remote_heads*.

    ``fetch_remote_info`` yields a ``RemoteInfo`` for domain ``code`` with
    default branch ``main``; ``_build_request`` yields an object carrying stub
    signing and msgpack content-type headers.

    Args:
        remote_heads: Branch -> commit id map reported by the remote; a falsy
            value is treated as "no branches".
    """
    advertised = remote_heads if remote_heads else {}
    info = RemoteInfo(
        domain="code",
        default_branch="main",
        branch_heads=advertised,
    )

    signed_request = MagicMock()
    signed_request.headers = {
        "Authorization": "MSign stub",
        "Content-Type": "application/x-msgpack",
    }

    transport = MagicMock()
    transport.fetch_remote_info.return_value = info
    transport._build_request.return_value = signed_request
    return transport
174
175
def _run_push_cli(
    monkeypatch: pytest.MonkeyPatch,
    transport: MagicMock,
    client: "_CapturingClient",
    args: list[str] | None = None,
) -> "object":
    """Invoke the push command with the transport and HTTP client replaced by fakes.

    Args:
        monkeypatch: Accepted for call-site symmetry; not used in the body.
        transport: Returned by the patched ``make_transport``.
        client: Returned by both the patched ``AsyncClient`` and ``_make_r2_client``.
        args: CLI argv; a falsy value means ``["push", "origin"]``.

    Returns:
        Whatever ``runner.invoke`` returns (exceptions are not caught).
    """
    argv = args or ["push", "origin"]
    with patch("muse.cli.commands.push.make_transport", return_value=transport):
        with patch("muse.cli.commands.push._httpx.AsyncClient", return_value=client):
            with patch("muse.cli.commands.push._make_r2_client", return_value=client):
                return runner.invoke(cli, argv, catch_exceptions=False)
188
189
def _unpack_body_of(posts: list[tuple[str, bytes]], path_fragment: str) -> _JsonDict:
    """Decode the body of the first recorded POST whose URL contains *path_fragment*.

    Args:
        posts: ``(url, body)`` pairs as recorded by the fake client.
        path_fragment: Substring that selects the request of interest.

    Returns:
        The selected body parsed as JSON.

    Raises:
        AssertionError: If no recorded URL contains *path_fragment*.
    """
    bodies = [payload for target, payload in posts if path_fragment in target]
    assert bodies, f"No POST to URL containing {path_fragment!r}"
    first, *_ = bodies
    return json.loads(first)
195
196
197 # ===========================================================================
198 # E1 — presign → PUT → unpack sequence
199 # ===========================================================================
200
def test_e1_three_step_sequence_in_order(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A push must hit presign, then PUT the payload, then call unpack — in that order."""
    repo = _bare_repo(tmp_path, monkeypatch)
    tip = _make_commit(repo, "e1")
    (heads_dir(repo) / "main").write_text(tip.commit_id)

    transport = _push_transport()
    fake_client = _CapturingClient(result_head=tip.commit_id)

    outcome = _run_push_cli(monkeypatch, transport, fake_client)
    assert outcome.exit_code == 0, outcome.output

    # Each of the three steps must have been issued at least once.
    posted_urls = [url for url, _ in fake_client.posts]
    assert any("mpack-presign" in url for url in posted_urls), (
        "POST to /push/mpack-presign missing"
    )
    assert fake_client.puts, "PUT to MinIO missing"
    assert any("unpack-mpack" in url for url in posted_urls), (
        "POST to /push/unpack-mpack missing"
    )

    # ``events`` interleaves both verbs, so it gives the true call order.
    def _first_event(matches) -> int:
        return next(i for i, event in enumerate(fake_client.events) if matches(event))

    presign_idx = _first_event(lambda event: "mpack-presign" in event[0])
    put_idx = _first_event(lambda event: event[1] == "put")
    unpack_idx = _first_event(lambda event: "unpack-mpack" in event[0])

    assert presign_idx < put_idx < unpack_idx, (
        f"Sequence wrong: presign@{presign_idx} put@{put_idx} unpack@{unpack_idx}"
    )
232
233
234 # ===========================================================================
235 # E2 — mpack_key in presign body matches sha256 of wire bytes
236 # ===========================================================================
237
def test_e2_presign_key_matches_put_bytes(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """The mpack_key in the presign request must equal sha256(wire_bytes PUT to MinIO)."""
    import hashlib

    root = _bare_repo(tmp_path, monkeypatch)
    commit = _make_commit(root, "e2", content=b"key integrity test")
    (heads_dir(root) / "main").write_text(commit.commit_id)

    transport = _push_transport()
    client = _CapturingClient(result_head=commit.commit_id)
    result = _run_push_cli(monkeypatch, transport, client)
    # Include the CLI output so a failed push is diagnosable, matching the
    # other tests in this module.
    assert result.exit_code == 0, result.output

    presign_body = _unpack_body_of(client.posts, "mpack-presign")
    # First recorded PUT carries the bytes that were uploaded.
    put_bytes = client.puts[0][1]

    expected_key = "sha256:" + hashlib.sha256(put_bytes).hexdigest()
    assert presign_body["mpack_key"] == expected_key, (
        f"presign mpack_key={presign_body['mpack_key'][:30]} "
        f"does not match sha256 of PUT bytes={expected_key[:30]}"
    )
261
262
263 # ===========================================================================
264 # V1-port — second push sends commits_count=1 (have filter works)
265 # ===========================================================================
266
def test_v1_second_push_commits_count_is_one(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Second push: remote has c1, local has c2 → unpack body has commits_count=1.

    Both anchors are present here: the local tracking ref for ``origin/main``
    and the live remote info each point at c1. (The live-info-only case is
    covered separately by the V3 test.)
    """
    root = _bare_repo(tmp_path, monkeypatch)
    c1 = _make_commit(root, "v1-first", content=b"v1")
    c2 = _make_commit(root, "v1-second", parent_id=c1.commit_id, content=b"v2")
    (heads_dir(root) / "main").write_text(c2.commit_id)

    # Tracking ref: simulate the state left behind by a previous push of c1.
    origin_dir = remotes_dir(root) / "origin"
    origin_dir.mkdir(parents=True, exist_ok=True)
    (origin_dir / "main").write_text(c1.commit_id)

    # Live remote info also reports c1 as the head of main.
    transport = _push_transport(remote_heads={"main": c1.commit_id})
    client = _CapturingClient(result_head=c2.commit_id)
    result = _run_push_cli(monkeypatch, transport, client)
    assert result.exit_code == 0, result.output

    unpack_body = _unpack_body_of(client.posts, "unpack-mpack")
    assert unpack_body["commits_count"] == 1, (
        f"Second push must send commits_count=1, got {unpack_body['commits_count']}. "
        "Bug: have filter dropped the ancestor anchor so BFS walked all commits."
    )
287
288
289 # ===========================================================================
290 # V2-port — first push (empty remote) sends all commits
291 # ===========================================================================
292
def test_v2_first_push_sends_all_commits(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Pushing to a remote with no branches reports the whole local history."""
    repo = _bare_repo(tmp_path, monkeypatch)
    older = _make_commit(repo, "v2-a", content=b"a")
    newer = _make_commit(repo, "v2-b", parent_id=older.commit_id, content=b"b")
    (heads_dir(repo) / "main").write_text(newer.commit_id)

    # The remote advertises no heads at all.
    empty_remote = _push_transport(remote_heads={})
    fake_client = _CapturingClient(result_head=newer.commit_id)
    outcome = _run_push_cli(monkeypatch, empty_remote, fake_client)
    assert outcome.exit_code == 0, outcome.output

    sent = _unpack_body_of(fake_client.posts, "unpack-mpack")
    assert sent["commits_count"] == 2, (
        f"Full history push must send commits_count=2, got {sent['commits_count']}"
    )
311
312
313 # ===========================================================================
314 # V3-port — live GET /refs anchors BFS even without tracking ref
315 # ===========================================================================
316
def test_v3_live_refs_anchors_bfs(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """With no local tracking ref, the live remote heads alone must bound the walk."""
    repo = _bare_repo(tmp_path, monkeypatch)
    base = _make_commit(repo, "v3-base", content=b"base")
    fresh = _make_commit(repo, "v3-new", parent_id=base.commit_id, content=b"new")
    (heads_dir(repo) / "main").write_text(fresh.commit_id)
    # Deliberately no tracking ref on disk: only the live remote info knows about base.

    live_remote = _push_transport(remote_heads={"main": base.commit_id})
    fake_client = _CapturingClient(result_head=fresh.commit_id)
    outcome = _run_push_cli(monkeypatch, live_remote, fake_client)
    assert outcome.exit_code == 0, outcome.output

    sent = _unpack_body_of(fake_client.posts, "unpack-mpack")
    assert sent["commits_count"] == 1, (
        f"Live /refs anchor must limit BFS to 1 new commit, got {sent['commits_count']}"
    )
336
337
338 # ===========================================================================
339 # VII1-port — two sequential pushes; second has commits_count=1
340 # ===========================================================================
341
def test_vii1_push_twice_second_sends_one_commit(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Two pushes in a row: the first ships the whole chain, the second only the new commit."""
    repo = _bare_repo(tmp_path, monkeypatch)

    # Build a linear chain of commits, each parented on the previous one.
    chain_len = 5
    chain: list[str] = []
    for step in range(chain_len):
        previous = chain[-1] if chain else None
        made = _make_commit(
            repo, f"chain-{step}", parent_id=previous, content=f"v{step}".encode()
        )
        chain.append(made.commit_id)
    old_tip = chain[-1]
    (heads_dir(repo) / "main").write_text(old_tip)

    # Push #1 — the remote advertises nothing.
    first_transport = _push_transport(remote_heads={})
    first_client = _CapturingClient(result_head=old_tip)
    first_outcome = _run_push_cli(monkeypatch, first_transport, first_client)
    assert first_outcome.exit_code == 0, first_outcome.output
    first_sent = _unpack_body_of(first_client.posts, "unpack-mpack")
    assert first_sent["commits_count"] == chain_len, (
        f"First push must send all {chain_len} commits, got {first_sent['commits_count']}"
    )

    # Stand in for set_remote_head: record the pushed tip as origin/main.
    tracking_dir = remotes_dir(repo) / "origin"
    tracking_dir.mkdir(parents=True, exist_ok=True)
    (tracking_dir / "main").write_text(old_tip)

    # One more local commit on top of what was pushed.
    extra = _make_commit(repo, "one-more", parent_id=old_tip, content=b"new")
    (heads_dir(repo) / "main").write_text(extra.commit_id)

    # Push #2 — the remote now reports the previously pushed tip.
    second_transport = _push_transport(remote_heads={"main": old_tip})
    second_client = _CapturingClient(result_head=extra.commit_id)
    second_outcome = _run_push_cli(monkeypatch, second_transport, second_client)
    assert second_outcome.exit_code == 0, second_outcome.output

    second_sent = _unpack_body_of(second_client.posts, "unpack-mpack")
    assert second_sent["commits_count"] == 1, (
        f"Second push must send commits_count=1, got {second_sent['commits_count']}. "
        "Regression: have filter dropped the ancestor anchor."
    )
386
387
388 # ===========================================================================
389 # E6 — non-merge commit: branch_have is single remote head
390 # ===========================================================================
391
def test_e6_non_merge_branch_have_is_single_head(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """A non-merge commit: branch_have contains only the direct remote branch head.

    The push module's ``walk_commits`` is wrapped so every call's ``tips`` and
    ``have`` arguments are recorded, then the walk for the local tip is checked
    to be anchored on the remote ``main`` head.
    """
    from muse.cli.commands import push as _push_mod

    root = _bare_repo(tmp_path, monkeypatch)
    c1 = _make_commit(root, "e6-base", content=b"base")
    c2 = _make_commit(root, "e6-tip", parent_id=c1.commit_id, content=b"tip")
    (heads_dir(root) / "main").write_text(c2.commit_id)

    # Remote has both main and dev; local tip is a simple (non-merge) commit.
    # NOTE(review): dev and main both point at c1, so the absence of the dev
    # head from ``have`` cannot be asserted by value with this fixture.
    transport = _push_transport(remote_heads={
        "main": c1.commit_id,
        "dev": c1.commit_id,
    })
    client = _CapturingClient(result_head=c2.commit_id)

    walk_calls: list[_WalkCall] = []
    original_walk = _push_mod.walk_commits

    def _capture_walk(
        root_arg: pathlib.Path,
        tips: list[str],
        *,
        have: list[str] | None,
    ) -> "_WalkResult":
        # Store copies so later mutation of the caller's lists cannot change
        # what was recorded for this call.
        walk_calls.append({
            "tips": list(tips),
            "have": list(have) if have is not None else None,
        })
        return original_walk(root_arg, tips, have=have)

    with patch.object(_push_mod, "walk_commits", side_effect=_capture_walk):
        _run_push_cli(monkeypatch, transport, client)

    # Select the walk(s) that start exactly at the local tip.
    branch_walks = [w for w in walk_calls if w["tips"] == [c2.commit_id]]
    assert branch_walks, f"Expected a walk with tip={c2.commit_id[:16]}"

    # The have list of the first such walk must contain c1 (remote main head).
    main_have = branch_walks[0]["have"]
    assert main_have is not None, "Walk for the branch tip must receive a have list"
    assert c1.commit_id in main_have, "Remote main head must be in have"
433
434
435 # ===========================================================================
436 # E7 — merge commit: branch_have includes ALL remote branch heads (P2 opt)
437 # ===========================================================================
438
def test_e7_merge_commit_branch_have_includes_all_remote_heads(
    tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
) -> None:
    """Gap 6: pushing a merge commit must pass every remote branch head as ``have``.

    That lets the walk stop at an ancestor the remote already holds on the
    second-parent chain instead of continuing further back.
    """
    from muse.cli.commands import push as _push_mod

    repo = _bare_repo(tmp_path, monkeypatch)

    # History: base <- feature, and a merge commit whose parents are base and feature.
    base = _make_commit(repo, "e7-base", content=b"base")
    feature = _make_commit(repo, "e7-feature", parent_id=base.commit_id, content=b"feature")
    merge = _make_commit(
        repo,
        "e7-merge",
        parent_id=base.commit_id,
        parent2_id=feature.commit_id,
        content=b"merge",
    )
    (heads_dir(repo) / "main").write_text(merge.commit_id)

    remote_main_head = base.commit_id
    remote_feature_head = feature.commit_id
    transport = _push_transport(remote_heads={
        "main": remote_main_head,
        "feature": remote_feature_head,
    })
    fake_client = _CapturingClient(result_head=merge.commit_id)

    recorded: list[_WalkCall] = []
    real_walk = _push_mod.walk_commits

    def _capture_walk(
        root_arg: pathlib.Path,
        tips: list[str],
        *,
        have: list[str] | None,
    ) -> "_WalkResult":
        # Copy the arguments at call time; a falsy ``have`` is stored as given.
        have_snapshot = list(have) if have else have
        recorded.append({"tips": list(tips), "have": have_snapshot})
        return real_walk(root_arg, tips, have=have)

    with patch.object(_push_mod, "walk_commits", side_effect=_capture_walk):
        _run_push_cli(monkeypatch, transport, fake_client)

    # Look at the first walk that starts from the merge commit.
    from_merge = [entry for entry in recorded if merge.commit_id in entry["tips"]]
    assert from_merge, "Expected a walk with merge commit tip"

    anchors = from_merge[0]["have"]
    assert remote_main_head in anchors, (
        "branch_have must include remote main head for merge commit"
    )
    assert remote_feature_head in anchors, (
        "branch_have must include feature branch head (P2 optimisation) for merge commit. "
        "Fix: add all remote_branch_heads.values() to branch_have when local tip is a merge."
    )
File History 5 commits
sha256:fe844c2411edd1cec3d4c847f36a96c6ccd4e3d7d1a715106d2ecd64216bf94f fix: bare object detection and read recovery; rm adapter files Sonnet 4.6 minor 15 days ago
sha256:79ffe87f5fe2ec146e35f05521218bbf54dffdb0440c07f970bad05f16efb89f chore: merge main — carry all urllib/typing/test fixes from dev Sonnet 4.6 minor 20 days ago
sha256:0bea7600d1eee83e87950be49933b1006fa9dc2c71e7c4ee748d324f61138156 chore: bump version to 0.2.0rc11; fix typing audit violatio… Sonnet 4.6 minor 20 days ago
sha256:7e95b29f2d502ad5eccf2a57af4092763a2e705f1bf1569a8cb7e063b6e6d5bd refactor: replace httpx with stdlib urllib in push path Sonnet 4.6 minor 22 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 23 days ago