gabriel / muse public
test_push_mpack_build.py python
216 lines 8.5 KB
Raw
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
1 """TDD — build_mpack_from_walk: unit tests for the mpack assembly step.
2
3 Gap 3: build_mpack_from_walk() is called on every push but had no direct tests.
4
5 Test plan
6 ---------
7 B1 Empty walk → mpack structure has "blobs", "commits", "snapshots" keys,
8 all empty.
9 B2 Single commit with one object → mpack["commits"] has 1 entry,
10 mpack["blobs"] has 1 entry with the correct object_id.
11 B3 Have anchors are honoured — only commits past the have boundary appear
12 in mpack["commits"].
13 B4 The serialised mpack is valid msgpack (round-trips without error).
14 B5 mpack_key = "sha256:" + sha256(packb(mpack)) matches what push.py computes.
15 """
16 from __future__ import annotations
17
18 import datetime
19 import hashlib
20 import json
21 import pathlib
22
23 import msgpack
24 import pytest
25
26 from muse._version import __version__
27 from muse.core.mpack import build_mpack_from_walk, walk_commits
28 from muse.core.object_store import write_object
29 from muse.core.paths import heads_dir, muse_dir
30 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
31 from muse.core.commits import (
32 CommitRecord,
33 write_commit,
34 )
35 from muse.core.snapshots import (
36 SnapshotRecord,
37 write_snapshot,
38 )
39 from muse.core.types import Manifest, blob_id
40
41
42 # ---------------------------------------------------------------------------
43 # Helpers (same pattern as test_push_have_filter)
44 # ---------------------------------------------------------------------------
45
def _bare_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *tmp_path* and return *tmp_path*.

    Inside ``muse_dir(tmp_path)`` this creates the ``commits``, ``snapshots``,
    ``objects``, ``refs/heads`` and ``remotes`` directories, then writes
    ``HEAD`` (a symbolic ref to ``refs/heads/main``), ``repo.json`` and a
    ``config.toml`` that declares an ``origin`` remote.
    """
    dot_muse = muse_dir(tmp_path)

    for subdir in ("commits", "snapshots", "objects", "refs/heads", "remotes"):
        (dot_muse / subdir).mkdir(parents=True, exist_ok=True)

    repo_meta = {"repo_id": "test-repo", "schema_version": __version__, "domain": "code"}
    # File name → text content, written in this order.
    fixture_files = {
        "HEAD": "ref: refs/heads/main\n",
        "repo.json": json.dumps(repo_meta),
        "config.toml": '[remotes.origin]\nurl = "https://hub.example.com/r"\n',
    }
    for file_name, text in fixture_files.items():
        (dot_muse / file_name).write_text(text)

    return tmp_path
56
57
def _make_commit(
    root: pathlib.Path,
    label: str,
    parent_id: str | None = None,
    content: bytes | None = None,
) -> CommitRecord:
    """Persist one blob, a one-file snapshot and a commit; return the commit record.

    Args:
        root: Repository root the records are written into.
        label: Used in the commit message (``"commit <label>"``) and, when
            *content* is not given, in the default blob payload.
        parent_id: Optional parent commit id; a falsy value yields an empty
            parent list for the commit-id hash.
        content: Blob payload; defaults to ``b"content-<label>"``.

    Returns:
        The ``CommitRecord`` that was written, on branch ``"main"`` with a
        fixed timestamp of 2026-01-01 UTC.
    """
    payload = f"content-{label}".encode() if content is None else content
    blob = blob_id(payload)
    write_object(root, blob, payload)

    # The snapshot always maps the single path "file.txt" to the blob.
    files: Manifest = {"file.txt": blob}
    snapshot_id = compute_snapshot_id(files)
    write_snapshot(root, SnapshotRecord(snapshot_id=snapshot_id, manifest=files))

    message = f"commit {label}"
    when = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    parents = [] if not parent_id else [parent_id]
    commit_id = compute_commit_id(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=when.isoformat(),
    )

    record = CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snapshot_id,
        message=message,
        committed_at=when,
        parent_commit_id=parent_id,
    )
    write_commit(root, record)
    return record
88
89
90 # ---------------------------------------------------------------------------
91 # B1 — empty walk produces valid empty mpack structure
92 # ---------------------------------------------------------------------------
93
def test_b1_empty_walk_has_required_keys(tmp_path: pathlib.Path) -> None:
    """An empty walk yields an mpack whose three top-level collections are all empty.

    The test plan (B1) requires "blobs", "commits" and "snapshots" to be
    present *and* empty, so every key gets both checks.
    """
    root = _bare_repo(tmp_path)
    commit = _make_commit(root, "seed")
    (heads_dir(root) / "main").write_text(commit.commit_id)

    # The only tip is also listed in `have`, so the walk is expected to stop
    # immediately and contribute zero new commits.
    walk = walk_commits(root, [commit.commit_id], have=[commit.commit_id])
    mpack = build_mpack_from_walk(root, walk, compress=False)

    for key in ("blobs", "commits", "snapshots"):
        assert key in mpack, f"mpack is missing top-level key {key!r}"
        # len() accepts whichever empty container type the builder returns.
        assert len(mpack[key]) == 0, f"mpack[{key!r}] should be empty: {mpack[key]!r}"
109
110
111 # ---------------------------------------------------------------------------
112 # B2 — single commit with one object → mpack has the commit and the object
113 # ---------------------------------------------------------------------------
114
def test_b2_single_commit_object_present(tmp_path: pathlib.Path) -> None:
    """A single commit with one file → mpack holds exactly one commit and one blob.

    The repository is built by hand (one blob, one snapshot mapping
    ``hello.txt`` to it, one parentless commit) so the expected ids are known.
    Besides membership, the entry counts are pinned to 1 as the test plan (B2)
    requires, so stray or duplicated entries are caught.
    """
    root = _bare_repo(tmp_path)
    raw = b"hello mpack build test"
    oid = blob_id(raw)
    write_object(root, oid, raw)

    manifest: Manifest = {"hello.txt": oid}
    snap_id = compute_snapshot_id(manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))

    committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    cid = compute_commit_id(
        parent_ids=[],
        snapshot_id=snap_id,
        message="b2",
        committed_at_iso=committed_at.isoformat(),
    )
    write_commit(root, CommitRecord(
        commit_id=cid,
        branch="main",
        snapshot_id=snap_id,
        message="b2",
        committed_at=committed_at,
    ))
    (heads_dir(root) / "main").write_text(cid)

    walk = walk_commits(root, [cid], have=[])
    mpack = build_mpack_from_walk(root, walk, compress=False)

    blobs = list(mpack.get("blobs") or [])
    commits = list(mpack.get("commits") or [])

    object_ids = {obj["object_id"] for obj in blobs}
    # Commit entries may be plain dicts or record objects; handle both.
    commit_ids = {c["commit_id"] if isinstance(c, dict) else c.commit_id
                  for c in commits}

    assert oid in object_ids, f"object {oid[:20]} not in mpack objects: {object_ids}"
    assert cid in commit_ids, f"commit {cid[:20]} not in mpack commits: {commit_ids}"

    # Exactly one of each: nothing extra and no duplicates.
    assert len(blobs) == 1, f"expected exactly 1 blob in mpack, got {len(blobs)}"
    assert len(commits) == 1, f"expected exactly 1 commit in mpack, got {len(commits)}"
151
152
153 # ---------------------------------------------------------------------------
154 # B3 — have anchors stop the BFS correctly
155 # ---------------------------------------------------------------------------
156
def test_b3_have_anchor_excludes_ancestor_commits(tmp_path: pathlib.Path) -> None:
    """Given a two-commit chain and have=[parent], only the child is packed."""
    root = _bare_repo(tmp_path)
    c1 = _make_commit(root, "first", content=b"v1")
    c2 = _make_commit(root, "second", parent_id=c1.commit_id, content=b"v2")
    (heads_dir(root) / "main").write_text(c2.commit_id)

    # c1 is declared as already known, so only c2 should count as new.
    walk = walk_commits(root, [c2.commit_id], have=[c1.commit_id])
    mpack = build_mpack_from_walk(root, walk, compress=False)

    # Commit entries may be plain dicts or record objects; handle both.
    packed_ids = set()
    for entry in mpack.get("commits") or []:
        if isinstance(entry, dict):
            packed_ids.add(entry["commit_id"])
        else:
            packed_ids.add(entry.commit_id)

    assert c2.commit_id in packed_ids, "new commit must be in mpack"
    assert c1.commit_id not in packed_ids, "ancestor commit must NOT be in mpack (have anchor)"
173
174
175 # ---------------------------------------------------------------------------
176 # B4 — serialised mpack is valid msgpack
177 # ---------------------------------------------------------------------------
178
def test_b4_mpack_serialises_to_valid_msgpack(tmp_path: pathlib.Path) -> None:
    """The built mpack packs to bytes and unpacks again with its keys intact."""
    root = _bare_repo(tmp_path)
    head = _make_commit(root, "b4-commit", content=b"b4 content")
    (heads_dir(root) / "main").write_text(head.commit_id)

    walk = walk_commits(root, [head.commit_id], have=[])
    mpack = build_mpack_from_walk(root, walk, compress=False)

    packed = msgpack.packb(mpack, use_bin_type=True)
    assert isinstance(packed, bytes), "packb must return bytes"

    # Round-trip: decoding must succeed and keep the top-level keys.
    roundtrip = msgpack.unpackb(packed, raw=False)
    for key in ("blobs", "commits"):
        assert key in roundtrip
194
195
196 # ---------------------------------------------------------------------------
197 # B5 — mpack_key matches push.py's inline sha256 formula
198 # ---------------------------------------------------------------------------
199
def test_b5_mpack_key_matches_push_formula(tmp_path: pathlib.Path) -> None:
    """The mpack key is ``"sha256:"`` + sha256 of the packed wire bytes, reproducibly.

    The key is the raw SHA-256 of the msgpack wire bytes (not ``blob_id``).
    It is checked for shape and against an independent second serialisation
    of the same mpack, so the final comparison is no longer a value compared
    with itself.

    NOTE(review): this test still does not call push.py's own key
    computation, so it cannot detect drift between the two — TODO compare
    against that code path once it is importable as a helper.
    """
    root = _bare_repo(tmp_path)
    commit = _make_commit(root, "b5-commit", content=b"b5 key test")
    (heads_dir(root) / "main").write_text(commit.commit_id)

    walk = walk_commits(root, [commit.commit_id], have=[])
    mpack = build_mpack_from_walk(root, walk, compress=True)

    wire_bytes = msgpack.packb(mpack, use_bin_type=True)
    expected_key = "sha256:" + hashlib.sha256(wire_bytes).hexdigest()

    assert expected_key.startswith("sha256:")
    assert len(expected_key) == 7 + 64  # "sha256:" + 64 hex chars

    # Serialise a second time and derive the key from those bytes: a
    # content-addressed key is only usable if packing is reproducible.
    repacked = msgpack.packb(mpack, use_bin_type=True)
    assert expected_key == "sha256:" + hashlib.sha256(repacked).hexdigest(), (
        "mpack key must be stable across repeated serialisation"
    )
File History 1 commit
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago