gabriel / musehub public
test_push_clone_roundtrip.py python
306 lines 11.3 KB
Raw
sha256:eead4146ec6c9905a097a89f8dbffa3d5c5e8ef9c1acd0e8a5b2a93b0084d273 Mpack content-addressability Human 13 hours ago
1 """TDD — push → clone round-trip correctness, built from first principles.
2
3 Each tier proves one layer works before the next builds on it.
4
5 Tier 1 — Storage layer
6 T1a: put_mpack/get_mpack round-trip: sha256(get(key)) == key
7 T1b: presigned PUT / get_mpack round-trip: sha256(get(key)) == key
8 T1c: put_mpack with real MUSE-format bytes round-trips correctly
9
10 Tier 2 — Wire push: three commits, several files
11 T2: push a repo with 3 commits (text + binary files) → muse push completes without error
12
13 Tier 3 — Clone after push
14 T3: clone after push completes without error and creates the clone directory
15
16 Tier 4 — Full clone
17 T4: clone after push produces a working tree with correct file content
18
19 Run with:
20 python3 -m pytest tests/test_push_clone_roundtrip.py -v --tb=short
21 """
22 from __future__ import annotations
23
24 import hashlib
25 import json
26 import secrets
27 import subprocess
28 import urllib.request
29 from pathlib import Path
30
31 import pytest
32
33 # ── MinIO / hub constants ──────────────────────────────────────────────────────
34
# Local MinIO instance used as the S3-compatible object store for the
# storage-layer tests.
MINIO_ENDPOINT = "http://localhost:9000"
MINIO_BUCKET = "muse-objects"
# Local development credentials; only meaningful for a throwaway MinIO.
MINIO_ACCESS_KEY = "minioadmin"
MINIO_SECRET_KEY = "minioadmin"
# Hub under test. Served over HTTPS; the reachability probe in this module
# disables certificate verification when contacting it.
HUB = "https://localhost:1337"
# Two directory levels above this file; used as cwd / -C target for the
# muse hub and clone commands issued by the fixtures and tests.
REPO_ROOT = Path(__file__).parent.parent
41
42
43 # ── reachability guards ────────────────────────────────────────────────────────
44
def _minio_up() -> bool:
    """Return True when the local MinIO liveness endpoint answers.

    Issues a GET against ``{MINIO_ENDPOINT}/minio/health/live`` with a
    2-second timeout.  Any failure (connection refused, timeout, HTTP error
    status, ...) is reported as ``False`` so that merely importing this
    module never raises when MinIO is absent.

    Returns:
        ``True`` if the request succeeded, ``False`` otherwise.
    """
    try:
        # Use the response as a context manager so the underlying socket is
        # closed immediately instead of lingering until garbage collection.
        with urllib.request.urlopen(
            f"{MINIO_ENDPOINT}/minio/health/live", timeout=2
        ):
            return True
    except Exception:
        # Deliberately broad: this is a best-effort probe that feeds skipif.
        return False
51
52
def _hub_up() -> bool:
    """Return True when the hub's ``/healthz`` endpoint answers over HTTPS.

    Certificate and hostname verification are switched off for this probe,
    so a hub presenting a certificate that would not validate still counts
    as reachable.  Any failure (connection refused, timeout, HTTP error
    status, ...) is reported as ``False`` so that importing this module
    never raises when the hub is absent.

    Returns:
        ``True`` if the request succeeded, ``False`` otherwise.
    """
    try:
        import ssl

        ctx = ssl.create_default_context()
        # Order matters: check_hostname must be cleared before verify_mode
        # can be set to CERT_NONE, otherwise ssl raises ValueError.
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        # Context manager closes the response/socket promptly.
        with urllib.request.urlopen(f"{HUB}/healthz", context=ctx, timeout=3):
            return True
    except Exception:
        # Deliberately broad: this is a best-effort probe that feeds skipif.
        return False
63
64
# Skip markers for tests that need live services.  Both probes run once, when
# this module is imported, so each performs a real network request at
# collection time.
requires_minio = pytest.mark.skipif(
    not _minio_up(),
    reason="MinIO not reachable at localhost:9000",
)
requires_hub = pytest.mark.skipif(
    not _hub_up(),
    reason="Hub not reachable at localhost:1337",
)
67
68
69 # ── storage backend helper ─────────────────────────────────────────────────────
70
def _backend():
    """Build a ``BlobBackend`` pointed at the local MinIO instance.

    The internal and public endpoints are both set to ``MINIO_ENDPOINT``;
    bucket and credentials come from the module-level MinIO constants.
    """
    # Local import: musehub is only needed once a storage test actually runs.
    from musehub.storage.backends import BlobBackend

    settings = {
        "bucket": MINIO_BUCKET,
        "endpoint_url": MINIO_ENDPOINT,
        "public_endpoint_url": MINIO_ENDPOINT,
        "access_key_id": MINIO_ACCESS_KEY,
        "secret_access_key": MINIO_SECRET_KEY,
        "region": "us-east-1",
    }
    return BlobBackend(**settings)
81
82
83 def _sha256_key(data: bytes) -> str:
84 return "sha256:" + hashlib.sha256(data).hexdigest()
85
86
87 # ── muse subprocess helper ─────────────────────────────────────────────────────
88
def muse(
    *args: str,
    cwd: Path,
    timeout: int = 120,
    check: bool = True,
) -> subprocess.CompletedProcess:
    """Run the ``muse`` CLI with *args* and return the completed process.

    Args:
        *args: Command-line arguments placed after the ``muse`` executable.
        cwd: Working directory for the subprocess.
        timeout: Seconds to wait before ``subprocess.run`` gives up.
        check: When true, a non-zero exit status raises ``AssertionError``.

    Returns:
        The ``CompletedProcess`` with stdout/stderr captured as text.

    Raises:
        AssertionError: If *check* is true and the command exits non-zero.
            The message carries the first 600 characters of each stream.
    """
    command = ["muse", *args]
    proc = subprocess.run(
        command,
        cwd=str(cwd),
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    if not check or proc.returncode == 0:
        return proc
    raise AssertionError(
        f"muse {' '.join(args)} failed (exit {proc.returncode}):\n"
        f"stdout: {proc.stdout[:600]}\nstderr: {proc.stderr[:600]}"
    )
100
101
102 # ── Tier 1: Storage layer ──────────────────────────────────────────────────────
103
class TestStorageRoundTrip:
    """Tier 1: prove the mpack storage layer is content-addressed.

    Every test stores a payload under the key ``sha256:<hex digest>`` of that
    payload and checks that reading the key back yields bytes with the same
    digest.

    NOTE(review): the tests are ``async def``; presumably the project runs an
    asyncio pytest plugin in auto mode — confirm, otherwise they need a marker.
    """

    # Timeout (seconds) applied to presigned uploads.  Without an explicit
    # value urlopen falls back to the global socket default, which normally
    # means "wait forever" if the object store stalls.
    _PUT_TIMEOUT_SECONDS = 120

    def _presigned_put(self, upload_url: str, data: bytes) -> int:
        """PUT *data* to *upload_url* and return the HTTP status code.

        This is a blocking call made from inside an async test; that is
        acceptable here because nothing else shares the event loop.
        """
        req = urllib.request.Request(
            upload_url,
            data=data,
            method="PUT",
            headers={"Content-Type": "application/x-muse-pack"},
        )
        with urllib.request.urlopen(
            req, timeout=self._PUT_TIMEOUT_SECONDS
        ) as resp:
            return resp.status

    @requires_minio
    async def test_t1a_put_mpack_get_mpack(self) -> None:
        """put_mpack(key, data) → get_mpack(key) must return data where sha256==key."""
        backend = _backend()
        data = secrets.token_bytes(256)
        key = _sha256_key(data)

        await backend.put_mpack(key, data)
        result = await backend.get_mpack(key)

        assert result is not None, "get_mpack returned None"
        assert _sha256_key(result) == key, (
            f"sha256(result)={_sha256_key(result)[:30]} ≠ key={key[:30]}"
        )
        assert result == data

    @requires_minio
    async def test_t1b_presign_put_get_mpack(self) -> None:
        """Presigned PUT → get_mpack must return data where sha256==key.

        This is the exact path used by muse push: client PUTs to presigned URL,
        server later reads via get_mpack. Any key-encoding mismatch surfaces here.
        """
        backend = _backend()
        data = secrets.token_bytes(256)
        key = _sha256_key(data)

        upload_url = await backend.presign_mpack_put(key, ttl_seconds=300)

        status = self._presigned_put(upload_url, data)
        assert status == 200, f"presigned PUT → HTTP {status}"

        result = await backend.get_mpack(key)

        assert result is not None, (
            f"get_mpack returned None after presigned PUT\n"
            f"upload_url={upload_url}"
        )
        assert _sha256_key(result) == key, (
            f"sha256(result)={_sha256_key(result)[:30]} ≠ key={key[:30]}\n"
            f"upload_url={upload_url}"
        )

    @requires_minio
    @pytest.mark.parametrize("size_mb", [0.001, 0.1, 1, 10, 50])
    async def test_t1b_presign_put_large_payload(self, size_mb: float) -> None:
        """Presigned PUT → get_mpack round-trip at increasing sizes.

        Production mpacks range from ~100 KB to ~400 MB. A bug that only
        manifests above a certain size will show up here.
        """
        backend = _backend()
        data = secrets.token_bytes(int(size_mb * 1024 * 1024))
        key = _sha256_key(data)

        upload_url = await backend.presign_mpack_put(key, ttl_seconds=300)

        status = self._presigned_put(upload_url, data)
        assert status == 200, f"presigned PUT ({size_mb} MB) → HTTP {status}"

        result = await backend.get_mpack(key)

        assert result is not None, f"get_mpack returned None for {size_mb} MB payload"
        assert _sha256_key(result) == key, (
            f"{size_mb} MB: sha256(result)={_sha256_key(result)[:30]} ≠ key={key[:30]}"
        )

    @requires_minio
    async def test_t1c_real_muse_mpack_format(self) -> None:
        """A real MUSE-format mpack round-trips correctly through put/get."""
        from muse.core.mpack import build_wire_mpack

        # Minimal valid mpack payload
        mpack = {
            "commits": [],
            "snapshots": [],
            "blobs": [{"object_id": _sha256_key(b"hello"), "content": b"hello"}],
            "tags": [],
        }
        data = build_wire_mpack(mpack)
        key = _sha256_key(data)

        backend = _backend()
        await backend.put_mpack(key, data)
        result = await backend.get_mpack(key)

        assert result is not None
        assert _sha256_key(result) == key
        assert result == data
204
205
206 # ── Tier 2: Wire push ──────────────────────────────────────────────────────────
207
@pytest.fixture
def tmp_repo(tmp_path: Path) -> Path:
    """Fresh muse repo with 3 commits and varied file content.

    The repo lives in ``tmp_path / "src"``.  After the third commit the
    working tree holds ``hello.txt``, ``notes.txt``, ``data.bin`` and
    ``extra.bin``.

    Returns:
        Path to the repository root.
    """
    repo = tmp_path / "src"
    repo.mkdir()
    muse("init", cwd=repo)

    def commit(message: str) -> None:
        """Stage everything in the working tree and record one commit."""
        muse("code", "add", ".", cwd=repo)
        muse("commit", "-m", message,
             "--agent-id", "test", "--model-id", "test", cwd=repo)

    # commit 1 — two text files plus 1 KiB of random binary data
    (repo / "hello.txt").write_text("hello world\n")
    (repo / "data.bin").write_bytes(secrets.token_bytes(1024))
    (repo / "notes.txt").write_text("line one\nline two\nline three\n")
    commit("initial commit")

    # commit 2 — modify one file, add one
    (repo / "hello.txt").write_text("hello world v2\n")
    (repo / "extra.bin").write_bytes(secrets.token_bytes(512))
    commit("second commit")

    # commit 3 — modify another file.
    # NOTE(review): no file is deleted anywhere in this fixture, so file
    # deletion is not exercised by the round-trip tests; add an unlink here
    # if that coverage is wanted.
    (repo / "notes.txt").write_text("updated notes\n")
    commit("third commit")

    return repo
237
238
@pytest.fixture
def hub_repo(tmp_path: Path):
    """Create a fresh hub repo, yield its ``owner/slug``, delete after test.

    The repo name gets a random 8-hex-character suffix.  Deriving it from
    the tail of ``tmp_path.name`` is not unique: the directory name follows
    the test name, so tests whose names end alike (e.g. ``..._succeeds``)
    and repeated runs would ask for the same repo name, and a repo left
    behind by a failed delete would break the next create.

    Yields:
        The full repo identifier, ``gabriel/<slug>``.
    """
    # tmp_path stays in the signature for compatibility; it no longer
    # contributes to the repo name.
    name = f"roundtrip-probe-{secrets.token_hex(4)}"
    created = muse(
        "-C", str(REPO_ROOT), "hub", "repo", "create",
        "--name", name, "--visibility", "public", "--no-init",
        "--hub", HUB, "--json", cwd=REPO_ROOT,
    )
    slug = json.loads(created.stdout)["slug"]
    # NOTE(review): the owner is hard-coded; presumably the account the
    # local muse CLI is logged in as — confirm.
    full = f"gabriel/{slug}"
    yield full
    # Best-effort cleanup: check=False so a failed delete never masks the
    # outcome of the test itself.
    muse(
        "-C", str(REPO_ROOT), "hub", "repo", "delete", full,
        "--yes", "--hub", HUB, "--json", cwd=REPO_ROOT,
        check=False,
    )
256
257
@requires_hub
class TestPushCloneRoundTrip:
    """Tier 2–4: push a repo, clone it, verify every file matches exactly."""

    @staticmethod
    def _push(repo: Path, hub_repo: str) -> str:
        """Register the hub repo as ``origin``, push ``main``, return its URL."""
        hub_url = f"{HUB}/{hub_repo}"
        muse("remote", "add", "origin", hub_url, cwd=repo)
        muse("push", "origin", "main", cwd=repo, timeout=180)
        return hub_url

    @staticmethod
    def _clone(hub_url: str, destination: Path) -> None:
        """Clone *hub_url* into *destination*, running from the project root."""
        muse("-C", str(REPO_ROOT), "clone", hub_url, str(destination),
             cwd=REPO_ROOT, timeout=180)

    def test_t2_push_succeeds(self, tmp_repo: Path, hub_repo: str) -> None:
        """muse push must complete without error."""
        self._push(tmp_repo, hub_repo)

    def test_t3_clone_succeeds(self, tmp_repo: Path, hub_repo: str, tmp_path: Path) -> None:
        """muse clone after push must complete without error."""
        hub_url = self._push(tmp_repo, hub_repo)

        clone_dir = tmp_path / "clone"
        self._clone(hub_url, clone_dir)

        assert clone_dir.exists(), "clone directory was not created"

    def test_t4_clone_files_match_source(self, tmp_repo: Path, hub_repo: str, tmp_path: Path) -> None:
        """Every file in the cloned working tree must match the source exactly."""
        hub_url = self._push(tmp_repo, hub_repo)

        clone_dir = tmp_path / "clone"
        self._clone(hub_url, clone_dir)

        # Snapshot the source working tree: relative path → bytes, skipping
        # anything whose path contains a ".muse" component.
        source_files = {}
        for path in tmp_repo.rglob("*"):
            if not path.is_file() or ".muse" in path.parts:
                continue
            source_files[path.relative_to(tmp_repo)] = path.read_bytes()
        assert source_files, "no source files found — fixture broken"

        def describe_problem(rel: Path, expected: bytes):
            """Return a mismatch line for *rel*, or None when the clone agrees."""
            candidate = clone_dir / rel
            if not candidate.exists():
                return f"MISSING: {rel}"
            if candidate.read_bytes() != expected:
                return f"WRONG CONTENT: {rel}"
            return None

        findings = (
            describe_problem(rel, expected)
            for rel, expected in source_files.items()
        )
        mismatches = [line for line in findings if line is not None]

        assert not mismatches, "Clone → source mismatches:\n" + "\n".join(mismatches)
File History 1 commit
sha256:eead4146ec6c9905a097a89f8dbffa3d5c5e8ef9c1acd0e8a5b2a93b0084d273 Mpack content-addressability Human 13 hours ago