gabriel / musehub public
test_push_delta_only_parent_manifest.py python
153 lines 6.8 KB
Raw
sha256:4669620efda9ff41c55bdefd1f7bfe1c239d468428744c84ead9957e5a003a53 merge: rescue snapshot-recovery hardening (c00aa21d) into d… Opus 4.8 minor ⚠ breaking 18 hours ago
1 """TDD — wire-push must not persist an empty/incomplete manifest when a pushed
2 snapshot's parent is a delta-only external snapshot.
3
4 Root cause (musehub_wire_push.py :: wire_push_unpack_mpack base resolution):
5 external parent manifests are loaded at push.py:598-605 but ONLY kept when their
6 `manifest_blob` is non-NULL. A delta-only external parent (manifest_blob=None) is
7 therefore absent from `_parent_snap_manifests`, and push.py:634-635 falls back to
8 `_base = {}`. Applying the child's delta onto an empty base yields an incomplete
9 manifest, which is persisted and no longer reproduces the snapshot_id. On clone
10 the snapshot is rejected, its commit is dropped, and every descendant fails with
11 "parent not in mpack" — an empty working tree.
12
13 This is the staging gabriel/muse clone failure: snapshots 708d5734 / 3d5ae8b5 /
14 edd649a9 were stored with entry_count=0. The bulk-push ladder (a single push)
15 never created the trigger, so localhost cloned clean. The minimal topology that
16 DOES trigger it is two pushes where a commit branches off a *middle* snapshot:
17
18 push 1 (main): A -> B -> C # B is a middle snapshot => stored delta-only
19 push 2 (feat): B -> D # D's parent snapshot is the delta-only B
20
21 INVARIANT under test: a snapshot the server stores with a manifest_blob must hold
22 the snapshot's complete file set. RED before the fix (D's manifest is missing the
23 files inherited from B); GREEN after.
24
25 Integration test against localhost (musehub @ :1337, postgres @ :5434), matching
26 the repo's existing push-test convention (real `muse` CLI, real DB assertions).
27 """
28 from __future__ import annotations
29
30 import asyncio
31 import json
32 import subprocess
33 from pathlib import Path
34
35 import msgpack
36 import pytest
37 from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
38 from sqlalchemy.orm import sessionmaker
39
40 from muse.core.ids import hash_snapshot
41 from musehub.db.musehub_repo_models import MusehubCommit, MusehubSnapshot
42
# Base URL of the MuseHub instance under test: handed to the CLI via `--hub`
# and used as the prefix of the push remote URL.
HUB = "https://localhost:1337"
# Async SQLAlchemy URL used to read back what the server persisted.
DB_URL = "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub"
# Parent of the directory containing this file; used as the working directory
# for the `muse hub repo create` / `delete` calls.
REPO_ROOT = Path(__file__).parent.parent
46
47
def muse(*args: str, cwd: Path, timeout: int = 90) -> subprocess.CompletedProcess:
    """Run the ``muse`` CLI with *args* inside *cwd* and return the finished process.

    stdout and stderr are captured as text. A non-zero exit status is NOT
    turned into an exception here; callers inspect ``returncode`` themselves.
    ``subprocess.TimeoutExpired`` propagates when the command runs longer
    than *timeout* seconds.
    """
    command = ["muse"]
    command.extend(args)
    return subprocess.run(
        command,
        cwd=str(cwd),
        # Equivalent to capture_output=True: both streams go to pipes.
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        timeout=timeout,
    )
52
53
def muse_check(*args: str, cwd: Path, timeout: int = 90) -> str:
    """Run ``muse`` and return its stdout, failing loudly on a non-zero exit.

    Raises:
        RuntimeError: the CLI exited with a non-zero status. The message
            carries the argument list, the return code and the first 600
            characters of stderr.
    """
    result = muse(*args, cwd=cwd, timeout=timeout)
    if result.returncode == 0:
        return result.stdout

    command_line = " ".join(args)
    raise RuntimeError(
        f"muse {command_line} failed (rc={result.returncode}):\n{result.stderr[:600]}"
    )
59
60
def _commit_id_by_message(repo: Path, message: str) -> str:
    """Return the ``commit_id`` of the first ``muse log --json`` entry matching *message*.

    Messages are compared after stripping surrounding whitespace; a missing
    or null message is treated as the empty string.

    Raises:
        AssertionError: no entry in the log has that message. The error
            lists every (stripped) message that was seen.
    """
    log = json.loads(muse_check("log", "--json", cwd=repo))
    commits = log["commits"]

    match = next(
        (entry for entry in commits if (entry.get("message") or "").strip() == message),
        None,
    )
    if match is not None:
        return match["commit_id"]

    seen = [(entry.get("message") or "").strip() for entry in commits]
    raise AssertionError(f"no commit with message {message!r}; saw {seen}")
69
70
async def _stored_snapshot_for_commit(commit_id: str):
    """Read back what the server persisted for the snapshot behind *commit_id*.

    Returns a 4-tuple ``(snapshot_id, manifest, directories, entry_count)``:
    ``manifest`` is the msgpack-decoded ``manifest_blob`` as a dict, or
    ``None`` when the blob is NULL; ``directories`` is always a list (empty
    when the stored value is falsy).

    Asserts that both the commit row and its snapshot row exist. The engine
    is disposed on every exit path.
    """
    engine = create_async_engine(DB_URL)
    session_factory = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
    try:
        async with session_factory() as session:
            commit_row = await session.get(MusehubCommit, commit_id)
            assert commit_row is not None, f"commit {commit_id[:18]} not found on server"

            snap_id = commit_row.snapshot_id
            assert snap_id, f"commit {commit_id[:18]} has no snapshot_id"

            snapshot_row = await session.get(MusehubSnapshot, snap_id)
            assert snapshot_row is not None, f"snapshot {snap_id[:18]} not found on server"

            # A NULL blob is reported as None so callers can tell it apart
            # from a stored-but-empty manifest.
            if snapshot_row.manifest_blob is None:
                manifest = None
            else:
                manifest = dict(msgpack.unpackb(snapshot_row.manifest_blob, raw=False))

            directories = list(snapshot_row.directories or [])
            return snap_id, manifest, directories, snapshot_row.entry_count
    finally:
        await engine.dispose()
91
92
@pytest.fixture
def hub_repo(tmp_path: Path):
    """Create a fresh empty (--no-init) hub repo, yield its slug, delete it afterwards.

    Yields:
        The ``gabriel/<slug>`` identifier of the newly created repo.

    Raises:
        RuntimeError: ``muse hub repo create`` failed (via ``muse_check``).

    ``tmp_path`` stays in the signature for compatibility but no longer
    feeds the repo name.
    """
    import uuid  # local import keeps this fixture self-contained

    # pytest builds tmp_path's directory name from the (truncated) test name
    # plus a small counter, so its last six characters repeat on every run.
    # A random suffix keeps the repo name unique even when an earlier run was
    # interrupted before its teardown could delete the repo.
    name = f"test-delta-parent-{uuid.uuid4().hex[:8]}"
    out = muse_check(
        "hub", "repo", "create", "--name", name,
        "--visibility", "public", "--no-init", "--hub", HUB, "--json",
        cwd=REPO_ROOT,
    )
    slug = f"gabriel/{json.loads(out)['slug']}"
    yield slug
    # Best-effort cleanup: a failed delete must not mask the test's own result.
    muse("hub", "repo", "delete", slug, "--yes", "--hub", HUB, "--json", cwd=REPO_ROOT)
105
106
def test_child_of_delta_only_parent_keeps_complete_manifest(tmp_path: Path, hub_repo: str) -> None:
    """D, committed on top of the middle commit B and pushed separately, must be stored complete.

    Two pushes are made: main (A -> B -> C) and then feat (B -> D). The
    manifest stored for D's snapshot must contain f1/f2/f4 and must hash
    back to the stored snapshot_id.
    """
    repo = tmp_path / "seed"
    repo.mkdir()
    muse_check("init", cwd=repo)

    def write_and_commit(filename: str, message: str) -> None:
        # One file per commit; the file body is the commit message plus newline.
        (repo / filename).write_text(f"{message}\n")
        muse_check("code", "add", ".", cwd=repo)
        muse_check("commit", "-m", message, "--agent-id", "test", "--model-id", "test", cwd=repo)

    # push 1 (main): A -> B -> C. B is a middle snapshot => server stores it delta-only.
    write_and_commit("f1.txt", "A")
    write_and_commit("f2.txt", "B")
    write_and_commit("f3.txt", "C")
    remote_url = f"{HUB}/{hub_repo}"
    muse_check("remote", "add", "origin", remote_url, cwd=repo)
    muse_check("push", "origin", "main", cwd=repo)

    # push 2 (feat): branch off the MIDDLE commit B, then add D.
    b_commit = _commit_id_by_message(repo, "B")
    muse_check("branch", "feat", b_commit, cwd=repo)
    muse_check("checkout", "feat", cwd=repo)
    c_only_file = repo / "f3.txt"
    assert not c_only_file.exists(), (
        "precondition: checkout to feat@B must restore the working tree to B (no f3.txt)"
    )
    write_and_commit("f4.txt", "D")
    muse_check("push", "origin", "feat", cwd=repo)

    # D's snapshot must inherit f1+f2 from B and add f4.
    d_commit = _commit_id_by_message(repo, "D")
    stored = asyncio.run(_stored_snapshot_for_commit(d_commit))
    snap_id, manifest, directories, entry_count = stored

    assert manifest is not None, "D's snapshot was stored with a NULL manifest_blob"
    stored_paths = set(manifest)
    # The inherited files (f1, f2 from the delta-only parent B) must NOT be dropped,
    # and D's own file (f4) must be present. Checking only for missing names tolerates
    # muse-init scaffolding files (.museattributes/.museignore) that are also tracked.
    must_have = {"f1.txt", "f2.txt", "f4.txt"}
    missing = must_have - stored_paths
    assert not missing, (
        f"D's stored manifest dropped files inherited from the delta-only parent B "
        f"(base resolved to empty).\n"
        f" stored = {sorted(stored_paths)} (entry_count={entry_count})\n"
        f" must_have = {sorted(must_have)}\n"
        f" missing = {sorted(missing)}"
    )

    # And the stored manifest must reproduce the snapshot_id (no silent corruption).
    recomputed_id = hash_snapshot(manifest, directories)
    assert recomputed_id == snap_id, (
        f"stored manifest does not reproduce snapshot_id {snap_id[:18]} "
        f"(paths={sorted(stored_paths)} dirs={directories})"
    )
File History 1 commit
sha256:4669620efda9ff41c55bdefd1f7bfe1c239d468428744c84ead9957e5a003a53 merge: rescue snapshot-recovery hardening (c00aa21d) into d… Opus 4.8 minor 18 hours ago