gabriel / musehub public

test_push_delta_only_parent_manifest.py file-level

at sha256:4 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:9 docs: document all 6 HD path segments; fix deep link anchors Three new… · gabriel · Jun 17, 2026
1 """TDD — wire-push must not persist an empty/incomplete manifest when a pushed
2 snapshot's parent is a delta-only external snapshot.
3
4 Root cause (musehub_wire_push.py :: wire_push_unpack_mpack base resolution):
5 external parent manifests are loaded at push.py:598-605 but ONLY kept when their
6 `manifest_blob` is non-NULL. A delta-only external parent (manifest_blob=None) is
7 therefore absent from `_parent_snap_manifests`, and push.py:634-635 falls back to
8 `_base = {}`. Applying the child's delta onto an empty base yields an incomplete
9 manifest, which is persisted and no longer reproduces the snapshot_id. On clone
10 the snapshot is rejected, its commit is dropped, and every descendant fails with
11 "parent not in mpack" — an empty working tree.
12
13 This is the staging gabriel/muse clone failure: snapshots 708d5734 / 3d5ae8b5 /
14 edd649a9 were stored with entry_count=0. The bulk-push ladder (a single push)
15 never created the trigger, so localhost cloned clean. The minimal topology that
16 DOES trigger it is two pushes where a commit branches off a *middle* snapshot:
17
18 push 1 (main): A -> B -> C # B is a middle snapshot => stored delta-only
19 push 2 (feat): B -> D # D's parent snapshot is the delta-only B
20
21 INVARIANT under test: a snapshot the server stores with a manifest_blob must hold
22 the snapshot's complete file set. RED before the fix (D's manifest is missing the
23 files inherited from B); GREEN after.
24
25 Integration test against localhost (musehub @ :1337, postgres @ :5434), matching
26 the repo's existing push-test convention (real `muse` CLI, real DB assertions).
27 """
28 from __future__ import annotations
29
30 import asyncio
31 import json
32 import subprocess
33 from collections.abc import Iterator
34 from pathlib import Path
35
36 import msgpack
37 import pytest
38 from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
39 from sqlalchemy.orm import sessionmaker
40
41 from muse.core.ids import hash_snapshot
42 from musehub.db.musehub_repo_models import MusehubCommit, MusehubSnapshot
43 from musehub.types.json_types import StrDict
44
45 HUB = "https://localhost:1337"
46 DB_URL = "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub"
47 REPO_ROOT = Path(__file__).parent.parent
48
49
50 def muse(*args: str, cwd: Path, timeout: int = 90) -> subprocess.CompletedProcess:
51 return subprocess.run(
52 ["muse", *args], cwd=str(cwd), capture_output=True, text=True, timeout=timeout
53 )
54
55
56 def muse_check(*args: str, cwd: Path, timeout: int = 90) -> str:
57 r = muse(*args, cwd=cwd, timeout=timeout)
58 if r.returncode != 0:
59 raise RuntimeError(f"muse {' '.join(args)} failed (rc={r.returncode}):\n{r.stderr[:600]}")
60 return r.stdout
61
62
63 def _commit_id_by_message(repo: Path, message: str) -> str:
64 commits = json.loads(muse_check("log", "--json", cwd=repo))["commits"]
65 for c in commits:
66 if (c.get("message") or "").strip() == message:
67 return c["commit_id"]
68 raise AssertionError(
69 f"no commit with message {message!r}; saw {[ (c.get('message') or '').strip() for c in commits]}"
70 )
71
72
73 async def _stored_snapshot_for_commit(commit_id: str) -> tuple[str, StrDict | None, list[str], int]:
74 """(snapshot_id, manifest dict|None, directories, entry_count) for a commit's snapshot."""
75 engine = create_async_engine(DB_URL)
76 Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
77 try:
78 async with Session() as s:
79 commit = await s.get(MusehubCommit, commit_id)
80 assert commit is not None, f"commit {commit_id[:18]} not found on server"
81 snap_id = commit.snapshot_id
82 assert snap_id, f"commit {commit_id[:18]} has no snapshot_id"
83 snap = await s.get(MusehubSnapshot, snap_id)
84 assert snap is not None, f"snapshot {snap_id[:18]} not found on server"
85 manifest = (
86 dict(msgpack.unpackb(snap.manifest_blob, raw=False))
87 if snap.manifest_blob is not None
88 else None
89 )
90 return snap_id, manifest, list(snap.directories or []), snap.entry_count
91 finally:
92 await engine.dispose()
93
94
95 @pytest.fixture
96 def hub_repo(tmp_path: Path) -> Iterator[str]:
97 """Fresh empty (--no-init) hub repo; deleted after the test."""
98 name = f"test-delta-parent-{tmp_path.name[-6:]}"
99 out = muse_check(
100 "hub", "repo", "create", "--name", name,
101 "--visibility", "public", "--no-init", "--hub", HUB, "--json",
102 cwd=REPO_ROOT,
103 )
104 slug = f"gabriel/{json.loads(out)['slug']}"
105 yield slug
106 muse("hub", "repo", "delete", slug, "--yes", "--hub", HUB, "--json", cwd=REPO_ROOT)
107
108
109 def test_child_of_delta_only_parent_keeps_complete_manifest(tmp_path: Path, hub_repo: str) -> None:
110 repo = tmp_path / "seed"
111 repo.mkdir()
112 muse_check("init", cwd=repo)
113
114 # push 1 (main): A -> B -> C. B is a middle snapshot => server stores it delta-only.
115 for fname, msg in [("f1.txt", "A"), ("f2.txt", "B"), ("f3.txt", "C")]:
116 (repo / fname).write_text(f"{msg}\n")
117 muse_check("code", "add", ".", cwd=repo)
118 muse_check("commit", "-m", msg, "--agent-id", "test", "--model-id", "test", cwd=repo)
119 muse_check("remote", "add", "origin", f"{HUB}/{hub_repo}", cwd=repo)
120 muse_check("push", "origin", "main", cwd=repo)
121
122 # push 2 (feat): branch off the MIDDLE commit B, then add D.
123 b_commit = _commit_id_by_message(repo, "B")
124 muse_check("branch", "feat", b_commit, cwd=repo)
125 muse_check("checkout", "feat", cwd=repo)
126 assert not (repo / "f3.txt").exists(), (
127 "precondition: checkout to feat@B must restore the working tree to B (no f3.txt)"
128 )
129 (repo / "f4.txt").write_text("D\n")
130 muse_check("code", "add", ".", cwd=repo)
131 muse_check("commit", "-m", "D", "--agent-id", "test", "--model-id", "test", cwd=repo)
132 muse_check("push", "origin", "feat", cwd=repo)
133
134 # D's snapshot must inherit f1+f2 from B and add f4.
135 d_commit = _commit_id_by_message(repo, "D")
136 snap_id, manifest, directories, entry_count = asyncio.run(_stored_snapshot_for_commit(d_commit))
137
138 assert manifest is not None, "D's snapshot was stored with a NULL manifest_blob"
139 stored_paths = set(manifest)
140 # The inherited files (f1, f2 from the delta-only parent B) must NOT be dropped,
141 # and D's own file (f4) must be present. Subset check tolerates muse-init scaffolding
142 # files (.museattributes/.museignore) that are also tracked.
143 must_have = {"f1.txt", "f2.txt", "f4.txt"}
144 assert must_have <= stored_paths, (
145 f"D's stored manifest dropped files inherited from the delta-only parent B "
146 f"(base resolved to empty).\n"
147 f" stored = {sorted(stored_paths)} (entry_count={entry_count})\n"
148 f" must_have = {sorted(must_have)}\n"
149 f" missing = {sorted(must_have - stored_paths)}"
150 )
151 # And the stored manifest must reproduce the snapshot_id (no silent corruption).
152 assert hash_snapshot(manifest, directories) == snap_id, (
153 f"stored manifest does not reproduce snapshot_id {snap_id[:18]} "
154 f"(paths={sorted(stored_paths)} dirs={directories})"
155 )