"""TDD — wire-push must not persist an empty/incomplete manifest when a pushed
snapshot's parent is a delta-only external snapshot.

Root cause (musehub_wire_push.py :: wire_push_unpack_mpack base resolution):
external parent manifests are loaded at push.py:598-605 but ONLY kept when
their `manifest_blob` is non-NULL. A delta-only external parent
(manifest_blob=None) is therefore absent from `_parent_snap_manifests`, and
push.py:634-635 falls back to `_base = {}`. Applying the child's delta onto an
empty base yields an incomplete manifest, which is persisted and no longer
reproduces the snapshot_id. On clone the snapshot is rejected, its commit is
dropped, and every descendant fails with "parent not in mpack" — an empty
working tree.

This is the staging gabriel/muse clone failure: snapshots 708d5734 / 3d5ae8b5 /
edd649a9 were stored with entry_count=0. The bulk-push ladder (a single push)
never created the trigger, so localhost cloned clean. The minimal topology
that DOES trigger it is two pushes where a commit branches off a *middle*
snapshot:

    push 1 (main):  A -> B -> C    # B is a middle snapshot => stored delta-only
    push 2 (feat):  B -> D         # D's parent snapshot is the delta-only B

INVARIANT under test: a snapshot the server stores with a manifest_blob must
hold the snapshot's complete file set. RED before the fix (D's manifest is
missing the files inherited from B); GREEN after.

Integration test against localhost (musehub @ :1337, postgres @ :5434),
matching the repo's existing push-test convention (real `muse` CLI, real DB
assertions).
"""
from __future__ import annotations

import asyncio
import json
import subprocess
import uuid
from collections.abc import Iterator
from pathlib import Path
from typing import Any

import msgpack
import pytest
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker

from muse.core.ids import hash_snapshot
from musehub.db.musehub_repo_models import MusehubCommit, MusehubSnapshot

HUB = "https://localhost:1337"
DB_URL = "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub"
REPO_ROOT = Path(__file__).parent.parent
# Namespace prefixed to the slug returned by `muse hub repo create`.
# NOTE(review): presumably the account the local `muse` CLI is logged in as — confirm.
HUB_OWNER = "gabriel"


def muse(*args: str, cwd: Path, timeout: int = 90) -> subprocess.CompletedProcess[str]:
    """Run ``muse <args>`` in *cwd* and return the completed process.

    stdout/stderr are captured as text. A non-zero exit status is NOT raised
    here; callers that need a hard failure use :func:`muse_check`.
    ``subprocess.TimeoutExpired`` propagates if the command exceeds *timeout*
    seconds.
    """
    return subprocess.run(
        ["muse", *args], cwd=str(cwd), capture_output=True, text=True, timeout=timeout
    )


def muse_check(*args: str, cwd: Path, timeout: int = 90) -> str:
    """Run ``muse <args>`` in *cwd* and return its stdout.

    Raises:
        RuntimeError: if the command exits non-zero; the message carries the
            return code and the first 600 characters of stderr.
    """
    r = muse(*args, cwd=cwd, timeout=timeout)
    if r.returncode != 0:
        raise RuntimeError(f"muse {' '.join(args)} failed (rc={r.returncode}):\n{r.stderr[:600]}")
    return r.stdout


def _commit_id_by_message(repo: Path, message: str) -> str:
    """Return the id of the first commit in ``muse log --json`` whose stripped message equals *message*.

    Raises:
        AssertionError: if no commit matches; the error lists the messages seen.
    """
    commits = json.loads(muse_check("log", "--json", cwd=repo))["commits"]
    for c in commits:
        if (c.get("message") or "").strip() == message:
            return c["commit_id"]
    seen = [(c.get("message") or "").strip() for c in commits]
    raise AssertionError(f"no commit with message {message!r}; saw {seen}")


async def _stored_snapshot_for_commit(
    commit_id: str,
) -> tuple[str, dict[str, Any] | None, list[Any], Any]:
    """(snapshot_id, manifest dict|None, directories, entry_count) for a commit's snapshot.

    Reads the commit row and its snapshot row straight from the database at
    ``DB_URL``. The manifest is the msgpack-decoded ``manifest_blob``, or
    ``None`` when the blob column is NULL. A NULL ``directories`` column is
    returned as an empty list.

    Raises:
        AssertionError: if the commit is missing, has no snapshot_id, or the
            snapshot row is missing.
    """
    engine = create_async_engine(DB_URL)
    Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
    try:
        async with Session() as s:
            commit = await s.get(MusehubCommit, commit_id)
            assert commit is not None, f"commit {commit_id[:18]} not found on server"
            snap_id = commit.snapshot_id
            assert snap_id, f"commit {commit_id[:18]} has no snapshot_id"
            snap = await s.get(MusehubSnapshot, snap_id)
            assert snap is not None, f"snapshot {snap_id[:18]} not found on server"
            manifest = (
                dict(msgpack.unpackb(snap.manifest_blob, raw=False))
                if snap.manifest_blob is not None
                else None
            )
            return snap_id, manifest, list(snap.directories or []), snap.entry_count
    finally:
        # Always release the engine's connections, even when an assertion fails.
        await engine.dispose()


@pytest.fixture
def hub_repo() -> Iterator[str]:
    """Fresh empty (--no-init) hub repo; deleted after the test.

    Yields the ``<owner>/<slug>`` path of the created repo.
    """
    # A random suffix keeps the name unique across runs. The previous suffix,
    # the last six characters of the tmp_path directory name, is the same on
    # every run of a given test, so a repo left behind by an interrupted run
    # (teardown is best-effort) could collide with the next `create`.
    name = f"test-delta-parent-{uuid.uuid4().hex[:8]}"
    out = muse_check(
        "hub", "repo", "create",
        "--name", name,
        "--visibility", "public",
        "--no-init",
        "--hub", HUB,
        "--json",
        cwd=REPO_ROOT,
    )
    slug = f"{HUB_OWNER}/{json.loads(out)['slug']}"
    yield slug
    # Best-effort cleanup: the exit status is deliberately not checked.
    muse("hub", "repo", "delete", slug, "--yes", "--hub", HUB, "--json", cwd=REPO_ROOT)


def test_child_of_delta_only_parent_keeps_complete_manifest(tmp_path: Path, hub_repo: str) -> None:
    """A snapshot whose parent is stored delta-only must still be stored with its full manifest."""
    repo = tmp_path / "seed"
    repo.mkdir()
    muse_check("init", cwd=repo)

    # push 1 (main): A -> B -> C. B is a middle snapshot => server stores it delta-only.
    for fname, msg in [("f1.txt", "A"), ("f2.txt", "B"), ("f3.txt", "C")]:
        (repo / fname).write_text(f"{msg}\n")
        muse_check("code", "add", ".", cwd=repo)
        muse_check("commit", "-m", msg, "--agent-id", "test", "--model-id", "test", cwd=repo)
    muse_check("remote", "add", "origin", f"{HUB}/{hub_repo}", cwd=repo)
    muse_check("push", "origin", "main", cwd=repo)

    # push 2 (feat): branch off the MIDDLE commit B, then add D.
    b_commit = _commit_id_by_message(repo, "B")
    muse_check("branch", "feat", b_commit, cwd=repo)
    muse_check("checkout", "feat", cwd=repo)
    assert not (repo / "f3.txt").exists(), (
        "precondition: checkout to feat@B must restore the working tree to B (no f3.txt)"
    )
    (repo / "f4.txt").write_text("D\n")
    muse_check("code", "add", ".", cwd=repo)
    muse_check("commit", "-m", "D", "--agent-id", "test", "--model-id", "test", cwd=repo)
    muse_check("push", "origin", "feat", cwd=repo)

    # D's snapshot must inherit f1+f2 from B and add f4.
    d_commit = _commit_id_by_message(repo, "D")
    snap_id, manifest, directories, entry_count = asyncio.run(_stored_snapshot_for_commit(d_commit))

    assert manifest is not None, "D's snapshot was stored with a NULL manifest_blob"
    stored_paths = set(manifest)

    # The inherited files (f1, f2 from the delta-only parent B) must NOT be dropped,
    # and D's own file (f4) must be present. Subset check tolerates muse-init scaffolding
    # files (.museattributes/.museignore) that are also tracked.
    must_have = {"f1.txt", "f2.txt", "f4.txt"}
    assert must_have <= stored_paths, (
        f"D's stored manifest dropped files inherited from the delta-only parent B "
        f"(base resolved to empty).\n"
        f" stored = {sorted(stored_paths)} (entry_count={entry_count})\n"
        f" must_have = {sorted(must_have)}\n"
        f" missing = {sorted(must_have - stored_paths)}"
    )

    # And the stored manifest must reproduce the snapshot_id (no silent corruption).
    assert hash_snapshot(manifest, directories) == snap_id, (
        f"stored manifest does not reproduce snapshot_id {snap_id[:18]} "
        f"(paths={sorted(stored_paths)} dirs={directories})"
    )