test_push_delta_only_parent_manifest.py
file-level
1
files
1
commits
0
hotspots
0
🧊 dead
0
💥 blast risk
| 1 | """TDD — wire-push must not persist an empty/incomplete manifest when a pushed |
| 2 | snapshot's parent is a delta-only external snapshot. |
| 3 | |
| 4 | Root cause (musehub_wire_push.py :: wire_push_unpack_mpack base resolution): |
| 5 | external parent manifests are loaded at push.py:598-605 but ONLY kept when their |
| 6 | `manifest_blob` is non-NULL. A delta-only external parent (manifest_blob=None) is |
| 7 | therefore absent from `_parent_snap_manifests`, and push.py:634-635 falls back to |
| 8 | `_base = {}`. Applying the child's delta onto an empty base yields an incomplete |
| 9 | manifest, which is persisted and no longer reproduces the snapshot_id. On clone |
| 10 | the snapshot is rejected, its commit is dropped, and every descendant fails with |
| 11 | "parent not in mpack" — an empty working tree. |
| 12 | |
| 13 | This is the staging gabriel/muse clone failure: snapshots 708d5734 / 3d5ae8b5 / |
| 14 | edd649a9 were stored with entry_count=0. The bulk-push ladder (a single push) |
| 15 | never created the trigger, so localhost cloned clean. The minimal topology that |
| 16 | DOES trigger it is two pushes where a commit branches off a *middle* snapshot: |
| 17 | |
| 18 | push 1 (main): A -> B -> C # B is a middle snapshot => stored delta-only |
| 19 | push 2 (feat): B -> D # D's parent snapshot is the delta-only B |
| 20 | |
| 21 | INVARIANT under test: a snapshot the server stores with a manifest_blob must hold |
| 22 | the snapshot's complete file set. RED before the fix (D's manifest is missing the |
| 23 | files inherited from B); GREEN after. |
| 24 | |
| 25 | Integration test against localhost (musehub @ :1337, postgres @ :5434), matching |
| 26 | the repo's existing push-test convention (real `muse` CLI, real DB assertions). |
| 27 | """ |
| 28 | from __future__ import annotations |
| 29 | |
| 30 | import asyncio |
| 31 | import json |
| 32 | import subprocess |
| 33 | from collections.abc import Iterator |
| 34 | from pathlib import Path |
| 35 | |
| 36 | import msgpack |
| 37 | import pytest |
| 38 | from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine |
| 39 | from sqlalchemy.orm import sessionmaker |
| 40 | |
| 41 | from muse.core.ids import hash_snapshot |
| 42 | from musehub.db.musehub_repo_models import MusehubCommit, MusehubSnapshot |
| 43 | from musehub.types.json_types import StrDict |
| 44 | |
| 45 | HUB = "https://localhost:1337" |
| 46 | DB_URL = "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub" |
| 47 | REPO_ROOT = Path(__file__).parent.parent |
| 48 | |
| 49 | |
| 50 | def muse(*args: str, cwd: Path, timeout: int = 90) -> subprocess.CompletedProcess: |
| 51 | return subprocess.run( |
| 52 | ["muse", *args], cwd=str(cwd), capture_output=True, text=True, timeout=timeout |
| 53 | ) |
| 54 | |
| 55 | |
| 56 | def muse_check(*args: str, cwd: Path, timeout: int = 90) -> str: |
| 57 | r = muse(*args, cwd=cwd, timeout=timeout) |
| 58 | if r.returncode != 0: |
| 59 | raise RuntimeError(f"muse {' '.join(args)} failed (rc={r.returncode}):\n{r.stderr[:600]}") |
| 60 | return r.stdout |
| 61 | |
| 62 | |
| 63 | def _commit_id_by_message(repo: Path, message: str) -> str: |
| 64 | commits = json.loads(muse_check("log", "--json", cwd=repo))["commits"] |
| 65 | for c in commits: |
| 66 | if (c.get("message") or "").strip() == message: |
| 67 | return c["commit_id"] |
| 68 | raise AssertionError( |
| 69 | f"no commit with message {message!r}; saw {[ (c.get('message') or '').strip() for c in commits]}" |
| 70 | ) |
| 71 | |
| 72 | |
| 73 | async def _stored_snapshot_for_commit(commit_id: str) -> tuple[str, StrDict | None, list[str], int]: |
| 74 | """(snapshot_id, manifest dict|None, directories, entry_count) for a commit's snapshot.""" |
| 75 | engine = create_async_engine(DB_URL) |
| 76 | Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) |
| 77 | try: |
| 78 | async with Session() as s: |
| 79 | commit = await s.get(MusehubCommit, commit_id) |
| 80 | assert commit is not None, f"commit {commit_id[:18]} not found on server" |
| 81 | snap_id = commit.snapshot_id |
| 82 | assert snap_id, f"commit {commit_id[:18]} has no snapshot_id" |
| 83 | snap = await s.get(MusehubSnapshot, snap_id) |
| 84 | assert snap is not None, f"snapshot {snap_id[:18]} not found on server" |
| 85 | manifest = ( |
| 86 | dict(msgpack.unpackb(snap.manifest_blob, raw=False)) |
| 87 | if snap.manifest_blob is not None |
| 88 | else None |
| 89 | ) |
| 90 | return snap_id, manifest, list(snap.directories or []), snap.entry_count |
| 91 | finally: |
| 92 | await engine.dispose() |
| 93 | |
| 94 | |
| 95 | @pytest.fixture |
| 96 | def hub_repo(tmp_path: Path) -> Iterator[str]: |
| 97 | """Fresh empty (--no-init) hub repo; deleted after the test.""" |
| 98 | name = f"test-delta-parent-{tmp_path.name[-6:]}" |
| 99 | out = muse_check( |
| 100 | "hub", "repo", "create", "--name", name, |
| 101 | "--visibility", "public", "--no-init", "--hub", HUB, "--json", |
| 102 | cwd=REPO_ROOT, |
| 103 | ) |
| 104 | slug = f"gabriel/{json.loads(out)['slug']}" |
| 105 | yield slug |
| 106 | muse("hub", "repo", "delete", slug, "--yes", "--hub", HUB, "--json", cwd=REPO_ROOT) |
| 107 | |
| 108 | |
| 109 | def test_child_of_delta_only_parent_keeps_complete_manifest(tmp_path: Path, hub_repo: str) -> None: |
| 110 | repo = tmp_path / "seed" |
| 111 | repo.mkdir() |
| 112 | muse_check("init", cwd=repo) |
| 113 | |
| 114 | # push 1 (main): A -> B -> C. B is a middle snapshot => server stores it delta-only. |
| 115 | for fname, msg in [("f1.txt", "A"), ("f2.txt", "B"), ("f3.txt", "C")]: |
| 116 | (repo / fname).write_text(f"{msg}\n") |
| 117 | muse_check("code", "add", ".", cwd=repo) |
| 118 | muse_check("commit", "-m", msg, "--agent-id", "test", "--model-id", "test", cwd=repo) |
| 119 | muse_check("remote", "add", "origin", f"{HUB}/{hub_repo}", cwd=repo) |
| 120 | muse_check("push", "origin", "main", cwd=repo) |
| 121 | |
| 122 | # push 2 (feat): branch off the MIDDLE commit B, then add D. |
| 123 | b_commit = _commit_id_by_message(repo, "B") |
| 124 | muse_check("branch", "feat", b_commit, cwd=repo) |
| 125 | muse_check("checkout", "feat", cwd=repo) |
| 126 | assert not (repo / "f3.txt").exists(), ( |
| 127 | "precondition: checkout to feat@B must restore the working tree to B (no f3.txt)" |
| 128 | ) |
| 129 | (repo / "f4.txt").write_text("D\n") |
| 130 | muse_check("code", "add", ".", cwd=repo) |
| 131 | muse_check("commit", "-m", "D", "--agent-id", "test", "--model-id", "test", cwd=repo) |
| 132 | muse_check("push", "origin", "feat", cwd=repo) |
| 133 | |
| 134 | # D's snapshot must inherit f1+f2 from B and add f4. |
| 135 | d_commit = _commit_id_by_message(repo, "D") |
| 136 | snap_id, manifest, directories, entry_count = asyncio.run(_stored_snapshot_for_commit(d_commit)) |
| 137 | |
| 138 | assert manifest is not None, "D's snapshot was stored with a NULL manifest_blob" |
| 139 | stored_paths = set(manifest) |
| 140 | # The inherited files (f1, f2 from the delta-only parent B) must NOT be dropped, |
| 141 | # and D's own file (f4) must be present. Subset check tolerates muse-init scaffolding |
| 142 | # files (.museattributes/.museignore) that are also tracked. |
| 143 | must_have = {"f1.txt", "f2.txt", "f4.txt"} |
| 144 | assert must_have <= stored_paths, ( |
| 145 | f"D's stored manifest dropped files inherited from the delta-only parent B " |
| 146 | f"(base resolved to empty).\n" |
| 147 | f" stored = {sorted(stored_paths)} (entry_count={entry_count})\n" |
| 148 | f" must_have = {sorted(must_have)}\n" |
| 149 | f" missing = {sorted(must_have - stored_paths)}" |
| 150 | ) |
| 151 | # And the stored manifest must reproduce the snapshot_id (no silent corruption). |
| 152 | assert hash_snapshot(manifest, directories) == snap_id, ( |
| 153 | f"stored manifest does not reproduce snapshot_id {snap_id[:18]} " |
| 154 | f"(paths={sorted(stored_paths)} dirs={directories})" |
| 155 | ) |