"""Phase 1 TDD — issue #61: clone after push from copytree fails with empty mpack. Contract being specified: - `muse remote remove ` must purge all tracking refs for that remote - After `muse push` from a copytree'd repo, the server must hold the objects - `muse clone` immediately after such a push must succeed All three tests are expected to FAIL until the bug is fixed. Repro sequence (mirrors _bench_fetch_or_pull exactly): shutil.copytree(seed) → remote remove → remote add → push → clone """ from __future__ import annotations import json import shutil import subprocess import tempfile from pathlib import Path import pytest pytestmark = pytest.mark.skip(reason="muse wire protocol in flux") HUB = "https://localhost:1337" REPO_ROOT = Path(__file__).parent.parent # ── helpers ─────────────────────────────────────────────────────────────────── def muse(*args: str, cwd: Path, timeout: int = 60) -> subprocess.CompletedProcess: return subprocess.run( ["muse"] + list(args), cwd=str(cwd), capture_output=True, text=True, timeout=timeout, ) def muse_check(*args: str, cwd: Path, timeout: int = 60) -> str: r = muse(*args, cwd=cwd, timeout=timeout) if r.returncode != 0: raise RuntimeError(f"muse {' '.join(args)} failed:\n{r.stderr[:400]}") return r.stdout # ── fixtures ────────────────────────────────────────────────────────────────── @pytest.fixture def seed_repo(tmp_path: Path) -> Path: """A minimal muse repo with one commit — no remotes configured.""" repo = tmp_path / "seed" repo.mkdir() muse_check("init", cwd=repo) (repo / "words.txt").write_text( "abandon ability able about above absent absorb abstract absurd abuse\n" * 20 ) muse_check("code", "add", ".", cwd=repo) muse_check( "commit", "-m", "initial", "--agent-id", "test", "--model-id", "test", cwd=repo, ) return repo @pytest.fixture def hub_repo(tmp_path: Path) -> None: """Create a fresh hub repo, yield its full slug, delete after the test.""" name = f"test-issue-61-probe-{tmp_path.name[-6:]}" out = muse_check( "hub", "repo", "create", "--name", name, "--visibility", "public", "--hub", HUB, "--json", cwd=REPO_ROOT, ) slug = f"gabriel/{json.loads(out)['slug']}" yield slug muse("hub", "repo", "delete", slug, "--yes", "--hub", HUB, "--json", cwd=REPO_ROOT) # ── Phase 2 pre-req: tracking ref hygiene ──────────────────────────────────── class TestRemoteRemoveClearsTrackingRefs: def test_tracking_ref_absent_before_push(self, seed_repo: Path, hub_repo: str) -> None: """Baseline: no tracking refs exist before any remote is configured.""" remotes_dir = seed_repo / ".muse" / "remotes" assert not remotes_dir.exists() or not any(remotes_dir.iterdir()), ( "fresh repo must have no tracking refs" ) def test_push_creates_tracking_ref(self, seed_repo: Path, hub_repo: str) -> None: """After push, a tracking ref for origin/main must exist.""" muse_check("remote", "add", "origin", f"{HUB}/{hub_repo}", cwd=seed_repo) muse_check("push", "origin", "main", cwd=seed_repo) tracking_ref = seed_repo / ".muse" / "remotes" / "origin" / "main" assert tracking_ref.exists(), ( "push must write a tracking ref at .muse/remotes/origin/main" ) def test_remote_remove_clears_tracking_refs(self, seed_repo: Path, hub_repo: str) -> None: """After `muse remote remove origin`, the tracking ref directory must be gone.""" muse_check("remote", "add", "origin", f"{HUB}/{hub_repo}", cwd=seed_repo) muse_check("push", "origin", "main", cwd=seed_repo) muse_check("remote", "remove", "origin", cwd=seed_repo) tracking_dir = seed_repo / ".muse" / "remotes" / "origin" assert not tracking_dir.exists(), ( "`muse remote remove origin` must delete .muse/remotes/origin/ — " "stale tracking refs cause push from copytree to send 0 objects" ) # ── Phase 1: server-side object storage ────────────────────────────────────── class TestCloneAfterPushFromCopytree: """The core bug: clone after push from a shutil.copytree'd repo returns empty mpack.""" def _push_from_copy(self, seed_repo: Path, hub_repo: str, tmp_path: Path) -> Path: """Copy seed, wire new remote, push. Returns the copy path.""" copy = tmp_path / "copy" shutil.copytree(str(seed_repo), str(copy), symlinks=False) muse("remote", "remove", "origin", cwd=copy) # no-op if absent; ignore rc muse_check("remote", "add", "origin", f"{HUB}/{hub_repo}", cwd=copy) muse_check("push", "origin", "main", cwd=copy) return copy def test_push_from_copytree_exits_zero( self, seed_repo: Path, hub_repo: str, tmp_path: Path ) -> None: """Push from a copytree'd repo must exit 0.""" copy = tmp_path / "copy" shutil.copytree(str(seed_repo), str(copy), symlinks=False) muse("remote", "remove", "origin", cwd=copy) muse_check("remote", "add", "origin", f"{HUB}/{hub_repo}", cwd=copy) r = muse("push", "origin", "main", cwd=copy) assert r.returncode == 0, f"push from copytree must exit 0:\n{r.stderr}" def test_server_has_branch_after_push_from_copytree( self, seed_repo: Path, hub_repo: str, tmp_path: Path ) -> None: """After push, ls-remote must report a non-null main branch on the server.""" self._push_from_copy(seed_repo, hub_repo, tmp_path) # Use a throw-away local repo to run ls-remote — avoids polluting seed probe = tmp_path / "probe" probe.mkdir() muse_check("init", cwd=probe) muse_check("remote", "add", "origin", f"{HUB}/{hub_repo}", cwd=probe) out = muse_check("ls-remote", "origin", "--json", cwd=probe) branches = json.loads(out).get("branches", {}) assert "main" in branches and branches["main"], ( f"server must have a main branch after push from copytree — " f"ls-remote returned: {branches}" ) def test_clone_after_push_from_copytree_succeeds( self, seed_repo: Path, hub_repo: str, tmp_path: Path ) -> None: """Clone immediately after push from copytree must exit 0. This is the bug. Expected to FAIL until the root cause is fixed. sha256:e3b0c44 = SHA256(b'') — server returns a zero-byte fetch mpack. """ self._push_from_copy(seed_repo, hub_repo, tmp_path) clone_dir = tmp_path / "clone" clone_dir.mkdir() r = muse("clone", f"{HUB}/{hub_repo}", cwd=clone_dir) assert r.returncode == 0, ( f"clone after push from copytree must succeed — got empty mpack:\n{r.stderr}" ) def test_cloned_repo_has_correct_commit_count( self, seed_repo: Path, hub_repo: str, tmp_path: Path ) -> None: """The cloned repo must have the same number of commits as the source.""" self._push_from_copy(seed_repo, hub_repo, tmp_path) clone_dir = tmp_path / "clone" clone_dir.mkdir() muse_check("clone", f"{HUB}/{hub_repo}", cwd=clone_dir) slug_name = hub_repo.split("/")[-1] cloned = clone_dir / slug_name src_commits = json.loads(muse_check("log", "--json", cwd=seed_repo))["commits"] clone_commits = json.loads(muse_check("log", "--json", cwd=cloned))["commits"] assert len(clone_commits) == len(src_commits), ( f"clone must have {len(src_commits)} commit(s), got {len(clone_commits)}" ) # ── Root cause: commit dedup across repos ──────────────────────────────────── class TestCommitDedupAcrossRepos: """Pins the root cause: musehub_commits.commit_id is the sole PK. When Repo A pushes commit sha256:X, the row is stored with repo_id=A. When Repo B pushes the identical commit, ON CONFLICT DO NOTHING skips the insert. The row stays repo_id=A. The fetch BFS for Repo B queries WHERE commit_id=sha256:X AND repo_id=B — finds nothing — returns empty mpack. The fix must ensure commits pushed to Repo B are visible to Repo B's fetch BFS regardless of which repo first stored the commit. """ @pytest.fixture def hub_repo_b(self, tmp_path: Path) -> None: """A second hub repo for the cross-repo dedup test.""" name = f"test-issue-61-repo-b-{tmp_path.name[-6:]}" out = muse_check( "hub", "repo", "create", "--name", name, "--visibility", "public", "--hub", HUB, "--json", cwd=REPO_ROOT, ) slug = f"gabriel/{json.loads(out)['slug']}" yield slug muse("hub", "repo", "delete", slug, "--yes", "--hub", HUB, "--json", cwd=REPO_ROOT) def test_clone_second_repo_after_same_commits_pushed_to_first( self, seed_repo: Path, hub_repo: str, hub_repo_b: str, tmp_path: Path ) -> None: """Push identical commits to two repos — both must be cloneable. This is the exact bench scenario: bench-seed-xs ← pushed first (Repo A) bench-fetch-xs-0-xxx ← pushed second with same content (Repo B) Repo B clone fails because musehub_commits stores the commit with repo_id=A and the fetch BFS filters WHERE repo_id=B. """ # Push to Repo A first (simulates ensure_hub_seed / bench-seed-xs) muse_check("remote", "add", "origin", f"{HUB}/{hub_repo}", cwd=seed_repo) muse_check("push", "origin", "main", cwd=seed_repo) # Push same commits to Repo B (simulates the bench fetch/pull run repo) copy = tmp_path / "copy" shutil.copytree(str(seed_repo), str(copy), symlinks=False) muse("remote", "remove", "origin", cwd=copy) muse_check("remote", "add", "origin", f"{HUB}/{hub_repo_b}", cwd=copy) muse_check("push", "origin", "main", cwd=copy) # Clone Repo B — this is the failing case clone_dir = tmp_path / "clone" clone_dir.mkdir() r = muse("clone", f"{HUB}/{hub_repo_b}", cwd=clone_dir) assert r.returncode == 0, ( "clone of Repo B must succeed when Repo A already holds the same commits.\n" "Root cause: musehub_commits.commit_id is a sole PK — the second push is " "silently skipped and Repo B's fetch BFS finds zero commits.\n" f"stderr: {r.stderr}" )