"""TDD — Phase 1: branch_have uses all remote branch heads unconditionally. Root cause (issue #56) ----------------------- ``branch_have`` — the BFS stop set passed to ``walk_commits`` / ``build_mpack`` — only contains the target branch's remote HEAD for non-merge commits. When pushing a second branch (e.g. ``dev`` after ``main``), the client should stop the DAG walk at any commit already on the remote, regardless of which remote branch contains it. Without this, the client walks back to the repository root and packs every commit the remote already holds under ``main``. Scenario -------- Repo history:: A ← B ← C ← D (main) ↑ E (dev, 1 commit ahead) Remote state after first push:: main → D Correct second push (dev):: branch_have = [D] # D is main's remote head — walk stops there new_commits = [E] # only E is new Current buggy second push (dev):: branch_have = [] # dev has no remote head yet → target head is null new_commits = [E, D, C, B, A] # entire history re-sent Coverage -------- BH-1 branch_have includes all remote branch heads, not just target branch. BH-2 Non-merge commit: walk stops at any remote branch head, not just target. BH-3 walk_commits BFS count equals expected new commits only. BH-4 build_mpack sends only genuinely new commits + objects. BH-5 mpack size is small (no redundant commits) on second-branch push. """ from __future__ import annotations import datetime import pathlib import pytest from muse.core.commits import write_commit, CommitRecord from muse.core.mpack import walk_commits, build_mpack_from_walk from muse.core.object_store import write_object from muse.core.refs import write_branch_ref from muse.core.snapshot import compute_commit_id, compute_snapshot_id from muse.core.snapshots import write_snapshot, SnapshotRecord # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- _TS = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) def _obj(root: pathlib.Path, content: bytes) -> str: from muse.core.types import blob_id oid = blob_id(content) write_object(root, oid, content) return oid def _snap(root: pathlib.Path, manifest: dict[str, str], dirs: list[str] | None = None) -> str: sid = compute_snapshot_id(manifest, dirs) write_snapshot(root, SnapshotRecord( snapshot_id=sid, manifest=manifest, directories=dirs or [], )) return sid def _commit( root: pathlib.Path, message: str, snapshot_id: str, parent: str | None = None, author: str = "test", ) -> str: cid = compute_commit_id( parent_ids=[parent] if parent else [], snapshot_id=snapshot_id, message=message, committed_at_iso=_TS.isoformat(), author=author, ) write_commit(root, CommitRecord( commit_id=cid, branch="main", snapshot_id=snapshot_id, message=message, committed_at=_TS, parent_commit_id=parent, author=author, )) return cid def _build_linear_chain(root: pathlib.Path, n: int) -> list[str]: """Build n commits A→B→…→N, each adding a file. Returns commit IDs oldest first.""" commits: list[str] = [] manifest: dict[str, str] = {} prev: str | None = None for i in range(n): content = f"file {i}".encode() oid = _obj(root, content) manifest[f"file{i}.txt"] = oid sid = _snap(root, dict(manifest)) cid = _commit(root, f"commit {i}", sid, parent=prev) commits.append(cid) prev = cid return commits # --------------------------------------------------------------------------- # BH-1 branch_have includes ALL remote branch heads # --------------------------------------------------------------------------- def test_BH1_branch_have_includes_all_remote_heads( tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> None: """BH-1: branch_have from _push_mpack includes all remote branch heads.""" from muse.cli.commands import push as push_mod # Simulate remote_branch_heads returned by fetch_remote_refs remote_heads = { "main": "sha256:" + "a" * 64, "dev": "sha256:" + "b" * 64, "feat": "sha256:" + "c" * 64, } local_head = "sha256:" + "d" * 64 push_branch = "newbranch" # The branch_have logic lives in the run() function. # We extract just the computation: all remote heads that are valid commit IDs. from muse.cli.commands.push import _is_valid_commit_id # Current (buggy) logic: only uses remote_head for target branch target_remote_head = remote_heads.get(push_branch) # None — new branch buggy_branch_have = ( [target_remote_head] if target_remote_head and _is_valid_commit_id(target_remote_head) else [] ) assert buggy_branch_have == [], "Buggy logic: empty when pushing new branch" # Fixed logic: use ALL remote heads unconditionally fixed_branch_have = [ h for h in remote_heads.values() if _is_valid_commit_id(h) ] assert len(fixed_branch_have) == 3, ( f"Fixed logic must include all {len(remote_heads)} remote heads, " f"got {len(fixed_branch_have)}" ) assert "sha256:" + "a" * 64 in fixed_branch_have assert "sha256:" + "b" * 64 in fixed_branch_have assert "sha256:" + "c" * 64 in fixed_branch_have # --------------------------------------------------------------------------- # BH-2 Non-merge push stops at remote branches other than target # --------------------------------------------------------------------------- def test_BH2_walk_stops_at_any_remote_head(tmp_path: pathlib.Path) -> None: """BH-2: walk_commits stops at a commit on a different remote branch.""" monkeypatch = None # not needed — we test walk_commits directly from muse.core.mpack import walk_commits as _walk root = tmp_path from muse.core.paths import init_repo_dirs as init_repo init_repo(root) # Build: A → B → C → D (main), D → E (dev) commits = _build_linear_chain(root, 5) # A B C D E A, B, C, D, E = commits # Simulate: main is at D on remote, dev has E locally but no remote head # Fixed branch_have: [D] (main's remote head) branch_have_fixed = [D] result = _walk(root, [E], have=branch_have_fixed) new_commit_ids = [c.commit_id for c in result["commits"]] # Only E should be new — D and below are already on remote (via main) assert E in new_commit_ids, "E (new dev commit) must be in walk result" assert D not in new_commit_ids, ( "D must NOT be in walk result — it's already on remote under main" ) assert len(new_commit_ids) == 1, ( f"Only 1 new commit (E), got {len(new_commit_ids)}: {new_commit_ids}" ) # --------------------------------------------------------------------------- # BH-3 Buggy logic walks entire history for new branch # --------------------------------------------------------------------------- def test_BH3_buggy_branch_have_walks_entire_history(tmp_path: pathlib.Path) -> None: """BH-3: Without the fix, pushing a new branch re-sends the entire DAG.""" from muse.core.mpack import walk_commits as _walk from muse.core.paths import init_repo_dirs as init_repo root = tmp_path init_repo(root) commits = _build_linear_chain(root, 5) A, B, C, D, E = commits # Buggy branch_have: [] (target branch has no remote head yet) branch_have_buggy: list[str] = [] result = _walk(root, [E], have=branch_have_buggy) new_commit_ids = [c.commit_id for c in result["commits"]] # All 5 commits are sent — the entire history assert len(new_commit_ids) == 5, ( f"Buggy logic must walk all 5 commits, got {len(new_commit_ids)}" ) # --------------------------------------------------------------------------- # BH-4 build_mpack_from_walk sends only new commits + objects # --------------------------------------------------------------------------- def test_BH4_build_mpack_only_contains_new_commits(tmp_path: pathlib.Path) -> None: """BH-4: With fixed branch_have, mpack only contains E's commit and objects.""" from muse.core.mpack import walk_commits as _walk, build_mpack_from_walk from muse.core.paths import init_repo_dirs as init_repo root = tmp_path init_repo(root) commits = _build_linear_chain(root, 5) A, B, C, D, E = commits branch_have_fixed = [D] result = _walk(root, [E], have=branch_have_fixed) mpack = build_mpack_from_walk(root, result) commit_ids_in_mpack = [c["commit_id"] if isinstance(c, dict) else c.commit_id for c in mpack.get("commits", [])] assert E in commit_ids_in_mpack assert D not in commit_ids_in_mpack, "D already on remote — must not be in mpack" assert len(commit_ids_in_mpack) == 1, ( f"Mpack must contain exactly 1 commit (E), got {len(commit_ids_in_mpack)}" ) # Blobs: only the object added in commit E (file4.txt), not earlier files blob_ids = [b["object_id"] for b in mpack.get("blobs", [])] assert len(blob_ids) <= 1, ( f"Mpack must contain at most 1 new blob (file4.txt), got {len(blob_ids)}" ) # --------------------------------------------------------------------------- # BH-5 Fixed push.py branch_have is unconditional for all commits # --------------------------------------------------------------------------- def test_BH5_push_run_uses_all_remote_heads_as_branch_have( tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch ) -> None: """BH-5: push.run() builds branch_have from ALL remote branch heads.""" from muse.core.paths import init_repo_dirs as init_repo from muse.cli.commands.push import _push_mpack, _is_valid_commit_id root = tmp_path init_repo(root) commits = _build_linear_chain(root, 3) A, B, C = commits write_branch_ref(root, "main", C) captured_branch_have: list[list[str]] = [] original_push_mpack = _push_mpack def _spy_push_mpack(*args: object, branch_have: list[str] | None = None, **kwargs: object) -> object: captured_branch_have.append(list(branch_have or [])) raise SystemExit(0) # abort early — we just need branch_have monkeypatch.setattr("muse.cli.commands.push._push_mpack", _spy_push_mpack) import argparse from unittest.mock import MagicMock, patch remote_heads = { "main": "sha256:" + "a" * 64, "staging": "sha256:" + "b" * 64, } mock_info = remote_heads # push.py accesses info["branch_heads"] — return a plain dict mock_transport = MagicMock() mock_transport.fetch_remote_info.return_value = {"branch_heads": remote_heads} with patch("muse.cli.commands.push.make_transport", return_value=mock_transport), \ patch("muse.cli.commands.push.get_remote", return_value="https://example.com/repo"), \ patch("muse.cli.commands.push.get_signing_identity", return_value=None), \ patch("muse.cli.commands.push.get_remote_head", return_value=None), \ patch("muse.cli.commands.push.require_repo", return_value=root), \ patch("muse.cli.commands.push.read_current_branch", return_value="newbranch"), \ patch("muse.cli.commands.push.get_head_commit_id", return_value=C): try: args = argparse.Namespace( remote="origin", branch=None, force=False, force_with_lease=False, dry_run=False, delete=False, json_out=False, upstream=False, workers=4, set_upstream_flag=False, ) from muse.cli.commands.push import run run(args) except SystemExit: pass assert captured_branch_have, "branch_have was never captured — spy not called" bh = captured_branch_have[0] # Fixed: ALL remote heads must be in branch_have for remote_head in remote_heads.values(): assert remote_head in bh, ( f"Fixed branch_have must include all remote heads. " f"Missing {remote_head[:20]}. Got: {[h[:20] for h in bh]}" )