"""TDD — push step 1: walk local DAG to find commits not on remote.

Pseudocode (issue #57 step 1):

    if remote_head is null:
        new_commits = all commits reachable from local tip
                      (topo sorted, ancestors first)
    else if remote_head == local_tip:
        nothing to push → exit
    else:
        new_commits = commits reachable from local tip,
                      not reachable from remote_head
                      (topo sorted, ancestors first)

    NOTE: use remote_head (target branch only) as the commit walk boundary;
          use full "have" set (all remote branch heads) for object dedup

Coverage
--------
A  remote_head is null → all commits sent (first push to a new branch)
B  remote_head == local_tip → zero commits (already up to date)
C  remote_head behind local_tip → only the delta commits sent (normal incremental push)
D  commit walk boundary is remote_head only (not all of have)
E  object dedup uses full have set (all branch heads), not just remote_head
F  topo order: ancestors appear before descendants in new_commits
G  multi-branch: objects already on remote via another branch are not resent
"""

from __future__ import annotations

import datetime
import json
import pathlib

import pytest

from muse._version import __version__
from muse.core.object_store import write_object
from muse.core.mpack import walk_commits
from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
from muse.core.commits import CommitRecord, write_commit
from muse.core.snapshots import SnapshotRecord, write_snapshot
from muse.core.types import blob_id
from muse.core.paths import muse_dir


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Fixed timestamp shared by every test commit so commit ids are reproducible.
_TS = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)


def _repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create a minimal repository layout under *tmp_path* and return it.

    Creates the ``objects``, ``refs/heads`` and ``remotes`` directories inside
    ``muse_dir(tmp_path)``, points ``HEAD`` at ``refs/heads/main`` and writes a
    ``repo.json`` describing the test repository.
    """
    muse = muse_dir(tmp_path)
    for d in ("objects", "refs/heads", "remotes"):
        (muse / d).mkdir(parents=True, exist_ok=True)
    (muse / "HEAD").write_text("ref: refs/heads/main\n", encoding="utf-8")
    (muse / "repo.json").write_text(
        json.dumps({"repo_id": "test-repo", "schema_version": __version__, "domain": "code"}),
        encoding="utf-8",
    )
    return tmp_path


def _obj(root: pathlib.Path, content: bytes) -> str:
    """Write *content* to the object store under *root* and return its blob id."""
    oid = blob_id(content)
    write_object(root, oid, content)
    return oid


def _commit(
    root: pathlib.Path,
    manifest: dict[str, str],
    *,
    parent_id: str | None = None,
    message: str = "test",
) -> CommitRecord:
    """Write a snapshot plus a commit on branch ``main`` and return the commit.

    Args:
        root: Repository root the snapshot and commit are written to.
        manifest: Mapping stored as the snapshot manifest (the tests use
            path → blob id).
        parent_id: Commit id of the single parent, or ``None`` for a root
            commit.
        message: Commit message; also feeds the commit id hash.

    Returns:
        The ``CommitRecord`` that was written.
    """
    snap_id = compute_snapshot_id(manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))
    parent_ids = [parent_id] if parent_id else []
    cid = compute_commit_id(
        parent_ids=parent_ids,
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=_TS.isoformat(),
    )
    cr = CommitRecord(
        commit_id=cid,
        branch="main",
        snapshot_id=snap_id,
        message=message,
        committed_at=_TS,
        parent_commit_id=parent_id,
    )
    write_commit(root, cr)
    return cr


# ---------------------------------------------------------------------------
# A — remote_head is null → send everything (first push)
# ---------------------------------------------------------------------------


class TestCaseA:
    """First push to a new branch: an empty ``have`` sends the full history."""

    def test_null_remote_head_sends_all_commits(self, tmp_path: pathlib.Path) -> None:
        root = _repo(tmp_path)
        o1 = _obj(root, b"file 1")
        o2 = _obj(root, b"file 2")
        c1 = _commit(root, {"a.txt": o1})
        c2 = _commit(root, {"a.txt": o1, "b.txt": o2}, parent_id=c1.commit_id)

        # remote_head=null means have=[]
        walk = walk_commits(root, [c2.commit_id], have=[])
        commit_ids = [c.commit_id for c in walk["commits"]]

        assert c1.commit_id in commit_ids
        assert c2.commit_id in commit_ids
        assert len(commit_ids) == 2

    def test_null_remote_head_sends_all_objects(self, tmp_path: pathlib.Path) -> None:
        root = _repo(tmp_path)
        o1 = _obj(root, b"file 1")
        o2 = _obj(root, b"file 2")
        c1 = _commit(root, {"a.txt": o1})
        c2 = _commit(root, {"a.txt": o1, "b.txt": o2}, parent_id=c1.commit_id)

        walk = walk_commits(root, [c2.commit_id], have=[])

        assert o1 in walk["all_blob_ids"]
        assert o2 in walk["all_blob_ids"]


# ---------------------------------------------------------------------------
# B — remote_head == local_tip → nothing to send
# ---------------------------------------------------------------------------


class TestCaseB:
    """Already up to date: the local tip is in ``have``, so nothing is sent."""

    def test_up_to_date_sends_zero_commits(self, tmp_path: pathlib.Path) -> None:
        root = _repo(tmp_path)
        o1 = _obj(root, b"file content")
        c1 = _commit(root, {"a.txt": o1})

        # remote_head == local_tip → have=[c1] stops the walk immediately
        walk = walk_commits(root, [c1.commit_id], have=[c1.commit_id])

        assert walk["commits"] == []

    def test_up_to_date_sends_zero_objects(self, tmp_path: pathlib.Path) -> None:
        root = _repo(tmp_path)
        o1 = _obj(root, b"file content")
        c1 = _commit(root, {"a.txt": o1})

        walk = walk_commits(root, [c1.commit_id], have=[c1.commit_id])

        assert walk["all_blob_ids"] == []


# ---------------------------------------------------------------------------
# C — remote_head behind local_tip → only delta commits sent
# ---------------------------------------------------------------------------


class TestCaseC:
    """Normal incremental push: only commits after remote_head are sent."""

    def test_incremental_push_sends_only_new_commits(self, tmp_path: pathlib.Path) -> None:
        root = _repo(tmp_path)
        o1 = _obj(root, b"original")
        o2 = _obj(root, b"new content")
        c1 = _commit(root, {"a.txt": o1})
        c2 = _commit(root, {"a.txt": o2}, parent_id=c1.commit_id)

        # remote has c1, local is at c2
        walk = walk_commits(root, [c2.commit_id], have=[c1.commit_id])
        commit_ids = [c.commit_id for c in walk["commits"]]

        assert c2.commit_id in commit_ids
        assert c1.commit_id not in commit_ids, "c1 is already on remote — must not be resent"

    def test_three_commit_chain_sends_two_new(self, tmp_path: pathlib.Path) -> None:
        root = _repo(tmp_path)
        o = [_obj(root, f"v{i}".encode()) for i in range(3)]
        c1 = _commit(root, {"f.txt": o[0]})
        c2 = _commit(root, {"f.txt": o[1]}, parent_id=c1.commit_id)
        c3 = _commit(root, {"f.txt": o[2]}, parent_id=c2.commit_id)

        # remote has c1; local is at c3
        walk = walk_commits(root, [c3.commit_id], have=[c1.commit_id])
        commit_ids = [c.commit_id for c in walk["commits"]]

        assert c3.commit_id in commit_ids
        assert c2.commit_id in commit_ids
        assert c1.commit_id not in commit_ids
        assert len(commit_ids) == 2


# ---------------------------------------------------------------------------
# D — commit walk boundary is remote_head only (not all of have)
# ---------------------------------------------------------------------------


class TestCaseD:
    """The commit walk stops at the target branch's remote_head only."""

    def test_walk_boundary_is_remote_head_not_other_branch_heads(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Walk uses remote_head as the commit boundary on the target branch.

        Other branch heads in have may sit anywhere in history — they must not
        accidentally cut off commits that belong to this push.
        """
        root = _repo(tmp_path)
        o = [_obj(root, f"rev{i}".encode()) for i in range(4)]

        # Linear chain: c1 → c2 → c3 → c4
        c1 = _commit(root, {"f.txt": o[0]})
        c2 = _commit(root, {"f.txt": o[1]}, parent_id=c1.commit_id)
        c3 = _commit(root, {"f.txt": o[2]}, parent_id=c2.commit_id)
        c4 = _commit(root, {"f.txt": o[3]}, parent_id=c3.commit_id)

        # remote/main is at c2 (remote_head for the target branch)
        # remote/feat is at c3 (another branch head → NOT the boundary)
        remote_head = c2.commit_id
        other_branch_head = c3.commit_id

        # Pushing local tip c4 with remote_head=c2 as the boundary.  Only
        # remote_head is passed as have: walk_commits applies have to the
        # commit boundary too, so the other branch head is deliberately left
        # out of the commit walk.
        walk = walk_commits(root, [c4.commit_id], have=[remote_head])
        commit_ids = [c.commit_id for c in walk["commits"]]

        # c3 and c4 are new relative to remote_head=c2
        assert c4.commit_id in commit_ids
        assert other_branch_head in commit_ids
        assert c2.commit_id not in commit_ids
        assert c1.commit_id not in commit_ids


# ---------------------------------------------------------------------------
# E — object dedup uses full have set, not just remote_head
# ---------------------------------------------------------------------------


class TestCaseE:
    """Object dedup subtracts objects reachable from every remote branch head."""

    def test_object_on_other_branch_not_resent(self, tmp_path: pathlib.Path) -> None:
        """Object introduced on another remote branch must not be resent.

        Scenario: remote/feat already has object O (via some other push).
        We push to remote/main and our new commit also references O.
        O must be excluded from the pack because it's in have (feat's head snapshot).
        """
        root = _repo(tmp_path)
        shared_obj = _obj(root, b"shared object")
        new_obj = _obj(root, b"only in this push")
        readme_obj = _obj(root, b"readme")

        # c_feat is the tip of remote/feat; its snapshot contains shared_obj
        c_feat = _commit(root, {"shared.txt": shared_obj}, message="feat commit")

        # c_main_old is remote/main's current tip
        c_main_old = _commit(root, {"readme.txt": readme_obj}, message="main base")

        # New commit on main: adds shared_obj AND new_obj
        c_main_new = _commit(
            root,
            {"readme.txt": readme_obj, "shared.txt": shared_obj, "new.txt": new_obj},
            parent_id=c_main_old.commit_id,
            message="new main commit",
        )

        # have = both remote branch heads
        have = [c_main_old.commit_id, c_feat.commit_id]

        # Commit boundary: with only the target branch's remote head in have,
        # the new main commit is walked and the old remote tip is not.
        walk = walk_commits(root, [c_main_new.commit_id], have=[c_main_old.commit_id])
        boundary_commit_ids = [c.commit_id for c in walk["commits"]]
        assert c_main_new.commit_id in boundary_commit_ids
        assert c_main_old.commit_id not in boundary_commit_ids

        # object dedup is against objects reachable from have
        # shared_obj is in c_feat's snapshot → should not be in the pack
        # Note: walk_commits uses have for BOTH commit boundary AND object dedup.
        # We pass both branch heads so shared_obj is subtracted.
        walk_full = walk_commits(root, [c_main_new.commit_id], have=have)

        assert new_obj in walk_full["all_blob_ids"], "New object must be sent"
        assert shared_obj not in walk_full["all_blob_ids"], (
            "Object already on remote via another branch must not be resent"
        )


# ---------------------------------------------------------------------------
# F — topo order: ancestors before descendants
# ---------------------------------------------------------------------------


class TestCaseF:
    """Ordering contract: newest-first from walk_commits, reversed for the wire."""

    def test_ancestors_before_descendants_in_new_commits(
        self, tmp_path: pathlib.Path
    ) -> None:
        root = _repo(tmp_path)
        o = [_obj(root, f"v{i}".encode()) for i in range(3)]
        c1 = _commit(root, {"f.txt": o[0]})
        c2 = _commit(root, {"f.txt": o[1]}, parent_id=c1.commit_id)
        c3 = _commit(root, {"f.txt": o[2]}, parent_id=c2.commit_id)

        walk = walk_commits(root, [c3.commit_id], have=[])
        commits = walk["commits"]

        # walk_commits returns newest-first; caller reverses for wire
        # The raw list from walk_commits is newest-first (BFS order)
        # reversed() gives ancestors-first. Test both orderings are consistent.
        ids = [c.commit_id for c in commits]
        assert ids.index(c1.commit_id) > ids.index(c3.commit_id), (
            "walk_commits returns newest-first; caller must reverse for wire encoding"
        )

        # Reversed = ancestors-first (what goes on the wire)
        ids_oldest_first = [c.commit_id for c in reversed(commits)]
        assert ids_oldest_first.index(c1.commit_id) < ids_oldest_first.index(c2.commit_id)
        assert ids_oldest_first.index(c2.commit_id) < ids_oldest_first.index(c3.commit_id)


# ---------------------------------------------------------------------------
# G — multi-branch: objects on remote via another branch not resent
# ---------------------------------------------------------------------------


class TestCaseG:
    """End-to-end multi-branch push with no redundant objects in the pack."""

    def test_multi_branch_repo_no_redundant_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Full scenario: two remote branches, push to main, objects from feat not resent."""
        root = _repo(tmp_path)

        # Objects
        base_obj = _obj(root, b"base file")
        feat_obj = _obj(root, b"feat-only file")
        main_new_obj = _obj(root, b"new on main")

        # Remote state: main at c_base, feat at c_feat (has feat_obj)
        c_base = _commit(root, {"base.txt": base_obj}, message="initial")
        c_feat = _commit(
            root,
            {"base.txt": base_obj, "feat.txt": feat_obj},
            parent_id=c_base.commit_id,
            message="feat work",
        )

        # New local commit on main: picks up feat.txt too + adds new file
        c_new_main = _commit(
            root,
            {"base.txt": base_obj, "feat.txt": feat_obj, "new.txt": main_new_obj},
            parent_id=c_base.commit_id,
            message="merge result",
        )

        have = [c_base.commit_id, c_feat.commit_id]
        walk = walk_commits(root, [c_new_main.commit_id], have=have)

        assert main_new_obj in walk["all_blob_ids"], "New object must be included"
        assert base_obj not in walk["all_blob_ids"], "Base object already on remote"
        assert feat_obj not in walk["all_blob_ids"], (
            "feat_obj is reachable from c_feat which is in have — must not be resent"
        )