test_push_step1_dag_walk.py
python
sha256:d11a87833d5fad6059b7662844bf5448a8911a17cce7a51811f71ad394f248eb
bump to v0.2.0rc13
Human
patch
7 days ago
| 1 | """TDD — push step 1: walk local DAG to find commits not on remote. |
| 2 | |
| 3 | Pseudocode (issue #57 step 1): |
| 4 | |
| 5 | if remote_head is null: |
| 6 | new_commits = all commits reachable from local tip (topo sorted, ancestors first) |
| 7 | else if remote_head == local_tip: |
| 8 | nothing to push → exit |
| 9 | else: |
| 10 | new_commits = commits reachable from local tip, not reachable from remote_head |
| 11 | (topo sorted, ancestors first) |
| 12 | |
| 13 | NOTE: use remote_head (target branch only) as the commit walk boundary; |
| 14 | use full "have" set (all remote branch heads) for object dedup |
| 15 | |
| 16 | Coverage |
| 17 | -------- |
| 18 | A remote_head is null → all commits sent (first push to a new branch) |
| 19 | B remote_head == local_tip → zero commits (already up to date) |
| 20 | C remote_head behind local_tip → only the delta commits sent (normal incremental push) |
| 21 | D commit walk boundary is remote_head only (not all of have) |
| 22 | E object dedup uses full have set (all branch heads), not just remote_head |
| 23 | F topo order: ancestors appear before descendants in new_commits |
| 24 | G multi-branch: objects already on remote via another branch are not resent |
| 25 | """ |
| 26 | from __future__ import annotations |
| 27 | |
| 28 | import datetime |
| 29 | import json |
| 30 | import pathlib |
| 31 | |
| 32 | import pytest |
| 33 | |
| 34 | from muse._version import __version__ |
| 35 | from muse.core.object_store import write_object |
| 36 | from muse.core.mpack import walk_commits |
| 37 | from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id |
| 38 | from muse.core.commits import CommitRecord, write_commit |
| 39 | from muse.core.snapshots import SnapshotRecord, write_snapshot |
| 40 | from muse.core.types import blob_id |
| 41 | from muse.core.paths import muse_dir |
| 42 | |
| 43 | |
| 44 | # --------------------------------------------------------------------------- |
| 45 | # Helpers |
| 46 | # --------------------------------------------------------------------------- |
| 47 | |
| 48 | _TS = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) |
| 49 | |
| 50 | |
def _repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *tmp_path* and return it.

    Inside the directory returned by ``muse_dir(tmp_path)`` this creates the
    ``objects``, ``refs/heads`` and ``remotes`` directories, writes a ``HEAD``
    file pointing at ``refs/heads/main`` and a small ``repo.json``.
    """
    meta_dir = muse_dir(tmp_path)

    for sub_dir in ("objects", "refs/heads", "remotes"):
        target = meta_dir / sub_dir
        target.mkdir(parents=True, exist_ok=True)

    head_file = meta_dir / "HEAD"
    head_file.write_text("ref: refs/heads/main\n")

    repo_meta = {"repo_id": "test-repo", "schema_version": __version__, "domain": "code"}
    config_file = meta_dir / "repo.json"
    config_file.write_text(json.dumps(repo_meta))

    # The repository root (not the metadata directory) is what callers use.
    return tmp_path
| 60 | |
| 61 | |
def _obj(root: pathlib.Path, content: bytes) -> str:
    """Write *content* into the object store of *root* and return its blob ID.

    The ID is whatever ``blob_id`` computes for the given bytes.
    """
    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
| 66 | |
| 67 | |
def _commit(
    root: pathlib.Path,
    manifest: dict[str, str],
    *,
    parent_id: str | None = None,
    message: str = "test",
    branch: str = "main",
) -> CommitRecord:
    """Write a snapshot plus a commit that points at it, and return the commit.

    Args:
        root: Repository root the snapshot and commit are written under.
        manifest: Mapping stored as the snapshot manifest (path -> object ID
            in the tests of this module).
        parent_id: Commit ID of the single parent, or ``None`` for a root
            commit.
        message: Commit message; also part of the commit ID computation.
        branch: Branch name recorded on the ``CommitRecord``. Defaults to
            ``"main"`` so existing callers keep their behaviour; multi-branch
            scenarios can label commits with the branch they model.

    Returns:
        The ``CommitRecord`` that was written.
    """
    snap_id = compute_snapshot_id(manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))

    # A falsy parent_id means "root commit": no parents go into the ID hash.
    parent_ids = [parent_id] if parent_id else []

    # The fixed module timestamp keeps commit IDs reproducible between runs.
    # The branch name is not an input of the ID computation here.
    cid = compute_commit_id(
        parent_ids=parent_ids,
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=_TS.isoformat(),
    )
    cr = CommitRecord(
        commit_id=cid,
        branch=branch,
        snapshot_id=snap_id,
        message=message,
        committed_at=_TS,
        parent_commit_id=parent_id,
    )
    write_commit(root, cr)
    return cr
| 94 | |
| 95 | |
| 96 | # --------------------------------------------------------------------------- |
| 97 | # A — remote_head is null → send everything (first push) |
| 98 | # --------------------------------------------------------------------------- |
| 99 | |
class TestCaseA:
    """Case A: no remote head yet (first push), so everything is walked."""

    def test_null_remote_head_sends_all_commits(self, tmp_path: pathlib.Path) -> None:
        """Both commits of a two-commit chain come back when ``have`` is empty."""
        repo_root = _repo(tmp_path)
        blob_a = _obj(repo_root, b"file 1")
        blob_b = _obj(repo_root, b"file 2")
        base = _commit(repo_root, {"a.txt": blob_a})
        tip = _commit(
            repo_root,
            {"a.txt": blob_a, "b.txt": blob_b},
            parent_id=base.commit_id,
        )

        # A null remote_head is expressed as an empty have list.
        result = walk_commits(repo_root, [tip.commit_id], have=[])

        walked_ids = [record.commit_id for record in result["commits"]]
        for expected_id in (base.commit_id, tip.commit_id):
            assert expected_id in walked_ids
        assert len(walked_ids) == 2

    def test_null_remote_head_sends_all_objects(self, tmp_path: pathlib.Path) -> None:
        """Every blob referenced by the chain is listed when ``have`` is empty."""
        repo_root = _repo(tmp_path)
        blob_a = _obj(repo_root, b"file 1")
        blob_b = _obj(repo_root, b"file 2")
        base = _commit(repo_root, {"a.txt": blob_a})
        tip = _commit(
            repo_root,
            {"a.txt": blob_a, "b.txt": blob_b},
            parent_id=base.commit_id,
        )

        result = walk_commits(repo_root, [tip.commit_id], have=[])

        blob_ids = result["all_blob_ids"]
        assert blob_a in blob_ids
        assert blob_b in blob_ids
| 127 | |
| 128 | |
| 129 | # --------------------------------------------------------------------------- |
| 130 | # B — remote_head == local_tip → nothing to send |
| 131 | # --------------------------------------------------------------------------- |
| 132 | |
class TestCaseB:
    """Case B: the remote head equals the local tip, so nothing is sent."""

    def test_up_to_date_sends_zero_commits(self, tmp_path: pathlib.Path) -> None:
        """No commits are returned when the tip itself is in ``have``."""
        repo_root = _repo(tmp_path)
        blob = _obj(repo_root, b"file content")
        only_commit = _commit(repo_root, {"a.txt": blob})
        tip_id = only_commit.commit_id

        # remote_head == local_tip: the tip is passed as both want and have.
        result = walk_commits(repo_root, [tip_id], have=[tip_id])

        assert result["commits"] == []

    def test_up_to_date_sends_zero_objects(self, tmp_path: pathlib.Path) -> None:
        """No blob IDs are returned when the tip itself is in ``have``."""
        repo_root = _repo(tmp_path)
        blob = _obj(repo_root, b"file content")
        only_commit = _commit(repo_root, {"a.txt": blob})
        tip_id = only_commit.commit_id

        result = walk_commits(repo_root, [tip_id], have=[tip_id])

        assert result["all_blob_ids"] == []
| 152 | |
| 153 | |
| 154 | # --------------------------------------------------------------------------- |
| 155 | # C — remote_head behind local_tip → only delta commits sent |
| 156 | # --------------------------------------------------------------------------- |
| 157 | |
class TestCaseC:
    """Case C: the remote head is behind the local tip (incremental push)."""

    def test_incremental_push_sends_only_new_commits(self, tmp_path: pathlib.Path) -> None:
        """Only the commit after the remote head is returned."""
        repo_root = _repo(tmp_path)
        old_blob = _obj(repo_root, b"original")
        new_blob = _obj(repo_root, b"new content")
        c1 = _commit(repo_root, {"a.txt": old_blob})
        c2 = _commit(repo_root, {"a.txt": new_blob}, parent_id=c1.commit_id)

        # The remote already holds c1; the local tip is c2.
        result = walk_commits(repo_root, [c2.commit_id], have=[c1.commit_id])

        walked_ids = [record.commit_id for record in result["commits"]]
        assert c2.commit_id in walked_ids
        assert c1.commit_id not in walked_ids, "c1 is already on remote — must not be resent"

    def test_three_commit_chain_sends_two_new(self, tmp_path: pathlib.Path) -> None:
        """With the remote at c1 and the tip at c3, exactly c2 and c3 are returned."""
        repo_root = _repo(tmp_path)
        blobs = [_obj(repo_root, f"v{i}".encode()) for i in range(3)]
        c1 = _commit(repo_root, {"f.txt": blobs[0]})
        c2 = _commit(repo_root, {"f.txt": blobs[1]}, parent_id=c1.commit_id)
        c3 = _commit(repo_root, {"f.txt": blobs[2]}, parent_id=c2.commit_id)

        # The remote holds c1 only; the local tip is c3.
        result = walk_commits(repo_root, [c3.commit_id], have=[c1.commit_id])

        walked_ids = [record.commit_id for record in result["commits"]]
        for new_id in (c3.commit_id, c2.commit_id):
            assert new_id in walked_ids
        assert c1.commit_id not in walked_ids
        assert len(walked_ids) == 2
| 188 | |
| 189 | |
| 190 | # --------------------------------------------------------------------------- |
| 191 | # D — commit walk boundary is remote_head only (not all of have) |
| 192 | # --------------------------------------------------------------------------- |
| 193 | |
class TestCaseD:
    """Case D: the commit walk is bounded by the target branch's remote head."""

    def test_walk_boundary_is_remote_head_not_other_branch_heads(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Walk uses remote_head as the commit boundary on the target branch.

        Other branch heads in have may sit anywhere in history — they must
        not accidentally cut off commits that belong to this push.

        Scenario: remote/main (the target branch) is at c2 while another
        remote branch, remote/feat, is at c3. Only the target branch head is
        handed to ``walk_commits`` as the boundary, so c3 and c4 are both
        part of the push.
        """
        root = _repo(tmp_path)
        o = [_obj(root, f"rev{i}".encode()) for i in range(4)]

        # Linear chain: c1 → c2 → c3 → c4
        c1 = _commit(root, {"f.txt": o[0]})
        c2 = _commit(root, {"f.txt": o[1]}, parent_id=c1.commit_id)
        c3 = _commit(root, {"f.txt": o[2]}, parent_id=c2.commit_id)
        c4 = _commit(root, {"f.txt": o[3]}, parent_id=c3.commit_id)

        # remote/main is at c2 (remote_head for the target branch).
        # remote/feat would be at c3, but it is deliberately NOT passed below:
        # only the target branch head acts as the commit boundary.
        remote_head = c2.commit_id

        # Pushing local tip c4 with remote_head=c2 as the boundary
        walk = walk_commits(root, [c4.commit_id], have=[remote_head])

        commit_ids = [c.commit_id for c in walk["commits"]]
        # c3 and c4 are new relative to remote_head=c2
        assert c4.commit_id in commit_ids
        assert c3.commit_id in commit_ids
        assert c2.commit_id not in commit_ids
        assert c1.commit_id not in commit_ids
| 227 | |
| 228 | |
| 229 | # --------------------------------------------------------------------------- |
| 230 | # E — object dedup uses full have set, not just remote_head |
| 231 | # --------------------------------------------------------------------------- |
| 232 | |
class TestCaseE:
    """Case E: object dedup is performed against every head listed in have."""

    def test_object_on_other_branch_not_resent(self, tmp_path: pathlib.Path) -> None:
        """Object introduced on another remote branch must not be resent.

        Scenario:
        remote/feat already has object O (via some other push).
        We push to remote/main and our new commit also references O.
        O must be excluded from the pack because it's in have (feat's head snapshot).
        """
        root = _repo(tmp_path)
        shared_obj = _obj(root, b"shared object")
        new_obj = _obj(root, b"only in this push")
        # Written once and reused by both main commits below.
        readme_obj = _obj(root, b"readme")

        # c_feat is the tip of remote/feat; its snapshot contains shared_obj
        c_feat = _commit(root, {"shared.txt": shared_obj}, message="feat commit")

        # c_main_old is remote/main's current tip
        c_main_old = _commit(root, {"readme.txt": readme_obj}, message="main base")

        # New commit on main: adds shared_obj AND new_obj
        c_main_new = _commit(
            root,
            {"readme.txt": readme_obj, "shared.txt": shared_obj, "new.txt": new_obj},
            parent_id=c_main_old.commit_id,
            message="new main commit",
        )

        # have = both remote branch heads. Both are passed so that shared_obj,
        # which is in c_feat's snapshot, is subtracted from the pack.
        have = [c_main_old.commit_id, c_feat.commit_id]
        walk_full = walk_commits(root, [c_main_new.commit_id], have=have)

        assert new_obj in walk_full["all_blob_ids"], "New object must be sent"
        assert shared_obj not in walk_full["all_blob_ids"], (
            "Object already on remote via another branch must not be resent"
        )
| 275 | |
| 276 | |
| 277 | # --------------------------------------------------------------------------- |
| 278 | # F — topo order: ancestors before descendants |
| 279 | # --------------------------------------------------------------------------- |
| 280 | |
class TestCaseF:
    """Case F: ordering of the commits returned by the walk."""

    def test_ancestors_before_descendants_in_new_commits(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The walk result is newest-first; reversing it yields ancestors-first."""
        root = _repo(tmp_path)
        # One blob per commit in the three-commit chain.
        o = [_obj(root, f"v{i}".encode()) for i in range(3)]
        c1 = _commit(root, {"f.txt": o[0]})
        c2 = _commit(root, {"f.txt": o[1]}, parent_id=c1.commit_id)
        c3 = _commit(root, {"f.txt": o[2]}, parent_id=c2.commit_id)

        walk = walk_commits(root, [c3.commit_id], have=[])
        commits = walk["commits"]

        # The raw list is expected newest-first; the caller reverses it to
        # get the ancestors-first order used on the wire. Check both views.
        ids = [c.commit_id for c in commits]
        assert ids.index(c1.commit_id) > ids.index(c3.commit_id), (
            "walk_commits returns newest-first; caller must reverse for wire encoding"
        )

        # Reversed = ancestors-first (what goes on the wire)
        ids_oldest_first = [c.commit_id for c in reversed(commits)]
        assert ids_oldest_first.index(c1.commit_id) < ids_oldest_first.index(c2.commit_id)
        assert ids_oldest_first.index(c2.commit_id) < ids_oldest_first.index(c3.commit_id)
| 306 | |
| 307 | |
| 308 | # --------------------------------------------------------------------------- |
| 309 | # G — multi-branch: objects on remote via another branch not resent |
| 310 | # --------------------------------------------------------------------------- |
| 311 | |
class TestCaseG:
    """Case G: multi-branch repository, nothing already on the remote is resent."""

    def test_multi_branch_repo_no_redundant_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Full scenario: two remote branches, push to main, objects from feat not resent."""
        repo_root = _repo(tmp_path)

        # Blobs used by the three commits below.
        base_blob = _obj(repo_root, b"base file")
        feat_blob = _obj(repo_root, b"feat-only file")
        fresh_blob = _obj(repo_root, b"new on main")

        # Remote state: main sits at the base commit, feat at a child of it
        # whose snapshot additionally carries feat_blob.
        base_commit = _commit(repo_root, {"base.txt": base_blob}, message="initial")
        feat_commit = _commit(
            repo_root,
            {"base.txt": base_blob, "feat.txt": feat_blob},
            parent_id=base_commit.commit_id,
            message="feat work",
        )

        # New local commit on main: same files as feat plus one new file.
        pushed_manifest = {
            "base.txt": base_blob,
            "feat.txt": feat_blob,
            "new.txt": fresh_blob,
        }
        pushed_commit = _commit(
            repo_root,
            pushed_manifest,
            parent_id=base_commit.commit_id,
            message="merge result",
        )

        remote_heads = [base_commit.commit_id, feat_commit.commit_id]
        result = walk_commits(repo_root, [pushed_commit.commit_id], have=remote_heads)

        assert fresh_blob in result["all_blob_ids"], "New object must be included"
        assert base_blob not in result["all_blob_ids"], "Base object already on remote"
        assert feat_blob not in result["all_blob_ids"], (
            "feat_obj is reachable from c_feat which is in have — must not be resent"
        )
File History
1 commit
sha256:d11a87833d5fad6059b7662844bf5448a8911a17cce7a51811f71ad394f248eb
bump to v0.2.0rc13
Human
patch
7 days ago