gabriel / muse public
test_push_step1_dag_walk.py python
348 lines 13.6 KB
Raw
1 """TDD — push step 1: walk local DAG to find commits not on remote.
2
3 Pseudocode (issue #57 step 1):
4
5 if remote_head is null:
6 new_commits = all commits reachable from local tip (topo sorted, ancestors first)
7 else if remote_head == local_tip:
8 nothing to push → exit
9 else:
10 new_commits = commits reachable from local tip, not reachable from remote_head
11 (topo sorted, ancestors first)
12
13 NOTE: use remote_head (target branch only) as the commit walk boundary;
14 use full "have" set (all remote branch heads) for object dedup
15
16 Coverage
17 --------
18 A remote_head is null → all commits sent (first push to a new branch)
19 B remote_head == local_tip → zero commits (already up to date)
20 C remote_head behind local_tip → only the delta commits sent (normal incremental push)
21 D commit walk boundary is remote_head only (not all of have)
22 E object dedup uses full have set (all branch heads), not just remote_head
23 F topo order: ancestors appear before descendants in new_commits
24 G multi-branch: objects already on remote via another branch are not resent
25 """
26 from __future__ import annotations
27
28 import datetime
29 import json
30 import pathlib
31
32 import pytest
33
34 from muse._version import __version__
35 from muse.core.object_store import write_object
36 from muse.core.mpack import walk_commits
37 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
38 from muse.core.commits import CommitRecord, write_commit
39 from muse.core.snapshots import SnapshotRecord, write_snapshot
40 from muse.core.types import blob_id
41 from muse.core.paths import muse_dir
42
43
44 # ---------------------------------------------------------------------------
45 # Helpers
46 # ---------------------------------------------------------------------------
47
# Fixed, timezone-aware (UTC) timestamp; _commit stamps every commit with it.
_TS = datetime.datetime(year=2026, month=1, day=1, tzinfo=datetime.timezone.utc)
49
50
def _repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a bare repository skeleton under *tmp_path* and return *tmp_path*.

    Inside the directory reported by ``muse_dir`` this creates the
    ``objects``, ``refs/heads`` and ``remotes`` directories, writes a ``HEAD``
    file pointing at ``refs/heads/main`` and a small ``repo.json`` descriptor.
    """
    store = muse_dir(tmp_path)
    for subdir in ("objects", "refs/heads", "remotes"):
        target = store / subdir
        target.mkdir(parents=True, exist_ok=True)

    head_file = store / "HEAD"
    head_file.write_text("ref: refs/heads/main\n")

    descriptor = {"repo_id": "test-repo", "schema_version": __version__, "domain": "code"}
    descriptor_file = store / "repo.json"
    descriptor_file.write_text(json.dumps(descriptor))
    return tmp_path
60
61
def _obj(root: pathlib.Path, content: bytes) -> str:
    """Write *content* into the object store under *root* and return its blob id."""
    digest = blob_id(content)
    write_object(root, digest, content)
    return digest
66
67
def _commit(
    root: pathlib.Path,
    manifest: dict[str, str],
    *,
    parent_id: str | None = None,
    message: str = "test",
) -> CommitRecord:
    """Write a snapshot for *manifest* and a commit that points at it.

    The commit is recorded on branch ``main`` with the fixed timestamp
    ``_TS``.  When *parent_id* is given it becomes the commit's single parent;
    otherwise the commit has no parents.  Returns the ``CommitRecord`` that
    was written.
    """
    snapshot_id = compute_snapshot_id(manifest)
    snapshot = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest)
    write_snapshot(root, snapshot)

    commit_id = compute_commit_id(
        parent_ids=[] if not parent_id else [parent_id],
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=_TS.isoformat(),
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snapshot_id,
        message=message,
        committed_at=_TS,
        parent_commit_id=parent_id,
    )
    write_commit(root, record)
    return record
94
95
96 # ---------------------------------------------------------------------------
97 # A — remote_head is null → send everything (first push)
98 # ---------------------------------------------------------------------------
99
class TestCaseA:
    """Case A: no remote head (``have=[]``), so the walk yields everything."""

    @staticmethod
    def _two_commit_history(
        root: pathlib.Path,
    ) -> tuple[list[str], list[CommitRecord]]:
        """Write two blobs and a two-commit chain; return (blob ids, commits)."""
        first_blob = _obj(root, b"file 1")
        second_blob = _obj(root, b"file 2")
        base = _commit(root, {"a.txt": first_blob})
        tip = _commit(
            root,
            {"a.txt": first_blob, "b.txt": second_blob},
            parent_id=base.commit_id,
        )
        return [first_blob, second_blob], [base, tip]

    def test_null_remote_head_sends_all_commits(self, tmp_path: pathlib.Path) -> None:
        """Both commits of the chain are returned, and nothing else."""
        root = _repo(tmp_path)
        _, (base, tip) = self._two_commit_history(root)

        # A null remote_head is modelled as an empty have list.
        result = walk_commits(root, [tip.commit_id], have=[])

        sent_ids = [entry.commit_id for entry in result["commits"]]
        assert base.commit_id in sent_ids
        assert tip.commit_id in sent_ids
        assert len(sent_ids) == 2

    def test_null_remote_head_sends_all_objects(self, tmp_path: pathlib.Path) -> None:
        """Every blob referenced by the chain is listed in ``all_blob_ids``."""
        root = _repo(tmp_path)
        blobs, (_, tip) = self._two_commit_history(root)

        result = walk_commits(root, [tip.commit_id], have=[])

        for blob in blobs:
            assert blob in result["all_blob_ids"]
127
128
129 # ---------------------------------------------------------------------------
130 # B — remote_head == local_tip → nothing to send
131 # ---------------------------------------------------------------------------
132
class TestCaseB:
    """Case B: remote head equals the local tip, so there is nothing to send."""

    @staticmethod
    def _walk_when_up_to_date(tmp_path: pathlib.Path):
        """Build a one-commit repo and walk it with that commit already in have."""
        root = _repo(tmp_path)
        blob = _obj(root, b"file content")
        only_commit = _commit(root, {"a.txt": blob})
        tip = only_commit.commit_id
        # remote_head == local_tip: the tip itself is the walk boundary.
        return walk_commits(root, [tip], have=[tip])

    def test_up_to_date_sends_zero_commits(self, tmp_path: pathlib.Path) -> None:
        """No commits are returned when the remote already has the tip."""
        result = self._walk_when_up_to_date(tmp_path)

        assert result["commits"] == []

    def test_up_to_date_sends_zero_objects(self, tmp_path: pathlib.Path) -> None:
        """No blob ids are returned when the remote already has the tip."""
        result = self._walk_when_up_to_date(tmp_path)

        assert result["all_blob_ids"] == []
152
153
154 # ---------------------------------------------------------------------------
155 # C — remote_head behind local_tip → only delta commits sent
156 # ---------------------------------------------------------------------------
157
class TestCaseC:
    """Case C: remote head is behind the local tip, so only the delta is sent."""

    def test_incremental_push_sends_only_new_commits(self, tmp_path: pathlib.Path) -> None:
        """With c1 on the remote, pushing c2 returns c2 but not c1."""
        root = _repo(tmp_path)
        old_blob = _obj(root, b"original")
        new_blob = _obj(root, b"new content")
        c1 = _commit(root, {"a.txt": old_blob})
        c2 = _commit(root, {"a.txt": new_blob}, parent_id=c1.commit_id)

        # The remote already holds c1; the local tip is c2.
        result = walk_commits(root, [c2.commit_id], have=[c1.commit_id])

        sent_ids = [entry.commit_id for entry in result["commits"]]
        assert c2.commit_id in sent_ids
        assert c1.commit_id not in sent_ids, "c1 is already on remote — must not be resent"

    def test_three_commit_chain_sends_two_new(self, tmp_path: pathlib.Path) -> None:
        """With the oldest of three commits on the remote, the other two are sent."""
        root = _repo(tmp_path)
        blobs = [_obj(root, f"v{n}".encode()) for n in range(3)]

        # Build the linear chain oldest -> newest, each commit parented on the last.
        chain: list[CommitRecord] = []
        for blob in blobs:
            parent = chain[-1].commit_id if chain else None
            chain.append(_commit(root, {"f.txt": blob}, parent_id=parent))
        oldest, middle, newest = chain

        # The remote holds the oldest commit; the local tip is the newest.
        result = walk_commits(root, [newest.commit_id], have=[oldest.commit_id])

        sent_ids = [entry.commit_id for entry in result["commits"]]
        assert newest.commit_id in sent_ids
        assert middle.commit_id in sent_ids
        assert oldest.commit_id not in sent_ids
        assert len(sent_ids) == 2
188
189
190 # ---------------------------------------------------------------------------
191 # D — commit walk boundary is remote_head only (not all of have)
192 # ---------------------------------------------------------------------------
193
class TestCaseD:
    """Case D: the commit walk is bounded by ``remote_head`` of the target branch."""

    def test_walk_boundary_is_remote_head_not_other_branch_heads(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Walk uses remote_head as the commit boundary on the target branch.

        Other branch heads in have may sit anywhere in history — they must
        not accidentally cut off commits that belong to this push.
        """
        root = _repo(tmp_path)
        o = [_obj(root, f"rev{i}".encode()) for i in range(4)]

        # Linear chain: c1 → c2 → c3 → c4
        c1 = _commit(root, {"f.txt": o[0]})
        c2 = _commit(root, {"f.txt": o[1]}, parent_id=c1.commit_id)
        c3 = _commit(root, {"f.txt": o[2]}, parent_id=c2.commit_id)
        c4 = _commit(root, {"f.txt": o[3]}, parent_id=c3.commit_id)

        # remote/main is at c2: the remote_head of the branch being pushed.
        # remote/feat is imagined to be at c3. It would be part of the full
        # "have" set used for object dedup, but it is deliberately NOT passed
        # to the walk below: only remote_head may bound the commit walk,
        # otherwise c3 would be cut out of this push.
        remote_head = c2.commit_id

        # Pushing local tip c4 with remote_head=c2 as the boundary
        walk = walk_commits(root, [c4.commit_id], have=[remote_head])

        commit_ids = [c.commit_id for c in walk["commits"]]
        # c3 and c4 are new relative to remote_head=c2
        assert c4.commit_id in commit_ids
        assert c3.commit_id in commit_ids
        assert c2.commit_id not in commit_ids
        assert c1.commit_id not in commit_ids
        # Same exact-count check the other chain tests in this module use.
        assert len(commit_ids) == 2
227
228
229 # ---------------------------------------------------------------------------
230 # E — object dedup uses full have set, not just remote_head
231 # ---------------------------------------------------------------------------
232
class TestCaseE:
    """Case E: object dedup is done against every remote branch head in have."""

    def test_object_on_other_branch_not_resent(self, tmp_path: pathlib.Path) -> None:
        """Object introduced on another remote branch must not be resent.

        Scenario:
        remote/feat already has object O (via some other push).
        We push to remote/main and our new commit also references O.
        O must be excluded from the pack because it's in have (feat's head snapshot).
        """
        root = _repo(tmp_path)
        shared_obj = _obj(root, b"shared object")
        new_obj = _obj(root, b"only in this push")
        # Written once and reused by both main commits below.
        readme_obj = _obj(root, b"readme")

        # c_feat is the tip of remote/feat; its snapshot contains shared_obj
        c_feat = _commit(root, {"shared.txt": shared_obj}, message="feat commit")

        # c_main_old is remote/main's current tip
        c_main_old = _commit(root, {"readme.txt": readme_obj}, message="main base")

        # New commit on main: adds shared_obj AND new_obj
        c_main_new = _commit(
            root,
            {"readme.txt": readme_obj, "shared.txt": shared_obj, "new.txt": new_obj},
            parent_id=c_main_old.commit_id,
            message="new main commit",
        )

        # have = both remote branch heads.  This test passes the full set so
        # that objects reachable from c_feat (shared_obj) are subtracted from
        # the result as well as those reachable from main's old tip.
        have = [c_main_old.commit_id, c_feat.commit_id]

        walk = walk_commits(root, [c_main_new.commit_id], have=have)

        assert new_obj in walk["all_blob_ids"], "New object must be sent"
        assert shared_obj not in walk["all_blob_ids"], (
            "Object already on remote via another branch must not be resent"
        )
275
276
277 # ---------------------------------------------------------------------------
278 # F — topo order: ancestors before descendants
279 # ---------------------------------------------------------------------------
280
class TestCaseF:
    """Case F: ordering of the commits returned by ``walk_commits``."""

    def test_ancestors_before_descendants_in_new_commits(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Raw walk output is newest-first; reversing it yields ancestors-first."""
        root = _repo(tmp_path)
        # Exactly one blob per commit of the three-commit chain.
        o = [_obj(root, f"v{i}".encode()) for i in range(3)]
        c1 = _commit(root, {"f.txt": o[0]})
        c2 = _commit(root, {"f.txt": o[1]}, parent_id=c1.commit_id)
        c3 = _commit(root, {"f.txt": o[2]}, parent_id=c2.commit_id)

        walk = walk_commits(root, [c3.commit_id], have=[])
        commits = walk["commits"]

        # The raw list is expected newest-first; the caller reverses it to get
        # the ancestors-first order used on the wire.  Check both views agree.
        ids = [c.commit_id for c in commits]
        assert ids.index(c1.commit_id) > ids.index(c3.commit_id), (
            "walk_commits returns newest-first; caller must reverse for wire encoding"
        )

        # Reversed = ancestors-first (what goes on the wire)
        ids_oldest_first = [c.commit_id for c in reversed(commits)]
        assert ids_oldest_first.index(c1.commit_id) < ids_oldest_first.index(c2.commit_id)
        assert ids_oldest_first.index(c2.commit_id) < ids_oldest_first.index(c3.commit_id)
306
307
308 # ---------------------------------------------------------------------------
309 # G — multi-branch: objects on remote via another branch not resent
310 # ---------------------------------------------------------------------------
311
class TestCaseG:
    """Case G: blobs the remote already holds via another branch are not resent."""

    def test_multi_branch_repo_no_redundant_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Two remote branches exist; pushing to main skips blobs reachable from feat."""
        root = _repo(tmp_path)

        # Blobs used across the three commits.
        base_obj = _obj(root, b"base file")
        feat_obj = _obj(root, b"feat-only file")
        main_new_obj = _obj(root, b"new on main")

        # Manifests are layered so each keeps the key order of the one before.
        base_manifest = {"base.txt": base_obj}
        feat_manifest = {**base_manifest, "feat.txt": feat_obj}
        main_manifest = {**feat_manifest, "new.txt": main_new_obj}

        # Remote state: main sits at c_base, feat sits at c_feat (which has feat_obj).
        c_base = _commit(root, base_manifest, message="initial")
        c_feat = _commit(
            root, feat_manifest, parent_id=c_base.commit_id, message="feat work"
        )

        # New local commit on main: also carries feat.txt and adds one new file.
        local_tip = _commit(
            root, main_manifest, parent_id=c_base.commit_id, message="merge result"
        )

        remote_heads = [c_base.commit_id, c_feat.commit_id]
        result = walk_commits(root, [local_tip.commit_id], have=remote_heads)
        to_send = result["all_blob_ids"]

        assert main_new_obj in to_send, "New object must be included"
        assert base_obj not in to_send, "Base object already on remote"
        assert feat_obj not in to_send, (
            "feat_obj is reachable from c_feat which is in have — must not be resent"
        )
File History 1 commit