gabriel / muse public
test_push_object_delta.py python
362 lines 13.5 KB
Raw
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 30 days ago
1 """TDD — push only sends objects that are genuinely new.
2
3 Root cause
4 ----------
5 ``walk_commits`` and ``collect_object_ids`` collect ALL objects from the
6 snapshots of new commits without subtracting objects already present in the
7 ``have`` commits' snapshots.
8
9 A snapshot is a full manifest of the repo state at a point in time — it
10 includes every file, not just changed ones. So for a 900-object repo, 1 new
11 commit still sends all 900 objects instead of just the 1–2 that changed.
12
13 The fix: subtract objects reachable from ``have`` commits' snapshots.
14
15 new_objects = objects_in_new_snapshots − objects_in_have_snapshots
16
17 Coverage
18 --------
19 I Unit — collect_object_ids: unchanged objects excluded when have is set
20 II Unit — collect_object_ids: new object (not in have snapshot) is included
21 III Unit — collect_object_ids: object removed in new commit is excluded
22 IV Unit — walk_commits: all_object_ids obeys the same delta semantics
23 V Unit — multi-file repo: 1 changed file → 1 object sent, not all files
24 VI Integration — 100-file repo, 1 file added → only 1 object pushed
25 VII Regression — have=[] sends all objects (no regression)
26 VIII Regression — have commit with no local snapshot handled gracefully
27 """
28 from __future__ import annotations
29
30 import datetime
31 import json
32 import pathlib
33
34 import pytest
35
36 from muse._version import __version__
37 from muse.core.object_store import write_object
38 from muse.core.mpack import collect_object_ids, walk_commits
39 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
40 from muse.core.store import (
41 CommitRecord,
42 SnapshotRecord,
43 write_commit,
44 write_snapshot,
45 )
46 from muse.core.types import Manifest, blob_id
47 from muse.core.paths import muse_dir
48
49
50 # ---------------------------------------------------------------------------
51 # Helpers
52 # ---------------------------------------------------------------------------
53
54
def _oid(content: bytes) -> str:
    """Return ``blob_id(content)`` — the object ID these tests use for *content*."""
    object_id = blob_id(content)
    return object_id
57
58
def _repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *tmp_path* and return it.

    Creates the store sub-directories plus ``HEAD``, ``repo.json`` and
    ``config.toml`` inside ``muse_dir(tmp_path)``, then sets
    ``MUSE_REPO_ROOT`` and the working directory to *tmp_path* through
    *monkeypatch*.
    """
    store = muse_dir(tmp_path)

    # Directory layout the tests need before any record can be written.
    subdirs = ("commits", "snapshots", "objects", "refs/heads", "remotes")
    for name in subdirs:
        (store / name).mkdir(parents=True, exist_ok=True)

    repo_meta = {"repo_id": "test-repo", "schema_version": __version__, "domain": "code"}
    seed_files = {
        "HEAD": "ref: refs/heads/main\n",
        "repo.json": json.dumps(repo_meta),
        "config.toml": '[remotes.origin]\nurl = "https://hub.example.com/r"\n',
    }
    for filename, text in seed_files.items():
        (store / filename).write_text(text)

    monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
    monkeypatch.chdir(tmp_path)
    return tmp_path
71
72
def _write_commit(
    root: pathlib.Path,
    manifest: Manifest,
    *,
    parent_id: str | None = None,
) -> CommitRecord:
    """Write a snapshot and a commit for *manifest*; return the CommitRecord.

    The objects named in *manifest* are NOT written here — callers store them
    beforehand with ``_write_object``.

    Args:
        root: Repository root the records are written under.
        manifest: Mapping of path → object ID describing the full snapshot.
        parent_id: Commit ID of the parent, or ``None`` for a root commit.

    Returns:
        The ``CommitRecord`` that was written.

    Branch (``main``), message (``"test"``) and timestamp (2026-01-01 UTC)
    are fixed, so the inputs to the commit ID vary only with the snapshot
    and the parent.
    """
    message = "test"
    snap_id = compute_snapshot_id(manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))

    ts = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    # A falsy parent means "root commit": hash over an empty parent list.
    parent_ids = [parent_id] if parent_id else []
    cid = compute_commit_id(
        parent_ids=parent_ids,
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=ts.isoformat(),
    )
    commit = CommitRecord(
        commit_id=cid,
        branch="main",
        snapshot_id=snap_id,
        message=message,
        committed_at=ts,
        parent_commit_id=parent_id,
    )
    write_commit(root, commit)
    return commit
103
104
def _write_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* via ``write_object`` under *root* and return its object ID."""
    # Same ID that ``_oid`` yields: it is a thin wrapper around ``blob_id``.
    object_id = blob_id(content)
    write_object(root, object_id, content)
    return object_id
109
110
111 # ---------------------------------------------------------------------------
112 # I–III — collect_object_ids returns only objects missing from the have-snapshots
113 # ---------------------------------------------------------------------------
114
115
class TestCollectObjectIdsDelta:
    """``collect_object_ids`` with ``have`` set: only genuinely new objects."""

    def test_unchanged_object_excluded_when_in_have_snapshot(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A blob shared by the have-commit and the new commit is not re-sent."""
        root = _repo(tmp_path, monkeypatch)
        blob = _write_object(root, b"unchanged file")

        # ``base`` stands for what the server already has; ``tip`` is the new
        # commit, whose snapshot lists the very same file.
        base = _write_commit(root, {"file.txt": blob})
        tip = _write_commit(root, {"file.txt": blob}, parent_id=base.commit_id)

        wanted = [tip.commit_id]
        known = [base.commit_id]
        result = collect_object_ids(root, wanted, have=known)

        assert blob not in result, "Object present in have-snapshot must not be re-sent"

    def test_new_object_included_when_not_in_have_snapshot(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A blob that appears only in the new commit's snapshot is sent."""
        root = _repo(tmp_path, monkeypatch)
        existing = _write_object(root, b"old file content")
        added = _write_object(root, b"brand new file")

        base = _write_commit(root, {"old.txt": existing})
        tip = _write_commit(
            root,
            {"old.txt": existing, "new.txt": added},
            parent_id=base.commit_id,
        )

        wanted = [tip.commit_id]
        known = [base.commit_id]
        result = collect_object_ids(root, wanted, have=known)

        assert added in result, "New object not in have-snapshot must be sent"
        assert existing not in result, "Object already in have-snapshot must not be sent"

    def test_removed_object_excluded(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A blob in the have-snapshot that the new commit deletes is not sent."""
        root = _repo(tmp_path, monkeypatch)
        survivor = _write_object(root, b"kept file")
        deleted = _write_object(root, b"file that gets deleted")

        base = _write_commit(root, {"kept.txt": survivor, "gone.txt": deleted})
        # The new commit drops gone.txt and leaves kept.txt untouched.
        tip = _write_commit(root, {"kept.txt": survivor}, parent_id=base.commit_id)

        wanted = [tip.commit_id]
        known = [base.commit_id]
        result = collect_object_ids(root, wanted, have=known)

        assert deleted not in result
        assert survivor not in result  # unchanged, so already on the other side

    def test_empty_delta_when_no_changes(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A new commit with an identical snapshot yields an empty object list."""
        root = _repo(tmp_path, monkeypatch)
        blob = _write_object(root, b"content")

        base = _write_commit(root, {"f.txt": blob})
        # Same manifest again — only the commit itself is new.
        tip = _write_commit(root, {"f.txt": blob}, parent_id=base.commit_id)

        wanted = [tip.commit_id]
        known = [base.commit_id]
        result = collect_object_ids(root, wanted, have=known)

        assert result == [], f"Expected no objects to send, got {result}"
187
188
189 # ---------------------------------------------------------------------------
190 # IV — walk_commits obeys the same delta semantics
191 # ---------------------------------------------------------------------------
192
193
class TestWalkCommitsDelta:
    """``walk_commits`` must apply the same have-subtraction to its object IDs."""

    def test_walk_all_object_ids_excludes_have_objects(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """``all_object_ids`` contains the new blob but not the shared one."""
        root = _repo(tmp_path, monkeypatch)
        common = _write_object(root, b"shared across commits")
        fresh = _write_object(root, b"only in new commit")

        base = _write_commit(root, {"shared.txt": common})
        tip = _write_commit(
            root,
            {"shared.txt": common, "new.txt": fresh},
            parent_id=base.commit_id,
        )

        wanted = [tip.commit_id]
        known = [base.commit_id]
        walk = walk_commits(root, wanted, have=known)

        assert fresh in walk["all_object_ids"]
        assert common not in walk["all_object_ids"], (
            "walk_commits must exclude objects already in have-snapshot"
        )
216
217
218 # ---------------------------------------------------------------------------
219 # V — multi-file repo: only the changed file is sent
220 # ---------------------------------------------------------------------------
221
222
class TestMultiFileDelta:
    """Modifying one file of many must send exactly one object."""

    def test_only_changed_file_sent_in_10_file_repo(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Ten files, one of them modified in the new commit → one object sent."""
        root = _repo(tmp_path, monkeypatch)

        # Base commit: ten distinct files, each stored as its own object.
        files_a: Manifest = {
            f"file{i:02d}.mid": _write_object(root, f"file {i} original content".encode())
            for i in range(10)
        }
        commit_a = _write_commit(root, files_a)

        # New commit: identical manifest except for file05.mid.
        modified_oid = _write_object(root, b"file 5 modified content")
        files_b = {**files_a, "file05.mid": modified_oid}
        commit_b = _write_commit(root, files_b, parent_id=commit_a.commit_id)

        result = collect_object_ids(root, [commit_b.commit_id], have=[commit_a.commit_id])

        assert result == [modified_oid], (
            f"Expected only 1 modified object, got {len(result)}: {result}"
        )
251
252
253 # ---------------------------------------------------------------------------
254 # VI — integration: 1 added file in large repo
255 # ---------------------------------------------------------------------------
256
257
class TestLargeRepoDelta:
    """Adding one file to a larger repo must send exactly one object."""

    def test_one_added_file_sends_one_object(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A hundred existing files plus one added file → one object sent."""
        root = _repo(tmp_path, monkeypatch)

        # Base commit with 100 tracks.
        files_a: Manifest = {
            f"track{i:03d}.mid": _write_object(root, f"track {i} content".encode())
            for i in range(100)
        }
        commit_a = _write_commit(root, files_a)

        # New commit: every existing track plus one extra file.
        new_oid = _write_object(root, b"brand new track content")
        files_b = dict(files_a)
        files_b["new_track.mid"] = new_oid
        commit_b = _write_commit(root, files_b, parent_id=commit_a.commit_id)

        result = collect_object_ids(root, [commit_b.commit_id], have=[commit_a.commit_id])

        assert result == [new_oid], (
            f"Expected exactly 1 new object, got {len(result)}"
        )
282
283
284 # ---------------------------------------------------------------------------
285 # VII — regression: have=[] sends all objects
286 # ---------------------------------------------------------------------------
287
288
class TestNoHaveRegression:
    """An empty ``have`` list must keep the pre-fix behaviour."""

    def test_no_have_sends_all_objects(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """With ``have=[]`` every object of the walked commits is returned."""
        root = _repo(tmp_path, monkeypatch)
        first = _write_object(root, b"obj1")
        second = _write_object(root, b"obj2")

        base = _write_commit(root, {"a.txt": first})
        tip = _write_commit(
            root,
            {"a.txt": first, "b.txt": second},
            parent_id=base.commit_id,
        )

        wanted = [tip.commit_id]
        result = collect_object_ids(root, wanted, have=[])

        assert first in result
        assert second in result
305
306
307 # ---------------------------------------------------------------------------
308 # VIII — graceful handling: have commit has no local snapshot
309 # ---------------------------------------------------------------------------
310
311
class TestMissingHaveSnapshot:
    """A have-commit whose snapshot is absent locally must not break collection."""

    def test_missing_have_snapshot_treated_as_no_have(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """If a have-commit's snapshot isn't local, don't crash — send the objects."""
        root = _repo(tmp_path, monkeypatch)
        obj = _write_object(root, b"some object")
        ts = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)

        # Phantom have-commit: the commit record exists locally, but the
        # snapshot it points at is deliberately never written.
        snap_id = compute_snapshot_id({"f.txt": obj})
        fake_have_cid = compute_commit_id(
            parent_ids=[],
            snapshot_id=snap_id,
            message="phantom",
            committed_at_iso=ts.isoformat(),
        )
        phantom_commit = CommitRecord(
            commit_id=fake_have_cid,
            branch="main",
            snapshot_id=snap_id,
            message="phantom",
            committed_at=ts,
        )
        write_commit(root, phantom_commit)

        # Real child commit with a complete local snapshot. ``obj`` is already
        # in the object store (written above), so only ``new_obj`` is added.
        new_obj = _write_object(root, b"new object")
        manifest: Manifest = {"f.txt": obj, "g.txt": new_obj}
        snap2_id = compute_snapshot_id(manifest)
        write_snapshot(root, SnapshotRecord(snapshot_id=snap2_id, manifest=manifest))
        cid2 = compute_commit_id(
            parent_ids=[fake_have_cid],
            snapshot_id=snap2_id,
            message="real",
            committed_at_iso=ts.isoformat(),
        )
        commit2 = CommitRecord(
            commit_id=cid2,
            branch="main",
            snapshot_id=snap2_id,
            message="real",
            committed_at=ts,
            parent_commit_id=fake_have_cid,
        )
        write_commit(root, commit2)

        # Must not raise. With the have-snapshot missing, ``obj`` may be
        # over-sent, so nothing is asserted about it either way.
        result = collect_object_ids(root, [cid2], have=[fake_have_cid])
        assert isinstance(result, list)
        # ``new_obj`` appears in no have-snapshot, so it can never be skipped.
        assert new_obj in result, "Object absent from every have-snapshot must be sent"
File History 2 commits
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 30 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 30 days ago