test_push_object_delta.py
file-level
1
files
1
commits
0
hotspots
0
π§ dead
0
π₯ blast risk
"""TDD — push only sends objects that are genuinely new.

Root cause
----------
``walk_commits`` and ``collect_blob_ids`` collect ALL objects from the
snapshots of new commits without subtracting objects already present in the
``have`` commits' snapshots.

A snapshot is a full manifest of the repo state at a point in time — it
includes every file, not just changed ones.  So for a 900-object repo, 1 new
commit still sends all 900 objects instead of just the 1–2 that changed.

The fix: subtract objects reachable from ``have`` commits' snapshots.

    new_objects = objects_in_new_snapshots − objects_in_have_snapshots

Coverage
--------
I    Unit — collect_blob_ids: unchanged objects excluded when have is set
II   Unit — collect_blob_ids: new object (not in have snapshot) is included
III  Unit — collect_blob_ids: object removed in new commit is excluded
IV   Unit — walk_commits: all_blob_ids obeys the same delta semantics
V    Unit — multi-file repo: 1 changed file → 1 object sent, not all files
VI   Integration — 100-file repo, 1 file added → only 1 object pushed
VII  Regression — have=[] sends all objects (no regression)
VIII Regression — have commit with no local snapshot handled gracefully
"""
| 28 | from __future__ import annotations |
| 29 | |
| 30 | import datetime |
| 31 | import json |
| 32 | import pathlib |
| 33 | |
| 34 | import pytest |
| 35 | |
| 36 | from muse._version import __version__ |
| 37 | from muse.core.object_store import write_object |
| 38 | from muse.core.mpack import collect_blob_ids, walk_commits |
| 39 | from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id |
| 40 | from muse.core.commits import ( |
| 41 | CommitRecord, |
| 42 | write_commit, |
| 43 | ) |
| 44 | from muse.core.snapshots import ( |
| 45 | SnapshotRecord, |
| 46 | write_snapshot, |
| 47 | ) |
| 48 | from muse.core.types import Manifest, blob_id |
| 49 | from muse.core.paths import muse_dir |
| 50 | |
| 51 | |
| 52 | # --------------------------------------------------------------------------- |
| 53 | # Helpers |
| 54 | # --------------------------------------------------------------------------- |
| 55 | |
| 56 | |
def _oid(content: bytes) -> str:
    """Return the ID that ``blob_id`` computes for *content*."""
    object_id = blob_id(content)
    return object_id
| 59 | |
| 60 | |
def _repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *tmp_path* and return it.

    Inside the directory returned by ``muse_dir(tmp_path)`` this creates the
    store subdirectories and writes ``HEAD``, ``repo.json`` and
    ``config.toml``.  It then points ``MUSE_REPO_ROOT`` and the current
    working directory at *tmp_path* through *monkeypatch*.
    """
    store = muse_dir(tmp_path)

    subdirs = ("commits", "snapshots", "objects", "refs/heads", "remotes")
    for name in subdirs:
        (store / name).mkdir(parents=True, exist_ok=True)

    repo_meta = {"repo_id": "test-repo", "schema_version": __version__, "domain": "code"}
    # Insertion order matches the order the files are written in.
    seed_files = {
        "HEAD": "ref: refs/heads/main\n",
        "repo.json": json.dumps(repo_meta),
        "config.toml": '[remotes.origin]\nurl = "https://hub.example.com/r"\n',
    }
    for filename, text in seed_files.items():
        (store / filename).write_text(text)

    monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
    monkeypatch.chdir(tmp_path)
    return tmp_path
| 73 | |
| 74 | |
def _write_commit(
    root: pathlib.Path,
    manifest: Manifest,
    *,
    parent_id: str | None = None,
) -> CommitRecord:
    """Write a snapshot and a commit for *manifest*; return the CommitRecord.

    This helper does NOT write the blobs referenced by *manifest*; callers
    store them beforehand with ``_write_object``.

    Args:
        root: Repository root passed to ``write_snapshot`` / ``write_commit``.
        manifest: Mapping stored as the snapshot's manifest.
        parent_id: Commit ID recorded as the parent; a falsy value produces a
            commit whose ID is computed from an empty parent list.

    Returns:
        The ``CommitRecord`` that was written (branch ``"main"``, message
        ``"test"``, fixed timestamp 2026-01-01 UTC).
    """
    snap_id = compute_snapshot_id(manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))

    # Fixed timestamp: the same inputs always hash to the same commit ID.
    ts = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    parent_ids = [parent_id] if parent_id else []
    cid = compute_commit_id(
        parent_ids=parent_ids,
        snapshot_id=snap_id,
        message="test",
        committed_at_iso=ts.isoformat(),
    )
    commit = CommitRecord(
        commit_id=cid,
        branch="main",
        snapshot_id=snap_id,
        message="test",
        committed_at=ts,
        parent_commit_id=parent_id,
    )
    write_commit(root, commit)
    return commit
| 105 | |
| 106 | |
def _write_object(root: pathlib.Path, content: bytes) -> str:
    """Store *content* via ``write_object`` under its ``_oid`` and return that ID."""
    object_id = _oid(content)
    write_object(root, object_id, content)
    return object_id
| 111 | |
| 112 | |
# ---------------------------------------------------------------------------
# I — unchanged objects are excluded when have is set
# ---------------------------------------------------------------------------
| 116 | |
| 117 | |
class TestCollectObjectIdsDelta:
    """Delta behaviour of ``collect_blob_ids`` when a ``have`` commit is given."""

    def test_unchanged_object_excluded_when_in_have_snapshot(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A blob listed by both the have-commit and the new commit is not sent."""
        root = _repo(tmp_path, monkeypatch)
        same_blob = _write_object(root, b"unchanged file")

        # The remote side already has this commit.
        have_commit = _write_commit(root, {"file.txt": same_blob})
        # The new commit carries an identical manifest.
        new_commit = _write_commit(
            root, {"file.txt": same_blob}, parent_id=have_commit.commit_id
        )

        result = collect_blob_ids(
            root, [new_commit.commit_id], have=[have_commit.commit_id]
        )

        assert same_blob not in result, (
            "Object present in have-snapshot must not be re-sent"
        )

    def test_new_object_included_when_not_in_have_snapshot(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A blob that appears only in the new commit's snapshot is sent."""
        root = _repo(tmp_path, monkeypatch)
        existing_blob = _write_object(root, b"old file content")
        added_blob = _write_object(root, b"brand new file")

        have_commit = _write_commit(root, {"old.txt": existing_blob})
        new_manifest = {"old.txt": existing_blob, "new.txt": added_blob}
        new_commit = _write_commit(
            root, new_manifest, parent_id=have_commit.commit_id
        )

        result = collect_blob_ids(
            root, [new_commit.commit_id], have=[have_commit.commit_id]
        )

        assert added_blob in result, "New object not in have-snapshot must be sent"
        assert existing_blob not in result, (
            "Object already in have-snapshot must not be sent"
        )

    def test_removed_object_excluded(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A blob dropped by the new commit is not sent; neither is the kept one."""
        root = _repo(tmp_path, monkeypatch)
        kept_blob = _write_object(root, b"kept file")
        dropped_blob = _write_object(root, b"file that gets deleted")

        have_commit = _write_commit(
            root, {"kept.txt": kept_blob, "gone.txt": dropped_blob}
        )
        # The new commit no longer lists gone.txt.
        new_commit = _write_commit(
            root, {"kept.txt": kept_blob}, parent_id=have_commit.commit_id
        )

        result = collect_blob_ids(
            root, [new_commit.commit_id], have=[have_commit.commit_id]
        )

        assert dropped_blob not in result
        assert kept_blob not in result  # unchanged, so already on the other side

    def test_empty_delta_when_no_changes(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """A new commit with an identical snapshot yields an empty list."""
        root = _repo(tmp_path, monkeypatch)
        blob = _write_object(root, b"content")

        have_commit = _write_commit(root, {"f.txt": blob})
        # Same manifest again, only the parent differs.
        new_commit = _write_commit(
            root, {"f.txt": blob}, parent_id=have_commit.commit_id
        )

        result = collect_blob_ids(
            root, [new_commit.commit_id], have=[have_commit.commit_id]
        )

        assert result == [], f"Expected no objects to send, got {result}"
| 189 | |
| 190 | |
# ---------------------------------------------------------------------------
# IV — walk_commits obeys the same delta semantics
# ---------------------------------------------------------------------------
| 194 | |
| 195 | |
class TestWalkCommitsDelta:
    """Delta behaviour of the ``all_blob_ids`` entry returned by ``walk_commits``."""

    def test_walk_all_blob_ids_excludes_have_objects(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """``walk["all_blob_ids"]`` holds the new blob but not the shared one."""
        root = _repo(tmp_path, monkeypatch)
        shared_blob = _write_object(root, b"shared across commits")
        added_blob = _write_object(root, b"only in new commit")

        have_commit = _write_commit(root, {"shared.txt": shared_blob})
        new_manifest = {"shared.txt": shared_blob, "new.txt": added_blob}
        new_commit = _write_commit(
            root, new_manifest, parent_id=have_commit.commit_id
        )

        walk = walk_commits(
            root, [new_commit.commit_id], have=[have_commit.commit_id]
        )
        blob_ids = walk["all_blob_ids"]

        assert added_blob in blob_ids
        assert shared_blob not in blob_ids, (
            "walk_commits must exclude objects already in have-snapshot"
        )
| 218 | |
| 219 | |
# ---------------------------------------------------------------------------
# V — multi-file repo: only the changed file is sent
# ---------------------------------------------------------------------------
| 223 | |
| 224 | |
class TestMultiFileDelta:
    """Only the modified file's blob is collected in a multi-file repository."""

    def test_only_changed_file_sent_in_10_file_repo(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Ten files, one modified: the result is exactly the modified blob."""
        root = _repo(tmp_path, monkeypatch)

        # Have-commit: ten files, each with its own content.
        base_manifest: Manifest = {
            f"file{n:02d}.mid": _write_object(
                root, f"file {n} original content".encode()
            )
            for n in range(10)
        }
        have_commit = _write_commit(root, base_manifest)

        # New commit: identical except for file05.mid.
        modified_oid = _write_object(root, b"file 5 modified content")
        changed_manifest = {**base_manifest, "file05.mid": modified_oid}
        new_commit = _write_commit(
            root, changed_manifest, parent_id=have_commit.commit_id
        )

        result = collect_blob_ids(
            root, [new_commit.commit_id], have=[have_commit.commit_id]
        )

        assert result == [modified_oid], (
            f"Expected only 1 modified object, got {len(result)}: {result}"
        )
| 253 | |
| 254 | |
# ---------------------------------------------------------------------------
# VI — integration: 1 added file in large repo
# ---------------------------------------------------------------------------
| 258 | |
| 259 | |
class TestLargeRepoDelta:
    """Adding one file to a 100-file repository collects exactly one blob."""

    def test_one_added_file_sends_one_object(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """100 existing files plus one new file: only the new blob is returned."""
        root = _repo(tmp_path, monkeypatch)

        base_manifest: Manifest = {
            f"track{n:03d}.mid": _write_object(root, f"track {n} content".encode())
            for n in range(100)
        }
        have_commit = _write_commit(root, base_manifest)

        # New commit: the same 100 files plus one extra.
        added_oid = _write_object(root, b"brand new track content")
        grown_manifest = dict(base_manifest)
        grown_manifest["new_track.mid"] = added_oid
        new_commit = _write_commit(
            root, grown_manifest, parent_id=have_commit.commit_id
        )

        result = collect_blob_ids(
            root, [new_commit.commit_id], have=[have_commit.commit_id]
        )

        assert result == [added_oid], (
            f"Expected exactly 1 new object, got {len(result)}"
        )
| 284 | |
| 285 | |
# ---------------------------------------------------------------------------
# VII — regression: have=[] sends all objects
# ---------------------------------------------------------------------------
| 289 | |
| 290 | |
class TestNoHaveRegression:
    """With an empty ``have`` list nothing is subtracted from the result."""

    def test_no_have_sends_all_objects(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """Both blobs of a two-commit chain are returned when ``have=[]``."""
        root = _repo(tmp_path, monkeypatch)
        first_blob = _write_object(root, b"obj1")
        second_blob = _write_object(root, b"obj2")

        base_commit = _write_commit(root, {"a.txt": first_blob})
        tip_commit = _write_commit(
            root,
            {"a.txt": first_blob, "b.txt": second_blob},
            parent_id=base_commit.commit_id,
        )

        result = collect_blob_ids(root, [tip_commit.commit_id], have=[])

        assert first_blob in result
        assert second_blob in result
| 307 | |
| 308 | |
# ---------------------------------------------------------------------------
# VIII — graceful handling: have commit has no local snapshot
# ---------------------------------------------------------------------------
| 312 | |
| 313 | |
class TestMissingHaveSnapshot:
    """A ``have`` commit whose snapshot is absent locally must not break collection."""

    def test_missing_have_snapshot_treated_as_no_have(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """``collect_blob_ids`` returns a list even if the have-snapshot is missing.

        The have-commit record is written, but its snapshot is deliberately
        left out of the local store.  The call must not raise; over-sending
        objects in that situation is acceptable.
        """
        root = _repo(tmp_path, monkeypatch)
        obj = _write_object(root, b"some object")

        # Phantom have-commit: only the commit record is stored.  Its snapshot
        # ID is computed, but write_snapshot is intentionally never called.
        snap_id = compute_snapshot_id({"f.txt": obj})
        ts = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        fake_have_cid = compute_commit_id(
            parent_ids=[],
            snapshot_id=snap_id,
            message="phantom",
            committed_at_iso=ts.isoformat(),
        )
        phantom_commit = CommitRecord(
            commit_id=fake_have_cid,
            branch="main",
            snapshot_id=snap_id,
            message="phantom",
            committed_at=ts,
        )
        write_commit(root, phantom_commit)

        # Real child commit, with snapshot and commit stored by the shared helper.
        # `obj` is already in the object store from the _write_object call above.
        new_obj = _write_object(root, b"new object")
        commit2 = _write_commit(
            root,
            {"f.txt": obj, "g.txt": new_obj},
            parent_id=fake_have_cid,
        )

        # Should not crash; since the have-snapshot is missing, objects may be
        # over-sent, but the call must not raise.
        # NOTE(review): the test name suggests the blobs should also appear in
        # the result — consider asserting that once the contract is confirmed.
        result = collect_blob_ids(root, [commit2.commit_id], have=[fake_have_cid])
        assert isinstance(result, list)