test_phase7_bundle_atomicity.py
python
sha256:8860dea10c653956b613a814cc752a6d34cb3986cdf16749a49172affdabf045
fix tests
Human
minor
⚠ breaking
4 days ago
| 1 | """Phase 7 — MPack atomicity: objects before refs, topological commit order. |
| 2 | |
| 3 | Invariants: |
| 4 | 1. apply_mpack writes objects → snapshots → commits → (caller advances refs). |
| 5 | A crash after objects but before refs leaves unreachable, ref-less objects |
| 6 | (safe — GC-able). A crash after refs but before objects leaves a ref |
| 7 | pointing to a commit whose snapshot has no objects (broken checkout). |
| 8 | The safe order is already enforced by apply_mpack: objects first, refs last. |
| 9 | |
| 10 | 2. Commits in an mpack may arrive newest-first (BFS order). Phase 2's |
| 11 | MissingParentError guard rejects a commit whose parent hasn't been written |
| 12 | yet. apply_mpack must retry deferred commits until all parents in the |
| 13 | mpack are resolved, or give up if a parent is genuinely absent. |
| 14 | |
| 15 | Testing tiers |
| 16 | ------------- |
| 17 | Unit apply_mpack handles newest-first commit ordering without error |
| 18 | Unit apply_mpack retries and eventually writes all commits when parents |
| 19 | arrive after children in the mpack |
| 20 | Unit apply_mpack logs and skips commits with truly absent parents |
| 21 | (not in mpack, not in store) |
| 22 | Integration mpack create → unbundle round-trip writes all commits to store |
| 23 | Data after unbundle, every commit in the mpack is readable from store |
| 24 | """ |
| 25 | |
| 26 | from __future__ import annotations |
| 27 | |
| 28 | import datetime |
| 29 | import pathlib |
| 30 | |
| 31 | import msgpack |
| 32 | import pytest |
| 33 | |
| 34 | from muse.core.mpack import apply_mpack, MPack |
| 35 | from muse.core.commits import ( |
| 36 | CommitRecord, |
| 37 | commit_exists, |
| 38 | read_commit, |
| 39 | write_commit, |
| 40 | ) |
| 41 | from muse.core.snapshots import ( |
| 42 | SnapshotRecord, |
| 43 | read_snapshot, |
| 44 | write_snapshot, |
| 45 | ) |
| 46 | from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id |
| 47 | from muse.core.types import fake_id, long_id |
| 48 | |
| 49 | |
| 50 | # --------------------------------------------------------------------------- |
| 51 | # Helpers |
| 52 | # --------------------------------------------------------------------------- |
| 53 | |
| 54 | _REPO_ID = "repo-phase7-test" |
| 55 | _BRANCH = "main" |
| 56 | |
| 57 | |
| 58 | |
def _make_real_commit(
    repo: pathlib.Path,
    tag: str,
    parent_id: str | None,
    content: str = "hello",
) -> CommitRecord:
    """Persist a snapshot and a content-addressed commit in *repo*; return the commit.

    The snapshot holds a single entry, ``file.txt``, whose object id is derived
    from *content* via ``fake_id``.  The commit id is computed with
    ``hash_commit`` from the parent list, the snapshot id, *tag* (used as the
    commit message), a fixed timestamp and a fixed author, so identical
    arguments always produce the same commit id.

    Args:
        repo: Repository root the snapshot and commit are written to.
        tag: Commit message; also the only thing distinguishing sibling
            commits that share *content* and *parent_id*.
        parent_id: Id of the parent commit, or ``None`` (or any falsy value)
            for a root commit.
        content: Seed for the fake object id stored under ``file.txt``.

    Returns:
        The ``CommitRecord`` that was written.
    """
    author = "gabriel"
    # Fixed, timezone-aware timestamp keeps commit ids deterministic across runs.
    timestamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)

    # The snapshot must be written before the commit that points at it.
    file_manifest = {"file.txt": fake_id(f"obj-{content}")}
    directories: dict[str, list[str]] = {}
    snapshot_id = compute_snapshot_id(file_manifest, directories)
    snapshot = SnapshotRecord(
        snapshot_id=snapshot_id,
        manifest=file_manifest,
        directories=directories,
    )
    write_snapshot(repo, snapshot)

    # A falsy parent_id means "root commit": hash over an empty parent list.
    parents: list[str] = []
    if parent_id:
        parents.append(parent_id)

    record = CommitRecord(
        commit_id=compute_commit_id(
            parent_ids=parents,
            snapshot_id=snapshot_id,
            message=tag,
            committed_at_iso=timestamp.isoformat(),
            author=author,
        ),
        branch=_BRANCH,
        snapshot_id=snapshot_id,
        message=tag,
        committed_at=timestamp,
        parent_commit_id=parent_id,
        author=author,
    )
    write_commit(repo, record)
    return record
| 90 | |
| 91 | |
def _snap_dicts_for(commits: list[CommitRecord], source: pathlib.Path) -> list[dict]:
    """Build wire-format snapshot dicts for *commits*, one per distinct snapshot id.

    Each snapshot is read from the *source* repo so that apply_mpack can write
    it into the destination repo and its missing-snapshot guard passes.
    Snapshots are emitted in order of first appearance in *commits*; a
    snapshot id that cannot be read from *source* is silently omitted.

    Every emitted dict is a full (non-delta) snapshot: ``parent_snapshot_id``
    is ``None``, the whole manifest is sent as ``delta_upsert`` and
    ``delta_remove`` is empty.
    """
    wire: list[dict] = []
    # dict.fromkeys dedupes while preserving first-occurrence order.
    for snapshot_id in dict.fromkeys(c.snapshot_id for c in commits):
        record = read_snapshot(source, snapshot_id)
        if record is None:
            continue
        wire.append(
            {
                "snapshot_id": record.snapshot_id,
                "parent_snapshot_id": None,
                "delta_upsert": record.manifest,
                "delta_remove": [],
                # NOTE(review): if ``directories`` is a mapping, list() keeps
                # only its keys — confirm that is the intended wire shape.
                "directories": list(record.directories or []),
            }
        )
    return wire
| 115 | |
| 116 | |
| 117 | # --------------------------------------------------------------------------- |
| 118 | # Unit — topological retry: newest-first ordering succeeds |
| 119 | # --------------------------------------------------------------------------- |
| 120 | |
class TestTopologicalRetry:
    """Unit tests for how apply_mpack copes with commit ordering and absent parents.

    Each test builds its source history directly in ``tmp_path`` with
    ``_make_real_commit`` and applies an mpack to a fresh sub-directory that
    contains no commits.
    """

    def test_newest_first_ordering_writes_all_commits(self, tmp_path: pathlib.Path) -> None:
        """apply_mpack must succeed even when commits arrive newest-first."""
        root_commit = _make_real_commit(tmp_path, "root", None)
        child_commit = _make_real_commit(tmp_path, "child", root_commit.commit_id)
        grandchild = _make_real_commit(tmp_path, "grandchild", child_commit.commit_id)

        # Fresh repo — no commits yet
        dest = tmp_path / "dest"
        dest.mkdir()

        # MPack commits in newest-first order (BFS from tip)
        commits = [grandchild, child_commit, root_commit]
        mpack: MPack = {
            "blobs": [],
            "snapshots": _snap_dicts_for(commits, tmp_path),
            "commits": [c.to_dict() for c in commits],
        }

        result = apply_mpack(dest, mpack)

        assert result["commits_written"] == 3, (
            f"Expected 3 commits written, got {result['commits_written']}. "
            "apply_mpack may not be retrying MissingParentError commits."
        )
        assert commit_exists(dest, root_commit.commit_id)
        assert commit_exists(dest, child_commit.commit_id)
        assert commit_exists(dest, grandchild.commit_id)

    def test_correct_order_still_works(self, tmp_path: pathlib.Path) -> None:
        """Oldest-first ordering (already correct) must still succeed."""
        root_commit = _make_real_commit(tmp_path, "root2", None)
        child_commit = _make_real_commit(tmp_path, "child2", root_commit.commit_id)

        dest = tmp_path / "dest2"
        dest.mkdir()

        commits = [root_commit, child_commit]
        mpack: MPack = {
            "blobs": [],
            "snapshots": _snap_dicts_for(commits, tmp_path),
            "commits": [c.to_dict() for c in commits],
        }

        result = apply_mpack(dest, mpack)

        assert result["commits_written"] == 2
        # Check the store itself, not just the reported counter, mirroring
        # the newest-first test.
        assert commit_exists(dest, root_commit.commit_id)
        assert commit_exists(dest, child_commit.commit_id)

    def test_absent_parent_skipped_gracefully(self, tmp_path: pathlib.Path) -> None:
        """A commit whose parent is not in the mpack or store must be skipped,
        not crash apply_mpack."""
        root_commit = _make_real_commit(tmp_path, "root3", None)
        child_commit = _make_real_commit(tmp_path, "child3", root_commit.commit_id)

        dest = tmp_path / "dest3"
        dest.mkdir()

        # MPack only has the child — root is absent.  The child's snapshot IS
        # shipped so that the missing parent is the only reason the commit can
        # be rejected; with an empty snapshot list the commit could be dropped
        # for lacking its snapshot and this test would pass without ever
        # exercising the missing-parent path.
        mpack: MPack = {
            "blobs": [],
            "snapshots": _snap_dicts_for([child_commit], tmp_path),
            "commits": [
                child_commit.to_dict(),  # parent (root) not in mpack or dest
            ],
        }

        # Should not raise — should log and skip
        result = apply_mpack(dest, mpack)

        assert result["commits_written"] == 0, (
            "commit with missing parent should have been skipped"
        )
        assert not commit_exists(dest, child_commit.commit_id), (
            "commit with missing parent was written despite dangling parent"
        )
| 195 | |
| 196 | |
| 197 | # --------------------------------------------------------------------------- |
| 198 | # Data — objects present after unbundle before refs are advanced |
| 199 | # --------------------------------------------------------------------------- |
| 200 | |
class TestObjectsBeforeRefs:
    """Data-tier check: commits are readable as soon as apply_mpack returns."""

    def test_apply_mpack_writes_commits_before_caller_advances_refs(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Commits written by apply_mpack are readable without any ref update.

        The ordering is checked structurally: this test never advances a
        branch ref, yet the commit must be readable from the destination
        store once apply_mpack has returned.  Were object writes deferred
        until after refs moved, a checkout right after the ref update would
        fail.
        """
        seed = _make_real_commit(tmp_path, "root4", None)

        target_repo = tmp_path / "dest4"
        target_repo.mkdir()

        snapshots = _snap_dicts_for([seed], tmp_path)
        payload: MPack = {
            "blobs": [],
            "snapshots": snapshots,
            "commits": [seed.to_dict()],
        }

        # Once apply_mpack returns, the commit is expected to be in the store.
        outcome = apply_mpack(target_repo, payload)
        assert outcome["commits_written"] == 1

        # Read straight from the store — no branch ref has been touched.
        stored = read_commit(target_repo, seed.commit_id)
        assert stored is not None, (
            "commit not readable after apply_mpack — write did not complete"
        )
        assert stored.commit_id == seed.commit_id
File History
1 commit
sha256:8860dea10c653956b613a814cc752a6d34cb3986cdf16749a49172affdabf045
fix tests
Human
minor
⚠
4 days ago