test_commit_object_store_completeness.py
python
sha256:1d3f5470f45db58e32047678debc9438fdded1b2c7332cc743d2b8be32fdafc8
fixing more broken tests
Human
patch
3 days ago
| 1 | """Tests for the invariant: after muse commit, every object in the snapshot |
| 2 | manifest is present in the local object store. |
| 3 | |
| 4 | Bug description |
| 5 | --------------- |
| 6 | ``muse commit`` skips writing objects for files that are unchanged from the |
| 7 | parent commit, on the assumption that "their objects are already in the store." |
| 8 | The assumption is wrong when parent objects have been removed (e.g. after a |
| 9 | fresh clone without fetching blobs, after ``muse gc``, or when the very first |
| 10 | commit on the repo happened without the object store being populated). |
| 11 | |
| 12 | The consequence is that ``apply_manifest`` raises ``RuntimeError`` at the end |
| 13 | of ``commit``, and subsequent ``checkout`` commands fail with "missing objects" |
| 14 | errors even though the commit record exists in ``.muse/commits/``. |
| 15 | |
| 16 | The invariant these tests enforce |
| 17 | ---------------------------------- |
| 18 | ∀ (path, oid) ∈ snapshot.manifest → has_object(repo, oid) is True |
| 19 | |
| 20 | immediately after a successful ``muse commit`` returns exit code 0. |
| 21 | """ |
| 22 | |
| 23 | from __future__ import annotations |
| 24 | from collections.abc import Mapping |
| 25 | |
| 26 | import os |
| 27 | import pathlib |
| 28 | |
| 29 | import pytest |
| 30 | |
| 31 | from tests.cli_test_helper import CliRunner, InvokeResult |
| 32 | from muse.core.types import long_id |
| 33 | from muse.core.object_store import has_object, iter_stored_objects, object_path |
| 34 | from muse.core.refs import ( |
| 35 | get_head_commit_id, |
| 36 | read_current_branch, |
| 37 | ) |
| 38 | from muse.core.commits import read_commit |
| 39 | from muse.core.snapshots import read_snapshot |
| 40 | |
| 41 | runner = CliRunner() |
| 42 | |
| 43 | |
| 44 | # --------------------------------------------------------------------------- |
| 45 | # Helpers |
| 46 | # --------------------------------------------------------------------------- |
| 47 | |
| 48 | |
| 49 | def _invoke(repo: pathlib.Path, args: list[str]) -> InvokeResult: |
| 50 | saved = os.getcwd() |
| 51 | try: |
| 52 | os.chdir(repo) |
| 53 | return runner.invoke(None, args) |
| 54 | finally: |
| 55 | os.chdir(saved) |
| 56 | |
| 57 | |
| 58 | def _commit(repo: pathlib.Path, *extra: str) -> InvokeResult: |
| 59 | return _invoke(repo, ["commit", *extra]) |
| 60 | |
| 61 | |
| 62 | def _add(repo: pathlib.Path, *paths: str) -> None: |
| 63 | _invoke(repo, ["code", "add", *paths]) |
| 64 | |
| 65 | |
| 66 | def _init_repo(repo: pathlib.Path) -> None: |
| 67 | repo.mkdir(parents=True, exist_ok=True) |
| 68 | result = _invoke(repo, ["init"]) |
| 69 | assert result.exit_code == 0, f"muse init failed: {result.output}" |
| 70 | |
| 71 | |
| 72 | def _head_snapshot_manifest(repo: pathlib.Path) -> Mapping[str, str]: |
| 73 | """Return the manifest dict for the current HEAD commit.""" |
| 74 | branch = read_current_branch(repo) |
| 75 | cid = get_head_commit_id(repo, branch) |
| 76 | assert cid is not None |
| 77 | rec = read_commit(repo, cid) |
| 78 | assert rec is not None |
| 79 | snap = read_snapshot(repo, rec.snapshot_id) |
| 80 | assert snap is not None |
| 81 | return snap.manifest |
| 82 | |
| 83 | |
| 84 | def _delete_all_objects(repo: pathlib.Path) -> list[str]: |
| 85 | """Remove all blob objects from the local store; return deleted OIDs.""" |
| 86 | deleted: list[str] = [] |
| 87 | for oid, obj_file in iter_stored_objects(repo): |
| 88 | obj_file.unlink() |
| 89 | deleted.append(oid) |
| 90 | return deleted |
| 91 | |
| 92 | |
| 93 | # --------------------------------------------------------------------------- |
| 94 | # Fixture |
| 95 | # --------------------------------------------------------------------------- |
| 96 | |
| 97 | |
| 98 | @pytest.fixture() |
| 99 | def repo(tmp_path: pathlib.Path) -> pathlib.Path: |
| 100 | """Initialised code repo with one file ready to commit.""" |
| 101 | _init_repo(tmp_path) |
| 102 | (tmp_path / "main.py").write_text("x = 1\n") |
| 103 | return tmp_path |
| 104 | |
| 105 | |
| 106 | # --------------------------------------------------------------------------- |
| 107 | # TestAllObjectsInStoreAfterCommit |
| 108 | # |
| 109 | # Core invariant: every object referenced by the committed snapshot is present |
| 110 | # in the local object store immediately after a successful commit. |
| 111 | # --------------------------------------------------------------------------- |
| 112 | |
| 113 | |
| 114 | class TestAllObjectsInStoreAfterCommit: |
| 115 | def test_first_commit_stores_all_objects(self, repo: pathlib.Path) -> None: |
| 116 | """Every object in the first commit's manifest is in the store.""" |
| 117 | result = _commit(repo, "-m", "first") |
| 118 | assert result.exit_code == 0, result.output |
| 119 | |
| 120 | manifest = _head_snapshot_manifest(repo) |
| 121 | assert manifest, "Manifest must not be empty" |
| 122 | for path, oid in manifest.items(): |
| 123 | assert has_object(repo, oid), ( |
| 124 | f"Object for '{path}' ({oid[:20]}…) missing after first commit" |
| 125 | ) |
| 126 | |
| 127 | def test_second_commit_stores_all_objects(self, repo: pathlib.Path) -> None: |
| 128 | """Every object in the second commit's manifest is in the store.""" |
| 129 | _commit(repo, "-m", "first") |
| 130 | (repo / "util.py").write_text("y = 2\n") |
| 131 | _add(repo, "util.py") |
| 132 | result = _commit(repo, "-m", "second") |
| 133 | assert result.exit_code == 0, result.output |
| 134 | |
| 135 | manifest = _head_snapshot_manifest(repo) |
| 136 | for path, oid in manifest.items(): |
| 137 | assert has_object(repo, oid), ( |
| 138 | f"Object for '{path}' ({oid[:20]}…) missing after second commit" |
| 139 | ) |
| 140 | |
| 141 | def test_unchanged_file_object_present_after_second_commit( |
| 142 | self, repo: pathlib.Path |
| 143 | ) -> None: |
| 144 | """An unchanged file's object from a prior commit is still accessible.""" |
| 145 | _commit(repo, "-m", "first") |
| 146 | manifest_1 = _head_snapshot_manifest(repo) |
| 147 | |
| 148 | # Add a new file; main.py is UNCHANGED. |
| 149 | (repo / "extra.py").write_text("z = 3\n") |
| 150 | _add(repo, "extra.py") |
| 151 | result = _commit(repo, "-m", "second") |
| 152 | assert result.exit_code == 0, result.output |
| 153 | |
| 154 | manifest_2 = _head_snapshot_manifest(repo) |
| 155 | # The unchanged file's object ID is the same in both manifests. |
| 156 | for path, oid in manifest_2.items(): |
| 157 | if manifest_1.get(path) == oid: |
| 158 | # This is an UNCHANGED file — its object must still be in the store. |
| 159 | assert has_object(repo, oid), ( |
| 160 | f"Object for unchanged '{path}' ({oid[:20]}…) missing after second commit" |
| 161 | ) |
| 162 | |
| 163 | def test_parent_objects_missing_rewritten_on_next_commit( |
| 164 | self, repo: pathlib.Path |
| 165 | ) -> None: |
| 166 | """THE BUG: if parent objects are deleted, the next commit must restore them. |
| 167 | |
| 168 | Scenario: |
| 169 | 1. First commit stores objects for main.py. |
| 170 | 2. All objects are deleted from the store (simulating a clone without blobs). |
| 171 | 3. A new file is added; main.py is unchanged. |
| 172 | 4. Second commit runs. |
| 173 | |
| 174 | BEFORE THE FIX: main.py's object is skipped ("unchanged from parent") |
| 175 | → has_object(repo, oid_main) is False after the commit. |
| 176 | |
| 177 | AFTER THE FIX: the commit notices the object is missing and writes it. |
| 178 | → has_object(repo, oid_main) is True. |
| 179 | """ |
| 180 | _commit(repo, "-m", "first") |
| 181 | manifest_1 = _head_snapshot_manifest(repo) |
| 182 | |
| 183 | # Simulate objects disappearing (clone without objects, gc, corruption). |
| 184 | deleted = _delete_all_objects(repo) |
| 185 | assert deleted, "Expected at least one object to have been written by first commit" |
| 186 | |
| 187 | # Verify the objects are actually gone. |
| 188 | for path, oid in manifest_1.items(): |
| 189 | assert not has_object(repo, oid), ( |
| 190 | f"Expected object for '{path}' to be absent before second commit" |
| 191 | ) |
| 192 | |
| 193 | # Add a new file so the snapshot changes (otherwise "nothing to commit"). |
| 194 | (repo / "new.py").write_text("new = True\n") |
| 195 | result = _commit(repo, "-m", "second") |
| 196 | assert result.exit_code == 0, ( |
| 197 | f"Commit failed with missing parent objects: {result.output}" |
| 198 | ) |
| 199 | |
| 200 | # INVARIANT: every object in the new manifest must be in the store. |
| 201 | manifest_2 = _head_snapshot_manifest(repo) |
| 202 | missing = [ |
| 203 | (path, oid) |
| 204 | for path, oid in manifest_2.items() |
| 205 | if not has_object(repo, oid) |
| 206 | ] |
| 207 | assert not missing, ( |
| 208 | "Objects missing from store after commit:\n" |
| 209 | + "\n".join(f" {p}: {o[:20]}…" for p, o in missing) |
| 210 | ) |
| 211 | |
| 212 | def test_commit_does_not_leave_partial_state_on_apply_manifest_failure( |
| 213 | self, repo: pathlib.Path |
| 214 | ) -> None: |
| 215 | """If apply_manifest would fail, commit must not succeed. |
| 216 | |
| 217 | After the fix, apply_manifest never fails because all objects are |
| 218 | written before it is called. This test confirms that a commit with |
| 219 | missing parent objects completes without raising RuntimeError. |
| 220 | """ |
| 221 | _commit(repo, "-m", "first") |
| 222 | _delete_all_objects(repo) |
| 223 | (repo / "extra.py").write_text("extra = 1\n") |
| 224 | |
| 225 | # Must not raise RuntimeError("apply_manifest: N object(s) missing …") |
| 226 | result = _commit(repo, "-m", "after deletion") |
| 227 | assert result.exit_code == 0, ( |
| 228 | f"Commit raised an exception or exited non-zero: {result.output}" |
| 229 | ) |
| 230 | assert "missing" not in result.output.lower(), ( |
| 231 | f"Unexpected 'missing' in commit output: {result.output}" |
| 232 | ) |
| 233 | |
| 234 | |
| 235 | # --------------------------------------------------------------------------- |
| 236 | # TestCheckoutAfterCommit |
| 237 | # |
| 238 | # Regression: checkout must succeed after a commit that had missing parent |
| 239 | # objects. Before the fix, checkout would fail with "N object(s) not in |
| 240 | # local store." |
| 241 | # --------------------------------------------------------------------------- |
| 242 | |
| 243 | |
| 244 | class TestCheckoutAfterCommit: |
| 245 | def test_checkout_after_commit_with_missing_parent_objects( |
| 246 | self, tmp_path: pathlib.Path |
| 247 | ) -> None: |
| 248 | """Checkout must not fail due to missing objects after a commit. |
| 249 | |
| 250 | Regression test for: muse checkout <branch> failing with |
| 251 | '11 object(s) not in local store' immediately after muse commit. |
| 252 | """ |
| 253 | repo = tmp_path / "repo" |
| 254 | _init_repo(repo) |
| 255 | (repo / "main.py").write_text("x = 1\n") |
| 256 | _commit(repo, "-m", "first") |
| 257 | |
| 258 | # Create a second branch so we have something to checkout to. |
| 259 | result = _invoke(repo, ["checkout", "-b", "feature"]) |
| 260 | assert result.exit_code == 0, f"checkout -b feature failed: {result.output}" |
| 261 | |
| 262 | # Switch back to main. |
| 263 | result = _invoke(repo, ["checkout", "main"]) |
| 264 | assert result.exit_code == 0, f"checkout main failed: {result.output}" |
| 265 | |
| 266 | # Delete objects and make a new commit on main. |
| 267 | _delete_all_objects(repo) |
| 268 | (repo / "extra.py").write_text("extra = 1\n") |
| 269 | result = _commit(repo, "-m", "second") |
| 270 | assert result.exit_code == 0, f"commit failed: {result.output}" |
| 271 | |
| 272 | # REGRESSION: checkout must succeed after the fixed commit. |
| 273 | result = _invoke(repo, ["checkout", "feature"]) |
| 274 | assert result.exit_code == 0, ( |
| 275 | f"checkout failed after commit with missing parent objects:\n{result.output}" |
| 276 | ) |
| 277 | |
| 278 | def test_checkout_back_and_forth_after_multi_commit_session( |
| 279 | self, tmp_path: pathlib.Path |
| 280 | ) -> None: |
| 281 | """Multiple commits with object deletions between them; checkout works.""" |
| 282 | repo = tmp_path / "repo" |
| 283 | _init_repo(repo) |
| 284 | (repo / "a.py").write_text("a = 1\n") |
| 285 | _commit(repo, "-m", "c1") |
| 286 | |
| 287 | _invoke(repo, ["checkout", "-b", "dev"]) |
| 288 | (repo / "b.py").write_text("b = 2\n") |
| 289 | _commit(repo, "-m", "c2") |
| 290 | |
| 291 | _delete_all_objects(repo) |
| 292 | (repo / "c.py").write_text("c = 3\n") |
| 293 | _commit(repo, "-m", "c3") |
| 294 | |
| 295 | # Checkout main — should restore working tree to c1 state. |
| 296 | result = _invoke(repo, ["checkout", "main"]) |
| 297 | assert result.exit_code == 0, ( |
| 298 | f"checkout main failed: {result.output}" |
| 299 | ) |
| 300 | |
| 301 | # Checkout dev again. |
| 302 | result = _invoke(repo, ["checkout", "dev"]) |
| 303 | assert result.exit_code == 0, ( |
| 304 | f"checkout dev failed: {result.output}" |
| 305 | ) |
| 306 | |
| 307 | |
| 308 | # --------------------------------------------------------------------------- |
| 309 | # TestApplyManifestAfterCommit |
| 310 | # |
| 311 | # Direct verification that apply_manifest does not raise RuntimeError |
| 312 | # when called with the manifest from the latest commit. |
| 313 | # --------------------------------------------------------------------------- |
| 314 | |
| 315 | |
| 316 | class TestApplyManifestAfterCommit: |
| 317 | def test_apply_manifest_does_not_raise_after_commit( |
| 318 | self, repo: pathlib.Path |
| 319 | ) -> None: |
| 320 | """apply_manifest must not raise after a successful commit.""" |
| 321 | from muse.core.workdir import apply_manifest |
| 322 | |
| 323 | _commit(repo, "-m", "first") |
| 324 | manifest = _head_snapshot_manifest(repo) |
| 325 | |
| 326 | # This must not raise RuntimeError("apply_manifest: N object(s) missing …") |
| 327 | apply_manifest(repo, {}, manifest) |
| 328 | |
| 329 | def test_apply_manifest_does_not_raise_when_parent_objects_were_missing( |
| 330 | self, repo: pathlib.Path |
| 331 | ) -> None: |
| 332 | """apply_manifest works even if parent objects were absent before commit.""" |
| 333 | from muse.core.workdir import apply_manifest |
| 334 | |
| 335 | _commit(repo, "-m", "first") |
| 336 | _delete_all_objects(repo) |
| 337 | (repo / "new.py").write_text("n = 0\n") |
| 338 | result = _commit(repo, "-m", "second") |
| 339 | assert result.exit_code == 0, result.output |
| 340 | |
| 341 | manifest = _head_snapshot_manifest(repo) |
| 342 | apply_manifest(repo, {}, manifest) # must not raise |
File History
1 commit
sha256:1d3f5470f45db58e32047678debc9438fdded1b2c7332cc743d2b8be32fdafc8
fixing more broken tests
Human
patch
3 days ago