test_write_commit_snapshot_hash_verify.py
file-level
1
files
1
commits
0
hotspots
0
π§ dead
0
💥 blast risk
| 1 | """ |
| 2 | Tests for data-integrity behaviour of write_commit / write_snapshot. |
| 3 | |
| 4 | === Current architecture: idempotent writes, detection at read time === |
| 5 | |
| 6 | write_commit and write_snapshot are both idempotent: if the object |
| 7 | already exists at object_path, the call returns immediately without |
| 8 | modifying disk. This means: |
| 9 | |
| 10 | - Corruption that lands at object_path is NOT repaired by write_commit |
| 11 | or write_snapshot. |
| 12 | - Corruption IS detected at read time: read_commit and read_snapshot |
| 13 | recompute the hash from stored fields and return None on mismatch. |
| 14 | |
| 15 | === Coverage === |
| 16 | |
| 17 | Unit — write_commit skips clean existing record (no regression) |
| 18 | Unit — write_commit skips on corrupt object (idempotent) |
| 19 | Unit — read_commit returns None for corrupt snapshot_id field |
| 20 | Unit — read_commit returns None for corrupt message field |
| 21 | Unit — read_commit returns None for corrupt parent_commit_id field |
| 22 | Unit — write_commit skips on content-level hash mismatch (no OSError) |
| 23 | Unit — write_snapshot skips clean existing record (no regression) |
| 24 | Unit — read_snapshot returns None for corrupt manifest |
| 25 | Data — parent chain (A→B→C): corrupt B → B unreadable, A/C readable |
| 26 | Data — corrupting one snapshot does not affect sibling snapshots |
| 27 | Security — corrupt snapshot_id in commit is rejected at read time |
| 28 | Security — injected manifest entry is rejected at read time |
| 29 | Stress — 20 concurrent write_commit calls are all idempotent |
| 30 | Stress — 20 concurrent write_snapshot calls are all idempotent |
| 31 | Stress — 50 sequential commits all written and readable |
| 32 | Regression — write_commit new file works |
| 33 | Regression — write_snapshot new file works |
| 34 | Regression — write_commit idempotent on clean file |
| 35 | Regression — write_snapshot idempotent on clean file |
| 36 | """ |
| 37 | from __future__ import annotations |
| 38 | |
| 39 | import datetime |
| 40 | import json as _json |
| 41 | import pathlib |
| 42 | import threading |
| 43 | |
| 44 | import pytest |
| 45 | |
| 46 | from muse.core.types import Manifest, fake_id |
| 47 | from muse.core.object_store import object_path as _obj_path |
| 48 | from muse.core.paths import muse_dir |
| 49 | |
| 50 | # --------------------------------------------------------------------------- |
| 51 | # Helpers |
| 52 | # --------------------------------------------------------------------------- |
| 53 | |
# Placeholder object ID used as the blob for the default manifest entry in _good_snapshot.
_DEFAULT_BLOB = fake_id("default-blob")
# Placeholder snapshot ID that _good_commit falls back to when no snapshot_id is given.
_DEFAULT_SNAP = fake_id("default-snap")
# Shape of the field overrides that _write_corrupt_commit merges over a well-formed payload.
_CorruptField = dict[str, str | int | None]
| 57 | |
| 58 | |
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create ``objects/sha256`` under the muse directory of *tmp_path* and return *tmp_path*."""
    sha256_dir = muse_dir(tmp_path) / "objects" / "sha256"
    # exist_ok lets several tests (or repeated calls) share the same root safely.
    sha256_dir.mkdir(parents=True, exist_ok=True)
    return tmp_path
| 62 | |
| 63 | |
| 64 | def _ts(year: int = 2024) -> str: |
| 65 | return f"{year}-01-01T00:00:00+00:00" |
| 66 | |
| 67 | |
def _good_commit(
    snapshot_id: str | None = None,
    message: str = "test commit",
    parent_commit_id: str | None = None,
    ts: str | None = None,
) -> "CommitRecord":
    """Build a CommitRecord whose ``commit_id`` is computed from its own fields.

    Args:
        snapshot_id: Snapshot the commit points at; a falsy value selects
            ``_DEFAULT_SNAP``.
        message: Commit message.
        parent_commit_id: Optional single parent; a falsy value means the
            commit is hashed with an empty parent list.
        ts: ISO-8601 timestamp string; a falsy value selects ``_ts()``.

    Returns:
        A record on branch ``"main"`` authored by ``"gabriel"`` whose
        ``commit_id`` is the ``hash_commit`` of the values above.
    """
    from muse.core.commits import CommitRecord
    from muse.core.ids import hash_commit

    author = "gabriel"
    effective_snapshot = snapshot_id if snapshot_id else _DEFAULT_SNAP
    iso_timestamp = ts if ts else _ts()
    parents = [] if not parent_commit_id else [parent_commit_id]

    return CommitRecord(
        # The ID is hashed from exactly the fields stored on the record below.
        commit_id=hash_commit(
            parent_ids=parents,
            snapshot_id=effective_snapshot,
            message=message,
            committed_at_iso=iso_timestamp,
            author=author,
        ),
        branch="main",
        snapshot_id=effective_snapshot,
        message=message,
        committed_at=datetime.datetime.fromisoformat(iso_timestamp),
        parent_commit_id=parent_commit_id,
        parent2_commit_id=None,
        author=author,
        metadata={},
    )
| 98 | |
| 99 | |
def _good_snapshot(manifest: Manifest | None = None) -> "SnapshotRecord":
    """Build a SnapshotRecord whose ``snapshot_id`` is ``hash_snapshot(manifest)``.

    Args:
        manifest: Mapping of path to object ID. Only ``None`` selects the
            default single-file manifest; an explicitly passed empty manifest
            is kept as-is rather than being silently replaced.

    Returns:
        A record with the given (or default) manifest and no directories.
    """
    from muse.core.snapshots import SnapshotRecord
    from muse.core.ids import hash_snapshot

    # `manifest or default` would treat {} as "not provided"; test identity with None instead.
    if manifest is None:
        manifest = {"src/main.py": _DEFAULT_BLOB}
    return SnapshotRecord(
        snapshot_id=hash_snapshot(manifest),
        manifest=manifest,
        directories={},
    )
| 107 | |
| 108 | |
def _write_corrupt_commit(repo: pathlib.Path, good: "CommitRecord", corrupt_field: _CorruptField) -> None:
    """Store a tampered commit object at the path belonging to *good*.

    The payload starts from the fields of *good* and then applies the
    overrides in *corrupt_field*, so the file keeps a well-formed
    ``commit <length>\\0`` header while its content no longer matches
    ``good.commit_id``.
    """
    pristine_fields = {
        "commit_id": good.commit_id,
        "repo_id": "test-repo",
        "branch": "main",
        "snapshot_id": good.snapshot_id,
        "message": good.message,
        "committed_at": good.committed_at.isoformat(),
        "parent_commit_id": good.parent_commit_id,
        "parent2_commit_id": None,
        "author": "gabriel",
        "metadata": {},
        "reviewed_by": [],
    }
    # Overrides win over the pristine values; key order of existing keys is kept.
    tampered_fields = {**pristine_fields, **corrupt_field}
    payload = _json.dumps(tampered_fields, separators=(",", ":")).encode()
    header = f"commit {len(payload)}\0".encode()

    target = _obj_path(repo, good.commit_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(header + payload)
| 129 | |
| 130 | |
def _write_corrupt_snapshot(repo: pathlib.Path, good: "SnapshotRecord", corrupt_manifest: Manifest) -> None:
    """Store a tampered snapshot object at the path belonging to *good*.

    The file keeps ``good.snapshot_id`` and a well-formed
    ``snapshot <length>\\0`` header, but carries *corrupt_manifest* instead of
    the manifest the ID was computed from.
    """
    payload = _json.dumps(
        {
            "snapshot_id": good.snapshot_id,
            "manifest": corrupt_manifest,
            "directories": {},
        },
        separators=(",", ":"),
    ).encode()
    header = f"snapshot {len(payload)}\0".encode()

    target = _obj_path(repo, good.snapshot_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(header + payload)
| 142 | |
| 143 | |
| 144 | # ============================================================================= |
| 145 | # 1. UNIT — write_commit idempotency and corruption detection |
| 146 | # ============================================================================= |
| 147 | |
| 148 | |
class TestWriteCommitHashVerification:
    """write_commit skips objects that already exist; read_commit rejects corrupt ones."""

    def test_idempotent_skip_clean_record(self, tmp_path: pathlib.Path) -> None:
        """Regression: write_commit on a clean existing file still returns fast.

        Besides staying readable, the stored object must be byte-for-byte
        unchanged by the second call.
        """
        from muse.core.commits import (
            read_commit,
            write_commit,
        )

        repo = _make_repo(tmp_path)
        good = _good_commit()
        stored = _obj_path(repo, good.commit_id)

        write_commit(repo, good)
        bytes_after_first_write = stored.read_bytes()

        write_commit(repo, good)  # second call: must not raise, must not change data
        assert stored.read_bytes() == bytes_after_first_write, (
            "second write_commit must leave the stored object byte-for-byte unchanged"
        )

        result = read_commit(repo, good.commit_id)
        assert result is not None
        assert result.commit_id == good.commit_id

    def test_corrupt_snapshot_id_detected_at_read(self, tmp_path: pathlib.Path) -> None:
        """
        A commit object with a corrupt snapshot_id is detected at read time.

        write_commit is idempotent: it skips if object_path exists, so a
        pre-existing corrupt file is NOT overwritten. read_commit recomputes
        the hash and returns None when snapshot_id doesn't match commit_id.
        """
        from muse.core.commits import (
            read_commit,
            write_commit,
        )

        repo = _make_repo(tmp_path)
        good = _good_commit()
        _write_corrupt_commit(repo, good, {"snapshot_id": fake_id("attacker-snapshot")})

        write_commit(repo, good)  # skips — object already exists
        result = read_commit(repo, good.commit_id)
        assert result is None, (
            "read_commit must detect corrupt snapshot_id via hash verification "
            "and return None — not silently serve corrupt content."
        )

    def test_corrupt_message_detected_at_read(self, tmp_path: pathlib.Path) -> None:
        """A commit with a corrupt message is detected at read time."""
        from muse.core.commits import (
            read_commit,
            write_commit,
        )

        repo = _make_repo(tmp_path)
        good = _good_commit(message="original message")
        _write_corrupt_commit(repo, good, {"message": "CORRUPTED MESSAGE"})

        write_commit(repo, good)  # skips — object already exists
        result = read_commit(repo, good.commit_id)
        assert result is None, "read_commit must detect corrupt message via hash verification"

    def test_corrupt_parent_commit_id_detected_at_read(self, tmp_path: pathlib.Path) -> None:
        """A commit with a corrupt parent_commit_id is detected at read time."""
        from muse.core.commits import (
            read_commit,
            write_commit,
        )

        repo = _make_repo(tmp_path)
        good = _good_commit(parent_commit_id=None)
        _write_corrupt_commit(repo, good, {"parent_commit_id": fake_id("injected-parent")})

        write_commit(repo, good)  # skips — object already exists
        result = read_commit(repo, good.commit_id)
        assert result is None, "read_commit must detect corrupt parent_commit_id via hash verification"

    def test_content_hash_mismatch_skipped_not_raised(self, tmp_path: pathlib.Path) -> None:
        """
        write_commit is always idempotent — never raises OSError for content-level
        mismatches. A corrupt object at object_path is silently skipped.
        Hash mismatches are detected later by read_commit.
        """
        from muse.core.commits import (
            read_commit,
            write_commit,
        )

        repo = _make_repo(tmp_path)
        good_a = _good_commit(message="commit A", ts=_ts(2024))
        good_b = _good_commit(message="commit B", ts=_ts(2025))

        # Write B's data under A's object_path (commit_id field mismatch)
        _write_corrupt_commit(repo, good_a, {
            "commit_id": good_b.commit_id,
            "message": good_b.message,
            "snapshot_id": good_b.snapshot_id,
        })

        # write_commit must NOT raise — it skips (idempotent)
        write_commit(repo, good_a)

        # read_commit detects the hash mismatch and returns None
        result = read_commit(repo, good_a.commit_id)
        assert result is None, "read_commit must detect commit_id field mismatch"
| 248 | |
| 249 | |
| 250 | # ============================================================================= |
| 251 | # 2. UNIT — write_snapshot idempotency and corruption detection |
| 252 | # ============================================================================= |
| 253 | |
| 254 | |
class TestWriteSnapshotHashVerification:
    """write_snapshot skips objects that already exist; read_snapshot rejects corrupt ones."""

    def test_idempotent_skip_clean_snapshot(self, tmp_path: pathlib.Path) -> None:
        """Regression: write_snapshot on a clean existing file still skips correctly.

        Besides staying readable, the stored object must be byte-for-byte
        unchanged by the second call.
        """
        from muse.core.snapshots import (
            read_snapshot,
            write_snapshot,
        )

        repo = _make_repo(tmp_path)
        good = _good_snapshot()
        stored = _obj_path(repo, good.snapshot_id)

        write_snapshot(repo, good)
        bytes_after_first_write = stored.read_bytes()

        write_snapshot(repo, good)  # second call: idempotent
        assert stored.read_bytes() == bytes_after_first_write, (
            "second write_snapshot must leave the stored object byte-for-byte unchanged"
        )

        result = read_snapshot(repo, good.snapshot_id)
        assert result is not None
        assert result.snapshot_id == good.snapshot_id

    def test_corrupt_object_id_in_manifest_detected_at_read(self, tmp_path: pathlib.Path) -> None:
        """
        A snapshot with a wrong object ID for a file is detected at read time.

        write_snapshot is idempotent: pre-existing corrupt object is skipped.
        read_snapshot recomputes the manifest hash and returns None on mismatch.
        """
        from muse.core.snapshots import (
            read_snapshot,
            write_snapshot,
        )

        repo = _make_repo(tmp_path)
        blob = fake_id("main-blob")
        good = _good_snapshot({"src/main.py": blob})
        _write_corrupt_snapshot(repo, good, {"src/main.py": fake_id("wrong-blob")})

        write_snapshot(repo, good)  # skips — object already exists
        result = read_snapshot(repo, good.snapshot_id)
        assert result is None, "read_snapshot must detect corrupt manifest object ID"

    def test_extra_manifest_entry_detected_at_read(self, tmp_path: pathlib.Path) -> None:
        """An extra file in the manifest (hash mismatch) is detected at read time."""
        from muse.core.snapshots import (
            read_snapshot,
            write_snapshot,
        )

        repo = _make_repo(tmp_path)
        blob = fake_id("main-blob")
        good = _good_snapshot({"src/main.py": blob})
        _write_corrupt_snapshot(repo, good, {
            "src/main.py": blob,
            "INJECTED_FILE.py": fake_id("injected-blob"),
        })

        write_snapshot(repo, good)  # skips
        result = read_snapshot(repo, good.snapshot_id)
        assert result is None, "read_snapshot must detect injected manifest entry"

    def test_empty_manifest_detected_at_read(self, tmp_path: pathlib.Path) -> None:
        """A snapshot with an empty manifest (should have files) is detected at read time."""
        from muse.core.snapshots import (
            read_snapshot,
            write_snapshot,
        )

        repo = _make_repo(tmp_path)
        good = _good_snapshot({
            "src/main.py": fake_id("main-blob"),
            "src/utils.py": fake_id("utils-blob"),
        })
        _write_corrupt_snapshot(repo, good, {})  # manifest wiped

        write_snapshot(repo, good)  # skips
        result = read_snapshot(repo, good.snapshot_id)
        assert result is None, "read_snapshot must detect empty manifest (hash mismatch)"

    def test_missing_manifest_entry_detected_at_read(self, tmp_path: pathlib.Path) -> None:
        """A snapshot with a missing file entry is detected at read time."""
        from muse.core.snapshots import (
            read_snapshot,
            write_snapshot,
        )

        repo = _make_repo(tmp_path)
        main_blob = fake_id("main-blob")
        good = _good_snapshot({
            "src/main.py": main_blob,
            "src/utils.py": fake_id("utils-blob"),
        })
        _write_corrupt_snapshot(repo, good, {"src/main.py": main_blob})  # utils.py missing

        write_snapshot(repo, good)  # skips
        result = read_snapshot(repo, good.snapshot_id)
        assert result is None, "read_snapshot must detect missing manifest entry"
| 348 | |
| 349 | |
| 350 | # ============================================================================= |
| 351 | # 3. DATA INTEGRITY — full commit → snapshot chain |
| 352 | # ============================================================================= |
| 353 | |
| 354 | |
class TestCommitSnapshotChain:
    """Corruption of one stored object must not leak into its neighbours."""

    def test_clean_commit_and_snapshot_both_readable(self, tmp_path: pathlib.Path) -> None:
        """A commit and the snapshot it references can both be read back after a clean write."""
        from muse.core.commits import read_commit, write_commit
        from muse.core.snapshots import read_snapshot, write_snapshot

        repo = _make_repo(tmp_path)
        snapshot_record = _good_snapshot({"src/main.py": fake_id("main-blob")})
        commit_record = _good_commit(snapshot_id=snapshot_record.snapshot_id)

        write_snapshot(repo, snapshot_record)
        write_commit(repo, commit_record)

        loaded_commit = read_commit(repo, commit_record.commit_id)
        assert loaded_commit is not None
        # Follow the reference stored on the commit rather than the local variable.
        loaded_snapshot = read_snapshot(repo, loaded_commit.snapshot_id)
        assert loaded_snapshot is not None

    def test_parent_chain_corrupt_middle_unreadable(self, tmp_path: pathlib.Path) -> None:
        """A→B→C chain: after B's object is corrupted, B is unreadable while A and C still read."""
        from muse.core.commits import read_commit, write_commit

        repo = _make_repo(tmp_path)
        commit_a = _good_commit(message="commit A", ts=_ts(2022))
        commit_b = _good_commit(
            message="commit B",
            parent_commit_id=commit_a.commit_id,
            ts=_ts(2023),
        )
        commit_c = _good_commit(
            message="commit C",
            parent_commit_id=commit_b.commit_id,
            ts=_ts(2024),
        )

        for record in (commit_a, commit_b, commit_c):
            write_commit(repo, record)

        # Replace B's stored object with a payload pointing at a different snapshot.
        _write_corrupt_commit(repo, commit_b, {"snapshot_id": fake_id("wrong-snap")})

        loaded_a = read_commit(repo, commit_a.commit_id)
        assert loaded_a is not None, "A must be readable"
        loaded_b = read_commit(repo, commit_b.commit_id)
        assert loaded_b is None, "B must be unreadable after corruption"
        loaded_c = read_commit(repo, commit_c.commit_id)
        assert loaded_c is not None, "C must be readable"

    def test_corrupting_one_snapshot_does_not_affect_siblings(self, tmp_path: pathlib.Path) -> None:
        """Only the corrupted snapshot becomes unreadable; its siblings are untouched."""
        from muse.core.snapshots import read_snapshot, write_snapshot

        repo = _make_repo(tmp_path)
        snap_a = _good_snapshot({"a.py": fake_id("a-blob")})
        snap_b = _good_snapshot({"b.py": fake_id("b-blob")})
        snap_c = _good_snapshot({"c.py": fake_id("c-blob")})

        for record in (snap_a, snap_b, snap_c):
            write_snapshot(repo, record)

        # Overwrite snap_b's object with a manifest that no longer matches its ID.
        _write_corrupt_snapshot(repo, snap_b, {"b.py": fake_id("wrong-blob")})

        loaded_a = read_snapshot(repo, snap_a.snapshot_id)
        assert loaded_a is not None, "snap_a must be readable"
        loaded_b = read_snapshot(repo, snap_b.snapshot_id)
        assert loaded_b is None, "snap_b must be unreadable after corruption"
        loaded_c = read_snapshot(repo, snap_c.snapshot_id)
        assert loaded_c is not None, "snap_c must be readable"
| 424 | |
| 425 | |
| 426 | # ============================================================================= |
| 427 | # 4. SECURITY — corrupt fields cannot forge content |
| 428 | # ============================================================================= |
| 429 | |
| 430 | |
class TestSecurityCorruptFields:
    """Tampered fields in stored objects must be rejected when the object is read."""

    def test_corrupt_snapshot_id_in_commit_rejected_at_read(self, tmp_path: pathlib.Path) -> None:
        """
        An attacker who corrupts a commit's snapshot_id cannot make Muse read
        different content — the hash mismatch is detected by read_commit.
        """
        from muse.core.commits import (
            read_commit,
            write_commit,
        )

        repo = _make_repo(tmp_path)
        good = _good_commit()
        attacker_snapshot = fake_id("attacker-snapshot")

        _write_corrupt_commit(repo, good, {"snapshot_id": attacker_snapshot})

        write_commit(repo, good)  # skips — object exists
        result = read_commit(repo, good.commit_id)
        # The dash in this message was mis-encoded in the source; restored here.
        assert result is None, (
            "Corrupt commit with attacker's snapshot_id must be rejected at read "
            "time — hash verification must detect the field substitution."
        )

    def test_injected_manifest_entry_rejected_at_read(self, tmp_path: pathlib.Path) -> None:
        """
        An injected file in the manifest (hash mismatch) is rejected at read time.
        """
        from muse.core.snapshots import (
            read_snapshot,
            write_snapshot,
        )

        repo = _make_repo(tmp_path)
        blob = fake_id("main-blob")
        good = _good_snapshot({"src/main.py": blob})
        _write_corrupt_snapshot(repo, good, {
            "src/main.py": blob,
            "malicious_backdoor.py": fake_id("backdoor-blob"),
        })

        write_snapshot(repo, good)  # skips
        result = read_snapshot(repo, good.snapshot_id)
        assert result is None, (
            "Snapshot with injected manifest entry must be rejected at read time."
        )
| 478 | |
| 479 | |
| 480 | # ============================================================================= |
| 481 | # 5. STRESS — concurrent writes are idempotent |
| 482 | # ============================================================================= |
| 483 | |
| 484 | |
class TestStressConcurrentWrite:
    """Stress tests: concurrent and repeated writes must succeed and stay readable."""

    @staticmethod
    def _run_in_threads(target, count: int) -> list[Exception]:
        """Run *target* in *count* threads and return the exceptions they raised.

        ``threading.Thread`` does not propagate an exception from the worker
        to the thread that calls ``join()``, so a failing write would
        otherwise go unnoticed by the test. Collecting the exceptions lets the
        caller assert that every call succeeded.
        """
        errors: list[Exception] = []

        def guarded() -> None:
            try:
                target()
            except Exception as exc:  # recorded here; the caller asserts on the list
                errors.append(exc)

        threads = [threading.Thread(target=guarded) for _ in range(count)]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        return errors

    def test_concurrent_write_commit_all_idempotent(self, tmp_path: pathlib.Path) -> None:
        """20 concurrent write_commit calls on a good commit are all idempotent."""
        from muse.core.commits import (
            read_commit,
            write_commit,
        )

        repo = _make_repo(tmp_path)
        good = _good_commit()

        errors = self._run_in_threads(lambda: write_commit(repo, good), 20)
        assert not errors, f"write_commit raised in {len(errors)} of 20 threads: {errors[:3]!r}"

        result = read_commit(repo, good.commit_id)
        assert result is not None, "After 20 concurrent write_commit calls, commit must be readable"
        assert result.snapshot_id == good.snapshot_id

    def test_concurrent_write_snapshot_all_idempotent(self, tmp_path: pathlib.Path) -> None:
        """20 concurrent write_snapshot calls on a good snapshot are all idempotent."""
        from muse.core.snapshots import (
            read_snapshot,
            write_snapshot,
        )

        repo = _make_repo(tmp_path)
        good = _good_snapshot()

        errors = self._run_in_threads(lambda: write_snapshot(repo, good), 20)
        assert not errors, f"write_snapshot raised in {len(errors)} of 20 threads: {errors[:3]!r}"

        result = read_snapshot(repo, good.snapshot_id)
        assert result is not None, "After 20 concurrent write_snapshot calls, snapshot must be readable"

    def test_50_sequential_commits_all_readable(self, tmp_path: pathlib.Path) -> None:
        """50 different commits, each written to its own repo directory, are all readable."""
        from muse.core.commits import (
            read_commit,
            write_commit,
        )

        for i in range(50):
            repo = _make_repo(tmp_path / str(i))
            # Years cycle through 2020-2029; the message keeps every commit distinct.
            good = _good_commit(message=f"commit {i}", ts=_ts(2020 + i % 10))
            write_commit(repo, good)
            result = read_commit(repo, good.commit_id)
            assert result is not None, f"commit {i} not readable"
| 542 | |
| 543 | |
| 544 | # ============================================================================= |
| 545 | # 6. REGRESSION — normal write paths still work |
| 546 | # ============================================================================= |
| 547 | |
| 548 | |
class TestRegression:
    """Baseline checks: plain writes and repeated writes still round-trip."""

    def test_write_commit_new_file_works(self, tmp_path: pathlib.Path) -> None:
        """A commit written to an empty repo can be read back."""
        from muse.core.commits import read_commit, write_commit

        repo = _make_repo(tmp_path)
        record = _good_commit()
        write_commit(repo, record)

        loaded = read_commit(repo, record.commit_id)
        assert loaded is not None

    def test_write_snapshot_new_file_works(self, tmp_path: pathlib.Path) -> None:
        """A snapshot written to an empty repo can be read back."""
        from muse.core.snapshots import read_snapshot, write_snapshot

        repo = _make_repo(tmp_path)
        record = _good_snapshot()
        write_snapshot(repo, record)

        loaded = read_snapshot(repo, record.snapshot_id)
        assert loaded is not None

    def test_write_commit_idempotent_on_clean_file(self, tmp_path: pathlib.Path) -> None:
        """Writing the same commit once and then ten more times leaves it readable."""
        from muse.core.commits import read_commit, write_commit

        repo = _make_repo(tmp_path)
        record = _good_commit()
        write_commit(repo, record)

        extra_writes = 10
        for _attempt in range(extra_writes):
            write_commit(repo, record)

        loaded = read_commit(repo, record.commit_id)
        assert loaded is not None

    def test_write_snapshot_idempotent_on_clean_file(self, tmp_path: pathlib.Path) -> None:
        """Writing the same snapshot once and then ten more times leaves it readable."""
        from muse.core.snapshots import read_snapshot, write_snapshot

        repo = _make_repo(tmp_path)
        record = _good_snapshot()
        write_snapshot(repo, record)

        extra_writes = 10
        for _attempt in range(extra_writes):
            write_snapshot(repo, record)

        loaded = read_snapshot(repo, record.snapshot_id)
        assert loaded is not None