test_integrity_I5_commit_integrity.py
file-level
1
files
1
commits
0
hotspots
0
🧊 dead
0
💥 blast risk
| 1 | """Phase 1.5 — Commit record integrity on re-read. |
| 2 | |
| 3 | Tests cover: |
| 4 | - write_commit idempotency: silent drop of duplicate ID |
| 5 | - write_commit over a corrupt existing file: the write is skipped (no repair); read_commit then returns None |
| 6 | - write_commit integrity violation: existing record has mismatched commit_id |
| 7 | - read_commit: WARNING→CRITICAL upgrade for corrupt files |
| 8 | - read_commit_result: discriminated union (ok / not_found / corrupt) |
| 9 | - read_snapshot / read_snapshot_result: same guarantees |
| 10 | - get_all_commits / get_all_tags: CRITICAL on corrupt (previously silent) |
| 11 | - list_releases: CRITICAL on corrupt (previously silent) |
| 12 | - verify-pack integration after write_commit |
| 13 | - Concurrent write with same ID: first writer always wins (idempotency at scale) |
| 14 | - Regression: corrupt file must log CRITICAL (level 50), never WARNING (level 30) |
| 15 | """ |
| 16 | |
| 17 | from __future__ import annotations |
| 18 | |
| 19 | import datetime |
| 20 | import json |
| 21 | import logging |
| 22 | import pathlib |
| 23 | import threading |
| 24 | |
| 25 | import pytest |
| 26 | |
| 27 | from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id |
| 28 | |
| 29 | from muse.core.types import Manifest, fake_id, long_id |
| 30 | from muse.core.paths import muse_dir |
| 31 | |
| 32 | _REPO_ID = fake_id("repo") |
| 33 | from muse.core.object_store import object_path as _obj_path |
| 34 | from muse.core.semver import SemVerTag |
| 35 | from muse.core.commits import ( |
| 36 | CommitReadCorrupt, |
| 37 | CommitReadNotFound, |
| 38 | CommitReadOk, |
| 39 | CommitRecord, |
| 40 | commit_read_is_corrupt, |
| 41 | commit_read_is_not_found, |
| 42 | commit_read_is_ok, |
| 43 | get_all_commits, |
| 44 | read_commit, |
| 45 | read_commit_result, |
| 46 | write_commit, |
| 47 | ) |
| 48 | from muse.core.snapshots import ( |
| 49 | SnapshotReadCorrupt, |
| 50 | SnapshotReadNotFound, |
| 51 | SnapshotReadOk, |
| 52 | SnapshotRecord, |
| 53 | read_snapshot, |
| 54 | read_snapshot_result, |
| 55 | snapshot_read_is_corrupt, |
| 56 | snapshot_read_is_ok, |
| 57 | write_snapshot, |
| 58 | ) |
| 59 | from muse.core.tags import ( |
| 60 | TagRecord, |
| 61 | get_all_tags, |
| 62 | tag_path, |
| 63 | write_tag, |
| 64 | ) |
| 65 | from muse.core.releases import ( |
| 66 | ReleaseRecord, |
| 67 | list_releases, |
| 68 | release_path as _release_path, |
| 69 | write_release, |
| 70 | ) |
| 71 | |
| 72 | # --------------------------------------------------------------------------- |
| 73 | # Helpers |
| 74 | # --------------------------------------------------------------------------- |
| 75 | |
def _make_commit(
    root: pathlib.Path,
    message: str = "msg",
    branch: str = "main",
    parent: str | None = None,
    write: bool = True,
) -> CommitRecord:
    """Build a commit whose ID is hashed from its own content, optionally persisting it.

    The ID is produced by ``compute_commit_id`` from the same parents,
    snapshot, message, timestamp and author that are stored on the record, so
    the record is meant to pass the store's commit-ID verification when it is
    read back. ``branch`` is stored on the record but is not passed to the
    hash.

    Args:
        root: Repository root handed to ``write_commit``.
        message: Commit message; also an input to the ID hash.
        branch: Branch name stored on the record.
        parent: Optional parent commit ID; a falsy value means "no parents".
        write: When false the record is only constructed, not written —
            useful for idempotency and concurrency scenarios.

    Returns:
        The constructed ``CommitRecord``.
    """
    author = "tester"
    timestamp = datetime.datetime(2026, 3, 1, tzinfo=datetime.timezone.utc)
    # Every helper commit points at the snapshot ID of an empty manifest.
    empty_snapshot_id = compute_snapshot_id({})
    record = CommitRecord(
        commit_id=compute_commit_id(
            parent_ids=[parent] if parent else [],
            snapshot_id=empty_snapshot_id,
            message=message,
            committed_at_iso=timestamp.isoformat(),
            author=author,
        ),
        branch=branch,
        snapshot_id=empty_snapshot_id,
        message=message,
        committed_at=timestamp,
        author=author,
        parent_commit_id=parent,
        parent2_commit_id=None,
    )
    if not write:
        return record
    write_commit(root, record)
    return record
| 112 | |
| 113 | |
def _make_snapshot(
    root: pathlib.Path, manifest: Manifest | None = None
) -> SnapshotRecord:
    """Write and return a snapshot whose ID is hashed from its manifest.

    A missing (or empty) ``manifest`` is treated as ``{}``. Different
    manifests yield different snapshot IDs, e.g.
    ``{"file-A.py": "a" * 64}`` versus ``{"file-B.py": "b" * 64}``.

    Args:
        root: Repository root handed to ``write_snapshot``.
        manifest: Manifest mapping to store; defaults to an empty one.

    Returns:
        The persisted ``SnapshotRecord``.
    """
    entries = manifest or {}
    record = SnapshotRecord(
        snapshot_id=compute_snapshot_id(entries),
        manifest=entries,
        created_at=datetime.datetime(2026, 3, 1, tzinfo=datetime.timezone.utc),
    )
    write_snapshot(root, record)
    return record
| 131 | |
| 132 | |
def _make_tag(root: pathlib.Path, tag_name: str) -> TagRecord:
    """Write and return a tag named ``tag_name`` under the test repository ID.

    The tag ID is ``fake_id(tag_name)`` and the tagged commit is the fixed
    placeholder ``fake_id("tag-commit")``.
    """
    created = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    record = TagRecord(
        repo_id=_REPO_ID,
        tag_id=fake_id(tag_name),
        commit_id=fake_id("tag-commit"),
        tag=tag_name,
        created_at=created,
    )
    write_tag(root, record)
    return record
| 143 | |
| 144 | |
def _make_release(root: pathlib.Path, tag: str, semver: SemVerTag) -> ReleaseRecord:
    """Write and return a "stable"-channel release for ``tag``.

    All IDs are placeholders produced by ``fake_id``; the title mirrors the
    tag, and the body and changelog are left empty.
    """
    record = ReleaseRecord(
        repo_id=_REPO_ID,
        release_id=fake_id(tag + "-release"),
        tag=tag,
        semver=semver,
        channel="stable",
        commit_id=fake_id("release-commit"),
        snapshot_id=fake_id(tag),
        title=tag,
        body="",
        changelog=[],
    )
    write_release(root, record)
    return record
| 160 | |
| 161 | |
| 162 | |
def _tag_path(root: pathlib.Path, tag_id: str) -> pathlib.Path:
    """Return the on-disk path of ``tag_id`` within the test repository ID."""
    location = tag_path(root, _REPO_ID, tag_id)
    return location
| 165 | |
| 166 | |
| 167 | # --------------------------------------------------------------------------- |
| 168 | # Fixtures |
| 169 | # --------------------------------------------------------------------------- |
| 170 | |
@pytest.fixture()
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal repository skeleton under ``tmp_path`` and return its root.

    Creates the store sub-directories, a ``repo.json`` carrying the test
    repository ID, a ``HEAD`` pointing at ``refs/heads/main``, and an empty
    ``main`` ref file.
    """
    store = muse_dir(tmp_path)
    layout = (
        ("commits",),
        ("snapshots",),
        ("refs", "heads"),
        ("tags",),
        ("releases",),
    )
    for parts in layout:
        store.joinpath(*parts).mkdir(parents=True)

    repo_config = json.dumps({"repo_id": _REPO_ID})
    (store / "repo.json").write_text(repo_config)
    (store / "HEAD").write_text("ref: refs/heads/main\n")
    (store / "refs" / "heads" / "main").write_text("")
    return tmp_path
| 183 | |
| 184 | |
| 185 | # =========================================================================== |
| 186 | # 1. write_commit — idempotency |
| 187 | # =========================================================================== |
| 188 | |
class TestWriteCommitIdempotency:
    """Re-writing an existing commit ID must never damage the record on disk."""

    def test_first_writer_wins(self, repo: pathlib.Path) -> None:
        """A record whose ID does not match its content is refused with ValueError.

        Incoming-hash verification replaces the older "silently drop the
        second writer" behaviour: the forged record raises and the good
        record already on disk stays readable and unchanged.
        """
        original = _make_commit(repo, message="first-wins")
        # Same commit_id, different message: the ID no longer matches the
        # content, so write_commit is expected to raise.
        forged = CommitRecord(
            commit_id=original.commit_id,
            branch="main",
            snapshot_id=original.snapshot_id,
            message="second-attempt",
            committed_at=original.committed_at,
            author="tester",
            parent_commit_id=None,
            parent2_commit_id=None,
        )
        with pytest.raises(ValueError):
            write_commit(repo, forged)

        on_disk = read_commit(repo, original.commit_id)
        assert on_disk is not None
        assert on_disk.message == "first-wins", "bad incoming record must not overwrite good file"

    def test_exact_duplicate_emits_no_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """A byte-for-byte repeat of an earlier write must stay below CRITICAL."""
        record = _make_commit(repo, message="exact-dup-no-critical")
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            write_commit(repo, record)
        critical = [rec for rec in caplog.records if rec.levelno >= logging.CRITICAL]
        assert not critical

    def test_idempotent_round_trip_preserves_all_fields(self, repo: pathlib.Path) -> None:
        """Writing the same record a second time leaves the stored fields intact."""
        record = _make_commit(repo, message="preserve-me", branch="feat/x")
        # The helper already wrote the record once; this is the repeat write.
        write_commit(repo, record)
        on_disk = read_commit(repo, record.commit_id)
        assert on_disk is not None
        assert on_disk.message == "preserve-me"
        assert on_disk.branch == "feat/x"
| 232 | |
| 233 | |
| 234 | # =========================================================================== |
| 235 | # 2. write_commit — corrupt existing file → write skipped (not repaired); corruption detected on read |
| 236 | # =========================================================================== |
| 237 | |
class TestWriteCommitCorruptExistingFile:
    """write_commit leaves a damaged existing object alone; read_commit reports it.

    Each test damages the object file after the initial write, repeats the
    write, and then checks that ``read_commit`` returns ``None``. None of
    the tests inspect log output, so they do not request ``caplog``.
    """

    def test_corrupt_existing_is_skipped_by_write_commit(self, repo: pathlib.Path) -> None:
        """Garbage bytes in an existing object are not repaired by a re-write.

        write_commit is idempotent — when the object path already exists the
        write is skipped (first writer wins) — so the corruption is only
        noticed when read_commit returns None.
        """
        c = _make_commit(repo, message="original-overwrite")
        # Simulate disk corruption after the initial write.
        _obj_path(repo, c.commit_id).write_bytes(b"\xff\xfe\x00bad-data\x99")
        write_commit(repo, c)  # idempotent — skips, file already exists
        loaded = read_commit(repo, c.commit_id)
        assert loaded is None  # corruption detected at read time

    def test_empty_existing_is_skipped_by_write_commit(self, repo: pathlib.Path) -> None:
        """A zero-byte commit file is not repaired by write_commit (idempotent)."""
        c = _make_commit(repo, message="after-crash-overwrite")
        _obj_path(repo, c.commit_id).write_bytes(b"")
        write_commit(repo, c)  # skips — file exists
        loaded = read_commit(repo, c.commit_id)
        assert loaded is None  # empty file is corrupt

    def test_truncated_existing_is_skipped_by_write_commit(self, repo: pathlib.Path) -> None:
        """A commit file cut to half its length is not repaired by write_commit."""
        c = _make_commit(repo, message="after-truncation-overwrite")
        path = _obj_path(repo, c.commit_id)
        good_bytes = path.read_bytes()
        # Keep only the first half of the valid encoding.
        path.write_bytes(good_bytes[: len(good_bytes) // 2])
        write_commit(repo, c)  # skips — file exists
        loaded = read_commit(repo, c.commit_id)
        assert loaded is None  # truncated file is corrupt
| 276 | |
| 277 | |
| 278 | # =========================================================================== |
| 279 | # 3. write_commit — store integrity violation |
| 280 | # =========================================================================== |
| 281 | |
class TestWriteCommitIntegrityViolation:
    """A well-formed record stored under the wrong ID must be rejected on read."""

    def test_commit_id_mismatch_detected_at_read_time(self, repo: pathlib.Path) -> None:
        """Impostor bytes at an object path are caught by read_commit, not write_commit.

        write_commit is idempotent: when the object path exists the write is
        skipped regardless of content. Hash verification happens at read
        time — read_commit returns None when the stored payload does not
        hash to the requested commit_id.
        """
        c_legit = _make_commit(repo, message="legitimate-mismatch")
        c_impostor = _make_commit(repo, message="impostor-mismatch")
        # Replace the legitimate object with the impostor's JSON payload,
        # framed with a "commit <length>\0" header.
        payload = json.dumps(c_impostor.to_dict(), separators=(",", ":")).encode()
        _obj_path(repo, c_legit.commit_id).write_bytes(
            f"commit {len(payload)}\0".encode() + payload
        )
        # write_commit skips — the file exists, and no error is raised.
        write_commit(repo, c_legit)
        # read_commit detects the hash mismatch and returns None.
        assert read_commit(repo, c_legit.commit_id) is None
| 303 | |
| 304 | |
| 305 | # =========================================================================== |
| 306 | # 4. read_commit — CRITICAL log for corrupt |
| 307 | # =========================================================================== |
| 308 | |
class TestReadCommitCriticalLogging:
    """read_commit logs CRITICAL for corrupt files and nothing alarming otherwise."""

    def test_corrupt_file_logs_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Unparseable bytes yield None plus a CRITICAL record mentioning "Corrupt"."""
        c = _make_commit(repo, message="garbage-payload")
        _obj_path(repo, c.commit_id).write_bytes(b"\x00\x01garbage\xff")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            result = read_commit(repo, c.commit_id)
        assert result is None
        crits = [r for r in caplog.records if r.levelno >= logging.CRITICAL]
        assert crits, "Must log CRITICAL for corrupt commit file"
        assert any("Corrupt" in r.message for r in crits)

    def test_missing_file_returns_none_no_log(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """An absent commit is not an error: None, with nothing at WARNING or above."""
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            result = read_commit(repo, fake_id("missing-commit"))
        assert result is None
        assert not any(r.levelno >= logging.WARNING for r in caplog.records)

    def test_valid_file_returns_record_no_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """A clean read returns the record and emits no CRITICAL record."""
        c = _make_commit(repo, message="clean-read")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            result = read_commit(repo, c.commit_id)
        assert result is not None
        assert result.message == "clean-read"
        assert not any(r.levelno >= logging.CRITICAL for r in caplog.records)

    def test_corrupt_log_references_filename(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """The corruption log must identify the commit by at least its first 8 ID chars."""
        c = _make_commit(repo, message="not-msgpack-content")
        _obj_path(repo, c.commit_id).write_bytes(b"not-msgpack")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            read_commit(repo, c.commit_id)
        # Search both the rendered message and the raw log arguments.
        messages = " ".join(r.message + str(r.args) for r in caplog.records)
        bare = long_id(c.commit_id, strip=True)
        # Checking the 8-char prefix is sufficient: any text that contains
        # the full ID necessarily contains its prefix as well.
        assert bare[:8] in messages
| 350 | |
| 351 | |
| 352 | # =========================================================================== |
| 353 | # 5. read_commit_result — discriminated union |
| 354 | # =========================================================================== |
| 355 | |
class TestReadCommitResult:
    """read_commit_result reports exactly one of: ok, not_found, corrupt."""

    def test_ok_status_on_valid_record(self, repo: pathlib.Path) -> None:
        """A readable commit comes back as an "ok" result carrying the record."""
        written = _make_commit(repo, message="typed-ok")
        outcome = read_commit_result(repo, written.commit_id)
        assert commit_read_is_ok(outcome)
        loaded = outcome["commit"]
        assert isinstance(loaded, CommitRecord)
        assert loaded.message == "typed-ok"

    def test_not_found_status_when_missing(self, repo: pathlib.Path) -> None:
        """An ID that was never written is classified as not-found."""
        outcome = read_commit_result(repo, fake_id("ff-missing-commit"))
        assert commit_read_is_not_found(outcome)

    def test_corrupt_status_on_bad_bytes(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Garbage bytes give a corrupt result with path and error, plus a CRITICAL log."""
        written = _make_commit(repo, message="corrupt-bytes")
        _obj_path(repo, written.commit_id).write_bytes(b"\xff\x00garbage")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            outcome = read_commit_result(repo, written.commit_id)
        assert commit_read_is_corrupt(outcome)
        assert outcome["path"] != ""
        assert outcome["error"] != ""
        critical = [rec for rec in caplog.records if rec.levelno >= logging.CRITICAL]
        assert critical

    def test_corrupt_result_path_contains_commit_id(self, repo: pathlib.Path) -> None:
        """The reported path embeds the commit ID, split into directory and file name."""
        written = _make_commit(repo, message="path-in-corrupt")
        _obj_path(repo, written.commit_id).write_bytes(b"")
        outcome = read_commit_result(repo, written.commit_id)
        assert commit_read_is_corrupt(outcome)
        # object_path breaks the 64-char hex after its first two characters
        # (directory prefix), so the full hex never appears contiguously.
        # Look for the directory part and the start of the file name instead.
        hex_id = long_id(written.commit_id, strip=True)
        reported_path = outcome["path"]
        assert hex_id[:2] in reported_path
        assert hex_id[2:10] in reported_path

    def test_ok_result_roundtrips_all_metadata(self, repo: pathlib.Path) -> None:
        """Branch, author and metadata survive a write followed by a typed read."""
        # The ID is hashed from the content so that read-side verification passes.
        author = "alice"
        message = "full metadata"
        snapshot_id = fake_id("snap-meta-roundtrip")
        timestamp = datetime.datetime(2026, 3, 15, tzinfo=datetime.timezone.utc)
        commit_id = compute_commit_id(
            parent_ids=[],
            snapshot_id=snapshot_id,
            message=message,
            committed_at_iso=timestamp.isoformat(),
            author=author,
        )
        record = CommitRecord(
            commit_id=commit_id,
            branch="dev",
            snapshot_id=snapshot_id,
            message=message,
            committed_at=timestamp,
            author=author,
            parent_commit_id=None,
            parent2_commit_id=None,
            metadata={"key": "val"},
        )
        write_commit(repo, record)

        outcome = read_commit_result(repo, commit_id)
        assert commit_read_is_ok(outcome)
        loaded = outcome["commit"]
        assert loaded.branch == "dev"
        assert loaded.author == "alice"
        assert loaded.metadata == {"key": "val"}

    def test_status_field_is_string(self, repo: pathlib.Path) -> None:
        """The status discriminator is a plain str, so callers can match on it directly."""
        written = _make_commit(repo, message="status-str-check")
        status = read_commit_result(repo, written.commit_id)["status"]
        assert isinstance(status, str)

    def test_not_found_has_only_status_key(self, repo: pathlib.Path) -> None:
        """A not-found result carries no keys other than "status"."""
        outcome = read_commit_result(repo, fake_id("90-not-existing"))
        assert set(outcome.keys()) == {"status"}

    def test_three_outcomes_are_mutually_exclusive(self, repo: pathlib.Path) -> None:
        """A good, a missing and a corrupt commit map to three distinct status strings."""
        healthy = _make_commit(repo, message="outcome-ok")
        damaged = _make_commit(repo, message="outcome-corrupt")
        _obj_path(repo, damaged.commit_id).write_bytes(b"bad")

        lookups = (healthy.commit_id, fake_id("cc-missing"), damaged.commit_id)
        seen = {read_commit_result(repo, commit_id)["status"] for commit_id in lookups}
        assert seen == {"ok", "not_found", "corrupt"}
| 442 | |
| 443 | |
| 444 | # =========================================================================== |
| 445 | # 6. read_snapshot / read_snapshot_result |
| 446 | # =========================================================================== |
| 447 | |
class TestReadSnapshotIntegrity:
    """Snapshots get the same read guarantees as commits."""

    def test_corrupt_snapshot_logs_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """A damaged snapshot file reads as None and produces a CRITICAL record."""
        snap = _make_snapshot(repo, manifest={"snap-critical.py": fake_id("oid-a")})
        _obj_path(repo, snap.snapshot_id).write_bytes(b"\xde\xad\xbe\xef")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            loaded = read_snapshot(repo, snap.snapshot_id)
        assert loaded is None
        critical = [rec for rec in caplog.records if rec.levelno >= logging.CRITICAL]
        assert critical

    def test_snapshot_result_ok(self, repo: pathlib.Path) -> None:
        """A readable snapshot yields an "ok" result carrying a SnapshotRecord."""
        snap = _make_snapshot(repo, manifest={"snap-ok.py": fake_id("oid-b")})
        outcome = read_snapshot_result(repo, snap.snapshot_id)
        assert snapshot_read_is_ok(outcome)
        assert isinstance(outcome["snapshot"], SnapshotRecord)

    def test_snapshot_result_not_found(self, repo: pathlib.Path) -> None:
        """An unknown snapshot ID yields a bare {"status": "not_found"} result."""
        outcome = read_snapshot_result(repo, fake_id("no-snap"))
        assert outcome["status"] == "not_found"
        assert set(outcome.keys()) == {"status"}

    def test_snapshot_result_corrupt(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """A damaged snapshot yields a corrupt result with non-empty path and error."""
        snap = _make_snapshot(repo, manifest={"snap-corrupt.py": fake_id("oid-c")})
        _obj_path(repo, snap.snapshot_id).write_bytes(b"garbage-bytes\x00")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            outcome = read_snapshot_result(repo, snap.snapshot_id)
        assert snapshot_read_is_corrupt(outcome)
        assert outcome["path"] != ""
        assert outcome["error"] != ""

    def test_missing_snapshot_no_log(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """A missing snapshot reads as None with nothing logged at WARNING or above."""
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            loaded = read_snapshot(repo, fake_id("missing"))
        assert loaded is None
        assert all(rec.levelno < logging.WARNING for rec in caplog.records)
| 488 | |
| 489 | |
| 490 | # =========================================================================== |
| 491 | # 7. get_all_commits — CRITICAL on corrupt (previously silent) |
| 492 | # =========================================================================== |
| 493 | |
class TestGetAllCommitsCorruptLogging:
    """get_all_commits skips corrupt records and reports each one at CRITICAL."""

    def test_one_corrupt_skipped_with_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """The healthy commit is listed, the damaged one is not, and CRITICAL is logged."""
        healthy = _make_commit(repo, message="good-survives")
        damaged = _make_commit(repo, message="will-corrupt")
        _obj_path(repo, damaged.commit_id).write_bytes(b"\xff\x00")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            listed = get_all_commits(repo)
        listed_ids = {record.commit_id for record in listed}
        assert healthy.commit_id in listed_ids, "good commit must still appear"
        assert damaged.commit_id not in listed_ids, "corrupt commit must be excluded"
        critical = [rec for rec in caplog.records if rec.levelno >= logging.CRITICAL]
        assert critical

    def test_all_corrupt_returns_empty_with_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Three damaged commits produce an empty listing and three CRITICAL records."""
        # Write all three first, then damage them, so every write sees a clean store.
        damaged = [_make_commit(repo, message=f"c{i}") for i in range(3)]
        for record in damaged:
            _obj_path(repo, record.commit_id).write_bytes(b"bad")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            listed = get_all_commits(repo)
        assert listed == []
        critical = [rec for rec in caplog.records if rec.levelno >= logging.CRITICAL]
        assert len(critical) == 3

    def test_empty_store_returns_empty_no_log(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """An empty store lists nothing and logs nothing at WARNING or above."""
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            listed = get_all_commits(repo)
        assert listed == []
        assert all(rec.levelno < logging.WARNING for rec in caplog.records)

    def test_mixed_good_and_corrupt_correct_count(self, repo: pathlib.Path) -> None:
        """With five healthy and three damaged commits, exactly five are listed."""
        healthy = [_make_commit(repo, message=f"g{i}") for i in range(5)]
        damaged = [_make_commit(repo, message=f"b{i}") for i in range(3)]
        for record in damaged:
            _obj_path(repo, record.commit_id).write_bytes(b"corrupt")
        listed = get_all_commits(repo)
        assert len(listed) == len(healthy)
| 536 | |
| 537 | |
| 538 | # =========================================================================== |
| 539 | # 8. get_all_tags — CRITICAL on corrupt (previously silent) |
| 540 | # =========================================================================== |
| 541 | |
class TestGetAllTagsCorruptLogging:
    """get_all_tags skips corrupt tag files and reports each one at CRITICAL."""

    def test_corrupt_tag_skipped_with_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """The intact tag is listed, the damaged tag is dropped, and CRITICAL is logged."""
        _make_tag(repo, "v1.0.0")
        damaged = _make_tag(repo, "v2.0.0")
        _tag_path(repo, damaged.tag_id).write_bytes(b"\x00bad")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            listed = get_all_tags(repo, _REPO_ID)
        names = {record.tag for record in listed}
        assert "v1.0.0" in names
        assert "v2.0.0" not in names
        critical = [rec for rec in caplog.records if rec.levelno >= logging.CRITICAL]
        assert critical

    def test_good_tags_all_returned(self, repo: pathlib.Path) -> None:
        """Two intact tags are both listed."""
        for name in ("v0.1", "v0.2"):
            _make_tag(repo, name)
        listed = get_all_tags(repo, _REPO_ID)
        assert len(listed) == 2

    def test_all_corrupt_tags_returns_empty_with_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Three damaged tags produce an empty listing and three CRITICAL records."""
        for name in ("v1", "v2", "v3"):
            damaged = _make_tag(repo, name)
            _tag_path(repo, damaged.tag_id).write_bytes(b"bad")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            listed = get_all_tags(repo, _REPO_ID)
        assert listed == []
        critical = [rec for rec in caplog.records if rec.levelno >= logging.CRITICAL]
        assert len(critical) == 3
| 573 | |
| 574 | |
| 575 | # =========================================================================== |
| 576 | # 9. list_releases — CRITICAL on corrupt (previously silent) |
| 577 | # =========================================================================== |
| 578 | |
class TestListReleasesCorruptLogging:
    """list_releases skips corrupt release files and reports them at CRITICAL."""

    def test_corrupt_release_skipped_with_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """The intact release is listed, the damaged one is dropped, and CRITICAL is logged."""
        v1 = SemVerTag(major=1, minor=0, patch=0, pre="", build="")
        intact = _make_release(repo, "v1.0.0", v1)
        v2 = SemVerTag(major=2, minor=0, patch=0, pre="", build="")
        damaged = _make_release(repo, "v2.0.0", v2)
        _release_path(repo, _REPO_ID, damaged.release_id).write_bytes(b"\xff\x00garbage")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            listed = list_releases(repo, _REPO_ID)
        listed_ids = {release.release_id for release in listed}
        assert intact.release_id in listed_ids
        assert damaged.release_id not in listed_ids
        critical = [rec for rec in caplog.records if rec.levelno >= logging.CRITICAL]
        assert critical

    def test_all_releases_good_returns_all(self, repo: pathlib.Path) -> None:
        """Two intact releases are both listed."""
        for tag, minor in (("v1.0.0", 0), ("v1.1.0", 1)):
            version = SemVerTag(major=1, minor=minor, patch=0, pre="", build="")
            _make_release(repo, tag, version)
        listed = list_releases(repo, _REPO_ID)
        assert len(listed) == 2
| 602 | |
| 603 | |
| 604 | # =========================================================================== |
| 605 | # 10. verify-pack integration after write_commit |
| 606 | # =========================================================================== |
| 607 | |
class TestVerifyPackAfterWriteCommit:
    """CLI-level check that a commit written through the store API is readable."""

    def test_cmd_read_commit_roundtrip(self, repo: pathlib.Path) -> None:
        """``muse read-commit`` must succeed for every written commit.

        Runs ``read-commit <id> --json`` against the temporary repository
        (selected via ``MUSE_REPO_ROOT``) and checks that the JSON output
        echoes the commit's ID and message.
        """
        # Imported here, as in the original, so that only this test depends
        # on the CLI test helper.
        from tests.cli_test_helper import CliRunner

        c = _make_commit(repo, message="plumbing-check")

        runner = CliRunner()
        result = runner.invoke(
            None,
            ["read-commit", c.commit_id, "--json"],
            env={"MUSE_REPO_ROOT": str(repo)},
        )
        assert result.exit_code == 0
        # Uses the module-level ``json`` import; no local alias is needed.
        data = json.loads(result.output)
        assert data["commit_id"] == c.commit_id
        assert data["message"] == "plumbing-check"
| 626 | |
| 627 | |
| 628 | # =========================================================================== |
| 629 | # 11. Concurrent idempotency — 50 threads race to write the same commit |
| 630 | # =========================================================================== |
| 631 | |
class TestConcurrentIdempotentWrite:
    """Concurrent write_commit calls must neither raise nor lose records."""

    def test_50_threads_same_commit_id_first_wins(self, repo: pathlib.Path) -> None:
        """50 threads write the EXACT same commit; none may fail and the record must read back."""
        # In a content-addressed system, identical content → identical commit_id.
        c = _make_commit(repo, message="concurrent-idempotent", write=False)
        errors: list[Exception] = []

        def write_one() -> None:
            # Collect failures instead of letting them die with the thread.
            try:
                write_commit(repo, c)
            except Exception as exc:
                errors.append(exc)

        threads = [threading.Thread(target=write_one) for _ in range(50)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        assert not errors, f"Unexpected errors in same-ID concurrent writes: {errors[:3]}"

        loaded = read_commit(repo, c.commit_id)
        assert loaded is not None
        assert loaded.commit_id == c.commit_id
        assert loaded.message == "concurrent-idempotent"

    def test_50_threads_distinct_ids_all_survive(self, repo: pathlib.Path) -> None:
        """50 threads writing distinct commit IDs must all persist without errors."""
        errors: list[Exception] = []

        def write_unique(i: int) -> None:
            # Record construction sits inside the try as well: an exception
            # raised in a worker thread outside it would go to the thread
            # excepthook and never reach ``errors``.
            try:
                # _make_commit uses compute_commit_id so the hash always matches content.
                c = _make_commit(repo, message=f"unique {i}", write=False)
                write_commit(repo, c)
            except Exception as exc:
                errors.append(exc)

        threads = [threading.Thread(target=write_unique, args=(i,)) for i in range(50)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        assert not errors, f"Unexpected errors in distinct-ID concurrent writes: {errors[:3]}"
        commits = get_all_commits(repo)
        assert len(commits) == 50
| 679 | |
| 680 | |
| 681 | # =========================================================================== |
| 682 | # 12. Regression: WARNING→CRITICAL upgrade is permanent |
| 683 | # =========================================================================== |
| 684 | |
class TestRegressionCorruptLevelUpgrade:
    """Guard the WARNING→CRITICAL upgrade for corruption logs against regressions.

    Every test damages a stored object, runs a reader with DEBUG capture
    enabled, and then requires that a CRITICAL record exists and that no
    WARNING record was emitted.
    """

    def test_corrupt_commit_logs_at_critical_not_warning(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """read_commit on a damaged commit logs CRITICAL and never WARNING."""
        damaged = _make_commit(repo, message="level-upgrade-commit")
        _obj_path(repo, damaged.commit_id).write_bytes(b"trash")
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            read_commit(repo, damaged.commit_id)
        levels = [rec.levelno for rec in caplog.records]
        assert logging.CRITICAL in levels, (
            f"Expected CRITICAL (50) but got levels: {levels}"
        )
        assert logging.WARNING not in levels, (
            "Must not downgrade corruption to WARNING — only CRITICAL is acceptable"
        )

    def test_corrupt_snapshot_logs_at_critical_not_warning(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """read_snapshot on a damaged snapshot logs CRITICAL and never WARNING."""
        damaged = _make_snapshot(repo, manifest={"snap-level.py": fake_id("oid-d")})
        _obj_path(repo, damaged.snapshot_id).write_bytes(b"bad")
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            read_snapshot(repo, damaged.snapshot_id)
        levels = [rec.levelno for rec in caplog.records]
        assert logging.CRITICAL in levels
        assert logging.WARNING not in levels

    def test_get_all_commits_logs_corrupt_at_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """get_all_commits over a damaged commit logs CRITICAL and never WARNING."""
        damaged = _make_commit(repo, message="level-upgrade-get-all")
        _obj_path(repo, damaged.commit_id).write_bytes(b"trash")
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            get_all_commits(repo)
        levels = [rec.levelno for rec in caplog.records]
        assert logging.CRITICAL in levels
        assert logging.WARNING not in levels

    def test_get_all_tags_logs_corrupt_at_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """get_all_tags over a damaged tag logs CRITICAL and never WARNING."""
        damaged = _make_tag(repo, "v-crit")
        _tag_path(repo, damaged.tag_id).write_bytes(b"trash")
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            get_all_tags(repo, _REPO_ID)
        levels = [rec.levelno for rec in caplog.records]
        assert logging.CRITICAL in levels
        assert logging.WARNING not in levels