test_stress_store_provenance.py
python
sha256:d11a87833d5fad6059b7662844bf5448a8911a17cce7a51811f71ad394f248eb
bump to v0.2.0rc13
Human
patch
6 days ago
| 1 | """Stress tests for CommitRecord, SnapshotRecord, TagRecord, and provenance fields. |
| 2 | |
| 3 | Covers: |
| 4 | - CommitRecord round-trip through to_dict/from_dict for all format versions. |
| 5 | - format_version evolution: missing fields default correctly when reading old records. |
| 6 | - reviewed_by (ORSet semantics): list preserved, sorted, deduplicated via overwrite_commit. |
| 7 | - test_runs (GCounter semantics): monotonically increases via overwrite_commit. |
| 8 | - agent_id / model_id / toolchain_id / prompt_hash / signature fields. |
| 9 | - SnapshotRecord round-trip with large manifests. |
| 10 | - TagRecord round-trip. |
| 11 | - get_head_commit_id on empty branch returns None. |
| 12 | - write_commit is idempotent (won't overwrite). |
| 13 | - overwrite_commit updates the persisted record correctly. |
| 14 | - read_commit for absent commit returns None. |
| 15 | - get_all_commits returns every written commit (branch listing is not exercised here). |
| 16 | - get_all_tags returns all tags written for a repo. |
| 17 | """ |
| 18 | |
| 19 | import datetime |
| 20 | import pathlib |
| 21 | |
| 22 | import pytest |
| 23 | |
| 24 | from muse.core.types import fake_id, long_id |
| 25 | from muse.core.paths import ref_path, muse_dir |
| 26 | from muse.core.crdts.or_set import ORSet |
| 27 | from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id |
| 28 | from muse.domain import SemVerBump |
| 29 | from muse.core.refs import get_head_commit_id |
| 30 | from muse.core.commits import ( |
| 31 | CommitDict, |
| 32 | CommitRecord, |
| 33 | get_all_commits, |
| 34 | overwrite_commit, |
| 35 | read_commit, |
| 36 | write_commit, |
| 37 | ) |
| 38 | from muse.core.snapshots import ( |
| 39 | SnapshotRecord, |
| 40 | read_snapshot, |
| 41 | write_snapshot, |
| 42 | ) |
| 43 | from muse.core.tags import ( |
| 44 | TagRecord, |
| 45 | get_all_tags, |
| 46 | write_tag, |
| 47 | ) |
| 48 | |
| 49 | |
| 50 | # --------------------------------------------------------------------------- |
| 51 | # Fixtures |
| 52 | # --------------------------------------------------------------------------- |
| 53 | |
| 54 | |
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Provide a temporary repo root with the store directories pre-created.

    Creates ``commits``, ``snapshots``, ``tags`` and ``refs/heads`` beneath
    whatever ``muse_dir(tmp_path)`` returns, then hands back ``tmp_path``
    itself (not the store directory).
    """
    store_root = muse_dir(tmp_path)
    layout = (
        ("commits",),
        ("snapshots",),
        ("tags",),
        ("refs", "heads"),
    )
    for parts in layout:
        # parents=True creates any missing intermediate directories.
        store_root.joinpath(*parts).mkdir(parents=True)
    return tmp_path
| 63 | |
| 64 | |
def _now() -> datetime.datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
| 67 | |
| 68 | |
# Snapshot id shared by every commit built through _commit(): the id that
# compute_snapshot_id yields for an empty manifest.
_SNAP_ID: str = compute_snapshot_id({})


def _commit(
    label: str = "default",
    branch: str = "main",
    parent: str | None = None,
) -> CommitRecord:
    """Build a CommitRecord whose commit_id is computed from its own fields.

    Args:
        label: Suffix for the message, which becomes ``"commit <label>"``.
        branch: Branch name stored on the record.
        parent: Optional parent commit id. A falsy value (``None`` or ``""``)
            is hashed as "no parents" but is still stored verbatim in
            ``parent_commit_id``.

    Returns:
        A record that points at ``_SNAP_ID`` and is stamped with the current
        UTC time.
    """
    stamp = _now()
    text = f"commit {label}"

    parents: list[str] = []
    if parent:
        parents.append(parent)

    # The same message and timestamp feed both the hash and the record so the
    # id stays consistent with the stored fields.
    digest = compute_commit_id(
        parent_ids=parents,
        snapshot_id=_SNAP_ID,
        message=text,
        committed_at_iso=stamp.isoformat(),
    )
    return CommitRecord(
        commit_id=digest,
        branch=branch,
        snapshot_id=_SNAP_ID,
        message=text,
        committed_at=stamp,
        parent_commit_id=parent,
    )
| 93 | |
| 94 | |
| 95 | # =========================================================================== |
| 96 | # CommitRecord round-trip |
| 97 | # =========================================================================== |
| 98 | |
| 99 | |
class TestCommitRecordRoundTrip:
    """In-memory ``to_dict`` / ``from_dict`` round-trips for CommitRecord."""

    def test_minimal_round_trip(self) -> None:
        """A helper-built commit keeps its id, branch and message."""
        c = _commit()
        restored = CommitRecord.from_dict(c.to_dict())
        assert restored.commit_id == c.commit_id
        assert restored.branch == c.branch
        assert restored.message == c.message

    def test_all_provenance_fields_preserved(self) -> None:
        """Every provenance field set on the record comes back unchanged."""
        c = CommitRecord(
            commit_id="prov123",
            branch="main",
            snapshot_id="snap",
            message="provenance commit",
            committed_at=_now(),
            agent_id="claude-v4",
            model_id="claude-3-5-sonnet",
            toolchain_id="muse-cli-1.0",
            prompt_hash="abc" * 10 + "ab",
            signature=f"sig-{'x' * 60}",
            signer_key_id="key-001",
        )
        d = c.to_dict()
        restored = CommitRecord.from_dict(d)
        assert restored.agent_id == "claude-v4"
        assert restored.model_id == "claude-3-5-sonnet"
        assert restored.toolchain_id == "muse-cli-1.0"
        # prompt_hash is set above; without this check a hash dropped during
        # serialization would go unnoticed.
        assert restored.prompt_hash == c.prompt_hash
        assert restored.signature == c.signature
        assert restored.signer_key_id == "key-001"

    def test_crdt_fields_preserved(self) -> None:
        """reviewed_by and test_runs survive the round trip."""
        c = CommitRecord(
            commit_id="crdt123",
            branch="main",
            snapshot_id="snap",
            message="crdt",
            committed_at=_now(),
            reviewed_by=["alice", "bob", "charlie"],
            test_runs=42,
        )
        d = c.to_dict()
        restored = CommitRecord.from_dict(d)
        # Compare sorted so the check does not depend on stored order.
        assert sorted(restored.reviewed_by) == ["alice", "bob", "charlie"]
        assert restored.test_runs == 42

    def test_sem_ver_bump_preserved(self) -> None:
        """Each of the four sem_ver_bump values round-trips unchanged."""
        bumps: tuple[SemVerBump, ...] = ("none", "patch", "minor", "major")
        for bump in bumps:
            c = CommitRecord(
                commit_id="sv",
                branch="main",
                snapshot_id="s",
                message="m",
                committed_at=_now(),
                sem_ver_bump=bump,
            )
            assert CommitRecord.from_dict(c.to_dict()).sem_ver_bump == bump

    def test_breaking_changes_preserved(self) -> None:
        """breaking_changes entries (including non-ASCII text) are kept in order."""
        c = CommitRecord(
            commit_id="bc",
            branch="main",
            snapshot_id="s",
            message="m",
            committed_at=_now(),
            breaking_changes=["removed `old_api`", "renamed `foo` → `bar`"],
        )
        restored = CommitRecord.from_dict(c.to_dict())
        assert restored.breaking_changes == ["removed `old_api`", "renamed `foo` → `bar`"]

    def test_parent_ids_preserved(self) -> None:
        """Both parent ids of a two-parent commit are kept."""
        c = CommitRecord(
            commit_id="merge",
            branch="main",
            snapshot_id="s",
            message="m",
            committed_at=_now(),
            parent_commit_id="parent-1",
            parent2_commit_id="parent-2",
        )
        restored = CommitRecord.from_dict(c.to_dict())
        assert restored.parent_commit_id == "parent-1"
        assert restored.parent2_commit_id == "parent-2"

    def test_missing_crdt_fields_default_correctly(self) -> None:
        """Simulates reading an older commit that lacks reviewed_by / test_runs."""
        minimal: CommitDict = {
            "commit_id": "old",
            "repo_id": "r",
            "branch": "main",
            "snapshot_id": "snap",
            "message": "old commit",
            "committed_at": _now().isoformat(),
        }
        restored = CommitRecord.from_dict(minimal)
        assert restored.reviewed_by == []
        assert restored.test_runs == 0

    def test_committed_at_timezone_aware(self) -> None:
        """committed_at is still timezone-aware after the round trip."""
        c = _commit()
        restored = CommitRecord.from_dict(c.to_dict())
        assert restored.committed_at.tzinfo is not None
| 202 | |
| 203 | |
| 204 | # =========================================================================== |
| 205 | # CommitRecord persistence |
| 206 | # =========================================================================== |
| 207 | |
| 208 | |
class TestCommitPersistence:
    """On-disk behaviour of write_commit, overwrite_commit and read_commit."""

    def test_write_and_read_back(self, repo: pathlib.Path) -> None:
        """A written commit can be read back by its id."""
        original = _commit("id001")
        write_commit(repo, original)
        loaded = read_commit(repo, original.commit_id)
        assert loaded is not None
        assert loaded.commit_id == original.commit_id

    def test_write_is_idempotent(self, repo: pathlib.Path) -> None:
        """Writing the same record twice leaves a readable, intact commit."""
        record = _commit("id002")
        # Write the same commit twice; the store must not corrupt it.
        for _ in range(2):
            write_commit(repo, record)
        loaded = read_commit(repo, record.commit_id)
        assert loaded is not None
        assert loaded.message == record.message

    def test_read_absent_commit_returns_none(self, repo: pathlib.Path) -> None:
        """Looking up an id that was never written yields None."""
        missing_id = fake_id("does-not-exist")
        assert read_commit(repo, missing_id) is None

    def test_overwrite_commit_updates_reviewed_by(self, repo: pathlib.Path) -> None:
        """overwrite_commit persists a changed reviewed_by list."""
        record = _commit("id003")
        write_commit(repo, record)
        # Simulate an ORSet merge by assigning the merged reviewer list.
        pending = read_commit(repo, record.commit_id)
        assert pending is not None
        pending.reviewed_by = ["agent-x", "human-bob"]
        overwrite_commit(repo, pending)
        stored = read_commit(repo, record.commit_id)
        assert stored is not None
        for reviewer in ("agent-x", "human-bob"):
            assert reviewer in stored.reviewed_by

    def test_overwrite_commit_updates_test_runs(self, repo: pathlib.Path) -> None:
        """Each read-increment-overwrite cycle raises test_runs by one."""
        record = _commit("id004")
        write_commit(repo, record)
        for expected_total in range(1, 6):
            current = read_commit(repo, record.commit_id)
            assert current is not None
            current.test_runs = current.test_runs + 1
            overwrite_commit(repo, current)
            reread = read_commit(repo, record.commit_id)
            assert reread is not None
            assert reread.test_runs == expected_total

    def test_list_commits_returns_all_written(self, repo: pathlib.Path) -> None:
        """get_all_commits includes every one of twenty written commits."""
        written = [_commit(f"c{i:04d}") for i in range(20)]
        for record in written:
            write_commit(repo, record)
        stored_ids = {rec.commit_id for rec in get_all_commits(repo)}
        absent = [rec.commit_id for rec in written if rec.commit_id not in stored_ids]
        assert not absent

    def test_many_commits_all_retrievable(self, repo: pathlib.Path) -> None:
        """A 100-commit linear chain can be read back commit by commit."""
        epoch = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        chain: list[str] = []
        parent_id: str | None = None
        for index in range(100):
            # One-second spacing gives every commit a distinct timestamp.
            stamp = epoch + datetime.timedelta(seconds=index)
            text = f"stress-{index:04d}"
            parents = [parent_id] if parent_id else []
            commit_id = compute_commit_id(
                parent_ids=parents,
                snapshot_id=_SNAP_ID,
                message=text,
                committed_at_iso=stamp.isoformat(),
            )
            record = CommitRecord(
                commit_id=commit_id,
                branch="main",
                snapshot_id=_SNAP_ID,
                message=text,
                committed_at=stamp,
                parent_commit_id=parent_id,
            )
            write_commit(repo, record)
            chain.append(commit_id)
            parent_id = commit_id
        for commit_id in chain:
            assert read_commit(repo, commit_id) is not None
| 286 | |
| 287 | |
| 288 | # =========================================================================== |
| 289 | # SnapshotRecord |
| 290 | # =========================================================================== |
| 291 | |
| 292 | |
class TestSnapshotRecordRoundTrip:
    """Round-trips for SnapshotRecord, in memory and through the store."""

    def test_minimal_round_trip(self) -> None:
        """A one-entry manifest and its id survive to_dict/from_dict."""
        original = SnapshotRecord(snapshot_id="snap-1", manifest={"f.mid": "hash1"})
        payload = original.to_dict()
        restored = SnapshotRecord.from_dict(payload)
        assert restored.snapshot_id == "snap-1"
        assert restored.manifest == {"f.mid": "hash1"}

    def test_large_manifest(self) -> None:
        """A 500-entry manifest keeps its size and its first entry."""
        entries: dict[str, str] = {}
        for i in range(500):
            entries[f"track_{i:04d}.mid"] = f"hash-{i:064d}"
        original = SnapshotRecord(snapshot_id="big-snap", manifest=entries)
        restored = SnapshotRecord.from_dict(original.to_dict())
        assert len(restored.manifest) == 500
        assert restored.manifest["track_0000.mid"] == f"hash-{0:064d}"

    def test_write_and_read_back(self, repo: pathlib.Path) -> None:
        """A snapshot written to the store reads back with the same manifest."""
        manifest = {
            "a.mid": fake_id("a.mid-content"),
            "b.mid": fake_id("b.mid-content"),
        }
        # The id is computed from the manifest rather than hard-coded.
        snap_id = compute_snapshot_id(manifest)
        write_snapshot(repo, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))
        loaded = read_snapshot(repo, snap_id)
        assert loaded is not None
        assert loaded.manifest == manifest

    def test_empty_manifest_round_trip(self) -> None:
        """An empty manifest stays an empty dict."""
        original = SnapshotRecord(snapshot_id="empty-snap", manifest={})
        payload = original.to_dict()
        assert SnapshotRecord.from_dict(payload).manifest == {}
| 320 | |
| 321 | |
| 322 | # =========================================================================== |
| 323 | # TagRecord |
| 324 | # =========================================================================== |
| 325 | |
| 326 | |
class TestTagRecord:
    """Round-trip and store-listing checks for TagRecord."""

    def test_round_trip(self) -> None:
        """tag_id, tag and commit_id survive to_dict/from_dict."""
        original = TagRecord(
            tag_id="tag-001",
            repo_id="test-repo",
            commit_id="abc123",
            tag="v1.0.0",
        )
        payload = original.to_dict()
        restored = TagRecord.from_dict(payload)
        assert restored.tag_id == "tag-001"
        assert restored.tag == "v1.0.0"
        assert restored.commit_id == "abc123"

    def test_write_and_list(self, repo: pathlib.Path) -> None:
        """Ten tags written under one repo id are all returned by get_all_tags."""
        for index in range(10):
            record = TagRecord(
                tag_id=fake_id(f"tag-{index:04d}"),
                repo_id=fake_id("repo"),
                commit_id=fake_id(f"commit-{index:04d}"),
                tag=f"v{index}.0.0",
            )
            write_tag(repo, record)
        listed = get_all_tags(repo, fake_id("repo"))
        assert len(listed) == 10

    def test_created_at_preserved(self) -> None:
        """created_at comes back within one second of the original value."""
        stamp = datetime.datetime(2025, 6, 15, 12, 0, 0, tzinfo=datetime.timezone.utc)
        original = TagRecord(tag_id="t", repo_id="r", commit_id="c", tag="v1", created_at=stamp)
        restored = TagRecord.from_dict(original.to_dict())
        # Allow sub-second drift rather than requiring exact equality.
        drift = (restored.created_at - stamp).total_seconds()
        assert abs(drift) < 1.0
| 356 | |
| 357 | |
| 358 | # =========================================================================== |
| 359 | # get_head_commit_id |
| 360 | # =========================================================================== |
| 361 | |
| 362 | |
class TestGetHeadCommitId:
    """get_head_commit_id against ref files written directly to disk."""

    def test_empty_branch_returns_none(self, repo: pathlib.Path) -> None:
        """A branch with no ref file has no head commit."""
        head = get_head_commit_id(repo, "nonexistent-branch")
        assert head is None

    def test_returns_id_after_writing_head_ref(self, repo: pathlib.Path) -> None:
        """The id written to the branch ref file is what comes back."""
        cid = long_id("a" * 64)
        head_file = ref_path(repo, "main")
        head_file.write_text(f"{cid}\n")
        assert get_head_commit_id(repo, "main") == cid

    def test_strips_whitespace(self, repo: pathlib.Path) -> None:
        """Padding around the id in the ref file is ignored."""
        cid = long_id("b" * 64)
        head_file = ref_path(repo, "feature")
        head_file.write_text(f"  {cid}  \n")
        assert get_head_commit_id(repo, "feature") == cid
| 376 | |
| 377 | |
| 378 | # =========================================================================== |
| 379 | # CRDT semantics on CommitRecord fields |
| 380 | # =========================================================================== |
| 381 | |
| 382 | |
class TestCRDTAnnotationSemantics:
    """CRDT-style annotation fields persisted through overwrite_commit."""

    def test_reviewed_by_orset_union_semantics(self, repo: pathlib.Path) -> None:
        """ORSet union: multiple overwrite_commit calls accumulate reviewers."""
        c = _commit("crdt-or-001")
        write_commit(repo, c)

        # Agent 1 adds their name.
        rec = read_commit(repo, c.commit_id)
        assert rec is not None
        # add() hands back a pair; only the updated set is needed here, so the
        # second item is discarded.
        s, _ = ORSet().add("agent-alpha")
        # Stored sorted, matching the second write below.
        rec.reviewed_by = sorted(s.elements())
        overwrite_commit(repo, rec)

        # Agent 2 independently adds their name.
        rec2 = read_commit(repo, c.commit_id)
        assert rec2 is not None
        # Rebuild a set from the persisted reviewers before adding the new one,
        # so the earlier reviewer is carried forward.
        s2 = ORSet()
        for name in rec2.reviewed_by:
            s2, _ = s2.add(name)
        s2, _ = s2.add("agent-beta")
        rec2.reviewed_by = sorted(s2.elements())
        overwrite_commit(repo, rec2)

        final = read_commit(repo, c.commit_id)
        assert final is not None
        assert "agent-alpha" in final.reviewed_by
        assert "agent-beta" in final.reviewed_by

    def test_test_runs_gcounter_monotone(self, repo: pathlib.Path) -> None:
        """GCounter: test_runs must never decrease."""
        c = _commit("crdt-gc-001")
        write_commit(repo, c)
        prev = 0
        for _ in range(50):
            rec = read_commit(repo, c.commit_id)
            assert rec is not None
            rec.test_runs += 1
            overwrite_commit(repo, rec)
            current = read_commit(repo, c.commit_id)
            assert current is not None
            # The persisted value may never fall below the last one observed.
            assert current.test_runs >= prev
            prev = current.test_runs
        # Fifty increments from a fresh commit must land on exactly fifty.
        assert prev == 50

    def test_all_provenance_fields_default_to_empty_string(self) -> None:
        """A commit built without provenance arguments has "" in every such field."""
        c = _commit()
        assert c.agent_id == ""
        assert c.model_id == ""
        assert c.toolchain_id == ""
        assert c.prompt_hash == ""
        assert c.signature == ""
        assert c.signer_key_id == ""
File History
1 commit
sha256:d11a87833d5fad6059b7662844bf5448a8911a17cce7a51811f71ad394f248eb
bump to v0.2.0rc13
Human
patch
6 days ago