tests/test_integrity_I5_commit_integrity.py · gabriel/muse

test_integrity_I5_commit_integrity.py python

734 lines 30.3 KB

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago

"""Phase 1.5 — Commit record integrity on re-read.

Tests cover:
- write_commit idempotency: re-writing an identical record is harmless; a
  record whose commit_id does not match its content hash raises ValueError
- write_commit over a corrupt existing file: the write is skipped (first
  writer wins) and the corruption surfaces at read time
- write_commit integrity violation: impostor bytes stored under an existing
  commit_id are detected by read_commit, not by write_commit
- read_commit: WARNING→CRITICAL upgrade for corrupt files
- read_commit_result: discriminated union (ok / not_found / corrupt)
- read_snapshot / read_snapshot_result: same guarantees
- get_all_commits / get_all_tags: CRITICAL on corrupt (previously silent)
- list_releases: CRITICAL on corrupt (previously silent)
- ``muse read-commit`` CLI round-trip after write_commit
- Concurrent write with same ID: 50 racing writers, no errors, record readable
- Regression: corrupt file must log CRITICAL (level 50), never WARNING (level 30)
"""
16
17	from __future__ import annotations
18
19	import datetime
20	import json
21	import logging
22	import pathlib
23	import threading
24
25	import pytest
26
27	from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
28
29	from muse.core.types import Manifest, fake_id, long_id
30	from muse.core.paths import muse_dir
31
32	_REPO_ID = fake_id("repo")
33	from muse.core.object_store import object_path as _obj_path
34	from muse.core.semver import SemVerTag
35	from muse.core.commits import (
36	CommitReadCorrupt,
37	CommitReadNotFound,
38	CommitReadOk,
39	CommitRecord,
40	commit_read_is_corrupt,
41	commit_read_is_not_found,
42	commit_read_is_ok,
43	get_all_commits,
44	read_commit,
45	read_commit_result,
46	write_commit,
47	)
48	from muse.core.snapshots import (
49	SnapshotReadCorrupt,
50	SnapshotReadNotFound,
51	SnapshotReadOk,
52	SnapshotRecord,
53	read_snapshot,
54	read_snapshot_result,
55	snapshot_read_is_corrupt,
56	snapshot_read_is_ok,
57	write_snapshot,
58	)
59	from muse.core.tags import (
60	TagRecord,
61	get_all_tags,
62	tag_path,
63	write_tag,
64	)
65	from muse.core.releases import (
66	ReleaseRecord,
67	list_releases,
68	release_path as _release_path,
69	write_release,
70	)
71
72	# ---------------------------------------------------------------------------
73	# Helpers
74	# ---------------------------------------------------------------------------
75
def _make_commit(
    root: pathlib.Path,
    message: str = "msg",
    branch: str = "main",
    parent: str | None = None,
    write: bool = True,
) -> CommitRecord:
    """Build a ``CommitRecord`` whose ``commit_id`` is derived from its content.

    The ID is produced by ``compute_commit_id`` from the same parent list,
    snapshot ID, message, timestamp and author that are stored on the record,
    so the record is intended to pass hash verification when read back.

    Args:
        root: Repository root handed to ``write_commit``.
        message: Commit message; also feeds the content hash.
        branch: Branch name stored on the record.
        parent: Optional parent commit ID; a falsy value means "no parent".
        write: When ``False`` the record is only built, not persisted —
            handy for concurrent or idempotent write scenarios.

    Returns:
        The constructed record (persisted unless ``write`` is ``False``).
    """
    timestamp = datetime.datetime(2026, 3, 1, tzinfo=datetime.timezone.utc)
    empty_snapshot_id = compute_snapshot_id({})
    record = CommitRecord(
        commit_id=compute_commit_id(
            parent_ids=[parent] if parent else [],
            snapshot_id=empty_snapshot_id,
            message=message,
            committed_at_iso=timestamp.isoformat(),
            author="tester",
        ),
        branch=branch,
        snapshot_id=empty_snapshot_id,
        message=message,
        committed_at=timestamp,
        author="tester",
        parent_commit_id=parent,
        parent2_commit_id=None,
    )
    if not write:
        return record
    write_commit(root, record)
    return record
112
113
def _make_snapshot(
    root: pathlib.Path, manifest: Manifest | None = None
) -> SnapshotRecord:
    """Persist and return a ``SnapshotRecord`` with a content-derived ID.

    The ``snapshot_id`` is computed from the manifest, so distinct manifests
    give distinct IDs — e.g. ``{"file-A.py": "a" * 64}`` versus
    ``{"file-B.py": "b" * 64}``. A missing or empty manifest is treated as
    ``{}``.
    """
    contents = manifest if manifest else {}
    record = SnapshotRecord(
        snapshot_id=compute_snapshot_id(contents),
        manifest=contents,
        created_at=datetime.datetime(2026, 3, 1, tzinfo=datetime.timezone.utc),
    )
    write_snapshot(root, record)
    return record
131
132
def _make_tag(root: pathlib.Path, tag_name: str) -> TagRecord:
    """Write a ``TagRecord`` named *tag_name* for the test repo and return it.

    The ``tag_id`` is ``fake_id(tag_name)``; every tag points at the same
    placeholder commit ID.
    """
    record = TagRecord(
        repo_id=_REPO_ID,
        tag_id=fake_id(tag_name),
        commit_id=fake_id("tag-commit"),
        tag=tag_name,
        created_at=datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc),
    )
    write_tag(root, record)
    return record
143
144
def _make_release(root: pathlib.Path, tag: str, semver: SemVerTag) -> ReleaseRecord:
    """Write a stable-channel ``ReleaseRecord`` for *tag* and return it.

    IDs are placeholders derived with ``fake_id``: the release ID from
    ``tag + "-release"`` and the snapshot ID from ``tag`` itself. Title
    mirrors the tag; body and changelog are empty.
    """
    fields = dict(
        repo_id=_REPO_ID,
        release_id=fake_id(tag + "-release"),
        tag=tag,
        semver=semver,
        channel="stable",
        commit_id=fake_id("release-commit"),
        snapshot_id=fake_id(tag),
        title=tag,
        body="",
        changelog=[],
    )
    record = ReleaseRecord(**fields)
    write_release(root, record)
    return record
160
161
162
def _tag_path(root: pathlib.Path, tag_id: str) -> pathlib.Path:
    """Return ``tag_path`` for *tag_id* inside the test repository ``_REPO_ID``."""
    location = tag_path(root, _REPO_ID, tag_id)
    return location
165
166
167	# ---------------------------------------------------------------------------
168	# Fixtures
169	# ---------------------------------------------------------------------------
170
@pytest.fixture()
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal muse repository under ``tmp_path`` and return its root.

    Creates the commits / snapshots / refs/heads / tags / releases
    directories, a ``repo.json`` carrying ``_REPO_ID``, a ``HEAD`` pointing at
    ``refs/heads/main``, and an empty ``main`` branch ref.
    """
    dot_muse = muse_dir(tmp_path)
    heads_dir = dot_muse / "refs" / "heads"
    # Same creation order as the layout list; mkdir fails if a dir already exists.
    for directory in (
        dot_muse / "commits",
        dot_muse / "snapshots",
        heads_dir,
        dot_muse / "tags",
        dot_muse / "releases",
    ):
        directory.mkdir(parents=True)
    (dot_muse / "repo.json").write_text(json.dumps({"repo_id": _REPO_ID}))
    (dot_muse / "HEAD").write_text("ref: refs/heads/main\n")
    (heads_dir / "main").write_text("")
    return tmp_path
183
184
185	# ===========================================================================
186	# 1. write_commit — idempotency
187	# ===========================================================================
188
class TestWriteCommitIdempotency:
    """Repeated or conflicting writes must never damage a stored commit."""

    def test_first_writer_wins(self, repo: pathlib.Path) -> None:
        """A record whose ID does not match its content is rejected up front.

        Incoming hash verification replaces the old "silent drop" path: the
        forged record raises ValueError, and the good file written first is
        still what ``read_commit`` returns afterwards.
        """
        original = _make_commit(repo, message="first-wins")
        # Reuse the stored commit_id with a different message so the ID no
        # longer matches the content hash of the incoming record.
        forged = CommitRecord(
            commit_id=original.commit_id,
            branch="main",
            snapshot_id=original.snapshot_id,
            message="second-attempt",
            committed_at=original.committed_at,
            author="tester",
            parent_commit_id=None,
            parent2_commit_id=None,
        )
        with pytest.raises(ValueError):
            write_commit(repo, forged)
        on_disk = read_commit(repo, original.commit_id)
        assert on_disk is not None
        assert on_disk.message == "first-wins", "bad incoming record must not overwrite good file"

    def test_exact_duplicate_emits_no_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Re-writing an identical record must stay below CRITICAL in the log."""
        record = _make_commit(repo, message="exact-dup-no-critical")
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            write_commit(repo, record)
        critical_records = [
            entry for entry in caplog.records if entry.levelno >= logging.CRITICAL
        ]
        assert not critical_records

    def test_idempotent_round_trip_preserves_all_fields(self, repo: pathlib.Path) -> None:
        """A second identical write leaves message and branch readable as written."""
        record = _make_commit(repo, message="preserve-me", branch="feat/x")
        # Second write of the same record — must be completely harmless.
        write_commit(repo, record)
        reread = read_commit(repo, record.commit_id)
        assert reread is not None
        assert (reread.message, reread.branch) == ("preserve-me", "feat/x")
232
233
# ===========================================================================
# 2. write_commit — corrupt existing file → write skipped, detected on read
# ===========================================================================
237
class TestWriteCommitCorruptExistingFile:
    """A damaged file already at the object path is not repaired by a re-write."""

    def test_corrupt_existing_is_skipped_by_write_commit(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Garbage bytes survive a second ``write_commit``; the read reports None.

        write_commit is idempotent — an existing object path is left alone
        (first writer wins) — so the corruption is only noticed by
        ``read_commit``.
        """
        record = _make_commit(repo, message="original-overwrite")
        target = _obj_path(repo, record.commit_id)
        # Simulate disk corruption that happens after the initial write.
        target.write_bytes(b"\xff\xfe\x00bad-data\x99")
        write_commit(repo, record)  # idempotent — file already exists
        # Corruption is detected at read time.
        assert read_commit(repo, record.commit_id) is None

    def test_empty_existing_is_skipped_by_write_commit(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """A zero-byte commit file is not repaired by write_commit (idempotent)."""
        record = _make_commit(repo, message="after-crash-overwrite")
        target = _obj_path(repo, record.commit_id)
        target.write_bytes(b"")
        write_commit(repo, record)  # skipped — file exists
        # An empty file counts as corrupt.
        assert read_commit(repo, record.commit_id) is None

    def test_truncated_existing_is_skipped_by_write_commit(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """A commit file cut to half its length is not repaired by write_commit."""
        record = _make_commit(repo, message="after-truncation-overwrite")
        target = _obj_path(repo, record.commit_id)
        intact = target.read_bytes()
        half = len(intact) // 2
        target.write_bytes(intact[:half])
        write_commit(repo, record)  # skipped — file exists
        # A truncated file counts as corrupt.
        assert read_commit(repo, record.commit_id) is None
276
277
278	# ===========================================================================
279	# 3. write_commit — store integrity violation
280	# ===========================================================================
281
class TestWriteCommitIntegrityViolation:
    """A well-formed record stored under the wrong commit_id must not be served."""

    def test_commit_id_mismatch_detected_at_read_time(self, repo: pathlib.Path) -> None:
        """Impostor bytes at object_path are detected by read_commit, not write_commit.

        write_commit is idempotent: if object_path exists, it is skipped regardless
        of content. Hash verification happens at read time — read_commit returns None
        when the stored payload's commit_id doesn't match the recomputed hash.
        """
        c_legit = _make_commit(repo, message="legitimate-mismatch")
        c_impostor = _make_commit(repo, message="impostor-mismatch")
        # Overwrite c_legit's object file with the impostor's JSON payload,
        # framed with the same "commit <length>\0" header this test has always
        # written. Uses the module-level ``json`` import; no local re-import.
        payload = json.dumps(c_impostor.to_dict(), separators=(",", ":")).encode()
        _obj_path(repo, c_legit.commit_id).write_bytes(
            f"commit {len(payload)}\0".encode() + payload
        )
        # write_commit skips — the file exists, so nothing is raised or repaired.
        write_commit(repo, c_legit)
        # read_commit detects the hash mismatch and reports the record as unreadable.
        assert read_commit(repo, c_legit.commit_id) is None
303
304
305	# ===========================================================================
306	# 4. read_commit — CRITICAL log for corrupt
307	# ===========================================================================
308
class TestReadCommitCriticalLogging:
    """``read_commit`` logs CRITICAL for corrupt files and stays quiet otherwise."""

    def test_corrupt_file_logs_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Garbage bytes give ``None`` plus a CRITICAL record mentioning "Corrupt"."""
        record = _make_commit(repo, message="garbage-payload")
        _obj_path(repo, record.commit_id).write_bytes(b"\x00\x01garbage\xff")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            outcome = read_commit(repo, record.commit_id)
        assert outcome is None
        critical_logs = [
            entry for entry in caplog.records if entry.levelno >= logging.CRITICAL
        ]
        assert critical_logs, "Must log CRITICAL for corrupt commit file"
        assert any("Corrupt" in entry.message for entry in critical_logs)

    def test_missing_file_returns_none_no_log(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """An unknown commit ID returns ``None`` without WARNING-or-higher logs."""
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            outcome = read_commit(repo, fake_id("missing-commit"))
        assert outcome is None
        noisy = [entry for entry in caplog.records if entry.levelno >= logging.WARNING]
        assert not noisy

    def test_valid_file_returns_record_no_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """A healthy commit is returned intact and nothing CRITICAL is logged."""
        record = _make_commit(repo, message="clean-read")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            outcome = read_commit(repo, record.commit_id)
        assert outcome is not None
        assert outcome.message == "clean-read"
        critical_logs = [
            entry for entry in caplog.records if entry.levelno >= logging.CRITICAL
        ]
        assert not critical_logs

    def test_corrupt_log_references_filename(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """The corruption log (message or args) names the affected commit ID."""
        record = _make_commit(repo, message="not-msgpack-content")
        _obj_path(repo, record.commit_id).write_bytes(b"not-msgpack")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            read_commit(repo, record.commit_id)
        # Search both the rendered message and the raw args of every record.
        fragments = [entry.message + str(entry.args) for entry in caplog.records]
        messages = " ".join(fragments)
        bare = long_id(record.commit_id, strip=True)
        assert bare[:8] in messages or bare in messages
350
351
352	# ===========================================================================
353	# 5. read_commit_result — discriminated union
354	# ===========================================================================
355
class TestReadCommitResult:
    """``read_commit_result`` reports ok / not_found / corrupt as a tagged dict."""

    def test_ok_status_on_valid_record(self, repo: pathlib.Path) -> None:
        """A stored commit comes back as an ok result carrying the record."""
        written = _make_commit(repo, message="typed-ok")
        outcome = read_commit_result(repo, written.commit_id)
        assert commit_read_is_ok(outcome)
        loaded = outcome["commit"]
        assert isinstance(loaded, CommitRecord)
        assert loaded.message == "typed-ok"

    def test_not_found_status_when_missing(self, repo: pathlib.Path) -> None:
        """An unknown ID is classified as not_found."""
        outcome = read_commit_result(repo, fake_id("ff-missing-commit"))
        assert commit_read_is_not_found(outcome)

    def test_corrupt_status_on_bad_bytes(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Garbage bytes give a corrupt result with path, error and a CRITICAL log."""
        written = _make_commit(repo, message="corrupt-bytes")
        _obj_path(repo, written.commit_id).write_bytes(b"\xff\x00garbage")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            outcome = read_commit_result(repo, written.commit_id)
        assert commit_read_is_corrupt(outcome)
        assert outcome["path"] != ""
        assert outcome["error"] != ""
        critical_logs = [
            entry for entry in caplog.records if entry.levelno >= logging.CRITICAL
        ]
        assert critical_logs

    def test_corrupt_result_path_contains_commit_id(self, repo: pathlib.Path) -> None:
        """The reported path contains both pieces of the commit's hex ID."""
        written = _make_commit(repo, message="path-in-corrupt")
        _obj_path(repo, written.commit_id).write_bytes(b"")
        outcome = read_commit_result(repo, written.commit_id)
        assert commit_read_is_corrupt(outcome)
        # object_path splits the 64-char hex at position 2 (dir prefix), so
        # the full hex is never a contiguous substring. Check the first two
        # chars (the dir component) and the chars that follow (the filename).
        bare = long_id(written.commit_id, strip=True)
        reported_path = outcome["path"]
        assert bare[:2] in reported_path and bare[2:10] in reported_path

    def test_ok_result_roundtrips_all_metadata(self, repo: pathlib.Path) -> None:
        """Branch, author and metadata survive a write / read_commit_result cycle."""
        # Build with a real content-addressed ID so _verify_commit_id passes.
        snapshot_id = fake_id("snap-meta-roundtrip")
        timestamp = datetime.datetime(2026, 3, 15, tzinfo=datetime.timezone.utc)
        commit_id = compute_commit_id(
            parent_ids=[],
            snapshot_id=snapshot_id,
            message="full metadata",
            committed_at_iso=timestamp.isoformat(),
            author="alice",
        )
        write_commit(
            repo,
            CommitRecord(
                commit_id=commit_id,
                branch="dev",
                snapshot_id=snapshot_id,
                message="full metadata",
                committed_at=timestamp,
                author="alice",
                parent_commit_id=None,
                parent2_commit_id=None,
                metadata={"key": "val"},
            ),
        )
        outcome = read_commit_result(repo, commit_id)
        assert commit_read_is_ok(outcome)
        loaded = outcome["commit"]
        assert loaded.branch == "dev"
        assert loaded.author == "alice"
        assert loaded.metadata == {"key": "val"}

    def test_status_field_is_string(self, repo: pathlib.Path) -> None:
        """Status values are plain strings — easy for agents to pattern-match."""
        written = _make_commit(repo, message="status-str-check")
        status = read_commit_result(repo, written.commit_id)["status"]
        assert isinstance(status, str)

    def test_not_found_has_only_status_key(self, repo: pathlib.Path) -> None:
        """A not_found result carries no keys other than ``status``."""
        outcome = read_commit_result(repo, fake_id("90-not-existing"))
        assert set(outcome.keys()) == {"status"}

    def test_three_outcomes_are_mutually_exclusive(self, repo: pathlib.Path) -> None:
        """Confirm all three outcome strings are distinct and unambiguous."""
        healthy = _make_commit(repo, message="outcome-ok")
        damaged = _make_commit(repo, message="outcome-corrupt")
        _obj_path(repo, damaged.commit_id).write_bytes(b"bad")
        lookups = (healthy.commit_id, fake_id("cc-missing"), damaged.commit_id)
        statuses = {read_commit_result(repo, cid)["status"] for cid in lookups}
        assert statuses == {"ok", "not_found", "corrupt"}
442
443
444	# ===========================================================================
445	# 6. read_snapshot / read_snapshot_result
446	# ===========================================================================
447
class TestReadSnapshotIntegrity:
    """Snapshot reads follow the same corrupt / missing rules as commit reads."""

    def test_corrupt_snapshot_logs_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """A corrupt snapshot file reads as ``None`` and logs at CRITICAL."""
        snapshot = _make_snapshot(repo, manifest={"snap-critical.py": fake_id("oid-a")})
        _obj_path(repo, snapshot.snapshot_id).write_bytes(b"\xde\xad\xbe\xef")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            outcome = read_snapshot(repo, snapshot.snapshot_id)
        assert outcome is None
        critical_logs = [
            entry for entry in caplog.records if entry.levelno >= logging.CRITICAL
        ]
        assert critical_logs

    def test_snapshot_result_ok(self, repo: pathlib.Path) -> None:
        """A stored snapshot yields an ok result carrying a ``SnapshotRecord``."""
        snapshot = _make_snapshot(repo, manifest={"snap-ok.py": fake_id("oid-b")})
        outcome = read_snapshot_result(repo, snapshot.snapshot_id)
        assert snapshot_read_is_ok(outcome)
        assert isinstance(outcome["snapshot"], SnapshotRecord)

    def test_snapshot_result_not_found(self, repo: pathlib.Path) -> None:
        """An unknown snapshot ID gives a bare ``not_found`` result."""
        outcome = read_snapshot_result(repo, fake_id("no-snap"))
        assert outcome["status"] == "not_found"
        assert set(outcome.keys()) == {"status"}

    def test_snapshot_result_corrupt(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """A corrupt snapshot yields a corrupt result with path and error filled in."""
        snapshot = _make_snapshot(repo, manifest={"snap-corrupt.py": fake_id("oid-c")})
        _obj_path(repo, snapshot.snapshot_id).write_bytes(b"garbage-bytes\x00")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            outcome = read_snapshot_result(repo, snapshot.snapshot_id)
        assert snapshot_read_is_corrupt(outcome)
        assert outcome["path"] != ""
        assert outcome["error"] != ""

    def test_missing_snapshot_no_log(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """A missing snapshot returns ``None`` without WARNING-or-higher logs."""
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            outcome = read_snapshot(repo, fake_id("missing"))
        assert outcome is None
        noisy = [entry for entry in caplog.records if entry.levelno >= logging.WARNING]
        assert not noisy
488
489
490	# ===========================================================================
491	# 7. get_all_commits — CRITICAL on corrupt (previously silent)
492	# ===========================================================================
493
class TestGetAllCommitsCorruptLogging:
    """``get_all_commits`` drops corrupt records and logs each one at CRITICAL."""

    def test_one_corrupt_skipped_with_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Corrupt commit is skipped; good commits returned; CRITICAL emitted."""
        healthy = _make_commit(repo, message="good-survives")
        damaged = _make_commit(repo, message="will-corrupt")
        _obj_path(repo, damaged.commit_id).write_bytes(b"\xff\x00")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            listed = get_all_commits(repo)
        listed_ids = {entry.commit_id for entry in listed}
        assert healthy.commit_id in listed_ids, "good commit must still appear"
        assert damaged.commit_id not in listed_ids, "corrupt commit must be excluded"
        critical_logs = [
            entry for entry in caplog.records if entry.levelno >= logging.CRITICAL
        ]
        assert critical_logs

    def test_all_corrupt_returns_empty_with_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Three corrupt commits give an empty list and exactly three CRITICAL logs."""
        # Write all three first, then damage them, as two separate passes.
        victims = [_make_commit(repo, message=f"c{i}") for i in range(3)]
        for victim in victims:
            _obj_path(repo, victim.commit_id).write_bytes(b"bad")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            listed = get_all_commits(repo)
        assert listed == []
        critical_logs = [
            entry for entry in caplog.records if entry.levelno >= logging.CRITICAL
        ]
        assert len(critical_logs) == 3

    def test_empty_store_returns_empty_no_log(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """An empty store lists nothing and logs nothing at WARNING or above."""
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            listed = get_all_commits(repo)
        assert listed == []
        noisy = [entry for entry in caplog.records if entry.levelno >= logging.WARNING]
        assert not noisy

    def test_mixed_good_and_corrupt_correct_count(self, repo: pathlib.Path) -> None:
        """With five good and three corrupt commits, only the good ones are counted."""
        healthy = [_make_commit(repo, message=f"g{i}") for i in range(5)]
        damaged = [_make_commit(repo, message=f"b{i}") for i in range(3)]
        for victim in damaged:
            _obj_path(repo, victim.commit_id).write_bytes(b"corrupt")
        expected_count = len(healthy)
        assert len(get_all_commits(repo)) == expected_count
536
537
538	# ===========================================================================
539	# 8. get_all_tags — CRITICAL on corrupt (previously silent)
540	# ===========================================================================
541
class TestGetAllTagsCorruptLogging:
    """``get_all_tags`` drops corrupt tag files and logs each one at CRITICAL."""

    def test_corrupt_tag_skipped_with_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """The intact tag is listed, the corrupt one is not, and CRITICAL is logged."""
        _make_tag(repo, "v1.0.0")
        damaged = _make_tag(repo, "v2.0.0")
        _tag_path(repo, damaged.tag_id).write_bytes(b"\x00bad")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            listed = get_all_tags(repo, _REPO_ID)
        names = {entry.tag for entry in listed}
        assert "v1.0.0" in names
        assert "v2.0.0" not in names
        critical_logs = [
            entry for entry in caplog.records if entry.levelno >= logging.CRITICAL
        ]
        assert critical_logs

    def test_good_tags_all_returned(self, repo: pathlib.Path) -> None:
        """Two healthy tags are both returned."""
        for name in ("v0.1", "v0.2"):
            _make_tag(repo, name)
        listed = get_all_tags(repo, _REPO_ID)
        assert len(listed) == 2

    def test_all_corrupt_tags_returns_empty_with_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Three corrupt tags give an empty list and exactly three CRITICAL logs."""
        # Each tag is written and then damaged before the next one is created.
        for name in ("v1", "v2", "v3"):
            victim = _make_tag(repo, name)
            _tag_path(repo, victim.tag_id).write_bytes(b"bad")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            listed = get_all_tags(repo, _REPO_ID)
        assert listed == []
        critical_logs = [
            entry for entry in caplog.records if entry.levelno >= logging.CRITICAL
        ]
        assert len(critical_logs) == 3
573
574
575	# ===========================================================================
576	# 9. list_releases — CRITICAL on corrupt (previously silent)
577	# ===========================================================================
578
class TestListReleasesCorruptLogging:
    """``list_releases`` drops corrupt release files and logs at CRITICAL."""

    def test_corrupt_release_skipped_with_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """The intact release is listed, the corrupt one is not, CRITICAL is logged."""
        v1 = SemVerTag(major=1, minor=0, patch=0, pre="", build="")
        healthy = _make_release(repo, "v1.0.0", v1)
        v2 = SemVerTag(major=2, minor=0, patch=0, pre="", build="")
        damaged = _make_release(repo, "v2.0.0", v2)
        _release_path(repo, _REPO_ID, damaged.release_id).write_bytes(b"\xff\x00garbage")
        with caplog.at_level(logging.CRITICAL, logger="muse.core.store"):
            listed = list_releases(repo, _REPO_ID)
        listed_ids = {entry.release_id for entry in listed}
        assert healthy.release_id in listed_ids
        assert damaged.release_id not in listed_ids
        critical_logs = [
            entry for entry in caplog.records if entry.levelno >= logging.CRITICAL
        ]
        assert critical_logs

    def test_all_releases_good_returns_all(self, repo: pathlib.Path) -> None:
        """Two healthy releases are both returned."""
        _make_release(
            repo, "v1.0.0", SemVerTag(major=1, minor=0, patch=0, pre="", build="")
        )
        _make_release(
            repo, "v1.1.0", SemVerTag(major=1, minor=1, patch=0, pre="", build="")
        )
        listed = list_releases(repo, _REPO_ID)
        assert len(listed) == 2
602
603
604	# ===========================================================================
605	# 10. verify-pack integration after write_commit
606	# ===========================================================================
607
class TestVerifyPackAfterWriteCommit:
    """CLI-level check that a freshly written commit is readable end to end."""

    def test_cmd_read_commit_roundtrip(self, repo: pathlib.Path) -> None:
        """``muse read-commit`` must succeed for every written commit."""
        # Kept function-local, as in the original: the CLI helper is only
        # imported when this test actually runs.
        from tests.cli_test_helper import CliRunner

        c = _make_commit(repo, message="plumbing-check")

        runner = CliRunner()
        result = runner.invoke(
            None,
            ["read-commit", c.commit_id, "--json"],
            env={"MUSE_REPO_ROOT": str(repo)},
        )
        # Attach the CLI output so a non-zero exit is diagnosable from the report.
        assert result.exit_code == 0, result.output
        # Uses the module-level ``json`` import; no mid-function re-import.
        data = json.loads(result.output)
        assert data["commit_id"] == c.commit_id
        assert data["message"] == "plumbing-check"
626
627
628	# ===========================================================================
629	# 11. Concurrent idempotency — 50 threads race to write the same commit
630	# ===========================================================================
631
class TestConcurrentIdempotentWrite:
    """Fifty threads hammer ``write_commit`` with identical or distinct records."""

    def test_50_threads_same_commit_id_first_wins(self, repo: pathlib.Path) -> None:
        """50 threads writing the EXACT same commit — idempotent, exactly one file written."""
        # In a content-addressed system, identical content → identical commit_id.
        shared = _make_commit(repo, message="concurrent-idempotent", write=False)
        errors: list[Exception] = []

        def _attempt_write() -> None:
            # Collect rather than raise: an exception inside a thread would
            # otherwise never reach the assertions below.
            try:
                write_commit(repo, shared)
            except Exception as exc:
                errors.append(exc)

        workers = [threading.Thread(target=_attempt_write) for _ in range(50)]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        assert not errors, f"Unexpected errors in same-ID concurrent writes: {errors[:3]}"

        reread = read_commit(repo, shared.commit_id)
        assert reread is not None
        assert reread.commit_id == shared.commit_id
        assert reread.message == "concurrent-idempotent"

    def test_50_threads_distinct_ids_all_survive(self, repo: pathlib.Path) -> None:
        """50 threads writing distinct commit IDs must all persist without errors."""
        errors: list[Exception] = []

        def _write_numbered(i: int) -> None:
            # _make_commit uses compute_commit_id so the hash always matches content.
            unique = _make_commit(repo, message=f"unique {i}", write=False)
            try:
                write_commit(repo, unique)
            except Exception as exc:
                errors.append(exc)

        workers = [
            threading.Thread(target=_write_numbered, args=(i,)) for i in range(50)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        assert not errors, f"Unexpected errors in distinct-ID concurrent writes: {errors[:3]}"
        stored = get_all_commits(repo)
        assert len(stored) == 50
679
680
681	# ===========================================================================
682	# 12. Regression: WARNING→CRITICAL upgrade is permanent
683	# ===========================================================================
684
class TestRegressionCorruptLevelUpgrade:
    """Confirm the upgrade from WARNING to CRITICAL is permanent and precise."""

    def test_corrupt_commit_logs_at_critical_not_warning(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Reading a corrupt commit logs a CRITICAL record and no WARNING record."""
        record = _make_commit(repo, message="level-upgrade-commit")
        _obj_path(repo, record.commit_id).write_bytes(b"trash")
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            read_commit(repo, record.commit_id)
        levels = [entry.levelno for entry in caplog.records]
        assert logging.CRITICAL in levels, (
            f"Expected CRITICAL (50) but got levels: {levels}"
        )
        assert logging.WARNING not in levels, (
            "Must not downgrade corruption to WARNING — only CRITICAL is acceptable"
        )

    def test_corrupt_snapshot_logs_at_critical_not_warning(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Reading a corrupt snapshot logs a CRITICAL record and no WARNING record."""
        snapshot = _make_snapshot(repo, manifest={"snap-level.py": fake_id("oid-d")})
        _obj_path(repo, snapshot.snapshot_id).write_bytes(b"bad")
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            read_snapshot(repo, snapshot.snapshot_id)
        levels = [entry.levelno for entry in caplog.records]
        assert logging.CRITICAL in levels
        assert logging.WARNING not in levels

    def test_get_all_commits_logs_corrupt_at_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Listing commits over a corrupt file logs CRITICAL and no WARNING."""
        record = _make_commit(repo, message="level-upgrade-get-all")
        _obj_path(repo, record.commit_id).write_bytes(b"trash")
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            get_all_commits(repo)
        levels = [entry.levelno for entry in caplog.records]
        assert logging.CRITICAL in levels
        assert logging.WARNING not in levels

    def test_get_all_tags_logs_corrupt_at_critical(
        self, repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Listing tags over a corrupt file logs CRITICAL and no WARNING."""
        tag = _make_tag(repo, "v-crit")
        _tag_path(repo, tag.tag_id).write_bytes(b"trash")
        with caplog.at_level(logging.DEBUG, logger="muse.core.store"):
            get_all_tags(repo, _REPO_ID)
        levels = [entry.levelno for entry in caplog.records]
        assert logging.CRITICAL in levels
        assert logging.WARNING not in levels

File History 4 commits

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago

sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago

sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago

sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor ⚠ 28 days ago

function _make_commit

function _make_snapshot

function _make_tag

function _make_release

function _tag_path

function repo

class TestWriteCommitIdempotency

function test_first_writer_wins

function test_exact_duplicate_emits_no_critical

function test_idempotent_round_trip_preserves_all_fields

class TestWriteCommitCorruptExistingFile

function test_corrupt_existing_is_skipped_by_write_commit

function test_empty_existing_is_skipped_by_write_commit

function test_truncated_existing_is_skipped_by_write_commit

class TestWriteCommitIntegrityViolation

function test_commit_id_mismatch_detected_at_read_time

class TestReadCommitCriticalLogging

function test_corrupt_file_logs_critical

function test_missing_file_returns_none_no_log

function test_valid_file_returns_record_no_critical

function test_corrupt_log_references_filename

class TestReadCommitResult

function test_ok_status_on_valid_record

function test_not_found_status_when_missing

function test_corrupt_status_on_bad_bytes

function test_corrupt_result_path_contains_commit_id

function test_ok_result_roundtrips_all_metadata

function test_status_field_is_string

function test_not_found_has_only_status_key

function test_three_outcomes_are_mutually_exclusive

class TestReadSnapshotIntegrity

function test_corrupt_snapshot_logs_critical

function test_snapshot_result_ok

function test_snapshot_result_not_found

function test_snapshot_result_corrupt

function test_missing_snapshot_no_log

class TestGetAllCommitsCorruptLogging

function test_one_corrupt_skipped_with_critical

function test_all_corrupt_returns_empty_with_critical

function test_empty_store_returns_empty_no_log

function test_mixed_good_and_corrupt_correct_count

class TestGetAllTagsCorruptLogging

function test_corrupt_tag_skipped_with_critical

function test_good_tags_all_returned

function test_all_corrupt_tags_returns_empty_with_critical

class TestListReleasesCorruptLogging

function test_corrupt_release_skipped_with_critical

function test_all_releases_good_returns_all

class TestVerifyPackAfterWriteCommit

function test_cmd_read_commit_roundtrip

class TestConcurrentIdempotentWrite

function test_50_threads_same_commit_id_first_wins

function write_one

function test_50_threads_distinct_ids_all_survive

function write_unique

class TestRegressionCorruptLevelUpgrade

function test_corrupt_commit_logs_at_critical_not_warning

function test_corrupt_snapshot_logs_at_critical_not_warning

function test_get_all_commits_logs_corrupt_at_critical

function test_get_all_tags_logs_corrupt_at_critical

Pathtests/test_integrity_I5_commit_integrity.py

Lines734

Size30.3 KB

LangPython

Refsha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2

Object ID

sha256:42ad303da2cc2a46577af9c5ad3868c8ff33227ce6ea537a0b973765ae91deba…

Last commit

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2

fix: remove commit_exists filter from have anchor…

20 days ago

Quick links

Blame History