tests/test_integrity_I4_msgpack_size.py · gabriel/muse

test_integrity_I4_msgpack_size.py python

760 lines 30.6 KB

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago

1	"""I-4: Store file size limit — prevent OOM from oversized store files.
2
3	Problem (pre-fix): ``_read_msgpack`` called ``path.read_bytes()`` with no
4	size guard. A 10 GiB corrupt or adversarially crafted store file would
5	allocate 10 GiB of RAM, crashing the process or triggering the OOM killer
6	— a critical data-integrity and availability failure.
7
8	``read_object`` in the object store already had a 256 MiB cap. The commit,
9	snapshot, tag, release, shelf, and index stores did not.
10
11	Fix: added to both ``muse/core/store.py`` and ``muse/core/indices.py``:
12
13	1. ``MAX_MSGPACK_BYTES = 64 MiB`` — ``stat().st_size`` is checked before
14	``read_bytes()`` so no allocation ever occurs. The constant name is
15	legacy; it also guards the new JSON/git-header store files.
16	2. Per-value limits on msgpack wire reads — ``max_str_len``,
17	``max_bin_len``, ``max_array_len``, ``max_map_len`` — prevent deeply
18	nested or pathologically large single-value documents from consuming
19	unbounded memory even within the size cap.
20
21	This file proves every aspect of the fix:
22
Tier 0 — constant export
Low-level — corrupt store objects are rejected gracefully
High-level — public read functions skip corrupt or oversized files
Tier 3 — boundary / exact-limit behaviour
Tier 4 — per-value unpack limits
Tier 5 — index file protection
Tier 6 — log record on corrupt store object
Tier 7 — CLI command (clean JSON error, no traceback)
Tier 8 — round-trip: valid files still read correctly
Tier 9 — performance (size check adds < 1 ms overhead)
32	"""
33	from __future__ import annotations
34
35	import datetime
36	import logging
37	import pathlib
38	import time
39	from unittest.mock import patch, MagicMock
40
41	import msgpack
42	import pytest
43
44	from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
45	from muse.core.object_store import object_path as _obj_path
46	from muse.core.io import MAX_MSGPACK_BYTES
47	from muse.core.types import MsgpackValue
48	from muse.core.commits import (
49	CommitRecord,
50	read_commit,
51	write_commit,
52	)
53	from muse.core.snapshots import (
54	SnapshotRecord,
55	read_snapshot,
56	write_snapshot,
57	)
58	from muse.core.tags import (
59	TagRecord,
60	get_all_tags,
61	write_tag,
62	)
63	from muse.core.releases import list_releases
64
65	from muse.core.types import Manifest, MsgpackDict, fake_id
66	from muse.core.indices import (
67	load_symbol_history,
68	load_hash_occurrence,
69	)
70	from muse.core.paths import commits_dir, indices_dir, muse_dir, releases_dir, snapshots_dir
71
72
73	# ---------------------------------------------------------------------------
74	# Helpers
75	# ---------------------------------------------------------------------------
76
# Repository ID shared by the fixtures below: _repo() writes it into
# repo.json and the tag/release tests pass it to the store APIs.
_REPO_ID = fake_id("test-repo")
78
79
def _repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *tmp_path* and return *tmp_path*.

    Creates the store directories, ``refs/heads``, a ``HEAD`` pointing at
    ``refs/heads/main`` and a ``repo.json`` carrying ``_REPO_ID``.
    """
    dot_muse = muse_dir(tmp_path)
    # "commits" is created first with parents=True so the store root exists
    # before the sibling directories are added.
    (dot_muse / "commits").mkdir(parents=True)
    for store_name in ("snapshots", "tags", "releases", "indices"):
        (dot_muse / store_name).mkdir()
    (dot_muse / "refs" / "heads").mkdir(parents=True)

    (dot_muse / "HEAD").write_text("ref: refs/heads/main\n")
    (dot_muse / "repo.json").write_text(f'{{"repo_id": "{_REPO_ID}"}}\n')
    return tmp_path
91
92
def _commit(idx: int = 0) -> CommitRecord:
    """Build a parentless CommitRecord on ``main`` whose message embeds *idx*.

    The commit ID is derived with ``compute_commit_id`` from the same field
    values that are stored on the returned record.
    """
    author = "tester"
    when = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    text = f"commit {idx}"
    empty_snapshot_id = compute_snapshot_id({})

    derived_id = compute_commit_id(
        parent_ids=[],
        snapshot_id=empty_snapshot_id,
        message=text,
        committed_at_iso=when.isoformat(),
        author=author,
    )
    return CommitRecord(
        commit_id=derived_id,
        branch="main",
        snapshot_id=empty_snapshot_id,
        message=text,
        committed_at=when,
        author=author,
        parent_commit_id=None,
        parent2_commit_id=None,
    )
114
115
def _snapshot(idx: int = 0) -> SnapshotRecord:
    """Build a SnapshotRecord with a one-entry manifest that varies with *idx*.

    The manifest key is the fixed string ``"__idx__"``; only the value
    (``fake_id(f"snap-{idx}")``) depends on *idx*.  The snapshot ID is
    computed from that manifest with ``compute_snapshot_id``.
    """
    # Plain literal: the key has no placeholders, so no f-string is needed.
    manifest: Manifest = {"__idx__": fake_id(f"snap-{idx}")}
    sid = compute_snapshot_id(manifest)
    return SnapshotRecord(
        snapshot_id=sid,
        manifest=manifest,
    )
123
124
def _tag(idx: int = 0) -> TagRecord:
    """Build a TagRecord named ``v{idx}.0.0`` in the test repository."""
    tag_identifier = fake_id(f"tag-id-{idx}")
    target_commit = fake_id(f"tag-commit-{idx}")
    label = f"v{idx}.0.0"
    return TagRecord(
        repo_id=_REPO_ID,
        tag_id=tag_identifier,
        commit_id=target_commit,
        tag=label,
    )
132
133
134	# ---------------------------------------------------------------------------
135	# Tier 0 — constant export
136	# ---------------------------------------------------------------------------
137
class TestConstantExport:
    """Checks on the exported ``MAX_MSGPACK_BYTES`` constant.

    The name is a leftover from before the JSON migration; the same cap also
    applies to the git-header+JSON store files and legacy shelf .msgpack files.
    """

    def test_max_msgpack_bytes_is_exported(self) -> None:
        # Importing here (not just at module top) makes a missing export fail
        # this test specifically.
        from muse.core.io import MAX_MSGPACK_BYTES as cap

        sixty_four_mib = 64 * 1024 * 1024
        assert cap == sixty_four_mib, f"Expected 64 MiB (67108864), got {cap}"

    def test_max_msgpack_bytes_is_int(self) -> None:
        assert isinstance(MAX_MSGPACK_BYTES, int)

    def test_max_msgpack_bytes_less_than_256mib(self) -> None:
        """The store-record cap must sit strictly under 256 MiB."""
        object_store_cap = 256 * 1024 * 1024
        assert MAX_MSGPACK_BYTES < object_store_cap, (
            "Store records should be capped below the object store's 256 MiB limit"
        )
159
160
161	# ---------------------------------------------------------------------------
162	# Low-level — stat check fires BEFORE read_bytes (the OOM prevention)
163	# ---------------------------------------------------------------------------
164
class TestStatCheckBeforeRead:
    """Corrupt store objects must be rejected without raising.

    Each test writes a valid record, overwrites its object-store file with
    bytes that are not a valid object, and expects the reader to return
    ``None``.  ``_oversized_stat`` builds a stat stand-in that reports a size
    one byte over ``MAX_MSGPACK_BYTES``; no test in this class calls it.
    """

    def _oversized_stat(self, real_path: pathlib.Path) -> MagicMock:
        """Return a MagicMock whose ``st_size`` is ``MAX_MSGPACK_BYTES + 1``.

        *real_path* is accepted but not consulted.
        """
        return MagicMock(st_size=MAX_MSGPACK_BYTES + 1)

    def test_read_commit_corrupt_object_returns_none(
        self, tmp_path: pathlib.Path
    ) -> None:
        """read_commit yields None once the commit's object file holds garbage."""
        root = _repo(tmp_path)
        record = _commit(0)
        write_commit(root, record)

        # Replace the stored object with bytes lacking a valid muse object header.
        object_file = _obj_path(root, record.commit_id)
        object_file.write_bytes(b"not-valid-content")

        outcome = read_commit(root, record.commit_id)
        assert outcome is None, "read_commit must return None for corrupt object"

    def test_read_snapshot_corrupt_object_returns_none(
        self, tmp_path: pathlib.Path
    ) -> None:
        """read_snapshot yields None once the snapshot's object file holds garbage."""
        root = _repo(tmp_path)
        record = _snapshot(0)
        write_snapshot(root, record)

        object_file = _obj_path(root, record.snapshot_id)
        object_file.write_bytes(b"not-valid-content")

        assert read_snapshot(root, record.snapshot_id) is None
205
206
207	# ---------------------------------------------------------------------------
208	# High-level — high-level read functions return None for oversized files
209	# ---------------------------------------------------------------------------
210
class TestReadFunctionsReturnNoneOnOversize:
    """Public read functions must degrade gracefully on bad store files.

    The commit/snapshot tests corrupt the object-store file directly.  The
    tag/release tests patch ``muse.core.io.MAX_MSGPACK_BYTES`` down to a small
    value so a few hundred bytes on disk already count as oversized.
    """

    def test_read_commit_returns_none_for_corrupt_object(
        self, tmp_path: pathlib.Path
    ) -> None:
        """read_commit returns None instead of raising for a corrupt object file."""
        root = _repo(tmp_path)
        record = _commit(1)
        write_commit(root, record)

        # 200 NUL bytes: no valid muse object header.
        _obj_path(root, record.commit_id).write_bytes(b"\x00" * 200)

        outcome = read_commit(root, record.commit_id)
        assert outcome is None, "read_commit must return None, not raise, for corrupt object"

    def test_read_snapshot_returns_none_for_corrupt_object(
        self, tmp_path: pathlib.Path
    ) -> None:
        """read_snapshot returns None for a corrupt object file."""
        root = _repo(tmp_path)
        record = _snapshot(1)
        write_snapshot(root, record)

        _obj_path(root, record.snapshot_id).write_bytes(b"\x00" * 200)

        assert read_snapshot(root, record.snapshot_id) is None

    def test_get_all_tags_skips_oversized_files(
        self, tmp_path: pathlib.Path
    ) -> None:
        """get_all_tags keeps the normal tag and drops the one inflated past the cap."""
        from muse.core.tags import tag_path

        root = _repo(tmp_path)
        kept, inflated = _tag(0), _tag(1)
        write_tag(root, kept)
        write_tag(root, inflated)

        # Size the cap from a real tag file: 2x fits the good tag, while the
        # bad tag is overwritten with 3x that many bytes.
        baseline = tag_path(root, _REPO_ID, kept.tag_id).stat().st_size
        cap = baseline * 2
        tag_path(root, _REPO_ID, inflated.tag_id).write_bytes(b"\x00" * (baseline * 3))

        with patch("muse.core.io.MAX_MSGPACK_BYTES", cap):
            tags = get_all_tags(root, _REPO_ID)
            seen_ids = {t.tag_id for t in tags}

        assert kept.tag_id in seen_ids, "Good tag was incorrectly dropped"
        assert inflated.tag_id not in seen_ids, "Oversized tag was not skipped"

    def test_list_releases_skips_oversized_files(
        self, tmp_path: pathlib.Path
    ) -> None:
        """list_releases returns an empty list when the only release file is oversized."""
        from muse.core.types import split_id

        root = _repo(tmp_path)
        algo, hex_digest = split_id(_REPO_ID)
        release_dir = releases_dir(root) / algo / hex_digest
        release_dir.mkdir(parents=True)

        # One byte over the patched cap of 100.
        oversized = release_dir / f"{'a' * 64}.msgpack"
        oversized.write_bytes(b"\x00" * 101)

        with patch("muse.core.io.MAX_MSGPACK_BYTES", 100):
            found = list_releases(root, _REPO_ID)
        assert found == [], "Oversized release should be skipped, not crash"
286
287
288	# ---------------------------------------------------------------------------
289	# Tier 3 — exact boundary behaviour
290	# ---------------------------------------------------------------------------
291
class TestExactBoundary:
    """At the boundary: MAX_MSGPACK_BYTES is the last allowed size."""

    def test_file_exactly_at_limit_is_read(self, tmp_path: pathlib.Path) -> None:
        """A file of exactly MAX_MSGPACK_BYTES bytes passes the size check.

        The content is not a valid store record, so an error is still
        expected; it must not be the size-limit error.
        """
        from muse.core.io import _read_msgpack

        test_limit = 256  # small limit for test speed
        path = tmp_path / "exactly_at_limit.msgpack"
        path.write_bytes(b"\x00" * test_limit)
        with patch("muse.core.io.MAX_MSGPACK_BYTES", test_limit):
            with pytest.raises(Exception) as exc_info:
                _read_msgpack(path)
        assert "MiB read limit" not in str(exc_info.value), (
            f"Got size-limit OSError at the boundary — should be parse error: {exc_info.value}"
        )

    def test_file_one_byte_over_limit_raises_oslimit_error(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A file of MAX_MSGPACK_BYTES + 1 bytes raises the size-limit OSError."""
        from muse.core.io import _read_msgpack

        test_limit = 256
        path = tmp_path / "one_over.msgpack"
        path.write_bytes(b"\x00" * (test_limit + 1))
        with patch("muse.core.io.MAX_MSGPACK_BYTES", test_limit):
            with pytest.raises(OSError, match="read limit"):
                _read_msgpack(path)

    def test_zero_byte_file_does_not_trigger_size_limit(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An empty file passes the size check but still fails to parse."""
        from muse.core.io import _read_msgpack

        path = tmp_path / "empty.msgpack"
        path.write_bytes(b"")
        with pytest.raises(Exception) as exc_info:
            _read_msgpack(path)
        # Without this check a size-limit OSError would satisfy the test too,
        # contradicting what the test name promises.
        assert "read limit" not in str(exc_info.value), (
            f"Empty file was rejected by the size guard: {exc_info.value}"
        )

    def test_size_limit_error_message_includes_filename_and_limit(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The size-limit OSError names the file and carries some size unit."""
        from muse.core.io import _read_msgpack

        test_limit = 1024  # 1 KiB for test speed
        path = tmp_path / "big.msgpack"
        path.write_bytes(b"\x00" * (test_limit + 1))
        with patch("muse.core.io.MAX_MSGPACK_BYTES", test_limit):
            with pytest.raises(OSError) as exc_info:
                _read_msgpack(path)
        msg = str(exc_info.value)
        assert "big.msgpack" in msg, f"Filename missing from error: {msg}"
        assert "KiB" in msg or "MiB" in msg or "bytes" in msg, (
            f"Size info missing from error: {msg}"
        )
355
356
357	# ---------------------------------------------------------------------------
358	# Tier 4 — per-value unpack limits
359	# ---------------------------------------------------------------------------
360
class TestPerValueUnpackLimits:
    """Per-value limits applied by ``_read_msgpack`` (string, bin, map, array, depth)."""

    def _pack_to_path(self, tmp_path: pathlib.Path, data: MsgpackValue) -> pathlib.Path:
        """Serialise *data* with msgpack into ``tmp_path / "test.msgpack"`` and return that path."""
        target = tmp_path / "test.msgpack"
        encoded = msgpack.packb(data, use_bin_type=True)
        target.write_bytes(encoded)
        return target

    def test_string_exceeding_max_str_len_rejected(self, tmp_path: pathlib.Path) -> None:
        """A 200-char string is rejected when the string limit is patched to 100."""
        path = self._pack_to_path(tmp_path, {"key": "x" * 200})
        from muse.core.io import _read_msgpack

        with patch("muse.core.io._MSGPACK_MAX_STR_LEN", 100), pytest.raises(Exception):
            _read_msgpack(path)

    def test_string_within_max_str_len_accepted(self, tmp_path: pathlib.Path) -> None:
        """A short string under the default limit decodes to a dict."""
        path = self._pack_to_path(tmp_path, {"key": "short"})
        from muse.core.io import _read_msgpack

        decoded = _read_msgpack(path)
        assert isinstance(decoded, dict)

    def test_binary_blob_rejected_in_store_records(self, tmp_path: pathlib.Path) -> None:
        """A msgpack bin-type value makes ``_read_msgpack`` raise.

        Store records are expected to carry no binary fields, so a bin value
        signals corruption or tampering and must fail during unpack instead
        of reaching callers as ``bytes``.
        """
        path = self._pack_to_path(tmp_path, {"body": b"some binary blob"})
        from muse.core.io import _read_msgpack

        # No patching here: the default limits already refuse bin values.
        with pytest.raises(Exception):
            _read_msgpack(path)

    def test_map_exceeding_max_map_len_rejected(self, tmp_path: pathlib.Path) -> None:
        """A 200-entry map is rejected when the map limit is patched to 100."""
        oversized_map: MsgpackDict = {str(n): n for n in range(200)}
        path = self._pack_to_path(tmp_path, oversized_map)
        from muse.core.io import _read_msgpack

        with patch("muse.core.io._MSGPACK_MAX_MAP_LEN", 100), pytest.raises(Exception):
            _read_msgpack(path)

    def test_array_exceeding_max_array_len_rejected(self, tmp_path: pathlib.Path) -> None:
        """A 200-element array is rejected when the array limit is patched to 100."""
        oversized_list: list[MsgpackValue] = list(range(200))
        path = self._pack_to_path(tmp_path, oversized_list)
        from muse.core.io import _read_msgpack

        with patch("muse.core.io._MSGPACK_MAX_ARRAY_LEN", 100), pytest.raises(Exception):
            _read_msgpack(path)

    def _make_deep_nested_msgpack(self, depth: int) -> bytes:
        """Return raw msgpack bytes for a dict nested *depth* levels deep.

        The bytes are assembled by hand rather than via ``msgpack.packb`` so
        that building very deep documents needs no recursion.  Layout::

            fixmap(1) fixstr("x") fixmap(1) fixstr("x") ... fixmap(0)

        Every level contributes three bytes (``0x81``, then ``0xa1 0x78``)
        and the innermost value is a single ``0x80`` byte.
        """
        one_level = b"\x81\xa1x"  # fixmap with 1 entry, key = fixstr "x"
        innermost = b"\x80"  # fixmap with 0 entries
        return one_level * depth + innermost

    def test_deeply_nested_map_raises_stack_error(self, tmp_path: pathlib.Path) -> None:
        """A 10 000-level nested document makes ``_read_msgpack`` raise.

        The file is only about 30 KiB, so the size cap is not what stops it.
        """
        path = tmp_path / "deep_nest.msgpack"
        path.write_bytes(self._make_deep_nested_msgpack(10_000))
        from muse.core.io import _read_msgpack

        with pytest.raises(Exception):  # msgpack.exceptions.StackError
            _read_msgpack(path)

    def test_deeply_nested_terminates_quickly(self, tmp_path: pathlib.Path) -> None:
        """Reading the 10 000-level document finishes (or fails) in under a second."""
        path = tmp_path / "deep_nest_perf.msgpack"
        path.write_bytes(self._make_deep_nested_msgpack(10_000))
        from muse.core.io import _read_msgpack

        began = time.perf_counter()
        try:
            _read_msgpack(path)
        except Exception:
            # Only the elapsed time matters here; the failure itself is
            # asserted by the preceding test.
            pass
        elapsed = time.perf_counter() - began
        assert elapsed < 1.0, (
            f"Deeply nested document took {elapsed:.3f}s to fail — not fast enough"
        )

    def test_valid_large_map_within_limits_is_accepted(self, tmp_path: pathlib.Path) -> None:
        """A 1000-entry ``{path: object_id}`` map decodes with all entries intact."""
        manifest = {f"src/file_{i:04d}.py": fake_id(f"obj-{i}") for i in range(1000)}
        path = tmp_path / "big_valid.msgpack"
        path.write_bytes(msgpack.packb(manifest, use_bin_type=True))
        from muse.core.io import _read_msgpack

        decoded = _read_msgpack(path)
        assert isinstance(decoded, dict)
        assert len(decoded) == 1000
477
478
479	# ---------------------------------------------------------------------------
480	# Tier 5 — index file protection
481	# ---------------------------------------------------------------------------
482
class TestIndexReadProtection:
    """Size protection for the index loaders in ``muse.core.indices``.

    The loaders are capped by ``muse.core.indices._MAX_INDEX_BYTES``, which
    these tests patch or compare against ``MAX_MSGPACK_BYTES``.
    """

    def test_load_symbol_history_skips_oversized_index(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An oversized symbol history index returns an empty dict, not OOM."""
        indices_dir(tmp_path).mkdir(parents=True)
        index_path = indices_dir(tmp_path) / "symbol_history.msgpack"
        index_path.write_bytes(b"\x00" * 101)
        with patch("muse.core.indices._MAX_INDEX_BYTES", 100):
            result = load_symbol_history(tmp_path)
        assert result == {}, "Oversized index must return empty dict, not crash"

    def test_load_hash_occurrence_skips_oversized_index(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An oversized hash_occurrence index returns an empty dict."""
        indices_dir(tmp_path).mkdir(parents=True)
        index_path = indices_dir(tmp_path) / "hash_occurrence.msgpack"
        index_path.write_bytes(b"\x00" * 101)
        with patch("muse.core.indices._MAX_INDEX_BYTES", 100):
            result = load_hash_occurrence(tmp_path)
        assert result == {}

    def test_index_size_limit_is_more_generous_than_store(self) -> None:
        """Index files are allowed to be larger than store records."""
        from muse.core.indices import _MAX_INDEX_BYTES

        assert _MAX_INDEX_BYTES > MAX_MSGPACK_BYTES, (
            "Index limit should be larger than store limit — indices grow with repo size"
        )

    def test_index_read_checks_stat_before_read_bytes(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The index stat check must fire before read_bytes (no allocation).

        ``stat`` is patched to report 1 GiB while the real file is one byte,
        and ``read_bytes`` is replaced by a tracker that records any call.
        """
        indices_dir(tmp_path).mkdir(parents=True)
        index_path = indices_dir(tmp_path) / "symbol_history.msgpack"
        index_path.write_bytes(b"\x85")  # 1 byte — well within any size limit
        read_bytes_called = [False]
        path_cls = type(index_path)
        # Capture the unpatched function so the tracker can delegate to it.
        real_rb = path_cls.read_bytes

        def tracking_rb(path_self: pathlib.Path) -> bytes:
            # Patched onto the class, so it is invoked as a method and must
            # accept the instance.  A zero-argument function would raise
            # TypeError before setting the flag, and a loader that swallows
            # that error would let this test pass with no stat guard at all.
            read_bytes_called[0] = True
            return real_rb(path_self)

        stat_result = MagicMock()
        stat_result.st_size = 1024 * 1024 * 1024  # 1 GiB — way over limit

        with patch.object(path_cls, "stat", return_value=stat_result):
            with patch.object(path_cls, "read_bytes", tracking_rb):
                result = load_symbol_history(tmp_path)

        assert result == {}
        assert not read_bytes_called[0], "read_bytes was called before the stat check!"
538
539
540	# ---------------------------------------------------------------------------
541	# Tier 6 — warning log on oversized file
542	# ---------------------------------------------------------------------------
543
class TestWarningLogOnOversizedFile:
    """A corrupt store object must leave a trace in the logs.

    Both tests capture records at WARNING level and above and require at
    least one message mentioning "corrupt" (either capitalisation), so
    operators can spot corruption or tampering in monitoring.
    """

    def test_warning_logged_for_corrupt_commit(
        self, tmp_path: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Reading a corrupt commit object returns None and logs a "corrupt" message."""
        root = _repo(tmp_path)
        record = _commit(10)
        write_commit(root, record)
        _obj_path(root, record.commit_id).write_bytes(b"\x00" * 51)

        with caplog.at_level(logging.WARNING, logger="muse.core.store"):
            outcome = read_commit(root, record.commit_id)

        assert outcome is None
        messages = [rec.message for rec in caplog.records]
        mentions_corruption = any(
            "Corrupt" in text or "corrupt" in text for text in messages
        )
        assert mentions_corruption, f"No log for corrupt commit. Records: {messages}"

    def test_warning_logged_for_corrupt_snapshot(
        self, tmp_path: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Reading a corrupt snapshot object returns None and logs a "corrupt" message."""
        root = _repo(tmp_path)
        record = _snapshot(10)
        write_snapshot(root, record)
        _obj_path(root, record.snapshot_id).write_bytes(b"\x00" * 51)

        with caplog.at_level(logging.WARNING, logger="muse.core.store"):
            outcome = read_snapshot(root, record.snapshot_id)

        assert outcome is None
        messages = [rec.message for rec in caplog.records]
        mentions_corruption = any(
            "Corrupt" in text or "corrupt" in text for text in messages
        )
        assert mentions_corruption, f"No log for corrupt snapshot. Records: {messages}"
586
587
588	# ---------------------------------------------------------------------------
589	# Tier 7 — CLI: clean JSON error, no traceback
590	# ---------------------------------------------------------------------------
591
class TestPlumbingReadCommitOversized:
    """``muse read-commit`` on a corrupt commit object must fail cleanly.

    The CLI output may signal an error, but it must never contain a Python
    traceback.
    """

    def test_corrupt_commit_produces_json_error_not_traceback(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Write a commit, corrupt its object file, run read-commit, inspect the output.

        If the output contains a line starting with ``{``, the first such
        line must parse as JSON and carry an ``"error"`` key.  Otherwise the
        combined output must at least mention "not found" or "error".
        """
        import json
        from tests.cli_test_helper import CliRunner

        root = _repo(tmp_path)
        c = _commit(99)
        write_commit(root, c)

        # Corrupt the commit object file (unified store).
        _obj_path(root, c.commit_id).write_bytes(b"\x00" * 101)

        runner = CliRunner()
        result = runner.invoke(
            None,
            ["read-commit", c.commit_id],
            env={"MUSE_REPO_ROOT": str(root)},
        )

        # A non-zero exit code is acceptable; a Python traceback is not.
        assert "Traceback" not in (result.output or ""), (
            f"CLI produced a Python traceback for oversized commit:\n{result.output}"
        )
        assert "Traceback" not in (result.stderr or ""), (
            f"CLI stderr has a Python traceback:\n{result.stderr}"
        )

        combined = (result.output or "") + (result.stderr or "")
        stripped_lines = (line.strip() for line in combined.splitlines())
        first_json_line = next(
            (line for line in stripped_lines if line.startswith("{")), None
        )

        if first_json_line is None:
            # No JSON blob at all: fall back to requiring a readable error.
            assert (
                "not found" in combined.lower()
                or "error" in combined.lower()
            ), f"No useful error in CLI output:\n{combined}"
            return

        # Only the decode step can raise JSONDecodeError, so only it is guarded.
        try:
            parsed = json.loads(first_json_line)
        except json.JSONDecodeError as exc:
            pytest.fail(f"Output is not valid JSON: {exc}\nOutput:\n{combined}")
        assert "error" in parsed, f"JSON lacks 'error' key: {parsed}"
642
643
644	# ---------------------------------------------------------------------------
645	# Tier 8 — round-trip: valid files still read correctly
646	# ---------------------------------------------------------------------------
647
class TestValidFilesUnaffected:
    """Round-trip checks: well-formed records still read back after the guard."""

    def test_read_commit_roundtrip_unaffected(self, tmp_path: pathlib.Path) -> None:
        """A written commit reads back with the same ID and message."""
        root = _repo(tmp_path)
        original = _commit(42)
        write_commit(root, original)

        loaded = read_commit(root, original.commit_id)
        assert loaded is not None
        assert loaded.commit_id == original.commit_id
        assert loaded.message == original.message

    def test_read_snapshot_roundtrip_unaffected(self, tmp_path: pathlib.Path) -> None:
        """A written snapshot reads back with the same ID."""
        root = _repo(tmp_path)
        original = _snapshot(42)
        write_snapshot(root, original)

        loaded = read_snapshot(root, original.snapshot_id)
        assert loaded is not None
        assert loaded.snapshot_id == original.snapshot_id

    def test_snapshot_with_large_manifest_reads_correctly(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A snapshot whose manifest has 1000 entries reads back complete."""
        root = _repo(tmp_path)
        manifest = {
            f"src/file_{i:05d}.py": fake_id(f"obj-{i}")
            for i in range(1000)
        }
        sid = compute_snapshot_id(manifest)
        write_snapshot(root, SnapshotRecord(snapshot_id=sid, manifest=manifest))

        loaded = read_snapshot(root, sid)
        assert loaded is not None
        assert len(loaded.manifest) == 1000

    def test_commit_with_long_message_reads_correctly(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A commit carrying a 65536-character message reads back at full length."""
        root = _repo(tmp_path)
        body = "a" * 65536
        author = "tester"
        when = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        empty_snapshot_id = compute_snapshot_id({})
        cid = compute_commit_id(
            parent_ids=[],
            snapshot_id=empty_snapshot_id,
            message=body,
            committed_at_iso=when.isoformat(),
            author=author,
        )
        record = CommitRecord(
            commit_id=cid,
            branch="main",
            snapshot_id=empty_snapshot_id,
            message=body,
            committed_at=when,
            author=author,
            parent_commit_id=None,
            parent2_commit_id=None,
        )
        write_commit(root, record)

        loaded = read_commit(root, cid)
        assert loaded is not None
        assert len(loaded.message) == 65536
713
714
715	# ---------------------------------------------------------------------------
716	# Tier 9 — performance: size check adds < 1 ms per read
717	# ---------------------------------------------------------------------------
718
class TestSizeCheckPerformance:
    """Timing bounds for normal reads and for rejecting a bad commit object."""

    @pytest.mark.perf
    def test_stat_check_overhead_under_1ms_per_read(
        self, tmp_path: pathlib.Path
    ) -> None:
        """100 sequential read_commit calls with the size guard active < 100ms total."""
        root = _repo(tmp_path)
        commits = [_commit(i) for i in range(100)]
        for c in commits:
            write_commit(root, c)

        start = time.perf_counter()
        for c in commits:
            result = read_commit(root, c.commit_id)
            assert result is not None
        elapsed = time.perf_counter() - start

        assert elapsed < 0.1, (
            f"100 read_commit calls took {elapsed:.3f}s — "
            "size check is adding too much overhead (< 100ms expected)"
        )

    @pytest.mark.perf
    def test_oversized_rejection_under_1ms(self, tmp_path: pathlib.Path) -> None:
        """Rejecting a bad commit object takes under 1 ms per call on average."""
        root = _repo(tmp_path)
        c = _commit(200)
        write_commit(root, c)
        # Corrupt the file that the other tests in this module corrupt for
        # read_commit (the object-store path).  Writing a separate
        # ``<commit_id>.msgpack`` under commits_dir would leave that object
        # intact, so the loop below would time successful reads instead.
        _obj_path(root, c.commit_id).write_bytes(b"\x00" * 101)

        with patch("muse.core.io.MAX_MSGPACK_BYTES", 100):
            # Confirm outside the timed region that the rejection path is hit.
            assert read_commit(root, c.commit_id) is None

            start = time.perf_counter()
            for _ in range(1000):
                read_commit(root, c.commit_id)
            elapsed = time.perf_counter() - start

        assert elapsed < 1.0, (
            f"1000 oversized-rejection calls took {elapsed:.3f}s (> 1ms each)"
        )

File History 4 commits

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago

sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago

sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago

sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor ⚠ 29 days ago

function _repo

function _commit

function _snapshot

function _tag

class TestConstantExport

function test_max_msgpack_bytes_is_exported

function test_max_msgpack_bytes_is_int

function test_max_msgpack_bytes_less_than_256mib

class TestStatCheckBeforeRead

function _oversized_stat

function test_read_commit_corrupt_object_returns_none

function test_read_snapshot_corrupt_object_returns_none

class TestReadFunctionsReturnNoneOnOversize

function test_read_commit_returns_none_for_corrupt_object

function test_read_snapshot_returns_none_for_corrupt_object

function test_get_all_tags_skips_oversized_files

function test_list_releases_skips_oversized_files

class TestExactBoundary

function test_file_exactly_at_limit_is_read

function test_file_one_byte_over_limit_raises_oslimit_error

function test_zero_byte_file_does_not_trigger_size_limit

function test_size_limit_error_message_includes_filename_and_limit

class TestPerValueUnpackLimits

function _pack_to_path

function test_string_exceeding_max_str_len_rejected

function test_string_within_max_str_len_accepted

function test_binary_blob_rejected_in_store_records

function test_map_exceeding_max_map_len_rejected

function test_array_exceeding_max_array_len_rejected

function _make_deep_nested_msgpack

function test_deeply_nested_map_raises_stack_error

function test_deeply_nested_terminates_quickly

function test_valid_large_map_within_limits_is_accepted

class TestIndexReadProtection

function test_load_symbol_history_skips_oversized_index

function test_load_hash_occurrence_skips_oversized_index

function test_index_size_limit_is_more_generous_than_store

function test_index_read_checks_stat_before_read_bytes

function tracking_rb

class TestWarningLogOnOversizedFile

function test_warning_logged_for_corrupt_commit

function test_warning_logged_for_corrupt_snapshot

class TestPlumbingReadCommitOversized

function test_corrupt_commit_produces_json_error_not_traceback

class TestValidFilesUnaffected

function test_read_commit_roundtrip_unaffected

function test_read_snapshot_roundtrip_unaffected

function test_snapshot_with_large_manifest_reads_correctly

function test_commit_with_long_message_reads_correctly

class TestSizeCheckPerformance

function test_stat_check_overhead_under_1ms_per_read

function test_oversized_rejection_under_1ms

Pathtests/test_integrity_I4_msgpack_size.py

Lines760

Size30.6 KB

LangPython

Refsha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2

Object ID

sha256:01c2876b4c3b39d3f5134748c023394b8f88269935e050ea4bd65896e9b5f8f7…

Last commit

sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2

fix: remove commit_exists filter from have anchor…

21 days ago

Quick links

Blame History