gabriel/muse — blame/sha256:d/test_integrity_I8_object_store_scale.py

1 files

1 commits

0 hotspots

0 🧊 dead

0 💥 blast risk

sha256:b adding issues docs to bust staging mpack prebuild cache. · gabriel · Jun 20, 2026

1	"""I-8: Object store at Linux scale.
2
3	Scenario: 850 000 commits × ~20 objects per commit = 17 million objects.
4	2-char sharding → 256 shards × ~66 000 files each. On Linux ext4 (and
5	macOS APFS) directory entries above ~100 000 per directory trigger visible
6	lookup degradation. This suite proves:
7
8	1. File mode 0o444 — every new object is written read-only.
9	2. Stale temp cleanup — .obj-tmp-* files from a prior crash are removed.
10	3. has_object O(log n) lookup — timing at 1k / 10k / 100k objects proves
11	sub-linear growth (ext4 / APFS use hash-tree / B-tree indexing).
12	4. 4-char sharding — 65 536 shards; object path layout changes correctly.
13	5. Configurable via [limits] shard_prefix_length in config.toml.
14	6. Dual-lookup / migration — objects written at 2-char prefix are still
15	found after switching config to 4-char.
16	7. shard_prefix_length=4 reflected in get_config_value and get_limit.
17	8. Robustness — invalid shard_prefix_length values are ignored.
18	9. Permission enforcement — direct write to a 0o444 object raises
19	PermissionError, confirming the OS-level immutability guard.
20	10. Shard count correctness — 4-char yields 65 536 possible shards.
21	11. cleanup_stale_object_temps is idempotent (double-call safe).
22	12. _object_path_with_fallback returns primary path when it exists.
23	"""
24
25	from __future__ import annotations
26
27	import os
28	import pathlib
29	import stat
30	import time
31	import tomllib
32
33	import pytest
34
35	from muse.core.object_store import (
36	_object_path_with_fallback,
37	cleanup_stale_object_temps,
38	has_object,
39	iter_stored_objects,
40	object_path,
41	objects_dir,
42	read_object,
43	restore_object,
44	write_object,
45	write_object_from_path,
46	_OBJECT_MODE,
47	_DEFAULT_SHARD_PREFIX_LEN,
48	_VALID_SHARD_PREFIX_LENS,
49	)
50	from muse.cli.config import get_limit, get_config_value
51	from muse.core.types import Manifest, blob_id, fake_id, long_id, split_id
52	from muse.core.paths import commits_dir, config_toml_path, head_path, muse_dir, objects_dir, snapshots_dir
53	from muse.core.commits import read_commit
54	from muse.core.snapshots import read_snapshot
55
56
def _repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Turn *tmp_path* into a bare repo root by creating its Muse metadata directory.

    Returns *tmp_path* itself so callers can use it directly as the repo root.
    """
    metadata_dir = muse_dir(tmp_path)
    metadata_dir.mkdir()
    return tmp_path
60
61
def _write_config(repo: pathlib.Path, shard_prefix_length: int) -> None:
    """Write the repo's config file with a [core] branch and a [limits] shard_prefix_length entry.

    The file location comes from ``config_toml_path(repo)``; any existing
    content is overwritten.
    """
    core_section = "[core]\nbranch = \"main\"\n\n"
    limits_section = f"[limits]\nshard_prefix_length = {shard_prefix_length}\n"
    target = config_toml_path(repo)
    target.write_text(core_section + limits_section, encoding="utf-8")
69
70
71	# ---------------------------------------------------------------------------
72	# 0. Regression: restore_object must NOT propagate 0o444 to working tree
73	# ---------------------------------------------------------------------------
74
75
class TestRestoreObjectMode:
    """Pin the file mode that ``restore_object`` gives working-tree files.

    Stored objects are read-only (0o444); files restored into the working
    tree must be 0o644 so they stay editable.

    Background recorded with the original regression: shutil.copy2 copies the
    permissions of its source (the stored object). Once I-8 made stored
    objects 0o444, restore_object started producing read-only working-tree
    files. These tests exist to keep that fix from regressing.
    """

    def test_restore_object_produces_0o644_file(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A file produced by restore_object has mode 0o644.

        Users and agents must be able to edit restored files without a
        manual chmod, even though the stored object itself is 0o444.
        """
        repo = _repo(tmp_path)
        payload = b"content that will be restored to working tree"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)

        target = tmp_path / "restored.txt"
        restored = restore_object(repo, object_id, target)
        assert restored

        actual_mode = stat.S_IMODE(target.stat().st_mode)
        assert actual_mode == 0o644, (
            f"restore_object produced mode {oct(actual_mode)} — working-tree files "
            f"must be 0o644 so they are editable. "
            f"(Stored object is 0o444; shutil.copy2 must not propagate that mode.)"
        )

    def test_stored_object_is_0o444_but_restore_is_0o644(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The store copy is 0o444 while the restored copy is 0o644.

        Invariant under test: immutable in the store, writable in the
        working tree.
        """
        repo = _repo(tmp_path)
        payload = b"immutable in store, writable in tree"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)

        store_file = object_path(repo, object_id)
        stored_mode = stat.S_IMODE(store_file.stat().st_mode)
        assert stored_mode == 0o444, f"Stored object should be 0o444, got {oct(stored_mode)}"

        # The destination sits in a sub-directory that has not been created here.
        target = tmp_path / "workdir" / "file.txt"
        restore_object(repo, object_id, target)
        restored_mode = stat.S_IMODE(target.stat().st_mode)
        assert restored_mode == 0o644, (
            f"Restored working-tree file should be 0o644, got {oct(restored_mode)}"
        )

    def test_restore_object_content_intact_after_mode_fix(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Restored bytes equal the written bytes — the mode fix loses no data."""
        repo = _repo(tmp_path)
        payload = b"content integrity check after mode fix" * 50
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)

        target = tmp_path / "check.bin"
        restore_object(repo, object_id, target)
        assert target.read_bytes() == payload

    def test_restore_large_object_is_0o644(self, tmp_path: pathlib.Path) -> None:
        """A 512 KiB blob stored via write_object_from_path also restores as 0o644."""
        repo = _repo(tmp_path)
        payload = os.urandom(512 * 1024)  # 512 KiB
        object_id = blob_id(payload)
        source_file = tmp_path / "large.bin"
        source_file.write_bytes(payload)
        write_object_from_path(repo, object_id, source_file)

        target = tmp_path / "large_restored.bin"
        restore_object(repo, object_id, target)
        actual_mode = stat.S_IMODE(target.stat().st_mode)
        assert actual_mode == 0o644, (
            f"Large blob restore produced mode {oct(actual_mode)}, expected 0o644"
        )
160
161
162	# ---------------------------------------------------------------------------
163	# 1. File mode 0o444 — immutability enforced at the OS level
164	# ---------------------------------------------------------------------------
165
166
class TestObjectMode:
    """Stored objects are written read-only (0o444) and remain readable."""

    def test_write_object_produces_0o444_file(self, tmp_path: pathlib.Path) -> None:
        """Every blob written by write_object must be mode 0o444."""
        repo = _repo(tmp_path)
        data = b"immutable content"
        oid = blob_id(data)
        write_object(repo, oid, data)
        p = object_path(repo, oid)
        mode = stat.S_IMODE(p.stat().st_mode)
        assert mode == 0o444, (
            f"Object {oid[:8]} was written with mode {oct(mode)} instead of 0o444. "
            "Content-addressed objects must be read-only."
        )

    def test_write_object_from_path_produces_0o444_file(
        self, tmp_path: pathlib.Path
    ) -> None:
        """write_object_from_path (large-blob path) must also produce 0o444."""
        repo = _repo(tmp_path)
        data = b"large blob via path" * 100
        oid = blob_id(data)
        src = tmp_path / "src.bin"
        src.write_bytes(data)
        write_object_from_path(repo, oid, src)
        p = object_path(repo, oid)
        mode = stat.S_IMODE(p.stat().st_mode)
        assert mode == 0o444, (
            f"write_object_from_path produced mode {oct(mode)} instead of 0o444."
        )

    def test_object_mode_constant(self) -> None:
        """_OBJECT_MODE must equal 0o444 — no accidental changes."""
        assert _OBJECT_MODE == 0o444

    def test_write_then_read_respects_mode(self, tmp_path: pathlib.Path) -> None:
        """Round-trip: content can be read back even though the file is 0o444."""
        repo = _repo(tmp_path)
        data = b"read-only but readable"
        oid = blob_id(data)
        write_object(repo, oid, data)
        assert read_object(repo, oid) == data

    def test_direct_overwrite_blocked_by_os(self, tmp_path: pathlib.Path) -> None:
        """Opening a 0o444 object for writing must raise PermissionError.

        This is the OS-level immutability guarantee: even a bug that calls
        open(path, 'wb') on a stored object is caught before any bytes are
        written.

        Skipped when running as the superuser: root bypasses file-mode
        permission checks, so the write would succeed and the expectation
        below could never hold (typical for CI inside a container).
        """
        # os.geteuid only exists on Unix; where it is absent the check is skipped
        # and the test runs as before.
        if hasattr(os, "geteuid") and os.geteuid() == 0:
            pytest.skip("running as root — 0o444 is not enforced against the superuser")

        repo = _repo(tmp_path)
        data = b"must not be overwritten"
        oid = blob_id(data)
        write_object(repo, oid, data)
        p = object_path(repo, oid)
        with pytest.raises(PermissionError):
            p.write_bytes(b"attacker-controlled content")
        # Content must be intact.
        assert read_object(repo, oid) == data

    def test_multiple_objects_all_0o444(self, tmp_path: pathlib.Path) -> None:
        """Batch write: every object file must be 0o444."""
        repo = _repo(tmp_path)
        for i in range(50):
            data = f"batch-object-{i}".encode()
            write_object(repo, blob_id(data), data)
        # iter_stored_objects yields pairs; only the file path (second item) is needed.
        for _, obj_file in iter_stored_objects(repo):
            mode = stat.S_IMODE(obj_file.stat().st_mode)
            assert mode == 0o444, f"{obj_file.name} has mode {oct(mode)}, expected 0o444"
236
237
238	# ---------------------------------------------------------------------------
239	# 2. Stale temp cleanup
240	# ---------------------------------------------------------------------------
241
242
def _make_stale(path: pathlib.Path, content: bytes = b"stale") -> None:
    """Create *path* with *content*, then push its timestamps back to the Unix epoch.

    cleanup_stale_object_temps only deletes files older than
    _CLEANUP_MIN_AGE_SECS (60 s), so a temp file that a test has just created
    would be left alone and the cleanup call would report 0. Stamping the
    file with the epoch (1970-01-01) makes it look decades old.
    """
    epoch = 0
    path.write_bytes(content)
    # Both atime and mtime are set to the epoch, so the file's age exceeds 60 s.
    os.utime(path, times=(epoch, epoch))
254
255
class TestStaleTempCleanup:
    """cleanup_stale_object_temps removes leftover temp files and nothing else."""

    def test_cleanup_removes_obj_tmp_files(self, tmp_path: pathlib.Path) -> None:
        """An aged .obj-tmp-* file in a shard directory is deleted and counted."""
        repo = _repo(tmp_path)
        shard_dir = objects_dir(repo) / "sha256" / "ab"
        shard_dir.mkdir(parents=True)
        leftover = shard_dir / ".obj-tmp-crash"
        _make_stale(leftover, b"partial write from prior SIGKILL")
        assert leftover.exists()

        removed_count = cleanup_stale_object_temps(repo)

        assert removed_count == 1
        assert not leftover.exists()

    def test_cleanup_removes_restore_tmp_files(self, tmp_path: pathlib.Path) -> None:
        """An aged .restore-tmp-* file is deleted as well."""
        repo = _repo(tmp_path)
        shard_dir = objects_dir(repo) / "sha256" / "cd"
        shard_dir.mkdir(parents=True)
        leftover = shard_dir / ".restore-tmp-12345"
        _make_stale(leftover, b"partial restore")

        removed_count = cleanup_stale_object_temps(repo)

        assert removed_count == 1
        assert not leftover.exists()

    def test_cleanup_preserves_real_objects(self, tmp_path: pathlib.Path) -> None:
        """A genuine stored object survives cleanup and nothing is counted."""
        repo = _repo(tmp_path)
        payload = b"real object"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)

        removed_count = cleanup_stale_object_temps(repo)

        assert removed_count == 0
        assert has_object(repo, object_id)

    def test_cleanup_nonexistent_store_returns_zero(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With no objects directory on disk, cleanup returns 0 and does not raise."""
        repo = _repo(tmp_path)
        # Nothing has been written, so the objects directory has not been created.
        assert cleanup_stale_object_temps(repo) == 0

    def test_cleanup_is_idempotent(self, tmp_path: pathlib.Path) -> None:
        """A second cleanup call right after the first finds nothing left to remove."""
        repo = _repo(tmp_path)
        shard_dir = objects_dir(repo) / "sha256" / "ef"
        shard_dir.mkdir(parents=True)
        _make_stale(shard_dir / ".obj-tmp-stale")

        first_pass = cleanup_stale_object_temps(repo)
        assert first_pass == 1
        second_pass = cleanup_stale_object_temps(repo)
        assert second_pass == 0

    def test_cleanup_multiple_shards(self, tmp_path: pathlib.Path) -> None:
        """One aged temp file in each of three shard directories — all three are removed."""
        repo = _repo(tmp_path)
        sha_root = objects_dir(repo) / "sha256"
        for shard_name in ("00", "7f", "ff"):
            shard_dir = sha_root / shard_name
            shard_dir.mkdir(parents=True)
            _make_stale(shard_dir / f".obj-tmp-{shard_name}")

        removed_count = cleanup_stale_object_temps(repo)
        assert removed_count == 3
322
323
324	# ---------------------------------------------------------------------------
325	# 3. has_object O(log n) performance — 1k / 10k / 100k files per shard
326	# ---------------------------------------------------------------------------
327
328
class TestHasObjectPerformance:
    """Prove that has_object does not degrade to O(n).

    ext4 and APFS use hash-tree / B-tree directory indexing so filename
    lookup is O(log n). At n=100k the ratio to n=1k should be < 10×
    (log2(100000) / log2(1000) ≈ 1.66× in theory; we allow 10× for
    scheduler jitter).

    The dummy files are created in the directory that actually holds the
    target object (``object_path(repo, oid).parent``). Populating a sibling
    directory instead would leave the looked-up shard with a single entry and
    make every timing below meaningless.
    """

    def _shard_dir_for(self, repo: pathlib.Path, oid: str) -> pathlib.Path:
        """Return the directory in which *oid* is stored under the default sharding."""
        return object_path(repo, oid).parent

    def _populate_shard(
        self, shard_dir: pathlib.Path, n: int
    ) -> list[str]:
        """Create n one-byte dummy files in shard_dir and return their names."""
        shard_dir.mkdir(parents=True, exist_ok=True)
        names: list[str] = []
        for i in range(n):
            name = fake_id(f"dummy-{i}")
            (shard_dir / name).write_bytes(b"x")
            names.append(name)
        return names

    def _time_has_object(
        self,
        repo: pathlib.Path,
        oid: str,
        iterations: int = 200,
    ) -> float:
        """Return average has_object latency in milliseconds over iterations."""
        # Warm up filesystem cache so the first cold lookup is not measured.
        for _ in range(10):
            has_object(repo, oid)
        t0 = time.perf_counter()
        for _ in range(iterations):
            has_object(repo, oid)
        # Seconds per call, converted to milliseconds.
        return (time.perf_counter() - t0) / iterations * 1000

    def test_has_object_under_10ms_at_100k_per_shard(
        self, tmp_path: pathlib.Path
    ) -> None:
        """has_object lookup < 10 ms with 100 000 files in the target shard."""
        repo = _repo(tmp_path)
        target_data = b"target-object-100k-test"
        target_oid = blob_id(target_data)

        # Fill the very shard the target will live in with 100k dummy files.
        self._populate_shard(self._shard_dir_for(repo, target_oid), 100_000)
        # Write the real target object.
        write_object(repo, target_oid, target_data)

        avg_ms = self._time_has_object(repo, target_oid, iterations=100)
        assert avg_ms < 10.0, (
            f"has_object averaged {avg_ms:.3f} ms at 100k files per shard — "
            f"exceeded 10 ms budget. Filesystem lookup may be O(n)."
        )

    def test_lookup_growth_is_sublinear(self, tmp_path: pathlib.Path) -> None:
        """Lookup time at 10k files is < 10× time at 1k files (sub-linear proof)."""
        repo = _repo(tmp_path)

        # 1k shard
        data1k = b"object-for-1k-test"
        oid1k = blob_id(data1k)
        self._populate_shard(self._shard_dir_for(repo, oid1k), 1_000)
        write_object(repo, oid1k, data1k)
        time_1k = self._time_has_object(repo, oid1k, iterations=500)

        # 10k shard (different repo so the shard is clean)
        repo2_root = tmp_path / "repo2"
        repo2_root.mkdir()
        repo2 = _repo(repo2_root)
        data10k = b"object-for-10k-test"
        oid10k = blob_id(data10k)
        self._populate_shard(self._shard_dir_for(repo2, oid10k), 10_000)
        write_object(repo2, oid10k, data10k)
        time_10k = self._time_has_object(repo2, oid10k, iterations=500)

        # Sub-linear: 10× more files should not take 10× longer.
        # The 0.001 ms floor guards against dividing by a near-zero baseline.
        ratio = time_10k / max(time_1k, 0.001)
        assert ratio < 10.0, (
            f"has_object at 10k took {time_10k:.3f} ms vs {time_1k:.3f} ms at 1k "
            f"(ratio={ratio:.2f}×). Lookup appears O(n) — investigate filesystem."
        )

    def test_has_object_absent_is_fast(self, tmp_path: pathlib.Path) -> None:
        """Negative lookup (object not present) is also fast at 100k per shard."""
        repo = _repo(tmp_path)
        absent_data = b"this-object-will-not-be-written"
        absent_oid = blob_id(absent_data)

        # Fill the shard where the object WOULD live; do NOT write the object itself.
        self._populate_shard(self._shard_dir_for(repo, absent_oid), 100_000)

        avg_ms = self._time_has_object(repo, absent_oid, iterations=100)
        assert avg_ms < 10.0, (
            f"Negative has_object averaged {avg_ms:.3f} ms at 100k files — "
            f"exceeded 10 ms budget."
        )
438
439
440	# ---------------------------------------------------------------------------
441	# 4 & 5. 4-char sharding — configurable via [limits] shard_prefix_length
442	# ---------------------------------------------------------------------------
443
444
class TestFourCharSharding:
    """Shard prefix length: default of 2, optional 4, driven by [limits] shard_prefix_length."""

    def test_default_prefix_length_is_two(self, tmp_path: pathlib.Path) -> None:
        """Without a config file the limit resolves to 2 (256 shards)."""
        repo = _repo(tmp_path)
        configured = get_limit("shard_prefix_length", repo)
        assert configured == 2

    def test_config_sets_prefix_length_to_four(self, tmp_path: pathlib.Path) -> None:
        """A config with shard_prefix_length = 4 is reported as 4 by get_limit."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        configured = get_limit("shard_prefix_length", repo)
        assert configured == 4

    def test_object_path_uses_four_char_prefix(self, tmp_path: pathlib.Path) -> None:
        """prefix_len=4 splits the hex into a 4-char directory and a 60-char file name."""
        repo = _repo(tmp_path)
        object_id = long_id("abcd" + "1" * 60)
        location = object_path(repo, object_id, prefix_len=4)
        assert location.parent.name == "abcd"
        assert location.name == "1" * 60

    def test_object_path_default_still_two_char(self, tmp_path: pathlib.Path) -> None:
        """Omitting prefix_len yields the 2-char directory layout."""
        repo = _repo(tmp_path)
        object_id = long_id("abcd" + "1" * 60)
        location = object_path(repo, object_id)
        assert location.parent.name == "ab"
        assert location.name == "cd" + "1" * 60

    def test_write_and_read_with_four_char_config(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With 4-char sharding configured, a written object lands at the 4-char path and reads back."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"four char shard test"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)

        # The object must be at a 4-char prefix path.
        location = object_path(repo, object_id, prefix_len=4)
        assert location.exists(), f"Object not found at 4-char path: {location}"
        assert read_object(repo, object_id) == payload

    def test_four_char_object_is_0o444(self, tmp_path: pathlib.Path) -> None:
        """The read-only mode also applies to objects stored under 4-char sharding."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"mode check in 4-char shard"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)

        location = object_path(repo, object_id, prefix_len=4)
        assert stat.S_IMODE(location.stat().st_mode) == 0o444

    def test_65536_shard_space(self) -> None:
        """Arithmetic check: a 4-char hex prefix spans 16^4 = 65 536 values."""
        assert 16**4 == 65_536

    def test_valid_shard_prefix_lens(self) -> None:
        """_VALID_SHARD_PREFIX_LENS is exactly the frozenset {2, 4}."""
        assert _VALID_SHARD_PREFIX_LENS == frozenset({2, 4})

    def test_default_shard_prefix_len_constant(self) -> None:
        """_DEFAULT_SHARD_PREFIX_LEN is 2."""
        assert _DEFAULT_SHARD_PREFIX_LEN == 2

    def test_invalid_shard_prefix_length_ignored(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A configured value of 3 (outside {2, 4}) is ignored; get_limit reports 2."""
        repo = _repo(tmp_path)
        config_file = config_toml_path(repo)
        config_file.write_text(
            "[limits]\nshard_prefix_length = 3\n", encoding="utf-8"
        )
        assert get_limit("shard_prefix_length", repo) == 2

    def test_get_config_value_returns_shard_prefix_length(
        self, tmp_path: pathlib.Path
    ) -> None:
        """get_config_value reports the configured value as the string "4"."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        reported = get_config_value("limits.shard_prefix_length", repo)
        assert reported == "4"

    def test_get_config_value_absent_returns_none(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With no config written, get_config_value returns None for the key."""
        repo = _repo(tmp_path)
        reported = get_config_value("limits.shard_prefix_length", repo)
        assert reported is None
536
537
538	# ---------------------------------------------------------------------------
539	# 6. Migration compatibility — dual-lookup fallback
540	# ---------------------------------------------------------------------------
541
542
class TestMigrationFallback:
    """Objects stored under 2-char sharding stay reachable after the config moves to 4-char."""

    def test_two_char_object_found_after_switching_to_four_char(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An object written before the switch is still found and readable afterwards.

        Existing objects are not moved by this test; the lookup is expected
        to locate the old 2-char path on its own.
        """
        repo = _repo(tmp_path)

        # Store the object while the default (2-char) sharding is in effect.
        payload = b"written before shard upgrade"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)
        legacy_location = object_path(repo, object_id, prefix_len=2)
        assert legacy_location.exists()

        # Flip the configuration to 4-char sharding.
        _write_config(repo, 4)

        # The object must remain visible and intact.
        assert has_object(repo, object_id), "Object lost after shard config upgrade"
        assert read_object(repo, object_id) == payload

    def test_fallback_path_returns_two_char_when_primary_absent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With 4-char configured but the object only at 2-char, the 2-char path is returned."""
        repo = _repo(tmp_path)
        payload = b"fallback test"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)  # stored under 2-char sharding

        _write_config(repo, 4)
        resolved = _object_path_with_fallback(repo, object_id)
        expected = object_path(repo, object_id, prefix_len=2)
        assert resolved == expected
        assert resolved.exists()

    def test_primary_path_preferred_over_fallback(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An object stored at the 4-char path resolves to that primary path."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"written at four-char shard"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)  # stored under 4-char (primary)

        resolved = _object_path_with_fallback(repo, object_id)
        expected = object_path(repo, object_id, prefix_len=4)
        assert resolved == expected

    def test_idempotent_write_after_migration_switch(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Re-writing an already stored object after the switch reports False (no new write)."""
        repo = _repo(tmp_path)
        payload = b"idempotent migration test"
        object_id = blob_id(payload)

        # Initial write under 2-char sharding reports True.
        first_write = write_object(repo, object_id, payload)
        assert first_write is True

        # Move to 4-char sharding.
        _write_config(repo, 4)

        # The object is already in the store at its 2-char path, so this must be skipped.
        second_write = write_object(repo, object_id, payload)
        assert second_write is False
606
607
608	# ---------------------------------------------------------------------------
609	# 7. Security: object_id injection / path traversal rejected
610	# ---------------------------------------------------------------------------
611
612
class TestObjectIdSecurity:
    """Malformed object IDs are rejected by the path, existence and read entry points."""

    @pytest.mark.parametrize(
        "bad_id",
        [
            f"../../../etc/passwd{'a' * (64 - 19)}",  # path traversal
            f"ABCDEF{'a' * 58}",  # uppercase — rejected
            "a" * 63,  # too short
            "a" * 65,  # too long
            "a" * 63 + "g",  # non-hex char
            "",  # empty
            f"{'a' * 32}/{'a' * 31}",  # slash in middle
        ],
    )
    def test_invalid_object_id_rejected(
        self, tmp_path: pathlib.Path, bad_id: str
    ) -> None:
        """Each entry point raises ValueError or TypeError for a malformed ID."""
        repo = _repo(tmp_path)
        # Same order as the store is usually exercised: path, existence, read.
        for entry_point in (object_path, has_object, read_object):
            with pytest.raises((ValueError, TypeError)):
                entry_point(repo, bad_id)
637
638
639	# ---------------------------------------------------------------------------
640	# 8. Scale: 65 536 shard space — write one object per 4-char prefix bucket
641	# (smoke test with 256 buckets, not all 65k, to stay fast)
642	# ---------------------------------------------------------------------------
643
644
class TestShardScaleSmoke:
    """Smoke checks on shard directory layout for both prefix lengths."""

    def test_256_two_char_shards_coexist(self, tmp_path: pathlib.Path) -> None:
        """Writing one object for each of the 256 two-char prefixes creates 256 shard directories."""
        repo = _repo(tmp_path)
        skip = len("sha256:")
        seen_prefixes: set[str] = set()
        counter = 0
        # Keep generating payloads until every 2-char prefix has received one object.
        while len(seen_prefixes) != 256:
            payload = f"shard-smoke-{counter}".encode()
            counter += 1
            object_id = blob_id(payload)
            shard_name = object_id[skip:skip + 2]
            if shard_name in seen_prefixes:
                continue
            write_object(repo, object_id, payload)
            seen_prefixes.add(shard_name)

        sha_root = objects_dir(repo) / "sha256"
        shard_names = [entry.name for entry in sha_root.iterdir() if entry.is_dir()]
        assert len(shard_names) == 256

    def test_four_char_prefix_produces_longer_shard_name(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The 4-char shard directory is named after the first four hex characters of the ID."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"four-char-shard-smoke"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)

        shard_name = object_path(repo, object_id, prefix_len=4).parent.name
        skip = len("sha256:")
        assert len(shard_name) == 4
        assert shard_name == object_id[skip:skip + 4]

    def test_object_file_name_is_correct_remainder(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With prefix_len=4 the file name is the 60 characters left after the prefix."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"filename-check"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)

        file_name = object_path(repo, object_id, prefix_len=4).name
        # split_id(...)[1] is the part of the ID the file name is taken from.
        remainder = split_id(object_id)[1][4:]
        assert file_name == remainder
        assert len(file_name) == 60
691
692
693	# ---------------------------------------------------------------------------
694	# 9. Stress: @slow — 5k object writes, confirm all are 0o444
695	# ---------------------------------------------------------------------------
696
697
@pytest.mark.slow
class TestLargeScaleMode:
    """Slow stress check that the 0o444 mode holds across a large batch of writes."""

    def test_100k_objects_all_0o444(self, tmp_path: pathlib.Path) -> None:
        """Write 5k objects and confirm every one has mode 0o444.

        5k exercises all shard-directory boundaries (256 shards with the
        default 2-char prefix). The mode invariant is deterministic — scale
        beyond this adds no coverage.

        The test name still says "100k" for continuity of the test ID; the
        batch actually written is 5 000 objects.
        """
        repo = _repo(tmp_path)
        n = 5_000
        for i in range(n):
            data = f"scale-object-{i}".encode()
            write_object(repo, blob_id(data), data)

        # Collect every offender rather than stopping at the first one, so the
        # failure message can report how widespread the problem is.
        bad: list[str] = []
        for _, obj_file in iter_stored_objects(repo):
            mode = stat.S_IMODE(obj_file.stat().st_mode)
            if mode != 0o444:
                bad.append(f"{obj_file}: {oct(mode)}")

        # Joined outside the f-string: a backslash inside an f-string
        # replacement field is a SyntaxError before Python 3.12.
        sample = "\n".join(bad[:5])
        assert not bad, (
            f"{len(bad)} objects have wrong permissions:\n{sample}"
        )
722
723
724	# ---------------------------------------------------------------------------
725	# Regression: plan file ✅ sections must never silently regress to ⬜
726	# ---------------------------------------------------------------------------
727
728
class TestPlanFileChecklistRegression:
    """Regression test for the workflow bug where 'mark I-7 complete' authored
    from a stale working tree accidentally reset I-6 from ✅ back to ⬜.

    Root cause: the editor displayed a stale cached version of EXTREME_STRESS_PLAN.md
    (⬜ for 1.6). The agent edited and committed from that stale view, overwriting
    the already-committed ✅. Muse stored exactly what was staged; the wrong
    thing was staged.

    This test walks the last N commits in history (newest first), extracts the
    plan file object at each commit, and verifies that no section ever
    transitions from ✅ to ⬜ when going from an older commit to a newer one.
    A ✅ → ⬜ transition is always a regression; a ⬜ → ✅ is a completion.

    NOTE(review): a regression that is already part of the walked history
    will make this test fail until it ages out of the _MAX_COMMITS_TO_WALK
    window — confirm that is the intended behavior.
    """

    _PLAN_FILE = "EXTREME_STRESS_PLAN.md"
    _SECTION_PATTERN = "### "
    _MAX_COMMITS_TO_WALK = 40
    _DONE = "✅"
    _TODO = "⬜"

    def _get_sections(self, text: str) -> Manifest:
        """Return {section_title: status} for every line starting with '### '.

        The status is "✅" if the line contains it, otherwise "⬜" if the line
        contains that, otherwise "?". The title is the header line with the
        status markers removed and whitespace collapsed, so the same section
        maps to the same key whichever marker it currently carries.
        """
        sections: Manifest = {}
        for line in text.splitlines():
            if not line.startswith(self._SECTION_PATTERN):
                continue
            if self._DONE in line:
                status = self._DONE
            elif self._TODO in line:
                status = self._TODO
            else:
                status = "?"
            # Strip the markers (and a possible emoji variation selector) from
            # the key; keeping them would make the ✅ and ⬜ versions of a
            # section look like two unrelated sections.
            title = (
                line.replace(self._DONE, "")
                .replace(self._TODO, "")
                .replace("\ufe0f", "")
            )
            sections[" ".join(title.split())] = status
        return sections

    def _find_regressions(
        self, older: Manifest, newer: Manifest, newer_commit_id: str
    ) -> list[str]:
        """List sections that are ✅ in *older* but ⬜ in *newer*.

        *newer_commit_id* is the commit that introduced the regression and is
        the one named in each message.
        """
        return [
            f"Commit {newer_commit_id[:8]}: '{header}' regressed ✅ → ⬜"
            for header, old_status in older.items()
            if old_status == self._DONE and newer.get(header) == self._TODO
        ]

    def test_no_completed_section_regresses_to_incomplete(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Walk commit history: any section that was ✅ must never become ⬜.

        A regression (✅ → ⬜) means a committed completion was silently
        overwritten with an older state. This test pins that invariant.
        Skips when the directory two levels above this file has no HEAD file
        or the branch ref file is missing.
        """
        muse_root = pathlib.Path(__file__).parent.parent

        # Find HEAD commit
        head_file = head_path(muse_root)
        if not head_file.exists():
            pytest.skip("No .muse/HEAD file — not in a Muse repo")
        head_ref = head_file.read_text(encoding="utf-8").strip()
        if head_ref.startswith("ref:"):
            # Symbolic HEAD: resolve the branch file to get the commit id.
            ref_name = head_ref.split("ref:")[-1].strip()
            branch_file = muse_dir(muse_root) / ref_name
            if not branch_file.exists():
                pytest.skip(f"Branch ref file missing: {ref_name}")
            head_commit_id = branch_file.read_text(encoding="utf-8").strip()
        else:
            head_commit_id = head_ref

        def get_plan_text(commit_id: str) -> str | None:
            """Return the plan file's text at *commit_id*, or None if any lookup step fails."""
            commit_rec = read_commit(muse_root, commit_id)
            if commit_rec is None:
                return None
            snap_rec = read_snapshot(muse_root, commit_rec.snapshot_id)
            if snap_rec is None:
                return None
            plan_oid = snap_rec.manifest.get(self._PLAN_FILE)
            if not plan_oid:
                return None
            raw = read_object(muse_root, plan_oid)
            if raw is None:
                return None
            return raw.decode("utf-8", errors="replace")

        # The walk goes from HEAD back through parents, so the state seen on
        # the previous iteration belongs to the NEWER commit.
        newer_sections: Manifest = {}
        newer_commit_id = ""
        regressions: list[str] = []
        current = head_commit_id
        walked = 0

        while current and walked < self._MAX_COMMITS_TO_WALK:
            text = get_plan_text(current)
            if text:
                older_sections = self._get_sections(text)
                regressions.extend(
                    self._find_regressions(
                        older_sections, newer_sections, newer_commit_id
                    )
                )
                # Commits without the plan file are skipped: the comparison
                # baseline only advances when a plan was actually found.
                newer_sections = older_sections
                newer_commit_id = current

            commit_rec = read_commit(muse_root, current)
            if commit_rec is None:
                break
            current = commit_rec.parent_commit_id or ""
            walked += 1

        # Joined outside the f-string: a backslash inside an f-string
        # replacement field is a SyntaxError before Python 3.12.
        details = "\n".join(regressions)
        assert not regressions, (
            f"Plan file has {len(regressions)} section regression(s) — "
            "a previously completed (✅) section was overwritten with ⬜.\n"
            "Root cause: commit authored from stale working-tree state.\n"
            "Fix: always run `muse diff` before `muse code add .` to verify\n"
            "the working tree matches the intended state.\n\n"
            f"Regressions found:\n{details}"
        )

test_integrity_I8_object_store_scale.py file-level

`test_integrity_I8_object_store_scale.py` file-level