gabriel / muse public
test_integrity_I8_object_store_scale.py python
826 lines 32.1 KB
Raw
1 """I-8: Object store at Linux scale.
2
3 Scenario: 850 000 commits × ~20 objects per commit = 17 million objects.
4 2-char sharding → 256 shards × ~66 000 files each. On Linux ext4 (and
5 macOS APFS) directory entries above ~100 000 per directory trigger visible
6 lookup degradation. This suite proves:
7
8 1. File mode 0o444 — every new object is written read-only.
9 2. Stale temp cleanup — .obj-tmp-* files from a prior crash are removed.
10 3. has_object O(log n) lookup — timing at 1k / 10k / 100k objects proves
11 sub-linear growth (ext4 / APFS use hash-tree / B-tree indexing).
12 4. 4-char sharding — 65 536 shards; object path layout changes correctly.
13 5. Configurable via [limits] shard_prefix_length in config.toml.
14 6. Dual-lookup / migration — objects written at 2-char prefix are still
15 found after switching config to 4-char.
16 7. shard_prefix_length=4 reflected in get_config_value and get_limit.
17 8. Robustness — invalid shard_prefix_length values are ignored.
18 9. Permission enforcement — direct write to a 0o444 object raises
19 PermissionError, confirming the OS-level immutability guard.
20 10. Shard count correctness — 4-char yields 65 536 possible shards.
21 11. cleanup_stale_object_temps is idempotent (double-call safe).
22 12. _object_path_with_fallback returns primary path when it exists.
23 """
24
25 from __future__ import annotations
26
27 import os
28 import pathlib
29 import stat
30 import time
31 import tomllib
32
33 import pytest
34
35 from muse.core.object_store import (
36 _object_path_with_fallback,
37 cleanup_stale_object_temps,
38 has_object,
39 iter_stored_objects,
40 object_path,
41 objects_dir,
42 read_object,
43 restore_object,
44 write_object,
45 write_object_from_path,
46 _OBJECT_MODE,
47 _DEFAULT_SHARD_PREFIX_LEN,
48 _VALID_SHARD_PREFIX_LENS,
49 )
50 from muse.cli.config import get_limit, get_config_value
51 from muse.core.types import Manifest, blob_id, fake_id, long_id, split_id
52 from muse.core.paths import commits_dir, config_toml_path, head_path, muse_dir, objects_dir, snapshots_dir
53 from muse.core.commits import read_commit
54 from muse.core.snapshots import read_snapshot
55
56
def _repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the Muse metadata directory under *tmp_path* and return the root.

    The directory location is whatever ``muse_dir`` reports for the root.
    ``mkdir`` is called without ``parents`` or ``exist_ok``, so each root can
    be initialised only once.
    """
    repo_root = tmp_path
    metadata_dir = muse_dir(repo_root)
    metadata_dir.mkdir()
    return repo_root
60
61
def _write_config(repo: pathlib.Path, shard_prefix_length: int) -> None:
    """Write a minimal config file that sets ``[limits] shard_prefix_length``.

    The file is written to the path returned by ``config_toml_path(repo)`` and
    contains a ``[core]`` table (branch "main") followed by the ``[limits]``
    table holding *shard_prefix_length*.  Any existing file is overwritten.
    """
    config_lines = [
        "[core]",
        'branch = "main"',
        "",
        "[limits]",
        f"shard_prefix_length = {shard_prefix_length}",
        "",  # keeps the trailing newline at end of file
    ]
    target = config_toml_path(repo)
    target.write_text("\n".join(config_lines), encoding="utf-8")
69
70
71 # ---------------------------------------------------------------------------
72 # 0. Regression: restore_object must NOT propagate 0o444 to working tree
73 # ---------------------------------------------------------------------------
74
75
class TestRestoreObjectMode:
    """Pin the permission contract between the object store and the working tree.

    Stored objects are read-only (0o444); files produced by ``restore_object``
    must be 0o644 so they remain editable.

    Background (from the original regression report): ``shutil.copy2`` copies
    permissions from its source.  Once I-8 made stored objects 0o444,
    ``restore_object`` started producing read-only working-tree files.  These
    tests keep that from recurring.
    """

    def test_restore_object_produces_0o644_file(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A restored working-tree file has mode 0o644.

        The restore call must also report success (truthy return value).
        """
        repo = _repo(tmp_path)
        payload = b"content that will be restored to working tree"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)

        target = tmp_path / "restored.txt"
        restored = restore_object(repo, object_id, target)
        assert restored

        target_mode = stat.S_IMODE(target.stat().st_mode)
        assert target_mode == 0o644, (
            f"restore_object produced mode {oct(target_mode)} — working-tree files "
            f"must be 0o644 so they are editable. "
            f"(Stored object is 0o444; shutil.copy2 must not propagate that mode.)"
        )

    def test_stored_object_is_0o444_but_restore_is_0o644(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The same object is 0o444 in the store and 0o644 once restored.

        The destination sits in a sub-directory that does not exist before
        the restore call.
        """
        repo = _repo(tmp_path)
        payload = b"immutable in store, writable in tree"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)

        stored_file = object_path(repo, object_id)
        stored_mode = stat.S_IMODE(stored_file.stat().st_mode)
        assert stored_mode == 0o444, f"Stored object should be 0o444, got {oct(stored_mode)}"

        target = tmp_path / "workdir" / "file.txt"
        restore_object(repo, object_id, target)
        restored_mode = stat.S_IMODE(target.stat().st_mode)
        assert restored_mode == 0o644, (
            f"Restored working-tree file should be 0o644, got {oct(restored_mode)}"
        )

    def test_restore_object_content_intact_after_mode_fix(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Restored bytes are identical to the bytes that were stored."""
        repo = _repo(tmp_path)
        payload = b"content integrity check after mode fix" * 50
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)

        target = tmp_path / "check.bin"
        restore_object(repo, object_id, target)
        assert target.read_bytes() == payload

    def test_restore_large_object_is_0o644(self, tmp_path: pathlib.Path) -> None:
        """A 512 KiB blob stored via ``write_object_from_path`` restores as 0o644."""
        repo = _repo(tmp_path)
        payload = os.urandom(512 * 1024)  # 512 KiB
        object_id = blob_id(payload)
        source_file = tmp_path / "large.bin"
        source_file.write_bytes(payload)
        write_object_from_path(repo, object_id, source_file)

        target = tmp_path / "large_restored.bin"
        restore_object(repo, object_id, target)
        target_mode = stat.S_IMODE(target.stat().st_mode)
        assert target_mode == 0o644, (
            f"Large blob restore produced mode {oct(target_mode)}, expected 0o644"
        )
160
161
162 # ---------------------------------------------------------------------------
163 # 1. File mode 0o444 — immutability enforced at the OS level
164 # ---------------------------------------------------------------------------
165
166
class TestObjectMode:
    """Verify that every stored object file is written read-only (0o444)."""

    def test_write_object_produces_0o444_file(self, tmp_path: pathlib.Path) -> None:
        """Every blob written by write_object must be mode 0o444."""
        repo = _repo(tmp_path)
        data = b"immutable content"
        oid = blob_id(data)
        write_object(repo, oid, data)
        p = object_path(repo, oid)
        mode = stat.S_IMODE(p.stat().st_mode)
        assert mode == 0o444, (
            f"Object {oid[:8]} was written with mode {oct(mode)} instead of 0o444. "
            "Content-addressed objects must be read-only."
        )

    def test_write_object_from_path_produces_0o444_file(
        self, tmp_path: pathlib.Path
    ) -> None:
        """write_object_from_path (large-blob path) must also produce 0o444."""
        repo = _repo(tmp_path)
        data = b"large blob via path" * 100
        oid = blob_id(data)
        src = tmp_path / "src.bin"
        src.write_bytes(data)
        write_object_from_path(repo, oid, src)
        p = object_path(repo, oid)
        mode = stat.S_IMODE(p.stat().st_mode)
        assert mode == 0o444, (
            f"write_object_from_path produced mode {oct(mode)} instead of 0o444."
        )

    def test_object_mode_constant(self) -> None:
        """_OBJECT_MODE must equal 0o444 — no accidental changes."""
        assert _OBJECT_MODE == 0o444

    def test_write_then_read_respects_mode(self, tmp_path: pathlib.Path) -> None:
        """Round-trip: content can be read back even though the file is 0o444."""
        repo = _repo(tmp_path)
        data = b"read-only but readable"
        oid = blob_id(data)
        write_object(repo, oid, data)
        assert read_object(repo, oid) == data

    def test_direct_overwrite_blocked_by_os(self, tmp_path: pathlib.Path) -> None:
        """Opening a 0o444 object for writing must raise PermissionError.

        This is the OS-level immutability guarantee: even a bug that calls
        open(path, 'wb') on a stored object is caught before any bytes are
        written.

        Skipped when running as the superuser: root is exempt from file mode
        checks, so the write would succeed and the test would both fail and
        clobber the stored object.
        """
        # os.geteuid is absent on platforms without POSIX user ids.
        geteuid = getattr(os, "geteuid", None)
        if geteuid is not None and geteuid() == 0:
            pytest.skip("running as root — file mode bits are not enforced")

        repo = _repo(tmp_path)
        data = b"must not be overwritten"
        oid = blob_id(data)
        write_object(repo, oid, data)
        p = object_path(repo, oid)
        with pytest.raises(PermissionError):
            p.write_bytes(b"attacker-controlled content")
        # Content must be intact.
        assert read_object(repo, oid) == data

    def test_multiple_objects_all_0o444(self, tmp_path: pathlib.Path) -> None:
        """Batch write: every object file must be 0o444."""
        repo = _repo(tmp_path)
        for i in range(50):
            data = f"batch-object-{i}".encode()
            oid = blob_id(data)
            write_object(repo, oid, data)
        for _, obj_file in iter_stored_objects(repo):
            mode = stat.S_IMODE(obj_file.stat().st_mode)
            assert mode == 0o444, f"{obj_file.name} has mode {oct(mode)}, expected 0o444"
236
237
238 # ---------------------------------------------------------------------------
239 # 2. Stale temp cleanup
240 # ---------------------------------------------------------------------------
241
242
def _make_stale(path: pathlib.Path, content: bytes = b"stale") -> None:
    """Create *path* with *content*, then set its atime and mtime to the epoch.

    Per the original note, ``cleanup_stale_object_temps`` only removes files
    older than an age gate (``_CLEANUP_MIN_AGE_SECS``, stated as 60 s).  A temp
    file created and cleaned within the same test would otherwise be too young
    to be collected.  Backdating to 1970-01-01 makes the file look decades old.
    """
    epoch = 0
    path.write_bytes(content)
    os.utime(path, times=(epoch, epoch))  # (atime, mtime)
254
255
class TestStaleTempCleanup:
    """Exercise ``cleanup_stale_object_temps`` against leftover temp files.

    Each test plants backdated ``.obj-tmp-*`` / ``.restore-tmp-*`` files in
    shard directories under ``objects_dir(repo) / "sha256"`` and checks the
    count the cleanup call returns.
    """

    def test_cleanup_removes_obj_tmp_files(self, tmp_path: pathlib.Path) -> None:
        """A stale ``.obj-tmp-*`` file in a shard dir is deleted and counted."""
        repo = _repo(tmp_path)
        shard_dir = objects_dir(repo) / "sha256" / "ab"
        shard_dir.mkdir(parents=True)
        leftover = shard_dir / ".obj-tmp-crash"
        _make_stale(leftover, b"partial write from prior SIGKILL")
        assert leftover.exists()

        removed_count = cleanup_stale_object_temps(repo)
        assert removed_count == 1
        assert not leftover.exists()

    def test_cleanup_removes_restore_tmp_files(self, tmp_path: pathlib.Path) -> None:
        """A stale ``.restore-tmp-*`` file is deleted and counted as well."""
        repo = _repo(tmp_path)
        shard_dir = objects_dir(repo) / "sha256" / "cd"
        shard_dir.mkdir(parents=True)
        leftover = shard_dir / ".restore-tmp-12345"
        _make_stale(leftover, b"partial restore")

        removed_count = cleanup_stale_object_temps(repo)
        assert removed_count == 1
        assert not leftover.exists()

    def test_cleanup_preserves_real_objects(self, tmp_path: pathlib.Path) -> None:
        """A genuine stored object is not removed by cleanup."""
        repo = _repo(tmp_path)
        payload = b"real object"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)

        removed_count = cleanup_stale_object_temps(repo)
        assert removed_count == 0
        assert has_object(repo, object_id)

    def test_cleanup_nonexistent_store_returns_zero(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With no objects directory yet, cleanup returns 0 and does not raise."""
        repo = _repo(tmp_path)
        # Nothing has been written, so the objects dir has not been created.
        removed_count = cleanup_stale_object_temps(repo)
        assert removed_count == 0

    def test_cleanup_is_idempotent(self, tmp_path: pathlib.Path) -> None:
        """A second cleanup call right after the first finds nothing to remove."""
        repo = _repo(tmp_path)
        shard_dir = objects_dir(repo) / "sha256" / "ef"
        shard_dir.mkdir(parents=True)
        _make_stale(shard_dir / ".obj-tmp-stale")

        first_pass = cleanup_stale_object_temps(repo)
        assert first_pass == 1
        second_pass = cleanup_stale_object_temps(repo)
        assert second_pass == 0

    def test_cleanup_multiple_shards(self, tmp_path: pathlib.Path) -> None:
        """One stale file in each of three shard dirs yields a count of three."""
        repo = _repo(tmp_path)
        algo_root = objects_dir(repo) / "sha256"
        for shard_name in ("00", "7f", "ff"):
            shard_dir = algo_root / shard_name
            shard_dir.mkdir(parents=True)
            _make_stale(shard_dir / f".obj-tmp-{shard_name}")

        removed_count = cleanup_stale_object_temps(repo)
        assert removed_count == 3
322
323
324 # ---------------------------------------------------------------------------
325 # 3. has_object O(log n) performance — 1k / 10k / 100k files per shard
326 # ---------------------------------------------------------------------------
327
328
class TestHasObjectPerformance:
    """Prove that has_object does not degrade to O(n).

    ext4 and APFS use hash-tree / B-tree directory indexing so filename
    lookup is O(log n). At n=100k the ratio to n=1k should be < 10×
    (log2(100000) / log2(1000) ≈ 1.66× in theory; we allow 10× for
    scheduler jitter).

    Every test fills the directory that actually holds the probed object,
    obtained from ``object_path(repo, oid).parent``.  Building the path by
    hand is fragile: other tests in this file place shards under
    ``objects_dir(repo) / "sha256" / <prefix>``, so a hand-built
    ``objects_dir(repo) / <prefix>`` fills a directory the store never reads
    and the timing would be measured against a near-empty shard.
    """

    def _populate_shard(
        self, shard_dir: pathlib.Path, n: int
    ) -> list[str]:
        """Create *n* one-byte dummy files in *shard_dir* and return their names.

        Names come from ``fake_id(f"dummy-{i}")``; the directory is created
        (with parents) if it does not exist yet.
        """
        shard_dir.mkdir(parents=True, exist_ok=True)
        names: list[str] = [fake_id(f"dummy-{i}") for i in range(n)]
        for name in names:
            (shard_dir / name).write_bytes(b"x")
        return names

    def _time_has_object(
        self,
        repo: pathlib.Path,
        oid: str,
        iterations: int = 200,
    ) -> float:
        """Return average has_object latency in milliseconds over *iterations*."""
        # Warm up filesystem cache so the first cold lookup does not skew the mean.
        for _ in range(10):
            has_object(repo, oid)
        t0 = time.perf_counter()
        for _ in range(iterations):
            has_object(repo, oid)
        return (time.perf_counter() - t0) / iterations * 1000

    def test_has_object_under_10ms_at_100k_per_shard(
        self, tmp_path: pathlib.Path
    ) -> None:
        """has_object lookup < 10 ms with 100 000 files in the target shard."""
        repo = _repo(tmp_path)
        target_data = b"target-object-100k-test"
        target_oid = blob_id(target_data)

        # Fill the directory the store itself resolves for this object.
        shard = object_path(repo, target_oid).parent
        self._populate_shard(shard, 100_000)
        # Write the real target object.
        write_object(repo, target_oid, target_data)
        assert has_object(repo, target_oid), "target object missing after write"

        avg_ms = self._time_has_object(repo, target_oid, iterations=100)
        assert avg_ms < 10.0, (
            f"has_object averaged {avg_ms:.3f} ms at 100k files per shard — "
            f"exceeded 10 ms budget. Filesystem lookup may be O(n)."
        )

    def test_lookup_growth_is_sublinear(self, tmp_path: pathlib.Path) -> None:
        """Lookup time at 10k files is < 10× time at 1k files (sub-linear proof)."""
        repo = _repo(tmp_path)

        # 1k shard
        data1k = b"object-for-1k-test"
        oid1k = blob_id(data1k)
        shard = object_path(repo, oid1k).parent
        self._populate_shard(shard, 1_000)
        write_object(repo, oid1k, data1k)
        time_1k = self._time_has_object(repo, oid1k, iterations=500)

        # 10k shard (different repo so the shard is clean)
        repo2_root = tmp_path / "repo2"
        repo2_root.mkdir()
        repo2 = _repo(repo2_root)
        data10k = b"object-for-10k-test"
        oid10k = blob_id(data10k)
        shard2 = object_path(repo2, oid10k).parent
        self._populate_shard(shard2, 10_000)
        write_object(repo2, oid10k, data10k)
        time_10k = self._time_has_object(repo2, oid10k, iterations=500)

        # Sub-linear: 10× more files should not take 10× longer.
        # The 0.001 ms floor avoids dividing by a near-zero baseline.
        ratio = time_10k / max(time_1k, 0.001)
        assert ratio < 10.0, (
            f"has_object at 10k took {time_10k:.3f} ms vs {time_1k:.3f} ms at 1k "
            f"(ratio={ratio:.2f}×). Lookup appears O(n) — investigate filesystem."
        )

    def test_has_object_absent_is_fast(self, tmp_path: pathlib.Path) -> None:
        """Negative lookup (object not present) is also fast at 100k per shard."""
        repo = _repo(tmp_path)
        absent_data = b"this-object-will-not-be-written"
        absent_oid = blob_id(absent_data)

        # Fill the shard where the object would live; do NOT write the object.
        shard = object_path(repo, absent_oid).parent
        self._populate_shard(shard, 100_000)
        assert not has_object(repo, absent_oid), "absent object unexpectedly found"

        avg_ms = self._time_has_object(repo, absent_oid, iterations=100)
        assert avg_ms < 10.0, (
            f"Negative has_object averaged {avg_ms:.3f} ms at 100k files — "
            f"exceeded 10 ms budget."
        )
438
439
440 # ---------------------------------------------------------------------------
441 # 4 & 5. 4-char sharding — configurable via [limits] shard_prefix_length
442 # ---------------------------------------------------------------------------
443
444
class TestFourCharSharding:
    """Cover the ``shard_prefix_length`` setting and the 4-char shard layout."""

    def test_default_prefix_length_is_two(self, tmp_path: pathlib.Path) -> None:
        """Without a config file, ``get_limit`` reports a prefix length of 2."""
        repo = _repo(tmp_path)
        configured = get_limit("shard_prefix_length", repo)
        assert configured == 2

    def test_config_sets_prefix_length_to_four(self, tmp_path: pathlib.Path) -> None:
        """A config with ``shard_prefix_length = 4`` is reported as 4."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        configured = get_limit("shard_prefix_length", repo)
        assert configured == 4

    def test_object_path_uses_four_char_prefix(self, tmp_path: pathlib.Path) -> None:
        """``prefix_len=4`` yields a 4-char shard dir and a 60-char file name."""
        repo = _repo(tmp_path)
        object_id = long_id(f"abcd{'1' * 60}")
        location = object_path(repo, object_id, prefix_len=4)
        assert location.parent.name == "abcd"
        assert location.name == "1" * 60

    def test_object_path_default_still_two_char(self, tmp_path: pathlib.Path) -> None:
        """Omitting ``prefix_len`` keeps the 2-char shard layout."""
        repo = _repo(tmp_path)
        object_id = long_id(f"abcd{'1' * 60}")
        location = object_path(repo, object_id)
        assert location.parent.name == "ab"
        assert location.name == f"cd{'1' * 60}"

    def test_write_and_read_with_four_char_config(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With 4-char sharding configured, a write lands at the 4-char path and reads back."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"four char shard test"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)
        # The file must sit under the 4-char shard directory.
        location = object_path(repo, object_id, prefix_len=4)
        assert location.exists(), f"Object not found at 4-char path: {location}"
        assert read_object(repo, object_id) == payload

    def test_four_char_object_is_0o444(self, tmp_path: pathlib.Path) -> None:
        """An object written under 4-char sharding is still mode 0o444."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"mode check in 4-char shard"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)
        location = object_path(repo, object_id, prefix_len=4)
        file_mode = stat.S_IMODE(location.stat().st_mode)
        assert file_mode == 0o444

    def test_65536_shard_space(self) -> None:
        """Four hex characters give 16^4 = 65 536 possible shard names."""
        assert 16**4 == 65_536

    def test_valid_shard_prefix_lens(self) -> None:
        """``_VALID_SHARD_PREFIX_LENS`` is exactly ``{2, 4}``."""
        assert _VALID_SHARD_PREFIX_LENS == frozenset({2, 4})

    def test_default_shard_prefix_len_constant(self) -> None:
        """``_DEFAULT_SHARD_PREFIX_LEN`` is 2."""
        assert _DEFAULT_SHARD_PREFIX_LEN == 2

    def test_invalid_shard_prefix_length_ignored(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A configured value of 3 is not accepted; ``get_limit`` reports 2."""
        repo = _repo(tmp_path)
        config_file = config_toml_path(repo)
        config_file.write_text(
            "[limits]\nshard_prefix_length = 3\n", encoding="utf-8"
        )
        assert get_limit("shard_prefix_length", repo) == 2

    def test_get_config_value_returns_shard_prefix_length(
        self, tmp_path: pathlib.Path
    ) -> None:
        """``get_config_value`` returns the configured length as the string "4"."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        reported = get_config_value("limits.shard_prefix_length", repo)
        assert reported == "4"

    def test_get_config_value_absent_returns_none(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With no config file, ``get_config_value`` returns None for the key."""
        repo = _repo(tmp_path)
        reported = get_config_value("limits.shard_prefix_length", repo)
        assert reported is None
536
537
538 # ---------------------------------------------------------------------------
539 # 6. Migration compatibility — dual-lookup fallback
540 # ---------------------------------------------------------------------------
541
542
class TestMigrationFallback:
    """Objects stored under 2-char sharding must survive a switch to 4-char."""

    def test_two_char_object_found_after_switching_to_four_char(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An object written before the config change is still found and readable.

        Nothing is moved on disk; the test only rewrites the config between
        the write and the lookups.
        """
        repo = _repo(tmp_path)
        # No config yet, so this write uses the default 2-char layout.
        payload = b"written before shard upgrade"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)
        legacy_location = object_path(repo, object_id, prefix_len=2)
        assert legacy_location.exists()

        # Flip the repository to 4-char sharding.
        _write_config(repo, 4)

        # Lookup and read must still succeed.
        assert has_object(repo, object_id), "Object lost after shard config upgrade"
        assert read_object(repo, object_id) == payload

    def test_fallback_path_returns_two_char_when_primary_absent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With 4-char configured and only a 2-char copy on disk, the 2-char path is returned."""
        repo = _repo(tmp_path)
        payload = b"fallback test"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)  # lands at the 2-char path

        _write_config(repo, 4)
        resolved = _object_path_with_fallback(repo, object_id)
        assert resolved == object_path(repo, object_id, prefix_len=2)
        assert resolved.exists()

    def test_primary_path_preferred_over_fallback(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An object present at the 4-char path resolves to that path."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"written at four-char shard"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)  # lands at the 4-char (primary) path

        resolved = _object_path_with_fallback(repo, object_id)
        assert resolved == object_path(repo, object_id, prefix_len=4)

    def test_idempotent_write_after_migration_switch(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Re-writing an existing object after the switch returns False."""
        repo = _repo(tmp_path)
        payload = b"idempotent migration test"
        object_id = blob_id(payload)
        # Initial write under the 2-char layout reports True.
        first_write = write_object(repo, object_id, payload)
        assert first_write is True
        # Change the layout.
        _write_config(repo, 4)
        # The object already exists at the 2-char path, so this write is skipped.
        second_write = write_object(repo, object_id, payload)
        assert second_write is False
606
607
608 # ---------------------------------------------------------------------------
609 # 7. Security: object_id injection / path traversal rejected
610 # ---------------------------------------------------------------------------
611
612
class TestObjectIdSecurity:
    """Malformed object IDs must be refused by the path and lookup functions."""

    @pytest.mark.parametrize(
        "bad_id",
        [
            f"../../../etc/passwd{'a' * (64 - 19)}",  # path traversal
            f"ABCDEF{'a' * 58}",  # uppercase — rejected
            "a" * 63,  # too short
            "a" * 65,  # too long
            "a" * 63 + "g",  # non-hex char
            "",  # empty
            f"{'a' * 32}/{'a' * 31}",  # slash in middle
        ],
    )
    def test_invalid_object_id_rejected(
        self, tmp_path: pathlib.Path, bad_id: str
    ) -> None:
        """Each entry point raises ValueError or TypeError for *bad_id*.

        Checked in order: ``object_path``, ``has_object``, ``read_object``.
        """
        repo = _repo(tmp_path)
        rejected = (ValueError, TypeError)
        for entry_point in (object_path, has_object, read_object):
            with pytest.raises(rejected):
                entry_point(repo, bad_id)
637
638
639 # ---------------------------------------------------------------------------
640 # 8. Scale: 65 536 shard space — write one object per 4-char prefix bucket
641 # (smoke test with 256 buckets, not all 65k, to stay fast)
642 # ---------------------------------------------------------------------------
643
644
class TestShardScaleSmoke:
    """Smoke checks on shard directory layout for 2-char and 4-char prefixes."""

    def test_256_two_char_shards_coexist(self, tmp_path: pathlib.Path) -> None:
        """Writing one object per 2-char prefix produces 256 shard directories.

        Payloads are generated from a counter until every prefix has been
        seen once; payloads whose prefix is already covered are not written.
        """
        import itertools

        repo = _repo(tmp_path)
        hex_start = len("sha256:")
        covered: set[str] = set()
        counter = itertools.count()
        while len(covered) != 256:
            payload = f"shard-smoke-{next(counter)}".encode()
            object_id = blob_id(payload)
            shard_name = object_id[hex_start:hex_start + 2]
            if shard_name in covered:
                continue
            write_object(repo, object_id, payload)
            covered.add(shard_name)

        algo_root = objects_dir(repo) / "sha256"
        shard_names = [entry.name for entry in algo_root.iterdir() if entry.is_dir()]
        assert len(shard_names) == 256

    def test_four_char_prefix_produces_longer_shard_name(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Under 4-char sharding the shard dir name is the first 4 hex chars of the id."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"four-char-shard-smoke"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)
        location = object_path(repo, object_id, prefix_len=4)
        hex_start = len("sha256:")
        assert len(location.parent.name) == 4
        assert location.parent.name == object_id[hex_start:hex_start + 4]

    def test_object_file_name_is_correct_remainder(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With ``prefix_len=4`` the file name is the remaining 60 hex characters."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"filename-check"
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)
        location = object_path(repo, object_id, prefix_len=4)
        hex_digest = split_id(object_id)[1]
        assert location.name == hex_digest[4:]
        assert len(location.name) == 60
691
692
693 # ---------------------------------------------------------------------------
# 9. Stress: @slow — 5k object writes, confirm all are 0o444
695 # ---------------------------------------------------------------------------
696
697
@pytest.mark.slow
class TestLargeScaleMode:
    """Slow-marked bulk check that the 0o444 mode holds across many objects."""

    def test_100k_objects_all_0o444(self, tmp_path: pathlib.Path) -> None:
        """Write 5k objects and confirm every one has mode 0o444.

        5k exercises all shard-directory boundaries (256 shards with the
        default 2-char prefix). The mode invariant is deterministic — scale
        beyond this adds no coverage.

        The method name still says "100k" for test-id stability; the actual
        count is ``n`` below.
        """
        repo = _repo(tmp_path)
        n = 5_000
        for i in range(n):
            data = f"scale-object-{i}".encode()
            oid = blob_id(data)
            write_object(repo, oid, data)

        bad: list[str] = []
        for _, obj_file in iter_stored_objects(repo):
            mode = stat.S_IMODE(obj_file.stat().st_mode)
            if mode != 0o444:
                bad.append(f"{obj_file}: {oct(mode)}")

        # Joined outside the f-string: a backslash inside an f-string
        # replacement field is a SyntaxError before Python 3.12.
        sample = "\n".join(bad[:5])
        assert not bad, (
            f"{len(bad)} objects have wrong permissions:\n{sample}"
        )
722
723
724 # ---------------------------------------------------------------------------
725 # Regression: plan file ✅ sections must never silently regress to ⬜
726 # ---------------------------------------------------------------------------
727
728
class TestPlanFileChecklistRegression:
    """Regression test for the workflow bug where 'mark I-7 complete' authored
    from a stale working tree accidentally reset I-6 from ✅ back to ⬜.

    Root cause: the editor displayed a stale cached version of EXTREME_STRESS_PLAN.md
    (⬜ for 1.6). The agent edited and committed from that stale view, overwriting
    the already-committed ✅. Muse stored exactly what was staged; the wrong
    thing was staged.

    This test walks the last N commits in history, extracts the plan file object
    at each commit, and verifies that no section ever transitions from ✅ to ⬜.
    A ✅ → ⬜ transition is always a regression; a ⬜ → ✅ is a completion.

    Two details make the comparison meaningful:

    * Sections are keyed by their header text with the status markers
      removed, so the same section matches across commits whatever its status.
    * History is walked newest to oldest, so a regression is "older commit
      has ✅, newer commit has ⬜" and is attributed to the newer commit.
    """

    _PLAN_FILE = "EXTREME_STRESS_PLAN.md"
    _SECTION_PATTERN = "### "
    _MAX_COMMITS_TO_WALK = 40
    # Removed from header lines to build a status-independent key.  U+FE0F is
    # the emoji variation selector that may follow either marker.
    _STATUS_MARKERS = ("✅", "⬜", "\ufe0f")

    def _get_sections(self, text: str) -> Manifest:
        """Return ``{section_title: status}`` for every ``### `` header line.

        *status* is "✅", "⬜", or "?" when the line carries neither marker.
        *section_title* is the header line with status markers stripped and
        whitespace collapsed.
        """
        sections: Manifest = {}
        for line in text.splitlines():
            if not line.startswith(self._SECTION_PATTERN):
                continue
            status = "✅" if "✅" in line else ("⬜" if "⬜" in line else "?")
            title = line
            for marker in self._STATUS_MARKERS:
                title = title.replace(marker, "")
            sections[" ".join(title.split())] = status
        return sections

    def test_no_completed_section_regresses_to_incomplete(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Walk commit history: any section that was ✅ must never become ⬜.

        A regression (✅ → ⬜) means a committed completion was silently
        overwritten with an older state. This test pins that invariant.

        Skips when the enclosing directory is not a Muse repo (no HEAD file)
        or when HEAD points at a branch ref file that does not exist.
        """
        muse_root = pathlib.Path(__file__).parent.parent

        # Resolve HEAD to a commit id (either a symbolic "ref: ..." or a raw id).
        head_file = head_path(muse_root)
        if not head_file.exists():
            pytest.skip("No .muse/HEAD file — not in a Muse repo")
        head_ref = head_file.read_text(encoding="utf-8").strip()
        if head_ref.startswith("ref:"):
            ref_name = head_ref.split("ref:")[-1].strip()
            branch_file = muse_dir(muse_root) / ref_name
            if not branch_file.exists():
                pytest.skip(f"Branch ref file missing: {ref_name}")
            head_commit_id = branch_file.read_text(encoding="utf-8").strip()
        else:
            head_commit_id = head_ref

        def get_plan_text(snapshot_id: str) -> str | None:
            """Return the plan file text stored in *snapshot_id*, or None if absent."""
            snap_rec = read_snapshot(muse_root, snapshot_id)
            if snap_rec is None:
                return None
            plan_oid = snap_rec.manifest.get(self._PLAN_FILE)
            if not plan_oid:
                return None
            raw = read_object(muse_root, plan_oid)
            if raw is None:
                return None
            return raw.decode("utf-8", errors="replace")

        # State of the most recent (newer) commit that contained the plan file.
        newer_sections: Manifest = {}
        newer_commit = ""
        regressions: list[str] = []
        current = head_commit_id
        walked = 0

        while current and walked < self._MAX_COMMITS_TO_WALK:
            commit_rec = read_commit(muse_root, current)
            if commit_rec is None:
                break

            text = get_plan_text(commit_rec.snapshot_id)
            if text:
                older_sections = self._get_sections(text)
                for header, older_status in older_sections.items():
                    # Done here but open in the newer commit: the newer commit undid it.
                    if older_status == "✅" and newer_sections.get(header) == "⬜":
                        regressions.append(
                            f"Commit {newer_commit[:8]}: '{header}' regressed ✅ → ⬜"
                        )
                newer_sections = older_sections
                newer_commit = current

            current = commit_rec.parent_commit_id or ""
            walked += 1

        # Joined outside the f-string: a backslash inside an f-string
        # replacement field is a SyntaxError before Python 3.12.
        details = "\n".join(regressions)
        assert not regressions, (
            f"Plan file has {len(regressions)} section regression(s) — "
            "a previously completed (✅) section was overwritten with ⬜.\n"
            "Root cause: commit authored from stale working-tree state.\n"
            "Fix: always run `muse diff` before `muse code add .` to verify\n"
            "the working tree matches the intended state.\n\n"
            f"Regressions found:\n{details}"
        )
File History 1 commit