test_integrity_I8_object_store_scale.py
file-level
1
files
1
commits
0
hotspots
0
π§ dead
0
π₯ blast risk
| 1 | """I-8: Object store at Linux scale. |
| 2 | |
| 3 | Scenario: 850 000 commits × ~20 objects per commit = 17 million objects. |
| 4 | 2-char sharding → 256 shards × ~66 000 files each. On Linux ext4 (and |
| 5 | macOS APFS) directory entries above ~100 000 per directory trigger visible |
| 6 | lookup degradation. This suite proves: |
| 7 | |
| 8 | 1. File mode 0o444 — every new object is written read-only. |
| 9 | 2. Stale temp cleanup — .obj-tmp-* files from a prior crash are removed. |
| 10 | 3. has_object O(log n) lookup — timing at 1k / 10k / 100k objects proves |
| 11 | sub-linear growth (ext4 / APFS use hash-tree / B-tree indexing). |
| 12 | 4. 4-char sharding — 65 536 shards; object path layout changes correctly. |
| 13 | 5. Configurable via [limits] shard_prefix_length in config.toml. |
| 14 | 6. Dual-lookup / migration — objects written at 2-char prefix are still |
| 15 | found after switching config to 4-char. |
| 16 | 7. shard_prefix_length=4 reflected in get_config_value and get_limit. |
| 17 | 8. Robustness — invalid shard_prefix_length values are ignored. |
| 18 | 9. Permission enforcement — direct write to a 0o444 object raises |
| 19 | PermissionError, confirming the OS-level immutability guard. |
| 20 | 10. Shard count correctness — 4-char yields 65 536 possible shards. |
| 21 | 11. cleanup_stale_object_temps is idempotent (double-call safe). |
| 22 | 12. _object_path_with_fallback returns primary path when it exists. |
| 23 | """ |
| 24 | |
| 25 | from __future__ import annotations |
| 26 | |
| 27 | import os |
| 28 | import pathlib |
| 29 | import stat |
| 30 | import time |
| 31 | import tomllib |
| 32 | |
| 33 | import pytest |
| 34 | |
| 35 | from muse.core.object_store import ( |
| 36 | _object_path_with_fallback, |
| 37 | cleanup_stale_object_temps, |
| 38 | has_object, |
| 39 | iter_stored_objects, |
| 40 | object_path, |
| 41 | objects_dir, |
| 42 | read_object, |
| 43 | restore_object, |
| 44 | write_object, |
| 45 | write_object_from_path, |
| 46 | _OBJECT_MODE, |
| 47 | _DEFAULT_SHARD_PREFIX_LEN, |
| 48 | _VALID_SHARD_PREFIX_LENS, |
| 49 | ) |
| 50 | from muse.cli.config import get_limit, get_config_value |
| 51 | from muse.core.types import Manifest, blob_id, fake_id, long_id, split_id |
| 52 | from muse.core.paths import commits_dir, config_toml_path, head_path, muse_dir, objects_dir, snapshots_dir |
| 53 | from muse.core.commits import read_commit |
| 54 | from muse.core.snapshots import read_snapshot |
| 55 | |
| 56 | |
def _repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the Muse metadata directory under *tmp_path* and return *tmp_path*.

    The directory is created with a plain ``mkdir()``, so this raises if it
    already exists or if *tmp_path* itself is missing.
    """
    metadata_dir = muse_dir(tmp_path)
    metadata_dir.mkdir()
    return tmp_path
| 60 | |
| 61 | |
def _write_config(repo: pathlib.Path, shard_prefix_length: int) -> None:
    """Write a minimal config file that sets ``[limits] shard_prefix_length``.

    The file also carries a ``[core]`` table with ``branch = "main"``.  Any
    existing config at the same path is overwritten.
    """
    target = config_toml_path(repo)
    body = (
        "[core]\nbranch = \"main\"\n\n"
        f"[limits]\nshard_prefix_length = {shard_prefix_length}\n"
    )
    target.write_text(body, encoding="utf-8")
| 69 | |
| 70 | |
| 71 | # --------------------------------------------------------------------------- |
| 72 | # 0. Regression: restore_object must NOT propagate 0o444 to working tree |
| 73 | # --------------------------------------------------------------------------- |
| 74 | |
| 75 | |
class TestRestoreObjectMode:
    """Regression tests: stored objects are 0o444, restored files must be 0o644.

    Root cause of the original bug: shutil.copy2 copies permissions from the
    source (the stored object).  After I-8 introduced 0o444 on stored objects,
    restore_object was producing read-only working-tree files, silently
    freezing them.  This class pins the fix and prevents recurrence.

    Every test asserts the return value of ``restore_object`` so that a
    failed restore is reported as such, rather than as a FileNotFoundError
    from the ``stat()`` / ``read_bytes()`` call that follows.
    """

    def test_restore_object_produces_0o644_file(
        self, tmp_path: pathlib.Path
    ) -> None:
        """restore_object must write working-tree files with mode 0o644.

        Stored objects are 0o444; working-tree files must be 0o644 so users
        and agents can edit them without a manual chmod.
        """
        repo = _repo(tmp_path)
        data = b"content that will be restored to working tree"
        oid = blob_id(data)
        write_object(repo, oid, data)

        dest = tmp_path / "restored.txt"
        assert restore_object(repo, oid, dest)

        mode = stat.S_IMODE(dest.stat().st_mode)
        assert mode == 0o644, (
            f"restore_object produced mode {oct(mode)} — working-tree files "
            "must be 0o644 so they are editable. "
            "(Stored object is 0o444; shutil.copy2 must not propagate that mode.)"
        )

    def test_stored_object_is_0o444_but_restore_is_0o644(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The stored object is 0o444 while the restored file is 0o644.

        This is the invariant: objects are immutable in the store, writable
        in the working tree.
        """
        repo = _repo(tmp_path)
        data = b"immutable in store, writable in tree"
        oid = blob_id(data)
        write_object(repo, oid, data)

        stored_mode = stat.S_IMODE(object_path(repo, oid).stat().st_mode)
        assert stored_mode == 0o444, f"Stored object should be 0o444, got {oct(stored_mode)}"

        # The parent directory "workdir" is deliberately not created here;
        # the test relies on restore_object to create it.
        dest = tmp_path / "workdir" / "file.txt"
        assert restore_object(repo, oid, dest), "restore_object reported failure"
        restored_mode = stat.S_IMODE(dest.stat().st_mode)
        assert restored_mode == 0o644, (
            f"Restored working-tree file should be 0o644, got {oct(restored_mode)}"
        )

    def test_restore_object_content_intact_after_mode_fix(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Content must be byte-identical after the chmod fix — no data loss."""
        repo = _repo(tmp_path)
        data = b"content integrity check after mode fix" * 50
        oid = blob_id(data)
        write_object(repo, oid, data)

        dest = tmp_path / "check.bin"
        assert restore_object(repo, oid, dest), "restore_object reported failure"
        assert dest.read_bytes() == data

    def test_restore_large_object_is_0o644(self, tmp_path: pathlib.Path) -> None:
        """Large blobs stored via write_object_from_path also restore as 0o644."""
        repo = _repo(tmp_path)
        data = os.urandom(512 * 1024)  # 512 KiB
        oid = blob_id(data)
        src = tmp_path / "large.bin"
        src.write_bytes(data)
        write_object_from_path(repo, oid, src)

        dest = tmp_path / "large_restored.bin"
        assert restore_object(repo, oid, dest), "restore_object reported failure"
        mode = stat.S_IMODE(dest.stat().st_mode)
        assert mode == 0o644, (
            f"Large blob restore produced mode {oct(mode)}, expected 0o644"
        )
| 160 | |
| 161 | |
| 162 | # --------------------------------------------------------------------------- |
| 163 | # 1. File mode 0o444 — immutability enforced at the OS level |
| 164 | # --------------------------------------------------------------------------- |
| 165 | |
| 166 | |
class TestObjectMode:
    """Stored objects must be written read-only (mode 0o444)."""

    def test_write_object_produces_0o444_file(self, tmp_path: pathlib.Path) -> None:
        """Every blob written by write_object must be mode 0o444."""
        repo = _repo(tmp_path)
        data = b"immutable content"
        oid = blob_id(data)
        write_object(repo, oid, data)
        p = object_path(repo, oid)
        mode = stat.S_IMODE(p.stat().st_mode)
        assert mode == 0o444, (
            f"Object {oid[:8]} was written with mode {oct(mode)} instead of 0o444. "
            "Content-addressed objects must be read-only."
        )

    def test_write_object_from_path_produces_0o444_file(
        self, tmp_path: pathlib.Path
    ) -> None:
        """write_object_from_path (large-blob path) must also produce 0o444."""
        repo = _repo(tmp_path)
        data = b"large blob via path" * 100
        oid = blob_id(data)
        src = tmp_path / "src.bin"
        src.write_bytes(data)
        write_object_from_path(repo, oid, src)
        p = object_path(repo, oid)
        mode = stat.S_IMODE(p.stat().st_mode)
        assert mode == 0o444, (
            f"write_object_from_path produced mode {oct(mode)} instead of 0o444."
        )

    def test_object_mode_constant(self) -> None:
        """_OBJECT_MODE must equal 0o444 — no accidental changes."""
        assert _OBJECT_MODE == 0o444

    def test_write_then_read_respects_mode(self, tmp_path: pathlib.Path) -> None:
        """Round-trip: content can be read back even though the file is 0o444."""
        repo = _repo(tmp_path)
        data = b"read-only but readable"
        oid = blob_id(data)
        write_object(repo, oid, data)
        assert read_object(repo, oid) == data

    def test_direct_overwrite_blocked_by_os(self, tmp_path: pathlib.Path) -> None:
        """Opening a 0o444 object for writing must raise PermissionError.

        This is the OS-level immutability guarantee: even a bug that calls
        open(path, 'wb') on a stored object is caught before any bytes are
        written.

        Skipped when running as root: the superuser bypasses file permission
        bits, so the write would succeed and the test would fail for reasons
        unrelated to the object store.
        """
        # geteuid is POSIX-only; guard with hasattr so the check is a no-op
        # on platforms that lack it.
        if hasattr(os, "geteuid") and os.geteuid() == 0:
            pytest.skip("root bypasses file permission bits; 0o444 is not enforced")

        repo = _repo(tmp_path)
        data = b"must not be overwritten"
        oid = blob_id(data)
        write_object(repo, oid, data)
        p = object_path(repo, oid)
        with pytest.raises(PermissionError):
            p.write_bytes(b"attacker-controlled content")
        # Content must be intact.
        assert read_object(repo, oid) == data

    def test_multiple_objects_all_0o444(self, tmp_path: pathlib.Path) -> None:
        """Batch write: every object file must be 0o444."""
        repo = _repo(tmp_path)
        expected = 50
        for i in range(expected):
            data = f"batch-object-{i}".encode()
            oid = blob_id(data)
            write_object(repo, oid, data)

        checked = 0
        for _, obj_file in iter_stored_objects(repo):
            mode = stat.S_IMODE(obj_file.stat().st_mode)
            assert mode == 0o444, f"{obj_file.name} has mode {oct(mode)}, expected 0o444"
            checked += 1
        # Guard against a vacuous pass: if the iterator yields nothing, the
        # loop above asserts nothing.  The payloads are distinct, so each one
        # is expected to produce its own stored object.
        assert checked == expected, (
            f"iter_stored_objects yielded {checked} objects, expected {expected}"
        )
| 236 | |
| 237 | |
| 238 | # --------------------------------------------------------------------------- |
| 239 | # 2. Stale temp cleanup |
| 240 | # --------------------------------------------------------------------------- |
| 241 | |
| 242 | |
| 243 | def _make_stale(path: pathlib.Path, content: bytes = b"stale") -> None: |
| 244 | """Write *path* and backdate its mtime past the age gate. |
| 245 | |
| 246 | cleanup_stale_object_temps only removes files older than |
| 247 | _CLEANUP_MIN_AGE_SECS (60 s). Tests that create temp files and |
| 248 | immediately call cleanup would always return 0 without this helper. |
| 249 | Setting mtime to the Unix epoch (1970-01-01) makes every freshly-created |
| 250 | temp file look decades old to the cleanup function. |
| 251 | """ |
| 252 | path.write_bytes(content) |
| 253 | os.utime(path, (0, 0)) # atime=0, mtime=0 β epoch β age > 60 s |
| 254 | |
| 255 | |
class TestStaleTempCleanup:
    """Behaviour of cleanup_stale_object_temps on leftover temp files."""

    @staticmethod
    def _new_shard(repo: pathlib.Path, prefix: str) -> pathlib.Path:
        """Create and return the ``sha256/<prefix>`` shard directory."""
        shard_dir = objects_dir(repo) / "sha256" / prefix
        shard_dir.mkdir(parents=True)
        return shard_dir

    def test_cleanup_removes_obj_tmp_files(self, tmp_path: pathlib.Path) -> None:
        """A backdated .obj-tmp-* file in a shard dir is removed and counted."""
        repo = _repo(tmp_path)
        leftover = self._new_shard(repo, "ab") / ".obj-tmp-crash"
        _make_stale(leftover, b"partial write from prior SIGKILL")
        assert leftover.exists()

        count = cleanup_stale_object_temps(repo)

        assert count == 1
        assert not leftover.exists()

    def test_cleanup_removes_restore_tmp_files(self, tmp_path: pathlib.Path) -> None:
        """A backdated .restore-tmp-* file is removed as well."""
        repo = _repo(tmp_path)
        leftover = self._new_shard(repo, "cd") / ".restore-tmp-12345"
        _make_stale(leftover, b"partial restore")

        count = cleanup_stale_object_temps(repo)

        assert count == 1
        assert not leftover.exists()

    def test_cleanup_preserves_real_objects(self, tmp_path: pathlib.Path) -> None:
        """A genuine stored object survives cleanup and nothing is counted."""
        repo = _repo(tmp_path)
        payload = b"real object"
        oid = blob_id(payload)
        write_object(repo, oid, payload)

        count = cleanup_stale_object_temps(repo)

        assert count == 0
        assert has_object(repo, oid)

    def test_cleanup_nonexistent_store_returns_zero(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With no objects directory at all, cleanup returns 0 and does not raise."""
        repo = _repo(tmp_path)
        # Nothing has been written, so the objects directory is absent.
        assert cleanup_stale_object_temps(repo) == 0

    def test_cleanup_is_idempotent(self, tmp_path: pathlib.Path) -> None:
        """A second cleanup call finds nothing left to remove."""
        repo = _repo(tmp_path)
        _make_stale(self._new_shard(repo, "ef") / ".obj-tmp-stale")

        first = cleanup_stale_object_temps(repo)
        assert first == 1
        second = cleanup_stale_object_temps(repo)
        assert second == 0

    def test_cleanup_multiple_shards(self, tmp_path: pathlib.Path) -> None:
        """Stale files spread over several shard dirs are all removed."""
        repo = _repo(tmp_path)
        for prefix in ("00", "7f", "ff"):
            _make_stale(self._new_shard(repo, prefix) / f".obj-tmp-{prefix}")

        count = cleanup_stale_object_temps(repo)

        assert count == 3
| 322 | |
| 323 | |
| 324 | # --------------------------------------------------------------------------- |
| 325 | # 3. has_object O(log n) performance — 1k / 10k / 100k files per shard |
| 326 | # --------------------------------------------------------------------------- |
| 327 | |
| 328 | |
class TestHasObjectPerformance:
    """Prove that has_object does not degrade to O(n).

    ext4 and APFS use hash-tree / B-tree directory indexing so filename
    lookup is O(log n).  At n=100k the ratio to n=1k should be < 10×
    (log2(100000) / log2(1000) ≈ 1.66× in theory; we allow 10× for
    scheduler jitter).

    The dummy files must be created in the directory that has_object
    actually probes.  That directory is derived from
    ``object_path(repo, oid).parent`` rather than rebuilt by hand, so the
    tests keep measuring the right shard even if the on-disk layout changes.
    """

    def _populate_shard(
        self, shard_dir: pathlib.Path, n: int
    ) -> list[str]:
        """Create *n* one-byte dummy files in *shard_dir* and return their names.

        The directory is created if missing; an existing directory is reused.
        """
        shard_dir.mkdir(parents=True, exist_ok=True)
        names: list[str] = []
        for i in range(n):
            name = fake_id(f"dummy-{i}")
            (shard_dir / name).write_bytes(b"x")
            names.append(name)
        return names

    def _time_has_object(
        self,
        repo: pathlib.Path,
        oid: str,
        iterations: int = 200,
    ) -> float:
        """Return average has_object latency in milliseconds over *iterations*."""
        # Warm up filesystem cache so the first cold lookup is not measured.
        for _ in range(10):
            has_object(repo, oid)
        t0 = time.perf_counter()
        for _ in range(iterations):
            has_object(repo, oid)
        return (time.perf_counter() - t0) / iterations * 1000

    def test_has_object_under_10ms_at_100k_per_shard(
        self, tmp_path: pathlib.Path
    ) -> None:
        """has_object lookup < 10 ms with 100 000 files in the target shard."""
        repo = _repo(tmp_path)
        target_data = b"target-object-100k-test"
        target_oid = blob_id(target_data)

        # Write the real object first, then crowd the shard it landed in.
        write_object(repo, target_oid, target_data)
        shard = object_path(repo, target_oid).parent
        self._populate_shard(shard, 100_000)

        avg_ms = self._time_has_object(repo, target_oid, iterations=100)
        assert avg_ms < 10.0, (
            f"has_object averaged {avg_ms:.3f} ms at 100k files per shard — "
            "exceeded 10 ms budget. Filesystem lookup may be O(n)."
        )

    def test_lookup_growth_is_sublinear(self, tmp_path: pathlib.Path) -> None:
        """Lookup time at 10k files is < 10× time at 1k files (sub-linear proof)."""
        repo = _repo(tmp_path)

        # 1k shard
        data1k = b"object-for-1k-test"
        oid1k = blob_id(data1k)
        write_object(repo, oid1k, data1k)
        self._populate_shard(object_path(repo, oid1k).parent, 1_000)
        time_1k = self._time_has_object(repo, oid1k, iterations=500)

        # 10k shard (different repo so the shard is clean)
        repo2_root = tmp_path / "repo2"
        repo2_root.mkdir()
        repo2 = _repo(repo2_root)
        data10k = b"object-for-10k-test"
        oid10k = blob_id(data10k)
        write_object(repo2, oid10k, data10k)
        self._populate_shard(object_path(repo2, oid10k).parent, 10_000)
        time_10k = self._time_has_object(repo2, oid10k, iterations=500)

        # Sub-linear: 10× more files should not take 10× longer.  The floor
        # on the denominator avoids a division blow-up on very fast lookups.
        ratio = time_10k / max(time_1k, 0.001)
        assert ratio < 10.0, (
            f"has_object at 10k took {time_10k:.3f} ms vs {time_1k:.3f} ms at 1k "
            f"(ratio={ratio:.2f}×). Lookup appears O(n) — investigate filesystem."
        )

    def test_has_object_absent_is_fast(self, tmp_path: pathlib.Path) -> None:
        """Negative lookup (object not present) is also fast at 100k per shard."""
        repo = _repo(tmp_path)
        absent_data = b"this-object-will-not-be-written"
        absent_oid = blob_id(absent_data)

        # Crowd the shard where the object *would* live; never write it.
        shard = object_path(repo, absent_oid).parent
        self._populate_shard(shard, 100_000)

        avg_ms = self._time_has_object(repo, absent_oid, iterations=100)
        assert avg_ms < 10.0, (
            f"Negative has_object averaged {avg_ms:.3f} ms at 100k files — "
            "exceeded 10 ms budget."
        )
| 438 | |
| 439 | |
| 440 | # --------------------------------------------------------------------------- |
| 441 | # 4 & 5. 4-char sharding — configurable via [limits] shard_prefix_length |
| 442 | # --------------------------------------------------------------------------- |
| 443 | |
| 444 | |
class TestFourCharSharding:
    """Shard prefix length: defaults, config override, and path layout."""

    def test_default_prefix_length_is_two(self, tmp_path: pathlib.Path) -> None:
        """Without any config file, the limit reads back as 2."""
        repo = _repo(tmp_path)
        limit = get_limit("shard_prefix_length", repo)
        assert limit == 2

    def test_config_sets_prefix_length_to_four(self, tmp_path: pathlib.Path) -> None:
        """A config with shard_prefix_length = 4 is reported by get_limit."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        limit = get_limit("shard_prefix_length", repo)
        assert limit == 4

    def test_object_path_uses_four_char_prefix(self, tmp_path: pathlib.Path) -> None:
        """prefix_len=4 splits the hex digest into a 4-char dir and 60-char file."""
        repo = _repo(tmp_path)
        tail = "1" * 60
        oid = long_id("abcd" + tail)
        location = object_path(repo, oid, prefix_len=4)
        assert location.parent.name == "abcd"
        assert location.name == tail

    def test_object_path_default_still_two_char(self, tmp_path: pathlib.Path) -> None:
        """Omitting prefix_len yields the 2-char directory layout."""
        repo = _repo(tmp_path)
        tail = "1" * 60
        oid = long_id("abcd" + tail)
        location = object_path(repo, oid)
        assert location.parent.name == "ab"
        assert location.name == "cd" + tail

    def test_write_and_read_with_four_char_config(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With 4-char sharding configured, write lands at the 4-char path and reads back."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"four char shard test"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        # The object is expected under the 4-char shard directory.
        location = object_path(repo, oid, prefix_len=4)
        assert location.exists(), f"Object not found at 4-char path: {location}"
        assert read_object(repo, oid) == payload

    def test_four_char_object_is_0o444(self, tmp_path: pathlib.Path) -> None:
        """The read-only mode also applies under 4-char sharding."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"mode check in 4-char shard"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        file_stat = object_path(repo, oid, prefix_len=4).stat()
        assert stat.S_IMODE(file_stat.st_mode) == 0o444

    def test_65536_shard_space(self) -> None:
        """Four hex characters give 16^4 = 65 536 possible shard names."""
        assert 16**4 == 65_536

    def test_valid_shard_prefix_lens(self) -> None:
        """The set of accepted prefix lengths is exactly {2, 4}."""
        assert _VALID_SHARD_PREFIX_LENS == frozenset({2, 4})

    def test_default_shard_prefix_len_constant(self) -> None:
        """The default prefix length constant is 2."""
        assert _DEFAULT_SHARD_PREFIX_LEN == 2

    def test_invalid_shard_prefix_length_ignored(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A configured value outside {2, 4} is ignored in favour of 2."""
        repo = _repo(tmp_path)
        cfg = config_toml_path(repo)
        cfg.write_text("[limits]\nshard_prefix_length = 3\n", encoding="utf-8")
        assert get_limit("shard_prefix_length", repo) == 2

    def test_get_config_value_returns_shard_prefix_length(
        self, tmp_path: pathlib.Path
    ) -> None:
        """get_config_value reports the configured length as the string "4"."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        assert get_config_value("limits.shard_prefix_length", repo) == "4"

    def test_get_config_value_absent_returns_none(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With no config written, get_config_value yields None for the key."""
        repo = _repo(tmp_path)
        assert get_config_value("limits.shard_prefix_length", repo) is None
| 536 | |
| 537 | |
| 538 | # --------------------------------------------------------------------------- |
| 539 | # 6. Migration compatibility — dual-lookup fallback |
| 540 | # --------------------------------------------------------------------------- |
| 541 | |
| 542 | |
class TestMigrationFallback:
    """Objects stored under 2-char sharding stay reachable after a switch to 4-char."""

    def test_two_char_object_found_after_switching_to_four_char(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An object written under the default layout is still readable after reconfiguring.

        No migration step is run between the write and the lookups; the test
        only rewrites the config.
        """
        repo = _repo(tmp_path)
        payload = b"written before shard upgrade"
        oid = blob_id(payload)

        # Stored while the default (2-char) layout is in effect.
        write_object(repo, oid, payload)
        legacy_location = object_path(repo, oid, prefix_len=2)
        assert legacy_location.exists()

        # Reconfigure to 4-char sharding.
        _write_config(repo, 4)

        # Both the presence check and the read must still succeed.
        assert has_object(repo, oid), "Object lost after shard config upgrade"
        assert read_object(repo, oid) == payload

    def test_fallback_path_returns_two_char_when_primary_absent(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With 4-char configured and only the 2-char file present, the 2-char path is returned."""
        repo = _repo(tmp_path)
        payload = b"fallback test"
        oid = blob_id(payload)
        write_object(repo, oid, payload)  # lands at the 2-char path

        _write_config(repo, 4)

        resolved = _object_path_with_fallback(repo, oid)
        expected = object_path(repo, oid, prefix_len=2)
        assert resolved == expected
        assert resolved.exists()

    def test_primary_path_preferred_over_fallback(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An object written under 4-char config resolves to the 4-char path."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"written at four-char shard"
        oid = blob_id(payload)
        write_object(repo, oid, payload)  # lands at the 4-char (primary) path

        resolved = _object_path_with_fallback(repo, oid)
        assert resolved == object_path(repo, oid, prefix_len=4)

    def test_idempotent_write_after_migration_switch(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Re-writing an existing object after the switch returns False."""
        repo = _repo(tmp_path)
        payload = b"idempotent migration test"
        oid = blob_id(payload)

        # Initial write under 2-char sharding reports True.
        first = write_object(repo, oid, payload)
        assert first is True

        _write_config(repo, 4)

        # The object already exists at the 2-char path, so this reports False.
        second = write_object(repo, oid, payload)
        assert second is False
| 606 | |
| 607 | |
| 608 | # --------------------------------------------------------------------------- |
| 609 | # 7. Security: object_id injection / path traversal rejected |
| 610 | # --------------------------------------------------------------------------- |
| 611 | |
| 612 | |
class TestObjectIdSecurity:
    """Malformed object IDs are rejected by the path, presence and read APIs."""

    @pytest.mark.parametrize(
        "bad_id",
        [
            f"../../../etc/passwd{'a' * (64 - 19)}",  # path traversal, padded to 64 chars
            f"ABCDEF{'a' * 58}",  # uppercase hex
            "a" * 63,  # one character short
            "a" * 65,  # one character long
            "a" * 63 + "g",  # contains a non-hex character
            "",  # empty string
            f"{'a' * 32}/{'a' * 31}",  # path separator in the middle
        ],
    )
    def test_invalid_object_id_rejected(
        self, tmp_path: pathlib.Path, bad_id: str
    ) -> None:
        """Each entry point raises ValueError or TypeError for *bad_id*."""
        repo = _repo(tmp_path)
        rejected = (ValueError, TypeError)
        # The three calls are checked independently, in this order.
        for entry_point in (object_path, has_object, read_object):
            with pytest.raises(rejected):
                entry_point(repo, bad_id)
| 637 | |
| 638 | |
| 639 | # --------------------------------------------------------------------------- |
| 640 | # 8. Scale: 65 536 shard space — write one object per 2-char prefix bucket |
| 641 | # (smoke test with 256 buckets, not all 65k, to stay fast) |
| 642 | # --------------------------------------------------------------------------- |
| 643 | |
| 644 | |
class TestShardScaleSmoke:
    """Smoke tests for shard directory naming and coverage."""

    def test_256_two_char_shards_coexist(self, tmp_path: pathlib.Path) -> None:
        """One object per 2-char prefix produces 256 shard directories."""
        repo = _repo(tmp_path)
        hex_start = len("sha256:")
        seen: set[str] = set()
        counter = 0
        # Keep generating payloads until every 2-char prefix has been hit once.
        while len(seen) != 256:
            payload = f"shard-smoke-{counter}".encode()
            counter += 1
            oid = blob_id(payload)
            bucket = oid[hex_start:hex_start + 2]
            if bucket in seen:
                continue
            write_object(repo, oid, payload)
            seen.add(bucket)

        algo_dir = objects_dir(repo) / "sha256"
        shard_count = sum(1 for entry in algo_dir.iterdir() if entry.is_dir())
        assert shard_count == 256

    def test_four_char_prefix_produces_longer_shard_name(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Under 4-char sharding the shard directory name is the first 4 hex chars."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"four-char-shard-smoke"
        oid = blob_id(payload)
        write_object(repo, oid, payload)

        shard_name = object_path(repo, oid, prefix_len=4).parent.name
        hex_start = len("sha256:")
        assert len(shard_name) == 4
        assert shard_name == oid[hex_start:hex_start + 4]

    def test_object_file_name_is_correct_remainder(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Under 4-char sharding the file name is the remaining 60 hex chars."""
        repo = _repo(tmp_path)
        _write_config(repo, 4)
        payload = b"filename-check"
        oid = blob_id(payload)
        write_object(repo, oid, payload)

        file_name = object_path(repo, oid, prefix_len=4).name
        digest = split_id(oid)[1]
        assert file_name == digest[4:]
        assert len(file_name) == 60
| 691 | |
| 692 | |
| 693 | # --------------------------------------------------------------------------- |
| 694 | # 9. Stress: @slow — 5k object writes (test name still says 100k), confirm all are 0o444 |
| 695 | # --------------------------------------------------------------------------- |
| 696 | |
| 697 | |
@pytest.mark.slow
class TestLargeScaleMode:
    """Bulk-write stress check for the 0o444 mode invariant."""

    def test_100k_objects_all_0o444(self, tmp_path: pathlib.Path) -> None:
        """Write 5k objects and confirm every one has mode 0o444.

        5k exercises all shard-directory boundaries (256 shards with the
        default 2-char prefix).  The mode invariant is deterministic — scale
        beyond this adds no coverage.  The test name keeps its historical
        "100k" so existing test selections continue to match.
        """
        repo = _repo(tmp_path)
        n = 5_000
        for i in range(n):
            data = f"scale-object-{i}".encode()
            oid = blob_id(data)
            write_object(repo, oid, data)

        bad: list[str] = []
        checked = 0
        for _, obj_file in iter_stored_objects(repo):
            checked += 1
            mode = stat.S_IMODE(obj_file.stat().st_mode)
            if mode != 0o444:
                bad.append(f"{obj_file}: {oct(mode)}")

        # Guard against a vacuous pass when the iterator yields nothing.
        assert checked == n, f"iter_stored_objects yielded {checked} objects, expected {n}"

        # Join outside the f-string: a backslash inside an f-string
        # replacement field is a SyntaxError before Python 3.12.
        sample = "\n".join(bad[:5])
        assert not bad, f"{len(bad)} objects have wrong permissions:\n{sample}"
| 722 | |
| 723 | |
| 724 | # --------------------------------------------------------------------------- |
| 725 | # Regression: plan file ✅ sections must never silently regress to ⬜ |
| 726 | # --------------------------------------------------------------------------- |
| 727 | |
| 728 | |
class TestPlanFileChecklistRegression:
    """Regression test for the workflow bug where 'mark I-7 complete' authored
    from a stale working tree accidentally reset I-6 from ✅ back to ⬜.

    Root cause: the editor displayed a stale cached version of
    EXTREME_STRESS_PLAN.md (⬜ for 1.6).  The agent edited and committed from
    that stale view, overwriting the already-committed ✅.  Muse stored
    exactly what was staged; the wrong thing was staged.

    This test walks the last N commits in history, extracts the plan file
    object at each commit, and verifies that no section ever transitions from
    ✅ to ⬜.  A ✅ → ⬜ transition is always a regression; a ⬜ → ✅ is a
    completion.

    Sections are matched across commits by their header text with the status
    marker removed.  Matching on the raw line would never pair a ✅ line with
    its ⬜ counterpart, because the marker is part of the line.  A section
    whose title is reworded between commits is treated as a different section.

    NOTE(review): because the check now actually fires, any ✅ → ⬜ flip that
    already exists within the walked window of history will be reported.
    """

    _PLAN_FILE = "EXTREME_STRESS_PLAN.md"
    _SECTION_PATTERN = "### "
    _MAX_COMMITS_TO_WALK = 40
    _DONE = "✅"
    _TODO = "⬜"

    def _section_key(self, line: str) -> str:
        """Return *line* with status markers removed and whitespace collapsed."""
        stripped = line.replace(self._DONE, "").replace(self._TODO, "")
        return " ".join(stripped.split())

    def _get_sections(self, text: str) -> Manifest:
        """Return {section_header: status} for every line starting with ``### ``.

        The key is the header with its status marker stripped; the status is
        ✅, ⬜, or "?" when the line carries neither marker.  ✅ wins if a
        line contains both.
        """
        sections: Manifest = {}
        for line in text.splitlines():
            if not line.startswith(self._SECTION_PATTERN):
                continue
            if self._DONE in line:
                status = self._DONE
            elif self._TODO in line:
                status = self._TODO
            else:
                status = "?"
            sections[self._section_key(line)] = status
        return sections

    def _plan_text(self, muse_root: pathlib.Path, commit_rec) -> str | None:
        """Return the decoded plan file stored in *commit_rec*, or None if unavailable."""
        snap_rec = read_snapshot(muse_root, commit_rec.snapshot_id)
        if snap_rec is None:
            return None
        plan_oid = snap_rec.manifest.get(self._PLAN_FILE)
        if not plan_oid:
            return None
        raw = read_object(muse_root, plan_oid)
        if raw is None:
            return None
        return raw.decode("utf-8", errors="replace")

    def test_no_completed_section_regresses_to_incomplete(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Walk commit history: any section that was ✅ must never become ⬜.

        A regression (✅ → ⬜) means a committed completion was silently
        overwritten with an older state.  This test pins that invariant.
        The test is skipped when HEAD or its branch ref file is missing.
        """
        muse_root = pathlib.Path(__file__).parent.parent

        # Resolve HEAD to a commit id, following a symbolic "ref:" if present.
        head_file = head_path(muse_root)
        if not head_file.exists():
            pytest.skip("No .muse/HEAD file — not in a Muse repo")
        head_ref = head_file.read_text(encoding="utf-8").strip()
        if head_ref.startswith("ref:"):
            ref_name = head_ref.removeprefix("ref:").strip()
            branch_file = muse_dir(muse_root) / ref_name
            if not branch_file.exists():
                pytest.skip(f"Branch ref file missing: {ref_name}")
            head_commit_id = branch_file.read_text(encoding="utf-8").strip()
        else:
            head_commit_id = head_ref

        # The walk goes newest → oldest, so the previously visited commit is
        # the chronological *successor* of the one being examined.
        newer_sections: Manifest = {}
        newer_commit = ""
        regressions: list[str] = []
        current = head_commit_id
        walked = 0

        while current and walked < self._MAX_COMMITS_TO_WALK:
            commit_rec = read_commit(muse_root, current)
            if commit_rec is None:
                break

            text = self._plan_text(muse_root, commit_rec)
            if text:
                sections = self._get_sections(text)
                for header, status in sections.items():
                    # Done in this (older) commit but not-done in the newer
                    # one: the newer commit introduced the regression.
                    if status == self._DONE and newer_sections.get(header) == self._TODO:
                        regressions.append(
                            f"Commit {newer_commit[:8]}: '{header}' regressed ✅ → ⬜"
                        )
                newer_sections = sections
                newer_commit = current

            current = commit_rec.parent_commit_id or ""
            walked += 1

        # Join outside the f-string: a backslash inside an f-string
        # replacement field is a SyntaxError before Python 3.12.
        details = "\n".join(regressions)
        assert not regressions, (
            f"Plan file has {len(regressions)} section regression(s) — "
            "a previously completed (✅) section was overwritten with ⬜.\n"
            "Root cause: commit authored from stale working-tree state.\n"
            "Fix: always run `muse diff` before `muse code add .` to verify\n"
            "the working tree matches the intended state.\n\n"
            f"Regressions found:\n{details}"
        )