"""Tests: algorithm-directory layout for the object store. Canonical layout:: .muse/objects/sha256// Also covers ``iter_stored_objects`` — the single canonical walker that replaces the six inline ``iterdir`` loops scattered across gc, maintenance, prune, count_objects, verify_object, and object_store itself. """ from __future__ import annotations import pathlib import pytest from muse.core.types import blob_id, DEFAULT_HASH_ALGO, split_id from muse.core.object_store import ( _object_path_with_fallback, cleanup_stale_object_temps, has_object, iter_stored_objects, object_path, objects_dir, read_object, write_object, ) from muse.core.paths import muse_dir # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _repo(tmp_path: pathlib.Path) -> pathlib.Path: muse_dir(tmp_path).mkdir() return tmp_path # --------------------------------------------------------------------------- # 1. object_path — algo directory # --------------------------------------------------------------------------- class TestObjectPathAlgoDirectory: """object_path must embed the algorithm as a directory component.""" def test_path_contains_sha256_directory(self, tmp_path: pathlib.Path) -> None: """object_path returns .muse/objects/sha256//.""" repo = _repo(tmp_path) oid = blob_id(b"hello") p = object_path(repo, oid) # algo dir is the first component under objects/ assert p.parent.parent.name == "sha256" def test_algo_directory_is_inside_objects(self, tmp_path: pathlib.Path) -> None: """sha256/ sits directly under .muse/objects/.""" repo = _repo(tmp_path) oid = blob_id(b"world") p = object_path(repo, oid) assert p.parent.parent.parent == objects_dir(repo) def test_shard_prefix_still_correct(self, tmp_path: pathlib.Path) -> None: """The 2-char shard prefix is the first 2 hex chars of the hash.""" repo = _repo(tmp_path) data = b"shard-check" oid = blob_id(data) p = object_path(repo, oid) assert p.parent.name == oid[len("sha256:"):len("sha256:") + 2] def test_filename_is_remaining_hex(self, tmp_path: pathlib.Path) -> None: """Object filename is the last 62 hex chars of the hash.""" repo = _repo(tmp_path) data = b"filename-check" oid = blob_id(data) p = object_path(repo, oid) assert p.name == split_id(oid)[1][2:] def test_four_char_prefix_still_nested_under_sha256( self, tmp_path: pathlib.Path ) -> None: """prefix_len=4 still places the shard under sha256/.""" repo = _repo(tmp_path) oid = blob_id(b"four-char") p = object_path(repo, oid, prefix_len=4) assert p.parent.parent.name == DEFAULT_HASH_ALGO assert p.parent.name == split_id(oid)[1][:4] assert p.name == split_id(oid)[1][4:] def test_write_object_lands_in_sha256_dir(self, tmp_path: pathlib.Path) -> None: """write_object places the file under .muse/objects/sha256/.""" repo = _repo(tmp_path) data = b"write-check" oid = blob_id(data) write_object(repo, oid, data) p = object_path(repo, oid) assert p.exists() assert p.parent.parent.name == "sha256" # --------------------------------------------------------------------------- # 2. iter_stored_objects — new layout # --------------------------------------------------------------------------- class TestIterStoredObjectsNewLayout: """iter_stored_objects yields (prefixed_id, path) from the new layout.""" def test_empty_store_yields_nothing(self, tmp_path: pathlib.Path) -> None: repo = _repo(tmp_path) assert list(iter_stored_objects(repo)) == [] def test_yields_written_object(self, tmp_path: pathlib.Path) -> None: repo = _repo(tmp_path) data = b"single object" oid = blob_id(data) write_object(repo, oid, data) results = list(iter_stored_objects(repo)) assert len(results) == 1 yielded_id, yielded_path = results[0] assert yielded_id == oid def test_yielded_path_exists(self, tmp_path: pathlib.Path) -> None: repo = _repo(tmp_path) oid = blob_id(b"path-exists") write_object(repo, oid, b"path-exists") _, p = list(iter_stored_objects(repo))[0] assert p.exists() assert p.is_file() def test_yields_all_objects(self, tmp_path: pathlib.Path) -> None: repo = _repo(tmp_path) written = set() for i in range(10): data = f"obj-{i}".encode() oid = blob_id(data) write_object(repo, oid, data) written.add(oid) yielded = {oid for oid, _ in iter_stored_objects(repo)} assert yielded == written def test_ids_are_sha256_prefixed(self, tmp_path: pathlib.Path) -> None: """All yielded IDs carry the sha256: prefix.""" repo = _repo(tmp_path) write_object(repo, blob_id(b"prefix-check"), b"prefix-check") for oid, _ in iter_stored_objects(repo): assert oid.startswith("sha256:") def test_no_duplicates(self, tmp_path: pathlib.Path) -> None: repo = _repo(tmp_path) data = b"idempotent" oid = blob_id(data) write_object(repo, oid, data) write_object(repo, oid, data) # second write is no-op results = list(iter_stored_objects(repo)) assert len(results) == 1 def test_skips_symlinks(self, tmp_path: pathlib.Path) -> None: """Symlinked files inside shard dirs are not yielded.""" repo = _repo(tmp_path) oid = blob_id(b"real") write_object(repo, oid, b"real") p = object_path(repo, oid) link = p.parent / f"symlink{'a' * 60}" link.symlink_to(p) results = list(iter_stored_objects(repo)) ids = [r[0] for r in results] assert len(ids) == 1 assert oid in ids def test_skips_non_hex_filenames(self, tmp_path: pathlib.Path) -> None: """Stray files (DS_Store, editor temps) with non-hex names are skipped.""" repo = _repo(tmp_path) oid = blob_id(b"real-obj") write_object(repo, oid, b"real-obj") p = object_path(repo, oid) (p.parent / ".DS_Store").write_bytes(b"") (p.parent / "editor.tmp").write_bytes(b"") results = list(iter_stored_objects(repo)) assert len(results) == 1 # --------------------------------------------------------------------------- # 3. _object_path_with_fallback — shard-prefix fallback only # --------------------------------------------------------------------------- class TestFallbackShardPrefix: """_object_path_with_fallback handles the 2-char/4-char shard-prefix migration.""" def test_finds_canonical_layout(self, tmp_path: pathlib.Path) -> None: repo = _repo(tmp_path) data = b"canonical find" oid = blob_id(data) write_object(repo, oid, data) p = _object_path_with_fallback(repo, oid) assert p.exists() assert p.parent.parent.name == "sha256" def test_has_object_canonical(self, tmp_path: pathlib.Path) -> None: repo = _repo(tmp_path) data = b"has-object canonical" oid = blob_id(data) write_object(repo, oid, data) assert has_object(repo, oid) def test_read_object_canonical(self, tmp_path: pathlib.Path) -> None: repo = _repo(tmp_path) data = b"read-object canonical" oid = blob_id(data) write_object(repo, oid, data) assert read_object(repo, oid) == data # --------------------------------------------------------------------------- # 4. cleanup_stale_object_temps # --------------------------------------------------------------------------- class TestCleanupStaleTempNewLayout: """cleanup_stale_object_temps handles the algo-directory structure.""" def test_cleanup_finds_temps_in_sha256_shards( self, tmp_path: pathlib.Path ) -> None: """Stale .obj-tmp-* files inside sha256// are cleaned up.""" import time repo = _repo(tmp_path) # Create a temp file in the new layout shard directory. shard = objects_dir(repo) / "sha256" / "ab" shard.mkdir(parents=True) stale = shard / ".obj-tmp-stale" stale.write_bytes(b"stale") # Back-date so it exceeds the min-age threshold. old_time = time.time() - 120 os.utime(stale, (old_time, old_time)) removed = cleanup_stale_object_temps(repo) assert removed >= 1 assert not stale.exists() import os # noqa: E402 — needed by the last test