gabriel / muse public
shelf.py python
212 lines 7.5 KB
Raw
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 1 day ago
1 """muse.core.shelf — shelf (stash) layer for the Muse VCS.
2
3 Everything that reads, writes, or queries shelf entries lives here.
4
5 Public API
6 ----------
7 shelf_entry_path
8 On-disk path helper.
9
10 write_shelf_entry / read_shelf_entry / list_shelf_entries / delete_shelf_entry
11 Core shelf I/O.
12 """
13 from __future__ import annotations
14
15 import json as _json
16 import logging
17 import pathlib
18
19 from muse.core.io import (
20 MAX_MSGPACK_BYTES,
21 _read_msgpack_dict,
22 _write_shelf_header_atomic,
23 )
24 from muse.core.paths import shelf_dir as _shelf_dir
25 from muse.core.types import split_id
26
27 logger = logging.getLogger(__name__)
28
29
30 # ---------------------------------------------------------------------------
31 # Path helper
32 # ---------------------------------------------------------------------------
33
def shelf_entry_path(repo_root: pathlib.Path, entry_id: str) -> pathlib.Path:
    """Map a shelf entry ID to the file that stores it.

    The result has the shape ``.muse/shelf/<algo>/<hex>`` with no file
    extension.  Every entry lives in its own content-addressed file using the
    git-header+JSON framing (``shelf <size>\\0<json>``), mirroring the unified
    object store.  The one-file-per-entry layout is what lets:

    - a write be a temp-rename that never touches sibling entries;
    - a delete be a single ``unlink`` instead of a JSON array rewrite;
    - concurrent saves from several agents stay independent;
    - GC reachability walks glob ``shelf/<algo>/*``.

    The ``<algo>`` directory is taken from the prefix of *entry_id*, so the
    layout keeps working if another hash algorithm (blake3, sha3-256, …) is
    introduced later.

    Args:
        repo_root: Repository root directory.
        entry_id:  A ``<algo>:<hex>`` shelf entry ID.

    Returns:
        Path of the shelf entry file (no extension) under the shelf directory
        of *repo_root*.
    """
    # Split first so a malformed ID fails before the shelf directory is resolved.
    algorithm, digest = split_id(entry_id)
    return _shelf_dir(repo_root).joinpath(algorithm, digest)
61
62
63 # ---------------------------------------------------------------------------
64 # Shelf I/O
65 # ---------------------------------------------------------------------------
66
def write_shelf_entry(repo_root: pathlib.Path, entry: "dict[str, object]") -> None:
    """Store *entry* at ``.muse/shelf/<algo>/<hex>`` in git-header+JSON form.

    The on-disk framing is ``shelf <size>\\0<json>``, the same framing used for
    commits and snapshots in the unified object store.  The destination is
    derived from the entry's ``id`` field, so:

    - the write goes through ``_write_shelf_header_atomic`` (temp-rename), and
      a crash mid-write never corrupts sibling entries;
    - writing the same entry twice targets the same single file;
    - two agents shelving different entries write different files.

    Args:
        repo_root: Repository root directory.
        entry:     Shelf entry dict.  Its ``id`` field must be a
                   ``<algo>:<hex>`` string (e.g. ``sha256:<64-hex>``); a
                   missing ``id`` is treated as the empty string.

    Raises:
        ValueError: If ``.muse/shelf/`` is a symlink (symlink-swap attack
                    guard).
        OSError:    On filesystem errors (disk full, permission denied, etc.).
    """
    # Resolve the destination first: a malformed ID must fail before anything
    # on disk is inspected or created.
    target = shelf_entry_path(repo_root, str(entry.get("id", "")))

    # Refuse to follow a swapped-in symlink before creating any directories.
    if _shelf_dir(repo_root).is_symlink():
        raise ValueError(
            ".muse/shelf/ is a symlink — refusing to write shelf entry "
            "(symlink-swap attack guard)"
        )

    target.parent.mkdir(parents=True, exist_ok=True)
    _write_shelf_header_atomic(target, entry)
98
99
def _read_shelf_file(path: pathlib.Path, entry_id_hint: str = "") -> "dict[str, object] | None":
    """Read a shelf file in either new header+JSON or legacy msgpack format.

    New-format files are framed as ``<header>\\0<json>``: everything after the
    first NUL byte is decoded as UTF-8 JSON.  Files whose suffix is
    ``.msgpack`` are decoded with ``_read_msgpack_dict`` and then migrated:
    the entry is rewritten next to the legacy file (same name, suffix
    removed) and the legacy file is deleted.

    Migration is best-effort.  If the rewrite or the delete fails (read-only
    repository, payload the new format cannot serialise, …) the failure is
    logged and the successfully decoded entry is still returned; the legacy
    file is only removed after the new file has been written.

    Args:
        path:          File to read.
        entry_id_hint: Entry ID to fall back on when a legacy payload carries
                       no ``id`` field.  Migration is skipped when neither the
                       payload nor the hint supplies an ID.

    Returns:
        The decoded entry dict, or ``None`` when the file is missing,
        oversized, unreadable, corrupt, or does not decode to a dict.  This
        function never raises for those conditions.
    """
    try:
        # stat() lives inside the try: the file may vanish between the
        # caller's existence check / glob and this call.
        if path.stat().st_size > MAX_MSGPACK_BYTES:
            logger.warning("⚠️ shelf entry %s exceeds size limit — skipping", path.name[:24])
            return None

        if path.suffix == ".msgpack":
            data = _read_msgpack_dict(path)
            if not isinstance(data, dict):
                return None
            entry_id = str(data.get("id", "")) or entry_id_hint
            if entry_id:
                try:
                    # Write the new file first, delete the old one second, so a
                    # crash in between leaves the entry readable.
                    _write_shelf_header_atomic(path.with_suffix(""), data)  # type: ignore[arg-type]
                    path.unlink(missing_ok=True)
                except Exception as exc:
                    logger.warning(
                        "⚠️ Could not migrate legacy shelf entry %s: %s",
                        path.name[:24],
                        exc,
                    )
            return data  # type: ignore[return-value]

        raw = path.read_bytes()
        payload = raw[raw.index(b"\0") + 1:]
        data = _json.loads(payload.decode("utf-8"))
        if not isinstance(data, dict):
            return None
        return data  # type: ignore[return-value]
    except Exception as exc:
        # Deliberately broad: any storage or decoding failure degrades to
        # "entry unavailable" instead of propagating to callers.
        logger.warning("⚠️ Could not read shelf entry %s: %s", path.name[:24], exc)
        return None
128
129
def read_shelf_entry(repo_root: pathlib.Path, entry_id: str) -> "dict[str, object] | None":
    """Load the shelf entry stored under *entry_id*.

    The new-format file (no extension) is tried first.  When it is absent the
    legacy ``<hex>.msgpack`` file is tried instead, with *entry_id* passed
    along as the ID hint so the reader can migrate it to the git-header+JSON
    format.

    Args:
        repo_root: Repository root directory.
        entry_id:  A ``<algo>:<hex>`` shelf entry ID.

    Returns:
        The entry dict on success, or ``None`` if neither file exists or the
        reader could not produce a dict from it.
    """
    primary = shelf_entry_path(repo_root, entry_id)
    if not primary.exists():
        # Miss on the new layout: fall back to the legacy file, if any.
        fallback = primary.with_suffix(".msgpack")
        return _read_shelf_file(fallback, entry_id) if fallback.exists() else None
    return _read_shelf_file(primary)
154
155
def list_shelf_entries(repo_root: pathlib.Path) -> "list[dict[str, object]]":
    """Return all shelf entries sorted by ``created_at`` descending (newest first).

    Globs ``.muse/shelf/<algo>/*`` (exactly one algo-level directory deep).
    Both new git-header+JSON files (no extension) and legacy ``.msgpack``
    files are read; reading a legacy file triggers its migration.  Files whose
    name starts with ``.`` or that carry any other extension are ignored, and
    files that cannot be read are skipped.

    Each entry is reported once.  If both ``<hex>`` and ``<hex>.msgpack``
    exist (for example after a migration that was interrupted between writing
    the new file and deleting the old one), the new-format file wins and the
    legacy twin is only consulted when the new file is unreadable.

    Args:
        repo_root: Repository root directory.

    Returns:
        List of entry dicts, newest-first.  Empty list when the shelf directory
        does not exist or contains no valid entries.
    """
    shelf = _shelf_dir(repo_root)
    if not shelf.is_dir():
        return []

    # Materialise the listing before reading anything: migration creates and
    # deletes files in these directories, and the scan must not observe that.
    candidates = [
        path
        for path in shelf.glob("*/*")
        if not path.name.startswith(".") and path.suffix in ("", ".msgpack")
    ]
    # New-format files first, so a leftover legacy twin is seen as a duplicate.
    candidates.sort(key=lambda p: (p.suffix != "", p.parent.name, p.name))

    seen: "set[tuple[str, str]]" = set()
    entries = []
    for path in candidates:
        key = (path.parent.name, path.stem)
        if key in seen:
            continue
        data = _read_shelf_file(path)
        if data is not None:
            # Mark as seen only on success so a corrupt new-format file can
            # still be recovered from its legacy twin.
            seen.add(key)
            entries.append(data)

    entries.sort(key=lambda e: str(e.get("created_at", "")), reverse=True)
    return entries
186
187
def delete_shelf_entry(repo_root: pathlib.Path, entry_id: str) -> bool:
    """Delete a shelf entry by its content-addressed ID.

    Removes both the new (no extension) file and the legacy ``.msgpack`` file
    when present.  Each removal is attempted directly instead of being guarded
    by an existence check, so a concurrent delete of the same entry is
    treated as "already absent" and does not raise.

    Args:
        repo_root: Repository root directory.
        entry_id:  A ``<algo>:<hex>`` shelf entry ID.

    Returns:
        ``True`` if this call removed at least one file; ``False`` if neither
        file was present.

    Raises:
        OSError: On filesystem errors other than ``FileNotFoundError``.
    """

    def _remove(target: pathlib.Path) -> bool:
        """Unlink *target*; report whether this call removed it."""
        try:
            target.unlink()
        except (FileNotFoundError, NotADirectoryError):
            # Absent file, or a parent component that is not a directory:
            # both mean there is nothing to delete at this location.
            return False
        return True

    path = shelf_entry_path(repo_root, entry_id)
    found = _remove(path)
    # Always attempt the legacy file too, even if the new one was removed.
    if _remove(path.with_suffix(".msgpack")):
        found = True
    return found
File History 1 commit
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 1 day ago