gabriel / muse public
ignore.py python
304 lines 11.7 KB
Raw
sha256:a73c3f57b665e8c0be2c9e977b3ebefdb7ae8d46f196986d911c6a8f5d8b8d49 docs: update store.py references to focused module paths Sonnet 4.6 29 days ago
1 """Muse ignore — ``.museignore`` TOML parser and workspace path filter.
2
3 ``.museignore`` uses TOML with two kinds of sections:
4
5 ``[global]``
6 Patterns applied to every domain. Evaluated first, in array order.
7
8 ``[domain.<name>]``
9 Patterns applied only when the active domain is *<name>*. Appended
10 after global patterns and evaluated in array order.
11
12 Pattern syntax (gitignore-compatible):
13
14 - A trailing ``/`` marks a directory pattern: it matches files whose path begins
15 with that directory, never a file that merely has that name (Muse VCS tracks files, not directories).
16 - A leading ``/`` **anchors** the pattern to the repository root, so
17 ``/tmp/*.mid`` matches only ``tmp/drums.mid`` and not ``cache/tmp/drums.mid``.
18 - A leading ``!`` **negates** a pattern: a path previously matched by an ignore
19 rule is un-ignored when it matches a subsequent negation rule.
20 - ``*`` matches any sequence of characters **except** a path separator (``/``).
21 - ``**`` matches any sequence of characters **including** path separators.
22 - ``?`` matches a single character and ``[seq]`` / ``[!seq]`` match a character set; all other characters are matched literally.
23
24 Rule evaluation
25 ---------------
26 Patterns are evaluated in the order they appear (global first, then
27 domain-specific). The **last matching rule wins**, mirroring gitignore
28 behaviour. A later ``!important.tmp`` overrides an earlier ``*.tmp`` for
29 that specific path.
30
31 Public API
32 ----------
33 - :func:`load_ignore_config` — parse ``.museignore`` → :data:`MuseIgnoreConfig`
34 - :func:`resolve_patterns` — flatten config to ``list[str]`` for a domain
35 - :func:`is_ignored` — test a relative POSIX path against a pattern list
36 """
37
import fnmatch
import functools
import pathlib
import re
import tomllib
from typing import TypedDict
42
# Maps a domain name (the ``<name>`` part of ``[domain.<name>]``) to the
# section holding that domain's patterns.
type _DomainMap = dict[str, "DomainSection"]
# File name of the ignore file; it is joined onto the repository root.
_FILENAME = ".museignore"
45
class DomainSection(TypedDict, total=False):
    """Patterns for one ignore section (global or a named domain).

    Declared with ``total=False``, so the ``patterns`` key may be absent.
    """

    # Raw glob pattern strings for this section, in array order.
    patterns: list[str]
50
class ForceTrackSection(TypedDict, total=False):
    """Explicit whitelist that overrides the built-in secrets blocklist.

    ``paths`` contains exact repo-relative POSIX paths (no glob expansion).
    Use this for dev infrastructure that needs to be committed despite matching
    a secrets pattern — the canonical example is a self-signed localhost TLS
    cert/key.

    Declared with ``total=False``, so the ``paths`` key may be absent.
    """

    # Exact repo-relative POSIX paths; compared literally, never as globs.
    paths: list[str]
61
# Parsed shape of a ``.museignore`` file. Every key is optional
# (``total=False``), so an empty dict is a valid configuration.
# ``global`` is a Python keyword, so we use the functional TypedDict form.
MuseIgnoreConfig = TypedDict(
    "MuseIgnoreConfig",
    {
        # Patterns from the ``[global]`` table.
        "global": DomainSection,
        # Per-domain patterns from the ``[domain.<name>]`` tables.
        "domain": _DomainMap,
        # Exact paths from the ``[force_track]`` table.
        "force_track": ForceTrackSection,
    },
    total=False,
)
72
def _string_items(table: dict[str, object], key: str) -> list[str] | None:
    """Return the ``str`` entries of ``table[key]``, or ``None`` if it is not a list.

    Non-string entries are dropped silently so that no untyped value leaks
    into the typed configuration.
    """
    value = table.get(key)
    if not isinstance(value, list):
        return None
    return [item for item in value if isinstance(item, str)]


def load_ignore_config(root: pathlib.Path) -> MuseIgnoreConfig:
    """Read ``.museignore`` from *root* and return the parsed configuration.

    Builds :data:`MuseIgnoreConfig` from the raw TOML dict using explicit
    ``isinstance`` checks — no ``Any`` propagated into the return value.
    Tables or arrays of the wrong type are skipped, and non-string array
    entries are dropped.

    Args:
        root: Repository root directory. The ``.museignore`` file, if
            present, lives directly inside *root*.

    Returns:
        A :data:`MuseIgnoreConfig` mapping. The ``"global"``, ``"domain"`` and
        ``"force_track"`` keys are all optional; use :func:`resolve_patterns`,
        which handles the missing-key cases for patterns. Returns an empty
        mapping when ``.museignore`` is absent.

    Raises:
        ValueError: When ``.museignore`` exists but is not valid UTF-8 or
            contains invalid TOML.
    """
    ignore_file = root / _FILENAME
    # Read first and handle absence afterwards: a separate ``exists()`` check
    # would leave a window in which the file can disappear before the read.
    try:
        raw_bytes = ignore_file.read_bytes()
    except (FileNotFoundError, NotADirectoryError):
        return {}

    try:
        raw = tomllib.loads(raw_bytes.decode("utf-8"))
    except UnicodeDecodeError as exc:
        # Wrap so the caller learns which file is broken, as for TOML errors.
        raise ValueError(f"{_FILENAME}: not valid UTF-8 — {exc}") from exc
    except tomllib.TOMLDecodeError as exc:
        raise ValueError(f"{_FILENAME}: TOML parse error — {exc}") from exc

    result: MuseIgnoreConfig = {}

    # [global] section
    global_raw = raw.get("global")
    if isinstance(global_raw, dict):
        global_section: DomainSection = {}
        global_patterns = _string_items(global_raw, "patterns")
        if global_patterns is not None:
            global_section["patterns"] = global_patterns
        result["global"] = global_section

    # [domain.*] sections — each key under [domain] is a domain name.
    domain_raw = raw.get("domain")
    if isinstance(domain_raw, dict):
        domain_map: _DomainMap = {}
        for domain_name, domain_val in domain_raw.items():
            if not (isinstance(domain_name, str) and isinstance(domain_val, dict)):
                continue
            section: DomainSection = {}
            domain_patterns = _string_items(domain_val, "patterns")
            if domain_patterns is not None:
                section["patterns"] = domain_patterns
            domain_map[domain_name] = section
        result["domain"] = domain_map

    # [force_track] section — explicit whitelist overriding the secrets blocklist.
    force_track_raw = raw.get("force_track")
    if isinstance(force_track_raw, dict):
        ft_section: ForceTrackSection = {}
        ft_paths = _string_items(force_track_raw, "paths")
        if ft_paths is not None:
            ft_section["paths"] = ft_paths
        result["force_track"] = ft_section

    return result
141
def load_force_track_paths(root: pathlib.Path) -> frozenset[str]:
    """Collect the ``[force_track]`` whitelist of *root* as a frozen set.

    Each entry is an exact repo-relative POSIX path; nothing is expanded as a
    glob. A file whose relative path is in this set bypasses both the
    built-in secrets blocklist and any user ``.museignore`` patterns.

    The result is an empty :class:`frozenset` when ``.museignore`` is absent
    or has no ``[force_track]`` section.
    """
    section = load_ignore_config(root).get("force_track", {})
    # ``or []`` covers both a missing ``paths`` key and an empty list.
    return frozenset(section.get("paths") or [])
156
def resolve_patterns(config: MuseIgnoreConfig, domain: str) -> list[str]:
    """Build the ordered pattern list that applies to *domain*.

    The ``[global]`` patterns are listed first, in array order, and the
    patterns of ``[domain.<domain>]`` follow. Sections belonging to any other
    domain contribute nothing.

    Args:
        config: Parsed ignore configuration from :func:`load_ignore_config`.
        domain: The active domain name, e.g. ``"music"`` or ``"code"``.

    Returns:
        A new ``list[str]`` of raw glob pattern strings. It is empty when
        *config* is empty or neither section contains patterns.
    """
    # Every level is optional, so each lookup falls back to an empty value.
    shared = config.get("global", {}).get("patterns", [])
    scoped = config.get("domain", {}).get(domain, {}).get("patterns", [])
    return [*shared, *scoped]
186
187 def check_path_with_pattern(
188 rel_posix: str,
189 patterns: list[str],
190 ) -> tuple[bool, str | None]:
191 """Return ``(ignored, matching_pattern)`` for *rel_posix*.
192
193 Identical semantics to :func:`is_ignored` but also returns the last pattern
194 that caused *ignored* to be ``True``, or ``None`` when the path is not
195 ignored (either never matched, or un-ignored by a ``!negation`` rule).
196
197 This is the authoritative matching implementation. ``muse check-ignore``
198 uses it directly so the command and the snapshot engine always agree on
199 ignore semantics.
200
201 Args:
202 rel_posix: Workspace-relative POSIX path, e.g. ``"tracks/drums.mid"``.
203 patterns: Ordered pattern list from :func:`resolve_patterns`.
204
205 Returns:
206 A 2-tuple ``(ignored: bool, matching_pattern: str | None)``.
207 ``matching_pattern`` is the last pattern that set ``ignored = True``,
208 or ``None`` when the path ended up not ignored.
209 """
210 # Hot path: avoid constructing a PurePosixPath for every call — the pure-
211 # string _matches is 7-8× faster for the common case of 10-20 patterns
212 # evaluated against every file in a large working tree.
213 ignored = False
214 matching: str | None = None
215
216 for pattern in patterns:
217 negate = pattern.startswith("!")
218 pat = pattern[1:] if negate else pattern
219
220 matched = False
221 if pat.endswith("/"):
222 dir_prefix = pat
223 if rel_posix.startswith(dir_prefix) or _matches(rel_posix, f"{pat.rstrip('/')}/**"):
224 matched = True
225 elif _matches(rel_posix, pat):
226 matched = True
227
228 if matched:
229 ignored = not negate
230 matching = pattern if not negate else None
231
232 return ignored, matching
233
def is_ignored(rel_posix: str, patterns: list[str]) -> bool:
    """Tell whether *rel_posix* must be left out of the snapshot.

    Args:
        rel_posix: Workspace-relative POSIX path, e.g. ``"tracks/drums.mid"``.
        patterns: Ordered pattern list from :func:`resolve_patterns`.

    Returns:
        ``True`` when the path is ignored, ``False`` otherwise. With an empty
        *patterns* list nothing is ignored.

    Rules are applied in order and the last matching one wins, so a negation
    rule (``!pattern``) can un-ignore a path matched by an earlier rule.

    Directory patterns (trailing ``/``) match any file whose path starts with
    that directory prefix — e.g. ``artifacts/`` ignores ``artifacts/demo.html``.

    This is a thin wrapper over :func:`check_path_with_pattern`, the single
    authoritative matching loop, so this function and ``muse check-ignore``
    cannot disagree on ignore semantics.
    """
    # Only the verdict is needed here; the deciding pattern is discarded.
    return check_path_with_pattern(rel_posix, patterns)[0]
257
258 def _matches(path_str: str, pattern: str) -> bool:
259 """Return ``True`` if *path_str* matches *pattern*.
260
261 Implements gitignore path-matching semantics using pure string operations
262 and :func:`fnmatch.fnmatch` — no :class:`pathlib.PurePosixPath` objects
263 are constructed. This keeps the per-file overhead negligible even with
264 20+ patterns evaluated against every file in a 75 000-file working tree.
265
266 Semantics:
267
268 - **Anchored** (leading ``/``): matched against the full relative path
269 with the leading slash stripped.
270 - **Pattern with embedded ``/``**: matched against the full relative path
271 via ``fnmatch.fnmatch``. A leading ``**/`` is additionally tried
272 against the path with the prefix stripped (handles ``**/cache/*.dat``
273 matching ``cache/index.dat``).
274 - **Pattern without ``/``**: matched against the filename and every
275 trailing suffix of the path so that ``*.tmp`` matches ``drums.tmp``
276 *and* ``tracks/drums.tmp``.
277
278 Args:
279 path_str: Workspace-relative POSIX path as a plain string
280 (e.g. ``"tracks/drums.mid"``).
281 pattern: A single gitignore-style pattern (no leading ``!``).
282 """
283 # Anchored: match the full path from the root.
284 if pattern.startswith("/"):
285 return fnmatch.fnmatch(path_str, pattern[1:])
286
287 # Embedded slash: match from the right.
288 # ``**/foo/*.dat`` is tried first against the full path; if that fails,
289 # the ``**/`` prefix is stripped and tried again — this makes the pattern
290 # match paths that do not have any leading components (e.g. ``foo/a.dat``).
291 if "/" in pattern:
292 if fnmatch.fnmatch(path_str, pattern):
293 return True
294 if pattern.startswith("**/"):
295 return fnmatch.fnmatch(path_str, pattern[3:])
296 return False
297
298 # No slash: match against the filename and every trailing suffix so that
299 # ``*.tmp`` matches both ``drums.tmp`` and ``tracks/drums.tmp``.
300 parts = path_str.split("/")
301 for i in range(len(parts)):
302 if fnmatch.fnmatch("/".join(parts[i:]), pattern):
303 return True
304 return False
File History 2 commits
sha256:a73c3f57b665e8c0be2c9e977b3ebefdb7ae8d46f196986d911c6a8f5d8b8d49 docs: update store.py references to focused module paths Sonnet 4.6 29 days ago
sha256:b6cae4448122b2cc690d913be26f7e0a539f11855b8d288bd48be43eb532b5b2 refactor: migrate all source callers off muse.core.store re… Sonnet 4.6 minor 29 days ago