gabriel / muse public
test_core_ignore.py python
554 lines 22.0 KB
Raw
1 """Tests for muse/core/ignore.py — .museignore TOML parser and path filter."""
2
3 import pathlib
4
5 import pytest
6
7 from muse.core.ignore import (
8 MuseIgnoreConfig,
9 _matches,
10 is_ignored,
11 load_ignore_config,
12 resolve_patterns,
13 )
14 from muse.core.snapshot import _BUILTIN_SECRET_PATTERNS, walk_workdir
15 from muse.core.paths import muse_dir, repo_json_path
16
17
18 # ---------------------------------------------------------------------------
19 # load_ignore_config
20 # ---------------------------------------------------------------------------
21
22
class TestLoadIgnoreConfig:
    """Checks on what ``load_ignore_config`` returns for various ``.museignore`` files."""

    @staticmethod
    def _write_ignore_file(repo_root: pathlib.Path, toml_text: str) -> None:
        """Write *toml_text* to ``.museignore`` directly inside *repo_root*."""
        (repo_root / ".museignore").write_text(toml_text)

    def test_returns_empty_when_no_file(self, tmp_path: pathlib.Path) -> None:
        """With no ``.museignore`` present the loaded config equals ``{}``."""
        parsed = load_ignore_config(tmp_path)
        assert parsed == {}

    def test_empty_toml_file(self, tmp_path: pathlib.Path) -> None:
        """A zero-length ``.museignore`` loads as ``{}``."""
        self._write_ignore_file(tmp_path, "")
        parsed = load_ignore_config(tmp_path)
        assert parsed == {}

    def test_toml_comments_only(self, tmp_path: pathlib.Path) -> None:
        """A file holding only a TOML comment loads as ``{}``."""
        self._write_ignore_file(tmp_path, "# just a comment\n")
        parsed = load_ignore_config(tmp_path)
        assert parsed == {}

    def test_global_section_parsed(self, tmp_path: pathlib.Path) -> None:
        """Patterns listed under ``[global]`` are returned unchanged."""
        self._write_ignore_file(
            tmp_path,
            '[global]\npatterns = ["*.tmp", "*.bak"]\n',
        )
        parsed = load_ignore_config(tmp_path)
        global_section = parsed.get("global", {})
        assert global_section.get("patterns") == ["*.tmp", "*.bak"]

    def test_domain_section_parsed(self, tmp_path: pathlib.Path) -> None:
        """Patterns listed under ``[domain.midi]`` appear under ``domain -> midi``."""
        self._write_ignore_file(
            tmp_path,
            '[domain.midi]\npatterns = ["*.bak"]\n',
        )
        parsed = load_ignore_config(tmp_path)
        domains = parsed.get("domain", {})
        midi_section = domains.get("midi", {})
        assert midi_section.get("patterns") == ["*.bak"]

    def test_multiple_domain_sections_parsed(self, tmp_path: pathlib.Path) -> None:
        """Two ``[domain.*]`` tables are both available after loading."""
        toml_text = (
            '[domain.midi]\npatterns = ["*.bak"]\n'
            '[domain.code]\npatterns = ["__pycache__/"]\n'
        )
        self._write_ignore_file(tmp_path, toml_text)
        parsed = load_ignore_config(tmp_path)
        domains = parsed.get("domain", {})
        assert domains.get("midi", {}).get("patterns") == ["*.bak"]
        assert domains.get("code", {}).get("patterns") == ["__pycache__/"]

    def test_global_and_domain_sections_parsed(self, tmp_path: pathlib.Path) -> None:
        """A ``[global]`` table and a ``[domain.midi]`` table coexist."""
        toml_text = (
            '[global]\npatterns = ["*.tmp"]\n'
            '[domain.midi]\npatterns = ["*.bak"]\n'
        )
        self._write_ignore_file(tmp_path, toml_text)
        parsed = load_ignore_config(tmp_path)
        global_section = parsed.get("global", {})
        assert global_section.get("patterns") == ["*.tmp"]
        domains = parsed.get("domain", {})
        assert domains.get("midi", {}).get("patterns") == ["*.bak"]

    def test_negation_pattern_preserved(self, tmp_path: pathlib.Path) -> None:
        """A ``!``-prefixed pattern is kept verbatim in the loaded list."""
        self._write_ignore_file(
            tmp_path,
            '[global]\npatterns = ["*.bak", "!keep.bak"]\n',
        )
        parsed = load_ignore_config(tmp_path)
        global_section = parsed.get("global", {})
        assert global_section.get("patterns") == ["*.bak", "!keep.bak"]

    def test_invalid_toml_raises_value_error(self, tmp_path: pathlib.Path) -> None:
        """Unparseable content raises ``ValueError`` whose message mentions the file."""
        self._write_ignore_file(tmp_path, "this is not valid toml ][")
        with pytest.raises(ValueError, match=".museignore"):
            load_ignore_config(tmp_path)

    def test_section_without_patterns_key(self, tmp_path: pathlib.Path) -> None:
        """A bare ``[global]`` header with no ``patterns`` key loads as an empty section."""
        self._write_ignore_file(tmp_path, "[global]\n")
        parsed = load_ignore_config(tmp_path)
        assert parsed.get("global") == {}

    def test_non_string_patterns_silently_dropped(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Integer and boolean entries in ``patterns`` are absent from the result."""
        self._write_ignore_file(
            tmp_path,
            '[global]\npatterns = ["*.tmp", 42, true, "*.bak"]\n',
        )
        parsed = load_ignore_config(tmp_path)
        global_section = parsed.get("global", {})
        # Only the two string entries survive, in their original order.
        assert global_section.get("patterns") == ["*.tmp", "*.bak"]
99
100
101 # ---------------------------------------------------------------------------
102 # resolve_patterns
103 # ---------------------------------------------------------------------------
104
105
class TestResolvePatterns:
    """Checks on how ``resolve_patterns`` combines global and per-domain patterns."""

    def test_empty_config_returns_empty(self) -> None:
        """An empty config resolves to an empty list."""
        cfg: MuseIgnoreConfig = {}
        resolved = resolve_patterns(cfg, "midi")
        assert resolved == []

    def test_global_only(self) -> None:
        """With only a global section, its patterns are returned as-is."""
        cfg: MuseIgnoreConfig = {"global": {"patterns": ["*.tmp", ".DS_Store"]}}
        resolved = resolve_patterns(cfg, "midi")
        assert resolved == ["*.tmp", ".DS_Store"]

    def test_domain_only(self) -> None:
        """With only a matching domain section, its patterns are returned as-is."""
        cfg: MuseIgnoreConfig = {"domain": {"midi": {"patterns": ["*.bak"]}}}
        resolved = resolve_patterns(cfg, "midi")
        assert resolved == ["*.bak"]

    def test_global_and_matching_domain_merged(self) -> None:
        """Global patterns precede the active domain's patterns in the result."""
        cfg: MuseIgnoreConfig = {
            "global": {"patterns": ["*.tmp"]},
            "domain": {"midi": {"patterns": ["*.bak"]}},
        }
        resolved = resolve_patterns(cfg, "midi")
        assert resolved == ["*.tmp", "*.bak"]

    def test_other_domain_patterns_excluded(self) -> None:
        """Patterns belonging to a non-active domain do not leak into the result."""
        cfg: MuseIgnoreConfig = {
            "global": {"patterns": ["*.tmp"]},
            "domain": {
                "midi": {"patterns": ["*.bak"]},
                "code": {"patterns": ["node_modules/"]},
            },
        }
        # The active domain is "midi", so the "code" entry must be absent.
        resolved = resolve_patterns(cfg, "midi")
        assert "*.bak" in resolved
        assert "node_modules/" not in resolved

    def test_active_domain_not_in_config_returns_global_only(self) -> None:
        """A domain with no section of its own resolves to the global patterns alone."""
        cfg: MuseIgnoreConfig = {
            "global": {"patterns": ["*.tmp"]},
            "domain": {"midi": {"patterns": ["*.bak"]}},
        }
        # "genomics" has no table in the config.
        resolved = resolve_patterns(cfg, "genomics")
        assert resolved == ["*.tmp"]

    def test_global_section_without_patterns_key(self) -> None:
        """A global section lacking ``patterns`` contributes nothing."""
        cfg: MuseIgnoreConfig = {"global": {}}
        resolved = resolve_patterns(cfg, "midi")
        assert resolved == []

    def test_domain_section_without_patterns_key(self) -> None:
        """A domain section lacking ``patterns`` contributes nothing."""
        cfg: MuseIgnoreConfig = {"domain": {"midi": {}}}
        resolved = resolve_patterns(cfg, "midi")
        assert resolved == []

    def test_order_preserved(self) -> None:
        """Relative order inside each section is kept, global first."""
        cfg: MuseIgnoreConfig = {
            "global": {"patterns": ["a", "b", "c"]},
            "domain": {"midi": {"patterns": ["d", "e"]}},
        }
        resolved = resolve_patterns(cfg, "midi")
        assert resolved == ["a", "b", "c", "d", "e"]

    def test_negation_in_global_preserved(self) -> None:
        """A ``!`` pattern in the global section passes through unchanged."""
        cfg: MuseIgnoreConfig = {
            "global": {"patterns": ["*.bak", "!keep.bak"]},
        }
        resolved = resolve_patterns(cfg, "midi")
        assert resolved == ["*.bak", "!keep.bak"]

    def test_negation_in_domain_overrides_global(self) -> None:
        """A domain-level negation un-ignores a path that the global section ignores."""
        cfg: MuseIgnoreConfig = {
            "global": {"patterns": ["*.bak"]},
            "domain": {"midi": {"patterns": ["!session.bak"]}},
        }
        resolved = resolve_patterns(cfg, "midi")
        # Negated by the midi section, so no longer ignored.
        session_ignored = is_ignored("session.bak", resolved)
        assert not session_ignored
        # Still covered by the global "*.bak" rule with no negation.
        other_ignored = is_ignored("other.bak", resolved)
        assert other_ignored
183
184
185 # ---------------------------------------------------------------------------
186 # _matches (internal — gitignore path semantics, unchanged)
187 # ---------------------------------------------------------------------------
188
189
class TestMatchesInternal:
    """Verify the core matching logic in isolation.

    The first group passes plain strings to ``_matches``; the remaining groups
    pass ``pathlib.PurePosixPath`` objects, using the module-level ``pathlib``
    import rather than re-importing it inside every test.
    """

    # ---- Patterns without slash: match any component ----

    def test_ext_pattern_matches_top_level(self) -> None:
        """``*.tmp`` matches a file at the top level."""
        assert _matches("drums.tmp", "*.tmp")

    def test_ext_pattern_matches_nested(self) -> None:
        """``*.tmp`` matches a file one directory down."""
        assert _matches("tracks/drums.tmp", "*.tmp")

    def test_ext_pattern_matches_deep_nested(self) -> None:
        """``*.tmp`` matches a file several directories down."""
        assert _matches("a/b/c/drums.tmp", "*.tmp")

    def test_ext_pattern_no_false_positive(self) -> None:
        """``*.tmp`` does not match a file with a different extension."""
        assert not _matches("tracks/drums.mid", "*.tmp")

    def test_exact_name_matches_any_depth(self) -> None:
        """A bare file name matches that name inside nested directories."""
        assert _matches("a/b/.DS_Store", ".DS_Store")

    def test_exact_name_top_level(self) -> None:
        """A bare file name matches that name at the top level."""
        assert _matches(".DS_Store", ".DS_Store")

    # ---- Patterns with slash: match full path from right ----

    def test_dir_ext_matches_direct_child(self) -> None:
        """``tracks/*.bak`` matches a ``.bak`` file directly inside ``tracks``."""
        assert _matches(pathlib.PurePosixPath("tracks/drums.bak"), "tracks/*.bak")

    def test_dir_ext_no_match_different_dir(self) -> None:
        """``tracks/*.bak`` does not match a ``.bak`` file in another directory."""
        assert not _matches(pathlib.PurePosixPath("exports/drums.bak"), "tracks/*.bak")

    def test_double_star_matches_nested(self) -> None:
        """``**/cache/*.dat`` matches when ``cache`` sits below other directories."""
        assert _matches(pathlib.PurePosixPath("a/b/cache/index.dat"), "**/cache/*.dat")

    def test_double_star_matches_shallow(self) -> None:
        """``**/cache/*.dat`` matches ``cache/index.dat`` (``**`` = zero components)."""
        assert _matches(pathlib.PurePosixPath("cache/index.dat"), "**/cache/*.dat")

    # ---- Anchored patterns (leading /) ----

    def test_anchored_matches_root_level(self) -> None:
        """``/scratch.mid`` matches the file at the root."""
        assert _matches(pathlib.PurePosixPath("scratch.mid"), "/scratch.mid")

    def test_anchored_no_match_nested(self) -> None:
        """``/scratch.mid`` does not match the same name inside a sub-directory."""
        assert not _matches(pathlib.PurePosixPath("tracks/scratch.mid"), "/scratch.mid")

    def test_anchored_dir_pattern_no_match_file(self) -> None:
        """``/renders/*.wav`` matches only when ``renders`` is at the root.

        Despite the method name, the assertions are about root anchoring of a
        directory glob, not about files versus directories.
        """
        assert _matches(pathlib.PurePosixPath("renders/mix.wav"), "/renders/*.wav")
        assert not _matches(
            pathlib.PurePosixPath("exports/renders/mix.wav"), "/renders/*.wav"
        )
247
248
249 # ---------------------------------------------------------------------------
250 # is_ignored — full rule evaluation with negation (unchanged layer)
251 # ---------------------------------------------------------------------------
252
253
class TestIsIgnored:
    """Checks on ``is_ignored`` rule evaluation, including ``!`` negation."""

    def test_empty_patterns_ignores_nothing(self) -> None:
        """With no rules at all, a path is not ignored."""
        no_rules: list[str] = []
        assert not is_ignored("tracks/drums.mid", no_rules)

    def test_simple_ext_ignored(self) -> None:
        """An extension glob ignores a matching top-level file."""
        rules = ["*.tmp"]
        assert is_ignored("drums.tmp", rules)

    def test_simple_ext_nested_ignored(self) -> None:
        """An extension glob ignores a matching file inside a directory."""
        rules = ["*.tmp"]
        assert is_ignored("tracks/drums.tmp", rules)

    def test_non_matching_not_ignored(self) -> None:
        """A file with a different extension is left alone."""
        rules = ["*.tmp"]
        assert not is_ignored("drums.mid", rules)

    def test_directory_pattern_ignores_files_inside(self) -> None:
        """A trailing-slash pattern ignores files at any depth under that directory."""
        # Trailing "/" covers the directory and everything below it.
        rules = ["renders/"]
        assert is_ignored("renders/mix.wav", rules)
        assert is_ignored("renders/deep/session.mid", rules)
        assert not is_ignored("other/mix.wav", rules)

    def test_negation_un_ignores(self) -> None:
        """A later ``!name`` rule exempts that file from an earlier glob."""
        rules = ["*.bak", "!keep.bak"]
        assert is_ignored("session.bak", rules)
        assert not is_ignored("keep.bak", rules)

    def test_negation_nested_un_ignores(self) -> None:
        """A later ``!dir/name`` rule exempts that nested file from an earlier glob."""
        rules = ["*.bak", "!tracks/keeper.bak"]
        assert is_ignored("tracks/session.bak", rules)
        assert not is_ignored("tracks/keeper.bak", rules)

    def test_last_rule_wins(self) -> None:
        """Ignore, then negate, then ignore again leaves the file ignored."""
        rules = ["*.bak", "!session.bak", "*.bak"]
        assert is_ignored("session.bak", rules)

    def test_anchored_pattern_root_only(self) -> None:
        """A leading-slash pattern applies at the root but not in sub-directories."""
        rules = ["/scratch.mid"]
        assert is_ignored("scratch.mid", rules)
        assert not is_ignored("tracks/scratch.mid", rules)

    def test_ds_store_at_any_depth(self) -> None:
        """A bare file name is ignored at the root and at nested depths."""
        rules = [".DS_Store"]
        assert is_ignored(".DS_Store", rules)
        assert is_ignored("tracks/.DS_Store", rules)
        assert is_ignored("a/b/c/.DS_Store", rules)

    def test_double_star_glob(self) -> None:
        """Slash-free globs and ``**/``-prefixed globs both match at varying depth."""
        # Slash-free pattern: matches *.pyc wherever it lives.
        pyc_rules = ["*.pyc"]
        assert is_ignored("__pycache__/foo.pyc", pyc_rules)
        assert is_ignored("tracks/__pycache__/foo.pyc", pyc_rules)
        # Pattern containing a slash and starting with "**".
        cache_rules = ["**/cache/*.dat"]
        assert is_ignored("cache/index.dat", cache_rules)
        assert is_ignored("a/b/cache/index.dat", cache_rules)

    def test_multiple_patterns_first_matches(self) -> None:
        """Either of two independent globs is enough to ignore a file."""
        rules = ["*.tmp", "*.bak"]
        assert is_ignored("drums.tmp", rules)
        assert is_ignored("drums.bak", rules)
        assert not is_ignored("drums.mid", rules)

    def test_negation_before_rule_has_no_effect(self) -> None:
        """A negation placed before the ignoring rule is overridden by it."""
        # The later "*.bak" rule is evaluated last, so the file stays ignored.
        rules = ["!session.bak", "*.bak"]
        assert is_ignored("session.bak", rules)
319
320
321 # ---------------------------------------------------------------------------
322 # Integration: MidiPlugin.snapshot() honours .museignore TOML format
323 # ---------------------------------------------------------------------------
324
325
class TestMidiPluginSnapshotIgnore:
    """End-to-end: .museignore TOML format filters paths during snapshot()."""

    def _make_repo(self, tmp_path: pathlib.Path) -> pathlib.Path:
        """Return the directory used as both repo root and working tree.

        Nothing is created here; each test writes the files it needs into the
        returned directory, which is *tmp_path* itself.
        """
        return tmp_path

    @staticmethod
    def _snapshot_files(workdir: pathlib.Path):
        """Snapshot *workdir* with a fresh ``MidiPlugin`` and return ``snap["files"]``.

        The plugin is imported inside the function rather than at module level,
        as the individual tests originally did.
        NOTE(review): presumably this keeps the plugin import out of module
        import time — confirm before hoisting it to the top of the file.
        """
        from muse.plugins.midi.plugin import MidiPlugin

        snap = MidiPlugin().snapshot(workdir)
        return snap["files"]

    def test_snapshot_without_museignore_includes_all(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Without a ``.museignore`` both regular files are in the snapshot."""
        workdir = self._make_repo(tmp_path)
        (workdir / "beat.mid").write_text("data")
        (workdir / "session.tmp").write_text("temp")

        files = self._snapshot_files(workdir)
        assert "beat.mid" in files
        assert "session.tmp" in files

    def test_snapshot_excludes_global_pattern(self, tmp_path: pathlib.Path) -> None:
        """A ``[global]`` pattern removes matching files from the snapshot."""
        workdir = self._make_repo(tmp_path)
        (workdir / "beat.mid").write_text("data")
        (workdir / "session.tmp").write_text("temp")
        (workdir / ".museignore").write_text('[global]\npatterns = ["*.tmp"]\n')

        files = self._snapshot_files(workdir)
        assert "beat.mid" in files
        assert "session.tmp" not in files

    def test_snapshot_excludes_domain_specific_pattern(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A ``[domain.midi]`` pattern removes matching files from the snapshot."""
        workdir = self._make_repo(tmp_path)
        (workdir / "beat.mid").write_text("data")
        (workdir / "session.bak").write_text("backup")
        (workdir / ".museignore").write_text(
            '[domain.midi]\npatterns = ["*.bak"]\n'
        )

        files = self._snapshot_files(workdir)
        assert "beat.mid" in files
        assert "session.bak" not in files

    def test_snapshot_domain_isolation_other_domain_ignored(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A ``[domain.code]`` pattern has no effect on the midi plugin's snapshot."""
        workdir = self._make_repo(tmp_path)
        (workdir / "beat.mid").write_text("data")
        (workdir / "requirements.txt").write_text("pytest\n")
        # code-only ignore — must NOT apply to the midi plugin.
        (workdir / ".museignore").write_text(
            '[domain.code]\npatterns = ["requirements.txt"]\n'
        )

        files = self._snapshot_files(workdir)
        # requirements.txt should remain because the [domain.code] section
        # does not apply when the active domain is "midi".
        assert "requirements.txt" in files
        assert "beat.mid" in files

    def test_snapshot_negation_keeps_file(self, tmp_path: pathlib.Path) -> None:
        """A ``!`` rule keeps one file that a preceding glob would drop."""
        workdir = self._make_repo(tmp_path)
        (workdir / "session.tmp").write_text("temp")
        (workdir / "important.tmp").write_text("keep me")
        (workdir / ".museignore").write_text(
            '[global]\npatterns = ["*.tmp", "!important.tmp"]\n'
        )

        files = self._snapshot_files(workdir)
        assert "session.tmp" not in files
        assert "important.tmp" in files

    def test_snapshot_domain_negation_overrides_global(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A midi-domain negation restores a file that the global section ignores."""
        workdir = self._make_repo(tmp_path)
        (workdir / "session.bak").write_text("backup")
        content = (
            '[global]\npatterns = ["*.bak"]\n'
            '[domain.midi]\npatterns = ["!session.bak"]\n'
        )
        (workdir / ".museignore").write_text(content)

        files = self._snapshot_files(workdir)
        # session.bak is globally ignored but un-ignored by the midi domain section.
        assert "session.bak" in files

    def test_snapshot_nested_pattern(self, tmp_path: pathlib.Path) -> None:
        """A ``dir/*.ext`` pattern drops the matching file inside that directory."""
        workdir = self._make_repo(tmp_path)
        renders = workdir / "renders"
        renders.mkdir()
        (workdir / "beat.mid").write_text("data")
        (renders / "preview.wav").write_text("audio")
        (workdir / ".museignore").write_text(
            '[global]\npatterns = ["renders/*.wav"]\n'
        )

        files = self._snapshot_files(workdir)
        assert "beat.mid" in files
        assert "renders/preview.wav" not in files

    def test_snapshot_dotfiles_always_excluded(self, tmp_path: pathlib.Path) -> None:
        """``.DS_Store`` is absent from the snapshot even with no ``.museignore``."""
        workdir = self._make_repo(tmp_path)
        (workdir / "beat.mid").write_text("data")
        (workdir / ".DS_Store").write_bytes(b"\x00" * 16)
        # No .museignore — dotfiles excluded by the built-in plugin rule.

        files = self._snapshot_files(workdir)
        assert "beat.mid" in files
        assert ".DS_Store" not in files

    def test_snapshot_with_empty_museignore(self, tmp_path: pathlib.Path) -> None:
        """A comment-only ``.museignore`` does not remove regular files."""
        workdir = self._make_repo(tmp_path)
        (workdir / "beat.mid").write_text("data")
        # Valid TOML — just a comment, no sections.
        (workdir / ".museignore").write_text("# empty config\n")

        files = self._snapshot_files(workdir)
        assert "beat.mid" in files

    def test_snapshot_directory_pattern_excludes_files_inside(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A trailing-slash directory pattern drops the files inside that directory."""
        workdir = self._make_repo(tmp_path)
        renders = workdir / "renders"
        renders.mkdir()
        (renders / "mix.wav").write_text("audio")
        # Directory pattern ignores all files inside it — gitignore semantics.
        (workdir / ".museignore").write_text('[global]\npatterns = ["renders/"]\n')

        files = self._snapshot_files(workdir)
        assert "renders/mix.wav" not in files
498
499
500 # ---------------------------------------------------------------------------
501 # Regression: _BUILTIN_SECRET_PATTERNS must not exclude .env.example
502 # ---------------------------------------------------------------------------
503
504
class TestBuiltinSecretPatterns:
    """.env.example is the universal convention for a credential-free template.
    It must never be caught by the builtin secrets blocklist.

    Regression: .env.* was previously in _BUILTIN_SECRET_PATTERNS, which
    caused walk_workdir / workdir_snapshot to exclude .env.example even when
    .museignore had no such rule — making ``muse diff`` falsely report it as
    deleted and blocking ``muse checkout`` with a false dirty-tree error.
    """

    @staticmethod
    def _init_code_repo(root: pathlib.Path) -> None:
        """Create the muse directory and a repo JSON file declaring the "code" domain.

        Shared by the ``walk_workdir`` tests so both start from the same
        on-disk layout.
        """
        muse_dir(root).mkdir()
        repo_json_path(root).write_text('{"repo_id": "x", "domain": "code"}')

    def test_env_wildcard_not_in_builtin_patterns(self) -> None:
        """The literal pattern ``.env.*`` is not part of the builtin list."""
        assert ".env.*" not in _BUILTIN_SECRET_PATTERNS, (
            ".env.* must not be in _BUILTIN_SECRET_PATTERNS — it catches "
            ".env.example, the standard credential-free template convention"
        )

    def test_env_example_not_ignored_by_builtins(self) -> None:
        """``.env.example`` is not ignored when evaluated against the builtin list."""
        assert not is_ignored(".env.example", _BUILTIN_SECRET_PATTERNS), (
            ".env.example must not be excluded by the builtin secret patterns"
        )

    # One test case per path, so a failure on one secret file name does not
    # hide failures on the names that follow it.
    @pytest.mark.parametrize(
        "path",
        [
            ".env",
            ".env.local",
            ".env.staging",
            ".env.production",
            ".env.prod",
            ".env.development",
            ".envrc",
        ],
    )
    def test_real_secret_files_still_ignored(self, path: str) -> None:
        """Each real secret file name is still ignored by the builtin list."""
        assert is_ignored(path, _BUILTIN_SECRET_PATTERNS), (
            f"{path} must still be excluded by builtin secret patterns"
        )

    def test_walk_workdir_includes_env_example(self, tmp_path: pathlib.Path) -> None:
        """walk_workdir must include .env.example with no .museignore present."""
        self._init_code_repo(tmp_path)
        (tmp_path / ".env.example").write_text("DB_PASSWORD=changeme\n")
        (tmp_path / "app.py").write_text("x = 1\n")

        manifest = walk_workdir(tmp_path)
        assert ".env.example" in manifest, (
            "walk_workdir must include .env.example — it is not a secret file"
        )

    def test_walk_workdir_excludes_real_env_secrets(self, tmp_path: pathlib.Path) -> None:
        """walk_workdir must still exclude real secret .env files."""
        self._init_code_repo(tmp_path)
        (tmp_path / ".env").write_text("DB_PASSWORD=secret\n")
        (tmp_path / ".env.local").write_text("DB_PASSWORD=local\n")

        manifest = walk_workdir(tmp_path)
        assert ".env" not in manifest
        assert ".env.local" not in manifest
File History 1 commit