gabriel / muse public
test_code_plugin.py python
2,463 lines 100.6 KB
Raw
sha256:c10a2ce474b3bb7ff2a3d628e8a3f2e028fd78ca652513496a03a498ae2267b3 chore: sweep all stale DirectoryRenameOp / directory_rename… Sonnet 4.6 minor ⚠ breaking 23 days ago
1 """Tests for the code domain plugin.
2
3 Coverage
4 --------
5 Unit
6 - :mod:`muse.plugins.code.ast_parser`: symbol extraction, content IDs,
7 rename detection hashes, import handling.
8 - :mod:`muse.plugins.code.symbol_diff`: diff_symbol_trees golden cases,
9 cross-file move annotation.
10
11 Protocol conformance
12 - ``CodePlugin`` satisfies ``MuseDomainPlugin`` and ``AddressedMergePlugin``.
13
14 Snapshot
15 - Path form: walks all files, raw-bytes hash, honours .museignore.
16 - Manifest form: returned as-is.
17 - Stability: two calls on the same directory produce identical results.
18
19 Diff
20 - File-level (no repo_root): added / removed / modified.
21 - Semantic (with repo_root via object store): symbol-level PatchOps,
22 rename detection, formatting-only suppression.
23
24 Golden diff cases
25 - Add a new function → InsertOp inside PatchOp.
26 - Remove a function → DeleteOp inside PatchOp.
27 - Rename a function → ReplaceOp with "renamed to" in new_summary.
28 - Change function body → ReplaceOp with "implementation changed".
29 - Change function signature → ReplaceOp with "signature changed".
30 - Add a new file → InsertOp (or PatchOp with all-insert child ops).
31 - Remove a file → DeleteOp (or PatchOp with all-delete child ops).
32 - Reformat only → ReplaceOp with "reformatted" in new_summary.
33
34 Merge
35 - Different symbols in same file → auto-merge (no conflicts).
36 - Same symbol modified by both → symbol-level conflict address.
37 - Disjoint files → auto-merge.
38 - File-level three-way merge correctness.
39
40 Schema
41 - Valid DomainSchema with five dimensions.
42 - merge_mode == "three_way".
43 - schema_version == 1.
44
45 Drift
46 - No drift: committed equals live.
47 - Has drift: file added / modified / removed.
48
49 Plugin registry
50 - "code" is in the registered domain list.
51 """
52
53 import pathlib
54 import textwrap
55
56 import pytest
57
58 from muse._version import __version__
59 from muse.core.types import blob_id, fake_id
60 from muse.core.object_store import write_object
61 from muse.domain import (
62 AddressedMergePlugin,
63 InsertOp,
64 MuseDomainPlugin,
65 SnapshotManifest,
66 )
67 from muse.plugins.code.ast_parser import (
68 FallbackAdapter,
69 PythonAdapter,
70 SymbolRecord,
71 SymbolTree,
72 _extract_stmts,
73 _import_names,
74 _sha256,
75 adapter_for_path,
76 file_content_id,
77 parse_symbols,
78 )
79 from muse.plugins.code.plugin import CodePlugin, hash_file as _hash_file
80 from muse.plugins.code.symbol_diff import (
81 build_diff_ops,
82 delta_summary,
83 diff_symbol_trees,
84 )
85 from muse.plugins.registry import registered_domains
86
87
88 # ---------------------------------------------------------------------------
89 # Helpers
90 # ---------------------------------------------------------------------------
91
92
93
def _make_manifest(files: dict[str, str]) -> SnapshotManifest:
    """Wrap *files* in a ``SnapshotManifest`` tagged with the ``"code"`` domain.

    The parameter is annotated with the builtin ``dict[str, str]`` rather than
    a bare ``Manifest`` name: no ``Manifest`` is imported or defined in this
    module, so the old annotation raised ``NameError`` on interpreters that
    evaluate annotations when the function is defined.
    """
    return SnapshotManifest(files=files, domain="code")
96
97
98 def _src(code: str) -> bytes:
99 return textwrap.dedent(code).encode()
100
101
def _empty_tree() -> SymbolTree:
    """Return a new symbol tree that contains no symbols."""
    return dict()
104
105
def _store_blob(repo_root: pathlib.Path, data: bytes) -> str:
    """Write *data* into the object store of *repo_root* and return its blob ID.

    The ID is computed with ``blob_id`` and the bytes are stored under that ID
    via ``write_object``.
    """
    object_id = blob_id(data)
    write_object(repo_root, object_id, data)
    return object_id
110
111
112 # ---------------------------------------------------------------------------
113 # Plugin registry
114 # ---------------------------------------------------------------------------
115
116
def test_code_in_registry() -> None:
    """The ``"code"`` domain is among the registered domains."""
    domains = registered_domains()
    assert "code" in domains
119
120
121 # ---------------------------------------------------------------------------
122 # Protocol conformance
123 # ---------------------------------------------------------------------------
124
125
def test_satisfies_muse_domain_plugin() -> None:
    """A ``CodePlugin`` instance passes the ``MuseDomainPlugin`` isinstance check."""
    assert isinstance(CodePlugin(), MuseDomainPlugin)
129
130
def test_satisfies_addressed_merge_plugin() -> None:
    """A ``CodePlugin`` instance passes the ``AddressedMergePlugin`` isinstance check."""
    assert isinstance(CodePlugin(), AddressedMergePlugin)
134
135
136 # ---------------------------------------------------------------------------
137 # PythonAdapter — unit tests
138 # ---------------------------------------------------------------------------
139
140
class TestPythonAdapter:
    """Unit tests for ``PythonAdapter``: symbol extraction and hash fields."""

    # A single adapter instance shared by every test in this class.
    adapter = PythonAdapter()

    def test_supported_extensions(self) -> None:
        """Both ``.py`` and ``.pyi`` are reported as supported."""
        for extension in (".py", ".pyi"):
            assert extension in self.adapter.supported_extensions()

    def test_parse_top_level_function(self) -> None:
        """A module-level ``def`` is recorded as a ``function`` under ``path::name``."""
        source = _src("""\
            def add(a: int, b: int) -> int:
                return a + b
        """)
        symbols = self.adapter.parse_symbols(source, "utils.py")
        assert "utils.py::add" in symbols
        record = symbols["utils.py::add"]
        assert record["kind"] == "function"
        assert record["name"] == "add"
        assert record["qualified_name"] == "add"

    def test_parse_async_function(self) -> None:
        """An ``async def`` gets the distinct kind ``async_function``."""
        source = _src("""\
            async def fetch(url: str) -> bytes:
                pass
        """)
        symbols = self.adapter.parse_symbols(source, "api.py")
        assert "api.py::fetch" in symbols
        assert symbols["api.py::fetch"]["kind"] == "async_function"

    def test_parse_class_and_methods(self) -> None:
        """A class and its methods are extracted; methods are addressed as ``Class.method``."""
        source = _src("""\
            class Dog:
                def bark(self) -> None:
                    print("woof")
                def sit(self) -> None:
                    pass
        """)
        symbols = self.adapter.parse_symbols(source, "animals.py")
        assert "animals.py::Dog" in symbols
        assert symbols["animals.py::Dog"]["kind"] == "class"
        assert "animals.py::Dog.bark" in symbols
        assert symbols["animals.py::Dog.bark"]["kind"] == "method"
        assert "animals.py::Dog.sit" in symbols

    def test_parse_imports(self) -> None:
        """Imported names appear under ``path::import::name`` addresses."""
        source = _src("""\
            import os
            import sys
            from pathlib import Path
        """)
        symbols = self.adapter.parse_symbols(source, "app.py")
        assert "app.py::import::os" in symbols
        assert "app.py::import::sys" in symbols
        assert "app.py::import::Path" in symbols

    def test_parse_top_level_variable(self) -> None:
        """Plain and annotated module-level assignments are both extracted."""
        source = _src("""\
            MAX_RETRIES = 3
            VERSION: str = "1.0"
        """)
        symbols = self.adapter.parse_symbols(source, "config.py")
        assert "config.py::MAX_RETRIES" in symbols
        assert symbols["config.py::MAX_RETRIES"]["kind"] == "variable"
        assert "config.py::VERSION" in symbols

    def test_syntax_error_returns_empty_tree(self) -> None:
        """Unparseable source produces an empty symbol tree."""
        broken_source = b"def broken("
        symbols = self.adapter.parse_symbols(broken_source, "broken.py")
        assert symbols == {}

    def test_content_id_stable_across_calls(self) -> None:
        """Parsing the same bytes twice gives the same ``content_id``."""
        source = _src("""\
            def hello() -> str:
                return "world"
        """)
        first_parse = self.adapter.parse_symbols(source, "a.py")
        second_parse = self.adapter.parse_symbols(source, "a.py")
        first_id = first_parse["a.py::hello"]["content_id"]
        second_id = second_parse["a.py::hello"]["content_id"]
        assert first_id == second_id

    def test_formatting_does_not_change_content_id(self) -> None:
        """Reformatting a function must not change its content_id."""
        spaced = _src("""\
            def add(a, b):
                return a + b
        """)
        compact = _src("""\
            def add(a,b):
                return a + b
        """)
        spaced_tree = self.adapter.parse_symbols(spaced, "f.py")
        compact_tree = self.adapter.parse_symbols(compact, "f.py")
        spaced_id = spaced_tree["f.py::add"]["content_id"]
        compact_id = compact_tree["f.py::add"]["content_id"]
        assert spaced_id == compact_id

    def test_body_hash_differs_from_content_id(self) -> None:
        """``body_hash`` and ``content_id`` of one function are not equal."""
        source = _src("""\
            def compute(x: int) -> int:
                return x * 2
        """)
        symbols = self.adapter.parse_symbols(source, "m.py")
        record = symbols["m.py::compute"]
        # body excludes def line
        assert record["body_hash"] != record["content_id"]

    def test_rename_detection_via_body_hash(self) -> None:
        """Two functions with identical bodies but different names share body_hash."""
        foo_source = _src("def foo(x):\n return x + 1\n")
        bar_source = _src("def bar(x):\n return x + 1\n")
        foo_record = self.adapter.parse_symbols(foo_source, "f.py")["f.py::foo"]
        bar_record = self.adapter.parse_symbols(bar_source, "f.py")["f.py::bar"]
        assert foo_record["body_hash"] == bar_record["body_hash"]
        assert foo_record["content_id"] != bar_record["content_id"]

    def test_signature_id_same_despite_body_change(self) -> None:
        """Changing only the body keeps ``signature_id`` but alters ``body_hash``."""
        before_source = _src("def calc(x: int) -> int:\n return x\n")
        after_source = _src("def calc(x: int) -> int:\n return x * 10\n")
        before = self.adapter.parse_symbols(before_source, "m.py")["m.py::calc"]
        after = self.adapter.parse_symbols(after_source, "m.py")["m.py::calc"]
        assert before["signature_id"] == after["signature_id"]
        assert before["body_hash"] != after["body_hash"]

    def test_file_content_id_formatting_insensitive(self) -> None:
        """Whitespace-only differences do not change the file-level content ID."""
        spaced = _src("x = 1\ny = 2\n")
        compact = _src("x=1\ny=2\n")
        spaced_id = self.adapter.file_content_id(spaced)
        compact_id = self.adapter.file_content_id(compact)
        assert spaced_id == compact_id

    def test_file_content_id_syntax_error_uses_raw_bytes(self) -> None:
        """For unparseable input the file content ID equals ``blob_id`` of the raw bytes."""
        unparseable = b"def("
        content_id = self.adapter.file_content_id(unparseable)
        assert content_id == blob_id(unparseable)
268
269
270 # ---------------------------------------------------------------------------
271 # FallbackAdapter
272 # ---------------------------------------------------------------------------
273
274
class TestFallbackAdapter:
    """Tests for a ``FallbackAdapter`` constructed for one made-up extension."""

    # Adapter built with a single extension that no other test uses.
    adapter = FallbackAdapter(frozenset({".unknown_xyz"}))

    def test_supported_extensions(self) -> None:
        """The extension given at construction time is reported as supported."""
        extensions = self.adapter.supported_extensions()
        assert ".unknown_xyz" in extensions

    def test_parse_returns_empty(self) -> None:
        """``parse_symbols`` yields an empty tree for this input."""
        symbols = self.adapter.parse_symbols(b"const x = 1;", "src.unknown_xyz")
        assert symbols == {}

    def test_content_id_is_raw_bytes_hash(self) -> None:
        """The file content ID equals ``blob_id`` of the unmodified bytes."""
        payload = b"const x = 1;"
        content_id = self.adapter.file_content_id(payload)
        assert content_id == blob_id(payload)
287
288
289 # ---------------------------------------------------------------------------
290 # TreeSitterAdapter — one test per language
291 # ---------------------------------------------------------------------------
292
293
class TestTreeSitterAdapters:
    """Validate symbol extraction for each of the ten tree-sitter-backed languages."""

    def _syms(self, src: bytes, path: str) -> dict[str, str]:
        """Return {addr: kind} for all extracted symbols.

        Annotated with the builtin ``dict[str, str]``: no ``Manifest`` name is
        imported or defined in this module.
        """
        tree = parse_symbols(src, path)
        return {addr: rec["kind"] for addr, rec in tree.items()}

    # --- JavaScript -----------------------------------------------------------

    def test_js_top_level_function(self) -> None:
        """A top-level JS function is extracted with kind ``function``."""
        src = b"function greet(name) { return name; }"
        syms = self._syms(src, "app.js")
        assert "app.js::greet" in syms
        assert syms["app.js::greet"] == "function"

    def test_js_class_and_method(self) -> None:
        """A JS class and its method are extracted as ``class`` and ``method``."""
        src = b"class Animal { speak() { return 1; } }"
        syms = self._syms(src, "animal.js")
        assert "animal.js::Animal" in syms
        assert syms["animal.js::Animal"] == "class"
        assert "animal.js::Animal.speak" in syms
        assert syms["animal.js::Animal.speak"] == "method"

    def test_js_body_hash_rename_detection(self) -> None:
        """JS functions with identical bodies but different names share body_hash."""
        src_foo = b"function foo(x) { return x + 1; }"
        src_bar = b"function bar(x) { return x + 1; }"
        t1 = parse_symbols(src_foo, "f.js")
        t2 = parse_symbols(src_bar, "f.js")
        assert t1["f.js::foo"]["body_hash"] == t2["f.js::bar"]["body_hash"]
        assert t1["f.js::foo"]["content_id"] != t2["f.js::bar"]["content_id"]

    def test_js_adapter_claims_jsx_and_mjs(self) -> None:
        """``.jsx`` and ``.mjs`` paths both yield the function symbol."""
        src = b"function f() {}"
        # Assert on the extracted symbol itself: a check of the form
        # ``result != {} or True`` is always true and can never fail.
        assert "x.jsx::f" in parse_symbols(src, "x.jsx")
        assert "x.mjs::f" in parse_symbols(src, "x.mjs")

    # --- TypeScript -----------------------------------------------------------

    def test_ts_function_and_interface(self) -> None:
        """A TS function and interface get kinds ``function`` and ``interface``."""
        src = b"function hello(name: string): void {}\ninterface Animal { speak(): void; }"
        syms = self._syms(src, "app.ts")
        assert "app.ts::hello" in syms
        assert syms["app.ts::hello"] == "function"
        assert "app.ts::Animal" in syms
        assert syms["app.ts::Animal"] == "interface"

    def test_ts_enum_kind(self) -> None:
        """A TS ``enum`` declaration has kind ``enum``."""
        src = b"enum Color { Red, Green, Blue }"
        syms = self._syms(src, "colors.ts")
        assert "colors.ts::Color" in syms
        assert syms["colors.ts::Color"] == "enum"

    def test_ts_namespace_kind(self) -> None:
        """A TS ``namespace`` declaration has kind ``namespace``."""
        src = b"namespace MyLib { export function greet(): void {} }"
        syms = self._syms(src, "lib.ts")
        assert "lib.ts::MyLib" in syms
        assert syms["lib.ts::MyLib"] == "namespace"

    def test_ts_type_alias_kind(self) -> None:
        """A TS ``type`` declaration has kind ``type_alias``."""
        src = b"type ID = string;"
        syms = self._syms(src, "types.ts")
        assert "types.ts::ID" in syms
        assert syms["types.ts::ID"] == "type_alias"

    def test_ts_class_and_method(self) -> None:
        """A TS class and its method are both extracted."""
        src = b"class Dog { bark(): string { return 'woof'; } }"
        syms = self._syms(src, "dog.ts")
        assert "dog.ts::Dog" in syms
        assert "dog.ts::Dog.bark" in syms

    def test_tsx_parses_correctly(self) -> None:
        """A ``.tsx`` path yields both the function and the interface symbol."""
        src = b"function Button(): void { return; }\ninterface Props { label: string; }"
        syms = self._syms(src, "button.tsx")
        assert "button.tsx::Button" in syms
        assert "button.tsx::Props" in syms

    # --- Go -------------------------------------------------------------------

    def test_go_function(self) -> None:
        """A Go ``func`` without a receiver has kind ``function``."""
        src = b"func NewDog(name string) string { return name }"
        syms = self._syms(src, "dog.go")
        assert "dog.go::NewDog" in syms
        assert syms["dog.go::NewDog"] == "function"

    def test_go_method_qualified_with_receiver(self) -> None:
        """Go methods carry the receiver type as qualified-name prefix."""
        src = b"type Dog struct { Name string }\nfunc (d Dog) Bark() string { return d.Name }"
        syms = self._syms(src, "dog.go")
        assert "dog.go::Dog" in syms
        assert "dog.go::Dog.Bark" in syms
        assert syms["dog.go::Dog.Bark"] == "method"

    def test_go_pointer_receiver_stripped(self) -> None:
        """Pointer receivers (*Dog) are stripped to give Dog.Method."""
        src = b"type Dog struct {}\nfunc (d *Dog) Sit() {}"
        syms = self._syms(src, "d.go")
        assert "d.go::Dog.Sit" in syms

    def test_go_struct_interface_type_alias_kinds(self) -> None:
        """Go type_spec is refined to struct/interface/type_alias via child node type."""
        src = (
            b"type Dog struct { Name string }\n"
            b"type Animal interface { Speak() string }\n"
            b"type MyInt int\n"
        )
        syms = self._syms(src, "types.go")
        assert "types.go::Dog" in syms
        assert syms["types.go::Dog"] == "struct"
        assert "types.go::Animal" in syms
        assert syms["types.go::Animal"] == "interface"
        assert "types.go::MyInt" in syms
        assert syms["types.go::MyInt"] == "type_alias"

    # --- Rust -----------------------------------------------------------------

    def test_rust_standalone_function(self) -> None:
        """A free-standing Rust ``fn`` has kind ``function``."""
        src = b"fn add(a: i32, b: i32) -> i32 { a + b }"
        syms = self._syms(src, "math.rs")
        assert "math.rs::add" in syms
        assert syms["math.rs::add"] == "function"

    def test_rust_impl_method_qualified(self) -> None:
        """Rust impl methods are qualified as TypeName.method."""
        src = b"struct Dog { name: String }\nimpl Dog { fn bark(&self) -> String { self.name.clone() } }"
        syms = self._syms(src, "dog.rs")
        assert "dog.rs::Dog" in syms
        assert "dog.rs::Dog.bark" in syms

    def test_rust_struct_and_trait(self) -> None:
        """Rust ``struct`` and ``trait`` items get kinds ``struct`` and ``trait``."""
        src = b"struct Point { x: f64, y: f64 }\ntrait Shape { fn area(&self) -> f64; }"
        syms = self._syms(src, "shapes.rs")
        assert "shapes.rs::Point" in syms
        assert syms["shapes.rs::Point"] == "struct"
        assert "shapes.rs::Shape" in syms
        assert syms["shapes.rs::Shape"] == "trait"

    def test_rust_enum_kind(self) -> None:
        """A Rust ``enum`` item has kind ``enum``."""
        src = b"enum Direction { North, South, East, West }"
        syms = self._syms(src, "dir.rs")
        assert "dir.rs::Direction" in syms
        assert syms["dir.rs::Direction"] == "enum"

    # --- Java -----------------------------------------------------------------

    def test_java_class_and_method(self) -> None:
        """A Java class and its method are extracted as ``class`` and ``method``."""
        src = b"public class Calculator { public int add(int a, int b) { return a + b; } }"
        syms = self._syms(src, "Calc.java")
        assert "Calc.java::Calculator" in syms
        assert syms["Calc.java::Calculator"] == "class"
        assert "Calc.java::Calculator.add" in syms
        assert syms["Calc.java::Calculator.add"] == "method"

    def test_java_interface(self) -> None:
        """A Java ``interface`` has kind ``interface``."""
        src = b"public interface Shape { double area(); }"
        syms = self._syms(src, "Shape.java")
        assert "Shape.java::Shape" in syms
        assert syms["Shape.java::Shape"] == "interface"

    def test_java_enum_kind(self) -> None:
        """A Java ``enum`` has kind ``enum``."""
        src = b"public enum Color { RED, GREEN, BLUE }"
        syms = self._syms(src, "Color.java")
        assert "Color.java::Color" in syms
        assert syms["Color.java::Color"] == "enum"

    # --- C --------------------------------------------------------------------

    def test_c_function(self) -> None:
        """C function definitions are extracted with kind ``function``."""
        src = b"int add(int a, int b) { return a + b; }\nvoid noop(void) {}"
        syms = self._syms(src, "math.c")
        assert "math.c::add" in syms
        assert syms["math.c::add"] == "function"
        assert "math.c::noop" in syms

    # --- C++ ------------------------------------------------------------------

    def test_cpp_class_and_function(self) -> None:
        """A C++ class and a free function are both extracted."""
        src = b"class Animal { public: void speak() {} };\nint square(int x) { return x * x; }"
        syms = self._syms(src, "app.cpp")
        assert "app.cpp::Animal" in syms
        assert syms["app.cpp::Animal"] == "class"
        assert "app.cpp::square" in syms

    # --- C# -------------------------------------------------------------------

    def test_cs_class_and_method(self) -> None:
        """A C# class and its method are extracted as ``class`` and ``method``."""
        src = b"public class Greeter { public string Hello(string name) { return name; } }"
        syms = self._syms(src, "Greeter.cs")
        assert "Greeter.cs::Greeter" in syms
        assert syms["Greeter.cs::Greeter"] == "class"
        assert "Greeter.cs::Greeter.Hello" in syms
        assert syms["Greeter.cs::Greeter.Hello"] == "method"

    def test_cs_interface_and_struct(self) -> None:
        """C# ``interface`` and ``struct`` get kinds ``interface`` and ``struct``."""
        src = b"interface IShape { double Area(); }\nstruct Point { public int X, Y; }"
        syms = self._syms(src, "shapes.cs")
        assert "shapes.cs::IShape" in syms
        assert syms["shapes.cs::IShape"] == "interface"
        assert "shapes.cs::Point" in syms
        assert syms["shapes.cs::Point"] == "struct"

    def test_cs_enum_kind(self) -> None:
        """A C# ``enum`` has kind ``enum``."""
        src = b"enum Status { Active, Inactive, Pending }"
        syms = self._syms(src, "status.cs")
        assert "status.cs::Status" in syms
        assert syms["status.cs::Status"] == "enum"

    # --- Ruby -----------------------------------------------------------------

    def test_ruby_class_and_method(self) -> None:
        """A Ruby class and its method are extracted as ``class`` and ``method``."""
        src = b"class Dog\n def bark\n puts 'woof'\n end\nend"
        syms = self._syms(src, "dog.rb")
        assert "dog.rb::Dog" in syms
        assert syms["dog.rb::Dog"] == "class"
        assert "dog.rb::Dog.bark" in syms
        assert syms["dog.rb::Dog.bark"] == "method"

    def test_ruby_module(self) -> None:
        """A Ruby ``module`` has kind ``module``."""
        src = b"module Greetable\n def greet\n 'hello'\n end\nend"
        syms = self._syms(src, "greet.rb")
        assert "greet.rb::Greetable" in syms
        assert syms["greet.rb::Greetable"] == "module"

    # --- Kotlin ---------------------------------------------------------------

    def test_kotlin_function_and_class(self) -> None:
        """A Kotlin top-level function, a class and its method are all extracted."""
        src = b"fun greet(name: String): String = name\nclass Dog { fun bark(): Unit { } }"
        syms = self._syms(src, "main.kt")
        assert "main.kt::greet" in syms
        assert syms["main.kt::greet"] == "function"
        assert "main.kt::Dog" in syms
        assert "main.kt::Dog.bark" in syms

    def test_kotlin_object_kind(self) -> None:
        """Kotlin singleton object declarations have kind 'object', not 'class'."""
        src = b"object Singleton { val x = 1 }"
        syms = self._syms(src, "s.kt")
        assert "s.kt::Singleton" in syms
        assert syms["s.kt::Singleton"] == "object"

    # --- cross-language adapter routing ---------------------------------------

    def test_adapter_for_path_routes_all_extensions(self) -> None:
        """adapter_for_path must return a TreeSitterAdapter (not Fallback) for all supported exts."""
        # ``adapter_for_path`` is already imported at module level; only the
        # adapter class needs a local import.
        from muse.plugins.code.ast_parser import TreeSitterAdapter

        # ``.hxx`` is listed so this set matches the non-Python extensions
        # expected by test_semantic_extensions_covers_all_ts_languages.
        for ext in (
            ".js", ".jsx", ".mjs", ".cjs",
            ".ts", ".tsx",
            ".go",
            ".rs",
            ".java",
            ".c", ".h",
            ".cpp", ".cc", ".cxx", ".hpp", ".hxx",
            ".cs",
            ".rb",
            ".kt", ".kts",
        ):
            a = adapter_for_path(f"src/file{ext}")
            assert isinstance(a, TreeSitterAdapter), (
                f"Expected TreeSitterAdapter for {ext}, got {type(a).__name__}"
            )

    def test_semantic_extensions_covers_all_ts_languages(self) -> None:
        """Every expected extension is contained in ``SEMANTIC_EXTENSIONS``."""
        from muse.plugins.code.ast_parser import SEMANTIC_EXTENSIONS

        expected = {
            ".py", ".pyi",
            ".js", ".jsx", ".mjs", ".cjs",
            ".ts", ".tsx",
            ".go", ".rs",
            ".java",
            ".c", ".h",
            ".cpp", ".cc", ".cxx", ".hpp", ".hxx",
            ".cs",
            ".rb",
            ".kt", ".kts",
        }
        # Subset check: SEMANTIC_EXTENSIONS may contain more than listed here.
        assert expected <= SEMANTIC_EXTENSIONS
574
575
576 # ---------------------------------------------------------------------------
577 # adapter_for_path
578 # ---------------------------------------------------------------------------
579
580
def test_adapter_for_py_is_python() -> None:
    """A ``.py`` path is routed to ``PythonAdapter``."""
    adapter = adapter_for_path("src/utils.py")
    assert isinstance(adapter, PythonAdapter)
583
584
def test_adapter_for_ts_is_tree_sitter() -> None:
    """A ``.ts`` path is routed to ``TreeSitterAdapter``."""
    from muse.plugins.code.ast_parser import TreeSitterAdapter

    adapter = adapter_for_path("src/app.ts")
    assert isinstance(adapter, TreeSitterAdapter)
589
590
def test_adapter_for_no_extension_is_fallback() -> None:
    """A path without an extension (``Makefile``) is routed to ``FallbackAdapter``."""
    adapter = adapter_for_path("Makefile")
    assert isinstance(adapter, FallbackAdapter)
593
594
595 # ---------------------------------------------------------------------------
596 # diff_symbol_trees — golden test cases
597 # ---------------------------------------------------------------------------
598
599
class TestDiffSymbolTrees:
    """Golden test cases for symbol-level diff."""

    def _func(
        self,
        addr: str,
        content_id: str,
        body_hash: str | None = None,
        signature_id: str | None = None,
        name: str = "f",
    ) -> tuple[str, SymbolRecord]:
        """Build an ``(address, record)`` pair describing a function symbol.

        ``body_hash`` and ``signature_id`` fall back to ``content_id`` when
        they are omitted (or otherwise falsy). Line numbers are fixed at 1–3.
        """
        record = SymbolRecord(
            kind="function",
            name=name,
            qualified_name=name,
            content_id=content_id,
            body_hash=body_hash if body_hash else content_id,
            signature_id=signature_id if signature_id else content_id,
            lineno=1,
            end_lineno=3,
        )
        return addr, record

    def test_empty_trees_produce_no_ops(self) -> None:
        """Diffing two empty trees yields an empty op list."""
        ops = diff_symbol_trees({}, {})
        assert ops == []

    def test_added_symbol(self) -> None:
        """A symbol present only in the target produces a single insert op."""
        addr, rec = self._func("f.py::new_fn", "abc", name="new_fn")
        base: SymbolTree = {}
        target: SymbolTree = {addr: rec}
        ops = diff_symbol_trees(base, target)
        assert len(ops) == 1
        only = ops[0]
        assert only["op"] == "insert"
        assert only["address"] == "f.py::new_fn"

    def test_removed_symbol(self) -> None:
        """A symbol present only in the base produces a single delete op."""
        addr, rec = self._func("f.py::old", "abc", name="old")
        base: SymbolTree = {addr: rec}
        target: SymbolTree = {}
        ops = diff_symbol_trees(base, target)
        assert len(ops) == 1
        only = ops[0]
        assert only["op"] == "delete"
        assert only["address"] == "f.py::old"

    def test_unchanged_symbol_no_op(self) -> None:
        """Diffing a tree against itself yields no ops."""
        addr, rec = self._func("f.py::stable", "xyz", name="stable")
        tree = {addr: rec}
        assert diff_symbol_trees(tree, tree) == []

    def test_implementation_changed(self) -> None:
        """Same signature, different body → ReplaceOp with 'implementation changed'."""
        sig_id = _sha256("calc(x)->int")
        old_addr, old_rec = self._func(
            "m.py::calc", "old_body", body_hash="old", signature_id=sig_id, name="calc"
        )
        new_addr, new_rec = self._func(
            "m.py::calc", "new_body", body_hash="new", signature_id=sig_id, name="calc"
        )
        base: SymbolTree = {old_addr: old_rec}
        target: SymbolTree = {new_addr: new_rec}
        ops = diff_symbol_trees(base, target)
        assert len(ops) == 1
        only = ops[0]
        assert only["op"] == "replace"
        assert "implementation changed" in only["new_summary"]

    def test_signature_changed(self) -> None:
        """Same body, different signature → ReplaceOp with 'signature changed'."""
        body = _sha256("return x + 1")
        old_addr, old_rec = self._func(
            "m.py::f", "c1", body_hash=body, signature_id="old_sig", name="f"
        )
        new_addr, new_rec = self._func(
            "m.py::f", "c2", body_hash=body, signature_id="new_sig", name="f"
        )
        base: SymbolTree = {old_addr: old_rec}
        target: SymbolTree = {new_addr: new_rec}
        ops = diff_symbol_trees(base, target)
        assert len(ops) == 1
        only = ops[0]
        assert only["op"] == "replace"
        assert "signature changed" in only["old_summary"]

    def test_rename_detected(self) -> None:
        """Same body_hash, different name/address → ReplaceOp with 'renamed to'."""
        body = _sha256("return 42")
        old_addr, old_rec = self._func(
            "u.py::old_name", "old_cid", body_hash=body, name="old_name"
        )
        new_addr, new_rec = self._func(
            "u.py::new_name", "new_cid", body_hash=body, name="new_name"
        )
        base: SymbolTree = {old_addr: old_rec}
        target: SymbolTree = {new_addr: new_rec}
        ops = diff_symbol_trees(base, target)
        assert len(ops) == 1
        only = ops[0]
        assert only["op"] == "replace"
        assert "renamed to" in only["new_summary"]
        assert "new_name" in only["new_summary"]

    def test_independent_changes_both_emitted(self) -> None:
        """Different symbols changed independently → two ReplaceOps."""
        sig_a = "sig_a"
        sig_b = "sig_b"
        # Pairs are listed foo-then-bar so each tree keeps that insertion order.
        base: SymbolTree = dict([
            self._func("f.py::foo", "foo_old", body_hash="foo_b_old", signature_id=sig_a, name="foo"),
            self._func("f.py::bar", "bar_old", body_hash="bar_b_old", signature_id=sig_b, name="bar"),
        ])
        target: SymbolTree = dict([
            self._func("f.py::foo", "foo_new", body_hash="foo_b_new", signature_id=sig_a, name="foo"),
            self._func("f.py::bar", "bar_new", body_hash="bar_b_new", signature_id=sig_b, name="bar"),
        ])
        ops = diff_symbol_trees(base, target)
        assert len(ops) == 2
        changed_addresses = {op["address"] for op in ops}
        assert "f.py::foo" in changed_addresses
        assert "f.py::bar" in changed_addresses
693
694
695 # ---------------------------------------------------------------------------
696 # build_diff_ops — integration
697 # ---------------------------------------------------------------------------
698
699
class TestBuildDiffOps:
    """Checks of ``build_diff_ops`` on hand-built file maps and symbol trees."""

    def test_added_file_no_tree(self) -> None:
        """A file only in the target, with no symbol trees, yields one insert op."""
        result = build_diff_ops(
            base_files={},
            target_files={"new.ts": "abc"},
            base_trees={},
            target_trees={},
        )
        assert len(result) == 1
        first = result[0]
        assert first["op"] == "insert"
        assert first["address"] == "new.ts"

    def test_removed_file_no_tree(self) -> None:
        """A file only in the base, with no symbol trees, yields one delete op."""
        result = build_diff_ops(
            base_files={"old.ts": "abc"},
            target_files={},
            base_trees={},
            target_trees={},
        )
        assert len(result) == 1
        assert result[0]["op"] == "delete"

    def test_modified_file_with_trees(self) -> None:
        """A changed symbol in a changed file yields a patch op with one replace child."""
        body = _sha256("return x")
        record_before = SymbolRecord(
            kind="function", name="foo", qualified_name="foo",
            content_id="old_c", body_hash=body, signature_id="sig",
            lineno=1, end_lineno=2,
        )
        record_after = SymbolRecord(
            kind="function", name="foo", qualified_name="foo",
            content_id="new_c", body_hash="new_body", signature_id="sig",
            lineno=1, end_lineno=2,
        )
        base_tree: SymbolTree = {"u.py::foo": record_before}
        target_tree: SymbolTree = {"u.py::foo": record_after}
        result = build_diff_ops(
            base_files={"u.py": "base_hash"},
            target_files={"u.py": "target_hash"},
            base_trees={"u.py": base_tree},
            target_trees={"u.py": target_tree},
        )
        assert len(result) == 1
        patch = result[0]
        assert patch["op"] == "patch"
        assert patch["address"] == "u.py"
        assert len(patch["child_ops"]) == 1
        assert patch["child_ops"][0]["op"] == "replace"

    def test_reformat_only_produces_replace_op(self) -> None:
        """When all symbol content_ids are unchanged, emit a reformatted ReplaceOp."""
        content_id = _sha256("return x")
        record = SymbolRecord(
            kind="function", name="foo", qualified_name="foo",
            content_id=content_id, body_hash=content_id, signature_id=content_id,
            lineno=1, end_lineno=2,
        )
        tree: SymbolTree = {"u.py::foo": record}
        # The same tree object serves as base and target, so only the file
        # hashes differ and there are no symbol changes.
        result = build_diff_ops(
            base_files={"u.py": "hash_before"},
            target_files={"u.py": "hash_after"},
            base_trees={"u.py": tree},
            target_trees={"u.py": tree},
        )
        assert len(result) == 1
        only = result[0]
        assert only["op"] == "replace"
        assert "reformatted" in only["new_summary"]

    def test_cross_file_move_annotation(self) -> None:
        """A symbol deleted in file A and inserted in file B is annotated as moved."""
        content_id = _sha256("the_body")
        helper_in_a = SymbolRecord(
            kind="function", name="helper", qualified_name="helper",
            content_id=content_id, body_hash=content_id, signature_id=content_id,
            lineno=1, end_lineno=3,
        )
        helper_in_b = SymbolRecord(
            kind="function", name="helper", qualified_name="helper",
            content_id=content_id, body_hash=content_id, signature_id=content_id,
            lineno=1, end_lineno=3,
        )
        base_tree: SymbolTree = {"a.py::helper": helper_in_a}
        target_tree: SymbolTree = {"b.py::helper": helper_in_b}
        result = build_diff_ops(
            base_files={"a.py": "hash_a", "b.py": "hash_b_before"},
            target_files={"b.py": "hash_b_after"},
            base_trees={"a.py": base_tree},
            target_trees={"b.py": target_tree},
        )
        # Collect the addresses of all patch ops; at least one of the two
        # files involved must be among them.
        patch_addrs = {op["address"] for op in result if op["op"] == "patch"}
        assert any(path in patch_addrs for path in ("a.py", "b.py"))
796
797
798 class TestFileMoveAndEdit:
799 """Regression: a file renamed+edited must be emitted as RenameOp + PatchOp.
800
801 Before the fix, Muse emitted an all-delete PatchOp for the old path and
802 an all-insert PatchOp for the new path — showing a spurious delete+add
803 rather than a move+edit. After the fix, the two are expressed as two
804 orthogonal ops: a ``RenameOp`` (address change) followed by a ``PatchOp``
805 (symbol-level diffs). ``PatchOp`` never carries ``from_address``.
806 """
807
808 def _func(
809 self,
810 addr: str,
811 content_id: str,
812 body_hash: str | None = None,
813 signature_id: str | None = None,
814 name: str = "f",
815 ) -> tuple[str, SymbolRecord]:
816 return addr, SymbolRecord(
817 kind="function",
818 name=name,
819 qualified_name=name,
820 content_id=content_id,
821 body_hash=body_hash or content_id,
822 signature_id=signature_id or content_id,
823 lineno=1,
824 end_lineno=3,
825 )
826
def test_move_and_edit_emits_rename_then_patch(self) -> None:
    """Moving utils.py to helpers.py while editing one symbol yields two ops.

    The move itself must surface as a RenameOp, and the symbol change as a
    separate PatchOp addressed at the new path (without a ``from_address``).
    """
    stable = _sha256("def unchanged(): pass")

    def _untouched() -> SymbolRecord:
        # Build a fresh record per tree so base and target share no object.
        return SymbolRecord(
            kind="function", name="unchanged", qualified_name="unchanged",
            content_id=stable, body_hash=stable, signature_id=stable,
            lineno=1, end_lineno=2,
        )

    before: SymbolTree = {
        "utils.py::unchanged": _untouched(),
        "utils.py::modified": SymbolRecord(
            kind="function", name="modified", qualified_name="modified",
            content_id="old_cid", body_hash="old_body", signature_id="old_sig",
            lineno=3, end_lineno=5,
        ),
    }
    after: SymbolTree = {
        "helpers.py::unchanged": _untouched(),
        "helpers.py::modified": SymbolRecord(
            kind="function", name="modified", qualified_name="modified",
            content_id="new_cid", body_hash="new_body", signature_id="new_sig",
            lineno=3, end_lineno=5,
        ),
    }

    ops = build_diff_ops(
        base_files={"utils.py": "hash_old"},
        target_files={"helpers.py": "hash_new"},
        base_trees={"utils.py": before},
        target_trees={"helpers.py": after},
    )
    renames = [op for op in ops if op["op"] == "rename"]
    patches = [op for op in ops if op["op"] == "patch"]

    # The file move is reported on its own ...
    assert renames, f"Expected a RenameOp, got: {[o['op'] for o in ops]}"
    first_rename = renames[0]
    assert first_rename["from_address"] == "utils.py"
    assert first_rename["address"] == "helpers.py"

    # ... and the symbol edit is reported against the destination path.
    assert patches, f"Expected a PatchOp for symbol diffs, got: {[o['op'] for o in ops]}"
    first_patch = patches[0]
    assert first_patch["address"] == "helpers.py"
    assert "from_address" not in first_patch
869
def test_move_and_edit_patch_child_ops_show_symbol_diff(self) -> None:
    """A move+edit PatchOp must list the symbol-level changes as child ops."""
    stable = _sha256("def keep(): pass")

    def _kept() -> SymbolRecord:
        # Identical hashes on both sides: this symbol did not change.
        return SymbolRecord(
            kind="function", name="keep", qualified_name="keep",
            content_id=stable, body_hash=stable, signature_id=stable,
            lineno=1, end_lineno=2,
        )

    before: SymbolTree = {
        "a.py::keep": _kept(),
        "a.py::gone": SymbolRecord(
            kind="function", name="gone", qualified_name="gone",
            content_id="cid_gone", body_hash="body_gone", signature_id="sig_gone",
            lineno=3, end_lineno=5,
        ),
    }
    after: SymbolTree = {
        "b.py::keep": _kept(),
        "b.py::new_fn": SymbolRecord(
            kind="function", name="new_fn", qualified_name="new_fn",
            content_id="cid_new", body_hash="body_new", signature_id="sig_new",
            lineno=3, end_lineno=5,
        ),
    }

    ops = build_diff_ops(
        base_files={"a.py": "hash_a"},
        target_files={"b.py": "hash_b"},
        base_trees={"a.py": before},
        target_trees={"b.py": after},
    )
    patches = [op for op in ops if op["op"] == "patch"]
    assert patches, f"Expected a PatchOp, got: {[o['op'] for o in ops]}"

    # "gone" disappears and "new_fn" appears; "keep" is identical on both
    # sides, so only a delete and an insert are required here.
    child_kinds = {child["op"] for child in patches[0]["child_ops"]}
    assert "delete" in child_kinds
    assert "insert" in child_kinds
910
def test_no_false_positive_unrelated_files(self) -> None:
    """Files that share no symbols must not be reported as a move+edit."""
    removed_tree = {
        "old.py::alpha": SymbolRecord(
            kind="function", name="alpha", qualified_name="alpha",
            content_id="cid_a", body_hash="body_a", signature_id="sig_a",
            lineno=1, end_lineno=2,
        )
    }
    added_tree = {
        "new.py::omega": SymbolRecord(
            kind="function", name="omega", qualified_name="omega",
            content_id="cid_o", body_hash="body_o", signature_id="sig_o",
            lineno=1, end_lineno=2,
        )
    }

    ops = build_diff_ops(
        base_files={"old.py": "hash_old"},
        target_files={"new.py": "hash_new"},
        base_trees={"old.py": removed_tree},
        target_trees={"new.py": added_tree},
    )

    # Without any overlap the two files are handled independently: one op
    # per file, both PatchOps wrapping a single-symbol tree, never a rename.
    assert len(ops) == 2
    assert {op["op"] for op in ops} == {"patch"}
    assert all(op["op"] != "rename" for op in ops)
940
941
942 # ---------------------------------------------------------------------------
943 # CodePlugin — snapshot
944 # ---------------------------------------------------------------------------
945
946
class TestCodePluginSnapshot:
    """``CodePlugin.snapshot`` for directory inputs and ready-made manifests."""

    plugin = CodePlugin()

    def test_path_returns_manifest(self, tmp_path: pathlib.Path) -> None:
        """Snapshotting a directory yields a "code" manifest listing its file."""
        tmp_path.joinpath("app.py").write_text("x = 1\n")
        manifest = self.plugin.snapshot(tmp_path)
        assert manifest["domain"] == "code"
        assert "app.py" in manifest["files"]

    def test_snapshot_stability(self, tmp_path: pathlib.Path) -> None:
        """Two snapshots of an untouched directory compare equal."""
        tmp_path.joinpath("main.py").write_text("def f(): pass\n")
        first = self.plugin.snapshot(tmp_path)
        second = self.plugin.snapshot(tmp_path)
        assert first == second

    def test_snapshot_uses_raw_bytes_hash(self, tmp_path: pathlib.Path) -> None:
        """The manifest entry for a file equals ``blob_id`` of its raw bytes."""
        payload = b"def add(a, b): return a + b\n"
        tmp_path.joinpath("math.py").write_bytes(payload)
        manifest = self.plugin.snapshot(tmp_path)
        assert manifest["files"]["math.py"] == blob_id(payload)

    def test_museignore_respected(self, tmp_path: pathlib.Path) -> None:
        """A pattern listed in ``.museignore`` keeps matching files out."""
        tmp_path.joinpath("keep.py").write_text("x = 1\n")
        tmp_path.joinpath("skip.log").write_text("log\n")
        tmp_path.joinpath(".museignore").write_text('[global]\npatterns = ["*.log"]\n')
        tracked = self.plugin.snapshot(tmp_path)["files"]
        assert "keep.py" in tracked
        assert "skip.log" not in tracked

    def test_pycache_always_ignored(self, tmp_path: pathlib.Path) -> None:
        """Nothing under ``__pycache__`` appears, even with no ignore file."""
        bytecode_dir = tmp_path / "__pycache__"
        bytecode_dir.mkdir()
        bytecode_dir.joinpath("utils.cpython-312.pyc").write_bytes(b"\x00")
        tmp_path.joinpath("main.py").write_text("x = 1\n")
        tracked = self.plugin.snapshot(tmp_path)["files"]
        assert "main.py" in tracked
        assert all("__pycache__" not in key for key in tracked)

    def test_nested_files_tracked(self, tmp_path: pathlib.Path) -> None:
        """Files in sub-directories are keyed by their slash-separated path."""
        package_dir = tmp_path / "src"
        package_dir.mkdir(parents=True)
        package_dir.joinpath("utils.py").write_text("pass\n")
        tracked = self.plugin.snapshot(tmp_path)["files"]
        assert "src/utils.py" in tracked

    def test_manifest_passthrough(self) -> None:
        """A manifest passed to ``snapshot`` is returned as the same object."""
        given = _make_manifest({"a.py": "hash"})
        assert self.plugin.snapshot(given) is given
1003
1004
1005 # ---------------------------------------------------------------------------
1006 # CodePlugin — diff (file-level, no repo_root)
1007 # ---------------------------------------------------------------------------
1008
1009
class TestCodePluginDiffFileLevel:
    """``CodePlugin.diff`` called without ``repo_root`` on plain manifests."""

    plugin = CodePlugin()

    def test_added_file(self) -> None:
        """A path present only in the target produces exactly one insert."""
        delta = self.plugin.diff(_make_manifest({}), _make_manifest({"new.py": "abc"}))
        assert [op["op"] for op in delta["ops"]] == ["insert"]

    def test_removed_file(self) -> None:
        """A path present only in the base produces exactly one delete."""
        delta = self.plugin.diff(_make_manifest({"old.py": "abc"}), _make_manifest({}))
        assert [op["op"] for op in delta["ops"]] == ["delete"]

    def test_modified_file(self) -> None:
        """The same path with a different hash produces exactly one replace."""
        before = _make_manifest({"f.py": "old"})
        after = _make_manifest({"f.py": "new"})
        delta = self.plugin.diff(before, after)
        assert [op["op"] for op in delta["ops"]] == ["replace"]

    def test_no_changes_empty_ops(self) -> None:
        """Diffing a manifest against itself gives no ops and "no changes"."""
        unchanged = _make_manifest({"f.py": "abc"})
        delta = self.plugin.diff(unchanged, unchanged)
        assert delta["ops"] == []
        assert delta["summary"] == "no changes"

    def test_domain_is_code(self) -> None:
        """The delta is tagged with the "code" domain even when empty."""
        empty_delta = self.plugin.diff(_make_manifest({}), _make_manifest({}))
        assert empty_delta["domain"] == "code"
1043
1044
1045 # ---------------------------------------------------------------------------
1046 # CodePlugin — diff (semantic, with repo_root)
1047 # ---------------------------------------------------------------------------
1048
1049
class TestCodePluginDiffSemantic:
    """``CodePlugin.diff`` with ``repo_root`` set, so blobs come from the object store.

    Each test stores a base and a target blob via ``_store_blob``, builds
    one-file manifests pointing at them, and inspects the resulting ops.
    """

    plugin = CodePlugin()

    def _setup_repo(
        self, tmp_path: pathlib.Path
    ) -> tuple[pathlib.Path, pathlib.Path]:
        """Create ``tmp_path / "repo"`` and return it twice.

        Returns:
            ``(repo_root, workdir)``; both refer to the same directory.
        """
        repo_root = tmp_path / "repo"
        repo_root.mkdir()
        workdir = repo_root
        return repo_root, workdir

    def test_add_function_produces_patch_op(self, tmp_path: pathlib.Path) -> None:
        """Adding a function yields one PatchOp with an insert child naming it."""
        repo_root, _ = self._setup_repo(tmp_path)
        base_src = _src("x = 1\n")
        target_src = _src("x = 1\n\ndef greet(name: str) -> str:\n return f'Hello {name}'\n")

        base_oid = _store_blob(repo_root, base_src)
        target_oid = _store_blob(repo_root, target_src)

        base = _make_manifest({"hello.py": base_oid})
        target = _make_manifest({"hello.py": target_oid})
        delta = self.plugin.diff(base, target, repo_root=repo_root)

        patch_ops = [o for o in delta["ops"] if o["op"] == "patch"]
        assert len(patch_ops) == 1
        assert patch_ops[0]["address"] == "hello.py"
        child_ops = patch_ops[0]["child_ops"]
        assert any(c["op"] == "insert" and "greet" in c.get("content_summary", "") for c in child_ops)

    def test_remove_function_produces_patch_op(self, tmp_path: pathlib.Path) -> None:
        """Removing a function yields one PatchOp with a delete child naming it."""
        repo_root, _ = self._setup_repo(tmp_path)
        base_src = _src("def old_fn() -> None:\n pass\n")
        target_src = _src("# removed\n")

        base_oid = _store_blob(repo_root, base_src)
        target_oid = _store_blob(repo_root, target_src)

        base = _make_manifest({"mod.py": base_oid})
        target = _make_manifest({"mod.py": target_oid})
        delta = self.plugin.diff(base, target, repo_root=repo_root)

        patch_ops = [o for o in delta["ops"] if o["op"] == "patch"]
        assert len(patch_ops) == 1
        child_ops = patch_ops[0]["child_ops"]
        assert any(c["op"] == "delete" and "old_fn" in c.get("content_summary", "") for c in child_ops)

    def test_rename_function_detected(self, tmp_path: pathlib.Path) -> None:
        """Same body under a new name yields one "renamed to" replace child."""
        repo_root, _ = self._setup_repo(tmp_path)
        base_src = _src("def compute(x: int) -> int:\n return x * 2\n")
        target_src = _src("def calculate(x: int) -> int:\n return x * 2\n")

        base_oid = _store_blob(repo_root, base_src)
        target_oid = _store_blob(repo_root, target_src)

        base = _make_manifest({"ops.py": base_oid})
        target = _make_manifest({"ops.py": target_oid})
        delta = self.plugin.diff(base, target, repo_root=repo_root)

        patch_ops = [o for o in delta["ops"] if o["op"] == "patch"]
        assert len(patch_ops) == 1
        child_ops = patch_ops[0]["child_ops"]
        rename_ops = [
            c for c in child_ops
            if c["op"] == "replace" and "renamed to" in c.get("new_summary", "")
        ]
        assert len(rename_ops) == 1
        assert "calculate" in rename_ops[0]["new_summary"]

    def test_implementation_change_detected(self, tmp_path: pathlib.Path) -> None:
        """A changed body under the same signature is flagged "implementation changed"."""
        repo_root, _ = self._setup_repo(tmp_path)
        base_src = _src("def double(x: int) -> int:\n return x * 2\n")
        target_src = _src("def double(x: int) -> int:\n return x + x\n")

        base_oid = _store_blob(repo_root, base_src)
        target_oid = _store_blob(repo_root, target_src)

        base = _make_manifest({"math.py": base_oid})
        target = _make_manifest({"math.py": target_oid})
        delta = self.plugin.diff(base, target, repo_root=repo_root)

        patch_ops = [o for o in delta["ops"] if o["op"] == "patch"]
        # Guard before indexing so a missing PatchOp fails with a readable
        # assertion message instead of an IndexError.
        assert patch_ops, f"Expected a PatchOp, got: {[o['op'] for o in delta['ops']]}"
        child_ops = patch_ops[0]["child_ops"]
        impl_ops = [c for c in child_ops if "implementation changed" in c.get("new_summary", "")]
        assert len(impl_ops) == 1

    def test_reformat_only_produces_replace_with_reformatted(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A whitespace-only edit, if reported as a ReplaceOp, is marked "reformatted"."""
        repo_root, _ = self._setup_repo(tmp_path)
        base_src = _src("def add(a,b):\n return a+b\n")
        # Same semantics, different formatting — ast.unparse normalizes both.
        target_src = _src("def add(a, b):\n return a + b\n")

        base_oid = _store_blob(repo_root, base_src)
        target_oid = _store_blob(repo_root, target_src)

        base = _make_manifest({"f.py": base_oid})
        target = _make_manifest({"f.py": target_oid})
        delta = self.plugin.diff(base, target, repo_root=repo_root)

        replace_ops = [o for o in delta["ops"] if o["op"] == "replace"]
        patch_ops = [o for o in delta["ops"] if o["op"] == "patch"]
        # Deliberately lenient: an empty delta passes, and so does a delta
        # made only of PatchOps. The one hard requirement is that when any
        # ReplaceOp is emitted, at least one of them mentions "reformatted".
        if delta["ops"]:
            assert replace_ops or patch_ops  # something was emitted
            if replace_ops:
                assert any("reformatted" in o.get("new_summary", "") for o in replace_ops)

    def test_missing_object_falls_back_to_file_level(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Blobs absent from the store still give a single file-level replace."""
        repo_root, _ = self._setup_repo(tmp_path)
        # Objects NOT written to store — should fall back gracefully.
        base = _make_manifest({"f.py": fake_id("missing-base")})
        target = _make_manifest({"f.py": fake_id("missing-target")})
        delta = self.plugin.diff(base, target, repo_root=repo_root)
        assert len(delta["ops"]) == 1
        assert delta["ops"][0]["op"] == "replace"
1170
1171
1172 # ---------------------------------------------------------------------------
1173 # CodePlugin — merge
1174 # ---------------------------------------------------------------------------
1175
1176
class TestCodePluginMerge:
    """Three-way ``CodePlugin.merge`` on manifests with placeholder hashes."""

    plugin = CodePlugin()

    def test_only_one_side_changed(self) -> None:
        """When only the right side moved off base, its version wins cleanly."""
        ancestor = _make_manifest({"f.py": "v1"})
        untouched = _make_manifest({"f.py": "v1"})
        edited = _make_manifest({"f.py": "v2"})
        result = self.plugin.merge(ancestor, untouched, edited)
        assert result.is_clean
        assert result.merged["files"]["f.py"] == "v2"

    def test_both_sides_same_change(self) -> None:
        """Identical edits on both sides merge cleanly to that edit."""
        ancestor = _make_manifest({"f.py": "v1"})
        result = self.plugin.merge(
            ancestor,
            _make_manifest({"f.py": "v2"}),
            _make_manifest({"f.py": "v2"}),
        )
        assert result.is_clean
        assert result.merged["files"]["f.py"] == "v2"

    def test_conflict_when_both_sides_differ(self) -> None:
        """Divergent edits to one path are reported as a conflict on it."""
        ancestor = _make_manifest({"f.py": "v1"})
        result = self.plugin.merge(
            ancestor,
            _make_manifest({"f.py": "v2"}),
            _make_manifest({"f.py": "v3"}),
        )
        assert not result.is_clean
        assert "f.py" in result.conflicts

    def test_disjoint_additions_auto_merge(self) -> None:
        """Each side adding a different file keeps both files, no conflict."""
        ancestor = _make_manifest({})
        result = self.plugin.merge(
            ancestor,
            _make_manifest({"a.py": "hash_a"}),
            _make_manifest({"b.py": "hash_b"}),
        )
        assert result.is_clean
        merged_files = result.merged["files"]
        assert "a.py" in merged_files
        assert "b.py" in merged_files

    def test_deletion_on_one_side(self) -> None:
        """A file deleted on the left and untouched on the right stays deleted."""
        ancestor = _make_manifest({"f.py": "v1"})
        deleted = _make_manifest({})
        untouched = _make_manifest({"f.py": "v1"})
        result = self.plugin.merge(ancestor, deleted, untouched)
        assert result.is_clean
        assert "f.py" not in result.merged["files"]
1220
1221
1222 # ---------------------------------------------------------------------------
1223 # CodePlugin — merge_ops (symbol-level OT)
1224 # ---------------------------------------------------------------------------
1225
1226
class TestCodePluginMergeOps:
    # Exercises CodePlugin.merge_ops, feeding it the op lists produced by
    # CodePlugin.diff for the "ours" and "theirs" sides.
    plugin = CodePlugin()

    def _py_snap(self, file_path: str, src: bytes, repo_root: pathlib.Path) -> SnapshotManifest:
        # Store *src* as a blob under *repo_root* and return a one-file
        # manifest mapping *file_path* to the stored object's id.
        oid = _store_blob(repo_root, src)
        return _make_manifest({file_path: oid})

    # NOTE(review): despite the "_conflict" suffix in its name, this test
    # asserts a CLEAN merge (see the docstring and the final assertions).
    def test_different_symbols_same_file_conflict(self, tmp_path: pathlib.Path) -> None:
        """Two agents modify different functions in the same file → clean merge.

        The OT engine identifies that the individual symbol edits commute
        (different addresses) and the text-merge succeeds because the edits are
        non-overlapping.  merge_ops produces a clean merged blob containing both
        changes — no conflict is raised.
        """
        repo_root = tmp_path / "repo"
        repo_root.mkdir()

        base_src = _src("""\
def foo(x: int) -> int:
    return x

def bar(y: int) -> int:
    return y
""")
        # Ours: modify foo.
        ours_src = _src("""\
def foo(x: int) -> int:
    return x * 2

def bar(y: int) -> int:
    return y
""")
        # Theirs: modify bar.
        theirs_src = _src("""\
def foo(x: int) -> int:
    return x

def bar(y: int) -> int:
    return y + 1
""")

        base_snap = self._py_snap("m.py", base_src, repo_root)
        ours_snap = self._py_snap("m.py", ours_src, repo_root)
        theirs_snap = self._py_snap("m.py", theirs_src, repo_root)

        # Each side's ops are computed against the common base.
        ours_delta = self.plugin.diff(base_snap, ours_snap, repo_root=repo_root)
        theirs_delta = self.plugin.diff(base_snap, theirs_snap, repo_root=repo_root)

        result = self.plugin.merge_ops(
            base_snap,
            ours_snap,
            theirs_snap,
            ours_delta["ops"],
            theirs_delta["ops"],
            repo_root=repo_root,
        )
        # Non-overlapping edits to different symbols commute and text-merge cleanly.
        assert result.is_clean, "Expected clean merge for non-overlapping symbol edits"
        assert "m.py" in result.merged.get("files", {})

    def test_same_symbol_conflict(self, tmp_path: pathlib.Path) -> None:
        """Both agents modify the same function → conflict at symbol address."""
        repo_root = tmp_path / "repo"
        repo_root.mkdir()

        base_src = _src("def calc(x: int) -> int:\n return x\n")
        ours_src = _src("def calc(x: int) -> int:\n return x * 2\n")
        theirs_src = _src("def calc(x: int) -> int:\n return x + 100\n")

        base_snap = self._py_snap("calc.py", base_src, repo_root)
        ours_snap = self._py_snap("calc.py", ours_src, repo_root)
        theirs_snap = self._py_snap("calc.py", theirs_src, repo_root)

        ours_delta = self.plugin.diff(base_snap, ours_snap, repo_root=repo_root)
        theirs_delta = self.plugin.diff(base_snap, theirs_snap, repo_root=repo_root)

        result = self.plugin.merge_ops(
            base_snap,
            ours_snap,
            theirs_snap,
            ours_delta["ops"],
            theirs_delta["ops"],
            repo_root=repo_root,
        )
        assert not result.is_clean
        # Conflict should be at file or symbol level.
        # Only non-emptiness is checked; the exact conflict address is not pinned.
        assert len(result.conflicts) > 0

    def test_disjoint_files_auto_merge(self, tmp_path: pathlib.Path) -> None:
        """Agents modify completely different files → auto-merge."""
        # The directory is created but never passed on: diff and merge_ops
        # below run without repo_root, on manifests with placeholder hashes.
        repo_root = tmp_path / "repo"
        repo_root.mkdir()

        base = _make_manifest({"a.py": "v1", "b.py": "v1"})
        ours = _make_manifest({"a.py": "v2", "b.py": "v1"})
        theirs = _make_manifest({"a.py": "v1", "b.py": "v2"})

        ours_delta = self.plugin.diff(base, ours)
        theirs_delta = self.plugin.diff(base, theirs)

        result = self.plugin.merge_ops(
            base, ours, theirs,
            ours_delta["ops"],
            theirs_delta["ops"],
        )
        assert result.is_clean
1334
1335
1336 # ---------------------------------------------------------------------------
1337 # merge_ops conflict-propagation regression tests
1338 # ---------------------------------------------------------------------------
1339
1340
class TestMergeOpsConflictPropagation:
    """Regression suite for conflicts that merge_ops used to swallow.

    Prior to the fix, merge_ops quietly kept the "ours" blob whenever the OT
    check failed to notice a conflict. That happened in two situations: the
    two sides produced different op types (ReplaceOp on one, PatchOp on the
    other), or the symbol-level ops commuted although the file blobs
    themselves differed. Either way the merged content was wrong and no
    conflict was reported.

    With the fix, file-level conflicts found by the fallback merge() are
    propagated by merge_ops unless a .museattributes strategy already
    auto-resolved the path.  See:
    muse/plugins/code/plugin.py::CodePlugin.merge_ops Step 4.
    """

    plugin = CodePlugin()

    # ------------------------------------------------------------------
    # Scenario 1: Both branches edit the same non-code file in different
    # sections (the AGENTS.md regression).
    # ------------------------------------------------------------------

    def test_commuting_symbol_changes_same_file_is_conflict(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Edits to different sections commute for OT, yet the file must conflict.

        Reproduces the AGENTS.md regression:
        - the merge base contains Section A
        - ours rewrites the text of Section A (heading unchanged)
        - theirs appends Section B (a heading absent from base and ours)

        OT compares ReplaceOp("AGENTS.md::Project.Section A") with
        InsertOp("AGENTS.md::Project.Section B"); the addresses differ, so the
        ops commute and OT reports a clean merge.  The merged blob, however,
        is simply "ours" — Section A updated, Section B missing — so theirs'
        addition is silently lost.

        The fixed merge_ops propagates the file-level conflict from the
        fallback merge(), prompting a manual resolution.
        """
        repo_root = tmp_path / "repo"
        repo_root.mkdir()

        # Base holds a single section.
        ancestor_text = b"# Project\n\n## Section A\n\nOriginal content.\n"
        # Ours keeps the heading but rewrites Section A's body.
        ours_text = b"# Project\n\n## Section A\n\nOurs rewrote Section A.\n"
        # Theirs leaves Section A alone and appends Section B.
        theirs_text = (
            b"# Project\n\n## Section A\n\nOriginal content.\n\n"
            b"## Section B\n\nTheirs added this new section.\n"
        )

        base_snap = _make_manifest({"AGENTS.md": _store_blob(repo_root, ancestor_text)})
        ours_snap = _make_manifest({"AGENTS.md": _store_blob(repo_root, ours_text)})
        theirs_snap = _make_manifest({"AGENTS.md": _store_blob(repo_root, theirs_text)})

        ours_delta = self.plugin.diff(base_snap, ours_snap, repo_root=repo_root)
        theirs_delta = self.plugin.diff(base_snap, theirs_snap, repo_root=repo_root)

        result = self.plugin.merge_ops(
            base_snap, ours_snap, theirs_snap,
            ours_delta["ops"], theirs_delta["ops"],
            repo_root=repo_root,
        )

        # Although the symbol addresses differ, accepting "ours" wholesale
        # would drop theirs' Section B, so a conflict has to be surfaced.
        assert not result.is_clean, (
            "Commuting ops on same file — expected file-level conflict to be propagated, "
            f"got is_clean=True. Conflicts: {result.conflicts}. "
            "AGENTS.md :: Section B from 'theirs' would be silently discarded."
        )
        # Conflicts may be file- or symbol-addressed; reduce each to its file part.
        conflict_files = {address.split("::")[0] for address in result.conflicts}
        assert "AGENTS.md" in conflict_files, (
            f"Expected 'AGENTS.md' in conflict file paths, got: {result.conflicts}"
        )

    # ------------------------------------------------------------------
    # Scenario 2: Mixed op types — one side ReplaceOp, other PatchOp.
    # ------------------------------------------------------------------

    def test_mixed_op_types_is_conflict(self, tmp_path: pathlib.Path) -> None:
        """ReplaceOp on one side and PatchOp on the other must still conflict.

        When a file has no parseable symbols on one branch (for instance plain
        text where only the other branch introduced a heading), the diff gives
        a ReplaceOp for the heading-less side and a PatchOp for the side with
        the heading.  The OT conflict loops never pair the two, so OT sees no
        conflict even though both sides changed the blob.
        """
        repo_root = tmp_path / "repo"
        repo_root.mkdir()

        # Base is plain text without Markdown headings, hence no symbol tree.
        ancestor_text = b"version = 1\n"
        # Ours stays heading-less → file-level ReplaceOp.
        ours_text = b"version = 1-hotfix\n"
        # Theirs introduces a heading → PatchOp carrying a symbol child.
        theirs_text = b"version = 2\n\n# comprehensive update\n"

        base_snap = _make_manifest({"config.txt": _store_blob(repo_root, ancestor_text)})
        ours_snap = _make_manifest({"config.txt": _store_blob(repo_root, ours_text)})
        theirs_snap = _make_manifest({"config.txt": _store_blob(repo_root, theirs_text)})

        ours_delta = self.plugin.diff(base_snap, ours_snap, repo_root=repo_root)
        theirs_delta = self.plugin.diff(base_snap, theirs_snap, repo_root=repo_root)

        result = self.plugin.merge_ops(
            base_snap, ours_snap, theirs_snap,
            ours_delta["ops"], theirs_delta["ops"],
            repo_root=repo_root,
        )

        assert not result.is_clean, (
            "Mixed op types (ReplaceOp ours, PatchOp theirs) for config.txt — "
            f"expected conflict, got is_clean=True. "
            f"Ours ops: {ours_delta['ops']}. Theirs ops: {theirs_delta['ops']}."
        )
        assert "config.txt" in result.conflicts

    # ------------------------------------------------------------------
    # Scenario 3: Only one side changed the file → clean merge (no regression).
    # ------------------------------------------------------------------

    def test_only_ours_changed_is_clean(self, tmp_path: pathlib.Path) -> None:
        """A text file edited only on our branch merges without conflict."""
        repo_root = tmp_path / "repo"
        repo_root.mkdir()

        ancestor_text = b"# Docs\n\nOriginal.\n"
        ours_text = b"# Docs\n\nOurs update.\n"

        base_snap = _make_manifest({"README.md": _store_blob(repo_root, ancestor_text)})
        ours_snap = _make_manifest({"README.md": _store_blob(repo_root, ours_text)})
        # Theirs never diverged, so it is literally the base manifest.
        theirs_snap = base_snap

        ours_delta = self.plugin.diff(base_snap, ours_snap, repo_root=repo_root)
        theirs_delta = self.plugin.diff(base_snap, theirs_snap, repo_root=repo_root)

        result = self.plugin.merge_ops(
            base_snap, ours_snap, theirs_snap,
            ours_delta["ops"], theirs_delta["ops"],
            repo_root=repo_root,
        )
        assert result.is_clean, f"Only ours changed — should auto-merge, got: {result.conflicts}"

    # ------------------------------------------------------------------
    # Scenario 4: Completely disjoint files → clean merge (no regression).
    # ------------------------------------------------------------------

    def test_disjoint_files_remain_clean(self, tmp_path: pathlib.Path) -> None:
        """Branches that each edit a different file merge without conflict."""
        repo_root = tmp_path / "repo"
        repo_root.mkdir()

        a_original = b"# File A\n\nOriginal.\n"
        b_original = b"# File B\n\nOriginal.\n"
        a_ours = b"# File A\n\nOurs update.\n"
        b_theirs = b"# File B\n\nTheirs update.\n"

        a_original_oid = _store_blob(repo_root, a_original)
        b_original_oid = _store_blob(repo_root, b_original)
        a_ours_oid = _store_blob(repo_root, a_ours)
        b_theirs_oid = _store_blob(repo_root, b_theirs)

        # Ours touches only a.md; theirs touches only b.md.
        base_snap = _make_manifest({"a.md": a_original_oid, "b.md": b_original_oid})
        ours_snap = _make_manifest({"a.md": a_ours_oid, "b.md": b_original_oid})
        theirs_snap = _make_manifest({"a.md": a_original_oid, "b.md": b_theirs_oid})

        ours_delta = self.plugin.diff(base_snap, ours_snap, repo_root=repo_root)
        theirs_delta = self.plugin.diff(base_snap, theirs_snap, repo_root=repo_root)

        result = self.plugin.merge_ops(
            base_snap, ours_snap, theirs_snap,
            ours_delta["ops"], theirs_delta["ops"],
            repo_root=repo_root,
        )
        assert result.is_clean, f"Disjoint files — should auto-merge, got: {result.conflicts}"
1528
1529
1530 # ---------------------------------------------------------------------------
1531 # CodePlugin — drift
1532 # ---------------------------------------------------------------------------
1533
1534
class TestCodePluginDrift:
    """``CodePlugin.drift``: a committed snapshot compared with the live directory."""

    plugin = CodePlugin()

    def test_no_drift(self, tmp_path: pathlib.Path) -> None:
        """A directory compared with its own fresh snapshot shows no drift."""
        tmp_path.joinpath("app.py").write_text("x = 1\n")
        committed = self.plugin.snapshot(tmp_path)
        assert not self.plugin.drift(committed, tmp_path).has_drift

    def test_has_drift_after_edit(self, tmp_path: pathlib.Path) -> None:
        """Rewriting a tracked file after the snapshot counts as drift."""
        target = tmp_path / "app.py"
        target.write_text("x = 1\n")
        committed = self.plugin.snapshot(tmp_path)
        target.write_text("x = 2\n")
        assert self.plugin.drift(committed, tmp_path).has_drift

    def test_has_drift_after_add(self, tmp_path: pathlib.Path) -> None:
        """Creating a new file after the snapshot counts as drift."""
        tmp_path.joinpath("a.py").write_text("a = 1\n")
        committed = self.plugin.snapshot(tmp_path)
        tmp_path.joinpath("b.py").write_text("b = 2\n")
        assert self.plugin.drift(committed, tmp_path).has_drift

    def test_has_drift_after_delete(self, tmp_path: pathlib.Path) -> None:
        """Unlinking a tracked file after the snapshot counts as drift."""
        doomed = tmp_path / "gone.py"
        doomed.write_text("x = 1\n")
        committed = self.plugin.snapshot(tmp_path)
        doomed.unlink()
        assert self.plugin.drift(committed, tmp_path).has_drift

    def test_ignored_extant_file_not_in_drift(self, tmp_path: pathlib.Path) -> None:
        """A committed file that is now ignored but still on disk is not "deleted".

        Canonical regression test: build artifacts (e.g. app.js, app.css) that
        were added to .museignore while remaining on disk used to be shown as
        deleted by muse status, which in turn blocked muse checkout.
        """
        from muse.core.snapshot import hash_file
        from muse.domain import SnapshotManifest

        tmp_path.joinpath("src.py").write_text("x = 1\n")
        tmp_path.joinpath("app.js").write_text("// build output\n")
        # .museignore is part of the committed state too, so that its mere
        # presence does not register as drift and muddy the check below.
        tmp_path.joinpath(".museignore").write_text(
            '[global]\npatterns = ["app.js"]\n', encoding="utf-8"
        )

        # The manifest is assembled by hand, bypassing .museignore filtering,
        # to model a HEAD that committed app.js before the ignore rule applied.
        committed_files = {
            name: hash_file(tmp_path / name)
            for name in ("src.py", "app.js", ".museignore")
        }
        committed = SnapshotManifest(files=committed_files, domain="code", directories=[])

        # app.js is still present on disk: ignored now, but not deleted.
        report = self.plugin.drift(committed, tmp_path)
        deleted_addresses = {
            entry["address"]
            for entry in report.delta.get("ops", [])
            if entry.get("op") == "delete"
        }
        assert "app.js" not in deleted_addresses, (
            "ignored-and-extant file must not appear as deleted in drift"
        )
        assert not report.has_drift, (
            "drift must be clean when only ignored-and-extant files differ"
        )

    def test_truly_deleted_ignored_file_still_in_drift(self, tmp_path: pathlib.Path) -> None:
        """An ignored file that is really gone from disk is still reported deleted."""
        artifact = tmp_path / "app.js"
        tmp_path.joinpath("src.py").write_text("x = 1\n")
        artifact.write_text("// build output\n")
        committed = self.plugin.snapshot(tmp_path)

        # Ignoring the file AND removing it from disk is a genuine deletion.
        tmp_path.joinpath(".museignore").write_text(
            '[global]\npatterns = ["app.js"]\n', encoding="utf-8"
        )
        artifact.unlink()

        report = self.plugin.drift(committed, tmp_path)
        deleted_addresses = {
            entry["address"]
            for entry in report.delta.get("ops", [])
            if entry.get("op") == "delete"
        }
        assert "app.js" in deleted_addresses, (
            "a file in .museignore that is genuinely absent from disk must still be deleted"
        )
1632
1633
1634 # ---------------------------------------------------------------------------
1635 # CodePlugin — apply (passthrough)
1636 # ---------------------------------------------------------------------------
1637
1638
def test_apply_returns_live_state_unchanged(tmp_path: pathlib.Path) -> None:
    """``apply`` hands back the very live-state object it was given."""
    code_plugin = CodePlugin()
    # An empty delta (two empty manifests) is enough to exercise the call.
    empty_delta = code_plugin.diff(_make_manifest({}), _make_manifest({}))
    assert code_plugin.apply(empty_delta, tmp_path) is tmp_path
1645
1646
1647 # ---------------------------------------------------------------------------
1648 # CodePlugin — schema
1649 # ---------------------------------------------------------------------------
1650
1651
class TestCodePluginSchema:
    """Shape checks on the mapping returned by ``CodePlugin.schema()``."""

    plugin = CodePlugin()

    def test_schema_domain(self) -> None:
        """The schema declares the "code" domain."""
        schema = self.plugin.schema()
        assert schema["domain"] == "code"

    def test_schema_merge_mode(self) -> None:
        """The schema declares three-way merging."""
        schema = self.plugin.schema()
        assert schema["merge_mode"] == "three_way"

    def test_schema_version(self) -> None:
        """``schema_version`` equals the ``__version__`` name this module has in scope."""
        schema = self.plugin.schema()
        assert schema["schema_version"] == __version__

    def test_schema_dimensions(self) -> None:
        """The structure, symbols and imports dimensions are all declared."""
        declared = {dimension["name"] for dimension in self.plugin.schema()["dimensions"]}
        for required in ("structure", "symbols", "imports"):
            assert required in declared

    def test_schema_top_level_is_tree(self) -> None:
        """The top-level element is of kind "tree"."""
        top_level = self.plugin.schema()["top_level"]
        assert top_level["kind"] == "tree"

    def test_schema_description_non_empty(self) -> None:
        """The schema carries a non-empty description."""
        description = self.plugin.schema()["description"]
        assert len(description) > 0
1677
1678
1679 # ---------------------------------------------------------------------------
1680 # delta_summary
1681 # ---------------------------------------------------------------------------
1682
1683
class TestDeltaSummary:
    """Spot checks on the text produced by ``delta_summary``."""

    def test_empty_ops(self) -> None:
        rendered = delta_summary([])
        assert rendered == "no changes"

    def test_file_added(self) -> None:
        from muse.domain import DomainOp

        file_insert = InsertOp(
            op="insert",
            address="f.py",
            position=None,
            content_id="abc",
            content_summary="added f.py",
        )
        ops: list[DomainOp] = [file_insert]
        rendered = delta_summary(ops)
        assert "added" in rendered
        assert "file" in rendered

    def test_symbols_counted_from_patch(self) -> None:
        from muse.domain import DomainOp, PatchOp

        # Two symbol-level inserts nested under one file-level patch op.
        foo_insert = InsertOp(
            op="insert",
            address="f.py::foo",
            position=None,
            content_id="a",
            content_summary="added function foo",
        )
        bar_insert = InsertOp(
            op="insert",
            address="f.py::bar",
            position=None,
            content_id="b",
            content_summary="added function bar",
        )
        child: list[DomainOp] = [foo_insert, bar_insert]
        patch = PatchOp(
            op="patch",
            address="f.py",
            child_ops=child,
            child_domain="code_symbols",
            child_summary="2 added",
        )
        ops: list[DomainOp] = [patch]
        assert "symbol" in delta_summary(ops)
1707
1708
1709 # ---------------------------------------------------------------------------
1710 # Markdown adapter
1711 # ---------------------------------------------------------------------------
1712
1713
class TestMarkdownAdapter:
    """Heading extraction through ``MarkdownAdapter`` and its path routing."""

    def _parse(self, src: str) -> SymbolTree:
        """Parse *src* as ``README.md``; skip the test when the adapter has no parser."""
        from muse.plugins.code.ast_parser import MarkdownAdapter

        markdown = MarkdownAdapter()
        if markdown._parser is None:
            pytest.skip("tree-sitter-markdown not available")
        encoded = src.encode()
        return markdown.parse_symbols(encoded, "README.md")

    def test_h1_extracted(self) -> None:
        tree = self._parse("# Hello World\n")
        hits = [addr for addr in tree if "Hello World" in addr]
        assert hits, f"keys: {list(tree)}"

    def test_h2_extracted(self) -> None:
        tree = self._parse("# Title\n\n## Section Two\n")
        hits = [addr for addr in tree if "Section Two" in addr]
        assert hits

    def test_multiple_headings(self) -> None:
        tree = self._parse("# Top\n\n## Alpha\n\n## Beta\n\n### Deep\n")
        assert "section" in {rec["kind"] for rec in tree.values()}
        assert len(tree) >= 4

    def test_section_lineno(self) -> None:
        tree = self._parse("# First\n\n## Second\n")
        found = None
        for rec in tree.values():
            if "Second" in rec["name"]:
                found = rec
                break
        assert found is not None
        # "## Second" sits on the third line of the source text.
        assert found["lineno"] == 3

    def test_content_id_changes_with_text(self) -> None:
        hello_ids = {rec["content_id"] for rec in self._parse("# Hello\n").values()}
        world_ids = {rec["content_id"] for rec in self._parse("# World\n").values()}
        assert hello_ids != world_ids

    def test_adapter_for_path_md(self) -> None:
        from muse.plugins.code.ast_parser import MarkdownAdapter

        assert isinstance(adapter_for_path("docs/README.md"), MarkdownAdapter)

    def test_adapter_for_path_rst(self) -> None:
        from muse.plugins.code.ast_parser import MarkdownAdapter

        assert isinstance(adapter_for_path("notes.rst"), MarkdownAdapter)
1762
1763
1764 # ---------------------------------------------------------------------------
1765 # HTML adapter
1766 # ---------------------------------------------------------------------------
1767
1768
class TestHtmlAdapter:
    """Symbol naming and extraction for HTML parsed through ``HtmlAdapter``."""

    def _parse(self, src: str) -> SymbolTree:
        """Parse *src* as ``index.html``; skip the test when the adapter has no parser."""
        from muse.plugins.code.ast_parser import HtmlAdapter

        html = HtmlAdapter()
        if html._parser is None:
            pytest.skip("tree-sitter-html not available")
        encoded = src.encode()
        return html.parse_symbols(encoded, "index.html")

    @staticmethod
    def _keys_with(tree, fragment):
        """Return the symbol addresses in *tree* that contain *fragment*."""
        return [addr for addr in tree if fragment in addr]

    # ------------------------------------------------------------------
    # Elements named from their id attribute
    # ------------------------------------------------------------------

    def test_id_bearing_div_extracted(self) -> None:
        tree = self._parse('<html><body><div id="hero">x</div></body></html>')
        assert self._keys_with(tree, "div#hero"), f"keys: {list(tree)}"

    def test_id_name_format(self) -> None:
        tree = self._parse('<section id="intro">content</section>')
        assert self._keys_with(tree, "section#intro")

    def test_multiple_ids(self) -> None:
        tree = self._parse('<section id="intro">a</section><section id="outro">b</section>')
        assert self._keys_with(tree, "section#intro")
        assert self._keys_with(tree, "section#outro")

    # ------------------------------------------------------------------
    # Elements named from aria-label
    # ------------------------------------------------------------------

    def test_aria_label_nav(self) -> None:
        tree = self._parse('<nav aria-label="Primary Navigation"><ul></ul></nav>')
        assert self._keys_with(tree, "nav[Primary Navigation]"), f"keys: {list(tree)}"

    def test_aria_label_beats_lineno(self) -> None:
        tree = self._parse('<main aria-label="Content"><p>text</p></main>')
        assert self._keys_with(tree, "main[Content]")
        # No address may carry an "@" marker when a label is available.
        assert not self._keys_with(tree, "@"), f"lineno leaked: {list(tree)}"

    # ------------------------------------------------------------------
    # Elements named from the name attribute (form / fieldset / slot)
    # ------------------------------------------------------------------

    def test_form_name_attr(self) -> None:
        tree = self._parse('<form name="login"><input></form>')
        assert self._keys_with(tree, "form[login]"), f"keys: {list(tree)}"

    def test_fieldset_name_attr(self) -> None:
        tree = self._parse('<fieldset name="address"><legend>Addr</legend></fieldset>')
        assert self._keys_with(tree, "fieldset[address]")

    def test_slot_name_attr(self) -> None:
        tree = self._parse('<slot name="header"></slot>')
        assert self._keys_with(tree, "slot[header]"), f"keys: {list(tree)}"

    # ------------------------------------------------------------------
    # Headings and label-like elements named from their text content
    # ------------------------------------------------------------------

    def test_h1_heading_extracted(self) -> None:
        tree = self._parse('<h1>Page Title</h1>')
        assert self._keys_with(tree, "h1: Page Title"), f"keys: {list(tree)}"

    def test_h2_heading_extracted(self) -> None:
        tree = self._parse('<h2>Section Name</h2>')
        assert self._keys_with(tree, "h2: Section Name")

    def test_summary_text_extracted(self) -> None:
        tree = self._parse('<details><summary>More info</summary><p>body</p></details>')
        assert self._keys_with(tree, "summary: More info"), f"keys: {list(tree)}"

    def test_figcaption_text_extracted(self) -> None:
        tree = self._parse('<figure><img src="x.jpg"><figcaption>A photo</figcaption></figure>')
        assert self._keys_with(tree, "figcaption: A photo"), f"keys: {list(tree)}"

    def test_legend_text_extracted(self) -> None:
        tree = self._parse('<fieldset name="contact"><legend>Contact Us</legend></fieldset>')
        assert self._keys_with(tree, "legend: Contact Us")

    # ------------------------------------------------------------------
    # Semantic elements named from a child heading
    # ------------------------------------------------------------------

    def test_section_with_child_heading(self) -> None:
        tree = self._parse('<section><h2>About Us</h2><p>content</p></section>')
        assert self._keys_with(tree, "section: About Us"), f"keys: {list(tree)}"

    def test_article_with_child_h3(self) -> None:
        tree = self._parse('<article><h3>News Item</h3><p>text</p></article>')
        assert self._keys_with(tree, "article: News Item")

    def test_child_heading_not_emitted_twice(self) -> None:
        # The section must be addressed by its heading-derived name, and no
        # address of the form "section@..." may appear alongside it.
        tree = self._parse('<section><h2>About</h2></section>')
        assert self._keys_with(tree, "section: About")
        assert not self._keys_with(tree, "section@"), f"lineno leaked: {list(tree)}"

    # ------------------------------------------------------------------
    # Custom elements (hyphenated tag names)
    # ------------------------------------------------------------------

    def test_custom_element_with_id(self) -> None:
        tree = self._parse('<my-button id="submit-btn">Submit</my-button>')
        assert self._keys_with(tree, "my-button#submit-btn"), f"keys: {list(tree)}"

    def test_custom_element_with_aria_label(self) -> None:
        tree = self._parse('<app-header aria-label="Site Header"></app-header>')
        assert self._keys_with(tree, "app-header[Site Header]")

    # ------------------------------------------------------------------
    # Template elements
    # ------------------------------------------------------------------

    def test_template_with_id(self) -> None:
        tree = self._parse('<template id="card-tpl"><div class="card"></div></template>')
        assert self._keys_with(tree, "template#card-tpl"), f"keys: {list(tree)}"

    # ------------------------------------------------------------------
    # Bare semantic elements versus generic containers
    # ------------------------------------------------------------------

    def test_semantic_section_extracted(self) -> None:
        tree = self._parse('<section>content</section>')
        assert self._keys_with(tree, "section")

    def test_generic_div_without_id_skipped(self) -> None:
        tree = self._parse('<div>plain</div>')
        assert not self._keys_with(tree, "div"), f"unexpected: {list(tree)}"

    # ------------------------------------------------------------------
    # Content IDs
    # ------------------------------------------------------------------

    def test_content_id_present(self) -> None:
        tree = self._parse('<h1>Title</h1>')
        h1_records = [rec for rec in tree.values() if "h1" in rec["name"]]
        assert h1_records
        cid = h1_records[0]["content_id"]
        # 71 characters = the 7-character "sha256:" prefix plus 64 more.
        assert cid.startswith("sha256:")
        assert len(cid) == 71

    def test_content_id_differs_for_different_content(self) -> None:
        alpha_tree = self._parse('<section id="a"><p>alpha</p></section>')
        beta_tree = self._parse('<section id="a"><p>beta</p></section>')
        alpha_ids = {
            rec["content_id"] for rec in alpha_tree.values() if "section#a" in rec["name"]
        }
        beta_ids = {
            rec["content_id"] for rec in beta_tree.values() if "section#a" in rec["name"]
        }
        assert alpha_ids and beta_ids
        assert alpha_ids != beta_ids

    def test_adapter_for_path_html(self) -> None:
        from muse.plugins.code.ast_parser import HtmlAdapter

        resolved = adapter_for_path("page.html")
        assert isinstance(resolved, HtmlAdapter)

    def test_adapter_for_path_htm(self) -> None:
        from muse.plugins.code.ast_parser import HtmlAdapter

        resolved = adapter_for_path("legacy.htm")
        assert isinstance(resolved, HtmlAdapter)
1927
1928
1929 # ---------------------------------------------------------------------------
1930 # CSS adapter
1931 # ---------------------------------------------------------------------------
1932
1933
class TestCssAdapter:
    """Rule-set, @keyframes, @media, @supports, and @layer extraction via tree-sitter-css.

    Every test skips (rather than fails) when the grammar is not installed,
    in which case ``adapter_for_path`` degrades to ``FallbackAdapter``.
    """

    def _parse(self, src: str, path: str = "styles.css") -> SymbolTree:
        """Parse *src* with the adapter selected for *path*.

        Skips the calling test when the adapter is a ``FallbackAdapter``,
        i.e. the CSS grammar is unavailable.
        """
        adapter = adapter_for_path(path)
        if isinstance(adapter, FallbackAdapter):
            pytest.skip("tree-sitter-css not available")
        return adapter.parse_symbols(src.encode(), path)

    def test_rule_set_extracted(self) -> None:
        syms = self._parse(".btn { color: red; }")
        assert len(syms) >= 1
        kinds = {r["kind"] for r in syms.values()}
        assert "rule" in kinds

    def test_rule_set_kind(self) -> None:
        syms = self._parse(".card { display: flex; }")
        records = [r for r in syms.values() if ".card" in r["name"]]
        assert records, f"keys: {list(syms)}"
        assert records[0]["kind"] == "rule"

    def test_keyframes_extracted(self) -> None:
        syms = self._parse("@keyframes spin { from { transform: rotate(0deg); } }")
        assert any("spin" in r["name"] for r in syms.values()), f"symbols: {list(syms)}"

    def test_keyframes_kind(self) -> None:
        syms = self._parse("@keyframes bounce { 0% { top: 0; } 100% { top: 10px; } }")
        records = [r for r in syms.values() if "bounce" in r["name"]]
        assert records, f"keys: {list(syms)}"
        assert records[0]["kind"] == "rule"

    def test_media_extracted(self) -> None:
        syms = self._parse("@media (max-width: 768px) { .btn { display: none; } }")
        assert any(r["kind"] == "rule" for r in syms.values()), f"symbols: {list(syms)}"

    def test_supports_extracted(self) -> None:
        syms = self._parse("@supports (display: grid) { .container { display: grid; } }")
        assert any(r["kind"] == "rule" for r in syms.values()), f"symbols: {list(syms)}"

    def test_layer_extracted(self) -> None:
        syms = self._parse("@layer base { .btn { display: inline-block; } }")
        assert any(r["kind"] == "rule" for r in syms.values()), f"symbols: {list(syms)}"

    def test_multiple_rules(self) -> None:
        src = ".a { color: red; }\n.b { color: blue; }"
        syms = self._parse(src)
        assert len(syms) >= 2

    def test_content_id_differs_for_different_rules(self) -> None:
        s1 = self._parse(".a { color: red; }")
        s2 = self._parse(".b { color: blue; }")
        ids1 = {r["content_id"] for r in s1.values()}
        ids2 = {r["content_id"] for r in s2.values()}
        assert ids1 != ids2

    def test_scss_extension_uses_separate_spec(self) -> None:
        """`.scss` files use tree-sitter-scss; `.css` files use tree-sitter-css."""
        from muse.plugins.code.ast_parser import TreeSitterAdapter

        css_adapter = adapter_for_path("styles.css")
        scss_adapter = adapter_for_path("styles.scss")
        # Skip like the rest of the class when either grammar is missing,
        # instead of failing the isinstance checks below.
        if isinstance(css_adapter, FallbackAdapter) or isinstance(scss_adapter, FallbackAdapter):
            pytest.skip("tree-sitter-css / tree-sitter-scss not available")
        assert isinstance(css_adapter, TreeSitterAdapter)
        assert isinstance(scss_adapter, TreeSitterAdapter)
        # Each must have its own language spec — different module names.
        assert css_adapter._spec["module_name"] == "tree_sitter_css"
        assert scss_adapter._spec["module_name"] == "tree_sitter_scss"
2000
2001
2002 # ---------------------------------------------------------------------------
2003 # SCSS: variables, mixins, functions, nested rules
2004 # ---------------------------------------------------------------------------
2005
2006
class TestScssAdapter:
    """SCSS symbol extraction through the adapter chosen for ``.scss`` paths.

    Symbol kinds exercised here:
        variable — ``$name: value`` at the top level
        mixin    — ``@mixin name(…) { … }``
        function — ``@function name(…) { @return … }``
        rule     — selector rule-sets and ``@keyframes``
    """

    def _parse(self, src: str, path: str = "styles.scss") -> SymbolTree:
        """Parse *src* for *path*; skip the test when only the fallback adapter exists."""
        chosen = adapter_for_path(path)
        if isinstance(chosen, FallbackAdapter):
            pytest.skip("tree-sitter-scss not available")
        encoded = src.encode()
        return chosen.parse_symbols(encoded, path)

    def test_rule_set_extracted(self) -> None:
        tree = self._parse(".btn { color: red; }")
        assert len(tree) >= 1
        assert "rule" in {rec["kind"] for rec in tree.values()}

    def test_rule_set_kind(self) -> None:
        tree = self._parse(".card { display: flex; }")
        matching = [rec for rec in tree.values() if ".card" in rec["name"]]
        assert matching, f"keys: {list(tree)}"
        assert matching[0]["kind"] == "rule"

    def test_variable_extracted(self) -> None:
        tree = self._parse("$primary-color: #333;\n")
        matching = [rec for rec in tree.values() if "primary-color" in rec["name"]]
        assert matching, f"keys: {list(tree)}"

    def test_variable_kind(self) -> None:
        tree = self._parse("$spacing: 8px;\n")
        matching = [rec for rec in tree.values() if "spacing" in rec["name"]]
        assert matching, f"keys: {list(tree)}"
        assert matching[0]["kind"] == "variable"

    def test_mixin_extracted(self) -> None:
        tree = self._parse("@mixin flex-center($dir: row) { display: flex; }\n")
        matching = [rec for rec in tree.values() if "flex-center" in rec["name"]]
        assert matching, f"keys: {list(tree)}"

    def test_mixin_kind(self) -> None:
        tree = self._parse("@mixin respond-to($bp) { @media (min-width: $bp) { @content; } }\n")
        matching = [rec for rec in tree.values() if "respond-to" in rec["name"]]
        assert matching, f"keys: {list(tree)}"
        assert matching[0]["kind"] == "mixin"

    def test_function_extracted(self) -> None:
        tree = self._parse("@function em($px, $base: 16) { @return $px / $base * 1em; }\n")
        matching = [rec for rec in tree.values() if "em" in rec["name"]]
        assert matching, f"keys: {list(tree)}"

    def test_function_kind(self) -> None:
        tree = self._parse("@function rem($px) { @return $px / 16px * 1rem; }\n")
        matching = [rec for rec in tree.values() if "rem" in rec["name"]]
        assert matching, f"keys: {list(tree)}"
        assert matching[0]["kind"] == "function"

    def test_keyframes_extracted(self) -> None:
        tree = self._parse("@keyframes spin { from { transform: rotate(0deg); } }\n")
        matching = [rec for rec in tree.values() if "spin" in rec["name"]]
        assert matching, f"keys: {list(tree)}"

    def test_keyframes_kind(self) -> None:
        tree = self._parse("@keyframes fade { from { opacity: 1; } to { opacity: 0; } }\n")
        matching = [rec for rec in tree.values() if "fade" in rec["name"]]
        assert matching, f"keys: {list(tree)}"
        assert matching[0]["kind"] == "rule"

    def test_multiple_kinds_coexist(self) -> None:
        stylesheet = (
            "$spacing: 8px;\n"
            "@mixin flex-center { display: flex; }\n"
            "@function rem($px) { @return $px / 16px * 1rem; }\n"
            ".card { padding: $spacing; }\n"
        )
        seen_kinds = {rec["kind"] for rec in self._parse(stylesheet).values()}
        for expected in ("variable", "mixin", "function", "rule"):
            assert expected in seen_kinds

    def test_variable_inside_rule_not_extracted(self) -> None:
        """A ``$var`` declared inside a rule block must not become a symbol."""
        tree = self._parse(".card {\n $local: 10px;\n padding: $local;\n}\n")
        # No extracted symbol name may mention the inner ``$local`` variable.
        assert all("local" not in rec["name"] for rec in tree.values()), (
            f"inner variable leaked: {[r['name'] for r in tree.values()]}"
        )

    def test_content_id_stable(self) -> None:
        stylesheet = "$primary: red;\n"
        first_ids = {rec["content_id"] for rec in self._parse(stylesheet).values()}
        second_ids = {rec["content_id"] for rec in self._parse(stylesheet).values()}
        assert first_ids == second_ids

    def test_content_id_differs_for_different_symbols(self) -> None:
        a_ids = {rec["content_id"] for rec in self._parse("$a: 1px;\n").values()}
        b_ids = {rec["content_id"] for rec in self._parse("$b: 2px;\n").values()}
        assert a_ids != b_ids
2112
2113
2114 # ---------------------------------------------------------------------------
2115 # JS/TS: arrow functions and async detection
2116 # ---------------------------------------------------------------------------
2117
2118
class TestJSArrowFunctions:
    """JS/TS symbols: const-bound arrows and function expressions, methods, async."""

    def _parse(self, src: str, path: str = "mod.js") -> SymbolTree:
        """Parse *src* for *path*; skip the test when only the fallback adapter exists."""
        chosen = adapter_for_path(path)
        if isinstance(chosen, FallbackAdapter):
            pytest.skip("tree-sitter-javascript not available")
        encoded = src.encode()
        return chosen.parse_symbols(encoded, path)

    def test_const_arrow_function(self) -> None:
        tree = self._parse("const greet = (name) => `Hello ${name}`;\n")
        hits = [addr for addr in tree if "greet" in addr]
        assert hits, f"keys: {list(tree)}"

    def test_const_function_expression(self) -> None:
        tree = self._parse("const add = function(a, b) { return a + b; };\n")
        hits = [addr for addr in tree if "add" in addr]
        assert hits

    def test_ts_arrow_function(self) -> None:
        typescript_src = "const greet = (name: string): string => `Hello ${name}`;\n"
        tree = self._parse(typescript_src, path="mod.ts")
        hits = [addr for addr in tree if "greet" in addr]
        assert hits

    def test_class_method_still_extracted(self) -> None:
        tree = self._parse("class Foo { bar() { return 1; } }\n")
        hits = [addr for addr in tree if "bar" in addr]
        assert hits

    def test_async_function_detected(self) -> None:
        tree = self._parse("async function fetchData() { return await fetch('/'); }\n")
        kinds = set()
        for rec in tree.values():
            if "fetchData" in rec["name"]:
                kinds.add(rec["kind"])
        assert "async_function" in kinds, f"kinds: {kinds}"
2151
2152
2153 # ---------------------------------------------------------------------------
2154 # Go: const and var spec extraction
2155 # ---------------------------------------------------------------------------
2156
2157
class TestGoConstVar:
    """Go ``const`` and ``var`` declarations are extracted as symbols."""

    def _parse(self, src: str) -> SymbolTree:
        """Parse *src* as ``main.go``; skip the test when only the fallback adapter exists."""
        chosen = adapter_for_path("main.go")
        if isinstance(chosen, FallbackAdapter):
            pytest.skip("tree-sitter-go not available")
        encoded = src.encode()
        return chosen.parse_symbols(encoded, "main.go")

    def test_const_extracted(self) -> None:
        tree = self._parse("package main\nconst MaxRetries = 3\n")
        hits = [addr for addr in tree if "MaxRetries" in addr]
        assert hits, f"keys: {list(tree)}"

    def test_var_extracted(self) -> None:
        tree = self._parse("package main\nvar ErrNotFound = errors.New(\"not found\")\n")
        hits = [addr for addr in tree if "ErrNotFound" in addr]
        assert hits

    def test_const_kind_is_variable(self) -> None:
        tree = self._parse("package main\nconst Timeout = 30\n")
        matching = [rec for rec in tree.values() if "Timeout" in rec["name"]]
        assert matching
        assert matching[0]["kind"] == "variable"
2178
2179
2180 # ---------------------------------------------------------------------------
2181 # Rust: static, const, type alias, mod
2182 # ---------------------------------------------------------------------------
2183
2184
class TestRustExtended:
    """Rust ``static``, ``const``, ``type`` alias and ``mod`` items are extracted."""

    def _parse(self, src: str) -> SymbolTree:
        """Parse *src* as ``lib.rs``; skip the test when only the fallback adapter exists."""
        chosen = adapter_for_path("lib.rs")
        if isinstance(chosen, FallbackAdapter):
            pytest.skip("tree-sitter-rust not available")
        encoded = src.encode()
        return chosen.parse_symbols(encoded, "lib.rs")

    def test_static_extracted(self) -> None:
        tree = self._parse("static MAX: usize = 100;\n")
        hits = [addr for addr in tree if "MAX" in addr]
        assert hits, f"keys: {list(tree)}"

    def test_const_extracted(self) -> None:
        tree = self._parse("const TIMEOUT: u64 = 30;\n")
        hits = [addr for addr in tree if "TIMEOUT" in addr]
        assert hits

    def test_type_alias_extracted(self) -> None:
        tree = self._parse("type Result<T> = std::result::Result<T, Error>;\n")
        hits = [addr for addr in tree if "Result" in addr]
        assert hits

    def test_mod_extracted(self) -> None:
        tree = self._parse("mod utils { pub fn helper() {} }\n")
        hits = [addr for addr in tree if "utils" in addr]
        assert hits
2207
2208
2209 # ---------------------------------------------------------------------------
2210 # C: struct and enum extraction
2211 # ---------------------------------------------------------------------------
2212
2213
class TestCStructEnum:
    """C ``struct`` and ``enum`` definitions are extracted with matching kinds."""

    def _parse(self, src: str) -> SymbolTree:
        """Parse *src* as ``main.c``; skip the test when only the fallback adapter exists."""
        chosen = adapter_for_path("main.c")
        if isinstance(chosen, FallbackAdapter):
            pytest.skip("tree-sitter-c not available")
        encoded = src.encode()
        return chosen.parse_symbols(encoded, "main.c")

    def test_struct_extracted(self) -> None:
        tree = self._parse("struct Point { int x; int y; };\n")
        hits = [addr for addr in tree if "Point" in addr]
        assert hits, f"keys: {list(tree)}"

    def test_enum_extracted(self) -> None:
        tree = self._parse("enum Color { RED, GREEN, BLUE };\n")
        hits = [addr for addr in tree if "Color" in addr]
        assert hits

    def test_enum_kind(self) -> None:
        tree = self._parse("enum Status { OK, ERR };\n")
        matching = [rec for rec in tree.values() if "Status" in rec["name"]]
        assert matching, f"keys: {list(tree)}"
        assert matching[0]["kind"] == "enum"

    def test_struct_kind(self) -> None:
        tree = self._parse("struct Node { int val; struct Node *next; };\n")
        matching = [rec for rec in tree.values() if "Node" in rec["name"]]
        assert matching
        assert matching[0]["kind"] == "struct"
2240
2241
2242 # ---------------------------------------------------------------------------
2243 # C#: property and record extraction
2244 # ---------------------------------------------------------------------------
2245
2246
class TestCSharpExtended:
    """C# properties and records are extracted; properties have kind ``variable``."""

    def _parse(self, src: str) -> SymbolTree:
        """Parse *src* as ``Model.cs``; skip the test when only the fallback adapter exists."""
        chosen = adapter_for_path("Model.cs")
        if isinstance(chosen, FallbackAdapter):
            pytest.skip("tree-sitter-c-sharp not available")
        encoded = src.encode()
        return chosen.parse_symbols(encoded, "Model.cs")

    def test_property_extracted(self) -> None:
        tree = self._parse("class User { public string Name { get; set; } }\n")
        hits = [addr for addr in tree if "Name" in addr]
        assert hits, f"keys: {list(tree)}"

    def test_record_extracted(self) -> None:
        tree = self._parse("public record Point(int X, int Y);\n")
        hits = [addr for addr in tree if "Point" in addr]
        assert hits

    def test_property_kind(self) -> None:
        tree = self._parse("class C { public int Age { get; set; } }\n")
        matching = [rec for rec in tree.values() if "Age" in rec["name"]]
        assert matching
        assert matching[0]["kind"] == "variable"
2271
2272
2273 # ---------------------------------------------------------------------------
2274 # Java: annotation type and record extraction
2275 # ---------------------------------------------------------------------------
2276
2277
class TestJavaExtended:
    """Java annotation types (``@interface``) and records are extracted."""

    def _parse(self, src: str) -> SymbolTree:
        """Parse *src* as ``Main.java``; skip the test when only the fallback adapter exists."""
        chosen = adapter_for_path("Main.java")
        if isinstance(chosen, FallbackAdapter):
            pytest.skip("tree-sitter-java not available")
        encoded = src.encode()
        return chosen.parse_symbols(encoded, "Main.java")

    def test_annotation_type_extracted(self) -> None:
        tree = self._parse("public @interface Cacheable { String value() default \"\"; }\n")
        hits = [addr for addr in tree if "Cacheable" in addr]
        assert hits, f"keys: {list(tree)}"

    def test_record_extracted(self) -> None:
        tree = self._parse("public record Point(int x, int y) {}\n")
        hits = [addr for addr in tree if "Point" in addr]
        assert hits
2292
2293
2294 # ---------------------------------------------------------------------------
2295 # Kotlin: object declaration and property extraction
2296 # ---------------------------------------------------------------------------
2297
2298
class TestKotlinExtended:
    """Kotlin ``object`` declarations and top-level properties are extracted."""

    def _parse(self, src: str) -> SymbolTree:
        """Parse *src* as ``Main.kt``; skip the test when only the fallback adapter exists."""
        chosen = adapter_for_path("Main.kt")
        if isinstance(chosen, FallbackAdapter):
            pytest.skip("tree-sitter-kotlin not available")
        encoded = src.encode()
        return chosen.parse_symbols(encoded, "Main.kt")

    def test_object_declaration_extracted(self) -> None:
        tree = self._parse("object Singleton { fun greet() = println(\"hi\") }\n")
        hits = [addr for addr in tree if "Singleton" in addr]
        assert hits, f"keys: {list(tree)}"

    def test_property_declaration_extracted(self) -> None:
        tree = self._parse("val MAX_SIZE: Int = 100\n")
        hits = [addr for addr in tree if "MAX_SIZE" in addr]
        assert hits
2313
2314
2315 # ---------------------------------------------------------------------------
2316 # Bash / sh / zsh adapter
2317 # ---------------------------------------------------------------------------
2318
2319
2320 class TestBashAdapter:
2321 """Symbol extraction tests for the bash/sh/zsh tree-sitter adapter.
2322
2323 All tests skip gracefully when ``tree-sitter-bash`` is not installed so
2324 they are safe to run in environments that only install a subset of grammars.
2325 The grammar covers bash, sh, and zsh (zsh is a strict backward-compatible
2326 superset of bash at the AST level).
2327 """
2328
def _parse(self, src: str, path: str = "script.sh") -> SymbolTree:
    """Return the symbols of *src* parsed for *path*; skip when the grammar is missing."""
    from muse.plugins.code.ast_parser import FallbackAdapter, adapter_for_path

    chosen = adapter_for_path(path)
    # A FallbackAdapter here means no shell grammar was loaded for this path.
    if isinstance(chosen, FallbackAdapter):
        pytest.skip("tree-sitter-bash not installed (pip install 'muse[shell]')")
    encoded = src.encode()
    return chosen.parse_symbols(encoded, path)
2336
def test_function_definition_extracted(self) -> None:
    """A ``name() { … }`` definition produces a symbol whose address mentions the name."""
    tree = self._parse("greet() {\n echo hello\n}\n")
    hits = [addr for addr in tree if "greet" in addr]
    assert hits, f"keys: {list(tree)}"
2341
def test_function_kind_is_function(self) -> None:
    """The record named ``build`` is reported with kind ``function``."""
    tree = self._parse("build() {\n make all\n}\n")
    build_records = [rec for rec in tree.values() if rec["name"] == "build"]
    assert build_records, "symbol 'build' not found"
    first = build_records[0]
    assert first["kind"] == "function"
2347
def test_variable_assignment_extracted(self) -> None:
    """A top-level ``NAME=value`` assignment produces a symbol for ``NAME``."""
    tree = self._parse("APP_NAME=muse\n")
    hits = [addr for addr in tree if "APP_NAME" in addr]
    assert hits, f"keys: {list(tree)}"
2352
def test_variable_kind_is_variable(self) -> None:
    """The record named ``VERSION`` is reported with kind ``variable``."""
    tree = self._parse("VERSION=1.0.0\n")
    version_records = [rec for rec in tree.values() if rec["name"] == "VERSION"]
    assert version_records, "symbol 'VERSION' not found"
    first = version_records[0]
    assert first["kind"] == "variable"
2358
def test_multiple_functions_extracted(self) -> None:
    """Two function definitions in one script both appear by name."""
    script = "init() {\n echo init\n}\ndeploy() {\n echo deploy\n}\n"
    found_names = {rec["name"] for rec in self._parse(script).values()}
    for expected in ("init", "deploy"):
        assert expected in found_names
2365
def test_function_and_variable_coexist(self) -> None:
    """A variable assignment and a function in the same script are both extracted."""
    script = "ENV=prod\nstart() {\n echo starting\n}\n"
    found_names = {rec["name"] for rec in self._parse(script).values()}
    for expected in ("ENV", "start"):
        assert expected in found_names
2372
def test_symbol_record_has_content_id(self) -> None:
    """The ``run`` record carries a ``sha256:``-prefixed, 71-character content ID."""
    tree = self._parse("run() {\n ./app\n}\n")
    run_records = [rec for rec in tree.values() if rec["name"] == "run"]
    assert run_records
    cid = run_records[0]["content_id"]
    # 71 characters = the 7-character "sha256:" prefix plus 64 more.
    assert cid.startswith("sha256:")
    assert len(cid) == 71
2379
def test_content_id_stable_across_calls(self) -> None:
    """Parsing the same script twice yields the same content ID for every address."""
    src = "setup() {\n echo setup\n}\n"
    syms_a = self._parse(src)
    syms_b = self._parse(src)
    # Without this guard an empty tree would make the loop below pass vacuously.
    assert syms_a, "no symbols extracted"
    # Both parses must expose the same addresses before their IDs are compared.
    assert set(syms_a) == set(syms_b)
    for addr, record in syms_a.items():
        assert record["content_id"] == syms_b[addr]["content_id"]
2386
def test_body_hash_differs_for_different_bodies(self) -> None:
    """Two ``fn`` definitions with different bodies get different ``body_hash`` values."""
    tree_a = self._parse("fn() {\n echo a\n}\n")
    tree_b = self._parse("fn() {\n echo b\n}\n")

    def fn_records(tree):
        return [rec for rec in tree.values() if rec["name"] == "fn"]

    from_a = fn_records(tree_a)
    from_b = fn_records(tree_b)
    assert from_a and from_b
    assert from_a[0]["body_hash"] != from_b[0]["body_hash"]
2394
def test_sh_extension_routes_to_same_adapter(self) -> None:
    """``script.sh`` and ``script.bash`` resolve to the same adapter type."""
    from muse.plugins.code.ast_parser import FallbackAdapter, adapter_for_path

    sh_adapter = adapter_for_path("script.sh")
    bash_adapter = adapter_for_path("script.bash")
    if isinstance(sh_adapter, FallbackAdapter):
        pytest.skip("tree-sitter-bash not installed")
    # Same adapter class for both paths, and the ``.sh`` adapter advertises
    # both extensions.
    assert type(sh_adapter) is type(bash_adapter)
    for extension in (".sh", ".bash"):
        assert extension in sh_adapter.supported_extensions()
2407
def test_zsh_extension_routed(self) -> None:
    """A ``.zsh`` path resolves to an adapter that lists ``.zsh`` as supported."""
    from muse.plugins.code.ast_parser import FallbackAdapter, adapter_for_path

    zsh_adapter = adapter_for_path("config.zsh")
    if isinstance(zsh_adapter, FallbackAdapter):
        pytest.skip("tree-sitter-bash not installed")
    supported = zsh_adapter.supported_extensions()
    assert ".zsh" in supported
2415
def test_plugin_zsh_extension_routed(self) -> None:
    """A ``.plugin.zsh`` path resolves to an adapter that lists that extension."""
    from muse.plugins.code.ast_parser import FallbackAdapter, adapter_for_path

    plugin_adapter = adapter_for_path("muse.plugin.zsh")
    if isinstance(plugin_adapter, FallbackAdapter):
        pytest.skip("tree-sitter-bash not installed")
    supported = plugin_adapter.supported_extensions()
    assert ".plugin.zsh" in supported
2423
def test_parse_zsh_function(self) -> None:
    """A ``name() { ... }`` function in a ``.plugin.zsh`` file is extracted."""
    tree = self._parse("muse_prompt_info() {\n echo branch\n}\n", "muse.plugin.zsh")
    # The function name only needs to appear somewhere inside a symbol address.
    matching = [address for address in tree if "muse_prompt_info" in address]
    assert matching, f"keys: {list(tree)}"
2428
def test_address_prefix_matches_file_path(self) -> None:
    """At least one symbol address begins with ``<file path>::``."""
    tree = self._parse("build() {\n echo\n}\n", "scripts/build.sh")
    prefixed = [
        address for address in tree if address.startswith("scripts/build.sh::")
    ]
    assert prefixed, f"keys: {list(tree)}"
2435
def test_lineno_is_positive(self) -> None:
    """Every extracted symbol reports a line number of at least 1.

    The test first requires that at least one symbol was extracted, so the
    per-record check cannot pass on an empty tree.
    """
    syms = self._parse("deploy() {\n echo deploy\n}\n")
    # An empty tree would skip the loop below and pass vacuously.
    assert syms, "no symbols extracted; lineno check would be vacuous"
    for addr, rec in syms.items():
        assert rec["lineno"] >= 1, f"{addr}: lineno={rec['lineno']}"
2440
def test_empty_file_returns_empty_tree(self) -> None:
    """Parsing an empty source string produces an empty symbol tree."""
    assert self._parse("") == {}
2444
def test_comment_only_file_returns_empty_tree(self) -> None:
    """A script containing only ``#`` comment lines produces an empty tree."""
    comments_only = "# This is a comment\n# Another comment\n"
    assert self._parse(comments_only) == {}
2448
def test_file_content_id_stable(self) -> None:
    """Calling ``file_content_id`` twice on the same bytes gives equal IDs."""
    from muse.plugins.code.ast_parser import FallbackAdapter, adapter_for_path

    sh_adapter = adapter_for_path("install.sh")
    if isinstance(sh_adapter, FallbackAdapter):
        pytest.skip("tree-sitter-bash not installed")
    payload = b"#!/bin/bash\necho hello\n"
    first_id = sh_adapter.file_content_id(payload)
    second_id = sh_adapter.file_content_id(payload)
    assert first_id == second_id
2457
def test_file_content_id_differs_for_different_content(self) -> None:
    """Different byte contents produce different ``file_content_id`` values."""
    from muse.plugins.code.ast_parser import FallbackAdapter, adapter_for_path

    sh_adapter = adapter_for_path("install.sh")
    if isinstance(sh_adapter, FallbackAdapter):
        pytest.skip("tree-sitter-bash not installed")
    id_for_a = sh_adapter.file_content_id(b"echo a\n")
    id_for_b = sh_adapter.file_content_id(b"echo b\n")
    assert id_for_a != id_for_b
File History 5 commits
sha256:c10a2ce474b3bb7ff2a3d628e8a3f2e028fd78ca652513496a03a498ae2267b3 chore: sweep all stale DirectoryRenameOp / directory_rename… Sonnet 4.6 minor 23 days ago
sha256:ec193b14fa080825d306dc3a7df5aec0edffc3a422a800671216c29368352fb3 fix: correct _TYPED_DELTA_EXAMPLE JSON to match actual muse… Sonnet 4.6 23 days ago
sha256:f6cc6e98954ed614fc817df1af2f528b414182e4a7fdc20f278e1cf9f47e5ff4 docs: rename OT → address-keyed Map merge across all docs a… Sonnet 4.6 minor 23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago