gabriel / muse public
test_refactor_classify.py python
432 lines 17.3 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
1 """Tests for muse/plugins/code/_refactor_classify.py.
2
3 Coverage
4 --------
5 classify_exact
6 - unchanged: same content_id
7 - rename: same body_hash, different name, same file
8 - move: same body_hash, same name, different file (identical content_id → unchanged)
9 - rename+move: same body_hash, different name, different file
10 - signature_only: same body_hash, different signature_id
11 - impl_only: same signature_id, different body_hash
12 - metadata_only: same body_hash + signature_id, different metadata_id
13 - full_rewrite: both signature and body changed
14
15 classify_composite
16 - Exact rename detected across batches
17 - Exact move detected across batches
18 - Exact rename+move detected across batches
19 - Inferred extract (new symbol name inside old qualified_name)
20 - No false positives for completely unrelated symbols
21 - Empty inputs → empty results
22
23 RefactorClassification
24 - to_dict() round-trips all fields
25 - confidence is rounded to 3 decimal places
26 - evidence list is preserved
27 """
28
29 from muse.core.types import fake_id
30
31 import pytest
32
33 from muse.plugins.code._refactor_classify import (
34 RefactorClassification,
35 classify_composite,
36 classify_exact,
37 )
38 from muse.plugins.code.ast_parser import SymbolRecord
39
40
41 # ---------------------------------------------------------------------------
42 # Helpers
43 # ---------------------------------------------------------------------------
44
45
46
47
def _rec(
    *,
    kind: str = "function",
    name: str = "func",
    qualified_name: str = "func",
    lineno: int = 1,
    end_lineno: int = 10,
    content_id: str | None = None,
    body_hash: str | None = None,
    signature_id: str | None = None,
    metadata_id: str = "",
    canonical_key: str = "",
) -> SymbolRecord:
    """Build a ``SymbolRecord`` for tests, filling in any hash that is not supplied.

    A falsy ``body_hash`` or ``signature_id`` is replaced by ``fake_id`` of a
    seed derived from *name*.  A falsy ``content_id`` is replaced by ``fake_id``
    of the resolved body hash, signature id and ``metadata_id`` concatenated.
    All other arguments are passed through unchanged.
    """
    resolved_body = body_hash if body_hash else fake_id(f"body:{name}")
    resolved_sig = signature_id if signature_id else fake_id(f"sig:{name}")
    if content_id:
        resolved_content = content_id
    else:
        # Derive the content id from the three component ids.
        resolved_content = fake_id(resolved_body + resolved_sig + metadata_id)

    return SymbolRecord(
        kind=kind,
        name=name,
        qualified_name=qualified_name,
        lineno=lineno,
        end_lineno=end_lineno,
        content_id=resolved_content,
        body_hash=resolved_body,
        signature_id=resolved_sig,
        metadata_id=metadata_id,
        canonical_key=canonical_key,
    )
76
77
def _same_body_rec(source: SymbolRecord, *, name: str, qualified_name: str = "") -> SymbolRecord:
    """Return a renamed copy of *source* that keeps its body and signature ids.

    The new record copies ``kind``, ``lineno``, ``end_lineno``, ``body_hash``,
    ``signature_id`` and ``metadata_id`` (``""`` when absent) from *source*.
    Its ``content_id`` is built from a seed that includes *name*, so it is not
    taken from *source*.  ``canonical_key`` is always ``""``.

    Args:
        source: Record whose body/signature identity is reused.
        name: Name for the new record.
        qualified_name: Qualified name for the new record; falls back to
            *name* when empty.
    """
    body_hash = source["body_hash"]
    sig_id = source["signature_id"]
    return SymbolRecord(
        kind=source["kind"],
        name=name,
        qualified_name=qualified_name or name,
        lineno=source["lineno"],
        end_lineno=source["end_lineno"],
        # Seed includes the new name so the content id differs from a plain
        # body+signature+metadata derivation.
        content_id=fake_id(body_hash + sig_id + "renamed" + name),
        body_hash=body_hash,
        signature_id=sig_id,
        metadata_id=source.get("metadata_id", ""),
        canonical_key="",
    )
95
96
97 # ---------------------------------------------------------------------------
98 # classify_exact — unchanged
99 # ---------------------------------------------------------------------------
100
101
class TestClassifyExactUnchanged:
    """``classify_exact`` on a pair that shares one ``content_id``."""

    def test_same_content_id_is_unchanged(self) -> None:
        """The same record at the same address is reported as ``unchanged``."""
        address = "src/a.py::f"
        symbol = _rec(name="f", content_id="abc123")
        assert classify_exact(address, address, symbol, symbol) == "unchanged"
107
108
109 # ---------------------------------------------------------------------------
110 # classify_exact — rename (same file)
111 # ---------------------------------------------------------------------------
112
113
class TestClassifyExactRename:
    """``classify_exact`` for same-file pairs that share body and signature ids."""

    def test_same_body_different_name_same_file(self) -> None:
        """Same body/signature, new name, same file → ``rename``."""
        shared_body = fake_id("body_content")
        shared_sig = fake_id("signature")
        before = SymbolRecord(
            kind="function",
            name="old_name",
            qualified_name="old_name",
            lineno=1,
            end_lineno=10,
            content_id=fake_id(shared_body + shared_sig + ""),
            body_hash=shared_body,
            signature_id=shared_sig,
            metadata_id="",
            canonical_key="",
        )
        after = SymbolRecord(
            kind="function",
            name="new_name",
            qualified_name="new_name",
            lineno=1,
            end_lineno=10,
            # Extra seed suffix so the content_id is not the same as before's.
            content_id=fake_id(shared_body + shared_sig + "x"),
            body_hash=shared_body,
            signature_id=shared_sig,
            metadata_id="",
            canonical_key="",
        )
        outcome = classify_exact(
            "src/a.py::old_name", "src/a.py::new_name", before, after
        )
        assert outcome == "rename"

    def test_rename_requires_different_name(self) -> None:
        """Same name and body with a changed ``metadata_id`` is ``metadata_only``, not a rename."""
        shared_body = fake_id("body")
        shared_sig = fake_id("sig")
        before = SymbolRecord(
            kind="function",
            name="same",
            qualified_name="same",
            lineno=1,
            end_lineno=5,
            content_id=fake_id(shared_body + shared_sig),
            body_hash=shared_body,
            signature_id=shared_sig,
            metadata_id="",
            canonical_key="",
        )
        after = SymbolRecord(
            kind="function",
            name="same",
            qualified_name="same",
            lineno=1,
            end_lineno=5,
            # Only the metadata (and therefore the content seed) differs.
            content_id=fake_id(shared_body + shared_sig + "meta"),
            body_hash=shared_body,
            signature_id=shared_sig,
            metadata_id="meta",
            canonical_key="",
        )
        outcome = classify_exact("src/a.py::same", "src/a.py::same", before, after)
        assert outcome == "metadata_only"
151
152
153 # ---------------------------------------------------------------------------
154 # classify_exact — move (different file)
155 # ---------------------------------------------------------------------------
156
157
class TestClassifyExactMove:
    """``classify_exact`` for pairs whose addresses live in different files."""

    def test_same_content_id_different_file_same_name(self) -> None:
        """An identical ``content_id`` is ``unchanged`` even when the file differs."""
        symbol = _rec(name="compute", content_id="shared_content_id_abc")
        outcome = classify_exact(
            "src/billing.py::compute", "src/invoice.py::compute", symbol, symbol
        )
        assert outcome == "unchanged"

    def test_same_body_same_name_different_file(self) -> None:
        """Same body and name, different file and ``content_id`` → ``move``."""
        shared_body = fake_id("body")
        shared_sig = fake_id("sig")
        before = SymbolRecord(
            kind="function",
            name="compute",
            qualified_name="compute",
            lineno=1,
            end_lineno=10,
            content_id=fake_id(shared_body + shared_sig + "old"),
            body_hash=shared_body,
            signature_id=shared_sig,
            metadata_id="",
            canonical_key="",
        )
        after = SymbolRecord(
            kind="function",
            name="compute",
            qualified_name="compute",
            lineno=20,
            end_lineno=30,
            content_id=fake_id(shared_body + shared_sig + "new"),
            body_hash=shared_body,
            signature_id=shared_sig,
            metadata_id="",
            canonical_key="",
        )
        outcome = classify_exact(
            "src/billing.py::compute", "src/invoice.py::compute", before, after
        )
        assert outcome == "move"

    def test_same_body_different_name_different_file(self) -> None:
        """Same body, different name and different file → ``rename+move``."""
        shared_body = fake_id("body")
        shared_sig = fake_id("sig")
        before = SymbolRecord(
            kind="function",
            name="compute_total",
            qualified_name="compute_total",
            lineno=1,
            end_lineno=10,
            content_id=fake_id(shared_body + shared_sig + "old"),
            body_hash=shared_body,
            signature_id=shared_sig,
            metadata_id="",
            canonical_key="",
        )
        after = SymbolRecord(
            kind="function",
            name="invoice_total",
            qualified_name="invoice_total",
            lineno=5,
            end_lineno=15,
            content_id=fake_id(shared_body + shared_sig + "new"),
            body_hash=shared_body,
            signature_id=shared_sig,
            metadata_id="",
            canonical_key="",
        )
        outcome = classify_exact(
            "src/billing.py::compute_total",
            "src/invoice.py::invoice_total",
            before,
            after,
        )
        assert outcome == "rename+move"
199
200
201 # ---------------------------------------------------------------------------
202 # classify_exact — signature_only / impl_only / metadata_only / full_rewrite
203 # ---------------------------------------------------------------------------
204
205
class TestClassifyExactKinds:
    """``classify_exact`` for same-name, same-file pairs that differ in selected ids."""

    def _make_pair(
        self,
        *,
        same_body: bool = True,
        same_sig: bool = True,
        same_meta: bool = True,
    ) -> tuple[SymbolRecord, SymbolRecord]:
        """Return an ``(old, new)`` pair of records named ``f``.

        Each ``same_*`` flag decides whether *new* reuses the corresponding id
        of *old* or gets one built from a ``*_changed`` seed.  The ``content_id``
        of *new* is always built from a seed with an extra ``"x"`` suffix.
        """
        base_body = fake_id("body_data")
        base_sig = fake_id("sig_data")
        base_meta = fake_id("meta_data")
        before = SymbolRecord(
            kind="function",
            name="f",
            qualified_name="f",
            lineno=1,
            end_lineno=10,
            content_id=fake_id(base_body + base_sig + base_meta),
            body_hash=base_body,
            signature_id=base_sig,
            metadata_id=base_meta,
            canonical_key="",
        )

        # Start from the old ids and swap in a changed one wherever requested.
        changed_body = base_body
        if not same_body:
            changed_body = fake_id("body_data_changed")
        changed_sig = base_sig
        if not same_sig:
            changed_sig = fake_id("sig_data_changed")
        changed_meta = base_meta
        if not same_meta:
            changed_meta = fake_id("meta_data_changed")

        after = SymbolRecord(
            kind="function",
            name="f",
            qualified_name="f",
            lineno=1,
            end_lineno=10,
            content_id=fake_id(changed_body + changed_sig + changed_meta + "x"),
            body_hash=changed_body,
            signature_id=changed_sig,
            metadata_id=changed_meta,
            canonical_key="",
        )
        return before, after

    def test_signature_only(self) -> None:
        """Only the signature id differs → ``signature_only``."""
        before, after = self._make_pair(same_body=True, same_sig=False)
        assert classify_exact("a.py::f", "a.py::f", before, after) == "signature_only"

    def test_impl_only(self) -> None:
        """Only the body hash differs → ``impl_only``."""
        before, after = self._make_pair(same_body=False, same_sig=True)
        assert classify_exact("a.py::f", "a.py::f", before, after) == "impl_only"

    def test_metadata_only(self) -> None:
        """Only the metadata id differs → ``metadata_only``."""
        before, after = self._make_pair(same_body=True, same_sig=True, same_meta=False)
        assert classify_exact("a.py::f", "a.py::f", before, after) == "metadata_only"

    def test_full_rewrite(self) -> None:
        """Body hash and signature id both differ → ``full_rewrite``."""
        before, after = self._make_pair(same_body=False, same_sig=False)
        assert classify_exact("a.py::f", "a.py::f", before, after) == "full_rewrite"
253
254
255 # ---------------------------------------------------------------------------
256 # RefactorClassification — to_dict
257 # ---------------------------------------------------------------------------
258
259
class TestRefactorClassificationToDict:
    """Serialisation of ``RefactorClassification`` through ``to_dict()``."""

    def test_to_dict_contains_required_keys(self) -> None:
        """Addresses, classification, confidence and evidence appear under the expected keys."""
        classification = RefactorClassification(
            old_address="src/a.py::f",
            new_address="src/a.py::g",
            old_rec=_rec(name="f"),
            new_rec=_rec(name="g"),
            exact="rename",
            inferred="none",
            confidence=1.0,
            evidence=["body_hash matches abc12345"],
        )
        payload = classification.to_dict()
        expected = {
            "old_address": "src/a.py::f",
            "new_address": "src/a.py::g",
            "exact_classification": "rename",
            "inferred_refactor": "none",
            "confidence": 1.0,
            "evidence": ["body_hash matches abc12345"],
        }
        for key, value in expected.items():
            assert payload[key] == value

    def test_to_dict_truncates_hashes(self) -> None:
        """64-character content ids are emitted as 12-character strings."""
        before = _rec(
            name="f", content_id="a" * 64, body_hash="b" * 64, signature_id="c" * 64
        )
        after = _rec(
            name="g", content_id="d" * 64, body_hash="b" * 64, signature_id="c" * 64
        )
        payload = RefactorClassification(
            "a.py::f", "a.py::g", before, after, "rename"
        ).to_dict()
        for key in ("old_content_id", "new_content_id"):
            assert len(str(payload[key])) == 12

    def test_to_dict_confidence_rounded(self) -> None:
        """A confidence of 0.123456789 is serialised as 0.123."""
        before = _rec(name="f")
        after = _rec(name="g")
        classification = RefactorClassification(
            "a.py::f", "a.py::g", before, after, "full_rewrite", confidence=0.123456789
        )
        assert classification.to_dict()["confidence"] == 0.123

    def test_default_evidence_is_empty_list(self) -> None:
        """Omitting ``evidence`` gives ``[]`` on the object and in the dict."""
        before = _rec(name="f")
        after = _rec(name="g")
        classification = RefactorClassification(
            "a.py::f", "a.py::g", before, after, "impl_only"
        )
        assert classification.evidence == []
        assert classification.to_dict()["evidence"] == []
305
306
307 # ---------------------------------------------------------------------------
308 # classify_composite — exact detection
309 # ---------------------------------------------------------------------------
310
311
class TestClassifyCompositeExact:
    """``classify_composite`` pairing removed and added symbols by exact ids."""

    def test_rename_detected(self) -> None:
        """One removed and one added symbol with a shared body in one file → ``rename``."""
        shared_body = fake_id("shared_body")
        shared_sig = fake_id("sig")
        before = SymbolRecord(
            kind="function",
            name="old_func",
            qualified_name="old_func",
            lineno=1,
            end_lineno=10,
            content_id=fake_id(shared_body + shared_sig + ""),
            body_hash=shared_body,
            signature_id=shared_sig,
            metadata_id="",
            canonical_key="",
        )
        after = SymbolRecord(
            kind="function",
            name="new_func",
            qualified_name="new_func",
            lineno=1,
            end_lineno=10,
            content_id=fake_id(shared_body + shared_sig + "changed"),
            body_hash=shared_body,
            signature_id=shared_sig,
            metadata_id="",
            canonical_key="",
        )
        matches = classify_composite(
            {"src/a.py::old_func": before},
            {"src/a.py::new_func": after},
        )
        assert len(matches) == 1
        (match,) = matches
        assert match.exact == "rename"
        assert match.old_address == "src/a.py::old_func"
        assert match.new_address == "src/a.py::new_func"

    def test_move_detected_via_content_id(self) -> None:
        """The same record under two file addresses is paired and classified ``unchanged``."""
        symbol = _rec(name="compute", content_id=fake_id("exact_content"))
        matches = classify_composite(
            {"src/billing.py::compute": symbol},
            {"src/invoice.py::compute": symbol},
        )
        assert len(matches) == 1
        (match,) = matches
        # An identical content_id yields the "unchanged" classification.
        assert match.exact == "unchanged"
        assert match.old_address == "src/billing.py::compute"
        assert match.new_address == "src/invoice.py::compute"

    def test_empty_inputs(self) -> None:
        """Two empty mappings produce an empty result list."""
        assert classify_composite({}, {}) == []

    def test_no_match_different_everything(self) -> None:
        """Unrelated symbols must not crash; only the result type is checked."""
        before = _rec(name="alpha", body_hash=fake_id("alpha_body"))
        after = _rec(name="beta", body_hash=fake_id("beta_body"))
        # With no shared body_hash or content_id the composite heuristics run;
        # whether a name heuristic fires is deliberately not asserted here.
        matches = classify_composite({"a.py::alpha": before}, {"b.py::beta": after})
        assert isinstance(matches, list)

    def test_rename_plus_move(self) -> None:
        """Shared body with a new name in a new file → ``rename+move``."""
        shared_body = fake_id("shared_body_cross")
        shared_sig = fake_id("cross_sig")
        before = SymbolRecord(
            kind="function",
            name="compute_a",
            qualified_name="compute_a",
            lineno=1,
            end_lineno=8,
            content_id=fake_id(shared_body + shared_sig + "old"),
            body_hash=shared_body,
            signature_id=shared_sig,
            metadata_id="",
            canonical_key="",
        )
        after = SymbolRecord(
            kind="function",
            name="compute_b",
            qualified_name="compute_b",
            lineno=20,
            end_lineno=28,
            content_id=fake_id(shared_body + shared_sig + "new"),
            body_hash=shared_body,
            signature_id=shared_sig,
            metadata_id="",
            canonical_key="",
        )
        matches = classify_composite(
            {"src/a.py::compute_a": before},
            {"src/b.py::compute_b": after},
        )
        assert len(matches) == 1
        assert matches[0].exact == "rename+move"

    def test_multiple_renames_at_once(self) -> None:
        """Two independent renames in one batch are both reported."""

        def _pair(name: str) -> tuple[SymbolRecord, SymbolRecord]:
            """Build an old_/new_ record pair sharing body and signature ids for *name*."""
            shared_body = fake_id(f"body_{name}")
            shared_sig = fake_id(f"sig_{name}")
            before = SymbolRecord(
                kind="function",
                name=f"old_{name}",
                qualified_name=f"old_{name}",
                lineno=1,
                end_lineno=5,
                content_id=fake_id(shared_body + shared_sig + "old"),
                body_hash=shared_body,
                signature_id=shared_sig,
                metadata_id="",
                canonical_key="",
            )
            after = SymbolRecord(
                kind="function",
                name=f"new_{name}",
                qualified_name=f"new_{name}",
                lineno=1,
                end_lineno=5,
                content_id=fake_id(shared_body + shared_sig + "new"),
                body_hash=shared_body,
                signature_id=shared_sig,
                metadata_id="",
                canonical_key="",
            )
            return before, after

        alpha_before, alpha_after = _pair("alpha")
        beta_before, beta_after = _pair("beta")
        matches = classify_composite(
            {"a.py::old_alpha": alpha_before, "a.py::old_beta": beta_before},
            {"a.py::new_alpha": alpha_after, "a.py::new_beta": beta_after},
        )
        assert len(matches) == 2
        seen_old = {match.old_address for match in matches}
        assert "a.py::old_alpha" in seen_old
        assert "a.py::old_beta" in seen_old
411
412
413 # ---------------------------------------------------------------------------
414 # classify_composite — inferred extract
415 # ---------------------------------------------------------------------------
416
417
class TestClassifyCompositeInferred:
    """``classify_composite`` results carrying an inferred ``extract``."""

    def test_extract_heuristic_name_overlap(self) -> None:
        """A removed ``compute_total`` and an added ``compute`` must not crash.

        ``compute`` is a substring of ``compute_total``, which is the case the
        extract heuristic targets.  Whether it fires is not asserted; any
        ``extract`` result that is produced must be well-formed.
        """
        removed = {
            "a.py::compute_total": _rec(
                name="compute_total", qualified_name="compute_total"
            )
        }
        added = {"a.py::compute": _rec(name="compute", qualified_name="compute")}
        for match in classify_composite(removed, added):
            if match.inferred != "extract":
                continue
            assert match.confidence >= 0.0
            assert isinstance(match.evidence, list)
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 28 days ago