gabriel / muse public
test_file_rename_detection.py python
417 lines 16.0 KB
Raw
sha256:f8e686793bb93114c2923d0d294162d13b4e6f4d57ae0f6cbc1e0d493e80f965 fix: ls-remote signing identity uses resolved remote URL Sonnet 4.6 patch 12 days ago
1 """Tests for file-level rename detection in structured_delta.
2
3 When a file is renamed (deleted at one path, inserted at another with
4 identical blob content), ``CodePlugin.diff()`` must emit a ``RenameOp``
5 (``op="rename"``) — not a bare ``InsertOp + DeleteOp`` pair.
6
7 A moved+edited file emits ``RenameOp`` followed by ``PatchOp`` (two
8 orthogonal ops). ``PatchOp`` never carries ``from_address``.
9 """
10
11 from __future__ import annotations
12
13 import json
14 import pathlib
15 from collections.abc import Mapping
16
17 import pytest
18
19 from muse.core.types import blob_id
20 from muse.core.object_store import write_object
21 from muse.core.paths import muse_dir
22 from muse.domain import SnapshotManifest
23 from muse.plugins.code.plugin import CodePlugin
24
# Module-level CodePlugin instance shared by every test below; each test
# exercises it through ``plugin.diff()``.
plugin = CodePlugin()
26
27 # ---------------------------------------------------------------------------
28 # Repo and snapshot helpers
29 # ---------------------------------------------------------------------------
30
31
def _init_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Build the bare repository skeleton that ``plugin.diff()`` is run against.

    The directory returned by ``muse_dir(tmp_path)`` is created and filled
    with a ``repo.json`` config, a ``HEAD`` file pointing at
    ``refs/heads/main``, and empty ``objects``, ``snapshots``, ``commits``
    and ``refs/heads`` directories.

    Returns *tmp_path* unchanged so callers can use it as the repo root.
    """
    meta_dir = muse_dir(tmp_path)
    meta_dir.mkdir()

    # Fixed placeholder metadata; the repo id is a dummy all-"a" digest.
    repo_config = {
        "repo_id": "sha256:" + "a" * 64,
        "domain": "code",
        "default_branch": "main",
        "created_at": "2025-01-01T00:00:00+00:00",
    }
    (meta_dir / "repo.json").write_text(json.dumps(repo_config), encoding="utf-8")
    (meta_dir / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")

    for store_name in ("objects", "snapshots", "commits"):
        (meta_dir / store_name).mkdir()
    (meta_dir / "refs" / "heads").mkdir(parents=True)

    return tmp_path
51
52
def _snap(root: pathlib.Path, files: Mapping[str, bytes]) -> SnapshotManifest:
    """Persist every blob in *files* and describe them as a code-domain snapshot.

    Each value is written to the object store under *root*, keyed by the id
    that ``blob_id`` computes for it.  The returned ``SnapshotManifest`` maps
    every path in *files* to that id.
    """

    def _store(payload: bytes) -> str:
        # Compute the id first, then write, so the stored key matches the manifest.
        object_id = blob_id(payload)
        write_object(root, object_id, payload)
        return object_id

    path_to_id: dict[str, str] = {
        file_path: _store(payload) for file_path, payload in files.items()
    }
    return SnapshotManifest(files=path_to_id, domain="code")
61
62
63 def _op_types(ops: list[Mapping[str, object]]) -> list[str]:
64 return [str(o["op"]) for o in ops]
65
66
67 def _op_addresses(ops: list[Mapping[str, object]]) -> list[str]:
68 return [str(o["address"]) for o in ops]
69
70
71 # ---------------------------------------------------------------------------
72 # Core rename detection — blob-identical files
73 # ---------------------------------------------------------------------------
74
75
class TestBlobIdenticalRename:
    """Pure renames: identical bytes, new path.

    Each test diffs a snapshot holding one file at ``hello.txt`` against a
    snapshot holding the same bytes at ``hello.md``.  The delta must describe
    that as a ``RenameOp`` (op="rename"), never as a bare insert + delete
    pair.
    """

    def test_txt_rename_produces_rename_op_not_insert_delete(
        self, tmp_path: pathlib.Path
    ) -> None:
        """hello.txt → hello.md with unchanged bytes yields a ``RenameOp``
        and neither an ``InsertOp`` nor a ``DeleteOp``."""
        repo = _init_repo(tmp_path)
        payload = b"some plain text content\n"
        before = _snap(repo, {"hello.txt": payload})
        after = _snap(repo, {"hello.md": payload})

        delta = plugin.diff(before, after, repo_root=repo)
        ops = delta["ops"]

        rename_ops = [entry for entry in ops if entry["op"] == "rename"]
        assert rename_ops, f"Expected RenameOp for rename, got ops: {ops}"
        assert all(entry["op"] != "insert" for entry in ops), (
            f"InsertOp must not appear for a pure rename, got: {ops}"
        )
        assert all(entry["op"] != "delete" for entry in ops), (
            f"DeleteOp must not appear for a pure rename, got: {ops}"
        )

    def test_txt_rename_op_has_correct_address(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The single RenameOp is addressed at the destination, hello.md."""
        repo = _init_repo(tmp_path)
        payload = b"some plain text content\n"
        before = _snap(repo, {"hello.txt": payload})
        after = _snap(repo, {"hello.md": payload})

        delta = plugin.diff(before, after, repo_root=repo)
        rename_ops = [entry for entry in delta["ops"] if entry["op"] == "rename"]

        assert len(rename_ops) == 1
        assert rename_ops[0]["address"] == "hello.md", (
            f"RenameOp address must be 'hello.md', got: {rename_ops[0]['address']!r}"
        )

    def test_txt_rename_op_has_from_address(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The single RenameOp records the source path in ``from_address``."""
        repo = _init_repo(tmp_path)
        payload = b"some plain text content\n"
        before = _snap(repo, {"hello.txt": payload})
        after = _snap(repo, {"hello.md": payload})

        delta = plugin.diff(before, after, repo_root=repo)
        rename_ops = [entry for entry in delta["ops"] if entry["op"] == "rename"]

        assert len(rename_ops) == 1
        assert rename_ops[0]["from_address"] == "hello.txt", (
            f"from_address must be 'hello.txt', got: {rename_ops[0]['from_address']!r}"
        )

    def test_rename_summary_mentions_renamed(self, tmp_path: pathlib.Path) -> None:
        """The summary text talks about a rename and mentions neither an
        addition nor a removal."""
        repo = _init_repo(tmp_path)
        payload = b"plain text\n"
        before = _snap(repo, {"hello.txt": payload})
        after = _snap(repo, {"hello.md": payload})

        delta = plugin.diff(before, after, repo_root=repo)
        # Compare case-insensitively; the messages still show the raw summary.
        lowered = delta["summary"].lower()

        assert "renamed" in lowered, (
            f"Summary must mention rename, got: {delta['summary']!r}"
        )
        assert "added" not in lowered, (
            f"Summary must not say 'added' for a rename, got: {delta['summary']!r}"
        )
        assert "removed" not in lowered, (
            f"Summary must not say 'removed' for a rename, got: {delta['summary']!r}"
        )

    def test_no_patch_op_has_from_address(self, tmp_path: pathlib.Path) -> None:
        """No PatchOp in the delta carries ``from_address``; that key belongs
        to RenameOp only.  Passes trivially when the delta has no PatchOp."""
        repo = _init_repo(tmp_path)
        payload = b"plain text\n"
        before = _snap(repo, {"hello.txt": payload})
        after = _snap(repo, {"hello.md": payload})

        delta = plugin.diff(before, after, repo_root=repo)
        patch_ops = (entry for entry in delta["ops"] if entry["op"] == "patch")
        for op in patch_ops:
            assert "from_address" not in op, (
                f"PatchOp at {op['address']} must not carry from_address"
            )
169
170
171 # ---------------------------------------------------------------------------
172 # Rename of files that DO have symbol trees (regression guard)
173 # ---------------------------------------------------------------------------
174
175
class TestSymbolFileRename:
    """Regression guard for renames of files that have symbol trees.

    A Python file moved to a new path with identical content must produce a
    RenameOp as well, exercising the existing ``_detect_file_move_edits``
    symbol-tree path.
    """

    def test_python_file_rename_same_content_produces_rename_op(
        self, tmp_path: pathlib.Path
    ) -> None:
        """utils.py → helpers.py with identical content leaves no file-level
        InsertOp or DeleteOp in the delta."""
        repo = _init_repo(tmp_path)
        source = b"def add(a, b):\n return a + b\n"
        before = _snap(repo, {"utils.py": source})
        after = _snap(repo, {"helpers.py": source})

        delta = plugin.diff(before, after, repo_root=repo)

        # Addresses containing "::" are excluded: only file-level ops are "bare".
        bare_inserts = [
            entry for entry in delta["ops"]
            if entry["op"] == "insert" and "::" not in entry["address"]
        ]
        bare_deletes = [
            entry for entry in delta["ops"]
            if entry["op"] == "delete" and "::" not in entry["address"]
        ]
        assert not bare_inserts, (
            f"No bare file-level InsertOp expected for rename, got: {bare_inserts}"
        )
        assert not bare_deletes, (
            f"No bare file-level DeleteOp expected for rename, got: {bare_deletes}"
        )

    def test_python_file_rename_emits_rename_op(
        self, tmp_path: pathlib.Path
    ) -> None:
        """At least one RenameOp names utils.py as its ``from_address``."""
        repo = _init_repo(tmp_path)
        source = b"def add(a, b):\n return a + b\n"
        before = _snap(repo, {"utils.py": source})
        after = _snap(repo, {"helpers.py": source})

        delta = plugin.diff(before, after, repo_root=repo)
        rename_ops = [entry for entry in delta["ops"] if entry["op"] == "rename"]

        assert "utils.py" in (entry["from_address"] for entry in rename_ops), (
            f"Expected RenameOp with from_address='utils.py', got: {rename_ops}"
        )
223
224
225 # ---------------------------------------------------------------------------
226 # Non-rename: different content, same extension — must NOT be detected
227 # ---------------------------------------------------------------------------
228
229
class TestNotARename:
    """When content differs between the deleted and added file, it is NOT a
    rename — it is a genuine insert + delete and must be treated as such."""

    def test_different_content_not_detected_as_rename(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A deleted file and an added file with *different* content must
        produce InsertOp + DeleteOp, not a RenameOp."""
        root = _init_repo(tmp_path)
        base = _snap(root, {"old.txt": b"original content\n"})
        target = _snap(root, {"new.txt": b"completely different\n"})

        delta = plugin.diff(base, target, repo_root=root)
        op_types = _op_types(delta["ops"])

        assert "insert" in op_types, (
            f"Non-rename must produce InsertOp, got: {op_types}"
        )
        assert "delete" in op_types, (
            f"Non-rename must produce DeleteOp, got: {op_types}"
        )
        assert not any(o["op"] == "rename" for o in delta["ops"]), (
            f"No RenameOp expected for different content, got: {delta['ops']}"
        )

    def test_same_extension_different_content_not_rename(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Two different .txt files (added + deleted) with different content
        must not be collapsed into a rename even if they share an extension."""
        root = _init_repo(tmp_path)
        base = _snap(root, {"a.txt": b"aaa\n"})
        target = _snap(root, {"b.txt": b"bbb\n"})

        delta = plugin.diff(base, target, repo_root=root)
        op_types = _op_types(delta["ops"])

        assert "insert" in op_types, (
            f"Non-rename must produce InsertOp, got: {op_types}"
        )
        assert "delete" in op_types, (
            f"Non-rename must produce DeleteOp, got: {op_types}"
        )
        # The docstring's actual claim: a shared extension alone must not
        # trigger rename pairing.
        assert "rename" not in op_types, (
            f"No RenameOp expected for different content, got: {delta['ops']}"
        )
270
271
272 # ---------------------------------------------------------------------------
273 # Rename alongside other changes
274 # ---------------------------------------------------------------------------
275
276
class TestRenameWithSiblings:
    """A rename must be detected correctly even when other files are
    simultaneously added, deleted, or modified in the same delta."""

    def test_rename_plus_new_file(self, tmp_path: pathlib.Path) -> None:
        """A rename and a genuinely new file in the same commit must each be
        represented correctly — RenameOp for the rename and InsertOp for the
        new file."""
        root = _init_repo(tmp_path)
        base = _snap(root, {"hello.txt": b"content\n"})
        target = _snap(root, {
            "hello.md": b"content\n",  # rename
            "new_file.py": b"x = 1\n",  # new addition
        })

        delta = plugin.diff(base, target, repo_root=root)

        rename_ops = [
            o for o in delta["ops"]
            if o["op"] == "rename" and o["from_address"] == "hello.txt"
        ]
        insert_ops = [
            o for o in delta["ops"]
            if o["op"] == "insert" and "new_file" in o["address"]
        ]

        assert rename_ops, "Rename must be detected alongside a new file"
        # The new .py file is accepted either as an insert or as a patch op.
        assert insert_ops or any(
            o["op"] == "patch" and "new_file" in o["address"] for o in delta["ops"]
        ), "New file must appear as an insert or patch op"

    def test_rename_plus_deletion(self, tmp_path: pathlib.Path) -> None:
        """A rename and a genuine deletion in the same delta must each be
        handled independently — the deletion must not be absorbed into the rename."""
        root = _init_repo(tmp_path)
        base = _snap(root, {
            "hello.txt": b"content\n",
            "old.txt": b"old content\n",
        })
        target = _snap(root, {
            "hello.md": b"content\n",  # rename of hello.txt
            # old.txt is genuinely deleted
        })

        delta = plugin.diff(base, target, repo_root=root)

        rename_ops = [
            o for o in delta["ops"]
            if o["op"] == "rename" and o["from_address"] == "hello.txt"
        ]
        delete_ops = [
            o for o in delta["ops"]
            if o["op"] == "delete" and "old" in o["address"]
        ]

        assert rename_ops, "hello.txt → hello.md rename must be detected"
        assert delete_ops, "old.txt deletion must produce a DeleteOp"

    def test_two_simultaneous_renames(self, tmp_path: pathlib.Path) -> None:
        """Two files renamed in the same commit must both produce RenameOps —
        neither rename must consume the other's partner."""
        root = _init_repo(tmp_path)
        base = _snap(root, {
            "a.txt": b"content A\n",
            "b.txt": b"content B\n",
        })
        target = _snap(root, {
            "a.md": b"content A\n",  # rename of a.txt
            "b.md": b"content B\n",  # rename of b.txt
        })

        delta = plugin.diff(base, target, repo_root=root)

        a_rename = [
            o for o in delta["ops"]
            if o["op"] == "rename" and o["from_address"] == "a.txt"
        ]
        b_rename = [
            o for o in delta["ops"]
            if o["op"] == "rename" and o["from_address"] == "b.txt"
        ]

        assert a_rename, "a.txt → a.md rename must be detected"
        assert b_rename, "b.txt → b.md rename must be detected"
        assert not any(
            o["op"] in ("insert", "delete") for o in delta["ops"]
        ), f"No bare insert/delete expected for two pure renames, got: {delta['ops']}"

    def test_rename_ambiguity_resolved_by_content_id(
        self, tmp_path: pathlib.Path
    ) -> None:
        """When one file is deleted and two files with the same content are
        added, the content_id uniquely identifies the rename target. Only one
        of the added files must be paired as the rename; the other remains an
        insert."""
        root = _init_repo(tmp_path)
        content = b"shared content\n"
        base = _snap(root, {"original.txt": content})
        target = _snap(root, {
            "copy_a.txt": content,
            "copy_b.txt": content,
        })

        delta = plugin.diff(base, target, repo_root=root)

        rename_ops = [
            o for o in delta["ops"]
            if o["op"] == "rename" and o["from_address"] == "original.txt"
        ]
        assert len(rename_ops) == 1, (
            f"Exactly one rename expected when one source matches two targets, "
            f"got {len(rename_ops)} rename ops"
        )

        # Either copy may be chosen as the rename target; whichever one is
        # not chosen must still be reported, as an insert.
        candidates = {"copy_a.txt", "copy_b.txt"}
        renamed_to = rename_ops[0]["address"]
        assert renamed_to in candidates, (
            f"RenameOp must target one of {sorted(candidates)}, got: {renamed_to!r}"
        )
        (leftover,) = candidates - {renamed_to}
        assert any(
            o["op"] == "insert" and o["address"] == leftover for o in delta["ops"]
        ), f"{leftover} must remain an InsertOp, got: {delta['ops']}"
384
385
386 # ---------------------------------------------------------------------------
387 # delta_summary
388 # ---------------------------------------------------------------------------
389
390
class TestDeltaSummaryForRenames:
    """delta_summary must report renamed files correctly, not as adds+removes."""

    def test_summary_single_rename(self, tmp_path: pathlib.Path) -> None:
        """One renamed file → summary says '1 renamed file' (or similar)."""
        root = _init_repo(tmp_path)
        content = b"text\n"
        base = _snap(root, {"a.txt": content})
        target = _snap(root, {"b.txt": content})

        delta = plugin.diff(base, target, repo_root=root)

        assert "renamed" in delta["summary"].lower(), (
            f"Summary must mention rename for a single renamed file: {delta['summary']!r}"
        )

    def test_summary_two_renames(self, tmp_path: pathlib.Path) -> None:
        """Two renamed files → summary must count both."""
        root = _init_repo(tmp_path)
        base = _snap(root, {"a.txt": b"AAA\n", "b.txt": b"BBB\n"})
        target = _snap(root, {"a.md": b"AAA\n", "b.md": b"BBB\n"})

        delta = plugin.diff(base, target, repo_root=root)
        summary = delta["summary"]

        # Two separate checks: joining them with ``or`` would let the word
        # "renamed" alone satisfy the test and leave the count unverified.
        assert "renamed" in summary.lower(), (
            f"Summary must mention rename for two renamed files: {summary!r}"
        )
        assert "2" in summary, (
            f"Summary must count both renames: {summary!r}"
        )
File History 1 commit
sha256:f8e686793bb93114c2923d0d294162d13b4e6f4d57ae0f6cbc1e0d493e80f965 fix: ls-remote signing identity uses resolved remote URL Sonnet 4.6 patch 12 days ago