test_file_rename_detection.py
python
sha256:ff478cfdcdd4b7fd6de89cb68896601a981f945634463275ec333bd20ca36402
Merge branch 'dev' into main
Human
21 days ago
| 1 | """Tests for file-level rename detection in structured_delta. |
| 2 | |
| 3 | When a file is renamed (deleted at one path, inserted at another with |
| 4 | identical blob content), ``CodePlugin.diff()`` must emit a ``RenameOp`` |
| 5 | (``op="rename"``) — not a bare ``InsertOp + DeleteOp`` pair. |
| 6 | |
| 7 | A moved+edited file emits ``RenameOp`` followed by ``PatchOp`` (two |
| 8 | orthogonal ops). ``PatchOp`` never carries ``from_address``. |
| 9 | """ |
| 10 | |
| 11 | from __future__ import annotations |
| 12 | |
| 13 | import json |
| 14 | import pathlib |
| 15 | from collections.abc import Mapping |
| 16 | |
| 17 | import pytest |
| 18 | |
| 19 | from muse.core.types import blob_id |
| 20 | from muse.core.object_store import write_object |
| 21 | from muse.core.paths import muse_dir |
| 22 | from muse.domain import SnapshotManifest |
| 23 | from muse.plugins.code.plugin import CodePlugin |
| 24 | |
| 25 | plugin = CodePlugin() |
| 26 | |
| 27 | # --------------------------------------------------------------------------- |
| 28 | # Repo and snapshot helpers |
| 29 | # --------------------------------------------------------------------------- |
| 30 | |
| 31 | |
def _init_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a bare repository skeleton under *tmp_path* for plugin.diff().

    Creates the directory returned by ``muse_dir(tmp_path)``, writes a
    ``repo.json`` and a ``HEAD`` file into it, and adds the empty
    ``objects``, ``snapshots``, ``commits`` and ``refs/heads`` directories.

    Returns *tmp_path* unchanged so the call can be used inline as the
    repository root.
    """
    meta_root = muse_dir(tmp_path)
    meta_root.mkdir()

    repo_config = {
        "repo_id": "sha256:" + "a" * 64,
        "domain": "code",
        "default_branch": "main",
        "created_at": "2025-01-01T00:00:00+00:00",
    }
    (meta_root / "repo.json").write_text(
        json.dumps(repo_config),
        encoding="utf-8",
    )
    (meta_root / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")

    # Flat store directories first, then the nested refs/heads tree.
    for store_name in ("objects", "snapshots", "commits"):
        (meta_root / store_name).mkdir()
    (meta_root / "refs" / "heads").mkdir(parents=True)

    return tmp_path
| 51 | |
| 52 | |
def _snap(root: pathlib.Path, files: Mapping[str, bytes]) -> SnapshotManifest:
    """Persist every blob in *files* and describe them as a snapshot.

    Each value is hashed with ``blob_id`` and written through
    ``write_object`` under *root*; the returned ``SnapshotManifest`` maps
    each path to the id of its content and is tagged ``domain="code"``.
    """

    def _store(content: bytes) -> str:
        # Hash first, then write, so the object is stored under its own id.
        oid = blob_id(content)
        write_object(root, oid, content)
        return oid

    manifest: dict[str, str] = {
        path: _store(content) for path, content in files.items()
    }
    return SnapshotManifest(files=manifest, domain="code")
| 61 | |
| 62 | |
def _op_types(ops: list[Mapping[str, object]]) -> list[str]:
    """Return the stringified ``"op"`` value of every entry, in order."""
    kinds: list[str] = []
    for entry in ops:
        kinds.append(str(entry["op"]))
    return kinds
| 65 | |
| 66 | |
def _op_addresses(ops: list[Mapping[str, object]]) -> list[str]:
    """Return the stringified ``"address"`` value of every entry, in order."""
    addresses: list[str] = []
    for entry in ops:
        addresses.append(str(entry["address"]))
    return addresses
| 69 | |
| 70 | |
| 71 | # --------------------------------------------------------------------------- |
| 72 | # Core rename detection — blob-identical files |
| 73 | # --------------------------------------------------------------------------- |
| 74 | |
| 75 | |
class TestBlobIdenticalRename:
    """Pure moves: identical bytes at a new path.

    Every test renames ``hello.txt`` to ``hello.md`` without changing the
    content and checks that the delta reports an ``op="rename"`` entry
    instead of an insert + delete pair.
    """

    def test_txt_rename_produces_rename_op_not_insert_delete(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A content-identical move yields a rename op and neither an
        insert nor a delete."""
        root = _init_repo(tmp_path)
        payload = b"some plain text content\n"
        before = _snap(root, {"hello.txt": payload})
        after = _snap(root, {"hello.md": payload})

        ops = plugin.diff(before, after, repo_root=root)["ops"]
        kinds = [o["op"] for o in ops]

        assert "rename" in kinds, f"Expected RenameOp for rename, got ops: {ops}"
        assert "insert" not in kinds, (
            f"InsertOp must not appear for a pure rename, got: {ops}"
        )
        assert "delete" not in kinds, (
            f"DeleteOp must not appear for a pure rename, got: {ops}"
        )

    def test_txt_rename_op_has_correct_address(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The single rename op is addressed at the destination, hello.md."""
        root = _init_repo(tmp_path)
        payload = b"some plain text content\n"
        before = _snap(root, {"hello.txt": payload})
        after = _snap(root, {"hello.md": payload})

        result = plugin.diff(before, after, repo_root=root)
        renames = [o for o in result["ops"] if o["op"] == "rename"]

        assert len(renames) == 1
        only = renames[0]
        assert only["address"] == "hello.md", (
            f"RenameOp address must be 'hello.md', got: {only['address']!r}"
        )

    def test_txt_rename_op_has_from_address(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The single rename op records the origin path in ``from_address``."""
        root = _init_repo(tmp_path)
        payload = b"some plain text content\n"
        before = _snap(root, {"hello.txt": payload})
        after = _snap(root, {"hello.md": payload})

        result = plugin.diff(before, after, repo_root=root)
        renames = [o for o in result["ops"] if o["op"] == "rename"]

        assert len(renames) == 1
        only = renames[0]
        assert only["from_address"] == "hello.txt", (
            f"from_address must be 'hello.txt', got: {only['from_address']!r}"
        )

    def test_rename_summary_mentions_renamed(self, tmp_path: pathlib.Path) -> None:
        """The summary text talks about a rename and avoids the words
        'added' and 'removed'."""
        root = _init_repo(tmp_path)
        payload = b"plain text\n"
        before = _snap(root, {"hello.txt": payload})
        after = _snap(root, {"hello.md": payload})

        summary = plugin.diff(before, after, repo_root=root)["summary"]
        lowered = summary.lower()

        assert "renamed" in lowered, (
            f"Summary must mention rename, got: {summary!r}"
        )
        assert "added" not in lowered, (
            f"Summary must not say 'added' for a rename, got: {summary!r}"
        )
        assert "removed" not in lowered, (
            f"Summary must not say 'removed' for a rename, got: {summary!r}"
        )

    def test_no_patch_op_has_from_address(self, tmp_path: pathlib.Path) -> None:
        """No patch op in the delta carries a ``from_address`` key."""
        root = _init_repo(tmp_path)
        payload = b"plain text\n"
        before = _snap(root, {"hello.txt": payload})
        after = _snap(root, {"hello.md": payload})

        result = plugin.diff(before, after, repo_root=root)
        patches = [o for o in result["ops"] if o["op"] == "patch"]
        for patch in patches:
            assert "from_address" not in patch, (
                f"PatchOp at {patch['address']} must not carry from_address"
            )
| 169 | |
| 170 | |
| 171 | # --------------------------------------------------------------------------- |
| 172 | # Rename of files that DO have symbol trees (regression guard) |
| 173 | # --------------------------------------------------------------------------- |
| 174 | |
| 175 | |
class TestSymbolFileRename:
    """Renames of Python sources, i.e. files that have a symbol tree.

    Kept as a regression guard: per the original note these cases exercise
    the ``_detect_file_move_edits`` symbol-tree path, and an unchanged
    ``.py`` file moved to a new path must still surface as a rename op.
    """

    def test_python_file_rename_same_content_produces_rename_op(
        self, tmp_path: pathlib.Path
    ) -> None:
        """utils.py → helpers.py with identical content leaves no
        file-level insert or delete in the delta."""
        root = _init_repo(tmp_path)
        payload = b"def add(a, b):\n return a + b\n"
        before = _snap(root, {"utils.py": payload})
        after = _snap(root, {"helpers.py": payload})

        ops = plugin.diff(before, after, repo_root=root)["ops"]

        def file_level(kind: str) -> list:
            # Addresses containing "::" are excluded: only whole-file ops count.
            return [o for o in ops if o["op"] == kind and "::" not in o["address"]]

        bare_inserts = file_level("insert")
        bare_deletes = file_level("delete")

        assert not bare_inserts, (
            f"No bare file-level InsertOp expected for rename, got: {bare_inserts}"
        )
        assert not bare_deletes, (
            f"No bare file-level DeleteOp expected for rename, got: {bare_deletes}"
        )

    def test_python_file_rename_emits_rename_op(
        self, tmp_path: pathlib.Path
    ) -> None:
        """At least one rename op names utils.py as its ``from_address``."""
        root = _init_repo(tmp_path)
        payload = b"def add(a, b):\n return a + b\n"
        before = _snap(root, {"utils.py": payload})
        after = _snap(root, {"helpers.py": payload})

        result = plugin.diff(before, after, repo_root=root)
        rename_ops = [o for o in result["ops"] if o["op"] == "rename"]
        origins = [o["from_address"] for o in rename_ops]

        assert "utils.py" in origins, (
            f"Expected RenameOp with from_address='utils.py', got: {rename_ops}"
        )
| 223 | |
| 224 | |
| 225 | # --------------------------------------------------------------------------- |
| 226 | # Non-rename: different content, same extension — must NOT be detected |
| 227 | # --------------------------------------------------------------------------- |
| 228 | |
| 229 | |
class TestNotARename:
    """When content differs between the deleted and added file, it is NOT a
    rename — it is a genuine insert + delete and must be treated as such."""

    def test_different_content_not_detected_as_rename(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A deleted file and an added file with *different* content must
        produce InsertOp + DeleteOp, not a RenameOp."""
        root = _init_repo(tmp_path)
        base = _snap(root, {"old.txt": b"original content\n"})
        target = _snap(root, {"new.txt": b"completely different\n"})

        delta = plugin.diff(base, target, repo_root=root)
        op_types = _op_types(delta["ops"])

        assert "insert" in op_types, (
            f"Non-rename must produce InsertOp, got: {op_types}"
        )
        assert "delete" in op_types, (
            f"Non-rename must produce DeleteOp, got: {op_types}"
        )
        assert not any(o["op"] == "rename" for o in delta["ops"]), (
            f"No RenameOp expected for different content, got: {delta['ops']}"
        )

    def test_same_extension_different_content_not_rename(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Two different .txt files (added + deleted) with different content
        must not be collapsed into a rename even if they share an extension."""
        root = _init_repo(tmp_path)
        base = _snap(root, {"a.txt": b"aaa\n"})
        target = _snap(root, {"b.txt": b"bbb\n"})

        delta = plugin.diff(base, target, repo_root=root)
        op_types = _op_types(delta["ops"])

        assert "insert" in op_types, (
            f"Non-rename must produce InsertOp, got: {op_types}"
        )
        assert "delete" in op_types, (
            f"Non-rename must produce DeleteOp, got: {op_types}"
        )
        # The property this test is named for: without this check a delta
        # that carried a rename op next to the insert and delete would pass.
        assert "rename" not in op_types, (
            f"No RenameOp expected for different content, got: {delta['ops']}"
        )
| 270 | |
| 271 | |
| 272 | # --------------------------------------------------------------------------- |
| 273 | # Rename alongside other changes |
| 274 | # --------------------------------------------------------------------------- |
| 275 | |
| 276 | |
class TestRenameWithSiblings:
    """Rename detection in deltas that also contain unrelated changes.

    Each test mixes one or more content-identical moves with an extra
    addition, deletion, or second move and checks that the rename ops are
    still reported for the right source paths.
    """

    def test_rename_plus_new_file(self, tmp_path: pathlib.Path) -> None:
        """hello.txt → hello.md is reported as a rename while new_file.py
        shows up as an insert or patch op."""
        root = _init_repo(tmp_path)
        before = _snap(root, {"hello.txt": b"content\n"})
        after = _snap(root, {
            "hello.md": b"content\n",  # rename
            "new_file.py": b"x = 1\n",  # new addition
        })

        ops = plugin.diff(before, after, repo_root=root)["ops"]

        renamed_from_hello = [
            o for o in ops
            if o["op"] == "rename" and o["from_address"] == "hello.txt"
        ]
        # Accept either representation of the addition: insert or patch.
        new_file_ops = [
            o for o in ops
            if o["op"] in ("insert", "patch") and "new_file" in o["address"]
        ]

        assert renamed_from_hello, "Rename must be detected alongside a new file"
        assert new_file_ops, "New file must appear as an insert or patch op"

    def test_rename_plus_deletion(self, tmp_path: pathlib.Path) -> None:
        """hello.txt → hello.md is a rename and the separately removed
        old.txt still produces its own delete op."""
        root = _init_repo(tmp_path)
        before = _snap(root, {
            "hello.txt": b"content\n",
            "old.txt": b"old content\n",
        })
        after = _snap(root, {
            "hello.md": b"content\n",  # rename of hello.txt
            # old.txt is genuinely deleted
        })

        ops = plugin.diff(before, after, repo_root=root)["ops"]

        renamed_from_hello = [
            o for o in ops
            if o["op"] == "rename" and o["from_address"] == "hello.txt"
        ]
        old_deletions = [
            o for o in ops
            if o["op"] == "delete" and "old" in o["address"]
        ]

        assert renamed_from_hello, "hello.txt → hello.md rename must be detected"
        assert old_deletions, "old.txt deletion must produce a DeleteOp"

    def test_two_simultaneous_renames(self, tmp_path: pathlib.Path) -> None:
        """Both a.txt and b.txt are reported as rename sources and the delta
        holds no insert or delete ops at all."""
        root = _init_repo(tmp_path)
        before = _snap(root, {
            "a.txt": b"content A\n",
            "b.txt": b"content B\n",
        })
        after = _snap(root, {
            "a.md": b"content A\n",  # rename of a.txt
            "b.md": b"content B\n",  # rename of b.txt
        })

        delta = plugin.diff(before, after, repo_root=root)
        rename_sources = [
            o["from_address"] for o in delta["ops"] if o["op"] == "rename"
        ]

        assert "a.txt" in rename_sources, "a.txt → a.md rename must be detected"
        assert "b.txt" in rename_sources, "b.txt → b.md rename must be detected"
        assert all(
            o["op"] not in ("insert", "delete") for o in delta["ops"]
        ), f"No bare insert/delete expected for two pure renames, got: {delta['ops']}"

    def test_rename_ambiguity_resolved_by_content_id(
        self, tmp_path: pathlib.Path
    ) -> None:
        """One deleted file whose content reappears at two new paths is
        paired with exactly one of them as a rename."""
        root = _init_repo(tmp_path)
        payload = b"shared content\n"
        before = _snap(root, {"original.txt": payload})
        after = _snap(root, {
            "copy_a.txt": payload,
            "copy_b.txt": payload,
        })

        ops = plugin.diff(before, after, repo_root=root)["ops"]

        rename_count = sum(
            1 for o in ops
            if o["op"] == "rename" and o["from_address"] == "original.txt"
        )
        assert rename_count == 1, (
            f"Exactly one rename expected when one source matches two targets, "
            f"got {rename_count} rename ops"
        )
| 384 | |
| 385 | |
| 386 | # --------------------------------------------------------------------------- |
| 387 | # delta_summary |
| 388 | # --------------------------------------------------------------------------- |
| 389 | |
| 390 | |
class TestDeltaSummaryForRenames:
    """delta_summary must report renamed files correctly, not as adds+removes."""

    def test_summary_single_rename(self, tmp_path: pathlib.Path) -> None:
        """One renamed file → summary says '1 renamed file' (or similar)."""
        root = _init_repo(tmp_path)
        content = b"text\n"
        base = _snap(root, {"a.txt": content})
        target = _snap(root, {"b.txt": content})

        delta = plugin.diff(base, target, repo_root=root)

        assert "renamed" in delta["summary"].lower(), (
            f"Summary must mention rename for a single renamed file: {delta['summary']!r}"
        )

    def test_summary_two_renames(self, tmp_path: pathlib.Path) -> None:
        """Two renamed files → summary must mention the rename AND count both."""
        root = _init_repo(tmp_path)
        base = _snap(root, {"a.txt": b"AAA\n", "b.txt": b"BBB\n"})
        target = _snap(root, {"a.md": b"AAA\n", "b.md": b"BBB\n"})

        delta = plugin.diff(base, target, repo_root=root)
        summary = delta["summary"]

        # Checked separately on purpose: a combined ``or`` would accept
        # "2 added, 2 removed" (count without rename) as well as a rename
        # summary that drops the count.
        assert "renamed" in summary.lower(), (
            f"Summary must mention rename for two renamed files: {summary!r}"
        )
        # NOTE(review): assumes the summary prints the count as the digit
        # "2" — confirm against the summary format.
        assert "2" in summary, (
            f"Summary must count both renames: {summary!r}"
        )
File History
1 commit
sha256:ff478cfdcdd4b7fd6de89cb68896601a981f945634463275ec333bd20ca36402
Merge branch 'dev' into main
Human
21 days ago