gabriel / muse public
test_directories_feature.py python
1,116 lines 44.8 KB
Raw
sha256:1d3f5470f45db58e32047678debc9438fdded1b2c7332cc743d2b8be32fdafc8 fixing more broken tests Human patch 2 days ago
1 """Comprehensive tests for the "directories as first-class objects" feature.
2
3 Covers every changed surface:
4 - directories_from_manifest (unit)
5 - walk_workdir_with_dirs (unit + integration)
6 - compute_snapshot_id with directories parameter (unit)
7 - detect_directory_renames (unit + property-style)
8 - diff_workdir_vs_snapshot 6-tuple (unit + integration)
9 - SnapshotRecord.directories serialisation round-trip (unit)
10 - write_snapshot / read_snapshot with directories (integration)
11 - CodePlugin.diff directory rename detection (integration)
12 - delta_summary directory rename counting (unit)
13 - replay_one propagates directories to new SnapshotRecord (integration)
14 - Full commit → branch → rename → commit → merge E2E workflow (e2e)
15 - Stress / performance (stress)
16 - Security: path traversal, symlinks, adversarial inputs (security)
17 """
18
19 from __future__ import annotations
20
21 import datetime
22 import hashlib
23 import json
24 import os
25 import pathlib
26 import subprocess
27 import sys
28 import time
29 import pytest
30
31 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
32 from muse.core.snapshot import (
33 detect_directory_renames,
34 diff_workdir_vs_snapshot,
35 directories_from_manifest,
36 hash_file,
37 walk_workdir_with_dirs,
38 )
39 from muse.core.commits import (
40 CommitRecord,
41 read_commit,
42 write_commit,
43 )
44 from muse.core.snapshots import (
45 SnapshotRecord,
46 read_snapshot,
47 write_snapshot,
48 )
49 from muse.domain import RenameOp, SnapshotManifest
50 from muse.core.types import Manifest, MsgpackDict, blob_id, fake_id, now_utc_iso, split_id
51 from muse.plugins.code.plugin import CodePlugin
52 from muse.core.paths import ref_path, muse_dir
53
54
55 # ---------------------------------------------------------------------------
56 # Shared helpers
57 # ---------------------------------------------------------------------------
58
59 _REPO_ID = "test-repo-dirs"
60 _counter = 0
61
62
63
64
def _init_store(root: pathlib.Path) -> None:
    """Lay out a minimal store under ``muse_dir(root)`` for tests.

    Creates the ``commits``, ``snapshots``, ``objects`` and ``refs/heads``
    directories, writes a ``HEAD`` file pointing at ``refs/heads/main`` and a
    ``repo.json`` holding the shared test repo id with the ``code`` domain.
    """
    store_dir = muse_dir(root)

    subdirs = ("commits", "snapshots", "objects", "refs/heads")
    for name in subdirs:
        target = store_dir / name
        target.mkdir(parents=True, exist_ok=True)

    head_file = store_dir / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = json.dumps({"repo_id": _REPO_ID, "domain": "code"})
    (store_dir / "repo.json").write_text(repo_meta, encoding="utf-8")
73
74
def _make_snap(root: pathlib.Path, manifest: Manifest, dirs: list[str] | None = None) -> SnapshotRecord:
    """Build a ``SnapshotRecord`` for *manifest*, persist it, and return it.

    When *dirs* is ``None`` the directory list is derived from the manifest
    via ``directories_from_manifest``; an explicit (even empty) list is used
    as given.  The snapshot id is computed from the manifest and that list.
    """
    if dirs is None:
        dirs = directories_from_manifest(manifest)
    snapshot_id = compute_snapshot_id(manifest, dirs)
    record = SnapshotRecord(
        snapshot_id=snapshot_id,
        manifest=manifest,
        directories=dirs,
    )
    write_snapshot(root, record)
    return record
81
82
def _make_commit_rec(
    root: pathlib.Path,
    snap: SnapshotRecord,
    branch: str = "main",
    parent_id: str | None = None,
    message: str = "test commit",
) -> CommitRecord:
    """Write a commit for *snap* and point *branch*'s ref file at it.

    The commit id is computed from the parent list (empty when *parent_id*
    is falsy), the snapshot id, the message and the current UTC timestamp in
    ISO format.  The record is stored with ``write_commit`` and the id is
    written to ``ref_path(root, branch)``.  Returns the new ``CommitRecord``.
    """
    global _counter
    # Module-level call counter; bumped once per commit created by this helper.
    _counter += 1

    timestamp = datetime.datetime.now(datetime.timezone.utc)
    parents = [parent_id] if parent_id else []
    commit_id = compute_commit_id(
        parents,
        snap.snapshot_id,
        message,
        timestamp.isoformat(),
    )

    record = CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=snap.snapshot_id,
        message=message,
        committed_at=timestamp,
        parent_commit_id=parent_id,
    )
    write_commit(root, record)

    branch_ref = ref_path(root, branch)
    branch_ref.write_text(commit_id, encoding="utf-8")
    return record
110
111
@pytest.fixture()
def store(tmp_path: pathlib.Path) -> pathlib.Path:
    """Return pytest's ``tmp_path`` after ``_init_store`` has prepared it."""
    root = tmp_path
    _init_store(root)
    return root
116
117
@pytest.fixture()
def workdir(tmp_path: pathlib.Path) -> pathlib.Path:
    """Return pytest's ``tmp_path`` unchanged, for use as a working tree."""
    working_tree = tmp_path
    return working_tree
121
122
123 # ===========================================================================
124 # 1. directories_from_manifest — unit
125 # ===========================================================================
126
class TestDirectoriesFromManifest:
    """Unit tests for ``directories_from_manifest``."""

    def test_empty_manifest_returns_empty(self) -> None:
        """An empty manifest yields an empty list."""
        dirs = directories_from_manifest({})
        assert dirs == []

    def test_flat_files_no_dirs(self) -> None:
        """Files without a path separator contribute no directories."""
        manifest = {"a.py": "h1", "b.py": "h2"}
        assert directories_from_manifest(manifest) == []

    def test_single_nested_file(self) -> None:
        """One file one level deep yields its parent directory."""
        manifest = {"src/main.py": "h1"}
        assert directories_from_manifest(manifest) == ["src"]

    def test_deeply_nested(self) -> None:
        """Every ancestor of a deeply nested file is listed."""
        expected = ["a", "a/b", "a/b/c"]
        assert directories_from_manifest({"a/b/c/d.py": "h1"}) == expected

    def test_multiple_files_same_dir_deduped(self) -> None:
        """Two files in the same directory yield that directory once."""
        manifest = {"src/a.py": "h1", "src/b.py": "h2"}
        assert directories_from_manifest(manifest) == ["src"]

    def test_sibling_dirs(self) -> None:
        """Sibling directories are both reported."""
        manifest = {"src/foo.py": "h1", "tests/bar.py": "h2"}
        assert directories_from_manifest(manifest) == ["src", "tests"]

    def test_mixed_flat_and_nested(self) -> None:
        """Root-level files are ignored while nested ones contribute."""
        manifest = {
            "root.py": "h0",
            "src/main.py": "h1",
            "src/lib/util.py": "h2",
        }
        assert directories_from_manifest(manifest) == ["src", "src/lib"]

    def test_result_is_sorted(self) -> None:
        """The returned list equals its own sorted form."""
        manifest = {
            "z/file.py": "h1",
            "a/file.py": "h2",
            "m/sub/file.py": "h3",
        }
        dirs = directories_from_manifest(manifest)
        assert dirs == sorted(dirs)

    def test_result_is_deduplicated(self) -> None:
        """A directory shared by three files appears exactly once."""
        manifest = {name: digest for name, digest in (
            ("pkg/a.py", "h1"),
            ("pkg/b.py", "h2"),
            ("pkg/c.py", "h3"),
        )}
        dirs = directories_from_manifest(manifest)
        assert dirs.count("pkg") == 1

    def test_large_flat_tree_no_dirs(self) -> None:
        """200 root-level files still produce no directories."""
        manifest = {f"file_{i}.txt": f"hash{i}" for i in range(200)}
        dirs = directories_from_manifest(manifest)
        assert dirs == []

    def test_preserves_posix_separators(self) -> None:
        """Nested entries use ``/`` and no entry contains a backslash."""
        dirs = directories_from_manifest({"foo/bar/baz.py": "h"})
        for entry in dirs:
            assert "/" in entry or entry == "foo"
        assert "\\" not in "".join(dirs)
186
187
188 # ===========================================================================
189 # 2. compute_snapshot_id with directories — unit
190 # ===========================================================================
191
class TestComputeSnapshotIdWithDirectories:
    """Unit tests for ``compute_snapshot_id`` and its directories argument."""

    def test_no_dirs_matches_legacy_behaviour(self) -> None:
        """Omitting dirs, passing ``None`` and passing ``[]`` all agree."""
        manifest = {"a.py": fake_id("h1")}
        baseline = compute_snapshot_id(manifest)
        assert baseline == compute_snapshot_id(manifest, None)
        assert baseline == compute_snapshot_id(manifest, [])

    def test_dirs_change_the_id(self) -> None:
        """Adding a directory to the input changes the id."""
        manifest = {"a.py": fake_id("h1")}
        id_without_dirs = compute_snapshot_id(manifest, [])
        id_with_dir = compute_snapshot_id(manifest, ["src"])
        assert id_without_dirs != id_with_dir

    def test_different_dirs_different_id(self) -> None:
        """Two different directory names give two different ids."""
        manifest = {"a.py": fake_id("h1")}
        src_id = compute_snapshot_id(manifest, ["src"])
        lib_id = compute_snapshot_id(manifest, ["lib"])
        assert src_id != lib_id

    def test_same_files_same_dirs_deterministic(self) -> None:
        """Identical inputs produce identical ids."""
        manifest = {"a/b.py": fake_id("h1"), "c/d.py": fake_id("h2")}
        directories = ["a", "c"]
        first = compute_snapshot_id(manifest, directories)
        second = compute_snapshot_id(manifest, directories)
        assert first == second

    def test_dir_order_independent(self) -> None:
        """The order of the directories list does not affect the id."""
        manifest = {"a.py": fake_id("h1")}
        forward = compute_snapshot_id(manifest, ["src", "lib"])
        backward = compute_snapshot_id(manifest, ["lib", "src"])
        assert forward == backward

    def test_file_rename_changes_id_even_with_same_dirs(self) -> None:
        """Renaming a file changes the id even if directories are unchanged."""
        directories = ["src"]
        id_a = compute_snapshot_id({"src/a.py": fake_id("h1")}, directories)
        id_b = compute_snapshot_id({"src/b.py": fake_id("h1")}, directories)
        assert id_a != id_b

    def test_dir_rename_changes_id_same_file_content(self) -> None:
        """Changing only the directory name changes the id."""
        manifest = {"f.py": fake_id("h1")}
        before = compute_snapshot_id(manifest, ["old_name"])
        after = compute_snapshot_id(manifest, ["new_name"])
        assert before != after

    def test_result_is_64_hex_chars(self) -> None:
        """The full id is 71 characters and its second part is lowercase hex."""
        snapshot_id = compute_snapshot_id({"a.py": fake_id("h")}, ["src"])
        # NOTE(review): 71 presumably is a 7-char "sha256:" style prefix plus
        # 64 hex digits — confirm against split_id / the id format.
        assert len(snapshot_id) == 71
        hex_part = split_id(snapshot_id)[1]
        for ch in hex_part:
            assert ch in "0123456789abcdef"
237
238
239 # ===========================================================================
240 # 3. detect_directory_renames — unit
241 # ===========================================================================
242
class TestDetectDirectoryRenames:
    """Unit tests for ``detect_directory_renames``."""

    def test_clean_single_rename(self) -> None:
        """Same files and hashes under a new directory name is a rename."""
        before = {"old/a.py": "h1", "old/b.py": "h2"}
        after = {"new/a.py": "h1", "new/b.py": "h2"}
        found = detect_directory_renames({"old"}, {"new"}, before, after)
        assert found == [("old", "new")]

    def test_no_rename_content_changed(self) -> None:
        """A differing content hash prevents a rename match."""
        before = {"old/a.py": "h1"}
        after = {"new/a.py": "DIFFERENT"}
        found = detect_directory_renames({"old"}, {"new"}, before, after)
        assert found == []

    def test_no_rename_empty_old_dir(self) -> None:
        """A deleted dir with no files in the last manifest cannot match."""
        before: Manifest = {}
        after = {"new/a.py": "h1"}
        found = detect_directory_renames({"old"}, {"new"}, before, after)
        assert found == []

    def test_multiple_independent_renames(self) -> None:
        """Two unrelated renames are both reported (order not asserted)."""
        before = {"foo/x.py": "h1", "bar/y.py": "h2"}
        after = {"baz/x.py": "h1", "qux/y.py": "h2"}
        found = detect_directory_renames(
            {"foo", "bar"}, {"baz", "qux"}, before, after
        )
        expected = {("foo", "baz"), ("bar", "qux")}
        assert set(found) == expected

    def test_ambiguous_candidates_not_renamed(self) -> None:
        """Two identical added dirs: exactly one rename is reported.

        Despite the test name, the assertion requires one match, not zero.
        """
        before = {"old/f.py": "h1"}
        after = {"new1/f.py": "h1", "new2/f.py": "h1"}
        found = detect_directory_renames(
            {"old"}, {"new1", "new2"}, before, after
        )
        # NOTE(review): which candidate is chosen is not asserted here;
        # presumably the first sorted candidate wins — confirm in the
        # implementation.
        assert len(found) == 1

    def test_partial_match_not_renamed(self) -> None:
        """A new dir missing one of the old files is not a rename."""
        before = {"old/a.py": "h1", "old/b.py": "h2"}
        after = {"new/a.py": "h1"}  # b.py is absent from the new directory
        found = detect_directory_renames({"old"}, {"new"}, before, after)
        assert found == []

    def test_extra_file_in_new_dir_not_renamed(self) -> None:
        """A new dir containing an additional file is not a rename."""
        before = {"old/a.py": "h1"}
        after = {"new/a.py": "h1", "new/extra.py": "h2"}
        found = detect_directory_renames({"old"}, {"new"}, before, after)
        assert found == []

    def test_returns_list_of_tuples(self) -> None:
        """The return value is a list whose items are 2-tuples."""
        before = {"src/main.py": "abc"}
        after = {"lib/main.py": "abc"}
        found = detect_directory_renames({"src"}, {"lib"}, before, after)
        assert isinstance(found, list)
        for pair in found:
            assert isinstance(pair, tuple) and len(pair) == 2

    def test_empty_sets_returns_empty(self) -> None:
        """No deleted or added dirs and empty manifests give no renames."""
        found = detect_directory_renames(set(), set(), {}, {})
        assert found == []

    def test_single_file_dir_rename(self) -> None:
        """A directory holding a single file can be detected as renamed."""
        before = {"pkg/module.py": "cafebabe"}
        after = {"renamed_pkg/module.py": "cafebabe"}
        found = detect_directory_renames(
            {"pkg"}, {"renamed_pkg"}, before, after
        )
        assert found == [("pkg", "renamed_pkg")]
304
305
306 # ===========================================================================
307 # 4. diff_workdir_vs_snapshot — 6-tuple (fix broken existing + new)
308 # ===========================================================================
309
class TestDiffWorkdirVsSnapshot6Tuple:
    """Tests for the 6-element result of ``diff_workdir_vs_snapshot``.

    Each test unpacks the result as
    ``(added, modified, deleted, untracked, added_dirs, deleted_dirs)``;
    elements a test does not inspect are bound to underscore-prefixed names.
    """

    def test_returns_6_tuple(self, workdir: pathlib.Path) -> None:
        """The result has exactly six elements."""
        outcome = diff_workdir_vs_snapshot(workdir, {})
        assert len(outcome) == 6

    def test_untracked_first_commit(self, workdir: pathlib.Path) -> None:
        """With an empty last manifest a new file is untracked, not added."""
        (workdir / "f.py").write_bytes(b"x")
        added, _modified, _deleted, untracked, _added_dirs, _deleted_dirs = (
            diff_workdir_vs_snapshot(workdir, {})
        )
        assert added == set()
        assert "f.py" in untracked

    def test_added_file_detected(self, workdir: pathlib.Path) -> None:
        """A new file is added and a manifest-only file is deleted."""
        (workdir / "f.py").write_bytes(b"x")
        added, _modified, deleted, _untracked, _added_dirs, _deleted_dirs = (
            diff_workdir_vs_snapshot(workdir, {"other.py": "abc"})
        )
        assert "f.py" in added
        assert "other.py" in deleted

    def test_modified_file_detected(self, workdir: pathlib.Path) -> None:
        """A file whose hash differs from the manifest is modified."""
        target = workdir / "f.py"
        target.write_bytes(b"new content")
        _added, modified, _deleted, _untracked, _added_dirs, _deleted_dirs = (
            diff_workdir_vs_snapshot(workdir, {"f.py": "oldhash"})
        )
        assert "f.py" in modified

    def test_clean_workdir_all_empty(self, workdir: pathlib.Path) -> None:
        """A file matching its manifest hash produces no file-level changes."""
        target = workdir / "f.py"
        target.write_bytes(b"content")
        digest = hash_file(target)
        added, modified, deleted, untracked, _added_dirs, _deleted_dirs = (
            diff_workdir_vs_snapshot(workdir, {"f.py": digest})
        )
        assert not added and not modified and not deleted and not untracked

    def test_added_dir_detected(self, workdir: pathlib.Path) -> None:
        """A new on-disk dir is added; a dir only in the last list is deleted."""
        src_dir = workdir / "src"
        src_dir.mkdir()
        (src_dir / "main.py").write_bytes(b"x")
        _added, _modified, _deleted, _untracked, added_dirs, deleted_dirs = (
            diff_workdir_vs_snapshot(
                workdir, {"root.py": "abc"}, last_directories=["lib"]
            )
        )
        assert "src" in added_dirs
        assert "lib" in deleted_dirs

    def test_deleted_dir_detected(self, workdir: pathlib.Path) -> None:
        """A previously recorded dir absent from disk is reported deleted."""
        target = workdir / "f.py"
        target.write_bytes(b"x")
        digest = hash_file(workdir / "f.py")
        _added, _modified, _deleted, _untracked, _added_dirs, deleted_dirs = (
            diff_workdir_vs_snapshot(
                workdir, {"f.py": digest}, last_directories=["old_dir"]
            )
        )
        assert "old_dir" in deleted_dirs

    def test_unchanged_dirs_not_in_delta(self, workdir: pathlib.Path) -> None:
        """A dir present both before and now is neither added nor deleted."""
        src_dir = workdir / "src"
        src_dir.mkdir()
        (src_dir / "main.py").write_bytes(b"x")
        digest = hash_file(workdir / "src" / "main.py")
        _added, _modified, _deleted, _untracked, added_dirs, deleted_dirs = (
            diff_workdir_vs_snapshot(
                workdir, {"src/main.py": digest}, last_directories=["src"]
            )
        )
        assert "src" not in added_dirs
        assert "src" not in deleted_dirs

    def test_nonexistent_workdir_returns_all_deleted(self, tmp_path: pathlib.Path) -> None:
        """A missing workdir reports tracked files and dirs as deleted."""
        absent = tmp_path / "gone"
        added, _modified, deleted, _untracked, _added_dirs, deleted_dirs = (
            diff_workdir_vs_snapshot(
                absent, {"f.py": "h"}, last_directories=["src"]
            )
        )
        assert "f.py" in deleted
        assert "src" in deleted_dirs
        assert not added

    def test_pruned_dirs_not_tracked(self, workdir: pathlib.Path) -> None:
        """``node_modules`` is never an added dir; ``src`` shows up somewhere."""
        ignored_dir = workdir / "node_modules"
        ignored_dir.mkdir()
        (ignored_dir / "pkg.js").write_bytes(b"x")
        src_dir = workdir / "src"
        src_dir.mkdir()
        (src_dir / "app.py").write_bytes(b"y")
        _added, _modified, _deleted, untracked, added_dirs, _deleted_dirs = (
            diff_workdir_vs_snapshot(workdir, {})
        )
        assert "node_modules" not in added_dirs
        # Accepts either representation of the new content: the directory in
        # added_dirs or the file in untracked.
        assert "src" in added_dirs or "src/app.py" in untracked
385
386
387 # ===========================================================================
388 # 5. walk_workdir_with_dirs — unit
389 # ===========================================================================
390
class TestWalkWorkdirWithDirs:
    """Unit tests for ``walk_workdir_with_dirs`` (returns ``files, dirs``)."""

    def test_empty_dir_returns_empty(self, workdir: pathlib.Path) -> None:
        """An empty working tree yields an empty mapping and an empty list."""
        found_files, found_dirs = walk_workdir_with_dirs(workdir)
        assert found_files == {}
        assert found_dirs == []

    def test_flat_files_no_dirs(self, workdir: pathlib.Path) -> None:
        """A root-level file is listed and no directories are reported."""
        workdir.joinpath("a.py").write_bytes(b"x")
        found_files, found_dirs = walk_workdir_with_dirs(workdir)
        assert "a.py" in found_files
        assert found_dirs == []

    def test_nested_file_dir_tracked(self, workdir: pathlib.Path) -> None:
        """A nested file is keyed by its POSIX path and its dir is listed."""
        src_dir = workdir / "src"
        src_dir.mkdir()
        (src_dir / "main.py").write_bytes(b"x")
        found_files, found_dirs = walk_workdir_with_dirs(workdir)
        assert "src/main.py" in found_files
        assert "src" in found_dirs

    def test_deeply_nested_dirs_all_tracked(self, workdir: pathlib.Path) -> None:
        """Every level of a nested directory chain is reported."""
        leaf_dir = workdir / "a" / "b" / "c"
        leaf_dir.mkdir(parents=True)
        (leaf_dir / "f.py").write_bytes(b"x")
        _found_files, found_dirs = walk_workdir_with_dirs(workdir)
        for expected in ("a", "a/b", "a/b/c"):
            assert expected in found_dirs

    def test_dirs_sorted(self, workdir: pathlib.Path) -> None:
        """The directory list equals its own sorted form."""
        for dir_name in ("zzz", "aaa", "mmm"):
            subdir = workdir / dir_name
            subdir.mkdir()
            (subdir / "f.py").write_bytes(b"x")
        _, found_dirs = walk_workdir_with_dirs(workdir)
        assert found_dirs == sorted(found_dirs)

    def test_pruned_dirs_excluded(self, workdir: pathlib.Path) -> None:
        """``node_modules`` and ``__pycache__`` are not reported."""
        node_modules = workdir / "node_modules"
        node_modules.mkdir()
        (node_modules / "lib.js").write_bytes(b"x")
        pycache = workdir / "__pycache__"
        pycache.mkdir()
        (pycache / "mod.pyc").write_bytes(b"x")
        _, found_dirs = walk_workdir_with_dirs(workdir)
        assert "node_modules" not in found_dirs
        assert "__pycache__" not in found_dirs

    def test_symlinks_not_followed(self, workdir: pathlib.Path) -> None:
        """Files reached through a symlinked directory are not listed."""
        real_dir = workdir / "real_dir"
        real_dir.mkdir()
        (real_dir / "secret.py").write_bytes(b"secret")
        link_dir = workdir / "link_dir"
        link_dir.symlink_to(real_dir)
        found_files, _found_dirs = walk_workdir_with_dirs(workdir)
        # The walk is expected not to descend into the symlinked directory.
        assert "link_dir/secret.py" not in found_files
444
445
446 # ===========================================================================
447 # 6. SnapshotRecord.directories serialisation — unit
448 # ===========================================================================
449
class TestSnapshotRecordDirectories:
    """Unit tests for the ``directories`` field of ``SnapshotRecord``.

    Covers the default value, ``to_dict`` / ``from_dict`` round-trips and
    how ``from_dict`` treats missing or malformed ``directories`` payloads.
    """

    def test_default_directories_is_empty_list(self) -> None:
        """Omitting ``directories`` gives an empty list."""
        rec = SnapshotRecord(snapshot_id="abc", manifest={})
        assert rec.directories == []

    def test_to_dict_includes_directories(self) -> None:
        """``to_dict`` exposes the directories under the ``directories`` key."""
        rec = SnapshotRecord(snapshot_id="abc", manifest={}, directories=["src", "lib"])
        d = rec.to_dict()
        assert d["directories"] == ["src", "lib"]

    def test_from_dict_roundtrip(self) -> None:
        """``from_dict(to_dict())`` preserves the directories list."""
        rec = SnapshotRecord(snapshot_id="abc", manifest={"f.py": "h"}, directories=["pkg"])
        loaded = SnapshotRecord.from_dict(rec.to_dict())
        assert loaded.directories == ["pkg"]

    # This test used to share the name ``test_from_dict_roundtrip`` with the
    # one above.  A later ``def`` with the same name rebinds the class
    # attribute, so the first test was silently never collected.  Distinct
    # names let both run.
    def test_from_dict_roundtrip_via_dict_copy(self) -> None:
        """Round-trip through a plain ``dict`` copy of ``to_dict()``."""
        rec = SnapshotRecord(snapshot_id="xyz", manifest={}, directories=["a", "b"])
        d: MsgpackDict = dict(rec.to_dict())
        loaded = SnapshotRecord.from_dict(d)
        assert loaded.directories == ["a", "b"]

    def test_from_dict_missing_field_defaults_empty(self) -> None:
        """A payload without ``directories`` loads with an empty list."""
        d: MsgpackDict = {
            "snapshot_id": "abc",
            "manifest": {},
            "created_at": now_utc_iso(),
            "note": "",
        }
        rec = SnapshotRecord.from_dict(d)
        assert rec.directories == []

    def test_from_dict_filters_non_string_items(self) -> None:
        """Non-string entries in ``directories`` are dropped on load."""
        d: MsgpackDict = {
            "snapshot_id": "abc",
            "manifest": {},
            "directories": ["valid", 42, None, "also_valid"],
            "created_at": now_utc_iso(),
            "note": "",
        }
        rec = SnapshotRecord.from_dict(d)
        assert rec.directories == ["valid", "also_valid"]

    def test_from_dict_non_list_directories_defaults_empty(self) -> None:
        """A non-list ``directories`` value loads as an empty list."""
        d: MsgpackDict = {
            "snapshot_id": "abc",
            "manifest": {},
            "directories": "not-a-list",
            "created_at": now_utc_iso(),
            "note": "",
        }
        rec = SnapshotRecord.from_dict(d)
        assert rec.directories == []

    def test_to_dict_returns_copy_not_reference(self) -> None:
        """Mutating the list returned by ``to_dict`` leaves the record intact."""
        dirs = ["src"]
        rec = SnapshotRecord(snapshot_id="abc", manifest={}, directories=dirs)
        d = rec.to_dict()
        d["directories"].append("mutated")
        assert rec.directories == ["src"]
509
510
511 # ===========================================================================
512 # 7. write_snapshot / read_snapshot roundtrip with directories — integration
513 # ===========================================================================
514
class TestWriteReadSnapshotWithDirectories:
    """Integration tests: ``write_snapshot`` / ``read_snapshot`` with dirs."""

    def test_roundtrip_preserves_directories(self, store: pathlib.Path) -> None:
        """A written snapshot reads back with the same directories list."""
        manifest = {"src/main.py": fake_id("h1"), "src/util.py": fake_id("h2")}
        directories = ["src"]
        snapshot_id = compute_snapshot_id(manifest, directories)
        record = SnapshotRecord(
            snapshot_id=snapshot_id, manifest=manifest, directories=directories
        )
        write_snapshot(store, record)

        reloaded = read_snapshot(store, snapshot_id)
        assert reloaded is not None
        assert reloaded.directories == ["src"]

    def test_roundtrip_empty_directories(self, store: pathlib.Path) -> None:
        """An empty directories list survives the round-trip."""
        manifest = {"f.py": fake_id("h1")}
        snapshot_id = compute_snapshot_id(manifest, [])
        record = SnapshotRecord(
            snapshot_id=snapshot_id, manifest=manifest, directories=[]
        )
        write_snapshot(store, record)

        reloaded = read_snapshot(store, snapshot_id)
        assert reloaded is not None
        assert reloaded.directories == []

    def test_roundtrip_deeply_nested_dirs(self, store: pathlib.Path) -> None:
        """Directories derived from a deep path round-trip in full."""
        manifest = {"a/b/c/d.py": fake_id("h1")}
        directories = directories_from_manifest(manifest)
        snapshot_id = compute_snapshot_id(manifest, directories)
        record = SnapshotRecord(
            snapshot_id=snapshot_id, manifest=manifest, directories=directories
        )
        write_snapshot(store, record)

        reloaded = read_snapshot(store, snapshot_id)
        assert reloaded is not None
        assert reloaded.directories == ["a", "a/b", "a/b/c"]

    def test_snapshot_id_includes_dirs_in_verification(self, store: pathlib.Path) -> None:
        """The id computed with dirs differs from the id computed without.

        The snapshot is written, but this test does not read it back or
        tamper with it; it only compares the two computed ids.
        """
        manifest = {"f.py": fake_id("h1")}
        directories = ["src"]
        id_with_dirs = compute_snapshot_id(manifest, directories)
        record = SnapshotRecord(
            snapshot_id=id_with_dirs, manifest=manifest, directories=directories
        )
        write_snapshot(store, record)

        id_without_dirs = compute_snapshot_id(manifest, [])
        assert id_with_dirs != id_without_dirs

    def test_directory_rename_produces_different_snapshot_id(self, store: pathlib.Path) -> None:
        """Two different directory names produce two different ids."""
        manifest = {"f.py": fake_id("h1")}
        before = compute_snapshot_id(manifest, ["old_name"])
        after = compute_snapshot_id(manifest, ["new_name"])
        assert before != after
563
564
565 # ===========================================================================
566 # 8. RenameOp TypedDict — unit
567 # ===========================================================================
568
class TestDirectoryRenameOp:
    """Unit tests for the shape of ``RenameOp``."""

    def test_construct_fields(self) -> None:
        """The three constructor fields are readable back by key."""
        rename = RenameOp(op="rename", address="new/path", from_address="old/path")
        expected = {
            "op": "rename",
            "address": "new/path",
            "from_address": "old/path",
        }
        for key, value in expected.items():
            assert rename[key] == value

    def test_rename_op_has_no_file_count(self) -> None:
        """``RenameOp``'s type hints do not declare a ``file_count`` field."""
        import typing

        declared = typing.get_type_hints(RenameOp)
        assert "file_count" not in declared
584
585
586 # ===========================================================================
587 # 9. CodePlugin.diff directory rename detection — integration
588 # ===========================================================================
589
class TestCodePluginDiffDirectories:
    """Integration tests for directory-level ops emitted by ``CodePlugin.diff``."""

    @pytest.fixture()
    def plugin(self) -> CodePlugin:
        """Provide a fresh ``CodePlugin`` instance."""
        from muse.plugins.code.plugin import CodePlugin

        instance = CodePlugin()
        return instance

    def _snap(self, files: Manifest, dirs: list[str] | None = None) -> SnapshotManifest:
        """Build a ``code``-domain ``SnapshotManifest``.

        When *dirs* is ``None`` the directories are derived from *files*.
        """
        if dirs is None:
            dirs = directories_from_manifest(files)
        return SnapshotManifest(files=files, domain="code", directories=dirs)

    def test_directory_rename_emits_rename_op(self, plugin: CodePlugin) -> None:
        """Moving all files from ``src`` to ``lib`` gives one rename op."""
        before = self._snap({"src/a.py": "h1", "src/b.py": "h2"}, ["src"])
        after = self._snap({"lib/a.py": "h1", "lib/b.py": "h2"}, ["lib"])
        delta = plugin.diff(before, after)
        renames = [op for op in delta["ops"] if op["op"] == "rename"]
        assert len(renames) == 1
        only_rename = renames[0]
        # Directory addresses carry a trailing slash in the emitted op.
        assert only_rename["from_address"] == "src/"
        assert only_rename["address"] == "lib/"

    def test_directory_rename_suppresses_file_level_ops(self, plugin: CodePlugin) -> None:
        """Files covered by a directory rename get no insert/delete ops."""
        before = self._snap({"src/a.py": "h1"}, ["src"])
        after = self._snap({"lib/a.py": "h1"}, ["lib"])
        delta = plugin.diff(before, after)
        nested_file_ops = [
            op
            for op in delta["ops"]
            if op["op"] in ("insert", "delete") and "/" in op["address"]
        ]
        covered_paths = ("src/a.py", "lib/a.py")
        for op in nested_file_ops:
            assert op["address"] not in covered_paths

    def test_plain_added_dir_emits_insert_op(self, plugin: CodePlugin) -> None:
        """A brand-new directory gets exactly one insert op at ``new/``."""
        before = self._snap({}, [])
        after = self._snap({"new/f.py": "h1"}, ["new"])
        delta = plugin.diff(before, after)
        dir_inserts = [
            op
            for op in delta["ops"]
            if op["op"] == "insert" and op["address"] == "new/"
        ]
        assert len(dir_inserts) == 1

    def test_plain_deleted_dir_emits_delete_op(self, plugin: CodePlugin) -> None:
        """A removed directory gets exactly one delete op at ``old/``."""
        before = self._snap({"old/f.py": "h1"}, ["old"])
        after = self._snap({}, [])
        delta = plugin.diff(before, after)
        dir_deletes = [
            op
            for op in delta["ops"]
            if op["op"] == "delete" and op["address"] == "old/"
        ]
        assert len(dir_deletes) == 1

    def test_no_dir_changes_no_dir_ops(self, plugin: CodePlugin) -> None:
        """A content-only change in an unchanged dir yields no rename op."""
        before = self._snap({"src/a.py": "h1"}, ["src"])
        after = self._snap({"src/a.py": "h2"}, ["src"])
        delta = plugin.diff(before, after)

        candidates = []
        for op in delta["ops"]:
            if op["op"] == "rename":
                candidates.append(op)
                continue
            if op["op"] not in ("insert", "delete"):
                continue
            address = op["address"]
            if "::" not in address and "/" not in address:
                candidates.append(op)

        assert all(op["op"] != "rename" for op in candidates)

    def test_no_directories_field_no_crash(self, plugin: CodePlugin) -> None:
        """Diffing two snapshots with empty directory lists still returns ops."""
        before = SnapshotManifest(files={"a.py": "h1"}, domain="code", directories=[])
        after = SnapshotManifest(files={"b.py": "h1"}, domain="code", directories=[])
        delta = plugin.diff(before, after)
        assert "ops" in delta
650
651
652 # ===========================================================================
653 # 10. delta_summary directory rename counting — unit
654 # ===========================================================================
655
class TestDeltaSummaryDirectories:
    """Unit tests for how ``delta_summary`` reports rename ops."""

    def _make_dir_rename_op(self, old: str, new: str) -> RenameOp:
        """Build a ``RenameOp`` from *old* to *new*."""
        return RenameOp(op="rename", address=new, from_address=old)

    def test_no_changes_returns_no_changes(self) -> None:
        """An empty op list summarises as ``"no changes"``."""
        from muse.plugins.code.symbol_diff import delta_summary

        summary = delta_summary([])
        assert summary == "no changes"

    def test_single_directory_rename(self) -> None:
        """One rename op makes the summary mention ``renamed``."""
        from muse.plugins.code.symbol_diff import delta_summary

        summary = delta_summary([self._make_dir_rename_op("old", "new")])
        assert "renamed" in summary

    def test_two_directory_renames_plural(self) -> None:
        """Two rename ops make the summary mention ``2`` and ``renamed``."""
        from muse.plugins.code.symbol_diff import delta_summary

        pairs = (("a", "x"), ("b", "y"))
        rename_ops = [self._make_dir_rename_op(src, dst) for src, dst in pairs]
        summary = delta_summary(rename_ops)
        assert "2" in summary
        assert "renamed" in summary

    def test_directory_rename_combined_with_file_ops(self) -> None:
        """An insert plus a rename yields both ``added`` and ``renamed``."""
        from muse.plugins.code.symbol_diff import delta_summary
        from muse.domain import InsertOp

        file_insert = InsertOp(
            op="insert",
            address="new_file.py",
            position=None,
            content_id="h1",
            content_summary="",
        )
        dir_rename = self._make_dir_rename_op("src", "lib")
        summary = delta_summary([file_insert, dir_rename])
        assert "added" in summary
        assert "renamed" in summary

    def test_directory_rename_not_counted_as_file(self) -> None:
        """A lone rename is not reported as ``1 added`` or ``1 removed``."""
        from muse.plugins.code.symbol_diff import delta_summary

        summary = delta_summary([self._make_dir_rename_op("old", "new")])
        for phrase in ("1 added", "1 removed"):
            assert phrase not in summary
698
699
700 # ===========================================================================
701 # 11. replay_one propagates directories — integration
702 # ===========================================================================
703
class TestReplayOneWithDirectories:
    """Integration tests for ``replay_one`` and the snapshots it produces."""

    def _write_obj(self, store: pathlib.Path, content: bytes) -> str:
        """Store *content* as an object and return its blob id."""
        from muse.core.object_store import write_object

        object_id = blob_id(content)
        write_object(store, object_id, content)
        return object_id

    def test_clean_merge_produces_snapshot_with_dirs(self, store: pathlib.Path) -> None:
        """A clean replay yields a commit whose snapshot lists ``src``."""
        from muse.core.rebase import replay_one
        from muse.plugins.code.plugin import CodePlugin

        code_plugin = CodePlugin()
        domain_name = "code"

        # Real objects are written so the manifests reference stored content.
        blob_a = self._write_obj(store, b"# a.py content\n")
        blob_b = self._write_obj(store, b"# b.py content\n")

        # Base commit: a single file inside src/.
        base_files = {"src/a.py": blob_a}
        base_snapshot = _make_snap(
            store, base_files, directories_from_manifest(base_files)
        )
        base = _make_commit_rec(store, base_snapshot, message="base")

        # "theirs" commit: child of base that adds src/b.py.
        their_files = {"src/a.py": blob_a, "src/b.py": blob_b}
        their_snapshot = _make_snap(
            store, their_files, directories_from_manifest(their_files)
        )
        theirs = _make_commit_rec(
            store, their_snapshot, parent_id=base.commit_id, message="theirs"
        )

        # Replay theirs directly on top of base.
        outcome = replay_one(
            root=store,
            commit=theirs,
            parent_id=base.commit_id,
            plugin=code_plugin,
            domain=domain_name,
            branch="main",
        )

        assert isinstance(outcome, CommitRecord), f"Expected CommitRecord, got: {outcome}"
        new_snapshot = read_snapshot(store, outcome.snapshot_id)
        assert new_snapshot is not None
        assert new_snapshot.directories == ["src"]

    def test_conflict_returns_path_list_not_commit(self, store: pathlib.Path) -> None:
        """Conflicting edits to one file make ``replay_one`` return a list."""
        from muse.core.rebase import replay_one
        from muse.plugins.code.plugin import CodePlugin

        code_plugin = CodePlugin()

        blob_v1 = self._write_obj(store, b"version 1\n")
        blob_v2 = self._write_obj(store, b"version 2\n")
        blob_v3 = self._write_obj(store, b"version 3\n")

        # Base: a.py holds version 1.
        base_snapshot = _make_snap(store, {"a.py": blob_v1})
        base = _make_commit_rec(store, base_snapshot, message="base")

        # "theirs": child of base, a.py changed to version 2.
        their_snapshot = _make_snap(store, {"a.py": blob_v2})
        theirs = _make_commit_rec(
            store, their_snapshot, parent_id=base.commit_id, message="theirs"
        )

        # "ours": the replay target, a.py changed to version 3 (conflict).
        our_snapshot = _make_snap(store, {"a.py": blob_v3})
        ours = _make_commit_rec(store, our_snapshot, message="ours")

        outcome = replay_one(
            root=store,
            commit=theirs,
            parent_id=ours.commit_id,
            plugin=code_plugin,
            domain="code",
            branch="main",
        )

        # A conflict is reported as a list rather than a CommitRecord.
        assert isinstance(outcome, list)
789
790
791 # ===========================================================================
792 # 12. E2E workflow — full CLI commit/branch/rename/merge cycle
793 # ===========================================================================
794
def _muse(repo: pathlib.Path, *args: str) -> subprocess.CompletedProcess[str]:
    """Run the ``muse`` CLI with *args* inside *repo* and return the result.

    The executable is looked up in this order: ``.venv/bin/muse`` two levels
    above this file, a ``muse`` next to the running interpreter, whatever
    ``shutil.which("muse")`` finds, and finally the bare name ``"muse"``.
    Output is captured as text; the exit status is not checked here.
    """
    import shutil
    import sys

    project_root = pathlib.Path(__file__).parent.parent
    candidates = (
        project_root / ".venv" / "bin" / "muse",
        pathlib.Path(sys.executable).parent / "muse",
    )
    # Generator keeps the lookup lazy: the second path is only probed when
    # the first one does not exist.
    executable = next((str(path) for path in candidates if path.exists()), None)
    if executable is None:
        executable = shutil.which("muse") or "muse"

    command = [executable, *args]
    return subprocess.run(
        command,
        cwd=str(repo),
        capture_output=True,
        text=True,
    )
812
813
814 class TestDirectoriesE2EWorkflow:
@pytest.fixture()
def repo(self, tmp_path: pathlib.Path) -> pathlib.Path:
    """Run ``muse init`` in pytest's ``tmp_path`` and return that path.

    Fails immediately, showing stderr, when ``init`` exits non-zero.
    """
    init_result = _muse(tmp_path, "init")
    assert init_result.returncode == 0, init_result.stderr
    return tmp_path
820
def test_commit_records_directories(self, repo: pathlib.Path) -> None:
    """Committing ``src/main.py`` records ``src`` in the head snapshot."""
    src_dir = repo / "src"
    src_dir.mkdir()
    (src_dir / "main.py").write_text("x = 1\n")
    commit_result = _muse(repo, "commit", "-m", "add src/main.py")
    assert commit_result.returncode == 0, commit_result.stderr

    # Load the head snapshot straight from the store and inspect it.
    from muse.core.commits import (
        get_head_snapshot_id,
        read_commit,
    )
    from muse.core.refs import read_current_branch

    current_branch = read_current_branch(repo)
    head_snapshot_id = get_head_snapshot_id(repo, current_branch)
    assert head_snapshot_id is not None
    head_snapshot = read_snapshot(repo, head_snapshot_id)
    assert head_snapshot is not None
    assert "src" in head_snapshot.directories
839
840 def test_snapshot_id_changes_on_dir_rename(self, repo: pathlib.Path) -> None:
841 (repo / "old_name").mkdir()
842 (repo / "old_name" / "f.py").write_text("x = 1\n")
843 _muse(repo, "commit", "-m", "add old_name/")
844
845 from muse.core.refs import read_current_branch
846 from muse.core.commits import get_head_snapshot_id
847 branch = read_current_branch(repo)
848 sid_before = get_head_snapshot_id(repo, branch)
849
850 # Simulate rename: remove old dir, create new dir with same content
851 import shutil
852 shutil.move(str(repo / "old_name"), str(repo / "new_name"))
853 _muse(repo, "code", "add", ".")
854 _muse(repo, "commit", "-m", "rename dir")
855
856 sid_after = get_head_snapshot_id(repo, branch)
857 assert sid_before != sid_after
858
859 def test_status_handles_directory_rename_op(self, repo: pathlib.Path) -> None:
860 (repo / "src").mkdir()
861 (repo / "src" / "app.py").write_text("app = True\n")
862 _muse(repo, "commit", "-m", "initial")
863
864 import shutil
865 shutil.move(str(repo / "src"), str(repo / "lib"))
866
867 r = _muse(repo, "status")
868 assert r.returncode == 0, r.stderr
869
870 def test_nested_directories_tracked_through_commit(self, repo: pathlib.Path) -> None:
871 deep = repo / "a" / "b" / "c"
872 deep.mkdir(parents=True)
873 (deep / "f.py").write_text("pass\n")
874 r = _muse(repo, "commit", "-m", "deep nest")
875 assert r.returncode == 0, r.stderr
876
877 from muse.core.refs import read_current_branch
878 from muse.core.commits import get_head_snapshot_id
879 branch = read_current_branch(repo)
880 snap = read_snapshot(repo, get_head_snapshot_id(repo, branch))
881 assert snap is not None
882 assert "a" in snap.directories
883 assert "a/b" in snap.directories
884 assert "a/b/c" in snap.directories
885
886
887 # ===========================================================================
888 # 13. Empty directory ghost bug
889 #
890 # Regression tests for: empty directories left on disk after their files are
891 # deleted and committed must NOT appear in `muse status --json` `added`.
892 #
893 # Root cause: CodePlugin.snapshot() recorded every directory visited by
894 # os.walk() into `dirs`, including empty ones. These empty dirs had no
895 # counterpart in HEAD, so diff() produced InsertOp entries for them,
896 # and status --json reported them as `added`.
897 # ===========================================================================
898
class TestEmptyDirectoryGhost:
    """Empty orphan directories must not appear as 'added' in muse status.

    The unit tests call ``CodePlugin.snapshot()`` directly; the integration
    tests parse the JSON printed by ``muse status --json``.
    """

    @pytest.fixture()
    def repo(self, tmp_path: pathlib.Path) -> pathlib.Path:
        """Return ``tmp_path`` after a successful ``muse init`` inside it."""
        result = _muse(tmp_path, "init")
        assert result.returncode == 0, result.stderr
        return tmp_path

    # ── Unit: snapshot() must not include empty dirs ──────────────────────────

    def test_snapshot_excludes_empty_directory(self, tmp_path: pathlib.Path) -> None:
        """CodePlugin.snapshot() must not list a directory that has no files."""
        from muse.plugins.code.plugin import CodePlugin

        # Check the init result, matching what the ``repo`` fixture does.
        initialised = _muse(tmp_path, "init")
        assert initialised.returncode == 0, initialised.stderr
        plugin = CodePlugin()

        # Empty nested directory — no files, no .musekeep
        (tmp_path / "empty_pkg" / "sub").mkdir(parents=True)

        snap = plugin.snapshot(tmp_path)
        assert "empty_pkg" not in snap["directories"], (
            "Empty directory 'empty_pkg' must not appear in snapshot directories"
        )
        assert "empty_pkg/sub" not in snap["directories"], (
            "Empty nested directory 'empty_pkg/sub' must not appear in snapshot directories"
        )

    def test_snapshot_includes_dir_with_files(self, tmp_path: pathlib.Path) -> None:
        """Directories containing files must still appear in the snapshot."""
        from muse.plugins.code.plugin import CodePlugin

        initialised = _muse(tmp_path, "init")
        assert initialised.returncode == 0, initialised.stderr
        plugin = CodePlugin()

        (tmp_path / "pkg").mkdir()
        (tmp_path / "pkg" / "mod.py").write_text("x = 1\n")

        snap = plugin.snapshot(tmp_path)
        assert "pkg" in snap["directories"]

    # ── Integration: status --json must not list orphan empty dirs ────────────

    def test_status_does_not_report_never_committed_empty_dir(self, repo: pathlib.Path) -> None:
        """An empty directory that was never committed must not appear in added."""
        # No files, never committed
        (repo / "orphan" / "nested").mkdir(parents=True)

        status = _muse(repo, "status", "--json")
        assert status.returncode == 0, status.stderr
        data = json.loads(status.stdout)

        assert "orphan" not in data["added"], (
            "Untracked empty directory 'orphan' must not appear as added"
        )
        assert "orphan/nested" not in data["added"], (
            "Untracked empty nested directory must not appear as added"
        )

    def test_status_reports_added_for_dir_with_new_file(self, repo: pathlib.Path) -> None:
        """A new directory containing a real file must still appear as added."""
        (repo / "new_pkg").mkdir()
        (repo / "new_pkg" / "api.py").write_text("pass\n")

        status = _muse(repo, "status", "--json")
        assert status.returncode == 0, status.stderr
        data = json.loads(status.stdout)

        # The file should be visible (the directory entry itself may or may not
        # be in added — what matters is that the file shows up under one of the
        # two keys and that dirs without files do not).
        all_visible = data["added"] + data["untracked"]
        assert any("new_pkg" in path for path in all_visible), (
            "New directory with a file should be reflected in added or untracked"
        )
972
973
974 # ===========================================================================
975 # 13. Stress / performance
976 # ===========================================================================
977
class TestDirectoriesStress:
    """Coarse wall-clock and result-size checks on larger inputs.

    Each test times a single call with ``time.monotonic()`` and asserts both
    a generous upper bound on the elapsed time and the size of the result.
    """

    def test_directories_from_manifest_1000_files(self) -> None:
        """1000 files spread over 10 x 10 directories yield 110 directory entries."""
        manifest = {
            f"pkg_{i}/sub_{j}/file_{k}.py": f"hash{i}{j}{k}"
            for i in range(10)
            for j in range(10)
            for k in range(10)
        }
        assert len(manifest) == 1000

        start = time.monotonic()
        dirs = directories_from_manifest(manifest)
        elapsed = time.monotonic() - start

        # Should complete in under 1 second for 1000 files
        assert elapsed < 1.0, f"directories_from_manifest took {elapsed:.3f}s for 1000 files"
        # 10 top-level dirs (pkg_0..9) + 100 second-level dirs (pkg_N/sub_M) = 110
        assert len(dirs) == 110

    def test_detect_directory_renames_50_dirs(self) -> None:
        """50 directories renamed old_N -> new_N are all reported as renames."""
        # 50 dirs each with 5 files, all renamed old_N → new_N; the blob ids
        # are identical on both sides so only the directory name differs.
        last: Manifest = {}
        current: Manifest = {}
        for i in range(50):
            for j in range(5):
                h = blob_id(f"content_{i}_{j}".encode())
                last[f"old_{i}/file_{j}.py"] = h
                current[f"new_{i}/file_{j}.py"] = h

        deleted = {f"old_{i}" for i in range(50)}
        added = {f"new_{i}" for i in range(50)}

        start = time.monotonic()
        renames = detect_directory_renames(deleted, added, last, current)
        elapsed = time.monotonic() - start

        assert elapsed < 2.0, f"detect_directory_renames took {elapsed:.3f}s for 50 dirs"
        assert len(renames) == 50

    def test_compute_snapshot_id_large_dir_list(self) -> None:
        """Hashing 500 files plus 500 directories stays fast and returns a full id."""
        manifest = {f"f_{i}.py": fake_id(f"h{i}") for i in range(500)}
        dirs = [f"dir_{i}" for i in range(500)]

        start = time.monotonic()
        sid = compute_snapshot_id(manifest, dirs)
        elapsed = time.monotonic() - start

        assert elapsed < 1.0, f"compute_snapshot_id took {elapsed:.3f}s for 500 dirs"
        # NOTE(review): 71 presumably is len("sha256:") + 64 hex digits —
        # confirm against muse.core.ids.
        assert len(sid) == 71

    def test_walk_workdir_with_dirs_deep_tree(self, tmp_path: pathlib.Path) -> None:
        """A 20-level directory chain with one leaf file is walked completely."""
        # 20 levels of nesting
        deep = tmp_path
        for level in range(20):
            deep = deep / f"level_{level}"
            deep.mkdir()
        (deep / "leaf.py").write_bytes(b"x")

        start = time.monotonic()
        files, dirs = walk_workdir_with_dirs(tmp_path)
        elapsed = time.monotonic() - start

        assert elapsed < 2.0, f"walk_workdir_with_dirs took {elapsed:.3f}s on 20-level tree"
        # Check each key on its own: a substring test on the concatenated keys
        # could match text that straddles two different paths.
        assert any(path.endswith("leaf.py") for path in files)
        assert len(dirs) == 20
1039
1040
1041 # ===========================================================================
1042 # 14. Security
1043 # ===========================================================================
1044
class TestDirectoriesSecurity:
    """Adversarial inputs: traversal segments, odd characters, symlinks, prefixes."""

    def test_path_traversal_in_directory_address_not_resolved(self) -> None:
        """Directory addresses derived from ``../`` paths are never absolute."""
        # directories_from_manifest should treat path components literally
        manifest = {"../../etc/shadow": "h1"}
        dirs = directories_from_manifest(manifest)
        # The result should contain "../.." and "../../etc" literally, not resolve them.
        # The important thing: no OS path resolution happens
        for d in dirs:
            assert not pathlib.Path(d).is_absolute()

    def test_null_byte_in_directory_path_handled(self) -> None:
        """A NUL byte in a path is either hashed normally or rejected cleanly."""
        # Null bytes in paths are unusual but should not crash
        manifest = {"src\x00/malicious.py": fake_id("h1")}
        try:
            dirs = directories_from_manifest(manifest)
            sid = compute_snapshot_id(manifest, dirs)
        except (ValueError, TypeError):
            return  # rejecting is also acceptable
        # NOTE(review): 71 presumably is len("sha256:") + 64 hex digits —
        # confirm against muse.core.ids.
        assert len(sid) == 71

    def test_very_long_directory_path(self) -> None:
        """A 4096-character directory name is kept verbatim and still hashes."""
        long_name = "a" * 4096
        manifest = {f"{long_name}/f.py": fake_id("h1")}
        dirs = directories_from_manifest(manifest)
        assert dirs == [long_name]
        sid = compute_snapshot_id(manifest, dirs)
        assert len(sid) == 71

    def test_symlinked_dir_not_followed_during_walk(self, tmp_path: pathlib.Path) -> None:
        """Files behind a directory symlink must not show up in the walk result."""
        sensitive = tmp_path / "sensitive"
        sensitive.mkdir()
        (sensitive / "secret.txt").write_bytes(b"SECRET")

        repo_root = tmp_path / "repo"
        repo_root.mkdir()
        link = repo_root / "malicious_link"
        # target_is_directory only matters on Windows, where it is needed to
        # create a directory link; POSIX ignores it.
        link.symlink_to(sensitive, target_is_directory=True)

        files, _dirs = walk_workdir_with_dirs(repo_root)
        assert "malicious_link/secret.txt" not in files
        # The exact-key check above only covers one spelling of the path; the
        # secret must not be reachable under any key.
        assert not any(path.endswith("secret.txt") for path in files), (
            "walk_workdir_with_dirs followed a symlink out of the repository"
        )

    def test_snapshot_record_with_adversarial_dirs_survives_roundtrip(self, store: pathlib.Path) -> None:
        """Directory names with spaces and punctuation survive to_dict/from_dict."""
        # ``store`` is requested as a fixture but not otherwise used in this body.
        # Adversarial: dirs containing special characters
        dirs = ["src", "src/sub dir", "a-b_c.d"]
        manifest = {"src/f.py": fake_id("h1")}
        sid = compute_snapshot_id(manifest, dirs)
        rec = SnapshotRecord(snapshot_id=sid, manifest=manifest, directories=dirs)
        # to_dict / from_dict roundtrip
        loaded = SnapshotRecord.from_dict(rec.to_dict())
        assert loaded.directories == dirs

    def test_detect_directory_renames_no_prefix_confusion(self) -> None:
        """``a`` -> ``ab`` is reported as a rename when the contents match."""
        # "a" should not confuse files under "ab/" as being under "a/"
        # because the prefix check uses "a/" (with trailing slash)
        last = {"a/f.py": "h1"}
        current = {"ab/f.py": "h1"}
        # "ab/f.py" does NOT start with "a/" so old_files under "a/" = {"f.py": "h1"}
        # but new_files under "ab/" = {"f.py": "h1"} — these DO match, so rename is detected
        # (which is correct: the file genuinely moved from a/ to ab/)
        renames = detect_directory_renames({"a"}, {"ab"}, last, current)
        assert renames == [("a", "ab")]

    def test_detect_directory_renames_prefix_does_not_bleed_across_siblings(self) -> None:
        """``models`` must not claim the files that live under ``models_v2``."""
        # "models" should never absorb files from "models_v2" in the source manifest
        # when looking at what files belong to "models/"
        last = {"models/user.py": "h1", "models_v2/user.py": "h2"}
        # Both dirs deleted, one new dir added with only models_v2's content
        current = {"new_home/user.py": "h2"}
        renames = detect_directory_renames({"models", "models_v2"}, {"new_home"}, last, current)
        # "new_home" has {"user.py": "h2"} which matches "models_v2/" not "models/"
        assert ("models_v2", "new_home") in renames
        assert ("models", "new_home") not in renames
File History 2 commits
sha256:1d3f5470f45db58e32047678debc9438fdded1b2c7332cc743d2b8be32fdafc8 fixing more broken tests Human patch 2 days ago
sha256:a154bc65916614c833d5a40a10d81ba3eae0d0495b0afddd34dc34f18d5e91b8 fix: test suite alignment and typing audit — zero violations Sonnet 4.6 minor 10 days ago