gabriel / muse public
test_bridge_git_import.py python
917 lines 34.5 KB
Raw
sha256:37ed8fecaafe7d683537a0182d49132c06dcef1f9016d218281831d30d58ed01 refactor(store): Phase 2 — extract ref and HEAD management … Sonnet 4.6 minor ⚠ breaking 29 days ago
1 """Phase 2 TDD tests for ``muse bridge git-import``.
2
3 Tests are organised into eight tiers:
4
Tier 1 — Shape/Schema:   flag presence, dry-run, exclude defaults
Tier 2 — Round-Trip:     full import integration tests
Tier 3 — Edge Cases:     empty repos, bad refs, LFS, conventional commits
Tier 4 — Stress:         100-commit import
Tier 5 — Data Integrity: SHA-256 correctness, determinism, deduplication
Tier 6 — Performance:    time gates
Tier 7 — Security:       ANSI stripping, path traversal, bad handles
Tier 8 — Docstrings:     implementation docstrings present
13
14 NOTE: git subprocess calls in this file are INTENTIONAL — they create real
15 git repositories used as import sources. The bridge command itself converts
16 those into Muse commits. The muse codebase otherwise never uses git.
17 """
18
19 from __future__ import annotations
20
21 import json
22 import os
23 import pathlib
24 import subprocess
25 import time
26
27 import pytest
28
29 from tests.cli_test_helper import CliRunner
30 from muse.core.paths import git_bridge_state_path, logs_dir, objects_dir
31
32 runner = CliRunner()
33
34
35 # ---------------------------------------------------------------------------
36 # Helpers
37 # ---------------------------------------------------------------------------
38
def _invoke(*args: str, cwd: pathlib.Path | None = None) -> "CliRunner":
    """Run the muse CLI with *args* through the shared module-level ``runner``.

    Args:
        *args: Command-line tokens; they are handed to the runner as a list.
        cwd: Forwarded unchanged as the runner's ``cwd`` argument (``None``
            is passed through as-is).

    Returns:
        Whatever ``runner.invoke`` returns.
        NOTE(review): callers in this file read ``exit_code``, ``output`` and
        ``stderr`` from the return value, so the ``CliRunner`` annotation looks
        inaccurate — confirm the real result type before changing it.
    """
    argv = [*args]
    return runner.invoke(None, argv, cwd=cwd)
42
43
def _make_git_repo(path: pathlib.Path, commits: list[dict]) -> pathlib.Path:
    """Create a real git repo at *path* containing the given commits.

    Each commit dict:
        files: {relative_path: content_str}
        message: commit message string
        author_email: (optional) author email
        author_name: (optional) author name

    The optional author values are used for both the author and the committer
    identity of that commit.

    Args:
        path: Directory to initialise with ``git init``.
        commits: Commit descriptions, applied in order.

    Returns:
        *path*, for call chaining.

    Raises:
        subprocess.CalledProcessError: if any git command exits non-zero.
    """
    def run_git(*git_args: str, env: dict[str, str] | None = None) -> None:
        # Every git call targets the repo via -C and must succeed.
        subprocess.run(
            ["git", "-C", str(path), *git_args],
            check=True, capture_output=True, env=env,
        )

    subprocess.run(["git", "init", str(path)], check=True, capture_output=True)
    # NOTE(review): the e-mail literals below look like they were redacted by
    # the tool that exported this file — confirm the intended fixture address.
    run_git("config", "user.email", "[email protected]")
    run_git("config", "user.name", "Test User")

    for commit in commits:
        for filepath, content in commit["files"].items():
            full = path / filepath
            full.parent.mkdir(parents=True, exist_ok=True)
            # Write explicit UTF-8 bytes: write_text() would use the locale
            # encoding and platform newline translation, which can make the
            # on-disk bytes differ from content.encode() that tests compare
            # and hash against.
            full.write_bytes(content.encode("utf-8"))
        run_git("add", ".")

        email = commit.get("author_email", "[email protected]")
        name = commit.get("author_name", "Test User")
        identity_env = {
            **os.environ,
            "GIT_AUTHOR_EMAIL": email,
            "GIT_AUTHOR_NAME": name,
            "GIT_COMMITTER_EMAIL": email,
            "GIT_COMMITTER_NAME": name,
        }
        run_git("commit", "-m", commit["message"], env=identity_env)
    return path
82
83
def _make_muse_repo(path: pathlib.Path) -> pathlib.Path:
    """Create *path* if needed and run ``muse init`` inside it via the CLI.

    Args:
        path: Directory for the repository; missing parents are created.

    Returns:
        *path*, for call chaining.

    Raises:
        AssertionError: if ``muse init`` exits with a non-zero code.
    """
    path.mkdir(parents=True, exist_ok=True)
    init_result = _invoke("init", cwd=path)
    assert init_result.exit_code == 0, f"muse init failed: {init_result.stderr}"
    return path
90
91
def _get_muse_log(muse_root: pathlib.Path) -> list[dict]:
    """Return the ``commits`` list reported by ``muse log --json``.

    Args:
        muse_root: Directory the CLI is invoked from.

    Returns:
        The value of the top-level ``"commits"`` key, or ``[]`` when the
        command exits non-zero, the output is not valid JSON, the JSON
        document is not an object, or the key is absent.
    """
    result = _invoke("log", "--json", cwd=muse_root)
    if result.exit_code != 0:
        return []
    try:
        data = json.loads(result.output.strip())
    except json.JSONDecodeError:
        return []
    # A non-object payload (e.g. a bare list) has no .get(); treat it like any
    # other unusable output instead of raising AttributeError.
    if not isinstance(data, dict):
        return []
    return data.get("commits", [])
102
103
def _get_muse_branches(muse_root: pathlib.Path) -> list[str]:
    """Return the branch names reported by ``muse branch --json``.

    Args:
        muse_root: Directory the CLI is invoked from.

    Returns:
        The ``"name"`` value of every entry when the output is a JSON list of
        objects; ``[]`` when the command exits non-zero, the output is not
        valid JSON, the document is not a list, or any entry lacks a usable
        ``"name"`` key.
    """
    result = _invoke("branch", "--json", cwd=muse_root)
    if result.exit_code != 0:
        return []
    try:
        data = json.loads(result.output.strip())
    except json.JSONDecodeError:
        return []
    if not isinstance(data, list):
        return []
    try:
        return [entry["name"] for entry in data]
    except (KeyError, TypeError):
        # TypeError covers entries that are not mappings (e.g. bare strings);
        # they are as unusable as entries missing "name".
        return []
116
117
118 # ---------------------------------------------------------------------------
119 # Tier 1 — Shape/Schema
120 # ---------------------------------------------------------------------------
121
class TestSchemaFlags:
    """Tier 1: CLI flag presence, output shape, and default exclude rules."""

    def test_help_contains_incremental_flag(self) -> None:
        """``git-import --help`` mentions ``--incremental``."""
        help_text = _invoke("bridge", "git-import", "--help").output
        assert "--incremental" in help_text

    def test_help_contains_attribution_map(self) -> None:
        """``git-import --help`` mentions ``--attribution-map``."""
        help_text = _invoke("bridge", "git-import", "--help").output
        assert "--attribution-map" in help_text

    def test_help_contains_import_tags(self) -> None:
        """``git-import --help`` mentions ``--import-tags``."""
        help_text = _invoke("bridge", "git-import", "--help").output
        assert "--import-tags" in help_text

    def test_dry_run_writes_nothing(self, tmp_path: pathlib.Path) -> None:
        """A ``--dry-run`` import succeeds and leaves the muse log empty."""
        source = tmp_path / "git_repo"
        target = tmp_path / "muse_repo"
        _make_git_repo(source, [{"files": {"a.txt": "hello"}, "message": "init"}])
        _make_muse_repo(target)

        argv = [
            "bridge", "git-import", str(source),
            "--target", str(target),
            "--dry-run",
        ]
        outcome = _invoke(*argv, cwd=target)
        assert outcome.exit_code == 0

        # Dry-run must not write any commits to the object store
        logged = _get_muse_log(target)
        assert logged == [], "dry-run wrote commits to the object store"

    def test_json_output_valid_ndjson(self, tmp_path: pathlib.Path) -> None:
        """Every non-blank line of ``--json`` output parses as JSON."""
        source = tmp_path / "git_repo"
        target = tmp_path / "muse_repo"
        _make_git_repo(source, [{"files": {"a.txt": "hello"}, "message": "init"}])
        _make_muse_repo(target)

        argv = [
            "bridge", "git-import", str(source),
            "--target", str(target),
            "--json",
        ]
        outcome = _invoke(*argv, cwd=target)
        assert outcome.exit_code == 0

        records = [ln for ln in outcome.output.strip().splitlines() if ln.strip()]
        for record in records:
            json.loads(record)  # raises if invalid JSON

    def test_default_excludes_cover_git_dir(self) -> None:
        """Paths under ``.git/`` are excluded by default."""
        from muse.cli.commands.bridge import _should_exclude
        for git_internal in (".git/config", ".git/COMMIT_EDITMSG"):
            assert _should_exclude(git_internal) is True

    def test_default_excludes_cover_node_modules(self) -> None:
        """Paths under ``node_modules/`` are excluded by default."""
        from muse.cli.commands.bridge import _should_exclude
        verdict = _should_exclude("node_modules/lodash/index.js")
        assert verdict is True

    def test_default_excludes_cover_pyc(self) -> None:
        """Compiled ``.pyc`` files under ``__pycache__`` are excluded by default."""
        from muse.cli.commands.bridge import _should_exclude
        verdict = _should_exclude("src/__pycache__/foo.cpython-312.pyc")
        assert verdict is True

    def test_default_excludes_cover_venv(self) -> None:
        """Paths under ``.venv/`` are excluded by default."""
        from muse.cli.commands.bridge import _should_exclude
        verdict = _should_exclude(".venv/lib/python3.12/site-packages/pip/__init__.py")
        assert verdict is True

    def test_non_excluded_path(self) -> None:
        """Ordinary source and README paths are not excluded."""
        from muse.cli.commands.bridge import _should_exclude
        for kept in ("src/main.py", "README.md"):
            assert _should_exclude(kept) is False
192
193
194 # ---------------------------------------------------------------------------
195 # Tier 2 — Round-Trip / Integration
196 # ---------------------------------------------------------------------------
197
class TestRoundTrip:
    """Full git → muse import round trips."""

    @staticmethod
    def _commit_all(git_dir: pathlib.Path, message: str) -> None:
        """Stage every change in *git_dir* and commit it with a fixed test identity."""
        identity_env = {
            **os.environ,
            "GIT_AUTHOR_EMAIL": "[email protected]", "GIT_AUTHOR_NAME": "T",
            "GIT_COMMITTER_EMAIL": "[email protected]", "GIT_COMMITTER_NAME": "T",
        }
        subprocess.run(["git", "-C", str(git_dir), "add", "."], check=True, capture_output=True)
        subprocess.run(
            ["git", "-C", str(git_dir), "commit", "-m", message],
            check=True, capture_output=True, env=identity_env,
        )

    def test_import_3_commits_creates_3_muse_commits(self, tmp_path: pathlib.Path) -> None:
        """Importing three git commits yields three entries in the muse log."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {"files": {"a.py": "x=1"}, "message": "first"},
            {"files": {"b.py": "y=2"}, "message": "second"},
            {"files": {"c.py": "z=3"}, "message": "third"},
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        commits = _get_muse_log(muse_dir)
        assert len(commits) == 3, f"Expected 3 commits, got {len(commits)}: {commits}"

    def test_import_creates_expected_branch(self, tmp_path: pathlib.Path) -> None:
        """After import the muse repo lists a ``main`` or ``master`` branch."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [{"files": {"a.py": "x=1"}, "message": "init"}])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        branches = _get_muse_branches(muse_dir)
        # Should have imported to main or master
        assert any(b in ("main", "master") for b in branches), f"branches: {branches}"

    def test_import_2_branches(self, tmp_path: pathlib.Path) -> None:
        """``--all`` imports at least two branches when git has two."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [{"files": {"a.py": "x=1"}, "message": "init"}])

        # Create a second branch in git
        subprocess.run(
            ["git", "-C", str(git_dir), "checkout", "-b", "develop"],
            check=True, capture_output=True,
        )
        (git_dir / "b.py").write_text("y=2")
        self._commit_all(git_dir, "dev commit")

        _make_muse_repo(muse_dir)
        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            "--all",
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        branches = _get_muse_branches(muse_dir)
        assert len(branches) >= 2, f"Expected >= 2 branches, got {branches}"

    def test_incremental_import_only_imports_new(self, tmp_path: pathlib.Path) -> None:
        """A full import of 3 commits plus an incremental import of 2 more gives 5."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {"files": {"a.py": "x=1"}, "message": "first"},
            {"files": {"b.py": "y=2"}, "message": "second"},
            {"files": {"c.py": "z=3"}, "message": "third"},
        ])
        _make_muse_repo(muse_dir)

        # First import
        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        commits_after_first = _get_muse_log(muse_dir)
        assert len(commits_after_first) == 3

        # Add 2 more commits to git
        for filepath, content, msg in [("d.py", "d=4", "fourth"), ("e.py", "e=5", "fifth")]:
            (git_dir / filepath).write_text(content)
            self._commit_all(git_dir, msg)

        # Incremental import
        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            "--incremental",
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        commits_after_second = _get_muse_log(muse_dir)
        assert len(commits_after_second) == 5, (
            f"Expected 5 commits after incremental import, got {len(commits_after_second)}"
        )

    def test_attribution_map_applies(self, tmp_path: pathlib.Path) -> None:
        """An e-mail listed in the attribution map is replaced by its mapped handle."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        attr_file = tmp_path / "attr.json"
        attr_file.write_text(json.dumps({"[email protected]": "alice-muse"}))

        _make_git_repo(git_dir, [
            {
                "files": {"a.py": "x=1"},
                "message": "init",
                "author_email": "[email protected]",
                "author_name": "Alice",
            }
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            "--attribution-map", str(attr_file),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        commits = _get_muse_log(muse_dir)
        assert len(commits) == 1
        # The author field should contain the mapped handle
        assert "alice-muse" in commits[0].get("author", ""), (
            f"Expected alice-muse in author, got: {commits[0]}"
        )

    def test_unmapped_email_gets_synthetic_handle(self, tmp_path: pathlib.Path) -> None:
        """Without a map, the author contains a synthetic ``git-import/`` handle."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"

        _make_git_repo(git_dir, [
            {
                "files": {"a.py": "x=1"},
                "message": "init",
                "author_email": "[email protected]",
            }
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        commits = _get_muse_log(muse_dir)
        assert len(commits) == 1
        author = commits[0].get("author", "")
        assert "git-import/" in author, f"Expected synthetic git-import/ handle, got: {author!r}"

    def test_bridge_state_written_after_import(self, tmp_path: pathlib.Path) -> None:
        """A successful import writes a bridge state file with ``[last_import]``."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [{"files": {"a.py": "x=1"}, "message": "init"}])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        state_file = git_bridge_state_path(muse_dir)
        assert state_file.exists(), "git-bridge.toml was not written"
        content = state_file.read_text()
        assert "[last_import]" in content

    def test_file_content_preserved(self, tmp_path: pathlib.Path) -> None:
        """The stored object for an imported file decodes to the original text."""
        # Only the two readers actually used are imported here; the previous
        # unused imports could fail this test for unrelated reasons and
        # shadowed the module-level git_bridge_state_path.
        from muse.core.object_store import read_object
        from muse.core.store import read_snapshot

        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        expected_content = "# This is a test file\nresult = 42\n"
        _make_git_repo(git_dir, [
            {"files": {"src/calc.py": expected_content}, "message": "add calc"},
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        # Read the snapshot manifest and find the object ID for src/calc.py.
        # Get HEAD commit
        log_result = _invoke("log", "--json", cwd=muse_dir)
        log_data = json.loads(log_result.output.strip())
        commits = log_data.get("commits", [])
        assert commits, "No commits in log"

        snapshot_id = commits[0].get("snapshot_id", "")
        assert snapshot_id, f"No snapshot_id in commit: {commits[0]}"

        snapshot = read_snapshot(muse_dir, snapshot_id)
        assert snapshot is not None, "Could not read snapshot"

        assert "src/calc.py" in snapshot.manifest, (
            f"src/calc.py not in manifest: {list(snapshot.manifest.keys())}"
        )

        object_id = snapshot.manifest["src/calc.py"]
        stored = read_object(muse_dir, object_id)
        assert stored is not None
        assert stored.decode() == expected_content
425
426
427 # ---------------------------------------------------------------------------
428 # Tier 3 — Edge Cases
429 # ---------------------------------------------------------------------------
430
class TestEdgeCases:
    """Edge case handling: empty repos, bad refs, LFS, excludes, conventional commits."""

    @staticmethod
    def _log_commits(muse_dir: pathlib.Path) -> list[dict]:
        """Return ``commits`` from ``muse log --json``; raises if the output is not JSON."""
        log_result = _invoke("log", "--json", cwd=muse_dir)
        log_data = json.loads(log_result.output.strip())
        return log_data.get("commits", [])

    @classmethod
    def _first_logged_snapshot(cls, muse_dir: pathlib.Path):
        """Return the snapshot of the first commit in the log, asserting both exist.

        Asserting here keeps manifest checks from passing vacuously when the
        import wrote nothing or the snapshot cannot be read.
        """
        from muse.core.store import read_snapshot

        commits = cls._log_commits(muse_dir)
        assert commits, "import produced no commits, so there is no manifest to inspect"
        snapshot = read_snapshot(muse_dir, commits[0].get("snapshot_id", ""))
        assert snapshot, f"could not read snapshot for commit: {commits[0]}"
        return snapshot

    def test_empty_git_repo_exits_zero(self, tmp_path: pathlib.Path) -> None:
        """Importing a commit-less repo exits 0 and reports zero commits written."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        # Create a git repo but no commits
        subprocess.run(["git", "init", str(git_dir)], check=True, capture_output=True)
        subprocess.run(
            ["git", "-C", str(git_dir), "config", "user.email", "[email protected]"],
            check=True, capture_output=True,
        )
        subprocess.run(
            ["git", "-C", str(git_dir), "config", "user.name", "T"],
            check=True, capture_output=True,
        )
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            "--json",
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        # Check the JSON output includes total_commits_written: 0
        found_done = False
        for line in result.output.strip().splitlines():
            if not line.strip():
                continue
            obj = json.loads(line)
            if obj.get("event") == "done":
                found_done = True
                assert obj.get("total_commits_written", -1) == 0
        assert found_done, f"No done event in output: {result.output!r}"

    def test_from_ref_nonexistent_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """A ``--from-ref`` that does not exist fails and writes no commits."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [{"files": {"a.py": "x"}, "message": "init"}])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            "--from-ref", "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef",
            cwd=muse_dir,
        )
        # Should fail with USER_ERROR before writing anything
        assert result.exit_code != 0

        # Error path must not write any commits to the object store
        assert _get_muse_log(muse_dir) == [], "from-ref with bad SHA wrote commits to the object store"

    def test_incremental_no_bridge_state_does_full_import(self, tmp_path: pathlib.Path) -> None:
        """``--incremental`` on a fresh target imports every commit."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {"files": {"a.py": "x=1"}, "message": "first"},
            {"files": {"b.py": "y=2"}, "message": "second"},
        ])
        _make_muse_repo(muse_dir)

        # No bridge state file — incremental should fall back to full import
        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            "--incremental",
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        commits = _get_muse_log(muse_dir)
        assert len(commits) == 2, f"Expected 2 commits, got {len(commits)}"

    def test_lfs_pointer_skipped_with_lfs_skip(self, tmp_path: pathlib.Path) -> None:
        """With ``--lfs-skip`` an LFS pointer file is absent from the manifest."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        lfs_pointer = (
            "version https://git-lfs.github.com/spec/v1\n"
            "oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393\n"
            "size 12345\n"
        )
        _make_git_repo(git_dir, [
            {"files": {"large_file.bin": lfs_pointer, "a.py": "x=1"}, "message": "add files"},
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            "--lfs-skip",
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        # The LFS pointer file should not appear in the manifest
        snapshot = self._first_logged_snapshot(muse_dir)
        assert "large_file.bin" not in snapshot.manifest, (
            "LFS pointer should be excluded when --lfs-skip is set"
        )

    def test_excluded_file_not_in_manifest(self, tmp_path: pathlib.Path) -> None:
        """A ``.pyc`` file committed to git is absent from the imported manifest."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {
                "files": {
                    "src/main.py": "x=1",
                    "src/util.pyc": "\x00" * 16,
                },
                "message": "add files",
            },
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        snapshot = self._first_logged_snapshot(muse_dir)
        assert "src/util.pyc" not in snapshot.manifest, (
            ".pyc files should be excluded by default"
        )

    def test_conventional_commit_feat_becomes_minor_bump(self, tmp_path: pathlib.Path) -> None:
        """A ``feat:`` commit message is imported with ``sem_ver_bump == "minor"``."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {"files": {"a.py": "x=1"}, "message": "feat: add awesome feature"},
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        commits = self._log_commits(muse_dir)
        assert commits
        assert commits[0].get("sem_ver_bump") == "minor", (
            f"feat: commit should have minor sem_ver_bump, got {commits[0].get('sem_ver_bump')!r}"
        )

    def test_conventional_commit_fix_becomes_patch_bump(self, tmp_path: pathlib.Path) -> None:
        """A ``fix:`` commit message is imported with ``sem_ver_bump == "patch"``."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {"files": {"a.py": "x=1"}, "message": "fix: correct off-by-one"},
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        commits = self._log_commits(muse_dir)
        assert commits
        assert commits[0].get("sem_ver_bump") == "patch"

    def test_conventional_commit_breaking_becomes_major_bump(self, tmp_path: pathlib.Path) -> None:
        """A ``feat!:`` commit message is imported with ``sem_ver_bump == "major"``."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {"files": {"a.py": "x=1"}, "message": "feat!: BREAKING CHANGE remove old API"},
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        commits = self._log_commits(muse_dir)
        assert commits
        assert commits[0].get("sem_ver_bump") == "major"
634
635
636 # ---------------------------------------------------------------------------
637 # Tier 4 — Stress
638 # ---------------------------------------------------------------------------
639
class TestStress:
    """Stress tests with large commit counts."""

    def test_import_100_commits(self, tmp_path: pathlib.Path) -> None:
        """100 git commits import successfully, within 30 s, as 100 muse commits."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"

        # Create 100 commits
        commits = [
            {"files": {f"file_{i:03d}.txt": f"content {i}"}, "message": f"commit {i:03d}"}
            for i in range(100)
        ]
        _make_git_repo(git_dir, commits)
        _make_muse_repo(muse_dir)

        # perf_counter is monotonic, so the measurement cannot be skewed by
        # wall-clock adjustments the way time.time() can.
        start = time.perf_counter()
        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        elapsed = time.perf_counter() - start

        assert result.exit_code == 0, f"import failed: {result.stderr}"
        assert elapsed < 30.0, f"100-commit import took {elapsed:.1f}s (limit: 30s)"

        log_commits = _get_muse_log(muse_dir)
        assert len(log_commits) == 100, f"Expected 100 commits, got {len(log_commits)}"

    def test_cat_file_stays_alive(self, tmp_path: pathlib.Path) -> None:
        """Single _CatFile instance handles multiple reads without crashing."""
        git_dir = tmp_path / "git_repo"
        _make_git_repo(git_dir, [
            {"files": {f"f{i}.txt": f"content {i}"}, "message": f"c{i}"}
            for i in range(10)
        ])

        from muse.cli.commands.bridge import _CatFile, _git

        # Get all blob SHAs from the git repo
        ls_tree = _git(git_dir, "ls-tree", "-r", "--format=%(objectname)", "HEAD")
        shas = [s.strip() for s in ls_tree.strip().splitlines() if s.strip()]
        # Without this the loop below would pass without reading anything.
        assert shas, f"ls-tree returned no blob SHAs: {ls_tree!r}"

        with _CatFile(git_dir) as cf:
            for sha in shas:
                content = cf.read(sha)
                assert isinstance(content, bytes)
688
689
690 # ---------------------------------------------------------------------------
691 # Tier 5 — Data Integrity
692 # ---------------------------------------------------------------------------
693
class TestDataIntegrity:
    """Content-addressed integrity and determinism tests."""

    @staticmethod
    def _count_object_files(obj_dir: pathlib.Path) -> int:
        """Return the number of regular files anywhere under *obj_dir*."""
        return sum(1 for entry in obj_dir.glob("**/*") if entry.is_file())

    def test_sha256_of_blob_matches_object_store(self, tmp_path: pathlib.Path) -> None:
        """The manifest ID of an imported file equals ``blob_id`` of its content."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        content = "unique content for hash verification\n"
        _make_git_repo(git_dir, [{"files": {"verify.txt": content}, "message": "add file"}])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        log_result = _invoke("log", "--json", cwd=muse_dir)
        log_data = json.loads(log_result.output.strip())
        commits = log_data.get("commits", [])
        assert commits

        from muse.core.store import read_snapshot
        from muse.core.object_store import read_object
        from muse.core.types import blob_id

        snapshot = read_snapshot(muse_dir, commits[0].get("snapshot_id", ""))
        assert snapshot is not None
        assert "verify.txt" in snapshot.manifest

        stored_id = snapshot.manifest["verify.txt"]
        stored_bytes = read_object(muse_dir, stored_id)
        assert stored_bytes is not None
        # The object was read back but never compared; check the payload too so
        # a correct ID pointing at wrong bytes cannot pass.
        assert stored_bytes == content.encode(), (
            f"stored bytes differ from source content: {stored_bytes!r}"
        )

        expected_id = blob_id(content.encode())
        assert stored_id == expected_id, f"stored {stored_id} != expected {expected_id}"

    def test_no_duplicate_objects_on_reimport(self, tmp_path: pathlib.Path) -> None:
        """Importing the same repo twice leaves the object-file count unchanged."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {"files": {"a.py": "x=1"}, "message": "init"},
            {"files": {"b.py": "y=2"}, "message": "second"},
        ])
        _make_muse_repo(muse_dir)

        # First import
        result = _invoke("bridge", "git-import", str(git_dir), "--target", str(muse_dir), cwd=muse_dir)
        assert result.exit_code == 0

        # Count objects after first import
        obj_dir = objects_dir(muse_dir)
        first_count = self._count_object_files(obj_dir)

        # Second import of same repo — same commits
        result = _invoke("bridge", "git-import", str(git_dir), "--target", str(muse_dir), cwd=muse_dir)
        assert result.exit_code == 0

        second_count = self._count_object_files(obj_dir)
        assert second_count == first_count, (
            f"Re-import created {second_count - first_count} new objects (expected 0)"
        )

    def test_reflog_appended(self, tmp_path: pathlib.Path) -> None:
        """After import at least one non-empty file exists under the logs directory."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [{"files": {"a.py": "x=1"}, "message": "init"}])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        # Reflog lives at .muse/logs/refs/heads/<branch>
        log_dir = logs_dir(muse_dir)
        log_files = list(log_dir.glob("**/*")) if log_dir.exists() else []
        has_content = any(f.is_file() and f.stat().st_size > 0 for f in log_files)
        assert has_content, (
            f"No reflog entries were written after import. "
            f"Log dir contents: {[str(f) for f in log_files]}"
        )
778
779
780 # ---------------------------------------------------------------------------
781 # Tier 6 — Performance
782 # ---------------------------------------------------------------------------
783
class TestPerformance:
    """Performance gate tests."""

    @staticmethod
    def _commit_all(git_dir: pathlib.Path, message: str) -> None:
        """Stage every change in *git_dir* and commit it with a fixed test identity."""
        identity_env = {
            **os.environ,
            "GIT_AUTHOR_EMAIL": "[email protected]", "GIT_AUTHOR_NAME": "T",
            "GIT_COMMITTER_EMAIL": "[email protected]", "GIT_COMMITTER_NAME": "T",
        }
        subprocess.run(["git", "-C", str(git_dir), "add", "."], check=True, capture_output=True)
        subprocess.run(
            ["git", "-C", str(git_dir), "commit", "-m", message],
            check=True, capture_output=True, env=identity_env,
        )

    @pytest.mark.slow
    def test_100_commit_import_under_5_seconds(self, tmp_path: pathlib.Path) -> None:
        """A 100-commit import succeeds in under 5 seconds."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        commits = [
            {"files": {f"f{i}.py": f"x={i}"}, "message": f"c{i}"}
            for i in range(100)
        ]
        _make_git_repo(git_dir, commits)
        _make_muse_repo(muse_dir)

        # Monotonic clock: immune to wall-clock adjustments during the run.
        start = time.perf_counter()
        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        elapsed = time.perf_counter() - start
        assert result.exit_code == 0
        assert elapsed < 5.0, f"100-commit import took {elapsed:.2f}s (limit: 5s)"

    @pytest.mark.slow
    def test_incremental_1_commit_under_500ms(self, tmp_path: pathlib.Path) -> None:
        """An incremental import of one new commit succeeds in under 0.5 seconds."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [{"files": {"a.py": "x=1"}, "message": "init"}])
        _make_muse_repo(muse_dir)

        # Full import first; it must succeed, otherwise the timed run below
        # would not be measuring a one-commit increment.
        setup = _invoke("bridge", "git-import", str(git_dir), "--target", str(muse_dir), cwd=muse_dir)
        assert setup.exit_code == 0, f"setup import failed: {setup.stderr}"

        # Add one more commit
        (git_dir / "b.py").write_text("y=2")
        self._commit_all(git_dir, "incremental")

        start = time.perf_counter()
        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            "--incremental",
            cwd=muse_dir,
        )
        elapsed = time.perf_counter() - start
        assert result.exit_code == 0
        assert elapsed < 0.5, f"Incremental 1-commit import took {elapsed:.3f}s (limit: 0.5s)"
838
839
840 # ---------------------------------------------------------------------------
841 # Tier 7 — Security
842 # ---------------------------------------------------------------------------
843
class TestSecurity:
    """Security-sensitive input handling."""

    def test_git_commit_message_ansi_stripped(self, tmp_path: pathlib.Path) -> None:
        """An ANSI escape sequence in a git message is absent from the stored message."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        ansi_msg = "\x1b[31mRed message\x1b[0m"
        _make_git_repo(git_dir, [{"files": {"a.py": "x"}, "message": ansi_msg}])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        log_result = _invoke("log", "--json", cwd=muse_dir)
        log_data = json.loads(log_result.output.strip())
        commits = log_data.get("commits", [])
        # Require a commit: guarding the check with ``if commits`` let the test
        # pass without inspecting any message when nothing was imported.
        assert commits, "import wrote no commits; no stored message to inspect"
        msg = commits[0].get("message", "")
        assert "\x1b[" not in msg, f"ANSI escape sequence found in stored message: {msg!r}"

    def test_attribution_map_control_chars_rejected(self, tmp_path: pathlib.Path) -> None:
        """A NUL byte in a mapped handle never reaches the handle returned by the mapper."""
        from muse.cli.commands.bridge import AttributionMapper
        attr_file = tmp_path / "attr.json"
        # Map with NUL byte in handle
        attr_file.write_text(json.dumps({"[email protected]": "alice\x00bad"}))

        mapper = AttributionMapper(attr_file)
        handle = mapper.get_handle("[email protected]", "Alice")
        # NUL byte must not appear in the returned handle
        assert "\x00" not in handle, f"Control char in handle: {handle!r}"

    def test_source_path_traversal_rejected(self, tmp_path: pathlib.Path) -> None:
        """A traversal-style source path makes the import exit non-zero."""
        muse_dir = tmp_path / "muse_repo"
        _make_muse_repo(muse_dir)

        # Use a path that looks like traversal — has no .git dir so should fail gracefully
        result = _invoke(
            "bridge", "git-import", "../../../../etc",
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code != 0, "Path traversal source should be rejected"
890
891
892 # ---------------------------------------------------------------------------
893 # Tier 8 — Docstrings
894 # ---------------------------------------------------------------------------
895
class TestDocstrings:
    """Tier 8: selected bridge implementation symbols must have a docstring."""

    def test_replay_commit_has_docstring(self) -> None:
        """``_replay_commit`` has a non-empty ``__doc__``."""
        from muse.cli.commands.bridge import _replay_commit as target
        doc = target.__doc__
        assert doc, "_replay_commit missing docstring"

    def test_cat_file_has_docstring(self) -> None:
        """``_CatFile`` has a non-empty ``__doc__``."""
        from muse.cli.commands.bridge import _CatFile as target
        doc = target.__doc__
        assert doc, "_CatFile missing docstring"

    def test_attribution_mapper_has_docstring(self) -> None:
        """``AttributionMapper`` has a non-empty ``__doc__``."""
        from muse.cli.commands.bridge import AttributionMapper as target
        doc = target.__doc__
        assert doc, "AttributionMapper missing docstring"

    def test_replay_branch_has_docstring(self) -> None:
        """``_replay_branch`` has a non-empty ``__doc__``."""
        from muse.cli.commands.bridge import _replay_branch as target
        doc = target.__doc__
        assert doc, "_replay_branch missing docstring"

    def test_batch_commit_log_has_docstring(self) -> None:
        """``_batch_commit_log`` has a non-empty ``__doc__``."""
        from muse.cli.commands.bridge import _batch_commit_log as target
        doc = target.__doc__
        assert doc, "_batch_commit_log missing docstring"
File History 1 commit
sha256:37ed8fecaafe7d683537a0182d49132c06dcef1f9016d218281831d30d58ed01 refactor(store): Phase 2 — extract ref and HEAD management … Sonnet 4.6 minor 29 days ago