"""Phase 2 TDD tests for ``muse bridge git-import``. Tests are organised into eight tiers: Tier 1 — Shape/Schema flag presence, dry-run, exclude defaults Tier 2 — Round-Trip full import integration tests Tier 3 — Edge Cases empty repos, bad refs, LFS, conventional commits Tier 4 — Stress 100-commit import Tier 5 — Data Integrity SHA-256 correctness, determinism, deduplication Tier 6 — Performance time gates Tier 7 — Security ANSI stripping, path traversal, bad handles Tier 8 — Docstrings implementation docstrings present NOTE: git subprocess calls in this file are INTENTIONAL — they create real git repositories used as import sources. The bridge command itself converts those into Muse commits. The muse codebase otherwise never uses git. """ from __future__ import annotations import json import os import pathlib import subprocess import time import pytest from tests.cli_test_helper import CliRunner from muse.core.paths import git_bridge_state_path, logs_dir, objects_dir runner = CliRunner() # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _invoke(*args: str, cwd: pathlib.Path | None = None) -> "CliRunner": """Invoke the muse CLI from *cwd* (or CWD if None).""" return runner.invoke(None, list(args), cwd=cwd) def _make_git_repo(path: pathlib.Path, commits: list[dict]) -> pathlib.Path: """Create a real git repo with the given commits. 
Each commit dict: files: {relative_path: content_str} message: commit message string author_email: (optional) author email author_name: (optional) author name """ subprocess.run(["git", "init", str(path)], check=True, capture_output=True) subprocess.run( ["git", "-C", str(path), "config", "user.email", "test@example.com"], check=True, capture_output=True, ) subprocess.run( ["git", "-C", str(path), "config", "user.name", "Test User"], check=True, capture_output=True, ) for commit in commits: for filepath, content in commit["files"].items(): full = path / filepath full.parent.mkdir(parents=True, exist_ok=True) full.write_text(content) subprocess.run(["git", "-C", str(path), "add", "."], check=True, capture_output=True) email = commit.get("author_email", "test@example.com") name = commit.get("author_name", "Test User") env = { **os.environ, "GIT_AUTHOR_EMAIL": email, "GIT_AUTHOR_NAME": name, "GIT_COMMITTER_EMAIL": email, "GIT_COMMITTER_NAME": name, } subprocess.run( ["git", "-C", str(path), "commit", "-m", commit["message"]], check=True, capture_output=True, env=env, ) return path def _make_muse_repo(path: pathlib.Path) -> pathlib.Path: """Initialise a Muse repository at *path* using the CLI.""" path.mkdir(parents=True, exist_ok=True) result = _invoke("init", cwd=path) assert result.exit_code == 0, f"muse init failed: {result.stderr}" return path def _get_muse_log(muse_root: pathlib.Path) -> list[dict]: """Return the muse log as a list of commit dicts.""" result = _invoke("log", "--json", cwd=muse_root) if result.exit_code != 0: return [] try: data = json.loads(result.output.strip()) return data.get("commits", []) except json.JSONDecodeError: return [] def _get_muse_branches(muse_root: pathlib.Path) -> list[str]: """Return list of branch names in the muse repo.""" result = _invoke("branch", "--json", cwd=muse_root) if result.exit_code != 0: return [] try: data = json.loads(result.output.strip()) if isinstance(data, list): return [b["name"] for b in data] return [] 
except (json.JSONDecodeError, KeyError): return [] # --------------------------------------------------------------------------- # Tier 1 — Shape/Schema # --------------------------------------------------------------------------- class TestSchemaFlags: """Flag presence and output shape validation.""" def test_help_contains_incremental_flag(self) -> None: result = _invoke("bridge", "git-import", "--help") assert "--incremental" in result.output def test_help_contains_attribution_map(self) -> None: result = _invoke("bridge", "git-import", "--help") assert "--attribution-map" in result.output def test_help_contains_import_tags(self) -> None: result = _invoke("bridge", "git-import", "--help") assert "--import-tags" in result.output def test_dry_run_writes_nothing(self, tmp_path: pathlib.Path) -> None: git_dir = tmp_path / "git_repo" muse_dir = tmp_path / "muse_repo" _make_git_repo(git_dir, [{"files": {"a.txt": "hello"}, "message": "init"}]) _make_muse_repo(muse_dir) result = _invoke( "bridge", "git-import", str(git_dir), "--target", str(muse_dir), "--dry-run", cwd=muse_dir, ) assert result.exit_code == 0 # Dry-run must not write any commits to the object store assert _get_muse_log(muse_dir) == [], "dry-run wrote commits to the object store" def test_json_output_valid_ndjson(self, tmp_path: pathlib.Path) -> None: git_dir = tmp_path / "git_repo" muse_dir = tmp_path / "muse_repo" _make_git_repo(git_dir, [{"files": {"a.txt": "hello"}, "message": "init"}]) _make_muse_repo(muse_dir) result = _invoke( "bridge", "git-import", str(git_dir), "--target", str(muse_dir), "--json", cwd=muse_dir, ) assert result.exit_code == 0 for line in result.output.strip().splitlines(): if line.strip(): json.loads(line) # raises if invalid JSON def test_default_excludes_cover_git_dir(self) -> None: from muse.core.bridge.git_primitives import _should_exclude assert _should_exclude(".git/config") is True assert _should_exclude(".git/COMMIT_EDITMSG") is True def 
test_default_excludes_cover_node_modules(self) -> None: from muse.core.bridge.git_primitives import _should_exclude assert _should_exclude("node_modules/lodash/index.js") is True def test_default_excludes_cover_pyc(self) -> None: from muse.core.bridge.git_primitives import _should_exclude assert _should_exclude("src/__pycache__/foo.cpython-312.pyc") is True def test_default_excludes_cover_venv(self) -> None: from muse.core.bridge.git_primitives import _should_exclude assert _should_exclude(".venv/lib/python3.12/site-packages/pip/__init__.py") is True def test_non_excluded_path(self) -> None: from muse.core.bridge.git_primitives import _should_exclude assert _should_exclude("src/main.py") is False assert _should_exclude("README.md") is False # --------------------------------------------------------------------------- # Tier 2 — Round-Trip / Integration # --------------------------------------------------------------------------- class TestRoundTrip: """Full git → muse import round trips.""" def test_import_3_commits_creates_3_muse_commits(self, tmp_path: pathlib.Path) -> None: git_dir = tmp_path / "git_repo" muse_dir = tmp_path / "muse_repo" _make_git_repo(git_dir, [ {"files": {"a.py": "x=1"}, "message": "first"}, {"files": {"b.py": "y=2"}, "message": "second"}, {"files": {"c.py": "z=3"}, "message": "third"}, ]) _make_muse_repo(muse_dir) result = _invoke( "bridge", "git-import", str(git_dir), "--target", str(muse_dir), cwd=muse_dir, ) assert result.exit_code == 0 commits = _get_muse_log(muse_dir) assert len(commits) == 3, f"Expected 3 commits, got {len(commits)}: {commits}" def test_import_creates_expected_branch(self, tmp_path: pathlib.Path) -> None: git_dir = tmp_path / "git_repo" muse_dir = tmp_path / "muse_repo" _make_git_repo(git_dir, [{"files": {"a.py": "x=1"}, "message": "init"}]) _make_muse_repo(muse_dir) result = _invoke( "bridge", "git-import", str(git_dir), "--target", str(muse_dir), cwd=muse_dir, ) assert result.exit_code == 0 branches = 
_get_muse_branches(muse_dir) # Should have imported to main or master assert any(b in ("main", "master") for b in branches), f"branches: {branches}" def test_import_2_branches(self, tmp_path: pathlib.Path) -> None: git_dir = tmp_path / "git_repo" muse_dir = tmp_path / "muse_repo" _make_git_repo(git_dir, [{"files": {"a.py": "x=1"}, "message": "init"}]) # Create a second branch in git subprocess.run(["git", "-C", str(git_dir), "checkout", "-b", "develop"], check=True, capture_output=True) (git_dir / "b.py").write_text("y=2") subprocess.run(["git", "-C", str(git_dir), "add", "."], check=True, capture_output=True) subprocess.run( ["git", "-C", str(git_dir), "commit", "-m", "dev commit"], check=True, capture_output=True, env={**os.environ, "GIT_AUTHOR_EMAIL": "t@t.com", "GIT_AUTHOR_NAME": "T", "GIT_COMMITTER_EMAIL": "t@t.com", "GIT_COMMITTER_NAME": "T"}, ) _make_muse_repo(muse_dir) result = _invoke( "bridge", "git-import", str(git_dir), "--target", str(muse_dir), "--all", cwd=muse_dir, ) assert result.exit_code == 0 branches = _get_muse_branches(muse_dir) assert len(branches) >= 2, f"Expected >= 2 branches, got {branches}" def test_incremental_import_only_imports_new(self, tmp_path: pathlib.Path) -> None: git_dir = tmp_path / "git_repo" muse_dir = tmp_path / "muse_repo" _make_git_repo(git_dir, [ {"files": {"a.py": "x=1"}, "message": "first"}, {"files": {"b.py": "y=2"}, "message": "second"}, {"files": {"c.py": "z=3"}, "message": "third"}, ]) _make_muse_repo(muse_dir) # First import result = _invoke( "bridge", "git-import", str(git_dir), "--target", str(muse_dir), cwd=muse_dir, ) assert result.exit_code == 0 commits_after_first = _get_muse_log(muse_dir) assert len(commits_after_first) == 3 # Add 2 more commits to git for filepath, content, msg in [("d.py", "d=4", "fourth"), ("e.py", "e=5", "fifth")]: (git_dir / filepath).write_text(content) subprocess.run(["git", "-C", str(git_dir), "add", "."], check=True, capture_output=True) subprocess.run( ["git", "-C", str(git_dir), 
"commit", "-m", msg], check=True, capture_output=True, env={**os.environ, "GIT_AUTHOR_EMAIL": "t@t.com", "GIT_AUTHOR_NAME": "T", "GIT_COMMITTER_EMAIL": "t@t.com", "GIT_COMMITTER_NAME": "T"}, ) # Incremental import result = _invoke( "bridge", "git-import", str(git_dir), "--target", str(muse_dir), "--incremental", cwd=muse_dir, ) assert result.exit_code == 0 commits_after_second = _get_muse_log(muse_dir) assert len(commits_after_second) == 5, ( f"Expected 5 commits after incremental import, got {len(commits_after_second)}" ) def test_attribution_map_applies(self, tmp_path: pathlib.Path) -> None: git_dir = tmp_path / "git_repo" muse_dir = tmp_path / "muse_repo" attr_file = tmp_path / "attr.json" attr_file.write_text(json.dumps({"alice@example.com": "alice-muse"})) _make_git_repo(git_dir, [ { "files": {"a.py": "x=1"}, "message": "init", "author_email": "alice@example.com", "author_name": "Alice", } ]) _make_muse_repo(muse_dir) result = _invoke( "bridge", "git-import", str(git_dir), "--target", str(muse_dir), "--attribution-map", str(attr_file), cwd=muse_dir, ) assert result.exit_code == 0 commits = _get_muse_log(muse_dir) assert len(commits) == 1 # The author field should contain the mapped handle assert "alice-muse" in commits[0].get("author", ""), ( f"Expected alice-muse in author, got: {commits[0]}" ) def test_unmapped_email_gets_synthetic_handle(self, tmp_path: pathlib.Path) -> None: git_dir = tmp_path / "git_repo" muse_dir = tmp_path / "muse_repo" _make_git_repo(git_dir, [ { "files": {"a.py": "x=1"}, "message": "init", "author_email": "unknown@noreply.example.com", } ]) _make_muse_repo(muse_dir) result = _invoke( "bridge", "git-import", str(git_dir), "--target", str(muse_dir), cwd=muse_dir, ) assert result.exit_code == 0 commits = _get_muse_log(muse_dir) assert len(commits) == 1 author = commits[0].get("author", "") assert "git-import/" in author, f"Expected synthetic git-import/ handle, got: {author!r}" def test_bridge_state_written_after_import(self, tmp_path: 
pathlib.Path) -> None: git_dir = tmp_path / "git_repo" muse_dir = tmp_path / "muse_repo" _make_git_repo(git_dir, [{"files": {"a.py": "x=1"}, "message": "init"}]) _make_muse_repo(muse_dir) result = _invoke( "bridge", "git-import", str(git_dir), "--target", str(muse_dir), cwd=muse_dir, ) assert result.exit_code == 0 state_file = git_bridge_state_path(muse_dir) assert state_file.exists(), "git-bridge.toml was not written" content = state_file.read_text() assert "[last_import]" in content def test_file_content_preserved(self, tmp_path: pathlib.Path) -> None: git_dir = tmp_path / "git_repo" muse_dir = tmp_path / "muse_repo" expected_content = "# This is a test file\nresult = 42\n" _make_git_repo(git_dir, [ {"files": {"src/calc.py": expected_content}, "message": "add calc"}, ]) _make_muse_repo(muse_dir) result = _invoke( "bridge", "git-import", str(git_dir), "--target", str(muse_dir), cwd=muse_dir, ) assert result.exit_code == 0 # Read the snapshot manifest and find the object ID for src/calc.py from muse.core.refs import write_branch_ref from muse.core.paths import git_bridge_state_path, heads_dir import tomllib # Get HEAD commit log_result = _invoke("log", "--json", cwd=muse_dir) log_data = json.loads(log_result.output.strip()) commits = log_data.get("commits", []) assert commits, "No commits in log" snapshot_id = commits[0].get("snapshot_id", "") assert snapshot_id, f"No snapshot_id in commit: {commits[0]}" from muse.core.snapshots import read_snapshot snapshot = read_snapshot(muse_dir, snapshot_id) assert snapshot is not None, "Could not read snapshot" assert "src/calc.py" in snapshot.manifest, ( f"src/calc.py not in manifest: {list(snapshot.manifest.keys())}" ) object_id = snapshot.manifest["src/calc.py"] from muse.core.object_store import read_object stored = read_object(muse_dir, object_id) assert stored is not None assert stored.decode() == expected_content # --------------------------------------------------------------------------- # Tier 3 — Edge Cases # 
# ---------------------------------------------------------------------------


def _head_commit(muse_root: pathlib.Path) -> dict:
    """Return the first entry of the muse log, failing the test if it is empty.

    The tests in this file treat the first log entry as the HEAD commit.
    Asserting here keeps callers from passing vacuously when an import wrote
    nothing.
    """
    commits = _get_muse_log(muse_root)
    assert commits, "No commits in muse log after import"
    return commits[0]


def _head_snapshot(muse_root: pathlib.Path):
    """Return the snapshot referenced by the first log entry.

    Fails the test when the commit has no readable snapshot, so manifest
    checks in callers always run.
    """
    from muse.core.snapshots import read_snapshot

    head = _head_commit(muse_root)
    snapshot = read_snapshot(muse_root, head.get("snapshot_id", ""))
    assert snapshot is not None, f"Could not read snapshot for commit: {head}"
    return snapshot


class TestEdgeCases:
    """Edge case handling."""

    def test_empty_git_repo_exits_zero(self, tmp_path: pathlib.Path) -> None:
        """A commit-less git repo exits 0 and reports zero commits written."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        # Create a git repo but no commits
        subprocess.run(["git", "init", str(git_dir)], check=True, capture_output=True)
        subprocess.run(
            ["git", "-C", str(git_dir), "config", "user.email", "t@t.com"],
            check=True,
            capture_output=True,
        )
        subprocess.run(
            ["git", "-C", str(git_dir), "config", "user.name", "T"],
            check=True,
            capture_output=True,
        )
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            "--json",
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        # Check the JSON output includes total_commits_written: 0
        found_done = False
        for line in result.output.strip().splitlines():
            if not line.strip():
                continue
            obj = json.loads(line)
            if obj.get("event") == "done":
                found_done = True
                assert obj.get("total_commits_written", -1) == 0
        assert found_done, f"No done event in output: {result.output!r}"

    def test_from_ref_nonexistent_exits_user_error(self, tmp_path: pathlib.Path) -> None:
        """An unknown ``--from-ref`` SHA fails and leaves the muse log empty."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [{"files": {"a.py": "x"}, "message": "init"}])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            "--from-ref", "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef",
            cwd=muse_dir,
        )
        # Should fail with USER_ERROR before writing anything
        assert result.exit_code != 0
        # Error path must not write any commits to the object store
        assert _get_muse_log(muse_dir) == [], (
            "from-ref with bad SHA wrote commits to the object store"
        )

    def test_incremental_no_bridge_state_does_full_import(self, tmp_path: pathlib.Path) -> None:
        """``--incremental`` on a fresh muse repo imports every commit."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {"files": {"a.py": "x=1"}, "message": "first"},
            {"files": {"b.py": "y=2"}, "message": "second"},
        ])
        _make_muse_repo(muse_dir)

        # No bridge state file — incremental should fall back to full import
        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            "--incremental",
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        commits = _get_muse_log(muse_dir)
        assert len(commits) == 2, f"Expected 2 commits, got {len(commits)}"

    def test_lfs_pointer_skipped_with_lfs_skip(self, tmp_path: pathlib.Path) -> None:
        """With ``--lfs-skip`` an LFS pointer file is absent from the manifest."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        lfs_pointer = (
            "version https://git-lfs.github.com/spec/v1\n"
            "oid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393\n"
            "size 12345\n"
        )
        _make_git_repo(git_dir, [
            {"files": {"large_file.bin": lfs_pointer, "a.py": "x=1"}, "message": "add files"},
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            "--lfs-skip",
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        # The LFS pointer file should not appear in the manifest.  The snapshot
        # lookup asserts, so an import that wrote nothing fails here instead of
        # skipping the check.
        snapshot = _head_snapshot(muse_dir)
        assert "large_file.bin" not in snapshot.manifest, (
            "LFS pointer should be excluded when --lfs-skip is set"
        )

    def test_excluded_file_not_in_manifest(self, tmp_path: pathlib.Path) -> None:
        """A committed ``.pyc`` file is absent from the imported manifest."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {
                "files": {
                    "src/main.py": "x=1",
                    "src/util.pyc": "\x00" * 16,
                },
                "message": "add files",
            },
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        snapshot = _head_snapshot(muse_dir)
        assert "src/util.pyc" not in snapshot.manifest, (
            ".pyc files should be excluded by default"
        )

    def test_conventional_commit_feat_becomes_minor_bump(self, tmp_path: pathlib.Path) -> None:
        """A ``feat:`` message is recorded with ``sem_ver_bump == "minor"``."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {"files": {"a.py": "x=1"}, "message": "feat: add awesome feature"},
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        head = _head_commit(muse_dir)
        assert head.get("sem_ver_bump") == "minor", (
            f"feat: commit should have minor sem_ver_bump, got {head.get('sem_ver_bump')!r}"
        )

    def test_conventional_commit_fix_becomes_patch_bump(self, tmp_path: pathlib.Path) -> None:
        """A ``fix:`` message is recorded with ``sem_ver_bump == "patch"``."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {"files": {"a.py": "x=1"}, "message": "fix: correct off-by-one"},
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        head = _head_commit(muse_dir)
        assert head.get("sem_ver_bump") == "patch"

    def test_conventional_commit_breaking_becomes_major_bump(self, tmp_path: pathlib.Path) -> None:
        """A ``feat!:`` message is recorded with ``sem_ver_bump == "major"``."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {"files": {"a.py": "x=1"}, "message": "feat!: BREAKING CHANGE remove old API"},
        ])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        head = _head_commit(muse_dir)
        assert head.get("sem_ver_bump") == "major"


# ---------------------------------------------------------------------------
# Tier 4 — Stress
# ---------------------------------------------------------------------------


class TestStress:
    """Stress tests with large commit counts."""

    def test_import_100_commits(self, tmp_path: pathlib.Path) -> None:
        """100 git commits import within 30 s and yield 100 log entries."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"

        # Create 100 commits
        commits = [
            {"files": {f"file_{i:03d}.txt": f"content {i}"}, "message": f"commit {i:03d}"}
            for i in range(100)
        ]
        _make_git_repo(git_dir, commits)
        _make_muse_repo(muse_dir)

        # perf_counter is monotonic, so the gate is immune to wall-clock jumps.
        start = time.perf_counter()
        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        elapsed = time.perf_counter() - start

        assert result.exit_code == 0, f"import failed: {result.stderr}"
        assert elapsed < 30.0, f"100-commit import took {elapsed:.1f}s (limit: 30s)"

        log_commits = _get_muse_log(muse_dir)
        assert len(log_commits) == 100, f"Expected 100 commits, got {len(log_commits)}"

    def test_cat_file_stays_alive(self, tmp_path: pathlib.Path) -> None:
        """Single _CatFile instance handles multiple reads without crashing."""
        git_dir = tmp_path / "git_repo"
        _make_git_repo(git_dir, [
            {"files": {f"f{i}.txt": f"content {i}"}, "message": f"c{i}"}
            for i in range(10)
        ])

        from muse.core.bridge.git_primitives import _CatFile, _git

        # Get all blob SHAs from the git repo
        ls_tree = _git(git_dir, "ls-tree", "-r", "--format=%(objectname)", "HEAD")
        shas = [s.strip() for s in ls_tree.strip().splitlines() if s.strip()]
        # Without this the read loop below could run zero times and still pass.
        assert shas, "git ls-tree returned no blob SHAs"

        with _CatFile(git_dir) as cf:
            for sha in shas:
                content = cf.read(sha)
                assert isinstance(content, bytes)


# ---------------------------------------------------------------------------
# Tier 5 — Data Integrity
# ---------------------------------------------------------------------------


class TestDataIntegrity:
    """Content-addressed integrity and determinism tests."""

    def test_sha256_of_blob_matches_object_store(self, tmp_path: pathlib.Path) -> None:
        """The manifest ID for a file equals ``blob_id`` of its bytes."""
        from muse.core.object_store import read_object
        from muse.core.types import blob_id

        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        content = "unique content for hash verification\n"
        _make_git_repo(git_dir, [{"files": {"verify.txt": content}, "message": "add file"}])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        snapshot = _head_snapshot(muse_dir)
        assert "verify.txt" in snapshot.manifest
        stored_id = snapshot.manifest["verify.txt"]

        stored_bytes = read_object(muse_dir, stored_id)
        assert stored_bytes is not None
        # Check the payload as well as the ID, so a correct key pointing at
        # wrong bytes is caught.
        assert stored_bytes == content.encode(), (
            f"stored bytes {stored_bytes!r} differ from source content"
        )

        expected_id = blob_id(content.encode())
        assert stored_id == expected_id, f"stored {stored_id} != expected {expected_id}"

    def test_no_duplicate_objects_on_reimport(self, tmp_path: pathlib.Path) -> None:
        """Importing the same repo twice leaves the object-file count unchanged."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [
            {"files": {"a.py": "x=1"}, "message": "init"},
            {"files": {"b.py": "y=2"}, "message": "second"},
        ])
        _make_muse_repo(muse_dir)
        obj_dir = objects_dir(muse_dir)

        def _count_object_files() -> int:
            """Count regular files anywhere under the objects directory."""
            return sum(1 for entry in obj_dir.glob("**/*") if entry.is_file())

        # First import
        result = _invoke(
            "bridge", "git-import", str(git_dir), "--target", str(muse_dir), cwd=muse_dir
        )
        assert result.exit_code == 0

        # Count objects after first import
        first_count = _count_object_files()

        # Second import of same repo — same commits
        result = _invoke(
            "bridge", "git-import", str(git_dir), "--target", str(muse_dir), cwd=muse_dir
        )
        assert result.exit_code == 0

        second_count = _count_object_files()
        assert second_count == first_count, (
            f"Re-import created {second_count - first_count} new objects (expected 0)"
        )

    def test_reflog_appended(self, tmp_path: pathlib.Path) -> None:
        """At least one non-empty file exists under the logs directory after import."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [{"files": {"a.py": "x=1"}, "message": "init"}])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0

        # Reflog lives at .muse/logs/refs/heads/
        log_dir = logs_dir(muse_dir)
        log_files = list(log_dir.glob("**/*")) if log_dir.exists() else []
        has_content = any(f.is_file() and f.stat().st_size > 0 for f in log_files)
        assert has_content, (
            f"No reflog entries were written after import. "
            f"Log dir contents: {[str(f) for f in log_files]}"
        )


# ---------------------------------------------------------------------------
# Tier 6 — Performance
# ---------------------------------------------------------------------------


class TestPerformance:
    """Performance gate tests."""

    @pytest.mark.slow
    def test_100_commit_import_under_5_seconds(self, tmp_path: pathlib.Path) -> None:
        """A 100-commit import completes in under five seconds."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        commits = [
            {"files": {f"f{i}.py": f"x={i}"}, "message": f"c{i}"}
            for i in range(100)
        ]
        _make_git_repo(git_dir, commits)
        _make_muse_repo(muse_dir)

        # Monotonic, high-resolution clock: wall-clock changes cannot skew it.
        start = time.perf_counter()
        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        elapsed = time.perf_counter() - start

        assert result.exit_code == 0
        assert elapsed < 5.0, f"100-commit import took {elapsed:.2f}s (limit: 5s)"

    @pytest.mark.slow
    def test_incremental_1_commit_under_500ms(self, tmp_path: pathlib.Path) -> None:
        """An incremental import of one new commit completes in under 0.5 s."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        _make_git_repo(git_dir, [{"files": {"a.py": "x=1"}, "message": "init"}])
        _make_muse_repo(muse_dir)

        # Full import first.  Checked so a broken setup is reported as such
        # rather than surfacing as a confusing timing or exit-code failure.
        setup = _invoke(
            "bridge", "git-import", str(git_dir), "--target", str(muse_dir), cwd=muse_dir
        )
        assert setup.exit_code == 0, f"initial full import failed: {setup.stderr}"

        # Add one more commit
        (git_dir / "b.py").write_text("y=2")
        subprocess.run(
            ["git", "-C", str(git_dir), "add", "."],
            check=True,
            capture_output=True,
        )
        subprocess.run(
            ["git", "-C", str(git_dir), "commit", "-m", "incremental"],
            check=True,
            capture_output=True,
            env={
                **os.environ,
                "GIT_AUTHOR_EMAIL": "t@t.com",
                "GIT_AUTHOR_NAME": "T",
                "GIT_COMMITTER_EMAIL": "t@t.com",
                "GIT_COMMITTER_NAME": "T",
            },
        )

        start = time.perf_counter()
        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            "--incremental",
            cwd=muse_dir,
        )
        elapsed = time.perf_counter() - start

        assert result.exit_code == 0
        assert elapsed < 0.5, f"Incremental 1-commit import took {elapsed:.3f}s (limit: 0.5s)"


# ---------------------------------------------------------------------------
# Tier 7 — Security
# ---------------------------------------------------------------------------


class TestSecurity:
    """Security-sensitive input handling."""

    def test_git_commit_message_ansi_stripped(self, tmp_path: pathlib.Path) -> None:
        """An ANSI escape in a git commit message is absent from the stored message."""
        git_dir = tmp_path / "git_repo"
        muse_dir = tmp_path / "muse_repo"
        ansi_msg = "\x1b[31mRed message\x1b[0m"
        _make_git_repo(git_dir, [{"files": {"a.py": "x"}, "message": ansi_msg}])
        _make_muse_repo(muse_dir)

        result = _invoke(
            "bridge", "git-import", str(git_dir),
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code == 0
        # The head-commit lookup asserts that a commit exists, so this check
        # cannot be skipped when the import wrote nothing.
        msg = _head_commit(muse_dir).get("message", "")
        assert "\x1b[" not in msg, f"ANSI escape sequence found in stored message: {msg!r}"

    def test_attribution_map_control_chars_rejected(self, tmp_path: pathlib.Path) -> None:
        """A NUL byte in a mapped handle never reaches the returned handle."""
        from muse.core.bridge.git_primitives import AttributionMapper

        attr_file = tmp_path / "attr.json"
        # Map with NUL byte in handle
        attr_file.write_text(json.dumps({"alice@example.com": "alice\x00bad"}))

        mapper = AttributionMapper(attr_file)
        handle = mapper.get_handle("alice@example.com", "Alice")
        # NUL byte must not appear in the returned handle
        assert "\x00" not in handle, f"Control char in handle: {handle!r}"

    def test_source_path_traversal_rejected(self, tmp_path: pathlib.Path) -> None:
        """A ``../``-style source path makes the command exit non-zero."""
        muse_dir = tmp_path / "muse_repo"
        _make_muse_repo(muse_dir)

        # Use a path that looks like traversal — has no .git dir so should fail gracefully
        result = _invoke(
            "bridge", "git-import", "../../../../etc",
            "--target", str(muse_dir),
            cwd=muse_dir,
        )
        assert result.exit_code != 0, "Path traversal source should be rejected"


# ---------------------------------------------------------------------------
# Tier 8 — Docstrings
# ---------------------------------------------------------------------------


class TestDocstrings:
    """Implementation symbols carry docstrings."""

    def test_replay_commit_has_docstring(self) -> None:
        """``_replay_commit`` has a non-empty ``__doc__``."""
        from muse.core.bridge.importer import _replay_commit

        assert _replay_commit.__doc__, "_replay_commit missing docstring"

    def test_cat_file_has_docstring(self) -> None:
        """``_CatFile`` has a non-empty ``__doc__``."""
        from muse.core.bridge.git_primitives import _CatFile

        assert _CatFile.__doc__, "_CatFile missing docstring"

    def test_attribution_mapper_has_docstring(self) -> None:
        """``AttributionMapper`` has a non-empty ``__doc__``."""
        from muse.core.bridge.git_primitives import AttributionMapper

        assert AttributionMapper.__doc__, "AttributionMapper missing docstring"

    def test_replay_branch_has_docstring(self) -> None:
        """``_replay_branch`` has a non-empty ``__doc__``."""
        from muse.core.bridge.importer import _replay_branch

        assert _replay_branch.__doc__, "_replay_branch missing docstring"

    def test_batch_commit_log_has_docstring(self) -> None:
        """``_batch_commit_log`` has a non-empty ``__doc__``."""
        from muse.core.bridge.git_primitives import _batch_commit_log

        assert _batch_commit_log.__doc__, "_batch_commit_log missing docstring"