"""Comprehensive unit tests for build_lineage — the symbol provenance engine.

build_lineage(address, commits) takes a pre-sorted list of CommitRecord objects
and returns the provenance chain. No repo, no disk I/O — pure in-memory.

Coverage matrix
---------------
Created     — InsertOp with no prior live symbol sharing the content_id
Copied      — InsertOp whose content_id matches a currently-live symbol
Renamed     — InsertOp + DeleteOp in same commit with matching content_id, same file
Moved       — InsertOp + DeleteOp in same commit with matching content_id, different file
Modified    — ReplaceOp classified as signature_change / full_rewrite
Deleted     — DeleteOp at the target address
Multi-event — symbol created, modified, deleted, re-created in the same history
Registry    — incremental content_id registry enables accurate copy detection
              across many commits without re-parsing blobs
No events   — address absent from all commits → empty list
Empty list  — no commits at all → empty list
"""

from __future__ import annotations

import datetime

import pytest

from muse.cli.commands.lineage import build_lineage
from muse.core.store import CommitRecord
from muse.domain import DeleteOp, DomainOp, InsertOp, ReplaceOp

# ---------------------------------------------------------------------------
# Helpers — pure in-memory, no disk I/O
# ---------------------------------------------------------------------------

# Base timestamp; every synthetic commit is dated relative to it.
_T0 = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)

# Single-element list used as a mutable module-level counter for _uid().
_SEQ: list[int] = [0]


def _uid() -> str:
    """Return the next counter value as a zero-padded, 64-character string."""
    _SEQ[0] += 1
    return f"{_SEQ[0]:064d}"


def _commit(
    ops: list[DomainOp],
    *,
    offset_days: int = 0,
    message: str = "commit",
    commit_id: str | None = None,
) -> CommitRecord:
    """Return a synthetic CommitRecord — no repo, no disk write.

    Args:
        ops: Operations stored under the ``"ops"`` key of ``structured_delta``.
        offset_days: Days added to ``_T0`` to produce ``committed_at``.
        message: Commit message.
        commit_id: Explicit commit id; when omitted (or empty) a fresh id is
            taken from ``_uid()``.

    Returns:
        A CommitRecord on branch ``"main"`` whose ``snapshot_id`` is
        ``"snap-"`` followed by the record's own commit id.
    """
    # Resolve the id exactly once so commit_id and snapshot_id always agree
    # and the counter advances by a single step per generated commit.
    resolved_id = commit_id or _uid()
    return CommitRecord(
        commit_id=resolved_id,
        branch="main",
        snapshot_id=f"snap-{resolved_id}",
        message=message,
        committed_at=_T0 + datetime.timedelta(days=offset_days),
        structured_delta={"ops": ops},
    )


def _insert(address: str, content_id: str) -> InsertOp:
    """Build an InsertOp for *address* with no position and an empty summary."""
    return InsertOp(
        op="insert",
        address=address,
        content_id=content_id,
        position=None,
        content_summary="",
    )


def _delete(address: str, content_id: str) -> DeleteOp:
    """Build a DeleteOp for *address* with no position and an empty summary."""
    return DeleteOp(
        op="delete",
        address=address,
        content_id=content_id,
        position=None,
        content_summary="",
    )


def _replace(
    address: str,
    old_cid: str,
    new_cid: str,
    old_summary: str = "impl changed",
    new_summary: str = "impl changed",
) -> ReplaceOp:
    """Build a ReplaceOp at *address* going from *old_cid* to *new_cid*."""
    return ReplaceOp(
        op="replace",
        address=address,
        old_content_id=old_cid,
        new_content_id=new_cid,
        old_summary=old_summary,
        new_summary=new_summary,
        position=None,
    )


# ---------------------------------------------------------------------------
# Empty / no-op cases
# ---------------------------------------------------------------------------


class TestEmptyCases:
    """Inputs that must produce an empty lineage."""

    def test_no_commits_returns_empty(self) -> None:
        """An empty commit list yields an empty result."""
        assert build_lineage("src/billing.py::compute_total", []) == []

    def test_address_not_in_any_commit(self) -> None:
        """Commits that only touch other addresses yield an empty result."""
        c = _commit([_insert("src/auth.py::login", "cid-login")])
        assert build_lineage("src/billing.py::compute_total", [c]) == []

    def test_commit_with_no_structured_delta(self) -> None:
        """A commit whose structured_delta is None contributes no events."""
        record = CommitRecord(
            commit_id="b" * 64,
            branch="main",
            snapshot_id="snap",
            message="no delta",
            committed_at=_T0,
            structured_delta=None,
        )
        assert build_lineage("src/main.py::main", [record]) == []


# ---------------------------------------------------------------------------
# Created
# ---------------------------------------------------------------------------


class TestCreated:
    """A first insert of the target address is reported as 'created'."""

    def test_first_insert_is_created(self) -> None:
        """A lone insert produces exactly one event of kind 'created'."""
        addr = "src/billing.py::compute_total"
        c = _commit([_insert(addr, "cid-v1")])
        events = build_lineage(addr, [c])
        assert len(events) == 1
        assert events[0].kind == "created"

    def test_created_event_has_correct_commit_id(self) -> None:
        """The event carries the id of the commit that contained the insert."""
        addr = "src/main.py::main"
        cid = "d" * 64
        c = _commit([_insert(addr, "cid-v1")], commit_id=cid)
        events = build_lineage(addr, [c])
        assert events[0].commit_id == cid

    def test_created_event_records_content_id(self) -> None:
        """The inserted content_id is exposed as new_content_id."""
        addr = "src/main.py::main"
        c = _commit([_insert(addr, "cid-abc")])
        events = build_lineage(addr, [c])
        assert events[0].new_content_id == "cid-abc"


# ---------------------------------------------------------------------------
# Deleted
# ---------------------------------------------------------------------------


class TestDeleted:
    """A DeleteOp at the target address is reported as 'deleted'."""

    def test_delete_after_insert_is_deleted(self) -> None:
        """Insert followed by delete ends the chain with a 'deleted' event."""
        addr = "src/api.py::get_user"
        c1 = _commit([_insert(addr, "cid-v1")], offset_days=0)
        c2 = _commit([_delete(addr, "cid-v1")], offset_days=1)
        events = build_lineage(addr, [c1, c2])
        assert events[-1].kind == "deleted"

    def test_deleted_event_records_content_id(self) -> None:
        """The deleted content_id is exposed as old_content_id."""
        addr = "src/api.py::delete_user"
        c1 = _commit([_insert(addr, "cid-v1")], offset_days=0)
        c2 = _commit([_delete(addr, "cid-v1")], offset_days=1)
        events = build_lineage(addr, [c1, c2])
        assert events[-1].kind == "deleted"
        assert events[-1].old_content_id == "cid-v1"


# ---------------------------------------------------------------------------
# Modified
# ---------------------------------------------------------------------------


class TestModified:
    """A ReplaceOp at the target address is reported as 'modified'."""

    def test_replace_op_is_modified(self) -> None:
        """A replace after the initial insert produces a 'modified' event."""
        addr = "src/core.py::hash_content"
        c1 = _commit([_insert(addr, "cid-v1")], offset_days=0)
        c2 = _commit([_replace(addr, "cid-v1", "cid-v2")], offset_days=1)
        events = build_lineage(addr, [c1, c2])
        assert any(e.kind == "modified" for e in events)

    def test_modified_signature_change(self) -> None:
        """'signature changed' summaries give detail 'signature_change'."""
        addr = "src/core.py::process"
        c1 = _commit([_insert(addr, "cid-v1")], offset_days=0)
        c2 = _commit(
            [_replace(addr, "cid-v1", "cid-v2", "signature changed", "signature changed")],
            offset_days=1,
        )
        events = build_lineage(addr, [c1, c2])
        modified = [e for e in events if e.kind == "modified"]
        assert modified[0].detail == "signature_change"

    def test_modified_full_rewrite(self) -> None:
        """The helper's default 'impl changed' summaries give detail 'full_rewrite'."""
        addr = "src/core.py::transform"
        c1 = _commit([_insert(addr, "cid-aaaa")], offset_days=0)
        c2 = _commit([_replace(addr, "cid-aaaa", "cid-bbbb")], offset_days=1)
        events = build_lineage(addr, [c1, c2])
        modified = [e for e in events if e.kind == "modified"]
        assert modified[0].detail == "full_rewrite"

    def test_multiple_modifications(self) -> None:
        """Two successive replaces produce two 'modified' events."""
        addr = "src/worker.py::run"
        c1 = _commit([_insert(addr, "cid-v1")], offset_days=0)
        c2 = _commit([_replace(addr, "cid-v1", "cid-v2")], offset_days=1)
        c3 = _commit([_replace(addr, "cid-v2", "cid-v3")], offset_days=2)
        events = build_lineage(addr, [c1, c2, c3])
        assert len([e for e in events if e.kind == "modified"]) == 2


# ---------------------------------------------------------------------------
# Renamed
# ---------------------------------------------------------------------------


class TestRenamed:
    """Insert + delete of the same content_id within one file, in one commit."""

    def test_insert_delete_same_file_is_renamed(self) -> None:
        """The new address gets a 'renamed_from' event."""
        old_addr = "src/billing.py::_compute_total"
        new_addr = "src/billing.py::compute_total"
        c1 = _commit([_insert(old_addr, "cid-body")], offset_days=0)
        c2 = _commit(
            [_insert(new_addr, "cid-body"), _delete(old_addr, "cid-body")],
            offset_days=1,
        )
        events = build_lineage(new_addr, [c1, c2])
        assert any(e.kind == "renamed_from" for e in events)

    def test_renamed_from_detail_is_source_address(self) -> None:
        """The 'renamed_from' event's detail is the old address."""
        old_addr = "src/billing.py::_inner"
        new_addr = "src/billing.py::public_api"
        c1 = _commit([_insert(old_addr, "cid-body")], offset_days=0)
        c2 = _commit(
            [_insert(new_addr, "cid-body"), _delete(old_addr, "cid-body")],
            offset_days=1,
        )
        events = build_lineage(new_addr, [c1, c2])
        renamed = [e for e in events if e.kind == "renamed_from"]
        assert renamed[0].detail == old_addr


# ---------------------------------------------------------------------------
# Moved
# ---------------------------------------------------------------------------


class TestMoved:
    """Insert + delete of the same content_id across files, in one commit."""

    def test_insert_delete_different_file_is_moved(self) -> None:
        """The new address gets a 'moved_from' event."""
        old_addr = "old/billing.py::compute_invoice_total"
        new_addr = "src/billing.py::compute_invoice_total"
        c1 = _commit([_insert(old_addr, "cid-body")], offset_days=0)
        c2 = _commit(
            [_insert(new_addr, "cid-body"), _delete(old_addr, "cid-body")],
            offset_days=1,
        )
        events = build_lineage(new_addr, [c1, c2])
        assert any(e.kind == "moved_from" for e in events)

    def test_moved_from_detail_is_original_address(self) -> None:
        """The 'moved_from' event's detail is the old address."""
        old_addr = "legacy/module.py::process"
        new_addr = "src/processing.py::process"
        c1 = _commit([_insert(old_addr, "cid-body")], offset_days=0)
        c2 = _commit(
            [_insert(new_addr, "cid-body"), _delete(old_addr, "cid-body")],
            offset_days=1,
        )
        events = build_lineage(new_addr, [c1, c2])
        moved = [e for e in events if e.kind == "moved_from"]
        assert moved[0].detail == old_addr


# ---------------------------------------------------------------------------
# Copied
# ---------------------------------------------------------------------------


class TestCopied:
    """Insert whose content_id matches a symbol that is still live."""

    def test_insert_matching_live_symbol_is_copied(self) -> None:
        """Inserting content that another live address holds gives 'copied_from'."""
        original = "src/utils.py::helper"
        copy = "src/other.py::helper"
        shared = "cid-shared-body"
        c1 = _commit([_insert(original, shared)], offset_days=0)
        c2 = _commit([_insert(copy, shared)], offset_days=1)
        events = build_lineage(copy, [c1, c2])
        assert any(e.kind == "copied_from" for e in events)

    def test_copied_from_detail_is_source_address(self) -> None:
        """The 'copied_from' event's detail is the original address."""
        original = "src/utils.py::helper"
        copy = "src/other.py::helper"
        shared = "cid-shared"
        c1 = _commit([_insert(original, shared)], offset_days=0)
        c2 = _commit([_insert(copy, shared)], offset_days=1)
        events = build_lineage(copy, [c1, c2])
        copied = [e for e in events if e.kind == "copied_from"]
        assert copied[0].detail == original

    def test_no_copy_if_source_is_dead(self) -> None:
        """Source deleted before target inserted → 'created', not 'copied_from'."""
        original = "src/utils.py::helper"
        copy = "src/other.py::helper"
        shared = "cid-shared"
        c1 = _commit([_insert(original, shared)], offset_days=0)
        c2 = _commit([_delete(original, shared)], offset_days=1)
        c3 = _commit([_insert(copy, shared)], offset_days=2)
        events = build_lineage(copy, [c1, c2, c3])
        insert_events = [e for e in events if e.kind in ("created", "copied_from")]
        assert insert_events[0].kind == "created"
# ---------------------------------------------------------------------------
# Complex multi-event sequences
# ---------------------------------------------------------------------------


class TestMultiEvent:
    """Histories in which several commits touch the target address."""

    def test_create_modify_delete_sequence(self) -> None:
        """Insert, replace, delete → exactly those three kinds, in that order."""
        target = "src/core.py::process"
        history = [
            _commit([_insert(target, "cid-v1")], offset_days=0),
            _commit([_replace(target, "cid-v1", "cid-v2")], offset_days=1),
            _commit([_delete(target, "cid-v2")], offset_days=2),
        ]
        observed = [event.kind for event in build_lineage(target, history)]
        assert observed == ["created", "modified", "deleted"]

    def test_delete_then_recreate(self) -> None:
        """Re-inserting a deleted address is reported as a second 'created'."""
        target = "src/api.py::endpoint"
        history = [
            _commit([_insert(target, "cid-v1")], offset_days=0),
            _commit([_delete(target, "cid-v1")], offset_days=1),
            _commit([_insert(target, "cid-v2")], offset_days=2),
        ]
        lineage = build_lineage(target, history)
        assert [event.kind for event in lineage] == ["created", "deleted", "created"]

    def test_ordered_by_commit_position_in_list(self) -> None:
        """build_lineage processes commits in list order — caller is responsible for sorting."""
        target = "src/main.py::main"
        # The list below is already oldest-first, as the caller is expected to supply it.
        oldest = _commit([_insert(target, "cid-v1")], offset_days=0)
        newest = _commit([_replace(target, "cid-v1", "cid-v2")], offset_days=2)
        lineage = build_lineage(target, [oldest, newest])
        first, second = lineage[0], lineage[1]
        assert first.kind == "created"
        assert second.kind == "modified"

    def test_many_commits_accumulate_all_events(self) -> None:
        """One insert plus nine chained replaces yields ten events."""
        target = "src/worker.py::run"
        history = [_commit([_insert(target, "cid-0")], offset_days=0)]
        # Each replace moves from the previous step's content id to this step's.
        history.extend(
            _commit(
                [_replace(target, f"cid-{step - 1}", f"cid-{step}")],
                offset_days=step,
            )
            for step in range(1, 10)
        )
        lineage = build_lineage(target, history)
        assert len(lineage) == 10
        head, tail = lineage[0], lineage[1:]
        assert head.kind == "created"
        assert all(event.kind == "modified" for event in tail)

    def test_commit_message_propagated(self) -> None:
        """The commit's message is carried onto the resulting event."""
        target = "src/main.py::main"
        only_commit = _commit([_insert(target, "cid-v1")], message="Initial commit")
        lineage = build_lineage(target, [only_commit])
        assert lineage[0].message == "Initial commit"


# ---------------------------------------------------------------------------
# to_dict output shape
# ---------------------------------------------------------------------------


class TestJsonOutputShape:
    """Shape of the dictionary returned by an event's to_dict()."""

    def test_to_dict_has_expected_keys(self) -> None:
        """The dict exposes the four expected keys and 'event' is the kind."""
        target = "src/main.py::main"
        only_commit = _commit([_insert(target, "cid-abc123")], commit_id="a" * 64)
        payload = build_lineage(target, [only_commit])[0].to_dict()
        for required_key in ("commit_id", "committed_at", "event", "message"):
            assert required_key in payload
        assert payload["event"] == "created"

    def test_to_dict_commit_id_is_full_sha(self) -> None:
        """commit_id must be the full 64-character SHA — never truncated."""
        target = "src/main.py::main"
        full_cid = "f" * 64
        only_commit = _commit([_insert(target, "cid-abc")], commit_id=full_cid)
        lineage = build_lineage(target, [only_commit])
        serialized_id = lineage[0].to_dict()["commit_id"]
        assert serialized_id == full_cid
        assert len(serialized_id) == 64


# ---------------------------------------------------------------------------
# Incremental registry — copy detection across many commits
# ---------------------------------------------------------------------------
class TestIncrementalRegistry:
    """Copy detection relies on live content_ids carried across commits."""

    def test_registry_tracks_all_live_symbols(self) -> None:
        """Registry must track symbols in commits that don't touch the target."""
        shared_cid = "cid-shared"
        history = [
            _commit([_insert("src/a.py::foo", shared_cid)], offset_days=0),
            _commit([_insert("src/b.py::foo", shared_cid)], offset_days=1),
        ]
        first_event = build_lineage("src/b.py::foo", history)[0]
        assert first_event.kind == "copied_from"
        assert first_event.detail == "src/a.py::foo"

    def test_registry_prunes_deleted_symbols(self) -> None:
        """After deleting the original, its content_id leaves the live registry."""
        shared = "cid-shared"
        original = "src/a.py::foo"
        target = "src/b.py::foo"
        history = [
            _commit([_insert(original, shared)], offset_days=0),
            _commit([_delete(original, shared)], offset_days=1),
            _commit([_insert(target, shared)], offset_days=2),
        ]
        lineage = build_lineage(target, history)
        assert lineage[0].kind == "created"

    def test_registry_survives_many_intermediate_commits(self) -> None:
        """Original in commit 1; target copied in commit 12 — registry must persist."""
        shared = "cid-shared"
        original = "src/lib.py::util"
        target = "src/app.py::util"
        history = [
            _commit([_insert(original, shared)], offset_days=0),
            # Ten unrelated inserts sit between the original and the copy.
            *(
                _commit(
                    [_insert(f"src/other_{day}.py::fn", f"cid-other-{day}")],
                    offset_days=day,
                )
                for day in range(1, 11)
            ),
            _commit([_insert(target, shared)], offset_days=11),
        ]
        lineage = build_lineage(target, history)
        assert lineage[0].kind == "copied_from"