gabriel / muse public
test_lineage_algorithm.py python
407 lines 16.3 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
1 """Comprehensive unit tests for build_lineage — the symbol provenance engine.
2
3 build_lineage(address, commits) takes a pre-sorted list of CommitRecord objects
4 and returns the provenance chain. No repo, no disk I/O — pure in-memory.
5
6 Coverage matrix
7 ---------------
8 Created — InsertOp with no prior live symbol sharing the content_id
9 Copied — InsertOp whose content_id matches a currently-live symbol
10 Renamed — InsertOp + DeleteOp in same commit with matching content_id, same file
11 Moved — InsertOp + DeleteOp in same commit with matching content_id, different file
12 Modified — ReplaceOp classified as signature_change / full_rewrite
13 Deleted — DeleteOp at the target address
14 Multi-event — symbol created, modified, deleted, re-created in the same history
15 Registry — incremental content_id registry enables accurate copy detection
16 across many commits without re-parsing blobs
17 No events — address absent from all commits → empty list
18 Empty list — no commits at all → empty list
19 """
20
21 from __future__ import annotations
22
23 import datetime
24
25 import pytest
26
27 from muse.cli.commands.lineage import build_lineage
28 from muse.core.commits import CommitRecord
29 from muse.domain import DeleteOp, DomainOp, InsertOp, ReplaceOp
30
31
32 # ---------------------------------------------------------------------------
33 # Helpers — pure in-memory, no disk I/O
34 # ---------------------------------------------------------------------------
35
36
# Base timestamp for synthetic commits (timezone-aware, UTC).
_T0 = datetime.datetime(year=2026, month=1, day=1, tzinfo=datetime.timezone.utc)

# One-element list used as a mutable module-level counter for generated ids.
_SEQ: list[int] = [0]


def _uid() -> str:
    """Advance the module counter and return it as a 64-character zero-padded id."""
    next_value = _SEQ[0] + 1
    _SEQ[0] = next_value
    return str(next_value).zfill(64)
44
45
def _commit(
    ops: list[DomainOp],
    *,
    offset_days: int = 0,
    message: str = "commit",
    commit_id: str | None = None,
) -> CommitRecord:
    """Return a synthetic CommitRecord — no repo, no disk write.

    Args:
        ops: Operations stored under the ``"ops"`` key of ``structured_delta``.
        offset_days: Days added to ``_T0`` to form ``committed_at``.
        message: Commit message recorded on the commit.
        commit_id: Explicit commit id. When omitted (or empty), a fresh id is
            taken from ``_uid()``.

    Returns:
        A CommitRecord on branch ``"main"`` whose ``snapshot_id`` is
        ``"snap-"`` followed by the commit id.
    """
    # Resolve the id exactly once: calling _uid() separately for commit_id and
    # snapshot_id would hand out two different ids for the same commit.
    resolved_id = commit_id or _uid()
    return CommitRecord(
        commit_id=resolved_id,
        branch="main",
        snapshot_id=f"snap-{resolved_id}",
        message=message,
        committed_at=_T0 + datetime.timedelta(days=offset_days),
        structured_delta={"ops": ops},
    )
62
63
def _insert(address: str, content_id: str) -> InsertOp:
    """Build an InsertOp for *address* with *content_id*, no position, and an empty summary."""
    op = InsertOp(
        op="insert",
        address=address,
        content_id=content_id,
        position=None,
        content_summary="",
    )
    return op
66
67
def _delete(address: str, content_id: str) -> DeleteOp:
    """Build a DeleteOp for *address* with *content_id*, no position, and an empty summary."""
    op = DeleteOp(
        op="delete",
        address=address,
        content_id=content_id,
        position=None,
        content_summary="",
    )
    return op
70
71
def _replace(
    address: str,
    old_cid: str,
    new_cid: str,
    old_summary: str = "impl changed",
    new_summary: str = "impl changed",
) -> ReplaceOp:
    """Build a ReplaceOp at *address* going from *old_cid* to *new_cid*.

    Both summaries default to ``"impl changed"``; the position is always None.
    """
    op = ReplaceOp(
        op="replace",
        address=address,
        old_content_id=old_cid,
        new_content_id=new_cid,
        old_summary=old_summary,
        new_summary=new_summary,
        position=None,
    )
    return op
88
89
90 # ---------------------------------------------------------------------------
91 # Empty / no-op cases
92 # ---------------------------------------------------------------------------
93
94
class TestEmptyCases:
    """Inputs for which the expected lineage is an empty list."""

    def test_no_commits_returns_empty(self) -> None:
        """An empty commit list yields no events."""
        events = build_lineage("src/billing.py::compute_total", [])
        assert events == []

    def test_address_not_in_any_commit(self) -> None:
        """A history that only touches another address yields no events."""
        unrelated = _commit([_insert("src/auth.py::login", "cid-login")])
        events = build_lineage("src/billing.py::compute_total", [unrelated])
        assert events == []

    def test_commit_with_no_structured_delta(self) -> None:
        """A record whose structured_delta is None yields no events."""
        bare_record = CommitRecord(
            commit_id="b" * 64,
            branch="main",
            snapshot_id="snap",
            message="no delta",
            committed_at=_T0,
            structured_delta=None,
        )
        events = build_lineage("src/main.py::main", [bare_record])
        assert events == []
113
114
115 # ---------------------------------------------------------------------------
116 # Created
117 # ---------------------------------------------------------------------------
118
119
class TestCreated:
    """A lone InsertOp at the target address is expected to be reported as ``created``."""

    def test_first_insert_is_created(self) -> None:
        """A single insert produces exactly one ``created`` event."""
        target = "src/billing.py::compute_total"
        history = [_commit([_insert(target, "cid-v1")])]
        events = build_lineage(target, history)
        assert len(events) == 1
        first = events[0]
        assert first.kind == "created"

    def test_created_event_has_correct_commit_id(self) -> None:
        """The event carries the id of the commit that inserted the symbol."""
        target = "src/main.py::main"
        expected_id = "d" * 64
        record = _commit([_insert(target, "cid-v1")], commit_id=expected_id)
        first = build_lineage(target, [record])[0]
        assert first.commit_id == expected_id

    def test_created_event_records_content_id(self) -> None:
        """The event exposes the inserted content id as ``new_content_id``."""
        target = "src/main.py::main"
        record = _commit([_insert(target, "cid-abc")])
        first = build_lineage(target, [record])[0]
        assert first.new_content_id == "cid-abc"
140
141
142 # ---------------------------------------------------------------------------
143 # Deleted
144 # ---------------------------------------------------------------------------
145
146
class TestDeleted:
    """A DeleteOp at the target address is expected to end the chain with ``deleted``."""

    def test_delete_after_insert_is_deleted(self) -> None:
        """Insert followed by delete: the last event is ``deleted``."""
        target = "src/api.py::get_user"
        history = [
            _commit([_insert(target, "cid-v1")], offset_days=0),
            _commit([_delete(target, "cid-v1")], offset_days=1),
        ]
        last = build_lineage(target, history)[-1]
        assert last.kind == "deleted"

    def test_deleted_event_records_content_id(self) -> None:
        """The ``deleted`` event exposes the removed content id as ``old_content_id``."""
        target = "src/api.py::delete_user"
        history = [
            _commit([_insert(target, "cid-v1")], offset_days=0),
            _commit([_delete(target, "cid-v1")], offset_days=1),
        ]
        last = build_lineage(target, history)[-1]
        assert last.kind == "deleted"
        assert last.old_content_id == "cid-v1"
162
163
164 # ---------------------------------------------------------------------------
165 # Modified
166 # ---------------------------------------------------------------------------
167
168
class TestModified:
    """ReplaceOps at the target address are expected to surface as ``modified`` events."""

    def test_replace_op_is_modified(self) -> None:
        """Insert followed by replace: at least one ``modified`` event appears."""
        target = "src/core.py::hash_content"
        history = [
            _commit([_insert(target, "cid-v1")], offset_days=0),
            _commit([_replace(target, "cid-v1", "cid-v2")], offset_days=1),
        ]
        kinds = [event.kind for event in build_lineage(target, history)]
        assert "modified" in kinds

    def test_modified_signature_change(self) -> None:
        """Summaries of ``signature changed`` give detail ``signature_change``."""
        target = "src/core.py::process"
        created = _commit([_insert(target, "cid-v1")], offset_days=0)
        changed = _commit(
            [_replace(target, "cid-v1", "cid-v2", "signature changed", "signature changed")],
            offset_days=1,
        )
        events = build_lineage(target, [created, changed])
        modifications = [event for event in events if event.kind == "modified"]
        assert modifications[0].detail == "signature_change"

    def test_modified_full_rewrite(self) -> None:
        """A replace with the helper's default summaries gives detail ``full_rewrite``."""
        target = "src/core.py::transform"
        created = _commit([_insert(target, "cid-aaaa")], offset_days=0)
        rewritten = _commit([_replace(target, "cid-aaaa", "cid-bbbb")], offset_days=1)
        events = build_lineage(target, [created, rewritten])
        modifications = [event for event in events if event.kind == "modified"]
        assert modifications[0].detail == "full_rewrite"

    def test_multiple_modifications(self) -> None:
        """Two successive replaces produce two ``modified`` events."""
        target = "src/worker.py::run"
        history = [
            _commit([_insert(target, "cid-v1")], offset_days=0),
            _commit([_replace(target, "cid-v1", "cid-v2")], offset_days=1),
            _commit([_replace(target, "cid-v2", "cid-v3")], offset_days=2),
        ]
        events = build_lineage(target, history)
        modified_count = sum(1 for event in events if event.kind == "modified")
        assert modified_count == 2
203
204
205 # ---------------------------------------------------------------------------
206 # Renamed
207 # ---------------------------------------------------------------------------
208
209
class TestRenamed:
    """Insert + delete with the same content id in one commit, within the same file."""

    def test_insert_delete_same_file_is_renamed(self) -> None:
        """The new address's lineage contains a ``renamed_from`` event."""
        source = "src/billing.py::_compute_total"
        target = "src/billing.py::compute_total"
        history = [
            _commit([_insert(source, "cid-body")], offset_days=0),
            _commit([_insert(target, "cid-body"), _delete(source, "cid-body")], offset_days=1),
        ]
        kinds = [event.kind for event in build_lineage(target, history)]
        assert "renamed_from" in kinds

    def test_renamed_from_detail_is_source_address(self) -> None:
        """The ``renamed_from`` event's detail is the old address."""
        source = "src/billing.py::_inner"
        target = "src/billing.py::public_api"
        history = [
            _commit([_insert(source, "cid-body")], offset_days=0),
            _commit([_insert(target, "cid-body"), _delete(source, "cid-body")], offset_days=1),
        ]
        events = build_lineage(target, history)
        renames = [event for event in events if event.kind == "renamed_from"]
        assert renames[0].detail == source
227
228
229 # ---------------------------------------------------------------------------
230 # Moved
231 # ---------------------------------------------------------------------------
232
233
class TestMoved:
    """Insert + delete with the same content id in one commit, across different files."""

    def test_insert_delete_different_file_is_moved(self) -> None:
        """The new address's lineage contains a ``moved_from`` event."""
        source = "old/billing.py::compute_invoice_total"
        target = "src/billing.py::compute_invoice_total"
        history = [
            _commit([_insert(source, "cid-body")], offset_days=0),
            _commit([_insert(target, "cid-body"), _delete(source, "cid-body")], offset_days=1),
        ]
        kinds = [event.kind for event in build_lineage(target, history)]
        assert "moved_from" in kinds

    def test_moved_from_detail_is_original_address(self) -> None:
        """The ``moved_from`` event's detail is the original address."""
        source = "legacy/module.py::process"
        target = "src/processing.py::process"
        history = [
            _commit([_insert(source, "cid-body")], offset_days=0),
            _commit([_insert(target, "cid-body"), _delete(source, "cid-body")], offset_days=1),
        ]
        events = build_lineage(target, history)
        moves = [event for event in events if event.kind == "moved_from"]
        assert moves[0].detail == source
251
252
253 # ---------------------------------------------------------------------------
254 # Copied
255 # ---------------------------------------------------------------------------
256
257
class TestCopied:
    """Inserting content that matches a still-live symbol is expected to be ``copied_from``."""

    def test_insert_matching_live_symbol_is_copied(self) -> None:
        """The copy's lineage contains a ``copied_from`` event."""
        source = "src/utils.py::helper"
        duplicate = "src/other.py::helper"
        body_id = "cid-shared-body"
        history = [
            _commit([_insert(source, body_id)], offset_days=0),
            _commit([_insert(duplicate, body_id)], offset_days=1),
        ]
        kinds = [event.kind for event in build_lineage(duplicate, history)]
        assert "copied_from" in kinds

    def test_copied_from_detail_is_source_address(self) -> None:
        """The ``copied_from`` event's detail is the address of the original."""
        source = "src/utils.py::helper"
        duplicate = "src/other.py::helper"
        body_id = "cid-shared"
        history = [
            _commit([_insert(source, body_id)], offset_days=0),
            _commit([_insert(duplicate, body_id)], offset_days=1),
        ]
        events = build_lineage(duplicate, history)
        copies = [event for event in events if event.kind == "copied_from"]
        assert copies[0].detail == source

    def test_no_copy_if_source_is_dead(self) -> None:
        """Source deleted before target inserted → 'created', not 'copied_from'."""
        source = "src/utils.py::helper"
        duplicate = "src/other.py::helper"
        body_id = "cid-shared"
        history = [
            _commit([_insert(source, body_id)], offset_days=0),
            _commit([_delete(source, body_id)], offset_days=1),
            _commit([_insert(duplicate, body_id)], offset_days=2),
        ]
        events = build_lineage(duplicate, history)
        insertions = [event for event in events if event.kind in ("created", "copied_from")]
        assert insertions[0].kind == "created"
289
290
291 # ---------------------------------------------------------------------------
292 # Complex multi-event sequences
293 # ---------------------------------------------------------------------------
294
295
class TestMultiEvent:
    """Histories in which the target address is touched by several commits."""

    def test_create_modify_delete_sequence(self) -> None:
        """Insert, replace, delete yields created → modified → deleted."""
        target = "src/core.py::process"
        history = [
            _commit([_insert(target, "cid-v1")], offset_days=0),
            _commit([_replace(target, "cid-v1", "cid-v2")], offset_days=1),
            _commit([_delete(target, "cid-v2")], offset_days=2),
        ]
        kinds = [event.kind for event in build_lineage(target, history)]
        assert kinds == ["created", "modified", "deleted"]

    def test_delete_then_recreate(self) -> None:
        """Re-inserting after a delete yields a second ``created`` event."""
        target = "src/api.py::endpoint"
        history = [
            _commit([_insert(target, "cid-v1")], offset_days=0),
            _commit([_delete(target, "cid-v1")], offset_days=1),
            _commit([_insert(target, "cid-v2")], offset_days=2),
        ]
        events = build_lineage(target, history)
        assert [event.kind for event in events] == ["created", "deleted", "created"]

    def test_ordered_by_commit_position_in_list(self) -> None:
        """build_lineage processes commits in list order — caller is responsible for sorting."""
        target = "src/main.py::main"
        # The list is handed over oldest-first, as the caller contract documents.
        oldest = _commit([_insert(target, "cid-v1")], offset_days=0)
        newest = _commit([_replace(target, "cid-v1", "cid-v2")], offset_days=2)
        events = build_lineage(target, [oldest, newest])
        first, second = events[0], events[1]
        assert first.kind == "created"
        assert second.kind == "modified"

    def test_many_commits_accumulate_all_events(self) -> None:
        """One insert followed by nine replaces yields ten events in order."""
        target = "src/worker.py::run"
        content_ids = [f"cid-{i}" for i in range(10)]
        history = [_commit([_insert(target, content_ids[0])], offset_days=0)]
        # Chain each content id to its successor: (cid-0 → cid-1) on day 1, and so on.
        for day, (before, after) in enumerate(zip(content_ids, content_ids[1:]), start=1):
            history.append(_commit([_replace(target, before, after)], offset_days=day))
        events = build_lineage(target, history)
        assert len(events) == 10
        assert events[0].kind == "created"
        assert all(event.kind == "modified" for event in events[1:])

    def test_commit_message_propagated(self) -> None:
        """The event carries the message of the commit that produced it."""
        target = "src/main.py::main"
        record = _commit([_insert(target, "cid-v1")], message="Initial commit")
        first = build_lineage(target, [record])[0]
        assert first.message == "Initial commit"
342
343
344 # ---------------------------------------------------------------------------
345 # to_dict output shape
346 # ---------------------------------------------------------------------------
347
348
class TestJsonOutputShape:
    """Shape of the dictionary returned by an event's ``to_dict()``."""

    def test_to_dict_has_expected_keys(self) -> None:
        """The dict exposes commit_id, committed_at, event and message keys."""
        target = "src/main.py::main"
        record = _commit([_insert(target, "cid-abc123")], commit_id="a" * 64)
        payload = build_lineage(target, [record])[0].to_dict()
        for key in ("commit_id", "committed_at", "event", "message"):
            assert key in payload
        assert payload["event"] == "created"

    def test_to_dict_commit_id_is_full_sha(self) -> None:
        """commit_id must be the full 64-character SHA — never truncated."""
        target = "src/main.py::main"
        expected_id = "f" * 64
        record = _commit([_insert(target, "cid-abc")], commit_id=expected_id)
        first = build_lineage(target, [record])[0]
        assert first.to_dict()["commit_id"] == expected_id
        assert len(first.to_dict()["commit_id"]) == 64
369
370
371 # ---------------------------------------------------------------------------
372 # Incremental registry — copy detection across many commits
373 # ---------------------------------------------------------------------------
374
375
class TestIncrementalRegistry:
    """Copy detection that depends on content ids seen in earlier commits."""

    def test_registry_tracks_all_live_symbols(self) -> None:
        """Registry must track symbols in commits that don't touch the target."""
        body_id = "cid-shared"
        history = [
            _commit([_insert("src/a.py::foo", body_id)], offset_days=0),
            _commit([_insert("src/b.py::foo", body_id)], offset_days=1),
        ]
        first = build_lineage("src/b.py::foo", history)[0]
        assert first.kind == "copied_from"
        assert first.detail == "src/a.py::foo"

    def test_registry_prunes_deleted_symbols(self) -> None:
        """After deleting the original, its content_id leaves the live registry."""
        body_id = "cid-shared"
        source = "src/a.py::foo"
        destination = "src/b.py::foo"
        history = [
            _commit([_insert(source, body_id)], offset_days=0),
            _commit([_delete(source, body_id)], offset_days=1),
            _commit([_insert(destination, body_id)], offset_days=2),
        ]
        first = build_lineage(destination, history)[0]
        assert first.kind == "created"

    def test_registry_survives_many_intermediate_commits(self) -> None:
        """Original in commit 1; target copied in commit 12 — registry must persist."""
        body_id = "cid-shared"
        source = "src/lib.py::util"
        destination = "src/app.py::util"
        history = [_commit([_insert(source, body_id)], offset_days=0)]
        # Ten unrelated commits (days 1-10) sit between the original and the copy.
        history.extend(
            _commit([_insert(f"src/other_{day}.py::fn", f"cid-other-{day}")], offset_days=day)
            for day in range(1, 11)
        )
        history.append(_commit([_insert(destination, body_id)], offset_days=11))
        first = build_lineage(destination, history)[0]
        assert first.kind == "copied_from"
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 28 days ago