gabriel / musehub public

test_musehub_proposals_touched_symbols.py file-level

at sha256:3 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:0 fix: fall back to any indexed mpack in read_object_bytes when push mpac… · gabriel · Jun 17, 2026
1 """Tests for Signal 2: symbol anchor overlap via touched_symbols on proposals.
2
3 Covers:
4 - _symbols_from_delta extracts correct symbol addresses
5 - touched_symbols populated at create_proposal time from existing branch commits
6 - touched_symbols refreshed at merge_proposal time
7 - find_proposals_by_symbol_overlap returns match when anchors intersect
8 - find_proposals_by_symbol_overlap returns empty when no intersection
9 - empty symbol_anchors returns empty list immediately
10 - cross-repo isolation
11 """
12 from __future__ import annotations
13
14 import secrets
15 from datetime import datetime, timezone
16 from typing import TypedDict
17
18 import pytest
19 from sqlalchemy.ext.asyncio import AsyncSession
20
21 from muse.core.types import fake_id, now_utc_iso
22 from musehub.core.genesis import compute_branch_id, compute_identity_id, compute_proposal_id, compute_repo_id
23 from musehub.db.musehub_repo_models import MusehubBranch, MusehubCommit, MusehubCommitRef, MusehubRepo
24 from musehub.db.musehub_social_models import MusehubProposal
25 from musehub.services import musehub_issues
26 from musehub.services.musehub_proposals import (
27 _symbols_from_delta,
28 _touched_symbols_for_branch,
29 create_proposal,
30 merge_proposal,
31 )
32
33
34 # ---------------------------------------------------------------------------
35 # Helpers
36 # ---------------------------------------------------------------------------
37
38
39 class _ChildOp(TypedDict):
40 op: str
41 address: str
42 content_summary: str
43
44
45 class _DeltaOp(TypedDict):
46 address: str
47 child_ops: list[_ChildOp]
48
49
50 class _Delta(TypedDict):
51 ops: list[_DeltaOp]
52
53
54 def _uid() -> str:
55 return secrets.token_hex(16)
56
57
def _commit_id() -> str:
    """Return the result of ``fake_id`` applied to a fresh random hex token."""
    seed = _uid()
    return fake_id(seed)
60
61
def _delta(*symbol_addresses: str) -> _Delta:
    """Build a minimal structured_delta containing the given symbol addresses.

    Each address yields one file-level op (addressed by the text before the
    first "::") holding a single "update" child op for the full address.
    Duplicate addresses are kept as separate ops.
    """
    file_ops: list[_DeltaOp] = []
    for symbol in symbol_addresses:
        # Everything before the first "::" is the owning file's address.
        file_address = symbol.partition("::")[0]
        child = _ChildOp(op="update", address=symbol, content_summary="function")
        file_ops.append(_DeltaOp(address=file_address, child_ops=[child]))
    return _Delta(ops=file_ops)
73
74
async def _make_repo(db: AsyncSession, slug: str = "sym-test") -> str:
    """Insert and commit a public repo owned by "testuser"; return its repo_id as a str.

    The repo_id is derived from the owner identity, the slug, the literal
    domain "code" and the creation timestamp.
    """
    now = datetime.now(tz=timezone.utc)
    owner_identity = compute_identity_id(b"testuser")
    record = MusehubRepo(
        repo_id=compute_repo_id(owner_identity, slug, "code", now.isoformat()),
        name=slug,
        owner="testuser",
        slug=slug,
        visibility="public",
        owner_user_id=owner_identity,
        created_at=now,
        updated_at=now,
    )
    db.add(record)
    await db.commit()
    # Reload the row after the commit before reading repo_id back.
    await db.refresh(record)
    return str(record.repo_id)
93
94
async def _make_commit(
    db: AsyncSession,
    repo_id: str,
    *,
    branch: str,
    symbol_addresses: list[str] | None = None,
    commit_id: str | None = None,
) -> str:
    """Seed a commit with an optional structured_delta and return its commit_id.

    A ``MusehubCommitRef`` linking the commit to ``repo_id`` is added as well.
    The session is flushed but not committed. When ``commit_id`` is falsy a
    generated id is used; when ``symbol_addresses`` is falsy the commit gets no
    structured_delta.
    """
    new_id = commit_id or _commit_id()
    commit_row = MusehubCommit(
        commit_id=new_id,
        branch=branch,
        parent_ids=[],
        message="test commit",
        author="tester",
        timestamp=datetime.now(timezone.utc),
        structured_delta=_delta(*symbol_addresses) if symbol_addresses else None,
    )
    repo_link = MusehubCommitRef(repo_id=repo_id, commit_id=new_id)
    db.add(commit_row)
    db.add(repo_link)
    await db.flush()
    return new_id
118
119
async def _make_branch(
    db: AsyncSession, repo_id: str, name: str, head_commit_id: str | None = None
) -> None:
    """Seed a branch record and flush it (no commit).

    The branch_id is computed from ``repo_id`` and ``name``.
    """
    db.add(
        MusehubBranch(
            branch_id=compute_branch_id(repo_id, name),
            repo_id=repo_id,
            name=name,
            head_commit_id=head_commit_id,
        )
    )
    await db.flush()
132
133
134 # ---------------------------------------------------------------------------
135 # Unit tests for _symbols_from_delta
136 # ---------------------------------------------------------------------------
137
138
def test_symbols_from_delta_extracts_symbol_addresses() -> None:
    """Both symbol addresses in the delta are returned, and nothing else."""
    expected = (
        "musehub/services/musehub_issues.py::create_issue",
        "musehub/services/musehub_issues.py::get_issue",
    )
    extracted = _symbols_from_delta(_delta(*expected))
    for symbol in expected:
        assert symbol in extracted
    assert len(extracted) == 2
148
149
def test_symbols_from_delta_skips_file_level_ops() -> None:
    """File-level ops without '::' in address must not appear in result."""
    file_only_op = {
        "address": "musehub/services/musehub_issues.py",
        "child_ops": [],
    }
    extracted = _symbols_from_delta({"ops": [file_only_op]})
    assert extracted == []
162
163
def test_symbols_from_delta_handles_non_dict() -> None:
    """None, a plain string and an empty dict all yield an empty result."""
    for malformed in (None, "bad", {}):
        assert _symbols_from_delta(malformed) == []
168
169
def test_symbols_from_delta_deduplicates() -> None:
    """A symbol address that appears twice in the delta is reported once."""
    symbol = "musehub/services/foo.py::bar"
    extracted = _symbols_from_delta(_delta(symbol, symbol))
    occurrences = extracted.count("musehub/services/foo.py::bar")
    assert occurrences == 1
177
178
179 # ---------------------------------------------------------------------------
180 # Integration tests: touched_symbols populated at create / merge
181 # ---------------------------------------------------------------------------
182
183
async def test_touched_symbols_for_branch_extracts_from_commits(
    db_session: AsyncSession,
) -> None:
    """Symbols from every commit seeded on the branch are collected."""
    repo_id = await _make_repo(db_session, "ts-branch-extract")
    # Two commits on the same branch: the first touches two symbols, the second one.
    per_commit_symbols = (
        ["a/b.py::foo", "a/b.py::bar"],
        ["a/c.py::baz"],
    )
    for addresses in per_commit_symbols:
        await _make_commit(
            db_session,
            repo_id,
            branch="feat/s2",
            symbol_addresses=addresses,
        )
    await db_session.commit()

    touched = await _touched_symbols_for_branch(db_session, repo_id, "feat/s2")
    for symbol in ("a/b.py::foo", "a/b.py::bar", "a/c.py::baz"):
        assert symbol in touched
    assert len(touched) == 3
205
206
async def test_create_proposal_populates_touched_symbols(
    db_session: AsyncSession,
) -> None:
    """create_proposal stores the from-branch commit's symbol in touched_symbols."""
    feature_branch = "feat/create-signal"
    symbol = "musehub/services/x.py::MyFunc"

    repo_id = await _make_repo(db_session, "ts-create")
    head = await _make_commit(
        db_session,
        repo_id,
        branch=feature_branch,
        symbol_addresses=[symbol],
    )
    # Both branches point at the same head commit.
    await _make_branch(db_session, repo_id, feature_branch, head)
    await _make_branch(db_session, repo_id, "main", head)
    await db_session.commit()

    proposal = await create_proposal(
        db_session,
        repo_id=repo_id,
        title="Test proposal",
        from_branch=feature_branch,
        to_branch="main",
    )
    await db_session.commit()

    # Fetch the raw ORM row to verify the column was written.
    from sqlalchemy import select

    query = select(MusehubProposal).where(
        MusehubProposal.proposal_id == proposal.proposal_id
    )
    stored = (await db_session.execute(query)).scalar_one()
    assert symbol in (stored.touched_symbols or [])
235
236
async def test_merge_proposal_refreshes_touched_symbols(
    db_session: AsyncSession,
) -> None:
    """touched_symbols at merge time includes any new commits added after create."""
    feature_branch = "feat/refresh"

    repo_id = await _make_repo(db_session, "ts-merge")
    first_head = await _make_commit(
        db_session,
        repo_id,
        branch=feature_branch,
        symbol_addresses=["svc/old.py::OldFunc"],
    )
    await _make_branch(db_session, repo_id, feature_branch, first_head)
    # The target branch gets its own commit with no structured_delta.
    main_head = await _make_commit(db_session, repo_id, branch="main")
    await _make_branch(db_session, repo_id, "main", main_head)
    await db_session.commit()

    proposal = await create_proposal(
        db_session,
        repo_id=repo_id,
        title="Refresh test",
        from_branch=feature_branch,
        to_branch="main",
    )
    await db_session.commit()

    # Push a new commit to the feature branch after proposal creation.
    second_head = await _make_commit(
        db_session,
        repo_id,
        branch=feature_branch,
        symbol_addresses=["svc/new.py::NewFunc"],
    )

    # Update the branch head.
    from sqlalchemy import select

    branch_query = select(MusehubBranch).where(
        MusehubBranch.repo_id == repo_id, MusehubBranch.name == "feat/refresh"
    )
    feature_row = (await db_session.execute(branch_query)).scalar_one()
    feature_row.head_commit_id = second_head
    await db_session.flush()
    await db_session.commit()

    await merge_proposal(db_session, repo_id, proposal.proposal_id)
    await db_session.commit()

    proposal_query = select(MusehubProposal).where(
        MusehubProposal.proposal_id == proposal.proposal_id
    )
    stored = (await db_session.execute(proposal_query)).scalar_one()
    touched = stored.touched_symbols or []
    # Both the pre-existing symbol and the one added after creation must be present.
    for symbol in ("svc/old.py::OldFunc", "svc/new.py::NewFunc"):
        assert symbol in touched
287
288
289 # ---------------------------------------------------------------------------
290 # Integration tests: find_proposals_by_symbol_overlap
291 # ---------------------------------------------------------------------------
292
293
async def test_symbol_overlap_returns_match(db_session: AsyncSession) -> None:
    """A merged proposal whose touched_symbols contains the anchor is returned."""
    symbol = "musehub/services/musehub_issues.py::create_issue"
    repo_id = await _make_repo(db_session, "overlap-match")

    # Manually seed a proposal with a known touched_symbols.
    proposal_id = compute_proposal_id(
        repo_id, compute_identity_id(b"tester"), "feat/fix", "main", now_utc_iso()
    )
    db_session.add(
        MusehubProposal(
            proposal_id=proposal_id,
            repo_id=repo_id,
            proposal_number=1,
            title="Fix create_issue bug",
            body="",
            state="merged",
            from_branch="feat/fix",
            to_branch="main",
            author="tester",
            touched_symbols=[symbol],
        )
    )
    await db_session.commit()

    matches = await musehub_issues.find_proposals_by_symbol_overlap(
        db_session, repo_id, [symbol]
    )
    assert len(matches) == 1
    hit = matches[0]
    assert hit["proposal_id"] == proposal_id
    assert hit["state"] == "merged"
    assert hit["match_reason"] == "symbol_overlap"
323
324
async def test_symbol_overlap_no_match(db_session: AsyncSession) -> None:
    """A proposal touching a different symbol is not returned for the anchor."""
    repo_id = await _make_repo(db_session, "overlap-no-match")

    proposal_id = compute_proposal_id(
        repo_id,
        compute_identity_id(b"tester"),
        "feat/unrelated",
        "main",
        now_utc_iso(),
    )
    db_session.add(
        MusehubProposal(
            proposal_id=proposal_id,
            repo_id=repo_id,
            proposal_number=1,
            title="Unrelated proposal",
            body="",
            state="merged",
            from_branch="feat/unrelated",
            to_branch="main",
            author="tester",
            touched_symbols=["musehub/services/other.py::some_fn"],
        )
    )
    await db_session.commit()

    # The queried anchor does not appear in the seeded proposal's touched_symbols.
    anchors = ["musehub/services/musehub_issues.py::create_issue"]
    matches = await musehub_issues.find_proposals_by_symbol_overlap(
        db_session, repo_id, anchors
    )
    assert matches == []
350
351
async def test_symbol_overlap_empty_anchors_returns_empty(
    db_session: AsyncSession,
) -> None:
    """Querying with an empty anchor list yields an empty result."""
    repo_id = await _make_repo(db_session, "overlap-empty")
    await db_session.commit()

    no_anchors: list[str] = []
    matches = await musehub_issues.find_proposals_by_symbol_overlap(
        db_session, repo_id, no_anchors
    )
    assert matches == []
362
363
async def test_symbol_overlap_cross_repo_isolation(db_session: AsyncSession) -> None:
    """A matching proposal in repo A is not returned when querying repo B."""
    symbol = "musehub/services/musehub_issues.py::create_issue"
    repo_a = await _make_repo(db_session, "overlap-repo-a")
    repo_b = await _make_repo(db_session, "overlap-repo-b")

    proposal_id = compute_proposal_id(
        repo_a, compute_identity_id(b"tester"), "feat/a", "main", now_utc_iso()
    )
    db_session.add(
        MusehubProposal(
            proposal_id=proposal_id,
            repo_id=repo_a,
            proposal_number=1,
            title="Proposal in repo A",
            body="",
            state="merged",
            from_branch="feat/a",
            to_branch="main",
            author="tester",
            touched_symbols=[symbol],
        )
    )
    await db_session.commit()

    # Query against repo_b — must return nothing.
    matches = await musehub_issues.find_proposals_by_symbol_overlap(
        db_session, repo_b, [symbol]
    )
    assert matches == []
391
392
async def test_symbol_overlap_open_proposal_matched(db_session: AsyncSession) -> None:
    """Open proposals with matching touched_symbols are returned."""
    symbol = "musehub/api/routes/musehub/ui_issues.py::issue_detail_page"
    repo_id = await _make_repo(db_session, "overlap-open")

    proposal_id = compute_proposal_id(
        repo_id,
        compute_identity_id(b"tester"),
        "feat/in-progress",
        "main",
        now_utc_iso(),
    )
    db_session.add(
        MusehubProposal(
            proposal_id=proposal_id,
            repo_id=repo_id,
            proposal_number=1,
            title="In-progress fix",
            body="",
            state="open",
            from_branch="feat/in-progress",
            to_branch="main",
            author="tester",
            touched_symbols=[symbol],
        )
    )
    await db_session.commit()

    matches = await musehub_issues.find_proposals_by_symbol_overlap(
        db_session, repo_id, [symbol]
    )
    assert len(matches) == 1
    hit = matches[0]
    assert hit["proposal_id"] == proposal_id
    assert hit["state"] == "open"