gabriel / musehub public
test_symbols_v2_p1_coupling_count.py python
292 lines 10.6 KB
Raw
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 1 day ago
1 """TDD spec — Phase 1: coupling_count column on MusehubSymbolVitals.
2
3 Problem
4 ───────
5 The symbol list page needs a per-symbol coupling score (how many unique
6 symbols this one co-changes with) without running a COUNT(*) on
7 musehub_symbol_coupling at request time.
8
9 Solution
10 ────────
11 Add ``coupling_count INT DEFAULT 0`` to ``musehub_symbol_vitals``.
12 After ``_upsert_symbol_coupling`` runs, update vitals with the count derived
13 from the coupling table (SELECT COUNT(*) WHERE repo_id=? AND address=?).
14
15 This keeps the symbol list query a single LEFT JOIN — no sub-selects,
16 no aggregations at request time.
17
18 Tier breakdown
19 ──────────────
20 V101 Schema — coupling_count column exists on musehub_symbol_vitals
21 V102 Schema — default value is 0, not nullable
22 V103 Indexer — coupling_count populated after build_symbol_index
23 V104 Indexer — coupling_count accurate: matches distinct co_address count
24 V105 Indexer — coupling_count idempotent: re-running build_symbol_index gives same result
25 V106 Indexer — symbol with no coupling partners has coupling_count = 0
26 V107 Schema — cascade delete removes vitals row (coupling_count included)
27 """
28 from __future__ import annotations
29
30 import secrets
31 from datetime import datetime, timezone
32
33 import pytest
34 from sqlalchemy import select, text
35 from sqlalchemy.ext.asyncio import AsyncSession
36
37 from musehub.db.musehub_intel_models import MusehubSymbolCoupling, MusehubSymbolVitals
38 from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef
39 from muse.core.types import blob_id, long_id
40 from musehub.services.musehub_symbol_indexer import build_symbol_index
41 from tests.factories import create_repo
42
43
44 # ---------------------------------------------------------------------------
45 # Helpers
46 # ---------------------------------------------------------------------------
47
def _now() -> datetime:
    """Return the current moment as a timezone-aware datetime in UTC."""
    utc = timezone.utc
    return datetime.now(tz=utc)
50
51
def _cid() -> str:
    """Return a fresh content ID: ``blob_id`` applied to 32 random bytes."""
    random_payload = secrets.token_bytes(32)
    return blob_id(random_payload)
54
55
def _lid() -> str:
    """Return a fresh long ID: ``long_id`` applied to a random 64-char hex string."""
    # token_hex(32) yields 32 random bytes rendered as 64 hexadecimal characters.
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)
58
59
async def _make_commit(
    session: AsyncSession,
    repo_id: str,
    addresses: list[str],
    *,
    parent_id: str | None = None,
    branch: str = "dev",
    message: str = "feat: test",
    op: str = "insert",
) -> MusehubCommit:
    """Stage a commit carrying a ``structured_delta`` and flush it to *session*.

    The delta holds one op per entry in *addresses*; every op uses *op* as its
    kind and gets its own freshly generated ``new_content_id``.  A
    ``MusehubCommitRef`` for ``(repo_id, commit_id)`` is added alongside the
    commit.  The delta is what lets the indexer pick the commit up.

    Args:
        session: Session the new rows are added to and flushed on.
        repo_id: Repository the commit ref is recorded against.
        addresses: Symbol addresses touched by this commit.
        parent_id: Optional parent commit ID; a falsy value means no parents.
        branch: Branch name stored on the commit.
        message: Commit message stored on the commit.
        op: Operation kind written into every delta op.

    Returns:
        The newly added ``MusehubCommit``.
    """
    new_commit_id = _lid()
    parents = [] if not parent_id else [parent_id]
    created_at = _now()

    # One op per address, each with an independent random content ID.
    delta_ops = []
    for address in addresses:
        delta_ops.append({"address": address, "op": op, "new_content_id": _cid()})

    new_commit = MusehubCommit(
        commit_id=new_commit_id,
        branch=branch,
        message=message,
        author="gabriel",
        parent_ids=parents,
        timestamp=created_at,
        structured_delta={"ops": delta_ops},
    )
    commit_ref = MusehubCommitRef(repo_id=repo_id, commit_id=new_commit_id)

    session.add(new_commit)
    session.add(commit_ref)
    await session.flush()
    return new_commit
88
89
async def _push_index(session: AsyncSession, repo_id: str, head_commit_id: str) -> None:
    """Index *repo_id* up to *head_commit_id*, backfill coupling, then flush.

    Runs ``build_symbol_index`` followed by ``backfill_coupling`` (with
    ``min_shared=1``), the same sequence the background job is described as
    running at push time.
    """
    # Imported at call time, as in the original helper.
    from musehub.services.musehub_symbol_indexer import backfill_coupling

    await build_symbol_index(session, repo_id, head_commit_id)
    await backfill_coupling(session, repo_id, min_shared=1)

    await session.flush()
96
97
98 # ---------------------------------------------------------------------------
99 # V101 — coupling_count column exists on musehub_symbol_vitals
100 # ---------------------------------------------------------------------------
101
@pytest.mark.asyncio
async def test_v101_coupling_count_column_exists(db_session: AsyncSession) -> None:
    """V101: ``musehub_symbol_vitals`` exposes a ``coupling_count`` column."""
    # Ask the database catalogue directly rather than trusting the ORM model.
    column_lookup = text(
        "SELECT column_name FROM information_schema.columns "
        "WHERE table_name = 'musehub_symbol_vitals' AND column_name = 'coupling_count'"
    )
    lookup_result = await db_session.execute(column_lookup)
    found = lookup_result.fetchone()

    assert found is not None, "coupling_count column not found on musehub_symbol_vitals"
112
113
114 # ---------------------------------------------------------------------------
115 # V102 — default is 0, not nullable
116 # ---------------------------------------------------------------------------
117
@pytest.mark.asyncio
async def test_v102_coupling_count_default_zero_not_nullable(db_session: AsyncSession) -> None:
    """coupling_count must default to 0 and be NOT NULL.

    Nullability and the server default are read from
    ``information_schema.columns``.  The reported default is normalised before
    comparison, so spellings such as ``0``, ``'0'``, ``(0)`` or
    ``'0'::integer`` are accepted while values that merely contain a zero
    digit (``10``, ``100``) are rejected.
    """
    result = await db_session.execute(
        text(
            "SELECT is_nullable, column_default "
            "FROM information_schema.columns "
            "WHERE table_name = 'musehub_symbol_vitals' AND column_name = 'coupling_count'"
        )
    )
    row = result.fetchone()
    assert row is not None
    is_nullable, column_default = row
    assert is_nullable == "NO", "coupling_count must be NOT NULL"

    # Reduce the catalogue's textual default to its bare literal: drop any
    # "::type" cast suffix, then surrounding quotes, parentheses and spaces.
    # A plain substring test ("0" in ...) would also accept 10, 100, 20, ...
    default_literal = (
        None
        if column_default is None
        else str(column_default).split("::", 1)[0].strip("'\"() ")
    )
    assert default_literal == "0", \
        f"coupling_count must default to 0, got: {column_default}"
134
135
136 # ---------------------------------------------------------------------------
137 # V103 — coupling_count populated after build_symbol_index
138 # ---------------------------------------------------------------------------
139
@pytest.mark.asyncio
async def test_v103_coupling_count_populated_after_index(db_session: AsyncSession) -> None:
    """V103: indexing a commit that touches two symbols gives alpha a count of 1."""
    repo = await create_repo(db_session)
    repo_id = repo.repo_id

    # alpha and beta change in one commit, which makes them coupling partners.
    coupled_pair = ["src/foo.py::alpha", "src/foo.py::beta"]
    commit = await _make_commit(db_session, repo_id, coupled_pair)
    await _push_index(db_session, repo_id, commit.commit_id)

    alpha_query = select(MusehubSymbolVitals).where(
        MusehubSymbolVitals.repo_id == repo_id,
        MusehubSymbolVitals.address == "src/foo.py::alpha",
    )
    alpha_result = await db_session.execute(alpha_query)
    vitals_alpha = alpha_result.scalar_one_or_none()

    assert vitals_alpha is not None
    assert vitals_alpha.coupling_count == 1, \
        f"alpha coupled to beta → coupling_count should be 1, got {vitals_alpha.coupling_count}"
163
164
165 # ---------------------------------------------------------------------------
166 # V104 — coupling_count accurate: matches distinct co_address count
167 # ---------------------------------------------------------------------------
168
@pytest.mark.asyncio
async def test_v104_coupling_count_matches_distinct_co_address_count(db_session: AsyncSession) -> None:
    """V104: alpha's coupling_count equals its row count in the coupling table.

    Two chained commits give alpha three partners (beta, gamma, delta), so the
    stored count must be 3 and must match the coupling rows found for alpha.
    """
    repo = await create_repo(db_session)
    repo_id = repo.repo_id
    alpha_address = "src/a.py::alpha"

    # First commit: alpha changes together with beta and gamma.
    first_commit = await _make_commit(
        db_session,
        repo_id,
        [alpha_address, "src/a.py::beta", "src/a.py::gamma"],
        message="feat: first",
    )
    # Second commit: alpha changes together with delta, adding a third partner.
    head_commit = await _make_commit(
        db_session,
        repo_id,
        [alpha_address, "src/a.py::delta"],
        parent_id=first_commit.commit_id,
        message="feat: second",
        op="replace",
    )
    await _push_index(db_session, repo_id, head_commit.commit_id)

    vitals_query = select(MusehubSymbolVitals).where(
        MusehubSymbolVitals.repo_id == repo_id,
        MusehubSymbolVitals.address == alpha_address,
    )
    vitals = (await db_session.execute(vitals_query)).scalar_one()

    coupling_query = select(MusehubSymbolCoupling).where(
        MusehubSymbolCoupling.repo_id == repo_id,
        MusehubSymbolCoupling.address == alpha_address,
    )
    coupling_rows = (await db_session.execute(coupling_query)).scalars().all()

    assert vitals.coupling_count == len(coupling_rows), (
        f"coupling_count {vitals.coupling_count} != distinct coupling rows {len(coupling_rows)}"
    )
    assert vitals.coupling_count == 3
210
211
212 # ---------------------------------------------------------------------------
213 # V105 — idempotent: re-running build_symbol_index gives same result
214 # ---------------------------------------------------------------------------
215
@pytest.mark.asyncio
async def test_v105_coupling_count_idempotent(db_session: AsyncSession) -> None:
    """V105: indexing the same head twice still leaves x with a count of 1."""
    repo = await create_repo(db_session)
    repo_id = repo.repo_id

    commit = await _make_commit(db_session, repo_id, ["src/b.py::x", "src/b.py::y"])

    # Index the same head twice; the second pass must not inflate the count.
    for _ in range(2):
        await _push_index(db_session, repo_id, commit.commit_id)

    x_query = select(MusehubSymbolVitals).where(
        MusehubSymbolVitals.repo_id == repo_id,
        MusehubSymbolVitals.address == "src/b.py::x",
    )
    x_vitals = (await db_session.execute(x_query)).scalar_one()

    assert x_vitals.coupling_count == 1
235
236
237 # ---------------------------------------------------------------------------
238 # V106 — isolated symbol has coupling_count = 0
239 # ---------------------------------------------------------------------------
240
@pytest.mark.asyncio
async def test_v106_isolated_symbol_has_zero_coupling_count(db_session: AsyncSession) -> None:
    """V106: a symbol committed on its own ends up with coupling_count = 0."""
    repo = await create_repo(db_session)
    repo_id = repo.repo_id
    solo_address = "src/solo.py::lone_wolf"

    # The commit touches exactly one symbol, so there is nothing to couple with.
    commit = await _make_commit(db_session, repo_id, [solo_address])
    await _push_index(db_session, repo_id, commit.commit_id)

    solo_query = select(MusehubSymbolVitals).where(
        MusehubSymbolVitals.repo_id == repo_id,
        MusehubSymbolVitals.address == solo_address,
    )
    solo_vitals = (await db_session.execute(solo_query)).scalar_one()

    assert solo_vitals.coupling_count == 0
259
260
261 # ---------------------------------------------------------------------------
262 # V107 — cascade delete removes vitals row (coupling_count included)
263 # ---------------------------------------------------------------------------
264
@pytest.mark.asyncio
async def test_v107_cascade_delete_removes_vitals(db_session: AsyncSession) -> None:
    """V107: deleting a repo leaves no ``musehub_symbol_vitals`` rows for it."""
    repo = await create_repo(db_session)
    repo_id = repo.repo_id

    # Seed one vitals row directly, with a non-zero coupling_count.
    vitals_row = MusehubSymbolVitals(
        repo_id=repo_id,
        address="src/c.py::fn",
        first_introduced=_now(),
        change_count=1,
        version_count=1,
        op_add=1,
        op_modify=0,
        op_delete=0,
        op_move=0,
        coupling_count=3,
    )
    db_session.add(vitals_row)
    await db_session.flush()

    # Remove the owning repo and push the delete to the database.
    await db_session.delete(repo)
    await db_session.flush()

    leftover_query = select(MusehubSymbolVitals).where(
        MusehubSymbolVitals.repo_id == repo_id
    )
    leftover_result = await db_session.execute(leftover_query)
    remaining = leftover_result.scalars().all()
    assert remaining == []
File History 3 commits
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 1 day ago
sha256:6b1949fc2797ca4c1936a637a4cbfec828ef56cf52398a2e74ca3c4f494e728f fix: use wire_bytes not mpack_bytes_raw in compute_object_b… Sonnet 4.6 patch 10 days ago
sha256:4aed3d8601c8dd3ed37074de35f11f4a9699a0a4b99d43727048fd3f8e6fd13d chore: doc sweep, ignore wrangler build state, misc fixes Sonnet 4.6 minor 12 days ago