gabriel / musehub public
test_symbols_v2_p4_intel_fields.py python
403 lines 15.0 KB
Raw
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor ⚠ breaking 1 day ago
1 """TDD spec — Phase 4: populate op and last_commit_id on musehub_symbol_intel.
2
3 Problem
4 ───────
5 musehub_symbol_intel.op and last_commit_id are always NULL.
6 The columns exist on the model and are read by the symbol list route,
7 but _compute_symbol_intel never computes them and _upsert_symbol_intel
8 never writes them.
9
10 Solution
11 ────────
12 1. Add last_op and last_commit_id to SymbolIntel TypedDict.
13 2. _compute_symbol_intel: track them alongside last_author (same loop,
14 same "is this the newest ts?" guard).
15 3. _upsert_symbol_intel: include op and last_commit_id in the row dict
16 and in the on_conflict `set_={...}` update.
17 4. backfill_intel_fields(session, repo_id): one UPDATE...FROM that reads
18 the most-recent history entry per (repo_id, address) and writes op +
19 last_commit_id into intel rows — so existing records are fixed without
20 a full re-index.
21
22 Tier breakdown
23 ──────────────
24 I401 _compute_symbol_intel returns last_op per symbol
25 I402 _compute_symbol_intel returns last_commit_id per symbol
26 I403 last_op reflects the most recent commit's op, not the first
27 I404 _upsert_symbol_intel writes op to musehub_symbol_intel
28 I405 _upsert_symbol_intel writes last_commit_id to musehub_symbol_intel
29 I406 build_symbol_index populates op end-to-end
30 I407 build_symbol_index populates last_commit_id end-to-end
31 I408 backfill_intel_fields fixes NULL op from existing history entries
32 I409 backfill_intel_fields fixes NULL last_commit_id from existing history
33 I410 backfill_intel_fields is idempotent (safe to run twice)
34 """
35 from __future__ import annotations
36
37 import secrets
38 from datetime import datetime, timezone, timedelta
39
40 import pytest
41 from sqlalchemy import select
42 from sqlalchemy.ext.asyncio import AsyncSession
43
44 from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry, MusehubSymbolIntel
45 from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef
46 from muse.core.types import blob_id, long_id
47 from tests.factories import create_repo
48
49
50 # ---------------------------------------------------------------------------
51 # Helpers
52 # ---------------------------------------------------------------------------
53
def _now() -> datetime:
    """Return the current moment as a timezone-aware datetime in UTC."""
    utc = timezone.utc
    return datetime.now(tz=utc)
56
57
def _cid() -> str:
    """Return a fresh content id: ``blob_id`` applied to 32 random bytes."""
    random_payload = secrets.token_bytes(32)
    return blob_id(random_payload)
60
61
def _lid() -> str:
    """Return a fresh id: ``long_id`` applied to a random 64-character hex token."""
    hex_token = secrets.token_hex(32)  # 32 random bytes -> 64 hex digits
    return long_id(hex_token)
64
65
async def _make_commit(
    session: AsyncSession,
    repo_id: str,
    addresses: list[str],
    *,
    parent_id: str | None = None,
    branch: str = "dev",
    message: str = "feat: test",
    op: str = "insert",
    ts: datetime | None = None,
) -> MusehubCommit:
    """Add a commit plus its repo ref to *session*, flush, and return the commit.

    Args:
        session: Async session the rows are added to and flushed through.
        repo_id: Repo the commit ref is attached to.
        addresses: Symbol addresses; each one becomes an entry in the
            commit's ``structured_delta["ops"]`` list.
        parent_id: Optional parent commit id; when falsy the commit has no
            parents.
        branch: Branch name stored on the commit.
        message: Commit message stored on the commit.
        op: Operation name applied to every generated delta op.
        ts: Commit timestamp; falls back to the current UTC time.

    Returns:
        The ``MusehubCommit`` instance that was added and flushed.
    """
    new_commit_id = _lid()
    commit_time = ts or _now()

    # One delta op per address, each with its own random content id.
    delta_ops = []
    for addr in addresses:
        delta_ops.append({"address": addr, "op": op, "new_content_id": _cid()})

    parents = [parent_id] if parent_id else []

    record = MusehubCommit(
        commit_id=new_commit_id,
        message=message,
        author="gabriel",
        branch=branch,
        timestamp=commit_time,
        parent_ids=parents,
        structured_delta={"ops": delta_ops},
    )
    ref = MusehubCommitRef(repo_id=repo_id, commit_id=new_commit_id)

    session.add(record)
    session.add(ref)
    await session.flush()
    return record
97
98
99 # ---------------------------------------------------------------------------
100 # I401 — _compute_symbol_intel returns last_op per symbol
101 # ---------------------------------------------------------------------------
102
@pytest.mark.asyncio
async def test_i401_compute_returns_last_op() -> None:
    """A one-entry history must produce a symbol record that carries last_op."""
    from musehub.services.musehub_symbol_indexer import _compute_symbol_intel

    address = "src/a.py::fn"
    entry = {
        "commit_id": _lid(),
        "committed_at": _now().isoformat(),
        "author": "gabriel",
        "op": "insert",
        "op_payload": {},
        "content_id": _cid(),
    }

    intel = _compute_symbol_intel({address: [entry]})

    assert address in intel
    symbol = intel[address]
    assert "last_op" in symbol
    assert symbol["last_op"] == "insert"
118
119
120 # ---------------------------------------------------------------------------
121 # I402 — _compute_symbol_intel returns last_commit_id per symbol
122 # ---------------------------------------------------------------------------
123
@pytest.mark.asyncio
async def test_i402_compute_returns_last_commit_id() -> None:
    """A one-entry history must report that entry's commit id as last_commit_id."""
    from musehub.services.musehub_symbol_indexer import _compute_symbol_intel

    expected_commit = _lid()
    address = "src/b.py::fn"
    entries = [
        {
            "commit_id": expected_commit,
            "committed_at": _now().isoformat(),
            "author": "gabriel",
            "op": "replace",
            "op_payload": {},
            "content_id": _cid(),
        },
    ]

    intel = _compute_symbol_intel({address: entries})

    assert intel[address]["last_commit_id"] == expected_commit
138
139
140 # ---------------------------------------------------------------------------
141 # I403 — last_op reflects the MOST RECENT commit, not the first
142 # ---------------------------------------------------------------------------
143
@pytest.mark.asyncio
async def test_i403_last_op_is_most_recent() -> None:
    """With two entries for one symbol, the newer one supplies last_op and last_commit_id."""
    from musehub.services.musehub_symbol_indexer import _compute_symbol_intel

    address = "src/c.py::fn"
    stale_ts = (_now() - timedelta(days=10)).isoformat()
    fresh_ts = _now().isoformat()
    fresh_commit = _lid()

    # Ten-day-old "insert" followed by a "replace" stamped now.
    stale_entry = {
        "commit_id": _lid(),
        "committed_at": stale_ts,
        "author": "gabriel",
        "op": "insert",
        "op_payload": {},
        "content_id": _cid(),
    }
    fresh_entry = {
        "commit_id": fresh_commit,
        "committed_at": fresh_ts,
        "author": "gabriel",
        "op": "replace",
        "op_payload": {},
        "content_id": _cid(),
    }

    intel = _compute_symbol_intel({address: [stale_entry, fresh_entry]})

    latest = intel[address]
    assert latest["last_op"] == "replace"
    assert latest["last_commit_id"] == fresh_commit
164
165
166 # ---------------------------------------------------------------------------
167 # I404 — _upsert_symbol_intel writes op to the DB
168 # ---------------------------------------------------------------------------
169
@pytest.mark.asyncio
async def test_i404_upsert_writes_op(db_session: AsyncSession) -> None:
    """After _upsert_symbol_intel, the stored row's op equals the record's last_op."""
    from musehub.services.musehub_symbol_indexer import _upsert_symbol_intel

    repo = await create_repo(db_session, owner="gabriel")
    address = "src/d.py::fn"
    record = {
        "churn": 2,
        "churn_30d": 1,
        "churn_90d": 2,
        "blast": 0,
        "blast_direct": 0,
        "blast_cross": 0,
        "blast_top": [],
        "last_changed": _now().isoformat(),
        "last_author": "gabriel",
        "author_count": 1,
        "gravity": 0.1,
        "weekly": [0] * 12,
        "last_op": "replace",
        "last_commit_id": _lid(),
    }

    await _upsert_symbol_intel(db_session, repo.repo_id, {address: record})
    await db_session.flush()

    # Read the row back by its (repo_id, address) pair.
    stmt = select(MusehubSymbolIntel).where(
        MusehubSymbolIntel.repo_id == repo.repo_id,
        MusehubSymbolIntel.address == address,
    )
    stored = (await db_session.execute(stmt)).scalar_one()
    assert stored.op == "replace"
196
197
198 # ---------------------------------------------------------------------------
199 # I405 — _upsert_symbol_intel writes last_commit_id to the DB
200 # ---------------------------------------------------------------------------
201
@pytest.mark.asyncio
async def test_i405_upsert_writes_last_commit_id(db_session: AsyncSession) -> None:
    """After _upsert_symbol_intel, the stored row's last_commit_id equals the record's."""
    from musehub.services.musehub_symbol_indexer import _upsert_symbol_intel

    repo = await create_repo(db_session, owner="gabriel")
    expected_commit = _lid()
    address = "src/e.py::fn"
    record = {
        "churn": 1,
        "churn_30d": 1,
        "churn_90d": 1,
        "blast": 0,
        "blast_direct": 0,
        "blast_cross": 0,
        "blast_top": [],
        "last_changed": _now().isoformat(),
        "last_author": "gabriel",
        "author_count": 1,
        "gravity": 0.0,
        "weekly": [0] * 12,
        "last_op": "insert",
        "last_commit_id": expected_commit,
    }

    await _upsert_symbol_intel(db_session, repo.repo_id, {address: record})
    await db_session.flush()

    # Read the row back by its (repo_id, address) pair.
    stmt = select(MusehubSymbolIntel).where(
        MusehubSymbolIntel.repo_id == repo.repo_id,
        MusehubSymbolIntel.address == address,
    )
    stored = (await db_session.execute(stmt)).scalar_one()
    assert stored.last_commit_id == expected_commit
229
230
231 # ---------------------------------------------------------------------------
232 # I406 — build_symbol_index populates op end-to-end
233 # ---------------------------------------------------------------------------
234
@pytest.mark.asyncio
async def test_i406_build_index_populates_op(db_session: AsyncSession) -> None:
    """build_symbol_index on a one-commit repo must leave op set to that commit's op."""
    from musehub.services.musehub_symbol_indexer import build_symbol_index

    address = "src/f.py::my_fn"
    repo = await create_repo(db_session, owner="gabriel")
    head = await _make_commit(db_session, repo.repo_id, [address], op="insert")
    await db_session.flush()

    await build_symbol_index(db_session, repo.repo_id, head.commit_id)
    await db_session.flush()

    stmt = select(MusehubSymbolIntel).where(
        MusehubSymbolIntel.repo_id == repo.repo_id,
        MusehubSymbolIntel.address == address,
    )
    stored = (await db_session.execute(stmt)).scalar_one()
    # The NULL check comes first so a missing value fails with a clearer message.
    assert stored.op is not None
    assert stored.op == "insert"
257
258
259 # ---------------------------------------------------------------------------
260 # I407 — build_symbol_index populates last_commit_id end-to-end
261 # ---------------------------------------------------------------------------
262
@pytest.mark.asyncio
async def test_i407_build_index_populates_last_commit_id(db_session: AsyncSession) -> None:
    """build_symbol_index on a one-commit repo must set last_commit_id to that commit."""
    from musehub.services.musehub_symbol_indexer import build_symbol_index

    address = "src/g.py::helper"
    repo = await create_repo(db_session, owner="gabriel")
    head = await _make_commit(db_session, repo.repo_id, [address], op="insert")
    await db_session.flush()

    await build_symbol_index(db_session, repo.repo_id, head.commit_id)
    await db_session.flush()

    stmt = select(MusehubSymbolIntel).where(
        MusehubSymbolIntel.repo_id == repo.repo_id,
        MusehubSymbolIntel.address == address,
    )
    stored = (await db_session.execute(stmt)).scalar_one()
    # The NULL check comes first so a missing value fails with a clearer message.
    assert stored.last_commit_id is not None
    assert stored.last_commit_id == head.commit_id
285
286
287 # ---------------------------------------------------------------------------
288 # I408 — backfill_intel_fields fixes NULL op from existing history entries
289 # ---------------------------------------------------------------------------
290
@pytest.mark.asyncio
async def test_i408_backfill_fixes_null_op(db_session: AsyncSession) -> None:
    """backfill_intel_fields must set op from the most recent history entry.

    Two history entries are seeded for the same address: an "insert" dated
    ten days ago and a "mutate" dated now. With only a single entry the test
    could not tell "most recent" from "any", so a backfill that copies the
    oldest entry now fails here.
    """
    from musehub.services.musehub_symbol_indexer import backfill_intel_fields

    repo = await create_repo(db_session, owner="gabriel")
    address = "src/h.py::fn"
    newest_at = _now()
    oldest_at = newest_at - timedelta(days=10)

    # Insert intel row with NULL op
    db_session.add(MusehubSymbolIntel(
        repo_id=repo.repo_id, address=address,
        churn=1, churn_30d=1, churn_90d=1,
        blast=0, blast_direct=0, blast_cross=0, blast_top=[],
        last_changed=newest_at, author_count=1, gravity=0.0,
        weekly=[0] * 12,
    ))
    # The older entry is added first so that timestamp order and insertion
    # order agree on which entry is the most recent one.
    db_session.add(MusehubSymbolHistoryEntry(
        repo_id=repo.repo_id, address=address,
        commit_id=_lid(), committed_at=oldest_at,
        author="gabriel", op="insert",
    ))
    db_session.add(MusehubSymbolHistoryEntry(
        repo_id=repo.repo_id, address=address,
        commit_id=_lid(), committed_at=newest_at,
        author="gabriel", op="mutate",
    ))
    await db_session.flush()

    await backfill_intel_fields(db_session, repo.repo_id)
    await db_session.flush()

    row = (await db_session.execute(
        select(MusehubSymbolIntel).where(
            MusehubSymbolIntel.repo_id == repo.repo_id,
            MusehubSymbolIntel.address == address,
        )
    )).scalar_one()
    assert row.op == "mutate"
325
326
327 # ---------------------------------------------------------------------------
328 # I409 — backfill_intel_fields fixes NULL last_commit_id
329 # ---------------------------------------------------------------------------
330
@pytest.mark.asyncio
async def test_i409_backfill_fixes_null_last_commit_id(db_session: AsyncSession) -> None:
    """backfill_intel_fields must set last_commit_id from the most recent history entry.

    Two history entries with different commit ids are seeded for the same
    address, one dated ten days ago and one dated now. With only a single
    entry the test could not tell "most recent" from "any", so a backfill
    that copies the oldest entry's commit id now fails here.
    """
    from musehub.services.musehub_symbol_indexer import backfill_intel_fields

    repo = await create_repo(db_session, owner="gabriel")
    address = "src/i.py::fn"
    stale_commit_id = _lid()
    commit_id = _lid()
    newest_at = _now()
    oldest_at = newest_at - timedelta(days=10)

    # Intel row is created without last_commit_id; the backfill must fill it.
    db_session.add(MusehubSymbolIntel(
        repo_id=repo.repo_id, address=address,
        churn=1, churn_30d=1, churn_90d=1,
        blast=0, blast_direct=0, blast_cross=0, blast_top=[],
        last_changed=newest_at, author_count=1, gravity=0.0,
        weekly=[0] * 12,
    ))
    # The older entry is added first so that timestamp order and insertion
    # order agree on which entry is the most recent one.
    db_session.add(MusehubSymbolHistoryEntry(
        repo_id=repo.repo_id, address=address,
        commit_id=stale_commit_id, committed_at=oldest_at,
        author="gabriel", op="insert",
    ))
    db_session.add(MusehubSymbolHistoryEntry(
        repo_id=repo.repo_id, address=address,
        commit_id=commit_id, committed_at=newest_at,
        author="gabriel", op="replace",
    ))
    await db_session.flush()

    await backfill_intel_fields(db_session, repo.repo_id)
    await db_session.flush()

    row = (await db_session.execute(
        select(MusehubSymbolIntel).where(
            MusehubSymbolIntel.repo_id == repo.repo_id,
            MusehubSymbolIntel.address == address,
        )
    )).scalar_one()
    assert row.last_commit_id == commit_id
363
364
365 # ---------------------------------------------------------------------------
366 # I410 — backfill_intel_fields is idempotent
367 # ---------------------------------------------------------------------------
368
@pytest.mark.asyncio
async def test_i410_backfill_idempotent(db_session: AsyncSession) -> None:
    """Two consecutive backfill_intel_fields runs must leave op and last_commit_id as expected."""
    from musehub.services.musehub_symbol_indexer import backfill_intel_fields

    repo = await create_repo(db_session, owner="gabriel")
    expected_commit = _lid()
    address = "src/j.py::fn"

    # Intel row without op / last_commit_id, plus one matching history entry.
    intel_row = MusehubSymbolIntel(
        repo_id=repo.repo_id,
        address=address,
        churn=1,
        churn_30d=1,
        churn_90d=1,
        blast=0,
        blast_direct=0,
        blast_cross=0,
        blast_top=[],
        last_changed=_now(),
        author_count=1,
        gravity=0.0,
        weekly=[0] * 12,
    )
    db_session.add(intel_row)
    history_row = MusehubSymbolHistoryEntry(
        repo_id=repo.repo_id,
        address=address,
        commit_id=expected_commit,
        committed_at=_now(),
        author="gabriel",
        op="patch",
    )
    db_session.add(history_row)
    await db_session.flush()

    # Run the backfill twice, flushing after each run.
    for _ in range(2):
        await backfill_intel_fields(db_session, repo.repo_id)
        await db_session.flush()

    stmt = select(MusehubSymbolIntel).where(
        MusehubSymbolIntel.repo_id == repo.repo_id,
        MusehubSymbolIntel.address == address,
    )
    stored = (await db_session.execute(stmt)).scalar_one()
    assert stored.op == "patch"
    assert stored.last_commit_id == expected_commit
File History 1 commit
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor 1 day ago