gabriel / musehub public
test_intel_normalized_schema.py python
840 lines 36.8 KB
Raw
sha256:9b711047e27df5ac91681c74aadfb0e31f69ffd4269932ea52f0c113764d8c0a docs(phase-03): rewrite Domain Protocol — AddressedMergePlu… Sonnet 4.6 minor ⚠ breaking 24 days ago
1 """TDD spec for normalized symbol intel schema — SI1–SI40.
2
3 Current architecture stores per-symbol data as unbounded JSON blobs in
4 musehub_intel_results.data_json. Every symbol page load deserializes
5 megabytes of JSON to return one entry. This test file defines the
6 correct normalized architecture.
7
8 All tests are RED until the implementation is complete.
9
10 New tables (replaces code.symbol_history / code.per_symbol_intel / code.hash_occurrence blobs):
11
12 musehub_symbol_history_entries — one row per (repo_id, address, commit_id)
13 musehub_symbol_intel — one row per (repo_id, address)
14 musehub_hash_occurrence_entries — one row per (content_id, repo_id, address)
15
16 musehub_intel_results keeps only:
17 code.intel_summary — small scalar aggregate, fine as blob
18 code.intel_snapshot — computed panel data, fine as blob
19
20 Layers:
21 1. Schema — ORM model shape, column types, indexes, constraints
22 2. Write — build_symbol_index upserts normalized rows
23 3. Read — helpers return correct data from normalized tables
24 4. Incremental— second push merges without duplication
25 5. Integrity — corrupt data, unknown refs, empty repos
26 6. Performance— point and file-scoped lookups under 50 ms; no full-table deserialize
27 7. Stress — 500 symbols across 10 commits; one symbol modified 50 times
28 8. Aggregates — intel_summary and intel_snapshot still produced as blobs
29 """
30 from __future__ import annotations
31
32 import json
33 import secrets
34 import time
35 from datetime import datetime, timezone
36
37 import pytest
38 from sqlalchemy import select, func
39 from sqlalchemy.ext.asyncio import AsyncSession
40
41 from muse.core.types import long_id
42 from tests.factories import create_repo
43 from musehub.types.json_types import JSONObject
44
45
46 # ─────────────────────────────────────────────────────────────────────────────
47 # Helpers
48 # ─────────────────────────────────────────────────────────────────────────────
49
def _now() -> datetime:
    """Return the current time as a timezone-aware datetime in UTC."""
    utc = timezone.utc
    return datetime.now(utc)
52
53
def _uid() -> str:
    """Return a fresh random identifier for use as a commit id in tests.

    64 random hex characters are passed through ``long_id``; the exact
    output format is whatever that project helper produces.
    """
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)
56
57
def _cid() -> str:
    """Return a fresh random identifier for use as a content id in tests.

    64 random hex characters are passed through ``long_id``; the exact
    output format is whatever that project helper produces.
    """
    random_hex = secrets.token_hex(32)
    return long_id(random_hex)
60
61
def _insert_op(address: str, content_id: str | None = None) -> JSONObject:
    """Build an ``insert`` delta op for the symbol at *address*.

    A falsy *content_id* (``None`` or an empty string) is replaced by a
    freshly generated one from ``_cid()``.
    """
    op = {"address": address, "op": "insert"}
    op["content_id"] = content_id if content_id else _cid()
    return op
68
69
def _patch_op(file_addr: str, children: list[JSONObject]) -> JSONObject:
    """Build a ``patch`` delta op on *file_addr* wrapping *children* as its child ops."""
    return dict(address=file_addr, op="patch", child_ops=children)
72
73
async def _commit_with_delta(
    session: AsyncSession,
    repo_id: str,
    commit_id: str,
    ops: list[JSONObject],
    parent_ids: list[str] | None = None,
    author: str = "gabriel",
) -> None:
    """Add a ``MusehubCommit`` on branch ``main`` carrying *ops* and flush it.

    *ops* is stored under the ``"ops"`` key of the commit's structured
    delta. A missing or empty *parent_ids* is stored as an empty list.
    The session is flushed, not committed.
    """
    from musehub.db.musehub_repo_models import MusehubCommit

    parents = parent_ids if parent_ids else []
    commit_row = MusehubCommit(
        commit_id=commit_id,
        repo_id=repo_id,
        branch="main",
        parent_ids=parents,
        message="test commit",
        author=author,
        timestamp=_now(),
        structured_delta={"ops": ops},
    )
    session.add(commit_row)
    await session.flush()
95
96
async def _build_and_persist(
    session: AsyncSession,
    repo_id: str,
    commit_id: str,
) -> list[tuple[str, dict]]:
    """Run ``build_symbol_index`` for *commit_id* and persist any results.

    ``persist_intel_results`` is only called when the build returned a
    non-empty result; the build result is returned either way.
    """
    from musehub.services.musehub_symbol_indexer import build_symbol_index
    from musehub.services.musehub_intel_providers import persist_intel_results

    index_results = await build_symbol_index(session, repo_id, commit_id)
    if not index_results:
        return index_results
    await persist_intel_results(session, repo_id, commit_id, index_results)
    return index_results
108
109
110 # ─────────────────────────────────────────────────────────────────────────────
111 # Layer 1 — Schema: ORM model shape
112 # ─────────────────────────────────────────────────────────────────────────────
113
114 class TestNormalizedSchemaModels:
115 """SI1–SI6: ORM models for the three new normalized tables exist and have
116 the right columns, primary keys, and indexes."""
117
118 def test_SI1_symbol_history_entry_model_importable(self) -> None:
119 from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry
120 assert MusehubSymbolHistoryEntry.__tablename__ == "musehub_symbol_history_entries"
121
122 def test_SI2_symbol_history_entry_columns(self) -> None:
123 from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry
124 cols = {c.name for c in MusehubSymbolHistoryEntry.__table__.columns}
125 assert {"repo_id", "address", "commit_id", "committed_at",
126 "author", "op", "content_id"} <= cols
127
128 def test_SI3_symbol_history_entry_pk(self) -> None:
129 from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry
130 pk_cols = {c.name for c in MusehubSymbolHistoryEntry.__table__.primary_key}
131 assert pk_cols == {"repo_id", "address", "commit_id"}
132
133 def test_SI4_symbol_intel_model_importable(self) -> None:
134 from musehub.db.musehub_intel_models import MusehubSymbolIntel
135 assert MusehubSymbolIntel.__tablename__ == "musehub_symbol_intel"
136
137 def test_SI5_symbol_intel_columns(self) -> None:
138 from musehub.db.musehub_intel_models import MusehubSymbolIntel
139 cols = {c.name for c in MusehubSymbolIntel.__table__.columns}
140 assert {"repo_id", "address", "churn", "churn_30d", "churn_90d",
141 "blast", "blast_direct", "blast_cross", "blast_top",
142 "last_changed", "last_author", "author_count",
143 "gravity", "weekly"} <= cols
144
145 def test_SI6_symbol_intel_pk(self) -> None:
146 from musehub.db.musehub_intel_models import MusehubSymbolIntel
147 pk_cols = {c.name for c in MusehubSymbolIntel.__table__.primary_key}
148 assert pk_cols == {"repo_id", "address"}
149
150 def test_SI7_hash_occurrence_entry_model_importable(self) -> None:
151 from musehub.db.musehub_intel_models import MusehubHashOccurrenceEntry
152 assert MusehubHashOccurrenceEntry.__tablename__ == "musehub_hash_occurrence_entries"
153
154 def test_SI8_hash_occurrence_entry_pk(self) -> None:
155 from musehub.db.musehub_intel_models import MusehubHashOccurrenceEntry
156 pk_cols = {c.name for c in MusehubHashOccurrenceEntry.__table__.primary_key}
157 assert pk_cols == {"content_id", "repo_id", "address"}
158
159
160 # ─────────────────────────────────────────────────────────────────────────────
161 # Layer 2 — Write: build_symbol_index upserts normalized rows
162 # ─────────────────────────────────────────────────────────────────────────────
163
class TestBuildWritesNormalizedRows:
    """SI9–SI16: build_symbol_index + persist_intel_results write to the
    normalized tables, not just to intel_results blobs."""

    @pytest.mark.asyncio
    async def test_SI9_single_commit_writes_history_entry_rows(
        self, db_session: AsyncSession,
    ) -> None:
        """Each symbol inserted by a commit gets a history-entry row."""
        from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry

        repo = await create_repo(db_session)
        head = _uid()
        ops = [
            _insert_op("src/main.py::parse"),
            _insert_op("src/main.py::render"),
        ]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        stmt = select(MusehubSymbolHistoryEntry).where(
            MusehubSymbolHistoryEntry.repo_id == repo.repo_id
        )
        entries = (await db_session.execute(stmt)).scalars().all()
        seen = {entry.address for entry in entries}
        assert "src/main.py::parse" in seen
        assert "src/main.py::render" in seen

    @pytest.mark.asyncio
    async def test_SI10_history_entry_commit_id_stored(
        self, db_session: AsyncSession,
    ) -> None:
        """The history row records the originating commit id and an op kind."""
        from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry

        repo = await create_repo(db_session)
        head = _uid()
        ops = [_insert_op("src/auth.py::login")]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        stmt = select(MusehubSymbolHistoryEntry).where(
            MusehubSymbolHistoryEntry.repo_id == repo.repo_id,
            MusehubSymbolHistoryEntry.address == "src/auth.py::login",
        )
        entry = (await db_session.execute(stmt)).scalar_one()
        assert entry.commit_id == head
        assert entry.op in ("add", "insert", "modify")

    @pytest.mark.asyncio
    async def test_SI11_single_commit_writes_symbol_intel_rows(
        self, db_session: AsyncSession,
    ) -> None:
        """An inserted symbol gets a symbol-intel row with churn of at least 1."""
        from musehub.db.musehub_intel_models import MusehubSymbolIntel

        repo = await create_repo(db_session)
        head = _uid()
        ops = [_insert_op("src/core.py::Engine")]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        stmt = select(MusehubSymbolIntel).where(
            MusehubSymbolIntel.repo_id == repo.repo_id,
            MusehubSymbolIntel.address == "src/core.py::Engine",
        )
        intel_row = (await db_session.execute(stmt)).scalar_one_or_none()
        assert intel_row is not None
        assert intel_row.churn >= 1

    @pytest.mark.asyncio
    async def test_SI12_hash_occurrence_rows_written(
        self, db_session: AsyncSession,
    ) -> None:
        """Two symbols sharing a content id both appear as occurrence rows."""
        from musehub.db.musehub_intel_models import MusehubHashOccurrenceEntry

        repo = await create_repo(db_session)
        head = _uid()
        shared_content_id = _cid()
        ops = [
            _insert_op("src/a.py::foo", shared_content_id),
            _insert_op("src/b.py::bar", shared_content_id),
        ]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        stmt = select(MusehubHashOccurrenceEntry).where(
            MusehubHashOccurrenceEntry.repo_id == repo.repo_id,
            MusehubHashOccurrenceEntry.content_id == shared_content_id,
        )
        occurrences = (await db_session.execute(stmt)).scalars().all()
        seen = {occ.address for occ in occurrences}
        assert "src/a.py::foo" in seen
        assert "src/b.py::bar" in seen

    @pytest.mark.asyncio
    async def test_SI13_intel_summary_still_written_to_intel_results(
        self, db_session: AsyncSession,
    ) -> None:
        """The ``code.intel_summary`` blob is still written and has a health score."""
        from musehub.db.musehub_intel_models import MusehubIntelResult

        repo = await create_repo(db_session)
        head = _uid()
        ops = [_insert_op("src/main.py::run")]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        stmt = select(MusehubIntelResult).where(
            MusehubIntelResult.repo_id == repo.repo_id,
            MusehubIntelResult.intel_type == "code.intel_summary",
        )
        summary_row = (await db_session.execute(stmt)).scalar_one_or_none()
        assert summary_row is not None
        summary = json.loads(summary_row.data_json)
        assert "health_score" in summary

    @pytest.mark.asyncio
    async def test_SI14_intel_snapshot_still_written_to_intel_results(
        self, db_session: AsyncSession,
    ) -> None:
        """The ``code.intel_snapshot`` blob is still written."""
        from musehub.db.musehub_intel_models import MusehubIntelResult

        repo = await create_repo(db_session)
        head = _uid()
        ops = [_insert_op("src/main.py::run")]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        stmt = select(MusehubIntelResult).where(
            MusehubIntelResult.repo_id == repo.repo_id,
            MusehubIntelResult.intel_type == "code.intel_snapshot",
        )
        snapshot_row = (await db_session.execute(stmt)).scalar_one_or_none()
        assert snapshot_row is not None

    @pytest.mark.asyncio
    async def test_SI15_blob_types_not_written_to_intel_results(
        self, db_session: AsyncSession,
    ) -> None:
        """code.symbol_history, code.per_symbol_intel, code.hash_occurrence
        must NOT be written as blobs anymore."""
        from musehub.db.musehub_intel_models import MusehubIntelResult

        repo = await create_repo(db_session)
        head = _uid()
        ops = [_insert_op("src/main.py::run")]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        legacy_types = [
            "code.symbol_history",
            "code.per_symbol_intel",
            "code.hash_occurrence",
        ]
        stmt = select(MusehubIntelResult.intel_type).where(
            MusehubIntelResult.repo_id == repo.repo_id,
            MusehubIntelResult.intel_type.in_(legacy_types),
        )
        blob_types = (await db_session.execute(stmt)).scalars().all()
        assert blob_types == [], f"blob types still written: {blob_types}"

    @pytest.mark.asyncio
    async def test_SI16_author_stored_in_history_entry(
        self, db_session: AsyncSession,
    ) -> None:
        """The commit author is copied onto the history row."""
        from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry

        repo = await create_repo(db_session)
        head = _uid()
        ops = [_insert_op("src/auth.py::validate")]
        await _commit_with_delta(
            db_session, repo.repo_id, head, ops, author="gabriel",
        )
        await _build_and_persist(db_session, repo.repo_id, head)

        stmt = select(MusehubSymbolHistoryEntry).where(
            MusehubSymbolHistoryEntry.repo_id == repo.repo_id,
            MusehubSymbolHistoryEntry.address == "src/auth.py::validate",
        )
        entry = (await db_session.execute(stmt)).scalar_one()
        assert entry.author == "gabriel"
337
338
339 # ─────────────────────────────────────────────────────────────────────────────
340 # Layer 3 — Read: helpers return correct data from normalized tables
341 # ─────────────────────────────────────────────────────────────────────────────
342
class TestReadHelpers:
    """SI17–SI24: read helpers query normalized tables, not blobs."""

    @pytest.mark.asyncio
    async def test_SI17_load_symbol_history_returns_entries_for_address(
        self, db_session: AsyncSession,
    ) -> None:
        """Unfiltered history is keyed by every indexed symbol address."""
        from musehub.services.musehub_symbol_indexer import load_symbol_history

        repo = await create_repo(db_session)
        head = _uid()
        ops = [
            _insert_op("src/auth.py::login"),
            _insert_op("src/core.py::Engine"),
        ]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        by_address = await load_symbol_history(db_session, repo.repo_id)
        assert "src/auth.py::login" in by_address
        assert "src/core.py::Engine" in by_address

    @pytest.mark.asyncio
    async def test_SI18_load_symbol_history_file_path_filter(
        self, db_session: AsyncSession,
    ) -> None:
        """``file_path`` restricts the history to symbols of that file."""
        from musehub.services.musehub_symbol_indexer import load_symbol_history

        repo = await create_repo(db_session)
        head = _uid()
        ops = [
            _insert_op("src/auth.py::login"),
            _insert_op("src/auth.py::logout"),
            _insert_op("src/core.py::Engine"),
        ]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        by_address = await load_symbol_history(
            db_session, repo.repo_id, file_path="src/auth.py",
        )
        assert "src/auth.py::login" in by_address
        assert "src/auth.py::logout" in by_address
        assert "src/core.py::Engine" not in by_address

    @pytest.mark.asyncio
    async def test_SI19_load_symbol_history_empty_when_no_index(
        self, db_session: AsyncSession,
    ) -> None:
        """A repo that was never indexed yields an empty history mapping."""
        from musehub.services.musehub_symbol_indexer import load_symbol_history

        repo = await create_repo(db_session)
        by_address = await load_symbol_history(db_session, repo.repo_id)
        assert by_address == {}

    @pytest.mark.asyncio
    async def test_SI20_lookup_symbol_intel_returns_metrics(
        self, db_session: AsyncSession,
    ) -> None:
        """The intel lookup returns churn, gravity and blast for a known symbol."""
        from musehub.services.musehub_symbol_indexer import lookup_symbol_intel

        repo = await create_repo(db_session)
        head = _uid()
        ops = [_insert_op("src/billing.py::compute_total")]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        found = await lookup_symbol_intel(
            db_session, repo.repo_id, ["src/billing.py::compute_total"]
        )
        assert "src/billing.py::compute_total" in found
        metrics = found["src/billing.py::compute_total"]
        assert "churn" in metrics
        assert "gravity" in metrics
        assert "blast" in metrics

    @pytest.mark.asyncio
    async def test_SI21_lookup_symbol_intel_missing_address_excluded(
        self, db_session: AsyncSession,
    ) -> None:
        """Addresses with no intel row are simply absent from the result."""
        from musehub.services.musehub_symbol_indexer import lookup_symbol_intel

        repo = await create_repo(db_session)
        wanted = ["nonexistent::fn"]
        found = await lookup_symbol_intel(db_session, repo.repo_id, wanted)
        assert found == {}

    @pytest.mark.asyncio
    async def test_SI22_load_hash_occurrence_returns_clone_pairs(
        self, db_session: AsyncSession,
    ) -> None:
        """Symbols sharing a content id are grouped under that id."""
        from musehub.services.musehub_symbol_indexer import load_hash_occurrence

        repo = await create_repo(db_session)
        head = _uid()
        content_id = _cid()
        ops = [
            _insert_op("src/a.py::foo", content_id),
            _insert_op("src/b.py::bar", content_id),
        ]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        by_content = await load_hash_occurrence(db_session, repo.repo_id)
        assert content_id in by_content
        assert set(by_content[content_id]) == {"src/a.py::foo", "src/b.py::bar"}

    @pytest.mark.asyncio
    async def test_SI23_load_intel_snapshot_still_works(
        self, db_session: AsyncSession,
    ) -> None:
        """The snapshot loader returns something after an index build."""
        from musehub.services.musehub_symbol_indexer import load_intel_snapshot

        repo = await create_repo(db_session)
        head = _uid()
        ops = [_insert_op("src/main.py::run")]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        snapshot = await load_intel_snapshot(db_session, repo.repo_id)
        assert snapshot is not None

    @pytest.mark.asyncio
    async def test_SI24_get_index_meta_returns_correct_ref(
        self, db_session: AsyncSession,
    ) -> None:
        """Index metadata points at the commit that was just indexed."""
        from musehub.services.musehub_symbol_indexer import get_index_meta

        repo = await create_repo(db_session)
        head = _uid()
        ops = [_insert_op("src/main.py::run")]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        index_meta = await get_index_meta(db_session, repo.repo_id)
        assert index_meta is not None
        assert index_meta["ref"] == head
463
464
465 # ─────────────────────────────────────────────────────────────────────────────
466 # Layer 4 — Incremental: second push merges without duplication
467 # ─────────────────────────────────────────────────────────────────────────────
468
class TestIncrementalUpdates:
    """SI25–SI29: second push adds new rows, does not duplicate existing ones."""

    @pytest.mark.asyncio
    async def test_SI25_second_push_adds_new_history_entries(
        self, db_session: AsyncSession,
    ) -> None:
        """Symbols from both the first and the second push have history rows."""
        from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry

        repo = await create_repo(db_session)

        first = _uid()
        first_ops = [_insert_op("src/auth.py::login")]
        await _commit_with_delta(db_session, repo.repo_id, first, first_ops)
        await _build_and_persist(db_session, repo.repo_id, first)

        second = _uid()
        second_ops = [_insert_op("src/auth.py::logout")]
        await _commit_with_delta(
            db_session, repo.repo_id, second, second_ops, parent_ids=[first],
        )
        await _build_and_persist(db_session, repo.repo_id, second)

        stmt = select(MusehubSymbolHistoryEntry).where(
            MusehubSymbolHistoryEntry.repo_id == repo.repo_id
        )
        entries = (await db_session.execute(stmt)).scalars().all()
        seen = {entry.address for entry in entries}
        assert "src/auth.py::login" in seen
        assert "src/auth.py::logout" in seen

    @pytest.mark.asyncio
    async def test_SI26_second_push_does_not_duplicate_existing_entries(
        self, db_session: AsyncSession,
    ) -> None:
        """Re-indexing the same head leaves exactly one row per symbol."""
        from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry

        repo = await create_repo(db_session)
        head = _uid()
        ops = [_insert_op("src/core.py::Engine")]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        # Build twice against the same head; the second run must add no rows.
        await _build_and_persist(db_session, repo.repo_id, head)
        await _build_and_persist(db_session, repo.repo_id, head)

        stmt = (
            select(func.count())
            .select_from(MusehubSymbolHistoryEntry)
            .where(
                MusehubSymbolHistoryEntry.repo_id == repo.repo_id,
                MusehubSymbolHistoryEntry.address == "src/core.py::Engine",
            )
        )
        row_count = (await db_session.execute(stmt)).scalar_one()
        assert row_count == 1

    @pytest.mark.asyncio
    async def test_SI27_modify_op_updates_symbol_intel_churn(
        self, db_session: AsyncSession,
    ) -> None:
        """An insert followed by a replace yields a churn of 2."""
        from musehub.services.musehub_symbol_indexer import lookup_symbol_intel

        repo = await create_repo(db_session)

        first = _uid()
        first_ops = [_insert_op("src/core.py::Engine")]
        await _commit_with_delta(db_session, repo.repo_id, first, first_ops)
        await _build_and_persist(db_session, repo.repo_id, first)

        second = _uid()
        replace_op = {
            "address": "src/core.py::Engine",
            "op": "replace",
            "content_id": _cid(),
        }
        await _commit_with_delta(
            db_session, repo.repo_id, second, [replace_op], parent_ids=[first],
        )
        await _build_and_persist(db_session, repo.repo_id, second)

        found = await lookup_symbol_intel(
            db_session, repo.repo_id, ["src/core.py::Engine"],
        )
        assert found["src/core.py::Engine"]["churn"] == 2

    @pytest.mark.asyncio
    async def test_SI28_second_push_history_has_both_commit_ids(
        self, db_session: AsyncSession,
    ) -> None:
        """A symbol touched in two commits has a history row for each commit."""
        from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry

        repo = await create_repo(db_session)
        first, second = _uid(), _uid()

        first_ops = [_insert_op("src/auth.py::login")]
        await _commit_with_delta(db_session, repo.repo_id, first, first_ops)
        await _build_and_persist(db_session, repo.repo_id, first)

        replace_op = {
            "address": "src/auth.py::login",
            "op": "replace",
            "content_id": _cid(),
        }
        await _commit_with_delta(
            db_session, repo.repo_id, second, [replace_op], parent_ids=[first],
        )
        await _build_and_persist(db_session, repo.repo_id, second)

        stmt = select(MusehubSymbolHistoryEntry).where(
            MusehubSymbolHistoryEntry.repo_id == repo.repo_id,
            MusehubSymbolHistoryEntry.address == "src/auth.py::login",
        )
        entries = (await db_session.execute(stmt)).scalars().all()
        recorded = {entry.commit_id for entry in entries}
        assert first in recorded
        assert second in recorded

    @pytest.mark.asyncio
    async def test_SI29_intel_summary_ref_advances_after_second_push(
        self, db_session: AsyncSession,
    ) -> None:
        """Index metadata references the most recently indexed commit."""
        from musehub.services.musehub_symbol_indexer import get_index_meta

        repo = await create_repo(db_session)
        first, second = _uid(), _uid()

        first_ops = [_insert_op("src/a.py::f")]
        await _commit_with_delta(db_session, repo.repo_id, first, first_ops)
        await _build_and_persist(db_session, repo.repo_id, first)

        second_ops = [_insert_op("src/b.py::g")]
        await _commit_with_delta(
            db_session, repo.repo_id, second, second_ops, parent_ids=[first],
        )
        await _build_and_persist(db_session, repo.repo_id, second)

        index_meta = await get_index_meta(db_session, repo.repo_id)
        assert index_meta is not None
        assert index_meta["ref"] == second
584
585
586 # ─────────────────────────────────────────────────────────────────────────────
587 # Layer 5 — Integrity: corrupt data, unknown refs, empty repos
588 # ─────────────────────────────────────────────────────────────────────────────
589
class TestDataIntegrity:
    """SI30–SI33: edge cases that must not raise or corrupt state."""

    @pytest.mark.asyncio
    async def test_SI30_empty_repo_returns_empty_history(
        self, db_session: AsyncSession,
    ) -> None:
        """A repo without commits has an empty symbol history."""
        from musehub.services.musehub_symbol_indexer import load_symbol_history

        repo = await create_repo(db_session)
        by_address = await load_symbol_history(db_session, repo.repo_id)
        assert by_address == {}

    @pytest.mark.asyncio
    async def test_SI31_unknown_head_commit_returns_empty_results(
        self, db_session: AsyncSession,
    ) -> None:
        """Building against a commit id that does not exist yields no results."""
        from musehub.services.musehub_symbol_indexer import build_symbol_index

        repo = await create_repo(db_session)
        unknown_head = _uid()
        built = await build_symbol_index(db_session, repo.repo_id, unknown_head)
        assert built == []

    @pytest.mark.asyncio
    async def test_SI32_commit_with_no_structured_delta_skipped(
        self, db_session: AsyncSession,
    ) -> None:
        """A commit created without a structured delta produces no history rows."""
        from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry
        from musehub.db.musehub_repo_models import MusehubCommit

        repo = await create_repo(db_session)
        head = _uid()
        # Built by hand because the shared helper always attaches a delta.
        bare_commit = MusehubCommit(
            commit_id=head,
            repo_id=repo.repo_id,
            branch="main",
            parent_ids=[],
            message="no delta",
            author="gabriel",
            timestamp=_now(),
        )
        db_session.add(bare_commit)
        await db_session.flush()
        await _build_and_persist(db_session, repo.repo_id, head)

        stmt = (
            select(func.count())
            .select_from(MusehubSymbolHistoryEntry)
            .where(MusehubSymbolHistoryEntry.repo_id == repo.repo_id)
        )
        row_count = (await db_session.execute(stmt)).scalar_one()
        assert row_count == 0

    @pytest.mark.asyncio
    async def test_SI33_lookup_symbol_intel_empty_address_list(
        self, db_session: AsyncSession,
    ) -> None:
        """Looking up an empty address list returns an empty mapping."""
        from musehub.services.musehub_symbol_indexer import lookup_symbol_intel

        repo = await create_repo(db_session)
        found = await lookup_symbol_intel(db_session, repo.repo_id, [])
        assert found == {}
642
643
644 # ─────────────────────────────────────────────────────────────────────────────
645 # Layer 6 — Performance: point lookups do not deserialize blobs
646 # ─────────────────────────────────────────────────────────────────────────────
647
class TestPerformance:
    """SI34–SI36: normalized reads are fast regardless of repo size.
    These budgets would be impossible with the blob approach at scale."""

    @pytest.mark.asyncio
    async def test_SI34_single_symbol_lookup_under_50ms(
        self, db_session: AsyncSession,
    ) -> None:
        """A single-symbol intel lookup on a 50-commit index stays under 50 ms."""
        from musehub.services.musehub_symbol_indexer import lookup_symbol_intel

        repo = await create_repo(db_session)
        # Linear chain of 50 commits, two new symbols per commit.
        head = ""
        for i in range(50):
            new_id = _uid()
            ops = [
                _insert_op(f"src/file_{i}.py::fn_{i}"),
                _insert_op(f"src/file_{i}.py::helper_{i}"),
            ]
            await _commit_with_delta(
                db_session, repo.repo_id, new_id, ops,
                parent_ids=[head] if head else [],
            )
            head = new_id
        await _build_and_persist(db_session, repo.repo_id, head)

        target = "src/file_25.py::fn_25"
        started = time.perf_counter()
        found = await lookup_symbol_intel(db_session, repo.repo_id, [target])
        elapsed_ms = (time.perf_counter() - started) * 1000
        assert target in found
        assert elapsed_ms < 50, f"point lookup took {elapsed_ms:.1f}ms — too slow"

    @pytest.mark.asyncio
    async def test_SI35_file_scoped_history_lookup_under_50ms(
        self, db_session: AsyncSession,
    ) -> None:
        """A file-filtered history read on a 50-commit index stays under 50 ms."""
        from musehub.services.musehub_symbol_indexer import load_symbol_history

        repo = await create_repo(db_session)
        # Every commit touches the hot symbol plus one unrelated file.
        head = ""
        for i in range(50):
            new_id = _uid()
            ops = [
                _insert_op(f"src/other_{i}.py::fn"),
                _insert_op("src/target.py::hot_fn"),
            ]
            await _commit_with_delta(
                db_session, repo.repo_id, new_id, ops,
                parent_ids=[head] if head else [],
            )
            head = new_id
        await _build_and_persist(db_session, repo.repo_id, head)

        started = time.perf_counter()
        by_address = await load_symbol_history(
            db_session, repo.repo_id, file_path="src/target.py",
        )
        elapsed_ms = (time.perf_counter() - started) * 1000
        assert "src/target.py::hot_fn" in by_address
        assert elapsed_ms < 50, f"file-scoped lookup took {elapsed_ms:.1f}ms"

    @pytest.mark.asyncio
    async def test_SI36_load_symbol_history_no_file_filter_returns_all(
        self, db_session: AsyncSession,
    ) -> None:
        """Without a file filter, history covers symbols from every file."""
        from musehub.services.musehub_symbol_indexer import load_symbol_history

        repo = await create_repo(db_session)
        head = _uid()
        ops = [
            _insert_op("src/a.py::fn_a"),
            _insert_op("src/b.py::fn_b"),
            _insert_op("src/c.py::fn_c"),
        ]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        by_address = await load_symbol_history(db_session, repo.repo_id)
        expected = {"src/a.py::fn_a", "src/b.py::fn_b", "src/c.py::fn_c"}
        assert expected <= set(by_address.keys())
717
718
719 # ─────────────────────────────────────────────────────────────────────────────
720 # Layer 7 — Stress: 500 symbols × realistic commit volume
721 # ─────────────────────────────────────────────────────────────────────────────
722
class TestStress:
    """SI37–SI38: large repos index without timeout or corruption."""

    @pytest.mark.asyncio
    async def test_SI37_index_500_symbols_across_10_commits(
        self, db_session: AsyncSession,
    ) -> None:
        """500 symbols spread over 10 chained commits index in under 10 s."""
        from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry

        repo = await create_repo(db_session)
        symbols = [f"src/module_{i // 10}.py::fn_{i}" for i in range(500)]
        chunk = len(symbols) // 10
        head = ""
        for start in range(0, chunk * 10, chunk):
            new_id = _uid()
            ops = [_insert_op(sym) for sym in symbols[start:start + chunk]]
            await _commit_with_delta(
                db_session, repo.repo_id, new_id, ops,
                parent_ids=[head] if head else [],
            )
            head = new_id

        # Only the index build is timed, not the commit setup above.
        started = time.perf_counter()
        await _build_and_persist(db_session, repo.repo_id, head)
        elapsed = time.perf_counter() - started

        stmt = (
            select(func.count())
            .select_from(MusehubSymbolHistoryEntry)
            .where(MusehubSymbolHistoryEntry.repo_id == repo.repo_id)
        )
        row_count = (await db_session.execute(stmt)).scalar_one()
        assert row_count == 500
        assert elapsed < 10.0, f"500-symbol index took {elapsed:.1f}s"

    @pytest.mark.asyncio
    async def test_SI38_same_symbol_modified_50_times(
        self, db_session: AsyncSession,
    ) -> None:
        """One symbol changed in 50 commits yields 50 history rows and churn 50."""
        from musehub.db.musehub_intel_models import MusehubSymbolHistoryEntry
        from musehub.services.musehub_symbol_indexer import lookup_symbol_intel

        repo = await create_repo(db_session)
        addr = "src/hot.py::hot_fn"
        head = ""
        for i in range(50):
            new_id = _uid()
            # The first commit introduces the symbol; later ones replace it.
            op_kind = "replace" if i else "insert"
            ops = [{"address": addr, "op": op_kind, "content_id": _cid()}]
            await _commit_with_delta(
                db_session, repo.repo_id, new_id, ops,
                parent_ids=[head] if head else [],
            )
            head = new_id
        await _build_and_persist(db_session, repo.repo_id, head)

        stmt = select(MusehubSymbolHistoryEntry).where(
            MusehubSymbolHistoryEntry.repo_id == repo.repo_id,
            MusehubSymbolHistoryEntry.address == addr,
        )
        entries = (await db_session.execute(stmt)).scalars().all()
        assert len(entries) == 50

        found = await lookup_symbol_intel(db_session, repo.repo_id, [addr])
        assert found[addr]["churn"] == 50
785
786
787 # ─────────────────────────────────────────────────────────────────────────────
788 # Layer 8 — Aggregates: intel_summary and intel_snapshot still produced
789 # ─────────────────────────────────────────────────────────────────────────────
790
class TestAggregatesStillWork:
    """SI39–SI40: aggregate outputs (summary, snapshot) are unaffected."""

    @pytest.mark.asyncio
    async def test_SI39_intel_summary_fields_correct(
        self, db_session: AsyncSession,
    ) -> None:
        """The summary blob reports at least 3 symbols plus health fields."""
        from musehub.db.musehub_intel_models import MusehubIntelResult

        repo = await create_repo(db_session)
        head = _uid()
        ops = [
            _insert_op("src/a.py::fn1"),
            _insert_op("src/b.py::fn2"),
            _insert_op("src/c.py::fn3"),
        ]
        await _commit_with_delta(db_session, repo.repo_id, head, ops)
        await _build_and_persist(db_session, repo.repo_id, head)

        stmt = select(MusehubIntelResult).where(
            MusehubIntelResult.repo_id == repo.repo_id,
            MusehubIntelResult.intel_type == "code.intel_summary",
        )
        summary_row = (await db_session.execute(stmt)).scalar_one()
        summary = json.loads(summary_row.data_json)
        assert summary.get("symbol_count", 0) >= 3
        assert "health_score" in summary
        assert "health_label" in summary

    @pytest.mark.asyncio
    async def test_SI40_rebuild_updates_summary_symbol_count(
        self, db_session: AsyncSession,
    ) -> None:
        """After a second push the single summary row counts at least 3 symbols."""
        from musehub.db.musehub_intel_models import MusehubIntelResult

        repo = await create_repo(db_session)
        first, second = _uid(), _uid()

        first_ops = [_insert_op("src/a.py::fn1")]
        await _commit_with_delta(db_session, repo.repo_id, first, first_ops)
        await _build_and_persist(db_session, repo.repo_id, first)

        second_ops = [
            _insert_op("src/b.py::fn2"),
            _insert_op("src/c.py::fn3"),
        ]
        await _commit_with_delta(
            db_session, repo.repo_id, second, second_ops, parent_ids=[first],
        )
        await _build_and_persist(db_session, repo.repo_id, second)

        stmt = select(MusehubIntelResult).where(
            MusehubIntelResult.repo_id == repo.repo_id,
            MusehubIntelResult.intel_type == "code.intel_summary",
        )
        # scalar_one() also fails if the rebuild left more than one summary row.
        summary_row = (await db_session.execute(stmt)).scalar_one()
        summary = json.loads(summary_row.data_json)
        assert summary.get("symbol_count", 0) >= 3
File History 1 commit
sha256:9b711047e27df5ac91681c74aadfb0e31f69ffd4269932ea52f0c113764d8c0a docs(phase-03): rewrite Domain Protocol — AddressedMergePlu… Sonnet 4.6 minor 24 days ago