gabriel / musehub public
test_phase1_stable_provider.py python
709 lines 27.8 KB
Raw
sha256:4992098130166d191cefed0a2821d19cd3cdd3cf50867a4e715c2b30636826c7 fix: repair syntax errors from typing annotation cleanup Sonnet 4.6 20 days ago
1 """TDD spec for Phase 1 — StableProvider migration + pure SQL rewrite (issue #12).
2
3 Verifies that ``StableProvider`` derives stability records entirely from
4 ``musehub_symbol_intel`` without any subprocess calls, and that the new
5 ``last_changed_commit`` column is populated correctly.
6
7 Seven test tiers
8 ----------------
9 Unit P1_01 – P1_06 _days_stable_from_dt() helper
10 Integration P1_07 – P1_14 Provider upserts, filtering, reruns
11 E2E P1_15 – P1_18 Seed symbol_intel → run provider → verify DB
12 Stress P1_19 – P1_21 500-row batch, idempotency
13 Data Integrity P1_22 – P1_24 NULL exclusion, churn_90d filter, uniqueness
14 Performance P1_25 – P1_26 Batch timing bounds
15 Security P1_27 – P1_28 Injection verbatim storage, repo isolation
16 """
17 from __future__ import annotations
18
19 import secrets
20 import time
21 from datetime import datetime, timedelta, timezone
22
23 import pytest
24 import pytest_asyncio
25 from sqlalchemy.dialects.postgresql import insert as pg_insert
26 from sqlalchemy.ext.asyncio import AsyncSession
27
28 from muse.core.types import fake_id, long_id
29 from musehub.db.musehub_intel_models import MusehubIntelStable, MusehubSymbolIntel
30 from musehub.db.musehub_repo_models import MusehubRepo
31 from musehub.services.musehub_intel_providers import StableProvider, _days_stable_from_dt
32 from tests.factories import create_repo
33
34
35 # ---------------------------------------------------------------------------
36 # Helpers
37 # ---------------------------------------------------------------------------
38
def _uid() -> str:
    """Return a new ID produced by ``fake_id`` from a random 32-character hex token."""
    token = secrets.token_hex(16)
    return fake_id(token)
41
42
# Owner used for every repo created in this module, and the slug of the
# primary repo built by the ``stable_repo`` fixture.
_OWNER = "testuser"
_SLUG = "stableprovider"
# Two distinct refs handed to ``StableProvider.compute``; ``_REF`` is also
# seeded as ``last_commit_id`` on symbol rows.
# NOTE(review): ``long_id`` is assumed to yield a "sha256:"-prefixed ID
# (see test_P1_17) — confirm against muse.core.types.
_REF = long_id("a" * 64)
_REF2 = long_id("b" * 64)
47
48
async def _seed_symbol(
    session: AsyncSession,
    repo_id: str,
    *,
    address: str,
    churn: int = 0,
    churn_30d: int = 0,
    churn_90d: int = 0,
    last_changed: datetime | None = None,
    last_commit_id: str | None = None,
    symbol_kind: str = "function",
) -> None:
    """Insert or update a ``musehub_symbol_intel`` row for test fixtures.

    The row is keyed on ``(repo_id, address)``.  On conflict every
    caller-supplied column (including ``symbol_kind``) is overwritten, so
    re-seeding an address always leaves the row matching the latest call.
    The remaining columns (blast counters, author_count, gravity, weekly,
    blast_top) get fixed placeholder values on insert and are left untouched
    on conflict.  The statement is executed and flushed, not committed.

    Parameters
    ----------
    session:        Active async SQLAlchemy session.
    repo_id:        Target repository ID.
    address:        Symbol address (``file.py::fn``).
    churn:          Lifetime change count (0 → since_start eligible).
    churn_30d:      Changes in last 30 days (0 → stable candidate).
    churn_90d:      Changes in last 90 days (0 → stable candidate).
    last_changed:   UTC datetime of last modification; None excludes from stable.
    last_commit_id: Commit ID of last modification, written to the
                    ``last_commit_id`` column (the tests expect the provider
                    to surface it as ``last_changed_commit``).
    symbol_kind:    Symbol kind string (function / class / etc.).
    """
    # Columns the caller controls: used both for the INSERT values and for the
    # ON CONFLICT update set, so the two can never drift apart.
    caller_columns = {
        "symbol_kind": symbol_kind,
        "churn": churn,
        "churn_30d": churn_30d,
        "churn_90d": churn_90d,
        "last_changed": last_changed,
        "last_commit_id": last_commit_id,
    }
    stmt = (
        pg_insert(MusehubSymbolIntel)
        .values(
            repo_id=repo_id,
            address=address,
            blast=0,
            blast_direct=0,
            blast_cross=0,
            author_count=1,
            gravity=0.0,
            weekly=[0] * 12,
            blast_top=[],
            **caller_columns,
        )
        .on_conflict_do_update(
            index_elements=["repo_id", "address"],
            set_=caller_columns,
        )
    )
    await session.execute(stmt)
    await session.flush()
107
108
109 # ---------------------------------------------------------------------------
110 # Fixtures
111 # ---------------------------------------------------------------------------
112
@pytest_asyncio.fixture
async def stable_repo(db_session: AsyncSession) -> MusehubRepo:
    """Create the primary test repo without seeding any symbol_intel rows."""
    repo = await create_repo(db_session, owner=_OWNER, slug=_SLUG)
    return repo
117
118
@pytest_asyncio.fixture
async def stable_repo_with_symbols(
    db_session: AsyncSession, stable_repo: MusehubRepo
) -> MusehubRepo:
    """Seed the primary repo with four symbols: two stable candidates, one
    recently active, and one with no ``last_changed`` timestamp."""
    target_id = stable_repo.repo_id
    seeded_at = datetime.now(timezone.utc)
    await db_session.commit()

    symbol_specs = (
        # stable — untouched for 180 days, churn_30d=0, churn_90d=0
        {
            "address": "pkg/core.py::parse_frame",
            "churn": 3, "churn_30d": 0, "churn_90d": 0,
            "last_changed": seeded_at - timedelta(days=180),
            "last_commit_id": _REF,
        },
        # eternal — never modified
        {
            "address": "pkg/codec.py::pack",
            "churn": 0, "churn_30d": 0, "churn_90d": 0,
            "last_changed": seeded_at - timedelta(days=365),
            "last_commit_id": None,
        },
        # unstable — active in last 30 days
        {
            "address": "pkg/api.py::handler",
            "churn": 12, "churn_30d": 4, "churn_90d": 4,
            "last_changed": seeded_at - timedelta(days=10),
            "last_commit_id": _REF,
        },
        # no last_changed — should be excluded
        {
            "address": "pkg/init.py::bootstrap",
            "churn": 0, "churn_30d": 0, "churn_90d": 0,
            "last_changed": None,
            "last_commit_id": None,
        },
    )
    # Seed in declaration order, one upsert per symbol.
    for spec in symbol_specs:
        await _seed_symbol(db_session, target_id, **spec)

    await db_session.commit()
    return stable_repo
160
161
162 # ---------------------------------------------------------------------------
163 # Tier 1 — Unit: _days_stable_from_dt()
164 # ---------------------------------------------------------------------------
165
class TestDaysStableHelper:
    """Unit tier (P1_01 – P1_06): the ``_days_stable_from_dt`` helper."""

    def test_P1_01_none_returns_zero(self) -> None:
        """A missing timestamp yields 0 days."""
        result = _days_stable_from_dt(None)
        assert result == 0

    def test_P1_02_exactly_90_days_ago(self) -> None:
        """A timestamp 90 days in the past yields 90."""
        ninety_days_back = datetime.now(timezone.utc) - timedelta(days=90)
        result = _days_stable_from_dt(ninety_days_back)
        assert result == 90

    def test_P1_03_future_timestamp_clamped_to_zero(self) -> None:
        """A timestamp 30 days in the future yields 0 rather than a negative value."""
        in_the_future = datetime.now(timezone.utc) + timedelta(days=30)
        result = _days_stable_from_dt(in_the_future)
        assert result == 0

    def test_P1_04_epoch_returns_large_positive(self) -> None:
        """The Unix epoch yields well over ten thousand days."""
        unix_epoch = datetime(year=1970, month=1, day=1, tzinfo=timezone.utc)
        result = _days_stable_from_dt(unix_epoch)
        assert result > 10_000

    def test_P1_05_naive_datetime_treated_as_utc(self) -> None:
        """A tz-naive timestamp 45 days back (UTC wall clock) yields 45."""
        utc_wall_clock = datetime.now(timezone.utc).replace(tzinfo=None)
        naive_past = utc_wall_clock - timedelta(days=45)
        result = _days_stable_from_dt(naive_past)
        assert result == 45

    def test_P1_06_one_day_ago_returns_one(self) -> None:
        """A timestamp one day and one second back yields 1."""
        just_over_a_day = timedelta(days=1, seconds=1)
        result = _days_stable_from_dt(datetime.now(timezone.utc) - just_over_a_day)
        assert result == 1
197
198
199 # ---------------------------------------------------------------------------
200 # Tier 2 — Integration: provider upserts and filtering
201 # ---------------------------------------------------------------------------
202
class TestStableProviderIntegration:
    """Integration tier (P1_07 – P1_14): provider run on a real async DB session."""

    @staticmethod
    async def _run_provider(session: AsyncSession, repo_id: str, ref: str) -> None:
        """Call ``StableProvider.compute`` for *repo_id* at *ref*, then flush."""
        await StableProvider().compute(session, repo_id, ref, {})
        await session.flush()

    @staticmethod
    async def _optional_row(
        session: AsyncSession, repo_id: str, address: str
    ) -> MusehubIntelStable | None:
        """Return the intel_stable row for ``(repo_id, address)`` or None."""
        from sqlalchemy import select
        query = select(MusehubIntelStable).where(
            MusehubIntelStable.repo_id == repo_id,
            MusehubIntelStable.address == address,
        )
        outcome = await session.execute(query)
        return outcome.scalar_one_or_none()

    @staticmethod
    async def _one_row(
        session: AsyncSession, repo_id: str, address: str, *, refresh: bool = False
    ) -> MusehubIntelStable:
        """Return exactly one intel_stable row for ``(repo_id, address)``.

        With ``refresh=True`` the query runs with ``populate_existing=True``.
        """
        from sqlalchemy import select
        query = select(MusehubIntelStable).where(
            MusehubIntelStable.repo_id == repo_id,
            MusehubIntelStable.address == address,
        )
        if refresh:
            query = query.execution_options(populate_existing=True)
        outcome = await session.execute(query)
        return outcome.scalar_one()

    @pytest.mark.asyncio
    async def test_P1_07_stable_symbol_upserted(
        self, db_session: AsyncSession, stable_repo_with_symbols: MusehubRepo
    ) -> None:
        """The symbol with churn_30d=0 and churn_90d=0 gets an intel_stable row."""
        rid = stable_repo_with_symbols.repo_id
        await self._run_provider(db_session, rid, _REF)
        found = await self._optional_row(db_session, rid, "pkg/core.py::parse_frame")
        assert found is not None

    @pytest.mark.asyncio
    async def test_P1_08_days_stable_value_correct(
        self, db_session: AsyncSession, stable_repo_with_symbols: MusehubRepo
    ) -> None:
        """A symbol last changed 180 days ago reports days_stable within 178..182."""
        rid = stable_repo_with_symbols.repo_id
        await self._run_provider(db_session, rid, _REF)
        found = await self._one_row(db_session, rid, "pkg/core.py::parse_frame")
        assert 178 <= found.days_stable <= 182

    @pytest.mark.asyncio
    async def test_P1_09_since_start_true_for_zero_lifetime_churn(
        self, db_session: AsyncSession, stable_repo_with_symbols: MusehubRepo
    ) -> None:
        """The churn=0 symbol is flagged since_start=True."""
        rid = stable_repo_with_symbols.repo_id
        await self._run_provider(db_session, rid, _REF)
        found = await self._one_row(db_session, rid, "pkg/codec.py::pack")
        assert found.since_start is True

    @pytest.mark.asyncio
    async def test_P1_10_since_start_false_for_nonzero_lifetime_churn(
        self, db_session: AsyncSession, stable_repo_with_symbols: MusehubRepo
    ) -> None:
        """The churn=3 symbol (churn_30d=0) is flagged since_start=False."""
        rid = stable_repo_with_symbols.repo_id
        await self._run_provider(db_session, rid, _REF)
        found = await self._one_row(db_session, rid, "pkg/core.py::parse_frame")
        assert found.since_start is False

    @pytest.mark.asyncio
    async def test_P1_11_last_changed_commit_populated(
        self, db_session: AsyncSession, stable_repo_with_symbols: MusehubRepo
    ) -> None:
        """last_changed_commit equals the last_commit_id seeded in symbol_intel."""
        rid = stable_repo_with_symbols.repo_id
        await self._run_provider(db_session, rid, _REF)
        found = await self._one_row(db_session, rid, "pkg/core.py::parse_frame")
        assert found.last_changed_commit == _REF

    @pytest.mark.asyncio
    async def test_P1_12_nonzero_churn_30d_excluded(
        self, db_session: AsyncSession, stable_repo_with_symbols: MusehubRepo
    ) -> None:
        """The churn_30d=4 symbol has no intel_stable row."""
        rid = stable_repo_with_symbols.repo_id
        await self._run_provider(db_session, rid, _REF)
        found = await self._optional_row(db_session, rid, "pkg/api.py::handler")
        assert found is None

    @pytest.mark.asyncio
    async def test_P1_13_rerun_updates_days_stable_in_place(
        self, db_session: AsyncSession, stable_repo_with_symbols: MusehubRepo
    ) -> None:
        """Two provider runs leave exactly one row for the stable symbol."""
        rid = stable_repo_with_symbols.repo_id
        await self._run_provider(db_session, rid, _REF)
        await self._run_provider(db_session, rid, _REF2)
        from sqlalchemy import func, select
        matching = select(func.count()).where(
            MusehubIntelStable.repo_id == rid,
            MusehubIntelStable.address == "pkg/core.py::parse_frame",
        )
        total = (await db_session.execute(matching)).scalar_one()
        assert total == 1

    @pytest.mark.asyncio
    async def test_P1_14_ref_column_updated_on_rerun(
        self, db_session: AsyncSession, stable_repo_with_symbols: MusehubRepo
    ) -> None:
        """After a second run at a different ref, the row's ref is the new one."""
        rid = stable_repo_with_symbols.repo_id
        await self._run_provider(db_session, rid, _REF)
        # Expire loaded state between runs; the final read also repopulates
        # existing identities so the assertion sees the second run's value.
        db_session.expire_all()
        await self._run_provider(db_session, rid, _REF2)
        found = await self._one_row(
            db_session, rid, "pkg/core.py::parse_frame", refresh=True
        )
        assert found.ref == _REF2
346
347
348 # ---------------------------------------------------------------------------
349 # Tier 3 — E2E: seed → provider → verify DB shape
350 # ---------------------------------------------------------------------------
351
class TestStableProviderE2E:
    """End-to-end tests — full seed-to-DB round-trip.

    Every test runs the provider against the four-symbol fixture and then
    inspects the resulting ``MusehubIntelStable`` rows.  Tests that assert a
    property over all rows first require a non-empty result so they cannot
    pass vacuously when the provider writes nothing.
    """

    @pytest.mark.asyncio
    async def test_P1_15_row_count_positive(
        self, db_session: AsyncSession, stable_repo_with_symbols: MusehubRepo
    ) -> None:
        """At least one row written after running provider on seeded data."""
        repo_id = stable_repo_with_symbols.repo_id
        results = await StableProvider().compute(db_session, repo_id, _REF, {})
        await db_session.flush()
        # The count is read from the payload of the first result entry;
        # an empty result list is treated as zero rows.
        count = results[0][1]["count"] if results else 0
        assert count > 0

    @pytest.mark.asyncio
    async def test_P1_16_days_stable_positive(
        self, db_session: AsyncSession, stable_repo_with_symbols: MusehubRepo
    ) -> None:
        """All written rows have days_stable > 0 (and at least one row exists)."""
        from sqlalchemy import select
        repo_id = stable_repo_with_symbols.repo_id
        await StableProvider().compute(db_session, repo_id, _REF, {})
        await db_session.flush()
        rows = (await db_session.execute(
            select(MusehubIntelStable).where(
                MusehubIntelStable.repo_id == repo_id
            )
        )).scalars().all()
        # Guard against all() being trivially true on an empty result.
        assert rows, "provider wrote no intel_stable rows"
        assert all(r.days_stable > 0 for r in rows)

    @pytest.mark.asyncio
    async def test_P1_17_last_changed_commit_is_sha256_prefixed_or_none(
        self, db_session: AsyncSession, stable_repo_with_symbols: MusehubRepo
    ) -> None:
        """last_changed_commit is either None or starts with 'sha256:'."""
        from sqlalchemy import select
        repo_id = stable_repo_with_symbols.repo_id
        await StableProvider().compute(db_session, repo_id, _REF, {})
        await db_session.flush()
        rows = (await db_session.execute(
            select(MusehubIntelStable).where(
                MusehubIntelStable.repo_id == repo_id
            )
        )).scalars().all()
        # Without this guard the loop below would check nothing on an empty result.
        assert rows, "provider wrote no intel_stable rows"
        for row in rows:
            assert row.last_changed_commit is None or row.last_changed_commit.startswith("sha256:")

    @pytest.mark.asyncio
    async def test_P1_18_since_start_only_when_lifetime_churn_zero(
        self, db_session: AsyncSession, stable_repo_with_symbols: MusehubRepo
    ) -> None:
        """since_start=True only for symbols whose lifetime churn is 0."""
        from sqlalchemy import select
        repo_id = stable_repo_with_symbols.repo_id
        await StableProvider().compute(db_session, repo_id, _REF, {})
        await db_session.flush()
        rows = (await db_session.execute(
            select(MusehubIntelStable).where(
                MusehubIntelStable.repo_id == repo_id,
                MusehubIntelStable.since_start == True,  # noqa: E712
            )
        )).scalars().all()
        addresses = {r.address for r in rows}
        # Positive case: the churn=0 symbol with a timestamp must be flagged,
        # otherwise the negative checks below pass on an empty set.
        assert "pkg/codec.py::pack" in addresses
        # Negative cases: symbols with lifetime churn > 0 must not be flagged.
        assert "pkg/api.py::handler" not in addresses
        assert "pkg/core.py::parse_frame" not in addresses
418
419
420 # ---------------------------------------------------------------------------
421 # Tier 4 — Stress: large batch, idempotency
422 # ---------------------------------------------------------------------------
423
class TestStableProviderStress:
    """Stress tier (P1_19 – P1_21): larger symbol counts and repeated runs."""

    @staticmethod
    async def _stable_row_count(session: AsyncSession, repo_id: str) -> int:
        """Count the intel_stable rows belonging to *repo_id*."""
        from sqlalchemy import func, select
        counting = select(func.count()).where(MusehubIntelStable.repo_id == repo_id)
        outcome = await session.execute(counting)
        return outcome.scalar_one()

    @pytest.mark.asyncio
    async def test_P1_19_500_symbols_all_upserted(
        self, db_session: AsyncSession, stable_repo: MusehubRepo
    ) -> None:
        """One run over 500 qualifying symbols yields 500 intel_stable rows."""
        rid = stable_repo.repo_id
        seeded_at = datetime.now(timezone.utc)
        await db_session.commit()
        for i in range(500):
            # Each symbol gets its own age: 100, 101, ... 599 days.
            age = timedelta(days=100 + i)
            await _seed_symbol(
                db_session, rid,
                address=f"pkg/mod{i}.py::fn_{i}",
                churn=0, churn_30d=0, churn_90d=0,
                last_changed=seeded_at - age,
                last_commit_id=_REF,
            )
        await db_session.commit()

        await StableProvider().compute(db_session, rid, _REF, {})
        await db_session.flush()

        assert await self._stable_row_count(db_session, rid) == 500

    @pytest.mark.asyncio
    async def test_P1_20_no_duplicates_after_single_run(
        self, db_session: AsyncSession, stable_repo: MusehubRepo
    ) -> None:
        """One run over 50 distinct addresses yields exactly 50 rows."""
        rid = stable_repo.repo_id
        seeded_at = datetime.now(timezone.utc)
        await db_session.commit()
        last_touched = seeded_at - timedelta(days=200)
        for i in range(50):
            await _seed_symbol(
                db_session, rid,
                address=f"pkg/dup{i}.py::fn",
                churn=0, churn_30d=0, churn_90d=0,
                last_changed=last_touched,
                last_commit_id=_REF,
            )
        await db_session.commit()

        await StableProvider().compute(db_session, rid, _REF, {})
        await db_session.flush()

        assert await self._stable_row_count(db_session, rid) == 50

    @pytest.mark.asyncio
    async def test_P1_21_upsert_is_idempotent(
        self, db_session: AsyncSession, stable_repo: MusehubRepo
    ) -> None:
        """Two consecutive runs leave the same row count as the seeded symbols."""
        rid = stable_repo.repo_id
        seeded_at = datetime.now(timezone.utc)
        await db_session.commit()
        last_touched = seeded_at - timedelta(days=150)
        for i in range(20):
            await _seed_symbol(
                db_session, rid,
                address=f"pkg/idem{i}.py::fn",
                churn=0, churn_30d=0, churn_90d=0,
                last_changed=last_touched,
                last_commit_id=_REF,
            )
        await db_session.commit()

        # Run once per ref, flushing after each run.
        for ref in (_REF, _REF2):
            await StableProvider().compute(db_session, rid, ref, {})
            await db_session.flush()

        assert await self._stable_row_count(db_session, rid) == 20
503
504
505 # ---------------------------------------------------------------------------
506 # Tier 5 — Data Integrity
507 # ---------------------------------------------------------------------------
508
class TestStableProviderDataIntegrity:
    """Data integrity tests — exclusion rules and uniqueness guarantees.

    P1_22 and P1_23 seed a single non-qualifying symbol and expect zero
    ``MusehubIntelStable`` rows for the repo; P1_24 seeds the same address in
    two repos and expects one row per repo.
    """

    @pytest.mark.asyncio
    async def test_P1_22_null_last_changed_excluded(
        self, db_session: AsyncSession, stable_repo: MusehubRepo
    ) -> None:
        """Symbols with last_changed=NULL are not written to intel_stable."""
        from sqlalchemy import select, func
        repo_id = stable_repo.repo_id
        await db_session.commit()
        await _seed_symbol(
            db_session, repo_id,
            address="pkg/null.py::fn",
            churn=0, churn_30d=0, churn_90d=0,
            last_changed=None,
            last_commit_id=None,
        )
        await db_session.commit()
        await StableProvider().compute(db_session, repo_id, _REF, {})
        await db_session.flush()
        count = (await db_session.execute(
            select(func.count()).where(MusehubIntelStable.repo_id == repo_id)
        )).scalar_one()
        assert count == 0

    @pytest.mark.asyncio
    async def test_P1_23_nonzero_churn_90d_excluded(
        self, db_session: AsyncSession, stable_repo: MusehubRepo
    ) -> None:
        """Symbol with churn_90d > 0 is excluded even if churn_30d = 0."""
        from sqlalchemy import select, func
        repo_id = stable_repo.repo_id
        now = datetime.now(timezone.utc)
        await db_session.commit()
        await _seed_symbol(
            db_session, repo_id,
            address="pkg/slow.py::fn",
            churn=5, churn_30d=0, churn_90d=2,
            last_changed=now - timedelta(days=45),
            last_commit_id=_REF,
        )
        await db_session.commit()
        await StableProvider().compute(db_session, repo_id, _REF, {})
        await db_session.flush()
        count = (await db_session.execute(
            select(func.count()).where(MusehubIntelStable.repo_id == repo_id)
        )).scalar_one()
        assert count == 0

    @pytest.mark.asyncio
    async def test_P1_24_address_unique_per_repo(
        self, db_session: AsyncSession, stable_repo: MusehubRepo
    ) -> None:
        """(repo_id, address) primary key — two repos can share the same address."""
        from sqlalchemy import select, func
        repo_id = stable_repo.repo_id
        repo2 = await create_repo(db_session, owner=_OWNER, slug="stableprovider2")
        repo_id2 = repo2.repo_id
        now = datetime.now(timezone.utc)
        await db_session.commit()
        for rid in (repo_id, repo_id2):
            await _seed_symbol(
                db_session, rid,
                address="shared/utils.py::parse",
                churn=0, churn_30d=0, churn_90d=0,
                last_changed=now - timedelta(days=200),
                last_commit_id=_REF,
            )
        await db_session.commit()
        await StableProvider().compute(db_session, repo_id, _REF, {})
        await StableProvider().compute(db_session, repo_id2, _REF, {})
        await db_session.flush()
        # Scope the count to the two repos created here so rows belonging to
        # any other repo with the same address cannot affect the result.
        count = (await db_session.execute(
            select(func.count()).where(
                MusehubIntelStable.address == "shared/utils.py::parse",
                MusehubIntelStable.repo_id.in_([repo_id, repo_id2]),
            )
        )).scalar_one()
        assert count == 2
588
589
590 # ---------------------------------------------------------------------------
591 # Tier 6 — Performance
592 # ---------------------------------------------------------------------------
593
class TestStableProviderPerformance:
    """Performance tier (P1_25 – P1_26): wall-clock bounds on 1000-symbol runs."""

    @pytest.mark.asyncio
    async def test_P1_25_1000_row_batch_under_5s(
        self, db_session: AsyncSession, stable_repo: MusehubRepo
    ) -> None:
        """The first provider run over 1000 seeded symbols finishes within 5 seconds."""
        rid = stable_repo.repo_id
        seeded_at = datetime.now(timezone.utc)
        await db_session.commit()
        for i in range(1000):
            # Ages cycle through 100..999 days.
            age_days = 100 + (i % 900)
            await _seed_symbol(
                db_session, rid,
                address=f"pkg/perf{i}.py::fn",
                churn=0, churn_30d=0, churn_90d=0,
                last_changed=seeded_at - timedelta(days=age_days),
                last_commit_id=_REF,
            )
        await db_session.commit()

        # Only the provider run and its flush are timed; seeding is excluded.
        t0 = time.monotonic()
        await StableProvider().compute(db_session, rid, _REF, {})
        await db_session.flush()
        elapsed = time.monotonic() - t0

        assert elapsed < 5.0, f"First run took {elapsed:.2f}s — expected < 5s"

    @pytest.mark.asyncio
    async def test_P1_26_second_run_all_conflicts_under_5s(
        self, db_session: AsyncSession, stable_repo: MusehubRepo
    ) -> None:
        """A second run over the same 1000 symbols also finishes within 5 seconds."""
        rid = stable_repo.repo_id
        seeded_at = datetime.now(timezone.utc)
        await db_session.commit()
        for i in range(1000):
            age_days = 100 + (i % 900)
            await _seed_symbol(
                db_session, rid,
                address=f"pkg/perf2_{i}.py::fn",
                churn=0, churn_30d=0, churn_90d=0,
                last_changed=seeded_at - timedelta(days=age_days),
                last_commit_id=_REF,
            )
        await db_session.commit()

        # Untimed first run so every row already exists for the second one.
        await StableProvider().compute(db_session, rid, _REF, {})
        await db_session.flush()

        t0 = time.monotonic()
        await StableProvider().compute(db_session, rid, _REF2, {})
        await db_session.flush()
        elapsed = time.monotonic() - t0

        assert elapsed < 5.0, f"Second run took {elapsed:.2f}s — expected < 5s"
644
645
646 # ---------------------------------------------------------------------------
647 # Tier 7 — Security
648 # ---------------------------------------------------------------------------
649
class TestStableProviderSecurity:
    """Security tests — injection safety and repo isolation.

    P1_27 seeds an address containing SQL metacharacters and expects it back
    unchanged; P1_28 seeds one repo only and checks that the provider writes
    to that repo and to no other.
    """

    @pytest.mark.asyncio
    async def test_P1_27_sql_injection_in_address_stored_verbatim(
        self, db_session: AsyncSession, stable_repo: MusehubRepo
    ) -> None:
        """Malicious address string is stored as plain text — not executed."""
        from sqlalchemy import select
        repo_id = stable_repo.repo_id
        injection = "'; DROP TABLE musehub_intel_stable; --"
        now = datetime.now(timezone.utc)
        await db_session.commit()
        await _seed_symbol(
            db_session, repo_id,
            address=injection,
            churn=0, churn_30d=0, churn_90d=0,
            last_changed=now - timedelta(days=100),
            last_commit_id=_REF,
        )
        await db_session.commit()
        await StableProvider().compute(db_session, repo_id, _REF, {})
        await db_session.flush()
        # Table must still exist and contain the verbatim string
        row = (await db_session.execute(
            select(MusehubIntelStable).where(
                MusehubIntelStable.repo_id == repo_id,
                MusehubIntelStable.address == injection,
            )
        )).scalar_one_or_none()
        assert row is not None
        assert row.address == injection

    @pytest.mark.asyncio
    async def test_P1_28_repo_isolation(
        self, db_session: AsyncSession, stable_repo: MusehubRepo
    ) -> None:
        """Running provider for repo A does not write rows for repo B."""
        from sqlalchemy import select, func
        repo_id_a = stable_repo.repo_id
        repo_b = await create_repo(db_session, owner=_OWNER, slug="stableisolation")
        repo_id_b = repo_b.repo_id
        now = datetime.now(timezone.utc)
        await db_session.commit()
        # Seed only repo A
        await _seed_symbol(
            db_session, repo_id_a,
            address="shared/fn.py::do_work",
            churn=0, churn_30d=0, churn_90d=0,
            last_changed=now - timedelta(days=120),
            last_commit_id=_REF,
        )
        await db_session.commit()
        await StableProvider().compute(db_session, repo_id_a, _REF, {})
        await db_session.flush()
        # Positive control: repo A must have received its row, otherwise the
        # zero-row check on repo B would also pass if the provider wrote nothing.
        count_a = (await db_session.execute(
            select(func.count()).where(MusehubIntelStable.repo_id == repo_id_a)
        )).scalar_one()
        assert count_a == 1
        # repo B must have zero rows
        count_b = (await db_session.execute(
            select(func.count()).where(MusehubIntelStable.repo_id == repo_id_b)
        )).scalar_one()
        assert count_b == 0
File History 2 commits
sha256:4992098130166d191cefed0a2821d19cd3cdd3cf50867a4e715c2b30636826c7 fix: repair syntax errors from typing annotation cleanup Sonnet 4.6 20 days ago
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 20 days ago