"""TDD — _walk_commit_delta scale regression (issue #60). L-tier bench (1000 commits) fails: wire_fetch_mpack returns mpack_id='' because _walk_commit_delta returns {} for a 1000-commit chain. M-tier (100 commits) passes. These tests confirm _walk_commit_delta returns the full delta at any scale: WD-1 Single commit, have=[] → returns 1 commit. WD-2 100-commit linear chain, have=[] → returns 100 commits. WD-3 1000-commit linear chain, have=[] → returns 1000 commits. ← RED at L-tier WD-4 have cuts the chain — only commits above have are returned. """ from __future__ import annotations import pytest import msgpack from datetime import datetime, timezone from muse.core.types import fake_id from musehub.db import musehub_repo_models as db from musehub.db.musehub_repo_models import MusehubCommitGraph from sqlalchemy.ext.asyncio import AsyncSession def _now() -> datetime: return datetime.now(tz=timezone.utc) async def _build_chain(session: AsyncSession, n: int) -> list[str]: """Insert a linear chain of N commits. Returns commit_ids tip→root order.""" commit_ids: list[str] = [] parent: list[str] = [] for i in range(n): snap_id = fake_id(f"snap-wdscale-{i}") snap = db.MusehubSnapshot( snapshot_id=snap_id, manifest_blob=msgpack.packb({f"file{i}.txt": fake_id(f"obj-{i}")}, use_bin_type=True), directories=[], entry_count=1, created_at=_now(), ) session.add(snap) cid = fake_id(f"commit-wdscale-{i}") commit = db.MusehubCommit( commit_id=cid, branch="main", parent_ids=parent, message=f"commit {i}", author="gabriel", timestamp=_now(), snapshot_id=snap_id, ) session.add(commit) session.add(MusehubCommitGraph( commit_id=cid, parent_ids=parent, generation=i, snapshot_id=snap_id, )) commit_ids.append(cid) parent = [cid] await session.commit() return commit_ids # [root, ..., tip] # ── WD-1 ────────────────────────────────────────────────────────────────────── @pytest.mark.asyncio async def test_wd1_single_commit(db_session: AsyncSession) -> None: """Single commit, have=[] → _walk_commit_delta returns exactly that commit.""" from musehub.services.musehub_wire import _walk_commit_delta ids = await _build_chain(db_session, 1) result = await _walk_commit_delta(db_session, want=[ids[-1]], have=[]) assert len(result) == 1 assert ids[-1] in result # ── WD-2 ────────────────────────────────────────────────────────────────────── @pytest.mark.asyncio async def test_wd2_hundred_commit_chain(db_session: AsyncSession) -> None: """100-commit linear chain, have=[] → _walk_commit_delta returns all 100 commits.""" from musehub.services.musehub_wire import _walk_commit_delta ids = await _build_chain(db_session, 100) result = await _walk_commit_delta(db_session, want=[ids[-1]], have=[]) assert len(result) == 100, f"expected 100 commits, got {len(result)}" # ── WD-3 ────────────────────────────────────────────────────────────────────── @pytest.mark.asyncio async def test_wd3_thousand_commit_chain(db_session: AsyncSession) -> None: """1000-commit linear chain, have=[] → _walk_commit_delta returns all 1000 commits. This is the L-tier regression: the bench push succeeds (commits exist in DB) but clone/fetch/pull return mpack_id='' because _walk_commit_delta returns {}. """ from musehub.services.musehub_wire import _walk_commit_delta ids = await _build_chain(db_session, 1_000) result = await _walk_commit_delta(db_session, want=[ids[-1]], have=[]) assert len(result) == 1_000, f"expected 1000 commits, got {len(result)}" # ── WD-4 ────────────────────────────────────────────────────────────────────── @pytest.mark.asyncio async def test_wd4_have_cuts_delta(db_session: AsyncSession) -> None: """have=[root] → only commits above root are returned.""" from musehub.services.musehub_wire import _walk_commit_delta ids = await _build_chain(db_session, 10) # ids[0] = root, ids[9] = tip; have=[ids[4]] means commits 5–9 are needed result = await _walk_commit_delta(db_session, want=[ids[-1]], have=[ids[4]]) assert len(result) == 5, f"expected 5 commits above have, got {len(result)}" for cid in ids[5:]: assert cid in result for cid in ids[:5]: assert cid not in result