gabriel / musehub public
test_file_last_commits.py python
398 lines 16.7 KB
Raw
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 1 day ago
1 """
2 Tests for materialized per-file last-commit data.
3
4 FLC1 — get_file_last_commits returns empty dict when table has no rows for repo
5 FLC2 — compute_and_store_file_last_commits populates table from commit history
6 FLC3 — get_file_last_commits reads from table (single query, no blob decode)
7 FLC4 — file changed in newer commit → attributed to newer commit
8 FLC5 — file unchanged since first commit → attributed to oldest commit
9 FLC6 — directory path returns the commit of its most-recently-changed file
10 FLC7 — compute is idempotent: running twice gives same result, no duplicates
11 FLC8 — only paths requested are returned (no extra rows leaked)
12 FLC9 — unknown paths return no entry (no crash)
13 FLC10 — second push updates attribution for files that changed
14 FLC11 — more than 100 commits does not raise (batch chunking)
15 """
16 from __future__ import annotations
17
18 import hashlib
19 from collections.abc import Mapping
20 from datetime import datetime, timezone, timedelta
21
22 import pytest
23 from sqlalchemy.ext.asyncio import AsyncSession
24 from sqlalchemy import select
25
26 from musehub.db.musehub_intel_models import MusehubFileLastCommit
27 from musehub.db.musehub_repo_models import MusehubCommit, MusehubCommitRef, MusehubSnapshot, MusehubSnapshotRef
28 from tests.factories import create_repo, create_branch
29
30
31 # ---------------------------------------------------------------------------
32 # Helpers
33 # ---------------------------------------------------------------------------
34
35 def _utc(offset_days: int = 0) -> datetime:
36 return datetime.now(tz=timezone.utc) + timedelta(days=offset_days)
37
38
39 def _snap_id(name: str) -> str:
40 return "sha256:" + hashlib.sha256(name.encode()).hexdigest()
41
42
43 def _commit_id(name: str) -> str:
44 return "sha256:" + hashlib.sha256(f"commit:{name}".encode()).hexdigest()
45
46
47 def _obj_id(name: str) -> str:
48 return "sha256:" + hashlib.sha256(f"obj:{name}".encode()).hexdigest()
49
50
async def _add_snapshot(session: AsyncSession, repo_id: str, snap_name: str, manifest: Mapping[str, str]) -> str:
    """Store a snapshot with its manifest blob and reference it from ``repo_id``.

    Args:
        session: Async session the new rows are added to and flushed through.
        repo_id: Repository the ``MusehubSnapshotRef`` row is created for.
        snap_name: Name hashed by ``_snap_id`` into the snapshot id.
        manifest: Mapping of file path to object id, stored msgpack-encoded.

    Returns:
        The ``sha256:``-prefixed snapshot id.
    """
    import msgpack

    snap_id = _snap_id(snap_name)
    # The id depends only on ``snap_name``, so the snapshot row is inserted
    # only when no row with that id exists yet.
    if await session.get(MusehubSnapshot, snap_id) is None:
        session.add(MusehubSnapshot(
            snapshot_id=snap_id,
            directories=[],
            # msgpack's packer handles dict instances, not arbitrary Mapping
            # implementations; copy so every Mapping the signature allows works.
            manifest_blob=msgpack.packb(dict(manifest), use_bin_type=True),
            entry_count=len(manifest),
            created_at=_utc(),
        ))
    # The repo -> snapshot reference is added on every call, whether or not
    # the snapshot row itself already existed.
    session.add(MusehubSnapshotRef(repo_id=repo_id, snapshot_id=snap_id))
    await session.flush()
    return snap_id
67
68
async def _add_commit(
    session: AsyncSession,
    repo_id: str,
    name: str,
    snap_name: str,
    manifest: dict[str, str],
    branch: str = "main",
    ts_offset: int = 0,
    agent_id: str = "",
    message: str = "",
    parent: "MusehubCommit | None" = None,
) -> MusehubCommit:
    """Create a commit (and its snapshot) for ``repo_id`` and flush it.

    Args:
        session: Async session the rows are added to.
        repo_id: Repository the commit reference is created for.
        name: Name hashed by ``_commit_id`` into the commit id; also used in
            the default message.
        snap_name: Name of the snapshot stored via ``_add_snapshot``.
        manifest: File path -> object id mapping stored in the snapshot.
        branch: Branch name recorded on the commit.
        ts_offset: Day offset from now used for the commit timestamp.
        agent_id: Value stored in the commit's ``agent_id`` field.
        message: Commit message; falls back to ``"commit <name>"`` when empty.
        parent: Optional parent commit; its id becomes the only parent id.

    Returns:
        The newly added ``MusehubCommit``.
    """
    # The snapshot is stored first so the commit can point at its id.
    snapshot_id = await _add_snapshot(session, repo_id, snap_name, manifest)

    if parent:
        parent_ids = [parent.commit_id]
    else:
        parent_ids = []

    new_commit = MusehubCommit(
        commit_id=_commit_id(name),
        branch=branch,
        parent_ids=parent_ids,
        message=message or f"commit {name}",
        author="gabriel",
        timestamp=_utc(ts_offset),
        snapshot_id=snapshot_id,
        agent_id=agent_id,
    )
    session.add(new_commit)

    commit_ref = MusehubCommitRef(repo_id=repo_id, commit_id=new_commit.commit_id)
    session.add(commit_ref)

    await session.flush()
    return new_commit
96
97
98 # ---------------------------------------------------------------------------
99 # FLC1 — empty table → empty result
100 # ---------------------------------------------------------------------------
101
@pytest.mark.asyncio
async def test_flc1_empty_table_returns_empty(db_session: AsyncSession) -> None:
    """FLC1: a repo with no materialized rows yields an empty dict, not an error."""
    from musehub.services.musehub_repository import get_file_last_commits

    fresh_repo = await create_repo(db_session)
    requested_paths = ["README.md"]

    # Nothing was computed for this repo, so the lookup must come back empty.
    found = await get_file_last_commits(db_session, fresh_repo.repo_id, requested_paths, ref="main")
    assert found == {}
110
111
112 # ---------------------------------------------------------------------------
113 # FLC2 — compute populates table
114 # ---------------------------------------------------------------------------
115
@pytest.mark.asyncio
async def test_flc2_compute_populates_table(db_session: AsyncSession) -> None:
    """FLC2: compute_and_store_file_last_commits writes one row per manifest path."""
    from musehub.services.file_last_commits import compute_and_store_file_last_commits

    repo = await create_repo(db_session)
    main_branch = await create_branch(db_session, repo.repo_id, name="main")

    files = {"README.md": _obj_id("readme"), "src/app.py": _obj_id("app")}
    head = await _add_commit(db_session, repo.repo_id, "c1", "s1", files)
    main_branch.head_commit_id = head.commit_id
    await db_session.flush()

    await compute_and_store_file_last_commits(db_session, repo.repo_id, "main", head.commit_id)
    await db_session.flush()

    # Read the materialized rows straight from the table for this repo/branch.
    query = select(MusehubFileLastCommit).where(
        MusehubFileLastCommit.repo_id == repo.repo_id,
        MusehubFileLastCommit.branch == "main",
    )
    stored_rows = (await db_session.execute(query)).scalars().all()

    stored_paths = {row.path for row in stored_rows}
    for expected_path in ("README.md", "src/app.py"):
        assert expected_path in stored_paths
142
143
144 # ---------------------------------------------------------------------------
145 # FLC3 — get_file_last_commits reads from table
146 # ---------------------------------------------------------------------------
147
@pytest.mark.asyncio
async def test_flc3_reads_from_table(db_session: AsyncSession) -> None:
    """FLC3: after compute, get_file_last_commits returns the stored sha and message."""
    from musehub.services.file_last_commits import compute_and_store_file_last_commits
    from musehub.services.musehub_repository import get_file_last_commits

    repo = await create_repo(db_session)
    main_branch = await create_branch(db_session, repo.repo_id, name="main")

    files = {"README.md": _obj_id("readme-v1")}
    head = await _add_commit(db_session, repo.repo_id, "c1", "s1", files, message="feat: init")
    main_branch.head_commit_id = head.commit_id
    await db_session.flush()

    await compute_and_store_file_last_commits(db_session, repo.repo_id, "main", head.commit_id)
    await db_session.flush()

    lookup = await get_file_last_commits(db_session, repo.repo_id, ["README.md"], ref="main")

    assert "README.md" in lookup
    readme_entry = lookup["README.md"]
    assert readme_entry["sha"] == head.commit_id
    assert readme_entry["message"] == "feat: init"
170
171
172 # ---------------------------------------------------------------------------
173 # FLC4 — changed file attributed to newer commit
174 # ---------------------------------------------------------------------------
175
@pytest.mark.asyncio
async def test_flc4_changed_file_attributed_to_newer_commit(db_session: AsyncSession) -> None:
    """FLC4: a file modified in commit 2 points at commit 2; an untouched one stays on commit 1."""
    from musehub.services.file_last_commits import compute_and_store_file_last_commits
    from musehub.services.musehub_repository import get_file_last_commits

    repo = await create_repo(db_session)
    main_branch = await create_branch(db_session, repo.repo_id, name="main")

    # Only README.md gets a new object id in the second manifest.
    app_oid = _obj_id("app-v1")
    before = {"README.md": _obj_id("readme-v1"), "src/app.py": app_oid}
    after = {"README.md": _obj_id("readme-v2"), "src/app.py": app_oid}

    older = await _add_commit(db_session, repo.repo_id, "c1", "s1", before, ts_offset=-1)
    newer = await _add_commit(db_session, repo.repo_id, "c2", "s2", after, ts_offset=0, parent=older)
    main_branch.head_commit_id = newer.commit_id
    await db_session.flush()

    await compute_and_store_file_last_commits(db_session, repo.repo_id, "main", newer.commit_id)
    await db_session.flush()

    wanted = ["README.md", "src/app.py"]
    lookup = await get_file_last_commits(db_session, repo.repo_id, wanted, ref="main")

    assert lookup["README.md"]["sha"] == newer.commit_id
    assert lookup["src/app.py"]["sha"] == older.commit_id
200
201
202 # ---------------------------------------------------------------------------
203 # FLC5 — unchanged file attributed to first commit
204 # ---------------------------------------------------------------------------
205
@pytest.mark.asyncio
async def test_flc5_unchanged_file_attributed_to_oldest_commit(db_session: AsyncSession) -> None:
    """FLC5: a file whose object id never changes is attributed to the first commit of the chain."""
    from musehub.services.file_last_commits import compute_and_store_file_last_commits
    from musehub.services.musehub_repository import get_file_last_commits

    repo = await create_repo(db_session)
    main_branch = await create_branch(db_session, repo.repo_id, name="main")

    oid = _obj_id("stable")

    # Build c1 <- c2 <- c3, each carrying the identical single-file manifest.
    history: list[MusehubCommit] = []
    for idx, day_offset in enumerate((-2, -1, 0), start=1):
        previous = history[-1] if history else None
        history.append(
            await _add_commit(
                db_session,
                repo.repo_id,
                f"c{idx}",
                f"s{idx}",
                {"stable.py": oid},
                ts_offset=day_offset,
                parent=previous,
            )
        )
    oldest, head = history[0], history[-1]

    main_branch.head_commit_id = head.commit_id
    await db_session.flush()

    await compute_and_store_file_last_commits(db_session, repo.repo_id, "main", head.commit_id)
    await db_session.flush()

    lookup = await get_file_last_commits(db_session, repo.repo_id, ["stable.py"], ref="main")
    assert lookup["stable.py"]["sha"] == oldest.commit_id
227
228
229 # ---------------------------------------------------------------------------
230 # FLC6 — directory path → most-recently-changed file in dir
231 # ---------------------------------------------------------------------------
232
@pytest.mark.asyncio
async def test_flc6_directory_attributed_to_most_recent_child_commit(db_session: AsyncSession) -> None:
    """FLC6: directory path resolves to the commit that last touched any file inside it.

    ``src/a.py`` is unchanged between the two commits while ``src/b.py`` gets
    a new object id in the second one, so ``src`` must resolve to the second
    commit.
    """
    from musehub.services.file_last_commits import compute_and_store_file_last_commits
    from musehub.services.musehub_repository import get_file_last_commits

    repo = await create_repo(db_session)
    branch = await create_branch(db_session, repo.repo_id, name="main")

    m1 = {"src/a.py": _obj_id("a-v1"), "src/b.py": _obj_id("b-v1")}
    m2 = {"src/a.py": _obj_id("a-v1"), "src/b.py": _obj_id("b-v2")}

    c1 = await _add_commit(db_session, repo.repo_id, "c1", "s1", m1, ts_offset=-1)
    # Link c2 to c1 (as FLC4/FLC5 do) so the two commits form one history and
    # the directory has children last touched by different commits.
    c2 = await _add_commit(db_session, repo.repo_id, "c2", "s2", m2, ts_offset=0, parent=c1)
    branch.head_commit_id = c2.commit_id
    await db_session.flush()

    await compute_and_store_file_last_commits(db_session, repo.repo_id, "main", c2.commit_id)
    await db_session.flush()

    result = await get_file_last_commits(db_session, repo.repo_id, ["src"], ref="main")
    assert result["src"]["sha"] == c2.commit_id
255
256
257 # ---------------------------------------------------------------------------
258 # FLC7 — idempotent
259 # ---------------------------------------------------------------------------
260
@pytest.mark.asyncio
async def test_flc7_compute_is_idempotent(db_session: AsyncSession) -> None:
    """FLC7: computing twice for the same head leaves exactly one row per path."""
    from musehub.services.file_last_commits import compute_and_store_file_last_commits

    repo = await create_repo(db_session)
    main_branch = await create_branch(db_session, repo.repo_id, name="main")

    head = await _add_commit(db_session, repo.repo_id, "c1", "s1", {"a.py": _obj_id("a")})
    main_branch.head_commit_id = head.commit_id
    await db_session.flush()

    # Run the materialization twice back to back with identical arguments.
    for _ in range(2):
        await compute_and_store_file_last_commits(db_session, repo.repo_id, "main", head.commit_id)
    await db_session.flush()

    query = select(MusehubFileLastCommit).where(
        MusehubFileLastCommit.repo_id == repo.repo_id,
        MusehubFileLastCommit.branch == "main",
        MusehubFileLastCommit.path == "a.py",
    )
    matching = (await db_session.execute(query)).scalars().all()
    assert len(matching) == 1
285
286
287 # ---------------------------------------------------------------------------
288 # FLC8 — only requested paths returned
289 # ---------------------------------------------------------------------------
290
@pytest.mark.asyncio
async def test_flc8_only_requested_paths_returned(db_session: AsyncSession) -> None:
    """FLC8: the lookup result contains exactly the requested path and nothing else."""
    from musehub.services.file_last_commits import compute_and_store_file_last_commits
    from musehub.services.musehub_repository import get_file_last_commits

    repo = await create_repo(db_session)
    main_branch = await create_branch(db_session, repo.repo_id, name="main")

    # Three files are materialized, but only one of them is requested below.
    files = {"a.py": _obj_id("a"), "b.py": _obj_id("b"), "c.py": _obj_id("c")}
    head = await _add_commit(db_session, repo.repo_id, "c1", "s1", files)
    main_branch.head_commit_id = head.commit_id
    await db_session.flush()

    await compute_and_store_file_last_commits(db_session, repo.repo_id, "main", head.commit_id)
    await db_session.flush()

    lookup = await get_file_last_commits(db_session, repo.repo_id, ["a.py"], ref="main")
    returned_paths = set(lookup)
    assert returned_paths == {"a.py"}
310
311
312 # ---------------------------------------------------------------------------
313 # FLC9 — unknown paths return no entry
314 # ---------------------------------------------------------------------------
315
@pytest.mark.asyncio
async def test_flc9_unknown_paths_not_in_result(db_session: AsyncSession) -> None:
    """FLC9: a requested path that is in no snapshot is simply missing from the result."""
    from musehub.services.file_last_commits import compute_and_store_file_last_commits
    from musehub.services.musehub_repository import get_file_last_commits

    repo = await create_repo(db_session)
    main_branch = await create_branch(db_session, repo.repo_id, name="main")

    head = await _add_commit(db_session, repo.repo_id, "c1", "s1", {"real.py": _obj_id("r")})
    main_branch.head_commit_id = head.commit_id
    await db_session.flush()

    await compute_and_store_file_last_commits(db_session, repo.repo_id, "main", head.commit_id)
    await db_session.flush()

    # Ask for a path that was never part of the committed manifest.
    missing_path = "ghost.py"
    lookup = await get_file_last_commits(db_session, repo.repo_id, [missing_path], ref="main")
    assert missing_path not in lookup
334
335
336 # ---------------------------------------------------------------------------
337 # FLC10 — second push updates changed files
338 # ---------------------------------------------------------------------------
339
@pytest.mark.asyncio
async def test_flc10_second_push_updates_changed_files(db_session: AsyncSession) -> None:
    """FLC10: after a second push, files that changed point to the new commit.

    The materialization runs once for the first commit and again once the
    branch head moves to a second commit that changes ``README.md``.
    """
    from musehub.services.file_last_commits import compute_and_store_file_last_commits
    from musehub.services.musehub_repository import get_file_last_commits

    repo = await create_repo(db_session)
    branch = await create_branch(db_session, repo.repo_id, name="main")

    m1 = {"README.md": _obj_id("readme-v1")}
    m2 = {"README.md": _obj_id("readme-v2")}

    # First push: a single commit, materialized immediately.
    c1 = await _add_commit(db_session, repo.repo_id, "c1", "s1", m1, ts_offset=-1)
    branch.head_commit_id = c1.commit_id
    await db_session.flush()
    await compute_and_store_file_last_commits(db_session, repo.repo_id, "main", c1.commit_id)
    await db_session.flush()

    # Second push: c2 builds on c1 (parent link, as in FLC4/FLC5) so it
    # extends the first push's history rather than starting a new root.
    c2 = await _add_commit(db_session, repo.repo_id, "c2", "s2", m2, ts_offset=0, parent=c1)
    branch.head_commit_id = c2.commit_id
    await db_session.flush()
    await compute_and_store_file_last_commits(db_session, repo.repo_id, "main", c2.commit_id)
    await db_session.flush()

    result = await get_file_last_commits(db_session, repo.repo_id, ["README.md"], ref="main")
    assert result["README.md"]["sha"] == c2.commit_id
366
367
368 # ---------------------------------------------------------------------------
369 # FLC11 — more than 100 commits does not raise
370 # ---------------------------------------------------------------------------
371
@pytest.mark.asyncio
async def test_flc11_over_100_commits_does_not_raise(db_session: AsyncSession) -> None:
    """FLC11: repos with >100 commits must not raise ValueError from batch limit.

    Builds a linear chain of 150 commits, each changing ``src/app.py``, then
    materializes from the chain's head and checks the file is attributed to it.
    """
    from musehub.services.file_last_commits import compute_and_store_file_last_commits
    from musehub.services.musehub_repository import get_file_last_commits

    repo = await create_repo(db_session)
    branch = await create_branch(db_session, repo.repo_id, name="main")

    n = 150
    last_commit: MusehubCommit | None = None
    for i in range(n):
        manifest = {"src/app.py": _obj_id(f"app-v{i}")}
        # Chain each commit to the previous one so the history really is more
        # than 100 commits deep rather than 150 unrelated root commits.
        last_commit = await _add_commit(
            db_session,
            repo.repo_id,
            f"c{i}",
            f"s{i}",
            manifest,
            ts_offset=i - n,
            parent=last_commit,
        )
    # n > 0, so the loop ran; this also narrows the Optional for type checkers.
    assert last_commit is not None
    branch.head_commit_id = last_commit.commit_id
    await db_session.flush()

    # Must not raise ValueError: batch size N exceeds limit 100
    await compute_and_store_file_last_commits(
        db_session, repo.repo_id, "main", last_commit.commit_id
    )
    await db_session.flush()

    result = await get_file_last_commits(db_session, repo.repo_id, ["src/app.py"], ref="main")
    assert "src/app.py" in result
    # The file gets a new object id in every commit, including the head.
    assert result["src/app.py"]["sha"] == last_commit.commit_id
File History 3 commits
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 1 day ago
sha256:6b1949fc2797ca4c1936a637a4cbfec828ef56cf52398a2e74ca3c4f494e728f fix: use wire_bytes not mpack_bytes_raw in compute_object_b… Sonnet 4.6 patch 9 days ago
sha256:4aed3d8601c8dd3ed37074de35f11f4a9699a0a4b99d43727048fd3f8e6fd13d chore: doc sweep, ignore wrangler build state, misc fixes Sonnet 4.6 minor 12 days ago