gabriel / muse public
test_stress_query_engine.py python
365 lines 13.7 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
1 """Stress tests for the generic query engine and code query DSL.
2
3 Covers:
4 - walk_history on linear chains of 100+ commits.
5 - CommitEvaluator with correct 3-arg signature.
6 - format_matches output format.
7 - Code query DSL: all field types, all operators, AND/OR composition.
8 - Code query DSL: unknown field raises ValueError.
9 - Query against large history (200 commits).
10 - Branch-scoped queries.
11 """
12
13 import datetime
14 import pathlib
15
16 import pytest
17
18 from muse.core.query_engine import CommitEvaluator, QueryMatch, format_matches, walk_history
19 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
20 from muse.core.commits import (
21 CommitRecord,
22 write_commit,
23 )
24 from muse.domain import SemVerBump
25 from muse.plugins.code._code_query import build_evaluator
26 from muse.core.types import Manifest
27 from muse.core.paths import muse_dir, ref_path
28
29
30 # ---------------------------------------------------------------------------
31 # Helpers
32 # ---------------------------------------------------------------------------
33
# Snapshot ID computed from an empty mapping; every commit written by the
# helpers in this module references it.
_SNAP_ID: str = compute_snapshot_id({})

# Fixed, timezone-aware (UTC) timestamp stamped on every synthetic commit.
_BASE_TS: datetime.datetime = datetime.datetime(
    year=2026,
    month=1,
    day=1,
    tzinfo=datetime.timezone.utc,
)
36
37
def _now() -> datetime.datetime:
    """Return the current wall-clock time as a timezone-aware UTC datetime."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
40
41
def _write(
    root: pathlib.Path,
    label: str,
    branch: str = "main",
    parent: str | None = None,
    author: str = "alice",
    agent_id: str = "",
    model_id: str = "",
    sem_ver_bump: SemVerBump = "none",
    message: str = "",
) -> CommitRecord:
    """Write a commit with a real content-addressed ID and point *branch* at it.

    Args:
        root: Repository root that receives the commit and the branch ref.
        label: Short tag used only to build the default message
            (``"commit <label>"``) when *message* is empty.
        branch: Branch recorded on the commit and whose ref is overwritten.
        parent: Commit ID of the parent, or ``None`` for a root commit.
        author: Author recorded on the commit (also part of the hashed ID).
        agent_id: Agent identifier stored on the record.
        model_id: Model identifier stored on the record.
        sem_ver_bump: Semantic-version bump stored on the record.
        message: Explicit commit message; falls back to the label-based default.

    Returns:
        The CommitRecord that was written.
    """
    msg = message or f"commit {label}"
    # All commits share _SNAP_ID and _BASE_TS, so the ID varies only with the
    # parent, the message and the author; callers keep IDs unique via *label*.
    cid = compute_commit_id(
        parent_ids=[parent] if parent else [],
        snapshot_id=_SNAP_ID,
        message=msg,
        committed_at_iso=_BASE_TS.isoformat(),
        author=author,
    )
    record = CommitRecord(
        commit_id=cid,
        branch=branch,
        snapshot_id=_SNAP_ID,
        message=msg,
        committed_at=_BASE_TS,
        parent_commit_id=parent,
        author=author,
        agent_id=agent_id,
        model_id=model_id,
        sem_ver_bump=sem_ver_bump,
    )
    write_commit(root, record)

    head_ref = ref_path(root, branch)
    # A namespaced branch (e.g. "feature/x") presumably maps to a nested ref
    # path whose parent directory nothing has created yet; make sure it exists
    # so the write below cannot fail with FileNotFoundError.
    head_ref.parent.mkdir(parents=True, exist_ok=True)
    head_ref.write_text(cid)
    return record
78
79
def _make_match(commit: CommitRecord) -> QueryMatch:
    """Build a QueryMatch that copies the identifying fields of *commit*."""
    cid = commit.commit_id
    timestamp = commit.committed_at.isoformat()
    return QueryMatch(
        commit_id=cid,
        author=commit.author,
        committed_at=timestamp,
        branch=commit.branch,
        detail=f"matched commit {cid}",
    )
88
89
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Return a temporary repository root with empty commit and ref directories.

    Creates ``commits`` and ``refs/heads`` beneath ``muse_dir(tmp_path)`` and
    hands back ``tmp_path`` itself as the repository root.
    """
    store = muse_dir(tmp_path)
    for parts in (("commits",), ("refs", "heads")):
        store.joinpath(*parts).mkdir(parents=True)
    return tmp_path
96
97
98 # ===========================================================================
99 # walk_history — basic
100 # ===========================================================================
101
102
class TestWalkHistoryBasic:
    """walk_history on a missing branch, a single commit and short linear chains."""

    @staticmethod
    def _match_all(
        commit: CommitRecord, manifest: Manifest, root: pathlib.Path
    ) -> list[QueryMatch]:
        """Three-argument evaluator that reports every commit it is given."""
        return [_make_match(commit)]

    @staticmethod
    def _linear_chain(root: pathlib.Path, length: int) -> None:
        """Write *length* commits on ``main``, each one the parent of the next."""
        tip: str | None = None
        for index in range(length):
            tip = _write(root, f"c{index:03d}", parent=tip).commit_id

    def test_empty_history_no_matches(self, repo: pathlib.Path) -> None:
        # No ref has been written for this branch.
        matches = walk_history(repo, "nonexistent-branch", self._match_all)
        assert matches == []

    def test_single_commit_matches(self, repo: pathlib.Path) -> None:
        only = _write(repo, "only", branch="main")
        matches = walk_history(repo, "main", self._match_all)
        assert len(matches) == 1
        assert matches[0]["commit_id"] == only.commit_id

    def test_single_commit_no_match(self, repo: pathlib.Path) -> None:
        _write(repo, "only", branch="main")

        def match_none(
            commit: CommitRecord, manifest: Manifest, root: pathlib.Path
        ) -> list[QueryMatch]:
            return []

        matches = walk_history(repo, "main", match_none)
        assert matches == []

    def test_linear_chain_all_match(self, repo: pathlib.Path) -> None:
        self._linear_chain(repo, 10)
        matches = walk_history(repo, "main", self._match_all)
        assert len(matches) == 10

    def test_linear_chain_filtered(self, repo: pathlib.Path) -> None:
        # Even indices are authored by alice, odd ones by bob.
        tip: str | None = None
        for index in range(10):
            who = "bob" if index % 2 else "alice"
            tip = _write(repo, f"c{index:03d}", parent=tip, author=who).commit_id

        def alice_only(
            commit: CommitRecord, manifest: Manifest, root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.author == "alice" else []

        matches = walk_history(repo, "main", alice_only)
        assert len(matches) == 5

    def test_max_commits_limits_walk(self, repo: pathlib.Path) -> None:
        self._linear_chain(repo, 50)
        matches = walk_history(repo, "main", self._match_all, max_commits=10)
        assert len(matches) == 10

    def test_matches_include_commit_id_and_branch(self, repo: pathlib.Path) -> None:
        head = _write(repo, "abc123", branch="main", author="alice")
        matches = walk_history(repo, "main", self._match_all)
        first = matches[0]
        assert first["commit_id"] == head.commit_id
        assert first["branch"] == "main"
        assert first["author"] == "alice"
168
169
170 # ===========================================================================
171 # walk_history — large history
172 # ===========================================================================
173
174
class TestWalkHistoryLarge:
    """walk_history over linear histories of 200 and 100 commits."""

    def test_200_commit_chain_full_scan(self, repo: pathlib.Path) -> None:
        tip: str | None = None
        for index in range(200):
            tag = "" if index % 3 else "bot"
            tip = _write(
                repo, f"large-{index:04d}", parent=tip, agent_id=tag
            ).commit_id

        def bot_only(
            commit: CommitRecord, manifest: Manifest, root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "bot" else []

        matches = walk_history(repo, "main", bot_only)
        # Indices 0, 3, 6, ..., 198 carry the "bot" agent: 67 of the 200 commits.
        assert len(matches) == 67

    def test_query_by_agent_across_100_commits(self, repo: pathlib.Path) -> None:
        tip: str | None = None
        for index in range(100):
            tip = _write(
                repo,
                f"agent-test-{index:04d}",
                parent=tip,
                agent_id=f"agent-{index % 5}",
            ).commit_id

        def agent_0_only(
            commit: CommitRecord, manifest: Manifest, root: pathlib.Path
        ) -> list[QueryMatch]:
            return [_make_match(commit)] if commit.agent_id == "agent-0" else []

        matches = walk_history(repo, "main", agent_0_only)
        # Five agents rotate over 100 commits, so each owns 100 / 5 = 20.
        assert len(matches) == 20
205
206
207 # ===========================================================================
208 # format_matches
209 # ===========================================================================
210
211
class TestFormatMatches:
    """format_matches output for empty, single and multiple match lists."""

    def test_empty_matches_produces_output(self) -> None:
        assert isinstance(format_matches([]), str)

    def test_single_match_includes_commit_id(self) -> None:
        only = QueryMatch(
            commit_id="a" * 64,
            branch="main",
            author="alice",
            committed_at=_now().isoformat(),
            detail="test match",
        )
        rendered = format_matches([only])
        # Only an eight-character run of the ID is required to appear.
        assert "aaaaaaaa" in rendered

    def test_multiple_matches_all_present(self) -> None:
        ids = [f"id{i:04d}" for i in range(5)]
        matches = []
        for cid in ids:
            matches.append(
                QueryMatch(
                    commit_id=cid,
                    branch="main",
                    author="alice",
                    committed_at=_now().isoformat(),
                    detail="matched",
                )
            )
        rendered = format_matches(matches)
        for cid in ids:
            assert cid in rendered
242
243
244 # ===========================================================================
245 # Code query DSL — build_evaluator
246 # ===========================================================================
247
248
class TestCodeQueryDSL:
    """Code query DSL: evaluators from build_evaluator run through walk_history.

    Each test writes a short linear history on ``main`` and asserts on the
    commit IDs that matched, so a query that matches nothing (or the wrong
    commit) cannot pass by accident.
    """

    @staticmethod
    def _ids(result: list[QueryMatch]) -> list[str]:
        """Return the commit IDs of *result* in the order they were reported."""
        return [m["commit_id"] for m in result]

    # --- author field ---

    def test_author_equals(self, repo: pathlib.Path) -> None:
        c1 = _write(repo, "a1", author="alice")
        c2 = _write(repo, "a2", author="bob", parent=c1.commit_id)
        evaluator = build_evaluator("author == 'alice'")
        result = walk_history(repo, "main", evaluator)
        ids = self._ids(result)
        assert c1.commit_id in ids
        # Bob's commit must be filtered out.
        assert c2.commit_id not in ids
        assert all(m["author"] == "alice" for m in result)

    def test_author_not_equals(self, repo: pathlib.Path) -> None:
        c1 = _write(repo, "b1", author="alice")
        c2 = _write(repo, "b2", author="bob", parent=c1.commit_id)
        evaluator = build_evaluator("author != 'alice'")
        result = walk_history(repo, "main", evaluator)
        ids = self._ids(result)
        # all() is True for an empty result, so require bob's commit explicitly.
        assert c2.commit_id in ids
        assert c1.commit_id not in ids
        assert all(m["author"] != "alice" for m in result)

    def test_author_contains(self, repo: pathlib.Path) -> None:
        c1 = _write(repo, "c1", author="alice-smith")
        _write(repo, "c2", author="bob-jones", parent=c1.commit_id)
        evaluator = build_evaluator("author contains 'alice'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert "alice" in result[0]["author"]

    def test_author_startswith(self, repo: pathlib.Path) -> None:
        c1 = _write(repo, "d1", author="agent-claude")
        _write(repo, "d2", author="human-alice", parent=c1.commit_id)
        evaluator = build_evaluator("author startswith 'agent'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["author"].startswith("agent")

    # --- agent_id field ---

    def test_agent_id_equals(self, repo: pathlib.Path) -> None:
        c1 = _write(repo, "e1", agent_id="claude-v4")
        _write(repo, "e2", agent_id="gpt-4o", parent=c1.commit_id)
        evaluator = build_evaluator("agent_id == 'claude-v4'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == c1.commit_id

    # --- sem_ver_bump field ---

    def test_sem_ver_bump_major(self, repo: pathlib.Path) -> None:
        c1 = _write(repo, "f1", sem_ver_bump="major")
        c2 = _write(repo, "f2", sem_ver_bump="minor", parent=c1.commit_id)
        _write(repo, "f3", sem_ver_bump="patch", parent=c2.commit_id)
        evaluator = build_evaluator("sem_ver_bump == 'major'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == c1.commit_id

    # --- model_id field ---

    def test_model_id_contains(self, repo: pathlib.Path) -> None:
        c1 = _write(repo, "g1", model_id="claude-3-5-sonnet-20241022")
        _write(repo, "g2", model_id="gpt-4o-2024-08-06", parent=c1.commit_id)
        evaluator = build_evaluator("model_id contains 'claude'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == c1.commit_id

    # --- AND composition ---

    def test_and_composition(self, repo: pathlib.Path) -> None:
        c1 = _write(repo, "h1", author="alice", agent_id="bot-1")
        c2 = _write(repo, "h2", author="alice", agent_id="bot-2", parent=c1.commit_id)
        _write(repo, "h3", author="bob", agent_id="bot-1", parent=c2.commit_id)
        evaluator = build_evaluator("author == 'alice' and agent_id == 'bot-1'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 1
        assert result[0]["commit_id"] == c1.commit_id

    # --- OR composition ---

    def test_or_composition(self, repo: pathlib.Path) -> None:
        c1 = _write(repo, "i1", author="alice")
        c2 = _write(repo, "i2", author="bob", parent=c1.commit_id)
        _write(repo, "i3", author="charlie", parent=c2.commit_id)
        evaluator = build_evaluator("author == 'alice' or author == 'bob'")
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2
        assert set(self._ids(result)) == {c1.commit_id, c2.commit_id}

    # --- OR over a second field ---

    def test_complex_and_or(self, repo: pathlib.Path) -> None:
        # NOTE(review): despite the name, this query only uses `or`. A mixed
        # and/or query is not added here because the DSL's precedence rules are
        # not visible from this file — confirm them before extending.
        c1 = _write(repo, "j1", author="alice", sem_ver_bump="major")
        c2 = _write(repo, "j2", author="bob", sem_ver_bump="minor", parent=c1.commit_id)
        _write(repo, "j3", author="alice", sem_ver_bump="patch", parent=c2.commit_id)
        evaluator = build_evaluator(
            "sem_ver_bump == 'major' or sem_ver_bump == 'minor'"
        )
        result = walk_history(repo, "main", evaluator)
        assert len(result) == 2
        assert set(self._ids(result)) == {c1.commit_id, c2.commit_id}

    # --- error cases ---

    def test_unknown_field_raises_value_error(self) -> None:
        with pytest.raises(ValueError):
            build_evaluator("unknown_field == 'something'")

    def test_unknown_operator_raises_value_error(self) -> None:
        with pytest.raises(ValueError):
            build_evaluator("author REGEX 'alice'")

    def test_empty_query_raises(self) -> None:
        # Either exception type is accepted for an empty query string.
        with pytest.raises((ValueError, IndexError)):
            build_evaluator("")

    # --- branch field ---

    def test_branch_field_matches_correctly(self, repo: pathlib.Path) -> None:
        c1 = _write(repo, "k1", branch="main", author="alice")
        evaluator = build_evaluator("branch == 'main'")
        result = walk_history(repo, "main", evaluator)
        # Require the commit itself so an empty result cannot satisfy all().
        assert c1.commit_id in self._ids(result)
        assert all(m["branch"] == "main" for m in result)
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago