gabriel / muse public
test_core_doc_extractor.py python
558 lines 19.7 KB
Raw
sha256:be3641f35bdbcc094677776a77b9aa6a5dab891f8fab201dc162d03c2bab5aea fix(read): strip position:null from structured_delta ops in… Sonnet 4.6 patch 23 days ago
1 """Unit and integration tests for ``muse.core.doc_extractor``.
2
3 Coverage:
4 - :func:`_build_lineno_docstring_map` with valid and invalid Python source.
5 - :func:`_get_docstring` with object-store hits, file fallback, and caching.
6 - :func:`_extract_signature` with functions, classes, and edge cases.
7 - :func:`_compute_health` for each health dimension.
8 - :func:`build_symbol_test_map` BFS logic.
9 - :func:`extract_docs` integration with a synthetic repository.
10 - :func:`_is_public` naming convention.
11 - DocSummary aggregation (avg_health, doc_debt_score, counts).
12 """
13
14 from __future__ import annotations
15
16 import ast
17 import datetime
18 import hashlib
19 import pathlib
20
21 import pytest
22
23 from muse.core.doc_extractor import (
24 DocHealthReason,
25 DocReport,
26 DocSummary,
27 MissingDocEntry,
28 StaleDocEntry,
29 SymbolDoc,
30 _build_lineno_docstring_map,
31 _compute_health,
32 _extract_signature,
33 _get_docstring,
34 _is_public,
35 build_symbol_test_map,
36 extract_docs,
37 )
38 from muse.core.doc_history import StaleInfo
39 from muse.core.object_store import write_object
40 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
41 from muse.core.commits import (
42 CommitRecord,
43 write_commit,
44 )
45 from muse.core.snapshots import (
46 SnapshotRecord,
47 write_snapshot,
48 )
49 from muse.plugins.code._callgraph import ForwardGraph, ReverseGraph
50 from muse.plugins.code.ast_parser import SymbolKind, SymbolRecord
51 from muse.core.types import Manifest, NULL_LONG_ID, blob_id, fake_id, long_id
52 from muse.core.paths import heads_dir, muse_dir
53
54
55 # ---------------------------------------------------------------------------
56 # Helpers
57 # ---------------------------------------------------------------------------
58
59
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *tmp_path*.

    Creates the directory returned by ``muse_dir(tmp_path)`` and fills it
    with a ``repo.json`` descriptor, an empty ``refs/heads`` directory and a
    ``HEAD`` file that points at ``refs/heads/main``.

    Returns:
        *tmp_path* itself, to be used as the repository root.
    """
    meta_dir = muse_dir(tmp_path)
    meta_dir.mkdir()
    (meta_dir / "repo.json").write_text('{"repo_id": "test-repo-123", "name": "test"}')
    (meta_dir / "refs" / "heads").mkdir(parents=True)
    (meta_dir / "HEAD").write_text("ref: refs/heads/main\n")
    return tmp_path
68
69
def _write_commit_with_snapshot(
    root: pathlib.Path,
    manifest: Manifest,
) -> str:
    """Store a snapshot of *manifest* and a parentless commit that references it.

    The commit is written with fixed metadata (branch ``main``, message
    ``init``, author ``test``, timestamp 2026-01-01 UTC). Afterwards the
    ``main`` file inside ``heads_dir(root)`` is overwritten with the commit id.

    Args:
        root: Repository root the records are written under.
        manifest: Manifest to snapshot.

    Returns:
        The id of the commit that was written.
    """
    snapshot_id = compute_snapshot_id(manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest))

    # The same message/author/timestamp feed both the id hash and the record.
    message = "init"
    author = "test"
    timestamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    commit_id = compute_commit_id(
        parent_ids=[],
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=timestamp.isoformat(),
        author=author,
    )
    write_commit(
        root,
        CommitRecord(
            commit_id=commit_id,
            branch="main",
            snapshot_id=snapshot_id,
            message=message,
            committed_at=timestamp,
            author=author,
        ),
    )
    (heads_dir(root) / "main").write_text(commit_id)
    return commit_id
97
98
def _make_sym(
    name: str,
    lineno: int = 1,
    end_lineno: int = 5,
    kind: SymbolKind = "function",
) -> SymbolRecord:
    """Build a synthetic ``SymbolRecord`` for *name*.

    *name* is used for both ``name`` and ``qualified_name``. The body and
    signature hashes are derived from the constant payloads ``b"body"`` and
    ``b"sig"``, so they are the same for every symbol this helper creates.

    Args:
        name: Symbol name.
        lineno: First line of the symbol.
        end_lineno: Last line of the symbol.
        kind: Symbol kind; defaults to ``"function"``.
    """
    content_id = fake_id(name)
    body_hash = blob_id(b"body")
    signature_id = blob_id(b"sig")
    canonical_key = f"f.py##{kind}#{name}#{lineno}"
    return SymbolRecord(
        kind=kind,
        name=name,
        qualified_name=name,
        content_id=content_id,
        body_hash=body_hash,
        signature_id=signature_id,
        metadata_id="",
        canonical_key=canonical_key,
        lineno=lineno,
        end_lineno=end_lineno,
    )
117
118
def _stale(is_stale: bool = False) -> StaleInfo:
    """Return a ``StaleInfo`` where only the ``is_stale`` flag varies.

    Both commit references are ``None`` and both change flags are ``False``.
    """
    no_commit = None
    unchanged = False
    return StaleInfo(
        is_stale=is_stale,
        last_doc_commit=no_commit,
        last_impl_commit=no_commit,
        signature_changed=unchanged,
        body_changed=unchanged,
    )
127
128
129 # ---------------------------------------------------------------------------
130 # Tests: _build_lineno_docstring_map
131 # ---------------------------------------------------------------------------
132
133
class TestBuildLinnoDocstringMap:
    """Checks for ``_build_lineno_docstring_map`` on valid and invalid source bytes."""

    def test_function_with_docstring(self) -> None:
        """A function docstring is found under the line number of its ``def``."""
        src = b'def foo():\n """My docstring."""\n pass\n'
        m = _build_lineno_docstring_map(src)
        assert m.get(1) == "My docstring."

    def test_function_without_docstring(self) -> None:
        """Looking up a function that has no docstring yields ``None``."""
        src = b"def foo():\n pass\n"
        m = _build_lineno_docstring_map(src)
        assert m.get(1) is None

    def test_class_with_docstring(self) -> None:
        """A class docstring is found under the line number of its ``class``."""
        src = b'class Foo:\n """Class doc."""\n pass\n'
        m = _build_lineno_docstring_map(src)
        assert m.get(1) == "Class doc."

    def test_nested_method_lineno(self) -> None:
        """A method is keyed by its own ``def`` line, not the enclosing class line."""
        # The method body must be indented deeper than its ``def``; otherwise
        # the source does not parse and the map comes back empty.
        src = (
            b"class Foo:\n"
            b"    def bar(self):\n"
            b'        """Bar doc."""\n'
            b"        pass\n"
        )
        m = _build_lineno_docstring_map(src)
        assert m.get(2) == "Bar doc."

    def test_syntax_error_returns_empty(self) -> None:
        """Source that does not parse produces an empty map."""
        src = b"def foo(:\n pass\n"
        m = _build_lineno_docstring_map(src)
        assert m == {}

    def test_multiline_docstring(self) -> None:
        """Every paragraph of a multi-line docstring is retained."""
        src = (
            b'def foo():\n'
            b' """First line.\n'
            b'\n'
            b' Second paragraph.\n'
            b' """\n'
            b' pass\n'
        )
        m = _build_lineno_docstring_map(src)
        doc = m.get(1)
        assert doc is not None
        assert "First line" in doc
        assert "Second paragraph" in doc
179
180
181 # ---------------------------------------------------------------------------
182 # Tests: _get_docstring
183 # ---------------------------------------------------------------------------
184
185
class TestGetDocstring:
    """Checks for ``_get_docstring``: object-store reads, file fallback and caching."""

    def test_from_object_store(self, tmp_path: pathlib.Path) -> None:
        """Source written only to the object store is found and its parse is cached."""
        repo_root = _make_repo(tmp_path)
        source = b'def foo():\n """Object store doc."""\n pass\n'
        digest = blob_id(source)
        write_object(repo_root, digest, source)

        doc_cache: dict[tuple[str, str], dict[int, str | None]] = {}
        found = _get_docstring(repo_root, "foo.py", 1, digest, doc_cache)
        assert found == "Object store doc."
        # The (path, hash) pair must now be a key of the cache.
        assert ("foo.py", digest) in doc_cache

    def test_from_file_fallback(self, tmp_path: pathlib.Path) -> None:
        """With no object stored for the hash, the file on disk supplies the docstring."""
        repo_root = _make_repo(tmp_path)
        source = b'def bar():\n """File fallback doc."""\n pass\n'
        (tmp_path / "bar.py").write_bytes(source)
        # Nothing is written for this id, so the object-store lookup should miss.
        missing_id = NULL_LONG_ID

        doc_cache: dict[tuple[str, str], dict[int, str | None]] = {}
        found = _get_docstring(repo_root, "bar.py", 1, missing_id, doc_cache)
        assert found == "File fallback doc."

    def test_no_docstring_returns_none(self, tmp_path: pathlib.Path) -> None:
        """A function without a docstring yields ``None``."""
        repo_root = _make_repo(tmp_path)
        source = b"def baz():\n pass\n"
        digest = blob_id(source)
        write_object(repo_root, digest, source)
        doc_cache: dict[tuple[str, str], dict[int, str | None]] = {}
        found = _get_docstring(repo_root, "baz.py", 1, digest, doc_cache)
        assert found is None

    def test_cache_avoids_reparse(self, tmp_path: pathlib.Path) -> None:
        """Once a (file, hash) is in cache, subsequent calls return the cached result."""
        repo_root = _make_repo(tmp_path)
        source = b'def fn():\n """Cached doc."""\n pass\n'
        digest = blob_id(source)
        write_object(repo_root, digest, source)
        doc_cache: dict[tuple[str, str], dict[int, str | None]] = {}

        # The first lookup fills the cache.
        first = _get_docstring(repo_root, "fn.py", 1, digest, doc_cache)
        assert first == "Cached doc."

        # Tamper with the cached entry; a second lookup can only return the
        # sentinel if it is served from the cache rather than re-read.
        doc_cache[("fn.py", digest)][1] = "INJECTED"
        second = _get_docstring(repo_root, "fn.py", 1, digest, doc_cache)
        assert second == "INJECTED"
233
234
235 # ---------------------------------------------------------------------------
236 # Tests: _extract_signature
237 # ---------------------------------------------------------------------------
238
239
class TestExtractSignature:
    """Checks for ``_extract_signature`` on functions, classes and edge cases."""

    def test_function(self) -> None:
        """A plain function's signature contains its ``def`` header."""
        source = b"def my_func(x: int) -> str:\n return str(x)\n"
        assert "def my_func" in _extract_signature(source, 1, 2)

    def test_class(self) -> None:
        """A class's signature contains its ``class`` header."""
        source = b"class MyClass(Base):\n pass\n"
        assert "class MyClass" in _extract_signature(source, 1, 2)

    def test_async_function(self) -> None:
        """The ``async`` keyword is part of a coroutine's signature."""
        source = b"async def fetch():\n pass\n"
        assert "async def fetch" in _extract_signature(source, 1, 2)

    def test_decorator_skipped(self) -> None:
        """A range that starts on a decorator line still produces a signature."""
        source = b"@property\ndef value(self) -> int:\n return 0\n"
        signature = _extract_signature(source, 1, 3)
        # Only non-emptiness is checked; the exact text is not pinned here.
        assert signature

    def test_out_of_range_fallback(self) -> None:
        """Line numbers beyond the end of the source give an empty string."""
        source = b"x = 1\n"
        assert _extract_signature(source, 100, 110) == ""
266
267
268 # ---------------------------------------------------------------------------
269 # Tests: _compute_health
270 # ---------------------------------------------------------------------------
271
272
class TestComputeHealth:
    """Checks for ``_compute_health``, one health dimension at a time."""

    def test_all_zero(self) -> None:
        """With no docstring, tests or version, only the not-stale 0.15 is scored."""
        health, why = _compute_health(None, [], None, _stale(False))
        assert health == pytest.approx(0.15)
        for expected in ("no_docstring", "no_tests", "no_version_annotation"):
            assert expected in why
        assert "stale_impl" not in why

    def test_stale_penalty(self) -> None:
        """A stale symbol with nothing else going for it scores zero."""
        health, why = _compute_health(None, [], None, _stale(True))
        assert health == pytest.approx(0.0)
        assert "stale_impl" in why

    def test_full_score(self) -> None:
        """A long docstring, a test, a version and fresh code score 1.0 with no reasons."""
        docstring = "A" * 50
        health, why = _compute_health(docstring, ["test1"], "v1.0", _stale(False))
        assert health == pytest.approx(1.0)
        assert why == []

    def test_short_docstring_penalty(self) -> None:
        """A docstring under 40 characters misses the 0.20 length bonus."""
        docstring = "Short."
        health, why = _compute_health(docstring, ["t1"], "v1.0", _stale(False))
        # 0.30 (has doc) + 0.20 (test) + 0.15 (version) + 0.15 (not stale) = 0.80
        assert health == pytest.approx(0.80)
        assert "docstring_too_short" in why

    def test_capped_at_one(self) -> None:
        """The score does not exceed 1.0 even with several tests."""
        docstring = "A" * 100
        health = _compute_health(docstring, ["t1", "t2"], "v1.0", _stale(False))[0]
        assert health <= 1.0

    def test_no_tests(self) -> None:
        """An empty test list drops the score to 0.80 and is reported."""
        docstring = "A" * 50
        health, why = _compute_health(docstring, [], "v1.0", _stale(False))
        # 0.30 + 0.20 (long) + 0 (no tests) + 0.15 (version) + 0.15 (not stale) = 0.80
        assert health == pytest.approx(0.80)
        assert "no_tests" in why
312
313
314 # ---------------------------------------------------------------------------
315 # Tests: build_symbol_test_map
316 # ---------------------------------------------------------------------------
317
318
class TestBuildSymbolTestMap:
    """Checks for ``build_symbol_test_map`` over small hand-built call graphs."""

    def test_empty_symbols(self) -> None:
        """An empty graph with no symbols gives an empty map."""
        assert build_symbol_test_map({}, {}) == {}

    def test_test_not_linked_to_non_test(self) -> None:
        """Test functions should not appear as callers of themselves."""
        only_test: SymbolRecord = _make_sym("test_foo", kind="function")
        symbols = {"tests/test_a.py::test_foo": only_test}
        call_graph: ForwardGraph = {"tests/test_a.py::test_foo": frozenset({"bar"})}
        # The callee "bar" has no SymbolRecord, so nothing can be linked.
        assert build_symbol_test_map(call_graph, symbols) == {}

    def test_single_test_links_to_production(self) -> None:
        """A test that calls a production symbol is listed under that symbol."""
        test_addr = "tests/test_a.py::test_foo"
        prod_addr = "muse/core/a.py::bar"
        caller: SymbolRecord = _make_sym("test_foo", kind="function")
        callee: SymbolRecord = _make_sym("bar", kind="function")
        symbols = {test_addr: caller, prod_addr: callee}
        call_graph: ForwardGraph = {
            test_addr: frozenset({"bar"}),
            prod_addr: frozenset(),
        }
        linked = build_symbol_test_map(call_graph, symbols)
        assert prod_addr in linked
        assert test_addr in linked[prod_addr]

    def test_depth_limit_respected(self) -> None:
        """BFS stops at max_depth hops."""
        symbols = {
            "tests/t.py::test_x": _make_sym("test_x", kind="function"),
            "a.py::a": _make_sym("a", kind="function"),
            "b.py::b": _make_sym("b", kind="function"),
            "c.py::c": _make_sym("c", kind="function"),
            "d.py::d": _make_sym("d", kind="function"),
        }
        # A straight chain: test_x -> a -> b -> c -> d.
        call_graph: ForwardGraph = {
            "tests/t.py::test_x": frozenset({"a"}),
            "a.py::a": frozenset({"b"}),
            "b.py::b": frozenset({"c"}),
            "c.py::c": frozenset({"d"}),
        }
        # With max_depth=2 the walk covers a (hop 1) and b (hop 2), no further.
        linked = build_symbol_test_map(call_graph, symbols, max_depth=2)
        for reachable in ("a.py::a", "b.py::b"):
            assert reachable in linked
        for too_deep in ("c.py::c", "d.py::d"):
            assert too_deep not in linked

    def test_no_infinite_loop(self) -> None:
        """Cyclic call graph does not cause infinite loop."""
        symbols = {
            "tests/t.py::test_cycle": _make_sym("test_cycle", kind="function"),
            "a.py::alpha": _make_sym("alpha", kind="function"),
            "b.py::beta": _make_sym("beta", kind="function"),
        }
        call_graph: ForwardGraph = {
            "tests/t.py::test_cycle": frozenset({"alpha"}),
            "a.py::alpha": frozenset({"beta"}),
            "b.py::beta": frozenset({"alpha"}),  # closes the alpha <-> beta cycle
        }
        linked = build_symbol_test_map(call_graph, symbols)
        # Getting a result back at all is the point of this test.
        assert isinstance(linked, dict)
385
386
387 # ---------------------------------------------------------------------------
388 # Tests: _is_public
389 # ---------------------------------------------------------------------------
390
391
class TestIsPublic:
    """Checks the naming convention applied by ``_is_public``."""

    def test_public_name(self) -> None:
        """A name with no leading underscore is public."""
        verdict = _is_public("my_function")
        assert verdict is True

    def test_private_name(self) -> None:
        """A single leading underscore makes a name non-public."""
        verdict = _is_public("_private")
        assert verdict is False

    def test_dunder(self) -> None:
        """Dunder names are non-public."""
        verdict = _is_public("__init__")
        assert verdict is False

    def test_empty_string(self) -> None:
        """The empty string is reported as public."""
        verdict = _is_public("")
        assert verdict is True
404
405
406 # ---------------------------------------------------------------------------
407 # Tests: extract_docs integration
408 # ---------------------------------------------------------------------------
409
410
class TestExtractDocs:
    """Integration checks for ``extract_docs`` against small synthetic repositories."""

    @staticmethod
    def _commit_single_file(root: pathlib.Path, filename: str, source: bytes) -> str:
        """Store *source* as an object and as ``root/filename``, commit it, return the commit id."""
        digest = blob_id(source)
        write_object(root, digest, source)
        (root / filename).write_bytes(source)
        return _write_commit_with_snapshot(root, {filename: digest})

    def test_empty_repo_no_commit(self, tmp_path: pathlib.Path) -> None:
        """When there's no HEAD commit, returns an empty report."""
        repo_root = _make_repo(tmp_path)
        report = extract_docs(repo_root, "test-repo-123")
        assert report["commit_id"] == ""
        assert report["symbols"] == []
        assert report["summary"]["total_symbols"] == 0

    def test_repo_with_python_file(self, tmp_path: pathlib.Path) -> None:
        """A repo with one documented Python file produces at least one SymbolDoc."""
        repo_root = _make_repo(tmp_path)
        source = (
            b"def documented_fn(x: int) -> str:\n"
            b' """Return x as a string."""\n'
            b" return str(x)\n"
        )
        self._commit_single_file(repo_root, "documented.py", source)

        report = extract_docs(repo_root, "test-repo-123")
        assert report["summary"]["total_symbols"] >= 1

        addresses = [entry["address"] for entry in report["symbols"]]
        assert any("documented_fn" in address for address in addresses)

    def test_missing_list_populated(self, tmp_path: pathlib.Path) -> None:
        """Public functions without docstrings appear in 'missing'."""
        repo_root = _make_repo(tmp_path)
        source = b"def undocumented() -> None:\n pass\n"
        self._commit_single_file(repo_root, "nodoc.py", source)

        report = extract_docs(repo_root, "test-repo-123")
        missing = [entry["address"] for entry in report["missing"]]
        assert any("undocumented" in address for address in missing)

    def test_targets_filter(self, tmp_path: pathlib.Path) -> None:
        """When targets is set, only those symbols appear in the report."""
        repo_root = _make_repo(tmp_path)
        source = (
            b"def alpha() -> None:\n pass\n"
            b"def beta() -> None:\n pass\n"
        )
        self._commit_single_file(repo_root, "ab.py", source)

        # Run unfiltered first to learn the address assigned to ``alpha``.
        baseline = extract_docs(repo_root, "test-repo-123")
        alpha_addresses = [
            entry["address"]
            for entry in baseline["symbols"]
            if "alpha" in entry["address"]
        ]
        if not alpha_addresses:
            pytest.skip("alpha not found in snapshot — symbol cache not populated")

        narrowed = extract_docs(
            repo_root, "test-repo-123", targets=[alpha_addresses[0]]
        )
        addresses = [entry["address"] for entry in narrowed["symbols"]]
        assert any("alpha" in address for address in addresses)
        assert all("beta" not in address for address in addresses)

    def test_summary_aggregation(self, tmp_path: pathlib.Path) -> None:
        """DocSummary counts are consistent with the symbols list."""
        repo_root = _make_repo(tmp_path)
        source = (
            b"def with_doc():\n"
            b' """Has doc."""\n'
            b" pass\n"
            b"def without_doc():\n"
            b" pass\n"
        )
        self._commit_single_file(repo_root, "mixed.py", source)

        report = extract_docs(repo_root, "test-repo-123")
        summary = report["summary"]
        assert summary["total_symbols"] == len(report["symbols"])
        assert summary["documented"] + summary["undocumented"] <= summary["total_symbols"]
        assert 0.0 <= summary["avg_health"] <= 1.0
        assert 0.0 <= summary["doc_debt_score"] <= 1.0

    def test_at_commit_param(self, tmp_path: pathlib.Path) -> None:
        """Passing commit_id uses that commit rather than HEAD."""
        repo_root = _make_repo(tmp_path)
        source = b"def fn():\n pass\n"
        commit_id = self._commit_single_file(repo_root, "fn.py", source)

        report = extract_docs(repo_root, "test-repo-123", commit_id=commit_id)
        assert report["commit_id"] == commit_id

    def test_invalid_commit_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """An unknown commit_id returns an empty report, not an error."""
        repo_root = _make_repo(tmp_path)
        report = extract_docs(repo_root, "test-repo-123", commit_id=NULL_LONG_ID)
        assert report["symbols"] == []
531
532
533 # ---------------------------------------------------------------------------
534 # Stress tests
535 # ---------------------------------------------------------------------------
536
537
class TestExtractDocsStress:
    """Larger-input smoke checks for ``extract_docs``."""

    def test_many_symbols(self, tmp_path: pathlib.Path) -> None:
        """extract_docs handles a file with 100 functions without crashing."""
        repo_root = _make_repo(tmp_path)

        # Four lines per function: header, docstring, body and a blank separator.
        generated: list[str] = [
            text
            for i in range(100)
            for text in (
                f'def fn_{i}(x: int) -> int:',
                f' """Function {i} — does something useful."""',
                f' return x + {i}',
                "",
            )
        ]
        source = "\n".join(generated).encode()
        digest = blob_id(source)
        write_object(repo_root, digest, source)
        (tmp_path / "big.py").write_bytes(source)

        _write_commit_with_snapshot(repo_root, {"big.py": digest})

        report = extract_docs(repo_root, "test-repo-123")
        # Lenient threshold: at least half of the 100 functions must be reported.
        assert report["summary"]["total_symbols"] >= 50
File History 3 commits
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago