gabriel / muse public
breakage.py python
541 lines 21.1 KB
Raw
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 1 day ago
1 """muse code breakage — detect symbol-level breakage in the working tree.
2
3 Checks the current working tree against a committed snapshot for structural
4 breakage that would fail at runtime or import time:
5
6 1. **stale_import** — a working-tree file imports a name that exists nowhere
7 in the HEAD snapshot (and is also not defined locally). Severity: warning.
8 2. **removed_public_method** — a class that appears in both HEAD and the
9 working tree is missing a public method it had in HEAD. This catches
10 public-API regressions before they break callers. Severity: error.
11
12 Analysis is purely structural — no code is executed, no type checker is
13 invoked. It operates on the committed symbol graph plus a live working-tree
14 parse (results are served from the persistent symbol cache when available,
15 so repeated runs on a warm cache are fast).
16
17 Usage::
18
19 muse code breakage
20 muse code breakage --language Python
21 muse code breakage --path "muse/core/*.py"
22 muse code breakage --commit HEAD~3
23 muse code breakage --strict
24 muse code breakage --json
25
26 Flags:
27
28 ``--language LANG``
29 Restrict analysis to files of this language (e.g. ``Python``).
30
31 ``--path PATTERN``
32 Only check files whose path matches this glob pattern
33 (e.g. ``"muse/core/*.py"``).
34
35 ``--commit REF``
36 Diff against this commit instead of HEAD (branch name, commit ID, or
37 tag). Useful for checking "does my working tree still build cleanly
38 against an older baseline?"
39
40 ``--strict``
41 Treat warnings as errors: exit non-zero if any warning-level issues are
42 found, not just error-level ones.
43
44 ``--json``
45 Emit a machine-readable JSON object. Consumers should check
    ``$.errors`` and ``$.warning_count`` (and respect ``strict``) rather than
47 the exit code alone.
48 """
49
50 import argparse
51 import fnmatch
52 import json
53 import logging
54 import pathlib
55 import sys
56 from typing import TypedDict
57
58 from muse.core.envelope import EnvelopeJson, make_envelope
59 from muse.core.timing import start_timer
60 from muse.core.repo import require_repo
61 from muse.core.types import Manifest
62 from muse.core.refs import (
63 get_head_commit_id,
64 read_current_branch,
65 )
66 from muse.core.commits import resolve_commit_ref
67 from muse.core.snapshots import get_commit_snapshot_manifest
68 from muse.core.symbol_cache import load_symbol_cache
69 from muse.plugins.code._query import is_semantic, language_of, symbols_for_snapshot
70 from muse.plugins.code.ast_parser import SymbolTree
71 from muse.core.validation import sanitize_display
72
# File path → symbol tree parsed from that file (one entry per file).
type _SymbolTreeMap = dict[str, SymbolTree]
# Class key → set of method names recorded for that class.  The key is
# ``"file_path::ClassName"`` for the HEAD-wide index and the bare class part
# of the qualified name for the per-file working-tree map.
type _MethodMap = dict[str, set[str]]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
77
78 # ---------------------------------------------------------------------------
79 # Data types
80 # ---------------------------------------------------------------------------
81
class _BreakageIssue(TypedDict):
    """One breakage finding, serialisable to JSON.

    Instances are created by ``_check_file`` and passed through unchanged
    into the ``issues`` list of the ``--json`` payload.
    """

    issue_type: str  # "stale_import" | "removed_public_method"
    path: str  # path of the file the finding was raised for
    description: str  # human-readable explanation of the finding
    severity: str  # "error" | "warning"
89
class _BreakageOutputJson(EnvelopeJson):
    """Top-level JSON payload emitted by ``muse code breakage --json``.

    Extends ``EnvelopeJson``; ``run`` fills the inherited envelope fields via
    ``make_envelope`` and the fields below explicitly.

    Fields
    ------
    commit          Commit ID checked against.
                    NOTE(review): previously documented as a *short* ID, but
                    ``run`` passes ``commit.commit_id`` unmodified — confirm.
    branch          Current branch name.
    language_filter Language filter passed via ``--language``, or ``None``.
    path_filter     Glob filter passed via ``--path``, or ``None``.
    strict          Whether ``--strict`` was set.
    file_count      Number of files analysed.
    issues          List of :class:`_BreakageIssue` dicts.
    total           Total issue count (errors + warning_count).
    errors          Number of error-severity issues.
    warning_count   Number of warning-severity issues.
    """

    commit: str
    branch: str
    language_filter: str | None
    path_filter: str | None
    strict: bool
    file_count: int
    issues: list[_BreakageIssue]
    total: int
    errors: int
    warning_count: int
117
118 # ---------------------------------------------------------------------------
119 # Index helpers
120 # ---------------------------------------------------------------------------
121
122 def _build_head_names_set(head_sym_map: _SymbolTreeMap) -> set[str]:
123 """Return the set of all non-import symbol *names* across HEAD.
124
125 Used for O(1) stale-import lookup: a working-tree import is stale if and
126 only if the imported name is absent from this set (and also not defined
127 locally in the working-tree file).
128 """
129 names: set[str] = set()
130 for tree in head_sym_map.values():
131 for rec in tree.values():
132 if rec["kind"] != "import":
133 names.add(rec["name"])
134 return names
135
136 def _build_head_class_methods(
137 head_sym_map: _SymbolTreeMap,
138 ) -> _MethodMap:
139 """Return a map of ``"file_path::ClassName"`` → ``{method_name, ...}`` from HEAD.
140
141 Used for Check 2: a class that drops a public method it had in HEAD is
142 flagged as a ``removed_public_method`` breakage.
143 """
144 class_methods: _MethodMap = {}
145 for fp, tree in head_sym_map.items():
146 for rec in tree.values():
147 if rec["kind"] != "method":
148 continue
149 qn: str = rec["qualified_name"]
150 if "." not in qn:
151 continue
152 class_part, _ = qn.rsplit(".", 1)
153 key = f"{fp}::{class_part}"
154 class_methods.setdefault(key, set()).add(rec["name"])
155 return class_methods
156
157 # ---------------------------------------------------------------------------
158 # Per-file analysis
159 # ---------------------------------------------------------------------------
160
161 def _check_file(
162 file_path: str,
163 working_tree: SymbolTree,
164 head_tree: SymbolTree,
165 head_names_set: set[str],
166 head_class_methods: _MethodMap,
167 head_file_paths: frozenset[str],
168 ) -> list[_BreakageIssue]:
169 """Return all breakage issues for one file.
170
171 Args:
172 file_path: Workspace-relative POSIX path.
173 working_tree: Symbols parsed from the working-tree version of the
174 file (may be empty if the file is new or not
175 semantic).
176 head_tree: Symbols parsed from the HEAD-committed version of
177 the file (empty if the file is new).
178 head_names_set: O(1)-lookup set of all non-import symbol names in
179 the entire HEAD snapshot.
180 head_class_methods: ``"file::Class"`` → public method names in HEAD,
181 used to detect removed methods.
182 head_file_paths: O(1)-lookup set of all file paths in the HEAD
183 snapshot; used to distinguish module imports (e.g.
184 ``from muse.cli.commands import breakage``) from
185 symbol imports so they are not falsely flagged as
186 stale.
187 """
188 if not working_tree:
189 return []
190
191 issues: list[_BreakageIssue] = []
192
193 # Names defined locally in the working-tree file (non-import symbols).
194 local_names: set[str] = {
195 rec["name"]
196 for rec in working_tree.values()
197 if rec["kind"] != "import"
198 }
199
200 # -----------------------------------------------------------------------
201 # Check 1: stale imports
202 # -----------------------------------------------------------------------
203 # A working-tree import is stale when its name exists neither in the HEAD
204 # snapshot (anywhere — we use a codebase-wide set for speed) nor is it
205 # defined locally in the same file, AND it does not resolve to a known
206 # module file in the HEAD snapshot.
207 #
208 # Only muse-internal imports are checked. Stdlib, third-party, __future__,
209 # and typing imports are deliberately excluded — they live outside the
210 # Muse symbol graph and can never go "stale" by definition.
211 #
212 # The qualified_name written by the AST parser is:
213 # "import::<module>::<name>" — from <module> import <name>
214 # "import::<name>" — import <name> (module IS the name)
215 # A muse-internal import is one where <module> starts with "muse." or
216 # equals "muse", or (for bare `import muse.X`) the name starts with "muse.".
217 #
218 # Module-import disambiguation: ``from muse.cli.commands import breakage``
219 # records name="breakage" and source_module="muse.cli.commands". The name
220 # "breakage" will never appear in ``head_names_set`` (which contains symbol
221 # names, not module names), so without a module check it would be a false
222 # positive. We convert source_module to a filesystem path and check
223 # whether ``{path}/{name}.py`` or ``{path}/{name}/__init__.py`` is a known
224 # file in the HEAD snapshot — if so, the import targets a module, not a
225 # symbol, and is valid.
226 #
227 # Complexity: O(1) per import — all lookups hit frozenset/set.
228 for rec in working_tree.values():
229 if rec["kind"] != "import":
230 continue
231 name: str = rec["name"]
232 if name.startswith("*:"):
233 continue # wildcard imports — cannot check statically
234
235 # Determine source module from the qualified_name written by ast_parser.
236 # Format: "import::<module>::<name>" or "import::<name>".
237 qn: str = rec.get("qualified_name", "")
238 parts = qn.split("::")
239 if len(parts) == 3:
240 # from <module> import <name>
241 source_module = parts[1]
242 else:
243 # bare `import <name>` — the name IS the module
244 source_module = name
245
246 # Skip anything that is not a muse-internal import.
247 if source_module != "muse" and not source_module.startswith("muse."):
248 continue
249
250 # ``parts[2]`` is the *original* pre-alias name stored in qualified_name
251 # by the AST parser. For ``from muse.core.store import CommitRecord as
252 # MuseCliCommit``, parts[2]="CommitRecord" and name="MuseCliCommit".
253 # We must look up the original in the HEAD snapshot — the alias will
254 # never appear as a top-level symbol anywhere.
255 original = parts[2] if len(parts) == 3 else name
256
257 if original not in head_names_set and name not in local_names:
258 # Check whether the original name resolves to a submodule file.
259 #
260 # Two cases:
261 #
262 # 1. ``from muse.cli.commands import age`` (len==3):
263 # source_module="muse.cli.commands", original="age"
264 # → check "muse/cli/commands/age.py"
265 #
266 # 2. ``import muse.core.rebase`` (len!=3, bare import):
267 # source_module=name=original="muse.core.rebase"
268 # → the dotted name IS the full module path
269 # → check "muse/core/rebase.py" directly (not appending again)
270 module_dir = source_module.replace(".", "/")
271 if len(parts) == 3:
272 is_module = (
273 f"{module_dir}/{original}.py" in head_file_paths
274 or f"{module_dir}/{original}/__init__.py" in head_file_paths
275 )
276 else:
277 # bare import: module_dir already encodes the full path
278 is_module = (
279 f"{module_dir}.py" in head_file_paths
280 or f"{module_dir}/__init__.py" in head_file_paths
281 )
282 if is_module:
283 continue # valid module import — not stale
284
285 issues.append(
286 _BreakageIssue(
287 issue_type="stale_import",
288 path=file_path,
289 description=(
290 f"imports '{original}'"
291 f"{f' (as {name!r})' if name != original else ''}"
292 " but no symbol or module with that name "
293 "exists in the HEAD snapshot"
294 ),
295 severity="warning",
296 )
297 )
298
299 # -----------------------------------------------------------------------
300 # Check 2: removed public methods
301 # -----------------------------------------------------------------------
302 # For each class that appears in BOTH HEAD and the working tree, flag any
303 # public method that existed in HEAD but is missing from the working-tree
304 # class body. Private methods (``_``-prefixed) are intentionally excluded
305 # — they are implementation detail, not public API.
306 #
307 # Only applies to Python / Python-stub files; other adapters may not
308 # produce reliable method records.
309 suffix = pathlib.PurePosixPath(file_path).suffix.lower()
310 if suffix in {".py", ".pyi"} and head_tree:
311 # Build working-tree class → methods map for this file.
312 working_class_methods: _MethodMap = {}
313 for rec in working_tree.values():
314 if rec["kind"] != "method":
315 continue
316 qn = rec["qualified_name"]
317 if "." not in qn:
318 continue
319 class_part, _ = qn.rsplit(".", 1)
320 working_class_methods.setdefault(class_part, set()).add(rec["name"])
321
322 for rec in head_tree.values():
323 if rec["kind"] != "class":
324 continue
325 class_name: str = rec["name"]
326 head_key = f"{file_path}::{class_name}"
327 expected = head_class_methods.get(head_key, set())
328 actual = working_class_methods.get(class_name, set())
329 for method in sorted(expected - actual):
330 if not method.startswith("_"):
331 issues.append(
332 _BreakageIssue(
333 issue_type="removed_public_method",
334 path=file_path,
335 description=(
336 f"class '{class_name}' is missing public method "
337 f"'{method}' that existed in HEAD"
338 ),
339 severity="error",
340 )
341 )
342
343 return issues
344
345 # ---------------------------------------------------------------------------
346 # CLI registration
347 # ---------------------------------------------------------------------------
348
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``breakage`` subcommand and its options to *subparsers*.

    The module docstring is reused as the long ``--help`` description (kept
    verbatim by ``RawDescriptionHelpFormatter``), and ``run`` is installed as
    the handler through ``set_defaults(func=...)``.
    """
    parser = subparsers.add_parser(
        "breakage",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        help="Detect symbol-level breakage in the working tree vs HEAD snapshot.",
    )

    # Options that take a value: (flags, metavar, dest, help text).
    valued_options = (
        (
            ("--language", "-l"), "LANG", "language",
            "Restrict to files of this language (e.g. Python).",
        ),
        (
            ("--commit", "-c"), "REF", "commit_ref",
            "Check against this commit/branch/tag instead of HEAD.",
        ),
        (
            ("--path", "-p"), "PATTERN", "path_filter",
            "Only check files matching this glob pattern (e.g. 'muse/core/*.py').",
        ),
    )
    for flags, metavar, dest, help_text in valued_options:
        parser.add_argument(
            *flags, default=None, metavar=metavar, dest=dest, help=help_text,
        )

    # Boolean switches: (flags, dest, help text).
    switches = (
        (
            ("--strict",), "strict",
            "Exit non-zero if any warnings are found (not just errors).",
        ),
        (
            ("--json", "-j"), "json_out",
            "Emit results as JSON.",
        ),
    )
    for flags, dest, help_text in switches:
        parser.add_argument(
            *flags, action="store_true", dest=dest, help=help_text,
        )

    parser.set_defaults(func=run)
383
def run(args: argparse.Namespace) -> None:
    """Detect symbol-level breakage in the working tree vs the HEAD snapshot.

    Compares the working tree against the committed HEAD snapshot (or a named
    ref) for two classes of structural breakage: stale imports (a file imports
    a name that no longer exists in the snapshot) and removed public methods
    (a class drops a method that callers may depend on).  No code is executed —
    analysis is purely structural.

    ``--path`` restricts which files are *checked*; the symbol-name and
    file-path indexes used to resolve imports are always built from the whole
    snapshot, so an import of a symbol defined outside the glob is not
    reported as stale.

    This function always terminates by raising ``SystemExit``.

    Agent quickstart
    ----------------
    ::

        muse code breakage --json
        muse code breakage --language Python --json
        muse code breakage --path "muse/core/*.py" --json
        muse code breakage --strict --json

    JSON fields
    -----------
    commit          ID of the commit checked against.
    branch          Current branch name.
    language_filter Language filter passed via ``--language``; ``null`` if none.
    path_filter     Glob filter passed via ``--path``; ``null`` if none.
    strict          ``true`` if ``--strict`` was set.
    file_count      Number of files analysed (after the path, semantic and
                    language filters).
    issues          List of issue objects; each has ``issue_type``,
                    ``path``, ``description``, and ``severity``
                    (``"error"`` or ``"warning"``).
    total           Total issue count (``errors + warning_count``).
    errors          Number of error-severity issues.
    warning_count   Number of warning-severity issues.
    exit_code       0 = clean; 1 = errors found (or warnings under ``--strict``).

    Exit codes
    ----------
    0   No errors (warnings tolerated unless ``--strict``).
    1   Errors found; or warnings found when ``--strict`` is active; or the
        requested ref could not be resolved / its snapshot could not be read.
    2   Not inside a Muse repository.
    """
    elapsed = start_timer()
    language: str | None = args.language
    json_out: bool = args.json_out
    commit_ref: str | None = getattr(args, "commit_ref", None)
    path_filter: str | None = getattr(args, "path_filter", None)
    strict: bool = getattr(args, "strict", False)

    root = require_repo()
    branch = read_current_branch(root)

    # Resolve branch names before calling resolve_commit_ref, which only
    # handles commit SHAs and HEAD~N notation.
    resolved_ref: str | None = commit_ref
    if commit_ref is not None:
        branch_head = get_head_commit_id(root, commit_ref)
        if branch_head is not None:
            resolved_ref = branch_head  # promote to full commit ID

    commit = resolve_commit_ref(root, branch, resolved_ref)
    if commit is None:
        ref_label = commit_ref or "HEAD"
        # commit_ref is user input — sanitise it like path_filter below.
        print(
            f"❌ No commit found for ref '{sanitize_display(ref_label)}'.",
            file=sys.stderr,
        )
        raise SystemExit(1)

    manifest = get_commit_snapshot_manifest(root, commit.commit_id)
    if manifest is None:
        print(
            f"❌ Cannot read snapshot for commit {commit.commit_id} — "
            "repository may be corrupt.",
            file=sys.stderr,
        )
        raise SystemExit(1)

    # The glob filter selects which files are *checked*.
    filtered_manifest: Manifest = (
        {fp: oid for fp, oid in manifest.items() if fnmatch.fnmatch(fp, path_filter)}
        if path_filter is not None
        else dict(manifest)
    )

    # Load HEAD symbols (committed, from object store) and working-tree symbols
    # (from disk) in a single shared cache cycle to minimise I/O.
    #
    # HEAD symbols are loaded for the *whole* snapshot, not just the filtered
    # subset: the stale-import check asks "does this name exist anywhere in
    # HEAD?", and answering that from a --path-narrowed index would flag every
    # import of a symbol defined outside the glob.
    shared_cache = load_symbol_cache(root)
    full_head_sym_map = symbols_for_snapshot(
        root, dict(manifest),
        language_filter=language,
        cache=shared_cache,
    )
    head_sym_map = (
        full_head_sym_map
        if path_filter is None
        else {
            fp: tree
            for fp, tree in full_head_sym_map.items()
            if fp in filtered_manifest
        }
    )
    working_sym_map = symbols_for_snapshot(
        root, filtered_manifest,
        workdir=root,
        language_filter=language,
        cache=shared_cache,
    )
    shared_cache.save()

    # Build O(1) indexes once; _check_file uses them per-file.
    head_names_set = _build_head_names_set(full_head_sym_map)
    head_class_methods = _build_head_class_methods(head_sym_map)
    # Every path in the snapshot, so a module file resolves even if it
    # contributes no symbols (e.g. an empty ``__init__.py``).
    head_file_paths = frozenset(manifest.keys())

    all_issues: list[_BreakageIssue] = []
    analysed_count = 0
    for file_path in sorted(filtered_manifest):
        if not is_semantic(file_path):
            continue
        if language and language_of(file_path) != language:
            continue
        analysed_count += 1
        all_issues.extend(
            _check_file(
                file_path,
                working_sym_map.get(file_path, {}),
                head_sym_map.get(file_path, {}),
                head_names_set,
                head_class_methods,
                head_file_paths,
            )
        )

    errors = sum(1 for i in all_issues if i["severity"] == "error")
    warnings = sum(1 for i in all_issues if i["severity"] == "warning")
    exit_code = 1 if (errors > 0 or (strict and warnings > 0)) else 0

    if json_out:
        print(json.dumps(_BreakageOutputJson(
            **make_envelope(elapsed, exit_code=exit_code),
            commit=commit.commit_id,
            branch=branch,
            language_filter=language,
            path_filter=path_filter,
            strict=strict,
            file_count=analysed_count,
            issues=list(all_issues),
            total=len(all_issues),
            errors=errors,
            warning_count=warnings,
        )))
        raise SystemExit(exit_code)

    ref_label = commit.commit_id
    if commit_ref:
        ref_label = f"{sanitize_display(commit_ref)} ({commit.commit_id})"
    print(f"\nBreakage check — working tree vs {ref_label}")
    if language:
        print(f"  (language: {language})")
    if path_filter:
        print(f"  (path: {sanitize_display(path_filter)})")
    print("─" * 62)

    if not all_issues:
        print("\n  ✅ No structural breakage detected.")
        raise SystemExit(0)

    for issue in all_issues:
        icon = "🔴" if issue["severity"] == "error" else "⚠️ "
        print(f"\n{icon} {sanitize_display(issue['issue_type'])}")
        print(f"   {sanitize_display(issue['path'])}")
        # Descriptions embed names taken from source files — sanitise them
        # the same way as the path above.
        print(f"   {sanitize_display(issue['description'])}")

    print(f"\n  {errors} error(s), {warnings} warning(s)")
    raise SystemExit(exit_code)
File History 7 commits
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 1 day ago
sha256:e452ad9a6ace6ccc6d875a35e06caf9da5576a970c1c36133b69a891ce5fefa8 chore: prebuild timing test Sonnet 4.6 8 days ago
sha256:0008ab6695e3e064b3e236b24fd19e538fef6a588eb0d211622f4466d919c0b1 merge: pull staging/dev — advance to 0.2.0rc12 Sonnet 4.6 patch 10 days ago
sha256:9c33d61749fff814c5226d5386aa2af7064c2c02788594a25fdd709358132eea fix: _PROPOSAL_PREFIX_RESOLVE_LIMIT 200 → 100 to match hub … Sonnet 4.6 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 24 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 30 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 30 days ago