"""Typing audit — zero-tolerance type-safety enforcement for mission-critical code. Every banned pattern maps to a future Rust port liability: if Python cannot name a type, ``rustc`` cannot either. The ratchet keeps the rule enforced continuously so violations never accumulate. Patterns checked ---------------- *Any-as-type* — ``dict[str, Any]``, ``list[Any]``, ``type[Any]``, ``Any | X``, ``X | Any``, ``Mapping[str, Any]``, etc. *object-as-type* — same severity as Any; erases all structural information. *cast()* — all usage banned; it conceals a broken callee return type. *# type: ignore* — every suppressed error is an unaudited assumption. *Bare collections* — ``list``, ``dict``, ``set``, ``tuple`` without ``[T]``. *Optional[X]* and *Union[X, Y]* — use ``X | None`` and ``X | Y`` (PEP 604). *Legacy typing imports* — ``List``, ``Dict``, ``Set``, ``Tuple``. *Bare Callable / Callable returning Any* — must carry a full signature. *Untyped varargs* — ``*args: Any``, ``**kwargs: Any``, and unannotated ``*args`` / ``**kwargs`` (annotation absent entirely). *Untyped function definitions* — missing return or parameter annotation. *Unconstrained TypeVar* — ``TypeVar(...)`` with no ``bound=`` and no constraint arguments; behaves identically to ``Any`` in practice. *Naked dict at boundary* — ``dict[str, X]`` as a parameter or return type is banned at function/method boundaries. Every dict with known keys must be a ``TypedDict``; every dict with dynamic keys must justify its key space. The only valid ``dict[str, ...]`` at a boundary is an explicitly named ``TypedDict`` subclass. This rule exists because ``rustc`` cannot infer struct fields from a ``HashMap`` — named fields must be declared. Pattern ``boundary_dict`` fires on ``: dict[str,`` and ``-> dict[str,``. *Anonymous dict in collection* — ``list[dict[str, X]]``, ``dict[str, dict[str, X]]``, ``tuple[dict[str, X], ...]``. An anonymous dict nested inside a collection is always a named struct waiting to be declared. 
Use a ``TypedDict`` subclass or a named type alias (e.g. ``list[JSONObject]``, ``list[SymbolHistoryEntry]``). Named type aliases do NOT trigger this rule — only the literal expansion does. This is by design: ``list[JSONObject]`` is fine; ``list[dict[str, JSONValue]]`` is not. Rust requires every struct field to be named; ``Vec>`` is never the right answer when ``Vec`` is possible. ``concrete_dict_in_list`` — fires on ``list[dict[str,``, ``tuple[dict[str,``, ``set[dict[str,`` ``dict_of_dict`` — fires on ``dict[str, dict[str,`` Usage:: python tools/typing_audit.py # musehub/ + tests/ python tools/typing_audit.py --dirs musehub/ tests/ python tools/typing_audit.py --dirs musehub/ --max-any 0 --max-untyped 0 python tools/typing_audit.py --json artifacts/typing_audit.json """ from __future__ import annotations import argparse import ast import io import json import operator import re import sys import tokenize from collections import defaultdict from pathlib import Path from typing import TypedDict # --------------------------------------------------------------------------- # Type aliases — avoid dict[str, X] at function/class-field boundaries. # --------------------------------------------------------------------------- type PatternCounts = dict[str, int] type PatternLines = dict[str, list[int]] type PatternMap = dict[str, re.Pattern[str]] type PerFileViolations = dict[str, PatternCounts] class Violation(TypedDict): """A single typed violation — one pattern match at one source location.""" file: str line: int kind: str # --------------------------------------------------------------------------- # Data shapes — TypedDicts replace every dict[str, Any] in the old script. # All shapes mirror the Rust struct that will eventually own them. # --------------------------------------------------------------------------- class UntypedDef(TypedDict): """A function or method that is missing a required type annotation. 
``issue`` is one of: - ``"missing_return_type"`` — no return annotation. - ``"missing_param_type"`` — a non-self/cls parameter lacks annotation. - ``"untyped_args"`` — ``*args`` is annotated as ``Any`` or has no annotation at all. - ``"untyped_kwargs"`` — ``**kwargs`` is annotated as ``Any`` or has no annotation at all. - ``"unconstrained_typevar"``— a ``TypeVar`` with no ``bound=`` and no positional constraints. """ file: str line: int name: str issue: str class FileResult(TypedDict): """Typing-violation summary for a single Python source file.""" file: str imports_any: bool patterns: PatternCounts pattern_lines: PatternLines type_ignore_variants: PatternCounts untyped_defs: list[UntypedDef] class Offender(TypedDict): """A file with at least one typing violation, ranked by total count.""" file: str total: int patterns: PatternCounts class ReportSummary(TypedDict): """High-level aggregate counts for the entire scan.""" total_files_scanned: int files_importing_any: int total_any_patterns: int untyped_defs: int class Report(TypedDict): """Full typing-audit report produced by :func:`generate_report`.""" summary: ReportSummary pattern_totals: PatternCounts type_ignore_variants: PatternCounts top_offenders: list[Offender] per_file: PerFileViolations violations: list[Violation] untyped_defs: list[UntypedDef] # --------------------------------------------------------------------------- # String-literal masking # --------------------------------------------------------------------------- def _mask_string_literals(source: str) -> str: """Replace string-literal content with spaces, preserving newlines. Pattern matching runs on the masked source so that raw regex strings, docstrings, and string constants never produce false positives. All newlines are preserved so that line numbers stay accurate. Tokenisation errors (e.g. 
incomplete source snippets) are silently ignored — the original source is returned unchanged so the caller still produces *some* output rather than silently dropping the file. Args: source: Full UTF-8 source text of a Python file. Returns: A copy of *source* with the content of every string token replaced by space characters (newlines within multi-line strings preserved). """ chars = list(source) lines = source.splitlines(keepends=True) # Pre-compute cumulative line offsets for O(1) (row, col) → offset. offsets: list[int] = [0] for ln in lines: offsets.append(offsets[-1] + len(ln)) def _abs(row: int, col: int) -> int: return offsets[row - 1] + col # Token types that contain string literal content — including f-string # middle segments which are FSTRING_MIDDLE (not STRING) in Python 3.12+. _FSTRING_MIDDLE = getattr(tokenize, "FSTRING_MIDDLE", None) _STRING_TYPES = {tokenize.STRING} if _FSTRING_MIDDLE is not None: _STRING_TYPES.add(_FSTRING_MIDDLE) try: gen = tokenize.generate_tokens(io.StringIO(source).readline) for tok_type, _tok_str, (srow, scol), (erow, ecol), _ in gen: if tok_type not in _STRING_TYPES: continue start = _abs(srow, scol) end = _abs(erow, ecol) for i in range(start, end): if chars[i] not in {"\n", "\r"}: chars[i] = " " except tokenize.TokenError: pass return "".join(chars) # --------------------------------------------------------------------------- # Pattern registry # --------------------------------------------------------------------------- #: All patterns that count toward the violation total. #: Keys are stable identifiers used in JSON output and tests. #: #: NOTE: do NOT use re.IGNORECASE — Python type annotations are case-sensitive. #: ``List`` and ``list`` are distinct identifiers; matching ``list[any]`` #: (where ``any`` is the built-in function) would be a false positive. 
_PATTERNS: PatternMap = {
    # Any-as-type ─────────────────────────────────────────────────────────
    "dict_str_any": re.compile(r"\bdict\[str,\s*Any\]|\bDict\[str,\s*Any\]"),
    "list_any": re.compile(r"\blist\[Any\]|\bList\[Any\]"),
    "type_any": re.compile(r"\btype\[Any\]"),
    "any_in_union": re.compile(r"\bAny\s*\||\|\s*Any\b"),
    "return_any": re.compile(r"->\s*Any\b"),
    "param_any": re.compile(r":\s*Any\b"),
    "mapping_any": re.compile(r"\bMapping\[str,\s*Any\]"),
    "optional_any": re.compile(r"\bOptional\[Any\]"),
    "sequence_any": re.compile(r"\bSequence\[Any\]|\bIterable\[Any\]"),
    "tuple_any": re.compile(r"\btuple\[[^\n]*Any[^\n]*\]|\bTuple\[[^\n]*Any[^\n]*\]"),
    # object-as-type ──────────────────────────────────────────────────────
    "param_object": re.compile(r":\s*object\b"),
    "return_object": re.compile(r"->\s*object\b"),
    # Handles one level of nesting, e.g. dict[str, list[object]].
    # NOTE: Mapping is intentionally excluded — Mapping[str, object] is the
    # correct type for read-only, covariant mappings at framework boundaries
    # (e.g. Jinja2 template contexts).  Mapping[str, Any] is caught separately
    # by mapping_any.  Only mutable collection types need this guard.
    "collection_object": re.compile(
        r"\b(?:dict|list|set|tuple|Sequence)"
        r"\[[^\n\[\]]*(?:\[[^\n\[\]]*\][^\n\[\]]*)*\bobject\b"
    ),
    # NOTE(review): the three entries below (cast_usage, type_ignore,
    # bare_list) were found truncated/merged into a single invalid regex that
    # raised re.error at import time.  They are restored here by analogy with
    # their siblings — confirm the exact pattern text against version control.
    # cast() — banned ─────────────────────────────────────────────────────
    # The look-behind keeps identifiers that merely end in "cast"
    # (e.g. broadcast(...)) from matching.
    "cast_usage": re.compile(r"(?<!\w)cast\("),
    # type: ignore — every suppression is an unaudited assumption ─────────
    "type_ignore": re.compile(r"#\s*type:\s*ignore\b"),
    # Bare collections — list/dict/set/tuple without [T] ──────────────────
    "bare_list": re.compile(r"(?::\s*|->\s*)list\b(?!\[|\(|\s+[a-z])"),
    "bare_dict": re.compile(r"(?::\s*|->\s*)dict\b(?!\[|\(|\s+[a-z])"),
    "bare_set": re.compile(r"(?::\s*|->\s*)set\b(?!\[|\(|\s+[a-z])"),
    "bare_tuple": re.compile(r"(?::\s*|->\s*)tuple\b(?!\[|\(|\s+[a-z])"),
    # Optional[X] — use X | None (PEP 604) ────────────────────────────────
    "optional_usage": re.compile(r"\bOptional\[(?!Any\b)"),
    # Union[X, Y] — use X | Y (PEP 604) ──────────────────────────────────
    "union_usage": re.compile(r"\bUnion\["),
    # Legacy typing imports (use lowercase builtins) ──────────────────────
    "legacy_List": re.compile(r"\bList\["),
    "legacy_Dict": re.compile(r"\bDict\["),
    "legacy_Set": re.compile(r"\bSet\["),
    "legacy_Tuple": re.compile(r"\bTuple\["),
    # Callable — must carry full signature ────────────────────────────────
    "bare_callable": re.compile(r"(?::\s*|->\s*)Callable\b(?!\[)"),
    "callable_any": re.compile(r"\bCallable\[[^\n]*,\s*Any\s*\]"),
    # Untyped varargs — *args: Any / **kwargs: Any ────────────────────────
    # Unannotated *args/**kwargs are caught by the AST walker instead.
    "varargs_any": re.compile(r"\*{1,2}\w+:\s*Any\b"),
    # Naked dict at boundary — dict[str, X] as param/return type is banned.
    # Every structured boundary must use a TypedDict (or dataclass/enum).
    # Matches ": dict[str," and "-> dict[str," — the two annotation positions.
    #
    # APPROVED alternatives at boundaries:
    #   - ReadOnlyJSONObject (= Mapping[str, JSONValue]) for read-only JSON params
    #   - A named TypedDict subclass for any dict with statically known keys
    #
    # Mapping[str, JSONValue] is covariant so any dict[str, T where T ⊆ JSONValue]
    # is assignable to it.  This pattern (boundary_dict) does NOT fire on
    # Mapping[...]; mapping_any does NOT fire on Mapping[str, JSONValue].
    # Therefore Mapping[str, JSONValue] is the safe boundary form for JSON dicts.
    "boundary_dict": re.compile(r"(?::\s*|->\s*)dict\[str\s*,"),
    # Anonymous dict in collection — list[dict[str, X]] / dict[str, dict[str, X]].
    # A dict nested inside a collection is always a named struct opportunity.
    # Use a TypedDict subclass or a named type alias (e.g. list[JSONObject]).
    # Named aliases do NOT trigger this rule — only the literal expansion does.
    # This is intentional: list[JSONObject] is fine; list[dict[str, JSONValue]] is not.
    "concrete_dict_in_list": re.compile(
        r"\b(?:list|tuple|set)\[dict\[str,"
    ),
    "dict_of_dict": re.compile(
        r"\bdict\[str,\s*dict\[str,"
    ),
}

# Category groupings for the human-readable report, in display order.
# Every name listed here must be a key of _PATTERNS; a pattern that is not
# listed in any category is still counted but never shown in the breakdown.
_CATEGORY_ORDER: list[tuple[str, list[str]]] = [
    ("Any-as-type", [
        "dict_str_any", "list_any", "type_any", "any_in_union",
        "return_any", "param_any", "mapping_any", "optional_any",
        "sequence_any", "tuple_any",
    ]),
    ("object-as-type", ["param_object", "return_object", "collection_object"]),
    ("cast() usage", ["cast_usage"]),
    ("type: ignore", ["type_ignore"]),
    ("Bare collections", ["bare_list", "bare_dict", "bare_set", "bare_tuple"]),
    ("Optional (use X | None)", ["optional_usage"]),
    ("Union (use X | Y)", ["union_usage"]),
    ("Legacy typing imports", ["legacy_List", "legacy_Dict", "legacy_Set", "legacy_Tuple"]),
    ("Callable (must carry full signature)", ["bare_callable", "callable_any"]),
    ("Untyped varargs", ["varargs_any"]),
    ("Naked dict at boundary (use TypedDict)", ["boundary_dict"]),
    ("Anonymous dict in collection (use TypedDict or named alias)", [
        "concrete_dict_in_list", "dict_of_dict",
    ]),
]

# Directories that are never source code and must be skipped during scanning.
_SKIP_DIRS: frozenset[str] = frozenset({
    "venv", ".venv", "env", ".env",
    "__pycache__", ".git", ".muse",
    ".mypy_cache", ".ruff_cache", ".pytest_cache", ".tox",
    "dist", "build", "site-packages", "__pypackages__",
})

# Modules whose ``Any`` export is tracked by ``_imports_any``.
_TYPING_MODULES: frozenset[str] = frozenset({"typing", "typing_extensions"})

# Single-line fallback used by ``_imports_any`` when the source cannot be
# parsed.  A leading ``#`` prevents a match, so commented-out imports are
# ignored.
_ANY_IMPORT_RE: re.Pattern[str] = re.compile(
    r"^[ \t]*from\s+typing(?:_extensions)?\s+import\s+.*\bAny\b",
    re.MULTILINE,
)

# ``def __dunder__(`` — signatures that legitimately take/return ``object``.
_DUNDER_DEF_RE: re.Pattern[str] = re.compile(r"def\s+__\w+__\s*\(")

# A line that carries the ``def`` keyword (function boundary).
_DEF_KEYWORD_RE: re.Pattern[str] = re.compile(r"\bdef\b")

# ---------------------------------------------------------------------------
# Pattern helpers
# ---------------------------------------------------------------------------


def _count_pattern_in_line(line: str, pattern: re.Pattern[str]) -> int:
    """Return the number of non-overlapping matches of *pattern* in *line*."""
    return sum(1 for _ in pattern.finditer(line))


def _imports_any(source: str) -> bool:
    """Return ``True`` if the source imports ``Any`` from ``typing`` or ``typing_extensions``.

    The check walks the AST, so parenthesised multi-line imports are detected
    and import-looking text inside docstrings, string constants, or comments
    is not.  When the source cannot be parsed the function falls back to a
    single-line regular expression, which still skips commented-out imports.

    Args:
        source: Full source text of the file.

    Returns:
        ``True`` when a ``from typing import ... Any ...`` (or the
        ``typing_extensions`` equivalent) statement is present.
    """
    try:
        tree = ast.parse(source)
    except SyntaxError:
        return _ANY_IMPORT_RE.search(source) is not None
    return any(
        isinstance(node, ast.ImportFrom)
        and node.level == 0  # ``from .typing import Any`` is a different module
        and node.module in _TYPING_MODULES
        and any(alias.name == "Any" for alias in node.names)
        for node in ast.walk(tree)
    )


def _classify_type_ignore(line: str) -> str:
    """Classify the style of a ``# type: ignore`` comment.

    Args:
        line: A single source line that contains ``# type: ignore``.

    Returns:
        ``"type_ignore[code]"`` for a code-specific ignore (``code`` being the
        bracket content verbatim), or ``"type_ignore[blanket]"`` for a bare
        ``# type: ignore``.
    """
    m = re.search(r"#\s*type:\s*ignore\[([^\]]+)\]", line)
    if m:
        return f"type_ignore[{m.group(1)}]"
    return "type_ignore[blanket]"


# ---------------------------------------------------------------------------
# AST-based detection
# ---------------------------------------------------------------------------


def _is_any_annotation(node: ast.expr | None) -> bool:
    """Return ``True`` if *node* spells ``Any`` — bare or module-qualified.

    Both ``Any`` and attribute access ending in ``.Any`` (for example
    ``typing.Any``) are recognised; ``None`` and every other expression
    return ``False``.
    """
    if isinstance(node, ast.Name):
        return node.id == "Any"
    if isinstance(node, ast.Attribute):
        return node.attr == "Any"
    return False


def _find_untyped_defs(source: str, filepath: str) -> list[UntypedDef]:
    """Walk the AST and collect every function with a missing annotation.

    Checks:

    - Missing return type (``node.returns is None``).
    - Missing parameter annotation (parameters named ``self`` or ``cls`` are
      exempt).
    - ``*args`` annotated as ``Any`` **or** with no annotation at all.
    - ``**kwargs`` annotated as ``Any`` **or** with no annotation at all.
    - ``TypeVar(...)`` assignments with no ``bound=`` and no constraints.

    Line numbers for parameter violations use the argument's own line number
    (``arg.lineno``) rather than the function definition line, so the report
    points directly at the problematic parameter.  Files that cannot be
    parsed yield an empty list.

    Args:
        source: Full source text of the file.
        filepath: Path string used in the returned records.

    Returns:
        A list of :class:`UntypedDef` records, one per violation found.
        TypeVar records come after all function records.
    """
    results: list[UntypedDef] = []
    try:
        tree = ast.parse(source)
    except SyntaxError:
        return results

    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue

        if node.returns is None:
            results.append(UntypedDef(
                file=filepath,
                line=node.lineno,
                name=node.name,
                issue="missing_return_type",
            ))

        # Positional-only parameters come first in the signature, so list
        # them first to report violations in source order.
        signature = node.args
        named_params = signature.posonlyargs + signature.args + signature.kwonlyargs
        for arg in named_params:
            if arg.arg in {"self", "cls"} or arg.annotation is not None:
                continue
            results.append(UntypedDef(
                file=filepath,
                line=arg.lineno,
                name=f"{node.name}.{arg.arg}",
                issue="missing_param_type",
            ))

        # (parameter node, display prefix, issue label) for *args / **kwargs.
        variadics = (
            (signature.vararg, "*", "untyped_args"),
            (signature.kwarg, "**", "untyped_kwargs"),
        )
        for variadic, stars, issue in variadics:
            if variadic is None:
                continue
            if variadic.annotation is None or _is_any_annotation(variadic.annotation):
                results.append(UntypedDef(
                    file=filepath,
                    line=variadic.lineno,
                    name=f"{node.name}.{stars}{variadic.arg}",
                    issue=issue,
                ))

    # TypeVar without constraints or bound — behaves identically to Any.
    results.extend(_find_unconstrained_typevars(tree, filepath))
    return results


def _find_unconstrained_typevars(tree: ast.Module, filepath: str) -> list[UntypedDef]:
    """Return a record for every ``TypeVar(...)`` with no bound or constraints.

    A bare ``T = TypeVar("T")`` is semantically equivalent to ``T: Any``.
    The Rust port requires every generic to carry an explicit trait bound.

    Only plain assignments are inspected.  The callee may be spelled
    ``TypeVar`` or module-qualified (``typing.TypeVar``).

    Args:
        tree: Parsed AST of the file.
        filepath: Path string used in the returned records.

    Returns:
        A list of :class:`UntypedDef` records for unconstrained ``TypeVar``
        definitions.  ``name`` is the assignment target, or an empty string
        when the first target is not a simple name.
    """
    results: list[UntypedDef] = []
    for node in ast.walk(tree):
        # Match: T = TypeVar("T") or T = typing.TypeVar("T", bound=...)
        if not isinstance(node, ast.Assign) or not isinstance(node.value, ast.Call):
            continue
        call = node.value
        callee = call.func
        is_typevar = (
            (isinstance(callee, ast.Name) and callee.id == "TypeVar")
            or (isinstance(callee, ast.Attribute) and callee.attr == "TypeVar")
        )
        if not is_typevar:
            continue

        # A TypeVar is constrained when it has:
        #   - positional args beyond the name (constraint types), OR
        #   - a keyword arg named "bound"
        has_constraints = bool(call.args[1:])  # args[0] is the name string
        has_bound = any(kw.arg == "bound" for kw in call.keywords)
        if has_constraints or has_bound:
            continue

        first_target = node.targets[0]
        results.append(UntypedDef(
            file=filepath,
            line=node.lineno,
            name=first_target.id if isinstance(first_target, ast.Name) else "",
            issue="unconstrained_typevar",
        ))
    return results


# ---------------------------------------------------------------------------
# File and directory scanner
# ---------------------------------------------------------------------------


def scan_file(filepath: Path) -> FileResult | None:
    """Scan a single Python file and return its violation summary.

    String literals are masked before pattern matching so that raw regex
    strings and docstring prose never produce false positives.  Comments are
    not string tokens, so ``# type: ignore`` on a code line survives masking
    and is still detected.  Blank lines and comment-only lines are skipped.

    Args:
        filepath: Absolute or relative path to the Python file.

    Returns:
        A :class:`FileResult` on success, ``None`` when the file cannot be
        read (I/O or encoding error).
    """
    try:
        source = filepath.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return None

    masked = _mask_string_literals(source)
    # Split on the line feed only.  ``str.splitlines`` also breaks on form
    # feed, vertical tab, U+2028, ... which (a) disagrees with the line
    # numbers ``ast`` and editors report and (b) can differ between the
    # original and the masked text, because masking blanks those characters
    # inside string literals while keeping every line feed.
    original_lines = source.split("\n")
    masked_lines = masked.split("\n")

    patterns: defaultdict[str, int] = defaultdict(int)
    pattern_lines: defaultdict[str, list[int]] = defaultdict(list)
    type_ignore_variants: defaultdict[str, int] = defaultdict(int)

    for lineno, (orig_line, masked_line) in enumerate(
        zip(original_lines, masked_lines), 1
    ):
        stripped = masked_line.strip()
        if not stripped or stripped.startswith("#"):
            continue

        # Per-line facts, computed once instead of once per pattern.
        #
        # Dunder methods legitimately use `: object` (e.g.
        # `__eq__(self, other: object)`, `__contains__(self, item: object)`),
        # so param_object/return_object are skipped on those signatures.
        is_dunder_def = _DUNDER_DEF_RE.search(masked_line) is not None
        # boundary_dict fires on `: dict[str,` and `-> dict[str,`.  Local
        # variable annotations (e.g. ``x: dict[str, int] = {}``) are NOT
        # boundaries: `: dict[str,` on a line without ``def`` is treated as a
        # local variable and the whole line is exempt from boundary_dict.
        is_local_dict_annotation = (
            ": dict[str," in masked_line
            and _DEF_KEYWORD_RE.search(masked_line) is None
        )

        for name, pattern in _PATTERNS.items():
            if is_dunder_def and name in {"param_object", "return_object"}:
                continue
            if is_local_dict_annotation and name == "boundary_dict":
                continue

            count = _count_pattern_in_line(masked_line, pattern)
            if count == 0:
                continue
            patterns[name] += count
            pattern_lines[name].append(lineno)
            if name == "type_ignore":
                # Classify against the original line so we can distinguish
                # blanket ignores from code-specific ones.
                variant = _classify_type_ignore(orig_line)
                type_ignore_variants[variant] += 1

    return FileResult(
        file=str(filepath),
        imports_any=_imports_any(source),
        patterns=dict(patterns),
        pattern_lines=dict(pattern_lines),
        type_ignore_variants=dict(type_ignore_variants),
        untyped_defs=_find_untyped_defs(source, str(filepath)),
    )


def scan_directory(directory: Path) -> list[FileResult]:
    """Recursively scan all Python files in *directory*.

    Skips any file whose path contains a component listed in ``_SKIP_DIRS``
    (virtual environments, caches, build artefacts, VCS/tool metadata).
    Files that cannot be read are omitted from the result.

    Args:
        directory: Root of the directory tree to scan.

    Returns:
        A list of :class:`FileResult` objects in sorted path order, one per
        successfully scanned file.
    """
    results: list[FileResult] = []
    for py_file in sorted(directory.rglob("*.py")):
        if any(part in _SKIP_DIRS for part in py_file.parts):
            continue
        file_result = scan_file(py_file)
        if file_result is not None:
            results.append(file_result)
    return results


# ---------------------------------------------------------------------------
# Report generation
# ---------------------------------------------------------------------------


def _offender_sort_key(entry: Offender) -> int:
    """Return the sort key for an :class:`Offender` (total violation count)."""
    return entry["total"]


def generate_report(results: list[FileResult]) -> Report:
    """Aggregate per-file scan results into a :class:`Report`.

    Args:
        results: List of :class:`FileResult` objects from :func:`scan_file`
            or :func:`scan_directory`.

    Returns:
        A :class:`Report` ready for human display or JSON serialisation.
        ``violations`` is sorted by (file, line); ``top_offenders`` is sorted
        by descending total and holds every offending file (display is
        capped separately by the human-readable printer).
    """
    totals: defaultdict[str, int] = defaultdict(int)
    files_with_any_import = 0
    per_file: PerFileViolations = {}
    top_offenders: list[Offender] = []
    all_type_ignore_variants: defaultdict[str, int] = defaultdict(int)
    all_untyped_defs: list[UntypedDef] = []
    all_violations: list[Violation] = []

    for r in results:
        filepath = r["file"]
        if r["imports_any"]:
            files_with_any_import += 1

        file_total = 0
        file_patterns: PatternCounts = {}
        for pattern, count in r["patterns"].items():
            totals[pattern] += count
            file_patterns[pattern] = count
            file_total += count
            # One Violation per recorded line; a line with several matches
            # of the same pattern is listed once but counted fully above.
            all_violations.extend(
                Violation(file=filepath, line=lineno, kind=pattern)
                for lineno in r["pattern_lines"].get(pattern, [])
            )

        if file_total > 0:
            per_file[filepath] = file_patterns
            top_offenders.append(Offender(
                file=filepath,
                total=file_total,
                patterns=file_patterns,
            ))

        for variant, count in r["type_ignore_variants"].items():
            all_type_ignore_variants[variant] += count
        all_untyped_defs.extend(r["untyped_defs"])

    all_violations.sort(key=lambda v: (v["file"], v["line"]))
    top_offenders.sort(key=_offender_sort_key, reverse=True)

    return Report(
        summary=ReportSummary(
            total_files_scanned=len(results),
            files_importing_any=files_with_any_import,
            total_any_patterns=sum(totals.values()),
            untyped_defs=len(all_untyped_defs),
        ),
        pattern_totals=dict(totals),
        type_ignore_variants=dict(all_type_ignore_variants),
        # Store all offenders in JSON; display is capped separately in the
        # human-readable printer.
        top_offenders=top_offenders,
        per_file=per_file,
        violations=all_violations,
        # Store the full list — callers that need all records can use --json.
        untyped_defs=all_untyped_defs,
    )


# ---------------------------------------------------------------------------
# Human-readable report printer
# ---------------------------------------------------------------------------


def print_human_summary(report: Report, top_n: int = 15) -> None:
    """Print a formatted, human-readable summary of *report* to stdout.

    Sections, in order: headline counts, per-category pattern breakdown
    (categories with a zero total are omitted), ``# type: ignore`` variants
    by descending count, the full violation list, and the top offenders.

    Args:
        report: A :class:`Report` produced by :func:`generate_report`.
        top_n: How many offenders to display in the top-offenders list.
    """
    s = report["summary"]
    totals = report["pattern_totals"]

    print("\n" + "=" * 70)
    print(" TYPING AUDIT — Violation Report")
    print("=" * 70)
    print(f" Files scanned: {s['total_files_scanned']}")
    print(f" Files importing Any: {s['files_importing_any']}")
    print(f" Total violations: {s['total_any_patterns']}")
    print(f" Untyped defs: {s['untyped_defs']}")
    print()

    has_violations = False
    for category, pattern_names in _CATEGORY_ORDER:
        category_total = sum(totals.get(p, 0) for p in pattern_names)
        if category_total == 0:
            continue
        has_violations = True
        print(f" {category}:")
        for p in pattern_names:
            count = totals.get(p, 0)
            if count > 0:
                print(f" {p:38s} {count:5d}")
        print()

    if not has_violations:
        print(" Pattern breakdown: (none)")
        print()

    if report["type_ignore_variants"]:
        print(" # type: ignore variants:")
        for variant, count in sorted(
            report["type_ignore_variants"].items(),
            key=operator.itemgetter(1),
            reverse=True,
        ):
            print(f" {variant:44s} {count:5d}")
        print()

    if report["violations"]:
        print(" Violations (file:line [kind]):")
        for v in report["violations"]:
            print(f" {v['file']}:{v['line']} [{v['kind']}]")
        print()

    print(f" Top {top_n} offenders:")
    for entry in report["top_offenders"][:top_n]:
        print(f" {entry['total']:4d} {entry['file']}")
    print("=" * 70 + "\n")


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------


def main(argv: list[str] | None = None) -> None:
    """Entry point: parse CLI flags, run the scan, and enforce the ratchet.

    Scans the specified directories (or individual files), prints a human
    summary, optionally writes a JSON report, and exits non-zero when either
    the pattern violation count exceeds ``--max-any`` or the untyped-def
    count exceeds ``--max-untyped``.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``.  ``None``
            (the default) keeps the normal command-line behaviour.

    Raises:
        SystemExit: With status 1 when a ratchet threshold is exceeded (and
            by ``argparse`` itself on invalid arguments).
    """
    parser = argparse.ArgumentParser(
        description=(
            "Audit typing violations: Any, object, cast, bare collections, "
            "Optional/Union (legacy), Callable without signature, untyped "
            "varargs, type: ignore, untyped defs, unconstrained TypeVars."
        ),
    )
    parser.add_argument(
        "--dirs",
        nargs="+",
        default=["muse/", "tests/"],
        help="Directories or individual .py files to scan. Default: muse/ tests/",
    )
    parser.add_argument(
        "--json",
        type=str,
        metavar="PATH",
        help="Write the JSON report to PATH.",
    )
    parser.add_argument(
        "--max-any",
        type=int,
        default=None,
        metavar="N",
        help="Exit non-zero if total pattern violations exceed N (ratchet mode).",
    )
    parser.add_argument(
        "--max-untyped",
        type=int,
        default=None,
        metavar="N",
        help="Exit non-zero if total untyped-def count exceeds N (ratchet mode).",
    )
    parser.add_argument(
        "--top-n",
        type=int,
        default=15,
        metavar="N",
        help="Number of offenders to display in the human summary. Default: 15.",
    )
    args = parser.parse_args(argv)

    all_results: list[FileResult] = []
    for d in args.dirs:
        p = Path(d)
        if p.is_file() and p.suffix == ".py":
            result = scan_file(p)
            if result is not None:
                all_results.append(result)
        elif p.is_dir():
            all_results.extend(scan_directory(p))
        elif p.exists():
            # Present on disk but neither a directory nor a .py file; saying
            # it "does not exist" would send the user looking for a typo.
            print(
                f"WARNING: {d} is not a directory or .py file, skipping",
                file=sys.stderr,
            )
        else:
            print(f"WARNING: {d} does not exist, skipping", file=sys.stderr)

    report = generate_report(all_results)
    print_human_summary(report, top_n=args.top_n)

    if args.json:
        out = Path(args.json)
        out.parent.mkdir(parents=True, exist_ok=True)
        out.write_text(json.dumps(report, indent=2), encoding="utf-8")
        print(f" JSON report written to {args.json}")

    failed = False

    if args.max_any is not None:
        total = report["summary"]["total_any_patterns"]
        if total > args.max_any:
            print(
                f"\n❌ RATCHET FAILED (patterns): {total} violations exceed "
                f"threshold of {args.max_any}",
                file=sys.stderr,
            )
            failed = True
        else:
            print(
                f"\n✅ RATCHET OK (patterns): {total} violations within "
                f"threshold of {args.max_any}",
            )

    if args.max_untyped is not None:
        untyped = report["summary"]["untyped_defs"]
        if untyped > args.max_untyped:
            print(
                f"\n❌ RATCHET FAILED (untyped defs): {untyped} exceed "
                f"threshold of {args.max_untyped}",
                file=sys.stderr,
            )
            failed = True
        else:
            print(
                f"\n✅ RATCHET OK (untyped defs): {untyped} within "
                f"threshold of {args.max_untyped}",
            )

    if failed:
        sys.exit(1)


if __name__ == "__main__":
    main()