"""Typing audit — zero-tolerance type-safety enforcement for mission-critical code.

Every banned pattern maps to a future Rust port liability: if Python cannot
name a type, ``rustc`` cannot either.  The ratchet keeps the rule enforced
continuously so violations never accumulate.

Patterns checked
----------------
*Any-as-type* — ``dict[str, Any]``, ``list[Any]``, ``type[Any]``,
``Any | X``, ``X | Any``, ``Mapping[str, Any]``, etc.

*object-as-type* — same severity as Any; erases all structural information.

*cast()* — all usage banned; it conceals a broken callee return type.

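*# type: ignore* — every suppressed error is an unaudited assumption.  Only
blanket ignores are counted; ``# type: ignore[code]`` with an explicit error
code is accepted.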

*Bare collections* — ``list``, ``dict``, ``set``, ``tuple`` without ``[T]``.

*Optional[X]* and *Union[X, Y]* — use ``X | None`` and ``X | Y`` (PEP 604).

*Legacy typing imports* — ``List``, ``Dict``, ``Set``, ``Tuple``.

*Bare Callable / Callable returning Any* — must carry a full signature.

*Untyped varargs* — ``*args: Any``, ``**kwargs: Any``, and unannotated
``*args`` / ``**kwargs`` (annotation absent entirely).

*Untyped function definitions* — missing return or parameter annotation.

*Unconstrained TypeVar* — ``TypeVar(...)`` with no ``bound=`` and no
constraint arguments; behaves identically to ``Any`` in practice.

*Naked dict at boundary* — ``dict[str, X]`` as a parameter or return type
is banned at function/method boundaries.  Every dict with known keys must
be a ``TypedDict``; every dict with dynamic keys must justify its key space.
The only valid ``dict[str, ...]`` at a boundary is an explicitly named
``TypedDict`` subclass.  This rule exists because ``rustc`` cannot infer
struct fields from a ``HashMap<String, X>`` — named fields must be declared.
Pattern ``boundary_dict`` fires on ``: dict[str,`` and ``-> dict[str,``.

*Anonymous dict in collection* — ``list[dict[str, X]]``, ``dict[str, dict[str, X]]``,
``tuple[dict[str, X], ...]``.  An anonymous dict nested inside a collection is
always a named struct waiting to be declared.  Use a ``TypedDict`` subclass or a
named type alias (e.g. ``list[JSONObject]``, ``list[SymbolHistoryEntry]``).

Named type aliases do NOT trigger this rule — only the literal expansion does.
This is by design: ``list[JSONObject]`` is fine; ``list[dict[str, JSONValue]]`` is not.
Rust requires every struct field to be named; ``Vec<HashMap<String, Value>>`` is
never the right answer when ``Vec<SymbolEntry>`` is possible.

  ``concrete_dict_in_list`` — fires on ``list[dict[str,``, ``tuple[dict[str,``,
                               ``set[dict[str,``
  ``dict_of_dict``           — fires on ``dict[str, dict[str,``

Usage::

    python tools/typing_audit.py                        # default --dirs: muse/ tests/
    python tools/typing_audit.py --dirs musehub/ tests/
    python tools/typing_audit.py --dirs musehub/ --max-any 0 --max-untyped 0
    python tools/typing_audit.py --json artifacts/typing_audit.json
"""

from __future__ import annotations

import argparse
import ast
import io
import json
import operator
import re
import sys
import tokenize
from collections import defaultdict
from pathlib import Path
from typing import TypedDict

# ---------------------------------------------------------------------------
# Type aliases — avoid dict[str, X] at function/class-field boundaries.
# ---------------------------------------------------------------------------

# Pattern key -> number of matches.
type PatternCounts = dict[str, int]
# Pattern key -> 1-based line numbers on which the pattern matched.
type PatternLines = dict[str, list[int]]
# Pattern key -> compiled regular expression (the shape of the registry).
type PatternMap = dict[str, re.Pattern[str]]
# File path -> that file's per-pattern match counts.
type PerFileViolations = dict[str, PatternCounts]


class Violation(TypedDict):
    """A single typed violation — one pattern match at one source location.

    The report builder emits one record per (pattern, line) pair, so a line
    that matches the same pattern several times still yields one record.
    """

    # Path of the scanned file, copied from the per-file result.
    file: str
    # 1-based line number on which the pattern matched.
    line: int
    # Key of the pattern that matched (e.g. "cast_usage").
    kind: str

# ---------------------------------------------------------------------------
# Data shapes — TypedDicts replace every dict[str, Any] in the old script.
# All shapes mirror the Rust struct that will eventually own them.
# ---------------------------------------------------------------------------


class UntypedDef(TypedDict):
    """A function or method that is missing a required type annotation.

    ``issue`` is one of:

    - ``"missing_return_type"``  — no return annotation.
    - ``"missing_param_type"``   — a non-self/cls parameter lacks annotation.
    - ``"untyped_args"``         — ``*args`` is annotated as ``Any`` or has
                                    no annotation at all.
    - ``"untyped_kwargs"``       — ``**kwargs`` is annotated as ``Any`` or has
                                    no annotation at all.
    - ``"unconstrained_typevar"``— a ``TypeVar`` with no ``bound=`` and no
                                    positional constraints.
    """

    # Path string of the file the record belongs to.
    file: str
    # Line of the ``def`` for return-type issues, of the offending argument
    # for parameter issues, and of the assignment for TypeVar issues.
    line: int
    # Function name, ``func.param``, ``func.*args``, ``func.**kwargs``, or the
    # assignment target of the TypeVar.
    name: str
    # One of the issue identifiers listed in the class docstring.
    issue: str


class FileResult(TypedDict):
    """Typing-violation summary for a single Python source file."""

    # Path of the scanned file, stringified.
    file: str
    # Whether the file imports ``Any`` from ``typing``/``typing_extensions``.
    imports_any: bool
    # Pattern key -> total number of matches in the file.
    patterns: PatternCounts
    # Pattern key -> 1-based line numbers with at least one match.
    pattern_lines: PatternLines
    # ``type_ignore[...]`` label -> number of lines carrying that variant.
    type_ignore_variants: PatternCounts
    # AST-derived annotation gaps found in the file.
    untyped_defs: list[UntypedDef]


class Offender(TypedDict):
    """A file with at least one typing violation, ranked by total count."""

    # Path of the offending file.
    file: str
    # Sum of all pattern match counts in the file (used as the sort key).
    total: int
    # Pattern key -> match count for this file.
    patterns: PatternCounts


class ReportSummary(TypedDict):
    """High-level aggregate counts for the entire scan."""

    # Number of per-file results that went into the report.
    total_files_scanned: int
    # Number of those files whose ``imports_any`` flag is set.
    files_importing_any: int
    # Sum of ALL pattern totals — despite the name, not only the Any-related
    # ones.  This is the value compared against ``--max-any``.
    total_any_patterns: int
    # Number of untyped-def records across all files; compared against
    # ``--max-untyped``.
    untyped_defs: int


class Report(TypedDict):
    """Full typing-audit report produced by :func:`generate_report`."""

    # Aggregate counters for the whole scan.
    summary: ReportSummary
    # Pattern key -> match count summed over every file.
    pattern_totals: PatternCounts
    # ``type_ignore[...]`` label -> count summed over every file.
    type_ignore_variants: PatternCounts
    # Every file with at least one match, highest total first (not truncated).
    top_offenders: list[Offender]
    # File path -> per-pattern counts, only for files with at least one match.
    per_file: PerFileViolations
    # One record per (pattern, line) hit, sorted by file then line.
    violations: list[Violation]
    # Every untyped-def record from every file.
    untyped_defs: list[UntypedDef]


# ---------------------------------------------------------------------------
# String-literal masking
# ---------------------------------------------------------------------------


def _mask_string_literals(source: str) -> str:
    """Replace string-literal content with spaces, preserving newlines.

    Pattern matching runs on the masked source so that raw regex strings,
    docstrings, and string constants never produce false positives.  All
    newlines are preserved so that line numbers stay accurate.

    Tokenisation errors (e.g. incomplete source snippets) are silently
    ignored — the original source is returned unchanged so the caller still
    produces *some* output rather than silently dropping the file.

    Args:
        source: Full UTF-8 source text of a Python file.

    Returns:
        A copy of *source* with the content of every string token replaced
        by space characters (newlines within multi-line strings preserved).
    """
    chars = list(source)
    lines = source.splitlines(keepends=True)

    # Pre-compute cumulative line offsets for O(1) (row, col) → offset.
    offsets: list[int] = [0]
    for ln in lines:
        offsets.append(offsets[-1] + len(ln))

    def _abs(row: int, col: int) -> int:
        return offsets[row - 1] + col

    # Token types that contain string literal content — including f-string
    # middle segments which are FSTRING_MIDDLE (not STRING) in Python 3.12+.
    _FSTRING_MIDDLE = getattr(tokenize, "FSTRING_MIDDLE", None)
    _STRING_TYPES = {tokenize.STRING}
    if _FSTRING_MIDDLE is not None:
        _STRING_TYPES.add(_FSTRING_MIDDLE)

    try:
        gen = tokenize.generate_tokens(io.StringIO(source).readline)
        for tok_type, _tok_str, (srow, scol), (erow, ecol), _ in gen:
            if tok_type not in _STRING_TYPES:
                continue
            start = _abs(srow, scol)
            end = _abs(erow, ecol)
            for i in range(start, end):
                if chars[i] not in {"\n", "\r"}:
                    chars[i] = " "
    except tokenize.TokenError:
        pass

    return "".join(chars)


# ---------------------------------------------------------------------------
# Pattern registry
# ---------------------------------------------------------------------------

#: All patterns that count toward the violation total.
#: Keys are stable identifiers used in JSON output and tests.
#:
#: NOTE: do NOT use re.IGNORECASE — Python type annotations are case-sensitive.
#: ``List`` and ``list`` are distinct identifiers; matching ``list[any]``
#: (where ``any`` is the built-in function) would be a false positive.
_PATTERNS: PatternMap = {
    # Any-as-type ─────────────────────────────────────────────────────────
    "dict_str_any":    re.compile(r"\bdict\[str,\s*Any\]|\bDict\[str,\s*Any\]"),
    "list_any":        re.compile(r"\blist\[Any\]|\bList\[Any\]"),
    "type_any":        re.compile(r"\btype\[Any\]"),
    "any_in_union":    re.compile(r"\bAny\s*\||\|\s*Any\b"),
    "return_any":      re.compile(r"->\s*Any\b"),
    "param_any":       re.compile(r":\s*Any\b"),
    "mapping_any":     re.compile(r"\bMapping\[str,\s*Any\]"),
    "optional_any":    re.compile(r"\bOptional\[Any\]"),
    "sequence_any":    re.compile(r"\bSequence\[Any\]|\bIterable\[Any\]"),
    "tuple_any":       re.compile(r"\btuple\[[^\n]*Any[^\n]*\]|\bTuple\[[^\n]*Any[^\n]*\]"),
    # object-as-type ──────────────────────────────────────────────────────
    "param_object":      re.compile(r":\s*object\b"),
    "return_object":     re.compile(r"->\s*object\b"),
    # Handles one level of nesting, e.g. dict[str, list[object]].
    # NOTE: Mapping is intentionally excluded — Mapping[str, object] is the
    # correct type for read-only, covariant mappings at framework boundaries
    # (e.g. Jinja2 template contexts).  Mapping[str, Any] is caught separately
    # by mapping_any.  Only mutable collection types need this guard.
    "collection_object": re.compile(
        r"\b(?:dict|list|set|tuple|Sequence)"
        r"\[[^\n\[\]]*(?:\[[^\n\[\]]*\][^\n\[\]]*)*\bobject\b"
    ),
    # cast() — banned ─────────────────────────────────────────────────────
    "cast_usage":   re.compile(r"(?<![.\w])cast\("),
    # type: ignore — only flag blanket suppresses (no specific error code).
    # ``# type: ignore[some-code]`` is acceptable when the exact issue is known;
    # ``# type: ignore`` with no code is a blind suppression and always banned.
    "type_ignore":  re.compile(r"#\s*type:\s*ignore(?!\s*\[)"),
    # Bare collections (no type parameters) ───────────────────────────────
    # Negative lookaheads exclude parameterised forms and prose.
    "bare_list":   re.compile(r"(?::\s*|->\s*)list\b(?!\[|\(|\s+[a-z])"),
    "bare_dict":   re.compile(r"(?::\s*|->\s*)dict\b(?!\[|\(|\s+[a-z])"),
    "bare_set":    re.compile(r"(?::\s*|->\s*)set\b(?!\[|\(|\s+[a-z])"),
    "bare_tuple":  re.compile(r"(?::\s*|->\s*)tuple\b(?!\[|\(|\s+[a-z])"),
    # Optional[X] — use X | None (PEP 604) ────────────────────────────────
    "optional_usage":  re.compile(r"\bOptional\[(?!Any\b)"),
    # Union[X, Y] — use X | Y (PEP 604) ──────────────────────────────────
    "union_usage":     re.compile(r"\bUnion\["),
    # Legacy typing imports (use lowercase builtins) ──────────────────────
    "legacy_List":   re.compile(r"\bList\["),
    "legacy_Dict":   re.compile(r"\bDict\["),
    "legacy_Set":    re.compile(r"\bSet\["),
    "legacy_Tuple":  re.compile(r"\bTuple\["),
    # Callable — must carry full signature ────────────────────────────────
    "bare_callable":   re.compile(r"(?::\s*|->\s*)Callable\b(?!\[)"),
    "callable_any":    re.compile(r"\bCallable\[[^\n]*,\s*Any\s*\]"),
    # Untyped varargs — *args: Any / **kwargs: Any ────────────────────────
    # Unannotated *args/**kwargs are caught by the AST walker instead.
    "varargs_any":     re.compile(r"\*{1,2}\w+:\s*Any\b"),
    # Naked dict at boundary — dict[str, X] as param/return type is banned.
    # Every structured boundary must use a TypedDict (or dataclass/enum).
    # Matches ": dict[str," and "-> dict[str," — the two annotation positions.
    #
    # APPROVED alternatives at boundaries:
    #   - ReadOnlyJSONObject (= Mapping[str, JSONValue]) for read-only JSON params
    #   - A named TypedDict subclass for any dict with statically known keys
    #
    # Mapping[str, JSONValue] is covariant so any dict[str, T where T ⊆ JSONValue]
    # is assignable to it.  This pattern (boundary_dict) does NOT fire on
    # Mapping[...]; mapping_any does NOT fire on Mapping[str, JSONValue].
    # Therefore Mapping[str, JSONValue] is the safe boundary form for JSON dicts.
    "boundary_dict":   re.compile(r"(?::\s*|->\s*)dict\[str\s*,"),
    # Anonymous dict in collection — list[dict[str, X]] / dict[str, dict[str, X]].
    # A dict nested inside a collection is always a named struct opportunity.
    # Use a TypedDict subclass or a named type alias (e.g. list[JSONObject]).
    # Named aliases do NOT trigger this rule — only the literal expansion does.
    # This is intentional: list[JSONObject] is fine; list[dict[str, JSONValue]] is not.
    "concrete_dict_in_list": re.compile(
        r"\b(?:list|tuple|set)\[dict\[str,"
    ),
    "dict_of_dict": re.compile(
        r"\bdict\[str,\s*dict\[str,"
    ),
}

# Category groupings for the human-readable report, in display order.
_CATEGORY_ORDER: list[tuple[str, list[str]]] = [
    ("Any-as-type", [
        "dict_str_any", "list_any", "type_any", "any_in_union",
        "return_any", "param_any",
        "mapping_any", "optional_any", "sequence_any", "tuple_any",
    ]),
    ("object-as-type", ["param_object", "return_object", "collection_object"]),
    ("cast() usage", ["cast_usage"]),
    ("type: ignore", ["type_ignore"]),
    ("Bare collections", ["bare_list", "bare_dict", "bare_set", "bare_tuple"]),
    ("Optional (use X | None)", ["optional_usage"]),
    ("Union (use X | Y)", ["union_usage"]),
    ("Legacy typing imports", ["legacy_List", "legacy_Dict", "legacy_Set", "legacy_Tuple"]),
    ("Callable (must carry full signature)", ["bare_callable", "callable_any"]),
    ("Untyped varargs", ["varargs_any"]),
    ("Naked dict at boundary (use TypedDict)", ["boundary_dict"]),
    ("Anonymous dict in collection (use TypedDict or named alias)", [
        "concrete_dict_in_list", "dict_of_dict",
    ]),
]

# Directories that are never source code and must be skipped during scanning.
_SKIP_DIRS: frozenset[str] = frozenset({
    "venv", ".venv", "env", ".env",
    "__pycache__",
    ".git", ".muse", ".mypy_cache", ".ruff_cache", ".pytest_cache", ".tox",
    "dist", "build", "site-packages", "__pypackages__",
})


# ---------------------------------------------------------------------------
# Pattern helpers
# ---------------------------------------------------------------------------


def _count_pattern_in_line(line: str, pattern: re.Pattern[str]) -> int:
    """Return the number of non-overlapping matches of *pattern* in *line*."""
    return len(pattern.findall(line))


def _imports_any(source: str) -> bool:
    """Return ``True`` if the source file imports ``Any`` from ``typing``
    or ``typing_extensions``.

    Excludes commented-out import lines (lines where ``from`` is preceded only
    by ``#`` and optional whitespace).
    """
    return bool(re.search(
        r"^[ \t]*from\s+typing(?:_extensions)?\s+import\s+.*\bAny\b",
        source,
        re.MULTILINE,
    ))


def _classify_type_ignore(line: str) -> str:
    """Classify the style of a ``# type: ignore`` comment.

    Returns ``"type_ignore[code]"`` for code-specific ignores, or
    ``"type_ignore[blanket]"`` for bare ``# type: ignore``.

    Args:
        line: A single source line that contains ``# type: ignore``.

    Returns:
        A string label for the variant.
    """
    m = re.search(r"#\s*type:\s*ignore\[([^\]]+)\]", line)
    if m:
        return f"type_ignore[{m.group(1)}]"
    return "type_ignore[blanket]"


# ---------------------------------------------------------------------------
# AST-based detection
# ---------------------------------------------------------------------------


def _is_any_annotation(node: ast.expr | None) -> bool:
    """Return ``True`` if *node* is the bare ``Any`` name."""
    return isinstance(node, ast.Name) and node.id == "Any"


def _find_untyped_defs(source: str, filepath: str) -> list[UntypedDef]:
    """Walk the AST and collect every function with a missing annotation.

    Checks:

    - Missing return type (``node.returns is None``).
    - Missing parameter annotation (excluding ``self`` and ``cls``).
    - ``*args`` annotated as ``Any`` **or** with no annotation at all.
    - ``**kwargs`` annotated as ``Any`` **or** with no annotation at all.
    - ``TypeVar(...)`` assignments with no ``bound=`` and no constraints.

    Line numbers for parameter violations use the argument's own line number
    (``arg.lineno``) rather than the function definition line, so the report
    points directly at the problematic parameter.

    Skips files that cannot be parsed.

    Args:
        source:   Full source text of the file.
        filepath: Path string used in the returned records.

    Returns:
        A list of :class:`UntypedDef` records, one per violation found.
    """
    results: list[UntypedDef] = []
    try:
        tree = ast.parse(source)
    except SyntaxError:
        return results

    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue

        if node.returns is None:
            results.append(UntypedDef(
                file=filepath,
                line=node.lineno,
                name=node.name,
                issue="missing_return_type",
            ))

        all_args = (
            node.args.args
            + node.args.posonlyargs
            + node.args.kwonlyargs
        )
        for arg in all_args:
            if arg.arg in {"self", "cls"}:
                continue
            if arg.annotation is None:
                results.append(UntypedDef(
                    file=filepath,
                    line=arg.lineno,
                    name=f"{node.name}.{arg.arg}",
                    issue="missing_param_type",
                ))

        vararg = node.args.vararg
        if vararg is not None:
            if vararg.annotation is None or _is_any_annotation(vararg.annotation):
                results.append(UntypedDef(
                    file=filepath,
                    line=vararg.lineno,
                    name=f"{node.name}.*{vararg.arg}",
                    issue="untyped_args",
                ))

        kwarg = node.args.kwarg
        if kwarg is not None:
            if kwarg.annotation is None or _is_any_annotation(kwarg.annotation):
                results.append(UntypedDef(
                    file=filepath,
                    line=kwarg.lineno,
                    name=f"{node.name}.**{kwarg.arg}",
                    issue="untyped_kwargs",
                ))

    # TypeVar without constraints or bound — behaves identically to Any.
    results.extend(_find_unconstrained_typevars(tree, filepath))

    return results


def _find_unconstrained_typevars(tree: ast.Module, filepath: str) -> list[UntypedDef]:
    """Return a record for every ``TypeVar(...)`` with no bound or constraints.

    A bare ``T = TypeVar("T")`` is semantically equivalent to ``T: Any``.
    The Rust port requires every generic to carry an explicit trait bound.

    Args:
        tree:     Parsed AST of the file.
        filepath: Path string used in the returned records.

    Returns:
        A list of :class:`UntypedDef` records for unconstrained ``TypeVar``
        definitions.
    """
    results: list[UntypedDef] = []
    for node in ast.walk(tree):
        # Match: T = TypeVar("T") or T = TypeVar("T", bound=...)
        if not isinstance(node, ast.Assign):
            continue
        value = node.value
        if not isinstance(value, ast.Call):
            continue
        func = value.func
        if not (isinstance(func, ast.Name) and func.id == "TypeVar"):
            continue
        # A TypeVar is constrained when it has:
        #   - positional args beyond the name (constraint types), OR
        #   - a keyword arg named "bound"
        extra_args = value.args[1:]  # args[0] is the name string
        kw_names = {kw.arg for kw in value.keywords}
        if extra_args or "bound" in kw_names:
            continue  # constrained — OK
        # Unconstrained TypeVar.
        target_name = (
            node.targets[0].id
            if isinstance(node.targets[0], ast.Name)
            else "<TypeVar>"
        )
        results.append(UntypedDef(
            file=filepath,
            line=node.lineno,
            name=target_name,
            issue="unconstrained_typevar",
        ))
    return results


# ---------------------------------------------------------------------------
# File and directory scanner
# ---------------------------------------------------------------------------


# Line-level heuristics used by scan_file, compiled once at import time.
_DUNDER_DEF_RE: re.Pattern[str] = re.compile(r"def\s+__\w+__\s*\(")
_DEF_KEYWORD_RE: re.Pattern[str] = re.compile(r"\bdef\b")


def scan_file(filepath: Path) -> FileResult | None:
    """Scan a single Python file and return its violation summary.

    String literals are masked before pattern matching so that raw regex
    strings and docstring prose never produce false positives.  *Every*
    pattern — including ``type_ignore`` — is matched against the masked
    line; comments are not string tokens, so they survive masking.  Only the
    classification of a detected ``# type: ignore`` reads the original line.

    Lines that are blank or comment-only after masking are skipped.

    Two line-level heuristics apply:

    - ``param_object`` / ``return_object`` are skipped on a line that opens a
      dunder method (``def __eq__(self, other: object)``).  Parameters of a
      dunder signature that continue on later lines are not exempted.
    - ``boundary_dict`` is skipped on a line that contains ``: dict[str,`` but
      no ``def`` keyword, on the assumption that it is a variable annotation.
      Parameters of a multi-line signature are therefore not counted.

    Lines are split on ``"\\n"`` only, so reported line numbers agree with
    the AST-derived records even when the file contains form feeds or Unicode
    line separators.  A leading UTF-8 byte-order mark is stripped so it cannot
    break parsing.

    Args:
        filepath: Absolute or relative path to the Python file.

    Returns:
        A :class:`FileResult` on success, ``None`` when the file cannot be
        read (I/O or encoding error).
    """
    try:
        source = filepath.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError):
        return None

    masked = _mask_string_literals(source)

    # Masking preserves every "\n", so both lists have the same length.
    original_lines = source.split("\n")
    masked_lines = masked.split("\n")

    patterns: defaultdict[str, int] = defaultdict(int)
    pattern_lines: defaultdict[str, list[int]] = defaultdict(list)
    type_ignore_variants: defaultdict[str, int] = defaultdict(int)

    for lineno, (orig_line, masked_line) in enumerate(
        zip(original_lines, masked_lines), 1
    ):
        stripped = masked_line.strip()
        if not stripped or stripped.startswith("#"):
            continue

        # Per-line facts are computed once, not once per pattern.
        # Dunder methods legitimately use `: object` (e.g.
        # `__eq__(self, other: object)`, `__contains__(self, item: object)`).
        is_dunder_def = _DUNDER_DEF_RE.search(masked_line) is not None
        # `: dict[str,` on a non-def line is treated as a local variable
        # annotation, which is not a function boundary.
        is_local_dict_annotation = (
            ": dict[str," in masked_line
            and _DEF_KEYWORD_RE.search(masked_line) is None
        )

        for name, pattern in _PATTERNS.items():
            if is_dunder_def and name in {"param_object", "return_object"}:
                continue
            if is_local_dict_annotation and name == "boundary_dict":
                continue

            count = _count_pattern_in_line(masked_line, pattern)
            if count == 0:
                continue

            patterns[name] += count
            pattern_lines[name].append(lineno)

            if name == "type_ignore":
                # Classify against the original line so we can distinguish
                # blanket ignores from code-specific ones.
                variant = _classify_type_ignore(orig_line)
                type_ignore_variants[variant] += 1

    return FileResult(
        file=str(filepath),
        imports_any=_imports_any(source),
        patterns=dict(patterns),
        pattern_lines=dict(pattern_lines),
        type_ignore_variants=dict(type_ignore_variants),
        untyped_defs=_find_untyped_defs(source, str(filepath)),
    )


def scan_directory(directory: Path) -> list[FileResult]:
    """Recursively scan all Python files in *directory*.

    Skips virtual environments, caches, build artefacts, and VCS/tool
    metadata directories (see ``_SKIP_DIRS``).  Only directory names *below*
    the scan root are tested: the components of *directory* itself are never
    considered, so a project that happens to live under e.g. ``/srv/build/``
    or ``~/env/`` is still scanned.

    Files are visited in sorted path order.  Files that cannot be read are
    omitted from the result.

    Args:
        directory: Root of the directory tree to scan.

    Returns:
        A list of :class:`FileResult` objects, one per successfully scanned file.
    """
    results: list[FileResult] = []
    for py_file in sorted(directory.rglob("*.py")):
        # Directory components between the scan root and the file itself.
        nested_dirs = py_file.relative_to(directory).parts[:-1]
        if any(part in _SKIP_DIRS for part in nested_dirs):
            continue
        file_result = scan_file(py_file)
        if file_result is not None:
            results.append(file_result)
    return results


# ---------------------------------------------------------------------------
# Report generation
# ---------------------------------------------------------------------------


def _offender_sort_key(entry: Offender) -> int:
    """Return the sort key for an :class:`Offender` (total violation count)."""
    return entry["total"]


def generate_report(results: list[FileResult]) -> Report:
    """Fold per-file scan results into one :class:`Report`.

    Pattern counts and ``type: ignore`` variants are summed across files.
    Files with at least one pattern match get a ``per_file`` entry and an
    :class:`Offender` record; offenders are ordered by descending total.  One
    :class:`Violation` is emitted per recorded (pattern, line) pair and the
    list is ordered by file, then line.  Untyped-def records are concatenated
    in input order.

    Args:
        results: List of :class:`FileResult` objects from :func:`scan_file`
                 or :func:`scan_directory`.

    Returns:
        A :class:`Report` ready for human display or JSON serialisation.
    """
    pattern_totals: defaultdict[str, int] = defaultdict(int)
    ignore_variants: defaultdict[str, int] = defaultdict(int)
    per_file: PerFileViolations = {}
    offenders: list[Offender] = []
    violations: list[Violation] = []
    untyped: list[UntypedDef] = []
    any_importers = 0

    for file_result in results:
        path = file_result["file"]
        if file_result["imports_any"]:
            any_importers += 1

        # Private copy so the report never aliases the scan result's dict.
        counts: PatternCounts = dict(file_result["patterns"])
        line_index = file_result["pattern_lines"]
        for kind, hits in counts.items():
            pattern_totals[kind] += hits
            violations.extend(
                Violation(file=path, line=lineno, kind=kind)
                for lineno in line_index.get(kind, [])
            )

        file_total = sum(counts.values())
        if file_total > 0:
            per_file[path] = counts
            offenders.append(Offender(file=path, total=file_total, patterns=counts))

        for variant, hits in file_result["type_ignore_variants"].items():
            ignore_variants[variant] += hits

        untyped.extend(file_result["untyped_defs"])

    violations.sort(key=lambda v: (v["file"], v["line"]))
    offenders.sort(key=_offender_sort_key, reverse=True)

    summary = ReportSummary(
        total_files_scanned=len(results),
        files_importing_any=any_importers,
        total_any_patterns=sum(pattern_totals.values()),
        untyped_defs=len(untyped),
    )
    # Offenders and untyped defs are stored in full; the human-readable
    # printer applies its own display cap.
    return Report(
        summary=summary,
        pattern_totals=dict(pattern_totals),
        type_ignore_variants=dict(ignore_variants),
        top_offenders=offenders,
        per_file=per_file,
        violations=violations,
        untyped_defs=untyped,
    )


# ---------------------------------------------------------------------------
# Human-readable report printer
# ---------------------------------------------------------------------------


def print_human_summary(report: Report, top_n: int = 15) -> None:
    """Write a formatted, human-readable rendering of *report* to stdout.

    Sections, in order: headline counters, per-category pattern breakdown
    (categories with no hits are omitted), ``type: ignore`` variants, the
    full file:line violation list, and the first *top_n* offenders.

    Args:
        report: A :class:`Report` produced by :func:`generate_report`.
        top_n:  How many offenders to display in the top-offenders list.
    """
    summary = report["summary"]
    pattern_totals = report["pattern_totals"]
    rule = "=" * 70

    out: list[str] = [
        "",
        rule,
        "  TYPING AUDIT — Violation Report",
        rule,
        f"  Files scanned:        {summary['total_files_scanned']}",
        f"  Files importing Any:  {summary['files_importing_any']}",
        f"  Total violations:     {summary['total_any_patterns']}",
        f"  Untyped defs:         {summary['untyped_defs']}",
        "",
    ]

    # Pattern breakdown, grouped by category in display order.
    printed_category = False
    for category, pattern_names in _CATEGORY_ORDER:
        hits = [(p, pattern_totals.get(p, 0)) for p in pattern_names]
        if sum(count for _, count in hits) == 0:
            continue
        printed_category = True
        out.append(f"  {category}:")
        out.extend(f"    {p:38s} {count:5d}" for p, count in hits if count > 0)
        out.append("")

    if not printed_category:
        out.append("  Pattern breakdown:    (none)")
        out.append("")

    # type: ignore variants, most frequent first.
    variants = report["type_ignore_variants"]
    if variants:
        out.append("  # type: ignore variants:")
        ranked = sorted(variants.items(), key=operator.itemgetter(1), reverse=True)
        out.extend(f"    {variant:44s} {count:5d}" for variant, count in ranked)
        out.append("")

    violations = report["violations"]
    if violations:
        out.append("  Violations (file:line  [kind]):")
        out.extend(f"    {v['file']}:{v['line']}  [{v['kind']}]" for v in violations)
        out.append("")

    out.append(f"  Top {top_n} offenders:")
    out.extend(
        f"    {entry['total']:4d}  {entry['file']}"
        for entry in report["top_offenders"][:top_n]
    )
    out.append(rule)
    out.append("")

    print("\n".join(out))


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------


def main(argv: list[str] | None = None) -> None:
    """Entry point: parse CLI flags, run the scan, and enforce the ratchet.

    Scans the specified directories (or individual files), prints a human
    summary, optionally writes a JSON report, and exits non-zero when either
    the pattern violation count exceeds ``--max-any`` or the untyped-def
    count exceeds ``--max-untyped``.

    A file reachable through more than one ``--dirs`` entry (for example
    ``--dirs pkg/ pkg/sub/``) is counted once.  Paths that are skipped are
    reported on stderr with the reason; a warning is also emitted when no
    file was scanned at all, because a ratchet over zero files always passes.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``.  ``None``
              (the default) keeps the normal command-line behaviour.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Audit typing violations: Any, object, cast, bare collections, "
            "Optional/Union (legacy), Callable without signature, untyped "
            "varargs, type: ignore, untyped defs, unconstrained TypeVars."
        ),
    )
    parser.add_argument(
        "--dirs",
        nargs="+",
        default=["muse/", "tests/"],
        help="Directories or individual .py files to scan. Default: muse/ tests/",
    )
    parser.add_argument(
        "--json",
        type=str,
        metavar="PATH",
        help="Write the JSON report to PATH.",
    )
    parser.add_argument(
        "--max-any",
        type=int,
        default=None,
        metavar="N",
        help="Exit non-zero if total pattern violations exceed N (ratchet mode).",
    )
    parser.add_argument(
        "--max-untyped",
        type=int,
        default=None,
        metavar="N",
        help="Exit non-zero if total untyped-def count exceeds N (ratchet mode).",
    )
    parser.add_argument(
        "--top-n",
        type=int,
        default=15,
        metavar="N",
        help="Number of offenders to display in the human summary. Default: 15.",
    )
    args = parser.parse_args(argv)

    all_results: list[FileResult] = []
    seen: set[Path] = set()  # resolved paths already in all_results
    for d in args.dirs:
        p = Path(d)
        batch: list[FileResult]
        if p.is_dir():
            batch = scan_directory(p)
        elif p.is_file() and p.suffix == ".py":
            result = scan_file(p)
            if result is None:
                print(f"WARNING: {d} could not be read, skipping", file=sys.stderr)
                continue
            batch = [result]
        elif p.exists():
            print(
                f"WARNING: {d} is not a directory or .py file, skipping",
                file=sys.stderr,
            )
            continue
        else:
            print(f"WARNING: {d} does not exist, skipping", file=sys.stderr)
            continue

        # Overlapping --dirs entries must not count the same file twice.
        for file_result in batch:
            key = Path(file_result["file"]).resolve()
            if key in seen:
                continue
            seen.add(key)
            all_results.append(file_result)

    if not all_results:
        print(
            "WARNING: no Python files were scanned; check --dirs",
            file=sys.stderr,
        )

    report = generate_report(all_results)
    print_human_summary(report, top_n=args.top_n)

    if args.json:
        out = Path(args.json)
        out.parent.mkdir(parents=True, exist_ok=True)
        out.write_text(json.dumps(report, indent=2), encoding="utf-8")
        print(f"  JSON report written to {args.json}")

    failed = False

    if args.max_any is not None:
        total = report["summary"]["total_any_patterns"]
        if total > args.max_any:
            print(
                f"\n❌ RATCHET FAILED (patterns): {total} violations exceed "
                f"threshold of {args.max_any}",
                file=sys.stderr,
            )
            failed = True
        else:
            print(
                f"\n✅ RATCHET OK (patterns): {total} violations within "
                f"threshold of {args.max_any}",
            )

    if args.max_untyped is not None:
        untyped = report["summary"]["untyped_defs"]
        if untyped > args.max_untyped:
            print(
                f"\n❌ RATCHET FAILED (untyped defs): {untyped} exceed "
                f"threshold of {args.max_untyped}",
                file=sys.stderr,
            )
            failed = True
        else:
            print(
                f"\n✅ RATCHET OK (untyped defs): {untyped} within "
                f"threshold of {args.max_untyped}",
            )

    if failed:
        sys.exit(1)


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()