ts_audit.py
python
sha256:0557470fbe1a91614f0dcd761387323ba93f0e8afe48856cda1609075ab064c7
fix: fall back to any indexed mpack in read_object_bytes wh…
Sonnet 4.6
patch
2 days ago
| 1 | """TypeScript typing audit — zero-tolerance enforcement for mission-critical code. |
| 2 | |
| 3 | Complements ``npx tsc --noEmit`` (the primary type gate) by catching patterns |
| 4 | that TypeScript's compiler allows but that undermine type safety in practice. |
| 5 | |
| 6 | Patterns checked |
| 7 | ---------------- |
| 8 | *any* — ``as any``, ``: any``, ``Array<any>``, ``Promise<any>``, ``Record<string, any>``, |
| 9 | ``Map<any``, ``Set<any>``, ``<any>`` type parameters. Every ``any`` escapes the type |
| 10 | system and makes the surrounding code unverifiable. Use ``unknown`` and narrow. |
| 11 | |
| 12 | *@ts-ignore / @ts-nocheck* — suppresses real errors. Absolute ban. |
| 13 | |
| 14 | *Function type* — ``(fn: Function)`` carries no signature information; the compiler |
| 15 | cannot verify call sites. Use ``() => void`` or a named callable interface. |
| 16 | |
| 17 | *Non-null assertion on DOM queries* — ``document.getElementById(...)!`` (and |
| 18 | ``querySelector``, ``querySelectorAll``, ``closest``, ``parentElement``). The ``!`` only silences the compiler; a missing element still fails at runtime on first use. |
| 19 | Narrow with ``if (!el) return;`` instead. |
| 20 | |
| 21 | *JSON.parse without explicit cast* — ``JSON.parse(...)`` returns ``any``. Every call |
| 22 | site must either narrow via ``as T`` or validate through a type guard. |
| 23 | |
| 24 | *as unknown as X* is explicitly NOT flagged — it is the idiomatic safe double-cast |
| 25 | for DOM / window extension patterns where the TypeScript type system has no better |
| 26 | mechanism. |
| 27 | |
| 28 | Usage:: |
| 29 | |
| 30 | python tools/ts_audit.py # src/ts/ |
| 31 | python tools/ts_audit.py --dirs src/ts/ |
| 32 | python tools/ts_audit.py --json artifacts/ts_audit.json |
| 33 | """ |
| 34 | |
| 35 | from __future__ import annotations |
| 36 | |
| 37 | import argparse |
| 38 | import json |
| 39 | import operator |
| 40 | import re |
| 41 | import sys |
| 42 | from collections import defaultdict |
| 43 | from pathlib import Path |
| 44 | from typing import TypedDict |
| 45 | |
| 46 | # --------------------------------------------------------------------------- |
| 47 | # Type aliases |
| 48 | # --------------------------------------------------------------------------- |
| 49 | |
| 50 | type PatternCounts = dict[str, int] |
| 51 | type PatternLines = dict[str, list[int]] |
| 52 | type PatternMap = dict[str, re.Pattern[str]] |
| 53 | type PerFileViolations = dict[str, PatternCounts] |
| 54 | |
| 55 | # --------------------------------------------------------------------------- |
| 56 | # Data shapes |
| 57 | # --------------------------------------------------------------------------- |
| 58 | |
| 59 | |
class Violation(TypedDict):
    """A single flagged location: one pattern hit on one line of one file."""

    # Path of the scanned file, as a string.
    file: str
    # 1-based line number of the hit.
    line: int
    # Key of the pattern that matched (for example ``"as_any"``).
    kind: str
| 66 | |
| 67 | |
class FileResult(TypedDict):
    """Scan outcome for a single TypeScript source file."""

    # Path of the scanned file, as a string.
    file: str
    # Pattern key -> how many lines matched that pattern.
    patterns: PatternCounts
    # Pattern key -> the 1-based line numbers that matched.
    pattern_lines: PatternLines
| 74 | |
| 75 | |
class Offender(TypedDict):
    """Ranking entry for a file that has one or more violations."""

    # Path of the offending file, as a string.
    file: str
    # Sum of all pattern counts for the file; used as the ranking key.
    total: int
    # Pattern key -> count for this file.
    patterns: PatternCounts
| 82 | |
| 83 | |
class ReportSummary(TypedDict):
    """Headline numbers covering the whole scan."""

    # Number of ``.ts`` files that were scanned.
    total_files_scanned: int
    # Sum of every pattern count across every scanned file.
    total_violations: int
| 89 | |
| 90 | |
class Report(TypedDict):
    """Complete audit result as returned by :func:`generate_report`."""

    # Headline totals for the scan.
    summary: ReportSummary
    # Pattern key -> count summed over all files.
    pattern_totals: PatternCounts
    # Files with violations, highest total first.
    top_offenders: list[Offender]
    # File path -> per-pattern counts (only files with violations).
    per_file: PerFileViolations
    # Flat list of every hit, ordered by file and then line.
    violations: list[Violation]
| 99 | |
| 100 | |
| 101 | # --------------------------------------------------------------------------- |
| 102 | # Pattern registry |
| 103 | # --------------------------------------------------------------------------- |
| 104 | |
#: Patterns that count toward the violation total.
#: Each key is a stable identifier used in JSON output.
_PATTERNS: PatternMap = {
    # any-as-type ─────────────────────────────────────────────────────────
    # ``as any`` — explicit escape; conceals broken upstream type.
    # Negative lookbehind excludes ``as unknown as`` (the safe double-cast).
    "as_any": re.compile(r"(?<!unknown )\bas\s+any\b"),
    # ``: any`` — explicit annotation; same severity.
    "annot_any": re.compile(r":\s*any\b"),
    # Generic slots filled with any — Array<any>, Promise<any>, etc.
    "generic_any": re.compile(
        r"\b(?:Array|Promise|Record|Map|Set|Readonly|Partial|Required|"
        r"NonNullable|Awaited|ReturnType|Parameters)\s*<[^>]*\bany\b"
    ),
    # <any> inline type argument — e.g. foo<any>(...)
    "type_arg_any": re.compile(r"<any>"),
    # @ts-ignore / @ts-nocheck ────────────────────────────────────────────
    "ts_ignore": re.compile(r"//\s*@ts-ignore"),
    "ts_nocheck": re.compile(r"//\s*@ts-nocheck"),
    # Function type ───────────────────────────────────────────────────────
    # Matches ``: Function`` and ``<Function>`` but not ``Function.prototype``,
    # ``Function.bind``, or import-style usages.
    "function_type": re.compile(r"(?::\s*|<)Function\b(?!\.|\s*prototype)"),
    # Non-null assertion on DOM queries ───────────────────────────────────
    # ``getElementById(...)!`` only silences the compiler; the value is still
    # ``null`` at runtime when the element is absent.
    # Narrow with ``if (!el) return;`` instead.
    #
    # * The query methods are matched in call form (``name(...)!``).
    # * ``parentElement`` is a property, so it is matched as ``parentElement!``;
    #   a call form could never appear in valid code.
    # * ``(?!=)`` keeps ``!==`` / ``!=`` comparisons — the *safe* way to test
    #   the result — from being mistaken for a non-null assertion.
    "nonnull_dom": re.compile(
        r"\b(?:(?:getElementById|querySelector|querySelectorAll|closest)"
        r"\s*\([^)]*\)|parentElement)\s*!(?!=)"
    ),
    # JSON.parse without narrowing ────────────────────────────────────────
    # ``JSON.parse(...)`` returns ``any``. Every call site must cast or guard.
    # Pattern fires on the bare call; ``as T`` or ``as unknown`` on the same
    # line is sufficient to suppress — those lines are skipped in the loop.
    "json_parse_any": re.compile(r"\bJSON\.parse\s*\("),
}
| 141 | |
#: Grouping and ordering of pattern keys in the human-readable report.
#: Each entry is ``(category heading, [pattern keys shown under it])``.
_CATEGORY_ORDER: list[tuple[str, list[str]]] = [
    (
        "any escapes",
        ["as_any", "annot_any", "generic_any", "type_arg_any"],
    ),
    (
        "type: suppression",
        ["ts_ignore", "ts_nocheck"],
    ),
    (
        "unsafe types",
        ["function_type"],
    ),
    (
        "DOM safety",
        ["nonnull_dom"],
    ),
    (
        "JSON safety",
        ["json_parse_any"],
    ),
]
| 150 | |
| 151 | # --------------------------------------------------------------------------- |
| 152 | # File scanning |
| 153 | # --------------------------------------------------------------------------- |
| 154 | |
| 155 | |
# Suppression directives are written *inside* ``//`` comments, so these
# pattern keys must still be evaluated on comment-only lines.
_COMMENT_DIRECTIVES: frozenset[str] = frozenset({"ts_ignore", "ts_nocheck"})

# ``JSON.parse`` followed, later on the same line, by an ``as <Type>`` cast.
_JSON_PARSE_NARROWED: re.Pattern[str] = re.compile(r"\bJSON\.parse\b.*\bas\s+\w")


def _scan_file(filepath: Path) -> FileResult:
    """Scan one TypeScript file for violations.

    Each pattern is counted at most once per line. Blank lines and lines
    starting with ``*`` are ignored. Lines starting with ``//`` are checked
    only for the suppression directives (``@ts-ignore`` / ``@ts-nocheck``),
    because those directives normally sit on a line of their own; all other
    patterns are skipped there so commented-out code is not reported.

    Args:
        filepath: Absolute or relative path to a ``.ts`` file.

    Returns:
        :class:`FileResult` with per-pattern counts and line numbers.
    """
    # Undecodable bytes are replaced rather than raising.
    text = filepath.read_text(encoding="utf-8", errors="replace")

    counts: defaultdict[str, int] = defaultdict(int)
    hit_lines: defaultdict[str, list[int]] = defaultdict(list)

    for lineno, line in enumerate(text.splitlines(), 1):
        stripped = line.strip()
        if not stripped or stripped.startswith("*"):
            continue

        comment_only = stripped.startswith("//")

        # ``JSON.parse`` is accepted when an ``as <Type>`` cast appears after
        # the call on the same line. This is a heuristic, not a parse.
        narrowed = _JSON_PARSE_NARROWED.search(line) is not None

        for name, pattern in _PATTERNS.items():
            if comment_only and name not in _COMMENT_DIRECTIVES:
                continue
            if name == "json_parse_any" and narrowed:
                continue
            if pattern.search(line):
                counts[name] += 1
                hit_lines[name].append(lineno)

    return FileResult(
        file=str(filepath),
        patterns=dict(counts),
        pattern_lines=dict(hit_lines),
    )
| 198 | |
| 199 | |
| 200 | # --------------------------------------------------------------------------- |
| 201 | # Report generation |
| 202 | # --------------------------------------------------------------------------- |
| 203 | |
| 204 | |
def generate_report(dirs: list[str]) -> Report:
    """Scan all ``.ts`` files under *dirs* and return a full audit report.

    Files are collected per directory in sorted order. A file reachable from
    more than one entry of *dirs* (overlapping or repeated directories) is
    scanned once, so its violations are not double-counted. Directories whose
    name happens to end in ``.ts`` are ignored.

    Args:
        dirs: Directory paths to scan recursively.

    Returns:
        :class:`Report` with summary, per-file breakdowns, and flat violations.
    """
    files: list[Path] = []
    seen: set[Path] = set()
    for d in dirs:
        for candidate in sorted(Path(d).rglob("*.ts")):
            if not candidate.is_file():
                continue
            # Resolve only for the identity check; the path as discovered is
            # what gets reported.
            identity = candidate.resolve()
            if identity in seen:
                continue
            seen.add(identity)
            files.append(candidate)

    totals: defaultdict[str, int] = defaultdict(int)
    per_file: PerFileViolations = {}
    top_offenders: list[Offender] = []
    all_violations: list[Violation] = []

    for fp in files:
        result = _scan_file(fp)
        counts = result["patterns"]
        if not counts:
            # Clean files contribute to the scanned total only.
            continue

        path = result["file"]
        per_file[path] = counts
        top_offenders.append(Offender(
            file=path,
            total=sum(counts.values()),
            patterns=counts,
        ))

        for kind, count in counts.items():
            totals[kind] += count
            all_violations.extend(
                Violation(file=path, line=lineno, kind=kind)
                for lineno in result["pattern_lines"].get(kind, [])
            )

    # Worst files first; the flat list is ordered for stable, diffable output.
    top_offenders.sort(key=operator.itemgetter("total"), reverse=True)
    all_violations.sort(key=lambda v: (v["file"], v["line"]))

    return Report(
        summary=ReportSummary(
            total_files_scanned=len(files),
            total_violations=sum(totals.values()),
        ),
        pattern_totals=dict(totals),
        top_offenders=top_offenders,
        per_file=per_file,
        violations=all_violations,
    )
| 261 | |
| 262 | |
| 263 | # --------------------------------------------------------------------------- |
| 264 | # Human-readable printer |
| 265 | # --------------------------------------------------------------------------- |
| 266 | |
| 267 | |
def print_report(report: Report) -> None:
    """Write a plain-text rendering of *report* to stdout.

    The output is a banner, the summary counts, and then either the
    per-category breakdown with the violation list and up to 15 top
    offenders, or a "(none)" placeholder when no pattern matched anywhere.

    Args:
        report: The audit report to render.
    """
    rule = "=" * 70
    summary = report["summary"]

    print(rule)
    print(" TS TYPING AUDIT — Violation Report")
    print(rule)
    print(f" Files scanned: {summary['total_files_scanned']}")
    print(f" Total violations: {summary['total_violations']}")
    print()

    pattern_totals = report["pattern_totals"]

    # Nothing matched: print the placeholder section and finish.
    if not pattern_totals:
        print(" Pattern breakdown: (none)")
        print()
        print(" Top 15 offenders:")
        print(rule)
        return

    print(" Pattern breakdown:")
    for heading, keys in _CATEGORY_ORDER:
        # Only keys that actually occurred, in registry display order.
        present = [(key, pattern_totals[key]) for key in keys if key in pattern_totals]
        if not present:
            continue
        print(f" {heading}:")
        for key, hits in present:
            print(f" {key:<30} {hits}")
    print()

    print(" Violations (file:line [kind]):")
    for item in report["violations"]:
        print(f" {item['file']}:{item['line']} [{item['kind']}]")
    print()

    worst = report["top_offenders"][:15]
    if worst:
        print(f" Top {len(worst)} offenders:")
        for entry in worst:
            print(f" {entry['total']:>4} {entry['file']}")

    print(rule)
| 307 | |
| 308 | |
| 309 | # --------------------------------------------------------------------------- |
| 310 | # CLI |
| 311 | # --------------------------------------------------------------------------- |
| 312 | |
| 313 | |
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the audit tool.

    Returns:
        A parser exposing ``--dirs``, ``--json`` and ``--max-violations``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="TypeScript typing audit — zero-tolerance type-safety enforcement.",
    )

    # One (flag, keyword-arguments) entry per option, registered in order.
    option_table = (
        (
            "--dirs",
            {
                "nargs": "+",
                "default": ["src/ts/"],
                "metavar": "DIR",
                "help": "Directories to scan recursively (default: src/ts/).",
            },
        ),
        (
            "--json",
            {
                "metavar": "PATH",
                "help": "Write full JSON report to PATH in addition to stdout.",
            },
        ),
        (
            "--max-violations",
            {
                "type": int,
                "default": 0,
                "metavar": "N",
                "help": "Exit 1 if total violations exceed N (default: 0).",
            },
        ),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    return parser
| 339 | |
| 340 | |
def main() -> None:
    """Run the audit from the command line.

    Parses the arguments, scans the requested directories, prints the report
    and, when ``--json`` is given, also writes the report as JSON. The parent
    directory of the JSON path is created if it does not exist.

    Raises:
        SystemExit: With status 1 when the number of violations exceeds
            ``--max-violations``.
    """
    args = _build_parser().parse_args()
    report = generate_report(args.dirs)
    print_report(report)

    if args.json:
        json_path = Path(args.json)
        # The target directory (e.g. ``artifacts/``) may not exist yet.
        json_path.parent.mkdir(parents=True, exist_ok=True)
        json_path.write_text(
            json.dumps(report, indent=2),
            encoding="utf-8",
        )

    # The JSON file is written before exiting so a failing run still leaves
    # its report behind.
    if report["summary"]["total_violations"] > args.max_violations:
        sys.exit(1)


if __name__ == "__main__":
    main()
File History
1 commit
sha256:0557470fbe1a91614f0dcd761387323ba93f0e8afe48856cda1609075ab064c7
fix: fall back to any indexed mpack in read_object_bytes wh…
Sonnet 4.6
patch
2 days ago