gabriel / musehub public
ts_audit.py python
358 lines 12.1 KB
Raw
sha256:7d6dd8f4a89e2d1fef2d84f6e65feaff51385d382f466766b7f690a22ec18e32 fix: fall back to DB ancestry check when mpack-only fast-fo… Sonnet 4.6 patch 7 days ago
1 """TypeScript typing audit — zero-tolerance enforcement for mission-critical code.
2
3 Complements ``npx tsc --noEmit`` (the primary type gate) by catching patterns
4 that TypeScript's compiler allows but that undermine type safety in practice.
5
6 Patterns checked
7 ----------------
8 *any* — ``as any``, ``: any``, ``Array<any>``, ``Promise<any>``, ``Record<string, any>``,
9 ``Map<any``, ``Set<any>``, ``<any>`` type parameters. Every ``any`` escapes the type
10 system and makes the surrounding code unverifiable. Use ``unknown`` and narrow.
11
12 *@ts-ignore / @ts-nocheck* — suppresses real errors. Absolute ban.
13
14 *Function type* — ``(fn: Function)`` carries no signature information; the compiler
15 cannot verify call sites. Use ``() => void`` or a named callable interface.
16
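*Non-null assertion on DOM queries* — ``document.getElementById(...)!`` (and the
same ``!`` suffix on ``querySelector``, ``querySelectorAll``, ``closest``, and
``parentElement``). The assertion only silences the compiler: when the lookup
returns ``null``, the first property access on the result throws at runtime.
Narrow with ``if (!el) return;`` instead.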
20
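*JSON.parse without explicit cast* — ``JSON.parse(...)`` returns ``any``. Every call
site must either narrow via ``as T`` or validate through a type guard. The scanner
only recognises the cast form: an ``as <type>`` on the same line as the call
suppresses the finding.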
23
24 *as unknown as X* is explicitly NOT flagged — it is the idiomatic safe double-cast
25 for DOM / window extension patterns where the TypeScript type system has no better
26 mechanism.
27
28 Usage::
29
30 python tools/ts_audit.py # src/ts/
31 python tools/ts_audit.py --dirs src/ts/
32 python tools/ts_audit.py --json artifacts/ts_audit.json
33 """
34
35 from __future__ import annotations
36
37 import argparse
38 import json
39 import operator
40 import re
41 import sys
42 from collections import defaultdict
43 from pathlib import Path
44 from typing import TypedDict
45
46 # ---------------------------------------------------------------------------
47 # Type aliases
48 # ---------------------------------------------------------------------------
49
50 type PatternCounts = dict[str, int]
51 type PatternLines = dict[str, list[int]]
52 type PatternMap = dict[str, re.Pattern[str]]
53 type PerFileViolations = dict[str, PatternCounts]
54
55 # ---------------------------------------------------------------------------
56 # Data shapes
57 # ---------------------------------------------------------------------------
58
59
class Violation(TypedDict):
    """A single finding: one pattern matched on one line of one file."""

    # Path of the scanned file, as a string.
    file: str
    # 1-based line number on which the pattern matched.
    line: int
    # Identifier of the pattern that matched (a pattern-registry key).
    kind: str
66
67
class FileResult(TypedDict):
    """Outcome of scanning a single TypeScript source file."""

    # Path of the scanned file, as a string.
    file: str
    # How many lines matched each pattern (only patterns that matched).
    patterns: PatternCounts
    # The 1-based line numbers behind each count in ``patterns``.
    pattern_lines: PatternLines
74
75
class Offender(TypedDict):
    """One file that has violations, carrying the total used to rank it."""

    # Path of the offending file, as a string.
    file: str
    # Sum of all per-pattern counts for this file.
    total: int
    # Per-pattern counts for this file.
    patterns: PatternCounts
82
83
class ReportSummary(TypedDict):
    """Headline numbers for a whole scan."""

    # Number of ``.ts`` files that were scanned, clean or not.
    total_files_scanned: int
    # Sum of every pattern's count across all scanned files.
    total_violations: int
89
90
class Report(TypedDict):
    """Complete audit result, as returned by :func:`generate_report`."""

    # Headline file and violation counts.
    summary: ReportSummary
    # Count per pattern identifier, summed over all files.
    pattern_totals: PatternCounts
    # Files with violations, highest total first.
    top_offenders: list[Offender]
    # Per-pattern counts for each file that has violations.
    per_file: PerFileViolations
    # Every individual finding, ordered by file and then line.
    violations: list[Violation]
99
100
101 # ---------------------------------------------------------------------------
102 # Pattern registry
103 # ---------------------------------------------------------------------------
104
#: Patterns that count toward the violation total.
#: Each key is a stable identifier used in JSON output.
#: Every pattern is applied to one source line at a time with ``search``.
_PATTERNS: PatternMap = {
    # --- any-as-type ------------------------------------------------------
    # ``as any`` — explicit escape; conceals a broken upstream type.
    # NOTE(review): the negative lookbehind was added for the
    # ``as unknown as X`` double-cast, but in practice the only text it
    # exempts is ``as unknown as any`` — confirm that exemption is intended.
    "as_any": re.compile(r"(?<!unknown )\bas\s+any\b"),
    # ``: any`` — explicit annotation; same severity.
    "annot_any": re.compile(r":\s*any\b"),
    # Generic slots filled with any — Array<any>, Promise<any>, etc.
    "generic_any": re.compile(
        r"\b(?:Array|Promise|Record|Map|Set|Readonly|Partial|Required|"
        r"NonNullable|Awaited|ReturnType|Parameters)\s*<[^>]*\bany\b"
    ),
    # <any> inline type argument — e.g. foo<any>(...).
    # A line such as ``Array<any>`` matches both this and ``generic_any``.
    "type_arg_any": re.compile(r"<any>"),
    # --- @ts-ignore / @ts-nocheck ----------------------------------------
    "ts_ignore": re.compile(r"//\s*@ts-ignore"),
    "ts_nocheck": re.compile(r"//\s*@ts-nocheck"),
    # --- Function type ----------------------------------------------------
    # Matches ``: Function`` and ``<Function>`` but not ``Function.prototype``,
    # ``Function.bind``, or import-style usages.
    "function_type": re.compile(r"(?::\s*|<)Function\b(?!\.|\s*prototype)"),
    # --- Non-null assertion on DOM queries --------------------------------
    # ``getElementById(...)!`` fails at runtime when the element is absent.
    # Narrow with ``if (!el) return;`` instead.
    #  * ``!(?!=)`` — the ``!`` must not start ``!=`` / ``!==``; otherwise a
    #    correct null check like ``querySelector(...) !== null`` is reported.
    #  * ``parentElement`` is a property, so its call parentheses are
    #    optional; requiring them meant ``el.parentElement!`` never matched.
    "nonnull_dom": re.compile(
        r"\b(?:"
        r"(?:getElementById|querySelector|querySelectorAll|closest)\s*\([^)]*\)"
        r"|parentElement(?:\s*\([^)]*\))?"
        r")\s*!(?!=)"
    ),
    # --- JSON.parse without narrowing -------------------------------------
    # ``JSON.parse(...)`` returns ``any``. Every call site must cast or guard.
    # The pattern fires on the bare call; suppression for lines that cast the
    # result with ``as T`` is applied by the scanning loop, not here.
    "json_parse_any": re.compile(r"\bJSON\.parse\s*\("),
}
141
#: Display order for the human-readable report.
#: Each entry is ``(category heading, pattern identifiers listed under it)``;
#: categories and identifiers are printed in the order given here.
_CATEGORY_ORDER: list[tuple[str, list[str]]] = [
    (
        "any escapes",
        ["as_any", "annot_any", "generic_any", "type_arg_any"],
    ),
    (
        "type: suppression",
        ["ts_ignore", "ts_nocheck"],
    ),
    (
        "unsafe types",
        ["function_type"],
    ),
    (
        "DOM safety",
        ["nonnull_dom"],
    ),
    (
        "JSON safety",
        ["json_parse_any"],
    ),
]
150
151 # ---------------------------------------------------------------------------
152 # File scanning
153 # ---------------------------------------------------------------------------
154
155
#: Pattern identifiers whose matches live *inside* ``//`` comments. They must
#: still be evaluated on lines that consist of nothing but a comment, because
#: that is where ``// @ts-ignore`` and ``// @ts-nocheck`` are normally written.
_COMMENT_DIRECTIVE_KINDS: frozenset[str] = frozenset({"ts_ignore", "ts_nocheck"})

#: An ``as <type>`` cast; used to decide whether a ``JSON.parse`` call is narrowed.
_AS_CAST_RE: re.Pattern[str] = re.compile(r"\bas\s+\w")


def _scan_file(filepath: Path) -> FileResult:
    """Scan one TypeScript file for violations.

    Every pattern in ``_PATTERNS`` is searched for on each line; a pattern is
    counted at most once per line. Line handling:

    * Blank lines and lines starting with ``*`` (block-comment continuation)
      are skipped entirely.
    * Lines starting with ``//`` are checked only for the suppression
      directives (``ts_ignore`` / ``ts_nocheck``), so commented-out code is
      not reported but a directive on its own line is.
    * A ``json_parse_any`` match is dropped when an ``as <type>`` cast
      follows the call on the same line.

    Args:
        filepath: Absolute or relative path to a ``.ts`` file.

    Returns:
        :class:`FileResult` with per-pattern counts and 1-based line numbers.
        Patterns that never matched are absent from both mappings.
    """
    # Undecodable bytes are replaced rather than raising, so one bad file
    # cannot abort the whole audit.
    source = filepath.read_text(encoding="utf-8", errors="replace")

    patterns: defaultdict[str, int] = defaultdict(int)
    pattern_lines: defaultdict[str, list[int]] = defaultdict(list)

    for lineno, line in enumerate(source.splitlines(), 1):
        stripped = line.strip()

        # Nothing to flag on blank lines or block-comment continuation lines.
        if not stripped or stripped.startswith("*"):
            continue

        is_line_comment = stripped.startswith("//")

        for name, pattern in _PATTERNS.items():
            # On a pure ``//`` line only the comment directives are relevant.
            if is_line_comment and name not in _COMMENT_DIRECTIVE_KINDS:
                continue

            match = pattern.search(line)
            if match is None:
                continue

            # ``JSON.parse(...) as T`` is considered narrowed. Only a cast
            # *after* the call counts; an unrelated ``as`` earlier on the
            # line does not suppress the finding.
            if name == "json_parse_any" and _AS_CAST_RE.search(line, match.end()):
                continue

            patterns[name] += 1
            pattern_lines[name].append(lineno)

    return FileResult(
        file=str(filepath),
        patterns=dict(patterns),
        pattern_lines=dict(pattern_lines),
    )
198
199
200 # ---------------------------------------------------------------------------
201 # Report generation
202 # ---------------------------------------------------------------------------
203
204
def generate_report(dirs: list[str]) -> Report:
    """Audit every ``.ts`` file below *dirs* and assemble the full report.

    Files are collected directory by directory (in the order given), sorted
    within each directory tree, and scanned with :func:`_scan_file`. Files
    without findings contribute only to the scanned-file count.

    Args:
        dirs: Directory paths to scan recursively.

    Returns:
        :class:`Report` holding the summary, per-pattern totals, offenders
        ordered by descending total, per-file counts, and the flat list of
        violations ordered by file and then line.
    """
    ts_files: list[Path] = [
        path for root in dirs for path in sorted(Path(root).rglob("*.ts"))
    ]

    pattern_totals: defaultdict[str, int] = defaultdict(int)
    per_file: PerFileViolations = {}
    offenders: list[Offender] = []
    violations: list[Violation] = []

    for ts_file in ts_files:
        result = _scan_file(ts_file)
        counts = result["patterns"]
        if not counts:
            # Clean file: nothing to record beyond having scanned it.
            continue

        file_name = result["file"]
        per_file[file_name] = counts
        offenders.append(
            Offender(file=file_name, total=sum(counts.values()), patterns=counts)
        )

        for kind, count in counts.items():
            pattern_totals[kind] += count
            violations.extend(
                Violation(file=file_name, line=lineno, kind=kind)
                for lineno in result["pattern_lines"].get(kind, [])
            )

    # Both sorts are stable, so ties keep their scan order.
    offenders = sorted(offenders, key=lambda entry: entry["total"], reverse=True)
    violations.sort(key=operator.itemgetter("file", "line"))

    summary = ReportSummary(
        total_files_scanned=len(ts_files),
        total_violations=sum(pattern_totals.values()),
    )
    return Report(
        summary=summary,
        pattern_totals=dict(pattern_totals),
        top_offenders=offenders,
        per_file=per_file,
        violations=violations,
    )
261
262
263 # ---------------------------------------------------------------------------
264 # Human-readable printer
265 # ---------------------------------------------------------------------------
266
267
def print_report(report: Report) -> None:
    """Write *report* to stdout as a plain-text summary.

    The output is framed by 70-character rules and contains, in order: the
    scanned-file and violation totals, the per-category pattern breakdown,
    every violation as ``file:line [kind]``, and up to 15 files with the
    highest totals. When there are no pattern totals, placeholder headings
    are printed instead of the detail sections.
    """
    rule = "=" * 70
    summary = report["summary"]

    print(rule)
    print(" TS TYPING AUDIT — Violation Report")
    print(rule)
    print(f" Files scanned: {summary['total_files_scanned']}")
    print(f" Total violations: {summary['total_violations']}")
    print()

    pattern_totals = report["pattern_totals"]

    # Clean run: emit the empty headings and close the frame.
    if not pattern_totals:
        print(" Pattern breakdown: (none)")
        print()
        print(" Top 15 offenders:")
        print(rule)
        return

    print(" Pattern breakdown:")
    for category, kinds in _CATEGORY_ORDER:
        # Only the kinds of this category that actually occurred.
        present = {kind: pattern_totals[kind] for kind in kinds if kind in pattern_totals}
        if not present:
            continue
        print(f" {category}:")
        for kind, count in present.items():
            print(f" {kind:<30} {count}")
    print()

    print(" Violations (file:line [kind]):")
    for violation in report["violations"]:
        print(f" {violation['file']}:{violation['line']} [{violation['kind']}]")
    print()

    worst = report["top_offenders"][:15]
    if worst:
        print(f" Top {len(worst)} offenders:")
        for offender in worst:
            print(f" {offender['total']:>4} {offender['file']}")

    print(rule)
307
308
309 # ---------------------------------------------------------------------------
310 # CLI
311 # ---------------------------------------------------------------------------
312
313
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the audit tool.

    Options:
        ``--dirs DIR [DIR ...]``: directories to scan (default ``src/ts/``).
        ``--json PATH``: also write the report as JSON to ``PATH``.
        ``--max-violations N``: allowed number of violations (default 0).

    Returns:
        The configured :class:`argparse.ArgumentParser`.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="TypeScript typing audit — zero-tolerance type-safety enforcement.",
    )

    # (flag, keyword arguments) in the order they appear in ``--help``.
    option_specs: tuple[tuple[str, dict[str, object]], ...] = (
        (
            "--dirs",
            {
                "nargs": "+",
                "default": ["src/ts/"],
                "metavar": "DIR",
                "help": "Directories to scan recursively (default: src/ts/).",
            },
        ),
        (
            "--json",
            {
                "metavar": "PATH",
                "help": "Write full JSON report to PATH in addition to stdout.",
            },
        ),
        (
            "--max-violations",
            {
                "type": int,
                "default": 0,
                "metavar": "N",
                "help": "Exit 1 if total violations exceed N (default: 0).",
            },
        ),
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser
339
340
def main() -> None:
    """Entry point: run the audit, print it, optionally dump JSON, set exit code.

    Parses the command line, scans the requested directories, and prints the
    human-readable report. With ``--json PATH`` the full report is also
    written to ``PATH`` as indented JSON; missing parent directories are
    created first. The process exits with status 1 when the number of
    violations exceeds ``--max-violations``.
    """
    args = _build_parser().parse_args()
    report = generate_report(args.dirs)
    print_report(report)

    if args.json:
        json_path = Path(args.json)
        # The documented invocation writes to ``artifacts/ts_audit.json``;
        # without this a fresh checkout fails with FileNotFoundError after
        # the scan has already run.
        json_path.parent.mkdir(parents=True, exist_ok=True)
        json_path.write_text(
            json.dumps(report, indent=2),
            encoding="utf-8",
        )

    if report["summary"]["total_violations"] > args.max_violations:
        sys.exit(1)


if __name__ == "__main__":
    main()
File History 1 commit
sha256:7d6dd8f4a89e2d1fef2d84f6e65feaff51385d382f466766b7f690a22ec18e32 fix: fall back to DB ancestry check when mpack-only fast-fo… Sonnet 4.6 patch 7 days ago