coupling.py
python
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf
chore: bump version to 0.2.0rc14
Sonnet 4.6
patch
1 day ago
| 1 | """muse code coupling — file co-change analysis. |
| 2 | |
| 3 | Identifies files that change together most often. High co-change frequency |
| 4 | between two files signals a hidden dependency — they are logically coupled |
| 5 | even if there is no explicit import between them. |
| 6 | |
| 7 | This is structurally impossible in Git at the semantic level: Git could |
| 8 | count raw file modifications, but ``muse code coupling`` counts only |
| 9 | *semantic* co-changes — commits where both files had AST-level symbol |
| 10 | modifications, not formatting-only edits (which Muse already separates |
| 11 | from real changes). |
| 12 | |
| 13 | Commits that touch more than 50 files semantically are skipped — they |
| 14 | are almost always mass-renames or initial imports, so the co-changes they |
| 15 | would record are noise rather than real coupling. |
| 16 | |
| 17 | Usage:: |
| 18 | |
| 19 | muse code coupling |
| 20 | muse code coupling --top 20 |
| 21 | muse code coupling --from HEAD~30 |
| 22 | muse code coupling --file muse/cli/commands/stable.py # focus on one file |
| 23 | muse code coupling --json # machine-readable |
| 24 | |
| 25 | Output:: |
| 26 | |
| 27 | File co-change analysis — top 10 most coupled pairs |
| 28 | Commits analysed: 302 |
| 29 | |
| 30 | 1 muse/cli/commands/symbol_log.py ↔ tests/test_code_commands.py co-changed in 3 commits |
| 31 | 2 muse/plugins/code/_query.py ↔ tests/test_code_commands.py co-changed in 2 commits |
| 32 | |
| 33 | High coupling = hidden dependency. Consider extracting a shared interface. |
| 34 | """ |
| 35 | |
| 36 | import argparse |
| 37 | import json |
| 38 | import logging |
| 39 | import pathlib |
| 40 | import sys |
| 41 | from typing import TypedDict |
| 42 | |
| 43 | from muse.core.errors import ExitCode |
| 44 | from muse.core.repo import require_repo |
| 45 | from muse.core.refs import read_current_branch |
| 46 | from muse.core.commits import resolve_commit_ref |
| 47 | from muse.core.envelope import EnvelopeJson, make_envelope |
| 48 | from muse.core.timing import start_timer |
| 49 | from muse.plugins.code._query import file_pairs, touched_files, walk_commits_bfs |
| 50 | from muse.core.validation import clamp_int, sanitize_display |
| 51 | |
logger = logging.getLogger(__name__)

# Defaults for the command-line flags of ``muse code coupling``.
_DEFAULT_TOP = 20  # --top: number of ranked pairs shown
_DEFAULT_MIN = 2  # --min: smallest co-change count that is reported
_DEFAULT_MAX_COMMITS = 10_000  # --max-commits: cap on commits scanned

# A commit whose semantic change set is larger than this is ignored when
# counting pairs: such commits are mass-renames or bulk imports that carry
# no meaningful coupling signal, and pairing every file with every other
# file in them would cost O(N²) combinations.
_MAX_FILES_PER_COMMIT = 50
| 61 | |
| 62 | # --------------------------------------------------------------------------- |
| 63 | # Typed output shape |
| 64 | # --------------------------------------------------------------------------- |
| 65 | |
| 66 | class _CouplingFilters(TypedDict): |
| 67 | top: int |
| 68 | min_count: int |
| 69 | file: str | None |
| 70 | max_commits: int |
| 71 | |
class _CouplingPairDict(TypedDict, total=False):
    """One ranked entry of the JSON ``pairs`` list.

    All keys are optional (``total=False``) because two shapes share this
    type: with ``--file`` an entry carries ``file``/``partner``; without it
    an entry carries ``file_a``/``file_b``. Both carry ``co_changes``.
    """

    # --file mode: the focused file and the file that co-changed with it.
    file: str
    partner: str
    # Default mode: the two files of the pair.
    file_a: str
    file_b: str
    # Number of commits in which both files changed.
    co_changes: int
| 78 | |
class _CouplingOutputJson(EnvelopeJson):
    """Shape of the document printed by ``muse code coupling --json``.

    Extends the shared ``EnvelopeJson`` fields with the coupling report.

    Fields
    ------
    from_ref            Exclusive start ref, or None (initial commit).
    to_ref              Inclusive end ref (branch name or HEAD).
    commits_analysed    Number of commits walked.
    truncated           True if the scan hit --max-commits before reaching root.
    filters             Echo of the filter arguments used.
    pairs               Ranked co-change pairs (schema varies with --file).
    """

    from_ref: str | None
    to_ref: str
    commits_analysed: int
    truncated: bool
    filters: _CouplingFilters
    pairs: list[_CouplingPairDict]
| 98 | |
| 99 | def _resolve_file_suffix( |
| 100 | file_filter: str, |
| 101 | all_files: set[str], |
| 102 | ) -> str | None: |
| 103 | """Return the unique file from *all_files* whose path ends with *file_filter*. |
| 104 | |
| 105 | Returns ``None`` if no file matches. Prints a diagnostic and exits if |
| 106 | more than one file matches (ambiguous). |
| 107 | """ |
| 108 | matches = [f for f in all_files if f == file_filter or f.endswith(f"/{file_filter}")] |
| 109 | if len(matches) == 1: |
| 110 | return matches[0] |
| 111 | if len(matches) > 1: |
| 112 | print(f"❌ --file {file_filter!r} is ambiguous — multiple matches:", file=sys.stderr) |
| 113 | for m in sorted(matches): |
| 114 | print(f" {m}", file=sys.stderr) |
| 115 | raise SystemExit(ExitCode.USER_ERROR) |
| 116 | return None |
| 117 | |
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``coupling`` subcommand and its flags to *subparsers*.

    The module docstring is reused verbatim as the long description (hence
    the raw formatter), and ``run`` is installed as the handler through the
    ``func`` default.
    """
    cmd = subparsers.add_parser(
        "coupling",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        help="Find files that change together most often — hidden dependencies.",
    )

    # Result-size and threshold controls.
    cmd.add_argument(
        "--top", "-n",
        metavar="N",
        type=int,
        default=_DEFAULT_TOP,
        help=f"Number of pairs to show (default: {_DEFAULT_TOP}).",
    )

    # Commit range: (--from, --to].
    cmd.add_argument(
        "--from",
        dest="from_ref",
        metavar="REF",
        default=None,
        help="Exclusive start of the commit range (default: initial commit).",
    )
    cmd.add_argument(
        "--to",
        dest="to_ref",
        metavar="REF",
        default=None,
        help="Inclusive end of the commit range (default: HEAD).",
    )

    cmd.add_argument(
        "--min",
        dest="min_count",
        metavar="N",
        type=int,
        default=_DEFAULT_MIN,
        help=f"Minimum co-change count to include (default: {_DEFAULT_MIN}).",
    )

    # Focus mode: restrict the report to partners of one file.
    cmd.add_argument(
        "--file", "-f",
        dest="file_filter",
        metavar="FILE",
        default=None,
        help=(
            "Focus on a single file: show only its coupling partners and rank "
            "them by co-change count. Accepts a suffix (e.g. 'billing.py') "
            "or a full path."
        ),
    )

    cmd.add_argument(
        "--max-commits",
        metavar="N",
        type=int,
        default=_DEFAULT_MAX_COMMITS,
        help=f"Maximum commits to scan (default: {_DEFAULT_MAX_COMMITS}).",
    )
    cmd.add_argument(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit results as JSON.",
    )

    cmd.set_defaults(func=run)
| 159 | |
def _emit_json_report(
    elapsed,
    *,
    from_ref: str | None,
    to_ref: str,
    commits_analysed: int,
    truncated: bool,
    filters: "_CouplingFilters",
    pairs: "list[_CouplingPairDict]",
) -> None:
    """Print one ``_CouplingOutputJson`` document to stdout.

    *elapsed* is whatever ``start_timer()`` returned; it is handed to
    ``make_envelope`` unchanged to build the shared envelope fields.
    """
    print(json.dumps(_CouplingOutputJson(
        **make_envelope(elapsed),
        from_ref=from_ref,
        to_ref=to_ref,
        commits_analysed=commits_analysed,
        truncated=truncated,
        filters=filters,
        pairs=pairs,
    )))


def run(args: argparse.Namespace) -> None:
    """Find files that change together most often — hidden dependencies.

    Identifies semantic co-change: file pairs that had AST-level symbol
    modifications in the same commit. Stricter than raw file co-change —
    formatting-only edits and non-code files are excluded.

    Commits without a structured delta contribute nothing, and commits whose
    touched-file set is larger than ``_MAX_FILES_PER_COMMIT`` are skipped
    when counting pairs. Pairs are ranked by co-change count, highest first;
    ties are broken by path so the ranking is deterministic.

    Agent quickstart
    ----------------
    ::

        muse code coupling --json
        muse code coupling --file src/billing.py --json
        muse code coupling --from v1.0.0 --to dev --min 5 --json

    JSON fields
    -----------
    from_ref            Exclusive start ref; ``null`` for initial commit.
    to_ref              Inclusive end ref.
    commits_analysed    Number of commits walked.
    truncated           ``true`` if ``--max-commits`` was reached.
    filters             Echo of filter arguments used.
    pairs               Ranked co-change pairs. Without ``--file``:
                        ``file_a``, ``file_b``, ``co_changes``. With
                        ``--file``: ``file``, ``partner``, ``co_changes``.

    Exit codes
    ----------
    0   Analysis complete.
    1   Invalid arguments or ref not found.
    2   Not inside a Muse repository.
    """
    elapsed = start_timer()
    top: int = clamp_int(args.top, 1, 10_000, 'top')
    from_ref: str | None = args.from_ref
    to_ref: str | None = args.to_ref
    min_count: int = clamp_int(args.min_count, 1, 100_000, 'min_count')
    file_filter: str | None = args.file_filter
    max_commits: int = clamp_int(args.max_commits, 1, 100_000, 'max_commits')
    json_out: bool = args.json_out

    # NOTE(review): clamp_int is called with a lower bound of 1 above, so
    # these checks are presumably unreachable — confirm against clamp_int
    # and drop them if so. Kept to preserve the existing contract.
    for flag, value in (("--top", top), ("--min", min_count), ("--max-commits", max_commits)):
        if value < 1:
            print(f"❌ {flag} must be >= 1.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)

    root = require_repo()
    branch = read_current_branch(root)

    to_commit = resolve_commit_ref(root, branch, to_ref)
    if to_commit is None:
        print(f"❌ Commit '{to_ref or 'HEAD'}' not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    stop_at: str | None = None
    if from_ref is not None:
        from_commit = resolve_commit_ref(root, branch, from_ref)
        if from_commit is None:
            print(f"❌ Commit '{from_ref}' not found.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
        stop_at = from_commit.commit_id

    commits, truncated = walk_commits_bfs(
        root,
        to_commit.commit_id,
        max_commits=max_commits,
        stop_at_commit_id=stop_at,
    )

    # Compute each commit's touched-file collection exactly once; it feeds
    # both --file resolution and pair counting.
    files_per_commit = [
        touched_files(commit.structured_delta["ops"])
        for commit in commits
        if commit.structured_delta is not None
    ]

    filters = _CouplingFilters(
        top=top,
        min_count=min_count,
        file=file_filter,
        max_commits=max_commits,
    )
    reported_to_ref = to_ref or branch

    # Resolve --file to a canonical path, if requested. The union of every
    # touched path is only needed for this lookup.
    resolved_file: str | None = None
    if file_filter is not None:
        all_seen_files: set[str] = set()
        for files in files_per_commit:
            all_seen_files.update(files)
        resolved_file = _resolve_file_suffix(file_filter, all_seen_files)
        if resolved_file is None:
            # No commit touched this file — either the path is wrong or it
            # has never had a semantic change.
            if json_out:
                _emit_json_report(
                    elapsed,
                    from_ref=from_ref,
                    to_ref=reported_to_ref,
                    commits_analysed=len(commits),
                    truncated=truncated,
                    filters=filters,
                    pairs=[],
                )
            else:
                print(f"\nNo semantic co-changes found for {file_filter!r}.")
                print("The file may not exist or may never have had symbol-level changes.")
            return

    # Count co-changing pairs.
    pair_counts: dict[tuple[str, str], int] = {}
    for files in files_per_commit:
        # Fewer than two files cannot form a pair; oversized commits add
        # noise, not signal.
        if not 2 <= len(files) <= _MAX_FILES_PER_COMMIT:
            continue
        for a, b in file_pairs(files):
            # When --file is set, only count pairs involving that file.
            if resolved_file is not None and resolved_file not in (a, b):
                continue
            pair_counts[(a, b)] = pair_counts.get((a, b), 0) + 1

    # Highest count first; the pair itself breaks ties so equal counts are
    # always listed in the same order.
    ranked = sorted(
        ((pair, count) for pair, count in pair_counts.items() if count >= min_count),
        key=lambda item: (-item[1], item[0]),
    )[:top]

    if json_out:
        pairs_out: list[_CouplingPairDict]
        if resolved_file is not None:
            # When --file is set, emit partner + count rather than an a/b pair.
            pairs_out = [
                {
                    "file": resolved_file,
                    "partner": b if a == resolved_file else a,
                    "co_changes": count,
                }
                for (a, b), count in ranked
            ]
        else:
            pairs_out = [
                {"file_a": a, "file_b": b, "co_changes": count}
                for (a, b), count in ranked
            ]
        _emit_json_report(
            elapsed,
            from_ref=from_ref,
            to_ref=reported_to_ref,
            commits_analysed=len(commits),
            truncated=truncated,
            filters=filters,
            pairs=pairs_out,
        )
        return

    # Human-readable output. Paths originate from commit data, so every
    # path is passed through sanitize_display before it is printed.
    if resolved_file is not None:
        print(f"\nCoupling partners of {sanitize_display(resolved_file)}")
    else:
        print(f"\nFile co-change analysis — top {len(ranked)} most coupled pairs")
    print(f"Commits analysed: {len(commits)}")
    if truncated:
        print(f"⚠️ Scan capped at {max_commits} commits — pass --max-commits to extend.")
    print("")

    if not ranked:
        # Report the threshold actually in effect (previously --min 1 was
        # misreported as "2+").
        threshold_msg = f"{min_count}+"
        if resolved_file is not None:
            print(f" (no files co-changed with {sanitize_display(resolved_file)!r} {threshold_msg} times)")
        else:
            print(f" (no file pairs co-changed {threshold_msg} times)")
        return

    width = len(str(len(ranked)))

    if resolved_file is not None:
        # Partner-focused display. Column width is measured on the text
        # that is actually printed.
        partner_rows = [
            (sanitize_display(b if a == resolved_file else a), count)
            for (a, b), count in ranked
        ]
        max_partner = max(len(partner) for partner, _ in partner_rows)
        for rank, (partner, count) in enumerate(partner_rows, 1):
            label = "commit" if count == 1 else "commits"
            print(f" {rank:>{width}} {partner:<{max_partner}} co-changed in {count:>3} {label}")
    else:
        pair_rows = [
            (sanitize_display(a), sanitize_display(b), count)
            for (a, b), count in ranked
        ]
        max_a = max(len(left) for left, _, _ in pair_rows)
        for rank, (left, right, count) in enumerate(pair_rows, 1):
            label = "commit" if count == 1 else "commits"
            print(
                f" {rank:>{width}} {left:<{max_a}} ↔ {right:<50} "
                f"co-changed in {count:>3} {label}"
            )

    print("")
    if resolved_file is not None:
        print(f"These files always change with {sanitize_display(resolved_file)}. Hidden coupling.")
    else:
        print("High coupling = hidden dependency. Consider extracting a shared interface.")
File History
1 commit
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf
chore: bump version to 0.2.0rc14
Sonnet 4.6
patch
1 day ago