gabriel / muse public
coupling.py python
358 lines 13.3 KB
Raw
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 1 day ago
1 """muse code coupling — file co-change analysis.
2
3 Identifies files that change together most often. High co-change frequency
4 between two files signals a hidden dependency — they are logically coupled
5 even if there is no explicit import between them.
6
7 This is structurally impossible in Git at the semantic level: Git could
8 count raw file modifications, but ``muse code coupling`` counts only
9 *semantic* co-changes — commits where both files had AST-level symbol
10 modifications, not formatting-only edits (which Muse already separates
11 from real changes).
12
13 Commits that touch more than 50 files semantically are skipped — they
14 are almost always mass-renames or initial imports whose coupling signal is
15 noise, not signal.
16
17 Usage::
18
19 muse code coupling
20 muse code coupling --top 20
21 muse code coupling --from HEAD~30
22 muse code coupling --file muse/cli/commands/stable.py # focus on one file
23 muse code coupling --json # machine-readable
24
25 Output::
26
27 File co-change analysis — top 10 most coupled pairs
28 Commits analysed: 302
29
30 1 muse/cli/commands/symbol_log.py ↔ tests/test_code_commands.py co-changed in 3 commits
31 2 muse/plugins/code/_query.py ↔ tests/test_code_commands.py co-changed in 2 commits
32
33 High coupling = hidden dependency. Consider extracting a shared interface.
34 """
35
36 import argparse
37 import json
38 import logging
39 import pathlib
40 import sys
41 from typing import TypedDict
42
43 from muse.core.errors import ExitCode
44 from muse.core.repo import require_repo
45 from muse.core.refs import read_current_branch
46 from muse.core.commits import resolve_commit_ref
47 from muse.core.envelope import EnvelopeJson, make_envelope
48 from muse.core.timing import start_timer
49 from muse.plugins.code._query import file_pairs, touched_files, walk_commits_bfs
50 from muse.core.validation import clamp_int, sanitize_display
51
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Defaults for the ``--top``, ``--min`` and ``--max-commits`` options.
_DEFAULT_TOP = 20
_DEFAULT_MIN = 2
_DEFAULT_MAX_COMMITS = 10_000
# Commits touching more than this many files semantically are skipped —
# they are mass-renames or bulk imports with no meaningful coupling signal,
# and they would generate O(N²) pair combinations.
_MAX_FILES_PER_COMMIT = 50
61
62 # ---------------------------------------------------------------------------
63 # Typed output shape
64 # ---------------------------------------------------------------------------
65
class _CouplingFilters(TypedDict):
    """Echo of the filter arguments, emitted under ``filters`` in the JSON output."""

    top: int  # --top, as returned by clamp_int
    min_count: int  # --min, as returned by clamp_int
    file: str | None  # raw --file argument as typed by the user; None when not given
    max_commits: int  # --max-commits, as returned by clamp_int
71
class _CouplingPairDict(TypedDict, total=False):
    """One ranked entry of the JSON ``pairs`` list.

    Declared with ``total=False`` because two shapes share this type:

    * with ``--file``: ``file``, ``partner``, ``co_changes``
    * without ``--file``: ``file_a``, ``file_b``, ``co_changes``
    """

    file: str  # the resolved --file path (focused mode only)
    partner: str  # the other file of the pair (focused mode only)
    file_a: str  # first file of the pair (unfocused mode only)
    file_b: str  # second file of the pair (unfocused mode only)
    co_changes: int  # number of analysed commits in which both files changed
78
class _CouplingOutputJson(EnvelopeJson):
    """Top-level JSON output emitted by ``muse code coupling --json``.

    Extends ``EnvelopeJson``, so the envelope's own fields are emitted
    alongside the ones listed here.

    Fields
    ------
    from_ref          Exclusive start ref as given to ``--from``, or None.
    to_ref            Inclusive end ref: the ``--to`` value, or the current
                      branch name when ``--to`` was omitted.
    commits_analysed  Number of commits walked.
    truncated         True if scan hit --max-commits before reaching root.
    filters           Echo of the filter arguments used.
    pairs             Ranked co-change pairs (schema varies with --file).
    """

    from_ref: str | None
    to_ref: str
    commits_analysed: int
    truncated: bool
    filters: _CouplingFilters
    pairs: list[_CouplingPairDict]
98
99 def _resolve_file_suffix(
100 file_filter: str,
101 all_files: set[str],
102 ) -> str | None:
103 """Return the unique file from *all_files* whose path ends with *file_filter*.
104
105 Returns ``None`` if no file matches. Prints a diagnostic and exits if
106 more than one file matches (ambiguous).
107 """
108 matches = [f for f in all_files if f == file_filter or f.endswith(f"/{file_filter}")]
109 if len(matches) == 1:
110 return matches[0]
111 if len(matches) > 1:
112 print(f"❌ --file {file_filter!r} is ambiguous — multiple matches:", file=sys.stderr)
113 for m in sorted(matches):
114 print(f" {m}", file=sys.stderr)
115 raise SystemExit(ExitCode.USER_ERROR)
116 return None
117
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``coupling`` subcommand and its options to *subparsers*.

    The module docstring is reused as the long ``--help`` description, so the
    raw-description formatter is selected to keep its layout intact. Parsed
    invocations are dispatched to :func:`run` through the ``func`` default.
    """
    cmd = subparsers.add_parser(
        "coupling",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
        help="Find files that change together most often — hidden dependencies.",
    )

    # Options are added in the order they should appear in ``--help``.
    cmd.add_argument(
        "--top", "-n",
        metavar="N",
        type=int,
        default=_DEFAULT_TOP,
        help=f"Number of pairs to show (default: {_DEFAULT_TOP}).",
    )
    cmd.add_argument(
        "--from",
        dest="from_ref",
        metavar="REF",
        default=None,
        help="Exclusive start of the commit range (default: initial commit).",
    )
    cmd.add_argument(
        "--to",
        dest="to_ref",
        metavar="REF",
        default=None,
        help="Inclusive end of the commit range (default: HEAD).",
    )
    cmd.add_argument(
        "--min",
        dest="min_count",
        metavar="N",
        type=int,
        default=_DEFAULT_MIN,
        help=f"Minimum co-change count to include (default: {_DEFAULT_MIN}).",
    )

    file_help = (
        "Focus on a single file: show only its coupling partners "
        "and rank them by co-change count. "
        "Accepts a suffix (e.g. 'billing.py') or a full path."
    )
    cmd.add_argument(
        "--file", "-f",
        dest="file_filter",
        metavar="FILE",
        default=None,
        help=file_help,
    )
    cmd.add_argument(
        "--max-commits",
        metavar="N",
        type=int,
        default=_DEFAULT_MAX_COMMITS,
        help=f"Maximum commits to scan (default: {_DEFAULT_MAX_COMMITS}).",
    )
    cmd.add_argument(
        "--json", "-j",
        dest="json_out",
        action="store_true",
        help="Emit results as JSON.",
    )

    cmd.set_defaults(func=run)
159
def run(args: argparse.Namespace) -> None:
    """Find files that change together most often — hidden dependencies.

    Identifies semantic co-change: file pairs that had AST-level symbol
    modifications in the same commit. Stricter than raw file co-change —
    formatting-only edits and non-code files are excluded.

    Commits without a structured delta, commits touching fewer than two
    files, and commits touching more than ``_MAX_FILES_PER_COMMIT`` files do
    not contribute pairs. Pairs are ranked by descending co-change count;
    ties are broken by path so the ``--top`` cut-off is reproducible.

    Agent quickstart
    ----------------
    ::

        muse code coupling --json
        muse code coupling --file src/billing.py --json
        muse code coupling --from v1.0.0 --to dev --min 5 --json

    JSON fields
    -----------
    from_ref          Exclusive start ref; ``null`` when ``--from`` is omitted.
    to_ref            Inclusive end ref (``--to``, else the current branch).
    commits_analysed  Number of commits walked.
    truncated         ``true`` if ``--max-commits`` was reached.
    filters           Echo of filter arguments used.
    pairs             Ranked co-change pairs. Without ``--file``:
                      ``file_a``, ``file_b``, ``co_changes``. With ``--file``:
                      ``file``, ``partner``, ``co_changes``.

    Exit codes
    ----------
    0  Analysis complete.
    1  Invalid arguments or ref not found.
    2  Not inside a Muse repository.
    """
    elapsed = start_timer()
    top: int = clamp_int(args.top, 1, 10_000, 'top')
    from_ref: str | None = args.from_ref
    to_ref: str | None = args.to_ref
    min_count: int = clamp_int(args.min_count, 1, 100_000, 'min_count')
    file_filter: str | None = args.file_filter
    max_commits: int = clamp_int(args.max_commits, 1, 100_000, 'max_commits')
    json_out: bool = args.json_out

    # NOTE(review): clamp_int presumably already enforces the lower bound;
    # these guards are kept as a defensive backstop — confirm and drop if so.
    if top < 1:
        print("❌ --top must be >= 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)
    if min_count < 1:
        print("❌ --min must be >= 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)
    if max_commits < 1:
        print("❌ --max-commits must be >= 1.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    root = require_repo()
    branch = read_current_branch(root)

    to_commit = resolve_commit_ref(root, branch, to_ref)
    if to_commit is None:
        print(f"❌ Commit '{to_ref or 'HEAD'}' not found.", file=sys.stderr)
        raise SystemExit(ExitCode.USER_ERROR)

    stop_at: str | None = None
    if from_ref is not None:
        from_commit = resolve_commit_ref(root, branch, from_ref)
        if from_commit is None:
            print(f"❌ Commit '{from_ref}' not found.", file=sys.stderr)
            raise SystemExit(ExitCode.USER_ERROR)
        stop_at = from_commit.commit_id

    commits, truncated = walk_commits_bfs(
        root,
        to_commit.commit_id,
        max_commits=max_commits,
        stop_at_commit_id=stop_at,
    )

    def emit_json(pairs: list[_CouplingPairDict]) -> None:
        """Print the JSON report for *pairs* (shared by both JSON exits)."""
        print(json.dumps(_CouplingOutputJson(
            **make_envelope(elapsed),
            from_ref=from_ref,
            to_ref=to_ref or branch,
            commits_analysed=len(commits),
            truncated=truncated,
            filters=_CouplingFilters(
                top=top,
                min_count=min_count,
                file=file_filter,
                max_commits=max_commits,
            ),
            pairs=pairs,
        )))

    # Extract the touched files once per commit; both the --file lookup and
    # the pair counting below work from this list.
    files_per_commit = [
        touched_files(commit.structured_delta["ops"])
        for commit in commits
        if commit.structured_delta is not None
    ]

    # Resolve --file suffix to a canonical path, if requested.
    resolved_file: str | None = None
    if file_filter is not None:
        # Every path seen in any commit is a candidate, including commits
        # that are later skipped for pair counting.
        all_seen_files: set[str] = set()
        for files in files_per_commit:
            all_seen_files.update(files)

        resolved_file = _resolve_file_suffix(file_filter, all_seen_files)
        if resolved_file is None:
            # No commits touched this file — either the path is wrong or it has
            # never had a semantic change.
            if json_out:
                emit_json([])
            else:
                print(f"\nNo semantic co-changes found for {file_filter!r}.")
                print("The file may not exist or may never have had symbol-level changes.")
            return

    # Count co-changing pairs.
    pair_counts: dict[tuple[str, str], int] = {}
    for files in files_per_commit:
        # Fewer than two files yields no pair; too many files is noise
        # (mass renames, bulk imports) and quadratic in pair count.
        if len(files) < 2 or len(files) > _MAX_FILES_PER_COMMIT:
            continue
        for a, b in file_pairs(files):
            # When --file is set, only count pairs involving that file.
            if resolved_file is not None and resolved_file not in (a, b):
                continue
            pair_counts[(a, b)] = pair_counts.get((a, b), 0) + 1

    filtered = {pair: cnt for pair, cnt in pair_counts.items() if cnt >= min_count}
    # Highest count first; equal counts are ordered by path so the --top
    # cut-off does not depend on commit walk order.
    ranked = sorted(filtered.items(), key=lambda kv: (-kv[1], kv[0]))[:top]

    if json_out:
        pairs_out: list[_CouplingPairDict]
        if resolved_file is not None:
            # When --file is set, emit partner + count rather than a/b pair.
            pairs_out = [
                {
                    "file": resolved_file,
                    "partner": b if a == resolved_file else a,
                    "co_changes": c,
                }
                for (a, b), c in ranked
            ]
        else:
            pairs_out = [
                {"file_a": a, "file_b": b, "co_changes": c}
                for (a, b), c in ranked
            ]
        emit_json(pairs_out)
        return

    # Human-readable output. Paths originate from repository history, so
    # every one is passed through sanitize_display before being printed, and
    # column widths are measured on the sanitised text that is shown.
    focus = sanitize_display(resolved_file) if resolved_file is not None else None

    if focus is not None:
        print(f"\nCoupling partners of {focus}")
    else:
        print(f"\nFile co-change analysis — top {len(ranked)} most coupled pairs")
    print(f"Commits analysed: {len(commits)}")
    if truncated:
        print(f"⚠️ Scan capped at {max_commits} commits — pass --max-commits to extend.")
    print("")

    if not ranked:
        # Report the threshold that was actually applied (including 1).
        threshold_msg = f"{min_count}+"
        if focus is not None:
            print(f" (no files co-changed with {focus!r} {threshold_msg} times)")
        else:
            print(f" (no file pairs co-changed {threshold_msg} times)")
        return

    width = len(str(len(ranked)))

    if resolved_file is not None:
        # Partner-focused display.
        partners = [
            sanitize_display(b if a == resolved_file else a)
            for (a, b), _ in ranked
        ]
        partner_width = max(len(p) for p in partners)
        for rank, (partner, (_, count)) in enumerate(zip(partners, ranked), 1):
            label = "commit" if count == 1 else "commits"
            print(f" {rank:>{width}} {partner:<{partner_width}} co-changed in {count:>3} {label}")
    else:
        rows = [
            (sanitize_display(a), sanitize_display(b), count)
            for (a, b), count in ranked
        ]
        width_a = max(len(a) for a, _, _ in rows)
        width_b = max(len(b) for _, b, _ in rows)
        for rank, (a, b, count) in enumerate(rows, 1):
            label = "commit" if count == 1 else "commits"
            print(
                f" {rank:>{width}} {a:<{width_a}} ↔ {b:<{width_b}} "
                f"co-changed in {count:>3} {label}"
            )

    print("")
    if focus is not None:
        print(f"These files always change with {focus}. Hidden coupling.")
    else:
        print("High coupling = hidden dependency. Consider extracting a shared interface.")
File History 1 commit
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 1 day ago