gabriel / musehub public
proposal_simulation.py python
261 lines 9.7 KB
Raw
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 2 days ago
1 """Proposal Simulation Engine — Phase 4 of the Proposal Reimagination (issue #37).
2
3 Three pure simulation functions, each returning a typed result dict:
4
5 simulate_conflict_scan(to, from, *, ancestor, strategy, selective_domains)
6 Runs the merge strategy dry-run and returns per-domain conflict breakdown.
7
8 simulate_risk_projection(to, from, *, ancestor, current_dimensional_risk, strategy)
9 Projects post-merge dimensional risk scores using change ratio + conflict
10 density + existing risk as inputs.
11
12 simulate_dependency_order(dag)
13 Runs Kahn's topological sort on the live DAG and groups nodes into
14 parallel phases. Returns cycle info if a cycle is detected.
15
16 All three are pure (no I/O). DB read/write lives in musehub_proposals.py.
17 """
18
19 from __future__ import annotations
20
21 from typing import TypedDict
22
23 from musehub.services.proposal_dag import CycleError, ProposalDag, topological_sort
24 from musehub.services.proposal_merge_strategies import classify_domain, execute_merge_strategy
25
26 StrDict = dict[str, str]
27
28
29 class ConflictScanResult(TypedDict):
30 conflict_count: int
31 conflicting_files: list[str]
32 conflicts_by_domain: dict[str, list[str]]
33 files_added: int
34 files_modified: int
35 files_removed: int
36 domains_affected: list[str]
37 strategy_used: str
38
39
40 class RiskProjectionResult(TypedDict):
41 projected_domain_risk: dict[str, float]
42 overall_projected_risk: float
43 risk_band: str
44 risk_delta: float
45 conflict_count: int
46 files_changed_count: int
47 domains_affected: list[str]
48
49
50 class DependencyOrderResult(TypedDict):
51 order: list[str]
52 phases: list[list[str]]
53 node_count: int
54 phase_count: int
55 cycle_detected: bool
56 cycle_ids: list[str]
57
58
59 # ─────────────────────────────────────────────────────────────────────────────
60 # conflict_scan
61 # ─────────────────────────────────────────────────────────────────────────────
62
63
64 def simulate_conflict_scan(
65 to_manifest: StrDict,
66 from_manifest: StrDict,
67 *,
68 ancestor_manifest: StrDict | None = None,
69 strategy: str = "overlay",
70 selective_domains: list[str] | None = None,
71 ) -> ConflictScanResult:
72 """Dry-run the merge strategy and return a conflict breakdown.
73
74 Result keys:
75 conflict_count int — total conflicting files
76 conflicting_files list — paths that conflict
77 conflicts_by_domain dict — domain → [paths]
78 files_added int
79 files_modified int
80 files_removed int
81 domains_affected list — domains touched by the merge
82 strategy_used str
83 """
84 result = execute_merge_strategy(
85 strategy,
86 to_manifest,
87 from_manifest,
88 ancestor_manifest=ancestor_manifest,
89 selective_domains=selective_domains,
90 )
91
92 conflicts_by_domain: dict[str, list[str]] = {}
93 for c in result.conflicts:
94 d = classify_domain(c.path)
95 conflicts_by_domain.setdefault(d, []).append(c.path)
96
97 return {
98 "conflict_count": len(result.conflicts),
99 "conflicting_files": [c.path for c in result.conflicts],
100 "conflicts_by_domain": conflicts_by_domain,
101 "files_added": result.files_added,
102 "files_modified": result.files_modified,
103 "files_removed": result.files_removed,
104 "domains_affected": sorted(result.domains_merged),
105 "strategy_used": result.strategy,
106 }
107
108
109 # ─────────────────────────────────────────────────────────────────────────────
110 # risk_projection
111 # ─────────────────────────────────────────────────────────────────────────────
112
113
114 def simulate_risk_projection(
115 to_manifest: StrDict,
116 from_manifest: StrDict,
117 *,
118 ancestor_manifest: StrDict | None = None,
119 current_dimensional_risk: dict[str, float] | None = None,
120 strategy: str = "overlay",
121 selective_domains: list[str] | None = None,
122 ) -> RiskProjectionResult:
123 """Project dimensional risk scores for the post-merge state.
124
125 Risk per domain is a weighted blend of three signals:
126 40% change_ratio — (files changed in domain) / (total files in domain in to_manifest)
127 40% conflict_ratio — (conflicting files in domain) / (changed files in domain)
128 20% existing_risk — current_dimensional_risk[domain] if provided
129
130 Result keys:
131 projected_domain_risk dict[str, float] — per-domain risk score in [0, 1]
132 overall_projected_risk float — mean across all affected domains
133 risk_band str — "low" | "medium" | "high"
134 risk_delta float — overall_projected_risk − pre_merge_risk
135 conflict_count int
136 files_changed_count int
137 domains_affected list[str]
138 """
139 existing = current_dimensional_risk or {}
140
141 merge_result = execute_merge_strategy(
142 strategy,
143 to_manifest,
144 from_manifest,
145 ancestor_manifest=ancestor_manifest,
146 selective_domains=selective_domains,
147 )
148
149 # Files that differ between to and from (added, modified, or removed)
150 all_paths = set(to_manifest) | set(from_manifest)
151 changed_files: set[str] = {
152 p for p in all_paths
153 if to_manifest.get(p) != from_manifest.get(p)
154 }
155
156 # Per-domain baseline file counts (from to_manifest)
157 domain_base_counts: dict[str, int] = {}
158 for path in to_manifest:
159 d = classify_domain(path)
160 domain_base_counts[d] = domain_base_counts.get(d, 0) + 1
161
162 # Per-domain changed file counts
163 domain_change_counts: dict[str, int] = {}
164 for path in changed_files:
165 d = classify_domain(path)
166 domain_change_counts[d] = domain_change_counts.get(d, 0) + 1
167
168 # Per-domain conflict counts
169 domain_conflict_counts: dict[str, int] = {}
170 for c in merge_result.conflicts:
171 d = classify_domain(c.path)
172 domain_conflict_counts[d] = domain_conflict_counts.get(d, 0) + 1
173
174 # Compute projected risk per domain
175 all_domains: set[str] = set(domain_change_counts) | set(existing)
176 projected: dict[str, float] = {}
177 for d in all_domains:
178 base = max(domain_base_counts.get(d, 1), 1)
179 changed = domain_change_counts.get(d, 0)
180 conflicted = domain_conflict_counts.get(d, 0)
181 change_ratio = min(1.0, changed / base)
182 conflict_ratio = min(1.0, conflicted / max(changed, 1))
183 prior = existing.get(d, 0.0)
184 projected[d] = round(0.4 * change_ratio + 0.4 * conflict_ratio + 0.2 * prior, 4)
185
186 overall = sum(projected.values()) / max(len(projected), 1) if projected else 0.0
187 pre_merge = sum(existing.values()) / max(len(existing), 1) if existing else 0.0
188
189 if overall >= 0.7:
190 band = "high"
191 elif overall >= 0.4:
192 band = "medium"
193 else:
194 band = "low"
195
196 return {
197 "projected_domain_risk": projected,
198 "overall_projected_risk": round(overall, 4),
199 "risk_band": band,
200 "risk_delta": round(overall - pre_merge, 4),
201 "conflict_count": len(merge_result.conflicts),
202 "files_changed_count": len(changed_files),
203 "domains_affected": sorted(domain_change_counts.keys()),
204 }
205
206
207 # ─────────────────────────────────────────────────────────────────────────────
208 # dependency_order
209 # ─────────────────────────────────────────────────────────────────────────────
210
211
212 def simulate_dependency_order(dag: ProposalDag) -> DependencyOrderResult:
213 """Compute topological order and parallel phases for the dependency DAG.
214
215 Result keys:
216 order list[str] — full topological order (proposal IDs)
217 phases list[list[str]] — grouped into parallel execution phases
218 node_count int
219 phase_count int
220 cycle_detected bool
221 cycle_ids list[str] — IDs in the cycle (empty if no cycle)
222 """
223 try:
224 order = topological_sort(dag)
225 except CycleError as e:
226 return {
227 "order": [],
228 "phases": [],
229 "node_count": 0,
230 "phase_count": 0,
231 "cycle_detected": True,
232 "cycle_ids": sorted(e.cycle_ids),
233 }
234
235 # Group into parallel phases: a node enters phase N when all its unmerged
236 # deps appear in earlier phases.
237 phases: list[list[str]] = []
238 placed: set[str] = set(dag.merged_ids)
239 remaining = list(order)
240
241 while remaining:
242 phase = [
243 pid for pid in remaining
244 if (dag.depends_on.get(pid, set()) - dag.merged_ids) <= placed
245 ]
246 if not phase:
247 # Defensive: topological_sort guarantees this never happens, but
248 # drain one node to avoid an infinite loop in case of a bug.
249 phase = remaining[:1]
250 phases.append(phase)
251 placed.update(phase)
252 remaining = [p for p in remaining if p not in placed]
253
254 return {
255 "order": order,
256 "phases": phases,
257 "node_count": len(order),
258 "phase_count": len(phases),
259 "cycle_detected": False,
260 "cycle_ids": [],
261 }
File History 3 commits
sha256:94ef169c149a452bff7c604ded8b280b19bd477c2dabcb56972780b0b784c7aa Merge 'fix/assignee-sigil-inline' into 'dev' — proposal: As… Human 2 days ago
sha256:6b1949fc2797ca4c1936a637a4cbfec828ef56cf52398a2e74ca3c4f494e728f fix: use wire_bytes not mpack_bytes_raw in compute_object_b… Sonnet 4.6 patch 10 days ago
sha256:4aed3d8601c8dd3ed37074de35f11f4a9699a0a4b99d43727048fd3f8e6fd13d chore: doc sweep, ignore wrangler build state, misc fixes Sonnet 4.6 minor 13 days ago