gabriel / muse public
merge_engine.py python
811 lines 32.7 KB
Raw
sha256:ecfc7b5d19db951f256942ac0908b53d55a2da37c6cd1e6cf85b4a6088870865 feat(phase6): unified MergeEngine code path via run_merge() Sonnet 4.6 patch 9 days ago
1 """Muse VCS merge engine — fast-forward, 3-way, op-level, and CRDT merge.
2
3 Public API
4 ----------
5 Pure functions (no I/O):
6
7 - :func:`diff_snapshots` — paths that changed between two snapshot manifests.
8 - :func:`detect_conflicts` — paths where both branches made DIVERGENT changes.
9 - :func:`apply_merge` — build merged manifest for a conflict-free 3-way merge.
10 - :func:`crdt_join_snapshots` — convergent CRDT join; always succeeds.
11
12 Address-keyed Map merge (operation-level):
13
14 - :mod:`muse.core.op_merge` — ``ops_commute``, ``adjust_sequence_positions``,
15 ``merge_op_lists``, ``merge_structured``, and
16 :class:`~muse.core.op_merge.MergeOpsResult`.
17 Plugins that implement :class:`~muse.domain.AddressedMergePlugin` use these
18 functions to auto-merge non-conflicting ``DomainOp`` lists. Operations on
19 different addresses commute automatically; same-address conflicts route to
20 Harmony.
21
22 CRDT convergent merge:
23
24 - :func:`crdt_join_snapshots` — detects :class:`~muse.domain.CRDTPlugin` at
25 runtime and delegates to ``plugin.join(a, b)``. Returns a
26 :class:`~muse.domain.MergeResult` with an empty ``conflicts`` list; CRDT
27 joins never fail.
28
29 File-based helpers:
30
31 - :func:`find_merge_base` — lowest common ancestor (LCA) of two commits.
32 - :func:`read_merge_state` — detect and load an in-progress merge.
33 - :func:`write_merge_state` — persist conflict state before exiting.
34 - :func:`clear_merge_state` — remove MERGE_STATE.json after resolution.
35 - :func:`resolve_path` — clear all conflict entries for a file path (Phase 1, issue #8).
36 - :func:`resolve_symbol` — clear one conflict entry by full symbol address (Phase 1, issue #8).
37 - :func:`apply_resolution` — restore a specific object version to state/.
38
39 ``MERGE_STATE.json`` schema
40 ---------------------------
41
42 .. code-block:: json
43
44 {
45 "base_commit": "abc123...",
46 "ours_commit": "def456...",
47 "theirs_commit": "789abc...",
48 "conflict_paths": ["beat.mid", "lead.mp3"],
49 "other_branch": "feature/experiment"
50 }
51
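52 ``other_branch`` is optional; all other fields shown above are required when conflicts exist. The file may additionally carry ``original_conflict_paths`` and ``manually_resolved`` (see :class:`MergeState`).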
53 """
54
55 import json
56 import logging
57 import pathlib
58 from dataclasses import dataclass, field
59 from typing import TYPE_CHECKING, TypedDict
60
61 from muse._version import __version__
62 from muse.core.types import MUSE_DIR, Manifest, load_json_file, short_id
63 from muse.core.paths import merge_state_path as _merge_state_path
64 from muse.core.io import write_text_atomic
65 from muse.core.validation import contain_path, validate_object_id, validate_ref_id
66
67 if TYPE_CHECKING:
68 from muse.domain import MergeResult, MuseDomainPlugin
69
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

type VectorClock = dict[str, int]  # agent_id → logical clock count
type CRDTState = dict[str, str]  # path → blob hash for CRDT metadata

# Basename of the merge-state file; the helpers in this module join it onto
# ``root / MUSE_DIR`` to locate the file.
_MERGE_STATE_FILENAME = "MERGE_STATE.json"
76
77 # ---------------------------------------------------------------------------
78 # Wire-format TypedDict
79 # ---------------------------------------------------------------------------
80
class MergeStatePayload(TypedDict, total=False):
    """JSON-serialisable form of an in-progress merge state.

    Declared with ``total=False``: every key is optional at the type level,
    which lets writers add the commit refs and ``other_branch`` only when
    they are known.
    """

    # Commit ID of the merge base (common ancestor).
    base_commit: str
    # Commit ID of the current branch (HEAD) at merge time.
    ours_commit: str
    # Commit ID of the branch being merged in.
    theirs_commit: str
    # Conflict entries that are still unresolved.
    conflict_paths: list[str]
    # Conflict entries recorded when the merge state was first written.
    original_conflict_paths: list[str]
    # Conflict entries the user explicitly marked as resolved.
    manually_resolved: list[str]
    # Name of the branch being merged in (informational).
    other_branch: str
91
92 # ---------------------------------------------------------------------------
93 # MergeState dataclass
94 # ---------------------------------------------------------------------------
95
@dataclass(frozen=True)
class MergeState:
    """Describes an in-progress merge with unresolved conflicts.

    ``conflict_paths`` — paths still unresolved; shrinks as the user runs
    ``muse checkout --ours/--theirs`` or ``muse resolve`` for each conflict.

    ``original_conflict_paths`` — full list of paths that conflicted when the
    merge was first written. Never modified after initial write. Used by
    ``muse commit`` to pass the complete conflict set to harmony even when all
    individual conflicts have been cleared.

    ``manually_resolved`` — subset of original_conflict_paths that the user
    explicitly marked resolved via ``muse resolve`` (i.e. edited manually and
    confirmed). Harmony uses this to assign ``confidence=1.0,
    human_verified=True`` to these paths and lower confidence to side-picked
    resolutions (``checkout --ours/--theirs``).

    ``base_commit`` / ``ours_commit`` / ``theirs_commit`` — commit IDs of the
    merge base, the current branch and the incoming branch; ``None`` when the
    value is missing from the file or was rejected on load.

    ``other_branch`` — name of the branch being merged in, if recorded.
    """

    # The list fields use ``default_factory`` so instances never share a list.
    conflict_paths: list[str] = field(default_factory=list)
    original_conflict_paths: list[str] = field(default_factory=list)
    manually_resolved: list[str] = field(default_factory=list)
    # Commit refs; ``None`` means "not recorded or failed validation on load".
    base_commit: str | None = None
    ours_commit: str | None = None
    theirs_commit: str | None = None
    # Informational only; not needed to resolve the merge.
    other_branch: str | None = None
122
123 # ---------------------------------------------------------------------------
124 # Filesystem helpers
125 # ---------------------------------------------------------------------------
126
def read_merge_state(root: pathlib.Path) -> MergeState | None:
    """Load the in-progress merge state stored under *root*, if there is one.

    Args:
        root: Repository root (parent of the Muse metadata directory).

    Returns:
        A :class:`MergeState` built from ``MERGE_STATE.json``, or ``None``
        when the file does not exist or ``load_json_file`` yields ``None``.
        Conflict entries for which ``contain_path`` / ``relative_to`` raise
        ``ValueError`` are skipped with a warning, and commit refs rejected
        by ``validate_ref_id`` are replaced with ``None``.
    """
    state_file = root / MUSE_DIR / _MERGE_STATE_FILENAME
    if not state_file.exists():
        return None

    payload = load_json_file(state_file)
    if payload is None:
        logger.warning("⚠️ Failed to read %s", _MERGE_STATE_FILENAME)
        return None

    def _contained_entries(field_name: str) -> list[str]:
        # Keep only entries that stay inside the repository, normalised to
        # root-relative POSIX form.
        entries = payload.get(field_name, [])
        if not isinstance(entries, list):
            return []
        kept: list[str] = []
        for entry in entries:
            try:
                inside = contain_path(root, str(entry))
                relative = inside.relative_to(root.resolve())
            except ValueError:
                logger.warning(
                    "⚠️ Skipping unsafe conflict path %r from MERGE_STATE.json", entry
                )
                continue
            kept.append(relative.as_posix())
        return kept

    def _checked_ref(field_name: str) -> str | None:
        # A ref that fails validation is dropped rather than trusted.
        value = payload.get(field_name)
        if value is None:
            return None
        text = str(value)
        try:
            validate_ref_id(text)
        except ValueError:
            logger.warning(
                "⚠️ Invalid %s %r in MERGE_STATE.json — ignoring", field_name, text
            )
            return None
        return text

    unresolved = _contained_entries("conflict_paths")
    first_seen = _contained_entries("original_conflict_paths")

    # manually_resolved is read as plain strings; non-string items are dropped.
    resolved_raw = payload.get("manually_resolved", [])
    if isinstance(resolved_raw, list):
        confirmed: list[str] = [str(item) for item in resolved_raw if isinstance(item, str)]
    else:
        confirmed = []

    base_ref = _checked_ref("base_commit")
    ours_ref = _checked_ref("ours_commit")
    theirs_ref = _checked_ref("theirs_commit")

    branch_raw = payload.get("other_branch")
    branch_name = None if branch_raw is None else str(branch_raw)

    return MergeState(
        conflict_paths=unresolved,
        # Files without the original list fall back to the current conflicts.
        original_conflict_paths=first_seen or unresolved,
        manually_resolved=confirmed,
        base_commit=base_ref,
        ours_commit=ours_ref,
        theirs_commit=theirs_ref,
        other_branch=branch_name,
    )
185
def write_merge_state(
    root: pathlib.Path,
    *,
    base_commit: str,
    ours_commit: str,
    theirs_commit: str,
    conflict_paths: list[str],
    other_branch: str | None = None,
) -> None:
    """Write ``.muse/MERGE_STATE.json`` to signal an in-progress conflicted merge.

    Called by the ``muse merge`` command when the merge produces at least one
    conflict that cannot be auto-resolved. The file is read back by
    :func:`read_merge_state` on subsequent ``muse status`` and ``muse commit``
    invocations to surface conflict state to the user.

    When a merge state already exists, its ``original_conflict_paths`` and
    ``manually_resolved`` lists are carried over into the new file, so
    rewriting the state in the middle of a merge does not discard what was
    recorded earlier.

    Args:
        root: Repository root (parent of ``.muse/``).
        base_commit: Commit ID of the merge base (common ancestor).
        ours_commit: Commit ID of the current branch (HEAD) at merge time.
        theirs_commit: Commit ID of the branch being merged in.
        conflict_paths: Workspace-relative POSIX paths with unresolvable
            conflicts; stored sorted.
        other_branch: Name of the branch being merged in; stored for
            informational display but not required for resolution.
    """
    merge_state_path = root / MUSE_DIR / _MERGE_STATE_FILENAME
    current = sorted(conflict_paths)

    # Preserve original_conflict_paths from the first write so that commit
    # can pass the full set to harmony even after checkout --ours/--theirs
    # has cleared individual paths from conflict_paths.
    existing = read_merge_state(root)
    if existing is not None and existing.original_conflict_paths:
        original = existing.original_conflict_paths
    else:
        original = current

    payload: MergeStatePayload = {
        "base_commit": base_commit,
        "ours_commit": ours_commit,
        "theirs_commit": theirs_commit,
        "conflict_paths": current,
        "original_conflict_paths": original,
    }
    # Carry forward explicit resolutions recorded by resolve_path /
    # resolve_symbol; omitting the key would silently reset them to [].
    if existing is not None and existing.manually_resolved:
        payload["manually_resolved"] = sorted(existing.manually_resolved)
    if other_branch is not None:
        payload["other_branch"] = other_branch
    write_text_atomic(merge_state_path, json.dumps(payload, indent=2))
    logger.info("✅ Wrote MERGE_STATE.json with %d conflict(s)", len(conflict_paths))
234
def clear_merge_state(root: pathlib.Path) -> None:
    """Remove ``.muse/MERGE_STATE.json`` after a successful merge or resolution.

    A missing file is not an error: the function returns silently, so it is
    safe to call when no merge is in progress.

    Args:
        root: Repository root (parent of ``.muse/``).
    """
    merge_state_path = root / MUSE_DIR / _MERGE_STATE_FILENAME
    # EAFP: unlink directly instead of exists()-then-unlink(), which can raise
    # if the file disappears between the two calls.
    try:
        merge_state_path.unlink()
    except FileNotFoundError:
        return
    logger.debug("✅ Cleared MERGE_STATE.json")
241
def _write_conflict_paths(
    root: pathlib.Path,
    state: MergeState,
    new_paths: list[str],
    manually_resolved: list[str] | None = None,
) -> None:
    """Rewrite MERGE_STATE.json with *new_paths* as the unresolved conflicts.

    ``original_conflict_paths``, the commit refs and the branch name are
    copied from *state* unchanged; ``original_conflict_paths`` is deliberately
    left alone because Harmony reads it at commit time. Refs and branch name
    that are ``None`` on *state* are omitted from the file.

    Args:
        root: Repository root (parent of ``.muse/``).
        state: The merge state currently on disk.
        new_paths: Conflict entries that remain unresolved; stored sorted.
        manually_resolved: Replacement for ``state.manually_resolved``.
            ``None`` keeps the existing list.
    """
    confirmed = state.manually_resolved if manually_resolved is None else manually_resolved

    payload: MergeStatePayload = {
        "conflict_paths": sorted(new_paths),
        "original_conflict_paths": state.original_conflict_paths,
        "manually_resolved": sorted(confirmed),
    }
    if (base_ref := state.base_commit) is not None:
        payload["base_commit"] = base_ref
    if (ours_ref := state.ours_commit) is not None:
        payload["ours_commit"] = ours_ref
    if (theirs_ref := state.theirs_commit) is not None:
        payload["theirs_commit"] = theirs_ref
    if (branch := state.other_branch) is not None:
        payload["other_branch"] = branch

    serialized = json.dumps(payload, indent=2)
    write_text_atomic(root / MUSE_DIR / _MERGE_STATE_FILENAME, serialized)
274
275
def resolve_path(root: pathlib.Path, path: str) -> list[str]:
    """Mark every conflict entry belonging to file *path* as resolved.

    An entry belongs to *path* when the text before its first ``::`` (or the
    whole entry, if it has no ``::``) equals *path*. So
    ``resolve_path(root, "hello.md")`` clears both ``"hello.md"`` and
    ``"hello.md::Hello World"``. Cleared entries are added to
    ``manually_resolved``; ``original_conflict_paths`` is **never mutated**
    because Harmony reads it at commit time.

    Args:
        root: Repository root (parent of ``.muse/``).
        path: Workspace-relative POSIX path of the file to mark resolved.

    Returns:
        The conflict entries that were removed (empty if none matched, in
        which case nothing is written).

    Raises:
        ValueError: If no merge is in progress.
    """
    state = read_merge_state(root)
    if state is None:
        raise ValueError("No merge in progress — nothing to resolve.")

    def _belongs_to_file(entry: str) -> bool:
        # partition() yields the whole entry when "::" is absent.
        return entry.partition("::")[0] == path

    cleared = [entry for entry in state.conflict_paths if _belongs_to_file(entry)]
    if not cleared:
        return cleared

    still_open = [entry for entry in state.conflict_paths if not _belongs_to_file(entry)]
    confirmed = set(state.manually_resolved)
    confirmed.update(cleared)
    _write_conflict_paths(root, state, still_open, manually_resolved=sorted(confirmed))
    logger.info("✅ resolve_path: cleared %d conflict(s) for %r", len(cleared), path)
    return cleared
313
314
def resolve_symbol(root: pathlib.Path, symbol_address: str) -> bool:
    """Mark the conflict entry equal to *symbol_address* as resolved.

    Matching is by exact string equality against ``conflict_paths``. If the
    address is not there, the function returns ``False`` and writes nothing,
    so repeated calls are harmless. On a match the address is added to
    ``manually_resolved``; ``original_conflict_paths`` is never mutated.

    Args:
        root: Repository root (parent of ``.muse/``).
        symbol_address: Full conflict address, e.g. ``"hello.md"`` or
            ``"hello.md::Hello World"``.

    Returns:
        ``True`` if the entry was found and removed, ``False`` if not present.

    Raises:
        ValueError: If no merge is in progress.
    """
    state = read_merge_state(root)
    if state is None:
        raise ValueError("No merge in progress — nothing to resolve.")

    pending = state.conflict_paths
    if symbol_address not in pending:
        return False

    still_open = [entry for entry in pending if entry != symbol_address]
    confirmed = set(state.manually_resolved)
    confirmed.add(symbol_address)
    _write_conflict_paths(root, state, still_open, manually_resolved=sorted(confirmed))
    logger.info("✅ resolve_symbol: cleared %r", symbol_address)
    return True
343
344
def apply_resolution(
    root: pathlib.Path,
    rel_path: str,
    object_id: str,
) -> None:
    """Write the stored object *object_id* into the working tree at *rel_path*.

    Part of the ``muse merge --resolve`` workflow: once the user has picked
    which version of a conflicting file to keep, this puts that version on
    disk so ``muse commit`` can snapshot it. Missing parent directories are
    created.

    Args:
        root: Repository root (parent of ``.muse/``).
        rel_path: Workspace-relative POSIX path of the conflicting file.
        object_id: SHA-256 of the chosen resolution content in the object store.

    Raises:
        FileNotFoundError: When *object_id* is not present in the local store.
    """
    from muse.core.object_store import read_object

    # Validate both inputs before touching the object store or the disk.
    validate_object_id(object_id)
    target = contain_path(root, rel_path)

    blob = read_object(root, object_id)
    if blob is None:
        message = f"Object {object_id} for '{rel_path}' not found in local store."
        raise FileNotFoundError(message)

    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(blob)
    logger.debug("✅ Restored '%s' from object %s", rel_path, short_id(object_id))
377
378 # ---------------------------------------------------------------------------
379 # Pure merge functions (no I/O)
380 # ---------------------------------------------------------------------------
381
def diff_snapshots(
    base_manifest: Manifest,
    other_manifest: Manifest,
) -> set[str]:
    """Return every path whose entry differs between the two manifests.

    A path counts as different when it exists in only one manifest (added or
    deleted) or exists in both with unequal content hashes (modified).

    Args:
        base_manifest: Path → content-hash map for the ancestor snapshot.
        other_manifest: Path → content-hash map for the other snapshot.

    Returns:
        Set of workspace-relative POSIX paths that differ.
    """
    # A private sentinel stands in for "path not present", so an absent entry
    # never compares equal to any stored value.
    absent = object()
    every_path = base_manifest.keys() | other_manifest.keys()
    return {
        path
        for path in every_path
        if base_manifest.get(path, absent) != other_manifest.get(path, absent)
    }
406
def detect_conflicts(
    ours_changed: set[str],
    theirs_changed: set[str],
    ours_manifest: Manifest,
    theirs_manifest: Manifest,
) -> set[str]:
    """Return the paths both branches changed to DIFFERENT results.

    A path conflicts only when it appears in both change sets AND the two
    manifests disagree about it. Convergent changes are not conflicts:

    - Both branches deleted the path (absent from both manifests).
    - Both branches added or modified it to the same content hash.

    Args:
        ours_changed: Paths changed by our branch (from :func:`diff_snapshots`).
        theirs_changed: Paths changed by their branch.
        ours_manifest: Path → content-hash for our branch's snapshot.
        theirs_manifest: Path → content-hash for their branch's snapshot.

    Returns:
        Set of paths where both branches made changes that disagree on the result.
    """
    divergent: set[str] = set()
    for path in ours_changed.intersection(theirs_changed):
        # .get() yields None for a deleted path, so delete/delete agrees.
        if ours_manifest.get(path) == theirs_manifest.get(path):
            continue
        divergent.add(path)
    return divergent
439
def apply_merge(
    base_manifest: Manifest,
    ours_manifest: Manifest,
    theirs_manifest: Manifest,
    ours_changed: set[str],
    theirs_changed: set[str],
    conflict_paths: set[str],
) -> Manifest:
    """Build the merged snapshot manifest for a conflict-free 3-way merge.

    Starts from *base_manifest* (minus any conflict paths) and applies the
    non-conflicting changes from both branches:

    - Ours-only changes (in *ours_changed* but not *conflict_paths*) are taken
      from *ours_manifest*. A path missing from *ours_manifest* is a deletion.
    - Theirs-only changes (in *theirs_changed* but not *conflict_paths*) are
      taken from *theirs_manifest* by the same logic.
    - Paths in *conflict_paths* are excluded — callers must resolve them
      separately before producing a final merged snapshot.

    None of the input manifests is mutated.

    Args:
        base_manifest: Path → content-hash for the common ancestor.
        ours_manifest: Path → content-hash for our branch.
        theirs_manifest: Path → content-hash for their branch.
        ours_changed: Paths changed by our branch (from :func:`diff_snapshots`).
        theirs_changed: Paths changed by their branch.
        conflict_paths: Paths with concurrent changes — excluded from output.

    Returns:
        Merged path → content-hash mapping; conflict paths are absent.
    """
    # Seed from base WITHOUT the conflict paths. A plain copy of base would
    # leave the stale ancestor hash in place for every conflicting path that
    # existed in base, contradicting the documented contract.
    merged: Manifest = {
        path: digest
        for path, digest in base_manifest.items()
        if path not in conflict_paths
    }
    # Ours first, then theirs. A path changed identically on both sides is
    # applied twice with the same outcome.
    for side_manifest, side_changed in (
        (ours_manifest, ours_changed),
        (theirs_manifest, theirs_changed),
    ):
        for path in side_changed - conflict_paths:
            if path in side_manifest:
                merged[path] = side_manifest[path]
            else:
                merged.pop(path, None)
    return merged
484
485 # ---------------------------------------------------------------------------
486 # CRDT convergent join
487 # ---------------------------------------------------------------------------
488
def crdt_join_snapshots(
    plugin: MuseDomainPlugin,
    a_snapshot: Manifest,
    b_snapshot: Manifest,
    a_vclock: VectorClock,
    b_vclock: VectorClock,
    a_crdt_state: CRDTState,
    b_crdt_state: CRDTState,
    domain: str,
) -> MergeResult:
    """Join two replicas through the plugin's CRDT implementation.

    Checks at runtime that *plugin* is a :class:`~muse.domain.CRDTPlugin`,
    wraps each replica in a CRDT snapshot manifest, calls
    ``plugin.join(a, b)`` and converts the joined state back with
    ``plugin.from_crdt_state``. The returned
    :class:`~muse.domain.MergeResult` always carries an empty ``conflicts``
    list and empty ``applied_strategies``.

    This function is the CRDT entry point for the ``muse merge`` command.
    It is only called when ``DomainSchema.merge_mode == "crdt"`` AND the plugin
    passes the ``isinstance(plugin, CRDTPlugin)`` check.

    Args:
        plugin: The loaded domain plugin instance.
        a_snapshot: ``files`` mapping (path → content hash) for replica A.
        b_snapshot: ``files`` mapping (path → content hash) for replica B.
        a_vclock: Vector clock ``{agent_id: count}`` for replica A.
        b_vclock: Vector clock ``{agent_id: count}`` for replica B.
        a_crdt_state: CRDT metadata hashes (path → blob hash) for replica A.
        b_crdt_state: CRDT metadata hashes (path → blob hash) for replica B.
        domain: Domain name string (e.g. ``"midi"``).

    Returns:
        A :class:`~muse.domain.MergeResult` with the joined snapshot and an
        empty ``conflicts`` list.

    Raises:
        TypeError: When *plugin* does not implement the
            :class:`~muse.domain.CRDTPlugin` protocol.
    """
    from muse.domain import CRDTPlugin, CRDTSnapshotManifest, MergeResult, StateSnapshot

    if not isinstance(plugin, CRDTPlugin):
        raise TypeError(
            f"crdt_join_snapshots: plugin {type(plugin).__name__!r} does not "
            "implement CRDTPlugin — cannot use CRDT join path."
        )

    def _replica(
        files: Manifest, vclock: VectorClock, crdt_state: CRDTState
    ) -> CRDTSnapshotManifest:
        # Both replicas share the domain and the current schema version.
        return {
            "files": files,
            "domain": domain,
            "vclock": vclock,
            "crdt_state": crdt_state,
            "schema_version": __version__,
        }

    replica_a = _replica(a_snapshot, a_vclock, a_crdt_state)
    replica_b = _replica(b_snapshot, b_vclock, b_crdt_state)

    joined = plugin.join(replica_a, replica_b)
    joined_plain: StateSnapshot = plugin.from_crdt_state(joined)

    return MergeResult(
        merged=joined_plain,
        conflicts=[],
        applied_strategies={},
    )
558
559 # ---------------------------------------------------------------------------
560 # File-based merge base finder
561 # ---------------------------------------------------------------------------
562
def find_merge_base(
    repo_root: pathlib.Path,
    commit_id_a: str,
    commit_id_b: str,
) -> str | None:
    """Find the Lowest Common Ancestor (LCA) of two commits.

    Delegates to :func:`muse.core.graph.find_merge_base` with the
    repo-configured ancestor cap, converting the generic :class:`ValueError`
    into a user-facing :class:`~muse.core.errors.MuseCLIError`. The original
    ``ValueError`` is attached as ``__cause__``.

    Args:
        repo_root: Repository root, used to read the configured limit and
            passed through to the graph search.
        commit_id_a: First commit ID.
        commit_id_b: Second commit ID.

    Returns:
        Whatever the graph helper returns: a commit ID, or ``None``.

    Raises:
        MuseCLIError: When the graph helper raises ``ValueError``.
    """
    from muse.cli.config import get_limit
    from muse.core.errors import MuseCLIError
    from muse.core.graph import find_merge_base as _graph_find_merge_base

    max_ancestors = get_limit("max_ancestors", repo_root)
    try:
        return _graph_find_merge_base(
            repo_root, commit_id_a, commit_id_b, max_ancestors=max_ancestors
        )
    except ValueError as exc:
        # Chain explicitly so the traceback shows the underlying failure as
        # the cause, not as an unrelated error raised during handling.
        raise MuseCLIError(
            f"Ancestor graph exceeds {max_ancestors:,} commits during "
            "merge-base search — the repository history is too deep or "
            "the DAG may be malformed. "
            "Raise [limits] max_ancestors in .muse/config.toml "
            f"(current cap: {max_ancestors:,})."
        ) from exc
589
590 # ---------------------------------------------------------------------------
591 # MergeEngine — strategy vocabulary
592 # ---------------------------------------------------------------------------
593
_VALID_DIFF_UNITS = frozenset(("three_way", "snapshot", "replay_ours", "replay_theirs"))
_VALID_RESOLUTIONS = frozenset(("escalate", "prefer_ours", "prefer_theirs"))


@dataclass(frozen=True)
class MergeEngine:
    """A merge strategy expressed as two independent choices.

    ``diff_unit`` — how the two sides are compared:
      - ``three_way``: combine(delta(base→ours), delta(base→theirs))
      - ``snapshot``: compare(ours_state, theirs_state) — no base
      - ``replay_ours``: apply delta(base→ours) onto theirs_state
      - ``replay_theirs``: apply delta(base→theirs) onto ours_state

    ``resolution`` — what to do when a unit conflicts:
      - ``escalate``: surface it for human / Harmony resolution
      - ``prefer_ours``: auto-resolve keeping ours
      - ``prefer_theirs``: auto-resolve keeping theirs

    Both values are validated on construction; an unknown value raises
    ``ValueError``.
    """

    diff_unit: str
    resolution: str

    def __post_init__(self) -> None:
        """Reject values outside the allowed vocabularies (``diff_unit`` first)."""
        vocabularies = (
            ("diff_unit", _VALID_DIFF_UNITS),
            ("resolution", _VALID_RESOLUTIONS),
        )
        for field_name, allowed in vocabularies:
            value = getattr(self, field_name)
            if value in allowed:
                continue
            raise ValueError(
                f"Invalid {field_name} {value!r}. Valid values: {sorted(allowed)}"
            )
628
629
# Strategy name → engine configuration, as (name, diff_unit, resolution) rows.
STRATEGY_MAP: dict[str, MergeEngine] = {
    strategy: MergeEngine(diff_unit=unit, resolution=policy)
    for strategy, unit, policy in (
        ("recursive", "three_way", "escalate"),
        ("overlay", "snapshot", "prefer_theirs"),
        ("snapshot", "snapshot", "escalate"),
        ("replay", "replay_ours", "escalate"),
        ("ours", "three_way", "prefer_ours"),
        ("theirs", "three_way", "prefer_theirs"),
    )
}
638
639
def run_merge(
    base_manifest: Manifest,
    ours_manifest: Manifest,
    theirs_manifest: Manifest,
    engine: MergeEngine,
    *,
    plugin: MuseDomainPlugin | None = None,
    repo_root: pathlib.Path | None = None,
    domain: str = "",
) -> MergeResult:
    """Execute a merge using the given engine configuration.

    This is the single shared implementation used by both :command:`muse merge`
    (local CLI) and :command:`muse hub proposal merge` (server). Callers are
    responsible for all I/O: loading snapshots before calling, writing commits
    and MERGE_STATE after.

    Execution paths by ``diff_unit``:

    - ``three_way``: standard three-way diff relative to the common ancestor.
      When *plugin* is supplied the merge is delegated to the plugin
      (``plugin.merge_ops`` for an ``AddressedMergePlugin``, otherwise
      ``plugin.merge``). Without a plugin the merge is manifest-level, using
      :func:`detect_conflicts` + :func:`apply_merge`.
    - ``snapshot``: compare *ours* and *theirs* directly — no ancestor.
      At manifest level this is ``three_way`` with an empty base. A plugin,
      when supplied, still receives the real *base_manifest*.
    - ``replay_ours``: apply ``base→ours`` delta onto ``theirs`` state.
    - ``replay_theirs``: apply ``base→theirs`` delta onto ``ours`` state.
      The replay paths are always manifest-level; *plugin* is not consulted.

    When ``engine.resolution`` is ``prefer_ours`` or ``prefer_theirs``, all
    conflicts are auto-resolved to the nominated side and the returned
    ``conflicts`` list is empty. With ``escalate`` at manifest level, each
    conflict path is set to ours' state as a placeholder (ours' hash, or
    absent when ours deleted it) and reported in ``conflicts``.

    Args:
        base_manifest: Path → content-hash for the common ancestor.
            Pass ``{}`` when no ancestor is available (e.g. fresh
            repos) — this is equivalent to ``snapshot`` diff.
        ours_manifest: Path → content-hash for the current branch (HEAD).
        theirs_manifest: Path → content-hash for the incoming branch.
        engine: Strategy configuration from :data:`STRATEGY_MAP`.
        plugin: Optional domain plugin for symbol-level merge.
            When ``None`` the merge is manifest-level only.
        repo_root: Repository root passed to the plugin. May be ``None``
            on the server (the plugin degrades gracefully).
        domain: Domain name placed in the returned snapshot manifest.

    Returns:
        :class:`~muse.domain.MergeResult` with the reconciled snapshot and any
        remaining conflict paths. ``conflicts`` is empty when the merge is
        clean or when the resolution policy auto-resolved everything.
    """
    from muse.domain import AddressedMergePlugin, MergeResult, SnapshotManifest
    from muse.core.snapshot import directories_from_manifest

    def _snap(manifest: Manifest) -> SnapshotManifest:
        # Wrap a flat manifest in the snapshot structure plugins/results use.
        return SnapshotManifest(
            files=manifest,
            domain=domain,
            directories=directories_from_manifest(manifest),
        )

    def _make_result(merged_files: Manifest, conflicts: list[str]) -> MergeResult:
        return MergeResult(
            merged=_snap(merged_files),
            conflicts=conflicts,
        )

    def _take_side(target: Manifest, side: Manifest, paths: set[str] | list[str]) -> None:
        # Make *target* agree with *side* on each path: copy the hash when
        # the side has the path, delete it when the side does not.
        for path in paths:
            if path in side:
                target[path] = side[path]
            else:
                target.pop(path, None)

    # The side that wins every conflict, or None when conflicts are escalated.
    if engine.resolution == "prefer_ours":
        preferred: Manifest | None = ours_manifest
    elif engine.resolution == "prefer_theirs":
        preferred = theirs_manifest
    else:
        preferred = None

    # ── snapshot diff_unit: treat as three_way with empty base ──────────────
    effective_base: Manifest = {} if engine.diff_unit == "snapshot" else base_manifest

    # ── replay paths: apply one side's delta onto the other's state ─────────
    if engine.diff_unit in ("replay_ours", "replay_theirs"):
        ours_changed = diff_snapshots(base_manifest, ours_manifest)
        theirs_changed = diff_snapshots(base_manifest, theirs_manifest)
        conflict_paths = detect_conflicts(
            ours_changed, theirs_changed, ours_manifest, theirs_manifest
        )
        if engine.diff_unit == "replay_ours":
            # Apply ours' delta (base→ours) onto theirs' state.
            merged: Manifest = dict(theirs_manifest)
            _take_side(merged, ours_manifest, ours_changed)
        else:
            # Apply theirs' delta (base→theirs) onto ours' state.
            merged = dict(ours_manifest)
            _take_side(merged, theirs_manifest, theirs_changed)
        if preferred is not None:
            _take_side(merged, preferred, conflict_paths)
            return _make_result(merged, [])
        return _make_result(merged, sorted(conflict_paths))

    # ── three_way (or snapshot as three_way with empty base) ────────────────
    if plugin is not None:
        # Symbol-level merge via domain plugin. The plugin always receives the
        # real base — the snapshot empty-base concept applies only to
        # manifest-level comparisons; plugins do their own internal diffing.
        base_snap = _snap(base_manifest)
        ours_snap = _snap(ours_manifest)
        theirs_snap = _snap(theirs_manifest)

        if isinstance(plugin, AddressedMergePlugin):
            ours_delta = plugin.diff(base_snap, ours_snap, repo_root=repo_root)
            theirs_delta = plugin.diff(base_snap, theirs_snap, repo_root=repo_root)
            result = plugin.merge_ops(
                base_snap, ours_snap, theirs_snap,
                ours_delta["ops"], theirs_delta["ops"],
                repo_root=repo_root,
            )
        else:
            result = plugin.merge(base_snap, ours_snap, theirs_snap, repo_root=repo_root)

        if not result.conflicts or preferred is None:
            return result

        # Auto-resolve plugin conflicts via the nominated side. Conflict
        # addresses may be "file::symbol"; resolution is per file, so reduce
        # to unique file paths (order-preserving) before applying.
        merged_files = dict(result.merged["files"])
        conflict_files = list(
            dict.fromkeys(addr.partition("::")[0] for addr in result.conflicts)
        )
        _take_side(merged_files, preferred, conflict_files)
        return _make_result(merged_files, [])

    # ── manifest-level three_way (no plugin) ────────────────────────────────
    ours_changed = diff_snapshots(effective_base, ours_manifest)
    theirs_changed = diff_snapshots(effective_base, theirs_manifest)
    conflict_paths = detect_conflicts(
        ours_changed, theirs_changed, ours_manifest, theirs_manifest
    )
    merged_m = apply_merge(
        effective_base, ours_manifest, theirs_manifest,
        ours_changed, theirs_changed, conflict_paths,
    )
    if preferred is not None:
        _take_side(merged_m, preferred, conflict_paths)
        return _make_result(merged_m, [])

    # escalate: use ours' state as the placeholder for every conflict path.
    # That includes removing paths ours deleted; otherwise the stale base
    # hash would survive for delete-vs-modify conflicts.
    _take_side(merged_m, ours_manifest, conflict_paths)
    return _make_result(merged_m, sorted(conflict_paths))
File History 2 commits
sha256:ecfc7b5d19db951f256942ac0908b53d55a2da37c6cd1e6cf85b4a6088870865 feat(phase6): unified MergeEngine code path via run_merge() Sonnet 4.6 patch 9 days ago
sha256:981b89ffe0b877cbb076d011e5d9148ad88c255b66a4eef5cafac7f11ce26ab1 feat: Phase 1 — MergeEngine class, --on-conflict, --history… Sonnet 4.6 patch 9 days ago