gabriel / muse public
cohen_transform.py python
583 lines 22.6 KB
Raw
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 19 hours ago
1 """cohen_transform.py — Three-way line-level merge with Manyana-style action labels.
2
3 Named in honour of Bram Cohen (creator of BitTorrent, inventor of the Manyana
4 CRDT weave algorithm), whose conflict-presentation insight is the direct
5 inspiration for this module.
6
7 The Cohen Transform observation
8 --------------------------------
9 Traditional VCS tools display a merge conflict as two opaque blobs — "ours"
10 and "theirs" — and leave the user to reconstruct *what each side actually did*.
11 Bram Cohen's Manyana project labels every conflict hunk with the action and the
12 side that performed it:
13
14 <<<<<<< ours [deleted]
15 def calculate(x):
16 a = x * 2
17 ||||||| base
18 def calculate(x):
19 a = x * 2
20 b = a + 1
21 return b
22 ======= theirs [inserted]
23 logger.debug(f"a={a}")
24 >>>>>>> end conflict
25
26 You can immediately see: ours deleted a function, theirs inserted a line into
27 the middle of it. That is a structurally different level of information from
28 two unlabelled blobs.
29
30 Public API
31 ----------
32 three_way_merge_lines(base, ours, theirs, ...)
33 Core merge function. Returns (merged_lines, has_conflict).
34
35 MergeRegion
36 Dataclass representing one segment of the three-way analysis.
37
38 classify_action(base_lines, other_lines)
39 Classify a change as 'inserted', 'deleted', or 'modified'.
40
41 annotate_hunk_action(hunk_lines, side_label)
42 Post-process a unified-diff hunk list, rewriting @@ headers with
43 action labels for use in ``muse diff --conflict`` output.
44
45 format_conflict_diff(path, root, base_manifest, ours_manifest, theirs_manifest,
46 read_object_fn, *, use_color, ours_label, theirs_label)
47 Render a Manyana-style two-sided labeled diff for a single conflicting file.
48 Used by ``muse diff --conflict``.
49
50 References
51 ----------
52 - Bram Cohen, Manyana: https://github.com/bramcohen/manyana
53 - Bram Cohen's blog: https://bramcohen.com/
54 """
55
56 import difflib
57 import pathlib
58 from dataclasses import dataclass, field
59 from typing import Callable, Sequence
60
61 type _ManifestMap = dict[str, str]
62
63 # ── Public constants ──────────────────────────────────────────────────────────
64
65 #: Separator used between the two sides of a conflict-aware diff header.
66 CONFLICT_SEPARATOR = "═" * 54
67
68 #: Marker tokens — match git's diff3 style so editors recognise them, but
69 #: the action label suffix (e.g. ``[deleted]``) is the Cohen extension.
70 _MARKER_OURS_PREFIX = "<<<<<<< "
71 _MARKER_BASE = "||||||| base"
72 _MARKER_SEP_PREFIX = "======= "
73 _MARKER_END = ">>>>>>> end conflict"
74
75 # ── Data classes ──────────────────────────────────────────────────────────────
76
77 @dataclass
78 class MergeRegion:
79 """One segment of a three-way merge analysis.
80
81 Attributes:
82 kind:
83 ``'stable'`` — identical in all three versions; output base.
84 ``'ours_only'`` — only ours changed from base; output ours.
85 ``'theirs_only'`` — only theirs changed from base; output theirs.
86 ``'both_same'`` — both sides made the same change; output either.
87 ``'conflict'`` — both sides changed differently; needs markers.
88 base_lines: Lines from the common ancestor for this segment.
89 ours_lines: Lines from the ours version for this segment.
90 theirs_lines: Lines from the theirs version for this segment.
91 """
92
93 kind: str
94 base_lines: list[str] = field(default_factory=list)
95 ours_lines: list[str] = field(default_factory=list)
96 theirs_lines: list[str] = field(default_factory=list)
97
98 # ── Core algorithm ────────────────────────────────────────────────────────────
99
100 def _find_sync_regions(
101 base: Sequence[str],
102 ours: Sequence[str],
103 theirs: Sequence[str],
104 ) -> list[tuple[int, int, int, int, int, int]]:
105 """Return maximal regions identical in all three sequences.
106
107 Each entry is ``(base_s, base_e, ours_s, ours_e, theirs_s, theirs_e)``
108 representing a contiguous block where
109 ``base[base_s:base_e] == ours[ours_s:ours_e] == theirs[theirs_s:theirs_e]``.
110
111 The algorithm finds base positions matched in BOTH ``base→ours`` and
112 ``base→theirs`` LCS runs, then groups them into maximal consecutive
113 triples (consecutive in base, ours, *and* theirs simultaneously).
114 """
115 sm_a = difflib.SequenceMatcher(None, base, ours, autojunk=False)
116 sm_b = difflib.SequenceMatcher(None, base, theirs, autojunk=False)
117
118 a_map: dict[int, int] = {} # base_pos → ours_pos
119 b_map: dict[int, int] = {} # base_pos → theirs_pos
120
121 for bi, ai, n in sm_a.get_matching_blocks():
122 for k in range(n):
123 a_map[bi + k] = ai + k
124
125 for bi, ti, n in sm_b.get_matching_blocks():
126 for k in range(n):
127 b_map[bi + k] = ti + k
128
129 stable = sorted(set(a_map) & set(b_map))
130 if not stable:
131 return []
132
133 sync_regions: list[tuple[int, int, int, int, int, int]] = []
134 run_bp = run_ap = run_tp = -1
135 # Use -2 so that the first stable element (bp=0) does not falsely satisfy
136 # ``bp == prev_bp + 1`` (which would be ``0 == -1 + 1 == 0``) and skip
137 # initialising the run variables, causing the entire sync region to be lost.
138 prev_bp = prev_ap = prev_tp = -2
139
140 for bp in stable:
141 ap = a_map[bp]
142 tp = b_map[bp]
143
144 if bp == prev_bp + 1 and ap == prev_ap + 1 and tp == prev_tp + 1:
145 # Extend current run.
146 prev_bp, prev_ap, prev_tp = bp, ap, tp
147 else:
148 # Flush previous run (if any) and start a new one.
149 if run_bp >= 0:
150 sync_regions.append((
151 run_bp, prev_bp + 1,
152 run_ap, prev_ap + 1,
153 run_tp, prev_tp + 1,
154 ))
155 run_bp, run_ap, run_tp = bp, ap, tp
156 prev_bp, prev_ap, prev_tp = bp, ap, tp
157
158 # Flush final run.
159 if run_bp >= 0:
160 sync_regions.append((
161 run_bp, prev_bp + 1,
162 run_ap, prev_ap + 1,
163 run_tp, prev_tp + 1,
164 ))
165
166 return sync_regions
167
168 def compute_regions(
169 base: Sequence[str],
170 ours: Sequence[str],
171 theirs: Sequence[str],
172 ) -> list[MergeRegion]:
173 """Decompose three sequences into a list of :class:`MergeRegion` objects.
174
175 Uses the sync-region algorithm: finds maximal blocks identical in all
176 three sequences (the "stable skeleton"), then classifies each gap between
177 stable blocks as one of:
178
179 - ``ours_only`` — only ours changed from base.
180 - ``theirs_only`` — only theirs changed from base.
181 - ``both_same`` — both changed to the same content (clean).
182 - ``conflict`` — both changed to different content.
183
184 Handles pure insertions (both sides insert different content at the same
185 base position) as conflicts with an empty base section.
186
187 Args:
188 base: Common ancestor lines (each ending with ``\\n`` or empty).
189 ours: Working-tree / our-branch lines.
190 theirs: Target-branch / their-branch lines.
191
192 Returns:
193 Ordered list of :class:`MergeRegion` objects covering the entire
194 three-way merge.
195 """
196 base = list(base)
197 ours = list(ours)
198 theirs = list(theirs)
199
200 sync_regions = _find_sync_regions(base, ours, theirs)
201
202 # Sentinel at each end so the loop below handles the leading and trailing
203 # unstable regions uniformly without special-casing.
204 sentinels: list[tuple[int, int, int, int, int, int]] = [
205 (0, 0, 0, 0, 0, 0),
206 *sync_regions,
207 (len(base), len(base), len(ours), len(ours), len(theirs), len(theirs)),
208 ]
209
210 regions: list[MergeRegion] = []
211
212 for idx in range(len(sentinels) - 1):
213 cur = sentinels[idx]
214 nxt = sentinels[idx + 1]
215
216 # ── Stable block (the sync region itself) ─────────────────────────
217 bs, be, as_, ae, ts, te = cur
218 if idx > 0 and be > bs:
219 regions.append(MergeRegion(
220 kind="stable",
221 base_lines=list(base[bs:be]),
222 ours_lines=list(ours[as_:ae]),
223 theirs_lines=list(theirs[ts:te]),
224 ))
225
226 # ── Unstable gap between this sync region and the next ────────────
227 nbs, nbe, nas, nae, nts, nte = nxt
228 b_chunk = list(base[be:nbs])
229 a_chunk = list(ours[ae:nas])
230 t_chunk = list(theirs[te:nts])
231
232 if not b_chunk and not a_chunk and not t_chunk:
233 continue
234
235 if a_chunk == b_chunk and t_chunk == b_chunk:
236 # All three identical — degenerate stable region (shouldn't
237 # normally occur given sync detection, but guard it).
238 regions.append(MergeRegion("stable", b_chunk, a_chunk, t_chunk))
239 elif a_chunk == b_chunk:
240 # Only theirs changed.
241 regions.append(MergeRegion("theirs_only", b_chunk, b_chunk, t_chunk))
242 elif t_chunk == b_chunk:
243 # Only ours changed.
244 regions.append(MergeRegion("ours_only", b_chunk, a_chunk, b_chunk))
245 elif a_chunk == t_chunk:
246 # Both sides made the same change — clean (take either).
247 regions.append(MergeRegion("both_same", b_chunk, a_chunk, t_chunk))
248 else:
249 # True conflict — both sides changed differently.
250 regions.append(MergeRegion("conflict", b_chunk, a_chunk, t_chunk))
251
252 return regions
253
254 # ── Action classification ─────────────────────────────────────────────────────
255
256 def classify_action(base_lines: list[str], other_lines: list[str]) -> str:
257 """Classify the action performed from *base_lines* to *other_lines*.
258
259 Returns one of:
260 - ``'inserted'`` — *other_lines* is non-empty, *base_lines* is empty.
261 - ``'deleted'`` — *base_lines* is non-empty, *other_lines* is empty.
262 - ``'modified'`` — both are non-empty but differ.
263
264 Used to annotate conflict markers with the Cohen-transform action label,
265 e.g. ``<<<<<<< ours [deleted]``.
266 """
267 if not base_lines and other_lines:
268 return "inserted"
269 if base_lines and not other_lines:
270 return "deleted"
271 return "modified"
272
273 def annotate_hunk_action(hunk_lines: list[str], side_label: str) -> list[str]:
274 """Rewrite ``@@`` hunk headers with a Manyana-style action annotation.
275
276 Scans the ``+``/``-`` lines in each hunk to classify the dominant action
277 on *side_label* (``'ours'`` or ``'theirs'``), then rewrites each ``@@``
278 header line from::
279
280 @@ -12,4 +12,4 @@
281
282 to::
283
284 @@ -12,4 +12,4 @@ [ours: deleted]
285 @@ -12,4 +12,4 @@ [theirs: inserted]
286 @@ -12,4 +12,4 @@ [ours: modified]
287
288 This is the direct translation of Bram Cohen's ``begin deleted left`` /
289 ``begin added right`` labeling into unified-diff format.
290
291 Args:
292 hunk_lines: Lines produced by :func:`difflib.unified_diff`, including
293 the ``---``/``+++`` header and all ``@@`` blocks.
294 side_label: Human-readable side identifier, e.g. ``'ours'`` or
295 ``'theirs'``.
296
297 Returns:
298 Annotated copy of *hunk_lines* with ``@@`` lines rewritten.
299 """
300 result: list[str] = []
301 # Collect lines in the current @@ block to classify when we see the next @@.
302 current_hunk_body: list[str] = []
303 pending_at: str | None = None # the @@ line waiting to be annotated
304
305 def _flush(body: list[str]) -> str:
306 """Classify and return the annotated @@ line."""
307 assert pending_at is not None
308 adds = sum(1 for ln in body if ln.startswith("+") and not ln.startswith("+++"))
309 dels = sum(1 for ln in body if ln.startswith("-") and not ln.startswith("---"))
310 if adds > 0 and dels == 0:
311 action = "inserted"
312 elif dels > 0 and adds == 0:
313 action = "deleted"
314 else:
315 action = "modified"
316 base_at = pending_at.rstrip()
317 # Remove any trailing existing annotation before appending a new one.
318 if " [" in base_at:
319 base_at = base_at[: base_at.rfind(" [")]
320 return f"{base_at} [{side_label}: {action}]"
321
322 for line in hunk_lines:
323 if line.startswith("@@"):
324 if pending_at is not None:
325 result.append(_flush(current_hunk_body))
326 result.extend(current_hunk_body)
327 current_hunk_body = []
328 pending_at = line
329 elif pending_at is not None:
330 current_hunk_body.append(line)
331 else:
332 result.append(line)
333
334 # Flush the final hunk.
335 if pending_at is not None:
336 result.append(_flush(current_hunk_body))
337 result.extend(current_hunk_body)
338
339 return result
340
341 # ── High-level merge function ─────────────────────────────────────────────────
342
343 def three_way_merge_lines(
344 base: Sequence[str],
345 ours: Sequence[str],
346 theirs: Sequence[str],
347 *,
348 label_ours: str = "ours",
349 label_base: str = "base",
350 label_theirs: str = "theirs",
351 ) -> tuple[list[str], bool]:
352 """Three-way line merge with Manyana-style labeled conflict markers.
353
354 Implements the Cohen Transform: conflict markers include an action label
355 (``[inserted]``, ``[deleted]``, or ``[modified]``) so the reader
356 immediately sees *what each side did*, not just *that they conflicted*.
357
358 Marker format (diff3 style with Cohen extensions)::
359
360 <<<<<<< ours [deleted]
361 [lines from ours — what ours changed base to]
362 ||||||| base
363 [lines from common ancestor — the original context]
364 ======= theirs [inserted]
365 [lines from theirs — what theirs changed base to]
366 >>>>>>> end conflict
367
368 The ``||||||| base`` section is always included (diff3 style) because it
369 gives both humans and agents the context needed to understand *why* each
370 side made its change.
371
372 Clean merge rules:
373 - Stable (no change on either side): output base unchanged.
374 - Ours-only change: output ours version.
375 - Theirs-only change: output theirs version.
376 - Both changed to the same content: output that content once.
377 - Both changed differently: emit conflict markers.
378
379 Args:
380 base: Lines from the common ancestor.
381 ours: Lines from our version (working tree or our branch).
382 theirs: Lines from their version (target branch).
383 label_ours: Label shown in the ``<<<<<<<`` marker. Defaults to
384 ``'ours'``.
385 label_base: Label shown in the ``|||||||`` marker. Defaults to
386 ``'base'``.
387 label_theirs: Label shown in the ``=======`` marker. Defaults to
388 ``'theirs'``.
389
390 Returns:
391 A ``(merged_lines, has_conflict)`` tuple. *merged_lines* is the
392 fully merged sequence; *has_conflict* is ``True`` when at least one
393 conflict block was written.
394 """
395 regions = compute_regions(base, ours, theirs)
396 merged: list[str] = []
397 has_conflict = False
398
399 for region in regions:
400 if region.kind == "stable":
401 merged.extend(region.base_lines)
402
403 elif region.kind == "ours_only":
404 merged.extend(region.ours_lines)
405
406 elif region.kind == "theirs_only":
407 merged.extend(region.theirs_lines)
408
409 elif region.kind == "both_same":
410 merged.extend(region.ours_lines)
411
412 else: # conflict
413 has_conflict = True
414 ours_action = classify_action(region.base_lines, region.ours_lines)
415 theirs_action = classify_action(region.base_lines, region.theirs_lines)
416
417 merged.append(f"{_MARKER_OURS_PREFIX}{label_ours} [{ours_action}]\n")
418 merged.extend(region.ours_lines)
419 merged.append(f"||||||| {label_base}\n")
420 merged.extend(region.base_lines)
421 merged.append(f"{_MARKER_SEP_PREFIX}{label_theirs} [{theirs_action}]\n")
422 merged.extend(region.theirs_lines)
423 merged.append(f"{_MARKER_END}\n")
424
425 return merged, has_conflict
426
427 # ── Conflict-diff renderer (for muse diff --conflict) ────────────────────────
428
429 def format_conflict_diff(
430 path: str,
431 root: pathlib.Path,
432 base_manifest: dict[str, str],
433 ours_manifest: dict[str, str],
434 theirs_manifest: dict[str, str],
435 read_object_fn: Callable[[pathlib.Path, str], bytes | None],
436 *,
437 use_color: bool = False,
438 ours_label: str = "ours",
439 theirs_label: str = "theirs",
440 ) -> list[str]:
441 """Render a labeled two-sided diff for a single conflicting file.
442
443 Produces a Cohen-transform conflict view: two separate unified diffs
444 (``base→ours`` and ``base→theirs``), each with hunk headers annotated
445 with the action performed by that side. This replaces the opaque
446 ``<<<<<<< / ======= / >>>>>>>`` blob with a clear narrative of *what
447 each side did*.
448
449 Example output::
450
451 ══════════════════════════════════════════════════════
452 CONFLICT src/utils.py
453 ══════════════════════════════════════════════════════
454 [ours] what ours changed from base
455 --- base/src/utils.py
456 +++ ours/src/utils.py
457 @@ -12,4 +12,4 @@ [ours: deleted]
458 - def calculate(x):
459 - return x * 2
460 + def compute(x):
461 + return x * 3
462
463 [theirs] what theirs changed from base
464 --- base/src/utils.py
465 +++ theirs/src/utils.py
466 @@ -12,4 +12,4 @@ [theirs: modified]
467 def calculate(x):
468 - return x * 2
469 + return x * 4
470
471 Args:
472 path: Workspace-relative POSIX path of the conflicting file.
473 root: Repository root (used to read objects from the store).
474 base_manifest: Manifest of the merge-base commit.
475 ours_manifest: Manifest of our branch at merge time.
476 theirs_manifest: Manifest of their branch.
477 read_object_fn: Callable ``(root, object_id) → bytes | None``.
478 use_color: When ``True``, emit ANSI colour escapes.
479 ours_label: Human-readable label for the ours side (e.g. the
480 branch name).
481 theirs_label: Human-readable label for the theirs side.
482
483 Returns:
484 A list of text lines (each *without* a trailing newline) ready to
485 print. Returns an empty list when no diff exists for this path.
486 """
487 def _read_lines(manifest: _ManifestMap, fallback_disk: bool = False) -> list[str]:
488 oid = manifest.get(path)
489 if oid:
490 raw = read_object_fn(root, oid)
491 if raw is not None:
492 text = raw.decode("utf-8", errors="replace")
493 return text.splitlines(keepends=True)
494 if fallback_disk:
495 disk = root / path
496 if disk.is_file():
497 return disk.read_text(encoding="utf-8", errors="replace").splitlines(keepends=True)
498 return []
499
500 safe_path = path.replace("\x1b", "?") # sanitize ANSI injection
501 base_lines = _read_lines(base_manifest)
502 ours_lines = _read_lines(ours_manifest, fallback_disk=True)
503 theirs_lines = _read_lines(theirs_manifest)
504
505 # ── ANSI colour helpers ───────────────────────────────────────────────────
506 def _c(code: str, text: str) -> str:
507 return f"\x1b[{code}m{text}\x1b[0m" if use_color else text
508
509 def _bold(t: str) -> str:
510 return _c("1", t)
511
512 def _cyan(t: str) -> str:
513 return _c("36", t)
514
515 def _green(t: str) -> str:
516 return _c("32", t)
517
518 def _red(t: str) -> str:
519 return _c("31", t)
520
521 def _yellow(t: str) -> str:
522 return _c("33", t)
523
524 # ── Header ────────────────────────────────────────────────────────────────
525 output: list[str] = []
526 output.append(_bold(CONFLICT_SEPARATOR))
527 output.append(_bold(f"CONFLICT {safe_path}"))
528 output.append(_bold(CONFLICT_SEPARATOR))
529
530 # ── Ours diff (base → ours) ───────────────────────────────────────────────
531 ours_hunks = list(difflib.unified_diff(
532 base_lines, ours_lines,
533 fromfile=f"base/{safe_path}",
534 tofile=f"{ours_label}/{safe_path}",
535 lineterm="",
536 ))
537 annotated_ours = annotate_hunk_action(ours_hunks, ours_label)
538
539 output.append("")
540 output.append(_yellow(f"[{ours_label}] what {ours_label} changed from base"))
541 if annotated_ours:
542 for line in annotated_ours:
543 if line.startswith("---") or line.startswith("+++"):
544 output.append(_bold(line))
545 elif line.startswith("@@"):
546 output.append(_cyan(line))
547 elif line.startswith("+"):
548 output.append(_green(line))
549 elif line.startswith("-"):
550 output.append(_red(line))
551 else:
552 output.append(line)
553 else:
554 output.append(" (no changes from base on this side)")
555
556 # ── Theirs diff (base → theirs) ───────────────────────────────────────────
557 theirs_hunks = list(difflib.unified_diff(
558 base_lines, theirs_lines,
559 fromfile=f"base/{safe_path}",
560 tofile=f"{theirs_label}/{safe_path}",
561 lineterm="",
562 ))
563 annotated_theirs = annotate_hunk_action(theirs_hunks, theirs_label)
564
565 output.append("")
566 output.append(_yellow(f"[{theirs_label}] what {theirs_label} changed from base"))
567 if annotated_theirs:
568 for line in annotated_theirs:
569 if line.startswith("---") or line.startswith("+++"):
570 output.append(_bold(line))
571 elif line.startswith("@@"):
572 output.append(_cyan(line))
573 elif line.startswith("+"):
574 output.append(_green(line))
575 elif line.startswith("-"):
576 output.append(_red(line))
577 else:
578 output.append(line)
579 else:
580 output.append(" (no changes from base on this side)")
581
582 output.append("")
583 return output
File History 7 commits
sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 19 hours ago
sha256:e452ad9a6ace6ccc6d875a35e06caf9da5576a970c1c36133b69a891ce5fefa8 chore: prebuild timing test Sonnet 4.6 8 days ago
sha256:0008ab6695e3e064b3e236b24fd19e538fef6a588eb0d211622f4466d919c0b1 merge: pull staging/dev — advance to 0.2.0rc12 Sonnet 4.6 patch 9 days ago
sha256:9c33d61749fff814c5226d5386aa2af7064c2c02788594a25fdd709358132eea fix: _PROPOSAL_PREFIX_RESOLVE_LIMIT 200 → 100 to match hub … Sonnet 4.6 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 24 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 30 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 30 days ago