muse/core/cohen_transform.py · gabriel/muse

cohen_transform.py python

583 lines 22.6 KB

sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 19 hours ago

1	"""cohen_transform.py — Three-way line-level merge with Manyana-style action labels.
2
3	Named in honour of Bram Cohen (creator of BitTorrent, inventor of the Manyana
4	CRDT weave algorithm), whose conflict-presentation insight is the direct
5	inspiration for this module.
6
7	The Cohen Transform observation
8	--------------------------------
9	Traditional VCS tools display a merge conflict as two opaque blobs — "ours"
10	and "theirs" — and leave the user to reconstruct what each side actually did.
11	Bram Cohen's Manyana project labels every conflict hunk with the action and the
12	side that performed it:
13
14	<<<<<<< ours [deleted]
15	def calculate(x):
16	a = x * 2
17	\|\|\|\|\|\|\| base
18	def calculate(x):
19	a = x * 2
20	b = a + 1
21	return b
22	======= theirs [inserted]
23	logger.debug(f"a={a}")
24	>>>>>>> end conflict
25
26	You can immediately see: ours deleted a function, theirs inserted a line into
27	the middle of it. That is a structurally different level of information from
28	two unlabelled blobs.
29
30	Public API
31	----------
32	three_way_merge_lines(base, ours, theirs, ...)
33	Core merge function. Returns (merged_lines, has_conflict).
34
35	MergeRegion
36	Dataclass representing one segment of the three-way analysis.
37
38	classify_action(base_lines, other_lines)
39	Classify a change as 'inserted', 'deleted', or 'modified'.
40
41	annotate_hunk_action(hunk_lines, side_label)
42	Post-process a unified-diff hunk list, rewriting @@ headers with
43	action labels for use in ``muse diff --conflict`` output.
44
45	format_conflict_diff(path, root, base_manifest, ours_manifest, theirs_manifest,
46	read_object_fn, *, use_color, ours_label, theirs_label)
47	Render a Manyana-style two-sided labeled diff for a single conflicting file.
48	Used by ``muse diff --conflict``.
49
50	References
51	----------
52	- Bram Cohen, Manyana: https://github.com/bramcohen/manyana
53	- Bram Cohen's blog: https://bramcohen.com/
54	"""
55
56	import difflib
57	import pathlib
58	from dataclasses import dataclass, field
59	from typing import Callable, Sequence
60
61	type _ManifestMap = dict[str, str]
62
63	# ── Public constants ──────────────────────────────────────────────────────────
64
65	#: Separator used between the two sides of a conflict-aware diff header.
66	CONFLICT_SEPARATOR = "═" * 54
67
68	#: Marker tokens — match git's diff3 style so editors recognise them, but
69	#: the action label suffix (e.g. ``[deleted]``) is the Cohen extension.
70	_MARKER_OURS_PREFIX = "<<<<<<< "
71	_MARKER_BASE = "\|\|\|\|\|\|\| base"
72	_MARKER_SEP_PREFIX = "======= "
73	_MARKER_END = ">>>>>>> end conflict"
74
75	# ── Data classes ──────────────────────────────────────────────────────────────
76
77	@dataclass
78	class MergeRegion:
79	"""One segment of a three-way merge analysis.
80
81	Attributes:
82	kind:
83	``'stable'`` — identical in all three versions; output base.
84	``'ours_only'`` — only ours changed from base; output ours.
85	``'theirs_only'`` — only theirs changed from base; output theirs.
86	``'both_same'`` — both sides made the same change; output either.
87	``'conflict'`` — both sides changed differently; needs markers.
88	base_lines: Lines from the common ancestor for this segment.
89	ours_lines: Lines from the ours version for this segment.
90	theirs_lines: Lines from the theirs version for this segment.
91	"""
92
93	kind: str
94	base_lines: list[str] = field(default_factory=list)
95	ours_lines: list[str] = field(default_factory=list)
96	theirs_lines: list[str] = field(default_factory=list)
97
98	# ── Core algorithm ────────────────────────────────────────────────────────────
99
100	def _find_sync_regions(
101	base: Sequence[str],
102	ours: Sequence[str],
103	theirs: Sequence[str],
104	) -> list[tuple[int, int, int, int, int, int]]:
105	"""Return maximal regions identical in all three sequences.
106
107	Each entry is ``(base_s, base_e, ours_s, ours_e, theirs_s, theirs_e)``
108	representing a contiguous block where
109	``base[base_s:base_e] == ours[ours_s:ours_e] == theirs[theirs_s:theirs_e]``.
110
111	The algorithm finds base positions matched in BOTH ``base→ours`` and
112	``base→theirs`` LCS runs, then groups them into maximal consecutive
113	triples (consecutive in base, ours, and theirs simultaneously).
114	"""
115	sm_a = difflib.SequenceMatcher(None, base, ours, autojunk=False)
116	sm_b = difflib.SequenceMatcher(None, base, theirs, autojunk=False)
117
118	a_map: dict[int, int] = {} # base_pos → ours_pos
119	b_map: dict[int, int] = {} # base_pos → theirs_pos
120
121	for bi, ai, n in sm_a.get_matching_blocks():
122	for k in range(n):
123	a_map[bi + k] = ai + k
124
125	for bi, ti, n in sm_b.get_matching_blocks():
126	for k in range(n):
127	b_map[bi + k] = ti + k
128
129	stable = sorted(set(a_map) & set(b_map))
130	if not stable:
131	return []
132
133	sync_regions: list[tuple[int, int, int, int, int, int]] = []
134	run_bp = run_ap = run_tp = -1
135	# Use -2 so that the first stable element (bp=0) does not falsely satisfy
136	# ``bp == prev_bp + 1`` (which would be ``0 == -1 + 1 == 0``) and skip
137	# initialising the run variables, causing the entire sync region to be lost.
138	prev_bp = prev_ap = prev_tp = -2
139
140	for bp in stable:
141	ap = a_map[bp]
142	tp = b_map[bp]
143
144	if bp == prev_bp + 1 and ap == prev_ap + 1 and tp == prev_tp + 1:
145	# Extend current run.
146	prev_bp, prev_ap, prev_tp = bp, ap, tp
147	else:
148	# Flush previous run (if any) and start a new one.
149	if run_bp >= 0:
150	sync_regions.append((
151	run_bp, prev_bp + 1,
152	run_ap, prev_ap + 1,
153	run_tp, prev_tp + 1,
154	))
155	run_bp, run_ap, run_tp = bp, ap, tp
156	prev_bp, prev_ap, prev_tp = bp, ap, tp
157
158	# Flush final run.
159	if run_bp >= 0:
160	sync_regions.append((
161	run_bp, prev_bp + 1,
162	run_ap, prev_ap + 1,
163	run_tp, prev_tp + 1,
164	))
165
166	return sync_regions
167
168	def compute_regions(
169	base: Sequence[str],
170	ours: Sequence[str],
171	theirs: Sequence[str],
172	) -> list[MergeRegion]:
173	"""Decompose three sequences into a list of :class:`MergeRegion` objects.
174
175	Uses the sync-region algorithm: finds maximal blocks identical in all
176	three sequences (the "stable skeleton"), then classifies each gap between
177	stable blocks as one of:
178
179	- ``ours_only`` — only ours changed from base.
180	- ``theirs_only`` — only theirs changed from base.
181	- ``both_same`` — both changed to the same content (clean).
182	- ``conflict`` — both changed to different content.
183
184	Handles pure insertions (both sides insert different content at the same
185	base position) as conflicts with an empty base section.
186
187	Args:
188	base: Common ancestor lines (each ending with ``\\n`` or empty).
189	ours: Working-tree / our-branch lines.
190	theirs: Target-branch / their-branch lines.
191
192	Returns:
193	Ordered list of :class:`MergeRegion` objects covering the entire
194	three-way merge.
195	"""
196	base = list(base)
197	ours = list(ours)
198	theirs = list(theirs)
199
200	sync_regions = _find_sync_regions(base, ours, theirs)
201
202	# Sentinel at each end so the loop below handles the leading and trailing
203	# unstable regions uniformly without special-casing.
204	sentinels: list[tuple[int, int, int, int, int, int]] = [
205	(0, 0, 0, 0, 0, 0),
206	*sync_regions,
207	(len(base), len(base), len(ours), len(ours), len(theirs), len(theirs)),
208	]
209
210	regions: list[MergeRegion] = []
211
212	for idx in range(len(sentinels) - 1):
213	cur = sentinels[idx]
214	nxt = sentinels[idx + 1]
215
216	# ── Stable block (the sync region itself) ─────────────────────────
217	bs, be, as_, ae, ts, te = cur
218	if idx > 0 and be > bs:
219	regions.append(MergeRegion(
220	kind="stable",
221	base_lines=list(base[bs:be]),
222	ours_lines=list(ours[as_:ae]),
223	theirs_lines=list(theirs[ts:te]),
224	))
225
226	# ── Unstable gap between this sync region and the next ────────────
227	nbs, nbe, nas, nae, nts, nte = nxt
228	b_chunk = list(base[be:nbs])
229	a_chunk = list(ours[ae:nas])
230	t_chunk = list(theirs[te:nts])
231
232	if not b_chunk and not a_chunk and not t_chunk:
233	continue
234
235	if a_chunk == b_chunk and t_chunk == b_chunk:
236	# All three identical — degenerate stable region (shouldn't
237	# normally occur given sync detection, but guard it).
238	regions.append(MergeRegion("stable", b_chunk, a_chunk, t_chunk))
239	elif a_chunk == b_chunk:
240	# Only theirs changed.
241	regions.append(MergeRegion("theirs_only", b_chunk, b_chunk, t_chunk))
242	elif t_chunk == b_chunk:
243	# Only ours changed.
244	regions.append(MergeRegion("ours_only", b_chunk, a_chunk, b_chunk))
245	elif a_chunk == t_chunk:
246	# Both sides made the same change — clean (take either).
247	regions.append(MergeRegion("both_same", b_chunk, a_chunk, t_chunk))
248	else:
249	# True conflict — both sides changed differently.
250	regions.append(MergeRegion("conflict", b_chunk, a_chunk, t_chunk))
251
252	return regions
253
254	# ── Action classification ─────────────────────────────────────────────────────
255
256	def classify_action(base_lines: list[str], other_lines: list[str]) -> str:
257	"""Classify the action performed from base_lines to other_lines.
258
259	Returns one of:
260	- ``'inserted'`` — other_lines is non-empty, base_lines is empty.
261	- ``'deleted'`` — base_lines is non-empty, other_lines is empty.
262	- ``'modified'`` — both are non-empty but differ.
263
264	Used to annotate conflict markers with the Cohen-transform action label,
265	e.g. ``<<<<<<< ours [deleted]``.
266	"""
267	if not base_lines and other_lines:
268	return "inserted"
269	if base_lines and not other_lines:
270	return "deleted"
271	return "modified"
272
273	def annotate_hunk_action(hunk_lines: list[str], side_label: str) -> list[str]:
274	"""Rewrite ``@@`` hunk headers with a Manyana-style action annotation.
275
276	Scans the ``+``/``-`` lines in each hunk to classify the dominant action
277	on side_label (``'ours'`` or ``'theirs'``), then rewrites each ``@@``
278	header line from::
279
280	@@ -12,4 +12,4 @@
281
282	to::
283
284	@@ -12,4 +12,4 @@ [ours: deleted]
285	@@ -12,4 +12,4 @@ [theirs: inserted]
286	@@ -12,4 +12,4 @@ [ours: modified]
287
288	This is the direct translation of Bram Cohen's ``begin deleted left`` /
289	``begin added right`` labeling into unified-diff format.
290
291	Args:
292	hunk_lines: Lines produced by :func:`difflib.unified_diff`, including
293	the ``---``/``+++`` header and all ``@@`` blocks.
294	side_label: Human-readable side identifier, e.g. ``'ours'`` or
295	``'theirs'``.
296
297	Returns:
298	Annotated copy of hunk_lines with ``@@`` lines rewritten.
299	"""
300	result: list[str] = []
301	# Collect lines in the current @@ block to classify when we see the next @@.
302	current_hunk_body: list[str] = []
303	pending_at: str \| None = None # the @@ line waiting to be annotated
304
305	def _flush(body: list[str]) -> str:
306	"""Classify and return the annotated @@ line."""
307	assert pending_at is not None
308	adds = sum(1 for ln in body if ln.startswith("+") and not ln.startswith("+++"))
309	dels = sum(1 for ln in body if ln.startswith("-") and not ln.startswith("---"))
310	if adds > 0 and dels == 0:
311	action = "inserted"
312	elif dels > 0 and adds == 0:
313	action = "deleted"
314	else:
315	action = "modified"
316	base_at = pending_at.rstrip()
317	# Remove any trailing existing annotation before appending a new one.
318	if " [" in base_at:
319	base_at = base_at[: base_at.rfind(" [")]
320	return f"{base_at} [{side_label}: {action}]"
321
322	for line in hunk_lines:
323	if line.startswith("@@"):
324	if pending_at is not None:
325	result.append(_flush(current_hunk_body))
326	result.extend(current_hunk_body)
327	current_hunk_body = []
328	pending_at = line
329	elif pending_at is not None:
330	current_hunk_body.append(line)
331	else:
332	result.append(line)
333
334	# Flush the final hunk.
335	if pending_at is not None:
336	result.append(_flush(current_hunk_body))
337	result.extend(current_hunk_body)
338
339	return result
340
341	# ── High-level merge function ─────────────────────────────────────────────────
342
343	def three_way_merge_lines(
344	base: Sequence[str],
345	ours: Sequence[str],
346	theirs: Sequence[str],
347	*,
348	label_ours: str = "ours",
349	label_base: str = "base",
350	label_theirs: str = "theirs",
351	) -> tuple[list[str], bool]:
352	"""Three-way line merge with Manyana-style labeled conflict markers.
353
354	Implements the Cohen Transform: conflict markers include an action label
355	(``[inserted]``, ``[deleted]``, or ``[modified]``) so the reader
356	immediately sees what each side did, not just that they conflicted.
357
358	Marker format (diff3 style with Cohen extensions)::
359
360	<<<<<<< ours [deleted]
361	[lines from ours — what ours changed base to]
362	\|\|\|\|\|\|\| base
363	[lines from common ancestor — the original context]
364	======= theirs [inserted]
365	[lines from theirs — what theirs changed base to]
366	>>>>>>> end conflict
367
368	The ``\|\|\|\|\|\|\| base`` section is always included (diff3 style) because it
369	gives both humans and agents the context needed to understand why each
370	side made its change.
371
372	Clean merge rules:
373	- Stable (no change on either side): output base unchanged.
374	- Ours-only change: output ours version.
375	- Theirs-only change: output theirs version.
376	- Both changed to the same content: output that content once.
377	- Both changed differently: emit conflict markers.
378
379	Args:
380	base: Lines from the common ancestor.
381	ours: Lines from our version (working tree or our branch).
382	theirs: Lines from their version (target branch).
383	label_ours: Label shown in the ``<<<<<<<`` marker. Defaults to
384	``'ours'``.
385	label_base: Label shown in the ``\|\|\|\|\|\|\|`` marker. Defaults to
386	``'base'``.
387	label_theirs: Label shown in the ``=======`` marker. Defaults to
388	``'theirs'``.
389
390	Returns:
391	A ``(merged_lines, has_conflict)`` tuple. merged_lines is the
392	fully merged sequence; has_conflict is ``True`` when at least one
393	conflict block was written.
394	"""
395	regions = compute_regions(base, ours, theirs)
396	merged: list[str] = []
397	has_conflict = False
398
399	for region in regions:
400	if region.kind == "stable":
401	merged.extend(region.base_lines)
402
403	elif region.kind == "ours_only":
404	merged.extend(region.ours_lines)
405
406	elif region.kind == "theirs_only":
407	merged.extend(region.theirs_lines)
408
409	elif region.kind == "both_same":
410	merged.extend(region.ours_lines)
411
412	else: # conflict
413	has_conflict = True
414	ours_action = classify_action(region.base_lines, region.ours_lines)
415	theirs_action = classify_action(region.base_lines, region.theirs_lines)
416
417	merged.append(f"{_MARKER_OURS_PREFIX}{label_ours} [{ours_action}]\n")
418	merged.extend(region.ours_lines)
419	merged.append(f"\|\|\|\|\|\|\| {label_base}\n")
420	merged.extend(region.base_lines)
421	merged.append(f"{_MARKER_SEP_PREFIX}{label_theirs} [{theirs_action}]\n")
422	merged.extend(region.theirs_lines)
423	merged.append(f"{_MARKER_END}\n")
424
425	return merged, has_conflict
426
427	# ── Conflict-diff renderer (for muse diff --conflict) ────────────────────────
428
429	def format_conflict_diff(
430	path: str,
431	root: pathlib.Path,
432	base_manifest: dict[str, str],
433	ours_manifest: dict[str, str],
434	theirs_manifest: dict[str, str],
435	read_object_fn: Callable[[pathlib.Path, str], bytes \| None],
436	*,
437	use_color: bool = False,
438	ours_label: str = "ours",
439	theirs_label: str = "theirs",
440	) -> list[str]:
441	"""Render a labeled two-sided diff for a single conflicting file.
442
443	Produces a Cohen-transform conflict view: two separate unified diffs
444	(``base→ours`` and ``base→theirs``), each with hunk headers annotated
445	with the action performed by that side. This replaces the opaque
446	``<<<<<<< / ======= / >>>>>>>`` blob with a clear narrative of *what
447	each side did*.
448
449	Example output::
450
451	══════════════════════════════════════════════════════
452	CONFLICT src/utils.py
453	══════════════════════════════════════════════════════
454	[ours] what ours changed from base
455	--- base/src/utils.py
456	+++ ours/src/utils.py
457	@@ -12,4 +12,4 @@ [ours: deleted]
458	- def calculate(x):
459	- return x * 2
460	+ def compute(x):
461	+ return x * 3
462
463	[theirs] what theirs changed from base
464	--- base/src/utils.py
465	+++ theirs/src/utils.py
466	@@ -12,4 +12,4 @@ [theirs: modified]
467	def calculate(x):
468	- return x * 2
469	+ return x * 4
470
471	Args:
472	path: Workspace-relative POSIX path of the conflicting file.
473	root: Repository root (used to read objects from the store).
474	base_manifest: Manifest of the merge-base commit.
475	ours_manifest: Manifest of our branch at merge time.
476	theirs_manifest: Manifest of their branch.
477	read_object_fn: Callable ``(root, object_id) → bytes \| None``.
478	use_color: When ``True``, emit ANSI colour escapes.
479	ours_label: Human-readable label for the ours side (e.g. the
480	branch name).
481	theirs_label: Human-readable label for the theirs side.
482
483	Returns:
484	A list of text lines (each without a trailing newline) ready to
485	print. Returns an empty list when no diff exists for this path.
486	"""
487	def _read_lines(manifest: _ManifestMap, fallback_disk: bool = False) -> list[str]:
488	oid = manifest.get(path)
489	if oid:
490	raw = read_object_fn(root, oid)
491	if raw is not None:
492	text = raw.decode("utf-8", errors="replace")
493	return text.splitlines(keepends=True)
494	if fallback_disk:
495	disk = root / path
496	if disk.is_file():
497	return disk.read_text(encoding="utf-8", errors="replace").splitlines(keepends=True)
498	return []
499
500	safe_path = path.replace("\x1b", "?") # sanitize ANSI injection
501	base_lines = _read_lines(base_manifest)
502	ours_lines = _read_lines(ours_manifest, fallback_disk=True)
503	theirs_lines = _read_lines(theirs_manifest)
504
505	# ── ANSI colour helpers ───────────────────────────────────────────────────
506	def _c(code: str, text: str) -> str:
507	return f"\x1b[{code}m{text}\x1b[0m" if use_color else text
508
509	def _bold(t: str) -> str:
510	return _c("1", t)
511
512	def _cyan(t: str) -> str:
513	return _c("36", t)
514
515	def _green(t: str) -> str:
516	return _c("32", t)
517
518	def _red(t: str) -> str:
519	return _c("31", t)
520
521	def _yellow(t: str) -> str:
522	return _c("33", t)
523
524	# ── Header ────────────────────────────────────────────────────────────────
525	output: list[str] = []
526	output.append(_bold(CONFLICT_SEPARATOR))
527	output.append(_bold(f"CONFLICT {safe_path}"))
528	output.append(_bold(CONFLICT_SEPARATOR))
529
530	# ── Ours diff (base → ours) ───────────────────────────────────────────────
531	ours_hunks = list(difflib.unified_diff(
532	base_lines, ours_lines,
533	fromfile=f"base/{safe_path}",
534	tofile=f"{ours_label}/{safe_path}",
535	lineterm="",
536	))
537	annotated_ours = annotate_hunk_action(ours_hunks, ours_label)
538
539	output.append("")
540	output.append(_yellow(f"[{ours_label}] what {ours_label} changed from base"))
541	if annotated_ours:
542	for line in annotated_ours:
543	if line.startswith("---") or line.startswith("+++"):
544	output.append(_bold(line))
545	elif line.startswith("@@"):
546	output.append(_cyan(line))
547	elif line.startswith("+"):
548	output.append(_green(line))
549	elif line.startswith("-"):
550	output.append(_red(line))
551	else:
552	output.append(line)
553	else:
554	output.append(" (no changes from base on this side)")
555
556	# ── Theirs diff (base → theirs) ───────────────────────────────────────────
557	theirs_hunks = list(difflib.unified_diff(
558	base_lines, theirs_lines,
559	fromfile=f"base/{safe_path}",
560	tofile=f"{theirs_label}/{safe_path}",
561	lineterm="",
562	))
563	annotated_theirs = annotate_hunk_action(theirs_hunks, theirs_label)
564
565	output.append("")
566	output.append(_yellow(f"[{theirs_label}] what {theirs_label} changed from base"))
567	if annotated_theirs:
568	for line in annotated_theirs:
569	if line.startswith("---") or line.startswith("+++"):
570	output.append(_bold(line))
571	elif line.startswith("@@"):
572	output.append(_cyan(line))
573	elif line.startswith("+"):
574	output.append(_green(line))
575	elif line.startswith("-"):
576	output.append(_red(line))
577	else:
578	output.append(line)
579	else:
580	output.append(" (no changes from base on this side)")
581
582	output.append("")
583	return output

File History 7 commits

sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e chore: remove blob-debug test marker file Sonnet 4.6 19 hours ago

sha256:e452ad9a6ace6ccc6d875a35e06caf9da5576a970c1c36133b69a891ce5fefa8 chore: prebuild timing test Sonnet 4.6 8 days ago

sha256:0008ab6695e3e064b3e236b24fd19e538fef6a588eb0d211622f4466d919c0b1 merge: pull staging/dev — advance to 0.2.0rc12 Sonnet 4.6 patch 9 days ago

sha256:9c33d61749fff814c5226d5386aa2af7064c2c02788594a25fdd709358132eea fix: _PROPOSAL_PREFIX_RESOLVE_LIMIT 200 → 100 to match hub … Sonnet 4.6 21 days ago

sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 24 days ago

sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 30 days ago

sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor ⚠ 30 days ago

class MergeRegion

function _find_sync_regions

function compute_regions

function classify_action

function annotate_hunk_action

function _flush

function three_way_merge_lines

function format_conflict_diff

function _read_lines

function _c

function _bold

function _cyan

function _green

function _red

function _yellow

Pathmuse/core/cohen_transform.py

Lines583

Size22.6 KB

LangPython

Refsha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e

Object ID

sha256:3eccb16e905483506772d1c3d36abadbc5bfb8e6c05bd62f3d73d8c47be5e08c…

Last commit

sha256:18b983389ee1b55900fcd799bfbb496552d2e3ecded9d18cefbfef188947a12e

chore: remove blob-debug test marker file

19 hours ago

Quick links

Blame History