gabriel / muse public
rebase.py python
414 lines 14.5 KB
Raw
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 15 hours ago
1 """Rebase engine for ``muse rebase``.
2
3 A Muse rebase replays a sequence of commits onto a new base. Because commits
4 are content-addressed, replaying a commit produces a *new* commit with a new
5 ID — the original commits are untouched in the store.
6
7 Algorithm
8 ---------
9 Given::
10
11 A ─── B ─── C ─── D (current branch HEAD = D)
12 \\
13 E ─── F (upstream = F)
14
15 After ``muse rebase F`` (or ``muse rebase --onto F A`` where A is the merge
16 base)::
17
18 E ─── F ─── B' ─── C' ─── D' (current branch HEAD = D')
19
20 Each replayed commit ``X'`` is produced by:
21
22 1. Taking the delta between ``X`` and its parent ``X-1`` (what changed).
23 2. Applying that delta on top of the current tip via the domain plugin's
24 three-way merge (same logic as cherry-pick).
25 3. Writing a new ``CommitRecord`` with the new parent pointer.
26
27 State
28 -----
29 When a conflict occurs mid-replay, the rebase pauses and writes
30 ``.muse/REBASE_STATE.json``. The user resolves the conflict and runs
31 ``muse rebase --continue`` to resume, or ``muse rebase --abort`` to undo.
32
33 Squash mode
34 -----------
35 When ``squash=True``, all commits are replayed without writing intermediate
36 commits — only the final merged state is committed. Squash mode does not
37 support ``--continue`` because no incremental state is tracked.
38
39 Security model
40 --------------
41 - ``REBASE_STATE.json`` is checked for symlinks before any read or write to
42 prevent path-traversal via a planted symlink.
43 - File size is capped at ``_MAX_STATE_BYTES`` before parsing to prevent OOM
44 via a crafted or corrupt state file.
45 """
46
47 import datetime
48 import json
49 import logging
50 import pathlib
51 from typing import TypedDict
52
53 from muse.core.ids import hash_commit, hash_snapshot
54 from muse.core.snapshot import directories_from_manifest
55 from muse.core.io import write_text_atomic
56 from muse.core.refs import (
57 RefConflictError,
58 write_branch_ref,
59 )
60 from muse.core.commits import (
61 CommitRecord,
62 read_commit,
63 write_commit,
64 )
65 from muse.core.snapshots import (
66 SnapshotRecord,
67 read_snapshot,
68 write_snapshot,
69 )
70 from muse.core.validation import validate_branch_name
71 from muse.core.workdir import apply_manifest
72 from muse.domain import MergeResult, MuseDomainPlugin, SnapshotManifest
73 from muse.core.types import Manifest, load_json_file
74 from muse.core.paths import rebase_state_path
75
logger = logging.getLogger(__name__)


# Largest REBASE_STATE.json (in bytes) that the loader is willing to parse.
# 4 MiB: a state file holding 10 000 commit IDs at ~64 bytes each is only
# around 640 kB, so legitimate files fit with plenty of headroom.
_MAX_STATE_BYTES: int = 4 * 1024**2
81
82 # ---------------------------------------------------------------------------
83 # State TypedDict
84 # ---------------------------------------------------------------------------
85
class RebaseState(TypedDict):
    """JSON-serialisable record of a rebase that is currently in flight.

    This is the shape written to and read back from ``REBASE_STATE.json``.
    """

    # Name of the branch being rebased.
    original_branch: str
    # HEAD of that branch before the rebase started.
    original_head: str
    # Target commit ID the commits are replayed onto.
    onto: str
    # Commit IDs still waiting to be replayed.
    remaining: list[str]
    # Commit IDs recorded as already replayed.
    completed: list[str]
    # True when the rebase runs in squash mode.
    squash: bool
95
class RebaseProgress(TypedDict):
    """Summary of rebase progress, as reported by ``--status``.

    Unlike :class:`RebaseState`, this carries counts rather than the commit
    ID lists themselves.
    """

    # Whether a rebase is in progress.
    active: bool
    # Branch being rebased (empty string if not active).
    original_branch: str
    # HEAD before the rebase started.
    original_head: str
    # Target commit ID.
    onto: str
    # Total commits to replay.
    total: int
    # Commits already replayed.
    done: int
    # Commits yet to replay.
    remaining: int
    # Whether this is a squash rebase.
    squash: bool
118
119 # ---------------------------------------------------------------------------
120 # State file I/O
121 # ---------------------------------------------------------------------------
122
def load_rebase_state(root: pathlib.Path) -> RebaseState | None:
    """Return the current rebase state, or ``None`` if none is usable.

    Security guards applied before any read:

    - Symlink check: a symlink at the state path could redirect reads to
      sensitive files outside the repo. The check runs first so that dangling
      symlinks are reported too.
    - Size cap (``_MAX_STATE_BYTES``): a tampered or corrupt state file cannot
      exhaust memory.

    Args:
        root: Repository root.

    Returns:
        The parsed :class:`RebaseState`, or ``None`` when the state file is
        absent, is a symlink, cannot be stat'ed, exceeds the size cap, fails
        to load, is not a JSON object, or carries ``remaining`` /
        ``completed`` values that are not lists of strings.
    """
    path = rebase_state_path(root)
    # is_symlink() does not follow the link, whereas exists() does; testing
    # it first means a dangling symlink is reported instead of being mistaken
    # for "no state file".
    if path.is_symlink():
        logger.warning(
            "⚠️ REBASE_STATE.json is a symlink — ignoring to prevent path traversal"
        )
        return None
    try:
        # A missing file surfaces here as FileNotFoundError (an OSError), so
        # no separate exists() probe is needed.
        size = path.stat().st_size
    except OSError:
        return None
    if size > _MAX_STATE_BYTES:
        logger.warning(
            "⚠️ REBASE_STATE.json is %.1f MiB — exceeds cap of %d MiB; ignoring",
            size / (1024 * 1024),
            _MAX_STATE_BYTES // (1024 * 1024),
        )
        return None
    raw = load_json_file(path)
    if raw is None or not isinstance(raw, dict):
        return None
    remaining = raw.get("remaining")
    completed = raw.get("completed")
    if not isinstance(remaining, list) or not isinstance(completed, list):
        return None
    # Reject the whole file rather than dropping bad entries: silently
    # filtering them out would shorten the replay list and lose commits.
    if not all(isinstance(entry, str) for entry in (*remaining, *completed)):
        logger.warning(
            "⚠️ REBASE_STATE.json contains non-string commit IDs — ignoring corrupt state"
        )
        return None
    return RebaseState(
        original_branch=str(raw.get("original_branch", "")),
        original_head=str(raw.get("original_head", "")),
        onto=str(raw.get("onto", "")),
        remaining=list(remaining),
        completed=list(completed),
        squash=bool(raw.get("squash", False)),
    )
169
def save_rebase_state(root: pathlib.Path, state: RebaseState) -> None:
    """Write rebase state to ``.muse/REBASE_STATE.json`` atomically.

    Args:
        root: Repository root.
        state: The rebase state to persist; serialised as indented JSON.

    Raises:
        OSError: If the target path is a symlink (would redirect the write to
            an unintended location). This includes dangling symlinks.
    """
    path = rebase_state_path(root)
    # Do not gate this on exists(): exists() follows the link and is False
    # for a dangling symlink, which would let such a link bypass the guard.
    # is_symlink() alone is already False when nothing is at the path.
    if path.is_symlink():
        raise OSError(
            f"Refusing to write rebase state — {path} is a symlink"
        )
    write_text_atomic(path, json.dumps(dict(state), indent=2))
182
def clear_rebase_state(root: pathlib.Path) -> None:
    """Remove ``.muse/REBASE_STATE.json``.

    No-op if the state file does not exist. Refuses to unlink a symlink
    (dangling or not) and logs a warning instead.

    Args:
        root: Repository root.
    """
    path = rebase_state_path(root)
    # Test for a symlink before anything else: exists() follows links, so a
    # dangling symlink would otherwise be skipped without any warning.
    if path.is_symlink():
        logger.warning(
            "⚠️ REBASE_STATE.json is a symlink — refusing to unlink"
        )
        return
    try:
        # Attempt the removal directly instead of exists()-then-unlink, so a
        # file that disappears in between cannot raise FileNotFoundError.
        path.unlink()
    except FileNotFoundError:
        return
    logger.debug("✅ Cleared REBASE_STATE.json")
199
def get_rebase_progress(root: pathlib.Path) -> RebaseProgress:
    """Summarise the current rebase as a :class:`RebaseProgress`.

    The result is always a fully populated ``RebaseProgress``. When
    ``load_rebase_state`` yields no state, every field takes its "inactive"
    value (``active=False``, empty strings, zero counts).

    Used by ``muse rebase --status`` to give agents and humans a structured
    view of an ongoing rebase without re-reading the raw state file.

    Args:
        root: Repository root.

    Returns:
        Progress counters derived from the lengths of the state's
        ``completed`` and ``remaining`` lists.
    """
    state = load_rebase_state(root)

    if state is not None:
        done_count = len(state["completed"])
        pending_count = len(state["remaining"])
        return RebaseProgress(
            active=True,
            original_branch=state["original_branch"],
            original_head=state["original_head"],
            onto=state["onto"],
            total=done_count + pending_count,
            done=done_count,
            remaining=pending_count,
            squash=state["squash"],
        )

    # No usable state: report an idle rebase.
    return RebaseProgress(
        active=False,
        original_branch="",
        original_head="",
        onto="",
        total=0,
        done=0,
        remaining=0,
        squash=False,
    )
232
233 # ---------------------------------------------------------------------------
234 # Commit collection
235 # ---------------------------------------------------------------------------
236
def collect_commits_to_replay(
    root: pathlib.Path,
    stop_at: str,
    tip: str,
    max_commits: int = 10_000,
) -> list[CommitRecord]:
    """Return commits from *tip* back to (but not including) *stop_at*.

    The result is in chronological order (oldest first) so the replay loop
    can iterate forward.

    Only the first-parent chain is walked (``parent_commit_id``). Merge
    commits are replayed as a single commit (their second-parent history is
    not re-played).

    The walk can end before reaching *stop_at* for three reasons: the
    *max_commits* cap is hit, a commit cannot be read, or a parent cycle is
    detected. In each case the commits gathered so far are still returned,
    and a warning is logged because the oldest part of the range is missing
    from the result.

    Args:
        root: Repository root.
        stop_at: Commit ID to stop at (exclusive — the merge base).
        tip: Starting commit ID (the current branch HEAD).
        max_commits: Safety cap on the number of commits returned. Prevents
            unbounded traversal on very long histories.

    Returns:
        List of ``CommitRecord`` objects, oldest first (ready to replay).
    """
    commits: list[CommitRecord] = []
    seen: set[str] = set()
    current: str | None = tip

    # Walk parent pointers until the chain ends or the stop commit is reached.
    while current and current != stop_at:
        if current in seen:
            logger.warning(
                "⚠️ rebase: parent cycle detected at commit %s — stopping walk",
                current,
            )
            break
        if len(commits) >= max_commits:
            logger.warning(
                "⚠️ rebase: reached max_commits=%d before finding %s — "
                "older commits are not included in the replay list",
                max_commits,
                stop_at,
            )
            break
        seen.add(current)
        commit = read_commit(root, current)
        if commit is None:
            logger.warning(
                "⚠️ rebase: commit %s could not be read — "
                "older commits are not included in the replay list",
                current,
            )
            break
        commits.append(commit)
        current = commit.parent_commit_id

    # Collected newest-first; flip so the oldest commit is replayed first.
    commits.reverse()
    return commits
278
279 # ---------------------------------------------------------------------------
280 # Single-commit replay
281 # ---------------------------------------------------------------------------
282
def _manifest_of_commit(root: pathlib.Path, commit_id: str) -> Manifest | None:
    """Return the snapshot manifest of *commit_id*, or ``None`` if the commit
    or its snapshot cannot be read."""
    record = read_commit(root, commit_id)
    if not record:
        return None
    snap = read_snapshot(root, record.snapshot_id)
    if not snap:
        return None
    return snap.manifest


def replay_one(
    root: pathlib.Path,
    commit: CommitRecord,
    parent_id: str,
    plugin: MuseDomainPlugin,
    domain: str,
    branch: str,
) -> CommitRecord | list[str]:
    """Replay *commit* on top of *parent_id* using the domain plugin.

    Performs a three-way merge where:

    - ``base``   = commit's original parent snapshot (what existed before)
    - ``ours``   = the current rebased tip snapshot (what we've built so far)
    - ``theirs`` = commit's snapshot (what we want to apply)

    When the merge is clean, applies the merged manifest to the working tree,
    writes the new snapshot and commit, and returns the new ``CommitRecord``.
    When conflicts exist, returns the list of conflicting paths and writes
    nothing — the caller is responsible for writing ``MERGE_STATE.json`` and
    stopping the rebase.

    Args:
        root: Repository root.
        commit: The original commit being replayed.
        parent_id: The new parent commit ID (last replayed commit or onto
            base). An empty string means "no parent".
        plugin: The active domain plugin instance.
        domain: Domain name string.
        branch: Current branch name.

    Returns:
        New ``CommitRecord`` on clean merge; ``list[str]`` of conflict paths
        on conflict.

    Raises:
        TypeError: If *plugin* is not a ``MuseDomainPlugin``.
        ValueError: If the snapshot of *commit* is missing, or if *parent_id*
            is given but that commit or its snapshot cannot be read.
    """
    if not isinstance(plugin, MuseDomainPlugin):
        raise TypeError(
            f"replay_one: plugin {type(plugin).__name__!r} is not a MuseDomainPlugin"
        )

    # Resolve original parent snapshot (the "base" for the merge).
    base_manifest: Manifest = {}
    if commit.parent_commit_id:
        resolved_base = _manifest_of_commit(root, commit.parent_commit_id)
        if resolved_base is None:
            # Keep the best-effort empty base, but make the degraded merge
            # visible instead of proceeding silently.
            logger.warning(
                "⚠️ rebase: parent %s of commit %s is unreadable — "
                "merging against an empty base",
                commit.parent_commit_id,
                commit.commit_id,
            )
        else:
            base_manifest = resolved_base

    # "Theirs" = the original commit's snapshot.
    theirs_snap = read_snapshot(root, commit.snapshot_id)
    if theirs_snap is None:
        # A missing snapshot means we cannot reconstruct the commit's content.
        # Falling back to {} would silently delete all files the commit added
        # or modified — producing a wrong rebased history. Raise so the caller
        # surfaces a clear error rather than silently corrupting the rebase.
        raise ValueError(
            f"rebase: snapshot {commit.snapshot_id} for commit "
            f"{commit.commit_id} ({commit.message!r}) is missing or corrupt — "
            "cannot replay this commit. Run `muse verify-pack` to audit the store."
        )
    theirs_manifest = theirs_snap.manifest

    # "Ours" = the current rebased tip.
    ours_manifest: Manifest = {}
    if parent_id:
        resolved_ours = _manifest_of_commit(root, parent_id)
        if resolved_ours is None:
            # Same reasoning as for "theirs": an empty "ours" would make every
            # file of the new base look deleted, so the replayed commit (and
            # the working tree) would silently lose them.
            raise ValueError(
                f"rebase: new parent commit {parent_id} or its snapshot is "
                f"missing or corrupt — cannot replay commit {commit.commit_id} "
                "on top of it."
            )
        ours_manifest = resolved_ours

    base_snap_obj = SnapshotManifest(
        files=base_manifest,
        domain=domain,
        directories=directories_from_manifest(base_manifest),
    )
    ours_snap_obj = SnapshotManifest(
        files=ours_manifest,
        domain=domain,
        directories=directories_from_manifest(ours_manifest),
    )
    theirs_snap_obj = SnapshotManifest(
        files=theirs_manifest,
        domain=domain,
        directories=directories_from_manifest(theirs_manifest),
    )

    result: MergeResult = plugin.merge(
        base_snap_obj, ours_snap_obj, theirs_snap_obj, repo_root=root
    )

    if not result.is_clean:
        return result.conflicts

    merged_manifest = result.merged["files"]

    # Apply the merged state to the working tree.
    apply_manifest(root, ours_manifest, merged_manifest)

    merged_dirs = directories_from_manifest(merged_manifest)
    snapshot_id = hash_snapshot(merged_manifest, merged_dirs)
    # The replayed commit gets a fresh UTC timestamp; message and authorship
    # fields are carried over from the original commit.
    committed_at = datetime.datetime.now(datetime.timezone.utc)
    new_commit_id = hash_commit(
        parent_ids=[parent_id] if parent_id else [],
        snapshot_id=snapshot_id,
        message=commit.message,
        committed_at_iso=committed_at.isoformat(),
        author=commit.author or "",
    )

    write_snapshot(
        root,
        SnapshotRecord(
            snapshot_id=snapshot_id,
            manifest=merged_manifest,
            directories=merged_dirs,
        ),
    )
    new_commit = CommitRecord(
        commit_id=new_commit_id,
        branch=branch,
        snapshot_id=snapshot_id,
        message=commit.message,
        committed_at=committed_at,
        parent_commit_id=parent_id if parent_id else None,
        author=commit.author,
        agent_id=commit.agent_id,
        model_id=commit.model_id,
    )
    write_commit(root, new_commit)
    return new_commit
397
def _write_branch_ref(
    root: pathlib.Path,
    branch: str,
    commit_id: str,
    **kwargs: str,
) -> None:
    """Point *branch* at *commit_id* by delegating to ``write_branch_ref``.

    All keyword arguments are forwarded unchanged. The intended one is
    ``expected_id=<str>``, which requests compare-and-swap semantics from the
    underlying writer: the ref is updated only if it currently equals
    *expected_id*. Passing the pre-operation HEAD lets a concurrent advance
    of the branch be detected instead of silently orphaned.

    Leave *expected_id* out for an unconditional write (e.g. when an abort
    restores the original branch pointer).

    Args:
        root: Repository root.
        branch: Name of the branch whose ref is written.
        commit_id: Commit ID the branch should point at.
        **kwargs: Extra keyword arguments passed straight through to
            ``write_branch_ref``.

    Raises:
        RefConflictError: When *expected_id* is provided and the current ref
            does not match — another writer advanced the branch.
    """
    write_branch_ref(root, branch, commit_id, **kwargs)
File History 1 commit
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf chore: bump version to 0.2.0rc14 Sonnet 4.6 patch 15 hours ago