rebase.py
python
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf
chore: bump version to 0.2.0rc14
Sonnet 4.6
patch
15 hours ago
| 1 | """Rebase engine for ``muse rebase``. |
| 2 | |
| 3 | A Muse rebase replays a sequence of commits onto a new base. Because commits |
| 4 | are content-addressed, replaying a commit produces a *new* commit with a new |
| 5 | ID — the original commits are untouched in the store. |
| 6 | |
| 7 | Algorithm |
| 8 | --------- |
| 9 | Given:: |
| 10 | |
    A ─── B ─── C ─── D   (current branch HEAD = D)
     \\
      E ─── F             (upstream = F)
| 14 | |
| 15 | After ``muse rebase F`` (or ``muse rebase --onto F A`` where A is the merge |
| 16 | base):: |
| 17 | |
| 18 | E ─── F ─── B' ─── C' ─── D' (current branch HEAD = D') |
| 19 | |
| 20 | Each replayed commit ``X'`` is produced by: |
| 21 | |
| 22 | 1. Taking the delta between ``X`` and its parent ``X-1`` (what changed). |
| 23 | 2. Applying that delta on top of the current tip via the domain plugin's |
| 24 | three-way merge (same logic as cherry-pick). |
| 25 | 3. Writing a new ``CommitRecord`` with the new parent pointer. |
| 26 | |
| 27 | State |
| 28 | ----- |
| 29 | When a conflict occurs mid-replay, the rebase pauses and writes |
| 30 | ``.muse/REBASE_STATE.json``. The user resolves the conflict and runs |
| 31 | ``muse rebase --continue`` to resume, or ``muse rebase --abort`` to undo. |
| 32 | |
| 33 | Squash mode |
| 34 | ----------- |
| 35 | When ``squash=True``, all commits are replayed without writing intermediate |
| 36 | commits — only the final merged state is committed. Squash mode does not |
| 37 | support ``--continue`` because no incremental state is tracked. |
| 38 | |
| 39 | Security model |
| 40 | -------------- |
| 41 | - ``REBASE_STATE.json`` is checked for symlinks before any read or write to |
| 42 | prevent path-traversal via a planted symlink. |
| 43 | - File size is capped at ``_MAX_STATE_BYTES`` before parsing to prevent OOM |
| 44 | via a crafted or corrupt state file. |
| 45 | """ |
| 46 | |
| 47 | import datetime |
| 48 | import json |
| 49 | import logging |
| 50 | import pathlib |
| 51 | from typing import TypedDict |
| 52 | |
| 53 | from muse.core.ids import hash_commit, hash_snapshot |
| 54 | from muse.core.snapshot import directories_from_manifest |
| 55 | from muse.core.io import write_text_atomic |
| 56 | from muse.core.refs import ( |
| 57 | RefConflictError, |
| 58 | write_branch_ref, |
| 59 | ) |
| 60 | from muse.core.commits import ( |
| 61 | CommitRecord, |
| 62 | read_commit, |
| 63 | write_commit, |
| 64 | ) |
| 65 | from muse.core.snapshots import ( |
| 66 | SnapshotRecord, |
| 67 | read_snapshot, |
| 68 | write_snapshot, |
| 69 | ) |
| 70 | from muse.core.validation import validate_branch_name |
| 71 | from muse.core.workdir import apply_manifest |
| 72 | from muse.domain import MergeResult, MuseDomainPlugin, SnapshotManifest |
| 73 | from muse.core.types import Manifest, load_json_file |
| 74 | from muse.core.paths import rebase_state_path |
| 75 | |
logger = logging.getLogger(__name__)


# Upper bound on the size of REBASE_STATE.json that load_rebase_state() is
# willing to parse; larger files are ignored with a warning.
# 4 MiB — a state file with 10 000 commit IDs at ~64 bytes each is ~640 KiB,
# so the cap leaves generous headroom for legitimate state files.
_MAX_STATE_BYTES = 4 * 1024 * 1024
| 81 | |
| 82 | # --------------------------------------------------------------------------- |
| 83 | # State TypedDict |
| 84 | # --------------------------------------------------------------------------- |
| 85 | |
class RebaseState(TypedDict):
    """Serialisable state for an in-progress rebase session.

    This is the shape written to ``.muse/REBASE_STATE.json`` by
    ``save_rebase_state`` and rebuilt by ``load_rebase_state``.
    """

    # Name of the branch being rebased.
    original_branch: str
    # HEAD of that branch before the rebase started.
    original_head: str
    # Target commit ID the commits are replayed onto.
    onto: str
    # Entries still to be replayed.
    # NOTE(review): presumably commit IDs, oldest first — confirm against the
    # replay loop, which is not visible here.
    remaining: list[str]
    # Entries already replayed; ``get_rebase_progress`` reports their count as
    # ``done``.
    # NOTE(review): whether these are original or rewritten commit IDs cannot
    # be determined from this section — confirm against the replay loop.
    completed: list[str]
    # True when this is a squash rebase.
    squash: bool
| 95 | |
class RebaseProgress(TypedDict):
    """Snapshot of rebase progress for ``--status`` output.

    Built by ``get_rebase_progress``. When no rebase is active every string
    field is empty, every counter is ``0`` and ``squash`` is ``False``.

    Attributes:
        active: Whether a rebase is in progress.
        original_branch: Branch being rebased (empty string if not active).
        original_head: HEAD before the rebase started.
        onto: Target commit ID.
        total: Total commits to replay (``done + remaining``).
        done: Commits already replayed.
        remaining: Commits yet to replay.
        squash: Whether this is a squash rebase.
    """

    active: bool
    original_branch: str
    original_head: str
    onto: str
    total: int
    done: int
    remaining: int
    squash: bool
| 118 | |
| 119 | # --------------------------------------------------------------------------- |
| 120 | # State file I/O |
| 121 | # --------------------------------------------------------------------------- |
| 122 | |
def load_rebase_state(root: pathlib.Path) -> RebaseState | None:
    """Load the persisted rebase session for the repository at *root*.

    The state file is only parsed after two safety checks pass:

    - it must not be a symlink, so a planted link cannot redirect the read
      to a file outside the repo;
    - it must not be larger than ``_MAX_STATE_BYTES``, so a tampered or
      corrupt file cannot exhaust memory.

    Args:
        root: Repository root.

    Returns:
        The parsed ``RebaseState``, or ``None`` when there is no usable
        state: the file is absent, is a symlink, cannot be stat'ed, is
        oversized, yields nothing from ``load_json_file``, or does not have
        the expected shape (a JSON object whose ``remaining`` and
        ``completed`` values are lists).
    """

    def _strings_only(items: list[object]) -> list[str]:
        # Entries that are not strings are dropped, never coerced.
        return [str(item) for item in items if isinstance(item, str)]

    state_file = rebase_state_path(root)
    if not state_file.exists():
        return None
    if state_file.is_symlink():
        logger.warning(
            "⚠️ REBASE_STATE.json is a symlink — ignoring to prevent path traversal"
        )
        return None

    try:
        byte_count = state_file.stat().st_size
    except OSError:
        return None

    mib = 1024 * 1024
    if byte_count > _MAX_STATE_BYTES:
        logger.warning(
            "⚠️ REBASE_STATE.json is %.1f MiB — exceeds cap of %d MiB; ignoring",
            byte_count / mib,
            _MAX_STATE_BYTES // mib,
        )
        return None

    payload = load_json_file(state_file)
    # A ``None`` payload is not a dict either, so one check covers both cases.
    if not isinstance(payload, dict):
        return None

    pending = payload.get("remaining")
    finished = payload.get("completed")
    if not (isinstance(pending, list) and isinstance(finished, list)):
        return None

    state: RebaseState = {
        "original_branch": str(payload.get("original_branch", "")),
        "original_head": str(payload.get("original_head", "")),
        "onto": str(payload.get("onto", "")),
        "remaining": _strings_only(pending),
        "completed": _strings_only(finished),
        "squash": bool(payload.get("squash", False)),
    }
    return state
| 169 | |
def save_rebase_state(root: pathlib.Path, state: RebaseState) -> None:
    """Persist *state* to ``.muse/REBASE_STATE.json``.

    The state is serialised as indented JSON and handed to
    ``write_text_atomic``.

    Args:
        root: Repository root.
        state: The rebase session to persist.

    Raises:
        OSError: If the target path is a symlink — including a dangling
            one — because the write could otherwise be redirected to an
            unintended location.
    """
    path = rebase_state_path(root)
    # Test is_symlink() on its own. exists() follows the link and is False
    # for a dangling symlink, so gating on it would let a planted link to a
    # not-yet-existing target slip past this guard.
    if path.is_symlink():
        raise OSError(
            f"Refusing to write rebase state — {path} is a symlink"
        )
    write_text_atomic(path, json.dumps(dict(state), indent=2))
| 182 | |
def clear_rebase_state(root: pathlib.Path) -> None:
    """Delete ``.muse/REBASE_STATE.json`` for the repository at *root*.

    Does nothing when the state file is absent. A symlink at the state path
    is left in place (with a warning) so that a planted link can never cause
    a file outside the repo to be removed.

    Args:
        root: Repository root.
    """
    state_file = rebase_state_path(root)
    if not state_file.exists():
        return

    if not state_file.is_symlink():
        state_file.unlink()
        logger.debug("✅ Cleared REBASE_STATE.json")
        return

    logger.warning(
        "⚠️ REBASE_STATE.json is a symlink — refusing to unlink"
    )
| 199 | |
def get_rebase_progress(root: pathlib.Path) -> RebaseProgress:
    """Summarise the current rebase session as a :class:`RebaseProgress`.

    Intended for ``muse rebase --status``: callers get a structured view
    without parsing the state file themselves.

    Args:
        root: Repository root.

    Returns:
        A populated ``RebaseProgress``. When ``load_rebase_state`` finds no
        session, ``active`` is ``False``, the string fields are empty and
        the counters are zero.
    """
    state = load_rebase_state(root)

    if state is not None:
        done_count = len(state["completed"])
        pending_count = len(state["remaining"])
        return RebaseProgress(
            active=True,
            original_branch=state["original_branch"],
            original_head=state["original_head"],
            onto=state["onto"],
            total=done_count + pending_count,
            done=done_count,
            remaining=pending_count,
            squash=state["squash"],
        )

    # No session on disk: report an inactive, all-empty progress record.
    return RebaseProgress(
        active=False,
        original_branch="",
        original_head="",
        onto="",
        total=0,
        done=0,
        remaining=0,
        squash=False,
    )
| 232 | |
| 233 | # --------------------------------------------------------------------------- |
| 234 | # Commit collection |
| 235 | # --------------------------------------------------------------------------- |
| 236 | |
def collect_commits_to_replay(
    root: pathlib.Path,
    stop_at: str,
    tip: str,
    max_commits: int = 10_000,
) -> list[CommitRecord]:
    """Return commits from *tip* back to (but not including) *stop_at*.

    The result is in chronological order (oldest first) so the replay loop
    can iterate forward.

    Only the first-parent chain is walked (``parent_commit_id``). Merge
    commits are replayed as a single commit (their second-parent history is
    not re-played).

    The walk ends when it reaches *stop_at*, runs out of parents, revisits a
    commit ID, hits a commit that cannot be read, or has gathered
    *max_commits* commits. The last two cases yield an incomplete list, so
    each is reported with a warning instead of passing silently.

    Args:
        root: Repository root.
        stop_at: Commit ID to stop at (exclusive — the merge base).
        tip: Starting commit ID (the current branch HEAD).
        max_commits: Safety cap on the number of commits returned. Prevents
            unbounded traversal on very long histories.

    Returns:
        List of ``CommitRecord`` objects, oldest first (ready to replay).
    """
    commits: list[CommitRecord] = []
    seen: set[str] = set()
    current: str | None = tip

    while current and current not in seen and len(commits) < max_commits:
        seen.add(current)
        if current == stop_at:
            break
        commit = read_commit(root, current)
        if commit is None:
            logger.warning(
                "⚠️ rebase: commit %s could not be read — history walk stopped early",
                current,
            )
            break
        commits.append(commit)
        current = commit.parent_commit_id

    # Every other exit leaves ``current`` empty, already visited, or equal to
    # ``stop_at``. An unvisited commit that is not the stop point can only be
    # left behind by the max_commits cap.
    if current and current != stop_at and current not in seen:
        logger.warning(
            "⚠️ rebase: history walk hit the cap of %d commits before reaching %s — "
            "older commits were not collected",
            max_commits,
            stop_at,
        )

    # Reverse so oldest is first.
    commits.reverse()
    return commits
| 278 | |
| 279 | # --------------------------------------------------------------------------- |
| 280 | # Single-commit replay |
| 281 | # --------------------------------------------------------------------------- |
| 282 | |
def replay_one(
    root: pathlib.Path,
    commit: CommitRecord,
    parent_id: str,
    plugin: MuseDomainPlugin,
    domain: str,
    branch: str,
) -> CommitRecord | list[str]:
    """Re-create *commit* as a child of *parent_id* via the domain plugin.

    The plugin is asked for a three-way merge of:

    - ``base``   — the snapshot of the commit's original parent,
    - ``ours``   — the snapshot of *parent_id* (the rebased tip so far),
    - ``theirs`` — the snapshot of *commit* itself.

    If the commit's original parent or *parent_id* is empty, or its commit or
    snapshot record cannot be read, that side of the merge is an empty
    manifest. A missing ``theirs`` snapshot is an error.

    On a clean merge the merged manifest is applied to the working tree, then
    a new snapshot and a new commit are written. The new commit keeps the
    original message, author, agent and model but takes a fresh UTC
    timestamp. On conflict nothing is written; recording the conflict and
    pausing the rebase is left to the caller.

    Args:
        root: Repository root.
        commit: The original commit being replayed.
        parent_id: The new parent commit ID (last replayed commit or onto base).
        plugin: The active domain plugin instance.
        domain: Domain name string.
        branch: Current branch name.

    Returns:
        The new ``CommitRecord`` when the merge is clean, otherwise the
        plugin's list of conflicting paths.

    Raises:
        TypeError: If *plugin* is not a ``MuseDomainPlugin``.
        ValueError: If the snapshot of *commit* cannot be read.
    """
    if not isinstance(plugin, MuseDomainPlugin):
        raise TypeError(
            f"replay_one: plugin {type(plugin).__name__!r} is not a MuseDomainPlugin"
        )

    def _manifest_or_empty(commit_id):
        # Manifest recorded for *commit_id*; {} when the ID is empty or the
        # commit / snapshot record cannot be read.
        if not commit_id:
            return {}
        record = read_commit(root, commit_id)
        if not record:
            return {}
        snap = read_snapshot(root, record.snapshot_id)
        if not snap:
            return {}
        return snap.manifest

    def _as_snapshot(files):
        # Wrap a raw manifest in the structure the plugin's merge() expects.
        return SnapshotManifest(
            files=files,
            domain=domain,
            directories=directories_from_manifest(files),
        )

    # Base: what existed immediately before the original commit.
    base_manifest: Manifest = _manifest_or_empty(commit.parent_commit_id)

    # Theirs: the content the original commit recorded. Substituting {} here
    # would make the replay silently delete every file the commit touched, so
    # a missing snapshot is fatal.
    replayed_snap = read_snapshot(root, commit.snapshot_id)
    if replayed_snap is None:
        raise ValueError(
            f"rebase: snapshot {commit.snapshot_id} for commit "
            f"{commit.commit_id} ({commit.message!r}) is missing or corrupt — "
            "cannot replay this commit. Run `muse verify-pack` to audit the store."
        )
    theirs_manifest = replayed_snap.manifest

    # Ours: the tip the rebase has built so far.
    ours_manifest: Manifest = _manifest_or_empty(parent_id)

    outcome: MergeResult = plugin.merge(
        _as_snapshot(base_manifest),
        _as_snapshot(ours_manifest),
        _as_snapshot(theirs_manifest),
        repo_root=root,
    )
    if not outcome.is_clean:
        return outcome.conflicts

    merged_files = outcome.merged["files"]

    # Bring the working tree from the current tip to the merged state.
    apply_manifest(root, ours_manifest, merged_files)

    merged_dirs = directories_from_manifest(merged_files)
    snapshot_id = hash_snapshot(merged_files, merged_dirs)
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    new_parents = [parent_id] if parent_id else []
    new_commit_id = hash_commit(
        parent_ids=new_parents,
        snapshot_id=snapshot_id,
        message=commit.message,
        committed_at_iso=now_utc.isoformat(),
        author=commit.author or "",
    )

    # Snapshot first, then the commit that refers to it.
    write_snapshot(
        root,
        SnapshotRecord(
            snapshot_id=snapshot_id,
            manifest=merged_files,
            directories=merged_dirs,
        ),
    )
    replayed = CommitRecord(
        commit_id=new_commit_id,
        branch=branch,
        snapshot_id=snapshot_id,
        message=commit.message,
        committed_at=now_utc,
        parent_commit_id=parent_id or None,
        author=commit.author,
        agent_id=commit.agent_id,
        model_id=commit.model_id,
    )
    write_commit(root, replayed)
    return replayed
| 397 | |
def _write_branch_ref(root: pathlib.Path, branch: str, commit_id: str, **kwargs: str) -> None:
    """Point *branch* at *commit_id* by delegating to ``write_branch_ref``.

    This is a pure pass-through; the atomic, durable write and any
    compare-and-swap check are performed by ``write_branch_ref`` itself.

    Keyword arguments are forwarded untouched. The one callers are expected
    to use is ``expected_id=<str>``:

    - Supplied: the ref is only updated if it currently equals
      *expected_id*. Passing the pre-operation HEAD means a concurrent
      advance of the branch is detected rather than silently orphaned.
    - Omitted: the write is unconditional (e.g. an abort restoring the
      original branch pointer).

    Raises:
        RefConflictError: When *expected_id* is given and the current ref
            does not match — another writer advanced the branch.
    """
    write_branch_ref(
        root,
        branch,
        commit_id,
        **kwargs,
    )
File History
1 commit
sha256:06dba78c2a78e251b580422dd1fd547f3c8357ff18f7709a860873b2d24dbbbf
chore: bump version to 0.2.0rc14
Sonnet 4.6
patch
15 hours ago