test_repair_commit_endpoint.py
file-level
1
files
1
commits
0
hotspots
0
🧊 dead
0
💥 blast risk
| 1 | """TDD — the repair-commit endpoint must fix a commit whose identity no longer |
| 2 | reproduces its id because a ``signer_public_key`` was stamped onto the stored row |
| 3 | *without* recomputing the commit_id. |
| 4 | |
| 5 | This is the exact corruption found on staging gabriel/muse: the rc10 object-store |
| 6 | migration's signing pass wrote an Ed25519 ``signer_public_key`` (and signature) onto |
| 7 | two main-line merge commit rows that were originally committed *unsigned*. Because |
| 8 | ``signer_public_key`` is part of the commit identity hash, the serve path now |
| 9 | recomputes a different id than the stored ``commit_id`` — the client's hash check |
| 10 | fails on clone, the commit is dropped, and every descendant fails "parent not in |
| 11 | mpack", emptying the working tree. |
| 12 | |
| 13 | ``repair-commit`` is the commit-level analog of ``repair-object`` / |
| 14 | ``repair-snapshot``: the caller submits the verified-correct commit record, the |
| 15 | server recomputes the identity *exactly as the serve path will reproduce it* |
| 16 | (round-tripping ``committed_at`` through ``timestamp.isoformat()`` so the stored |
| 17 | value is what gets re-served), verifies it equals ``commit_id``, and force-overwrites |
| 18 | the row. RED before the handler exists; GREEN after. |
| 19 | |
| 20 | Integration test against localhost (musehub @ :1337, postgres @ :5434). It pushes a |
| 21 | short chain, simulates the migration corruption by stamping a spurious signer onto |
| 22 | the head commit's row, repairs it via ``wire_repair_commit``, and asserts the serve |
| 23 | path then reproduces the id. The corruption is on a content-addressed row shared |
| 24 | globally, so the test uses unique commit content per run and never leaves the row |
| 25 | corrupt (repair restores it to the correct unsigned state). |
| 26 | """ |
| 27 | from __future__ import annotations |
| 28 | |
| 29 | import asyncio |
| 30 | import hashlib |
| 31 | import json |
| 32 | import subprocess |
| 33 | import time as _time |
| 34 | from collections.abc import Iterator |
| 35 | from pathlib import Path |
| 36 | |
| 37 | import pytest |
| 38 | from sqlalchemy import text as _sa_text |
| 39 | from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine |
| 40 | from sqlalchemy.orm import sessionmaker |
| 41 | |
| 42 | from muse.core.ids import long_id |
| 43 | from musehub.db.musehub_repo_models import MusehubCommit |
| 44 | from musehub.services.musehub_wire_push import wire_repair_commit |
| 45 | from musehub.services.musehub_wire_shared import _commit_identity_bytes, _to_wire_commit |
| 46 | from musehub.types.json_types import JSONObject |
| 47 | |
# Base URL of the local hub; passed to ``--hub`` and used to build the push remote.
HUB = "https://localhost:1337"
# Async SQLAlchemy URL handed to ``create_async_engine`` for direct row access.
DB_URL = "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub"
# Parent of the directory containing this file; used as ``cwd`` for hub repo commands.
REPO_ROOT = Path(__file__).parent.parent
# Signer value stamped onto the stored row to simulate the migration corruption.
SPURIOUS_SIGNER = "ed25519:ziza_Zad72Q0bEkBeKeRIzkLbUOaEgy59AtoIv4rUeM"
| 52 | |
| 53 | |
def muse(*args: str, cwd: Path, timeout: int = 90) -> subprocess.CompletedProcess:
    """Run the ``muse`` CLI with *args* inside *cwd* and return the finished process.

    stdout and stderr are captured as text. A non-zero exit status is not
    treated as an error here; the caller inspects ``returncode`` itself.
    *timeout* (seconds) is forwarded to ``subprocess.run``.
    """
    command = ["muse", *args]
    return subprocess.run(
        command,
        cwd=str(cwd),
        timeout=timeout,
        capture_output=True,
        text=True,
    )
| 58 | |
| 59 | |
def muse_check(*args: str, cwd: Path, timeout: int = 90) -> str:
    """Run ``muse`` via :func:`muse` and return its stdout.

    Raises:
        RuntimeError: if the command exits non-zero. The message carries the
            return code and the first 600 characters of stderr.
    """
    proc = muse(*args, cwd=cwd, timeout=timeout)
    if proc.returncode == 0:
        return proc.stdout
    raise RuntimeError(f"muse {' '.join(args)} failed (rc={proc.returncode}):\n{proc.stderr[:600]}")
| 65 | |
| 66 | |
def _commit_id_by_message(repo: Path, message: str) -> str:
    """Return the ``commit_id`` of the first ``muse log --json`` entry whose
    stripped message equals *message*.

    Raises:
        AssertionError: if no entry in the log matches.
    """
    log = json.loads(muse_check("log", "--json", cwd=repo))
    for entry in log["commits"]:
        # A missing or null message is treated as the empty string.
        subject = entry.get("message") or ""
        if subject.strip() == message:
            return entry["commit_id"]
    raise AssertionError(f"no commit with message {message!r}")
| 73 | |
| 74 | |
def _serve_hash(row: MusehubCommit) -> str:
    """Recompute the id for *row* the way this test models the serve path.

    The row is converted to its wire form, its identity bytes are hashed with
    SHA-256, and the hex digest is passed through ``long_id``. The clone
    client compares this value against the stored ``commit_id``.
    """
    wire_commit = _to_wire_commit(row)
    identity = _commit_identity_bytes(wire_commit)
    digest = hashlib.sha256(identity).hexdigest()
    return long_id(digest)
| 78 | |
| 79 | |
def _good_commit_dict(row: MusehubCommit) -> JSONObject:
    """Assemble the verified-correct, unsigned wire-commit payload from a pristine row.

    All three signing fields are sent as empty strings, the original unsigned
    state. ``committed_at`` is the ISO-8601 form of ``row.timestamp``, or
    ``""`` when the row has no timestamp. The first and second entries of
    ``row.parent_ids`` become ``parent_commit_id`` and ``parent2_commit_id``;
    a missing parent is sent as ``None``.
    """
    parent_ids = list(row.parent_ids or [])
    first_parent = parent_ids[0] if parent_ids else None
    second_parent = parent_ids[1] if len(parent_ids) > 1 else None
    committed_at = row.timestamp.isoformat() if row.timestamp else ""

    payload: JSONObject = {
        "commit_id": row.commit_id,
        "branch": row.branch,
        "snapshot_id": row.snapshot_id,
        "message": row.message,
        "committed_at": committed_at,
        "parent_commit_id": first_parent,
        "parent2_commit_id": second_parent,
        "author": row.author,
        # The correct, original (unsigned) state.
        "signer_public_key": "",
        "signature": "",
        "signer_key_id": "",
    }
    return payload
| 96 | |
| 97 | |
async def _capture_corrupt_repair(commit_id: str) -> JSONObject:
    """Corrupt the stored commit row, repair it, and report what happened.

    Steps, each in its own session so every read sees committed state:

    1. Load the pristine row, build the known-good payload from it, look up a
       ``repo_id`` that references the commit, and record whether the
       serve-hash equals ``commit_id``.
    2. Stamp ``SPURIOUS_SIGNER`` onto ``signer_public_key`` without touching
       the id (the simulated migration corruption) and commit.
    3. Re-read the row and record the now-mismatching serve-hash.
    4. Call ``wire_repair_commit`` with the known-good payload.
    5. Re-read the row and capture the post-repair signer and serve-hash.

    Safety net: once the spurious signer may have been written, the
    ``finally`` block puts the original signer value back if the row still
    carries ``SPURIOUS_SIGNER``. This covers a repair that raised or did not
    take effect. The restore runs after the post-repair state has been
    captured, so the returned report still reflects what the repair did.

    Returns:
        A dict with keys ``pristine_ok``, ``corrupt_serve``, ``result``,
        ``post_signer``, ``post_serve`` and ``commit_id``.

    Raises:
        AssertionError: if the commit row is not present on the server.
    """
    engine = create_async_engine(DB_URL)
    Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
    original_signer = ""
    maybe_corrupt = False
    try:
        async with Session() as s:
            row = await s.get(MusehubCommit, commit_id)
            assert row is not None, f"commit {commit_id[:18]} not on server after push"
            good = _good_commit_dict(row)
            original_signer = row.signer_public_key
            repo_id = (
                await s.execute(
                    _sa_text("SELECT repo_id FROM musehub_commit_refs WHERE commit_id=:c LIMIT 1"),
                    {"c": commit_id},
                )
            ).scalar_one()
            pristine_ok = _serve_hash(row) == commit_id

        # Inject the migration corruption: a spurious signer with the id left unchanged.
        async with Session() as s:
            row = await s.get(MusehubCommit, commit_id)
            assert row is not None, f"commit {commit_id[:18]} not on server after push"
            row.signer_public_key = SPURIOUS_SIGNER
            # Set before the commit so an ambiguous commit failure still triggers the check.
            maybe_corrupt = True
            await s.commit()
        async with Session() as s:
            row = await s.get(MusehubCommit, commit_id)
            corrupt_serve = _serve_hash(row)

        # Repair as the repo owner.
        async with Session() as s:
            result = await wire_repair_commit(s, repo_id, good, caller_id="gabriel")

        async with Session() as s:
            row = await s.get(MusehubCommit, commit_id)
            return {
                "pristine_ok": pristine_ok,
                "corrupt_serve": corrupt_serve,
                "result": result,
                "post_signer": row.signer_public_key,
                "post_serve": _serve_hash(row),
                "commit_id": commit_id,
            }
    finally:
        try:
            if maybe_corrupt:
                # Never leave the shared row corrupt: undo the injection if it is still there.
                async with Session() as s:
                    row = await s.get(MusehubCommit, commit_id)
                    if row is not None and row.signer_public_key == SPURIOUS_SIGNER:
                        row.signer_public_key = original_signer
                        await s.commit()
        finally:
            await engine.dispose()
| 141 | |
| 142 | |
@pytest.fixture
def hub_repo(tmp_path: Path) -> Iterator[str]:
    """Create a throwaway public hub repo, yield its slug, and delete it afterwards.

    Yields:
        The ``gabriel/<slug>`` path of the created repo, where ``<slug>`` is
        taken from the JSON output of ``muse hub repo create``.

    Teardown deletion is best-effort: it goes through :func:`muse`, so a
    failed delete does not raise.
    """
    # pytest derives tmp_path's leaf name from the test name, so its tail can
    # repeat between runs. A timestamp suffix (same scheme as the test's
    # content tag) stops a repo left by an earlier failed teardown from
    # colliding with this run's create.
    name = f"test-repair-commit-{tmp_path.name[-6:]}-{int(_time.time())}"
    out = muse_check(
        "hub", "repo", "create", "--name", name,
        "--visibility", "public", "--no-init", "--hub", HUB, "--json",
        cwd=REPO_ROOT,
    )
    slug = f"gabriel/{json.loads(out)['slug']}"
    yield slug
    muse("hub", "repo", "delete", slug, "--yes", "--hub", HUB, "--json", cwd=REPO_ROOT)
| 154 | |
| 155 | |
def test_repair_commit_fixes_spurious_signer(tmp_path: Path, hub_repo: str) -> None:
    """Push a three-commit chain, corrupt the head row's signer, repair it, and
    check that the serve-hash matches the commit id again."""
    seed = tmp_path / "seed"
    seed.mkdir()
    muse_check("init", cwd=seed)

    # Unique content per run so the content-addressed commit rows are not shared
    # with another repo (safe to corrupt/repair the global row).
    run_tag = f"{tmp_path.name}-{int(_time.time())}"
    steps = (("f1.txt", "A"), ("f2.txt", "B"), ("f3.txt", "C"))
    for filename, message in steps:
        seed.joinpath(filename).write_text(f"{message}-{run_tag}\n")
        muse_check("code", "add", ".", cwd=seed)
        # Unsigned commits — signer_public_key == "" (matches the original staging state).
        muse_check(
            "commit", "-m", message, "--agent-id", "test", "--model-id", "test", cwd=seed
        )

    remote_url = f"{HUB}/{hub_repo}"
    muse_check("remote", "add", "origin", remote_url, cwd=seed)
    muse_check("push", "origin", "main", cwd=seed)

    head = _commit_id_by_message(seed, "C")
    outcome = asyncio.run(_capture_corrupt_repair(head))

    assert outcome["pristine_ok"], "pushed commit's serve-hash must equal its id before corruption"
    assert outcome["corrupt_serve"] != head, (
        "stamping a spurious signer must break the serve-hash — otherwise the test "
        "is not reproducing the staging corruption"
    )
    assert outcome["result"] == {"repaired": True}, f"unexpected repair result: {outcome['result']}"
    assert outcome["post_signer"] == "", "repair must clear the spurious signer_public_key"
    assert outcome["post_serve"] == head, (
        f"after repair the serve path must reproduce the commit id {head[:18]} — "
        f"got {outcome['post_serve'][:18]}. The repair did not stick."
    )