"""TDD — the repair-commit endpoint must fix a commit whose identity no longer reproduces its id because a ``signer_public_key`` was stamped onto the stored row *without* recomputing the commit_id. This is the exact corruption found on staging gabriel/muse: the rc10 object-store migration's signing pass wrote an Ed25519 ``signer_public_key`` (and signature) onto two main-line merge commit rows that were originally committed *unsigned*. Because ``signer_public_key`` is part of the commit identity hash, the serve path now recomputes a different id than the stored ``commit_id`` — the client's hash check fails on clone, the commit is dropped, and every descendant fails "parent not in mpack", emptying the working tree. ``repair-commit`` is the commit-level analog of ``repair-object`` / ``repair-snapshot``: the caller submits the verified-correct commit record, the server recomputes the identity *exactly as the serve path will reproduce it* (round-tripping ``committed_at`` through ``timestamp.isoformat()`` so the stored value is what gets re-served), verifies it equals ``commit_id``, and force-overwrites the row. RED before the handler exists; GREEN after. Integration test against localhost (musehub @ :1337, postgres @ :5434). It pushes a short chain, simulates the migration corruption by stamping a spurious signer onto the head commit's row, repairs it via ``wire_repair_commit``, and asserts the serve path then reproduces the id. The corruption is on a content-addressed row shared globally, so the test uses unique commit content per run and never leaves the row corrupt (repair restores it to the correct unsigned state). """ from __future__ import annotations import asyncio import hashlib import json import subprocess import time as _time from collections.abc import Iterator from pathlib import Path import pytest from sqlalchemy import text as _sa_text from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine from sqlalchemy.orm import sessionmaker from muse.core.ids import long_id from musehub.db.musehub_repo_models import MusehubCommit from musehub.services.musehub_wire_push import wire_repair_commit from musehub.services.musehub_wire_shared import _commit_identity_bytes, _to_wire_commit from musehub.types.json_types import JSONObject HUB = "https://localhost:1337" DB_URL = "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub" REPO_ROOT = Path(__file__).parent.parent SPURIOUS_SIGNER = "ed25519:ziza_Zad72Q0bEkBeKeRIzkLbUOaEgy59AtoIv4rUeM" def muse(*args: str, cwd: Path, timeout: int = 90) -> subprocess.CompletedProcess: return subprocess.run( ["muse", *args], cwd=str(cwd), capture_output=True, text=True, timeout=timeout ) def muse_check(*args: str, cwd: Path, timeout: int = 90) -> str: r = muse(*args, cwd=cwd, timeout=timeout) if r.returncode != 0: raise RuntimeError(f"muse {' '.join(args)} failed (rc={r.returncode}):\n{r.stderr[:600]}") return r.stdout def _commit_id_by_message(repo: Path, message: str) -> str: commits = json.loads(muse_check("log", "--json", cwd=repo))["commits"] for c in commits: if (c.get("message") or "").strip() == message: return c["commit_id"] raise AssertionError(f"no commit with message {message!r}") def _serve_hash(row: MusehubCommit) -> str: """The id the serve path recomputes for this row (what the clone client checks).""" return long_id(hashlib.sha256(_commit_identity_bytes(_to_wire_commit(row))).hexdigest()) def _good_commit_dict(row: MusehubCommit) -> JSONObject: """Build the verified-correct (unsigned) wire-commit payload from the pristine row.""" parents = list(row.parent_ids or []) return { "commit_id": row.commit_id, "branch": row.branch, "snapshot_id": row.snapshot_id, "message": row.message, "committed_at": row.timestamp.isoformat() if row.timestamp else "", "parent_commit_id": parents[0] if len(parents) >= 1 else None, "parent2_commit_id": parents[1] if len(parents) >= 2 else None, "author": row.author, "signer_public_key": "", # the correct, original (unsigned) state "signature": "", "signer_key_id": "", } async def _capture_corrupt_repair(commit_id: str) -> JSONObject: """Capture the pristine row, prove serve-hash==id, stamp a spurious signer, prove serve-hash!=id, then repair via wire_repair_commit and report the post-repair state.""" engine = create_async_engine(DB_URL) Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) try: async with Session() as s: row = await s.get(MusehubCommit, commit_id) assert row is not None, f"commit {commit_id[:18]} not on server after push" good = _good_commit_dict(row) repo_id = ( await s.execute( _sa_text("SELECT repo_id FROM musehub_commit_refs WHERE commit_id=:c LIMIT 1"), {"c": commit_id}, ) ).scalar_one() pristine_ok = _serve_hash(row) == commit_id # Inject the migration corruption: a spurious signer with the id left unchanged. async with Session() as s: row = await s.get(MusehubCommit, commit_id) row.signer_public_key = SPURIOUS_SIGNER await s.commit() async with Session() as s: row = await s.get(MusehubCommit, commit_id) corrupt_serve = _serve_hash(row) # Repair as the repo owner. async with Session() as s: result = await wire_repair_commit(s, repo_id, good, caller_id="gabriel") async with Session() as s: row = await s.get(MusehubCommit, commit_id) return { "pristine_ok": pristine_ok, "corrupt_serve": corrupt_serve, "result": result, "post_signer": row.signer_public_key, "post_serve": _serve_hash(row), "commit_id": commit_id, } finally: await engine.dispose() @pytest.fixture def hub_repo(tmp_path: Path) -> Iterator[str]: name = f"test-repair-commit-{tmp_path.name[-6:]}" out = muse_check( "hub", "repo", "create", "--name", name, "--visibility", "public", "--no-init", "--hub", HUB, "--json", cwd=REPO_ROOT, ) slug = f"gabriel/{json.loads(out)['slug']}" yield slug muse("hub", "repo", "delete", slug, "--yes", "--hub", HUB, "--json", cwd=REPO_ROOT) def test_repair_commit_fixes_spurious_signer(tmp_path: Path, hub_repo: str) -> None: repo = tmp_path / "seed" repo.mkdir() muse_check("init", cwd=repo) # Unique content per run so the content-addressed commit rows are not shared # with another repo (safe to corrupt/repair the global row). tag = f"{tmp_path.name}-{int(_time.time())}" for fname, msg in [("f1.txt", "A"), ("f2.txt", "B"), ("f3.txt", "C")]: (repo / fname).write_text(f"{msg}-{tag}\n") muse_check("code", "add", ".", cwd=repo) # Unsigned commits — signer_public_key == "" (matches the original staging state). muse_check("commit", "-m", msg, "--agent-id", "test", "--model-id", "test", cwd=repo) muse_check("remote", "add", "origin", f"{HUB}/{hub_repo}", cwd=repo) muse_check("push", "origin", "main", cwd=repo) head = _commit_id_by_message(repo, "C") r = asyncio.run(_capture_corrupt_repair(head)) assert r["pristine_ok"], "pushed commit's serve-hash must equal its id before corruption" assert r["corrupt_serve"] != head, ( "stamping a spurious signer must break the serve-hash — otherwise the test " "is not reproducing the staging corruption" ) assert r["result"] == {"repaired": True}, f"unexpected repair result: {r['result']}" assert r["post_signer"] == "", "repair must clear the spurious signer_public_key" assert r["post_serve"] == head, ( f"after repair the serve path must reproduce the commit id {head[:18]} — " f"got {r['post_serve'][:18]}. The repair did not stick." )