gabriel / musehub public
test_repair_commit_endpoint.py python
183 lines 7.9 KB
Raw
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor ⚠ breaking 22 hours ago
1 """TDD — the repair-commit endpoint must fix a commit whose identity no longer
2 reproduces its id because a ``signer_public_key`` was stamped onto the stored row
3 *without* recomputing the commit_id.
4
5 This is the exact corruption found on staging gabriel/muse: the rc10 object-store
6 migration's signing pass wrote an Ed25519 ``signer_public_key`` (and signature) onto
7 two main-line merge commit rows that were originally committed *unsigned*. Because
8 ``signer_public_key`` is part of the commit identity hash, the serve path now
9 recomputes a different id than the stored ``commit_id`` — the client's hash check
10 fails on clone, the commit is dropped, and every descendant fails "parent not in
11 mpack", emptying the working tree.
12
13 ``repair-commit`` is the commit-level analog of ``repair-object`` /
14 ``repair-snapshot``: the caller submits the verified-correct commit record, the
15 server recomputes the identity *exactly as the serve path will reproduce it*
16 (round-tripping ``committed_at`` through ``timestamp.isoformat()`` so the stored
17 value is what gets re-served), verifies it equals ``commit_id``, and force-overwrites
18 the row. RED before the handler exists; GREEN after.
19
20 Integration test against localhost (musehub @ :1337, postgres @ :5434). It pushes a
21 short chain, simulates the migration corruption by stamping a spurious signer onto
22 the head commit's row, repairs it via ``wire_repair_commit``, and asserts the serve
23 path then reproduces the id. The corruption is on a content-addressed row shared
24 globally, so the test uses unique commit content per run and never leaves the row
25 corrupt (repair restores it to the correct unsigned state).
26 """
27 from __future__ import annotations
28
29 import asyncio
30 import hashlib
31 import json
32 import subprocess
33 import time as _time
34 from pathlib import Path
35
36 import pytest
37 from sqlalchemy import text as _sa_text
38 from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
39 from sqlalchemy.orm import sessionmaker
40
41 from muse.core.ids import long_id
42 from musehub.db.musehub_repo_models import MusehubCommit
43 from musehub.services.musehub_wire_push import wire_repair_commit
44 from musehub.services.musehub_wire_shared import _commit_identity_bytes, _to_wire_commit
45
# Base URL of the local MuseHub instance: passed to the CLI via ``--hub`` and
# used as the prefix of the push remote URL.
HUB = "https://localhost:1337"
# Async SQLAlchemy URL handed to ``create_async_engine`` so the test can read
# and modify commit rows directly in the hub's Postgres.
DB_URL = "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub"
# Two directories above this file; used as the working directory for the
# ``muse hub repo create`` / ``delete`` CLI calls.
REPO_ROOT = Path(__file__).parent.parent
# Signer key stamped onto the head commit's row to simulate the migration
# corruption (the commit itself was created unsigned).
SPURIOUS_SIGNER = "ed25519:ziza_Zad72Q0bEkBeKeRIzkLbUOaEgy59AtoIv4rUeM"
50
51
def muse(*args: str, cwd: Path, timeout: int = 90) -> subprocess.CompletedProcess:
    """Run the ``muse`` CLI with *args* inside *cwd* and return the finished process.

    Output is captured as text. The exit code is NOT checked here; callers that
    need a hard failure on a non-zero exit should use ``muse_check``.
    """
    command = ["muse"]
    command.extend(args)
    return subprocess.run(
        command,
        cwd=str(cwd),
        capture_output=True,
        text=True,
        timeout=timeout,
    )
56
57
def muse_check(*args: str, cwd: Path, timeout: int = 90) -> str:
    """Run ``muse`` and return its stdout, raising ``RuntimeError`` on a non-zero exit.

    The error message carries the exit code and the first 600 characters of
    stderr so a failing CLI step is diagnosable from the test report.
    """
    proc = muse(*args, cwd=cwd, timeout=timeout)
    if proc.returncode == 0:
        return proc.stdout
    command_line = " ".join(args)
    stderr_head = proc.stderr[:600]
    raise RuntimeError(f"muse {command_line} failed (rc={proc.returncode}):\n{stderr_head}")
63
64
def _commit_id_by_message(repo: Path, message: str) -> str:
    """Return the ``commit_id`` of the first ``muse log`` entry whose message is *message*.

    Messages are compared after stripping surrounding whitespace; a missing or
    null message is treated as empty. Raises ``AssertionError`` when no entry
    matches.
    """
    log = json.loads(muse_check("log", "--json", cwd=repo))
    not_found = object()  # sentinel: distinguishes "no match" from any real value
    matches = (
        entry["commit_id"]
        for entry in log["commits"]
        if (entry.get("message") or "").strip() == message
    )
    found = next(matches, not_found)
    if found is not_found:
        raise AssertionError(f"no commit with message {message!r}")
    return found
71
72
def _serve_hash(row: MusehubCommit) -> str:
    """Recompute the id the serve path derives for *row* (the value a clone client checks).

    The row is converted to its wire form, the identity bytes of that wire
    commit are SHA-256 hashed, and the hex digest is passed through ``long_id``.
    """
    wire_commit = _to_wire_commit(row)
    identity = _commit_identity_bytes(wire_commit)
    digest = hashlib.sha256(identity).hexdigest()
    return long_id(digest)
76
77
def _good_commit_dict(row: MusehubCommit) -> dict:
    """Assemble the verified-correct (unsigned) wire-commit payload from a pristine row.

    All identity fields are copied from *row*; the three signing fields are
    forced to empty strings because the commit was originally unsigned.
    """
    parent_ids = list(row.parent_ids or [])
    # Pad with None so both parent slots can be unpacked regardless of arity.
    first_parent, second_parent = (parent_ids + [None, None])[:2]

    if row.timestamp:
        committed_at = row.timestamp.isoformat()
    else:
        committed_at = ""

    payload = {
        "commit_id": row.commit_id,
        "branch": row.branch,
        "snapshot_id": row.snapshot_id,
        "message": row.message,
        "committed_at": committed_at,
        "parent_commit_id": first_parent,
        "parent2_commit_id": second_parent,
        "author": row.author,
    }
    # The correct, original state: no signer, no signature, no key id.
    payload.update({"signer_public_key": "", "signature": "", "signer_key_id": ""})
    return payload
94
95
async def _capture_corrupt_repair(commit_id: str) -> dict:
    """Corrupt the stored row for *commit_id*, repair it, and report each stage.

    Steps, each in its own session so every read sees committed state:

    1. Load the pristine row, build the known-good payload from it, look up a
       ``repo_id`` that references the commit, and record whether the serve
       hash already equals ``commit_id``.
    2. Stamp ``SPURIOUS_SIGNER`` onto the row without touching its id.
    3. Recompute the serve hash of the corrupted row.
    4. Call ``wire_repair_commit`` with the known-good payload.
    5. Re-read the row and recompute its serve hash.

    If step 3 or 4 raises, the original ``signer_public_key`` is written back
    before the exception propagates, so a failing run does not leave the
    row corrupt.

    Returns a dict with keys ``pristine_ok``, ``corrupt_serve``, ``result``,
    ``post_signer``, ``post_serve`` and ``commit_id``.
    """
    engine = create_async_engine(DB_URL)
    Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
    try:
        async with Session() as s:
            row = await s.get(MusehubCommit, commit_id)
            assert row is not None, f"commit {commit_id[:18]} not on server after push"
            good = _good_commit_dict(row)
            # Remembered so the corruption can be undone if the repair fails.
            original_signer = row.signer_public_key
            repo_id = (
                await s.execute(
                    _sa_text("SELECT repo_id FROM musehub_commit_refs WHERE commit_id=:c LIMIT 1"),
                    {"c": commit_id},
                )
            ).scalar_one()
            pristine_ok = _serve_hash(row) == commit_id

        # Inject the migration corruption: a spurious signer with the id left unchanged.
        async with Session() as s:
            row = await s.get(MusehubCommit, commit_id)
            row.signer_public_key = SPURIOUS_SIGNER
            await s.commit()

        try:
            async with Session() as s:
                row = await s.get(MusehubCommit, commit_id)
                corrupt_serve = _serve_hash(row)

            # Repair as the repo owner.
            async with Session() as s:
                result = await wire_repair_commit(s, repo_id, good, caller_id="gabriel")
        except Exception:
            # The repair did not complete: undo the injected corruption so the
            # row is not left in a state the serve path cannot reproduce.
            async with Session() as s:
                row = await s.get(MusehubCommit, commit_id)
                if row is not None:
                    row.signer_public_key = original_signer
                    await s.commit()
            raise

        async with Session() as s:
            row = await s.get(MusehubCommit, commit_id)
            return {
                "pristine_ok": pristine_ok,
                "corrupt_serve": corrupt_serve,
                "result": result,
                "post_signer": row.signer_public_key,
                "post_serve": _serve_hash(row),
                "commit_id": commit_id,
            }
    finally:
        # Always release the connection pool, whether or not the repair succeeded.
        await engine.dispose()
139
140
@pytest.fixture
def hub_repo(tmp_path: Path):
    """Create a throwaway public hub repo and yield its ``gabriel/<slug>``.

    The repo is deleted after the test. Deletion is best-effort: its exit code
    is not checked, so a failed cleanup does not mask the test's own result.
    """
    # tmp_path's directory name alone is not unique across pytest sessions
    # (it derives from the test name plus a small counter), so a repo left
    # behind by a failed delete would collide with the next run's create.
    # A time suffix, as the test body uses for its content tag, avoids that.
    name = f"test-repair-commit-{tmp_path.name[-6:]}-{int(_time.time())}"
    out = muse_check(
        "hub", "repo", "create", "--name", name,
        "--visibility", "public", "--no-init", "--hub", HUB, "--json",
        cwd=REPO_ROOT,
    )
    slug = f"gabriel/{json.loads(out)['slug']}"
    yield slug
    muse("hub", "repo", "delete", slug, "--yes", "--hub", HUB, "--json", cwd=REPO_ROOT)
152
153
def test_repair_commit_fixes_spurious_signer(tmp_path: Path, hub_repo: str) -> None:
    """Push a three-commit chain, corrupt the head row, repair it, and verify the fix.

    The head commit's row gets a spurious signer stamped on it; after
    ``wire_repair_commit`` the signer must be cleared and the serve path must
    reproduce the original commit id.
    """
    seed_repo = tmp_path / "seed"
    seed_repo.mkdir()
    muse_check("init", cwd=seed_repo)

    # Unique content per run so the content-addressed commit rows are not shared
    # with another repo (safe to corrupt/repair the global row).
    unique_tag = f"{tmp_path.name}-{int(_time.time())}"
    chain = (("f1.txt", "A"), ("f2.txt", "B"), ("f3.txt", "C"))
    for filename, message in chain:
        target = seed_repo / filename
        target.write_text(f"{message}-{unique_tag}\n")
        muse_check("code", "add", ".", cwd=seed_repo)
        # Unsigned commits — signer_public_key == "" (matches the original staging state).
        muse_check(
            "commit", "-m", message, "--agent-id", "test", "--model-id", "test",
            cwd=seed_repo,
        )

    remote_url = f"{HUB}/{hub_repo}"
    muse_check("remote", "add", "origin", remote_url, cwd=seed_repo)
    muse_check("push", "origin", "main", cwd=seed_repo)

    head_id = _commit_id_by_message(seed_repo, "C")
    outcome = asyncio.run(_capture_corrupt_repair(head_id))

    assert outcome["pristine_ok"], "pushed commit's serve-hash must equal its id before corruption"
    assert outcome["corrupt_serve"] != head_id, (
        "stamping a spurious signer must break the serve-hash — otherwise the test "
        "is not reproducing the staging corruption"
    )
    assert outcome["result"] == {"repaired": True}, f"unexpected repair result: {outcome['result']}"
    assert outcome["post_signer"] == "", "repair must clear the spurious signer_public_key"
    assert outcome["post_serve"] == head_id, (
        f"after repair the serve path must reproduce the commit id {head_id[:18]} — "
        f"got {outcome['post_serve'][:18]}. The repair did not stick."
    )
File History 1 commit
sha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2 feat: add repair-commit wire endpoint (API parity with repa… Opus 4.8 minor 22 hours ago