gabriel / musehub public
test_repair_commit_endpoint.py python
185 lines 8.0 KB
Raw
sha256:dc28fb2384d12a52d4b4fea7743873940b89d9d08ce298f96d0fdc8d694724d4 test+types: green the musehub suite and ratchet typing audi… Opus 4.8 minor ⚠ breaking 14 hours ago
1 """TDD — the repair-commit endpoint must fix a commit whose identity no longer
2 reproduces its id because a ``signer_public_key`` was stamped onto the stored row
3 *without* recomputing the commit_id.
4
5 This is the exact corruption found on staging gabriel/muse: the rc10 object-store
6 migration's signing pass wrote an Ed25519 ``signer_public_key`` (and signature) onto
7 two main-line merge commit rows that were originally committed *unsigned*. Because
8 ``signer_public_key`` is part of the commit identity hash, the serve path now
9 recomputes a different id than the stored ``commit_id`` — the client's hash check
10 fails on clone, the commit is dropped, and every descendant fails "parent not in
11 mpack", emptying the working tree.
12
13 ``repair-commit`` is the commit-level analog of ``repair-object`` /
14 ``repair-snapshot``: the caller submits the verified-correct commit record, the
15 server recomputes the identity *exactly as the serve path will reproduce it*
16 (round-tripping ``committed_at`` through ``timestamp.isoformat()`` so the stored
17 value is what gets re-served), verifies it equals ``commit_id``, and force-overwrites
18 the row. RED before the handler exists; GREEN after.
19
20 Integration test against localhost (musehub @ :1337, postgres @ :5434). It pushes a
21 short chain, simulates the migration corruption by stamping a spurious signer onto
22 the head commit's row, repairs it via ``wire_repair_commit``, and asserts the serve
23 path then reproduces the id. The corruption is on a content-addressed row shared
24 globally, so the test uses unique commit content per run and never leaves the row
25 corrupt (repair restores it to the correct unsigned state).
26 """
27 from __future__ import annotations
28
29 import asyncio
30 import hashlib
31 import json
32 import subprocess
33 import time as _time
34 from collections.abc import Iterator
35 from pathlib import Path
36
37 import pytest
38 from sqlalchemy import text as _sa_text
39 from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
40 from sqlalchemy.orm import sessionmaker
41
42 from muse.core.ids import long_id
43 from musehub.db.musehub_repo_models import MusehubCommit
44 from musehub.services.musehub_wire_push import wire_repair_commit
45 from musehub.services.musehub_wire_shared import _commit_identity_bytes, _to_wire_commit
46 from musehub.types.json_types import JSONObject
47
# Base URL of the local musehub instance this integration test talks to.
HUB = "https://localhost:1337"
# Async SQLAlchemy URL for the local postgres that backs the hub; used to read
# and corrupt the commit row directly.
DB_URL = "postgresql+asyncpg://musehub:musehub@localhost:5434/musehub"
# Parent of the directory containing this file; used as cwd for hub-level CLI calls.
REPO_ROOT = Path(__file__).parent.parent
# Signer key stamped onto the commit row to simulate the migration corruption.
SPURIOUS_SIGNER = "ed25519:ziza_Zad72Q0bEkBeKeRIzkLbUOaEgy59AtoIv4rUeM"
52
53
def muse(*args: str, cwd: Path, timeout: int = 90) -> subprocess.CompletedProcess:
    """Invoke the ``muse`` CLI with *args* inside *cwd* and return the raw result.

    stdout and stderr are captured as text. A non-zero exit status is not
    turned into an exception here; callers inspect ``returncode`` themselves.
    *timeout* is in seconds and is forwarded to ``subprocess.run``.
    """
    command = ["muse"]
    command.extend(args)
    return subprocess.run(
        command,
        cwd=str(cwd),
        capture_output=True,
        text=True,
        timeout=timeout,
    )
58
59
def muse_check(*args: str, cwd: Path, timeout: int = 90) -> str:
    """Run ``muse`` with *args* in *cwd* and return its stdout.

    Raises:
        RuntimeError: if the command exits non-zero. The message carries the
            argument list, the return code and the first 600 characters of
            stderr.
    """
    proc = muse(*args, cwd=cwd, timeout=timeout)
    if proc.returncode == 0:
        return proc.stdout
    command_line = " ".join(args)
    stderr_head = proc.stderr[:600]
    raise RuntimeError(f"muse {command_line} failed (rc={proc.returncode}):\n{stderr_head}")
65
66
def _commit_id_by_message(repo: Path, message: str) -> str:
    """Return the ``commit_id`` of the first ``muse log`` entry whose message is *message*.

    The log is read via ``muse log --json`` in *repo*; each entry's message is
    compared after stripping surrounding whitespace (a missing or null message
    is treated as empty).

    Raises:
        AssertionError: if no entry in the log has that message.
    """
    log = json.loads(muse_check("log", "--json", cwd=repo))
    match = next(
        (
            entry
            for entry in log["commits"]
            if (entry.get("message") or "").strip() == message
        ),
        None,
    )
    if match is None:
        raise AssertionError(f"no commit with message {message!r}")
    return match["commit_id"]
73
74
def _serve_hash(row: MusehubCommit) -> str:
    """Return the id the serve path recomputes for *row* (what the clone client checks).

    The row is converted to its wire form, the identity bytes of that wire
    commit are hashed with SHA-256, and the hex digest is passed through
    ``long_id``.
    """
    wire_commit = _to_wire_commit(row)
    identity = _commit_identity_bytes(wire_commit)
    digest = hashlib.sha256(identity).hexdigest()
    return long_id(digest)
78
79
def _good_commit_dict(row: MusehubCommit) -> JSONObject:
    """Build the verified-correct (unsigned) wire-commit payload from a pristine row.

    Identity fields are copied from *row*. ``committed_at`` is the row
    timestamp rendered with ``isoformat()`` (or ``""`` when the row has no
    timestamp). Only the first two entries of ``parent_ids`` are used; a
    missing parent is ``None``. All three signing fields are emitted empty.
    """
    parent_ids = list(row.parent_ids or [])
    first_parent = parent_ids[0] if parent_ids else None
    second_parent = parent_ids[1] if len(parent_ids) > 1 else None

    if row.timestamp:
        committed_at = row.timestamp.isoformat()
    else:
        committed_at = ""

    payload = {
        "commit_id": row.commit_id,
        "branch": row.branch,
        "snapshot_id": row.snapshot_id,
        "message": row.message,
        "committed_at": committed_at,
        "parent_commit_id": first_parent,
        "parent2_commit_id": second_parent,
        "author": row.author,
    }
    # The correct, original state of the commit is unsigned.
    payload["signer_public_key"] = ""
    payload["signature"] = ""
    payload["signer_key_id"] = ""
    return payload
96
97
async def _capture_corrupt_repair(commit_id: str) -> JSONObject:
    """Corrupt a pushed commit row the way the migration did, repair it, and report.

    Each step runs in a fresh session:

    1. Load the pristine row, build the verified-correct payload from it, look
       up a repo that references the commit, and record whether the serve hash
       equals ``commit_id``.
    2. Stamp ``SPURIOUS_SIGNER`` onto ``signer_public_key`` with the id left
       unchanged.
    3. Re-read the row and record its serve hash.
    4. Call ``wire_repair_commit`` with the good payload.
    5. Re-read the row and record its signer and serve hash.

    Returns:
        A dict with keys ``pristine_ok``, ``corrupt_serve``, ``result``,
        ``post_signer``, ``post_serve`` and ``commit_id``.

    Raises:
        AssertionError: if the commit row cannot be loaded at any step.

    If the repair raises, or returns without clearing the spurious signer, the
    row's original ``signer_public_key`` is written back before this coroutine
    exits, so a failing run does not leave the row corrupt.
    """
    engine = create_async_engine(DB_URL)
    Session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
    try:
        async with Session() as s:
            row = await s.get(MusehubCommit, commit_id)
            assert row is not None, f"commit {commit_id[:18]} not on server after push"
            good = _good_commit_dict(row)
            # Remembered so the cleanup below can undo the injected corruption.
            original_signer = row.signer_public_key
            repo_id = (
                await s.execute(
                    _sa_text("SELECT repo_id FROM musehub_commit_refs WHERE commit_id=:c LIMIT 1"),
                    {"c": commit_id},
                )
            ).scalar_one()
            pristine_ok = _serve_hash(row) == commit_id

        # Inject the migration corruption: a spurious signer with the id left unchanged.
        async with Session() as s:
            row = await s.get(MusehubCommit, commit_id)
            assert row is not None, f"commit {commit_id[:18]} disappeared before corruption"
            row.signer_public_key = SPURIOUS_SIGNER
            await s.commit()

        try:
            async with Session() as s:
                row = await s.get(MusehubCommit, commit_id)
                assert row is not None, f"commit {commit_id[:18]} disappeared after corruption"
                corrupt_serve = _serve_hash(row)

            # Repair as the repo owner.
            async with Session() as s:
                result = await wire_repair_commit(s, repo_id, good, caller_id="gabriel")

            async with Session() as s:
                row = await s.get(MusehubCommit, commit_id)
                assert row is not None, f"commit {commit_id[:18]} disappeared after repair"
                return {
                    "pristine_ok": pristine_ok,
                    "corrupt_serve": corrupt_serve,
                    "result": result,
                    "post_signer": row.signer_public_key,
                    "post_serve": _serve_hash(row),
                    "commit_id": commit_id,
                }
        finally:
            # Safety net: only touch the row if the injected signer is still
            # there, i.e. the repair raised or did not stick.
            async with Session() as s:
                row = await s.get(MusehubCommit, commit_id)
                if row is not None and row.signer_public_key == SPURIOUS_SIGNER:
                    row.signer_public_key = original_signer
                    await s.commit()
    finally:
        await engine.dispose()
141
142
@pytest.fixture
def hub_repo(tmp_path: Path) -> Iterator[str]:
    """Create a throwaway public hub repo, yield its ``gabriel/<slug>``, then delete it.

    The repo is created with ``muse hub repo create ... --no-init`` against
    ``HUB``; the slug is read from the command's JSON output. Deletion after
    the test is best-effort: its result is not checked.
    """
    # tmp_path.name comes from the test name and is expected to repeat from one
    # run to the next, so add a time component. Otherwise a repo left behind by
    # a failed or interrupted cleanup would collide with the next run's create.
    name = f"test-repair-commit-{tmp_path.name[-6:]}-{int(_time.time())}"
    out = muse_check(
        "hub", "repo", "create", "--name", name,
        "--visibility", "public", "--no-init", "--hub", HUB, "--json",
        cwd=REPO_ROOT,
    )
    slug = f"gabriel/{json.loads(out)['slug']}"
    yield slug
    muse("hub", "repo", "delete", slug, "--yes", "--hub", HUB, "--json", cwd=REPO_ROOT)
154
155
def test_repair_commit_fixes_spurious_signer(tmp_path: Path, hub_repo: str) -> None:
    """Push an unsigned three-commit chain, corrupt the head row, repair it, verify.

    The head commit's row gets a spurious signer stamped on it, is repaired via
    ``wire_repair_commit``, and must afterwards reproduce its id on the serve
    path with the signer cleared.
    """
    seed_repo = tmp_path / "seed"
    seed_repo.mkdir()
    muse_check("init", cwd=seed_repo)

    # Per-run tag keeps the content-addressed commit rows private to this run,
    # so corrupting and repairing the global row cannot affect another repo.
    run_tag = f"{tmp_path.name}-{int(_time.time())}"
    chain = (("f1.txt", "A"), ("f2.txt", "B"), ("f3.txt", "C"))
    for filename, message in chain:
        target = seed_repo / filename
        target.write_text(f"{message}-{run_tag}\n")
        muse_check("code", "add", ".", cwd=seed_repo)
        # Commits are made unsigned (signer_public_key == ""), matching the
        # original staging state.
        muse_check(
            "commit", "-m", message, "--agent-id", "test", "--model-id", "test",
            cwd=seed_repo,
        )

    remote_url = f"{HUB}/{hub_repo}"
    muse_check("remote", "add", "origin", remote_url, cwd=seed_repo)
    muse_check("push", "origin", "main", cwd=seed_repo)

    head_id = _commit_id_by_message(seed_repo, "C")
    report = asyncio.run(_capture_corrupt_repair(head_id))

    assert report["pristine_ok"], "pushed commit's serve-hash must equal its id before corruption"
    assert report["corrupt_serve"] != head_id, (
        "stamping a spurious signer must break the serve-hash — otherwise the test "
        "is not reproducing the staging corruption"
    )
    assert report["result"] == {"repaired": True}, f"unexpected repair result: {report['result']}"
    assert report["post_signer"] == "", "repair must clear the spurious signer_public_key"
    assert report["post_serve"] == head_id, (
        f"after repair the serve path must reproduce the commit id {head_id[:18]} — "
        f"got {report['post_serve'][:18]}. The repair did not stick."
    )
File History 1 commit
sha256:dc28fb2384d12a52d4b4fea7743873940b89d9d08ce298f96d0fdc8d694724d4 test+types: green the musehub suite and ratchet typing audi… Opus 4.8 minor 14 hours ago