gabriel / muse public
test_migrate_force_resign.py python
366 lines 15.6 KB
Raw
1 """TDD — migrate --force-resign must re-sign commits that have stale signatures.
2
3 After ``muse code migrate`` rewrites commit IDs, any existing signature becomes
4 invalid (the signature covers the old commit_id, not the new one). Running
5 ``migrate(force_resign=True)`` must detect this and re-sign every commit,
6 returning ``commits_signed == N`` and leaving the repo in a verifiable state.
7
8 Tests
9 -----
FR1 (RED)   force_resign=True returns commits_signed == N for a repo whose
            commits all carry stale signatures (migrated but not re-signed).
FR2 (RED)   After force-resign the written commits pass Ed25519 verification.
FR3 (GREEN) force_resign=False (default) does NOT re-sign already-signed commits.
FR4 (RED)   force_resign=True is idempotent — running it twice still returns
            commits_signed == N on the second run (existing valid sigs replaced).
FR5 (RED)   A repo with a mix of unsigned commits and stale-signed commits:
            force_resign re-signs the stale-signed ones, and the unsigned ones
            are signed as well (via the ``is_unsigned`` path), so
            commits_signed == total.  The current test builds stale-signed
            commits only.
FR6         force_resign=True with signing_identity=None raises ValueError
            instead of silently returning commits_signed == 0.
19 """
20 from __future__ import annotations
21
22 import datetime
23 import json
24 import pathlib
25
26 import msgpack
27 import pytest
28 from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey
29
30 from muse.core.migrate import migrate
31 from muse.core.paths import commits_dir, muse_dir, ref_path, snapshots_dir
32 from muse.core.provenance import (
33 encode_public_key,
34 provenance_payload,
35 sign_commit_ed25519,
36 verify_commit_ed25519,
37 )
38 from muse.core.types import decode_pubkey
39 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
40 from muse.core.transport import SigningIdentity
41 from muse.core.types import b64url_encode, blob_id, long_id, split_id
42
# Shape of a raw commit record as decoded from its msgpack file; used as the
# return type of ``_read_raw_commit``.
# NOTE(review): the value union is approximate — ``_write_commit_raw`` also
# stores a dict under "metadata", which this alias does not cover.
_CommitDict = dict[str, str | int | None | list[str]]  # raw commit msgpack dict
44
45
46 # ---------------------------------------------------------------------------
47 # Constants
48 # ---------------------------------------------------------------------------
49
# Fixed, timezone-aware timestamp shared by every fabricated snapshot and commit,
# plus its ISO-8601 string form as stored in the records.
_AT = datetime.datetime(year=2026, month=1, day=1, tzinfo=datetime.timezone.utc)
_AT_ISO = _AT.isoformat()
52
53
54 # ---------------------------------------------------------------------------
55 # Helpers
56 # ---------------------------------------------------------------------------
57
def _make_key() -> Ed25519PrivateKey:
    """Generate a new Ed25519 private key for use in a single test."""
    private_key = Ed25519PrivateKey.generate()
    return private_key
60
61
def _make_identity(handle: str = "gabriel") -> SigningIdentity:
    """Return a ``SigningIdentity`` for ``handle`` backed by a newly generated key."""
    fresh_key = _make_key()
    return SigningIdentity(handle=handle, private_key=fresh_key)
64
65
def _init_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal, empty repository under ``tmp_path`` and return ``tmp_path``.

    Creates the store directories, a ``HEAD`` file pointing at
    ``refs/heads/main`` and a ``repo.json`` with a repo ID and the ``code`` domain.
    """
    root = muse_dir(tmp_path)
    layout = (
        "commits/sha256",
        "snapshots/sha256",
        "objects/sha256",
        "refs/heads",
        "remotes",
    )
    for relative in layout:
        # No exist_ok: the directories are expected not to exist yet.
        root.joinpath(relative).mkdir(parents=True)

    root.joinpath("HEAD").write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = {"repo_id": blob_id(b"test-repo"), "domain": "code"}
    root.joinpath("repo.json").write_text(json.dumps(repo_meta), encoding="utf-8")
    return tmp_path
76
77
def _write_snap(repo: pathlib.Path, tag: str = "a") -> str:
    """Write a one-file snapshot record for ``tag`` and return its snapshot ID.

    The manifest maps ``file_<tag>.py`` to ``long_id("a" * 64)``; the record is
    stored as msgpack under ``snapshots/sha256/``.
    """
    manifest = {f"file_{tag}.py": long_id("a" * 64)}
    snapshot_id = compute_snapshot_id(manifest)
    record = {"snapshot_id": snapshot_id, "manifest": manifest, "created_at": _AT_ISO}

    filename = f"{long_id(snapshot_id, strip=True)}.msgpack"
    target = snapshots_dir(repo) / "sha256" / filename
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(msgpack.packb(record, use_bin_type=True))
    return snapshot_id
89
90
def _sign_over_id(commit_id: str, private_key: Ed25519PrivateKey, author: str = "gabriel") -> str:
    """Sign the provenance payload for ``commit_id`` with ``private_key``.

    The payload is built from ``commit_id``, ``author`` and the fixed test
    timestamp, so the returned signature references that particular ID.
    """
    fields = {"commit_id": commit_id, "author": author, "committed_at": _AT_ISO}
    return sign_commit_ed25519(provenance_payload(**fields), private_key)
99
100
def _pub_str(private_key: Ed25519PrivateKey) -> str:
    """Return the public-key string from ``encode_public_key(private_key)``.

    ``encode_public_key`` yields a pair; only its second element is used here.
    """
    encoded_pair = encode_public_key(private_key)
    _unused, public_key_text = encoded_pair
    return public_key_text
104
105
def _write_commit_raw(
    repo: pathlib.Path,
    *,
    commit_id: str,
    snapshot_id: str,
    message: str,
    parent_id: str | None = None,
    signature: str = "",
    signer_public_key: str = "",
) -> None:
    """Serialise a commit record straight to ``commits/sha256/<hex>.msgpack``.

    Nothing is validated here: ``commit_id``, ``signature`` and
    ``signer_public_key`` are stored exactly as given, which lets tests
    fabricate records whose signature does not match their ID.  Every other
    field is a fixed value.
    """
    # Keyword order is the on-disk key order; keep it stable.
    record = dict(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snapshot_id,
        message=message,
        committed_at=_AT_ISO,
        parent_commit_id=parent_id,
        parent2_commit_id=None,
        author="gabriel",
        metadata={},
        structured_delta=None,
        sem_ver_bump="none",
        breaking_changes=[],
        agent_id="",
        model_id="",
        toolchain_id="",
        prompt_hash="",
        signature=signature,
        signer_public_key=signer_public_key,
        signer_key_id="",
        format_version=8,
        reviewed_by=[],
        test_runs=0,
        labels=[],
        status="",
        notes=[],
        score=None,
    )
    filename = f"{long_id(commit_id, strip=True)}.msgpack"
    target = commits_dir(repo) / "sha256" / filename
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(msgpack.packb(record, use_bin_type=True))
149
150
def _set_ref(repo: pathlib.Path, branch: str, commit_id: str) -> None:
    """Point ``branch`` at ``commit_id`` by writing its ref file, newline-terminated."""
    ref_file = ref_path(repo, branch)
    ref_file.parent.mkdir(parents=True, exist_ok=True)
    ref_file.write_text(f"{commit_id}\n", encoding="utf-8")
155
156
def _read_raw_commit(repo: pathlib.Path, commit_id: str) -> _CommitDict:
    """Load and decode the msgpack record stored for ``commit_id``."""
    filename = f"{long_id(commit_id, strip=True)}.msgpack"
    packed = (commits_dir(repo) / "sha256" / filename).read_bytes()
    return msgpack.unpackb(packed, raw=False)
161
162
def _build_stale_signed_chain(
    repo: pathlib.Path, signing_key: Ed25519PrivateKey, n: int = 3
) -> list[str]:
    """Build a linear chain of ``n`` commits whose signatures are all stale.

    Each record is stored under its canonical ``compute_commit_id`` result, but
    its signature is made over a different, fabricated ID.  This simulates the
    post-migration state where the ID was rewritten but the signature was not
    updated.  The ``main`` ref is pointed at the last commit of the chain.

    Args:
        repo: Repository root to write snapshots, commits and the ref into.
        signing_key: Key used to produce the stale signatures; its public half
            is recorded on every commit.
        n: Number of commits to create; must be at least 1.

    Returns:
        The canonical commit IDs written to disk, oldest first.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    if n < 1:
        # Fail before touching the disk; an empty chain has no tip to point
        # ``main`` at, and would otherwise surface as a bare IndexError.
        raise ValueError(f"n must be >= 1, got {n}")

    pub = _pub_str(signing_key)
    commit_ids: list[str] = []
    parent_id: str | None = None

    for i in range(n):
        sid = _write_snap(repo, tag=str(i))
        message = f"commit {i}"

        # Canonical ID: the ID the record is stored under.
        canonical_id = compute_commit_id(
            parent_ids=[parent_id] if parent_id else [],
            snapshot_id=sid,
            message=message,
            committed_at_iso=_AT_ISO,
            author="gabriel",
            signer_public_key=pub,
        )

        # Stale ID: unrelated to canonical_id.  The signature covers THIS value,
        # so it cannot verify against the stored commit_id.
        stale_id = blob_id(f"stale-{i}".encode())
        stale_sig = _sign_over_id(stale_id, signing_key)

        _write_commit_raw(
            repo,
            commit_id=canonical_id,
            snapshot_id=sid,
            message=message,
            parent_id=parent_id,
            signature=stale_sig,
            signer_public_key=pub,
        )
        commit_ids.append(canonical_id)
        parent_id = canonical_id

    _set_ref(repo, "main", commit_ids[-1])
    return commit_ids
208
209
210 # ══════════════════════════════════════════════════════════════════════════════
211 # FR1 — force_resign=True returns commits_signed == N
212 # ══════════════════════════════════════════════════════════════════════════════
213
def test_fr1_force_resign_returns_commits_signed(tmp_path: pathlib.Path) -> None:
    """FR1: force-resigning a chain of stale-signed commits reports all of them.

    Every commit in the repo is signed over an ID other than its own, so
    ``migrate(force_resign=True)`` must return ``commits_signed == N``.

    This test is RED until the --force-resign re-signing path is fixed.
    """
    expected = 3
    repo = _init_repo(tmp_path)
    identity = _make_identity()
    _build_stale_signed_chain(repo, identity.private_key, expected)

    signed = migrate(repo, signing_identity=identity, force_resign=True).commits_signed

    assert signed == expected, (
        f"Expected commits_signed={expected}, got {signed}. "
        "force_resign=True must re-sign every commit that carries a stale signature."
    )
231
232
233 # ══════════════════════════════════════════════════════════════════════════════
234 # FR2 — commits pass Ed25519 verification after force-resign
235 # ══════════════════════════════════════════════════════════════════════════════
236
def test_fr2_commits_verify_after_force_resign(tmp_path: pathlib.Path) -> None:
    """FR2: after migrate(force_resign=True) every stored signature must verify.

    Each signed record under ``commits/sha256/`` is checked against its own
    stored ``commit_id``.  The test also requires that at least as many commits
    verified as were created, so a store whose signatures are all missing
    cannot pass by having nothing to check.
    """
    repo = _init_repo(tmp_path)
    identity = _make_identity()
    commit_ids = _build_stale_signed_chain(repo, identity.private_key, 3)

    migrate(repo, signing_identity=identity, force_resign=True)

    # After migrate, commit IDs may have changed, so scan the store for the
    # records actually present instead of looking up ``commit_ids`` directly.
    verified = 0
    failed: list[str] = []
    for commit_path in sorted((commits_dir(repo) / "sha256").glob("**/*.msgpack")):
        raw = msgpack.unpackb(commit_path.read_bytes(), raw=False)
        sig = raw.get("signature", "")
        pub_key_str = raw.get("signer_public_key", "")
        cid = raw.get("commit_id", "")
        if not (sig and pub_key_str and cid):
            # Unsigned record: nothing to verify here; the count check below
            # catches the case where too few commits ended up signed.
            continue
        payload = provenance_payload(
            commit_id=cid,
            author=raw.get("author", ""),
            agent_id=raw.get("agent_id", ""),
            model_id=raw.get("model_id", ""),
            toolchain_id=raw.get("toolchain_id", ""),
            prompt_hash=raw.get("prompt_hash", ""),
            committed_at=raw.get("committed_at", ""),
        )
        try:
            _, pub_raw = decode_pubkey(pub_key_str)
        except Exception:
            # An undecodable key is recorded as a failure, not swallowed.
            failed.append(cid)
            continue
        if verify_commit_ed25519(payload, sig, pub_raw):
            verified += 1
        else:
            failed.append(cid)

    assert not failed, (
        f"{len(failed)} commit(s) still have invalid signatures after force-resign: "
        f"{failed[:3]}"
    )
    assert verified >= len(commit_ids), (
        f"Only {verified} commit(s) carried a verifiable signature after force-resign; "
        f"expected at least {len(commit_ids)}."
    )
278
279
280 # ══════════════════════════════════════════════════════════════════════════════
281 # FR3 — force_resign=False does NOT re-sign commits that are already signed
282 # ══════════════════════════════════════════════════════════════════════════════
283
def test_fr3_no_resign_without_flag(tmp_path: pathlib.Path) -> None:
    """FR3: with force_resign=False, already-signed commits are left alone.

    The commits carry signatures (stale ones), so migrate must not re-sign
    them and ``commits_signed`` must be 0.
    """
    repo = _init_repo(tmp_path)
    identity = _make_identity()
    _build_stale_signed_chain(repo, identity.private_key, 3)

    outcome = migrate(repo, signing_identity=identity, force_resign=False)
    signed = outcome.commits_signed

    assert signed == 0, f"Expected commits_signed=0 without --force-resign, got {signed}."
297
298
299 # ══════════════════════════════════════════════════════════════════════════════
300 # FR4 — force_resign=True is idempotent (re-sign already-valid signatures)
301 # ══════════════════════════════════════════════════════════════════════════════
302
def test_fr4_force_resign_idempotent(tmp_path: pathlib.Path) -> None:
    """FR4: a second migrate(force_resign=True) still reports commits_signed == N.

    The flag is unconditional: signatures made valid by the first run are
    replaced again on the second run.
    """
    repo = _init_repo(tmp_path)
    identity = _make_identity()
    n = 2
    _build_stale_signed_chain(repo, identity.private_key, n)

    def run_force_resign() -> int:
        return migrate(repo, signing_identity=identity, force_resign=True).commits_signed

    # First pass repairs the stale signatures.
    first = run_force_resign()
    assert first == n, f"First run: expected {n}, got {first}"

    # Second pass starts from valid signatures and must re-sign them regardless.
    second = run_force_resign()
    assert second == n, (
        f"Second run: expected {n} (idempotent), got {second}. "
        "force_resign=True must always re-sign, regardless of current sig validity."
    )
322
323
324 # ══════════════════════════════════════════════════════════════════════════════
325 # FR5 — force_resign=True re-signs the stale-signed commits
326 # ══════════════════════════════════════════════════════════════════════════════
327
328 # ══════════════════════════════════════════════════════════════════════════════
329 # FR6 — force_resign=True with signing_identity=None raises ValueError
330 # ══════════════════════════════════════════════════════════════════════════════
331
def test_fr6_force_resign_without_identity_raises(tmp_path: pathlib.Path) -> None:
    """FR6: migrate(force_resign=True, signing_identity=None) must raise ValueError.

    Background (the production bug this guards against): when the repo has no
    hub configured, get_signing_identity() returns None, and migrate() then
    silently returned commits_signed=0 — a no-op that left every signature
    invalid.

    Required behaviour: raise ValueError so the caller gets a clear error
    instead of a mysterious zero count.
    """
    repo = _init_repo(tmp_path)
    # A key is still needed to fabricate the stale-signed history; it is
    # deliberately not handed to migrate().
    chain_key = _make_identity().private_key
    _build_stale_signed_chain(repo, chain_key, 2)

    expect_error = pytest.raises(ValueError, match="force_resign")
    with expect_error:
        migrate(repo, signing_identity=None, force_resign=True)
349
350
def test_fr5_force_resign_covers_stale_signed_commits(tmp_path: pathlib.Path) -> None:
    """FR5: force_resign=True re-signs stale-signed commits, not only unsigned ones.

    All commits built here carry stale signatures, so ``commits_signed`` must
    equal the total number of commits.
    """
    total = 4
    repo = _init_repo(tmp_path)
    identity = _make_identity()
    _build_stale_signed_chain(repo, identity.private_key, total)

    signed = migrate(repo, signing_identity=identity, force_resign=True).commits_signed

    # Every commit in the chain is stale-signed, so every one must be counted.
    assert signed == total, (
        f"Expected all {total} stale-signed commits to be re-signed, "
        f"got commits_signed={signed}."
    )
File History 1 commit