gabriel / musehub public
test_compliance.py python
591 lines 25.2 KB
Raw
sha256:0997d6250ae6476362f6fe2025af7789f46d03df3e9f34356d5e8ee79b201923 fix(issues): use issue number as pagination cursor, not cre… Sonnet 4.6 patch 8 days ago
1 """Section 9 — Compliance & Legal Minimums.
2
3 Covers:
4 Privacy policy : exists, linked from footer, covers agent-first model.
5 Terms of Service : exists, implicit acceptance via key registration.
6 Minimum data : no unnecessary PII fields in MusehubIdentity.
7 GDPR / CCPA : GET /me/export and DELETE /me endpoints exist and work.
8 DMCA : takedown process documented.
9 OSS license audit : license-audit.md exists and covers all direct deps.
10 DB migration : 0001 (consolidated) includes training_opt_out + tos_accepted_at/tos_version.
11 Auth service : tos_accepted_at set at registration time.
12 Training opt-out : MusehubRepo.training_opt_out field exists, defaults False.
13 """
14 from __future__ import annotations
15
16 import json
17 from pathlib import Path
18 from unittest.mock import AsyncMock, MagicMock, patch
19
20 import pytest
21
# Directory two levels above this file, i.e. the parent of the folder that
# holds this test module — presumably the repository root.
_ROOT = Path(__file__).resolve().parents[1]
# Application package directory; used only to build the source paths below.
_MUSEHUB_PKG = _ROOT / "musehub"
# Folder holding the legal markdown documents checked by the tests.
_DOCS_LEGAL = _ROOT / "docs" / "legal"
# Base HTML template whose footer links are checked.
_BASE_HTML = _MUSEHUB_PKG / "templates" / "musehub" / "base.html"
# Source files that are inspected as plain text (not imported) by some tests.
_AUTH_SVC = _MUSEHUB_PKG / "services" / "musehub_auth.py"
_USERS_ROUTES = _MUSEHUB_PKG / "api" / "routes" / "musehub" / "users.py"

# Alembic migration script and pre-launch checklist, also read as text.
_MIGRATION = _ROOT / "alembic" / "versions" / "0001_consolidated_schema.py"
_CHECKLIST = _ROOT / "docs" / "pre-launch-checklist.md"
31
32
33 # ═══════════════════════════════════════════════════════════════════════════════
34 # Privacy Policy
35 # ═══════════════════════════════════════════════════════════════════════════════
36
class TestPrivacyPolicy:
    """Keyword-level checks on ``docs/legal/privacy-policy.md``.

    Every test asserts only that a word or phrase occurs somewhere in the
    document; none of them validates the surrounding legal wording.
    """

    _pp = _DOCS_LEGAL / "privacy-policy.md"

    def _policy_text(self) -> str:
        """Return the policy text decoded as UTF-8.

        The encoding is passed explicitly so the result does not depend on
        the locale of the machine running the tests.
        """
        return self._pp.read_text(encoding="utf-8")

    def test_privacy_policy_exists(self) -> None:
        """The policy file must be present on disk."""
        assert self._pp.exists(), "docs/legal/privacy-policy.md must exist"

    def test_privacy_policy_covers_pubkey_identity(self) -> None:
        """The policy mentions public-key based identity."""
        lowered = self._policy_text().lower()
        assert "public key" in lowered or "pubkey" in lowered

    def test_privacy_policy_covers_agents(self) -> None:
        """The policy mentions agents."""
        assert "agent" in self._policy_text().lower()

    def test_privacy_policy_covers_training_data(self) -> None:
        """The policy mentions the training data policy."""
        assert "training" in self._policy_text().lower()

    def test_privacy_policy_covers_training_opt_out(self) -> None:
        """The policy names the opt-out flag or describes opting out."""
        text = self._policy_text()
        lowered = text.lower()
        assert (
            "training_opt_out" in text
            or "opt-out" in lowered
            or "opt out" in lowered
        )

    def test_privacy_policy_mentions_export_endpoint(self) -> None:
        """The policy mentions data export.

        A literal "/me/export" already contains "export", so the single
        case-insensitive check accepts both spellings.
        """
        assert "export" in self._policy_text().lower()

    def test_privacy_policy_mentions_delete_endpoint(self) -> None:
        """The policy mentions the DELETE verb or account deletion."""
        text = self._policy_text()
        assert "DELETE" in text or "deletion" in text.lower()

    def test_privacy_policy_covers_private_repos_exclusion(self) -> None:
        """The policy mentions private repos (never used for training)."""
        assert "private" in self._policy_text().lower()

    def test_privacy_policy_has_effective_date(self) -> None:
        """The policy states an effective date (case-insensitive match)."""
        assert "effective date" in self._policy_text().lower()
76
77
78 # ═══════════════════════════════════════════════════════════════════════════════
79 # Terms of Service
80 # ═══════════════════════════════════════════════════════════════════════════════
81
class TestTermsOfService:
    """Keyword-level checks on ``docs/legal/terms-of-service.md``.

    The tests look for specific words and field names in the document text;
    they do not verify that the words appear in any particular section.
    """

    _tos = _DOCS_LEGAL / "terms-of-service.md"

    def _tos_text(self) -> str:
        """Return the ToS text decoded as UTF-8, independent of locale."""
        return self._tos.read_text(encoding="utf-8")

    def test_tos_exists(self) -> None:
        """The ToS file must be present on disk."""
        assert self._tos.exists(), "docs/legal/terms-of-service.md must exist"

    def test_tos_implicit_acceptance_via_key_registration(self) -> None:
        """The ToS explains that key registration equals acceptance."""
        lowered = self._tos_text().lower()
        assert "key registration" in lowered or "registering a key" in lowered

    def test_tos_records_tos_accepted_at(self) -> None:
        """The ToS names the ``tos_accepted_at`` field."""
        assert "tos_accepted_at" in self._tos_text()

    def test_tos_records_tos_version(self) -> None:
        """The ToS names the ``tos_version`` field."""
        assert "tos_version" in self._tos_text()

    def test_tos_agent_operator_responsibility(self) -> None:
        """The ToS mentions operators."""
        assert "operator" in self._tos_text().lower()

    def test_tos_training_data_policy_section(self) -> None:
        """The ToS mentions training."""
        assert "training" in self._tos_text().lower()

    def test_tos_private_repos_never_used_for_training(self) -> None:
        """The ToS mentions private repos.

        Only the presence of the word "private" is checked, not its
        proximity to the training wording.
        """
        assert "private" in self._tos_text().lower()

    def test_tos_training_opt_out_mentioned(self) -> None:
        """The ToS names the ``training_opt_out`` flag."""
        assert "training_opt_out" in self._tos_text()

    def test_tos_osi_license_condition(self) -> None:
        """The ToS conditions training use on an OSI / open-source license."""
        lowered = self._tos_text().lower()
        assert (
            "osi" in lowered
            or "open-source license" in lowered
            or "open source license" in lowered
        )

    def test_tos_has_effective_date(self) -> None:
        """The ToS states an effective date (case-insensitive match)."""
        assert "effective date" in self._tos_text().lower()
126
127
128 # ═══════════════════════════════════════════════════════════════════════════════
129 # DMCA
130 # ═══════════════════════════════════════════════════════════════════════════════
131
class TestDmca:
    """Keyword-level checks on ``docs/legal/dmca.md``."""

    _dmca = _DOCS_LEGAL / "dmca.md"

    def _dmca_text(self) -> str:
        """Return the DMCA document decoded as UTF-8, independent of locale."""
        return self._dmca.read_text(encoding="utf-8")

    def test_dmca_exists(self) -> None:
        """The DMCA document must be present on disk."""
        assert self._dmca.exists(), "docs/legal/dmca.md must exist"

    def test_dmca_has_contact_email(self) -> None:
        """The document contains a dmca@ address or an @musehub address."""
        text = self._dmca_text()
        assert "dmca@" in text or "@musehub" in text

    def test_dmca_has_response_timeline(self) -> None:
        """The document commits to a response time expressed in days."""
        lowered = self._dmca_text().lower()
        assert "business day" in lowered or "days" in lowered

    def test_dmca_covers_counter_notice(self) -> None:
        """The document mentions counter-notices."""
        assert "counter" in self._dmca_text().lower()

    def test_dmca_mentions_repeat_infringers(self) -> None:
        """The document mentions repeat infringers."""
        assert "repeat" in self._dmca_text().lower()

    def test_dmca_covers_agent_operators(self) -> None:
        """The document mentions agents or operators."""
        lowered = self._dmca_text().lower()
        assert "agent" in lowered or "operator" in lowered
158
159
160 # ═══════════════════════════════════════════════════════════════════════════════
161 # OSS License Audit
162 # ═══════════════════════════════════════════════════════════════════════════════
163
class TestLicenseAudit:
    """Keyword-level checks on ``docs/legal/license-audit.md``."""

    _audit = _DOCS_LEGAL / "license-audit.md"

    def _audit_text_lower(self) -> str:
        """Return the lower-cased audit text, decoded as UTF-8.

        Every check in this class is case-insensitive, so the text is
        lower-cased once here. The explicit encoding keeps the result
        independent of the locale.
        """
        return self._audit.read_text(encoding="utf-8").lower()

    def test_license_audit_exists(self) -> None:
        """The audit document must be present on disk."""
        assert self._audit.exists(), "docs/legal/license-audit.md must exist"

    def test_license_audit_covers_fastapi(self) -> None:
        """The audit mentions fastapi."""
        assert "fastapi" in self._audit_text_lower()

    def test_license_audit_covers_sqlalchemy(self) -> None:
        """The audit mentions sqlalchemy."""
        assert "sqlalchemy" in self._audit_text_lower()

    def test_license_audit_covers_cryptography(self) -> None:
        """The audit mentions cryptography."""
        assert "cryptography" in self._audit_text_lower()

    def test_license_audit_covers_psycopg2(self) -> None:
        """The audit mentions psycopg2."""
        assert "psycopg2" in self._audit_text_lower()

    def test_license_audit_has_review_schedule(self) -> None:
        """The audit mentions a review."""
        assert "review" in self._audit_text_lower()

    def test_all_direct_deps_are_osi_or_noted(self) -> None:
        """The audit refers to OSI approval somewhere in the document.

        This is a presence check only: it does not parse dependency rows.
        "OSI approved" lower-cases to a string containing "osi", so one
        case-insensitive test covers both spellings.
        """
        assert "osi" in self._audit_text_lower()
195
196
197 # ═══════════════════════════════════════════════════════════════════════════════
198 # Footer — legal links in base.html
199 # ═══════════════════════════════════════════════════════════════════════════════
200
class TestLegalFooter:
    """Checks that ``base.html`` contains a footer and legal link keywords.

    The keyword tests search the whole template, not just the footer
    element, so they confirm presence rather than placement.
    """

    @staticmethod
    def _template_text() -> str:
        """Return the base template decoded as UTF-8, independent of locale."""
        return _BASE_HTML.read_text(encoding="utf-8")

    def test_footer_exists_in_base_html(self) -> None:
        """The template has a ``site-footer`` marker or a ``<footer`` tag."""
        text = self._template_text()
        assert "site-footer" in text or "<footer" in text

    def test_footer_has_privacy_link(self) -> None:
        """The template mentions "privacy" (case-insensitive)."""
        assert "privacy" in self._template_text().lower()

    def test_footer_has_terms_link(self) -> None:
        """The template mentions "terms" (case-insensitive)."""
        assert "terms" in self._template_text().lower()

    def test_footer_has_dmca_link(self) -> None:
        """The template mentions "dmca" (case-insensitive)."""
        assert "dmca" in self._template_text().lower()
217
218
219 # ═══════════════════════════════════════════════════════════════════════════════
220 # DB Model — compliance fields
221 # ═══════════════════════════════════════════════════════════════════════════════
222
class TestDbComplianceFields:
    """Checks that the ORM models expose the compliance-related attributes.

    Models are imported inside each test rather than at module level.
    """

    def test_musehub_repo_has_training_opt_out(self) -> None:
        """``MusehubRepo`` exposes a ``training_opt_out`` attribute."""
        from musehub.db.musehub_repo_models import MusehubRepo

        attr_present = hasattr(MusehubRepo, "training_opt_out")
        assert attr_present

    def test_training_opt_out_defaults_false(self) -> None:
        """The ``training_opt_out`` column has a default or is NOT NULL.

        NOTE(review): this verifies only that some default / server default
        is configured or that the column is non-nullable; it does not
        inspect the default's value.
        """
        from musehub.db.musehub_repo_models import MusehubRepo

        column = MusehubRepo.__table__.c["training_opt_out"]
        has_client_default = column.default is not None
        has_server_default = column.server_default is not None
        is_not_nullable = column.nullable is False
        assert has_client_default or has_server_default or is_not_nullable

    def test_musehub_identity_has_tos_accepted_at(self) -> None:
        """``MusehubIdentity`` exposes a ``tos_accepted_at`` attribute."""
        from musehub.db.musehub_identity_models import MusehubIdentity

        attr_present = hasattr(MusehubIdentity, "tos_accepted_at")
        assert attr_present

    def test_musehub_identity_has_tos_version(self) -> None:
        """``MusehubIdentity`` exposes a ``tos_version`` attribute."""
        from musehub.db.musehub_identity_models import MusehubIdentity

        attr_present = hasattr(MusehubIdentity, "tos_version")
        assert attr_present
241
242
243 # ═══════════════════════════════════════════════════════════════════════════════
244 # Alembic migration 0001 (consolidated)
245 # ═══════════════════════════════════════════════════════════════════════════════
246
class TestMigration0023:
    """Source-text checks on ``alembic/versions/0001_consolidated_schema.py``.

    The class name and two test names still mention revisions 0023 / 0022,
    while the assertions target revision ``"0001"`` with no down revision.
    The names are kept unchanged so existing test IDs stay stable.
    """

    @staticmethod
    def _migration_src() -> str:
        """Return the migration source decoded as UTF-8, independent of locale."""
        return _MIGRATION.read_text(encoding="utf-8")

    def test_migration_file_exists(self) -> None:
        """The consolidated migration file must be present on disk."""
        assert _MIGRATION.exists(), "alembic/versions/0001_consolidated_schema.py must exist"

    def test_revision_is_0023(self) -> None:
        """The migration declares revision "0001" (despite the test name)."""
        assert 'revision = "0001"' in self._migration_src()

    def test_down_revision_is_0022(self) -> None:
        """The migration has no down revision (despite the test name)."""
        assert 'down_revision = None' in self._migration_src()

    def test_adds_training_opt_out_to_repos(self) -> None:
        """The migration mentions ``training_opt_out`` and ``musehub_repos``."""
        src = self._migration_src()
        assert "training_opt_out" in src
        assert "musehub_repos" in src

    def test_adds_tos_accepted_at_to_identities(self) -> None:
        """The migration mentions ``tos_accepted_at`` and ``musehub_identities``."""
        src = self._migration_src()
        assert "tos_accepted_at" in src
        assert "musehub_identities" in src

    def test_adds_tos_version_to_identities(self) -> None:
        """The migration mentions ``tos_version``."""
        assert "tos_version" in self._migration_src()

    def test_has_downgrade(self) -> None:
        """The migration defines ``downgrade`` and calls ``drop_table``."""
        src = self._migration_src()
        assert "def downgrade" in src
        assert "drop_table" in src
277
278
279 # ═══════════════════════════════════════════════════════════════════════════════
280 # Auth service — tos_accepted_at at registration
281 # ═══════════════════════════════════════════════════════════════════════════════
282
class TestAuthTosRecording:
    """Source-text checks that the auth service records ToS acceptance.

    The service module is read as text rather than imported, so these tests
    confirm that the identifiers occur in the source, not runtime behavior.
    """

    @staticmethod
    def _auth_src() -> str:
        """Return the auth service source decoded as UTF-8."""
        return _AUTH_SVC.read_text(encoding="utf-8")

    def test_auth_sets_tos_accepted_at_on_registration(self) -> None:
        """The auth service source mentions ``tos_accepted_at``."""
        assert "tos_accepted_at" in self._auth_src()

    def test_auth_sets_tos_version_on_registration(self) -> None:
        """The auth service source mentions ``tos_version``."""
        assert "tos_version" in self._auth_src()

    def test_tos_version_is_1_0(self) -> None:
        """The auth service source contains the string literal 1.0."""
        src = self._auth_src()
        assert '"1.0"' in src or "'1.0'" in src

    def test_tos_accepted_at_set_at_identity_creation(self) -> None:
        """``tos_accepted_at`` appears between the constructor and session.add.

        The text from the first ``MusehubIdentity(`` up to the following
        ``session.add(identity)`` must mention ``tos_accepted_at``.
        """
        src = self._auth_src()

        construct_at = src.find("MusehubIdentity(")
        assert construct_at != -1, "MusehubIdentity( not found in auth service"

        # str.find (not index) so a missing marker is reported as an
        # assertion failure with a message instead of a bare ValueError.
        add_at = src.find("session.add(identity)", construct_at)
        assert add_at != -1, "session.add(identity) not found after MusehubIdentity("

        assert "tos_accepted_at" in src[construct_at:add_at]
305
306
307 # ═══════════════════════════════════════════════════════════════════════════════
308 # GDPR endpoints — source-level checks
309 # ═══════════════════════════════════════════════════════════════════════════════
310
class TestGdprEndpointsExist:
    """Source-text checks on the users route module for the GDPR endpoints.

    The route module is read as text rather than imported; each test looks
    for identifiers or string literals in that source.
    """

    @staticmethod
    def _routes_src() -> str:
        """Return the users route source decoded as UTF-8."""
        return _USERS_ROUTES.read_text(encoding="utf-8")

    def test_export_endpoint_defined(self) -> None:
        """The source contains the ``/me/export`` path."""
        assert "/me/export" in self._routes_src()

    def test_delete_endpoint_defined(self) -> None:
        """The source contains a ``"/me"`` literal and the word delete."""
        src = self._routes_src()
        assert '"/me"' in src
        assert "delete" in src.lower()

    def test_export_requires_auth(self) -> None:
        """``require_valid_token`` appears near the ``/me/export`` path.

        The window spans 200 characters before and 500 after the first
        occurrence of the path.
        """
        src = self._routes_src()
        path_at = src.find("/me/export")
        assert path_at != -1, "/me/export not found in users routes"
        window = src[max(0, path_at - 200):path_at + 500]
        assert "require_valid_token" in window

    def test_delete_requires_auth(self) -> None:
        """``require_valid_token`` appears near ``HTTP_204_NO_CONTENT``.

        The 204 status constant is used as the anchor for the delete
        endpoint; the window spans 500 characters before and 1000 after it.
        """
        src = self._routes_src()
        status_at = src.find("HTTP_204_NO_CONTENT")
        assert status_at != -1, "HTTP_204_NO_CONTENT not found in users routes"
        window = src[max(0, status_at - 500):status_at + 1000]
        assert "require_valid_token" in window

    def test_export_returns_identity_data(self) -> None:
        """The source contains the double-quoted export section keys."""
        src = self._routes_src()
        # Export must include identity, keys, repos, commits
        for key in ("identity", "keys", "repos", "commits"):
            assert f'"{key}"' in src, f'"{key}" not found in users routes'

    def test_export_includes_tos_acceptance(self) -> None:
        """The source mentions ``tos_accepted_at``."""
        assert "tos_accepted_at" in self._routes_src()

    def test_delete_hard_deletes_auth_keys(self) -> None:
        """The source mentions ``MusehubAuthKey`` and a delete call."""
        src = self._routes_src()
        assert "MusehubAuthKey" in src
        assert "delete(" in src or "Delete(" in src

    def test_export_schema_version_field(self) -> None:
        """The source mentions ``schema_version``."""
        assert "schema_version" in self._routes_src()

    def test_delete_returns_204(self) -> None:
        """The source mentions ``HTTP_204_NO_CONTENT``."""
        assert "HTTP_204_NO_CONTENT" in self._routes_src()

    def test_gdpr_import_added_to_users(self) -> None:
        """The source mentions ``MusehubAuthKey``."""
        assert "MusehubAuthKey" in self._routes_src()
367
368
369 # ═══════════════════════════════════════════════════════════════════════════════
370 # GDPR endpoint integration — unit tests with mocked DB
371 # ═══════════════════════════════════════════════════════════════════════════════
372
class TestGdprExportUnit:
    """Test GET /api/me/export response structure."""

    @pytest.mark.asyncio
    async def test_export_response_structure(self) -> None:
        """Call ``export_my_data`` with a mocked session and check the JSON.

        The mocked ``db.execute`` yields four results in order: the identity
        row, then lists of keys, repos and commits.
        """
        from datetime import datetime, timezone
        from musehub.api.routes.musehub.users import export_my_data
        from musehub.auth.dependencies import TokenClaims

        now = datetime.now(timezone.utc)

        mock_identity = MagicMock(
            identity_id="id-123",
            handle="gabriel",
            identity_type="human",
            display_name="Gabriel",
            bio="Music maker",
            email=None,
            website_url=None,
            location=None,
            created_at=now,
            tos_accepted_at=now,
            tos_version="1.0",
        )

        mock_key = MagicMock(
            key_id="key-1",
            algorithm="ed25519",
            fingerprint="abc123",
            label="main",
            created_at=now,
            last_used_at=now,
        )

        mock_repo = MagicMock(
            repo_id="repo-1",
            slug="test-repo",
            visibility="public",
            description="",
            tags=[],
            training_opt_out=False,
            created_at=now,
        )
        # ``name`` is a reserved MagicMock constructor argument (it names the
        # mock itself), so the attribute has to be assigned after creation.
        mock_repo.name = "test-repo"

        mock_commit = MagicMock(
            commit_id="c-1",
            repo_id="repo-1",
            branch="main",
            message="init",
            timestamp=now,
        )

        def make_result(obj_or_list: MagicMock | list[MagicMock]) -> MagicMock:
            """Wrap a row or list of rows in a mock query result."""
            r = MagicMock()
            if isinstance(obj_or_list, list):
                r.scalars.return_value.all.return_value = obj_or_list
            else:
                r.scalar_one_or_none.return_value = obj_or_list
            return r

        # Mock DB session
        db = AsyncMock()
        db.execute = AsyncMock(side_effect=[
            make_result(mock_identity),   # identity query
            make_result([mock_key]),      # keys query
            make_result([mock_repo]),     # repos query
            make_result([mock_commit]),   # commits query
        ])

        claims = MagicMock(spec=TokenClaims)
        claims.identity_id = "id-123"

        response = await export_my_data(claims=claims, db=db)
        result = json.loads(response.body)

        assert result["schema_version"] == "1.0"
        assert result["identity"]["handle"] == "gabriel"
        assert result["identity"]["tos_version"] == "1.0"
        assert len(result["keys"]) == 1
        assert result["keys"][0]["algorithm"] == "ed25519"
        assert len(result["repos"]) == 1
        assert result["repos"][0]["training_opt_out"] is False
        assert len(result["commits"]) == 1

    @pytest.mark.asyncio
    async def test_export_404_when_identity_missing(self) -> None:
        """``export_my_data`` raises a 404 when the identity query is empty."""
        from musehub.api.routes.musehub.users import export_my_data
        from musehub.auth.dependencies import TokenClaims
        from fastapi import HTTPException

        db = AsyncMock()
        result = MagicMock()
        result.scalar_one_or_none.return_value = None
        db.execute = AsyncMock(return_value=result)

        claims = MagicMock(spec=TokenClaims)
        claims.identity_id = "missing-id"

        with pytest.raises(HTTPException) as exc_info:
            await export_my_data(claims=claims, db=db)
        # Checked after the ``with`` block: statements placed after the
        # raising call inside the block would never run.
        assert exc_info.value.status_code == 404
473
474
class TestGdprDeleteUnit:
    """Test DELETE /api/me endpoint."""

    @staticmethod
    def _make_identity() -> MagicMock:
        """Return a mock identity row that has not been deleted yet."""
        return MagicMock(identity_id="id-123", handle="gabriel", deleted_at=None)

    @staticmethod
    def _make_db(identity: MagicMock) -> AsyncMock:
        """Return a mock session whose first ``execute`` yields *identity*.

        The first awaited ``execute`` call returns a result whose
        ``scalar_one_or_none()`` is *identity*; every later call (delete
        keys, update repos) returns a fresh ``MagicMock``. ``commit`` is an
        ``AsyncMock`` so awaits on it can be asserted.
        """
        identity_result = MagicMock()
        identity_result.scalar_one_or_none.return_value = identity

        call_count = 0

        async def execute_side_effect(*_args: object, **_kwargs: object) -> MagicMock:
            nonlocal call_count
            call_count += 1
            if call_count == 1:
                return identity_result
            return MagicMock()

        db = AsyncMock()
        db.execute = execute_side_effect
        db.commit = AsyncMock()
        return db

    @pytest.mark.asyncio
    async def test_delete_calls_commit(self) -> None:
        """``delete_my_account`` awaits ``db.commit`` exactly once."""
        from musehub.api.routes.musehub.users import delete_my_account
        from musehub.auth.dependencies import TokenClaims

        db = self._make_db(self._make_identity())

        claims = MagicMock(spec=TokenClaims)
        claims.identity_id = "id-123"

        await delete_my_account(claims=claims, db=db)

        # commit must have been called
        db.commit.assert_awaited_once()

    @pytest.mark.asyncio
    async def test_delete_sets_deleted_at_on_identity(self) -> None:
        """``delete_my_account`` assigns a value to ``identity.deleted_at``."""
        from musehub.api.routes.musehub.users import delete_my_account
        from musehub.auth.dependencies import TokenClaims

        mock_identity = self._make_identity()
        db = self._make_db(mock_identity)

        claims = MagicMock(spec=TokenClaims)
        claims.identity_id = "id-123"

        await delete_my_account(claims=claims, db=db)

        # identity.deleted_at must have been set
        assert mock_identity.deleted_at is not None

    @pytest.mark.asyncio
    async def test_delete_404_when_identity_missing(self) -> None:
        """``delete_my_account`` raises a 404 when the identity query is empty."""
        from musehub.api.routes.musehub.users import delete_my_account
        from musehub.auth.dependencies import TokenClaims
        from fastapi import HTTPException

        db = AsyncMock()
        result = MagicMock()
        result.scalar_one_or_none.return_value = None
        db.execute = AsyncMock(return_value=result)

        claims = MagicMock(spec=TokenClaims)
        claims.identity_id = "missing-id"

        with pytest.raises(HTTPException) as exc_info:
            await delete_my_account(claims=claims, db=db)
        # Checked after the ``with`` block: statements placed after the
        # raising call inside the block would never run.
        assert exc_info.value.status_code == 404
570
571
572 # ═══════════════════════════════════════════════════════════════════════════════
573 # Checklist updated
574 # ═══════════════════════════════════════════════════════════════════════════════
575
class TestChecklistSection9:
    """Checks on section 9 of ``docs/pre-launch-checklist.md``."""

    # Heading that opens the compliance section of the checklist.
    _HEADING = "## 9. Compliance"

    @staticmethod
    def _checklist_text() -> str:
        """Return the checklist decoded as UTF-8, independent of locale."""
        return _CHECKLIST.read_text(encoding="utf-8")

    def test_checklist_section9_exists(self) -> None:
        """The checklist contains the section 9 heading."""
        assert self._HEADING in self._checklist_text()

    def test_checklist_has_six_items(self) -> None:
        """Section 9 contains at least six checked (``- [x]``) items.

        The section runs from its heading to the next ``## `` heading, or to
        the end of the file when no later heading exists.
        """
        text = self._checklist_text()

        start = text.find(self._HEADING)
        assert start != -1, "Section '## 9. Compliance' not found in checklist"

        # start + 1 skips the section's own heading when searching for the
        # next one.
        next_heading = text.find("\n## ", start + 1)
        end = len(text) if next_heading == -1 else next_heading

        section = text[start:end]
        assert section.count("- [x]") >= 6, "All 6 section 9 items should be checked"
File History 1 commit
sha256:0997d6250ae6476362f6fe2025af7789f46d03df3e9f34356d5e8ee79b201923 fix(issues): use issue number as pagination cursor, not cre… Sonnet 4.6 patch 8 days ago