gabriel / musehub public
search.py python
231 lines 8.4 KB
Raw
sha256:3c58668648c7323bb9f5c6881cfe6a3f14fc93fcb73b537d253732952a5bf8bf chore: bump version to 0.2.0rc12 Sonnet 4.6 patch 8 days ago
1
2 """MuseHub search route handlers.
3
4 Endpoints:
5 GET /musehub/search?q={q}&mode={mode}
6 — Global cross-repo commit search (keyword or pattern).
7
8 GET /repos/{repo_id}/search?q={q}&mode={mode}
9 — In-repo commit search with four modes:
10 property — filter by musical properties (harmony, rhythm, etc.)
11 ask — natural-language query (keyword extraction + overlap scoring)
12 keyword — keyword/phrase overlap scored search
13 pattern — substring pattern match against message and branch name
14
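15 Authentication: an MSign token is optional for the handlers in this module (they depend on ``optional_token``); in-repo search of a non-public repo without a token returns 401.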
16
17 """
18
19 import json
20 import logging
21 from datetime import datetime
22
23 from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
24 from fastapi.responses import Response
25 from sqlalchemy.ext.asyncio import AsyncSession
26
27 from musehub.auth.dependencies import TokenClaims, optional_token
28 from musehub.db import get_db
29 from musehub.models.musehub import GlobalSearchResult, SearchResponse
30 from musehub.rate_limits import limiter, SEARCH_LIMIT
31
32 from musehub.services import musehub_repository, musehub_search, musehub_discover
33
# Router that the search endpoints below attach themselves to via
# ``@router.get``.
router = APIRouter()

# Module-scoped logger; handlers use it for per-request search diagnostics.
logger = logging.getLogger(__name__)
37
38
# Search modes accepted by the in-repo endpoint (``search_repo``).
_REPO_VALID_MODES = frozenset(("property", "ask", "keyword", "pattern"))

# Search modes accepted by the cross-repo endpoint (``global_search``).
_GLOBAL_VALID_MODES = frozenset(("keyword", "pattern"))

# Same contents as ``_REPO_VALID_MODES``.  Nothing in the visible part of this
# module reads this name; it is kept in case other modules import it.
_VALID_MODES = _REPO_VALID_MODES
43
44
@router.get(
    "/search/repos",
    summary="Discover repos by meaning or name",
    operation_id="searchRepos",
)
@limiter.limit(SEARCH_LIMIT)
async def search_repos(
    request: Request,
    q: str = Query(..., min_length=1, max_length=500, description="Natural-language or keyword query"),
    limit: int = Query(20, ge=1, le=50, description="Max results"),
    db: AsyncSession = Depends(get_db),
    _: TokenClaims | None = Depends(optional_token),
) -> Response:
    """Discover repos matching a free-text query.

    The query and limit are forwarded to
    ``musehub_discover.search_repos_by_text``; every hit is serialised with
    ``model_dump(mode="json")``.

    Returns a JSON body shaped ``{ query, repos: ExploreRepoResult[] }``.
    """
    # ``request`` and ``_`` are not read in the body.
    # NOTE(review): ``request`` is presumably consumed by the
    # ``limiter.limit`` decorator — confirm against the rate-limit setup.
    hits = await musehub_discover.search_repos_by_text(db, q, limit=limit)
    serialised_hits = [hit.model_dump(mode="json") for hit in hits]

    logger.info("🔍 search/repos q=%r → %d results", q, len(serialised_hits))

    # The body is encoded by hand and returned as a plain ``Response``.
    payload = {"query": q, "repos": serialised_hits}
    return Response(content=json.dumps(payload), media_type="application/json")
69
70
@router.get(
    "/search",
    response_model=GlobalSearchResult,
    operation_id="globalSearch",
    summary="Global cross-repo search across all public MuseHub repos",
)
@limiter.limit(SEARCH_LIMIT)
async def global_search(
    request: Request,
    q: str = Query(..., min_length=1, max_length=500, description="Search query string"),
    mode: str = Query("keyword", description="Search mode: 'keyword' or 'pattern'"),
    cursor: str | None = Query(None, description="Opaque pagination cursor from previous nextCursor"),
    limit: int = Query(10, ge=1, le=50, description="Number of repo groups per page"),
    db: AsyncSession = Depends(get_db),
    _: TokenClaims | None = Depends(optional_token),
) -> GlobalSearchResult:
    """Search commit messages across all public MuseHub repos.

    This handler only normalises ``mode`` and forwards the request to
    ``musehub_repository.global_search``; grouping, ordering, pagination and
    visibility filtering are performed by that layer.  Its documented
    contract is:

    - Results are grouped by repo; each group holds up to 20 matching
      commits, newest first, together with repo metadata (name, owner).
    - Only ``visibility='public'`` repos are searched, regardless of who is
      calling.
    - ``nextCursor`` from a response can be passed back as ``?cursor=`` to
      fetch the next page of repo groups; a null ``nextCursor`` marks the
      last page.

    Supported search modes:
      - ``keyword``: OR-match whitespace-split terms against commit messages
        and repo names (case-insensitive).
      - ``pattern``: raw SQL LIKE pattern applied to commit messages only.
        Use ``%`` as wildcard (e.g. ``q=%minor%``).

    An unrecognised ``mode`` is not rejected: a warning is logged and the
    search runs in ``keyword`` mode.

    Content negotiation: this endpoint always returns JSON.  The companion
    HTML page at ``GET /search`` renders the browser UI shell.
    """
    if mode in _GLOBAL_VALID_MODES:
        effective_mode = mode
    else:
        # Deliberately lenient: fall back instead of failing the request.
        logger.warning("⚠️ Unknown search mode %r — falling back to 'keyword'", mode)
        effective_mode = "keyword"

    result = await musehub_repository.global_search(
        db,
        query=q,
        mode=effective_mode,
        cursor=cursor,
        limit=limit,
    )

    group_count = len(result.groups)
    logger.info(
        "✅ Global search q=%r mode=%s cursor=%r → %d repo groups",
        q,
        effective_mode,
        cursor,
        group_count,
    )
    return result
126
127
128 @router.get(
129 "/repos/{repo_id}/search",
130 response_model=SearchResponse,
131 operation_id="searchRepo",
132 summary="Search Muse repo commits",
133 )
134 @limiter.limit(SEARCH_LIMIT)
135 async def search_repo(
136 request: Request,
137 repo_id: str,
138 q: str = Query("", description="Search query — interpreted by the selected mode"),
139 mode: str = Query("keyword", description="Search mode: property | ask | keyword | pattern"),
140 harmony: str | None = Query(None, description="[property mode] Harmony filter"),
141 rhythm: str | None = Query(None, description="[property mode] Rhythm filter"),
142 melody: str | None = Query(None, description="[property mode] Melody filter"),
143 structure: str | None = Query(None, description="[property mode] Structure filter"),
144 dynamic: str | None = Query(None, description="[property mode] Dynamics filter"),
145 emotion: str | None = Query(None, description="[property mode] Emotion filter"),
146 since: datetime | None = Query(None, description="Only include commits on or after this ISO datetime"),
147 until: datetime | None = Query(None, description="Only include commits on or before this ISO datetime"),
148 limit: int = Query(20, ge=1, le=200, description="Maximum results to return"),
149 db: AsyncSession = Depends(get_db),
150 claims: TokenClaims | None = Depends(optional_token),
151 ) -> SearchResponse:
152 """Search commit history using one of four musical search modes.
153
154 The ``mode`` parameter selects the search algorithm:
155
156 - **property** — filter commits by musical properties using AND logic.
157 Supply any of ``harmony``, ``rhythm``, ``melody``, ``structure``,
158 ``dynamic``, ``emotion`` query params. Accepts ``key=low-high`` range
159 syntax (e.g. ``rhythm=tempo=120-130``).
160
161 - **ask** — treat ``q`` as a natural-language question. Stop-words are
162 stripped; remaining keywords are scored by overlap coefficient.
163
164 - **keyword** — score commits by keyword overlap against ``q``.
165 Useful for exact term search (e.g. ``q=Fmin_jazz_bassline``).
166
167 - **pattern** — case-insensitive substring match of ``q`` against commit
168 messages and branch names. No scoring; matched rows returned newest-first.
169
170 Returns 404 if the repo does not exist. Returns an empty ``matches`` list
171 when no commits satisfy the criteria (not a 404).
172 """
173 if mode not in _REPO_VALID_MODES:
174 raise HTTPException(
175 status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
176 detail=f"Invalid mode '{mode}'. Must be one of: {sorted(_REPO_VALID_MODES)}",
177 )
178
179 repo = await musehub_repository.get_repo(db, repo_id)
180 if repo is None:
181 raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")
182 if repo.visibility != "public" and claims is None:
183 raise HTTPException(
184 status_code=status.HTTP_401_UNAUTHORIZED,
185 detail="Authentication required to access private repos.",
186 headers={"WWW-Authenticate": 'MSign realm="musehub"'},
187 )
188
189 if mode == "property":
190 return await musehub_search.search_by_property(
191 db,
192 repo_id=repo_id,
193 harmony=harmony,
194 rhythm=rhythm,
195 melody=melody,
196 structure=structure,
197 dynamic=dynamic,
198 emotion=emotion,
199 since=since,
200 until=until,
201 limit=limit,
202 )
203
204 if mode == "ask":
205 return await musehub_search.search_by_ask(
206 db,
207 repo_id=repo_id,
208 question=q,
209 since=since,
210 until=until,
211 limit=limit,
212 )
213
214 if mode == "keyword":
215 return await musehub_search.search_by_keyword(
216 db,
217 repo_id=repo_id,
218 keyword=q,
219 since=since,
220 until=until,
221 limit=limit,
222 )
223
224 return await musehub_search.search_by_pattern(
225 db,
226 repo_id=repo_id,
227 pattern=q,
228 since=since,
229 until=until,
230 limit=limit,
231 )
File History 1 commit
sha256:3c58668648c7323bb9f5c6881cfe6a3f14fc93fcb73b537d253732952a5bf8bf chore: bump version to 0.2.0rc12 Sonnet 4.6 patch 8 days ago