search.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | /** |
| 2 | * Semantic search: embed query, vector store search, filters, SPEC §4.2 output shape. |
| 3 | * Phase 3: folder, project, tag, limit, fields, snippet-chars, count-only. |
| 4 | */ |
| 5 | |
| 6 | import { loadConfig } from './config.mjs'; |
| 7 | import { embed } from './embedding.mjs'; |
| 8 | import { createVectorStore } from './vector-store.mjs'; |
| 9 | import { readNote, normalizeSlug } from './vault.mjs'; |
| 10 | import { filterHitsByContentScope, resolveSearchFolderForContentScope } from './approval-log.mjs'; |
| 11 | import { MAX_VECTOR_KNN } from './vector-knn-limit.mjs'; |
| 12 | |
/** Snippet length (in characters) that runSearch falls back to when `options.snippetChars` is not supplied. */
const DEFAULT_SNIPPET_CHARS = 300;
| 14 | |
/**
 * Truncate text to at most `maxChars` characters, cutting at a word boundary when possible.
 *
 * The input is trimmed first; if the trimmed text fits the budget it is returned as-is.
 * Otherwise the text is cut at `maxChars`, backed up to the last whitespace character when
 * that boundary lies in the second half of the cut, and suffixed with '…'.
 *
 * @param {string} text - Text to shorten. Any non-string value yields ''.
 * @param {number} maxChars - Maximum length (in UTF-16 code units) of the kept text, not
 *   counting the ellipsis. Fractions are floored; negative or non-numeric values count as 0.
 * @returns {string} The trimmed text, or a shortened prefix followed by '…'.
 */
export function truncateSnippet(text, maxChars) {
  if (typeof text !== 'string') return '';
  const trimmed = text.trim();

  // Normalise the budget: a negative value would otherwise make slice() count from the end
  // of the string and return almost the whole text instead of a short snippet.
  const budget = Math.max(0, Math.floor(Number(maxChars)) || 0);
  if (trimmed.length <= budget) return trimmed;

  let head = trimmed.slice(0, budget);

  // Do not leave half of a surrogate pair (e.g. a split emoji) at the end of the cut.
  const lastCode = head.charCodeAt(head.length - 1);
  if (lastCode >= 0xd800 && lastCode <= 0xdbff) {
    head = head.slice(0, -1);
  }

  // Index of the last whitespace character (space, tab, newline, ...), or -1 if none.
  const boundary = head.search(/\s\S*$/);
  // Only back up to the boundary when it keeps more than half of the budget; otherwise a
  // single long trailing word would throw away most of the snippet.
  if (boundary > budget / 2) {
    head = head.slice(0, boundary);
  }

  // trimEnd() avoids a dangling space before the ellipsis.
  return `${head.trimEnd()}…`;
}
| 32 | |
/**
 * Coerce a caller-supplied numeric option to an integer within [min, max].
 * @param {unknown} value - Raw option value (number, numeric string, or nullish).
 * @param {number} fallback - Returned when `value` is nullish or not numeric.
 * @param {number} min - Inclusive lower bound.
 * @param {number} max - Inclusive upper bound.
 * @returns {number}
 */
function toBoundedInt(value, fallback, min, max) {
  if (value == null) return fallback;
  const n = Number(value);
  // Math.min/Math.max propagate NaN, so reject it before clamping.
  if (Number.isNaN(n)) return fallback;
  return Math.min(max, Math.max(min, Math.trunc(n)));
}

/**
 * Run semantic search. Loads config, embeds query, searches vector store, formats per SPEC §4.2.
 * Phase 3.1: --since, --until, --order, --chain, --entity, --episode.
 *
 * Option handling:
 * - `limit` is clamped to 1–100 (default 10); a non-numeric value falls back to the default.
 * - `snippetChars` defaults to DEFAULT_SNIPPET_CHARS; a non-numeric value falls back to it too.
 * - `project`, `tag`, `chain`, `entity` and `episode` are passed through normalizeSlug;
 *   `folder`, `since` and `until` are stringified and trimmed.
 * - `order` accepts only 'date' or 'date-asc'; anything else is sent to the store as undefined.
 * - With `countOnly`, the count is the number of hits left after content-scope filtering and
 *   can never exceed the number of hits requested from the store (1000, or MAX_VECTOR_KNN if lower).
 *
 * @param {string} query - Search query string
 * @param {{
 *   folder?: string,
 *   project?: string,
 *   tag?: string,
 *   limit?: number,
 *   fields?: 'path'|'path+snippet'|'full',
 *   snippetChars?: number,
 *   countOnly?: boolean,
 *   since?: string,
 *   until?: string,
 *   order?: string,
 *   chain?: string,
 *   entity?: string,
 *   episode?: string,
 *   vault_id?: string,
 *   content_scope?: 'all'|'notes'|'approval_logs'
 * }} options
 * @param {{ vault_path?: string, qdrant_url?: string, vector_store?: string, data_dir?: string, embedding?: object, ignore?: string[] }} [configOverride] - When provided (e.g. Hub), use instead of loadConfig()
 * @returns {Promise<{ results?: { path, snippet?, score, project, tags }[], count?: number, query: string }>}
 *   `{ count, query }` when `countOnly` is true, otherwise `{ results, query }`.
 * @throws {Error} When the embedding call returns no vector for the query.
 */
export async function runSearch(query, options = {}, configOverride = null) {
  const opts = options ?? {};
  const config = configOverride || loadConfig();

  const countOnly = opts.countOnly === true;
  const limit = toBoundedInt(opts.limit, 10, 1, 100);
  const fields = opts.fields || 'path+snippet';
  const snippetChars = toBoundedInt(
    opts.snippetChars,
    DEFAULT_SNIPPET_CHARS,
    0,
    Number.MAX_SAFE_INTEGER,
  );

  const asSlug = (value) => (value != null ? normalizeSlug(String(value)) : undefined);
  const asTrimmed = (value) => (value != null ? String(value).trim() : undefined);

  const project = asSlug(opts.project);
  const tag = asSlug(opts.tag);
  const folder = asTrimmed(opts.folder);
  const since = asTrimmed(opts.since);
  const until = asTrimmed(opts.until);
  const order = opts.order === 'date-asc' || opts.order === 'date' ? opts.order : undefined;
  const chain = asSlug(opts.chain);
  const entity = asSlug(opts.entity);
  const episode = asSlug(opts.episode);

  // Resolve the content scope first: it depends only on the options, so an impossible
  // scope/folder combination can be answered without creating the store or embedding the query.
  const scope = opts.content_scope || 'all';
  const resolved = resolveSearchFolderForContentScope(scope, folder);
  if (resolved.impossible) {
    return countOnly ? { count: 0, query } : { results: [], query };
  }

  const store = await createVectorStore(config);
  const vector = await embed([query], config.embedding, { voyageInputType: 'query' });
  if (!vector || !vector[0]) {
    throw new Error('Embedding failed: no vector returned for query.');
  }

  // Content-scope filtering runs after the vector search (filterHitsByContentScope below),
  // so scoped searches request more candidates than `limit` — presumably so that enough
  // hits survive the filter.
  let searchLimit;
  if (countOnly) {
    searchLimit = 1000;
  } else if (resolved.wideNotesFetch) {
    searchLimit = Math.min(10000, Math.max(limit * 120, 2500));
  } else if (scope !== 'all') {
    searchLimit = Math.min(10000, Math.max(limit * 40, 800));
  } else {
    searchLimit = limit;
  }
  searchLimit = Math.min(searchLimit, MAX_VECTOR_KNN);

  const rawHits = await store.search(vector[0], {
    limit: searchLimit,
    vault_id: opts.vault_id,
    project,
    tag,
    folder: resolved.folder,
    since,
    until,
    order,
    chain,
    entity,
    episode,
  });
  const scopedHits = filterHitsByContentScope(rawHits, scope);
  if (countOnly) {
    return { count: scopedHits.length, query };
  }

  const results = scopedHits.slice(0, limit).map((hit) => {
    const base = {
      path: hit.path,
      score: hit.score,
      project: hit.project ?? null,
      tags: hit.tags ?? [],
    };
    if (fields === 'path') {
      return base;
    }

    const snippet = truncateSnippet(hit.text, snippetChars);
    // 'path+snippet' and any unrecognised `fields` value share the same shape.
    if (fields !== 'full') {
      return { ...base, snippet };
    }

    let frontmatter = {};
    let body = '';
    try {
      const note = readNote(config.vault_path, hit.path);
      frontmatter = note.frontmatter || {};
      body = note.body || '';
    } catch {
      // Best effort: if the note cannot be read, fall back to the indexed text
      // instead of failing the whole search.
      body = snippet;
    }
    return { ...base, snippet, frontmatter, body };
  });

  return { results, query };
}
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
1 day ago