keyword-search.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | /** |
| 2 | * Keyword search: substring or all-terms matching over vault note path, body, and selected frontmatter. |
| 3 | * Same filter dimensions as list-notes / semantic search where applicable. |
| 4 | */ |
| 5 | |
| 6 | import { loadConfig } from './config.mjs'; |
| 7 | import { getNotesWithMeta, filterNotesByListOptions } from './list-notes.mjs'; |
| 8 | import { effectiveProjectSlug, normalizeSlug, normalizeTags } from './vault.mjs'; |
| 9 | import { truncateSnippet } from './search.mjs'; |
| 10 | |
| 11 | const DEFAULT_SNIPPET_CHARS = 300; |
| 12 | |
/**
 * Parse the `frontmatter` member of an export payload into a plain object.
 * Accepts a JSON string or an already-parsed object; anything else (malformed JSON,
 * arrays, primitives, null/undefined) collapses to an empty object.
 * @param {unknown} raw
 * @returns {Record<string, unknown>}
 */
function parseExportFrontmatter(raw) {
  if (typeof raw === 'string') {
    try {
      const parsed = JSON.parse(raw);
      if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) return parsed;
    } catch {
      // Deliberate best effort: malformed JSON means "no frontmatter", not an error.
    }
    return {};
  }
  if (raw && typeof raw === 'object' && !Array.isArray(raw)) {
    return /** @type {Record<string, unknown>} */ (raw);
  }
  return {};
}

/**
 * Normalize a frontmatter date-like value to a string.
 * Date instances become ISO-8601 strings; other non-null values are stringified as-is.
 * @param {unknown} value
 * @returns {string|undefined} undefined when the value is absent or an invalid Date
 */
function exportDateString(value) {
  if (value == null) return undefined;
  if (value instanceof Date) {
    // toISOString() throws RangeError on an invalid Date; treat that as "no date".
    return Number.isNaN(value.getTime()) ? undefined : value.toISOString();
  }
  return String(value);
}

/**
 * Stringify and trim an optional frontmatter value.
 * @param {unknown} value
 * @returns {string|undefined}
 */
function exportTrimmedString(value) {
  return value != null ? String(value).trim() : undefined;
}

/**
 * Build a readNote-shaped record from a hosted export JSON element (path, body, frontmatter string or object).
 *
 * - `path` / `body` are stringified; missing values become ''.
 * - `frontmatter` may be a JSON string or an object; anything unusable becomes `{}`.
 * - `project` comes from `effectiveProjectSlug(path, frontmatter)`, `tags` from `normalizeTags`.
 * - `causal_chain_id`, `entity` and `episode_id` are passed through `normalizeSlug`;
 *   `entity` is always an array when present, with empty slugs removed.
 * - Blockchain fields (`network`, `wallet_address`, `tx_hash`, `payment_status`) are trimmed strings.
 * - A `null`/`undefined` payload is treated as an empty element instead of throwing.
 *
 * @param {{ path?: string, body?: string, frontmatter?: string|object }|null|undefined} n
 * @returns {{ path: string, body: string, frontmatter: object, project?: string, tags?: string[], date?: string, updated?: string, causal_chain_id?: string, entity?: string[], episode_id?: string, network?: string, wallet_address?: string, tx_hash?: string, payment_status?: string }}
 */
export function noteRecordFromExportPayload(n) {
  const payload = n ?? {};
  const path = payload.path != null ? String(payload.path) : '';
  const body = payload.body != null ? String(payload.body) : '';
  const fm = parseExportFrontmatter(payload.frontmatter);

  const project = effectiveProjectSlug(path, fm);
  const tags = normalizeTags(fm.tags);
  const date = exportDateString(fm.date);
  const updated = exportDateString(fm.updated);
  const causal_chain_id =
    fm.causal_chain_id != null ? normalizeSlug(String(fm.causal_chain_id)) : undefined;

  // `entity` may be a single value or a list; normalize to a list of non-empty slugs.
  const entityRaw = fm.entity;
  const entity =
    entityRaw != null
      ? (Array.isArray(entityRaw) ? entityRaw : [entityRaw])
          .map((e) => normalizeSlug(String(e)))
          .filter(Boolean)
      : undefined;
  const episode_id = fm.episode_id != null ? normalizeSlug(String(fm.episode_id)) : undefined;

  return {
    path,
    body,
    frontmatter: fm,
    project,
    tags,
    date,
    updated,
    causal_chain_id,
    entity,
    episode_id,
    // Phase 12 — blockchain frontmatter fields
    network: exportTrimmedString(fm.network),
    wallet_address: exportTrimmedString(fm.wallet_address),
    tx_hash: exportTrimmedString(fm.tx_hash),
    payment_status: exportTrimmedString(fm.payment_status),
  };
}
| 76 | |
/**
 * Collect the frontmatter text that participates in keyword matching.
 *
 * Only a fixed allow-list of keys is searched, plus `tags`. Object-valued entries are
 * skipped everywhere (including inside `tags`) so that "[object Object]" never leaks into
 * the haystack and produces false matches.
 *
 * @param {Record<string, unknown>} fm
 * @returns {string} newline-joined values; '' when `fm` is not an object
 */
function frontmatterSearchStrings(fm) {
  if (!fm || typeof fm !== 'object') return '';
  const searchableKeys = [
    'title', 'intent', 'source', 'proposal_id', 'target_path', 'description', 'summary',
    // Phase 12 — blockchain fields included in keyword match
    'network', 'wallet_address', 'tx_hash', 'payment_status', 'currency', 'direction', 'air_id',
  ];
  const isScalar = (value) => value != null && typeof value !== 'object';

  const parts = [];
  for (const key of searchableKeys) {
    const value = fm[key];
    if (isScalar(value)) parts.push(String(value));
  }

  if (fm.tags != null) {
    // Flatten nested tag lists so their scalar members stay searchable, then drop
    // anything that would only stringify to "[object Object]" (or null/undefined).
    const tagValues = Array.isArray(fm.tags) ? fm.tags.flat(Infinity) : [fm.tags];
    parts.push(tagValues.filter(isScalar).map(String).join(' '));
  }
  return parts.join('\n');
}
| 95 | |
/**
 * Assemble the text a keyword query is matched against for one note:
 * the note path, the searchable frontmatter strings, and the body, one per line.
 * Missing path/body contribute an empty line; non-object frontmatter is treated as empty.
 *
 * @param {{ path: string, body?: string, frontmatter?: object }} note
 * @returns {string}
 */
export function keywordHaystackForNote(note) {
  let searchableFrontmatter = {};
  if (note.frontmatter && typeof note.frontmatter === 'object') {
    searchableFrontmatter = note.frontmatter;
  }
  const segments = [];
  segments.push(note.path || '');
  segments.push(frontmatterSearchStrings(searchableFrontmatter));
  segments.push(note.body || '');
  return segments.join('\n');
}
| 104 | |
const KEYWORD_DEFAULT_LIMIT = 10;
const KEYWORD_MAX_LIMIT = 100;

/**
 * Resolve the caller-supplied result limit to an integer in [1, KEYWORD_MAX_LIMIT].
 * Absent or non-numeric values fall back to KEYWORD_DEFAULT_LIMIT; previously a
 * non-numeric limit became NaN and silently produced an empty result page.
 * @param {unknown} requested
 * @returns {number}
 */
function resolveKeywordLimit(requested) {
  const parsed = Number(requested ?? KEYWORD_DEFAULT_LIMIT);
  if (Number.isNaN(parsed)) return KEYWORD_DEFAULT_LIMIT;
  return Math.max(1, Math.min(Math.trunc(parsed), KEYWORD_MAX_LIMIT));
}

/**
 * Pure keyword rank/filter on an already-filtered list of notes (same shape as readNote output).
 *
 * Matching is case-insensitive substring search over `keywordHaystackForNote(note)`:
 * - `match: 'phrase'` (default): the whole trimmed query must occur as one substring.
 * - `match: 'all_terms'`: every whitespace-separated term must occur somewhere.
 * Every returned note satisfies the full query, so `score` is always 1.
 *
 * Ordering: `order: 'date'` sorts newest first and `order: 'date-asc'` oldest first, using
 * `note.date`, then `note.updated`, then '' as the key; ties (and any other `order`) fall
 * back to ascending path.
 *
 * @param {Array<{ path: string, body?: string, frontmatter?: object, project?: string, tags?: string[], date?: string, updated?: string }>} notes
 * @param {string} query
 * @param {{
 *   match?: 'phrase'|'all_terms',
 *   order?: string,
 *   limit?: number,
 *   fields?: string,
 *   snippetChars?: number,
 *   countOnly?: boolean,
 * }} options `limit` defaults to 10 and is clamped to 1..100; `fields` is 'path',
 *   'path+snippet' (default, also used for unknown values) or 'full'.
 * @returns {{ results?: Array<{ path: string, score: number, project?: string|null, tags?: string[], snippet?: string, frontmatter?: object, body?: string }>, count?: number, query: string, mode: 'keyword' }}
 *   `count` (total matches, ignoring `limit`) when `countOnly` is true, otherwise `results`.
 */
export function keywordSearchNotesArray(notes, query, options = {}) {
  // A default parameter only covers `undefined`; tolerate an explicit null as well.
  const opts = options ?? {};
  const rawQ = query != null ? String(query).trim() : '';
  const match = opts.match === 'all_terms' ? 'all_terms' : 'phrase';
  const limit = resolveKeywordLimit(opts.limit);
  const fields = opts.fields || 'path+snippet';
  const snippetChars = opts.snippetChars ?? DEFAULT_SNIPPET_CHARS;
  const countOnly = opts.countOnly === true;
  const order = opts.order === 'date-asc' ? 'date-asc' : opts.order === 'date' ? 'date' : undefined;

  if (!rawQ) {
    return countOnly
      ? { count: 0, query: rawQ, mode: 'keyword' }
      : { results: [], query: rawQ, mode: 'keyword' };
  }

  // Phrase mode is "all terms" with the whole query as its single term.
  // rawQ is trimmed and non-empty here, so the term list is never empty.
  const requiredTerms =
    match === 'all_terms'
      ? rawQ.split(/\s+/).map((term) => term.toLowerCase()).filter(Boolean)
      : [rawQ.toLowerCase()];

  /** @type {Array<{ note: typeof notes[0], score: number }>} */
  const matched = [];
  for (const note of notes) {
    const haystack = keywordHaystackForNote(note).toLowerCase();
    if (requiredTerms.every((term) => haystack.includes(term))) {
      matched.push({ note, score: 1 });
    }
  }

  // Counting does not depend on order, so skip the sort entirely.
  if (countOnly) {
    return { count: matched.length, query: rawQ, mode: 'keyword' };
  }

  const dateKey = (note) => String(note.date || note.updated || '');
  matched.sort((a, b) => {
    if (order !== undefined) {
      const ascending = dateKey(a.note).localeCompare(dateKey(b.note));
      if (ascending !== 0) return order === 'date-asc' ? ascending : -ascending;
    }
    return (a.note.path || '').localeCompare(b.note.path || '');
  });

  const results = matched.slice(0, limit).map(({ note, score }) => {
    const base = {
      path: note.path,
      score,
      project: note.project ?? null,
      tags: Array.isArray(note.tags) ? note.tags : [],
    };
    if (fields === 'path') {
      return base;
    }
    // Snippets come from the body; notes without a body fall back to the full haystack.
    const snippet = truncateSnippet(note.body || keywordHaystackForNote(note), snippetChars);
    if (fields === 'full') {
      return {
        ...base,
        snippet,
        frontmatter: note.frontmatter || {},
        body: note.body || '',
      };
    }
    // 'path+snippet' and any unrecognized `fields` value.
    return { ...base, snippet };
  });

  return { results, query: rawQ, mode: 'keyword' };
}
| 219 | |
/**
 * Keyword search over on-disk vault (CLI, MCP, Node Hub).
 *
 * Resolves the config (explicit override wins, otherwise `loadConfig()`), loads notes via
 * `getNotesWithMeta`, narrows them with `filterNotesByListOptions`, and hands the remainder
 * to `keywordSearchNotesArray` together with the same `options` object.
 *
 * @param {string} query
 * @param {{
 *   folder?: string,
 *   project?: string,
 *   tag?: string,
 *   limit?: number,
 *   fields?: 'path'|'path+snippet'|'full',
 *   snippetChars?: number,
 *   countOnly?: boolean,
 *   since?: string,
 *   until?: string,
 *   order?: string,
 *   chain?: string,
 *   entity?: string,
 *   episode?: string,
 *   content_scope?: 'all'|'notes'|'approval_logs',
 *   match?: 'phrase'|'all_terms',
 * }} options
 * @param {{ vault_path?: string, ignore?: string[] }|null} configOverride
 * @returns {Promise<object>} resolves to the value returned by `keywordSearchNotesArray`
 * @throws {Error} when the resolved config has no `vault_path`
 */
export async function runKeywordSearch(query, options = {}, configOverride = null) {
  const activeConfig = configOverride ? configOverride : loadConfig();
  const vaultRoot = activeConfig.vault_path;
  if (!vaultRoot) {
    throw new Error('vault_path required for keyword search');
  }
  const allNotes = getNotesWithMeta(vaultRoot, activeConfig);
  const candidates = filterNotesByListOptions(allNotes, options);
  return keywordSearchNotesArray(candidates, query, options);
}
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
2 days ago