keyword-search.mjs
251 lines 8.6 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
1 /**
2 * Keyword search: substring or all-terms matching over vault note path, body, and selected frontmatter.
3 * Same filter dimensions as list-notes / semantic search where applicable.
4 */
5
6 import { loadConfig } from './config.mjs';
7 import { getNotesWithMeta, filterNotesByListOptions } from './list-notes.mjs';
8 import { effectiveProjectSlug, normalizeSlug, normalizeTags } from './vault.mjs';
9 import { truncateSnippet } from './search.mjs';
10
/** Snippet length handed to truncateSnippet when a caller does not supply `options.snippetChars`. */
const DEFAULT_SNIPPET_CHARS = 300;
12
/**
 * Render a frontmatter date-like value as a string.
 * @param {unknown} value
 * @returns {string|undefined} ISO string for a valid Date, `String(value)` for any other
 *   non-nullish value, `undefined` for null/undefined or an invalid Date.
 */
function frontmatterDateString(value) {
  if (value == null) return undefined;
  if (value instanceof Date) {
    // toISOString() throws RangeError on an invalid Date; treat that as "no date" instead of crashing.
    return Number.isNaN(value.getTime()) ? undefined : value.toISOString();
  }
  return String(value);
}

/**
 * Stringify and trim a frontmatter scalar.
 * @param {unknown} value
 * @returns {string|undefined} `undefined` when the value is null/undefined.
 */
function frontmatterTrimmedString(value) {
  return value != null ? String(value).trim() : undefined;
}

/**
 * Stringify a frontmatter value and pass it through normalizeSlug.
 * @param {unknown} value
 * @returns {string|undefined} `undefined` when the value is null/undefined.
 */
function frontmatterSlug(value) {
  return value != null ? normalizeSlug(String(value)) : undefined;
}

/**
 * Build a readNote-shaped record from a hosted export JSON element (path, body, frontmatter string or object).
 *
 * Frontmatter given as a string is parsed as JSON; anything that does not parse to a plain
 * (non-array) object is replaced by `{}`. Frontmatter given as a non-array object is used as-is.
 *
 * @param {{ path?: string, body?: string, frontmatter?: string|object }} n
 * @returns {{
 *   path: string,
 *   body: string,
 *   frontmatter: object,
 *   project?: string,
 *   tags?: string[],
 *   date?: string,
 *   updated?: string,
 *   causal_chain_id?: string,
 *   entity?: string[],
 *   episode_id?: string,
 *   network?: string,
 *   wallet_address?: string,
 *   tx_hash?: string,
 *   payment_status?: string,
 * }}
 */
export function noteRecordFromExportPayload(n) {
  const path = n.path != null ? String(n.path) : '';
  const body = n.body != null ? String(n.body) : '';

  /** @type {Record<string, unknown>} */
  let fm = {};
  const rawFrontmatter = n.frontmatter;
  if (typeof rawFrontmatter === 'string') {
    try {
      const parsed = JSON.parse(rawFrontmatter);
      if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) fm = parsed;
    } catch {
      // Best effort: frontmatter that is not valid JSON is treated as empty.
      fm = {};
    }
  } else if (rawFrontmatter && typeof rawFrontmatter === 'object' && !Array.isArray(rawFrontmatter)) {
    fm = /** @type {Record<string, unknown>} */ (rawFrontmatter);
  }

  const entityRaw = fm.entity;

  return {
    path,
    body,
    frontmatter: fm,
    project: effectiveProjectSlug(path, fm),
    tags: normalizeTags(fm.tags),
    date: frontmatterDateString(fm.date),
    updated: frontmatterDateString(fm.updated),
    causal_chain_id: frontmatterSlug(fm.causal_chain_id),
    // A scalar entity is treated as a one-element list; entries whose slug is empty are dropped.
    entity:
      entityRaw != null
        ? (Array.isArray(entityRaw) ? entityRaw : [entityRaw])
            .map((e) => normalizeSlug(String(e)))
            .filter(Boolean)
        : undefined,
    episode_id: frontmatterSlug(fm.episode_id),
    // Phase 12 — blockchain frontmatter fields
    network: frontmatterTrimmedString(fm.network),
    wallet_address: frontmatterTrimmedString(fm.wallet_address),
    tx_hash: frontmatterTrimmedString(fm.tx_hash),
    payment_status: frontmatterTrimmedString(fm.payment_status),
  };
}
76
/**
 * Collect the keyword-searchable text of a frontmatter object.
 *
 * Scalar values of a fixed list of keys are stringified in list order, followed by the tags
 * (array entries joined with a space, anything else stringified). Pieces are joined with newlines.
 *
 * @param {Record<string, unknown>} fm
 * @returns {string} Empty string when `fm` is not an object.
 */
function frontmatterSearchStrings(fm) {
  if (fm === null || typeof fm !== 'object') return '';

  const searchableKeys = [
    'title', 'intent', 'source', 'proposal_id', 'target_path', 'description', 'summary',
    // Phase 12 — blockchain fields included in keyword match
    'network', 'wallet_address', 'tx_hash', 'payment_status', 'currency', 'direction', 'air_id',
  ];

  // Nested objects/arrays under these keys are skipped; only scalar values contribute text.
  const pieces = searchableKeys
    .map((key) => fm[key])
    .filter((value) => value != null && typeof value !== 'object')
    .map((value) => String(value));

  const { tags } = fm;
  if (tags != null) {
    pieces.push(Array.isArray(tags) ? tags.join(' ') : String(tags));
  }

  return pieces.join('\n');
}
95
/**
 * Assemble the text a keyword query is matched against: the note path, the searchable
 * frontmatter strings, and the body, separated by newlines (in that order).
 *
 * @param {{ path: string, body?: string, frontmatter?: object }} note
 * @returns {string}
 */
export function keywordHaystackForNote(note) {
  const { path, frontmatter, body } = note;
  // Anything that is not an object is treated as "no frontmatter".
  const meta = frontmatter && typeof frontmatter === 'object' ? frontmatter : {};
  return `${path || ''}\n${frontmatterSearchStrings(meta)}\n${body || ''}`;
}
104
/**
 * Pure keyword rank/filter on an already-filtered list of notes (same shape as readNote output).
 *
 * Matching is case-insensitive against keywordHaystackForNote(note):
 * - `match: 'phrase'` (default): the whole trimmed query must occur as a substring.
 * - `match: 'all_terms'`: every whitespace-separated term must occur as a substring.
 *
 * Every returned hit has `score: 1`, because a note is only returned when the phrase or all
 * terms matched. Hits are ordered by date (`order: 'date'` newest first, `order: 'date-asc'`
 * oldest first, using `date` and falling back to `updated`) with path as the tie-breaker;
 * any other `order` sorts by path only.
 *
 * `limit` is clamped to 1..100 and defaults to 10; a value that is not numeric also falls back
 * to 10. With `countOnly`, the count covers all matches and ignores `limit`.
 *
 * @param {Array<{ path: string, body?: string, frontmatter?: object, project?: string, tags?: string[], date?: string, updated?: string }>} notes
 * @param {string} query
 * @param {{
 *   match?: 'phrase'|'all_terms',
 *   order?: string,
 *   limit?: number,
 *   fields?: string,
 *   snippetChars?: number,
 *   countOnly?: boolean,
 * }} options
 * @returns {{ results?: Array<{ path: string, score: number, project?: string|null, tags?: string[], snippet?: string, frontmatter?: object, body?: string }>, count?: number, query: string, mode: 'keyword' }}
 */
export function keywordSearchNotesArray(notes, query, options = {}) {
  const DEFAULT_LIMIT = 10;
  const MAX_LIMIT = 100;

  // A default parameter only covers `undefined`; tolerate an explicit `null` as well.
  const opts = options ?? {};
  const rawQ = query != null ? String(query).trim() : '';
  const countOnly = opts.countOnly === true;

  if (!rawQ) {
    return countOnly
      ? { count: 0, query: rawQ, mode: 'keyword' }
      : { results: [], query: rawQ, mode: 'keyword' };
  }

  // A NaN limit would make slice(0, NaN) return nothing, so fall back to the default instead.
  const requestedLimit = Number(opts.limit ?? DEFAULT_LIMIT);
  const limit = Number.isNaN(requestedLimit)
    ? DEFAULT_LIMIT
    : Math.max(1, Math.min(requestedLimit, MAX_LIMIT));
  const fields = opts.fields || 'path+snippet';
  const snippetChars = opts.snippetChars ?? DEFAULT_SNIPPET_CHARS;
  const order = opts.order === 'date-asc' ? 'date-asc' : opts.order === 'date' ? 'date' : undefined;

  // rawQ is trimmed and non-empty here, so both modes yield at least one needle.
  const needles =
    opts.match === 'all_terms'
      ? rawQ
          .split(/\s+/)
          .map((term) => term.toLowerCase())
          .filter(Boolean)
      : [rawQ.toLowerCase()];

  const matched = [];
  for (const note of notes) {
    const haystack = keywordHaystackForNote(note).toLowerCase();
    if (needles.every((needle) => haystack.includes(needle))) matched.push(note);
  }

  // Counting does not depend on order, so skip the sort entirely.
  if (countOnly) {
    return { count: matched.length, query: rawQ, mode: 'keyword' };
  }

  const dateKey = (note) => String(note.date || note.updated || '');
  matched.sort((a, b) => {
    if (order !== undefined) {
      const byDate =
        order === 'date-asc'
          ? dateKey(a).localeCompare(dateKey(b))
          : dateKey(b).localeCompare(dateKey(a));
      if (byDate !== 0) return byDate;
    }
    return (a.path || '').localeCompare(b.path || '');
  });

  const results = matched.slice(0, limit).map((note) => {
    const hit = {
      path: note.path,
      score: 1,
      project: note.project ?? null,
      tags: Array.isArray(note.tags) ? note.tags : [],
    };
    if (fields === 'path') return hit;

    // Any `fields` value other than 'path' and 'full' behaves like 'path+snippet'.
    // The snippet comes from the body, or from the whole haystack when the body is empty.
    hit.snippet = truncateSnippet(note.body || keywordHaystackForNote(note), snippetChars);
    if (fields === 'full') {
      hit.frontmatter = note.frontmatter || {};
      hit.body = note.body || '';
    }
    return hit;
  });

  return { results, query: rawQ, mode: 'keyword' };
}
219
/**
 * Keyword search over on-disk vault (CLI, MCP, Node Hub).
 *
 * Resolves the config (`configOverride` when truthy, otherwise loadConfig()), loads the notes
 * via getNotesWithMeta, narrows them with filterNotesByListOptions, and hands the remainder
 * to keywordSearchNotesArray with the same `options`.
 *
 * @param {string} query
 * @param {{
 *   folder?: string,
 *   project?: string,
 *   tag?: string,
 *   limit?: number,
 *   fields?: 'path'|'path+snippet'|'full',
 *   snippetChars?: number,
 *   countOnly?: boolean,
 *   since?: string,
 *   until?: string,
 *   order?: string,
 *   chain?: string,
 *   entity?: string,
 *   episode?: string,
 *   content_scope?: 'all'|'notes'|'approval_logs',
 *   match?: 'phrase'|'all_terms',
 * }} options
 * @param {{ vault_path?: string, ignore?: string[] }|null} configOverride
 * @returns {Promise<object>} Resolves to whatever keywordSearchNotesArray returns.
 * @throws {Error} (as a rejection) when the resolved config has no `vault_path`.
 */
export async function runKeywordSearch(query, options = {}, configOverride = null) {
  const settings = configOverride || loadConfig();
  const { vault_path: vaultRoot } = settings;
  if (!vaultRoot) {
    throw new Error('vault_path required for keyword search');
  }

  const candidates = filterNotesByListOptions(getNotesWithMeta(vaultRoot, settings), options);
  return keywordSearchNotesArray(candidates, query, options);
}
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 2 days ago