search.mjs
151 lines 5.2 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
1 /**
2 * Semantic search: embed query, vector store search, filters, SPEC §4.2 output shape.
3 * Phase 3: folder, project, tag, limit, fields, snippet-chars, count-only.
4 */
5
6 import { loadConfig } from './config.mjs';
7 import { embed } from './embedding.mjs';
8 import { createVectorStore } from './vector-store.mjs';
9 import { readNote, normalizeSlug } from './vault.mjs';
10 import { filterHitsByContentScope, resolveSearchFolderForContentScope } from './approval-log.mjs';
11 import { MAX_VECTOR_KNN } from './vector-knn-limit.mjs';
12
/** Snippet length (in characters) used when a search does not specify `snippetChars`. */
const DEFAULT_SNIPPET_CHARS = 300;

/**
 * Trim `text` and shorten it to at most `maxChars` UTF-16 code units, appending `…`
 * whenever something was cut off.
 *
 * The cut prefers a word boundary: if the last space inside the kept prefix lies past
 * the midpoint of `maxChars`, the snippet ends at that space; otherwise it is a hard cut.
 * A hard cut never leaves half of a surrogate pair (e.g. an emoji) at the end.
 *
 * @param {string} text - Source text; any non-string value (including null/undefined) yields ''.
 * @param {number} maxChars - Maximum kept length, excluding the ellipsis. Negative values are treated as 0.
 * @returns {string} The trimmed text if it already fits, otherwise the shortened text followed by `…`.
 */
export function truncateSnippet(text, maxChars) {
  if (typeof text !== 'string') return '';

  const trimmed = text.trim();
  // A negative end index would make slice() count from the END of the string and
  // return almost the whole text; clamp so a negative budget means "no room".
  const limit = maxChars < 0 ? 0 : maxChars;
  if (trimmed.length <= limit) return trimmed;

  let head = trimmed.slice(0, limit);
  // slice() works on UTF-16 code units, so the cut may land between the two halves of
  // a surrogate pair. Drop a trailing high surrogate instead of emitting a broken character.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }

  // Only back up to a word boundary when doing so keeps more than half of the budget.
  const lastSpace = head.lastIndexOf(' ');
  if (lastSpace > limit / 2) {
    return `${head.slice(0, lastSpace)}…`;
  }
  return `${head}…`;
}
32
/**
 * Run semantic search. Loads config, embeds query, searches vector store, formats per SPEC §4.2.
 * Phase 3.1: --since, --until, --order, --chain, --entity, --episode.
 *
 * Flow: create the vector store, embed the query, resolve the effective folder for the
 * requested content scope, run the vector search, filter the hits by content scope, then
 * either return the number of remaining hits (`countOnly`) or format the first `limit` hits.
 *
 * Notes:
 * - `limit` is coerced to an integer and clamped to 1..100; a missing or non-numeric
 *   limit falls back to 10.
 * - With `countOnly`, the count is bounded by the number of hits fetched
 *   (1000, further capped by MAX_VECTOR_KNN), not by the total number of matches.
 * - An unrecognised `fields` value is formatted like 'path+snippet'.
 *
 * @param {string} query - Search query string
 * @param {{
 *   folder?: string,
 *   project?: string,
 *   tag?: string,
 *   limit?: number,
 *   fields?: 'path'|'path+snippet'|'full',
 *   snippetChars?: number,
 *   countOnly?: boolean,
 *   since?: string,
 *   until?: string,
 *   order?: string,
 *   chain?: string,
 *   entity?: string,
 *   episode?: string,
 *   vault_id?: string,
 *   content_scope?: 'all'|'notes'|'approval_logs'
 * }} options
 * @param {{ vault_path?: string, qdrant_url?: string, vector_store?: string, data_dir?: string, embedding?: object, ignore?: string[] }} [configOverride] - When provided (e.g. Hub), use instead of loadConfig()
 * @returns {Promise<{ results?: { path, snippet?, score, project, tags }[], count?: number, query: string }>}
 * @throws {Error} When the embedding call returns no vector for the query.
 */
export async function runSearch(query, options = {}, configOverride = null) {
  const config = configOverride || loadConfig();
  const store = await createVectorStore(config);

  const countOnly = options.countOnly === true;

  // Math.min/Math.max propagate NaN, so a non-numeric limit (e.g. an unparsed CLI flag)
  // would otherwise reach the store as NaN and slice(0, NaN) would drop every result.
  const requestedLimit = Number(options.limit ?? 10);
  const limit = Number.isNaN(requestedLimit)
    ? 10
    : Math.max(1, Math.min(Math.trunc(requestedLimit), 100));

  const fields = options.fields || 'path+snippet';
  const snippetChars = options.snippetChars ?? DEFAULT_SNIPPET_CHARS;

  const project = optionalSlug(options.project);
  const tag = optionalSlug(options.tag);
  const folder = optionalTrimmed(options.folder);
  const since = optionalTrimmed(options.since);
  const until = optionalTrimmed(options.until);
  // Only the two known orderings are forwarded; anything else means "no explicit order".
  const order = ['date', 'date-asc'].includes(options.order) ? options.order : undefined;
  const chain = optionalSlug(options.chain);
  const entity = optionalSlug(options.entity);
  const episode = optionalSlug(options.episode);

  const vector = await embed([query], config.embedding, { voyageInputType: 'query' });
  if (!vector || !vector[0]) {
    throw new Error('Embedding failed: no vector returned for query.');
  }

  const scope = options.content_scope || 'all';
  const resolved = resolveSearchFolderForContentScope(scope, folder);
  if (resolved.impossible) {
    // The scope/folder combination cannot match anything: answer without searching.
    return countOnly ? { count: 0, query } : { results: [], query };
  }

  // Hits are filtered by content scope AFTER the vector search, so scoped searches
  // over-fetch to still be able to fill `limit` once non-matching hits are dropped.
  // NOTE(review): the multipliers/floors below look like tuning values — confirm before changing.
  let searchLimit;
  if (countOnly) {
    searchLimit = 1000;
  } else if (resolved.wideNotesFetch) {
    searchLimit = Math.min(10000, Math.max(limit * 120, 2500));
  } else if (scope !== 'all') {
    searchLimit = Math.min(10000, Math.max(limit * 40, 800));
  } else {
    searchLimit = limit;
  }
  searchLimit = Math.min(searchLimit, MAX_VECTOR_KNN);

  const rawHits = await store.search(vector[0], {
    limit: searchLimit,
    vault_id: options.vault_id,
    project,
    tag,
    folder: resolved.folder,
    since,
    until,
    order,
    chain,
    entity,
    episode,
  });
  const scopedHits = filterHitsByContentScope(rawHits, scope);

  if (countOnly) {
    return { count: scopedHits.length, query };
  }

  const results = scopedHits
    .slice(0, limit)
    .map((hit) => formatSearchHit(hit, fields, snippetChars, config.vault_path));

  return { results, query };
}

/** Normalize an optional filter value to a slug; null/undefined stay undefined (filter not set). */
function optionalSlug(value) {
  return value != null ? normalizeSlug(String(value)) : undefined;
}

/** Stringify and trim an optional filter value; null/undefined stay undefined (filter not set). */
function optionalTrimmed(value) {
  return value != null ? String(value).trim() : undefined;
}

/** Shape one vector-store hit according to the requested `fields` mode. */
function formatSearchHit(hit, fields, snippetChars, vaultPath) {
  const base = {
    path: hit.path,
    score: hit.score,
    project: hit.project ?? null,
    tags: hit.tags ?? [],
  };
  if (fields === 'path') {
    return base;
  }

  const snippet = truncateSnippet(hit.text, snippetChars);
  if (fields !== 'full') {
    // 'path+snippet' and any unrecognised mode share the same shape.
    return { ...base, snippet };
  }

  let frontmatter = {};
  let body = '';
  try {
    const note = readNote(vaultPath, hit.path);
    frontmatter = note.frontmatter || {};
    body = note.body || '';
  } catch {
    // Deliberate best-effort: if the note cannot be read, fall back to the indexed
    // text snippet as the body rather than failing the whole search.
    body = snippet;
  }
  return { ...base, snippet, frontmatter, body };
}
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 1 day ago