server.mjs
3,430 lines 134.9 KB
Raw
sha256:6f47d53a6adbcf105ba1b9cfc126c788d6a0f461d197f84f78794914305b4bd5 fix(mcp): bound hosted discovery context Human patch 15 hours ago
1 /**
2 * Knowtation Hub Bridge — Connect GitHub + Back up now + indexer + search for hosted product.
3 * Stores GitHub token per user; sync fetches notes + full proposals from canister and pushes to repo (snapshot JSON + markdown).
4 * Index/search: pull vault from canister, chunk → embed → sqlite-vec per user; search via POST /api/v1/search.
5 * On Netlify, tokens and vector DBs persist via Netlify Blobs (set by netlify/functions/bridge.mjs).
6 * Env: SESSION_SECRET, CANISTER_URL, HUB_BASE_URL; optional HUB_UI_ORIGIN, HUB_UI_PATH (default /hub), GITHUB_*, EMBEDDING_*, BRIDGE_PORT, DATA_DIR.
7 * Consolidation: CONSOLIDATION_LLM_API_KEY / OPENAI_API_KEY, CONSOLIDATION_LLM_MODEL; CONSOLIDATION_MEMORY_ENCRYPT=true omits raw event payloads from consolidation LLM prompts.
8 */
9
10 import fs from 'fs';
11 import path from 'path';
12 import os from 'os';
13 import { fileURLToPath } from 'url';
14 import crypto from 'crypto';
15 import dotenv from 'dotenv';
16 import express from 'express';
17 import jwt from 'jsonwebtoken';
18 import multer from 'multer';
19 import AdmZip from 'adm-zip';
20 import { parseCanisterProposalGetBody } from '../../lib/canister-proposal-response-parse.mjs';
21 import { runImport } from '../../lib/import.mjs';
22 import { IMPORT_SOURCE_TYPES } from '../../lib/import-source-types.mjs';
23 import { commitImageToRepo, parseGitHubRepoUrl, validateImageExtension, validateMagicBytes } from '../../lib/github-commit-image.mjs';
24 import { mergeProvenanceFrontmatter } from '../../lib/hub-provenance.mjs';
25 import { createIndexTimer } from './index-timing.mjs';
26 import { computeChunkContentHashTagged } from '../../lib/chunk-content-hash.mjs';
27 import {
28 defaultBridgeEmbeddingModelForProvider,
29 resolveIndexerChunkOptions,
30 } from '../../lib/indexer-chunk-options.mjs';
31 import {
32 runWithConcurrency,
33 parseEmbedConcurrency,
34 parseEmbedBatchSize,
35 } from '../../lib/parallel-embed-pool.mjs';
36 import { partitionChunksForReindex } from '../../lib/index-partition.mjs';
37 import {
38 estimateEmbedSeconds,
39 shouldUseBackgroundIndex,
40 parseSyncBudgetSeconds,
41 parseMaxSyncChunks,
42 } from '../../lib/bridge-index-preflight-estimate.mjs';
43 import {
44 acquireJobLock,
45 releaseJobLock,
46 peekJobLock,
47 } from '../../lib/bridge-index-job-lock.mjs';
48 import {
49 setLastIndexedAt,
50 getLastIndexedAt,
51 } from '../../lib/bridge-index-last-indexed.mjs';
52 import { signInternalRequest } from '../../lib/bridge-internal-hmac.mjs';
53 import { assertBackgroundKickoffOk } from '../../lib/bridge-index-kickoff-response.mjs';
54 import { writeNote } from '../../lib/write.mjs';
55 import { resolveVaultRelativePath, parseFrontmatterAndBody } from '../../lib/vault.mjs';
56 import {
57 resolveEffectiveCanisterUser,
58 getScopeForUserVaultFromScopeMap,
59 resolveAllowedVaultIdsForHostedContext,
60 } from '../lib/hosted-workspace-resolve.mjs';
61 import { applyScopeFilterToNotes, applyScopeFilterToProposals } from '../lib/scope-filter.mjs';
62 import { actorMayApproveProposals } from '../lib/hub-evaluator-may-approve.mjs';
63
// Netlify bundles this module as CJS, where import.meta.url is empty. In serverless
// we therefore take the project root from the working directory so the app still
// loads and its routes register.
const inServerless = Boolean(process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.NETLIFY);
let projectRoot = inServerless
  ? process.cwd()
  : path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..');
const __dirname = path.join(projectRoot, 'hub', 'bridge');
// Load <projectRoot>/.env when it exists; otherwise rely on the ambient environment.
const envPath = path.join(projectRoot, '.env');
if (fs.existsSync(envPath)) {
  dotenv.config({ path: envPath });
}
75
// Listen port: BRIDGE_PORT wins over PORT; 3341 when neither is set.
const PORT = Number.parseInt(process.env.BRIDGE_PORT || process.env.PORT || '3341', 10);
// URL-like settings below have one trailing slash removed.
const BASE_URL = (process.env.HUB_BASE_URL || `http://localhost:${PORT}`).replace(/\/$/, '');
const CANISTER_URL = (process.env.CANISTER_URL || '').replace(/\/$/, '');
const HUB_UI_ORIGIN = (process.env.HUB_UI_ORIGIN || BASE_URL).replace(/\/$/, '');
// Path under HUB_UI_ORIGIN where the Hub app lives (e.g. /hub). The resulting value is ''
// for root, which is reached by setting HUB_UI_PATH=/ — an unset or empty variable
// falls back to /hub.
const HUB_UI_PATH = (process.env.HUB_UI_PATH || '/hub').replace(/\/$/, '');
const SESSION_SECRET = process.env.SESSION_SECRET || process.env.HUB_JWT_SECRET;
const CANISTER_AUTH_SECRET = process.env.CANISTER_AUTH_SECRET || '';
// Timeout for hosted-context fetches: 3000 ms when unset or unparsable, otherwise
// the configured value clamped to the range 250..10000 ms.
const HOSTED_CONTEXT_FETCH_TIMEOUT_MS = (() => {
  const parsed = Number.parseInt(String(process.env.HOSTED_CONTEXT_FETCH_TIMEOUT_MS || ''), 10);
  return Number.isFinite(parsed) ? Math.max(250, Math.min(10_000, parsed)) : 3000;
})();
const HOSTED_CONTEXT_CACHE_TTL_MS = 60_000;
// Keyed by canister user id; values are { expires, ids }.
const canisterVaultIdsCache = new Map();
91
/**
 * Create an abort signal that fires after HOSTED_CONTEXT_FETCH_TIMEOUT_MS.
 * @returns {AbortSignal | undefined} undefined when the runtime has no AbortSignal.timeout
 */
function hostedContextAbortSignal() {
  const supportsTimeout =
    typeof AbortSignal !== 'undefined' && typeof AbortSignal.timeout === 'function';
  if (!supportsTimeout) return undefined;
  return AbortSignal.timeout(HOSTED_CONTEXT_FETCH_TIMEOUT_MS);
}
97
/**
 * Build the base headers for every bridge→canister request.
 * Adds x-gateway-auth when CANISTER_AUTH_SECRET is configured so the canister's
 * gatewayAuthorized() check (Phase 0) passes; the env var name matches the gateway's.
 * @param {Record<string, string>} [extra] additional headers; they may override Accept
 * @returns {Record<string, string>}
 */
function canisterHeaders(extra = {}) {
  const headers = { Accept: 'application/json', ...extra };
  if (!CANISTER_AUTH_SECRET) return headers;
  headers['x-gateway-auth'] = CANISTER_AUTH_SECRET;
  return headers;
}
// On Netlify Lambda /var/task/ is read-only; only /tmp is writable.
// An explicit DATA_DIR always wins (relative values resolve against projectRoot).
// Otherwise serverless uses <tmpdir>/knowtation-bridge-data and everything else
// uses <projectRoot>/data.
const DATA_DIR = (() => {
  const configured = process.env.DATA_DIR;
  if (configured) {
    return path.isAbsolute(configured) ? configured : path.join(projectRoot, configured);
  }
  if (inServerless) return path.join(os.tmpdir(), 'knowtation-bridge-data');
  return path.join(projectRoot, 'data');
})();
// JSON state files used when no blob store is attached to the request.
const TOKENS_FILE = path.join(DATA_DIR, 'hub_github_tokens.json');
const ROLES_FILE = path.join(DATA_DIR, 'hub_roles.json');
const INVITES_FILE = path.join(DATA_DIR, 'hub_invites.json');
const WORKSPACE_FILE = path.join(DATA_DIR, 'hub_workspace.json');
const VAULT_ACCESS_FILE = path.join(DATA_DIR, 'hub_vault_access.json');
const SCOPE_FILE = path.join(DATA_DIR, 'hub_scope.json');
const EVALUATOR_MAY_APPROVE_FILE = path.join(DATA_DIR, 'hub_evaluator_may_approve.json');
const VALID_ROLES = new Set(['admin', 'editor', 'viewer', 'evaluator']);
// Seven days in milliseconds.
const INVITE_EXPIRY_MS = 7 * 24 * 60 * 60 * 1000;

// User ids from the comma-separated HUB_ADMIN_USER_IDS; blanks are dropped.
const adminUserIdsSet = new Set();
for (const piece of (process.env.HUB_ADMIN_USER_IDS || '').split(',')) {
  const id = piece.trim();
  if (id) adminUserIdsSet.add(id);
}
130
/**
 * Make a user id safe for use as a path segment / blob key part.
 * Characters outside [a-zA-Z0-9_-] become '_', the result is capped at 128
 * characters, and an empty result becomes 'default'.
 * @param {unknown} uid
 * @returns {string}
 */
function sanitizeUserId(uid) {
  const replaced = String(uid).replace(/[^a-zA-Z0-9_-]/g, '_');
  const capped = replaced.slice(0, 128);
  return capped === '' ? 'default' : capped;
}
134
/**
 * Make a vault id safe for use as a path segment / blob key part.
 * Falsy input means 'default'; characters outside [a-zA-Z0-9_-] become '_';
 * the result is capped at 64 characters.
 * @param {unknown} vaultId
 * @returns {string}
 */
function sanitizeVaultId(vaultId) {
  const source = String(vaultId || 'default');
  const capped = source.replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, 64);
  return capped === '' ? 'default' : capped;
}
138
// Ensures the localhost-Ollama warning below is printed at most once per process.
let warnedOllamaLocalhostOnNetlify = false;

/**
 * Resolve embedding settings from the environment.
 * Values are trimmed and empty values fall back to defaults, so accidental
 * whitespace does not break provider matching or the Ollama URL.
 * @returns {{ provider: string, model: string, ollama_url: string }}
 */
function getBridgeEmbeddingConfig() {
  const trimmedEnv = (name) => {
    const value = process.env[name];
    return value == null ? '' : String(value).trim();
  };

  const provider = (trimmedEnv('EMBEDDING_PROVIDER') || 'ollama').toLowerCase();
  const model = trimmedEnv('EMBEDDING_MODEL') || defaultBridgeEmbeddingModelForProvider(provider);
  const ollama_url = trimmedEnv('OLLAMA_URL') || 'http://localhost:11434';

  const shouldCheckLocalhost =
    inServerless && provider === 'ollama' && !warnedOllamaLocalhostOnNetlify;
  if (shouldCheckLocalhost) {
    warnedOllamaLocalhostOnNetlify = true;
    try {
      if (/^https?:\/\//i.test(ollama_url)) {
        const { hostname } = new URL(ollama_url);
        if (hostname === 'localhost' || hostname === '127.0.0.1') {
          console.warn(
            '[bridge] EMBEDDING_PROVIDER=ollama with localhost OLLAMA_URL cannot reach your machine from Netlify. ' +
              'Set EMBEDDING_PROVIDER=openai and OPENAI_API_KEY, or OLLAMA_URL to a public https:// Ollama API base.',
          );
        }
      }
    } catch (_) {
      /* embed path will throw a clearer error via normalizeOllamaEmbedBaseUrl */
    }
  }

  return { provider, model, ollama_url };
}
178
/**
 * Turn an embed failure into operator-facing text.
 * Undici/fetch often throws a TypeError whose message is only "Invalid URL"; that
 * case is expanded with likely causes and the resolved provider / API-key state.
 * Any other error message is returned unchanged.
 * @param {unknown} err
 * @param {'index'|'search'|'embed'} kind
 * @returns {string}
 */
function bridgeEmbedFailureMessage(err, kind) {
  const hasMessage = Boolean(err) && typeof err.message === 'string';
  const raw = hasMessage ? err.message : String(err);
  if (!raw.includes('Invalid URL')) return raw;

  const { provider } = getBridgeEmbeddingConfig();
  const keyState = (name) => {
    const value = process.env[name];
    return value && String(value).trim() ? 'is set' : 'is missing';
  };

  return [
    `${raw} (${kind}). On Netlify, Invalid URL often means sqlite-vec was esbuild-bundled `,
    '(stack: getLoadablePath / input ".") — set [functions].external_node_modules for sqlite-vec and better-sqlite3 in netlify.toml. ',
    `Resolved EMBEDDING_PROVIDER="${provider}"; OPENAI_API_KEY ${keyState('OPENAI_API_KEY')}; `,
    `VOYAGE_API_KEY ${keyState('VOYAGE_API_KEY')}. `,
    'If provider is ollama, OLLAMA_URL must be a full http(s) URL. Remove bad HTTP_PROXY/HTTPS_PROXY if set. ',
    'See hub/bridge/README.md (semantic index/search).',
  ].join('');
}
201
const DB_FILENAME = 'knowtation_vectors.db';

/**
 * Build the vector-store config for one user.
 * @param {string} uid user id; used (sanitized) for the default vectors directory
 * @param {string} [vectorsDirOverride] directory to use as-is; when null/undefined,
 *   DATA_DIR/vectors/<sanitized uid> is used and created if missing
 * @returns {{ vector_store: string, data_dir: string, embedding: object, allow_dimension_migration: boolean }}
 */
function getBridgeStoreConfig(uid, vectorsDirOverride) {
  let dataDir = vectorsDirOverride;
  if (dataDir == null) {
    dataDir = path.join(DATA_DIR, 'vectors', sanitizeUserId(uid));
    if (!fs.existsSync(dataDir)) fs.mkdirSync(dataDir, { recursive: true });
  }
  return {
    vector_store: 'sqlite-vec',
    data_dir: dataDir,
    embedding: getBridgeEmbeddingConfig(),
    // The bridge owns the data lifecycle (download from blob → re-index → upload to blob).
    // A dimension change (e.g. OpenAI 1536 → DeepInfra 1024) can only be resolved by fully
    // re-embedding every vault in this DB. The CLI keeps the throw so an accidental swap
    // surfaces loudly. See `lib/vector-store-sqlite.mjs ensureCollection` migration logic.
    allow_dimension_migration: true,
  };
}
221
// True when either the AWS Lambda or the Netlify marker variable is set
// (the same condition as `inServerless` near the top of this file).
const isServerless =
  Boolean(process.env.AWS_LAMBDA_FUNCTION_NAME) || Boolean(process.env.NETLIFY);
223
/** Create DATA_DIR (including missing parents) when it does not exist yet. */
function ensureDataDir() {
  if (fs.existsSync(DATA_DIR)) return;
  fs.mkdirSync(DATA_DIR, { recursive: true });
}
227
const ALGO = 'aes-256-gcm';
const IV_LEN = 16;
const TAG_LEN = 16;
const SALT_LEN = 16;
// Ciphertext format (v2): saltB64url.ivB64url.tagB64url.encB64url (4 parts)
// Legacy format (v1): ivB64url.tagB64url.encB64url (3 parts — decrypt returns null → graceful reconnect)

/**
 * Encrypt text with AES-256-GCM under a key derived from `secret` via scrypt
 * and a fresh random salt.
 * @param {string} text plaintext (UTF-8)
 * @param {string} secret passphrase used for key derivation
 * @returns {string} v2 ciphertext: salt.iv.tag.enc, each part base64url
 */
function encrypt(text, secret) {
  const salt = crypto.randomBytes(SALT_LEN);
  const key = crypto.scryptSync(secret, salt, 32);
  const iv = crypto.randomBytes(IV_LEN);
  const cipher = crypto.createCipheriv(ALGO, key, iv, { authTagLength: TAG_LEN });
  const enc = Buffer.concat([cipher.update(text, 'utf8'), cipher.final()]);
  const tag = cipher.getAuthTag();
  return [salt, iv, tag, enc].map((part) => part.toString('base64url')).join('.');
}

/**
 * Decrypt a v2 ciphertext produced by encrypt().
 * @param {string} encrypted salt.iv.tag.enc (base64url parts)
 * @param {string} secret passphrase used for key derivation
 * @returns {string | null} plaintext, or null when the input is not a well-formed v2
 *   ciphertext, the tag is not exactly TAG_LEN bytes, or authentication fails
 */
function decrypt(encrypted, secret) {
  if (typeof encrypted !== 'string') return null;
  const parts = encrypted.split('.');
  // v1 ciphertexts had 3 parts (hardcoded salt); treat as not-found so the
  // caller falls through to "prompt reconnect" without crashing.
  if (parts.length !== 4) return null;
  const [saltB, ivB, tagB, encB] = parts;
  if (!saltB || !ivB || !tagB || !encB) return null;
  try {
    const tag = Buffer.from(tagB, 'base64url');
    // Reject shortened tags outright: GCM would otherwise verify only the bytes
    // supplied, which makes forgery easier.
    if (tag.length !== TAG_LEN) return null;
    const key = crypto.scryptSync(secret, Buffer.from(saltB, 'base64url'), 32);
    const decipher = crypto.createDecipheriv(ALGO, key, Buffer.from(ivB, 'base64url'), {
      authTagLength: TAG_LEN,
    });
    decipher.setAuthTag(tag);
    const plain = Buffer.concat([
      decipher.update(Buffer.from(encB, 'base64url')),
      decipher.final(),
    ]);
    return plain.toString('utf8');
  } catch {
    return null;
  }
}
264
/**
 * Convert the stored token map (uid → { token: ciphertext, repo }) into
 * uid → { token: plaintext, repo }. Entries without a string token are skipped;
 * entries that fail to decrypt are dropped and counted in a single warning.
 * @param {unknown} raw parsed JSON from the token store
 * @returns {Record<string, { token: string, repo: string | null }>}
 */
function parseAndDecryptTokens(raw) {
  const isPlainMap = Boolean(raw) && typeof raw === 'object' && !Array.isArray(raw);
  if (!isPlainMap) return {};

  const tokens = {};
  let failed = 0;
  Object.entries(raw).forEach(([uid, entry]) => {
    if (!entry || typeof entry.token !== 'string') return;
    const plain = decrypt(entry.token, SESSION_SECRET);
    if (!plain) {
      failed += 1;
      return;
    }
    tokens[uid] = { token: plain, repo: entry.repo || null };
  });

  if (failed > 0) {
    console.warn(
      '[bridge] loadTokens: decrypt failed for',
      failed,
      'stored GitHub token(s). If SESSION_SECRET was rotated on the bridge, run Connect GitHub again to re-store the token.'
    );
  }
  return tokens;
}
285
/**
 * Load and decrypt stored GitHub tokens.
 * Reads the 'hub_github_tokens' blob when a blob store is given, otherwise TOKENS_FILE.
 * Missing data, unreadable data and invalid JSON all yield an empty map.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<Record<string, { token: string, repo: string | null }>>}
 */
async function loadTokens(blobStore) {
  if (!blobStore) ensureDataDir();
  try {
    let text;
    if (blobStore) {
      text = await blobStore.get('hub_github_tokens');
      if (!text) return {};
    } else {
      if (!fs.existsSync(TOKENS_FILE)) return {};
      text = fs.readFileSync(TOKENS_FILE, 'utf8');
    }
    return parseAndDecryptTokens(JSON.parse(text));
  } catch (_) {
    return {};
  }
}
306
/**
 * Encrypt every token with SESSION_SECRET and persist the map as pretty-printed JSON,
 * to the 'hub_github_tokens' blob when a blob store is given, otherwise to TOKENS_FILE.
 * @param {{ set: Function } | null} blobStore
 * @param {Record<string, { token: string, repo?: string | null }>} tokens plaintext tokens
 * @returns {Promise<void>}
 */
async function saveTokens(blobStore, tokens) {
  const encrypted = {};
  Object.entries(tokens).forEach(([uid, entry]) => {
    encrypted[uid] = { token: encrypt(entry.token, SESSION_SECRET), repo: entry.repo || null };
  });
  const serialized = JSON.stringify(encrypted, null, 2);
  if (blobStore) {
    await blobStore.set('hub_github_tokens', serialized);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(TOKENS_FILE, serialized, 'utf8');
}
320
321 // ——— Roles & invites (hosted parity: same contract as self-hosted hub/roles.mjs, hub/invites.mjs) ———
/**
 * Load the stored role map (user id → role).
 * Reads the 'hub_roles' blob when a blob store is given, otherwise ROLES_FILE.
 * Accepts both `{ roles: {...} }` and a bare map; anything missing, unreadable
 * or not an object yields {}.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<Record<string, string>>}
 */
async function loadRoles(blobStore) {
  if (!blobStore) ensureDataDir();
  try {
    let text;
    if (blobStore) {
      text = await blobStore.get('hub_roles');
      if (!text) return {};
    } else {
      if (!fs.existsSync(ROLES_FILE)) return {};
      text = fs.readFileSync(ROLES_FILE, 'utf8');
    }
    const parsed = JSON.parse(text);
    const roles = parsed.roles ?? parsed;
    return roles !== null && typeof roles === 'object' ? roles : {};
  } catch (_) {
    return {};
  }
}
344
/**
 * Persist the role map as `{ roles: {...} }`.
 * Only entries with a non-blank user id and a role in VALID_ROLES are written;
 * user ids are trimmed.
 * @param {{ set: Function } | null} blobStore blob store, or null to write ROLES_FILE
 * @param {Record<string, string>} roles
 * @returns {Promise<void>}
 */
async function saveRoles(blobStore, roles) {
  const kept = {};
  for (const [sub, role] of Object.entries(roles)) {
    const isUsable = typeof sub === 'string' && sub.trim() && VALID_ROLES.has(role);
    if (!isUsable) continue;
    kept[sub.trim()] = role;
  }
  const serialized = JSON.stringify({ roles: kept }, null, 2);
  if (blobStore) {
    await blobStore.set('hub_roles', serialized);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(ROLES_FILE, serialized, 'utf8');
}
358
/**
 * Whether the environment grants evaluators approval rights.
 * @returns {boolean} true only when HUB_EVALUATOR_MAY_APPROVE is exactly '1'
 */
function bridgeEnvEvaluatorMayApprove() {
  const { HUB_EVALUATOR_MAY_APPROVE: flag } = process.env;
  return flag === '1';
}
362
/**
 * Load the per-user "evaluator may approve" flags.
 * Reads the 'hub_evaluator_may_approve' blob when a blob store is given, otherwise
 * EVALUATOR_MAY_APPROVE_FILE. Accepts `{ evaluator_may_approve: {...} }` or a bare map.
 * Keys are trimmed (blank keys dropped) and values coerced to booleans.
 * Missing, unreadable or non-object data yields {}.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<Record<string, boolean>>}
 */
async function loadEvaluatorMayApproveMap(blobStore) {
  if (!blobStore) ensureDataDir();
  try {
    let text;
    if (blobStore) {
      text = await blobStore.get('hub_evaluator_may_approve');
      if (!text) return {};
    } else {
      if (!fs.existsSync(EVALUATOR_MAY_APPROVE_FILE)) return {};
      text = fs.readFileSync(EVALUATOR_MAY_APPROVE_FILE, 'utf8');
    }
    const parsed = JSON.parse(text);
    const flags = parsed?.evaluator_may_approve ?? parsed;
    if (flags === null || typeof flags !== 'object') return {};
    const normalized = {};
    for (const [key, value] of Object.entries(flags)) {
      const userKey = typeof key === 'string' ? key.trim() : '';
      if (!userKey) continue;
      normalized[userKey] = Boolean(value);
    }
    return normalized;
  } catch (_) {
    return {};
  }
}
395
/**
 * Persist the "evaluator may approve" flags as `{ evaluator_may_approve: {...} }`.
 * Keys are trimmed (blank keys dropped) and values coerced to booleans.
 * @param {{ set: Function } | null} blobStore blob store, or null to write EVALUATOR_MAY_APPROVE_FILE
 * @param {Record<string, unknown>} map
 * @returns {Promise<void>}
 */
async function saveEvaluatorMayApproveMap(blobStore, map) {
  const normalized = {};
  for (const [key, value] of Object.entries(map)) {
    const userKey = typeof key === 'string' ? key.trim() : '';
    if (!userKey) continue;
    normalized[userKey] = Boolean(value);
  }
  const serialized = JSON.stringify({ evaluator_may_approve: normalized }, null, 2);
  if (blobStore) {
    await blobStore.set('hub_evaluator_may_approve', serialized);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(EVALUATOR_MAY_APPROVE_FILE, serialized, 'utf8');
}
409
/**
 * Effective "may approve proposals" decision for Hub UI and gateway
 * (admin always; evaluator from map + env).
 * Resolves the user's effective role, then delegates to actorMayApproveProposals
 * together with the per-user map and the env flag.
 */
function mayApproveProposalsForUser(uid, storedRoles, mayMap) {
  const resolvedRole = effectiveRole(uid, storedRoles);
  const envAllowsEvaluators = bridgeEnvEvaluatorMayApprove();
  return actorMayApproveProposals(uid, resolvedRole, mayMap, envAllowsEvaluators);
}
415
/**
 * Load pending invites (token → entry) from the `invites` property of the stored JSON.
 * Reads the 'hub_invites' blob when a blob store is given, otherwise INVITES_FILE.
 * Missing, unreadable or malformed data yields {}.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<Record<string, object>>}
 */
async function loadInvites(blobStore) {
  if (!blobStore) ensureDataDir();
  try {
    let text;
    if (blobStore) {
      text = await blobStore.get('hub_invites');
      if (!text) return {};
    } else {
      if (!fs.existsSync(INVITES_FILE)) return {};
      text = fs.readFileSync(INVITES_FILE, 'utf8');
    }
    const { invites } = JSON.parse(text);
    if (invites && typeof invites === 'object') return invites;
    return {};
  } catch (_) {
    return {};
  }
}
438
/**
 * Persist invites as `{ invites: {...} }`.
 * Only entries with a non-empty token and string `role` / `created_at` are kept,
 * and only those two fields are written.
 * @param {{ set: Function } | null} blobStore blob store, or null to write INVITES_FILE
 * @param {Record<string, { role: string, created_at: string }>} invites
 * @returns {Promise<void>}
 */
async function saveInvites(blobStore, invites) {
  const kept = {};
  for (const [token, entry] of Object.entries(invites)) {
    const tokenOk = typeof token === 'string' && token;
    const entryOk = entry && typeof entry.role === 'string' && typeof entry.created_at === 'string';
    if (!tokenOk || !entryOk) continue;
    const { role, created_at } = entry;
    kept[token] = { role, created_at };
  }
  const serialized = JSON.stringify({ invites: kept }, null, 2);
  if (blobStore) {
    await blobStore.set('hub_invites', serialized);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(INVITES_FILE, serialized, 'utf8');
}
454
/**
 * Load the workspace record.
 * Reads the 'hub_workspace' blob when a blob store is given, otherwise WORKSPACE_FILE.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<{ owner_user_id: string | null }>} the trimmed owner id, or null when
 *   it is missing, blank, not a string, or the data cannot be read
 */
async function loadWorkspace(blobStore) {
  const noOwner = () => ({ owner_user_id: null });
  if (!blobStore) ensureDataDir();
  try {
    let text;
    if (blobStore) {
      text = await blobStore.get('hub_workspace');
      if (!text) return noOwner();
    } else {
      if (!fs.existsSync(WORKSPACE_FILE)) return noOwner();
      text = fs.readFileSync(WORKSPACE_FILE, 'utf8');
    }
    const ownerId = JSON.parse(text)?.owner_user_id;
    const trimmed = typeof ownerId === 'string' ? ownerId.trim() : '';
    return { owner_user_id: trimmed || null };
  } catch (_) {
    return noOwner();
  }
}
477
/**
 * Persist the workspace owner as `{ owner_user_id }`.
 * The id is stringified and trimmed; a falsy or blank id is stored as null.
 * @param {{ set: Function } | null} blobStore blob store, or null to write WORKSPACE_FILE
 * @param {unknown} ownerUserId
 * @returns {Promise<void>}
 */
async function saveWorkspace(blobStore, ownerUserId) {
  const trimmed = ownerUserId ? String(ownerUserId).trim() : '';
  const payload = JSON.stringify({ owner_user_id: trimmed || null }, null, 2);
  if (blobStore) {
    await blobStore.set('hub_workspace', payload);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(WORKSPACE_FILE, payload, 'utf8');
}
491
/**
 * Load the explicit vault access lists (user id → vault ids).
 * Reads the 'hub_vault_access' blob when a blob store is given, otherwise VAULT_ACCESS_FILE.
 * User ids and vault ids are trimmed; blank user ids, non-array values and
 * non-string / blank vault ids are dropped. Missing or unreadable data yields {}.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<Record<string, string[]>>}
 */
async function loadVaultAccess(blobStore) {
  if (!blobStore) ensureDataDir();
  try {
    let text;
    if (blobStore) {
      text = await blobStore.get('hub_vault_access');
      if (!text) return {};
    } else {
      if (!fs.existsSync(VAULT_ACCESS_FILE)) return {};
      text = fs.readFileSync(VAULT_ACCESS_FILE, 'utf8');
    }
    const parsed = JSON.parse(text);
    const access = {};
    if (!parsed || typeof parsed !== 'object') return access;
    for (const [uid, vaultIds] of Object.entries(parsed)) {
      const userKey = typeof uid === 'string' ? uid.trim() : '';
      if (!userKey || !Array.isArray(vaultIds)) continue;
      access[userKey] = vaultIds
        .filter((vaultId) => typeof vaultId === 'string' && vaultId.trim())
        .map((vaultId) => vaultId.trim());
    }
    return access;
  } catch (_) {
    return {};
  }
}
528
/**
 * Persist the explicit vault access lists (user id → vault ids).
 * User ids and vault ids are trimmed; blank user ids, non-array values and
 * non-string / blank vault ids are dropped. A falsy `access` is written as {}.
 * @param {{ set: Function } | null} blobStore blob store, or null to write VAULT_ACCESS_FILE
 * @param {Record<string, string[]> | null | undefined} access
 * @returns {Promise<void>}
 */
async function saveVaultAccess(blobStore, access) {
  const normalized = {};
  for (const [uid, vaultIds] of Object.entries(access || {})) {
    const userKey = typeof uid === 'string' ? uid.trim() : '';
    if (!userKey || !Array.isArray(vaultIds)) continue;
    normalized[userKey] = vaultIds
      .filter((vaultId) => typeof vaultId === 'string' && vaultId.trim())
      .map((vaultId) => vaultId.trim());
  }
  const serialized = JSON.stringify(normalized, null, 2);
  if (blobStore) {
    await blobStore.set('hub_vault_access', serialized);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(VAULT_ACCESS_FILE, serialized, 'utf8');
}
544
/**
 * Load the scope map as stored (no per-entry normalization is done here).
 * Reads the 'hub_scope' blob when a blob store is given, otherwise SCOPE_FILE.
 * Missing, unreadable or non-object data yields {}.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<object>}
 */
async function loadScope(blobStore) {
  if (!blobStore) ensureDataDir();
  try {
    let text;
    if (blobStore) {
      text = await blobStore.get('hub_scope');
      if (!text) return {};
    } else {
      if (!fs.existsSync(SCOPE_FILE)) return {};
      text = fs.readFileSync(SCOPE_FILE, 'utf8');
    }
    const parsed = JSON.parse(text);
    if (parsed && typeof parsed === 'object') return parsed;
    return {};
  } catch (_) {
    return {};
  }
}
565
/**
 * Persist the scope map (user id → vault id → { projects, folders }).
 * Ids and list items are trimmed; blank or non-string items are dropped. A vault
 * entry is written only when it has at least one project or folder; a user whose
 * vault map is an object is still written, possibly as an empty object.
 * @param {{ set: Function } | null} blobStore blob store, or null to write SCOPE_FILE
 * @param {object | null | undefined} scope
 * @returns {Promise<void>}
 */
async function saveScope(blobStore, scope) {
  const trimmedStrings = (value) =>
    Array.isArray(value)
      ? value.filter((item) => typeof item === 'string' && item.trim()).map((item) => item.trim())
      : [];

  const cleaned = {};
  for (const [uid, vaultMap] of Object.entries(scope || {})) {
    const userKey = typeof uid === 'string' ? uid.trim() : '';
    if (!userKey || !vaultMap || typeof vaultMap !== 'object') continue;
    const perVault = {};
    cleaned[userKey] = perVault;
    for (const [vaultId, rules] of Object.entries(vaultMap)) {
      const vaultKey = typeof vaultId === 'string' ? vaultId.trim() : '';
      if (!vaultKey || !rules || typeof rules !== 'object') continue;
      const projects = trimmedStrings(rules.projects);
      const folders = trimmedStrings(rules.folders);
      if (projects.length === 0 && folders.length === 0) continue;
      perVault[vaultKey] = { projects, folders };
    }
  }

  const serialized = JSON.stringify(cleaned, null, 2);
  if (blobStore) {
    await blobStore.set('hub_scope', serialized);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(SCOPE_FILE, serialized, 'utf8');
}
592
/**
 * Remove a vault id from all hub_vault_access lists and hub_scope maps (hosted team).
 * Users left with no vaults / no scoped vaults are dropped from the saved maps.
 * A blank id and the 'default' vault are ignored.
 * @param {object | null} blobStore
 * @param {unknown} vaultId
 * @returns {Promise<void>}
 */
async function stripHostedVaultFromAccessAndScope(blobStore, vaultId) {
  const target = String(vaultId || '').trim();
  if (target === '' || target === 'default') return;

  const currentAccess = await loadVaultAccess(blobStore);
  const prunedAccess = {};
  Object.entries(currentAccess).forEach(([uid, vaultIds]) => {
    if (!Array.isArray(vaultIds)) return;
    const remaining = vaultIds.filter((entry) => String(entry).trim() !== target);
    if (remaining.length > 0) prunedAccess[uid] = remaining;
  });
  await saveVaultAccess(blobStore, prunedAccess);

  const currentScope = await loadScope(blobStore);
  const prunedScope = {};
  Object.entries(currentScope).forEach(([uid, vaultMap]) => {
    if (!vaultMap || typeof vaultMap !== 'object') return;
    const remaining = {};
    for (const [vid, rules] of Object.entries(vaultMap)) {
      if (String(vid).trim() !== target) remaining[vid] = rules;
    }
    if (Object.keys(remaining).length > 0) prunedScope[uid] = remaining;
  });
  await saveScope(blobStore, prunedScope);
}
619
/**
 * Drop bridge vector store for (effective user, vault).
 * With a blob store, deletes the key vectors/<uid>/<vault> when the store exposes
 * delete(); delete errors are ignored. Without one, removes the matching
 * directory under DATA_DIR/vectors.
 * @param {object | null} blobStore
 * @param {string} effectiveUid
 * @param {string} vaultId
 * @returns {Promise<void>}
 */
async function removeHostedVectorBlobForVault(blobStore, effectiveUid, vaultId) {
  const safeUid = sanitizeUserId(effectiveUid);
  const safeVault = sanitizeVaultId(vaultId);
  const localDir = path.join(DATA_DIR, 'vectors', safeUid, safeVault);

  if (blobStore) {
    const key = `vectors/${safeUid}/${safeVault}`;
    try {
      if (typeof blobStore.delete === 'function') await blobStore.delete(key);
    } catch (_) {
      /* Netlify Blobs may omit delete; ignore */
    }
    return;
  }

  if (fs.existsSync(localDir)) {
    fs.rmSync(localDir, { recursive: true, force: true });
  }
}
636
/**
 * List the vault ids the canister reports for a user, with a short-lived cache.
 *
 * Falls back to ['default'] when CANISTER_URL or the user id is missing, the request
 * fails or times out, the response is not ok, or no vaults are returned. Fallback
 * results are never cached.
 *
 * Callers always receive their own array: the cache keeps a private copy, so a
 * caller mutating the result cannot alter what later requests see.
 *
 * @param {string} canisterUserId sent to the canister as X-User-Id
 * @returns {Promise<string[]>}
 */
async function fetchCanisterVaultIdsForUser(canisterUserId) {
  if (!CANISTER_URL || !canisterUserId) return ['default'];
  const cacheKey = String(canisterUserId);
  const now = Date.now();
  const hit = canisterVaultIdsCache.get(cacheKey);
  if (hit) {
    if (hit.expires > now) return [...hit.ids];
    // Evict the stale entry so it does not linger if the refresh below fails.
    canisterVaultIdsCache.delete(cacheKey);
  }
  try {
    const signal = hostedContextAbortSignal();
    const vRes = await fetch(CANISTER_URL + '/api/v1/vaults', {
      method: 'GET',
      headers: canisterHeaders({ 'X-User-Id': canisterUserId }),
      ...(signal ? { signal } : {}),
    });
    if (!vRes.ok) return ['default'];
    const data = await vRes.json();
    const vaults = Array.isArray(data.vaults) ? data.vaults : [];
    if (vaults.length === 0) return ['default'];
    const ids = vaults.map((v) => String(v.id || 'default')).filter(Boolean);
    canisterVaultIdsCache.set(cacheKey, {
      expires: now + HOSTED_CONTEXT_CACHE_TTL_MS,
      ids: [...ids],
    });
    return ids;
  } catch (_) {
    return ['default'];
  }
}
662
/**
 * Return the actor's explicit vault list from the access map.
 * @param {Record<string, unknown> | null | undefined} accessMap user id → vault ids
 * @param {string} actorUid
 * @returns {string[] | null} stringified, trimmed, non-blank vault ids; null when the
 *   actor has no array entry or nothing usable remains
 */
function explicitVaultAccessForUser(accessMap, actorUid) {
  const hasMap = Boolean(accessMap) && typeof accessMap === 'object';
  const entry = hasMap ? accessMap[actorUid] : null;
  if (!Array.isArray(entry) || entry.length === 0) return null;
  const vaultIds = [];
  for (const item of entry) {
    const trimmed = String(item).trim();
    if (trimmed) vaultIds.push(trimmed);
  }
  return vaultIds.length === 0 ? null : vaultIds;
}
669
/**
 * Resolve who a hosted request acts as and whether it may touch the requested vault.
 *
 * The vault id comes from the x-vault-id header (sanitized). Workspace, roles,
 * vault access and scope are loaded concurrently because they are independent reads.
 * Delegating actors with an explicit access list use that list; everyone else uses
 * the vault ids reported by the canister for the effective user.
 *
 * @param {import('express').Request} req
 * @param {string} actorUid
 * @returns {Promise<{ ok: true, effectiveCanisterUid: string, actorUid: string, vaultId: string, scope: { projects: string[], folders: string[] } | null, allowedVaultIds: string[], delegating: boolean } | { ok: false, status: number, code: string, error: string }>}
 */
async function resolveHostedBridgeContext(req, actorUid) {
  const vaultId = sanitizeVaultId(req.headers['x-vault-id']);
  const [workspace, roles, access, scopeMap] = await Promise.all([
    loadWorkspace(req.blobStore),
    loadRoles(req.blobStore),
    loadVaultAccess(req.blobStore),
    loadScope(req.blobStore),
  ]);
  const ownerId = workspace.owner_user_id;
  const { effective, delegate } = resolveEffectiveCanisterUser({
    actorSub: actorUid,
    workspaceOwnerId: ownerId,
    storedRoles: roles,
    adminUserIdsSet,
  });
  const explicitVaultIds = explicitVaultAccessForUser(access, actorUid);
  const canisterIds =
    delegate && explicitVaultIds ? explicitVaultIds : await fetchCanisterVaultIdsForUser(effective);
  const allowedVaultIds = resolveAllowedVaultIdsForHostedContext({
    delegate,
    actorUid,
    accessMap: access,
    canisterIds,
  });
  if (!allowedVaultIds.includes(vaultId)) {
    return {
      ok: false,
      status: 403,
      code: 'FORBIDDEN',
      error: 'Access to this vault is not allowed.',
    };
  }
  let scope = getScopeForUserVaultFromScopeMap(scopeMap, actorUid, vaultId);
  // Evaluators must see the full vault (per allowed_vault_ids) to review proposals in context;
  // project/folder scope still applies to viewer/editor/admin delegating members.
  const actorRole = effectiveRole(actorUid, roles);
  if (actorRole === 'evaluator') {
    scope = null;
  }
  return {
    ok: true,
    effectiveCanisterUid: effective,
    actorUid,
    vaultId,
    scope,
    allowedVaultIds,
    delegating: delegate,
  };
}
722
/**
 * Hosted settings need the actor's vault allowlist without first proving access
 * to a specific vault. This keeps Business-only delegated users from being
 * denied while the UI is still deciding which vault to select.
 *
 * Workspace, roles and vault access are loaded concurrently because they are
 * independent reads.
 *
 * @param {import('express').Request} req
 * @param {string} actorUid
 * @returns {Promise<{ effectiveCanisterUid: string, actorUid: string, allowedVaultIds: string[], delegating: boolean, workspaceOwnerId: string | null, role: string }>}
 */
async function resolveHostedBridgeSettingsContext(req, actorUid) {
  const [workspace, roles, access] = await Promise.all([
    loadWorkspace(req.blobStore),
    loadRoles(req.blobStore),
    loadVaultAccess(req.blobStore),
  ]);
  const ownerId = workspace.owner_user_id;
  const { effective, delegate } = resolveEffectiveCanisterUser({
    actorSub: actorUid,
    workspaceOwnerId: ownerId,
    storedRoles: roles,
    adminUserIdsSet,
  });
  const explicitVaultIds = explicitVaultAccessForUser(access, actorUid);
  const canisterIds =
    delegate && explicitVaultIds ? explicitVaultIds : await fetchCanisterVaultIdsForUser(effective);
  const allowedVaultIds = resolveAllowedVaultIdsForHostedContext({
    delegate,
    actorUid,
    accessMap: access,
    canisterIds,
  });
  return {
    effectiveCanisterUid: effective,
    actorUid,
    allowedVaultIds,
    delegating: delegate,
    workspaceOwnerId: ownerId,
    role: effectiveRole(actorUid, roles),
  };
}
760
/**
 * Resolve a user's role.
 * A stored role that is in VALID_ROLES wins; otherwise ids listed in
 * adminUserIdsSet are 'admin'; everyone else (and a falsy uid) is 'member'.
 * @param {string} uid
 * @param {Record<string, string> | null | undefined} storedRoles
 * @returns {string}
 */
function effectiveRole(uid, storedRoles) {
  if (!uid) return 'member';
  const stored = storedRoles ? storedRoles[uid] : undefined;
  if (stored && VALID_ROLES.has(stored)) return stored;
  if (adminUserIdsSet.has(uid)) return 'admin';
  return 'member';
}
767
/**
 * Return a directory path that contains (or will contain) knowtation_vectors.db for
 * this user and vault. Phase 15: keyed by (uid, vault_id), with the vault id taken
 * from the x-vault-id header.
 *
 * Without a blob store the directory lives under DATA_DIR/vectors. With one it lives
 * under <tmpdir>/knowtation-bridge-vectors and the DB file is rehydrated from the
 * blob vectors/<uid>/<vault> when that blob has content; a missing blob or read
 * error leaves the directory as it is.
 *
 * @param {import('express').Request} req
 * @param {string} uid
 * @returns {Promise<string>}
 */
async function getVectorsDirForUser(req, uid) {
  const safeUid = sanitizeUserId(uid);
  const vaultId = sanitizeVaultId(req.headers['x-vault-id']);
  const usesBlobs = Boolean(req.blobStore);
  const root = usesBlobs
    ? path.join(os.tmpdir(), 'knowtation-bridge-vectors')
    : path.join(DATA_DIR, 'vectors');
  const dir = path.join(root, safeUid, vaultId);
  if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
  if (!usesBlobs) return dir;

  try {
    const stored = await req.blobStore.get(`vectors/${safeUid}/${vaultId}`, { type: 'arrayBuffer' });
    if (stored && stored.byteLength > 0) {
      fs.writeFileSync(path.join(dir, DB_FILENAME), Buffer.from(stored));
    }
  } catch (_) {
    // No existing blob or read error; start fresh
  }
  return dir;
}
790
/**
 * Persist user's vector DB from disk to Blob (call after index).
 * Phase 15: the key includes the vault id from the x-vault-id header.
 * Does nothing when the request has no blob store or the DB file does not exist.
 * @param {import('express').Request} req
 * @param {string} uid
 * @param {string} vectorsDir directory holding the DB file
 * @returns {Promise<void>}
 */
async function persistVectorsToBlob(req, uid, vectorsDir) {
  const { blobStore } = req;
  if (!blobStore) return;
  const dbPath = path.join(vectorsDir, DB_FILENAME);
  if (!fs.existsSync(dbPath)) return;
  const vaultId = sanitizeVaultId(req.headers['x-vault-id']);
  const key = `vectors/${sanitizeUserId(uid)}/${vaultId}`;
  const bytes = fs.readFileSync(dbPath);
  // Copy out exactly this file's bytes; a Buffer may be a view into a larger ArrayBuffer.
  const start = bytes.byteOffset;
  const arrayBuffer = bytes.buffer.slice(start, start + bytes.byteLength);
  await blobStore.set(key, arrayBuffer);
}
802
/**
 * Serialize and sign a state payload.
 * @param {object} payload JSON-serializable state
 * @returns {string} base64url(JSON) + '.' + hex HMAC-SHA256 of the JSON under SESSION_SECRET
 */
function signState(payload) {
  const json = JSON.stringify(payload);
  const hmac = crypto.createHmac('sha256', SESSION_SECRET);
  hmac.update(json);
  const signature = hmac.digest('hex');
  const encoded = Buffer.from(json).toString('base64url');
  return `${encoded}.${signature}`;
}
808
/**
 * Verify a state string produced by signState().
 *
 * The signature is checked with a constant-time comparison so response timing
 * does not reveal how many leading characters of a forged signature were right.
 *
 * @param {string} stateStr base64url(JSON payload) + '.' + hex HMAC-SHA256
 * @param {number} [maxAgeMs=600000] maximum accepted age, measured from payload.ts
 *   (a missing ts counts as 0)
 * @returns {object | null} the decoded payload, or null when the input is malformed,
 *   the signature does not match, or the payload is older than maxAgeMs
 */
function verifyState(stateStr, maxAgeMs = 600000) {
  if (!stateStr || typeof stateStr !== 'string') return null;
  const [b64, sig] = stateStr.split('.');
  if (!b64 || !sig) return null;
  try {
    const payload = JSON.parse(Buffer.from(b64, 'base64url').toString());
    const expectedHex = crypto
      .createHmac('sha256', SESSION_SECRET)
      .update(JSON.stringify(payload))
      .digest('hex');
    // Compare the hex text itself (not decoded bytes) so acceptance stays exactly as
    // strict as string equality; timingSafeEqual requires equal lengths.
    const expected = Buffer.from(expectedHex, 'utf8');
    const provided = Buffer.from(sig, 'utf8');
    if (provided.length !== expected.length) return null;
    if (!crypto.timingSafeEqual(provided, expected)) return null;
    if (Date.now() - (payload.ts || 0) > maxAgeMs) return null;
    return payload;
  } catch (_) {
    return null;
  }
}
823
/**
 * Extract the subject (`sub`) from a session JWT signed with SESSION_SECRET.
 *
 * @param {string} token - Encoded JWT.
 * @returns {*} The `sub` claim, or null when verification throws or the claim is absent.
 */
function userIdFromJwt(token) {
  try {
    const { sub } = jwt.verify(token, SESSION_SECRET);
    if (sub === undefined || sub === null) {
      return null;
    }
    return sub;
  } catch (_) {
    return null;
  }
}
832
const app = express();
app.use(express.json({ limit: '1mb' }));

// Expose the process-wide Netlify blob store (if one was installed on globalThis) on each
// request; handlers read `req.blobStore` and get null when no store is present.
app.use(function attachBlobStore(req, _res, next) {
  const store = globalThis.__netlify_blob_store;
  req.blobStore = store || null;
  next();
});

// Marker for background-function invocations.
// `netlify/functions/bridge-index-background.mjs` checks an HMAC-signed inbound request and
// then assigns `globalThis.__bridge_internal_request = { canisterUid, vaultId, jobId }` BEFORE
// running this same Express app through serverless-http. The index handler looks at
// `req.bridgeInternalRequest` to choose between routing (sync path) and executing inline
// without routing (background path). A global is the hand-off channel because
// serverless-http offers no way for the wrapper to inject per-request locals.
app.use(function attachInternalRequestMarker(req, _res, next) {
  const marker = globalThis.__bridge_internal_request;
  req.bridgeInternalRequest = marker || null;
  next();
});
852
// CORS response headers for every route. The allowed origin comes from HUB_CORS_ORIGIN and
// falls back to the wildcard.
app.use((_req, res, next) => {
  const allowedOrigin = process.env.HUB_CORS_ORIGIN || '*';
  res.set('Access-Control-Allow-Origin', allowedOrigin);
  res.set('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS');
  res.set('Access-Control-Allow-Headers', 'Authorization, Content-Type, X-Vault-Id, X-User-Id');
  // Browsers reject credentialed responses whose allowed origin is the wildcard, so only
  // advertise credential support when a concrete origin is configured.
  if (allowedOrigin !== '*') {
    res.set('Access-Control-Allow-Credentials', 'true');
  }
  next();
});
860
// Netlify rewrites /* to /.netlify/functions/bridge/:splat, so Express receives the full
// function path. Drop that prefix so the route definitions below match.
if (inServerless) {
  const functionPrefix = '/.netlify/functions/bridge';
  app.use(function stripNetlifyFunctionPrefix(req, _res, next) {
    const { url } = req;
    if (url.startsWith(functionPrefix)) {
      const remainder = url.slice(functionPrefix.length);
      // An exact match leaves nothing behind; route it to the root.
      req.url = remainder || '/';
    }
    next();
  });
}
871
// Unauthenticated deploy probe: lets operators compare the Netlify **knowtation-bridge**
// Production commit against the gateway.
app.get('/api/v1/bridge-version', (_req, res) => {
  const { env } = process;
  const versionInfo = {
    service: 'knowtation-bridge',
    commit: env.COMMIT_REF || env.VERCEL_GIT_COMMIT_SHA || null,
    deploy_id: env.DEPLOY_ID || null,
    context: env.CONTEXT || null,
    netlify: Boolean(env.NETLIFY || env.AWS_LAMBDA_FUNCTION_NAME),
  };
  res.json(versionInfo);
});
882
// ——— Roles & invites (hosted parity) ———
/**
 * Express middleware: require a valid `Authorization: Bearer <jwt>` header.
 * On success stores the JWT subject on `req.uid` and continues; otherwise answers 401.
 */
async function requireBridgeAuth(req, res, next) {
  const header = req.headers.authorization;
  let uid = null;
  if (header && header.startsWith('Bearer ')) {
    const bearer = header.slice(7);
    uid = bearer ? userIdFromJwt(bearer) : null;
  }
  if (!uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }
  req.uid = uid;
  next();
}
892
/**
 * Express middleware: allow the request only when the caller's effective role is `admin`.
 * Expects `req.uid` to be set by a preceding authentication middleware.
 *
 * Answers 403 for non-admins and 500 when the role store cannot be read, so a storage
 * failure produces a JSON error instead of a rejected promise escaping the async middleware.
 */
async function requireBridgeAdmin(req, res, next) {
  let role;
  try {
    const roles = await loadRoles(req.blobStore);
    role = effectiveRole(req.uid, roles);
  } catch (e) {
    console.error('[bridge] requireBridgeAdmin', e?.message);
    return res.status(500).json({ error: e?.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
  if (role !== 'admin') return res.status(403).json({ error: 'Admin only', code: 'FORBIDDEN' });
  next();
}
899
/**
 * Import / index parity: viewers cannot write; default role is member (treated like editor for hosted).
 *
 * Express middleware that rejects only the `viewer` role with 403; every other effective
 * role continues. Answers 500 when the role store cannot be read, so a storage failure
 * produces a JSON error instead of a rejected promise escaping the async middleware.
 */
async function requireBridgeEditorOrAdmin(req, res, next) {
  let role;
  try {
    const roles = await loadRoles(req.blobStore);
    role = effectiveRole(req.uid, roles);
  } catch (e) {
    console.error('[bridge] requireBridgeEditorOrAdmin', e?.message);
    return res.status(500).json({ error: e?.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
  if (role === 'viewer') {
    return res.status(403).json({ error: 'This action requires editor or admin.', code: 'FORBIDDEN' });
  }
  next();
}
909
// Admin only: return the stored role map together with the evaluator-may-approve flags.
app.get('/api/v1/roles', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  const { blobStore } = req;
  try {
    const roles = await loadRoles(blobStore);
    const evaluatorFlags = await loadEvaluatorMayApproveMap(blobStore);
    res.json({ roles, evaluator_may_approve: evaluatorFlags });
  } catch (err) {
    console.error('[bridge] GET /api/v1/roles', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
920
// Admin only: assign a role to a user id. Body: { user_id, role?, evaluator_may_approve? }.
// `role` defaults to "editor". The evaluator-may-approve flag is stored only for evaluators
// and is removed when the user is moved to any other role.
app.post('/api/v1/roles', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  const { user_id: userId, role } = req.body || {};
  if (!userId || typeof userId !== 'string' || !userId.trim()) {
    return res.status(400).json({ error: 'user_id required (e.g. github:12345)', code: 'BAD_REQUEST' });
  }
  // JSON bodies can carry any type; a truthy non-string role (number, object, array) would
  // otherwise throw on toLowerCase() outside the try block. Reject it as a bad request.
  const requestedRole = role || 'editor';
  const r = typeof requestedRole === 'string' ? requestedRole.toLowerCase() : null;
  if (r === null || !VALID_ROLES.has(r)) {
    return res.status(400).json({ error: 'role must be admin, editor, viewer, or evaluator', code: 'BAD_REQUEST' });
  }
  try {
    const roles = await loadRoles(req.blobStore);
    const uidKey = userId.trim();
    roles[uidKey] = r;
    await saveRoles(req.blobStore, roles);
    const mayMap = await loadEvaluatorMayApproveMap(req.blobStore);
    if (r === 'evaluator' && req.body && Object.prototype.hasOwnProperty.call(req.body, 'evaluator_may_approve')) {
      mayMap[uidKey] = Boolean(req.body.evaluator_may_approve);
      await saveEvaluatorMayApproveMap(req.blobStore, mayMap);
    } else if (r !== 'evaluator' && Object.prototype.hasOwnProperty.call(mayMap, uidKey)) {
      // The flag is meaningless for non-evaluators; drop any stale entry.
      delete mayMap[uidKey];
      await saveEvaluatorMayApproveMap(req.blobStore, mayMap);
    }
    res.json({ ok: true });
  } catch (e) {
    console.error('[bridge] POST /api/v1/roles', e?.message);
    res.status(500).json({ error: e.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
949
// Admin only: set the evaluator-may-approve flag for a user who already has the evaluator role.
app.post('/api/v1/roles/evaluator-may-approve', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  const body = req.body || {};
  const userId = body.user_id;
  const flag = body.evaluator_may_approve;

  const hasUsableUserId = Boolean(userId) && typeof userId === 'string' && userId.trim() !== '';
  if (!hasUsableUserId) {
    return res.status(400).json({ error: 'user_id required', code: 'BAD_REQUEST' });
  }
  if (typeof flag !== 'boolean') {
    return res.status(400).json({ error: 'evaluator_may_approve must be boolean', code: 'BAD_REQUEST' });
  }

  const targetUid = userId.trim();
  try {
    const roles = await loadRoles(req.blobStore);
    const targetRole = effectiveRole(targetUid, roles);
    if (targetRole !== 'evaluator') {
      return res.status(400).json({ error: 'User must have evaluator role', code: 'BAD_REQUEST' });
    }
    const flagsByUser = await loadEvaluatorMayApproveMap(req.blobStore);
    flagsByUser[targetUid] = flag;
    await saveEvaluatorMayApproveMap(req.blobStore, flagsByUser);
    res.json({ ok: true });
  } catch (err) {
    console.error('[bridge] POST /api/v1/roles/evaluator-may-approve', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
973
// Admin only: list invites that have not yet expired, newest first.
app.get('/api/v1/invites', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  try {
    const invitesMap = await loadInvites(req.blobStore);
    const now = Date.now();
    const list = [];
    for (const [token, entry] of Object.entries(invitesMap)) {
      const created = new Date(entry?.created_at).getTime();
      // A stored entry with a missing or unparseable created_at would make toISOString()
      // throw and turn the whole listing into a 500; skip such entries instead.
      if (!Number.isFinite(created)) continue;
      if (now - created > INVITE_EXPIRY_MS) continue;
      const expires_at = new Date(created + INVITE_EXPIRY_MS).toISOString();
      list.push({ token, role: entry.role, created_at: entry.created_at, expires_at });
    }
    list.sort((a, b) => new Date(b.created_at) - new Date(a.created_at));
    res.json({ invites: list });
  } catch (e) {
    console.error('[bridge] GET /api/v1/invites', e?.message);
    res.status(500).json({ error: e.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
993
// Admin only: create an invite token for a role (default "editor") and return a Hub URL
// carrying it. The stored entry holds only the role and creation time; expiry is derived
// from INVITE_EXPIRY_MS.
app.post('/api/v1/invites', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  // A truthy non-string role (number, object, array) would throw on toLowerCase() outside
  // the try block; map it to an empty string so it fails validation with a 400 instead.
  const requestedRole = req.body?.role || 'editor';
  const role = typeof requestedRole === 'string' ? requestedRole.toLowerCase() : '';
  if (!['viewer', 'editor', 'admin', 'evaluator'].includes(role)) {
    return res.status(400).json({ error: 'role must be viewer, editor, admin, or evaluator', code: 'BAD_REQUEST' });
  }
  try {
    const token = crypto.randomBytes(24).toString('base64url');
    const created_at = new Date().toISOString();
    const expires_at = new Date(Date.now() + INVITE_EXPIRY_MS).toISOString();
    const invites = await loadInvites(req.blobStore);
    invites[token] = { role, created_at };
    await saveInvites(req.blobStore, invites);
    // Collapse any run of trailing slashes so the URL ends with exactly one "/".
    const base = (HUB_UI_ORIGIN + (HUB_UI_PATH || '/hub') + '/').replace(/(\/)+$/, '/');
    const invite_url = base + '?invite=' + encodeURIComponent(token);
    res.status(201).json({ invite_url, token, role, created_at, expires_at });
  } catch (e) {
    console.error('[bridge] POST /api/v1/invites', e?.message);
    res.status(500).json({ error: e.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1014
// Admin only: revoke an invite token. Responds with `removed: false` when the token was
// not present.
app.delete('/api/v1/invites/:token', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  const token = req.params.token;
  if (!token) return res.status(400).json({ error: 'token required', code: 'BAD_REQUEST' });
  try {
    const invites = await loadInvites(req.blobStore);
    // Own-property check: `in` also matches inherited keys such as "constructor" or
    // "toString", which would report a removal that never happened.
    const had = Object.prototype.hasOwnProperty.call(invites, token);
    if (had) {
      delete invites[token];
      await saveInvites(req.blobStore, invites);
    }
    res.json({ ok: true, removed: had });
  } catch (e) {
    console.error('[bridge] DELETE /api/v1/invites/:token', e?.message);
    res.status(500).json({ error: e.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1029
// Any authenticated user: redeem an invite token. On success the caller's role becomes the
// invite's role and the invite is deleted (single use). Expired invites are removed and
// answered with 410.
app.post('/api/v1/invites/consume', requireBridgeAuth, async (req, res) => {
  const token = req.body?.token;
  if (!token || typeof token !== 'string' || !token.trim()) {
    return res.status(400).json({ error: 'token required', code: 'BAD_REQUEST' });
  }
  const uid = req.uid;
  try {
    const invites = await loadInvites(req.blobStore);
    // Only accept tokens stored as own properties. A plain lookup also resolves inherited
    // keys ("constructor", "__proto__", ...): those are truthy, have no created_at (so they
    // never look expired) and no role, which would overwrite the caller's stored role with
    // undefined.
    const entry = Object.prototype.hasOwnProperty.call(invites, token) ? invites[token] : null;
    if (!entry || typeof entry !== 'object' || typeof entry.role !== 'string') {
      return res.status(404).json({ error: 'Invite not found or already used', code: 'NOT_FOUND' });
    }
    const created = new Date(entry.created_at).getTime();
    // An unparseable creation time is treated as expired rather than valid forever.
    if (!Number.isFinite(created) || Date.now() - created > INVITE_EXPIRY_MS) {
      delete invites[token];
      await saveInvites(req.blobStore, invites);
      return res.status(410).json({ error: 'Invite expired', code: 'EXPIRED' });
    }
    const roles = await loadRoles(req.blobStore);
    roles[uid] = entry.role;
    await saveRoles(req.blobStore, roles);
    delete invites[token];
    await saveInvites(req.blobStore, invites);
    res.json({ ok: true, role: entry.role });
  } catch (e) {
    console.error('[bridge] POST /api/v1/invites/consume', e?.message);
    res.status(500).json({ error: e.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1059
// Backs the gateway's GET /api/v1/settings: report the caller's role from the bridge store
// so that invited users see the role they were actually given.
app.get('/api/v1/role', requireBridgeAuth, async (req, res) => {
  const { blobStore, uid } = req;
  try {
    const roles = await loadRoles(blobStore);
    const approveFlags = await loadEvaluatorMayApproveMap(blobStore);
    const payload = {
      role: effectiveRole(uid, roles),
      may_approve_proposals: mayApproveProposalsForUser(uid, roles, approveFlags),
    };
    res.json(payload);
  } catch (err) {
    console.error('[bridge] GET /api/v1/role', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1073
// Admin only: report the configured workspace owner.
app.get('/api/v1/workspace', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  try {
    const { owner_user_id: ownerUserId } = await loadWorkspace(req.blobStore);
    res.json({ owner_user_id: ownerUserId });
  } catch (err) {
    console.error('[bridge] GET /api/v1/workspace', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1083
// Admin only: set or clear the workspace owner. Anything other than a non-blank string
// (null, missing, empty, non-string) clears the owner.
app.post('/api/v1/workspace', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  const raw = req.body?.owner_user_id;
  let ownerUserId = null;
  if (typeof raw === 'string') {
    const trimmed = raw.trim();
    if (trimmed) {
      ownerUserId = trimmed;
    }
  }
  try {
    await saveWorkspace(req.blobStore, ownerUserId);
    // Read back so the response reflects what was actually stored.
    const stored = await loadWorkspace(req.blobStore);
    res.json({ ok: true, owner_user_id: stored.owner_user_id });
  } catch (err) {
    console.error('[bridge] POST /api/v1/workspace', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1101
// Admin only: return the stored vault-access map.
app.get('/api/v1/vault-access', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  try {
    const storedAccess = await loadVaultAccess(req.blobStore);
    res.json({ access: storedAccess });
  } catch (err) {
    console.error('[bridge] GET /api/v1/vault-access', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1111
// Admin only: replace the vault-access map with the `access` object from the request body.
app.post('/api/v1/vault-access', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  const requested = req.body?.access;
  const isObjectLike = Boolean(requested) && typeof requested === 'object';
  if (!isObjectLike) {
    return res.status(400).json({ error: 'access object required', code: 'BAD_REQUEST' });
  }
  try {
    await saveVaultAccess(req.blobStore, requested);
    // Read back so the response reflects what was actually stored.
    const stored = await loadVaultAccess(req.blobStore);
    res.json({ ok: true, access: stored });
  } catch (err) {
    console.error('[bridge] POST /api/v1/vault-access', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1126
// Admin only: return the stored scope configuration.
app.get('/api/v1/scope', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  try {
    const storedScope = await loadScope(req.blobStore);
    res.json({ scope: storedScope });
  } catch (err) {
    console.error('[bridge] GET /api/v1/scope', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1136
// Admin only: replace the scope configuration with the `scope` object from the request body.
app.post('/api/v1/scope', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  const requested = req.body?.scope;
  const isObjectLike = Boolean(requested) && typeof requested === 'object';
  if (!isObjectLike) {
    return res.status(400).json({ error: 'scope object required', code: 'BAD_REQUEST' });
  }
  try {
    await saveScope(req.blobStore, requested);
    // Read back so the response reflects what was actually stored.
    const stored = await loadScope(req.blobStore);
    res.json({ ok: true, scope: stored });
  } catch (err) {
    console.error('[bridge] POST /api/v1/scope', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1151
// Authenticated: describe the caller's hosted context — workspace owner, the canister user
// id requests are made as, whether the caller is delegating, permitted vault ids, scope,
// role and proposal-approval capability.
app.get('/api/v1/hosted-context', requireBridgeAuth, async (req, res) => {
  const { blobStore, uid: actor } = req;
  try {
    const workspace = await loadWorkspace(blobStore);
    const roles = await loadRoles(blobStore);
    const ctx = await resolveHostedBridgeContext(req, actor);
    if (!ctx.ok) {
      // Forward the resolver's own status and error code.
      return res.status(ctx.status).json({ error: ctx.error, code: ctx.code });
    }
    const role = effectiveRole(actor, roles);
    const approveFlags = await loadEvaluatorMayApproveMap(blobStore);
    const payload = {
      actor_sub: actor,
      workspace_owner_id: workspace.owner_user_id,
      effective_canister_user_id: ctx.effectiveCanisterUid,
      delegating: ctx.delegating,
      allowed_vault_ids: ctx.allowedVaultIds,
      scope: ctx.scope,
      role,
      may_approve_proposals: mayApproveProposalsForUser(actor, roles, approveFlags),
    };
    res.json(payload);
  } catch (err) {
    console.error('[bridge] GET /api/v1/hosted-context', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1179
// Authenticated: settings flavour of the hosted context. Same response shape as
// /api/v1/hosted-context, but built from the settings resolver and always with `scope: null`.
app.get('/api/v1/hosted-context/settings', requireBridgeAuth, async (req, res) => {
  const actor = req.uid;
  try {
    const ctx = await resolveHostedBridgeSettingsContext(req, actor);
    const approveFlags = await loadEvaluatorMayApproveMap(req.blobStore);
    // Approval capability is evaluated against the role the resolver reported for the actor.
    const rolesForActor = { [actor]: ctx.role };
    const payload = {
      actor_sub: actor,
      workspace_owner_id: ctx.workspaceOwnerId,
      effective_canister_user_id: ctx.effectiveCanisterUid,
      delegating: ctx.delegating,
      allowed_vault_ids: ctx.allowedVaultIds,
      scope: null,
      role: ctx.role,
      may_approve_proposals: mayApproveProposalsForUser(actor, rolesForActor, approveFlags),
    };
    res.json(payload);
  } catch (err) {
    console.error('[bridge] GET /api/v1/hosted-context/settings', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1201
// ——— Connect GitHub ———
// Both routes are registered only when GitHub OAuth client credentials are configured.
if (process.env.GITHUB_CLIENT_ID && process.env.GITHUB_CLIENT_SECRET) {
  // Step 1: identify the caller from a session JWT (query `token` or Bearer header) and
  // redirect to GitHub's authorize page with a signed, timestamped `state` carrying the uid.
  app.get('/auth/github-connect', (req, res) => {
    const token = req.query.token || (req.headers.authorization && req.headers.authorization.startsWith('Bearer ') && req.headers.authorization.slice(7));
    const uid = token ? userIdFromJwt(token) : null;
    if (!uid) {
      return res.redirect(HUB_UI_ORIGIN + HUB_UI_PATH + '/?github_connect_error=not_authenticated');
    }
    const state = signState({ uid, ts: Date.now() });
    const redirectUri = BASE_URL + '/auth/callback/github-connect';
    const url = 'https://github.com/login/oauth/authorize?client_id=' + encodeURIComponent(process.env.GITHUB_CLIENT_ID)
      + '&redirect_uri=' + encodeURIComponent(redirectUri)
      + '&scope=repo'
      + '&state=' + encodeURIComponent(state);
    res.redirect(url);
  });

  // Step 2: GitHub redirects back here. Verify `state`, exchange `code` for an access token,
  // store the token for the user (keeping any previously saved repo), then redirect to the
  // Hub UI with a success or error query parameter.
  app.get('/auth/callback/github-connect', async (req, res) => {
    const { code, state } = req.query || {};
    const hubBase = HUB_UI_ORIGIN + HUB_UI_PATH + '/';
    console.log('[bridge] callback: hubBase=%s (ORIGIN=%s PATH=%s)', hubBase, HUB_UI_ORIGIN, HUB_UI_PATH);
    const payload = verifyState(state);
    if (!payload) {
      const url = hubBase + '?github_connect_error=error_state';
      console.log('[bridge] redirect (error_state): %s', url);
      return res.redirect(302, url);
    }
    if (!code) {
      const url = hubBase + '?github_connect_error=error_code';
      console.log('[bridge] redirect (error_code): %s', url);
      return res.redirect(302, url);
    }
    const uid = payload.uid;
    const redirectUri = BASE_URL + '/auth/callback/github-connect';

    // A network failure or a non-JSON reply from GitHub must end in a redirect, not in a
    // rejected promise escaping the handler; treat both like a missing access token.
    let data = null;
    try {
      const tokenRes = await fetch('https://github.com/login/oauth/access_token', {
        method: 'POST',
        headers: { Accept: 'application/json', 'Content-Type': 'application/json' },
        body: JSON.stringify({
          client_id: process.env.GITHUB_CLIENT_ID,
          client_secret: process.env.GITHUB_CLIENT_SECRET,
          code,
          redirect_uri: redirectUri,
        }),
      });
      data = await tokenRes.json();
    } catch (e) {
      console.error('[bridge] GitHub token exchange failed:', e?.message || e);
    }
    if (!data || !data.access_token) {
      const url = hubBase + '?github_connect_error=error_token';
      console.log('[bridge] redirect (error_token): %s', url);
      return res.redirect(302, url);
    }

    // Loading the token store can fail just like saving it; report both as blob_storage.
    try {
      const tokensByUser = await loadTokens(req.blobStore);
      tokensByUser[uid] = { token: data.access_token, repo: tokensByUser[uid]?.repo || null };
      await saveTokens(req.blobStore, tokensByUser);
    } catch (e) {
      console.error('[bridge] saveTokens after GitHub OAuth failed:', e?.message || e);
      const url = hubBase + '?github_connect_error=blob_storage';
      return res.redirect(302, url);
    }
    const redirectTo = hubBase + '?github_connected=1';
    console.log('[bridge] redirect after connect: HUB_UI_ORIGIN=%s HUB_UI_PATH=%s redirectTo=%s', HUB_UI_ORIGIN, HUB_UI_PATH, redirectTo);
    res.redirect(302, redirectTo);
  });
}
1266
// ——— Delete vault (canister + team access/scope + vector blob) ———
// Editor/admin only, and when a workspace owner is configured only that owner may delete.
// The default vault cannot be deleted. After the canister confirms the deletion, the vault
// is stripped from stored access/scope data and its vector blob is removed.
app.delete('/api/v1/vaults/:vaultId', requireBridgeAuth, requireBridgeEditorOrAdmin, async (req, res) => {
  if (!CANISTER_URL) {
    return res.status(503).json({ error: 'CANISTER_URL not configured', code: 'NOT_AVAILABLE' });
  }
  const vaultId = sanitizeVaultId(req.params.vaultId);
  if (!req.params.vaultId || String(req.params.vaultId).trim() === '' || vaultId === 'default') {
    return res.status(400).json({ error: 'Cannot delete the default vault', code: 'BAD_REQUEST' });
  }

  try {
    // Resolve the hosted context as if the request targeted the vault being deleted: the
    // resolver reads the vault id from the x-vault-id header, so override it temporarily.
    // `finally` restores the header even if the resolver throws, and a header that was
    // absent is removed again rather than left behind as an `undefined` entry.
    const prevVaultHeader = req.headers['x-vault-id'];
    let hctx;
    req.headers['x-vault-id'] = vaultId;
    try {
      hctx = await resolveHostedBridgeContext(req, req.uid);
    } finally {
      if (prevVaultHeader === undefined) {
        delete req.headers['x-vault-id'];
      } else {
        req.headers['x-vault-id'] = prevVaultHeader;
      }
    }

    if (!hctx.ok) {
      return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
    }

    const workspace = await loadWorkspace(req.blobStore);
    const owner = workspace.owner_user_id && String(workspace.owner_user_id).trim();
    if (owner && req.uid !== owner) {
      return res.status(403).json({
        error: 'Only the workspace owner can delete vaults.',
        code: 'FORBIDDEN',
      });
    }

    let canRes;
    try {
      canRes = await fetch(`${CANISTER_URL}/api/v1/vaults/${encodeURIComponent(vaultId)}`, {
        method: 'DELETE',
        headers: canisterHeaders({ 'X-User-Id': hctx.effectiveCanisterUid }),
      });
    } catch (e) {
      console.error('[bridge] DELETE vault canister fetch', e?.message);
      return res.status(502).json({ error: 'Could not reach canister', code: 'BAD_GATEWAY' });
    }

    const text = await canRes.text();
    if (!canRes.ok) {
      // Prefer the canister's JSON `error` field; fall back to the raw body text.
      let errMsg = text;
      try {
        const j = JSON.parse(text);
        if (j && j.error) errMsg = j.error;
      } catch (_) {}
      // Non-OK statuses below 400 (e.g. redirects) are reported as 502.
      return res.status(canRes.status >= 400 ? canRes.status : 502).json({
        error: errMsg || 'Canister error',
        code: 'UPSTREAM_ERROR',
      });
    }

    await stripHostedVaultFromAccessAndScope(req.blobStore, vaultId);
    await removeHostedVectorBlobForVault(req.blobStore, hctx.effectiveCanisterUid, vaultId);

    // Pass through the canister's JSON body when it has one.
    let data = null;
    try {
      data = text ? JSON.parse(text) : {};
    } catch (_) {
      data = null;
    }
    if (data !== null) {
      res.json({ ok: true, ...data });
    } else {
      res.json({ ok: true, deleted_vault_id: vaultId });
    }
  } catch (e) {
    // Storage or resolver failures would otherwise escape the async handler as an
    // unhandled rejection and leave the request without a response.
    console.error('[bridge] DELETE /api/v1/vaults/:vaultId', e?.message);
    if (!res.headersSent) {
      res.status(500).json({ error: e?.message || 'Internal error', code: 'INTERNAL_ERROR' });
    }
  }
});
1329
/**
 * Collect complete proposal documents for a GitHub backup: list the proposals, then GET
 * each one by id (sequentially), and finally apply the same scope filter used for notes.
 *
 * Errors thrown for upstream failures carry `status = 502`.
 *
 * @param {string} canisterUrl
 * @param {string} canisterUid
 * @param {string} vaultId
 * @param {{ projects?: string[], folders?: string[] } | null | undefined} scope
 */
async function fetchFullProposalsForGithubBackup(canisterUrl, canisterUid, vaultId, scope) {
  const upstreamError = (message) => Object.assign(new Error(message), { status: 502 });
  const root = String(canisterUrl || '').replace(/\/$/, '');
  const headers = canisterHeaders({ 'X-User-Id': canisterUid, 'X-Vault-Id': vaultId });

  const listResponse = await fetch(`${root}/api/v1/proposals`, { method: 'GET', headers });
  if (!listResponse.ok) {
    throw upstreamError(`Canister proposals list ${listResponse.status}`);
  }

  let listing;
  try {
    listing = await listResponse.json();
  } catch {
    throw upstreamError('Invalid canister proposals list JSON');
  }

  const stubs = Array.isArray(listing.proposals) ? listing.proposals : [];
  const documents = [];
  for (const stub of stubs) {
    // Stubs without a usable proposal_id are skipped.
    const id = stub && stub.proposal_id ? String(stub.proposal_id) : '';
    if (id === '') {
      continue;
    }

    const detailUrl = `${root}/api/v1/proposals/${encodeURIComponent(id)}`;
    const detailResponse = await fetch(detailUrl, { method: 'GET', headers });
    if (!detailResponse.ok) {
      throw upstreamError(`Canister proposal ${id} ${detailResponse.status}`);
    }

    const rawBody = await detailResponse.text();
    const parsed = parseCanisterProposalGetBody(id, rawBody, stub);
    if (parsed._knowtation_backup_json_unparseable) {
      // The parser flagged the body as unparseable; log a preview but keep the entry.
      console.error('[bridge] vault/sync proposal JSON parse failed', { id, preview: rawBody.slice(0, 300) });
    }
    documents.push(parsed);
  }

  return applyScopeFilterToProposals(documents, scope);
}
1377
// ——— Back up now: fetch vault from canister, push to GitHub ———
// POST /api/v1/vault/sync (editor/admin). Body may carry { repo: "owner/name" }; otherwise
// the repo remembered for the user is used. Steps:
//   1. export the vault's notes from the canister (scope-filtered) and fetch full proposals;
//   2. remember the repo when one was supplied in the body;
//   3. create one git blob per note plus `.knowtation/backup/v1/snapshot.json` (proposals),
//      build a tree on top of the default branch, commit, and move (or create) the branch ref.
// An empty repository (no commits) is handled by creating the initial commit and ref.
app.post('/api/v1/vault/sync', requireBridgeAuth, requireBridgeEditorOrAdmin, async (req, res) => {
  const uid = req.uid;

  try {
    const hctx = await resolveHostedBridgeContext(req, uid);
    if (!hctx.ok) {
      return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
    }
    const canisterUid = hctx.effectiveCanisterUid;

    const tokensByUser = await loadTokens(req.blobStore);
    const conn = tokensByUser[uid];
    const repo = req.body?.repo || conn?.repo;
    if (!conn?.token) {
      return res.status(400).json({ error: 'GitHub not connected', code: 'GITHUB_NOT_CONNECTED' });
    }
    if (!repo || typeof repo !== 'string') {
      return res.status(400).json({ error: 'Repo required', code: 'REPO_REQUIRED', hint: 'Send { "repo": "owner/name" } or set repo after connecting GitHub.' });
    }

    const [owner, name] = repo.split('/').filter(Boolean);
    if (!owner || !name) {
      return res.status(400).json({ error: 'Invalid repo format', code: 'BAD_REQUEST' });
    }
    // owner/name come from the request body and are interpolated into api.github.com URLs.
    // Restrict them to the characters GitHub allows in owner and repository names so a
    // crafted value ("..", "x?y", "a#b") cannot redirect the calls to another API path.
    const safeSegment = /^[A-Za-z0-9_.-]+$/;
    const isSafeSegment = (segment) => safeSegment.test(segment) && segment !== '.' && segment !== '..';
    if (!isSafeSegment(owner) || !isSafeSegment(name)) {
      return res.status(400).json({ error: 'Invalid repo format', code: 'BAD_REQUEST' });
    }

    const vaultId = sanitizeVaultId(req.headers['x-vault-id']);

    // Fetch vault from canister (export)
    let exportRes;
    try {
      exportRes = await fetch(CANISTER_URL + '/api/v1/export', {
        method: 'GET',
        headers: canisterHeaders({ 'X-User-Id': canisterUid, 'X-Vault-Id': vaultId }),
      });
    } catch (e) {
      return res.status(502).json({ error: 'Could not reach canister', code: 'BAD_GATEWAY' });
    }
    if (!exportRes.ok) {
      return res.status(502).json({ error: 'Canister error', code: 'BAD_GATEWAY', status: exportRes.status });
    }
    let vault;
    try {
      vault = await exportRes.json();
    } catch (_) {
      return res.status(502).json({ error: 'Invalid canister response', code: 'BAD_GATEWAY' });
    }
    // Tolerate a null body or a missing/non-array `notes` field.
    let notes = Array.isArray(vault?.notes) ? vault.notes : [];
    if (hctx.scope) {
      notes = applyScopeFilterToNotes(notes, hctx.scope);
    }

    let proposals = [];
    try {
      proposals = await fetchFullProposalsForGithubBackup(CANISTER_URL, canisterUid, vaultId, hctx.scope);
    } catch (e) {
      console.error('[bridge] vault/sync proposals fetch', e?.message);
      return res.status(e.status || 502).json({
        error: e.message || 'Could not fetch proposals for backup',
        code: 'BAD_GATEWAY',
      });
    }

    // Store repo for next time
    if (req.body?.repo && (!conn.repo || conn.repo !== repo)) {
      tokensByUser[uid] = { ...conn, repo };
      await saveTokens(req.blobStore, tokensByUser);
    }

    // Push to GitHub: get default branch, create blobs, create tree, commit, push
    const ghToken = conn.token;
    const ghApi = 'https://api.github.com';
    const repoApi = `${ghApi}/repos/${owner}/${name}`;
    // GitHub requires a non-empty User-Agent; some serverless runtimes send none → 403 "Administrative rules".
    const ghHeaders = {
      Authorization: 'token ' + ghToken,
      Accept: 'application/vnd.github.v3+json',
      'Content-Type': 'application/json',
      'User-Agent': 'KnowtationHub-Bridge/1.0 (+https://knowtation.store)',
    };
    const headsRefEnc = (branch) => encodeURIComponent(`heads/${String(branch || 'main').trim()}`);

    // Upload UTF-8 text as a git blob. Resolves to { ok: false } when GitHub rejects the
    // upload, otherwise { ok: true, sha } with the sha GitHub reported.
    const createBlob = async (text) => {
      const blobRes = await fetch(`${repoApi}/git/blobs`, {
        method: 'POST',
        headers: ghHeaders,
        body: JSON.stringify({ content: Buffer.from(text, 'utf8').toString('base64'), encoding: 'base64' }),
      });
      if (!blobRes.ok) return { ok: false };
      const blob = await blobRes.json();
      return { ok: true, sha: blob.sha };
    };

    let defaultBranch;
    try {
      const repoRes = await fetch(repoApi, { headers: ghHeaders });
      if (!repoRes.ok) {
        if (repoRes.status === 404) {
          return res.status(400).json({ error: 'Repo not found or no access', code: 'REPO_NOT_FOUND' });
        }
        throw new Error('GitHub API ' + repoRes.status);
      }
      const repoData = await repoRes.json();
      defaultBranch = String(repoData.default_branch || 'main').trim() || 'main';
    } catch (e) {
      return res.status(502).json({ error: 'GitHub API error', code: 'BAD_GATEWAY' });
    }

    // GET single ref: documented as /git/ref/{ref} with ref = heads/<branch> (URL-encoded). Avoids edge cases with /git/refs/... on some hosts.
    const refRes = await fetch(`${repoApi}/git/ref/${headsRefEnc(defaultBranch)}`, { headers: ghHeaders });
    let baseSha = null;
    let baseTreeSha = null;
    if (refRes.ok) {
      const refData = await refRes.json();
      baseSha = refData.object?.sha;
      if (!baseSha) {
        return res.status(502).json({ error: 'Invalid ref response', code: 'BAD_GATEWAY' });
      }
      const baseTreeRes = await fetch(`${repoApi}/git/commits/${baseSha}`, { headers: ghHeaders });
      if (!baseTreeRes.ok) {
        return res.status(502).json({ error: 'Could not get base commit', code: 'BAD_GATEWAY' });
      }
      const baseCommit = await baseTreeRes.json();
      baseTreeSha = baseCommit.tree?.sha;
    } else if (refRes.status === 404) {
      // Repo exists on GitHub but has no commits yet (Quick setup / empty repo) — no refs/heads/* yet.
      baseSha = null;
      baseTreeSha = null;
    } else {
      const refErrBody = await refRes.text();
      console.warn('[bridge] GitHub GET ref failed', { owner, name, branch: defaultBranch, status: refRes.status, body: refErrBody.slice(0, 500) });
      if (refRes.status === 403 || refRes.status === 401) {
        return res.status(502).json({
          error:
            'GitHub denied access when reading the branch (often missing User-Agent or expired token). Use Settings → Connect GitHub again.',
          code: 'BAD_GATEWAY',
        });
      }
      return res.status(502).json({
        error: 'Could not read branch on GitHub. If the repo is new with no commits, try Back up again after redeploying the bridge; otherwise check bridge logs.',
        code: 'BAD_GATEWAY',
      });
    }

    const tree = [];
    for (const note of notes) {
      // Named notePath so the imported `path` module is not shadowed.
      const notePath = note.path || 'note.md';
      // Emit a front-matter fence only when the note has non-empty front matter.
      const content = (note.frontmatter && note.frontmatter !== '{}' ? '---\n' + note.frontmatter + '\n---\n\n' : '') + (note.body || '');
      const noteBlob = await createBlob(content);
      if (!noteBlob.ok) {
        return res.status(502).json({ error: 'GitHub blob failed', code: 'BAD_GATEWAY' });
      }
      tree.push({ path: notePath, mode: '100644', type: 'blob', sha: noteBlob.sha });
    }

    const snapshotObj = {
      format_version: 1,
      kind: 'knowtation-hosted-backup',
      exported_at: new Date().toISOString(),
      vault_id: vaultId,
      proposals,
    };
    const snapBlob = await createBlob(JSON.stringify(snapshotObj));
    if (!snapBlob.ok) {
      return res.status(502).json({ error: 'GitHub blob failed (snapshot)', code: 'BAD_GATEWAY' });
    }
    tree.push({
      path: '.knowtation/backup/v1/snapshot.json',
      mode: '100644',
      type: 'blob',
      sha: snapBlob.sha,
    });

    // First commit of an empty vault into an empty repo: add a README explaining the layout.
    const isInitialCommit = !baseSha;
    if (isInitialCommit && notes.length === 0 && proposals.length === 0) {
      const placeholder =
        '# Knowtation vault backup\n\n'
        + 'This folder is written by **Back up now** on hosted Knowtation.\n\n'
        + '- **Markdown files** elsewhere in this repo are your vault **notes**.\n'
        + '- **`.knowtation/backup/v1/snapshot.json`** holds full **proposal** records (status, review, enrich metadata, bodies).\n\n'
        + 'Your vault had no notes or proposals yet. Add content in the Hub and run **Back up now** again.\n';
      const readmeBlob = await createBlob(placeholder);
      if (!readmeBlob.ok) {
        return res.status(502).json({ error: 'GitHub blob failed', code: 'BAD_GATEWAY' });
      }
      tree.push({ path: '.knowtation/README.md', mode: '100644', type: 'blob', sha: readmeBlob.sha });
    }

    // With a base tree the new entries are layered on top of the existing branch content.
    const treePayload = baseTreeSha ? { base_tree: baseTreeSha, tree } : { tree };
    const treeRes = await fetch(`${repoApi}/git/trees`, {
      method: 'POST',
      headers: ghHeaders,
      body: JSON.stringify(treePayload),
    });
    if (!treeRes.ok) {
      return res.status(502).json({ error: 'GitHub tree failed', code: 'BAD_GATEWAY' });
    }
    const newTree = await treeRes.json();

    const commitRes = await fetch(`${repoApi}/git/commits`, {
      method: 'POST',
      headers: ghHeaders,
      body: JSON.stringify({
        message: 'Knowtation Hub backup ' + new Date().toISOString(),
        tree: newTree.sha,
        parents: baseSha ? [baseSha] : [],
      }),
    });
    if (!commitRes.ok) {
      return res.status(502).json({ error: 'GitHub commit failed', code: 'BAD_GATEWAY' });
    }
    const newCommit = await commitRes.json();

    // Existing branch: fast-forward the ref (never force). Empty repo: create the ref.
    let refUpdateRes;
    if (baseSha) {
      refUpdateRes = await fetch(`${repoApi}/git/refs/${headsRefEnc(defaultBranch)}`, {
        method: 'PATCH',
        headers: ghHeaders,
        body: JSON.stringify({ sha: newCommit.sha, force: false }),
      });
    } else {
      refUpdateRes = await fetch(`${repoApi}/git/refs`, {
        method: 'POST',
        headers: ghHeaders,
        body: JSON.stringify({ ref: `refs/heads/${defaultBranch}`, sha: newCommit.sha }),
      });
    }
    if (!refUpdateRes.ok) {
      return res.status(502).json({ error: 'GitHub push failed', code: 'BAD_GATEWAY' });
    }

    res.json({
      ok: true,
      message: 'Synced',
      notesCount: notes.length,
      proposalsCount: proposals.length,
    });
  } catch (e) {
    // Any await above that is not individually guarded (context/token storage, GitHub
    // network errors, malformed JSON) lands here instead of escaping the async handler.
    console.error('[bridge] POST /api/v1/vault/sync', e?.message);
    if (!res.headersSent) {
      res.status(500).json({ error: e?.message || 'Internal error', code: 'INTERNAL_ERROR' });
    }
  }
});
1625
/**
 * GET /api/v1/vault/github-status — connection summary for Settings (connected + repo).
 *
 * Auth: `Authorization: Bearer <JWT>`; the user id is resolved with `userIdFromJwt`.
 * Responds 401 when the header is missing or does not resolve to a user id.
 * On success returns `{ github_connected, repo }`; the stored token itself is never returned.
 * A failure while loading the token store is answered with a 500 JSON error, using the
 * same shape as the `/api/v1/vault/github-token` route, so the rejection does not
 * escape the async handler.
 */
app.get('/api/v1/vault/github-status', async (req, res) => {
  const auth = req.headers.authorization;
  const token = auth && auth.startsWith('Bearer ') ? auth.slice(7) : null;
  const uid = token ? userIdFromJwt(token) : null;
  if (!uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }
  try {
    const tokensByUser = await loadTokens(req.blobStore);
    const conn = tokensByUser[uid];
    res.json({
      github_connected: Boolean(conn?.token),
      repo: conn?.repo || null,
    });
  } catch (e) {
    res.status(500).json({ error: e?.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1641
// Internal: GET GitHub connection (token + repo) for the gateway to use for image upload.
// Server-to-server only — never exposed to the browser. Auth required.
//
// Responses:
//   401 UNAUTHORIZED          — no Bearer token, or it does not resolve to a user id.
//   400 GITHUB_NOT_CONNECTED  — no stored token for this user.
//   200 { token, repo }       — `repo` is null when none is stored.
//   500 INTERNAL_ERROR        — loading the token store failed.
app.get('/api/v1/vault/github-token', async (req, res) => {
  const auth = req.headers.authorization;
  const token = auth && auth.startsWith('Bearer ') ? auth.slice(7) : null;
  const uid = token ? userIdFromJwt(token) : null;
  if (!uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }
  // The success body carries a credential: tell every cache not to store it.
  res.setHeader('Cache-Control', 'no-store');
  try {
    const tokensByUser = await loadTokens(req.blobStore);
    const conn = tokensByUser[uid];
    if (!conn?.token) {
      return res.status(400).json({ error: 'GitHub not connected', code: 'GITHUB_NOT_CONNECTED' });
    }
    res.json({ token: conn.token, repo: conn.repo || null });
  } catch (e) {
    res.status(500).json({ error: e?.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1662
/** Max notes per canister POST /api/v1/notes/batch (must match hub/icp NOTES_BATCH cap). */
const CANISTER_NOTES_BATCH_MAX = 100;

/**
 * Send notes to the canister's batch endpoint in consecutive slices of at most
 * CANISTER_NOTES_BATCH_MAX notes. Slices are posted one after another, each
 * awaited before the next starts. An empty list performs no request.
 *
 * @param {string} canisterUid sent as `X-User-Id`
 * @param {string} actorUid sent as `X-Actor-Id`
 * @param {string} vaultId sent as `X-Vault-Id`
 * @param {{ path: string, body: string, frontmatter?: Record<string, unknown> }[]} notes
 * @returns {Promise<void>}
 * @throws {Error} on the first non-OK response; the message contains the HTTP
 *   status and up to 800 characters of the response body. Slices that were
 *   already accepted are not undone here.
 */
async function postNotesBatchToCanister(canisterUid, actorUid, vaultId, notes) {
  let start = 0;
  while (start < notes.length) {
    const slice = notes.slice(start, start + CANISTER_NOTES_BATCH_MAX);
    start += CANISTER_NOTES_BATCH_MAX;

    const headers = canisterHeaders({
      'Content-Type': 'application/json',
      'X-User-Id': canisterUid,
      'X-Actor-Id': actorUid,
      'X-Vault-Id': vaultId,
    });
    const response = await fetch(CANISTER_URL + '/api/v1/notes/batch', {
      method: 'POST',
      headers,
      body: JSON.stringify({ notes: slice }),
    });

    // The body is always read, including on success, before moving to the next slice.
    const responseText = await response.text();
    if (response.ok) continue;
    throw new Error(
      `Canister batch note write failed (${response.status}): ${responseText.slice(0, 800)}`,
    );
  }
}
1692
/**
 * Sanitize a user-supplied filename before writing it to disk.
 * - Strips all directory components (path traversal prevention). Backslashes are
 *   treated as separators too, so Windows-style paths are reduced to their last
 *   segment on every platform.
 * - Removes every character that is not alphanumeric, a dot, hyphen, or underscore.
 * - Truncates to 200 chars so filesystem limits are never approached.
 * - Falls back to 'upload' when the result would be empty or consist only of dots
 *   ('.' and '..' name directories, not files).
 *
 * @param {unknown} rawName filename as received from the client; non-strings are stringified
 * @returns {string} a non-empty name matching /^[a-zA-Z0-9._-]{1,200}$/
 */
function sanitizeUploadFilename(rawName) {
  // Normalise '\' to '/' first so path.basename drops Windows directories on POSIX as well.
  const normalized = String(rawName || '').replace(/\\/g, '/');
  const base = path.basename(normalized);
  const safe = base.replace(/[^a-zA-Z0-9._-]/g, '_').slice(0, 200);
  if (safe === '' || /^\.+$/.test(safe)) return 'upload';
  return safe;
}
1705
/**
 * Express middleware: create a fresh per-request temp directory and expose it as
 * `req._importTempDir` for the upload storage and the import handler.
 *
 * The directory is also removed once the response has been sent. Without this,
 * a request that a later middleware terminates (for example an upload rejected
 * for exceeding the size limit) never reaches the route handler, and the
 * directory would be left behind. Removal is best-effort and harmless if the
 * handler already deleted the directory.
 */
const importTempDirMiddleware = (req, res, next) => {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'knowtation-bridge-import-'));
  req._importTempDir = tempDir;
  res.once('finish', () => {
    try {
      fs.rmSync(tempDir, { recursive: true, force: true });
    } catch (_) {
      // Best-effort cleanup: a failure to delete must not affect the finished response.
    }
  });
  next();
};
// Multer middleware for POST /api/v1/import: accepts one multipart file in the
// `file` field, capped at 100 MiB, stored on disk inside `req._importTempDir`
// (set by importTempDirMiddleware) under a sanitized version of its original name.
const bridgeImportUpload = multer({
  limits: { fileSize: 100 * 1024 * 1024 },
  storage: multer.diskStorage({
    destination(req, _file, cb) {
      cb(null, req._importTempDir);
    },
    filename(_req, file, cb) {
      cb(null, sanitizeUploadFilename(file.originalname));
    },
  }),
}).single('file');
1717
1718 // ——— Phase 18: GitHub image upload + image proxy ———
1719
// Multer middleware for image uploads: one multipart file in the `image` field,
// capped at 25 MiB and kept in memory (exposed to the handler as `req.file.buffer`).
const bridgeImageUpload = multer({
  limits: { fileSize: 25 * 1024 * 1024 },
  storage: multer.memoryStorage(),
}).single('image');
1724
/**
 * POST /api/v1/notes/<note path>/upload-image
 *
 * Validates an uploaded image (extension, declared content-type, magic bytes),
 * commits it to the user's connected GitHub repo under
 * `media/images/<YYYY-MM>/<timestamp>-<sanitized name>` and returns the URL plus
 * ready-to-insert markdown.
 *
 * Responses:
 *   400 BAD_REQUEST            — missing file or failed validation.
 *   400 GITHUB_NOT_CONNECTED   — no stored GitHub token for the user.
 *   400 GITHUB_NOT_CONFIGURED  — token present but no repo recorded.
 *   400 / 500                  — commit failure; classified by matching the error message.
 */
app.post(
  /^\/api\/v1\/notes\/(.+)\/upload-image$/,
  requireBridgeAuth,
  requireBridgeEditorOrAdmin,
  bridgeImageUpload,
  async (req, res) => {
    const badRequest = (message) => res.status(400).json({ error: message, code: 'BAD_REQUEST' });

    try {
      const upload = req.file;
      if (!upload) return badRequest('image file required');

      // Validation order: extension, then declared MIME type, then file signature.
      const originalName = upload.originalname || 'image.jpg';
      try {
        validateImageExtension(originalName);
      } catch (extensionError) {
        return badRequest(extensionError.message);
      }

      const declaredType = (upload.mimetype || '').toLowerCase();
      if (!declaredType.startsWith('image/')) {
        return badRequest('File content-type must be image/*');
      }

      const ext = originalName.split('.').pop().toLowerCase();
      try {
        validateMagicBytes(upload.buffer, ext);
      } catch (signatureError) {
        return badRequest(signatureError.message);
      }

      const tokensByUser = await loadTokens(req.blobStore);
      const conn = tokensByUser[req.uid];
      if (!conn?.token) {
        return res.status(400).json({
          error: 'GitHub not connected. Go to Settings → Backup → Connect GitHub.',
          code: 'GITHUB_NOT_CONNECTED',
        });
      }
      if (!conn.repo) {
        return res.status(400).json({
          error: 'GitHub repo not set. Back up once first to set the remote.',
          code: 'GITHUB_NOT_CONFIGURED',
        });
      }

      // Month folder from the server's local clock; the millisecond prefix keeps names unique.
      const now = new Date();
      const month = String(now.getMonth() + 1).padStart(2, '0');
      const yearMonth = `${now.getFullYear()}-${month}`;
      const safeName = originalName.replace(/[^a-zA-Z0-9._-]/g, '_').slice(0, 128);
      const repoFilePath = `media/images/${yearMonth}/${Date.now()}-${safeName}`;

      const result = await commitImageToRepo({
        accessToken: conn.token,
        repoUrl: conn.repo,
        filePath: repoFilePath,
        fileBuffer: upload.buffer,
        commitMessage: `Add image: ${safeName}`,
      });

      res.json({
        url: result.url,
        inserted_markdown: `![${safeName}](${result.url})`,
        sha: result.sha,
        repo_path: repoFilePath,
        repo_private: result.isPrivate === true,
      });
    } catch (e) {
      console.error('[bridge] upload-image error:', e?.message);
      const msg = e.message || String(e);
      // Messages that point at the user's GitHub setup are reported as 400, everything else as 500.
      const clientErr = /not found|not connected|lacks permission|lacks repo|Reconnect|scope|remote/i.test(msg);
      const status = clientErr ? 400 : 500;
      const code = clientErr ? 'BAD_REQUEST' : 'RUNTIME_ERROR';
      res.status(status).json({ error: msg, code });
    }
  }
);
1784
// Image proxy: serve raw.githubusercontent.com images via the stored GitHub token.
// Accepts JWT via ?token= query param (browsers cannot send headers for <img> tags).
//
// Hardening notes:
// - The size limit is checked against the declared Content-Length before the body is
//   buffered, and again against the real byte count afterwards (the header can be
//   absent or describe a compressed payload).
// - Upstream bodies that will not be read are cancelled so the connection is released.
// - The response carries a sandboxing Content-Security-Policy so that a proxied SVG
//   opened directly in a tab cannot run script on this origin. The policy does not
//   affect images rendered through <img>.
const BRIDGE_IMAGE_PROXY_SIZE_LIMIT = 10 * 1024 * 1024;
app.get('/api/v1/vault/image-proxy', async (req, res) => {
  const auth = req.headers.authorization;
  const headerToken = auth && auth.startsWith('Bearer ') ? auth.slice(7) : null;
  const queryToken = typeof req.query.token === 'string' ? req.query.token : null;
  const uid = (headerToken || queryToken) ? userIdFromJwt(headerToken || queryToken) : null;
  if (!uid) return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });

  // Only https URLs on raw.githubusercontent.com with an owner/repo/path shape are proxied.
  const rawUrl = typeof req.query.url === 'string' ? req.query.url : '';
  if (!/^https:\/\/raw\.githubusercontent\.com\/[^/]+\/[^/]+\/.+$/i.test(rawUrl)) {
    return res.status(400).json({ error: 'url must be a raw.githubusercontent.com path', code: 'BAD_REQUEST' });
  }

  let accessToken = '';
  try {
    const tokensByUser = await loadTokens(req.blobStore);
    const conn = tokensByUser[uid];
    if (conn?.token) accessToken = conn.token;
  } catch (e) {
    // Best-effort: continue with an unauthenticated upstream request.
    console.warn('[bridge] image-proxy: could not load GitHub token:', e?.message || e);
  }

  const fetchHeaders = { 'User-Agent': 'Knowtation-Hub/1.0' };
  if (accessToken) fetchHeaders.Authorization = `token ${accessToken}`;

  let upstream;
  try {
    upstream = await fetch(rawUrl, { headers: fetchHeaders });
  } catch (e) {
    return res.status(502).json({ error: 'Failed to fetch image from GitHub', code: 'UPSTREAM_ERROR' });
  }

  // Release the upstream connection when the payload is not going to be read.
  const discardUpstreamBody = () => {
    upstream.body?.cancel().catch(() => {});
  };

  if (!upstream.ok) {
    discardUpstreamBody();
    return res.status(upstream.status).json({ error: 'Image not found on GitHub', code: 'UPSTREAM_ERROR' });
  }
  const ct = upstream.headers.get('content-type') || '';
  if (!ct.toLowerCase().startsWith('image/')) {
    discardUpstreamBody();
    return res.status(400).json({ error: 'URL does not point to an image', code: 'BAD_REQUEST' });
  }
  // Number(null) is 0, so a missing header simply defers to the post-download check.
  const declaredLength = Number(upstream.headers.get('content-length'));
  if (Number.isFinite(declaredLength) && declaredLength > BRIDGE_IMAGE_PROXY_SIZE_LIMIT) {
    discardUpstreamBody();
    return res.status(400).json({ error: 'Image too large (max 10 MB)', code: 'BAD_REQUEST' });
  }
  const buf = Buffer.from(await upstream.arrayBuffer());
  if (buf.byteLength > BRIDGE_IMAGE_PROXY_SIZE_LIMIT) {
    return res.status(400).json({ error: 'Image too large (max 10 MB)', code: 'BAD_REQUEST' });
  }
  res.setHeader('Content-Type', ct);
  res.setHeader('Content-Length', buf.byteLength);
  res.setHeader('Cache-Control', 'private, max-age=3600');
  res.setHeader('X-Content-Type-Options', 'nosniff');
  res.setHeader('Content-Security-Policy', "default-src 'none'; style-src 'unsafe-inline'; sandbox");
  res.send(buf);
});
1833
/**
 * POST /api/v1/import — multipart import of a file (or a Google Sheet id) into the hosted vault.
 *
 * Multipart fields read from `req.body`: `source_type` (required, must be listed in
 * IMPORT_SOURCE_TYPES), `spreadsheet_id` and `sheets_range` (google-sheets only),
 * `project`, `output_dir`, `tags` (comma-separated). The uploaded file arrives as
 * `req.file` from `bridgeImportUpload`.
 *
 * Flow: validate the request, unpack a `.zip` upload after a path-traversal check,
 * run the importer against a scratch vault directory, stamp every imported note with
 * import provenance frontmatter, then send the notes to the canister in batches.
 *
 * Responses: 503 when CANISTER_URL is unset; 400 for validation failures and for errors
 * whose message matches the client-error pattern in the catch block; 502 when preparing
 * or writing notes for the canister fails; 500 otherwise;
 * 200 `{ imported, count }` on success.
 *
 * The per-request temp directory (`req._importTempDir`) is removed in `finally`.
 */
app.post(
  '/api/v1/import',
  requireBridgeAuth,
  requireBridgeEditorOrAdmin,
  importTempDirMiddleware,
  bridgeImportUpload,
  async (req, res) => {
    // Created by importTempDirMiddleware; holds the upload, any extracted zip and the scratch vault.
    const tempDir = req._importTempDir;
    try {
      if (!CANISTER_URL) {
        return res.status(503).json({ error: 'Canister not configured', code: 'SERVICE_UNAVAILABLE' });
      }
      const sourceType = req.body && req.body.source_type ? String(req.body.source_type).trim() : '';
      if (!IMPORT_SOURCE_TYPES.includes(sourceType)) {
        return res.status(400).json({
          error: `source_type must be one of: ${IMPORT_SOURCE_TYPES.join(', ')}`,
          code: 'BAD_REQUEST',
        });
      }
      const sheetId = req.body && req.body.spreadsheet_id ? String(req.body.spreadsheet_id).trim() : '';
      const sheetsRange = req.body && req.body.sheets_range ? String(req.body.sheets_range).trim() : undefined;
      // google-sheets is the one source type that takes an id instead of a file; every other type needs a file.
      if (sourceType === 'google-sheets') {
        if (!sheetId) {
          return res
            .status(400)
            .json({ error: 'google-sheets: spreadsheet_id is required in the multipart body', code: 'BAD_REQUEST' });
        }
        if (req.file) {
          return res
            .status(400)
            .json({ error: 'google-sheets: do not send a file; use spreadsheet_id only', code: 'BAD_REQUEST' });
        }
      } else if (!req.file) {
        return res.status(400).json({ error: 'file required', code: 'BAD_REQUEST' });
      }
      const project = req.body && req.body.project ? String(req.body.project).trim() : undefined;
      const outputDir = req.body && req.body.output_dir ? String(req.body.output_dir).trim() : undefined;
      const tagsRaw = req.body && req.body.tags ? String(req.body.tags) : '';
      const tags = tagsRaw ? tagsRaw.split(',').map((s) => s.trim()).filter(Boolean) : [];
      // Importer input: the spreadsheet id, the uploaded file path, or (below) the extracted zip directory.
      let inputPath = sourceType === 'google-sheets' ? sheetId : req.file.path;
      // Zip detection is by the original filename's extension, not by file content.
      if (sourceType !== 'google-sheets' && req.file && req.file.originalname && req.file.originalname.toLowerCase().endsWith('.zip')) {
        const extractDir = path.join(tempDir, 'extracted');
        fs.mkdirSync(extractDir, { recursive: true });
        const zip = new AdmZip(req.file.path);
        // Zip-slip protection: every entry must resolve inside extractDir
        const extractDirResolved = path.resolve(extractDir) + path.sep;
        for (const entry of zip.getEntries()) {
          const entryResolved = path.resolve(extractDir, entry.entryName);
          if (entryResolved !== path.resolve(extractDir) && !entryResolved.startsWith(extractDirResolved)) {
            return res.status(400).json({ error: 'Invalid zip entry: path traversal detected', code: 'BAD_REQUEST' });
          }
        }
        // Nothing is written until every entry has passed the check above.
        zip.extractAllTo(extractDir, true);
        inputPath = extractDir;
      }
      const hctx = await resolveHostedBridgeContext(req, req.uid);
      if (!hctx.ok) {
        return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
      }
      // Scratch vault inside the temp dir, passed to the importer as `vaultPath`.
      const vaultPath = path.join(tempDir, 'vault-work');
      fs.mkdirSync(vaultPath, { recursive: true });
      const result = await runImport(sourceType, inputPath, {
        project,
        outputDir,
        tags,
        vaultPath,
        ...(sheetsRange ? { sheetsRange } : {}),
      });
      // Provenance frontmatter (actor + kind 'import') applied to every imported note below.
      const importStamp = mergeProvenanceFrontmatter({}, {
        sub: hctx.actorUid,
        kind: 'import',
      });
      /** @type {{ path: string, body: string, frontmatter: Record<string, unknown> }[]} */
      const notesForCanister = [];
      for (const item of result.imported || []) {
        if (item.path && typeof item.path === 'string') {
          try {
            // Stamp the note on disk, then read it back so the canister receives the final frontmatter.
            // NOTE(review): presumably writeNote merges the given frontmatter into the existing file — confirm.
            writeNote(vaultPath, item.path, { frontmatter: importStamp });
            const safe = resolveVaultRelativePath(vaultPath, item.path);
            const fullPath = path.join(vaultPath, safe);
            const markdownFull = fs.readFileSync(fullPath, 'utf8');
            const parsed = parseFrontmatterAndBody(markdownFull);
            // Only a plain object is forwarded as frontmatter; anything else becomes {}.
            const fm =
              parsed.frontmatter && typeof parsed.frontmatter === 'object' && !Array.isArray(parsed.frontmatter)
                ? /** @type {Record<string, unknown>} */ ({ ...parsed.frontmatter })
                : {};
            notesForCanister.push({
              // Canister paths always use forward slashes.
              path: safe.replace(/\\/g, '/'),
              body: parsed.body || '',
              frontmatter: fm,
            });
          } catch (e) {
            // A failure preparing any single note aborts the whole import; nothing has been sent to the canister yet.
            console.error('[bridge] import prepare note for canister failed for', item.path, e?.message || e);
            return res.status(502).json({
              error: e.message || 'Canister write failed',
              code: 'BAD_GATEWAY',
            });
          }
        }
      }
      try {
        await postNotesBatchToCanister(
          hctx.effectiveCanisterUid,
          hctx.actorUid,
          hctx.vaultId,
          notesForCanister,
        );
      } catch (e) {
        console.error('[bridge] import canister batch write failed', e?.message || e);
        return res.status(502).json({
          error: e.message || 'Canister write failed',
          code: 'BAD_GATEWAY',
        });
      }
      return res.json({ imported: result.imported, count: result.count });
    } catch (e) {
      const msg = e.message || String(e);
      // Errors whose message matches one of these fragments are reported as 400; the rest as 500.
      const clientError =
        /OPENAI_API_KEY|required for transcription|Unsupported format|file not found|not found:|Transcription failed|413|Payload Too Large|25MB|Whisper accepts|Only https|blocked|private IP|timed out|exceeds \d+ bytes|Invalid URL|URL is required|Extract mode requires|Could not extract|DNS resolution failed|Too many redirects|non-https/i.test(
          msg,
        );
      res.status(clientError ? 400 : 500).json({
        error: msg,
        code: clientError ? 'BAD_REQUEST' : 'RUNTIME_ERROR',
      });
    } finally {
      // Runs on every exit path above, including the early returns inside the try block.
      if (tempDir && fs.existsSync(tempDir)) {
        try {
          fs.rmSync(tempDir, { recursive: true, force: true });
        } catch (_) {}
      }
    }
  },
);
1968
/**
 * Coerce the `mode` field of an import-url request to a supported value.
 * Matching ignores case and surrounding whitespace; anything that is not a
 * string, or is not one of the known modes, yields 'auto'.
 *
 * @param {unknown} raw
 * @returns {'auto' | 'bookmark' | 'extract'}
 */
function normalizeBridgeImportUrlMode(raw) {
  if (typeof raw !== 'string') return 'auto';
  const candidate = raw.trim().toLowerCase();
  return ['bookmark', 'extract'].includes(candidate) ? candidate : 'auto';
}
1978
/**
 * Read `tags` from an import-url request body. Accepts an array (each element
 * is stringified) or a comma-separated string; entries are trimmed and empty
 * ones dropped. Any other shape, or a missing body, yields an empty list.
 *
 * @param {unknown} body
 * @returns {string[]}
 */
function tagsFromBridgeImportUrlBody(body) {
  const rawTags = body?.tags;
  let parts;
  if (Array.isArray(rawTags)) {
    parts = rawTags.map((entry) => String(entry));
  } else if (typeof rawTags === 'string') {
    parts = rawTags.split(',');
  } else {
    return [];
  }
  return parts.map((part) => part.trim()).filter(Boolean);
}
1989
/**
 * POST /api/v1/import-url — import a web URL into the hosted vault.
 *
 * JSON body: `url` (required), `mode` ('auto' | 'bookmark' | 'extract', defaults to
 * 'auto'), `project`, `output_dir`, `tags` (array or comma-separated string).
 *
 * Flow: validate, resolve the hosted context, run the 'url' importer against a scratch
 * vault in a temp directory, stamp each imported note with import provenance
 * frontmatter, then send the notes to the canister in batches.
 *
 * Responses: 503 when CANISTER_URL is unset; 400 for a missing url and for errors whose
 * message matches the client-error pattern in the catch block; 502 when preparing or
 * writing notes for the canister fails; 500 otherwise;
 * 200 `{ imported, count }` on success.
 *
 * The temp directory is created inside the try block and only after validation has
 * passed, so a creation failure is answered with a JSON error instead of rejecting the
 * async handler, and rejected requests do not touch the filesystem.
 */
app.post('/api/v1/import-url', requireBridgeAuth, requireBridgeEditorOrAdmin, async (req, res) => {
  let tempDir = null;
  try {
    if (!CANISTER_URL) {
      return res.status(503).json({ error: 'Canister not configured', code: 'SERVICE_UNAVAILABLE' });
    }
    const body = req.body && typeof req.body === 'object' ? req.body : {};
    const urlStr = typeof body.url === 'string' ? body.url.trim() : '';
    if (!urlStr) return res.status(400).json({ error: 'url required', code: 'BAD_REQUEST' });
    const urlMode = normalizeBridgeImportUrlMode(body.mode);
    const project = body.project != null && String(body.project).trim() !== '' ? String(body.project).trim() : undefined;
    const outputDir =
      body.output_dir != null && String(body.output_dir).trim() !== '' ? String(body.output_dir).trim() : undefined;
    const tags = tagsFromBridgeImportUrlBody(body);

    const hctx = await resolveHostedBridgeContext(req, req.uid);
    if (!hctx.ok) {
      return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
    }

    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'knowtation-bridge-import-url-'));
    const vaultPath = path.join(tempDir, 'vault-work');
    fs.mkdirSync(vaultPath, { recursive: true });
    const result = await runImport('url', urlStr, { project, outputDir, tags, vaultPath, urlMode });
    // Provenance frontmatter (actor + kind 'import') applied to every imported note below.
    const importStamp = mergeProvenanceFrontmatter({}, {
      sub: hctx.actorUid,
      kind: 'import',
    });
    /** @type {{ path: string, body: string, frontmatter: Record<string, unknown> }[]} */
    const notesForCanister = [];
    for (const item of result.imported || []) {
      if (item.path && typeof item.path === 'string') {
        try {
          // Stamp the note on disk, then read it back so the canister receives the final frontmatter.
          writeNote(vaultPath, item.path, { frontmatter: importStamp });
          const safe = resolveVaultRelativePath(vaultPath, item.path);
          const fullPath = path.join(vaultPath, safe);
          const markdownFull = fs.readFileSync(fullPath, 'utf8');
          const parsed = parseFrontmatterAndBody(markdownFull);
          // Only a plain object is forwarded as frontmatter; anything else becomes {}.
          const fm =
            parsed.frontmatter && typeof parsed.frontmatter === 'object' && !Array.isArray(parsed.frontmatter)
              ? /** @type {Record<string, unknown>} */ ({ ...parsed.frontmatter })
              : {};
          notesForCanister.push({
            // Canister paths always use forward slashes.
            path: safe.replace(/\\/g, '/'),
            body: parsed.body || '',
            frontmatter: fm,
          });
        } catch (e) {
          console.error('[bridge] import-url prepare note for canister failed for', item.path, e?.message || e);
          return res.status(502).json({
            error: e.message || 'Canister write failed',
            code: 'BAD_GATEWAY',
          });
        }
      }
    }
    try {
      await postNotesBatchToCanister(hctx.effectiveCanisterUid, hctx.actorUid, hctx.vaultId, notesForCanister);
    } catch (e) {
      console.error('[bridge] import-url canister batch write failed', e?.message || e);
      return res.status(502).json({
        error: e.message || 'Canister write failed',
        code: 'BAD_GATEWAY',
      });
    }
    return res.json({ imported: result.imported, count: result.count });
  } catch (e) {
    const msg = e?.message || String(e);
    // Errors whose message matches one of these fragments are reported as 400; the rest as 500.
    const clientError =
      /OPENAI_API_KEY|required for transcription|Unsupported format|file not found|not found:|Transcription failed|413|Payload Too Large|25MB|Whisper accepts|Only https|blocked|private IP|timed out|exceeds \d+ bytes|Invalid URL|URL is required|Extract mode requires|Could not extract|DNS resolution failed|Too many redirects|non-https/i.test(
        msg,
      );
    res.status(clientError ? 400 : 500).json({
      error: msg,
      code: clientError ? 'BAD_REQUEST' : 'RUNTIME_ERROR',
    });
  } finally {
    // tempDir stays null when the request was rejected before the directory was created.
    if (tempDir && fs.existsSync(tempDir)) {
      try {
        fs.rmSync(tempDir, { recursive: true, force: true });
      } catch (_) {}
    }
  }
});
2072
2073 // ——— Index + Search (hosted: indexer runs in bridge, canister does not run Node) ———
2074 // BATCH_EMBED + INDEXER_EMBED_CONCURRENCY together drive how much wall time the index
2075 // step takes against Netlify's 60 s sync-function cap. With DeepInfra (BAAI/bge-large-en-v1.5)
2076 // per-batch latencies trending 2.5 – 8.5 s, the previous serial loop at BATCH=10 ran ~65 – 80 s
2077 // for a 251-chunk vault and got killed. Defaults below (BATCH=50, CONCURRENCY=5) bring that
2078 // same vault under ~10 – 15 s when a full re-embed is needed; the content-hash cache below
2079 // makes subsequent re-indexes a few seconds regardless of vault size.
// Embedding batch size, taken from INDEXER_EMBED_BATCH_SIZE via parseEmbedBatchSize.
// No fallback is supplied here, so the value for an unset variable is decided by that helper.
const BATCH_EMBED_DEFAULT = parseEmbedBatchSize(process.env.INDEXER_EMBED_BATCH_SIZE);
// Embedding concurrency, taken from INDEXER_EMBED_CONCURRENCY via parseEmbedConcurrency.
const EMBED_CONCURRENCY_DEFAULT = parseEmbedConcurrency(process.env.INDEXER_EMBED_CONCURRENCY);
// Fixed at 50; NOTE(review): presumably the number of vectors per store upsert call — confirm at the use site.
const BATCH_UPSERT = 50;
// Sync time budget in seconds, taken from INDEXER_SYNC_BUDGET_SECONDS. Per the
// kickOffBackgroundIndex documentation, exceeding it hands the job to the background function.
const SYNC_BUDGET_SECONDS = parseSyncBudgetSeconds(process.env.INDEXER_SYNC_BUDGET_SECONDS);
// Chunk-count safety net for the sync path, taken from INDEXER_MAX_SYNC_CHUNKS.
const MAX_SYNC_CHUNKS = parseMaxSyncChunks(process.env.INDEXER_MAX_SYNC_CHUNKS);
2085
/**
 * Start the `bridge-index-background` Netlify Function for one vault.
 *
 * Called by the synchronous `POST /api/v1/index` handler when the preflight
 * estimate is over the sync budget (`SYNC_BUDGET_SECONDS`) or the chunk-count
 * safety net (`MAX_SYNC_CHUNKS`). The background function acknowledges with a
 * 202 right away and does the embed work in a separate Lambda (up to 15 min);
 * this function waits only for that acknowledgement so the caller can answer
 * the browser with its own 202.
 *
 * The outgoing request is authenticated twice (see `lib/bridge-internal-hmac.mjs`):
 *   1. The caller's `Authorization` header is forwarded unchanged, so the
 *      background function still runs `requireBridgeAuth` for a real user.
 *   2. An HMAC over (canisterUid, vaultId, jobId, ts), keyed with
 *      `SESSION_SECRET`, shows that the request came from this sync handler —
 *      the background URL is publicly reachable on Netlify.
 *
 * @param {object} req incoming Express request; supplies protocol, host and Authorization
 * @param {string} jobId
 * @param {string} canisterUid
 * @param {string} vaultId
 * @returns {Promise<void>}
 * @throws {Error} when SESSION_SECRET is unset, when no host can be determined,
 *   when the fetch itself fails, or when `assertBackgroundKickoffOk` rejects the response.
 */
async function kickOffBackgroundIndex(req, jobId, canisterUid, vaultId) {
  if (!SESSION_SECRET) {
    throw new Error('kickOffBackgroundIndex: SESSION_SECRET is not set; cannot sign internal request');
  }

  const ts = Date.now();
  const sig = signInternalRequest(SESSION_SECRET, { canisterUid, vaultId, jobId, ts });

  // Protocol preference: Express's req.protocol, then the first x-forwarded-proto entry, then https.
  let protocol = req.protocol;
  if (!protocol) {
    const forwardedProto = req.headers['x-forwarded-proto'];
    protocol = (forwardedProto && String(forwardedProto).split(',')[0]) || 'https';
  }

  const hostFromGetter = req.get && req.get('host');
  const host = hostFromGetter || req.headers.host;
  if (!host) {
    throw new Error('kickOffBackgroundIndex: cannot determine host header for background URL');
  }

  const outgoingHeaders = {
    'content-type': 'application/json',
    authorization: req.headers.authorization || '',
    'x-vault-id': vaultId,
    'x-bridge-internal-uid': canisterUid,
    'x-bridge-internal-vault-id': vaultId,
    'x-bridge-internal-job-id': jobId,
    'x-bridge-internal-ts': String(ts),
    'x-bridge-internal-sig': sig,
  };

  // Netlify background functions answer 202 within roughly 50–100 ms whatever the
  // function body does, so awaiting the response does not wait for the embed work.
  const response = await fetch(`${protocol}://${host}/.netlify/functions/bridge-index-background`, {
    method: 'POST',
    headers: outgoingHeaders,
    body: '{}',
  });

  // The response text is optional input for the status check below; failing to
  // read it is tolerated because the status code alone identifies the failure.
  let responseBody = '';
  try {
    responseBody = await response.text();
  } catch (_) {
    responseBody = '';
  }

  // CRITICAL (May 2026 hotfix): the status MUST be inspected. fetch() resolves on
  // 4xx/5xx (it rejects only on network errors), so skipping this check would treat
  // a non-202 answer — function not deployed, wrong host header, HMAC
  // misconfiguration, a routing bug — as success. The sync handler would then tell
  // the browser `202 status:"background"` while nothing runs, and the job lock would
  // block retries for its full 16-min TTL. Background and the failure-mode matrix:
  // `lib/bridge-index-kickoff-response.mjs`, `test/bridge-index-kickoff-response.test.mjs`.
  assertBackgroundKickoffOk(response, responseBody);
}
2157
/**
 * GET /api/v1/index/status — read-only view of the index state for one vault:
 * when it last finished indexing and whether a background rebuild is currently
 * running. The Hub UI polls this on page load to show `Last indexed: 2 minutes ago`
 * beside the Re-index button and to disable that button while a job is live.
 *
 * Auth and scope match `POST /api/v1/index`: the caller must be authenticated
 * and the vault must be part of their effective hosted-bridge context.
 *
 * Response: `{ lastIndexed, inProgress, job }`. `job` is the lock record while
 * `inProgress` is true and null otherwise. Without a blob store both lookups
 * are skipped and the response reports nothing indexed and nothing in progress.
 */
app.get('/api/v1/index/status', requireBridgeAuth, async (req, res) => {
  const hctx = await resolveHostedBridgeContext(req, req.uid);
  if (!hctx.ok) {
    return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
  }

  const canisterUid = hctx.effectiveCanisterUid;
  const vaultId = sanitizeVaultId(req.headers['x-vault-id']);

  let lastIndexed = null;
  let jobLock = null;
  if (req.blobStore) {
    lastIndexed = await getLastIndexedAt(req.blobStore, { canisterUid, vaultId });
    jobLock = await peekJobLock(req.blobStore, { canisterUid, vaultId });
  }

  // A lock counts as live only while it has a finite expiry that lies in the future.
  const inProgress =
    jobLock != null && Number.isFinite(jobLock.expiresAt) && jobLock.expiresAt > Date.now();

  const job = inProgress ? jobLock : null;
  res.json({ lastIndexed, inProgress, job });
});
2188
2189 app.post('/api/v1/index', requireBridgeAuth, requireBridgeEditorOrAdmin, async (req, res) => {
2190 const uid = req.uid;
2191 const earlyVaultId = sanitizeVaultId(req.headers['x-vault-id']);
2192 const timer = createIndexTimer({ vaultId: earlyVaultId, canisterUid: null });
2193 const hctx = await resolveHostedBridgeContext(req, uid);
2194 timer.step('resolve_context', { ok: hctx.ok });
2195 if (!hctx.ok) {
2196 timer.finish({ ok: false, phase: 'resolve_context', status: hctx.status, code: hctx.code });
2197 return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
2198 }
2199 const canisterUid = hctx.effectiveCanisterUid;
2200 const vaultId = sanitizeVaultId(req.headers['x-vault-id']);
2201 let exportRes;
2202 try {
2203 exportRes = await fetch(CANISTER_URL + '/api/v1/export', {
2204 method: 'GET',
2205 headers: canisterHeaders({ 'X-User-Id': canisterUid, 'X-Vault-Id': vaultId }),
2206 });
2207 } catch (e) {
2208 timer.finish({ ok: false, phase: 'canister_export_fetch', error: e?.message || String(e) });
2209 return res.status(502).json({ error: 'Could not reach canister', code: 'BAD_GATEWAY' });
2210 }
2211 if (!exportRes.ok) {
2212 timer.finish({ ok: false, phase: 'canister_export_status', status: exportRes.status });
2213 return res.status(502).json({ error: 'Canister export failed', code: 'BAD_GATEWAY', status: exportRes.status });
2214 }
2215 let vault;
2216 try {
2217 vault = await exportRes.json();
2218 } catch (_) {
2219 timer.finish({ ok: false, phase: 'canister_export_parse' });
2220 return res.status(502).json({ error: 'Invalid canister response', code: 'BAD_GATEWAY' });
2221 }
2222 let notes = vault.notes || [];
2223 timer.step('canister_export', { note_count: notes.length, scoped: Boolean(hctx.scope) });
2224 if (hctx.scope) {
2225 notes = applyScopeFilterToNotes(notes, hctx.scope);
2226 timer.step('scope_filter', { note_count_after: notes.length });
2227 }
2228 try {
2229 if (!globalThis.__knowtation_bridge_embed_logged) {
2230 globalThis.__knowtation_bridge_embed_logged = true;
2231 const c = getBridgeEmbeddingConfig();
2232 const hasOpenAiKey = Boolean(
2233 process.env.OPENAI_API_KEY && String(process.env.OPENAI_API_KEY).trim(),
2234 );
2235 console.log(
2236 '[bridge] embedding (no secrets):',
2237 JSON.stringify({
2238 provider: c.provider,
2239 model: c.model,
2240 ollama_url_set: Boolean(process.env.OLLAMA_URL && String(process.env.OLLAMA_URL).trim()),
2241 openai_key_set: hasOpenAiKey,
2242 }),
2243 );
2244 }
2245 const { chunkNote } = await import('../../lib/chunk.mjs');
2246 const { embedWithUsage, embeddingDimension } = await import('../../lib/embedding.mjs');
2247 const { createVectorStore } = await import('../../lib/vector-store.mjs');
2248 timer.step('import_modules');
2249
2250 const vectorsDir = await getVectorsDirForUser(req, canisterUid);
2251 const storeConfig = getBridgeStoreConfig(canisterUid, vectorsDir);
2252 timer.step('get_vectors_dir');
2253 const chunkOpts = resolveIndexerChunkOptions(process.env, storeConfig.embedding);
2254 const allChunks = [];
2255 for (const n of notes) {
2256 const note = {
2257 body: n.body || '',
2258 path: n.path || 'note.md',
2259 project: undefined,
2260 tags: [],
2261 date: undefined,
2262 };
2263 const chunks = chunkNote(note, chunkOpts);
2264 for (const c of chunks) allChunks.push(c);
2265 }
2266 // Tag every chunk with a versioned content hash + the namespaced store id so the
2267 // sqlite-vec backend's `getChunkHashes(vaultId)` lookup keys line up. The hash prefix
2268 // includes the active provider+model so a same-dimension model swap (e.g. BGE-large 1024
2269 // → BGE-m3 1024) automatically invalidates the cache instead of silently keeping stale
2270 // vectors. See `lib/chunk-content-hash.mjs:computeChunkContentHashTagged`.
2271 const embeddingConfigForHash = storeConfig.embedding;
2272 const chunksWithHash = allChunks.map((chunk) => ({
2273 chunk,
2274 storeId: `${vaultId}::${chunk.id}`,
2275 contentHash: computeChunkContentHashTagged(chunk, embeddingConfigForHash),
2276 }));
2277 timer.step('chunk_notes', { chunk_count: allChunks.length });
2278
2279 const dim = embeddingDimension(storeConfig.embedding);
2280 const store = await createVectorStore(storeConfig);
2281 await store.ensureCollection(dim);
2282 timer.step('ensure_collection', { dim });
2283
2284 // Empty vault: drop everything for this vault and persist (covers note-deletion case).
2285 if (chunksWithHash.length === 0) {
2286 let vectors_deleted = 0;
2287 if (typeof store.deleteByVaultId === 'function') {
2288 vectors_deleted = await store.deleteByVaultId(vaultId);
2289 }
2290 timer.step('delete_old_vectors_empty', { vectors_deleted });
2291 await persistVectorsToBlob(req, canisterUid, vectorsDir);
2292 timer.step('persist_vectors_empty');
2293 // Sidecar update so the Hub UI's "Last indexed" line stays correct even after
2294 // an all-notes-deleted re-index (notes=0 is a legitimate steady state).
2295 if (req.blobStore) {
2296 try {
2297 await setLastIndexedAt(req.blobStore, {
2298 canisterUid,
2299 vaultId,
2300 actorUid: sanitizeUserId(uid),
2301 notesProcessed: notes.length,
2302 chunksIndexed: 0,
2303 chunksEmbedded: 0,
2304 chunksSkippedCached: 0,
2305 vectorsDeleted: vectors_deleted,
2306 embeddingInputTokens: 0,
2307 durationMs: timer.totalMs(),
2308 mode: req.bridgeInternalRequest != null ? 'background' : 'sync',
2309 provider: storeConfig.embedding?.provider || null,
2310 model: storeConfig.embedding?.model || null,
2311 });
2312 } catch (sidecarErr) {
2313 // Sidecar write failure must not fail the index — UI just falls back to "never indexed".
2314 console.warn('[bridge] setLastIndexedAt failed (empty path):', sidecarErr?.message || sidecarErr);
2315 }
2316 }
2317 // If this is a background-mode invocation, release the lock so subsequent
2318 // re-indexes are not falsely blocked. Use expectedJobId so a stale background
2319 // function (whose lock has since been overwritten) cannot clobber a newer one.
2320 if (req.bridgeInternalRequest != null && req.blobStore) {
2321 try {
2322 await releaseJobLock(req.blobStore, {
2323 canisterUid,
2324 vaultId,
2325 expectedJobId: req.bridgeInternalRequest.jobId,
2326 });
2327 } catch (lockErr) {
2328 console.warn('[bridge] releaseJobLock failed (empty path):', lockErr?.message || lockErr);
2329 }
2330 }
2331 console.log(
2332 '[bridge] index',
2333 JSON.stringify({
2334 vault_id: vaultId,
2335 canister_uid: sanitizeUserId(canisterUid),
2336 notes_processed: notes.length,
2337 chunks_indexed: 0,
2338 vectors_deleted,
2339 chunks_skipped_cached: 0,
2340 }),
2341 );
2342 timer.finish({
2343 ok: true,
2344 notes_processed: notes.length,
2345 chunks_indexed: 0,
2346 vectors_deleted,
2347 chunks_skipped_cached: 0,
2348 });
2349 return res.json({
2350 ok: true,
2351 notesProcessed: notes.length,
2352 chunksIndexed: 0,
2353 embedding_input_tokens: 0,
2354 vectors_deleted,
2355 chunksSkippedCached: 0,
2356 });
2357 }
2358
2359 // Content-hash cache lookup. If the store doesn't expose getChunkHashes (older
2360 // backend or test mock), treat every chunk as cache miss — correct, just slower.
2361 let existingHashes = new Map();
2362 if (typeof store.getChunkHashes === 'function') {
2363 try {
2364 existingHashes = await store.getChunkHashes(vaultId);
2365 } catch (e) {
2366 console.warn(
2367 '[bridge] getChunkHashes failed; falling back to full re-embed for this vault:',
2368 e?.message || e,
2369 );
2370 existingHashes = new Map();
2371 }
2372 }
2373 const partitioned = partitionChunksForReindex(chunksWithHash, existingHashes);
2374 const toEmbed = partitioned.toEmbed;
2375 const chunks_skipped_cached = partitioned.skippedCachedCount;
2376 timer.step('cache_lookup', {
2377 cache_size: existingHashes.size,
2378 chunks_total: chunksWithHash.length,
2379 chunks_skipped_cached,
2380 chunks_to_embed: toEmbed.length,
2381 orphan_count: partitioned.orphanIds.length,
2382 });
2383
2384 // —— Auto-routing: sync vs background ——
2385 // The bridge runs as a Netlify synchronous function (60 s platform max). After the
2386 // OpenAI(1536)→DeepInfra(1024) switch, a 251-chunk full re-embed costs ~10–15 s
2387 // and a 1 500-chunk re-embed pushes past 30 s. To keep the snappy UX for the 99 %
2388 // case (small delta or cache-hit) AND eliminate timeout risk for the 1 % case
2389 // (first-time index, dim migration, big import), we estimate the embed wall-clock
2390 // here and either (a) continue inline OR (b) hand the work to the
2391 // `bridge-index-background` Netlify Function (15-min cap).
2392 //
2393 // The background path itself re-enters this same handler via
2394 // `req.bridgeInternalRequest` (set by the wrapper after HMAC verification); when
2395 // that's truthy we SKIP the routing decision and execute inline regardless of size.
2396 const isInternalBackgroundRequest = req.bridgeInternalRequest != null;
2397 const embeddingConfig = storeConfig.embedding;
2398 const BATCH_EMBED = BATCH_EMBED_DEFAULT;
2399 const EMBED_CONCURRENCY = EMBED_CONCURRENCY_DEFAULT;
2400 if (!isInternalBackgroundRequest && toEmbed.length > 0) {
2401 const estimatedSeconds = estimateEmbedSeconds({
2402 chunksToEmbed: toEmbed.length,
2403 batchSize: BATCH_EMBED,
2404 concurrency: EMBED_CONCURRENCY,
2405 });
2406 // `isFirstIndex` covers BOTH a true first-time index (no prior cache rows) AND
2407 // the post-dim-migration state where `ensureCollection` just dropped + recreated
2408 // the table (so `getChunkHashes` returned empty). Both require a full re-embed
2409 // and both should route to background regardless of estimate.
2410 const isFirstIndex = existingHashes.size === 0;
2411 const decision = shouldUseBackgroundIndex({
2412 chunksToEmbed: toEmbed.length,
2413 estimatedSeconds,
2414 syncBudgetSeconds: SYNC_BUDGET_SECONDS,
2415 maxSyncChunks: MAX_SYNC_CHUNKS,
2416 isFirstIndex,
2417 });
2418 timer.step('routing_decision', {
2419 chunks_to_embed: toEmbed.length,
2420 estimated_seconds: estimatedSeconds,
2421 is_first_index: isFirstIndex,
2422 sync_budget_seconds: SYNC_BUDGET_SECONDS,
2423 max_sync_chunks: MAX_SYNC_CHUNKS,
2424 decision: decision.shouldUseBackground ? 'background' : 'sync',
2425 reason: decision.reason,
2426 });
2427 if (decision.shouldUseBackground) {
2428 if (!req.blobStore) {
2429 // No Blob store available (local self-host without Netlify Blobs): we cannot
2430 // safely run the background path because lock + sidecar persistence would be
2431 // lost. Fall through to sync — local self-host is single-tenant and operators
2432 // can tolerate a longer wait.
2433 timer.step('routing_fallback_no_blobstore');
2434 } else {
2435 const lockResult = await acquireJobLock(req.blobStore, {
2436 canisterUid,
2437 vaultId,
2438 actorUid: sanitizeUserId(uid),
2439 chunksToEmbed: toEmbed.length,
2440 estimatedSeconds,
2441 reason: decision.reason,
2442 });
2443 if (!lockResult.acquired) {
2444 timer.finish({
2445 ok: true,
2446 phase: 'background_already_running',
2447 existing_job_id: lockResult.existing?.jobId || null,
2448 });
2449 return res.status(409).json({
2450 status: 'already_running',
2451 message:
2452 'A background re-index is already running for this vault. Refresh in a minute.',
2453 jobId: lockResult.existing?.jobId || null,
2454 startedAt: lockResult.existing?.startedAt || null,
2455 });
2456 }
2457 try {
2458 await kickOffBackgroundIndex(req, lockResult.jobId, canisterUid, vaultId);
2459 } catch (kickoffErr) {
2460 // Kickoff failed (network blip, missing SESSION_SECRET, etc.) — release the
2461 // lock so the user can retry, and surface the error.
2462 await releaseJobLock(req.blobStore, {
2463 canisterUid,
2464 vaultId,
2465 expectedJobId: lockResult.jobId,
2466 });
2467 timer.finish({
2468 ok: false,
2469 phase: 'background_kickoff',
2470 error: kickoffErr?.message || String(kickoffErr),
2471 });
2472 return res.status(502).json({
2473 error: 'Could not start background re-index',
2474 code: 'BACKGROUND_KICKOFF_FAILED',
2475 message: kickoffErr?.message || String(kickoffErr),
2476 });
2477 }
2478 timer.finish({
2479 ok: true,
2480 phase: 'background_started',
2481 job_id: lockResult.jobId,
2482 chunks_to_embed: toEmbed.length,
2483 estimated_seconds: estimatedSeconds,
2484 reason: decision.reason,
2485 });
2486 return res.status(202).json({
2487 status: 'background',
2488 jobId: lockResult.jobId,
2489 message:
2490 'Large re-index started in the background. Refresh in 1–2 minutes — search will use the new vectors as soon as the job finishes.',
2491 estimatedSeconds,
2492 chunksToEmbed: toEmbed.length,
2493 reason: decision.reason,
2494 });
2495 }
2496 }
2497 }
2498 // —— end auto-routing ——
2499
2500 const embedBatches = [];
2501 for (let i = 0; i < toEmbed.length; i += BATCH_EMBED) {
2502 embedBatches.push(toEmbed.slice(i, i + BATCH_EMBED));
2503 }
2504 let embedding_input_tokens = 0;
2505 let embed_total_ms = 0;
2506 let embed_max_batch_ms = 0;
2507 let embed_min_batch_ms = embedBatches.length > 0 ? Number.POSITIVE_INFINITY : 0;
2508 const embedBatchCount = embedBatches.length;
2509 // Result vectors keyed by toEmbed index (preserves order so the upsert step can
2510 // zip vectors[i] back to toEmbed[i].chunk without depending on completion order).
2511 const embedResults = await runWithConcurrency(
2512 embedBatches.map((batch, batchIndex) => async () => {
2513 const texts = batch.map((item) => item.chunk.text);
2514 const { vectors: batchVectors, embedding_input_tokens: batchTok } = await embedWithUsage(
2515 texts,
2516 embeddingConfig,
2517 { voyageInputType: 'document' },
2518 );
2519 return { batchIndex, batchVectors, batchTok };
2520 }),
2521 {
2522 concurrency: EMBED_CONCURRENCY,
2523 onSettled: ({ index, ok, ms, error }) => {
2524 if (!ok) {
2525 timer.step('embed_batch_error', {
2526 batch_index: index,
2527 embed_ms: ms,
2528 error: error?.message || String(error),
2529 });
2530 return;
2531 }
2532 embed_total_ms += ms;
2533 if (ms > embed_max_batch_ms) embed_max_batch_ms = ms;
2534 if (ms < embed_min_batch_ms) embed_min_batch_ms = ms;
2535 timer.step('embed_batch', {
2536 batch_index: index,
2537 batch_size: embedBatches[index].length,
2538 embed_ms: ms,
2539 });
2540 },
2541 },
2542 );
2543 const vectorsByEmbedIndex = new Array(toEmbed.length);
2544 for (const { batchIndex, batchVectors, batchTok } of embedResults) {
2545 embedding_input_tokens += batchTok;
2546 const start = batchIndex * BATCH_EMBED;
2547 const batch = embedBatches[batchIndex];
2548 for (let j = 0; j < batch.length; j++) {
2549 vectorsByEmbedIndex[start + j] = batchVectors[j] || [];
2550 }
2551 }
2552 timer.step('embed_total', {
2553 batches: embedBatchCount,
2554 embed_total_ms,
2555 embed_avg_batch_ms: embedBatchCount > 0 ? Math.round(embed_total_ms / embedBatchCount) : 0,
2556 embed_min_batch_ms: embed_min_batch_ms === Number.POSITIVE_INFINITY ? 0 : embed_min_batch_ms,
2557 embed_max_batch_ms,
2558 embedding_input_tokens,
2559 concurrency: EMBED_CONCURRENCY,
2560 batch_size: BATCH_EMBED,
2561 provider: embeddingConfig?.provider || null,
2562 model: embeddingConfig?.model || null,
2563 });
2564
2565 // Orphans = chunk_ids in the store but not in the current export (deleted/renamed notes).
2566 let vectors_deleted = 0;
2567 if (partitioned.orphanIds.length > 0 && typeof store.deleteByChunkIds === 'function') {
2568 vectors_deleted = await store.deleteByChunkIds(partitioned.orphanIds);
2569 } else if (
2570 partitioned.orphanIds.length === 0 &&
2571 existingHashes.size === 0 &&
2572 typeof store.deleteByVaultId === 'function'
2573 ) {
2574 // First run for this vault (no prior cache rows) — clear any leftover rows that
2575 // lacked content_hash but might still match the vault, so search cannot return paths
2576 // that no longer exist in the export.
2577 vectors_deleted = await store.deleteByVaultId(vaultId);
2578 }
2579 timer.step('delete_old_vectors', {
2580 vectors_deleted,
2581 orphan_count: partitioned.orphanIds.length,
2582 });
2583
2584 let upsert_total_ms = 0;
2585 const upsertBatchCount = Math.ceil(toEmbed.length / BATCH_UPSERT);
2586 for (let i = 0; i < toEmbed.length; i += BATCH_UPSERT) {
2587 const slice = toEmbed.slice(i, i + BATCH_UPSERT);
2588 const points = slice.map((item, j) => ({
2589 id: item.storeId,
2590 vector: vectorsByEmbedIndex[i + j] || [],
2591 text: item.chunk.text,
2592 path: item.chunk.path,
2593 vault_id: vaultId,
2594 project: item.chunk.project,
2595 tags: item.chunk.tags,
2596 date: item.chunk.date,
2597 causal_chain_id: item.chunk.causal_chain_id,
2598 entity: item.chunk.entity,
2599 episode_id: item.chunk.episode_id,
2600 content_hash: item.contentHash,
2601 }));
2602 const upsertStart = Date.now();
2603 await store.upsert(points);
2604 upsert_total_ms += Date.now() - upsertStart;
2605 }
2606 timer.step('upsert_total', {
2607 batches: upsertBatchCount,
2608 upsert_total_ms,
2609 upsert_avg_batch_ms: upsertBatchCount > 0 ? Math.round(upsert_total_ms / upsertBatchCount) : 0,
2610 points_upserted: toEmbed.length,
2611 });
2612 await persistVectorsToBlob(req, canisterUid, vectorsDir);
2613 timer.step('persist_vectors');
2614 // Sidecar update so the Hub UI's "Last indexed" line is correct after BOTH
2615 // the synchronous and the background path. The same record format is read
2616 // by `GET /api/v1/index/status` and rendered next to the Re-index button.
2617 if (req.blobStore) {
2618 try {
2619 await setLastIndexedAt(req.blobStore, {
2620 canisterUid,
2621 vaultId,
2622 actorUid: sanitizeUserId(uid),
2623 notesProcessed: notes.length,
2624 chunksIndexed: allChunks.length,
2625 chunksEmbedded: toEmbed.length,
2626 chunksSkippedCached: chunks_skipped_cached,
2627 vectorsDeleted: vectors_deleted,
2628 embeddingInputTokens: embedding_input_tokens,
2629 durationMs: timer.totalMs(),
2630 mode: req.bridgeInternalRequest != null ? 'background' : 'sync',
2631 provider: embeddingConfig?.provider || null,
2632 model: embeddingConfig?.model || null,
2633 });
2634 } catch (sidecarErr) {
2635 // Sidecar write failure must not fail the index — UI just falls back to "never indexed".
2636 console.warn('[bridge] setLastIndexedAt failed:', sidecarErr?.message || sidecarErr);
2637 }
2638 }
2639 // Background path: release the job lock so a future re-index is not falsely blocked.
2640 // `expectedJobId` ensures we only release OUR lock — if a stale background job
2641 // finishes after a fresh background job has already acquired a new lock (rare,
2642 // but possible if the first job exceeded the lock TTL), we leave the new lock alone.
2643 if (req.bridgeInternalRequest != null && req.blobStore) {
2644 try {
2645 await releaseJobLock(req.blobStore, {
2646 canisterUid,
2647 vaultId,
2648 expectedJobId: req.bridgeInternalRequest.jobId,
2649 });
2650 } catch (lockErr) {
2651 console.warn('[bridge] releaseJobLock failed:', lockErr?.message || lockErr);
2652 }
2653 }
2654 console.log(
2655 '[bridge] index',
2656 JSON.stringify({
2657 vault_id: vaultId,
2658 canister_uid: sanitizeUserId(canisterUid),
2659 notes_processed: notes.length,
2660 chunks_indexed: allChunks.length,
2661 chunks_skipped_cached,
2662 chunks_embedded: toEmbed.length,
2663 vectors_deleted,
2664 mode: req.bridgeInternalRequest != null ? 'background' : 'sync',
2665 }),
2666 );
2667 const indexResult = {
2668 ok: true,
2669 notesProcessed: notes.length,
2670 chunksIndexed: allChunks.length,
2671 chunksSkippedCached: chunks_skipped_cached,
2672 chunksEmbedded: toEmbed.length,
2673 embedding_input_tokens,
2674 vectors_deleted,
2675 };
2676 timer.finish({
2677 ok: true,
2678 notes_processed: notes.length,
2679 chunks_indexed: allChunks.length,
2680 chunks_skipped_cached,
2681 chunks_embedded: toEmbed.length,
2682 vectors_deleted,
2683 embedding_input_tokens,
2684 mode: req.bridgeInternalRequest != null ? 'background' : 'sync',
2685 });
2686 res.json(indexResult);
2687 fireBridgeCaptureEvent(
2688 'index',
2689 {
2690 note_count: notes.length,
2691 chunk_count: allChunks.length,
2692 chunks_skipped_cached,
2693 chunks_embedded: toEmbed.length,
2694 vectors_deleted,
2695 },
2696 sanitizeUserId(uid),
2697 vaultId,
2698 );
2699 return;
2700 } catch (e) {
2701 console.error('Bridge index error:', e);
2702 timer.finish({ ok: false, phase: 'catch', error: e?.message || String(e) });
2703 // Background path: release the lock on error so the operator can retry without
2704 // waiting for the 16-min TTL. We do this defensively regardless of whether the
2705 // error happened before or after the lock was acquired.
2706 if (req.bridgeInternalRequest != null && req.blobStore) {
2707 try {
2708 await releaseJobLock(req.blobStore, {
2709 canisterUid: req.bridgeInternalRequest.canisterUid,
2710 vaultId: req.bridgeInternalRequest.vaultId,
2711 expectedJobId: req.bridgeInternalRequest.jobId,
2712 });
2713 } catch (lockErr) {
2714 console.warn('[bridge] releaseJobLock failed (catch path):', lockErr?.message || lockErr);
2715 }
2716 }
2717 return res.status(500).json({
2718 error: 'Index failed',
2719 code: 'INTERNAL_ERROR',
2720 message: bridgeEmbedFailureMessage(e, 'index'),
2721 });
2722 }
2723 });
2724
/**
 * Shorten `text` to a display snippet of at most `maxChars` characters plus an ellipsis.
 *
 * The text is trimmed first and returned unchanged when it already fits. Otherwise it is cut
 * at `maxChars`, backed up to the last space when that space lies past the halfway point
 * (so a single long leading word cannot collapse the snippet), and '…' is appended.
 *
 * @param {unknown} text - Source text; any non-string value yields ''.
 * @param {number} [maxChars=300] - Maximum kept length in UTF-16 code units. Non-finite or
 *   non-positive values fall back to 300 instead of being handed to `slice`, where a negative
 *   bound would count from the END of the string and return almost the whole text.
 * @returns {string} The trimmed text, or a truncated prefix followed by '…'.
 */
function truncateSnippet(text, maxChars = 300) {
  if (typeof text !== 'string') return '';
  const limit = Number.isFinite(maxChars) && maxChars >= 1 ? Math.floor(maxChars) : 300;
  const trimmed = text.trim();
  if (trimmed.length <= limit) return trimmed;
  let prefix = trimmed.slice(0, limit);
  // Never end on a lone high surrogate: cutting through an astral character (emoji etc.)
  // would emit an invalid UTF-16 sequence.
  const lastUnit = prefix.charCodeAt(prefix.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    prefix = prefix.slice(0, -1);
  }
  const lastSpace = prefix.lastIndexOf(' ');
  return (lastSpace > limit / 2 ? prefix.slice(0, lastSpace) : prefix) + '…';
}
2733
/**
 * Batch document embeddings for hosted MCP `cluster` (and similar callers).
 * Auth + vault access mirror `POST /api/v1/search`: JWT in `Authorization`, `X-Vault-Id`,
 * `resolveHostedBridgeContext` (effective canister user + allowed vault ids + optional scope).
 * Embedding model/env match `POST /api/v1/index` via `getBridgeStoreConfig` + `embedWithUsage` with `voyageInputType: "document"`.
 *
 * Request body: `{ texts: unknown[] }`. Only the first HOSTED_EMBED_MAX_TEXTS entries are
 * embedded and each is stringified and cut to HOSTED_EMBED_MAX_CHARS_PER_TEXT characters;
 * `texts_count` in the response tells the caller how many were actually embedded.
 *
 * Response: `{ vectors, embedding_input_tokens, texts_count }` where `vectors[i]` always
 * corresponds to the i-th embedded text (`vectors.length === texts_count`).
 */
const HOSTED_EMBED_MAX_TEXTS = 200;
const HOSTED_EMBED_MAX_CHARS_PER_TEXT = 1200;

app.post('/api/v1/embed', async (req, res) => {
  const auth = req.headers.authorization;
  const token = auth && auth.startsWith('Bearer ') ? auth.slice(7) : null;
  const uid = token ? userIdFromJwt(token) : null;
  if (!uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }
  try {
    // Resolved inside the try so a rejected lookup becomes a JSON 500 instead of an
    // unhandled promise rejection that leaves the request hanging.
    const hctx = await resolveHostedBridgeContext(req, uid);
    if (!hctx.ok) {
      return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
    }
    const canisterUid = hctx.effectiveCanisterUid;
    const rawTexts = req.body?.texts;
    if (!Array.isArray(rawTexts)) {
      return res.status(400).json({ error: 'texts array required', code: 'BAD_REQUEST' });
    }
    const texts = rawTexts
      .slice(0, HOSTED_EMBED_MAX_TEXTS)
      .map((t) => String(t ?? '').slice(0, HOSTED_EMBED_MAX_CHARS_PER_TEXT));
    if (texts.length === 0) {
      return res.status(400).json({ error: 'texts must be non-empty', code: 'BAD_REQUEST' });
    }
    const { embedWithUsage } = await import('../../lib/embedding.mjs');
    const vectorsDir = await getVectorsDirForUser(req, canisterUid);
    const storeConfig = getBridgeStoreConfig(canisterUid, vectorsDir);
    const embeddingConfig = storeConfig.embedding;
    let embedding_input_tokens = 0;
    const vectors = [];
    // NOTE(review): BATCH_EMBED is read from the enclosing scope here, while the index
    // handler declares its own local of the same name — confirm a module-level
    // BATCH_EMBED exists.
    for (let i = 0; i < texts.length; i += BATCH_EMBED) {
      const batch = texts.slice(i, i + BATCH_EMBED);
      const { vectors: batchVectors, embedding_input_tokens: batchTok } = await embedWithUsage(
        batch,
        embeddingConfig,
        { voyageInputType: 'document' },
      );
      embedding_input_tokens += batchTok;
      // Fill by position (same padding rule as the index handler) so a provider that
      // returns fewer vectors than inputs cannot shift every later vector onto the wrong text.
      for (let j = 0; j < batch.length; j++) {
        vectors.push(batchVectors[j] || []);
      }
    }
    return res.json({
      vectors,
      embedding_input_tokens,
      texts_count: texts.length,
    });
  } catch (e) {
    console.error('Bridge embed batch error:', e);
    return res.status(500).json({
      error: 'Embed failed',
      code: 'INTERNAL_ERROR',
      message: bridgeEmbedFailureMessage(e, 'embed'),
    });
  }
});
2798
/**
 * POST /api/v1/search — keyword or semantic search over one hosted vault.
 *
 * Auth: Bearer JWT in `Authorization`; the vault id comes from `X-Vault-Id`. Access is
 * resolved by `resolveHostedBridgeContext`, which supplies the effective canister user and
 * an optional scope that is applied to the returned notes.
 *
 * Body: `query` (required string); `mode` ('keyword', anything else means semantic); `limit`
 * (clamped to 1..100, default 20); `snippetChars` (positive integer, default 300); filters
 * `folder`, `project`, `tag`, `since`, `until`, `chain`, `entity`, `episode`,
 * `content_scope`, `order`. Keyword mode additionally reads `fields`, `match` and
 * `count_only` / `countOnly`.
 *
 * Responses: 401 without a usable token, the resolver's status when access is refused, 400
 * when `query` is missing, 502 when the canister export cannot be fetched or parsed
 * (keyword mode), 500 for any other failure. A successful search is recorded as a memory
 * event after the response has been sent.
 */
app.post('/api/v1/search', async (req, res) => {
  const auth = req.headers.authorization;
  const token = auth && auth.startsWith('Bearer ') ? auth.slice(7) : null;
  const uid = token ? userIdFromJwt(token) : null;
  if (!uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }
  try {
    // Resolved inside the try so a rejected lookup becomes a JSON 500 instead of an
    // unhandled promise rejection.
    const hctx = await resolveHostedBridgeContext(req, uid);
    if (!hctx.ok) {
      return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
    }
    const canisterUid = hctx.effectiveCanisterUid;
    const query = req.body?.query;
    const mode = req.body?.mode === 'keyword' ? 'keyword' : 'semantic';
    const bridgeVaultId = sanitizeVaultId(req.headers['x-vault-id']);
    // Auto-capture after successful response — fire-and-forget, does not affect latency.
    res.on('finish', () => {
      if (res.statusCode >= 200 && res.statusCode < 300 && query) {
        fireBridgeCaptureEvent('search', { query, mode }, sanitizeUserId(uid), bridgeVaultId);
      }
    });
    if (!query || typeof query !== 'string') {
      return res.status(400).json({ error: 'query required', code: 'BAD_REQUEST' });
    }
    const limit = Math.max(1, Math.min(parseInt(req.body?.limit, 10) || 20, 100));
    // Only positive values are honoured: a negative length would make the snippet
    // truncation slice from the end of the text.
    const requestedSnippetChars = parseInt(req.body?.snippetChars, 10);
    const snippetChars = requestedSnippetChars > 0 ? requestedSnippetChars : 300;

    if (mode === 'keyword') {
      let exportRes;
      try {
        exportRes = await fetch(CANISTER_URL + '/api/v1/export', {
          method: 'GET',
          headers: canisterHeaders({ 'X-User-Id': canisterUid, 'X-Vault-Id': bridgeVaultId }),
        });
      } catch (_e) {
        return res.status(502).json({ error: 'Could not reach canister', code: 'BAD_GATEWAY' });
      }
      if (!exportRes.ok) {
        return res.status(502).json({ error: 'Canister export failed', code: 'BAD_GATEWAY', status: exportRes.status });
      }
      let vault;
      try {
        vault = await exportRes.json();
      } catch (_e) {
        return res.status(502).json({ error: 'Invalid canister response', code: 'BAD_GATEWAY' });
      }
      let rawNotes = vault.notes || [];
      if (hctx.scope) {
        rawNotes = applyScopeFilterToNotes(rawNotes, hctx.scope);
      }
      const { noteRecordFromExportPayload, keywordSearchNotesArray } = await import('../../lib/keyword-search.mjs');
      const { filterNotesByListOptions } = await import('../../lib/list-notes.mjs');
      let shaped = rawNotes.map((n) => noteRecordFromExportPayload(n));
      shaped = filterNotesByListOptions(shaped, {
        folder: req.body?.folder,
        project: req.body?.project,
        tag: req.body?.tag,
        since: req.body?.since,
        until: req.body?.until,
        chain: req.body?.chain,
        entity: req.body?.entity,
        episode: req.body?.episode,
        content_scope: req.body?.content_scope,
      });
      const fields =
        req.body?.fields === 'path' || req.body?.fields === 'full' ? req.body.fields : 'path+snippet';
      const out = keywordSearchNotesArray(shaped, query, {
        limit,
        order: req.body?.order,
        fields,
        snippetChars,
        match: req.body?.match === 'all_terms' ? 'all_terms' : 'phrase',
        countOnly: req.body?.count_only === true || req.body?.countOnly === true,
      });
      if (out.results && hctx.scope) {
        return res.json({ ...out, results: applyScopeFilterToNotes(out.results, hctx.scope) });
      }
      return res.json(out);
    }

    const { embed } = await import('../../lib/embedding.mjs');
    const { createVectorStore } = await import('../../lib/vector-store.mjs');
    const { filterHitsByContentScope, resolveSearchFolderForContentScope } = await import('../../lib/approval-log.mjs');
    const { MAX_VECTOR_KNN } = await import('../../lib/vector-knn-limit.mjs');

    const vectorsDir = await getVectorsDirForUser(req, canisterUid);
    const storeConfig = getBridgeStoreConfig(canisterUid, vectorsDir);
    const store = await createVectorStore(storeConfig);
    const [queryVector] = await embed([query], storeConfig.embedding, { voyageInputType: 'query' });
    if (!queryVector) {
      return res.status(500).json({ error: 'Embedding failed', code: 'INTERNAL_ERROR' });
    }
    const cs = req.body?.content_scope || 'all';
    const userFolder = req.body?.folder;
    const resolved = resolveSearchFolderForContentScope(cs, userFolder);
    if (resolved.impossible) {
      return res.json({ results: [], query, mode: 'semantic' });
    }
    // Over-fetch when hits will be filtered after the KNN query, so enough candidates
    // survive to fill `limit`; always capped by MAX_VECTOR_KNN.
    let searchLimit = limit;
    if (resolved.wideNotesFetch) {
      searchLimit = Math.min(10000, Math.max(limit * 120, 2500));
    } else if (cs !== 'all') {
      searchLimit = Math.min(10000, Math.max(limit * 40, 800));
    }
    searchLimit = Math.min(searchLimit, MAX_VECTOR_KNN);
    const hits = await store.search(queryVector, {
      limit: searchLimit,
      vault_id: bridgeVaultId,
      project: req.body?.project,
      tag: req.body?.tag,
      folder: resolved.folder,
      since: req.body?.since,
      until: req.body?.until,
      order: req.body?.order,
      chain: req.body?.chain,
      entity: req.body?.entity,
      episode: req.body?.episode,
    });
    const scopedHits = filterHitsByContentScope(hits || [], cs);
    let results = scopedHits.map((h) => ({
      path: h.path,
      score: h.score,
      ...(typeof h.vec_distance === 'number' && Number.isFinite(h.vec_distance)
        ? { vec_distance: h.vec_distance }
        : {}),
      project: h.project ?? null,
      tags: h.tags ?? [],
      snippet: truncateSnippet(h.text, snippetChars),
    }));
    if (hctx.scope) {
      results = applyScopeFilterToNotes(results, hctx.scope);
    }
    // Truncate to `limit` only AFTER the scope filter (as the keyword path does); slicing
    // first could discard in-scope hits and return fewer results than were available.
    return res.json({ results: results.slice(0, limit), query, mode: 'semantic' });
  } catch (e) {
    console.error('Bridge search error:', e);
    return res.status(500).json({
      error: 'Search failed',
      code: 'INTERNAL_ERROR',
      message: bridgeEmbedFailureMessage(e, 'search'),
    });
  }
});
2945
/**
 * Express error-handling middleware (identified by its four-argument signature).
 *
 * Logs the error and answers with JSON `{ error, code }`. Status selection:
 * Multer upload errors map to 413 (file too large) or 400; otherwise a numeric
 * `err.status` / `err.statusCode` in the 4xx–5xx range is honoured; everything else is 500.
 */
app.use((err, req, res, next) => {
  console.error('[bridge] unhandled error:', err?.stack || err?.message || err);
  if (res.headersSent) {
    // A response is already in flight, so no new status/body can be written. Hand the
    // error to Express's default handler, which closes the connection instead of
    // leaving the client waiting on a half-finished response.
    return next(err);
  }
  let status = 500;
  if (err instanceof multer.MulterError) {
    status = err.code === 'LIMIT_FILE_SIZE' ? 413 : 400;
  } else if (typeof err?.status === 'number' && err.status >= 400 && err.status < 600) {
    status = err.status;
  } else if (typeof err?.statusCode === 'number' && err.statusCode >= 400 && err.statusCode < 600) {
    status = err.statusCode;
  }
  res.status(status).json({
    error: err?.message || 'Internal error',
    code: err?.code || 'INTERNAL_ERROR',
  });
});
2963
2964 // ——— Memory endpoints (Phase 8) ———
2965
/**
 * Fire-and-forget memory event capture for hosted bridge endpoints.
 * Uses Netlify Blobs when available (hosted), falls back to file-based for self-hosted.
 * Never throws, never delays the response: the work runs in a detached async task and
 * any failure is logged as a warning rather than propagated.
 *
 * @param {string} type - Memory event type; on the hosted path, types not listed in
 *   `MEMORY_EVENT_TYPES` are ignored, on the file path `shouldCapture(type)` decides.
 * @param {object} data - Event payload stored with the event.
 * @param {string} uid - User id used to locate the user's memory store.
 * @param {string} [vaultId] - Vault id; 'default' is used when falsy.
 * @returns {void}
 */
function fireBridgeCaptureEvent(type, data, uid, vaultId) {
  (async () => {
    try {
      if (globalThis.__netlify_blob_store) {
        // Hosted: append to Netlify Blobs for durability across Lambda invocations.
        const { createMemoryEvent, MEMORY_EVENT_TYPES } = await import('../../lib/memory-event.mjs');
        if (!MEMORY_EVENT_TYPES.includes(type)) return;
        const event = createMemoryEvent(type, data, { vaultId: vaultId || 'default' });
        await blobsAppendMemoryEvent(uid, vaultId, event);
      } else {
        // Self-hosted: file-based.
        const { FileMemoryProvider } = await import('../../lib/memory-provider-file.mjs');
        const { MemoryManager } = await import('../../lib/memory.mjs');
        const mm = new MemoryManager(new FileMemoryProvider(bridgeMemoryDir(uid, vaultId || 'default')));
        if (mm.shouldCapture(type)) mm.store(type, data);
      }
    } catch (captureErr) {
      // Still best-effort, but leave a trace: a silently dropped capture is otherwise
      // impossible to diagnose.
      console.warn('[bridge] memory capture failed:', captureErr?.message || captureErr);
    }
  })();
}
2990
/**
 * Extract the caller identity used by the memory endpoints from a request.
 *
 * @param {object} req - Express request; reads the `Authorization` and `X-Vault-Id`
 *   headers and the `vault_id` / `scope` query parameters.
 * @returns {{ uid: string|null, vaultId: *, scope: 'global'|'vault' }} `uid` is the
 *   sanitized user id from the Bearer JWT, or null when no usable token is present;
 *   `vaultId` is whatever `sanitizeVaultId` returns for the header (or, failing that, the
 *   query parameter); `scope` is 'global' only when `?scope=global` was sent.
 */
function bridgeMemoryAuth(req) {
  const header = req.headers.authorization;
  const hasBearer = Boolean(header) && header.startsWith('Bearer ');
  const bearer = hasBearer ? header.slice(7) : null;

  let rawUid = null;
  if (bearer) {
    rawUid = userIdFromJwt(bearer);
  }

  const requestedVault = req.headers['x-vault-id'] || req.query.vault_id;
  const vaultId = sanitizeVaultId(requestedVault);

  let scope = 'vault';
  if (req.query.scope === 'global') {
    scope = 'global';
  }

  const uid = rawUid ? sanitizeUserId(rawUid) : null;
  return { uid, vaultId, scope };
}
2999
/**
 * Build the on-disk directory that holds a user's file-based memory events.
 *
 * @param {string} uid - User id (directory segment under `<DATA_DIR>/memory`).
 * @param {string} vaultId - Vault id, used as the final segment for vault-scoped memory.
 * @param {string} [scope] - When exactly 'global', the final segment is `_global`
 *   instead of the vault id.
 * @returns {string} Joined directory path.
 */
function bridgeMemoryDir(uid, vaultId, scope) {
  const leaf = scope === 'global' ? '_global' : vaultId;
  return path.join(DATA_DIR, 'memory', uid, leaf);
}
3006
/**
 * GET /api/v1/memory/:key — return the latest memory event stored under `key`.
 *
 * Reads through the file-based memory provider for the caller's user/vault directory.
 * Responds 401 without a valid Bearer token, `{ key, value: null, updated_at: null }`
 * when nothing is stored, `{ key, value, updated_at, id }` otherwise, and 500 with the
 * error message on failure.
 */
app.get('/api/v1/memory/:key', async (req, res) => {
  const identity = bridgeMemoryAuth(req);
  if (!identity.uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }
  try {
    const providerModule = await import('../../lib/memory-provider-file.mjs');
    const managerModule = await import('../../lib/memory.mjs');
    const memoryDir = bridgeMemoryDir(identity.uid, identity.vaultId);
    const fileProvider = new providerModule.FileMemoryProvider(memoryDir);
    const manager = new managerModule.MemoryManager(fileProvider);
    const key = req.params.key;
    const latest = manager.getLatest(key);
    if (latest) {
      res.json({ key, value: latest.data, updated_at: latest.ts, id: latest.id });
    } else {
      res.json({ key, value: null, updated_at: null });
    }
  } catch (err) {
    res.status(500).json({ error: err.message, code: 'RUNTIME_ERROR' });
  }
});
3022
/**
 * POST /api/v1/memory/store — store a user memory event under `key`.
 *
 * Runs behind `requireBridgeAuth` and `requireBridgeEditorOrAdmin`. Body:
 * `{ key, value, ttl? }`. Object values are stored as `{ ...value, key }`; any other value
 * is stored as `{ key, text: String(value) }`. Responds with whatever
 * `MemoryManager.store` returns, 400 when `key` or `value` is missing, 401 when no user id
 * can be derived from the Bearer token, and 500 with the error message on failure.
 */
app.post('/api/v1/memory/store', requireBridgeAuth, requireBridgeEditorOrAdmin, express.json(), async (req, res) => {
  const { uid, vaultId } = bridgeMemoryAuth(req);
  // Same guard as the sibling memory handlers: without it a null uid reaches path.join
  // and surfaces as a confusing 500 instead of a 401.
  if (!uid) return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  const { key, value, ttl } = req.body || {};
  // Only null/undefined count as "missing": 0, false and '' are legitimate values.
  if (!key || value == null) {
    return res.status(400).json({ error: 'key and value required', code: 'BAD_REQUEST' });
  }
  try {
    const { FileMemoryProvider } = await import('../../lib/memory-provider-file.mjs');
    const { MemoryManager } = await import('../../lib/memory.mjs');
    const provider = new FileMemoryProvider(bridgeMemoryDir(uid, vaultId));
    const mm = new MemoryManager(provider);
    // `key` is spread last so a `key` field inside the payload cannot overwrite the key
    // the caller asked to store under.
    const data = typeof value === 'object' ? { ...value, key } : { key, text: String(value) };
    const result = mm.store('user', data, { vaultId, ttl });
    res.json(result);
  } catch (e) {
    res.status(500).json({ error: e.message, code: 'RUNTIME_ERROR' });
  }
});
3039
/**
 * GET /api/v1/memory — list the caller's memory events.
 *
 * Query: `type`, `since`, `until` (compared against each event's `ts`), `limit`
 * (positive integer, default 20, capped at 100). On the hosted path (Netlify Blobs
 * available) events are filtered, sorted newest-first and truncated here; otherwise the
 * same options are passed to the file-based `MemoryManager.list`.
 * Responds `{ events, count }`, 401 without a valid Bearer token, 500 on failure.
 */
app.get('/api/v1/memory', async (req, res) => {
  const { uid, vaultId } = bridgeMemoryAuth(req);
  if (!uid) return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  // Non-numeric, zero and negative limits all fall back to the default. A negative value
  // must never reach `slice(0, limit)`, where it would mean "all but the last n".
  const requestedLimit = Number.parseInt(req.query.limit, 10);
  const limit = Math.min(requestedLimit > 0 ? requestedLimit : 20, 100);
  try {
    let events;
    if (globalThis.__netlify_blob_store) {
      // Hosted: read from Blobs.
      events = await blobsGetMemoryEvents(uid, vaultId);
      if (req.query.type) events = events.filter((e) => e.type === req.query.type);
      if (req.query.since) events = events.filter((e) => e.ts >= req.query.since);
      if (req.query.until) events = events.filter((e) => e.ts <= req.query.until);
      events.sort((a, b) => (b.ts > a.ts ? 1 : b.ts < a.ts ? -1 : 0));
      events = events.slice(0, limit);
    } else {
      const { FileMemoryProvider } = await import('../../lib/memory-provider-file.mjs');
      const { MemoryManager } = await import('../../lib/memory.mjs');
      const mm = new MemoryManager(new FileMemoryProvider(bridgeMemoryDir(uid, vaultId)));
      events = mm.list({
        type: req.query.type || undefined,
        since: req.query.since || undefined,
        until: req.query.until || undefined,
        limit,
      });
    }
    res.json({ events, count: events.length });
  } catch (e) {
    res.status(500).json({ error: e.message, code: 'RUNTIME_ERROR' });
  }
});
3069
/**
 * POST /api/v1/memory/search — placeholder for hosted memory search.
 *
 * Authenticates the caller and always answers with an empty result set plus a note;
 * the request body is parsed but not used.
 */
app.post('/api/v1/memory/search', express.json(), async (req, res) => {
  // Only the user id is needed here; the vault id is not used by the placeholder.
  const { uid } = bridgeMemoryAuth(req);
  if (!uid) return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  res.json({ results: [], count: 0, note: 'Hosted memory search requires vector provider (future).' });
});
3075
/**
 * DELETE /api/v1/memory/clear — delete memory events from the caller's file-based store.
 *
 * Runs behind `requireBridgeAuth` and `requireBridgeEditorOrAdmin`. Optional query
 * parameters `type` and `before` are forwarded to `MemoryManager.clear`; its return value
 * is sent back as JSON. Responds 401 when no user id can be derived from the Bearer
 * token and 500 with the error message on failure.
 */
app.delete('/api/v1/memory/clear', requireBridgeAuth, requireBridgeEditorOrAdmin, async (req, res) => {
  const { uid, vaultId } = bridgeMemoryAuth(req);
  // Same guard as the sibling memory handlers: a destructive call must never build a
  // directory path from a missing user id (path.join would throw and surface as a 500).
  if (!uid) return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  try {
    const { FileMemoryProvider } = await import('../../lib/memory-provider-file.mjs');
    const { MemoryManager } = await import('../../lib/memory.mjs');
    const provider = new FileMemoryProvider(bridgeMemoryDir(uid, vaultId));
    const mm = new MemoryManager(provider);
    const result = mm.clear({
      type: req.query.type || undefined,
      before: req.query.before || undefined,
    });
    res.json(result);
  } catch (e) {
    res.status(500).json({ error: e.message, code: 'RUNTIME_ERROR' });
  }
});
3092
/**
 * GET /api/v1/memory-stats — return `MemoryManager.stats()` for the caller's file-based
 * memory store. Responds 401 without a valid Bearer token and 500 with the error message
 * on failure.
 */
app.get('/api/v1/memory-stats', async (req, res) => {
  const identity = bridgeMemoryAuth(req);
  if (!identity.uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }
  try {
    const providerModule = await import('../../lib/memory-provider-file.mjs');
    const managerModule = await import('../../lib/memory.mjs');
    const memoryDir = bridgeMemoryDir(identity.uid, identity.vaultId);
    const fileProvider = new providerModule.FileMemoryProvider(memoryDir);
    const manager = new managerModule.MemoryManager(fileProvider);
    res.json(manager.stats());
  } catch (err) {
    res.status(500).json({ error: err.message, code: 'RUNTIME_ERROR' });
  }
});
3106
3107 // ——— Hosted Consolidation (Phase 10 / Stream 1) ———
3108
3109 // ——— Blobs-backed memory helpers (hosted path) ———
3110
/**
 * Netlify Blobs key under which a user's raw memory events for one vault are stored.
 *
 * @param {string} uid - User id segment.
 * @param {string} [vaultId] - Vault id segment; 'default' when falsy.
 * @returns {string} Key of the form `memory/<uid>/<vault>/events`.
 */
function memoryBlobKey(uid, vaultId) {
  const vaultSegment = vaultId || 'default';
  return `memory/${uid}/${vaultSegment}/events`;
}
3115
/**
 * Load a user's memory events from Netlify Blobs (hosted path).
 *
 * Always resolves to an array: `[]` when no blob store is installed on `globalThis`,
 * when the key has no value, when the stored value is not a JSON array, or when the
 * read or parse fails. Callers push to / iterate the result, so a stored payload of
 * any other JSON type is treated as "no events" instead of being returned as-is.
 *
 * @param {string} uid
 * @param {string} [vaultId]
 * @returns {Promise<Array>} Stored events, or `[]`.
 */
async function blobsGetMemoryEvents(uid, vaultId) {
  const store = globalThis.__netlify_blob_store;
  if (!store) return [];
  try {
    const raw = await store.get(memoryBlobKey(uid, vaultId), { type: 'text' });
    if (!raw) return [];
    const parsed = JSON.parse(raw);
    return Array.isArray(parsed) ? parsed : [];
  } catch (e) {
    // Best-effort read: report the failure but keep the documented `[]` fallback.
    console.error('[bridge] blobsGetMemoryEvents', e?.message);
    return [];
  }
}
3126
/**
 * Persist memory events to Netlify Blobs, keeping only the last 500 entries.
 *
 * Best-effort: does nothing when no blob store is installed on `globalThis`, and never
 * throws. A failed write is logged instead of being dropped silently so lost events
 * are visible in the bridge logs.
 *
 * @param {string} uid
 * @param {string} [vaultId]
 * @param {Array} events - Full event list; only `events.slice(-500)` is written.
 * @returns {Promise<void>}
 */
async function blobsSetMemoryEvents(uid, vaultId, events) {
  const store = globalThis.__netlify_blob_store;
  if (!store) return;
  try {
    await store.set(memoryBlobKey(uid, vaultId), JSON.stringify(events.slice(-500)));
  } catch (e) {
    console.error('[bridge] blobsSetMemoryEvents', e?.message);
  }
}
3135
/**
 * Append a single event to the Blobs-backed memory log (read-modify-write).
 *
 * The stored log is read directly from the blob store so that a failed read can be
 * told apart from an empty log. When the read throws, the append is skipped: writing
 * `[event]` back at that point would replace every previously stored event.
 * A missing value, a non-array payload, or unparseable JSON is treated as an empty log.
 *
 * Does nothing when no blob store is installed on `globalThis`.
 *
 * @param {string} uid
 * @param {string} [vaultId]
 * @param {object} event - Event to append.
 * @returns {Promise<void>}
 */
async function blobsAppendMemoryEvent(uid, vaultId, event) {
  const store = globalThis.__netlify_blob_store;
  if (!store) return;

  let raw;
  try {
    raw = await store.get(memoryBlobKey(uid, vaultId), { type: 'text' });
  } catch (e) {
    console.error('[bridge] blobsAppendMemoryEvent: read failed, append skipped', e?.message);
    return;
  }

  let events = [];
  if (raw) {
    try {
      const parsed = JSON.parse(raw);
      if (Array.isArray(parsed)) events = parsed;
    } catch (e) {
      // Corrupt log: start over rather than blocking every future append.
      console.error('[bridge] blobsAppendMemoryEvent: stored log unreadable, resetting', e?.message);
    }
  }

  events.push(event);
  await blobsSetMemoryEvents(uid, vaultId, events);
}
3142
3143 // ——— Consolidation cost tracking ———
3144
/** Current date in UTC as the first 10 characters of the ISO timestamp (`YYYY-MM-DD`). */
function utcDateString() {
  const isoNow = new Date().toISOString();
  return isoNow.substring(0, 10);
}
3148
/** Current month in UTC as the first 7 characters of the ISO timestamp (`YYYY-MM`). */
function utcMonthString() {
  const isoNow = new Date().toISOString();
  return isoNow.substring(0, 7);
}
3152
/**
 * Build the Blobs key that holds a user's consolidation cost record.
 * @param {string} uid - User id.
 * @returns {string} Key of the form `memory/<uid>/consolidation-cost`.
 */
function consolidationCostBlobKey(uid) {
  const userPrefix = `memory/${uid}`;
  return `${userPrefix}/consolidation-cost`;
}
3157
/**
 * Load the per-user consolidation cost record.
 *
 * Reads from Netlify Blobs when a blob store is installed on `globalThis`, otherwise
 * from `<DATA_DIR>/consolidation/<uid>_cost.json`. Both paths apply the same check:
 * only a plain JSON object is returned; a missing value, a non-object payload
 * (string, number, array, null) or any read/parse error yields `{}`.
 *
 * @param {string} uid
 * @returns {Promise<object>} The stored record, or `{}`.
 */
async function loadConsolidationCost(uid) {
  // Callers read named fields off the result, so anything but a plain object is discarded.
  const asRecord = (value) =>
    value !== null && typeof value === 'object' && !Array.isArray(value) ? value : {};

  const store = globalThis.__netlify_blob_store;
  if (store) {
    try {
      const raw = await store.get(consolidationCostBlobKey(uid), { type: 'text' });
      return raw ? asRecord(JSON.parse(raw)) : {};
    } catch (_) {
      return {};
    }
  }

  const filePath = path.join(DATA_DIR, 'consolidation', uid + '_cost.json');
  try {
    return asRecord(JSON.parse(fs.readFileSync(filePath, 'utf8')));
  } catch (_) {
    // A missing file is the normal state before the first recorded pass.
    return {};
  }
}
3174
/**
 * Write the per-user consolidation cost record.
 *
 * With a blob store installed on `globalThis` the record is written there as compact
 * JSON and write errors are ignored. Without one it is written, pretty-printed, to
 * `<DATA_DIR>/consolidation/<uid>_cost.json`, creating the directory when absent;
 * file-system errors on that path propagate to the caller.
 *
 * @param {string} uid
 * @param {object} data - Record to persist.
 * @returns {Promise<void>}
 */
async function saveConsolidationCost(uid, data) {
  const blobStore = globalThis.__netlify_blob_store;

  if (!blobStore) {
    const costFile = path.join(DATA_DIR, 'consolidation', uid + '_cost.json');
    const parentDir = path.dirname(costFile);
    if (!fs.existsSync(parentDir)) {
      fs.mkdirSync(parentDir, { recursive: true });
    }
    fs.writeFileSync(costFile, JSON.stringify(data, null, 2), 'utf8');
    return;
  }

  try {
    await blobStore.set(consolidationCostBlobKey(uid), JSON.stringify(data));
  } catch (_) {
    // Hosted write is best-effort; failures are intentionally ignored here.
  }
}
3189
/**
 * Compute the updated cost record after one consolidation pass.
 *
 * Loads the current record and returns a new one; nothing is persisted here, the
 * caller is expected to save the result. The daily cost accumulates only while the
 * stored `cost_date` equals today's UTC date, and the monthly pass counter only while
 * the stored `pass_month` equals the current UTC month; otherwise each restarts from
 * this pass.
 *
 * @param {string} uid
 * @param {number} costUsd - Cost of the pass that just ran.
 * @returns {Promise<object>} `{ last_pass, cost_today_usd, cost_date, cost_cap_usd, pass_count_month, pass_month }`.
 */
async function recordConsolidationPass(uid, costUsd) {
  const previous = await loadConsolidationCost(uid);
  const today = utcDateString();
  const month = utcMonthString();

  const isSameDay = previous.cost_date === today;
  const isSameMonth = previous.pass_month === month;

  const lastPass = new Date().toISOString();

  let costToday = costUsd;
  if (isSameDay) {
    costToday = Number((previous.cost_today_usd || 0) + costUsd);
  }

  const capFromEnv = process.env.CONSOLIDATION_COST_CAP_USD;
  const costCap = capFromEnv ? parseFloat(capFromEnv) : null;

  let passCount = 1;
  if (isSameMonth) {
    passCount = (previous.pass_count_month || 0) + 1;
  }

  return {
    last_pass: lastPass,
    cost_today_usd: costToday,
    cost_date: today,
    cost_cap_usd: costCap,
    pass_count_month: passCount,
    pass_month: month,
  };
}
3205
/**
 * POST /api/v1/memory/consolidate
 * Body: { dry_run?, passes?, lookback_hours?, max_events_per_pass?, max_topics_per_pass?, llm?: { max_tokens? } }
 * Response: { topics, total_events, verify, discover, cost_usd, pass_id, dry_run }
 *
 * Errors:
 *   503 LLM_NOT_CONFIGURED — neither CONSOLIDATION_LLM_API_KEY nor OPENAI_API_KEY is set.
 *   429 RATE_LIMITED       — a real (non-dry-run) pass was recorded less than 30 minutes ago.
 *   500 RUNTIME_ERROR      — consolidation or the post-pass bookkeeping threw.
 *
 * On the hosted (Blobs) path the events are materialised into a temporary directory for
 * the duration of the request; that directory is removed on every exit path.
 */
app.post('/api/v1/memory/consolidate', requireBridgeAuth, requireBridgeEditorOrAdmin, express.json(), async (req, res) => {
  const { uid, vaultId } = bridgeMemoryAuth(req);

  const llmApiKey = process.env.CONSOLIDATION_LLM_API_KEY || process.env.OPENAI_API_KEY;
  if (!llmApiKey) {
    return res.status(503).json({
      error: 'No LLM API key configured for hosted consolidation (CONSOLIDATION_LLM_API_KEY or OPENAI_API_KEY).',
      code: 'LLM_NOT_CONFIGURED',
    });
  }

  const mergedBody =
    req.body && typeof req.body === 'object' ? { ...req.body } : {};

  const { dry_run, passes } = mergedBody;

  // 30-minute server-side cooldown on real (non-dry-run) passes to prevent runaway costs.
  // Automated scheduler runs respect their own configured interval; this guards manual triggers.
  if (!dry_run) {
    try {
      const costRec = await loadConsolidationCost(uid);
      const lastPassAt = costRec?.last_pass;
      if (lastPassAt) {
        // An unparseable timestamp yields NaN here, which fails the comparison below
        // and therefore lets the pass through.
        const elapsedMs = Date.now() - new Date(lastPassAt).getTime();
        const cooldownMs = 30 * 60 * 1000;
        if (elapsedMs < cooldownMs) {
          const waitMin = Math.ceil((cooldownMs - elapsedMs) / 60_000);
          return res.status(429).json({
            error: `Consolidation available again in ${waitMin} minute${waitMin === 1 ? '' : 's'}.`,
            code: 'RATE_LIMITED',
            retry_after_minutes: waitMin,
          });
        }
      }
    } catch (_) {
      // If cost record can't be read, allow the pass through — don't block on a read error.
    }
  }

  // Declared outside the try so the finally block can remove it on success and failure alike.
  let tempDir = null;

  try {
    const { FileMemoryProvider } = await import('../../lib/memory-provider-file.mjs');
    const { MemoryManager } = await import('../../lib/memory.mjs');
    const { consolidateMemory } = await import('../../lib/memory-consolidate.mjs');
    const { computeCallCost } = await import('../../lib/daemon-cost.mjs');
    const { createMemoryEvent } = await import('../../lib/memory-event.mjs');

    // Hosted (Blobs): load events into a temp FileMemoryProvider so consolidateMemory
    // can read and write to it, then sync remaining events back to Blobs.
    // Self-hosted: use the normal file-based memory directory.
    let mm;
    const isHostedBlobs = Boolean(globalThis.__netlify_blob_store);

    if (isHostedBlobs) {
      const rawEvents = await blobsGetMemoryEvents(uid, vaultId);
      tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'knowtation-mm-'));
      if (rawEvents.length > 0) {
        fs.writeFileSync(
          path.join(tempDir, 'events.jsonl'),
          rawEvents.map((e) => JSON.stringify(e)).join('\n') + '\n',
          'utf8',
        );
      }
      mm = new MemoryManager(new FileMemoryProvider(tempDir));
    } else {
      mm = new MemoryManager(new FileMemoryProvider(bridgeMemoryDir(uid, vaultId)));
    }

    // Per-request tuning: each value falls back to its default when absent or non-numeric.
    const maxTok =
      mergedBody.llm && typeof mergedBody.llm === 'object' && mergedBody.llm.max_tokens != null
        ? Math.floor(Number(mergedBody.llm.max_tokens))
        : 1024;
    const lbH =
      mergedBody.lookback_hours != null && Number.isFinite(Number(mergedBody.lookback_hours))
        ? Number(mergedBody.lookback_hours)
        : 24;
    const maxEv =
      mergedBody.max_events_per_pass != null && Number.isFinite(Number(mergedBody.max_events_per_pass))
        ? Number(mergedBody.max_events_per_pass)
        : 200;
    const maxTop =
      mergedBody.max_topics_per_pass != null && Number.isFinite(Number(mergedBody.max_topics_per_pass))
        ? Number(mergedBody.max_topics_per_pass)
        : 10;

    const consolidationConfig = {
      data_dir: isHostedBlobs ? os.tmpdir() : DATA_DIR,
      llm: {
        provider: 'openai',
        api_key: llmApiKey,
        model: process.env.CONSOLIDATION_LLM_MODEL || 'gpt-4o-mini',
      },
      daemon: {
        lookback_hours: lbH,
        max_events_per_pass: maxEv,
        max_topics_per_pass: maxTop,
        llm: { max_tokens: Number.isFinite(maxTok) ? maxTok : 1024 },
      },
      memory: {
        provider: 'file',
        encrypt: process.env.CONSOLIDATION_MEMORY_ENCRYPT === 'true',
      },
    };

    // Track LLM call cost via a wrapping llmFn.
    // The config argument supplied by the caller of llmFn is ignored; every call is made
    // with this route's consolidationConfig.
    let totalCostUsd = 0;
    const { completeChat } = await import('../../lib/llm-complete.mjs');
    const trackingLlmFn = async (cfg, callOpts) => {
      const rawResponse = await completeChat(consolidationConfig, callOpts);
      totalCostUsd += computeCallCost(callOpts, rawResponse);
      return rawResponse;
    };

    const result = await consolidateMemory(consolidationConfig, {
      mm,
      dryRun: Boolean(dry_run),
      passes: passes ?? undefined,
      llmFn: dry_run ? undefined : trackingLlmFn,
    });

    const pass_id = 'cpass_' + Date.now().toString(36) + '_' + Math.random().toString(36).slice(2, 6);

    if (!dry_run) {
      const updated = await recordConsolidationPass(uid, totalCostUsd);
      await saveConsolidationCost(uid, updated);

      // Store pass-level summary event.
      const passEvent = createMemoryEvent('consolidation_pass', {
        topics_count: Array.isArray(result.topics) ? result.topics.length : (result.topics ?? 0),
        total_events: result.total_events,
        cost_usd: totalCostUsd,
        pass_id,
        verify: result.verify ?? null,
        discover: result.discover ?? null,
      });

      if (isHostedBlobs) {
        // Sync remaining events (post-consolidation) + pass summary back to Blobs.
        const remaining = mm.list({ limit: 500 });
        await blobsSetMemoryEvents(uid, vaultId, [...remaining, passEvent]);
      } else {
        mm.store('consolidation_pass', passEvent.data);
      }
    }

    return res.json({
      topics: result.topics,
      total_events: result.total_events,
      verify: result.verify ?? null,
      discover: result.discover ?? null,
      cost_usd: totalCostUsd,
      pass_id,
      dry_run: result.dry_run,
    });
  } catch (e) {
    console.error('[bridge] POST /api/v1/memory/consolidate', e?.message);
    res.status(500).json({ error: e.message || 'Consolidation failed', code: 'RUNTIME_ERROR' });
  } finally {
    // Clean up temp dir used for hosted path — also when consolidation threw, so failed
    // passes do not leave copies of the user's events behind in the OS temp directory.
    if (tempDir) {
      try { fs.rmSync(tempDir, { recursive: true, force: true }); } catch (_) {}
    }
  }
});
3375
/**
 * GET /api/v1/memory/consolidate/status
 * Response: { last_pass, pass_count_month, cooldown_minutes, cost_today_usd, cost_cap_usd }
 *
 * 401 when no uid can be resolved from the request; 500 when the cost record lookup throws.
 */
app.get('/api/v1/memory/consolidate/status', async (req, res) => {
  const { uid } = bridgeMemoryAuth(req);
  if (!uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }

  try {
    const costRecord = await loadConsolidationCost(uid);
    const today = utcDateString();
    const month = utcMonthString();

    // The monthly counter only counts when it was recorded in the current UTC month.
    let passCountMonth = 0;
    if (costRecord.pass_month === month) {
      passCountMonth = costRecord.pass_count_month || 0;
    }

    // Cooldown: whole minutes (rounded up) until the 30-minute window since the last pass ends.
    const lastPass = costRecord.last_pass ?? null;
    let cooldownMinutes = 0;
    if (lastPass) {
      const sinceLastPassMs = Date.now() - new Date(lastPass).getTime();
      const msLeft = 30 * 60 * 1000 - sinceLastPassMs;
      if (msLeft > 0) {
        cooldownMinutes = Math.ceil(msLeft / 60_000);
      }
    }

    // Legacy cost fields kept for backward compat.
    const costTodayUsd = costRecord.cost_date === today ? (costRecord.cost_today_usd || 0) : 0;
    const costCapUsd = process.env.CONSOLIDATION_COST_CAP_USD
      ? parseFloat(process.env.CONSOLIDATION_COST_CAP_USD)
      : null;

    return res.json({
      last_pass: lastPass,
      pass_count_month: passCountMonth,
      cooldown_minutes: cooldownMinutes,
      cost_today_usd: costTodayUsd,
      cost_cap_usd: costCapUsd,
    });
  } catch (err) {
    console.error('[bridge] GET /api/v1/memory/consolidate/status', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'RUNTIME_ERROR' });
  }
});
3414
// Standalone (non-serverless) startup: refuse to run without the required settings,
// otherwise start listening and print a short configuration summary.
if (!isServerless) {
  const isMissingRequiredConfig = !CANISTER_URL || !SESSION_SECRET;
  if (isMissingRequiredConfig) {
    const help = [
      'Bridge: CANISTER_URL and SESSION_SECRET (or HUB_JWT_SECRET) are required.',
      ' Add them to the repo root .env (bridge loads ../../.env) or export in your shell.',
      ' Template: hub/bridge/.env.example',
    ];
    for (const line of help) {
      console.error(line);
    }
    process.exit(1);
  }

  app.listen(PORT, () => {
    const githubStatus = process.env.GITHUB_CLIENT_ID ? 'enabled' : 'not configured';
    const embeddingProvider = process.env.EMBEDDING_PROVIDER || 'ollama';
    console.log(`Knowtation Hub Bridge listening on http://localhost:${PORT}`);
    console.log(` Canister: ${CANISTER_URL}`);
    console.log(` GitHub connect: ${githubStatus}`);
    console.log(` Index/Search: ${embeddingProvider} (run POST /api/v1/index to index)`);
  });
}
3429
3430 export { app };
File History 2 commits
sha256:6f47d53a6adbcf105ba1b9cfc126c788d6a0f461d197f84f78794914305b4bd5 fix(mcp): bound hosted discovery context Human patch 15 hours ago
sha256:cc3351fcd73f3ec8e4238e2183a504d995f61e507d0061a37f01547f53305762 Fix SectionSource local route and DeepInfra chunks Human minor 14 days ago