server.mjs
3,638 lines 143.2 KB
Raw
sha256:94ec65bd2b200240ac785a97cf14c5db066832bd608a24d6a9c151f17b918b02 feat(calendar): hosted bridge/gateway route parity and time… Human minor ⚠ breaking 12 hours ago
1 /**
2 * Knowtation Hub Bridge — Connect GitHub + Back up now + indexer + search for hosted product.
3 * Stores GitHub token per user; sync fetches notes + full proposals from canister and pushes to repo (snapshot JSON + markdown).
4 * Index/search: pull vault from canister, chunk → embed → sqlite-vec per user; search via POST /api/v1/search.
5 * On Netlify, tokens and vector DBs persist via Netlify Blobs (set by netlify/functions/bridge.mjs).
6 * Env: SESSION_SECRET, CANISTER_URL, HUB_BASE_URL; optional HUB_UI_ORIGIN, HUB_UI_PATH (default /hub), GITHUB_*, EMBEDDING_*, BRIDGE_PORT, DATA_DIR.
7 * Consolidation: CONSOLIDATION_LLM_API_KEY / OPENAI_API_KEY, CONSOLIDATION_LLM_MODEL; CONSOLIDATION_MEMORY_ENCRYPT=true omits raw event payloads from consolidation LLM prompts.
8 */
9
10 import fs from 'fs';
11 import path from 'path';
12 import os from 'os';
13 import { fileURLToPath } from 'url';
14 import crypto from 'crypto';
15 import dotenv from 'dotenv';
16 import express from 'express';
17 import jwt from 'jsonwebtoken';
18 import multer from 'multer';
19 import AdmZip from 'adm-zip';
20 import { parseCanisterProposalGetBody } from '../../lib/canister-proposal-response-parse.mjs';
21 import { runImport } from '../../lib/import.mjs';
22 import { IMPORT_SOURCE_TYPES } from '../../lib/import-source-types.mjs';
23 import { commitImageToRepo, parseGitHubRepoUrl, validateImageExtension, validateMagicBytes } from '../../lib/github-commit-image.mjs';
24 import { mergeProvenanceFrontmatter } from '../../lib/hub-provenance.mjs';
25 import { createIndexTimer } from './index-timing.mjs';
26 import { computeChunkContentHashTagged } from '../../lib/chunk-content-hash.mjs';
27 import {
28 defaultBridgeEmbeddingModelForProvider,
29 resolveIndexerChunkOptions,
30 } from '../../lib/indexer-chunk-options.mjs';
31 import {
32 runWithConcurrency,
33 parseEmbedConcurrency,
34 parseEmbedBatchSize,
35 } from '../../lib/parallel-embed-pool.mjs';
36 import { partitionChunksForReindex } from '../../lib/index-partition.mjs';
37 import {
38 estimateEmbedSeconds,
39 shouldUseBackgroundIndex,
40 parseSyncBudgetSeconds,
41 parseMaxSyncChunks,
42 } from '../../lib/bridge-index-preflight-estimate.mjs';
43 import {
44 acquireJobLock,
45 releaseJobLock,
46 peekJobLock,
47 } from '../../lib/bridge-index-job-lock.mjs';
48 import {
49 setLastIndexedAt,
50 getLastIndexedAt,
51 } from '../../lib/bridge-index-last-indexed.mjs';
52 import { signInternalRequest } from '../../lib/bridge-internal-hmac.mjs';
53 import { assertBackgroundKickoffOk } from '../../lib/bridge-index-kickoff-response.mjs';
54 import { writeNote } from '../../lib/write.mjs';
55 import { resolveVaultRelativePath, parseFrontmatterAndBody } from '../../lib/vault.mjs';
56 import {
57 resolveEffectiveCanisterUser,
58 getScopeForUserVaultFromScopeMap,
59 resolveAllowedVaultIdsForHostedContext,
60 } from '../lib/hosted-workspace-resolve.mjs';
61 import { applyScopeFilterToNotes, applyScopeFilterToProposals } from '../lib/scope-filter.mjs';
62 import { actorMayApproveProposals } from '../lib/hub-evaluator-may-approve.mjs';
63 import {
64 buildCalendarTimeline,
65 listSourceCalendarsForClient,
66 } from '../../lib/calendar/timeline.mjs';
67 import { importIcsIntoVault } from '../../lib/calendar/event-store.mjs';
68 import { patchSourceCalendar, parseSourceCalendarPatchBody } from '../../lib/calendar/source-calendar-patch.mjs';
69 import { retrieveAgentCalendarContext } from '../../lib/calendar/agent-retrieval.mjs';
70 import { materializeListFrontmatter } from '../gateway/note-facets.mjs';
71
// Netlify bundles this module as CJS, where import.meta.url is empty. Serverless runs
// therefore take the project root from the working directory so the app still loads and
// routes register; everywhere else the root is two levels above this file.
const inServerless = Boolean(process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.NETLIFY);
let projectRoot = inServerless
  ? process.cwd()
  : path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..');
const __dirname = path.join(projectRoot, 'hub', 'bridge');
// Load <projectRoot>/.env only when the file exists.
const envPath = path.join(projectRoot, '.env');
if (fs.existsSync(envPath)) {
  dotenv.config({ path: envPath });
}
83
// Listening port: BRIDGE_PORT, then PORT, then 3341.
const PORT = Number.parseInt(process.env.BRIDGE_PORT || process.env.PORT || '3341', 10);
// Each URL-like setting has a single trailing slash removed.
const BASE_URL = (process.env.HUB_BASE_URL || `http://localhost:${PORT}`).replace(/\/$/, '');
const CANISTER_URL = (process.env.CANISTER_URL || '').replace(/\/$/, '');
const HUB_UI_ORIGIN = (process.env.HUB_UI_ORIGIN || BASE_URL).replace(/\/$/, '');
// Path under HUB_UI_ORIGIN where the Hub app lives (e.g. /hub). Empty string = root.
const HUB_UI_PATH = (process.env.HUB_UI_PATH || '/hub').replace(/\/$/, '');
const SESSION_SECRET = process.env.SESSION_SECRET || process.env.HUB_JWT_SECRET;
const CANISTER_AUTH_SECRET = process.env.CANISTER_AUTH_SECRET || '';
// Timeout for hosted-context canister fetches: 3000 ms when unset or unparsable,
// otherwise the configured value clamped to the range [250, 10000] ms.
const HOSTED_CONTEXT_FETCH_TIMEOUT_MS = (() => {
  const configured = Number.parseInt(String(process.env.HOSTED_CONTEXT_FETCH_TIMEOUT_MS || ''), 10);
  return Number.isFinite(configured) ? Math.min(10_000, Math.max(250, configured)) : 3000;
})();
const HOSTED_CONTEXT_CACHE_TTL_MS = 60_000;
// Canister user id -> { expires, ids } entries for the vault-id lookup.
const canisterVaultIdsCache = new Map();
99
/**
 * Build an abort signal that fires after HOSTED_CONTEXT_FETCH_TIMEOUT_MS.
 * @returns {AbortSignal | undefined} undefined when the runtime has no AbortSignal.timeout.
 */
function hostedContextAbortSignal() {
  const supportsTimeout =
    typeof AbortSignal !== 'undefined' && typeof AbortSignal.timeout === 'function';
  if (!supportsTimeout) return undefined;
  return AbortSignal.timeout(HOSTED_CONTEXT_FETCH_TIMEOUT_MS);
}
105
/**
 * Base headers for every bridge→canister request.
 * When CANISTER_AUTH_SECRET is configured, x-gateway-auth is added so the canister's
 * gatewayAuthorized() check (Phase 0) passes. The env var name matches the gateway's.
 * @param {Record<string, string>} [extra] Additional headers; these may override Accept,
 *   but x-gateway-auth is always set last from the configured secret.
 * @returns {Record<string, string>}
 */
function canisterHeaders(extra = {}) {
  const headers = { Accept: 'application/json' };
  Object.assign(headers, extra);
  if (CANISTER_AUTH_SECRET) {
    headers['x-gateway-auth'] = CANISTER_AUTH_SECRET;
  }
  return headers;
}
// On Netlify Lambda /var/task/ is read-only; only /tmp is writable.
// An explicit DATA_DIR wins (relative values resolve against the project root). Otherwise
// serverless runs use <tmpdir>/knowtation-bridge-data and local runs use <projectRoot>/data.
const DATA_DIR = (() => {
  const configured = process.env.DATA_DIR;
  if (configured) {
    return path.isAbsolute(configured) ? configured : path.join(projectRoot, configured);
  }
  if (inServerless) return path.join(os.tmpdir(), 'knowtation-bridge-data');
  return path.join(projectRoot, 'data');
})();
// JSON state files used when no blob store is available.
const TOKENS_FILE = path.join(DATA_DIR, 'hub_github_tokens.json');
const ROLES_FILE = path.join(DATA_DIR, 'hub_roles.json');
const INVITES_FILE = path.join(DATA_DIR, 'hub_invites.json');
const WORKSPACE_FILE = path.join(DATA_DIR, 'hub_workspace.json');
const VAULT_ACCESS_FILE = path.join(DATA_DIR, 'hub_vault_access.json');
const SCOPE_FILE = path.join(DATA_DIR, 'hub_scope.json');
const EVALUATOR_MAY_APPROVE_FILE = path.join(DATA_DIR, 'hub_evaluator_may_approve.json');
const VALID_ROLES = new Set(['admin', 'editor', 'viewer', 'evaluator']);
// Seven days, in milliseconds.
const INVITE_EXPIRY_MS = 7 * 24 * 60 * 60 * 1000;
131
// User ids from the comma-separated HUB_ADMIN_USER_IDS env var; blank entries are dropped.
const adminUserIdsSet = new Set();
for (const entry of (process.env.HUB_ADMIN_USER_IDS || '').split(',')) {
  const id = entry.trim();
  if (id) adminUserIdsSet.add(id);
}
138
/**
 * Reduce a user id to a path/key-safe token: every character outside [a-zA-Z0-9_-]
 * becomes "_" and the result is capped at 128 characters.
 * @param {unknown} uid Stringified before cleaning.
 * @returns {string} The cleaned id, or 'default' when the cleaned id is empty.
 */
function sanitizeUserId(uid) {
  const cleaned = String(uid).replace(/[^\w-]/g, '_').slice(0, 128);
  return cleaned === '' ? 'default' : cleaned;
}
142
/**
 * Reduce a vault id to a path/key-safe token: a falsy id becomes 'default', every
 * character outside [a-zA-Z0-9_-] becomes "_", and the result is capped at 64 characters.
 * @param {unknown} vaultId
 * @returns {string}
 */
function sanitizeVaultId(vaultId) {
  const source = String(vaultId || 'default');
  const cleaned = source.replace(/[^\w-]/g, '_').slice(0, 64);
  return cleaned === '' ? 'default' : cleaned;
}
146
// Ensures the "localhost Ollama on serverless" warning is checked at most once per process.
let warnedOllamaLocalhostOnNetlify = false;

/**
 * Resolve the embedding provider, model and Ollama base URL from the environment.
 * Values are trimmed and empty values fall back to defaults, so accidental whitespace
 * does not break provider matching or the Ollama URL.
 * @returns {{ provider: string, model: string, ollama_url: string }}
 */
function getBridgeEmbeddingConfig() {
  const trimmedEnv = (name) => {
    const value = process.env[name];
    return value == null ? '' : String(value).trim();
  };
  const provider = (trimmedEnv('EMBEDDING_PROVIDER') || 'ollama').toLowerCase();
  const model = trimmedEnv('EMBEDDING_MODEL') || defaultBridgeEmbeddingModelForProvider(provider);
  const ollama_url = trimmedEnv('OLLAMA_URL') || 'http://localhost:11434';

  const shouldCheckLocalhost =
    inServerless && provider === 'ollama' && !warnedOllamaLocalhostOnNetlify;
  if (shouldCheckLocalhost) {
    warnedOllamaLocalhostOnNetlify = true;
    try {
      if (/^https?:\/\//i.test(ollama_url)) {
        const { hostname } = new URL(ollama_url);
        if (hostname === 'localhost' || hostname === '127.0.0.1') {
          console.warn(
            '[bridge] EMBEDDING_PROVIDER=ollama with localhost OLLAMA_URL cannot reach your machine from Netlify. ' +
              'Set EMBEDDING_PROVIDER=openai and OPENAI_API_KEY, or OLLAMA_URL to a public https:// Ollama API base.',
          );
        }
      }
    } catch (_) {
      /* embed path will throw a clearer error via normalizeOllamaEmbedBaseUrl */
    }
  }
  return { provider, model, ollama_url };
}
186
/**
 * Undici/fetch often throws a TypeError whose only message is "Invalid URL". Expand such
 * messages into actionable operator guidance; every other message is returned unchanged.
 * @param {unknown} err
 * @param {'index'|'search'|'embed'} kind
 * @returns {string}
 */
function bridgeEmbedFailureMessage(err, kind) {
  const raw = err && typeof err.message === 'string' ? err.message : String(err);
  if (!raw.includes('Invalid URL')) return raw;

  const { provider } = getBridgeEmbeddingConfig();
  // An API key counts as set only when it is non-empty after trimming.
  const keyStatus = (name) =>
    process.env[name] && String(process.env[name]).trim() ? 'is set' : 'is missing';

  return [
    `${raw} (${kind}). On Netlify, Invalid URL often means sqlite-vec was esbuild-bundled `,
    '(stack: getLoadablePath / input ".") — set [functions].external_node_modules for sqlite-vec and better-sqlite3 in netlify.toml. ',
    `Resolved EMBEDDING_PROVIDER="${provider}"; OPENAI_API_KEY ${keyStatus('OPENAI_API_KEY')}; `,
    `VOYAGE_API_KEY ${keyStatus('VOYAGE_API_KEY')}. `,
    'If provider is ollama, OLLAMA_URL must be a full http(s) URL. Remove bad HTTP_PROXY/HTTPS_PROXY if set. ',
    'See hub/bridge/README.md (semantic index/search).',
  ].join('');
}
209
const DB_FILENAME = 'knowtation_vectors.db';

/**
 * Build the vector-store configuration for one user.
 * @param {string} uid User id; used (sanitized) for the default vectors directory.
 * @param {string} [vectorsDirOverride] Directory to use as-is. When null/undefined,
 *   DATA_DIR/vectors/<sanitized uid> is used and created if missing.
 * @returns {{ vector_store: string, data_dir: string, embedding: object, allow_dimension_migration: boolean }}
 */
function getBridgeStoreConfig(uid, vectorsDirOverride) {
  let dataDir = vectorsDirOverride;
  if (dataDir === undefined || dataDir === null) {
    dataDir = path.join(DATA_DIR, 'vectors', sanitizeUserId(uid));
    if (!fs.existsSync(dataDir)) {
      fs.mkdirSync(dataDir, { recursive: true });
    }
  }
  return {
    vector_store: 'sqlite-vec',
    data_dir: dataDir,
    embedding: getBridgeEmbeddingConfig(),
    // The bridge owns the data lifecycle (download from blob → re-index → upload to blob).
    // A dimension change (e.g. OpenAI 1536 → DeepInfra 1024) can only resolve via a full
    // re-embed of every vault in this DB. The CLI keeps the throw so an accidental swap
    // surfaces loudly. See `lib/vector-store-sqlite.mjs ensureCollection` migration logic.
    allow_dimension_migration: true,
  };
}
229
// Evaluated after the .env load above, so it can differ from `inServerless`, which is
// computed before that load.
const isServerless = Boolean(process.env.AWS_LAMBDA_FUNCTION_NAME || process.env.NETLIFY);

/** Create DATA_DIR (including missing parents) when it does not exist yet. */
function ensureDataDir() {
  if (fs.existsSync(DATA_DIR)) return;
  fs.mkdirSync(DATA_DIR, { recursive: true });
}
235
const ALGO = 'aes-256-gcm';
const IV_LEN = 16;
const TAG_LEN = 16;
const SALT_LEN = 16;

/**
 * Encrypt text with AES-256-GCM under a key derived from `secret` via scrypt with a
 * fresh random salt.
 *
 * Ciphertext format (v2): saltB64url.ivB64url.tagB64url.encB64url (4 parts).
 * Legacy format (v1): ivB64url.tagB64url.encB64url (3 parts) — decrypt returns null for
 * those, which leads to a graceful reconnect.
 *
 * @param {string} text UTF-8 plaintext.
 * @param {string} secret Key-derivation secret.
 * @returns {string} Dot-joined base64url segments.
 */
function encrypt(text, secret) {
  const salt = crypto.randomBytes(SALT_LEN);
  const iv = crypto.randomBytes(IV_LEN);
  const cipher = crypto.createCipheriv(ALGO, crypto.scryptSync(secret, salt, 32), iv);
  const body = Buffer.concat([cipher.update(text, 'utf8'), cipher.final()]);
  // The auth tag is only available once final() has been called.
  const tag = cipher.getAuthTag();
  return [salt, iv, tag, body].map((segment) => segment.toString('base64url')).join('.');
}
/**
 * Decrypt a v2 ciphertext (saltB64url.ivB64url.tagB64url.encB64url) produced by encrypt().
 *
 * Returns null instead of throwing for anything that cannot be decrypted, so callers
 * fall through to "prompt reconnect":
 *  - non-string input or a segment count other than 4 (v1 ciphertexts had 3 parts with a
 *    hardcoded salt),
 *  - any empty segment,
 *  - a wrong secret, tampered data, or an auth tag that is not exactly TAG_LEN bytes.
 *
 * @param {string} encrypted Dot-joined base64url segments.
 * @param {string} secret Secret the scrypt key is derived from.
 * @returns {string | null} UTF-8 plaintext, or null on failure.
 */
function decrypt(encrypted, secret) {
  if (typeof encrypted !== 'string') return null;
  const parts = encrypted.split('.');
  if (parts.length !== 4) return null;
  const [saltB, ivB, tagB, encB] = parts;
  if (!saltB || !ivB || !tagB || !encB) return null;
  try {
    const key = crypto.scryptSync(secret, Buffer.from(saltB, 'base64url'), 32);
    // Pinning authTagLength makes setAuthTag() reject truncated tags; without it GCM
    // accepts shorter tags, which weakens authentication.
    const decipher = crypto.createDecipheriv(ALGO, key, Buffer.from(ivB, 'base64url'), {
      authTagLength: TAG_LEN,
    });
    decipher.setAuthTag(Buffer.from(tagB, 'base64url'));
    // Join the raw chunks before decoding rather than relying on Buffer-to-string coercion.
    const plain = Buffer.concat([
      decipher.update(Buffer.from(encB, 'base64url')),
      decipher.final(),
    ]);
    return plain.toString('utf8');
  } catch {
    return null;
  }
}
272
/**
 * Convert the stored token map (uid → { token: ciphertext, repo }) into its decrypted form.
 * Entries without a string token are skipped. Entries that fail to decrypt are dropped
 * and counted, and one warning is logged when any were dropped.
 * @param {unknown} raw Parsed JSON from the token store.
 * @returns {Record<string, { token: string, repo: string | null }>}
 */
function parseAndDecryptTokens(raw) {
  const isPlainObject = Boolean(raw) && typeof raw === 'object' && !Array.isArray(raw);
  if (!isPlainObject) return {};
  const decrypted = {};
  let failed = 0;
  for (const [uid, entry] of Object.entries(raw)) {
    if (!entry || typeof entry.token !== 'string') continue;
    const plain = decrypt(entry.token, SESSION_SECRET);
    if (!plain) {
      failed += 1;
      continue;
    }
    decrypted[uid] = { token: plain, repo: entry.repo || null };
  }
  if (failed > 0) {
    console.warn(
      '[bridge] loadTokens: decrypt failed for',
      failed,
      'stored GitHub token(s). If SESSION_SECRET was rotated on the bridge, run Connect GitHub again to re-store the token.'
    );
  }
  return decrypted;
}
293
/**
 * Load and decrypt the stored GitHub tokens.
 * Reads blob key 'hub_github_tokens' when a blob store is given, otherwise TOKENS_FILE.
 * Missing data, read errors and parse errors all yield an empty map.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<Record<string, { token: string, repo: string | null }>>}
 */
async function loadTokens(blobStore) {
  let serialized;
  if (blobStore) {
    try {
      serialized = await blobStore.get('hub_github_tokens');
    } catch (_) {
      return {};
    }
    if (!serialized) return {};
  } else {
    ensureDataDir();
    if (!fs.existsSync(TOKENS_FILE)) return {};
    try {
      serialized = fs.readFileSync(TOKENS_FILE, 'utf8');
    } catch (_) {
      return {};
    }
  }
  try {
    return parseAndDecryptTokens(JSON.parse(serialized));
  } catch (_) {
    return {};
  }
}
314
/**
 * Encrypt every token with SESSION_SECRET and persist the map as pretty-printed JSON.
 * Writes blob key 'hub_github_tokens' when a blob store is given, otherwise TOKENS_FILE.
 * @param {{ set: Function } | null} blobStore
 * @param {Record<string, { token: string, repo?: string | null }>} tokens
 * @returns {Promise<void>}
 */
async function saveTokens(blobStore, tokens) {
  const encryptedByUser = {};
  for (const uid of Object.keys(tokens)) {
    const entry = tokens[uid];
    encryptedByUser[uid] = {
      token: encrypt(entry.token, SESSION_SECRET),
      repo: entry.repo || null,
    };
  }
  const serialized = JSON.stringify(encryptedByUser, null, 2);
  if (blobStore) {
    await blobStore.set('hub_github_tokens', serialized);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(TOKENS_FILE, serialized, 'utf8');
}
328
329 // ——— Roles & invites (hosted parity: same contract as self-hosted hub/roles.mjs, hub/invites.mjs) ———
/**
 * Load the stored role map (user id → role).
 * Reads blob key 'hub_roles' when a blob store is given, otherwise ROLES_FILE. Accepts
 * both `{ roles: {...} }` and a bare map. Missing data or any error yields {}.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<object>}
 */
async function loadRoles(blobStore) {
  let serialized;
  if (blobStore) {
    try {
      serialized = await blobStore.get('hub_roles');
    } catch (_) {
      return {};
    }
    if (!serialized) return {};
  } else {
    ensureDataDir();
    if (!fs.existsSync(ROLES_FILE)) return {};
    try {
      serialized = fs.readFileSync(ROLES_FILE, 'utf8');
    } catch (_) {
      return {};
    }
  }
  try {
    const parsed = JSON.parse(serialized);
    const candidate = parsed.roles != null ? parsed.roles : parsed;
    if (candidate === null || typeof candidate !== 'object') return {};
    return candidate;
  } catch (_) {
    return {};
  }
}
352
/**
 * Persist the role map as `{ roles: {...} }`. Only entries with a non-blank user id and
 * a role in VALID_ROLES are kept; ids are trimmed.
 * Writes blob key 'hub_roles' when a blob store is given, otherwise ROLES_FILE.
 * @param {{ set: Function } | null} blobStore
 * @param {Record<string, string>} roles
 * @returns {Promise<void>}
 */
async function saveRoles(blobStore, roles) {
  const kept = {};
  for (const [sub, role] of Object.entries(roles)) {
    if (typeof sub !== 'string') continue;
    const id = sub.trim();
    if (!id || !VALID_ROLES.has(role)) continue;
    kept[id] = role;
  }
  const serialized = JSON.stringify({ roles: kept }, null, 2);
  if (blobStore) {
    await blobStore.set('hub_roles', serialized);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(ROLES_FILE, serialized, 'utf8');
}
366
/** @returns {boolean} true only when HUB_EVALUATOR_MAY_APPROVE is exactly '1'. */
function bridgeEnvEvaluatorMayApprove() {
  const { HUB_EVALUATOR_MAY_APPROVE: flag } = process.env;
  return flag === '1';
}
370
/**
 * Load the per-user "evaluator may approve" flags as a map of trimmed id → boolean.
 * Reads blob key 'hub_evaluator_may_approve' when a blob store is given, otherwise
 * EVALUATOR_MAY_APPROVE_FILE. Accepts `{ evaluator_may_approve: {...} }` or a bare map.
 * Missing data or any error yields {}.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<Record<string, boolean>>}
 */
async function loadEvaluatorMayApproveMap(blobStore) {
  let serialized;
  if (blobStore) {
    try {
      serialized = await blobStore.get('hub_evaluator_may_approve');
    } catch (_) {
      return {};
    }
    if (!serialized) return {};
  } else {
    ensureDataDir();
    if (!fs.existsSync(EVALUATOR_MAY_APPROVE_FILE)) return {};
    try {
      serialized = fs.readFileSync(EVALUATOR_MAY_APPROVE_FILE, 'utf8');
    } catch (_) {
      return {};
    }
  }
  try {
    const parsed = JSON.parse(serialized);
    const flags = parsed?.evaluator_may_approve != null ? parsed.evaluator_may_approve : parsed;
    if (flags === null || typeof flags !== 'object') return {};
    const normalized = {};
    for (const [key, value] of Object.entries(flags)) {
      if (typeof key !== 'string') continue;
      const id = key.trim();
      if (id) normalized[id] = Boolean(value);
    }
    return normalized;
  } catch (_) {
    return {};
  }
}
403
/**
 * Persist the "evaluator may approve" flags as `{ evaluator_may_approve: {...} }`.
 * Keys are trimmed, blank keys are dropped, and values are coerced to booleans.
 * Writes blob key 'hub_evaluator_may_approve' when a blob store is given, otherwise
 * EVALUATOR_MAY_APPROVE_FILE.
 * @param {{ set: Function } | null} blobStore
 * @param {Record<string, unknown>} map
 * @returns {Promise<void>}
 */
async function saveEvaluatorMayApproveMap(blobStore, map) {
  const flags = {};
  for (const [key, value] of Object.entries(map)) {
    if (typeof key !== 'string') continue;
    const id = key.trim();
    if (id) flags[id] = Boolean(value);
  }
  const serialized = JSON.stringify({ evaluator_may_approve: flags }, null, 2);
  if (blobStore) {
    await blobStore.set('hub_evaluator_may_approve', serialized);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(EVALUATOR_MAY_APPROVE_FILE, serialized, 'utf8');
}
417
/**
 * Effective "may approve proposals" flag for the Hub UI and gateway (admin always;
 * evaluator from map + env). The decision is delegated to actorMayApproveProposals using
 * the user's effective role.
 * @param {string} uid
 * @param {object} storedRoles
 * @param {object} mayMap
 */
function mayApproveProposalsForUser(uid, storedRoles, mayMap) {
  const actorRole = effectiveRole(uid, storedRoles);
  const envAllows = bridgeEnvEvaluatorMayApprove();
  return actorMayApproveProposals(uid, actorRole, mayMap, envAllows);
}
423
/**
 * Load the stored invites map (the `invites` property of the persisted document).
 * Reads blob key 'hub_invites' when a blob store is given, otherwise INVITES_FILE.
 * Missing data, a non-object `invites` value, or any error yields {}.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<object>}
 */
async function loadInvites(blobStore) {
  let serialized;
  if (blobStore) {
    try {
      serialized = await blobStore.get('hub_invites');
    } catch (_) {
      return {};
    }
    if (!serialized) return {};
  } else {
    ensureDataDir();
    if (!fs.existsSync(INVITES_FILE)) return {};
    try {
      serialized = fs.readFileSync(INVITES_FILE, 'utf8');
    } catch (_) {
      return {};
    }
  }
  try {
    const { invites } = JSON.parse(serialized);
    if (invites && typeof invites === 'object') return invites;
    return {};
  } catch (_) {
    return {};
  }
}
446
/**
 * Persist invites as `{ invites: { token: { role, created_at } } }`.
 * Entries are kept only when the token is non-empty and both `role` and `created_at`
 * are strings; any other entry fields are not written.
 * Writes blob key 'hub_invites' when a blob store is given, otherwise INVITES_FILE.
 * @param {{ set: Function } | null} blobStore
 * @param {Record<string, { role: string, created_at: string }>} invites
 * @returns {Promise<void>}
 */
async function saveInvites(blobStore, invites) {
  const kept = {};
  for (const [token, entry] of Object.entries(invites)) {
    const tokenOk = typeof token === 'string' && Boolean(token);
    if (!tokenOk || !entry) continue;
    if (typeof entry.role !== 'string' || typeof entry.created_at !== 'string') continue;
    kept[token] = { role: entry.role, created_at: entry.created_at };
  }
  const serialized = JSON.stringify({ invites: kept }, null, 2);
  if (blobStore) {
    await blobStore.set('hub_invites', serialized);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(INVITES_FILE, serialized, 'utf8');
}
462
/**
 * Load the workspace record.
 * Reads blob key 'hub_workspace' when a blob store is given, otherwise WORKSPACE_FILE.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<{ owner_user_id: string | null }>} The trimmed owner id, or null when
 *   it is missing, blank, not a string, or the record cannot be read or parsed.
 */
async function loadWorkspace(blobStore) {
  const noOwner = () => ({ owner_user_id: null });
  let serialized;
  if (blobStore) {
    try {
      serialized = await blobStore.get('hub_workspace');
    } catch (_) {
      return noOwner();
    }
    if (!serialized) return noOwner();
  } else {
    ensureDataDir();
    if (!fs.existsSync(WORKSPACE_FILE)) return noOwner();
    try {
      serialized = fs.readFileSync(WORKSPACE_FILE, 'utf8');
    } catch (_) {
      return noOwner();
    }
  }
  try {
    const owner = JSON.parse(serialized)?.owner_user_id;
    if (typeof owner !== 'string') return noOwner();
    const trimmed = owner.trim();
    return { owner_user_id: trimmed ? trimmed : null };
  } catch (_) {
    return noOwner();
  }
}
485
/**
 * Persist the workspace record `{ owner_user_id }`. The id is stringified and trimmed;
 * a falsy or blank id is stored as null.
 * Writes blob key 'hub_workspace' when a blob store is given, otherwise WORKSPACE_FILE.
 * @param {{ set: Function } | null} blobStore
 * @param {string | null | undefined} ownerUserId
 * @returns {Promise<void>}
 */
async function saveWorkspace(blobStore, ownerUserId) {
  const trimmed = ownerUserId ? String(ownerUserId).trim() : '';
  const record = { owner_user_id: trimmed ? trimmed : null };
  const payload = JSON.stringify(record, null, 2);
  if (blobStore) {
    await blobStore.set('hub_workspace', payload);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(WORKSPACE_FILE, payload, 'utf8');
}
499
/**
 * Load the vault access map (user id → list of vault ids).
 * Reads blob key 'hub_vault_access' when a blob store is given, otherwise
 * VAULT_ACCESS_FILE. User ids and vault ids are trimmed; blank ids, non-string vault ids
 * and non-array values are dropped. Missing data or any error yields {}.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<Record<string, string[]>>}
 */
async function loadVaultAccess(blobStore) {
  let serialized;
  if (blobStore) {
    try {
      serialized = await blobStore.get('hub_vault_access');
    } catch (_) {
      return {};
    }
    if (!serialized) return {};
  } else {
    ensureDataDir();
    if (!fs.existsSync(VAULT_ACCESS_FILE)) return {};
    try {
      serialized = fs.readFileSync(VAULT_ACCESS_FILE, 'utf8');
    } catch (_) {
      return {};
    }
  }
  try {
    const parsed = JSON.parse(serialized);
    const normalized = {};
    if (!parsed || typeof parsed !== 'object') return normalized;
    for (const [uid, list] of Object.entries(parsed)) {
      if (typeof uid !== 'string' || !uid.trim() || !Array.isArray(list)) continue;
      const vaultIds = [];
      for (const item of list) {
        if (typeof item === 'string' && item.trim()) vaultIds.push(item.trim());
      }
      normalized[uid.trim()] = vaultIds;
    }
    return normalized;
  } catch (_) {
    return {};
  }
}
536
/**
 * Persist the vault access map (user id → list of vault ids) as pretty-printed JSON.
 * User ids and vault ids are trimmed; blank ids, non-string vault ids and non-array
 * values are dropped. A missing map is written as {}.
 * Writes blob key 'hub_vault_access' when a blob store is given, otherwise
 * VAULT_ACCESS_FILE.
 * @param {{ set: Function } | null} blobStore
 * @param {Record<string, string[]> | null | undefined} access
 * @returns {Promise<void>}
 */
async function saveVaultAccess(blobStore, access) {
  const normalized = {};
  for (const [uid, list] of Object.entries(access || {})) {
    if (typeof uid !== 'string' || !uid.trim() || !Array.isArray(list)) continue;
    const vaultIds = [];
    for (const item of list) {
      if (typeof item === 'string' && item.trim()) vaultIds.push(item.trim());
    }
    normalized[uid.trim()] = vaultIds;
  }
  const serialized = JSON.stringify(normalized, null, 2);
  if (blobStore) {
    await blobStore.set('hub_vault_access', serialized);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(VAULT_ACCESS_FILE, serialized, 'utf8');
}
552
/**
 * Load the scope map exactly as stored (no normalization beyond requiring an object).
 * Reads blob key 'hub_scope' when a blob store is given, otherwise SCOPE_FILE.
 * Missing data, a non-object document, or any error yields {}.
 * @param {{ get: Function } | null} blobStore
 * @returns {Promise<object>}
 */
async function loadScope(blobStore) {
  let serialized;
  if (blobStore) {
    try {
      serialized = await blobStore.get('hub_scope');
    } catch (_) {
      return {};
    }
    if (!serialized) return {};
  } else {
    ensureDataDir();
    if (!fs.existsSync(SCOPE_FILE)) return {};
    try {
      serialized = fs.readFileSync(SCOPE_FILE, 'utf8');
    } catch (_) {
      return {};
    }
  }
  try {
    const parsed = JSON.parse(serialized);
    if (parsed && typeof parsed === 'object') return parsed;
    return {};
  } catch (_) {
    return {};
  }
}
573
/**
 * Persist the scope map (user id → vault id → { projects, folders }).
 * Ids and list entries are trimmed; blank or non-string list entries are dropped. A
 * vault entry is written only when it has at least one project or folder. A user with a
 * valid vault map is always written, even when no vault entry survives.
 * Writes blob key 'hub_scope' when a blob store is given, otherwise SCOPE_FILE.
 * @param {{ set: Function } | null} blobStore
 * @param {object | null | undefined} scope
 * @returns {Promise<void>}
 */
async function saveScope(blobStore, scope) {
  // Keep only non-blank strings from an array, trimmed; anything else becomes [].
  const cleanList = (value) => {
    if (!Array.isArray(value)) return [];
    const kept = [];
    for (const item of value) {
      if (typeof item === 'string' && item.trim()) kept.push(item.trim());
    }
    return kept;
  };

  const cleaned = {};
  for (const [uid, vaultMap] of Object.entries(scope || {})) {
    const uidOk = typeof uid === 'string' && Boolean(uid.trim());
    if (!uidOk || !vaultMap || typeof vaultMap !== 'object') continue;
    const perVault = {};
    cleaned[uid.trim()] = perVault;
    for (const [vaultId, rules] of Object.entries(vaultMap)) {
      const vaultOk = typeof vaultId === 'string' && Boolean(vaultId.trim());
      if (!vaultOk || !rules || typeof rules !== 'object') continue;
      const projects = cleanList(rules.projects);
      const folders = cleanList(rules.folders);
      if (projects.length === 0 && folders.length === 0) continue;
      perVault[vaultId.trim()] = { projects, folders };
    }
  }
  const serialized = JSON.stringify(cleaned, null, 2);
  if (blobStore) {
    await blobStore.set('hub_scope', serialized);
    return;
  }
  ensureDataDir();
  fs.writeFileSync(SCOPE_FILE, serialized, 'utf8');
}
600
/**
 * Remove a vault id from every hub_vault_access list and every hub_scope map (hosted
 * team). Users left with no vaults or no scope entries are removed from the respective
 * map. Nothing is changed for a blank id or the 'default' vault.
 * @param {object | null} blobStore
 * @param {string} vaultId
 * @returns {Promise<void>}
 */
async function stripHostedVaultFromAccessAndScope(blobStore, vaultId) {
  const target = String(vaultId || '').trim();
  if (target === '' || target === 'default') return;
  const isTarget = (candidate) => String(candidate).trim() === target;

  const currentAccess = await loadVaultAccess(blobStore);
  const prunedAccess = {};
  for (const uid of Object.keys(currentAccess)) {
    const list = currentAccess[uid];
    if (!Array.isArray(list)) continue;
    const remaining = list.filter((entry) => !isTarget(entry));
    if (remaining.length > 0) prunedAccess[uid] = remaining;
  }
  await saveVaultAccess(blobStore, prunedAccess);

  const currentScope = await loadScope(blobStore);
  const prunedScope = {};
  for (const uid of Object.keys(currentScope)) {
    const vaultMap = currentScope[uid];
    if (!vaultMap || typeof vaultMap !== 'object') continue;
    const remaining = {};
    for (const [vid, rules] of Object.entries(vaultMap)) {
      if (!isTarget(vid)) remaining[vid] = rules;
    }
    if (Object.keys(remaining).length > 0) prunedScope[uid] = remaining;
  }
  await saveScope(blobStore, prunedScope);
}
627
/**
 * Drop the bridge vector store for (effective user, vault).
 * Without a blob store, the local directory DATA_DIR/vectors/<uid>/<vault> is removed
 * recursively. With one, blob key vectors/<uid>/<vault> is deleted when the store
 * exposes delete(); delete errors are ignored.
 * @param {{ delete?: Function } | null} blobStore
 * @param {string} effectiveUid
 * @param {string} vaultId
 * @returns {Promise<void>}
 */
async function removeHostedVectorBlobForVault(blobStore, effectiveUid, vaultId) {
  const userSegment = sanitizeUserId(effectiveUid);
  const vaultSegment = sanitizeVaultId(vaultId);
  const localDir = path.join(DATA_DIR, 'vectors', userSegment, vaultSegment);
  if (blobStore) {
    const key = `vectors/${userSegment}/${vaultSegment}`;
    try {
      if (typeof blobStore.delete === 'function') {
        await blobStore.delete(key);
      }
    } catch (_) {
      /* Netlify Blobs may omit delete; ignore */
    }
    return;
  }
  if (fs.existsSync(localDir)) {
    fs.rmSync(localDir, { recursive: true, force: true });
  }
}
644
/**
 * List the vault ids the canister reports for a user, cached per user id for
 * HOSTED_CONTEXT_CACHE_TTL_MS.
 *
 * Falls back to ['default'] (without caching) when CANISTER_URL or the user id is
 * missing, the response is not ok, the vault list is empty, or the request or JSON
 * parsing throws.
 *
 * The returned array is always a fresh copy, on a cache hit and on a miss, so a caller
 * that mutates it cannot corrupt the cached entry.
 *
 * @param {string} canisterUserId
 * @returns {Promise<string[]>}
 */
async function fetchCanisterVaultIdsForUser(canisterUserId) {
  if (!CANISTER_URL || !canisterUserId) return ['default'];
  const cacheKey = String(canisterUserId);
  const now = Date.now();
  const hit = canisterVaultIdsCache.get(cacheKey);
  if (hit) {
    if (hit.expires > now) return [...hit.ids];
    // Expired: drop it so a failed refresh does not leave a dead entry behind.
    canisterVaultIdsCache.delete(cacheKey);
  }
  try {
    const signal = hostedContextAbortSignal();
    const vRes = await fetch(CANISTER_URL + '/api/v1/vaults', {
      method: 'GET',
      headers: canisterHeaders({ 'X-User-Id': canisterUserId }),
      ...(signal ? { signal } : {}),
    });
    if (!vRes.ok) return ['default'];
    const data = await vRes.json();
    const vaults = Array.isArray(data.vaults) ? data.vaults : [];
    if (vaults.length === 0) return ['default'];
    const ids = vaults.map((v) => String(v.id || 'default')).filter(Boolean);
    canisterVaultIdsCache.set(cacheKey, { expires: now + HOSTED_CONTEXT_CACHE_TTL_MS, ids });
    return [...ids];
  } catch (_) {
    return ['default'];
  }
}
670
/**
 * Look up the explicit vault allowlist stored for a user.
 * @param {object | null | undefined} accessMap User id → array of vault ids.
 * @param {string} actorUid
 * @returns {string[] | null} Trimmed, non-empty vault ids; null when the user has no
 *   array entry or no usable ids.
 */
function explicitVaultAccessForUser(accessMap, actorUid) {
  if (!accessMap || typeof accessMap !== 'object') return null;
  const listed = accessMap[actorUid];
  if (!Array.isArray(listed) || listed.length === 0) return null;
  const ids = listed.flatMap((entry) => {
    const id = String(entry).trim();
    return id ? [id] : [];
  });
  return ids.length === 0 ? null : ids;
}
677
/**
 * Resolve the hosted context for a request: which canister user to act as, which vault
 * is targeted (from the X-Vault-Id header), and which scope restrictions apply.
 *
 * The workspace, roles, vault-access and scope records do not depend on each other, so
 * they are loaded concurrently rather than one after another.
 *
 * @param {import('express').Request} req
 * @param {string} actorUid
 * @returns {Promise<{ ok: true, effectiveCanisterUid: string, actorUid: string, vaultId: string, scope: { projects: string[], folders: string[] } | null, allowedVaultIds: string[], delegating: boolean } | { ok: false, status: number, code: string, error: string }>}
 *   A 403 FORBIDDEN result when the requested vault is not in the actor's allowed list.
 */
async function resolveHostedBridgeContext(req, actorUid) {
  const vaultId = sanitizeVaultId(req.headers['x-vault-id']);
  const [workspace, roles, access, scopeMap] = await Promise.all([
    loadWorkspace(req.blobStore),
    loadRoles(req.blobStore),
    loadVaultAccess(req.blobStore),
    loadScope(req.blobStore),
  ]);
  const ownerId = workspace.owner_user_id;
  const { effective, delegate } = resolveEffectiveCanisterUser({
    actorSub: actorUid,
    workspaceOwnerId: ownerId,
    storedRoles: roles,
    adminUserIdsSet,
  });
  // A delegating actor with an explicit allowlist does not need the canister lookup.
  const explicitVaultIds = explicitVaultAccessForUser(access, actorUid);
  const canisterIds =
    delegate && explicitVaultIds ? explicitVaultIds : await fetchCanisterVaultIdsForUser(effective);
  const allowedVaultIds = resolveAllowedVaultIdsForHostedContext({
    delegate,
    actorUid,
    accessMap: access,
    canisterIds,
  });
  if (!allowedVaultIds.includes(vaultId)) {
    return {
      ok: false,
      status: 403,
      code: 'FORBIDDEN',
      error: 'Access to this vault is not allowed.',
    };
  }
  let scope = getScopeForUserVaultFromScopeMap(scopeMap, actorUid, vaultId);
  // Evaluators must see the full vault (per allowed_vault_ids) to review proposals in context;
  // project/folder scope still applies to viewer/editor/admin delegating members.
  const actorRole = effectiveRole(actorUid, roles);
  if (actorRole === 'evaluator') {
    scope = null;
  }
  return {
    ok: true,
    effectiveCanisterUid: effective,
    actorUid,
    vaultId,
    scope,
    allowedVaultIds,
    delegating: delegate,
  };
}
730
/**
 * Hosted settings need the actor's vault allowlist without first proving access to a
 * specific vault. This keeps Business-only delegated users from being denied while the
 * UI is still deciding which vault to select.
 *
 * The workspace, roles and vault-access records do not depend on each other, so they are
 * loaded concurrently rather than one after another.
 *
 * @param {import('express').Request} req
 * @param {string} actorUid
 * @returns {Promise<{ effectiveCanisterUid: string, actorUid: string, allowedVaultIds: string[], delegating: boolean, workspaceOwnerId: string | null, role: string }>}
 */
async function resolveHostedBridgeSettingsContext(req, actorUid) {
  const [workspace, roles, access] = await Promise.all([
    loadWorkspace(req.blobStore),
    loadRoles(req.blobStore),
    loadVaultAccess(req.blobStore),
  ]);
  const ownerId = workspace.owner_user_id;
  const { effective, delegate } = resolveEffectiveCanisterUser({
    actorSub: actorUid,
    workspaceOwnerId: ownerId,
    storedRoles: roles,
    adminUserIdsSet,
  });
  // A delegating actor with an explicit allowlist does not need the canister lookup.
  const explicitVaultIds = explicitVaultAccessForUser(access, actorUid);
  const canisterIds =
    delegate && explicitVaultIds ? explicitVaultIds : await fetchCanisterVaultIdsForUser(effective);
  const allowedVaultIds = resolveAllowedVaultIdsForHostedContext({
    delegate,
    actorUid,
    accessMap: access,
    canisterIds,
  });
  return {
    effectiveCanisterUid: effective,
    actorUid,
    allowedVaultIds,
    delegating: delegate,
    workspaceOwnerId: ownerId,
    role: effectiveRole(actorUid, roles),
  };
}
768
/**
 * Resolve a user's effective role: a stored role in VALID_ROLES wins, then 'admin' for
 * ids listed in adminUserIdsSet, otherwise 'member'. A falsy uid is always 'member'.
 * @param {string} uid
 * @param {Record<string, string> | null | undefined} storedRoles
 * @returns {string}
 */
function effectiveRole(uid, storedRoles) {
  if (!uid) return 'member';
  const assigned = storedRoles ? storedRoles[uid] : undefined;
  if (assigned && VALID_ROLES.has(assigned)) return assigned;
  if (adminUserIdsSet.has(uid)) return 'admin';
  return 'member';
}
775
/**
 * Return a directory that contains (or will contain) knowtation_vectors.db for this user
 * and the vault named by the X-Vault-Id header. Phase 15: keyed by (uid, vault_id).
 *
 * Without a blob store the directory is DATA_DIR/vectors/<uid>/<vault>. With one, it is a
 * scratch directory under the OS temp dir, and the DB file is rehydrated from blob key
 * vectors/<uid>/<vault> when that blob is non-empty. A missing blob or a read error
 * leaves the directory as it is.
 *
 * @param {import('express').Request} req
 * @param {string} uid
 * @returns {Promise<string>} The directory path (created if it did not exist).
 */
async function getVectorsDirForUser(req, uid) {
  const userSegment = sanitizeUserId(uid);
  const vaultSegment = sanitizeVaultId(req.headers['x-vault-id']);

  if (!req.blobStore) {
    const localDir = path.join(DATA_DIR, 'vectors', userSegment, vaultSegment);
    if (!fs.existsSync(localDir)) {
      fs.mkdirSync(localDir, { recursive: true });
    }
    return localDir;
  }

  const scratchDir = path.join(os.tmpdir(), 'knowtation-bridge-vectors', userSegment, vaultSegment);
  if (!fs.existsSync(scratchDir)) {
    fs.mkdirSync(scratchDir, { recursive: true });
  }
  const blobKey = `vectors/${userSegment}/${vaultSegment}`;
  try {
    const payload = await req.blobStore.get(blobKey, { type: 'arrayBuffer' });
    if (payload && payload.byteLength > 0) {
      fs.writeFileSync(path.join(scratchDir, DB_FILENAME), Buffer.from(payload));
    }
  } catch (_) {
    // No existing blob or read error; start fresh
  }
  return scratchDir;
}
798
/**
 * Persist the user's vector DB from disk to the blob store (call after index).
 * Phase 15: the key includes the vault id from the X-Vault-Id header.
 * Does nothing when the request has no blob store or the DB file does not exist.
 * @param {import('express').Request} req
 * @param {string} uid
 * @param {string} vectorsDir Directory holding knowtation_vectors.db.
 * @returns {Promise<void>}
 */
async function persistVectorsToBlob(req, uid, vectorsDir) {
  const store = req.blobStore;
  if (!store) return;
  const dbFile = path.join(vectorsDir, DB_FILENAME);
  if (!fs.existsSync(dbFile)) return;
  const blobKey = `vectors/${sanitizeUserId(uid)}/${sanitizeVaultId(req.headers['x-vault-id'])}`;
  const contents = fs.readFileSync(dbFile);
  // Copy exactly this file's bytes out of the Buffer's backing ArrayBuffer.
  const start = contents.byteOffset;
  const bytes = contents.buffer.slice(start, start + contents.byteLength);
  await store.set(blobKey, bytes);
}
810
/**
 * Serialize a payload and sign it with HMAC-SHA256 under SESSION_SECRET.
 * @param {object} payload JSON-serializable value.
 * @returns {string} `<base64url(JSON)>.<hex signature>`.
 */
function signState(payload) {
  const json = JSON.stringify(payload);
  const hmac = crypto.createHmac('sha256', SESSION_SECRET);
  hmac.update(json);
  const encoded = Buffer.from(json).toString('base64url');
  return `${encoded}.${hmac.digest('hex')}`;
}
816
/**
 * Verify a `<base64url(JSON)>.<hex HMAC-SHA256>` state string keyed by SESSION_SECRET.
 * The signature is recomputed over the re-serialized payload and compared in
 * constant time, so response timing does not reveal how many leading characters
 * of a forged signature were correct.
 * @param {string} stateStr - State string to check.
 * @param {number} [maxAgeMs=600000] - Maximum accepted age of `payload.ts` in ms.
 * @returns {object|null} The decoded payload, or null when the input is missing,
 *   malformed, wrongly signed, or older than `maxAgeMs` (a missing `ts` counts as 0).
 */
function verifyState(stateStr, maxAgeMs = 600000) {
  if (!stateStr || typeof stateStr !== 'string') return null;
  const [b64, sig] = stateStr.split('.');
  if (!b64 || !sig) return null;
  try {
    const payload = JSON.parse(Buffer.from(b64, 'base64url').toString());
    const expected = crypto.createHmac('sha256', SESSION_SECRET).update(JSON.stringify(payload)).digest('hex');
    // Compare the hex strings byte-for-byte: timingSafeEqual requires equal lengths,
    // and comparing text (not hex-decoded bytes) keeps the original exact-match rule.
    const providedBytes = Buffer.from(sig, 'utf8');
    const expectedBytes = Buffer.from(expected, 'utf8');
    if (providedBytes.length !== expectedBytes.length) return null;
    if (!crypto.timingSafeEqual(providedBytes, expectedBytes)) return null;
    if (Date.now() - (payload.ts || 0) > maxAgeMs) return null;
    return payload;
  } catch (_) {
    // Undecodable base64/JSON or a non-object payload is treated as an invalid state.
    return null;
  }
}
831
/**
 * Extract the subject (`sub`) claim from a JWT verified against SESSION_SECRET.
 * @param {string} token - Encoded JWT.
 * @returns {string|null} The `sub` claim, or null when verification fails or the
 *   claim is absent.
 */
function userIdFromJwt(token) {
  try {
    const { sub } = jwt.verify(token, SESSION_SECRET);
    return sub ?? null;
  } catch (_) {
    return null;
  }
}
840
// Express app shared by every bridge route; JSON request bodies are capped at 1mb.
const app = express();
app.use(express.json({ limit: '1mb' }));

// Expose the globally registered Netlify Blob store (or null) as req.blobStore.
app.use(function attachBlobStore(req, _res, next) {
  const store = globalThis.__netlify_blob_store;
  req.blobStore = store ? store : null;
  next();
});
848
// Background-function marker. `netlify/functions/bridge-index-background.mjs`
// validates an HMAC-signed inbound request, then sets
// `globalThis.__bridge_internal_request = { canisterUid, vaultId, jobId }` BEFORE
// invoking the same Express app via serverless-http. The index handler reads
// `req.bridgeInternalRequest` to decide whether to route (sync path) or skip
// routing and execute inline (background path). Globals are used because
// serverless-http does not let us inject per-request locals from the wrapper.
app.use(function attachBridgeInternalRequest(req, _res, next) {
  const marker = globalThis.__bridge_internal_request;
  req.bridgeInternalRequest = marker ? marker : null;
  next();
});
860
// CORS headers for every response. The allowed origin comes from HUB_CORS_ORIGIN
// and falls back to the wildcard. Browsers reject credentialed requests when the
// allowed origin is `*`, so `Access-Control-Allow-Credentials` is only advertised
// when a concrete origin is configured.
app.use((_req, res, next) => {
  const allowedOrigin = process.env.HUB_CORS_ORIGIN || '*';
  res.set('Access-Control-Allow-Origin', allowedOrigin);
  res.set('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS');
  res.set('Access-Control-Allow-Headers', 'Authorization, Content-Type, X-Vault-Id, X-User-Id');
  if (allowedOrigin !== '*') {
    res.set('Access-Control-Allow-Credentials', 'true');
  }
  next();
});
868
// When Netlify rewrites /* to /.netlify/functions/bridge/:splat, Express sees the full path; strip prefix so routes match.
// NOTE(review): this is a plain string-prefix match, so a URL for a sibling function whose
// name merely starts with "bridge" would also be rewritten — confirm that is intended.
if (inServerless) {
  const FUNCTION_PATH_PREFIX = '/.netlify/functions/bridge';
  app.use(function stripFunctionPrefix(req, _res, next) {
    const { url } = req;
    if (url.startsWith(FUNCTION_PATH_PREFIX)) {
      const remainder = url.slice(FUNCTION_PATH_PREFIX.length);
      // An exact match leaves nothing behind; route it to the root path.
      req.url = remainder === '' ? '/' : remainder;
    }
    next();
  });
}
879
// Public deploy probe (no auth): compare to Netlify **knowtation-bridge** Production commit vs gateway.
// Every field is read from environment variables; unset values are reported as null.
app.get('/api/v1/bridge-version', function reportBridgeVersion(_req, res) {
  const env = process.env;
  const commit = env.COMMIT_REF || env.VERCEL_GIT_COMMIT_SHA || null;
  const onNetlify = Boolean(env.NETLIFY || env.AWS_LAMBDA_FUNCTION_NAME);
  res.json({
    service: 'knowtation-bridge',
    commit,
    deploy_id: env.DEPLOY_ID || null,
    context: env.CONTEXT || null,
    netlify: onNetlify,
  });
});
890
// ——— Roles & invites (hosted parity) ———
/**
 * Express middleware: authenticate the request from its `Authorization: Bearer <jwt>`
 * header. On success stores the token's user id on `req.uid` and calls `next()`;
 * otherwise responds 401 with code UNAUTHORIZED.
 */
async function requireBridgeAuth(req, res, next) {
  const header = req.headers.authorization;
  const bearer = header && header.startsWith('Bearer ') ? header.slice(7) : null;
  const uid = bearer ? userIdFromJwt(bearer) : null;
  if (uid) {
    req.uid = uid;
    next();
    return;
  }
  return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
}
900
/**
 * Express middleware: allow only users whose effective role is `admin`.
 * Expects `req.uid` to be set already (see requireBridgeAuth).
 * Responds 403 (FORBIDDEN) for non-admins and 500 (INTERNAL_ERROR) when the role
 * store cannot be read, so a storage failure never leaves the request unanswered.
 */
async function requireBridgeAdmin(req, res, next) {
  let role;
  try {
    const roles = await loadRoles(req.blobStore);
    role = effectiveRole(req.uid, roles);
  } catch (e) {
    console.error('[bridge] requireBridgeAdmin', e?.message);
    return res.status(500).json({ error: e?.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
  if (role !== 'admin') return res.status(403).json({ error: 'Admin only', code: 'FORBIDDEN' });
  // Called outside the try so errors from downstream handlers are not mistaken for role-store failures.
  next();
}
907
/**
 * Import / index parity: viewers cannot write; default role is member (treated like editor for hosted).
 * Express middleware that rejects only the `viewer` role with 403 (FORBIDDEN); every
 * other effective role passes. Expects `req.uid` to be set already. Responds 500
 * (INTERNAL_ERROR) when the role store cannot be read, so a storage failure never
 * leaves the request unanswered.
 */
async function requireBridgeEditorOrAdmin(req, res, next) {
  let role;
  try {
    const roles = await loadRoles(req.blobStore);
    role = effectiveRole(req.uid, roles);
  } catch (e) {
    console.error('[bridge] requireBridgeEditorOrAdmin', e?.message);
    return res.status(500).json({ error: e?.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
  if (role === 'viewer') {
    return res.status(403).json({ error: 'This action requires editor or admin.', code: 'FORBIDDEN' });
  }
  // Called outside the try so errors from downstream handlers are not mistaken for role-store failures.
  next();
}
917
// Admin-only: return the stored role map together with the evaluator approval flags.
app.get('/api/v1/roles', requireBridgeAuth, requireBridgeAdmin, async function listRoles(req, res) {
  const store = req.blobStore;
  try {
    const payload = {
      roles: await loadRoles(store),
      evaluator_may_approve: await loadEvaluatorMayApproveMap(store),
    };
    res.json(payload);
  } catch (err) {
    console.error('[bridge] GET /api/v1/roles', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
928
// Admin-only: assign a role to a user id. Body: { user_id, role?, evaluator_may_approve? }.
// `role` defaults to "editor" when omitted. For evaluators the optional
// `evaluator_may_approve` flag is stored; for any other role a stale flag is removed.
app.post('/api/v1/roles', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  const body = req.body || {};
  const { user_id: userId, role } = body;
  if (!userId || typeof userId !== 'string' || !userId.trim()) {
    return res.status(400).json({ error: 'user_id required (e.g. github:12345)', code: 'BAD_REQUEST' });
  }
  // A truthy non-string role (number, object, ...) used to throw on `.toLowerCase()`
  // outside the try block; reject it as a bad request instead.
  const r = typeof role === 'string' && role ? role.toLowerCase() : role ? null : 'editor';
  if (r === null || !VALID_ROLES.has(r)) {
    return res.status(400).json({ error: 'role must be admin, editor, viewer, or evaluator', code: 'BAD_REQUEST' });
  }
  try {
    const roles = await loadRoles(req.blobStore);
    const uidKey = userId.trim();
    roles[uidKey] = r;
    await saveRoles(req.blobStore, roles);
    const mayMap = await loadEvaluatorMayApproveMap(req.blobStore);
    if (r === 'evaluator' && Object.prototype.hasOwnProperty.call(body, 'evaluator_may_approve')) {
      mayMap[uidKey] = Boolean(body.evaluator_may_approve);
      await saveEvaluatorMayApproveMap(req.blobStore, mayMap);
    } else if (r !== 'evaluator' && Object.prototype.hasOwnProperty.call(mayMap, uidKey)) {
      // The approval flag only applies to evaluators; drop it when the role changes.
      delete mayMap[uidKey];
      await saveEvaluatorMayApproveMap(req.blobStore, mayMap);
    }
    res.json({ ok: true });
  } catch (e) {
    console.error('[bridge] POST /api/v1/roles', e?.message);
    res.status(500).json({ error: e.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
957
// Admin-only: set the approval flag for a user whose effective role is evaluator.
// Body: { user_id: string, evaluator_may_approve: boolean }.
app.post('/api/v1/roles/evaluator-may-approve', requireBridgeAuth, requireBridgeAdmin, async function setEvaluatorMayApprove(req, res) {
  const body = req.body || {};
  const userId = body.user_id;
  const flag = body.evaluator_may_approve;

  const hasUserId = typeof userId === 'string' && userId.trim() !== '';
  if (!hasUserId) {
    return res.status(400).json({ error: 'user_id required', code: 'BAD_REQUEST' });
  }
  if (typeof flag !== 'boolean') {
    return res.status(400).json({ error: 'evaluator_may_approve must be boolean', code: 'BAD_REQUEST' });
  }

  const uidKey = userId.trim();
  const store = req.blobStore;
  try {
    const roles = await loadRoles(store);
    const isEvaluator = effectiveRole(uidKey, roles) === 'evaluator';
    if (!isEvaluator) {
      return res.status(400).json({ error: 'User must have evaluator role', code: 'BAD_REQUEST' });
    }
    const approvals = await loadEvaluatorMayApproveMap(store);
    approvals[uidKey] = flag;
    await saveEvaluatorMayApproveMap(store, approvals);
    res.json({ ok: true });
  } catch (err) {
    console.error('[bridge] POST /api/v1/roles/evaluator-may-approve', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
981
// Admin-only: list invites that have not expired yet, newest first.
// Entries whose `created_at` cannot be parsed are skipped; previously a single
// such entry made `toISOString()` throw and turned the whole listing into a 500.
app.get('/api/v1/invites', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  try {
    const invitesMap = await loadInvites(req.blobStore);
    const now = Date.now();
    const list = [];
    for (const [token, entry] of Object.entries(invitesMap)) {
      const created = new Date(entry?.created_at).getTime();
      if (!Number.isFinite(created) || now - created > INVITE_EXPIRY_MS) continue;
      const expires_at = new Date(created + INVITE_EXPIRY_MS).toISOString();
      list.push({ token, role: entry.role, created_at: entry.created_at, expires_at });
    }
    list.sort((a, b) => new Date(b.created_at) - new Date(a.created_at));
    res.json({ invites: list });
  } catch (e) {
    console.error('[bridge] GET /api/v1/invites', e?.message);
    res.status(500).json({ error: e.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1001
// Admin-only: create an invite token for a role (default "editor") and return the
// invite URL built from HUB_UI_ORIGIN and HUB_UI_PATH (default /hub).
app.post('/api/v1/invites', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  const requestedRole = req.body?.role;
  // A truthy non-string role used to throw on `.toLowerCase()` outside the try block;
  // treat it as a bad request like any other unknown role.
  const role = typeof requestedRole === 'string' && requestedRole
    ? requestedRole.toLowerCase()
    : requestedRole
      ? null
      : 'editor';
  if (role === null || !['viewer', 'editor', 'admin', 'evaluator'].includes(role)) {
    return res.status(400).json({ error: 'role must be viewer, editor, admin, or evaluator', code: 'BAD_REQUEST' });
  }
  try {
    const token = crypto.randomBytes(24).toString('base64url');
    const created_at = new Date().toISOString();
    const expires_at = new Date(Date.now() + INVITE_EXPIRY_MS).toISOString();
    const invites = await loadInvites(req.blobStore);
    invites[token] = { role, created_at };
    await saveInvites(req.blobStore, invites);
    // Collapse any run of trailing slashes so the URL ends with exactly one.
    const base = (HUB_UI_ORIGIN + (HUB_UI_PATH || '/hub') + '/').replace(/(\/)+$/, '/');
    const invite_url = base + '?invite=' + encodeURIComponent(token);
    res.status(201).json({ invite_url, token, role, created_at, expires_at });
  } catch (e) {
    console.error('[bridge] POST /api/v1/invites', e?.message);
    res.status(500).json({ error: e.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1022
// Admin-only: revoke an invite. Responds { ok: true, removed } where `removed`
// tells whether the token was actually present in the invite store.
app.delete('/api/v1/invites/:token', requireBridgeAuth, requireBridgeAdmin, async (req, res) => {
  const token = req.params.token;
  if (!token) return res.status(400).json({ error: 'token required', code: 'BAD_REQUEST' });
  try {
    const invites = await loadInvites(req.blobStore);
    // Own-property check: `token in invites` would also be true for inherited
    // keys such as "toString" and report a removal that never happened.
    const had = Object.prototype.hasOwnProperty.call(invites, token);
    delete invites[token];
    await saveInvites(req.blobStore, invites);
    res.json({ ok: true, removed: had });
  } catch (e) {
    console.error('[bridge] DELETE /api/v1/invites/:token', e?.message);
    res.status(500).json({ error: e.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1037
// Redeem an invite for the authenticated user: grants the invite's role, then
// deletes the invite. Body: { token }.
// Responses: 404 unknown/used/malformed invite, 410 expired invite, 200 { ok, role }.
app.post('/api/v1/invites/consume', requireBridgeAuth, async (req, res) => {
  const rawToken = req.body?.token;
  if (!rawToken || typeof rawToken !== 'string' || !rawToken.trim()) {
    return res.status(400).json({ error: 'token required', code: 'BAD_REQUEST' });
  }
  const token = rawToken.trim();
  const uid = req.uid;
  try {
    const invites = await loadInvites(req.blobStore);
    // Only own properties are real invites. A plain lookup for a token such as
    // "constructor" would return an inherited value, skip the expiry check (NaN
    // age) and overwrite the caller's stored role with `undefined`.
    const entry = Object.prototype.hasOwnProperty.call(invites, token) ? invites[token] : null;
    if (!entry || typeof entry !== 'object' || !VALID_ROLES.has(entry.role)) {
      return res.status(404).json({ error: 'Invite not found or already used', code: 'NOT_FOUND' });
    }
    const created = new Date(entry.created_at).getTime();
    // An unparseable creation time is treated as expired rather than as never expiring.
    if (!Number.isFinite(created) || Date.now() - created > INVITE_EXPIRY_MS) {
      delete invites[token];
      await saveInvites(req.blobStore, invites);
      return res.status(410).json({ error: 'Invite expired', code: 'EXPIRED' });
    }
    const roles = await loadRoles(req.blobStore);
    roles[uid] = entry.role;
    await saveRoles(req.blobStore, roles);
    delete invites[token];
    await saveInvites(req.blobStore, invites);
    res.json({ ok: true, role: entry.role });
  } catch (e) {
    console.error('[bridge] POST /api/v1/invites/consume', e?.message);
    res.status(500).json({ error: e.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1067
// For gateway GET /api/v1/settings: return role from bridge store so invited users get correct role
// Responds with the caller's effective role and whether they may approve proposals.
app.get('/api/v1/role', requireBridgeAuth, async function describeOwnRole(req, res) {
  const store = req.blobStore;
  const actor = req.uid;
  try {
    const roles = await loadRoles(store);
    const approvals = await loadEvaluatorMayApproveMap(store);
    res.json({
      role: effectiveRole(actor, roles),
      may_approve_proposals: mayApproveProposalsForUser(actor, roles, approvals),
    });
  } catch (err) {
    console.error('[bridge] GET /api/v1/role', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1081
// Admin-only: report the stored workspace owner id.
app.get('/api/v1/workspace', requireBridgeAuth, requireBridgeAdmin, async function readWorkspace(req, res) {
  try {
    const { owner_user_id } = await loadWorkspace(req.blobStore);
    res.json({ owner_user_id });
  } catch (err) {
    console.error('[bridge] GET /api/v1/workspace', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1091
// Admin-only: set or clear the workspace owner. Body: { owner_user_id }.
// Anything other than a non-blank string clears the owner (stored as null).
app.post('/api/v1/workspace', requireBridgeAuth, requireBridgeAdmin, async function writeWorkspace(req, res) {
  const raw = req.body?.owner_user_id;
  const trimmed = typeof raw === 'string' ? raw.trim() : '';
  const owner_user_id = trimmed ? trimmed : null;
  const store = req.blobStore;
  try {
    await saveWorkspace(store, owner_user_id);
    // Echo what the store now holds rather than what was submitted.
    const saved = await loadWorkspace(store);
    res.json({ ok: true, owner_user_id: saved.owner_user_id });
  } catch (err) {
    console.error('[bridge] POST /api/v1/workspace', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1109
// Admin-only: return the stored vault-access data.
app.get('/api/v1/vault-access', requireBridgeAuth, requireBridgeAdmin, async function readVaultAccess(req, res) {
  try {
    res.json({ access: await loadVaultAccess(req.blobStore) });
  } catch (err) {
    console.error('[bridge] GET /api/v1/vault-access', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1119
// Admin-only: replace the stored vault-access data. Body: { access: object }.
// Responds with the value as re-read from the store after saving.
app.post('/api/v1/vault-access', requireBridgeAuth, requireBridgeAdmin, async function writeVaultAccess(req, res) {
  const submitted = req.body?.access;
  const isObject = Boolean(submitted) && typeof submitted === 'object';
  if (!isObject) {
    return res.status(400).json({ error: 'access object required', code: 'BAD_REQUEST' });
  }
  const store = req.blobStore;
  try {
    await saveVaultAccess(store, submitted);
    const stored = await loadVaultAccess(store);
    res.json({ ok: true, access: stored });
  } catch (err) {
    console.error('[bridge] POST /api/v1/vault-access', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1134
// Admin-only: return the stored scope data.
app.get('/api/v1/scope', requireBridgeAuth, requireBridgeAdmin, async function readScope(req, res) {
  try {
    res.json({ scope: await loadScope(req.blobStore) });
  } catch (err) {
    console.error('[bridge] GET /api/v1/scope', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1144
// Admin-only: replace the stored scope data. Body: { scope: object }.
// Responds with the value as re-read from the store after saving.
app.post('/api/v1/scope', requireBridgeAuth, requireBridgeAdmin, async function writeScope(req, res) {
  const submitted = req.body?.scope;
  const isObject = Boolean(submitted) && typeof submitted === 'object';
  if (!isObject) {
    return res.status(400).json({ error: 'scope object required', code: 'BAD_REQUEST' });
  }
  const store = req.blobStore;
  try {
    await saveScope(store, submitted);
    const stored = await loadScope(store);
    res.json({ ok: true, scope: stored });
  } catch (err) {
    console.error('[bridge] POST /api/v1/scope', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1159
// Describe the hosted context resolved for the caller: workspace owner, effective
// canister user, delegation flag, allowed vaults, scope, role and approval right.
// A failed context resolution is passed through with its own status and code.
app.get('/api/v1/hosted-context', requireBridgeAuth, async function describeHostedContext(req, res) {
  const actor = req.uid;
  const store = req.blobStore;
  try {
    const workspace = await loadWorkspace(store);
    const roles = await loadRoles(store);
    const ctx = await resolveHostedBridgeContext(req, actor);
    if (!ctx.ok) {
      return res.status(ctx.status).json({ error: ctx.error, code: ctx.code });
    }
    const role = effectiveRole(actor, roles);
    const approvals = await loadEvaluatorMayApproveMap(store);
    res.json({
      actor_sub: actor,
      workspace_owner_id: workspace.owner_user_id,
      effective_canister_user_id: ctx.effectiveCanisterUid,
      delegating: ctx.delegating,
      allowed_vault_ids: ctx.allowedVaultIds,
      scope: ctx.scope,
      role,
      may_approve_proposals: mayApproveProposalsForUser(actor, roles, approvals),
    });
  } catch (err) {
    console.error('[bridge] GET /api/v1/hosted-context', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1187
// Settings variant of the hosted-context probe: same response shape, but `scope`
// is always null and the role comes from the settings context itself.
app.get('/api/v1/hosted-context/settings', requireBridgeAuth, async function describeHostedSettingsContext(req, res) {
  const actor = req.uid;
  try {
    const ctx = await resolveHostedBridgeSettingsContext(req, actor);
    const approvals = await loadEvaluatorMayApproveMap(req.blobStore);
    // The approval check expects a role map, so wrap the resolved role for this actor only.
    const rolesForActor = { [actor]: ctx.role };
    res.json({
      actor_sub: actor,
      workspace_owner_id: ctx.workspaceOwnerId,
      effective_canister_user_id: ctx.effectiveCanisterUid,
      delegating: ctx.delegating,
      allowed_vault_ids: ctx.allowedVaultIds,
      scope: null,
      role: ctx.role,
      may_approve_proposals: mayApproveProposalsForUser(actor, rolesForActor, approvals),
    });
  } catch (err) {
    console.error('[bridge] GET /api/v1/hosted-context/settings', err?.message);
    res.status(500).json({ error: err.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1209
// ——— Connect GitHub ———
// Routes are only registered when both GitHub OAuth credentials are configured.
if (process.env.GITHUB_CLIENT_ID && process.env.GITHUB_CLIENT_SECRET) {
  // Step 1: identify the user from a JWT (query `token` or Bearer header) and
  // redirect to GitHub's authorize page with a signed, timestamped state.
  app.get('/auth/github-connect', (req, res) => {
    const token = req.query.token || (req.headers.authorization && req.headers.authorization.startsWith('Bearer ') && req.headers.authorization.slice(7));
    const uid = token ? userIdFromJwt(token) : null;
    if (!uid) {
      return res.redirect(HUB_UI_ORIGIN + HUB_UI_PATH + '/?github_connect_error=not_authenticated');
    }
    const state = signState({ uid, ts: Date.now() });
    const redirectUri = BASE_URL + '/auth/callback/github-connect';
    const url = 'https://github.com/login/oauth/authorize?client_id=' + encodeURIComponent(process.env.GITHUB_CLIENT_ID)
      + '&redirect_uri=' + encodeURIComponent(redirectUri)
      + '&scope=repo'
      + '&state=' + encodeURIComponent(state);
    res.redirect(url);
  });

  // Step 2: GitHub redirects back with `code` and `state`. Verify the state,
  // exchange the code for an access token, store it for the user, and redirect
  // to the Hub UI with either `github_connected=1` or `github_connect_error=<reason>`.
  app.get('/auth/callback/github-connect', async (req, res) => {
    const { code, state } = req.query || {};
    const hubBase = HUB_UI_ORIGIN + HUB_UI_PATH + '/';
    console.log('[bridge] callback: hubBase=%s (ORIGIN=%s PATH=%s)', hubBase, HUB_UI_ORIGIN, HUB_UI_PATH);

    const redirectWithError = (reason) => {
      const url = hubBase + '?github_connect_error=' + reason;
      console.log('[bridge] redirect (%s): %s', reason, url);
      return res.redirect(302, url);
    };

    const payload = verifyState(state);
    if (!payload) return redirectWithError('error_state');
    if (!code) return redirectWithError('error_code');

    const uid = payload.uid;
    const redirectUri = BASE_URL + '/auth/callback/github-connect';

    // Network failures and non-JSON replies from GitHub are reported as error_token
    // instead of escaping the handler and leaving the browser without a redirect.
    let accessToken = null;
    try {
      const tokenRes = await fetch('https://github.com/login/oauth/access_token', {
        method: 'POST',
        headers: { Accept: 'application/json', 'Content-Type': 'application/json' },
        body: JSON.stringify({
          client_id: process.env.GITHUB_CLIENT_ID,
          client_secret: process.env.GITHUB_CLIENT_SECRET,
          code,
          redirect_uri: redirectUri,
        }),
      });
      const data = await tokenRes.json();
      accessToken = data?.access_token || null;
    } catch (e) {
      console.error('[bridge] GitHub OAuth token exchange failed:', e?.message || e);
    }
    if (!accessToken) return redirectWithError('error_token');

    try {
      const tokensByUser = await loadTokens(req.blobStore);
      // Keep a previously chosen backup repo when the user reconnects.
      tokensByUser[uid] = { token: accessToken, repo: tokensByUser[uid]?.repo || null };
      await saveTokens(req.blobStore, tokensByUser);
    } catch (e) {
      console.error('[bridge] saveTokens after GitHub OAuth failed:', e?.message || e);
      const url = hubBase + '?github_connect_error=blob_storage';
      return res.redirect(302, url);
    }
    const redirectTo = hubBase + '?github_connected=1';
    console.log('[bridge] redirect after connect: HUB_UI_ORIGIN=%s HUB_UI_PATH=%s redirectTo=%s', HUB_UI_ORIGIN, HUB_UI_PATH, redirectTo);
    res.redirect(302, redirectTo);
  });
}
1274
// ——— Delete vault (canister + team access/scope + vector blob) ———
// Editors/admins only, and when a workspace owner is set only that owner may delete.
// The default vault cannot be deleted. After the canister confirms the deletion the
// vault is stripped from access/scope data and its vector blob is removed.
app.delete('/api/v1/vaults/:vaultId', requireBridgeAuth, requireBridgeEditorOrAdmin, async (req, res) => {
  if (!CANISTER_URL) {
    return res.status(503).json({ error: 'CANISTER_URL not configured', code: 'NOT_AVAILABLE' });
  }
  const vaultId = sanitizeVaultId(req.params.vaultId);
  if (!req.params.vaultId || String(req.params.vaultId).trim() === '' || vaultId === 'default') {
    return res.status(400).json({ error: 'Cannot delete the default vault', code: 'BAD_REQUEST' });
  }

  try {
    // Resolve the hosted context against the vault being deleted by temporarily
    // overriding the x-vault-id header. The original header state is restored in
    // `finally`, so it is put back even when context resolution throws.
    const hadVaultHeader = Object.prototype.hasOwnProperty.call(req.headers, 'x-vault-id');
    const prevVaultHeader = req.headers['x-vault-id'];
    let hctx;
    req.headers['x-vault-id'] = vaultId;
    try {
      hctx = await resolveHostedBridgeContext(req, req.uid);
    } finally {
      if (hadVaultHeader) {
        req.headers['x-vault-id'] = prevVaultHeader;
      } else {
        delete req.headers['x-vault-id'];
      }
    }

    if (!hctx.ok) {
      return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
    }

    const workspace = await loadWorkspace(req.blobStore);
    const owner = workspace.owner_user_id && String(workspace.owner_user_id).trim();
    if (owner && req.uid !== owner) {
      return res.status(403).json({
        error: 'Only the workspace owner can delete vaults.',
        code: 'FORBIDDEN',
      });
    }

    let canRes;
    try {
      canRes = await fetch(`${CANISTER_URL}/api/v1/vaults/${encodeURIComponent(vaultId)}`, {
        method: 'DELETE',
        headers: canisterHeaders({ 'X-User-Id': hctx.effectiveCanisterUid }),
      });
    } catch (e) {
      console.error('[bridge] DELETE vault canister fetch', e?.message);
      return res.status(502).json({ error: 'Could not reach canister', code: 'BAD_GATEWAY' });
    }

    const text = await canRes.text();
    if (!canRes.ok) {
      // Prefer the canister's own JSON `error` field; fall back to the raw body.
      let errMsg = text;
      try {
        const j = JSON.parse(text);
        if (j && j.error) errMsg = j.error;
      } catch (_) {
        // Body is not JSON; keep the raw text as the message.
      }
      return res.status(canRes.status >= 400 ? canRes.status : 502).json({
        error: errMsg || 'Canister error',
        code: 'UPSTREAM_ERROR',
      });
    }

    await stripHostedVaultFromAccessAndScope(req.blobStore, vaultId);
    await removeHostedVectorBlobForVault(req.blobStore, hctx.effectiveCanisterUid, vaultId);

    let data;
    try {
      data = text ? JSON.parse(text) : {};
    } catch (_) {
      // Canister replied with a non-JSON success body; report the deletion ourselves.
      return res.json({ ok: true, deleted_vault_id: vaultId });
    }
    res.json({ ok: true, ...data });
  } catch (e) {
    // Store/network failures used to escape the async handler and leave the request unanswered.
    console.error('[bridge] DELETE /api/v1/vaults/:vaultId', e?.message);
    if (res.headersSent) return;
    res.status(500).json({ error: e?.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1337
/**
 * Full proposal documents for GitHub backup (list + GET each id). Same scope as notes.
 * Lists proposals on the canister, fetches every listed proposal that has a
 * `proposal_id` one at a time, and applies the scope filter to the result.
 * @param {string} canisterUrl - Canister base URL; one trailing slash is ignored.
 * @param {string} canisterUid - Sent as the `X-User-Id` header.
 * @param {string} vaultId - Sent as the `X-Vault-Id` header.
 * @param {{ projects?: string[], folders?: string[] } | null | undefined} scope
 * @returns {Promise<object[]>} Scope-filtered proposal bodies.
 * @throws {Error} With `status = 502` when the list or any single proposal request
 *   is not OK, or when the list body is not a JSON object.
 */
async function fetchFullProposalsForGithubBackup(canisterUrl, canisterUid, vaultId, scope) {
  const base = String(canisterUrl || '').replace(/\/$/, '');
  const headers = canisterHeaders({ 'X-User-Id': canisterUid, 'X-Vault-Id': vaultId });

  const upstreamError = (message) => {
    const err = new Error(message);
    err.status = 502;
    return err;
  };

  const listRes = await fetch(`${base}/api/v1/proposals`, { method: 'GET', headers });
  if (!listRes.ok) {
    throw upstreamError(`Canister proposals list ${listRes.status}`);
  }
  let listJson;
  try {
    listJson = await listRes.json();
  } catch {
    throw upstreamError('Invalid canister proposals list JSON');
  }
  // A JSON `null` (or other non-object) body used to surface as a raw TypeError;
  // report it the same way as unparseable JSON.
  if (listJson === null || typeof listJson !== 'object') {
    throw upstreamError('Invalid canister proposals list JSON');
  }

  const stubs = Array.isArray(listJson.proposals) ? listJson.proposals : [];
  const full = [];
  for (const stub of stubs) {
    const id = stub && stub.proposal_id ? String(stub.proposal_id) : '';
    if (!id) continue;
    const oneRes = await fetch(`${base}/api/v1/proposals/${encodeURIComponent(id)}`, {
      method: 'GET',
      headers,
    });
    if (!oneRes.ok) {
      throw upstreamError(`Canister proposal ${id} ${oneRes.status}`);
    }
    const text = await oneRes.text();
    const body = parseCanisterProposalGetBody(id, text, stub);
    if (body._knowtation_backup_json_unparseable) {
      // The body is still included in the backup; only log a short preview for diagnosis.
      console.error('[bridge] vault/sync proposal JSON parse failed', { id, preview: text.slice(0, 300) });
    }
    full.push(body);
  }
  return applyScopeFilterToProposals(full, scope);
}
1385
// ——— Back up now: fetch vault from canister, push to GitHub ———
// Flow: resolve hosted context → load the caller's GitHub connection → export notes
// and full proposals from the canister (scope-filtered) → write one blob per note
// plus `.knowtation/backup/v1/snapshot.json` → create tree + commit → move (or
// create) the default branch ref. Body: optional { repo: "owner/name" }, which is
// remembered for the next backup.
app.post('/api/v1/vault/sync', requireBridgeAuth, requireBridgeEditorOrAdmin, async (req, res) => {
  const uid = req.uid;
  const badGateway = (error, extra = {}) => res.status(502).json({ error, code: 'BAD_GATEWAY', ...extra });

  try {
    const hctx = await resolveHostedBridgeContext(req, uid);
    if (!hctx.ok) {
      return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
    }
    const canisterUid = hctx.effectiveCanisterUid;

    const tokensByUser = await loadTokens(req.blobStore);
    const conn = tokensByUser[uid];
    const repo = req.body?.repo || conn?.repo;
    if (!conn?.token) {
      return res.status(400).json({ error: 'GitHub not connected', code: 'GITHUB_NOT_CONNECTED' });
    }
    if (!repo || typeof repo !== 'string') {
      return res.status(400).json({ error: 'Repo required', code: 'REPO_REQUIRED', hint: 'Send { "repo": "owner/name" } or set repo after connecting GitHub.' });
    }

    // owner/name are interpolated into GitHub API URLs, so only accept plain name
    // characters. This rejects `.`/`..` segments and `?`/`#`, which would otherwise
    // redirect the request to a different API path.
    const [owner, name] = repo.split('/').filter(Boolean);
    const isRepoSegment = (s) => typeof s === 'string' && /^[A-Za-z0-9_.-]+$/.test(s) && s !== '.' && s !== '..';
    if (!isRepoSegment(owner) || !isRepoSegment(name)) {
      return res.status(400).json({ error: 'Invalid repo format', code: 'BAD_REQUEST' });
    }

    const vaultId = sanitizeVaultId(req.headers['x-vault-id']);

    // Fetch vault from canister (export)
    let exportRes;
    try {
      exportRes = await fetch(CANISTER_URL + '/api/v1/export', {
        method: 'GET',
        headers: canisterHeaders({ 'X-User-Id': canisterUid, 'X-Vault-Id': vaultId }),
      });
    } catch (e) {
      return badGateway('Could not reach canister');
    }
    if (!exportRes.ok) {
      return badGateway('Canister error', { status: exportRes.status });
    }
    let vault;
    try {
      vault = await exportRes.json();
    } catch (_) {
      return badGateway('Invalid canister response');
    }
    // Tolerate a null export body or a missing/non-array `notes` field.
    let notes = Array.isArray(vault?.notes) ? vault.notes : [];
    if (hctx.scope) {
      notes = applyScopeFilterToNotes(notes, hctx.scope);
    }

    let proposals = [];
    try {
      proposals = await fetchFullProposalsForGithubBackup(CANISTER_URL, canisterUid, vaultId, hctx.scope);
    } catch (e) {
      console.error('[bridge] vault/sync proposals fetch', e?.message);
      return res.status(e.status || 502).json({
        error: e.message || 'Could not fetch proposals for backup',
        code: 'BAD_GATEWAY',
      });
    }

    // Store repo for next time
    if (req.body?.repo && (!conn.repo || conn.repo !== repo)) {
      tokensByUser[uid] = { ...conn, repo };
      await saveTokens(req.blobStore, tokensByUser);
    }

    // Push to GitHub: get default branch, create blobs, create tree, commit, push
    const ghApi = 'https://api.github.com';
    const repoApi = `${ghApi}/repos/${encodeURIComponent(owner)}/${encodeURIComponent(name)}`;
    // GitHub requires a non-empty User-Agent; some serverless runtimes send none → 403 "Administrative rules".
    const ghHeaders = {
      Authorization: 'token ' + conn.token,
      Accept: 'application/vnd.github.v3+json',
      'Content-Type': 'application/json',
      'User-Agent': 'KnowtationHub-Bridge/1.0 (+https://knowtation.store)',
    };
    const headsRefEnc = (branch) => encodeURIComponent(`heads/${String(branch || 'main').trim()}`);

    // Upload UTF-8 text as a git blob; resolves to its sha, or null when GitHub rejects it.
    const createBlob = async (text) => {
      const blobRes = await fetch(`${repoApi}/git/blobs`, {
        method: 'POST',
        headers: ghHeaders,
        body: JSON.stringify({ content: Buffer.from(text, 'utf8').toString('base64'), encoding: 'base64' }),
      });
      if (!blobRes.ok) return null;
      const blob = await blobRes.json();
      return blob.sha || null;
    };

    try {
      const repoRes = await fetch(repoApi, { headers: ghHeaders });
      if (!repoRes.ok) {
        if (repoRes.status === 404) {
          return res.status(400).json({ error: 'Repo not found or no access', code: 'REPO_NOT_FOUND' });
        }
        return badGateway('GitHub API error');
      }
      const repoData = await repoRes.json();
      const defaultBranch = String(repoData.default_branch || 'main').trim() || 'main';

      // GET single ref: documented as /git/ref/{ref} with ref = heads/<branch> (URL-encoded). Avoids edge cases with /git/refs/... on some hosts.
      const refRes = await fetch(`${repoApi}/git/ref/${headsRefEnc(defaultBranch)}`, { headers: ghHeaders });
      let baseSha = null;
      let baseTreeSha = null;
      if (refRes.ok) {
        const refData = await refRes.json();
        baseSha = refData.object?.sha;
        if (!baseSha) {
          return badGateway('Invalid ref response');
        }
        const baseCommitRes = await fetch(`${repoApi}/git/commits/${baseSha}`, { headers: ghHeaders });
        if (!baseCommitRes.ok) {
          return badGateway('Could not get base commit');
        }
        const baseCommit = await baseCommitRes.json();
        baseTreeSha = baseCommit.tree?.sha;
      } else if (refRes.status !== 404) {
        // A 404 means the repo exists but has no commits yet (Quick setup / empty repo),
        // which is handled below as an initial commit. Anything else is an error.
        const refErrBody = await refRes.text();
        console.warn('[bridge] GitHub GET ref failed', { owner, name, branch: defaultBranch, status: refRes.status, body: refErrBody.slice(0, 500) });
        if (refRes.status === 403 || refRes.status === 401) {
          return badGateway(
            'GitHub denied access when reading the branch (often missing User-Agent or expired token). Use Settings → Connect GitHub again.',
          );
        }
        return badGateway(
          'Could not read branch on GitHub. If the repo is new with no commits, try Back up again after redeploying the bridge; otherwise check bridge logs.',
        );
      }

      // Blobs are created one at a time, in note order.
      const tree = [];
      for (const note of notes) {
        // Named notePath so the imported `path` module is not shadowed.
        const notePath = note.path || 'note.md';
        const content = (note.frontmatter && note.frontmatter !== '{}' ? '---\n' + note.frontmatter + '\n---\n\n' : '') + (note.body || '');
        const sha = await createBlob(content);
        if (!sha) {
          return badGateway('GitHub blob failed');
        }
        tree.push({ path: notePath, mode: '100644', type: 'blob', sha });
      }

      const snapshotObj = {
        format_version: 1,
        kind: 'knowtation-hosted-backup',
        exported_at: new Date().toISOString(),
        vault_id: vaultId,
        proposals,
      };
      const snapshotSha = await createBlob(JSON.stringify(snapshotObj));
      if (!snapshotSha) {
        return badGateway('GitHub blob failed (snapshot)');
      }
      tree.push({
        path: '.knowtation/backup/v1/snapshot.json',
        mode: '100644',
        type: 'blob',
        sha: snapshotSha,
      });

      // First commit of an empty vault: add a README explaining the repo layout.
      const isInitialCommit = !baseSha;
      if (isInitialCommit && notes.length === 0 && proposals.length === 0) {
        const placeholder =
          '# Knowtation vault backup\n\n'
          + 'This folder is written by **Back up now** on hosted Knowtation.\n\n'
          + '- **Markdown files** elsewhere in this repo are your vault **notes**.\n'
          + '- **`.knowtation/backup/v1/snapshot.json`** holds full **proposal** records (status, review, enrich metadata, bodies).\n\n'
          + 'Your vault had no notes or proposals yet. Add content in the Hub and run **Back up now** again.\n';
        const readmeSha = await createBlob(placeholder);
        if (!readmeSha) {
          return badGateway('GitHub blob failed');
        }
        tree.push({ path: '.knowtation/README.md', mode: '100644', type: 'blob', sha: readmeSha });
      }

      // With base_tree, entries from the previous commit that are not listed here are kept.
      const treePayload = baseTreeSha ? { base_tree: baseTreeSha, tree } : { tree };
      const treeRes = await fetch(`${repoApi}/git/trees`, {
        method: 'POST',
        headers: ghHeaders,
        body: JSON.stringify(treePayload),
      });
      if (!treeRes.ok) {
        return badGateway('GitHub tree failed');
      }
      const newTree = await treeRes.json();

      const commitRes = await fetch(`${repoApi}/git/commits`, {
        method: 'POST',
        headers: ghHeaders,
        body: JSON.stringify({
          message: 'Knowtation Hub backup ' + new Date().toISOString(),
          tree: newTree.sha,
          parents: baseSha ? [baseSha] : [],
        }),
      });
      if (!commitRes.ok) {
        return badGateway('GitHub commit failed');
      }
      const newCommit = await commitRes.json();

      // Existing branch: fast-forward only (force: false). Empty repo: create the branch ref.
      const refUpdateRes = baseSha
        ? await fetch(`${repoApi}/git/refs/${headsRefEnc(defaultBranch)}`, {
          method: 'PATCH',
          headers: ghHeaders,
          body: JSON.stringify({ sha: newCommit.sha, force: false }),
        })
        : await fetch(`${repoApi}/git/refs`, {
          method: 'POST',
          headers: ghHeaders,
          body: JSON.stringify({ ref: `refs/heads/${defaultBranch}`, sha: newCommit.sha }),
        });
      if (!refUpdateRes.ok) {
        return badGateway('GitHub push failed');
      }

      res.json({
        ok: true,
        message: 'Synced',
        notesCount: notes.length,
        proposalsCount: proposals.length,
      });
    } catch (e) {
      // Network failures or malformed GitHub responses anywhere in the push phase.
      console.error('[bridge] vault/sync GitHub request failed', e?.message);
      if (res.headersSent) return;
      return badGateway('GitHub API error');
    }
  } catch (e) {
    // Context/token-store failures used to escape the async handler and leave the request unanswered.
    console.error('[bridge] POST /api/v1/vault/sync', e?.message);
    if (res.headersSent) return;
    res.status(500).json({ error: e?.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1633
// Optional: GET status for Settings (connected + repo).
// Responds with { github_connected: boolean, repo: string | null } for the user identified by
// the Bearer JWT. 401 when the header is missing or the JWT does not resolve to a user id;
// 500 when the token store cannot be read.
app.get('/api/v1/vault/github-status', async (req, res) => {
  const auth = req.headers.authorization;
  const token = auth && auth.startsWith('Bearer ') ? auth.slice(7) : null;
  const uid = token ? userIdFromJwt(token) : null;
  if (!uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }
  try {
    const tokensByUser = await loadTokens(req.blobStore);
    const conn = tokensByUser[uid];
    res.json({
      github_connected: Boolean(conn?.token),
      repo: conn?.repo || null,
    });
  } catch (e) {
    // Same failure shape as the github-token route: a token-store read error must produce a
    // JSON 500 rather than an unhandled rejection from this async handler.
    res.status(500).json({ error: e?.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1649
// Internal: GET GitHub connection (token + repo) for the gateway to use for image upload.
// Server-to-server only — never exposed to the browser. Auth required.
// NOTE(review): the only check visible in this handler is the caller's Bearer JWT; nothing here
// distinguishes the gateway from any other holder of that JWT — confirm the route is not
// reachable from the public edge.
// Responses: 401 (no/invalid JWT), 400 GITHUB_NOT_CONNECTED, 500 on token-store failure,
// otherwise { token, repo }.
app.get('/api/v1/vault/github-token', async (req, res) => {
  const auth = req.headers.authorization;
  const token = auth && auth.startsWith('Bearer ') ? auth.slice(7) : null;
  const uid = token ? userIdFromJwt(token) : null;
  if (!uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }
  try {
    const tokensByUser = await loadTokens(req.blobStore);
    const conn = tokensByUser[uid];
    if (!conn?.token) {
      return res.status(400).json({ error: 'GitHub not connected', code: 'GITHUB_NOT_CONNECTED' });
    }
    // The body carries a live credential: forbid storage by any cache between here and the caller.
    res.setHeader('Cache-Control', 'no-store');
    res.json({ token: conn.token, repo: conn.repo || null });
  } catch (e) {
    res.status(500).json({ error: e?.message || 'Internal error', code: 'INTERNAL_ERROR' });
  }
});
1670
/** Max notes per canister POST /api/v1/notes/batch (must match hub/icp NOTES_BATCH cap). */
const CANISTER_NOTES_BATCH_MAX = 100;

/**
 * Write notes to the canister in consecutive slices of at most CANISTER_NOTES_BATCH_MAX.
 * Slices are posted one after another; the first non-OK response aborts the run, so slices
 * already accepted by the canister stay written.
 *
 * @param {string} canisterUid sent as X-User-Id
 * @param {string} actorUid sent as X-Actor-Id
 * @param {string} vaultId sent as X-Vault-Id
 * @param {{ path: string, body: string, frontmatter?: Record<string, unknown> }[]} notes
 * @returns {Promise<void>}
 * @throws {Error} with the HTTP status and the first 800 characters of the response body
 */
async function postNotesBatchToCanister(canisterUid, actorUid, vaultId, notes) {
  let start = 0;
  while (start < notes.length) {
    const end = start + CANISTER_NOTES_BATCH_MAX;
    const payload = JSON.stringify({ notes: notes.slice(start, end) });
    const headers = canisterHeaders({
      'Content-Type': 'application/json',
      'X-User-Id': canisterUid,
      'X-Actor-Id': actorUid,
      'X-Vault-Id': vaultId,
    });
    const response = await fetch(CANISTER_URL + '/api/v1/notes/batch', {
      method: 'POST',
      headers,
      body: payload,
    });
    // The body is read on success too, so the response is always fully consumed.
    const responseText = await response.text();
    if (!response.ok) {
      throw new Error(`Canister batch note write failed (${response.status}): ${responseText.slice(0, 800)}`);
    }
    start = end;
  }
}
1700
/**
 * Sanitize a user-supplied filename before writing it to disk.
 * - Strips all directory components (path traversal prevention).
 * - Removes every character that is not alphanumeric, a dot, hyphen, or underscore.
 * - Truncates to 200 chars so filesystem limits are never approached.
 * - Falls back to 'upload' when the result would be empty, or would be the special
 *   directory entries '.' / '..' (joined to the upload directory those name a directory,
 *   not a file).
 * - Non-string input is treated as an empty name instead of throwing inside path.basename.
 *
 * @param {unknown} rawName filename as supplied by the client
 * @returns {string} a single path segment safe to join onto the upload directory
 */
function sanitizeUploadFilename(rawName) {
  const base = path.basename(typeof rawName === 'string' ? rawName : '');
  const safe = base.replace(/[^a-zA-Z0-9._-]/g, '_').slice(0, 200);
  if (safe === '' || safe === '.' || safe === '..') return 'upload';
  return safe;
}
1713
/**
 * Express middleware: create a fresh per-request temp directory and expose it as
 * `req._importTempDir` for the upload storage and the import handler.
 *
 * The directory is also removed when the response finishes. The import handler already
 * deletes it in its own `finally`, but that code never runs when a later middleware (for
 * example the multipart parser rejecting an oversized file) ends the request first; without
 * this hook the directory and any partially written upload would be left behind.
 * Removal is idempotent, so running both cleanups is harmless.
 */
const importTempDirMiddleware = (req, res, next) => {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'knowtation-bridge-import-'));
  req._importTempDir = tempDir;
  // Guarded so the middleware still works when invoked without an event-emitting response.
  if (res && typeof res.once === 'function') {
    res.once('finish', () => {
      try {
        fs.rmSync(tempDir, { recursive: true, force: true });
      } catch (_) {
        // Best-effort cleanup: the response has already been sent, nothing left to report to.
      }
    });
  }
  next();
};
// Multipart parser for POST /api/v1/import: a single `file` field, at most 100 MiB, written to
// disk under the request's temp directory with a sanitized filename.
//
// The file goes into an `upload/` subdirectory rather than the temp root: the import handler
// creates its own working directories (`extracted`, `vault-work`) in that root, and an upload
// whose sanitized name equals one of them would make the later mkdir fail.
const bridgeImportUpload = multer({
  storage: multer.diskStorage({
    destination: (req, _file, cb) => {
      let uploadDir;
      try {
        uploadDir = path.join(req._importTempDir, 'upload');
        // With a destination *function*, creating the directory is this callback's job.
        fs.mkdirSync(uploadDir, { recursive: true });
      } catch (e) {
        cb(e);
        return;
      }
      cb(null, uploadDir);
    },
    filename: (_req, file, cb) => cb(null, sanitizeUploadFilename(file.originalname)),
  }),
  limits: { fileSize: 100 * 1024 * 1024 },
}).single('file');
1725
1726 // ——— Phase 18: GitHub image upload + image proxy ———
1727
// Multipart parser for the image upload route: one `image` field, buffered in memory
// (exposed to the handler as req.file.buffer) and capped at 25 MiB.
const bridgeImageUpload = multer({
  limits: { fileSize: 25 * 1024 * 1024 },
  storage: multer.memoryStorage(),
}).single('image');
1732
// POST /api/v1/notes/<note path>/upload-image — commit an uploaded image to the user's
// connected GitHub repo and return markdown that references it.
// The note path captured by the route pattern is not read by this handler.
// Validation order: file present → allowed extension → image/* content-type → magic bytes.
// Responds 400 for validation failures and for missing GitHub connection/repo; errors thrown
// by the commit are mapped to 400 or 500 by matching their message text.
app.post(
  /^\/api\/v1\/notes\/(.+)\/upload-image$/,
  requireBridgeAuth,
  requireBridgeEditorOrAdmin,
  bridgeImageUpload,
  async (req, res) => {
    try {
      if (!req.file) return res.status(400).json({ error: 'image file required', code: 'BAD_REQUEST' });

      const originalName = req.file.originalname || 'image.jpg';
      try { validateImageExtension(originalName); } catch (e) {
        return res.status(400).json({ error: e.message, code: 'BAD_REQUEST' });
      }
      if (!(req.file.mimetype || '').toLowerCase().startsWith('image/')) {
        return res.status(400).json({ error: 'File content-type must be image/*', code: 'BAD_REQUEST' });
      }
      const ext = originalName.split('.').pop().toLowerCase();
      try { validateMagicBytes(req.file.buffer, ext); } catch (e) {
        return res.status(400).json({ error: e.message, code: 'BAD_REQUEST' });
      }

      const tokensByUser = await loadTokens(req.blobStore);
      const conn = tokensByUser[req.uid];
      if (!conn?.token) {
        return res.status(400).json({ error: 'GitHub not connected. Go to Settings → Backup → Connect GitHub.', code: 'GITHUB_NOT_CONNECTED' });
      }
      if (!conn.repo) {
        return res.status(400).json({ error: 'GitHub repo not set. Back up once first to set the remote.', code: 'GITHUB_NOT_CONFIGURED' });
      }

      const now = new Date();
      const yearMonth = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, '0')}`;

      // Sanitize, then cap at 128 characters. When the name is too long, trim the stem and keep
      // the extension: cutting the tail would store the image without its file type.
      const maxNameLength = 128;
      const sanitizedName = originalName.replace(/[^a-zA-Z0-9._-]/g, '_');
      let safeName = sanitizedName;
      if (sanitizedName.length > maxNameLength) {
        const dotIndex = sanitizedName.lastIndexOf('.');
        const suffix = dotIndex > 0 ? sanitizedName.slice(dotIndex) : '';
        safeName =
          suffix.length > 0 && suffix.length < maxNameLength
            ? sanitizedName.slice(0, maxNameLength - suffix.length) + suffix
            : sanitizedName.slice(0, maxNameLength);
      }
      // Millisecond timestamp prefix keeps repeated uploads of the same filename distinct.
      const uniqueName = `${Date.now()}-${safeName}`;
      const repoFilePath = `media/images/${yearMonth}/${uniqueName}`;

      const result = await commitImageToRepo({
        accessToken: conn.token,
        repoUrl: conn.repo,
        filePath: repoFilePath,
        fileBuffer: req.file.buffer,
        commitMessage: `Add image: ${safeName}`,
      });

      res.json({
        url: result.url,
        inserted_markdown: `![${safeName}](${result.url})`,
        sha: result.sha,
        repo_path: repoFilePath,
        repo_private: result.isPrivate === true,
      });
    } catch (e) {
      console.error('[bridge] upload-image error:', e?.message);
      const msg = e?.message || String(e);
      // Messages that look like configuration/permission problems are reported as 400.
      const clientErr = /not found|not connected|lacks permission|lacks repo|Reconnect|scope|remote/i.test(msg);
      res.status(clientErr ? 400 : 500).json({ error: msg, code: clientErr ? 'BAD_REQUEST' : 'RUNTIME_ERROR' });
    }
  }
);
1792
// Image proxy: serve raw.githubusercontent.com images via the stored GitHub token.
// Accepts JWT via ?token= query param (browsers cannot send headers for <img> tags).
// Only https://raw.githubusercontent.com/<owner>/<repo>/<path> URLs are fetched, only image/*
// responses are relayed, and bodies above BRIDGE_IMAGE_PROXY_SIZE_LIMIT are refused.
const BRIDGE_IMAGE_PROXY_SIZE_LIMIT = 10 * 1024 * 1024;
app.get('/api/v1/vault/image-proxy', async (req, res) => {
  const auth = req.headers.authorization;
  const headerToken = auth && auth.startsWith('Bearer ') ? auth.slice(7) : null;
  const queryToken = typeof req.query.token === 'string' ? req.query.token : null;
  const uid = (headerToken || queryToken) ? userIdFromJwt(headerToken || queryToken) : null;
  if (!uid) return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });

  const rawUrl = typeof req.query.url === 'string' ? req.query.url : '';
  if (!/^https:\/\/raw\.githubusercontent\.com\/[^/]+\/[^/]+\/.+$/i.test(rawUrl)) {
    return res.status(400).json({ error: 'url must be a raw.githubusercontent.com path', code: 'BAD_REQUEST' });
  }

  let accessToken = '';
  try {
    const tokensByUser = await loadTokens(req.blobStore);
    const conn = tokensByUser[uid];
    if (conn?.token) accessToken = conn.token;
  } catch (_) {
    // Deliberate best-effort: without a stored token the fetch proceeds unauthenticated.
  }

  const fetchHeaders = { 'User-Agent': 'Knowtation-Hub/1.0' };
  if (accessToken) fetchHeaders.Authorization = `token ${accessToken}`;

  let upstream;
  try {
    upstream = await fetch(rawUrl, { headers: fetchHeaders });
  } catch (e) {
    return res.status(502).json({ error: 'Failed to fetch image from GitHub', code: 'UPSTREAM_ERROR' });
  }
  if (!upstream.ok) {
    return res.status(upstream.status).json({ error: 'Image not found on GitHub', code: 'UPSTREAM_ERROR' });
  }
  const ct = upstream.headers.get('content-type') || '';
  if (!ct.startsWith('image/')) {
    return res.status(400).json({ error: 'URL does not point to an image', code: 'BAD_REQUEST' });
  }

  // Refuse on the declared length before buffering anything. A missing or unparsable header
  // falls through to the post-download check below.
  const declaredLength = Number(upstream.headers.get('content-length'));
  if (Number.isFinite(declaredLength) && declaredLength > BRIDGE_IMAGE_PROXY_SIZE_LIMIT) {
    try {
      await upstream.body?.cancel();
    } catch (_) {
      // Nothing further to release if the stream cannot be cancelled.
    }
    return res.status(400).json({ error: 'Image too large (max 10 MB)', code: 'BAD_REQUEST' });
  }

  let buf;
  try {
    buf = Buffer.from(await upstream.arrayBuffer());
  } catch (e) {
    // The connection can still fail while the body is streaming.
    return res.status(502).json({ error: 'Failed to fetch image from GitHub', code: 'UPSTREAM_ERROR' });
  }
  if (buf.byteLength > BRIDGE_IMAGE_PROXY_SIZE_LIMIT) {
    return res.status(400).json({ error: 'Image too large (max 10 MB)', code: 'BAD_REQUEST' });
  }
  res.setHeader('Content-Type', ct);
  res.setHeader('Content-Length', buf.byteLength);
  res.setHeader('Cache-Control', 'private, max-age=3600');
  res.setHeader('X-Content-Type-Options', 'nosniff');
  // image/svg+xml passes the image/* check and can contain script. If the proxied URL is opened
  // as a document it would run on this origin, so sandbox it; <img> rendering is unaffected.
  res.setHeader('Content-Security-Policy', "default-src 'none'; style-src 'unsafe-inline'; sandbox");
  res.send(buf);
});
1841
// POST /api/v1/import — multipart import into the hosted vault.
// Flow: validate form fields → (zip uploads only) extract with a path-traversal check →
// resolve hosted context → run the importer into a scratch vault under the request temp dir →
// stamp each imported note with import provenance → batch-write the notes to the canister.
// The request temp dir is removed in `finally` on every path through the handler.
app.post(
  '/api/v1/import',
  requireBridgeAuth,
  requireBridgeEditorOrAdmin,
  importTempDirMiddleware,
  bridgeImportUpload,
  async (req, res) => {
    const tempDir = req._importTempDir;
    const badRequest = (error) => res.status(400).json({ error, code: 'BAD_REQUEST' });
    const badGateway = (e) =>
      res.status(502).json({
        error: e.message || 'Canister write failed',
        code: 'BAD_GATEWAY',
      });
    try {
      if (!CANISTER_URL) {
        return res.status(503).json({ error: 'Canister not configured', code: 'SERVICE_UNAVAILABLE' });
      }
      const form = req.body || {};

      const sourceType = form.source_type ? String(form.source_type).trim() : '';
      if (!IMPORT_SOURCE_TYPES.includes(sourceType)) {
        return badRequest(`source_type must be one of: ${IMPORT_SOURCE_TYPES.join(', ')}`);
      }

      // google-sheets is the one source addressed by id instead of an uploaded file.
      const isSheets = sourceType === 'google-sheets';
      const sheetId = form.spreadsheet_id ? String(form.spreadsheet_id).trim() : '';
      const sheetsRange = form.sheets_range ? String(form.sheets_range).trim() : undefined;
      if (isSheets && !sheetId) {
        return badRequest('google-sheets: spreadsheet_id is required in the multipart body');
      }
      if (isSheets && req.file) {
        return badRequest('google-sheets: do not send a file; use spreadsheet_id only');
      }
      if (!isSheets && !req.file) {
        return badRequest('file required');
      }

      const project = form.project ? String(form.project).trim() : undefined;
      const outputDir = form.output_dir ? String(form.output_dir).trim() : undefined;
      const tags = form.tags
        ? String(form.tags)
            .split(',')
            .map((s) => s.trim())
            .filter(Boolean)
        : [];

      let inputPath = isSheets ? sheetId : req.file.path;
      const uploadedName = !isSheets && req.file ? req.file.originalname : '';
      if (uploadedName && uploadedName.toLowerCase().endsWith('.zip')) {
        const extractDir = path.join(tempDir, 'extracted');
        fs.mkdirSync(extractDir, { recursive: true });
        const archive = new AdmZip(req.file.path);
        // Zip-slip protection: every entry must resolve to extractDir itself or a path inside it.
        const extractRoot = path.resolve(extractDir);
        const insidePrefix = extractRoot + path.sep;
        const hasTraversal = archive.getEntries().some((entry) => {
          const target = path.resolve(extractDir, entry.entryName);
          return target !== extractRoot && !target.startsWith(insidePrefix);
        });
        if (hasTraversal) {
          return badRequest('Invalid zip entry: path traversal detected');
        }
        archive.extractAllTo(extractDir, true);
        inputPath = extractDir;
      }

      const hctx = await resolveHostedBridgeContext(req, req.uid);
      if (!hctx.ok) {
        return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
      }

      const vaultPath = path.join(tempDir, 'vault-work');
      fs.mkdirSync(vaultPath, { recursive: true });
      const importOptions = { project, outputDir, tags, vaultPath };
      if (sheetsRange) importOptions.sheetsRange = sheetsRange;
      const result = await runImport(sourceType, inputPath, importOptions);

      const importStamp = mergeProvenanceFrontmatter({}, {
        sub: hctx.actorUid,
        kind: 'import',
      });

      /** @type {{ path: string, body: string, frontmatter: Record<string, unknown> }[]} */
      const notesForCanister = [];
      for (const item of result.imported || []) {
        if (!item.path || typeof item.path !== 'string') continue;
        try {
          // Stamp provenance on disk first, then read the note back so the canister receives
          // the frontmatter exactly as written.
          writeNote(vaultPath, item.path, { frontmatter: importStamp });
          const relPath = resolveVaultRelativePath(vaultPath, item.path);
          const markdown = fs.readFileSync(path.join(vaultPath, relPath), 'utf8');
          const parsed = parseFrontmatterAndBody(markdown);
          const rawFrontmatter = parsed.frontmatter;
          const isRecord =
            rawFrontmatter && typeof rawFrontmatter === 'object' && !Array.isArray(rawFrontmatter);
          notesForCanister.push({
            path: relPath.replace(/\\/g, '/'),
            body: parsed.body || '',
            frontmatter: isRecord ? { ...rawFrontmatter } : {},
          });
        } catch (e) {
          console.error('[bridge] import prepare note for canister failed for', item.path, e?.message || e);
          return badGateway(e);
        }
      }

      try {
        await postNotesBatchToCanister(
          hctx.effectiveCanisterUid,
          hctx.actorUid,
          hctx.vaultId,
          notesForCanister,
        );
      } catch (e) {
        console.error('[bridge] import canister batch write failed', e?.message || e);
        return badGateway(e);
      }
      return res.json({ imported: result.imported, count: result.count });
    } catch (e) {
      const msg = e.message || String(e);
      // Importer errors whose text points at the caller's input are surfaced as 400.
      const clientError =
        /OPENAI_API_KEY|required for transcription|Unsupported format|file not found|not found:|Transcription failed|413|Payload Too Large|25MB|Whisper accepts|Only https|blocked|private IP|timed out|exceeds \d+ bytes|Invalid URL|URL is required|Extract mode requires|Could not extract|DNS resolution failed|Too many redirects|non-https/i.test(
          msg,
        );
      res.status(clientError ? 400 : 500).json({
        error: msg,
        code: clientError ? 'BAD_REQUEST' : 'RUNTIME_ERROR',
      });
    } finally {
      if (tempDir && fs.existsSync(tempDir)) {
        try {
          fs.rmSync(tempDir, { recursive: true, force: true });
        } catch (_) {}
      }
    }
  },
);
1976
/**
 * Coerce the request's `mode` value to one of the supported URL-import modes.
 * Matching ignores surrounding whitespace and letter case; anything unrecognised,
 * including non-string input, yields 'auto'.
 *
 * @param {unknown} raw
 * @returns {'auto' | 'bookmark' | 'extract'}
 */
function normalizeBridgeImportUrlMode(raw) {
  if (typeof raw !== 'string') return 'auto';
  const candidate = raw.trim().toLowerCase();
  switch (candidate) {
    case 'bookmark':
    case 'extract':
      return candidate;
    default:
      return 'auto';
  }
}
1986
/**
 * Read `tags` from a JSON request body as a clean list of strings.
 * Accepts an array (each element stringified) or a comma-separated string; entries are
 * trimmed and empty ones dropped. Any other shape, or a missing body, yields [].
 *
 * @param {unknown} body
 * @returns {string[]}
 */
function tagsFromBridgeImportUrlBody(body) {
  const rawTags = body ? body.tags : undefined;
  let pieces;
  if (Array.isArray(rawTags)) {
    pieces = rawTags.map((entry) => String(entry));
  } else if (typeof rawTags === 'string') {
    pieces = rawTags.split(',');
  } else {
    return [];
  }
  return pieces.map((piece) => piece.trim()).filter(Boolean);
}
1997
// POST /api/v1/import-url — import a single URL into the hosted vault (JSON body:
// url, optional mode / project / output_dir / tags).
// Flow: validate body → resolve hosted context → run the 'url' importer into a scratch vault →
// stamp each imported note with import provenance → batch-write the notes to the canister.
// The scratch directory is created only once validation has passed, and inside the `try`, so a
// failure to create it is answered as a JSON error like any other runtime failure; `finally`
// removes it on every path where it was created.
app.post('/api/v1/import-url', requireBridgeAuth, requireBridgeEditorOrAdmin, async (req, res) => {
  let tempDir = null;
  try {
    if (!CANISTER_URL) {
      return res.status(503).json({ error: 'Canister not configured', code: 'SERVICE_UNAVAILABLE' });
    }
    const body = req.body && typeof req.body === 'object' ? req.body : {};
    const urlStr = typeof body.url === 'string' ? body.url.trim() : '';
    if (!urlStr) return res.status(400).json({ error: 'url required', code: 'BAD_REQUEST' });
    const urlMode = normalizeBridgeImportUrlMode(body.mode);
    const project = body.project != null && String(body.project).trim() !== '' ? String(body.project).trim() : undefined;
    const outputDir =
      body.output_dir != null && String(body.output_dir).trim() !== '' ? String(body.output_dir).trim() : undefined;
    const tags = tagsFromBridgeImportUrlBody(body);

    const hctx = await resolveHostedBridgeContext(req, req.uid);
    if (!hctx.ok) {
      return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
    }

    tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'knowtation-bridge-import-url-'));
    const vaultPath = path.join(tempDir, 'vault-work');
    fs.mkdirSync(vaultPath, { recursive: true });
    const result = await runImport('url', urlStr, { project, outputDir, tags, vaultPath, urlMode });
    const importStamp = mergeProvenanceFrontmatter({}, {
      sub: hctx.actorUid,
      kind: 'import',
    });
    /** @type {{ path: string, body: string, frontmatter: Record<string, unknown> }[]} */
    const notesForCanister = [];
    for (const item of result.imported || []) {
      if (item.path && typeof item.path === 'string') {
        try {
          // Stamp provenance on disk first, then read the note back so the canister receives
          // the frontmatter exactly as written.
          writeNote(vaultPath, item.path, { frontmatter: importStamp });
          const safe = resolveVaultRelativePath(vaultPath, item.path);
          const fullPath = path.join(vaultPath, safe);
          const markdownFull = fs.readFileSync(fullPath, 'utf8');
          const parsed = parseFrontmatterAndBody(markdownFull);
          const fm =
            parsed.frontmatter && typeof parsed.frontmatter === 'object' && !Array.isArray(parsed.frontmatter)
              ? /** @type {Record<string, unknown>} */ ({ ...parsed.frontmatter })
              : {};
          notesForCanister.push({
            path: safe.replace(/\\/g, '/'),
            body: parsed.body || '',
            frontmatter: fm,
          });
        } catch (e) {
          console.error('[bridge] import-url prepare note for canister failed for', item.path, e?.message || e);
          return res.status(502).json({
            error: e.message || 'Canister write failed',
            code: 'BAD_GATEWAY',
          });
        }
      }
    }
    try {
      await postNotesBatchToCanister(hctx.effectiveCanisterUid, hctx.actorUid, hctx.vaultId, notesForCanister);
    } catch (e) {
      console.error('[bridge] import-url canister batch write failed', e?.message || e);
      return res.status(502).json({
        error: e.message || 'Canister write failed',
        code: 'BAD_GATEWAY',
      });
    }
    return res.json({ imported: result.imported, count: result.count });
  } catch (e) {
    const msg = e.message || String(e);
    // Importer errors whose text points at the caller's input are surfaced as 400.
    const clientError =
      /OPENAI_API_KEY|required for transcription|Unsupported format|file not found|not found:|Transcription failed|413|Payload Too Large|25MB|Whisper accepts|Only https|blocked|private IP|timed out|exceeds \d+ bytes|Invalid URL|URL is required|Extract mode requires|Could not extract|DNS resolution failed|Too many redirects|non-https/i.test(
        msg,
      );
    res.status(clientError ? 400 : 500).json({
      error: msg,
      code: clientError ? 'BAD_REQUEST' : 'RUNTIME_ERROR',
    });
  } finally {
    if (tempDir && fs.existsSync(tempDir)) {
      try {
        fs.rmSync(tempDir, { recursive: true, force: true });
      } catch (_) {}
    }
  }
});
2080
// ——— Index + Search (hosted: indexer runs in bridge, canister does not run Node) ———
// BATCH_EMBED + INDEXER_EMBED_CONCURRENCY together drive how much wall time the index
// step takes against Netlify's 60 s sync-function cap. With DeepInfra (BAAI/bge-large-en-v1.5)
// per-batch latencies trending 2.5 – 8.5 s, the previous serial loop at BATCH=10 ran ~65 – 80 s
// for a 251-chunk vault and got killed. Defaults below (BATCH=50, CONCURRENCY=5) bring that
// same vault under ~10 – 15 s when a full re-embed is needed; the content-hash cache below
// makes subsequent re-indexes a few seconds regardless of vault size.
//
// Every env-driven tunable is read once at module load and passed through a parse helper
// defined outside this section, so validation and fallback rules are not visible here.
// Embed batch size, from INDEXER_EMBED_BATCH_SIZE.
const BATCH_EMBED_DEFAULT = parseEmbedBatchSize(process.env.INDEXER_EMBED_BATCH_SIZE);
// Number of concurrent embed batches, from INDEXER_EMBED_CONCURRENCY.
const EMBED_CONCURRENCY_DEFAULT = parseEmbedConcurrency(process.env.INDEXER_EMBED_CONCURRENCY);
// Fixed value, not env-configurable. NOTE(review): presumably the vector-store upsert batch
// size; its use is outside this section — confirm.
const BATCH_UPSERT = 50;
// Preflight thresholds, from INDEXER_SYNC_BUDGET_SECONDS and INDEXER_MAX_SYNC_CHUNKS: per the
// kickOffBackgroundIndex documentation, exceeding either sends the job to the background function.
const SYNC_BUDGET_SECONDS = parseSyncBudgetSeconds(process.env.INDEXER_SYNC_BUDGET_SECONDS);
const MAX_SYNC_CHUNKS = parseMaxSyncChunks(process.env.INDEXER_MAX_SYNC_CHUNKS);
2093
/**
 * Start the `bridge-index-background` Netlify Function for one vault and wait only for its
 * acknowledgement.
 *
 * The synchronous `POST /api/v1/index` handler calls this when its preflight estimate exceeds
 * the sync budget (`SYNC_BUDGET_SECONDS`) or the chunk-count safety net (`MAX_SYNC_CHUNKS`).
 * The background function answers 202 immediately and does the embed work in a separate
 * Lambda (up to 15 min), so awaiting this call does not block on indexing itself.
 *
 * The outgoing request is authenticated twice (see `lib/bridge-internal-hmac.mjs`):
 *   1. The caller's `Authorization` header is forwarded unchanged, so the background function
 *      still runs `requireBridgeAuth` against a real user.
 *   2. An HMAC over (canisterUid, vaultId, jobId, ts), keyed with `SESSION_SECRET`, proves the
 *      request came from this handler — the background URL is publicly addressable on Netlify.
 *
 * @param {object} req incoming Express request (source of protocol, host and Authorization)
 * @param {string} jobId
 * @param {string} canisterUid
 * @param {string} vaultId
 * @returns {Promise<void>}
 * @throws {Error} when `SESSION_SECRET` is unset, when no host can be determined, or when
 *   `assertBackgroundKickoffOk` rejects the kickoff response
 */
async function kickOffBackgroundIndex(req, jobId, canisterUid, vaultId) {
  if (!SESSION_SECRET) {
    throw new Error(
      'kickOffBackgroundIndex: SESSION_SECRET is not set; cannot sign internal request',
    );
  }
  const ts = Date.now();
  const sig = signInternalRequest(SESSION_SECRET, { canisterUid, vaultId, jobId, ts });

  // Scheme: Express's own value first, then the first x-forwarded-proto entry, then https.
  let protocol = req.protocol;
  if (!protocol) {
    const forwardedProto = req.headers['x-forwarded-proto'];
    protocol = (forwardedProto && String(forwardedProto).split(',')[0]) || 'https';
  }

  const hostViaGetter = req.get ? req.get('host') : undefined;
  const host = hostViaGetter || req.headers.host;
  if (!host) {
    throw new Error('kickOffBackgroundIndex: cannot determine host header for background URL');
  }

  const internalHeaders = {
    'content-type': 'application/json',
    authorization: req.headers.authorization || '',
    'x-vault-id': vaultId,
    'x-bridge-internal-uid': canisterUid,
    'x-bridge-internal-vault-id': vaultId,
    'x-bridge-internal-job-id': jobId,
    'x-bridge-internal-ts': String(ts),
    'x-bridge-internal-sig': sig,
  };

  // Netlify background functions answer 202 within ~50–100 ms whatever the function body does.
  //
  // CRITICAL (May 2026 hotfix): the status MUST be inspected. fetch() only rejects on network
  // errors and resolves normally for 4xx/5xx, so an unchecked non-202 (function not deployed,
  // wrong host header, HMAC misconfiguration, routing bug, ...) would be reported to the
  // browser as `202 status:"background"` while nothing runs, and the job lock would block
  // retries for its full 16-min TTL. Context: `lib/bridge-index-kickoff-response.mjs`;
  // failure-mode matrix: `test/bridge-index-kickoff-response.test.mjs`.
  const response = await fetch(`${protocol}://${host}/.netlify/functions/bridge-index-background`, {
    method: 'POST',
    headers: internalHeaders,
    body: '{}',
  });

  let responseBody = '';
  try {
    responseBody = await response.text();
  } catch (_) {
    // An unreadable body is not fatal: the status code alone identifies a failed kickoff.
  }
  assertBackgroundKickoffOk(response, responseBody);
}
2165
/**
 * Read-only snapshot of "is the index for this vault currently being rebuilt
 * in the background, and when did it last finish successfully?". The Hub UI
 * polls this on page load to render `Last indexed: 2 minutes ago` next to the
 * Re-index button and to disable the button while a background job is live.
 *
 * Same auth + scope as `POST /api/v1/index`: the user must be authenticated
 * AND have the vault in their effective hosted-bridge context.
 *
 * Response: `{ lastIndexed, inProgress, job }`. `inProgress` is true only while a job lock
 * exists with a finite `expiresAt` in the future; `job` is that lock, else null. Without a
 * blob store on the request, the result is `lastIndexed: null` and `inProgress: false`.
 */
app.get('/api/v1/index/status', requireBridgeAuth, async (req, res) => {
  const hctx = await resolveHostedBridgeContext(req, req.uid);
  if (!hctx.ok) return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
  const canisterUid = hctx.effectiveCanisterUid;
  const vaultId = sanitizeVaultId(req.headers['x-vault-id']);
  // The two lookups are independent reads, so run them together: this endpoint is polled and
  // should not pay for two sequential blob round-trips.
  const [lastIndexed, jobLock] = req.blobStore
    ? await Promise.all([
        getLastIndexedAt(req.blobStore, { canisterUid, vaultId }),
        peekJobLock(req.blobStore, { canisterUid, vaultId }),
      ])
    : [null, null];
  const inProgress =
    jobLock != null &&
    Number.isFinite(jobLock.expiresAt) &&
    jobLock.expiresAt > Date.now();
  res.json({
    lastIndexed,
    inProgress,
    job: inProgress ? jobLock : null,
  });
});
2196
2197 app.post('/api/v1/index', requireBridgeAuth, requireBridgeEditorOrAdmin, async (req, res) => {
2198 const uid = req.uid;
2199 const earlyVaultId = sanitizeVaultId(req.headers['x-vault-id']);
2200 const timer = createIndexTimer({ vaultId: earlyVaultId, canisterUid: null });
2201 const hctx = await resolveHostedBridgeContext(req, uid);
2202 timer.step('resolve_context', { ok: hctx.ok });
2203 if (!hctx.ok) {
2204 timer.finish({ ok: false, phase: 'resolve_context', status: hctx.status, code: hctx.code });
2205 return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
2206 }
2207 const canisterUid = hctx.effectiveCanisterUid;
2208 const vaultId = sanitizeVaultId(req.headers['x-vault-id']);
2209 let exportRes;
2210 try {
2211 exportRes = await fetch(CANISTER_URL + '/api/v1/export', {
2212 method: 'GET',
2213 headers: canisterHeaders({ 'X-User-Id': canisterUid, 'X-Vault-Id': vaultId }),
2214 });
2215 } catch (e) {
2216 timer.finish({ ok: false, phase: 'canister_export_fetch', error: e?.message || String(e) });
2217 return res.status(502).json({ error: 'Could not reach canister', code: 'BAD_GATEWAY' });
2218 }
2219 if (!exportRes.ok) {
2220 timer.finish({ ok: false, phase: 'canister_export_status', status: exportRes.status });
2221 return res.status(502).json({ error: 'Canister export failed', code: 'BAD_GATEWAY', status: exportRes.status });
2222 }
2223 let vault;
2224 try {
2225 vault = await exportRes.json();
2226 } catch (_) {
2227 timer.finish({ ok: false, phase: 'canister_export_parse' });
2228 return res.status(502).json({ error: 'Invalid canister response', code: 'BAD_GATEWAY' });
2229 }
2230 let notes = vault.notes || [];
2231 timer.step('canister_export', { note_count: notes.length, scoped: Boolean(hctx.scope) });
2232 if (hctx.scope) {
2233 notes = applyScopeFilterToNotes(notes, hctx.scope);
2234 timer.step('scope_filter', { note_count_after: notes.length });
2235 }
2236 try {
2237 if (!globalThis.__knowtation_bridge_embed_logged) {
2238 globalThis.__knowtation_bridge_embed_logged = true;
2239 const c = getBridgeEmbeddingConfig();
2240 const hasOpenAiKey = Boolean(
2241 process.env.OPENAI_API_KEY && String(process.env.OPENAI_API_KEY).trim(),
2242 );
2243 console.log(
2244 '[bridge] embedding (no secrets):',
2245 JSON.stringify({
2246 provider: c.provider,
2247 model: c.model,
2248 ollama_url_set: Boolean(process.env.OLLAMA_URL && String(process.env.OLLAMA_URL).trim()),
2249 openai_key_set: hasOpenAiKey,
2250 }),
2251 );
2252 }
2253 const { chunkNote } = await import('../../lib/chunk.mjs');
2254 const { embedWithUsage, embeddingDimension } = await import('../../lib/embedding.mjs');
2255 const { createVectorStore } = await import('../../lib/vector-store.mjs');
2256 timer.step('import_modules');
2257
2258 const vectorsDir = await getVectorsDirForUser(req, canisterUid);
2259 const storeConfig = getBridgeStoreConfig(canisterUid, vectorsDir);
2260 timer.step('get_vectors_dir');
2261 const chunkOpts = resolveIndexerChunkOptions(process.env, storeConfig.embedding);
2262 const allChunks = [];
2263 for (const n of notes) {
2264 const note = {
2265 body: n.body || '',
2266 path: n.path || 'note.md',
2267 project: undefined,
2268 tags: [],
2269 date: undefined,
2270 };
2271 const chunks = chunkNote(note, chunkOpts);
2272 for (const c of chunks) allChunks.push(c);
2273 }
2274 // Tag every chunk with a versioned content hash + the namespaced store id so the
2275 // sqlite-vec backend's `getChunkHashes(vaultId)` lookup keys line up. The hash prefix
2276 // includes the active provider+model so a same-dimension model swap (e.g. BGE-large 1024
2277 // → BGE-m3 1024) automatically invalidates the cache instead of silently keeping stale
2278 // vectors. See `lib/chunk-content-hash.mjs:computeChunkContentHashTagged`.
2279 const embeddingConfigForHash = storeConfig.embedding;
2280 const chunksWithHash = allChunks.map((chunk) => ({
2281 chunk,
2282 storeId: `${vaultId}::${chunk.id}`,
2283 contentHash: computeChunkContentHashTagged(chunk, embeddingConfigForHash),
2284 }));
2285 timer.step('chunk_notes', { chunk_count: allChunks.length });
2286
2287 const dim = embeddingDimension(storeConfig.embedding);
2288 const store = await createVectorStore(storeConfig);
2289 await store.ensureCollection(dim);
2290 timer.step('ensure_collection', { dim });
2291
2292 // Empty vault: drop everything for this vault and persist (covers note-deletion case).
2293 if (chunksWithHash.length === 0) {
2294 let vectors_deleted = 0;
2295 if (typeof store.deleteByVaultId === 'function') {
2296 vectors_deleted = await store.deleteByVaultId(vaultId);
2297 }
2298 timer.step('delete_old_vectors_empty', { vectors_deleted });
2299 await persistVectorsToBlob(req, canisterUid, vectorsDir);
2300 timer.step('persist_vectors_empty');
2301 // Sidecar update so the Hub UI's "Last indexed" line stays correct even after
2302 // an all-notes-deleted re-index (notes=0 is a legitimate steady state).
2303 if (req.blobStore) {
2304 try {
2305 await setLastIndexedAt(req.blobStore, {
2306 canisterUid,
2307 vaultId,
2308 actorUid: sanitizeUserId(uid),
2309 notesProcessed: notes.length,
2310 chunksIndexed: 0,
2311 chunksEmbedded: 0,
2312 chunksSkippedCached: 0,
2313 vectorsDeleted: vectors_deleted,
2314 embeddingInputTokens: 0,
2315 durationMs: timer.totalMs(),
2316 mode: req.bridgeInternalRequest != null ? 'background' : 'sync',
2317 provider: storeConfig.embedding?.provider || null,
2318 model: storeConfig.embedding?.model || null,
2319 });
2320 } catch (sidecarErr) {
2321 // Sidecar write failure must not fail the index — UI just falls back to "never indexed".
2322 console.warn('[bridge] setLastIndexedAt failed (empty path):', sidecarErr?.message || sidecarErr);
2323 }
2324 }
2325 // If this is a background-mode invocation, release the lock so subsequent
2326 // re-indexes are not falsely blocked. Use expectedJobId so a stale background
2327 // function (whose lock has since been overwritten) cannot clobber a newer one.
2328 if (req.bridgeInternalRequest != null && req.blobStore) {
2329 try {
2330 await releaseJobLock(req.blobStore, {
2331 canisterUid,
2332 vaultId,
2333 expectedJobId: req.bridgeInternalRequest.jobId,
2334 });
2335 } catch (lockErr) {
2336 console.warn('[bridge] releaseJobLock failed (empty path):', lockErr?.message || lockErr);
2337 }
2338 }
2339 console.log(
2340 '[bridge] index',
2341 JSON.stringify({
2342 vault_id: vaultId,
2343 canister_uid: sanitizeUserId(canisterUid),
2344 notes_processed: notes.length,
2345 chunks_indexed: 0,
2346 vectors_deleted,
2347 chunks_skipped_cached: 0,
2348 }),
2349 );
2350 timer.finish({
2351 ok: true,
2352 notes_processed: notes.length,
2353 chunks_indexed: 0,
2354 vectors_deleted,
2355 chunks_skipped_cached: 0,
2356 });
2357 return res.json({
2358 ok: true,
2359 notesProcessed: notes.length,
2360 chunksIndexed: 0,
2361 embedding_input_tokens: 0,
2362 vectors_deleted,
2363 chunksSkippedCached: 0,
2364 });
2365 }
2366
2367 // Content-hash cache lookup. If the store doesn't expose getChunkHashes (older
2368 // backend or test mock), treat every chunk as cache miss — correct, just slower.
2369 let existingHashes = new Map();
2370 if (typeof store.getChunkHashes === 'function') {
2371 try {
2372 existingHashes = await store.getChunkHashes(vaultId);
2373 } catch (e) {
2374 console.warn(
2375 '[bridge] getChunkHashes failed; falling back to full re-embed for this vault:',
2376 e?.message || e,
2377 );
2378 existingHashes = new Map();
2379 }
2380 }
2381 const partitioned = partitionChunksForReindex(chunksWithHash, existingHashes);
2382 const toEmbed = partitioned.toEmbed;
2383 const chunks_skipped_cached = partitioned.skippedCachedCount;
2384 timer.step('cache_lookup', {
2385 cache_size: existingHashes.size,
2386 chunks_total: chunksWithHash.length,
2387 chunks_skipped_cached,
2388 chunks_to_embed: toEmbed.length,
2389 orphan_count: partitioned.orphanIds.length,
2390 });
2391
2392 // —— Auto-routing: sync vs background ——
2393 // The bridge runs as a Netlify synchronous function (60 s platform max). After the
2394 // OpenAI(1536)→DeepInfra(1024) switch, a 251-chunk full re-embed costs ~10–15 s
2395 // and a 1 500-chunk re-embed pushes past 30 s. To keep the snappy UX for the 99 %
2396 // case (small delta or cache-hit) AND eliminate timeout risk for the 1 % case
2397 // (first-time index, dim migration, big import), we estimate the embed wall-clock
2398 // here and either (a) continue inline OR (b) hand the work to the
2399 // `bridge-index-background` Netlify Function (15-min cap).
2400 //
2401 // The background path itself re-enters this same handler via
2402 // `req.bridgeInternalRequest` (set by the wrapper after HMAC verification); when
2403 // that's truthy we SKIP the routing decision and execute inline regardless of size.
2404 const isInternalBackgroundRequest = req.bridgeInternalRequest != null;
2405 const embeddingConfig = storeConfig.embedding;
2406 const BATCH_EMBED = BATCH_EMBED_DEFAULT;
2407 const EMBED_CONCURRENCY = EMBED_CONCURRENCY_DEFAULT;
2408 if (!isInternalBackgroundRequest && toEmbed.length > 0) {
2409 const estimatedSeconds = estimateEmbedSeconds({
2410 chunksToEmbed: toEmbed.length,
2411 batchSize: BATCH_EMBED,
2412 concurrency: EMBED_CONCURRENCY,
2413 });
2414 // `isFirstIndex` covers BOTH a true first-time index (no prior cache rows) AND
2415 // the post-dim-migration state where `ensureCollection` just dropped + recreated
2416 // the table (so `getChunkHashes` returned empty). Both require a full re-embed
2417 // and both should route to background regardless of estimate.
2418 const isFirstIndex = existingHashes.size === 0;
2419 const decision = shouldUseBackgroundIndex({
2420 chunksToEmbed: toEmbed.length,
2421 estimatedSeconds,
2422 syncBudgetSeconds: SYNC_BUDGET_SECONDS,
2423 maxSyncChunks: MAX_SYNC_CHUNKS,
2424 isFirstIndex,
2425 });
2426 timer.step('routing_decision', {
2427 chunks_to_embed: toEmbed.length,
2428 estimated_seconds: estimatedSeconds,
2429 is_first_index: isFirstIndex,
2430 sync_budget_seconds: SYNC_BUDGET_SECONDS,
2431 max_sync_chunks: MAX_SYNC_CHUNKS,
2432 decision: decision.shouldUseBackground ? 'background' : 'sync',
2433 reason: decision.reason,
2434 });
2435 if (decision.shouldUseBackground) {
2436 if (!req.blobStore) {
2437 // No Blob store available (local self-host without Netlify Blobs): we cannot
2438 // safely run the background path because lock + sidecar persistence would be
2439 // lost. Fall through to sync — local self-host is single-tenant and operators
2440 // can tolerate a longer wait.
2441 timer.step('routing_fallback_no_blobstore');
2442 } else {
2443 const lockResult = await acquireJobLock(req.blobStore, {
2444 canisterUid,
2445 vaultId,
2446 actorUid: sanitizeUserId(uid),
2447 chunksToEmbed: toEmbed.length,
2448 estimatedSeconds,
2449 reason: decision.reason,
2450 });
2451 if (!lockResult.acquired) {
2452 timer.finish({
2453 ok: true,
2454 phase: 'background_already_running',
2455 existing_job_id: lockResult.existing?.jobId || null,
2456 });
2457 return res.status(409).json({
2458 status: 'already_running',
2459 message:
2460 'A background re-index is already running for this vault. Refresh in a minute.',
2461 jobId: lockResult.existing?.jobId || null,
2462 startedAt: lockResult.existing?.startedAt || null,
2463 });
2464 }
2465 try {
2466 await kickOffBackgroundIndex(req, lockResult.jobId, canisterUid, vaultId);
2467 } catch (kickoffErr) {
2468 // Kickoff failed (network blip, missing SESSION_SECRET, etc.) — release the
2469 // lock so the user can retry, and surface the error.
2470 await releaseJobLock(req.blobStore, {
2471 canisterUid,
2472 vaultId,
2473 expectedJobId: lockResult.jobId,
2474 });
2475 timer.finish({
2476 ok: false,
2477 phase: 'background_kickoff',
2478 error: kickoffErr?.message || String(kickoffErr),
2479 });
2480 return res.status(502).json({
2481 error: 'Could not start background re-index',
2482 code: 'BACKGROUND_KICKOFF_FAILED',
2483 message: kickoffErr?.message || String(kickoffErr),
2484 });
2485 }
2486 timer.finish({
2487 ok: true,
2488 phase: 'background_started',
2489 job_id: lockResult.jobId,
2490 chunks_to_embed: toEmbed.length,
2491 estimated_seconds: estimatedSeconds,
2492 reason: decision.reason,
2493 });
2494 return res.status(202).json({
2495 status: 'background',
2496 jobId: lockResult.jobId,
2497 message:
2498 'Large re-index started in the background. Refresh in 1–2 minutes — search will use the new vectors as soon as the job finishes.',
2499 estimatedSeconds,
2500 chunksToEmbed: toEmbed.length,
2501 reason: decision.reason,
2502 });
2503 }
2504 }
2505 }
2506 // —— end auto-routing ——
2507
2508 const embedBatches = [];
2509 for (let i = 0; i < toEmbed.length; i += BATCH_EMBED) {
2510 embedBatches.push(toEmbed.slice(i, i + BATCH_EMBED));
2511 }
2512 let embedding_input_tokens = 0;
2513 let embed_total_ms = 0;
2514 let embed_max_batch_ms = 0;
2515 let embed_min_batch_ms = embedBatches.length > 0 ? Number.POSITIVE_INFINITY : 0;
2516 const embedBatchCount = embedBatches.length;
2517 // Result vectors keyed by toEmbed index (preserves order so the upsert step can
2518 // zip vectors[i] back to toEmbed[i].chunk without depending on completion order).
2519 const embedResults = await runWithConcurrency(
2520 embedBatches.map((batch, batchIndex) => async () => {
2521 const texts = batch.map((item) => item.chunk.text);
2522 const { vectors: batchVectors, embedding_input_tokens: batchTok } = await embedWithUsage(
2523 texts,
2524 embeddingConfig,
2525 { voyageInputType: 'document' },
2526 );
2527 return { batchIndex, batchVectors, batchTok };
2528 }),
2529 {
2530 concurrency: EMBED_CONCURRENCY,
2531 onSettled: ({ index, ok, ms, error }) => {
2532 if (!ok) {
2533 timer.step('embed_batch_error', {
2534 batch_index: index,
2535 embed_ms: ms,
2536 error: error?.message || String(error),
2537 });
2538 return;
2539 }
2540 embed_total_ms += ms;
2541 if (ms > embed_max_batch_ms) embed_max_batch_ms = ms;
2542 if (ms < embed_min_batch_ms) embed_min_batch_ms = ms;
2543 timer.step('embed_batch', {
2544 batch_index: index,
2545 batch_size: embedBatches[index].length,
2546 embed_ms: ms,
2547 });
2548 },
2549 },
2550 );
2551 const vectorsByEmbedIndex = new Array(toEmbed.length);
2552 for (const { batchIndex, batchVectors, batchTok } of embedResults) {
2553 embedding_input_tokens += batchTok;
2554 const start = batchIndex * BATCH_EMBED;
2555 const batch = embedBatches[batchIndex];
2556 for (let j = 0; j < batch.length; j++) {
2557 vectorsByEmbedIndex[start + j] = batchVectors[j] || [];
2558 }
2559 }
2560 timer.step('embed_total', {
2561 batches: embedBatchCount,
2562 embed_total_ms,
2563 embed_avg_batch_ms: embedBatchCount > 0 ? Math.round(embed_total_ms / embedBatchCount) : 0,
2564 embed_min_batch_ms: embed_min_batch_ms === Number.POSITIVE_INFINITY ? 0 : embed_min_batch_ms,
2565 embed_max_batch_ms,
2566 embedding_input_tokens,
2567 concurrency: EMBED_CONCURRENCY,
2568 batch_size: BATCH_EMBED,
2569 provider: embeddingConfig?.provider || null,
2570 model: embeddingConfig?.model || null,
2571 });
2572
2573 // Orphans = chunk_ids in the store but not in the current export (deleted/renamed notes).
2574 let vectors_deleted = 0;
2575 if (partitioned.orphanIds.length > 0 && typeof store.deleteByChunkIds === 'function') {
2576 vectors_deleted = await store.deleteByChunkIds(partitioned.orphanIds);
2577 } else if (
2578 partitioned.orphanIds.length === 0 &&
2579 existingHashes.size === 0 &&
2580 typeof store.deleteByVaultId === 'function'
2581 ) {
2582 // First run for this vault (no prior cache rows) — clear any leftover rows that
2583 // lacked content_hash but might still match the vault, so search cannot return paths
2584 // that no longer exist in the export.
2585 vectors_deleted = await store.deleteByVaultId(vaultId);
2586 }
2587 timer.step('delete_old_vectors', {
2588 vectors_deleted,
2589 orphan_count: partitioned.orphanIds.length,
2590 });
2591
2592 let upsert_total_ms = 0;
2593 const upsertBatchCount = Math.ceil(toEmbed.length / BATCH_UPSERT);
2594 for (let i = 0; i < toEmbed.length; i += BATCH_UPSERT) {
2595 const slice = toEmbed.slice(i, i + BATCH_UPSERT);
2596 const points = slice.map((item, j) => ({
2597 id: item.storeId,
2598 vector: vectorsByEmbedIndex[i + j] || [],
2599 text: item.chunk.text,
2600 path: item.chunk.path,
2601 vault_id: vaultId,
2602 project: item.chunk.project,
2603 tags: item.chunk.tags,
2604 date: item.chunk.date,
2605 causal_chain_id: item.chunk.causal_chain_id,
2606 entity: item.chunk.entity,
2607 episode_id: item.chunk.episode_id,
2608 content_hash: item.contentHash,
2609 }));
2610 const upsertStart = Date.now();
2611 await store.upsert(points);
2612 upsert_total_ms += Date.now() - upsertStart;
2613 }
2614 timer.step('upsert_total', {
2615 batches: upsertBatchCount,
2616 upsert_total_ms,
2617 upsert_avg_batch_ms: upsertBatchCount > 0 ? Math.round(upsert_total_ms / upsertBatchCount) : 0,
2618 points_upserted: toEmbed.length,
2619 });
2620 await persistVectorsToBlob(req, canisterUid, vectorsDir);
2621 timer.step('persist_vectors');
2622 // Sidecar update so the Hub UI's "Last indexed" line is correct after BOTH
2623 // the synchronous and the background path. The same record format is read
2624 // by `GET /api/v1/index/status` and rendered next to the Re-index button.
2625 if (req.blobStore) {
2626 try {
2627 await setLastIndexedAt(req.blobStore, {
2628 canisterUid,
2629 vaultId,
2630 actorUid: sanitizeUserId(uid),
2631 notesProcessed: notes.length,
2632 chunksIndexed: allChunks.length,
2633 chunksEmbedded: toEmbed.length,
2634 chunksSkippedCached: chunks_skipped_cached,
2635 vectorsDeleted: vectors_deleted,
2636 embeddingInputTokens: embedding_input_tokens,
2637 durationMs: timer.totalMs(),
2638 mode: req.bridgeInternalRequest != null ? 'background' : 'sync',
2639 provider: embeddingConfig?.provider || null,
2640 model: embeddingConfig?.model || null,
2641 });
2642 } catch (sidecarErr) {
2643 // Sidecar write failure must not fail the index — UI just falls back to "never indexed".
2644 console.warn('[bridge] setLastIndexedAt failed:', sidecarErr?.message || sidecarErr);
2645 }
2646 }
2647 // Background path: release the job lock so a future re-index is not falsely blocked.
2648 // `expectedJobId` ensures we only release OUR lock — if a stale background job
2649 // finishes after a fresh background job has already acquired a new lock (rare,
2650 // but possible if the first job exceeded the lock TTL), we leave the new lock alone.
2651 if (req.bridgeInternalRequest != null && req.blobStore) {
2652 try {
2653 await releaseJobLock(req.blobStore, {
2654 canisterUid,
2655 vaultId,
2656 expectedJobId: req.bridgeInternalRequest.jobId,
2657 });
2658 } catch (lockErr) {
2659 console.warn('[bridge] releaseJobLock failed:', lockErr?.message || lockErr);
2660 }
2661 }
2662 console.log(
2663 '[bridge] index',
2664 JSON.stringify({
2665 vault_id: vaultId,
2666 canister_uid: sanitizeUserId(canisterUid),
2667 notes_processed: notes.length,
2668 chunks_indexed: allChunks.length,
2669 chunks_skipped_cached,
2670 chunks_embedded: toEmbed.length,
2671 vectors_deleted,
2672 mode: req.bridgeInternalRequest != null ? 'background' : 'sync',
2673 }),
2674 );
2675 const indexResult = {
2676 ok: true,
2677 notesProcessed: notes.length,
2678 chunksIndexed: allChunks.length,
2679 chunksSkippedCached: chunks_skipped_cached,
2680 chunksEmbedded: toEmbed.length,
2681 embedding_input_tokens,
2682 vectors_deleted,
2683 };
2684 timer.finish({
2685 ok: true,
2686 notes_processed: notes.length,
2687 chunks_indexed: allChunks.length,
2688 chunks_skipped_cached,
2689 chunks_embedded: toEmbed.length,
2690 vectors_deleted,
2691 embedding_input_tokens,
2692 mode: req.bridgeInternalRequest != null ? 'background' : 'sync',
2693 });
2694 res.json(indexResult);
2695 fireBridgeCaptureEvent(
2696 'index',
2697 {
2698 note_count: notes.length,
2699 chunk_count: allChunks.length,
2700 chunks_skipped_cached,
2701 chunks_embedded: toEmbed.length,
2702 vectors_deleted,
2703 },
2704 sanitizeUserId(uid),
2705 vaultId,
2706 );
2707 return;
2708 } catch (e) {
2709 console.error('Bridge index error:', e);
2710 timer.finish({ ok: false, phase: 'catch', error: e?.message || String(e) });
2711 // Background path: release the lock on error so the operator can retry without
2712 // waiting for the 16-min TTL. We do this defensively regardless of whether the
2713 // error happened before or after the lock was acquired.
2714 if (req.bridgeInternalRequest != null && req.blobStore) {
2715 try {
2716 await releaseJobLock(req.blobStore, {
2717 canisterUid: req.bridgeInternalRequest.canisterUid,
2718 vaultId: req.bridgeInternalRequest.vaultId,
2719 expectedJobId: req.bridgeInternalRequest.jobId,
2720 });
2721 } catch (lockErr) {
2722 console.warn('[bridge] releaseJobLock failed (catch path):', lockErr?.message || lockErr);
2723 }
2724 }
2725 return res.status(500).json({
2726 error: 'Index failed',
2727 code: 'INTERNAL_ERROR',
2728 message: bridgeEmbedFailureMessage(e, 'index'),
2729 });
2730 }
2731 });
2732
/**
 * Shortens `text` to at most `maxChars` UTF-16 code units for display as a search snippet.
 *
 * The text is trimmed first. When it has to be cut, the cut prefers the last space in the
 * second half of the allowed window (so words are not chopped mid-way) and an ellipsis is
 * appended.
 *
 * @param {unknown} text - Candidate snippet text; anything that is not a string yields ''.
 * @param {number} [maxChars=300] - Maximum length before the ellipsis. Negative values are
 *   treated as 0 and NaN falls back to 300.
 * @returns {string} The trimmed text, or a shortened prefix followed by '…'.
 */
function truncateSnippet(text, maxChars = 300) {
  if (typeof text !== 'string') return '';
  const trimmed = text.trim();
  // A negative limit would make `slice(0, limit)` count from the END of the string, and NaN
  // would reduce every snippet to a bare ellipsis; normalise both before using the limit.
  const requested = Number(maxChars);
  const limit = Number.isNaN(requested) ? 300 : Math.max(0, Math.trunc(requested));
  if (trimmed.length <= limit) return trimmed;
  let head = trimmed.slice(0, limit);
  // Never end on a dangling high surrogate: that would emit half of an astral character
  // (e.g. an emoji), which is not valid text once serialised.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) head = head.slice(0, -1);
  const lastSpace = head.lastIndexOf(' ');
  return `${lastSpace > limit / 2 ? head.slice(0, lastSpace) : head}…`;
}
2741
/**
 * POST /api/v1/embed — batch document embeddings for hosted MCP `cluster` (and similar callers).
 *
 * Authentication and vault access follow the same steps as `POST /api/v1/search`: a Bearer JWT
 * in `Authorization`, then `resolveHostedBridgeContext` to obtain the effective canister user.
 * The embedding configuration comes from `getBridgeStoreConfig`, and texts are embedded with
 * `embedWithUsage` using `voyageInputType: "document"`, like the index route.
 *
 * Request body: `{ texts: unknown[] }`. Only the first HOSTED_EMBED_MAX_TEXTS entries are used
 * and each one is stringified and cut to HOSTED_EMBED_MAX_CHARS_PER_TEXT characters.
 * Response: `{ vectors, embedding_input_tokens, texts_count }`.
 */
const HOSTED_EMBED_MAX_TEXTS = 200;
const HOSTED_EMBED_MAX_CHARS_PER_TEXT = 1200;

app.post('/api/v1/embed', async (req, res) => {
  const authHeader = req.headers.authorization;
  const bearerToken = authHeader && authHeader.startsWith('Bearer ') ? authHeader.slice(7) : null;
  const uid = bearerToken ? userIdFromJwt(bearerToken) : null;
  if (!uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }

  const bridgeCtx = await resolveHostedBridgeContext(req, uid);
  if (!bridgeCtx.ok) {
    return res.status(bridgeCtx.status).json({ error: bridgeCtx.error, code: bridgeCtx.code });
  }
  const canisterUid = bridgeCtx.effectiveCanisterUid;

  const submitted = req.body?.texts;
  if (!Array.isArray(submitted)) {
    return res.status(400).json({ error: 'texts array required', code: 'BAD_REQUEST' });
  }
  // Cap both the number of texts and the size of each one before anything is embedded.
  const capped = submitted.slice(0, HOSTED_EMBED_MAX_TEXTS);
  const texts = capped.map((entry) => String(entry ?? '').slice(0, HOSTED_EMBED_MAX_CHARS_PER_TEXT));
  if (texts.length === 0) {
    return res.status(400).json({ error: 'texts must be non-empty', code: 'BAD_REQUEST' });
  }

  try {
    const { embedWithUsage } = await import('../../lib/embedding.mjs');
    const vectorsDir = await getVectorsDirForUser(req, canisterUid);
    const { embedding: embeddingConfig } = getBridgeStoreConfig(canisterUid, vectorsDir);

    const vectors = [];
    let embedding_input_tokens = 0;
    // Batches are embedded one after another; results are appended in input order.
    // NOTE(review): `BATCH_EMBED` is not defined in this block — presumably a module-level
    // constant; confirm it still exists given the index route reads `BATCH_EMBED_DEFAULT`.
    for (let offset = 0; offset < texts.length; offset += BATCH_EMBED) {
      const result = await embedWithUsage(
        texts.slice(offset, offset + BATCH_EMBED),
        embeddingConfig,
        { voyageInputType: 'document' },
      );
      embedding_input_tokens += result.embedding_input_tokens;
      vectors.push(...result.vectors);
    }

    return res.json({
      vectors,
      embedding_input_tokens,
      texts_count: texts.length,
    });
  } catch (e) {
    console.error('Bridge embed batch error:', e);
    return res.status(500).json({
      error: 'Embed failed',
      code: 'INTERNAL_ERROR',
      message: bridgeEmbedFailureMessage(e, 'embed'),
    });
  }
});
2806
/**
 * POST /api/v1/search — keyword or semantic search over one hosted vault.
 *
 * Auth: Bearer JWT in `Authorization`; vault selection via the `X-Vault-Id` header;
 * `resolveHostedBridgeContext` supplies the effective canister user and optional scope.
 *
 * Body: `query` (required string), `mode` ('keyword' or anything else for semantic), `limit`
 * (clamped to 1..100, default 20), `snippetChars` (positive integer, default 300) plus the
 * filter fields forwarded below (folder, project, tag, since, until, chain, entity, episode,
 * content_scope, order, ...).
 *
 * Keyword mode fetches the vault export from the canister and searches it in-process.
 * Semantic mode embeds the query and searches the per-user vector store.
 */
app.post('/api/v1/search', async (req, res) => {
  const auth = req.headers.authorization;
  const token = auth && auth.startsWith('Bearer ') ? auth.slice(7) : null;
  const uid = token ? userIdFromJwt(token) : null;
  if (!uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }
  const hctx = await resolveHostedBridgeContext(req, uid);
  if (!hctx.ok) {
    return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
  }
  const canisterUid = hctx.effectiveCanisterUid;
  const query = req.body?.query;
  // Resolved once and shared by the capture hook and the search itself.
  const bridgeVaultId = sanitizeVaultId(req.headers['x-vault-id']);
  const mode = req.body?.mode === 'keyword' ? 'keyword' : 'semantic';
  // Auto-capture after successful response — fire-and-forget, does not affect latency.
  res.on('finish', () => {
    if (res.statusCode >= 200 && res.statusCode < 300 && query) {
      fireBridgeCaptureEvent('search', { query, mode }, sanitizeUserId(uid), bridgeVaultId);
    }
  });
  if (!query || typeof query !== 'string') {
    return res.status(400).json({ error: 'query required', code: 'BAD_REQUEST' });
  }
  const limit = Math.max(1, Math.min(parseInt(req.body?.limit, 10) || 20, 100));
  // Only a positive snippet length is meaningful: a negative value would otherwise reach
  // the snippet builders and be interpreted as "count from the end of the text".
  const requestedSnippetChars = parseInt(req.body?.snippetChars, 10);
  const snippetChars = requestedSnippetChars > 0 ? requestedSnippetChars : 300;
  try {
    if (mode === 'keyword') {
      let exportRes;
      try {
        exportRes = await fetch(CANISTER_URL + '/api/v1/export', {
          method: 'GET',
          headers: canisterHeaders({ 'X-User-Id': canisterUid, 'X-Vault-Id': bridgeVaultId }),
        });
      } catch (_e) {
        return res.status(502).json({ error: 'Could not reach canister', code: 'BAD_GATEWAY' });
      }
      if (!exportRes.ok) {
        return res.status(502).json({ error: 'Canister export failed', code: 'BAD_GATEWAY', status: exportRes.status });
      }
      let vault;
      try {
        vault = await exportRes.json();
      } catch (_e) {
        return res.status(502).json({ error: 'Invalid canister response', code: 'BAD_GATEWAY' });
      }
      let rawNotes = vault.notes || [];
      if (hctx.scope) {
        rawNotes = applyScopeFilterToNotes(rawNotes, hctx.scope);
      }
      const { noteRecordFromExportPayload, keywordSearchNotesArray } = await import('../../lib/keyword-search.mjs');
      const { filterNotesByListOptions } = await import('../../lib/list-notes.mjs');
      let shaped = rawNotes.map((n) => noteRecordFromExportPayload(n));
      shaped = filterNotesByListOptions(shaped, {
        folder: req.body?.folder,
        project: req.body?.project,
        tag: req.body?.tag,
        since: req.body?.since,
        until: req.body?.until,
        chain: req.body?.chain,
        entity: req.body?.entity,
        episode: req.body?.episode,
        content_scope: req.body?.content_scope,
      });
      const fields =
        req.body?.fields === 'path' || req.body?.fields === 'full' ? req.body.fields : 'path+snippet';
      const out = keywordSearchNotesArray(shaped, query, {
        limit,
        order: req.body?.order,
        fields,
        snippetChars,
        match: req.body?.match === 'all_terms' ? 'all_terms' : 'phrase',
        countOnly: req.body?.count_only === true || req.body?.countOnly === true,
      });
      if (out.results && hctx.scope) {
        return res.json({ ...out, results: applyScopeFilterToNotes(out.results, hctx.scope) });
      }
      return res.json(out);
    }

    const { embed } = await import('../../lib/embedding.mjs');
    const { createVectorStore } = await import('../../lib/vector-store.mjs');
    const { filterHitsByContentScope, resolveSearchFolderForContentScope } = await import('../../lib/approval-log.mjs');
    const { MAX_VECTOR_KNN } = await import('../../lib/vector-knn-limit.mjs');

    const vectorsDir = await getVectorsDirForUser(req, canisterUid);
    const storeConfig = getBridgeStoreConfig(canisterUid, vectorsDir);
    const store = await createVectorStore(storeConfig);
    const [queryVector] = await embed([query], storeConfig.embedding, { voyageInputType: 'query' });
    if (!queryVector) {
      return res.status(500).json({ error: 'Embedding failed', code: 'INTERNAL_ERROR' });
    }
    const cs = req.body?.content_scope || 'all';
    const userFolder = req.body?.folder;
    const resolved = resolveSearchFolderForContentScope(cs, userFolder);
    if (resolved.impossible) {
      return res.json({ results: [], query, mode: 'semantic' });
    }
    // Content-scope filtering runs on the hits after the store query, so the store is asked
    // for more rows than `limit` whenever a scope other than 'all' is active.
    let searchLimit = limit;
    if (resolved.wideNotesFetch) {
      searchLimit = Math.min(10000, Math.max(limit * 120, 2500));
    } else if (cs !== 'all') {
      searchLimit = Math.min(10000, Math.max(limit * 40, 800));
    }
    searchLimit = Math.min(searchLimit, MAX_VECTOR_KNN);
    const hits = await store.search(queryVector, {
      limit: searchLimit,
      vault_id: bridgeVaultId,
      project: req.body?.project,
      tag: req.body?.tag,
      folder: resolved.folder,
      since: req.body?.since,
      until: req.body?.until,
      order: req.body?.order,
      chain: req.body?.chain,
      entity: req.body?.entity,
      episode: req.body?.episode,
    });
    let scopedHits = filterHitsByContentScope(hits || [], cs);
    scopedHits = scopedHits.slice(0, limit);
    let results = scopedHits.map((h) => ({
      path: h.path,
      score: h.score,
      ...(typeof h.vec_distance === 'number' && Number.isFinite(h.vec_distance)
        ? { vec_distance: h.vec_distance }
        : {}),
      project: h.project ?? null,
      tags: h.tags ?? [],
      snippet: truncateSnippet(h.text, snippetChars),
    }));
    // NOTE(review): the scope filter runs after the `limit` slice, so a scoped caller may
    // receive fewer than `limit` results — confirm this is intended.
    if (hctx.scope) {
      results = applyScopeFilterToNotes(results, hctx.scope);
    }
    return res.json({ results, query, mode: 'semantic' });
  } catch (e) {
    console.error('Bridge search error:', e);
    return res.status(500).json({
      error: 'Search failed',
      code: 'INTERNAL_ERROR',
      message: bridgeEmbedFailureMessage(e, 'search'),
    });
  }
});
2953
// Express error-handling middleware (identified by its four-argument signature).
// Logs the error and answers with JSON `{ error, code }`. Status selection:
//   - multer upload errors: 413 for LIMIT_FILE_SIZE, otherwise 400;
//   - a numeric `err.status` or `err.statusCode` in the 400–599 range is used as-is;
//   - everything else: 500.
// NOTE(review): Express only delivers errors to error middleware from handlers registered
// before it; routes are registered after this point in the file — confirm that ordering is
// intended.
app.use((err, req, res, _next) => {
  // A response that has already started cannot be replaced.
  if (res.headersSent) return;
  console.error('[bridge] unhandled error:', err?.stack || err?.message || err);

  const isHttpErrorStatus = (value) => typeof value === 'number' && value >= 400 && value < 600;

  let status;
  if (err instanceof multer.MulterError) {
    status = err.code === 'LIMIT_FILE_SIZE' ? 413 : 400;
  } else if (isHttpErrorStatus(err.status)) {
    status = err.status;
  } else if (isHttpErrorStatus(err.statusCode)) {
    status = err.statusCode;
  } else {
    status = 500;
  }

  const body = {
    error: err.message || 'Internal error',
    code: err.code || 'INTERNAL_ERROR',
  };
  res.status(status).json(body);
});
2971
2972 // ——— Memory endpoints (Phase 8) ———
2973
/**
 * Fire-and-forget memory event capture for hosted bridge endpoints.
 *
 * When `globalThis.__netlify_blob_store` is set the event is appended through
 * `blobsAppendMemoryEvent`; otherwise it is stored with the file-based memory provider under
 * `bridgeMemoryDir(uid, vaultId || 'default')`.
 *
 * The work runs in a detached async task: this function returns immediately, never throws and
 * never delays the response. Failures are logged as warnings instead of being dropped.
 *
 * @param {string} type - Memory event type; types the memory layer does not accept are skipped.
 * @param {object} data - Event payload.
 * @param {string} uid - User id the event is recorded for.
 * @param {string} [vaultId] - Vault id; the event and the file directory fall back to 'default'.
 * @returns {void}
 */
function fireBridgeCaptureEvent(type, data, uid, vaultId) {
  (async () => {
    try {
      if (globalThis.__netlify_blob_store) {
        // Hosted: append to Netlify Blobs for durability across Lambda invocations.
        const { createMemoryEvent, MEMORY_EVENT_TYPES } = await import('../../lib/memory-event.mjs');
        if (!MEMORY_EVENT_TYPES.includes(type)) return;
        const event = createMemoryEvent(type, data, { vaultId: vaultId || 'default' });
        // NOTE(review): the raw `vaultId` (possibly undefined) is passed here while the event
        // itself uses the 'default' fallback — confirm `blobsAppendMemoryEvent` handles that.
        await blobsAppendMemoryEvent(uid, vaultId, event);
      } else {
        // Self-hosted: file-based.
        const { FileMemoryProvider } = await import('../../lib/memory-provider-file.mjs');
        const { MemoryManager } = await import('../../lib/memory.mjs');
        const mm = new MemoryManager(new FileMemoryProvider(bridgeMemoryDir(uid, vaultId || 'default')));
        // Awaited so that, if `store` is asynchronous, a rejection is handled by the catch
        // below instead of escaping as an unhandled rejection; harmless if it is synchronous.
        if (mm.shouldCapture(type)) await mm.store(type, data);
      }
    } catch (captureErr) {
      // Capture is best-effort: report the failure but never let it surface to the caller.
      console.warn('[bridge] memory capture failed:', captureErr?.message || captureErr);
    }
  })();
}
2998
2999 // ——— Calendar (hosted parity — step 12): event store on bridge DATA_DIR + notes from canister ———
3000
/**
 * Fetches canister note metadata for calendar timeline merge.
 *
 * Requests up to 10000 notes from `${CANISTER_URL}/api/v1/notes` and maps each note that has a
 * non-empty string `path` to a timeline record. Returns an empty array when `CANISTER_URL` is
 * unset, the canister is unreachable, it answers with a non-OK status, or the payload has no
 * `notes` array, so the events layer can still succeed. Upstream failures are logged as
 * warnings so they can be told apart from a vault that simply has no notes.
 *
 * @param {string} canisterUid - Sent as the `x-user-id` header.
 * @param {string} actorUid - Sent as the `x-actor-id` header.
 * @param {string} vaultId - Sent as the `x-vault-id` header.
 * @returns {Promise<Array<{ path: string, frontmatter: object, date?: string|null, updated?: string|null, project?: string|null, tags?: string[] }>>}
 */
async function fetchCanisterNoteRecordsForTimeline(canisterUid, actorUid, vaultId) {
  if (!CANISTER_URL) return [];
  try {
    const upstream = await fetch(`${CANISTER_URL}/api/v1/notes?limit=10000&offset=0`, {
      headers: canisterHeaders({
        'x-user-id': canisterUid,
        'x-actor-id': actorUid,
        'x-vault-id': vaultId,
      }),
    });
    if (!upstream.ok) {
      console.warn('[bridge] calendar timeline: canister notes request failed with status', upstream.status);
      return [];
    }
    const data = await upstream.json();
    if (!Array.isArray(data.notes)) return [];
    return data.notes
      .map((note) => {
        const path = typeof note.path === 'string' ? note.path.trim() : '';
        if (!path) return null;
        // `date` and `project` are read from the materialized frontmatter, while the record
        // keeps the raw frontmatter object as received.
        const fm = materializeListFrontmatter(note.frontmatter ?? {});
        return {
          path,
          frontmatter: note.frontmatter ?? {},
          date: typeof fm.date === 'string' ? fm.date : null,
          updated: typeof note.updated === 'string' ? note.updated : null,
          project: typeof fm.project === 'string' ? fm.project : null,
          tags: Array.isArray(note.tags) ? note.tags.map(String) : [],
        };
      })
      .filter(Boolean);
  } catch (fetchErr) {
    // Best-effort by design: the timeline still renders events without note records.
    console.warn('[bridge] calendar timeline: could not load canister notes:', fetchErr?.message || fetchErr);
    return [];
  }
}
3042
// GET /api/v1/calendar/timeline — merged calendar timeline for one vault.
// Query: `from` and `to` (both required), optional `layers` and `source_calendar_ids`.
// Note records are loaded from the canister and handed to `buildCalendarTimeline` together
// with the bridge DATA_DIR, the vault id and the caller's scope.
// Errors whose message contains one of the known validation markers are reported as 400;
// anything else is a 500.
app.get('/api/v1/calendar/timeline', requireBridgeAuth, async (req, res) => {
  const rawFrom = req.query.from;
  const rawTo = req.query.to;
  const from = typeof rawFrom === 'string' ? rawFrom.trim() : '';
  const to = typeof rawTo === 'string' ? rawTo.trim() : '';
  if (!(from && to)) {
    return res.status(400).json({ error: '`from` and `to` are required', code: 'BAD_REQUEST' });
  }

  const hctx = await resolveHostedBridgeContext(req, req.uid);
  if (!hctx.ok) {
    return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
  }

  try {
    const noteRecords = await fetchCanisterNoteRecordsForTimeline(
      hctx.effectiveCanisterUid,
      hctx.actorUid,
      hctx.vaultId,
    );
    return res.json(
      buildCalendarTimeline({
        dataDir: DATA_DIR,
        vaultId: hctx.vaultId,
        noteRecords,
        from,
        to,
        layers: req.query.layers,
        sourceCalendarIds: req.query.source_calendar_ids,
        scope: hctx.scope,
      }),
    );
  } catch (e) {
    const message = e?.message ? String(e.message) : 'Invalid timeline request';
    // Message fragments that identify a problem with the caller's input.
    const badRequestMarkers = ['Unsupported timeline layer', 'Invalid', 'required', 'before'];
    const isBadRequest = badRequestMarkers.some((marker) => message.includes(marker));
    if (isBadRequest) {
      return res.status(400).json({ error: message, code: 'BAD_REQUEST' });
    }
    return res.status(500).json({ error: message, code: 'RUNTIME_ERROR' });
  }
});
3081
// GET /api/v1/calendar/agent-context — calendar context for agents, for one vault.
// Query: `from` and `to` (both required), optional `agent_context_tier` and
// `source_calendar_ids`, all forwarded to `retrieveAgentCalendarContext`.
// Errors whose message contains one of the known validation markers are reported as 400;
// anything else is a 500.
// NOTE(review): unlike the timeline route, `hctx.scope` is not forwarded here — confirm that
// agent context does not need scope filtering.
app.get('/api/v1/calendar/agent-context', requireBridgeAuth, async (req, res) => {
  const rawFrom = req.query.from;
  const rawTo = req.query.to;
  const from = typeof rawFrom === 'string' ? rawFrom.trim() : '';
  const to = typeof rawTo === 'string' ? rawTo.trim() : '';
  if (!(from && to)) {
    return res.status(400).json({ error: '`from` and `to` are required', code: 'BAD_REQUEST' });
  }

  const hctx = await resolveHostedBridgeContext(req, req.uid);
  if (!hctx.ok) {
    return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
  }

  try {
    const options = {
      from,
      to,
      agentContextTier: req.query.agent_context_tier,
      sourceCalendarIds: req.query.source_calendar_ids,
    };
    return res.json(retrieveAgentCalendarContext(DATA_DIR, hctx.vaultId, options));
  } catch (e) {
    const message = e?.message ? String(e.message) : 'Invalid agent context request';
    // Message fragments that identify a problem with the caller's input.
    const badRequestMarkers = ['agent_context_tier', 'Invalid', 'required', 'before'];
    const isBadRequest = badRequestMarkers.some((marker) => message.includes(marker));
    if (isBadRequest) {
      return res.status(400).json({ error: message, code: 'BAD_REQUEST' });
    }
    return res.status(500).json({ error: message, code: 'RUNTIME_ERROR' });
  }
});
3111
// GET /api/v1/calendar/source-calendars — lists the source calendars of the vault named by
// the `X-Vault-Id` header. The vault must be in the caller's allowed vault ids as resolved by
// `resolveHostedBridgeSettingsContext`; otherwise the request is rejected with 403.
app.get('/api/v1/calendar/source-calendars', requireBridgeAuth, async (req, res) => {
  const hctx = await resolveHostedBridgeSettingsContext(req, req.uid);
  const vaultId = sanitizeVaultId(req.headers['x-vault-id']);
  // Fail closed: if the resolved context carries no allow-list, treat the vault as not
  // allowed instead of throwing on `undefined.includes` outside the try block.
  const allowedVaultIds = Array.isArray(hctx?.allowedVaultIds) ? hctx.allowedVaultIds : [];
  if (!allowedVaultIds.includes(vaultId)) {
    return res.status(403).json({ error: 'Access to this vault is not allowed.', code: 'FORBIDDEN' });
  }
  try {
    return res.json({
      schema: 'knowtation.source_calendars/v0',
      vault_id: vaultId,
      source_calendars: listSourceCalendarsForClient(DATA_DIR, vaultId),
    });
  } catch (e) {
    return res.status(500).json({ error: e.message, code: 'RUNTIME_ERROR' });
  }
});
3128
// PATCH /api/v1/calendar/source-calendars/:id — updates one source calendar of the caller's
// vault. Requires bridge auth plus the editor-or-admin check. The body is parsed with
// `parseSourceCalendarPatchBody` and applied with `patchSourceCalendar`.
// Error mapping: `POLICY_CAP_EXCEEDED` → 403, "not found" → 404, validation messages → 400,
// anything else → 500.
app.patch('/api/v1/calendar/source-calendars/:id', requireBridgeAuth, requireBridgeEditorOrAdmin, async (req, res) => {
  let sourceCalendarId = '';
  if (typeof req.params.id === 'string') {
    let decodedId = req.params.id;
    try {
      decodedId = decodeURIComponent(req.params.id);
    } catch (decodeErr) {
      // Express has already percent-decoded route params, so an id that contains a literal
      // '%' makes this second decode throw URIError. Keep the Express-decoded value then,
      // instead of letting the exception escape the handler.
      if (!(decodeErr instanceof URIError)) throw decodeErr;
    }
    sourceCalendarId = decodedId.trim();
  }
  if (!sourceCalendarId) {
    return res.status(400).json({ error: 'source calendar id is required', code: 'BAD_REQUEST' });
  }
  const hctx = await resolveHostedBridgeContext(req, req.uid);
  if (!hctx.ok) return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
  try {
    const patch = parseSourceCalendarPatchBody(req.body);
    const result = patchSourceCalendar(DATA_DIR, hctx.vaultId, sourceCalendarId, patch);
    return res.json({
      schema: 'knowtation.source_calendar_patch/v0',
      vault_id: hctx.vaultId,
      policy_agent_context_tier_max_cap: result.policy_agent_context_tier_max_cap,
      source_calendar: result.source_calendar,
    });
  } catch (e) {
    const message = e?.message ? String(e.message) : 'Patch failed';
    if (e?.code === 'POLICY_CAP_EXCEEDED') {
      return res.status(403).json({ error: message, code: 'POLICY_CAP_EXCEEDED' });
    }
    if (message.includes('not found')) {
      return res.status(404).json({ error: message, code: 'NOT_FOUND' });
    }
    if (
      message.includes('must be')
      || message.includes('required')
      || message.includes('exceeds policy')
    ) {
      return res.status(400).json({ error: message, code: 'BAD_REQUEST' });
    }
    return res.status(500).json({ error: message, code: 'RUNTIME_ERROR' });
  }
});
3163
/**
 * POST /api/v1/calendar/events/import
 * Body: { ics_text (required string), display_name?, source_calendar_id?, connector_id?, default_timezone? }
 * Response: { schema, vault_id, ...importIcsIntoVault result }
 * Errors: 400 BAD_REQUEST for a blank ics_text or a validation-looking error message;
 * 500 RUNTIME_ERROR otherwise. Context-resolution failures are relayed as-is.
 */
app.post('/api/v1/calendar/events/import', requireBridgeAuth, requireBridgeEditorOrAdmin, async (req, res) => {
  const hctx = await resolveHostedBridgeContext(req, req.uid);
  if (!hctx.ok) {
    return res.status(hctx.status).json({ error: hctx.error, code: hctx.code });
  }

  const body = req.body && typeof req.body === 'object' ? req.body : {};
  // Non-string optional fields are dropped rather than rejected.
  const stringOrUndefined = (value) => (typeof value === 'string' ? value : undefined);

  const icsText = typeof body.ics_text === 'string' ? body.ics_text : '';
  if (icsText.trim() === '') {
    return res.status(400).json({ error: 'ics_text (string) is required', code: 'BAD_REQUEST' });
  }

  try {
    const importOptions = {
      icsText,
      displayName: stringOrUndefined(body.display_name),
      sourceCalendarId: stringOrUndefined(body.source_calendar_id),
      connectorId: stringOrUndefined(body.connector_id),
      defaultTimezone: stringOrUndefined(body.default_timezone),
    };
    const result = importIcsIntoVault(DATA_DIR, hctx.vaultId, importOptions);
    return res.status(200).json({
      schema: 'knowtation.calendar_import/v0',
      vault_id: hctx.vaultId,
      ...result,
    });
  } catch (e) {
    const message = e?.message ? String(e.message) : 'Import failed';
    // Message fragments that mark the failure as the caller's fault.
    const clientErrorMarkers = ['not found', 'required', 'exceeds', 'ICS'];
    if (clientErrorMarkers.some((marker) => message.includes(marker))) {
      return res.status(400).json({ error: message, code: 'BAD_REQUEST' });
    }
    return res.status(500).json({ error: message, code: 'RUNTIME_ERROR' });
  }
});
3198
/**
 * Derive the memory-route identity from a request.
 * Reads the Bearer token from the Authorization header, the vault id from the
 * x-vault-id header (falling back to the vault_id query parameter), and the scope
 * from the query string.
 * @param {object} req - Express request.
 * @returns {{ uid: string|null, vaultId: *, scope: 'global'|'vault' }} uid is null when
 *   there is no Bearer token or userIdFromJwt yields nothing for it.
 */
function bridgeMemoryAuth(req) {
  const header = req.headers.authorization;
  let bearerToken = null;
  if (header && header.startsWith('Bearer ')) {
    bearerToken = header.slice(7);
  }
  const rawUid = bearerToken ? userIdFromJwt(bearerToken) : null;

  const vaultId = sanitizeVaultId(req.headers['x-vault-id'] || req.query.vault_id);

  // Anything other than the literal 'global' means vault scope.
  let scope = 'vault';
  if (req.query.scope === 'global') {
    scope = 'global';
  }

  const uid = rawUid ? sanitizeUserId(rawUid) : null;
  return { uid, vaultId, scope };
}
3207
/**
 * Directory holding a user's file-backed memory under DATA_DIR/memory/<uid>.
 * @param {string} uid - User id (first path segment under memory/).
 * @param {string} vaultId - Vault id; used as the leaf directory unless scope is 'global'.
 * @param {string} [scope] - 'global' selects the shared '_global' leaf instead of the vault.
 * @returns {string} Joined directory path.
 */
function bridgeMemoryDir(uid, vaultId, scope) {
  const leaf = scope === 'global' ? '_global' : vaultId;
  return path.join(DATA_DIR, 'memory', uid, leaf);
}
3214
/**
 * GET /api/v1/memory/:key
 * Response: { key, value, updated_at, id } for the latest event stored under the key,
 * or { key, value: null, updated_at: null } when there is none.
 * Errors: 401 UNAUTHORIZED without a resolvable Bearer token; 500 RUNTIME_ERROR.
 */
app.get('/api/v1/memory/:key', async (req, res) => {
  const { uid, vaultId } = bridgeMemoryAuth(req);
  if (!uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }

  try {
    const providerModule = await import('../../lib/memory-provider-file.mjs');
    const managerModule = await import('../../lib/memory.mjs');
    const fileProvider = new providerModule.FileMemoryProvider(bridgeMemoryDir(uid, vaultId));
    const manager = new managerModule.MemoryManager(fileProvider);

    const latest = manager.getLatest(req.params.key);
    if (latest) {
      res.json({ key: req.params.key, value: latest.data, updated_at: latest.ts, id: latest.id });
      return;
    }
    return res.json({ key: req.params.key, value: null, updated_at: null });
  } catch (e) {
    res.status(500).json({ error: e.message, code: 'RUNTIME_ERROR' });
  }
});
3230
/**
 * POST /api/v1/memory/store
 * Body: { key (required), value (required), ttl? }
 * Object values are spread into the stored record next to `key`; any other value is
 * stored as { key, text: String(value) }.
 * Response: the result of MemoryManager.store.
 * Errors: 400 BAD_REQUEST when key or value is missing; 500 RUNTIME_ERROR.
 */
app.post('/api/v1/memory/store', requireBridgeAuth, requireBridgeEditorOrAdmin, express.json(), async (req, res) => {
  const { uid, vaultId } = bridgeMemoryAuth(req);
  try {
    const { key, value, ttl } = req.body || {};
    // Validate before touching the provider. Only an absent or empty-string value is
    // rejected: a plain truthiness test would also refuse legitimate values such as
    // 0 or false.
    if (!key || value == null || value === '') {
      return res.status(400).json({ error: 'key and value required', code: 'BAD_REQUEST' });
    }
    const { FileMemoryProvider } = await import('../../lib/memory-provider-file.mjs');
    const { MemoryManager } = await import('../../lib/memory.mjs');
    const provider = new FileMemoryProvider(bridgeMemoryDir(uid, vaultId));
    const mm = new MemoryManager(provider);
    const data = typeof value === 'object' ? { key, ...value } : { key, text: String(value) };
    const result = mm.store('user', data, { vaultId, ttl });
    res.json(result);
  } catch (e) {
    res.status(500).json({ error: e.message, code: 'RUNTIME_ERROR' });
  }
});
3247
/**
 * GET /api/v1/memory
 * Query: type?, since?, until?, limit? (default 20, maximum 100).
 * Response: { events, count }
 * When a Netlify Blobs store is present the events are read from Blobs, filtered here
 * and sorted by `ts` descending; otherwise the file-backed MemoryManager does the listing.
 * Errors: 401 UNAUTHORIZED without a resolvable Bearer token; 500 RUNTIME_ERROR.
 */
app.get('/api/v1/memory', async (req, res) => {
  const { uid, vaultId } = bridgeMemoryAuth(req);
  if (!uid) return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });

  // Resolve the page size once. Zero, negative and non-numeric input all fall back to
  // the default: a negative number would otherwise reach slice(0, -n) and silently
  // drop the tail of the list instead of limiting it.
  const requestedLimit = Number.parseInt(req.query.limit, 10);
  const limit = Number.isFinite(requestedLimit) && requestedLimit > 0
    ? Math.min(requestedLimit, 100)
    : 20;
  const { type, since, until } = req.query;

  try {
    let events;
    if (globalThis.__netlify_blob_store) {
      // Hosted: read from Blobs.
      events = await blobsGetMemoryEvents(uid, vaultId);
      if (type) events = events.filter((e) => e.type === type);
      if (since) events = events.filter((e) => e.ts >= since);
      if (until) events = events.filter((e) => e.ts <= until);
      // Newest first, comparing the `ts` values directly.
      events.sort((a, b) => (b.ts > a.ts ? 1 : b.ts < a.ts ? -1 : 0));
      events = events.slice(0, limit);
    } else {
      const { FileMemoryProvider } = await import('../../lib/memory-provider-file.mjs');
      const { MemoryManager } = await import('../../lib/memory.mjs');
      const mm = new MemoryManager(new FileMemoryProvider(bridgeMemoryDir(uid, vaultId)));
      events = mm.list({
        type: type || undefined,
        since: since || undefined,
        until: until || undefined,
        limit,
      });
    }
    res.json({ events, count: events.length });
  } catch (e) {
    res.status(500).json({ error: e.message, code: 'RUNTIME_ERROR' });
  }
});
3277
/**
 * POST /api/v1/memory/search
 * Placeholder: always answers with an empty result set and an explanatory note.
 * Errors: 401 UNAUTHORIZED without a resolvable Bearer token.
 */
app.post('/api/v1/memory/search', express.json(), async (req, res) => {
  const identity = bridgeMemoryAuth(req);
  if (!identity.uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }
  const emptyResponse = {
    results: [],
    count: 0,
    note: 'Hosted memory search requires vector provider (future).',
  };
  res.json(emptyResponse);
});
3283
/**
 * DELETE /api/v1/memory/clear
 * Query: type?, before? — forwarded to MemoryManager.clear (blank values become undefined).
 * Response: the result of MemoryManager.clear on the file-backed store.
 * Errors: 500 RUNTIME_ERROR.
 */
app.delete('/api/v1/memory/clear', requireBridgeAuth, requireBridgeEditorOrAdmin, async (req, res) => {
  const { uid, vaultId } = bridgeMemoryAuth(req);
  try {
    const providerModule = await import('../../lib/memory-provider-file.mjs');
    const managerModule = await import('../../lib/memory.mjs');
    const fileProvider = new providerModule.FileMemoryProvider(bridgeMemoryDir(uid, vaultId));
    const manager = new managerModule.MemoryManager(fileProvider);

    const clearFilter = {
      type: req.query.type || undefined,
      before: req.query.before || undefined,
    };
    res.json(manager.clear(clearFilter));
  } catch (e) {
    res.status(500).json({ error: e.message, code: 'RUNTIME_ERROR' });
  }
});
3300
/**
 * GET /api/v1/memory-stats
 * Response: MemoryManager.stats() for the caller's file-backed memory directory.
 * Errors: 401 UNAUTHORIZED without a resolvable Bearer token; 500 RUNTIME_ERROR.
 */
app.get('/api/v1/memory-stats', async (req, res) => {
  const { uid, vaultId } = bridgeMemoryAuth(req);
  if (!uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }

  try {
    const providerModule = await import('../../lib/memory-provider-file.mjs');
    const managerModule = await import('../../lib/memory.mjs');
    const fileProvider = new providerModule.FileMemoryProvider(bridgeMemoryDir(uid, vaultId));
    const manager = new managerModule.MemoryManager(fileProvider);
    res.json(manager.stats());
  } catch (e) {
    res.status(500).json({ error: e.message, code: 'RUNTIME_ERROR' });
  }
});
3314
3315 // ——— Hosted Consolidation (Phase 10 / Stream 1) ———
3316
3317 // ——— Blobs-backed memory helpers (hosted path) ———
3318
/**
 * Build the Netlify Blobs key under which a user's raw memory events are kept.
 * @param {string} uid - User id segment.
 * @param {string} [vaultId] - Vault id segment; any falsy value maps to 'default'.
 * @returns {string} Key of the form memory/<uid>/<vault>/events.
 */
function memoryBlobKey(uid, vaultId) {
  const vaultSegment = vaultId || 'default';
  return `memory/${uid}/${vaultSegment}/events`;
}
3323
/**
 * Load memory events from Netlify Blobs (hosted), or return [] if unavailable.
 * Best-effort: a missing store, an empty blob, a read failure, invalid JSON, or a
 * payload that is not a JSON array all yield an empty array.
 * @param {string} uid - User id used to build the blob key.
 * @param {string} [vaultId] - Vault id used to build the blob key.
 * @returns {Promise<Array>} The stored events; always an array.
 */
async function blobsGetMemoryEvents(uid, vaultId) {
  const store = globalThis.__netlify_blob_store;
  if (!store) return [];
  try {
    const raw = await store.get(memoryBlobKey(uid, vaultId), { type: 'text' });
    if (!raw) return [];
    const parsed = JSON.parse(raw);
    // Callers immediately use array methods (filter/push/length), so a blob holding
    // any other JSON value must not be handed back as-is.
    return Array.isArray(parsed) ? parsed : [];
  } catch (_) {
    return [];
  }
}
3334
/**
 * Write memory events to Netlify Blobs, keeping only the last 500 entries.
 * Best-effort: does nothing without a store and ignores any failure while
 * serializing or writing.
 * @param {string} uid - User id used to build the blob key.
 * @param {string} [vaultId] - Vault id used to build the blob key.
 * @param {Array} events - Events to persist; only the trailing 500 are written.
 * @returns {Promise<void>}
 */
async function blobsSetMemoryEvents(uid, vaultId, events) {
  const blobStore = globalThis.__netlify_blob_store;
  if (blobStore) {
    try {
      const blobKey = memoryBlobKey(uid, vaultId);
      const serialized = JSON.stringify(events.slice(-500));
      await blobStore.set(blobKey, serialized);
    } catch (_) {
      // Deliberately ignored: persistence here is best-effort.
    }
  }
}
3343
/**
 * Add one event to the Blobs-backed memory list.
 * Implemented as load → push → save, so two overlapping appends can overwrite
 * each other's write.
 * @param {string} uid - User id.
 * @param {string} [vaultId] - Vault id.
 * @param {object} event - Event to append at the end of the list.
 * @returns {Promise<void>}
 */
async function blobsAppendMemoryEvent(uid, vaultId, event) {
  const storedEvents = await blobsGetMemoryEvents(uid, vaultId);
  storedEvents.push(event);
  await blobsSetMemoryEvents(uid, vaultId, storedEvents);
}
3350
3351 // ——— Consolidation cost tracking ———
3352
/**
 * Current UTC calendar date.
 * @returns {string} The first 10 characters of the current ISO timestamp (YYYY-MM-DD).
 */
function utcDateString() {
  const isoNow = new Date().toISOString();
  return isoNow.substring(0, 10);
}
3356
/**
 * Current UTC calendar month.
 * @returns {string} The first 7 characters of the current ISO timestamp (YYYY-MM).
 */
function utcMonthString() {
  const isoNow = new Date().toISOString();
  return isoNow.substring(0, 7);
}
3360
/**
 * Build the Netlify Blobs key of a user's consolidation cost record.
 * @param {string} uid - User id segment.
 * @returns {string} Key of the form memory/<uid>/consolidation-cost.
 */
function consolidationCostBlobKey(uid) {
  const userPrefix = `memory/${uid}`;
  return `${userPrefix}/consolidation-cost`;
}
3365
/**
 * Load the consolidation cost record — Blobs on hosted, file on self-hosted.
 * Best-effort: a missing record, a read failure, invalid JSON, or a stored value that
 * is not a plain JSON object all yield an empty object, on both back ends.
 * @param {string} uid - User id; selects the blob key or the <uid>_cost.json file
 *   under DATA_DIR/consolidation.
 * @returns {Promise<object>} The stored record, or {}.
 */
async function loadConsolidationCost(uid) {
  // One shape check for both back ends, so callers can always read fields off the result.
  const asRecord = (value) =>
    (value && typeof value === 'object' && !Array.isArray(value) ? value : {});

  const store = globalThis.__netlify_blob_store;
  if (store) {
    try {
      const raw = await store.get(consolidationCostBlobKey(uid), { type: 'text' });
      if (!raw) return {};
      return asRecord(JSON.parse(raw));
    } catch (_) {
      return {};
    }
  }

  const filePath = path.join(DATA_DIR, 'consolidation', uid + '_cost.json');
  try {
    return asRecord(JSON.parse(fs.readFileSync(filePath, 'utf8')));
  } catch (_) {
    // Includes the ordinary "no record written yet" case.
    return {};
  }
}
3382
/**
 * Store the consolidation cost record — Blobs on hosted, file on self-hosted.
 * Hosted: compact JSON written to the user's cost blob; write failures are ignored.
 * Self-hosted: pretty-printed JSON at DATA_DIR/consolidation/<uid>_cost.json, creating
 * the directory when it does not exist; file-system errors propagate to the caller.
 * @param {string} uid - User id.
 * @param {object} data - Record to persist.
 * @returns {Promise<void>}
 */
async function saveConsolidationCost(uid, data) {
  const blobStore = globalThis.__netlify_blob_store;

  if (!blobStore) {
    const targetFile = path.join(DATA_DIR, 'consolidation', uid + '_cost.json');
    const targetDir = path.dirname(targetFile);
    if (!fs.existsSync(targetDir)) {
      fs.mkdirSync(targetDir, { recursive: true });
    }
    fs.writeFileSync(targetFile, JSON.stringify(data, null, 2), 'utf8');
    return;
  }

  try {
    await blobStore.set(consolidationCostBlobKey(uid), JSON.stringify(data));
  } catch (_) {
    // Deliberately ignored: hosted persistence is best-effort.
  }
}
3397
/**
 * Compute the next consolidation cost record after one pass.
 * Only builds the record from the stored one; it does not persist it.
 * The daily cost accumulates while the stored cost_date is today (UTC) and restarts at
 * costUsd otherwise; the monthly pass counter does the same against pass_month.
 * @param {string} uid - User whose stored record is the starting point.
 * @param {number} costUsd - Cost of the pass being recorded.
 * @returns {Promise<{last_pass: string, cost_today_usd: number, cost_date: string,
 *   cost_cap_usd: number|null, pass_count_month: number, pass_month: string}>}
 */
async function recordConsolidationPass(uid, costUsd) {
  const stored = await loadConsolidationCost(uid);
  const today = utcDateString();
  const month = utcMonthString();
  const isSameDay = stored.cost_date === today;
  const isSameMonth = stored.pass_month === month;

  const lastPass = new Date().toISOString();

  let costToday = costUsd;
  if (isSameDay) {
    costToday = Number((stored.cost_today_usd || 0) + costUsd);
  }

  // The cap is reported only when the environment variable is set to a non-empty value.
  let costCap = null;
  if (process.env.CONSOLIDATION_COST_CAP_USD) {
    costCap = parseFloat(process.env.CONSOLIDATION_COST_CAP_USD);
  }

  let passCount = 1;
  if (isSameMonth) {
    passCount = (stored.pass_count_month || 0) + 1;
  }

  return {
    last_pass: lastPass,
    cost_today_usd: costToday,
    cost_date: today,
    cost_cap_usd: costCap,
    pass_count_month: passCount,
    pass_month: month,
  };
}
3413
/**
 * POST /api/v1/memory/consolidate
 * Body: { dry_run?, passes?, lookback_hours?, max_events_per_pass?, max_topics_per_pass?, llm?: { max_tokens? } }
 * Response: { topics, total_events, verify, discover, cost_usd, pass_id, dry_run }
 * Errors: 503 LLM_NOT_CONFIGURED when no API key is set; 429 RATE_LIMITED while the
 * 30-minute cooldown of a previous real pass is running; 500 RUNTIME_ERROR otherwise.
 */
app.post('/api/v1/memory/consolidate', requireBridgeAuth, requireBridgeEditorOrAdmin, express.json(), async (req, res) => {
  const { uid, vaultId } = bridgeMemoryAuth(req);

  const llmApiKey = process.env.CONSOLIDATION_LLM_API_KEY || process.env.OPENAI_API_KEY;
  if (!llmApiKey) {
    return res.status(503).json({
      error: 'No LLM API key configured for hosted consolidation (CONSOLIDATION_LLM_API_KEY or OPENAI_API_KEY).',
      code: 'LLM_NOT_CONFIGURED',
    });
  }

  const mergedBody =
    req.body && typeof req.body === 'object' ? { ...req.body } : {};

  const { dry_run, passes } = mergedBody;

  // 30-minute server-side cooldown on real (non-dry-run) passes to prevent runaway costs.
  // Automated scheduler runs respect their own configured interval; this guards manual triggers.
  if (!dry_run) {
    try {
      const costRec = await loadConsolidationCost(uid);
      const lastPassAt = costRec?.last_pass;
      if (lastPassAt) {
        const elapsedMs = Date.now() - new Date(lastPassAt).getTime();
        const cooldownMs = 30 * 60 * 1000;
        if (elapsedMs < cooldownMs) {
          const waitMin = Math.ceil((cooldownMs - elapsedMs) / 60_000);
          return res.status(429).json({
            error: `Consolidation available again in ${waitMin} minute${waitMin === 1 ? '' : 's'}.`,
            code: 'RATE_LIMITED',
            retry_after_minutes: waitMin,
          });
        }
      }
    } catch (_) {
      // If cost record can't be read, allow the pass through — don't block on a read error.
    }
  }

  // Declared outside the try so the finally block can remove the directory on every
  // exit path, including failures inside consolidation or persistence.
  let tempDir = null;

  try {
    const { FileMemoryProvider } = await import('../../lib/memory-provider-file.mjs');
    const { MemoryManager } = await import('../../lib/memory.mjs');
    const { consolidateMemory } = await import('../../lib/memory-consolidate.mjs');
    const { computeCallCost } = await import('../../lib/daemon-cost.mjs');
    const { createMemoryEvent } = await import('../../lib/memory-event.mjs');

    // Hosted (Blobs): load events into a temp FileMemoryProvider so consolidateMemory
    // can read and write to it, then sync remaining events back to Blobs.
    // Self-hosted: use the normal file-based memory directory.
    let mm;
    const isHostedBlobs = Boolean(globalThis.__netlify_blob_store);

    if (isHostedBlobs) {
      const rawEvents = await blobsGetMemoryEvents(uid, vaultId);
      tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'knowtation-mm-'));
      if (rawEvents.length > 0) {
        fs.writeFileSync(
          path.join(tempDir, 'events.jsonl'),
          rawEvents.map((e) => JSON.stringify(e)).join('\n') + '\n',
          'utf8',
        );
      }
      mm = new MemoryManager(new FileMemoryProvider(tempDir));
    } else {
      mm = new MemoryManager(new FileMemoryProvider(bridgeMemoryDir(uid, vaultId)));
    }

    // Use a body option only when it is present and converts to a finite number.
    const finiteNumberOr = (value, fallback) =>
      (value != null && Number.isFinite(Number(value)) ? Number(value) : fallback);

    const maxTok =
      mergedBody.llm && typeof mergedBody.llm === 'object' && mergedBody.llm.max_tokens != null
        ? Math.floor(Number(mergedBody.llm.max_tokens))
        : 1024;
    const lbH = finiteNumberOr(mergedBody.lookback_hours, 24);
    const maxEv = finiteNumberOr(mergedBody.max_events_per_pass, 200);
    const maxTop = finiteNumberOr(mergedBody.max_topics_per_pass, 10);

    const consolidationConfig = {
      data_dir: isHostedBlobs ? os.tmpdir() : DATA_DIR,
      llm: {
        provider: 'openai',
        api_key: llmApiKey,
        model: process.env.CONSOLIDATION_LLM_MODEL || 'gpt-4o-mini',
      },
      daemon: {
        lookback_hours: lbH,
        max_events_per_pass: maxEv,
        max_topics_per_pass: maxTop,
        llm: { max_tokens: Number.isFinite(maxTok) ? maxTok : 1024 },
      },
      memory: {
        provider: 'file',
        encrypt: process.env.CONSOLIDATION_MEMORY_ENCRYPT === 'true',
      },
    };

    // Track LLM call cost via a wrapping llmFn. The config handed in by the caller is
    // ignored; every call goes out with consolidationConfig.
    let totalCostUsd = 0;
    const { completeChat } = await import('../../lib/llm-complete.mjs');
    const trackingLlmFn = async (cfg, callOpts) => {
      const rawResponse = await completeChat(consolidationConfig, callOpts);
      totalCostUsd += computeCallCost(callOpts, rawResponse);
      return rawResponse;
    };

    const result = await consolidateMemory(consolidationConfig, {
      mm,
      dryRun: Boolean(dry_run),
      passes: passes ?? undefined,
      llmFn: dry_run ? undefined : trackingLlmFn,
    });

    const pass_id = 'cpass_' + Date.now().toString(36) + '_' + Math.random().toString(36).slice(2, 6);

    if (!dry_run) {
      const updated = await recordConsolidationPass(uid, totalCostUsd);
      await saveConsolidationCost(uid, updated);

      // Store pass-level summary event.
      const passEvent = createMemoryEvent('consolidation_pass', {
        topics_count: Array.isArray(result.topics) ? result.topics.length : (result.topics ?? 0),
        total_events: result.total_events,
        cost_usd: totalCostUsd,
        pass_id,
        verify: result.verify ?? null,
        discover: result.discover ?? null,
      });

      if (isHostedBlobs) {
        // Sync remaining events (post-consolidation) + pass summary back to Blobs.
        const remaining = mm.list({ limit: 500 });
        await blobsSetMemoryEvents(uid, vaultId, [...remaining, passEvent]);
      } else {
        mm.store('consolidation_pass', passEvent.data);
      }
    }

    return res.json({
      topics: result.topics,
      total_events: result.total_events,
      verify: result.verify ?? null,
      discover: result.discover ?? null,
      cost_usd: totalCostUsd,
      pass_id,
      dry_run: result.dry_run,
    });
  } catch (e) {
    console.error('[bridge] POST /api/v1/memory/consolidate', e?.message);
    return res.status(500).json({ error: e?.message || 'Consolidation failed', code: 'RUNTIME_ERROR' });
  } finally {
    // Clean up temp dir used for hosted path. Running this in finally keeps failed
    // passes from leaving copies of users' memory events behind in the OS temp directory.
    if (tempDir) {
      try { fs.rmSync(tempDir, { recursive: true, force: true }); } catch (_) {}
    }
  }
});
3583
/**
 * GET /api/v1/memory/consolidate/status
 * Response: { last_pass, pass_count_month, cooldown_minutes, cost_today_usd, cost_cap_usd }
 * Monthly and daily figures read as 0 when the stored record belongs to an earlier
 * UTC month or day.
 * Errors: 401 UNAUTHORIZED without a resolvable Bearer token; 500 RUNTIME_ERROR.
 */
app.get('/api/v1/memory/consolidate/status', async (req, res) => {
  const { uid } = bridgeMemoryAuth(req);
  if (!uid) {
    return res.status(401).json({ error: 'Unauthorized', code: 'UNAUTHORIZED' });
  }

  try {
    const costRecord = await loadConsolidationCost(uid);
    const today = utcDateString();
    const month = utcMonthString();

    let passCountMonth = 0;
    if (costRecord.pass_month === month) {
      passCountMonth = costRecord.pass_count_month || 0;
    }

    // Cooldown: minutes until the next manual consolidation is available.
    const lastPass = costRecord.last_pass ?? null;
    let cooldownMinutes = 0;
    if (lastPass) {
      const cooldownMs = 30 * 60 * 1000;
      const sinceLastPassMs = Date.now() - new Date(lastPass).getTime();
      const leftMs = cooldownMs - sinceLastPassMs;
      if (leftMs > 0) {
        cooldownMinutes = Math.ceil(leftMs / 60_000);
      }
    }

    let costToday = 0;
    if (costRecord.cost_date === today) {
      costToday = costRecord.cost_today_usd || 0;
    }

    let costCap = null;
    if (process.env.CONSOLIDATION_COST_CAP_USD) {
      costCap = parseFloat(process.env.CONSOLIDATION_COST_CAP_USD);
    }

    return res.json({
      last_pass: lastPass,
      pass_count_month: passCountMonth,
      cooldown_minutes: cooldownMinutes,
      // Legacy cost fields kept for backward compat
      cost_today_usd: costToday,
      cost_cap_usd: costCap,
    });
  } catch (e) {
    console.error('[bridge] GET /api/v1/memory/consolidate/status', e?.message);
    res.status(500).json({ error: e.message || 'Internal error', code: 'RUNTIME_ERROR' });
  }
});
3622
// Standalone (non-serverless) startup: refuse to run without the required settings,
// otherwise start listening and print a short configuration summary.
if (!isServerless) {
  const hasRequiredSettings = Boolean(CANISTER_URL && SESSION_SECRET);
  if (!hasRequiredSettings) {
    const missingSettingsHelp = [
      'Bridge: CANISTER_URL and SESSION_SECRET (or HUB_JWT_SECRET) are required.',
      '  Add them to the repo root .env (bridge loads ../../.env) or export in your shell.',
      '  Template: hub/bridge/.env.example',
    ];
    for (const line of missingSettingsHelp) {
      console.error(line);
    }
    process.exit(1);
  }

  app.listen(PORT, () => {
    const githubState = process.env.GITHUB_CLIENT_ID ? 'enabled' : 'not configured';
    const embeddingProvider = process.env.EMBEDDING_PROVIDER || 'ollama';
    const startupSummary = [
      'Knowtation Hub Bridge listening on http://localhost:' + PORT,
      '  Canister: ' + CANISTER_URL,
      '  GitHub connect: ' + githubState,
      '  Index/Search: ' + embeddingProvider + ' (run POST /api/v1/index to index)',
    ];
    for (const line of startupSummary) {
      console.log(line);
    }
  });
}
3637
3638 export { app };
File History 1 commit
sha256:94ec65bd2b200240ac785a97cf14c5db066832bd608a24d6a9c151f17b918b02 feat(calendar): hosted bridge/gateway route parity and time… Human minor 12 hours ago