companion-provenance-validator.mjs
sha256:0d530f9ef27b8b75547d1db7701a74bc77b77aa8f3d7fa3a8672cf2af36e63bb
reconcile: import GitHub-direct RBAC/OAuth/companion and ho…
Human
minor
⚠ breaking
6 hours ago
| 1 | /** |
| 2 | * Phase 6 — provenance schema validator (D6.2). |
| 3 | * |
| 4 | * Validates the canonical provenance record as a write precondition. |
| 5 | * Every derived-artifact write is REJECTED if any required field is missing |
| 6 | * or malformed, if the provenance or artifact payload contains sensitive keys, |
| 7 | * or if the privacy_max + source=managed contradiction is present (D6.2, §fail-closed). |
| 8 | * |
| 9 | * DESIGN INVARIANTS: |
| 10 | * - Pure function — no I/O, no network, no side effects. |
| 11 | * - Fail-closed: any ambiguity or missing field → rejection. |
| 12 | * - No secret-bearing field ever passes (D6.2.3). |
| 13 | * - Provenance is a flag, not a lifecycle state (D6.2.5). |
| 14 | */ |
| 15 | |
| 16 | import { hasSensitiveKeys } from './memory-event.mjs'; |
| 17 | import { RUNTIME_LANES } from './model-runtime-lane.mjs'; |
| 18 | |
/**
 * Current provenance schema version. Increment when the schema changes.
 *
 * `buildConvenienceProvenance` stamps this value into the record's
 * `schema_version` field; `validateProvenance` itself only requires that
 * `schema_version` be a positive integer.
 */
export const PROVENANCE_SCHEMA_VERSION = 1;
| 21 | |
/**
 * Valid `source` values for the provenance record (D6.2.1).
 * Maps from the inference lane origin. 'managed' = cloud/direct_provider lane.
 *
 * Frozen so the allowlist consulted by `validateProvenance` cannot be widened
 * (or emptied) at runtime by any importer; in module (strict) code a mutation
 * attempt throws a TypeError instead of silently weakening validation.
 * @readonly
 */
export const PROVENANCE_SOURCES = Object.freeze(
  /** @type {const} */ ([
    'companion',
    'in_browser',
    'managed',
    'self_hosted',
    'enterprise',
    'openrouter',
  ]),
);
| 35 | |
/**
 * Valid `privacy_tier` values (D6.2.1).
 * The owner's vault tier — governs storage routing and encryption.
 *
 * Frozen so the allowlist consulted by `validateProvenance` cannot be altered
 * at runtime by an importer.
 * @readonly
 */
export const PRIVACY_TIERS = Object.freeze(
  /** @type {const} */ (['convenience', 'privacy_max']),
);
| 42 | |
/**
 * Valid `artifact_type` values (D6.2.1).
 *
 * Frozen so the allowlist consulted by `validateProvenance` cannot be altered
 * at runtime by an importer.
 * @readonly
 */
export const ARTIFACT_TYPES = Object.freeze(
  /** @type {const} */ ([
    'ai_summary',
    'embedding',
    'insight',
    'discovery_facet',
  ]),
);
| 53 | |
/**
 * Fixed reason codes for provenance validation failures.
 * Callers surface these codes to the user/logger — never the raw field value.
 *
 * `validateProvenance` returns exactly one of these codes in `reason` on the
 * first failing check. The object is frozen, so the codes cannot be
 * reassigned at runtime.
 * @readonly
 */
export const PROVENANCE_REJECT_REASONS = Object.freeze({
  /**
   * A required field is missing from the provenance record.
   * Also returned when the record itself is not a plain (non-array) object,
   * and when `generated_by` / `model` are not non-blank strings.
   */
  MISSING_FIELD: 'provenance_missing_required_field',
  /** A required field is present but has an invalid value. */
  MALFORMED_FIELD: 'provenance_malformed_field',
  /** hasSensitiveKeys detected secret-bearing content in provenance or artifact. */
  SENSITIVE_DATA: 'provenance_sensitive_data_detected',
  /**
   * privacy_max + source=managed contradiction (D6.2 §fail-closed):
   * private content never routes to a managed (cloud) lane.
   */
  PRIVACY_MAX_MANAGED_CONTRADICTION: 'provenance_privacy_max_managed_contradiction',
  /**
   * Both model_version and runtime_version are absent (D6.2.1):
   * at least one MUST be a concrete (non-blank string) value.
   */
  BOTH_VERSIONS_ABSENT: 'provenance_both_versions_absent',
});
| 77 | |
/**
 * Accepted timestamp shape: `YYYY-MM-DD`, optionally followed by
 * `THH:mm`, `THH:mm:ss` or `THH:mm:ss.fff…`, optionally followed by `Z` or a
 * `±HH:mm` offset. Capture groups 1–3 are year, month and day.
 */
const ISO_8601_PATTERN =
  /^(\d{4})-(\d{2})-(\d{2})(?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?(?:Z|[+-]\d{2}:\d{2})?)?$/;

/**
 * Validate an ISO-8601 timestamp string.
 *
 * Fail-closed: the value must match the ISO-8601 shape above, name a real
 * calendar date, AND be parseable by `Date`. Relying on the `Date`
 * constructor alone is not enough: it accepts non-ISO strings (whose parsing
 * is implementation-defined) and may silently roll impossible dates such as
 * Feb 30 over into the next month.
 *
 * @param {unknown} v - Candidate value; anything that is not a string is rejected.
 * @returns {boolean} `true` only for a well-formed, real ISO-8601 date or date-time.
 */
function isIso8601(v) {
  if (typeof v !== 'string') {
    return false;
  }

  const match = ISO_8601_PATTERN.exec(v);
  if (match === null) {
    return false;
  }

  // Reject calendar-impossible dates (month 13, Feb 30, day 00, …) by checking
  // that the components survive a round trip through a UTC date unchanged.
  const year = Number(match[1]);
  const month = Number(match[2]);
  const day = Number(match[3]);
  const probe = new Date(0);
  probe.setUTCFullYear(year, month - 1, day);
  if (probe.getUTCMonth() !== month - 1 || probe.getUTCDate() !== day) {
    return false;
  }

  // Let Date reject out-of-range time or offset components.
  return !Number.isNaN(new Date(v).getTime());
}
| 88 | |
/**
 * Validate the canonical provenance record before any derived-artifact write.
 *
 * Checks run in a fixed order and the FIRST failure is returned:
 *   1. `provenance` is a non-null, non-array object.
 *   2. `generated_by` — non-blank string.
 *   3. `source` — member of PROVENANCE_SOURCES.
 *   4. `model` — non-blank string.
 *   5. `model_version` | `runtime_version` — at least one is a non-blank string.
 *   6. `model_version` — when supplied, must be `null` or a string.
 *   7. `runtime_version` — key must be present; value `null` or a string.
 *   8. `lane` — member of RUNTIME_LANES.
 *   9. `privacy_tier` — member of PRIVACY_TIERS.
 *  10. `source_note_path` — key must be present; value `null` or a string.
 *  11. `source_event_id` — non-blank string, or non-empty array of non-blank strings.
 *  12. `created_at` — accepted by `isIso8601`.
 *  13. `artifact_type` — member of ARTIFACT_TYPES.
 *  14. `schema_version` — integer >= 1.
 *  15. `privacy_tier === 'privacy_max'` with `source === 'managed'` → contradiction (D6.2.3).
 *  16. `hasSensitiveKeys` over the provenance record, then over `artifact` when
 *      it is neither `null` nor `undefined` (D6.2.3).
 *
 * Results carry only a reason code and (where applicable) a field name —
 * never the offending value.
 *
 * @param {unknown} provenance - The provenance record to validate.
 * @param {unknown} [artifact] - The artifact payload (also scanned for sensitive keys).
 * @returns {{ ok: true } | { ok: false; reason: string; field?: string }}
 */
export function validateProvenance(provenance, artifact) {
  const {
    MISSING_FIELD,
    MALFORMED_FIELD,
    SENSITIVE_DATA,
    PRIVACY_MAX_MANAGED_CONTRADICTION,
    BOTH_VERSIONS_ABSENT,
  } = PROVENANCE_REJECT_REASONS;

  /** Build a rejection; `field` is omitted when the failure is not tied to a single field. */
  const reject = (reason, field) =>
    field === undefined ? { ok: false, reason } : { ok: false, reason, field };

  /** True for a string with at least one non-whitespace character. */
  const isNonBlankString = (value) => typeof value === 'string' && value.trim().length > 0;

  if (provenance == null || typeof provenance !== 'object' || Array.isArray(provenance)) {
    return reject(MISSING_FIELD, 'provenance');
  }

  const p = /** @type {Record<string, unknown>} */ (provenance);

  // generated_by — non-empty string actor id
  if (!isNonBlankString(p.generated_by)) {
    return reject(MISSING_FIELD, 'generated_by');
  }

  // source — valid enum
  if (!PROVENANCE_SOURCES.includes(/** @type {any} */ (p.source))) {
    return reject(MALFORMED_FIELD, 'source');
  }

  // model — non-empty string
  if (!isNonBlankString(p.model)) {
    return reject(MISSING_FIELD, 'model');
  }

  // model_version OR runtime_version MUST be a concrete non-empty string (D6.2.1)
  if (!isNonBlankString(p.model_version) && !isNonBlankString(p.runtime_version)) {
    return reject(BOTH_VERSIONS_ABSENT, 'model_version|runtime_version');
  }

  // model_version — may be omitted or null, but a value of any other type is
  // malformed. Checked AFTER the both-absent rule so that records which were
  // already rejected keep their existing reason code.
  if (p.model_version != null && typeof p.model_version !== 'string') {
    return reject(MALFORMED_FIELD, 'model_version');
  }

  // runtime_version — key must be present (null is OK; an absent key is a missing field)
  if (!('runtime_version' in p)) {
    return reject(MISSING_FIELD, 'runtime_version');
  }
  if (p.runtime_version !== null && typeof p.runtime_version !== 'string') {
    return reject(MALFORMED_FIELD, 'runtime_version');
  }

  // lane — valid RUNTIME_LANES enum
  if (!RUNTIME_LANES.includes(/** @type {any} */ (p.lane))) {
    return reject(MALFORMED_FIELD, 'lane');
  }

  // privacy_tier — valid enum
  if (!PRIVACY_TIERS.includes(/** @type {any} */ (p.privacy_tier))) {
    return reject(MALFORMED_FIELD, 'privacy_tier');
  }

  // source_note_path — key must be present (null is OK for aggregate artifacts)
  if (!('source_note_path' in p)) {
    return reject(MISSING_FIELD, 'source_note_path');
  }
  if (p.source_note_path !== null && typeof p.source_note_path !== 'string') {
    return reject(MALFORMED_FIELD, 'source_note_path');
  }

  // source_event_id — string or non-empty array of non-empty strings
  const eventId = p.source_event_id;
  if (typeof eventId === 'string') {
    if (!eventId.trim()) {
      return reject(MALFORMED_FIELD, 'source_event_id');
    }
  } else if (Array.isArray(eventId)) {
    if (eventId.length === 0 || !eventId.every((id) => isNonBlankString(id))) {
      return reject(MALFORMED_FIELD, 'source_event_id');
    }
  } else {
    return reject(MISSING_FIELD, 'source_event_id');
  }

  // created_at — ISO-8601 timestamp
  if (!isIso8601(/** @type {any} */ (p.created_at))) {
    return reject(MALFORMED_FIELD, 'created_at');
  }

  // artifact_type — valid enum
  if (!ARTIFACT_TYPES.includes(/** @type {any} */ (p.artifact_type))) {
    return reject(MALFORMED_FIELD, 'artifact_type');
  }

  // schema_version — positive integer
  if (!Number.isInteger(p.schema_version) || /** @type {number} */ (p.schema_version) < 1) {
    return reject(MALFORMED_FIELD, 'schema_version');
  }

  // D6.2.3: privacy_max + source=managed contradiction —
  // private content never routes to a managed (cloud) lane (D2.3)
  if (p.privacy_tier === 'privacy_max' && p.source === 'managed') {
    return reject(PRIVACY_MAX_MANAGED_CONTRADICTION);
  }

  // D6.2.3: hasSensitiveKeys scan over provenance (no DEK, IK, JWT, token, etc.)
  if (hasSensitiveKeys(p)) {
    return reject(SENSITIVE_DATA);
  }

  // D6.2.3: hasSensitiveKeys scan over artifact payload
  if (artifact != null && hasSensitiveKeys(artifact)) {
    return reject(SENSITIVE_DATA);
  }

  return { ok: true };
}
| 209 | |
/**
 * Assemble a provenance record for the convenience/self-partition default case.
 * Intended for migrated enrichment callers that have a config + lane context
 * but no explicit actor.
 *
 * The record always carries `privacy_tier: 'convenience'`, the current
 * `PROVENANCE_SCHEMA_VERSION`, and a `created_at` taken from the clock at call
 * time. Optional inputs that are `null`/`undefined` are written as `null`.
 * No validation is performed here.
 *
 * @param {{
 *   generatedBy: string,
 *   source: string,
 *   model: string,
 *   modelVersion?: string,
 *   runtimeVersion?: string,
 *   lane: string,
 *   artifactType: string,
 *   sourceNotePath?: string | null,
 *   sourceEventId: string | string[],
 * }} params
 * @returns {object} A provenance record with schema_version, privacy_tier=convenience, created_at=now.
 */
export function buildConvenienceProvenance(params) {
  const {
    generatedBy,
    source,
    model,
    modelVersion,
    runtimeVersion,
    lane,
    artifactType,
    sourceNotePath,
    sourceEventId,
  } = params;

  const createdAt = new Date().toISOString();

  return {
    generated_by: generatedBy,
    source,
    model,
    model_version: modelVersion ?? null,
    runtime_version: runtimeVersion ?? null,
    lane,
    privacy_tier: 'convenience',
    source_note_path: sourceNotePath ?? null,
    source_event_id: sourceEventId,
    created_at: createdAt,
    artifact_type: artifactType,
    schema_version: PROVENANCE_SCHEMA_VERSION,
  };
}
File History
1 commit
sha256:0d530f9ef27b8b75547d1db7701a74bc77b77aa8f3d7fa3a8672cf2af36e63bb
reconcile: import GitHub-direct RBAC/OAuth/companion and ho…
Human
minor
⚠
6 hours ago