companion-provenance-validator.mjs
242 lines 9.3 KB
Raw
sha256:0d530f9ef27b8b75547d1db7701a74bc77b77aa8f3d7fa3a8672cf2af36e63bb reconcile: import GitHub-direct RBAC/OAuth/companion and ho… Human minor ⚠ breaking 6 hours ago
1 /**
2 * Phase 6 — provenance schema validator (D6.2).
3 *
4 * Validates the canonical provenance record as a write precondition.
5 * Every derived-artifact write is REJECTED if any required field is missing
6 * or malformed, if the provenance or artifact payload contains sensitive keys,
7 * or if the privacy_max + source=managed contradiction is present (D6.2, §fail-closed).
8 *
9 * DESIGN INVARIANTS:
10 * - Pure function — no I/O, no network, no side effects.
11 * - Fail-closed: any ambiguity or missing field → rejection.
12 * - No secret-bearing field ever passes (D6.2.3).
13 * - Provenance is a flag, not a lifecycle state (D6.2.5).
14 */
15
16 import { hasSensitiveKeys } from './memory-event.mjs';
17 import { RUNTIME_LANES } from './model-runtime-lane.mjs';
18
/** Current schema version. Increment when the schema changes. */
export const PROVENANCE_SCHEMA_VERSION = 1;

/**
 * Valid `source` values for the provenance record (D6.2.1).
 * Maps from the inference lane origin. 'managed' = cloud/direct_provider lane.
 *
 * Frozen at runtime, not just annotated: this array is an allow-list, so an
 * importer must not be able to widen it with push() or index assignment.
 * @readonly
 */
export const PROVENANCE_SOURCES = Object.freeze(
  /** @type {const} */ ([
    'companion',
    'in_browser',
    'managed',
    'self_hosted',
    'enterprise',
    'openrouter',
  ]),
);

/**
 * Valid `privacy_tier` values (D6.2.1).
 * The owner's vault tier — governs storage routing and encryption.
 *
 * Frozen at runtime so the allow-list cannot be mutated by importers.
 * @readonly
 */
export const PRIVACY_TIERS = Object.freeze(
  /** @type {const} */ (['convenience', 'privacy_max']),
);

/**
 * Valid `artifact_type` values (D6.2.1).
 *
 * Frozen at runtime so the allow-list cannot be mutated by importers.
 * @readonly
 */
export const ARTIFACT_TYPES = Object.freeze(
  /** @type {const} */ ([
    'ai_summary',
    'embedding',
    'insight',
    'discovery_facet',
  ]),
);
53
// Reason-code table, assembled first and frozen on export below.
const REJECT_REASON_CODES = {
  // A required field is absent from the provenance record.
  MISSING_FIELD: 'provenance_missing_required_field',

  // A required field is present, but its value is not acceptable.
  MALFORMED_FIELD: 'provenance_malformed_field',

  // hasSensitiveKeys flagged secret-bearing content in the provenance or the artifact.
  SENSITIVE_DATA: 'provenance_sensitive_data_detected',

  // privacy_max combined with source=managed (D6.2 §fail-closed):
  // private content never routes to a managed (cloud) lane.
  PRIVACY_MAX_MANAGED_CONTRADICTION: 'provenance_privacy_max_managed_contradiction',

  // Neither model_version nor runtime_version is a concrete value (D6.2.1);
  // at least one of them MUST be.
  BOTH_VERSIONS_ABSENT: 'provenance_both_versions_absent',
};

/**
 * Fixed reason codes returned when provenance validation fails.
 * Callers surface these codes to the user/logger — never the raw field value.
 * @readonly
 */
export const PROVENANCE_REJECT_REASONS = Object.freeze(REJECT_REASON_CODES);
77
/**
 * Accepted timestamp shape: the ECMAScript date-time string format, i.e.
 * `YYYY-MM-DD`, optionally followed by `THH:mm`, `THH:mm:ss` or `THH:mm:ss.fff…`
 * and an optional `Z` or `±HH:mm` offset. Capture groups 1-3 are year, month, day.
 * Field ranges for the time and offset parts are enforced here so the result
 * does not depend on how lenient a given engine's Date parser is.
 */
const ISO_8601_TIMESTAMP =
  /^(\d{4})-(\d{2})-(\d{2})(?:T(?:[01]\d|2[0-3]):[0-5]\d(?::[0-5]\d(?:\.\d+)?)?(?:Z|[+-](?:[01]\d|2[0-3]):[0-5]\d)?)?$/;

/**
 * Validate an ISO-8601 timestamp string.
 *
 * Fail-closed: the value must be a string matching ISO_8601_TIMESTAMP, name a
 * calendar day that actually exists, and be parseable by the Date constructor.
 * Free-form strings the Date constructor may happen to accept (e.g. '1',
 * 'March 7, 2021') are rejected, because parsing of non-ISO input is
 * implementation-defined. Surrounding whitespace is not tolerated.
 *
 * @param {unknown} v - Candidate value.
 * @returns {boolean} true only for a well-formed, real ISO-8601 date or date-time.
 */
function isIso8601(v) {
  if (typeof v !== 'string') {
    return false;
  }

  const match = ISO_8601_TIMESTAMP.exec(v);
  if (match === null) {
    return false;
  }

  // Reject impossible days such as 2021-02-30, which some engines roll over
  // into the next month instead of reporting an invalid date.
  // setUTCFullYear is used instead of Date.UTC so years 0000-0099 are taken literally.
  const year = Number(match[1]);
  const monthIndex = Number(match[2]) - 1;
  const day = Number(match[3]);
  const calendar = new Date(0);
  calendar.setUTCFullYear(year, monthIndex, day);
  if (calendar.getUTCMonth() !== monthIndex || calendar.getUTCDate() !== day) {
    return false;
  }

  // Final confirmation that the engine itself can represent the instant.
  return !Number.isNaN(new Date(v).getTime());
}
88
/**
 * Validate the canonical provenance record before any derived-artifact write.
 *
 * Checks run in a fixed order and the first failure is returned, so the
 * reported `reason`/`field` always refer to the earliest violated rule.
 *
 * Required fields per D6.2.1:
 *   generated_by, source, model, model_version|runtime_version (one MUST be concrete),
 *   runtime_version, lane, privacy_tier, source_note_path, source_event_id,
 *   created_at, artifact_type, schema_version.
 *
 * Field rules enforced here:
 *   - generated_by, model: non-blank strings.
 *   - source, lane, privacy_tier, artifact_type: members of their allow-lists.
 *   - model_version: optional; when supplied it must be null or a string.
 *   - runtime_version, source_note_path: key must exist; value null or a string.
 *   - source_event_id: a non-blank string, or a non-empty array of non-blank strings.
 *   - created_at: accepted by isIso8601.
 *   - schema_version: integer >= 1.
 *
 * Additional checks (D6.2.3):
 *   - privacy_max + source=managed → rejected as a contradiction.
 *   - hasSensitiveKeys scan over provenance AND artifact payload.
 *
 * @param {unknown} provenance - The provenance record to validate.
 * @param {unknown} [artifact] - The artifact payload (also scanned for sensitive keys
 *   when it is neither null nor undefined).
 * @returns {{ ok: true } | { ok: false; reason: string; field?: string }}
 *   `field` is present for per-field failures and omitted for the
 *   contradiction and sensitive-data rejections.
 */
export function validateProvenance(provenance, artifact) {
  const { MISSING_FIELD, MALFORMED_FIELD } = PROVENANCE_REJECT_REASONS;
  const missing = (field) => ({ ok: false, reason: MISSING_FIELD, field });
  const malformed = (field) => ({ ok: false, reason: MALFORMED_FIELD, field });
  const isNonBlankString = (value) => typeof value === 'string' && value.trim().length > 0;

  if (provenance == null || typeof provenance !== 'object' || Array.isArray(provenance)) {
    return missing('provenance');
  }

  const p = /** @type {Record<string, unknown>} */ (provenance);

  // generated_by — non-empty string actor id
  if (!isNonBlankString(p.generated_by)) {
    return missing('generated_by');
  }

  // source — valid enum
  if (!PROVENANCE_SOURCES.includes(/** @type {any} */ (p.source))) {
    return malformed('source');
  }

  // model — non-empty string
  if (!isNonBlankString(p.model)) {
    return missing('model');
  }

  // model_version OR runtime_version MUST be a concrete non-empty string (D6.2.1)
  if (!isNonBlankString(p.model_version) && !isNonBlankString(p.runtime_version)) {
    return {
      ok: false,
      reason: PROVENANCE_REJECT_REASONS.BOTH_VERSIONS_ABSENT,
      field: 'model_version|runtime_version',
    };
  }

  // model_version — may be omitted or null, but a supplied value must be a string.
  // Without this, a number/object would pass whenever runtime_version is concrete,
  // which contradicts the "malformed → rejected" contract.
  if (p.model_version != null && typeof p.model_version !== 'string') {
    return malformed('model_version');
  }

  // runtime_version — key must be present; null is OK, any other non-string is malformed
  if (!('runtime_version' in p)) {
    return missing('runtime_version');
  }
  if (p.runtime_version !== null && typeof p.runtime_version !== 'string') {
    return malformed('runtime_version');
  }

  // lane — valid RUNTIME_LANES enum
  if (!RUNTIME_LANES.includes(/** @type {any} */ (p.lane))) {
    return malformed('lane');
  }

  // privacy_tier — valid enum
  if (!PRIVACY_TIERS.includes(/** @type {any} */ (p.privacy_tier))) {
    return malformed('privacy_tier');
  }

  // source_note_path — key must be present (null is OK for aggregate artifacts)
  if (!('source_note_path' in p)) {
    return missing('source_note_path');
  }
  if (p.source_note_path !== null && typeof p.source_note_path !== 'string') {
    return malformed('source_note_path');
  }

  // source_event_id — string or non-empty array of non-empty strings
  const eventId = p.source_event_id;
  if (typeof eventId === 'string') {
    if (!eventId.trim()) {
      return malformed('source_event_id');
    }
  } else if (Array.isArray(eventId)) {
    if (eventId.length === 0 || !eventId.every((id) => isNonBlankString(id))) {
      return malformed('source_event_id');
    }
  } else {
    // Any other type (including absent) is reported as missing.
    return missing('source_event_id');
  }

  // created_at — ISO-8601 timestamp
  if (!isIso8601(/** @type {any} */ (p.created_at))) {
    return malformed('created_at');
  }

  // artifact_type — valid enum
  if (!ARTIFACT_TYPES.includes(/** @type {any} */ (p.artifact_type))) {
    return malformed('artifact_type');
  }

  // schema_version — positive integer
  if (!Number.isInteger(p.schema_version) || /** @type {number} */ (p.schema_version) < 1) {
    return malformed('schema_version');
  }

  // D6.2.3: privacy_max + source=managed contradiction —
  // private content never routes to a managed (cloud) lane (D2.3)
  if (p.privacy_tier === 'privacy_max' && p.source === 'managed') {
    return { ok: false, reason: PROVENANCE_REJECT_REASONS.PRIVACY_MAX_MANAGED_CONTRADICTION };
  }

  // D6.2.3: hasSensitiveKeys scan over provenance (no DEK, IK, JWT, token, etc.)
  if (hasSensitiveKeys(p)) {
    return { ok: false, reason: PROVENANCE_REJECT_REASONS.SENSITIVE_DATA };
  }

  // D6.2.3: hasSensitiveKeys scan over artifact payload (skipped when no payload is given)
  if (artifact != null && hasSensitiveKeys(artifact)) {
    return { ok: false, reason: PROVENANCE_REJECT_REASONS.SENSITIVE_DATA };
  }

  return { ok: true };
}
209
/**
 * Assemble a provenance record for the convenience/self-partition default case.
 * Intended for migrated enrichment callers that have a config + lane context
 * but no explicit actor.
 *
 * The builder only maps camelCase inputs onto the snake_case record; it performs
 * no validation. Omitted optional inputs (modelVersion, runtimeVersion,
 * sourceNotePath) are written as null, privacy_tier is fixed to 'convenience',
 * created_at is the current time in ISO format, and schema_version is
 * PROVENANCE_SCHEMA_VERSION. If both modelVersion and runtimeVersion are
 * omitted, both fields are null in the result.
 *
 * @param {{
 *   generatedBy: string,
 *   source: string,
 *   model: string,
 *   modelVersion?: string,
 *   runtimeVersion?: string,
 *   lane: string,
 *   artifactType: string,
 *   sourceNotePath?: string | null,
 *   sourceEventId: string | string[],
 * }} params
 * @returns {object} A provenance record with schema_version, privacy_tier=convenience, created_at=now.
 */
export function buildConvenienceProvenance(params) {
  const {
    generatedBy,
    source,
    model,
    modelVersion,
    runtimeVersion,
    lane,
    artifactType,
    sourceNotePath,
    sourceEventId,
  } = params;

  // Stamp the record once, up front, so the literal below stays a pure mapping.
  const createdAt = new Date().toISOString();

  const record = {
    generated_by: generatedBy,
    source,
    model,
    model_version: modelVersion ?? null,
    runtime_version: runtimeVersion ?? null,
    lane,
    privacy_tier: 'convenience',
    source_note_path: sourceNotePath ?? null,
    source_event_id: sourceEventId,
    created_at: createdAt,
    artifact_type: artifactType,
    schema_version: PROVENANCE_SCHEMA_VERSION,
  };
  return record;
}
File History 1 commit
sha256:0d530f9ef27b8b75547d1db7701a74bc77b77aa8f3d7fa3a8672cf2af36e63bb reconcile: import GitHub-direct RBAC/OAuth/companion and ho… Human minor 6 hours ago