diff --git a/src/headless-triage.ts b/src/headless-triage.ts index d1bf8a01d..151df33af 100644 --- a/src/headless-triage.ts +++ b/src/headless-triage.ts @@ -494,6 +494,119 @@ export async function runTriageApply( let seq = 0; const emit = (eventType: string, data: Record = {}) => emitTriageApplyJournal(cwd, flowId, seq++, eventType, data); + + // Slice 3a of "Make UOK the SF Control Plane": a triage --apply run + // emits gate_run trace events for the gates it reaches (trusted-agent-source-gate, + // triage-plan-validation-gate, triage-apply-review-gate, triage-apply-mutation-gate) with canonical + // UOK run context. status uok reads surface/runControl/... from these + // events and classifies coverage as "ok"/"stale"/"incomplete". + // + // Dynamic imports because src/resources is excluded from the root + // tsconfig (extensions have their own build). buildUokRunContext and + // appendTraceEventRequired live there; loaded once per run. + const runContextModule = (await jiti.import( + sfExtensionPath("uok/run-context"), + )) as { + buildUokRunContext: (opts: Record) => + | { + surface: string; + runControl: string; + permissionProfile: string; + traceId: string; + parentTrace?: string; + } + | null; + }; + const traceWriterModule = (await jiti.import( + sfExtensionPath("uok/trace-writer"), + )) as { + appendTraceEventRequired: ( + basePath: string, + traceId: string, + event: Record, + ) => void; + }; + + // surface: "headless" - runTriageApply is always operator-invoked + // via sf headless triage --apply. + // runControl: "supervised" - the operator launched this command; it's + // not an autonomous-loop self-initiation. + // permissionProfile: "high" - --apply mutates the ledger, so the run + // must have write permission. + // traceId: flowId - already a UUID-stamped per-run id. 
+ const uokContext = runContextModule.buildUokRunContext({ + surface: "headless", + runControl: "supervised", + permissionProfile: "high", + traceId: flowId, + }); + + const emitTriageGate = ( + gateId: string, + outcome: "pass" | "fail" | "manual-attention", + rationale: string, + extra: Record = {}, + ): Error | null => { + if (!uokContext) { + return new Error("buildUokRunContext returned null for triage --apply"); + } + try { + traceWriterModule.appendTraceEventRequired(cwd, flowId, { + type: "gate_run", + traceId: uokContext.traceId, + turnId: `triage-apply:${gateId}`, + gateId, + gateType: "quality-gate", + outcome, + failureClass: + outcome === "fail" + ? "policy" + : outcome === "manual-attention" + ? "manual-attention" + : "none", + rationale, + attempt: 1, + maxAttempts: 1, + retryable: false, + evaluatedAt: new Date().toISOString(), + durationMs: 0, + // Canonical UOK run context. status uok reads these from + // trace events (slice 3a addition) so the gate + // classifies as "ok" without needing a quality_gates parent + // FK row to exist. + surface: uokContext.surface, + runControl: uokContext.runControl, + permissionProfile: uokContext.permissionProfile, + ...extra, + }); + return null; + } catch (err) { + return err instanceof Error ? err : new Error(String(err)); + } + }; + + const emitRequiredTriageGate = async ( + gateId: string, + outcome: "pass" | "fail" | "manual-attention", + rationale: string, + extra: Record = {}, + ): Promise => { + const err = emitTriageGate(gateId, outcome, rationale, extra); + if (!err) return null; + await emit("triage-apply-failed", { + reason: "uok-gate-emission-failed", + gateId, + error: err.message, + }); + return { + ok: false, + agreed: false, + error: `UOK gate emission failed for ${gateId}: ${err.message}`, + resolvedIds: [], + flowId, + }; + }; + await emit("triage-apply-start", { candidateCount: options.candidateCount ?? 
null, }); @@ -512,6 +625,17 @@ export async function runTriageApply( ] .filter(Boolean) .join(", "); + // Missing agents is a trusted-source-gate failure cause too — the + // gate's contract is "both built-ins exist AND have source=builtin". + // Emit it so operators see the failure in status uok, not just in + // the triage-apply journal. + const gateFailure = await emitRequiredTriageGate( + "trusted-agent-source-gate", + "fail", + `required built-in agent(s) not discovered: ${missing}`, + { missing }, + ); + if (gateFailure) return gateFailure; await emit("triage-apply-failed", { reason: "missing-agent", missing }); return { ok: false, @@ -527,6 +651,12 @@ export async function runTriageApply( // independence. Operators can still customize behavior for inspect // workflows, but --apply uses only the shipped review contract. if (triageDecider.source !== "builtin" || rubberDuck.source !== "builtin") { + const rationale = `non-builtin agents (triage-decider=${triageDecider.source}, rubber-duck=${rubberDuck.source})`; + const gateFailure = await emitRequiredTriageGate("trusted-agent-source-gate", "fail", rationale, { + triageDeciderSource: triageDecider.source, + rubberDuckSource: rubberDuck.source, + }); + if (gateFailure) return gateFailure; await emit("triage-apply-failed", { reason: "untrusted-agent-source", triageDeciderSource: triageDecider.source, @@ -540,11 +670,26 @@ export async function runTriageApply( flowId, }; } + const trustGateFailure = await emitRequiredTriageGate( + "trusted-agent-source-gate", + "pass", + "both triage-decider and rubber-duck are SF-shipped built-ins", + ); + if (trustGateFailure) return trustGateFailure; // Custom-runner guard (codex review follow-up): an injected agentRunner // can side-channel-mutate the ledger despite the read-only tool override. // Only allow it when allowUntrustedRunner is explicitly set (test path). 
if (options.agentRunner && !options.allowUntrustedRunner) { + // Same trust contract as missing-agent / non-builtin source: the + // run cannot guarantee built-in behavior, so it's a failure of the + // trusted-agent-source-gate, surfaced through status uok. + const gateFailure = await emitRequiredTriageGate( + "trusted-agent-source-gate", + "fail", + "runTriageApply: custom agentRunner injected without allowUntrustedRunner; production callers cannot bypass the built-in agent contract", + ); + if (gateFailure) return gateFailure; await emit("triage-apply-failed", { reason: "untrusted-runner" }); return { ok: false, @@ -591,6 +736,13 @@ export async function runTriageApply( options.expectedIds, ); if (!parseResult.plan) { + const gateFailure = await emitRequiredTriageGate( + "triage-plan-validation-gate", + "fail", + parseResult.error ?? "decider produced an unparseable plan", + { parseError: parseResult.error ?? null }, + ); + if (gateFailure) return gateFailure; await emit("triage-apply-failed", { reason: "no-plan", parseError: parseResult.error, @@ -607,6 +759,13 @@ export async function runTriageApply( }; } const plan = parseResult.plan; + const validationGateFailure = await emitRequiredTriageGate( + "triage-plan-validation-gate", + "pass", + `decider plan parsed cleanly: ${plan.length} decisions`, + { decisionCount: plan.length }, + ); + if (validationGateFailure) return validationGateFailure; await emit("triage-apply-plan-parsed", { decisionCount: plan.length, outcomes: plan.reduce>((acc, d) => { @@ -645,6 +804,13 @@ export async function runTriageApply( }, ); if (!review.ok) { + const gateFailure = await emitRequiredTriageGate( + "triage-apply-review-gate", + "manual-attention", + "rubber-duck subagent failed to complete; review pending operator", + { exitCode: review.exitCode ?? 
null }, + ); + if (gateFailure) return gateFailure; await emit("triage-apply-failed", { reason: "rubber-duck-failed" }); return { ok: false, @@ -660,6 +826,12 @@ export async function runTriageApply( // Disagreement is a clean pause, not a failure. The plan and the // review are both persisted in the decision report; the operator // can read both and act. + const gateFailure = await emitRequiredTriageGate( + "triage-apply-review-gate", + "fail", + "rubber-duck disagreed with the proposed plan; no mutations applied", + ); + if (gateFailure) return gateFailure; return { ok: false, agreed: false, @@ -670,6 +842,12 @@ export async function runTriageApply( flowId, }; } + const reviewGateFailure = await emitRequiredTriageGate( + "triage-apply-review-gate", + "pass", + "rubber-duck agreed with the proposed plan; apply phase proceeds", + ); + if (reviewGateFailure) return reviewGateFailure; // Phase 3: apply the plan. We (this runner) call markResolved for // each close/promote decision; fix decisions get surfaced for the @@ -682,6 +860,36 @@ export async function runTriageApply( // partial failure (rejectedIds) so they can investigate. const approvedMutationCount = plan.filter((d) => d.outcome !== "fix").length; const hasFailures = applyResult.rejectedIds.length > 0; + + // triage-apply-mutation-gate (codex review 2026-05-14 follow-up on + // slice 3a): observability for the post-review apply phase. pass when + // every approved mutation landed; fail when any rejected; manual- + // attention when there were no approved mutations to apply (all + // decisions were "fix" handoffs, which require operator action). + const mutationOutcome: "pass" | "fail" | "manual-attention" = + approvedMutationCount === 0 + ? "manual-attention" + : hasFailures + ? "fail" + : "pass"; + const mutationRationale = + approvedMutationCount === 0 + ? `no approved mutations: ${applyResult.pendingFixIds.length} fix decisions await operator` + : hasFailures + ? 
`${applyResult.rejectedIds.length} of ${approvedMutationCount} approved mutations failed: ${applyResult.rejectedIds.join(", ")}` + : `${applyResult.resolvedIds.length} of ${approvedMutationCount} approved mutations applied cleanly`; + const mutationGateFailure = await emitRequiredTriageGate( + "triage-apply-mutation-gate", + mutationOutcome, + mutationRationale, + { + resolvedCount: applyResult.resolvedIds.length, + rejectedCount: applyResult.rejectedIds.length, + pendingFixCount: applyResult.pendingFixIds.length, + }, + ); + if (mutationGateFailure) return mutationGateFailure; + return { ok: !hasFailures, agreed: true, diff --git a/src/headless-uok-status.ts b/src/headless-uok-status.ts index ff0425e4a..b01fb1751 100644 --- a/src/headless-uok-status.ts +++ b/src/headless-uok-status.ts @@ -44,23 +44,23 @@ function sfExtensionPath(moduleName: string): string { * * Slice 1 (UOK control-plane plan, 2026-05-14) introduces this field so * operators can distinguish gates that should be paying attention to from - * those that don't yet have the new metadata. Each value's contract: + * those that don't yet have the UOK run context. Each value's contract: * - * - "ok" Gate has schema-v2 metadata AND recent runs in the + * - "ok" Gate has schema-v2 run context AND recent runs in the * window. Healthy. * - "stale" Gate has prior runs but nothing in the last 24h. * Suggests something stopped exercising it. * - "incomplete" Gate has schema-v2 records but is missing required - * metadata (surface / runControl / permissionProfile / + * run-context fields (surface / runControl / permissionProfile / * traceId). Used when future slices start writing * schema-v2 rows; never assigned to legacy rows. * - "missing" Gate is configured/expected but has zero recent runs. * Requires a configured-gate registry to detect; future * slice work, not slice 1. - * - "legacy" Gate row predates schema-v2 metadata. NOT a warning — + * - "legacy" Gate row predates schema-v2 run context. 
NOT a warning — * operators are not paged for the rich history of pre-v2 * records. Future slices migrate these as the writer - * paths emit complete metadata. + * paths emit complete run context. * * Slice 1 only populates "ok" / "stale" / "legacy". "incomplete" and * "missing" wait for the schema-v2 writer adapter (slice 2) and the @@ -93,7 +93,7 @@ export interface UokStatusResult { } /** - * A row is "legacy" when it lacks the schema-v2 metadata that the writer + * A row is "legacy" when it lacks the schema-v2 UOK run context that the writer * adapter (slice 2 of the UOK control-plane plan) populates. Surface is * the canonical indicator: NULL → legacy, set → schema-v2 row that * should be classified ok/stale/incomplete based on the other fields. @@ -106,27 +106,27 @@ export interface UokStatusResult { * full required set, and the classifier surfaces that so operators can * find the buggy writer. */ -interface GateMetadataRow { +interface GateRunContextRow { surface: string | null; runControl: string | null; permissionProfile: string | null; traceId: string | null; } -function hasSchemaV2Metadata(meta: GateMetadataRow): boolean { - return typeof meta.surface === "string" && meta.surface.length > 0; +function hasSchemaV2RunContext(context: GateRunContextRow): boolean { + return typeof context.surface === "string" && context.surface.length > 0; } -function isSchemaV2Complete(meta: GateMetadataRow): boolean { +function isSchemaV2RunContextComplete(context: GateRunContextRow): boolean { return ( - typeof meta.surface === "string" && - meta.surface.length > 0 && - typeof meta.runControl === "string" && - meta.runControl.length > 0 && - typeof meta.permissionProfile === "string" && - meta.permissionProfile.length > 0 && - typeof meta.traceId === "string" && - meta.traceId.length > 0 + typeof context.surface === "string" && + context.surface.length > 0 && + typeof context.runControl === "string" && + context.runControl.length > 0 && + typeof context.permissionProfile 
=== "string" && + context.permissionProfile.length > 0 && + typeof context.traceId === "string" && + context.traceId.length > 0 ); } @@ -134,12 +134,12 @@ const STALE_THRESHOLD_MS = 24 * 60 * 60 * 1000; function classifyCoverage( entry: Omit, - meta: GateMetadataRow, + context: GateRunContextRow, ): GateCoverageStatus { - if (!hasSchemaV2Metadata(meta)) return "legacy"; - if (!isSchemaV2Complete(meta)) return "incomplete"; + if (!hasSchemaV2RunContext(context)) return "legacy"; + if (!isSchemaV2RunContextComplete(context)) return "incomplete"; if (entry.total === 0) { - // Has metadata but no runs in window. If we ever saw a run, it's + // Has run context but no runs in window. If we ever saw a run, it's // stale; otherwise it's never run (caller will mark "missing" when // a configured-gate registry confirms it was expected). For slice // 1+2, no registry exists, so the safer default is "stale". @@ -228,13 +228,85 @@ export async function handleUokStatus( const gateIds: string[] = gatesDbModule.getDistinctGateIds(); - // Fetch scope, last-evaluated, and schema-v2 metadata from - // quality_gates DB for each gate. Picks the most-recent row's - // metadata (MAX(evaluated_at)) so the classifier sees current - // schema-v2 status rather than oldest. Returns null fields when - // no row exists or the columns haven't been migrated yet. + // Fetch scope, last-evaluated, and schema-v2 run context for each + // gate. Slice 3a (UOK control-plane plan): run context can land in + // trace events from headless flows that have no quality_gates + // parent row (e.g. triage --apply gates, which are flow-scoped + // rather than slice-scoped). Trace events win when present; the + // quality_gates DB is consulted as a fallback for slice-scoped + // rows that may have rotated out of the trace window. 
const sfDbModule = (await jiti.import(sfExtensionPath("sf-db"), {})) as any; - interface GateMetaQuery { + const traceWriterModule = (await jiti.import( + sfExtensionPath("uok/trace-writer"), + )) as { + readTraceEvents: ( + basePath: string, + type: string, + windowHours?: number, + ) => Array>; + }; + // Read the gate-run trace events once and bucket by gateId so + // per-gate getGateMeta calls are O(1) lookup instead of repeated + // linear scans of every event. + const traceEvents = (() => { + try { + return traceWriterModule.readTraceEvents?.( + basePath, + "gate_run", + 24 * 30, + ) ?? []; + } catch { + return []; + } + })(); + const latestTraceContextByGate = new Map< + string, + { + scope?: string; + evaluatedAt?: string; + surface?: string; + runControl?: string; + permissionProfile?: string; + traceId?: string; + } + >(); + for (const ev of traceEvents) { + const gateId = + typeof ev.gateId === "string" ? (ev.gateId as string) : null; + if (!gateId) continue; + const evaluatedAt = + typeof ev.evaluatedAt === "string" + ? (ev.evaluatedAt as string) + : typeof ev.ts === "string" + ? (ev.ts as string) + : undefined; + const prev = latestTraceContextByGate.get(gateId); + if ( + prev && + prev.evaluatedAt && + evaluatedAt && + prev.evaluatedAt >= evaluatedAt + ) { + continue; + } + latestTraceContextByGate.set(gateId, { + scope: typeof ev.scope === "string" ? (ev.scope as string) : undefined, + evaluatedAt, + surface: + typeof ev.surface === "string" ? (ev.surface as string) : undefined, + runControl: + typeof ev.runControl === "string" + ? (ev.runControl as string) + : undefined, + permissionProfile: + typeof ev.permissionProfile === "string" + ? (ev.permissionProfile as string) + : undefined, + traceId: + typeof ev.traceId === "string" ? 
(ev.traceId as string) : undefined, + }); + } + interface GateRunContextQuery { scope: string; lastEvaluatedAt: string | null; surface: string | null; @@ -242,8 +314,8 @@ export async function handleUokStatus( permissionProfile: string | null; traceId: string | null; } - const getGateMeta = (id: string): GateMetaQuery => { - const empty: GateMetaQuery = { + const getGateRunContext = (id: string): GateRunContextQuery => { + const empty: GateRunContextQuery = { scope: "unknown", lastEvaluatedAt: null, surface: null, @@ -251,49 +323,77 @@ export async function handleUokStatus( permissionProfile: null, traceId: null, }; + const trace = latestTraceContextByGate.get(id); + let dbContext: GateRunContextQuery | null = null; try { const db = sfDbModule._getAdapter?.() ?? null; - if (!db) return empty; - const row = db - .prepare( - `SELECT scope, evaluated_at, surface, run_control, - permission_profile, trace_id - FROM quality_gates - WHERE gate_id = ? - ORDER BY evaluated_at IS NULL, evaluated_at DESC - LIMIT 1`, - ) - .get(id); - return { - scope: row?.scope ?? "unknown", - lastEvaluatedAt: row?.evaluated_at ?? null, - surface: row?.surface ?? null, - runControl: row?.run_control ?? null, - permissionProfile: row?.permission_profile ?? null, - traceId: row?.trace_id ?? null, - }; + if (db) { + const row = db + .prepare( + `SELECT scope, evaluated_at, surface, run_control, + permission_profile, trace_id + FROM quality_gates + WHERE gate_id = ? + ORDER BY evaluated_at IS NULL, evaluated_at DESC + LIMIT 1`, + ) + .get(id); + if (row) { + dbContext = { + scope: row.scope ?? "unknown", + lastEvaluatedAt: row.evaluated_at ?? null, + surface: row.surface ?? null, + runControl: row.run_control ?? null, + permissionProfile: row.permission_profile ?? null, + traceId: row.trace_id ?? null, + }; + } + } } catch { - return empty; + // DB read failure leaves dbContext null; trace can still win. 
} + // Precedence: pick whichever source has the later evaluatedAt + // (codex review 2026-05-14 follow-up on slice 3a). Tie-breaker: + // prefer trace because flow-scoped gates with no quality_gates + // FK row are trace-only. When neither has context, return empty. + const traceTs = trace?.evaluatedAt ?? null; + const dbTs = dbContext?.lastEvaluatedAt ?? null; + const traceWins = + trace && + (dbContext === null || + (traceTs !== null && dbTs !== null && traceTs >= dbTs) || + (traceTs !== null && dbTs === null) || + (traceTs === null && dbTs === null)); + if (traceWins && trace) { + return { + scope: trace.scope ?? dbContext?.scope ?? "unknown", + lastEvaluatedAt: traceTs, + surface: trace.surface ?? null, + runControl: trace.runControl ?? null, + permissionProfile: trace.permissionProfile ?? null, + traceId: trace.traceId ?? null, + }; + } + return dbContext ?? empty; }; gates = gateIds.map((id: string) => { const stats = gatesDbModule.getGateRunStats(id, 24); const cb = gatesDbModule.getGateCircuitBreaker(id); - const meta = getGateMeta(id); + const runContext = getGateRunContext(id); const base = { id, - scope: meta.scope, + scope: runContext.scope, total: stats.total ?? 0, pass: stats.pass ?? 0, fail: stats.fail ?? 0, retry: stats.retry ?? 0, // prefer stats window result; fall back to quality_gates last entry - lastEvaluatedAt: stats.lastEvaluatedAt ?? meta.lastEvaluatedAt, + lastEvaluatedAt: stats.lastEvaluatedAt ?? runContext.lastEvaluatedAt, circuitBreaker: cb?.state ?? "closed", failureStreak: cb?.failureStreak ?? 
0, }; - const coverageStatus = classifyCoverage(base, meta); + const coverageStatus = classifyCoverage(base, runContext); return { ...base, coverageStatus } satisfies GateHealthEntry; }); } catch (err) { diff --git a/src/resources/extensions/sf/sf-db/sf-db-gates.js b/src/resources/extensions/sf/sf-db/sf-db-gates.js index a7f3b0bc9..55e97488a 100644 --- a/src/resources/extensions/sf/sf-db/sf-db-gates.js +++ b/src/resources/extensions/sf/sf-db/sf-db-gates.js @@ -46,8 +46,8 @@ export function insertGateRow(g) { export function saveGateResult(g) { const currentDb = _getAdapter(); if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open"); - // Schema-v2 metadata is updated only when the caller supplies a valid - // canonical context. Existing rows keep their metadata (or stay legacy/ + // Schema-v2 UOK run context is updated only when the caller supplies a valid + // canonical context. Existing rows keep their context (or stay legacy/ // null) when no context is provided. COALESCE semantics on each column // mean "only overwrite when non-null" — so a legacy row stays legacy // unless this update brings a full schema-v2 ctx. @@ -415,6 +415,11 @@ export function getGateLatencyStats(gateId, windowHours = 24) { } export function getDistinctGateIds() { + // UNION trace-event IDs with quality_gates DB IDs (codex review 2026-05-14 + // follow-up on slice 3a). The previous implementation returned trace IDs + // only when any existed, silently hiding DB-only slice-scoped gates the + // moment any flow-scoped trace landed. Status uok must show both. + const ids = new Set(); try { const currentPath = getDbPath(); const basePath = @@ -422,9 +427,15 @@ export function getDistinctGateIds() { ? 
dirname(dirname(currentPath)) : process.cwd(); const events = readTraceEvents(basePath, "gate_run", 24 * 30); // 30 days - const traceIds = [...new Set(events.map((e) => e.gateId).filter(Boolean))]; - if (traceIds.length > 0) return traceIds; - // Fall back to quality_gates DB when no trace events found + for (const ev of events) { + if (typeof ev.gateId === "string" && ev.gateId.length > 0) { + ids.add(ev.gateId); + } + } + } catch { + // Trace read failure is non-fatal — DB fallback below still runs. + } + try { const db = _getAdapter(); if (db) { const rows = db @@ -432,12 +443,16 @@ export function getDistinctGateIds() { "SELECT DISTINCT gate_id FROM quality_gates WHERE gate_id != '' ORDER BY gate_id", ) .all(); - return rows.map((r) => r.gate_id); + for (const r of rows) { + if (typeof r.gate_id === "string" && r.gate_id.length > 0) { + ids.add(r.gate_id); + } + } } - return []; } catch { - return []; + // DB read failure is non-fatal — trace results above stand. } + return Array.from(ids).sort(); } export function upsertQualityGate(g) { diff --git a/src/resources/extensions/sf/uok/run-context.js b/src/resources/extensions/sf/uok/run-context.js index 3a3915a78..b4f0f3abd 100644 --- a/src/resources/extensions/sf/uok/run-context.js +++ b/src/resources/extensions/sf/uok/run-context.js @@ -1,7 +1,7 @@ /** * uok/run-context.js — UOK schema-v2 run-context adapter. * - * Purpose: normalize existing runtime/headless metadata into the shape + * Purpose: normalize existing runtime/headless run context into the shape * the UOK control plane expects (schema-v2 fields surface, runControl, * permissionProfile, traceId, parentTrace, plus the already-existing * unitType/unitId/milestoneId/sliceId). 
The adapter is intentionally diff --git a/src/resources/extensions/sf/uok/trace-writer.js b/src/resources/extensions/sf/uok/trace-writer.js index fa110f52e..02db1e6fc 100644 --- a/src/resources/extensions/sf/uok/trace-writer.js +++ b/src/resources/extensions/sf/uok/trace-writer.js @@ -33,25 +33,42 @@ function tracePath(basePath, traceId) { export function appendTraceEvent(basePath, traceId, event) { if (!basePath || !traceId) return; try { - const dir = tracesDir(basePath); - mkdirSync(dir, { recursive: true }); - const path = tracePath(basePath, traceId); - const line = JSON.stringify({ ts: new Date().toISOString(), ...event }); - if (!existsSync(path)) closeSync(openSync(path, "a")); - appendFileSync(path, `${line}\n`, "utf-8"); - // Update latest symlink - const latestPath = join(dir, "latest"); - try { - unlinkSync(latestPath); - } catch { - /* ok if missing */ - } - symlinkSync(`${traceId}.jsonl`, latestPath); + appendTraceEventRequired(basePath, traceId, event); } catch { // trace writes must never break orchestration } } +/** + * Append a trace event and throw when it cannot be persisted. + * + * Purpose: let UOK control-plane flows fail closed when their required gate + * record cannot be written. Best-effort orchestration should keep using + * appendTraceEvent; supervised mutation paths use this function so UOK is not + * silently bypassed. + * + * Consumer: headless triage --apply gate emission. 
+ */ +export function appendTraceEventRequired(basePath, traceId, event) { + if (!basePath || !traceId) { + throw new Error("appendTraceEventRequired requires basePath and traceId"); + } + const dir = tracesDir(basePath); + mkdirSync(dir, { recursive: true }); + const path = tracePath(basePath, traceId); + const line = JSON.stringify({ ts: new Date().toISOString(), ...event }); + if (!existsSync(path)) closeSync(openSync(path, "a")); + appendFileSync(path, `${line}\n`, "utf-8"); + // Update latest symlink + const latestPath = join(dir, "latest"); + try { + unlinkSync(latestPath); + } catch { + /* ok if missing */ + } + symlinkSync(`${traceId}.jsonl`, latestPath); +} + /** * Prune .sf/traces/*.jsonl files older than retentionDays. * diff --git a/src/tests/headless-triage-uok-gates.test.ts b/src/tests/headless-triage-uok-gates.test.ts new file mode 100644 index 000000000..2c352c49f --- /dev/null +++ b/src/tests/headless-triage-uok-gates.test.ts @@ -0,0 +1,182 @@ +/** + * headless-triage-uok-gates.test.ts — verify slice 3a of "Make UOK the SF + * Control Plane": runTriageApply emits gate_run trace events with schema-v2 + * metadata for the three triage gates. + * + * Test contract: each decision point (trusted-source check, plan-validation, + * rubber-duck review) writes exactly one gate_run trace event with + * surface=headless, runControl=supervised, permissionProfile=high, traceId= + * the flowId. The outcome reflects the decision (pass/fail/manual-attention). 
+ */ +import { mkdirSync, mkdtempSync, readFileSync, readdirSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, expect, test } from "vitest"; +import { runTriageApply } from "../headless-triage.js"; + +const deciderPlan = [ + "```yaml", + "decisions:", + " - id: sf-test-1", + " outcome: close", + " evidence_kind: human-clear", + " reason: stale", + "```", + "Self-feedback triage complete.", +].join("\n"); + +const tempDirs: string[] = []; +let originalAgentDir: string | undefined; + +function makeProject(): string { + const dir = mkdtempSync(join(tmpdir(), "sf-triage-uok-gates-")); + tempDirs.push(dir); + mkdirSync(join(dir, ".sf"), { recursive: true }); + return dir; +} + +function readTraceEvents(project: string): Array> { + const dir = join(project, ".sf", "traces"); + let files: string[]; + try { + files = readdirSync(dir).filter( + (f) => f.endsWith(".jsonl") && !f.startsWith("latest"), + ); + } catch { + return []; + } + const events: Array> = []; + for (const f of files) { + const content = readFileSync(join(dir, f), "utf-8"); + for (const line of content.split("\n")) { + if (!line.trim()) continue; + try { + events.push(JSON.parse(line)); + } catch { + // skip malformed lines + } + } + } + return events; +} + +function gateRunsByGateId( + project: string, +): Map>> { + const events = readTraceEvents(project).filter( + (e) => e.type === "gate_run", + ); + const map = new Map>>(); + for (const ev of events) { + const gid = typeof ev.gateId === "string" ? 
(ev.gateId as string) : null; + if (!gid) continue; + if (!map.has(gid)) map.set(gid, []); + map.get(gid)!.push(ev); + } + return map; +} + +beforeEach(() => { + originalAgentDir = process.env.SF_CODING_AGENT_DIR; + const dir = mkdtempSync(join(tmpdir(), "sf-triage-uok-gates-agent-")); + tempDirs.push(dir); + process.env.SF_CODING_AGENT_DIR = dir; +}); + +afterEach(() => { + if (originalAgentDir === undefined) delete process.env.SF_CODING_AGENT_DIR; + else process.env.SF_CODING_AGENT_DIR = originalAgentDir; + while (tempDirs.length > 0) { + rmSync(tempDirs.pop()!, { recursive: true, force: true }); + } +}); + +describe("runTriageApply emits gate_run trace events with schema-v2 metadata", () => { + test("agree_path_emits_three_pass_gates_with_uok_v2_metadata", async () => { + const project = makeProject(); + const result = await runTriageApply(project, "triage prompt", { + candidateCount: 1, + allowUntrustedRunner: true, + agentRunner: async (agent) => { + if (agent.name === "triage-decider") { + return { ok: true, output: deciderPlan, exitCode: 0 }; + } + return { ok: true, output: "rubber-duck: agree", exitCode: 0 }; + }, + }); + + expect(result.agreed).toBe(true); + + const gates = gateRunsByGateId(project); + const trustEvents = gates.get("trusted-agent-source-gate") ?? []; + const validEvents = gates.get("triage-plan-validation-gate") ?? []; + const reviewEvents = gates.get("triage-apply-review-gate") ?? 
[]; + + expect(trustEvents).toHaveLength(1); + expect(trustEvents[0].outcome).toBe("pass"); + expect(trustEvents[0].surface).toBe("headless"); + expect(trustEvents[0].runControl).toBe("supervised"); + expect(trustEvents[0].permissionProfile).toBe("high"); + expect(typeof trustEvents[0].traceId).toBe("string"); + expect((trustEvents[0].traceId as string).startsWith("triage-apply-")).toBe( + true, + ); + + expect(validEvents).toHaveLength(1); + expect(validEvents[0].outcome).toBe("pass"); + expect(validEvents[0].surface).toBe("headless"); + + expect(reviewEvents).toHaveLength(1); + expect(reviewEvents[0].outcome).toBe("pass"); + expect(reviewEvents[0].surface).toBe("headless"); + }); + + test("disagree_path_emits_review_gate_as_fail", async () => { + const project = makeProject(); + await runTriageApply(project, "triage prompt", { + allowUntrustedRunner: true, + agentRunner: async (agent) => { + if (agent.name === "triage-decider") { + return { ok: true, output: deciderPlan, exitCode: 0 }; + } + return { + ok: true, + output: "## Concern 1:\nbad close", + exitCode: 0, + }; + }, + }); + + const gates = gateRunsByGateId(project); + const reviewEvents = gates.get("triage-apply-review-gate") ?? []; + expect(reviewEvents).toHaveLength(1); + expect(reviewEvents[0].outcome).toBe("fail"); + expect(reviewEvents[0].surface).toBe("headless"); + }); + + test("unknown_id_in_plan_emits_validation_gate_fail_and_no_review_gate", async () => { + const project = makeProject(); + await runTriageApply(project, "triage prompt", { + candidateCount: 1, + expectedIds: ["sf-expected-1"], + allowUntrustedRunner: true, + agentRunner: async () => ({ + ok: true, + output: deciderPlan, + exitCode: 0, + }), + }); + + const gates = gateRunsByGateId(project); + const validEvents = gates.get("triage-plan-validation-gate") ?? []; + const reviewEvents = gates.get("triage-apply-review-gate") ?? 
[]; + + expect(validEvents).toHaveLength(1); + expect(validEvents[0].outcome).toBe("fail"); + expect((validEvents[0].rationale as string).toLowerCase()).toContain( + "not in the candidate set", + ); + // Review gate must NOT fire because plan validation blocked the flow. + expect(reviewEvents).toHaveLength(0); + }); +});