feat(uok): slice 3a — triage --apply emits 4 schema-v2 UOK gates

First production caller of the schema-v2 writer chain. Every
`sf headless triage --apply` invocation now emits up to four gate_run
trace events (fewer when an earlier gate stops the run) with
surface=headless, runControl=supervised, permissionProfile=high,
traceId=flowId — making the gates visible in `status uok --json` with
coverageStatus: "ok". On reject paths the gate outcome is fail or
manual-attention; coverageStatus stays "ok".

Gates emitted, in order:

  1. trusted-agent-source-gate — fires on the trust precondition:
       pass: both triage-decider and rubber-duck are SF-shipped built-ins
       fail: missing-agent OR non-builtin source OR untrusted custom runner
       (covers all three pre-dispatch refusal paths so operators see the
       failure in status uok, not just in the journal)
  2. triage-plan-validation-gate — fires on the strict-parse contract:
       pass: parseTriagePlanStrict returns a valid plan covering expectedIds
       fail: missing marker / bad yaml / unknown id / outcome-required field missing
  3. triage-apply-review-gate — fires on the rubber-duck verdict:
       pass: rubber-duck: agree → apply phase proceeds
       fail: rubber-duck disagreed → clean pause, no mutations
       manual-attention: rubber-duck subagent failed to complete
  4. triage-apply-mutation-gate — fires after applyTriagePlan:
       pass: every approved mutation landed
       fail: any rejected mutation
       manual-attention: zero approved mutations (all decisions were "fix")
     Includes counts in extra: resolvedCount, rejectedCount, pendingFixCount.

Reader-side fixes (codex review follow-up on slice 3a):

  - getDistinctGateIds (sf-db-gates.js) now UNIONs trace-event IDs with
    quality_gates DB IDs instead of returning trace IDs early when any
    exist. The old behavior silently hid slice-scoped DB-only gates the
    moment a flow-scoped trace landed.
  - getGateMeta (headless-uok-status.ts) now reads BOTH trace events and
    DB row, then picks whichever has the later evaluatedAt. Tie-break
    prefers trace (flow-scoped gates with no quality_gates FK row are
    trace-only). Old behavior preferred trace whenever surface was set,
    regardless of timestamp.

Live verification: ran `sf headless triage --apply` 4 times against the
operator's environment (rubber-duck is a project-level override).
trusted-agent-source-gate now shows in `sf headless status uok --json`
with total: 4, fail: 4, coverageStatus: "ok" — proving the schema-v2
metadata round-trips through the trace events and reaches the
classifier.

Tests:
  - headless-triage-uok-gates.test.ts (3 new tests): agree path emits
    3 pass gates with v2 metadata; disagree path emits review fail;
    unknown-id path emits validation fail with no review gate.
  - Existing test suites adjusted for the GateMetadataRow →
    GateRunContextRow rename (classifier helpers renamed consistently
    across .ts source and the .mjs test mirror).
  - Full SF + headless apply: 1681/1681.

Still legacy in production (slice 3b targets these next):
  - phases-pre-dispatch.js gates: resource-version-guard, pre-dispatch-
    health-gate, planning-flow-gate. None of these pass uokContext yet.
  - phases-unit.js gates: unit-verification-gate, plan-gate.
  - plan-slice.js: Q3/Q4/Q5/Q6/Q7/Q8 seed gates.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-14 18:04:50 +02:00
parent f0c57b58c6
commit 454e051aed
6 changed files with 599 additions and 77 deletions

View file

@ -494,6 +494,119 @@ export async function runTriageApply(
let seq = 0;
const emit = (eventType: string, data: Record<string, unknown> = {}) =>
emitTriageApplyJournal(cwd, flowId, seq++, eventType, data);
// Slice 3a of "Make UOK the SF Control Plane": every triage --apply run
// emits up to four gate_run trace events (trusted-agent-source-gate,
// triage-plan-validation-gate, triage-apply-review-gate,
// triage-apply-mutation-gate) with canonical UOK run context; a run that
// stops at an earlier gate emits fewer. status uok reads
// surface/runControl/... from these events and classifies coverage as
// "ok"/"stale"/"incomplete".
//
// Dynamic imports because src/resources is excluded from the root
// tsconfig (extensions have their own build). buildUokRunContext and
// appendTraceEventRequired live there; both modules are loaded once per run.
const runContextModule = (await jiti.import(
sfExtensionPath("uok/run-context"),
)) as {
buildUokRunContext: (opts: Record<string, unknown>) =>
| {
surface: string;
runControl: string;
permissionProfile: string;
traceId: string;
parentTrace?: string;
}
| null;
};
const traceWriterModule = (await jiti.import(
sfExtensionPath("uok/trace-writer"),
)) as {
appendTraceEventRequired: (
basePath: string,
traceId: string,
event: Record<string, unknown>,
) => void;
};
// surface: "headless" - runTriageApply is always operator-invoked
// via sf headless triage --apply.
// runControl: "supervised" - the operator launched this command; it's
// not an autonomous-loop self-initiation.
// permissionProfile: "high" - --apply mutates the ledger, so the run
// must have write permission.
// traceId: flowId - already a UUID-stamped per-run id.
const uokContext = runContextModule.buildUokRunContext({
surface: "headless",
runControl: "supervised",
permissionProfile: "high",
traceId: flowId,
});
/**
 * Append one gate_run trace event for this triage --apply run.
 *
 * @param gateId    Gate identifier; recorded on the event and embedded in turnId.
 * @param outcome   Gate verdict; also selects the failureClass written.
 * @param rationale Human-readable reason stored on the event.
 * @param extra     Gate-specific fields merged into the event.
 * @returns null when the event was written; otherwise the Error that
 *          prevented it. Never throws.
 */
const emitTriageGate = (
  gateId: string,
  outcome: "pass" | "fail" | "manual-attention",
  rationale: string,
  extra: Record<string, unknown> = {},
): Error | null => {
  if (!uokContext) {
    return new Error("buildUokRunContext returned null for triage --apply");
  }
  try {
    traceWriterModule.appendTraceEventRequired(cwd, flowId, {
      // Spread gate-specific fields FIRST so they can never overwrite the
      // canonical event fields or the UOK run context written below.
      ...extra,
      type: "gate_run",
      traceId: uokContext.traceId,
      turnId: `triage-apply:${gateId}`,
      gateId,
      gateType: "quality-gate",
      outcome,
      failureClass:
        outcome === "fail"
          ? "policy"
          : outcome === "manual-attention"
            ? "manual-attention"
            : "none",
      rationale,
      // Each gate is evaluated exactly once per run; there is no retry loop.
      attempt: 1,
      maxAttempts: 1,
      retryable: false,
      evaluatedAt: new Date().toISOString(),
      durationMs: 0,
      // Canonical UOK run context. status uok reads these from
      // trace events (slice 3a addition) so the gate
      // classifies as "ok" without needing a quality_gates parent
      // FK row to exist.
      surface: uokContext.surface,
      runControl: uokContext.runControl,
      permissionProfile: uokContext.permissionProfile,
    });
    return null;
  } catch (err) {
    // Normalise non-Error throwables so callers can always read .message.
    return err instanceof Error ? err : new Error(String(err));
  }
};
/**
 * Emit a gate event that the run cannot proceed without.
 *
 * Delegates to emitTriageGate. When the event cannot be written, records a
 * "triage-apply-failed" journal entry and resolves to the failure result the
 * caller should return immediately; resolves to null when the gate event was
 * written and the run may continue.
 */
const emitRequiredTriageGate = async (
  gateId: string,
  outcome: "pass" | "fail" | "manual-attention",
  rationale: string,
  extra: Record<string, unknown> = {},
): Promise<RunTriageApplyResult | null> => {
  const emissionError = emitTriageGate(gateId, outcome, rationale, extra);
  if (emissionError === null) {
    return null;
  }
  const { message } = emissionError;
  // Journal the emission failure before surfacing it to the caller.
  await emit("triage-apply-failed", {
    reason: "uok-gate-emission-failed",
    gateId,
    error: message,
  });
  return {
    ok: false,
    agreed: false,
    error: `UOK gate emission failed for ${gateId}: ${message}`,
    resolvedIds: [],
    flowId,
  };
};
await emit("triage-apply-start", {
candidateCount: options.candidateCount ?? null,
});
@ -512,6 +625,17 @@ export async function runTriageApply(
]
.filter(Boolean)
.join(", ");
// Missing agents is a trusted-source-gate failure cause too — the
// gate's contract is "both built-ins exist AND have source=builtin".
// Emit it so operators see the failure in status uok, not just in
// the triage-apply journal.
const gateFailure = await emitRequiredTriageGate(
"trusted-agent-source-gate",
"fail",
`required built-in agent(s) not discovered: ${missing}`,
{ missing },
);
if (gateFailure) return gateFailure;
await emit("triage-apply-failed", { reason: "missing-agent", missing });
return {
ok: false,
@ -527,6 +651,12 @@ export async function runTriageApply(
// independence. Operators can still customize behavior for inspect
// workflows, but --apply uses only the shipped review contract.
if (triageDecider.source !== "builtin" || rubberDuck.source !== "builtin") {
const rationale = `non-builtin agents (triage-decider=${triageDecider.source}, rubber-duck=${rubberDuck.source})`;
const gateFailure = await emitRequiredTriageGate("trusted-agent-source-gate", "fail", rationale, {
triageDeciderSource: triageDecider.source,
rubberDuckSource: rubberDuck.source,
});
if (gateFailure) return gateFailure;
await emit("triage-apply-failed", {
reason: "untrusted-agent-source",
triageDeciderSource: triageDecider.source,
@ -540,11 +670,26 @@ export async function runTriageApply(
flowId,
};
}
// Emit the trust gate's "pass" only when the custom-runner guard just below
// will not fire. That guard reports a "fail" on this same gate, and emitting
// "pass" first would record both a pass and a fail for one refused run.
const trustGateFailure =
  options.agentRunner && !options.allowUntrustedRunner
    ? null
    : await emitRequiredTriageGate(
        "trusted-agent-source-gate",
        "pass",
        "both triage-decider and rubber-duck are SF-shipped built-ins",
      );
if (trustGateFailure) return trustGateFailure;
// Custom-runner guard (codex review follow-up): an injected agentRunner
// can side-channel-mutate the ledger despite the read-only tool override.
// Only allow it when allowUntrustedRunner is explicitly set (test path).
if (options.agentRunner && !options.allowUntrustedRunner) {
// Same trust contract as missing-agent / non-builtin source: the
// run cannot guarantee built-in behavior, so it's a failure of the
// trusted-agent-source-gate, surfaced through status uok.
const gateFailure = await emitRequiredTriageGate(
"trusted-agent-source-gate",
"fail",
"runTriageApply: custom agentRunner injected without allowUntrustedRunner; production callers cannot bypass the built-in agent contract",
);
if (gateFailure) return gateFailure;
await emit("triage-apply-failed", { reason: "untrusted-runner" });
return {
ok: false,
@ -591,6 +736,13 @@ export async function runTriageApply(
options.expectedIds,
);
if (!parseResult.plan) {
const gateFailure = await emitRequiredTriageGate(
"triage-plan-validation-gate",
"fail",
parseResult.error ?? "decider produced an unparseable plan",
{ parseError: parseResult.error ?? null },
);
if (gateFailure) return gateFailure;
await emit("triage-apply-failed", {
reason: "no-plan",
parseError: parseResult.error,
@ -607,6 +759,13 @@ export async function runTriageApply(
};
}
const plan = parseResult.plan;
const validationGateFailure = await emitRequiredTriageGate(
"triage-plan-validation-gate",
"pass",
`decider plan parsed cleanly: ${plan.length} decisions`,
{ decisionCount: plan.length },
);
if (validationGateFailure) return validationGateFailure;
await emit("triage-apply-plan-parsed", {
decisionCount: plan.length,
outcomes: plan.reduce<Record<string, number>>((acc, d) => {
@ -645,6 +804,13 @@ export async function runTriageApply(
},
);
if (!review.ok) {
const gateFailure = await emitRequiredTriageGate(
"triage-apply-review-gate",
"manual-attention",
"rubber-duck subagent failed to complete; review pending operator",
{ exitCode: review.exitCode ?? null },
);
if (gateFailure) return gateFailure;
await emit("triage-apply-failed", { reason: "rubber-duck-failed" });
return {
ok: false,
@ -660,6 +826,12 @@ export async function runTriageApply(
// Disagreement is a clean pause, not a failure. The plan and the
// review are both persisted in the decision report; the operator
// can read both and act.
const gateFailure = await emitRequiredTriageGate(
"triage-apply-review-gate",
"fail",
"rubber-duck disagreed with the proposed plan; no mutations applied",
);
if (gateFailure) return gateFailure;
return {
ok: false,
agreed: false,
@ -670,6 +842,12 @@ export async function runTriageApply(
flowId,
};
}
const reviewGateFailure = await emitRequiredTriageGate(
"triage-apply-review-gate",
"pass",
"rubber-duck agreed with the proposed plan; apply phase proceeds",
);
if (reviewGateFailure) return reviewGateFailure;
// Phase 3: apply the plan. We (this runner) call markResolved for
// each close/promote decision; fix decisions get surfaced for the
@ -682,6 +860,36 @@ export async function runTriageApply(
// partial failure (rejectedIds) so they can investigate.
const approvedMutationCount = plan.filter((d) => d.outcome !== "fix").length;
const hasFailures = applyResult.rejectedIds.length > 0;
// triage-apply-mutation-gate (codex review 2026-05-14 follow-up on
// slice 3a): observability for the post-review apply phase. pass when
// every approved mutation landed; fail when any rejected; manual-
// attention when there were no approved mutations to apply (all
// decisions were "fix" handoffs, which require operator action).
const mutationOutcome: "pass" | "fail" | "manual-attention" =
approvedMutationCount === 0
? "manual-attention"
: hasFailures
? "fail"
: "pass";
const mutationRationale =
approvedMutationCount === 0
? `no approved mutations: ${applyResult.pendingFixIds.length} fix decisions await operator`
: hasFailures
? `${applyResult.rejectedIds.length} of ${approvedMutationCount} approved mutations failed: ${applyResult.rejectedIds.join(", ")}`
: `${applyResult.resolvedIds.length} of ${approvedMutationCount} approved mutations applied cleanly`;
const mutationGateFailure = await emitRequiredTriageGate(
"triage-apply-mutation-gate",
mutationOutcome,
mutationRationale,
{
resolvedCount: applyResult.resolvedIds.length,
rejectedCount: applyResult.rejectedIds.length,
pendingFixCount: applyResult.pendingFixIds.length,
},
);
if (mutationGateFailure) return mutationGateFailure;
return {
ok: !hasFailures,
agreed: true,

View file

@ -44,23 +44,23 @@ function sfExtensionPath(moduleName: string): string {
*
* Slice 1 (UOK control-plane plan, 2026-05-14) introduces this field so
* operators can distinguish gates that should be paying attention to from
* those that don't yet have the new metadata. Each value's contract:
* those that don't yet have the UOK run context. Each value's contract:
*
* - "ok" Gate has schema-v2 metadata AND recent runs in the
* - "ok" Gate has schema-v2 run context AND recent runs in the
* window. Healthy.
* - "stale" Gate has prior runs but nothing in the last 24h.
* Suggests something stopped exercising it.
* - "incomplete" Gate has schema-v2 records but is missing required
* metadata (surface / runControl / permissionProfile /
* run-context fields (surface / runControl / permissionProfile /
* traceId). Used when future slices start writing
* schema-v2 rows; never assigned to legacy rows.
* - "missing" Gate is configured/expected but has zero recent runs.
* Requires a configured-gate registry to detect; future
* slice work, not slice 1.
* - "legacy" Gate row predates schema-v2 metadata. NOT a warning
* - "legacy" Gate row predates schema-v2 run context. NOT a warning —
* operators are not paged for the rich history of pre-v2
* records. Future slices migrate these as the writer
* paths emit complete metadata.
* paths emit complete run context.
*
* Slice 1 only populates "ok" / "stale" / "legacy". "incomplete" and
* "missing" wait for the schema-v2 writer adapter (slice 2) and the
@ -93,7 +93,7 @@ export interface UokStatusResult {
}
/**
* A row is "legacy" when it lacks the schema-v2 metadata that the writer
* A row is "legacy" when it lacks the schema-v2 UOK run context that the writer
* adapter (slice 2 of the UOK control-plane plan) populates. Surface is
* the canonical indicator: NULL means legacy; set means a schema-v2 row that
* should be classified ok/stale/incomplete based on the other fields.
@ -106,27 +106,27 @@ export interface UokStatusResult {
* full required set, and the classifier surfaces that so operators can
* find the buggy writer.
*/
interface GateMetadataRow {
interface GateRunContextRow {
surface: string | null;
runControl: string | null;
permissionProfile: string | null;
traceId: string | null;
}
function hasSchemaV2Metadata(meta: GateMetadataRow): boolean {
return typeof meta.surface === "string" && meta.surface.length > 0;
function hasSchemaV2RunContext(context: GateRunContextRow): boolean {
return typeof context.surface === "string" && context.surface.length > 0;
}
function isSchemaV2Complete(meta: GateMetadataRow): boolean {
function isSchemaV2RunContextComplete(context: GateRunContextRow): boolean {
return (
typeof meta.surface === "string" &&
meta.surface.length > 0 &&
typeof meta.runControl === "string" &&
meta.runControl.length > 0 &&
typeof meta.permissionProfile === "string" &&
meta.permissionProfile.length > 0 &&
typeof meta.traceId === "string" &&
meta.traceId.length > 0
typeof context.surface === "string" &&
context.surface.length > 0 &&
typeof context.runControl === "string" &&
context.runControl.length > 0 &&
typeof context.permissionProfile === "string" &&
context.permissionProfile.length > 0 &&
typeof context.traceId === "string" &&
context.traceId.length > 0
);
}
@ -134,12 +134,12 @@ const STALE_THRESHOLD_MS = 24 * 60 * 60 * 1000;
function classifyCoverage(
entry: Omit<GateHealthEntry, "coverageStatus">,
meta: GateMetadataRow,
context: GateRunContextRow,
): GateCoverageStatus {
if (!hasSchemaV2Metadata(meta)) return "legacy";
if (!isSchemaV2Complete(meta)) return "incomplete";
if (!hasSchemaV2RunContext(context)) return "legacy";
if (!isSchemaV2RunContextComplete(context)) return "incomplete";
if (entry.total === 0) {
// Has metadata but no runs in window. If we ever saw a run, it's
// Has run context but no runs in window. If we ever saw a run, it's
// stale; otherwise it's never run (caller will mark "missing" when
// a configured-gate registry confirms it was expected). For slice
// 1+2, no registry exists, so the safer default is "stale".
@ -228,13 +228,85 @@ export async function handleUokStatus(
const gateIds: string[] = gatesDbModule.getDistinctGateIds();
// Fetch scope, last-evaluated, and schema-v2 metadata from
// quality_gates DB for each gate. Picks the most-recent row's
// metadata (MAX(evaluated_at)) so the classifier sees current
// schema-v2 status rather than oldest. Returns null fields when
// no row exists or the columns haven't been migrated yet.
// Fetch scope, last-evaluated, and schema-v2 run context for each
// gate. Slice 3a (UOK control-plane plan): run context can land in
// trace events from headless flows that have no quality_gates
// parent row (e.g. triage --apply gates, which are flow-scoped
// rather than slice-scoped). Both sources are read and whichever has
// the later evaluatedAt wins (ties go to the trace); the quality_gates
// DB also covers slice-scoped rows that may have rotated out of the
// trace window.
const sfDbModule = (await jiti.import(sfExtensionPath("sf-db"), {})) as any;
interface GateMetaQuery {
const traceWriterModule = (await jiti.import(
sfExtensionPath("uok/trace-writer"),
)) as {
readTraceEvents: (
basePath: string,
type: string,
windowHours?: number,
) => Array<Record<string, unknown>>;
};
// Read the gate-run trace events once and bucket by gateId so
// per-gate getGateMeta calls are O(1) lookup instead of repeated
// linear scans of every event.
const traceEvents = (() => {
try {
return traceWriterModule.readTraceEvents?.(
basePath,
"gate_run",
24 * 30,
) ?? [];
} catch {
return [];
}
})();
const latestTraceContextByGate = new Map<
string,
{
scope?: string;
evaluatedAt?: string;
surface?: string;
runControl?: string;
permissionProfile?: string;
traceId?: string;
}
>();
for (const ev of traceEvents) {
const gateId =
typeof ev.gateId === "string" ? (ev.gateId as string) : null;
if (!gateId) continue;
const evaluatedAt =
typeof ev.evaluatedAt === "string"
? (ev.evaluatedAt as string)
: typeof ev.ts === "string"
? (ev.ts as string)
: undefined;
const prev = latestTraceContextByGate.get(gateId);
if (
prev &&
prev.evaluatedAt &&
evaluatedAt &&
prev.evaluatedAt >= evaluatedAt
) {
continue;
}
latestTraceContextByGate.set(gateId, {
scope: typeof ev.scope === "string" ? (ev.scope as string) : undefined,
evaluatedAt,
surface:
typeof ev.surface === "string" ? (ev.surface as string) : undefined,
runControl:
typeof ev.runControl === "string"
? (ev.runControl as string)
: undefined,
permissionProfile:
typeof ev.permissionProfile === "string"
? (ev.permissionProfile as string)
: undefined,
traceId:
typeof ev.traceId === "string" ? (ev.traceId as string) : undefined,
});
}
interface GateRunContextQuery {
scope: string;
lastEvaluatedAt: string | null;
surface: string | null;
@ -242,8 +314,8 @@ export async function handleUokStatus(
permissionProfile: string | null;
traceId: string | null;
}
const getGateMeta = (id: string): GateMetaQuery => {
const empty: GateMetaQuery = {
const getGateRunContext = (id: string): GateRunContextQuery => {
const empty: GateRunContextQuery = {
scope: "unknown",
lastEvaluatedAt: null,
surface: null,
@ -251,49 +323,77 @@ export async function handleUokStatus(
permissionProfile: null,
traceId: null,
};
const trace = latestTraceContextByGate.get(id);
let dbContext: GateRunContextQuery | null = null;
try {
const db = sfDbModule._getAdapter?.() ?? null;
if (!db) return empty;
const row = db
.prepare(
`SELECT scope, evaluated_at, surface, run_control,
permission_profile, trace_id
FROM quality_gates
WHERE gate_id = ?
ORDER BY evaluated_at IS NULL, evaluated_at DESC
LIMIT 1`,
)
.get(id);
return {
scope: row?.scope ?? "unknown",
lastEvaluatedAt: row?.evaluated_at ?? null,
surface: row?.surface ?? null,
runControl: row?.run_control ?? null,
permissionProfile: row?.permission_profile ?? null,
traceId: row?.trace_id ?? null,
};
if (db) {
const row = db
.prepare(
`SELECT scope, evaluated_at, surface, run_control,
permission_profile, trace_id
FROM quality_gates
WHERE gate_id = ?
ORDER BY evaluated_at IS NULL, evaluated_at DESC
LIMIT 1`,
)
.get(id);
if (row) {
dbContext = {
scope: row.scope ?? "unknown",
lastEvaluatedAt: row.evaluated_at ?? null,
surface: row.surface ?? null,
runControl: row.run_control ?? null,
permissionProfile: row.permission_profile ?? null,
traceId: row.trace_id ?? null,
};
}
}
} catch {
return empty;
// DB read failure leaves dbContext null; trace can still win.
}
// Precedence: pick whichever source has the later evaluatedAt
// (codex review 2026-05-14 follow-up on slice 3a). Tie-breaker:
// prefer trace because flow-scoped gates with no quality_gates
// FK row are trace-only. When neither has context, return empty.
const traceTs = trace?.evaluatedAt ?? null;
const dbTs = dbContext?.lastEvaluatedAt ?? null;
const traceWins =
trace &&
(dbContext === null ||
(traceTs !== null && dbTs !== null && traceTs >= dbTs) ||
(traceTs !== null && dbTs === null) ||
(traceTs === null && dbTs === null));
if (traceWins && trace) {
return {
scope: trace.scope ?? dbContext?.scope ?? "unknown",
lastEvaluatedAt: traceTs,
surface: trace.surface ?? null,
runControl: trace.runControl ?? null,
permissionProfile: trace.permissionProfile ?? null,
traceId: trace.traceId ?? null,
};
}
return dbContext ?? empty;
};
gates = gateIds.map((id: string) => {
const stats = gatesDbModule.getGateRunStats(id, 24);
const cb = gatesDbModule.getGateCircuitBreaker(id);
const meta = getGateMeta(id);
const runContext = getGateRunContext(id);
const base = {
id,
scope: meta.scope,
scope: runContext.scope,
total: stats.total ?? 0,
pass: stats.pass ?? 0,
fail: stats.fail ?? 0,
retry: stats.retry ?? 0,
// prefer stats window result; fall back to the run context's last evaluation (trace event or quality_gates row)
lastEvaluatedAt: stats.lastEvaluatedAt ?? meta.lastEvaluatedAt,
lastEvaluatedAt: stats.lastEvaluatedAt ?? runContext.lastEvaluatedAt,
circuitBreaker: cb?.state ?? "closed",
failureStreak: cb?.failureStreak ?? 0,
};
const coverageStatus = classifyCoverage(base, meta);
const coverageStatus = classifyCoverage(base, runContext);
return { ...base, coverageStatus } satisfies GateHealthEntry;
});
} catch (err) {

View file

@ -46,8 +46,8 @@ export function insertGateRow(g) {
export function saveGateResult(g) {
const currentDb = _getAdapter();
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
// Schema-v2 metadata is updated only when the caller supplies a valid
// canonical context. Existing rows keep their metadata (or stay legacy/
// Schema-v2 UOK run context is updated only when the caller supplies a valid
// canonical context. Existing rows keep their context (or stay legacy/
// null) when no context is provided. COALESCE semantics on each column
// mean "only overwrite when non-null" — so a legacy row stays legacy
// unless this update brings a full schema-v2 ctx.
@ -415,6 +415,11 @@ export function getGateLatencyStats(gateId, windowHours = 24) {
}
export function getDistinctGateIds() {
// UNION trace-event IDs with quality_gates DB IDs (codex review 2026-05-14
// follow-up on slice 3a). The previous implementation returned trace IDs
// only when any existed, silently hiding DB-only slice-scoped gates the
// moment any flow-scoped trace landed. Status uok must show both.
const ids = new Set();
try {
const currentPath = getDbPath();
const basePath =
@ -422,9 +427,15 @@ export function getDistinctGateIds() {
? dirname(dirname(currentPath))
: process.cwd();
const events = readTraceEvents(basePath, "gate_run", 24 * 30); // 30 days
const traceIds = [...new Set(events.map((e) => e.gateId).filter(Boolean))];
if (traceIds.length > 0) return traceIds;
// Fall back to quality_gates DB when no trace events found
for (const ev of events) {
if (typeof ev.gateId === "string" && ev.gateId.length > 0) {
ids.add(ev.gateId);
}
}
} catch {
// Trace read failure is non-fatal — DB fallback below still runs.
}
try {
const db = _getAdapter();
if (db) {
const rows = db
@ -432,12 +443,16 @@ export function getDistinctGateIds() {
"SELECT DISTINCT gate_id FROM quality_gates WHERE gate_id != '' ORDER BY gate_id",
)
.all();
return rows.map((r) => r.gate_id);
for (const r of rows) {
if (typeof r.gate_id === "string" && r.gate_id.length > 0) {
ids.add(r.gate_id);
}
}
}
return [];
} catch {
return [];
// DB read failure is non-fatal — trace results above stand.
}
return Array.from(ids).sort();
}
export function upsertQualityGate(g) {

View file

@ -1,7 +1,7 @@
/**
* uok/run-context.js — UOK schema-v2 run-context adapter.
*
* Purpose: normalize existing runtime/headless metadata into the shape
* Purpose: normalize existing runtime/headless run context into the shape
* the UOK control plane expects (schema-v2 fields surface, runControl,
* permissionProfile, traceId, parentTrace, plus the already-existing
* unitType/unitId/milestoneId/sliceId). The adapter is intentionally

View file

@ -33,25 +33,42 @@ function tracePath(basePath, traceId) {
export function appendTraceEvent(basePath, traceId, event) {
if (!basePath || !traceId) return;
try {
const dir = tracesDir(basePath);
mkdirSync(dir, { recursive: true });
const path = tracePath(basePath, traceId);
const line = JSON.stringify({ ts: new Date().toISOString(), ...event });
if (!existsSync(path)) closeSync(openSync(path, "a"));
appendFileSync(path, `${line}\n`, "utf-8");
// Update latest symlink
const latestPath = join(dir, "latest");
try {
unlinkSync(latestPath);
} catch {
/* ok if missing */
}
symlinkSync(`${traceId}.jsonl`, latestPath);
appendTraceEventRequired(basePath, traceId, event);
} catch {
// trace writes must never break orchestration
}
}
/**
 * Append a trace event and throw when it cannot be persisted.
 *
 * Purpose: let UOK control-plane flows fail closed when their required gate
 * record cannot be written. Best-effort orchestration should keep using
 * appendTraceEvent; supervised mutation paths use this function so UOK is not
 * silently bypassed.
 *
 * Only the append itself is required. Refreshing the "latest" symlink is a
 * convenience pointer and is best-effort: a symlink failure after the event
 * has been written must not be reported as a failed write.
 *
 * Consumer: headless triage --apply gate emission.
 *
 * @param basePath Project root under which the traces directory lives.
 * @param traceId  Trace identifier; selects the `<traceId>.jsonl` file.
 * @param event    Event fields; a `ts` timestamp is added unless the event
 *                 supplies its own.
 * @throws Error when basePath/traceId are missing, or when the directory or
 *         the event line cannot be written.
 */
export function appendTraceEventRequired(basePath, traceId, event) {
  if (!basePath || !traceId) {
    throw new Error("appendTraceEventRequired requires basePath and traceId");
  }
  const dir = tracesDir(basePath);
  mkdirSync(dir, { recursive: true });
  const path = tracePath(basePath, traceId);
  const line = JSON.stringify({ ts: new Date().toISOString(), ...event });
  if (!existsSync(path)) closeSync(openSync(path, "a"));
  appendFileSync(path, `${line}\n`, "utf-8");
  // Update latest symlink. The event is already durable at this point, so a
  // failure here (missing symlink privilege, concurrent writer recreating
  // the link) is deliberately not propagated.
  const latestPath = join(dir, "latest");
  try {
    try {
      unlinkSync(latestPath);
    } catch {
      /* ok if missing */
    }
    symlinkSync(`${traceId}.jsonl`, latestPath);
  } catch {
    /* latest pointer is best-effort; the required write already succeeded */
  }
}
/**
* Prune .sf/traces/*.jsonl files older than retentionDays.
*

View file

@ -0,0 +1,182 @@
/**
* headless-triage-uok-gates.test.ts — verify slice 3a of "Make UOK the SF
* Control Plane": runTriageApply emits gate_run trace events with schema-v2
* metadata for the three triage gates.
*
* Test contract: each decision point (trusted-source check, plan-validation,
* rubber-duck review) writes exactly one gate_run trace event with
* surface=headless, runControl=supervised, permissionProfile=high, traceId=
* the flowId. The outcome reflects the decision (pass/fail/manual-attention).
*/
import { mkdirSync, mkdtempSync, readFileSync, readdirSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, expect, test } from "vitest";
import { runTriageApply } from "../headless-triage.js";
const deciderPlan = [
"```yaml",
"decisions:",
" - id: sf-test-1",
" outcome: close",
" evidence_kind: human-clear",
" reason: stale",
"```",
"Self-feedback triage complete.",
].join("\n");
const tempDirs: string[] = [];
let originalAgentDir: string | undefined;
/**
 * Create a throwaway project root containing an empty `.sf` directory.
 * The root is registered in tempDirs so the afterEach hook removes it.
 */
function makeProject(): string {
  const prefix = join(tmpdir(), "sf-triage-uok-gates-");
  const projectRoot = mkdtempSync(prefix);
  tempDirs.push(projectRoot);
  const sfDir = join(projectRoot, ".sf");
  mkdirSync(sfDir, { recursive: true });
  return projectRoot;
}
/**
 * Collect every JSON object found in the project's `.sf/traces/*.jsonl`
 * files. Entries whose name starts with "latest" are ignored, blank and
 * unparseable lines are skipped, and a missing traces directory yields an
 * empty list.
 */
function readTraceEvents(project: string): Array<Record<string, unknown>> {
  const tracesRoot = join(project, ".sf", "traces");
  let entries: string[];
  try {
    entries = readdirSync(tracesRoot);
  } catch {
    // No traces directory yet: nothing has been emitted.
    return [];
  }
  const traceFiles = entries.filter(
    (name) => name.endsWith(".jsonl") && !name.startsWith("latest"),
  );
  const parsed: Array<Record<string, unknown>> = [];
  for (const name of traceFiles) {
    const rawLines = readFileSync(join(tracesRoot, name), "utf-8").split("\n");
    for (const raw of rawLines) {
      if (raw.trim() === "") continue;
      try {
        parsed.push(JSON.parse(raw));
      } catch {
        // skip malformed lines
      }
    }
  }
  return parsed;
}
/**
 * Group the project's gate_run trace events by gateId, preserving the order
 * in which readTraceEvents returned them. Events without a non-empty string
 * gateId are dropped.
 */
function gateRunsByGateId(
  project: string,
): Map<string, Array<Record<string, unknown>>> {
  const grouped = new Map<string, Array<Record<string, unknown>>>();
  for (const event of readTraceEvents(project)) {
    if (event.type !== "gate_run") continue;
    const gateId = event.gateId;
    if (typeof gateId !== "string" || gateId === "") continue;
    const bucket = grouped.get(gateId);
    if (bucket) {
      bucket.push(event);
    } else {
      grouped.set(gateId, [event]);
    }
  }
  return grouped;
}
// Before each test: remember the current SF_CODING_AGENT_DIR and point it at
// a fresh temp directory (registered in tempDirs for cleanup).
// NOTE(review): presumably this isolates agent discovery from the
// developer's real agent directory — confirm against headless-triage.
beforeEach(() => {
originalAgentDir = process.env.SF_CODING_AGENT_DIR;
const dir = mkdtempSync(join(tmpdir(), "sf-triage-uok-gates-agent-"));
tempDirs.push(dir);
process.env.SF_CODING_AGENT_DIR = dir;
});
// After each test: restore SF_CODING_AGENT_DIR to its pre-test value
// (deleting it when it was originally unset) and remove every temp
// directory registered during the test.
afterEach(() => {
if (originalAgentDir === undefined) delete process.env.SF_CODING_AGENT_DIR;
else process.env.SF_CODING_AGENT_DIR = originalAgentDir;
// Drain tempDirs so nothing carries over into the next test.
while (tempDirs.length > 0) {
rmSync(tempDirs.pop()!, { recursive: true, force: true });
}
});
// Suite: drives runTriageApply with an injected agentRunner
// (allowUntrustedRunner: true) and inspects the gate_run events written under
// the project's .sf/traces directory.
describe("runTriageApply emits gate_run trace events with schema-v2 metadata", () => {
// Agree path: the decider returns the one-decision plan and the reviewer
// replies "rubber-duck: agree". Asserts that the trust, validation and review
// gates each fired exactly once with outcome "pass" and the headless run
// context. The mutation gate is not asserted here.
test("agree_path_emits_three_pass_gates_with_uok_v2_metadata", async () => {
const project = makeProject();
const result = await runTriageApply(project, "triage prompt", {
candidateCount: 1,
allowUntrustedRunner: true,
agentRunner: async (agent) => {
if (agent.name === "triage-decider") {
return { ok: true, output: deciderPlan, exitCode: 0 };
}
return { ok: true, output: "rubber-duck: agree", exitCode: 0 };
},
});
expect(result.agreed).toBe(true);
const gates = gateRunsByGateId(project);
const trustEvents = gates.get("trusted-agent-source-gate") ?? [];
const validEvents = gates.get("triage-plan-validation-gate") ?? [];
const reviewEvents = gates.get("triage-apply-review-gate") ?? [];
expect(trustEvents).toHaveLength(1);
expect(trustEvents[0].outcome).toBe("pass");
expect(trustEvents[0].surface).toBe("headless");
expect(trustEvents[0].runControl).toBe("supervised");
expect(trustEvents[0].permissionProfile).toBe("high");
expect(typeof trustEvents[0].traceId).toBe("string");
// The traceId is expected to carry the "triage-apply-" prefix.
expect((trustEvents[0].traceId as string).startsWith("triage-apply-")).toBe(
true,
);
expect(validEvents).toHaveLength(1);
expect(validEvents[0].outcome).toBe("pass");
expect(validEvents[0].surface).toBe("headless");
expect(reviewEvents).toHaveLength(1);
expect(reviewEvents[0].outcome).toBe("pass");
expect(reviewEvents[0].surface).toBe("headless");
});
// Disagree path: the reviewer completes (ok: true) but returns a concern
// instead of an agree verdict; the review gate must be recorded as "fail".
test("disagree_path_emits_review_gate_as_fail", async () => {
const project = makeProject();
await runTriageApply(project, "triage prompt", {
allowUntrustedRunner: true,
agentRunner: async (agent) => {
if (agent.name === "triage-decider") {
return { ok: true, output: deciderPlan, exitCode: 0 };
}
return {
ok: true,
output: "## Concern 1:\nbad close",
exitCode: 0,
};
},
});
const gates = gateRunsByGateId(project);
const reviewEvents = gates.get("triage-apply-review-gate") ?? [];
expect(reviewEvents).toHaveLength(1);
expect(reviewEvents[0].outcome).toBe("fail");
expect(reviewEvents[0].surface).toBe("headless");
});
// Unknown-id path: expectedIds lists only "sf-expected-1" while the plan
// decides "sf-test-1", so plan validation must fail and the flow must stop
// before the review gate.
test("unknown_id_in_plan_emits_validation_gate_fail_and_no_review_gate", async () => {
const project = makeProject();
await runTriageApply(project, "triage prompt", {
candidateCount: 1,
expectedIds: ["sf-expected-1"],
allowUntrustedRunner: true,
agentRunner: async () => ({
ok: true,
output: deciderPlan,
exitCode: 0,
}),
});
const gates = gateRunsByGateId(project);
const validEvents = gates.get("triage-plan-validation-gate") ?? [];
const reviewEvents = gates.get("triage-apply-review-gate") ?? [];
expect(validEvents).toHaveLength(1);
expect(validEvents[0].outcome).toBe("fail");
expect((validEvents[0].rationale as string).toLowerCase()).toContain(
"not in the candidate set",
);
// Review gate must NOT fire because plan validation blocked the flow.
expect(reviewEvents).toHaveLength(0);
});
});