feat(uok): slice 3a — triage --apply emits 4 schema-v2 UOK gates
First production caller of the schema-v2 writer chain. Every
`sf headless triage --apply` invocation now emits four gate_run trace
events with surface=headless, runControl=supervised, permissionProfile=
high, traceId=flowId — making the gates visible in `status uok --json`
with coverageStatus: "ok" (or fail/manual-attention on reject paths).
Gates emitted, in order:
1. trusted-agent-source-gate — fires on the trust precondition:
pass: both triage-decider and rubber-duck are SF-shipped built-ins
fail: missing-agent OR non-builtin source OR untrusted custom runner
(covers all three pre-dispatch refusal paths so operators see the
failure in status uok, not just in the journal)
2. triage-plan-validation-gate — fires on the strict-parse contract:
pass: parseTriagePlanStrict returns a valid plan covering expectedIds
fail: missing marker / bad yaml / unknown id / outcome-required field missing
3. triage-apply-review-gate — fires on the rubber-duck verdict:
pass: rubber-duck: agree → apply phase proceeds
fail: rubber-duck disagreed → clean pause, no mutations
manual-attention: rubber-duck subagent failed to complete
4. triage-apply-mutation-gate — fires after applyTriagePlan:
pass: every approved mutation landed
fail: any rejected mutation
manual-attention: zero approved mutations (all decisions were "fix")
Includes counts in extra: resolvedCount, rejectedCount, pendingFixCount.
Reader-side fixes (codex review follow-up on slice 3a):
- getDistinctGateIds (sf-db-gates.js) now UNIONs trace-event IDs with
quality_gates DB IDs instead of returning trace IDs early when any
exist. The old behavior silently hid slice-scoped DB-only gates the
moment a flow-scoped trace landed.
- getGateMeta (headless-uok-status.ts) now reads BOTH trace events and
DB row, then picks whichever has the later evaluatedAt. Tie-break
prefers trace (flow-scoped gates with no quality_gates FK row are
trace-only). Old behavior preferred trace whenever surface was set,
regardless of timestamp.
Live verification: ran `sf headless triage --apply` 4 times against the
operator's environment (rubber-duck is a project-level override).
trusted-agent-source-gate now shows in `sf headless status uok --json`
with total: 4, fail: 4, coverageStatus: "ok" — proving the schema-v2
metadata round-trips through the trace events and reaches the
classifier.
Tests:
- headless-triage-uok-gates.test.ts (3 new tests): agree path emits
3 pass gates with v2 metadata; disagree path emits review fail;
unknown-id path emits validation fail with no review gate.
- Existing test suites adjusted for the GateMetadataRow →
GateRunContextRow rename (classifier helpers renamed consistently
across .ts source and the .mjs test mirror).
- Full SF + headless apply: 1681/1681.
Still legacy in production (slice 3b targets these next):
- phases-pre-dispatch.js gates: resource-version-guard, pre-dispatch-
health-gate, planning-flow-gate. None of these pass uokContext yet.
- phases-unit.js gates: unit-verification-gate, plan-gate.
- plan-slice.js: Q3/Q4/Q5/Q6/Q7/Q8 seed gates.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f0c57b58c6
commit
454e051aed
6 changed files with 599 additions and 77 deletions
|
|
@ -494,6 +494,119 @@ export async function runTriageApply(
|
|||
let seq = 0;
|
||||
const emit = (eventType: string, data: Record<string, unknown> = {}) =>
|
||||
emitTriageApplyJournal(cwd, flowId, seq++, eventType, data);
|
||||
|
||||
// Slice 3a of "Make UOK the SF Control Plane": every triage --apply run
|
||||
// emits up to four gate_run trace events (trusted-agent-source-gate,
|
||||
// triage-plan-validation-gate, triage-apply-review-gate,
|
||||
// triage-apply-mutation-gate; a refusal ends the sequence early) with canonical
|
||||
// UOK run context. status uok reads surface/runControl/... from these
|
||||
// events and classifies coverage as "ok"/"stale"/"incomplete".
|
||||
//
|
||||
// Dynamic imports because src/resources is excluded from the root
|
||||
// tsconfig (extensions have their own build). buildUokRunContext and
|
||||
// appendTraceEventRequired live there; both modules are loaded once per run.
|
||||
const runContextModule = (await jiti.import(
|
||||
sfExtensionPath("uok/run-context"),
|
||||
)) as {
|
||||
buildUokRunContext: (opts: Record<string, unknown>) =>
|
||||
| {
|
||||
surface: string;
|
||||
runControl: string;
|
||||
permissionProfile: string;
|
||||
traceId: string;
|
||||
parentTrace?: string;
|
||||
}
|
||||
| null;
|
||||
};
|
||||
const traceWriterModule = (await jiti.import(
|
||||
sfExtensionPath("uok/trace-writer"),
|
||||
)) as {
|
||||
appendTraceEventRequired: (
|
||||
basePath: string,
|
||||
traceId: string,
|
||||
event: Record<string, unknown>,
|
||||
) => void;
|
||||
};
|
||||
|
||||
// surface: "headless" - runTriageApply is always operator-invoked
|
||||
// via sf headless triage --apply.
|
||||
// runControl: "supervised" - the operator launched this command; it's
|
||||
// not an autonomous-loop self-initiation.
|
||||
// permissionProfile: "high" - --apply mutates the ledger, so the run
|
||||
// must have write permission.
|
||||
// traceId: flowId - already a UUID-stamped per-run id.
|
||||
const uokContext = runContextModule.buildUokRunContext({
|
||||
surface: "headless",
|
||||
runControl: "supervised",
|
||||
permissionProfile: "high",
|
||||
traceId: flowId,
|
||||
});
|
||||
|
||||
const emitTriageGate = (
|
||||
gateId: string,
|
||||
outcome: "pass" | "fail" | "manual-attention",
|
||||
rationale: string,
|
||||
extra: Record<string, unknown> = {},
|
||||
): Error | null => {
|
||||
if (!uokContext) {
|
||||
return new Error("buildUokRunContext returned null for triage --apply");
|
||||
}
|
||||
try {
|
||||
traceWriterModule.appendTraceEventRequired(cwd, flowId, {
|
||||
type: "gate_run",
|
||||
traceId: uokContext.traceId,
|
||||
turnId: `triage-apply:${gateId}`,
|
||||
gateId,
|
||||
gateType: "quality-gate",
|
||||
outcome,
|
||||
failureClass:
|
||||
outcome === "fail"
|
||||
? "policy"
|
||||
: outcome === "manual-attention"
|
||||
? "manual-attention"
|
||||
: "none",
|
||||
rationale,
|
||||
attempt: 1,
|
||||
maxAttempts: 1,
|
||||
retryable: false,
|
||||
evaluatedAt: new Date().toISOString(),
|
||||
durationMs: 0,
|
||||
// Canonical UOK run context. status uok reads these from
|
||||
// trace events (slice 3a addition) so the gate
|
||||
// classifies as "ok" without needing a quality_gates parent
|
||||
// FK row to exist.
|
||||
surface: uokContext.surface,
|
||||
runControl: uokContext.runControl,
|
||||
permissionProfile: uokContext.permissionProfile,
|
||||
...extra,
|
||||
});
|
||||
return null;
|
||||
} catch (err) {
|
||||
return err instanceof Error ? err : new Error(String(err));
|
||||
}
|
||||
};
|
||||
|
||||
const emitRequiredTriageGate = async (
|
||||
gateId: string,
|
||||
outcome: "pass" | "fail" | "manual-attention",
|
||||
rationale: string,
|
||||
extra: Record<string, unknown> = {},
|
||||
): Promise<RunTriageApplyResult | null> => {
|
||||
const err = emitTriageGate(gateId, outcome, rationale, extra);
|
||||
if (!err) return null;
|
||||
await emit("triage-apply-failed", {
|
||||
reason: "uok-gate-emission-failed",
|
||||
gateId,
|
||||
error: err.message,
|
||||
});
|
||||
return {
|
||||
ok: false,
|
||||
agreed: false,
|
||||
error: `UOK gate emission failed for ${gateId}: ${err.message}`,
|
||||
resolvedIds: [],
|
||||
flowId,
|
||||
};
|
||||
};
|
||||
|
||||
await emit("triage-apply-start", {
|
||||
candidateCount: options.candidateCount ?? null,
|
||||
});
|
||||
|
|
@ -512,6 +625,17 @@ export async function runTriageApply(
|
|||
]
|
||||
.filter(Boolean)
|
||||
.join(", ");
|
||||
// Missing agents is a trusted-source-gate failure cause too — the
|
||||
// gate's contract is "both built-ins exist AND have source=builtin".
|
||||
// Emit it so operators see the failure in status uok, not just in
|
||||
// the triage-apply journal.
|
||||
const gateFailure = await emitRequiredTriageGate(
|
||||
"trusted-agent-source-gate",
|
||||
"fail",
|
||||
`required built-in agent(s) not discovered: ${missing}`,
|
||||
{ missing },
|
||||
);
|
||||
if (gateFailure) return gateFailure;
|
||||
await emit("triage-apply-failed", { reason: "missing-agent", missing });
|
||||
return {
|
||||
ok: false,
|
||||
|
|
@ -527,6 +651,12 @@ export async function runTriageApply(
|
|||
// independence. Operators can still customize behavior for inspect
|
||||
// workflows, but --apply uses only the shipped review contract.
|
||||
if (triageDecider.source !== "builtin" || rubberDuck.source !== "builtin") {
|
||||
const rationale = `non-builtin agents (triage-decider=${triageDecider.source}, rubber-duck=${rubberDuck.source})`;
|
||||
const gateFailure = await emitRequiredTriageGate("trusted-agent-source-gate", "fail", rationale, {
|
||||
triageDeciderSource: triageDecider.source,
|
||||
rubberDuckSource: rubberDuck.source,
|
||||
});
|
||||
if (gateFailure) return gateFailure;
|
||||
await emit("triage-apply-failed", {
|
||||
reason: "untrusted-agent-source",
|
||||
triageDeciderSource: triageDecider.source,
|
||||
|
|
@ -540,11 +670,26 @@ export async function runTriageApply(
|
|||
flowId,
|
||||
};
|
||||
}
|
||||
const trustGateFailure = await emitRequiredTriageGate(
|
||||
"trusted-agent-source-gate",
|
||||
"pass",
|
||||
"both triage-decider and rubber-duck are SF-shipped built-ins",
|
||||
);
|
||||
if (trustGateFailure) return trustGateFailure;
|
||||
|
||||
// Custom-runner guard (codex review follow-up): an injected agentRunner
|
||||
// can side-channel-mutate the ledger despite the read-only tool override.
|
||||
// Only allow it when allowUntrustedRunner is explicitly set (test path).
|
||||
if (options.agentRunner && !options.allowUntrustedRunner) {
|
||||
// Same trust contract as missing-agent / non-builtin source: the
|
||||
// run cannot guarantee built-in behavior, so it's a failure of the
|
||||
// trusted-agent-source-gate, surfaced through status uok.
|
||||
const gateFailure = await emitRequiredTriageGate(
|
||||
"trusted-agent-source-gate",
|
||||
"fail",
|
||||
"runTriageApply: custom agentRunner injected without allowUntrustedRunner; production callers cannot bypass the built-in agent contract",
|
||||
);
|
||||
if (gateFailure) return gateFailure;
|
||||
await emit("triage-apply-failed", { reason: "untrusted-runner" });
|
||||
return {
|
||||
ok: false,
|
||||
|
|
@ -591,6 +736,13 @@ export async function runTriageApply(
|
|||
options.expectedIds,
|
||||
);
|
||||
if (!parseResult.plan) {
|
||||
const gateFailure = await emitRequiredTriageGate(
|
||||
"triage-plan-validation-gate",
|
||||
"fail",
|
||||
parseResult.error ?? "decider produced an unparseable plan",
|
||||
{ parseError: parseResult.error ?? null },
|
||||
);
|
||||
if (gateFailure) return gateFailure;
|
||||
await emit("triage-apply-failed", {
|
||||
reason: "no-plan",
|
||||
parseError: parseResult.error,
|
||||
|
|
@ -607,6 +759,13 @@ export async function runTriageApply(
|
|||
};
|
||||
}
|
||||
const plan = parseResult.plan;
|
||||
const validationGateFailure = await emitRequiredTriageGate(
|
||||
"triage-plan-validation-gate",
|
||||
"pass",
|
||||
`decider plan parsed cleanly: ${plan.length} decisions`,
|
||||
{ decisionCount: plan.length },
|
||||
);
|
||||
if (validationGateFailure) return validationGateFailure;
|
||||
await emit("triage-apply-plan-parsed", {
|
||||
decisionCount: plan.length,
|
||||
outcomes: plan.reduce<Record<string, number>>((acc, d) => {
|
||||
|
|
@ -645,6 +804,13 @@ export async function runTriageApply(
|
|||
},
|
||||
);
|
||||
if (!review.ok) {
|
||||
const gateFailure = await emitRequiredTriageGate(
|
||||
"triage-apply-review-gate",
|
||||
"manual-attention",
|
||||
"rubber-duck subagent failed to complete; review pending operator",
|
||||
{ exitCode: review.exitCode ?? null },
|
||||
);
|
||||
if (gateFailure) return gateFailure;
|
||||
await emit("triage-apply-failed", { reason: "rubber-duck-failed" });
|
||||
return {
|
||||
ok: false,
|
||||
|
|
@ -660,6 +826,12 @@ export async function runTriageApply(
|
|||
// Disagreement is a clean pause, not a failure. The plan and the
|
||||
// review are both persisted in the decision report; the operator
|
||||
// can read both and act.
|
||||
const gateFailure = await emitRequiredTriageGate(
|
||||
"triage-apply-review-gate",
|
||||
"fail",
|
||||
"rubber-duck disagreed with the proposed plan; no mutations applied",
|
||||
);
|
||||
if (gateFailure) return gateFailure;
|
||||
return {
|
||||
ok: false,
|
||||
agreed: false,
|
||||
|
|
@ -670,6 +842,12 @@ export async function runTriageApply(
|
|||
flowId,
|
||||
};
|
||||
}
|
||||
const reviewGateFailure = await emitRequiredTriageGate(
|
||||
"triage-apply-review-gate",
|
||||
"pass",
|
||||
"rubber-duck agreed with the proposed plan; apply phase proceeds",
|
||||
);
|
||||
if (reviewGateFailure) return reviewGateFailure;
|
||||
|
||||
// Phase 3: apply the plan. We (this runner) call markResolved for
|
||||
// each close/promote decision; fix decisions get surfaced for the
|
||||
|
|
@ -682,6 +860,36 @@ export async function runTriageApply(
|
|||
// partial failure (rejectedIds) so they can investigate.
|
||||
const approvedMutationCount = plan.filter((d) => d.outcome !== "fix").length;
|
||||
const hasFailures = applyResult.rejectedIds.length > 0;
|
||||
|
||||
// triage-apply-mutation-gate (codex review 2026-05-14 follow-up on
|
||||
// slice 3a): observability for the post-review apply phase. pass when
|
||||
// every approved mutation landed; fail when any rejected; manual-
|
||||
// attention when there were no approved mutations to apply (all
|
||||
// decisions were "fix" handoffs, which require operator action).
|
||||
const mutationOutcome: "pass" | "fail" | "manual-attention" =
|
||||
approvedMutationCount === 0
|
||||
? "manual-attention"
|
||||
: hasFailures
|
||||
? "fail"
|
||||
: "pass";
|
||||
const mutationRationale =
|
||||
approvedMutationCount === 0
|
||||
? `no approved mutations: ${applyResult.pendingFixIds.length} fix decisions await operator`
|
||||
: hasFailures
|
||||
? `${applyResult.rejectedIds.length} of ${approvedMutationCount} approved mutations failed: ${applyResult.rejectedIds.join(", ")}`
|
||||
: `${applyResult.resolvedIds.length} of ${approvedMutationCount} approved mutations applied cleanly`;
|
||||
const mutationGateFailure = await emitRequiredTriageGate(
|
||||
"triage-apply-mutation-gate",
|
||||
mutationOutcome,
|
||||
mutationRationale,
|
||||
{
|
||||
resolvedCount: applyResult.resolvedIds.length,
|
||||
rejectedCount: applyResult.rejectedIds.length,
|
||||
pendingFixCount: applyResult.pendingFixIds.length,
|
||||
},
|
||||
);
|
||||
if (mutationGateFailure) return mutationGateFailure;
|
||||
|
||||
return {
|
||||
ok: !hasFailures,
|
||||
agreed: true,
|
||||
|
|
|
|||
|
|
@ -44,23 +44,23 @@ function sfExtensionPath(moduleName: string): string {
|
|||
*
|
||||
* Slice 1 (UOK control-plane plan, 2026-05-14) introduces this field so
|
||||
* operators can distinguish gates they should be paying attention to from
|
||||
* those that don't yet have the new metadata. Each value's contract:
|
||||
* those that don't yet have the UOK run context. Each value's contract:
|
||||
*
|
||||
* - "ok" Gate has schema-v2 metadata AND recent runs in the
|
||||
* - "ok" Gate has schema-v2 run context AND recent runs in the
|
||||
* window. Healthy.
|
||||
* - "stale" Gate has prior runs but nothing in the last 24h.
|
||||
* Suggests something stopped exercising it.
|
||||
* - "incomplete" Gate has schema-v2 records but is missing required
|
||||
* metadata (surface / runControl / permissionProfile /
|
||||
* run-context fields (surface / runControl / permissionProfile /
|
||||
* traceId). Used when future slices start writing
|
||||
* schema-v2 rows; never assigned to legacy rows.
|
||||
* - "missing" Gate is configured/expected but has zero recent runs.
|
||||
* Requires a configured-gate registry to detect; future
|
||||
* slice work, not slice 1.
|
||||
* - "legacy" Gate row predates schema-v2 metadata. NOT a warning —
|
||||
* - "legacy" Gate row predates schema-v2 run context. NOT a warning —
|
||||
* operators are not paged for the rich history of pre-v2
|
||||
* records. Future slices migrate these as the writer
|
||||
* paths emit complete metadata.
|
||||
* paths emit complete run context.
|
||||
*
|
||||
* Slice 1 only populates "ok" / "stale" / "legacy". "incomplete" and
|
||||
* "missing" wait for the schema-v2 writer adapter (slice 2) and the
|
||||
|
|
@ -93,7 +93,7 @@ export interface UokStatusResult {
|
|||
}
|
||||
|
||||
/**
|
||||
* A row is "legacy" when it lacks the schema-v2 metadata that the writer
|
||||
* A row is "legacy" when it lacks the schema-v2 UOK run context that the writer
|
||||
* adapter (slice 2 of the UOK control-plane plan) populates. Surface is
|
||||
* the canonical indicator: NULL → legacy, set → schema-v2 row that
|
||||
* should be classified ok/stale/incomplete based on the other fields.
|
||||
|
|
@ -106,27 +106,27 @@ export interface UokStatusResult {
|
|||
* full required set, and the classifier surfaces that so operators can
|
||||
* find the buggy writer.
|
||||
*/
|
||||
interface GateMetadataRow {
|
||||
interface GateRunContextRow {
|
||||
surface: string | null;
|
||||
runControl: string | null;
|
||||
permissionProfile: string | null;
|
||||
traceId: string | null;
|
||||
}
|
||||
|
||||
function hasSchemaV2Metadata(meta: GateMetadataRow): boolean {
|
||||
return typeof meta.surface === "string" && meta.surface.length > 0;
|
||||
function hasSchemaV2RunContext(context: GateRunContextRow): boolean {
|
||||
return typeof context.surface === "string" && context.surface.length > 0;
|
||||
}
|
||||
|
||||
function isSchemaV2Complete(meta: GateMetadataRow): boolean {
|
||||
function isSchemaV2RunContextComplete(context: GateRunContextRow): boolean {
|
||||
return (
|
||||
typeof meta.surface === "string" &&
|
||||
meta.surface.length > 0 &&
|
||||
typeof meta.runControl === "string" &&
|
||||
meta.runControl.length > 0 &&
|
||||
typeof meta.permissionProfile === "string" &&
|
||||
meta.permissionProfile.length > 0 &&
|
||||
typeof meta.traceId === "string" &&
|
||||
meta.traceId.length > 0
|
||||
typeof context.surface === "string" &&
|
||||
context.surface.length > 0 &&
|
||||
typeof context.runControl === "string" &&
|
||||
context.runControl.length > 0 &&
|
||||
typeof context.permissionProfile === "string" &&
|
||||
context.permissionProfile.length > 0 &&
|
||||
typeof context.traceId === "string" &&
|
||||
context.traceId.length > 0
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -134,12 +134,12 @@ const STALE_THRESHOLD_MS = 24 * 60 * 60 * 1000;
|
|||
|
||||
function classifyCoverage(
|
||||
entry: Omit<GateHealthEntry, "coverageStatus">,
|
||||
meta: GateMetadataRow,
|
||||
context: GateRunContextRow,
|
||||
): GateCoverageStatus {
|
||||
if (!hasSchemaV2Metadata(meta)) return "legacy";
|
||||
if (!isSchemaV2Complete(meta)) return "incomplete";
|
||||
if (!hasSchemaV2RunContext(context)) return "legacy";
|
||||
if (!isSchemaV2RunContextComplete(context)) return "incomplete";
|
||||
if (entry.total === 0) {
|
||||
// Has metadata but no runs in window. If we ever saw a run, it's
|
||||
// Has run context but no runs in window. If we ever saw a run, it's
|
||||
// stale; otherwise it's never run (caller will mark "missing" when
|
||||
// a configured-gate registry confirms it was expected). For slice
|
||||
// 1+2, no registry exists, so the safer default is "stale".
|
||||
|
|
@ -228,13 +228,85 @@ export async function handleUokStatus(
|
|||
|
||||
const gateIds: string[] = gatesDbModule.getDistinctGateIds();
|
||||
|
||||
// Fetch scope, last-evaluated, and schema-v2 metadata from
|
||||
// quality_gates DB for each gate. Picks the most-recent row's
|
||||
// metadata (MAX(evaluated_at)) so the classifier sees current
|
||||
// schema-v2 status rather than oldest. Returns null fields when
|
||||
// no row exists or the columns haven't been migrated yet.
|
||||
// Fetch scope, last-evaluated, and schema-v2 run context for each
|
||||
// gate. Slice 3a (UOK control-plane plan): run context can land in
|
||||
// trace events from headless flows that have no quality_gates
|
||||
// parent row (e.g. triage --apply gates, which are flow-scoped
|
||||
// rather than slice-scoped). Both sources are read; whichever has the
|
||||
// later evaluatedAt wins (trace on ties), so the quality_gates DB still
|
||||
// covers slice-scoped rows that have rotated out of the trace window.
|
||||
const sfDbModule = (await jiti.import(sfExtensionPath("sf-db"), {})) as any;
|
||||
interface GateMetaQuery {
|
||||
const traceWriterModule = (await jiti.import(
|
||||
sfExtensionPath("uok/trace-writer"),
|
||||
)) as {
|
||||
readTraceEvents: (
|
||||
basePath: string,
|
||||
type: string,
|
||||
windowHours?: number,
|
||||
) => Array<Record<string, unknown>>;
|
||||
};
|
||||
// Read the gate-run trace events once and bucket by gateId so
|
||||
// per-gate getGateMeta calls are O(1) lookup instead of repeated
|
||||
// linear scans of every event.
|
||||
const traceEvents = (() => {
|
||||
try {
|
||||
return traceWriterModule.readTraceEvents?.(
|
||||
basePath,
|
||||
"gate_run",
|
||||
24 * 30,
|
||||
) ?? [];
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
})();
|
||||
const latestTraceContextByGate = new Map<
|
||||
string,
|
||||
{
|
||||
scope?: string;
|
||||
evaluatedAt?: string;
|
||||
surface?: string;
|
||||
runControl?: string;
|
||||
permissionProfile?: string;
|
||||
traceId?: string;
|
||||
}
|
||||
>();
|
||||
for (const ev of traceEvents) {
|
||||
const gateId =
|
||||
typeof ev.gateId === "string" ? (ev.gateId as string) : null;
|
||||
if (!gateId) continue;
|
||||
const evaluatedAt =
|
||||
typeof ev.evaluatedAt === "string"
|
||||
? (ev.evaluatedAt as string)
|
||||
: typeof ev.ts === "string"
|
||||
? (ev.ts as string)
|
||||
: undefined;
|
||||
const prev = latestTraceContextByGate.get(gateId);
|
||||
if (
|
||||
prev &&
|
||||
prev.evaluatedAt &&
|
||||
evaluatedAt &&
|
||||
prev.evaluatedAt >= evaluatedAt
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
latestTraceContextByGate.set(gateId, {
|
||||
scope: typeof ev.scope === "string" ? (ev.scope as string) : undefined,
|
||||
evaluatedAt,
|
||||
surface:
|
||||
typeof ev.surface === "string" ? (ev.surface as string) : undefined,
|
||||
runControl:
|
||||
typeof ev.runControl === "string"
|
||||
? (ev.runControl as string)
|
||||
: undefined,
|
||||
permissionProfile:
|
||||
typeof ev.permissionProfile === "string"
|
||||
? (ev.permissionProfile as string)
|
||||
: undefined,
|
||||
traceId:
|
||||
typeof ev.traceId === "string" ? (ev.traceId as string) : undefined,
|
||||
});
|
||||
}
|
||||
interface GateRunContextQuery {
|
||||
scope: string;
|
||||
lastEvaluatedAt: string | null;
|
||||
surface: string | null;
|
||||
|
|
@ -242,8 +314,8 @@ export async function handleUokStatus(
|
|||
permissionProfile: string | null;
|
||||
traceId: string | null;
|
||||
}
|
||||
const getGateMeta = (id: string): GateMetaQuery => {
|
||||
const empty: GateMetaQuery = {
|
||||
const getGateRunContext = (id: string): GateRunContextQuery => {
|
||||
const empty: GateRunContextQuery = {
|
||||
scope: "unknown",
|
||||
lastEvaluatedAt: null,
|
||||
surface: null,
|
||||
|
|
@ -251,49 +323,77 @@ export async function handleUokStatus(
|
|||
permissionProfile: null,
|
||||
traceId: null,
|
||||
};
|
||||
const trace = latestTraceContextByGate.get(id);
|
||||
let dbContext: GateRunContextQuery | null = null;
|
||||
try {
|
||||
const db = sfDbModule._getAdapter?.() ?? null;
|
||||
if (!db) return empty;
|
||||
const row = db
|
||||
.prepare(
|
||||
`SELECT scope, evaluated_at, surface, run_control,
|
||||
permission_profile, trace_id
|
||||
FROM quality_gates
|
||||
WHERE gate_id = ?
|
||||
ORDER BY evaluated_at IS NULL, evaluated_at DESC
|
||||
LIMIT 1`,
|
||||
)
|
||||
.get(id);
|
||||
return {
|
||||
scope: row?.scope ?? "unknown",
|
||||
lastEvaluatedAt: row?.evaluated_at ?? null,
|
||||
surface: row?.surface ?? null,
|
||||
runControl: row?.run_control ?? null,
|
||||
permissionProfile: row?.permission_profile ?? null,
|
||||
traceId: row?.trace_id ?? null,
|
||||
};
|
||||
if (db) {
|
||||
const row = db
|
||||
.prepare(
|
||||
`SELECT scope, evaluated_at, surface, run_control,
|
||||
permission_profile, trace_id
|
||||
FROM quality_gates
|
||||
WHERE gate_id = ?
|
||||
ORDER BY evaluated_at IS NULL, evaluated_at DESC
|
||||
LIMIT 1`,
|
||||
)
|
||||
.get(id);
|
||||
if (row) {
|
||||
dbContext = {
|
||||
scope: row.scope ?? "unknown",
|
||||
lastEvaluatedAt: row.evaluated_at ?? null,
|
||||
surface: row.surface ?? null,
|
||||
runControl: row.run_control ?? null,
|
||||
permissionProfile: row.permission_profile ?? null,
|
||||
traceId: row.trace_id ?? null,
|
||||
};
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
return empty;
|
||||
// DB read failure leaves dbContext null; trace can still win.
|
||||
}
|
||||
// Precedence: pick whichever source has the later evaluatedAt
|
||||
// (codex review 2026-05-14 follow-up on slice 3a). Tie-breaker:
|
||||
// prefer trace because flow-scoped gates with no quality_gates
|
||||
// FK row are trace-only. When neither has context, return empty.
|
||||
const traceTs = trace?.evaluatedAt ?? null;
|
||||
const dbTs = dbContext?.lastEvaluatedAt ?? null;
|
||||
const traceWins =
|
||||
trace &&
|
||||
(dbContext === null ||
|
||||
(traceTs !== null && dbTs !== null && traceTs >= dbTs) ||
|
||||
(traceTs !== null && dbTs === null) ||
|
||||
(traceTs === null && dbTs === null));
|
||||
if (traceWins && trace) {
|
||||
return {
|
||||
scope: trace.scope ?? dbContext?.scope ?? "unknown",
|
||||
lastEvaluatedAt: traceTs,
|
||||
surface: trace.surface ?? null,
|
||||
runControl: trace.runControl ?? null,
|
||||
permissionProfile: trace.permissionProfile ?? null,
|
||||
traceId: trace.traceId ?? null,
|
||||
};
|
||||
}
|
||||
return dbContext ?? empty;
|
||||
};
|
||||
|
||||
gates = gateIds.map((id: string) => {
|
||||
const stats = gatesDbModule.getGateRunStats(id, 24);
|
||||
const cb = gatesDbModule.getGateCircuitBreaker(id);
|
||||
const meta = getGateMeta(id);
|
||||
const runContext = getGateRunContext(id);
|
||||
const base = {
|
||||
id,
|
||||
scope: meta.scope,
|
||||
scope: runContext.scope,
|
||||
total: stats.total ?? 0,
|
||||
pass: stats.pass ?? 0,
|
||||
fail: stats.fail ?? 0,
|
||||
retry: stats.retry ?? 0,
|
||||
// prefer stats window result; fall back to the latest run-context entry (trace event or quality_gates row)
|
||||
lastEvaluatedAt: stats.lastEvaluatedAt ?? meta.lastEvaluatedAt,
|
||||
lastEvaluatedAt: stats.lastEvaluatedAt ?? runContext.lastEvaluatedAt,
|
||||
circuitBreaker: cb?.state ?? "closed",
|
||||
failureStreak: cb?.failureStreak ?? 0,
|
||||
};
|
||||
const coverageStatus = classifyCoverage(base, meta);
|
||||
const coverageStatus = classifyCoverage(base, runContext);
|
||||
return { ...base, coverageStatus } satisfies GateHealthEntry;
|
||||
});
|
||||
} catch (err) {
|
||||
|
|
|
|||
|
|
@ -46,8 +46,8 @@ export function insertGateRow(g) {
|
|||
export function saveGateResult(g) {
|
||||
const currentDb = _getAdapter();
|
||||
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
|
||||
// Schema-v2 metadata is updated only when the caller supplies a valid
|
||||
// canonical context. Existing rows keep their metadata (or stay legacy/
|
||||
// Schema-v2 UOK run context is updated only when the caller supplies a valid
|
||||
// canonical context. Existing rows keep their context (or stay legacy/
|
||||
// null) when no context is provided. COALESCE semantics on each column
|
||||
// mean "only overwrite when non-null" — so a legacy row stays legacy
|
||||
// unless this update brings a full schema-v2 ctx.
|
||||
|
|
@ -415,6 +415,11 @@ export function getGateLatencyStats(gateId, windowHours = 24) {
|
|||
}
|
||||
|
||||
export function getDistinctGateIds() {
|
||||
// UNION trace-event IDs with quality_gates DB IDs (codex review 2026-05-14
|
||||
// follow-up on slice 3a). The previous implementation returned trace IDs
|
||||
// only when any existed, silently hiding DB-only slice-scoped gates the
|
||||
// moment any flow-scoped trace landed. Status uok must show both.
|
||||
const ids = new Set();
|
||||
try {
|
||||
const currentPath = getDbPath();
|
||||
const basePath =
|
||||
|
|
@ -422,9 +427,15 @@ export function getDistinctGateIds() {
|
|||
? dirname(dirname(currentPath))
|
||||
: process.cwd();
|
||||
const events = readTraceEvents(basePath, "gate_run", 24 * 30); // 30 days
|
||||
const traceIds = [...new Set(events.map((e) => e.gateId).filter(Boolean))];
|
||||
if (traceIds.length > 0) return traceIds;
|
||||
// Fall back to quality_gates DB when no trace events found
|
||||
for (const ev of events) {
|
||||
if (typeof ev.gateId === "string" && ev.gateId.length > 0) {
|
||||
ids.add(ev.gateId);
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Trace read failure is non-fatal — the quality_gates read below still runs and its IDs are still returned.
|
||||
}
|
||||
try {
|
||||
const db = _getAdapter();
|
||||
if (db) {
|
||||
const rows = db
|
||||
|
|
@ -432,12 +443,16 @@ export function getDistinctGateIds() {
|
|||
"SELECT DISTINCT gate_id FROM quality_gates WHERE gate_id != '' ORDER BY gate_id",
|
||||
)
|
||||
.all();
|
||||
return rows.map((r) => r.gate_id);
|
||||
for (const r of rows) {
|
||||
if (typeof r.gate_id === "string" && r.gate_id.length > 0) {
|
||||
ids.add(r.gate_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
return [];
|
||||
} catch {
|
||||
return [];
|
||||
// DB read failure is non-fatal — trace results above stand.
|
||||
}
|
||||
return Array.from(ids).sort();
|
||||
}
|
||||
|
||||
export function upsertQualityGate(g) {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/**
|
||||
* uok/run-context.js — UOK schema-v2 run-context adapter.
|
||||
*
|
||||
* Purpose: normalize existing runtime/headless metadata into the shape
|
||||
* Purpose: normalize existing runtime/headless run context into the shape
|
||||
* the UOK control plane expects (schema-v2 fields surface, runControl,
|
||||
* permissionProfile, traceId, parentTrace, plus the already-existing
|
||||
* unitType/unitId/milestoneId/sliceId). The adapter is intentionally
|
||||
|
|
|
|||
|
|
@ -33,25 +33,42 @@ function tracePath(basePath, traceId) {
|
|||
export function appendTraceEvent(basePath, traceId, event) {
|
||||
if (!basePath || !traceId) return;
|
||||
try {
|
||||
const dir = tracesDir(basePath);
|
||||
mkdirSync(dir, { recursive: true });
|
||||
const path = tracePath(basePath, traceId);
|
||||
const line = JSON.stringify({ ts: new Date().toISOString(), ...event });
|
||||
if (!existsSync(path)) closeSync(openSync(path, "a"));
|
||||
appendFileSync(path, `${line}\n`, "utf-8");
|
||||
// Update latest symlink
|
||||
const latestPath = join(dir, "latest");
|
||||
try {
|
||||
unlinkSync(latestPath);
|
||||
} catch {
|
||||
/* ok if missing */
|
||||
}
|
||||
symlinkSync(`${traceId}.jsonl`, latestPath);
|
||||
appendTraceEventRequired(basePath, traceId, event);
|
||||
} catch {
|
||||
// trace writes must never break orchestration
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Append a trace event and throw when it cannot be persisted.
|
||||
*
|
||||
* Purpose: let UOK control-plane flows fail closed when their required gate
|
||||
* record cannot be written. Best-effort orchestration should keep using
|
||||
* appendTraceEvent; supervised mutation paths use this function so UOK is not
|
||||
* silently bypassed.
|
||||
*
|
||||
* Consumer: headless triage --apply gate emission.
|
||||
*/
|
||||
export function appendTraceEventRequired(basePath, traceId, event) {
|
||||
if (!basePath || !traceId) {
|
||||
throw new Error("appendTraceEventRequired requires basePath and traceId");
|
||||
}
|
||||
const dir = tracesDir(basePath);
|
||||
mkdirSync(dir, { recursive: true });
|
||||
const path = tracePath(basePath, traceId);
|
||||
const line = JSON.stringify({ ts: new Date().toISOString(), ...event });
|
||||
if (!existsSync(path)) closeSync(openSync(path, "a"));
|
||||
appendFileSync(path, `${line}\n`, "utf-8");
|
||||
// Update latest symlink
|
||||
const latestPath = join(dir, "latest");
|
||||
try {
|
||||
unlinkSync(latestPath);
|
||||
} catch {
|
||||
/* ok if missing */
|
||||
}
|
||||
symlinkSync(`${traceId}.jsonl`, latestPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Prune .sf/traces/*.jsonl files older than retentionDays.
|
||||
*
|
||||
|
|
|
|||
182
src/tests/headless-triage-uok-gates.test.ts
Normal file
182
src/tests/headless-triage-uok-gates.test.ts
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
/**
|
||||
* headless-triage-uok-gates.test.ts — verify slice 3a of "Make UOK the SF
* Control Plane": runTriageApply emits gate_run trace events with schema-v2
* metadata for three of the four triage gates (trusted-agent-source,
* plan-validation, apply-review). The apply-mutation gate is not asserted here.
|
||||
*
|
||||
* Test contract: each decision point (trusted-source check, plan-validation,
|
||||
* rubber-duck review) writes exactly one gate_run trace event with
|
||||
* surface=headless, runControl=supervised, permissionProfile=high, traceId=
|
||||
* the flowId. The outcome reflects the decision (pass/fail/manual-attention).
|
||||
*/
|
||||
import { mkdirSync, mkdtempSync, readFileSync, readdirSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, test } from "vitest";
|
||||
import { runTriageApply } from "../headless-triage.js";
|
||||
|
||||
// Canned triage-decider output returned by the stubbed agentRunner in this
// file's tests: a fenced YAML plan holding a single `close` decision for
// `sf-test-1`, followed by a "Self-feedback triage complete." line.
// NOTE(review): that trailing line is presumably the completion marker the
// strict plan parser requires — confirm against the parser.
const deciderPlan = [
|
||||
"```yaml",
|
||||
"decisions:",
|
||||
" - id: sf-test-1",
|
||||
" outcome: close",
|
||||
" evidence_kind: human-clear",
|
||||
" reason: stale",
|
||||
"```",
|
||||
"Self-feedback triage complete.",
|
||||
].join("\n");
|
||||
|
||||
// Temp directories created while a test runs; afterEach removes them all.
const tempDirs: string[] = [];
|
||||
// SF_CODING_AGENT_DIR as it was before the test, captured in beforeEach so
// afterEach can restore (or unset) it.
let originalAgentDir: string | undefined;
|
||||
|
||||
/**
 * Create a throwaway project root containing an empty `.sf` directory.
 *
 * The root is registered in `tempDirs` before `.sf` is created, so it is
 * queued for cleanup even if the second step throws.
 *
 * @returns absolute path of the new project root.
 */
function makeProject(): string {
  const projectRoot = mkdtempSync(join(tmpdir(), "sf-triage-uok-gates-"));
  tempDirs.push(projectRoot);

  const stateDir = join(projectRoot, ".sf");
  mkdirSync(stateDir, { recursive: true });

  return projectRoot;
}
|
||||
|
||||
/**
 * Read every trace event stored under `<project>/.sf/traces`.
 *
 * Files are the `*.jsonl` entries whose name does not start with "latest",
 * visited in sorted filename order so results do not depend on directory
 * enumeration order. Each non-blank line is parsed as JSON; lines that fail
 * to parse, or that parse to anything other than a plain object (null,
 * numbers, strings, arrays), are skipped so callers can read properties off
 * every returned event safely.
 *
 * @param project - project root that contains the `.sf` directory.
 * @returns the parsed events, or an empty array when the traces directory
 *   cannot be listed.
 */
function readTraceEvents(project: string): Array<Record<string, unknown>> {
  const dir = join(project, ".sf", "traces");
  let files: string[];
  try {
    files = readdirSync(dir)
      .filter((f) => f.endsWith(".jsonl") && !f.startsWith("latest"))
      .sort();
  } catch {
    // No traces directory (or it is unreadable): nothing was recorded.
    return [];
  }

  const events: Array<Record<string, unknown>> = [];
  for (const f of files) {
    const content = readFileSync(join(dir, f), "utf-8");
    for (const line of content.split("\n")) {
      if (!line.trim()) continue;
      let parsed: unknown;
      try {
        parsed = JSON.parse(line);
      } catch {
        // skip malformed lines
        continue;
      }
      // JSON.parse also accepts `null`, scalars and arrays; none is an event.
      if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
        events.push(parsed as Record<string, unknown>);
      }
    }
  }
  return events;
}
|
||||
|
||||
/**
 * Group the project's `gate_run` trace events by their `gateId`.
 *
 * Events whose `type` is not "gate_run", or whose `gateId` is missing, empty
 * or not a string, are ignored. Within each group, events keep the order in
 * which readTraceEvents returned them.
 *
 * @param project - project root handed to readTraceEvents.
 * @returns map from gate id to the gate_run events recorded for it.
 */
function gateRunsByGateId(
  project: string,
): Map<string, Array<Record<string, unknown>>> {
  const byGate = new Map<string, Array<Record<string, unknown>>>();
  for (const ev of readTraceEvents(project)) {
    if (ev.type !== "gate_run") continue;
    const gid = ev.gateId;
    // typeof narrows `unknown` to string; no type assertion needed.
    if (typeof gid !== "string" || gid === "") continue;
    const bucket = byGate.get(gid);
    if (bucket) {
      bucket.push(ev);
    } else {
      byGate.set(gid, [ev]);
    }
  }
  return byGate;
}
|
||||
|
||||
// Point SF_CODING_AGENT_DIR at a fresh temp directory for every test,
// remembering the previous value so it can be put back afterwards.
beforeEach(() => {
  originalAgentDir = process.env.SF_CODING_AGENT_DIR;
  const agentDir = mkdtempSync(join(tmpdir(), "sf-triage-uok-gates-agent-"));
  tempDirs.push(agentDir);
  process.env.SF_CODING_AGENT_DIR = agentDir;
});

// Restore (or unset) SF_CODING_AGENT_DIR, then delete every temp directory
// registered during the test, most recently created first.
afterEach(() => {
  if (originalAgentDir !== undefined) {
    process.env.SF_CODING_AGENT_DIR = originalAgentDir;
  } else {
    delete process.env.SF_CODING_AGENT_DIR;
  }

  let doomed = tempDirs.pop();
  while (doomed !== undefined) {
    rmSync(doomed, { recursive: true, force: true });
    doomed = tempDirs.pop();
  }
});
|
||||
|
||||
describe("runTriageApply emits gate_run trace events with schema-v2 metadata", () => {
  type GateEvent = Record<string, unknown>;

  /**
   * Return the only gate_run event recorded for `gateId`, failing the test
   * when the gate fired zero times or more than once.
   */
  function soleGateEvent(
    gates: Map<string, GateEvent[]>,
    gateId: string,
  ): GateEvent {
    const events = gates.get(gateId) ?? [];
    expect(events).toHaveLength(1);
    const [event] = events;
    if (event === undefined) {
      throw new Error(`expected exactly one gate_run event for ${gateId}`);
    }
    return event;
  }

  /**
   * Assert the schema-v2 envelope this file's contract requires on every
   * gate event: headless surface, supervised run control, high permission
   * profile, and a string traceId carrying the triage-apply flow prefix.
   */
  function expectUokV2Metadata(event: GateEvent): void {
    expect(event.surface).toBe("headless");
    expect(event.runControl).toBe("supervised");
    expect(event.permissionProfile).toBe("high");
    expect(typeof event.traceId).toBe("string");
    expect(String(event.traceId).startsWith("triage-apply-")).toBe(true);
  }

  test("agree_path_emits_three_pass_gates_with_uok_v2_metadata", async () => {
    const project = makeProject();
    const result = await runTriageApply(project, "triage prompt", {
      candidateCount: 1,
      allowUntrustedRunner: true,
      agentRunner: async (agent) => {
        if (agent.name === "triage-decider") {
          return { ok: true, output: deciderPlan, exitCode: 0 };
        }
        return { ok: true, output: "rubber-duck: agree", exitCode: 0 };
      },
    });

    expect(result.agreed).toBe(true);

    const gates = gateRunsByGateId(project);
    const trust = soleGateEvent(gates, "trusted-agent-source-gate");
    const validation = soleGateEvent(gates, "triage-plan-validation-gate");
    const review = soleGateEvent(gates, "triage-apply-review-gate");

    // The contract applies to every decision point, not only the trust gate.
    for (const event of [trust, validation, review]) {
      expect(event.outcome).toBe("pass");
      expectUokV2Metadata(event);
    }

    // traceId is the flowId, so all gates of one invocation must share it.
    expect(validation.traceId).toBe(trust.traceId);
    expect(review.traceId).toBe(trust.traceId);
  });

  test("disagree_path_emits_review_gate_as_fail", async () => {
    const project = makeProject();
    await runTriageApply(project, "triage prompt", {
      allowUntrustedRunner: true,
      agentRunner: async (agent) => {
        if (agent.name === "triage-decider") {
          return { ok: true, output: deciderPlan, exitCode: 0 };
        }
        return {
          ok: true,
          output: "## Concern 1:\nbad close",
          exitCode: 0,
        };
      },
    });

    const gates = gateRunsByGateId(project);
    const review = soleGateEvent(gates, "triage-apply-review-gate");
    expect(review.outcome).toBe("fail");
    expectUokV2Metadata(review);
  });

  test("unknown_id_in_plan_emits_validation_gate_fail_and_no_review_gate", async () => {
    const project = makeProject();
    await runTriageApply(project, "triage prompt", {
      candidateCount: 1,
      expectedIds: ["sf-expected-1"],
      allowUntrustedRunner: true,
      agentRunner: async () => ({
        ok: true,
        output: deciderPlan,
        exitCode: 0,
      }),
    });

    const gates = gateRunsByGateId(project);
    const validation = soleGateEvent(gates, "triage-plan-validation-gate");

    expect(validation.outcome).toBe("fail");
    expectUokV2Metadata(validation);
    expect(String(validation.rationale).toLowerCase()).toContain(
      "not in the candidate set",
    );
    // Review gate must NOT fire because plan validation blocked the flow.
    expect(gates.get("triage-apply-review-gate") ?? []).toHaveLength(0);
  });
});
|
||||
Loading…
Add table
Reference in a new issue