singularity-forge/src/headless-triage.ts
2026-05-15 03:53:15 +02:00

1233 lines
40 KiB
TypeScript

/**
* headless-triage.ts — `sf headless triage`
*
* Purpose: operator-driven entry point for self-feedback triage. Lists open
* forge-local self-feedback entries (sorted by impact ↓ effort ↑ ts ↑) and
* either:
* - emits the canonical triage prompt (default): the same one the
* bootstrap session_start drain queues as a followUp, but rendered
* synchronously to stdout so operators can pipe it into any model
* (`sf headless triage | sf headless -p -`, or any external assistant)
* without depending on the autonomous-loop turn semantics that
* swallow the followUp when no other unit is dispatchable.
* - --list: human-readable candidate digest, no prompt — for scanning
* the queue at a glance.
* - --json: structured candidate list for tooling.
*
* Why this command exists (sf-mp4rxkwb-l4baga): the inline-fix worker
* currently delivers via `triggerTurn:true, deliverAs:"followUp"`. When
* autonomous mode bails at milestone validation before any turn runs,
* the followUp never lands and the queue stays unprocessed. This command
* gives operators a deterministic path to drain the queue today, ahead of
* the larger refactor that promotes triage to a real SF unit type.
*
* Consumer: headless.ts when command === "triage".
*/
import { randomUUID } from "node:crypto";
import { existsSync } from "node:fs";
import { join } from "node:path";
import { createJiti } from "@mariozechner/jiti";
import { runSubagent } from "@singularity-forge/coding-agent";
import { parse as parseYaml } from "yaml";
import { resolveBundledSourceResource } from "./bundled-resource-path.js";
import { getSfEnv } from "./env.js";
// Shared jiti loader, anchored at this file. Every `jiti.import(...)` call
// below goes through it to load SF extension modules at runtime.
const jiti = createJiti(import.meta.filename, {
interopDefault: true,
debug: false,
});
/**
 * Resolve the on-disk path of an SF extension module.
 *
 * Lookup order:
 *   1. `<agentDir>/extensions/sf/<moduleName>.js`, chosen whenever that
 *      directory contains `state.js` (the module file itself is not checked).
 *   2. The bundled `<moduleName>.ts` source, when it exists.
 *   3. The bundled `<moduleName>.js`, returned without an existence check.
 *
 * @param moduleName Extension module name relative to `extensions/sf`,
 *   without a file extension (e.g. `"uok/run-context"`).
 * @returns The path to hand to the module loader.
 */
function sfExtensionPath(moduleName: string): string {
  const installedDir = join(getSfEnv().agentDir, "extensions", "sf");
  if (existsSync(join(installedDir, "state.js"))) {
    return join(installedDir, `${moduleName}.js`);
  }

  const bundled = (extension: "ts" | "js"): string =>
    resolveBundledSourceResource(
      import.meta.url,
      "extensions",
      "sf",
      `${moduleName}.${extension}`,
    );

  const sourcePath = bundled("ts");
  return existsSync(sourcePath) ? sourcePath : bundled("js");
}
/**
* Options accepted by `handleTriage`. All fields are optional.
*/
export interface HandleTriageOptions {
/** Emit machine-readable JSON on stdout instead of human-readable text. */
json?: boolean;
/** `--list` flag: human-readable candidate digest, no prompt (per the file header). */
list?: boolean;
/** When a positive number, only the first `max` candidates are kept. */
max?: number;
/** Action flag. When neither `run` nor `apply` is set, the inspect path is taken. */
run?: boolean;
/** Action flag. When neither `run` nor `apply` is set, the inspect path is taken. */
apply?: boolean;
/** Model override. NOTE(review): presumably forwarded to the triage run — confirm in the rest of handleTriage. */
model?: string;
/** Custom agent runner. NOTE(review): presumably forwarded to runTriageApply — confirm in the rest of handleTriage. */
agentRunner?: AgentRunner;
}
/**
* Outcome of `handleTriage`.
*/
export interface HandleTriageResult {
/** Process exit code: 0 on success, non-zero on assembly failure. */
exitCode: number;
}
/**
* One open self-feedback entry as returned by the drain module's
* `selectInlineFixCandidates`.
*/
interface TriageCandidate {
/** Ledger entry id. */
id: string;
kind: string;
severity: string;
summary: string;
/** Entry timestamp. NOTE(review): presumably an ISO string — confirm against the drain module. */
ts: string;
/** Optional impact score; reported as `impact` (null when absent) in JSON output. */
impactScore?: number;
/** Optional effort estimate; reported as `effort` (null when absent) in JSON output. */
effortEstimate?: number;
}
/**
* Subagent definition as returned by the extension's `discoverAgents`.
*/
interface AgentConfig {
/** Agent name; runTriageApply looks up "triage-decider" and "review-code" by this field. */
name: string;
/** Default model, used when the caller supplies no model override. */
model?: string;
/** Default tool list, used when the caller supplies no tool override. */
tools?: string[];
/** Raw system prompt; used as-is when no composed prompt is produced. */
systemPrompt: string;
promptParts?: string[];
/** Where the agent was discovered; runTriageApply requires the value "builtin" for --apply. */
source?: string;
filePath?: string;
}
/**
* Result of a single subagent run.
*/
interface AgentRunResult {
/** Whether the run completed successfully. */
ok: boolean;
/** Text output produced by the agent. */
output: string;
/** Error text; defaultAgentRunner replaces it with a timeout message when exitCode is 124. */
stderr?: string;
exitCode?: number;
}
/**
* Function that runs one agent against a task. `options` carries per-call
* overrides for the tool list, model, and working directory.
*/
type AgentRunner = (
agent: AgentConfig,
task: string,
options?: { tools?: string[]; model?: string; cwd?: string },
) => Promise<AgentRunResult>;
/**
* One entry of the triage-decider's decision plan.
*
* Triage-decider's output contract is a YAML fenced block with key
* `decisions:`. parseTriagePlanStrict parses that block into a list of
* TriageDecision items. When no plan is present or the YAML fails to load
* the parser returns a null plan — runTriageApply treats null as
* "do not apply" (safe default: when in doubt, never mutate).
*
* Why a structured plan instead of letting the decider call resolve_issue
* directly: codex review 2026-05-14 flagged that the original sequential
* design (decider → review-code) let the decider mutate state during its
* own turn, before review-code ever saw the decisions. The parser pulls
* the proposed actions out of the decider's text so they can be reviewed
* BEFORE any resolve_issue call.
*/
export interface TriageDecision {
/** Ledger entry id the decision applies to (trimmed, non-empty). */
id: string;
/** close/promote are applied via markResolved; fix is surfaced for the operator and never auto-applied. */
outcome: "fix" | "promote" | "close";
/** From YAML `evidence_kind` (or `evidenceKind`). */
evidenceKind?: string;
/** Required by the parser for close and promote. */
reason?: string;
/** From YAML `proposed_approach` (or `proposedApproach`); required for fix. */
proposedApproach?: string;
/** From YAML `requirement_id` (or `requirementId`); required for promote. */
requirementId?: string;
}
/**
* Result of parseTriagePlanStrict. Exactly one field is non-null: `plan` on
* success, `error` (a human-readable explanation) on rejection.
*/
export interface ParseTriagePlanResult {
plan: TriageDecision[] | null;
error: string | null;
}
// Literal phrase the decider output must contain; parseTriagePlanStrict
// rejects output without it as incomplete.
const COMPLETION_MARKER = "Self-feedback triage complete";
/**
 * Parse a decider output into a strict decision plan, or return a
 * structured error explaining what's wrong. Codex review 2026-05-14
 * follow-up: refuse the whole plan if ANY item is malformed instead of
 * silently dropping items — partial-trust on a triage plan is worse
 * than no apply at all.
 *
 * Validates:
 *   1. The completion marker is present (signals the decider finished).
 *   2. A single fenced ```yaml block with key `decisions:` exists.
 *   3. Every item has `id` (non-empty string, unique within the plan) and
 *      `outcome` ∈ {fix, promote, close}.
 *   4. Outcome-specific required fields:
 *        - close   → reason (non-empty).
 *        - promote → reason + requirement_id (non-empty strings).
 *        - fix     → proposed_approach (non-empty).
 *      For close and promote, evidence_kind may be omitted (the apply step
 *      then picks its default), but if it is provided it must be a
 *      non-empty string; anything else rejects the plan instead of being
 *      silently replaced by the default.
 *   5. If `expectedIds` is supplied (the candidate set the decider was
 *      shown), every decision id must be in that set and every expected
 *      id must have exactly one decision — no extras, no missing.
 *
 * Optional text fields are accepted in snake_case or camelCase; the
 * snake_case spelling wins when both are strings.
 *
 * @param text Raw decider output.
 * @param expectedIds Candidate ids the decider was shown; the coverage
 *   check is skipped when omitted or empty.
 * @returns `{ plan, error: null }` on success, `{ plan: null, error }` on
 *   any violation. Never throws for malformed input.
 */
export function parseTriagePlanStrict(
  text: string,
  expectedIds?: string[],
): ParseTriagePlanResult {
  // Every rejection has the same shape: no plan plus one explanation.
  const reject = (error: string): ParseTriagePlanResult => ({
    plan: null,
    error,
  });
  // Read an optional text field that may use either key spelling; trimmed.
  const readText = (snake: unknown, camel: unknown): string => {
    if (typeof snake === "string") return snake.trim();
    if (typeof camel === "string") return camel.trim();
    return "";
  };
  // A key counts as supplied when it carries any value other than null/undefined.
  const isSupplied = (value: unknown): boolean =>
    value !== undefined && value !== null;

  if (typeof text !== "string" || text.length === 0) {
    return reject("decider output was empty");
  }
  if (!text.includes(COMPLETION_MARKER)) {
    return reject(
      `decider output is missing the completion marker "${COMPLETION_MARKER}" — treating as incomplete`,
    );
  }

  const fenceMatches = Array.from(
    text.matchAll(/```ya?ml\s*\n([\s\S]*?)\n```/gi),
  );
  if (fenceMatches.length === 0) {
    return reject(
      "decider output has no fenced yaml block with the decision plan",
    );
  }
  if (fenceMatches.length > 1) {
    return reject(
      `decider output has ${fenceMatches.length} fenced yaml blocks — the contract is exactly one`,
    );
  }

  const yamlBody = fenceMatches[0][1];
  let parsed: unknown;
  try {
    parsed = parseYaml(yamlBody);
  } catch (err) {
    return reject(
      `decision plan failed to parse as yaml: ${
        err instanceof Error ? err.message : String(err)
      }`,
    );
  }

  const root = parsed as Record<string, unknown> | null;
  const decisions = root?.decisions;
  if (!Array.isArray(decisions)) {
    return reject("decision plan must have a top-level `decisions:` array");
  }
  if (decisions.length === 0) {
    return reject("decision plan has zero decisions — nothing to apply");
  }

  const out: TriageDecision[] = [];
  const seenIds = new Set<string>();
  for (let i = 0; i < decisions.length; i++) {
    const raw = decisions[i];
    if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
      return reject(`decisions[${i}] is not an object`);
    }
    const item = raw as Record<string, unknown>;

    const id = typeof item.id === "string" ? item.id.trim() : "";
    if (!id) {
      return reject(`decisions[${i}] is missing a non-empty id`);
    }
    if (seenIds.has(id)) {
      return reject(
        `decisions[${i}] duplicates id "${id}" — each ledger entry must appear exactly once`,
      );
    }
    seenIds.add(id);

    const outcome = item.outcome;
    if (outcome !== "fix" && outcome !== "promote" && outcome !== "close") {
      return reject(
        `decisions[${i}] (id=${id}) has invalid outcome "${outcome}" — expected fix|promote|close`,
      );
    }

    const decision: TriageDecision = { id, outcome };
    const reason = typeof item.reason === "string" ? item.reason.trim() : "";
    const evidenceKind = readText(item.evidence_kind, item.evidenceKind);
    const proposedApproach = readText(
      item.proposed_approach,
      item.proposedApproach,
    );
    const requirementId = readText(item.requirement_id, item.requirementId);

    // A supplied-but-unusable evidence_kind on a decision that will be
    // applied must not fall through to the apply step's default: that
    // would record evidence the decider never actually stated.
    const evidenceKindSupplied =
      isSupplied(item.evidence_kind) || isSupplied(item.evidenceKind);
    if (outcome !== "fix" && evidenceKindSupplied && !evidenceKind) {
      return reject(
        `decisions[${i}] (id=${id}, outcome=${outcome}) has an invalid evidence_kind — when provided it must be a non-empty string`,
      );
    }

    if (evidenceKind) decision.evidenceKind = evidenceKind;
    if (reason) decision.reason = reason;
    if (proposedApproach) decision.proposedApproach = proposedApproach;
    if (requirementId) decision.requirementId = requirementId;

    // Outcome-specific required fields.
    if (outcome === "close" && !decision.reason) {
      return reject(
        `decisions[${i}] (id=${id}, outcome=close) is missing required field reason`,
      );
    }
    if (outcome === "promote") {
      if (!decision.reason) {
        return reject(
          `decisions[${i}] (id=${id}, outcome=promote) is missing required field reason`,
        );
      }
      if (!decision.requirementId) {
        return reject(
          `decisions[${i}] (id=${id}, outcome=promote) is missing required field requirement_id`,
        );
      }
    }
    if (outcome === "fix" && !decision.proposedApproach) {
      return reject(
        `decisions[${i}] (id=${id}, outcome=fix) is missing required field proposed_approach`,
      );
    }
    out.push(decision);
  }

  // Expected-id coverage check: if the caller knows which entries the
  // decider was shown, ensure the plan covers them exactly.
  if (Array.isArray(expectedIds) && expectedIds.length > 0) {
    const expected = new Set(expectedIds);
    for (const decision of out) {
      if (!expected.has(decision.id)) {
        return reject(
          `decision id "${decision.id}" is not in the candidate set the decider was shown — possible hallucination`,
        );
      }
    }
    for (const id of expected) {
      if (!seenIds.has(id)) {
        return reject(
          `candidate id "${id}" has no decision in the plan — incomplete coverage`,
        );
      }
    }
  }

  return { plan: out, error: null };
}
/**
 * Backwards-compatible entry point kept for existing tests and fixtures.
 *
 * Delegates to parseTriagePlanStrict without an expected-id set and
 * discards the error explanation, so callers only learn whether a plan was
 * accepted. Production callers should use parseTriagePlanStrict.
 *
 * @param text Raw decider output.
 * @returns The parsed decisions, or null when the plan was rejected.
 */
export function parseTriagePlan(text: string): TriageDecision[] | null {
  const { plan } = parseTriagePlanStrict(text);
  return plan;
}
/**
 * Turn the raw SF_TRIAGE_AGENT_TIMEOUT_MS value into a timeout in ms.
 *
 * Only a plain positive integer (digits, optional leading "+", surrounding
 * whitespace tolerated) is accepted. Everything else — unset, empty, zero,
 * negative, or values with a suffix or exponent such as "10m" / "1e6" —
 * yields `fallbackMs`. A prefix parse would read "10m" as a 10 ms timeout
 * and kill every agent immediately, so such values are rejected outright.
 *
 * @param raw Raw environment value, possibly undefined.
 * @param fallbackMs Timeout used when `raw` is not a valid override.
 * @returns The effective timeout in milliseconds.
 */
function resolveTriageAgentTimeoutMs(
  raw: string | undefined,
  fallbackMs: number = 8 * 60 * 1000,
): number {
  const trimmed = (raw ?? "").trim();
  if (!/^\+?\d+$/.test(trimmed)) return fallbackMs;
  const value = Number(trimmed);
  return Number.isSafeInteger(value) && value > 0 ? value : fallbackMs;
}
/**
 * Default per-agent timeout: 8 minutes. Long enough for a real LLM reasoning
 * pass + tool calls; short enough that a hung gemini OAuth or stalled
 * provider doesn't lock the whole triage flow indefinitely. Operators can
 * override via SF_TRIAGE_AGENT_TIMEOUT_MS env var (plain integer, in ms).
 *
 * The earlier version had no timeout at all — `defaultAgentRunner` waited
 * forever on `proc.on("close")`, so a single hung subagent dispatch
 * blocked the orchestrator until manual kill (observed 2026-05-14:
 * 33-minute-stuck triage --apply caused by an unresponsive provider).
 * The watchdog is now enforced inside `runSubagent` (SDK helper).
 */
const DEFAULT_AGENT_TIMEOUT_MS = resolveTriageAgentTimeoutMs(
  process.env.SF_TRIAGE_AGENT_TIMEOUT_MS,
);
/**
 * Production agent runner: composes the agent's system prompt, appends the
 * task, and dispatches the agent through `runSubagent` with the triage
 * timeout applied.
 *
 * @param agent Agent definition to run.
 * @param task Task text; appended to the system prompt under "## Task Input"
 *   and also passed to `runSubagent` as the task argument.
 * @param options Per-call overrides; each falls back to the agent's own
 *   value (tools, model) or to `process.cwd()` (cwd).
 * @returns The subagent result. When the exit code is 124 the stderr text
 *   is replaced with a timeout message naming SF_TRIAGE_AGENT_TIMEOUT_MS.
 */
async function defaultAgentRunner(
  agent: AgentConfig,
  task: string,
  options: { tools?: string[]; model?: string; cwd?: string } = {},
): Promise<AgentRunResult> {
  type PromptPartsModule = {
    composeAgentPrompt?: (
      agent: AgentConfig,
      context: { cwd: string; surface: string; tools?: string[] },
    ) => string;
  };
  const cwd = options.cwd ?? process.cwd();
  const tools = options.tools ?? agent.tools;

  // Compose the system prompt via the prompt-parts registry. Dynamic
  // import because src/resources/ is excluded from the root tsconfig
  // (extensions get their own build). If the module isn't available
  // fall back to the agent's raw systemPrompt — degrades gracefully.
  // Only the load is guarded: an error thrown by composeAgentPrompt
  // itself still propagates, since that is a real defect, not a missing
  // module.
  let promptPartsModule: PromptPartsModule | null | undefined;
  try {
    promptPartsModule = (await jiti.import(
      sfExtensionPath("subagent/prompt-parts"),
    )) as PromptPartsModule | null | undefined;
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    process.stderr.write(
      `[triage] prompt-parts module unavailable, using raw system prompt for ${agent.name}: ${msg}\n`,
    );
  }
  const composed =
    promptPartsModule?.composeAgentPrompt?.(agent, {
      cwd,
      surface: "headless",
      tools,
    }) ?? agent.systemPrompt;
  const appendedPrompt = `${composed}\n\n## Task Input\n\n${task}`;

  const result = await runSubagent(
    {
      systemPrompt: appendedPrompt,
      model: options.model ?? agent.model,
      tools,
      cwd,
      name: agent.name,
    },
    task,
    { timeoutMs: DEFAULT_AGENT_TIMEOUT_MS },
  );
  return {
    ok: result.ok,
    output: result.output,
    stderr:
      result.exitCode === 124
        ? `${agent.name} timed out after ${DEFAULT_AGENT_TIMEOUT_MS}ms (configure SF_TRIAGE_AGENT_TIMEOUT_MS to extend)`
        : (result.stderr ?? ""),
    exitCode: result.exitCode,
  };
}
/**
 * Append one event to the journal for a triage --apply flow.
 *
 * Best-effort: a missing journal module, a missing `emitJournalEvent`
 * export, or an error thrown while writing is swallowed, so journaling can
 * never change the outcome of the apply run.
 *
 * @param cwd Base path passed to the journal writer.
 * @param flowId Id shared by every event of the run.
 * @param seq Sequence number of this event within the flow.
 * @param eventType Journal event type.
 * @param data Extra payload stored with the event.
 */
async function emitTriageApplyJournal(
  cwd: string,
  flowId: string,
  seq: number,
  eventType: string,
  data: Record<string, unknown> = {},
): Promise<void> {
  type JournalModule = {
    emitJournalEvent?: (
      basePath: string,
      entry: Record<string, unknown>,
    ) => void;
  };
  try {
    const loaded = (await jiti.import(
      sfExtensionPath("journal"),
    )) as JournalModule;
    const entry: Record<string, unknown> = {
      ts: new Date().toISOString(),
      flowId,
      seq,
      eventType,
      data,
    };
    loaded.emitJournalEvent?.(cwd, entry);
  } catch {
    // Journal is best-effort; the apply result remains authoritative.
  }
}
/**
* Outcome of one runTriageApply invocation.
*/
export interface RunTriageApplyResult {
/** True only when the review agreed and no approved mutation was rejected. */
ok: boolean;
/** True when review-code agreed with the plan and the apply phase ran. */
agreed: boolean;
/** Human-readable failure explanation; absent on full success. */
error?: string;
/** Raw triage-decider output, present once the decider has run successfully. */
deciderOutput?: string;
/** Raw review-code output, present once the reviewer has run. */
reviewOutput?: string;
/** Ids whose resolution was actually written; empty on every early-exit path. */
resolvedIds: string[];
/** Ids of approved close/promote decisions that could not be applied. */
rejectedIds?: string[];
/** Ids of fix decisions left for the operator. */
pendingFixIds?: string[];
/** Per-run id of the form `triage-apply-<uuid>`; also used as the trace id. */
flowId: string;
}
/**
 * Run the `sf headless triage --apply` flow: decider → plan validation →
 * independent review → apply.
 *
 * Sequence:
 *   1. Discover the `triage-decider` and `review-code` agents and enforce
 *      the trust contract (both discovered, both `source === "builtin"`,
 *      no injected runner unless `allowUntrustedRunner` is set).
 *   2. Run the decider with read-only tools and parse its output with
 *      parseTriagePlanStrict.
 *   3. Run review-code on the plan with read-only tools; only an output
 *      line starting with "review-code: agree" lets the flow continue.
 *   4. Apply the approved plan via applyTriagePlan.
 *
 * Each stage records a `gate_run` trace event. If a gate event cannot be
 * persisted the run stops and reports that as the failure. Each gate is
 * emitted once per run, with its final verdict.
 *
 * @param cwd Project base path (ledger, journal and trace location).
 * @param prompt Triage prompt shown to the decider; also handed to the
 *   reviewer for context.
 * @param options Model override, test-only runner injection, and the
 *   candidate metadata used for journaling and coverage checks.
 * @returns A structured result for every handled outcome. The promise
 *   rejects only if loading an extension module or a runner call throws.
 */
export async function runTriageApply(
  cwd: string,
  prompt: string,
  options: {
    model?: string;
    agentRunner?: AgentRunner;
    candidateCount?: number;
    // Expected ledger ids the decider was shown. When supplied, the
    // strict plan parser refuses any plan that adds new ids or omits
    // expected ones.
    expectedIds?: string[];
    // Test escape hatch. Production callers MUST NOT set this. Required
    // to pass a custom agentRunner because an arbitrary runner could
    // side-channel-mutate the ledger despite the read-only tool override
    // the orchestrator enforces (codex review 2026-05-14 follow-up).
    allowUntrustedRunner?: boolean;
  } = {},
): Promise<RunTriageApplyResult> {
  const flowId = `triage-apply-${randomUUID()}`;
  let seq = 0;
  const emit = (eventType: string, data: Record<string, unknown> = {}) =>
    emitTriageApplyJournal(cwd, flowId, seq++, eventType, data);
  // Slice 3a of "Make UOK the SF Control Plane": every triage --apply run
  // emits gate_run trace events (trusted-agent-source-gate,
  // triage-plan-validation-gate, triage-apply-review-gate) with canonical
  // UOK run context. status uok reads surface/runControl/... from these
  // events and classifies coverage as "ok"/"stale"/"incomplete".
  //
  // Dynamic imports because src/resources is excluded from the root
  // tsconfig (extensions have their own build). buildUokRunContext and
  // appendTraceEventRequired live there; loaded once per run.
  const runContextModule = (await jiti.import(
    sfExtensionPath("uok/run-context"),
  )) as {
    buildUokRunContext: (opts: Record<string, unknown>) => {
      surface: string;
      runControl: string;
      permissionProfile: string;
      traceId: string;
      parentTrace?: string;
    } | null;
  };
  const traceWriterModule = (await jiti.import(
    sfExtensionPath("uok/trace-writer"),
  )) as {
    appendTraceEvent?: (
      basePath: string,
      traceId: string,
      event: Record<string, unknown>,
    ) => void;
    appendTraceEventRequired?: (
      basePath: string,
      traceId: string,
      event: Record<string, unknown>,
    ) => void;
    readTraceEvents?: (
      basePath: string,
      type: string,
      windowHours?: number,
    ) => Array<Record<string, unknown>>;
  };
  // surface: "headless" - runTriageApply is always operator-invoked
  // via sf headless triage --apply.
  // runControl: "supervised" - the operator launched this command; it's
  // not an autonomous-loop self-initiation.
  // permissionProfile: "high" - --apply mutates the ledger, so the run
  // must have write permission.
  // traceId: flowId - already a UUID-stamped per-run id.
  const uokContext = runContextModule.buildUokRunContext({
    surface: "headless",
    runControl: "supervised",
    permissionProfile: "high",
    traceId: flowId,
  });
  // Write one gate_run trace event. Returns null on success, or the Error
  // explaining why the event could not be persisted (never throws).
  const emitTriageGate = (
    gateId: string,
    outcome: "pass" | "fail" | "manual-attention",
    rationale: string,
    extra: Record<string, unknown> = {},
  ): Error | null => {
    if (!uokContext) {
      return new Error("buildUokRunContext returned null for triage --apply");
    }
    const event = {
      type: "gate_run",
      traceId: uokContext.traceId,
      turnId: `triage-apply:${gateId}`,
      gateId,
      gateType: "quality-gate",
      outcome,
      failureClass:
        outcome === "fail"
          ? "policy"
          : outcome === "manual-attention"
            ? "manual-attention"
            : "none",
      rationale,
      attempt: 1,
      maxAttempts: 1,
      retryable: false,
      evaluatedAt: new Date().toISOString(),
      durationMs: 0,
      // Canonical UOK run context. status uok reads these from
      // trace events (slice 3a addition) so the gate
      // classifies as "ok" without needing a quality_gates parent
      // FK row to exist.
      surface: uokContext.surface,
      runControl: uokContext.runControl,
      permissionProfile: uokContext.permissionProfile,
      ...extra,
    };
    try {
      if (typeof traceWriterModule.appendTraceEventRequired === "function") {
        traceWriterModule.appendTraceEventRequired(cwd, flowId, event);
      } else if (typeof traceWriterModule.appendTraceEvent === "function") {
        // Legacy writer gives no persistence guarantee, so read the event
        // back to confirm it landed.
        traceWriterModule.appendTraceEvent(cwd, flowId, event);
        const persisted = traceWriterModule
          .readTraceEvents?.(cwd, "gate_run", 24 * 30)
          .some(
            (ev) =>
              ev.traceId === event.traceId &&
              ev.turnId === event.turnId &&
              ev.gateId === event.gateId,
          );
        if (!persisted) {
          return new Error(
            `legacy trace writer did not persist ${gateId} gate event`,
          );
        }
      } else {
        return new Error("trace writer does not expose appendTraceEvent");
      }
      return null;
    } catch (err) {
      return err instanceof Error ? err : new Error(String(err));
    }
  };
  // Emit a gate event that the run depends on. Returns null when the event
  // was persisted; otherwise journals the problem and returns the failure
  // result the caller must return immediately.
  const emitRequiredTriageGate = async (
    gateId: string,
    outcome: "pass" | "fail" | "manual-attention",
    rationale: string,
    extra: Record<string, unknown> = {},
  ): Promise<RunTriageApplyResult | null> => {
    const err = emitTriageGate(gateId, outcome, rationale, extra);
    if (!err) return null;
    await emit("triage-apply-failed", {
      reason: "uok-gate-emission-failed",
      gateId,
      error: err.message,
    });
    return {
      ok: false,
      agreed: false,
      error: `UOK gate emission failed for ${gateId}: ${err.message}`,
      resolvedIds: [],
      flowId,
    };
  };
  await emit("triage-apply-start", {
    candidateCount: options.candidateCount ?? null,
  });
  const agentsModule = (await jiti.import(
    sfExtensionPath("subagent/agents"),
  )) as {
    discoverAgents?: (cwd: string, scope: string) => { agents: AgentConfig[] };
  };
  const agents = agentsModule.discoverAgents?.(cwd, "both").agents ?? [];
  const triageDecider = agents.find((agent) => agent.name === "triage-decider");
  const reviewCode = agents.find((agent) => agent.name === "review-code");
  if (!triageDecider || !reviewCode) {
    const missing = [
      triageDecider ? null : "triage-decider",
      reviewCode ? null : "review-code",
    ]
      .filter(Boolean)
      .join(", ");
    // Missing agents is a trusted-source-gate failure cause too — the
    // gate's contract is "both built-ins exist AND have source=builtin".
    // Emit it so operators see the failure in status uok, not just in
    // the triage-apply journal.
    const gateFailure = await emitRequiredTriageGate(
      "trusted-agent-source-gate",
      "fail",
      `required built-in agent(s) not discovered: ${missing}`,
      { missing },
    );
    if (gateFailure) return gateFailure;
    await emit("triage-apply-failed", { reason: "missing-agent", missing });
    return {
      ok: false,
      agreed: false,
      error: `Missing built-in agent(s): ${missing}`,
      resolvedIds: [],
      flowId,
    };
  }
  // Trusted-source guard (codex review 2026-05-14): when --apply will
  // mutate the ledger, BOTH agents must be SF-shipped built-ins. A
  // project-level override could silently disable review-code's
  // independence. Operators can still customize behavior for inspect
  // workflows, but --apply uses only the shipped review contract.
  if (triageDecider.source !== "builtin" || reviewCode.source !== "builtin") {
    const rationale = `non-builtin agents (triage-decider=${triageDecider.source}, review-code=${reviewCode.source})`;
    const gateFailure = await emitRequiredTriageGate(
      "trusted-agent-source-gate",
      "fail",
      rationale,
      {
        triageDeciderSource: triageDecider.source,
        reviewCodeSource: reviewCode.source,
      },
    );
    if (gateFailure) return gateFailure;
    await emit("triage-apply-failed", {
      reason: "untrusted-agent-source",
      triageDeciderSource: triageDecider.source,
      reviewCodeSource: reviewCode.source,
    });
    return {
      ok: false,
      agreed: false,
      error: `Refusing to --apply with non-builtin agents (triage-decider=${triageDecider.source}, review-code=${reviewCode.source}). Use \`sf headless triage --run\` for a reviewable decision artifact, or remove the project/user override.`,
      resolvedIds: [],
      flowId,
    };
  }
  // Custom-runner guard (codex review follow-up): an injected agentRunner
  // can side-channel-mutate the ledger despite the read-only tool override.
  // Only allow it when allowUntrustedRunner is explicitly set (test path).
  //
  // This check runs BEFORE the gate's "pass" event is written. Emitting
  // the pass first would leave one run with both a pass and a fail for
  // trusted-agent-source-gate under the same turnId, and on the legacy
  // trace-writer path the read-back check for the fail would be satisfied
  // by the earlier pass.
  if (options.agentRunner && !options.allowUntrustedRunner) {
    // Same trust contract as missing-agent / non-builtin source: the
    // run cannot guarantee built-in behavior, so it's a failure of the
    // trusted-agent-source-gate, surfaced through status uok.
    const gateFailure = await emitRequiredTriageGate(
      "trusted-agent-source-gate",
      "fail",
      "runTriageApply: custom agentRunner injected without allowUntrustedRunner; production callers cannot bypass the built-in agent contract",
    );
    if (gateFailure) return gateFailure;
    await emit("triage-apply-failed", { reason: "untrusted-runner" });
    return {
      ok: false,
      agreed: false,
      error:
        "runTriageApply: a custom agentRunner was supplied without allowUntrustedRunner. Production callers cannot inject a runner — only tests can, via the explicit allowUntrustedRunner option.",
      resolvedIds: [],
      flowId,
    };
  }
  const trustGateFailure = await emitRequiredTriageGate(
    "trusted-agent-source-gate",
    "pass",
    "both triage-decider and review-code are SF-shipped built-ins",
  );
  if (trustGateFailure) return trustGateFailure;
  const runner = options.agentRunner ?? defaultAgentRunner;
  // Phase 1: triage-decider runs in PLAN-ONLY mode. Drop resolve_issue
  // from its tool list (the YAML already drops it, but this is defense-
  // in-depth in case a project override resurrects it). The decider
  // emits a YAML decision plan; we parse it post-hoc.
  const decider = await runner(triageDecider, prompt, {
    model: options.model,
    cwd,
    tools: ["view", "grep", "glob", "git_log"],
  });
  await emit("triage-apply-decider-finished", {
    ok: decider.ok,
    exitCode: decider.exitCode ?? null,
  });
  if (!decider.ok) {
    await emit("triage-apply-failed", { reason: "decider-failed" });
    return {
      ok: false,
      agreed: false,
      error: decider.stderr || "triage-decider failed",
      deciderOutput: decider.output,
      resolvedIds: [],
      flowId,
    };
  }
  // Parse the structured plan in strict mode. Refuses on any malformed
  // item, missing completion marker, multiple yaml blocks, or mismatch
  // against the expected candidate set (when supplied). Partial trust on
  // a triage plan is worse than no apply at all.
  const parseResult = parseTriagePlanStrict(
    decider.output,
    options.expectedIds,
  );
  if (!parseResult.plan) {
    const gateFailure = await emitRequiredTriageGate(
      "triage-plan-validation-gate",
      "fail",
      parseResult.error ?? "decider produced an unparseable plan",
      { parseError: parseResult.error ?? null },
    );
    if (gateFailure) return gateFailure;
    await emit("triage-apply-failed", {
      reason: "no-plan",
      parseError: parseResult.error,
    });
    return {
      ok: false,
      agreed: false,
      error: `triage-decider plan rejected: ${
        parseResult.error ?? "unknown parse error"
      }`,
      deciderOutput: decider.output,
      resolvedIds: [],
      flowId,
    };
  }
  const plan = parseResult.plan;
  const validationGateFailure = await emitRequiredTriageGate(
    "triage-plan-validation-gate",
    "pass",
    `decider plan parsed cleanly: ${plan.length} decisions`,
    { decisionCount: plan.length },
  );
  if (validationGateFailure) return validationGateFailure;
  await emit("triage-apply-plan-parsed", {
    decisionCount: plan.length,
    outcomes: plan.reduce<Record<string, number>>((acc, d) => {
      acc[d.outcome] = (acc[d.outcome] ?? 0) + 1;
      return acc;
    }, {}),
  });
  // Phase 2: review-code reviews the plan with read-only tools. The
  // review task explicitly hands the plan as the artifact under
  // scrutiny — the reviewer's job is to spot bad calls before they land.
  const reviewTask = [
    "Review this self-feedback triage decision PLAN. The plan has NOT yet been applied — your verdict gates whether any resolve_issue mutation runs.",
    'Return "review-code: agree" only if every decision in the plan is safe and coherent against the current code/ledger state.',
    "On disagreement, name each concerning decision explicitly so the operator (or a follow-up apply pass) can pull just those entries out and proceed with the rest.",
    "",
    "## Original triage prompt (the ledger entries the decider saw)",
    prompt,
    "",
    "## triage-decider output (includes the plan as a fenced yaml block)",
    decider.output,
  ].join("\n");
  const review = await runner(reviewCode, reviewTask, {
    model: options.model,
    cwd,
    tools: ["view", "grep", "glob", "git_log", "query_journal"],
  });
  // Agreement requires a line that starts with the verdict phrase; the
  // phrase quoted mid-sentence does not count.
  const agreed = /^review-code:\s*agree\b/im.test(review.output.trim());
  await emit(
    agreed
      ? "triage-apply-review-code-agree"
      : "triage-apply-review-code-disagree",
    {
      ok: review.ok,
      exitCode: review.exitCode ?? null,
    },
  );
  if (!review.ok) {
    const gateFailure = await emitRequiredTriageGate(
      "triage-apply-review-gate",
      "manual-attention",
      "review-code subagent failed to complete; review pending operator",
      { exitCode: review.exitCode ?? null },
    );
    if (gateFailure) return gateFailure;
    await emit("triage-apply-failed", { reason: "review-code-failed" });
    return {
      ok: false,
      agreed: false,
      error: review.stderr || "review-code failed",
      deciderOutput: decider.output,
      reviewOutput: review.output,
      resolvedIds: [],
      flowId,
    };
  }
  if (!agreed) {
    // Disagreement is a clean pause, not a failure. The plan and the
    // review are both persisted in the decision report; the operator
    // can read both and act.
    const gateFailure = await emitRequiredTriageGate(
      "triage-apply-review-gate",
      "fail",
      "review-code disagreed with the proposed plan; no mutations applied",
    );
    if (gateFailure) return gateFailure;
    return {
      ok: false,
      agreed: false,
      error: "review-code disagreed — pausing for operator review",
      deciderOutput: decider.output,
      reviewOutput: review.output,
      resolvedIds: [],
      flowId,
    };
  }
  const reviewGateFailure = await emitRequiredTriageGate(
    "triage-apply-review-gate",
    "pass",
    "review-code agreed with the proposed plan; apply phase proceeds",
  );
  if (reviewGateFailure) return reviewGateFailure;
  // Phase 3: apply the plan. We (this runner) call markResolved for
  // each close/promote decision; fix decisions get surfaced for the
  // operator but never auto-mutate. Mutations happen ONCE, post-review,
  // and the resolvedIds we return reflect actual ledger state.
  const applyResult = await applyTriagePlan(cwd, plan, emit);
  // Per-decision failure surfacing (codex review follow-up): if any
  // approved close/promote failed to apply, runTriageApply reports
  // ok=false. Operator sees both partial success (resolvedIds) and
  // partial failure (rejectedIds) so they can investigate.
  const approvedMutationCount = plan.filter((d) => d.outcome !== "fix").length;
  const hasFailures = applyResult.rejectedIds.length > 0;
  // triage-apply-mutation-gate (codex review 2026-05-14 follow-up on
  // slice 3a): observability for the post-review apply phase. pass when
  // every approved mutation landed; fail when any rejected; manual-
  // attention when there were no approved mutations to apply (all
  // decisions were "fix" handoffs, which require operator action).
  const mutationOutcome: "pass" | "fail" | "manual-attention" =
    approvedMutationCount === 0
      ? "manual-attention"
      : hasFailures
        ? "fail"
        : "pass";
  const mutationRationale =
    approvedMutationCount === 0
      ? `no approved mutations: ${applyResult.pendingFixIds.length} fix decisions await operator`
      : hasFailures
        ? `${applyResult.rejectedIds.length} of ${approvedMutationCount} approved mutations failed: ${applyResult.rejectedIds.join(", ")}`
        : `${applyResult.resolvedIds.length} of ${approvedMutationCount} approved mutations applied cleanly`;
  const mutationGateFailure = await emitRequiredTriageGate(
    "triage-apply-mutation-gate",
    mutationOutcome,
    mutationRationale,
    {
      resolvedCount: applyResult.resolvedIds.length,
      rejectedCount: applyResult.rejectedIds.length,
      pendingFixCount: applyResult.pendingFixIds.length,
    },
  );
  if (mutationGateFailure) return mutationGateFailure;
  return {
    ok: !hasFailures,
    agreed: true,
    error: hasFailures
      ? `${applyResult.rejectedIds.length} of ${approvedMutationCount} approved mutations failed: ${applyResult.rejectedIds.join(", ")}`
      : undefined,
    deciderOutput: decider.output,
    reviewOutput: review.output,
    resolvedIds: applyResult.resolvedIds,
    rejectedIds: applyResult.rejectedIds,
    pendingFixIds: applyResult.pendingFixIds,
    flowId,
  };
}
/**
* Per-decision outcome of applyTriagePlan. Every decision id in the plan
* ends up in one of the three lists.
*/
interface ApplyTriagePlanResult {
/** Ids for which markResolved returned a truthy result. */
resolvedIds: string[];
/** Ids of close/promote decisions that were refused, threw, or could not be attempted. */
rejectedIds: string[];
/** Ids of fix decisions, which are never auto-applied. */
pendingFixIds: string[];
}
/**
 * Apply a reviewed decision plan to the self-feedback ledger.
 *
 * Close and promote decisions are written through the SF extension's
 * `markResolved`, so the writer layer's own constraints still apply. Fix
 * decisions are never written: they need operator implementation work and
 * are only reported back.
 *
 * Evidence kind defaults to "promoted-to-requirement" for promote and
 * "human-clear" for close when the decision does not carry one.
 *
 * @param cwd Base path handed to `markResolved`.
 * @param plan Decisions approved by the review step.
 * @param emit Journal emitter; one event is written per decision.
 * @returns resolvedIds (actually mutated), rejectedIds (writer refused,
 *   writer threw, or writer unavailable) and pendingFixIds (fix handoffs).
 *   runTriageApply derives ok=true/false from the rejectedIds count.
 */
async function applyTriagePlan(
  cwd: string,
  plan: TriageDecision[],
  emit: (eventType: string, data?: Record<string, unknown>) => Promise<void>,
): Promise<ApplyTriagePlanResult> {
  const outcomeLists: ApplyTriagePlanResult = {
    resolvedIds: [],
    rejectedIds: [],
    pendingFixIds: [],
  };
  const { resolvedIds, rejectedIds, pendingFixIds } = outcomeLists;

  const sfModule = (await jiti.import(sfExtensionPath("self-feedback"))) as {
    markResolved?: (
      entryId: string,
      resolution: Record<string, unknown>,
      basePath?: string,
    ) => boolean;
  };

  if (typeof sfModule.markResolved !== "function") {
    await emit("triage-apply-mutation-failed", {
      reason: "markResolved-unavailable",
    });
    // Without a loadable writer nothing can be mutated: every approved
    // close/promote counts as rejected, fixes stay pending.
    for (const entry of plan) {
      (entry.outcome === "fix" ? pendingFixIds : rejectedIds).push(entry.id);
    }
    return outcomeLists;
  }

  for (const entry of plan) {
    const { id, outcome } = entry;

    if (outcome === "fix") {
      // Operator handoff — reported via the caller's deciderOutput /
      // decision plan, never written to the ledger.
      pendingFixIds.push(id);
      await emit("triage-apply-fix-pending-operator", { id });
      continue;
    }

    const isPromotion = outcome === "promote";
    const evidenceKind =
      entry.evidenceKind ??
      (isPromotion ? "promoted-to-requirement" : "human-clear");
    const evidence: Record<string, unknown> =
      isPromotion && entry.requirementId
        ? { kind: evidenceKind, requirementId: entry.requirementId }
        : { kind: evidenceKind };
    const reason = entry.reason ?? "";

    try {
      const accepted = sfModule.markResolved(id, { reason, evidence }, cwd);
      if (!accepted) {
        rejectedIds.push(id);
        await emit("triage-apply-mutation-rejected", {
          id,
          outcome,
          evidenceKind,
          note: "writer layer refused the resolution",
        });
      } else {
        resolvedIds.push(id);
        await emit("triage-apply-resolved", {
          id,
          outcome,
          evidenceKind,
        });
      }
    } catch (err) {
      rejectedIds.push(id);
      await emit("triage-apply-mutation-failed", {
        id,
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }

  return outcomeLists;
}
/**
 * Render the triage queue or canonical triage prompt to stdout, or dispatch
 * that prompt to a model when an action flag is set.
 *
 * Behaviour, in the order the checks run:
 *   1. Best-effort open of the project DB (a failure is only a stderr warning).
 *   2. Load the self-feedback-drain module and select candidates; either
 *      failing ends the call with exit code 1.
 *   3. `options.max` (when a positive number) caps the candidate list.
 *   4. No candidates: print an empty result (JSON or text) and exit 0,
 *      regardless of the action flags.
 *   5. Neither `run` nor `apply`: `json` prints the candidate digest as JSON,
 *      `list` prints a human-readable digest, otherwise the rendered prompt
 *      itself is printed.
 *   6. `apply` (checked before `run`): hand the prompt to `runTriageApply`
 *      and report its outcome; exit code mirrors `result.ok`.
 *   7. `run`: dispatch the prompt via `runTriage`, persist the answer with
 *      `writeTriageDecisionReport`, and report where it went.
 *
 * Never throws — every dispatch path is guarded, and a failure is reported
 * on stdout/stderr together with a non-zero exit code.
 *
 * @param cwd - Base path handed to the DB opener, candidate selection,
 *   `runTriageApply` and the report writer.
 * @param options - Flags selecting the mode (`run`, `apply`, `json`, `list`),
 *   the candidate cap (`max`), and the model / agent runner overrides.
 * @returns An object whose `exitCode` is 0 on success and 1 on failure.
 */
export async function handleTriage(
	cwd: string,
	options: HandleTriageOptions = {},
): Promise<HandleTriageResult> {
	// Single place to turn an unknown thrown value into a printable message.
	const describeError = (err: unknown): string =>
		err instanceof Error ? err.message : String(err);

	// Open the project DB before reading. The one-shot bypass path doesn't
	// run the full SF agent bootstrap, so DB-open isn't done for us.
	try {
		const autoStartModule = (await jiti.import(
			sfExtensionPath("auto-start"),
			{},
		)) as { openProjectDbIfPresent?: (cwd: string) => Promise<unknown> };
		if (typeof autoStartModule.openProjectDbIfPresent === "function") {
			await autoStartModule.openProjectDbIfPresent(cwd);
		}
	} catch (err) {
		// Deliberately non-fatal: candidate selection below reports its own error.
		process.stderr.write(
			`[triage] DB pre-open warning: ${describeError(err)}\n`,
		);
	}

	let drainModule: {
		selectInlineFixCandidates: (basePath: string) => TriageCandidate[];
		buildInlineFixPrompt: (entries: TriageCandidate[]) => string;
		runTriage: (
			prompt: string,
			options?: { model?: string; timeoutMs?: number },
		) => Promise<{
			ok: boolean;
			content?: string;
			error?: string;
			cleanFinish?: boolean;
			provider?: string;
			modelId?: string;
		}>;
		writeTriageDecisionReport: (
			basePath: string,
			content: string,
		) => string | null;
		rankTriageModelsViaRouter: (candidates?: string[]) => Promise<string[]>;
	};
	try {
		drainModule = (await jiti.import(
			sfExtensionPath("self-feedback-drain"),
		)) as typeof drainModule;
	} catch (err) {
		process.stderr.write(
			`[triage] failed to load self-feedback-drain module: ${describeError(err)}\n`,
		);
		return { exitCode: 1 };
	}

	let candidates: TriageCandidate[];
	try {
		candidates = drainModule.selectInlineFixCandidates(cwd);
	} catch (err) {
		process.stderr.write(
			`[triage] candidate selection failed: ${describeError(err)}\n`,
		);
		return { exitCode: 1 };
	}
	if (typeof options.max === "number" && options.max > 0) {
		candidates = candidates.slice(0, options.max);
	}

	if (candidates.length === 0) {
		if (options.json) {
			process.stdout.write(`${JSON.stringify({ ok: true, candidates: [] })}\n`);
		} else {
			process.stdout.write("No open self-feedback candidates to triage.\n");
		}
		return { exitCode: 0 };
	}

	// --run/--apply take precedence over --json/--list because they describe the
	// ACTION, not the output format. With --run/--apply, --json controls whether
	// the result is JSON vs. human text. Without an action, --json emits
	// the candidate digest as JSON (the inspect path).
	if (!options.run && !options.apply) {
		if (options.json) {
			process.stdout.write(
				`${JSON.stringify({
					ok: true,
					count: candidates.length,
					candidates: candidates.map((c) => ({
						id: c.id,
						kind: c.kind,
						severity: c.severity,
						summary: c.summary,
						ts: c.ts,
						impact: c.impactScore ?? null,
						effort: c.effortEstimate ?? null,
					})),
				})}\n`,
			);
			return { exitCode: 0 };
		}
		if (options.list) {
			process.stdout.write(
				`${candidates.length} candidate${candidates.length === 1 ? "" : "s"} (priority: impact↓ effort↑ ts↑)\n\n`,
			);
			for (const c of candidates) {
				const impact = c.impactScore != null ? `i${c.impactScore}` : "i?";
				const effort = c.effortEstimate != null ? `e${c.effortEstimate}` : "e?";
				process.stdout.write(
					`  [${c.severity}] ${impact} ${effort}  ${c.id}  ${c.kind}\n`,
				);
				process.stdout.write(`    ${c.summary}\n`);
			}
			return { exitCode: 0 };
		}
	}

	// Render the canonical triage prompt (used by both the default
	// pipe-to-model output and the --run dispatch path below).
	let prompt: string;
	try {
		prompt = drainModule.buildInlineFixPrompt(candidates);
	} catch (err) {
		process.stderr.write(
			`[triage] prompt render failed: ${describeError(err)}\n`,
		);
		return { exitCode: 1 };
	}
	if (!options.run && !options.apply) {
		process.stdout.write(`${prompt}\n`);
		return { exitCode: 0 };
	}

	if (options.apply) {
		// Pre-resolve a model via the router when no --model was supplied and
		// no custom runner is injected. Without this, `defaultAgentRunner`
		// would spawn `sf -p` with no `--model` flag, and that path hangs
		// indefinitely during the subprocess's own model-selection step
		// (see sf-mp5tuvdx-ibyk9b). The watchdog still backs this up.
		let resolvedModel = options.model;
		if (!resolvedModel && !options.agentRunner) {
			try {
				const ranked = await drainModule.rankTriageModelsViaRouter();
				resolvedModel = ranked[0];
			} catch (err) {
				process.stderr.write(
					`[triage] router pre-resolution failed; falling back to subprocess default: ${describeError(err)}\n`,
				);
			}
		}
		process.stderr.write(
			`[triage] applying via triage-decider -> review-code${
				resolvedModel ? ` (model: ${resolvedModel})` : ""
			} (this can take a few minutes)…\n`,
		);
		// Guarded so an unexpected throw from the apply pipeline becomes a
		// reported failure instead of a rejected promise.
		try {
			const result = await runTriageApply(cwd, prompt, {
				model: resolvedModel,
				agentRunner: options.agentRunner,
				candidateCount: candidates.length,
				expectedIds: candidates.map((candidate) => candidate.id),
			});
			const payload = {
				ok: result.ok,
				agreed: result.agreed,
				error: result.error,
				flowId: result.flowId,
				resolvedIds: result.resolvedIds,
				deciderOutput: result.deciderOutput,
				reviewOutput: result.reviewOutput,
			};
			if (options.json) {
				process.stdout.write(`${JSON.stringify(payload)}\n`);
			} else if (result.ok) {
				process.stdout.write(
					`Triage apply complete: review-code agreed (${result.resolvedIds.length} resolved)\n`,
				);
				if (result.resolvedIds.length > 0) {
					process.stdout.write(`Resolved: ${result.resolvedIds.join(", ")}\n`);
				}
			} else {
				process.stdout.write(`[triage] apply blocked: ${result.error}\n`);
				if (result.reviewOutput) {
					process.stdout.write(`${result.reviewOutput}\n`);
				}
			}
			return { exitCode: result.ok ? 0 : 1 };
		} catch (err) {
			const error = describeError(err);
			process.stdout.write(
				options.json
					? `${JSON.stringify({ ok: false, error })}\n`
					: `[triage] apply failed: ${error}\n`,
			);
			return { exitCode: 1 };
		}
	}

	// --run: dispatch the prompt via @singularity-forge/ai completeSimple,
	// capture the decision text, persist to .sf/triage/decisions/<ts>.md.
	// Same shape as `sf headless reflect --run`. The model's output is a
	// decision matrix — applying the decisions (resolve_issue calls, code
	// edits) is operator-driven; a tool-enabled variant is follow-up work.
	process.stderr.write(
		"[triage] dispatching to model (this can take a few minutes)…\n",
	);
	// Guarded for the same reason as the apply path: dispatch or report
	// writing may throw, and this function must still return an exit code.
	try {
		const result = await drainModule.runTriage(prompt, {
			model: options.model,
		});
		if (!result.ok) {
			const payload = {
				ok: false,
				error: result.error ?? "unknown triage error",
				provider: result.provider,
				modelId: result.modelId,
			};
			process.stdout.write(
				options.json
					? `${JSON.stringify(payload)}\n`
					: `[triage] failed: ${payload.error}\n`,
			);
			return { exitCode: 1 };
		}
		const content = result.content ?? "";
		const reportPath = drainModule.writeTriageDecisionReport(cwd, content);
		const payload = {
			ok: true,
			reportPath,
			cleanFinish: result.cleanFinish === true,
			provider: result.provider,
			modelId: result.modelId,
		};
		if (options.json) {
			// JSON consumers detect a failed persist through `reportPath: null`.
			process.stdout.write(`${JSON.stringify(payload)}\n`);
		} else {
			if (reportPath) {
				process.stdout.write(`Triage decisions written to: ${reportPath}\n`);
			} else {
				// The writer returned no path. Say so instead of printing
				// "written to: null", and echo the decisions so the model's
				// answer is not lost.
				process.stderr.write(
					"[triage] WARNING: triage decision report could not be written; printing decisions to stdout instead\n",
				);
				if (content.length > 0) {
					process.stdout.write(`${content}\n`);
				}
			}
			if (!result.cleanFinish) {
				process.stderr.write(
					'[triage] WARNING: report did not include "Self-feedback triage complete" terminator — output may be truncated\n',
				);
			}
		}
		// Exit code stays 0 even without a report path, matching `ok: true`
		// in the JSON payload.
		return { exitCode: 0 };
	} catch (err) {
		const error = describeError(err);
		process.stdout.write(
			options.json
				? `${JSON.stringify({ ok: false, error })}\n`
				: `[triage] failed: ${error}\n`,
		);
		return { exitCode: 1 };
	}
}