fix(triage-apply): strict plan validation + custom-runner guard + per-decision failures

Codex review follow-up (2026-05-14) addressed all three remaining
issues from the earlier rescue pass:

1. Strict plan validation. parseTriagePlanStrict refuses the WHOLE
   plan on any malformed item instead of silently dropping. Enforces:
   - completion marker "Self-feedback triage complete" present
   - exactly one fenced ```yaml block
   - every decision has non-empty id + outcome ∈ {fix, promote, close}
   - outcome-specific required fields (close → reason; promote →
     reason + requirement_id; fix → proposed_approach)
   - duplicate ids rejected
   - when expectedIds is supplied, decisions must cover the candidate
     set exactly — no extras (hallucinated ids), no missing
   Returns ParseTriagePlanResult with {plan, error} so the caller can
   surface the specific failure reason.

2. Custom-runner trust guard. runTriageApply refuses an injected
   options.agentRunner unless allowUntrustedRunner is also explicitly
   set. Production callers cannot inject a runner. Without this guard
   a custom runner could side-channel-mutate the ledger despite the
   read-only tool override (codex Q2).

3. Per-decision failure surfacing. applyTriagePlan now returns
   {resolvedIds, rejectedIds, pendingFixIds} instead of just
   resolvedIds. runTriageApply reports ok=false if rejectedIds is
   non-empty, with the count + ids in the error message. Mutations
   still happen one-by-one (no SQL transaction wrapping) but the
   failure is no longer silent (codex Q3).

Tests: src/tests/headless-triage-apply.test.ts now covers:
   - agree-path runs both agents in order; apply fails on missing
     ledger entry → ok=false, rejectedIds populated (the realistic
     contract for a test fixture without a seeded DB)
   - custom runner without allowUntrustedRunner refuses, agentRunner
     never invoked
   - rubber-duck disagrees → clean pause, ok=false, agreed=false
   - decider fails → skip rubber-duck
   - unknown id in plan rejected before review

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-14 17:19:12 +02:00
parent d8ce433c7a
commit 289bf9e264
2 changed files with 465 additions and 61 deletions

View file

@ -103,7 +103,7 @@ interface AgentRunResult {
type AgentRunner = (
agent: AgentConfig,
task: string,
options?: { tools?: string[]; model?: string },
options?: { tools?: string[]; model?: string; cwd?: string },
) => Promise<AgentRunResult>;
/**
@ -128,56 +128,219 @@ export interface TriageDecision {
requirementId?: string;
}
export function parseTriagePlan(text: string): TriageDecision[] | null {
if (typeof text !== "string" || text.length === 0) return null;
const fenceMatch = text.match(/```ya?ml\s*\n([\s\S]*?)\n```/i);
if (!fenceMatch) return null;
const yamlBody = fenceMatch[1];
export interface ParseTriagePlanResult {
plan: TriageDecision[] | null;
error: string | null;
}
const COMPLETION_MARKER = "Self-feedback triage complete";
/**
* Parse a decider output into a strict decision plan, or return a
* structured error explaining what's wrong. Codex review 2026-05-14
* follow-up: refuse the whole plan if ANY item is malformed instead of
* silently dropping items partial-trust on a triage plan is worse
* than no apply at all.
*
* Validates:
* 1. The completion marker is present (signals the decider finished).
* 2. A single fenced ```yaml block with key `decisions:` exists.
* 3. Every item has `id` (non-empty string) and `outcome`
* {fix, promote, close}.
* 4. Outcome-specific required fields:
* - close reason (non-empty) + evidence_kind (defaults to
* human-clear if omitted, but if provided must be
* a non-empty string).
* - promote reason + requirement_id (non-empty strings).
* - fix proposed_approach (non-empty).
* 5. If `expectedIds` is supplied (the candidate set the decider was
* shown), every decision id must be in that set and every expected
* id must have exactly one decision no extras, no missing.
*/
export function parseTriagePlanStrict(
text: string,
expectedIds?: string[],
): ParseTriagePlanResult {
if (typeof text !== "string" || text.length === 0) {
return { plan: null, error: "decider output was empty" };
}
if (!text.includes(COMPLETION_MARKER)) {
return {
plan: null,
error: `decider output is missing the completion marker "${COMPLETION_MARKER}" — treating as incomplete`,
};
}
const fenceMatches = Array.from(
text.matchAll(/```ya?ml\s*\n([\s\S]*?)\n```/gi),
);
if (fenceMatches.length === 0) {
return {
plan: null,
error: "decider output has no fenced yaml block with the decision plan",
};
}
if (fenceMatches.length > 1) {
return {
plan: null,
error: `decider output has ${fenceMatches.length} fenced yaml blocks — the contract is exactly one`,
};
}
const yamlBody = fenceMatches[0][1];
let parsed: unknown;
try {
parsed = parseYaml(yamlBody);
} catch {
return null;
} catch (err) {
return {
plan: null,
error: `decision plan failed to parse as yaml: ${
err instanceof Error ? err.message : String(err)
}`,
};
}
const root = parsed as Record<string, unknown> | null;
const decisions = root?.decisions;
if (!Array.isArray(decisions)) return null;
if (!Array.isArray(decisions)) {
return {
plan: null,
error: "decision plan must have a top-level `decisions:` array",
};
}
if (decisions.length === 0) {
return {
plan: null,
error: "decision plan has zero decisions — nothing to apply",
};
}
const out: TriageDecision[] = [];
for (const raw of decisions) {
if (!raw || typeof raw !== "object") continue;
const seenIds = new Set<string>();
for (let i = 0; i < decisions.length; i++) {
const raw = decisions[i];
if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
return {
plan: null,
error: `decisions[${i}] is not an object`,
};
}
const item = raw as Record<string, unknown>;
const id = typeof item.id === "string" ? item.id : null;
const id = typeof item.id === "string" ? item.id.trim() : "";
if (!id) {
return {
plan: null,
error: `decisions[${i}] is missing a non-empty id`,
};
}
if (seenIds.has(id)) {
return {
plan: null,
error: `decisions[${i}] duplicates id "${id}" — each ledger entry must appear exactly once`,
};
}
seenIds.add(id);
const outcome = item.outcome;
if (!id) continue;
if (outcome !== "fix" && outcome !== "promote" && outcome !== "close")
continue;
if (outcome !== "fix" && outcome !== "promote" && outcome !== "close") {
return {
plan: null,
error: `decisions[${i}] (id=${id}) has invalid outcome "${outcome}" — expected fix|promote|close`,
};
}
const decision: TriageDecision = { id, outcome };
if (typeof item.evidence_kind === "string") {
decision.evidenceKind = item.evidence_kind;
} else if (typeof item.evidenceKind === "string") {
decision.evidenceKind = item.evidenceKind;
const reason = typeof item.reason === "string" ? item.reason.trim() : "";
const evidenceKindRaw =
typeof item.evidence_kind === "string"
? item.evidence_kind
: typeof item.evidenceKind === "string"
? (item.evidenceKind as string)
: "";
if (evidenceKindRaw.trim()) decision.evidenceKind = evidenceKindRaw.trim();
if (reason) decision.reason = reason;
const proposedApproachRaw =
typeof item.proposed_approach === "string"
? item.proposed_approach
: typeof item.proposedApproach === "string"
? (item.proposedApproach as string)
: "";
if (proposedApproachRaw.trim()) {
decision.proposedApproach = proposedApproachRaw.trim();
}
if (typeof item.reason === "string") decision.reason = item.reason;
if (typeof item.proposed_approach === "string") {
decision.proposedApproach = item.proposed_approach;
} else if (typeof item.proposedApproach === "string") {
decision.proposedApproach = item.proposedApproach;
const requirementIdRaw =
typeof item.requirement_id === "string"
? item.requirement_id
: typeof item.requirementId === "string"
? (item.requirementId as string)
: "";
if (requirementIdRaw.trim()) {
decision.requirementId = requirementIdRaw.trim();
}
if (typeof item.requirement_id === "string") {
decision.requirementId = item.requirement_id;
} else if (typeof item.requirementId === "string") {
decision.requirementId = item.requirementId;
// Outcome-specific required fields.
if (outcome === "close" && !decision.reason) {
return {
plan: null,
error: `decisions[${i}] (id=${id}, outcome=close) is missing required field reason`,
};
}
if (outcome === "promote") {
if (!decision.reason) {
return {
plan: null,
error: `decisions[${i}] (id=${id}, outcome=promote) is missing required field reason`,
};
}
if (!decision.requirementId) {
return {
plan: null,
error: `decisions[${i}] (id=${id}, outcome=promote) is missing required field requirement_id`,
};
}
}
if (outcome === "fix" && !decision.proposedApproach) {
return {
plan: null,
error: `decisions[${i}] (id=${id}, outcome=fix) is missing required field proposed_approach`,
};
}
out.push(decision);
}
return out;
// Expected-id coverage check: if the caller knows which entries the
// decider was shown, ensure the plan covers them exactly.
if (Array.isArray(expectedIds) && expectedIds.length > 0) {
const expected = new Set(expectedIds);
for (const decision of out) {
if (!expected.has(decision.id)) {
return {
plan: null,
error: `decision id "${decision.id}" is not in the candidate set the decider was shown — possible hallucination`,
};
}
}
for (const id of expected) {
if (!seenIds.has(id)) {
return {
plan: null,
error: `candidate id "${id}" has no decision in the plan — incomplete coverage`,
};
}
}
}
return { plan: out, error: null };
}
/**
* Backwards-compat wrapper for the lenient parser used by existing tests
* and the test fixtures. Returns just the plan, no error explanation.
* Production callers should use parseTriagePlanStrict.
*/
export function parseTriagePlan(text: string): TriageDecision[] | null {
return parseTriagePlanStrict(text).plan;
}
function buildSfPrintLaunchArgs(
promptPath: string,
task: string,
agent: AgentConfig,
options: { tools?: string[]; model?: string } = {},
options: { tools?: string[]; model?: string; cwd?: string } = {},
): { command: string; args: string[] } {
const sfBinPath = process.env.SF_BIN_PATH || process.argv[1];
const baseArgs = [
@ -203,7 +366,7 @@ function buildSfPrintLaunchArgs(
async function defaultAgentRunner(
agent: AgentConfig,
task: string,
options: { tools?: string[]; model?: string } = {},
options: { tools?: string[]; model?: string; cwd?: string } = {},
): Promise<AgentRunResult> {
const tmpDir = mkdtempSync(join(tmpdir(), "sf-triage-agent-"));
const promptPath = join(tmpDir, `${agent.name}.md`);
@ -221,7 +384,7 @@ async function defaultAgentRunner(
};
const composed =
promptPartsModule.composeAgentPrompt?.(agent, {
cwd: process.cwd(),
cwd: options.cwd ?? process.cwd(),
surface: "headless",
tools: options.tools ?? agent.tools,
}) ?? agent.systemPrompt;
@ -236,7 +399,7 @@ async function defaultAgentRunner(
);
return await new Promise<AgentRunResult>((resolve) => {
const proc = spawn(launch.command, launch.args, {
cwd: process.cwd(),
cwd: options.cwd ?? process.cwd(),
env: process.env,
shell: false,
stdio: ["ignore", "pipe", "pipe"],
@ -297,6 +460,18 @@ async function emitTriageApplyJournal(
}
}
export interface RunTriageApplyResult {
ok: boolean;
agreed: boolean;
error?: string;
deciderOutput?: string;
reviewOutput?: string;
resolvedIds: string[];
rejectedIds?: string[];
pendingFixIds?: string[];
flowId: string;
}
export async function runTriageApply(
cwd: string,
prompt: string,
@ -304,16 +479,17 @@ export async function runTriageApply(
model?: string;
agentRunner?: AgentRunner;
candidateCount?: number;
// Expected ledger ids the decider was shown. When supplied, the
// strict plan parser refuses any plan that adds new ids or omits
// expected ones.
expectedIds?: string[];
// Test escape hatch. Production callers MUST NOT set this. Required
// to pass a custom agentRunner because an arbitrary runner could
// side-channel-mutate the ledger despite the read-only tool override
// the orchestrator enforces (codex review 2026-05-14 follow-up).
allowUntrustedRunner?: boolean;
} = {},
): Promise<{
ok: boolean;
agreed: boolean;
error?: string;
deciderOutput?: string;
reviewOutput?: string;
resolvedIds: string[];
flowId: string;
}> {
): Promise<RunTriageApplyResult> {
const flowId = `triage-apply-${randomUUID()}`;
let seq = 0;
const emit = (eventType: string, data: Record<string, unknown> = {}) =>
@ -348,8 +524,8 @@ export async function runTriageApply(
// Trusted-source guard (codex review 2026-05-14): when --apply will
// mutate the ledger, BOTH agents must be SF-shipped built-ins. A
// project-level override could silently disable rubber-duck's
// independence — operators can still customize behavior, but they
// have to use --review mode where mutations aren't auto-applied.
// independence. Operators can still customize behavior for inspect
// workflows, but --apply uses only the shipped review contract.
if (triageDecider.source !== "builtin" || rubberDuck.source !== "builtin") {
await emit("triage-apply-failed", {
reason: "untrusted-agent-source",
@ -359,12 +535,26 @@ export async function runTriageApply(
return {
ok: false,
agreed: false,
error: `Refusing to --apply with non-builtin agents (triage-decider=${triageDecider.source}, rubber-duck=${rubberDuck.source}). Use --review for inspect-only mode, or remove the project/user override.`,
error: `Refusing to --apply with non-builtin agents (triage-decider=${triageDecider.source}, rubber-duck=${rubberDuck.source}). Use \`sf headless triage --run\` for a reviewable decision artifact, or remove the project/user override.`,
resolvedIds: [],
flowId,
};
}
// Custom-runner guard (codex review follow-up): an injected agentRunner
// can side-channel-mutate the ledger despite the read-only tool override.
// Only allow it when allowUntrustedRunner is explicitly set (test path).
if (options.agentRunner && !options.allowUntrustedRunner) {
await emit("triage-apply-failed", { reason: "untrusted-runner" });
return {
ok: false,
agreed: false,
error:
"runTriageApply: a custom agentRunner was supplied without allowUntrustedRunner. Production callers cannot inject a runner — only tests can, via the explicit allowUntrustedRunner option.",
resolvedIds: [],
flowId,
};
}
const runner = options.agentRunner ?? defaultAgentRunner;
// Phase 1: triage-decider runs in PLAN-ONLY mode. Drop resolve_issue
@ -373,6 +563,7 @@ export async function runTriageApply(
// emits a YAML decision plan; we parse it post-hoc.
const decider = await runner(triageDecider, prompt, {
model: options.model,
cwd,
tools: ["view", "grep", "glob", "git_log"],
});
await emit("triage-apply-decider-finished", {
@ -391,22 +582,31 @@ export async function runTriageApply(
};
}
// Parse the structured plan. If the decider didn't emit a valid plan
// (missing fenced block, malformed YAML, no decisions key), refuse to
// apply — we can't audit what we can't read.
const plan = parseTriagePlan(decider.output);
if (!plan || plan.length === 0) {
await emit("triage-apply-failed", { reason: "no-plan" });
// Parse the structured plan in strict mode. Refuses on any malformed
// item, missing completion marker, multiple yaml blocks, or mismatch
// against the expected candidate set (when supplied). Partial trust on
// a triage plan is worse than no apply at all.
const parseResult = parseTriagePlanStrict(
decider.output,
options.expectedIds,
);
if (!parseResult.plan) {
await emit("triage-apply-failed", {
reason: "no-plan",
parseError: parseResult.error,
});
return {
ok: false,
agreed: false,
error:
"triage-decider did not emit a parseable decision plan (expected a fenced ```yaml block with a `decisions:` key)",
error: `triage-decider plan rejected: ${
parseResult.error ?? "unknown parse error"
}`,
deciderOutput: decider.output,
resolvedIds: [],
flowId,
};
}
const plan = parseResult.plan;
await emit("triage-apply-plan-parsed", {
decisionCount: plan.length,
outcomes: plan.reduce<Record<string, number>>((acc, d) => {
@ -431,6 +631,7 @@ export async function runTriageApply(
].join("\n");
const review = await runner(rubberDuck, reviewTask, {
model: options.model,
cwd,
tools: ["view", "grep", "glob", "git_log", "query_journal"],
});
const agreed = /^rubber-duck:\s*agree\b/im.test(review.output.trim());
@ -470,21 +671,38 @@ export async function runTriageApply(
};
}
// Phase 3: apply the plan. We (this runner) call resolve_issue for
// Phase 3: apply the plan. We (this runner) call markResolved for
// each close/promote decision; fix decisions get surfaced for the
// operator but never auto-mutate. Mutations happen ONCE, post-review,
// and the resolvedIds we return reflect actual ledger state.
const resolvedIds = await applyTriagePlan(cwd, plan, emit);
const applyResult = await applyTriagePlan(cwd, plan, emit);
// Per-decision failure surfacing (codex review follow-up): if any
// approved close/promote failed to apply, runTriageApply reports
// ok=false. Operator sees both partial success (resolvedIds) and
// partial failure (rejectedIds) so they can investigate.
const approvedMutationCount = plan.filter((d) => d.outcome !== "fix").length;
const hasFailures = applyResult.rejectedIds.length > 0;
return {
ok: true,
ok: !hasFailures,
agreed: true,
error: hasFailures
? `${applyResult.rejectedIds.length} of ${approvedMutationCount} approved mutations failed: ${applyResult.rejectedIds.join(", ")}`
: undefined,
deciderOutput: decider.output,
reviewOutput: review.output,
resolvedIds,
resolvedIds: applyResult.resolvedIds,
rejectedIds: applyResult.rejectedIds,
pendingFixIds: applyResult.pendingFixIds,
flowId,
};
}
interface ApplyTriagePlanResult {
resolvedIds: string[];
rejectedIds: string[];
pendingFixIds: string[];
}
/**
* Apply an approved decision plan. Calls markResolved (via the SF
* extension's self-feedback writer, which runs the existing writer-
@ -492,14 +710,19 @@ export async function runTriageApply(
* agent-fix, etc.) for each close/promote decision. Fix decisions are
* not auto-applied; they require operator implementation work.
*
* Returns the list of ledger ids that were actually closed/promoted.
* Returns three lists: resolvedIds (actually mutated), rejectedIds
* (writer-layer refused OR markResolved threw), pendingFixIds (fix
* decisions surfaced for operator handoff). runTriageApply uses the
* rejectedIds count to decide ok=true vs ok=false.
*/
async function applyTriagePlan(
cwd: string,
plan: TriageDecision[],
emit: (eventType: string, data?: Record<string, unknown>) => Promise<void>,
): Promise<string[]> {
): Promise<ApplyTriagePlanResult> {
const resolvedIds: string[] = [];
const rejectedIds: string[] = [];
const pendingFixIds: string[] = [];
const sfModule = (await jiti.import(sfExtensionPath("self-feedback"))) as {
markResolved?: (
entryId: string,
@ -511,12 +734,19 @@ async function applyTriagePlan(
await emit("triage-apply-mutation-failed", {
reason: "markResolved-unavailable",
});
return resolvedIds;
// Every approved close/promote becomes a rejection — can't mutate
// anything if the writer module isn't loadable.
for (const decision of plan) {
if (decision.outcome === "fix") pendingFixIds.push(decision.id);
else rejectedIds.push(decision.id);
}
return { resolvedIds, rejectedIds, pendingFixIds };
}
for (const decision of plan) {
if (decision.outcome === "fix") {
// Fix decisions are operator handoffs — surface in the report
// (via the caller's deciderOutput / decision plan), don't mutate.
pendingFixIds.push(decision.id);
await emit("triage-apply-fix-pending-operator", { id: decision.id });
continue;
}
@ -540,6 +770,7 @@ async function applyTriagePlan(
evidenceKind,
});
} else {
rejectedIds.push(decision.id);
await emit("triage-apply-mutation-rejected", {
id: decision.id,
outcome: decision.outcome,
@ -548,13 +779,14 @@ async function applyTriagePlan(
});
}
} catch (err) {
rejectedIds.push(decision.id);
await emit("triage-apply-mutation-failed", {
id: decision.id,
error: err instanceof Error ? err.message : String(err),
});
}
}
return resolvedIds;
return { resolvedIds, rejectedIds, pendingFixIds };
}
/**
@ -698,6 +930,7 @@ export async function handleTriage(
model: options.model,
agentRunner: options.agentRunner,
candidateCount: candidates.length,
expectedIds: candidates.map((candidate) => candidate.id),
});
const payload = {
ok: result.ok,

View file

@ -0,0 +1,171 @@
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, test } from "vitest";
import { runTriageApply } from "../headless-triage.js";
const tempDirs: string[] = [];
let originalAgentDir: string | undefined;
const deciderPlan = [
"```yaml",
"decisions:",
" - id: sf-test-1",
" outcome: close",
" evidence_kind: human-clear",
" reason: stale",
"```",
"Self-feedback triage complete.",
].join("\n");
function makeProject(): string {
const dir = mkdtempSync(join(tmpdir(), "sf-headless-triage-"));
tempDirs.push(dir);
mkdirSync(join(dir, ".sf"), { recursive: true });
return dir;
}
afterEach(() => {
if (originalAgentDir === undefined) delete process.env.SF_CODING_AGENT_DIR;
else process.env.SF_CODING_AGENT_DIR = originalAgentDir;
while (tempDirs.length > 0) {
rmSync(tempDirs.pop()!, { recursive: true, force: true });
}
});
beforeEach(() => {
originalAgentDir = process.env.SF_CODING_AGENT_DIR;
const dir = mkdtempSync(join(tmpdir(), "sf-headless-agent-dir-"));
tempDirs.push(dir);
process.env.SF_CODING_AGENT_DIR = dir;
});
test("runTriageApply_when_rubber_duck_agrees_runs_both_agents_in_order", async () => {
const project = makeProject();
const calls: string[] = [];
const result = await runTriageApply(project, "triage prompt", {
candidateCount: 1,
allowUntrustedRunner: true,
agentRunner: async (agent) => {
calls.push(agent.name);
if (agent.name === "triage-decider") {
return {
ok: true,
output: deciderPlan,
exitCode: 0,
};
}
return { ok: true, output: "rubber-duck: agree", exitCode: 0 };
},
});
// Orchestration contract: rubber-duck agreed, so the apply phase ran.
// The actual mutation on sf-test-1 fails because the test project has
// no real ledger entry — that's the rejectedIds path, not a contract
// violation. ok=false on apply failures is itself the contract
// (codex review 2026-05-14 follow-up).
assert.equal(result.agreed, true, "rubber-duck must agree");
assert.deepEqual(
calls,
["triage-decider", "rubber-duck"],
"decider must run before rubber-duck",
);
assert.equal(result.ok, false, "apply should fail on missing ledger entry");
assert.deepEqual(result.rejectedIds, ["sf-test-1"]);
assert.deepEqual(result.resolvedIds, []);
});
test("runTriageApply_when_custom_runner_without_trust_flag_refuses", async () => {
// Defense-in-depth: an injected agentRunner can side-channel-mutate
// the ledger despite the read-only tool override. Only test paths
// that explicitly set allowUntrustedRunner can pass a custom runner.
const project = makeProject();
let calls = 0;
const result = await runTriageApply(project, "triage prompt", {
// NOTE: allowUntrustedRunner intentionally omitted.
agentRunner: async () => {
calls += 1;
return { ok: true, output: deciderPlan, exitCode: 0 };
},
});
assert.equal(result.ok, false);
assert.equal(
calls,
0,
"agentRunner must not be invoked when trust flag is absent",
);
assert.match(result.error ?? "", /allowUntrustedRunner|custom agentRunner/i);
});
test("runTriageApply_when_rubber_duck_disagrees_blocks_completion", async () => {
const project = makeProject();
const result = await runTriageApply(project, "triage prompt", {
allowUntrustedRunner: true,
agentRunner: async (agent) => {
if (agent.name === "triage-decider") {
return {
ok: true,
output: deciderPlan,
exitCode: 0,
};
}
return {
ok: true,
output: "## Concern 1:\nThe cited closure is not supported.",
exitCode: 0,
};
},
});
assert.equal(result.ok, false);
assert.equal(result.agreed, false);
assert.equal(
result.error,
"rubber-duck disagreed — pausing for operator review",
);
});
test("runTriageApply_when_decider_fails_skips_rubber_duck", async () => {
const project = makeProject();
const calls: string[] = [];
const result = await runTriageApply(project, "triage prompt", {
allowUntrustedRunner: true,
agentRunner: async (agent) => {
calls.push(agent.name);
return {
ok: false,
output: "",
stderr: "model failed",
exitCode: 1,
};
},
});
assert.equal(result.ok, false);
assert.equal(result.error, "model failed");
assert.deepEqual(calls, ["triage-decider"]);
});
test("runTriageApply_when_decider_outputs_unknown_id_rejects_plan_before_review", async () => {
const project = makeProject();
const calls: string[] = [];
const result = await runTriageApply(project, "triage prompt", {
candidateCount: 1,
expectedIds: ["sf-expected-1"],
allowUntrustedRunner: true,
agentRunner: async (agent) => {
calls.push(agent.name);
return {
ok: true,
output: deciderPlan,
exitCode: 0,
};
},
});
assert.equal(result.ok, false);
assert.match(result.error ?? "", /not in the candidate set/);
assert.deepEqual(calls, ["triage-decider"]);
});