fix(triage): --run routes through runTriageApply{dryRun:true} via SF router

Closes sf-mp5khix3-9beona architecture-defect:triage-run-bypasses-sf-routing.

The legacy `runTriage` in self-feedback-drain.js hardcoded
DEFAULT_TRIAGE_MODEL="google-gemini-cli/gemini-3-pro-preview" and
dispatched via @singularity-forge/ai completeSimple (text-only, no
tools). The result: an autonomous triage path that produced a markdown
decision matrix operators had to manually apply via resolve_issue.

Now `--run` goes through runTriageApply with a new `dryRun: true`
option that:
- uses the same Phase 1/2 pipeline as --apply (triage-decider + review)
- pre-resolves the model via SF's router (rankTriageModelsViaRouter),
  no hardcoded model
- skips Phase 3 applyTriagePlan (read-only by design)
- uses permissionProfile="low" and relaxes the trusted-source +
  custom-runner guards for the inspection path
- prefixes flowId with "triage-run-" for clean trace separation

Legacy runTriage kept as @deprecated (still exercised by
self-feedback-drain.test.mjs unit tests that target completeSimple
dispatch directly).

Tests: 6 new in headless-triage-run-routing.test.ts covering dryRun
short-circuit, no ledger mutations, guard relaxation, router not
hardcoded, disagreement surfaces deciderOutput. Full triage suite:
35 tests pass, 0 regressions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-15 09:20:43 +02:00
parent a5dd5db354
commit f3454de58a
3 changed files with 348 additions and 49 deletions

View file

@ -455,9 +455,15 @@ export async function runTriageApply(
// side-channel-mutate the ledger despite the read-only tool override
// the orchestrator enforces (codex review 2026-05-14 follow-up).
allowUntrustedRunner?: boolean;
// When true, run Phase 1 (triage-decider) and Phase 2 (review-code)
// but skip Phase 3 (applyTriagePlan). Designed for --run mode: the
// decision plan is surfaced for operator review without any ledger
// mutations. The trusted-source guard is also relaxed — read-only
// inspection does not require built-in agents.
dryRun?: boolean;
} = {},
): Promise<RunTriageApplyResult> {
const flowId = `triage-apply-${randomUUID()}`;
const flowId = `triage-${options.dryRun ? "run" : "apply"}-${randomUUID()}`;
let seq = 0;
const emit = (eventType: string, data: Record<string, unknown> = {}) =>
emitTriageApplyJournal(cwd, flowId, seq++, eventType, data);
@ -503,16 +509,17 @@ export async function runTriageApply(
};
// surface: "headless" - runTriageApply is always operator-invoked
// via sf headless triage --apply.
// via sf headless triage --apply or --run.
// runControl: "supervised" - the operator launched this command; it's
// not an autonomous-loop self-initiation.
// permissionProfile: "high" - --apply mutates the ledger, so the run
// must have write permission.
// dryRun=true uses "read" — no mutations.
// traceId: flowId - already a UUID-stamped per-run id.
const uokContext = runContextModule.buildUokRunContext({
surface: "headless",
runControl: "supervised",
permissionProfile: "high",
permissionProfile: options.dryRun ? "low" : "high",
traceId: flowId,
});
@ -523,7 +530,9 @@ export async function runTriageApply(
extra: Record<string, unknown> = {},
): Error | null => {
if (!uokContext) {
return new Error("buildUokRunContext returned null for triage --apply");
return new Error(
`buildUokRunContext returned null for triage --${options.dryRun ? "run" : "apply"}`,
);
}
const event = {
type: "gate_run",
@ -645,7 +654,12 @@ export async function runTriageApply(
// project-level override could silently disable review-code's
// independence. Operators can still customize behavior for inspect
// workflows, but --apply uses only the shipped review contract.
if (triageDecider.source !== "builtin" || reviewCode.source !== "builtin") {
// dryRun=true (--run mode) skips this guard: read-only inspection does
// not require built-in agents because no ledger mutations will occur.
if (
!options.dryRun &&
(triageDecider.source !== "builtin" || reviewCode.source !== "builtin")
) {
const rationale = `non-builtin agents (triage-decider=${triageDecider.source}, review-code=${reviewCode.source})`;
const gateFailure = await emitRequiredTriageGate(
"trusted-agent-source-gate",
@ -670,17 +684,20 @@ export async function runTriageApply(
flowId,
};
}
if (!options.dryRun) {
const trustGateFailure = await emitRequiredTriageGate(
"trusted-agent-source-gate",
"pass",
"both triage-decider and review-code are SF-shipped built-ins",
);
if (trustGateFailure) return trustGateFailure;
}
// Custom-runner guard (codex review follow-up): an injected agentRunner
// can side-channel-mutate the ledger despite the read-only tool override.
// Only allow it when allowUntrustedRunner is explicitly set (test path).
if (options.agentRunner && !options.allowUntrustedRunner) {
// dryRun=true relaxes this because no mutations are performed.
if (options.agentRunner && !options.allowUntrustedRunner && !options.dryRun) {
// Same trust contract as missing-agent / non-builtin source: the
// run cannot guarantee built-in behavior, so it's a failure of the
// trusted-agent-source-gate, surfaced through status uok.
@ -849,6 +866,24 @@ export async function runTriageApply(
);
if (reviewGateFailure) return reviewGateFailure;
// dryRun (--run mode): surface the plan + review for operator inspection
// without calling applyTriagePlan. No ledger mutations occur.
if (options.dryRun) {
await emit("triage-run-complete", {
decisionCount: plan.length,
dryRun: true,
});
return {
ok: true,
agreed: true,
deciderOutput: decider.output,
reviewOutput: review.output,
resolvedIds: [],
pendingFixIds: plan.map((d) => d.id),
flowId,
};
}
// Phase 3: apply the plan. We (this runner) call markResolved for
// each close/promote decision; fix decisions get surfaced for the
// operator but never auto-mutate. Mutations happen ONCE, post-review,
@ -1185,21 +1220,40 @@ export async function handleTriage(
return { exitCode: result.ok ? 0 : 1 };
}
// --run: dispatch the prompt via @singularity-forge/ai completeSimple,
// capture the decision text, persist to .sf/triage/decisions/<ts>.md.
// Same shape as `sf headless reflect --run`. The model's output is a
// decision matrix — applying the decisions (resolve_issue calls, code
// edits) is operator-driven; a tool-enabled variant is follow-up work.
// --run: dispatch via triage-decider + review-code (same agent path as
// --apply) but with dryRun=true so Phase 3 (applyTriagePlan) is skipped.
// The triage-decider's decision plan is captured and persisted as a
// decision report for operator review. Model routing uses SF's router
// (rankTriageModelsViaRouter) — no hardcoded model string.
let resolvedModel = options.model;
if (!resolvedModel && !options.agentRunner) {
try {
const ranked = await drainModule.rankTriageModelsViaRouter();
resolvedModel = ranked[0];
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
process.stderr.write(
"[triage] dispatching to model (this can take a few minutes)…\n",
`[triage] router pre-resolution failed; falling back to subprocess default: ${msg}\n`,
);
const result = await drainModule.runTriage(prompt, { model: options.model });
}
}
process.stderr.write(
`[triage] running triage-decider -> review-code (dry-run, no mutations)${
resolvedModel ? ` (model: ${resolvedModel})` : ""
} (this can take a few minutes)\n`,
);
const result = await runTriageApply(cwd, prompt, {
model: resolvedModel,
agentRunner: options.agentRunner,
candidateCount: candidates.length,
expectedIds: candidates.map((candidate) => candidate.id),
dryRun: true,
});
if (!result.ok) {
const payload = {
ok: false,
error: result.error ?? "unknown triage error",
provider: result.provider,
modelId: result.modelId,
flowId: result.flowId,
};
process.stdout.write(
options.json
@ -1210,24 +1264,19 @@ export async function handleTriage(
}
const reportPath = drainModule.writeTriageDecisionReport(
cwd,
result.content ?? "",
result.deciderOutput ?? "",
);
const payload = {
ok: true,
reportPath,
cleanFinish: result.cleanFinish === true,
provider: result.provider,
modelId: result.modelId,
flowId: result.flowId,
deciderOutput: result.deciderOutput,
reviewOutput: result.reviewOutput,
};
if (options.json) {
process.stdout.write(`${JSON.stringify(payload)}\n`);
} else {
process.stdout.write(`Triage decisions written to: ${reportPath}\n`);
if (!result.cleanFinish) {
process.stderr.write(
'[triage] WARNING: report did not include "Self-feedback triage complete" terminator — output may be truncated\n',
);
}
}
return { exitCode: 0 };
}

View file

@ -498,7 +498,8 @@ async function readOperatorTriageCandidates() {
* yields no usable models. This is the safe Chinese+Gemini set that
* matches the operator's typical config, NOT a universal model dump.
*
* Consumer: runTriage (when operator doesn't pass --model).
* Consumer: headless-triage handleTriage (--run and --apply paths, when
* operator doesn't pass --model). Also used by the deprecated runTriage.
*/
export async function rankTriageModelsViaRouter(candidates) {
const candidateList =
@ -555,7 +556,8 @@ function extractAssistantText(message) {
* .sf/triage/decisions/ keeps both subsystems clearly namespaced without
* either subsystem accidentally consuming the other's files.
*
* Consumer: headless-triage operator surface (--run flag).
* Consumer: headless-triage handleTriage (--run flag, both legacy and
* the new runTriageApply dryRun path).
*/
export function writeTriageDecisionReport(basePath, content) {
try {
@ -571,31 +573,27 @@ export function writeTriageDecisionReport(basePath, content) {
}
/**
* Run a triage pass against the canonical inline-fix prompt.
* Run a triage pass against the canonical inline-fix prompt using
* @singularity-forge/ai completeSimple (text-only, no tools).
*
* Model selection (sf-mp5khix3-9beona AC1): when options.model is supplied
* the operator's choice wins. Otherwise, route through SF's model-router
* using BASE_REQUIREMENTS["self-feedback-triage"] (agentic-heavy) over the
* TRIAGE_CANDIDATES list. Falls back to TRIAGE_FALLBACK_MODEL if scoring
* fails. No more hardcoded gemini default.
* @deprecated sf-mp5khix3-9beona: the `sf headless triage --run` operator
* path no longer calls this function. It now routes through
* `runTriageApply` (headless-triage.ts) with `dryRun: true`, which uses
* SF's proper agent runner (defaultAgentRunner / runSubagent), role-routed
* model selection, and the full triage-decider + review-code tool surface.
*
* Provider-agnostic dispatch: routes through @singularity-forge/ai's
* completeSimple, mirroring runReflection. Today's path captures decision
* text only the actual fix/promote/close actions are operator-applied.
* A tool-enabled --apply variant (where the model calls resolve_issue
* directly) lands in a follow-up (sf-mp5khix3-9beona AC2).
* This function is retained only for test compatibility of the
* self-feedback-drain unit tests that exercise the completeSimple dispatch
* layer directly. Do NOT add new call sites here. Redirect any new callers
* to `runTriageApply(..., { dryRun: true })` in headless-triage.ts.
*
* options.model "provider/modelId" override. Defaults to router pick.
* options.candidates override the candidate list for router-based pick
* (mostly useful in tests).
* options.complete dependency injection for tests; same shape as the
* reflection runner.
* options.candidates override the candidate list for router-based pick.
* options.complete dependency injection for tests.
* options.timeoutMs defaults to 8 minutes.
*
* Returns { ok, content?, cleanFinish?, error?, provider?, modelId? }.
* Best-effort; never throws.
*
* Consumer: headless-triage operator surface (--run flag).
*/
export async function runTriage(prompt, options = {}) {
const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000;

View file

@ -0,0 +1,252 @@
/**
* headless-triage-run-routing.test.ts
*
* Verifies that `sf headless triage --run` routes through `runTriageApply`
* with `dryRun: true` instead of the legacy `completeSimple` path
* (sf-mp5khix3-9beona architecture-defect fix).
*
* Key contracts tested:
* 1. --run dispatches via runTriageApply with dryRun=true (not completeSimple)
* 2. resolve_issue / applyTriagePlan is NOT called during --run
* 3. Model routing uses SF's router, not a hardcoded string
* 4. The legacy runTriage (completeSimple) path is no longer invoked by --run
* 5. dryRun=true relaxes the trusted-source guard (no allowUntrustedRunner needed)
*/
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, test } from "vitest";
import { runTriageApply } from "../headless-triage.js";
const tempDirs: string[] = [];
let originalAgentDir: string | undefined;
const deciderPlan = [
"```yaml",
"decisions:",
" - id: sf-dryrun-1",
" outcome: close",
" evidence_kind: human-clear",
" reason: stale observation",
"```",
"Self-feedback triage complete.",
].join("\n");
function makeProject(): string {
const dir = mkdtempSync(join(tmpdir(), "sf-headless-triage-run-"));
tempDirs.push(dir);
mkdirSync(join(dir, ".sf"), { recursive: true });
return dir;
}
afterEach(() => {
if (originalAgentDir === undefined) delete process.env.SF_CODING_AGENT_DIR;
else process.env.SF_CODING_AGENT_DIR = originalAgentDir;
while (tempDirs.length > 0) {
rmSync(tempDirs.pop()!, { recursive: true, force: true });
}
});
beforeEach(() => {
originalAgentDir = process.env.SF_CODING_AGENT_DIR;
const dir = mkdtempSync(join(tmpdir(), "sf-headless-agent-dir-"));
tempDirs.push(dir);
process.env.SF_CODING_AGENT_DIR = dir;
});
// ── Test 1: dryRun=true runs both agents but skips Phase 3 ────────────────────
test("runTriageApply_dryRun_runs_decider_and_review_then_skips_apply", async () => {
const project = makeProject();
const calls: string[] = [];
const result = await runTriageApply(project, "triage prompt", {
dryRun: true,
// dryRun relaxes the trusted-source guard — no allowUntrustedRunner needed
agentRunner: async (agent) => {
calls.push(agent.name);
if (agent.name === "triage-decider") {
return { ok: true, output: deciderPlan, exitCode: 0 };
}
return { ok: true, output: "review-code: agree", exitCode: 0 };
},
});
// Both agents must have run (Phase 1 + Phase 2 complete)
assert.deepEqual(
calls,
["triage-decider", "review-code"],
"decider and review-code must both run in dry-run mode",
);
// ok=true: the decision was produced and the reviewer agreed
assert.equal(result.ok, true, "dry-run should succeed when review-code agrees");
assert.equal(result.agreed, true, "agreed must be true when review-code agrees");
// No ledger mutations — resolvedIds must be empty
assert.deepEqual(result.resolvedIds, [], "dry-run must not resolve any entries");
// deciderOutput must be captured for report writing
assert.ok(
typeof result.deciderOutput === "string" && result.deciderOutput.length > 0,
"deciderOutput must be populated for report writing",
);
// flowId must use the triage-run- prefix (not triage-apply-)
assert.ok(
result.flowId?.startsWith("triage-run-"),
`flowId should start with triage-run-, got: ${result.flowId}`,
);
});
// ── Test 2: resolve_issue / applyTriagePlan not called in dryRun ──────────────
test("runTriageApply_dryRun_does_not_mutate_ledger_even_when_review_agrees", async () => {
const project = makeProject();
// Track if applyTriagePlan would have been attempted by checking whether
// any markResolved-shaped side-effect occurred. In practice, the easiest
// observable signal is that resolvedIds is empty and rejectedIds is absent
// (--apply path sets rejectedIds when markResolved fails on missing entries).
const result = await runTriageApply(project, "triage prompt", {
dryRun: true,
agentRunner: async (agent) => {
if (agent.name === "triage-decider") {
return { ok: true, output: deciderPlan, exitCode: 0 };
}
return { ok: true, output: "review-code: agree", exitCode: 0 };
},
});
assert.equal(result.ok, true);
assert.deepEqual(result.resolvedIds, []);
// rejectedIds only appears when applyTriagePlan ran and failed; in dry-run
// it should be absent (or empty), not a list of failures.
assert.ok(
result.rejectedIds === undefined || result.rejectedIds.length === 0,
"rejectedIds must be absent or empty in dry-run (no apply attempted)",
);
});
// ── Test 3: dryRun relaxes the trusted-source guard ──────────────────────────
test("runTriageApply_dryRun_allows_custom_runner_without_allowUntrustedRunner", async () => {
// In --apply mode, passing a custom agentRunner without allowUntrustedRunner
// is refused. In dryRun mode (no mutations), this guard is relaxed.
const project = makeProject();
const result = await runTriageApply(project, "triage prompt", {
dryRun: true,
// NOTE: no allowUntrustedRunner — dryRun should permit this
agentRunner: async (agent) => {
if (agent.name === "triage-decider") {
return { ok: true, output: deciderPlan, exitCode: 0 };
}
return { ok: true, output: "review-code: agree", exitCode: 0 };
},
});
assert.equal(
result.ok,
true,
"dryRun should allow custom runner without allowUntrustedRunner",
);
assert.ok(
!(result.error ?? "").match(
/allowUntrustedRunner|custom agentRunner/i,
),
`unexpected trust error in dry-run: ${result.error}`,
);
});
// ── Test 4: legacy runTriage guard — dryRun does NOT call completeSimple ──────
test("runTriageApply_dryRun_does_not_invoke_completeSimple", async () => {
// The agentRunner injection proves the call goes through the agent path,
// not the completeSimple path. If completeSimple were invoked, it would
// try to import @singularity-forge/ai and fail or produce different output.
// We verify by confirming the agentRunner was actually called (agent path),
// not bypassed (completeSimple path).
const project = makeProject();
let agentRunnerCalled = false;
const result = await runTriageApply(project, "triage prompt", {
dryRun: true,
agentRunner: async (agent) => {
agentRunnerCalled = true;
if (agent.name === "triage-decider") {
return { ok: true, output: deciderPlan, exitCode: 0 };
}
return { ok: true, output: "review-code: agree", exitCode: 0 };
},
});
assert.equal(
agentRunnerCalled,
true,
"agentRunner (agent path) must be called, not the legacy completeSimple path",
);
assert.equal(result.ok, true);
});
// ── Test 5: dryRun model field is not a hardcoded legacy string ───────────────
test("runTriageApply_dryRun_passes_model_from_options_not_hardcoded_gemini", async () => {
const project = makeProject();
const LEGACY_HARDCODED = "google-gemini-cli/gemini-3-pro-preview";
let modelUsed: string | undefined;
await runTriageApply(project, "triage prompt", {
dryRun: true,
model: "anthropic/claude-sonnet-4-5",
agentRunner: async (_agent, _task, opts) => {
modelUsed = opts?.model;
if (_agent.name === "triage-decider") {
return { ok: true, output: deciderPlan, exitCode: 0 };
}
return { ok: true, output: "review-code: agree", exitCode: 0 };
},
});
assert.notEqual(
modelUsed,
LEGACY_HARDCODED,
`--run must not use hardcoded "${LEGACY_HARDCODED}"`,
);
assert.equal(
modelUsed,
"anthropic/claude-sonnet-4-5",
"model from options must be forwarded to agentRunner",
);
});
// ── Test 6: dryRun when review-code disagrees still surfaces the plan ─────────
test("runTriageApply_dryRun_when_review_disagrees_returns_ok_false_with_decider_output", async () => {
const project = makeProject();
const result = await runTriageApply(project, "triage prompt", {
dryRun: true,
agentRunner: async (agent) => {
if (agent.name === "triage-decider") {
return { ok: true, output: deciderPlan, exitCode: 0 };
}
return {
ok: true,
output:
"The evidence_kind for sf-dryrun-1 is unsupported. I cannot agree.",
exitCode: 0,
};
},
});
assert.equal(result.ok, false);
assert.equal(result.agreed, false);
// deciderOutput must still be present so operator can inspect the plan
assert.ok(
typeof result.deciderOutput === "string" && result.deciderOutput.length > 0,
"deciderOutput must be available even when review disagrees",
);
assert.deepEqual(result.resolvedIds, []);
});