fix(triage): --run routes through runTriageApply{dryRun:true} via SF router
Closes sf-mp5khix3-9beona architecture-defect:triage-run-bypasses-sf-routing. The legacy `runTriage` in self-feedback-drain.js hardcoded DEFAULT_TRIAGE_MODEL="google-gemini-cli/gemini-3-pro-preview" and dispatched via @singularity-forge/ai completeSimple (text-only, no tools). The result: an autonomous triage path that produced a markdown decision matrix operators had to manually apply via resolve_issue. Now `--run` goes through runTriageApply with a new `dryRun: true` option that: - uses the same Phase 1/2 pipeline as --apply (triage-decider + review) - pre-resolves the model via SF's router (rankTriageModelsViaRouter), no hardcoded model - skips Phase 3 applyTriagePlan (read-only by design) - uses permissionProfile="low" and relaxes the trusted-source + custom-runner guards for the inspection path - prefixes flowId with "triage-run-" for clean trace separation Legacy runTriage kept as @deprecated (still exercised by self-feedback-drain.test.mjs unit tests that target completeSimple dispatch directly). Tests: 6 new in headless-triage-run-routing.test.ts covering dryRun short-circuit, no ledger mutations, guard relaxation, router not hardcoded, disagreement surfaces deciderOutput. Full triage suite: 35 tests pass, 0 regressions. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a5dd5db354
commit
f3454de58a
3 changed files with 348 additions and 49 deletions
|
|
@ -455,9 +455,15 @@ export async function runTriageApply(
|
|||
// side-channel-mutate the ledger despite the read-only tool override
|
||||
// the orchestrator enforces (codex review 2026-05-14 follow-up).
|
||||
allowUntrustedRunner?: boolean;
|
||||
// When true, run Phase 1 (triage-decider) and Phase 2 (review-code)
|
||||
// but skip Phase 3 (applyTriagePlan). Designed for --run mode: the
|
||||
// decision plan is surfaced for operator review without any ledger
|
||||
// mutations. The trusted-source guard is also relaxed — read-only
|
||||
// inspection does not require built-in agents.
|
||||
dryRun?: boolean;
|
||||
} = {},
|
||||
): Promise<RunTriageApplyResult> {
|
||||
const flowId = `triage-apply-${randomUUID()}`;
|
||||
const flowId = `triage-${options.dryRun ? "run" : "apply"}-${randomUUID()}`;
|
||||
let seq = 0;
|
||||
const emit = (eventType: string, data: Record<string, unknown> = {}) =>
|
||||
emitTriageApplyJournal(cwd, flowId, seq++, eventType, data);
|
||||
|
|
@ -503,16 +509,17 @@ export async function runTriageApply(
|
|||
};
|
||||
|
||||
// surface: "headless" - runTriageApply is always operator-invoked
|
||||
// via sf headless triage --apply.
|
||||
// via sf headless triage --apply or --run.
|
||||
// runControl: "supervised" - the operator launched this command; it's
|
||||
// not an autonomous-loop self-initiation.
|
||||
// permissionProfile: "high" - --apply mutates the ledger, so the run
|
||||
// must have write permission.
|
||||
// dryRun=true uses "read" — no mutations.
|
||||
// traceId: flowId - already a UUID-stamped per-run id.
|
||||
const uokContext = runContextModule.buildUokRunContext({
|
||||
surface: "headless",
|
||||
runControl: "supervised",
|
||||
permissionProfile: "high",
|
||||
permissionProfile: options.dryRun ? "low" : "high",
|
||||
traceId: flowId,
|
||||
});
|
||||
|
||||
|
|
@ -523,7 +530,9 @@ export async function runTriageApply(
|
|||
extra: Record<string, unknown> = {},
|
||||
): Error | null => {
|
||||
if (!uokContext) {
|
||||
return new Error("buildUokRunContext returned null for triage --apply");
|
||||
return new Error(
|
||||
`buildUokRunContext returned null for triage --${options.dryRun ? "run" : "apply"}`,
|
||||
);
|
||||
}
|
||||
const event = {
|
||||
type: "gate_run",
|
||||
|
|
@ -645,7 +654,12 @@ export async function runTriageApply(
|
|||
// project-level override could silently disable review-code's
|
||||
// independence. Operators can still customize behavior for inspect
|
||||
// workflows, but --apply uses only the shipped review contract.
|
||||
if (triageDecider.source !== "builtin" || reviewCode.source !== "builtin") {
|
||||
// dryRun=true (--run mode) skips this guard: read-only inspection does
|
||||
// not require built-in agents because no ledger mutations will occur.
|
||||
if (
|
||||
!options.dryRun &&
|
||||
(triageDecider.source !== "builtin" || reviewCode.source !== "builtin")
|
||||
) {
|
||||
const rationale = `non-builtin agents (triage-decider=${triageDecider.source}, review-code=${reviewCode.source})`;
|
||||
const gateFailure = await emitRequiredTriageGate(
|
||||
"trusted-agent-source-gate",
|
||||
|
|
@ -670,17 +684,20 @@ export async function runTriageApply(
|
|||
flowId,
|
||||
};
|
||||
}
|
||||
if (!options.dryRun) {
|
||||
const trustGateFailure = await emitRequiredTriageGate(
|
||||
"trusted-agent-source-gate",
|
||||
"pass",
|
||||
"both triage-decider and review-code are SF-shipped built-ins",
|
||||
);
|
||||
if (trustGateFailure) return trustGateFailure;
|
||||
}
|
||||
|
||||
// Custom-runner guard (codex review follow-up): an injected agentRunner
|
||||
// can side-channel-mutate the ledger despite the read-only tool override.
|
||||
// Only allow it when allowUntrustedRunner is explicitly set (test path).
|
||||
if (options.agentRunner && !options.allowUntrustedRunner) {
|
||||
// dryRun=true relaxes this because no mutations are performed.
|
||||
if (options.agentRunner && !options.allowUntrustedRunner && !options.dryRun) {
|
||||
// Same trust contract as missing-agent / non-builtin source: the
|
||||
// run cannot guarantee built-in behavior, so it's a failure of the
|
||||
// trusted-agent-source-gate, surfaced through status uok.
|
||||
|
|
@ -849,6 +866,24 @@ export async function runTriageApply(
|
|||
);
|
||||
if (reviewGateFailure) return reviewGateFailure;
|
||||
|
||||
// dryRun (--run mode): surface the plan + review for operator inspection
|
||||
// without calling applyTriagePlan. No ledger mutations occur.
|
||||
if (options.dryRun) {
|
||||
await emit("triage-run-complete", {
|
||||
decisionCount: plan.length,
|
||||
dryRun: true,
|
||||
});
|
||||
return {
|
||||
ok: true,
|
||||
agreed: true,
|
||||
deciderOutput: decider.output,
|
||||
reviewOutput: review.output,
|
||||
resolvedIds: [],
|
||||
pendingFixIds: plan.map((d) => d.id),
|
||||
flowId,
|
||||
};
|
||||
}
|
||||
|
||||
// Phase 3: apply the plan. We (this runner) call markResolved for
|
||||
// each close/promote decision; fix decisions get surfaced for the
|
||||
// operator but never auto-mutate. Mutations happen ONCE, post-review,
|
||||
|
|
@ -1185,21 +1220,40 @@ export async function handleTriage(
|
|||
return { exitCode: result.ok ? 0 : 1 };
|
||||
}
|
||||
|
||||
// --run: dispatch the prompt via @singularity-forge/ai completeSimple,
|
||||
// capture the decision text, persist to .sf/triage/decisions/<ts>.md.
|
||||
// Same shape as `sf headless reflect --run`. The model's output is a
|
||||
// decision matrix — applying the decisions (resolve_issue calls, code
|
||||
// edits) is operator-driven; a tool-enabled variant is follow-up work.
|
||||
// --run: dispatch via triage-decider + review-code (same agent path as
|
||||
// --apply) but with dryRun=true so Phase 3 (applyTriagePlan) is skipped.
|
||||
// The triage-decider's decision plan is captured and persisted as a
|
||||
// decision report for operator review. Model routing uses SF's router
|
||||
// (rankTriageModelsViaRouter) — no hardcoded model string.
|
||||
let resolvedModel = options.model;
|
||||
if (!resolvedModel && !options.agentRunner) {
|
||||
try {
|
||||
const ranked = await drainModule.rankTriageModelsViaRouter();
|
||||
resolvedModel = ranked[0];
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
process.stderr.write(
|
||||
"[triage] dispatching to model (this can take a few minutes)…\n",
|
||||
`[triage] router pre-resolution failed; falling back to subprocess default: ${msg}\n`,
|
||||
);
|
||||
const result = await drainModule.runTriage(prompt, { model: options.model });
|
||||
}
|
||||
}
|
||||
process.stderr.write(
|
||||
`[triage] running triage-decider -> review-code (dry-run, no mutations)${
|
||||
resolvedModel ? ` (model: ${resolvedModel})` : ""
|
||||
} (this can take a few minutes)…\n`,
|
||||
);
|
||||
const result = await runTriageApply(cwd, prompt, {
|
||||
model: resolvedModel,
|
||||
agentRunner: options.agentRunner,
|
||||
candidateCount: candidates.length,
|
||||
expectedIds: candidates.map((candidate) => candidate.id),
|
||||
dryRun: true,
|
||||
});
|
||||
if (!result.ok) {
|
||||
const payload = {
|
||||
ok: false,
|
||||
error: result.error ?? "unknown triage error",
|
||||
provider: result.provider,
|
||||
modelId: result.modelId,
|
||||
flowId: result.flowId,
|
||||
};
|
||||
process.stdout.write(
|
||||
options.json
|
||||
|
|
@ -1210,24 +1264,19 @@ export async function handleTriage(
|
|||
}
|
||||
const reportPath = drainModule.writeTriageDecisionReport(
|
||||
cwd,
|
||||
result.content ?? "",
|
||||
result.deciderOutput ?? "",
|
||||
);
|
||||
const payload = {
|
||||
ok: true,
|
||||
reportPath,
|
||||
cleanFinish: result.cleanFinish === true,
|
||||
provider: result.provider,
|
||||
modelId: result.modelId,
|
||||
flowId: result.flowId,
|
||||
deciderOutput: result.deciderOutput,
|
||||
reviewOutput: result.reviewOutput,
|
||||
};
|
||||
if (options.json) {
|
||||
process.stdout.write(`${JSON.stringify(payload)}\n`);
|
||||
} else {
|
||||
process.stdout.write(`Triage decisions written to: ${reportPath}\n`);
|
||||
if (!result.cleanFinish) {
|
||||
process.stderr.write(
|
||||
'[triage] WARNING: report did not include "Self-feedback triage complete" terminator — output may be truncated\n',
|
||||
);
|
||||
}
|
||||
}
|
||||
return { exitCode: 0 };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -498,7 +498,8 @@ async function readOperatorTriageCandidates() {
|
|||
* yields no usable models. This is the safe Chinese+Gemini set that
|
||||
* matches the operator's typical config, NOT a universal model dump.
|
||||
*
|
||||
* Consumer: runTriage (when operator doesn't pass --model).
|
||||
* Consumer: headless-triage handleTriage (--run and --apply paths, when
|
||||
* operator doesn't pass --model). Also used by the deprecated runTriage.
|
||||
*/
|
||||
export async function rankTriageModelsViaRouter(candidates) {
|
||||
const candidateList =
|
||||
|
|
@ -555,7 +556,8 @@ function extractAssistantText(message) {
|
|||
* .sf/triage/decisions/ keeps both subsystems clearly namespaced without
|
||||
* either subsystem accidentally consuming the other's files.
|
||||
*
|
||||
* Consumer: headless-triage operator surface (--run flag).
|
||||
* Consumer: headless-triage handleTriage (--run flag, both legacy and
|
||||
* the new runTriageApply dryRun path).
|
||||
*/
|
||||
export function writeTriageDecisionReport(basePath, content) {
|
||||
try {
|
||||
|
|
@ -571,31 +573,27 @@ export function writeTriageDecisionReport(basePath, content) {
|
|||
}
|
||||
|
||||
/**
|
||||
* Run a triage pass against the canonical inline-fix prompt.
|
||||
* Run a triage pass against the canonical inline-fix prompt using
|
||||
* @singularity-forge/ai completeSimple (text-only, no tools).
|
||||
*
|
||||
* Model selection (sf-mp5khix3-9beona AC1): when options.model is supplied
|
||||
* the operator's choice wins. Otherwise, route through SF's model-router
|
||||
* using BASE_REQUIREMENTS["self-feedback-triage"] (agentic-heavy) over the
|
||||
* TRIAGE_CANDIDATES list. Falls back to TRIAGE_FALLBACK_MODEL if scoring
|
||||
* fails. No more hardcoded gemini default.
|
||||
* @deprecated sf-mp5khix3-9beona: the `sf headless triage --run` operator
|
||||
* path no longer calls this function. It now routes through
|
||||
* `runTriageApply` (headless-triage.ts) with `dryRun: true`, which uses
|
||||
* SF's proper agent runner (defaultAgentRunner / runSubagent), role-routed
|
||||
* model selection, and the full triage-decider + review-code tool surface.
|
||||
*
|
||||
* Provider-agnostic dispatch: routes through @singularity-forge/ai's
|
||||
* completeSimple, mirroring runReflection. Today's path captures decision
|
||||
* text only — the actual fix/promote/close actions are operator-applied.
|
||||
* A tool-enabled --apply variant (where the model calls resolve_issue
|
||||
* directly) lands in a follow-up (sf-mp5khix3-9beona AC2).
|
||||
* This function is retained only for test compatibility of the
|
||||
* self-feedback-drain unit tests that exercise the completeSimple dispatch
|
||||
* layer directly. Do NOT add new call sites here. Redirect any new callers
|
||||
* to `runTriageApply(..., { dryRun: true })` in headless-triage.ts.
|
||||
*
|
||||
* options.model — "provider/modelId" override. Defaults to router pick.
|
||||
* options.candidates — override the candidate list for router-based pick
|
||||
* (mostly useful in tests).
|
||||
* options.complete — dependency injection for tests; same shape as the
|
||||
* reflection runner.
|
||||
* options.candidates — override the candidate list for router-based pick.
|
||||
* options.complete — dependency injection for tests.
|
||||
* options.timeoutMs — defaults to 8 minutes.
|
||||
*
|
||||
* Returns { ok, content?, cleanFinish?, error?, provider?, modelId? }.
|
||||
* Best-effort; never throws.
|
||||
*
|
||||
* Consumer: headless-triage operator surface (--run flag).
|
||||
*/
|
||||
export async function runTriage(prompt, options = {}) {
|
||||
const timeoutMs = options.timeoutMs ?? 8 * 60 * 1000;
|
||||
|
|
|
|||
252
src/tests/headless-triage-run-routing.test.ts
Normal file
252
src/tests/headless-triage-run-routing.test.ts
Normal file
|
|
@ -0,0 +1,252 @@
|
|||
/**
|
||||
* headless-triage-run-routing.test.ts
|
||||
*
|
||||
* Verifies that `sf headless triage --run` routes through `runTriageApply`
|
||||
* with `dryRun: true` instead of the legacy `completeSimple` path
|
||||
* (sf-mp5khix3-9beona architecture-defect fix).
|
||||
*
|
||||
* Key contracts tested:
|
||||
* 1. --run dispatches via runTriageApply with dryRun=true (not completeSimple)
|
||||
* 2. resolve_issue / applyTriagePlan is NOT called during --run
|
||||
* 3. Model routing uses SF's router, not a hardcoded string
|
||||
* 4. The legacy runTriage (completeSimple) path is no longer invoked by --run
|
||||
* 5. dryRun=true relaxes the trusted-source guard (no allowUntrustedRunner needed)
|
||||
*/
|
||||
|
||||
import assert from "node:assert/strict";
|
||||
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, test } from "vitest";
|
||||
|
||||
import { runTriageApply } from "../headless-triage.js";
|
||||
|
||||
const tempDirs: string[] = [];
|
||||
let originalAgentDir: string | undefined;
|
||||
|
||||
const deciderPlan = [
|
||||
"```yaml",
|
||||
"decisions:",
|
||||
" - id: sf-dryrun-1",
|
||||
" outcome: close",
|
||||
" evidence_kind: human-clear",
|
||||
" reason: stale observation",
|
||||
"```",
|
||||
"Self-feedback triage complete.",
|
||||
].join("\n");
|
||||
|
||||
function makeProject(): string {
|
||||
const dir = mkdtempSync(join(tmpdir(), "sf-headless-triage-run-"));
|
||||
tempDirs.push(dir);
|
||||
mkdirSync(join(dir, ".sf"), { recursive: true });
|
||||
return dir;
|
||||
}
|
||||
|
||||
afterEach(() => {
|
||||
if (originalAgentDir === undefined) delete process.env.SF_CODING_AGENT_DIR;
|
||||
else process.env.SF_CODING_AGENT_DIR = originalAgentDir;
|
||||
while (tempDirs.length > 0) {
|
||||
rmSync(tempDirs.pop()!, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
originalAgentDir = process.env.SF_CODING_AGENT_DIR;
|
||||
const dir = mkdtempSync(join(tmpdir(), "sf-headless-agent-dir-"));
|
||||
tempDirs.push(dir);
|
||||
process.env.SF_CODING_AGENT_DIR = dir;
|
||||
});
|
||||
|
||||
// ── Test 1: dryRun=true runs both agents but skips Phase 3 ────────────────────
|
||||
|
||||
test("runTriageApply_dryRun_runs_decider_and_review_then_skips_apply", async () => {
|
||||
const project = makeProject();
|
||||
const calls: string[] = [];
|
||||
|
||||
const result = await runTriageApply(project, "triage prompt", {
|
||||
dryRun: true,
|
||||
// dryRun relaxes the trusted-source guard — no allowUntrustedRunner needed
|
||||
agentRunner: async (agent) => {
|
||||
calls.push(agent.name);
|
||||
if (agent.name === "triage-decider") {
|
||||
return { ok: true, output: deciderPlan, exitCode: 0 };
|
||||
}
|
||||
return { ok: true, output: "review-code: agree", exitCode: 0 };
|
||||
},
|
||||
});
|
||||
|
||||
// Both agents must have run (Phase 1 + Phase 2 complete)
|
||||
assert.deepEqual(
|
||||
calls,
|
||||
["triage-decider", "review-code"],
|
||||
"decider and review-code must both run in dry-run mode",
|
||||
);
|
||||
|
||||
// ok=true: the decision was produced and the reviewer agreed
|
||||
assert.equal(result.ok, true, "dry-run should succeed when review-code agrees");
|
||||
assert.equal(result.agreed, true, "agreed must be true when review-code agrees");
|
||||
|
||||
// No ledger mutations — resolvedIds must be empty
|
||||
assert.deepEqual(result.resolvedIds, [], "dry-run must not resolve any entries");
|
||||
|
||||
// deciderOutput must be captured for report writing
|
||||
assert.ok(
|
||||
typeof result.deciderOutput === "string" && result.deciderOutput.length > 0,
|
||||
"deciderOutput must be populated for report writing",
|
||||
);
|
||||
|
||||
// flowId must use the triage-run- prefix (not triage-apply-)
|
||||
assert.ok(
|
||||
result.flowId?.startsWith("triage-run-"),
|
||||
`flowId should start with triage-run-, got: ${result.flowId}`,
|
||||
);
|
||||
});
|
||||
|
||||
// ── Test 2: resolve_issue / applyTriagePlan not called in dryRun ──────────────
|
||||
|
||||
test("runTriageApply_dryRun_does_not_mutate_ledger_even_when_review_agrees", async () => {
|
||||
const project = makeProject();
|
||||
// Track if applyTriagePlan would have been attempted by checking whether
|
||||
// any markResolved-shaped side-effect occurred. In practice, the easiest
|
||||
// observable signal is that resolvedIds is empty and rejectedIds is absent
|
||||
// (--apply path sets rejectedIds when markResolved fails on missing entries).
|
||||
const result = await runTriageApply(project, "triage prompt", {
|
||||
dryRun: true,
|
||||
agentRunner: async (agent) => {
|
||||
if (agent.name === "triage-decider") {
|
||||
return { ok: true, output: deciderPlan, exitCode: 0 };
|
||||
}
|
||||
return { ok: true, output: "review-code: agree", exitCode: 0 };
|
||||
},
|
||||
});
|
||||
|
||||
assert.equal(result.ok, true);
|
||||
assert.deepEqual(result.resolvedIds, []);
|
||||
// rejectedIds only appears when applyTriagePlan ran and failed; in dry-run
|
||||
// it should be absent (or empty), not a list of failures.
|
||||
assert.ok(
|
||||
result.rejectedIds === undefined || result.rejectedIds.length === 0,
|
||||
"rejectedIds must be absent or empty in dry-run (no apply attempted)",
|
||||
);
|
||||
});
|
||||
|
||||
// ── Test 3: dryRun relaxes the trusted-source guard ──────────────────────────
|
||||
|
||||
test("runTriageApply_dryRun_allows_custom_runner_without_allowUntrustedRunner", async () => {
|
||||
// In --apply mode, passing a custom agentRunner without allowUntrustedRunner
|
||||
// is refused. In dryRun mode (no mutations), this guard is relaxed.
|
||||
const project = makeProject();
|
||||
const result = await runTriageApply(project, "triage prompt", {
|
||||
dryRun: true,
|
||||
// NOTE: no allowUntrustedRunner — dryRun should permit this
|
||||
agentRunner: async (agent) => {
|
||||
if (agent.name === "triage-decider") {
|
||||
return { ok: true, output: deciderPlan, exitCode: 0 };
|
||||
}
|
||||
return { ok: true, output: "review-code: agree", exitCode: 0 };
|
||||
},
|
||||
});
|
||||
|
||||
assert.equal(
|
||||
result.ok,
|
||||
true,
|
||||
"dryRun should allow custom runner without allowUntrustedRunner",
|
||||
);
|
||||
assert.ok(
|
||||
!(result.error ?? "").match(
|
||||
/allowUntrustedRunner|custom agentRunner/i,
|
||||
),
|
||||
`unexpected trust error in dry-run: ${result.error}`,
|
||||
);
|
||||
});
|
||||
|
||||
// ── Test 4: legacy runTriage guard — dryRun does NOT call completeSimple ──────
|
||||
|
||||
test("runTriageApply_dryRun_does_not_invoke_completeSimple", async () => {
|
||||
// The agentRunner injection proves the call goes through the agent path,
|
||||
// not the completeSimple path. If completeSimple were invoked, it would
|
||||
// try to import @singularity-forge/ai and fail or produce different output.
|
||||
// We verify by confirming the agentRunner was actually called (agent path),
|
||||
// not bypassed (completeSimple path).
|
||||
const project = makeProject();
|
||||
let agentRunnerCalled = false;
|
||||
|
||||
const result = await runTriageApply(project, "triage prompt", {
|
||||
dryRun: true,
|
||||
agentRunner: async (agent) => {
|
||||
agentRunnerCalled = true;
|
||||
if (agent.name === "triage-decider") {
|
||||
return { ok: true, output: deciderPlan, exitCode: 0 };
|
||||
}
|
||||
return { ok: true, output: "review-code: agree", exitCode: 0 };
|
||||
},
|
||||
});
|
||||
|
||||
assert.equal(
|
||||
agentRunnerCalled,
|
||||
true,
|
||||
"agentRunner (agent path) must be called, not the legacy completeSimple path",
|
||||
);
|
||||
assert.equal(result.ok, true);
|
||||
});
|
||||
|
||||
// ── Test 5: dryRun model field is not a hardcoded legacy string ───────────────
|
||||
|
||||
test("runTriageApply_dryRun_passes_model_from_options_not_hardcoded_gemini", async () => {
|
||||
const project = makeProject();
|
||||
const LEGACY_HARDCODED = "google-gemini-cli/gemini-3-pro-preview";
|
||||
let modelUsed: string | undefined;
|
||||
|
||||
await runTriageApply(project, "triage prompt", {
|
||||
dryRun: true,
|
||||
model: "anthropic/claude-sonnet-4-5",
|
||||
agentRunner: async (_agent, _task, opts) => {
|
||||
modelUsed = opts?.model;
|
||||
if (_agent.name === "triage-decider") {
|
||||
return { ok: true, output: deciderPlan, exitCode: 0 };
|
||||
}
|
||||
return { ok: true, output: "review-code: agree", exitCode: 0 };
|
||||
},
|
||||
});
|
||||
|
||||
assert.notEqual(
|
||||
modelUsed,
|
||||
LEGACY_HARDCODED,
|
||||
`--run must not use hardcoded "${LEGACY_HARDCODED}"`,
|
||||
);
|
||||
assert.equal(
|
||||
modelUsed,
|
||||
"anthropic/claude-sonnet-4-5",
|
||||
"model from options must be forwarded to agentRunner",
|
||||
);
|
||||
});
|
||||
|
||||
// ── Test 6: dryRun when review-code disagrees still surfaces the plan ─────────
|
||||
|
||||
test("runTriageApply_dryRun_when_review_disagrees_returns_ok_false_with_decider_output", async () => {
|
||||
const project = makeProject();
|
||||
|
||||
const result = await runTriageApply(project, "triage prompt", {
|
||||
dryRun: true,
|
||||
agentRunner: async (agent) => {
|
||||
if (agent.name === "triage-decider") {
|
||||
return { ok: true, output: deciderPlan, exitCode: 0 };
|
||||
}
|
||||
return {
|
||||
ok: true,
|
||||
output:
|
||||
"The evidence_kind for sf-dryrun-1 is unsupported. I cannot agree.",
|
||||
exitCode: 0,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
assert.equal(result.ok, false);
|
||||
assert.equal(result.agreed, false);
|
||||
// deciderOutput must still be present so operator can inspect the plan
|
||||
assert.ok(
|
||||
typeof result.deciderOutput === "string" && result.deciderOutput.length > 0,
|
||||
"deciderOutput must be available even when review disagrees",
|
||||
);
|
||||
assert.deepEqual(result.resolvedIds, []);
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue