refactor: rename review gate agent
This commit is contained in:
parent
62fbc5d57b
commit
18aa257ede
22 changed files with 80 additions and 114 deletions
|
|
@ -1,8 +1,3 @@
|
|||
---
|
||||
name: sf-autonomous-runtime
|
||||
description: Explains SF autonomous loop, UOK gates, installed-runtime drift, and recovery paths.
|
||||
---
|
||||
|
||||
# SF Autonomous Runtime
|
||||
|
||||
## Context
|
||||
|
|
|
|||
|
|
@ -1,13 +1,8 @@
|
|||
---
|
||||
name: sf-command-surface
|
||||
description: Use when changing SF slash commands, browser command parity, or headless command dispatch.
|
||||
---
|
||||
|
||||
# SF Command Surface
|
||||
|
||||
## When to Use
|
||||
|
||||
This skill applies when:
|
||||
This reference applies when:
|
||||
- Adding or modifying SF slash commands (`/mode`, `/tasks`, `/skills`, etc.)
|
||||
- Changing command handlers in `src/resources/extensions/sf/commands/handlers/`
|
||||
- Updating command catalog descriptions
|
||||
|
|
|
|||
|
|
@ -1,8 +1,3 @@
|
|||
---
|
||||
name: sf-harness
|
||||
description: Use when changing SF harness behavior, generated evidence, verification loops, or repo-native harness boundaries.
|
||||
---
|
||||
|
||||
# SF Harness
|
||||
|
||||
Use this only for SF harness changes in this repository.
|
||||
|
|
|
|||
|
|
@ -1,8 +1,3 @@
|
|||
---
|
||||
name: sf-operating-model
|
||||
description: Use when changing SF operating-model vocabulary, surfaces, protocols, output formats, run control, or permission-profile boundaries.
|
||||
---
|
||||
|
||||
# SF Operating Model
|
||||
|
||||
Use this when editing SF operating-model docs or code paths.
|
||||
|
|
|
|||
|
|
@ -1,8 +1,3 @@
|
|||
---
|
||||
name: sf-planning
|
||||
description: Use when changing SF milestone, slice, task, backlog, promotion, or spec-first planning behavior.
|
||||
---
|
||||
|
||||
# SF Planning
|
||||
|
||||
Use this for SF planning-state changes in this repository.
|
||||
|
|
|
|||
|
|
@ -1,8 +1,3 @@
|
|||
---
|
||||
name: sf-state
|
||||
description: Use when changing SF state ownership, DB-first runtime state, generated artifacts, or .sf/.agents boundary rules.
|
||||
---
|
||||
|
||||
# SF State
|
||||
|
||||
Use this for SF state-boundary work in this repository.
|
||||
|
|
|
|||
|
|
@ -42,7 +42,6 @@ const HIDDEN_OR_ALIAS_SUBCOMMANDS = new Set([
|
|||
"auto",
|
||||
"footer-config", // alias for /statusline
|
||||
"h",
|
||||
"review-code", // alias for /rubber-duck
|
||||
"stop", // platform-intercepted via BASE_RUNTIME_COMMANDS — never reaches SF handler
|
||||
"undo-turn", // alias for /rewind
|
||||
"wt",
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ const src = resolve(
|
|||
__dirname,
|
||||
"..",
|
||||
"packages",
|
||||
"pi-coding-agent",
|
||||
"coding-agent",
|
||||
"dist",
|
||||
"core",
|
||||
"export-html",
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ const src = resolve(
|
|||
__dirname,
|
||||
"..",
|
||||
"packages",
|
||||
"pi-coding-agent",
|
||||
"coding-agent",
|
||||
"dist",
|
||||
"modes",
|
||||
"interactive",
|
||||
|
|
|
|||
|
|
@ -39,10 +39,9 @@ const jiti = createJiti(import.meta.filename, {
|
|||
debug: false,
|
||||
});
|
||||
|
||||
const agentExtensionsDir = join(getSfEnv().agentDir, "extensions", "sf");
|
||||
const useAgentDir = existsSync(join(agentExtensionsDir, "state.js"));
|
||||
|
||||
function sfExtensionPath(moduleName: string): string {
|
||||
const agentExtensionsDir = join(getSfEnv().agentDir, "extensions", "sf");
|
||||
const useAgentDir = existsSync(join(agentExtensionsDir, "state.js"));
|
||||
if (useAgentDir) return join(agentExtensionsDir, `${moduleName}.js`);
|
||||
const tsPath = resolveBundledSourceResource(
|
||||
import.meta.url,
|
||||
|
|
@ -114,8 +113,8 @@ type AgentRunner = (
|
|||
*
|
||||
* Why a structured plan instead of letting the decider call resolve_issue
|
||||
* directly: codex review 2026-05-14 flagged that the original sequential
|
||||
* design (decider → rubber-duck) let the decider mutate state during its
|
||||
* own turn, before rubber-duck ever saw the decisions. This parser pulls
|
||||
* design (decider → review-code) let the decider mutate state during its
|
||||
* own turn, before review-code ever saw the decisions. This parser pulls
|
||||
* the proposed actions out of the decider's text so they can be reviewed
|
||||
* BEFORE any resolve_issue call.
|
||||
*/
|
||||
|
|
@ -647,11 +646,11 @@ export async function runTriageApply(
|
|||
};
|
||||
const agents = agentsModule.discoverAgents?.(cwd, "both").agents ?? [];
|
||||
const triageDecider = agents.find((agent) => agent.name === "triage-decider");
|
||||
const rubberDuck = agents.find((agent) => agent.name === "rubber-duck");
|
||||
if (!triageDecider || !rubberDuck) {
|
||||
const reviewCode = agents.find((agent) => agent.name === "review-code");
|
||||
if (!triageDecider || !reviewCode) {
|
||||
const missing = [
|
||||
triageDecider ? null : "triage-decider",
|
||||
rubberDuck ? null : "rubber-duck",
|
||||
reviewCode ? null : "review-code",
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(", ");
|
||||
|
|
@ -677,25 +676,25 @@ export async function runTriageApply(
|
|||
}
|
||||
// Trusted-source guard (codex review 2026-05-14): when --apply will
|
||||
// mutate the ledger, BOTH agents must be SF-shipped built-ins. A
|
||||
// project-level override could silently disable rubber-duck's
|
||||
// project-level override could silently disable review-code's
|
||||
// independence. Operators can still customize behavior for inspect
|
||||
// workflows, but --apply uses only the shipped review contract.
|
||||
if (triageDecider.source !== "builtin" || rubberDuck.source !== "builtin") {
|
||||
const rationale = `non-builtin agents (triage-decider=${triageDecider.source}, rubber-duck=${rubberDuck.source})`;
|
||||
if (triageDecider.source !== "builtin" || reviewCode.source !== "builtin") {
|
||||
const rationale = `non-builtin agents (triage-decider=${triageDecider.source}, review-code=${reviewCode.source})`;
|
||||
const gateFailure = await emitRequiredTriageGate("trusted-agent-source-gate", "fail", rationale, {
|
||||
triageDeciderSource: triageDecider.source,
|
||||
rubberDuckSource: rubberDuck.source,
|
||||
reviewCodeSource: reviewCode.source,
|
||||
});
|
||||
if (gateFailure) return gateFailure;
|
||||
await emit("triage-apply-failed", {
|
||||
reason: "untrusted-agent-source",
|
||||
triageDeciderSource: triageDecider.source,
|
||||
rubberDuckSource: rubberDuck.source,
|
||||
reviewCodeSource: reviewCode.source,
|
||||
});
|
||||
return {
|
||||
ok: false,
|
||||
agreed: false,
|
||||
error: `Refusing to --apply with non-builtin agents (triage-decider=${triageDecider.source}, rubber-duck=${rubberDuck.source}). Use \`sf headless triage --run\` for a reviewable decision artifact, or remove the project/user override.`,
|
||||
error: `Refusing to --apply with non-builtin agents (triage-decider=${triageDecider.source}, review-code=${reviewCode.source}). Use \`sf headless triage --run\` for a reviewable decision artifact, or remove the project/user override.`,
|
||||
resolvedIds: [],
|
||||
flowId,
|
||||
};
|
||||
|
|
@ -703,7 +702,7 @@ export async function runTriageApply(
|
|||
const trustGateFailure = await emitRequiredTriageGate(
|
||||
"trusted-agent-source-gate",
|
||||
"pass",
|
||||
"both triage-decider and rubber-duck are SF-shipped built-ins",
|
||||
"both triage-decider and review-code are SF-shipped built-ins",
|
||||
);
|
||||
if (trustGateFailure) return trustGateFailure;
|
||||
|
||||
|
|
@ -804,12 +803,12 @@ export async function runTriageApply(
|
|||
}, {}),
|
||||
});
|
||||
|
||||
// Phase 2: rubber-duck reviews the plan with read-only tools. The
|
||||
// Phase 2: review-code reviews the plan with read-only tools. The
|
||||
// review task explicitly hands the plan as the artifact under
|
||||
// scrutiny — the reviewer's job is to spot bad calls before they land.
|
||||
const reviewTask = [
|
||||
"Review this self-feedback triage decision PLAN. The plan has NOT yet been applied — your verdict gates whether any resolve_issue mutation runs.",
|
||||
'Return "rubber-duck: agree" only if every decision in the plan is safe and coherent against the current code/ledger state.',
|
||||
'Return "review-code: agree" only if every decision in the plan is safe and coherent against the current code/ledger state.',
|
||||
"On disagreement, name each concerning decision explicitly so the operator (or a follow-up apply pass) can pull just those entries out and proceed with the rest.",
|
||||
"",
|
||||
"## Original triage prompt (the ledger entries the decider saw)",
|
||||
|
|
@ -818,16 +817,16 @@ export async function runTriageApply(
|
|||
"## triage-decider output (includes the plan as a fenced yaml block)",
|
||||
decider.output,
|
||||
].join("\n");
|
||||
const review = await runner(rubberDuck, reviewTask, {
|
||||
const review = await runner(reviewCode, reviewTask, {
|
||||
model: options.model,
|
||||
cwd,
|
||||
tools: ["view", "grep", "glob", "git_log", "query_journal"],
|
||||
});
|
||||
const agreed = /^rubber-duck:\s*agree\b/im.test(review.output.trim());
|
||||
const agreed = /^review-code:\s*agree\b/im.test(review.output.trim());
|
||||
await emit(
|
||||
agreed
|
||||
? "triage-apply-rubber-duck-agree"
|
||||
: "triage-apply-rubber-duck-disagree",
|
||||
? "triage-apply-review-code-agree"
|
||||
: "triage-apply-review-code-disagree",
|
||||
{
|
||||
ok: review.ok,
|
||||
exitCode: review.exitCode ?? null,
|
||||
|
|
@ -837,15 +836,15 @@ export async function runTriageApply(
|
|||
const gateFailure = await emitRequiredTriageGate(
|
||||
"triage-apply-review-gate",
|
||||
"manual-attention",
|
||||
"rubber-duck subagent failed to complete; review pending operator",
|
||||
"review-code subagent failed to complete; review pending operator",
|
||||
{ exitCode: review.exitCode ?? null },
|
||||
);
|
||||
if (gateFailure) return gateFailure;
|
||||
await emit("triage-apply-failed", { reason: "rubber-duck-failed" });
|
||||
await emit("triage-apply-failed", { reason: "review-code-failed" });
|
||||
return {
|
||||
ok: false,
|
||||
agreed: false,
|
||||
error: review.stderr || "rubber-duck failed",
|
||||
error: review.stderr || "review-code failed",
|
||||
deciderOutput: decider.output,
|
||||
reviewOutput: review.output,
|
||||
resolvedIds: [],
|
||||
|
|
@ -859,13 +858,13 @@ export async function runTriageApply(
|
|||
const gateFailure = await emitRequiredTriageGate(
|
||||
"triage-apply-review-gate",
|
||||
"fail",
|
||||
"rubber-duck disagreed with the proposed plan; no mutations applied",
|
||||
"review-code disagreed with the proposed plan; no mutations applied",
|
||||
);
|
||||
if (gateFailure) return gateFailure;
|
||||
return {
|
||||
ok: false,
|
||||
agreed: false,
|
||||
error: "rubber-duck disagreed — pausing for operator review",
|
||||
error: "review-code disagreed — pausing for operator review",
|
||||
deciderOutput: decider.output,
|
||||
reviewOutput: review.output,
|
||||
resolvedIds: [],
|
||||
|
|
@ -875,7 +874,7 @@ export async function runTriageApply(
|
|||
const reviewGateFailure = await emitRequiredTriageGate(
|
||||
"triage-apply-review-gate",
|
||||
"pass",
|
||||
"rubber-duck agreed with the proposed plan; apply phase proceeds",
|
||||
"review-code agreed with the proposed plan; apply phase proceeds",
|
||||
);
|
||||
if (reviewGateFailure) return reviewGateFailure;
|
||||
|
||||
|
|
@ -1162,7 +1161,7 @@ export async function handleTriage(
|
|||
|
||||
if (options.apply) {
|
||||
process.stderr.write(
|
||||
"[triage] applying via triage-decider -> rubber-duck (this can take a few minutes)…\n",
|
||||
"[triage] applying via triage-decider -> review-code (this can take a few minutes)…\n",
|
||||
);
|
||||
const result = await runTriageApply(cwd, prompt, {
|
||||
model: options.model,
|
||||
|
|
@ -1183,7 +1182,7 @@ export async function handleTriage(
|
|||
process.stdout.write(`${JSON.stringify(payload)}\n`);
|
||||
} else if (result.ok) {
|
||||
process.stdout.write(
|
||||
`Triage apply complete: rubber-duck agreed (${result.resolvedIds.length} resolved)\n`,
|
||||
`Triage apply complete: review-code agreed (${result.resolvedIds.length} resolved)\n`,
|
||||
);
|
||||
if (result.resolvedIds.length > 0) {
|
||||
process.stdout.write(`Resolved: ${result.resolvedIds.join(", ")}\n`);
|
||||
|
|
|
|||
|
|
@ -225,7 +225,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
|
|||
" query Machine snapshot: JSON state + next dispatch + costs (no LLM)",
|
||||
" usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)",
|
||||
" reflect Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw, --run to dispatch to gemini-cli, --model <id> to override)",
|
||||
" triage Render canonical self-feedback triage prompt (--list/--json inspect, --run writes decisions, --apply runs triage-decider -> rubber-duck)",
|
||||
" triage Render canonical self-feedback triage prompt (--list/--json inspect, --run writes decisions, --apply runs triage-decider -> review-code)",
|
||||
"",
|
||||
"new-milestone flags:",
|
||||
" --context <path> Path to spec/PRD file (use '-' for stdin)",
|
||||
|
|
@ -259,7 +259,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
|
|||
" sf headless triage --list Self-feedback queue digest (impact↓ effort↑ ts↑)",
|
||||
" sf headless triage | sf-some-model Pipe triage prompt to any model",
|
||||
" sf headless triage --run Dispatch triage to default model + write decisions",
|
||||
" sf headless triage --apply Apply via triage-decider, then gate with rubber-duck",
|
||||
" sf headless triage --apply Apply via triage-decider, then gate with review-code",
|
||||
" sf headless reflect Render reflection prompt for piping",
|
||||
" sf headless reflect --run Dispatch reflection + write report",
|
||||
"",
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
---
|
||||
name: rubber-duck
|
||||
name: review-code
|
||||
description: Constructive pre-implementation critic — catches design flaws, missing edge cases, and gaps before code is written
|
||||
model: sonnet
|
||||
tools: read, grep, find, ls, bash
|
||||
|
|
@ -1,11 +1,11 @@
|
|||
name: rubber-duck
|
||||
displayName: Rubber Duck Agent
|
||||
name: review-code
|
||||
displayName: Review Code Agent
|
||||
description: >
|
||||
A constructive critic and second opinion. Reviews proposals, designs,
|
||||
decision matrices, or implementations and surfaces ONLY weak points that
|
||||
matter — bugs, logic errors, missed constraints, decisions that don't
|
||||
match the ledger state. The default consumer is the triage --apply
|
||||
pipeline (rubber-duck reviews triage decisions before they auto-apply),
|
||||
pipeline (review-code reviews triage decisions before they auto-apply),
|
||||
but operators can invoke directly for any artifact review.
|
||||
tools: "*"
|
||||
promptParts:
|
||||
|
|
@ -13,7 +13,7 @@ promptParts:
|
|||
- toolInstructions
|
||||
- parallelToolCalling
|
||||
prompt: |
|
||||
You are SF's rubber-duck agent: a disciplined devil's advocate. You
|
||||
You are SF's review-code agent: a disciplined devil's advocate. You
|
||||
review artifacts and surface only issues that genuinely matter.
|
||||
|
||||
Your guiding principle: a flagged finding should feel like a $20 bill
|
||||
|
|
@ -53,7 +53,7 @@ prompt: |
|
|||
|
||||
## Output contract
|
||||
|
||||
- On AGREE: a single line — "rubber-duck: agree" — followed optionally
|
||||
- On AGREE: a single line — "review-code: agree" — followed optionally
|
||||
by one short paragraph of confirming reasoning.
|
||||
- On DISAGREE: list each concern as a separate `## Concern N:` section
|
||||
with: (a) what's wrong, (b) the evidence (file path, line, commit
|
||||
|
|
@ -3,7 +3,7 @@ displayName: Self-Feedback Triage Decider
|
|||
description: >
|
||||
Reads the open self-feedback queue and proposes a decision plan
|
||||
(Fix, Promote, or Close per entry). PLAN-ONLY: this agent does NOT
|
||||
call resolve_issue directly. The plan is reviewed by rubber-duck;
|
||||
call resolve_issue directly. The plan is reviewed by review-code;
|
||||
only on agreement does the triage --apply runner execute the plan.
|
||||
This separation exists so a bad decision can be blocked before any
|
||||
mutation lands (see sf-mp5lnlbc-ty5fec / codex review 2026-05-14).
|
||||
|
|
@ -22,7 +22,7 @@ prompt: |
|
|||
forever is the failure mode.
|
||||
|
||||
**PLAN-ONLY MODE:** You do not call resolve_issue. You produce a
|
||||
structured decision plan. A separate review stage (rubber-duck) will
|
||||
structured decision plan. A separate review stage (review-code) will
|
||||
read your plan and either approve or block it before any mutation
|
||||
happens. This separation is intentional: it prevents bad decisions
|
||||
from landing without a second opinion.
|
||||
|
|
@ -57,7 +57,7 @@ prompt: |
|
|||
|
||||
- You have read-only investigation: view, grep, glob, git_log. You
|
||||
do NOT have resolve_issue, edit, write, or bash. Mutation is the
|
||||
apply runner's job, after rubber-duck reviews your plan.
|
||||
apply runner's job, after review-code reviews your plan.
|
||||
|
||||
## Output contract
|
||||
|
||||
|
|
|
|||
|
|
@ -300,8 +300,8 @@ export const TOP_LEVEL_SUBCOMMANDS = [
|
|||
desc: "Prevent system sleep during long runs (caffeinate / systemd-inhibit)",
|
||||
},
|
||||
{
|
||||
cmd: "rubber-duck",
|
||||
desc: "Dispatch a rubber-duck subagent for constructive pre-implementation review (alias: review-code)",
|
||||
cmd: "review-code",
|
||||
desc: "Dispatch a review-code subagent for constructive pre-implementation review",
|
||||
},
|
||||
{
|
||||
cmd: "delegate",
|
||||
|
|
|
|||
|
|
@ -511,13 +511,11 @@ Examples:
|
|||
return true;
|
||||
}
|
||||
if (
|
||||
trimmed === "rubber-duck" ||
|
||||
trimmed.startsWith("rubber-duck ") ||
|
||||
trimmed === "review-code" ||
|
||||
trimmed.startsWith("review-code ")
|
||||
) {
|
||||
const input = trimmed.replace(/^(?:rubber-duck|review-code)\s*/, "").trim();
|
||||
await handleRubberDuckCommand(input, ctx, pi);
|
||||
const input = trimmed.replace(/^review-code\s*/, "").trim();
|
||||
await handleReviewCodeCommand(input, ctx, pi);
|
||||
return true;
|
||||
}
|
||||
if (trimmed === "delegate" || trimmed.startsWith("delegate ")) {
|
||||
|
|
@ -610,9 +608,9 @@ async function handleKeepAlive(args, ctx) {
|
|||
}
|
||||
}
|
||||
|
||||
// ─── /rubber-duck ────────────────────────────────────────────────────────────
|
||||
// ─── /review-code ────────────────────────────────────────────────────────────
|
||||
|
||||
async function handleRubberDuckCommand(topic, ctx, _pi) {
|
||||
async function handleReviewCodeCommand(topic, ctx, _pi) {
|
||||
const { execSync } = await import("node:child_process");
|
||||
const root = projectRoot();
|
||||
|
||||
|
|
@ -640,20 +638,20 @@ async function handleRubberDuckCommand(topic, ctx, _pi) {
|
|||
|
||||
const focus = topic ? `Focus on: ${topic}\n\n` : "";
|
||||
const reviewPrompt =
|
||||
`Dispatch a \`rubber-duck\` subagent to review the current plan or changes before proceeding. ` +
|
||||
`Use the \`subagent\` tool with \`agent: "rubber-duck"\`.\n\n` +
|
||||
`Dispatch a \`review-code\` subagent to review the current plan or changes before proceeding. ` +
|
||||
`Use the \`subagent\` tool with \`agent: "review-code"\`.\n\n` +
|
||||
`${focus}` +
|
||||
`Ask the rubber-duck agent to identify blocking issues, non-blocking issues, and suggestions. ` +
|
||||
`Ask the review-code agent to identify blocking issues, non-blocking issues, and suggestions. ` +
|
||||
`After the subagent returns, summarise the verdict and any blocking findings in one short paragraph. ` +
|
||||
`Do not proceed with implementation until the user acknowledges blocking findings.` +
|
||||
diff;
|
||||
|
||||
ctx.ui.notify("Dispatching rubber-duck review…", "info");
|
||||
ctx.ui.notify("Dispatching review-code review…", "info");
|
||||
try {
|
||||
await ctx.sendMessage?.(reviewPrompt);
|
||||
} catch {
|
||||
ctx.ui.notify(
|
||||
"Could not dispatch rubber-duck. Try: subagent agent=rubber-duck task='review current changes'",
|
||||
"Could not dispatch review-code. Try: subagent agent=review-code task='review current changes'",
|
||||
"warning",
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -145,7 +145,7 @@
|
|||
"reset-slice",
|
||||
"rethink",
|
||||
"rewind",
|
||||
"rubber-duck",
|
||||
"review-code",
|
||||
"run-hook",
|
||||
"scaffold",
|
||||
"scan",
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ Then:
|
|||
0. Narrate step transitions, key implementation decisions, and verification outcomes as you work. Keep it terse — one line between tool-call clusters, not between every call — but write complete sentences in user-facing prose, not shorthand notes or scratchpad fragments.
|
||||
0a. **Batch independent tool calls in parallel.** When the next step needs to read or grep multiple files/paths that don't depend on each other's results, issue them in a single tool-call message (multiple tool uses in one assistant turn) rather than one-at-a-time. Examples: reading the handler + the test file + the schema file to triangulate a bug; greping for two unrelated symbols. Sequential tool calls are only correct when each call's input genuinely depends on the previous call's output. Talking-then-doing is also dead weight — if the next action is unambiguous, just take it; describe what you found in the result, not what you plan to look at.
|
||||
0b. **Swarm opportunity check.** Before implementation, decide whether this task can be split into a 2-3 worker same-model swarm. Swarm only if the shards have disjoint file/directory ownership, no shared-interface or lockfile edits, shard-local verification, and clear wall-clock savings. If it passes, dispatch `subagent({ tasks: [...] })` with explicit write scopes, expected output files, and verification per worker; then inspect `git status --short`, synthesize results, resolve conflicts, and run final task verification yourself. If it does not pass, continue single-agent execution without ceremony.
|
||||
0c. **Rubber-duck check (non-trivial tasks only).** If the task touches more than two files, introduces a new abstraction, changes an API boundary, or has a non-obvious failure mode — dispatch a `rubber-duck` subagent with the task plan and any relevant existing code as context. Summarise its verdict in one line. If it returns a **Blocking** finding, address it before writing code. Skip this step for simple edits, test fixes, or renaming tasks.
|
||||
0c. **Review-code check (non-trivial tasks only).** If the task touches more than two files, introduces a new abstraction, changes an API boundary, or has a non-obvious failure mode — dispatch a `review-code` subagent with the task plan and any relevant existing code as context. Summarise its verdict in one line. If it returns a **Blocking** finding, address it before writing code. Skip this step for simple edits, test fixes, or renaming tasks.
|
||||
1. {{skillActivation}} Follow any activated skills before writing code. If no skills match this task, skip this step.
|
||||
2. **Verify file existence before editing.** The task plan references specific files. Before reading or editing any file mentioned in the plan, confirm it exists with `ls`, `find`, or `existsSync`. If a referenced file does NOT exist, stop immediately — do not attempt to create it based on the plan's description of what "should" be there. The file may have been deleted, renamed, or moved. Escalate as `blocker_discovered: true` with a clear description of which file is missing and what the plan expected to find. This prevents phantom work on stale file paths.
|
||||
3. Execute the steps in the inlined task plan, adapting minor local mismatches when the surrounding code differs from the planner's snapshot
|
||||
|
|
|
|||
|
|
@ -192,7 +192,7 @@ export function discoverAgents(cwd, scope) {
|
|||
const userDir = path.join(getAgentDir(), "agents");
|
||||
const projectAgentsDir = findNearestProjectAgentsDir(cwd);
|
||||
// Built-in agents ship with SF and have lowest precedence — user/project
|
||||
// definitions with the same name override them. Used for rubber-duck +
|
||||
// definitions with the same name override them. Used for review-code +
|
||||
// triage-decider as the foundation for SF's self-driven triage pipeline.
|
||||
const builtinAgents = scope === "project" ? [] : loadBuiltinAgents();
|
||||
const userAgents =
|
||||
|
|
|
|||
|
|
@ -113,33 +113,33 @@ test("discoverAgents_when_yaml_and_markdown_share_name_prefers_later_project_ent
|
|||
assert.match(duplicates[0].systemPrompt, /YAML body/);
|
||||
});
|
||||
|
||||
test("discoverAgents_when_scope_both_includes_builtin_rubber_duck_and_triage_decider", () => {
|
||||
test("discoverAgents_when_scope_both_includes_builtin_review_code_and_triage_decider", () => {
|
||||
// Built-in agents ship with SF (src/resources/extensions/sf/agents/) and
|
||||
// must be discoverable without operator setup. They're the foundation for
|
||||
// SF's self-driven triage pipeline (sf-mp5lnlbc-ty5fec). Isolate from the
|
||||
// real ~/.sf/agent/agents/ so the test doesn't conflict with the
|
||||
// operator's personal rubber-duck.md if present.
|
||||
// operator's personal review-code.md if present.
|
||||
const isolatedAgentDir = mkdtempSync(join(tmpdir(), "sf-agent-dir-"));
|
||||
const originalEnv = process.env.SF_CODING_AGENT_DIR;
|
||||
process.env.SF_CODING_AGENT_DIR = isolatedAgentDir;
|
||||
try {
|
||||
const project = makeProject();
|
||||
const { agents } = discoverAgents(project, "both");
|
||||
const rubberDuck = agents.find((a) => a.name === "rubber-duck");
|
||||
const reviewCode = agents.find((a) => a.name === "review-code");
|
||||
const triageDecider = agents.find((a) => a.name === "triage-decider");
|
||||
assert.ok(
|
||||
rubberDuck,
|
||||
"rubber-duck builtin agent must be discovered in scope=both",
|
||||
reviewCode,
|
||||
"review-code builtin agent must be discovered in scope=both",
|
||||
);
|
||||
assert.equal(rubberDuck.source, "builtin");
|
||||
assert.match(rubberDuck.description, /constructive critic/i);
|
||||
assert.equal(reviewCode.source, "builtin");
|
||||
assert.match(reviewCode.description, /constructive critic/i);
|
||||
assert.ok(
|
||||
triageDecider,
|
||||
"triage-decider builtin agent must be discovered in scope=both",
|
||||
);
|
||||
assert.equal(triageDecider.source, "builtin");
|
||||
// triage-decider is plan-only; the apply runner owns mutation after
|
||||
// rubber-duck agrees.
|
||||
// review-code agrees.
|
||||
assert.ok(
|
||||
!triageDecider.tools?.includes("resolve_issue"),
|
||||
"triage-decider must not declare resolve_issue tool access",
|
||||
|
|
@ -152,7 +152,7 @@ test("discoverAgents_when_scope_both_includes_builtin_rubber_duck_and_triage_dec
|
|||
|
||||
test("discoverAgents_when_project_overrides_builtin_name_project_wins", () => {
|
||||
// Project-level definitions must shadow built-ins with the same name —
|
||||
// operators need to be able to customize rubber-duck/triage-decider
|
||||
// operators need to be able to customize review-code/triage-decider
|
||||
// behavior per project without forking the SF distribution.
|
||||
const isolatedAgentDir = mkdtempSync(join(tmpdir(), "sf-agent-dir-"));
|
||||
const originalEnv = process.env.SF_CODING_AGENT_DIR;
|
||||
|
|
@ -160,17 +160,17 @@ test("discoverAgents_when_project_overrides_builtin_name_project_wins", () => {
|
|||
try {
|
||||
const project = makeProject();
|
||||
writeFileSync(
|
||||
join(project, ".sf", "agents", "rubber-duck.agent.yaml"),
|
||||
join(project, ".sf", "agents", "review-code.agent.yaml"),
|
||||
[
|
||||
"name: rubber-duck",
|
||||
"description: Project-level override of rubber-duck.",
|
||||
"name: review-code",
|
||||
"description: Project-level override of review-code.",
|
||||
"prompt: |",
|
||||
" This is the project override.",
|
||||
"",
|
||||
].join("\n"),
|
||||
);
|
||||
const { agents } = discoverAgents(project, "both");
|
||||
const matches = agents.filter((a) => a.name === "rubber-duck");
|
||||
const matches = agents.filter((a) => a.name === "review-code");
|
||||
assert.equal(matches.length, 1);
|
||||
assert.equal(matches[0].source, "project");
|
||||
assert.match(matches[0].description, /Project-level override/);
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ beforeEach(() => {
|
|||
process.env.SF_CODING_AGENT_DIR = dir;
|
||||
});
|
||||
|
||||
test("runTriageApply_when_rubber_duck_agrees_runs_both_agents_in_order", async () => {
|
||||
test("runTriageApply_when_review_code_agrees_runs_both_agents_in_order", async () => {
|
||||
const project = makeProject();
|
||||
const calls: string[] = [];
|
||||
const result = await runTriageApply(project, "triage prompt", {
|
||||
|
|
@ -56,20 +56,20 @@ test("runTriageApply_when_rubber_duck_agrees_runs_both_agents_in_order", async (
|
|||
exitCode: 0,
|
||||
};
|
||||
}
|
||||
return { ok: true, output: "rubber-duck: agree", exitCode: 0 };
|
||||
return { ok: true, output: "review-code: agree", exitCode: 0 };
|
||||
},
|
||||
});
|
||||
|
||||
// Orchestration contract: rubber-duck agreed, so the apply phase ran.
|
||||
// Orchestration contract: review-code agreed, so the apply phase ran.
|
||||
// The actual mutation on sf-test-1 fails because the test project has
|
||||
// no real ledger entry — that's the rejectedIds path, not a contract
|
||||
// violation. ok=false on apply failures is itself the contract
|
||||
// (codex review 2026-05-14 follow-up).
|
||||
assert.equal(result.agreed, true, "rubber-duck must agree");
|
||||
assert.equal(result.agreed, true, "review-code must agree");
|
||||
assert.deepEqual(
|
||||
calls,
|
||||
["triage-decider", "rubber-duck"],
|
||||
"decider must run before rubber-duck",
|
||||
["triage-decider", "review-code"],
|
||||
"decider must run before review-code",
|
||||
);
|
||||
assert.equal(result.ok, false, "apply should fail on missing ledger entry");
|
||||
assert.deepEqual(result.rejectedIds, ["sf-test-1"]);
|
||||
|
|
@ -99,7 +99,7 @@ test("runTriageApply_when_custom_runner_without_trust_flag_refuses", async () =>
|
|||
assert.match(result.error ?? "", /allowUntrustedRunner|custom agentRunner/i);
|
||||
});
|
||||
|
||||
test("runTriageApply_when_rubber_duck_disagrees_blocks_completion", async () => {
|
||||
test("runTriageApply_when_review_code_disagrees_blocks_completion", async () => {
|
||||
const project = makeProject();
|
||||
const result = await runTriageApply(project, "triage prompt", {
|
||||
allowUntrustedRunner: true,
|
||||
|
|
@ -123,11 +123,11 @@ test("runTriageApply_when_rubber_duck_disagrees_blocks_completion", async () =>
|
|||
assert.equal(result.agreed, false);
|
||||
assert.equal(
|
||||
result.error,
|
||||
"rubber-duck disagreed — pausing for operator review",
|
||||
"review-code disagreed — pausing for operator review",
|
||||
);
|
||||
});
|
||||
|
||||
test("runTriageApply_when_decider_fails_skips_rubber_duck", async () => {
|
||||
test("runTriageApply_when_decider_fails_skips_review_code", async () => {
|
||||
const project = makeProject();
|
||||
const calls: string[] = [];
|
||||
const result = await runTriageApply(project, "triage prompt", {
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
* metadata for the three triage gates.
|
||||
*
|
||||
* Test contract: each decision point (trusted-source check, plan-validation,
|
||||
* rubber-duck review) writes exactly one gate_run trace event with
|
||||
* review-code review) writes exactly one gate_run trace event with
|
||||
* surface=headless, runControl=supervised, permissionProfile=high, traceId=
|
||||
* the flowId. The outcome reflects the decision (pass/fail/manual-attention).
|
||||
*/
|
||||
|
|
@ -108,7 +108,7 @@ describe("runTriageApply emits gate_run trace events with schema-v2 metadata", (
|
|||
if (agent.name === "triage-decider") {
|
||||
return { ok: true, output: deciderPlan, exitCode: 0 };
|
||||
}
|
||||
return { ok: true, output: "rubber-duck: agree", exitCode: 0 };
|
||||
return { ok: true, output: "review-code: agree", exitCode: 0 };
|
||||
},
|
||||
});
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue