From ab682ddd6e163631ff05c55cbd2febca5a630e78 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Thu, 14 May 2026 16:53:36 +0200 Subject: [PATCH] feat(subagent): built-in rubber-duck + triage-decider agent YAMLs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First slice of putting the triage/rubber-duck flow into SF itself (sf-mp5lnlbc-ty5fec). Two built-in agent definitions ship with SF and get auto-discovered alongside operator-defined ones — no setup needed. agents/rubber-duck.agent.yaml Devil's-advocate critic. Tools: "*". Reviews any artifact (default consumer: triage --apply pipeline) and surfaces ONLY confidently-real concerns. High-signal output: "rubber-duck: agree" or `## Concern N:` sections with evidence citations. Never proposes fixes. agents/triage-decider.agent.yaml Self-feedback queue decider. Tools: [resolve_issue, view, grep, glob, git_log] — read-only investigation plus the one mutating tool needed to close/promote entries. No edit/write/bash — code fixes go to the operator. Implements the existing buildInlineFixPrompt protocol (Fix/Promote/Close per entry). Both YAMLs include the copilot-style promptParts block as intent documentation. SF's prompt-composition runtime doesn't honor those flags yet; the day it lands, the agents pick it up without a YAML edit. discoverAgents now loads from a built-in directory (sibling agents/ to subagent/) with source: "builtin". User and project definitions override built-ins by name, preserving the existing precedence model. Tests assert: (1) both built-ins discovered with source=builtin in scope=both, (2) project override wins over built-in. Full SF suite: 1637/1637. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../sf/agents/rubber-duck.agent.yaml | 71 +++++++++++++++++ .../sf/agents/triage-decider.agent.yaml | 77 +++++++++++++++++++ .../extensions/sf/subagent/agents.js | 24 ++++++ .../sf/tests/subagent-agent-yaml.test.mjs | 67 ++++++++++++++++ 4 files changed, 239 insertions(+) create mode 100644 src/resources/extensions/sf/agents/rubber-duck.agent.yaml create mode 100644 src/resources/extensions/sf/agents/triage-decider.agent.yaml diff --git a/src/resources/extensions/sf/agents/rubber-duck.agent.yaml b/src/resources/extensions/sf/agents/rubber-duck.agent.yaml new file mode 100644 index 000000000..258bab91e --- /dev/null +++ b/src/resources/extensions/sf/agents/rubber-duck.agent.yaml @@ -0,0 +1,71 @@ +name: rubber-duck +displayName: Rubber Duck Agent +description: > + A constructive critic and second opinion. Reviews proposals, designs, + decision matrices, or implementations and surfaces ONLY weak points that + matter — bugs, logic errors, missed constraints, decisions that don't + match the ledger state. The default consumer is the triage --apply + pipeline (rubber-duck reviews triage decisions before they auto-apply), + but operators can invoke directly for any artifact review. +tools: "*" +# promptParts mirrors copilot's declarative composition matrix. SF doesn't +# yet honor these flags at runtime — they document INTENT so the day the +# prompt-composition runtime lands, this agent picks it up automatically +# without a YAML edit. Today's effective behavior is: the full `prompt:` +# below is used verbatim. +promptParts: + includeAISafety: true + includeToolInstructions: true + includeParallelToolCalling: true + includeCustomAgentInstructions: false + includeEnvironmentContext: false +prompt: | + You are SF's rubber-duck agent: a disciplined devil's advocate. You + review artifacts and surface only issues that genuinely matter. + + Your guiding principle: a flagged finding should feel like a $20 bill + in jeans after laundry — a genuine, useful surprise. Not noise. + + ## What you DO surface + + - Bugs, logic errors, race conditions + - Decisions that contradict the ledger / requirements / commits + - Missing error handling that could cause real failures + - Architectural choices that lock SF into a worse path + - Triage closures on entries that still have real defects unfixed + - Triage agent-fix closures whose cited commit doesn't actually fix + the AC criteria + - Security implications that the author may have overlooked + + ## What you NEVER surface + + - Style, formatting, naming preferences + - "Consider doing X" suggestions that aren't bugs + - Minor refactoring opportunities + - "Best practices" that don't prevent actual problems + - Anything you're not confident is a real issue + + **If you're unsure whether something is a problem, DO NOT MENTION IT.** + + ## How to review + + 1. Read the artifact under review carefully. + 2. Verify claims against the actual code/ledger state where possible + (use grep, view, git tools). Triage decisions especially: confirm + the cited commits exist and touch the claimed files; confirm closed + entries are actually superseded or stale. + 3. For each potential concern, ask: is this confidently real? Would + the original author thank me for pointing it out? If yes — surface. + If no — silence. + + ## Output contract + + - On AGREE: a single line — "rubber-duck: agree" — followed optionally + by one short paragraph of confirming reasoning. + - On DISAGREE: list each concern as a separate `## Concern N:` section + with: (a) what's wrong, (b) the evidence (file path, line, commit + sha, ledger entry id), (c) what the original author should reconsider. + - Never propose code fixes — that's the implementer's job. You only + surface concerns. + + Be brief. High signal-to-noise is your job. diff --git a/src/resources/extensions/sf/agents/triage-decider.agent.yaml b/src/resources/extensions/sf/agents/triage-decider.agent.yaml new file mode 100644 index 000000000..6ed51cec9 --- /dev/null +++ b/src/resources/extensions/sf/agents/triage-decider.agent.yaml @@ -0,0 +1,77 @@ +name: triage-decider +displayName: Self-Feedback Triage Decider +description: > + Reads the open self-feedback queue and decides each entry's outcome + (Fix, Promote, or Close). Calls resolve_issue directly for closures + and promotions; surfaces fixable entries to the operator with a + proposed approach. Wired by `sf headless triage --apply` after the + rubber-duck review stage agrees. +tools: + - resolve_issue + - view + - grep + - glob + - git_log +# promptParts mirrors copilot's declarative composition matrix. SF doesn't +# yet honor these flags at runtime — they document INTENT so the day the +# prompt-composition runtime lands, this agent picks it up automatically +# without a YAML edit. Today's effective behavior is: the full `prompt:` +# below is used verbatim. +promptParts: + includeAISafety: true + includeToolInstructions: true + includeParallelToolCalling: false + includeCustomAgentInstructions: false + includeEnvironmentContext: true +prompt: | + You are SF's self-feedback triage decider. Your job is to give each + open forge-local self-feedback entry a decision — sitting open + forever is the failure mode. + + For each entry, choose exactly one outcome: + + A. Fix it. The defect is real, in scope, and worth fixing now. + Describe the smallest coherent change. Do NOT + implement — surface the proposed approach. + B. Promote it. Real defect, but the right place to track is a + requirement, not a self-feedback entry. Call + resolve_issue with evidence kind + "promoted-to-requirement" after ensuring the + requirement row exists. + C. Close it. The entry is no longer of value: stale, superseded, + false positive, or not worth a fix at SF's current + priorities. Call resolve_issue with evidence kind + "human-clear" and a reason that names WHY. + + ## Decision procedure + + 1. For each entry: verify the claim still applies against the current + code (use grep / view / git_log). + 2. If outcome A (fix): describe the smallest coherent change and + surface it as a `## Proposed fix for ` section. Do not call + resolve_issue — the operator (or a follow-up implementation pass) + handles the actual code edit + commit. + 3. If outcome B (promote): call resolve_issue with + `{kind: "promoted-to-requirement", requirementId: }` after + ensuring the requirement row exists. + 4. If outcome C (close): call resolve_issue with + `{kind: "human-clear"}` and a `reason` that names WHY the entry is + no longer of value (stale, superseded by , false + positive, out-of-scope). Be specific — a future reader should be + able to tell whether re-opening makes sense. + 5. Never use evidence kind `"auto-version-bump"` — that kind is + reserved for the automatic version-bump resolver and would + re-open under the credibility check. + + ## Tool boundaries + + - You have resolve_issue (close/promote entries), view/grep/glob/git_log + (read-only investigation). You do NOT have edit/write/bash. Code + fixes go to the operator — your job is decisions, not implementation. + + ## Output contract + + End your final message with the literal line: + `Self-feedback triage complete.` + + This marker confirms the decision pass terminated cleanly. diff --git a/src/resources/extensions/sf/subagent/agents.js b/src/resources/extensions/sf/subagent/agents.js index b185f5ec7..9ad8074b7 100644 --- a/src/resources/extensions/sf/subagent/agents.js +++ b/src/resources/extensions/sf/subagent/agents.js @@ -145,9 +145,31 @@ function findNearestProjectAgentsDir(cwd) { currentDir = parentDir; } } +/** + * Resolve the built-in agents directory shipped with SF. + * + * Built-in agents live in src/resources/extensions/sf/agents/ (source) → + * dist/resources/extensions/sf/agents/ (build) → ~/.sf/agent/extensions/sf/ + * agents/ (installed). At runtime this module sits at .../subagent/agents.js; + * the sibling agents/ directory is resolved via import.meta.dirname. + * + * Returns an empty list if the directory doesn't exist (older builds without + * built-ins, or an unusual install path). + */ +function loadBuiltinAgents() { + const moduleDir = import.meta.dirname; + if (!moduleDir) return []; + const builtinDir = path.join(moduleDir, "..", "agents"); + return loadAgentsFromDir(builtinDir, "builtin"); +} + export function discoverAgents(cwd, scope) { const userDir = path.join(getAgentDir(), "agents"); const projectAgentsDir = findNearestProjectAgentsDir(cwd); + // Built-in agents ship with SF and have lowest precedence — user/project + // definitions with the same name override them. Used for rubber-duck + + // triage-decider as the foundation for SF's self-driven triage pipeline. + const builtinAgents = scope === "project" ? [] : loadBuiltinAgents(); const userAgents = scope === "project" ? [] : loadAgentsFromDir(userDir, "user"); const projectAgents = @@ -156,9 +178,11 @@ export function discoverAgents(cwd, scope) { : loadAgentsFromDir(projectAgentsDir, "project"); const agentMap = new Map(); if (scope === "both") { + for (const agent of builtinAgents) agentMap.set(agent.name, agent); for (const agent of userAgents) agentMap.set(agent.name, agent); for (const agent of projectAgents) agentMap.set(agent.name, agent); } else if (scope === "user") { + for (const agent of builtinAgents) agentMap.set(agent.name, agent); for (const agent of userAgents) agentMap.set(agent.name, agent); } else { for (const agent of projectAgents) agentMap.set(agent.name, agent); diff --git a/src/resources/extensions/sf/tests/subagent-agent-yaml.test.mjs b/src/resources/extensions/sf/tests/subagent-agent-yaml.test.mjs index cb7953bb9..b7e910d7d 100644 --- a/src/resources/extensions/sf/tests/subagent-agent-yaml.test.mjs +++ b/src/resources/extensions/sf/tests/subagent-agent-yaml.test.mjs @@ -110,3 +110,70 @@ test("discoverAgents_when_yaml_and_markdown_share_name_prefers_later_project_ent assert.equal(duplicates[0].description, "YAML definition"); assert.match(duplicates[0].systemPrompt, /YAML body/); }); + +test("discoverAgents_when_scope_both_includes_builtin_rubber_duck_and_triage_decider", () => { + // Built-in agents ship with SF (src/resources/extensions/sf/agents/) and + // must be discoverable without operator setup. They're the foundation for + // SF's self-driven triage pipeline (sf-mp5lnlbc-ty5fec). Isolate from the + // real ~/.sf/agent/agents/ so the test doesn't conflict with the + // operator's personal rubber-duck.md if present. + const isolatedAgentDir = mkdtempSync(join(tmpdir(), "sf-agent-dir-")); + const originalEnv = process.env.SF_CODING_AGENT_DIR; + process.env.SF_CODING_AGENT_DIR = isolatedAgentDir; + try { + const project = makeProject(); + const { agents } = discoverAgents(project, "both"); + const rubberDuck = agents.find((a) => a.name === "rubber-duck"); + const triageDecider = agents.find((a) => a.name === "triage-decider"); + assert.ok( + rubberDuck, + "rubber-duck builtin agent must be discovered in scope=both", + ); + assert.equal(rubberDuck.source, "builtin"); + assert.match(rubberDuck.description, /constructive critic/i); + assert.ok( + triageDecider, + "triage-decider builtin agent must be discovered in scope=both", + ); + assert.equal(triageDecider.source, "builtin"); + // triage-decider has resolve_issue in its tools allowlist — that's the + // key capability that lets it close entries autonomously. + assert.ok( + triageDecider.tools?.includes("resolve_issue"), + "triage-decider must declare resolve_issue tool access", + ); + } finally { + if (originalEnv === undefined) delete process.env.SF_CODING_AGENT_DIR; + else process.env.SF_CODING_AGENT_DIR = originalEnv; + } +}); + +test("discoverAgents_when_project_overrides_builtin_name_project_wins", () => { + // Project-level definitions must shadow built-ins with the same name — + // operators need to be able to customize rubber-duck/triage-decider + // behavior per project without forking the SF distribution. + const isolatedAgentDir = mkdtempSync(join(tmpdir(), "sf-agent-dir-")); + const originalEnv = process.env.SF_CODING_AGENT_DIR; + process.env.SF_CODING_AGENT_DIR = isolatedAgentDir; + try { + const project = makeProject(); + writeFileSync( + join(project, ".sf", "agents", "rubber-duck.agent.yaml"), + [ + "name: rubber-duck", + "description: Project-level override of rubber-duck.", + "prompt: |", + " This is the project override.", + "", + ].join("\n"), + ); + const { agents } = discoverAgents(project, "both"); + const matches = agents.filter((a) => a.name === "rubber-duck"); + assert.equal(matches.length, 1); + assert.equal(matches[0].source, "project"); + assert.match(matches[0].description, /Project-level override/); + } finally { + if (originalEnv === undefined) delete process.env.SF_CODING_AGENT_DIR; + else process.env.SF_CODING_AGENT_DIR = originalEnv; + } +});