From 12f5eb22795f30c36de3c61d7f9fce8255c71cf7 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Thu, 14 May 2026 17:23:10 +0200 Subject: [PATCH] feat(triage): wire --apply CLI + canonical resolve_issue evidence kinds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three coupled changes that together complete the operator-facing --apply surface for sf headless triage: 1. headless.ts: parse --apply from commandArgs and forward to handleTriage. The triage option flow now distinguishes inspect (--list, --json), one-shot (--run), and orchestrated apply (--apply) cleanly. 2. help-text.ts: triage subcommand line + examples block now document the --apply mode (triage-decider → rubber-duck pipeline). 3. bootstrap/db-tools.js: resolve_issue tool now accepts the full canonical evidence-kind set instead of hardcoding "agent-fix": - agent-fix (default; commit-based fix evidence) - human-clear (stale, superseded, false positive, intentional close) - promoted-to-requirement (with required requirement_id) The tool surfaces a clear error when promoted-to-requirement is used without requirement_id. The promptGuidelines were updated to walk callers through choosing the right kind. self-feedback-db.test.mjs extended with coverage for all three evidence kinds + the missing-requirement_id rejection path. Together these make sf headless triage --apply genuinely useful: the agent can produce a plan with any outcome, rubber-duck reviews it, and the runner applies via resolve_issue with the right evidence kind per decision. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/headless.ts | 17 +- src/help-text.ts | 3 +- .../extensions/sf/bootstrap/db-tools.js | 59 +++++- .../sf/tests/self-feedback-db.test.mjs | 188 +++++++++++++++--- 4 files changed, 226 insertions(+), 41 deletions(-) diff --git a/src/headless.ts b/src/headless.ts index 5db72120e..cdb008585 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -818,8 +818,7 @@ async function runHeadlessOnce( // Bypasses the RPC path for instant, TTY-independent gate health output. if (options.command === "status" && options.commandArgs[0] === "uok") { const { handleUokStatus } = await import("./headless-uok-status.js"); - const wantsJson = - options.json || options.commandArgs.includes("--json"); + const wantsJson = options.json || options.commandArgs.includes("--json"); const result = await handleUokStatus(process.cwd(), { json: wantsJson }); return { exitCode: result.exitCode, interrupted: false, timedOut: false }; } @@ -838,6 +837,7 @@ async function runHeadlessOnce( const wantsJson = options.json || options.commandArgs.includes("--json"); const wantsList = options.commandArgs.includes("--list"); const wantsRun = options.commandArgs.includes("--run"); + const wantsApply = options.commandArgs.includes("--apply"); const maxIdx = options.commandArgs.indexOf("--max"); let max: number | undefined; if (maxIdx >= 0 && maxIdx + 1 < options.commandArgs.length) { @@ -855,6 +855,7 @@ async function runHeadlessOnce( list: wantsList, max, run: wantsRun, + apply: wantsApply, model, }); return { exitCode: result.exitCode, interrupted: false, timedOut: false }; @@ -920,9 +921,7 @@ async function runHeadlessOnce( // import-backlog: deterministic text→DB transform, no LLM or RPC child needed. if (options.command === "import-backlog") { - const { runImportBacklog } = await import( - "./headless-import-backlog.js" - ); + const { runImportBacklog } = await import("./headless-import-backlog.js"); const filePath = options.commandArgs[0] ?? 
options.context; if (!filePath) { process.stderr.write( @@ -931,11 +930,9 @@ async function runHeadlessOnce( ); process.exit(1); } - const exitCode = await runImportBacklog( - resolve(filePath), - process.cwd(), - { json: options.json }, - ); + const exitCode = await runImportBacklog(resolve(filePath), process.cwd(), { + json: options.json, + }); process.exit(exitCode); } diff --git a/src/help-text.ts b/src/help-text.ts index e82abc2d8..0463db22f 100644 --- a/src/help-text.ts +++ b/src/help-text.ts @@ -225,7 +225,7 @@ const SUBCOMMAND_HELP: Record = { " query Machine snapshot: JSON state + next dispatch + costs (no LLM)", " usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)", " reflect Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw, --run to dispatch to gemini-cli, --model to override)", - " triage Render canonical self-feedback triage prompt for piping into a model (--list for digest, --json for structured, --max N to cap, --run to dispatch + write decisions to .sf/triage/decisions/, --model to override)", + " triage Render canonical self-feedback triage prompt (--list/--json inspect, --run writes decisions, --apply runs triage-decider -> rubber-duck)", "", "new-milestone flags:", " --context Path to spec/PRD file (use '-' for stdin)", @@ -259,6 +259,7 @@ const SUBCOMMAND_HELP: Record = { " sf headless triage --list Self-feedback queue digest (impact↓ effort↑ ts↑)", " sf headless triage | sf-some-model Pipe triage prompt to any model", " sf headless triage --run Dispatch triage to default model + write decisions", + " sf headless triage --apply Apply via triage-decider, then gate with rubber-duck", " sf headless reflect Render reflection prompt for piping", " sf headless reflect --run Dispatch reflection + write report", "", diff --git a/src/resources/extensions/sf/bootstrap/db-tools.js b/src/resources/extensions/sf/bootstrap/db-tools.js index 0bd7da75f..e4b201970 100644 --- 
a/src/resources/extensions/sf/bootstrap/db-tools.js +++ b/src/resources/extensions/sf/bootstrap/db-tools.js @@ -5,6 +5,7 @@ import { AUTONOMOUS_SOLVER_OUTCOMES, appendAutonomousSolverCheckpoint, } from "../autonomous-solver.js"; +import { getErrorMessage } from "../error-utils.js"; import { claimReservedId, findMilestoneIds, @@ -28,7 +29,6 @@ import { } from "../tools/workflow-tool-executors.js"; import { logError } from "../workflow-logger.js"; import { ensureDbOpen } from "./dynamic-tools.js"; -import { getErrorMessage } from "../error-utils.js"; export function registerDbTools(pi) { // ─── save_decision ───────────────────────────────────────────────── const decisionSaveExecute = async ( @@ -858,15 +858,38 @@ export function registerDbTools(pi) { _ctx, ) => { try { + const evidenceKind = params.evidence_kind ?? "agent-fix"; + if ( + evidenceKind === "promoted-to-requirement" && + !( + typeof params.requirement_id === "string" && + params.requirement_id.trim() + ) + ) { + return { + content: [ + { + type: "text", + text: "Error in resolve_issue: requirement_id is required for promoted-to-requirement evidence", + }, + ], + details: { + operation: "self_feedback_resolve", + id: params.id, + error: "missing_requirement_id", + }, + }; + } const ok = markResolved( params.id, { reason: params.reason, evidence: { - kind: "agent-fix", + kind: evidenceKind, commitSha: params.commit_sha, testPath: params.test_path, summaryNarrative: params.summary_narrative, + requirementId: params.requirement_id, }, criteriaMet: params.criteria_met, }, @@ -925,13 +948,15 @@ export function registerDbTools(pi) { name: "resolve_issue", label: "Resolve Self Feedback", description: - "Mark a previously reported agent-tooling issue as resolved and record the fix evidence (commit SHA, test path, or narrative). " + - "Call this only after the fix is implemented and verified — not speculatively.", + "Mark a previously reported agent-tooling issue as resolved and record canonical evidence. 
" + + "Use agent-fix only after a fix is implemented and verified; use human-clear or promoted-to-requirement for triage closures.", promptSnippet: - "Resolve a filed agent-tooling issue with commit or test evidence", + "Resolve a filed agent-tooling issue with canonical evidence", promptGuidelines: [ - "Call resolve_issue after implementing and verifying the fix, not before.", - "Pass commit_sha when a commit exists; use summary_narrative and test_path when a commit is not the right artifact.", + "Use evidence_kind=agent-fix after implementing and verifying the fix, not before.", + "Use evidence_kind=human-clear for stale, superseded, false-positive, or intentionally closed entries.", + "Use evidence_kind=promoted-to-requirement with requirement_id when an entry has been moved into requirements.", + "Pass commit_sha when an agent-fix commit exists; use summary_narrative and test_path when a commit/test path is not enough.", "If the entry had acceptance criteria, pass criteria_met with the criteria you satisfied.", ], parameters: Type.Object({ @@ -941,9 +966,29 @@ export function registerDbTools(pi) { reason: Type.String({ description: "Short explanation of why the entry is resolved", }), + evidence_kind: Type.Optional( + Type.Union( + [ + Type.Literal("agent-fix"), + Type.Literal("human-clear"), + Type.Literal("promoted-to-requirement"), + Type.Literal("agent-fix-unverified"), + ], + { + description: + "Canonical resolution evidence kind. 
Defaults to agent-fix for backward compatibility.", + }, + ), + ), commit_sha: Type.Optional( Type.String({ description: "Commit SHA containing the fix" }), ), + requirement_id: Type.Optional( + Type.String({ + description: + "Requirement id required when evidence_kind is promoted-to-requirement", + }), + ), test_path: Type.Optional( Type.String({ description: "Focused test or verification path" }), ), diff --git a/src/resources/extensions/sf/tests/self-feedback-db.test.mjs b/src/resources/extensions/sf/tests/self-feedback-db.test.mjs index 7657f59fb..abf0ed630 100644 --- a/src/resources/extensions/sf/tests/self-feedback-db.test.mjs +++ b/src/resources/extensions/sf/tests/self-feedback-db.test.mjs @@ -16,6 +16,7 @@ import { import { tmpdir } from "node:os"; import { join } from "node:path"; import { afterEach, test } from "vitest"; +import { registerDbTools } from "../bootstrap/db-tools.js"; import { compactSelfFeedbackMarkdown, markResolved, @@ -50,6 +51,16 @@ function makeForgeProject() { return dir; } +function getResolveIssueTool() { + const tools = new Map(); + registerDbTools({ + registerTool(tool) { + tools.set(tool.name, tool); + }, + }); + return tools.get("resolve_issue"); +} + test("recordSelfFeedback_when_db_available_writes_sqlite_and_versioned_projection", () => { const project = makeForgeProject(); @@ -207,6 +218,125 @@ test("markResolved_accepts_each_canonical_evidence_kind", () => { } }); +test("resolve_issue_tool_defaults_to_agent_fix_for_existing_callers", async () => { + const project = makeForgeProject(); + const filed = recordSelfFeedback( + { kind: "gap:tool-default", severity: "medium", summary: "x" }, + project, + ); + const tool = getResolveIssueTool(); + const originalCwd = process.cwd(); + process.chdir(project); + try { + const result = await tool.execute( + "tool-call", + { + id: filed.entry.id, + reason: "fixed", + summary_narrative: "verified manually", + }, + undefined, + undefined, + undefined, + ); + 
assert.equal(result.details.resolved, true); + } finally { + process.chdir(originalCwd); + } + const [entry] = readAllSelfFeedback(project); + assert.equal(entry.resolvedEvidence.kind, "agent-fix"); + assert.equal(entry.resolvedEvidence.summaryNarrative, "verified manually"); +}); + +test("resolve_issue_tool_records_human_clear_and_promoted_requirement_evidence", async () => { + const project = makeForgeProject(); + const human = recordSelfFeedback( + { kind: "gap:tool-human", severity: "low", summary: "x" }, + project, + ); + const promoted = recordSelfFeedback( + { kind: "gap:tool-promote", severity: "low", summary: "x" }, + project, + ); + const tool = getResolveIssueTool(); + const originalCwd = process.cwd(); + process.chdir(project); + try { + const humanResult = await tool.execute( + "tool-call", + { + id: human.entry.id, + reason: "stale", + evidence_kind: "human-clear", + }, + undefined, + undefined, + undefined, + ); + assert.equal(humanResult.details.resolved, true); + const promotedResult = await tool.execute( + "tool-call", + { + id: promoted.entry.id, + reason: "tracked as requirement", + evidence_kind: "promoted-to-requirement", + requirement_id: "R042", + }, + undefined, + undefined, + undefined, + ); + assert.equal(promotedResult.details.resolved, true); + } finally { + process.chdir(originalCwd); + } + const entries = readAllSelfFeedback(project); + assert.equal( + entries.find((entry) => entry.id === human.entry.id).resolvedEvidence.kind, + "human-clear", + ); + assert.equal( + entries.find((entry) => entry.id === promoted.entry.id).resolvedEvidence + .kind, + "promoted-to-requirement", + ); + assert.equal( + entries.find((entry) => entry.id === promoted.entry.id).resolvedEvidence + .requirementId, + "R042", + ); +}); + +test("resolve_issue_tool_rejects_promoted_requirement_without_requirement_id", async () => { + const project = makeForgeProject(); + const filed = recordSelfFeedback( + { kind: "gap:tool-promote-invalid", severity: "low", 
summary: "x" }, + project, + ); + const tool = getResolveIssueTool(); + const originalCwd = process.cwd(); + process.chdir(project); + let result; + try { + result = await tool.execute( + "tool-call", + { + id: filed.entry.id, + reason: "tracked as requirement", + evidence_kind: "promoted-to-requirement", + }, + undefined, + undefined, + undefined, + ); + } finally { + process.chdir(originalCwd); + } + assert.equal(result.details.error, "missing_requirement_id"); + const [entry] = readAllSelfFeedback(project); + assert.equal(entry.resolvedAt, undefined); +}); + test("markResolved_rejects_agent_fix_with_nonexistent_commit_sha", async () => { const project = makeForgeProject(); // Initialize a real git repo so the verifier can distinguish "commit @@ -267,9 +397,7 @@ test("markResolved_rejects_agent_fix_when_commit_does_not_touch_AC_files", async execFileSync("git", ["config", "user.name", "test"], { cwd: project }); // Two commits touching different files - const { writeFileSync: writeFs, mkdirSync: mkFs } = await import( - "node:fs" - ); + const { writeFileSync: writeFs, mkdirSync: mkFs } = await import("node:fs"); mkFs(join(project, "src"), { recursive: true }); writeFs(join(project, "src", "expected.js"), "// initial\n"); writeFs(join(project, "src", "unrelated.js"), "// initial\n"); @@ -481,7 +609,10 @@ test("markResolved_mirrors_resolution_into_memory_store_with_tags", async () => const { getActiveMemories } = await import("../sf-db.js"); const memories = getActiveMemories(); const closureMem = memories.find((m) => m.sourceUnitId === filed.entry.id); - assert.ok(closureMem, "expected a memory entry sourced to the resolved entry"); + assert.ok( + closureMem, + "expected a memory entry sourced to the resolved entry", + ); assert.equal(closureMem.category, "self-feedback-resolution"); assert.match(closureMem.content, /\[gap:visible-in-memory\]/); assert.match(closureMem.content, /agent-fix: fixed by deadbeef/); @@ -532,12 +663,12 @@ 
test("markResolved_memory_mirror_handles_human_clear_without_commit_tags", async test("recordSelfFeedback_kind_validation_accepts_canonical_shapes", () => { const project = makeForgeProject(); const cases = [ - "gap", // 1-segment legacy (allowed-domain) - "gap:routing", // 2-segment domain:family - "gap:routing:tiebreak-cost-only", // 3-segment domain:family:specific - "architecture-defect:foo", // hyphenated domain works + "gap", // 1-segment legacy (allowed-domain) + "gap:routing", // 2-segment domain:family + "gap:routing:tiebreak-cost-only", // 3-segment domain:family:specific + "architecture-defect:foo", // hyphenated domain works "architecture-defect:solver:executor-conflation", - "runaway-loop", // 1-segment legacy + "runaway-loop", // 1-segment legacy "upstream-rollup:gap-audit-orphan-prompt", ]; for (const kind of cases) { @@ -553,24 +684,28 @@ test("recordSelfFeedback_kind_validation_accepts_canonical_shapes", () => { test("recordSelfFeedback_kind_validation_rejects_malformed", () => { const project = makeForgeProject(); const cases = [ - { kind: "" }, // empty - { kind: "Unknown" }, // not in domain allow-list - { kind: "gap:" }, // empty family - { kind: ":family" }, // empty domain - { kind: "gap:routing:tiebreak:extra" },// 4 segments - { kind: "gap:Routing" }, // uppercase in family (not kebab) - { kind: "gap:routing_cost" }, // underscore (not kebab) - { kind: "gap:-routing" }, // leading hyphen - { kind: "gap:routing-" }, // trailing hyphen - { kind: "9gap:routing" }, // domain starts with digit - { kind: "weird-domain:family" }, // unknown domain + { kind: "" }, // empty + { kind: "Unknown" }, // not in domain allow-list + { kind: "gap:" }, // empty family + { kind: ":family" }, // empty domain + { kind: "gap:routing:tiebreak:extra" }, // 4 segments + { kind: "gap:Routing" }, // uppercase in family (not kebab) + { kind: "gap:routing_cost" }, // underscore (not kebab) + { kind: "gap:-routing" }, // leading hyphen + { kind: "gap:routing-" }, // 
trailing hyphen + { kind: "9gap:routing" }, // domain starts with digit + { kind: "weird-domain:family" }, // unknown domain ]; for (const { kind } of cases) { const result = recordSelfFeedback( { kind, severity: "low", summary: `should reject ${kind}` }, project, ); - assert.equal(result, null, `expected ${JSON.stringify(kind)} to be rejected`); + assert.equal( + result, + null, + `expected ${JSON.stringify(kind)} to be rejected`, + ); } }); @@ -581,7 +716,11 @@ test("recordSelfFeedback_kind_validation_rejects_non_string", () => { { kind, severity: "low", summary: "non-string kind" }, project, ); - assert.equal(result, null, `expected non-string ${JSON.stringify(kind)} to be rejected`); + assert.equal( + result, + null, + `expected non-string ${JSON.stringify(kind)} to be rejected`, + ); } }); @@ -615,7 +754,10 @@ test("markResolved_appends_resolution_event_to_jsonl_audit_log", () => { ); assert.equal(ok, true); - const lines = readFileSync(join(project, ".sf", "self-feedback.jsonl"), "utf-8") + const lines = readFileSync( + join(project, ".sf", "self-feedback.jsonl"), + "utf-8", + ) .split("\n") .filter((l) => l.trim()); assert.equal(lines.length, 2);