feat(triage): wire --apply CLI + canonical resolve_issue evidence kinds

Three coupled changes that together complete the operator-facing
--apply surface for sf headless triage:

1. headless.ts: parse --apply from commandArgs and forward to
   handleTriage. The triage option flow now distinguishes inspect
   (--list, --json), one-shot (--run), and orchestrated apply
   (--apply) cleanly.

2. help-text.ts: triage subcommand line + examples block now document
   the --apply mode (triage-decider → rubber-duck pipeline).

3. bootstrap/db-tools.js: resolve_issue tool now accepts the full
   canonical evidence-kind set instead of hardcoding "agent-fix":
   - agent-fix (default; commit-based fix evidence)
   - human-clear (stale, superseded, false positive, intentional close)
   - promoted-to-requirement (with required requirement_id)
   The tool surfaces a clear error when promoted-to-requirement is
   used without requirement_id. The promptGuidelines were updated to
   walk callers through choosing the right kind.

   self-feedback-db.test.mjs was extended with coverage for all three
   evidence kinds plus the missing-requirement_id rejection path.

Together these make sf headless triage --apply genuinely useful: the
agent can produce a plan with any outcome, rubber-duck reviews it,
and the runner applies via resolve_issue with the right evidence
kind per decision.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-14 17:23:10 +02:00
parent 1881918ab8
commit 12f5eb2279
4 changed files with 226 additions and 41 deletions

View file

@ -818,8 +818,7 @@ async function runHeadlessOnce(
// Bypasses the RPC path for instant, TTY-independent gate health output. // Bypasses the RPC path for instant, TTY-independent gate health output.
if (options.command === "status" && options.commandArgs[0] === "uok") { if (options.command === "status" && options.commandArgs[0] === "uok") {
const { handleUokStatus } = await import("./headless-uok-status.js"); const { handleUokStatus } = await import("./headless-uok-status.js");
const wantsJson = const wantsJson = options.json || options.commandArgs.includes("--json");
options.json || options.commandArgs.includes("--json");
const result = await handleUokStatus(process.cwd(), { json: wantsJson }); const result = await handleUokStatus(process.cwd(), { json: wantsJson });
return { exitCode: result.exitCode, interrupted: false, timedOut: false }; return { exitCode: result.exitCode, interrupted: false, timedOut: false };
} }
@ -838,6 +837,7 @@ async function runHeadlessOnce(
const wantsJson = options.json || options.commandArgs.includes("--json"); const wantsJson = options.json || options.commandArgs.includes("--json");
const wantsList = options.commandArgs.includes("--list"); const wantsList = options.commandArgs.includes("--list");
const wantsRun = options.commandArgs.includes("--run"); const wantsRun = options.commandArgs.includes("--run");
const wantsApply = options.commandArgs.includes("--apply");
const maxIdx = options.commandArgs.indexOf("--max"); const maxIdx = options.commandArgs.indexOf("--max");
let max: number | undefined; let max: number | undefined;
if (maxIdx >= 0 && maxIdx + 1 < options.commandArgs.length) { if (maxIdx >= 0 && maxIdx + 1 < options.commandArgs.length) {
@ -855,6 +855,7 @@ async function runHeadlessOnce(
list: wantsList, list: wantsList,
max, max,
run: wantsRun, run: wantsRun,
apply: wantsApply,
model, model,
}); });
return { exitCode: result.exitCode, interrupted: false, timedOut: false }; return { exitCode: result.exitCode, interrupted: false, timedOut: false };
@ -920,9 +921,7 @@ async function runHeadlessOnce(
// import-backlog: deterministic text→DB transform, no LLM or RPC child needed. // import-backlog: deterministic text→DB transform, no LLM or RPC child needed.
if (options.command === "import-backlog") { if (options.command === "import-backlog") {
const { runImportBacklog } = await import( const { runImportBacklog } = await import("./headless-import-backlog.js");
"./headless-import-backlog.js"
);
const filePath = options.commandArgs[0] ?? options.context; const filePath = options.commandArgs[0] ?? options.context;
if (!filePath) { if (!filePath) {
process.stderr.write( process.stderr.write(
@ -931,11 +930,9 @@ async function runHeadlessOnce(
); );
process.exit(1); process.exit(1);
} }
const exitCode = await runImportBacklog( const exitCode = await runImportBacklog(resolve(filePath), process.cwd(), {
resolve(filePath), json: options.json,
process.cwd(), });
{ json: options.json },
);
process.exit(exitCode); process.exit(exitCode);
} }

View file

@ -225,7 +225,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
" query Machine snapshot: JSON state + next dispatch + costs (no LLM)", " query Machine snapshot: JSON state + next dispatch + costs (no LLM)",
" usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)", " usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)",
" reflect Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw, --run to dispatch to gemini-cli, --model <id> to override)", " reflect Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw, --run to dispatch to gemini-cli, --model <id> to override)",
" triage Render canonical self-feedback triage prompt for piping into a model (--list for digest, --json for structured, --max N to cap, --run to dispatch + write decisions to .sf/triage/decisions/, --model <id> to override)", " triage Render canonical self-feedback triage prompt (--list/--json inspect, --run writes decisions, --apply runs triage-decider -> rubber-duck)",
"", "",
"new-milestone flags:", "new-milestone flags:",
" --context <path> Path to spec/PRD file (use '-' for stdin)", " --context <path> Path to spec/PRD file (use '-' for stdin)",
@ -259,6 +259,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
" sf headless triage --list Self-feedback queue digest (impact↓ effort↑ ts↑)", " sf headless triage --list Self-feedback queue digest (impact↓ effort↑ ts↑)",
" sf headless triage | sf-some-model Pipe triage prompt to any model", " sf headless triage | sf-some-model Pipe triage prompt to any model",
" sf headless triage --run Dispatch triage to default model + write decisions", " sf headless triage --run Dispatch triage to default model + write decisions",
" sf headless triage --apply Apply via triage-decider, then gate with rubber-duck",
" sf headless reflect Render reflection prompt for piping", " sf headless reflect Render reflection prompt for piping",
" sf headless reflect --run Dispatch reflection + write report", " sf headless reflect --run Dispatch reflection + write report",
"", "",

View file

@ -5,6 +5,7 @@ import {
AUTONOMOUS_SOLVER_OUTCOMES, AUTONOMOUS_SOLVER_OUTCOMES,
appendAutonomousSolverCheckpoint, appendAutonomousSolverCheckpoint,
} from "../autonomous-solver.js"; } from "../autonomous-solver.js";
import { getErrorMessage } from "../error-utils.js";
import { import {
claimReservedId, claimReservedId,
findMilestoneIds, findMilestoneIds,
@ -28,7 +29,6 @@ import {
} from "../tools/workflow-tool-executors.js"; } from "../tools/workflow-tool-executors.js";
import { logError } from "../workflow-logger.js"; import { logError } from "../workflow-logger.js";
import { ensureDbOpen } from "./dynamic-tools.js"; import { ensureDbOpen } from "./dynamic-tools.js";
import { getErrorMessage } from "../error-utils.js";
export function registerDbTools(pi) { export function registerDbTools(pi) {
// ─── save_decision ───────────────────────────────────────────────── // ─── save_decision ─────────────────────────────────────────────────
const decisionSaveExecute = async ( const decisionSaveExecute = async (
@ -858,15 +858,38 @@ export function registerDbTools(pi) {
_ctx, _ctx,
) => { ) => {
try { try {
const evidenceKind = params.evidence_kind ?? "agent-fix";
if (
evidenceKind === "promoted-to-requirement" &&
!(
typeof params.requirement_id === "string" &&
params.requirement_id.trim()
)
) {
return {
content: [
{
type: "text",
text: "Error in resolve_issue: requirement_id is required for promoted-to-requirement evidence",
},
],
details: {
operation: "self_feedback_resolve",
id: params.id,
error: "missing_requirement_id",
},
};
}
const ok = markResolved( const ok = markResolved(
params.id, params.id,
{ {
reason: params.reason, reason: params.reason,
evidence: { evidence: {
kind: "agent-fix", kind: evidenceKind,
commitSha: params.commit_sha, commitSha: params.commit_sha,
testPath: params.test_path, testPath: params.test_path,
summaryNarrative: params.summary_narrative, summaryNarrative: params.summary_narrative,
requirementId: params.requirement_id,
}, },
criteriaMet: params.criteria_met, criteriaMet: params.criteria_met,
}, },
@ -925,13 +948,15 @@ export function registerDbTools(pi) {
name: "resolve_issue", name: "resolve_issue",
label: "Resolve Self Feedback", label: "Resolve Self Feedback",
description: description:
"Mark a previously reported agent-tooling issue as resolved and record the fix evidence (commit SHA, test path, or narrative). " + "Mark a previously reported agent-tooling issue as resolved and record canonical evidence. " +
"Call this only after the fix is implemented and verified — not speculatively.", "Use agent-fix only after a fix is implemented and verified; use human-clear or promoted-to-requirement for triage closures.",
promptSnippet: promptSnippet:
"Resolve a filed agent-tooling issue with commit or test evidence", "Resolve a filed agent-tooling issue with canonical evidence",
promptGuidelines: [ promptGuidelines: [
"Call resolve_issue after implementing and verifying the fix, not before.", "Use evidence_kind=agent-fix after implementing and verifying the fix, not before.",
"Pass commit_sha when a commit exists; use summary_narrative and test_path when a commit is not the right artifact.", "Use evidence_kind=human-clear for stale, superseded, false-positive, or intentionally closed entries.",
"Use evidence_kind=promoted-to-requirement with requirement_id when an entry has been moved into requirements.",
"Pass commit_sha when an agent-fix commit exists; use summary_narrative and test_path when a commit/test path is not enough.",
"If the entry had acceptance criteria, pass criteria_met with the criteria you satisfied.", "If the entry had acceptance criteria, pass criteria_met with the criteria you satisfied.",
], ],
parameters: Type.Object({ parameters: Type.Object({
@ -941,9 +966,29 @@ export function registerDbTools(pi) {
reason: Type.String({ reason: Type.String({
description: "Short explanation of why the entry is resolved", description: "Short explanation of why the entry is resolved",
}), }),
evidence_kind: Type.Optional(
Type.Union(
[
Type.Literal("agent-fix"),
Type.Literal("human-clear"),
Type.Literal("promoted-to-requirement"),
Type.Literal("agent-fix-unverified"),
],
{
description:
"Canonical resolution evidence kind. Defaults to agent-fix for backward compatibility.",
},
),
),
commit_sha: Type.Optional( commit_sha: Type.Optional(
Type.String({ description: "Commit SHA containing the fix" }), Type.String({ description: "Commit SHA containing the fix" }),
), ),
requirement_id: Type.Optional(
Type.String({
description:
"Requirement id required when evidence_kind is promoted-to-requirement",
}),
),
test_path: Type.Optional( test_path: Type.Optional(
Type.String({ description: "Focused test or verification path" }), Type.String({ description: "Focused test or verification path" }),
), ),

View file

@ -16,6 +16,7 @@ import {
import { tmpdir } from "node:os"; import { tmpdir } from "node:os";
import { join } from "node:path"; import { join } from "node:path";
import { afterEach, test } from "vitest"; import { afterEach, test } from "vitest";
import { registerDbTools } from "../bootstrap/db-tools.js";
import { import {
compactSelfFeedbackMarkdown, compactSelfFeedbackMarkdown,
markResolved, markResolved,
@ -50,6 +51,16 @@ function makeForgeProject() {
return dir; return dir;
} }
function getResolveIssueTool() {
const tools = new Map();
registerDbTools({
registerTool(tool) {
tools.set(tool.name, tool);
},
});
return tools.get("resolve_issue");
}
test("recordSelfFeedback_when_db_available_writes_sqlite_and_versioned_projection", () => { test("recordSelfFeedback_when_db_available_writes_sqlite_and_versioned_projection", () => {
const project = makeForgeProject(); const project = makeForgeProject();
@ -207,6 +218,125 @@ test("markResolved_accepts_each_canonical_evidence_kind", () => {
} }
}); });
test("resolve_issue_tool_defaults_to_agent_fix_for_existing_callers", async () => {
const project = makeForgeProject();
const filed = recordSelfFeedback(
{ kind: "gap:tool-default", severity: "medium", summary: "x" },
project,
);
const tool = getResolveIssueTool();
const originalCwd = process.cwd();
process.chdir(project);
try {
const result = await tool.execute(
"tool-call",
{
id: filed.entry.id,
reason: "fixed",
summary_narrative: "verified manually",
},
undefined,
undefined,
undefined,
);
assert.equal(result.details.resolved, true);
} finally {
process.chdir(originalCwd);
}
const [entry] = readAllSelfFeedback(project);
assert.equal(entry.resolvedEvidence.kind, "agent-fix");
assert.equal(entry.resolvedEvidence.summaryNarrative, "verified manually");
});
test("resolve_issue_tool_records_human_clear_and_promoted_requirement_evidence", async () => {
const project = makeForgeProject();
const human = recordSelfFeedback(
{ kind: "gap:tool-human", severity: "low", summary: "x" },
project,
);
const promoted = recordSelfFeedback(
{ kind: "gap:tool-promote", severity: "low", summary: "x" },
project,
);
const tool = getResolveIssueTool();
const originalCwd = process.cwd();
process.chdir(project);
try {
const humanResult = await tool.execute(
"tool-call",
{
id: human.entry.id,
reason: "stale",
evidence_kind: "human-clear",
},
undefined,
undefined,
undefined,
);
assert.equal(humanResult.details.resolved, true);
const promotedResult = await tool.execute(
"tool-call",
{
id: promoted.entry.id,
reason: "tracked as requirement",
evidence_kind: "promoted-to-requirement",
requirement_id: "R042",
},
undefined,
undefined,
undefined,
);
assert.equal(promotedResult.details.resolved, true);
} finally {
process.chdir(originalCwd);
}
const entries = readAllSelfFeedback(project);
assert.equal(
entries.find((entry) => entry.id === human.entry.id).resolvedEvidence.kind,
"human-clear",
);
assert.equal(
entries.find((entry) => entry.id === promoted.entry.id).resolvedEvidence
.kind,
"promoted-to-requirement",
);
assert.equal(
entries.find((entry) => entry.id === promoted.entry.id).resolvedEvidence
.requirementId,
"R042",
);
});
test("resolve_issue_tool_rejects_promoted_requirement_without_requirement_id", async () => {
const project = makeForgeProject();
const filed = recordSelfFeedback(
{ kind: "gap:tool-promote-invalid", severity: "low", summary: "x" },
project,
);
const tool = getResolveIssueTool();
const originalCwd = process.cwd();
process.chdir(project);
let result;
try {
result = await tool.execute(
"tool-call",
{
id: filed.entry.id,
reason: "tracked as requirement",
evidence_kind: "promoted-to-requirement",
},
undefined,
undefined,
undefined,
);
} finally {
process.chdir(originalCwd);
}
assert.equal(result.details.error, "missing_requirement_id");
const [entry] = readAllSelfFeedback(project);
assert.equal(entry.resolvedAt, undefined);
});
test("markResolved_rejects_agent_fix_with_nonexistent_commit_sha", async () => { test("markResolved_rejects_agent_fix_with_nonexistent_commit_sha", async () => {
const project = makeForgeProject(); const project = makeForgeProject();
// Initialize a real git repo so the verifier can distinguish "commit // Initialize a real git repo so the verifier can distinguish "commit
@ -267,9 +397,7 @@ test("markResolved_rejects_agent_fix_when_commit_does_not_touch_AC_files", async
execFileSync("git", ["config", "user.name", "test"], { cwd: project }); execFileSync("git", ["config", "user.name", "test"], { cwd: project });
// Two commits touching different files // Two commits touching different files
const { writeFileSync: writeFs, mkdirSync: mkFs } = await import( const { writeFileSync: writeFs, mkdirSync: mkFs } = await import("node:fs");
"node:fs"
);
mkFs(join(project, "src"), { recursive: true }); mkFs(join(project, "src"), { recursive: true });
writeFs(join(project, "src", "expected.js"), "// initial\n"); writeFs(join(project, "src", "expected.js"), "// initial\n");
writeFs(join(project, "src", "unrelated.js"), "// initial\n"); writeFs(join(project, "src", "unrelated.js"), "// initial\n");
@ -481,7 +609,10 @@ test("markResolved_mirrors_resolution_into_memory_store_with_tags", async () =>
const { getActiveMemories } = await import("../sf-db.js"); const { getActiveMemories } = await import("../sf-db.js");
const memories = getActiveMemories(); const memories = getActiveMemories();
const closureMem = memories.find((m) => m.sourceUnitId === filed.entry.id); const closureMem = memories.find((m) => m.sourceUnitId === filed.entry.id);
assert.ok(closureMem, "expected a memory entry sourced to the resolved entry"); assert.ok(
closureMem,
"expected a memory entry sourced to the resolved entry",
);
assert.equal(closureMem.category, "self-feedback-resolution"); assert.equal(closureMem.category, "self-feedback-resolution");
assert.match(closureMem.content, /\[gap:visible-in-memory\]/); assert.match(closureMem.content, /\[gap:visible-in-memory\]/);
assert.match(closureMem.content, /agent-fix: fixed by deadbeef/); assert.match(closureMem.content, /agent-fix: fixed by deadbeef/);
@ -532,12 +663,12 @@ test("markResolved_memory_mirror_handles_human_clear_without_commit_tags", async
test("recordSelfFeedback_kind_validation_accepts_canonical_shapes", () => { test("recordSelfFeedback_kind_validation_accepts_canonical_shapes", () => {
const project = makeForgeProject(); const project = makeForgeProject();
const cases = [ const cases = [
"gap", // 1-segment legacy (allowed-domain) "gap", // 1-segment legacy (allowed-domain)
"gap:routing", // 2-segment domain:family "gap:routing", // 2-segment domain:family
"gap:routing:tiebreak-cost-only", // 3-segment domain:family:specific "gap:routing:tiebreak-cost-only", // 3-segment domain:family:specific
"architecture-defect:foo", // hyphenated domain works "architecture-defect:foo", // hyphenated domain works
"architecture-defect:solver:executor-conflation", "architecture-defect:solver:executor-conflation",
"runaway-loop", // 1-segment legacy "runaway-loop", // 1-segment legacy
"upstream-rollup:gap-audit-orphan-prompt", "upstream-rollup:gap-audit-orphan-prompt",
]; ];
for (const kind of cases) { for (const kind of cases) {
@ -553,24 +684,28 @@ test("recordSelfFeedback_kind_validation_accepts_canonical_shapes", () => {
test("recordSelfFeedback_kind_validation_rejects_malformed", () => { test("recordSelfFeedback_kind_validation_rejects_malformed", () => {
const project = makeForgeProject(); const project = makeForgeProject();
const cases = [ const cases = [
{ kind: "" }, // empty { kind: "" }, // empty
{ kind: "Unknown" }, // not in domain allow-list { kind: "Unknown" }, // not in domain allow-list
{ kind: "gap:" }, // empty family { kind: "gap:" }, // empty family
{ kind: ":family" }, // empty domain { kind: ":family" }, // empty domain
{ kind: "gap:routing:tiebreak:extra" },// 4 segments { kind: "gap:routing:tiebreak:extra" }, // 4 segments
{ kind: "gap:Routing" }, // uppercase in family (not kebab) { kind: "gap:Routing" }, // uppercase in family (not kebab)
{ kind: "gap:routing_cost" }, // underscore (not kebab) { kind: "gap:routing_cost" }, // underscore (not kebab)
{ kind: "gap:-routing" }, // leading hyphen { kind: "gap:-routing" }, // leading hyphen
{ kind: "gap:routing-" }, // trailing hyphen { kind: "gap:routing-" }, // trailing hyphen
{ kind: "9gap:routing" }, // domain starts with digit { kind: "9gap:routing" }, // domain starts with digit
{ kind: "weird-domain:family" }, // unknown domain { kind: "weird-domain:family" }, // unknown domain
]; ];
for (const { kind } of cases) { for (const { kind } of cases) {
const result = recordSelfFeedback( const result = recordSelfFeedback(
{ kind, severity: "low", summary: `should reject ${kind}` }, { kind, severity: "low", summary: `should reject ${kind}` },
project, project,
); );
assert.equal(result, null, `expected ${JSON.stringify(kind)} to be rejected`); assert.equal(
result,
null,
`expected ${JSON.stringify(kind)} to be rejected`,
);
} }
}); });
@ -581,7 +716,11 @@ test("recordSelfFeedback_kind_validation_rejects_non_string", () => {
{ kind, severity: "low", summary: "non-string kind" }, { kind, severity: "low", summary: "non-string kind" },
project, project,
); );
assert.equal(result, null, `expected non-string ${JSON.stringify(kind)} to be rejected`); assert.equal(
result,
null,
`expected non-string ${JSON.stringify(kind)} to be rejected`,
);
} }
}); });
@ -615,7 +754,10 @@ test("markResolved_appends_resolution_event_to_jsonl_audit_log", () => {
); );
assert.equal(ok, true); assert.equal(ok, true);
const lines = readFileSync(join(project, ".sf", "self-feedback.jsonl"), "utf-8") const lines = readFileSync(
join(project, ".sf", "self-feedback.jsonl"),
"utf-8",
)
.split("\n") .split("\n")
.filter((l) => l.trim()); .filter((l) => l.trim());
assert.equal(lines.length, 2); assert.equal(lines.length, 2);