feat(triage): wire --apply CLI + canonical resolve_issue evidence kinds
Three coupled changes that together complete the operator-facing --apply surface for sf headless triage:

1. headless.ts: parse --apply from commandArgs and forward it to handleTriage. The triage option flow now cleanly distinguishes inspect (--list, --json), one-shot (--run), and orchestrated apply (--apply).

2. help-text.ts: the triage subcommand line and the examples block now document the --apply mode (triage-decider → rubber-duck pipeline).

3. bootstrap/db-tools.js: the resolve_issue tool now accepts the full canonical evidence-kind set instead of hardcoding "agent-fix":
   - agent-fix (default; commit-based fix evidence)
   - human-clear (stale, superseded, false positive, intentional close)
   - promoted-to-requirement (with required requirement_id)
   (The parameter schema additionally admits agent-fix-unverified.)
   The tool surfaces a clear error when promoted-to-requirement is used without requirement_id. The promptGuidelines were updated to walk callers through choosing the right kind.

self-feedback-db.test.mjs is extended with coverage for all three evidence kinds plus the missing-requirement_id rejection path.

Together these make sf headless triage --apply genuinely useful: the agent can produce a plan with any outcome, rubber-duck reviews it, and the runner applies it via resolve_issue with the right evidence kind per decision.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
1881918ab8
commit
12f5eb2279
4 changed files with 226 additions and 41 deletions
|
|
@ -818,8 +818,7 @@ async function runHeadlessOnce(
|
|||
// Bypasses the RPC path for instant, TTY-independent gate health output.
|
||||
if (options.command === "status" && options.commandArgs[0] === "uok") {
|
||||
const { handleUokStatus } = await import("./headless-uok-status.js");
|
||||
const wantsJson =
|
||||
options.json || options.commandArgs.includes("--json");
|
||||
const wantsJson = options.json || options.commandArgs.includes("--json");
|
||||
const result = await handleUokStatus(process.cwd(), { json: wantsJson });
|
||||
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
|
||||
}
|
||||
|
|
@ -838,6 +837,7 @@ async function runHeadlessOnce(
|
|||
const wantsJson = options.json || options.commandArgs.includes("--json");
|
||||
const wantsList = options.commandArgs.includes("--list");
|
||||
const wantsRun = options.commandArgs.includes("--run");
|
||||
const wantsApply = options.commandArgs.includes("--apply");
|
||||
const maxIdx = options.commandArgs.indexOf("--max");
|
||||
let max: number | undefined;
|
||||
if (maxIdx >= 0 && maxIdx + 1 < options.commandArgs.length) {
|
||||
|
|
@ -855,6 +855,7 @@ async function runHeadlessOnce(
|
|||
list: wantsList,
|
||||
max,
|
||||
run: wantsRun,
|
||||
apply: wantsApply,
|
||||
model,
|
||||
});
|
||||
return { exitCode: result.exitCode, interrupted: false, timedOut: false };
|
||||
|
|
@ -920,9 +921,7 @@ async function runHeadlessOnce(
|
|||
|
||||
// import-backlog: deterministic text→DB transform, no LLM or RPC child needed.
|
||||
if (options.command === "import-backlog") {
|
||||
const { runImportBacklog } = await import(
|
||||
"./headless-import-backlog.js"
|
||||
);
|
||||
const { runImportBacklog } = await import("./headless-import-backlog.js");
|
||||
const filePath = options.commandArgs[0] ?? options.context;
|
||||
if (!filePath) {
|
||||
process.stderr.write(
|
||||
|
|
@ -931,11 +930,9 @@ async function runHeadlessOnce(
|
|||
);
|
||||
process.exit(1);
|
||||
}
|
||||
const exitCode = await runImportBacklog(
|
||||
resolve(filePath),
|
||||
process.cwd(),
|
||||
{ json: options.json },
|
||||
);
|
||||
const exitCode = await runImportBacklog(resolve(filePath), process.cwd(), {
|
||||
json: options.json,
|
||||
});
|
||||
process.exit(exitCode);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -225,7 +225,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
|
|||
" query Machine snapshot: JSON state + next dispatch + costs (no LLM)",
|
||||
" usage Live LLM-provider usage snapshot (today: gemini-cli tier + per-model quota)",
|
||||
" reflect Assemble reflection corpus + render prompt for cross-corpus pattern analysis (--json for raw, --run to dispatch to gemini-cli, --model <id> to override)",
|
||||
" triage Render canonical self-feedback triage prompt for piping into a model (--list for digest, --json for structured, --max N to cap, --run to dispatch + write decisions to .sf/triage/decisions/, --model <id> to override)",
|
||||
" triage Render canonical self-feedback triage prompt (--list/--json inspect, --run writes decisions, --apply runs triage-decider -> rubber-duck)",
|
||||
"",
|
||||
"new-milestone flags:",
|
||||
" --context <path> Path to spec/PRD file (use '-' for stdin)",
|
||||
|
|
@ -259,6 +259,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
|
|||
" sf headless triage --list Self-feedback queue digest (impact↓ effort↑ ts↑)",
|
||||
" sf headless triage | sf-some-model Pipe triage prompt to any model",
|
||||
" sf headless triage --run Dispatch triage to default model + write decisions",
|
||||
" sf headless triage --apply Apply via triage-decider, then gate with rubber-duck",
|
||||
" sf headless reflect Render reflection prompt for piping",
|
||||
" sf headless reflect --run Dispatch reflection + write report",
|
||||
"",
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import {
|
|||
AUTONOMOUS_SOLVER_OUTCOMES,
|
||||
appendAutonomousSolverCheckpoint,
|
||||
} from "../autonomous-solver.js";
|
||||
import { getErrorMessage } from "../error-utils.js";
|
||||
import {
|
||||
claimReservedId,
|
||||
findMilestoneIds,
|
||||
|
|
@ -28,7 +29,6 @@ import {
|
|||
} from "../tools/workflow-tool-executors.js";
|
||||
import { logError } from "../workflow-logger.js";
|
||||
import { ensureDbOpen } from "./dynamic-tools.js";
|
||||
import { getErrorMessage } from "../error-utils.js";
|
||||
export function registerDbTools(pi) {
|
||||
// ─── save_decision ─────────────────────────────────────────────────
|
||||
const decisionSaveExecute = async (
|
||||
|
|
@ -858,15 +858,38 @@ export function registerDbTools(pi) {
|
|||
_ctx,
|
||||
) => {
|
||||
try {
|
||||
const evidenceKind = params.evidence_kind ?? "agent-fix";
|
||||
if (
|
||||
evidenceKind === "promoted-to-requirement" &&
|
||||
!(
|
||||
typeof params.requirement_id === "string" &&
|
||||
params.requirement_id.trim()
|
||||
)
|
||||
) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "Error in resolve_issue: requirement_id is required for promoted-to-requirement evidence",
|
||||
},
|
||||
],
|
||||
details: {
|
||||
operation: "self_feedback_resolve",
|
||||
id: params.id,
|
||||
error: "missing_requirement_id",
|
||||
},
|
||||
};
|
||||
}
|
||||
const ok = markResolved(
|
||||
params.id,
|
||||
{
|
||||
reason: params.reason,
|
||||
evidence: {
|
||||
kind: "agent-fix",
|
||||
kind: evidenceKind,
|
||||
commitSha: params.commit_sha,
|
||||
testPath: params.test_path,
|
||||
summaryNarrative: params.summary_narrative,
|
||||
requirementId: params.requirement_id,
|
||||
},
|
||||
criteriaMet: params.criteria_met,
|
||||
},
|
||||
|
|
@ -925,13 +948,15 @@ export function registerDbTools(pi) {
|
|||
name: "resolve_issue",
|
||||
label: "Resolve Self Feedback",
|
||||
description:
|
||||
"Mark a previously reported agent-tooling issue as resolved and record the fix evidence (commit SHA, test path, or narrative). " +
|
||||
"Call this only after the fix is implemented and verified — not speculatively.",
|
||||
"Mark a previously reported agent-tooling issue as resolved and record canonical evidence. " +
|
||||
"Use agent-fix only after a fix is implemented and verified; use human-clear or promoted-to-requirement for triage closures.",
|
||||
promptSnippet:
|
||||
"Resolve a filed agent-tooling issue with commit or test evidence",
|
||||
"Resolve a filed agent-tooling issue with canonical evidence",
|
||||
promptGuidelines: [
|
||||
"Call resolve_issue after implementing and verifying the fix, not before.",
|
||||
"Pass commit_sha when a commit exists; use summary_narrative and test_path when a commit is not the right artifact.",
|
||||
"Use evidence_kind=agent-fix after implementing and verifying the fix, not before.",
|
||||
"Use evidence_kind=human-clear for stale, superseded, false-positive, or intentionally closed entries.",
|
||||
"Use evidence_kind=promoted-to-requirement with requirement_id when an entry has been moved into requirements.",
|
||||
"Pass commit_sha when an agent-fix commit exists; use summary_narrative and test_path when a commit/test path is not enough.",
|
||||
"If the entry had acceptance criteria, pass criteria_met with the criteria you satisfied.",
|
||||
],
|
||||
parameters: Type.Object({
|
||||
|
|
@ -941,9 +966,29 @@ export function registerDbTools(pi) {
|
|||
reason: Type.String({
|
||||
description: "Short explanation of why the entry is resolved",
|
||||
}),
|
||||
evidence_kind: Type.Optional(
|
||||
Type.Union(
|
||||
[
|
||||
Type.Literal("agent-fix"),
|
||||
Type.Literal("human-clear"),
|
||||
Type.Literal("promoted-to-requirement"),
|
||||
Type.Literal("agent-fix-unverified"),
|
||||
],
|
||||
{
|
||||
description:
|
||||
"Canonical resolution evidence kind. Defaults to agent-fix for backward compatibility.",
|
||||
},
|
||||
),
|
||||
),
|
||||
commit_sha: Type.Optional(
|
||||
Type.String({ description: "Commit SHA containing the fix" }),
|
||||
),
|
||||
requirement_id: Type.Optional(
|
||||
Type.String({
|
||||
description:
|
||||
"Requirement id required when evidence_kind is promoted-to-requirement",
|
||||
}),
|
||||
),
|
||||
test_path: Type.Optional(
|
||||
Type.String({ description: "Focused test or verification path" }),
|
||||
),
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import {
|
|||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, test } from "vitest";
|
||||
import { registerDbTools } from "../bootstrap/db-tools.js";
|
||||
import {
|
||||
compactSelfFeedbackMarkdown,
|
||||
markResolved,
|
||||
|
|
@ -50,6 +51,16 @@ function makeForgeProject() {
|
|||
return dir;
|
||||
}
|
||||
|
||||
/**
 * Test helper: runs registerDbTools against a minimal stub host and returns
 * the registered tool whose name is "resolve_issue".
 *
 * The stub only implements registerTool, which stores each tool in a Map
 * keyed by tool.name. Returns undefined if no tool with that name was
 * registered (Map#get semantics).
 */
function getResolveIssueTool() {
|
||||
const tools = new Map();
|
||||
// Stub host: capture every tool that registerDbTools registers, keyed by name.
registerDbTools({
|
||||
registerTool(tool) {
|
||||
tools.set(tool.name, tool);
|
||||
},
|
||||
});
|
||||
return tools.get("resolve_issue");
|
||||
}
|
||||
|
||||
test("recordSelfFeedback_when_db_available_writes_sqlite_and_versioned_projection", () => {
|
||||
const project = makeForgeProject();
|
||||
|
||||
|
|
@ -207,6 +218,125 @@ test("markResolved_accepts_each_canonical_evidence_kind", () => {
|
|||
}
|
||||
});
|
||||
|
||||
// Backward-compatibility check: a resolve_issue call that omits evidence_kind
// must still resolve the entry and record evidence of kind "agent-fix".
test("resolve_issue_tool_defaults_to_agent_fix_for_existing_callers", async () => {
|
||||
const project = makeForgeProject();
|
||||
const filed = recordSelfFeedback(
|
||||
{ kind: "gap:tool-default", severity: "medium", summary: "x" },
|
||||
project,
|
||||
);
|
||||
const tool = getResolveIssueTool();
|
||||
const originalCwd = process.cwd();
|
||||
// Run the tool with the temp project as cwd.
// NOTE(review): presumably the tool locates the project via process.cwd() — confirm.
process.chdir(project);
|
||||
try {
|
||||
const result = await tool.execute(
|
||||
"tool-call",
|
||||
{
|
||||
// No evidence_kind here on purpose: this exercises the default.
id: filed.entry.id,
|
||||
reason: "fixed",
|
||||
summary_narrative: "verified manually",
|
||||
},
|
||||
undefined,
|
||||
undefined,
|
||||
undefined,
|
||||
);
|
||||
assert.equal(result.details.resolved, true);
|
||||
} finally {
|
||||
// Always restore cwd so a failure here does not leak into other tests.
process.chdir(originalCwd);
|
||||
}
|
||||
// Read the stored entry back and check the recorded evidence.
const [entry] = readAllSelfFeedback(project);
|
||||
assert.equal(entry.resolvedEvidence.kind, "agent-fix");
|
||||
assert.equal(entry.resolvedEvidence.summaryNarrative, "verified manually");
|
||||
});
|
||||
|
||||
// Files two entries and resolves one with evidence_kind "human-clear" and the
// other with "promoted-to-requirement" (plus requirement_id), then checks that
// the stored evidence carries the requested kind and the requirement id.
test("resolve_issue_tool_records_human_clear_and_promoted_requirement_evidence", async () => {
|
||||
const project = makeForgeProject();
|
||||
const human = recordSelfFeedback(
|
||||
{ kind: "gap:tool-human", severity: "low", summary: "x" },
|
||||
project,
|
||||
);
|
||||
const promoted = recordSelfFeedback(
|
||||
{ kind: "gap:tool-promote", severity: "low", summary: "x" },
|
||||
project,
|
||||
);
|
||||
const tool = getResolveIssueTool();
|
||||
const originalCwd = process.cwd();
|
||||
// Run the tool with the temp project as cwd.
// NOTE(review): presumably the tool locates the project via process.cwd() — confirm.
process.chdir(project);
|
||||
try {
|
||||
// Case 1: human-clear closure, no commit or requirement evidence supplied.
const humanResult = await tool.execute(
|
||||
"tool-call",
|
||||
{
|
||||
id: human.entry.id,
|
||||
reason: "stale",
|
||||
evidence_kind: "human-clear",
|
||||
},
|
||||
undefined,
|
||||
undefined,
|
||||
undefined,
|
||||
);
|
||||
assert.equal(humanResult.details.resolved, true);
|
||||
// Case 2: promoted-to-requirement closure with the required requirement_id.
const promotedResult = await tool.execute(
|
||||
"tool-call",
|
||||
{
|
||||
id: promoted.entry.id,
|
||||
reason: "tracked as requirement",
|
||||
evidence_kind: "promoted-to-requirement",
|
||||
requirement_id: "R042",
|
||||
},
|
||||
undefined,
|
||||
undefined,
|
||||
undefined,
|
||||
);
|
||||
assert.equal(promotedResult.details.resolved, true);
|
||||
} finally {
|
||||
// Always restore cwd so a failure here does not leak into other tests.
process.chdir(originalCwd);
|
||||
}
|
||||
// Look entries up by id rather than by position, since two were filed.
const entries = readAllSelfFeedback(project);
|
||||
assert.equal(
|
||||
entries.find((entry) => entry.id === human.entry.id).resolvedEvidence.kind,
|
||||
"human-clear",
|
||||
);
|
||||
assert.equal(
|
||||
entries.find((entry) => entry.id === promoted.entry.id).resolvedEvidence
|
||||
.kind,
|
||||
"promoted-to-requirement",
|
||||
);
|
||||
assert.equal(
|
||||
entries.find((entry) => entry.id === promoted.entry.id).resolvedEvidence
|
||||
.requirementId,
|
||||
"R042",
|
||||
);
|
||||
});
|
||||
|
||||
// Rejection path: evidence_kind "promoted-to-requirement" without a
// requirement_id must return details.error === "missing_requirement_id"
// and leave the entry unresolved (resolvedAt stays undefined).
test("resolve_issue_tool_rejects_promoted_requirement_without_requirement_id", async () => {
|
||||
const project = makeForgeProject();
|
||||
const filed = recordSelfFeedback(
|
||||
{ kind: "gap:tool-promote-invalid", severity: "low", summary: "x" },
|
||||
project,
|
||||
);
|
||||
const tool = getResolveIssueTool();
|
||||
const originalCwd = process.cwd();
|
||||
// Run the tool with the temp project as cwd.
// NOTE(review): presumably the tool locates the project via process.cwd() — confirm.
process.chdir(project);
|
||||
// Declared outside the try block so it can be asserted after cwd is restored.
let result;
|
||||
try {
|
||||
result = await tool.execute(
|
||||
"tool-call",
|
||||
{
|
||||
// requirement_id is left out on purpose to trigger the validation error.
id: filed.entry.id,
|
||||
reason: "tracked as requirement",
|
||||
evidence_kind: "promoted-to-requirement",
|
||||
},
|
||||
undefined,
|
||||
undefined,
|
||||
undefined,
|
||||
);
|
||||
} finally {
|
||||
// Always restore cwd so a failure here does not leak into other tests.
process.chdir(originalCwd);
|
||||
}
|
||||
assert.equal(result.details.error, "missing_requirement_id");
|
||||
// The rejected call must not have marked the entry as resolved.
const [entry] = readAllSelfFeedback(project);
|
||||
assert.equal(entry.resolvedAt, undefined);
|
||||
});
|
||||
|
||||
test("markResolved_rejects_agent_fix_with_nonexistent_commit_sha", async () => {
|
||||
const project = makeForgeProject();
|
||||
// Initialize a real git repo so the verifier can distinguish "commit
|
||||
|
|
@ -267,9 +397,7 @@ test("markResolved_rejects_agent_fix_when_commit_does_not_touch_AC_files", async
|
|||
execFileSync("git", ["config", "user.name", "test"], { cwd: project });
|
||||
|
||||
// Two commits touching different files
|
||||
const { writeFileSync: writeFs, mkdirSync: mkFs } = await import(
|
||||
"node:fs"
|
||||
);
|
||||
const { writeFileSync: writeFs, mkdirSync: mkFs } = await import("node:fs");
|
||||
mkFs(join(project, "src"), { recursive: true });
|
||||
writeFs(join(project, "src", "expected.js"), "// initial\n");
|
||||
writeFs(join(project, "src", "unrelated.js"), "// initial\n");
|
||||
|
|
@ -481,7 +609,10 @@ test("markResolved_mirrors_resolution_into_memory_store_with_tags", async () =>
|
|||
const { getActiveMemories } = await import("../sf-db.js");
|
||||
const memories = getActiveMemories();
|
||||
const closureMem = memories.find((m) => m.sourceUnitId === filed.entry.id);
|
||||
assert.ok(closureMem, "expected a memory entry sourced to the resolved entry");
|
||||
assert.ok(
|
||||
closureMem,
|
||||
"expected a memory entry sourced to the resolved entry",
|
||||
);
|
||||
assert.equal(closureMem.category, "self-feedback-resolution");
|
||||
assert.match(closureMem.content, /\[gap:visible-in-memory\]/);
|
||||
assert.match(closureMem.content, /agent-fix: fixed by deadbeef/);
|
||||
|
|
@ -532,12 +663,12 @@ test("markResolved_memory_mirror_handles_human_clear_without_commit_tags", async
|
|||
test("recordSelfFeedback_kind_validation_accepts_canonical_shapes", () => {
|
||||
const project = makeForgeProject();
|
||||
const cases = [
|
||||
"gap", // 1-segment legacy (allowed-domain)
|
||||
"gap:routing", // 2-segment domain:family
|
||||
"gap:routing:tiebreak-cost-only", // 3-segment domain:family:specific
|
||||
"architecture-defect:foo", // hyphenated domain works
|
||||
"gap", // 1-segment legacy (allowed-domain)
|
||||
"gap:routing", // 2-segment domain:family
|
||||
"gap:routing:tiebreak-cost-only", // 3-segment domain:family:specific
|
||||
"architecture-defect:foo", // hyphenated domain works
|
||||
"architecture-defect:solver:executor-conflation",
|
||||
"runaway-loop", // 1-segment legacy
|
||||
"runaway-loop", // 1-segment legacy
|
||||
"upstream-rollup:gap-audit-orphan-prompt",
|
||||
];
|
||||
for (const kind of cases) {
|
||||
|
|
@ -553,24 +684,28 @@ test("recordSelfFeedback_kind_validation_accepts_canonical_shapes", () => {
|
|||
test("recordSelfFeedback_kind_validation_rejects_malformed", () => {
|
||||
const project = makeForgeProject();
|
||||
const cases = [
|
||||
{ kind: "" }, // empty
|
||||
{ kind: "Unknown" }, // not in domain allow-list
|
||||
{ kind: "gap:" }, // empty family
|
||||
{ kind: ":family" }, // empty domain
|
||||
{ kind: "gap:routing:tiebreak:extra" },// 4 segments
|
||||
{ kind: "gap:Routing" }, // uppercase in family (not kebab)
|
||||
{ kind: "gap:routing_cost" }, // underscore (not kebab)
|
||||
{ kind: "gap:-routing" }, // leading hyphen
|
||||
{ kind: "gap:routing-" }, // trailing hyphen
|
||||
{ kind: "9gap:routing" }, // domain starts with digit
|
||||
{ kind: "weird-domain:family" }, // unknown domain
|
||||
{ kind: "" }, // empty
|
||||
{ kind: "Unknown" }, // not in domain allow-list
|
||||
{ kind: "gap:" }, // empty family
|
||||
{ kind: ":family" }, // empty domain
|
||||
{ kind: "gap:routing:tiebreak:extra" }, // 4 segments
|
||||
{ kind: "gap:Routing" }, // uppercase in family (not kebab)
|
||||
{ kind: "gap:routing_cost" }, // underscore (not kebab)
|
||||
{ kind: "gap:-routing" }, // leading hyphen
|
||||
{ kind: "gap:routing-" }, // trailing hyphen
|
||||
{ kind: "9gap:routing" }, // domain starts with digit
|
||||
{ kind: "weird-domain:family" }, // unknown domain
|
||||
];
|
||||
for (const { kind } of cases) {
|
||||
const result = recordSelfFeedback(
|
||||
{ kind, severity: "low", summary: `should reject ${kind}` },
|
||||
project,
|
||||
);
|
||||
assert.equal(result, null, `expected ${JSON.stringify(kind)} to be rejected`);
|
||||
assert.equal(
|
||||
result,
|
||||
null,
|
||||
`expected ${JSON.stringify(kind)} to be rejected`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -581,7 +716,11 @@ test("recordSelfFeedback_kind_validation_rejects_non_string", () => {
|
|||
{ kind, severity: "low", summary: "non-string kind" },
|
||||
project,
|
||||
);
|
||||
assert.equal(result, null, `expected non-string ${JSON.stringify(kind)} to be rejected`);
|
||||
assert.equal(
|
||||
result,
|
||||
null,
|
||||
`expected non-string ${JSON.stringify(kind)} to be rejected`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -615,7 +754,10 @@ test("markResolved_appends_resolution_event_to_jsonl_audit_log", () => {
|
|||
);
|
||||
assert.equal(ok, true);
|
||||
|
||||
const lines = readFileSync(join(project, ".sf", "self-feedback.jsonl"), "utf-8")
|
||||
const lines = readFileSync(
|
||||
join(project, ".sf", "self-feedback.jsonl"),
|
||||
"utf-8",
|
||||
)
|
||||
.split("\n")
|
||||
.filter((l) => l.trim());
|
||||
assert.equal(lines.length, 2);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue