From 5f245b721dafbeb9dd57cd81fab9e073061a0bc2 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Wed, 13 May 2026 22:46:24 +0200 Subject: [PATCH] fix(self-feedback): rewire inline-fix worker as triage queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The inline-fix worker was a partial repair queue — it picked only high/critical+blocking entries plus my recent gap/architecture-defect override and left everything else (medium inconsistencies, janitor gaps, architectural-risks, low-severity gaps) sitting open forever. The requirement-promoter clusters by exact `kind` string and never fires on diverse forge-local entries (every open entry currently has a unique kind), so there is no other sweep that ever touches these. They just accumulate. The point of the worker is triage, not just repair: every open entry should get an eyes-on per session and reach one of three outcomes — fix, promote to requirement, or close as not-of-value with reason. Closing deliberately is a valid, expected outcome. Changes: - `selectInlineFixCandidates` now returns every open forge-local entry, modulo the existing credibility check that re-includes suspect resolutions. Severity and blocking filters are gone; the kind-based override is no longer needed because everything qualifies. - The dispatch prompt is rewritten as a three-way triage protocol (Fix / Promote / Close) with explicit guidance per outcome and explicit prohibition on the `auto-version-bump` evidence kind (which would re-open under the credibility check). - Tests collapse the three filter-coverage tests into a single "selects every open forge-local entry" assertion that exercises the full severity × blocking × kind matrix. Upstream feedback is still excluded — those entries describe behavior in other repos that the inline-fix unit cannot directly repair. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../extensions/sf/self-feedback-drain.js | 86 ++++++++----------- .../sf/tests/self-feedback-drain.test.mjs | 40 +++------ 2 files changed, 48 insertions(+), 78 deletions(-) diff --git a/src/resources/extensions/sf/self-feedback-drain.js b/src/resources/extensions/sf/self-feedback-drain.js index d0a5d4c56..8ced3891a 100644 --- a/src/resources/extensions/sf/self-feedback-drain.js +++ b/src/resources/extensions/sf/self-feedback-drain.js @@ -37,20 +37,6 @@ import { readAllSelfFeedback } from "./self-feedback.js"; */ const SUSPECT_RESOLUTION_KINDS = new Set(["auto-version-bump"]); -/** - * Entry `kind` prefixes that describe actionable architectural debt. Entries - * matching one of these prefixes qualify for inline-fix dispatch regardless - * of severity or the blocking flag — they exist precisely because the fix is - * concrete code work, even when business severity is medium and the loop - * doesn't pause. - */ -const ARCHITECTURAL_KIND_PREFIXES = ["gap:", "architecture-defect:"]; - -function isArchitecturalKind(kind) { - if (typeof kind !== "string") return false; - return ARCHITECTURAL_KIND_PREFIXES.some((prefix) => kind.startsWith(prefix)); -} - /** * Decide whether an entry's recorded resolution is suspect — i.e. the entry * is marked resolved but the recorded resolution lacks a meaningful signal @@ -157,31 +143,27 @@ function hasUncommittedChanges(basePath) { } } /** - * Return forge-local self-feedback entries that should be repaired inline. + * Return forge-local self-feedback entries that need triage decisions. * - * Selection rules (an entry qualifies if either applies): + * The inline-fix unit is a triage worker, not just a repair worker: for each + * candidate it decides — fix, promote-to-requirement, or close-as-not-of-value + * with reason. Sitting open forever is the failure mode we are designing + * against, so the selector returns every open forge-local entry rather than + * narrowing by severity or kind. * - * 1. High/critical AND blocking — the loop-pause path. These are entries the - * runtime already treats as breaking; the inline-fix unit should mop them - * up before the next dispatch. + * Filters: * - * 2. `kind` is architectural (`gap:*` or `architecture-defect:*`) — entries - * describing known code debt the inline-fix unit was built to address. - * Severity reflects business impact; actionability is implicit in the - * kind, so medium/non-blocking entries here still qualify. Otherwise - * architectural gaps rot in the ledger indefinitely. + * 1. Forge-local only. Upstream entries are cross-project flow observations + * gathered while sf runs in other repos; they're operator-visibility + * signal for forge maintainers but the inline-fix unit edits forge + * source and cannot repair issues in those other repos. * - * An entry's recorded resolution is honored only when the resolution evidence - * is credible (`agent-fix`, `human-clear`, or `promoted-to-requirement`). - * Resolutions of other kinds (notably `auto-version-bump`, which fires on - * any sf-version bump without verifying the bump actually contained a fix) - * are treated as suspect and the entry is re-included as a candidate. - * - * Upstream feedback is intentionally NOT included here. Upstream entries are - * cross-project flow observations gathered while sf runs in other repos; - * they're operator-visibility signal for forge maintainers but the - * inline-fix unit edits forge source and cannot repair issues in those other - * repos. Including them would dispatch repair work the unit cannot perform. + * 2. Unresolved, OR resolved with suspect evidence. Resolutions are honored + * when the recorded evidence is credible (either a structured kind in + * CREDIBLE_RESOLUTION_KINDS or a legacy `resolvedReason` narrative). + * Resolutions of suspect kinds (notably `auto-version-bump`) or with + * neither structured evidence nor a narrative are re-included so the + * unit gets a chance to actually close them with a real decision. * * Consumer: dispatchSelfFeedbackInlineFixIfNeeded during session_start. */ @@ -190,12 +172,7 @@ export function selectInlineFixCandidates(basePath) { return readAllSelfFeedback(basePath) .filter((entry) => { if (isSuspectlyResolved(entry)) return true; - if (entry.resolvedAt) return false; - const isHighOrCritical = - entry.severity === "high" || entry.severity === "critical"; - if (entry.blocking && isHighOrCritical) return true; - if (isArchitecturalKind(entry.kind)) return true; - return false; + return !entry.resolvedAt; }) .sort((a, b) => a.ts.localeCompare(b.ts)); } @@ -219,22 +196,27 @@ function buildInlineFixPrompt(entries) { ) .join("\n\n"); return [ - "You are executing SF self-feedback inline-fix mode.", + "You are executing SF self-feedback triage mode.", "", - "These high/critical self-feedback entries are unresolved sf defects. Do not only triage them; repair the current codebase directly.", + "Every open forge-local self-feedback entry is listed below. Your job is to give each one a decision — sitting open forever is the failure mode. For each entry, choose exactly one of three outcomes:", + "", + " A. Fix it. The defect is real, in scope, and worth fixing now.", + " B. Promote it. The defect is real but the right place to track it is a requirement, not a self-feedback entry.", + " C. Close it. The entry is no longer of value: stale, superseded, false positive, or not worth a fix at SF's current priorities.", + "", + "Pick the smallest coherent decision per entry. You do not have to implement every entry — but you DO have to decide every entry. Closing entries deliberately is a valid and expected outcome.", "", rendered, "", - "Instructions:", - "1. Verify each entry still applies before editing.", - "2. Fix the smallest coherent set of code/docs/tests needed to satisfy the acceptance criteria.", - "3. Run focused verification and typecheck for touched areas.", - "4. Commit the fix with a conventional commit message.", - "5. Call `resolve_issue` for each repaired entry with agent-fix evidence and the commit SHA.", - "6. If an entry is already fixed, verify it and call `resolve_issue` with the verification evidence.", - "7. Do not hand-edit `.sf/self-feedback.jsonl` or `.sf/SELF-FEEDBACK.md`; use the resolver tool so the durable self-feedback store, markdown projection, and reload detection stay consistent.", + "Decision procedure:", + "1. For each entry: verify the claim still applies against the current code. If it doesn't, that's outcome C.", + "2. If outcome A (fix): make the smallest coherent code/docs/tests change. Run focused verification and typecheck for touched areas. Commit the fix with a conventional message. Then call `resolve_issue` with evidence `{kind: \"agent-fix\", commitSha: }` and a `reason` naming what landed.", + "3. If outcome B (promote): call `resolve_issue` with evidence `{kind: \"promoted-to-requirement\", requirementId: }` after ensuring the requirement row exists. Use this when the entry describes long-running policy work, not a code fix.", + "4. If outcome C (close): call `resolve_issue` with evidence `{kind: \"human-clear\"}` and a `reason` that names WHY the entry is no longer of value (stale, superseded by , false positive, out-of-scope, etc.). Be specific — a future reader should be able to tell whether re-opening makes sense.", + "5. Never use evidence `{kind: \"auto-version-bump\"}` for these decisions — that kind is reserved for the automatic version-bump resolver and would re-open under the credibility check.", + "6. Do not hand-edit `.sf/self-feedback.jsonl` or `.sf/SELF-FEEDBACK.md`. Always go through `resolve_issue` so the DB, markdown projection, and reload detection stay consistent.", "", - "When done, say: Self-feedback inline fix complete.", + "When every entry has a decision, say: Self-feedback triage complete.", ].join("\n"); } /** diff --git a/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs b/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs index 4d1a78eaf..b6c044125 100644 --- a/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs +++ b/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs @@ -61,37 +61,25 @@ describe("selectInlineFixCandidates", () => { expect(selectInlineFixCandidates(dir)).toEqual([]); }); - test("selects high+blocking entries (legacy rule)", () => { + test("selects every open forge-local entry regardless of severity, blocking, or kind", () => { const dir = makeForgeProject(); writeEntries(dir, [ - entry({ id: "a", severity: "high", blocking: true, ts: "2026-05-01T00:00:00Z" }), - entry({ id: "b", severity: "critical", blocking: true, ts: "2026-05-02T00:00:00Z" }), - entry({ id: "c", severity: "medium", blocking: true, ts: "2026-05-03T00:00:00Z" }), - entry({ id: "d", severity: "high", blocking: false, ts: "2026-05-04T00:00:00Z" }), + entry({ id: "hi-block", severity: "high", blocking: true, ts: "2026-05-01T00:00:00Z" }), + entry({ id: "crit-block", severity: "critical", blocking: true, kind: "runaway-loop:foo", ts: "2026-05-02T00:00:00Z" }), + entry({ id: "med-block", severity: "medium", blocking: true, kind: "inconsistency:bar", ts: "2026-05-03T00:00:00Z" }), + entry({ id: "med-nonblock", severity: "medium", blocking: false, kind: "janitor-gap:baz", ts: "2026-05-04T00:00:00Z" }), + entry({ id: "low-nonblock", severity: "low", blocking: false, kind: "gap:tiebreak", ts: "2026-05-05T00:00:00Z" }), + entry({ id: "arch", severity: "medium", blocking: false, kind: "architecture-defect:foo", ts: "2026-05-06T00:00:00Z" }), ]); const ids = selectInlineFixCandidates(dir).map((e) => e.id); - expect(ids).toEqual(["a", "b"]); - }); - - test("selects gap:* entries regardless of severity or blocking", () => { - const dir = makeForgeProject(); - writeEntries(dir, [ - entry({ id: "gap-med", kind: "gap:adr-step-4", severity: "medium", blocking: false, ts: "2026-05-01T00:00:00Z" }), - entry({ id: "gap-low", kind: "gap:routing-tiebreak", severity: "low", blocking: false, ts: "2026-05-02T00:00:00Z" }), - entry({ id: "non-gap-med", kind: "inconsistency:foo", severity: "medium", blocking: false, ts: "2026-05-03T00:00:00Z" }), + expect(ids).toEqual([ + "hi-block", + "crit-block", + "med-block", + "med-nonblock", + "low-nonblock", + "arch", ]); - const ids = selectInlineFixCandidates(dir).map((e) => e.id); - expect(ids).toEqual(["gap-med", "gap-low"]); - }); - - test("selects architecture-defect:* entries regardless of severity or blocking", () => { - const dir = makeForgeProject(); - writeEntries(dir, [ - entry({ id: "arch-med", kind: "architecture-defect:solver-conflation", severity: "medium", blocking: false, ts: "2026-05-01T00:00:00Z" }), - entry({ id: "noise", kind: "janitor-gap:something", severity: "high", blocking: false, ts: "2026-05-02T00:00:00Z" }), - ]); - const ids = selectInlineFixCandidates(dir).map((e) => e.id); - expect(ids).toEqual(["arch-med"]); }); test("excludes resolved entries with credible evidence kind", () => {