fix(self-feedback): rewire inline-fix worker as triage queue

The inline-fix worker was a partial repair queue — it picked only
high/critical+blocking entries plus my recent gap/architecture-defect
override and left everything else (medium inconsistencies, janitor gaps,
architectural-risks, low-severity gaps) sitting open forever. The
requirement-promoter clusters by exact `kind` string and never fires on
diverse forge-local entries (every open entry currently has a unique
kind), so there is no other sweep that ever touches these. They just
accumulate.

The point of the worker is triage, not just repair: every open entry
should get an eyes-on per session and reach one of three outcomes —
fix, promote to requirement, or close as not-of-value with reason.
Closing deliberately is a valid, expected outcome.

Changes:

- `selectInlineFixCandidates` now returns every open forge-local entry,
  modulo the existing credibility check that re-includes suspect
  resolutions. Severity and blocking filters are gone; the kind-based
  override is no longer needed because everything qualifies.
- The dispatch prompt is rewritten as a three-way triage protocol
  (Fix / Promote / Close) with explicit guidance per outcome and
  explicit prohibition on the `auto-version-bump` evidence kind (which
  would re-open under the credibility check).
- Tests collapse the three filter-coverage tests into a single
  "selects every open forge-local entry" assertion that exercises the
  full severity × blocking × kind matrix.

Upstream feedback is still excluded — those entries describe behavior
in other repos that the inline-fix unit cannot directly repair.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-05-13 22:46:24 +02:00
parent 085beb5199
commit 5f245b721d
2 changed files with 48 additions and 78 deletions

View file

@ -37,20 +37,6 @@ import { readAllSelfFeedback } from "./self-feedback.js";
*/
const SUSPECT_RESOLUTION_KINDS = new Set(["auto-version-bump"]);
/**
* Entry `kind` prefixes that describe actionable architectural debt. Entries
* matching one of these prefixes qualify for inline-fix dispatch regardless
* of severity or the blocking flag they exist precisely because the fix is
* concrete code work, even when business severity is medium and the loop
* doesn't pause.
*/
const ARCHITECTURAL_KIND_PREFIXES = ["gap:", "architecture-defect:"];
function isArchitecturalKind(kind) {
if (typeof kind !== "string") return false;
return ARCHITECTURAL_KIND_PREFIXES.some((prefix) => kind.startsWith(prefix));
}
/**
* Decide whether an entry's recorded resolution is suspect i.e. the entry
* is marked resolved but the recorded resolution lacks a meaningful signal
@ -157,31 +143,27 @@ function hasUncommittedChanges(basePath) {
}
}
/**
* Return forge-local self-feedback entries that should be repaired inline.
* Return forge-local self-feedback entries that need triage decisions.
*
* Selection rules (an entry qualifies if either applies):
* The inline-fix unit is a triage worker, not just a repair worker: for each
* candidate it decides fix, promote-to-requirement, or close-as-not-of-value
* with reason. Sitting open forever is the failure mode we are designing
* against, so the selector returns every open forge-local entry rather than
* narrowing by severity or kind.
*
* 1. High/critical AND blocking the loop-pause path. These are entries the
* runtime already treats as breaking; the inline-fix unit should mop them
* up before the next dispatch.
* Filters:
*
* 2. `kind` is architectural (`gap:*` or `architecture-defect:*`) entries
* describing known code debt the inline-fix unit was built to address.
* Severity reflects business impact; actionability is implicit in the
* kind, so medium/non-blocking entries here still qualify. Otherwise
* architectural gaps rot in the ledger indefinitely.
* 1. Forge-local only. Upstream entries are cross-project flow observations
* gathered while sf runs in other repos; they're operator-visibility
* signal for forge maintainers but the inline-fix unit edits forge
* source and cannot repair issues in those other repos.
*
* An entry's recorded resolution is honored only when the resolution evidence
* is credible (`agent-fix`, `human-clear`, or `promoted-to-requirement`).
* Resolutions of other kinds (notably `auto-version-bump`, which fires on
* any sf-version bump without verifying the bump actually contained a fix)
* are treated as suspect and the entry is re-included as a candidate.
*
* Upstream feedback is intentionally NOT included here. Upstream entries are
* cross-project flow observations gathered while sf runs in other repos;
* they're operator-visibility signal for forge maintainers but the
* inline-fix unit edits forge source and cannot repair issues in those other
* repos. Including them would dispatch repair work the unit cannot perform.
* 2. Unresolved, OR resolved with suspect evidence. Resolutions are honored
* when the recorded evidence is credible (either a structured kind in
* CREDIBLE_RESOLUTION_KINDS or a legacy `resolvedReason` narrative).
* Resolutions of suspect kinds (notably `auto-version-bump`) or with
* neither structured evidence nor a narrative are re-included so the
* unit gets a chance to actually close them with a real decision.
*
* Consumer: dispatchSelfFeedbackInlineFixIfNeeded during session_start.
*/
@ -190,12 +172,7 @@ export function selectInlineFixCandidates(basePath) {
return readAllSelfFeedback(basePath)
.filter((entry) => {
if (isSuspectlyResolved(entry)) return true;
if (entry.resolvedAt) return false;
const isHighOrCritical =
entry.severity === "high" || entry.severity === "critical";
if (entry.blocking && isHighOrCritical) return true;
if (isArchitecturalKind(entry.kind)) return true;
return false;
return !entry.resolvedAt;
})
.sort((a, b) => a.ts.localeCompare(b.ts));
}
@ -219,22 +196,27 @@ function buildInlineFixPrompt(entries) {
)
.join("\n\n");
return [
"You are executing SF self-feedback inline-fix mode.",
"You are executing SF self-feedback triage mode.",
"",
"These high/critical self-feedback entries are unresolved sf defects. Do not only triage them; repair the current codebase directly.",
"Every open forge-local self-feedback entry is listed below. Your job is to give each one a decision — sitting open forever is the failure mode. For each entry, choose exactly one of three outcomes:",
"",
" A. Fix it. The defect is real, in scope, and worth fixing now.",
" B. Promote it. The defect is real but the right place to track it is a requirement, not a self-feedback entry.",
" C. Close it. The entry is no longer of value: stale, superseded, false positive, or not worth a fix at SF's current priorities.",
"",
"Pick the smallest coherent decision per entry. You do not have to implement every entry — but you DO have to decide every entry. Closing entries deliberately is a valid and expected outcome.",
"",
rendered,
"",
"Instructions:",
"1. Verify each entry still applies before editing.",
"2. Fix the smallest coherent set of code/docs/tests needed to satisfy the acceptance criteria.",
"3. Run focused verification and typecheck for touched areas.",
"4. Commit the fix with a conventional commit message.",
"5. Call `resolve_issue` for each repaired entry with agent-fix evidence and the commit SHA.",
"6. If an entry is already fixed, verify it and call `resolve_issue` with the verification evidence.",
"7. Do not hand-edit `.sf/self-feedback.jsonl` or `.sf/SELF-FEEDBACK.md`; use the resolver tool so the durable self-feedback store, markdown projection, and reload detection stay consistent.",
"Decision procedure:",
"1. For each entry: verify the claim still applies against the current code. If it doesn't, that's outcome C.",
"2. If outcome A (fix): make the smallest coherent code/docs/tests change. Run focused verification and typecheck for touched areas. Commit the fix with a conventional message. Then call `resolve_issue` with evidence `{kind: \"agent-fix\", commitSha: <sha>}` and a `reason` naming what landed.",
"3. If outcome B (promote): call `resolve_issue` with evidence `{kind: \"promoted-to-requirement\", requirementId: <id>}` after ensuring the requirement row exists. Use this when the entry describes long-running policy work, not a code fix.",
"4. If outcome C (close): call `resolve_issue` with evidence `{kind: \"human-clear\"}` and a `reason` that names WHY the entry is no longer of value (stale, superseded by <commit>, false positive, out-of-scope, etc.). Be specific — a future reader should be able to tell whether re-opening makes sense.",
"5. Never use evidence `{kind: \"auto-version-bump\"}` for these decisions — that kind is reserved for the automatic version-bump resolver and would re-open under the credibility check.",
"6. Do not hand-edit `.sf/self-feedback.jsonl` or `.sf/SELF-FEEDBACK.md`. Always go through `resolve_issue` so the DB, markdown projection, and reload detection stay consistent.",
"",
"When done, say: Self-feedback inline fix complete.",
"When every entry has a decision, say: Self-feedback triage complete.",
].join("\n");
}
/**

View file

@ -61,37 +61,25 @@ describe("selectInlineFixCandidates", () => {
expect(selectInlineFixCandidates(dir)).toEqual([]);
});
test("selects high+blocking entries (legacy rule)", () => {
test("selects every open forge-local entry regardless of severity, blocking, or kind", () => {
const dir = makeForgeProject();
writeEntries(dir, [
entry({ id: "a", severity: "high", blocking: true, ts: "2026-05-01T00:00:00Z" }),
entry({ id: "b", severity: "critical", blocking: true, ts: "2026-05-02T00:00:00Z" }),
entry({ id: "c", severity: "medium", blocking: true, ts: "2026-05-03T00:00:00Z" }),
entry({ id: "d", severity: "high", blocking: false, ts: "2026-05-04T00:00:00Z" }),
entry({ id: "hi-block", severity: "high", blocking: true, ts: "2026-05-01T00:00:00Z" }),
entry({ id: "crit-block", severity: "critical", blocking: true, kind: "runaway-loop:foo", ts: "2026-05-02T00:00:00Z" }),
entry({ id: "med-block", severity: "medium", blocking: true, kind: "inconsistency:bar", ts: "2026-05-03T00:00:00Z" }),
entry({ id: "med-nonblock", severity: "medium", blocking: false, kind: "janitor-gap:baz", ts: "2026-05-04T00:00:00Z" }),
entry({ id: "low-nonblock", severity: "low", blocking: false, kind: "gap:tiebreak", ts: "2026-05-05T00:00:00Z" }),
entry({ id: "arch", severity: "medium", blocking: false, kind: "architecture-defect:foo", ts: "2026-05-06T00:00:00Z" }),
]);
const ids = selectInlineFixCandidates(dir).map((e) => e.id);
expect(ids).toEqual(["a", "b"]);
});
test("selects gap:* entries regardless of severity or blocking", () => {
const dir = makeForgeProject();
writeEntries(dir, [
entry({ id: "gap-med", kind: "gap:adr-step-4", severity: "medium", blocking: false, ts: "2026-05-01T00:00:00Z" }),
entry({ id: "gap-low", kind: "gap:routing-tiebreak", severity: "low", blocking: false, ts: "2026-05-02T00:00:00Z" }),
entry({ id: "non-gap-med", kind: "inconsistency:foo", severity: "medium", blocking: false, ts: "2026-05-03T00:00:00Z" }),
expect(ids).toEqual([
"hi-block",
"crit-block",
"med-block",
"med-nonblock",
"low-nonblock",
"arch",
]);
const ids = selectInlineFixCandidates(dir).map((e) => e.id);
expect(ids).toEqual(["gap-med", "gap-low"]);
});
test("selects architecture-defect:* entries regardless of severity or blocking", () => {
const dir = makeForgeProject();
writeEntries(dir, [
entry({ id: "arch-med", kind: "architecture-defect:solver-conflation", severity: "medium", blocking: false, ts: "2026-05-01T00:00:00Z" }),
entry({ id: "noise", kind: "janitor-gap:something", severity: "high", blocking: false, ts: "2026-05-02T00:00:00Z" }),
]);
const ids = selectInlineFixCandidates(dir).map((e) => e.id);
expect(ids).toEqual(["arch-med"]);
});
test("excludes resolved entries with credible evidence kind", () => {