diff --git a/src/resources/extensions/sf/self-feedback-drain.js b/src/resources/extensions/sf/self-feedback-drain.js index 8ced3891a..ffad01a73 100644 --- a/src/resources/extensions/sf/self-feedback-drain.js +++ b/src/resources/extensions/sf/self-feedback-drain.js @@ -21,45 +21,54 @@ import { sfRuntimeRoot } from "./paths.js"; import { readAllSelfFeedback } from "./self-feedback.js"; /** - * Resolution-evidence kinds that are explicitly NOT credible for the - * inline-fix dispatcher. Entries resolved with these kinds are re-included - * as candidates so the unit gets a chance to actually fix the underlying - * defect. + * Resolution-evidence kinds that count as authoritative for the inline-fix + * dispatcher. Any other kind — including null/missing — is suspect, and the + * entry is re-included as a candidate so the unit gets another chance to + * close it with proper evidence. * - * Currently this is just `auto-version-bump`, which fires on any sf-version - * bump without verifying the bump actually contained a fix for the entry — - * the most common source of false resolutions. + * Why an allowlist instead of a denylist: an earlier version of this check + * trusted any resolution that had a prose `resolvedReason` (to preserve + * legacy operator clears), but in dogfood the triage agent produced fresh + * resolutions with prose reasons and null evidence — bypassing the + * `resolve_issue` tool which would have stamped a credible kind. Those + * non-canonical resolutions slipped through the trust-narrative path. * - * We deliberately do NOT enumerate "credible" kinds and reject everything - * else: historical entries were resolved without an `evidence.kind` field - * at all (the `resolved_reason` narrative was the only signal), and treating - * those as suspect would re-open legitimately operator-cleared entries. + * Legacy operator clears that genuinely lack an evidence kind must be + * backfilled with `kind: "human-clear"` to remain trusted; that backfill + * is a one-shot data migration, not an ongoing carve-out in the policy. + */ +const CREDIBLE_RESOLUTION_KINDS = new Set([ + "agent-fix", + "human-clear", + "promoted-to-requirement", +]); + +/** + * Resolution-evidence kinds that are explicitly NOT credible. Today this is + * just `auto-version-bump` (fires on any sf-version bump without verifying + * the bump contained a fix). Any kind outside CREDIBLE_RESOLUTION_KINDS is + * also suspect; this set is documentation-only for the most common case. */ const SUSPECT_RESOLUTION_KINDS = new Set(["auto-version-bump"]); /** * Decide whether an entry's recorded resolution is suspect — i.e. the entry - * is marked resolved but the recorded resolution lacks a meaningful signal - * that an actual fix landed. + * is marked resolved but the evidence kind is not in + * CREDIBLE_RESOLUTION_KINDS. * - * Suspect when EITHER: - * - `resolvedEvidence.kind` is in SUSPECT_RESOLUTION_KINDS - * (currently `auto-version-bump`), OR - * - both `resolvedEvidence.kind` AND `resolvedReason` are absent — the - * resolution has no narrative and no structured evidence at all. + * Returns `false` only for unresolved entries and for entries whose + * `resolvedEvidence.kind` is one of the credible kinds. Any other state — + * `auto-version-bump`, a non-standard kind, or missing kind entirely + * (regardless of how rich the prose `resolvedReason` is) — returns `true` + * so the entry is re-included as a candidate. * - * Returns `false` for unresolved entries and for entries with either - * structured credible evidence or a meaningful operator-supplied - * `resolvedReason` narrative (the legacy format). + * Legacy entries that lack an evidence kind must be backfilled with + * `kind: "human-clear"` to remain trusted under this check. */ function isSuspectlyResolved(entry) { if (!entry.resolvedAt) return false; const evidenceKind = entry.resolvedEvidence?.kind; - if (SUSPECT_RESOLUTION_KINDS.has(evidenceKind)) return true; - const hasEvidenceKind = typeof evidenceKind === "string" && evidenceKind; - const hasReason = - typeof entry.resolvedReason === "string" && entry.resolvedReason.trim(); - return !hasEvidenceKind && !hasReason; + return !CREDIBLE_RESOLUTION_KINDS.has(evidenceKind); } const CLAIM_TTL_MS = 30 * 60 * 1000; diff --git a/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs b/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs index b6c044125..2aa6df6b3 100644 --- a/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs +++ b/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs @@ -125,18 +125,34 @@ describe("selectInlineFixCandidates", () => { expect(ids).toEqual(["version-bump-resolved"]); }); - test("trusts legacy resolutions that have a resolvedReason narrative but no evidence kind", () => { + test("re-includes entries with prose reason but no evidence kind (no narrative-trust carve-out)", () => { const dir = makeForgeProject(); writeEntries(dir, [ entry({ - id: "legacy-resolved", + id: "prose-only-resolution", kind: "gap:foo", resolvedAt: "2026-05-10T00:00:00Z", resolvedReason: "Fixed by splitting solver and executor roles per ADR-0079. Solver pinned to kimi-k2.6.", }), ]); - expect(selectInlineFixCandidates(dir)).toEqual([]); + const ids = selectInlineFixCandidates(dir).map((e) => e.id); + expect(ids).toEqual(["prose-only-resolution"]); + }); + + test("re-includes entries with non-canonical evidence shape (e.g. {file, line})", () => { + const dir = makeForgeProject(); + writeEntries(dir, [ + entry({ + id: "non-canonical-evidence", + kind: "gap:foo", + resolvedAt: "2026-05-10T00:00:00Z", + resolvedReason: "claimed implemented", + resolvedEvidence: { file: "src/foo.js", line: 42 }, + }), + ]); + const ids = selectInlineFixCandidates(dir).map((e) => e.id); + expect(ids).toEqual(["non-canonical-evidence"]); }); test("re-includes entries with no resolution narrative and no evidence kind", () => {