fix(self-feedback): require structured evidence kind for trusted resolution

Dogfood of the triage worker revealed that the agent can bypass the
resolve_issue tool (which hardcodes kind=agent-fix) and write DB rows
directly with non-canonical evidence shapes (null, or {file, line}).

The earlier credibility check trusted any resolution that had a prose
resolvedReason — a "legacy narrative" carve-out meant to preserve operator
clears predating structured evidence. Brand-new sloppy agent resolutions
slipped through that carve-out: 5/5 of today's triage resolutions had
non-canonical evidence and would have been treated as authoritative under
the old check.

Replace the denylist/legacy-carve-out with an allowlist:

- isSuspectlyResolved returns true for a resolved entry unless
  resolvedEvidence.kind is in
  {agent-fix, human-clear, promoted-to-requirement}.
- SUSPECT_RESOLUTION_KINDS is kept as documentation of the
  auto-version-bump case but the allowlist makes it redundant for the
  actual policy decision.

Tests now cover both failure modes: a prose-only resolution (no kind) and
a non-canonical evidence shape ({file, line}) each re-include the entry as
a candidate.

Legacy entries that genuinely lack an evidence kind are backfilled to
kind=human-clear separately so they keep their resolution under the
stricter check.

A self-feedback entry (sf-mp4qoby4-meiir7, severity=high) was filed about
the underlying bypass — markResolved should ALSO reject or auto-tag
non-canonical writes at the writer layer, since the reader is currently the
only gate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-14 02:17:47 +02:00 · 2026-05-14 02:17:47 +02:00 · 79db5704bc
commit 79db5704bc
parent 6e95c3542c
2 changed files with 54 additions and 29 deletions
--- a/src/resources/extensions/sf/self-feedback-drain.js
+++ b/src/resources/extensions/sf/self-feedback-drain.js
@@ -21,45 +21,54 @@ import { sfRuntimeRoot } from "./paths.js";
 import { readAllSelfFeedback } from "./self-feedback.js";

 /**
- * Resolution-evidence kinds that are explicitly NOT credible for the
- * inline-fix dispatcher. Entries resolved with these kinds are re-included
- * as candidates so the unit gets a chance to actually fix the underlying
- * defect.
+ * Resolution-evidence kinds that count as authoritative for the inline-fix
+ * dispatcher. Any other kind — including null/missing — is suspect, and the
+ * entry is re-included as a candidate so the unit gets another chance to
+ * close it with proper evidence.
 *
- * Currently this is just `auto-version-bump`, which fires on any sf-version
- * bump without verifying the bump actually contained a fix for the entry —
- * the most common source of false resolutions.
+ * Why an allowlist instead of a denylist: an earlier version of this check
+ * trusted any resolution that had a prose `resolvedReason` (to preserve
+ * legacy operator clears), but in dogfood the triage agent produced fresh
+ * resolutions with prose reasons and null evidence — bypassing the
+ * `resolve_issue` tool which would have stamped a credible kind. Those
+ * non-canonical resolutions slipped through the trust-narrative path.
 *
- * We deliberately do NOT enumerate "credible" kinds and reject everything
- * else: historical entries were resolved without an `evidence.kind` field
- * at all (the `resolved_reason` narrative was the only signal), and treating
- * those as suspect would re-open legitimately operator-cleared entries.
+ * Legacy operator clears that genuinely lack an evidence kind must be
+ * backfilled with `kind: "human-clear"` to remain trusted; that backfill
+ * is a one-shot data migration, not an ongoing carve-out in the policy.
+ */
+const CREDIBLE_RESOLUTION_KINDS = new Set([
+	"agent-fix",
+	"human-clear",
+	"promoted-to-requirement",
+]);
+
+/**
+ * Resolution-evidence kinds that are explicitly NOT credible. Today this is
+ * just `auto-version-bump` (fires on any sf-version bump without verifying
+ * the bump contained a fix). Any kind outside CREDIBLE_RESOLUTION_KINDS is
+ * also suspect; this set is documentation-only for the most common case.
 */
 const SUSPECT_RESOLUTION_KINDS = new Set(["auto-version-bump"]);

 /**
 * Decide whether an entry's recorded resolution is suspect — i.e. the entry
- * is marked resolved but the recorded resolution lacks a meaningful signal
- * that an actual fix landed.
+ * is marked resolved but the evidence kind is not in
+ * CREDIBLE_RESOLUTION_KINDS.
 *
- * Suspect when EITHER:
- *   - `resolvedEvidence.kind` is in SUSPECT_RESOLUTION_KINDS
- *     (currently `auto-version-bump`), OR
- *   - both `resolvedEvidence.kind` AND `resolvedReason` are absent — the
- *     resolution has no narrative and no structured evidence at all.
+ * Returns `false` only for unresolved entries and for entries whose
+ * `resolvedEvidence.kind` is one of the credible kinds. Any other state —
+ * `auto-version-bump`, a non-standard kind, or missing kind entirely
+ * (regardless of how rich the prose `resolvedReason` is) — returns `true`
+ * so the entry is re-included as a candidate.
 *
- * Returns `false` for unresolved entries and for entries with either
- * structured credible evidence or a meaningful operator-supplied
- * `resolvedReason` narrative (the legacy format).
+ * Legacy entries that lack an evidence kind must be backfilled with
+ * `kind: "human-clear"` to remain trusted under this check.
 */
 function isSuspectlyResolved(entry) {
 	if (!entry.resolvedAt) return false;
 	const evidenceKind = entry.resolvedEvidence?.kind;
-	if (SUSPECT_RESOLUTION_KINDS.has(evidenceKind)) return true;
-	const hasEvidenceKind = typeof evidenceKind === "string" && evidenceKind;
-	const hasReason =
-		typeof entry.resolvedReason === "string" && entry.resolvedReason.trim();
-	return !hasEvidenceKind && !hasReason;
+	return !CREDIBLE_RESOLUTION_KINDS.has(evidenceKind);
 }

 const CLAIM_TTL_MS = 30 * 60 * 1000;
--- a/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs
+++ b/src/resources/extensions/sf/tests/self-feedback-drain.test.mjs
@@ -125,18 +125,34 @@ describe("selectInlineFixCandidates", () => {
 		expect(ids).toEqual(["version-bump-resolved"]);
 	});

-	test("trusts legacy resolutions that have a resolvedReason narrative but no evidence kind", () => {
+	test("re-includes entries with prose reason but no evidence kind (no narrative-trust carve-out)", () => {
 		const dir = makeForgeProject();
 		writeEntries(dir, [
 			entry({
-				id: "legacy-resolved",
+				id: "prose-only-resolution",
 				kind: "gap:foo",
 				resolvedAt: "2026-05-10T00:00:00Z",
 				resolvedReason:
 					"Fixed by splitting solver and executor roles per ADR-0079. Solver pinned to kimi-k2.6.",
 			}),
 		]);
-		expect(selectInlineFixCandidates(dir)).toEqual([]);
+		const ids = selectInlineFixCandidates(dir).map((e) => e.id);
+		expect(ids).toEqual(["prose-only-resolution"]);
+	});
+
+	test("re-includes entries with non-canonical evidence shape (e.g. {file, line})", () => {
+		const dir = makeForgeProject();
+		writeEntries(dir, [
+			entry({
+				id: "non-canonical-evidence",
+				kind: "gap:foo",
+				resolvedAt: "2026-05-10T00:00:00Z",
+				resolvedReason: "claimed implemented",
+				resolvedEvidence: { file: "src/foo.js", line: 42 },
+			}),
+		]);
+		const ids = selectInlineFixCandidates(dir).map((e) => e.id);
+		expect(ids).toEqual(["non-canonical-evidence"]);
 	});

 	test("re-includes entries with no resolution narrative and no evidence kind", () => {