diff --git a/src/resources/extensions/sf/self-feedback.js b/src/resources/extensions/sf/self-feedback.js index 8570bf499..2570f3adb 100644 --- a/src/resources/extensions/sf/self-feedback.js +++ b/src/resources/extensions/sf/self-feedback.js @@ -573,6 +573,96 @@ function isAcceptedEvidence(evidence) { return typeof kind === "string" && ACCEPTED_EVIDENCE_KINDS.has(kind); } +/** + * Outcomes-verification AC2 (sf-mp4vxusa-pn2tnd): extract file/path + * mentions from a free-form acceptanceCriteria string. We accept two + * shapes that operators tend to use: + * + * 1. backticked code spans: `src/foo.js`, `tests/bar.test.mjs` + * 2. bare path-like tokens: src/foo.js, packages/ai/src/types.ts, + * docs/dev/ADR-019.md (any token containing a slash + dotted + * extension that doesn't start with a numeric digit) + * + * Returns an array of unique normalized paths (no leading ./, lowercased + * comparison is the caller's job — we keep original case here). Returns + * [] when no extractable paths found. Empty AC also returns []. + */ +function extractFilesFromAcceptanceCriteria(acText) { + if (typeof acText !== "string" || !acText.trim()) return []; + const found = new Set(); + // Backticked spans first — most reliable signal + const backtickRe = /`([^`\n]{1,200})`/g; + let m; + while ((m = backtickRe.exec(acText)) !== null) { + const candidate = m[1].trim(); + if (looksLikePath(candidate)) found.add(normalizePath(candidate)); + } + // Then bare path-like tokens. Word-boundary on either side; require at + // least one slash and a dotted extension to avoid sucking in plain + // english words. + const pathRe = + /(?:^|[\s,(\[])([a-zA-Z][a-zA-Z0-9._\-/]*\/[a-zA-Z0-9._\-/]+\.[a-zA-Z0-9]+)(?=[\s,)\]:;.]|$)/g; + while ((m = pathRe.exec(acText)) !== null) { + const candidate = m[1].trim(); + if (looksLikePath(candidate)) found.add(normalizePath(candidate)); + } + return Array.from(found); +} + +function looksLikePath(s) { + if (!s || s.length < 3) return false; + if (/\s/.test(s)) return false; + if (!s.includes("/")) return false; + if (!/\.[a-zA-Z0-9]+$/.test(s)) return false; + // Reject URLs and protocol-looking strings + if (/^https?:\/\//i.test(s)) return false; + // Reject leading digits (version-like strings) + if (/^\d/.test(s)) return false; + return true; +} + +function normalizePath(p) { + return p.replace(/^\.\//, "").replace(/^\/+/, ""); +} + +/** + * Outcomes-verification AC2 (sf-mp4vxusa-pn2tnd): return the list of files + * touched by a given commit. Best-effort — returns null when git is + * unavailable or the commit is missing, so callers downgrade gracefully. + * + * Consumer: markResolved when evidence.kind === "agent-fix" and the + * resolved entry has AC mentioning specific files. + */ +function getCommitTouchedFiles(commitSha, basePath) { + if (typeof commitSha !== "string" || !commitSha.trim()) return null; + let execFileSync; + try { + const cp = require("node:child_process"); + execFileSync = cp.execFileSync; + } catch { + return null; + } + try { + const out = execFileSync( + "git", + ["diff-tree", "--no-commit-id", "--name-only", "-r", commitSha.trim()], + { + cwd: basePath, + encoding: "utf-8", + stdio: ["ignore", "pipe", "pipe"], + timeout: 5000, + }, + ); + return out + .split("\n") + .map((s) => s.trim()) + .filter(Boolean) + .map(normalizePath); + } catch { + return null; + } +} + /** * Outcomes-verification AC1 (sf-mp4rxkwn-jmp039): when an agent-fix * resolution cites a commit_sha, verify the commit actually exists in the @@ -670,6 +760,50 @@ export function markResolved(entryId, resolution, basePath = process.cwd()) { basePath, ); if (status === "missing") return false; + // Outcomes-verification AC2 (sf-mp4vxusa-pn2tnd): if the entry has + // acceptanceCriteria mentioning specific files, the cited commit + // must touch at least one of them. Skip silently when AC has no + // extractable file mentions (operator AC was prose-only — there's + // nothing to verify against and we don't want false rejections). + // Skip when git can't enumerate the commit's files ("ungrokable" or + // out-of-repo) — same carve-out as AC1. + if (status === "verified" && isForgeRepo(basePath) && isDbAvailable()) { + try { + const entry = listSelfFeedbackEntries().find((e) => e.id === entryId); + const acText = + entry?.acceptanceCriteria ?? + entry?.full_json?.acceptanceCriteria ?? + ""; + const expectedFiles = extractFilesFromAcceptanceCriteria(acText); + if (expectedFiles.length > 0) { + const touched = getCommitTouchedFiles( + resolution.evidence.commitSha, + basePath, + ); + if (Array.isArray(touched) && touched.length > 0) { + const touchedSet = new Set(touched); + // Match by exact path OR by basename to tolerate the + // common AC pattern of saying "src/foo.js" when the + // actual change was in "packages/x/src/foo.js" (and vice + // versa). Exact match wins; basename match catches the + // usual operator informality without over-trusting. + const touchedBasenames = new Set( + touched.map((p) => p.split("/").pop()), + ); + const hit = expectedFiles.some((f) => { + if (touchedSet.has(f)) return true; + const basename = f.split("/").pop(); + return basename ? touchedBasenames.has(basename) : false; + }); + if (!hit) return false; + } + } + } catch { + // Verification is best-effort — never block resolution on a + // verification probe throwing. The AC1 commit-exists check + // has already accepted the SHA at this point. + } + } } if (isForgeRepo(basePath) && isDbAvailable()) { try { diff --git a/src/resources/extensions/sf/tests/self-feedback-db.test.mjs b/src/resources/extensions/sf/tests/self-feedback-db.test.mjs index f448735f6..7657f59fb 100644 --- a/src/resources/extensions/sf/tests/self-feedback-db.test.mjs +++ b/src/resources/extensions/sf/tests/self-feedback-db.test.mjs @@ -257,6 +257,166 @@ test("markResolved_rejects_agent_fix_with_nonexistent_commit_sha", async () => { assert.equal(okReal, true); }); +test("markResolved_rejects_agent_fix_when_commit_does_not_touch_AC_files", async () => { + const project = makeForgeProject(); + const { execFileSync } = await import("node:child_process"); + execFileSync("git", ["init", "-q", "-b", "main"], { cwd: project }); + execFileSync("git", ["config", "user.email", "test@example.com"], { + cwd: project, + }); + execFileSync("git", ["config", "user.name", "test"], { cwd: project }); + + // Two commits touching different files + const { writeFileSync: writeFs, mkdirSync: mkFs } = await import( + "node:fs" + ); + mkFs(join(project, "src"), { recursive: true }); + writeFs(join(project, "src", "expected.js"), "// initial\n"); + writeFs(join(project, "src", "unrelated.js"), "// initial\n"); + execFileSync("git", ["add", "-A"], { cwd: project }); + execFileSync("git", ["commit", "-m", "init", "-q"], { cwd: project }); + + // "Real fix" commit only touches src/unrelated.js + writeFs(join(project, "src", "unrelated.js"), "// updated\n"); + execFileSync("git", ["add", "-A"], { cwd: project }); + execFileSync("git", ["commit", "-m", "unrelated change", "-q"], { + cwd: project, + }); + const unrelatedSha = execFileSync("git", ["rev-parse", "HEAD"], { + cwd: project, + encoding: "utf-8", + }).trim(); + + // File an entry whose AC mentions src/expected.js — the unrelated + // commit must be rejected because it doesn't touch that file. + const filed = recordSelfFeedback( + { + kind: "gap:ac-mismatch-test", + severity: "high", + summary: "AC mentions a specific file", + acceptanceCriteria: + "AC1: `src/expected.js` is modified to use the new helper", + }, + project, + ); + + const okWrongFile = markResolved( + filed.entry.id, + { + reason: "claims fix but commit touched the wrong file", + evidence: { kind: "agent-fix", commitSha: unrelatedSha }, + }, + project, + ); + assert.equal(okWrongFile, false); + + // Now make a commit that DOES touch src/expected.js — accepted + writeFs(join(project, "src", "expected.js"), "// actually-fixed\n"); + execFileSync("git", ["add", "-A"], { cwd: project }); + execFileSync("git", ["commit", "-m", "real fix", "-q"], { cwd: project }); + const realSha = execFileSync("git", ["rev-parse", "HEAD"], { + cwd: project, + encoding: "utf-8", + }).trim(); + + const okRealFile = markResolved( + filed.entry.id, + { + reason: "real fix touches the AC-mentioned file", + evidence: { kind: "agent-fix", commitSha: realSha }, + }, + project, + ); + assert.equal(okRealFile, true); +}); + +test("markResolved_skips_AC_file_check_when_AC_has_no_extractable_paths", async () => { + const project = makeForgeProject(); + const { execFileSync } = await import("node:child_process"); + execFileSync("git", ["init", "-q", "-b", "main"], { cwd: project }); + execFileSync("git", ["config", "user.email", "test@example.com"], { + cwd: project, + }); + execFileSync("git", ["config", "user.name", "test"], { cwd: project }); + execFileSync("git", ["commit", "--allow-empty", "-m", "init", "-q"], { + cwd: project, + }); + const sha = execFileSync("git", ["rev-parse", "HEAD"], { + cwd: project, + encoding: "utf-8", + }).trim(); + + // AC is prose-only — no extractable file paths. Commit acceptance + // must succeed without file-match check (we don't fabricate + // rejections when there's nothing to compare against). + const filed = recordSelfFeedback( + { + kind: "gap:prose-only-ac-test", + severity: "low", + summary: "prose AC", + acceptanceCriteria: + "AC1: behavior is intuitive AC2: operators can recover quickly", + }, + project, + ); + + const ok = markResolved( + filed.entry.id, + { + reason: "prose AC accepts any commit", + evidence: { kind: "agent-fix", commitSha: sha }, + }, + project, + ); + assert.equal(ok, true); +}); + +test("markResolved_AC_file_check_tolerates_basename_match", async () => { + const project = makeForgeProject(); + const { execFileSync } = await import("node:child_process"); + const fsMod = await import("node:fs"); + const writeFs = fsMod.writeFileSync; + const mkFs = fsMod.mkdirSync; + execFileSync("git", ["init", "-q", "-b", "main"], { cwd: project }); + execFileSync("git", ["config", "user.email", "test@example.com"], { + cwd: project, + }); + execFileSync("git", ["config", "user.name", "test"], { cwd: project }); + mkFs(join(project, "packages", "ai", "src"), { recursive: true }); + writeFs( + join(project, "packages", "ai", "src", "types.ts"), + "export type X = {};\n", + ); + execFileSync("git", ["add", "-A"], { cwd: project }); + execFileSync("git", ["commit", "-m", "init", "-q"], { cwd: project }); + const sha = execFileSync("git", ["rev-parse", "HEAD"], { + cwd: project, + encoding: "utf-8", + }).trim(); + + // AC mentions "src/types.ts" but the actual commit touched + // "packages/ai/src/types.ts". Basename match should accept. + const filed = recordSelfFeedback( + { + kind: "gap:basename-tolerance-test", + severity: "low", + summary: "basename match", + acceptanceCriteria: "AC1: `src/types.ts` is extended with a new field", + }, + project, + ); + + const ok = markResolved( + filed.entry.id, + { + reason: "basename match across packages", + evidence: { kind: "agent-fix", commitSha: sha }, + }, + project, + ); + assert.equal(ok, true); +}); + test("markResolved_accepts_agent_fix_with_no_commit_sha_or_ungrokable_path", () => { const project = makeForgeProject(); // No git init → ungrokable → carve-out allows the resolution