fix(self-feedback): verify agent-fix commit_sha exists in repo
Partially addresses sf-mp4rxkwn-jmp039 (no-outcomes-verification): AC1 and AC3 land here. AC2 (cross-check that the cited commit's changed files include the entry's referenced files) is filed separately as a follow-up — different mechanism (semantic AC parsing). Without this check, an agent could stamp ANY string as commit_sha and markResolved would accept it under the writer-layer constraint shipped ind477ce703. The credibility check at the reader caught the OBVIOUS non-canonical shapes (null evidence, {file, line}) but a well-formed {kind: "agent-fix", commitSha: "phantom-sha"} would have passed. Implementation: verifyCommitExists(commitSha, basePath) returns one of: - "verified" — git is present and the commit is in the repo - "missing" — git is present but the commit lookup failed - "ungrokable" — git unavailable or basePath isn't a git repo (carve-out: we can't verify, so don't punish) markResolved policy: reject on "missing"; accept on the others. The agent-fix-unverified kind (reserved ind477ce703) is the explicit escape hatch for "I want to mark agent-fix but can't cite a verifiable commit" — those resolutions remain re-includable under the credibility check, which is what we want. Implementation uses two shell-outs to git (rev-parse --verify, then rev-parse --git-dir to distinguish missing from not-a-repo). Both are guarded with 5-second timeouts and never throw — failure modes return "ungrokable" so the carve-out kicks in. Tests: 2 new (11 total in self-feedback-db). - rejects_agent_fix_with_nonexistent_commit_sha: initializes a real git repo, files an entry, rejects bogus SHA, accepts real HEAD SHA - accepts_agent_fix_with_no_commit_sha_or_ungrokable_path: covers both the carve-out (no-git) and agent-fix-without-commitSha (testPath/summaryNarrative path) Full SF extension suite (1549 tests) passes; typecheck clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d477ce7039
commit
4af10ac1b2
2 changed files with 157 additions and 0 deletions
|
|
@ -436,6 +436,59 @@ function isAcceptedEvidence(evidence) {
|
|||
return typeof kind === "string" && ACCEPTED_EVIDENCE_KINDS.has(kind);
|
||||
}
|
||||
|
||||
/**
|
||||
* Outcomes-verification AC1 (sf-mp4rxkwn-jmp039): when an agent-fix
|
||||
* resolution cites a commit_sha, verify the commit actually exists in the
|
||||
* repo. Without this check, an agent can stamp any string as a commit and
|
||||
* the writer-layer constraint will accept it.
|
||||
*
|
||||
* Returns one of:
|
||||
* "verified" — git exists and the commit is in the repo
|
||||
* "missing" — git exists, commit lookup failed (e.g. NONEXISTENT_SHA)
|
||||
* "ungrokable" — git unavailable or basePath isn't a git repo; we
|
||||
* cannot verify and must not punish (don't make every
|
||||
* non-git working tree start failing resolutions)
|
||||
*
|
||||
* Caller policy: reject on "missing"; accept on "verified" and
|
||||
* "ungrokable". The "ungrokable" carve-out is the only practical one — an
|
||||
* SF self-feedback ledger living outside a git repo is a real (rare) case
|
||||
* and the agent-fix-unverified kind exists for explicit "I edited but
|
||||
* can't cite a verified commit" workflows.
|
||||
*/
|
||||
function verifyCommitExists(commitSha, basePath) {
|
||||
if (typeof commitSha !== "string" || !commitSha.trim()) return "ungrokable";
|
||||
const sha = commitSha.trim();
|
||||
let execFileSync;
|
||||
try {
|
||||
const cp = require("node:child_process");
|
||||
execFileSync = cp.execFileSync;
|
||||
} catch {
|
||||
return "ungrokable";
|
||||
}
|
||||
try {
|
||||
execFileSync("git", ["rev-parse", "--verify", `${sha}^{commit}`], {
|
||||
cwd: basePath,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
timeout: 5000,
|
||||
});
|
||||
return "verified";
|
||||
} catch (err) {
|
||||
// rev-parse exits non-zero when the commit doesn't exist OR when
|
||||
// the directory isn't a git repo. Distinguish via a second probe:
|
||||
// `git rev-parse --git-dir` exits 0 when inside a repo.
|
||||
try {
|
||||
execFileSync("git", ["rev-parse", "--git-dir"], {
|
||||
cwd: basePath,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
timeout: 5000,
|
||||
});
|
||||
return "missing"; // in a repo but the SHA is unknown
|
||||
} catch {
|
||||
return "ungrokable"; // not in a repo or git unavailable
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark an entry as resolved. Updates SQLite for forge-local feedback when
|
||||
* available, otherwise rewrites the legacy JSONL fallback in place. Entries are
|
||||
|
|
@ -465,6 +518,22 @@ export function markResolved(entryId, resolution, basePath = process.cwd()) {
|
|||
// caller can recover by re-submitting with a valid evidence kind.
|
||||
return false;
|
||||
}
|
||||
// Outcomes-verification AC1 (sf-mp4rxkwn-jmp039): when an agent-fix
|
||||
// resolution cites a commit_sha, verify the commit actually exists in
|
||||
// the repo. Reject when git can confirm the commit is missing; accept
|
||||
// when git is unavailable or basePath isn't a git repo (the
|
||||
// agent-fix-unverified kind exists for that explicit case).
|
||||
if (
|
||||
resolution.evidence.kind === "agent-fix" &&
|
||||
typeof resolution.evidence.commitSha === "string" &&
|
||||
resolution.evidence.commitSha.trim().length > 0
|
||||
) {
|
||||
const status = verifyCommitExists(
|
||||
resolution.evidence.commitSha,
|
||||
basePath,
|
||||
);
|
||||
if (status === "missing") return false;
|
||||
}
|
||||
if (isForgeRepo(basePath) && isDbAvailable()) {
|
||||
try {
|
||||
importLegacyJsonlToDb(basePath);
|
||||
|
|
|
|||
|
|
@ -207,6 +207,94 @@ test("markResolved_accepts_each_canonical_evidence_kind", () => {
|
|||
}
|
||||
});
|
||||
|
||||
test("markResolved_rejects_agent_fix_with_nonexistent_commit_sha", async () => {
|
||||
const project = makeForgeProject();
|
||||
// Initialize a real git repo so the verifier can distinguish "commit
|
||||
// missing" from "not a repo". Without a repo, the carve-out returns
|
||||
// "ungrokable" and the resolution would be accepted.
|
||||
const { execFileSync } = await import("node:child_process");
|
||||
execFileSync("git", ["init", "-q", "-b", "main"], { cwd: project });
|
||||
execFileSync("git", ["config", "user.email", "test@example.com"], {
|
||||
cwd: project,
|
||||
});
|
||||
execFileSync("git", ["config", "user.name", "test"], { cwd: project });
|
||||
execFileSync("git", ["commit", "--allow-empty", "-m", "init", "-q"], {
|
||||
cwd: project,
|
||||
});
|
||||
|
||||
const filed = recordSelfFeedback(
|
||||
{ kind: "commit-verif-test", severity: "high", summary: "x" },
|
||||
project,
|
||||
);
|
||||
|
||||
// Bogus SHA — git rev-parse --verify exits non-zero
|
||||
const okBogus = markResolved(
|
||||
filed.entry.id,
|
||||
{
|
||||
reason: "phantom fix",
|
||||
evidence: {
|
||||
kind: "agent-fix",
|
||||
commitSha: "0000000000000000000000000000000000000000",
|
||||
},
|
||||
},
|
||||
project,
|
||||
);
|
||||
assert.equal(okBogus, false);
|
||||
|
||||
// Real SHA from the init commit — accepted
|
||||
const headSha = execFileSync("git", ["rev-parse", "HEAD"], {
|
||||
cwd: project,
|
||||
encoding: "utf-8",
|
||||
}).trim();
|
||||
const okReal = markResolved(
|
||||
filed.entry.id,
|
||||
{
|
||||
reason: "verified fix",
|
||||
evidence: { kind: "agent-fix", commitSha: headSha },
|
||||
},
|
||||
project,
|
||||
);
|
||||
assert.equal(okReal, true);
|
||||
});
|
||||
|
||||
test("markResolved_accepts_agent_fix_with_no_commit_sha_or_ungrokable_path", () => {
|
||||
const project = makeForgeProject();
|
||||
// No git init → ungrokable → carve-out allows the resolution
|
||||
const filed = recordSelfFeedback(
|
||||
{ kind: "no-git-test", severity: "low", summary: "x" },
|
||||
project,
|
||||
);
|
||||
const ok = markResolved(
|
||||
filed.entry.id,
|
||||
{
|
||||
reason: "verified some other way",
|
||||
evidence: { kind: "agent-fix", commitSha: "anything-goes-no-repo" },
|
||||
},
|
||||
project,
|
||||
);
|
||||
assert.equal(ok, true);
|
||||
|
||||
// agent-fix without any commitSha is also accepted (commit not always
|
||||
// the right artifact — summaryNarrative or testPath may be)
|
||||
const filed2 = recordSelfFeedback(
|
||||
{ kind: "no-sha-test", severity: "low", summary: "x" },
|
||||
project,
|
||||
);
|
||||
const ok2 = markResolved(
|
||||
filed2.entry.id,
|
||||
{
|
||||
reason: "test-based verification",
|
||||
evidence: {
|
||||
kind: "agent-fix",
|
||||
testPath: "src/foo.test.mjs",
|
||||
summaryNarrative: "added a test that pins the behavior",
|
||||
},
|
||||
},
|
||||
project,
|
||||
);
|
||||
assert.equal(ok2, true);
|
||||
});
|
||||
|
||||
test("markResolved_appends_resolution_event_to_jsonl_audit_log", () => {
|
||||
const project = makeForgeProject();
|
||||
const result = recordSelfFeedback(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue