diff --git a/docs/records/2026-05-07-cli-agent-code-survey.md b/docs/records/2026-05-07-cli-agent-code-survey.md index 1bb87e78e..fe7f242db 100644 --- a/docs/records/2026-05-07-cli-agent-code-survey.md +++ b/docs/records/2026-05-07-cli-agent-code-survey.md @@ -392,9 +392,12 @@ The first execution-policy vocabulary slice also landed: denied. - The `build` profile records destructive bash risk labels from the existing destructive-command classifier without changing runtime enforcement yet. +- Auto-mode now writes `execution-policy-decision` journal events for tool + calls, recording the profile, allow/deny result, risk, destructive labels, + tool name, call id, and policy-relevant command/path only. -Next slices should attach these profile decisions to tool-call events, UOK -evidence, and headless JSON output before broad enforcement. +Next slices should project these profile decisions into UOK evidence and +headless JSON output before broad enforcement. ## Resulting Direction diff --git a/src/resources/extensions/sf/bootstrap/register-hooks.js b/src/resources/extensions/sf/bootstrap/register-hooks.js index 5dc54662f..5560381b4 100644 --- a/src/resources/extensions/sf/bootstrap/register-hooks.js +++ b/src/resources/extensions/sf/bootstrap/register-hooks.js @@ -22,9 +22,11 @@ import { recordToolCallName } from "../auto-tool-tracking.js"; import { loadToolApiKeys } from "../commands-config.js"; import { getEcosystemReadyPromise } from "../ecosystem/loader.js"; import { updateSnapshot } from "../ecosystem/sf-extension-api.js"; +import { buildExecutionPolicyJournalEntry } from "../execution-policy.js"; import { formatContinue, loadFile, saveFile } from "../files.js"; import { getDiscussionMilestoneId } from "../guided-flow.js"; import { initHealthWidget } from "../health-widget.js"; +import { emitJournalEvent } from "../journal.js"; import { initializeLearningRuntime, resetLearningRuntime, @@ -645,11 +647,25 @@ export function registerHooks(pi, ecosystemHandlers = []) { pi.on("tool_call", async (event, ctx) => { if (!isAutoActive()) return; safetyRecordToolCall(event.toolCallId, event.toolName, event.input); + const policyDash = getAutoDashboardData(); + const policyProfile = isQueuePhaseActive() ? "plan" : "build"; + if (policyDash.basePath) { + emitJournalEvent( + policyDash.basePath, + buildExecutionPolicyJournalEntry({ + event, + profileId: policyProfile, + unit: policyDash.currentUnit, + flowId: `execution-policy:${event.toolCallId ?? event.toolName}`, + now: () => new Date().toISOString(), + }), + ); + } // Persist evidence immediately at dispatch so a mid-unit session restart // (resetEvidence() + loadEvidenceFromDisk()) cannot wipe the entry between // tool_call and tool_execution_end. Without this the "no bash calls" false // positive fires when the LLM clearly ran a verification command (Bug #4385). - const callDash = getAutoDashboardData(); + const callDash = policyDash; if (callDash.basePath && callDash.currentUnit?.type === "execute-task") { const { milestone: cMid, diff --git a/src/resources/extensions/sf/execution-policy.js b/src/resources/extensions/sf/execution-policy.js index 039461260..27ba4e17d 100644 --- a/src/resources/extensions/sf/execution-policy.js +++ b/src/resources/extensions/sf/execution-policy.js @@ -114,3 +114,58 @@ export function classifyExecutionPolicyCall(profileId, toolName, input = "") { destructiveLabels: bashRisk.labels, }; } + +/** + * Extract the policy-relevant input string from a tool-call event. + * + * Purpose: avoid leaking full structured tool payloads into policy logs while + * preserving the command or path needed for deterministic classification. + * + * Consumer: execution-policy journal events. + */ +export function extractExecutionPolicyInput(toolName, input) { + if (!input || typeof input !== "object") return ""; + if (toolName === "bash") return String(input.command ?? ""); + if (toolName === "write" || toolName === "edit") { + return String(input.path ?? ""); + } + if (toolName === "sf_exec") return String(input.script ?? ""); + return ""; +} + +/** + * Build a journal entry for a tool-call execution-policy decision. + * + * Purpose: make policy decisions inspectable before broad enforcement, matching + * the comparison-survey direction from Codex/Crush without changing runtime + * permissions in this slice. + * + * Consumer: SF auto-mode tool_call hook. + */ +export function buildExecutionPolicyJournalEntry(args) { + const input = extractExecutionPolicyInput( + args.event.toolName, + args.event.input, + ); + const decision = classifyExecutionPolicyCall( + args.profileId, + args.event.toolName, + input, + ); + return { + ts: args.now?.() ?? new Date().toISOString(), + flowId: + args.flowId ?? + `execution-policy:${args.event.toolCallId ?? args.event.toolName}`, + seq: 0, + eventType: "execution-policy-decision", + unitType: args.unit?.type, + unitId: args.unit?.id, + data: { + toolCallId: args.event.toolCallId, + toolName: args.event.toolName, + input, + decision, + }, + }; +} diff --git a/src/resources/extensions/sf/tests/execution-policy.test.mjs b/src/resources/extensions/sf/tests/execution-policy.test.mjs index 7470af41c..460282837 100644 --- a/src/resources/extensions/sf/tests/execution-policy.test.mjs +++ b/src/resources/extensions/sf/tests/execution-policy.test.mjs @@ -1,7 +1,9 @@ import assert from "node:assert/strict"; import { describe, test } from "vitest"; import { + buildExecutionPolicyJournalEntry, classifyExecutionPolicyCall, + extractExecutionPolicyInput, resolveExecutionPolicyProfile, } from "../execution-policy.js"; @@ -45,4 +47,41 @@ describe("execution policy profiles", () => { assert.equal(decision.risk, "destructive"); assert.deepEqual(decision.destructiveLabels, ["hard reset"]); }); + + test("extractExecutionPolicyInput_uses_command_or_path_without_full_payload", () => { + assert.equal( + extractExecutionPolicyInput("bash", { + command: "git status", + extra: "x", + }), + "git status", + ); + assert.equal( + extractExecutionPolicyInput("write", { + path: "src/app.ts", + content: "x", + }), + "src/app.ts", + ); + }); + + test("buildExecutionPolicyJournalEntry_records_tool_decision", () => { + const entry = buildExecutionPolicyJournalEntry({ + now: () => "2026-05-07T16:30:00.000Z", + profileId: "plan", + unit: { type: "execute-task", id: "M001/S01/T01" }, + event: { + toolCallId: "call-1", + toolName: "write", + input: { path: "src/app.ts", content: "secret body" }, + }, + }); + + assert.equal(entry.eventType, "execution-policy-decision"); + assert.equal(entry.unitType, "execute-task"); + assert.equal(entry.data.toolName, "write"); + assert.equal(entry.data.input, "src/app.ts"); + assert.equal(entry.data.decision.allowed, false); + assert.equal(entry.data.decision.profile, "plan"); + }); });