feat: journal execution policy decisions

2026-05-07 22:27:29 +02:00 · 2026-05-07 22:27:29 +02:00 · d05e7164a9
commit d05e7164a9
parent e9df932234
4 changed files with 116 additions and 3 deletions
--- a/docs/records/2026-05-07-cli-agent-code-survey.md
+++ b/docs/records/2026-05-07-cli-agent-code-survey.md
@ -392,9 +392,12 @@ The first execution-policy vocabulary slice also landed:
  denied.
 - The `build` profile records destructive bash risk labels from the existing
  destructive-command classifier without changing runtime enforcement yet.
+- Auto-mode now writes `execution-policy-decision` journal events for tool
+  calls, recording the profile, allow/deny result, risk, destructive labels,
+  tool name, call id, and only the policy-relevant command, path, or script
+  (never the full tool payload).

-Next slices should attach these profile decisions to tool-call events, UOK
-evidence, and headless JSON output before broad enforcement.
+Next slices should project these profile decisions into UOK evidence and
+headless JSON output before broad enforcement.

 ## Resulting Direction

--- a/src/resources/extensions/sf/bootstrap/register-hooks.js
+++ b/src/resources/extensions/sf/bootstrap/register-hooks.js
@ -22,9 +22,11 @@ import { recordToolCallName } from "../auto-tool-tracking.js";
 import { loadToolApiKeys } from "../commands-config.js";
 import { getEcosystemReadyPromise } from "../ecosystem/loader.js";
 import { updateSnapshot } from "../ecosystem/sf-extension-api.js";
+import { buildExecutionPolicyJournalEntry } from "../execution-policy.js";
 import { formatContinue, loadFile, saveFile } from "../files.js";
 import { getDiscussionMilestoneId } from "../guided-flow.js";
 import { initHealthWidget } from "../health-widget.js";
+import { emitJournalEvent } from "../journal.js";
 import {
 	initializeLearningRuntime,
 	resetLearningRuntime,
@ -645,11 +647,25 @@ export function registerHooks(pi, ecosystemHandlers = []) {
 	pi.on("tool_call", async (event, ctx) => {
 		if (!isAutoActive()) return;
 		safetyRecordToolCall(event.toolCallId, event.toolName, event.input);
+		const policyDash = getAutoDashboardData();
+		const policyProfile = isQueuePhaseActive() ? "plan" : "build";
+		if (policyDash.basePath) {
+			emitJournalEvent(
+				policyDash.basePath,
+				buildExecutionPolicyJournalEntry({
+					event,
+					profileId: policyProfile,
+					unit: policyDash.currentUnit,
+					flowId: `execution-policy:${event.toolCallId ?? event.toolName}`,
+					now: () => new Date().toISOString(),
+				}),
+			);
+		}
 		// Persist evidence immediately at dispatch so a mid-unit session restart
 		// (resetEvidence() + loadEvidenceFromDisk()) cannot wipe the entry between
 		// tool_call and tool_execution_end. Without this the "no bash calls" false
 		// positive fires when the LLM clearly ran a verification command (Bug #4385).
-		const callDash = getAutoDashboardData();
+		const callDash = policyDash;
 		if (callDash.basePath && callDash.currentUnit?.type === "execute-task") {
 			const {
 				milestone: cMid,
--- a/src/resources/extensions/sf/execution-policy.js
+++ b/src/resources/extensions/sf/execution-policy.js
@ -114,3 +114,58 @@ export function classifyExecutionPolicyCall(profileId, toolName, input = "") {
 		destructiveLabels: bashRisk.labels,
 	};
 }
+
+/**
+ * Extract the policy-relevant input string from a tool-call event.
+ *
+ * Purpose: avoid leaking full structured tool payloads into policy logs while
+ * preserving the command or path needed for deterministic classification.
+ *
+ * Consumer: execution-policy journal events.
+ *
+ * Field selected per tool name:
+ * - "bash"             -> input.command
+ * - "write" / "edit"   -> input.path
+ * - "sf_exec"          -> input.script
+ * - any other tool     -> "" (nothing from the payload is kept)
+ *
+ * The selected field is coerced with String(); a null/undefined field yields
+ * "". A falsy or non-object `input` (including a plain string) also yields "".
+ *
+ * @param {string} toolName - Tool name used to pick which field to keep.
+ * @param {unknown} input - Tool-call payload; only one field of it is read.
+ * @returns {string} The selected command/path/script, or "" when none applies.
+ */
+export function extractExecutionPolicyInput(toolName, input) {
+	if (!input || typeof input !== "object") return "";
+	if (toolName === "bash") return String(input.command ?? "");
+	if (toolName === "write" || toolName === "edit") {
+		return String(input.path ?? "");
+	}
+	if (toolName === "sf_exec") return String(input.script ?? "");
+	return "";
+}
+
+/**
+ * Build a journal entry for a tool-call execution-policy decision.
+ *
+ * Purpose: make policy decisions inspectable before broad enforcement, matching
+ * the comparison-survey direction from Codex/Crush without changing runtime
+ * permissions in this slice.
+ *
+ * Consumer: SF auto-mode tool_call hook.
+ *
+ * The entry is built in two steps: the tool payload is reduced to a single
+ * string via extractExecutionPolicyInput, and that string (not the raw
+ * payload) is passed to classifyExecutionPolicyCall and stored in the entry.
+ *
+ * @param {object} args
+ * @param {object} args.event - Tool-call event; required (read unconditionally).
+ *   Reads `toolName`, `input`, and `toolCallId`.
+ * @param {string} args.profileId - Profile id forwarded to the classifier.
+ * @param {object} [args.unit] - Optional unit; its `type` and `id` are copied to
+ *   `unitType` / `unitId`, which are undefined when no unit is given.
+ * @param {string} [args.flowId] - Optional flow id. When nullish, defaults to
+ *   "execution-policy:" followed by `event.toolCallId`, or by `event.toolName`
+ *   when the call id is nullish.
+ * @param {Function} [args.now] - Optional timestamp provider. When absent, or
+ *   when it returns a nullish value, `new Date().toISOString()` is used.
+ * @returns {object} Entry with `ts`, `flowId`, `seq` (always 0), `eventType`
+ *   ("execution-policy-decision"), `unitType`, `unitId`, and `data` holding
+ *   `toolCallId`, `toolName`, the extracted `input` string, and the classifier
+ *   `decision`.
+ */
+export function buildExecutionPolicyJournalEntry(args) {
+	const input = extractExecutionPolicyInput(
+		args.event.toolName,
+		args.event.input,
+	);
+	const decision = classifyExecutionPolicyCall(
+		args.profileId,
+		args.event.toolName,
+		input,
+	);
+	return {
+		ts: args.now?.() ?? new Date().toISOString(),
+		flowId:
+			args.flowId ??
+			`execution-policy:${args.event.toolCallId ?? args.event.toolName}`,
+		seq: 0,
+		eventType: "execution-policy-decision",
+		unitType: args.unit?.type,
+		unitId: args.unit?.id,
+		data: {
+			toolCallId: args.event.toolCallId,
+			toolName: args.event.toolName,
+			input,
+			decision,
+		},
+	};
+}
--- a/src/resources/extensions/sf/tests/execution-policy.test.mjs
+++ b/src/resources/extensions/sf/tests/execution-policy.test.mjs
@ -1,7 +1,9 @@
 import assert from "node:assert/strict";
 import { describe, test } from "vitest";
 import {
+	buildExecutionPolicyJournalEntry,
 	classifyExecutionPolicyCall,
+	extractExecutionPolicyInput,
 	resolveExecutionPolicyProfile,
 } from "../execution-policy.js";

@ -45,4 +47,41 @@ describe("execution policy profiles", () => {
 		assert.equal(decision.risk, "destructive");
 		assert.deepEqual(decision.destructiveLabels, ["hard reset"]);
 	});
+
+	test("extractExecutionPolicyInput_uses_command_or_path_without_full_payload", () => {
+		assert.equal(
+			extractExecutionPolicyInput("bash", {
+				command: "git status",
+				extra: "x",
+			}),
+			"git status",
+		);
+		assert.equal(
+			extractExecutionPolicyInput("write", {
+				path: "src/app.ts",
+				content: "x",
+			}),
+			"src/app.ts",
+		);
+	});
+
+	test("buildExecutionPolicyJournalEntry_records_tool_decision", () => {
+		const entry = buildExecutionPolicyJournalEntry({
+			now: () => "2026-05-07T16:30:00.000Z",
+			profileId: "plan",
+			unit: { type: "execute-task", id: "M001/S01/T01" },
+			event: {
+				toolCallId: "call-1",
+				toolName: "write",
+				input: { path: "src/app.ts", content: "secret body" },
+			},
+		});
+
+		assert.equal(entry.eventType, "execution-policy-decision");
+		assert.equal(entry.unitType, "execute-task");
+		assert.equal(entry.data.toolName, "write");
+		assert.equal(entry.data.input, "src/app.ts");
+		assert.equal(entry.data.decision.allowed, false);
+		assert.equal(entry.data.decision.profile, "plan");
+	});
 });