feat: journal execution policy decisions
This commit is contained in:
parent
e9df932234
commit
d05e7164a9
4 changed files with 116 additions and 3 deletions
|
|
@ -392,9 +392,12 @@ The first execution-policy vocabulary slice also landed:
|
|||
denied.
|
||||
- The `build` profile records destructive bash risk labels from the existing
|
||||
destructive-command classifier without changing runtime enforcement yet.
|
||||
- Auto-mode now writes `execution-policy-decision` journal events for tool
|
||||
calls, recording the profile, allow/deny result, risk, destructive labels,
|
||||
tool name, call id, and only the policy-relevant command, script, or path.
|
||||
|
||||
Next slices should attach these profile decisions to tool-call events, UOK
|
||||
evidence, and headless JSON output before broad enforcement.
|
||||
Next slices should project these profile decisions into UOK evidence and
|
||||
headless JSON output before broad enforcement.
|
||||
|
||||
## Resulting Direction
|
||||
|
||||
|
|
|
|||
|
|
@ -22,9 +22,11 @@ import { recordToolCallName } from "../auto-tool-tracking.js";
|
|||
import { loadToolApiKeys } from "../commands-config.js";
|
||||
import { getEcosystemReadyPromise } from "../ecosystem/loader.js";
|
||||
import { updateSnapshot } from "../ecosystem/sf-extension-api.js";
|
||||
import { buildExecutionPolicyJournalEntry } from "../execution-policy.js";
|
||||
import { formatContinue, loadFile, saveFile } from "../files.js";
|
||||
import { getDiscussionMilestoneId } from "../guided-flow.js";
|
||||
import { initHealthWidget } from "../health-widget.js";
|
||||
import { emitJournalEvent } from "../journal.js";
|
||||
import {
|
||||
initializeLearningRuntime,
|
||||
resetLearningRuntime,
|
||||
|
|
@ -645,11 +647,25 @@ export function registerHooks(pi, ecosystemHandlers = []) {
|
|||
pi.on("tool_call", async (event, ctx) => {
|
||||
if (!isAutoActive()) return;
|
||||
safetyRecordToolCall(event.toolCallId, event.toolName, event.input);
|
||||
const policyDash = getAutoDashboardData();
|
||||
const policyProfile = isQueuePhaseActive() ? "plan" : "build";
|
||||
if (policyDash.basePath) {
|
||||
emitJournalEvent(
|
||||
policyDash.basePath,
|
||||
buildExecutionPolicyJournalEntry({
|
||||
event,
|
||||
profileId: policyProfile,
|
||||
unit: policyDash.currentUnit,
|
||||
flowId: `execution-policy:${event.toolCallId ?? event.toolName}`,
|
||||
now: () => new Date().toISOString(),
|
||||
}),
|
||||
);
|
||||
}
|
||||
// Persist evidence immediately at dispatch so a mid-unit session restart
|
||||
// (resetEvidence() + loadEvidenceFromDisk()) cannot wipe the entry between
|
||||
// tool_call and tool_execution_end. Without this the "no bash calls" false
|
||||
// positive fires when the LLM clearly ran a verification command (Bug #4385).
|
||||
const callDash = getAutoDashboardData();
|
||||
const callDash = policyDash;
|
||||
if (callDash.basePath && callDash.currentUnit?.type === "execute-task") {
|
||||
const {
|
||||
milestone: cMid,
|
||||
|
|
|
|||
|
|
@ -114,3 +114,58 @@ export function classifyExecutionPolicyCall(profileId, toolName, input = "") {
|
|||
destructiveLabels: bashRisk.labels,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Reduce a tool-call payload to the single string the execution policy needs.
 *
 * Purpose: keep policy logs free of full structured tool payloads while still
 * carrying the command, script, or path used for classification.
 *
 * Consumer: execution-policy journal events.
 *
 * @param {string} toolName - Name of the tool being called.
 * @param {unknown} input - Raw tool input; anything that is not a non-null
 *   object yields an empty string.
 * @returns {string} `command` for "bash", `path` for "write"/"edit", `script`
 *   for "sf_exec" (each coerced with String(), nullish becoming ""), and ""
 *   for every other tool.
 */
export function extractExecutionPolicyInput(toolName, input) {
  const isStructured = Boolean(input) && typeof input === "object";
  if (!isStructured) return "";

  // Pick the one field that matters for this tool; unknown tools expose nothing.
  let relevant;
  switch (toolName) {
    case "bash":
      relevant = input.command;
      break;
    case "write":
    case "edit":
      relevant = input.path;
      break;
    case "sf_exec":
      relevant = input.script;
      break;
    default:
      return "";
  }

  // Missing or null fields collapse to "", everything else is stringified.
  return String(relevant ?? "");
}
|
||||
|
||||
/**
 * Assemble the journal entry describing the execution-policy decision for one
 * tool call.
 *
 * Purpose: make policy decisions inspectable before broad enforcement. This
 * function only builds and returns a plain object; it does not block or alter
 * the tool call.
 *
 * Consumer: SF auto-mode tool_call hook.
 *
 * @param {object} args
 * @param {object} args.event - Tool-call event; `toolName`, `toolCallId` and
 *   `input` are read from it.
 * @param {string} args.profileId - Policy profile id passed to the classifier.
 * @param {object} [args.unit] - Current unit; its `type` and `id` are copied
 *   onto the entry when present.
 * @param {string} [args.flowId] - Overrides the default
 *   `execution-policy:<toolCallId or toolName>` flow id.
 * @param {() => string} [args.now] - Timestamp provider; falls back to the
 *   current time as an ISO string.
 * @returns {object} Entry with `eventType` "execution-policy-decision",
 *   `seq` 0, and `data` holding the tool call id, tool name, reduced input and
 *   the classifier's decision.
 */
export function buildExecutionPolicyJournalEntry(args) {
  const { event, profileId, unit } = args;
  const { toolCallId, toolName } = event;

  // Classify on the reduced input so the full payload never reaches the entry.
  const input = extractExecutionPolicyInput(toolName, event.input);
  const decision = classifyExecutionPolicyCall(profileId, toolName, input);

  const ts = args.now?.() ?? new Date().toISOString();
  const flowId = args.flowId ?? `execution-policy:${toolCallId ?? toolName}`;

  return {
    ts,
    flowId,
    seq: 0,
    eventType: "execution-policy-decision",
    unitType: unit?.type,
    unitId: unit?.id,
    data: { toolCallId, toolName, input, decision },
  };
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { describe, test } from "vitest";
|
||||
import {
|
||||
buildExecutionPolicyJournalEntry,
|
||||
classifyExecutionPolicyCall,
|
||||
extractExecutionPolicyInput,
|
||||
resolveExecutionPolicyProfile,
|
||||
} from "../execution-policy.js";
|
||||
|
||||
|
|
@ -45,4 +47,41 @@ describe("execution policy profiles", () => {
|
|||
assert.equal(decision.risk, "destructive");
|
||||
assert.deepEqual(decision.destructiveLabels, ["hard reset"]);
|
||||
});
|
||||
|
||||
test("extractExecutionPolicyInput_uses_command_or_path_without_full_payload", () => {
|
||||
assert.equal(
|
||||
extractExecutionPolicyInput("bash", {
|
||||
command: "git status",
|
||||
extra: "x",
|
||||
}),
|
||||
"git status",
|
||||
);
|
||||
assert.equal(
|
||||
extractExecutionPolicyInput("write", {
|
||||
path: "src/app.ts",
|
||||
content: "x",
|
||||
}),
|
||||
"src/app.ts",
|
||||
);
|
||||
});
|
||||
|
||||
test("buildExecutionPolicyJournalEntry_records_tool_decision", () => {
|
||||
const entry = buildExecutionPolicyJournalEntry({
|
||||
now: () => "2026-05-07T16:30:00.000Z",
|
||||
profileId: "plan",
|
||||
unit: { type: "execute-task", id: "M001/S01/T01" },
|
||||
event: {
|
||||
toolCallId: "call-1",
|
||||
toolName: "write",
|
||||
input: { path: "src/app.ts", content: "secret body" },
|
||||
},
|
||||
});
|
||||
|
||||
assert.equal(entry.eventType, "execution-policy-decision");
|
||||
assert.equal(entry.unitType, "execute-task");
|
||||
assert.equal(entry.data.toolName, "write");
|
||||
assert.equal(entry.data.input, "src/app.ts");
|
||||
assert.equal(entry.data.decision.allowed, false);
|
||||
assert.equal(entry.data.decision.profile, "plan");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue