feat(swarm): forward parent permission profile to in-process worker sessions
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions

In-process swarm workers get a fresh headless AgentSession whose permission
extension defaults to the read-only `minimal` level. That default blocks
ordinary autonomous edits (e.g., write_file, edit) even when the parent
session runs with a normal or trusted permission profile.

- run-unit.js: add legacyPermissionLevelForProfile mapping and include
  executorPermissionLevel in the dispatch envelope.
- swarm-dispatch.js: forward executorPermissionLevel from envelope to
  runAgentTurn as permissionLevel.
- agent-runner.js: accept permissionLevel option and pass it to
  runSubagent config.
- subagent-runner.ts: add permissionLevel to SubagentConfig; when set,
  temporarily set the SF_PERMISSION_LEVEL environment variable and run the
  extension lifecycle so the permission extension reads the level before
  tool hooks execute. The previous value of the variable is restored once
  the extensions are bound.
- Tests for envelope field, dispatch forwarding, and run-unit integration.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Copilot 2026-05-15 06:38:42 +02:00 committed by Mikael Hugo
parent f3571475d5
commit cf9203aee0
7 changed files with 174 additions and 28 deletions

View file

@ -28,6 +28,11 @@ export interface SubagentConfig {
model?: string;
/** Tool name filter (subset of built-in tools by name). */
tools?: string[];
/**
* Legacy SF permission level to initialize permission extension state for
* headless subagent sessions. When omitted, extension lifecycle stays off.
*/
permissionLevel?: "minimal" | "low" | "medium" | "high" | "bypassed";
/** Working directory. Default: process.cwd(). */
cwd?: string;
/** Display name for log/error messages. */
@ -172,16 +177,32 @@ export async function runSubagent(
session.setActiveToolsByName(config.tools);
}
// Bind extensions in headless mode (no lifecycle, no interactive UI).
await session.bindExtensions({
uiContext: createSubagentUIContext(),
onError: (err) => {
process.stderr.write(
`[subagent:${name}] extension error (${err.extensionPath}): ${err.error}\n`,
);
},
runLifecycle: false,
});
// Bind extensions in headless mode. When the caller supplies a permission
// level, run lifecycle once so the permission extension reads the env-backed
// level into its closure before tool hooks execute.
const previousPermissionLevel = process.env.SF_PERMISSION_LEVEL;
if (config.permissionLevel) {
process.env.SF_PERMISSION_LEVEL = config.permissionLevel;
}
try {
await session.bindExtensions({
uiContext: createSubagentUIContext(),
onError: (err) => {
process.stderr.write(
`[subagent:${name}] extension error (${err.extensionPath}): ${err.error}\n`,
);
},
runLifecycle: Boolean(config.permissionLevel),
});
} finally {
if (config.permissionLevel) {
if (previousPermissionLevel === undefined) {
delete process.env.SF_PERMISSION_LEVEL;
} else {
process.env.SF_PERMISSION_LEVEL = previousPermissionLevel;
}
}
}
// Debug: confirm tool count after bindExtensions so operators can verify
// extension tools (e.g. checkpoint) are present before the model is called.

View file

@ -187,6 +187,9 @@ function cacheKey(query) {
return query.toLowerCase().trim();
}
// ── Tool registration ────────────────────────────────────────────────────────
/**
 * Thin wrapper around registerGoogleSearchTool.
 *
 * @param {*} pi Passed through untouched to registerGoogleSearchTool.
 * @returns {*} Whatever registerGoogleSearchTool returns for `pi`.
 */
export function googleSearchExtension(pi) {
  const registration = registerGoogleSearchTool(pi);
  return registration;
}
export function registerGoogleSearchTool(pi) {
pi.registerTool({
name: "google_search",
@ -398,18 +401,21 @@ export function registerGoogleSearchTool(pi) {
resultCache.clear();
});
// ── Startup notification ─────────────────────────────────────────────────
// Defensive: Only check for OAuth if modelRegistry and authStorage exist (test mocks may omit)
pi.on("session_start", async (_event, ctx) => {
const hasOAuth =
await ctx.modelRegistry.authStorage.hasAuth("google-gemini-cli");
if (!hasOAuth) {
ctx.ui.notify(
"Google Search: No google-gemini-cli OAuth set. Configure Google Code Assist OAuth to use google_search.",
"warning",
);
if (ctx?.modelRegistry?.authStorage?.hasAuth) {
const hasOAuth =
await ctx.modelRegistry.authStorage.hasAuth("google-gemini-cli");
if (!hasOAuth && ctx?.ui?.notify) {
ctx.ui.notify(
"Google Search: No google-gemini-cli OAuth set. Configure Google Code Assist OAuth to use google_search.",
"warning",
);
}
}
});
}
let resultCache = new Map();
const resultCache = new Map();
/** Reset the google_search result cache (called by search-the-web extension on session_start). */
export function resetGoogleSearchCache() {
resultCache.clear();

View file

@ -135,6 +135,29 @@ function deriveWorkMode(unitType) {
return "build";
}
/**
 * Translate an SF product permission profile into the level name used by the
 * legacy permission extension.
 *
 * Purpose: in-process swarm workers run in a fresh headless AgentSession whose
 * permission extension would otherwise fall back to read-only `minimal` and
 * block normal autonomous edits; this mapping keeps the worker aligned with
 * the parent autonomous mode.
 *
 * Consumer: runUnitViaSwarm dispatch envelopes.
 *
 * @param {string} [profile] Product permission profile name.
 * @returns {"minimal" | "low" | "medium" | "bypassed"} Legacy level:
 *   "restricted" → "minimal", "trusted" → "medium",
 *   "unrestricted" → "bypassed", anything else → "low".
 */
function legacyPermissionLevelForProfile(profile) {
  if (profile === "restricted") {
    return "minimal";
  }
  if (profile === "trusted") {
    return "medium";
  }
  if (profile === "unrestricted") {
    return "bypassed";
  }
  // Every other value, including a missing profile, maps to "low".
  return "low";
}
/**
* Build the system prompt for a swarm worker executing an autonomous unit.
*
@ -245,6 +268,9 @@ async function runUnitViaSwarm(ctx, _pi, s, unitType, unitId, prompt, options) {
payload: prompt,
priority: options?.priority ?? 5,
executorSystemPrompt,
executorPermissionLevel: legacyPermissionLevelForProfile(
s.permissionProfile,
),
...(executorTools ? { executorTools } : {}),
};
@ -547,7 +573,9 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
const useSwarm =
swarmFlag === "1" ||
swarmFlag === "true" ||
(swarmFlag !== "0" && swarmFlag !== "false" && process.env.SF_HEADLESS === "1");
(swarmFlag !== "0" &&
swarmFlag !== "false" &&
process.env.SF_HEADLESS === "1");
if (useSwarm) {
return runUnitViaSwarm(ctx, pi, s, unitType, unitId, prompt, options);
}

View file

@ -197,6 +197,7 @@ describe("runUnit — SF_AUTONOMOUS_VIA_SWARM=1 — happy path", () => {
expect(envelope.payload).toBe("research the topic");
expect(envelope.scope).toBe("milestone-1");
expect(envelope.priority).toBe(7);
expect(envelope.executorPermissionLevel).toBe("low");
expect(opts.timeoutMs).toBeGreaterThan(0);
});
@ -316,7 +317,9 @@ describe("runUnit — SF_AUTONOMOUS_VIA_SWARM=1 — happy path", () => {
expect(Array.isArray(params.completedItems)).toBe(true);
expect(Array.isArray(params.remainingItems)).toBe(true);
expect(params.remainingItems.length).toBeGreaterThan(0);
expect(params.remainingItems[0]).toContain("Continue execute-task synth-chk-1");
expect(params.remainingItems[0]).toContain(
"Continue execute-task synth-chk-1",
);
expect(Array.isArray(params.verificationEvidence)).toBe(true);
});
@ -709,7 +712,15 @@ describe("runUnit — Round 8: swarmToolCallCount in UnitResult", () => {
const pi = makePi();
const s = makeS("/proj");
const result = await runUnit(ctx, pi, s, "execute-task", "r8-notc", "build", {});
const result = await runUnit(
ctx,
pi,
s,
"execute-task",
"r8-notc",
"build",
{},
);
expect(result.status).toBe("completed");
expect(result._via).toBe("swarm");
@ -725,14 +736,25 @@ describe("runUnit — Round 8: swarmToolCallCount in UnitResult", () => {
mockWithToolCallEvents([
{ name: "Bash", arguments: { command: "npm test" } },
{ name: "Read", arguments: { file_path: "/foo.ts" } },
{ name: "checkpoint", arguments: { outcome: "complete", summary: "done" } },
{
name: "checkpoint",
arguments: { outcome: "complete", summary: "done" },
},
]);
const ctx = makeCtx("/proj");
const pi = makePi();
const s = makeS("/proj");
const result = await runUnit(ctx, pi, s, "execute-task", "r8-tc3", "build", {});
const result = await runUnit(
ctx,
pi,
s,
"execute-task",
"r8-tc3",
"build",
{},
);
expect(result.status).toBe("completed");
expect(result._via).toBe("swarm");
@ -746,14 +768,25 @@ describe("runUnit — Round 8: swarmToolCallCount in UnitResult", () => {
process.env.SF_AUTONOMOUS_VIA_SWARM = "1";
mockWithToolCallEvents([
{ name: "checkpoint", arguments: { outcome: "continue", summary: "partial" } },
{
name: "checkpoint",
arguments: { outcome: "continue", summary: "partial" },
},
]);
const ctx = makeCtx("/proj");
const pi = makePi();
const s = makeS("/proj");
const result = await runUnit(ctx, pi, s, "execute-task", "r8-chkonly", "build", {});
const result = await runUnit(
ctx,
pi,
s,
"execute-task",
"r8-chkonly",
"build",
{},
);
expect(result.swarmToolCallCount).toBe(1);
});

View file

@ -442,7 +442,8 @@ describe("SwarmDispatchLayer.dispatchAndWait — Round 7: executor config forwar
const root = makeProject();
const layer = new SwarmDispatchLayer(root);
const EXECUTOR_PROMPT = "You are an autonomous executor. Call checkpoint when done.";
const EXECUTOR_PROMPT =
"You are an autonomous executor. Call checkpoint when done.";
await layer.dispatchAndWait({
unitId: "task-r7-sys-prompt",
unitType: "execute-task",
@ -498,6 +499,46 @@ describe("SwarmDispatchLayer.dispatchAndWait — Round 7: executor config forwar
expect(capturedOpts.toolsOverride).toEqual(EXECUTOR_TOOLS);
});
// Verifies that an envelope's executorPermissionLevel reaches runAgentTurn as
// opts.permissionLevel.
test("executorPermissionLevel from envelope is forwarded to runAgentTurn as permissionLevel", async () => {
const { runAgentTurn } = await import("../uok/agent-runner.js");
let capturedOpts = null;
// One-shot replacement for runAgentTurn (it must be a mock, since it exposes
// mockImplementationOnce): record the options it was called with, then mark
// the targeted message read and send a reply on the agent's bus.
runAgentTurn.mockImplementationOnce(async (agent, opts = {}) => {
capturedOpts = opts;
const { onlyMessageId } = opts;
// Refresh the inbox only when a specific message id was requested.
if (onlyMessageId) agent._inbox.refresh();
const all = agent.receive(false);
// Only the still-unread message with the requested id is processed.
const target = all.find((m) => m.id === onlyMessageId && !m.read);
const messages = target ? [target] : [];
// Nothing matched: report that no turn was processed.
if (messages.length === 0) return { turnsProcessed: 0, response: null };
for (const msg of messages) agent.markRead(msg.id);
const lastMsg = messages[messages.length - 1];
// Send the canned reply back to the original sender, linked via replyTo.
// NOTE(review): presumably this reply is what dispatchAndWait waits on — confirm.
const replyId = agent._bus.send(
`agent:${agent.identity.name}`,
lastMsg.from,
MOCK_REPLY_TEXT,
{ replyTo: lastMsg.id, type: "response" },
);
return { turnsProcessed: 1, response: MOCK_REPLY_TEXT, replyId };
});
const root = makeProject();
const layer = new SwarmDispatchLayer(root);
// Dispatch an envelope whose only executor-specific field is the permission level.
await layer.dispatchAndWait({
unitId: "task-r7-permission",
unitType: "execute-task",
workMode: "build",
payload: "edit files",
priority: 5,
scope: "scope-r7-permission",
executorPermissionLevel: "low",
});
// The mock must have run, and it must have seen the level under the
// permissionLevel key.
expect(capturedOpts).not.toBeNull();
expect(capturedOpts.permissionLevel).toBe("low");
});
test("envelope without executorSystemPrompt does not forward systemPromptOverride", async () => {
// Envelopes without the optional fields must not pass undefined opts to runAgentTurn.
const { runAgentTurn } = await import("../uok/agent-runner.js");
@ -538,6 +579,7 @@ describe("SwarmDispatchLayer.dispatchAndWait — Round 7: executor config forwar
expect(capturedOpts).not.toBeNull();
expect(capturedOpts.systemPromptOverride).toBeUndefined();
expect(capturedOpts.toolsOverride).toBeUndefined();
expect(capturedOpts.permissionLevel).toBeUndefined();
});
});

View file

@ -69,6 +69,8 @@ function buildAgentPrompt(agent, messages) {
* @param {string[]} [opts.toolsOverride] Override the default tool filter passed to
* runSubagent. When set (e.g. from envelope.executorTools), the worker's session is
* filtered to this specific set of tool names before the prompt is sent.
* @param {string} [opts.permissionLevel] Legacy permission level used to initialize
* SF's permission extension in the isolated headless session.
*/
async function runHeadlessPrompt(
basePath,
@ -76,7 +78,8 @@ async function runHeadlessPrompt(
timeoutMs = DEFAULT_RUNNER_TIMEOUT_MS,
opts = {},
) {
const { onEvent, systemPromptOverride, toolsOverride } = opts;
const { onEvent, systemPromptOverride, toolsOverride, permissionLevel } =
opts;
const result = await runSubagent(
{
systemPrompt:
@ -84,6 +87,7 @@ async function runHeadlessPrompt(
"You are a persistent agent in a multi-agent swarm. Process the incoming messages and produce a structured response.",
cwd: basePath,
name: "swarm-agent",
...(permissionLevel ? { permissionLevel } : {}),
...(toolsOverride && toolsOverride.length > 0
? { tools: toolsOverride }
: {}),
@ -124,6 +128,8 @@ async function runHeadlessPrompt(
* @param {string[]} [opts.toolsOverride] Override the worker's tool filter.
* Forwarded to runHeadlessPrompt so executor-specific tool sets (e.g. including
* "checkpoint") are applied in the runSubagent config.
* @param {string} [opts.permissionLevel] Legacy permission level forwarded to
* runHeadlessPrompt so permission hooks match the parent SF profile.
* @returns {Promise<{turnsProcessed: number, response: string|null}>}
*/
export async function runAgentTurn(agent, opts = {}) {
@ -134,6 +140,7 @@ export async function runAgentTurn(agent, opts = {}) {
onEvent,
systemPromptOverride,
toolsOverride,
permissionLevel,
} = opts;
// When onlyMessageId is set, force-refresh the inbox from SQLite so that
@ -181,6 +188,7 @@ export async function runAgentTurn(agent, opts = {}) {
onEvent,
...(systemPromptOverride ? { systemPromptOverride } : {}),
...(toolsOverride ? { toolsOverride } : {}),
...(permissionLevel ? { permissionLevel } : {}),
});
} catch (err) {
// On failure, write error back to bus so sender knows

View file

@ -70,6 +70,8 @@ async function getA2ATransport() {
* filter. When set, runSubagent is configured with this tool name list so the worker
* session only exposes the specified tools. Allows callers to grant/restrict tools
* per unit type (e.g. ensure "checkpoint" is always available for execute-task units).
* @property {string} [executorPermissionLevel] Optional: legacy SF permission
* extension level used by in-process headless worker sessions.
*/
/**
@ -327,6 +329,7 @@ export class SwarmDispatchLayer {
// so the worker session receives the correct prompt and tool set.
const executorSystemPrompt = envelope.executorSystemPrompt;
const executorTools = envelope.executorTools;
const executorPermissionLevel = envelope.executorPermissionLevel;
const runAgentTurn = await getRunAgentTurn();
let turnResult;
try {
@ -335,8 +338,13 @@ export class SwarmDispatchLayer {
signal,
onlyMessageId: dispatchResult.messageId,
...(onEvent ? { onEvent } : {}),
...(executorSystemPrompt ? { systemPromptOverride: executorSystemPrompt } : {}),
...(executorSystemPrompt
? { systemPromptOverride: executorSystemPrompt }
: {}),
...(executorTools ? { toolsOverride: executorTools } : {}),
...(executorPermissionLevel
? { permissionLevel: executorPermissionLevel }
: {}),
});
} catch (err) {
return {