Merge pull request #4082 from jeremymcs/claude/review-mcp-server-tools-2Gchv

Add query filtering, abort handling, and permission mode control
2026-04-12 20:54:51 -05:00 · 2026-04-12 20:54:51 -05:00 · da7a7e255f
commit da7a7e255f
parent 5c271e72e7 1eb357ca46
7 changed files with 442 additions and 76 deletions
--- a/package-lock.json
+++ b/package-lock.json
@ -1,12 +1,12 @@
 {
  "name": "gsd-pi",
-  "version": "2.66.1",
+  "version": "2.68.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "gsd-pi",
-      "version": "2.66.1",
+      "version": "2.68.0",
      "hasInstallScript": true,
      "license": "MIT",
      "workspaces": [
@ -9535,7 +9535,7 @@
    },
    "packages/pi-coding-agent": {
      "name": "@gsd/pi-coding-agent",
-      "version": "2.66.1",
+      "version": "2.68.0",
      "dependencies": {
        "@mariozechner/jiti": "^2.6.2",
        "@silvia-odwyer/photon-node": "^0.3.4",
--- a/packages/mcp-server/src/server.ts
+++ b/packages/mcp-server/src/server.ts
@ -55,46 +55,80 @@ function textContent(text: string): { content: Array<{ type: 'text'; text: strin
 // gsd_query filesystem reader
 // ---------------------------------------------------------------------------

-async function readProjectState(projectDir: string, _query: string): Promise<Record<string, unknown>> {
+/**
+ * Normalized query categories for {@link readProjectState}.
+ *
+ * Maps user-supplied query strings (or empty) to the set of fields we return.
+ * Accepts common synonyms so the MCP client can pass intuitive values.
+ */
+const QUERY_FIELDS = {
+  all: ['state', 'project', 'requirements', 'milestones'] as const,
+  state: ['state'] as const,
+  status: ['state'] as const,
+  project: ['project'] as const,
+  requirements: ['requirements'] as const,
+  milestones: ['milestones'] as const,
+} as const;
+
+type QueryCategory = keyof typeof QUERY_FIELDS;
+type ProjectStateField = (typeof QUERY_FIELDS)[QueryCategory][number];
+
+function normalizeQuery(query: string | undefined): QueryCategory {
+  const key = (query ?? 'all').trim().toLowerCase();
+  if (key in QUERY_FIELDS) return key as QueryCategory;
+  return 'all';
+}
+
+async function readProjectState(projectDir: string, query: string | undefined): Promise<Record<string, unknown>> {
  const gsdDir = join(resolve(projectDir), '.gsd');
-  const result: Record<string, unknown> = { projectDir: resolve(projectDir) };
+  const category = normalizeQuery(query);
+  const wanted = new Set<ProjectStateField>(QUERY_FIELDS[category]);

-  // STATE.md — current execution state
-  try {
-    result.state = await readFile(join(gsdDir, 'STATE.md'), 'utf-8');
-  } catch {
-    result.state = null;
-  }
+  const result: Record<string, unknown> = {
+    projectDir: resolve(projectDir),
+    query: category,
+  };

-  // PROJECT.md — project description
-  try {
-    result.project = await readFile(join(gsdDir, 'PROJECT.md'), 'utf-8');
-  } catch {
-    result.project = null;
-  }
-
-  // REQUIREMENTS.md — requirement contract
-  try {
-    result.requirements = await readFile(join(gsdDir, 'REQUIREMENTS.md'), 'utf-8');
-  } catch {
-    result.requirements = null;
-  }
-
-  // List milestones with basic metadata
-  const milestonesDir = join(gsdDir, 'milestones');
-  try {
-    const entries = await readdir(milestonesDir, { withFileTypes: true });
-    const milestones: Array<{ id: string; hasRoadmap: boolean; hasSummary: boolean }> = [];
-    for (const entry of entries) {
-      if (!entry.isDirectory()) continue;
-      const mDir = join(milestonesDir, entry.name);
-      const hasRoadmap = await fileExists(join(mDir, `${entry.name}-ROADMAP.md`));
-      const hasSummary = await fileExists(join(mDir, `${entry.name}-SUMMARY.md`));
-      milestones.push({ id: entry.name, hasRoadmap, hasSummary });
+  if (wanted.has('state')) {
+    try {
+      result.state = await readFile(join(gsdDir, 'STATE.md'), 'utf-8');
+    } catch {
+      result.state = null;
+    }
+  }
+
+  if (wanted.has('project')) {
+    try {
+      result.project = await readFile(join(gsdDir, 'PROJECT.md'), 'utf-8');
+    } catch {
+      result.project = null;
+    }
+  }
+
+  if (wanted.has('requirements')) {
+    try {
+      result.requirements = await readFile(join(gsdDir, 'REQUIREMENTS.md'), 'utf-8');
+    } catch {
+      result.requirements = null;
+    }
+  }
+
+  if (wanted.has('milestones')) {
+    const milestonesDir = join(gsdDir, 'milestones');
+    try {
+      const entries = await readdir(milestonesDir, { withFileTypes: true });
+      const milestones: Array<{ id: string; hasRoadmap: boolean; hasSummary: boolean }> = [];
+      for (const entry of entries) {
+        if (!entry.isDirectory()) continue;
+        const mDir = join(milestonesDir, entry.name);
+        const hasRoadmap = await fileExists(join(mDir, `${entry.name}-ROADMAP.md`));
+        const hasSummary = await fileExists(join(mDir, `${entry.name}-SUMMARY.md`));
+        milestones.push({ id: entry.name, hasRoadmap, hasSummary });
+      }
+      result.milestones = milestones;
+    } catch {
+      result.milestones = [];
    }
-    result.milestones = milestones;
-  } catch {
-    result.milestones = [];
  }

  return result;
@ -128,8 +162,25 @@ interface ElicitRequestFormParams {
  };
 }

+/**
+ * Handler extra — the second argument passed by McpServer.tool handlers.
+ * Contains an AbortSignal scoped to the JSON-RPC request (cancelled when
+ * the client cancels the `tools/call`) plus other per-request metadata.
+ * Tools that can actually be stopped mid-flight should honour `signal`.
+ */
+export interface McpToolExtra {
+  signal?: AbortSignal;
+  requestId?: string | number;
+  sendNotification?: (notification: unknown) => void | Promise<void>;
+}
+
 interface McpServerInstance {
-  tool(name: string, description: string, params: Record<string, unknown>, handler: (args: Record<string, unknown>) => Promise<unknown>): unknown;
+  tool(
+    name: string,
+    description: string,
+    params: Record<string, unknown>,
+    handler: (args: Record<string, unknown>, extra?: McpToolExtra) => Promise<unknown>,
+  ): unknown;
  server: {
    elicitInput(
      params: AskUserQuestionsElicitRequest | ElicitRequestFormParams,
@ -302,7 +353,12 @@ export async function createMcpServer(sessionManager: SessionManager): Promise<{
  );

  // -----------------------------------------------------------------------
-  // gsd_execute — start a new GSD auto-mode session
+  // gsd_execute — start a new GSD auto-mode session.
+  //
+  // If the JSON-RPC request is aborted while the session is starting (or
+  // immediately after), we cancel the session so we don't leak a background
+  // RpcClient process. Once the session is running the caller should use
+  // `gsd_cancel` to stop it via sessionId.
  // -----------------------------------------------------------------------
  server.tool(
    'gsd_execute',
@ -313,12 +369,20 @@ export async function createMcpServer(sessionManager: SessionManager): Promise<{
      model: z.string().optional().describe('Model ID override'),
      bare: z.boolean().optional().describe('Run in bare mode (skip user config)'),
    },
-    async (args: Record<string, unknown>) => {
+    async (args: Record<string, unknown>, extra?: McpToolExtra) => {
      const { projectDir, command, model, bare } = args as {
        projectDir: string; command?: string; model?: string; bare?: boolean;
      };
      try {
        const sessionId = await sessionManager.startSession(projectDir, { command, model, bare });
+
+        // If the client aborted while startSession was running, cancel the
+        // newly-created session rather than leaving an orphaned process.
+        if (extra?.signal?.aborted) {
+          await sessionManager.cancelSession(sessionId).catch(() => { /* swallow */ });
+          return errorContent('gsd_execute aborted by client before returning');
+        }
+
        return jsonContent({ sessionId, status: 'started' });
      } catch (err) {
        return errorContent(err instanceof Error ? err.message : String(err));
@ -411,17 +475,25 @@ export async function createMcpServer(sessionManager: SessionManager): Promise<{
  );

  // -----------------------------------------------------------------------
-  // gsd_query — read project state from filesystem (no session needed)
+  // gsd_query — read project state from filesystem (no session needed).
+  //
+  // `query` is optional: when omitted the tool returns all fields (STATE.md,
+  // PROJECT.md, requirements, milestone listing). Accepted narrow values:
+  // "state" / "status", "project", "requirements", "milestones", "all".
+  // Unknown values fall back to "all" for forward-compatibility.
  // -----------------------------------------------------------------------
  server.tool(
    'gsd_query',
-    'Query GSD project state from the filesystem. Returns STATE.md, PROJECT.md, requirements, and milestone listing. Does not require an active session.',
+    'Query GSD project state from the filesystem. By default returns STATE.md, PROJECT.md, requirements, and milestone listing. Pass `query` to narrow the response (accepted: "state"/"status", "project", "requirements", "milestones", "all"). Does not require an active session.',
    {
      projectDir: z.string().describe('Absolute path to the project directory'),
-      query: z.string().describe('What to query (e.g. "status", "milestones", "requirements")'),
+      query: z
+        .enum(['all', 'state', 'status', 'project', 'requirements', 'milestones'])
+        .optional()
+        .describe('Narrow the response to a single field (default: "all")'),
    },
    async (args: Record<string, unknown>) => {
-      const { projectDir, query } = args as { projectDir: string; query: string };
+      const { projectDir, query } = args as { projectDir: string; query?: string };
      try {
        const state = await readProjectState(projectDir, query);
        return jsonContent(state);
--- a/src/cli.ts
+++ b/src/cli.ts
@ -554,6 +554,17 @@ if (isPrintMode) {
  if (mode === 'mcp') {
    printStartupTimings()
    const { startMcpServer } = await import('./mcp-server.js')
+
+    // Activate every registered tool before starting the MCP transport.
+    // `session.agent.state.tools` is the *active* subset, not the full
+    // registry — if we expose only the active set, extension-registered
+    // tools (gsd workflow, browser-tools, mac-tools, search-the-web, …)
+    // are invisible to MCP clients. Flipping the active set to every
+    // known tool name makes `state.tools` mirror the full registry for
+    // this MCP session, which is what an external client expects.
+    const allToolNames = session.getAllTools().map((t) => t.name)
+    session.setActiveToolsByName(allToolNames)
+
    await startMcpServer({
      tools: session.agent.state.tools ?? [],
      version: process.env.GSD_VERSION || '0.0.0',
--- a/src/mcp-server.ts
+++ b/src/mcp-server.ts
@ -16,13 +16,18 @@ export interface McpToolDef {
  }>
 }

-// MCP SDK subpath imports use wildcard exports (./*) that NodeNext resolves
-// at runtime but TypeScript cannot statically type-check. We construct the
-// specifiers dynamically so tsc treats them as `any`.
+// MCP SDK subpath imports use wildcard exports (./*) in @modelcontextprotocol/sdk's
+// package.json export map. The wildcard maps "./foo" → "./dist/cjs/foo" (no .js
+// suffix), so bare subpath specifiers like `${MCP_PKG}/server/stdio` resolve to
+// a non-existent file. Historically the workaround (#3603) used createRequire so
+// the CJS resolver could auto-append `.js`; that no longer works with current
+// Node + SDK releases (#3914) — `_require.resolve` also fails with
+// "Cannot find module .../dist/cjs/server/stdio".
 //
-// Use explicit .js subpaths for modules that are loaded dynamically at runtime.
-// Recent Node / SDK combinations do not reliably resolve the extensionless
-// wildcard targets for `server/stdio` and `types` (#3914).
+// The reliable convention (matching packages/mcp-server/{server,cli}.ts) is to
+// write the `.js` suffix explicitly on every wildcard subpath. Specifiers are
+// built via a template string so TypeScript's NodeNext resolver treats them as
+// `any` and skips static checking.
 const MCP_PKG = '@modelcontextprotocol/sdk'

 /**
@ -45,7 +50,7 @@ export async function startMcpServer(options: {
 }): Promise<void> {
  const { tools, version = '0.0.0' } = options

-  const serverMod = await import(`${MCP_PKG}/server`)
+  const serverMod = await import(`${MCP_PKG}/server/index.js`)
  const stdioMod = await import(`${MCP_PKG}/server/stdio.js`)
  const typesMod = await import(`${MCP_PKG}/types.js`)

@ -73,8 +78,14 @@ export async function startMcpServer(options: {
    })),
  }))

-  // tools/call — execute the requested tool and return content blocks
-  server.setRequestHandler(CallToolRequestSchema, async (request: any) => {
+  // tools/call — execute the requested tool and return content blocks.
+  //
+  // The MCP SDK passes an `extra` argument to request handlers that includes
+  // an AbortSignal scoped to the RPC request (cancelled when the client
+  // cancels the tool call or the transport closes). Threading it into
+  // AgentTool.execute ensures long-running tools (Bash, WebFetch, grep on
+  // huge trees) actually stop when the client gives up on the result.
+  server.setRequestHandler(CallToolRequestSchema, async (request: any, extra: any) => {
    const { name, arguments: args } = request.params
    const tool = toolMap.get(name)
    if (!tool) {
@ -84,22 +95,37 @@ export async function startMcpServer(options: {
      }
    }

+    const signal: AbortSignal | undefined = extra?.signal
+
    try {
      const result = await tool.execute(
        `mcp-${Date.now()}`,
        args ?? {},
-        undefined, // no AbortSignal
-        undefined, // no onUpdate callback
+        signal,
+        undefined, // onUpdate not yet wired — progress notifications require a progressToken round-trip
      )

-      // Convert AgentToolResult content blocks to MCP content format
+      // Convert AgentToolResult content blocks to MCP content format.
+      // text and image pass through; any other shape is serialized as text
+      // so the client sees the payload rather than an empty response.
      const content = result.content.map((block: any) => {
        if (block.type === 'text') return { type: 'text' as const, text: block.text ?? '' }
-        if (block.type === 'image') return { type: 'image' as const, data: block.data ?? '', mimeType: block.mimeType ?? 'image/png' }
+        if (block.type === 'image') {
+          return {
+            type: 'image' as const,
+            data: block.data ?? '',
+            mimeType: block.mimeType ?? 'image/png',
+          }
+        }
+        // Preserve unknown block types (resource, resource_link, audio, ...)
+        // by stringifying into a text block so clients see the payload.
        return { type: 'text' as const, text: JSON.stringify(block) }
      })
      return { content }
    } catch (err: unknown) {
+      // AbortError from a cancelled tool surfaces as a normal error — MCP
+      // clients interpret `isError: true` as a failed call, which is the
+      // correct behaviour for a cancelled request.
      const message = err instanceof Error ? err.message : String(err)
      return { isError: true, content: [{ type: 'text' as const, text: message }] }
    }
--- a/src/resources/extensions/claude-code-cli/stream-adapter.ts
+++ b/src/resources/extensions/claude-code-cli/stream-adapter.ts
@ -518,22 +518,83 @@ export function createClaudeCodeElicitationHandler(
 	};
 }

+/**
+ * Aborted by the caller's AbortSignal — distinct from exhaustion. GSD's
+ * agent loop keys off `stopReason === "aborted"` to treat this as a clean
+ * user cancel instead of a retry-eligible provider failure.
+ */
+export function makeAbortedMessage(model: string, lastTextContent: string): AssistantMessage {
+	const message: AssistantMessage = {
+		role: "assistant",
+		content: lastTextContent
+			? [{ type: "text", text: lastTextContent }]
+			: [{ type: "text", text: "Claude Code stream aborted by caller" }],
+		api: "anthropic-messages",
+		provider: "claude-code",
+		model,
+		usage: { ...ZERO_USAGE },
+		stopReason: "aborted",
+		timestamp: Date.now(),
+	};
+	return message;
+}
+
 // ---------------------------------------------------------------------------
 // SDK options builder
 // ---------------------------------------------------------------------------

+/**
+ * Resolve the Claude Code permission mode for the current run.
+ *
+ * - Auto-mode / headless runs bypass permissions so tool calls don't block
+ *   on prompts the user isn't watching.
+ * - Interactive runs default to `acceptEdits` so file/bash writes still
+ *   land quickly but the SDK retains a permission gate.
+ * - `GSD_CLAUDE_CODE_PERMISSION_MODE` forces a specific mode when set.
+ *
+ * Cross-extension coupling is kept minimal by dynamically importing
+ * `isAutoActive` and falling back to the bypass default if the import
+ * fails (e.g. in unit tests that load stream-adapter in isolation).
+ */
+export async function resolveClaudePermissionMode(
+	env: NodeJS.ProcessEnv = process.env,
+): Promise<"bypassPermissions" | "acceptEdits" | "default" | "plan"> {
+	const override = env.GSD_CLAUDE_CODE_PERMISSION_MODE?.trim();
+	if (override === "bypassPermissions" || override === "acceptEdits" || override === "default" || override === "plan") {
+		return override;
+	}
+
+	try {
+		const autoMod = (await import("../gsd/auto.js")) as { isAutoActive?: () => boolean };
+		if (typeof autoMod.isAutoActive === "function" && autoMod.isAutoActive()) {
+			return "bypassPermissions";
+		}
+		return "acceptEdits";
+	} catch {
+		// auto.ts unavailable (tests, non-GSD contexts) — stay permissive.
+		return "bypassPermissions";
+	}
+}
+
 /**
 * Build the options object passed to the Claude Agent SDK's `query()` call.
 *
 * Extracted for testability — callers can verify session persistence,
 * beta flags, and other configuration without mocking the full SDK.
+ *
+ * `permissionMode` / `allowDangerouslySkipPermissions` are resolved through
+ * {@link resolveClaudePermissionMode} so interactive runs don't silently
+ * bypass the SDK's permission gate. Callers that want the old always-bypass
+ * behaviour pass `permissionMode: "bypassPermissions"` explicitly.
 */
 export function buildSdkOptions(
 	modelId: string,
 	prompt: string,
+	overrides?: { permissionMode?: "bypassPermissions" | "acceptEdits" | "default" | "plan" },
 	extraOptions: Record<string, unknown> = {},
 ): Record<string, unknown> {
 	const mcpServers = buildWorkflowMcpServers();
+	const permissionMode = overrides?.permissionMode ?? "bypassPermissions";
 	const disallowedTools = ["AskUserQuestion"];
 	return {
 		pathToClaudeCodeExecutable: getClaudePath(),
@ -541,8 +602,8 @@ export function buildSdkOptions(
 		includePartialMessages: true,
 		persistSession: true,
 		cwd: process.cwd(),
-		permissionMode: "bypassPermissions",
-		allowDangerouslySkipPermissions: true,
+		permissionMode,
+		allowDangerouslySkipPermissions: permissionMode === "bypassPermissions",
 		settingSources: ["project"],
 		systemPrompt: { type: "preset", preset: "claude_code" },
 		disallowedTools,
@ -656,6 +717,29 @@ function attachExternalResultsToToolBlocks(
 	}
 }

+/**
+ * Merge tool-call blocks from the active partial-message builder into the
+ * running list of intermediate tool calls, preserving order and de-duping
+ * by tool-call id. Exposed for testing the F3 fix (final-turn tool calls
+ * dropped when `result` arrives without a preceding synthetic `user`).
+ */
+export function mergePendingToolCalls(
+	intermediate: AssistantMessage["content"],
+	pending: AssistantMessage["content"],
+): AssistantMessage["content"] {
+	const alreadyIncluded = new Set<string>();
+	for (const block of intermediate) {
+		if (block.type === "toolCall") alreadyIncluded.add(block.id);
+	}
+	for (const block of pending) {
+		if (block.type !== "toolCall") continue;
+		if (alreadyIncluded.has(block.id)) continue;
+		alreadyIncluded.add(block.id);
+		intermediate.push(block);
+	}
+	return intermediate;
+}
+
 // ---------------------------------------------------------------------------
 // streamSimple implementation
 // ---------------------------------------------------------------------------
@ -712,9 +796,11 @@ async function pumpSdkMessages(
 		}

 		const prompt = buildPromptFromContext(context);
+		const permissionMode = await resolveClaudePermissionMode();
 		const sdkOpts = buildSdkOptions(
 			modelId,
 			prompt,
+			{ permissionMode },
 			typeof (options as ClaudeCodeStreamOptions | undefined)?.extensionUIContext === "object"
 				? {
 						onElicitation: createClaudeCodeElicitationHandler(
@ -746,7 +832,17 @@ async function pumpSdkMessages(
 		stream.push({ type: "start", partial: initialPartial });

 		for await (const msg of queryResult as AsyncIterable<SDKMessage>) {
-			if (options?.signal?.aborted) break;
+			if (options?.signal?.aborted) {
+				// User-initiated cancel — emit an aborted error so the agent
+				// loop classifies this as a deliberate stop, not a transient
+				// provider failure that should be retried.
+				stream.push({
+					type: "error",
+					reason: "aborted",
+					error: makeAbortedMessage(modelId, lastTextContent),
+				});
+				return;
+			}

 			switch (msg.type) {
 				// -- Init --
@ -857,6 +953,16 @@ async function pumpSdkMessages(
 					// events for proper TUI rendering, followed by the text response.
 					const finalContent: AssistantMessage["content"] = [];

+					// If the final turn ended without a synthetic user message
+					// (e.g. stop_reason: "tool_use" followed directly by result,
+					// or a turn with text but no tool execution), the `builder`
+					// still holds toolCall blocks that were never pushed into
+					// `intermediateToolBlocks`. Fold them in here so they aren't
+					// dropped from the final AssistantMessage.
+					if (builder) {
+						mergePendingToolCalls(intermediateToolBlocks, builder.message.content);
+					}
+
 					// Add tool calls from intermediate turns first (renders above text)
 					attachExternalResultsToToolBlocks(intermediateToolBlocks, toolResultsById);
 					finalContent.push(...intermediateToolBlocks);
--- a/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts
+++ b/src/resources/extensions/claude-code-cli/tests/stream-adapter.test.ts
@ -6,6 +6,9 @@ import { tmpdir } from "node:os";
 import {
 	makeStreamExhaustedErrorMessage,
 	getResultErrorMessage,
+	makeAbortedMessage,
+	mergePendingToolCalls,
+	resolveClaudePermissionMode,
 	buildPromptFromContext,
 	buildSdkOptions,
 	createClaudeCodeElicitationHandler,
@ -16,7 +19,7 @@ import {
 	parseClaudeLookupOutput,
 	roundResultToElicitationContent,
 } from "../stream-adapter.ts";
-import type { Context, Message } from "@gsd/pi-ai";
+import type { AssistantMessage, Context, Message } from "@gsd/pi-ai";
 import type { SDKUserMessage } from "../sdk-types.ts";

 // ---------------------------------------------------------------------------
@ -414,7 +417,7 @@ describe("stream-adapter — session persistence (#2859)", () => {
 			delete process.env.GSD_WORKFLOW_MCP_ARGS;
 			delete process.env.GSD_WORKFLOW_MCP_ENV;
 			delete process.env.GSD_WORKFLOW_MCP_CWD;
-			const options = buildSdkOptions("claude-sonnet-4-20250514", "test", { onElicitation });
+			const options = buildSdkOptions("claude-sonnet-4-20250514", "test", undefined, { onElicitation });
 			assert.equal(options.onElicitation, onElicitation);
 		} finally {
 			process.env.GSD_WORKFLOW_MCP_COMMAND = prev.GSD_WORKFLOW_MCP_COMMAND;
@ -680,6 +683,134 @@ describe("stream-adapter — MCP elicitation bridge", () => {
 	});
 });

+// ---------------------------------------------------------------------------
+// F2 — abort vs stream-exhausted classification
+// ---------------------------------------------------------------------------
+
+describe("stream-adapter — abort classification (F2)", () => {
+	test("makeAbortedMessage sets stopReason to 'aborted', not 'error'", () => {
+		const message = makeAbortedMessage("claude-sonnet-4-6", "");
+		assert.equal(message.stopReason, "aborted");
+		assert.equal(message.errorMessage, undefined);
+	});
+
+	test("makeAbortedMessage preserves last-seen text content", () => {
+		const message = makeAbortedMessage("claude-sonnet-4-6", "partial mid-stream text");
+		assert.deepEqual(message.content, [{ type: "text", text: "partial mid-stream text" }]);
+	});
+
+	test("aborted message is distinguishable from stream-exhausted error", () => {
+		const aborted = makeAbortedMessage("claude-sonnet-4-6", "");
+		const exhausted = makeStreamExhaustedErrorMessage("claude-sonnet-4-6", "");
+		assert.notEqual(aborted.stopReason, exhausted.stopReason);
+		assert.equal(exhausted.errorMessage, "stream_exhausted_without_result");
+	});
+});
+
+// ---------------------------------------------------------------------------
+// F3 — final-turn tool calls not dropped
+// ---------------------------------------------------------------------------
+
+describe("stream-adapter — final-turn tool-call merge (F3)", () => {
+	function toolCall(id: string, name = "bash"): AssistantMessage["content"][number] {
+		return { type: "toolCall", id, name, arguments: {} };
+	}
+
+	test("mergePendingToolCalls appends tool calls not already in intermediate", () => {
+		const intermediate: AssistantMessage["content"] = [toolCall("tool-1")];
+		const pending: AssistantMessage["content"] = [
+			toolCall("tool-2"),
+			{ type: "text", text: "trailing text" },
+		];
+		const merged = mergePendingToolCalls(intermediate, pending);
+		assert.equal(merged.length, 2);
+		assert.equal((merged[0] as any).id, "tool-1");
+		assert.equal((merged[1] as any).id, "tool-2");
+	});
+
+	test("mergePendingToolCalls is idempotent across duplicate ids", () => {
+		const intermediate: AssistantMessage["content"] = [toolCall("tool-1")];
+		const pending: AssistantMessage["content"] = [toolCall("tool-1"), toolCall("tool-2")];
+		const merged = mergePendingToolCalls(intermediate, pending);
+		assert.equal(merged.length, 2);
+		assert.deepEqual(
+			merged.map((b) => (b as any).id),
+			["tool-1", "tool-2"],
+		);
+	});
+
+	test("mergePendingToolCalls ignores non-toolCall blocks from pending", () => {
+		const intermediate: AssistantMessage["content"] = [];
+		const pending: AssistantMessage["content"] = [
+			{ type: "text", text: "hello" },
+			{ type: "thinking", thinking: "pondering" },
+			toolCall("tool-1"),
+		];
+		const merged = mergePendingToolCalls(intermediate, pending);
+		assert.equal(merged.length, 1);
+		assert.equal((merged[0] as any).id, "tool-1");
+	});
+});
+
+// ---------------------------------------------------------------------------
+// F10 — permission mode is configurable
+// ---------------------------------------------------------------------------
+
+describe("stream-adapter — permission mode (F10)", () => {
+	// Earlier tests in this file set GSD_WORKFLOW_MCP_* env vars and restore
+	// them by reassigning from `prev.*`. When `prev.*` was undefined, node
+	// coerces the assignment to the literal string "undefined", which then
+	// fails JSON.parse inside buildWorkflowMcpServers. Clear the relevant
+	// slots before each permission-mode test so buildSdkOptions doesn't throw.
+	function clearWorkflowMcpEnv(): void {
+		for (const key of [
+			"GSD_WORKFLOW_MCP_COMMAND",
+			"GSD_WORKFLOW_MCP_NAME",
+			"GSD_WORKFLOW_MCP_ARGS",
+			"GSD_WORKFLOW_MCP_ENV",
+			"GSD_WORKFLOW_MCP_CWD",
+		]) {
+			if (process.env[key] === undefined || process.env[key] === "undefined") {
+				delete process.env[key];
+			}
+		}
+	}
+
+	test("buildSdkOptions defaults to bypassPermissions for backwards compatibility", () => {
+		clearWorkflowMcpEnv();
+		const opts = buildSdkOptions("claude-sonnet-4-6", "test");
+		assert.equal(opts.permissionMode, "bypassPermissions");
+		assert.equal(opts.allowDangerouslySkipPermissions, true);
+	});
+
+	test("buildSdkOptions respects explicit acceptEdits override", () => {
+		clearWorkflowMcpEnv();
+		const opts = buildSdkOptions("claude-sonnet-4-6", "test", { permissionMode: "acceptEdits" });
+		assert.equal(opts.permissionMode, "acceptEdits");
+		assert.equal(
+			opts.allowDangerouslySkipPermissions,
+			false,
+			"allowDangerouslySkipPermissions must be false for non-bypass modes",
+		);
+	});
+
+	test("resolveClaudePermissionMode honours the GSD_CLAUDE_CODE_PERMISSION_MODE env override", async () => {
+		const env = { GSD_CLAUDE_CODE_PERMISSION_MODE: "acceptEdits" } as NodeJS.ProcessEnv;
+		const mode = await resolveClaudePermissionMode(env);
+		assert.equal(mode, "acceptEdits");
+	});
+
+	test("resolveClaudePermissionMode rejects unknown override values (fallback path)", async () => {
+		const env = { GSD_CLAUDE_CODE_PERMISSION_MODE: "nonsense" } as NodeJS.ProcessEnv;
+		const mode = await resolveClaudePermissionMode(env);
+		// Unknown override falls back to auto-detect → either bypass or acceptEdits
+		assert.ok(
+			mode === "bypassPermissions" || mode === "acceptEdits",
+			`expected bypass or acceptEdits, got ${mode}`,
+		);
+	});
+});
+
 describe("stream-adapter — Windows Claude path lookup (#3770)", () => {
 	test("getClaudeLookupCommand uses where on Windows", () => {
 		assert.equal(getClaudeLookupCommand("win32"), "where claude");
--- a/src/tests/mcp-createRequire.test.ts
+++ b/src/tests/mcp-createRequire.test.ts
@ -1,9 +1,17 @@
 /**
- * Regression test for #3914 — MCP server uses explicit .js SDK subpaths.
+ * Regression test for #3603 / #3914 — MCP server subpath imports.
 *
- * Extensionless wildcard exports for `server/stdio` and `types` do not resolve
- * reliably across current Node / SDK combinations. The runtime import strings
- * must include `.js`.
+ * @modelcontextprotocol/sdk's package.json exports map uses a wildcard
+ * `./*` → `./dist/cjs/*` with no `.js` suffix, so bare subpath specifiers
+ * like `@modelcontextprotocol/sdk/server/stdio` resolve to a file that
+ * doesn't exist. Historically the workaround used `createRequire` so the
+ * CJS resolver auto-appended `.js`; that no longer works with current
+ * Node + SDK versions (#3914).
+ *
+ * The reliable convention (used in packages/mcp-server/{server,cli}.ts)
+ * is to write the `.js` suffix explicitly on every subpath import. This
+ * test locks that convention in so regressions can't silently reintroduce
+ * the bare subpath form or the broken createRequire-based resolution.
 */

 import { describe, test } from 'node:test';
@ -17,19 +25,31 @@ const __dirname = dirname(__filename);

 const source = readFileSync(join(__dirname, '..', 'mcp-server.ts'), 'utf-8');

-describe('MCP server SDK subpath imports (#3914)', () => {
-  test('server/stdio import uses explicit .js subpath', () => {
-    assert.match(source, /await import\(`\$\{MCP_PKG\}\/server\/stdio\.js`\)/,
-      'server/stdio import should include the .js suffix');
+describe('MCP server SDK subpath imports (#3603 / #3914)', () => {
+  test('server/index.js subpath is imported with explicit .js suffix', () => {
+    assert.match(source, /await import\(`\$\{MCP_PKG\}\/server\/index\.js`\)/,
+      'server import must use `${MCP_PKG}/server/index.js` to satisfy the wildcard export map');
  });

-  test('types import uses explicit .js subpath', () => {
+  test('server/stdio.js subpath is imported with explicit .js suffix', () => {
+    assert.match(source, /await import\(`\$\{MCP_PKG\}\/server\/stdio\.js`\)/,
+      'stdio import must use `${MCP_PKG}/server/stdio.js`');
+  });
+
+  test('types.js subpath is imported with explicit .js suffix', () => {
    assert.match(source, /await import\(`\$\{MCP_PKG\}\/types\.js`\)/,
-      'types import should include the .js suffix');
+      'types import must use `${MCP_PKG}/types.js`');
  });

  test('legacy createRequire-based resolution is gone', () => {
-    assert.doesNotMatch(source, /createRequire|_require\.resolve/,
-      'legacy createRequire-based subpath resolution should not remain');
+    // Only flag actual code, not the comment that explains the history.
+    // The import statement, variable declaration, and `_require.resolve(` call
+    // sites are the real regression surfaces.
+    assert.doesNotMatch(source, /^\s*import\s*\{\s*createRequire\s*\}\s*from/m,
+      'createRequire should not be imported from node:module');
+    assert.doesNotMatch(source, /^\s*const\s+_require\s*=\s*createRequire/m,
+      '_require helper should not be created');
+    assert.doesNotMatch(source, /_require\.resolve\(/,
+      '_require.resolve should not be used for subpath resolution');
  });
 });