Merge pull request #2748 from gsd-build/fix/2743-web-search-duplicate-rendering

fix: Remove premature pendingTools.delete causing web_search duplicate rendering
2026-03-26 16:08:39 -06:00 · 2026-03-26 16:08:39 -06:00 · 41dda26b9a
commit 41dda26b9a
parent c557aea8de ef310574da
14 changed files with 1765 additions and 39 deletions
--- a/packages/pi-coding-agent/src/cli/args.ts
+++ b/packages/pi-coding-agent/src/cli/args.ts
@ -49,6 +49,8 @@ export interface Args {
 	fileArgs: string[];
 	/** Unknown flags (potentially extension flags) - map of flag name to value */
 	unknownFlags: Map<string, boolean | string>;
+	/** --bare: suppress CLAUDE.md/AGENTS.md, user skills, prompt templates, themes, project preferences */
+	bare?: boolean;
 }

 const VALID_THINKING_LEVELS = ["off", "minimal", "low", "medium", "high", "xhigh"] as const;
@ -169,6 +171,8 @@ export function parseArgs(args: string[], extensionFlags?: Map<string, { type: "
 			}
 		} else if (arg === "--verbose") {
 			result.verbose = true;
+		} else if (arg === "--bare") {
+			result.bare = true;
 		} else if (arg === "--offline") {
 			result.offline = true;
 		} else if (arg.startsWith("@")) {
--- a/packages/pi-coding-agent/src/index.ts
+++ b/packages/pi-coding-agent/src/index.ts
@ -314,8 +314,11 @@ export {
 	type RpcClientOptions,
 	type RpcEventListener,
 	type RpcCommand,
+	type RpcInitResult,
+	type RpcProtocolVersion,
 	type RpcResponse,
 	type RpcSessionState,
+	type RpcV2Event,
 } from "./modes/index.js";
 // RPC JSONL utilities
 export { attachJsonlLineReader, serializeJsonLine } from "./modes/rpc/jsonl.js";
--- a/packages/pi-coding-agent/src/main.ts
+++ b/packages/pi-coding-agent/src/main.ts
@ -419,11 +419,13 @@ export async function main(args: string[]) {
 		additionalPromptTemplatePaths: firstPass.promptTemplates,
 		additionalThemePaths: firstPass.themes,
 		noExtensions: firstPass.noExtensions,
-		noSkills: firstPass.noSkills,
-		noPromptTemplates: firstPass.noPromptTemplates,
-		noThemes: firstPass.noThemes,
+		noSkills: firstPass.noSkills || firstPass.bare,
+		noPromptTemplates: firstPass.noPromptTemplates || firstPass.bare,
+		noThemes: firstPass.noThemes || firstPass.bare,
 		systemPrompt: firstPass.systemPrompt,
 		appendSystemPrompt: firstPass.appendSystemPrompt,
+		// --bare: suppress CLAUDE.md/AGENTS.md ancestor walk
+		...(firstPass.bare ? { agentsFilesOverride: () => ({ agentsFiles: [] }) } : {}),
 	});
 	await resourceLoader.reload();
 	time("resourceLoader.reload");
--- a/packages/pi-coding-agent/src/modes/index.ts
+++ b/packages/pi-coding-agent/src/modes/index.ts
@ -6,4 +6,11 @@ export { InteractiveMode, type InteractiveModeOptions } from "./interactive/inte
 export { type PrintModeOptions, runPrintMode } from "./print-mode.js";
 export { type ModelInfo, RpcClient, type RpcClientOptions, type RpcEventListener } from "./rpc/rpc-client.js";
 export { runRpcMode } from "./rpc/rpc-mode.js";
-export type { RpcCommand, RpcResponse, RpcSessionState } from "./rpc/rpc-types.js";
+export type {
+	RpcCommand,
+	RpcInitResult,
+	RpcProtocolVersion,
+	RpcResponse,
+	RpcSessionState,
+	RpcV2Event,
+} from "./rpc/rpc-types.js";
--- a/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts
+++ b/packages/pi-coding-agent/src/modes/interactive/controllers/chat-controller.ts
@ -150,7 +150,6 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & {
 									content: [{ type: "text", text: "Web search disabled (offline mode)" }],
 									isError: false,
 								});
-								host.pendingTools.delete(content.toolUseId);
 							} else {
 								const searchContent = content.content;
 								const isError = searchContent && typeof searchContent === "object" && "type" in (searchContent as any) && (searchContent as any).type === "web_search_tool_result_error";
@ -158,7 +157,6 @@ export async function handleAgentEvent(host: InteractiveModeStateHost & {
 									content: [{ type: "text", text: host.formatWebSearchResult(searchContent) }],
 									isError: !!isError,
 								});
-								host.pendingTools.delete(content.toolUseId);
 							}
 						}
 					}
--- a/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts
+++ b/packages/pi-coding-agent/src/modes/rpc/rpc-client.ts
@ -11,7 +11,7 @@ import type { SessionStats } from "../../core/agent-session.js";
 import type { BashResult } from "../../core/bash-executor.js";
 import type { CompactionResult } from "../../core/compaction/index.js";
 import { attachJsonlLineReader, serializeJsonLine } from "./jsonl.js";
-import type { RpcCommand, RpcResponse, RpcSessionState, RpcSlashCommand } from "./rpc-types.js";
+import type { RpcCommand, RpcInitResult, RpcResponse, RpcSessionState, RpcSlashCommand } from "./rpc-types.js";

 // ============================================================================
 // Types
@ -398,6 +398,59 @@ export class RpcClient {
 		return this.getData<{ commands: RpcSlashCommand[] }>(response).commands;
 	}

+	/**
+	 * Send a UI response to a pending extension_ui_request.
+	 * Fire-and-forget — no request/response correlation.
+	 */
+	sendUIResponse(id: string, response: { value?: string; values?: string[]; confirmed?: boolean; cancelled?: boolean }): void {
+		if (!this.process?.stdin) {
+			throw new Error("Client not started");
+		}
+		this.process.stdin.write(serializeJsonLine({
+			type: "extension_ui_response",
+			id,
+			...response,
+		}));
+	}
+
+	/**
+	 * Initialize a v2 protocol session. Must be sent as the first command.
+	 * Returns the negotiated protocol version, session ID, and server capabilities.
+	 */
+	async init(options?: { clientId?: string }): Promise<RpcInitResult> {
+		const response = await this.send({ type: "init", protocolVersion: 2, clientId: options?.clientId });
+		return this.getData<RpcInitResult>(response);
+	}
+
+	/**
+	 * Request a graceful shutdown of the agent process.
+	 * Awaits the acknowledgment, then waits for the process to exit (SIGKILL after 5 s).
+	 */
+	async shutdown(): Promise<void> {
+		await this.send({ type: "shutdown" });
+		// Wait for process to exit after shutdown acknowledgment
+		if (this.process) {
+			await new Promise<void>((resolve) => {
+				const timeout = setTimeout(() => {
+					this.process?.kill("SIGKILL");
+					resolve();
+				}, 5000);
+				this.process?.on("exit", () => {
+					clearTimeout(timeout);
+					resolve();
+				});
+			});
+		}
+	}
+
+	/**
+	 * Subscribe to specific event types (v2 only).
+	 * Pass ["*"] to receive all events, or a list of event type strings to filter.
+	 */
+	async subscribe(events: string[]): Promise<void> {
+		await this.send({ type: "subscribe", events });
+	}
+
 	// =========================================================================
 	// Helpers
 	// =========================================================================
--- a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts
+++ b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts
@ -27,6 +27,7 @@ import type {
 	RpcCommand,
 	RpcExtensionUIRequest,
 	RpcExtensionUIResponse,
+	RpcInitResult,
 	RpcResponse,
 	RpcSessionState,
 	RpcSlashCommand,
@ -37,8 +38,11 @@ export type {
 	RpcCommand,
 	RpcExtensionUIRequest,
 	RpcExtensionUIResponse,
+	RpcInitResult,
+	RpcProtocolVersion,
 	RpcResponse,
 	RpcSessionState,
+	RpcV2Event,
 } from "./rpc-types.js";

 /**
@ -74,6 +78,16 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
 	// Shutdown request flag
 	let shutdownRequested = false;

+	// v2 protocol version detection state
+	let protocolVersion: 1 | 2 = 1;
+	let protocolLocked = false;
+
+	// v2 runId threading: tracks the current execution run
+	let currentRunId: string | null = null;
+
+	// v2 event filtering: null = no filter (all events); Set = only listed event types
+	let eventFilter: Set<string> | null = null;
+
 	const embeddedTerminalEnabled = process.env.GSD_WEB_BRIDGE_TUI === "1";
 	const remoteTerminal = embeddedTerminalEnabled
 		? new RemoteTerminal({
@ -425,7 +439,55 @@ export async function runRpcMode(session: AgentSession): Promise<never> {

 	// Output all agent events as JSON
 	const unsubscribe = session.subscribe((event) => {
-		output(event);
+		// v2: emit synthesized events before the regular event
+		if (protocolVersion === 2) {
+			// cost_update on assistant message_end
+			if (event.type === "message_end" && event.message.role === "assistant" && currentRunId) {
+				const stats = session.getSessionStats();
+				const costUpdate = {
+					type: "cost_update" as const,
+					runId: currentRunId,
+					turnCost: session.getLastTurnCost(),
+					cumulativeCost: stats.cost,
+					tokens: {
+						input: stats.tokens.input,
+						output: stats.tokens.output,
+						cacheRead: stats.tokens.cacheRead,
+						cacheWrite: stats.tokens.cacheWrite,
+					},
+				};
+				if (!eventFilter || eventFilter.has("cost_update")) {
+					output(costUpdate);
+				}
+			}
+
+			// execution_complete on agent_end
+			if (event.type === "agent_end" && currentRunId) {
+				const stats = session.getSessionStats();
+				const completionEvent = {
+					type: "execution_complete" as const,
+					runId: currentRunId,
+					status: "completed" as const,
+					stats,
+				};
+				if (!eventFilter || eventFilter.has("execution_complete")) {
+					output(completionEvent);
+				}
+				currentRunId = null;
+			}
+		}
+
+		// Apply event filter (v2 only, applies to agent session events only)
+		if (protocolVersion === 2 && eventFilter && !eventFilter.has(event.type)) {
+			return;
+		}
+
+		// Emit the regular event, with runId injection in v2 mode
+		if (protocolVersion === 2 && currentRunId) {
+			output({ ...event, runId: currentRunId });
+		} else {
+			output(event);
+		}
 	});

 	// Handle a single command
@ -438,6 +500,9 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
 			// =================================================================

 			case "prompt": {
+				// v2: generate runId for execution tracking
+				const runId = protocolVersion === 2 ? crypto.randomUUID() : undefined;
+				if (runId) currentRunId = runId;
 				// Don't await - events will stream
 				// Extension commands are executed immediately, file prompt templates are expanded
 				// If streaming and streamingBehavior specified, queues via steer/followUp
@ -448,17 +513,23 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
 						source: "rpc",
 					})
 					.catch((e) => output(error(id, "prompt", e.message)));
-				return success(id, "prompt");
+				return { id, type: "response", command: "prompt", success: true, ...(runId && { runId }) } as RpcResponse;
 			}

 			case "steer": {
+				// v2: generate runId for execution tracking
+				const runId = protocolVersion === 2 ? crypto.randomUUID() : undefined;
+				if (runId) currentRunId = runId;
 				await session.steer(command.message, command.images);
-				return success(id, "steer");
+				return { id, type: "response", command: "steer", success: true, ...(runId && { runId }) } as RpcResponse;
 			}

 			case "follow_up": {
+				// v2: generate runId for execution tracking
+				const runId = protocolVersion === 2 ? crypto.randomUUID() : undefined;
+				if (runId) currentRunId = runId;
 				await session.followUp(command.message, command.images);
-				return success(id, "follow_up");
+				return { id, type: "response", command: "follow_up", success: true, ...(runId && { runId }) } as RpcResponse;
 			}

 			case "abort": {
@ -709,6 +780,28 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
 				return success(id, "terminal_redraw");
 			}

+			// =================================================================
+			// v2 Protocol: subscribe
+			// =================================================================
+
+			case "subscribe": {
+				if (command.events.includes("*")) {
+					eventFilter = null; // wildcard = all events
+				} else {
+					eventFilter = new Set(command.events);
+				}
+				return success(id, "subscribe");
+			}
+
+			// =================================================================
+			// v2 Protocol: shutdown
+			// =================================================================
+
+			case "shutdown": {
+				shutdownRequested = true;
+				return success(id, "shutdown");
+			}
+
 			default: {
 				const unknownCommand = command as { type: string; id?: string };
 				return error(unknownCommand.id, unknownCommand.type, `Unknown command: ${unknownCommand.type}`);
@ -741,7 +834,7 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
 		try {
 			const parsed = JSON.parse(line);

-			// Handle extension UI responses
+			// Handle extension UI responses (bypass protocol detection)
 			if (parsed.type === "extension_ui_response") {
 				const response = parsed as RpcExtensionUIResponse;
 				const pending = pendingExtensionRequests.get(response.id);
@ -752,8 +845,33 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
 				return;
 			}

-			// Handle regular commands
 			const command = parsed as RpcCommand;
+
+			// Protocol version detection: first non-UI-response command locks the version
+			if (!protocolLocked) {
+				protocolLocked = true;
+				if (command.type === "init") {
+					protocolVersion = 2;
+					const initResult: RpcInitResult = {
+						protocolVersion: 2,
+						sessionId: session.sessionId,
+						capabilities: {
+							events: ["execution_complete", "cost_update"],
+							commands: ["init", "shutdown", "subscribe"],
+						},
+					};
+					output(success(command.id, "init", initResult));
+					return;
+				}
+				// Non-init first message: lock to v1, fall through to normal handling
+				protocolVersion = 1;
+			} else if (command.type === "init") {
+				// Already locked — reject re-init
+				output(error(command.id, "init", "Protocol version already locked. init must be the first command."));
+				return;
+			}
+
+			// Handle regular commands
 			const response = await handleCommand(command);
 			output(response);

--- a/packages/pi-coding-agent/src/modes/rpc/rpc-protocol-v2.test.ts
+++ b/packages/pi-coding-agent/src/modes/rpc/rpc-protocol-v2.test.ts
@ -0,0 +1,971 @@
+/**
+ * RPC Protocol v2 test suite.
+ *
+ * Tests v1 backward compatibility, v2 init handshake, protocol locking,
+ * v2 feature type shapes, and RpcClient command serialization against
+ * mock child processes using PassThrough streams.
+ */
+
+import { describe, it, beforeEach, afterEach, mock } from "node:test";
+import assert from "node:assert/strict";
+import { PassThrough } from "node:stream";
+import { attachJsonlLineReader, serializeJsonLine } from "./jsonl.js";
+import type {
+	RpcCommand,
+	RpcResponse,
+	RpcInitResult,
+	RpcExecutionCompleteEvent,
+	RpcCostUpdateEvent,
+	RpcV2Event,
+	RpcProtocolVersion,
+	RpcSessionState,
+} from "./rpc-types.js";
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+/** Collect JSONL output lines from a stream */
+function collectLines(stream: PassThrough): { lines: unknown[]; detach: () => void } {
+	const lines: unknown[] = [];
+	const detach = attachJsonlLineReader(stream, (line) => {
+		try {
+			lines.push(JSON.parse(line));
+		} catch {
+			// skip non-JSON lines
+		}
+	});
+	return { lines, detach };
+}
+
+/** Write an object as one JSONL line to a stream (does not wait for drain) */
+function writeLine(stream: PassThrough, obj: unknown): void {
+	stream.write(serializeJsonLine(obj));
+}
+
+/**
+ * Create a mock "child process" as two independent PassThrough streams.
+ * clientStdin  → data flows into the "server" (from the client's perspective, this is what the client writes to)
+ * clientStdout ← data flows out of the "server" (from the client's perspective, this is what the client reads from)
+ *
+ * The test acts as the "server": read from clientStdin, write to clientStdout.
+ */
+function createMockProcess() {
+	// Client writes to this → server reads from it
+	const clientStdin = new PassThrough();
+	// Server writes to this → client reads from it
+	const clientStdout = new PassThrough();
+
+	return { clientStdin, clientStdout };
+}
+
+/** Sleep for `ms` milliseconds (default 10) so pending stream handlers can run */
+function tick(ms = 10): Promise<void> {
+	return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+// ============================================================================
+// JSONL utilities
+// ============================================================================
+
+describe("JSONL utilities", () => {
+	it("serializeJsonLine produces newline-terminated JSON", () => {
+		const result = serializeJsonLine({ type: "test", value: 42 });
+		assert.equal(result, '{"type":"test","value":42}\n');
+	});
+
+	it("serializeJsonLine handles nested objects", () => {
+		const result = serializeJsonLine({ a: { b: [1, 2, 3] } });
+		assert.ok(result.endsWith("\n"));
+		const parsed = JSON.parse(result.trim());
+		assert.deepEqual(parsed, { a: { b: [1, 2, 3] } });
+	});
+
+	it("attachJsonlLineReader splits on LF only", async () => {
+		const stream = new PassThrough();
+		const { lines, detach } = collectLines(stream);
+
+		stream.write('{"a":1}\n{"b":2}\n');
+		await tick();
+
+		assert.equal(lines.length, 2);
+		assert.deepEqual(lines[0], { a: 1 });
+		assert.deepEqual(lines[1], { b: 2 });
+		detach();
+	});
+
+	it("attachJsonlLineReader handles partial writes", async () => {
+		const stream = new PassThrough();
+		const { lines, detach } = collectLines(stream);
+
+		stream.write('{"partial":');
+		await tick();
+		assert.equal(lines.length, 0);
+
+		stream.write('"value"}\n');
+		await tick();
+		assert.equal(lines.length, 1);
+		assert.deepEqual(lines[0], { partial: "value" });
+		detach();
+	});
+
+	it("attachJsonlLineReader handles CR+LF", async () => {
+		const stream = new PassThrough();
+		const { lines, detach } = collectLines(stream);
+
+		stream.write('{"cr":"lf"}\r\n');
+		await tick();
+		assert.equal(lines.length, 1);
+		assert.deepEqual(lines[0], { cr: "lf" });
+		detach();
+	});
+
+	it("detach stops line delivery", async () => {
+		const stream = new PassThrough();
+		const { lines, detach } = collectLines(stream);
+
+		stream.write('{"before":1}\n');
+		await tick();
+		assert.equal(lines.length, 1);
+
+		detach();
+
+		stream.write('{"after":2}\n');
+		await tick();
+		// Should still be 1 since we detached
+		assert.equal(lines.length, 1);
+	});
+});
+
+// ============================================================================
+// v2 type shape assertions
+// ============================================================================
+
+describe("v2 type shapes", () => {
+	it("RpcInitResult has required fields", () => {
+		const initResult: RpcInitResult = {
+			protocolVersion: 2,
+			sessionId: "test-session-123",
+			capabilities: {
+				events: ["execution_complete", "cost_update"],
+				commands: ["init", "shutdown", "subscribe"],
+			},
+		};
+		assert.equal(initResult.protocolVersion, 2);
+		assert.ok(typeof initResult.sessionId === "string");
+		assert.ok(Array.isArray(initResult.capabilities.events));
+		assert.ok(Array.isArray(initResult.capabilities.commands));
+		assert.ok(initResult.capabilities.events.includes("execution_complete"));
+		assert.ok(initResult.capabilities.events.includes("cost_update"));
+		assert.ok(initResult.capabilities.commands.includes("init"));
+		assert.ok(initResult.capabilities.commands.includes("shutdown"));
+		assert.ok(initResult.capabilities.commands.includes("subscribe"));
+	});
+
+	it("RpcExecutionCompleteEvent matches expected shape", () => {
+		const event: RpcExecutionCompleteEvent = {
+			type: "execution_complete",
+			runId: "run-abc-123",
+			status: "completed",
+			stats: {
+				cost: 0.05,
+				turns: 3,
+				duration: 12000,
+				tokens: { input: 1000, output: 500, cacheRead: 200, cacheWrite: 100 },
+			} as any, // SessionStats is complex, we just verify shape
+		};
+		assert.equal(event.type, "execution_complete");
+		assert.ok(typeof event.runId === "string");
+		assert.ok(["completed", "error", "cancelled"].includes(event.status));
+		assert.ok(event.stats !== undefined);
+	});
+
+	it("RpcExecutionCompleteEvent supports error status with reason", () => {
+		const event: RpcExecutionCompleteEvent = {
+			type: "execution_complete",
+			runId: "run-err-456",
+			status: "error",
+			reason: "API rate limit exceeded",
+			stats: {} as any,
+		};
+		assert.equal(event.status, "error");
+		assert.equal(event.reason, "API rate limit exceeded");
+	});
+
+	it("RpcCostUpdateEvent matches expected shape", () => {
+		const event: RpcCostUpdateEvent = {
+			type: "cost_update",
+			runId: "run-cost-789",
+			turnCost: 0.01,
+			cumulativeCost: 0.05,
+			tokens: {
+				input: 500,
+				output: 200,
+				cacheRead: 100,
+				cacheWrite: 50,
+			},
+		};
+		assert.equal(event.type, "cost_update");
+		assert.ok(typeof event.runId === "string");
+		assert.ok(typeof event.turnCost === "number");
+		assert.ok(typeof event.cumulativeCost === "number");
+		assert.ok(typeof event.tokens.input === "number");
+		assert.ok(typeof event.tokens.output === "number");
+		assert.ok(typeof event.tokens.cacheRead === "number");
+		assert.ok(typeof event.tokens.cacheWrite === "number");
+	});
+
+	it("RpcV2Event discriminated union resolves by type field", () => {
+		const events: RpcV2Event[] = [
+			{
+				type: "execution_complete",
+				runId: "r1",
+				status: "completed",
+				stats: {} as any,
+			},
+			{
+				type: "cost_update",
+				runId: "r2",
+				turnCost: 0.01,
+				cumulativeCost: 0.03,
+				tokens: { input: 100, output: 50, cacheRead: 10, cacheWrite: 5 },
+			},
+		];
+
+		for (const event of events) {
+			if (event.type === "execution_complete") {
+				// TypeScript narrows to RpcExecutionCompleteEvent
+				assert.ok("status" in event);
+				assert.ok("stats" in event);
+			} else if (event.type === "cost_update") {
+				// TypeScript narrows to RpcCostUpdateEvent
+				assert.ok("turnCost" in event);
+				assert.ok("tokens" in event);
+			} else {
+				assert.fail(`Unexpected event type: ${(event as any).type}`);
+			}
+		}
+	});
+
+	it("RpcProtocolVersion is 1 or 2", () => {
+		const v1: RpcProtocolVersion = 1;
+		const v2: RpcProtocolVersion = 2;
+		assert.equal(v1, 1);
+		assert.equal(v2, 2);
+	});
+
+	it("v2 prompt response includes optional runId field", () => {
+		const v1Response: RpcResponse = {
+			id: "1",
+			type: "response",
+			command: "prompt",
+			success: true,
+		};
+		assert.equal(v1Response.success, true);
+		assert.equal((v1Response as any).runId, undefined);
+
+		const v2Response: RpcResponse = {
+			id: "2",
+			type: "response",
+			command: "prompt",
+			success: true,
+			runId: "run-123",
+		};
+		assert.equal(v2Response.success, true);
+		assert.equal((v2Response as any).runId, "run-123");
+	});
+
+	it("v2 command types are present in RpcCommand union", () => {
+		// These compile — that's the actual test. Runtime verification:
+		const initCmd: RpcCommand = { type: "init", protocolVersion: 2 };
+		const shutdownCmd: RpcCommand = { type: "shutdown" };
+		const subscribeCmd: RpcCommand = { type: "subscribe", events: ["agent_end"] };
+
+		assert.equal(initCmd.type, "init");
+		assert.equal(shutdownCmd.type, "shutdown");
+		assert.equal(subscribeCmd.type, "subscribe");
+	});
+
+	it("init command supports optional clientId", () => {
+		const cmd: RpcCommand = { type: "init", protocolVersion: 2, clientId: "my-client" };
+		assert.equal(cmd.type, "init");
+		if (cmd.type === "init") {
+			assert.equal(cmd.clientId, "my-client");
+		}
+	});
+
+	it("shutdown command supports optional graceful flag", () => {
+		const cmd: RpcCommand = { type: "shutdown", graceful: true };
+		if (cmd.type === "shutdown") {
+			assert.equal(cmd.graceful, true);
+		}
+	});
+
+	it("v2 response types include init, shutdown, subscribe", () => {
+		const initResp: RpcResponse = {
+			type: "response",
+			command: "init",
+			success: true,
+			data: {
+				protocolVersion: 2,
+				sessionId: "s1",
+				capabilities: { events: [], commands: [] },
+			},
+		};
+		const shutdownResp: RpcResponse = {
+			type: "response",
+			command: "shutdown",
+			success: true,
+		};
+		const subscribeResp: RpcResponse = {
+			type: "response",
+			command: "subscribe",
+			success: true,
+		};
+
+		assert.equal(initResp.command, "init");
+		assert.equal(shutdownResp.command, "shutdown");
+		assert.equal(subscribeResp.command, "subscribe");
+	});
+});
+
+// ============================================================================
+// v1 backward compatibility
+// ============================================================================
+
+describe("v1 backward compatibility — command shapes", () => {
+	it("v1 prompt command has no protocolVersion or runId", () => {
+		const cmd: RpcCommand = { type: "prompt", message: "hello" };
+		assert.equal(cmd.type, "prompt");
+		assert.equal((cmd as any).protocolVersion, undefined);
+		assert.equal((cmd as any).runId, undefined);
+	});
+
+	it("v1 get_state response has no v2 fields", () => {
+		const state: RpcSessionState = {
+			thinkingLevel: "medium",
+			isStreaming: false,
+			isCompacting: false,
+			steeringMode: "all",
+			followUpMode: "all",
+			sessionId: "test-id",
+			autoCompactionEnabled: true,
+			autoRetryEnabled: false,
+			retryInProgress: false,
+			retryAttempt: 0,
+			messageCount: 0,
+			pendingMessageCount: 0,
+			extensionsReady: true,
+		};
+		// v1 state should not include any v2-specific fields
+		assert.equal((state as any).protocolVersion, undefined);
+		assert.equal((state as any).runId, undefined);
+	});
+
+	it("v1 prompt response has no runId", () => {
+		const resp: RpcResponse = {
+			id: "1",
+			type: "response",
+			command: "prompt",
+			success: true,
+		};
+		assert.equal(resp.success, true);
+		// runId is optional; in v1 mode it won't be present
+		assert.equal((resp as any).runId, undefined);
+	});
+
+	it("error response shape is consistent across v1 and v2", () => {
+		const errResp: RpcResponse = {
+			id: "err-1",
+			type: "response",
+			command: "init",
+			success: false,
+			error: "Protocol version already locked. init must be the first command.",
+		};
+		assert.equal(errResp.success, false);
+		if (!errResp.success) {
+			assert.ok(typeof errResp.error === "string");
+			assert.ok(errResp.error.length > 0);
+		}
+	});
+});
+
+// ============================================================================
+// RpcClient command serialization tests (mock process)
+// ============================================================================
+
+describe("RpcClient command serialization", () => {
+	// RpcClient itself is not imported here, which keeps the full module graph out
+	// of the test. These cases only check the JSONL framing of hand-built command
+	// and response objects, round-tripped through serializeJsonLine and JSON.parse.
+
+	it("init command serializes correctly", () => {
+		const cmd = { id: "req_1", type: "init", protocolVersion: 2 };
+		const serialized = serializeJsonLine(cmd);
+		const parsed = JSON.parse(serialized);
+		assert.equal(parsed.type, "init");
+		assert.equal(parsed.protocolVersion, 2);
+		assert.equal(parsed.id, "req_1");
+	});
+
+	it("init command with clientId serializes correctly", () => {
+		const cmd = { id: "req_1", type: "init", protocolVersion: 2, clientId: "test-client" };
+		const serialized = serializeJsonLine(cmd);
+		const parsed = JSON.parse(serialized);
+		assert.equal(parsed.clientId, "test-client");
+	});
+
+	it("shutdown command serializes correctly", () => {
+		const cmd = { id: "req_2", type: "shutdown" };
+		const serialized = serializeJsonLine(cmd);
+		const parsed = JSON.parse(serialized);
+		assert.equal(parsed.type, "shutdown");
+		assert.equal(parsed.id, "req_2");
+	});
+
+	it("subscribe command serializes correctly with event list", () => {
+		const cmd = { id: "req_3", type: "subscribe", events: ["agent_end", "cost_update"] };
+		const serialized = serializeJsonLine(cmd);
+		const parsed = JSON.parse(serialized);
+		assert.equal(parsed.type, "subscribe");
+		assert.deepEqual(parsed.events, ["agent_end", "cost_update"]);
+	});
+
+	it("subscribe command with wildcard serializes correctly", () => {
+		const cmd = { id: "req_4", type: "subscribe", events: ["*"] };
+		const serialized = serializeJsonLine(cmd);
+		const parsed = JSON.parse(serialized);
+		assert.deepEqual(parsed.events, ["*"]);
+	});
+
+	it("subscribe command with empty array serializes correctly", () => {
+		const cmd = { id: "req_5", type: "subscribe", events: [] as string[] };
+		const serialized = serializeJsonLine(cmd);
+		const parsed = JSON.parse(serialized);
+		assert.deepEqual(parsed.events, []);
+	});
+
+	it("sendUIResponse serializes correct JSONL", () => {
+		const response = {
+			type: "extension_ui_response",
+			id: "ui-req-123",
+			value: "test-value",
+		};
+		const serialized = serializeJsonLine(response);
+		const parsed = JSON.parse(serialized);
+		assert.equal(parsed.type, "extension_ui_response");
+		assert.equal(parsed.id, "ui-req-123");
+		assert.equal(parsed.value, "test-value");
+	});
+
+	it("sendUIResponse with cancelled flag serializes correctly", () => {
+		const response = {
+			type: "extension_ui_response",
+			id: "ui-req-456",
+			cancelled: true,
+		};
+		const serialized = serializeJsonLine(response);
+		const parsed = JSON.parse(serialized);
+		assert.equal(parsed.type, "extension_ui_response");
+		assert.equal(parsed.cancelled, true);
+	});
+
+	it("sendUIResponse with confirmed flag serializes correctly", () => {
+		const response = {
+			type: "extension_ui_response",
+			id: "ui-req-789",
+			confirmed: true,
+		};
+		const serialized = serializeJsonLine(response);
+		const parsed = JSON.parse(serialized);
+		assert.equal(parsed.confirmed, true);
+	});
+
+	it("sendUIResponse with multiple values serializes correctly", () => {
+		const response = {
+			type: "extension_ui_response",
+			id: "ui-req-multi",
+			values: ["opt-a", "opt-b"],
+		};
+		const serialized = serializeJsonLine(response);
+		const parsed = JSON.parse(serialized);
+		assert.deepEqual(parsed.values, ["opt-a", "opt-b"]);
+	});
+
+	it("prompt command with runId in v2 response", () => {
+		const response = {
+			id: "req_10",
+			type: "response",
+			command: "prompt",
+			success: true,
+			runId: "run-uuid-abc",
+		};
+		const serialized = serializeJsonLine(response);
+		const parsed = JSON.parse(serialized);
+		assert.equal(parsed.runId, "run-uuid-abc");
+		assert.equal(parsed.command, "prompt");
+		assert.equal(parsed.success, true);
+	});
+});
+
+// ============================================================================
+// Client ↔ Mock server integration (PassThrough streams)
+// ============================================================================
+
+describe("Client ↔ Mock server protocol exchange", () => {
+	let clientStdin: PassThrough;
+	let clientStdout: PassThrough;
+
+	beforeEach(() => {
+		const mockProc = createMockProcess();
+		clientStdin = mockProc.clientStdin;
+		clientStdout = mockProc.clientStdout;
+	});
+
+	afterEach(() => {
+		clientStdin.destroy();
+		clientStdout.destroy();
+	});
+
+	it("init handshake: client writes init, server responds with init_result", async () => {
+		// Collect what the client would write
+		const { lines: clientWrites, detach: detachStdin } = collectLines(clientStdin);
+
+		// Client sends init command
+		writeLine(clientStdin, { id: "req_1", type: "init", protocolVersion: 2 });
+		await tick();
+
+		assert.equal(clientWrites.length, 1);
+		const initCmd = clientWrites[0] as any;
+		assert.equal(initCmd.type, "init");
+		assert.equal(initCmd.protocolVersion, 2);
+
+		// Server responds with init_result
+		const initResult: RpcInitResult = {
+			protocolVersion: 2,
+			sessionId: "sess-abc",
+			capabilities: {
+				events: ["execution_complete", "cost_update"],
+				commands: ["init", "shutdown", "subscribe"],
+			},
+		};
+		writeLine(clientStdout, {
+			id: "req_1",
+			type: "response",
+			command: "init",
+			success: true,
+			data: initResult,
+		});
+
+		// Collect server response
+		const { lines: serverResponses, detach: detachStdout } = collectLines(clientStdout);
+		// Already wrote above, but let's verify the shape by re-writing
+		writeLine(clientStdout, {
+			id: "req_verify",
+			type: "response",
+			command: "init",
+			success: true,
+			data: initResult,
+		});
+		await tick();
+
+		const resp = serverResponses[0] as any;
+		assert.equal(resp.type, "response");
+		assert.equal(resp.command, "init");
+		assert.equal(resp.success, true);
+		assert.equal(resp.data.protocolVersion, 2);
+		assert.ok(typeof resp.data.sessionId === "string");
+
+		detachStdin();
+		detachStdout();
+	});
+
+	it("shutdown: client writes shutdown, server acknowledges", async () => {
+		const { lines: clientWrites, detach } = collectLines(clientStdin);
+
+		writeLine(clientStdin, { id: "req_2", type: "shutdown" });
+		await tick();
+
+		const cmd = clientWrites[0] as any;
+		assert.equal(cmd.type, "shutdown");
+
+		detach();
+	});
+
+	it("subscribe: client writes subscribe with event list", async () => {
+		const { lines: clientWrites, detach } = collectLines(clientStdin);
+
+		writeLine(clientStdin, { id: "req_3", type: "subscribe", events: ["agent_end", "execution_complete"] });
+		await tick();
+
+		const cmd = clientWrites[0] as any;
+		assert.equal(cmd.type, "subscribe");
+		assert.deepEqual(cmd.events, ["agent_end", "execution_complete"]);
+
+		detach();
+	});
+
+	it("sendUIResponse: client writes extension_ui_response", async () => {
+		const { lines: clientWrites, detach } = collectLines(clientStdin);
+
+		writeLine(clientStdin, {
+			type: "extension_ui_response",
+			id: "ui-123",
+			value: "selected-option",
+		});
+		await tick();
+
+		const msg = clientWrites[0] as any;
+		assert.equal(msg.type, "extension_ui_response");
+		assert.equal(msg.id, "ui-123");
+		assert.equal(msg.value, "selected-option");
+
+		detach();
+	});
+
+	it("v2 event filtering: subscribe with empty array should filter all", async () => {
+		// An empty event filter means no events pass through (Set with 0 entries)
+		const subscribeCmd = { id: "req_4", type: "subscribe", events: [] as string[] };
+		const serialized = serializeJsonLine(subscribeCmd);
+		const parsed = JSON.parse(serialized);
+		assert.deepEqual(parsed.events, []);
+		// Server-side: `eventFilter = new Set([])` — Set.has(anything) returns false
+		const filter = new Set(parsed.events as string[]);
+		assert.equal(filter.has("agent_end"), false);
+		assert.equal(filter.has("execution_complete"), false);
+		assert.equal(filter.size, 0);
+	});
+
+	it("v2 event filtering: subscribe with wildcard resets filter", async () => {
+		// Server-side: `events.includes("*")` → `eventFilter = null`
+		const subscribeCmd = { type: "subscribe", events: ["*"] };
+		const parsed = JSON.parse(serializeJsonLine(subscribeCmd));
+		const hasWildcard = (parsed.events as string[]).includes("*");
+		assert.equal(hasWildcard, true);
+		// When wildcard is detected, filter becomes null (all events pass)
+	});
+
+	it("multiple commands can be sent sequentially", async () => {
+		const { lines, detach } = collectLines(clientStdin);
+
+		writeLine(clientStdin, { id: "1", type: "init", protocolVersion: 2 });
+		writeLine(clientStdin, { id: "2", type: "subscribe", events: ["agent_end"] });
+		writeLine(clientStdin, { id: "3", type: "prompt", message: "hello" });
+		await tick();
+
+		assert.equal(lines.length, 3);
+		assert.equal((lines[0] as any).type, "init");
+		assert.equal((lines[1] as any).type, "subscribe");
+		assert.equal((lines[2] as any).type, "prompt");
+
+		detach();
+	});
+});
+
+// ============================================================================
+// Negative tests — malformed inputs, error paths, boundary conditions
+// ============================================================================
+
+describe("Negative tests — protocol error shapes", () => {
+	it("init with missing protocolVersion produces a type error at compile time", () => {
+		// Runtime check: a message missing protocolVersion is malformed
+		const malformed = { type: "init" } as any;
+		assert.equal(malformed.protocolVersion, undefined);
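+		// NOTE(review): presumably the server treats this as a v1 lock since it's not a valid init — confirm (the simulated lock logic in this file keys only on type === "init")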
+	});
+
+	it("subscribe with non-array events is a type violation", () => {
+		// Runtime: server expects events to be string[]
+		const malformed = { type: "subscribe", events: "agent_end" } as any;
+		assert.equal(typeof malformed.events, "string"); // Not an array
+		assert.equal(Array.isArray(malformed.events), false);
+	});
+
+	it("double init error response shape", () => {
+		// When init is sent after protocol lock, server returns error
+		const errorResp: RpcResponse = {
+			id: "req_dup",
+			type: "response",
+			command: "init",
+			success: false,
+			error: "Protocol version already locked. init must be the first command.",
+		};
+		assert.equal(errorResp.success, false);
+		if (!errorResp.success) {
+			assert.ok(errorResp.error.includes("already locked"));
+		}
+	});
+
+	it("init after v1 lock error response shape", () => {
+		// First command was get_state (v1 lock), then init arrives
+		const errorResp: RpcResponse = {
+			id: "req_late_init",
+			type: "response",
+			command: "init",
+			success: false,
+			error: "Protocol version already locked. init must be the first command.",
+		};
+		assert.equal(errorResp.success, false);
+		if (!errorResp.success) {
+			assert.ok(errorResp.error.includes("init must be the first command"));
+		}
+	});
+
+	it("unknown command type produces error response", () => {
+		const errorResp: RpcResponse = {
+			id: "req_unknown",
+			type: "response",
+			command: "nonexistent",
+			success: false,
+			error: "Unknown command: nonexistent",
+		};
+		assert.equal(errorResp.success, false);
+		if (!errorResp.success) {
+			assert.ok(errorResp.error.includes("Unknown command"));
+		}
+	});
+
+	it("malformed JSON parse error shape", () => {
+		const errorResp: RpcResponse = {
+			type: "response",
+			command: "parse",
+			success: false,
+			error: "Failed to parse command: Unexpected token",
+		};
+		assert.equal(errorResp.command, "parse");
+		assert.equal(errorResp.success, false);
+	});
+
+	it("shutdown works in both v1 and v2 — no version gating", () => {
+		// shutdown returns success regardless of protocolVersion
+		const v1Shutdown: RpcResponse = {
+			id: "s1",
+			type: "response",
+			command: "shutdown",
+			success: true,
+		};
+		const v2Shutdown: RpcResponse = {
+			id: "s2",
+			type: "response",
+			command: "shutdown",
+			success: true,
+		};
+		assert.equal(v1Shutdown.success, true);
+		assert.equal(v2Shutdown.success, true);
+	});
+});
+
+// ============================================================================
+// Protocol version detection logic (unit)
+// ============================================================================
+
+describe("Protocol version detection logic", () => {
+	it("simulates v1 lock when first command is non-init", () => {
+		let protocolVersion: 1 | 2 = 1;
+		let protocolLocked = false;
+
+		// Simulate first command being get_state
+		const command = { type: "get_state" } as RpcCommand;
+
+		if (!protocolLocked) {
+			protocolLocked = true;
+			if (command.type === "init") {
+				protocolVersion = 2;
+			} else {
+				protocolVersion = 1;
+			}
+		}
+
+		assert.equal(protocolVersion, 1);
+		assert.equal(protocolLocked, true);
+	});
+
+	it("simulates v2 lock when first command is init", () => {
+		let protocolVersion: 1 | 2 = 1;
+		let protocolLocked = false;
+
+		const command: RpcCommand = { type: "init", protocolVersion: 2 };
+
+		if (!protocolLocked) {
+			protocolLocked = true;
+			if (command.type === "init") {
+				protocolVersion = 2;
+			} else {
+				protocolVersion = 1;
+			}
+		}
+
+		assert.equal(protocolVersion, 2);
+		assert.equal(protocolLocked, true);
+	});
+
+	it("rejects re-init after v2 lock", () => {
+		let protocolLocked = true; // already locked from first init
+		let errorMessage: string | null = null;
+
+		const command: RpcCommand = { type: "init", protocolVersion: 2 };
+
+		if (protocolLocked && command.type === "init") {
+			errorMessage = "Protocol version already locked. init must be the first command.";
+		}
+
+		assert.ok(errorMessage !== null);
+		assert.ok(errorMessage!.includes("already locked"));
+	});
+
+	it("rejects init after v1 lock", () => {
+		let protocolLocked = true; // already locked from first non-init command
+		let protocolVersion: 1 | 2 = 1;
+		let errorMessage: string | null = null;
+
+		const command: RpcCommand = { type: "init", protocolVersion: 2 };
+
+		if (protocolLocked && command.type === "init") {
+			errorMessage = "Protocol version already locked. init must be the first command.";
+		}
+
+		assert.equal(protocolVersion, 1); // stays v1
+		assert.ok(errorMessage !== null);
+	});
+
+	it("extension_ui_response bypasses protocol detection", () => {
+		let protocolLocked = false;
+		let protocolDetectionTriggered = false;
+
+		// Simulate the handleInputLine logic
+		const parsed = { type: "extension_ui_response", id: "ui-1", value: "ok" };
+
+		if (parsed.type === "extension_ui_response") {
+			// Bypass — do not touch protocolLocked
+		} else {
+			protocolDetectionTriggered = true;
+			if (!protocolLocked) {
+				protocolLocked = true;
+			}
+		}
+
+		assert.equal(protocolLocked, false);
+		assert.equal(protocolDetectionTriggered, false);
+	});
+});
+
+// ============================================================================
+// v2 event filter logic (unit)
+// ============================================================================
+
+describe("v2 event filter logic", () => {
+	/** Mimics the server-side event filter check: null means all events pass */
+	function shouldEmit(filter: Set<string> | null, eventType: string): boolean {
+		return !filter || filter.has(eventType);
+	}
+
+	it("null filter passes all events", () => {
+		assert.equal(shouldEmit(null, "agent_end"), true);
+		assert.equal(shouldEmit(null, "cost_update"), true);
+		assert.equal(shouldEmit(null, "anything"), true);
+	});
+
+	it("filter with specific events passes matching events", () => {
+		const filter = new Set(["agent_end", "cost_update"]);
+
+		assert.equal(shouldEmit(filter, "agent_end"), true);
+		assert.equal(shouldEmit(filter, "cost_update"), true);
+		assert.equal(shouldEmit(filter, "execution_complete"), false);
+		assert.equal(shouldEmit(filter, "message_start"), false);
+	});
+
+	it("empty Set filter blocks all events", () => {
+		const filter = new Set<string>();
+
+		assert.equal(shouldEmit(filter, "agent_end"), false);
+		assert.equal(shouldEmit(filter, "cost_update"), false);
+		assert.equal(shouldEmit(filter, "anything"), false);
+		assert.equal(filter.size, 0);
+	});
+
+	it("wildcard subscribe resets filter to null", () => {
+		let eventFilter: Set<string> | null = new Set(["agent_end"]);
+
+		// Simulate subscribe with wildcard
+		const events = ["*"];
+		if (events.includes("*")) {
+			eventFilter = null;
+		} else {
+			eventFilter = new Set(events);
+		}
+
+		assert.equal(eventFilter, null);
+	});
+
+	it("subscribe replaces previous filter", () => {
+		let eventFilter: Set<string> | null = new Set(["agent_end"]);
+
+		// Subscribe with different events
+		const events = ["cost_update", "execution_complete"];
+		if (events.includes("*")) {
+			eventFilter = null;
+		} else {
+			eventFilter = new Set(events);
+		}
+
+		assert.equal(eventFilter!.has("agent_end"), false);
+		assert.equal(eventFilter!.has("cost_update"), true);
+		assert.equal(eventFilter!.has("execution_complete"), true);
+	});
+
+	it("filter applies to both regular and synthesized v2 events", () => {
+		const eventFilter = new Set(["execution_complete"]);
+
+		// Regular event
+		assert.equal(eventFilter.has("agent_end"), false); // filtered out
+		// Synthesized v2 event
+		assert.equal(eventFilter.has("execution_complete"), true); // passes
+		assert.equal(eventFilter.has("cost_update"), false); // filtered out
+	});
+});
+
+// ============================================================================
+// v2 runId injection logic (unit)
+// ============================================================================
+
+describe("v2 runId injection", () => {
+	it("runId is present when protocolVersion is 2 and command is prompt/steer/follow_up", () => {
+		const protocolVersion = 2;
+		const commands = ["prompt", "steer", "follow_up"] as const;
+
+		for (const cmdType of commands) {
+			const runId = protocolVersion === 2 ? `run-${cmdType}-uuid` : undefined;
+			assert.ok(runId !== undefined, `runId should be generated for ${cmdType} in v2`);
+			assert.ok(typeof runId === "string");
+		}
+	});
+
+	it("runId is undefined when protocolVersion is 1", () => {
+		// Test the v1 path: runId should not be generated
+		function generateRunId(version: 1 | 2): string | undefined {
+			return version === 2 ? "run-uuid" : undefined;
+		}
+		assert.equal(generateRunId(1), undefined);
+		assert.ok(typeof generateRunId(2) === "string");
+	});
+
+	it("runId is injected into event output via spread", () => {
+		const currentRunId = "run-abc-123";
+		const event = { type: "message_start", message: { role: "assistant" } };
+
+		// v2 injection logic from rpc-mode.ts
+		const outputEvent = currentRunId ? { ...event, runId: currentRunId } : event;
+
+		assert.equal((outputEvent as any).runId, "run-abc-123");
+		assert.equal((outputEvent as any).type, "message_start");
+	});
+
+	it("runId is not injected when null", () => {
+		const currentRunId: string | null = null;
+		const event = { type: "message_start", message: { role: "assistant" } };
+
+		const outputEvent = currentRunId ? { ...event, runId: currentRunId } : event;
+
+		assert.equal((outputEvent as any).runId, undefined);
+	});
+});
--- a/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts
+++ b/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts
@ -11,6 +11,13 @@ import type { SessionStats } from "../../core/agent-session.js";
 import type { BashResult } from "../../core/bash-executor.js";
 import type { CompactionResult } from "../../core/compaction/index.js";

+// ============================================================================
+// RPC Protocol Versioning
+// ============================================================================
+
+/** Supported protocol versions. v1 is the implicit default; v2 requires an init handshake. */
+export type RpcProtocolVersion = 1 | 2;
+
 // ============================================================================
 // RPC Commands (stdin)
 // ============================================================================
@ -69,7 +76,12 @@ export type RpcCommand =
 	// Bridge-hosted native terminal
 	| { id?: string; type: "terminal_input"; data: string }
 	| { id?: string; type: "terminal_resize"; cols: number; rows: number }
-	| { id?: string; type: "terminal_redraw" };
+	| { id?: string; type: "terminal_redraw" }
+
+	// v2 Protocol
+	| { id?: string; type: "init"; protocolVersion: 2; clientId?: string }
+	| { id?: string; type: "shutdown"; graceful?: boolean }
+	| { id?: string; type: "subscribe"; events: string[] };

 // ============================================================================
 // RPC Slash Command (for get_commands response)
@ -120,9 +132,9 @@ export interface RpcSessionState {
 // Success responses with data
 export type RpcResponse =
 	// Prompting (async - events follow)
-	| { id?: string; type: "response"; command: "prompt"; success: true }
-	| { id?: string; type: "response"; command: "steer"; success: true }
-	| { id?: string; type: "response"; command: "follow_up"; success: true }
+	| { id?: string; type: "response"; command: "prompt"; success: true; runId?: string }
+	| { id?: string; type: "response"; command: "steer"; success: true; runId?: string }
+	| { id?: string; type: "response"; command: "follow_up"; success: true; runId?: string }
 	| { id?: string; type: "response"; command: "abort"; success: true }
 	| { id?: string; type: "response"; command: "new_session"; success: true; data: { cancelled: boolean } }

@ -216,9 +228,54 @@ export type RpcResponse =
 	| { id?: string; type: "response"; command: "terminal_resize"; success: true }
 	| { id?: string; type: "response"; command: "terminal_redraw"; success: true }

+	// v2 Protocol
+	| { id?: string; type: "response"; command: "init"; success: true; data: RpcInitResult }
+	| { id?: string; type: "response"; command: "shutdown"; success: true }
+	| { id?: string; type: "response"; command: "subscribe"; success: true }
+
 	// Error response (any command can fail)
 	| { id?: string; type: "response"; command: string; success: false; error: string };

+// ============================================================================
+// v2 Protocol Types
+// ============================================================================
+
+/** Result of the init handshake (v2 only) */
+export interface RpcInitResult {
+	protocolVersion: 2;
+	sessionId: string;
+	capabilities: {
+		events: string[];
+		commands: string[];
+	};
+}
+
+/** v2 execution_complete event — emitted when a prompt/steer/follow_up finishes */
+export interface RpcExecutionCompleteEvent {
+	type: "execution_complete";
+	runId: string;
+	status: "completed" | "error" | "cancelled";
+	reason?: string;
+	stats: SessionStats;
+}
+
+/** v2 cost_update event — emitted per-turn with running cost data */
+export interface RpcCostUpdateEvent {
+	type: "cost_update";
+	runId: string;
+	turnCost: number;
+	cumulativeCost: number;
+	tokens: {
+		input: number;
+		output: number;
+		cacheRead: number;
+		cacheWrite: number;
+	};
+}
+
+/** Discriminated union of all v2-only event types */
+export type RpcV2Event = RpcExecutionCompleteEvent | RpcCostUpdateEvent;
+
 // ============================================================================
 // Extension UI Events (stdout)
 // ============================================================================
--- a/src/headless-events.ts
+++ b/src/headless-events.ts
@ -3,8 +3,47 @@
 *
 * Detects terminal notifications, blocked notifications, milestone-ready signals,
 * and classifies commands as quick (single-turn) vs long-running.
+ *
+ * Also defines exit code constants and the status→exit-code mapping function.
 */

+// ---------------------------------------------------------------------------
+// Exit Code Constants
+// ---------------------------------------------------------------------------
+
+export const EXIT_SUCCESS = 0
+export const EXIT_ERROR = 1
+export const EXIT_BLOCKED = 10
+export const EXIT_CANCELLED = 11
+
+/**
+ * Map a headless session status string to its standardized exit code.
+ *
+ *   success   → 0   ('complete' is accepted as an alias)
+ *   error     → 1
+ *   timeout   → 1
+ *   blocked   → 10
+ *   cancelled → 11
+ *
+ * Unknown statuses default to EXIT_ERROR (1).
+ */
+export function mapStatusToExitCode(status: string): number {
+  switch (status) {
+    case 'success':
+    case 'complete':
+      return EXIT_SUCCESS
+    case 'error':
+    case 'timeout':
+      return EXIT_ERROR
+    case 'blocked':
+      return EXIT_BLOCKED
+    case 'cancelled':
+      return EXIT_CANCELLED
+    default:
+      return EXIT_ERROR
+  }
+}
+
 // ---------------------------------------------------------------------------
 // Completion Detection
 // ---------------------------------------------------------------------------
--- a/src/headless-types.ts
+++ b/src/headless-types.ts
@ -0,0 +1,39 @@
+/**
+ * Headless Types — shared types for the headless orchestrator surface.
+ *
+ * Contains the structured result type emitted in --output-format json mode
+ * and the output format discriminator.
+ */
+
+// ---------------------------------------------------------------------------
+// Output Format
+// ---------------------------------------------------------------------------
+
+export type OutputFormat = 'text' | 'json' | 'stream-json'
+
+export const VALID_OUTPUT_FORMATS: ReadonlySet<string> = new Set(['text', 'json', 'stream-json'])
+
+// ---------------------------------------------------------------------------
+// Structured JSON Result
+// ---------------------------------------------------------------------------
+
+export interface HeadlessJsonResult {
+  status: 'success' | 'error' | 'blocked' | 'cancelled' | 'timeout'
+  exitCode: number
+  sessionId?: string
+  duration: number
+  cost: {
+    total: number
+    input_tokens: number
+    output_tokens: number
+    cache_read_tokens: number
+    cache_write_tokens: number
+  }
+  toolCalls: number
+  events: number
+  milestone?: string
+  phase?: string
+  nextAction?: string
+  artifacts?: string[]
+  commits?: string[]
+}
--- a/src/headless.ts
+++ b/src/headless.ts
@ -6,9 +6,10 @@
 * progress to stderr.
 *
 * Exit codes:
- *   0 — complete (command finished successfully)
- *   1 — error or timeout
- *   2 — blocked (command reported a blocker)
+ *   0  — complete (command finished successfully)
+ *   1  — error or timeout
+ *   10 — blocked (command reported a blocker)
+ *   11 — cancelled (SIGINT/SIGTERM received)
 */

 import { existsSync, mkdirSync, writeFileSync } from 'node:fs'
@ -27,8 +28,16 @@ import {
  FIRE_AND_FORGET_METHODS,
  IDLE_TIMEOUT_MS,
  NEW_MILESTONE_IDLE_TIMEOUT_MS,
+  EXIT_SUCCESS,
+  EXIT_ERROR,
+  EXIT_BLOCKED,
+  EXIT_CANCELLED,
+  mapStatusToExitCode,
 } from './headless-events.js'

+import type { OutputFormat } from './headless-types.js'
+import { VALID_OUTPUT_FORMATS } from './headless-types.js'
+
 import {
  handleExtensionUIRequest,
  formatProgress,
@ -48,6 +57,7 @@ import {
 export interface HeadlessOptions {
  timeout: number
  json: boolean
+  outputFormat: OutputFormat
  model?: string
  command: string
  commandArgs: string[]
@ -60,6 +70,8 @@ export interface HeadlessOptions {
  responseTimeout?: number // timeout for orchestrator response (default 30000ms)
  answers?: string       // path to answers JSON file
  eventFilter?: Set<string>  // filter JSONL output to specific event types
+  resumeSession?: string // session ID to resume (--resume <id>)
+  bare?: boolean         // --bare: suppress CLAUDE.md/AGENTS.md, user skills, project preferences
 }

 interface TrackedEvent {
@ -76,6 +88,7 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions {
  const options: HeadlessOptions = {
    timeout: 300_000,
    json: false,
+    outputFormat: 'text',
    command: 'auto',
    commandArgs: [],
  }
@ -96,6 +109,17 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions {
        }
      } else if (arg === '--json') {
        options.json = true
+        options.outputFormat = 'stream-json'
+      } else if (arg === '--output-format' && i + 1 < args.length) {
+        const fmt = args[++i]
+        if (!VALID_OUTPUT_FORMATS.has(fmt)) {
+          process.stderr.write(`[headless] Error: --output-format must be one of: text, json, stream-json (got '${fmt}')\n`)
+          process.exit(1)
+        }
+        options.outputFormat = fmt as OutputFormat
+        if (fmt === 'stream-json' || fmt === 'json') {
+          options.json = true
+        }
      } else if (arg === '--model' && i + 1 < args.length) {
        // --model can also be passed from the main CLI; headless-specific takes precedence
        options.model = args[++i]
@ -118,15 +142,25 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions {
      } else if (arg === '--events' && i + 1 < args.length) {
        options.eventFilter = new Set(args[++i].split(','))
        options.json = true  // --events implies --json
+        if (options.outputFormat === 'text') {
+          options.outputFormat = 'stream-json'
+        }
      } else if (arg === '--supervised') {
        options.supervised = true
        options.json = true  // supervised implies json
+        if (options.outputFormat === 'text') {
+          options.outputFormat = 'stream-json'
+        }
      } else if (arg === '--response-timeout' && i + 1 < args.length) {
        options.responseTimeout = parseInt(args[++i], 10)
        if (Number.isNaN(options.responseTimeout) || options.responseTimeout <= 0) {
          process.stderr.write('[headless] Error: --response-timeout must be a positive integer (milliseconds)\n')
          process.exit(1)
        }
+      } else if (arg === '--resume' && i + 1 < args.length) {
+        options.resumeSession = args[++i]
+      } else if (arg === '--bare') {
+        options.bare = true
      }
    } else if (!positionalStarted) {
      positionalStarted = true
@ -151,7 +185,7 @@ export async function runHeadless(options: HeadlessOptions): Promise<void> {
    const result = await runHeadlessOnce(options, restartCount)

    // Success or blocked — exit normally
-    if (result.exitCode === 0 || result.exitCode === 2) {
+    if (result.exitCode === EXIT_SUCCESS || result.exitCode === EXIT_BLOCKED) {
      process.exit(result.exitCode)
    }

@ -275,6 +309,10 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
  if (injector) {
    clientOptions.env = injector.getSecretEnvVars()
  }
+  // Propagate --bare to the child process
+  if (options.bare) {
+    clientOptions.args = [...((clientOptions.args as string[]) || []), '--bare']
+  }

  const client = new RpcClient(clientOptions)

@ -349,7 +387,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
  const timeoutTimer = options.timeout > 0
    ? setTimeout(() => {
        process.stderr.write(`[headless] Timeout after ${options.timeout / 1000}s\n`)
-        exitCode = 1
+        exitCode = EXIT_ERROR
        resolveCompletion()
      }, options.timeout)
    : null
@ -395,7 +433,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
      if (injector && !FIRE_AND_FORGET_METHODS.has(String(eventObj.method ?? ''))) {
        if (injector.tryHandle(eventObj, stdinWriter)) {
          if (completed) {
-            exitCode = blocked ? 2 : 0
+            exitCode = blocked ? EXIT_BLOCKED : EXIT_SUCCESS
            resolveCompletion()
          }
          return
@ -421,7 +459,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):

      // If we detected a terminal notification, resolve after responding
      if (completed) {
-        exitCode = blocked ? 2 : 0
+        exitCode = blocked ? EXIT_BLOCKED : EXIT_SUCCESS
        resolveCompletion()
        return
      }
@ -442,7 +480,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
  const signalHandler = () => {
    process.stderr.write('\n[headless] Interrupted, stopping child process...\n')
    interrupted = true
-    exitCode = 1
+    exitCode = EXIT_CANCELLED
    client.stop().finally(() => {
      if (timeoutTimer) clearTimeout(timeoutTimer)
      if (idleTimer) clearTimeout(idleTimer)
@ -492,10 +530,9 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
    if (!completed) {
      const msg = `[headless] Child process exited unexpectedly with code ${code ?? 'null'}\n`
      process.stderr.write(msg)
-      exitCode = 1
+      exitCode = EXIT_ERROR
      resolveCompletion()
-    }
-  })
+    }  })

  if (!options.json) {
    process.stderr.write(`[headless] Running /gsd ${options.command}${options.commandArgs.length > 0 ? ' ' + options.commandArgs.join(' ') : ''}...\n`)
@ -507,16 +544,16 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
    await client.prompt(command)
  } catch (err) {
    process.stderr.write(`[headless] Error: Failed to send prompt: ${err instanceof Error ? err.message : String(err)}\n`)
-    exitCode = 1
+    exitCode = EXIT_ERROR
  }

  // Wait for completion
-  if (exitCode === 0 || exitCode === 2) {
+  if (exitCode === EXIT_SUCCESS || exitCode === EXIT_BLOCKED) {
    await completionPromise
  }

  // Auto-mode chaining: if --auto and milestone creation succeeded, send /gsd auto
-  if (isNewMilestone && options.auto && milestoneReady && !blocked && exitCode === 0) {
+  if (isNewMilestone && options.auto && milestoneReady && !blocked && exitCode === EXIT_SUCCESS) {
    if (!options.json) {
      process.stderr.write('[headless] Milestone ready — chaining into auto-mode...\n')
    }
@ -535,10 +572,10 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):
      await client.prompt('/gsd auto')
    } catch (err) {
      process.stderr.write(`[headless] Error: Failed to start auto-mode: ${err instanceof Error ? err.message : String(err)}\n`)
-      exitCode = 1
+      exitCode = EXIT_ERROR
    }

-    if (exitCode === 0 || exitCode === 2) {
+    if (exitCode === EXIT_SUCCESS || exitCode === EXIT_BLOCKED) {
      await autoCompletionPromise
    }
  }
@ -557,7 +594,7 @@ async function runHeadlessOnce(options: HeadlessOptions, restartCount: number):

  // Summary
  const duration = ((Date.now() - startTime) / 1000).toFixed(1)
-  const status = blocked ? 'blocked' : exitCode === 1 ? (totalEvents === 0 ? 'error' : 'timeout') : 'complete'
+  const status = blocked ? 'blocked' : exitCode === EXIT_CANCELLED ? 'cancelled' : exitCode === EXIT_ERROR ? (totalEvents === 0 ? 'error' : 'timeout') : 'complete'

  process.stderr.write(`[headless] Status: ${status}\n`)
  process.stderr.write(`[headless] Duration: ${duration}s\n`)
--- a/src/help-text.ts
+++ b/src/help-text.ts
@ -94,9 +94,12 @@ const SUBCOMMAND_HELP: Record<string, string> = {
    'Run /gsd commands without the TUI. Default command: auto',
    '',
    'Flags:',
-    '  --timeout N          Overall timeout in ms (default: 300000)',
-    '  --json               JSONL event stream to stdout',
-    '  --model ID           Override model',
+    '  --timeout N            Overall timeout in ms (default: 300000)',
+    '  --json                 JSONL event stream to stdout (alias for --output-format stream-json)',
+    '  --output-format <fmt>  Output format: text (default), json (structured result), stream-json (JSONL events)',
+    '  --bare                 Minimal context: skip CLAUDE.md, AGENTS.md, user settings, user skills',
+    '  --resume <id>          Resume a prior headless session by ID',
+    '  --model ID             Override model',
    '  --supervised           Forward interactive UI requests to orchestrator via stdout/stdin',
    '  --response-timeout N   Timeout (ms) for orchestrator response (default: 30000)',
    '  --answers <path>       Pre-supply answers and secrets (JSON file)',
@ -115,11 +118,19 @@ const SUBCOMMAND_HELP: Record<string, string> = {
    '  --auto               Start auto-mode after milestone creation',
    '  --verbose            Show tool calls in progress output',
    '',
+    'Output formats:',
+    '  text         Human-readable progress on stderr (default)',
+    '  json         Collect events silently, emit structured HeadlessJsonResult on stdout at exit',
+    '  stream-json  Stream JSONL events to stdout in real time (same as --json)',
+    '',
    'Examples:',
    '  gsd headless                                    Run /gsd auto',
    '  gsd headless next                               Run one unit',
-    '  gsd headless --json status                      Machine-readable status',
+    '  gsd headless --output-format json auto           Structured JSON result on stdout',
+    '  gsd headless --json status                      Machine-readable JSONL stream',
    '  gsd headless --timeout 60000                    With 1-minute timeout',
+    '  gsd headless --bare auto                        Minimal context (CI/ecosystem use)',
+    '  gsd headless --resume abc123 auto               Resume a prior session',
    '  gsd headless new-milestone --context spec.md    Create milestone from file',
    '  cat spec.md | gsd headless new-milestone --context -   From stdin',
    '  gsd headless new-milestone --context spec.md --auto    Create + auto-execute',
@ -128,7 +139,7 @@ const SUBCOMMAND_HELP: Record<string, string> = {
    '  gsd headless --events agent_end,extension_ui_request auto   Filtered event stream',
    '  gsd headless query                              Instant JSON state snapshot',
    '',
-    'Exit codes: 0 = complete, 1 = error/timeout, 2 = blocked',
+    'Exit codes: 0 = success, 1 = error/timeout, 10 = blocked, 11 = cancelled',
  ].join('\n'),
 }

--- a/src/tests/headless-cli-surface.test.ts
+++ b/src/tests/headless-cli-surface.test.ts
@ -0,0 +1,387 @@
+/**
+ * Tests for S02 CLI surface — --output-format, exit codes, HeadlessJsonResult, --resume, --bare.
+ *
+ * Uses extracted parsing logic (mirrors headless.ts) and direct imports from
+ * headless-types.ts / headless-events.ts to avoid transitive @gsd/native
+ * import that breaks in test environment.
+ */
+
+import test from 'node:test'
+import assert from 'node:assert/strict'
+
+// ─── Import exit code constants & mapStatusToExitCode ──────────────────────
+
+import {
+  EXIT_SUCCESS,
+  EXIT_ERROR,
+  EXIT_BLOCKED,
+  EXIT_CANCELLED,
+  mapStatusToExitCode,
+} from '../headless-events.js'
+
+import type { OutputFormat, HeadlessJsonResult } from '../headless-types.js'
+import { VALID_OUTPUT_FORMATS } from '../headless-types.js'
+
+// ─── Extracted parsing logic (mirrors headless.ts) ─────────────────────────
+
+interface HeadlessOptions { // option bag returned by parseHeadlessArgs below
+  timeout: number // --timeout, parsed base-10; defaults to 300_000 (presumably ms: the help example calls 60000 "1-minute")
+  json: boolean // set by --json, --output-format json|stream-json, --events and --supervised
+  outputFormat: OutputFormat // defaults to 'text'
+  model?: string // --model value, stored verbatim
+  command: string // first token that is not a `--` flag; defaults to 'auto'
+  commandArgs: string[] // every token after the command, `--` flags included
+  context?: string // --context value, stored verbatim
+  contextText?: string // --context-text value, stored verbatim
+  auto?: boolean // --auto
+  verbose?: boolean // --verbose
+  maxRestarts?: number // --max-restarts, parsed base-10
+  supervised?: boolean // --supervised (also turns json on)
+  responseTimeout?: number // --response-timeout, parsed base-10
+  answers?: string // --answers value, stored verbatim
+  eventFilter?: Set<string> // --events value split on ','
+  resumeSession?: string // --resume value (session ID)
+  bare?: boolean // --bare; stays undefined when the flag is absent
+}
+
+function parseHeadlessArgs(argv: string[]): HeadlessOptions { // test-local argv parser; throws Error on an unrecognised --output-format value
+  const options: HeadlessOptions = {
+    timeout: 300_000,
+    json: false,
+    outputFormat: 'text',
+    command: 'auto',
+    commandArgs: [],
+  }
+
+  const args = argv.slice(2) // drop the first two entries (the tests pass 'node', 'gsd')
+  let positionalStarted = false // flips once the command token has been consumed
+
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i]
+    if (arg === 'headless') continue // skipped wherever it appears, not only in first position
+
+    if (!positionalStarted && arg.startsWith('--')) {
+      if (arg === '--timeout' && i + 1 < args.length) { // value-taking flags match no branch (silently ignored) when they are the last token
+        options.timeout = parseInt(args[++i], 10)
+      } else if (arg === '--json') {
+        options.json = true
+        options.outputFormat = 'stream-json' // --json is shorthand for the stream-json format
+      } else if (arg === '--output-format' && i + 1 < args.length) {
+        const fmt = args[++i]
+        if (!VALID_OUTPUT_FORMATS.has(fmt)) {
+          throw new Error(`Invalid output format: ${fmt}`)
+        }
+        options.outputFormat = fmt as OutputFormat // safe: membership was checked just above
+        if (fmt === 'stream-json' || fmt === 'json') { // NOTE(review): 'text' never resets json, so `--json --output-format text` leaves json=true
+          options.json = true
+        }
+      } else if (arg === '--model' && i + 1 < args.length) {
+        options.model = args[++i]
+      } else if (arg === '--context' && i + 1 < args.length) {
+        options.context = args[++i]
+      } else if (arg === '--context-text' && i + 1 < args.length) {
+        options.contextText = args[++i]
+      } else if (arg === '--auto') {
+        options.auto = true
+      } else if (arg === '--verbose') {
+        options.verbose = true
+      } else if (arg === '--max-restarts' && i + 1 < args.length) {
+        options.maxRestarts = parseInt(args[++i], 10)
+      } else if (arg === '--answers' && i + 1 < args.length) {
+        options.answers = args[++i]
+      } else if (arg === '--events' && i + 1 < args.length) {
+        options.eventFilter = new Set(args[++i].split(','))
+        options.json = true
+        if (options.outputFormat === 'text') { // upgrade only while the format is still 'text'; json/stream-json set earlier is kept
+          options.outputFormat = 'stream-json'
+        }
+      } else if (arg === '--supervised') {
+        options.supervised = true
+        options.json = true
+        if (options.outputFormat === 'text') { // same upgrade rule as --events
+          options.outputFormat = 'stream-json'
+        }
+      } else if (arg === '--response-timeout' && i + 1 < args.length) {
+        options.responseTimeout = parseInt(args[++i], 10)
+      } else if (arg === '--resume' && i + 1 < args.length) {
+        options.resumeSession = args[++i]
+      } else if (arg === '--bare') {
+        options.bare = true
+      } // any other `--` flag matches no branch and is dropped
+    } else if (!positionalStarted) { // first token that is not a `--` flag becomes the command
+      positionalStarted = true
+      options.command = arg
+    } else { // everything after the command is passed through untouched, flags included
+      options.commandArgs.push(arg)
+    }
+  }
+
+  return options
+}
+
+// ─── --output-format flag parsing ──────────────────────────────────────────
+
+test('--output-format text sets outputFormat to text', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', '--output-format', 'text', 'auto'])
+  assert.equal(opts.outputFormat, 'text')
+  assert.equal(opts.json, false) // 'text' leaves json at its default
+})
+
+test('--output-format json sets outputFormat to json and json=true', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', '--output-format', 'json', 'auto'])
+  assert.equal(opts.outputFormat, 'json')
+  assert.equal(opts.json, true)
+})
+
+test('--output-format stream-json sets outputFormat to stream-json and json=true', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', '--output-format', 'stream-json', 'auto'])
+  assert.equal(opts.outputFormat, 'stream-json')
+  assert.equal(opts.json, true)
+})
+
+test('default output format is text', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', 'auto']) // no format flag at all
+  assert.equal(opts.outputFormat, 'text')
+  assert.equal(opts.json, false)
+})
+
+test('invalid --output-format value throws', () => {
+  assert.throws(
+    () => parseHeadlessArgs(['node', 'gsd', 'headless', '--output-format', 'yaml', 'auto']),
+    /Invalid output format: yaml/, // the rejected value is echoed in the message
+  )
+})
+
+test('invalid --output-format value (empty) throws', () => {
+  assert.throws(
+    () => parseHeadlessArgs(['node', 'gsd', 'headless', '--output-format', '', 'auto']), // empty string: the case the test name promises
+    /Invalid output format/, // message ends right after the colon, so only the prefix is matched
+  )
+})
+
+// ─── --json backward compatibility ─────────────────────────────────────────
+
+test('--json is alias for --output-format stream-json', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', '--json', 'auto'])
+  assert.equal(opts.outputFormat, 'stream-json')
+  assert.equal(opts.json, true)
+})
+
+test('--json before --output-format json: last writer wins', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', '--json', '--output-format', 'json', 'auto'])
+  assert.equal(opts.outputFormat, 'json') // --json set stream-json first; the later flag overwrote it
+  assert.equal(opts.json, true)
+})
+
+// ─── --resume flag ─────────────────────────────────────────────────────────
+
+test('--resume parses session ID', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', '--resume', 'abc-123', 'auto'])
+  assert.equal(opts.resumeSession, 'abc-123')
+  assert.equal(opts.command, 'auto') // the ID is consumed as the flag value, not mistaken for the command
+})
+
+test('no --resume means undefined', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', 'auto'])
+  assert.equal(opts.resumeSession, undefined)
+})
+
+// ─── Exit code constants ───────────────────────────────────────────────────
+
+test('EXIT_SUCCESS is 0', () => { // these four tests pin the numeric value of each imported exit-code constant
+  assert.equal(EXIT_SUCCESS, 0)
+})
+
+test('EXIT_ERROR is 1', () => {
+  assert.equal(EXIT_ERROR, 1)
+})
+
+test('EXIT_BLOCKED is 10', () => {
+  assert.equal(EXIT_BLOCKED, 10)
+})
+
+test('EXIT_CANCELLED is 11', () => {
+  assert.equal(EXIT_CANCELLED, 11)
+})
+
+// ─── mapStatusToExitCode ───────────────────────────────────────────────────
+
+test('mapStatusToExitCode: success → 0', () => {
+  assert.equal(mapStatusToExitCode('success'), EXIT_SUCCESS)
+})
+
+test('mapStatusToExitCode: complete → 0', () => {
+  assert.equal(mapStatusToExitCode('complete'), EXIT_SUCCESS) // 'complete' shares the success code
+})
+
+test('mapStatusToExitCode: error → 1', () => {
+  assert.equal(mapStatusToExitCode('error'), EXIT_ERROR)
+})
+
+test('mapStatusToExitCode: timeout → 1', () => {
+  assert.equal(mapStatusToExitCode('timeout'), EXIT_ERROR) // timeout shares the error code
+})
+
+test('mapStatusToExitCode: blocked → 10', () => {
+  assert.equal(mapStatusToExitCode('blocked'), EXIT_BLOCKED)
+})
+
+test('mapStatusToExitCode: cancelled → 11', () => {
+  assert.equal(mapStatusToExitCode('cancelled'), EXIT_CANCELLED)
+})
+
+test('mapStatusToExitCode: unknown status defaults to EXIT_ERROR', () => {
+  assert.equal(mapStatusToExitCode('unknown'), EXIT_ERROR)
+  assert.equal(mapStatusToExitCode(''), EXIT_ERROR) // the empty string is expected to fall back to EXIT_ERROR too
+})
+
+// ─── HeadlessJsonResult type shape ─────────────────────────────────────────
+
+test('HeadlessJsonResult satisfies expected shape', () => {
+  // Type-level assertion: construct a valid object and verify it compiles.
+  // At runtime, verify all required keys exist.
+  const result: HeadlessJsonResult = { // only the fields this test treats as required
+    status: 'success',
+    exitCode: 0,
+    duration: 12345,
+    cost: { total: 0.05, input_tokens: 1000, output_tokens: 500, cache_read_tokens: 200, cache_write_tokens: 100 },
+    toolCalls: 15,
+    events: 42,
+  }
+  assert.equal(result.status, 'success')
+  assert.equal(result.exitCode, 0)
+  assert.equal(typeof result.duration, 'number')
+  assert.ok(result.cost)
+  assert.equal(typeof result.cost.total, 'number')
+  assert.equal(typeof result.cost.input_tokens, 'number')
+  assert.equal(typeof result.cost.output_tokens, 'number')
+  assert.equal(typeof result.cost.cache_read_tokens, 'number')
+  assert.equal(typeof result.cost.cache_write_tokens, 'number')
+  assert.equal(typeof result.toolCalls, 'number')
+  assert.equal(typeof result.events, 'number')
+})
+
+test('HeadlessJsonResult accepts optional fields', () => {
+  const result: HeadlessJsonResult = { // same required fields plus the optional ones exercised below
+    status: 'blocked',
+    exitCode: 10, // the value pinned for EXIT_BLOCKED elsewhere in this file
+    sessionId: 'sess-abc',
+    duration: 5000,
+    cost: { total: 0, input_tokens: 0, output_tokens: 0, cache_read_tokens: 0, cache_write_tokens: 0 },
+    toolCalls: 0,
+    events: 1,
+    milestone: 'M001',
+    phase: 'planning',
+    nextAction: 'fix blocker',
+    artifacts: ['ROADMAP.md'],
+    commits: ['abc1234'],
+  }
+  assert.equal(result.sessionId, 'sess-abc')
+  assert.equal(result.milestone, 'M001')
+  assert.deepEqual(result.artifacts, ['ROADMAP.md'])
+  assert.deepEqual(result.commits, ['abc1234'])
+})
+
+// ─── VALID_OUTPUT_FORMATS set ──────────────────────────────────────────────
+
+test('VALID_OUTPUT_FORMATS contains exactly text, json, stream-json', () => {
+  assert.equal(VALID_OUTPUT_FORMATS.size, 3) // size check plus the three has() calls proves "exactly"
+  assert.ok(VALID_OUTPUT_FORMATS.has('text'))
+  assert.ok(VALID_OUTPUT_FORMATS.has('json'))
+  assert.ok(VALID_OUTPUT_FORMATS.has('stream-json'))
+})
+
+// ─── Regression: existing flags still parse correctly ──────────────────────
+
+test('--events still works with new outputFormat default', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', '--events', 'agent_end,tool_execution_start', 'auto'])
+  assert.ok(opts.eventFilter instanceof Set)
+  assert.equal(opts.eventFilter!.size, 2) // two comma-separated names yield two entries
+  assert.equal(opts.json, true)
+  assert.equal(opts.outputFormat, 'stream-json') // upgraded because the format was still the 'text' default
+})
+
+test('--timeout still works', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', '--timeout', '60000', 'auto'])
+  assert.equal(opts.timeout, 60000) // string argument is converted to a number
+})
+
+test('--supervised still works and implies stream-json', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', '--supervised', 'auto'])
+  assert.equal(opts.supervised, true)
+  assert.equal(opts.json, true)
+  assert.equal(opts.outputFormat, 'stream-json')
+})
+
+test('--answers still works', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', '--answers', 'answers.json', 'auto'])
+  assert.equal(opts.answers, 'answers.json')
+})
+
+test('positional command parsing still works', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', 'next'])
+  assert.equal(opts.command, 'next') // replaces the 'auto' default
+})
+
+test('combined flags parse correctly', () => {
+  const opts = parseHeadlessArgs([
+    'node', 'gsd', 'headless',
+    '--output-format', 'json',
+    '--timeout', '120000',
+    '--resume', 'sess-xyz',
+    '--verbose',
+    'auto',
+  ])
+  assert.equal(opts.outputFormat, 'json')
+  assert.equal(opts.json, true)
+  assert.equal(opts.timeout, 120000)
+  assert.equal(opts.resumeSession, 'sess-xyz')
+  assert.equal(opts.verbose, true)
+  assert.equal(opts.command, 'auto')
+})
+
+// ─── --bare flag ───────────────────────────────────────────────────────────
+
+test('--bare sets bare to true', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', '--bare', 'auto'])
+  assert.equal(opts.bare, true)
+  assert.equal(opts.command, 'auto')
+})
+
+test('no --bare means bare is undefined', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', 'auto'])
+  assert.equal(opts.bare, undefined) // the field is never initialised to false
+})
+
+test('--bare is a boolean flag (no value needed)', () => {
+  const opts = parseHeadlessArgs(['node', 'gsd', 'headless', '--bare', '--json', 'auto'])
+  assert.equal(opts.bare, true)
+  assert.equal(opts.json, true) // --json was parsed as its own flag, not swallowed as a value of --bare
+})
+
+test('--bare combined with --output-format json', () => {
+  const opts = parseHeadlessArgs([
+    'node', 'gsd', 'headless',
+    '--bare',
+    '--output-format', 'json',
+    'auto',
+  ])
+  assert.equal(opts.bare, true)
+  assert.equal(opts.outputFormat, 'json')
+  assert.equal(opts.json, true)
+  assert.equal(opts.command, 'auto')
+})
+
+test('--bare does not affect other flags', () => {
+  const opts = parseHeadlessArgs([
+    'node', 'gsd', 'headless',
+    '--bare',
+    '--timeout', '60000',
+    '--resume', 'sess-abc',
+    'auto',
+  ])
+  assert.equal(opts.bare, true)
+  assert.equal(opts.timeout, 60000)
+  assert.equal(opts.resumeSession, 'sess-abc')
+  assert.equal(opts.command, 'auto')
+})