// singularity-forge/packages/agent-core/src/agent-loop.test.ts

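// agent-loop tests
// Covers: pauseTurn handling (#2869), provider-executed (external) tool results,
// steering during tool batches, the predictive stream hook, and the schema
// overload retry cap (#2783, #3618)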

import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";
import { Type } from "@sinclair/typebox";
import type { AssistantMessage, Model } from "@singularity-forge/ai";
import {
	AssistantMessageEventStream,
	type EventStream,
} from "@singularity-forge/ai";
import { describe, it } from "vitest";
import {
	agentLoop,
	MAX_CONSECUTIVE_VALIDATION_FAILURES,
} from "./agent-loop.js";
import type {
	AgentContext,
	AgentEvent,
	AgentLoopConfig,
	AgentMessage,
	AgentTool,
} from "./types.js";

// ES modules have no built-in __dirname; derive this file's directory from
// import.meta.url so the source-inspection tests below can read sibling files.
const __dirname = dirname(fileURLToPath(import.meta.url));

describe("agent-loop — pauseTurn handling (#2869)", () => {
	type ToolExecutionEnd = Extract<AgentEvent, { type: "tool_execution_end" }>;

	/** Builds a fresh user text message stamped with the current time. */
	function userText(text: string): AgentMessage {
		return {
			role: "user",
			content: [{ type: "text", text }],
			timestamp: Date.now(),
		};
	}

	/**
	 * Runs one agent loop with `externalToolExecution` enabled against a single
	 * mocked "claude-code" assistant response whose content is `content`.
	 *
	 * @param content - Content blocks of the mocked assistant message.
	 * @param promptText - Text of the user prompt (used both in the context
	 *   history and as the new prompt, as the tests always did).
	 * @returns The first `tool_execution_end` event emitted, or `undefined`.
	 */
	async function runExternalToolTurn(
		content: AssistantMessage["content"],
		promptText: string,
	): Promise<ToolExecutionEnd | undefined> {
		const providerMessage = makeAssistantMessage({
			content,
			stopReason: "toolUse",
			provider: "claude-code",
		});
		const mockStream = createMockStreamFn([providerMessage]);

		const context: AgentContext = {
			systemPrompt: "You are a test agent.",
			messages: [userText(promptText)],
			tools: [],
		};

		const config: AgentLoopConfig = {
			model: { ...TEST_MODEL, provider: "claude-code" },
			convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"),
			toolExecution: "sequential",
			externalToolExecution: true,
		};

		const events = await collectEvents(
			agentLoop(
				[userText(promptText)],
				context,
				config,
				undefined,
				mockStream as any,
			),
		);
		return events.find(
			(event): event is ToolExecutionEnd => event.type === "tool_execution_end",
		);
	}

	it("sets hasMoreToolCalls when stopReason is pauseTurn", () => {
		const source = readFileSync(join(__dirname, "agent-loop.ts"), "utf-8");

		// The agent loop must treat pauseTurn as a reason to continue the inner
		// loop, just like toolUse. This prevents incomplete server_tool_use blocks
		// from being saved to history, which would cause a 400 on the next request.
		assert.match(
			source,
			/pauseTurn/,
			"agent-loop.ts must handle the pauseTurn stop reason",
		);

		// Textual check only: confirms the source compares stopReason against
		// "pauseTurn". It does NOT prove that hasMoreToolCalls is assigned in
		// that branch. NOTE(review): consider a behavioural test for that.
		assert.match(
			source,
			/stopReason\s*===?\s*["']pauseTurn["']/,
			'agent-loop.ts must check for stopReason === "pauseTurn"',
		);
	});

	it("pauseTurn is in the StopReason union type", () => {
		// Textual check: the ai package's types.ts must mention the "pauseTurn"
		// literal somewhere (the regex does not tie it to StopReason itself).
		const typesPath = join(__dirname, "..", "..", "ai", "src", "types.ts");
		const typesSource = readFileSync(typesPath, "utf-8");
		assert.match(
			typesSource,
			/["']pauseTurn["']/,
			'StopReason type must include "pauseTurn"',
		);
	});

	it("uses provider-supplied external tool results instead of the placeholder", async () => {
		const toolEnd = await runExternalToolTurn(
			[
				{
					type: "toolCall",
					id: "tc-external-1",
					name: "bash",
					arguments: { command: "echo hi" },
					externalResult: {
						content: [{ type: "text", text: "hi\n" }],
						details: { source: "claude-code" },
						isError: false,
					},
					// Cast kept from the original test; presumably externalResult is
					// not on the declared toolCall content type — TODO confirm.
				} as any,
			],
			"Run the command",
		);

		assert.ok(toolEnd, "expected tool_execution_end event");
		assert.deepEqual(toolEnd.result.content, [{ type: "text", text: "hi\n" }]);
		assert.deepEqual(toolEnd.result.details, { source: "claude-code" });
		assert.equal(toolEnd.isError, false);
	});

	it("uses a neutral provider-executed fallback when no external result is attached", async () => {
		const toolEnd = await runExternalToolTurn(
			[
				{
					type: "toolCall",
					id: "tc-external-fallback",
					name: "read",
					arguments: { filePath: ".sf/BACKLOG.md" },
				},
			],
			"Read backlog",
		);

		assert.ok(toolEnd, "expected tool_execution_end event");
		assert.deepEqual(toolEnd.result.content, [
			{ type: "text", text: "(executed by provider)" },
		]);
		// The fallback text must stay provider-neutral.
		assert.equal(
			JSON.stringify(toolEnd.result.content).includes("Claude Code"),
			false,
		);
	});
});

describe("agent-loop — steering during tool batches", () => {
	/** Inputs that differ between the steering scenarios. */
	interface SteeringScenario {
		/** Message returned by getSteeringMessages on the chosen poll. */
		steering: AgentMessage;
		/** 1-based index of the getSteeringMessages call that returns `steering`. */
		deliverOnPoll: number;
		/** Text of the second (final) mocked assistant response. */
		finalText: string;
		/** When true, sets `interruptToolExecutionOnSteering: true` in the config. */
		interrupt?: boolean;
	}

	/** Observations collected from one scenario run. */
	interface SteeringOutcome {
		/** Values the `record` tool was actually executed with, in order. */
		calls: string[];
		/** Every event emitted by the agent loop. */
		events: AgentEvent[];
		/** Number of tool results carrying the "skipped" placeholder text. */
		skippedCount: number;
	}

	/**
	 * Runs the agent loop against a mocked assistant turn that requests two
	 * sequential `record` tool calls ("one", then "two"), followed by a plain
	 * text turn, while `getSteeringMessages` yields the scenario's steering
	 * message on exactly one poll.
	 */
	async function runRecordBatch(
		scenario: SteeringScenario,
	): Promise<SteeringOutcome> {
		const calls: string[] = [];
		const tool = {
			name: "record",
			label: "Record",
			description: "Record a value",
			parameters: Type.Object({ value: Type.String() }),
			execute: async (_id: string, args: { value: string }) => {
				calls.push(args.value);
				return {
					content: [{ type: "text" as const, text: `recorded ${args.value}` }],
					details: {},
				};
			},
		} satisfies AgentTool<{ value: string }>;

		const toolBatch = makeAssistantMessage({
			content: [
				{
					type: "toolCall",
					id: "tc-1",
					name: "record",
					arguments: { value: "one" },
				},
				{
					type: "toolCall",
					id: "tc-2",
					name: "record",
					arguments: { value: "two" },
				},
			],
			stopReason: "toolUse",
		});
		const wrapUp = makeAssistantMessage({
			content: [{ type: "text", text: scenario.finalText }],
			stopReason: "stop",
		});
		const mockStream = createMockStreamFn([toolBatch, wrapUp]);

		const context: AgentContext = {
			systemPrompt: "You are a test agent.",
			messages: [
				{
					role: "user",
					content: [{ type: "text", text: "record values" }],
					timestamp: Date.now(),
				},
			],
			tools: [tool],
		};

		let steeringPolls = 0;
		const config: AgentLoopConfig = {
			model: TEST_MODEL,
			convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"),
			toolExecution: "sequential",
			// Only add the flag when requested so the default-path tests leave the
			// property absent, exactly as before.
			...(scenario.interrupt ? { interruptToolExecutionOnSteering: true } : {}),
			getSteeringMessages: async () => {
				steeringPolls += 1;
				return steeringPolls === scenario.deliverOnPoll
					? [scenario.steering]
					: [];
			},
		};

		const events = await collectEvents(
			agentLoop(
				[
					{
						role: "user",
						content: [{ type: "text", text: "record values" }],
						timestamp: Date.now(),
					},
				],
				context,
				config,
				undefined,
				mockStream as any,
			),
		);

		const skipped = events.filter(
			(event) =>
				event.type === "tool_execution_end" &&
				JSON.stringify(event.result.content).includes(
					"Skipped due to queued user message",
				),
		);

		return { calls, events, skippedCount: skipped.length };
	}

	/** True when a `message_start` event carries this exact steering object. */
	function wasDelivered(events: AgentEvent[], steering: AgentMessage): boolean {
		return events.some(
			(event) => event.type === "message_start" && event.message === steering,
		);
	}

	it("does not interrupt the current tool batch for custom system steering", async () => {
		const steering = {
			role: "custom",
			customType: "sf-memory-sleeper",
			content: "system notice",
			display: false,
			timestamp: Date.now(),
		} as AgentMessage;

		const { calls, events, skippedCount } = await runRecordBatch({
			steering,
			deliverOnPoll: 2,
			finalText: "saw system steering",
		});

		assert.deepEqual(calls, ["one", "two"]);
		assert.equal(skippedCount, 0);
		assert.ok(
			wasDelivered(events, steering),
			"system steering should still be delivered after the tool batch",
		);
	});

	it("defers queued user steering until after the current tool batch by default", async () => {
		const steering: AgentMessage = {
			role: "user",
			content: [{ type: "text", text: "do not interrupt" }],
			timestamp: Date.now(),
		};

		const { calls, events, skippedCount } = await runRecordBatch({
			steering,
			deliverOnPoll: 1,
			finalText: "saw steering",
		});

		assert.deepEqual(calls, ["one", "two"]);
		assert.equal(skippedCount, 0);
		assert.ok(
			wasDelivered(events, steering),
			"queued steering should still be delivered after the tool batch",
		);
	});

	it("skips remaining tool calls only when steering interruption is explicit", async () => {
		const steering: AgentMessage = {
			role: "user",
			content: [{ type: "text", text: "stop and listen" }],
			timestamp: Date.now(),
		};

		const { calls, events, skippedCount } = await runRecordBatch({
			steering,
			deliverOnPoll: 2,
			finalText: "saw steering",
			interrupt: true,
		});

		// Only the first call runs; the second is reported as skipped.
		assert.deepEqual(calls, ["one"]);
		assert.equal(skippedCount, 1);
		assert.ok(
			wasDelivered(events, steering),
			"explicit interrupt steering should still be delivered",
		);
	});
});

describe("agent-loop — predictive stream hook", () => {
	/** Parameters that vary between the stream-hook tests. */
	interface HookRun {
		/** Text of the single user prompt. */
		prompt: string;
		/** Text of the final assistant message. */
		reply: string;
		/** Deltas streamed before the final message. */
		deltas: Array<{ type: "text_delta" | "thinking_delta"; delta: string }>;
		/** Hook installed as `config.onStreamChunk`. */
		onStreamChunk: NonNullable<AgentLoopConfig["onStreamChunk"]>;
	}

	/**
	 * Streams `deltas` followed by a final text message through the agent loop
	 * with the given `onStreamChunk` hook installed, and returns all events.
	 */
	async function runWithHook(run: HookRun): Promise<AgentEvent[]> {
		const finalMessage = makeAssistantMessage({
			content: [{ type: "text", text: run.reply }],
			stopReason: "stop",
		});
		const mockStream = createDeltaStreamFn(run.deltas, finalMessage);
		const context: AgentContext = {
			systemPrompt: "You are a test agent.",
			messages: [
				{
					role: "user",
					content: [{ type: "text", text: run.prompt }],
					timestamp: Date.now(),
				},
			],
			tools: [],
		};
		const config: AgentLoopConfig = {
			model: TEST_MODEL,
			convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"),
			toolExecution: "sequential",
			onStreamChunk: run.onStreamChunk,
		};

		// The context's own message array doubles as the prompt list here.
		return collectEvents(
			agentLoop(
				context.messages,
				context,
				config,
				undefined,
				mockStream as any,
			),
		);
	}

	it("receives text and thinking deltas without changing the final response", async () => {
		const chunks: string[] = [];

		const events = await runWithHook({
			prompt: "say hello",
			reply: "hello",
			deltas: [
				{ type: "thinking_delta", delta: "think" },
				{ type: "text_delta", delta: "hello" },
			],
			onStreamChunk: (chunk) => {
				chunks.push(chunk);
			},
		});

		assert.deepEqual(chunks, ["think", "hello"]);
		assert.ok(
			events.some(
				(event) =>
					event.type === "agent_end" &&
					event.messages.at(-1)?.role === "assistant",
			),
		);
	});

	it("ignores predictive hook failures so streaming can finish", async () => {
		const events = await runWithHook({
			prompt: "say done",
			reply: "still done",
			deltas: [{ type: "text_delta", delta: "still done" }],
			onStreamChunk: () => {
				throw new Error("prefetch failed");
			},
		});

		const agentEnd = events.find((event) => event.type === "agent_end");
		assert.ok(agentEnd);
		assert.equal(agentEnd.messages.at(-1)?.role, "assistant");
	});
});

/**
 * Regression tests for #2783: Stuck-loop on execute-task — tool-call schema
 * overload causes unbounded retry + budget burn.
 *
 * When the LLM repeatedly emits tool calls with arguments that fail schema
 * validation, the agent loop retries indefinitely. Each failed validation
 * returns an error tool result, the LLM retries with the same broken args,
 * and the cycle never breaks — burning budget with no progress.
 *
 * The fix caps consecutive validation failures per turn at
 * MAX_CONSECUTIVE_VALIDATION_FAILURES (default 3). Once the cap is hit, the
 * loop injects a synthetic stop so the agent terminates cleanly instead of
 * spinning forever.
 */

// ─── Helpers ──────────────────────────────────────────────────────────────────

/**
 * Model descriptor shared by the tests in this file: an "anthropic-messages"
 * model from the "anthropic" provider with image support and prompt caching
 * turned off. Tests that need another provider spread this object and
 * override `provider`.
 */
const TEST_MODEL: Model<"anthropic-messages"> = {
	id: "claude-test",
	name: "Test Model",
	api: "anthropic-messages",
	provider: "anthropic",
	contextWindow: 200_000,
	maxOutput: 4096,
	supportsImages: false,
	supportsPromptCache: false,
	thinkingLevel: undefined,
};

/**
 * Builds the `write_file` tool used by the schema-validation tests.
 *
 * Its parameter schema requires two string fields, `path` and `content`;
 * `execute` ignores its arguments and always reports "done".
 */
function makeToolWithSchema(): AgentTool<any> {
	const parameters = Type.Object({
		path: Type.String(),
		content: Type.String(),
	});

	async function execute() {
		return {
			content: [{ type: "text" as const, text: "done" }],
			details: {},
		};
	}

	return {
		name: "write_file",
		label: "Write File",
		description: "Write content to a file",
		parameters,
		execute,
	};
}

/**
 * Creates a mock streamFn that replays canned assistant messages.
 *
 * The n-th call of the returned function serves `responses[n]`; once the list
 * is exhausted every further call serves the last entry again. Each returned
 * stream receives a `start` event, a `done` event and is then ended with the
 * message, all from a queued microtask (so nothing is pushed synchronously).
 *
 * @param responses - Messages to replay, in call order. Must not be empty.
 * @throws Error if `responses` is empty, since there would be no message to
 *   serve and the stream would otherwise carry `undefined`.
 */
function createMockStreamFn(responses: AssistantMessage[]) {
	if (responses.length === 0) {
		throw new Error("createMockStreamFn requires at least one response");
	}

	let callIndex = 0;

	return function mockStreamFn(): AssistantMessageEventStream {
		// Fall back to the final response once the queue has been used up.
		const message = responses[callIndex] ?? responses[responses.length - 1];
		callIndex++;

		const stream = new AssistantMessageEventStream();
		// Simulate async delivery
		queueMicrotask(() => {
			stream.push({ type: "start", partial: message });
			stream.push({ type: "done", message });
			stream.end(message);
		});
		return stream;
	};
}

/**
 * Creates a mock streamFn whose streams emit, from a queued microtask, a
 * `start` event, one event per entry in `deltas` (all at content index 0),
 * a `done` event, and are then ended with `finalMessage`.
 *
 * Every call of the returned function produces a new stream carrying the same
 * sequence; `finalMessage` is used as the `partial` of all non-final events.
 */
function createDeltaStreamFn(
	deltas: Array<{ type: "text_delta" | "thinking_delta"; delta: string }>,
	finalMessage: AssistantMessage,
) {
	return function mockStreamFn(): AssistantMessageEventStream {
		const stream = new AssistantMessageEventStream();

		const deliver = (): void => {
			stream.push({ type: "start", partial: finalMessage });
			deltas.forEach(({ type, delta }) => {
				stream.push({
					type,
					contentIndex: 0,
					delta,
					partial: finalMessage,
				});
			});
			stream.push({ type: "done", message: finalMessage });
			stream.end(finalMessage);
		};

		queueMicrotask(deliver);
		return stream;
	};
}

/**
 * Builds an assistant message with test defaults (empty content, anthropic
 * provider, 100/50 token usage at zero cost, stopReason "stop", current
 * timestamp) and then applies `overrides` on top, field by field.
 */
function makeAssistantMessage(
	overrides: Partial<AssistantMessage> = {},
): AssistantMessage {
	const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
	const usage = {
		input: 100,
		output: 50,
		cacheRead: 0,
		cacheWrite: 0,
		totalTokens: 150,
		cost: zeroCost,
	};
	const defaults: AssistantMessage = {
		role: "assistant",
		content: [],
		api: "anthropic-messages",
		provider: "anthropic",
		model: "claude-test",
		usage,
		stopReason: "stop",
		timestamp: Date.now(),
	};

	// Shallow merge: an overridden field replaces the default wholesale.
	return { ...defaults, ...overrides };
}

/**
 * Builds an assistant message (stopReason "toolUse") containing a single
 * `write_file` tool call with the given arguments and a generated id of the
 * form `tc_<Date.now()>_<Math.random()>`.
 */
function makeToolCallMessage(
	toolCallArgs: Record<string, unknown>,
): AssistantMessage {
	const generatedId = `tc_${Date.now()}_${Math.random()}`;

	return makeAssistantMessage({
		stopReason: "toolUse",
		content: [
			{
				type: "toolCall",
				id: generatedId,
				name: "write_file",
				arguments: toolCallArgs,
			},
		],
	});
}

/**
 * Drains an agent event stream into an array.
 *
 * @param stream - Async-iterable stream of agent events.
 * @returns Every event yielded by `stream`, in emission order, once the
 *   stream has ended.
 * @throws Rejects with the iteration error if the stream throws while being
 *   consumed, so a failing test reports the real cause instead of hanging
 *   until the runner's timeout.
 */
async function collectEvents(
	stream: EventStream<AgentEvent, AgentMessage[]>,
): Promise<AgentEvent[]> {
	const events: AgentEvent[] = [];
	for await (const event of stream) {
		events.push(event);
	}
	return events;
}

// ─── Tests ────────────────────────────────────────────────────────────────────

describe("agent-loop — schema overload retry cap (#2783)", () => {
	type AgentEnd = Extract<AgentEvent, { type: "agent_end" }>;

	/**
	 * Runs the agent loop with a single registered tool against the canned
	 * `responses` (served in order by createMockStreamFn) and returns every
	 * event emitted.
	 */
	async function runLoop(
		tool: AgentTool<any>,
		responses: AssistantMessage[],
		promptText: string,
	): Promise<AgentEvent[]> {
		const mockStream = createMockStreamFn(responses);

		const context: AgentContext = {
			systemPrompt: "You are a test agent.",
			messages: [
				{
					role: "user",
					content: [{ type: "text", text: promptText }],
					timestamp: Date.now(),
				},
			],
			tools: [tool],
		};

		const config: AgentLoopConfig = {
			model: TEST_MODEL,
			convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"),
			toolExecution: "sequential",
		};

		return collectEvents(
			agentLoop(
				[
					{
						role: "user",
						content: [{ type: "text", text: promptText }],
						timestamp: Date.now(),
					},
				],
				context,
				config,
				undefined,
				mockStream as any,
			),
		);
	}

	/** Returns the first `agent_end` event, if the loop emitted one. */
	function findAgentEnd(events: AgentEvent[]): AgentEnd | undefined {
		return events.find((e): e is AgentEnd => e.type === "agent_end");
	}

	/** Counts `tool_execution_end` events flagged with `isError === true`. */
	function countToolErrors(events: AgentEvent[]): number {
		return events.filter(
			(e) => e.type === "tool_execution_end" && e.isError === true,
		).length;
	}

	it("terminates after MAX_CONSECUTIVE_VALIDATION_FAILURES consecutive schema failures", async () => {
		const tool = makeToolWithSchema();

		// LLM keeps sending tool calls with invalid args (missing required 'content' field)
		const badToolCall = makeToolCallMessage({ path: "/tmp/test" }); // missing 'content'
		const finalStop = makeAssistantMessage({
			content: [{ type: "text", text: "I give up." }],
			stopReason: "stop",
		});

		// Create enough bad responses to exceed the cap, plus a final stop
		const responses: AssistantMessage[] = [];
		for (let i = 0; i < MAX_CONSECUTIVE_VALIDATION_FAILURES + 5; i++) {
			responses.push(badToolCall);
		}
		responses.push(finalStop);

		const events = await runLoop(tool, responses, "Write a file");

		// Must have terminated (agent_end event present)
		assert.ok(
			findAgentEnd(events),
			"agent loop must emit agent_end after hitting retry cap",
		);

		// Validation failures surface as tool_execution_end with isError: true;
		// without the cap there would be MAX + 5 of them.
		const toolErrorCount = countToolErrors(events);
		assert.ok(
			toolErrorCount <= MAX_CONSECUTIVE_VALIDATION_FAILURES,
			`Expected at most ${MAX_CONSECUTIVE_VALIDATION_FAILURES} validation error tool results, got ${toolErrorCount}`,
		);
	});

	it("resets the failure counter when a tool call succeeds", async () => {
		const tool = makeToolWithSchema();

		// Pattern: 2 failures, 1 success, 2 failures, 1 success, then stop
		const badCall = makeToolCallMessage({ path: "/tmp/test" }); // missing 'content'
		const goodCall = makeToolCallMessage({
			path: "/tmp/test",
			content: "hello",
		});
		const finalStop = makeAssistantMessage({
			content: [{ type: "text", text: "Done." }],
			stopReason: "stop",
		});

		const responses = [
			badCall,
			badCall,
			goodCall,
			badCall,
			badCall,
			goodCall,
			finalStop,
		];
		// Every response except the final stop carries exactly one tool call.
		const toolTurns = responses.length - 1;

		const events = await runLoop(tool, responses, "Write a file");

		// Must complete successfully since failures never reached cap consecutively
		assert.ok(
			findAgentEnd(events),
			"agent loop must complete normally when failures are interspersed with successes",
		);

		// All tool-bearing turns must be processed. A weaker ">= 4" bound would
		// also pass if the counter never reset (bad, bad, good, bad trips a cap
		// of 3 on the 4th turn), so require the exact count.
		const toolExecEnds = events.filter((e) => e.type === "tool_execution_end");
		assert.equal(
			toolExecEnds.length,
			toolTurns,
			`Expected ${toolTurns} tool executions (2 bad + 1 good + 2 bad + 1 good), got ${toolExecEnds.length}`,
		);
	});

	it("exports MAX_CONSECUTIVE_VALIDATION_FAILURES as a configurable constant", () => {
		assert.equal(typeof MAX_CONSECUTIVE_VALIDATION_FAILURES, "number");
		assert.ok(
			MAX_CONSECUTIVE_VALIDATION_FAILURES >= 2,
			"Cap must be at least 2 to allow one retry",
		);
		assert.ok(
			MAX_CONSECUTIVE_VALIDATION_FAILURES <= 10,
			"Cap must not be unreasonably high",
		);
	});

	it("does NOT trip schema overload cap on tool execution errors like bash exit code 1 (#3618)", async () => {
		// Simulates the real scenario: a tool (bash) that passes validation but
		// throws during execution (e.g. rg/grep returning exit code 1 = no matches).
		// These are valid tool invocations — the schema was correct, the tool ran,
		// it just returned a non-zero exit code. The cap should only trigger for
		// preparation/schema failures, not execution failures.
		const bashTool: AgentTool<any> = {
			name: "bash",
			label: "Bash",
			description: "Run a bash command",
			parameters: Type.Object({
				command: Type.String(),
			}),
			execute: async () => {
				// Simulate bash tool rejecting on non-zero exit code
				throw new Error("(no output)\n\nCommand exited with code 1");
			},
		};

		// LLM sends valid tool calls (schema is correct) that fail at execution
		const validBashCall = makeAssistantMessage({
			content: [
				{
					type: "toolCall",
					id: `tc_bash_${Date.now()}_${Math.random()}`,
					name: "bash",
					arguments: { command: "rg -l 'nonexistent' src/" },
				},
			],
			stopReason: "toolUse",
		});
		const finalStop = makeAssistantMessage({
			content: [{ type: "text", text: "No references found." }],
			stopReason: "stop",
		});

		// Send more than MAX_CONSECUTIVE_VALIDATION_FAILURES bash calls that throw
		const responses: AssistantMessage[] = [];
		for (let i = 0; i < MAX_CONSECUTIVE_VALIDATION_FAILURES + 2; i++) {
			responses.push(validBashCall);
		}
		responses.push(finalStop);

		const events = await runLoop(bashTool, responses, "Search for references");

		// Must complete normally — execution errors should NOT trigger the cap
		const agentEnd = findAgentEnd(events);
		assert.ok(agentEnd, "agent loop must emit agent_end");

		// All bash calls should have been attempted (not capped early)
		const toolErrorCount = countToolErrors(events);
		assert.ok(
			toolErrorCount >= MAX_CONSECUTIVE_VALIDATION_FAILURES + 2,
			`Expected all ${MAX_CONSECUTIVE_VALIDATION_FAILURES + 2} bash execution errors to be processed (not capped), got ${toolErrorCount}`,
		);

		// The stop message should NOT contain the schema overload text.
		// NOTE(review): this check is skipped when the last message is not an
		// assistant message with a text block — kept as in the original.
		const lastMessage = agentEnd.messages.at(-1);
		const lastText =
			lastMessage?.role === "assistant"
				? (lastMessage as AssistantMessage).content.find(
						(c) => c.type === "text",
					)
				: undefined;
		if (lastText && lastText.type === "text") {
			assert.ok(
				!lastText.text.includes(
					"consecutive turns with all tool calls failing",
				),
				"Final message must NOT contain schema overload stop text for execution-only errors",
			);
		}
	});
});