singularity-forge/packages/agent-core/src/agent-loop.test.ts
Mikael Hugo 6725a55591 feat(web): add error boundaries, expand test coverage, add README
- Add class-based ErrorBoundary component wrapping all 7 main views
  inside WorkspaceChrome; fallback shows view name, error, reload button
- Add 30 new unit tests (boot null-project path × 9, onboarding
  pure-function logic × 21); all 43 web/lib tests pass
- Add web/README.md: architecture, auth flow, 7 views, dev setup,
  API route pattern, test instructions

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-10 11:24:40 +02:00

1014 lines
26 KiB
TypeScript

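// agent-loop tests
// Covers: pauseTurn handling and provider-executed tool results (#2869),
// steering during tool batches, the predictive stream hook, and the schema
// overload retry cap (#2783, #3618).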
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";
import { Type } from "@sinclair/typebox";
import type { AssistantMessage, Model } from "@singularity-forge/ai";
import {
AssistantMessageEventStream,
type EventStream,
} from "@singularity-forge/ai";
import { describe, it } from "vitest";
import {
agentLoop,
MAX_CONSECUTIVE_VALIDATION_FAILURES,
} from "./agent-loop.js";
import type {
AgentContext,
AgentEvent,
AgentLoopConfig,
AgentMessage,
AgentTool,
} from "./types.js";
const __dirname = dirname(fileURLToPath(import.meta.url));
// Tests for the pauseTurn stop reason and for tool calls that the provider
// executes itself (externalToolExecution).
describe("agent-loop — pauseTurn handling (#2869)", () => {
  /** Builds a fresh single-text user message; every call gets its own timestamp. */
  const userText = (text: string): AgentMessage => ({
    role: "user",
    content: [{ type: "text", text }],
    timestamp: Date.now(),
  });

  /**
   * Runs one agent loop with `externalToolExecution` enabled against a mock
   * stream that answers with `response`.
   *
   * @param response - Assistant message the mock stream returns.
   * @param prompt - Text of the user prompt (used for both history and prompt).
   * @returns The first `tool_execution_end` event, or `undefined` if none fired.
   */
  async function runExternalToolTurn(
    response: AssistantMessage,
    prompt: string,
  ): Promise<Extract<AgentEvent, { type: "tool_execution_end" }> | undefined> {
    const mockStream = createMockStreamFn([response]);
    const context: AgentContext = {
      systemPrompt: "You are a test agent.",
      messages: [userText(prompt)],
      tools: [],
    };
    const config: AgentLoopConfig = {
      model: { ...TEST_MODEL, provider: "claude-code" },
      convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"),
      toolExecution: "sequential",
      externalToolExecution: true,
    };
    const events = await collectEvents(
      agentLoop(
        [userText(prompt)],
        context,
        config,
        undefined,
        mockStream as any,
      ),
    );
    return events.find(
      (event): event is Extract<AgentEvent, { type: "tool_execution_end" }> =>
        event.type === "tool_execution_end",
    );
  }

  it("sets hasMoreToolCalls when stopReason is pauseTurn", () => {
    const source = readFileSync(join(__dirname, "agent-loop.ts"), "utf-8");
    // The agent loop must treat pauseTurn as a reason to continue the inner
    // loop, just like toolUse. This prevents incomplete server_tool_use blocks
    // from being saved to history, which would cause a 400 on the next request.
    assert.match(
      source,
      /pauseTurn/,
      "agent-loop.ts must handle the pauseTurn stop reason",
    );
    // Source-level check only: this confirms the loop compares stopReason
    // against "pauseTurn". It does not inspect the hasMoreToolCalls assignment.
    assert.match(
      source,
      /stopReason\s*===?\s*["']pauseTurn["']/,
      'agent-loop.ts must check for stopReason === "pauseTurn"',
    );
  });

  it("pauseTurn is in the StopReason union type", () => {
    // Read the ai package's types source and look for the "pauseTurn" literal.
    const typesPath = join(__dirname, "..", "..", "ai", "src", "types.ts");
    const typesSource = readFileSync(typesPath, "utf-8");
    assert.match(
      typesSource,
      /["']pauseTurn["']/,
      'StopReason type must include "pauseTurn"',
    );
  });

  it("uses provider-supplied external tool results instead of the placeholder", async () => {
    const externalMessage = makeAssistantMessage({
      content: [
        {
          type: "toolCall",
          id: "tc-external-1",
          name: "bash",
          arguments: { command: "echo hi" },
          externalResult: {
            content: [{ type: "text", text: "hi\n" }],
            details: { source: "claude-code" },
            isError: false,
          },
        } as any,
      ],
      stopReason: "toolUse",
      provider: "claude-code",
    });

    const toolEnd = await runExternalToolTurn(externalMessage, "Run the command");

    assert.ok(toolEnd, "expected tool_execution_end event");
    assert.deepEqual(toolEnd.result.content, [{ type: "text", text: "hi\n" }]);
    assert.deepEqual(toolEnd.result.details, { source: "claude-code" });
    assert.equal(toolEnd.isError, false);
  });

  it("uses a neutral provider-executed fallback when no external result is attached", async () => {
    const externalMessage = makeAssistantMessage({
      content: [
        {
          type: "toolCall",
          id: "tc-external-fallback",
          name: "read",
          arguments: { filePath: ".sf/BACKLOG.md" },
        },
      ],
      stopReason: "toolUse",
      provider: "claude-code",
    });

    const toolEnd = await runExternalToolTurn(externalMessage, "Read backlog");

    assert.ok(toolEnd, "expected tool_execution_end event");
    assert.deepEqual(toolEnd.result.content, [
      { type: "text", text: "(executed by provider)" },
    ]);
    // The fallback text must stay provider-neutral.
    assert.equal(
      JSON.stringify(toolEnd.result.content).includes("Claude Code"),
      false,
    );
  });
});
// Tests for how queued steering messages interact with a batch of two
// sequential tool calls.
describe("agent-loop — steering during tool batches", () => {
  /** The inputs that differ between the steering scenarios below. */
  interface SteeringScenario {
    /** Message handed back by getSteeringMessages on the chosen poll. */
    steering: AgentMessage;
    /** 1-based getSteeringMessages call on which `steering` is returned. */
    deliverOnPoll: number;
    /** Text of the assistant's closing (stopReason "stop") message. */
    finalText: string;
    /** When true, sets interruptToolExecutionOnSteering on the loop config. */
    interruptOnSteering?: boolean;
  }

  /**
   * Runs the loop with a "record" tool and an assistant turn that calls it
   * twice ("one", then "two"), followed by a plain text turn.
   *
   * @returns `calls`: values the tool actually executed with, in order;
   *   `skipped`: tool_execution_end events whose result mentions
   *   "Skipped due to queued user message"; `steeringDelivered`: whether a
   *   message_start event carried the exact `steering` object.
   */
  async function runRecordBatch(scenario: SteeringScenario) {
    const calls: string[] = [];
    const tool = {
      name: "record",
      label: "Record",
      description: "Record a value",
      parameters: Type.Object({ value: Type.String() }),
      execute: async (_id: string, args: { value: string }) => {
        calls.push(args.value);
        return {
          content: [{ type: "text" as const, text: `recorded ${args.value}` }],
          details: {},
        };
      },
    } satisfies AgentTool<{ value: string }>;
    const first = makeAssistantMessage({
      content: [
        {
          type: "toolCall",
          id: "tc-1",
          name: "record",
          arguments: { value: "one" },
        },
        {
          type: "toolCall",
          id: "tc-2",
          name: "record",
          arguments: { value: "two" },
        },
      ],
      stopReason: "toolUse",
    });
    const second = makeAssistantMessage({
      content: [{ type: "text", text: scenario.finalText }],
      stopReason: "stop",
    });
    const mockStream = createMockStreamFn([first, second]);
    let steeringPolls = 0;
    const context: AgentContext = {
      systemPrompt: "You are a test agent.",
      messages: [
        {
          role: "user",
          content: [{ type: "text", text: "record values" }],
          timestamp: Date.now(),
        },
      ],
      tools: [tool],
    };
    const config: AgentLoopConfig = {
      model: TEST_MODEL,
      convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"),
      toolExecution: "sequential",
      // Only add the key when requested so the default-path tests keep
      // running with the option entirely absent.
      ...(scenario.interruptOnSteering
        ? { interruptToolExecutionOnSteering: true }
        : {}),
      getSteeringMessages: async () => {
        steeringPolls += 1;
        return steeringPolls === scenario.deliverOnPoll
          ? [scenario.steering]
          : [];
      },
    };
    const events = await collectEvents(
      agentLoop(
        [
          {
            role: "user",
            content: [{ type: "text", text: "record values" }],
            timestamp: Date.now(),
          },
        ],
        context,
        config,
        undefined,
        mockStream as any,
      ),
    );
    const skipped = events.filter(
      (event) =>
        event.type === "tool_execution_end" &&
        JSON.stringify(event.result.content).includes(
          "Skipped due to queued user message",
        ),
    );
    // Identity comparison: the loop must surface the very object we queued.
    const steeringDelivered = events.some(
      (event) =>
        event.type === "message_start" && event.message === scenario.steering,
    );
    return { calls, skipped, steeringDelivered };
  }

  it("does not interrupt the current tool batch for custom system steering", async () => {
    const steering: AgentMessage = {
      role: "custom",
      customType: "sf-memory-sleeper",
      content: "system notice",
      display: false,
      timestamp: Date.now(),
    } as AgentMessage;

    const outcome = await runRecordBatch({
      steering,
      deliverOnPoll: 2,
      finalText: "saw system steering",
    });

    assert.deepEqual(outcome.calls, ["one", "two"]);
    assert.equal(outcome.skipped.length, 0);
    assert.ok(
      outcome.steeringDelivered,
      "system steering should still be delivered after the tool batch",
    );
  });

  it("defers queued user steering until after the current tool batch by default", async () => {
    const steering: AgentMessage = {
      role: "user",
      content: [{ type: "text", text: "do not interrupt" }],
      timestamp: Date.now(),
    };

    const outcome = await runRecordBatch({
      steering,
      deliverOnPoll: 1,
      finalText: "saw steering",
    });

    assert.deepEqual(outcome.calls, ["one", "two"]);
    assert.equal(outcome.skipped.length, 0);
    assert.ok(
      outcome.steeringDelivered,
      "queued steering should still be delivered after the tool batch",
    );
  });

  it("skips remaining tool calls only when steering interruption is explicit", async () => {
    const steering: AgentMessage = {
      role: "user",
      content: [{ type: "text", text: "stop and listen" }],
      timestamp: Date.now(),
    };

    const outcome = await runRecordBatch({
      steering,
      deliverOnPoll: 2,
      finalText: "saw steering",
      interruptOnSteering: true,
    });

    // Only the first call runs; the second is reported as skipped.
    assert.deepEqual(outcome.calls, ["one"]);
    assert.equal(outcome.skipped.length, 1);
    assert.ok(
      outcome.steeringDelivered,
      "explicit interrupt steering should still be delivered",
    );
  });
});
// Tests for the onStreamChunk hook that observes streamed deltas.
describe("agent-loop — predictive stream hook", () => {
  /**
   * Streams a single assistant turn (the given deltas, then a final text
   * message with stopReason "stop") through the loop with `onStreamChunk`
   * installed, and returns every emitted agent event.
   */
  async function streamOnce(
    prompt: string,
    deltas: Parameters<typeof createDeltaStreamFn>[0],
    finalText: string,
    onStreamChunk: AgentLoopConfig["onStreamChunk"],
  ): Promise<AgentEvent[]> {
    const finalMessage = makeAssistantMessage({
      content: [{ type: "text", text: finalText }],
      stopReason: "stop",
    });
    const mockStream = createDeltaStreamFn(deltas, finalMessage);
    const context: AgentContext = {
      systemPrompt: "You are a test agent.",
      messages: [
        {
          role: "user",
          content: [{ type: "text", text: prompt }],
          timestamp: Date.now(),
        },
      ],
      tools: [],
    };
    const config: AgentLoopConfig = {
      model: TEST_MODEL,
      convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"),
      toolExecution: "sequential",
      onStreamChunk,
    };
    // The context's own message array doubles as the prompt list here.
    return collectEvents(
      agentLoop(
        context.messages,
        context,
        config,
        undefined,
        mockStream as any,
      ),
    );
  }

  it("receives text and thinking deltas without changing the final response", async () => {
    const chunks: string[] = [];

    const events = await streamOnce(
      "say hello",
      [
        { type: "thinking_delta", delta: "think" },
        { type: "text_delta", delta: "hello" },
      ],
      "hello",
      (chunk) => {
        chunks.push(chunk);
      },
    );

    assert.deepEqual(chunks, ["think", "hello"]);
    const endedWithAssistant = events.some(
      (event) =>
        event.type === "agent_end" &&
        event.messages.at(-1)?.role === "assistant",
    );
    assert.ok(endedWithAssistant);
  });

  it("ignores predictive hook failures so streaming can finish", async () => {
    const events = await streamOnce(
      "say done",
      [{ type: "text_delta", delta: "still done" }],
      "still done",
      () => {
        throw new Error("prefetch failed");
      },
    );

    const agentEnd = events.find((event) => event.type === "agent_end");
    assert.ok(agentEnd);
    assert.equal(agentEnd.messages.at(-1)?.role, "assistant");
  });
});
/**
* Regression tests for #2783: Stuck-loop on execute-task — tool-call schema
* overload causes unbounded retry + budget burn.
*
* When the LLM repeatedly emits tool calls with arguments that fail schema
* validation, the agent loop retries indefinitely. Each failed validation
* returns an error tool result, the LLM retries with the same broken args,
* and the cycle never breaks — burning budget with no progress.
*
* The fix caps consecutive validation failures per turn at
* MAX_CONSECUTIVE_VALIDATION_FAILURES (default 3). Once the cap is hit, the
* loop injects a synthetic stop so the agent terminates cleanly instead of
* spinning forever.
*/
// ─── Helpers ──────────────────────────────────────────────────────────────────
// Baseline model descriptor shared by the tests in this file. Tests that
// simulate another provider spread this object and override `provider`.
const TEST_MODEL: Model<"anthropic-messages"> = {
id: "claude-test",
name: "Test Model",
api: "anthropic-messages",
provider: "anthropic",
contextWindow: 200_000,
maxOutput: 4096,
supportsImages: false,
supportsPromptCache: false,
thinkingLevel: undefined,
};
/**
 * Builds a "write_file" tool whose schema requires string `path` and
 * `content` fields and whose execution always succeeds with the text "done".
 */
function makeToolWithSchema(): AgentTool<any> {
  const parameters = Type.Object({
    path: Type.String(),
    content: Type.String(),
  });
  const execute = async () => {
    return {
      content: [{ type: "text" as const, text: "done" }],
      details: {},
    };
  };
  return {
    name: "write_file",
    label: "Write File",
    description: "Write content to a file",
    parameters,
    execute,
  };
}
/**
 * Creates a mock streamFn that serves assistant messages from a fixed queue.
 *
 * Each call returns a stream for the next queued message. Once the queue is
 * exhausted, the last message is replayed for every further call, which lets
 * tests simulate an LLM that keeps emitting the same response.
 *
 * @param responses - Messages to serve, in call order. Must not be empty by
 *   the time the returned function is invoked.
 * @returns A zero-argument stream function that emits `start`, then `done`,
 *   then ends the stream, all from a queued microtask.
 * @throws Error when invoked while `responses` is empty, instead of pushing
 *   an `undefined` message into the stream.
 */
function createMockStreamFn(responses: AssistantMessage[]) {
  let callIndex = 0;
  return function mockStreamFn(): AssistantMessageEventStream {
    const message = responses[callIndex] ?? responses[responses.length - 1];
    if (message === undefined) {
      throw new Error("createMockStreamFn: no responses were queued");
    }
    callIndex++;
    const stream = new AssistantMessageEventStream();
    // Deliver asynchronously so the caller can attach to the stream first.
    queueMicrotask(() => {
      stream.push({ type: "start", partial: message });
      stream.push({ type: "done", message });
      stream.end(message);
    });
    return stream;
  };
}
/**
 * Creates a mock streamFn that replays a fixed list of text/thinking deltas.
 *
 * Every call returns a new stream which, from a queued microtask, emits
 * `start`, one event per delta (all at contentIndex 0, all carrying
 * `finalMessage` as the partial), then `done`, and finally ends the stream
 * with `finalMessage`.
 */
function createDeltaStreamFn(
  deltas: Array<{ type: "text_delta" | "thinking_delta"; delta: string }>,
  finalMessage: AssistantMessage,
) {
  const emitAll = (target: AssistantMessageEventStream): void => {
    target.push({ type: "start", partial: finalMessage });
    deltas.forEach(({ type, delta }) => {
      target.push({
        type,
        contentIndex: 0,
        delta,
        partial: finalMessage,
      });
    });
    target.push({ type: "done", message: finalMessage });
    target.end(finalMessage);
  };
  return function mockStreamFn(): AssistantMessageEventStream {
    const out = new AssistantMessageEventStream();
    queueMicrotask(() => emitAll(out));
    return out;
  };
}
/**
 * Builds an assistant message from test defaults (empty content, zero-cost
 * usage, stopReason "stop", current timestamp), with any field in
 * `overrides` replacing the corresponding default.
 */
function makeAssistantMessage(
  overrides: Partial<AssistantMessage> = {},
): AssistantMessage {
  const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
  const defaults: AssistantMessage = {
    role: "assistant",
    content: [],
    api: "anthropic-messages",
    provider: "anthropic",
    model: "claude-test",
    usage: {
      input: 100,
      output: 50,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 150,
      cost: zeroCost,
    },
    stopReason: "stop",
    timestamp: Date.now(),
  };
  return { ...defaults, ...overrides };
}
/**
 * Builds an assistant message (stopReason "toolUse") containing a single
 * "write_file" tool call with the given arguments and a freshly generated id.
 */
function makeToolCallMessage(
  toolCallArgs: Record<string, unknown>,
): AssistantMessage {
  const callId = `tc_${Date.now()}_${Math.random()}`;
  const content: AssistantMessage["content"] = [
    {
      type: "toolCall",
      id: callId,
      name: "write_file",
      arguments: toolCallArgs,
    },
  ];
  return makeAssistantMessage({ content, stopReason: "toolUse" });
}
/**
 * Drains an agent event stream into an array.
 *
 * @param stream - Async-iterable stream of agent events.
 * @returns Every event the stream yielded, in order, once it has ended.
 * @throws Rejects with the iteration error if the stream throws while being
 *   consumed, so a broken stream fails the awaiting test instead of leaving
 *   a promise that never settles.
 */
async function collectEvents(
  stream: EventStream<AgentEvent, AgentMessage[]>,
): Promise<AgentEvent[]> {
  const events: AgentEvent[] = [];
  for await (const event of stream) {
    events.push(event);
  }
  return events;
}
// ─── Tests ────────────────────────────────────────────────────────────────────
// Regression tests for the cap on consecutive schema-validation failures
// (#2783) and for execution errors not counting toward that cap (#3618).
describe("agent-loop — schema overload retry cap (#2783)", () => {
  /**
   * Runs the agent loop with a single tool and a scripted list of assistant
   * responses, and returns every emitted event.
   *
   * @param tool - The only tool registered in the context.
   * @param responses - Assistant messages served by the mock stream, in order.
   * @param prompt - Text of the user prompt (used for both history and prompt).
   */
  async function runLoop(
    tool: AgentTool<any>,
    responses: AssistantMessage[],
    prompt: string,
  ): Promise<AgentEvent[]> {
    const mockStream = createMockStreamFn(responses);
    const userPrompt = (): AgentMessage => ({
      role: "user",
      content: [{ type: "text", text: prompt }],
      timestamp: Date.now(),
    });
    const context: AgentContext = {
      systemPrompt: "You are a test agent.",
      messages: [userPrompt()],
      tools: [tool],
    };
    const config: AgentLoopConfig = {
      model: TEST_MODEL,
      convertToLlm: (msgs) => msgs.filter((m): m is any => m.role !== "custom"),
      toolExecution: "sequential",
    };
    return collectEvents(
      agentLoop([userPrompt()], context, config, undefined, mockStream as any),
    );
  }

  it("terminates after MAX_CONSECUTIVE_VALIDATION_FAILURES consecutive schema failures", async () => {
    const tool = makeToolWithSchema();
    // LLM keeps sending tool calls with invalid args (missing required 'content' field)
    const badToolCall = makeToolCallMessage({ path: "/tmp/test" }); // missing 'content'
    const finalStop = makeAssistantMessage({
      content: [{ type: "text", text: "I give up." }],
      stopReason: "stop",
    });
    // Create enough bad responses to exceed the cap, plus a final stop
    const responses: AssistantMessage[] = [
      ...Array.from(
        { length: MAX_CONSECUTIVE_VALIDATION_FAILURES + 5 },
        () => badToolCall,
      ),
      finalStop,
    ];

    const events = await runLoop(tool, responses, "Write a file");

    // Must have terminated (agent_end event present)
    const agentEnd = events.find((e) => e.type === "agent_end");
    assert.ok(
      agentEnd,
      "agent loop must emit agent_end after hitting retry cap",
    );
    // Count how many turns had validation errors (tool_execution_end with isError: true)
    const toolErrors = events.filter(
      (e) => e.type === "tool_execution_end" && e.isError === true,
    );
    // Must not exceed the cap
    assert.ok(
      toolErrors.length <= MAX_CONSECUTIVE_VALIDATION_FAILURES,
      `Expected at most ${MAX_CONSECUTIVE_VALIDATION_FAILURES} validation error tool results, got ${toolErrors.length}`,
    );
  });

  it("resets the failure counter when a tool call succeeds", async () => {
    const tool = makeToolWithSchema();
    // Pattern: 2 failures, 1 success, 2 failures, 1 success, then stop
    const badCall = makeToolCallMessage({ path: "/tmp/test" }); // missing 'content'
    const goodCall = makeToolCallMessage({
      path: "/tmp/test",
      content: "hello",
    });
    const finalStop = makeAssistantMessage({
      content: [{ type: "text", text: "Done." }],
      stopReason: "stop",
    });
    const responses = [
      badCall,
      badCall,
      goodCall,
      badCall,
      badCall,
      goodCall,
      finalStop,
    ];
    // Every response except the closing stop carries exactly one tool call.
    const toolBearingTurns = responses.length - 1;

    const events = await runLoop(tool, responses, "Write a file");

    // Must complete successfully since failures never reached cap consecutively
    const agentEnd = events.find((e) => e.type === "agent_end");
    assert.ok(
      agentEnd,
      "agent loop must complete normally when failures are interspersed with successes",
    );
    // All 6 tool-bearing turns must have run. A looser bound (e.g. >= 4)
    // would still pass if the counter were never reset, because the loop
    // would only stop after the fourth turn's failure.
    const toolExecEnds = events.filter((e) => e.type === "tool_execution_end");
    assert.equal(
      toolExecEnds.length,
      toolBearingTurns,
      `Expected all ${toolBearingTurns} tool executions (2 bad + 1 good + 2 bad + 1 good), got ${toolExecEnds.length}`,
    );
  });

  it("exports MAX_CONSECUTIVE_VALIDATION_FAILURES as a configurable constant", () => {
    assert.equal(typeof MAX_CONSECUTIVE_VALIDATION_FAILURES, "number");
    assert.ok(
      MAX_CONSECUTIVE_VALIDATION_FAILURES >= 2,
      "Cap must be at least 2 to allow one retry",
    );
    assert.ok(
      MAX_CONSECUTIVE_VALIDATION_FAILURES <= 10,
      "Cap must not be unreasonably high",
    );
  });

  it("does NOT trip schema overload cap on tool execution errors like bash exit code 1 (#3618)", async () => {
    // Simulates the real scenario: a tool (bash) that passes validation but
    // throws during execution (e.g. rg/grep returning exit code 1 = no matches).
    // These are valid tool invocations — the schema was correct, the tool ran,
    // it just returned a non-zero exit code. The cap should only trigger for
    // preparation/schema failures, not execution failures.
    const bashTool: AgentTool<any> = {
      name: "bash",
      label: "Bash",
      description: "Run a bash command",
      parameters: Type.Object({
        command: Type.String(),
      }),
      execute: async () => {
        // Simulate bash tool rejecting on non-zero exit code
        throw new Error("(no output)\n\nCommand exited with code 1");
      },
    };
    // LLM sends valid tool calls (schema is correct) that fail at execution
    const validBashCall = makeAssistantMessage({
      content: [
        {
          type: "toolCall",
          id: `tc_bash_${Date.now()}_${Math.random()}`,
          name: "bash",
          arguments: { command: "rg -l 'nonexistent' src/" },
        },
      ],
      stopReason: "toolUse",
    });
    const finalStop = makeAssistantMessage({
      content: [{ type: "text", text: "No references found." }],
      stopReason: "stop",
    });
    // Send more than MAX_CONSECUTIVE_VALIDATION_FAILURES bash calls that throw
    const expectedErrors = MAX_CONSECUTIVE_VALIDATION_FAILURES + 2;
    const responses: AssistantMessage[] = [
      ...Array.from({ length: expectedErrors }, () => validBashCall),
      finalStop,
    ];

    const events = await runLoop(bashTool, responses, "Search for references");

    // Must complete normally — execution errors should NOT trigger the cap
    const agentEnd = events.find(
      (e): e is Extract<AgentEvent, { type: "agent_end" }> =>
        e.type === "agent_end",
    );
    assert.ok(agentEnd, "agent loop must emit agent_end");
    // Count tool execution errors
    const toolErrors = events.filter(
      (e) => e.type === "tool_execution_end" && e.isError === true,
    );
    // All bash calls should have been attempted (not capped early)
    assert.ok(
      toolErrors.length >= MAX_CONSECUTIVE_VALIDATION_FAILURES + 2,
      `Expected all ${MAX_CONSECUTIVE_VALIDATION_FAILURES + 2} bash execution errors to be processed (not capped), got ${toolErrors.length}`,
    );
    // The stop message should NOT contain the schema overload text
    const allMessages: AgentMessage[] = agentEnd.messages;
    const lastMessage = allMessages[allMessages.length - 1];
    assert.ok(lastMessage, "agent_end must carry at least one message");
    const lastText =
      lastMessage.role === "assistant"
        ? (lastMessage as AssistantMessage).content.find(
            (c) => c.type === "text",
          )
        : undefined;
    if (lastText && lastText.type === "text") {
      assert.ok(
        !lastText.text.includes(
          "consecutive turns with all tool calls failing",
        ),
        "Final message must NOT contain schema overload stop text for execution-only errors",
      );
    }
  });
});