fix(tui,gsd): tool-call loop guard + TUI stack overflow prevention (#1801)
This commit is contained in:
parent
48c25dc853
commit
272963569d
7 changed files with 237 additions and 10 deletions
|
|
@ -121,7 +121,7 @@ export class Markdown implements Component {
|
|||
const token = tokens[i];
|
||||
const nextToken = tokens[i + 1];
|
||||
const tokenLines = this.renderToken(token, contentWidth, nextToken?.type);
|
||||
renderedLines.push(...tokenLines);
|
||||
for (let j = 0; j < tokenLines.length; j++) renderedLines.push(tokenLines[j]);
|
||||
}
|
||||
|
||||
// Wrap lines (NO padding, NO background yet)
|
||||
|
|
@ -308,7 +308,8 @@ export class Markdown implements Component {
|
|||
}
|
||||
|
||||
case "code": {
|
||||
lines.push(...this.renderCodeBlock(token.text, token.lang));
|
||||
const codeBlockLines = this.renderCodeBlock(token.text, token.lang);
|
||||
for (let j = 0; j < codeBlockLines.length; j++) lines.push(codeBlockLines[j]);
|
||||
if (nextTokenType !== "space") {
|
||||
lines.push(""); // Add spacing after code blocks (unless space token follows)
|
||||
}
|
||||
|
|
@ -317,7 +318,7 @@ export class Markdown implements Component {
|
|||
|
||||
case "list": {
|
||||
const listLines = this.renderList(token as any, 0, styleContext);
|
||||
lines.push(...listLines);
|
||||
for (let j = 0; j < listLines.length; j++) lines.push(listLines[j]);
|
||||
// Don't add spacing after lists if a space token follows
|
||||
// (the space token will handle it)
|
||||
break;
|
||||
|
|
@ -325,7 +326,7 @@ export class Markdown implements Component {
|
|||
|
||||
case "table": {
|
||||
const tableLines = this.renderTable(token as any, width, styleContext);
|
||||
lines.push(...tableLines);
|
||||
for (let j = 0; j < tableLines.length; j++) lines.push(tableLines[j]);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -561,7 +562,7 @@ export class Markdown implements Component {
|
|||
// Nested list - render with one additional indent level
|
||||
// These lines will have their own indent, so we just add them as-is
|
||||
const nestedLines = this.renderList(token as any, parentDepth + 1, styleContext);
|
||||
lines.push(...nestedLines);
|
||||
for (let j = 0; j < nestedLines.length; j++) lines.push(nestedLines[j]);
|
||||
} else if (token.type === "text") {
|
||||
// Text content (may have inline tokens)
|
||||
const text =
|
||||
|
|
@ -575,7 +576,8 @@ export class Markdown implements Component {
|
|||
lines.push(text);
|
||||
} else if (token.type === "code") {
|
||||
// Code block in list item
|
||||
lines.push(...this.renderCodeBlock(token.text, token.lang));
|
||||
const codeLines = this.renderCodeBlock(token.text, token.lang);
|
||||
for (let j = 0; j < codeLines.length; j++) lines.push(codeLines[j]);
|
||||
} else {
|
||||
// Other token types - try to render as inline
|
||||
const text = this.renderInlineTokens([token], styleContext);
|
||||
|
|
|
|||
|
|
@ -91,7 +91,8 @@ export class SettingsList implements Component {
|
|||
const lines: string[] = [];
|
||||
|
||||
if (this.searchEnabled && this.searchInput) {
|
||||
lines.push(...this.searchInput.render(width));
|
||||
const rendered = this.searchInput.render(width);
|
||||
for (let i = 0; i < rendered.length; i++) lines.push(rendered[i]);
|
||||
lines.push("");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -191,7 +191,8 @@ export class Container implements Component {
|
|||
render(width: number): string[] {
|
||||
const lines: string[] = [];
|
||||
for (const child of this.children) {
|
||||
lines.push(...child.render(width));
|
||||
const rendered = child.render(width);
|
||||
for (let i = 0; i < rendered.length; i++) lines.push(rendered[i]);
|
||||
}
|
||||
return lines;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,7 +27,10 @@ export function markToolEnd(toolCallId: string): void {
|
|||
*/
|
||||
export function getOldestInFlightToolAgeMs(): number {
|
||||
if (inFlightTools.size === 0) return 0;
|
||||
const oldestStart = Math.min(...inFlightTools.values());
|
||||
let oldestStart = Infinity;
|
||||
for (const t of inFlightTools.values()) {
|
||||
if (t < oldestStart) oldestStart = t;
|
||||
}
|
||||
return Date.now() - oldestStart;
|
||||
}
|
||||
|
||||
|
|
@ -43,7 +46,11 @@ export function getInFlightToolCount(): number {
|
|||
*/
|
||||
export function getOldestInFlightToolStart(): number | undefined {
|
||||
if (inFlightTools.size === 0) return undefined;
|
||||
return Math.min(...inFlightTools.values());
|
||||
let oldest = Infinity;
|
||||
for (const t of inFlightTools.values()) {
|
||||
if (t < oldest) oldest = t;
|
||||
}
|
||||
return oldest;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import { loadFile, saveFile, formatContinue } from "../files.js";
|
|||
import { deriveState } from "../state.js";
|
||||
import { getAutoDashboardData, isAutoActive, isAutoPaused, markToolEnd, markToolStart } from "../auto.js";
|
||||
import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js";
|
||||
import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js";
|
||||
import { saveActivityLog } from "../activity-log.js";
|
||||
|
||||
// Skip the welcome screen on the very first session_start — cli.ts already
|
||||
|
|
@ -22,6 +23,7 @@ let isFirstSession = true;
|
|||
export function registerHooks(pi: ExtensionAPI): void {
|
||||
pi.on("session_start", async (_event, ctx) => {
|
||||
resetWriteGateState();
|
||||
resetToolCallLoopGuard();
|
||||
if (isFirstSession) {
|
||||
isFirstSession = false;
|
||||
} else {
|
||||
|
|
@ -58,6 +60,7 @@ export function registerHooks(pi: ExtensionAPI): void {
|
|||
});
|
||||
|
||||
pi.on("agent_end", async (event, ctx: ExtensionContext) => {
|
||||
resetToolCallLoopGuard();
|
||||
await handleAgentEnd(pi, event, ctx);
|
||||
});
|
||||
|
||||
|
|
@ -113,6 +116,12 @@ export function registerHooks(pi: ExtensionAPI): void {
|
|||
});
|
||||
|
||||
pi.on("tool_call", async (event) => {
|
||||
// ── Loop guard: block repeated identical tool calls ──
|
||||
const loopCheck = checkToolCallLoop(event.toolName, event.input as Record<string, unknown>);
|
||||
if (loopCheck.block) {
|
||||
return { block: true, reason: loopCheck.reason };
|
||||
}
|
||||
|
||||
if (!isToolCallEventType("write", event)) return;
|
||||
const result = shouldBlockContextWrite(
|
||||
event.toolName,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,84 @@
|
|||
/**
|
||||
* Tool-call loop guard.
|
||||
*
|
||||
* Detects when a model calls the same tool with identical arguments
|
||||
* repeatedly within a single agent turn. Works in both auto-mode and
|
||||
* interactive sessions by hooking into the `tool_call` event, which
|
||||
* fires before execution and can block the call.
|
||||
*
|
||||
* The guard uses a sliding window: it tracks the last N tool signatures
|
||||
* and blocks when the same signature appears more than MAX_CONSECUTIVE
|
||||
* times in a row. Resets on each agent turn (session_start, agent_end)
|
||||
* and when a different tool call breaks the streak.
|
||||
*/
|
||||
|
||||
import { createHash } from "node:crypto";
|
||||
|
||||
const MAX_CONSECUTIVE_IDENTICAL_CALLS = 4;
|
||||
|
||||
let consecutiveCount = 0;
|
||||
let lastSignature = "";
|
||||
let enabled = true;
|
||||
|
||||
/** Hash tool name + args into a compact signature for comparison. */
|
||||
function hashToolCall(toolName: string, args: Record<string, unknown>): string {
|
||||
const h = createHash("sha256");
|
||||
h.update(toolName);
|
||||
// Sort keys for deterministic hashing regardless of object key order
|
||||
h.update(JSON.stringify(args, Object.keys(args).sort()));
|
||||
return h.digest("hex").slice(0, 16);
|
||||
}
|
||||
|
||||
/**
|
||||
* Record a tool call and check if it should be blocked.
|
||||
*
|
||||
* Returns `{ block: false }` for allowed calls.
|
||||
* Returns `{ block: true, reason }` when the loop threshold is exceeded.
|
||||
*/
|
||||
export function checkToolCallLoop(
|
||||
toolName: string,
|
||||
args: Record<string, unknown>,
|
||||
): { block: boolean; reason?: string; count?: number } {
|
||||
if (!enabled) return { block: false, count: 0 };
|
||||
|
||||
const sig = hashToolCall(toolName, args);
|
||||
|
||||
if (sig === lastSignature) {
|
||||
consecutiveCount++;
|
||||
} else {
|
||||
consecutiveCount = 1;
|
||||
lastSignature = sig;
|
||||
}
|
||||
|
||||
if (consecutiveCount > MAX_CONSECUTIVE_IDENTICAL_CALLS) {
|
||||
return {
|
||||
block: true,
|
||||
reason:
|
||||
`Tool loop detected: ${toolName} called ${consecutiveCount} times ` +
|
||||
`with identical arguments. Blocking to prevent infinite loop. ` +
|
||||
`Try a different approach or modify your arguments.`,
|
||||
count: consecutiveCount,
|
||||
};
|
||||
}
|
||||
|
||||
return { block: false, count: consecutiveCount };
|
||||
}
|
||||
|
||||
/** Reset the guard state. Call at agent turn boundaries. */
|
||||
export function resetToolCallLoopGuard(): void {
|
||||
consecutiveCount = 0;
|
||||
lastSignature = "";
|
||||
enabled = true;
|
||||
}
|
||||
|
||||
/** Disable the guard (e.g. during shutdown). */
|
||||
export function disableToolCallLoopGuard(): void {
|
||||
enabled = false;
|
||||
consecutiveCount = 0;
|
||||
lastSignature = "";
|
||||
}
|
||||
|
||||
/** Get current consecutive count for diagnostics. */
|
||||
export function getToolCallLoopCount(): number {
|
||||
return consecutiveCount;
|
||||
}
|
||||
123
src/resources/extensions/gsd/tests/tool-call-loop-guard.test.ts
Normal file
123
src/resources/extensions/gsd/tests/tool-call-loop-guard.test.ts
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
// tool-call-loop-guard — Tests for the tool-call loop detection guard.
|
||||
//
|
||||
// Verifies that identical consecutive tool calls are detected and blocked
|
||||
// after exceeding the threshold, and that the guard resets properly.
|
||||
|
||||
import { createTestContext } from './test-helpers.ts';
|
||||
import {
|
||||
checkToolCallLoop,
|
||||
resetToolCallLoopGuard,
|
||||
disableToolCallLoopGuard,
|
||||
getToolCallLoopCount,
|
||||
} from '../bootstrap/tool-call-loop-guard.ts';
|
||||
|
||||
const { assertEq, assertTrue, report } = createTestContext();
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Allows first N calls, blocks after threshold
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
console.log('\n── Loop guard: blocks after threshold ──');
|
||||
|
||||
{
|
||||
resetToolCallLoopGuard();
|
||||
|
||||
// First 4 identical calls should be allowed (threshold is 4)
|
||||
for (let i = 1; i <= 4; i++) {
|
||||
const result = checkToolCallLoop('web_search', { query: 'same query' });
|
||||
assertTrue(result.block === false, `Call ${i} should be allowed`);
|
||||
assertEq(result.count, i, `Count should be ${i} after call ${i}`);
|
||||
}
|
||||
|
||||
// 5th identical call should be blocked
|
||||
const blocked = checkToolCallLoop('web_search', { query: 'same query' });
|
||||
assertTrue(blocked.block === true, '5th identical call should be blocked');
|
||||
assertTrue(blocked.reason!.includes('web_search'), 'Reason should mention tool name');
|
||||
assertTrue(blocked.reason!.includes('5'), 'Reason should mention count');
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Different tool calls reset the streak
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
console.log('\n── Loop guard: different calls reset streak ──');
|
||||
|
||||
{
|
||||
resetToolCallLoopGuard();
|
||||
|
||||
checkToolCallLoop('web_search', { query: 'query A' });
|
||||
checkToolCallLoop('web_search', { query: 'query A' });
|
||||
checkToolCallLoop('web_search', { query: 'query A' });
|
||||
assertEq(getToolCallLoopCount(), 3, 'Count should be 3 after 3 identical calls');
|
||||
|
||||
// A different call resets the streak
|
||||
const different = checkToolCallLoop('bash', { command: 'ls' });
|
||||
assertTrue(different.block === false, 'Different tool call should be allowed');
|
||||
assertEq(getToolCallLoopCount(), 1, 'Count should reset to 1 after different call');
|
||||
|
||||
// Same tool but different args also resets
|
||||
checkToolCallLoop('web_search', { query: 'query A' });
|
||||
checkToolCallLoop('web_search', { query: 'query B' }); // different args
|
||||
assertEq(getToolCallLoopCount(), 1, 'Different args should reset count');
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Reset clears the guard
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
console.log('\n── Loop guard: reset clears state ──');
|
||||
|
||||
{
|
||||
resetToolCallLoopGuard();
|
||||
checkToolCallLoop('web_search', { query: 'q' });
|
||||
checkToolCallLoop('web_search', { query: 'q' });
|
||||
checkToolCallLoop('web_search', { query: 'q' });
|
||||
assertEq(getToolCallLoopCount(), 3, 'Count should be 3 before reset');
|
||||
|
||||
resetToolCallLoopGuard();
|
||||
assertEq(getToolCallLoopCount(), 0, 'Count should be 0 after reset');
|
||||
|
||||
// After reset, the same call starts fresh
|
||||
const result = checkToolCallLoop('web_search', { query: 'q' });
|
||||
assertTrue(result.block === false, 'Call after reset should be allowed');
|
||||
assertEq(getToolCallLoopCount(), 1, 'Count should be 1 after first call post-reset');
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Disable makes guard permissive
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
console.log('\n── Loop guard: disable allows everything ──');
|
||||
|
||||
{
|
||||
disableToolCallLoopGuard();
|
||||
|
||||
for (let i = 0; i < 10; i++) {
|
||||
const result = checkToolCallLoop('web_search', { query: 'same' });
|
||||
assertTrue(result.block === false, `Call ${i + 1} should be allowed when disabled`);
|
||||
}
|
||||
|
||||
// Re-enable via reset
|
||||
resetToolCallLoopGuard();
|
||||
checkToolCallLoop('web_search', { query: 'q' });
|
||||
assertEq(getToolCallLoopCount(), 1, 'Guard should be active again after reset');
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Arg order doesn't affect hash
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
console.log('\n── Loop guard: arg order is normalized ──');
|
||||
|
||||
{
|
||||
resetToolCallLoopGuard();
|
||||
|
||||
checkToolCallLoop('web_search', { query: 'test', limit: 5 });
|
||||
const result = checkToolCallLoop('web_search', { limit: 5, query: 'test' }); // same args, different order
|
||||
assertTrue(result.block === false, 'Same args in different order should count as consecutive');
|
||||
assertEq(getToolCallLoopCount(), 2, 'Should detect as same call regardless of key order');
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
report();
|
||||
Loading…
Add table
Reference in a new issue