fix: exempt interactive tools from idle watchdog stall detection (#2676)
The idle watchdog treated ask_user_questions and secure_env_collect as stalled tools, killing sessions before users could respond. Root cause: tool tracking stored only toolCallId → timestamp with no tool name, so the watchdog couldn't distinguish user-interactive tools from hung tools. Changes: - auto-tool-tracking: store toolName alongside timestamp, add INTERACTIVE_TOOLS set and hasInteractiveToolInFlight() - auto.ts: forward optional toolName through markToolStart wrapper - register-hooks: pass event.toolName to markToolStart - auto-timers: skip stall detection when interactive tool is in-flight, record lastProgressKind: 'interactive-tool-waiting' - New test: 13 cases covering interactive exemption, completion cleanup, backwards compat, and existing behavior preservation
This commit is contained in:
parent
8d77c40638
commit
f40418be8d
5 changed files with 163 additions and 9 deletions
|
|
@ -16,6 +16,7 @@ import {
|
|||
getInFlightToolCount,
|
||||
getOldestInFlightToolStart,
|
||||
clearInFlightTools,
|
||||
hasInteractiveToolInFlight,
|
||||
} from "./auto-tool-tracking.js";
|
||||
import { detectWorkingTreeActivity } from "./auto-supervisor.js";
|
||||
import { closeoutUnit, type CloseoutOptions } from "./auto-unit-closeout.js";
|
||||
|
|
@ -149,6 +150,15 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
|
|||
// But only suppress recovery if the tool started recently.
|
||||
let stalledToolDetected = false;
|
||||
if (getInFlightToolCount() > 0) {
|
||||
// User-interactive tools (ask_user_questions, secure_env_collect) block
|
||||
// waiting for human input by design — never treat them as stalled (#2676).
|
||||
if (hasInteractiveToolInFlight()) {
|
||||
writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
|
||||
lastProgressAt: Date.now(),
|
||||
lastProgressKind: "interactive-tool-waiting",
|
||||
});
|
||||
return;
|
||||
}
|
||||
const oldestStart = getOldestInFlightToolStart()!;
|
||||
const toolAgeMs = Date.now() - oldestStart;
|
||||
if (toolAgeMs < idleTimeoutMs) {
|
||||
|
|
|
|||
|
|
@ -4,15 +4,27 @@
|
|||
* can distinguish "waiting for tool completion" from "truly idle".
|
||||
*/
|
||||
|
||||
const inFlightTools = new Map<string, number>();
|
||||
interface InFlightTool {
|
||||
startedAt: number;
|
||||
toolName: string;
|
||||
}
|
||||
|
||||
const inFlightTools = new Map<string, InFlightTool>();
|
||||
|
||||
/**
|
||||
* Tools that block waiting for human input by design.
|
||||
* The idle watchdog must not treat these as stalled.
|
||||
*/
|
||||
const INTERACTIVE_TOOLS = new Set(["ask_user_questions", "secure_env_collect"]);
|
||||
|
||||
/**
|
||||
* Mark a tool execution as in-flight.
|
||||
* Records start time so the idle watchdog can detect tools hung longer than the idle timeout.
|
||||
* Records start time and tool name so the idle watchdog can detect tools
|
||||
* hung longer than the idle timeout while exempting interactive tools.
|
||||
*/
|
||||
export function markToolStart(toolCallId: string, isActive: boolean): void {
|
||||
export function markToolStart(toolCallId: string, isActive: boolean, toolName?: string): void {
|
||||
if (!isActive) return;
|
||||
inFlightTools.set(toolCallId, Date.now());
|
||||
inFlightTools.set(toolCallId, { startedAt: Date.now(), toolName: toolName ?? "unknown" });
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -29,7 +41,7 @@ export function getOldestInFlightToolAgeMs(): number {
|
|||
if (inFlightTools.size === 0) return 0;
|
||||
let oldestStart = Infinity;
|
||||
for (const t of inFlightTools.values()) {
|
||||
if (t < oldestStart) oldestStart = t;
|
||||
if (t.startedAt < oldestStart) oldestStart = t.startedAt;
|
||||
}
|
||||
return Date.now() - oldestStart;
|
||||
}
|
||||
|
|
@ -48,11 +60,23 @@ export function getOldestInFlightToolStart(): number | undefined {
|
|||
if (inFlightTools.size === 0) return undefined;
|
||||
let oldest = Infinity;
|
||||
for (const t of inFlightTools.values()) {
|
||||
if (t < oldest) oldest = t;
|
||||
if (t.startedAt < oldest) oldest = t.startedAt;
|
||||
}
|
||||
return oldest;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if any currently in-flight tool is a user-interactive tool
|
||||
* (e.g. ask_user_questions, secure_env_collect) that blocks waiting for
|
||||
* human input. These must be exempt from idle stall detection.
|
||||
*/
|
||||
export function hasInteractiveToolInFlight(): boolean {
|
||||
for (const { toolName } of inFlightTools.values()) {
|
||||
if (INTERACTIVE_TOOLS.has(toolName)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear all in-flight tool tracking state.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -73,6 +73,7 @@ import {
|
|||
getOldestInFlightToolAgeMs as _getOldestInFlightToolAgeMs,
|
||||
getInFlightToolCount,
|
||||
getOldestInFlightToolStart,
|
||||
hasInteractiveToolInFlight,
|
||||
clearInFlightTools,
|
||||
} from "./auto-tool-tracking.js";
|
||||
import { closeoutUnit } from "./auto-unit-closeout.js";
|
||||
|
|
@ -375,8 +376,8 @@ export function getAutoModeStartModel(): {
|
|||
}
|
||||
|
||||
// Tool tracking — delegates to auto-tool-tracking.ts
|
||||
export function markToolStart(toolCallId: string): void {
|
||||
_markToolStart(toolCallId, s.active);
|
||||
export function markToolStart(toolCallId: string, toolName?: string): void {
|
||||
_markToolStart(toolCallId, s.active, toolName);
|
||||
}
|
||||
|
||||
export function markToolEnd(toolCallId: string): void {
|
||||
|
|
|
|||
|
|
@ -245,7 +245,7 @@ export function registerHooks(pi: ExtensionAPI): void {
|
|||
|
||||
pi.on("tool_execution_start", async (event) => {
|
||||
if (!isAutoActive()) return;
|
||||
markToolStart(event.toolCallId);
|
||||
markToolStart(event.toolCallId, event.toolName);
|
||||
});
|
||||
|
||||
pi.on("tool_execution_end", async (event) => {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,119 @@
|
|||
/**
|
||||
* Tests for #2676: idle watchdog must exempt user-interactive tools
|
||||
* (ask_user_questions, secure_env_collect) from stall detection.
|
||||
*/
|
||||
import { describe, test, beforeEach } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import {
|
||||
markToolStart,
|
||||
markToolEnd,
|
||||
hasInteractiveToolInFlight,
|
||||
getInFlightToolCount,
|
||||
getOldestInFlightToolStart,
|
||||
getOldestInFlightToolAgeMs,
|
||||
clearInFlightTools,
|
||||
} from "../auto-tool-tracking.ts";
|
||||
|
||||
// These tests call the tracking module directly (bypassing the auto.ts
|
||||
// wrapper which guards on s.active) so we always pass isActive=true.
|
||||
|
||||
beforeEach(() => {
|
||||
clearInFlightTools();
|
||||
});
|
||||
|
||||
describe("hasInteractiveToolInFlight", () => {
|
||||
test("returns false when no tools are in-flight", () => {
|
||||
assert.equal(hasInteractiveToolInFlight(), false);
|
||||
});
|
||||
|
||||
test("returns false when only non-interactive tools are in-flight", () => {
|
||||
markToolStart("call-1", true, "bash");
|
||||
markToolStart("call-2", true, "read");
|
||||
assert.equal(hasInteractiveToolInFlight(), false);
|
||||
});
|
||||
|
||||
test("returns true when ask_user_questions is in-flight", () => {
|
||||
markToolStart("call-1", true, "bash");
|
||||
markToolStart("call-2", true, "ask_user_questions");
|
||||
assert.equal(hasInteractiveToolInFlight(), true);
|
||||
});
|
||||
|
||||
test("returns true when secure_env_collect is in-flight", () => {
|
||||
markToolStart("call-1", true, "secure_env_collect");
|
||||
assert.equal(hasInteractiveToolInFlight(), true);
|
||||
});
|
||||
|
||||
test("returns false after interactive tool completes", () => {
|
||||
markToolStart("call-1", true, "ask_user_questions");
|
||||
assert.equal(hasInteractiveToolInFlight(), true);
|
||||
markToolEnd("call-1");
|
||||
assert.equal(hasInteractiveToolInFlight(), false);
|
||||
});
|
||||
|
||||
test("returns true if one of multiple tools is interactive", () => {
|
||||
markToolStart("call-1", true, "bash");
|
||||
markToolStart("call-2", true, "edit");
|
||||
markToolStart("call-3", true, "ask_user_questions");
|
||||
markToolStart("call-4", true, "write");
|
||||
assert.equal(hasInteractiveToolInFlight(), true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("toolName tracking in markToolStart", () => {
|
||||
test("defaults toolName to 'unknown' when not provided", () => {
|
||||
markToolStart("call-1", true);
|
||||
// unknown tool should not be treated as interactive
|
||||
assert.equal(hasInteractiveToolInFlight(), false);
|
||||
assert.equal(getInFlightToolCount(), 1);
|
||||
});
|
||||
|
||||
test("no-ops when isActive is false", () => {
|
||||
markToolStart("call-1", false, "ask_user_questions");
|
||||
assert.equal(getInFlightToolCount(), 0);
|
||||
assert.equal(hasInteractiveToolInFlight(), false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("existing tracking behavior preserved with toolName", () => {
|
||||
test("getInFlightToolCount tracks correctly", () => {
|
||||
assert.equal(getInFlightToolCount(), 0);
|
||||
markToolStart("call-1", true, "bash");
|
||||
assert.equal(getInFlightToolCount(), 1);
|
||||
markToolStart("call-2", true, "ask_user_questions");
|
||||
assert.equal(getInFlightToolCount(), 2);
|
||||
markToolEnd("call-1");
|
||||
assert.equal(getInFlightToolCount(), 1);
|
||||
markToolEnd("call-2");
|
||||
assert.equal(getInFlightToolCount(), 0);
|
||||
});
|
||||
|
||||
test("getOldestInFlightToolStart returns correct timestamp", () => {
|
||||
assert.equal(getOldestInFlightToolStart(), undefined);
|
||||
const before = Date.now();
|
||||
markToolStart("call-1", true, "bash");
|
||||
const after = Date.now();
|
||||
const oldest = getOldestInFlightToolStart();
|
||||
assert.ok(oldest !== undefined);
|
||||
assert.ok(oldest! >= before && oldest! <= after);
|
||||
});
|
||||
|
||||
test("getOldestInFlightToolAgeMs returns 0 with no tools", () => {
|
||||
assert.equal(getOldestInFlightToolAgeMs(), 0);
|
||||
});
|
||||
|
||||
test("getOldestInFlightToolAgeMs returns positive value with tools", () => {
|
||||
markToolStart("call-1", true, "read");
|
||||
const age = getOldestInFlightToolAgeMs();
|
||||
assert.ok(age >= 0, `age should be non-negative, got ${age}`);
|
||||
});
|
||||
|
||||
test("clearInFlightTools resets all state", () => {
|
||||
markToolStart("call-1", true, "ask_user_questions");
|
||||
markToolStart("call-2", true, "bash");
|
||||
assert.equal(getInFlightToolCount(), 2);
|
||||
assert.equal(hasInteractiveToolInFlight(), true);
|
||||
clearInFlightTools();
|
||||
assert.equal(getInFlightToolCount(), 0);
|
||||
assert.equal(hasInteractiveToolInFlight(), false);
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue