fix: exempt interactive tools from idle watchdog stall detection (#2676)

The idle watchdog treated ask_user_questions and secure_env_collect as
stalled tools, killing sessions before users could respond.

Root cause: tool tracking stored only toolCallId → timestamp with no
tool name, so the watchdog couldn't distinguish user-interactive tools
from hung tools.

Changes:
- auto-tool-tracking: store toolName alongside timestamp, add
  INTERACTIVE_TOOLS set and hasInteractiveToolInFlight()
- auto.ts: forward optional toolName through markToolStart wrapper
- register-hooks: pass event.toolName to markToolStart
- auto-timers: skip stall detection when interactive tool is in-flight,
  record lastProgressKind: 'interactive-tool-waiting'
- New test: 13 cases covering interactive exemption, completion cleanup,
  backwards compat, and existing behavior preservation
This commit is contained in:
Vojtěch Šplíchal 2026-03-26 16:52:12 +01:00
parent 8d77c40638
commit f40418be8d
5 changed files with 163 additions and 9 deletions

View file

@ -16,6 +16,7 @@ import {
getInFlightToolCount,
getOldestInFlightToolStart,
clearInFlightTools,
hasInteractiveToolInFlight,
} from "./auto-tool-tracking.js";
import { detectWorkingTreeActivity } from "./auto-supervisor.js";
import { closeoutUnit, type CloseoutOptions } from "./auto-unit-closeout.js";
@ -149,6 +150,15 @@ export function startUnitSupervision(sctx: SupervisionContext): void {
// But only suppress recovery if the tool started recently.
let stalledToolDetected = false;
if (getInFlightToolCount() > 0) {
// User-interactive tools (ask_user_questions, secure_env_collect) block
// waiting for human input by design — never treat them as stalled (#2676).
if (hasInteractiveToolInFlight()) {
writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
lastProgressAt: Date.now(),
lastProgressKind: "interactive-tool-waiting",
});
return;
}
const oldestStart = getOldestInFlightToolStart()!;
const toolAgeMs = Date.now() - oldestStart;
if (toolAgeMs < idleTimeoutMs) {

View file

@ -4,15 +4,27 @@
* can distinguish "waiting for tool completion" from "truly idle".
*/
const inFlightTools = new Map<string, number>();
interface InFlightTool {
startedAt: number;
toolName: string;
}
const inFlightTools = new Map<string, InFlightTool>();
/**
* Tools that block waiting for human input by design.
* The idle watchdog must not treat these as stalled.
*/
const INTERACTIVE_TOOLS = new Set(["ask_user_questions", "secure_env_collect"]);
/**
* Mark a tool execution as in-flight.
* Records start time so the idle watchdog can detect tools hung longer than the idle timeout.
* Records start time and tool name so the idle watchdog can detect tools
* hung longer than the idle timeout while exempting interactive tools.
*/
export function markToolStart(toolCallId: string, isActive: boolean): void {
export function markToolStart(toolCallId: string, isActive: boolean, toolName?: string): void {
if (!isActive) return;
inFlightTools.set(toolCallId, Date.now());
inFlightTools.set(toolCallId, { startedAt: Date.now(), toolName: toolName ?? "unknown" });
}
/**
@ -29,7 +41,7 @@ export function getOldestInFlightToolAgeMs(): number {
if (inFlightTools.size === 0) return 0;
let oldestStart = Infinity;
for (const t of inFlightTools.values()) {
if (t < oldestStart) oldestStart = t;
if (t.startedAt < oldestStart) oldestStart = t.startedAt;
}
return Date.now() - oldestStart;
}
@ -48,11 +60,23 @@ export function getOldestInFlightToolStart(): number | undefined {
if (inFlightTools.size === 0) return undefined;
let oldest = Infinity;
for (const t of inFlightTools.values()) {
if (t < oldest) oldest = t;
if (t.startedAt < oldest) oldest = t.startedAt;
}
return oldest;
}
/**
* Returns true if any currently in-flight tool is a user-interactive tool
* (e.g. ask_user_questions, secure_env_collect) that blocks waiting for
* human input. These must be exempt from idle stall detection.
*/
export function hasInteractiveToolInFlight(): boolean {
for (const { toolName } of inFlightTools.values()) {
if (INTERACTIVE_TOOLS.has(toolName)) return true;
}
return false;
}
/**
* Clear all in-flight tool tracking state.
*/

View file

@ -73,6 +73,7 @@ import {
getOldestInFlightToolAgeMs as _getOldestInFlightToolAgeMs,
getInFlightToolCount,
getOldestInFlightToolStart,
hasInteractiveToolInFlight,
clearInFlightTools,
} from "./auto-tool-tracking.js";
import { closeoutUnit } from "./auto-unit-closeout.js";
@ -375,8 +376,8 @@ export function getAutoModeStartModel(): {
}
// Tool tracking — delegates to auto-tool-tracking.ts
export function markToolStart(toolCallId: string): void {
_markToolStart(toolCallId, s.active);
export function markToolStart(toolCallId: string, toolName?: string): void {
_markToolStart(toolCallId, s.active, toolName);
}
export function markToolEnd(toolCallId: string): void {

View file

@ -245,7 +245,7 @@ export function registerHooks(pi: ExtensionAPI): void {
pi.on("tool_execution_start", async (event) => {
if (!isAutoActive()) return;
markToolStart(event.toolCallId);
markToolStart(event.toolCallId, event.toolName);
});
pi.on("tool_execution_end", async (event) => {

View file

@ -0,0 +1,119 @@
/**
* Tests for #2676: idle watchdog must exempt user-interactive tools
* (ask_user_questions, secure_env_collect) from stall detection.
*/
import { describe, test, beforeEach } from "node:test";
import assert from "node:assert/strict";
import {
markToolStart,
markToolEnd,
hasInteractiveToolInFlight,
getInFlightToolCount,
getOldestInFlightToolStart,
getOldestInFlightToolAgeMs,
clearInFlightTools,
} from "../auto-tool-tracking.ts";
// These tests call the tracking module directly (bypassing the auto.ts
// wrapper which guards on s.active) so we always pass isActive=true.
beforeEach(() => {
clearInFlightTools();
});
describe("hasInteractiveToolInFlight", () => {
test("returns false when no tools are in-flight", () => {
assert.equal(hasInteractiveToolInFlight(), false);
});
test("returns false when only non-interactive tools are in-flight", () => {
markToolStart("call-1", true, "bash");
markToolStart("call-2", true, "read");
assert.equal(hasInteractiveToolInFlight(), false);
});
test("returns true when ask_user_questions is in-flight", () => {
markToolStart("call-1", true, "bash");
markToolStart("call-2", true, "ask_user_questions");
assert.equal(hasInteractiveToolInFlight(), true);
});
test("returns true when secure_env_collect is in-flight", () => {
markToolStart("call-1", true, "secure_env_collect");
assert.equal(hasInteractiveToolInFlight(), true);
});
test("returns false after interactive tool completes", () => {
markToolStart("call-1", true, "ask_user_questions");
assert.equal(hasInteractiveToolInFlight(), true);
markToolEnd("call-1");
assert.equal(hasInteractiveToolInFlight(), false);
});
test("returns true if one of multiple tools is interactive", () => {
markToolStart("call-1", true, "bash");
markToolStart("call-2", true, "edit");
markToolStart("call-3", true, "ask_user_questions");
markToolStart("call-4", true, "write");
assert.equal(hasInteractiveToolInFlight(), true);
});
});
describe("toolName tracking in markToolStart", () => {
test("defaults toolName to 'unknown' when not provided", () => {
markToolStart("call-1", true);
// unknown tool should not be treated as interactive
assert.equal(hasInteractiveToolInFlight(), false);
assert.equal(getInFlightToolCount(), 1);
});
test("no-ops when isActive is false", () => {
markToolStart("call-1", false, "ask_user_questions");
assert.equal(getInFlightToolCount(), 0);
assert.equal(hasInteractiveToolInFlight(), false);
});
});
describe("existing tracking behavior preserved with toolName", () => {
test("getInFlightToolCount tracks correctly", () => {
assert.equal(getInFlightToolCount(), 0);
markToolStart("call-1", true, "bash");
assert.equal(getInFlightToolCount(), 1);
markToolStart("call-2", true, "ask_user_questions");
assert.equal(getInFlightToolCount(), 2);
markToolEnd("call-1");
assert.equal(getInFlightToolCount(), 1);
markToolEnd("call-2");
assert.equal(getInFlightToolCount(), 0);
});
test("getOldestInFlightToolStart returns correct timestamp", () => {
assert.equal(getOldestInFlightToolStart(), undefined);
const before = Date.now();
markToolStart("call-1", true, "bash");
const after = Date.now();
const oldest = getOldestInFlightToolStart();
assert.ok(oldest !== undefined);
assert.ok(oldest! >= before && oldest! <= after);
});
test("getOldestInFlightToolAgeMs returns 0 with no tools", () => {
assert.equal(getOldestInFlightToolAgeMs(), 0);
});
test("getOldestInFlightToolAgeMs returns positive value with tools", () => {
markToolStart("call-1", true, "read");
const age = getOldestInFlightToolAgeMs();
assert.ok(age >= 0, `age should be non-negative, got ${age}`);
});
test("clearInFlightTools resets all state", () => {
markToolStart("call-1", true, "ask_user_questions");
markToolStart("call-2", true, "bash");
assert.equal(getInFlightToolCount(), 2);
assert.equal(hasInteractiveToolInFlight(), true);
clearInFlightTools();
assert.equal(getInFlightToolCount(), 0);
assert.equal(hasInteractiveToolInFlight(), false);
});
});