From f40418be8d1983f717e24cb3967ec711fc52ba66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vojt=C4=9Bch=20=C5=A0pl=C3=ADchal?= Date: Thu, 26 Mar 2026 16:52:12 +0100 Subject: [PATCH] fix: exempt interactive tools from idle watchdog stall detection (#2676) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The idle watchdog treated ask_user_questions and secure_env_collect as stalled tools, killing sessions before users could respond. Root cause: tool tracking stored only toolCallId → timestamp with no tool name, so the watchdog couldn't distinguish user-interactive tools from hung tools. Changes: - auto-tool-tracking: store toolName alongside timestamp, add INTERACTIVE_TOOLS set and hasInteractiveToolInFlight() - auto.ts: forward optional toolName through markToolStart wrapper - register-hooks: pass event.toolName to markToolStart - auto-timers: skip stall detection when interactive tool is in-flight, record lastProgressKind: 'interactive-tool-waiting' - New test: 13 cases covering interactive exemption, completion cleanup, backwards compat, and existing behavior preservation --- src/resources/extensions/gsd/auto-timers.ts | 10 ++ .../extensions/gsd/auto-tool-tracking.ts | 36 +++++- src/resources/extensions/gsd/auto.ts | 5 +- .../gsd/bootstrap/register-hooks.ts | 2 +- .../interactive-tool-idle-exemption.test.ts | 119 ++++++++++++++++++ 5 files changed, 163 insertions(+), 9 deletions(-) create mode 100644 src/resources/extensions/gsd/tests/interactive-tool-idle-exemption.test.ts diff --git a/src/resources/extensions/gsd/auto-timers.ts b/src/resources/extensions/gsd/auto-timers.ts index 1a7c4740b..afa3af98b 100644 --- a/src/resources/extensions/gsd/auto-timers.ts +++ b/src/resources/extensions/gsd/auto-timers.ts @@ -16,6 +16,7 @@ import { getInFlightToolCount, getOldestInFlightToolStart, clearInFlightTools, + hasInteractiveToolInFlight, } from "./auto-tool-tracking.js"; import { detectWorkingTreeActivity } from "./auto-supervisor.js"; import { closeoutUnit, type CloseoutOptions } from "./auto-unit-closeout.js"; @@ -149,6 +150,15 @@ export function startUnitSupervision(sctx: SupervisionContext): void { // But only suppress recovery if the tool started recently. let stalledToolDetected = false; if (getInFlightToolCount() > 0) { + // User-interactive tools (ask_user_questions, secure_env_collect) block + // waiting for human input by design — never treat them as stalled (#2676). + if (hasInteractiveToolInFlight()) { + writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, { + lastProgressAt: Date.now(), + lastProgressKind: "interactive-tool-waiting", + }); + return; + } const oldestStart = getOldestInFlightToolStart()!; const toolAgeMs = Date.now() - oldestStart; if (toolAgeMs < idleTimeoutMs) { diff --git a/src/resources/extensions/gsd/auto-tool-tracking.ts b/src/resources/extensions/gsd/auto-tool-tracking.ts index eea96c602..65ef2ff01 100644 --- a/src/resources/extensions/gsd/auto-tool-tracking.ts +++ b/src/resources/extensions/gsd/auto-tool-tracking.ts @@ -4,15 +4,27 @@ * can distinguish "waiting for tool completion" from "truly idle". */ -const inFlightTools = new Map(); +interface InFlightTool { + startedAt: number; + toolName: string; +} + +const inFlightTools = new Map(); + +/** + * Tools that block waiting for human input by design. + * The idle watchdog must not treat these as stalled. + */ +const INTERACTIVE_TOOLS = new Set(["ask_user_questions", "secure_env_collect"]); /** * Mark a tool execution as in-flight. - * Records start time so the idle watchdog can detect tools hung longer than the idle timeout. + * Records start time and tool name so the idle watchdog can detect tools + * hung longer than the idle timeout while exempting interactive tools. */ -export function markToolStart(toolCallId: string, isActive: boolean): void { +export function markToolStart(toolCallId: string, isActive: boolean, toolName?: string): void { if (!isActive) return; - inFlightTools.set(toolCallId, Date.now()); + inFlightTools.set(toolCallId, { startedAt: Date.now(), toolName: toolName ?? "unknown" }); } /** @@ -29,7 +41,7 @@ export function getOldestInFlightToolAgeMs(): number { if (inFlightTools.size === 0) return 0; let oldestStart = Infinity; for (const t of inFlightTools.values()) { - if (t < oldestStart) oldestStart = t; + if (t.startedAt < oldestStart) oldestStart = t.startedAt; } return Date.now() - oldestStart; } @@ -48,11 +60,23 @@ export function getOldestInFlightToolStart(): number | undefined { if (inFlightTools.size === 0) return undefined; let oldest = Infinity; for (const t of inFlightTools.values()) { - if (t < oldest) oldest = t; + if (t.startedAt < oldest) oldest = t.startedAt; } return oldest; } +/** + * Returns true if any currently in-flight tool is a user-interactive tool + * (e.g. ask_user_questions, secure_env_collect) that blocks waiting for + * human input. These must be exempt from idle stall detection. + */ +export function hasInteractiveToolInFlight(): boolean { + for (const { toolName } of inFlightTools.values()) { + if (INTERACTIVE_TOOLS.has(toolName)) return true; + } + return false; +} + /** * Clear all in-flight tool tracking state. */ diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 1a9eff6d7..3c4a50d4e 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -73,6 +73,7 @@ import { getOldestInFlightToolAgeMs as _getOldestInFlightToolAgeMs, getInFlightToolCount, getOldestInFlightToolStart, + hasInteractiveToolInFlight, clearInFlightTools, } from "./auto-tool-tracking.js"; import { closeoutUnit } from "./auto-unit-closeout.js"; @@ -375,8 +376,8 @@ export function getAutoModeStartModel(): { } // Tool tracking — delegates to auto-tool-tracking.ts -export function markToolStart(toolCallId: string): void { - _markToolStart(toolCallId, s.active); +export function markToolStart(toolCallId: string, toolName?: string): void { + _markToolStart(toolCallId, s.active, toolName); } export function markToolEnd(toolCallId: string): void { diff --git a/src/resources/extensions/gsd/bootstrap/register-hooks.ts b/src/resources/extensions/gsd/bootstrap/register-hooks.ts index 07d385584..4fd7a1292 100644 --- a/src/resources/extensions/gsd/bootstrap/register-hooks.ts +++ b/src/resources/extensions/gsd/bootstrap/register-hooks.ts @@ -245,7 +245,7 @@ export function registerHooks(pi: ExtensionAPI): void { pi.on("tool_execution_start", async (event) => { if (!isAutoActive()) return; - markToolStart(event.toolCallId); + markToolStart(event.toolCallId, event.toolName); }); pi.on("tool_execution_end", async (event) => { diff --git a/src/resources/extensions/gsd/tests/interactive-tool-idle-exemption.test.ts b/src/resources/extensions/gsd/tests/interactive-tool-idle-exemption.test.ts new file mode 100644 index 000000000..6f3da71dd --- /dev/null +++ b/src/resources/extensions/gsd/tests/interactive-tool-idle-exemption.test.ts @@ -0,0 +1,119 @@ +/** + * Tests for #2676: idle watchdog must exempt user-interactive tools + * (ask_user_questions, secure_env_collect) from stall detection. + */ +import { describe, test, beforeEach } from "node:test"; +import assert from "node:assert/strict"; +import { + markToolStart, + markToolEnd, + hasInteractiveToolInFlight, + getInFlightToolCount, + getOldestInFlightToolStart, + getOldestInFlightToolAgeMs, + clearInFlightTools, +} from "../auto-tool-tracking.ts"; + +// These tests call the tracking module directly (bypassing the auto.ts +// wrapper which guards on s.active) so we always pass isActive=true. + +beforeEach(() => { + clearInFlightTools(); +}); + +describe("hasInteractiveToolInFlight", () => { + test("returns false when no tools are in-flight", () => { + assert.equal(hasInteractiveToolInFlight(), false); + }); + + test("returns false when only non-interactive tools are in-flight", () => { + markToolStart("call-1", true, "bash"); + markToolStart("call-2", true, "read"); + assert.equal(hasInteractiveToolInFlight(), false); + }); + + test("returns true when ask_user_questions is in-flight", () => { + markToolStart("call-1", true, "bash"); + markToolStart("call-2", true, "ask_user_questions"); + assert.equal(hasInteractiveToolInFlight(), true); + }); + + test("returns true when secure_env_collect is in-flight", () => { + markToolStart("call-1", true, "secure_env_collect"); + assert.equal(hasInteractiveToolInFlight(), true); + }); + + test("returns false after interactive tool completes", () => { + markToolStart("call-1", true, "ask_user_questions"); + assert.equal(hasInteractiveToolInFlight(), true); + markToolEnd("call-1"); + assert.equal(hasInteractiveToolInFlight(), false); + }); + + test("returns true if one of multiple tools is interactive", () => { + markToolStart("call-1", true, "bash"); + markToolStart("call-2", true, "edit"); + markToolStart("call-3", true, "ask_user_questions"); + markToolStart("call-4", true, "write"); + assert.equal(hasInteractiveToolInFlight(), true); + }); +}); + +describe("toolName tracking in markToolStart", () => { + test("defaults toolName to 'unknown' when not provided", () => { + markToolStart("call-1", true); + // unknown tool should not be treated as interactive + assert.equal(hasInteractiveToolInFlight(), false); + assert.equal(getInFlightToolCount(), 1); + }); + + test("no-ops when isActive is false", () => { + markToolStart("call-1", false, "ask_user_questions"); + assert.equal(getInFlightToolCount(), 0); + assert.equal(hasInteractiveToolInFlight(), false); + }); +}); + +describe("existing tracking behavior preserved with toolName", () => { + test("getInFlightToolCount tracks correctly", () => { + assert.equal(getInFlightToolCount(), 0); + markToolStart("call-1", true, "bash"); + assert.equal(getInFlightToolCount(), 1); + markToolStart("call-2", true, "ask_user_questions"); + assert.equal(getInFlightToolCount(), 2); + markToolEnd("call-1"); + assert.equal(getInFlightToolCount(), 1); + markToolEnd("call-2"); + assert.equal(getInFlightToolCount(), 0); + }); + + test("getOldestInFlightToolStart returns correct timestamp", () => { + assert.equal(getOldestInFlightToolStart(), undefined); + const before = Date.now(); + markToolStart("call-1", true, "bash"); + const after = Date.now(); + const oldest = getOldestInFlightToolStart(); + assert.ok(oldest !== undefined); + assert.ok(oldest! >= before && oldest! <= after); + }); + + test("getOldestInFlightToolAgeMs returns 0 with no tools", () => { + assert.equal(getOldestInFlightToolAgeMs(), 0); + }); + + test("getOldestInFlightToolAgeMs returns positive value with tools", () => { + markToolStart("call-1", true, "read"); + const age = getOldestInFlightToolAgeMs(); + assert.ok(age >= 0, `age should be non-negative, got ${age}`); + }); + + test("clearInFlightTools resets all state", () => { + markToolStart("call-1", true, "ask_user_questions"); + markToolStart("call-2", true, "bash"); + assert.equal(getInFlightToolCount(), 2); + assert.equal(hasInteractiveToolInFlight(), true); + clearInFlightTools(); + assert.equal(getInFlightToolCount(), 0); + assert.equal(hasInteractiveToolInFlight(), false); + }); +});