fix: add configurable timeout to await_job to prevent indefinite session blocking (#1769)

The await_job tool previously blocked the entire agent session with no
escape hatch. This adds a configurable timeout parameter (default 120s)
that races against the job promises. On timeout, jobs continue running
in the background and the agent regains control.

Closes #1690

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
TÂCHES 2026-03-21 09:32:52 -06:00 committed by GitHub
parent 27ef4fcc40
commit 21b2f8223d
3 changed files with 152 additions and 3 deletions

View file

@ -67,6 +67,8 @@ export function createAsyncBashTool(
promptGuidelines: [
"Use async_bash for commands that take more than a few seconds (builds, tests, installs, large git operations).",
"After starting async jobs, continue with other work and use await_job when you need the results.",
"await_job has a configurable timeout (default 120s) to prevent indefinite blocking — if it times out, jobs keep running and you can check again later.",
"For long-running processes (SSH, deploys, training) that may take minutes+, prefer async_bash with periodic await_job polling over a single long await.",
"Use cancel_job to stop a running background job.",
"Check /jobs to see all running and recent background jobs.",
],

View file

@ -0,0 +1,120 @@
/**
* await-tool.test.ts Tests for await_job timeout behavior.
*/
import test from "node:test";
import assert from "node:assert/strict";
import { AsyncJobManager } from "./job-manager.ts";
import { createAwaitTool } from "./await-tool.ts";
function getTextFromResult(result: { content: Array<{ type: string; text: string }> }): string {
return result.content.map((c) => c.text).join("\n");
}
const noopSignal = new AbortController().signal;
test("await_job returns immediately when no running jobs exist", async () => {
const manager = new AsyncJobManager();
const tool = createAwaitTool(() => manager);
const result = await tool.execute("tc1", {}, noopSignal, () => {}, undefined as never);
const text = getTextFromResult(result);
assert.match(text, /No running background jobs/);
});
test("await_job returns immediately when all watched jobs are already completed", async () => {
const manager = new AsyncJobManager();
const tool = createAwaitTool(() => manager);
// Register a job that completes instantly
const jobId = manager.register("bash", "fast-job", async () => "done");
// Wait for the job to settle
const job = manager.getJob(jobId)!;
await job.promise;
const result = await tool.execute("tc2", { jobs: [jobId] }, noopSignal, () => {}, undefined as never);
const text = getTextFromResult(result);
assert.match(text, /fast-job/);
assert.match(text, /completed/);
});
test("await_job returns on timeout when jobs are still running", async () => {
const manager = new AsyncJobManager();
const tool = createAwaitTool(() => manager);
// Register a job that takes a long time
const jobId = manager.register("bash", "slow-job", async (_signal) => {
return new Promise<string>((resolve) => {
const timer = setTimeout(() => resolve("finally done"), 60_000);
if (typeof timer === "object" && "unref" in timer) timer.unref();
});
});
const start = Date.now();
const result = await tool.execute("tc3", { jobs: [jobId], timeout: 1 }, noopSignal, () => {}, undefined as never);
const elapsed = Date.now() - start;
const text = getTextFromResult(result);
// Should have timed out within ~1-2 seconds, not 60
assert.ok(elapsed < 5_000, `Expected timeout in ~1s but took ${elapsed}ms`);
assert.match(text, /Timed out/);
assert.match(text, /Still running/);
assert.match(text, /slow-job/);
// Cleanup
manager.cancel(jobId);
manager.shutdown();
});
test("await_job completes before timeout when job finishes quickly", async () => {
const manager = new AsyncJobManager();
const tool = createAwaitTool(() => manager);
// Register a job that completes in 100ms
const jobId = manager.register("bash", "quick-job", async () => {
return new Promise<string>((resolve) => setTimeout(() => resolve("quick result"), 100));
});
const start = Date.now();
const result = await tool.execute("tc4", { jobs: [jobId], timeout: 30 }, noopSignal, () => {}, undefined as never);
const elapsed = Date.now() - start;
const text = getTextFromResult(result);
// Should complete in ~100ms, well before the 30s timeout
assert.ok(elapsed < 5_000, `Expected quick completion but took ${elapsed}ms`);
assert.ok(!text.includes("Timed out"), "Should not have timed out");
assert.match(text, /quick-job/);
assert.match(text, /completed/);
manager.shutdown();
});
test("await_job uses default timeout of 120s when not specified", async () => {
const manager = new AsyncJobManager();
const tool = createAwaitTool(() => manager);
// Register a job that completes immediately
const jobId = manager.register("bash", "instant-job", async () => "instant");
const job = manager.getJob(jobId)!;
await job.promise;
// Call without timeout param — should work fine for already-done jobs
const result = await tool.execute("tc5", { jobs: [jobId] }, noopSignal, () => {}, undefined as never);
const text = getTextFromResult(result);
assert.match(text, /instant-job/);
assert.match(text, /completed/);
manager.shutdown();
});
test("await_job returns not-found message for invalid job IDs", async () => {
const manager = new AsyncJobManager();
const tool = createAwaitTool(() => manager);
const result = await tool.execute("tc6", { jobs: ["bg_nonexistent"] }, noopSignal, () => {}, undefined as never);
const text = getTextFromResult(result);
assert.match(text, /No jobs found/);
assert.match(text, /bg_nonexistent/);
manager.shutdown();
});

View file

@ -9,12 +9,21 @@ import type { ToolDefinition } from "@gsd/pi-coding-agent";
import { Type } from "@sinclair/typebox";
import type { AsyncJobManager, Job } from "./job-manager.js";
const DEFAULT_TIMEOUT_SECONDS = 120;
const schema = Type.Object({
jobs: Type.Optional(
Type.Array(Type.String(), {
description: "Job IDs to wait for. Omit to wait for any running job.",
}),
),
timeout: Type.Optional(
Type.Number({
description:
"Maximum seconds to wait before returning control. Defaults to 120. " +
"Jobs continue running in the background after timeout.",
}),
),
});
export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefinition<typeof schema> {
@ -26,7 +35,8 @@ export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefiniti
parameters: schema,
async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
const manager = getManager();
const { jobs: jobIds } = params;
const { jobs: jobIds, timeout } = params;
const timeoutMs = ((timeout ?? DEFAULT_TIMEOUT_SECONDS) * 1000);
let watched: Job[];
if (jobIds && jobIds.length > 0) {
@ -63,8 +73,20 @@ export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefiniti
return { content: [{ type: "text", text: result }], details: undefined };
}
// Wait for at least one to complete
await Promise.race(running.map((j) => j.promise));
// Wait for at least one to complete, or timeout
const TIMEOUT_SENTINEL = Symbol("timeout");
const timeoutPromise = new Promise<typeof TIMEOUT_SENTINEL>((resolve) => {
const timer = setTimeout(() => resolve(TIMEOUT_SENTINEL), timeoutMs);
// Allow the process to exit even if the timer is pending
if (typeof timer === "object" && "unref" in timer) timer.unref();
});
const raceResult = await Promise.race([
Promise.race(running.map((j) => j.promise)).then(() => "completed" as const),
timeoutPromise,
]);
const timedOut = raceResult === TIMEOUT_SENTINEL;
// Collect all completed results (more may have finished while waiting)
const completed = watched.filter((j) => j.status !== "running");
@ -74,6 +96,11 @@ export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefiniti
if (stillRunning.length > 0) {
result += `\n\n**Still running:** ${stillRunning.map((j) => `${j.id} (${j.label})`).join(", ")}`;
}
if (timedOut) {
result += `\n\n⏱ **Timed out** after ${timeout ?? DEFAULT_TIMEOUT_SECONDS}s waiting for jobs to finish. ` +
`Jobs are still running in the background. ` +
`Use \`await_job\` again later or \`async_bash\` + \`await_job\` for shorter polling intervals.`;
}
return { content: [{ type: "text", text: result }], details: undefined };
},