fix: add configurable timeout to await_job to prevent indefinite session blocking (#1769)
The await_job tool previously blocked the entire agent session with no escape hatch. This adds a configurable timeout parameter (default 120s) that races against the job promises. On timeout, jobs continue running in the background and the agent regains control. Closes #1690 Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
27ef4fcc40
commit
21b2f8223d
3 changed files with 152 additions and 3 deletions
|
|
@ -67,6 +67,8 @@ export function createAsyncBashTool(
|
|||
promptGuidelines: [
|
||||
"Use async_bash for commands that take more than a few seconds (builds, tests, installs, large git operations).",
|
||||
"After starting async jobs, continue with other work and use await_job when you need the results.",
|
||||
"await_job has a configurable timeout (default 120s) to prevent indefinite blocking — if it times out, jobs keep running and you can check again later.",
|
||||
"For long-running processes (SSH, deploys, training) that may take minutes+, prefer async_bash with periodic await_job polling over a single long await.",
|
||||
"Use cancel_job to stop a running background job.",
|
||||
"Check /jobs to see all running and recent background jobs.",
|
||||
],
|
||||
|
|
|
|||
120
src/resources/extensions/async-jobs/await-tool.test.ts
Normal file
120
src/resources/extensions/async-jobs/await-tool.test.ts
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
/**
|
||||
* await-tool.test.ts — Tests for await_job timeout behavior.
|
||||
*/
|
||||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { AsyncJobManager } from "./job-manager.ts";
|
||||
import { createAwaitTool } from "./await-tool.ts";
|
||||
|
||||
function getTextFromResult(result: { content: Array<{ type: string; text: string }> }): string {
|
||||
return result.content.map((c) => c.text).join("\n");
|
||||
}
|
||||
|
||||
const noopSignal = new AbortController().signal;
|
||||
|
||||
test("await_job returns immediately when no running jobs exist", async () => {
|
||||
const manager = new AsyncJobManager();
|
||||
const tool = createAwaitTool(() => manager);
|
||||
|
||||
const result = await tool.execute("tc1", {}, noopSignal, () => {}, undefined as never);
|
||||
const text = getTextFromResult(result);
|
||||
assert.match(text, /No running background jobs/);
|
||||
});
|
||||
|
||||
test("await_job returns immediately when all watched jobs are already completed", async () => {
|
||||
const manager = new AsyncJobManager();
|
||||
const tool = createAwaitTool(() => manager);
|
||||
|
||||
// Register a job that completes instantly
|
||||
const jobId = manager.register("bash", "fast-job", async () => "done");
|
||||
// Wait for the job to settle
|
||||
const job = manager.getJob(jobId)!;
|
||||
await job.promise;
|
||||
|
||||
const result = await tool.execute("tc2", { jobs: [jobId] }, noopSignal, () => {}, undefined as never);
|
||||
const text = getTextFromResult(result);
|
||||
assert.match(text, /fast-job/);
|
||||
assert.match(text, /completed/);
|
||||
});
|
||||
|
||||
test("await_job returns on timeout when jobs are still running", async () => {
|
||||
const manager = new AsyncJobManager();
|
||||
const tool = createAwaitTool(() => manager);
|
||||
|
||||
// Register a job that takes a long time
|
||||
const jobId = manager.register("bash", "slow-job", async (_signal) => {
|
||||
return new Promise<string>((resolve) => {
|
||||
const timer = setTimeout(() => resolve("finally done"), 60_000);
|
||||
if (typeof timer === "object" && "unref" in timer) timer.unref();
|
||||
});
|
||||
});
|
||||
|
||||
const start = Date.now();
|
||||
const result = await tool.execute("tc3", { jobs: [jobId], timeout: 1 }, noopSignal, () => {}, undefined as never);
|
||||
const elapsed = Date.now() - start;
|
||||
const text = getTextFromResult(result);
|
||||
|
||||
// Should have timed out within ~1-2 seconds, not 60
|
||||
assert.ok(elapsed < 5_000, `Expected timeout in ~1s but took ${elapsed}ms`);
|
||||
assert.match(text, /Timed out/);
|
||||
assert.match(text, /Still running/);
|
||||
assert.match(text, /slow-job/);
|
||||
|
||||
// Cleanup
|
||||
manager.cancel(jobId);
|
||||
manager.shutdown();
|
||||
});
|
||||
|
||||
test("await_job completes before timeout when job finishes quickly", async () => {
|
||||
const manager = new AsyncJobManager();
|
||||
const tool = createAwaitTool(() => manager);
|
||||
|
||||
// Register a job that completes in 100ms
|
||||
const jobId = manager.register("bash", "quick-job", async () => {
|
||||
return new Promise<string>((resolve) => setTimeout(() => resolve("quick result"), 100));
|
||||
});
|
||||
|
||||
const start = Date.now();
|
||||
const result = await tool.execute("tc4", { jobs: [jobId], timeout: 30 }, noopSignal, () => {}, undefined as never);
|
||||
const elapsed = Date.now() - start;
|
||||
const text = getTextFromResult(result);
|
||||
|
||||
// Should complete in ~100ms, well before the 30s timeout
|
||||
assert.ok(elapsed < 5_000, `Expected quick completion but took ${elapsed}ms`);
|
||||
assert.ok(!text.includes("Timed out"), "Should not have timed out");
|
||||
assert.match(text, /quick-job/);
|
||||
assert.match(text, /completed/);
|
||||
|
||||
manager.shutdown();
|
||||
});
|
||||
|
||||
test("await_job uses default timeout of 120s when not specified", async () => {
|
||||
const manager = new AsyncJobManager();
|
||||
const tool = createAwaitTool(() => manager);
|
||||
|
||||
// Register a job that completes immediately
|
||||
const jobId = manager.register("bash", "instant-job", async () => "instant");
|
||||
const job = manager.getJob(jobId)!;
|
||||
await job.promise;
|
||||
|
||||
// Call without timeout param — should work fine for already-done jobs
|
||||
const result = await tool.execute("tc5", { jobs: [jobId] }, noopSignal, () => {}, undefined as never);
|
||||
const text = getTextFromResult(result);
|
||||
assert.match(text, /instant-job/);
|
||||
assert.match(text, /completed/);
|
||||
|
||||
manager.shutdown();
|
||||
});
|
||||
|
||||
test("await_job returns not-found message for invalid job IDs", async () => {
|
||||
const manager = new AsyncJobManager();
|
||||
const tool = createAwaitTool(() => manager);
|
||||
|
||||
const result = await tool.execute("tc6", { jobs: ["bg_nonexistent"] }, noopSignal, () => {}, undefined as never);
|
||||
const text = getTextFromResult(result);
|
||||
assert.match(text, /No jobs found/);
|
||||
assert.match(text, /bg_nonexistent/);
|
||||
|
||||
manager.shutdown();
|
||||
});
|
||||
|
|
@ -9,12 +9,21 @@ import type { ToolDefinition } from "@gsd/pi-coding-agent";
|
|||
import { Type } from "@sinclair/typebox";
|
||||
import type { AsyncJobManager, Job } from "./job-manager.js";
|
||||
|
||||
const DEFAULT_TIMEOUT_SECONDS = 120;
|
||||
|
||||
const schema = Type.Object({
|
||||
jobs: Type.Optional(
|
||||
Type.Array(Type.String(), {
|
||||
description: "Job IDs to wait for. Omit to wait for any running job.",
|
||||
}),
|
||||
),
|
||||
timeout: Type.Optional(
|
||||
Type.Number({
|
||||
description:
|
||||
"Maximum seconds to wait before returning control. Defaults to 120. " +
|
||||
"Jobs continue running in the background after timeout.",
|
||||
}),
|
||||
),
|
||||
});
|
||||
|
||||
export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefinition<typeof schema> {
|
||||
|
|
@ -26,7 +35,8 @@ export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefiniti
|
|||
parameters: schema,
|
||||
async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
|
||||
const manager = getManager();
|
||||
const { jobs: jobIds } = params;
|
||||
const { jobs: jobIds, timeout } = params;
|
||||
const timeoutMs = ((timeout ?? DEFAULT_TIMEOUT_SECONDS) * 1000);
|
||||
|
||||
let watched: Job[];
|
||||
if (jobIds && jobIds.length > 0) {
|
||||
|
|
@ -63,8 +73,20 @@ export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefiniti
|
|||
return { content: [{ type: "text", text: result }], details: undefined };
|
||||
}
|
||||
|
||||
// Wait for at least one to complete
|
||||
await Promise.race(running.map((j) => j.promise));
|
||||
// Wait for at least one to complete, or timeout
|
||||
const TIMEOUT_SENTINEL = Symbol("timeout");
|
||||
const timeoutPromise = new Promise<typeof TIMEOUT_SENTINEL>((resolve) => {
|
||||
const timer = setTimeout(() => resolve(TIMEOUT_SENTINEL), timeoutMs);
|
||||
// Allow the process to exit even if the timer is pending
|
||||
if (typeof timer === "object" && "unref" in timer) timer.unref();
|
||||
});
|
||||
|
||||
const raceResult = await Promise.race([
|
||||
Promise.race(running.map((j) => j.promise)).then(() => "completed" as const),
|
||||
timeoutPromise,
|
||||
]);
|
||||
|
||||
const timedOut = raceResult === TIMEOUT_SENTINEL;
|
||||
|
||||
// Collect all completed results (more may have finished while waiting)
|
||||
const completed = watched.filter((j) => j.status !== "running");
|
||||
|
|
@ -74,6 +96,11 @@ export function createAwaitTool(getManager: () => AsyncJobManager): ToolDefiniti
|
|||
if (stillRunning.length > 0) {
|
||||
result += `\n\n**Still running:** ${stillRunning.map((j) => `${j.id} (${j.label})`).join(", ")}`;
|
||||
}
|
||||
if (timedOut) {
|
||||
result += `\n\n⏱ **Timed out** after ${timeout ?? DEFAULT_TIMEOUT_SECONDS}s waiting for jobs to finish. ` +
|
||||
`Jobs are still running in the background. ` +
|
||||
`Use \`await_job\` again later or \`async_bash\` + \`await_job\` for shorter polling intervals.`;
|
||||
}
|
||||
|
||||
return { content: [{ type: "text", text: result }], details: undefined };
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue