diff --git a/packages/pi-coding-agent/src/core/tools/bash-background.test.ts b/packages/pi-coding-agent/src/core/tools/bash-background.test.ts new file mode 100644 index 000000000..316f752de --- /dev/null +++ b/packages/pi-coding-agent/src/core/tools/bash-background.test.ts @@ -0,0 +1,91 @@ +/** + * bash-background.test.ts — Tests for rewriteBackgroundCommand + * + * Regression for #733: `cmd &` causes the bash tool to hang indefinitely + * because the background process inherits the piped stdout/stderr and keeps + * them open. rewriteBackgroundCommand injects >/dev/null 2>&1 before & when + * the command does not already redirect stdout. + */ + +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import { rewriteBackgroundCommand } from "./bash.js"; + +describe("rewriteBackgroundCommand", () => { + describe("no-op cases (no & operator)", () => { + it("passes through a plain command unchanged", () => { + const r = rewriteBackgroundCommand("python -m http.server 8080"); + assert.equal(r.rewritten, false); + assert.equal(r.command, "python -m http.server 8080"); + }); + + it("passes through a command with && (logical AND)", () => { + const r = rewriteBackgroundCommand("npm install && npm start"); + assert.equal(r.rewritten, false); + }); + + it("passes through a command with & inside a string", () => { + const r = rewriteBackgroundCommand("echo 'foo & bar'"); + assert.equal(r.rewritten, false); + }); + }); + + describe("rewrite cases (& backgrounding)", () => { + it("rewrites bare background command", () => { + const r = rewriteBackgroundCommand("python -m http.server 8080 &"); + assert.equal(r.rewritten, true); + assert.ok(r.command.includes(">/dev/null 2>&1"), "injects stdout redirect"); + assert.ok(r.command.includes("&"), "preserves background operator"); + }); + + it("rewrites background command with trailing whitespace", () => { + const r = rewriteBackgroundCommand("python -m http.server 8080 & "); + assert.equal(r.rewritten, true); + assert.ok(r.command.includes(">/dev/null 2>&1")); + }); + + it("rewrites background command with & disown", () => { + const r = rewriteBackgroundCommand("node server.js & disown"); + assert.equal(r.rewritten, true); + assert.ok(r.command.includes(">/dev/null 2>&1")); + }); + + it("does NOT double-inject when stdout already redirected (>)", () => { + const r = rewriteBackgroundCommand("python -m http.server 8080 > server.log &"); + assert.equal(r.rewritten, false, "already has > redirect"); + }); + + it("does NOT inject when already redirected to /dev/null", () => { + const r = rewriteBackgroundCommand("python -m http.server 8080 >/dev/null 2>&1 &"); + assert.equal(r.rewritten, false, "already fully redirected"); + }); + + it("does NOT inject when command uses a pipe", () => { + const r = rewriteBackgroundCommand("python -m http.server 8080 | tee server.log &"); + assert.equal(r.rewritten, false, "stdout piped elsewhere"); + }); + }); + + describe("compound commands", () => { + it("rewrites only the backgrounded segment in a compound command", () => { + const r = rewriteBackgroundCommand("echo starting; python -m http.server 8080 &"); + assert.equal(r.rewritten, true); + assert.ok(r.command.includes(">/dev/null 2>&1 &")); + assert.ok(r.command.includes("echo starting"), "non-background part preserved"); + }); + + it("handles multiple backgrounded commands", () => { + const r = rewriteBackgroundCommand("node server.js &\npython worker.py &"); + assert.equal(r.rewritten, true); + const occurrences = (r.command.match(/\/dev\/null/g) ?? []).length; + assert.ok(occurrences >= 2, "both background commands rewritten"); + }); + }); + + describe("nohup / already-safe patterns pass through", () => { + it("nohup ... & passes through unchanged (already redirects)", () => { + const r = rewriteBackgroundCommand("nohup python -m http.server 8080 > /dev/null 2>&1 &"); + assert.equal(r.rewritten, false); + }); + }); +}); diff --git a/packages/pi-coding-agent/src/core/tools/bash.ts b/packages/pi-coding-agent/src/core/tools/bash.ts index 6d84199c3..4e1d65257 100644 --- a/packages/pi-coding-agent/src/core/tools/bash.ts +++ b/packages/pi-coding-agent/src/core/tools/bash.ts @@ -43,6 +43,71 @@ function getTempFilePath(): string { return join(tmpdir(), `pi-bash-${id}.log`); } +/** + * Detect whether a command fragment ends with an unquoted & (background operator). + * Returns true for patterns like: `cmd &`, `cmd arg &`, `cmd & disown`, `(cmd) &`. + * Returns false when & appears inside a string literal or as &&. + */ +function endsWithBackgroundOperator(fragment: string): boolean { + // Remove content inside single-quoted strings to avoid false positives + const stripped = fragment.replace(/'[^']*'/g, "''"); + // Match trailing & not preceded by another & (i.e., not &&) + return /(?, >>, &>, |, /dev/null redirects. + */ +function hasOutputRedirect(segment: string): boolean { + // Remove single-quoted strings to avoid matching inside them + const stripped = segment.replace(/'[^']*'/g, "''"); + // Match >, >> not preceded by 2 (stderr-only) — we only care about stdout + // Also match &> (combined), >&, or a pipe | which routes stdout elsewhere + return /(?>?|&>|>&|\|)/.test(stripped); +} + +/** + * Rewrite a command that uses & for backgrounding so the background process + * does not inherit the bash tool's stdout/stderr pipes. + * + * Without this, `python -m http.server 8080 &` causes the bash tool to hang + * indefinitely because Node.js keeps the pipe open until every process that + * inherited it exits — including the long-running server. + * + * The rewrite adds `>/dev/null 2>&1` before each & where stdout is not already + * redirected, ensuring the background process detaches from the pipes while + * still producing a human-readable notice in the tool output. + * + * Returns { command: string; rewritten: boolean }. + */ +export function rewriteBackgroundCommand(command: string): { command: string; rewritten: boolean } { + // Quick pre-check: if there's no & at all, skip the more expensive processing + if (!command.includes("&")) return { command, rewritten: false }; + + // Split on ; and newlines to handle compound commands. + // We rewrite each segment independently. + // Note: this is intentionally simple and covers the common LLM patterns. + // It does not attempt to parse complex nested subshells. + const segments = command.split(/(?<=[;\n])/); + let anyRewritten = false; + + const rewrittenSegments = segments.map((segment) => { + if (!endsWithBackgroundOperator(segment)) return segment; + if (hasOutputRedirect(segment)) return segment; + + anyRewritten = true; + // Insert >/dev/null 2>&1 before the trailing & (and optional disown/comment) + return segment.replace( + /(?/dev/null 2>&1 $1", + ); + }); + + if (!anyRewritten) return { command, rewritten: false }; + return { command: rewrittenSegments.join(""), rewritten: true }; +} + const bashSchema = Type.Object({ command: Type.String({ description: "Bash command to execute" }), timeout: Type.Optional(Type.Number({ description: "Timeout in seconds (optional, no default timeout)" })), @@ -239,8 +304,25 @@ export function createBashTool(cwd: string, options?: BashToolOptions): AgentToo } } + // Rewrite background commands (&) to redirect output away from the pipes. + // Without this, `cmd &` causes the tool to hang because the background + // process inherits the piped stdout/stderr and keeps them open indefinitely. + const bgResult = rewriteBackgroundCommand(command); + const effectiveCommand = bgResult.command; + if (bgResult.rewritten) { + // Surface a brief advisory so the LLM knows what happened. + // The rewrite is transparent for the common case; explicit detachment + // (nohup, start_new_session) is preferred for robustness. + onUpdate?.({ + content: [{ + type: "text" as const, + text: "Note: Background command output redirected to /dev/null to prevent pipe hang. Use nohup or setsid for reliable detachment.", + }], + details: undefined, + }); + } // Apply command prefix if configured (e.g., "shopt -s expand_aliases" for alias support) - const resolvedCommand = sanitizeCommand(commandPrefix ? `${commandPrefix}\n${command}` : command); + const resolvedCommand = sanitizeCommand(commandPrefix ? `${commandPrefix}\n${effectiveCommand}` : effectiveCommand); const spawnContext = resolveSpawnContext(resolvedCommand, cwd, spawnHook); return new Promise((resolve, reject) => { diff --git a/packages/pi-coding-agent/src/core/tools/index.ts b/packages/pi-coding-agent/src/core/tools/index.ts index c95fe4b78..c321b3211 100644 --- a/packages/pi-coding-agent/src/core/tools/index.ts +++ b/packages/pi-coding-agent/src/core/tools/index.ts @@ -7,6 +7,7 @@ export { type BashToolOptions, bashTool, createBashTool, + rewriteBackgroundCommand, } from "./bash.js"; export { type BashInterceptorRule, diff --git a/packages/pi-coding-agent/src/index.ts b/packages/pi-coding-agent/src/index.ts index 86a808a05..a53d45f6a 100644 --- a/packages/pi-coding-agent/src/index.ts +++ b/packages/pi-coding-agent/src/index.ts @@ -235,6 +235,7 @@ export { type BashToolInput, type BashToolOptions, bashTool, + rewriteBackgroundCommand, checkBashInterception, type CompiledInterceptor, compileInterceptor, diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 9bdf8d13b..9c70a5573 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -342,8 +342,12 @@ let lastBaselineCharCount: number | undefined; /** SIGTERM handler registered while auto-mode is active — cleared on stop/pause. */ let _sigtermHandler: (() => void) | null = null; -/** Tool calls currently being executed — prevents false idle detection during long-running tools. */ -const inFlightTools = new Set(); +/** + * Tool calls currently being executed — prevents false idle detection during long-running tools. + * Maps toolCallId → start timestamp (ms) so the idle watchdog can detect tools that have been + * running suspiciously long (e.g., a Bash command hung because `&` kept stdout open). + */ +const inFlightTools = new Map(); type BudgetAlertLevel = 0 | 75 | 90 | 100; @@ -422,11 +426,11 @@ export function isAutoPaused(): boolean { /** * Mark a tool execution as in-flight. Called from index.ts on tool_execution_start. - * Prevents the idle watchdog from declaring the agent idle while tools are executing. + * Records start time so the idle watchdog can detect tools hung longer than the idle timeout. */ export function markToolStart(toolCallId: string): void { if (!active) return; - inFlightTools.add(toolCallId); + inFlightTools.set(toolCallId, Date.now()); } /** @@ -436,6 +440,16 @@ export function markToolEnd(toolCallId: string): void { inFlightTools.delete(toolCallId); } +/** + * Returns the age (ms) of the oldest currently in-flight tool, or 0 if none. + * Exported for testing. + */ +export function getOldestInFlightToolAgeMs(): number { + if (inFlightTools.size === 0) return 0; + const oldestStart = Math.min(...inFlightTools.values()); + return Date.now() - oldestStart; +} + /** * Return the base path to use for the auto.lock file. * Always uses the original project root (not the worktree) so that @@ -2901,13 +2915,27 @@ async function dispatchNextUnit( if (Date.now() - runtime.lastProgressAt < idleTimeoutMs) return; // Agent has tool calls currently executing (await_job, long bash, etc.) — - // not idle, just waiting for tool completion. + // not idle, just waiting for tool completion. But only suppress recovery + // if the tool started recently. A tool in-flight for longer than the idle + // timeout is likely stuck — e.g., `python -m http.server 8080 &` keeps the + // shell's stdout/stderr open, causing the Bash tool to hang indefinitely. if (inFlightTools.size > 0) { - writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, { - lastProgressAt: Date.now(), - lastProgressKind: "tool-in-flight", - }); - return; + const oldestStart = Math.min(...inFlightTools.values()); + const toolAgeMs = Date.now() - oldestStart; + if (toolAgeMs < idleTimeoutMs) { + writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, { + lastProgressAt: Date.now(), + lastProgressKind: "tool-in-flight", + }); + return; + } + // Oldest tool has been running >= idleTimeoutMs — treat as a stuck/hung + // tool (e.g., background process holding stdout open). Fall through to + // idle recovery without resetting the progress clock. + ctx.ui.notify( + `Stalled tool detected: a tool has been in-flight for ${Math.round(toolAgeMs / 60000)}min. Treating as hung — attempting idle recovery.`, + "warning", + ); } // Before triggering recovery, check if the agent is actually producing diff --git a/src/resources/extensions/gsd/prompts/execute-task.md b/src/resources/extensions/gsd/prompts/execute-task.md index 5e40368bd..8f4690f74 100644 --- a/src/resources/extensions/gsd/prompts/execute-task.md +++ b/src/resources/extensions/gsd/prompts/execute-task.md @@ -31,6 +31,11 @@ Then: 3. Build the real thing. If the task plan says "create login endpoint", build an endpoint that actually authenticates against a real store, not one that returns a hardcoded success response. If the task plan says "create dashboard page", build a page that renders real data from the API, not a component with hardcoded props. Stubs and mocks are for tests, not for the shipped feature. 4. Write or update tests as part of execution — tests are verification, not an afterthought. If the slice plan defines test files in its Verification section and this is the first task, create them (they should initially fail). 5. When implementing non-trivial runtime behavior (async flows, API boundaries, background processes, error paths), add or preserve agent-usable observability. Skip this for simple changes where it doesn't apply. + + **Background process rule:** Never use bare `command &` to run background processes. The shell's `&` operator leaves stdout/stderr attached to the parent, which causes the Bash tool to hang indefinitely waiting for those streams to close. Always redirect output before backgrounding: + - Correct: `command > /dev/null 2>&1 &` or `nohup command > /dev/null 2>&1 &` + - Example: `python -m http.server 8080 > /dev/null 2>&1 &` (NOT `python -m http.server 8080 &`) + - Preferred: use the `bg_shell` tool if available — it manages process lifecycle correctly without stream-inheritance issues 6. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors) 7. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary. 8. If the task touches UI, browser flows, DOM behavior, or user-visible web state: diff --git a/src/resources/extensions/gsd/tests/in-flight-tool-tracking.test.ts b/src/resources/extensions/gsd/tests/in-flight-tool-tracking.test.ts index 9e80f00c7..b39fbce93 100644 --- a/src/resources/extensions/gsd/tests/in-flight-tool-tracking.test.ts +++ b/src/resources/extensions/gsd/tests/in-flight-tool-tracking.test.ts @@ -1,6 +1,6 @@ /** * In-flight tool tracking tests — verifies that markToolStart/markToolEnd - * correctly manage the in-flight tools set used by the idle watchdog to + * correctly manage the in-flight tools map used by the idle watchdog to * distinguish "agent waiting on long-running tool" from "agent is idle". * * Background: The idle watchdog checks every 15s for agent progress. Without @@ -8,12 +8,15 @@ * can run 20+ minutes for evaluations, deployments, test suites) are falsely * declared idle and interrupted by recovery steering messages. * - * The fix hooks tool_execution_start/end events to track active tool calls. - * When tools are in-flight, the watchdog resets lastProgressAt instead of - * triggering idle recovery. + * The fix hooks tool_execution_start/end events to track active tool calls + * with start timestamps. When tools are in-flight and started recently + * (< idleTimeoutMs), the watchdog resets lastProgressAt instead of triggering + * idle recovery. When a tool has been in-flight for longer than idleTimeoutMs, + * it is treated as stuck (e.g., `command &` keeping stdout open) and recovery + * proceeds anyway. */ -import { markToolStart, markToolEnd, isAutoActive } from "../auto.ts"; +import { markToolStart, markToolEnd, isAutoActive, getOldestInFlightToolAgeMs } from "../auto.ts"; import { createTestContext } from './test-helpers.ts'; const { assertEq, assertTrue, report } = createTestContext(); @@ -49,9 +52,17 @@ const { assertEq, assertTrue, report } = createTestContext(); // ═══ Integration contract: expected exports from auto.ts ═════════════════════ { - console.log("\n=== auto.ts exports markToolStart and markToolEnd ==="); + console.log("\n=== auto.ts exports markToolStart, markToolEnd, and getOldestInFlightToolAgeMs ==="); assertEq(typeof markToolStart, "function", "markToolStart should be a function"); assertEq(typeof markToolEnd, "function", "markToolEnd should be a function"); + assertEq(typeof getOldestInFlightToolAgeMs, "function", "getOldestInFlightToolAgeMs should be a function"); +} + +{ + console.log("\n=== getOldestInFlightToolAgeMs: returns 0 when no tools in-flight ==="); + // When auto-mode is inactive, inFlightTools map is empty → age is 0 + const age = getOldestInFlightToolAgeMs(); + assertEq(age, 0, "should return 0 when no tools are in-flight"); } {