fix: populate RecoveryContext in hook unit supervision to prevent crash on stalled tool recovery (#1867)
The buildRecoveryContext callback in auto/phases.ts returned an empty object instead of a valid RecoveryContext. When the idle watchdog detected a stalled tool and called recoverTimedOutUnit, basePath was undefined, causing join(undefined, ".gsd") to throw "The path argument must be of type string. Received undefined". The error left the session permanently hung because the unit promise was never resolved. Fixes #1855 Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
f79de8a583
commit
99032444eb
2 changed files with 108 additions and 1 deletions
|
|
@ -991,7 +991,12 @@ export async function runUnitPhase(
|
|||
unitId,
|
||||
prefs,
|
||||
buildSnapshotOpts: () => deps.buildSnapshotOpts(unitType, unitId),
|
||||
buildRecoveryContext: () => ({}),
|
||||
buildRecoveryContext: () => ({
|
||||
basePath: s.basePath,
|
||||
verbose: s.verbose,
|
||||
currentUnitStartedAt: s.currentUnit?.startedAt ?? Date.now(),
|
||||
unitRecoveryCount: s.unitRecoveryCount,
|
||||
}),
|
||||
pauseAuto: deps.pauseAuto,
|
||||
});
|
||||
|
||||
|
|
|
|||
102
src/resources/extensions/gsd/tests/stalled-tool-recovery.test.ts
Normal file
102
src/resources/extensions/gsd/tests/stalled-tool-recovery.test.ts
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
/**
|
||||
* Regression test for #1855: Stalled tool detection crashes with
|
||||
* "The path argument must be of type string. Received undefined"
|
||||
*
|
||||
* When a tool stalls in-flight for 10+ minutes, the idle watchdog fires
|
||||
* recoverTimedOutUnit(). In auto/phases.ts, buildRecoveryContext was
|
||||
* returning an empty object `{}`, so basePath was undefined. The recovery
|
||||
* code passed undefined to readUnitRuntimeRecord → runtimePath → join(),
|
||||
* which throws a TypeError. The session is permanently frozen because the
|
||||
* error propagates into the idle watchdog catch handler but the unit
|
||||
* promise is never resolved.
|
||||
*
|
||||
* This test calls recoverTimedOutUnit with an empty RecoveryContext (the
|
||||
* bug) and verifies it crashes, then calls it with a valid RecoveryContext
|
||||
* (the fix) and verifies it does not crash.
|
||||
*/
|
||||
|
||||
import { mkdtempSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { recoverTimedOutUnit, type RecoveryContext } from "../auto-timeout-recovery.ts";
|
||||
import { createTestContext } from './test-helpers.ts';
|
||||
|
||||
const { assertTrue, report } = createTestContext();
|
||||
|
||||
// Minimal mock for ExtensionContext — only the fields recoverTimedOutUnit touches.
|
||||
function makeMockCtx() {
|
||||
return {
|
||||
ui: {
|
||||
notify: () => {},
|
||||
},
|
||||
} as any;
|
||||
}
|
||||
|
||||
// Minimal mock for ExtensionAPI — only sendMessage is called during recovery.
|
||||
function makeMockPi() {
|
||||
return {
|
||||
sendMessage: () => {},
|
||||
} as any;
|
||||
}
|
||||
|
||||
// ═══ #1855: empty RecoveryContext (basePath undefined) crashes ════════════════
|
||||
|
||||
{
|
||||
console.log("\n=== #1855: recoverTimedOutUnit crashes when basePath is undefined ===");
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
|
||||
// Simulate the bug: buildRecoveryContext returns {} (empty object).
|
||||
// basePath is undefined, which causes join(undefined, ".gsd") to throw.
|
||||
const emptyRctx = {} as RecoveryContext;
|
||||
|
||||
let crashed = false;
|
||||
try {
|
||||
await recoverTimedOutUnit(ctx, pi, "execute-task", "M001/S01/T01", "idle", emptyRctx);
|
||||
} catch (err: any) {
|
||||
crashed = true;
|
||||
assertTrue(
|
||||
err.message.includes("path") || err.message.includes("string") || err.code === "ERR_INVALID_ARG_TYPE",
|
||||
`should crash with path/type error, got: ${err.message}`,
|
||||
);
|
||||
}
|
||||
assertTrue(crashed, "should crash when basePath is undefined (reproduces #1855)");
|
||||
}
|
||||
|
||||
// ═══ #1855: valid RecoveryContext does not crash ═════════════════════════════
|
||||
|
||||
{
|
||||
console.log("\n=== #1855: recoverTimedOutUnit succeeds with valid RecoveryContext ===");
|
||||
const base = mkdtempSync(join(tmpdir(), "gsd-stalled-tool-test-"));
|
||||
mkdirSync(join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks"), { recursive: true });
|
||||
mkdirSync(join(base, ".gsd", "runtime", "units"), { recursive: true });
|
||||
|
||||
try {
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
|
||||
const validRctx: RecoveryContext = {
|
||||
basePath: base,
|
||||
verbose: false,
|
||||
currentUnitStartedAt: Date.now(),
|
||||
unitRecoveryCount: new Map(),
|
||||
};
|
||||
|
||||
let crashed = false;
|
||||
let result: string | undefined;
|
||||
try {
|
||||
result = await recoverTimedOutUnit(ctx, pi, "execute-task", "M001/S01/T01", "idle", validRctx);
|
||||
} catch (err: any) {
|
||||
crashed = true;
|
||||
console.error(` Unexpected crash: ${err.message}`);
|
||||
}
|
||||
assertTrue(!crashed, "should not crash with valid basePath");
|
||||
// With no runtime record on disk and recoveryAttempts=0, the function
|
||||
// should attempt steering recovery (sendMessage) and return "recovered".
|
||||
assertTrue(result === "recovered", `should return 'recovered', got '${result}'`);
|
||||
} finally {
|
||||
rmSync(base, { recursive: true, force: true });
|
||||
}
|
||||
}
|
||||
|
||||
report();
|
||||
Loading…
Add table
Reference in a new issue