fix: idle watchdog false-fires on active agents (#52) (#65)

The idle watchdog checked lastProgressAt to detect stalled agents, but
nothing updated that timestamp during normal execution. Any task taking
longer than 10 minutes triggered false idle recovery and steering messages,
and was eventually skipped — even while the agent was actively writing code.

Add detectWorkingTreeActivity() check before recovery: if git reports
uncommitted changes, the agent is working. Bump lastProgressAt and
skip recovery. Genuinely idle agents (clean working tree) still get
recovered as before.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
TÂCHES 2026-03-11 16:08:46 -06:00 committed by GitHub
parent 85f60451fb
commit 8bd27f74e0

View file

@ -36,7 +36,6 @@ import {
clearUnitRuntimeRecord,
formatExecuteTaskRecoveryStatus,
inspectExecuteTaskDurability,
recordUnitProgress,
readUnitRuntimeRecord,
writeUnitRuntimeRecord,
} from "./unit-runtime.js";
@ -985,6 +984,17 @@ async function dispatchNextUnit(
if (!runtime) return;
if (Date.now() - runtime.lastProgressAt < idleTimeoutMs) return;
// Before triggering recovery, check if the agent is actually producing
// work on disk. `git status --porcelain` is cheap and catches any
// staged/unstaged/untracked changes the agent made since lastProgressAt.
if (detectWorkingTreeActivity(basePath)) {
writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, {
lastProgressAt: Date.now(),
lastProgressKind: "filesystem-activity",
});
return;
}
if (currentUnit) {
const modelId = ctx.model?.id ?? "unknown";
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId);
@ -2136,6 +2146,25 @@ export function skipExecuteTask(
return true;
}
/**
 * Detect whether the agent is producing work on disk by asking git whether
 * the working tree has any uncommitted changes (staged, unstaged, or
 * untracked).
 *
 * Note: this reports whether the tree is dirty *right now*; it does not
 * compare against any timestamp, so a tree that was dirtied earlier and then
 * left untouched still counts as activity.
 *
 * @param cwd - Directory in which to run `git status`.
 * @returns `true` when `git status --porcelain` prints at least one entry;
 *   `false` when the tree is clean or when git cannot be run (missing
 *   directory, not a repository, timeout, git not installed).
 */
function detectWorkingTreeActivity(cwd: string): boolean {
  try {
    const porcelain = execSync("git status --porcelain", {
      cwd,
      encoding: "utf8",
      stdio: ["pipe", "pipe", "pipe"],
      timeout: 5000,
      // This runs as a background poll while the agent may be executing its
      // own git commands. Plain `git status` opportunistically refreshes the
      // index and takes index.lock, which can make a concurrent git command
      // fail. GIT_OPTIONAL_LOCKS=0 disables that optional write; git versions
      // that predate the variable simply ignore it.
      env: { ...process.env, GIT_OPTIONAL_LOCKS: "0" },
    });
    return porcelain.trim().length > 0;
  } catch {
    // Best effort: if git cannot answer, report "no activity" so the caller
    // falls through to its normal idle handling.
    return false;
  }
}
/**
* Resolve the expected artifact for a non-execute-task unit to an absolute path.
* Returns null for unit types that don't produce a single file (execute-task,