The idle watchdog checked lastProgressAt to detect stalled agents, but nothing updated that timestamp during normal execution. As a result, any task taking longer than 10 minutes triggered false idle recovery and steering messages, and was eventually skipped — even while the agent was actively writing code. Add a detectWorkingTreeActivity() check before recovery: if git reports uncommitted changes, the agent is treated as working, so lastProgressAt is bumped and recovery is skipped. Genuinely idle agents (clean working tree) are still recovered as before. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
85f60451fb
commit
8bd27f74e0
1 changed files with 30 additions and 1 deletions
|
|
@ -36,7 +36,6 @@ import {
|
|||
clearUnitRuntimeRecord,
|
||||
formatExecuteTaskRecoveryStatus,
|
||||
inspectExecuteTaskDurability,
|
||||
recordUnitProgress,
|
||||
readUnitRuntimeRecord,
|
||||
writeUnitRuntimeRecord,
|
||||
} from "./unit-runtime.js";
|
||||
|
|
@ -985,6 +984,17 @@ async function dispatchNextUnit(
|
|||
if (!runtime) return;
|
||||
if (Date.now() - runtime.lastProgressAt < idleTimeoutMs) return;
|
||||
|
||||
// Before triggering recovery, check if the agent is actually producing
|
||||
// work on disk. `git status --porcelain` is cheap and catches any
|
||||
// staged/unstaged/untracked changes currently present in the working tree.
|
||||
if (detectWorkingTreeActivity(basePath)) {
|
||||
writeUnitRuntimeRecord(basePath, unitType, unitId, currentUnit.startedAt, {
|
||||
lastProgressAt: Date.now(),
|
||||
lastProgressKind: "filesystem-activity",
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (currentUnit) {
|
||||
const modelId = ctx.model?.id ?? "unknown";
|
||||
snapshotUnitMetrics(ctx, currentUnit.type, currentUnit.id, currentUnit.startedAt, modelId);
|
||||
|
|
@ -2136,6 +2146,25 @@ export function skipExecuteTask(
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
 * Report whether the git working tree at `cwd` currently has any uncommitted
 * changes (staged, unstaged, or untracked), as listed by
 * `git status --porcelain`.
 *
 * The idle watchdog treats a dirty tree as a sign that the agent is producing
 * work on disk even though it has not signaled progress through runtime
 * records.
 *
 * NOTE(review): this inspects only the present state of the tree. It does not
 * compare against any timestamp, so changes left over from earlier work count
 * as activity too.
 *
 * @param cwd - Directory in which to run git.
 * @param timeoutMs - Upper bound, in milliseconds, for the git invocation.
 *   Defaults to 5000.
 * @returns `true` when git prints at least one non-whitespace character;
 *   `false` when the output is empty or the command fails for any reason
 *   (for example: not a repository, git unavailable, or timeout).
 */
function detectWorkingTreeActivity(cwd: string, timeoutMs = 5000): boolean {
  try {
    const porcelain = execSync("git status --porcelain", {
      cwd,
      // Ask for a string directly instead of converting a Buffer afterwards.
      encoding: "utf8",
      // Capture all streams so git's stderr does not leak to the parent's output.
      stdio: ["pipe", "pipe", "pipe"],
      timeout: timeoutMs,
    });
    return porcelain.trim().length > 0;
  } catch {
    // Best-effort probe: any failure is reported as "no activity" so the
    // caller proceeds with its normal idle handling.
    return false;
  }
}
|
||||
|
||||
/**
|
||||
* Resolve the expected artifact for a non-execute-task unit to an absolute path.
|
||||
* Returns null for unit types that don't produce a single file (execute-task,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue