Add ADR-009 docs and resolve compile/runtime typing regressions in UOK and extension modules. Refs #4214
607 lines
24 KiB
TypeScript
607 lines
24 KiB
TypeScript
/**
|
|
* auto/loop.ts — Main auto-mode execution loop.
|
|
*
|
|
* Iterates: derive → dispatch → guards → runUnit → finalize → repeat.
|
|
* Exits when s.active becomes false or a terminal condition is reached.
|
|
*
|
|
* Imports from: auto/types, auto/resolve, auto/phases
|
|
*/
|
|
|
|
import type { ExtensionAPI, ExtensionContext } from "@gsd/pi-coding-agent";
|
|
|
|
import { randomUUID } from "node:crypto";
|
|
import type { AutoSession, SidecarItem } from "./session.js";
|
|
import type { LoopDeps } from "./loop-deps.js";
|
|
import {
|
|
MAX_LOOP_ITERATIONS,
|
|
type LoopState,
|
|
type IterationContext,
|
|
type IterationData,
|
|
} from "./types.js";
|
|
import { _clearCurrentResolve } from "./resolve.js";
|
|
import {
|
|
runPreDispatch,
|
|
runDispatch,
|
|
runGuards,
|
|
runUnitPhase,
|
|
runFinalize,
|
|
} from "./phases.js";
|
|
import { debugLog } from "../debug-logger.js";
|
|
import { isInfrastructureError, isTransientCooldownError, getCooldownRetryAfterMs, COOLDOWN_FALLBACK_WAIT_MS, MAX_COOLDOWN_RETRIES } from "./infra-errors.js";
|
|
import { resolveEngine } from "../engine-resolver.js";
|
|
import { logWarning } from "../workflow-logger.js";
|
|
import { gsdRoot } from "../paths.js";
|
|
import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
|
|
import { join } from "node:path";
|
|
|
|
// ── Stuck detection persistence (#3704) ──────────────────────────────────
|
|
// Persist stuck detection state to disk so it survives session restarts.
|
|
// Without this, restarting auto-mode resets all counters, allowing the
|
|
// same blocked unit to burn a full retry budget each session.
|
|
function stuckStatePath(basePath: string): string {
|
|
return join(gsdRoot(basePath), "runtime", "stuck-state.json");
|
|
}
|
|
|
|
function loadStuckState(basePath: string): { recentUnits: Array<{ key: string }>; stuckRecoveryAttempts: number } {
|
|
try {
|
|
const data = JSON.parse(readFileSync(stuckStatePath(basePath), "utf-8"));
|
|
return {
|
|
recentUnits: Array.isArray(data.recentUnits) ? data.recentUnits : [],
|
|
stuckRecoveryAttempts: typeof data.stuckRecoveryAttempts === "number" ? data.stuckRecoveryAttempts : 0,
|
|
};
|
|
} catch (err) {
|
|
debugLog("autoLoop", { phase: "load-stuck-state-failed", error: err instanceof Error ? err.message : String(err) });
|
|
return { recentUnits: [], stuckRecoveryAttempts: 0 };
|
|
}
|
|
}
|
|
|
|
function saveStuckState(basePath: string, state: LoopState): void {
|
|
try {
|
|
const filePath = stuckStatePath(basePath);
|
|
mkdirSync(join(gsdRoot(basePath), "runtime"), { recursive: true });
|
|
writeFileSync(filePath, JSON.stringify({
|
|
recentUnits: state.recentUnits.slice(-20), // keep last 20 entries
|
|
stuckRecoveryAttempts: state.stuckRecoveryAttempts,
|
|
updatedAt: new Date().toISOString(),
|
|
}) + "\n");
|
|
} catch (err) {
|
|
debugLog("autoLoop", { phase: "save-stuck-state-failed", error: err instanceof Error ? err.message : String(err) });
|
|
}
|
|
}
|
|
|
|
// ── Memory pressure monitoring (#3331) ──────────────────────────────────
|
|
// Check heap usage every N iterations and trigger graceful shutdown before
|
|
// the OS OOM killer sends SIGKILL. The threshold is 90% of the V8 heap
|
|
// limit (--max-old-space-size or default ~1.5-4GB depending on platform).
|
|
const MEMORY_CHECK_INTERVAL = 5; // check every 5 iterations
|
|
const MEMORY_PRESSURE_THRESHOLD = 0.85; // 85% of heap limit
|
|
|
|
function checkMemoryPressure(): { pressured: boolean; heapMB: number; limitMB: number; pct: number } {
|
|
const mem = process.memoryUsage();
|
|
// v8.getHeapStatistics() gives heap_size_limit but requires import
|
|
// Use a conservative estimate: RSS > 3GB is danger zone on most systems
|
|
const heapMB = Math.round(mem.heapUsed / 1024 / 1024);
|
|
const rssMB = Math.round(mem.rss / 1024 / 1024);
|
|
// Try to get the actual V8 heap limit
|
|
let limitMB = 4096; // conservative default
|
|
try {
|
|
const v8 = require("node:v8");
|
|
const stats = v8.getHeapStatistics();
|
|
limitMB = Math.round(stats.heap_size_limit / 1024 / 1024);
|
|
} catch { limitMB = 4096; /* v8 stats unavailable — use conservative default */ }
|
|
const pct = heapMB / limitMB;
|
|
return { pressured: pct > MEMORY_PRESSURE_THRESHOLD, heapMB, limitMB, pct };
|
|
}
|
|
|
|
/**
|
|
* Main auto-mode execution loop. Iterates: derive → dispatch → guards →
|
|
* runUnit → finalize → repeat. Exits when s.active becomes false or a
|
|
* terminal condition is reached.
|
|
*
|
|
* This is the linear replacement for the recursive
|
|
* dispatchNextUnit → handleAgentEnd → dispatchNextUnit chain.
|
|
*/
|
|
export async function autoLoop(
|
|
ctx: ExtensionContext,
|
|
pi: ExtensionAPI,
|
|
s: AutoSession,
|
|
deps: LoopDeps,
|
|
): Promise<void> {
|
|
debugLog("autoLoop", { phase: "enter" });
|
|
let iteration = 0;
|
|
// Load persisted stuck state so counters survive session restarts (#3704)
|
|
const persisted = loadStuckState(s.basePath);
|
|
const loopState: LoopState = {
|
|
recentUnits: persisted.recentUnits,
|
|
stuckRecoveryAttempts: persisted.stuckRecoveryAttempts,
|
|
consecutiveFinalizeTimeouts: 0,
|
|
};
|
|
let consecutiveErrors = 0;
|
|
let consecutiveCooldowns = 0;
|
|
const recentErrorMessages: string[] = [];
|
|
|
|
while (s.active) {
|
|
iteration++;
|
|
debugLog("autoLoop", { phase: "loop-top", iteration });
|
|
|
|
// ── Journal: per-iteration flow grouping ──
|
|
const flowId = randomUUID();
|
|
let seqCounter = 0;
|
|
const nextSeq = () => ++seqCounter;
|
|
const turnId = randomUUID();
|
|
const turnStartedAt = new Date().toISOString();
|
|
let observedUnitType: string | undefined;
|
|
let observedUnitId: string | undefined;
|
|
let turnFinished = false;
|
|
const finishTurn = (
|
|
status: "completed" | "failed" | "paused" | "stopped" | "skipped" | "retry",
|
|
failureClass: "none" | "unknown" | "manual-attention" | "timeout" | "execution" | "closeout" = "none",
|
|
error?: string,
|
|
): void => {
|
|
if (turnFinished) return;
|
|
turnFinished = true;
|
|
deps.uokObserver?.onTurnResult({
|
|
traceId: flowId,
|
|
turnId,
|
|
iteration,
|
|
unitType: observedUnitType,
|
|
unitId: observedUnitId,
|
|
status,
|
|
failureClass,
|
|
phaseResults: [],
|
|
error,
|
|
startedAt: turnStartedAt,
|
|
finishedAt: new Date().toISOString(),
|
|
});
|
|
};
|
|
deps.uokObserver?.onTurnStart({
|
|
traceId: flowId,
|
|
turnId,
|
|
iteration,
|
|
basePath: s.basePath,
|
|
startedAt: turnStartedAt,
|
|
});
|
|
|
|
if (iteration > MAX_LOOP_ITERATIONS) {
|
|
debugLog("autoLoop", {
|
|
phase: "exit",
|
|
reason: "max-iterations",
|
|
iteration,
|
|
});
|
|
await deps.stopAuto(
|
|
ctx,
|
|
pi,
|
|
`Safety: loop exceeded ${MAX_LOOP_ITERATIONS} iterations — possible runaway`,
|
|
);
|
|
finishTurn("stopped", "manual-attention", "max-iterations");
|
|
break;
|
|
}
|
|
|
|
// ── Memory pressure check (#3331) ──
|
|
// Graceful shutdown before OOM killer sends SIGKILL.
|
|
if (iteration % MEMORY_CHECK_INTERVAL === 0) {
|
|
const mem = checkMemoryPressure();
|
|
debugLog("autoLoop", { phase: "memory-check", ...mem });
|
|
if (mem.pressured) {
|
|
logWarning("dispatch", `Memory pressure: ${mem.heapMB}MB / ${mem.limitMB}MB (${Math.round(mem.pct * 100)}%) — stopping auto-mode to prevent OOM kill`);
|
|
await deps.stopAuto(
|
|
ctx,
|
|
pi,
|
|
`Memory pressure: heap at ${mem.heapMB}MB / ${mem.limitMB}MB (${Math.round(mem.pct * 100)}%). ` +
|
|
`Stopping gracefully to prevent OOM kill after ${iteration} iterations. ` +
|
|
`Resume with /gsd auto to continue from where you left off.`,
|
|
);
|
|
finishTurn("stopped", "timeout", "memory-pressure");
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!s.cmdCtx) {
|
|
debugLog("autoLoop", { phase: "exit", reason: "no-cmdCtx" });
|
|
finishTurn("stopped", "manual-attention", "missing-command-context");
|
|
break;
|
|
}
|
|
|
|
try {
|
|
// ── Blanket try/catch: one bad iteration must not kill the session
|
|
const prefs = deps.loadEffectiveGSDPreferences()?.preferences;
|
|
|
|
// ── Check sidecar queue before deriveState ──
|
|
let sidecarItem: SidecarItem | undefined;
|
|
if (s.sidecarQueue.length > 0) {
|
|
sidecarItem = s.sidecarQueue.shift()!;
|
|
debugLog("autoLoop", {
|
|
phase: "sidecar-dequeue",
|
|
kind: sidecarItem.kind,
|
|
unitType: sidecarItem.unitType,
|
|
unitId: sidecarItem.unitId,
|
|
});
|
|
deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "sidecar-dequeue", data: { kind: sidecarItem.kind, unitType: sidecarItem.unitType, unitId: sidecarItem.unitId } });
|
|
}
|
|
|
|
const sessionLockBase = deps.lockBase();
|
|
if (sessionLockBase) {
|
|
const lockStatus = deps.validateSessionLock(sessionLockBase);
|
|
if (!lockStatus.valid) {
|
|
debugLog("autoLoop", {
|
|
phase: "session-lock-invalid",
|
|
reason: lockStatus.failureReason ?? "unknown",
|
|
existingPid: lockStatus.existingPid,
|
|
expectedPid: lockStatus.expectedPid,
|
|
});
|
|
deps.handleLostSessionLock(ctx, lockStatus);
|
|
debugLog("autoLoop", {
|
|
phase: "exit",
|
|
reason: "session-lock-lost",
|
|
detail: lockStatus.failureReason ?? "unknown",
|
|
});
|
|
break;
|
|
}
|
|
}
|
|
|
|
const ic: IterationContext = { ctx, pi, s, deps, prefs, iteration, flowId, nextSeq };
|
|
deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-start", data: { iteration } });
|
|
let iterData: IterationData;
|
|
|
|
// ── Custom engine path ──────────────────────────────────────────────
|
|
// When activeEngineId is a non-dev value, bypass runPreDispatch and
|
|
// runDispatch entirely — the custom engine drives its own state via
|
|
// GRAPH.yaml. Shares runGuards and runUnitPhase with the dev path.
|
|
// After unit execution, verifies then reconciles via the engine layer.
|
|
//
|
|
// GSD_ENGINE_BYPASS=1 skips the engine layer entirely — falls through
|
|
// to the dev path below.
|
|
if (s.activeEngineId != null && s.activeEngineId !== "dev" && !sidecarItem && process.env.GSD_ENGINE_BYPASS !== "1") {
|
|
debugLog("autoLoop", { phase: "custom-engine-derive", iteration, engineId: s.activeEngineId });
|
|
|
|
const { engine, policy } = resolveEngine({
|
|
activeEngineId: s.activeEngineId,
|
|
activeRunDir: s.activeRunDir,
|
|
});
|
|
|
|
const engineState = await engine.deriveState(s.basePath);
|
|
if (engineState.isComplete) {
|
|
await deps.stopAuto(ctx, pi, "Workflow complete");
|
|
break;
|
|
}
|
|
|
|
debugLog("autoLoop", { phase: "custom-engine-dispatch", iteration });
|
|
const dispatch = await engine.resolveDispatch(engineState, { basePath: s.basePath });
|
|
|
|
if (dispatch.action === "stop") {
|
|
await deps.stopAuto(ctx, pi, dispatch.reason ?? "Engine stopped");
|
|
break;
|
|
}
|
|
if (dispatch.action === "skip") {
|
|
continue;
|
|
}
|
|
|
|
// dispatch.action === "dispatch"
|
|
const step = dispatch.step!;
|
|
const gsdState = await deps.deriveState(s.basePath);
|
|
|
|
iterData = {
|
|
unitType: step.unitType,
|
|
unitId: step.unitId,
|
|
prompt: step.prompt,
|
|
finalPrompt: step.prompt,
|
|
pauseAfterUatDispatch: false,
|
|
state: gsdState,
|
|
mid: s.currentMilestoneId ?? "workflow",
|
|
midTitle: "Workflow",
|
|
isRetry: false,
|
|
previousTier: undefined,
|
|
};
|
|
observedUnitType = iterData.unitType;
|
|
observedUnitId = iterData.unitId;
|
|
|
|
// ── Progress widget (mirrors dev path in runDispatch) ──
|
|
deps.updateProgressWidget(ctx, iterData.unitType, iterData.unitId, iterData.state);
|
|
|
|
// ── Guards (shared with dev path) ──
|
|
const guardsResult = await runGuards(ic, s.currentMilestoneId ?? "workflow");
|
|
deps.uokObserver?.onPhaseResult("guard", guardsResult.action, {
|
|
unitType: iterData.unitType,
|
|
unitId: iterData.unitId,
|
|
});
|
|
if (guardsResult.action === "break") {
|
|
finishTurn("stopped", "manual-attention", "guard-break");
|
|
break;
|
|
}
|
|
|
|
// ── Unit execution (shared with dev path) ──
|
|
const unitPhaseResult = await runUnitPhase(ic, iterData, loopState);
|
|
deps.uokObserver?.onPhaseResult("unit", unitPhaseResult.action, {
|
|
unitType: iterData.unitType,
|
|
unitId: iterData.unitId,
|
|
});
|
|
if (unitPhaseResult.action === "break") {
|
|
finishTurn("stopped", "execution", "unit-break");
|
|
break;
|
|
}
|
|
|
|
// ── Verify first, then reconcile (only mark complete on pass) ──
|
|
debugLog("autoLoop", { phase: "custom-engine-verify", iteration, unitId: iterData.unitId });
|
|
const verifyResult = await policy.verify(iterData.unitType, iterData.unitId, { basePath: s.basePath });
|
|
if (verifyResult === "pause") {
|
|
await deps.pauseAuto(ctx, pi);
|
|
deps.uokObserver?.onPhaseResult("custom-engine", "pause", {
|
|
unitType: iterData.unitType,
|
|
unitId: iterData.unitId,
|
|
});
|
|
finishTurn("paused", "manual-attention", "custom-engine-verify-pause");
|
|
break;
|
|
}
|
|
if (verifyResult === "retry") {
|
|
debugLog("autoLoop", { phase: "custom-engine-verify-retry", iteration, unitId: iterData.unitId });
|
|
deps.uokObserver?.onPhaseResult("custom-engine", "retry", {
|
|
unitType: iterData.unitType,
|
|
unitId: iterData.unitId,
|
|
});
|
|
finishTurn("retry");
|
|
continue;
|
|
}
|
|
|
|
// Verification passed — mark step complete
|
|
debugLog("autoLoop", { phase: "custom-engine-reconcile", iteration, unitId: iterData.unitId });
|
|
const reconcileResult = await engine.reconcile(engineState, {
|
|
unitType: iterData.unitType,
|
|
unitId: iterData.unitId,
|
|
startedAt: s.currentUnit?.startedAt ?? Date.now(),
|
|
finishedAt: Date.now(),
|
|
});
|
|
|
|
deps.clearUnitTimeout();
|
|
consecutiveErrors = 0;
|
|
consecutiveCooldowns = 0;
|
|
recentErrorMessages.length = 0;
|
|
deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } });
|
|
saveStuckState(s.basePath, loopState); // persist across session restarts (#3704)
|
|
debugLog("autoLoop", { phase: "iteration-complete", iteration });
|
|
|
|
if (reconcileResult.outcome === "milestone-complete") {
|
|
await deps.stopAuto(ctx, pi, "Workflow complete");
|
|
deps.uokObserver?.onPhaseResult("custom-engine", "milestone-complete", {
|
|
unitType: iterData.unitType,
|
|
unitId: iterData.unitId,
|
|
});
|
|
finishTurn("completed");
|
|
break;
|
|
}
|
|
if (reconcileResult.outcome === "pause") {
|
|
await deps.pauseAuto(ctx, pi);
|
|
deps.uokObserver?.onPhaseResult("custom-engine", "pause", {
|
|
unitType: iterData.unitType,
|
|
unitId: iterData.unitId,
|
|
});
|
|
finishTurn("paused", "manual-attention");
|
|
break;
|
|
}
|
|
if (reconcileResult.outcome === "stop") {
|
|
await deps.stopAuto(ctx, pi, reconcileResult.reason ?? "Engine stopped");
|
|
deps.uokObserver?.onPhaseResult("custom-engine", "stop", {
|
|
unitType: iterData.unitType,
|
|
unitId: iterData.unitId,
|
|
reason: reconcileResult.reason,
|
|
});
|
|
finishTurn("stopped", "manual-attention", reconcileResult.reason);
|
|
break;
|
|
}
|
|
deps.uokObserver?.onPhaseResult("custom-engine", "continue", {
|
|
unitType: iterData.unitType,
|
|
unitId: iterData.unitId,
|
|
});
|
|
finishTurn("completed");
|
|
continue;
|
|
}
|
|
|
|
if (!sidecarItem) {
|
|
// ── Phase 1: Pre-dispatch ─────────────────────────────────────────
|
|
const preDispatchResult = await runPreDispatch(ic, loopState);
|
|
deps.uokObserver?.onPhaseResult("pre-dispatch", preDispatchResult.action);
|
|
if (preDispatchResult.action === "break") {
|
|
finishTurn("stopped", "manual-attention", "pre-dispatch-break");
|
|
break;
|
|
}
|
|
if (preDispatchResult.action === "continue") {
|
|
finishTurn("skipped");
|
|
continue;
|
|
}
|
|
|
|
const preData = preDispatchResult.data;
|
|
|
|
// ── Phase 2: Guards ───────────────────────────────────────────────
|
|
const guardsResult = await runGuards(ic, preData.mid);
|
|
deps.uokObserver?.onPhaseResult("guard", guardsResult.action);
|
|
if (guardsResult.action === "break") {
|
|
finishTurn("stopped", "manual-attention", "guard-break");
|
|
break;
|
|
}
|
|
|
|
// ── Phase 3: Dispatch ─────────────────────────────────────────────
|
|
const dispatchResult = await runDispatch(ic, preData, loopState);
|
|
deps.uokObserver?.onPhaseResult("dispatch", dispatchResult.action);
|
|
if (dispatchResult.action === "break") {
|
|
finishTurn("stopped", "manual-attention", "dispatch-break");
|
|
break;
|
|
}
|
|
if (dispatchResult.action === "continue") {
|
|
finishTurn("skipped");
|
|
continue;
|
|
}
|
|
iterData = dispatchResult.data;
|
|
observedUnitType = iterData.unitType;
|
|
observedUnitId = iterData.unitId;
|
|
} else {
|
|
// ── Sidecar path: use values from the sidecar item directly ──
|
|
const sidecarState = await deps.deriveState(s.basePath);
|
|
iterData = {
|
|
unitType: sidecarItem.unitType,
|
|
unitId: sidecarItem.unitId,
|
|
prompt: sidecarItem.prompt,
|
|
finalPrompt: sidecarItem.prompt,
|
|
pauseAfterUatDispatch: false,
|
|
state: sidecarState,
|
|
mid: sidecarState.activeMilestone?.id,
|
|
midTitle: sidecarState.activeMilestone?.title,
|
|
isRetry: false, previousTier: undefined,
|
|
};
|
|
observedUnitType = iterData.unitType;
|
|
observedUnitId = iterData.unitId;
|
|
deps.uokObserver?.onPhaseResult("dispatch", "sidecar", {
|
|
unitType: iterData.unitType,
|
|
unitId: iterData.unitId,
|
|
sidecarKind: sidecarItem.kind,
|
|
});
|
|
}
|
|
|
|
const unitPhaseResult = await runUnitPhase(ic, iterData, loopState, sidecarItem);
|
|
deps.uokObserver?.onPhaseResult("unit", unitPhaseResult.action, {
|
|
unitType: iterData.unitType,
|
|
unitId: iterData.unitId,
|
|
});
|
|
if (unitPhaseResult.action === "break") {
|
|
finishTurn("stopped", "execution", "unit-break");
|
|
break;
|
|
}
|
|
|
|
// ── Phase 5: Finalize ───────────────────────────────────────────────
|
|
|
|
const finalizeResult = await runFinalize(ic, iterData, loopState, sidecarItem);
|
|
deps.uokObserver?.onPhaseResult("finalize", finalizeResult.action, {
|
|
unitType: iterData.unitType,
|
|
unitId: iterData.unitId,
|
|
});
|
|
if (finalizeResult.action === "break") {
|
|
finishTurn("stopped", "closeout", "finalize-break");
|
|
break;
|
|
}
|
|
if (finalizeResult.action === "continue") {
|
|
finishTurn("retry");
|
|
continue;
|
|
}
|
|
|
|
consecutiveErrors = 0; // Iteration completed successfully
|
|
consecutiveCooldowns = 0;
|
|
recentErrorMessages.length = 0;
|
|
deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } });
|
|
debugLog("autoLoop", { phase: "iteration-complete", iteration });
|
|
finishTurn("completed");
|
|
} catch (loopErr) {
|
|
// ── Blanket catch: absorb unexpected exceptions, apply graduated recovery ──
|
|
const msg = loopErr instanceof Error ? loopErr.message : String(loopErr);
|
|
|
|
// Always emit iteration-end on error so the journal records iteration
|
|
// completion even on failure (#2344). Without this, errors in
|
|
// runFinalize leave the journal incomplete, making diagnosis harder.
|
|
deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration, error: msg } });
|
|
|
|
// ── Infrastructure errors: immediate stop, no retry ──
|
|
// These are unrecoverable (disk full, OOM, etc.). Retrying just burns
|
|
// LLM budget on guaranteed failures.
|
|
const infraCode = isInfrastructureError(loopErr);
|
|
if (infraCode) {
|
|
debugLog("autoLoop", {
|
|
phase: "infrastructure-error",
|
|
iteration,
|
|
code: infraCode,
|
|
error: msg,
|
|
});
|
|
ctx.ui.notify(
|
|
`Auto-mode stopped: infrastructure error ${infraCode} — ${msg}`,
|
|
"error",
|
|
);
|
|
await deps.stopAuto(
|
|
ctx,
|
|
pi,
|
|
`Infrastructure error (${infraCode}): not recoverable by retry`,
|
|
);
|
|
finishTurn("failed", "execution", msg);
|
|
break;
|
|
}
|
|
|
|
// ── Credential cooldown: wait and retry with bounded budget ──
|
|
// A 429 triggers a 30s credential backoff in AuthStorage. If the SDK's
|
|
// getApiKey() retries couldn't outlast the window, the error surfaces
|
|
// here. Wait for the cooldown to clear rather than counting it as a
|
|
// consecutive failure — but cap retries so we don't spin for hours
|
|
// on persistent quota exhaustion.
|
|
if (isTransientCooldownError(loopErr)) {
|
|
consecutiveCooldowns++;
|
|
const retryAfterMs = getCooldownRetryAfterMs(loopErr);
|
|
debugLog("autoLoop", {
|
|
phase: "cooldown-wait",
|
|
iteration,
|
|
consecutiveCooldowns,
|
|
retryAfterMs,
|
|
error: msg,
|
|
});
|
|
|
|
if (consecutiveCooldowns > MAX_COOLDOWN_RETRIES) {
|
|
ctx.ui.notify(
|
|
`Auto-mode stopped: ${consecutiveCooldowns} consecutive credential cooldowns — rate limit or quota may be persistently exhausted.`,
|
|
"error",
|
|
);
|
|
await deps.stopAuto(
|
|
ctx,
|
|
pi,
|
|
`${consecutiveCooldowns} consecutive credential cooldowns exceeded retry budget`,
|
|
);
|
|
break;
|
|
}
|
|
|
|
const waitMs = (retryAfterMs !== undefined && retryAfterMs > 0 && retryAfterMs <= 60_000)
|
|
? retryAfterMs + 500 // Use structured hint + small buffer
|
|
: COOLDOWN_FALLBACK_WAIT_MS;
|
|
ctx.ui.notify(
|
|
`Credentials in cooldown (${consecutiveCooldowns}/${MAX_COOLDOWN_RETRIES}) — waiting ${Math.round(waitMs / 1000)}s before retrying.`,
|
|
"warning",
|
|
);
|
|
await new Promise(resolve => setTimeout(resolve, waitMs));
|
|
finishTurn("retry", "timeout", msg);
|
|
continue; // Retry iteration without incrementing consecutiveErrors
|
|
}
|
|
|
|
consecutiveErrors++;
|
|
recentErrorMessages.push(msg.length > 120 ? msg.slice(0, 120) + "..." : msg);
|
|
debugLog("autoLoop", {
|
|
phase: "iteration-error",
|
|
iteration,
|
|
consecutiveErrors,
|
|
error: msg,
|
|
});
|
|
|
|
if (consecutiveErrors >= 3) {
|
|
// 3+ consecutive: hard stop — something is fundamentally broken
|
|
const errorHistory = recentErrorMessages
|
|
.map((m, i) => ` ${i + 1}. ${m}`)
|
|
.join("\n");
|
|
ctx.ui.notify(
|
|
`Auto-mode stopped: ${consecutiveErrors} consecutive iteration failures:\n${errorHistory}`,
|
|
"error",
|
|
);
|
|
await deps.stopAuto(
|
|
ctx,
|
|
pi,
|
|
`${consecutiveErrors} consecutive iteration failures`,
|
|
);
|
|
finishTurn("failed", "execution", msg);
|
|
break;
|
|
} else if (consecutiveErrors === 2) {
|
|
// 2nd consecutive: try invalidating caches + re-deriving state
|
|
ctx.ui.notify(
|
|
`Iteration error (attempt ${consecutiveErrors}): ${msg}. Invalidating caches and retrying.`,
|
|
"warning",
|
|
);
|
|
deps.invalidateAllCaches();
|
|
} else {
|
|
// 1st error: log and retry — transient failures happen
|
|
ctx.ui.notify(`Iteration error: ${msg}. Retrying.`, "warning");
|
|
}
|
|
finishTurn("retry", "execution", msg);
|
|
}
|
|
}
|
|
|
|
_clearCurrentResolve();
|
|
debugLog("autoLoop", { phase: "exit", totalIterations: iteration });
|
|
}
|