From c7b13607b50b4bfd9a8279273bfb56e584469612 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sun, 17 May 2026 04:21:21 +0200 Subject: [PATCH] fix(wiggums): exponential backoff on autoLoop halt-watchdog-break MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the halt-watchdog detects stuck state, the autoLoop was logging "halt-watchdog-break" every iteration but otherwise tight-spinning through dispatch-resolve at ~2s/iteration. 2026-05-17 dogfood logged 60+ such events in a 30s window — pure CPU burn while the actual stuck condition stayed stuck. Fix: exponential backoff (1s → 2s → 4s → 8s → 16s → capped at 30s) based on how many halt thresholds have elapsed. Heartbeat() resets when real progress resumes (existing behavior). Backoff costs nothing when the loop is healthy. One of the 14 Ralph-Wiggum patterns surfaced this session. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/resources/extensions/sf/auto/loop.js | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/resources/extensions/sf/auto/loop.js b/src/resources/extensions/sf/auto/loop.js index 5dc4e4fda..eec802386 100644 --- a/src/resources/extensions/sf/auto/loop.js +++ b/src/resources/extensions/sf/auto/loop.js @@ -671,8 +671,18 @@ export async function autoLoop(ctx, pi, s, deps) { iteration, elapsedMs, }); - // Do not break the loop — the watchdog only emits observability - // signals. The operator or a future gate can decide to stop. + // #wiggums: when stuck, back off instead of tight-spinning. The + // old comment said "Do not break the loop — observability only", + // but observability without backoff means we burn CPU writing + // halt-watchdog-break events at ~1Hz while whatever's actually + // stuck stays stuck. 2026-05-17 dogfood logged 60+ such events + // in a single 30s window. Backoff is exponential up to 30s, so + // short stalls recover quickly while long stalls don't melt the + // CPU. Heartbeat() will clear the backoff when real progress + // resumes. + const stuckCycles = Math.floor(elapsedMs / DEFAULT_HALT_THRESHOLD_MS); + const backoffMs = Math.min(30_000, 1000 * 2 ** Math.min(5, stuckCycles)); + await new Promise((resolve) => setTimeout(resolve, backoffMs)); } // ── Journal: per-iteration flow grouping ── const flowId = randomUUID();