From 74ca7cd8cd57c5ee0cdacdd107271cf8989e04bd Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 14 Apr 2026 18:13:20 +0000 Subject: [PATCH] Wire workflow-logger through the state system The workflow-logger per-unit buffer API (_resetLogs / drainAndSummarize / formatForNotification) had zero callers outside tests, so accumulated warnings never reached users as a consolidated post-unit alert and the buffer leaked across units in the same Node process. Several state-layer sites also silently swallowed errors that should have surfaced. - auto/phases.ts: reset logger in runUnitPhase, drain + ctx.ui.notify in runFinalize success path, drain in both finalize timeout branches so timed-out unit logs don't bleed into the next iteration - auto/detect-stuck.ts: enrich stuck reasons with summarizeLogs() so recovery has the diagnostic context (read-only peek, no drain) - auto.ts: call setLogBasePath(base) in startAuto to pin the audit log on /clear resume and hot-reload paths that bypass dynamic-tools bootstrap - workflow-manifest.ts: log snapshotState ROLLBACK failures (split-brain signal) instead of silently swallowing them - state.ts: log reconcileDiskToDb roadmap read failures instead of silent continue - workflow-projections.ts: log renderStateProjection DB handle probe failures instead of silent return New regression tests cover the phases.ts wiring (source-scan), setLogBasePath in startAuto, detect-stuck enrichment runtime behavior (including the read-only peek invariant), and the three silent-catch fixes. --- src/resources/extensions/gsd/auto.ts | 5 + .../extensions/gsd/auto/detect-stuck.ts | 16 +- src/resources/extensions/gsd/auto/phases.ts | 31 ++- src/resources/extensions/gsd/gsd-db.ts | 11 +- src/resources/extensions/gsd/state.ts | 11 +- .../gsd/tests/workflow-logger-wiring.test.ts | 223 ++++++++++++++++++ .../extensions/gsd/workflow-projections.ts | 9 +- 7 files changed, 297 insertions(+), 9 deletions(-) create mode 100644 src/resources/extensions/gsd/tests/workflow-logger-wiring.test.ts diff --git a/src/resources/extensions/gsd/auto.ts b/src/resources/extensions/gsd/auto.ts index 666341dbf..104d5731e 100644 --- a/src/resources/extensions/gsd/auto.ts +++ b/src/resources/extensions/gsd/auto.ts @@ -1400,6 +1400,11 @@ export async function startAuto( s.stepMode = requestedStepMode; s.cmdCtx = ctx; s.basePath = base; + // Ensure the workflow-logger audit log is pinned to the project root + // even when auto-mode is entered via a path that bypasses the + // bootstrap/dynamic-tools ensureDbOpen() → setLogBasePath() chain + // (e.g. /clear resume, hot-reload). + setLogBasePath(base); s.unitDispatchCount.clear(); s.unitLifetimeDispatches.clear(); if (!getLedger()) initMetrics(base); diff --git a/src/resources/extensions/gsd/auto/detect-stuck.ts b/src/resources/extensions/gsd/auto/detect-stuck.ts index ab28f4850..9873e87a6 100644 --- a/src/resources/extensions/gsd/auto/detect-stuck.ts +++ b/src/resources/extensions/gsd/auto/detect-stuck.ts @@ -5,6 +5,7 @@ */ import type { WindowEntry } from "./types.js"; +import { summarizeLogs } from "../workflow-logger.js"; /** * Pattern matching ENOENT errors with a file path. @@ -28,6 +29,13 @@ export function detectStuck( ): { stuck: true; reason: string } | null { if (window.length < 2) return null; + // Peek (not drain) the workflow-logger buffer so stuck reasons can surface + // the underlying diagnostic context (projection failures, DB degradations, + // reconcile warnings) that usually explains *why* the loop is stuck. The + // auto-loop's finalize step owns the buffer lifecycle — this is read-only. + const loggerSummary = summarizeLogs(); + const suffix = loggerSummary ? ` — ${loggerSummary}` : ""; + const last = window[window.length - 1]; const prev = window[window.length - 2]; @@ -35,7 +43,7 @@ export function detectStuck( if (last.error && prev.error && last.error === prev.error) { return { stuck: true, - reason: `Same error repeated: ${last.error.slice(0, 200)}`, + reason: `Same error repeated: ${last.error.slice(0, 200)}${suffix}`, }; } @@ -45,7 +53,7 @@ export function detectStuck( if (lastThree.every((u) => u.key === last.key)) { return { stuck: true, - reason: `${last.key} derived 3 consecutive times without progress`, + reason: `${last.key} derived 3 consecutive times without progress${suffix}`, }; } } @@ -60,7 +68,7 @@ export function detectStuck( ) { return { stuck: true, - reason: `Oscillation detected: ${w[0].key} ↔ ${w[1].key}`, + reason: `Oscillation detected: ${w[0].key} ↔ ${w[1].key}${suffix}`, }; } } @@ -77,7 +85,7 @@ export function detectStuck( if (count >= 2) { return { stuck: true, - reason: `Missing file referenced twice: ${filePath} (ENOENT)`, + reason: `Missing file referenced twice: ${filePath} (ENOENT)${suffix}`, }; } enoentPaths.set(filePath, count); diff --git a/src/resources/extensions/gsd/auto/phases.ts b/src/resources/extensions/gsd/auto/phases.ts index 0bb09d797..9e1f21bd7 100644 --- a/src/resources/extensions/gsd/auto/phases.ts +++ b/src/resources/extensions/gsd/auto/phases.ts @@ -30,7 +30,15 @@ import { MergeConflictError } from "../git-service.js"; import { setCurrentPhase, clearCurrentPhase } from "../../shared/gsd-phase-state.js"; import { join, basename, dirname, parse as parsePath } from "node:path"; import { existsSync, cpSync, readdirSync } from "node:fs"; -import { logWarning, logError } from "../workflow-logger.js"; +import { + logWarning, + logError, + _resetLogs, + drainLogs, + drainAndSummarize, + formatForNotification, + hasAnyIssues, +} from "../workflow-logger.js"; import { gsdRoot } from "../paths.js"; import { atomicWriteSync } from "../atomic-write.js"; import { verifyExpectedArtifact, diagnoseExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.js"; @@ -1081,6 +1089,10 @@ export async function runUnitPhase( ); const previousTier = s.currentUnitRouting?.tier; + // Scope workflow-logger buffer to this unit so post-finalize drains are + // per-unit. Without this, the module-level _buffer accumulates across every + // unit in the same Node process (see workflow-logger.ts module header). + _resetLogs(); s.currentUnit = { type: unitType, id: unitId, startedAt: Date.now() }; setCurrentPhase(unitType); s.lastToolInvocationError = null; // #2883: clear stale error from previous unit @@ -1545,6 +1557,9 @@ export async function runFinalize( // cannot mutate state for the next unit (#3757). s.currentUnit = null; clearCurrentPhase(); + // Drop any logger entries from the timed-out unit so they don't bleed + // into the next iteration's drain. + drainLogs(); loopState.consecutiveFinalizeTimeouts++; debugLog("autoLoop", { phase: "pre-verification-timeout", @@ -1643,6 +1658,9 @@ export async function runFinalize( // cannot mutate state for the next unit (#3757). s.currentUnit = null; clearCurrentPhase(); + // Drop any logger entries from the timed-out unit so they don't bleed + // into the next iteration's drain. + drainLogs(); loopState.consecutiveFinalizeTimeouts++; debugLog("autoLoop", { phase: "post-verification-timeout", @@ -1687,5 +1705,16 @@ export async function runFinalize( // Both pre and post verification completed without timeout — reset counter loopState.consecutiveFinalizeTimeouts = 0; + // Surface accumulated workflow-logger issues for this unit to the user. + // Warnings/errors logged during the unit are buffered in the logger and + // drained here so the user sees a single consolidated post-unit alert. + if (hasAnyIssues()) { + const { logs } = drainAndSummarize(); + if (logs.length > 0) { + const severity = logs.some((e) => e.severity === "error") ? "error" : "warning"; + ctx.ui.notify(formatForNotification(logs), severity); + } + } + return { action: "next", data: undefined as void }; } diff --git a/src/resources/extensions/gsd/gsd-db.ts b/src/resources/extensions/gsd/gsd-db.ts index 6c8ff091d..274c4b73b 100644 --- a/src/resources/extensions/gsd/gsd-db.ts +++ b/src/resources/extensions/gsd/gsd-db.ts @@ -967,7 +967,16 @@ export function readTransaction(fn: () => T): T { currentDb.exec("COMMIT"); return result; } catch (err) { - try { currentDb.exec("ROLLBACK"); } catch { /* swallow */ } + try { + currentDb.exec("ROLLBACK"); + } catch (rollbackErr) { + // A failed ROLLBACK after a failed read is a split-brain signal — + // the transaction is in an indeterminate state. Surface it via the + // logger instead of swallowing it. + logError("db", "snapshotState ROLLBACK failed", { + error: (rollbackErr as Error).message, + }); + } throw err; } finally { _txDepth--; diff --git a/src/resources/extensions/gsd/state.ts b/src/resources/extensions/gsd/state.ts index 8e5340d18..9d0ec68d1 100644 --- a/src/resources/extensions/gsd/state.ts +++ b/src/resources/extensions/gsd/state.ts @@ -345,8 +345,15 @@ function reconcileDiskToDb(basePath: string): MilestoneRow[] { const dbSliceIds = new Set(dbSlices.map(s => s.id)); let roadmapContent: string; - try { roadmapContent = readFileSync(roadmapPath, "utf-8"); } - catch { continue; } + try { + roadmapContent = readFileSync(roadmapPath, "utf-8"); + } catch (err) { + logWarning("state", "reconcileDiskToDb: roadmap read failed, skipping milestone", { + mid, + error: (err as Error).message, + }); + continue; + } const parsed = parseRoadmap(roadmapContent); for (const s of parsed.slices) { diff --git a/src/resources/extensions/gsd/tests/workflow-logger-wiring.test.ts b/src/resources/extensions/gsd/tests/workflow-logger-wiring.test.ts new file mode 100644 index 000000000..6c57a487f --- /dev/null +++ b/src/resources/extensions/gsd/tests/workflow-logger-wiring.test.ts @@ -0,0 +1,223 @@ +// GSD Extension — workflow-logger wiring regression tests +// +// Verifies the plumbing between workflow-logger and the rest of the state +// system (auto-loop phases, detect-stuck, notification store). Without this +// wiring, warnings/errors logged during a unit leak across units, never +// reach the user as a consolidated post-unit alert, and don't enrich +// stuck-detection reasons. + +import test from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +import { + logWarning, + logError, + peekLogs, + _resetLogs, + setStderrLoggingEnabled, +} from "../workflow-logger.ts"; +import { detectStuck } from "../auto/detect-stuck.ts"; + +const phasesSrc = readFileSync( + join(import.meta.dirname, "..", "auto", "phases.ts"), + "utf-8", +); +const autoSrc = readFileSync( + join(import.meta.dirname, "..", "auto.ts"), + "utf-8", +); + +// ─── Source-scan: phases.ts calls the logger lifecycle API ───────────────── + +test("auto/phases.ts imports _resetLogs, drainAndSummarize, formatForNotification, hasAnyIssues", () => { + assert.match( + phasesSrc, + /from\s+"\.\.\/workflow-logger\.js"/, + "phases.ts imports from workflow-logger", + ); + for (const name of [ + "_resetLogs", + "drainLogs", + "drainAndSummarize", + "formatForNotification", + "hasAnyIssues", + ]) { + assert.ok( + phasesSrc.includes(name), + `phases.ts should reference ${name}`, + ); + } +}); + +test("runUnitPhase calls _resetLogs() before assigning s.currentUnit", () => { + // Find the "s.currentUnit = { type: unitType" assignment line and check + // the preceding ~500 chars contain a _resetLogs() call. + const idx = phasesSrc.indexOf("s.currentUnit = { type: unitType"); + assert.ok(idx > 0, "runUnitPhase should assign s.currentUnit"); + const before = phasesSrc.slice(Math.max(0, idx - 500), idx); + assert.match( + before, + /_resetLogs\(\)/, + "_resetLogs() must be called immediately before s.currentUnit assignment", + ); +}); + +test("runFinalize drains and surfaces logger buffer via ctx.ui.notify", () => { + // Locate the runFinalize success path and verify it calls drainAndSummarize + // and routes the result through ctx.ui.notify. + const runFinalizeIdx = phasesSrc.indexOf("export async function runFinalize"); + assert.ok(runFinalizeIdx > 0, "runFinalize export should exist"); + const finalizeBody = phasesSrc.slice(runFinalizeIdx); + assert.match( + finalizeBody, + /hasAnyIssues\(\)/, + "runFinalize should gate drain on hasAnyIssues", + ); + assert.match( + finalizeBody, + /drainAndSummarize\(\)/, + "runFinalize should call drainAndSummarize on success", + ); + assert.match( + finalizeBody, + /formatForNotification\(logs\)/, + "runFinalize should format drained logs for the notification", + ); +}); + +test("runFinalize timeout branches drain the buffer to prevent bleed", () => { + // Both timeout branches null out s.currentUnit — they should also drain + // so accumulated logs for the timed-out unit don't leak into the next. + const runFinalizeIdx = phasesSrc.indexOf("export async function runFinalize"); + const finalizeBody = phasesSrc.slice(runFinalizeIdx); + const drainCallCount = + (finalizeBody.match(/drainLogs\(\)/g) ?? []).length; + assert.ok( + drainCallCount >= 2, + `runFinalize timeout branches should each call drainLogs() (found ${drainCallCount}, expected >= 2)`, + ); +}); + +// ─── Source-scan: auto.ts calls setLogBasePath in startAuto ──────────────── + +test("startAuto calls setLogBasePath(base) so audit log is pinned on resume", () => { + const startAutoIdx = autoSrc.indexOf("export async function startAuto"); + assert.ok(startAutoIdx > 0, "startAuto export should exist"); + const body = autoSrc.slice(startAutoIdx); + assert.match( + body, + /setLogBasePath\(base\)/, + "startAuto must call setLogBasePath(base) to pin the audit log", + ); +}); + +// ─── Runtime: detect-stuck enriches reason with summarizeLogs() ──────────── + +test("detectStuck reason includes workflow-logger summary when logs present", () => { + setStderrLoggingEnabled(false); + try { + _resetLogs(); + logWarning("projection", "STATE.md render failed"); + logError("db", "WAL checkpoint failed"); + + const result = detectStuck([ + { key: "execute-task/slice-A/task-1", error: "ENOENT: no such file" }, + { key: "execute-task/slice-A/task-1", error: "ENOENT: no such file" }, + ]); + + assert.notEqual(result, null); + assert.equal(result!.stuck, true); + assert.match( + result!.reason, + /Same error repeated:/, + "reason should still start with the rule string", + ); + assert.match( + result!.reason, + /STATE\.md render failed/, + "reason should include the accumulated logger warning", + ); + assert.match( + result!.reason, + /WAL checkpoint failed/, + "reason should include the accumulated logger error", + ); + + // Critical: summarizeLogs must not drain — the auto-loop's finalize + // step owns the buffer lifecycle, detect-stuck is read-only. + assert.equal( + peekLogs().length, + 2, + "detect-stuck must not drain the buffer", + ); + } finally { + _resetLogs(); + setStderrLoggingEnabled(true); + } +}); + +test("detectStuck reason unchanged when logger buffer is empty", () => { + setStderrLoggingEnabled(false); + try { + _resetLogs(); + const result = detectStuck([ + { key: "A", error: "boom" }, + { key: "A", error: "boom" }, + ]); + assert.notEqual(result, null); + // No trailing " — " suffix when there are no logs to summarize. + assert.doesNotMatch( + result!.reason, + / — \d+ (error|warning)/, + "reason should have no logger suffix when buffer is empty", + ); + } finally { + setStderrLoggingEnabled(true); + } +}); + +// ─── Runtime: readTransaction rollback failure surfaces via logError ──────── +// +// snapshotState now delegates its transaction to readTransaction() in +// gsd-db.ts (single-writer refactor in #4198), so the split-brain +// ROLLBACK-failure log lives there, not in workflow-manifest.ts. + +test("readTransaction logs ROLLBACK failures as split-brain signal", () => { + const dbSrc = readFileSync( + join(import.meta.dirname, "..", "gsd-db.ts"), + "utf-8", + ); + assert.match( + dbSrc, + /logError\("db",\s*"snapshotState ROLLBACK failed"/, + "readTransaction ROLLBACK catch should call logError", + ); +}); + +// ─── Runtime: state.ts and workflow-projections.ts log silent bailouts ───── + +test("state.ts logs roadmap read failures instead of silently continuing", () => { + const stateSrc = readFileSync( + join(import.meta.dirname, "..", "state.ts"), + "utf-8", + ); + assert.match( + stateSrc, + /logWarning\("state",\s*"reconcileDiskToDb: roadmap read failed/, + "state.ts reconcileDiskToDb should log roadmap read failures", + ); +}); + +test("workflow-projections.ts logs DB probe failures instead of silent return", () => { + const projectionsSrc = readFileSync( + join(import.meta.dirname, "..", "workflow-projections.ts"), + "utf-8", + ); + assert.match( + projectionsSrc, + /logWarning\("projection",\s*"renderStateProjection: DB handle probe failed/, + "renderStateProjection DB probe should log on failure", + ); +}); diff --git a/src/resources/extensions/gsd/workflow-projections.ts b/src/resources/extensions/gsd/workflow-projections.ts index 6aea665f0..31cca5211 100644 --- a/src/resources/extensions/gsd/workflow-projections.ts +++ b/src/resources/extensions/gsd/workflow-projections.ts @@ -350,7 +350,14 @@ export async function renderStateProjection(basePath: string): Promise { // Probe DB handle — adapter may be set but underlying handle closed const adapter = _getAdapter(); if (!adapter) return; - try { adapter.prepare("SELECT 1").get(); } catch { return; } + try { + adapter.prepare("SELECT 1").get(); + } catch (err) { + logWarning("projection", "renderStateProjection: DB handle probe failed, skipping render", { + error: (err as Error).message, + }); + return; + } const state = await deriveState(basePath); const content = renderStateContent(state); const dir = join(basePath, ".gsd");