Wire workflow-logger through the state system
The workflow-logger per-unit buffer API (_resetLogs / drainAndSummarize / formatForNotification) had zero callers outside tests, so accumulated warnings never reached users as a consolidated post-unit alert and the buffer leaked across units in the same Node process. Several state-layer sites also silently swallowed errors that should have surfaced.

- auto/phases.ts: reset logger in runUnitPhase, drain + ctx.ui.notify in runFinalize success path, drain in both finalize timeout branches so timed-out unit logs don't bleed into the next iteration
- auto/detect-stuck.ts: enrich stuck reasons with summarizeLogs() so recovery has the diagnostic context (read-only peek, no drain)
- auto.ts: call setLogBasePath(base) in startAuto to pin the audit log on /clear resume and hot-reload paths that bypass dynamic-tools bootstrap
- gsd-db.ts: log ROLLBACK failures (split-brain signal) in the readTransaction that snapshotState delegates to, instead of silently swallowing them
- state.ts: log reconcileDiskToDb roadmap read failures instead of silent continue
- workflow-projections.ts: log renderStateProjection DB handle probe failures instead of silent return

New regression tests cover the phases.ts wiring (source-scan), setLogBasePath in startAuto, detect-stuck enrichment runtime behavior (including the read-only peek invariant), and the three silent-catch fixes.
This commit is contained in:
parent
0d66433b4f
commit
74ca7cd8cd
7 changed files with 297 additions and 9 deletions
|
|
@ -1400,6 +1400,11 @@ export async function startAuto(
|
|||
s.stepMode = requestedStepMode;
|
||||
s.cmdCtx = ctx;
|
||||
s.basePath = base;
|
||||
// Ensure the workflow-logger audit log is pinned to the project root
|
||||
// even when auto-mode is entered via a path that bypasses the
|
||||
// bootstrap/dynamic-tools ensureDbOpen() → setLogBasePath() chain
|
||||
// (e.g. /clear resume, hot-reload).
|
||||
setLogBasePath(base);
|
||||
s.unitDispatchCount.clear();
|
||||
s.unitLifetimeDispatches.clear();
|
||||
if (!getLedger()) initMetrics(base);
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
*/
|
||||
|
||||
import type { WindowEntry } from "./types.js";
|
||||
import { summarizeLogs } from "../workflow-logger.js";
|
||||
|
||||
/**
|
||||
* Pattern matching ENOENT errors with a file path.
|
||||
|
|
@ -28,6 +29,13 @@ export function detectStuck(
|
|||
): { stuck: true; reason: string } | null {
|
||||
if (window.length < 2) return null;
|
||||
|
||||
// Peek (not drain) the workflow-logger buffer so stuck reasons can surface
|
||||
// the underlying diagnostic context (projection failures, DB degradations,
|
||||
// reconcile warnings) that usually explains *why* the loop is stuck. The
|
||||
// auto-loop's finalize step owns the buffer lifecycle — this is read-only.
|
||||
const loggerSummary = summarizeLogs();
|
||||
const suffix = loggerSummary ? ` — ${loggerSummary}` : "";
|
||||
|
||||
const last = window[window.length - 1];
|
||||
const prev = window[window.length - 2];
|
||||
|
||||
|
|
@ -35,7 +43,7 @@ export function detectStuck(
|
|||
if (last.error && prev.error && last.error === prev.error) {
|
||||
return {
|
||||
stuck: true,
|
||||
reason: `Same error repeated: ${last.error.slice(0, 200)}`,
|
||||
reason: `Same error repeated: ${last.error.slice(0, 200)}${suffix}`,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -45,7 +53,7 @@ export function detectStuck(
|
|||
if (lastThree.every((u) => u.key === last.key)) {
|
||||
return {
|
||||
stuck: true,
|
||||
reason: `${last.key} derived 3 consecutive times without progress`,
|
||||
reason: `${last.key} derived 3 consecutive times without progress${suffix}`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -60,7 +68,7 @@ export function detectStuck(
|
|||
) {
|
||||
return {
|
||||
stuck: true,
|
||||
reason: `Oscillation detected: ${w[0].key} ↔ ${w[1].key}`,
|
||||
reason: `Oscillation detected: ${w[0].key} ↔ ${w[1].key}${suffix}`,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -77,7 +85,7 @@ export function detectStuck(
|
|||
if (count >= 2) {
|
||||
return {
|
||||
stuck: true,
|
||||
reason: `Missing file referenced twice: ${filePath} (ENOENT)`,
|
||||
reason: `Missing file referenced twice: ${filePath} (ENOENT)${suffix}`,
|
||||
};
|
||||
}
|
||||
enoentPaths.set(filePath, count);
|
||||
|
|
|
|||
|
|
@ -30,7 +30,15 @@ import { MergeConflictError } from "../git-service.js";
|
|||
import { setCurrentPhase, clearCurrentPhase } from "../../shared/gsd-phase-state.js";
|
||||
import { join, basename, dirname, parse as parsePath } from "node:path";
|
||||
import { existsSync, cpSync, readdirSync } from "node:fs";
|
||||
import { logWarning, logError } from "../workflow-logger.js";
|
||||
import {
|
||||
logWarning,
|
||||
logError,
|
||||
_resetLogs,
|
||||
drainLogs,
|
||||
drainAndSummarize,
|
||||
formatForNotification,
|
||||
hasAnyIssues,
|
||||
} from "../workflow-logger.js";
|
||||
import { gsdRoot } from "../paths.js";
|
||||
import { atomicWriteSync } from "../atomic-write.js";
|
||||
import { verifyExpectedArtifact, diagnoseExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.js";
|
||||
|
|
@ -1081,6 +1089,10 @@ export async function runUnitPhase(
|
|||
);
|
||||
const previousTier = s.currentUnitRouting?.tier;
|
||||
|
||||
// Scope workflow-logger buffer to this unit so post-finalize drains are
|
||||
// per-unit. Without this, the module-level _buffer accumulates across every
|
||||
// unit in the same Node process (see workflow-logger.ts module header).
|
||||
_resetLogs();
|
||||
s.currentUnit = { type: unitType, id: unitId, startedAt: Date.now() };
|
||||
setCurrentPhase(unitType);
|
||||
s.lastToolInvocationError = null; // #2883: clear stale error from previous unit
|
||||
|
|
@ -1545,6 +1557,9 @@ export async function runFinalize(
|
|||
// cannot mutate state for the next unit (#3757).
|
||||
s.currentUnit = null;
|
||||
clearCurrentPhase();
|
||||
// Drop any logger entries from the timed-out unit so they don't bleed
|
||||
// into the next iteration's drain.
|
||||
drainLogs();
|
||||
loopState.consecutiveFinalizeTimeouts++;
|
||||
debugLog("autoLoop", {
|
||||
phase: "pre-verification-timeout",
|
||||
|
|
@ -1643,6 +1658,9 @@ export async function runFinalize(
|
|||
// cannot mutate state for the next unit (#3757).
|
||||
s.currentUnit = null;
|
||||
clearCurrentPhase();
|
||||
// Drop any logger entries from the timed-out unit so they don't bleed
|
||||
// into the next iteration's drain.
|
||||
drainLogs();
|
||||
loopState.consecutiveFinalizeTimeouts++;
|
||||
debugLog("autoLoop", {
|
||||
phase: "post-verification-timeout",
|
||||
|
|
@ -1687,5 +1705,16 @@ export async function runFinalize(
|
|||
// Both pre and post verification completed without timeout — reset counter
|
||||
loopState.consecutiveFinalizeTimeouts = 0;
|
||||
|
||||
// Surface accumulated workflow-logger issues for this unit to the user.
|
||||
// Warnings/errors logged during the unit are buffered in the logger and
|
||||
// drained here so the user sees a single consolidated post-unit alert.
|
||||
if (hasAnyIssues()) {
|
||||
const { logs } = drainAndSummarize();
|
||||
if (logs.length > 0) {
|
||||
const severity = logs.some((e) => e.severity === "error") ? "error" : "warning";
|
||||
ctx.ui.notify(formatForNotification(logs), severity);
|
||||
}
|
||||
}
|
||||
|
||||
return { action: "next", data: undefined as void };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -967,7 +967,16 @@ export function readTransaction<T>(fn: () => T): T {
|
|||
currentDb.exec("COMMIT");
|
||||
return result;
|
||||
} catch (err) {
|
||||
try { currentDb.exec("ROLLBACK"); } catch { /* swallow */ }
|
||||
try {
|
||||
currentDb.exec("ROLLBACK");
|
||||
} catch (rollbackErr) {
|
||||
// A failed ROLLBACK after a failed read is a split-brain signal —
|
||||
// the transaction is in an indeterminate state. Surface it via the
|
||||
// logger instead of swallowing it.
|
||||
logError("db", "snapshotState ROLLBACK failed", {
|
||||
error: (rollbackErr as Error).message,
|
||||
});
|
||||
}
|
||||
throw err;
|
||||
} finally {
|
||||
_txDepth--;
|
||||
|
|
|
|||
|
|
@ -345,8 +345,15 @@ function reconcileDiskToDb(basePath: string): MilestoneRow[] {
|
|||
const dbSliceIds = new Set(dbSlices.map(s => s.id));
|
||||
|
||||
let roadmapContent: string;
|
||||
try { roadmapContent = readFileSync(roadmapPath, "utf-8"); }
|
||||
catch { continue; }
|
||||
try {
|
||||
roadmapContent = readFileSync(roadmapPath, "utf-8");
|
||||
} catch (err) {
|
||||
logWarning("state", "reconcileDiskToDb: roadmap read failed, skipping milestone", {
|
||||
mid,
|
||||
error: (err as Error).message,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
const parsed = parseRoadmap(roadmapContent);
|
||||
for (const s of parsed.slices) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,223 @@
|
|||
// GSD Extension — workflow-logger wiring regression tests
|
||||
//
|
||||
// Verifies the plumbing between workflow-logger and the rest of the state
|
||||
// system (auto-loop phases, detect-stuck, notification store). Without this
|
||||
// wiring, warnings/errors logged during a unit leak across units, never
|
||||
// reach the user as a consolidated post-unit alert, and don't enrich
|
||||
// stuck-detection reasons.
|
||||
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
import {
|
||||
logWarning,
|
||||
logError,
|
||||
peekLogs,
|
||||
_resetLogs,
|
||||
setStderrLoggingEnabled,
|
||||
} from "../workflow-logger.ts";
|
||||
import { detectStuck } from "../auto/detect-stuck.ts";
|
||||
|
||||
const phasesSrc = readFileSync(
|
||||
join(import.meta.dirname, "..", "auto", "phases.ts"),
|
||||
"utf-8",
|
||||
);
|
||||
const autoSrc = readFileSync(
|
||||
join(import.meta.dirname, "..", "auto.ts"),
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
// ─── Source-scan: phases.ts calls the logger lifecycle API ─────────────────
|
||||
|
||||
test("auto/phases.ts imports _resetLogs, drainAndSummarize, formatForNotification, hasAnyIssues", () => {
|
||||
assert.match(
|
||||
phasesSrc,
|
||||
/from\s+"\.\.\/workflow-logger\.js"/,
|
||||
"phases.ts imports from workflow-logger",
|
||||
);
|
||||
for (const name of [
|
||||
"_resetLogs",
|
||||
"drainLogs",
|
||||
"drainAndSummarize",
|
||||
"formatForNotification",
|
||||
"hasAnyIssues",
|
||||
]) {
|
||||
assert.ok(
|
||||
phasesSrc.includes(name),
|
||||
`phases.ts should reference ${name}`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test("runUnitPhase calls _resetLogs() before assigning s.currentUnit", () => {
|
||||
// Find the "s.currentUnit = { type: unitType" assignment line and check
|
||||
// the preceding ~500 chars contain a _resetLogs() call.
|
||||
const idx = phasesSrc.indexOf("s.currentUnit = { type: unitType");
|
||||
assert.ok(idx > 0, "runUnitPhase should assign s.currentUnit");
|
||||
const before = phasesSrc.slice(Math.max(0, idx - 500), idx);
|
||||
assert.match(
|
||||
before,
|
||||
/_resetLogs\(\)/,
|
||||
"_resetLogs() must be called immediately before s.currentUnit assignment",
|
||||
);
|
||||
});
|
||||
|
||||
test("runFinalize drains and surfaces logger buffer via ctx.ui.notify", () => {
|
||||
// Locate the runFinalize success path and verify it calls drainAndSummarize
|
||||
// and routes the result through ctx.ui.notify.
|
||||
const runFinalizeIdx = phasesSrc.indexOf("export async function runFinalize");
|
||||
assert.ok(runFinalizeIdx > 0, "runFinalize export should exist");
|
||||
const finalizeBody = phasesSrc.slice(runFinalizeIdx);
|
||||
assert.match(
|
||||
finalizeBody,
|
||||
/hasAnyIssues\(\)/,
|
||||
"runFinalize should gate drain on hasAnyIssues",
|
||||
);
|
||||
assert.match(
|
||||
finalizeBody,
|
||||
/drainAndSummarize\(\)/,
|
||||
"runFinalize should call drainAndSummarize on success",
|
||||
);
|
||||
assert.match(
|
||||
finalizeBody,
|
||||
/formatForNotification\(logs\)/,
|
||||
"runFinalize should format drained logs for the notification",
|
||||
);
|
||||
});
|
||||
|
||||
test("runFinalize timeout branches drain the buffer to prevent bleed", () => {
|
||||
// Both timeout branches null out s.currentUnit — they should also drain
|
||||
// so accumulated logs for the timed-out unit don't leak into the next.
|
||||
const runFinalizeIdx = phasesSrc.indexOf("export async function runFinalize");
|
||||
const finalizeBody = phasesSrc.slice(runFinalizeIdx);
|
||||
const drainCallCount =
|
||||
(finalizeBody.match(/drainLogs\(\)/g) ?? []).length;
|
||||
assert.ok(
|
||||
drainCallCount >= 2,
|
||||
`runFinalize timeout branches should each call drainLogs() (found ${drainCallCount}, expected >= 2)`,
|
||||
);
|
||||
});
|
||||
|
||||
// ─── Source-scan: auto.ts calls setLogBasePath in startAuto ────────────────
|
||||
|
||||
test("startAuto calls setLogBasePath(base) so audit log is pinned on resume", () => {
|
||||
const startAutoIdx = autoSrc.indexOf("export async function startAuto");
|
||||
assert.ok(startAutoIdx > 0, "startAuto export should exist");
|
||||
const body = autoSrc.slice(startAutoIdx);
|
||||
assert.match(
|
||||
body,
|
||||
/setLogBasePath\(base\)/,
|
||||
"startAuto must call setLogBasePath(base) to pin the audit log",
|
||||
);
|
||||
});
|
||||
|
||||
// ─── Runtime: detect-stuck enriches reason with summarizeLogs() ────────────
|
||||
|
||||
test("detectStuck reason includes workflow-logger summary when logs present", () => {
|
||||
setStderrLoggingEnabled(false);
|
||||
try {
|
||||
_resetLogs();
|
||||
logWarning("projection", "STATE.md render failed");
|
||||
logError("db", "WAL checkpoint failed");
|
||||
|
||||
const result = detectStuck([
|
||||
{ key: "execute-task/slice-A/task-1", error: "ENOENT: no such file" },
|
||||
{ key: "execute-task/slice-A/task-1", error: "ENOENT: no such file" },
|
||||
]);
|
||||
|
||||
assert.notEqual(result, null);
|
||||
assert.equal(result!.stuck, true);
|
||||
assert.match(
|
||||
result!.reason,
|
||||
/Same error repeated:/,
|
||||
"reason should still start with the rule string",
|
||||
);
|
||||
assert.match(
|
||||
result!.reason,
|
||||
/STATE\.md render failed/,
|
||||
"reason should include the accumulated logger warning",
|
||||
);
|
||||
assert.match(
|
||||
result!.reason,
|
||||
/WAL checkpoint failed/,
|
||||
"reason should include the accumulated logger error",
|
||||
);
|
||||
|
||||
// Critical: summarizeLogs must not drain — the auto-loop's finalize
|
||||
// step owns the buffer lifecycle, detect-stuck is read-only.
|
||||
assert.equal(
|
||||
peekLogs().length,
|
||||
2,
|
||||
"detect-stuck must not drain the buffer",
|
||||
);
|
||||
} finally {
|
||||
_resetLogs();
|
||||
setStderrLoggingEnabled(true);
|
||||
}
|
||||
});
|
||||
|
||||
test("detectStuck reason unchanged when logger buffer is empty", () => {
|
||||
setStderrLoggingEnabled(false);
|
||||
try {
|
||||
_resetLogs();
|
||||
const result = detectStuck([
|
||||
{ key: "A", error: "boom" },
|
||||
{ key: "A", error: "boom" },
|
||||
]);
|
||||
assert.notEqual(result, null);
|
||||
// No trailing " — " suffix when there are no logs to summarize.
|
||||
assert.doesNotMatch(
|
||||
result!.reason,
|
||||
/ — \d+ (error|warning)/,
|
||||
"reason should have no logger suffix when buffer is empty",
|
||||
);
|
||||
} finally {
|
||||
setStderrLoggingEnabled(true);
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Runtime: readTransaction rollback failure surfaces via logError ────────
|
||||
//
|
||||
// snapshotState now delegates its transaction to readTransaction() in
|
||||
// gsd-db.ts (single-writer refactor in #4198), so the split-brain
|
||||
// ROLLBACK-failure log lives there, not in workflow-manifest.ts.
|
||||
|
||||
test("readTransaction logs ROLLBACK failures as split-brain signal", () => {
|
||||
const dbSrc = readFileSync(
|
||||
join(import.meta.dirname, "..", "gsd-db.ts"),
|
||||
"utf-8",
|
||||
);
|
||||
assert.match(
|
||||
dbSrc,
|
||||
/logError\("db",\s*"snapshotState ROLLBACK failed"/,
|
||||
"readTransaction ROLLBACK catch should call logError",
|
||||
);
|
||||
});
|
||||
|
||||
// ─── Runtime: state.ts and workflow-projections.ts log silent bailouts ─────
|
||||
|
||||
test("state.ts logs roadmap read failures instead of silently continuing", () => {
|
||||
const stateSrc = readFileSync(
|
||||
join(import.meta.dirname, "..", "state.ts"),
|
||||
"utf-8",
|
||||
);
|
||||
assert.match(
|
||||
stateSrc,
|
||||
/logWarning\("state",\s*"reconcileDiskToDb: roadmap read failed/,
|
||||
"state.ts reconcileDiskToDb should log roadmap read failures",
|
||||
);
|
||||
});
|
||||
|
||||
test("workflow-projections.ts logs DB probe failures instead of silent return", () => {
|
||||
const projectionsSrc = readFileSync(
|
||||
join(import.meta.dirname, "..", "workflow-projections.ts"),
|
||||
"utf-8",
|
||||
);
|
||||
assert.match(
|
||||
projectionsSrc,
|
||||
/logWarning\("projection",\s*"renderStateProjection: DB handle probe failed/,
|
||||
"renderStateProjection DB probe should log on failure",
|
||||
);
|
||||
});
|
||||
|
|
@ -350,7 +350,14 @@ export async function renderStateProjection(basePath: string): Promise<void> {
|
|||
// Probe DB handle — adapter may be set but underlying handle closed
|
||||
const adapter = _getAdapter();
|
||||
if (!adapter) return;
|
||||
try { adapter.prepare("SELECT 1").get(); } catch { return; }
|
||||
try {
|
||||
adapter.prepare("SELECT 1").get();
|
||||
} catch (err) {
|
||||
logWarning("projection", "renderStateProjection: DB handle probe failed, skipping render", {
|
||||
error: (err as Error).message,
|
||||
});
|
||||
return;
|
||||
}
|
||||
const state = await deriveState(basePath);
|
||||
const content = renderStateContent(state);
|
||||
const dir = join(basePath, ".gsd");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue