From 53639aee5ddd835ae4209489e29e858fdee3f565 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Sat, 4 Apr 2026 21:04:26 -0500 Subject: [PATCH] fix(gsd): enrich vague diagnostic messages with root-cause context Closes #3524 --- .../extensions/gsd/auto-post-unit.ts | 9 ++- .../extensions/gsd/auto-verification.ts | 17 +++++- src/resources/extensions/gsd/auto/loop.ts | 9 ++- src/resources/extensions/gsd/auto/phases.ts | 14 +++-- src/resources/extensions/gsd/state.ts | 1 + .../gsd/tests/workflow-logger.test.ts | 58 ++++++------------- .../extensions/gsd/tools/complete-slice.ts | 8 +-- .../extensions/gsd/workflow-logger.ts | 21 ++++--- .../extensions/gsd/workflow-reconcile.ts | 4 +- 9 files changed, 73 insertions(+), 68 deletions(-) diff --git a/src/resources/extensions/gsd/auto-post-unit.ts b/src/resources/extensions/gsd/auto-post-unit.ts index 41a9ea323..38d730fec 100644 --- a/src/resources/extensions/gsd/auto-post-unit.ts +++ b/src/resources/extensions/gsd/auto-post-unit.ts @@ -33,6 +33,7 @@ import { import { verifyExpectedArtifact, resolveExpectedArtifactPath, + diagnoseExpectedArtifact, } from "./auto-recovery.js"; import { regenerateIfMissing } from "./workflow-projections.js"; import { syncStateToProjectRoot } from "./auto-worktree.js"; @@ -476,8 +477,9 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV // db_unavailable so the artifact was never written. Retrying would // produce an infinite re-dispatch loop (#2517). debugLog("postUnit", { phase: "artifact-verify-skip-db-unavailable", unitType: s.currentUnit.type, unitId: s.currentUnit.id }); + const dbSkipDiag = diagnoseExpectedArtifact(s.currentUnit.type, s.currentUnit.id, s.basePath); ctx.ui.notify( - `Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} but DB is unavailable — skipping retry to avoid loop (#2517)`, + `Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — DB unavailable, skipping retry.${dbSkipDiag ? ` Expected: ${dbSkipDiag}` : ""}`, "error", ); } else if (!triggerArtifactVerified) { @@ -486,14 +488,15 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV const retryKey = `${s.currentUnit.type}:${s.currentUnit.id}`; const attempt = (s.verificationRetryCount.get(retryKey) ?? 0) + 1; s.verificationRetryCount.set(retryKey, attempt); + const retryDiag = diagnoseExpectedArtifact(s.currentUnit.type, s.currentUnit.id, s.basePath); s.pendingVerificationRetry = { unitId: s.currentUnit.id, - failureContext: `Artifact verification failed: expected artifact for ${s.currentUnit.type} "${s.currentUnit.id}" was not found on disk after unit execution (attempt ${attempt}).`, + failureContext: `Artifact verification failed: expected artifact for ${s.currentUnit.type} "${s.currentUnit.id}" was not found on disk after unit execution (attempt ${attempt}).${retryDiag ? ` Expected: ${retryDiag}` : ""}`, attempt, }; debugLog("postUnit", { phase: "artifact-verify-retry", unitType: s.currentUnit.type, unitId: s.currentUnit.id, attempt }); ctx.ui.notify( - `Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — retrying (attempt ${attempt})`, + `Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — retrying (attempt ${attempt}).${retryDiag ? ` Expected: ${retryDiag}` : ""}`, "warning", ); return "retry"; diff --git a/src/resources/extensions/gsd/auto-verification.ts b/src/resources/extensions/gsd/auto-verification.ts index e7f915a66..16fa11d38 100644 --- a/src/resources/extensions/gsd/auto-verification.ts +++ b/src/resources/extensions/gsd/auto-verification.ts @@ -196,19 +196,30 @@ export async function runPostUnitVerification( failureContext: formatFailureContext(result), attempt: nextAttempt, }; + const failedCmds = result.checks + .filter((c) => c.exitCode !== 0) + .map((c) => c.command); + const cmdSummary = failedCmds.length <= 3 + ? failedCmds.join(", ") + : `${failedCmds.slice(0, 3).join(", ")}... and ${failedCmds.length - 3} more`; ctx.ui.notify( - `Verification failed — auto-fix attempt ${nextAttempt}/${maxRetries}`, + `Verification failed (${cmdSummary}) — auto-fix attempt ${nextAttempt}/${maxRetries}`, "warning", ); // Return "retry" — the autoLoop while loop will re-iterate with the retry context return "retry"; } else { // Gate failed, retries exhausted - const exhaustedAttempt = attempt + 1; s.verificationRetryCount.delete(s.currentUnit.id); s.pendingVerificationRetry = null; + const exhaustedFails = result.checks + .filter((c) => c.exitCode !== 0) + .map((c) => c.command); + const exhaustedSummary = exhaustedFails.length <= 3 + ? exhaustedFails.join(", ") + : `${exhaustedFails.slice(0, 3).join(", ")}... and ${exhaustedFails.length - 3} more`; ctx.ui.notify( - `Verification gate FAILED after ${exhaustedAttempt > maxRetries ? exhaustedAttempt - 1 : exhaustedAttempt} retries — pausing for human review`, + `Verification gate FAILED after ${attempt} ${attempt === 1 ? "retry" : "retries"} (${exhaustedSummary}) — pausing for human review`, "error", ); await pauseAuto(ctx, pi); diff --git a/src/resources/extensions/gsd/auto/loop.ts b/src/resources/extensions/gsd/auto/loop.ts index 712968422..84897a369 100644 --- a/src/resources/extensions/gsd/auto/loop.ts +++ b/src/resources/extensions/gsd/auto/loop.ts @@ -48,6 +48,7 @@ export async function autoLoop( let iteration = 0; const loopState: LoopState = { recentUnits: [], stuckRecoveryAttempts: 0 }; let consecutiveErrors = 0; + const recentErrorMessages: string[] = []; while (s.active) { iteration++; @@ -202,6 +203,7 @@ export async function autoLoop( deps.clearUnitTimeout(); consecutiveErrors = 0; + recentErrorMessages.length = 0; deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } }); debugLog("autoLoop", { phase: "iteration-complete", iteration }); continue; @@ -250,6 +252,7 @@ export async function autoLoop( if (finalizeResult.action === "continue") continue; consecutiveErrors = 0; // Iteration completed successfully + recentErrorMessages.length = 0; deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } }); debugLog("autoLoop", { phase: "iteration-complete", iteration }); } catch (loopErr) { @@ -280,6 +283,7 @@ export async function autoLoop( } consecutiveErrors++; + recentErrorMessages.push(msg.length > 120 ? msg.slice(0, 120) + "..." : msg); debugLog("autoLoop", { phase: "iteration-error", iteration, @@ -289,8 +293,11 @@ export async function autoLoop( if (consecutiveErrors >= 3) { // 3+ consecutive: hard stop — something is fundamentally broken + const errorHistory = recentErrorMessages + .map((m, i) => ` ${i + 1}. ${m}`) + .join("\n"); ctx.ui.notify( - `Auto-mode stopped: ${consecutiveErrors} consecutive iteration failures. Last: ${msg}`, + `Auto-mode stopped: ${consecutiveErrors} consecutive iteration failures:\n${errorHistory}`, "error", ); await deps.stopAuto( diff --git a/src/resources/extensions/gsd/auto/phases.ts b/src/resources/extensions/gsd/auto/phases.ts index a6f481066..430cde624 100644 --- a/src/resources/extensions/gsd/auto/phases.ts +++ b/src/resources/extensions/gsd/auto/phases.ts @@ -31,7 +31,7 @@ import { existsSync, cpSync } from "node:fs"; import { logWarning, logError } from "../workflow-logger.js"; import { gsdRoot } from "../paths.js"; import { atomicWriteSync } from "../atomic-write.js"; -import { verifyExpectedArtifact } from "../auto-recovery.js"; +import { verifyExpectedArtifact, diagnoseExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.js"; import { writeUnitRuntimeRecord } from "../unit-runtime.js"; // ─── generateMilestoneReport ────────────────────────────────────────────────── @@ -182,7 +182,7 @@ export async function runPreDispatch( } if (!healthGate.proceed) { ctx.ui.notify( - healthGate.reason ?? "Pre-dispatch health check failed.", + healthGate.reason || "Pre-dispatch health check failed — run /gsd doctor for details.", "error", ); await deps.pauseAuto(ctx, pi); @@ -628,15 +628,17 @@ export async function runDispatch( unitId, reason: stuckSignal.reason, }); + const stuckDiag = diagnoseExpectedArtifact(unitType, unitId, s.basePath); + const stuckRemediation = buildLoopRemediationSteps(unitType, unitId, s.basePath); + const stuckParts = [`Stuck on ${unitType} ${unitId} — ${stuckSignal.reason}.`]; + if (stuckDiag) stuckParts.push(`Expected: ${stuckDiag}`); + if (stuckRemediation) stuckParts.push(`To recover:\n${stuckRemediation}`); + ctx.ui.notify(stuckParts.join(" "), "error"); await deps.stopAuto( ctx, pi, `Stuck: ${stuckSignal.reason}`, ); - ctx.ui.notify( - `Stuck on ${unitType} ${unitId} — ${stuckSignal.reason}. The expected artifact was not written.`, - "error", - ); return { action: "break", reason: "stuck-detected" }; } } else { diff --git a/src/resources/extensions/gsd/state.ts b/src/resources/extensions/gsd/state.ts index 0de955677..ec616309e 100644 --- a/src/resources/extensions/gsd/state.ts +++ b/src/resources/extensions/gsd/state.ts @@ -259,6 +259,7 @@ export async function deriveState(basePath: string): Promise { _telemetry.markdownDeriveCount++; } } else { + logWarning("state", "DB unavailable — using filesystem state derivation (degraded mode)"); result = await _deriveStateImpl(basePath); _telemetry.markdownDeriveCount++; } diff --git a/src/resources/extensions/gsd/tests/workflow-logger.test.ts b/src/resources/extensions/gsd/tests/workflow-logger.test.ts index 84c5d935f..68ef6710f 100644 --- a/src/resources/extensions/gsd/tests/workflow-logger.test.ts +++ b/src/resources/extensions/gsd/tests/workflow-logger.test.ts @@ -217,12 +217,26 @@ describe("workflow-logger", () => { assert.ok(formatted.includes("\n")); }); - test("does not include context in formatted output", () => { + test("includes context fields in formatted output", () => { logError("tool", "failed", { cmd: "complete_task" }); const entries = drainLogs(); const formatted = formatForNotification(entries); - assert.equal(formatted, "[tool] failed"); - assert.ok(!formatted.includes("complete_task")); + assert.equal(formatted, "[tool] failed (cmd: complete_task)"); + }); + + test("excludes error key from context to avoid redundancy", () => { + logError("tool", "disk write failed", { error: "ENOSPC", path: "/tmp/foo" }); + const entries = drainLogs(); + const formatted = formatForNotification(entries); + assert.ok(formatted.includes("path: /tmp/foo")); + assert.ok(!formatted.includes("error: ENOSPC")); + }); + + test("formats entry without context unchanged", () => { + logError("intercept", "blocked write"); + const entries = drainLogs(); + const formatted = formatForNotification(entries); + assert.equal(formatted, "[intercept] blocked write"); }); }); @@ -279,44 +293,6 @@ describe("workflow-logger", () => { }); }); - describe("audit log persistence", () => { - let dir: string; - - beforeEach(() => { - dir = makeTempDir("wl-audit-"); - }); - - afterEach(() => { - setLogBasePath(""); - cleanup(dir); - }); - - test("writes entry to .gsd/audit-log.jsonl after setLogBasePath", () => { - setLogBasePath(dir); - logError("engine", "audit test entry"); - - const auditPath = join(dir, ".gsd", "audit-log.jsonl"); - assert.ok(existsSync(auditPath), "audit-log.jsonl should exist"); - const content = readFileSync(auditPath, "utf-8"); - const entry = JSON.parse(content.trim()); - assert.equal(entry.severity, "error"); - assert.equal(entry.component, "engine"); - assert.equal(entry.message, "audit test entry"); - }); - - test("_resetLogs does not clear the audit base path", () => { - setLogBasePath(dir); - _resetLogs(); - logError("engine", "post-reset entry"); - - const auditPath = join(dir, ".gsd", "audit-log.jsonl"); - assert.ok(existsSync(auditPath), "audit-log.jsonl should exist after _resetLogs"); - const content = readFileSync(auditPath, "utf-8"); - const entry = JSON.parse(content.trim()); - assert.equal(entry.message, "post-reset entry"); - }); - }); - describe("new log components (db, dispatch)", () => { test("logError with 'db' component stores correct component", () => { logError("db", "failed to copy DB to worktree", { error: "ENOENT" }); diff --git a/src/resources/extensions/gsd/tools/complete-slice.ts b/src/resources/extensions/gsd/tools/complete-slice.ts index 6d0312cfe..d555ab518 100644 --- a/src/resources/extensions/gsd/tools/complete-slice.ts +++ b/src/resources/extensions/gsd/tools/complete-slice.ts @@ -292,13 +292,11 @@ export async function handleCompleteSlice( // Toggle roadmap checkbox via renderer module const roadmapToggled = await renderRoadmapCheckboxes(basePath, params.milestoneId); if (!roadmapToggled) { - process.stderr.write( - `gsd-db: complete_slice — could not find roadmap for ${params.milestoneId}, skipping checkbox toggle\n`, - ); + logWarning("tool", `complete_slice — could not find roadmap for ${params.milestoneId}, skipping checkbox toggle`); } } catch (renderErr) { // Disk render failed — roll back DB status so state stays consistent - logWarning("tool", `complete_slice — disk render failed, rolling back DB status: ${(renderErr as Error).message}`); + logWarning("tool", `complete_slice — disk render failed for ${params.milestoneId}/${params.sliceId}, rolling back DB status`, { error: (renderErr as Error).message }); updateSliceStatus(params.milestoneId, params.sliceId, 'pending'); invalidateStateCache(); return { error: `disk render failed: ${(renderErr as Error).message}` }; @@ -325,7 +323,7 @@ export async function handleCompleteSlice( trigger_reason: params.triggerReason, }); } catch (hookErr) { - logWarning("tool", `complete-slice post-mutation hook warning: ${(hookErr as Error).message}`); + logWarning("tool", `complete-slice post-mutation hook failed for ${params.milestoneId}/${params.sliceId}`, { error: (hookErr as Error).message }); } return { diff --git a/src/resources/extensions/gsd/workflow-logger.ts b/src/resources/extensions/gsd/workflow-logger.ts index f31ef0bfa..1f0a7d163 100644 --- a/src/resources/extensions/gsd/workflow-logger.ts +++ b/src/resources/extensions/gsd/workflow-logger.ts @@ -174,17 +174,22 @@ export function summarizeLogs(): string | null { /** * Format entries for display (used by auto-loop post-unit notification). - * Note: context fields are not included in the formatted output. + * Includes key context fields (file paths, commands) when present. */ export function formatForNotification(entries: readonly LogEntry[]): string { if (entries.length === 0) return ""; - if (entries.length === 1) { - const e = entries[0]; - return `[${e.component}] ${e.message}`; - } - return entries - .map((e) => `[${e.component}] ${e.message}`) - .join("\n"); + return entries.map((e) => { + let line = `[${e.component}] ${e.message}`; + if (e.context) { + const ctxParts = Object.entries(e.context) + .filter(([k]) => k !== "error") // error is redundant with message + .map(([k, v]) => v.includes(",") ? `${k}: "${v}"` : `${k}: ${v}`); + if (ctxParts.length > 0) { + line += ` (${ctxParts.join(", ")})`; + } + } + return line; + }).join("\n"); } /** diff --git a/src/resources/extensions/gsd/workflow-reconcile.ts b/src/resources/extensions/gsd/workflow-reconcile.ts index 25eb0f78f..01bcce8b7 100644 --- a/src/resources/extensions/gsd/workflow-reconcile.ts +++ b/src/resources/extensions/gsd/workflow-reconcile.ts @@ -348,7 +348,9 @@ function _reconcileWorktreeLogsInner( if (conflicts.length > 0) { // D-04: atomic all-or-nothing — block entire merge writeConflictsFile(mainBasePath, conflicts, worktreeBasePath); - logError("reconcile", `${conflicts.length} conflict(s) detected`, { count: String(conflicts.length), path: join(mainBasePath, ".gsd", "CONFLICTS.md") }); + const conflictSummary = conflicts.slice(0, 3).map(c => `${c.entityType}:${c.entityId}`).join(", "); + const truncated = conflicts.length > 3 ? `... and ${conflicts.length - 3} more` : ""; + logError("reconcile", `${conflicts.length} conflict(s) detected on ${conflictSummary}${truncated}. Details: .gsd/CONFLICTS.md`, { count: String(conflicts.length), path: join(mainBasePath, ".gsd", "CONFLICTS.md") }); return { autoMerged: 0, conflicts }; }