fix(gsd): enrich vague diagnostic messages with root-cause context

Closes #3524
2026-04-04 21:04:26 -05:00 · 2026-04-04 21:04:26 -05:00 · 53639aee5d
commit 53639aee5d
parent 546c93cae7
9 changed files with 73 additions and 68 deletions
--- a/src/resources/extensions/gsd/auto-post-unit.ts
+++ b/src/resources/extensions/gsd/auto-post-unit.ts
@ -33,6 +33,7 @@ import {
 import {
  verifyExpectedArtifact,
  resolveExpectedArtifactPath,
+  diagnoseExpectedArtifact,
 } from "./auto-recovery.js";
 import { regenerateIfMissing } from "./workflow-projections.js";
 import { syncStateToProjectRoot } from "./auto-worktree.js";
@ -476,8 +477,9 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
        // db_unavailable so the artifact was never written. Retrying would
        // produce an infinite re-dispatch loop (#2517).
        debugLog("postUnit", { phase: "artifact-verify-skip-db-unavailable", unitType: s.currentUnit.type, unitId: s.currentUnit.id });
+        const dbSkipDiag = diagnoseExpectedArtifact(s.currentUnit.type, s.currentUnit.id, s.basePath);
        ctx.ui.notify(
-          `Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} but DB is unavailable — skipping retry to avoid loop (#2517)`,
+          `Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — DB unavailable, skipping retry.${dbSkipDiag ? ` Expected: ${dbSkipDiag}` : ""}`,
          "error",
        );
      } else if (!triggerArtifactVerified) {
@ -486,14 +488,15 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
          const retryKey = `${s.currentUnit.type}:${s.currentUnit.id}`;
          const attempt = (s.verificationRetryCount.get(retryKey) ?? 0) + 1;
          s.verificationRetryCount.set(retryKey, attempt);
+          const retryDiag = diagnoseExpectedArtifact(s.currentUnit.type, s.currentUnit.id, s.basePath);
          s.pendingVerificationRetry = {
            unitId: s.currentUnit.id,
-            failureContext: `Artifact verification failed: expected artifact for ${s.currentUnit.type} "${s.currentUnit.id}" was not found on disk after unit execution (attempt ${attempt}).`,
+            failureContext: `Artifact verification failed: expected artifact for ${s.currentUnit.type} "${s.currentUnit.id}" was not found on disk after unit execution (attempt ${attempt}).${retryDiag ? ` Expected: ${retryDiag}` : ""}`,
            attempt,
          };
          debugLog("postUnit", { phase: "artifact-verify-retry", unitType: s.currentUnit.type, unitId: s.currentUnit.id, attempt });
          ctx.ui.notify(
-            `Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — retrying (attempt ${attempt})`,
+            `Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — retrying (attempt ${attempt}).${retryDiag ? ` Expected: ${retryDiag}` : ""}`,
            "warning",
          );
          return "retry";
--- a/src/resources/extensions/gsd/auto-verification.ts
+++ b/src/resources/extensions/gsd/auto-verification.ts
@ -196,19 +196,30 @@ export async function runPostUnitVerification(
        failureContext: formatFailureContext(result),
        attempt: nextAttempt,
      };
+      const failedCmds = result.checks
+        .filter((c) => c.exitCode !== 0)
+        .map((c) => c.command);
+      const cmdSummary = failedCmds.length <= 3
+        ? failedCmds.join(", ")
+        : `${failedCmds.slice(0, 3).join(", ")}... and ${failedCmds.length - 3} more`;
      ctx.ui.notify(
-        `Verification failed — auto-fix attempt ${nextAttempt}/${maxRetries}`,
+        `Verification failed (${cmdSummary}) — auto-fix attempt ${nextAttempt}/${maxRetries}`,
        "warning",
      );
      // Return "retry" — the autoLoop while loop will re-iterate with the retry context
      return "retry";
    } else {
      // Gate failed, retries exhausted
-      const exhaustedAttempt = attempt + 1;
      s.verificationRetryCount.delete(s.currentUnit.id);
      s.pendingVerificationRetry = null;
+      const exhaustedFails = result.checks
+        .filter((c) => c.exitCode !== 0)
+        .map((c) => c.command);
+      const exhaustedSummary = exhaustedFails.length <= 3
+        ? exhaustedFails.join(", ")
+        : `${exhaustedFails.slice(0, 3).join(", ")}... and ${exhaustedFails.length - 3} more`;
      ctx.ui.notify(
-        `Verification gate FAILED after ${exhaustedAttempt > maxRetries ? exhaustedAttempt - 1 : exhaustedAttempt} retries — pausing for human review`,
+        `Verification gate FAILED after ${attempt} ${attempt === 1 ? "retry" : "retries"} (${exhaustedSummary}) — pausing for human review`,
        "error",
      );
      await pauseAuto(ctx, pi);
--- a/src/resources/extensions/gsd/auto/loop.ts
+++ b/src/resources/extensions/gsd/auto/loop.ts
@ -48,6 +48,7 @@ export async function autoLoop(
  let iteration = 0;
  const loopState: LoopState = { recentUnits: [], stuckRecoveryAttempts: 0 };
  let consecutiveErrors = 0;
+  const recentErrorMessages: string[] = [];

  while (s.active) {
    iteration++;
@ -202,6 +203,7 @@ export async function autoLoop(

        deps.clearUnitTimeout();
        consecutiveErrors = 0;
+        recentErrorMessages.length = 0;
        deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } });
        debugLog("autoLoop", { phase: "iteration-complete", iteration });
        continue;
@ -250,6 +252,7 @@ export async function autoLoop(
      if (finalizeResult.action === "continue") continue;

      consecutiveErrors = 0; // Iteration completed successfully
+      recentErrorMessages.length = 0;
      deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } });
      debugLog("autoLoop", { phase: "iteration-complete", iteration });
    } catch (loopErr) {
@ -280,6 +283,7 @@ export async function autoLoop(
      }

      consecutiveErrors++;
+      recentErrorMessages.push(msg.length > 120 ? msg.slice(0, 120) + "..." : msg);
      debugLog("autoLoop", {
        phase: "iteration-error",
        iteration,
@ -289,8 +293,11 @@ export async function autoLoop(

      if (consecutiveErrors >= 3) {
        // 3+ consecutive: hard stop — something is fundamentally broken
+        const errorHistory = recentErrorMessages
+          .map((m, i) => `  ${i + 1}. ${m}`)
+          .join("\n");
        ctx.ui.notify(
-          `Auto-mode stopped: ${consecutiveErrors} consecutive iteration failures. Last: ${msg}`,
+          `Auto-mode stopped: ${consecutiveErrors} consecutive iteration failures:\n${errorHistory}`,
          "error",
        );
        await deps.stopAuto(
--- a/src/resources/extensions/gsd/auto/phases.ts
+++ b/src/resources/extensions/gsd/auto/phases.ts
@ -31,7 +31,7 @@ import { existsSync, cpSync } from "node:fs";
 import { logWarning, logError } from "../workflow-logger.js";
 import { gsdRoot } from "../paths.js";
 import { atomicWriteSync } from "../atomic-write.js";
-import { verifyExpectedArtifact } from "../auto-recovery.js";
+import { verifyExpectedArtifact, diagnoseExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.js";
 import { writeUnitRuntimeRecord } from "../unit-runtime.js";

 // ─── generateMilestoneReport ──────────────────────────────────────────────────
@ -182,7 +182,7 @@ export async function runPreDispatch(
    }
    if (!healthGate.proceed) {
      ctx.ui.notify(
-        healthGate.reason ?? "Pre-dispatch health check failed.",
+        healthGate.reason || "Pre-dispatch health check failed — run /gsd doctor for details.",
        "error",
      );
      await deps.pauseAuto(ctx, pi);
@ -628,15 +628,17 @@ export async function runDispatch(
          unitId,
          reason: stuckSignal.reason,
        });
+        const stuckDiag = diagnoseExpectedArtifact(unitType, unitId, s.basePath);
+        const stuckRemediation = buildLoopRemediationSteps(unitType, unitId, s.basePath);
+        const stuckParts = [`Stuck on ${unitType} ${unitId} — ${stuckSignal.reason}.`];
+        if (stuckDiag) stuckParts.push(`Expected: ${stuckDiag}`);
+        if (stuckRemediation) stuckParts.push(`To recover:\n${stuckRemediation}`);
+        ctx.ui.notify(stuckParts.join(" "), "error");
        await deps.stopAuto(
          ctx,
          pi,
          `Stuck: ${stuckSignal.reason}`,
        );
-        ctx.ui.notify(
-          `Stuck on ${unitType} ${unitId} — ${stuckSignal.reason}. The expected artifact was not written.`,
-          "error",
-        );
        return { action: "break", reason: "stuck-detected" };
      }
    } else {
--- a/src/resources/extensions/gsd/state.ts
+++ b/src/resources/extensions/gsd/state.ts
@ -259,6 +259,7 @@ export async function deriveState(basePath: string): Promise<GSDState> {
      _telemetry.markdownDeriveCount++;
    }
  } else {
+    logWarning("state", "DB unavailable — using filesystem state derivation (degraded mode)");
    result = await _deriveStateImpl(basePath);
    _telemetry.markdownDeriveCount++;
  }
--- a/src/resources/extensions/gsd/tests/workflow-logger.test.ts
+++ b/src/resources/extensions/gsd/tests/workflow-logger.test.ts
@ -217,12 +217,26 @@ describe("workflow-logger", () => {
      assert.ok(formatted.includes("\n"));
    });

-    test("does not include context in formatted output", () => {
+    test("includes context fields in formatted output", () => {
      logError("tool", "failed", { cmd: "complete_task" });
      const entries = drainLogs();
      const formatted = formatForNotification(entries);
-      assert.equal(formatted, "[tool] failed");
-      assert.ok(!formatted.includes("complete_task"));
+      assert.equal(formatted, "[tool] failed (cmd: complete_task)");
+    });
+
+    test("excludes error key from context to avoid redundancy", () => {
+      logError("tool", "disk write failed", { error: "ENOSPC", path: "/tmp/foo" });
+      const entries = drainLogs();
+      const formatted = formatForNotification(entries);
+      assert.ok(formatted.includes("path: /tmp/foo"));
+      assert.ok(!formatted.includes("error: ENOSPC"));
+    });
+
+    test("formats entry without context unchanged", () => {
+      logError("intercept", "blocked write");
+      const entries = drainLogs();
+      const formatted = formatForNotification(entries);
+      assert.equal(formatted, "[intercept] blocked write");
    });
  });

@ -279,44 +293,6 @@ describe("workflow-logger", () => {
    });
  });

-  describe("audit log persistence", () => {
-    let dir: string;
-
-    beforeEach(() => {
-      dir = makeTempDir("wl-audit-");
-    });
-
-    afterEach(() => {
-      setLogBasePath("");
-      cleanup(dir);
-    });
-
-    test("writes entry to .gsd/audit-log.jsonl after setLogBasePath", () => {
-      setLogBasePath(dir);
-      logError("engine", "audit test entry");
-
-      const auditPath = join(dir, ".gsd", "audit-log.jsonl");
-      assert.ok(existsSync(auditPath), "audit-log.jsonl should exist");
-      const content = readFileSync(auditPath, "utf-8");
-      const entry = JSON.parse(content.trim());
-      assert.equal(entry.severity, "error");
-      assert.equal(entry.component, "engine");
-      assert.equal(entry.message, "audit test entry");
-    });
-
-    test("_resetLogs does not clear the audit base path", () => {
-      setLogBasePath(dir);
-      _resetLogs();
-      logError("engine", "post-reset entry");
-
-      const auditPath = join(dir, ".gsd", "audit-log.jsonl");
-      assert.ok(existsSync(auditPath), "audit-log.jsonl should exist after _resetLogs");
-      const content = readFileSync(auditPath, "utf-8");
-      const entry = JSON.parse(content.trim());
-      assert.equal(entry.message, "post-reset entry");
-    });
-  });
-
  describe("new log components (db, dispatch)", () => {
    test("logError with 'db' component stores correct component", () => {
      logError("db", "failed to copy DB to worktree", { error: "ENOENT" });
--- a/src/resources/extensions/gsd/tools/complete-slice.ts
+++ b/src/resources/extensions/gsd/tools/complete-slice.ts
@ -292,13 +292,11 @@ export async function handleCompleteSlice(
    // Toggle roadmap checkbox via renderer module
    const roadmapToggled = await renderRoadmapCheckboxes(basePath, params.milestoneId);
    if (!roadmapToggled) {
-      process.stderr.write(
-        `gsd-db: complete_slice — could not find roadmap for ${params.milestoneId}, skipping checkbox toggle\n`,
-      );
+      logWarning("tool", `complete_slice — could not find roadmap for ${params.milestoneId}, skipping checkbox toggle`);
    }
  } catch (renderErr) {
    // Disk render failed — roll back DB status so state stays consistent
-    logWarning("tool", `complete_slice — disk render failed, rolling back DB status: ${(renderErr as Error).message}`);
+    logWarning("tool", `complete_slice — disk render failed for ${params.milestoneId}/${params.sliceId}, rolling back DB status`, { error: (renderErr as Error).message });
    updateSliceStatus(params.milestoneId, params.sliceId, 'pending');
    invalidateStateCache();
    return { error: `disk render failed: ${(renderErr as Error).message}` };
@ -325,7 +323,7 @@ export async function handleCompleteSlice(
      trigger_reason: params.triggerReason,
    });
  } catch (hookErr) {
-    logWarning("tool", `complete-slice post-mutation hook warning: ${(hookErr as Error).message}`);
+    logWarning("tool", `complete-slice post-mutation hook failed for ${params.milestoneId}/${params.sliceId}`, { error: (hookErr as Error).message });
  }

  return {
--- a/src/resources/extensions/gsd/workflow-logger.ts
+++ b/src/resources/extensions/gsd/workflow-logger.ts
@ -174,17 +174,22 @@ export function summarizeLogs(): string | null {

 /**
 * Format entries for display (used by auto-loop post-unit notification).
- * Note: context fields are not included in the formatted output.
+ * Includes key context fields (file paths, commands) when present.
 */
 export function formatForNotification(entries: readonly LogEntry[]): string {
  if (entries.length === 0) return "";
-  if (entries.length === 1) {
-    const e = entries[0];
-    return `[${e.component}] ${e.message}`;
-  }
-  return entries
-    .map((e) => `[${e.component}] ${e.message}`)
-    .join("\n");
+  return entries.map((e) => {
+    let line = `[${e.component}] ${e.message}`;
+    if (e.context) {
+      const ctxParts = Object.entries(e.context)
+        .filter(([k]) => k !== "error") // error is redundant with message
+        .map(([k, v]) => v.includes(",") ? `${k}: "${v}"` : `${k}: ${v}`);
+      if (ctxParts.length > 0) {
+        line += ` (${ctxParts.join(", ")})`;
+      }
+    }
+    return line;
+  }).join("\n");
 }

 /**
--- a/src/resources/extensions/gsd/workflow-reconcile.ts
+++ b/src/resources/extensions/gsd/workflow-reconcile.ts
@ -348,7 +348,9 @@ function _reconcileWorktreeLogsInner(
  if (conflicts.length > 0) {
    // D-04: atomic all-or-nothing — block entire merge
    writeConflictsFile(mainBasePath, conflicts, worktreeBasePath);
-    logError("reconcile", `${conflicts.length} conflict(s) detected`, { count: String(conflicts.length), path: join(mainBasePath, ".gsd", "CONFLICTS.md") });
+    const conflictSummary = conflicts.slice(0, 3).map(c => `${c.entityType}:${c.entityId}`).join(", ");
+    const truncated = conflicts.length > 3 ? `... and ${conflicts.length - 3} more` : "";
+    logError("reconcile", `${conflicts.length} conflict(s) detected on ${conflictSummary}${truncated}. Details: .gsd/CONFLICTS.md`, { count: String(conflicts.length), path: join(mainBasePath, ".gsd", "CONFLICTS.md") });
    return { autoMerged: 0, conflicts };
  }