Merge pull request #3525 from jeremymcs/fix/diagnostic-messaging

fix(gsd): enrich vague diagnostic messages with root-cause context
Jeremy McSpadden 2026-04-04 21:20:43 -05:00 committed by GitHub
commit ab38f60cc2
9 changed files with 73 additions and 68 deletions

View file

@@ -33,6 +33,7 @@ import {
import {
verifyExpectedArtifact,
resolveExpectedArtifactPath,
diagnoseExpectedArtifact,
} from "./auto-recovery.js";
import { regenerateIfMissing } from "./workflow-projections.js";
import { syncStateToProjectRoot } from "./auto-worktree.js";
@@ -476,8 +477,9 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
// db_unavailable so the artifact was never written. Retrying would
// produce an infinite re-dispatch loop (#2517).
debugLog("postUnit", { phase: "artifact-verify-skip-db-unavailable", unitType: s.currentUnit.type, unitId: s.currentUnit.id });
const dbSkipDiag = diagnoseExpectedArtifact(s.currentUnit.type, s.currentUnit.id, s.basePath);
ctx.ui.notify(
`Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} but DB is unavailable — skipping retry to avoid loop (#2517)`,
`Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — DB unavailable, skipping retry.${dbSkipDiag ? ` Expected: ${dbSkipDiag}` : ""}`,
"error",
);
} else if (!triggerArtifactVerified) {
@@ -486,14 +488,15 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
const retryKey = `${s.currentUnit.type}:${s.currentUnit.id}`;
const attempt = (s.verificationRetryCount.get(retryKey) ?? 0) + 1;
s.verificationRetryCount.set(retryKey, attempt);
const retryDiag = diagnoseExpectedArtifact(s.currentUnit.type, s.currentUnit.id, s.basePath);
s.pendingVerificationRetry = {
unitId: s.currentUnit.id,
failureContext: `Artifact verification failed: expected artifact for ${s.currentUnit.type} "${s.currentUnit.id}" was not found on disk after unit execution (attempt ${attempt}).`,
failureContext: `Artifact verification failed: expected artifact for ${s.currentUnit.type} "${s.currentUnit.id}" was not found on disk after unit execution (attempt ${attempt}).${retryDiag ? ` Expected: ${retryDiag}` : ""}`,
attempt,
};
debugLog("postUnit", { phase: "artifact-verify-retry", unitType: s.currentUnit.type, unitId: s.currentUnit.id, attempt });
ctx.ui.notify(
`Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — retrying (attempt ${attempt})`,
`Artifact missing for ${s.currentUnit.type} ${s.currentUnit.id} — retrying (attempt ${attempt}).${retryDiag ? ` Expected: ${retryDiag}` : ""}`,
"warning",
);
return "retry";
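The diagnoseExpectedArtifact helper itself is not part of this diff. A minimal sketch of the shape its call sites imply (unit type, unit id, and base path in; a short description of the missing artifact, or null, out). The .gsd/artifacts/<type>/<id>.md layout is an assumption for illustration, not the actual auto-recovery.js implementation:

// Hypothetical sketch only; the shipped helper lives in auto-recovery.js.
// Assumes artifacts are expected at <basePath>/.gsd/artifacts/<type>/<id>.md.
import { existsSync } from "node:fs";
import { dirname, join } from "node:path";

export function diagnoseExpectedArtifact(
  unitType: string,
  unitId: string,
  basePath: string,
): string | null {
  const expected = join(basePath, ".gsd", "artifacts", unitType, `${unitId}.md`);
  if (existsSync(expected)) return null; // artifact present, nothing to diagnose
  const parent = dirname(expected);
  return existsSync(parent)
    ? `${expected} (directory exists, file was never written)`
    : `${expected} (directory ${parent} is missing)`;
}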

View file

@@ -196,19 +196,30 @@ export async function runPostUnitVerification(
failureContext: formatFailureContext(result),
attempt: nextAttempt,
};
const failedCmds = result.checks
.filter((c) => c.exitCode !== 0)
.map((c) => c.command);
const cmdSummary = failedCmds.length <= 3
? failedCmds.join(", ")
: `${failedCmds.slice(0, 3).join(", ")}... and ${failedCmds.length - 3} more`;
ctx.ui.notify(
`Verification failed — auto-fix attempt ${nextAttempt}/${maxRetries}`,
`Verification failed (${cmdSummary}) — auto-fix attempt ${nextAttempt}/${maxRetries}`,
"warning",
);
// Return "retry" — the autoLoop while loop will re-iterate with the retry context
return "retry";
} else {
// Gate failed, retries exhausted
const exhaustedAttempt = attempt + 1;
s.verificationRetryCount.delete(s.currentUnit.id);
s.pendingVerificationRetry = null;
const exhaustedFails = result.checks
.filter((c) => c.exitCode !== 0)
.map((c) => c.command);
const exhaustedSummary = exhaustedFails.length <= 3
? exhaustedFails.join(", ")
: `${exhaustedFails.slice(0, 3).join(", ")}... and ${exhaustedFails.length - 3} more`;
ctx.ui.notify(
`Verification gate FAILED after ${exhaustedAttempt > maxRetries ? exhaustedAttempt - 1 : exhaustedAttempt} retries — pausing for human review`,
`Verification gate FAILED after ${attempt} ${attempt === 1 ? "retry" : "retries"} (${exhaustedSummary}) — pausing for human review`,
"error",
);
await pauseAuto(ctx, pi);
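The retry-path and exhausted-path truncation blocks above are duplicates. If the "... and N more" pattern spreads further, a shared helper would keep them in sync; a sketch, where summarizeFailedChecks and CheckResult are hypothetical names, not part of this PR:

// Hypothetical helper; both truncation blocks above could delegate to it.
interface CheckResult {
  command: string;
  exitCode: number;
}

function summarizeFailedChecks(checks: readonly CheckResult[], limit = 3): string {
  const failed = checks.filter((c) => c.exitCode !== 0).map((c) => c.command);
  return failed.length <= limit
    ? failed.join(", ")
    : `${failed.slice(0, limit).join(", ")}... and ${failed.length - limit} more`;
}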

View file

@@ -48,6 +48,7 @@ export async function autoLoop(
let iteration = 0;
const loopState: LoopState = { recentUnits: [], stuckRecoveryAttempts: 0 };
let consecutiveErrors = 0;
const recentErrorMessages: string[] = [];
while (s.active) {
iteration++;
@@ -202,6 +203,7 @@
deps.clearUnitTimeout();
consecutiveErrors = 0;
recentErrorMessages.length = 0;
deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } });
debugLog("autoLoop", { phase: "iteration-complete", iteration });
continue;
@@ -250,6 +252,7 @@
if (finalizeResult.action === "continue") continue;
consecutiveErrors = 0; // Iteration completed successfully
recentErrorMessages.length = 0;
deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } });
debugLog("autoLoop", { phase: "iteration-complete", iteration });
} catch (loopErr) {
@@ -280,6 +283,7 @@
}
consecutiveErrors++;
recentErrorMessages.push(msg.length > 120 ? msg.slice(0, 120) + "..." : msg);
debugLog("autoLoop", {
phase: "iteration-error",
iteration,
@@ -289,8 +293,11 @@
if (consecutiveErrors >= 3) {
// 3+ consecutive: hard stop — something is fundamentally broken
const errorHistory = recentErrorMessages
.map((m, i) => ` ${i + 1}. ${m}`)
.join("\n");
ctx.ui.notify(
`Auto-mode stopped: ${consecutiveErrors} consecutive iteration failures. Last: ${msg}`,
`Auto-mode stopped: ${consecutiveErrors} consecutive iteration failures:\n${errorHistory}`,
"error",
);
await deps.stopAuto(

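The new error history stays bounded without explicit trimming: it is emptied on every successful iteration, and the loop hard-stops once three consecutive failures accumulate. With the 120-character truncation above, the final notification renders roughly like this (error text hypothetical):

Auto-mode stopped: 3 consecutive iteration failures:
  1. dispatch failed: ENOENT, no such file .gsd/state.db
  2. dispatch failed: ENOENT, no such file .gsd/state.db
  3. dispatch failed: ENOENT, no such file .gsd/state.db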
View file

@@ -31,7 +31,7 @@ import { existsSync, cpSync } from "node:fs";
import { logWarning, logError } from "../workflow-logger.js";
import { gsdRoot } from "../paths.js";
import { atomicWriteSync } from "../atomic-write.js";
import { verifyExpectedArtifact } from "../auto-recovery.js";
import { verifyExpectedArtifact, diagnoseExpectedArtifact, buildLoopRemediationSteps } from "../auto-recovery.js";
import { writeUnitRuntimeRecord } from "../unit-runtime.js";
// ─── generateMilestoneReport ──────────────────────────────────────────────────
@@ -182,7 +182,7 @@ export async function runPreDispatch(
}
if (!healthGate.proceed) {
ctx.ui.notify(
healthGate.reason ?? "Pre-dispatch health check failed.",
healthGate.reason || "Pre-dispatch health check failed — run /gsd doctor for details.",
"error",
);
await deps.pauseAuto(ctx, pi);
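Note the switch from ?? to || above: with ??, an empty-string reason was notified verbatim; with ||, it now falls through to the default guidance as well. In miniature:

// Illustrative only: "" passes through ?? but not through ||.
"" ?? "fallback"; // evaluates to ""
"" || "fallback"; // evaluates to "fallback"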
@@ -628,15 +628,17 @@ export async function runDispatch(
unitId,
reason: stuckSignal.reason,
});
const stuckDiag = diagnoseExpectedArtifact(unitType, unitId, s.basePath);
const stuckRemediation = buildLoopRemediationSteps(unitType, unitId, s.basePath);
const stuckParts = [`Stuck on ${unitType} ${unitId}${stuckSignal.reason}.`];
if (stuckDiag) stuckParts.push(`Expected: ${stuckDiag}`);
if (stuckRemediation) stuckParts.push(`To recover:\n${stuckRemediation}`);
ctx.ui.notify(stuckParts.join(" "), "error");
await deps.stopAuto(
ctx,
pi,
`Stuck: ${stuckSignal.reason}`,
);
ctx.ui.notify(
`Stuck on ${unitType} ${unitId}${stuckSignal.reason}. The expected artifact was not written.`,
"error",
);
return { action: "break", reason: "stuck-detected" };
}
} else {

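Like diagnoseExpectedArtifact, buildLoopRemediationSteps is not shown in this diff. A sketch of what its call site implies, a newline-joined recovery checklist (the step wording is illustrative, not the shipped text):

// Hypothetical sketch; the shipped helper lives in auto-recovery.js.
export function buildLoopRemediationSteps(
  unitType: string,
  unitId: string,
  basePath: string,
): string | null {
  return [
    `1. Check ${basePath}/.gsd/audit-log.jsonl for ${unitType} ${unitId} errors.`,
    `2. Produce or restore the expected artifact manually.`,
    `3. Restart auto mode once the artifact exists on disk.`,
  ].join("\n");
}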
View file

@@ -259,6 +259,7 @@ export async function deriveState(basePath: string): Promise<GSDState> {
_telemetry.markdownDeriveCount++;
}
} else {
logWarning("state", "DB unavailable — using filesystem state derivation (degraded mode)");
result = await _deriveStateImpl(basePath);
_telemetry.markdownDeriveCount++;
}

View file

@@ -217,12 +217,26 @@ describe("workflow-logger", () => {
assert.ok(formatted.includes("\n"));
});
test("does not include context in formatted output", () => {
test("includes context fields in formatted output", () => {
logError("tool", "failed", { cmd: "complete_task" });
const entries = drainLogs();
const formatted = formatForNotification(entries);
assert.equal(formatted, "[tool] failed");
assert.ok(!formatted.includes("complete_task"));
assert.equal(formatted, "[tool] failed (cmd: complete_task)");
});
test("excludes error key from context to avoid redundancy", () => {
logError("tool", "disk write failed", { error: "ENOSPC", path: "/tmp/foo" });
const entries = drainLogs();
const formatted = formatForNotification(entries);
assert.ok(formatted.includes("path: /tmp/foo"));
assert.ok(!formatted.includes("error: ENOSPC"));
});
test("formats entry without context unchanged", () => {
logError("intercept", "blocked write");
const entries = drainLogs();
const formatted = formatForNotification(entries);
assert.equal(formatted, "[intercept] blocked write");
});
});
@@ -279,44 +293,6 @@ describe("workflow-logger", () => {
});
});
describe("audit log persistence", () => {
let dir: string;
beforeEach(() => {
dir = makeTempDir("wl-audit-");
});
afterEach(() => {
setLogBasePath("");
cleanup(dir);
});
test("writes entry to .gsd/audit-log.jsonl after setLogBasePath", () => {
setLogBasePath(dir);
logError("engine", "audit test entry");
const auditPath = join(dir, ".gsd", "audit-log.jsonl");
assert.ok(existsSync(auditPath), "audit-log.jsonl should exist");
const content = readFileSync(auditPath, "utf-8");
const entry = JSON.parse(content.trim());
assert.equal(entry.severity, "error");
assert.equal(entry.component, "engine");
assert.equal(entry.message, "audit test entry");
});
test("_resetLogs does not clear the audit base path", () => {
setLogBasePath(dir);
_resetLogs();
logError("engine", "post-reset entry");
const auditPath = join(dir, ".gsd", "audit-log.jsonl");
assert.ok(existsSync(auditPath), "audit-log.jsonl should exist after _resetLogs");
const content = readFileSync(auditPath, "utf-8");
const entry = JSON.parse(content.trim());
assert.equal(entry.message, "post-reset entry");
});
});
describe("new log components (db, dispatch)", () => {
test("logError with 'db' component stores correct component", () => {
logError("db", "failed to copy DB to worktree", { error: "ENOENT" });

View file

@@ -292,13 +292,11 @@ export async function handleCompleteSlice(
// Toggle roadmap checkbox via renderer module
const roadmapToggled = await renderRoadmapCheckboxes(basePath, params.milestoneId);
if (!roadmapToggled) {
process.stderr.write(
`gsd-db: complete_slice — could not find roadmap for ${params.milestoneId}, skipping checkbox toggle\n`,
);
logWarning("tool", `complete_slice — could not find roadmap for ${params.milestoneId}, skipping checkbox toggle`);
}
} catch (renderErr) {
// Disk render failed — roll back DB status so state stays consistent
logWarning("tool", `complete_slice — disk render failed, rolling back DB status: ${(renderErr as Error).message}`);
logWarning("tool", `complete_slice — disk render failed for ${params.milestoneId}/${params.sliceId}, rolling back DB status`, { error: (renderErr as Error).message });
updateSliceStatus(params.milestoneId, params.sliceId, 'pending');
invalidateStateCache();
return { error: `disk render failed: ${(renderErr as Error).message}` };
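The surrounding try/catch is a compensate-on-failure sequence: the DB status mutates first, the disk render mirrors it, and a render failure rolls the DB back so the two stores cannot drift. Its skeleton, using names from this diff (the initial "complete" write is assumed; it sits above the excerpt):

// Skeleton of the pattern above, not the full handler.
updateSliceStatus(params.milestoneId, params.sliceId, "complete"); // 1. mutate DB (assumed)
try {
  await renderRoadmapCheckboxes(basePath, params.milestoneId);     // 2. mirror to disk
} catch (renderErr) {
  // 3. compensate: revert the DB so state stays consistent
  updateSliceStatus(params.milestoneId, params.sliceId, "pending");
  invalidateStateCache();
  return { error: `disk render failed: ${(renderErr as Error).message}` };
}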
@@ -325,7 +323,7 @@ export async function handleCompleteSlice(
trigger_reason: params.triggerReason,
});
} catch (hookErr) {
logWarning("tool", `complete-slice post-mutation hook warning: ${(hookErr as Error).message}`);
logWarning("tool", `complete-slice post-mutation hook failed for ${params.milestoneId}/${params.sliceId}`, { error: (hookErr as Error).message });
}
return {

View file

@@ -174,17 +174,22 @@ export function summarizeLogs(): string | null {
/**
* Format entries for display (used by auto-loop post-unit notification).
* Note: context fields are not included in the formatted output.
* Includes key context fields (file paths, commands) when present.
*/
export function formatForNotification(entries: readonly LogEntry[]): string {
if (entries.length === 0) return "";
if (entries.length === 1) {
const e = entries[0];
return `[${e.component}] ${e.message}`;
}
return entries
.map((e) => `[${e.component}] ${e.message}`)
.join("\n");
return entries.map((e) => {
let line = `[${e.component}] ${e.message}`;
if (e.context) {
const ctxParts = Object.entries(e.context)
.filter(([k]) => k !== "error") // error is redundant with message
.map(([k, v]) => v.includes(",") ? `${k}: "${v}"` : `${k}: ${v}`);
if (ctxParts.length > 0) {
line += ` (${ctxParts.join(", ")})`;
}
}
return line;
}).join("\n");
}
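One branch the new tests do not cover: context values containing commas are wrapped in quotes so the parenthesized list stays unambiguous. Illustrative call, with the entry shape assumed from the tests:

// formatForNotification([{ component: "dispatch", message: "spawn failed", context: { args: "-a, -b" } }])
// returns: [dispatch] spawn failed (args: "-a, -b")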
/**

View file

@@ -348,7 +348,9 @@ function _reconcileWorktreeLogsInner(
if (conflicts.length > 0) {
// D-04: atomic all-or-nothing — block entire merge
writeConflictsFile(mainBasePath, conflicts, worktreeBasePath);
logError("reconcile", `${conflicts.length} conflict(s) detected`, { count: String(conflicts.length), path: join(mainBasePath, ".gsd", "CONFLICTS.md") });
const conflictSummary = conflicts.slice(0, 3).map(c => `${c.entityType}:${c.entityId}`).join(", ");
const truncated = conflicts.length > 3 ? `... and ${conflicts.length - 3} more` : "";
logError("reconcile", `${conflicts.length} conflict(s) detected on ${conflictSummary}${truncated}. Details: .gsd/CONFLICTS.md`, { count: String(conflicts.length), path: join(mainBasePath, ".gsd", "CONFLICTS.md") });
return { autoMerged: 0, conflicts };
}
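With five conflicts, for example, the enriched line reads along these lines (entity ids hypothetical):

[reconcile] 5 conflict(s) detected on slice:auth-01, slice:auth-02, task:ui-07... and 2 more. Details: .gsd/CONFLICTS.md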