diff --git a/src/resources/extensions/gsd/session-forensics.ts b/src/resources/extensions/gsd/session-forensics.ts index f01a4b8e1..9cf0a23c5 100644 --- a/src/resources/extensions/gsd/session-forensics.ts +++ b/src/resources/extensions/gsd/session-forensics.ts @@ -472,9 +472,13 @@ function formatTraceSummary(trace: ExecutionTrace): string { if (trace.errors.length > 0) { parts.push(`Errors: ${trace.errors.slice(-3).join("; ")}`); } - if (trace.lastReasoning) { - parts.push(`Last reasoning: "${trace.lastReasoning}"`); - } + // NOTE: lastReasoning is intentionally excluded from the retry diagnostic. + // This summary is injected into retry prompts via getDeepDiagnostic() → + // phases.ts. Including prior assistant free-text causes hallucination loops + // when the previous turn was truncated or malformed. Crash recovery has its + // own path (formatCrashRecoveryBriefing) that handles lastReasoning safely + // with explicit "Last Agent Reasoning Before Interruption" framing. + // See: https://github.com/gsd-build/gsd-2/issues/2195 return parts.join("\n"); } diff --git a/src/resources/extensions/gsd/tests/retry-diagnostic-reasoning.test.ts b/src/resources/extensions/gsd/tests/retry-diagnostic-reasoning.test.ts new file mode 100644 index 000000000..b64283877 --- /dev/null +++ b/src/resources/extensions/gsd/tests/retry-diagnostic-reasoning.test.ts @@ -0,0 +1,161 @@ +/** + * Regression tests for #2195: formatTraceSummary (used by getDeepDiagnostic → + * retry prompts) must NOT include lastReasoning from prior assistant text. + * + * Including prior assistant free-text in retry diagnostics causes hallucination + * loops when the previous turn was truncated or malformed. + * + * The crash recovery path (formatCrashRecoveryBriefing) has its own safe handling + * of lastReasoning and is NOT affected by this change. + */ +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { mkdirSync, writeFileSync, rmSync, mkdtempSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { extractTrace, getDeepDiagnostic } from "../session-forensics.ts"; + +/** Build a minimal assistant text reasoning entry. */ +function makeAssistantText(text: string): unknown { + return { + type: "message", + message: { + role: "assistant", + content: [{ type: "text", text }], + }, + }; +} + +/** Build a minimal assistant tool call + tool result pair. */ +function makeToolPair( + toolName: string, + input: Record, + resultText: string, + isError: boolean, +): unknown[] { + const toolCallId = `toolu_${Math.random().toString(36).slice(2, 10)}`; + return [ + { + type: "message", + message: { + role: "assistant", + content: [ + { + type: "toolCall", + id: toolCallId, + name: toolName, + arguments: input, + }, + ], + }, + }, + { + type: "message", + message: { + role: "toolResult", + toolCallId, + toolName, + isError, + content: [{ type: "text", text: resultText }], + }, + }, + ]; +} + +describe("retry diagnostic excludes lastReasoning (#2195)", () => { + test("extractTrace still captures lastReasoning in the trace object", () => { + const entries = [ + makeAssistantText("I am going to write the summary file now"), + ...makeToolPair("write", { path: "/tmp/SUMMARY.md" }, "ok", false), + makeAssistantText("The task is complete — all files written."), + ]; + + const trace = extractTrace(entries); + // extractTrace should still collect lastReasoning for crash recovery + assert.ok(trace.lastReasoning.length > 0, + "extractTrace should still populate lastReasoning"); + assert.ok(trace.lastReasoning.includes("all files written"), + "lastReasoning should contain the last assistant text"); + }); + + test("getDeepDiagnostic output does NOT contain lastReasoning", () => { + // Create a temporary activity directory with a JSONL file + const tempBase = mkdtempSync(join(tmpdir(), "gsd-diag-test-")); + const gsdDir = join(tempBase, ".gsd"); + const activityDir = join(gsdDir, "activity"); + mkdirSync(activityDir, { recursive: true }); + + try { + // Build entries with both tool calls and assistant reasoning + const entries = [ + makeAssistantText("Let me analyze the codebase structure first"), + ...makeToolPair("bash", { command: "ls src/" }, "index.ts\nutils.ts", false), + makeAssistantText("I see the milestone/M001 branch has a significantly different ... 3. "), + ]; + + // Write JSONL activity file + const jsonl = entries.map(e => JSON.stringify(e)).join("\n"); + writeFileSync(join(activityDir, "2025-01-01T00-00-00.jsonl"), jsonl); + + const diagnostic = getDeepDiagnostic(tempBase); + + // Diagnostic should exist (we have tool calls) + assert.ok(diagnostic !== null, "diagnostic should not be null"); + + // Diagnostic should contain structured execution evidence + assert.ok(diagnostic!.includes("Tool calls completed:"), + "should include tool call count"); + assert.ok(diagnostic!.includes("ls src/"), + "should include commands run"); + + // Diagnostic must NOT contain the assistant's free-text reasoning + assert.ok(!diagnostic!.includes("Last reasoning"), + "diagnostic must not include 'Last reasoning' label"); + assert.ok(!diagnostic!.includes("analyze the codebase"), + "diagnostic must not include prior assistant text"); + assert.ok(!diagnostic!.includes("significantly different"), + "diagnostic must not include truncated assistant reasoning"); + } finally { + rmSync(tempBase, { recursive: true, force: true }); + } + }); + + test("getDeepDiagnostic still includes errors and file operations", () => { + const tempBase = mkdtempSync(join(tmpdir(), "gsd-diag-test-")); + const gsdDir = join(tempBase, ".gsd"); + const activityDir = join(gsdDir, "activity"); + mkdirSync(activityDir, { recursive: true }); + + try { + const entries = [ + makeAssistantText("Writing the plan file"), + ...makeToolPair("write", { path: "M001/S01/S01-PLAN.md" }, "ok", false), + ...makeToolPair("bash", { command: "npm run build" }, "Error: type mismatch", true), + makeAssistantText("The build failed, let me investigate"), + ]; + + const jsonl = entries.map(e => JSON.stringify(e)).join("\n"); + writeFileSync(join(activityDir, "2025-01-01T00-00-00.jsonl"), jsonl); + + const diagnostic = getDeepDiagnostic(tempBase); + assert.ok(diagnostic !== null); + + // Structured evidence should be present + assert.ok(diagnostic!.includes("S01-PLAN.md"), + "should include files written"); + assert.ok(diagnostic!.includes("npm run build"), + "should include commands run"); + assert.ok(diagnostic!.includes("type mismatch"), + "should include errors"); + + // But NOT the assistant's free-text + assert.ok(!diagnostic!.includes("Writing the plan"), + "must not include assistant reasoning"); + assert.ok(!diagnostic!.includes("build failed"), + "must not include assistant reasoning about failures"); + } finally { + rmSync(tempBase, { recursive: true, force: true }); + } + }); +});