From e8c6b5019b10fb57bba8de88b60b10f081bb6bf9 Mon Sep 17 00:00:00 2001 From: Jeremy Date: Wed, 8 Apr 2026 20:07:46 -0500 Subject: [PATCH] fix(gsd): harden auto merge recovery and session safety --- src/resources/extensions/gsd/auto-recovery.ts | 30 ++++----- .../extensions/gsd/auto/loop-deps.ts | 3 +- src/resources/extensions/gsd/auto/phases.ts | 9 ++- src/resources/extensions/gsd/auto/run-unit.ts | 16 ++++- src/resources/extensions/gsd/state.ts | 3 +- .../extensions/gsd/tests/auto-loop.test.ts | 45 ++++++++++++- .../gsd/tests/auto-recovery.test.ts | 48 ++++++++++++++ .../tests/integration/auto-recovery.test.ts | 66 +++++++++++++++---- .../gsd/tests/journal-integration.test.ts | 2 +- .../tests/migrate-writer-integration.test.ts | 7 +- 10 files changed, 188 insertions(+), 41 deletions(-) create mode 100644 src/resources/extensions/gsd/tests/auto-recovery.test.ts diff --git a/src/resources/extensions/gsd/auto-recovery.ts b/src/resources/extensions/gsd/auto-recovery.ts index 5fa4a2d1d..92086af16 100644 --- a/src/resources/extensions/gsd/auto-recovery.ts +++ b/src/resources/extensions/gsd/auto-recovery.ts @@ -13,7 +13,7 @@ import { appendEvent } from "./workflow-events.js"; import { atomicWriteSync } from "./atomic-write.js"; import { clearParseCache } from "./files.js"; import { parseRoadmap as parseLegacyRoadmap, parsePlan as parseLegacyPlan } from "./parsers-legacy.js"; -import { isDbAvailable, getTask, getSlice, getSliceTasks, updateTaskStatus, updateSliceStatus } from "./gsd-db.js"; +import { isDbAvailable, getTask, getSlice, getSliceTasks, getPendingGates, updateTaskStatus, updateSliceStatus } from "./gsd-db.js"; import { isValidationTerminal } from "./state.js"; import { getErrorMessage } from "./error-utils.js"; import { logWarning, logError } from "./workflow-logger.js"; @@ -248,8 +248,7 @@ export function verifyExpectedArtifact( if (gateIds.length === 0) return true; try { - const { getPendingGates: getPending } = require("./gsd-db.js"); - const pending = getPending(mid, sid, "slice"); + const pending = getPendingGates(mid, sid, "slice"); const pendingIds = new Set(pending.map((g: any) => g.gate_id)); // All dispatched gates must no longer be pending for (const gid of gateIds) { @@ -480,22 +479,23 @@ function abortAndResetMerge( } } +export type MergeReconcileResult = "clean" | "reconciled" | "blocked"; + /** * Detect leftover merge state from a prior session and reconcile it. * If MERGE_HEAD or SQUASH_MSG exists, check whether conflicts are resolved. - * If resolved: finalize the commit. If still conflicted: abort and reset. - * - * Returns true if state was dirty and re-derivation is needed. + * If resolved: finalize the commit. If only .gsd conflicts remain: auto-resolve. + * If code conflicts remain: fail safe without modifying the worktree. */ export function reconcileMergeState( basePath: string, ctx: ExtensionContext, -): boolean { +): MergeReconcileResult { const mergeHeadPath = join(basePath, ".git", "MERGE_HEAD"); const squashMsgPath = join(basePath, ".git", "SQUASH_MSG"); const hasMergeHead = existsSync(mergeHeadPath); const hasSquashMsg = existsSync(squashMsgPath); - if (!hasMergeHead && !hasSquashMsg) return false; + if (!hasMergeHead && !hasSquashMsg) return "clean"; const conflictedFiles = nativeConflictFiles(basePath); if (conflictedFiles.length === 0) { @@ -511,7 +511,7 @@ export function reconcileMergeState( } catch (err) { const errorMessage = getErrorMessage(err); ctx.ui.notify(`Failed to finalize leftover merge/squash commit: ${errorMessage}`, "error"); - return false; + return "blocked"; } } else { // Still conflicted — try auto-resolving .gsd/ state file conflicts (#530) @@ -551,15 +551,16 @@ export function reconcileMergeState( ); } } else { - // Code conflicts present — abort and reset - abortAndResetMerge(basePath, hasMergeHead, squashMsgPath); + // Code conflicts present — fail safe and preserve any manual resolution + // work instead of discarding it with merge --abort/reset --hard. ctx.ui.notify( - "Detected leftover merge state with unresolved conflicts — cleaned up. Re-deriving state.", - "warning", + "Detected leftover merge state with unresolved code conflicts. Auto-mode will pause without modifying the worktree so manual conflict resolution is preserved.", + "error", ); + return "blocked"; } } - return true; + return "reconciled"; } // ─── Loop Remediation ───────────────────────────────────────────────────────── @@ -618,4 +619,3 @@ export function buildLoopRemediationSteps( } return null; } - diff --git a/src/resources/extensions/gsd/auto/loop-deps.ts b/src/resources/extensions/gsd/auto/loop-deps.ts index a7678d85f..ff63d8a3e 100644 --- a/src/resources/extensions/gsd/auto/loop-deps.ts +++ b/src/resources/extensions/gsd/auto/loop-deps.ts @@ -20,6 +20,7 @@ import type { DispatchAction } from "../auto-dispatch.js"; import type { WorktreeResolver } from "../worktree-resolver.js"; import type { CmuxLogLevel } from "../../cmux/index.js"; import type { JournalEntry } from "../journal.js"; +import type { MergeReconcileResult } from "../auto-recovery.js"; /** * Dependencies injected by the caller (auto.ts startAuto) so autoLoop @@ -118,7 +119,7 @@ export interface LoopDeps { milestoneId: string, fileType: string, ) => string | null; - reconcileMergeState: (basePath: string, ctx: ExtensionContext) => boolean; + reconcileMergeState: (basePath: string, ctx: ExtensionContext) => MergeReconcileResult; // Budget/context/secrets getLedger: () => unknown; diff --git a/src/resources/extensions/gsd/auto/phases.ts b/src/resources/extensions/gsd/auto/phases.ts index 78bbd696e..f9ff192fe 100644 --- a/src/resources/extensions/gsd/auto/phases.ts +++ b/src/resources/extensions/gsd/auto/phases.ts @@ -507,7 +507,13 @@ export async function runPreDispatch( } // Mid-merge safety check - if (deps.reconcileMergeState(s.basePath, ctx)) { + const mergeReconcileResult = deps.reconcileMergeState(s.basePath, ctx); + if (mergeReconcileResult === "blocked") { + await deps.pauseAuto(ctx, pi); + debugLog("autoLoop", { phase: "exit", reason: "merge-reconciliation-blocked" }); + return { action: "break", reason: "merge-reconciliation-blocked" }; + } + if (mergeReconcileResult === "reconciled") { deps.invalidateAllCaches(); state = await deps.deriveState(s.basePath); mid = state.activeMilestone?.id; @@ -1639,4 +1645,3 @@ export async function runFinalize( return { action: "next", data: undefined as void }; } - diff --git a/src/resources/extensions/gsd/auto/run-unit.ts b/src/resources/extensions/gsd/auto/run-unit.ts index 8319e9d4b..6f1646364 100644 --- a/src/resources/extensions/gsd/auto/run-unit.ts +++ b/src/resources/extensions/gsd/auto/run-unit.ts @@ -12,6 +12,11 @@ import type { UnitResult } from "./types.js"; import { _setCurrentResolve, _setSessionSwitchInFlight } from "./resolve.js"; import { debugLog } from "../debug-logger.js"; import { logWarning, logError } from "../workflow-logger.js"; +import { resolveAutoSupervisorConfig } from "../preferences.js"; + +// Tracks the latest session-switch attempt so a late timeout settlement from an +// older runUnit() call cannot clear the guard for a newer one. +let sessionSwitchGeneration = 0; /** * Execute a single unit: create a new session, send the prompt, and await @@ -36,10 +41,13 @@ export async function runUnit( let sessionResult: { cancelled: boolean }; let sessionTimeoutHandle: ReturnType | undefined; + const mySessionSwitchGeneration = ++sessionSwitchGeneration; _setSessionSwitchInFlight(true); try { const sessionPromise = s.cmdCtx!.newSession().finally(() => { - _setSessionSwitchInFlight(false); + if (sessionSwitchGeneration === mySessionSwitchGeneration) { + _setSessionSwitchInFlight(false); + } }); const timeoutPromise = new Promise<{ cancelled: true }>((resolve) => { sessionTimeoutHandle = setTimeout( @@ -112,7 +120,11 @@ export async function runUnit( // If supervision fails to resolve unitPromise within 30s, treat as cancelled. // Without this, a crashed agent that never emits agent_end hangs the loop (#3161). debugLog("runUnit", { phase: "awaiting-agent-end", unitType, unitId }); - const UNIT_HARD_TIMEOUT_MS = 30_000; + const supervisor = resolveAutoSupervisorConfig(); + const UNIT_HARD_TIMEOUT_MS = Math.max( + 30_000, + ((supervisor.hard_timeout_minutes ?? 30) * 60 * 1000) + 30_000, + ); let unitTimeoutHandle: ReturnType | undefined; const timeoutResult = new Promise((resolve) => { unitTimeoutHandle = setTimeout(() => { diff --git a/src/resources/extensions/gsd/state.ts b/src/resources/extensions/gsd/state.ts index 87f553c5a..f6170522b 100644 --- a/src/resources/extensions/gsd/state.ts +++ b/src/resources/extensions/gsd/state.ts @@ -1317,7 +1317,8 @@ export async function _deriveStateImpl(basePath: string): Promise { ? `All milestones complete. ${activeReqs} active requirement${activeReqs === 1 ? '' : 's'} in REQUIREMENTS.md ${activeReqs === 1 ? 'has' : 'have'} not been mapped to a milestone.` : 'All milestones complete.'; return { - activeMilestone: lastEntry ? { id: lastEntry.id, title: lastEntry.title } : null, + activeMilestone: null, + lastCompletedMilestone: lastEntry ? { id: lastEntry.id, title: lastEntry.title } : null, activeSlice: null, activeTask: null, phase: 'complete', diff --git a/src/resources/extensions/gsd/tests/auto-loop.test.ts b/src/resources/extensions/gsd/tests/auto-loop.test.ts index 32a7c261b..4d61da311 100644 --- a/src/resources/extensions/gsd/tests/auto-loop.test.ts +++ b/src/resources/extensions/gsd/tests/auto-loop.test.ts @@ -1,4 +1,4 @@ -import test from "node:test"; +import test, { mock } from "node:test"; import assert from "node:assert/strict"; import { readFileSync } from "node:fs"; import { resolve } from "node:path"; @@ -191,6 +191,47 @@ test("runUnit returns cancelled when session creation times out", async () => { assert.equal(pi.calls.length, 0); }); +test("runUnit keeps the session-switch guard across a late newSession settlement", async () => { + _resetPendingResolve(); + mock.timers.enable(); + + try { + const ctx = makeMockCtx(); + const pi = makeMockPi(); + const firstSession = makeMockSession({ newSessionDelayMs: 60_000 }); + const secondSession = makeMockSession({ newSessionDelayMs: 60_000 }); + + const firstRun = runUnit(ctx, pi, firstSession, "task", "T01", "prompt"); + + mock.timers.tick(30_000); + await Promise.resolve(); + + const firstResult = await firstRun; + assert.equal(firstResult.status, "cancelled"); + assert.equal(isSessionSwitchInFlight(), true, "guard should remain set after the timed-out session"); + + mock.timers.tick(1); + const secondRun = runUnit(ctx, pi, secondSession, "task", "T02", "prompt"); + + mock.timers.tick(29_999); + await Promise.resolve(); + assert.equal( + isSessionSwitchInFlight(), + true, + "late settlement from the first session must not clear the newer session guard", + ); + + mock.timers.tick(30_001); + await Promise.resolve(); + + const secondResult = await secondRun; + assert.equal(secondResult.status, "cancelled"); + assert.equal(isSessionSwitchInFlight(), false, "guard should clear after the newer session settles"); + } finally { + mock.timers.reset(); + } +}); + test("runUnit returns cancelled when s.active is false before sendMessage", async () => { _resetPendingResolve(); @@ -412,7 +453,7 @@ function makeMockDeps( getCurrentBranch: () => "main", autoWorktreeBranch: () => "auto/M001", resolveMilestoneFile: () => null, - reconcileMergeState: () => false, + reconcileMergeState: () => "clean", getLedger: () => null, getProjectTotals: () => ({ cost: 0 }), formatCost: (c: number) => `$${c.toFixed(2)}`, diff --git a/src/resources/extensions/gsd/tests/auto-recovery.test.ts b/src/resources/extensions/gsd/tests/auto-recovery.test.ts new file mode 100644 index 000000000..2f24f2232 --- /dev/null +++ b/src/resources/extensions/gsd/tests/auto-recovery.test.ts @@ -0,0 +1,48 @@ +import test, { afterEach } from "node:test"; +import assert from "node:assert/strict"; +import { mkdtempSync, mkdirSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +import { verifyExpectedArtifact } from "../auto-recovery.ts"; +import { openDatabase, closeDatabase, insertMilestone, insertSlice, insertGateRow } from "../gsd-db.ts"; + +const tmpDirs: string[] = []; + +function makeTmpProject(): string { + const dir = mkdtempSync(join(tmpdir(), "auto-recovery-")); + mkdirSync(join(dir, ".gsd"), { recursive: true }); + openDatabase(join(dir, ".gsd", "gsd.db")); + insertMilestone({ id: "M001", title: "Test Milestone", status: "active" }); + insertSlice({ + milestoneId: "M001", + id: "S01", + title: "Test Slice", + status: "pending", + risk: "low", + depends: [], + }); + insertGateRow({ milestoneId: "M001", sliceId: "S01", gateId: "Q3", scope: "slice" }); + tmpDirs.push(dir); + return dir; +} + +afterEach(() => { + closeDatabase(); + for (const dir of tmpDirs) { + try { + rmSync(dir, { recursive: true, force: true }); + } catch { + // Best-effort cleanup only. + } + } + tmpDirs.length = 0; +}); + +test("verifyExpectedArtifact checks pending gate-evaluate artifacts without ESM require failures", () => { + const base = makeTmpProject(); + + const verified = verifyExpectedArtifact("gate-evaluate", "M001/S01/gates+Q3", base); + + assert.equal(verified, false, "pending gates should keep gate-evaluate unverified"); +}); diff --git a/src/resources/extensions/gsd/tests/integration/auto-recovery.test.ts b/src/resources/extensions/gsd/tests/integration/auto-recovery.test.ts index 610f4d442..efff084bd 100644 --- a/src/resources/extensions/gsd/tests/integration/auto-recovery.test.ts +++ b/src/resources/extensions/gsd/tests/integration/auto-recovery.test.ts @@ -1,9 +1,10 @@ import test from "node:test"; import assert from "node:assert/strict"; -import { mkdirSync, writeFileSync, existsSync, readFileSync, rmSync } from "node:fs"; +import { mkdirSync, writeFileSync, existsSync, readFileSync, rmSync, chmodSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; import { randomUUID } from "node:crypto"; +import { execFileSync } from "node:child_process"; import { resolveExpectedArtifactPath, @@ -11,6 +12,7 @@ import { diagnoseExpectedArtifact, buildLoopRemediationSteps, hasImplementationArtifacts, + reconcileMergeState, } from "../../auto-recovery.ts"; import { parseRoadmap, parsePlan } from "../../parsers-legacy.ts"; import { parseTaskPlanFile, clearParseCache } from "../../files.ts"; @@ -669,8 +671,6 @@ test("#793: invalidateAllCaches clears all caches so deriveState sees fresh disk // ─── hasImplementationArtifacts (#1703) ─────────────────────────────────── -import { execFileSync } from "node:child_process"; - function makeGitBase(): string { const base = join(tmpdir(), `gsd-test-git-${randomUUID()}`); mkdirSync(base, { recursive: true }); @@ -745,9 +745,6 @@ test("verifyExpectedArtifact complete-milestone fails with only .gsd/ files (#17 // ─── reconcileMergeState: silent nativeCommit failure (#2542) ───────────── -import { reconcileMergeState } from "../../auto-recovery.ts"; -import { chmodSync } from "node:fs"; - function makeMockCtx(): { ctx: any; notifications: Array<{ msg: string; level: string }> } { const notifications: Array<{ msg: string; level: string }> = []; const ctx = { @@ -760,7 +757,7 @@ function makeMockCtx(): { ctx: any; notifications: Array<{ msg: string; level: s return { ctx, notifications }; } -test("reconcileMergeState returns false and notifies error when nativeCommit fails (#2542)", (t) => { +test("reconcileMergeState returns blocked and notifies error when nativeCommit fails (#2542)", (t) => { const base = makeGitBase(); t.after(() => cleanup(base)); @@ -786,9 +783,7 @@ test("reconcileMergeState returns false and notifies error when nativeCommit fai const { ctx, notifications } = makeMockCtx(); const result = reconcileMergeState(base, ctx); - // The function should return false to signal reconciliation failure - // (Currently it silently swallows the error and returns true — this test should FAIL before the fix) - assert.equal(result, false, "reconcileMergeState should return false when nativeCommit fails"); + assert.equal(result, "blocked", "reconcileMergeState should return blocked when nativeCommit fails"); const errorNotifications = notifications.filter(n => n.level === "error"); assert.ok(errorNotifications.length > 0, "should notify an error when nativeCommit fails"); assert.ok( @@ -797,18 +792,63 @@ test("reconcileMergeState returns false and notifies error when nativeCommit fai ); }); -test("reconcileMergeState returns true when no merge state present", (t) => { - // When there's no MERGE_HEAD or SQUASH_MSG, reconcileMergeState returns false (no dirty state) +test("reconcileMergeState returns clean when no merge state present", (t) => { const base = makeGitBase(); t.after(() => cleanup(base)); const { ctx, notifications } = makeMockCtx(); const result = reconcileMergeState(base, ctx); - assert.equal(result, false, "should return false when no merge state exists"); + assert.equal(result, "clean", "should return clean when no merge state exists"); assert.equal(notifications.length, 0, "should not notify when no merge state present"); }); +test("reconcileMergeState blocks and preserves unresolved code conflicts", (t) => { + const base = makeGitBase(); + t.after(() => cleanup(base)); + + writeFileSync(join(base, "conflict.txt"), "base\n"); + execFileSync("git", ["add", "conflict.txt"], { cwd: base, stdio: "ignore" }); + execFileSync("git", ["commit", "-m", "add conflict base"], { cwd: base, stdio: "ignore" }); + + execFileSync("git", ["checkout", "-b", "feature"], { cwd: base, stdio: "ignore" }); + writeFileSync(join(base, "conflict.txt"), "feature\n"); + execFileSync("git", ["add", "conflict.txt"], { cwd: base, stdio: "ignore" }); + execFileSync("git", ["commit", "-m", "feature change"], { cwd: base, stdio: "ignore" }); + + execFileSync("git", ["checkout", "main"], { cwd: base, stdio: "ignore" }); + writeFileSync(join(base, "conflict.txt"), "main\n"); + execFileSync("git", ["add", "conflict.txt"], { cwd: base, stdio: "ignore" }); + execFileSync("git", ["commit", "-m", "main change"], { cwd: base, stdio: "ignore" }); + + let mergeFailed = false; + try { + execFileSync("git", ["merge", "--no-ff", "feature"], { cwd: base, stdio: "ignore" }); + } catch { + mergeFailed = true; + } + assert.equal(mergeFailed, true, "merge should produce a conflict"); + assert.ok(existsSync(join(base, ".git", "MERGE_HEAD")), "MERGE_HEAD should remain present before reconcile"); + + const beforeContents = readFileSync(join(base, "conflict.txt"), "utf8"); + assert.match(beforeContents, /<<<<<<<|=======|>>>>>>>/, "fixture should contain conflict markers"); + + const { ctx, notifications } = makeMockCtx(); + const result = reconcileMergeState(base, ctx); + + assert.equal(result, "blocked", "code conflicts should block reconciliation"); + assert.ok(existsSync(join(base, ".git", "MERGE_HEAD")), "MERGE_HEAD should be preserved for manual resolution"); + assert.equal( + readFileSync(join(base, "conflict.txt"), "utf8"), + beforeContents, + "reconcile should preserve the conflicted file contents", + ); + assert.ok( + notifications.some((n) => n.level === "error" && n.msg.includes("manual conflict resolution is preserved")), + "should notify that auto-mode paused and preserved manual work", + ); +}); + test("verifyExpectedArtifact complete-milestone passes with impl files (#1703)", (t) => { const base = makeGitBase(); t.after(() => cleanup(base)); diff --git a/src/resources/extensions/gsd/tests/journal-integration.test.ts b/src/resources/extensions/gsd/tests/journal-integration.test.ts index 4725af341..c05c6b3fc 100644 --- a/src/resources/extensions/gsd/tests/journal-integration.test.ts +++ b/src/resources/extensions/gsd/tests/journal-integration.test.ts @@ -72,7 +72,7 @@ function makeMockDeps( getCurrentBranch: () => "main", autoWorktreeBranch: () => "auto/M001", resolveMilestoneFile: () => null, - reconcileMergeState: () => false, + reconcileMergeState: () => "clean", getLedger: () => ({ units: [] }), getProjectTotals: () => ({ cost: 0 }), formatCost: (c: number) => `$${c.toFixed(2)}`, diff --git a/src/resources/extensions/gsd/tests/migrate-writer-integration.test.ts b/src/resources/extensions/gsd/tests/migrate-writer-integration.test.ts index 8fa3d98d0..71be7d850 100644 --- a/src/resources/extensions/gsd/tests/migrate-writer-integration.test.ts +++ b/src/resources/extensions/gsd/tests/migrate-writer-integration.test.ts @@ -273,9 +273,9 @@ test('Scenario 2: Fully complete project — deriveState phase', async () => { invalidateAllCaches(); const state = await deriveState(base); assert.deepStrictEqual(state.phase, 'complete', 'complete: deriveState phase is complete (validation + summary written by migration)'); - // When all milestones are complete, activeMilestone points to the last entry (for display) - assert.ok(state.activeMilestone !== null, 'complete: deriveState has activeMilestone (last entry)'); - assert.deepStrictEqual(state.activeMilestone!.id, 'M001', 'complete: deriveState activeMilestone is M001'); + assert.equal(state.activeMilestone, null, 'complete: deriveState has no activeMilestone'); + assert.ok(state.lastCompletedMilestone !== null, 'complete: deriveState exposes lastCompletedMilestone'); + assert.deepStrictEqual(state.lastCompletedMilestone!.id, 'M001', 'complete: deriveState lastCompletedMilestone is M001'); // generatePreview for complete project const preview = generatePreview(project); @@ -292,4 +292,3 @@ test('Scenario 2: Fully complete project — deriveState phase', async () => { rmSync(base, { recursive: true, force: true }); } }); -