Three fixes for the worktree isolation stuck-state bug:

1. selfHealRuntimeRecords on initial start used the function parameter `base` (the main project root) instead of `basePath` (the worktree path after entry). This meant stale runtime records in the worktree were never found or healed, leaving dispatched records that block auto-mode.
2. syncStateToProjectRoot now copies runtime/units/ records alongside milestone data. This provides defense in depth: even if selfHeal runs before worktree re-entry, stale records from a prior sync are still visible.
3. initMetrics and initRoutingHistory were also corrected from `base` to `basePath` — the same class of bug (a stale function parameter used after worktree entry).

Also adds a test verifying that selfHealRuntimeRecords resolves artifacts and clears records correctly when pointed at a worktree base path.
This commit is contained in:
parent
52848d7fd2
commit
3c1a4e9109
2 changed files with 83 additions and 5 deletions
|
|
@ -196,6 +196,19 @@ function syncStateToProjectRoot(worktreePath: string, projectRoot: string, miles
|
|||
cpSync(srcMilestone, dstMilestone, { recursive: true, force: true });
|
||||
}
|
||||
} catch { /* non-fatal */ }
|
||||
|
||||
// 3. Runtime records — unit dispatch state used by selfHealRuntimeRecords().
|
||||
// Without this, a crash during a unit leaves the runtime record only in the
|
||||
// worktree. If the next session resolves basePath before worktree re-entry,
|
||||
// selfHeal can't find or clear the stale record (#769).
|
||||
try {
|
||||
const srcRuntime = join(wtGsd, "runtime", "units");
|
||||
const dstRuntime = join(prGsd, "runtime", "units");
|
||||
if (existsSync(srcRuntime)) {
|
||||
mkdirSync(dstRuntime, { recursive: true });
|
||||
cpSync(srcRuntime, dstRuntime, { recursive: true, force: true });
|
||||
}
|
||||
} catch { /* non-fatal */ }
|
||||
}
|
||||
|
||||
// ─── State ────────────────────────────────────────────────────────────────────
|
||||
|
|
@ -1125,11 +1138,12 @@ export async function startAuto(
|
|||
}
|
||||
}
|
||||
|
||||
// Initialize metrics — loads existing ledger from disk
|
||||
initMetrics(base);
|
||||
// Initialize metrics — loads existing ledger from disk.
|
||||
// Use basePath (not base) so worktree-mode reads the worktree ledger (#769).
|
||||
initMetrics(basePath);
|
||||
|
||||
// Initialize routing history for adaptive learning
|
||||
initRoutingHistory(base);
|
||||
initRoutingHistory(basePath);
|
||||
|
||||
// Capture the session's current model at auto-mode start (#650).
|
||||
// This prevents model bleed when multiple GSD instances share the
|
||||
|
|
@ -1180,8 +1194,10 @@ export async function startAuto(
|
|||
);
|
||||
}
|
||||
|
||||
// Self-heal: clear stale runtime records where artifacts already exist
|
||||
await selfHealRuntimeRecords(base, ctx, completedKeySet);
|
||||
// Self-heal: clear stale runtime records where artifacts already exist.
|
||||
// Use basePath (not base) — in worktree mode, basePath points to the worktree
|
||||
// where runtime records and artifacts actually live (#769).
|
||||
await selfHealRuntimeRecords(basePath, ctx, completedKeySet);
|
||||
|
||||
// Self-heal: remove stale .git/index.lock from prior crash.
|
||||
// A stale lock file blocks all git operations (commit, merge, checkout).
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import {
|
|||
verifyExpectedArtifact,
|
||||
diagnoseExpectedArtifact,
|
||||
buildLoopRemediationSteps,
|
||||
selfHealRuntimeRecords,
|
||||
completedKeysPath,
|
||||
persistCompletedKey,
|
||||
removePersistedKey,
|
||||
|
|
@ -460,3 +461,64 @@ test("verifyExpectedArtifact plan-slice fails for plan with no tasks (#699)", ()
|
|||
cleanup(base);
|
||||
}
|
||||
});
|
||||
|
||||
// ─── selfHealRuntimeRecords — worktree base path (#769) ──────────────────
|
||||
|
||||
test("selfHealRuntimeRecords clears stale record when artifact exists at worktree base (#769)", async () => {
|
||||
// Simulate worktree layout: the runtime record AND the artifact both live
|
||||
// under the worktree's .gsd/, not the main project root.
|
||||
const worktreeBase = makeTmpBase();
|
||||
const mainBase = makeTmpBase();
|
||||
try {
|
||||
const { writeUnitRuntimeRecord, readUnitRuntimeRecord } = await import("../unit-runtime.ts");
|
||||
|
||||
// Write a stale runtime record in the worktree .gsd/runtime/units/
|
||||
writeUnitRuntimeRecord(worktreeBase, "run-uat", "M001/S01", Date.now() - 7200_000, {
|
||||
phase: "dispatched",
|
||||
});
|
||||
|
||||
// Write the UAT result artifact in the worktree .gsd/milestones/
|
||||
const uatPath = join(worktreeBase, ".gsd", "milestones", "M001", "slices", "S01", "S01-UAT-RESULT.md");
|
||||
writeFileSync(uatPath, "---\nresult: pass\n---\n# UAT Result\nAll tests passed.\n");
|
||||
|
||||
// Verify the runtime record exists before heal
|
||||
const before = readUnitRuntimeRecord(worktreeBase, "run-uat", "M001/S01");
|
||||
assert.ok(before, "runtime record should exist before heal");
|
||||
|
||||
// Mock ExtensionContext with minimal notify
|
||||
const notifications: string[] = [];
|
||||
const mockCtx = {
|
||||
ui: { notify: (msg: string) => { notifications.push(msg); } },
|
||||
} as any;
|
||||
|
||||
// Call selfHeal with worktreeBase — this is the fix: using the worktree path
|
||||
// so both the runtime record and artifact are found
|
||||
const completedKeys = new Set<string>();
|
||||
await selfHealRuntimeRecords(worktreeBase, mockCtx, completedKeys);
|
||||
|
||||
// The stale record should be cleared
|
||||
const after = readUnitRuntimeRecord(worktreeBase, "run-uat", "M001/S01");
|
||||
assert.equal(after, null, "runtime record should be cleared after heal");
|
||||
|
||||
// The completion key should be persisted
|
||||
assert.ok(completedKeys.has("run-uat/M001/S01"), "completion key should be added");
|
||||
assert.ok(notifications.some(n => n.includes("Self-heal")), "should emit self-heal notification");
|
||||
|
||||
// Now verify that calling with mainBase does NOT find/clear anything (the old bug)
|
||||
// Write a stale record at mainBase but NO artifact there
|
||||
writeUnitRuntimeRecord(mainBase, "run-uat", "M001/S01", Date.now() - 7200_000, {
|
||||
phase: "dispatched",
|
||||
});
|
||||
const mainKeys = new Set<string>();
|
||||
await selfHealRuntimeRecords(mainBase, mockCtx, mainKeys);
|
||||
|
||||
// The record at mainBase should be cleared by the stale timeout (>1h),
|
||||
// but the completion key should NOT be set (artifact doesn't exist at mainBase)
|
||||
const afterMain = readUnitRuntimeRecord(mainBase, "run-uat", "M001/S01");
|
||||
assert.equal(afterMain, null, "stale record at main base should be cleared by timeout");
|
||||
assert.ok(!mainKeys.has("run-uat/M001/S01"), "completion key should NOT be set when artifact is missing");
|
||||
} finally {
|
||||
cleanup(worktreeBase);
|
||||
cleanup(mainBase);
|
||||
}
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue