fix: worktree artifact verification uses correct base path (#769) (#774)

Three fixes for the worktree isolation stuck-state bug:

1. selfHealRuntimeRecords on initial start used the function parameter
   `base` (main project root) instead of `basePath` (worktree path after
   entry). This meant stale runtime records in the worktree were never
   found or healed, leaving dispatched records that block auto-mode.

2. syncStateToProjectRoot now copies runtime/units/ records alongside
   milestone data. This provides defense-in-depth: even if selfHeal runs
   against the project root before worktree re-entry, stale records copied
   there by a prior sync are visible to it.

3. initMetrics and initRoutingHistory also corrected from `base` to
   `basePath` — same class of bug (stale function parameter after
   worktree entry).

Adds test verifying selfHealRuntimeRecords resolves artifacts and clears
records correctly when pointed at a worktree base path.
This commit is contained in:
Tom Boucher 2026-03-16 23:03:22 -04:00 committed by GitHub
parent 52848d7fd2
commit 3c1a4e9109
2 changed files with 83 additions and 5 deletions

View file

@ -196,6 +196,19 @@ function syncStateToProjectRoot(worktreePath: string, projectRoot: string, miles
cpSync(srcMilestone, dstMilestone, { recursive: true, force: true });
}
} catch { /* non-fatal */ }
// 3. Runtime records — unit dispatch state used by selfHealRuntimeRecords().
// Without this, a crash during a unit leaves the runtime record only in the
// worktree. If the next session resolves basePath before worktree re-entry,
// selfHeal can't find or clear the stale record (#769).
try {
const srcRuntime = join(wtGsd, "runtime", "units");
const dstRuntime = join(prGsd, "runtime", "units");
if (existsSync(srcRuntime)) {
mkdirSync(dstRuntime, { recursive: true });
cpSync(srcRuntime, dstRuntime, { recursive: true, force: true });
}
} catch { /* non-fatal */ }
}
// ─── State ────────────────────────────────────────────────────────────────────
@ -1125,11 +1138,12 @@ export async function startAuto(
}
}
// Initialize metrics — loads existing ledger from disk
initMetrics(base);
// Initialize metrics — loads existing ledger from disk.
// Use basePath (not base) so worktree-mode reads the worktree ledger (#769).
initMetrics(basePath);
// Initialize routing history for adaptive learning
initRoutingHistory(base);
initRoutingHistory(basePath);
// Capture the session's current model at auto-mode start (#650).
// This prevents model bleed when multiple GSD instances share the
@ -1180,8 +1194,10 @@ export async function startAuto(
);
}
// Self-heal: clear stale runtime records where artifacts already exist
await selfHealRuntimeRecords(base, ctx, completedKeySet);
// Self-heal: clear stale runtime records where artifacts already exist.
// Use basePath (not base) — in worktree mode, basePath points to the worktree
// where runtime records and artifacts actually live (#769).
await selfHealRuntimeRecords(basePath, ctx, completedKeySet);
// Self-heal: remove stale .git/index.lock from prior crash.
// A stale lock file blocks all git operations (commit, merge, checkout).

View file

@ -10,6 +10,7 @@ import {
verifyExpectedArtifact,
diagnoseExpectedArtifact,
buildLoopRemediationSteps,
selfHealRuntimeRecords,
completedKeysPath,
persistCompletedKey,
removePersistedKey,
@ -460,3 +461,64 @@ test("verifyExpectedArtifact plan-slice fails for plan with no tasks (#699)", ()
cleanup(base);
}
});
// ─── selfHealRuntimeRecords — worktree base path (#769) ──────────────────
// Regression test for #769. Exercises selfHealRuntimeRecords against two
// separate temp bases:
//   1. worktreeBase — holds a "dispatched" runtime record AND the matching UAT
//      result artifact; the record must be cleared and the completion key added.
//   2. mainBase — holds only a "dispatched" runtime record (no artifact); the
//      record must be cleared, but no completion key may be added.
test("selfHealRuntimeRecords clears stale record when artifact exists at worktree base (#769)", async () => {
// Simulate worktree layout: the runtime record AND the artifact both live
// under the worktree's .gsd/, not the main project root.
const worktreeBase = makeTmpBase();
const mainBase = makeTmpBase();
try {
// Runtime-record helpers are loaded via dynamic import inside the test body.
const { writeUnitRuntimeRecord, readUnitRuntimeRecord } = await import("../unit-runtime.ts");
// Write a stale runtime record in the worktree .gsd/runtime/units/
// (timestamp is 7,200,000 ms = 2 hours in the past).
writeUnitRuntimeRecord(worktreeBase, "run-uat", "M001/S01", Date.now() - 7200_000, {
phase: "dispatched",
});
// Write the UAT result artifact in the worktree .gsd/milestones/
// NOTE(review): assumes makeTmpBase() already created the
// milestones/M001/slices/S01 directory — confirm against the helper.
const uatPath = join(worktreeBase, ".gsd", "milestones", "M001", "slices", "S01", "S01-UAT-RESULT.md");
writeFileSync(uatPath, "---\nresult: pass\n---\n# UAT Result\nAll tests passed.\n");
// Verify the runtime record exists before heal
const before = readUnitRuntimeRecord(worktreeBase, "run-uat", "M001/S01");
assert.ok(before, "runtime record should exist before heal");
// Mock ExtensionContext with minimal notify; every notification message is
// collected so the test can assert on it. Cast to any because only ui.notify
// is stubbed.
const notifications: string[] = [];
const mockCtx = {
ui: { notify: (msg: string) => { notifications.push(msg); } },
} as any;
// Call selfHeal with worktreeBase — this is the fix: using the worktree path
// so both the runtime record and artifact are found
const completedKeys = new Set<string>();
await selfHealRuntimeRecords(worktreeBase, mockCtx, completedKeys);
// The stale record should be cleared
const after = readUnitRuntimeRecord(worktreeBase, "run-uat", "M001/S01");
assert.equal(after, null, "runtime record should be cleared after heal");
// The completion key should have been added to the Set passed to selfHeal,
// and a notification mentioning "Self-heal" should have been emitted.
assert.ok(completedKeys.has("run-uat/M001/S01"), "completion key should be added");
assert.ok(notifications.some(n => n.includes("Self-heal")), "should emit self-heal notification");
// Now run selfHeal against mainBase, where the artifact is absent (the
// situation the old bug produced by passing the project root).
// Write a stale record at mainBase but NO artifact there
writeUnitRuntimeRecord(mainBase, "run-uat", "M001/S01", Date.now() - 7200_000, {
phase: "dispatched",
});
// A fresh key set isolates this call from the worktreeBase results above;
// mockCtx (and its notifications array) is reused.
const mainKeys = new Set<string>();
await selfHealRuntimeRecords(mainBase, mockCtx, mainKeys);
// The record at mainBase should be cleared by the stale timeout (>1h),
// but the completion key should NOT be set (artifact doesn't exist at mainBase)
const afterMain = readUnitRuntimeRecord(mainBase, "run-uat", "M001/S01");
assert.equal(afterMain, null, "stale record at main base should be cleared by timeout");
assert.ok(!mainKeys.has("run-uat/M001/S01"), "completion key should NOT be set when artifact is missing");
} finally {
// Always remove both temp bases, even when an assertion above fails.
cleanup(worktreeBase);
cleanup(mainBase);
}
});