Three fixes for the worktree isolation stuck-state bug:

1. selfHealRuntimeRecords on initial start used the function parameter `base` (the main project root) instead of `basePath` (the worktree path after entry). This meant stale runtime records in the worktree were never found or healed, leaving dispatched records that block auto-mode.
2. syncStateToProjectRoot now copies runtime/units/ records alongside milestone data. This provides defense in depth: even if selfHeal runs before worktree re-entry, stale records from a prior sync are still visible.
3. initMetrics and initRoutingHistory were also corrected from `base` to `basePath` — the same class of bug (a stale function parameter used after worktree entry).

Also adds a test verifying that selfHealRuntimeRecords resolves artifacts and clears records correctly when pointed at a worktree base path.
This commit is contained in:
parent
52848d7fd2
commit
3c1a4e9109
2 changed files with 83 additions and 5 deletions
|
|
@ -196,6 +196,19 @@ function syncStateToProjectRoot(worktreePath: string, projectRoot: string, miles
|
|||
cpSync(srcMilestone, dstMilestone, { recursive: true, force: true });
|
||||
}
|
||||
} catch { /* non-fatal */ }
|
||||
|
||||
// 3. Runtime records — unit dispatch state used by selfHealRuntimeRecords().
|
||||
// Without this, a crash during a unit leaves the runtime record only in the
|
||||
// worktree. If the next session resolves basePath before worktree re-entry,
|
||||
// selfHeal can't find or clear the stale record (#769).
|
||||
try {
|
||||
const srcRuntime = join(wtGsd, "runtime", "units");
|
||||
const dstRuntime = join(prGsd, "runtime", "units");
|
||||
if (existsSync(srcRuntime)) {
|
||||
mkdirSync(dstRuntime, { recursive: true });
|
||||
cpSync(srcRuntime, dstRuntime, { recursive: true, force: true });
|
||||
}
|
||||
} catch { /* non-fatal */ }
|
||||
}
|
||||
|
||||
// ─── State ────────────────────────────────────────────────────────────────────
|
||||
|
|
@ -1125,11 +1138,12 @@ export async function startAuto(
|
|||
}
|
||||
}
|
||||
|
||||
// Initialize metrics — loads existing ledger from disk
|
||||
initMetrics(base);
|
||||
// Initialize metrics — loads existing ledger from disk.
|
||||
// Use basePath (not base) so worktree-mode reads the worktree ledger (#769).
|
||||
initMetrics(basePath);
|
||||
|
||||
// Initialize routing history for adaptive learning
|
||||
initRoutingHistory(base);
|
||||
initRoutingHistory(basePath);
|
||||
|
||||
// Capture the session's current model at auto-mode start (#650).
|
||||
// This prevents model bleed when multiple GSD instances share the
|
||||
|
|
@ -1180,8 +1194,10 @@ export async function startAuto(
|
|||
);
|
||||
}
|
||||
|
||||
// Self-heal: clear stale runtime records where artifacts already exist
|
||||
await selfHealRuntimeRecords(base, ctx, completedKeySet);
|
||||
// Self-heal: clear stale runtime records where artifacts already exist.
|
||||
// Use basePath (not base) — in worktree mode, basePath points to the worktree
|
||||
// where runtime records and artifacts actually live (#769).
|
||||
await selfHealRuntimeRecords(basePath, ctx, completedKeySet);
|
||||
|
||||
// Self-heal: remove stale .git/index.lock from prior crash.
|
||||
// A stale lock file blocks all git operations (commit, merge, checkout).
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import {
|
|||
verifyExpectedArtifact,
|
||||
diagnoseExpectedArtifact,
|
||||
buildLoopRemediationSteps,
|
||||
selfHealRuntimeRecords,
|
||||
completedKeysPath,
|
||||
persistCompletedKey,
|
||||
removePersistedKey,
|
||||
|
|
@ -460,3 +461,64 @@ test("verifyExpectedArtifact plan-slice fails for plan with no tasks (#699)", ()
|
|||
cleanup(base);
|
||||
}
|
||||
});
|
||||
|
||||
// ─── selfHealRuntimeRecords — worktree base path (#769) ──────────────────
|
||||
|
||||
test("selfHealRuntimeRecords clears stale record when artifact exists at worktree base (#769)", async () => {
|
||||
// Simulate worktree layout: the runtime record AND the artifact both live
|
||||
// under the worktree's .gsd/, not the main project root.
|
||||
const worktreeBase = makeTmpBase();
|
||||
const mainBase = makeTmpBase();
|
||||
try {
|
||||
const { writeUnitRuntimeRecord, readUnitRuntimeRecord } = await import("../unit-runtime.ts");
|
||||
|
||||
// Write a stale runtime record in the worktree .gsd/runtime/units/
|
||||
writeUnitRuntimeRecord(worktreeBase, "run-uat", "M001/S01", Date.now() - 7200_000, {
|
||||
phase: "dispatched",
|
||||
});
|
||||
|
||||
// Write the UAT result artifact in the worktree .gsd/milestones/
|
||||
const uatPath = join(worktreeBase, ".gsd", "milestones", "M001", "slices", "S01", "S01-UAT-RESULT.md");
|
||||
writeFileSync(uatPath, "---\nresult: pass\n---\n# UAT Result\nAll tests passed.\n");
|
||||
|
||||
// Verify the runtime record exists before heal
|
||||
const before = readUnitRuntimeRecord(worktreeBase, "run-uat", "M001/S01");
|
||||
assert.ok(before, "runtime record should exist before heal");
|
||||
|
||||
// Mock ExtensionContext with minimal notify
|
||||
const notifications: string[] = [];
|
||||
const mockCtx = {
|
||||
ui: { notify: (msg: string) => { notifications.push(msg); } },
|
||||
} as any;
|
||||
|
||||
// Call selfHeal with worktreeBase — this is the fix: using the worktree path
|
||||
// so both the runtime record and artifact are found
|
||||
const completedKeys = new Set<string>();
|
||||
await selfHealRuntimeRecords(worktreeBase, mockCtx, completedKeys);
|
||||
|
||||
// The stale record should be cleared
|
||||
const after = readUnitRuntimeRecord(worktreeBase, "run-uat", "M001/S01");
|
||||
assert.equal(after, null, "runtime record should be cleared after heal");
|
||||
|
||||
// The completion key should be persisted
|
||||
assert.ok(completedKeys.has("run-uat/M001/S01"), "completion key should be added");
|
||||
assert.ok(notifications.some(n => n.includes("Self-heal")), "should emit self-heal notification");
|
||||
|
||||
// Now verify that calling with mainBase does NOT find/clear anything (the old bug)
|
||||
// Write a stale record at mainBase but NO artifact there
|
||||
writeUnitRuntimeRecord(mainBase, "run-uat", "M001/S01", Date.now() - 7200_000, {
|
||||
phase: "dispatched",
|
||||
});
|
||||
const mainKeys = new Set<string>();
|
||||
await selfHealRuntimeRecords(mainBase, mockCtx, mainKeys);
|
||||
|
||||
// The record at mainBase should be cleared by the stale timeout (>1h),
|
||||
// but the completion key should NOT be set (artifact doesn't exist at mainBase)
|
||||
const afterMain = readUnitRuntimeRecord(mainBase, "run-uat", "M001/S01");
|
||||
assert.equal(afterMain, null, "stale record at main base should be cleared by timeout");
|
||||
assert.ok(!mainKeys.has("run-uat/M001/S01"), "completion key should NOT be set when artifact is missing");
|
||||
} finally {
|
||||
cleanup(worktreeBase);
|
||||
cleanup(mainBase);
|
||||
}
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue