diff --git a/src/resources/extensions/sf/auto.ts b/src/resources/extensions/sf/auto.ts index 7483beb25..9935570f9 100644 --- a/src/resources/extensions/sf/auto.ts +++ b/src/resources/extensions/sf/auto.ts @@ -65,12 +65,12 @@ import { import { DISPATCH_RULES, resolveDispatch } from "./auto-dispatch.js"; import { _resetPendingResolve, + autoLoop, type ErrorContext, isSessionSwitchInFlight, type LoopDeps, resolveAgentEnd, resolveAgentEndCancelled, - autoLoop, runUokKernelLoop, } from "./auto-loop.js"; import { @@ -132,6 +132,7 @@ import { debugLog, isDebugEnabled, writeDebugSummary } from "./debug-logger.js"; import { getPriorSliceCompletionBlocker } from "./dispatch-guard.js"; import { rebuildState, runSFDoctor } from "./doctor.js"; import { + healAutoStartupRuntime, preDispatchHealthGate, resetProactiveHealing, setLevelChangeCallback, @@ -424,7 +425,10 @@ export function getAutoDashboardData(): AutoDashboardData { const ledger = getLedger(); const totals = ledger ? getProjectTotals(ledger.units) : null; const sessionId = s.cmdCtx?.sessionManager?.getSessionId?.() ?? null; - const rtkSavings = sessionId && s.basePath ? getRtkSessionSavings(s.basePath, sessionId) : null; + const rtkSavings = + sessionId && s.basePath + ? getRtkSessionSavings(s.basePath, sessionId) + : null; const rtkEnabled = loadEffectiveSFPreferences()?.preferences.experimental?.rtk === true; // Pending capture count — lazy check, non-fatal @@ -1013,8 +1017,7 @@ export async function stopAuto( // metadata.kind rather than text matching. blocking=true when the // stop reason includes "blocked" (e.g. write-gate, guardrail block). const isBlocked = - reason !== undefined && - reason.toLowerCase().includes("block"); + reason !== undefined && reason.toLowerCase().includes("block"); const stopMeta = { kind: "terminal" as const, ...(isBlocked ? { blocking: true } : {}), @@ -1550,6 +1553,11 @@ export async function startAuto( // Escape stale worktree cwd from a previous milestone (#608). 
/**
 * Best-effort cleanup of leftover auto-mode lock artifacts at startup.
 *
 * Purpose: let proactive healing influence the very first auto decision
 * instead of only running once a unit is about to be dispatched.
 *
 * Consumer: startAuto, which invokes this before assessInterruptedSession.
 *
 * Each of the two steps runs in its own try/catch; a failure in one step is
 * swallowed and does not prevent the other step from running.
 *
 * @param basePath - Base path forwarded to the lock helpers and to sfRoot.
 * @returns Descriptions of the fixes that were applied, in the order they
 *   happened; an empty array when nothing was changed.
 */
export function healAutoStartupRuntime(basePath: string): string[] {
	const applied: string[] = [];

	// Step 1: drop the crash lock when its recorded holder is not alive.
	try {
		const crashLock = readCrashLock(basePath);
		if (crashLock) {
			const holderIsGone = !isLockProcessAlive(crashLock);
			if (holderIsGone) {
				clearLock(basePath);
				applied.push("cleared stale auto.lock before auto startup");
			}
		}
	} catch {
		// Non-fatal: startup continues without this fix.
	}

	// Step 2: remove the "<sfRoot>.lock" sibling directory when nobody alive
	// is recorded as holding it.
	try {
		const stateRoot = sfRoot(basePath);
		const parentDir = dirname(stateRoot);
		const sessionLockDir = join(parentDir, `${basename(stateRoot)}.lock`);

		if (existsSync(sessionLockDir)) {
			// The crash lock is read again on purpose: step 1 may have just
			// cleared it.
			// NOTE(review): when no crash lock can be read, the directory is
			// always treated as stranded — confirm a live holder can never
			// exist without a readable crash lock.
			const holder = readCrashLock(basePath);
			const nobodyAlive = !holder || !isLockProcessAlive(holder);
			if (nobodyAlive) {
				rmSync(sessionLockDir, { recursive: true, force: true });
				applied.push("removed stranded session lock directory");
			}
		}
	} catch {
		// Non-fatal: startup continues without this fix.
	}

	return applied;
}
test("health gate: corrupt merge state auto-healed", async () => { if (process.platform !== "win32") { {