diff --git a/src/cli-status.ts b/src/cli-status.ts index 6eb5c10d4..f91aaab29 100644 --- a/src/cli-status.ts +++ b/src/cli-status.ts @@ -56,6 +56,36 @@ function formatCost(snapshot: QuerySnapshot): string { return `$${total.toFixed(4)}${workers > 0 ? ` (${workers} worker${workers === 1 ? "" : "s"})` : ""}`; } +function readSolverStatus(basePath: string): string | null { + let state: Record; + try { + state = JSON.parse( + readFileSync( + join(basePath, ".sf", "runtime", "autonomous-solver", "active.json"), + "utf-8", + ), + ); + } catch { + return null; + } + const checkpoint = state.latestCheckpoint ?? {}; + const parts = [ + `${state.unitType ?? "unit"} ${state.unitId ?? "n/a"}`, + `iter ${state.iteration ?? "?"}/${state.maxIterations ?? "?"}`, + `outcome ${checkpoint.outcome ?? "none"}`, + ]; + const remaining = Array.isArray(checkpoint.remainingItems) + ? checkpoint.remainingItems.length + : null; + if (remaining !== null) parts.push(`${remaining} remaining`); + if (checkpoint.blockerReason) + parts.push(`blocker: ${checkpoint.blockerReason}`); + if (checkpoint.decisionQuestion) + parts.push(`decision: ${checkpoint.decisionQuestion}`); + if (checkpoint.summary) parts.push(String(checkpoint.summary)); + return parts.join(" · "); +} + function latestJsonlFile(dir: string): string | null { try { const entries = readdirSync(dir) @@ -135,7 +165,11 @@ function formatModel(model: CurrentModel | null): string { export function renderLiveStatus( snapshot: QuerySnapshot, - opts: { model: CurrentModel | null; recentEvents: string[] }, + opts: { + basePath?: string; + model: CurrentModel | null; + recentEvents: string[]; + }, ): string { const lines: string[] = []; lines.push("SF Status"); @@ -149,6 +183,8 @@ export function renderLiveStatus( lines.push(`Dispatch: ${formatDispatch(snapshot)}`); lines.push(`Cost: ${formatCost(snapshot)}`); lines.push(`Model: ${formatModel(opts.model)}`); + const solverStatus = opts.basePath ? readSolverStatus(opts.basePath) : null; + if (solverStatus) lines.push(`Solver: ${solverStatus}`); lines.push(""); lines.push("Last Events:"); if (opts.recentEvents.length === 0) { @@ -177,6 +213,7 @@ async function buildStatusText( .map(formatMergedLogEvent); return renderLiveStatus(snapshot, { + basePath, model: getCurrentModel(basePath, sfHome), recentEvents, }); diff --git a/src/resource-loader.ts b/src/resource-loader.ts index e26e0bc62..b164bf40b 100644 --- a/src/resource-loader.ts +++ b/src/resource-loader.ts @@ -237,6 +237,100 @@ export function getNewerManagedResourceVersion( : null; } +const RESOURCE_SYNC_LOCK_STALE_MS = 120_000; +const RESOURCE_SYNC_LOCK_TIMEOUT_MS = 60_000; +const RESOURCE_SYNC_LOCK_POLL_MS = 100; + +function sleepSync(ms: number): void { + const signal = new Int32Array(new SharedArrayBuffer(4)); + Atomics.wait(signal, 0, 0, ms); +} + +function readLockPid(lockDir: string): number | null { + try { + const raw = readFileSync(join(lockDir, "owner"), "utf-8").trim(); + const pid = Number.parseInt(raw, 10); + return Number.isFinite(pid) && pid > 0 ? pid : null; + } catch { + return null; + } +} + +function isProcessAlive(pid: number): boolean { + try { + process.kill(pid, 0); + return true; + } catch { + return false; + } +} + +function removeStaleResourceSyncLock(lockDir: string): boolean { + if (!existsSync(lockDir)) return true; + + const pid = readLockPid(lockDir); + let oldEnough = false; + try { + oldEnough = + Date.now() - statSync(lockDir).mtimeMs > RESOURCE_SYNC_LOCK_STALE_MS; + } catch { + oldEnough = true; + } + const ownerDead = pid !== null && !isProcessAlive(pid); + if (!oldEnough && !ownerDead) return false; + + try { + rmSync(lockDir, { recursive: true, force: true, maxRetries: 3 }); + return true; + } catch { + return false; + } +} + +/** + * Serialise writes to the shared managed resource tree. + * + * Purpose: prevent parallel SF launches from deleting/copying the same + * ~/.sf/agent/extensions subtree concurrently, which can surface as transient + * ENOTEMPTY/ENOENT failures during startup. + * + * Consumer: initResources before it prunes and copies bundled resources. + */ +export function withResourceSyncLock( + agentDir: string, + work: () => T, + timeoutMs = RESOURCE_SYNC_LOCK_TIMEOUT_MS, +): T { + const lockDir = join(agentDir, ".resource-sync.lock"); + const startedAt = Date.now(); + + while (true) { + try { + mkdirSync(lockDir); + writeFileSync(join(lockDir, "owner"), `${process.pid}\n`); + break; + } catch { + removeStaleResourceSyncLock(lockDir); + if (Date.now() - startedAt > timeoutMs) { + throw new Error( + `Timed out waiting for SF resource sync lock: ${lockDir}`, + ); + } + sleepSync(RESOURCE_SYNC_LOCK_POLL_MS); + } + } + + try { + return work(); + } finally { + try { + rmSync(lockDir, { recursive: true, force: true, maxRetries: 3 }); + } catch { + /* non-fatal: stale-lock cleanup on next launch handles this */ + } + } +} + /** * Recursively makes all files and directories under dirPath owner-writable. * @@ -709,7 +803,10 @@ function pruneRemovedBundledExtensions( */ export function initResources(agentDir: string): void { mkdirSync(agentDir, { recursive: true }); + withResourceSyncLock(agentDir, () => initResourcesUnlocked(agentDir)); +} +function initResourcesUnlocked(agentDir: string): void { const currentVersion = getBundledSfVersion(); const manifest = readManagedResourceManifest(agentDir); const extensionsDir = join(agentDir, "extensions"); diff --git a/src/resources/extensions/sf/auto-dashboard.js b/src/resources/extensions/sf/auto-dashboard.js index 7275453c2..32faf32b4 100644 --- a/src/resources/extensions/sf/auto-dashboard.js +++ b/src/resources/extensions/sf/auto-dashboard.js @@ -14,6 +14,7 @@ import { getRtkSessionSavings, } from "../shared/rtk-session-stats.js"; import { makeUI } from "../shared/tui.js"; +import { readAutonomousSolverState } from "./autonomous-solver.js"; import { getErrorMessage } from "./error-utils.js"; import { getLedger, getProjectTotals } from "./metrics.js"; import { getActiveHook } from "./post-unit-hooks.js"; @@ -145,6 +146,25 @@ function _peekNext(unitType, state) { return ""; } } + +function formatSolverWidgetLine(basePath, theme, width, pad) { + const solver = readAutonomousSolverState(basePath); + if (!solver?.unitType || !solver?.unitId) return null; + const checkpoint = solver.latestCheckpoint ?? {}; + const remaining = Array.isArray(checkpoint.remainingItems) + ? checkpoint.remainingItems.length + : 0; + const issue = checkpoint.blockerReason || checkpoint.decisionQuestion || ""; + const text = [ + `solver ${solver.iteration ?? "?"}/${solver.maxIterations ?? "?"}`, + checkpoint.outcome ? `outcome ${checkpoint.outcome}` : "outcome none", + `${remaining} remaining`, + issue ? String(issue) : "", + ] + .filter(Boolean) + .join(" · "); + return truncateToWidth(`${pad}${theme.fg("dim", text)}`, width, "…"); +} /** * Describe what the next unit will be, based on current state. */ @@ -727,6 +747,13 @@ export function updateProgressWidget( lines.push( rightAlign(actionLeft, theme.fg("dim", phaseLabel), width), ); + const solverLine = formatSolverWidgetLine( + accessors.getBasePath(), + theme, + width, + pad, + ); + if (solverLine) lines.push(solverLine); // Progress bar const roadmapSlices = mid ? getRoadmapSlicesSync() : null; if (roadmapSlices) { @@ -814,6 +841,13 @@ export function updateProgressWidget( const tierTag = tierBadge ? theme.fg("dim", `[${tierBadge}] `) : ""; const phaseBadge = `${tierTag}${theme.fg("dim", phaseLabel)}`; lines.push(rightAlign(actionLeft, phaseBadge, width)); + const solverLine = formatSolverWidgetLine( + accessors.getBasePath(), + theme, + width, + pad, + ); + if (solverLine) lines.push(solverLine); lines.push(""); // Two-column body const minTwoColWidth = 76; diff --git a/src/resources/extensions/sf/auto-loop.js b/src/resources/extensions/sf/auto-loop.js index aac7aaa9a..7854900f7 100644 --- a/src/resources/extensions/sf/auto-loop.js +++ b/src/resources/extensions/sf/auto-loop.js @@ -13,7 +13,6 @@ export { } from "./auto/infra-errors.js"; export { autoLoop, - runStandardAutoLoop, runUokKernelLoop, } from "./auto/loop.js"; export { diff --git a/src/resources/extensions/sf/auto.js b/src/resources/extensions/sf/auto.js index bdd1a74dd..2486bb5d1 100644 --- a/src/resources/extensions/sf/auto.js +++ b/src/resources/extensions/sf/auto.js @@ -55,7 +55,6 @@ import { import { DISPATCH_RULES, resolveDispatch } from "./auto-dispatch.js"; import { _resetPendingResolve, - autoLoop, isSessionSwitchInFlight, resolveAgentEnd, resolveAgentEndCancelled, @@ -336,8 +335,7 @@ export { /** Wrapper: register SIGTERM handler and store reference. */ function registerSigtermHandler(currentBasePath) { const prefs = loadEffectiveSFPreferences()?.preferences; - const flags = resolveUokFlags(prefs); - const pathLabel = flags.enabled ? "uok-kernel" : "standard-loop"; + const flags = { ...resolveUokFlags(prefs), enabled: true }; const onSignal = () => { // Write UOK parity exit heartbeat before process.exit(0) bypasses // the finally block in runAutoLoopWithUok. Fixes the enter/exit @@ -346,7 +344,7 @@ function registerSigtermHandler(currentBasePath) { ts: new Date().toISOString(), ...(s.currentUokRunId ? { runId: s.currentUokRunId } : {}), sessionId: s.cmdCtx?.sessionManager?.getSessionId?.(), - path: pathLabel, + path: "uok-kernel", flags: { ...flags }, phase: "exit", status: "signal", @@ -1734,7 +1732,6 @@ export async function startAuto(ctx, pi, base, verboseMode, options) { s, deps: buildLoopDeps(), runKernelLoop: runUokKernelLoop, - runStandardLoop: autoLoop, }); cleanupAfterLoopExit(ctx); return; @@ -1783,7 +1780,6 @@ export async function startAuto(ctx, pi, base, verboseMode, options) { s, deps: buildLoopDeps(), runKernelLoop: runUokKernelLoop, - runStandardLoop: autoLoop, }); cleanupAfterLoopExit(ctx); } diff --git a/src/resources/extensions/sf/auto/loop.js b/src/resources/extensions/sf/auto/loop.js index abd0dabb8..c75908a36 100644 --- a/src/resources/extensions/sf/auto/loop.js +++ b/src/resources/extensions/sf/auto/loop.js @@ -248,16 +248,7 @@ function resolveDispatchNodeKind(unitType, sidecarItem) { } return "unit"; } -async function runUnitPhaseViaContract( - dispatchContract, - ic, - iterData, - loopState, - sidecarItem, -) { - if (dispatchContract === "standard-direct") { - return runUnitPhase(ic, iterData, loopState, sidecarItem); - } +async function runUnitPhaseViaContract(ic, iterData, loopState, sidecarItem) { const scheduler = new ExecutionGraphScheduler(); let outcome = null; const executeNode = async () => { @@ -307,8 +298,7 @@ async function enforceMinRequestInterval(s, prefs) { * This is the linear replacement for the recursive * dispatchNextUnit → handleAgentEnd → dispatchNextUnit chain. */ -export async function autoLoop(ctx, pi, s, deps, options) { - const dispatchContract = options?.dispatchContract ?? "standard-direct"; +export async function autoLoop(ctx, pi, s, deps) { debugLog("autoLoop", { phase: "enter" }); let iteration = 0; // Load persisted stuck state so counters survive session restarts (#3704) @@ -571,7 +561,6 @@ export async function autoLoop(ctx, pi, s, deps, options) { // ── Unit execution (shared with dev path) ── await enforceMinRequestInterval(s, ic.prefs); const unitPhaseResult = await runUnitPhaseViaContract( - dispatchContract, ic, iterData, loopState, @@ -590,6 +579,10 @@ export async function autoLoop(ctx, pi, s, deps, options) { finishTurn("stopped", "execution", "unit-break"); break; } + if (unitPhaseResult.action === "continue") { + finishTurn("retry"); + continue; + } // ── Verify first, then reconcile (only mark complete on pass) ── debugLog("autoLoop", { phase: "custom-engine-verify", @@ -875,7 +868,6 @@ export async function autoLoop(ctx, pi, s, deps, options) { } await enforceMinRequestInterval(s, ic.prefs); const unitPhaseResult = await runUnitPhaseViaContract( - dispatchContract, ic, iterData, loopState, @@ -895,6 +887,10 @@ export async function autoLoop(ctx, pi, s, deps, options) { finishTurn("stopped", "execution", "unit-break"); break; } + if (unitPhaseResult.action === "continue") { + finishTurn("retry"); + continue; + } // ── Phase 5: Finalize ─────────────────────────────────────────────── const finalizeResult = await withPhaseTimeout( "finalize", @@ -1117,8 +1113,5 @@ export async function autoLoop(ctx, pi, s, deps, options) { } // ── Dispatch-contract entry points ─────────────────────────────────────── export async function runUokKernelLoop(ctx, pi, s, deps) { - return autoLoop(ctx, pi, s, deps, { dispatchContract: "uok-scheduler" }); -} -export async function runStandardAutoLoop(ctx, pi, s, deps) { - return autoLoop(ctx, pi, s, deps, { dispatchContract: "standard-direct" }); + return autoLoop(ctx, pi, s, deps); } diff --git a/src/resources/extensions/sf/auto/phases.js b/src/resources/extensions/sf/auto/phases.js index 9931165d5..77e14ad8b 100644 --- a/src/resources/extensions/sf/auto/phases.js +++ b/src/resources/extensions/sf/auto/phases.js @@ -34,6 +34,16 @@ import { formatToolCallSummary, resetToolCallCounts, } from "../auto-tool-tracking.js"; +import { + assessAutonomousSolverTurn, + beginAutonomousSolverIteration, + buildAutonomousSolverMissingCheckpointRepairPrompt, + buildAutonomousSolverPromptBlock, + buildAutonomousSolverSteeringPromptBlock, + consumePendingAutonomousSolverSteering, + getConfiguredAutonomousSolverMaxIterations, + recordAutonomousSolverMissingCheckpointRetry, +} from "../autonomous-solver.js"; import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js"; import { debugLog } from "../debug-logger.js"; import { PROJECT_FILES } from "../detection.js"; @@ -1874,6 +1884,40 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { } } // Prompt char measurement + try { + const solverState = beginAutonomousSolverIteration( + s.basePath, + unitType, + unitId, + { + maxIterations: getConfiguredAutonomousSolverMaxIterations(prefs), + }, + ); + const steeringBlock = buildAutonomousSolverSteeringPromptBlock( + consumePendingAutonomousSolverSteering(s.basePath), + ); + if (steeringBlock) { + finalPrompt = `${finalPrompt}\n\n---\n\n${steeringBlock}`; + } + finalPrompt = `${finalPrompt}\n\n---\n\n${buildAutonomousSolverPromptBlock(solverState)}`; + deps.emitJournalEvent({ + ts: new Date().toISOString(), + flowId: ic.flowId, + seq: ic.nextSeq(), + eventType: "solver-iteration-start", + data: { + unitType, + unitId, + iteration: solverState.iteration, + maxIterations: solverState.maxIterations, + steeringInjected: Boolean(steeringBlock), + }, + }); + } catch (solverErr) { + logWarning("engine", "Autonomous solver prompt injection failed", { + error: solverErr instanceof Error ? solverErr.message : String(solverErr), + }); + } s.lastPromptCharCount = finalPrompt.length; s.lastBaselineCharCount = undefined; if (deps.isDbAvailable()) { @@ -2018,12 +2062,127 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { }); const unitResult = await runUnit(ctx, pi, s, unitType, unitId, finalPrompt); s.lastUnitAgentEndMessages = unitResult.event?.messages ?? null; + let currentUnitResult = unitResult; + let solverAssessment = assessAutonomousSolverTurn( + s.basePath, + unitType, + unitId, + ); + if (solverAssessment.action === "missing-checkpoint-retry") { + recordAutonomousSolverMissingCheckpointRetry(s.basePath, unitType, unitId); + deps.emitJournalEvent({ + ts: new Date().toISOString(), + flowId: ic.flowId, + seq: ic.nextSeq(), + eventType: "solver-missing-checkpoint-retry", + data: { + unitType, + unitId, + iteration: solverAssessment.state?.iteration, + }, + }); + ctx.ui.notify( + `Autonomous solver checkpoint missing for ${unitType} ${unitId}; redispatching one repair turn.`, + "warning", + ); + currentUnitResult = await runUnit( + ctx, + pi, + s, + unitType, + unitId, + buildAutonomousSolverMissingCheckpointRepairPrompt( + solverAssessment.state, + unitType, + unitId, + ), + ); + s.lastUnitAgentEndMessages = currentUnitResult.event?.messages ?? null; + solverAssessment = assessAutonomousSolverTurn(s.basePath, unitType, unitId); + } + const solverCheckpoint = solverAssessment.checkpoint; + if (solverCheckpoint) { + deps.emitJournalEvent({ + ts: new Date().toISOString(), + flowId: ic.flowId, + seq: ic.nextSeq(), + eventType: "solver-checkpoint", + data: { + unitType, + unitId, + iteration: solverCheckpoint.iteration, + outcome: solverCheckpoint.outcome, + remainingCount: solverCheckpoint.remainingItems?.length ?? 0, + }, + }); + } + if (solverAssessment.action === "pause") { + const reason = + solverCheckpoint?.outcome === "decide" + ? (solverCheckpoint.decisionQuestion ?? solverCheckpoint.summary) + : solverCheckpoint?.outcome === "blocked" + ? (solverCheckpoint.blockerReason ?? solverCheckpoint.summary) + : solverAssessment.reason; + deps.emitJournalEvent({ + ts: new Date().toISOString(), + flowId: ic.flowId, + seq: ic.nextSeq(), + eventType: + solverAssessment.reason === "solver-max-iterations" + ? "solver-max-iterations-pause" + : "solver-pause", + data: { + unitType, + unitId, + reason: solverAssessment.reason, + iteration: solverAssessment.state?.iteration, + maxIterations: solverAssessment.state?.maxIterations, + remainingItems: solverCheckpoint?.remainingItems ?? [], + evidencePath: ".sf/runtime/autonomous-solver/LOOP.md", + }, + }); + ctx.ui.notify( + `Autonomous solver paused ${unitType} ${unitId}: ${reason || solverAssessment.reason}`, + "warning", + ); + await deps.pauseAuto(ctx, pi); + return { + action: "break", + reason: solverAssessment.reason, + }; + } + if (solverAssessment.action === "continue") { + deps.emitJournalEvent({ + ts: new Date().toISOString(), + flowId: ic.flowId, + seq: ic.nextSeq(), + eventType: "solver-continue-redispatch", + data: { + unitType, + unitId, + iteration: solverAssessment.state?.iteration, + remainingItems: solverCheckpoint?.remainingItems ?? [], + evidencePath: ".sf/runtime/autonomous-solver/LOOP.md", + }, + }); + ctx.ui.notify( + `Autonomous solver continuing ${unitType} ${unitId}: ${solverCheckpoint?.remainingItems?.length ?? 0} item(s) remain.`, + "info", + ); + return { + action: "continue", + data: { + unitStartedAt: s.currentUnit?.startedAt, + requestDispatchedAt: currentUnitResult.requestDispatchedAt, + }, + }; + } debugLog("autoLoop", { phase: "runUnit-end", iteration: ic.iteration, unitType, unitId, - status: unitResult.status, + status: currentUnitResult.status, }); // Now that runUnit has called newSession(), the session file path is correct. const sessionFile = deps.getSessionFile(ctx); @@ -2039,20 +2198,22 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { // Tag the most recent window entry with error info for stuck detection const lastEntry = loopState.recentUnits[loopState.recentUnits.length - 1]; if (lastEntry) { - if (unitResult.errorContext) { + if (currentUnitResult.errorContext) { lastEntry.error = - `${unitResult.errorContext.category}:${unitResult.errorContext.message}`.slice( + `${currentUnitResult.errorContext.category}:${currentUnitResult.errorContext.message}`.slice( 0, 200, ); } else if ( - unitResult.status === "error" || - unitResult.status === "cancelled" + currentUnitResult.status === "error" || + currentUnitResult.status === "cancelled" ) { - lastEntry.error = `${unitResult.status}:${unitType}/${unitId}`; - } else if (unitResult.event?.messages?.length) { + lastEntry.error = `${currentUnitResult.status}:${unitType}/${unitId}`; + } else if (currentUnitResult.event?.messages?.length) { const lastMsg = - unitResult.event.messages[unitResult.event.messages.length - 1]; + currentUnitResult.event.messages[ + currentUnitResult.event.messages.length - 1 + ]; const msgStr = typeof lastMsg === "string" ? lastMsg : JSON.stringify(lastMsg); if (/error|fail|exception/i.test(msgStr)) { @@ -2060,28 +2221,28 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { } } } - if (unitResult.status === "cancelled") { + if (currentUnitResult.status === "cancelled") { clearDeferredCommitAfterCancelledUnit( s, ctx, unitType, unitId, - unitResult.errorContext?.message ?? "cancelled", + currentUnitResult.errorContext?.message ?? "cancelled", ); // Provider-error pause: pauseAuto already handled cleanup and scheduled // recovery. Don't hard-stop — just break out of the loop (#2762). - if (unitResult.errorContext?.category === "provider") { + if (currentUnitResult.errorContext?.category === "provider") { await emitCancelledUnitEnd( ic, unitType, unitId, unitStartSeq, - unitResult.errorContext, + currentUnitResult.errorContext, ); debugLog("autoLoop", { phase: "exit", reason: "provider-pause", - isTransient: unitResult.errorContext.isTransient, + isTransient: currentUnitResult.errorContext.isTransient, }); return { action: "break", reason: "provider-pause" }; } @@ -2091,13 +2252,15 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { // Structural errors (TypeError, is not a function) are NOT transient // and must hard-stop to avoid infinite retry loops. if ( - unitResult.errorContext?.isTransient && - unitResult.errorContext?.category === "timeout" + currentUnitResult.errorContext?.isTransient && + currentUnitResult.errorContext?.category === "timeout" ) { // Session-timeout cancellations are resumable pauses: pauseAuto below preserves the auto session // instead of routing the cancelled unit into the hard-stop path. const isSessionCreationTimeout = - unitResult.errorContext.message?.includes("Session creation timed out"); + currentUnitResult.errorContext.message?.includes( + "Session creation timed out", + ); if (isSessionCreationTimeout) { consecutiveSessionTimeouts += 1; const baseRetryAfterMs = 30_000; @@ -2150,7 +2313,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { unitType, unitId, unitStartSeq, - unitResult.errorContext, + currentUnitResult.errorContext, ); return { action: "break", reason: "session-timeout" }; } @@ -2170,7 +2333,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { unitType, unitId, unitStartSeq, - unitResult.errorContext, + currentUnitResult.errorContext, ); return { action: "break", reason: "unit-hard-timeout" }; } @@ -2190,16 +2353,16 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { unitType, unitId, unitStartSeq, - unitResult.errorContext, + currentUnitResult.errorContext, ); ctx.ui.notify( - `Session creation failed for ${unitType} ${unitId}: ${unitResult.errorContext?.message ?? "unknown"}. Stopping auto-mode.`, + `Session creation failed for ${unitType} ${unitId}: ${currentUnitResult.errorContext?.message ?? "unknown"}. Stopping auto-mode.`, "warning", ); await deps.stopAuto( ctx, pi, - `Session creation failed: ${unitResult.errorContext?.message ?? "unknown"}`, + `Session creation failed: ${currentUnitResult.errorContext?.message ?? "unknown"}`, ); debugLog("autoLoop", { phase: "exit", reason: "session-failed" }); return { action: "break", reason: "session-failed" }; @@ -2276,7 +2439,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt, - requestDispatchedAt: unitResult.requestDispatchedAt, + requestDispatchedAt: currentUnitResult.requestDispatchedAt, }, }; } @@ -2330,7 +2493,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { ); } } - if (unitResult.status !== "completed" || !artifactVerified) { + if (currentUnitResult.status !== "completed" || !artifactVerified) { recordLearningOutcomeForUnit( ic, unitType, @@ -2364,7 +2527,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { data: { unitType, unitId, - status: unitResult.status, + status: currentUnitResult.status, artifactVerified, ...(unitEndEntry ? { @@ -2374,8 +2537,8 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { tokens_output: unitEndEntry.tokens.output, } : {}), - ...(unitResult.errorContext - ? { errorContext: unitResult.errorContext } + ...(currentUnitResult.errorContext + ? { errorContext: currentUnitResult.errorContext } : {}), }, causedBy: { flowId: ic.flowId, seq: unitStartSeq }, @@ -2383,13 +2546,13 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { } { const verdict = - unitResult.status === "completed" + currentUnitResult.status === "completed" ? artifactVerified ? "success" : "blocked" - : unitResult.status === "error" + : currentUnitResult.status === "error" ? "fail" - : unitResult.status; + : currentUnitResult.status; const ledger = deps.getLedger(); const unitEntry = ledger?.units ? [...ledger.units] @@ -2417,7 +2580,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { } // ── Safety harness: checkpoint cleanup or rollback ── if (s.checkpointSha) { - if (unitResult.status === "error" && safetyConfig.auto_rollback) { + if (currentUnitResult.status === "error" && safetyConfig.auto_rollback) { const rolled = rollbackToCheckpoint(s.basePath, unitId, s.checkpointSha); if (rolled) { ctx.ui.notify( @@ -2426,7 +2589,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { ); debugLog("runUnitPhase", { phase: "checkpoint-rollback", unitId }); } - } else if (unitResult.status === "error") { + } else if (currentUnitResult.status === "error") { ctx.ui.notify( `Unit ${unitId} failed. Pre-unit checkpoint available at ${s.checkpointSha.slice(0, 8)}`, "warning", @@ -2443,7 +2606,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt, - requestDispatchedAt: unitResult.requestDispatchedAt, + requestDispatchedAt: currentUnitResult.requestDispatchedAt, }, }; } diff --git a/src/resources/extensions/sf/autonomous-solver.js b/src/resources/extensions/sf/autonomous-solver.js new file mode 100644 index 000000000..ee74f9257 --- /dev/null +++ b/src/resources/extensions/sf/autonomous-solver.js @@ -0,0 +1,537 @@ +/** + * autonomous-solver.ts — PDD-shaped checkpoint contract for autonomous units. + * + * Purpose: make long-running autonomous work explicit, resumable, and auditable + * instead of relying on a single agent turn to either finish or silently drift. + * + * Consumer: auto/phases.js injects the contract into each autonomous unit, and + * bootstrap/db-tools.js records agent checkpoints via sf_autonomous_checkpoint. + */ +import { + appendFileSync, + mkdirSync, + readFileSync, + writeFileSync, +} from "node:fs"; +import { dirname, join } from "node:path"; +import { atomicWriteSync } from "./atomic-write.js"; +import { sfRoot } from "./paths.js"; + +export const AUTONOMOUS_SOLVER_OUTCOMES = [ + "continue", + "complete", + "blocked", + "decide", +]; + +const MAX_RENDERED_ITEMS = 12; +const DEFAULT_SOLVER_MAX_ITERATIONS = 12; +const MIN_SOLVER_MAX_ITERATIONS = 1; +const MAX_SOLVER_MAX_ITERATIONS = 100; + +function solverDir(basePath) { + return join(sfRoot(basePath), "runtime", "autonomous-solver"); +} + +function statePath(basePath) { + return join(solverDir(basePath), "active.json"); +} + +function projectionPath(basePath) { + return join(solverDir(basePath), "LOOP.md"); +} + +function historyPath(basePath) { + return join(solverDir(basePath), "iterations.jsonl"); +} + +function steeringPath(basePath) { + return join(solverDir(basePath), "steering.jsonl"); +} + +function nowIso() { + return new Date().toISOString(); +} + +function sanitizeList(value) { + if (!Array.isArray(value)) return []; + return value.map((item) => String(item).trim()).filter(Boolean); +} + +function readJson(path) { + try { + return JSON.parse(readFileSync(path, "utf-8")); + } catch { + return null; + } +} + +function sameUnit(state, unitType, unitId) { + return state?.unitType === unitType && state?.unitId === unitId; +} + +function clampNumber(value, min, max, fallback) { + const n = Number(value); + if (!Number.isFinite(n)) return fallback; + return Math.max(min, Math.min(max, Math.floor(n))); +} + +/** + * Resolve the bounded autonomous solver iteration limit. + * + * Purpose: prevent a misconfigured or vague unit from retrying forever while + * still letting projects raise the ceiling for large, explicitly bounded work. + * + * Consumer: runUnitPhase before dispatch and preferences resolution tests. + */ +export function getConfiguredAutonomousSolverMaxIterations(preferences) { + return clampNumber( + preferences?.auto_supervisor?.solver_max_iterations, + MIN_SOLVER_MAX_ITERATIONS, + MAX_SOLVER_MAX_ITERATIONS, + DEFAULT_SOLVER_MAX_ITERATIONS, + ); +} + +function renderList(items, fallback) { + const list = sanitizeList(items).slice(0, MAX_RENDERED_ITEMS); + if (list.length === 0) return `- ${fallback}`; + return list.map((item) => `- ${item}`).join("\n"); +} + +function renderPdd(pdd = {}) { + return [ + "## PDD Contract", + `- Purpose: ${pdd.purpose || "not recorded yet"}`, + `- Consumer: ${pdd.consumer || "not recorded yet"}`, + `- Contract: ${pdd.contract || "not recorded yet"}`, + `- Failure boundary: ${pdd.failureBoundary || "not recorded yet"}`, + `- Evidence: ${pdd.evidence || "not recorded yet"}`, + `- Non-goals: ${pdd.nonGoals || "not recorded yet"}`, + `- Invariants: ${pdd.invariants || "not recorded yet"}`, + `- Assumptions: ${pdd.assumptions || "not recorded yet"}`, + ].join("\n"); +} + +function renderProjection(state) { + const checkpoint = state.latestCheckpoint ?? {}; + return [ + "# Autonomous Solver Loop", + "", + `- Unit: ${state.unitType} ${state.unitId}`, + `- Status: ${state.status}`, + `- Iteration: ${state.iteration} of ${state.maxIterations}`, + `- Started: ${state.startedAt}`, + `- Updated: ${state.updatedAt}`, + "", + "## Last Checkpoint", + `- Outcome: ${checkpoint.outcome ?? "none"}`, + `- Summary: ${checkpoint.summary ?? "none yet"}`, + checkpoint.blockerReason ? `- Blocker: ${checkpoint.blockerReason}` : "", + checkpoint.decisionQuestion + ? `- Decision needed: ${checkpoint.decisionQuestion}` + : "", + "", + "## Completed This Iteration", + renderList(checkpoint.completedItems, "Nothing recorded yet."), + "", + "## Remaining", + renderList( + checkpoint.remainingItems, + "Unknown until the agent checkpoints.", + ), + "", + renderPdd(checkpoint.pdd), + "", + "## Verification Evidence", + renderList( + checkpoint.verificationEvidence, + "No verification evidence recorded yet.", + ), + "", + ] + .filter((line) => line !== "") + .join("\n"); +} + +function writeState(basePath, state) { + const dir = solverDir(basePath); + mkdirSync(dir, { recursive: true }); + atomicWriteSync(statePath(basePath), `${JSON.stringify(state, null, 2)}\n`); + atomicWriteSync(projectionPath(basePath), renderProjection(state)); +} + +/** + * Start or advance the persisted autonomous solver iteration for a unit. + * + * Purpose: each autonomous dispatch gets an explicit iteration number and + * durable loop projection, so retries and restarts have concrete state. + * + * Consumer: runUnitPhase before sending the unit prompt. + */ +export function beginAutonomousSolverIteration( + basePath, + unitType, + unitId, + options = {}, +) { + const existing = readJson(statePath(basePath)); + const priorIteration = + sameUnit(existing, unitType, unitId) && existing.status !== "complete" + ? Number(existing.iteration ?? 0) + : 0; + const maxIterations = clampNumber( + options.maxIterations ?? existing?.maxIterations, + MIN_SOLVER_MAX_ITERATIONS, + MAX_SOLVER_MAX_ITERATIONS, + DEFAULT_SOLVER_MAX_ITERATIONS, + ); + const state = { + unitType, + unitId, + status: "running", + iteration: Math.max(1, priorIteration + 1), + maxIterations, + startedAt: sameUnit(existing, unitType, unitId) + ? existing.startedAt || nowIso() + : nowIso(), + updatedAt: nowIso(), + latestCheckpoint: sameUnit(existing, unitType, unitId) + ? (existing.latestCheckpoint ?? null) + : null, + missingCheckpointRetry: null, + }; + writeState(basePath, state); + return state; +} + +/** + * Build the PDD autonomous solver prompt block appended to unit prompts. + * + * Purpose: bind every autonomous unit to bounded iterations, evidence, stop + * signals, and the eight PDD fields instead of open-ended hidden retries. + * + * Consumer: runUnitPhase prompt injection. + */ +export function buildAutonomousSolverPromptBlock(state) { + return [ + "## Autonomous Solver Loop Contract", + "", + `You are inside /sf autonomous iteration ${state.iteration} of ${state.maxIterations} for ${state.unitType} ${state.unitId}.`, + "", + "This is SF's built-in solver loop. It is not a separate Ralph workflow. Work one bounded, useful chunk; preserve enough state for the next autonomous iteration to continue without guessing.", + "", + "Before ending the turn, call `sf_autonomous_checkpoint` with:", + '- `outcome: "complete"` only when this unit\'s normal completion tool/artifact is also done.', + '- `outcome: "continue"` when you made real progress but more autonomous iterations are needed.', + '- `outcome: "blocked"` when the next step cannot proceed without unavailable facts, credentials, or a broken environment.', + '- `outcome: "decide"` when there is a material product/architecture choice that must not be decided autonomously.', + "", + "Checkpoint the eight PDD fields every time:", + "- Purpose: why this behavior exists and what value it protects.", + "- Consumer: who or what uses it in production.", + "- Contract: the observable behavior or artifact boundary.", + "- Failure boundary: what failures must be contained or surfaced.", + "- Evidence: commands, files, tests, or runtime observations proving progress.", + "- Non-goals: what you intentionally did not solve this iteration.", + "- Invariants: rules that must remain true across iterations.", + "- Assumptions: uncertain facts you relied on and how to falsify them later.", + "", + "If you are executing an `execute-task` unit and the task is finished, `sf_task_complete` remains mandatory; `sf_autonomous_checkpoint` does not replace it.", + "If you need another iteration, leave exact remaining items in the checkpoint rather than ending with vague prose.", + ].join("\n"); +} + +/** + * Record a solver checkpoint and update the markdown projection. + * + * Purpose: turn the agent's end-of-iteration status into structured autonomous + * state that can be inspected, gated, and resumed. + * + * Consumer: sf_autonomous_checkpoint tool. + */ +export function appendAutonomousSolverCheckpoint(basePath, params) { + const state = + readJson(statePath(basePath)) ?? + beginAutonomousSolverIteration(basePath, params.unitType, params.unitId); + const checkpoint = { + ts: nowIso(), + unitType: params.unitType, + unitId: params.unitId, + iteration: sameUnit(state, params.unitType, params.unitId) + ? state.iteration + : 1, + outcome: params.outcome, + summary: String(params.summary ?? "").trim(), + completedItems: sanitizeList(params.completedItems), + remainingItems: sanitizeList(params.remainingItems), + verificationEvidence: sanitizeList(params.verificationEvidence), + blockerReason: params.blockerReason + ? String(params.blockerReason).trim() + : undefined, + decisionQuestion: params.decisionQuestion + ? String(params.decisionQuestion).trim() + : undefined, + pdd: { + purpose: String(params.pdd?.purpose ?? "").trim(), + consumer: String(params.pdd?.consumer ?? "").trim(), + contract: String(params.pdd?.contract ?? "").trim(), + failureBoundary: String(params.pdd?.failureBoundary ?? "").trim(), + evidence: String(params.pdd?.evidence ?? "").trim(), + nonGoals: String(params.pdd?.nonGoals ?? "").trim(), + invariants: String(params.pdd?.invariants ?? "").trim(), + assumptions: String(params.pdd?.assumptions ?? "").trim(), + }, + }; + const nextState = { + ...state, + unitType: params.unitType, + unitId: params.unitId, + status: + params.outcome === "complete" + ? "complete" + : params.outcome === "blocked" || params.outcome === "decide" + ? "paused" + : "running", + updatedAt: checkpoint.ts, + latestCheckpoint: checkpoint, + }; + mkdirSync(dirname(historyPath(basePath)), { recursive: true }); + writeFileSync(historyPath(basePath), `${JSON.stringify(checkpoint)}\n`, { + flag: "a", + }); + writeState(basePath, nextState); + return checkpoint; +} + +/** + * Read the current persisted autonomous solver state. + * + * Purpose: status surfaces and loop enforcement need one structured source for + * the active solver unit instead of scraping markdown projections. + * + * Consumer: /sf status, sf-progress, and runUnitPhase. + */ +export function readAutonomousSolverState(basePath) { + return readJson(statePath(basePath)); +} + +/** + * Record that a missing checkpoint repair dispatch has already been attempted. + * + * Purpose: enforce the checkpoint contract with one repair chance while + * preventing an unbounded missing-checkpoint redispatch loop. + * + * Consumer: runUnitPhase after the first unit turn omits sf_autonomous_checkpoint. + */ +export function recordAutonomousSolverMissingCheckpointRetry( + basePath, + unitType, + unitId, +) { + const state = readJson(statePath(basePath)); + if (!sameUnit(state, unitType, unitId)) return null; + const nextState = { + ...state, + status: "running", + updatedAt: nowIso(), + missingCheckpointRetry: { + iteration: state.iteration, + ts: nowIso(), + }, + }; + writeState(basePath, nextState); + return nextState; +} + +/** + * Classify the completed solver turn into the next loop action. + * + * Purpose: make checkpoint outcomes authoritative for autonomous scheduling + * instead of letting artifact heuristics silently override blocked, decision, + * continue, or missing-checkpoint states. + * + * Consumer: runUnitPhase immediately after each unit turn. + */ +export function assessAutonomousSolverTurn(basePath, unitType, unitId) { + const state = readJson(statePath(basePath)); + if (!sameUnit(state, unitType, unitId)) { + return { + action: "missing-checkpoint-retry", + reason: "solver-missing-state", + state, + }; + } + const checkpoint = state.latestCheckpoint ?? null; + const hasCurrentCheckpoint = + checkpoint?.unitType === unitType && + checkpoint?.unitId === unitId && + Number(checkpoint?.iteration) === Number(state.iteration); + if (!hasCurrentCheckpoint) { + const alreadyRetried = + Number(state.missingCheckpointRetry?.iteration) === + Number(state.iteration); + if (alreadyRetried) { + return { + action: "pause", + reason: "solver-missing-checkpoint", + state, + }; + } + return { + action: "missing-checkpoint-retry", + reason: "solver-missing-checkpoint", + state, + }; + } + if ( + state.iteration >= state.maxIterations && + checkpoint.outcome !== "complete" + ) { + return { + action: "pause", + reason: "solver-max-iterations", + state, + checkpoint, + }; + } + if (checkpoint.outcome === "blocked" || checkpoint.outcome === "decide") { + return { + action: "pause", + reason: `solver-${checkpoint.outcome}`, + state, + checkpoint, + }; + } + return { + action: checkpoint.outcome === "continue" ? "continue" : "complete", + reason: `solver-${checkpoint.outcome}`, + state, + checkpoint, + }; +} + +/** + * Append user steering for the next autonomous solver iteration. + * + * Purpose: active /sf steer must redirect the next bounded iteration without + * interrupting the current tool batch or forcing an immediate agent turn. + * + * Consumer: /sf steer while autonomous mode is active. + */ +export function appendAutonomousSolverSteering(basePath, text, metadata = {}) { + const trimmed = String(text ?? "").trim(); + if (!trimmed) return null; + const entry = { + id: `${Date.now()}-${Math.random().toString(16).slice(2)}`, + ts: nowIso(), + text: trimmed, + consumedAt: null, + ...metadata, + }; + mkdirSync(solverDir(basePath), { recursive: true }); + appendFileSync(steeringPath(basePath), `${JSON.stringify(entry)}\n`); + return entry; +} + +function readSteeringEntries(basePath) { + try { + return readFileSync(steeringPath(basePath), "utf-8") + .split("\n") + .filter((line) => line.trim()) + .map((line) => { + try { + return JSON.parse(line); + } catch { + return null; + } + }) + .filter(Boolean); + } catch { + return []; + } +} + +/** + * Consume pending steering exactly once for prompt injection. + * + * Purpose: every user steering note should influence the next solver iteration + * once, without being replayed into every later dispatch. + * + * Consumer: runUnitPhase before appending the solver prompt block. + */ +export function consumePendingAutonomousSolverSteering(basePath) { + const entries = readSteeringEntries(basePath); + const pending = entries.filter((entry) => !entry.consumedAt); + if (pending.length === 0) return []; + const consumedAt = nowIso(); + const rewritten = entries.map((entry) => + entry.consumedAt ? entry : { ...entry, consumedAt }, + ); + mkdirSync(solverDir(basePath), { recursive: true }); + atomicWriteSync( + steeringPath(basePath), + rewritten.map((entry) => JSON.stringify(entry)).join("\n") + "\n", + ); + return pending; +} + +/** + * Render consumed steering entries as a bounded prompt block. + * + * Purpose: keep user steering visible to the next iteration as explicit input + * while preserving the checkpoint-driven solver contract. + * + * Consumer: runUnitPhase prompt injection. + */ +export function buildAutonomousSolverSteeringPromptBlock(entries) { + const pending = Array.isArray(entries) ? entries : []; + if (pending.length === 0) return ""; + return [ + "## Pending User Steering", + "", + "Apply these user overrides in this solver iteration:", + ...pending.map((entry) => `- ${String(entry.text ?? "").trim()}`), + ].join("\n"); +} + +/** + * Build the one allowed repair prompt for a missing checkpoint. + * + * Purpose: give the agent a narrow chance to satisfy the solver contract before + * autonomous mode pauses for inspection. + * + * Consumer: runUnitPhase when a turn ends without a current checkpoint. + */ +export function buildAutonomousSolverMissingCheckpointRepairPrompt( + state, + unitType, + unitId, +) { + return [ + "## Checkpoint Required", + "", + `Your previous autonomous turn for ${unitType} ${unitId} ended without calling sf_autonomous_checkpoint for iteration ${state?.iteration ?? "unknown"}.`, + "Do not continue implementation work in this repair turn.", + "Inspect the work you just performed, then call sf_autonomous_checkpoint with the correct outcome and all eight PDD fields.", + "If no useful progress happened, use outcome=blocked and explain why.", + ].join("\n"); +} + +/** + * Return the latest solver checkpoint for a unit, if one exists. + * + * Purpose: let autonomous finalization react to semantic blocked/decision + * outcomes without scraping prose from model output. + * + * Consumer: runUnitPhase after runUnit returns. + */ +export function readLatestAutonomousSolverCheckpoint( + basePath, + unitType, + unitId, +) { + const state = readJson(statePath(basePath)); + if (!sameUnit(state, unitType, unitId)) return null; + return state.latestCheckpoint ?? null; +} diff --git a/src/resources/extensions/sf/bootstrap/db-tools.js b/src/resources/extensions/sf/bootstrap/db-tools.js index 61882ac1e..3dc7c248f 100644 --- a/src/resources/extensions/sf/bootstrap/db-tools.js +++ b/src/resources/extensions/sf/bootstrap/db-tools.js @@ -1,6 +1,10 @@ import { Type } from "@sinclair/typebox"; import { StringEnum } from "@singularity-forge/pi-ai"; import { Text } from "@singularity-forge/pi-tui"; +import { + AUTONOMOUS_SOLVER_OUTCOMES, + appendAutonomousSolverCheckpoint, +} from "../autonomous-solver.js"; import { claimReservedId, findMilestoneIds, @@ -877,6 +881,160 @@ export function registerDbTools(pi) { ); }, }); + // ─── sf_autonomous_checkpoint ─────────────────────────────────────── + const autonomousCheckpointExecute = async ( + _toolCallId, + params, + _signal, + _onUpdate, + _ctx, + ) => { + try { + const checkpoint = appendAutonomousSolverCheckpoint( + process.cwd(), + params, + ); + return { + content: [ + { + type: "text", + text: `Recorded autonomous checkpoint for ${checkpoint.unitType} ${checkpoint.unitId}: ${checkpoint.outcome}`, + }, + ], + details: { + operation: "autonomous_checkpoint", + unitType: checkpoint.unitType, + unitId: checkpoint.unitId, + iteration: checkpoint.iteration, + outcome: checkpoint.outcome, + }, + }; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + logError("tool", `sf_autonomous_checkpoint tool failed: ${msg}`, { + tool: "sf_autonomous_checkpoint", + error: String(err), + }); + return { + content: [ + { + type: "text", + text: `Error in sf_autonomous_checkpoint: ${msg}`, + }, + ], + details: { operation: "autonomous_checkpoint", error: msg }, + }; + } + }; + pi.registerTool({ + name: "sf_autonomous_checkpoint", + label: "Autonomous Checkpoint", + description: + "Record a PDD-shaped autonomous solver checkpoint for the current unit. " + + "Use this before ending every /sf autonomous unit turn to make progress, blockers, decisions, and remaining work explicit.", + promptSnippet: + "Checkpoint autonomous solver progress with PDD fields and semantic outcome", + promptGuidelines: [ + "Call sf_autonomous_checkpoint before ending an autonomous unit turn.", + "Use outcome=complete only when the normal unit completion artifact/tool is also complete.", + "Use outcome=continue when you made real progress but the unit needs another autonomous iteration.", + "Use outcome=blocked for missing facts, credentials, broken environment, or impossible next steps.", + "Use outcome=decide for material product or architecture choices that autonomous mode must not decide silently.", + "Fill all eight PDD fields: purpose, consumer, contract, failureBoundary, evidence, nonGoals, invariants, assumptions.", + ], + parameters: Type.Object({ + unitType: Type.String({ + description: "Current autonomous unit type, e.g. execute-task", + }), + unitId: Type.String({ + description: "Current autonomous unit id, e.g. M010/S05/T02", + }), + outcome: Type.Union( + AUTONOMOUS_SOLVER_OUTCOMES.map((outcome) => Type.Literal(outcome)), + { + description: + "Semantic result for this iteration: continue, complete, blocked, or decide", + }, + ), + summary: Type.String({ + description: "Concise statement of what happened this iteration", + }), + completedItems: Type.Array(Type.String(), { + description: "Concrete items completed in this iteration", + }), + remainingItems: Type.Array(Type.String(), { + description: "Exact remaining work for the next autonomous iteration", + }), + verificationEvidence: Type.Array(Type.String(), { + description: + "Commands, files, tests, screenshots, or observations that prove progress", + }), + blockerReason: Type.Optional( + Type.String({ + description: "Required when outcome=blocked", + }), + ), + decisionQuestion: Type.Optional( + Type.String({ + description: "Required when outcome=decide", + }), + ), + pdd: Type.Object({ + purpose: Type.String({ description: "Why this behavior exists" }), + consumer: Type.String({ + description: "Who or what uses it in production", + }), + contract: Type.String({ + description: "Observable behavior or artifact boundary", + }), + failureBoundary: Type.String({ + description: "Failures that must be contained or surfaced", + }), + evidence: Type.String({ + description: "Proof gathered this iteration", + }), + nonGoals: Type.String({ + description: "What is intentionally not solved here", + }), + invariants: Type.String({ + description: "Rules that must remain true across iterations", + }), + assumptions: Type.String({ + description: "Uncertain facts and how to falsify them", + }), + }), + }), + execute: autonomousCheckpointExecute, + renderCall(args, theme) { + let text = theme.fg("toolTitle", theme.bold("sf_autonomous_checkpoint ")); + if (args.outcome) text += theme.fg("accent", `[${args.outcome}] `); + if (args.unitType || args.unitId) { + text += theme.fg( + "muted", + `${args.unitType ?? "unit"} ${args.unitId ?? ""}`.trim(), + ); + } + return new Text(text, 0, 0); + }, + renderResult(result, _options, theme) { + const d = result.details; + if (result.isError || d?.error) { + return new Text( + theme.fg("error", `Error: ${d?.error ?? "unknown"}`), + 0, + 0, + ); + } + return new Text( + theme.fg( + "success", + `Checkpoint ${d?.outcome ?? "recorded"} · ${d?.unitType ?? ""} ${d?.unitId ?? ""}`.trim(), + ), + 0, + 0, + ); + }, + }); // ─── sf_plan_milestone ──────────────────────────────────────────────── const planMilestoneExecute = async ( _toolCallId, diff --git a/src/resources/extensions/sf/commands-handlers.js b/src/resources/extensions/sf/commands-handlers.js index 564ef023e..02e5b4b94 100644 --- a/src/resources/extensions/sf/commands-handlers.js +++ b/src/resources/extensions/sf/commands-handlers.js @@ -8,6 +8,7 @@ import { existsSync, mkdirSync, readFileSync } from "node:fs"; import { join } from "node:path"; import { checkRemoteAutoSession, isAutoActive } from "./auto.js"; import { getAutoWorktreePath } from "./auto-worktree.js"; +import { appendAutonomousSolverSteering } from "./autonomous-solver.js"; import { appendCapture, hasPendingCaptures, @@ -436,29 +437,13 @@ export async function handleSteer(change, ctx, pi) { autoRunning && mid !== "none" ? getAutoWorktreePath(basePath, mid) : null; const targetPath = wtPath ?? basePath; await appendOverride(targetPath, change, appliedAt); + appendAutonomousSolverSteering(targetPath, change, { appliedAt }); const overrideLoc = wtPath ? "worktree `.sf/OVERRIDES.md`" : "`.sf/OVERRIDES.md`"; - if (isAutoActive()) { - pi.sendMessage( - { - customType: "sf-hard-steer", - content: [ - "HARD STEER — User override registered.", - "", - `**Override:** ${change}`, - "", - `This override has been saved to ${overrideLoc} and will be injected into all future task prompts.`, - "A document rewrite unit will run before the next task to propagate this change across all active plan documents.", - "", - "If you are mid-task, finish your current work respecting this override. The next dispatched unit will be a document rewrite.", - ].join("\n"), - display: false, - }, - { triggerTurn: true }, - ); + if (autoRunning) { ctx.ui.notify( - `Override registered (${overrideLoc}): "${change}". Will be applied before next task dispatch.`, + `Override registered (${overrideLoc}): "${change}". Will be injected into the next solver iteration.`, "info", ); } else { diff --git a/src/resources/extensions/sf/docs/preferences-reference.md b/src/resources/extensions/sf/docs/preferences-reference.md index 0013a18f5..5a99a97e6 100644 --- a/src/resources/extensions/sf/docs/preferences-reference.md +++ b/src/resources/extensions/sf/docs/preferences-reference.md @@ -125,6 +125,7 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea - `soft_timeout_minutes`: minutes before the supervisor issues a soft warning (default: 20). - `idle_timeout_minutes`: minutes of inactivity before the supervisor intervenes (default: 10). - `hard_timeout_minutes`: minutes before the supervisor forces termination (default: 30). + - `solver_max_iterations`: maximum autonomous solver iterations for one unit before pausing (default: `12`, min: `1`, max: `100`). - `completion_nudge_after`: tool calls in a complete-slice unit before nudging the agent to call `sf_slice_complete` (default: 10; set `0` to disable). - `runaway_guard_enabled`: enable active-loop diagnosis for long-running units (default: `true`). - `runaway_tool_call_warning`: unit tool calls before a runaway warning (default: `60`; set `0` to disable this signal). diff --git a/src/resources/extensions/sf/extension-manifest.json b/src/resources/extensions/sf/extension-manifest.json index d8d9faef1..f2e809f61 100644 --- a/src/resources/extensions/sf/extension-manifest.json +++ b/src/resources/extensions/sf/extension-manifest.json @@ -13,6 +13,7 @@ "kill_agent", "memory_query", "read", + "sf_autonomous_checkpoint", "sf_complete_milestone", "sf_decision_save", "sf_exec", diff --git a/src/resources/extensions/sf/key-manager.js b/src/resources/extensions/sf/key-manager.js index 053edc799..2196b0370 100644 --- a/src/resources/extensions/sf/key-manager.js +++ b/src/resources/extensions/sf/key-manager.js @@ -60,6 +60,20 @@ export const PROVIDER_REGISTRY = [ envVar: "MISTRAL_API_KEY", dashboardUrl: "console.mistral.ai", }, + { + id: "minimax", + label: "MiniMax", + category: "llm", + envVar: "MINIMAX_API_KEY", + dashboardUrl: "api.minimax.io", + }, + { + id: "kimi-coding", + label: "Kimi Coding", + category: "llm", + envVar: "KIMI_API_KEY", + dashboardUrl: "platform.moonshot.ai", + }, { id: "zai", label: "ZAI", @@ -235,7 +249,8 @@ export function describeCredential(cred) { * Get the auth.json path. */ export function getAuthPath() { - return join(process.env.HOME ?? "~", ".sf", "agent", "auth.json"); + const sfHome = process.env.SF_HOME || join(process.env.HOME ?? "~", ".sf"); + return join(sfHome, "agent", "auth.json"); } /** * Create an AuthStorage instance for key management. diff --git a/src/resources/extensions/sf/preferences-models.js b/src/resources/extensions/sf/preferences-models.js index afe94c815..46a8235c9 100644 --- a/src/resources/extensions/sf/preferences-models.js +++ b/src/resources/extensions/sf/preferences-models.js @@ -692,6 +692,11 @@ export function resolveAutoSupervisorConfig() { soft_timeout_minutes: configured.soft_timeout_minutes ?? 20, idle_timeout_minutes: configured.idle_timeout_minutes ?? 10, hard_timeout_minutes: configured.hard_timeout_minutes ?? 30, + solver_max_iterations: Number.isFinite( + Number(configured.solver_max_iterations), + ) + ? Math.max(1, Math.min(100, Number(configured.solver_max_iterations))) + : 12, completion_nudge_after: configured.completion_nudge_after ?? 10, runaway_guard_enabled: configured.runaway_guard_enabled ?? true, runaway_tool_call_warning: diff --git a/src/resources/extensions/sf/preferences-validation.js b/src/resources/extensions/sf/preferences-validation.js index 470d75fe7..93403f917 100644 --- a/src/resources/extensions/sf/preferences-validation.js +++ b/src/resources/extensions/sf/preferences-validation.js @@ -780,6 +780,16 @@ export function validatePreferences(preferences) { "auto_supervisor.hard_timeout_minutes must be a non-negative number", ); } + if (as.solver_max_iterations !== undefined) { + const val = Number(as.solver_max_iterations); + if (!Number.isNaN(val) && val >= 1 && val <= 100) { + validatedAs.solver_max_iterations = Math.floor(val); + } else { + errors.push( + "auto_supervisor.solver_max_iterations must be a number from 1 to 100", + ); + } + } if (as.phase_timeout_minutes !== undefined) { const val = Number(as.phase_timeout_minutes); if (!Number.isNaN(val) && val >= 0) diff --git a/src/resources/extensions/sf/tests/auto-supervisor.test.mjs b/src/resources/extensions/sf/tests/auto-supervisor.test.mjs index 297072170..f300e557c 100644 --- a/src/resources/extensions/sf/tests/auto-supervisor.test.mjs +++ b/src/resources/extensions/sf/tests/auto-supervisor.test.mjs @@ -14,6 +14,7 @@ test("resolveAutoSupervisorConfig provides safe timeout defaults", () => { assert.equal(supervisor.soft_timeout_minutes, 20); assert.equal(supervisor.idle_timeout_minutes, 10); assert.equal(supervisor.hard_timeout_minutes, 30); + assert.equal(supervisor.solver_max_iterations, 12); }); test("writeUnitRuntimeRecord persists progress and recovery metadata defaults", () => { diff --git a/src/resources/extensions/sf/tests/autonomous-solver.test.mjs b/src/resources/extensions/sf/tests/autonomous-solver.test.mjs new file mode 100644 index 000000000..f27c86f0a --- /dev/null +++ b/src/resources/extensions/sf/tests/autonomous-solver.test.mjs @@ -0,0 +1,245 @@ +import { mkdtempSync, readFileSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, describe, expect, test } from "vitest"; +import { + appendAutonomousSolverCheckpoint, + appendAutonomousSolverSteering, + assessAutonomousSolverTurn, + beginAutonomousSolverIteration, + buildAutonomousSolverPromptBlock, + consumePendingAutonomousSolverSteering, + getConfiguredAutonomousSolverMaxIterations, + readLatestAutonomousSolverCheckpoint, + recordAutonomousSolverMissingCheckpointRetry, +} from "../autonomous-solver.js"; + +let tempDirs = []; + +function makeProject() { + const dir = mkdtempSync(join(tmpdir(), "sf-autonomous-solver-")); + tempDirs.push(dir); + return dir; +} + +afterEach(() => { + for (const dir of tempDirs) { + rmSync(dir, { recursive: true, force: true }); + } + tempDirs = []; +}); + +function pdd(overrides = {}) { + return { + purpose: "Protect autonomous execution from vague hidden retries.", + consumer: "/sf autonomous unit executor.", + contract: + "Checkpoint contains outcome, progress, evidence, and remaining work.", + failureBoundary: + "Blocked or decide outcomes pause instead of continuing blind.", + evidence: "Projection and JSONL history are written.", + nonGoals: "Does not replace the normal task completion tool.", + invariants: "Each checkpoint is tied to one unit id.", + assumptions: "Filesystem writes are available under .sf/runtime.", + ...overrides, + }; +} + +describe("autonomous solver", () => { + test("beginAutonomousSolverIteration_same_unit_advances_iteration", () => { + const project = makeProject(); + const first = beginAutonomousSolverIteration( + project, + "execute-task", + "M001/S01/T01", + ); + const second = beginAutonomousSolverIteration( + project, + "execute-task", + "M001/S01/T01", + ); + + expect(first.iteration).toBe(1); + expect(second.iteration).toBe(2); + }); + + test("beginAutonomousSolverIteration_new_unit_resets_iteration", () => { + const project = makeProject(); + beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01"); + const next = beginAutonomousSolverIteration( + project, + "execute-task", + "M001/S01/T02", + ); + + expect(next.iteration).toBe(1); + }); + + test("appendAutonomousSolverCheckpoint_writes_pdd_projection_and_history", () => { + const project = makeProject(); + beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01"); + + appendAutonomousSolverCheckpoint(project, { + unitType: "execute-task", + unitId: "M001/S01/T01", + outcome: "continue", + summary: "Implemented the first bounded repair.", + completedItems: ["Read the failing path", "Added regression test"], + remainingItems: ["Finish implementation", "Run full verification"], + verificationEvidence: ["npm run test:unit -- autonomous-solver"], + pdd: pdd(), + }); + + const latest = readLatestAutonomousSolverCheckpoint( + project, + "execute-task", + "M001/S01/T01", + ); + const projection = readFileSync( + join(project, ".sf/runtime/autonomous-solver/LOOP.md"), + "utf-8", + ); + const history = readFileSync( + join(project, ".sf/runtime/autonomous-solver/iterations.jsonl"), + "utf-8", + ); + + expect(latest.outcome).toBe("continue"); + expect(projection).toContain("## PDD Contract"); + expect(projection).toContain("Purpose: Protect autonomous execution"); + expect(projection).toContain("Finish implementation"); + expect(JSON.parse(history.trim()).outcome).toBe("continue"); + }); + + test("buildAutonomousSolverPromptBlock_names_pdd_and_checkpoint_outcomes", () => { + const prompt = buildAutonomousSolverPromptBlock({ + unitType: "execute-task", + unitId: "M001/S01/T01", + iteration: 3, + maxIterations: 12, + }); + + expect(prompt).toContain("/sf autonomous iteration 3 of 12"); + expect(prompt).toContain("sf_autonomous_checkpoint"); + expect(prompt).toContain("Purpose:"); + expect(prompt).toContain("Consumer:"); + expect(prompt).toContain("Failure boundary:"); + expect(prompt).toContain('outcome: "decide"'); + }); + + test("assessAutonomousSolverTurn_missing_checkpoint_retries_once_then_pauses", () => { + const project = makeProject(); + beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01"); + + const first = assessAutonomousSolverTurn( + project, + "execute-task", + "M001/S01/T01", + ); + expect(first.action).toBe("missing-checkpoint-retry"); + + recordAutonomousSolverMissingCheckpointRetry( + project, + "execute-task", + "M001/S01/T01", + ); + const second = assessAutonomousSolverTurn( + project, + "execute-task", + "M001/S01/T01", + ); + expect(second.action).toBe("pause"); + expect(second.reason).toBe("solver-missing-checkpoint"); + }); + + test("assessAutonomousSolverTurn_continue_and_blocked_are_authoritative", () => { + const project = makeProject(); + beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01"); + appendAutonomousSolverCheckpoint(project, { + unitType: "execute-task", + unitId: "M001/S01/T01", + outcome: "continue", + summary: "More work remains.", + completedItems: ["First pass"], + remainingItems: ["Second pass"], + verificationEvidence: ["npx vitest run focused.test.mjs"], + pdd: pdd(), + }); + expect( + assessAutonomousSolverTurn(project, "execute-task", "M001/S01/T01") + .action, + ).toBe("continue"); + + beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01"); + appendAutonomousSolverCheckpoint(project, { + unitType: "execute-task", + unitId: "M001/S01/T01", + outcome: "blocked", + summary: "Credentials unavailable.", + completedItems: [], + remainingItems: ["Wait for credentials"], + verificationEvidence: ["provider returned 401"], + blockerReason: "Missing provider token.", + pdd: pdd(), + }); + const blocked = assessAutonomousSolverTurn( + project, + "execute-task", + "M001/S01/T01", + ); + expect(blocked.action).toBe("pause"); + expect(blocked.reason).toBe("solver-blocked"); + }); + + test("assessAutonomousSolverTurn_max_iterations_pauses_before_unbounded_retry", () => { + const project = makeProject(); + beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01", { + maxIterations: 1, + }); + appendAutonomousSolverCheckpoint(project, { + unitType: "execute-task", + unitId: "M001/S01/T01", + outcome: "continue", + summary: "More work remains.", + completedItems: ["First pass"], + remainingItems: ["Second pass"], + verificationEvidence: ["npx vitest run focused.test.mjs"], + pdd: pdd(), + }); + + const result = assessAutonomousSolverTurn( + project, + "execute-task", + "M001/S01/T01", + ); + expect(result.action).toBe("pause"); + expect(result.reason).toBe("solver-max-iterations"); + }); + + test("steering_append_consume_is_idempotent", () => { + const project = makeProject(); + appendAutonomousSolverSteering(project, "Prefer runtime enforcement."); + appendAutonomousSolverSteering(project, "Keep /sf autonomous only."); + + const first = consumePendingAutonomousSolverSteering(project); + const second = consumePendingAutonomousSolverSteering(project); + + expect(first).toHaveLength(2); + expect(first[0].text).toBe("Prefer runtime enforcement."); + expect(second).toHaveLength(0); + }); + + test("getConfiguredAutonomousSolverMaxIterations_clamps_preference", () => { + expect(getConfiguredAutonomousSolverMaxIterations()).toBe(12); + expect( + getConfiguredAutonomousSolverMaxIterations({ + auto_supervisor: { solver_max_iterations: 0 }, + }), + ).toBe(1); + expect( + getConfiguredAutonomousSolverMaxIterations({ + auto_supervisor: { solver_max_iterations: 150 }, + }), + ).toBe(100); + }); +}); diff --git a/src/resources/extensions/sf/tests/doctor-providers.test.mjs b/src/resources/extensions/sf/tests/doctor-providers.test.mjs index b55f413dc..289ac4229 100644 --- a/src/resources/extensions/sf/tests/doctor-providers.test.mjs +++ b/src/resources/extensions/sf/tests/doctor-providers.test.mjs @@ -4,6 +4,7 @@ import { tmpdir } from "node:os"; import { join } from "node:path"; import { afterEach, describe, test } from "vitest"; import { runProviderChecks } from "../doctor-providers.js"; +import { PROVIDER_REGISTRY } from "../key-manager.js"; const originalCwd = process.cwd(); const originalEnv = { ...process.env }; @@ -36,6 +37,13 @@ function makePreferencesProject(globalPreferences) { } describe("doctor provider checks", () => { + test("provider registry includes direct routed model providers used by SF preferences", () => { + const providers = new Map(PROVIDER_REGISTRY.map((p) => [p.id, p])); + + assert.equal(providers.get("minimax")?.envVar, "MINIMAX_API_KEY"); + assert.equal(providers.get("kimi-coding")?.envVar, "KIMI_API_KEY"); + }); + test("runProviderChecks_when_any_configured_llm_route_is_usable_does_not_require_every_preferred_provider", () => { makePreferencesProject( [ diff --git a/src/resources/extensions/sf/tests/uok-parity-report.test.mjs b/src/resources/extensions/sf/tests/uok-parity-report.test.mjs index ce6b553d2..635ac635a 100644 --- a/src/resources/extensions/sf/tests/uok-parity-report.test.mjs +++ b/src/resources/extensions/sf/tests/uok-parity-report.test.mjs @@ -349,6 +349,30 @@ test("runAutoLoopWithUok_success_writes_balanced_run_id_heartbeats", async () => assert.equal(hasCurrentParityWarning(report), false); }); +test("runAutoLoopWithUok_ignores_legacy_disabled_flag_and_uses_kernel_path", async () => { + const projectRoot = makeProject(); + const state = { basePath: projectRoot, autoStartTime: NOW }; + let kernelRan = false; + + await runAutoLoopWithUok({ + ctx: testCtx("session-force-uok"), + pi: {}, + s: state, + deps: testDeps({ uok: { enabled: false } }), + async runKernelLoop(_ctx, _pi, _s, deps) { + kernelRan = Boolean(deps.uokObserver); + }, + async runStandardLoop() { + throw new Error("legacy standard loop should not run"); + }, + }); + + assert.equal(kernelRan, true); + const events = readProjectParityEvents(projectRoot); + assert.equal(events[0].path, "uok-kernel"); + assert.equal(events[1].path, "uok-kernel"); +}); + test("runAutoLoopWithUok_throw_still_writes_exit_and_current_error_report", async () => { const projectRoot = makeProject(); const state = { basePath: projectRoot, autoStartTime: NOW }; diff --git a/src/resources/extensions/sf/uok/kernel.js b/src/resources/extensions/sf/uok/kernel.js index 4d511d308..39156989b 100644 --- a/src/resources/extensions/sf/uok/kernel.js +++ b/src/resources/extensions/sf/uok/kernel.js @@ -30,13 +30,13 @@ function refreshParityReport(basePath) { return null; } } -function resolveKernelPathLabel(flags) { - return flags.enabled ? "uok-kernel" : "standard-loop"; +function resolveKernelPathLabel() { + return "uok-kernel"; } export async function runAutoLoopWithUok(args) { - const { ctx, pi, s, deps, runKernelLoop, runStandardLoop } = args; + const { ctx, pi, s, deps, runKernelLoop } = args; const prefs = deps.loadEffectiveSFPreferences()?.preferences; - const flags = resolveUokFlags(prefs); + const flags = { ...resolveUokFlags(prefs), enabled: true }; const previousReport = refreshParityReport(s.basePath); const runId = `uok-${randomUUID()}`; s.currentUokRunId = runId; @@ -58,7 +58,7 @@ export async function runAutoLoopWithUok(args) { recordUokRunStart({ runId, sessionId: ctx.sessionManager?.getSessionId?.(), - path: resolveKernelPathLabel(flags), + path: resolveKernelPathLabel(), flags: { ...flags }, startedAt, }); @@ -67,7 +67,7 @@ export async function runAutoLoopWithUok(args) { ts: startedAt, runId, sessionId: ctx.sessionManager?.getSessionId?.(), - path: resolveKernelPathLabel(flags), + path: resolveKernelPathLabel(), flags: { ...flags }, phase: "enter", }); @@ -85,26 +85,20 @@ export async function runAutoLoopWithUok(args) { }), ); } - const decoratedDeps = flags.enabled - ? { - ...deps, - uokObserver: createTurnObserver({ - basePath: s.basePath, - gitAction: flags.gitopsTurnAction, - gitPush: flags.gitopsTurnPush, - enableAudit: flags.auditEnvelope, - enableGitops: flags.gitops, - }), - } - : deps; + const decoratedDeps = { + ...deps, + uokObserver: createTurnObserver({ + basePath: s.basePath, + gitAction: flags.gitopsTurnAction, + gitPush: flags.gitopsTurnPush, + enableAudit: flags.auditEnvelope, + enableGitops: flags.gitops, + }), + }; let status = "ok"; let error; try { - if (flags.enabled) { - await runKernelLoop(ctx, pi, s, decoratedDeps); - } else { - await runStandardLoop(ctx, pi, s, deps); - } + await runKernelLoop(ctx, pi, s, decoratedDeps); } catch (err) { status = "error"; error = err instanceof Error ? err.message : String(err); @@ -115,7 +109,7 @@ export async function runAutoLoopWithUok(args) { recordUokRunExit({ runId, sessionId: ctx.sessionManager?.getSessionId?.(), - path: resolveKernelPathLabel(flags), + path: resolveKernelPathLabel(), flags: { ...flags }, status, endedAt, @@ -126,7 +120,7 @@ export async function runAutoLoopWithUok(args) { ts: endedAt, runId, sessionId: ctx.sessionManager?.getSessionId?.(), - path: resolveKernelPathLabel(flags), + path: resolveKernelPathLabel(), flags: { ...flags }, phase: "exit", status, diff --git a/src/tests/resource-loader.test.ts b/src/tests/resource-loader.test.ts index 5a39345c8..d1c8ac6c1 100644 --- a/src/tests/resource-loader.test.ts +++ b/src/tests/resource-loader.test.ts @@ -64,6 +64,27 @@ test("getExtensionKey normalizes top-level .ts and .js entry names to the same k ); }); +test("withResourceSyncLock removes a stale owner lock before running work", async () => { + const tmp = mkdtempSync(join(tmpdir(), "sf-resource-loader-lock-")); + const lockDir = join(tmp, ".resource-sync.lock"); + + afterEach(() => { + rmSync(tmp, { recursive: true, force: true }); + }); + + mkdirSync(lockDir, { recursive: true }); + writeFileSync(join(lockDir, "owner"), "999999999\n"); + + const { withResourceSyncLock } = await import("../resource-loader.ts"); + let ran = false; + withResourceSyncLock(tmp, () => { + ran = true; + }); + + assert.equal(ran, true); + assert.equal(existsSync(lockDir), false); +}); + test("hasStaleCompiledExtensionSiblings only flags top-level .ts/.js sibling pairs", async (_t) => { const { hasStaleCompiledExtensionSiblings } = await import( "../resource-loader.ts"