sf snapshot: uncommitted changes after 61m inactivity

This commit is contained in:
Mikael Hugo 2026-05-06 03:25:43 +02:00
parent a1fd6cfc05
commit 152da756a1
21 changed files with 1427 additions and 103 deletions

View file

@ -56,6 +56,36 @@ function formatCost(snapshot: QuerySnapshot): string {
return `$${total.toFixed(4)}${workers > 0 ? ` (${workers} worker${workers === 1 ? "" : "s"})` : ""}`;
}
/**
 * Summarise the persisted autonomous solver state as one status line.
 *
 * Reads `<basePath>/.sf/runtime/autonomous-solver/active.json` and joins the
 * unit, iteration counter, last checkpoint outcome and, when present, the
 * remaining-item count, blocker, decision question and summary with " · ".
 *
 * @param basePath - Directory that contains the `.sf` folder.
 * @returns The status line, or `null` when the file is missing, unreadable,
 *   not valid JSON, or does not contain a JSON object.
 */
function readSolverStatus(basePath: string): string | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(
      readFileSync(
        join(basePath, ".sf", "runtime", "autonomous-solver", "active.json"),
        "utf-8",
      ),
    );
  } catch {
    return null;
  }
  // JSON.parse legally returns null, primitives, or arrays. Dereferencing
  // `null` below would throw outside the try block and break status rendering,
  // so anything that is not a plain object counts as "no solver state".
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    return null;
  }
  const state = parsed as Record<string, any>;
  const checkpoint = state.latestCheckpoint ?? {};
  const parts = [
    `${state.unitType ?? "unit"} ${state.unitId ?? "n/a"}`,
    `iter ${state.iteration ?? "?"}/${state.maxIterations ?? "?"}`,
    `outcome ${checkpoint.outcome ?? "none"}`,
  ];
  // Only report a remaining count when the checkpoint actually recorded a list.
  if (Array.isArray(checkpoint.remainingItems)) {
    parts.push(`${checkpoint.remainingItems.length} remaining`);
  }
  if (checkpoint.blockerReason) {
    parts.push(`blocker: ${checkpoint.blockerReason}`);
  }
  if (checkpoint.decisionQuestion) {
    parts.push(`decision: ${checkpoint.decisionQuestion}`);
  }
  if (checkpoint.summary) parts.push(String(checkpoint.summary));
  return parts.join(" · ");
}
function latestJsonlFile(dir: string): string | null {
try {
const entries = readdirSync(dir)
@ -135,7 +165,11 @@ function formatModel(model: CurrentModel | null): string {
export function renderLiveStatus(
snapshot: QuerySnapshot,
opts: { model: CurrentModel | null; recentEvents: string[] },
opts: {
basePath?: string;
model: CurrentModel | null;
recentEvents: string[];
},
): string {
const lines: string[] = [];
lines.push("SF Status");
@ -149,6 +183,8 @@ export function renderLiveStatus(
lines.push(`Dispatch: ${formatDispatch(snapshot)}`);
lines.push(`Cost: ${formatCost(snapshot)}`);
lines.push(`Model: ${formatModel(opts.model)}`);
const solverStatus = opts.basePath ? readSolverStatus(opts.basePath) : null;
if (solverStatus) lines.push(`Solver: ${solverStatus}`);
lines.push("");
lines.push("Last Events:");
if (opts.recentEvents.length === 0) {
@ -177,6 +213,7 @@ async function buildStatusText(
.map(formatMergedLogEvent);
return renderLiveStatus(snapshot, {
basePath,
model: getCurrentModel(basePath, sfHome),
recentEvents,
});

View file

@ -237,6 +237,100 @@ export function getNewerManagedResourceVersion(
: null;
}
// Lock-directory age (by mtime) beyond which an existing lock is treated as stale.
const RESOURCE_SYNC_LOCK_STALE_MS = 120_000;
// Default time withResourceSyncLock keeps trying to acquire the lock before throwing.
const RESOURCE_SYNC_LOCK_TIMEOUT_MS = 60_000;
// Pause between two lock acquisition attempts.
const RESOURCE_SYNC_LOCK_POLL_MS = 100;
/**
 * Block the calling thread for up to `ms` milliseconds.
 *
 * Waits on a freshly allocated shared cell that this function hands to nobody
 * else, so the wait ends through its timeout.
 *
 * @param ms - Timeout passed to `Atomics.wait`, in milliseconds.
 */
function sleepSync(ms: number): void {
  const buffer = new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT);
  const cell = new Int32Array(buffer);
  Atomics.wait(cell, 0, 0, ms);
}
/**
 * Read the owning process id recorded in a lock directory.
 *
 * @param lockDir - Lock directory expected to contain an `owner` file.
 * @returns The positive pid parsed from the file, or `null` when the file
 *   cannot be read or does not start with a positive integer.
 */
function readLockPid(lockDir: string): number | null {
  let contents: string;
  try {
    contents = readFileSync(join(lockDir, "owner"), "utf-8");
  } catch {
    return null;
  }
  const parsed = Number.parseInt(contents.trim(), 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return null;
  }
  return parsed;
}
/**
 * Report whether a process with the given pid currently exists.
 *
 * Sends signal 0, which performs the existence/permission check without
 * delivering a signal.
 *
 * @param pid - Process id to probe.
 * @returns `true` when the probe succeeds or fails with `EPERM` (the process
 *   exists but belongs to another user); `false` for every other failure.
 */
function isProcessAlive(pid: number): boolean {
  try {
    process.kill(pid, 0);
    return true;
  } catch (err) {
    // EPERM means "exists, but we may not signal it". Reporting it as dead
    // would let a caller remove a lock whose owner is still running.
    const code =
      typeof err === "object" && err !== null
        ? (err as { code?: unknown }).code
        : undefined;
    return code === "EPERM";
  }
}
/**
 * Remove the lock directory when it looks abandoned.
 *
 * A lock counts as abandoned when its directory mtime is older than
 * RESOURCE_SYNC_LOCK_STALE_MS (or cannot be read), or when the recorded owner
 * pid is no longer alive.
 *
 * @param lockDir - Lock directory to inspect.
 * @returns `true` when the lock does not exist or was removed; `false` when it
 *   is still considered held or removal failed.
 */
function removeStaleResourceSyncLock(lockDir: string): boolean {
  if (existsSync(lockDir) === false) {
    return true;
  }
  const ownerPid = readLockPid(lockDir);
  const expired = ((): boolean => {
    try {
      const ageMs = Date.now() - statSync(lockDir).mtimeMs;
      return ageMs > RESOURCE_SYNC_LOCK_STALE_MS;
    } catch {
      // The directory could not be stat'ed; treat it as removable.
      return true;
    }
  })();
  const ownerGone = ownerPid === null ? false : !isProcessAlive(ownerPid);
  if (!(expired || ownerGone)) {
    return false;
  }
  try {
    rmSync(lockDir, { recursive: true, force: true, maxRetries: 3 });
  } catch {
    return false;
  }
  return true;
}
/**
 * Serialise writes to the shared managed resource tree.
 *
 * Purpose: prevent parallel SF launches from deleting/copying the same
 * ~/.sf/agent/extensions subtree concurrently, which can surface as transient
 * ENOTEMPTY/ENOENT failures during startup.
 *
 * Consumer: initResources before it prunes and copies bundled resources.
 *
 * The lock is the directory `<agentDir>/.resource-sync.lock`, created with a
 * non-recursive mkdir, plus an `owner` file holding this process's pid.
 *
 * @param agentDir - Directory in which the lock directory is created.
 * @param work - Callback executed while the lock is held.
 * @param timeoutMs - Maximum time to keep retrying acquisition.
 * @returns Whatever `work` returns.
 * @throws Error when the lock cannot be acquired within `timeoutMs`; the
 *   original error when the owner file cannot be written; anything `work`
 *   throws.
 */
export function withResourceSyncLock<T>(
  agentDir: string,
  work: () => T,
  timeoutMs = RESOURCE_SYNC_LOCK_TIMEOUT_MS,
): T {
  const lockDir = join(agentDir, ".resource-sync.lock");
  const startedAt = Date.now();
  while (true) {
    let acquireError: unknown;
    let created = false;
    try {
      mkdirSync(lockDir);
      created = true;
    } catch (err) {
      acquireError = err;
    }
    if (created) {
      try {
        writeFileSync(join(lockDir, "owner"), `${process.pid}\n`);
      } catch (err) {
        // We created the directory, so we hold the lock. Release it before
        // failing: an ownerless directory would otherwise block this process
        // (and every other launch) until it ages out as stale.
        try {
          rmSync(lockDir, { recursive: true, force: true, maxRetries: 3 });
        } catch {
          /* non-fatal: stale-lock cleanup on next launch handles this */
        }
        throw err;
      }
      break;
    }
    removeStaleResourceSyncLock(lockDir);
    if (Date.now() - startedAt > timeoutMs) {
      // Surface the last mkdir failure so permission or path problems are
      // not disguised as plain lock contention.
      const detail =
        acquireError instanceof Error ? ` (${acquireError.message})` : "";
      throw new Error(
        `Timed out waiting for SF resource sync lock: ${lockDir}${detail}`,
      );
    }
    sleepSync(RESOURCE_SYNC_LOCK_POLL_MS);
  }
  try {
    return work();
  } finally {
    try {
      rmSync(lockDir, { recursive: true, force: true, maxRetries: 3 });
    } catch {
      /* non-fatal: stale-lock cleanup on next launch handles this */
    }
  }
}
/**
* Recursively makes all files and directories under dirPath owner-writable.
*
@ -709,7 +803,10 @@ function pruneRemovedBundledExtensions(
*/
export function initResources(agentDir: string): void {
  // The sync lock is created inside agentDir, so the directory must exist first.
  mkdirSync(agentDir, { recursive: true });
  // Run the actual resource sync while holding the lock.
  const syncUnderLock = (): void => initResourcesUnlocked(agentDir);
  withResourceSyncLock(agentDir, syncUnderLock);
}
function initResourcesUnlocked(agentDir: string): void {
const currentVersion = getBundledSfVersion();
const manifest = readManagedResourceManifest(agentDir);
const extensionsDir = join(agentDir, "extensions");

View file

@ -14,6 +14,7 @@ import {
getRtkSessionSavings,
} from "../shared/rtk-session-stats.js";
import { makeUI } from "../shared/tui.js";
import { readAutonomousSolverState } from "./autonomous-solver.js";
import { getErrorMessage } from "./error-utils.js";
import { getLedger, getProjectTotals } from "./metrics.js";
import { getActiveHook } from "./post-unit-hooks.js";
@ -145,6 +146,25 @@ function _peekNext(unitType, state) {
return "";
}
}
/**
 * Build the dimmed one-line solver summary shown in the progress widget.
 *
 * Returns null when no solver state with both a unit type and a unit id is
 * available. Otherwise the line contains the iteration counter, the last
 * checkpoint outcome, the remaining-item count and, if recorded, the blocker
 * reason or decision question, truncated to `width`.
 */
function formatSolverWidgetLine(basePath, theme, width, pad) {
  const solver = readAutonomousSolverState(basePath);
  const hasUnit = Boolean(solver?.unitType) && Boolean(solver?.unitId);
  if (!hasUnit) return null;

  const checkpoint = solver.latestCheckpoint ?? {};
  const segments = [
    `solver ${solver.iteration ?? "?"}/${solver.maxIterations ?? "?"}`,
    `outcome ${checkpoint.outcome || "none"}`,
  ];

  let remaining = 0;
  if (Array.isArray(checkpoint.remainingItems)) {
    remaining = checkpoint.remainingItems.length;
  }
  segments.push(`${remaining} remaining`);

  // A blocker takes precedence over a pending decision question.
  const issue = checkpoint.blockerReason || checkpoint.decisionQuestion || "";
  const issueText = issue ? String(issue) : "";
  if (issueText) segments.push(issueText);

  const text = segments.join(" · ");
  const line = `${pad}${theme.fg("dim", text)}`;
  return truncateToWidth(line, width, "…");
}
/**
* Describe what the next unit will be, based on current state.
*/
@ -727,6 +747,13 @@ export function updateProgressWidget(
lines.push(
rightAlign(actionLeft, theme.fg("dim", phaseLabel), width),
);
const solverLine = formatSolverWidgetLine(
accessors.getBasePath(),
theme,
width,
pad,
);
if (solverLine) lines.push(solverLine);
// Progress bar
const roadmapSlices = mid ? getRoadmapSlicesSync() : null;
if (roadmapSlices) {
@ -814,6 +841,13 @@ export function updateProgressWidget(
const tierTag = tierBadge ? theme.fg("dim", `[${tierBadge}] `) : "";
const phaseBadge = `${tierTag}${theme.fg("dim", phaseLabel)}`;
lines.push(rightAlign(actionLeft, phaseBadge, width));
const solverLine = formatSolverWidgetLine(
accessors.getBasePath(),
theme,
width,
pad,
);
if (solverLine) lines.push(solverLine);
lines.push("");
// Two-column body
const minTwoColWidth = 76;

View file

@ -13,7 +13,6 @@ export {
} from "./auto/infra-errors.js";
export {
autoLoop,
runStandardAutoLoop,
runUokKernelLoop,
} from "./auto/loop.js";
export {

View file

@ -55,7 +55,6 @@ import {
import { DISPATCH_RULES, resolveDispatch } from "./auto-dispatch.js";
import {
_resetPendingResolve,
autoLoop,
isSessionSwitchInFlight,
resolveAgentEnd,
resolveAgentEndCancelled,
@ -336,8 +335,7 @@ export {
/** Wrapper: register SIGTERM handler and store reference. */
function registerSigtermHandler(currentBasePath) {
const prefs = loadEffectiveSFPreferences()?.preferences;
const flags = resolveUokFlags(prefs);
const pathLabel = flags.enabled ? "uok-kernel" : "standard-loop";
const flags = { ...resolveUokFlags(prefs), enabled: true };
const onSignal = () => {
// Write UOK parity exit heartbeat before process.exit(0) bypasses
// the finally block in runAutoLoopWithUok. Fixes the enter/exit
@ -346,7 +344,7 @@ function registerSigtermHandler(currentBasePath) {
ts: new Date().toISOString(),
...(s.currentUokRunId ? { runId: s.currentUokRunId } : {}),
sessionId: s.cmdCtx?.sessionManager?.getSessionId?.(),
path: pathLabel,
path: "uok-kernel",
flags: { ...flags },
phase: "exit",
status: "signal",
@ -1734,7 +1732,6 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
s,
deps: buildLoopDeps(),
runKernelLoop: runUokKernelLoop,
runStandardLoop: autoLoop,
});
cleanupAfterLoopExit(ctx);
return;
@ -1783,7 +1780,6 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
s,
deps: buildLoopDeps(),
runKernelLoop: runUokKernelLoop,
runStandardLoop: autoLoop,
});
cleanupAfterLoopExit(ctx);
}

View file

@ -248,16 +248,7 @@ function resolveDispatchNodeKind(unitType, sidecarItem) {
}
return "unit";
}
async function runUnitPhaseViaContract(
dispatchContract,
ic,
iterData,
loopState,
sidecarItem,
) {
if (dispatchContract === "standard-direct") {
return runUnitPhase(ic, iterData, loopState, sidecarItem);
}
async function runUnitPhaseViaContract(ic, iterData, loopState, sidecarItem) {
const scheduler = new ExecutionGraphScheduler();
let outcome = null;
const executeNode = async () => {
@ -307,8 +298,7 @@ async function enforceMinRequestInterval(s, prefs) {
* This is the linear replacement for the recursive
* dispatchNextUnit handleAgentEnd dispatchNextUnit chain.
*/
export async function autoLoop(ctx, pi, s, deps, options) {
const dispatchContract = options?.dispatchContract ?? "standard-direct";
export async function autoLoop(ctx, pi, s, deps) {
debugLog("autoLoop", { phase: "enter" });
let iteration = 0;
// Load persisted stuck state so counters survive session restarts (#3704)
@ -571,7 +561,6 @@ export async function autoLoop(ctx, pi, s, deps, options) {
// ── Unit execution (shared with dev path) ──
await enforceMinRequestInterval(s, ic.prefs);
const unitPhaseResult = await runUnitPhaseViaContract(
dispatchContract,
ic,
iterData,
loopState,
@ -590,6 +579,10 @@ export async function autoLoop(ctx, pi, s, deps, options) {
finishTurn("stopped", "execution", "unit-break");
break;
}
if (unitPhaseResult.action === "continue") {
finishTurn("retry");
continue;
}
// ── Verify first, then reconcile (only mark complete on pass) ──
debugLog("autoLoop", {
phase: "custom-engine-verify",
@ -875,7 +868,6 @@ export async function autoLoop(ctx, pi, s, deps, options) {
}
await enforceMinRequestInterval(s, ic.prefs);
const unitPhaseResult = await runUnitPhaseViaContract(
dispatchContract,
ic,
iterData,
loopState,
@ -895,6 +887,10 @@ export async function autoLoop(ctx, pi, s, deps, options) {
finishTurn("stopped", "execution", "unit-break");
break;
}
if (unitPhaseResult.action === "continue") {
finishTurn("retry");
continue;
}
// ── Phase 5: Finalize ───────────────────────────────────────────────
const finalizeResult = await withPhaseTimeout(
"finalize",
@ -1117,8 +1113,5 @@ export async function autoLoop(ctx, pi, s, deps, options) {
}
// ── Dispatch-contract entry points ───────────────────────────────────────
export async function runUokKernelLoop(ctx, pi, s, deps) {
return autoLoop(ctx, pi, s, deps, { dispatchContract: "uok-scheduler" });
}
export async function runStandardAutoLoop(ctx, pi, s, deps) {
return autoLoop(ctx, pi, s, deps, { dispatchContract: "standard-direct" });
return autoLoop(ctx, pi, s, deps);
}

View file

@ -34,6 +34,16 @@ import {
formatToolCallSummary,
resetToolCallCounts,
} from "../auto-tool-tracking.js";
import {
assessAutonomousSolverTurn,
beginAutonomousSolverIteration,
buildAutonomousSolverMissingCheckpointRepairPrompt,
buildAutonomousSolverPromptBlock,
buildAutonomousSolverSteeringPromptBlock,
consumePendingAutonomousSolverSteering,
getConfiguredAutonomousSolverMaxIterations,
recordAutonomousSolverMissingCheckpointRetry,
} from "../autonomous-solver.js";
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
import { debugLog } from "../debug-logger.js";
import { PROJECT_FILES } from "../detection.js";
@ -1874,6 +1884,40 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
}
}
// Prompt char measurement
try {
const solverState = beginAutonomousSolverIteration(
s.basePath,
unitType,
unitId,
{
maxIterations: getConfiguredAutonomousSolverMaxIterations(prefs),
},
);
const steeringBlock = buildAutonomousSolverSteeringPromptBlock(
consumePendingAutonomousSolverSteering(s.basePath),
);
if (steeringBlock) {
finalPrompt = `${finalPrompt}\n\n---\n\n${steeringBlock}`;
}
finalPrompt = `${finalPrompt}\n\n---\n\n${buildAutonomousSolverPromptBlock(solverState)}`;
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "solver-iteration-start",
data: {
unitType,
unitId,
iteration: solverState.iteration,
maxIterations: solverState.maxIterations,
steeringInjected: Boolean(steeringBlock),
},
});
} catch (solverErr) {
logWarning("engine", "Autonomous solver prompt injection failed", {
error: solverErr instanceof Error ? solverErr.message : String(solverErr),
});
}
s.lastPromptCharCount = finalPrompt.length;
s.lastBaselineCharCount = undefined;
if (deps.isDbAvailable()) {
@ -2018,12 +2062,127 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
});
const unitResult = await runUnit(ctx, pi, s, unitType, unitId, finalPrompt);
s.lastUnitAgentEndMessages = unitResult.event?.messages ?? null;
let currentUnitResult = unitResult;
let solverAssessment = assessAutonomousSolverTurn(
s.basePath,
unitType,
unitId,
);
if (solverAssessment.action === "missing-checkpoint-retry") {
recordAutonomousSolverMissingCheckpointRetry(s.basePath, unitType, unitId);
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "solver-missing-checkpoint-retry",
data: {
unitType,
unitId,
iteration: solverAssessment.state?.iteration,
},
});
ctx.ui.notify(
`Autonomous solver checkpoint missing for ${unitType} ${unitId}; redispatching one repair turn.`,
"warning",
);
currentUnitResult = await runUnit(
ctx,
pi,
s,
unitType,
unitId,
buildAutonomousSolverMissingCheckpointRepairPrompt(
solverAssessment.state,
unitType,
unitId,
),
);
s.lastUnitAgentEndMessages = currentUnitResult.event?.messages ?? null;
solverAssessment = assessAutonomousSolverTurn(s.basePath, unitType, unitId);
}
const solverCheckpoint = solverAssessment.checkpoint;
if (solverCheckpoint) {
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "solver-checkpoint",
data: {
unitType,
unitId,
iteration: solverCheckpoint.iteration,
outcome: solverCheckpoint.outcome,
remainingCount: solverCheckpoint.remainingItems?.length ?? 0,
},
});
}
if (solverAssessment.action === "pause") {
const reason =
solverCheckpoint?.outcome === "decide"
? (solverCheckpoint.decisionQuestion ?? solverCheckpoint.summary)
: solverCheckpoint?.outcome === "blocked"
? (solverCheckpoint.blockerReason ?? solverCheckpoint.summary)
: solverAssessment.reason;
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType:
solverAssessment.reason === "solver-max-iterations"
? "solver-max-iterations-pause"
: "solver-pause",
data: {
unitType,
unitId,
reason: solverAssessment.reason,
iteration: solverAssessment.state?.iteration,
maxIterations: solverAssessment.state?.maxIterations,
remainingItems: solverCheckpoint?.remainingItems ?? [],
evidencePath: ".sf/runtime/autonomous-solver/LOOP.md",
},
});
ctx.ui.notify(
`Autonomous solver paused ${unitType} ${unitId}: ${reason || solverAssessment.reason}`,
"warning",
);
await deps.pauseAuto(ctx, pi);
return {
action: "break",
reason: solverAssessment.reason,
};
}
if (solverAssessment.action === "continue") {
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "solver-continue-redispatch",
data: {
unitType,
unitId,
iteration: solverAssessment.state?.iteration,
remainingItems: solverCheckpoint?.remainingItems ?? [],
evidencePath: ".sf/runtime/autonomous-solver/LOOP.md",
},
});
ctx.ui.notify(
`Autonomous solver continuing ${unitType} ${unitId}: ${solverCheckpoint?.remainingItems?.length ?? 0} item(s) remain.`,
"info",
);
return {
action: "continue",
data: {
unitStartedAt: s.currentUnit?.startedAt,
requestDispatchedAt: currentUnitResult.requestDispatchedAt,
},
};
}
debugLog("autoLoop", {
phase: "runUnit-end",
iteration: ic.iteration,
unitType,
unitId,
status: unitResult.status,
status: currentUnitResult.status,
});
// Now that runUnit has called newSession(), the session file path is correct.
const sessionFile = deps.getSessionFile(ctx);
@ -2039,20 +2198,22 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
// Tag the most recent window entry with error info for stuck detection
const lastEntry = loopState.recentUnits[loopState.recentUnits.length - 1];
if (lastEntry) {
if (unitResult.errorContext) {
if (currentUnitResult.errorContext) {
lastEntry.error =
`${unitResult.errorContext.category}:${unitResult.errorContext.message}`.slice(
`${currentUnitResult.errorContext.category}:${currentUnitResult.errorContext.message}`.slice(
0,
200,
);
} else if (
unitResult.status === "error" ||
unitResult.status === "cancelled"
currentUnitResult.status === "error" ||
currentUnitResult.status === "cancelled"
) {
lastEntry.error = `${unitResult.status}:${unitType}/${unitId}`;
} else if (unitResult.event?.messages?.length) {
lastEntry.error = `${currentUnitResult.status}:${unitType}/${unitId}`;
} else if (currentUnitResult.event?.messages?.length) {
const lastMsg =
unitResult.event.messages[unitResult.event.messages.length - 1];
currentUnitResult.event.messages[
currentUnitResult.event.messages.length - 1
];
const msgStr =
typeof lastMsg === "string" ? lastMsg : JSON.stringify(lastMsg);
if (/error|fail|exception/i.test(msgStr)) {
@ -2060,28 +2221,28 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
}
}
}
if (unitResult.status === "cancelled") {
if (currentUnitResult.status === "cancelled") {
clearDeferredCommitAfterCancelledUnit(
s,
ctx,
unitType,
unitId,
unitResult.errorContext?.message ?? "cancelled",
currentUnitResult.errorContext?.message ?? "cancelled",
);
// Provider-error pause: pauseAuto already handled cleanup and scheduled
// recovery. Don't hard-stop — just break out of the loop (#2762).
if (unitResult.errorContext?.category === "provider") {
if (currentUnitResult.errorContext?.category === "provider") {
await emitCancelledUnitEnd(
ic,
unitType,
unitId,
unitStartSeq,
unitResult.errorContext,
currentUnitResult.errorContext,
);
debugLog("autoLoop", {
phase: "exit",
reason: "provider-pause",
isTransient: unitResult.errorContext.isTransient,
isTransient: currentUnitResult.errorContext.isTransient,
});
return { action: "break", reason: "provider-pause" };
}
@ -2091,13 +2252,15 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
// Structural errors (TypeError, is not a function) are NOT transient
// and must hard-stop to avoid infinite retry loops.
if (
unitResult.errorContext?.isTransient &&
unitResult.errorContext?.category === "timeout"
currentUnitResult.errorContext?.isTransient &&
currentUnitResult.errorContext?.category === "timeout"
) {
// Session-timeout cancellations are resumable pauses: pauseAuto below preserves the auto session
// instead of routing the cancelled unit into the hard-stop path.
const isSessionCreationTimeout =
unitResult.errorContext.message?.includes("Session creation timed out");
currentUnitResult.errorContext.message?.includes(
"Session creation timed out",
);
if (isSessionCreationTimeout) {
consecutiveSessionTimeouts += 1;
const baseRetryAfterMs = 30_000;
@ -2150,7 +2313,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
unitType,
unitId,
unitStartSeq,
unitResult.errorContext,
currentUnitResult.errorContext,
);
return { action: "break", reason: "session-timeout" };
}
@ -2170,7 +2333,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
unitType,
unitId,
unitStartSeq,
unitResult.errorContext,
currentUnitResult.errorContext,
);
return { action: "break", reason: "unit-hard-timeout" };
}
@ -2190,16 +2353,16 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
unitType,
unitId,
unitStartSeq,
unitResult.errorContext,
currentUnitResult.errorContext,
);
ctx.ui.notify(
`Session creation failed for ${unitType} ${unitId}: ${unitResult.errorContext?.message ?? "unknown"}. Stopping auto-mode.`,
`Session creation failed for ${unitType} ${unitId}: ${currentUnitResult.errorContext?.message ?? "unknown"}. Stopping auto-mode.`,
"warning",
);
await deps.stopAuto(
ctx,
pi,
`Session creation failed: ${unitResult.errorContext?.message ?? "unknown"}`,
`Session creation failed: ${currentUnitResult.errorContext?.message ?? "unknown"}`,
);
debugLog("autoLoop", { phase: "exit", reason: "session-failed" });
return { action: "break", reason: "session-failed" };
@ -2276,7 +2439,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
action: "next",
data: {
unitStartedAt: s.currentUnit?.startedAt,
requestDispatchedAt: unitResult.requestDispatchedAt,
requestDispatchedAt: currentUnitResult.requestDispatchedAt,
},
};
}
@ -2330,7 +2493,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
);
}
}
if (unitResult.status !== "completed" || !artifactVerified) {
if (currentUnitResult.status !== "completed" || !artifactVerified) {
recordLearningOutcomeForUnit(
ic,
unitType,
@ -2364,7 +2527,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
data: {
unitType,
unitId,
status: unitResult.status,
status: currentUnitResult.status,
artifactVerified,
...(unitEndEntry
? {
@ -2374,8 +2537,8 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
tokens_output: unitEndEntry.tokens.output,
}
: {}),
...(unitResult.errorContext
? { errorContext: unitResult.errorContext }
...(currentUnitResult.errorContext
? { errorContext: currentUnitResult.errorContext }
: {}),
},
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
@ -2383,13 +2546,13 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
}
{
const verdict =
unitResult.status === "completed"
currentUnitResult.status === "completed"
? artifactVerified
? "success"
: "blocked"
: unitResult.status === "error"
: currentUnitResult.status === "error"
? "fail"
: unitResult.status;
: currentUnitResult.status;
const ledger = deps.getLedger();
const unitEntry = ledger?.units
? [...ledger.units]
@ -2417,7 +2580,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
}
// ── Safety harness: checkpoint cleanup or rollback ──
if (s.checkpointSha) {
if (unitResult.status === "error" && safetyConfig.auto_rollback) {
if (currentUnitResult.status === "error" && safetyConfig.auto_rollback) {
const rolled = rollbackToCheckpoint(s.basePath, unitId, s.checkpointSha);
if (rolled) {
ctx.ui.notify(
@ -2426,7 +2589,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
);
debugLog("runUnitPhase", { phase: "checkpoint-rollback", unitId });
}
} else if (unitResult.status === "error") {
} else if (currentUnitResult.status === "error") {
ctx.ui.notify(
`Unit ${unitId} failed. Pre-unit checkpoint available at ${s.checkpointSha.slice(0, 8)}`,
"warning",
@ -2443,7 +2606,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
action: "next",
data: {
unitStartedAt: s.currentUnit?.startedAt,
requestDispatchedAt: unitResult.requestDispatchedAt,
requestDispatchedAt: currentUnitResult.requestDispatchedAt,
},
};
}

View file

@ -0,0 +1,537 @@
/**
* autonomous-solver.ts PDD-shaped checkpoint contract for autonomous units.
*
* Purpose: make long-running autonomous work explicit, resumable, and auditable
* instead of relying on a single agent turn to either finish or silently drift.
*
* Consumer: auto/phases.js injects the contract into each autonomous unit, and
* bootstrap/db-tools.js records agent checkpoints via sf_autonomous_checkpoint.
*/
import {
appendFileSync,
mkdirSync,
readFileSync,
writeFileSync,
} from "node:fs";
import { dirname, join } from "node:path";
import { atomicWriteSync } from "./atomic-write.js";
import { sfRoot } from "./paths.js";
// Outcome values an agent checkpoint may report.
export const AUTONOMOUS_SOLVER_OUTCOMES = [
"continue",
"complete",
"blocked",
"decide",
];
// Upper bound on the number of list entries rendered per section.
const MAX_RENDERED_ITEMS = 12;
// Iteration limit used when no usable value is configured.
const DEFAULT_SOLVER_MAX_ITERATIONS = 12;
// Lower and upper clamp for the configured iteration limit.
const MIN_SOLVER_MAX_ITERATIONS = 1;
const MAX_SOLVER_MAX_ITERATIONS = 100;
/** Directory that holds the autonomous solver runtime files under the SF root of `basePath`. */
function solverDir(basePath) {
  const root = sfRoot(basePath);
  return join(root, "runtime", "autonomous-solver");
}
/** Path of the persisted solver state file (`active.json`). */
function statePath(basePath) {
  const dir = solverDir(basePath);
  return join(dir, "active.json");
}
/** Path of the markdown projection of the solver state (`LOOP.md`). */
function projectionPath(basePath) {
  const dir = solverDir(basePath);
  return join(dir, "LOOP.md");
}
/** Path of the append-only checkpoint history (`iterations.jsonl`). */
function historyPath(basePath) {
  const dir = solverDir(basePath);
  return join(dir, "iterations.jsonl");
}
/** Path of the `steering.jsonl` file inside the solver directory. */
function steeringPath(basePath) {
  const dir = solverDir(basePath);
  return join(dir, "steering.jsonl");
}
/** Current time as an ISO-8601 string. */
function nowIso() {
  const stamp = new Date();
  return stamp.toISOString();
}
/**
 * Normalise an arbitrary value into a list of non-empty, trimmed strings.
 *
 * Non-array input yields an empty list. Entries that are null or undefined are
 * dropped rather than stringified, so they cannot show up as the literal items
 * "null" / "undefined"; every other entry is converted with String() and
 * trimmed, and entries that end up empty are removed.
 */
function sanitizeList(value) {
  if (!Array.isArray(value)) return [];
  const cleaned = [];
  for (const item of value) {
    if (item === null || item === undefined) continue;
    const text = String(item).trim();
    if (text) cleaned.push(text);
  }
  return cleaned;
}
/**
 * Read and parse a JSON file.
 *
 * Returns the parsed value, or null when the file cannot be read or its
 * contents are not valid JSON.
 */
function readJson(path) {
  let raw;
  try {
    raw = readFileSync(path, "utf-8");
  } catch {
    return null;
  }
  try {
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
/** True when `state` records exactly the given unit type and unit id. */
function sameUnit(state, unitType, unitId) {
  const recordedType = state?.unitType;
  const recordedId = state?.unitId;
  return recordedType === unitType && recordedId === unitId;
}
/**
 * Coerce `value` to an integer within [min, max], or return `fallback`.
 *
 * Only numbers and non-blank strings are treated as candidates. null,
 * undefined, blank strings, booleans and other values return `fallback`:
 * Number() would coerce several of them to 0 or 1, which would silently clamp
 * an unset setting to the minimum instead of using the default.
 *
 * @returns The floored value clamped to [min, max], or `fallback` when the
 *   value is not a finite number.
 */
function clampNumber(value, min, max, fallback) {
  const isCandidate =
    typeof value === "number" ||
    (typeof value === "string" && value.trim() !== "");
  if (!isCandidate) return fallback;
  const n = Number(value);
  if (!Number.isFinite(n)) return fallback;
  return Math.max(min, Math.min(max, Math.floor(n)));
}
/**
 * Resolve the autonomous solver iteration limit from preferences.
 *
 * Purpose: keep the number of solver iterations bounded even when the
 * configured value is missing or out of range, while still allowing projects
 * to raise the ceiling up to the supported maximum.
 *
 * Consumer: runUnitPhase before dispatch.
 */
export function getConfiguredAutonomousSolverMaxIterations(preferences) {
  const configured = preferences?.auto_supervisor?.solver_max_iterations;
  return clampNumber(
    configured,
    MIN_SOLVER_MAX_ITERATIONS,
    MAX_SOLVER_MAX_ITERATIONS,
    DEFAULT_SOLVER_MAX_ITERATIONS,
  );
}
/**
 * Render items as a markdown bullet list, capped at MAX_RENDERED_ITEMS.
 * When no usable item remains, a single bullet with `fallback` is returned.
 */
function renderList(items, fallback) {
  const visible = sanitizeList(items).slice(0, MAX_RENDERED_ITEMS);
  if (visible.length > 0) {
    const bullets = visible.map((entry) => `- ${entry}`);
    return bullets.join("\n");
  }
  return `- ${fallback}`;
}
function renderPdd(pdd = {}) {
return [
"## PDD Contract",
`- Purpose: ${pdd.purpose || "not recorded yet"}`,
`- Consumer: ${pdd.consumer || "not recorded yet"}`,
`- Contract: ${pdd.contract || "not recorded yet"}`,
`- Failure boundary: ${pdd.failureBoundary || "not recorded yet"}`,
`- Evidence: ${pdd.evidence || "not recorded yet"}`,
`- Non-goals: ${pdd.nonGoals || "not recorded yet"}`,
`- Invariants: ${pdd.invariants || "not recorded yet"}`,
`- Assumptions: ${pdd.assumptions || "not recorded yet"}`,
].join("\n");
}
/**
 * Render the solver state as the human-readable markdown projection.
 *
 * The blocker and decision lines are emitted only when the latest checkpoint
 * recorded them. Omitted optional lines are represented as null and filtered
 * out, so the intentional blank separator lines between sections (and the
 * trailing newline) are preserved instead of being stripped with them.
 */
function renderProjection(state) {
  const checkpoint = state.latestCheckpoint ?? {};
  const lines = [
    "# Autonomous Solver Loop",
    "",
    `- Unit: ${state.unitType} ${state.unitId}`,
    `- Status: ${state.status}`,
    `- Iteration: ${state.iteration} of ${state.maxIterations}`,
    `- Started: ${state.startedAt}`,
    `- Updated: ${state.updatedAt}`,
    "",
    "## Last Checkpoint",
    `- Outcome: ${checkpoint.outcome ?? "none"}`,
    `- Summary: ${checkpoint.summary ?? "none yet"}`,
    checkpoint.blockerReason ? `- Blocker: ${checkpoint.blockerReason}` : null,
    checkpoint.decisionQuestion
      ? `- Decision needed: ${checkpoint.decisionQuestion}`
      : null,
    "",
    "## Completed This Iteration",
    renderList(checkpoint.completedItems, "Nothing recorded yet."),
    "",
    "## Remaining",
    renderList(
      checkpoint.remainingItems,
      "Unknown until the agent checkpoints.",
    ),
    "",
    renderPdd(checkpoint.pdd),
    "",
    "## Verification Evidence",
    renderList(
      checkpoint.verificationEvidence,
      "No verification evidence recorded yet.",
    ),
    "",
  ];
  // Drop only the omitted optional lines; keep the blank separators.
  return lines.filter((line) => line !== null).join("\n");
}
/**
 * Persist the solver state and its markdown projection.
 *
 * Ensures the solver directory exists, then writes the pretty-printed JSON
 * state followed by the rendered projection, each via atomicWriteSync.
 */
function writeState(basePath, state) {
  mkdirSync(solverDir(basePath), { recursive: true });
  const serialized = `${JSON.stringify(state, null, 2)}\n`;
  atomicWriteSync(statePath(basePath), serialized);
  const projection = renderProjection(state);
  atomicWriteSync(projectionPath(basePath), projection);
}
/**
 * Start or advance the persisted autonomous solver iteration for a unit.
 *
 * Purpose: give every autonomous dispatch an explicit iteration number and a
 * durable loop projection, so retries and restarts work from recorded state.
 *
 * Behaviour: when the persisted state belongs to the same unit and is not
 * marked complete, the iteration counter continues from it; otherwise it
 * starts at 1. The start time and latest checkpoint are carried over whenever
 * the persisted state belongs to the same unit. The missing-checkpoint retry
 * marker is always reset.
 *
 * Consumer: runUnitPhase before sending the unit prompt.
 */
export function beginAutonomousSolverIteration(
  basePath,
  unitType,
  unitId,
  options = {},
) {
  const previous = readJson(statePath(basePath));
  const resuming = sameUnit(previous, unitType, unitId);

  let priorIteration = 0;
  if (resuming && previous.status !== "complete") {
    priorIteration = Number(previous.iteration ?? 0);
  }

  // An explicit option wins over the limit stored with the previous state.
  const requestedMax = options.maxIterations ?? previous?.maxIterations;
  const maxIterations = clampNumber(
    requestedMax,
    MIN_SOLVER_MAX_ITERATIONS,
    MAX_SOLVER_MAX_ITERATIONS,
    DEFAULT_SOLVER_MAX_ITERATIONS,
  );

  const startedAt = resuming ? previous.startedAt || nowIso() : nowIso();
  const updatedAt = nowIso();
  const latestCheckpoint = resuming ? (previous.latestCheckpoint ?? null) : null;

  const state = {
    unitType,
    unitId,
    status: "running",
    iteration: Math.max(1, priorIteration + 1),
    maxIterations,
    startedAt,
    updatedAt,
    latestCheckpoint,
    missingCheckpointRetry: null,
  };
  writeState(basePath, state);
  return state;
}
/**
 * Build the solver-loop contract text appended to a unit prompt.
 *
 * Purpose: tell the agent which iteration it is in, which checkpoint outcomes
 * exist, and which eight PDD fields every checkpoint must carry.
 *
 * Consumer: runUnitPhase prompt injection.
 */
export function buildAutonomousSolverPromptBlock(state) {
  const intro = [
    "## Autonomous Solver Loop Contract",
    "",
    `You are inside /sf autonomous iteration ${state.iteration} of ${state.maxIterations} for ${state.unitType} ${state.unitId}.`,
    "",
    "This is SF's built-in solver loop. It is not a separate Ralph workflow. Work one bounded, useful chunk; preserve enough state for the next autonomous iteration to continue without guessing.",
    "",
  ];
  const outcomeRules = [
    "Before ending the turn, call `sf_autonomous_checkpoint` with:",
    '- `outcome: "complete"` only when this unit\'s normal completion tool/artifact is also done.',
    '- `outcome: "continue"` when you made real progress but more autonomous iterations are needed.',
    '- `outcome: "blocked"` when the next step cannot proceed without unavailable facts, credentials, or a broken environment.',
    '- `outcome: "decide"` when there is a material product/architecture choice that must not be decided autonomously.',
    "",
  ];
  const pddRules = [
    "Checkpoint the eight PDD fields every time:",
    "- Purpose: why this behavior exists and what value it protects.",
    "- Consumer: who or what uses it in production.",
    "- Contract: the observable behavior or artifact boundary.",
    "- Failure boundary: what failures must be contained or surfaced.",
    "- Evidence: commands, files, tests, or runtime observations proving progress.",
    "- Non-goals: what you intentionally did not solve this iteration.",
    "- Invariants: rules that must remain true across iterations.",
    "- Assumptions: uncertain facts you relied on and how to falsify them later.",
    "",
  ];
  const closing = [
    "If you are executing an `execute-task` unit and the task is finished, `sf_task_complete` remains mandatory; `sf_autonomous_checkpoint` does not replace it.",
    "If you need another iteration, leave exact remaining items in the checkpoint rather than ending with vague prose.",
  ];
  return [...intro, ...outcomeRules, ...pddRules, ...closing].join("\n");
}
/**
 * Record a solver checkpoint and refresh the persisted state and projection.
 *
 * Purpose: turn the agent's end-of-iteration report into structured state
 * that can be inspected, gated on, and resumed from.
 *
 * Behaviour: the checkpoint is appended as one JSON line to the history file,
 * then the state is rewritten with the checkpoint as `latestCheckpoint` and a
 * status derived from the outcome ("complete" -> complete, "blocked"/"decide"
 * -> paused, anything else -> running). If no state file exists yet, a new
 * iteration is started first.
 *
 * Consumer: sf_autonomous_checkpoint tool.
 */
export function appendAutonomousSolverCheckpoint(basePath, params) {
  const state =
    readJson(statePath(basePath)) ??
    beginAutonomousSolverIteration(basePath, params.unitType, params.unitId);

  // Free-text helpers: optional fields stay undefined when not provided.
  const optionalText = (raw) => (raw ? String(raw).trim() : undefined);
  const pddText = (key) => String(params.pdd?.[key] ?? "").trim();

  const belongsToState = sameUnit(state, params.unitType, params.unitId);
  const checkpoint = {
    ts: nowIso(),
    unitType: params.unitType,
    unitId: params.unitId,
    iteration: belongsToState ? state.iteration : 1,
    outcome: params.outcome,
    summary: String(params.summary ?? "").trim(),
    completedItems: sanitizeList(params.completedItems),
    remainingItems: sanitizeList(params.remainingItems),
    verificationEvidence: sanitizeList(params.verificationEvidence),
    blockerReason: optionalText(params.blockerReason),
    decisionQuestion: optionalText(params.decisionQuestion),
    pdd: {
      purpose: pddText("purpose"),
      consumer: pddText("consumer"),
      contract: pddText("contract"),
      failureBoundary: pddText("failureBoundary"),
      evidence: pddText("evidence"),
      nonGoals: pddText("nonGoals"),
      invariants: pddText("invariants"),
      assumptions: pddText("assumptions"),
    },
  };

  let status = "running";
  if (params.outcome === "complete") {
    status = "complete";
  } else if (params.outcome === "blocked" || params.outcome === "decide") {
    status = "paused";
  }

  const nextState = {
    ...state,
    unitType: params.unitType,
    unitId: params.unitId,
    status,
    updatedAt: checkpoint.ts,
    latestCheckpoint: checkpoint,
  };

  const historyFile = historyPath(basePath);
  mkdirSync(dirname(historyFile), { recursive: true });
  appendFileSync(historyFile, `${JSON.stringify(checkpoint)}\n`);
  writeState(basePath, nextState);
  return checkpoint;
}
/**
 * Load the autonomous solver state currently persisted on disk.
 *
 * Purpose: give status surfaces and loop enforcement a single structured
 * source for the active solver unit, so nothing has to scrape the markdown
 * projection.
 *
 * Consumer: /sf status, sf-progress, and runUnitPhase.
 *
 * @param basePath Project root the solver state path is resolved from.
 * @returns Whatever `readJson` yields for the state file (callers in this file
 *   treat a nullish result as "no state").
 */
export function readAutonomousSolverState(basePath) {
  const persistedStatePath = statePath(basePath);
  return readJson(persistedStatePath);
}
/**
 * Record that a missing checkpoint repair dispatch has already been attempted.
 *
 * Purpose: enforce the checkpoint contract with one repair chance while
 * preventing an unbounded missing-checkpoint redispatch loop.
 *
 * Consumer: runUnitPhase after the first unit turn omits sf_autonomous_checkpoint.
 *
 * @param basePath Project root the solver state path is resolved from.
 * @param unitType Unit type the retry is being recorded for.
 * @param unitId Unit id the retry is being recorded for.
 * @returns The state that was written, or `null` when the persisted state does
 *   not belong to the given unit (nothing is written in that case).
 */
export function recordAutonomousSolverMissingCheckpointRetry(
  basePath,
  unitType,
  unitId,
) {
  const state = readJson(statePath(basePath));
  if (!sameUnit(state, unitType, unitId)) return null;
  // Take the timestamp once so `updatedAt` and the retry marker can never
  // disagree (two separate clock reads may straddle a millisecond boundary).
  const recordedAt = nowIso();
  const nextState = {
    ...state,
    status: "running",
    updatedAt: recordedAt,
    // Tagged with the iteration so the marker only suppresses a second retry
    // for this same iteration (see assessAutonomousSolverTurn).
    missingCheckpointRetry: {
      iteration: state.iteration,
      ts: recordedAt,
    },
  };
  writeState(basePath, nextState);
  return nextState;
}
/**
 * Classify the completed solver turn into the next loop action.
 *
 * Purpose: make checkpoint outcomes authoritative for autonomous scheduling
 * instead of letting artifact heuristics silently override blocked, decision,
 * continue, or missing-checkpoint states.
 *
 * Consumer: runUnitPhase immediately after each unit turn.
 *
 * Decision order:
 * 1. State missing or for another unit -> `missing-checkpoint-retry`
 *    (`solver-missing-state`).
 * 2. No checkpoint for the current unit + iteration -> one
 *    `missing-checkpoint-retry`, then `pause` (`solver-missing-checkpoint`).
 * 3. `blocked` / `decide` -> `pause` with the semantic reason.
 * 4. Iteration budget exhausted without `complete` -> `pause`
 *    (`solver-max-iterations`).
 * 5. `continue` / `complete` -> that action.
 * 6. Anything else -> `pause` (`solver-invalid-outcome`).
 *
 * @param basePath Project root the solver state path is resolved from.
 * @param unitType Unit type of the turn that just finished.
 * @param unitId Unit id of the turn that just finished.
 * @returns `{ action, reason, state, checkpoint? }`.
 */
export function assessAutonomousSolverTurn(basePath, unitType, unitId) {
  const state = readJson(statePath(basePath));
  if (!sameUnit(state, unitType, unitId)) {
    return {
      action: "missing-checkpoint-retry",
      reason: "solver-missing-state",
      state,
    };
  }
  const checkpoint = state.latestCheckpoint ?? null;
  // A checkpoint only counts when it was written for this exact unit AND for
  // the iteration that just ran; older checkpoints must not satisfy the turn.
  const hasCurrentCheckpoint =
    checkpoint?.unitType === unitType &&
    checkpoint?.unitId === unitId &&
    Number(checkpoint?.iteration) === Number(state.iteration);
  if (!hasCurrentCheckpoint) {
    const alreadyRetried =
      Number(state.missingCheckpointRetry?.iteration) ===
      Number(state.iteration);
    return {
      action: alreadyRetried ? "pause" : "missing-checkpoint-retry",
      reason: "solver-missing-checkpoint",
      state,
    };
  }
  const outcome = checkpoint.outcome;
  // Semantic pauses come first so a blocked/decide checkpoint written on the
  // final allowed iteration keeps its real reason instead of being reported as
  // an exhausted iteration budget.
  if (outcome === "blocked" || outcome === "decide") {
    return {
      action: "pause",
      reason: `solver-${outcome}`,
      state,
      checkpoint,
    };
  }
  if (outcome !== "complete" && state.iteration >= state.maxIterations) {
    return {
      action: "pause",
      reason: "solver-max-iterations",
      state,
      checkpoint,
    };
  }
  if (outcome === "continue" || outcome === "complete") {
    return {
      action: outcome,
      reason: `solver-${outcome}`,
      state,
      checkpoint,
    };
  }
  // An unrecognised or missing outcome must never be promoted to "complete";
  // pause so a human can inspect the malformed checkpoint.
  return {
    action: "pause",
    reason: "solver-invalid-outcome",
    state,
    checkpoint,
  };
}
/**
 * Append user steering for the next autonomous solver iteration.
 *
 * Purpose: active /sf steer must redirect the next bounded iteration without
 * interrupting the current tool batch or forcing an immediate agent turn.
 *
 * Consumer: /sf steer while autonomous mode is active.
 *
 * @param basePath Project root the steering file is resolved from.
 * @param text Steering note; blank or nullish input is ignored.
 * @param metadata Extra fields stored alongside the note (e.g. `appliedAt`).
 *   Reserved fields (`id`, `ts`, `text`, `consumedAt`) cannot be overridden.
 * @returns The appended entry, or `null` when `text` is empty after trimming.
 */
export function appendAutonomousSolverSteering(basePath, text, metadata = {}) {
  const trimmed = String(text ?? "").trim();
  if (!trimmed) return null;
  const entry = {
    // Metadata is spread first so a caller-supplied key can never clobber the
    // fields the consume logic depends on; in particular a stray `consumedAt`
    // would mark the note as already consumed and silently drop it.
    ...metadata,
    id: `${Date.now()}-${Math.random().toString(16).slice(2)}`,
    ts: nowIso(),
    text: trimmed,
    consumedAt: null,
  };
  mkdirSync(solverDir(basePath), { recursive: true });
  // JSONL: one steering entry per line.
  appendFileSync(steeringPath(basePath), `${JSON.stringify(entry)}\n`);
  return entry;
}
/**
 * Read every steering entry from the JSONL steering file.
 *
 * Best-effort by design: an unreadable file yields `[]`, and individual lines
 * that are not valid JSON objects are skipped rather than failing the read.
 *
 * @param basePath Project root the steering file is resolved from.
 * @returns Parsed entry objects in file order.
 */
function readSteeringEntries(basePath) {
  let raw;
  try {
    raw = readFileSync(steeringPath(basePath), "utf-8");
  } catch {
    // Missing or unreadable steering file: treat as "no steering".
    return [];
  }
  const entries = [];
  for (const line of raw.split("\n")) {
    if (!line.trim()) continue;
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue;
    }
    // Valid JSON that is not a plain object (numbers, strings, arrays, null)
    // is not a steering entry; letting it through would later be rewritten as
    // a bogus `{ consumedAt }` record and rendered as an empty steering bullet.
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      entries.push(parsed);
    }
  }
  return entries;
}
/**
 * Hand out all not-yet-consumed steering notes and mark them consumed.
 *
 * Purpose: each user steering note should shape the next solver iteration
 * exactly once and must not be replayed into later dispatches.
 *
 * Consumer: runUnitPhase before appending the solver prompt block.
 *
 * @param basePath Project root the steering file is resolved from.
 * @returns The entries that were pending before this call (empty array when
 *   there were none; the file is left untouched in that case).
 */
export function consumePendingAutonomousSolverSteering(basePath) {
  const allEntries = readSteeringEntries(basePath);
  const unconsumed = [];
  for (const candidate of allEntries) {
    if (!candidate.consumedAt) unconsumed.push(candidate);
  }
  if (unconsumed.length === 0) {
    return [];
  }
  // Stamp every pending entry with the same consumption time and rewrite the
  // whole file in one atomic write.
  const stamp = nowIso();
  const serialized = allEntries.map((candidate) =>
    JSON.stringify(
      candidate.consumedAt ? candidate : { ...candidate, consumedAt: stamp },
    ),
  );
  mkdirSync(solverDir(basePath), { recursive: true });
  atomicWriteSync(steeringPath(basePath), serialized.join("\n") + "\n");
  return unconsumed;
}
/**
* Render consumed steering entries as a bounded prompt block.
*
* Purpose: keep user steering visible to the next iteration as explicit input
* while preserving the checkpoint-driven solver contract.
*
* Consumer: runUnitPhase prompt injection.
*/
export function buildAutonomousSolverSteeringPromptBlock(entries) {
const pending = Array.isArray(entries) ? entries : [];
if (pending.length === 0) return "";
return [
"## Pending User Steering",
"",
"Apply these user overrides in this solver iteration:",
...pending.map((entry) => `- ${String(entry.text ?? "").trim()}`),
].join("\n");
}
/**
* Build the one allowed repair prompt for a missing checkpoint.
*
* Purpose: give the agent a narrow chance to satisfy the solver contract before
* autonomous mode pauses for inspection.
*
* Consumer: runUnitPhase when a turn ends without a current checkpoint.
*/
export function buildAutonomousSolverMissingCheckpointRepairPrompt(
state,
unitType,
unitId,
) {
return [
"## Checkpoint Required",
"",
`Your previous autonomous turn for ${unitType} ${unitId} ended without calling sf_autonomous_checkpoint for iteration ${state?.iteration ?? "unknown"}.`,
"Do not continue implementation work in this repair turn.",
"Inspect the work you just performed, then call sf_autonomous_checkpoint with the correct outcome and all eight PDD fields.",
"If no useful progress happened, use outcome=blocked and explain why.",
].join("\n");
}
/**
 * Look up the most recent solver checkpoint recorded for a specific unit.
 *
 * Purpose: let autonomous finalization respond to semantic blocked/decision
 * outcomes without scraping prose from model output.
 *
 * Consumer: runUnitPhase after runUnit returns.
 *
 * @param basePath Project root the solver state path is resolved from.
 * @param unitType Unit type the checkpoint must belong to.
 * @param unitId Unit id the checkpoint must belong to.
 * @returns The stored `latestCheckpoint`, or `null` when the persisted state
 *   is for another unit or holds no checkpoint.
 */
export function readLatestAutonomousSolverCheckpoint(
  basePath,
  unitType,
  unitId,
) {
  const persisted = readJson(statePath(basePath));
  const belongsToUnit = sameUnit(persisted, unitType, unitId);
  return belongsToUnit ? (persisted.latestCheckpoint ?? null) : null;
}

View file

@ -1,6 +1,10 @@
import { Type } from "@sinclair/typebox";
import { StringEnum } from "@singularity-forge/pi-ai";
import { Text } from "@singularity-forge/pi-tui";
import {
AUTONOMOUS_SOLVER_OUTCOMES,
appendAutonomousSolverCheckpoint,
} from "../autonomous-solver.js";
import {
claimReservedId,
findMilestoneIds,
@ -877,6 +881,160 @@ export function registerDbTools(pi) {
);
},
});
// ─── sf_autonomous_checkpoint ───────────────────────────────────────
/**
 * Tool executor for sf_autonomous_checkpoint.
 *
 * Persists the checkpoint relative to the current working directory and
 * reports the result as a text content item plus structured `details`.
 * Failures are logged and returned as an error-shaped result instead of being
 * rethrown.
 */
const autonomousCheckpointExecute = async (
  _toolCallId,
  params,
  _signal,
  _onUpdate,
  _ctx,
) => {
  const textContent = (text) => [{ type: "text", text }];
  try {
    const saved = appendAutonomousSolverCheckpoint(process.cwd(), params);
    const confirmation = `Recorded autonomous checkpoint for ${saved.unitType} ${saved.unitId}: ${saved.outcome}`;
    return {
      content: textContent(confirmation),
      details: {
        operation: "autonomous_checkpoint",
        unitType: saved.unitType,
        unitId: saved.unitId,
        iteration: saved.iteration,
        outcome: saved.outcome,
      },
    };
  } catch (err) {
    let msg;
    if (err instanceof Error) {
      msg = err.message;
    } else {
      msg = String(err);
    }
    logError("tool", `sf_autonomous_checkpoint tool failed: ${msg}`, {
      tool: "sf_autonomous_checkpoint",
      error: String(err),
    });
    return {
      content: textContent(`Error in sf_autonomous_checkpoint: ${msg}`),
      details: { operation: "autonomous_checkpoint", error: msg },
    };
  }
};
// Register the sf_autonomous_checkpoint tool: schema, prompt guidance, the
// executor defined above, and the two TUI renderers.
pi.registerTool({
name: "sf_autonomous_checkpoint",
label: "Autonomous Checkpoint",
description:
"Record a PDD-shaped autonomous solver checkpoint for the current unit. " +
"Use this before ending every /sf autonomous unit turn to make progress, blockers, decisions, and remaining work explicit.",
promptSnippet:
"Checkpoint autonomous solver progress with PDD fields and semantic outcome",
promptGuidelines: [
"Call sf_autonomous_checkpoint before ending an autonomous unit turn.",
"Use outcome=complete only when the normal unit completion artifact/tool is also complete.",
"Use outcome=continue when you made real progress but the unit needs another autonomous iteration.",
"Use outcome=blocked for missing facts, credentials, broken environment, or impossible next steps.",
"Use outcome=decide for material product or architecture choices that autonomous mode must not decide silently.",
"Fill all eight PDD fields: purpose, consumer, contract, failureBoundary, evidence, nonGoals, invariants, assumptions.",
],
parameters: Type.Object({
unitType: Type.String({
description: "Current autonomous unit type, e.g. execute-task",
}),
unitId: Type.String({
description: "Current autonomous unit id, e.g. M010/S05/T02",
}),
// The allowed outcomes are derived from AUTONOMOUS_SOLVER_OUTCOMES so the
// schema follows the solver module's own list.
outcome: Type.Union(
AUTONOMOUS_SOLVER_OUTCOMES.map((outcome) => Type.Literal(outcome)),
{
description:
"Semantic result for this iteration: continue, complete, blocked, or decide",
},
),
summary: Type.String({
description: "Concise statement of what happened this iteration",
}),
completedItems: Type.Array(Type.String(), {
description: "Concrete items completed in this iteration",
}),
remainingItems: Type.Array(Type.String(), {
description: "Exact remaining work for the next autonomous iteration",
}),
verificationEvidence: Type.Array(Type.String(), {
description:
"Commands, files, tests, screenshots, or observations that prove progress",
}),
// NOTE(review): blockerReason and decisionQuestion are schema-optional; the
// "Required when ..." wording is guidance only, and no enforcement is visible
// in this registration.
blockerReason: Type.Optional(
Type.String({
description: "Required when outcome=blocked",
}),
),
decisionQuestion: Type.Optional(
Type.String({
description: "Required when outcome=decide",
}),
),
// The eight PDD fields, all required strings.
pdd: Type.Object({
purpose: Type.String({ description: "Why this behavior exists" }),
consumer: Type.String({
description: "Who or what uses it in production",
}),
contract: Type.String({
description: "Observable behavior or artifact boundary",
}),
failureBoundary: Type.String({
description: "Failures that must be contained or surfaced",
}),
evidence: Type.String({
description: "Proof gathered this iteration",
}),
nonGoals: Type.String({
description: "What is intentionally not solved here",
}),
invariants: Type.String({
description: "Rules that must remain true across iterations",
}),
assumptions: Type.String({
description: "Uncertain facts and how to falsify them",
}),
}),
}),
execute: autonomousCheckpointExecute,
// Call line: tool title, then "[outcome]" and "unitType unitId" when present.
renderCall(args, theme) {
let text = theme.fg("toolTitle", theme.bold("sf_autonomous_checkpoint "));
if (args.outcome) text += theme.fg("accent", `[${args.outcome}] `);
if (args.unitType || args.unitId) {
text += theme.fg(
"muted",
`${args.unitType ?? "unit"} ${args.unitId ?? ""}`.trim(),
);
}
return new Text(text, 0, 0);
},
// Result line: an error when the result is flagged or details carry an
// `error`; otherwise a success summary built from the details.
renderResult(result, _options, theme) {
const d = result.details;
if (result.isError || d?.error) {
return new Text(
theme.fg("error", `Error: ${d?.error ?? "unknown"}`),
0,
0,
);
}
return new Text(
theme.fg(
"success",
`Checkpoint ${d?.outcome ?? "recorded"} · ${d?.unitType ?? ""} ${d?.unitId ?? ""}`.trim(),
),
0,
0,
);
},
});
// ─── sf_plan_milestone ────────────────────────────────────────────────
const planMilestoneExecute = async (
_toolCallId,

View file

@ -8,6 +8,7 @@ import { existsSync, mkdirSync, readFileSync } from "node:fs";
import { join } from "node:path";
import { checkRemoteAutoSession, isAutoActive } from "./auto.js";
import { getAutoWorktreePath } from "./auto-worktree.js";
import { appendAutonomousSolverSteering } from "./autonomous-solver.js";
import {
appendCapture,
hasPendingCaptures,
@ -436,29 +437,13 @@ export async function handleSteer(change, ctx, pi) {
autoRunning && mid !== "none" ? getAutoWorktreePath(basePath, mid) : null;
const targetPath = wtPath ?? basePath;
await appendOverride(targetPath, change, appliedAt);
appendAutonomousSolverSteering(targetPath, change, { appliedAt });
const overrideLoc = wtPath
? "worktree `.sf/OVERRIDES.md`"
: "`.sf/OVERRIDES.md`";
if (isAutoActive()) {
pi.sendMessage(
{
customType: "sf-hard-steer",
content: [
"HARD STEER — User override registered.",
"",
`**Override:** ${change}`,
"",
`This override has been saved to ${overrideLoc} and will be injected into all future task prompts.`,
"A document rewrite unit will run before the next task to propagate this change across all active plan documents.",
"",
"If you are mid-task, finish your current work respecting this override. The next dispatched unit will be a document rewrite.",
].join("\n"),
display: false,
},
{ triggerTurn: true },
);
if (autoRunning) {
ctx.ui.notify(
`Override registered (${overrideLoc}): "${change}". Will be applied before next task dispatch.`,
`Override registered (${overrideLoc}): "${change}". Will be injected into the next solver iteration.`,
"info",
);
} else {

View file

@ -125,6 +125,7 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea
- `soft_timeout_minutes`: minutes before the supervisor issues a soft warning (default: 20).
- `idle_timeout_minutes`: minutes of inactivity before the supervisor intervenes (default: 10).
- `hard_timeout_minutes`: minutes before the supervisor forces termination (default: 30).
- `solver_max_iterations`: maximum autonomous solver iterations for one unit before pausing (default: `12`, min: `1`, max: `100`).
- `completion_nudge_after`: tool calls in a complete-slice unit before nudging the agent to call `sf_slice_complete` (default: 10; set `0` to disable).
- `runaway_guard_enabled`: enable active-loop diagnosis for long-running units (default: `true`).
- `runaway_tool_call_warning`: unit tool calls before a runaway warning (default: `60`; set `0` to disable this signal).

View file

@ -13,6 +13,7 @@
"kill_agent",
"memory_query",
"read",
"sf_autonomous_checkpoint",
"sf_complete_milestone",
"sf_decision_save",
"sf_exec",

View file

@ -60,6 +60,20 @@ export const PROVIDER_REGISTRY = [
envVar: "MISTRAL_API_KEY",
dashboardUrl: "console.mistral.ai",
},
{
id: "minimax",
label: "MiniMax",
category: "llm",
envVar: "MINIMAX_API_KEY",
dashboardUrl: "api.minimax.io",
},
{
id: "kimi-coding",
label: "Kimi Coding",
category: "llm",
envVar: "KIMI_API_KEY",
dashboardUrl: "platform.moonshot.ai",
},
{
id: "zai",
label: "ZAI",
@ -235,7 +249,8 @@ export function describeCredential(cred) {
* Get the auth.json path.
*/
export function getAuthPath() {
  // SF_HOME, when set to a non-empty value, relocates the whole SF home
  // directory; otherwise fall back to `$HOME/.sf`. The previous unconditional
  // `return join(HOME, ".sf", ...)` ran first and made this override
  // unreachable, so it has been removed.
  // NOTE(review): the "~" fallback is a literal path segment, not shell
  // expansion; it only applies when HOME is unset.
  const sfHome = process.env.SF_HOME || join(process.env.HOME ?? "~", ".sf");
  return join(sfHome, "agent", "auth.json");
}
/**
* Create an AuthStorage instance for key management.

View file

@ -692,6 +692,11 @@ export function resolveAutoSupervisorConfig() {
soft_timeout_minutes: configured.soft_timeout_minutes ?? 20,
idle_timeout_minutes: configured.idle_timeout_minutes ?? 10,
hard_timeout_minutes: configured.hard_timeout_minutes ?? 30,
solver_max_iterations: Number.isFinite(
Number(configured.solver_max_iterations),
)
? Math.max(1, Math.min(100, Number(configured.solver_max_iterations)))
: 12,
completion_nudge_after: configured.completion_nudge_after ?? 10,
runaway_guard_enabled: configured.runaway_guard_enabled ?? true,
runaway_tool_call_warning:

View file

@ -780,6 +780,16 @@ export function validatePreferences(preferences) {
"auto_supervisor.hard_timeout_minutes must be a non-negative number",
);
}
if (as.solver_max_iterations !== undefined) {
const val = Number(as.solver_max_iterations);
if (!Number.isNaN(val) && val >= 1 && val <= 100) {
validatedAs.solver_max_iterations = Math.floor(val);
} else {
errors.push(
"auto_supervisor.solver_max_iterations must be a number from 1 to 100",
);
}
}
if (as.phase_timeout_minutes !== undefined) {
const val = Number(as.phase_timeout_minutes);
if (!Number.isNaN(val) && val >= 0)

View file

@ -14,6 +14,7 @@ test("resolveAutoSupervisorConfig provides safe timeout defaults", () => {
assert.equal(supervisor.soft_timeout_minutes, 20);
assert.equal(supervisor.idle_timeout_minutes, 10);
assert.equal(supervisor.hard_timeout_minutes, 30);
assert.equal(supervisor.solver_max_iterations, 12);
});
test("writeUnitRuntimeRecord persists progress and recovery metadata defaults", () => {

View file

@ -0,0 +1,245 @@
import { mkdtempSync, readFileSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, expect, test } from "vitest";
import {
appendAutonomousSolverCheckpoint,
appendAutonomousSolverSteering,
assessAutonomousSolverTurn,
beginAutonomousSolverIteration,
buildAutonomousSolverPromptBlock,
consumePendingAutonomousSolverSteering,
getConfiguredAutonomousSolverMaxIterations,
readLatestAutonomousSolverCheckpoint,
recordAutonomousSolverMissingCheckpointRetry,
} from "../autonomous-solver.js";
let tempDirs = [];
function makeProject() {
const dir = mkdtempSync(join(tmpdir(), "sf-autonomous-solver-"));
tempDirs.push(dir);
return dir;
}
// Remove every temp project the finished test created, then reset the list.
afterEach(() => {
  tempDirs.forEach((projectDir) => {
    rmSync(projectDir, { recursive: true, force: true });
  });
  tempDirs = [];
});
function pdd(overrides = {}) {
return {
purpose: "Protect autonomous execution from vague hidden retries.",
consumer: "/sf autonomous unit executor.",
contract:
"Checkpoint contains outcome, progress, evidence, and remaining work.",
failureBoundary:
"Blocked or decide outcomes pause instead of continuing blind.",
evidence: "Projection and JSONL history are written.",
nonGoals: "Does not replace the normal task completion tool.",
invariants: "Each checkpoint is tied to one unit id.",
assumptions: "Filesystem writes are available under .sf/runtime.",
...overrides,
};
}
describe("autonomous solver", () => {
// Starting an iteration twice for the same unit must count up: 1, then 2.
test("beginAutonomousSolverIteration_same_unit_advances_iteration", () => {
const project = makeProject();
const first = beginAutonomousSolverIteration(
project,
"execute-task",
"M001/S01/T01",
);
const second = beginAutonomousSolverIteration(
project,
"execute-task",
"M001/S01/T01",
);
expect(first.iteration).toBe(1);
expect(second.iteration).toBe(2);
});
// Switching to a different unit id must restart the iteration counter at 1.
test("beginAutonomousSolverIteration_new_unit_resets_iteration", () => {
const project = makeProject();
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
const next = beginAutonomousSolverIteration(
project,
"execute-task",
"M001/S01/T02",
);
expect(next.iteration).toBe(1);
});
// A checkpoint must be readable back as the latest checkpoint, appear in the
// LOOP.md projection (PDD section and remaining items), and be appended to the
// iterations.jsonl history.
test("appendAutonomousSolverCheckpoint_writes_pdd_projection_and_history", () => {
const project = makeProject();
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
appendAutonomousSolverCheckpoint(project, {
unitType: "execute-task",
unitId: "M001/S01/T01",
outcome: "continue",
summary: "Implemented the first bounded repair.",
completedItems: ["Read the failing path", "Added regression test"],
remainingItems: ["Finish implementation", "Run full verification"],
verificationEvidence: ["npm run test:unit -- autonomous-solver"],
pdd: pdd(),
});
const latest = readLatestAutonomousSolverCheckpoint(
project,
"execute-task",
"M001/S01/T01",
);
const projection = readFileSync(
join(project, ".sf/runtime/autonomous-solver/LOOP.md"),
"utf-8",
);
const history = readFileSync(
join(project, ".sf/runtime/autonomous-solver/iterations.jsonl"),
"utf-8",
);
expect(latest.outcome).toBe("continue");
expect(projection).toContain("## PDD Contract");
expect(projection).toContain("Purpose: Protect autonomous execution");
expect(projection).toContain("Finish implementation");
// Only one checkpoint was written, so the trimmed history is a single JSON line.
expect(JSON.parse(history.trim()).outcome).toBe("continue");
});
// The solver prompt must state the iteration budget, name the checkpoint tool,
// list PDD fields, and mention the "decide" outcome.
test("buildAutonomousSolverPromptBlock_names_pdd_and_checkpoint_outcomes", () => {
  const rendered = buildAutonomousSolverPromptBlock({
    unitType: "execute-task",
    unitId: "M001/S01/T01",
    iteration: 3,
    maxIterations: 12,
  });
  const requiredFragments = [
    "/sf autonomous iteration 3 of 12",
    "sf_autonomous_checkpoint",
    "Purpose:",
    "Consumer:",
    "Failure boundary:",
    'outcome: "decide"',
  ];
  for (const fragment of requiredFragments) {
    expect(rendered).toContain(fragment);
  }
});
// With no checkpoint for the iteration, the first assessment asks for a repair
// retry; once that retry has been recorded, the next assessment must pause.
test("assessAutonomousSolverTurn_missing_checkpoint_retries_once_then_pauses", () => {
const project = makeProject();
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
const first = assessAutonomousSolverTurn(
project,
"execute-task",
"M001/S01/T01",
);
expect(first.action).toBe("missing-checkpoint-retry");
recordAutonomousSolverMissingCheckpointRetry(
project,
"execute-task",
"M001/S01/T01",
);
const second = assessAutonomousSolverTurn(
project,
"execute-task",
"M001/S01/T01",
);
expect(second.action).toBe("pause");
expect(second.reason).toBe("solver-missing-checkpoint");
});
// The checkpoint outcome decides the next action: "continue" keeps the loop
// going, and a later "blocked" checkpoint on the following iteration pauses
// with reason solver-blocked.
test("assessAutonomousSolverTurn_continue_and_blocked_are_authoritative", () => {
const project = makeProject();
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
appendAutonomousSolverCheckpoint(project, {
unitType: "execute-task",
unitId: "M001/S01/T01",
outcome: "continue",
summary: "More work remains.",
completedItems: ["First pass"],
remainingItems: ["Second pass"],
verificationEvidence: ["npx vitest run focused.test.mjs"],
pdd: pdd(),
});
expect(
assessAutonomousSolverTurn(project, "execute-task", "M001/S01/T01")
.action,
).toBe("continue");
// Second iteration of the same unit, this time ending blocked.
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
appendAutonomousSolverCheckpoint(project, {
unitType: "execute-task",
unitId: "M001/S01/T01",
outcome: "blocked",
summary: "Credentials unavailable.",
completedItems: [],
remainingItems: ["Wait for credentials"],
verificationEvidence: ["provider returned 401"],
blockerReason: "Missing provider token.",
pdd: pdd(),
});
const blocked = assessAutonomousSolverTurn(
project,
"execute-task",
"M001/S01/T01",
);
expect(blocked.action).toBe("pause");
expect(blocked.reason).toBe("solver-blocked");
});
// With maxIterations set to 1, a "continue" checkpoint on iteration 1 must
// pause with reason solver-max-iterations instead of scheduling another turn.
test("assessAutonomousSolverTurn_max_iterations_pauses_before_unbounded_retry", () => {
const project = makeProject();
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01", {
maxIterations: 1,
});
appendAutonomousSolverCheckpoint(project, {
unitType: "execute-task",
unitId: "M001/S01/T01",
outcome: "continue",
summary: "More work remains.",
completedItems: ["First pass"],
remainingItems: ["Second pass"],
verificationEvidence: ["npx vitest run focused.test.mjs"],
pdd: pdd(),
});
const result = assessAutonomousSolverTurn(
project,
"execute-task",
"M001/S01/T01",
);
expect(result.action).toBe("pause");
expect(result.reason).toBe("solver-max-iterations");
});
// Two appended notes are returned by the first consume call in append order;
// a second consume call returns nothing because they are already consumed.
test("steering_append_consume_is_idempotent", () => {
const project = makeProject();
appendAutonomousSolverSteering(project, "Prefer runtime enforcement.");
appendAutonomousSolverSteering(project, "Keep /sf autonomous only.");
const first = consumePendingAutonomousSolverSteering(project);
const second = consumePendingAutonomousSolverSteering(project);
expect(first).toHaveLength(2);
expect(first[0].text).toBe("Prefer runtime enforcement.");
expect(second).toHaveLength(0);
});
// No preferences -> default 12; configured values are clamped into 1..100.
test("getConfiguredAutonomousSolverMaxIterations_clamps_preference", () => {
  const resolveWith = (solver_max_iterations) =>
    getConfiguredAutonomousSolverMaxIterations({
      auto_supervisor: { solver_max_iterations },
    });
  expect(getConfiguredAutonomousSolverMaxIterations()).toBe(12);
  expect(resolveWith(0)).toBe(1);
  expect(resolveWith(150)).toBe(100);
});
});

View file

@ -4,6 +4,7 @@ import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, test } from "vitest";
import { runProviderChecks } from "../doctor-providers.js";
import { PROVIDER_REGISTRY } from "../key-manager.js";
const originalCwd = process.cwd();
const originalEnv = { ...process.env };
@ -36,6 +37,13 @@ function makePreferencesProject(globalPreferences) {
}
describe("doctor provider checks", () => {
// MiniMax and Kimi Coding must be registered with their expected env vars.
test("provider registry includes direct routed model providers used by SF preferences", () => {
  const envVarById = new Map(
    PROVIDER_REGISTRY.map((provider) => [provider.id, provider.envVar]),
  );
  assert.equal(envVarById.get("minimax"), "MINIMAX_API_KEY");
  assert.equal(envVarById.get("kimi-coding"), "KIMI_API_KEY");
});
test("runProviderChecks_when_any_configured_llm_route_is_usable_does_not_require_every_preferred_provider", () => {
makePreferencesProject(
[

View file

@ -349,6 +349,30 @@ test("runAutoLoopWithUok_success_writes_balanced_run_id_heartbeats", async () =>
assert.equal(hasCurrentParityWarning(report), false);
});
// Even with `uok.enabled: false` in preferences, the kernel loop must run with
// a UOK observer attached, the standard loop must never be invoked, and the
// first two parity events must be labelled "uok-kernel".
test("runAutoLoopWithUok_ignores_legacy_disabled_flag_and_uses_kernel_path", async () => {
  const projectRoot = makeProject();
  const state = { basePath: projectRoot, autoStartTime: NOW };
  let kernelRan = false;
  const runKernelLoop = async (_ctx, _pi, _s, deps) => {
    kernelRan = Boolean(deps.uokObserver);
  };
  const runStandardLoop = async () => {
    throw new Error("legacy standard loop should not run");
  };

  await runAutoLoopWithUok({
    ctx: testCtx("session-force-uok"),
    pi: {},
    s: state,
    deps: testDeps({ uok: { enabled: false } }),
    runKernelLoop,
    runStandardLoop,
  });

  assert.equal(kernelRan, true);
  const events = readProjectParityEvents(projectRoot);
  for (const index of [0, 1]) {
    assert.equal(events[index].path, "uok-kernel");
  }
});
test("runAutoLoopWithUok_throw_still_writes_exit_and_current_error_report", async () => {
const projectRoot = makeProject();
const state = { basePath: projectRoot, autoStartTime: NOW };

View file

@ -30,13 +30,13 @@ function refreshParityReport(basePath) {
return null;
}
}
function resolveKernelPathLabel(flags) {
return flags.enabled ? "uok-kernel" : "standard-loop";
function resolveKernelPathLabel() {
return "uok-kernel";
}
export async function runAutoLoopWithUok(args) {
const { ctx, pi, s, deps, runKernelLoop, runStandardLoop } = args;
const { ctx, pi, s, deps, runKernelLoop } = args;
const prefs = deps.loadEffectiveSFPreferences()?.preferences;
const flags = resolveUokFlags(prefs);
const flags = { ...resolveUokFlags(prefs), enabled: true };
const previousReport = refreshParityReport(s.basePath);
const runId = `uok-${randomUUID()}`;
s.currentUokRunId = runId;
@ -58,7 +58,7 @@ export async function runAutoLoopWithUok(args) {
recordUokRunStart({
runId,
sessionId: ctx.sessionManager?.getSessionId?.(),
path: resolveKernelPathLabel(flags),
path: resolveKernelPathLabel(),
flags: { ...flags },
startedAt,
});
@ -67,7 +67,7 @@ export async function runAutoLoopWithUok(args) {
ts: startedAt,
runId,
sessionId: ctx.sessionManager?.getSessionId?.(),
path: resolveKernelPathLabel(flags),
path: resolveKernelPathLabel(),
flags: { ...flags },
phase: "enter",
});
@ -85,26 +85,20 @@ export async function runAutoLoopWithUok(args) {
}),
);
}
const decoratedDeps = flags.enabled
? {
...deps,
uokObserver: createTurnObserver({
basePath: s.basePath,
gitAction: flags.gitopsTurnAction,
gitPush: flags.gitopsTurnPush,
enableAudit: flags.auditEnvelope,
enableGitops: flags.gitops,
}),
}
: deps;
const decoratedDeps = {
...deps,
uokObserver: createTurnObserver({
basePath: s.basePath,
gitAction: flags.gitopsTurnAction,
gitPush: flags.gitopsTurnPush,
enableAudit: flags.auditEnvelope,
enableGitops: flags.gitops,
}),
};
let status = "ok";
let error;
try {
if (flags.enabled) {
await runKernelLoop(ctx, pi, s, decoratedDeps);
} else {
await runStandardLoop(ctx, pi, s, deps);
}
await runKernelLoop(ctx, pi, s, decoratedDeps);
} catch (err) {
status = "error";
error = err instanceof Error ? err.message : String(err);
@ -115,7 +109,7 @@ export async function runAutoLoopWithUok(args) {
recordUokRunExit({
runId,
sessionId: ctx.sessionManager?.getSessionId?.(),
path: resolveKernelPathLabel(flags),
path: resolveKernelPathLabel(),
flags: { ...flags },
status,
endedAt,
@ -126,7 +120,7 @@ export async function runAutoLoopWithUok(args) {
ts: endedAt,
runId,
sessionId: ctx.sessionManager?.getSessionId?.(),
path: resolveKernelPathLabel(flags),
path: resolveKernelPathLabel(),
flags: { ...flags },
phase: "exit",
status,

View file

@ -64,6 +64,27 @@ test("getExtensionKey normalizes top-level .ts and .js entry names to the same k
);
});
// A leftover lock directory whose owner file names PID 999999999 (presumably
// not a live process — confirm against the loader's staleness rule) must be
// cleared so the callback still runs, and the lock must be gone afterwards.
test("withResourceSyncLock removes a stale owner lock before running work", async () => {
  const tmp = mkdtempSync(join(tmpdir(), "sf-resource-loader-lock-"));
  // Clean up with try/finally rather than registering `afterEach` inside the
  // test body: a hook registered mid-test is not tied to this test's
  // completion, so the temp directory could be left behind.
  try {
    const lockDir = join(tmp, ".resource-sync.lock");
    mkdirSync(lockDir, { recursive: true });
    writeFileSync(join(lockDir, "owner"), "999999999\n");

    const { withResourceSyncLock } = await import("../resource-loader.ts");
    let ran = false;
    withResourceSyncLock(tmp, () => {
      ran = true;
    });

    assert.equal(ran, true);
    assert.equal(existsSync(lockDir), false);
  } finally {
    rmSync(tmp, { recursive: true, force: true });
  }
});
test("hasStaleCompiledExtensionSiblings only flags top-level .ts/.js sibling pairs", async (_t) => {
const { hasStaleCompiledExtensionSiblings } = await import(
"../resource-loader.ts"