sf snapshot: uncommitted changes after 61m inactivity
This commit is contained in:
parent
a1fd6cfc05
commit
152da756a1
21 changed files with 1427 additions and 103 deletions
|
|
@ -56,6 +56,36 @@ function formatCost(snapshot: QuerySnapshot): string {
|
|||
return `$${total.toFixed(4)}${workers > 0 ? ` (${workers} worker${workers === 1 ? "" : "s"})` : ""}`;
|
||||
}
|
||||
|
||||
function readSolverStatus(basePath: string): string | null {
|
||||
let state: Record<string, any>;
|
||||
try {
|
||||
state = JSON.parse(
|
||||
readFileSync(
|
||||
join(basePath, ".sf", "runtime", "autonomous-solver", "active.json"),
|
||||
"utf-8",
|
||||
),
|
||||
);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
const checkpoint = state.latestCheckpoint ?? {};
|
||||
const parts = [
|
||||
`${state.unitType ?? "unit"} ${state.unitId ?? "n/a"}`,
|
||||
`iter ${state.iteration ?? "?"}/${state.maxIterations ?? "?"}`,
|
||||
`outcome ${checkpoint.outcome ?? "none"}`,
|
||||
];
|
||||
const remaining = Array.isArray(checkpoint.remainingItems)
|
||||
? checkpoint.remainingItems.length
|
||||
: null;
|
||||
if (remaining !== null) parts.push(`${remaining} remaining`);
|
||||
if (checkpoint.blockerReason)
|
||||
parts.push(`blocker: ${checkpoint.blockerReason}`);
|
||||
if (checkpoint.decisionQuestion)
|
||||
parts.push(`decision: ${checkpoint.decisionQuestion}`);
|
||||
if (checkpoint.summary) parts.push(String(checkpoint.summary));
|
||||
return parts.join(" · ");
|
||||
}
|
||||
|
||||
function latestJsonlFile(dir: string): string | null {
|
||||
try {
|
||||
const entries = readdirSync(dir)
|
||||
|
|
@ -135,7 +165,11 @@ function formatModel(model: CurrentModel | null): string {
|
|||
|
||||
export function renderLiveStatus(
|
||||
snapshot: QuerySnapshot,
|
||||
opts: { model: CurrentModel | null; recentEvents: string[] },
|
||||
opts: {
|
||||
basePath?: string;
|
||||
model: CurrentModel | null;
|
||||
recentEvents: string[];
|
||||
},
|
||||
): string {
|
||||
const lines: string[] = [];
|
||||
lines.push("SF Status");
|
||||
|
|
@ -149,6 +183,8 @@ export function renderLiveStatus(
|
|||
lines.push(`Dispatch: ${formatDispatch(snapshot)}`);
|
||||
lines.push(`Cost: ${formatCost(snapshot)}`);
|
||||
lines.push(`Model: ${formatModel(opts.model)}`);
|
||||
const solverStatus = opts.basePath ? readSolverStatus(opts.basePath) : null;
|
||||
if (solverStatus) lines.push(`Solver: ${solverStatus}`);
|
||||
lines.push("");
|
||||
lines.push("Last Events:");
|
||||
if (opts.recentEvents.length === 0) {
|
||||
|
|
@ -177,6 +213,7 @@ async function buildStatusText(
|
|||
.map(formatMergedLogEvent);
|
||||
|
||||
return renderLiveStatus(snapshot, {
|
||||
basePath,
|
||||
model: getCurrentModel(basePath, sfHome),
|
||||
recentEvents,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -237,6 +237,100 @@ export function getNewerManagedResourceVersion(
|
|||
: null;
|
||||
}
|
||||
|
||||
const RESOURCE_SYNC_LOCK_STALE_MS = 120_000;
|
||||
const RESOURCE_SYNC_LOCK_TIMEOUT_MS = 60_000;
|
||||
const RESOURCE_SYNC_LOCK_POLL_MS = 100;
|
||||
|
||||
function sleepSync(ms: number): void {
|
||||
const signal = new Int32Array(new SharedArrayBuffer(4));
|
||||
Atomics.wait(signal, 0, 0, ms);
|
||||
}
|
||||
|
||||
function readLockPid(lockDir: string): number | null {
|
||||
try {
|
||||
const raw = readFileSync(join(lockDir, "owner"), "utf-8").trim();
|
||||
const pid = Number.parseInt(raw, 10);
|
||||
return Number.isFinite(pid) && pid > 0 ? pid : null;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function isProcessAlive(pid: number): boolean {
|
||||
try {
|
||||
process.kill(pid, 0);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function removeStaleResourceSyncLock(lockDir: string): boolean {
|
||||
if (!existsSync(lockDir)) return true;
|
||||
|
||||
const pid = readLockPid(lockDir);
|
||||
let oldEnough = false;
|
||||
try {
|
||||
oldEnough =
|
||||
Date.now() - statSync(lockDir).mtimeMs > RESOURCE_SYNC_LOCK_STALE_MS;
|
||||
} catch {
|
||||
oldEnough = true;
|
||||
}
|
||||
const ownerDead = pid !== null && !isProcessAlive(pid);
|
||||
if (!oldEnough && !ownerDead) return false;
|
||||
|
||||
try {
|
||||
rmSync(lockDir, { recursive: true, force: true, maxRetries: 3 });
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialise writes to the shared managed resource tree.
|
||||
*
|
||||
* Purpose: prevent parallel SF launches from deleting/copying the same
|
||||
* ~/.sf/agent/extensions subtree concurrently, which can surface as transient
|
||||
* ENOTEMPTY/ENOENT failures during startup.
|
||||
*
|
||||
* Consumer: initResources before it prunes and copies bundled resources.
|
||||
*/
|
||||
export function withResourceSyncLock<T>(
|
||||
agentDir: string,
|
||||
work: () => T,
|
||||
timeoutMs = RESOURCE_SYNC_LOCK_TIMEOUT_MS,
|
||||
): T {
|
||||
const lockDir = join(agentDir, ".resource-sync.lock");
|
||||
const startedAt = Date.now();
|
||||
|
||||
while (true) {
|
||||
try {
|
||||
mkdirSync(lockDir);
|
||||
writeFileSync(join(lockDir, "owner"), `${process.pid}\n`);
|
||||
break;
|
||||
} catch {
|
||||
removeStaleResourceSyncLock(lockDir);
|
||||
if (Date.now() - startedAt > timeoutMs) {
|
||||
throw new Error(
|
||||
`Timed out waiting for SF resource sync lock: ${lockDir}`,
|
||||
);
|
||||
}
|
||||
sleepSync(RESOURCE_SYNC_LOCK_POLL_MS);
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
return work();
|
||||
} finally {
|
||||
try {
|
||||
rmSync(lockDir, { recursive: true, force: true, maxRetries: 3 });
|
||||
} catch {
|
||||
/* non-fatal: stale-lock cleanup on next launch handles this */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively makes all files and directories under dirPath owner-writable.
|
||||
*
|
||||
|
|
@ -709,7 +803,10 @@ function pruneRemovedBundledExtensions(
|
|||
*/
|
||||
export function initResources(agentDir: string): void {
|
||||
mkdirSync(agentDir, { recursive: true });
|
||||
withResourceSyncLock(agentDir, () => initResourcesUnlocked(agentDir));
|
||||
}
|
||||
|
||||
function initResourcesUnlocked(agentDir: string): void {
|
||||
const currentVersion = getBundledSfVersion();
|
||||
const manifest = readManagedResourceManifest(agentDir);
|
||||
const extensionsDir = join(agentDir, "extensions");
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ import {
|
|||
getRtkSessionSavings,
|
||||
} from "../shared/rtk-session-stats.js";
|
||||
import { makeUI } from "../shared/tui.js";
|
||||
import { readAutonomousSolverState } from "./autonomous-solver.js";
|
||||
import { getErrorMessage } from "./error-utils.js";
|
||||
import { getLedger, getProjectTotals } from "./metrics.js";
|
||||
import { getActiveHook } from "./post-unit-hooks.js";
|
||||
|
|
@ -145,6 +146,25 @@ function _peekNext(unitType, state) {
|
|||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Build the single dim widget line summarising the autonomous solver, or
 * null when no solver unit is currently recorded for this project.
 */
function formatSolverWidgetLine(basePath, theme, width, pad) {
  const solver = readAutonomousSolverState(basePath);
  if (!solver?.unitType || !solver?.unitId) return null;

  const cp = solver.latestCheckpoint ?? {};
  const remainingCount = Array.isArray(cp.remainingItems)
    ? cp.remainingItems.length
    : 0;

  const pieces = [];
  pieces.push(
    `solver ${solver.iteration ?? "?"}/${solver.maxIterations ?? "?"}`,
  );
  pieces.push(cp.outcome ? `outcome ${cp.outcome}` : "outcome none");
  pieces.push(`${remainingCount} remaining`);
  // Surface at most one live issue: blockers win over open decisions.
  const issue = cp.blockerReason || cp.decisionQuestion || "";
  if (issue) pieces.push(String(issue));

  const text = pieces.join(" · ");
  return truncateToWidth(`${pad}${theme.fg("dim", text)}`, width, "…");
}
|
||||
/**
|
||||
* Describe what the next unit will be, based on current state.
|
||||
*/
|
||||
|
|
@ -727,6 +747,13 @@ export function updateProgressWidget(
|
|||
lines.push(
|
||||
rightAlign(actionLeft, theme.fg("dim", phaseLabel), width),
|
||||
);
|
||||
const solverLine = formatSolverWidgetLine(
|
||||
accessors.getBasePath(),
|
||||
theme,
|
||||
width,
|
||||
pad,
|
||||
);
|
||||
if (solverLine) lines.push(solverLine);
|
||||
// Progress bar
|
||||
const roadmapSlices = mid ? getRoadmapSlicesSync() : null;
|
||||
if (roadmapSlices) {
|
||||
|
|
@ -814,6 +841,13 @@ export function updateProgressWidget(
|
|||
const tierTag = tierBadge ? theme.fg("dim", `[${tierBadge}] `) : "";
|
||||
const phaseBadge = `${tierTag}${theme.fg("dim", phaseLabel)}`;
|
||||
lines.push(rightAlign(actionLeft, phaseBadge, width));
|
||||
const solverLine = formatSolverWidgetLine(
|
||||
accessors.getBasePath(),
|
||||
theme,
|
||||
width,
|
||||
pad,
|
||||
);
|
||||
if (solverLine) lines.push(solverLine);
|
||||
lines.push("");
|
||||
// Two-column body
|
||||
const minTwoColWidth = 76;
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ export {
|
|||
} from "./auto/infra-errors.js";
|
||||
export {
|
||||
autoLoop,
|
||||
runStandardAutoLoop,
|
||||
runUokKernelLoop,
|
||||
} from "./auto/loop.js";
|
||||
export {
|
||||
|
|
|
|||
|
|
@ -55,7 +55,6 @@ import {
|
|||
import { DISPATCH_RULES, resolveDispatch } from "./auto-dispatch.js";
|
||||
import {
|
||||
_resetPendingResolve,
|
||||
autoLoop,
|
||||
isSessionSwitchInFlight,
|
||||
resolveAgentEnd,
|
||||
resolveAgentEndCancelled,
|
||||
|
|
@ -336,8 +335,7 @@ export {
|
|||
/** Wrapper: register SIGTERM handler and store reference. */
|
||||
function registerSigtermHandler(currentBasePath) {
|
||||
const prefs = loadEffectiveSFPreferences()?.preferences;
|
||||
const flags = resolveUokFlags(prefs);
|
||||
const pathLabel = flags.enabled ? "uok-kernel" : "standard-loop";
|
||||
const flags = { ...resolveUokFlags(prefs), enabled: true };
|
||||
const onSignal = () => {
|
||||
// Write UOK parity exit heartbeat before process.exit(0) bypasses
|
||||
// the finally block in runAutoLoopWithUok. Fixes the enter/exit
|
||||
|
|
@ -346,7 +344,7 @@ function registerSigtermHandler(currentBasePath) {
|
|||
ts: new Date().toISOString(),
|
||||
...(s.currentUokRunId ? { runId: s.currentUokRunId } : {}),
|
||||
sessionId: s.cmdCtx?.sessionManager?.getSessionId?.(),
|
||||
path: pathLabel,
|
||||
path: "uok-kernel",
|
||||
flags: { ...flags },
|
||||
phase: "exit",
|
||||
status: "signal",
|
||||
|
|
@ -1734,7 +1732,6 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
|
|||
s,
|
||||
deps: buildLoopDeps(),
|
||||
runKernelLoop: runUokKernelLoop,
|
||||
runStandardLoop: autoLoop,
|
||||
});
|
||||
cleanupAfterLoopExit(ctx);
|
||||
return;
|
||||
|
|
@ -1783,7 +1780,6 @@ export async function startAuto(ctx, pi, base, verboseMode, options) {
|
|||
s,
|
||||
deps: buildLoopDeps(),
|
||||
runKernelLoop: runUokKernelLoop,
|
||||
runStandardLoop: autoLoop,
|
||||
});
|
||||
cleanupAfterLoopExit(ctx);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -248,16 +248,7 @@ function resolveDispatchNodeKind(unitType, sidecarItem) {
|
|||
}
|
||||
return "unit";
|
||||
}
|
||||
async function runUnitPhaseViaContract(
|
||||
dispatchContract,
|
||||
ic,
|
||||
iterData,
|
||||
loopState,
|
||||
sidecarItem,
|
||||
) {
|
||||
if (dispatchContract === "standard-direct") {
|
||||
return runUnitPhase(ic, iterData, loopState, sidecarItem);
|
||||
}
|
||||
async function runUnitPhaseViaContract(ic, iterData, loopState, sidecarItem) {
|
||||
const scheduler = new ExecutionGraphScheduler();
|
||||
let outcome = null;
|
||||
const executeNode = async () => {
|
||||
|
|
@ -307,8 +298,7 @@ async function enforceMinRequestInterval(s, prefs) {
|
|||
* This is the linear replacement for the recursive
|
||||
* dispatchNextUnit → handleAgentEnd → dispatchNextUnit chain.
|
||||
*/
|
||||
export async function autoLoop(ctx, pi, s, deps, options) {
|
||||
const dispatchContract = options?.dispatchContract ?? "standard-direct";
|
||||
export async function autoLoop(ctx, pi, s, deps) {
|
||||
debugLog("autoLoop", { phase: "enter" });
|
||||
let iteration = 0;
|
||||
// Load persisted stuck state so counters survive session restarts (#3704)
|
||||
|
|
@ -571,7 +561,6 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|||
// ── Unit execution (shared with dev path) ──
|
||||
await enforceMinRequestInterval(s, ic.prefs);
|
||||
const unitPhaseResult = await runUnitPhaseViaContract(
|
||||
dispatchContract,
|
||||
ic,
|
||||
iterData,
|
||||
loopState,
|
||||
|
|
@ -590,6 +579,10 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|||
finishTurn("stopped", "execution", "unit-break");
|
||||
break;
|
||||
}
|
||||
if (unitPhaseResult.action === "continue") {
|
||||
finishTurn("retry");
|
||||
continue;
|
||||
}
|
||||
// ── Verify first, then reconcile (only mark complete on pass) ──
|
||||
debugLog("autoLoop", {
|
||||
phase: "custom-engine-verify",
|
||||
|
|
@ -875,7 +868,6 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|||
}
|
||||
await enforceMinRequestInterval(s, ic.prefs);
|
||||
const unitPhaseResult = await runUnitPhaseViaContract(
|
||||
dispatchContract,
|
||||
ic,
|
||||
iterData,
|
||||
loopState,
|
||||
|
|
@ -895,6 +887,10 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|||
finishTurn("stopped", "execution", "unit-break");
|
||||
break;
|
||||
}
|
||||
if (unitPhaseResult.action === "continue") {
|
||||
finishTurn("retry");
|
||||
continue;
|
||||
}
|
||||
// ── Phase 5: Finalize ───────────────────────────────────────────────
|
||||
const finalizeResult = await withPhaseTimeout(
|
||||
"finalize",
|
||||
|
|
@ -1117,8 +1113,5 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|||
}
|
||||
// ── Dispatch-contract entry points ───────────────────────────────────────
|
||||
export async function runUokKernelLoop(ctx, pi, s, deps) {
|
||||
return autoLoop(ctx, pi, s, deps, { dispatchContract: "uok-scheduler" });
|
||||
}
|
||||
export async function runStandardAutoLoop(ctx, pi, s, deps) {
|
||||
return autoLoop(ctx, pi, s, deps, { dispatchContract: "standard-direct" });
|
||||
return autoLoop(ctx, pi, s, deps);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,6 +34,16 @@ import {
|
|||
formatToolCallSummary,
|
||||
resetToolCallCounts,
|
||||
} from "../auto-tool-tracking.js";
|
||||
import {
|
||||
assessAutonomousSolverTurn,
|
||||
beginAutonomousSolverIteration,
|
||||
buildAutonomousSolverMissingCheckpointRepairPrompt,
|
||||
buildAutonomousSolverPromptBlock,
|
||||
buildAutonomousSolverSteeringPromptBlock,
|
||||
consumePendingAutonomousSolverSteering,
|
||||
getConfiguredAutonomousSolverMaxIterations,
|
||||
recordAutonomousSolverMissingCheckpointRetry,
|
||||
} from "../autonomous-solver.js";
|
||||
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
|
||||
import { debugLog } from "../debug-logger.js";
|
||||
import { PROJECT_FILES } from "../detection.js";
|
||||
|
|
@ -1874,6 +1884,40 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
}
|
||||
}
|
||||
// Prompt char measurement
|
||||
try {
|
||||
const solverState = beginAutonomousSolverIteration(
|
||||
s.basePath,
|
||||
unitType,
|
||||
unitId,
|
||||
{
|
||||
maxIterations: getConfiguredAutonomousSolverMaxIterations(prefs),
|
||||
},
|
||||
);
|
||||
const steeringBlock = buildAutonomousSolverSteeringPromptBlock(
|
||||
consumePendingAutonomousSolverSteering(s.basePath),
|
||||
);
|
||||
if (steeringBlock) {
|
||||
finalPrompt = `${finalPrompt}\n\n---\n\n${steeringBlock}`;
|
||||
}
|
||||
finalPrompt = `${finalPrompt}\n\n---\n\n${buildAutonomousSolverPromptBlock(solverState)}`;
|
||||
deps.emitJournalEvent({
|
||||
ts: new Date().toISOString(),
|
||||
flowId: ic.flowId,
|
||||
seq: ic.nextSeq(),
|
||||
eventType: "solver-iteration-start",
|
||||
data: {
|
||||
unitType,
|
||||
unitId,
|
||||
iteration: solverState.iteration,
|
||||
maxIterations: solverState.maxIterations,
|
||||
steeringInjected: Boolean(steeringBlock),
|
||||
},
|
||||
});
|
||||
} catch (solverErr) {
|
||||
logWarning("engine", "Autonomous solver prompt injection failed", {
|
||||
error: solverErr instanceof Error ? solverErr.message : String(solverErr),
|
||||
});
|
||||
}
|
||||
s.lastPromptCharCount = finalPrompt.length;
|
||||
s.lastBaselineCharCount = undefined;
|
||||
if (deps.isDbAvailable()) {
|
||||
|
|
@ -2018,12 +2062,127 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
});
|
||||
const unitResult = await runUnit(ctx, pi, s, unitType, unitId, finalPrompt);
|
||||
s.lastUnitAgentEndMessages = unitResult.event?.messages ?? null;
|
||||
let currentUnitResult = unitResult;
|
||||
let solverAssessment = assessAutonomousSolverTurn(
|
||||
s.basePath,
|
||||
unitType,
|
||||
unitId,
|
||||
);
|
||||
if (solverAssessment.action === "missing-checkpoint-retry") {
|
||||
recordAutonomousSolverMissingCheckpointRetry(s.basePath, unitType, unitId);
|
||||
deps.emitJournalEvent({
|
||||
ts: new Date().toISOString(),
|
||||
flowId: ic.flowId,
|
||||
seq: ic.nextSeq(),
|
||||
eventType: "solver-missing-checkpoint-retry",
|
||||
data: {
|
||||
unitType,
|
||||
unitId,
|
||||
iteration: solverAssessment.state?.iteration,
|
||||
},
|
||||
});
|
||||
ctx.ui.notify(
|
||||
`Autonomous solver checkpoint missing for ${unitType} ${unitId}; redispatching one repair turn.`,
|
||||
"warning",
|
||||
);
|
||||
currentUnitResult = await runUnit(
|
||||
ctx,
|
||||
pi,
|
||||
s,
|
||||
unitType,
|
||||
unitId,
|
||||
buildAutonomousSolverMissingCheckpointRepairPrompt(
|
||||
solverAssessment.state,
|
||||
unitType,
|
||||
unitId,
|
||||
),
|
||||
);
|
||||
s.lastUnitAgentEndMessages = currentUnitResult.event?.messages ?? null;
|
||||
solverAssessment = assessAutonomousSolverTurn(s.basePath, unitType, unitId);
|
||||
}
|
||||
const solverCheckpoint = solverAssessment.checkpoint;
|
||||
if (solverCheckpoint) {
|
||||
deps.emitJournalEvent({
|
||||
ts: new Date().toISOString(),
|
||||
flowId: ic.flowId,
|
||||
seq: ic.nextSeq(),
|
||||
eventType: "solver-checkpoint",
|
||||
data: {
|
||||
unitType,
|
||||
unitId,
|
||||
iteration: solverCheckpoint.iteration,
|
||||
outcome: solverCheckpoint.outcome,
|
||||
remainingCount: solverCheckpoint.remainingItems?.length ?? 0,
|
||||
},
|
||||
});
|
||||
}
|
||||
if (solverAssessment.action === "pause") {
|
||||
const reason =
|
||||
solverCheckpoint?.outcome === "decide"
|
||||
? (solverCheckpoint.decisionQuestion ?? solverCheckpoint.summary)
|
||||
: solverCheckpoint?.outcome === "blocked"
|
||||
? (solverCheckpoint.blockerReason ?? solverCheckpoint.summary)
|
||||
: solverAssessment.reason;
|
||||
deps.emitJournalEvent({
|
||||
ts: new Date().toISOString(),
|
||||
flowId: ic.flowId,
|
||||
seq: ic.nextSeq(),
|
||||
eventType:
|
||||
solverAssessment.reason === "solver-max-iterations"
|
||||
? "solver-max-iterations-pause"
|
||||
: "solver-pause",
|
||||
data: {
|
||||
unitType,
|
||||
unitId,
|
||||
reason: solverAssessment.reason,
|
||||
iteration: solverAssessment.state?.iteration,
|
||||
maxIterations: solverAssessment.state?.maxIterations,
|
||||
remainingItems: solverCheckpoint?.remainingItems ?? [],
|
||||
evidencePath: ".sf/runtime/autonomous-solver/LOOP.md",
|
||||
},
|
||||
});
|
||||
ctx.ui.notify(
|
||||
`Autonomous solver paused ${unitType} ${unitId}: ${reason || solverAssessment.reason}`,
|
||||
"warning",
|
||||
);
|
||||
await deps.pauseAuto(ctx, pi);
|
||||
return {
|
||||
action: "break",
|
||||
reason: solverAssessment.reason,
|
||||
};
|
||||
}
|
||||
if (solverAssessment.action === "continue") {
|
||||
deps.emitJournalEvent({
|
||||
ts: new Date().toISOString(),
|
||||
flowId: ic.flowId,
|
||||
seq: ic.nextSeq(),
|
||||
eventType: "solver-continue-redispatch",
|
||||
data: {
|
||||
unitType,
|
||||
unitId,
|
||||
iteration: solverAssessment.state?.iteration,
|
||||
remainingItems: solverCheckpoint?.remainingItems ?? [],
|
||||
evidencePath: ".sf/runtime/autonomous-solver/LOOP.md",
|
||||
},
|
||||
});
|
||||
ctx.ui.notify(
|
||||
`Autonomous solver continuing ${unitType} ${unitId}: ${solverCheckpoint?.remainingItems?.length ?? 0} item(s) remain.`,
|
||||
"info",
|
||||
);
|
||||
return {
|
||||
action: "continue",
|
||||
data: {
|
||||
unitStartedAt: s.currentUnit?.startedAt,
|
||||
requestDispatchedAt: currentUnitResult.requestDispatchedAt,
|
||||
},
|
||||
};
|
||||
}
|
||||
debugLog("autoLoop", {
|
||||
phase: "runUnit-end",
|
||||
iteration: ic.iteration,
|
||||
unitType,
|
||||
unitId,
|
||||
status: unitResult.status,
|
||||
status: currentUnitResult.status,
|
||||
});
|
||||
// Now that runUnit has called newSession(), the session file path is correct.
|
||||
const sessionFile = deps.getSessionFile(ctx);
|
||||
|
|
@ -2039,20 +2198,22 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
// Tag the most recent window entry with error info for stuck detection
|
||||
const lastEntry = loopState.recentUnits[loopState.recentUnits.length - 1];
|
||||
if (lastEntry) {
|
||||
if (unitResult.errorContext) {
|
||||
if (currentUnitResult.errorContext) {
|
||||
lastEntry.error =
|
||||
`${unitResult.errorContext.category}:${unitResult.errorContext.message}`.slice(
|
||||
`${currentUnitResult.errorContext.category}:${currentUnitResult.errorContext.message}`.slice(
|
||||
0,
|
||||
200,
|
||||
);
|
||||
} else if (
|
||||
unitResult.status === "error" ||
|
||||
unitResult.status === "cancelled"
|
||||
currentUnitResult.status === "error" ||
|
||||
currentUnitResult.status === "cancelled"
|
||||
) {
|
||||
lastEntry.error = `${unitResult.status}:${unitType}/${unitId}`;
|
||||
} else if (unitResult.event?.messages?.length) {
|
||||
lastEntry.error = `${currentUnitResult.status}:${unitType}/${unitId}`;
|
||||
} else if (currentUnitResult.event?.messages?.length) {
|
||||
const lastMsg =
|
||||
unitResult.event.messages[unitResult.event.messages.length - 1];
|
||||
currentUnitResult.event.messages[
|
||||
currentUnitResult.event.messages.length - 1
|
||||
];
|
||||
const msgStr =
|
||||
typeof lastMsg === "string" ? lastMsg : JSON.stringify(lastMsg);
|
||||
if (/error|fail|exception/i.test(msgStr)) {
|
||||
|
|
@ -2060,28 +2221,28 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
}
|
||||
}
|
||||
}
|
||||
if (unitResult.status === "cancelled") {
|
||||
if (currentUnitResult.status === "cancelled") {
|
||||
clearDeferredCommitAfterCancelledUnit(
|
||||
s,
|
||||
ctx,
|
||||
unitType,
|
||||
unitId,
|
||||
unitResult.errorContext?.message ?? "cancelled",
|
||||
currentUnitResult.errorContext?.message ?? "cancelled",
|
||||
);
|
||||
// Provider-error pause: pauseAuto already handled cleanup and scheduled
|
||||
// recovery. Don't hard-stop — just break out of the loop (#2762).
|
||||
if (unitResult.errorContext?.category === "provider") {
|
||||
if (currentUnitResult.errorContext?.category === "provider") {
|
||||
await emitCancelledUnitEnd(
|
||||
ic,
|
||||
unitType,
|
||||
unitId,
|
||||
unitStartSeq,
|
||||
unitResult.errorContext,
|
||||
currentUnitResult.errorContext,
|
||||
);
|
||||
debugLog("autoLoop", {
|
||||
phase: "exit",
|
||||
reason: "provider-pause",
|
||||
isTransient: unitResult.errorContext.isTransient,
|
||||
isTransient: currentUnitResult.errorContext.isTransient,
|
||||
});
|
||||
return { action: "break", reason: "provider-pause" };
|
||||
}
|
||||
|
|
@ -2091,13 +2252,15 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
// Structural errors (TypeError, is not a function) are NOT transient
|
||||
// and must hard-stop to avoid infinite retry loops.
|
||||
if (
|
||||
unitResult.errorContext?.isTransient &&
|
||||
unitResult.errorContext?.category === "timeout"
|
||||
currentUnitResult.errorContext?.isTransient &&
|
||||
currentUnitResult.errorContext?.category === "timeout"
|
||||
) {
|
||||
// Session-timeout cancellations are resumable pauses: pauseAuto below preserves the auto session
|
||||
// instead of routing the cancelled unit into the hard-stop path.
|
||||
const isSessionCreationTimeout =
|
||||
unitResult.errorContext.message?.includes("Session creation timed out");
|
||||
currentUnitResult.errorContext.message?.includes(
|
||||
"Session creation timed out",
|
||||
);
|
||||
if (isSessionCreationTimeout) {
|
||||
consecutiveSessionTimeouts += 1;
|
||||
const baseRetryAfterMs = 30_000;
|
||||
|
|
@ -2150,7 +2313,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
unitType,
|
||||
unitId,
|
||||
unitStartSeq,
|
||||
unitResult.errorContext,
|
||||
currentUnitResult.errorContext,
|
||||
);
|
||||
return { action: "break", reason: "session-timeout" };
|
||||
}
|
||||
|
|
@ -2170,7 +2333,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
unitType,
|
||||
unitId,
|
||||
unitStartSeq,
|
||||
unitResult.errorContext,
|
||||
currentUnitResult.errorContext,
|
||||
);
|
||||
return { action: "break", reason: "unit-hard-timeout" };
|
||||
}
|
||||
|
|
@ -2190,16 +2353,16 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
unitType,
|
||||
unitId,
|
||||
unitStartSeq,
|
||||
unitResult.errorContext,
|
||||
currentUnitResult.errorContext,
|
||||
);
|
||||
ctx.ui.notify(
|
||||
`Session creation failed for ${unitType} ${unitId}: ${unitResult.errorContext?.message ?? "unknown"}. Stopping auto-mode.`,
|
||||
`Session creation failed for ${unitType} ${unitId}: ${currentUnitResult.errorContext?.message ?? "unknown"}. Stopping auto-mode.`,
|
||||
"warning",
|
||||
);
|
||||
await deps.stopAuto(
|
||||
ctx,
|
||||
pi,
|
||||
`Session creation failed: ${unitResult.errorContext?.message ?? "unknown"}`,
|
||||
`Session creation failed: ${currentUnitResult.errorContext?.message ?? "unknown"}`,
|
||||
);
|
||||
debugLog("autoLoop", { phase: "exit", reason: "session-failed" });
|
||||
return { action: "break", reason: "session-failed" };
|
||||
|
|
@ -2276,7 +2439,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
action: "next",
|
||||
data: {
|
||||
unitStartedAt: s.currentUnit?.startedAt,
|
||||
requestDispatchedAt: unitResult.requestDispatchedAt,
|
||||
requestDispatchedAt: currentUnitResult.requestDispatchedAt,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
@ -2330,7 +2493,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
);
|
||||
}
|
||||
}
|
||||
if (unitResult.status !== "completed" || !artifactVerified) {
|
||||
if (currentUnitResult.status !== "completed" || !artifactVerified) {
|
||||
recordLearningOutcomeForUnit(
|
||||
ic,
|
||||
unitType,
|
||||
|
|
@ -2364,7 +2527,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
data: {
|
||||
unitType,
|
||||
unitId,
|
||||
status: unitResult.status,
|
||||
status: currentUnitResult.status,
|
||||
artifactVerified,
|
||||
...(unitEndEntry
|
||||
? {
|
||||
|
|
@ -2374,8 +2537,8 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
tokens_output: unitEndEntry.tokens.output,
|
||||
}
|
||||
: {}),
|
||||
...(unitResult.errorContext
|
||||
? { errorContext: unitResult.errorContext }
|
||||
...(currentUnitResult.errorContext
|
||||
? { errorContext: currentUnitResult.errorContext }
|
||||
: {}),
|
||||
},
|
||||
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
|
||||
|
|
@ -2383,13 +2546,13 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
}
|
||||
{
|
||||
const verdict =
|
||||
unitResult.status === "completed"
|
||||
currentUnitResult.status === "completed"
|
||||
? artifactVerified
|
||||
? "success"
|
||||
: "blocked"
|
||||
: unitResult.status === "error"
|
||||
: currentUnitResult.status === "error"
|
||||
? "fail"
|
||||
: unitResult.status;
|
||||
: currentUnitResult.status;
|
||||
const ledger = deps.getLedger();
|
||||
const unitEntry = ledger?.units
|
||||
? [...ledger.units]
|
||||
|
|
@ -2417,7 +2580,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
}
|
||||
// ── Safety harness: checkpoint cleanup or rollback ──
|
||||
if (s.checkpointSha) {
|
||||
if (unitResult.status === "error" && safetyConfig.auto_rollback) {
|
||||
if (currentUnitResult.status === "error" && safetyConfig.auto_rollback) {
|
||||
const rolled = rollbackToCheckpoint(s.basePath, unitId, s.checkpointSha);
|
||||
if (rolled) {
|
||||
ctx.ui.notify(
|
||||
|
|
@ -2426,7 +2589,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
);
|
||||
debugLog("runUnitPhase", { phase: "checkpoint-rollback", unitId });
|
||||
}
|
||||
} else if (unitResult.status === "error") {
|
||||
} else if (currentUnitResult.status === "error") {
|
||||
ctx.ui.notify(
|
||||
`Unit ${unitId} failed. Pre-unit checkpoint available at ${s.checkpointSha.slice(0, 8)}`,
|
||||
"warning",
|
||||
|
|
@ -2443,7 +2606,7 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
action: "next",
|
||||
data: {
|
||||
unitStartedAt: s.currentUnit?.startedAt,
|
||||
requestDispatchedAt: unitResult.requestDispatchedAt,
|
||||
requestDispatchedAt: currentUnitResult.requestDispatchedAt,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
|
|||
537
src/resources/extensions/sf/autonomous-solver.js
Normal file
537
src/resources/extensions/sf/autonomous-solver.js
Normal file
|
|
@ -0,0 +1,537 @@
|
|||
/**
|
||||
* autonomous-solver.ts — PDD-shaped checkpoint contract for autonomous units.
|
||||
*
|
||||
* Purpose: make long-running autonomous work explicit, resumable, and auditable
|
||||
* instead of relying on a single agent turn to either finish or silently drift.
|
||||
*
|
||||
* Consumer: auto/phases.js injects the contract into each autonomous unit, and
|
||||
* bootstrap/db-tools.js records agent checkpoints via sf_autonomous_checkpoint.
|
||||
*/
|
||||
import {
|
||||
appendFileSync,
|
||||
mkdirSync,
|
||||
readFileSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
import { atomicWriteSync } from "./atomic-write.js";
|
||||
import { sfRoot } from "./paths.js";
|
||||
|
||||
// Checkpoint outcomes an autonomous unit may report.
export const AUTONOMOUS_SOLVER_OUTCOMES = [
  "continue",
  "complete",
  "blocked",
  "decide",
];

// Cap on bullets rendered per list section.
const MAX_RENDERED_ITEMS = 12;
// Default and hard bounds for the per-unit iteration ceiling
// (see getConfiguredAutonomousSolverMaxIterations).
const DEFAULT_SOLVER_MAX_ITERATIONS = 12;
const MIN_SOLVER_MAX_ITERATIONS = 1;
const MAX_SOLVER_MAX_ITERATIONS = 100;
|
||||
|
||||
/** Root directory for autonomous-solver runtime state under the SF root. */
function solverDir(basePath) {
  const runtimeRoot = join(sfRoot(basePath), "runtime");
  return join(runtimeRoot, "autonomous-solver");
}
|
||||
|
||||
/** Path of the live solver state document. */
function statePath(basePath) {
  return join(solverDir(basePath), "active.json");
}

/** Path of the human-readable loop projection. */
function projectionPath(basePath) {
  return join(solverDir(basePath), "LOOP.md");
}

/** Path of the append-only iteration history log. */
function historyPath(basePath) {
  return join(solverDir(basePath), "iterations.jsonl");
}

/** Path of the queued steering messages file. */
function steeringPath(basePath) {
  return join(solverDir(basePath), "steering.jsonl");
}
|
||||
|
||||
/** Current wall-clock time as an ISO-8601 UTC string. */
function nowIso() {
  const stamp = new Date();
  return stamp.toISOString();
}
|
||||
|
||||
/**
 * Coerce an arbitrary value into an array of non-empty trimmed strings.
 * Anything that is not an array yields [].
 */
function sanitizeList(value) {
  if (!Array.isArray(value)) return [];
  const cleaned = [];
  for (const entry of value) {
    const text = String(entry).trim();
    if (text) cleaned.push(text);
  }
  return cleaned;
}
|
||||
|
||||
/** Parse a JSON file, returning null on any read or parse failure. */
function readJson(path) {
  try {
    const raw = readFileSync(path, "utf-8");
    return JSON.parse(raw);
  } catch {
    return null;
  }
}
|
||||
|
||||
/** True when the persisted state refers to exactly this unitType/unitId pair. */
function sameUnit(state, unitType, unitId) {
  const typeMatches = state?.unitType === unitType;
  const idMatches = state?.unitId === unitId;
  return typeMatches && idMatches;
}
|
||||
|
||||
/**
 * Convert value to a whole number clamped to [min, max]; non-finite inputs
 * yield the fallback unchanged.
 */
function clampNumber(value, min, max, fallback) {
  const parsed = Number(value);
  return Number.isFinite(parsed)
    ? Math.max(min, Math.min(max, Math.floor(parsed)))
    : fallback;
}
|
||||
|
||||
/**
 * Resolve the bounded autonomous solver iteration limit.
 *
 * Purpose: prevent a misconfigured or vague unit from retrying forever while
 * still letting projects raise the ceiling for large, explicitly bounded work.
 *
 * Consumer: runUnitPhase before dispatch and preferences resolution tests.
 */
export function getConfiguredAutonomousSolverMaxIterations(preferences) {
  const configured = preferences?.auto_supervisor?.solver_max_iterations;
  return clampNumber(
    configured,
    MIN_SOLVER_MAX_ITERATIONS,
    MAX_SOLVER_MAX_ITERATIONS,
    DEFAULT_SOLVER_MAX_ITERATIONS,
  );
}
|
||||
|
||||
/**
 * Render a bounded markdown bullet list; an empty/invalid list renders the
 * fallback text as a single bullet instead.
 */
function renderList(items, fallback) {
  const visible = sanitizeList(items).slice(0, MAX_RENDERED_ITEMS);
  if (visible.length === 0) return `- ${fallback}`;
  const bullets = visible.map((item) => `- ${item}`);
  return bullets.join("\n");
}
|
||||
|
||||
/**
 * Render the eight PDD contract fields as a markdown section; absent or empty
 * fields render as "not recorded yet".
 */
function renderPdd(pdd = {}) {
  const field = (value) => value || "not recorded yet";
  const lines = [
    "## PDD Contract",
    `- Purpose: ${field(pdd.purpose)}`,
    `- Consumer: ${field(pdd.consumer)}`,
    `- Contract: ${field(pdd.contract)}`,
    `- Failure boundary: ${field(pdd.failureBoundary)}`,
    `- Evidence: ${field(pdd.evidence)}`,
    `- Non-goals: ${field(pdd.nonGoals)}`,
    `- Invariants: ${field(pdd.invariants)}`,
    `- Assumptions: ${field(pdd.assumptions)}`,
  ];
  return lines.join("\n");
}
|
||||
|
||||
/**
 * Render the full LOOP.md markdown projection for the given solver state.
 *
 * Fix: the previous version collected blank separator lines ("") and the
 * conditional blocker/decision lines into one array and then filtered out
 * every empty string — which stripped ALL intentional blank lines between
 * markdown sections, not just the absent conditional entries. Build the
 * document with conditional pushes instead so section separators survive.
 */
function renderProjection(state) {
  const checkpoint = state.latestCheckpoint ?? {};
  const lines = [
    "# Autonomous Solver Loop",
    "",
    `- Unit: ${state.unitType} ${state.unitId}`,
    `- Status: ${state.status}`,
    `- Iteration: ${state.iteration} of ${state.maxIterations}`,
    `- Started: ${state.startedAt}`,
    `- Updated: ${state.updatedAt}`,
    "",
    "## Last Checkpoint",
    `- Outcome: ${checkpoint.outcome ?? "none"}`,
    `- Summary: ${checkpoint.summary ?? "none yet"}`,
  ];
  // Only emit the conditional lines when they carry content.
  if (checkpoint.blockerReason) {
    lines.push(`- Blocker: ${checkpoint.blockerReason}`);
  }
  if (checkpoint.decisionQuestion) {
    lines.push(`- Decision needed: ${checkpoint.decisionQuestion}`);
  }
  lines.push(
    "",
    "## Completed This Iteration",
    renderList(checkpoint.completedItems, "Nothing recorded yet."),
    "",
    "## Remaining",
    renderList(
      checkpoint.remainingItems,
      "Unknown until the agent checkpoints.",
    ),
    "",
    renderPdd(checkpoint.pdd),
    "",
    "## Verification Evidence",
    renderList(
      checkpoint.verificationEvidence,
      "No verification evidence recorded yet.",
    ),
  );
  return lines.join("\n");
}
|
||||
|
||||
/**
 * Persist the solver state atomically: the structured JSON file and its
 * markdown projection are both rewritten on every update.
 */
function writeState(basePath, state) {
  mkdirSync(solverDir(basePath), { recursive: true });
  const serialized = `${JSON.stringify(state, null, 2)}\n`;
  atomicWriteSync(statePath(basePath), serialized);
  atomicWriteSync(projectionPath(basePath), renderProjection(state));
}
|
||||
|
||||
/**
 * Start or advance the persisted autonomous solver iteration for a unit.
 *
 * Purpose: each autonomous dispatch gets an explicit iteration number and
 * durable loop projection, so retries and restarts have concrete state.
 *
 * Consumer: runUnitPhase before sending the unit prompt.
 *
 * Resumption rules: the iteration counter advances only when the existing
 * state is for the same unit and not yet complete; a different unit (or a
 * completed one) restarts at iteration 1. startedAt and latestCheckpoint are
 * carried over only for the same unit.
 */
export function beginAutonomousSolverIteration(
  basePath,
  unitType,
  unitId,
  options = {},
) {
  const existing = readJson(statePath(basePath));
  const resuming = sameUnit(existing, unitType, unitId);

  let priorIteration = 0;
  if (resuming && existing.status !== "complete") {
    priorIteration = Number(existing.iteration ?? 0);
  }

  // Explicit option wins; otherwise keep whatever ceiling was persisted.
  const maxIterations = clampNumber(
    options.maxIterations ?? existing?.maxIterations,
    MIN_SOLVER_MAX_ITERATIONS,
    MAX_SOLVER_MAX_ITERATIONS,
    DEFAULT_SOLVER_MAX_ITERATIONS,
  );

  const state = {
    unitType,
    unitId,
    status: "running",
    iteration: Math.max(1, priorIteration + 1),
    maxIterations,
    startedAt: resuming ? existing.startedAt || nowIso() : nowIso(),
    updatedAt: nowIso(),
    latestCheckpoint: resuming ? (existing.latestCheckpoint ?? null) : null,
    missingCheckpointRetry: null,
  };
  writeState(basePath, state);
  return state;
}
|
||||
|
||||
/**
 * Build the PDD autonomous solver prompt block appended to unit prompts.
 *
 * Purpose: bind every autonomous unit to bounded iterations, evidence, stop
 * signals, and the eight PDD fields instead of open-ended hidden retries.
 *
 * Consumer: runUnitPhase prompt injection.
 */
export function buildAutonomousSolverPromptBlock(state) {
  const header = [
    "## Autonomous Solver Loop Contract",
    "",
    `You are inside /sf autonomous iteration ${state.iteration} of ${state.maxIterations} for ${state.unitType} ${state.unitId}.`,
    "",
    "This is SF's built-in solver loop. It is not a separate Ralph workflow. Work one bounded, useful chunk; preserve enough state for the next autonomous iteration to continue without guessing.",
    "",
  ];
  const outcomes = [
    "Before ending the turn, call `sf_autonomous_checkpoint` with:",
    '- `outcome: "complete"` only when this unit\'s normal completion tool/artifact is also done.',
    '- `outcome: "continue"` when you made real progress but more autonomous iterations are needed.',
    '- `outcome: "blocked"` when the next step cannot proceed without unavailable facts, credentials, or a broken environment.',
    '- `outcome: "decide"` when there is a material product/architecture choice that must not be decided autonomously.',
    "",
  ];
  const pddFields = [
    "Checkpoint the eight PDD fields every time:",
    "- Purpose: why this behavior exists and what value it protects.",
    "- Consumer: who or what uses it in production.",
    "- Contract: the observable behavior or artifact boundary.",
    "- Failure boundary: what failures must be contained or surfaced.",
    "- Evidence: commands, files, tests, or runtime observations proving progress.",
    "- Non-goals: what you intentionally did not solve this iteration.",
    "- Invariants: rules that must remain true across iterations.",
    "- Assumptions: uncertain facts you relied on and how to falsify them later.",
    "",
  ];
  const closing = [
    "If you are executing an `execute-task` unit and the task is finished, `sf_task_complete` remains mandatory; `sf_autonomous_checkpoint` does not replace it.",
    "If you need another iteration, leave exact remaining items in the checkpoint rather than ending with vague prose.",
  ];
  return [...header, ...outcomes, ...pddFields, ...closing].join("\n");
}
|
||||
|
||||
/**
 * Record a solver checkpoint and update the markdown projection.
 *
 * Purpose: turn the agent's end-of-iteration status into structured autonomous
 * state that can be inspected, gated, and resumed.
 *
 * Consumer: sf_autonomous_checkpoint tool.
 *
 * Fix: when the persisted state belonged to a DIFFERENT unit, the checkpoint
 * correctly reset its own `iteration` to 1, but the merged next state kept the
 * stale `iteration` from the old unit. assessAutonomousSolverTurn compares
 * `checkpoint.iteration === state.iteration`, so the mismatch forced a
 * spurious missing-checkpoint retry. The state's iteration is now kept in
 * lockstep with the recorded checkpoint.
 */
export function appendAutonomousSolverCheckpoint(basePath, params) {
  const state =
    readJson(statePath(basePath)) ??
    beginAutonomousSolverIteration(basePath, params.unitType, params.unitId);
  // Normalize a possibly-missing PDD field into a trimmed string.
  const pddField = (value) => String(value ?? "").trim();
  const checkpoint = {
    ts: nowIso(),
    unitType: params.unitType,
    unitId: params.unitId,
    // A checkpoint for a different unit than the persisted state starts over.
    iteration: sameUnit(state, params.unitType, params.unitId)
      ? state.iteration
      : 1,
    outcome: params.outcome,
    summary: String(params.summary ?? "").trim(),
    completedItems: sanitizeList(params.completedItems),
    remainingItems: sanitizeList(params.remainingItems),
    verificationEvidence: sanitizeList(params.verificationEvidence),
    blockerReason: params.blockerReason
      ? String(params.blockerReason).trim()
      : undefined,
    decisionQuestion: params.decisionQuestion
      ? String(params.decisionQuestion).trim()
      : undefined,
    pdd: {
      purpose: pddField(params.pdd?.purpose),
      consumer: pddField(params.pdd?.consumer),
      contract: pddField(params.pdd?.contract),
      failureBoundary: pddField(params.pdd?.failureBoundary),
      evidence: pddField(params.pdd?.evidence),
      nonGoals: pddField(params.pdd?.nonGoals),
      invariants: pddField(params.pdd?.invariants),
      assumptions: pddField(params.pdd?.assumptions),
    },
  };
  const nextState = {
    ...state,
    unitType: params.unitType,
    unitId: params.unitId,
    // Keep the structured iteration aligned with the checkpoint so the
    // current-checkpoint match in assessAutonomousSolverTurn holds.
    iteration: checkpoint.iteration,
    // blocked/decide pause the loop; complete ends it; continue keeps running.
    status:
      params.outcome === "complete"
        ? "complete"
        : params.outcome === "blocked" || params.outcome === "decide"
          ? "paused"
          : "running",
    updatedAt: checkpoint.ts,
    latestCheckpoint: checkpoint,
  };
  // NOTE(review): when the unit changed, startedAt is still inherited from
  // the prior unit's state — presumably acceptable, but confirm.
  mkdirSync(dirname(historyPath(basePath)), { recursive: true });
  // Append-only history: one JSON line per checkpoint.
  writeFileSync(historyPath(basePath), `${JSON.stringify(checkpoint)}\n`, {
    flag: "a",
  });
  writeState(basePath, nextState);
  return checkpoint;
}
|
||||
|
||||
/**
 * Read the current persisted autonomous solver state.
 *
 * Purpose: status surfaces and loop enforcement need one structured source for
 * the active solver unit instead of scraping markdown projections.
 *
 * Consumer: /sf status, sf-progress, and runUnitPhase.
 *
 * @returns the parsed state object, or null when none exists or parsing fails.
 */
export function readAutonomousSolverState(basePath) {
  const path = statePath(basePath);
  return readJson(path);
}
|
||||
|
||||
/**
 * Record that a missing checkpoint repair dispatch has already been attempted.
 *
 * Purpose: enforce the checkpoint contract with one repair chance while
 * preventing an unbounded missing-checkpoint redispatch loop.
 *
 * Consumer: runUnitPhase after the first unit turn omits sf_autonomous_checkpoint.
 *
 * Fix: the timestamp was computed twice via separate nowIso() calls, so
 * `updatedAt` and `missingCheckpointRetry.ts` could record different values
 * for one event; a single timestamp is now shared by both fields.
 *
 * @returns the updated state, or null when no state exists for this unit.
 */
export function recordAutonomousSolverMissingCheckpointRetry(
  basePath,
  unitType,
  unitId,
) {
  const state = readJson(statePath(basePath));
  if (!sameUnit(state, unitType, unitId)) return null;
  const ts = nowIso();
  const nextState = {
    ...state,
    status: "running",
    updatedAt: ts,
    missingCheckpointRetry: {
      iteration: state.iteration,
      ts,
    },
  };
  writeState(basePath, nextState);
  return nextState;
}
|
||||
|
||||
/**
 * Classify the completed solver turn into the next loop action.
 *
 * Purpose: make checkpoint outcomes authoritative for autonomous scheduling
 * instead of letting artifact heuristics silently override blocked, decision,
 * continue, or missing-checkpoint states.
 *
 * Consumer: runUnitPhase immediately after each unit turn.
 *
 * Returns { action, reason, state, checkpoint? } where action is one of
 * "missing-checkpoint-retry", "pause", "continue", or "complete". The guards
 * below are order-sensitive: checkpoint presence is established before the
 * budget and outcome checks.
 */
export function assessAutonomousSolverTurn(basePath, unitType, unitId) {
  const state = readJson(statePath(basePath));
  // No persisted state for this unit at all: nothing to assess, so ask for
  // the single allowed checkpoint-repair dispatch.
  if (!sameUnit(state, unitType, unitId)) {
    return {
      action: "missing-checkpoint-retry",
      reason: "solver-missing-state",
      state,
    };
  }
  const checkpoint = state.latestCheckpoint ?? null;
  // A checkpoint only counts when it matches both the unit and the CURRENT
  // iteration number; a stale checkpoint from an earlier iteration does not.
  const hasCurrentCheckpoint =
    checkpoint?.unitType === unitType &&
    checkpoint?.unitId === unitId &&
    Number(checkpoint?.iteration) === Number(state.iteration);
  if (!hasCurrentCheckpoint) {
    // One repair attempt per iteration: if it was already spent, pause
    // instead of looping on redispatch.
    const alreadyRetried =
      Number(state.missingCheckpointRetry?.iteration) ===
      Number(state.iteration);
    if (alreadyRetried) {
      return {
        action: "pause",
        reason: "solver-missing-checkpoint",
        state,
      };
    }
    return {
      action: "missing-checkpoint-retry",
      reason: "solver-missing-checkpoint",
      state,
    };
  }
  // Iteration budget exhausted without completion: stop autonomous retries.
  if (
    state.iteration >= state.maxIterations &&
    checkpoint.outcome !== "complete"
  ) {
    return {
      action: "pause",
      reason: "solver-max-iterations",
      state,
      checkpoint,
    };
  }
  // blocked and decide are semantic stop signals; never continue past them.
  if (checkpoint.outcome === "blocked" || checkpoint.outcome === "decide") {
    return {
      action: "pause",
      reason: `solver-${checkpoint.outcome}`,
      state,
      checkpoint,
    };
  }
  // Remaining outcomes: "continue" schedules another iteration; anything else
  // (i.e. "complete") finishes the loop.
  return {
    action: checkpoint.outcome === "continue" ? "continue" : "complete",
    reason: `solver-${checkpoint.outcome}`,
    state,
    checkpoint,
  };
}
|
||||
|
||||
/**
 * Append user steering for the next autonomous solver iteration.
 *
 * Purpose: active /sf steer must redirect the next bounded iteration without
 * interrupting the current tool batch or forcing an immediate agent turn.
 *
 * Consumer: /sf steer while autonomous mode is active.
 *
 * @returns the appended entry, or null when the text is blank.
 */
export function appendAutonomousSolverSteering(basePath, text, metadata = {}) {
  const trimmed = String(text ?? "").trim();
  if (!trimmed) return null;
  // Timestamp + random suffix keeps ids unique within the JSONL queue.
  const randomSuffix = Math.random().toString(16).slice(2);
  const entry = {
    id: `${Date.now()}-${randomSuffix}`,
    ts: nowIso(),
    text: trimmed,
    consumedAt: null,
    ...metadata,
  };
  mkdirSync(solverDir(basePath), { recursive: true });
  appendFileSync(steeringPath(basePath), `${JSON.stringify(entry)}\n`);
  return entry;
}
|
||||
|
||||
/**
 * Read all steering entries from the JSONL queue. Missing files yield an
 * empty list; unparseable or falsy lines are skipped.
 */
function readSteeringEntries(basePath) {
  let raw;
  try {
    raw = readFileSync(steeringPath(basePath), "utf-8");
  } catch {
    return [];
  }
  const entries = [];
  for (const line of raw.split("\n")) {
    if (!line.trim()) continue;
    try {
      const parsed = JSON.parse(line);
      if (parsed) entries.push(parsed);
    } catch {
      // Corrupt line: skip it rather than fail the whole queue.
    }
  }
  return entries;
}
|
||||
|
||||
/**
 * Consume pending steering exactly once for prompt injection.
 *
 * Purpose: every user steering note should influence the next solver iteration
 * once, without being replayed into every later dispatch.
 *
 * Consumer: runUnitPhase before appending the solver prompt block.
 *
 * @returns the entries that were pending (pre-consumption snapshots).
 */
export function consumePendingAutonomousSolverSteering(basePath) {
  const entries = readSteeringEntries(basePath);
  const pending = entries.filter((entry) => !entry.consumedAt);
  if (pending.length === 0) return [];
  // Stamp every still-pending entry with the same consumption time and
  // rewrite the whole queue atomically.
  const consumedAt = nowIso();
  const rewritten = entries.map((entry) =>
    entry.consumedAt ? entry : { ...entry, consumedAt },
  );
  mkdirSync(solverDir(basePath), { recursive: true });
  const serialized = rewritten.map((entry) => JSON.stringify(entry)).join("\n");
  atomicWriteSync(steeringPath(basePath), `${serialized}\n`);
  return pending;
}
|
||||
|
||||
/**
 * Render consumed steering entries as a bounded prompt block.
 *
 * Purpose: keep user steering visible to the next iteration as explicit input
 * while preserving the checkpoint-driven solver contract.
 *
 * Consumer: runUnitPhase prompt injection.
 *
 * @returns "" when there is nothing to inject.
 */
export function buildAutonomousSolverSteeringPromptBlock(entries) {
  const pending = Array.isArray(entries) ? entries : [];
  if (pending.length === 0) return "";
  const bullets = pending.map(
    (entry) => `- ${String(entry.text ?? "").trim()}`,
  );
  return [
    "## Pending User Steering",
    "",
    "Apply these user overrides in this solver iteration:",
    ...bullets,
  ].join("\n");
}
|
||||
|
||||
/**
 * Build the one allowed repair prompt for a missing checkpoint.
 *
 * Purpose: give the agent a narrow chance to satisfy the solver contract before
 * autonomous mode pauses for inspection.
 *
 * Consumer: runUnitPhase when a turn ends without a current checkpoint.
 */
export function buildAutonomousSolverMissingCheckpointRepairPrompt(
  state,
  unitType,
  unitId,
) {
  const iteration = state?.iteration ?? "unknown";
  const lines = [
    "## Checkpoint Required",
    "",
    `Your previous autonomous turn for ${unitType} ${unitId} ended without calling sf_autonomous_checkpoint for iteration ${iteration}.`,
    "Do not continue implementation work in this repair turn.",
    "Inspect the work you just performed, then call sf_autonomous_checkpoint with the correct outcome and all eight PDD fields.",
    "If no useful progress happened, use outcome=blocked and explain why.",
  ];
  return lines.join("\n");
}
|
||||
|
||||
/**
 * Return the latest solver checkpoint for a unit, if one exists.
 *
 * Purpose: let autonomous finalization react to semantic blocked/decision
 * outcomes without scraping prose from model output.
 *
 * Consumer: runUnitPhase after runUnit returns.
 *
 * @returns the checkpoint object, or null when state is absent or belongs to
 *   a different unit.
 */
export function readLatestAutonomousSolverCheckpoint(
  basePath,
  unitType,
  unitId,
) {
  const state = readJson(statePath(basePath));
  return sameUnit(state, unitType, unitId)
    ? (state.latestCheckpoint ?? null)
    : null;
}
|
||||
|
|
@ -1,6 +1,10 @@
|
|||
import { Type } from "@sinclair/typebox";
|
||||
import { StringEnum } from "@singularity-forge/pi-ai";
|
||||
import { Text } from "@singularity-forge/pi-tui";
|
||||
import {
|
||||
AUTONOMOUS_SOLVER_OUTCOMES,
|
||||
appendAutonomousSolverCheckpoint,
|
||||
} from "../autonomous-solver.js";
|
||||
import {
|
||||
claimReservedId,
|
||||
findMilestoneIds,
|
||||
|
|
@ -877,6 +881,160 @@ export function registerDbTools(pi) {
|
|||
);
|
||||
},
|
||||
});
|
||||
// ─── sf_autonomous_checkpoint ───────────────────────────────────────
/**
 * Tool executor: persist a PDD-shaped checkpoint for the current autonomous
 * unit. Errors are logged and returned as tool output rather than thrown.
 */
const autonomousCheckpointExecute = async (
  _toolCallId,
  params,
  _signal,
  _onUpdate,
  _ctx,
) => {
  try {
    const checkpoint = appendAutonomousSolverCheckpoint(process.cwd(), params);
    const summary = `Recorded autonomous checkpoint for ${checkpoint.unitType} ${checkpoint.unitId}: ${checkpoint.outcome}`;
    return {
      content: [{ type: "text", text: summary }],
      details: {
        operation: "autonomous_checkpoint",
        unitType: checkpoint.unitType,
        unitId: checkpoint.unitId,
        iteration: checkpoint.iteration,
        outcome: checkpoint.outcome,
      },
    };
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    logError("tool", `sf_autonomous_checkpoint tool failed: ${msg}`, {
      tool: "sf_autonomous_checkpoint",
      error: String(err),
    });
    return {
      content: [
        { type: "text", text: `Error in sf_autonomous_checkpoint: ${msg}` },
      ],
      details: { operation: "autonomous_checkpoint", error: msg },
    };
  }
};
|
||||
// Register the checkpoint tool: schema mirrors the checkpoint shape consumed
// by appendAutonomousSolverCheckpoint (outcome + lists + eight PDD fields).
pi.registerTool({
  name: "sf_autonomous_checkpoint",
  label: "Autonomous Checkpoint",
  description:
    "Record a PDD-shaped autonomous solver checkpoint for the current unit. " +
    "Use this before ending every /sf autonomous unit turn to make progress, blockers, decisions, and remaining work explicit.",
  promptSnippet:
    "Checkpoint autonomous solver progress with PDD fields and semantic outcome",
  promptGuidelines: [
    "Call sf_autonomous_checkpoint before ending an autonomous unit turn.",
    "Use outcome=complete only when the normal unit completion artifact/tool is also complete.",
    "Use outcome=continue when you made real progress but the unit needs another autonomous iteration.",
    "Use outcome=blocked for missing facts, credentials, broken environment, or impossible next steps.",
    "Use outcome=decide for material product or architecture choices that autonomous mode must not decide silently.",
    "Fill all eight PDD fields: purpose, consumer, contract, failureBoundary, evidence, nonGoals, invariants, assumptions.",
  ],
  parameters: Type.Object({
    unitType: Type.String({
      description: "Current autonomous unit type, e.g. execute-task",
    }),
    unitId: Type.String({
      description: "Current autonomous unit id, e.g. M010/S05/T02",
    }),
    // One literal per entry of AUTONOMOUS_SOLVER_OUTCOMES, so the schema
    // stays in lockstep with the module constant.
    outcome: Type.Union(
      AUTONOMOUS_SOLVER_OUTCOMES.map((outcome) => Type.Literal(outcome)),
      {
        description:
          "Semantic result for this iteration: continue, complete, blocked, or decide",
      },
    ),
    summary: Type.String({
      description: "Concise statement of what happened this iteration",
    }),
    completedItems: Type.Array(Type.String(), {
      description: "Concrete items completed in this iteration",
    }),
    remainingItems: Type.Array(Type.String(), {
      description: "Exact remaining work for the next autonomous iteration",
    }),
    verificationEvidence: Type.Array(Type.String(), {
      description:
        "Commands, files, tests, screenshots, or observations that prove progress",
    }),
    blockerReason: Type.Optional(
      Type.String({
        description: "Required when outcome=blocked",
      }),
    ),
    decisionQuestion: Type.Optional(
      Type.String({
        description: "Required when outcome=decide",
      }),
    ),
    pdd: Type.Object({
      purpose: Type.String({ description: "Why this behavior exists" }),
      consumer: Type.String({
        description: "Who or what uses it in production",
      }),
      contract: Type.String({
        description: "Observable behavior or artifact boundary",
      }),
      failureBoundary: Type.String({
        description: "Failures that must be contained or surfaced",
      }),
      evidence: Type.String({
        description: "Proof gathered this iteration",
      }),
      nonGoals: Type.String({
        description: "What is intentionally not solved here",
      }),
      invariants: Type.String({
        description: "Rules that must remain true across iterations",
      }),
      assumptions: Type.String({
        description: "Uncertain facts and how to falsify them",
      }),
    }),
  }),
  execute: autonomousCheckpointExecute,
  // Compact one-line call rendering: tool name, [outcome], then unit label.
  renderCall(args, theme) {
    let text = theme.fg("toolTitle", theme.bold("sf_autonomous_checkpoint "));
    if (args.outcome) text += theme.fg("accent", `[${args.outcome}] `);
    if (args.unitType || args.unitId) {
      text += theme.fg(
        "muted",
        `${args.unitType ?? "unit"} ${args.unitId ?? ""}`.trim(),
      );
    }
    return new Text(text, 0, 0);
  },
  // Result rendering: error line on failure, otherwise outcome + unit label.
  renderResult(result, _options, theme) {
    const d = result.details;
    if (result.isError || d?.error) {
      return new Text(
        theme.fg("error", `Error: ${d?.error ?? "unknown"}`),
        0,
        0,
      );
    }
    return new Text(
      theme.fg(
        "success",
        `Checkpoint ${d?.outcome ?? "recorded"} · ${d?.unitType ?? ""} ${d?.unitId ?? ""}`.trim(),
      ),
      0,
      0,
    );
  },
});
|
||||
// ─── sf_plan_milestone ────────────────────────────────────────────────
|
||||
const planMilestoneExecute = async (
|
||||
_toolCallId,
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import { existsSync, mkdirSync, readFileSync } from "node:fs";
|
|||
import { join } from "node:path";
|
||||
import { checkRemoteAutoSession, isAutoActive } from "./auto.js";
|
||||
import { getAutoWorktreePath } from "./auto-worktree.js";
|
||||
import { appendAutonomousSolverSteering } from "./autonomous-solver.js";
|
||||
import {
|
||||
appendCapture,
|
||||
hasPendingCaptures,
|
||||
|
|
@ -436,29 +437,13 @@ export async function handleSteer(change, ctx, pi) {
|
|||
autoRunning && mid !== "none" ? getAutoWorktreePath(basePath, mid) : null;
|
||||
const targetPath = wtPath ?? basePath;
|
||||
await appendOverride(targetPath, change, appliedAt);
|
||||
appendAutonomousSolverSteering(targetPath, change, { appliedAt });
|
||||
const overrideLoc = wtPath
|
||||
? "worktree `.sf/OVERRIDES.md`"
|
||||
: "`.sf/OVERRIDES.md`";
|
||||
if (isAutoActive()) {
|
||||
pi.sendMessage(
|
||||
{
|
||||
customType: "sf-hard-steer",
|
||||
content: [
|
||||
"HARD STEER — User override registered.",
|
||||
"",
|
||||
`**Override:** ${change}`,
|
||||
"",
|
||||
`This override has been saved to ${overrideLoc} and will be injected into all future task prompts.`,
|
||||
"A document rewrite unit will run before the next task to propagate this change across all active plan documents.",
|
||||
"",
|
||||
"If you are mid-task, finish your current work respecting this override. The next dispatched unit will be a document rewrite.",
|
||||
].join("\n"),
|
||||
display: false,
|
||||
},
|
||||
{ triggerTurn: true },
|
||||
);
|
||||
if (autoRunning) {
|
||||
ctx.ui.notify(
|
||||
`Override registered (${overrideLoc}): "${change}". Will be applied before next task dispatch.`,
|
||||
`Override registered (${overrideLoc}): "${change}". Will be injected into the next solver iteration.`,
|
||||
"info",
|
||||
);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -125,6 +125,7 @@ Setting `prefer_skills: []` does **not** disable skill discovery — it just mea
|
|||
- `soft_timeout_minutes`: minutes before the supervisor issues a soft warning (default: 20).
|
||||
- `idle_timeout_minutes`: minutes of inactivity before the supervisor intervenes (default: 10).
|
||||
- `hard_timeout_minutes`: minutes before the supervisor forces termination (default: 30).
|
||||
- `solver_max_iterations`: maximum autonomous solver iterations for one unit before pausing (default: `12`, min: `1`, max: `100`).
|
||||
- `completion_nudge_after`: tool calls in a complete-slice unit before nudging the agent to call `sf_slice_complete` (default: 10; set `0` to disable).
|
||||
- `runaway_guard_enabled`: enable active-loop diagnosis for long-running units (default: `true`).
|
||||
- `runaway_tool_call_warning`: unit tool calls before a runaway warning (default: `60`; set `0` to disable this signal).
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
"kill_agent",
|
||||
"memory_query",
|
||||
"read",
|
||||
"sf_autonomous_checkpoint",
|
||||
"sf_complete_milestone",
|
||||
"sf_decision_save",
|
||||
"sf_exec",
|
||||
|
|
|
|||
|
|
@ -60,6 +60,20 @@ export const PROVIDER_REGISTRY = [
|
|||
envVar: "MISTRAL_API_KEY",
|
||||
dashboardUrl: "console.mistral.ai",
|
||||
},
|
||||
{
|
||||
id: "minimax",
|
||||
label: "MiniMax",
|
||||
category: "llm",
|
||||
envVar: "MINIMAX_API_KEY",
|
||||
dashboardUrl: "api.minimax.io",
|
||||
},
|
||||
{
|
||||
id: "kimi-coding",
|
||||
label: "Kimi Coding",
|
||||
category: "llm",
|
||||
envVar: "KIMI_API_KEY",
|
||||
dashboardUrl: "platform.moonshot.ai",
|
||||
},
|
||||
{
|
||||
id: "zai",
|
||||
label: "ZAI",
|
||||
|
|
@ -235,7 +249,8 @@ export function describeCredential(cred) {
|
|||
* Get the auth.json path.
|
||||
*/
|
||||
export function getAuthPath() {
  // Fix: an unconditional legacy `return join(process.env.HOME ?? "~", ...)`
  // preceded the SF_HOME-aware lines, leaving them unreachable dead code.
  // Only the SF_HOME-aware resolution remains.
  // `||` (not `??`) deliberately treats an empty SF_HOME string as unset.
  const sfHome = process.env.SF_HOME || join(process.env.HOME ?? "~", ".sf");
  return join(sfHome, "agent", "auth.json");
}
|
||||
/**
|
||||
* Create an AuthStorage instance for key management.
|
||||
|
|
|
|||
|
|
@ -692,6 +692,11 @@ export function resolveAutoSupervisorConfig() {
|
|||
soft_timeout_minutes: configured.soft_timeout_minutes ?? 20,
|
||||
idle_timeout_minutes: configured.idle_timeout_minutes ?? 10,
|
||||
hard_timeout_minutes: configured.hard_timeout_minutes ?? 30,
|
||||
solver_max_iterations: Number.isFinite(
|
||||
Number(configured.solver_max_iterations),
|
||||
)
|
||||
? Math.max(1, Math.min(100, Number(configured.solver_max_iterations)))
|
||||
: 12,
|
||||
completion_nudge_after: configured.completion_nudge_after ?? 10,
|
||||
runaway_guard_enabled: configured.runaway_guard_enabled ?? true,
|
||||
runaway_tool_call_warning:
|
||||
|
|
|
|||
|
|
@ -780,6 +780,16 @@ export function validatePreferences(preferences) {
|
|||
"auto_supervisor.hard_timeout_minutes must be a non-negative number",
|
||||
);
|
||||
}
|
||||
if (as.solver_max_iterations !== undefined) {
|
||||
const val = Number(as.solver_max_iterations);
|
||||
if (!Number.isNaN(val) && val >= 1 && val <= 100) {
|
||||
validatedAs.solver_max_iterations = Math.floor(val);
|
||||
} else {
|
||||
errors.push(
|
||||
"auto_supervisor.solver_max_iterations must be a number from 1 to 100",
|
||||
);
|
||||
}
|
||||
}
|
||||
if (as.phase_timeout_minutes !== undefined) {
|
||||
const val = Number(as.phase_timeout_minutes);
|
||||
if (!Number.isNaN(val) && val >= 0)
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ test("resolveAutoSupervisorConfig provides safe timeout defaults", () => {
|
|||
assert.equal(supervisor.soft_timeout_minutes, 20);
|
||||
assert.equal(supervisor.idle_timeout_minutes, 10);
|
||||
assert.equal(supervisor.hard_timeout_minutes, 30);
|
||||
assert.equal(supervisor.solver_max_iterations, 12);
|
||||
});
|
||||
|
||||
test("writeUnitRuntimeRecord persists progress and recovery metadata defaults", () => {
|
||||
|
|
|
|||
245
src/resources/extensions/sf/tests/autonomous-solver.test.mjs
Normal file
245
src/resources/extensions/sf/tests/autonomous-solver.test.mjs
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
import { mkdtempSync, readFileSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, describe, expect, test } from "vitest";
|
||||
import {
|
||||
appendAutonomousSolverCheckpoint,
|
||||
appendAutonomousSolverSteering,
|
||||
assessAutonomousSolverTurn,
|
||||
beginAutonomousSolverIteration,
|
||||
buildAutonomousSolverPromptBlock,
|
||||
consumePendingAutonomousSolverSteering,
|
||||
getConfiguredAutonomousSolverMaxIterations,
|
||||
readLatestAutonomousSolverCheckpoint,
|
||||
recordAutonomousSolverMissingCheckpointRetry,
|
||||
} from "../autonomous-solver.js";
|
||||
|
||||
let tempDirs = [];
|
||||
|
||||
// Create an isolated temp project directory and register it for cleanup in
// the afterEach hook below.
function makeProject() {
  const dir = mkdtempSync(join(tmpdir(), "sf-autonomous-solver-"));
  tempDirs.push(dir);
  return dir;
}
|
||||
|
||||
// Remove every temp project created during the test, then reset the tracker
// so one test's directories never leak into the next.
afterEach(() => {
  for (const dir of tempDirs) {
    rmSync(dir, { recursive: true, force: true });
  }
  tempDirs = [];
});
|
||||
|
||||
// Build a fully-populated eight-field PDD payload; `overrides` lets a test
// vary individual fields without repeating the rest.
function pdd(overrides = {}) {
  return {
    purpose: "Protect autonomous execution from vague hidden retries.",
    consumer: "/sf autonomous unit executor.",
    contract:
      "Checkpoint contains outcome, progress, evidence, and remaining work.",
    failureBoundary:
      "Blocked or decide outcomes pause instead of continuing blind.",
    evidence: "Projection and JSONL history are written.",
    nonGoals: "Does not replace the normal task completion tool.",
    invariants: "Each checkpoint is tied to one unit id.",
    assumptions: "Filesystem writes are available under .sf/runtime.",
    ...overrides,
  };
}
|
||||
|
||||
describe("autonomous solver", () => {
|
||||
  // Re-dispatching the SAME unit must bump the persisted iteration counter.
  test("beginAutonomousSolverIteration_same_unit_advances_iteration", () => {
    const project = makeProject();
    const first = beginAutonomousSolverIteration(
      project,
      "execute-task",
      "M001/S01/T01",
    );
    const second = beginAutonomousSolverIteration(
      project,
      "execute-task",
      "M001/S01/T01",
    );

    expect(first.iteration).toBe(1);
    expect(second.iteration).toBe(2);
  });
|
||||
|
||||
  // Switching to a DIFFERENT unit must restart the counter at 1.
  test("beginAutonomousSolverIteration_new_unit_resets_iteration", () => {
    const project = makeProject();
    beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
    const next = beginAutonomousSolverIteration(
      project,
      "execute-task",
      "M001/S01/T02",
    );

    expect(next.iteration).toBe(1);
  });
|
||||
|
||||
  // One checkpoint must update all three surfaces: the structured state
  // (read back via readLatestAutonomousSolverCheckpoint), the LOOP.md
  // markdown projection, and the append-only JSONL history.
  test("appendAutonomousSolverCheckpoint_writes_pdd_projection_and_history", () => {
    const project = makeProject();
    beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");

    appendAutonomousSolverCheckpoint(project, {
      unitType: "execute-task",
      unitId: "M001/S01/T01",
      outcome: "continue",
      summary: "Implemented the first bounded repair.",
      completedItems: ["Read the failing path", "Added regression test"],
      remainingItems: ["Finish implementation", "Run full verification"],
      verificationEvidence: ["npm run test:unit -- autonomous-solver"],
      pdd: pdd(),
    });

    const latest = readLatestAutonomousSolverCheckpoint(
      project,
      "execute-task",
      "M001/S01/T01",
    );
    const projection = readFileSync(
      join(project, ".sf/runtime/autonomous-solver/LOOP.md"),
      "utf-8",
    );
    const history = readFileSync(
      join(project, ".sf/runtime/autonomous-solver/iterations.jsonl"),
      "utf-8",
    );

    expect(latest.outcome).toBe("continue");
    expect(projection).toContain("## PDD Contract");
    expect(projection).toContain("Purpose: Protect autonomous execution");
    expect(projection).toContain("Finish implementation");
    expect(JSON.parse(history.trim()).outcome).toBe("continue");
  });
|
||||
|
||||
test("buildAutonomousSolverPromptBlock_names_pdd_and_checkpoint_outcomes", () => {
|
||||
const prompt = buildAutonomousSolverPromptBlock({
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
iteration: 3,
|
||||
maxIterations: 12,
|
||||
});
|
||||
|
||||
expect(prompt).toContain("/sf autonomous iteration 3 of 12");
|
||||
expect(prompt).toContain("sf_autonomous_checkpoint");
|
||||
expect(prompt).toContain("Purpose:");
|
||||
expect(prompt).toContain("Consumer:");
|
||||
expect(prompt).toContain("Failure boundary:");
|
||||
expect(prompt).toContain('outcome: "decide"');
|
||||
});
|
||||
|
||||
test("assessAutonomousSolverTurn_missing_checkpoint_retries_once_then_pauses", () => {
|
||||
const project = makeProject();
|
||||
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
|
||||
|
||||
const first = assessAutonomousSolverTurn(
|
||||
project,
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
);
|
||||
expect(first.action).toBe("missing-checkpoint-retry");
|
||||
|
||||
recordAutonomousSolverMissingCheckpointRetry(
|
||||
project,
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
);
|
||||
const second = assessAutonomousSolverTurn(
|
||||
project,
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
);
|
||||
expect(second.action).toBe("pause");
|
||||
expect(second.reason).toBe("solver-missing-checkpoint");
|
||||
});
|
||||
|
||||
test("assessAutonomousSolverTurn_continue_and_blocked_are_authoritative", () => {
|
||||
const project = makeProject();
|
||||
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
|
||||
appendAutonomousSolverCheckpoint(project, {
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
outcome: "continue",
|
||||
summary: "More work remains.",
|
||||
completedItems: ["First pass"],
|
||||
remainingItems: ["Second pass"],
|
||||
verificationEvidence: ["npx vitest run focused.test.mjs"],
|
||||
pdd: pdd(),
|
||||
});
|
||||
expect(
|
||||
assessAutonomousSolverTurn(project, "execute-task", "M001/S01/T01")
|
||||
.action,
|
||||
).toBe("continue");
|
||||
|
||||
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01");
|
||||
appendAutonomousSolverCheckpoint(project, {
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
outcome: "blocked",
|
||||
summary: "Credentials unavailable.",
|
||||
completedItems: [],
|
||||
remainingItems: ["Wait for credentials"],
|
||||
verificationEvidence: ["provider returned 401"],
|
||||
blockerReason: "Missing provider token.",
|
||||
pdd: pdd(),
|
||||
});
|
||||
const blocked = assessAutonomousSolverTurn(
|
||||
project,
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
);
|
||||
expect(blocked.action).toBe("pause");
|
||||
expect(blocked.reason).toBe("solver-blocked");
|
||||
});
|
||||
|
||||
test("assessAutonomousSolverTurn_max_iterations_pauses_before_unbounded_retry", () => {
|
||||
const project = makeProject();
|
||||
beginAutonomousSolverIteration(project, "execute-task", "M001/S01/T01", {
|
||||
maxIterations: 1,
|
||||
});
|
||||
appendAutonomousSolverCheckpoint(project, {
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
outcome: "continue",
|
||||
summary: "More work remains.",
|
||||
completedItems: ["First pass"],
|
||||
remainingItems: ["Second pass"],
|
||||
verificationEvidence: ["npx vitest run focused.test.mjs"],
|
||||
pdd: pdd(),
|
||||
});
|
||||
|
||||
const result = assessAutonomousSolverTurn(
|
||||
project,
|
||||
"execute-task",
|
||||
"M001/S01/T01",
|
||||
);
|
||||
expect(result.action).toBe("pause");
|
||||
expect(result.reason).toBe("solver-max-iterations");
|
||||
});
|
||||
|
||||
test("steering_append_consume_is_idempotent", () => {
|
||||
const project = makeProject();
|
||||
appendAutonomousSolverSteering(project, "Prefer runtime enforcement.");
|
||||
appendAutonomousSolverSteering(project, "Keep /sf autonomous only.");
|
||||
|
||||
const first = consumePendingAutonomousSolverSteering(project);
|
||||
const second = consumePendingAutonomousSolverSteering(project);
|
||||
|
||||
expect(first).toHaveLength(2);
|
||||
expect(first[0].text).toBe("Prefer runtime enforcement.");
|
||||
expect(second).toHaveLength(0);
|
||||
});
|
||||
|
||||
test("getConfiguredAutonomousSolverMaxIterations_clamps_preference", () => {
|
||||
expect(getConfiguredAutonomousSolverMaxIterations()).toBe(12);
|
||||
expect(
|
||||
getConfiguredAutonomousSolverMaxIterations({
|
||||
auto_supervisor: { solver_max_iterations: 0 },
|
||||
}),
|
||||
).toBe(1);
|
||||
expect(
|
||||
getConfiguredAutonomousSolverMaxIterations({
|
||||
auto_supervisor: { solver_max_iterations: 150 },
|
||||
}),
|
||||
).toBe(100);
|
||||
});
|
||||
});
|
||||
|
|
@ -4,6 +4,7 @@ import { tmpdir } from "node:os";
|
|||
import { join } from "node:path";
|
||||
import { afterEach, describe, test } from "vitest";
|
||||
import { runProviderChecks } from "../doctor-providers.js";
|
||||
import { PROVIDER_REGISTRY } from "../key-manager.js";
|
||||
|
||||
const originalCwd = process.cwd();
|
||||
const originalEnv = { ...process.env };
|
||||
|
|
@ -36,6 +37,13 @@ function makePreferencesProject(globalPreferences) {
|
|||
}
|
||||
|
||||
describe("doctor provider checks", () => {
|
||||
test("provider registry includes direct routed model providers used by SF preferences", () => {
|
||||
const providers = new Map(PROVIDER_REGISTRY.map((p) => [p.id, p]));
|
||||
|
||||
assert.equal(providers.get("minimax")?.envVar, "MINIMAX_API_KEY");
|
||||
assert.equal(providers.get("kimi-coding")?.envVar, "KIMI_API_KEY");
|
||||
});
|
||||
|
||||
test("runProviderChecks_when_any_configured_llm_route_is_usable_does_not_require_every_preferred_provider", () => {
|
||||
makePreferencesProject(
|
||||
[
|
||||
|
|
|
|||
|
|
@ -349,6 +349,30 @@ test("runAutoLoopWithUok_success_writes_balanced_run_id_heartbeats", async () =>
|
|||
assert.equal(hasCurrentParityWarning(report), false);
|
||||
});
|
||||
|
||||
test("runAutoLoopWithUok_ignores_legacy_disabled_flag_and_uses_kernel_path", async () => {
|
||||
const projectRoot = makeProject();
|
||||
const state = { basePath: projectRoot, autoStartTime: NOW };
|
||||
let kernelRan = false;
|
||||
|
||||
await runAutoLoopWithUok({
|
||||
ctx: testCtx("session-force-uok"),
|
||||
pi: {},
|
||||
s: state,
|
||||
deps: testDeps({ uok: { enabled: false } }),
|
||||
async runKernelLoop(_ctx, _pi, _s, deps) {
|
||||
kernelRan = Boolean(deps.uokObserver);
|
||||
},
|
||||
async runStandardLoop() {
|
||||
throw new Error("legacy standard loop should not run");
|
||||
},
|
||||
});
|
||||
|
||||
assert.equal(kernelRan, true);
|
||||
const events = readProjectParityEvents(projectRoot);
|
||||
assert.equal(events[0].path, "uok-kernel");
|
||||
assert.equal(events[1].path, "uok-kernel");
|
||||
});
|
||||
|
||||
test("runAutoLoopWithUok_throw_still_writes_exit_and_current_error_report", async () => {
|
||||
const projectRoot = makeProject();
|
||||
const state = { basePath: projectRoot, autoStartTime: NOW };
|
||||
|
|
|
|||
|
|
@ -30,13 +30,13 @@ function refreshParityReport(basePath) {
|
|||
return null;
|
||||
}
|
||||
}
|
||||
function resolveKernelPathLabel(flags) {
|
||||
return flags.enabled ? "uok-kernel" : "standard-loop";
|
||||
function resolveKernelPathLabel() {
|
||||
return "uok-kernel";
|
||||
}
|
||||
export async function runAutoLoopWithUok(args) {
|
||||
const { ctx, pi, s, deps, runKernelLoop, runStandardLoop } = args;
|
||||
const { ctx, pi, s, deps, runKernelLoop } = args;
|
||||
const prefs = deps.loadEffectiveSFPreferences()?.preferences;
|
||||
const flags = resolveUokFlags(prefs);
|
||||
const flags = { ...resolveUokFlags(prefs), enabled: true };
|
||||
const previousReport = refreshParityReport(s.basePath);
|
||||
const runId = `uok-${randomUUID()}`;
|
||||
s.currentUokRunId = runId;
|
||||
|
|
@ -58,7 +58,7 @@ export async function runAutoLoopWithUok(args) {
|
|||
recordUokRunStart({
|
||||
runId,
|
||||
sessionId: ctx.sessionManager?.getSessionId?.(),
|
||||
path: resolveKernelPathLabel(flags),
|
||||
path: resolveKernelPathLabel(),
|
||||
flags: { ...flags },
|
||||
startedAt,
|
||||
});
|
||||
|
|
@ -67,7 +67,7 @@ export async function runAutoLoopWithUok(args) {
|
|||
ts: startedAt,
|
||||
runId,
|
||||
sessionId: ctx.sessionManager?.getSessionId?.(),
|
||||
path: resolveKernelPathLabel(flags),
|
||||
path: resolveKernelPathLabel(),
|
||||
flags: { ...flags },
|
||||
phase: "enter",
|
||||
});
|
||||
|
|
@ -85,26 +85,20 @@ export async function runAutoLoopWithUok(args) {
|
|||
}),
|
||||
);
|
||||
}
|
||||
const decoratedDeps = flags.enabled
|
||||
? {
|
||||
...deps,
|
||||
uokObserver: createTurnObserver({
|
||||
basePath: s.basePath,
|
||||
gitAction: flags.gitopsTurnAction,
|
||||
gitPush: flags.gitopsTurnPush,
|
||||
enableAudit: flags.auditEnvelope,
|
||||
enableGitops: flags.gitops,
|
||||
}),
|
||||
}
|
||||
: deps;
|
||||
const decoratedDeps = {
|
||||
...deps,
|
||||
uokObserver: createTurnObserver({
|
||||
basePath: s.basePath,
|
||||
gitAction: flags.gitopsTurnAction,
|
||||
gitPush: flags.gitopsTurnPush,
|
||||
enableAudit: flags.auditEnvelope,
|
||||
enableGitops: flags.gitops,
|
||||
}),
|
||||
};
|
||||
let status = "ok";
|
||||
let error;
|
||||
try {
|
||||
if (flags.enabled) {
|
||||
await runKernelLoop(ctx, pi, s, decoratedDeps);
|
||||
} else {
|
||||
await runStandardLoop(ctx, pi, s, deps);
|
||||
}
|
||||
await runKernelLoop(ctx, pi, s, decoratedDeps);
|
||||
} catch (err) {
|
||||
status = "error";
|
||||
error = err instanceof Error ? err.message : String(err);
|
||||
|
|
@ -115,7 +109,7 @@ export async function runAutoLoopWithUok(args) {
|
|||
recordUokRunExit({
|
||||
runId,
|
||||
sessionId: ctx.sessionManager?.getSessionId?.(),
|
||||
path: resolveKernelPathLabel(flags),
|
||||
path: resolveKernelPathLabel(),
|
||||
flags: { ...flags },
|
||||
status,
|
||||
endedAt,
|
||||
|
|
@ -126,7 +120,7 @@ export async function runAutoLoopWithUok(args) {
|
|||
ts: endedAt,
|
||||
runId,
|
||||
sessionId: ctx.sessionManager?.getSessionId?.(),
|
||||
path: resolveKernelPathLabel(flags),
|
||||
path: resolveKernelPathLabel(),
|
||||
flags: { ...flags },
|
||||
phase: "exit",
|
||||
status,
|
||||
|
|
|
|||
|
|
@ -64,6 +64,27 @@ test("getExtensionKey normalizes top-level .ts and .js entry names to the same k
|
|||
);
|
||||
});
|
||||
|
||||
test("withResourceSyncLock removes a stale owner lock before running work", async () => {
|
||||
const tmp = mkdtempSync(join(tmpdir(), "sf-resource-loader-lock-"));
|
||||
const lockDir = join(tmp, ".resource-sync.lock");
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(tmp, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
mkdirSync(lockDir, { recursive: true });
|
||||
writeFileSync(join(lockDir, "owner"), "999999999\n");
|
||||
|
||||
const { withResourceSyncLock } = await import("../resource-loader.ts");
|
||||
let ran = false;
|
||||
withResourceSyncLock(tmp, () => {
|
||||
ran = true;
|
||||
});
|
||||
|
||||
assert.equal(ran, true);
|
||||
assert.equal(existsSync(lockDir), false);
|
||||
});
|
||||
|
||||
test("hasStaleCompiledExtensionSiblings only flags top-level .ts/.js sibling pairs", async (_t) => {
|
||||
const { hasStaleCompiledExtensionSiblings } = await import(
|
||||
"../resource-loader.ts"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue