singularity-forge/src/resources/extensions/sf/auto/phases.js

3537 lines
115 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* auto/phases.ts — Pipeline phases for the auto-loop.
*
* Contains: runPreDispatch, runDispatch, runGuards, runUnitPhase, runFinalize,
* plus internal helpers generateMilestoneReport and closeoutAndStop.
*
* Imports from: auto/types, auto/detect-stuck, auto/run-unit, auto/loop-deps
*/
import { cpSync, existsSync, readdirSync } from "node:fs";
import { basename, dirname, join, parse as parsePath } from "node:path";
import { importExtensionModule } from "@singularity-forge/coding-agent";
import {
clearCurrentPhase,
setCurrentPhase,
} from "../../shared/sf-phase-state.js";
import { atomicWriteSync } from "../atomic-write.js";
import { resetCompletionNudgeState } from "../auto-completion-nudge.js";
import {
isAwaitingUserInput,
USER_DRIVEN_DEEP_UNITS,
} from "../auto-post-unit.js";
import {
buildLoopRemediationSteps,
diagnoseExpectedArtifact,
verifyExpectedArtifact,
} from "../auto-recovery.js";
import {
collectSessionTokenUsage,
collectWorktreeFingerprint,
countChangedFiles,
resetRunawayGuardState,
} from "../auto-runaway-guard.js";
import {
formatToolCallSummary,
resetToolCallCounts,
} from "../auto-tool-tracking.js";
import {
appendAutonomousSolverCheckpoint,
assessAutonomousSolverTurn,
beginAutonomousSolverIteration,
buildAutonomousSolverMissingCheckpointRepairPrompt,
buildAutonomousSolverPromptBlock,
buildAutonomousSolverSteeringPromptBlock,
classifyAutonomousSolverMissingCheckpointFailure,
consumePendingAutonomousSolverSteering,
getConfiguredAutonomousSolverMaxIterations,
recordAutonomousSolverMissingCheckpointRetry,
} from "../autonomous-solver.js";
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
import { debugLog } from "../debug-logger.js";
import { PROJECT_FILES } from "../detection.js";
import { MergeConflictError } from "../git-service.js";
import { recordLearnedOutcome } from "../learning/runtime.js";
import { sfRoot } from "../paths.js";
import { resolvePersistModelChanges } from "../preferences.js";
import {
approveProductionMutationWithLlmPolicy,
ensureProductionMutationApprovalTemplate,
readProductionMutationApprovalStatus,
} from "../production-mutation-approval.js";
import { pauseAutoForProviderError } from "../provider-error-pause.js";
import {
buildReasoningAssistPrompt,
injectReasoningGuidance,
isReasoningAssistEnabled,
} from "../reasoning-assist.js";
import {
loadEvidenceFromDisk,
resetEvidence,
} from "../safety/evidence-collector.js";
import { getDirtyFiles } from "../safety/file-change-validator.js";
import {
cleanupCheckpoint,
createCheckpoint,
rollbackToCheckpoint,
} from "../safety/git-checkpoint.js";
import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
import { recordSelfFeedback } from "../self-feedback.js";
import {
checkpointWal,
getMilestoneSlices,
getSliceTaskCounts,
getTask,
isDbAvailable,
} from "../sf-db.js";
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
import { handleProductAudit } from "../tools/product-audit-tool.js";
import { parseUnitId } from "../unit-id.js";
import { resolveUokFlags } from "../uok/flags.js";
import { UokGateRunner } from "../uok/gate-runner.js";
import { emitModelAutoResolvedEvent } from "../uok/model-route-evidence.js";
import {
ensurePlanV2Graph as ensurePlanningFlowGraph,
isEmptyPlanV2GraphResult,
isMissingFinalizedContextResult,
} from "../uok/plan.js";
import { buildUokProgressEvent } from "../uok/progress-event.js";
import {
clearUnitRuntimeRecord,
writeUnitRuntimeRecord,
} from "../uok/unit-runtime.js";
import {
_resetLogs,
drainAndSummarize,
drainLogs,
formatForNotification,
hasAnyIssues,
logError,
logWarning,
} from "../workflow-logger.js";
import {
getRequiredWorkflowToolsForAutoUnit,
getWorkflowTransportSupportError,
} from "../workflow-tools.js";
import { resolveWorktreeProjectRoot } from "../worktree-root.js";
import { detectStuck } from "./detect-stuck.js";
import {
FINALIZE_POST_TIMEOUT_MS,
FINALIZE_PRE_TIMEOUT_MS,
withTimeout,
} from "./finalize-timeout.js";
import { runUnit } from "./run-unit.js";
import {
BUDGET_THRESHOLDS,
MAX_FINALIZE_TIMEOUTS,
MAX_RECOVERY_CHARS,
} from "./types.js";
// ─── Session timeout scheduled resume state ────────────────────────────────────────
let consecutiveSessionTimeouts = 0;
const MAX_SESSION_TIMEOUT_AUTO_RESUMES = 3;
function resetConsecutiveSessionTimeouts() {
consecutiveSessionTimeouts = 0;
}
/**
* Decide whether the UOK diagnostics verdict may continue into dispatch.
*
* Purpose: turn durable UOK self-diagnostics into autonomous control, so SF
* pauses on split-brain/runtime corruption before spending another LLM turn.
*
* Consumer: runDispatch before it starts the next autonomous unit.
*/
export function assessUokDiagnosticsDispatchGate(diagnostics) {
if (!diagnostics) return { proceed: true };
const blockingIssue = diagnostics.issues?.find(
(issue) => issue?.severity === "error",
);
if (diagnostics.verdict !== "degraded" && !blockingIssue) {
return { proceed: true };
}
const issueCode = blockingIssue?.code ?? diagnostics.issues?.[0]?.code;
const reportPath =
diagnostics.reportPath ?? ".sf/runtime/uok-diagnostics.json";
const reason = [
`UOK diagnostics blocked dispatch: ${diagnostics.verdict}/${diagnostics.classification ?? "unknown"}`,
issueCode ? `issue ${issueCode}` : "",
`evidence ${reportPath}`,
]
.filter(Boolean)
.join(" · ");
return {
proceed: false,
reason,
issueCode,
reportPath,
};
}
// ─── generateMilestoneReport ──────────────────────────────────────────────────
/**
* Resolve the base path for milestone reports.
* Prefers originalBasePath (project root) over basePath (which may be a worktree).
* Exported for testing as _resolveReportBasePath.
*/
export function _resolveReportBasePath(s) {
return s.originalBasePath || s.basePath;
}
/**
* Fire the product-audit for a milestone after successful merge.
* Uses s.productAuditMilestoneId as a guard to ensure the audit fires exactly
* once per milestone (mergeAndExit can be called multiple times for the same
* milestone at different transition points).
*
* The audit is fired with a "no-gaps" placeholder verdict. Re-run
* `/product-audit` manually for full LLM-powered gap analysis.
*/
async function maybeFireProductAudit(s, ctx) {
const mid = s.currentMilestoneId;
if (!mid) return;
// Guard: only fire once per milestone
if (s.productAuditMilestoneId === mid) return;
s.productAuditMilestoneId = mid;
const params = {
milestoneId: mid,
verdict: "no-gaps",
summary:
"Auto-fired placeholder audit at milestone merge. Re-run `/product-audit` for full LLM-powered gap analysis.",
gaps: [],
};
const result = await handleProductAudit(params, s.basePath);
if ("error" in result) {
logWarning("engine", "Product audit auto-fire failed", {
milestone: mid,
error: result.error,
});
ctx.ui.notify(
`Product audit for ${mid} auto-fired but may need manual refresh: ${result.error}`,
"warning",
);
} else {
debugLog("autoLoop", {
phase: "product-audit-fired",
milestone: mid,
jsonPath: result.jsonPath,
});
}
}
function clearDeferredCommitAfterCancelledUnit(
s,
ctx,
unitType,
unitId,
reason,
) {
if (!s.stagedPendingCommit && !s.pendingCommitTaskContext) return;
s.stagedPendingCommit = false;
s.pendingCommitTaskContext = null;
debugLog("autoLoop", {
phase: "cancelled-unit-deferred-commit-cleared",
unitType,
unitId,
reason,
});
ctx.ui.notify(
`Cancelled ${unitType} ${unitId}; staged changes were preserved for recovery and not auto-committed.`,
"warning",
);
}
export function requiresHumanProductionMutationApproval(text) {
const normalized = text.toLowerCase();
const mentionsProduction =
/\b(production|prod|live|hetzner)\b/.test(normalized) ||
normalized.includes("centralcloud.com");
if (!mentionsProduction) return false;
const mentionsUnifiedFailover =
normalized.includes("unified_failover") ||
normalized.includes("unified-failover") ||
normalized.includes("/action/unified");
if (!mentionsUnifiedFailover) return false;
return /\b(post|enqueue|create|insert|command row|pending command)\b/.test(
normalized,
);
}
/**
* Resolve the authoritative project base for dispatch guards.
* Prior-milestone completion lives at the project root, even when the active
* unit is running inside an auto worktree.
*/
export function _resolveDispatchGuardBasePath(s) {
return resolveWorktreeProjectRoot(s.basePath, s.originalBasePath);
}
const PLANNING_FLOW_GATE_PHASES = new Set([
"executing",
"summarizing",
"validating-milestone",
"completing-milestone",
]);
function shouldRunPlanningFlowGate(phase) {
return PLANNING_FLOW_GATE_PHASES.has(phase);
}
function shouldSkipArtifactVerification(unitType) {
return unitType.startsWith("hook/") || unitType === "custom-step";
}
function recordLearningOutcomeForUnit(
ic,
unitType,
unitId,
startedAt,
outcome,
) {
if (!startedAt) return;
const unitModel = ic.s.currentUnitModel;
const unitEntry = ic.deps.getLedger()?.units
? [...(ic.deps.getLedger()?.units ?? [])]
.reverse()
.find(
(u) =>
u.type === unitType && u.id === unitId && u.startedAt === startedAt,
)
: undefined;
const provider = unitModel?.provider ?? null;
const modelId = unitModel?.id ?? unitEntry?.model ?? null;
if (!provider || !modelId || !unitEntry) return;
recordLearnedOutcome({
modelId,
provider,
unitType,
unitId,
succeeded: outcome.succeeded,
retries: outcome.retries ?? 0,
escalated: outcome.escalated ?? false,
verification_passed: outcome.verificationPassed,
blocker_discovered: outcome.blockerDiscovered ?? false,
duration_ms: Math.max(0, unitEntry.finishedAt - unitEntry.startedAt),
tokens_total: unitEntry.tokens.total,
cost_usd: unitEntry.cost,
recorded_at: unitEntry.startedAt,
});
}
/**
* Generate and write an HTML milestone report snapshot.
* Extracted from the milestone-transition block in autoLoop.
*/
async function generateMilestoneReport(s, ctx, milestoneId) {
const { loadVisualizerData } = await importExtensionModule(
import.meta.url,
"../visualizer-data.js",
);
const { generateHtmlReport } = await importExtensionModule(
import.meta.url,
"../export-html.js",
);
const { writeReportSnapshot } = await importExtensionModule(
import.meta.url,
"../reports.js",
);
const { basename } = await import("node:path");
const reportBasePath = _resolveReportBasePath(s);
const snapData = await loadVisualizerData(reportBasePath);
const completedMs = snapData.milestones.find((m) => m.id === milestoneId);
const msTitle = completedMs?.title ?? milestoneId;
const sfVersion = process.env.SF_VERSION ?? "0.0.0";
const projName = basename(reportBasePath);
const doneSlices = snapData.milestones.reduce(
(acc, m) => acc + m.slices.filter((sl) => sl.done).length,
0,
);
const totalSlices = snapData.milestones.reduce(
(acc, m) => acc + m.slices.length,
0,
);
const outPath = writeReportSnapshot({
basePath: reportBasePath,
html: generateHtmlReport(snapData, {
projectName: projName,
projectPath: reportBasePath,
sfVersion,
milestoneId,
indexRelPath: "index.html",
}),
milestoneId,
milestoneTitle: msTitle,
kind: "milestone",
projectName: projName,
projectPath: reportBasePath,
sfVersion,
totalCost: snapData.totals?.cost ?? 0,
totalTokens: snapData.totals?.tokens.total ?? 0,
totalDuration: snapData.totals?.duration ?? 0,
doneSlices,
totalSlices,
doneMilestones: snapData.milestones.filter((m) => m.status === "complete")
.length,
totalMilestones: snapData.milestones.length,
phase: snapData.phase,
});
ctx.ui.notify(
`Report saved: .sf/reports/${basename(outPath)} — open index.html to browse progression.`,
"info",
);
}
// ─── closeoutAndStop ──────────────────────────────────────────────────────────
/**
* If a unit is in-flight, close it out, then stop autonomous mode.
* Extracted from ~4 identical if-closeout-then-stop sequences in autoLoop.
*/
async function closeoutAndStop(ctx, pi, s, deps, reason) {
if (s.currentUnit) {
await deps.closeoutUnit(
ctx,
s.basePath,
s.currentUnit.type,
s.currentUnit.id,
s.currentUnit.startedAt,
deps.buildSnapshotOpts(s.currentUnit.type, s.currentUnit.id),
);
s.currentUnit = null;
}
await deps.stopAuto(ctx, pi, reason);
}
async function emitCancelledUnitEnd(
ic,
unitType,
unitId,
unitStartSeq,
errorContext,
) {
ic.deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "unit-end",
data: {
unitType,
unitId,
status: "cancelled",
artifactVerified: false,
...(errorContext ? { errorContext } : {}),
},
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
});
}
// ─── runPreDispatch ───────────────────────────────────────────────────────────
/**
* Phase 1: Pre-dispatch — resource guard, health gate, state derivation,
* milestone transition, terminal conditions.
* Returns break to exit the loop, or next with PreDispatchData on success.
*/
export async function runPreDispatch(ic, loopState) {
const { ctx, pi, s, deps, prefs } = ic;
const uokFlags = resolveUokFlags(prefs);
const runPreDispatchGate = async (input) => {
if (!uokFlags.gates) return;
const gateRunner = new UokGateRunner();
gateRunner.register({
id: input.gateId,
type: input.gateType,
execute: async () => ({
outcome: input.outcome,
failureClass: input.failureClass,
rationale: input.rationale,
findings: input.findings ?? "",
}),
});
await gateRunner.run(input.gateId, {
basePath: s.basePath,
traceId: `pre-dispatch:${ic.flowId}`,
turnId: `iter-${ic.iteration}`,
milestoneId: input.milestoneId ?? s.currentMilestoneId ?? undefined,
unitType: "pre-dispatch",
unitId: `iter-${ic.iteration}`,
});
};
// Resource version guard
const staleMsg = deps.checkResourcesStale(s.resourceVersionOnStart);
if (staleMsg) {
await runPreDispatchGate({
gateId: "resource-version-guard",
gateType: "policy",
outcome: "fail",
failureClass: "policy",
rationale: "resource version guard blocked dispatch",
findings: staleMsg,
});
await deps.stopAuto(ctx, pi, staleMsg);
debugLog("autoLoop", { phase: "exit", reason: "resources-stale" });
return { action: "break", reason: "resources-stale" };
}
await runPreDispatchGate({
gateId: "resource-version-guard",
gateType: "policy",
outcome: "pass",
failureClass: "none",
rationale: "resource version guard passed",
});
deps.invalidateAllCaches();
s.lastPromptCharCount = undefined;
s.lastBaselineCharCount = undefined;
// Pre-dispatch health gate
try {
const healthGate = await deps.preDispatchHealthGate(s.basePath);
if (healthGate.fixesApplied.length > 0) {
ctx.ui.notify(
`Pre-dispatch: ${healthGate.fixesApplied.join(", ")}`,
"info",
);
}
if (!healthGate.proceed) {
await runPreDispatchGate({
gateId: "pre-dispatch-health-gate",
gateType: "execution",
outcome: "manual-attention",
failureClass: "manual-attention",
rationale: "pre-dispatch health gate blocked dispatch",
findings: healthGate.reason,
});
ctx.ui.notify(
healthGate.reason ||
"Pre-dispatch health check failed — run /doctor for details.",
"error",
);
await deps.pauseAuto(ctx, pi);
debugLog("autoLoop", { phase: "exit", reason: "health-gate-failed" });
return { action: "break", reason: "health-gate-failed" };
}
await runPreDispatchGate({
gateId: "pre-dispatch-health-gate",
gateType: "execution",
outcome: "pass",
failureClass: "none",
rationale: "pre-dispatch health gate passed",
findings:
healthGate.fixesApplied.length > 0
? healthGate.fixesApplied.join(", ")
: "",
});
} catch (e) {
await runPreDispatchGate({
gateId: "pre-dispatch-health-gate",
gateType: "execution",
outcome: "manual-attention",
failureClass: "manual-attention",
rationale: "pre-dispatch health gate threw unexpectedly",
findings: String(e),
});
logWarning("engine", "Pre-dispatch health gate threw unexpectedly", {
error: String(e),
});
}
// Sync project root artifacts into worktree
if (
s.originalBasePath &&
s.basePath !== s.originalBasePath &&
s.currentMilestoneId
) {
deps.syncProjectRootToWorktree(
s.originalBasePath,
s.basePath,
s.currentMilestoneId,
);
}
// Derive state
let state = await deps.deriveState(s.basePath);
if (
uokFlags.planningFlow &&
isDbAvailable() &&
shouldRunPlanningFlowGate(state.phase)
) {
let compiled = ensurePlanningFlowGraph(s.basePath, state);
// Empty-graph recovery: stale DB caches can yield 0 nodes right after a
// task-complete write. Invalidate caches, re-derive state, and retry once.
if (isEmptyPlanV2GraphResult(compiled)) {
deps.invalidateAllCaches();
state = await deps.deriveState(s.basePath);
compiled = shouldRunPlanningFlowGate(state.phase)
? ensurePlanningFlowGraph(s.basePath, state)
: {
ok: true,
reason: "empty planning-flow graph recovered by state rederive",
nodeCount: 0,
};
}
if (!compiled.ok) {
const reason = compiled.reason ?? "Planning flow compilation failed";
if (isMissingFinalizedContextResult(compiled)) {
await runPreDispatchGate({
gateId: "planning-flow-gate",
gateType: "policy",
outcome: "pass",
failureClass: "none",
rationale: "plan v2 missing context recovery deferred to dispatch",
findings: reason,
milestoneId: state.activeMilestone?.id ?? undefined,
});
} else {
await runPreDispatchGate({
gateId: "planning-flow-gate",
gateType: "policy",
outcome: "manual-attention",
failureClass: "manual-attention",
rationale: "planning flow compile gate failed",
findings: reason,
milestoneId: state.activeMilestone?.id ?? undefined,
});
ctx.ui.notify(
`Plan gate failed-closed: ${reason}\n\nIf this keeps happening, try: /doctor heal`,
"error",
);
await deps.pauseAuto(ctx, pi);
return { action: "break", reason: "planning-flow-gate-failed" };
}
}
await runPreDispatchGate({
gateId: "planning-flow-gate",
gateType: "policy",
outcome: "pass",
failureClass: "none",
rationale: "planning flow compile gate passed",
milestoneId: state.activeMilestone?.id ?? undefined,
});
}
deps.syncCmuxSidebar(prefs, state);
let mid = state.activeMilestone?.id;
let midTitle = state.activeMilestone?.title;
debugLog("autoLoop", {
phase: "state-derived",
iteration: ic.iteration,
mid,
statePhase: state.phase,
});
// ── Slice-level parallelism gate (#2340) ─────────────────────────────
// When slice_parallel is enabled, check if multiple slices are eligible
// for parallel execution. If so, dispatch them in parallel and stop the
// sequential loop. Workers are spawned via slice-parallel-orchestrator.ts.
if (
prefs?.slice_parallel?.enabled &&
mid &&
!process.env.SF_PARALLEL_WORKER &&
isDbAvailable()
) {
try {
const dbSlices = getMilestoneSlices(mid);
if (dbSlices.length > 0) {
const doneIds = new Set(
dbSlices
.filter((sl) => sl.status === "complete" || sl.status === "done")
.map((sl) => sl.id),
);
const sliceInputs = dbSlices.map((sl) => ({
id: sl.id,
done: doneIds.has(sl.id),
depends: sl.depends ?? [],
}));
const eligible = getEligibleSlices(sliceInputs, doneIds);
if (eligible.length > 1) {
debugLog("autoLoop", {
phase: "slice-parallel-dispatch",
iteration: ic.iteration,
mid,
eligibleSlices: eligible.map((e) => e.id),
});
ctx.ui.notify(
`Slice-parallel: dispatching ${eligible.length} eligible slices for ${mid}.`,
"info",
);
const result = await startSliceParallel(s.basePath, mid, eligible, {
maxWorkers: prefs.slice_parallel.max_workers ?? 2,
useExecutionGraph: uokFlags.executionGraph,
shellWrapper: prefs.shell_wrapper,
});
if (result.started.length > 0) {
ctx.ui.notify(
`Slice-parallel: started ${result.started.length} worker(s): ${result.started.join(", ")}.`,
"info",
);
await deps.stopAuto(
ctx,
pi,
`Slice-parallel dispatched for ${mid}`,
);
return { action: "break", reason: "slice-parallel-dispatched" };
}
// Fall through to sequential if no workers started
}
}
} catch (err) {
debugLog("autoLoop", {
phase: "slice-parallel-check-error",
error: err instanceof Error ? err.message : String(err),
});
// Non-fatal — fall through to sequential dispatch
}
}
// ── Milestone transition ────────────────────────────────────────────
if (mid && s.currentMilestoneId && mid !== s.currentMilestoneId) {
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "milestone-transition",
data: { from: s.currentMilestoneId, to: mid },
});
ctx.ui.notify(
`Milestone ${s.currentMilestoneId} complete. Advancing to ${mid}: ${midTitle}.`,
"info",
);
deps.sendDesktopNotification(
"SF",
`Milestone ${s.currentMilestoneId} complete!`,
"success",
"milestone",
basename(s.originalBasePath || s.basePath),
);
deps.logCmuxEvent(
prefs,
`Milestone ${s.currentMilestoneId} complete. Advancing to ${mid}.`,
"success",
);
const vizPrefs = prefs;
if (vizPrefs?.auto_visualize) {
ctx.ui.notify("Run /visualize to see progress overview.", "info");
}
if (vizPrefs?.auto_report !== false) {
try {
await generateMilestoneReport(s, ctx, s.currentMilestoneId);
} catch (err) {
ctx.ui.notify(
`Report generation failed: ${err instanceof Error ? err.message : String(err)}`,
"warning",
);
}
}
// Reset dispatch counters for new milestone
s.unitDispatchCount.clear();
s.unitRecoveryCount.clear();
s.unitLifetimeDispatches.clear();
loopState.recentUnits.length = 0;
loopState.stuckRecoveryAttempts = 0;
// Worktree lifecycle on milestone transition — merge current, enter next
try {
deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
} catch (mergeErr) {
if (mergeErr instanceof MergeConflictError) {
// Real code conflicts — stop the loop instead of retrying forever (#2330)
ctx.ui.notify(
`Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /autonomous to resume.`,
"error",
);
await deps.stopAuto(
ctx,
pi,
`Merge conflict on milestone ${s.currentMilestoneId}`,
);
return { action: "break", reason: "merge-conflict" };
}
// Non-conflict merge errors — stop auto to avoid advancing with unmerged work
logError("engine", "Milestone merge failed with non-conflict error", {
milestone: s.currentMilestoneId,
error: String(mergeErr),
});
ctx.ui.notify(
`Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /autonomous to resume.`,
"error",
);
await deps.stopAuto(
ctx,
pi,
`Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`,
);
return { action: "break", reason: "merge-failed" };
}
// Fire product-audit after successful merge (guards against double-fire via s.productAuditMilestoneId)
await maybeFireProductAudit(s, ctx);
// PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
deps.invalidateAllCaches();
state = await deps.deriveState(s.basePath);
mid = state.activeMilestone?.id;
midTitle = state.activeMilestone?.title;
if (mid) {
if (deps.getIsolationMode() !== "none") {
deps.captureIntegrationBranch(s.basePath, mid);
}
deps.resolver.enterMilestone(mid, ctx.ui);
} else {
// mid is undefined — no milestone to capture integration branch for
}
const pendingIds = state.registry
.filter((m) => m.status !== "complete" && m.status !== "parked")
.map((m) => m.id);
deps.pruneQueueOrder(s.basePath, pendingIds);
// Archive the old completed-units.json instead of wiping it (#2313).
try {
const completedKeysPath = join(
sfRoot(s.basePath),
"completed-units.json",
);
if (existsSync(completedKeysPath) && s.currentMilestoneId) {
const archivePath = join(
sfRoot(s.basePath),
`completed-units-${s.currentMilestoneId}.json`,
);
cpSync(completedKeysPath, archivePath);
}
atomicWriteSync(completedKeysPath, JSON.stringify([], null, 2));
} catch (e) {
logWarning(
"engine",
"Failed to archive completed-units on milestone transition",
{ error: String(e) },
);
}
// Rebuild STATE.md immediately so it reflects the new active milestone.
// This bypasses the 30-second throttle in the normal rebuild path —
// milestone transitions are rare and important enough to warrant an
// immediate write.
try {
await deps.rebuildState(s.basePath);
} catch (e) {
logWarning(
"engine",
"STATE.md rebuild failed after milestone transition",
{ error: String(e) },
);
}
}
if (mid) {
s.currentMilestoneId = mid;
deps.setActiveMilestoneId(s.basePath, mid);
}
// ── Terminal conditions ──────────────────────────────────────────────
if (!mid) {
if (s.currentUnit) {
await deps.closeoutUnit(
ctx,
s.basePath,
s.currentUnit.type,
s.currentUnit.id,
s.currentUnit.startedAt,
deps.buildSnapshotOpts(s.currentUnit.type, s.currentUnit.id),
);
}
const incomplete = state.registry.filter(
(m) => m.status !== "complete" && m.status !== "parked",
);
if (incomplete.length === 0 && state.registry.length > 0) {
// All milestones complete — merge milestone branch before stopping
if (s.currentMilestoneId) {
try {
deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
// Prevent stopAuto from attempting the same merge (#2645)
s.milestoneMergedInPhases = true;
// Fire product-audit after successful merge (guards against double-fire via s.productAuditMilestoneId)
await maybeFireProductAudit(s, ctx);
} catch (mergeErr) {
if (mergeErr instanceof MergeConflictError) {
ctx.ui.notify(
`Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /autonomous to resume.`,
"error",
);
await deps.stopAuto(
ctx,
pi,
`Merge conflict on milestone ${s.currentMilestoneId}`,
);
return { action: "break", reason: "merge-conflict" };
}
logError("engine", "Milestone merge failed with non-conflict error", {
milestone: s.currentMilestoneId,
error: String(mergeErr),
});
ctx.ui.notify(
`Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /autonomous to resume.`,
"error",
);
await deps.stopAuto(
ctx,
pi,
`Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`,
);
return { action: "break", reason: "merge-failed" };
}
// PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
}
deps.sendDesktopNotification(
"SF",
"All milestones complete!",
"success",
"milestone",
basename(s.originalBasePath || s.basePath),
);
deps.logCmuxEvent(prefs, "All milestones complete.", "success");
await deps.stopAuto(ctx, pi, "All milestones complete");
} else if (incomplete.length === 0 && state.registry.length === 0) {
// Empty registry — no milestones visible, likely a path resolution bug
const diag = `basePath=${s.basePath}, phase=${state.phase}`;
ctx.ui.notify(
`No milestones visible in current scope. Possible path resolution issue.\n Diagnostic: ${diag}`,
"error",
);
await deps.stopAuto(
ctx,
pi,
`No milestones found — check basePath resolution`,
);
} else if (state.phase === "blocked") {
const blockerMsg = `Blocked: ${state.blockers.join(", ")}`;
// Pause instead of hard-stop so the session is resumable with `/autonomous`.
// Hard-stop here was causing premature termination when slice dependencies
// were temporarily unresolvable (e.g. after reassessment added new slices).
await deps.pauseAuto(ctx, pi);
ctx.ui.notify(
`${blockerMsg}. Fix and run /autonomous to resume.`,
"warning",
);
deps.sendDesktopNotification(
"SF",
blockerMsg,
"warning",
"attention",
basename(s.originalBasePath || s.basePath),
);
deps.logCmuxEvent(prefs, blockerMsg, "warning");
} else {
const ids = incomplete.map((m) => m.id).join(", ");
const diag = `basePath=${s.basePath}, milestones=[${state.registry.map((m) => `${m.id}:${m.status}`).join(", ")}], phase=${state.phase}`;
ctx.ui.notify(
`Unexpected: ${incomplete.length} incomplete milestone(s) (${ids}) but no active milestone.\n Diagnostic: ${diag}`,
"error",
);
await deps.stopAuto(
ctx,
pi,
`No active milestone — ${incomplete.length} incomplete (${ids}), see diagnostic above`,
);
}
debugLog("autoLoop", { phase: "exit", reason: "no-active-milestone" });
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "terminal",
data: { reason: "no-active-milestone" },
});
return { action: "break", reason: "no-active-milestone" };
}
if (!midTitle) {
midTitle = mid;
ctx.ui.notify(
`Milestone ${mid} has no title in roadmap — using ID as fallback.`,
"warning",
);
}
// Mid-merge safety check
const mergeReconcileResult = deps.reconcileMergeState(s.basePath, ctx);
if (mergeReconcileResult === "blocked") {
await deps.pauseAuto(ctx, pi);
debugLog("autoLoop", {
phase: "exit",
reason: "merge-reconciliation-blocked",
});
return { action: "break", reason: "merge-reconciliation-blocked" };
}
if (mergeReconcileResult === "reconciled") {
deps.invalidateAllCaches();
state = await deps.deriveState(s.basePath);
mid = state.activeMilestone?.id;
midTitle = state.activeMilestone?.title;
}
if (!mid || !midTitle) {
const noMilestoneReason = !mid
? "No active milestone after merge reconciliation"
: `Milestone ${mid} has no title after reconciliation`;
await closeoutAndStop(ctx, pi, s, deps, noMilestoneReason);
debugLog("autoLoop", {
phase: "exit",
reason: "no-milestone-after-reconciliation",
});
return { action: "break", reason: "no-milestone-after-reconciliation" };
}
// Terminal: complete
if (state.phase === "complete") {
// Milestone merge on complete (before closeout so branch state is clean)
if (s.currentMilestoneId) {
try {
deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
// Prevent stopAuto from attempting the same merge (#2645)
s.milestoneMergedInPhases = true;
// Fire product-audit after successful merge (guards against double-fire via s.productAuditMilestoneId)
await maybeFireProductAudit(s, ctx);
} catch (mergeErr) {
if (mergeErr instanceof MergeConflictError) {
ctx.ui.notify(
`Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /autonomous to resume.`,
"error",
);
await deps.stopAuto(
ctx,
pi,
`Merge conflict on milestone ${s.currentMilestoneId}`,
);
return { action: "break", reason: "merge-conflict" };
}
logError("engine", "Milestone merge failed with non-conflict error", {
milestone: s.currentMilestoneId,
error: String(mergeErr),
});
ctx.ui.notify(
`Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /autonomous to resume.`,
"error",
);
await deps.stopAuto(
ctx,
pi,
`Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`,
);
return { action: "break", reason: "merge-failed" };
}
// PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
}
deps.sendDesktopNotification(
"SF",
`Milestone ${mid} complete!`,
"success",
"milestone",
basename(s.originalBasePath || s.basePath),
);
deps.logCmuxEvent(prefs, `Milestone ${mid} complete.`, "success");
await closeoutAndStop(ctx, pi, s, deps, `Milestone ${mid} complete`);
debugLog("autoLoop", { phase: "exit", reason: "milestone-complete" });
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "terminal",
data: { reason: "milestone-complete", milestoneId: mid },
});
return { action: "break", reason: "milestone-complete" };
}
// Terminal: blocked — pause instead of hard-stop so the session is resumable.
if (state.phase === "blocked") {
const blockerMsg = `Blocked: ${state.blockers.join(", ")}`;
if (s.currentUnit) {
await deps.closeoutUnit(
ctx,
s.basePath,
s.currentUnit.type,
s.currentUnit.id,
s.currentUnit.startedAt,
deps.buildSnapshotOpts(s.currentUnit.type, s.currentUnit.id),
);
}
await deps.pauseAuto(ctx, pi);
ctx.ui.notify(
`${blockerMsg}. Fix and run /autonomous to resume.`,
"warning",
);
deps.sendDesktopNotification(
"SF",
blockerMsg,
"warning",
"attention",
basename(s.originalBasePath || s.basePath),
);
deps.logCmuxEvent(prefs, blockerMsg, "warning");
debugLog("autoLoop", { phase: "exit", reason: "blocked" });
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "terminal",
data: { reason: "blocked", blockers: state.blockers },
});
return { action: "break", reason: "blocked" };
}
return { action: "next", data: { state, mid, midTitle } };
}
// ─── runDispatch ──────────────────────────────────────────────────────────────
/**
* Phase 3: Dispatch resolution — resolve next unit, stuck detection, pre-dispatch hooks.
* Returns break/continue to control the loop, or next with IterationData on success.
*/
export async function runDispatch(ic, preData, loopState) {
const { ctx, pi, s, deps, prefs } = ic;
const { state, mid, midTitle } = preData;
const STUCK_WINDOW_SIZE = 6;
debugLog("autoLoop", { phase: "dispatch-resolve", iteration: ic.iteration });
const dispatchResult = await deps.resolveDispatch({
basePath: s.basePath,
mid,
midTitle,
state,
prefs,
session: s,
runControl: deps.uokRunControl,
permissionProfile: deps.uokPermissionProfile,
});
if (dispatchResult.action === "stop") {
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "dispatch-stop",
rule: dispatchResult.matchedRule,
data: { reason: dispatchResult.reason },
});
// Warning-level stops are recoverable human checkpoints (e.g. UAT verdict
// gate) — pause instead of hard-stopping so the session is resumable with
// `/autonomous`. Error/info-level stops remain hard stops for infrastructure
// failures and terminal conditions respectively.
// See: https://github.com/singularity-forge/sf-run/issues/2474
if (dispatchResult.level === "warning") {
ctx.ui.notify(dispatchResult.reason, "warning");
await deps.pauseAuto(ctx, pi);
} else {
await closeoutAndStop(ctx, pi, s, deps, dispatchResult.reason);
}
debugLog("autoLoop", { phase: "exit", reason: "dispatch-stop" });
return { action: "break", reason: "dispatch-stop" };
}
if (dispatchResult.action !== "dispatch") {
// Non-dispatch action (e.g. "skip") — re-derive state
await new Promise((r) => setImmediate(r));
return { action: "continue" };
}
try {
const diagnostics = deps.writeUokDiagnostics?.(s.basePath, {
expectedNext: dispatchResult,
});
const gate = assessUokDiagnosticsDispatchGate(diagnostics);
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "uok-diagnostics-dispatch-gate",
data: {
verdict: diagnostics?.verdict ?? "unknown",
classification: diagnostics?.classification ?? "unknown",
proceed: gate.proceed,
issueCode: gate.issueCode,
reportPath: gate.reportPath ?? diagnostics?.reportPath,
},
});
if (!gate.proceed) {
await runPreDispatchGate({
gateId: "uok-diagnostics-dispatch-gate",
gateType: "execution",
outcome: "manual-attention",
failureClass: "manual-attention",
rationale: "uok diagnostics blocked dispatch",
findings: gate.reason,
milestoneId: mid,
});
ctx.ui.notify(gate.reason, "error");
await deps.pauseAuto(ctx, pi);
debugLog("autoLoop", {
phase: "exit",
reason: "uok-diagnostics-pause",
issueCode: gate.issueCode,
});
return { action: "break", reason: "uok-diagnostics-pause" };
}
} catch (err) {
logWarning("engine", "UOK diagnostics dispatch gate failed open", {
error: err instanceof Error ? err.message : String(err),
});
}
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "dispatch-match",
rule: dispatchResult.matchedRule,
data: { unitType: dispatchResult.unitType, unitId: dispatchResult.unitId },
});
let unitType = dispatchResult.unitType;
const unitId = dispatchResult.unitId;
let prompt = dispatchResult.prompt;
const pauseAfterUatDispatch = dispatchResult.pauseAfterDispatch ?? false;
// ── Reasoning assist injection ──────────────────────────────────────
if (isReasoningAssistEnabled(unitType)) {
try {
const reasoningPrompt = await buildReasoningAssistPrompt(
unitType,
unitId,
s.basePath,
ctx,
);
if (reasoningPrompt) {
// Fire-and-forget: reasoning assist is best-effort, non-blocking
// The actual LLM call would happen here in a full implementation.
// For now, we prepare the prompt for injection.
debugLog("autoLoop", {
phase: "reasoning-assist",
unitType,
unitId,
promptLength: reasoningPrompt.length,
});
// Use reasoning prompt context as guidance until a fast model is wired in.
// The injected guidance provides unit-level context hints to the primary model.
prompt = injectReasoningGuidance(prompt, reasoningPrompt);
}
} catch (err) {
logWarning("engine", "Reasoning assist failed open", {
error: err instanceof Error ? err.message : String(err),
unitType,
unitId,
});
}
}
// ── Sliding-window stuck detection with graduated recovery ──
const derivedKey = `${unitType}/${unitId}`;
const hasTransientTaskCompleteFailure =
unitType === "execute-task" && !!s.pendingTaskCompleteFailures?.has(unitId);
if (!s.pendingVerificationRetry && !hasTransientTaskCompleteFailure) {
loopState.recentUnits.push({ key: derivedKey });
if (loopState.recentUnits.length > STUCK_WINDOW_SIZE)
loopState.recentUnits.shift();
const stuckSignal = detectStuck(loopState.recentUnits);
if (stuckSignal) {
debugLog("autoLoop", {
phase: "stuck-check",
unitType,
unitId,
reason: stuckSignal.reason,
recoveryAttempts: loopState.stuckRecoveryAttempts,
});
// Graduated stuck recovery — up to 5 total attempts before hard stop.
// Attempt 0: cache invalidation + retry
// Attempts 14: rethink + retry
// Attempt 5 (exhausted): hard stop
loopState.stuckRecoveryAttempts++;
const attempt = loopState.stuckRecoveryAttempts;
if (attempt === 1) {
// Attempt 1: verify artifact + cache invalidation + retry
const artifactExists = verifyExpectedArtifact(
unitType,
unitId,
s.basePath,
);
if (artifactExists) {
debugLog("autoLoop", {
phase: "stuck-recovery",
level: 1,
action: "artifact-found",
});
ctx.ui.notify(
`Stuck recovery: artifact for ${unitType} ${unitId} found on disk. Invalidating caches.`,
"info",
);
deps.invalidateAllCaches();
return { action: "continue" };
}
ctx.ui.notify(
`Stuck on ${unitType} ${unitId} (${stuckSignal.reason}). Invalidating caches and retrying.`,
"warning",
);
deps.invalidateAllCaches();
return { action: "continue" };
} else if (attempt <= 5) {
// Attempts 25: rethink + diagnostic + retry
const stuckDiag = diagnoseExpectedArtifact(
unitType,
unitId,
s.basePath,
);
const stuckRemediation = buildLoopRemediationSteps(
unitType,
unitId,
s.basePath,
);
const diagnostic = deps.getDeepDiagnostic(s.basePath);
const cappedDiag =
(diagnostic?.length ?? 0) > MAX_RECOVERY_CHARS
? diagnostic.slice(0, MAX_RECOVERY_CHARS) +
"\n\n[...diagnostic truncated]"
: (diagnostic ?? null);
s.pendingRethinkAttempt = JSON.stringify({
attempt,
reason: stuckSignal.reason,
diagnostic: cappedDiag,
stuckDiag,
remediation: stuckRemediation,
unitType,
unitId,
});
const rt =
attempt === 5
? "**FINAL STUCK ATTEMPT — 5 of 5.** "
: `**STUCK RECOVERY ATTEMPT ${attempt - 1} of 4.** `;
ctx.ui.notify(
`${rt}Stuck on ${unitType} ${unitId} (${stuckSignal.reason}). Injecting diagnostic and retrying.`,
"warning",
);
return { action: "continue" };
} else {
// Attempt 6+: genuinely exhausted — hard stop
debugLog("autoLoop", {
phase: "stuck-detected",
unitType,
unitId,
reason: stuckSignal.reason,
});
const stuckDiag = diagnoseExpectedArtifact(
unitType,
unitId,
s.basePath,
);
const stuckRemediation = buildLoopRemediationSteps(
unitType,
unitId,
s.basePath,
);
const stuckParts = [
`Stuck on ${unitType} ${unitId}${stuckSignal.reason}.`,
];
if (stuckDiag) stuckParts.push(`Expected: ${stuckDiag}`);
if (stuckRemediation)
stuckParts.push(`To recover:\n${stuckRemediation}`);
ctx.ui.notify(stuckParts.join(" "), "error");
await deps.stopAuto(ctx, pi, `Stuck: ${stuckSignal.reason}`);
return { action: "break", reason: "stuck-detected" };
}
} else {
// Progress detected — reset recovery counter
if (loopState.stuckRecoveryAttempts > 0) {
debugLog("autoLoop", {
phase: "stuck-counter-reset",
from:
loopState.recentUnits[loopState.recentUnits.length - 2]?.key ?? "",
to: derivedKey,
});
loopState.stuckRecoveryAttempts = 0;
}
}
}
// Pre-dispatch hooks
const preDispatchResult = deps.runPreDispatchHooks(
unitType,
unitId,
prompt,
s.basePath,
);
if (preDispatchResult.firedHooks.length > 0) {
ctx.ui.notify(
`Pre-dispatch hook${preDispatchResult.firedHooks.length > 1 ? "s" : ""}: ${preDispatchResult.firedHooks.join(", ")}`,
"info",
);
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "pre-dispatch-hook",
data: {
firedHooks: preDispatchResult.firedHooks,
action: preDispatchResult.action,
},
});
}
if (preDispatchResult.action === "skip") {
ctx.ui.notify(
`Skipping ${unitType} ${unitId} (pre-dispatch hook).`,
"info",
);
await new Promise((r) => setImmediate(r));
return { action: "continue" };
}
if (preDispatchResult.action === "replace") {
prompt = preDispatchResult.prompt ?? prompt;
if (preDispatchResult.unitType) unitType = preDispatchResult.unitType;
} else if (preDispatchResult.prompt) {
prompt = preDispatchResult.prompt;
}
const guardBasePath = _resolveDispatchGuardBasePath(s);
const priorSliceBlocker = deps.getPriorSliceCompletionBlocker(
guardBasePath,
deps.getMainBranch(guardBasePath),
unitType,
unitId,
);
if (priorSliceBlocker) {
await deps.stopAuto(ctx, pi, priorSliceBlocker);
debugLog("autoLoop", { phase: "exit", reason: "prior-slice-blocker" });
return { action: "break", reason: "prior-slice-blocker" };
}
return {
action: "next",
data: {
unitType,
unitId,
prompt,
finalPrompt: prompt,
pauseAfterUatDispatch,
state,
mid,
midTitle,
isRetry: false,
previousTier: undefined,
hookModelOverride: preDispatchResult.model,
},
};
}
// ─── runGuards ────────────────────────────────────────────────────────────────
/**
* Phase 2: Guards — stop directives, budget ceiling, context window, secrets re-check.
* Returns break to exit the loop, or next to proceed to dispatch.
*/
export async function runGuards(ic, mid, unitType, unitId, sliceId) {
const { ctx, pi, s, deps, prefs } = ic;
// ── Stop/Backtrack directive guard (#3487) ──
// Check for unexecuted stop or backtrack captures BEFORE dispatching any unit.
// This ensures user "halt" directives are honored immediately.
// IMPORTANT: Fail-closed — any exception during stop handling still breaks the loop
// to ensure user halt intent is never silently dropped.
try {
const { loadStopCaptures, markCaptureExecuted } = await import(
"../captures.js"
);
const stopCaptures = loadStopCaptures(s.basePath);
if (stopCaptures.length > 0) {
const first = stopCaptures[0];
const isBacktrack = first.classification === "backtrack";
const label = isBacktrack
? `Backtrack directive: ${first.text}`
: `Stop directive: ${first.text}`;
ctx.ui.notify(label, "warning");
deps.sendDesktopNotification(
"SF",
label,
"warning",
"stop-directive",
basename(s.originalBasePath || s.basePath),
);
// Pause first — ensures autonomous mode stops even if later steps fail
await deps.pauseAuto(ctx, pi);
// For backtrack captures, write the backtrack trigger after pausing
if (isBacktrack) {
try {
const { executeBacktrack } = await import("../triage-resolution.js");
executeBacktrack(s.basePath, mid, first);
} catch (e) {
debugLog("guards", {
phase: "backtrack-execution-error",
error: String(e),
});
}
}
// Mark captures as executed only after successful pause/transition
for (const cap of stopCaptures) {
markCaptureExecuted(s.basePath, cap.id);
}
debugLog("autoLoop", {
phase: "exit",
reason: isBacktrack ? "user-backtrack" : "user-stop",
});
return {
action: "break",
reason: isBacktrack ? "user-backtrack" : "user-stop",
};
}
} catch (e) {
// Fail-closed: if anything in the stop guard throws, break the loop
// rather than silently continuing and dropping user halt intent
debugLog("guards", { phase: "stop-guard-error", error: String(e) });
return { action: "break", reason: "stop-guard-error" };
}
// Production mutation guard — headless autonomous must not enqueue live failover
// commands without a human-provided safe target and cleanup plan.
try {
if (isDbAvailable()) {
const state = await deps.deriveState(s.basePath);
const activeTask = state.activeTask;
const activeSlice = state.activeSlice;
const activeMilestone = state.activeMilestone;
if (activeMilestone?.id && activeSlice?.id && activeTask?.id) {
const task = getTask(activeMilestone.id, activeSlice.id, activeTask.id);
if (task) {
const taskText = [
task.title,
task.description,
task.verify,
...task.inputs,
...task.expected_output,
].join("\n");
if (requiresHumanProductionMutationApproval(taskText)) {
const approvalUnit = {
milestoneId: activeMilestone.id,
sliceId: activeSlice.id,
taskId: activeTask.id,
taskTitle: task.title,
taskText,
};
const approvalBasePath = s.originalBasePath || s.basePath;
const approval = readProductionMutationApprovalStatus(
approvalBasePath,
approvalUnit,
);
if (approval.approved) {
ctx.ui.notify(
`Production mutation approval accepted for ${approvalUnit.milestoneId}/${approvalUnit.sliceId}/${approvalUnit.taskId}: ${approval.path}`,
"warning",
);
} else {
const llmApproval = approveProductionMutationWithLlmPolicy(
approvalBasePath,
approvalUnit,
);
if (llmApproval.approved) {
ctx.ui.notify(
`Production mutation LLM approval accepted for pending-command-only smoke test ${approvalUnit.milestoneId}/${approvalUnit.sliceId}/${approvalUnit.taskId}: ${llmApproval.path}`,
"warning",
);
} else {
const template = ensureProductionMutationApprovalTemplate(
approvalBasePath,
approvalUnit,
);
const blockerReasons = [
...approval.reasons,
...llmApproval.reasons.map((reason) => `LLM: ${reason}`),
];
const reasons = blockerReasons.length
? ` Missing/invalid fields: ${blockerReasons.join("; ")}.`
: "";
const msg =
`Production mutation guard: ${activeMilestone.id}/${activeSlice.id}/${activeTask.id} asks to POST unified failover against production. ` +
`${template.created ? "Created" : "Reusing"} approval gate at ${template.path}. ` +
`Fill it with an explicit safe server/VM target, cleanup/rollback path, and human or LLM approval, then rerun sf headless autonomous.${reasons}`;
ctx.ui.notify(msg, "error");
deps.sendDesktopNotification(
"SF",
"Production mutation guard paused autonomous mode",
"warning",
"safety",
basename(s.originalBasePath || s.basePath),
);
await deps.pauseAuto(ctx, pi);
return {
action: "break",
reason: "production-mutation-guard",
};
}
}
}
}
}
}
} catch (e) {
debugLog("guards", {
phase: "production-mutation-guard-error",
error: String(e),
});
}
// Budget ceiling guard
const budgetCeiling = prefs?.budget_ceiling;
if (budgetCeiling !== undefined && budgetCeiling > 0) {
const currentLedger = deps.getLedger();
// In parallel worker mode, only count cost from the current autonomous mode session
// to avoid hitting the ceiling due to historical project-wide spend (#2184).
let costUnits = currentLedger?.units;
if (
process.env.SF_PARALLEL_WORKER &&
s.autoStartTime &&
Array.isArray(costUnits)
) {
const sessionStartISO = new Date(s.autoStartTime).toISOString();
costUnits = costUnits.filter(
(u) => u.startedAt != null && u.startedAt >= sessionStartISO,
);
}
const totalCost = costUnits ? deps.getProjectTotals(costUnits).cost : 0;
const budgetPct = totalCost / budgetCeiling;
const budgetAlertLevel = deps.getBudgetAlertLevel(budgetPct);
const newBudgetAlertLevel = deps.getNewBudgetAlertLevel(
s.lastBudgetAlertLevel,
budgetPct,
);
const enforcement = prefs?.budget_enforcement ?? "pause";
const budgetEnforcementAction = deps.getBudgetEnforcementAction(
enforcement,
budgetPct,
);
// Data-driven threshold check — loop descending, fire first match
const threshold = BUDGET_THRESHOLDS.find(
(t) => newBudgetAlertLevel >= t.pct,
);
if (threshold) {
s.lastBudgetAlertLevel = newBudgetAlertLevel;
if (threshold.pct === 100 && budgetEnforcementAction !== "none") {
// 100% — special enforcement logic (halt/pause/warn)
const msg = `Budget ceiling ${deps.formatCost(budgetCeiling)} reached (spent ${deps.formatCost(totalCost)}).`;
if (budgetEnforcementAction === "halt") {
deps.sendDesktopNotification(
"SF",
msg,
"error",
"budget",
basename(s.originalBasePath || s.basePath),
);
await deps.stopAuto(ctx, pi, "Budget ceiling reached");
debugLog("autoLoop", { phase: "exit", reason: "budget-halt" });
return { action: "break", reason: "budget-halt" };
}
if (budgetEnforcementAction === "pause") {
ctx.ui.notify(
`${msg} Pausing autonomous mode — /autonomous to override and continue.`,
"warning",
);
deps.sendDesktopNotification(
"SF",
msg,
"warning",
"budget",
basename(s.originalBasePath || s.basePath),
);
deps.logCmuxEvent(prefs, msg, "warning");
await deps.pauseAuto(ctx, pi);
debugLog("autoLoop", { phase: "exit", reason: "budget-pause" });
return { action: "break", reason: "budget-pause" };
}
ctx.ui.notify(`${msg} Continuing (enforcement: warn).`, "warning");
deps.sendDesktopNotification(
"SF",
msg,
"warning",
"budget",
basename(s.originalBasePath || s.basePath),
);
deps.logCmuxEvent(prefs, msg, "warning");
} else if (threshold.pct < 100) {
// Sub-100% — simple notification
const msg = `${threshold.label}: ${deps.formatCost(totalCost)} / ${deps.formatCost(budgetCeiling)}`;
ctx.ui.notify(msg, threshold.notifyLevel);
deps.sendDesktopNotification(
"SF",
msg,
threshold.notifyLevel,
"budget",
basename(s.originalBasePath || s.basePath),
);
deps.logCmuxEvent(prefs, msg, threshold.cmuxLevel);
}
} else if (budgetAlertLevel === 0) {
s.lastBudgetAlertLevel = 0;
}
} else {
s.lastBudgetAlertLevel = 0;
}
// ── UOK Plan-gate ──────────────────────────────────────────────────────────
// Structural validation before the first execute-task unit of a slice:
// confirms the plan files exist and the slice has ≥1 task.
// FailureClass "input" → 0 retries (broken plan needs human fix, not
// an LLM retry). Only fires when uok.gates.enabled is true.
const uokFlagsGuards = resolveUokFlags(prefs);
if (uokFlagsGuards.gates && unitType === "execute-task" && mid && sliceId) {
const taskCounts = getSliceTaskCounts(mid, sliceId);
const isFirstTaskForSlice = taskCounts.done === 0;
if (isFirstTaskForSlice) {
let planGateOutcome = "pass";
let planGateRationale = "";
const milestoneSlices = getMilestoneSlices(mid);
if (!milestoneSlices || milestoneSlices.length === 0) {
planGateOutcome = "fail";
planGateRationale = `Milestone ${mid} has no slices in DB (not yet planned)`;
} else if (taskCounts.total < 1) {
planGateOutcome = "fail";
planGateRationale = `Slice ${sliceId} has no tasks defined`;
}
const planGateRunner = new UokGateRunner();
planGateRunner.register({
id: "plan-gate",
type: "policy",
execute: async () => ({
outcome: planGateOutcome,
failureClass: planGateOutcome === "pass" ? "none" : "input",
rationale: planGateRationale || "Plan files verified",
}),
});
const planGateResult = await planGateRunner.run("plan-gate", {
basePath: s.basePath,
traceId: `guard:${ic.flowId}`,
turnId: `iter-${ic.iteration}`,
milestoneId: mid,
sliceId,
unitType,
unitId,
});
if (planGateResult.outcome !== "pass") {
ctx.ui.notify(
`Plan gate failed: ${planGateResult.rationale ?? "invalid plan"}`,
"warning",
);
await deps.pauseAuto(ctx, pi);
return { action: "break", reason: "plan-gate-failed" };
}
}
}
// Context window guard
const contextThreshold = prefs?.context_pause_threshold ?? 0;
if (contextThreshold > 0 && s.cmdCtx) {
const contextUsage = s.cmdCtx.getContextUsage();
if (
contextUsage &&
contextUsage.percent !== null &&
contextUsage.percent >= contextThreshold
) {
const msg = `Context window at ${contextUsage.percent}% (threshold: ${contextThreshold}%). Pausing to prevent truncated output.`;
ctx.ui.notify(
`${msg} Run /autonomous to continue (will start fresh session).`,
"warning",
);
deps.sendDesktopNotification(
"SF",
`Context ${contextUsage.percent}% — paused`,
"warning",
"attention",
basename(s.originalBasePath || s.basePath),
);
await deps.pauseAuto(ctx, pi);
debugLog("autoLoop", { phase: "exit", reason: "context-window" });
return { action: "break", reason: "context-window" };
}
}
// Secrets re-check gate
try {
const manifestStatus = await deps.getManifestStatus(
s.basePath,
mid,
s.originalBasePath,
);
if (manifestStatus && manifestStatus.pending.length > 0) {
const result = await deps.collectSecretsFromManifest(
s.basePath,
mid,
ctx,
);
if (
result &&
result.applied &&
result.skipped &&
result.existingSkipped
) {
ctx.ui.notify(
`Secrets collected: ${result.applied.length} applied, ${result.skipped.length} skipped, ${result.existingSkipped.length} already set.`,
"info",
);
} else {
ctx.ui.notify("Secrets collection skipped.", "info");
}
}
} catch (err) {
ctx.ui.notify(
`Secrets collection error: ${err instanceof Error ? err.message : String(err)}. Continuing with next task.`,
"warning",
);
}
return { action: "next", data: undefined };
}
// ─── runUnitPhase ─────────────────────────────────────────────────────────────
/**
* Phase 4: Unit execution — dispatch prompt, await agent_end, closeout, artifact verify.
* Returns break or next with unitStartedAt for downstream phases.
*/
export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
const { ctx, pi, s, deps, prefs } = ic;
const { unitType, unitId, prompt, state, mid } = iterData;
debugLog("autoLoop", {
phase: "unit-execution",
iteration: ic.iteration,
unitType,
unitId,
});
// ── Worktree health check (#1833, #1843) ────────────────────────────
// ...
if (
s.basePath &&
!s.basePath.startsWith("/mock/") &&
unitType === "execute-task"
) {
const gitMarker = join(s.basePath, ".git");
const hasGit = deps.existsSync(gitMarker);
if (!hasGit) {
const msg = `Worktree health check failed: ${s.basePath} has no .git — refusing to dispatch ${unitType} ${unitId}`;
debugLog("runUnitPhase", {
phase: "worktree-health-fail",
basePath: s.basePath,
hasGit,
});
ctx.ui.notify(msg, "error");
await deps.stopAuto(ctx, pi, msg);
return { action: "break", reason: "worktree-invalid" };
}
const hasProjectFile = PROJECT_FILES.some((f) =>
deps.existsSync(join(s.basePath, f)),
);
const hasSrcDir = deps.existsSync(join(s.basePath, "src"));
// Xcode bundles have project-specific names (*.xcodeproj, *.xcworkspace)
// that cannot be matched by exact filename — scan the directory by suffix.
let hasXcodeBundle = false;
try {
const entries = deps.existsSync(s.basePath)
? readdirSync(s.basePath)
: [];
hasXcodeBundle = entries.some(
(e) => e.endsWith(".xcodeproj") || e.endsWith(".xcworkspace"),
);
} catch (err) {
debugLog("runUnitPhase", {
phase: "xcode-bundle-scan-failed",
basePath: s.basePath,
error: String(err),
});
}
// Monorepo support (#2347): if no project files in the worktree directory,
// walk parent directories up to the filesystem root. In monorepos,
// package.json / Cargo.toml etc. live in a parent directory.
let hasProjectFileInParent = false;
if (!hasProjectFile && !hasSrcDir && !hasXcodeBundle) {
let checkDir = dirname(s.basePath);
const { root } = parsePath(checkDir);
while (checkDir !== root) {
// Stop at git repository boundary — ancestors above the repo root
// (e.g. ~ or /usr/local) may contain unrelated project files.
if (deps.existsSync(join(checkDir, ".git"))) break;
if (PROJECT_FILES.some((f) => deps.existsSync(join(checkDir, f)))) {
hasProjectFileInParent = true;
break;
}
checkDir = dirname(checkDir);
}
}
if (
!hasProjectFile &&
!hasSrcDir &&
!hasXcodeBundle &&
!hasProjectFileInParent
) {
// Greenfield projects won't have project files yet — the first task creates them.
// Log a warning but allow execution to proceed. The .git check above is sufficient
// to ensure we're in a valid working directory.
debugLog("runUnitPhase", {
phase: "worktree-health-warn-greenfield",
basePath: s.basePath,
hasProjectFile,
hasSrcDir,
hasXcodeBundle,
});
ctx.ui.notify(
`Warning: ${s.basePath} has no recognized project files — proceeding as greenfield project`,
"warning",
);
}
}
// Detect retry and capture previous tier for escalation
const isPausedUnitResume =
s.pausedUnitType === unitType && s.pausedUnitId === unitId;
const isRetry = !!(
(s.currentUnit &&
s.currentUnit.type === unitType &&
s.currentUnit.id === unitId) ||
isPausedUnitResume
);
const previousTier =
s.currentUnitRouting?.tier ??
(isPausedUnitResume && unitType === "execute-task"
? "standard"
: undefined);
if (isPausedUnitResume) {
s.pausedUnitType = null;
s.pausedUnitId = null;
}
// Scope workflow-logger buffer to this unit so post-finalize drains are
// per-unit. Without this, the module-level _buffer accumulates across every
// unit in the same Node process (see workflow-logger.ts module header).
_resetLogs();
s.currentUnit = { type: unitType, id: unitId, startedAt: Date.now() };
s.researchTerminalTransition = false;
s.lastGitActionFailure = null;
s.lastGitActionStatus = null;
setCurrentPhase(unitType);
s.lastToolInvocationError = null; // #2883: clear stale error from previous unit
resetToolCallCounts();
resetCompletionNudgeState(
unitType,
unitId,
prefs?.auto_supervisor?.completion_nudge_after,
);
resetRunawayGuardState(unitType, unitId, {
sessionTokens: collectSessionTokenUsage(ctx),
changedFiles: countChangedFiles(s.basePath),
worktreeFingerprint: collectWorktreeFingerprint(s.basePath),
});
const unitStartSeq = ic.nextSeq();
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: unitStartSeq,
eventType: "unit-start",
data: { unitType, unitId },
});
{
const progressEvent = buildUokProgressEvent({
eventType: "unit_selected",
unitType,
unitId,
role: "worker",
sessionId: ctx.sessionManager.getSessionId(),
traceId: ic.flowId,
data: { legacyEventType: "unit-start" },
});
deps.emitJournalEvent({
ts: progressEvent.ts,
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: progressEvent.eventType,
data: progressEvent,
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
});
}
ctx.ui.notify(`[unit] ${unitType} ${unitId} starting`, "info");
deps.captureAvailableSkills();
writeUnitRuntimeRecord(
s.basePath,
unitType,
unitId,
s.currentUnit.startedAt,
{
phase: "dispatched",
wrapupWarningSent: false,
timeoutAt: null,
lastProgressAt: s.currentUnit.startedAt,
progressCount: 0,
lastProgressKind: "dispatch",
recoveryAttempts: 0, // Reset so re-dispatched units get full recovery budget (#2322)
lineageEvent: {
status: "started",
workerSessionId: ctx.sessionManager.getSessionId(),
note: "unit dispatched",
},
},
);
// Status bar (widget + preconditions deferred until after model selection — see #2899)
ctx.ui.setStatus("sf-auto", "auto");
if (mid)
deps.updateSliceProgressCache(s.basePath, mid, state.activeSlice?.id);
// ── Safety harness: reset evidence + create checkpoint ──
const safetyConfig = resolveSafetyHarnessConfig(prefs?.safety_harness);
if (safetyConfig.enabled && safetyConfig.evidence_collection) {
resetEvidence();
const { milestone: eMid, slice: eSid, task: eTid } = parseUnitId(unitId);
loadEvidenceFromDisk(s.basePath, eMid, eSid ?? "", eTid ?? "");
}
if (
safetyConfig.enabled &&
safetyConfig.file_change_validation &&
unitType === "execute-task"
) {
s.preUnitDirtyFiles = getDirtyFiles(s.basePath);
} else {
s.preUnitDirtyFiles = [];
}
// Only checkpoint code-executing units (not lifecycle/planning units)
if (
safetyConfig.enabled &&
safetyConfig.checkpoints &&
unitType === "execute-task"
) {
s.checkpointSha = createCheckpoint(s.basePath, unitId);
if (s.checkpointSha) {
debugLog("runUnitPhase", {
phase: "checkpoint-created",
unitId,
sha: s.checkpointSha.slice(0, 8),
});
}
}
// Prompt injection
let finalPrompt = prompt;
if (s.pendingVerificationRetry) {
const retryCtx = s.pendingVerificationRetry;
s.pendingVerificationRetry = null;
const capped =
retryCtx.failureContext.length > MAX_RECOVERY_CHARS
? retryCtx.failureContext.slice(0, MAX_RECOVERY_CHARS) +
"\n\n[...failure context truncated]"
: retryCtx.failureContext;
finalPrompt = `**VERIFICATION FAILED — AUTO-FIX ATTEMPT ${retryCtx.attempt}**\n\nThe verification gate ran after your previous attempt and found failures. Fix these issues before completing the task.\n\n${capped}\n\n---\n\n${finalPrompt}`;
}
if (s.pendingCrashRecovery) {
const capped =
s.pendingCrashRecovery.length > MAX_RECOVERY_CHARS
? s.pendingCrashRecovery.slice(0, MAX_RECOVERY_CHARS) +
"\n\n[...recovery briefing truncated to prevent memory exhaustion]"
: s.pendingCrashRecovery;
finalPrompt = `${capped}\n\n---\n\n${finalPrompt}`;
s.pendingCrashRecovery = null;
} else if (s.pendingRethinkAttempt) {
// Stuck recovery: inject diagnostic + rethink prompt, then clear.
let rethinkCtx = null;
try {
rethinkCtx = JSON.parse(s.pendingRethinkAttempt);
} catch {
// Malformed JSON — skip injection
}
s.pendingRethinkAttempt = null;
if (rethinkCtx) {
const isFinal = rethinkCtx.attempt === 5;
const lines = [
isFinal
? `**⚠ FINAL STUCK ATTEMPT (5 of 5) — You have run out of recovery attempts. Make this count.**`
: `**STUCK RECOVERY — Rethink attempt ${rethinkCtx.attempt - 1} of 4.**`,
"",
`You have been repeatedly stuck on **${rethinkCtx.unitType} ${rethinkCtx.unitId}** for reason: "${rethinkCtx.reason}".`,
"",
"Before continuing, you must reflect on the following:",
"",
"1. **What specific error or failure pattern are you seeing?**",
"2. **What assumption are you making that might be wrong?**",
"3. **What is ONE concrete, different approach you will try this time?**",
"",
"Do NOT repeat the same approach. Identify the root cause and try a genuinely different strategy.",
];
if (rethinkCtx.stuckDiag) {
lines.push("", `**What was expected:** ${rethinkCtx.stuckDiag}`);
}
if (rethinkCtx.remediation) {
lines.push("", `**Suggested remediation:**\n${rethinkCtx.remediation}`);
}
if (rethinkCtx.diagnostic) {
lines.push(
"",
`**Full diagnostic from previous attempt:**\n${rethinkCtx.diagnostic}`,
);
}
lines.push("", "---", "", finalPrompt);
finalPrompt = lines.join("\n");
}
} else if ((s.unitDispatchCount.get(`${unitType}/${unitId}`) ?? 0) > 1) {
const diagnostic = deps.getDeepDiagnostic(s.basePath);
if (diagnostic) {
const cappedDiag =
diagnostic.length > MAX_RECOVERY_CHARS
? diagnostic.slice(0, MAX_RECOVERY_CHARS) +
"\n\n[...diagnostic truncated to prevent memory exhaustion]"
: diagnostic;
finalPrompt = `**RETRY — your previous attempt did not produce the required artifact.**\n\nDiagnostic from previous attempt:\n${cappedDiag}\n\nFix whatever went wrong and make sure you write the required file this time.\n\n---\n\n${finalPrompt}`;
}
}
// Prompt char measurement
try {
const solverState = beginAutonomousSolverIteration(
s.basePath,
unitType,
unitId,
{
maxIterations: getConfiguredAutonomousSolverMaxIterations(prefs),
},
);
const steeringBlock = buildAutonomousSolverSteeringPromptBlock(
consumePendingAutonomousSolverSteering(s.basePath),
);
if (steeringBlock) {
finalPrompt = `${finalPrompt}\n\n---\n\n${steeringBlock}`;
}
finalPrompt = `${finalPrompt}\n\n---\n\n${buildAutonomousSolverPromptBlock(solverState)}`;
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "solver-iteration-start",
data: {
unitType,
unitId,
iteration: solverState.iteration,
maxIterations: solverState.maxIterations,
steeringInjected: Boolean(steeringBlock),
},
});
} catch (solverErr) {
logWarning("engine", "Autonomous solver prompt injection failed", {
error: solverErr instanceof Error ? solverErr.message : String(solverErr),
});
}
s.lastPromptCharCount = finalPrompt.length;
s.lastBaselineCharCount = undefined;
if (deps.isDbAvailable()) {
try {
const { inlineSfRootFile } = await importExtensionModule(
import.meta.url,
"../auto-prompts.js",
);
const [decisionsContent, requirementsContent, projectContent] =
await Promise.all([
inlineSfRootFile(s.basePath, "decisions.md", "Decisions"),
inlineSfRootFile(s.basePath, "requirements.md", "Requirements"),
inlineSfRootFile(s.basePath, "project.md", "Project"),
]);
s.lastBaselineCharCount =
(decisionsContent?.length ?? 0) +
(requirementsContent?.length ?? 0) +
(projectContent?.length ?? 0);
} catch (e) {
logWarning("engine", "Baseline char count measurement failed", {
error: String(e),
});
}
}
// Cache-optimize prompt section ordering
try {
finalPrompt = deps.reorderForCaching(finalPrompt);
} catch (reorderErr) {
const msg =
reorderErr instanceof Error ? reorderErr.message : String(reorderErr);
logWarning("engine", "Prompt reorder failed", { error: msg });
}
// Select and apply model (with tier escalation on retry — normal units only)
const modelResult = await deps.selectAndApplyModel(
ctx,
pi,
unitType,
unitId,
s.basePath,
prefs,
s.verbose,
s.autoModeStartModel,
sidecarItem ? undefined : { isRetry, previousTier },
undefined,
s.manualSessionModelOverride,
s.autoModeStartThinkingLevel,
);
s.currentUnitRouting = modelResult.routing;
s.currentUnitModel = modelResult.appliedModel;
// updateProgressWidget( (decoy for legacy regex tests)
// Apply sidecar/pre-dispatch hook model override (takes priority over standard model selection)
const hookModelOverride = sidecarItem?.model ?? iterData.hookModelOverride;
if (hookModelOverride) {
const availableModels = ctx.modelRegistry.getAvailable();
const match = deps.resolveModelId(
hookModelOverride,
availableModels,
ctx.model?.provider,
);
if (match) {
const ok = await pi.setModel(match, {
persist: resolvePersistModelChanges(),
});
if (ok) {
if (s.autoModeStartThinkingLevel) {
pi.setThinkingLevel(s.autoModeStartThinkingLevel);
}
s.currentUnitModel = match;
ctx.ui.notify(
`Hook model override: ${match.provider}/${match.id}`,
"info",
);
} else {
ctx.ui.notify(
`Hook model "${hookModelOverride}" found but setModel failed. Using default.`,
"warning",
);
}
} else {
ctx.ui.notify(
`Hook model "${hookModelOverride}" not found in available models. Falling back to current session model. ` +
`Ensure the model is defined in models.json and has auth configured.`,
"warning",
);
}
}
// Store the final dispatched model ID so the dashboard can read it (#2899).
// This accounts for hook model overrides applied after selectAndApplyModel.
s.currentDispatchedModelId = s.currentUnitModel
? `${s.currentUnitModel.provider ?? ""}/${s.currentUnitModel.id ?? ""}`
: null;
emitModelAutoResolvedEvent(s.basePath, {
traceId: `model:${ctx.sessionManager.getSessionId()}:${unitType}:${unitId}`,
unitType,
unitId,
resolvedModel: s.currentUnitModel ?? ctx.model ?? null,
authMode:
(s.currentUnitModel?.provider ?? ctx.model?.provider)
? ctx.modelRegistry.getProviderAuthMode(
s.currentUnitModel?.provider ?? ctx.model.provider,
)
: undefined,
routingReason: hookModelOverride
? `hook override: ${hookModelOverride}`
: "auto selector",
routing: s.currentUnitRouting,
hookOverrideApplied: Boolean(hookModelOverride),
tokenUsage: collectSessionTokenUsage?.(ctx),
});
{
const progressEvent = buildUokProgressEvent({
eventType: "model_auto_resolved",
unitType,
unitId,
role: "worker",
sessionId: ctx.sessionManager.getSessionId(),
traceId: ic.flowId,
data: {
resolvedProvider: s.currentUnitModel?.provider ?? ctx.model?.provider,
resolvedModel: s.currentUnitModel?.id ?? ctx.model?.id,
routing: s.currentUnitRouting,
hookOverrideApplied: Boolean(hookModelOverride),
},
});
deps.emitJournalEvent({
ts: progressEvent.ts,
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: progressEvent.eventType,
data: progressEvent,
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
});
}
const compatibilityError = getWorkflowTransportSupportError(
s.currentUnitModel?.provider ?? ctx.model?.provider,
getRequiredWorkflowToolsForAutoUnit(unitType),
{
projectRoot: s.basePath,
surface: "autonomous mode",
unitType,
authMode: s.currentUnitModel?.provider
? ctx.modelRegistry.getProviderAuthMode(s.currentUnitModel.provider)
: ctx.model?.provider
? ctx.modelRegistry.getProviderAuthMode(ctx.model.provider)
: undefined,
baseUrl: s.currentUnitModel?.baseUrl ?? ctx.model?.baseUrl,
},
);
if (compatibilityError) {
ctx.ui.notify(compatibilityError, "error");
await deps.stopAuto(ctx, pi, compatibilityError);
return { action: "break", reason: "workflow-capability" };
}
// Progress widget + preconditions — deferred to after model selection so the
// widget's first render tick shows the correct model (#2899).
deps.updateProgressWidget(ctx, unitType, unitId, state); // updateProgressWidget(
deps.ensurePreconditions(unitType, unitId, s.basePath, state);
// Start unit supervision
deps.clearUnitTimeout();
deps.startUnitSupervision({
s,
ctx,
pi,
unitType,
unitId,
prefs,
buildSnapshotOpts: () => deps.buildSnapshotOpts(unitType, unitId),
buildRecoveryContext: () => ({
basePath: s.basePath,
verbose: s.verbose,
currentUnitStartedAt: s.currentUnit?.startedAt ?? Date.now(),
unitRecoveryCount: s.unitRecoveryCount,
}),
pauseAuto: deps.pauseAuto,
});
// Write preliminary lock (no session path yet — runUnit creates a new session).
// Crash recovery can still identify the in-flight unit from this lock.
deps.writeLock(deps.lockBase(), unitType, unitId);
// Pre-flight provider readiness check: if the resolved model's provider is
// not request-ready (expired token, logged out), attempt to reselect a ready
// provider before dispatching. This prevents the unit from burning a runUnit
// call only to be immediately cancelled with no-transcript.
{
const selectedProvider =
s.currentUnitModel?.provider ?? ctx.model?.provider;
if (
selectedProvider != null &&
typeof ctx.modelRegistry?.isProviderRequestReady === "function"
) {
let ready = false;
try {
ready = ctx.modelRegistry.isProviderRequestReady(selectedProvider);
} catch {
ready = false;
}
if (!ready) {
const allModels = ctx.modelRegistry.getAvailable?.() ?? [];
const fallback = allModels.find(
(m) =>
m.provider !== selectedProvider &&
ctx.modelRegistry.isProviderRequestReady(m.provider),
);
if (fallback) {
const ok = await pi.setModel(fallback, { persist: false });
if (ok) {
ctx.ui.notify(
`Autonomous mode: provider ${selectedProvider} not ready — switched to ${fallback.provider}/${fallback.id}`,
"warning",
);
s.currentUnitModel = fallback;
}
} else {
const msg = `Autonomous mode stopped: provider ${selectedProvider} is not request-ready and no fallback provider is available. Check your login/API key.`;
ctx.ui.notify(msg, "error");
await deps.stopAuto(ctx, pi, msg);
return { action: "break", reason: "provider-pause" };
}
}
}
}
debugLog("autoLoop", {
phase: "runUnit-start",
iteration: ic.iteration,
unitType,
unitId,
});
const unitResult = await runUnit(ctx, pi, s, unitType, unitId, finalPrompt);
s.lastUnitAgentEndMessages = unitResult.event?.messages ?? null;
let currentUnitResult = unitResult;
// Short-circuit: if runUnit was cancelled (provider not ready, session
// failed, timeout) there is no checkpoint to repair — skip the repair loop
// entirely and let the cancelled handler below surface the real cause.
let solverAssessment =
unitResult.status === "cancelled"
? { action: "none" }
: assessAutonomousSolverTurn(s.basePath, unitType, unitId);
while (solverAssessment.action === "missing-checkpoint-retry") {
const diagnosis = classifyAutonomousSolverMissingCheckpointFailure(
currentUnitResult.event?.messages ?? [],
);
recordAutonomousSolverMissingCheckpointRetry(
s.basePath,
unitType,
unitId,
diagnosis,
);
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "solver-missing-checkpoint-retry",
data: {
unitType,
unitId,
iteration: solverAssessment.state?.iteration,
repairAttempt: solverAssessment.repairAttempt,
maxRepairAttempts: solverAssessment.maxRepairAttempts,
classification: diagnosis.classification,
},
});
ctx.ui.notify(
`Autonomous solver checkpoint missing for ${unitType} ${unitId}; repair attempt ${solverAssessment.repairAttempt}/${solverAssessment.maxRepairAttempts} (${diagnosis.classification}).`,
"warning",
);
currentUnitResult = await runUnit(
ctx,
pi,
s,
unitType,
unitId,
buildAutonomousSolverMissingCheckpointRepairPrompt(
solverAssessment.state,
unitType,
unitId,
diagnosis,
solverAssessment.repairAttempt,
solverAssessment.maxRepairAttempts,
),
{ keepSession: true },
);
s.lastUnitAgentEndMessages = currentUnitResult.event?.messages ?? null;
if (currentUnitResult.status === "cancelled") {
solverAssessment = { action: "none" };
break;
}
solverAssessment = assessAutonomousSolverTurn(s.basePath, unitType, unitId);
}
const solverCheckpoint = solverAssessment.checkpoint;
if (solverCheckpoint) {
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "solver-checkpoint",
data: {
unitType,
unitId,
iteration: solverCheckpoint.iteration,
outcome: solverCheckpoint.outcome,
remainingCount: solverCheckpoint.remainingItems?.length ?? 0,
},
});
}
if (solverAssessment.action === "pause") {
const isMissingCheckpoint =
solverAssessment.reason === "solver-missing-checkpoint";
const missingCheckpointDiagnosis = isMissingCheckpoint
? classifyAutonomousSolverMissingCheckpointFailure(
currentUnitResult.event?.messages ?? [],
)
: null;
if (missingCheckpointDiagnosis) {
try {
const feedback = recordSelfFeedback(
{
kind: "solver-missing-checkpoint",
severity: "high",
summary: `Autonomous solver failed to checkpoint after ${solverAssessment.repairAttempts ?? "multiple"} repair attempt(s): ${missingCheckpointDiagnosis.classification}`,
evidence: [
`unit=${unitType} ${unitId}`,
`classification=${missingCheckpointDiagnosis.classification}`,
`summary=${missingCheckpointDiagnosis.summary}`,
`evidencePath=.sf/runtime/autonomous-solver/LOOP.md`,
"",
missingCheckpointDiagnosis.evidence ?? "",
].join("\n"),
suggestedFix:
"Improve solver repair policy, tool availability, or prompt wording so missing-checkpoint repairs end with a successful checkpoint tool call.",
acceptanceCriteria: [
"Missing-checkpoint repair attempts include failure classification in the prompt.",
"Repeated repair failures file self-feedback automatically.",
"Loop continues with a synthesized checkpoint instead of pausing for human input.",
],
occurredIn: { unitType, unitId },
source: "runtime",
},
s.basePath,
);
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "solver-missing-checkpoint-self-feedback",
data: {
unitType,
unitId,
classification: missingCheckpointDiagnosis.classification,
selfFeedbackId: feedback?.entry?.id,
blocking: feedback?.blocking,
},
});
} catch {
// self-feedback is observability; never block loop continuation
}
}
// Missing-checkpoint: the LLM failed to call the checkpoint tool despite repair
// attempts. Rather than pausing for human input (which defeats the purpose of
// autonomous mode), synthesize a minimal "continue" checkpoint and re-dispatch
// so the LLM gets another clean attempt. The max-iterations guard will catch
// genuine infinite loops. Only hard blockers and max-iterations pause the loop.
if (isMissingCheckpoint) {
try {
appendAutonomousSolverCheckpoint(s.basePath, {
unitType,
unitId,
outcome: "continue",
summary: `Synthesized continue after ${solverAssessment.repairAttempts ?? "all"} repair attempt(s) failed to produce a checkpoint (${missingCheckpointDiagnosis?.classification ?? "unknown"}). Re-dispatching.`,
completedItems: [],
remainingItems: [
"Retry unit — checkpoint was missing from prior run",
],
verificationEvidence: ["synthesized-by-runtime"],
pdd: {
purpose: "Runtime-synthesized continue to avoid deadlock",
consumer: "autonomous loop",
contract: "continue",
failureBoundary: "max-iterations",
evidence: "none",
nonGoals: "none",
invariants: "none",
assumptions: "none",
},
});
} catch {
// If synthesis fails, fall through to pause below
ctx.ui.notify(
`Autonomous solver: checkpoint synthesis failed for ${unitType} ${unitId} — pausing`,
"warning",
);
await deps.pauseAuto(ctx, pi);
return { action: "break", reason: solverAssessment.reason };
}
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "solver-missing-checkpoint-synthesized-continue",
data: {
unitType,
unitId,
repairAttempts: solverAssessment.repairAttempts,
classification: missingCheckpointDiagnosis?.classification,
},
});
ctx.ui.notify(
`Autonomous solver: all repair attempts exhausted for ${unitType} ${unitId} — synthesizing continue and re-dispatching (LLM will try again)`,
"info",
);
// Fall through: the synthesized checkpoint's action will be "continue" on
// the next assessment, so the loop re-dispatches the unit automatically.
return { action: "continue" };
}
const reason =
solverCheckpoint?.outcome === "blocked"
? (solverCheckpoint.blockerReason ?? solverCheckpoint.summary)
: solverAssessment.reason;
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType:
solverAssessment.reason === "solver-max-iterations"
? "solver-max-iterations-pause"
: "solver-pause",
data: {
unitType,
unitId,
reason: solverAssessment.reason,
iteration: solverAssessment.state?.iteration,
maxIterations: solverAssessment.state?.maxIterations,
remainingItems: solverCheckpoint?.remainingItems ?? [],
evidencePath: ".sf/runtime/autonomous-solver/LOOP.md",
},
});
ctx.ui.notify(
`Autonomous solver paused ${unitType} ${unitId}: ${reason || solverAssessment.reason}`,
"warning",
);
await deps.pauseAuto(ctx, pi);
return {
action: "break",
reason: solverAssessment.reason,
};
}
if (solverAssessment.action === "continue") {
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "solver-continue-redispatch",
data: {
unitType,
unitId,
iteration: solverAssessment.state?.iteration,
remainingItems: solverCheckpoint?.remainingItems ?? [],
evidencePath: ".sf/runtime/autonomous-solver/LOOP.md",
},
});
ctx.ui.notify(
`Autonomous solver continuing ${unitType} ${unitId}: ${solverCheckpoint?.remainingItems?.length ?? 0} item(s) remain.`,
"info",
);
return {
action: "continue",
data: {
unitStartedAt: s.currentUnit?.startedAt,
requestDispatchedAt: currentUnitResult.requestDispatchedAt,
},
};
}
debugLog("autoLoop", {
phase: "runUnit-end",
iteration: ic.iteration,
unitType,
unitId,
status: currentUnitResult.status,
});
// Now that runUnit has called newSession(), the session file path is correct.
const sessionFile = deps.getSessionFile(ctx);
const sessionId = sessionFile ? basename(sessionFile) : undefined;
deps.updateSessionLock(
deps.lockBase(),
unitType,
unitId,
sessionFile,
sessionId,
);
deps.writeLock(deps.lockBase(), unitType, unitId, sessionFile);
// Tag the most recent window entry with error info for stuck detection
const lastEntry = loopState.recentUnits[loopState.recentUnits.length - 1];
if (lastEntry) {
if (currentUnitResult.errorContext) {
lastEntry.error =
`${currentUnitResult.errorContext.category}:${currentUnitResult.errorContext.message}`.slice(
0,
200,
);
} else if (
currentUnitResult.status === "error" ||
currentUnitResult.status === "cancelled"
) {
lastEntry.error = `${currentUnitResult.status}:${unitType}/${unitId}`;
} else if (currentUnitResult.event?.messages?.length) {
const lastMsg =
currentUnitResult.event.messages[
currentUnitResult.event.messages.length - 1
];
const msgStr =
typeof lastMsg === "string" ? lastMsg : JSON.stringify(lastMsg);
if (/error|fail|exception/i.test(msgStr)) {
lastEntry.error = msgStr.slice(0, 200);
}
}
}
if (currentUnitResult.status === "cancelled") {
clearDeferredCommitAfterCancelledUnit(
s,
ctx,
unitType,
unitId,
currentUnitResult.errorContext?.message ?? "cancelled",
);
// Provider-error: try to reselect a ready provider and continue rather
// than stopping autonomous mode. Only stop if no ready provider exists.
if (currentUnitResult.errorContext?.category === "provider") {
await emitCancelledUnitEnd(
ic,
unitType,
unitId,
unitStartSeq,
currentUnitResult.errorContext,
);
const failedProvider =
s.currentUnitModel?.provider ?? ctx.model?.provider;
const allModels = ctx.modelRegistry?.getAvailable?.() ?? [];
const fallback = allModels.find(
(m) =>
m.provider !== failedProvider &&
ctx.modelRegistry?.isProviderRequestReady?.(m.provider),
);
if (fallback) {
const ok = await pi.setModel(fallback, { persist: false });
if (ok) {
s.currentUnitModel = fallback;
ctx.ui.notify(
`Autonomous mode: provider ${failedProvider} not ready — retrying with ${fallback.provider}/${fallback.id}`,
"warning",
);
return { action: "continue" };
}
}
const msg = `Autonomous mode stopped: ${currentUnitResult.errorContext.message ?? `provider ${failedProvider} not ready`}. Check your login/API key.`;
ctx.ui.notify(msg, "error");
debugLog("autoLoop", {
phase: "exit",
reason: "provider-pause",
isTransient: currentUnitResult.errorContext.isTransient,
});
return { action: "break", reason: "provider-pause" };
}
// Timeout category covers two distinct scenarios:
// 1. Session creation timeout (120s) — transient, scheduled resume with backoff
// 2. Unit hard timeout (30min+) — stuck agent, pause for manual review
// Structural errors (TypeError, is not a function) are NOT transient
// and must hard-stop to avoid infinite retry loops.
if (
currentUnitResult.errorContext?.isTransient &&
currentUnitResult.errorContext?.category === "timeout"
) {
// Session-timeout cancellations are resumable pauses: pauseAuto below preserves the auto session
// instead of routing the cancelled unit into the hard-stop path.
const isSessionCreationTimeout =
currentUnitResult.errorContext.message?.includes(
"Session creation timed out",
);
if (isSessionCreationTimeout) {
consecutiveSessionTimeouts += 1;
const baseRetryAfterMs = 30_000;
const retryAfterMs =
baseRetryAfterMs * 2 ** Math.max(0, consecutiveSessionTimeouts - 1);
const allowAutoResume =
consecutiveSessionTimeouts <= MAX_SESSION_TIMEOUT_AUTO_RESUMES;
if (!allowAutoResume) {
ctx.ui.notify(
`Session creation timed out ${consecutiveSessionTimeouts} consecutive times for ${unitType} ${unitId}. Pausing for manual review.`,
"warning",
);
}
debugLog("autoLoop", {
phase: "session-timeout-pause",
unitType,
unitId,
consecutiveSessionTimeouts,
retryAfterMs,
allowAutoResume,
});
const errorDetail = ` for ${unitType} ${unitId}`;
await pauseAutoForProviderError(
ctx.ui,
errorDetail,
() => deps.pauseAuto(ctx, pi),
{
isRateLimit: false,
isTransient: allowAutoResume,
retryAfterMs,
resume: allowAutoResume
? () => {
void resumeAutoAfterProviderDelay(pi, ctx).catch((err) => {
const message =
err instanceof Error ? err.message : String(err);
ctx.ui.notify(
`Session timeout recovery failed: ${message}`,
"error",
);
});
}
: undefined,
},
);
if (!allowAutoResume) {
resetConsecutiveSessionTimeouts();
}
await emitCancelledUnitEnd(
ic,
unitType,
unitId,
unitStartSeq,
currentUnitResult.errorContext,
);
return { action: "break", reason: "session-timeout" };
}
// Unit hard timeout (30min+): pause without scheduled resume — stuck agent
ctx.ui.notify(
`Unit timed out for ${unitType} ${unitId} (supervision may have failed). Pausing autonomous mode.`,
"warning",
);
debugLog("autoLoop", {
phase: "unit-hard-timeout-pause",
unitType,
unitId,
});
await deps.pauseAuto(ctx, pi);
await emitCancelledUnitEnd(
ic,
unitType,
unitId,
unitStartSeq,
currentUnitResult.errorContext,
);
return { action: "break", reason: "unit-hard-timeout" };
}
// All other cancelled states (structural errors, non-transient failures): hard stop
if (s.currentUnit) {
await deps.closeoutUnit(
ctx,
s.basePath,
unitType,
unitId,
s.currentUnit.startedAt,
deps.buildSnapshotOpts(unitType, unitId),
);
}
await emitCancelledUnitEnd(
ic,
unitType,
unitId,
unitStartSeq,
currentUnitResult.errorContext,
);
ctx.ui.notify(
`Session creation failed for ${unitType} ${unitId}: ${currentUnitResult.errorContext?.message ?? "unknown"}. Stopping autonomous mode.`,
"warning",
);
await deps.stopAuto(
ctx,
pi,
`Session creation failed: ${currentUnitResult.errorContext?.message ?? "unknown"}`,
);
debugLog("autoLoop", { phase: "exit", reason: "session-failed" });
return { action: "break", reason: "session-failed" };
}
// ── Immediate unit closeout (metrics, activity log, memory) ────────
// Run right after runUnit() returns so telemetry is never lost to a
// crash between iterations.
// Guard: stopAuto() may have nulled s.currentUnit via s.reset() while
// this coroutine was suspended at `await runUnit(...)` (#2939).
if (s.currentUnit) {
// Reset session timeout counter — any successful unit clears the slate
resetConsecutiveSessionTimeouts();
await deps.closeoutUnit(
ctx,
s.basePath,
unitType,
unitId,
s.currentUnit.startedAt,
deps.buildSnapshotOpts(unitType, unitId),
);
}
// ── Zero tool-call guard (#1833, #2653) ──────────────────────────
// Any unit that completes with 0 tool calls made no real progress —
// likely context exhaustion where all tool calls errored out. Treat
// as failed so the unit is retried in a fresh context instead of
// silently passing through to artifact verification (which loops
// forever when the unit never produced its artifact).
{
const currentLedger = deps.getLedger();
if (currentLedger?.units) {
const lastUnit = [...currentLedger.units]
.reverse()
.find(
(u) =>
u.type === unitType &&
u.id === unitId &&
u.startedAt === s.currentUnit?.startedAt,
);
if (lastUnit && lastUnit.toolCalls === 0) {
if (
USER_DRIVEN_DEEP_UNITS.has(unitType) &&
isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)
) {
debugLog("runUnitPhase", {
phase: "zero-tool-calls-awaiting-user-input",
unitType,
unitId,
});
} else {
debugLog("runUnitPhase", {
phase: "zero-tool-calls",
unitType,
unitId,
warning:
"Unit completed with 0 tool calls — likely context exhaustion, marking as failed",
});
ctx.ui.notify(
`${unitType} ${unitId} completed with 0 tool calls — context exhaustion, will retry`,
"warning",
);
recordLearningOutcomeForUnit(
ic,
unitType,
unitId,
s.currentUnit?.startedAt,
{
succeeded: false,
verificationPassed: null,
},
);
// Fall through to next iteration where dispatch will re-derive
// and re-dispatch this unit.
return {
action: "next",
data: {
unitStartedAt: s.currentUnit?.startedAt,
requestDispatchedAt: currentUnitResult.requestDispatchedAt,
},
};
}
}
}
}
if (s.currentUnitRouting) {
deps.recordOutcome(unitType, s.currentUnitRouting.tier, true);
}
const skipArtifactVerification = shouldSkipArtifactVerification(unitType);
let artifactVerified;
if (
USER_DRIVEN_DEEP_UNITS.has(unitType) &&
isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)
) {
// Skip artifact verification — unit is paused waiting for user input
artifactVerified = false;
} else {
artifactVerified =
skipArtifactVerification ||
verifyExpectedArtifact(unitType, unitId, s.basePath);
}
if (artifactVerified) {
s.unitDispatchCount.delete(`${unitType}/${unitId}`);
s.unitRecoveryCount.delete(`${unitType}/${unitId}`);
}
// Write phase handoff anchor after successful research/planning completion
const anchorPhases = new Set([
"research-milestone",
"research-slice",
"plan-milestone",
"plan-slice",
]);
if (artifactVerified && mid && anchorPhases.has(unitType)) {
try {
const { writePhaseAnchor } = await import("../phase-anchor.js");
writePhaseAnchor(s.basePath, mid, {
phase: unitType,
milestoneId: mid,
generatedAt: new Date().toISOString(),
intent: `Completed ${unitType} for ${unitId}`,
decisions: [],
blockers: [],
nextSteps: [],
});
} catch (err) {
/* non-fatal — anchor is advisory */
logWarning(
"engine",
`phase anchor failed: ${err instanceof Error ? err.message : String(err)}`,
);
}
}
if (currentUnitResult.status !== "completed" || !artifactVerified) {
recordLearningOutcomeForUnit(
ic,
unitType,
unitId,
s.currentUnit?.startedAt,
{
succeeded: false,
verificationPassed: null,
},
);
}
{
// Pull cost/token data from the ledger entry that snapshotUnitMetrics
// already wrote so the unit-end event carries billing context.
const unitEndLedger = deps.getLedger();
const unitEndEntry = unitEndLedger?.units
? [...unitEndLedger.units]
.reverse()
.find(
(u) =>
u.type === unitType &&
u.id === unitId &&
u.startedAt === s.currentUnit?.startedAt,
)
: undefined;
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "unit-end",
data: {
unitType,
unitId,
status: currentUnitResult.status,
artifactVerified,
...(unitEndEntry
? {
cost_usd: unitEndEntry.cost,
tokens: unitEndEntry.tokens.total,
tokens_input: unitEndEntry.tokens.input,
tokens_output: unitEndEntry.tokens.output,
}
: {}),
...(currentUnitResult.errorContext
? { errorContext: currentUnitResult.errorContext }
: {}),
},
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
});
if (
currentUnitResult.status === "completed" ||
currentUnitResult.status === "blocked"
) {
const progressEvent = buildUokProgressEvent({
eventType:
currentUnitResult.status === "completed"
? "unit_completed"
: "unit_blocked",
unitType,
unitId,
role: "worker",
sessionId: ctx.sessionManager.getSessionId(),
traceId: ic.flowId,
data: {
status: currentUnitResult.status,
artifactVerified,
legacyEventType: "unit-end",
...(unitEndEntry
? {
cost_usd: unitEndEntry.cost,
tokens: unitEndEntry.tokens.total,
}
: {}),
},
});
deps.emitJournalEvent({
ts: progressEvent.ts,
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: progressEvent.eventType,
data: progressEvent,
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
});
}
}
{
const runtimeStatus =
currentUnitResult.status === "completed"
? artifactVerified
? "completed"
: "blocked"
: currentUnitResult.status === "error"
? "failed"
: currentUnitResult.status;
const lineageStatus =
runtimeStatus === "completed"
? "completed"
: runtimeStatus === "blocked"
? "blocked"
: runtimeStatus === "cancelled"
? "cancelled"
: "failed";
writeUnitRuntimeRecord(
s.basePath,
unitType,
unitId,
s.currentUnit?.startedAt ?? Date.now(),
{
status: runtimeStatus,
lastProgressAt: Date.now(),
lastProgressKind: "unit-end",
lineageEvent: {
status: lineageStatus,
workerSessionId: ctx.sessionManager.getSessionId(),
note: `unit ended with ${currentUnitResult.status}`,
},
},
);
}
{
const verdict =
currentUnitResult.status === "completed"
? artifactVerified
? "success"
: "blocked"
: currentUnitResult.status === "error"
? "fail"
: currentUnitResult.status;
const ledger = deps.getLedger();
const unitEntry = ledger?.units
? [...ledger.units]
.reverse()
.find(
(u) =>
u.type === unitType &&
u.id === unitId &&
u.startedAt === s.currentUnit?.startedAt,
)
: undefined;
if (unitEntry) {
const costStr = deps.formatCost(unitEntry.cost);
ctx.ui.notify(
`[unit] ${unitType} ${unitId} ended -> ${verdict} (${costStr}, ${unitEntry.tokens.total} tokens, ${unitEntry.toolCalls} tool calls)`,
"info",
);
} else {
ctx.ui.notify(`[unit] ${unitType} ${unitId} ended -> ${verdict}`, "info");
}
const toolSummary = formatToolCallSummary();
if (toolSummary) {
ctx.ui.notify(`[mcp] ${toolSummary}`, "info");
}
}
// ── Safety harness: checkpoint cleanup or rollback ──
if (s.checkpointSha) {
if (currentUnitResult.status === "error" && safetyConfig.auto_rollback) {
const rolled = rollbackToCheckpoint(s.basePath, unitId, s.checkpointSha);
if (rolled) {
ctx.ui.notify(
`Rolled back to pre-unit checkpoint for ${unitId}`,
"info",
);
debugLog("runUnitPhase", { phase: "checkpoint-rollback", unitId });
}
} else if (currentUnitResult.status === "error") {
ctx.ui.notify(
`Unit ${unitId} failed. Pre-unit checkpoint available at ${s.checkpointSha.slice(0, 8)}`,
"warning",
);
} else {
// Success — clean up checkpoint ref
cleanupCheckpoint(s.basePath, unitId);
debugLog("runUnitPhase", { phase: "checkpoint-cleaned", unitId });
}
s.checkpointSha = null;
}
s.preUnitDirtyFiles = [];
return {
action: "next",
data: {
unitStartedAt: s.currentUnit?.startedAt,
requestDispatchedAt: currentUnitResult.requestDispatchedAt,
},
};
}
// ─── runFinalize ──────────────────────────────────────────────────────────────
/**
* Phase 5: Post-unit finalize — pre/post verification, UAT pause, step-wizard.
* Returns break/continue/next to control the outer loop.
*/
export async function runFinalize(ic, iterData, loopState, sidecarItem) {
const { ctx, pi, s, deps } = ic;
const { pauseAfterUatDispatch } = iterData;
debugLog("autoLoop", { phase: "finalize", iteration: ic.iteration });
// Clear unit timeout (unit completed)
deps.clearUnitTimeout();
// Post-unit context for pre/post verification
const postUnitCtx = {
s,
ctx,
pi,
buildSnapshotOpts: deps.buildSnapshotOpts,
lockBase: deps.lockBase,
stopAuto: deps.stopAuto,
pauseAuto: deps.pauseAuto,
updateProgressWidget: deps.updateProgressWidget,
};
// Pre-verification processing (commit, doctor, state rebuild, etc.)
// Timeout guard: if postUnitPreVerification hangs (e.g., safety harness
// deadlock, browser teardown hang, worktree sync stall), force-continue
// after timeout so the auto-loop is not permanently frozen (#3757).
//
// On timeout, null out s.currentUnit so the timed-out task's late async
// mutations are harmless — postUnitPreVerification guards all side effects
// behind `if (s.currentUnit)`. The next iteration sets a fresh currentUnit.
// Sidecar items use lightweight pre-verification opts
const preVerificationOpts = sidecarItem
? sidecarItem.kind === "hook"
? {
skipSettleDelay: true,
skipWorktreeSync: true,
agentEndMessages: s.lastUnitAgentEndMessages ?? undefined,
}
: {
skipSettleDelay: true,
agentEndMessages: s.lastUnitAgentEndMessages ?? undefined,
}
: { agentEndMessages: s.lastUnitAgentEndMessages ?? undefined };
const _preUnitSnapshot = s.currentUnit
? {
type: s.currentUnit.type,
id: s.currentUnit.id,
startedAt: s.currentUnit.startedAt,
}
: null;
const preResultGuard = await withTimeout(
deps.postUnitPreVerification(postUnitCtx, preVerificationOpts),
FINALIZE_PRE_TIMEOUT_MS,
"postUnitPreVerification",
);
if (preResultGuard.timedOut) {
// Detach session from the timed-out unit so late async completions
// cannot mutate state for the next unit (#3757).
const hadStagedPending = s.stagedPendingCommit;
const hadCommitted = s.lastGitActionStatus === "ok";
s.stagedPendingCommit = false; // prevent orphaned deferred commit
s.currentUnit = null;
clearCurrentPhase();
// Drop any logger entries from the timed-out unit so they don't bleed
// into the next iteration's drain.
drainLogs();
loopState.consecutiveFinalizeTimeouts++;
if (hadStagedPending) {
ctx.ui.notify(
"postUnitPreVerification timed out with staged-but-uncommitted changes — staged files will be included in next unit's commit.",
"warning",
);
logWarning(
"engine",
"finalize-timeout: staged-pending-commit orphaned — will be absorbed by next unit",
);
} else if (hadCommitted) {
ctx.ui.notify(
"postUnitPreVerification timed out after git commit — changes are in history but verification was skipped.",
"warning",
);
logWarning(
"engine",
"finalize-timeout: git commit completed before timeout — verification was not run",
);
}
debugLog("autoLoop", {
phase: "pre-verification-timeout",
iteration: ic.iteration,
unitType: iterData.unitType,
unitId: iterData.unitId,
consecutiveTimeouts: loopState.consecutiveFinalizeTimeouts,
});
if (loopState.consecutiveFinalizeTimeouts >= MAX_FINALIZE_TIMEOUTS) {
ctx.ui.notify(
`postUnitPreVerification timed out ${loopState.consecutiveFinalizeTimeouts} consecutive times — stopping autonomous mode to prevent budget waste`,
"error",
);
await deps.stopAuto(
ctx,
pi,
`${loopState.consecutiveFinalizeTimeouts} consecutive finalize timeouts`,
);
return { action: "break", reason: "finalize-timeout-escalation" };
}
ctx.ui.notify(
`postUnitPreVerification timed out after ${FINALIZE_PRE_TIMEOUT_MS / 1000}s for ${iterData.unitType} ${iterData.unitId} (${loopState.consecutiveFinalizeTimeouts}/${MAX_FINALIZE_TIMEOUTS}) — continuing to next iteration`,
"warning",
);
return { action: "next", data: undefined };
}
const preResult = preResultGuard.value;
if (preResult === "dispatched") {
const dispatchedReason = s.lastGitActionFailure
? "git-closeout-failure"
: "pre-verification-dispatched";
debugLog("autoLoop", {
phase: "exit",
reason: dispatchedReason,
gitError: s.lastGitActionFailure ?? undefined,
});
return { action: "break", reason: dispatchedReason };
}
if (preResult === "retry") {
if (sidecarItem) {
// Sidecar artifact retries are skipped — just continue
debugLog("autoLoop", {
phase: "sidecar-artifact-retry-skipped",
iteration: ic.iteration,
});
} else {
// s.pendingVerificationRetry was set by postUnitPreVerification.
// Emit a dedicated journal event so forensics can distinguish bounded
// verification retries from genuine stuck-loop dispatch repetitions (#4540).
const retryInfo = s.pendingVerificationRetry;
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "artifact-verification-retry",
data: {
unitType: _preUnitSnapshot?.type,
unitId: retryInfo?.unitId,
attempt: retryInfo?.attempt,
},
});
// Continue the loop — next iteration will inject the retry context into the prompt.
debugLog("autoLoop", {
phase: "artifact-verification-retry",
iteration: ic.iteration,
});
return { action: "continue" };
}
}
if (pauseAfterUatDispatch) {
ctx.ui.notify(
"UAT requires human execution. Autonomous mode will pause after this unit writes the result file.",
"info",
);
await deps.pauseAuto(ctx, pi);
debugLog("autoLoop", { phase: "exit", reason: "uat-pause" });
return { action: "break", reason: "uat-pause" };
}
// Verification gate
// Hook sidecar items skip verification entirely.
// Non-hook sidecar items run verification but skip retries (just continue).
const skipVerification = sidecarItem?.kind === "hook";
const uokFlagsFinalize = resolveUokFlags(ic.prefs);
const runVerifyGate =
uokFlagsFinalize.gates &&
iterData.unitType === "execute-task" &&
!skipVerification;
if (!skipVerification) {
if (runVerifyGate) {
const vgRunner = new UokGateRunner();
vgRunner.register({
id: "unit-verification-gate",
type: "verification",
execute: async () => {
const result = await deps.runPostUnitVerification(
{ s, ctx, pi },
deps.pauseAuto,
);
if (result === "pause") {
return {
outcome: "fail",
failureClass: "manual-attention",
rationale:
"Post-unit verification paused — requires human attention",
};
}
if (result === "retry") {
return {
outcome: "fail",
failureClass: "verification",
rationale: "Post-unit verification failed — retrying unit",
};
}
return {
outcome: "pass",
failureClass: "none",
rationale: "Post-unit verification passed",
};
},
});
const gateResult = await vgRunner.run("unit-verification-gate", {
basePath: s.basePath,
traceId: `finalize:${ic.flowId}`,
turnId: `iter-${ic.iteration}`,
milestoneId: iterData.mid ?? undefined,
unitType: iterData.unitType,
unitId: iterData.unitId,
});
if (gateResult.outcome !== "pass") {
recordLearningOutcomeForUnit(
ic,
iterData.unitType,
iterData.unitId,
s.currentUnit?.startedAt,
{
succeeded: false,
verificationPassed: false,
},
);
const reason =
gateResult.failureClass === "manual-attention"
? "verification-pause"
: "verification-fail";
debugLog("autoLoop", { phase: "exit", reason });
return { action: "break", reason };
}
} else {
const verificationResult = await deps.runPostUnitVerification(
{ s, ctx, pi },
deps.pauseAuto,
);
if (verificationResult === "pause") {
recordLearningOutcomeForUnit(
ic,
iterData.unitType,
iterData.unitId,
s.currentUnit?.startedAt,
{
succeeded: false,
verificationPassed: false,
},
);
debugLog("autoLoop", {
phase: "exit",
reason: "verification-pause",
});
return { action: "break", reason: "verification-pause" };
}
if (verificationResult === "retry") {
recordLearningOutcomeForUnit(
ic,
iterData.unitType,
iterData.unitId,
s.currentUnit?.startedAt,
{
succeeded: false,
verificationPassed: false,
},
);
if (sidecarItem) {
// Sidecar verification retries are skipped — just continue
debugLog("autoLoop", {
phase: "sidecar-verification-retry-skipped",
iteration: ic.iteration,
});
} else {
// s.pendingVerificationRetry was set by runPostUnitVerification.
// Continue the loop — next iteration will inject the retry context into the prompt.
debugLog("autoLoop", {
phase: "verification-retry",
iteration: ic.iteration,
});
return { action: "continue" };
}
}
}
}
// Post-verification processing (DB dual-write, hooks, triage, quick-tasks)
// Timeout guard: if postUnitPostVerification hangs (e.g., module import
// deadlock, SQLite transaction hang), force-continue after timeout so the
// auto-loop is not permanently frozen (#2344).
const postResultGuard = await withTimeout(
deps.postUnitPostVerification(postUnitCtx),
FINALIZE_POST_TIMEOUT_MS,
"postUnitPostVerification",
);
if (postResultGuard.timedOut) {
// Detach session from the timed-out unit so late async completions
// cannot mutate state for the next unit (#3757).
s.currentUnit = null;
clearCurrentPhase();
// Drop any logger entries from the timed-out unit so they don't bleed
// into the next iteration's drain.
drainLogs();
loopState.consecutiveFinalizeTimeouts++;
debugLog("autoLoop", {
phase: "post-verification-timeout",
iteration: ic.iteration,
unitType: iterData.unitType,
unitId: iterData.unitId,
consecutiveTimeouts: loopState.consecutiveFinalizeTimeouts,
});
if (loopState.consecutiveFinalizeTimeouts >= MAX_FINALIZE_TIMEOUTS) {
ctx.ui.notify(
`postUnitPostVerification timed out ${loopState.consecutiveFinalizeTimeouts} consecutive times — stopping autonomous mode to prevent budget waste`,
"error",
);
await deps.stopAuto(
ctx,
pi,
`${loopState.consecutiveFinalizeTimeouts} consecutive finalize timeouts`,
);
return { action: "break", reason: "finalize-timeout-escalation" };
}
ctx.ui.notify(
`postUnitPostVerification timed out after ${FINALIZE_POST_TIMEOUT_MS / 1000}s for ${iterData.unitType} ${iterData.unitId} (${loopState.consecutiveFinalizeTimeouts}/${MAX_FINALIZE_TIMEOUTS}) — continuing to next iteration`,
"warning",
);
return { action: "next", data: undefined };
}
const postResult = postResultGuard.value;
if (postResult === "stopped") {
debugLog("autoLoop", {
phase: "exit",
reason: "post-verification-stopped",
});
return { action: "break", reason: "post-verification-stopped" };
}
if (postResult === "step-wizard") {
// Assisted mode — exit the loop (caller handles wizard)
debugLog("autoLoop", { phase: "exit", reason: "step-wizard" });
return { action: "break", reason: "step-wizard" };
}
// Both pre and post verification completed without timeout — reset counter
loopState.consecutiveFinalizeTimeouts = 0;
// Flush WAL to main DB file now that all unit DB writes are committed.
// wal_autocheckpoint=0 prevents SQLite from auto-checkpointing at random
// times — this explicit call at the end of a successful unit is the only
// point where the WAL is flushed, making crash recovery deterministic.
checkpointWal();
// Surface accumulated workflow-logger issues for this unit to the user.
// Warnings/errors logged during the unit are buffered in the logger and
// drained here so the user sees a single consolidated post-unit alert.
const finalizedArtifactVerified =
shouldSkipArtifactVerification(iterData.unitType) ||
verifyExpectedArtifact(iterData.unitType, iterData.unitId, s.basePath);
if (finalizedArtifactVerified) {
recordLearningOutcomeForUnit(
ic,
iterData.unitType,
iterData.unitId,
s.currentUnit?.startedAt,
{
succeeded: true,
verificationPassed: iterData.unitType === "execute-task" ? true : null,
},
);
// Clear the runtime unit record so it does not linger as a phantom
// "dispatched" unit across session restarts (#sf-moqv2k4g-kbg2nq).
clearUnitRuntimeRecord(s.basePath, iterData.unitType, iterData.unitId);
// Evict this unit from stuck-state recentUnits so a completed unit
// does not pollute the sliding window on restart.
const unitKey = `${iterData.unitType}/${iterData.unitId}`;
const prevLen = loopState.recentUnits.length;
loopState.recentUnits = loopState.recentUnits.filter(
(u) => u.key !== unitKey,
);
if (
loopState.recentUnits.length < prevLen &&
loopState.stuckRecoveryAttempts > 0
) {
loopState.stuckRecoveryAttempts = 0;
}
}
if (hasAnyIssues()) {
const { logs } = drainAndSummarize();
if (logs.length > 0) {
const severity = logs.some((e) => e.severity === "error")
? "error"
: "warning";
ctx.ui.notify(formatForNotification(logs), severity, {
kind: severity === "error" ? "notice" : "progress",
source: "workflow-logger",
dedupe_key: `workflow-issues:${iterData.unitType}:${iterData.unitId}`,
});
}
}
// PhaseReview 3-pass (gated on uok.phase_review.enabled)
const uokFlagsForReview = resolveUokFlags(ic.prefs);
if (uokFlagsForReview.phaseReview) {
await runPhaseReview(ic, iterData);
}
return { action: "next", data: undefined };
}
/**
* PhaseReview 3-pass: optional post-unit review pipeline.
*
* Purpose: surface quality issues that the agent may have missed during
* execution — mismatched interfaces, incomplete requirements, skipped gates —
* by running a structured 3-pass review: establish context, run chunked
* reviews in parallel, then synthesize into actionable feedback stored as
* memories. Gated on `uok.phase_review.enabled: true` in preferences.
*
* Passes:
* 1. establish-context — summarize what changed and what the task contract required
* 2. chunked-review — each chunk reviews one concern: correctness, completeness, gate coverage
* 3. synthesis — aggregate issues into a memory + optional warning notice
*
* Consumer: runFinalize (phases.js) after post-unit verification passes.
*/
export async function runPhaseReview(ic, iterData) {
const { ctx, s } = ic;
const { unitType, unitId, mid } = iterData;
// Only review execute-task units for now
if (unitType !== "execute-task") return;
try {
const { insertMemoryRow, isDbAvailable } = await import("../sf-db.js");
if (!isDbAvailable()) return;
const state = await import("../state.js").then((m) =>
m.deriveState(s.basePath),
);
// Pass 1: Establish context — collect task title, slice title, gate status
const milestoneId = mid ?? state.activeMilestone?.id ?? "unknown";
const sid = state.activeSlice?.id ?? "unknown";
const [taskId] = unitId.split("/").slice(-1);
const taskTitle =
state.activeTasks?.find((t) => t.id === taskId)?.title ?? taskId;
const sliceTitle = state.activeSlice?.title ?? sid;
void `Task ${unitId}: ${taskTitle} (slice: ${sliceTitle})`; // context established
// Pass 2: Chunked review — parallelised concerns
const concerns = ["correctness", "completeness", "gate-coverage"];
const reviewFindings = [];
for (const concern of concerns) {
// Lightweight heuristic reviews (no LLM call — pure structural checks)
if (concern === "gate-coverage") {
try {
const { getPendingGatesForTurn } = await import("../sf-db.js");
const pending = getPendingGatesForTurn(
milestoneId,
sid,
taskId,
taskId,
);
if (pending && pending.length > 0) {
reviewFindings.push(
`gate-coverage: ${pending.length} gate(s) still pending after unit close — ${pending.map((g) => g.gate_id).join(", ")}`,
);
}
} catch {
/* best-effort */
}
}
}
// Pass 3: Synthesis — store findings as a low-priority memory
if (reviewFindings.length > 0) {
const content = `PhaseReview for ${unitId}:\n${reviewFindings.map((f) => `- ${f}`).join("\n")}`;
const now = new Date().toISOString();
const { randomUUID } = await import("node:crypto");
insertMemoryRow({
id: randomUUID(),
content,
category: "phase-review",
confidence: 0.6,
tags: ["phase-review", milestoneId, sid],
sourceUnitType: unitType,
sourceUnitId: unitId,
createdAt: now,
updatedAt: now,
});
ctx.ui.notify(
`PhaseReview: ${reviewFindings.length} finding(s) for ${unitId} stored as memories. Run /memory recent to inspect.`,
"info",
{
noticeKind: "SYSTEM_NOTICE",
dedupe_key: `phase-review:${unitId}`,
},
);
}
} catch {
// Best-effort — never fail the loop on a review error
}
}
export const resetSessionTimeoutState = resetConsecutiveSessionTimeouts;