singularity-forge/src/resources/extensions/sf/auto/phases.js
2026-05-04 23:27:20 +02:00

2191 lines
107 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* auto/phases.ts — Pipeline phases for the auto-loop.
*
* Contains: runPreDispatch, runDispatch, runGuards, runUnitPhase, runFinalize,
* plus internal helpers generateMilestoneReport and closeoutAndStop.
*
* Imports from: auto/types, auto/detect-stuck, auto/run-unit, auto/loop-deps
*/
import { cpSync, existsSync, readdirSync } from "node:fs";
import { basename, dirname, join, parse as parsePath } from "node:path";
import { importExtensionModule, } from "@singularity-forge/pi-coding-agent";
import { clearCurrentPhase, setCurrentPhase, } from "../../shared/sf-phase-state.js";
import { atomicWriteSync } from "../atomic-write.js";
import { resetCompletionNudgeState } from "../auto-completion-nudge.js";
import { isAwaitingUserInput, USER_DRIVEN_DEEP_UNITS, } from "../auto-post-unit.js";
import { buildLoopRemediationSteps, diagnoseExpectedArtifact, verifyExpectedArtifact, } from "../auto-recovery.js";
import { collectSessionTokenUsage, collectWorktreeFingerprint, countChangedFiles, resetRunawayGuardState, } from "../auto-runaway-guard.js";
import { formatToolCallSummary, resetToolCallCounts, } from "../auto-tool-tracking.js";
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
import { debugLog } from "../debug-logger.js";
import { PROJECT_FILES } from "../detection.js";
import { MergeConflictError } from "../git-service.js";
import { recordLearnedOutcome } from "../learning/runtime.js";
import { resolveMilestoneFile, resolveSliceFile, sfRoot } from "../paths.js";
import { resolvePersistModelChanges } from "../preferences.js";
import { approveProductionMutationWithLlmPolicy, ensureProductionMutationApprovalTemplate, readProductionMutationApprovalStatus, } from "../production-mutation-approval.js";
import { pauseAutoForProviderError } from "../provider-error-pause.js";
import { loadEvidenceFromDisk, resetEvidence, } from "../safety/evidence-collector.js";
import { getDirtyFiles } from "../safety/file-change-validator.js";
import { cleanupCheckpoint, createCheckpoint, rollbackToCheckpoint, } from "../safety/git-checkpoint.js";
import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
import { getMilestoneSlices, getSliceTaskCounts, getTask, isDbAvailable, } from "../sf-db.js";
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
import { handleProductAudit, } from "../tools/product-audit-tool.js";
import { parseUnitId } from "../unit-id.js";
import { clearUnitRuntimeRecord, writeUnitRuntimeRecord } from "../unit-runtime.js";
import { resolveUokFlags } from "../uok/flags.js";
import { UokGateRunner } from "../uok/gate-runner.js";
import { ensurePlanV2Graph as ensurePlanningFlowGraph, isEmptyPlanV2GraphResult, isMissingFinalizedContextResult, } from "../uok/plan-v2.js";
import { _resetLogs, drainAndSummarize, drainLogs, formatForNotification, hasAnyIssues, logError, logWarning, } from "../workflow-logger.js";
import { getRequiredWorkflowToolsForAutoUnit, getWorkflowTransportSupportError, } from "../workflow-mcp.js";
import { resolveWorktreeProjectRoot } from "../worktree-root.js";
import { detectStuck } from "./detect-stuck.js";
import { FINALIZE_POST_TIMEOUT_MS, FINALIZE_PRE_TIMEOUT_MS, withTimeout, } from "./finalize-timeout.js";
import { runUnit } from "./run-unit.js";
import { BUDGET_THRESHOLDS, MAX_FINALIZE_TIMEOUTS, MAX_RECOVERY_CHARS, } from "./types.js";
// ─── Session timeout auto-resume state ────────────────────────────────────────
let consecutiveSessionTimeouts = 0;
const MAX_SESSION_TIMEOUT_AUTO_RESUMES = 3;
function resetConsecutiveSessionTimeouts() {
consecutiveSessionTimeouts = 0;
}
// ─── generateMilestoneReport ──────────────────────────────────────────────────
/**
* Resolve the base path for milestone reports.
* Prefers originalBasePath (project root) over basePath (which may be a worktree).
* Exported for testing as _resolveReportBasePath.
*/
export function _resolveReportBasePath(s) {
return s.originalBasePath || s.basePath;
}
/**
* Fire the product-audit for a milestone after successful merge.
* Uses s.productAuditMilestoneId as a guard to ensure the audit fires exactly
* once per milestone (mergeAndExit can be called multiple times for the same
* milestone at different transition points).
*
* The audit is fired with a "no-gaps" placeholder verdict. Re-run
* `/sf product-audit` manually for full LLM-powered gap analysis.
*/
async function maybeFireProductAudit(s, ctx) {
const mid = s.currentMilestoneId;
if (!mid)
return;
// Guard: only fire once per milestone
if (s.productAuditMilestoneId === mid)
return;
s.productAuditMilestoneId = mid;
const params = {
milestoneId: mid,
verdict: "no-gaps",
summary: "Auto-fired placeholder audit at milestone merge. Re-run `/sf product-audit` for full LLM-powered gap analysis.",
gaps: [],
};
const result = await handleProductAudit(params, s.basePath);
if ("error" in result) {
logWarning("engine", "Product audit auto-fire failed", {
milestone: mid,
error: result.error,
});
ctx.ui.notify(`Product audit for ${mid} auto-fired but may need manual refresh: ${result.error}`, "warning");
}
else {
debugLog("autoLoop", {
phase: "product-audit-fired",
milestone: mid,
jsonPath: result.jsonPath,
});
}
}
function clearDeferredCommitAfterCancelledUnit(s, ctx, unitType, unitId, reason) {
if (!s.stagedPendingCommit && !s.pendingCommitTaskContext)
return;
s.stagedPendingCommit = false;
s.pendingCommitTaskContext = null;
debugLog("autoLoop", {
phase: "cancelled-unit-deferred-commit-cleared",
unitType,
unitId,
reason,
});
ctx.ui.notify(`Cancelled ${unitType} ${unitId}; staged changes were preserved for recovery and not auto-committed.`, "warning");
}
export function requiresHumanProductionMutationApproval(text) {
const normalized = text.toLowerCase();
const mentionsProduction = /\b(production|prod|live|hetzner)\b/.test(normalized) ||
normalized.includes("centralcloud.com");
if (!mentionsProduction)
return false;
const mentionsUnifiedFailover = normalized.includes("unified_failover") ||
normalized.includes("unified-failover") ||
normalized.includes("/action/unified");
if (!mentionsUnifiedFailover)
return false;
return /\b(post|enqueue|create|insert|command row|pending command)\b/.test(normalized);
}
/**
* Resolve the authoritative project base for dispatch guards.
* Prior-milestone completion lives at the project root, even when the active
* unit is running inside an auto worktree.
*/
export function _resolveDispatchGuardBasePath(s) {
return resolveWorktreeProjectRoot(s.basePath, s.originalBasePath);
}
const PLANNING_FLOW_GATE_PHASES = new Set([
"executing",
"summarizing",
"validating-milestone",
"completing-milestone",
]);
function shouldRunPlanningFlowGate(phase) {
return PLANNING_FLOW_GATE_PHASES.has(phase);
}
function shouldSkipArtifactVerification(unitType) {
return unitType.startsWith("hook/") || unitType === "custom-step";
}
function recordLearningOutcomeForUnit(ic, unitType, unitId, startedAt, outcome) {
if (!startedAt)
return;
const unitModel = ic.s.currentUnitModel;
const unitEntry = ic.deps.getLedger()?.units
? [
...(ic.deps.getLedger()?.units ?? []),
]
.reverse()
.find((u) => u.type === unitType && u.id === unitId && u.startedAt === startedAt)
: undefined;
const provider = unitModel?.provider ?? null;
const modelId = unitModel?.id ?? unitEntry?.model ?? null;
if (!provider || !modelId || !unitEntry)
return;
recordLearnedOutcome({
modelId,
provider,
unitType,
unitId,
succeeded: outcome.succeeded,
retries: outcome.retries ?? 0,
escalated: outcome.escalated ?? false,
verification_passed: outcome.verificationPassed,
blocker_discovered: outcome.blockerDiscovered ?? false,
duration_ms: Math.max(0, unitEntry.finishedAt - unitEntry.startedAt),
tokens_total: unitEntry.tokens.total,
cost_usd: unitEntry.cost,
recorded_at: unitEntry.startedAt,
});
}
/**
* Generate and write an HTML milestone report snapshot.
* Extracted from the milestone-transition block in autoLoop.
*/
async function generateMilestoneReport(s, ctx, milestoneId) {
const { loadVisualizerData } = await importExtensionModule(import.meta.url, "../visualizer-data.js");
const { generateHtmlReport } = await importExtensionModule(import.meta.url, "../export-html.js");
const { writeReportSnapshot } = await importExtensionModule(import.meta.url, "../reports.js");
const { basename } = await import("node:path");
const reportBasePath = _resolveReportBasePath(s);
const snapData = await loadVisualizerData(reportBasePath);
const completedMs = snapData.milestones.find((m) => m.id === milestoneId);
const msTitle = completedMs?.title ?? milestoneId;
const sfVersion = process.env.SF_VERSION ?? "0.0.0";
const projName = basename(reportBasePath);
const doneSlices = snapData.milestones.reduce((acc, m) => acc + m.slices.filter((sl) => sl.done).length, 0);
const totalSlices = snapData.milestones.reduce((acc, m) => acc + m.slices.length, 0);
const outPath = writeReportSnapshot({
basePath: reportBasePath,
html: generateHtmlReport(snapData, {
projectName: projName,
projectPath: reportBasePath,
sfVersion,
milestoneId,
indexRelPath: "index.html",
}),
milestoneId,
milestoneTitle: msTitle,
kind: "milestone",
projectName: projName,
projectPath: reportBasePath,
sfVersion,
totalCost: snapData.totals?.cost ?? 0,
totalTokens: snapData.totals?.tokens.total ?? 0,
totalDuration: snapData.totals?.duration ?? 0,
doneSlices,
totalSlices,
doneMilestones: snapData.milestones.filter((m) => m.status === "complete").length,
totalMilestones: snapData.milestones.length,
phase: snapData.phase,
});
ctx.ui.notify(`Report saved: .sf/reports/${basename(outPath)} — open index.html to browse progression.`, "info");
}
// ─── closeoutAndStop ──────────────────────────────────────────────────────────
/**
* If a unit is in-flight, close it out, then stop auto-mode.
* Extracted from ~4 identical if-closeout-then-stop sequences in autoLoop.
*/
async function closeoutAndStop(ctx, pi, s, deps, reason) {
if (s.currentUnit) {
await deps.closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt, deps.buildSnapshotOpts(s.currentUnit.type, s.currentUnit.id));
s.currentUnit = null;
}
await deps.stopAuto(ctx, pi, reason);
}
async function emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, errorContext) {
ic.deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "unit-end",
data: {
unitType,
unitId,
status: "cancelled",
artifactVerified: false,
...(errorContext ? { errorContext } : {}),
},
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
});
}
// ─── runPreDispatch ───────────────────────────────────────────────────────────
/**
* Phase 1: Pre-dispatch — resource guard, health gate, state derivation,
* milestone transition, terminal conditions.
* Returns break to exit the loop, or next with PreDispatchData on success.
*/
export async function runPreDispatch(ic, loopState) {
const { ctx, pi, s, deps, prefs } = ic;
const uokFlags = resolveUokFlags(prefs);
const runPreDispatchGate = async (input) => {
if (!uokFlags.gates)
return;
const gateRunner = new UokGateRunner();
gateRunner.register({
id: input.gateId,
type: input.gateType,
execute: async () => ({
outcome: input.outcome,
failureClass: input.failureClass,
rationale: input.rationale,
findings: input.findings ?? "",
}),
});
await gateRunner.run(input.gateId, {
basePath: s.basePath,
traceId: `pre-dispatch:${ic.flowId}`,
turnId: `iter-${ic.iteration}`,
milestoneId: input.milestoneId ?? s.currentMilestoneId ?? undefined,
unitType: "pre-dispatch",
unitId: `iter-${ic.iteration}`,
});
};
// Resource version guard
const staleMsg = deps.checkResourcesStale(s.resourceVersionOnStart);
if (staleMsg) {
await runPreDispatchGate({
gateId: "resource-version-guard",
gateType: "policy",
outcome: "fail",
failureClass: "policy",
rationale: "resource version guard blocked dispatch",
findings: staleMsg,
});
await deps.stopAuto(ctx, pi, staleMsg);
debugLog("autoLoop", { phase: "exit", reason: "resources-stale" });
return { action: "break", reason: "resources-stale" };
}
await runPreDispatchGate({
gateId: "resource-version-guard",
gateType: "policy",
outcome: "pass",
failureClass: "none",
rationale: "resource version guard passed",
});
deps.invalidateAllCaches();
s.lastPromptCharCount = undefined;
s.lastBaselineCharCount = undefined;
// Pre-dispatch health gate
try {
const healthGate = await deps.preDispatchHealthGate(s.basePath);
if (healthGate.fixesApplied.length > 0) {
ctx.ui.notify(`Pre-dispatch: ${healthGate.fixesApplied.join(", ")}`, "info");
}
if (!healthGate.proceed) {
await runPreDispatchGate({
gateId: "pre-dispatch-health-gate",
gateType: "execution",
outcome: "manual-attention",
failureClass: "manual-attention",
rationale: "pre-dispatch health gate blocked dispatch",
findings: healthGate.reason,
});
ctx.ui.notify(healthGate.reason ||
"Pre-dispatch health check failed — run /sf doctor for details.", "error");
await deps.pauseAuto(ctx, pi);
debugLog("autoLoop", { phase: "exit", reason: "health-gate-failed" });
return { action: "break", reason: "health-gate-failed" };
}
await runPreDispatchGate({
gateId: "pre-dispatch-health-gate",
gateType: "execution",
outcome: "pass",
failureClass: "none",
rationale: "pre-dispatch health gate passed",
findings: healthGate.fixesApplied.length > 0
? healthGate.fixesApplied.join(", ")
: "",
});
}
catch (e) {
await runPreDispatchGate({
gateId: "pre-dispatch-health-gate",
gateType: "execution",
outcome: "manual-attention",
failureClass: "manual-attention",
rationale: "pre-dispatch health gate threw unexpectedly",
findings: String(e),
});
logWarning("engine", "Pre-dispatch health gate threw unexpectedly", {
error: String(e),
});
}
// Sync project root artifacts into worktree
if (s.originalBasePath &&
s.basePath !== s.originalBasePath &&
s.currentMilestoneId) {
deps.syncProjectRootToWorktree(s.originalBasePath, s.basePath, s.currentMilestoneId);
}
// Derive state
let state = await deps.deriveState(s.basePath);
if (uokFlags.planningFlow &&
isDbAvailable() &&
shouldRunPlanningFlowGate(state.phase)) {
let compiled = ensurePlanningFlowGraph(s.basePath, state);
// Empty-graph recovery: stale DB caches can yield 0 nodes right after a
// task-complete write. Invalidate caches, re-derive state, and retry once.
if (isEmptyPlanV2GraphResult(compiled)) {
deps.invalidateAllCaches();
state = await deps.deriveState(s.basePath);
compiled = shouldRunPlanningFlowGate(state.phase)
? ensurePlanningFlowGraph(s.basePath, state)
: {
ok: true,
reason: "empty planning-flow graph recovered by state rederive",
nodeCount: 0,
};
}
if (!compiled.ok) {
const reason = compiled.reason ?? "Planning flow compilation failed";
if (isMissingFinalizedContextResult(compiled)) {
await runPreDispatchGate({
gateId: "planning-flow-gate",
gateType: "policy",
outcome: "pass",
failureClass: "none",
rationale: "plan v2 missing context recovery deferred to dispatch",
findings: reason,
milestoneId: state.activeMilestone?.id ?? undefined,
});
}
else {
await runPreDispatchGate({
gateId: "planning-flow-gate",
gateType: "policy",
outcome: "manual-attention",
failureClass: "manual-attention",
rationale: "planning flow compile gate failed",
findings: reason,
milestoneId: state.activeMilestone?.id ?? undefined,
});
ctx.ui.notify(`Plan gate failed-closed: ${reason}\n\nIf this keeps happening, try: /sf doctor heal`, "error");
await deps.pauseAuto(ctx, pi);
return { action: "break", reason: "planning-flow-gate-failed" };
}
}
await runPreDispatchGate({
gateId: "planning-flow-gate",
gateType: "policy",
outcome: "pass",
failureClass: "none",
rationale: "planning flow compile gate passed",
milestoneId: state.activeMilestone?.id ?? undefined,
});
}
deps.syncCmuxSidebar(prefs, state);
let mid = state.activeMilestone?.id;
let midTitle = state.activeMilestone?.title;
debugLog("autoLoop", {
phase: "state-derived",
iteration: ic.iteration,
mid,
statePhase: state.phase,
});
// ── Slice-level parallelism gate (#2340) ─────────────────────────────
// When slice_parallel is enabled, check if multiple slices are eligible
// for parallel execution. If so, dispatch them in parallel and stop the
// sequential loop. Workers are spawned via slice-parallel-orchestrator.ts.
if (prefs?.slice_parallel?.enabled &&
mid &&
!process.env.SF_PARALLEL_WORKER &&
isDbAvailable()) {
try {
const dbSlices = getMilestoneSlices(mid);
if (dbSlices.length > 0) {
const doneIds = new Set(dbSlices
.filter((sl) => sl.status === "complete" || sl.status === "done")
.map((sl) => sl.id));
const sliceInputs = dbSlices.map((sl) => ({
id: sl.id,
done: doneIds.has(sl.id),
depends: sl.depends ?? [],
}));
const eligible = getEligibleSlices(sliceInputs, doneIds);
if (eligible.length > 1) {
debugLog("autoLoop", {
phase: "slice-parallel-dispatch",
iteration: ic.iteration,
mid,
eligibleSlices: eligible.map((e) => e.id),
});
ctx.ui.notify(`Slice-parallel: dispatching ${eligible.length} eligible slices for ${mid}.`, "info");
const result = await startSliceParallel(s.basePath, mid, eligible, {
maxWorkers: prefs.slice_parallel.max_workers ?? 2,
useExecutionGraph: uokFlags.executionGraph,
shellWrapper: prefs.shell_wrapper,
});
if (result.started.length > 0) {
ctx.ui.notify(`Slice-parallel: started ${result.started.length} worker(s): ${result.started.join(", ")}.`, "info");
await deps.stopAuto(ctx, pi, `Slice-parallel dispatched for ${mid}`);
return { action: "break", reason: "slice-parallel-dispatched" };
}
// Fall through to sequential if no workers started
}
}
}
catch (err) {
debugLog("autoLoop", {
phase: "slice-parallel-check-error",
error: err instanceof Error ? err.message : String(err),
});
// Non-fatal — fall through to sequential dispatch
}
}
// ── Milestone transition ────────────────────────────────────────────
if (mid && s.currentMilestoneId && mid !== s.currentMilestoneId) {
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "milestone-transition",
data: { from: s.currentMilestoneId, to: mid },
});
ctx.ui.notify(`Milestone ${s.currentMilestoneId} complete. Advancing to ${mid}: ${midTitle}.`, "info");
deps.sendDesktopNotification("SF", `Milestone ${s.currentMilestoneId} complete!`, "success", "milestone", basename(s.originalBasePath || s.basePath));
deps.logCmuxEvent(prefs, `Milestone ${s.currentMilestoneId} complete. Advancing to ${mid}.`, "success");
const vizPrefs = prefs;
if (vizPrefs?.auto_visualize) {
ctx.ui.notify("Run /sf visualize to see progress overview.", "info");
}
if (vizPrefs?.auto_report !== false) {
try {
await generateMilestoneReport(s, ctx, s.currentMilestoneId);
}
catch (err) {
ctx.ui.notify(`Report generation failed: ${err instanceof Error ? err.message : String(err)}`, "warning");
}
}
// Reset dispatch counters for new milestone
s.unitDispatchCount.clear();
s.unitRecoveryCount.clear();
s.unitLifetimeDispatches.clear();
loopState.recentUnits.length = 0;
loopState.stuckRecoveryAttempts = 0;
// Worktree lifecycle on milestone transition — merge current, enter next
try {
deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
}
catch (mergeErr) {
if (mergeErr instanceof MergeConflictError) {
// Real code conflicts — stop the loop instead of retrying forever (#2330)
ctx.ui.notify(`Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf autonomous to resume.`, "error");
await deps.stopAuto(ctx, pi, `Merge conflict on milestone ${s.currentMilestoneId}`);
return { action: "break", reason: "merge-conflict" };
}
// Non-conflict merge errors — stop auto to avoid advancing with unmerged work
logError("engine", "Milestone merge failed with non-conflict error", {
milestone: s.currentMilestoneId,
error: String(mergeErr),
});
ctx.ui.notify(`Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf autonomous to resume.`, "error");
await deps.stopAuto(ctx, pi, `Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`);
return { action: "break", reason: "merge-failed" };
}
// Fire product-audit after successful merge (guards against double-fire via s.productAuditMilestoneId)
await maybeFireProductAudit(s, ctx);
// PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
deps.invalidateAllCaches();
state = await deps.deriveState(s.basePath);
mid = state.activeMilestone?.id;
midTitle = state.activeMilestone?.title;
if (mid) {
if (deps.getIsolationMode() !== "none") {
deps.captureIntegrationBranch(s.basePath, mid);
}
deps.resolver.enterMilestone(mid, ctx.ui);
}
else {
// mid is undefined — no milestone to capture integration branch for
}
const pendingIds = state.registry
.filter((m) => m.status !== "complete" && m.status !== "parked")
.map((m) => m.id);
deps.pruneQueueOrder(s.basePath, pendingIds);
// Archive the old completed-units.json instead of wiping it (#2313).
try {
const completedKeysPath = join(sfRoot(s.basePath), "completed-units.json");
if (existsSync(completedKeysPath) && s.currentMilestoneId) {
const archivePath = join(sfRoot(s.basePath), `completed-units-${s.currentMilestoneId}.json`);
cpSync(completedKeysPath, archivePath);
}
atomicWriteSync(completedKeysPath, JSON.stringify([], null, 2));
}
catch (e) {
logWarning("engine", "Failed to archive completed-units on milestone transition", { error: String(e) });
}
// Rebuild STATE.md immediately so it reflects the new active milestone.
// This bypasses the 30-second throttle in the normal rebuild path —
// milestone transitions are rare and important enough to warrant an
// immediate write.
try {
await deps.rebuildState(s.basePath);
}
catch (e) {
logWarning("engine", "STATE.md rebuild failed after milestone transition", { error: String(e) });
}
}
if (mid) {
s.currentMilestoneId = mid;
deps.setActiveMilestoneId(s.basePath, mid);
}
// ── Terminal conditions ──────────────────────────────────────────────
if (!mid) {
if (s.currentUnit) {
await deps.closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt, deps.buildSnapshotOpts(s.currentUnit.type, s.currentUnit.id));
}
const incomplete = state.registry.filter((m) => m.status !== "complete" && m.status !== "parked");
if (incomplete.length === 0 && state.registry.length > 0) {
// All milestones complete — merge milestone branch before stopping
if (s.currentMilestoneId) {
try {
deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
// Prevent stopAuto from attempting the same merge (#2645)
s.milestoneMergedInPhases = true;
// Fire product-audit after successful merge (guards against double-fire via s.productAuditMilestoneId)
await maybeFireProductAudit(s, ctx);
}
catch (mergeErr) {
if (mergeErr instanceof MergeConflictError) {
ctx.ui.notify(`Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf autonomous to resume.`, "error");
await deps.stopAuto(ctx, pi, `Merge conflict on milestone ${s.currentMilestoneId}`);
return { action: "break", reason: "merge-conflict" };
}
logError("engine", "Milestone merge failed with non-conflict error", {
milestone: s.currentMilestoneId,
error: String(mergeErr),
});
ctx.ui.notify(`Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf autonomous to resume.`, "error");
await deps.stopAuto(ctx, pi, `Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`);
return { action: "break", reason: "merge-failed" };
}
// PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
}
deps.sendDesktopNotification("SF", "All milestones complete!", "success", "milestone", basename(s.originalBasePath || s.basePath));
deps.logCmuxEvent(prefs, "All milestones complete.", "success");
await deps.stopAuto(ctx, pi, "All milestones complete");
}
else if (incomplete.length === 0 && state.registry.length === 0) {
// Empty registry — no milestones visible, likely a path resolution bug
const diag = `basePath=${s.basePath}, phase=${state.phase}`;
ctx.ui.notify(`No milestones visible in current scope. Possible path resolution issue.\n Diagnostic: ${diag}`, "error");
await deps.stopAuto(ctx, pi, `No milestones found — check basePath resolution`);
}
else if (state.phase === "blocked") {
const blockerMsg = `Blocked: ${state.blockers.join(", ")}`;
// Pause instead of hard-stop so the session is resumable with `/sf autonomous`.
// Hard-stop here was causing premature termination when slice dependencies
// were temporarily unresolvable (e.g. after reassessment added new slices).
await deps.pauseAuto(ctx, pi);
ctx.ui.notify(`${blockerMsg}. Fix and run /sf autonomous to resume.`, "warning");
deps.sendDesktopNotification("SF", blockerMsg, "warning", "attention", basename(s.originalBasePath || s.basePath));
deps.logCmuxEvent(prefs, blockerMsg, "warning");
}
else {
const ids = incomplete.map((m) => m.id).join(", ");
const diag = `basePath=${s.basePath}, milestones=[${state.registry.map((m) => `${m.id}:${m.status}`).join(", ")}], phase=${state.phase}`;
ctx.ui.notify(`Unexpected: ${incomplete.length} incomplete milestone(s) (${ids}) but no active milestone.\n Diagnostic: ${diag}`, "error");
await deps.stopAuto(ctx, pi, `No active milestone — ${incomplete.length} incomplete (${ids}), see diagnostic above`);
}
debugLog("autoLoop", { phase: "exit", reason: "no-active-milestone" });
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "terminal",
data: { reason: "no-active-milestone" },
});
return { action: "break", reason: "no-active-milestone" };
}
if (!midTitle) {
midTitle = mid;
ctx.ui.notify(`Milestone ${mid} has no title in roadmap — using ID as fallback.`, "warning");
}
// Mid-merge safety check
const mergeReconcileResult = deps.reconcileMergeState(s.basePath, ctx);
if (mergeReconcileResult === "blocked") {
await deps.pauseAuto(ctx, pi);
debugLog("autoLoop", {
phase: "exit",
reason: "merge-reconciliation-blocked",
});
return { action: "break", reason: "merge-reconciliation-blocked" };
}
if (mergeReconcileResult === "reconciled") {
deps.invalidateAllCaches();
state = await deps.deriveState(s.basePath);
mid = state.activeMilestone?.id;
midTitle = state.activeMilestone?.title;
}
if (!mid || !midTitle) {
const noMilestoneReason = !mid
? "No active milestone after merge reconciliation"
: `Milestone ${mid} has no title after reconciliation`;
await closeoutAndStop(ctx, pi, s, deps, noMilestoneReason);
debugLog("autoLoop", {
phase: "exit",
reason: "no-milestone-after-reconciliation",
});
return { action: "break", reason: "no-milestone-after-reconciliation" };
}
// Terminal: complete
if (state.phase === "complete") {
// Milestone merge on complete (before closeout so branch state is clean)
if (s.currentMilestoneId) {
try {
deps.resolver.mergeAndExit(s.currentMilestoneId, ctx.ui);
// Prevent stopAuto from attempting the same merge (#2645)
s.milestoneMergedInPhases = true;
// Fire product-audit after successful merge (guards against double-fire via s.productAuditMilestoneId)
await maybeFireProductAudit(s, ctx);
}
catch (mergeErr) {
if (mergeErr instanceof MergeConflictError) {
ctx.ui.notify(`Merge conflict: ${mergeErr.conflictedFiles.join(", ")}. Resolve conflicts manually and run /sf autonomous to resume.`, "error");
await deps.stopAuto(ctx, pi, `Merge conflict on milestone ${s.currentMilestoneId}`);
return { action: "break", reason: "merge-conflict" };
}
logError("engine", "Milestone merge failed with non-conflict error", {
milestone: s.currentMilestoneId,
error: String(mergeErr),
});
ctx.ui.notify(`Merge failed: ${mergeErr instanceof Error ? mergeErr.message : String(mergeErr)}. Resolve and run /sf autonomous to resume.`, "error");
await deps.stopAuto(ctx, pi, `Merge error on milestone ${s.currentMilestoneId}: ${String(mergeErr)}`);
return { action: "break", reason: "merge-failed" };
}
// PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
}
deps.sendDesktopNotification("SF", `Milestone ${mid} complete!`, "success", "milestone", basename(s.originalBasePath || s.basePath));
deps.logCmuxEvent(prefs, `Milestone ${mid} complete.`, "success");
await closeoutAndStop(ctx, pi, s, deps, `Milestone ${mid} complete`);
debugLog("autoLoop", { phase: "exit", reason: "milestone-complete" });
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "terminal",
data: { reason: "milestone-complete", milestoneId: mid },
});
return { action: "break", reason: "milestone-complete" };
}
// Terminal: blocked — pause instead of hard-stop so the session is resumable.
if (state.phase === "blocked") {
const blockerMsg = `Blocked: ${state.blockers.join(", ")}`;
if (s.currentUnit) {
await deps.closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt, deps.buildSnapshotOpts(s.currentUnit.type, s.currentUnit.id));
}
await deps.pauseAuto(ctx, pi);
ctx.ui.notify(`${blockerMsg}. Fix and run /sf autonomous to resume.`, "warning");
deps.sendDesktopNotification("SF", blockerMsg, "warning", "attention", basename(s.originalBasePath || s.basePath));
deps.logCmuxEvent(prefs, blockerMsg, "warning");
debugLog("autoLoop", { phase: "exit", reason: "blocked" });
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "terminal",
data: { reason: "blocked", blockers: state.blockers },
});
return { action: "break", reason: "blocked" };
}
return { action: "next", data: { state, mid, midTitle } };
}
// ─── runDispatch ──────────────────────────────────────────────────────────────
/**
* Phase 3: Dispatch resolution — resolve next unit, stuck detection, pre-dispatch hooks.
* Returns break/continue to control the loop, or next with IterationData on success.
*/
export async function runDispatch(ic, preData, loopState) {
const { ctx, pi, s, deps, prefs } = ic;
const { state, mid, midTitle } = preData;
const STUCK_WINDOW_SIZE = 6;
debugLog("autoLoop", { phase: "dispatch-resolve", iteration: ic.iteration });
const dispatchResult = await deps.resolveDispatch({
basePath: s.basePath,
mid,
midTitle,
state,
prefs,
session: s,
});
if (dispatchResult.action === "stop") {
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "dispatch-stop",
rule: dispatchResult.matchedRule,
data: { reason: dispatchResult.reason },
});
// Warning-level stops are recoverable human checkpoints (e.g. UAT verdict
// gate) — pause instead of hard-stopping so the session is resumable with
// `/sf autonomous`. Error/info-level stops remain hard stops for infrastructure
// failures and terminal conditions respectively.
// See: https://github.com/singularity-forge/sf-run/issues/2474
if (dispatchResult.level === "warning") {
ctx.ui.notify(dispatchResult.reason, "warning");
await deps.pauseAuto(ctx, pi);
}
else {
await closeoutAndStop(ctx, pi, s, deps, dispatchResult.reason);
}
debugLog("autoLoop", { phase: "exit", reason: "dispatch-stop" });
return { action: "break", reason: "dispatch-stop" };
}
if (dispatchResult.action !== "dispatch") {
// Non-dispatch action (e.g. "skip") — re-derive state
await new Promise((r) => setImmediate(r));
return { action: "continue" };
}
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "dispatch-match",
rule: dispatchResult.matchedRule,
data: { unitType: dispatchResult.unitType, unitId: dispatchResult.unitId },
});
let unitType = dispatchResult.unitType;
const unitId = dispatchResult.unitId;
let prompt = dispatchResult.prompt;
const pauseAfterUatDispatch = dispatchResult.pauseAfterDispatch ?? false;
// ── Sliding-window stuck detection with graduated recovery ──
const derivedKey = `${unitType}/${unitId}`;
const hasTransientTaskCompleteFailure = unitType === "execute-task" && !!s.pendingTaskCompleteFailures?.has(unitId);
if (!s.pendingVerificationRetry && !hasTransientTaskCompleteFailure) {
loopState.recentUnits.push({ key: derivedKey });
if (loopState.recentUnits.length > STUCK_WINDOW_SIZE)
loopState.recentUnits.shift();
const stuckSignal = detectStuck(loopState.recentUnits);
if (stuckSignal) {
debugLog("autoLoop", {
phase: "stuck-check",
unitType,
unitId,
reason: stuckSignal.reason,
recoveryAttempts: loopState.stuckRecoveryAttempts,
});
// Graduated stuck recovery — up to 5 total attempts before hard stop.
// Attempt 0: cache invalidation + retry
// Attempts 14: rethink + retry
// Attempt 5 (exhausted): hard stop
loopState.stuckRecoveryAttempts++;
const attempt = loopState.stuckRecoveryAttempts;
if (attempt === 1) {
// Attempt 1: verify artifact + cache invalidation + retry
const artifactExists = verifyExpectedArtifact(unitType, unitId, s.basePath);
if (artifactExists) {
debugLog("autoLoop", {
phase: "stuck-recovery",
level: 1,
action: "artifact-found",
});
ctx.ui.notify(`Stuck recovery: artifact for ${unitType} ${unitId} found on disk. Invalidating caches.`, "info");
deps.invalidateAllCaches();
return { action: "continue" };
}
ctx.ui.notify(`Stuck on ${unitType} ${unitId} (${stuckSignal.reason}). Invalidating caches and retrying.`, "warning");
deps.invalidateAllCaches();
return { action: "continue" };
}
else if (attempt <= 5) {
// Attempts 25: rethink + diagnostic + retry
const stuckDiag = diagnoseExpectedArtifact(unitType, unitId, s.basePath);
const stuckRemediation = buildLoopRemediationSteps(unitType, unitId, s.basePath);
const diagnostic = deps.getDeepDiagnostic(s.basePath);
const cappedDiag = (diagnostic?.length ?? 0) > MAX_RECOVERY_CHARS
? diagnostic.slice(0, MAX_RECOVERY_CHARS) +
"\n\n[...diagnostic truncated]"
: (diagnostic ?? null);
s.pendingRethinkAttempt = JSON.stringify({
attempt,
reason: stuckSignal.reason,
diagnostic: cappedDiag,
stuckDiag,
remediation: stuckRemediation,
unitType,
unitId,
});
const rt = attempt === 5
? "**FINAL STUCK ATTEMPT — 5 of 5.** "
: `**STUCK RECOVERY ATTEMPT ${attempt - 1} of 4.** `;
ctx.ui.notify(`${rt}Stuck on ${unitType} ${unitId} (${stuckSignal.reason}). Injecting diagnostic and retrying.`, "warning");
return { action: "continue" };
}
else {
// Attempt 6+: genuinely exhausted — hard stop
debugLog("autoLoop", {
phase: "stuck-detected",
unitType,
unitId,
reason: stuckSignal.reason,
});
const stuckDiag = diagnoseExpectedArtifact(unitType, unitId, s.basePath);
const stuckRemediation = buildLoopRemediationSteps(unitType, unitId, s.basePath);
const stuckParts = [
`Stuck on ${unitType} ${unitId}${stuckSignal.reason}.`,
];
if (stuckDiag)
stuckParts.push(`Expected: ${stuckDiag}`);
if (stuckRemediation)
stuckParts.push(`To recover:\n${stuckRemediation}`);
ctx.ui.notify(stuckParts.join(" "), "error");
await deps.stopAuto(ctx, pi, `Stuck: ${stuckSignal.reason}`);
return { action: "break", reason: "stuck-detected" };
}
}
else {
// Progress detected — reset recovery counter
if (loopState.stuckRecoveryAttempts > 0) {
debugLog("autoLoop", {
phase: "stuck-counter-reset",
from: loopState.recentUnits[loopState.recentUnits.length - 2]?.key ?? "",
to: derivedKey,
});
loopState.stuckRecoveryAttempts = 0;
}
}
}
// Pre-dispatch hooks
const preDispatchResult = deps.runPreDispatchHooks(unitType, unitId, prompt, s.basePath);
if (preDispatchResult.firedHooks.length > 0) {
ctx.ui.notify(`Pre-dispatch hook${preDispatchResult.firedHooks.length > 1 ? "s" : ""}: ${preDispatchResult.firedHooks.join(", ")}`, "info");
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "pre-dispatch-hook",
data: {
firedHooks: preDispatchResult.firedHooks,
action: preDispatchResult.action,
},
});
}
if (preDispatchResult.action === "skip") {
ctx.ui.notify(`Skipping ${unitType} ${unitId} (pre-dispatch hook).`, "info");
await new Promise((r) => setImmediate(r));
return { action: "continue" };
}
if (preDispatchResult.action === "replace") {
prompt = preDispatchResult.prompt ?? prompt;
if (preDispatchResult.unitType)
unitType = preDispatchResult.unitType;
}
else if (preDispatchResult.prompt) {
prompt = preDispatchResult.prompt;
}
const guardBasePath = _resolveDispatchGuardBasePath(s);
const priorSliceBlocker = deps.getPriorSliceCompletionBlocker(guardBasePath, deps.getMainBranch(guardBasePath), unitType, unitId);
if (priorSliceBlocker) {
await deps.stopAuto(ctx, pi, priorSliceBlocker);
debugLog("autoLoop", { phase: "exit", reason: "prior-slice-blocker" });
return { action: "break", reason: "prior-slice-blocker" };
}
return {
action: "next",
data: {
unitType,
unitId,
prompt,
finalPrompt: prompt,
pauseAfterUatDispatch,
state,
mid,
midTitle,
isRetry: false,
previousTier: undefined,
hookModelOverride: preDispatchResult.model,
},
};
}
// ─── runGuards ────────────────────────────────────────────────────────────────
/**
* Phase 2: Guards — stop directives, budget ceiling, context window, secrets re-check.
* Returns break to exit the loop, or next to proceed to dispatch.
*/
export async function runGuards(ic, mid, unitType, unitId, sliceId) {
const { ctx, pi, s, deps, prefs } = ic;
// ── Stop/Backtrack directive guard (#3487) ──
// Check for unexecuted stop or backtrack captures BEFORE dispatching any unit.
// This ensures user "halt" directives are honored immediately.
// IMPORTANT: Fail-closed — any exception during stop handling still breaks the loop
// to ensure user halt intent is never silently dropped.
try {
const { loadStopCaptures, markCaptureExecuted } = await import("../captures.js");
const stopCaptures = loadStopCaptures(s.basePath);
if (stopCaptures.length > 0) {
const first = stopCaptures[0];
const isBacktrack = first.classification === "backtrack";
const label = isBacktrack
? `Backtrack directive: ${first.text}`
: `Stop directive: ${first.text}`;
ctx.ui.notify(label, "warning");
deps.sendDesktopNotification("SF", label, "warning", "stop-directive", basename(s.originalBasePath || s.basePath));
// Pause first — ensures auto-mode stops even if later steps fail
await deps.pauseAuto(ctx, pi);
// For backtrack captures, write the backtrack trigger after pausing
if (isBacktrack) {
try {
const { executeBacktrack } = await import("../triage-resolution.js");
executeBacktrack(s.basePath, mid, first);
}
catch (e) {
debugLog("guards", {
phase: "backtrack-execution-error",
error: String(e),
});
}
}
// Mark captures as executed only after successful pause/transition
for (const cap of stopCaptures) {
markCaptureExecuted(s.basePath, cap.id);
}
debugLog("autoLoop", {
phase: "exit",
reason: isBacktrack ? "user-backtrack" : "user-stop",
});
return {
action: "break",
reason: isBacktrack ? "user-backtrack" : "user-stop",
};
}
}
catch (e) {
// Fail-closed: if anything in the stop guard throws, break the loop
// rather than silently continuing and dropping user halt intent
debugLog("guards", { phase: "stop-guard-error", error: String(e) });
return { action: "break", reason: "stop-guard-error" };
}
// Production mutation guard — headless auto must not enqueue live failover
// commands without a human-provided safe target and cleanup plan.
try {
if (isDbAvailable()) {
const state = await deps.deriveState(s.basePath);
const activeTask = state.activeTask;
const activeSlice = state.activeSlice;
const activeMilestone = state.activeMilestone;
if (activeMilestone?.id && activeSlice?.id && activeTask?.id) {
const task = getTask(activeMilestone.id, activeSlice.id, activeTask.id);
if (task) {
const taskText = [
task.title,
task.description,
task.verify,
...task.inputs,
...task.expected_output,
].join("\n");
if (requiresHumanProductionMutationApproval(taskText)) {
const approvalUnit = {
milestoneId: activeMilestone.id,
sliceId: activeSlice.id,
taskId: activeTask.id,
taskTitle: task.title,
taskText,
};
const approvalBasePath = s.originalBasePath || s.basePath;
const approval = readProductionMutationApprovalStatus(approvalBasePath, approvalUnit);
if (approval.approved) {
ctx.ui.notify(`Production mutation approval accepted for ${approvalUnit.milestoneId}/${approvalUnit.sliceId}/${approvalUnit.taskId}: ${approval.path}`, "warning");
}
else {
const llmApproval = approveProductionMutationWithLlmPolicy(approvalBasePath, approvalUnit);
if (llmApproval.approved) {
ctx.ui.notify(`Production mutation LLM approval accepted for pending-command-only smoke test ${approvalUnit.milestoneId}/${approvalUnit.sliceId}/${approvalUnit.taskId}: ${llmApproval.path}`, "warning");
}
else {
const template = ensureProductionMutationApprovalTemplate(approvalBasePath, approvalUnit);
const blockerReasons = [
...approval.reasons,
...llmApproval.reasons.map((reason) => `LLM: ${reason}`),
];
const reasons = blockerReasons.length
? ` Missing/invalid fields: ${blockerReasons.join("; ")}.`
: "";
const msg = `Production mutation guard: ${activeMilestone.id}/${activeSlice.id}/${activeTask.id} asks to POST unified failover against production. ` +
`${template.created ? "Created" : "Reusing"} approval gate at ${template.path}. ` +
`Fill it with an explicit safe server/VM target, cleanup/rollback path, and human or LLM approval, then rerun sf headless auto.${reasons}`;
ctx.ui.notify(msg, "error");
deps.sendDesktopNotification("SF", "Production mutation guard paused auto-mode", "warning", "safety", basename(s.originalBasePath || s.basePath));
await deps.pauseAuto(ctx, pi);
return {
action: "break",
reason: "production-mutation-guard",
};
}
}
}
}
}
}
}
catch (e) {
debugLog("guards", {
phase: "production-mutation-guard-error",
error: String(e),
});
}
// Budget ceiling guard
const budgetCeiling = prefs?.budget_ceiling;
if (budgetCeiling !== undefined && budgetCeiling > 0) {
const currentLedger = deps.getLedger();
// In parallel worker mode, only count cost from the current auto-mode session
// to avoid hitting the ceiling due to historical project-wide spend (#2184).
let costUnits = currentLedger?.units;
if (process.env.SF_PARALLEL_WORKER &&
s.autoStartTime &&
Array.isArray(costUnits)) {
const sessionStartISO = new Date(s.autoStartTime).toISOString();
costUnits = costUnits.filter((u) => u.startedAt != null && u.startedAt >= sessionStartISO);
}
const totalCost = costUnits ? deps.getProjectTotals(costUnits).cost : 0;
const budgetPct = totalCost / budgetCeiling;
const budgetAlertLevel = deps.getBudgetAlertLevel(budgetPct);
const newBudgetAlertLevel = deps.getNewBudgetAlertLevel(s.lastBudgetAlertLevel, budgetPct);
const enforcement = prefs?.budget_enforcement ?? "pause";
const budgetEnforcementAction = deps.getBudgetEnforcementAction(enforcement, budgetPct);
// Data-driven threshold check — loop descending, fire first match
const threshold = BUDGET_THRESHOLDS.find((t) => newBudgetAlertLevel >= t.pct);
if (threshold) {
s.lastBudgetAlertLevel =
newBudgetAlertLevel;
if (threshold.pct === 100 && budgetEnforcementAction !== "none") {
// 100% — special enforcement logic (halt/pause/warn)
const msg = `Budget ceiling ${deps.formatCost(budgetCeiling)} reached (spent ${deps.formatCost(totalCost)}).`;
if (budgetEnforcementAction === "halt") {
deps.sendDesktopNotification("SF", msg, "error", "budget", basename(s.originalBasePath || s.basePath));
await deps.stopAuto(ctx, pi, "Budget ceiling reached");
debugLog("autoLoop", { phase: "exit", reason: "budget-halt" });
return { action: "break", reason: "budget-halt" };
}
if (budgetEnforcementAction === "pause") {
ctx.ui.notify(`${msg} Pausing autonomous mode — /sf autonomous to override and continue.`, "warning");
deps.sendDesktopNotification("SF", msg, "warning", "budget", basename(s.originalBasePath || s.basePath));
deps.logCmuxEvent(prefs, msg, "warning");
await deps.pauseAuto(ctx, pi);
debugLog("autoLoop", { phase: "exit", reason: "budget-pause" });
return { action: "break", reason: "budget-pause" };
}
ctx.ui.notify(`${msg} Continuing (enforcement: warn).`, "warning");
deps.sendDesktopNotification("SF", msg, "warning", "budget", basename(s.originalBasePath || s.basePath));
deps.logCmuxEvent(prefs, msg, "warning");
}
else if (threshold.pct < 100) {
// Sub-100% — simple notification
const msg = `${threshold.label}: ${deps.formatCost(totalCost)} / ${deps.formatCost(budgetCeiling)}`;
ctx.ui.notify(msg, threshold.notifyLevel);
deps.sendDesktopNotification("SF", msg, threshold.notifyLevel, "budget", basename(s.originalBasePath || s.basePath));
deps.logCmuxEvent(prefs, msg, threshold.cmuxLevel);
}
}
else if (budgetAlertLevel === 0) {
s.lastBudgetAlertLevel = 0;
}
}
else {
s.lastBudgetAlertLevel = 0;
}
// ── UOK Plan-gate ──────────────────────────────────────────────────────────
// Structural validation before the first execute-task unit of a slice:
// confirms the plan files exist and the slice has ≥1 task.
// FailureClass "input" → 0 retries (broken plan needs human fix, not
// an LLM retry). Only fires when uok.gates.enabled is true.
const uokFlagsGuards = resolveUokFlags(prefs);
if (uokFlagsGuards.gates && unitType === "execute-task" && mid && sliceId) {
const taskCounts = getSliceTaskCounts(mid, sliceId);
const isFirstTaskForSlice = taskCounts.done === 0;
if (isFirstTaskForSlice) {
let planGateOutcome = "pass";
let planGateRationale = "";
const roadmapPath = resolveMilestoneFile(s.basePath, mid, "ROADMAP");
if (!roadmapPath || !existsSync(roadmapPath)) {
planGateOutcome = "fail";
planGateRationale = `Milestone roadmap not found for ${mid}`;
}
else {
const slicePlanPath = resolveSliceFile(s.basePath, mid, sliceId, "PLAN");
if (!slicePlanPath || !existsSync(slicePlanPath)) {
planGateOutcome = "fail";
planGateRationale = `Slice plan not found for ${mid}/${sliceId}`;
}
else if (taskCounts.total < 1) {
planGateOutcome = "fail";
planGateRationale = `Slice ${sliceId} has no tasks defined`;
}
}
const planGateRunner = new UokGateRunner();
planGateRunner.register({
id: "plan-gate",
type: "policy",
execute: async () => ({
outcome: planGateOutcome,
failureClass: planGateOutcome === "pass" ? "none" : "input",
rationale: planGateRationale || "Plan files verified",
}),
});
const planGateResult = await planGateRunner.run("plan-gate", {
basePath: s.basePath,
traceId: `guard:${ic.flowId}`,
turnId: `iter-${ic.iteration}`,
milestoneId: mid,
sliceId,
unitType,
unitId,
});
if (planGateResult.outcome !== "pass") {
ctx.ui.notify(`Plan gate failed: ${planGateResult.rationale ?? "invalid plan"}`, "warning");
await deps.pauseAuto(ctx, pi);
return { action: "break", reason: "plan-gate-failed" };
}
}
}
// Context window guard
const contextThreshold = prefs?.context_pause_threshold ?? 0;
if (contextThreshold > 0 && s.cmdCtx) {
const contextUsage = s.cmdCtx.getContextUsage();
if (contextUsage &&
contextUsage.percent !== null &&
contextUsage.percent >= contextThreshold) {
const msg = `Context window at ${contextUsage.percent}% (threshold: ${contextThreshold}%). Pausing to prevent truncated output.`;
ctx.ui.notify(`${msg} Run /sf autonomous to continue (will start fresh session).`, "warning");
deps.sendDesktopNotification("SF", `Context ${contextUsage.percent}% — paused`, "warning", "attention", basename(s.originalBasePath || s.basePath));
await deps.pauseAuto(ctx, pi);
debugLog("autoLoop", { phase: "exit", reason: "context-window" });
return { action: "break", reason: "context-window" };
}
}
// Secrets re-check gate
try {
const manifestStatus = await deps.getManifestStatus(s.basePath, mid, s.originalBasePath);
if (manifestStatus && manifestStatus.pending.length > 0) {
const result = await deps.collectSecretsFromManifest(s.basePath, mid, ctx);
if (result &&
result.applied &&
result.skipped &&
result.existingSkipped) {
ctx.ui.notify(`Secrets collected: ${result.applied.length} applied, ${result.skipped.length} skipped, ${result.existingSkipped.length} already set.`, "info");
}
else {
ctx.ui.notify("Secrets collection skipped.", "info");
}
}
}
catch (err) {
ctx.ui.notify(`Secrets collection error: ${err instanceof Error ? err.message : String(err)}. Continuing with next task.`, "warning");
}
return { action: "next", data: undefined };
}
// ─── runUnitPhase ─────────────────────────────────────────────────────────────
/**
* Phase 4: Unit execution — dispatch prompt, await agent_end, closeout, artifact verify.
* Returns break or next with unitStartedAt for downstream phases.
*/
export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
const { ctx, pi, s, deps, prefs } = ic;
const { unitType, unitId, prompt, state, mid } = iterData;
debugLog("autoLoop", {
phase: "unit-execution",
iteration: ic.iteration,
unitType,
unitId,
});
// ── Worktree health check (#1833, #1843) ────────────────────────────
// ...
if (s.basePath &&
!s.basePath.startsWith("/mock/") &&
unitType === "execute-task") {
const gitMarker = join(s.basePath, ".git");
const hasGit = deps.existsSync(gitMarker);
if (!hasGit) {
const msg = `Worktree health check failed: ${s.basePath} has no .git — refusing to dispatch ${unitType} ${unitId}`;
debugLog("runUnitPhase", {
phase: "worktree-health-fail",
basePath: s.basePath,
hasGit,
});
ctx.ui.notify(msg, "error");
await deps.stopAuto(ctx, pi, msg);
return { action: "break", reason: "worktree-invalid" };
}
const hasProjectFile = PROJECT_FILES.some((f) => deps.existsSync(join(s.basePath, f)));
const hasSrcDir = deps.existsSync(join(s.basePath, "src"));
// Xcode bundles have project-specific names (*.xcodeproj, *.xcworkspace)
// that cannot be matched by exact filename — scan the directory by suffix.
let hasXcodeBundle = false;
try {
const entries = deps.existsSync(s.basePath)
? readdirSync(s.basePath)
: [];
hasXcodeBundle = entries.some((e) => e.endsWith(".xcodeproj") || e.endsWith(".xcworkspace"));
}
catch (err) {
debugLog("runUnitPhase", {
phase: "xcode-bundle-scan-failed",
basePath: s.basePath,
error: String(err),
});
}
// Monorepo support (#2347): if no project files in the worktree directory,
// walk parent directories up to the filesystem root. In monorepos,
// package.json / Cargo.toml etc. live in a parent directory.
let hasProjectFileInParent = false;
if (!hasProjectFile && !hasSrcDir && !hasXcodeBundle) {
let checkDir = dirname(s.basePath);
const { root } = parsePath(checkDir);
while (checkDir !== root) {
// Stop at git repository boundary — ancestors above the repo root
// (e.g. ~ or /usr/local) may contain unrelated project files.
if (deps.existsSync(join(checkDir, ".git")))
break;
if (PROJECT_FILES.some((f) => deps.existsSync(join(checkDir, f)))) {
hasProjectFileInParent = true;
break;
}
checkDir = dirname(checkDir);
}
}
if (!hasProjectFile &&
!hasSrcDir &&
!hasXcodeBundle &&
!hasProjectFileInParent) {
// Greenfield projects won't have project files yet — the first task creates them.
// Log a warning but allow execution to proceed. The .git check above is sufficient
// to ensure we're in a valid working directory.
debugLog("runUnitPhase", {
phase: "worktree-health-warn-greenfield",
basePath: s.basePath,
hasProjectFile,
hasSrcDir,
hasXcodeBundle,
});
ctx.ui.notify(`Warning: ${s.basePath} has no recognized project files — proceeding as greenfield project`, "warning");
}
}
// Detect retry and capture previous tier for escalation
const isPausedUnitResume = s.pausedUnitType === unitType && s.pausedUnitId === unitId;
const isRetry = !!((s.currentUnit &&
s.currentUnit.type === unitType &&
s.currentUnit.id === unitId) ||
isPausedUnitResume);
const previousTier = s.currentUnitRouting?.tier ??
(isPausedUnitResume && unitType === "execute-task"
? "standard"
: undefined);
if (isPausedUnitResume) {
s.pausedUnitType = null;
s.pausedUnitId = null;
}
// Scope workflow-logger buffer to this unit so post-finalize drains are
// per-unit. Without this, the module-level _buffer accumulates across every
// unit in the same Node process (see workflow-logger.ts module header).
_resetLogs();
s.currentUnit = { type: unitType, id: unitId, startedAt: Date.now() };
s.researchTerminalTransition = false;
s.lastGitActionFailure = null;
s.lastGitActionStatus = null;
setCurrentPhase(unitType);
s.lastToolInvocationError = null; // #2883: clear stale error from previous unit
resetToolCallCounts();
resetCompletionNudgeState(unitType, unitId, prefs?.auto_supervisor?.completion_nudge_after);
resetRunawayGuardState(unitType, unitId, {
sessionTokens: collectSessionTokenUsage(ctx),
changedFiles: countChangedFiles(s.basePath),
worktreeFingerprint: collectWorktreeFingerprint(s.basePath),
});
const unitStartSeq = ic.nextSeq();
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: unitStartSeq,
eventType: "unit-start",
data: { unitType, unitId },
});
ctx.ui.notify(`[unit] ${unitType} ${unitId} starting`, "info");
deps.captureAvailableSkills();
writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
phase: "dispatched",
wrapupWarningSent: false,
timeoutAt: null,
lastProgressAt: s.currentUnit.startedAt,
progressCount: 0,
lastProgressKind: "dispatch",
recoveryAttempts: 0, // Reset so re-dispatched units get full recovery budget (#2322)
});
// Status bar (widget + preconditions deferred until after model selection — see #2899)
ctx.ui.setStatus("sf-auto", "auto");
if (mid)
deps.updateSliceProgressCache(s.basePath, mid, state.activeSlice?.id);
// ── Safety harness: reset evidence + create checkpoint ──
const safetyConfig = resolveSafetyHarnessConfig(prefs?.safety_harness);
if (safetyConfig.enabled && safetyConfig.evidence_collection) {
resetEvidence();
const { milestone: eMid, slice: eSid, task: eTid } = parseUnitId(unitId);
loadEvidenceFromDisk(s.basePath, eMid, eSid ?? "", eTid ?? "");
}
if (safetyConfig.enabled &&
safetyConfig.file_change_validation &&
unitType === "execute-task") {
s.preUnitDirtyFiles = getDirtyFiles(s.basePath);
}
else {
s.preUnitDirtyFiles = [];
}
// Only checkpoint code-executing units (not lifecycle/planning units)
if (safetyConfig.enabled &&
safetyConfig.checkpoints &&
unitType === "execute-task") {
s.checkpointSha = createCheckpoint(s.basePath, unitId);
if (s.checkpointSha) {
debugLog("runUnitPhase", {
phase: "checkpoint-created",
unitId,
sha: s.checkpointSha.slice(0, 8),
});
}
}
// Prompt injection
let finalPrompt = prompt;
if (s.pendingVerificationRetry) {
const retryCtx = s.pendingVerificationRetry;
s.pendingVerificationRetry = null;
const capped = retryCtx.failureContext.length > MAX_RECOVERY_CHARS
? retryCtx.failureContext.slice(0, MAX_RECOVERY_CHARS) +
"\n\n[...failure context truncated]"
: retryCtx.failureContext;
finalPrompt = `**VERIFICATION FAILED — AUTO-FIX ATTEMPT ${retryCtx.attempt}**\n\nThe verification gate ran after your previous attempt and found failures. Fix these issues before completing the task.\n\n${capped}\n\n---\n\n${finalPrompt}`;
}
if (s.pendingCrashRecovery) {
const capped = s.pendingCrashRecovery.length > MAX_RECOVERY_CHARS
? s.pendingCrashRecovery.slice(0, MAX_RECOVERY_CHARS) +
"\n\n[...recovery briefing truncated to prevent memory exhaustion]"
: s.pendingCrashRecovery;
finalPrompt = `${capped}\n\n---\n\n${finalPrompt}`;
s.pendingCrashRecovery = null;
}
else if (s.pendingRethinkAttempt) {
// Stuck recovery: inject diagnostic + rethink prompt, then clear.
let rethinkCtx = null;
try {
rethinkCtx = JSON.parse(s.pendingRethinkAttempt);
}
catch {
// Malformed JSON — skip injection
}
s.pendingRethinkAttempt = null;
if (rethinkCtx) {
const isFinal = rethinkCtx.attempt === 5;
const lines = [
isFinal
? `**⚠ FINAL STUCK ATTEMPT (5 of 5) — You have run out of recovery attempts. Make this count.**`
: `**STUCK RECOVERY — Rethink attempt ${rethinkCtx.attempt - 1} of 4.**`,
"",
`You have been repeatedly stuck on **${rethinkCtx.unitType} ${rethinkCtx.unitId}** for reason: "${rethinkCtx.reason}".`,
"",
"Before continuing, you must reflect on the following:",
"",
"1. **What specific error or failure pattern are you seeing?**",
"2. **What assumption are you making that might be wrong?**",
"3. **What is ONE concrete, different approach you will try this time?**",
"",
"Do NOT repeat the same approach. Identify the root cause and try a genuinely different strategy.",
];
if (rethinkCtx.stuckDiag) {
lines.push("", `**What was expected:** ${rethinkCtx.stuckDiag}`);
}
if (rethinkCtx.remediation) {
lines.push("", `**Suggested remediation:**\n${rethinkCtx.remediation}`);
}
if (rethinkCtx.diagnostic) {
lines.push("", `**Full diagnostic from previous attempt:**\n${rethinkCtx.diagnostic}`);
}
lines.push("", "---", "", finalPrompt);
finalPrompt = lines.join("\n");
}
}
else if ((s.unitDispatchCount.get(`${unitType}/${unitId}`) ?? 0) > 1) {
const diagnostic = deps.getDeepDiagnostic(s.basePath);
if (diagnostic) {
const cappedDiag = diagnostic.length > MAX_RECOVERY_CHARS
? diagnostic.slice(0, MAX_RECOVERY_CHARS) +
"\n\n[...diagnostic truncated to prevent memory exhaustion]"
: diagnostic;
finalPrompt = `**RETRY — your previous attempt did not produce the required artifact.**\n\nDiagnostic from previous attempt:\n${cappedDiag}\n\nFix whatever went wrong and make sure you write the required file this time.\n\n---\n\n${finalPrompt}`;
}
}
// Prompt char measurement
s.lastPromptCharCount = finalPrompt.length;
s.lastBaselineCharCount = undefined;
if (deps.isDbAvailable()) {
try {
const { inlineSfRootFile } = await importExtensionModule(import.meta.url, "../auto-prompts.js");
const [decisionsContent, requirementsContent, projectContent] = await Promise.all([
inlineSfRootFile(s.basePath, "decisions.md", "Decisions"),
inlineSfRootFile(s.basePath, "requirements.md", "Requirements"),
inlineSfRootFile(s.basePath, "project.md", "Project"),
]);
s.lastBaselineCharCount =
(decisionsContent?.length ?? 0) +
(requirementsContent?.length ?? 0) +
(projectContent?.length ?? 0);
}
catch (e) {
logWarning("engine", "Baseline char count measurement failed", {
error: String(e),
});
}
}
// Cache-optimize prompt section ordering
try {
finalPrompt = deps.reorderForCaching(finalPrompt);
}
catch (reorderErr) {
const msg = reorderErr instanceof Error ? reorderErr.message : String(reorderErr);
logWarning("engine", "Prompt reorder failed", { error: msg });
}
// Select and apply model (with tier escalation on retry — normal units only)
const modelResult = await deps.selectAndApplyModel(ctx, pi, unitType, unitId, s.basePath, prefs, s.verbose, s.autoModeStartModel, sidecarItem ? undefined : { isRetry, previousTier }, undefined, s.manualSessionModelOverride, s.autoModeStartThinkingLevel);
s.currentUnitRouting =
modelResult.routing;
s.currentUnitModel =
modelResult.appliedModel;
// updateProgressWidget( (decoy for legacy regex tests)
// Apply sidecar/pre-dispatch hook model override (takes priority over standard model selection)
const hookModelOverride = sidecarItem?.model ?? iterData.hookModelOverride;
if (hookModelOverride) {
const availableModels = ctx.modelRegistry.getAvailable();
const match = deps.resolveModelId(hookModelOverride, availableModels, ctx.model?.provider);
if (match) {
const ok = await pi.setModel(match, {
persist: resolvePersistModelChanges(),
});
if (ok) {
if (s.autoModeStartThinkingLevel) {
pi.setThinkingLevel(s.autoModeStartThinkingLevel);
}
s.currentUnitModel = match;
ctx.ui.notify(`Hook model override: ${match.provider}/${match.id}`, "info");
}
else {
ctx.ui.notify(`Hook model "${hookModelOverride}" found but setModel failed. Using default.`, "warning");
}
}
else {
ctx.ui.notify(`Hook model "${hookModelOverride}" not found in available models. Falling back to current session model. ` +
`Ensure the model is defined in models.json and has auth configured.`, "warning");
}
}
// Store the final dispatched model ID so the dashboard can read it (#2899).
// This accounts for hook model overrides applied after selectAndApplyModel.
s.currentDispatchedModelId = s.currentUnitModel
? `${s.currentUnitModel.provider ?? ""}/${s.currentUnitModel.id ?? ""}`
: null;
const compatibilityError = getWorkflowTransportSupportError(s.currentUnitModel?.provider ?? ctx.model?.provider, getRequiredWorkflowToolsForAutoUnit(unitType), {
projectRoot: s.basePath,
surface: "auto-mode",
unitType,
authMode: s.currentUnitModel?.provider
? ctx.modelRegistry.getProviderAuthMode(s.currentUnitModel.provider)
: ctx.model?.provider
? ctx.modelRegistry.getProviderAuthMode(ctx.model.provider)
: undefined,
baseUrl: s.currentUnitModel?.baseUrl ?? ctx.model?.baseUrl,
});
if (compatibilityError) {
ctx.ui.notify(compatibilityError, "error");
await deps.stopAuto(ctx, pi, compatibilityError);
return { action: "break", reason: "workflow-capability" };
}
// Progress widget + preconditions — deferred to after model selection so the
// widget's first render tick shows the correct model (#2899).
deps.updateProgressWidget(ctx, unitType, unitId, state); // updateProgressWidget(
deps.ensurePreconditions(unitType, unitId, s.basePath, state);
// Start unit supervision
deps.clearUnitTimeout();
deps.startUnitSupervision({
s,
ctx,
pi,
unitType,
unitId,
prefs,
buildSnapshotOpts: () => deps.buildSnapshotOpts(unitType, unitId),
buildRecoveryContext: () => ({
basePath: s.basePath,
verbose: s.verbose,
currentUnitStartedAt: s.currentUnit?.startedAt ?? Date.now(),
unitRecoveryCount: s.unitRecoveryCount,
}),
pauseAuto: deps.pauseAuto,
});
// Write preliminary lock (no session path yet — runUnit creates a new session).
// Crash recovery can still identify the in-flight unit from this lock.
deps.writeLock(deps.lockBase(), unitType, unitId);
debugLog("autoLoop", {
phase: "runUnit-start",
iteration: ic.iteration,
unitType,
unitId,
});
const unitResult = await runUnit(ctx, pi, s, unitType, unitId, finalPrompt);
s.lastUnitAgentEndMessages = unitResult.event?.messages ?? null;
debugLog("autoLoop", {
phase: "runUnit-end",
iteration: ic.iteration,
unitType,
unitId,
status: unitResult.status,
});
// Now that runUnit has called newSession(), the session file path is correct.
const sessionFile = deps.getSessionFile(ctx);
const sessionId = sessionFile ? basename(sessionFile) : undefined;
deps.updateSessionLock(deps.lockBase(), unitType, unitId, sessionFile, sessionId);
deps.writeLock(deps.lockBase(), unitType, unitId, sessionFile);
// Tag the most recent window entry with error info for stuck detection
const lastEntry = loopState.recentUnits[loopState.recentUnits.length - 1];
if (lastEntry) {
if (unitResult.errorContext) {
lastEntry.error =
`${unitResult.errorContext.category}:${unitResult.errorContext.message}`.slice(0, 200);
}
else if (unitResult.status === "error" ||
unitResult.status === "cancelled") {
lastEntry.error = `${unitResult.status}:${unitType}/${unitId}`;
}
else if (unitResult.event?.messages?.length) {
const lastMsg = unitResult.event.messages[unitResult.event.messages.length - 1];
const msgStr = typeof lastMsg === "string" ? lastMsg : JSON.stringify(lastMsg);
if (/error|fail|exception/i.test(msgStr)) {
lastEntry.error = msgStr.slice(0, 200);
}
}
}
if (unitResult.status === "cancelled") {
clearDeferredCommitAfterCancelledUnit(s, ctx, unitType, unitId, unitResult.errorContext?.message ?? "cancelled");
// Provider-error pause: pauseAuto already handled cleanup and scheduled
// recovery. Don't hard-stop — just break out of the loop (#2762).
if (unitResult.errorContext?.category === "provider") {
await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext);
debugLog("autoLoop", {
phase: "exit",
reason: "provider-pause",
isTransient: unitResult.errorContext.isTransient,
});
return { action: "break", reason: "provider-pause" };
}
// Timeout category covers two distinct scenarios:
// 1. Session creation timeout (120s) — transient, auto-resume with backoff
// 2. Unit hard timeout (30min+) — stuck agent, pause for manual review
// Structural errors (TypeError, is not a function) are NOT transient
// and must hard-stop to avoid infinite retry loops.
if (unitResult.errorContext?.isTransient &&
unitResult.errorContext?.category === "timeout") {
// Session-timeout cancellations are resumable pauses: pauseAuto below preserves the auto session
// instead of routing the cancelled unit into the hard-stop path.
const isSessionCreationTimeout = unitResult.errorContext.message?.includes("Session creation timed out");
if (isSessionCreationTimeout) {
consecutiveSessionTimeouts += 1;
const baseRetryAfterMs = 30_000;
const retryAfterMs = baseRetryAfterMs * 2 ** Math.max(0, consecutiveSessionTimeouts - 1);
const allowAutoResume = consecutiveSessionTimeouts <= MAX_SESSION_TIMEOUT_AUTO_RESUMES;
if (!allowAutoResume) {
ctx.ui.notify(`Session creation timed out ${consecutiveSessionTimeouts} consecutive times for ${unitType} ${unitId}. Pausing for manual review.`, "warning");
}
debugLog("autoLoop", {
phase: "session-timeout-pause",
unitType,
unitId,
consecutiveSessionTimeouts,
retryAfterMs,
allowAutoResume,
});
const errorDetail = ` for ${unitType} ${unitId}`;
await pauseAutoForProviderError(ctx.ui, errorDetail, () => deps.pauseAuto(ctx, pi), {
isRateLimit: false,
isTransient: allowAutoResume,
retryAfterMs,
resume: allowAutoResume
? () => {
void resumeAutoAfterProviderDelay(pi, ctx).catch((err) => {
const message = err instanceof Error ? err.message : String(err);
ctx.ui.notify(`Session timeout recovery failed: ${message}`, "error");
});
}
: undefined,
});
if (!allowAutoResume) {
resetConsecutiveSessionTimeouts();
}
await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext);
return { action: "break", reason: "session-timeout" };
}
// Unit hard timeout (30min+): pause without auto-resume — stuck agent
ctx.ui.notify(`Unit timed out for ${unitType} ${unitId} (supervision may have failed). Pausing auto-mode.`, "warning");
debugLog("autoLoop", {
phase: "unit-hard-timeout-pause",
unitType,
unitId,
});
await deps.pauseAuto(ctx, pi);
await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext);
return { action: "break", reason: "unit-hard-timeout" };
}
// All other cancelled states (structural errors, non-transient failures): hard stop
if (s.currentUnit) {
await deps.closeoutUnit(ctx, s.basePath, unitType, unitId, s.currentUnit.startedAt, deps.buildSnapshotOpts(unitType, unitId));
}
await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext);
ctx.ui.notify(`Session creation failed for ${unitType} ${unitId}: ${unitResult.errorContext?.message ?? "unknown"}. Stopping auto-mode.`, "warning");
await deps.stopAuto(ctx, pi, `Session creation failed: ${unitResult.errorContext?.message ?? "unknown"}`);
debugLog("autoLoop", { phase: "exit", reason: "session-failed" });
return { action: "break", reason: "session-failed" };
}
// ── Immediate unit closeout (metrics, activity log, memory) ────────
// Run right after runUnit() returns so telemetry is never lost to a
// crash between iterations.
// Guard: stopAuto() may have nulled s.currentUnit via s.reset() while
// this coroutine was suspended at `await runUnit(...)` (#2939).
if (s.currentUnit) {
// Reset session timeout counter — any successful unit clears the slate
resetConsecutiveSessionTimeouts();
await deps.closeoutUnit(ctx, s.basePath, unitType, unitId, s.currentUnit.startedAt, deps.buildSnapshotOpts(unitType, unitId));
}
// ── Zero tool-call guard (#1833, #2653) ──────────────────────────
// Any unit that completes with 0 tool calls made no real progress —
// likely context exhaustion where all tool calls errored out. Treat
// as failed so the unit is retried in a fresh context instead of
// silently passing through to artifact verification (which loops
// forever when the unit never produced its artifact).
{
const currentLedger = deps.getLedger();
if (currentLedger?.units) {
const lastUnit = [...currentLedger.units]
.reverse()
.find((u) => u.type === unitType &&
u.id === unitId &&
u.startedAt === s.currentUnit?.startedAt);
if (lastUnit && lastUnit.toolCalls === 0) {
if (USER_DRIVEN_DEEP_UNITS.has(unitType) &&
isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)) {
debugLog("runUnitPhase", {
phase: "zero-tool-calls-awaiting-user-input",
unitType,
unitId,
});
}
else {
debugLog("runUnitPhase", {
phase: "zero-tool-calls",
unitType,
unitId,
warning: "Unit completed with 0 tool calls — likely context exhaustion, marking as failed",
});
ctx.ui.notify(`${unitType} ${unitId} completed with 0 tool calls — context exhaustion, will retry`, "warning");
recordLearningOutcomeForUnit(ic, unitType, unitId, s.currentUnit?.startedAt, {
succeeded: false,
verificationPassed: null,
});
// Fall through to next iteration where dispatch will re-derive
// and re-dispatch this unit.
return {
action: "next",
data: {
unitStartedAt: s.currentUnit?.startedAt,
requestDispatchedAt: unitResult.requestDispatchedAt,
},
};
}
}
}
}
if (s.currentUnitRouting) {
deps.recordOutcome(unitType, s.currentUnitRouting.tier, true);
}
const skipArtifactVerification = shouldSkipArtifactVerification(unitType);
let artifactVerified;
if (USER_DRIVEN_DEEP_UNITS.has(unitType) &&
isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)) {
// Skip artifact verification — unit is paused waiting for user input
artifactVerified = false;
}
else {
artifactVerified =
skipArtifactVerification ||
verifyExpectedArtifact(unitType, unitId, s.basePath);
}
if (artifactVerified) {
s.unitDispatchCount.delete(`${unitType}/${unitId}`);
s.unitRecoveryCount.delete(`${unitType}/${unitId}`);
}
// Write phase handoff anchor after successful research/planning completion
const anchorPhases = new Set([
"research-milestone",
"research-slice",
"plan-milestone",
"plan-slice",
]);
if (artifactVerified && mid && anchorPhases.has(unitType)) {
try {
const { writePhaseAnchor } = await import("../phase-anchor.js");
writePhaseAnchor(s.basePath, mid, {
phase: unitType,
milestoneId: mid,
generatedAt: new Date().toISOString(),
intent: `Completed ${unitType} for ${unitId}`,
decisions: [],
blockers: [],
nextSteps: [],
});
}
catch (err) {
/* non-fatal — anchor is advisory */
logWarning("engine", `phase anchor failed: ${err instanceof Error ? err.message : String(err)}`);
}
}
if (unitResult.status !== "completed" || !artifactVerified) {
recordLearningOutcomeForUnit(ic, unitType, unitId, s.currentUnit?.startedAt, {
succeeded: false,
verificationPassed: null,
});
}
{
// Pull cost/token data from the ledger entry that snapshotUnitMetrics
// already wrote so the unit-end event carries billing context.
const unitEndLedger = deps.getLedger();
const unitEndEntry = unitEndLedger?.units
? [...unitEndLedger.units]
.reverse()
.find((u) => u.type === unitType &&
u.id === unitId &&
u.startedAt === s.currentUnit?.startedAt)
: undefined;
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "unit-end",
data: {
unitType,
unitId,
status: unitResult.status,
artifactVerified,
...(unitEndEntry
? {
cost_usd: unitEndEntry.cost,
tokens: unitEndEntry.tokens.total,
tokens_input: unitEndEntry.tokens.input,
tokens_output: unitEndEntry.tokens.output,
}
: {}),
...(unitResult.errorContext
? { errorContext: unitResult.errorContext }
: {}),
},
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
});
}
{
const verdict = unitResult.status === "completed"
? artifactVerified
? "success"
: "blocked"
: unitResult.status === "error"
? "fail"
: unitResult.status;
const ledger = deps.getLedger();
const unitEntry = ledger?.units
? [...ledger.units]
.reverse()
.find((u) => u.type === unitType &&
u.id === unitId &&
u.startedAt === s.currentUnit?.startedAt)
: undefined;
if (unitEntry) {
const costStr = deps.formatCost(unitEntry.cost);
ctx.ui.notify(`[unit] ${unitType} ${unitId} ended -> ${verdict} (${costStr}, ${unitEntry.tokens.total} tokens, ${unitEntry.toolCalls} tool calls)`, "info");
}
else {
ctx.ui.notify(`[unit] ${unitType} ${unitId} ended -> ${verdict}`, "info");
}
const toolSummary = formatToolCallSummary();
if (toolSummary) {
ctx.ui.notify(`[mcp] ${toolSummary}`, "info");
}
}
// ── Safety harness: checkpoint cleanup or rollback ──
if (s.checkpointSha) {
if (unitResult.status === "error" && safetyConfig.auto_rollback) {
const rolled = rollbackToCheckpoint(s.basePath, unitId, s.checkpointSha);
if (rolled) {
ctx.ui.notify(`Rolled back to pre-unit checkpoint for ${unitId}`, "info");
debugLog("runUnitPhase", { phase: "checkpoint-rollback", unitId });
}
}
else if (unitResult.status === "error") {
ctx.ui.notify(`Unit ${unitId} failed. Pre-unit checkpoint available at ${s.checkpointSha.slice(0, 8)}`, "warning");
}
else {
// Success — clean up checkpoint ref
cleanupCheckpoint(s.basePath, unitId);
debugLog("runUnitPhase", { phase: "checkpoint-cleaned", unitId });
}
s.checkpointSha = null;
}
s.preUnitDirtyFiles = [];
return {
action: "next",
data: {
unitStartedAt: s.currentUnit?.startedAt,
requestDispatchedAt: unitResult.requestDispatchedAt,
},
};
}
// ─── runFinalize ──────────────────────────────────────────────────────────────
/**
* Phase 5: Post-unit finalize — pre/post verification, UAT pause, step-wizard.
* Returns break/continue/next to control the outer loop.
*/
export async function runFinalize(ic, iterData, loopState, sidecarItem) {
const { ctx, pi, s, deps } = ic;
const { pauseAfterUatDispatch } = iterData;
debugLog("autoLoop", { phase: "finalize", iteration: ic.iteration });
// Clear unit timeout (unit completed)
deps.clearUnitTimeout();
// Post-unit context for pre/post verification
const postUnitCtx = {
s,
ctx,
pi,
buildSnapshotOpts: deps.buildSnapshotOpts,
lockBase: deps.lockBase,
stopAuto: deps.stopAuto,
pauseAuto: deps.pauseAuto,
updateProgressWidget: deps.updateProgressWidget,
};
// Pre-verification processing (commit, doctor, state rebuild, etc.)
// Timeout guard: if postUnitPreVerification hangs (e.g., safety harness
// deadlock, browser teardown hang, worktree sync stall), force-continue
// after timeout so the auto-loop is not permanently frozen (#3757).
//
// On timeout, null out s.currentUnit so the timed-out task's late async
// mutations are harmless — postUnitPreVerification guards all side effects
// behind `if (s.currentUnit)`. The next iteration sets a fresh currentUnit.
// Sidecar items use lightweight pre-verification opts
const preVerificationOpts = sidecarItem
? sidecarItem.kind === "hook"
? {
skipSettleDelay: true,
skipWorktreeSync: true,
agentEndMessages: s.lastUnitAgentEndMessages ?? undefined,
}
: {
skipSettleDelay: true,
agentEndMessages: s.lastUnitAgentEndMessages ?? undefined,
}
: { agentEndMessages: s.lastUnitAgentEndMessages ?? undefined };
const _preUnitSnapshot = s.currentUnit
? {
type: s.currentUnit.type,
id: s.currentUnit.id,
startedAt: s.currentUnit.startedAt,
}
: null;
const preResultGuard = await withTimeout(deps.postUnitPreVerification(postUnitCtx, preVerificationOpts), FINALIZE_PRE_TIMEOUT_MS, "postUnitPreVerification");
if (preResultGuard.timedOut) {
// Detach session from the timed-out unit so late async completions
// cannot mutate state for the next unit (#3757).
const hadStagedPending = s.stagedPendingCommit;
const hadCommitted = s.lastGitActionStatus === "ok";
s.stagedPendingCommit = false; // prevent orphaned deferred commit
s.currentUnit = null;
clearCurrentPhase();
// Drop any logger entries from the timed-out unit so they don't bleed
// into the next iteration's drain.
drainLogs();
loopState.consecutiveFinalizeTimeouts++;
if (hadStagedPending) {
ctx.ui.notify("postUnitPreVerification timed out with staged-but-uncommitted changes — staged files will be included in next unit's commit.", "warning");
logWarning("engine", "finalize-timeout: staged-pending-commit orphaned — will be absorbed by next unit");
}
else if (hadCommitted) {
ctx.ui.notify("postUnitPreVerification timed out after git commit — changes are in history but verification was skipped.", "warning");
logWarning("engine", "finalize-timeout: git commit completed before timeout — verification was not run");
}
debugLog("autoLoop", {
phase: "pre-verification-timeout",
iteration: ic.iteration,
unitType: iterData.unitType,
unitId: iterData.unitId,
consecutiveTimeouts: loopState.consecutiveFinalizeTimeouts,
});
if (loopState.consecutiveFinalizeTimeouts >= MAX_FINALIZE_TIMEOUTS) {
ctx.ui.notify(`postUnitPreVerification timed out ${loopState.consecutiveFinalizeTimeouts} consecutive times — stopping auto-mode to prevent budget waste`, "error");
await deps.stopAuto(ctx, pi, `${loopState.consecutiveFinalizeTimeouts} consecutive finalize timeouts`);
return { action: "break", reason: "finalize-timeout-escalation" };
}
ctx.ui.notify(`postUnitPreVerification timed out after ${FINALIZE_PRE_TIMEOUT_MS / 1000}s for ${iterData.unitType} ${iterData.unitId} (${loopState.consecutiveFinalizeTimeouts}/${MAX_FINALIZE_TIMEOUTS}) — continuing to next iteration`, "warning");
return { action: "next", data: undefined };
}
const preResult = preResultGuard.value;
if (preResult === "dispatched") {
const dispatchedReason = s.lastGitActionFailure
? "git-closeout-failure"
: "pre-verification-dispatched";
debugLog("autoLoop", {
phase: "exit",
reason: dispatchedReason,
gitError: s.lastGitActionFailure ?? undefined,
});
return { action: "break", reason: dispatchedReason };
}
if (preResult === "retry") {
if (sidecarItem) {
// Sidecar artifact retries are skipped — just continue
debugLog("autoLoop", {
phase: "sidecar-artifact-retry-skipped",
iteration: ic.iteration,
});
}
else {
// s.pendingVerificationRetry was set by postUnitPreVerification.
// Emit a dedicated journal event so forensics can distinguish bounded
// verification retries from genuine stuck-loop dispatch repetitions (#4540).
const retryInfo = s.pendingVerificationRetry;
deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "artifact-verification-retry",
data: {
unitType: _preUnitSnapshot?.type,
unitId: retryInfo?.unitId,
attempt: retryInfo?.attempt,
},
});
// Continue the loop — next iteration will inject the retry context into the prompt.
debugLog("autoLoop", {
phase: "artifact-verification-retry",
iteration: ic.iteration,
});
return { action: "continue" };
}
}
if (pauseAfterUatDispatch) {
ctx.ui.notify("UAT requires human execution. Auto-mode will pause after this unit writes the result file.", "info");
await deps.pauseAuto(ctx, pi);
debugLog("autoLoop", { phase: "exit", reason: "uat-pause" });
return { action: "break", reason: "uat-pause" };
}
// Verification gate
// Hook sidecar items skip verification entirely.
// Non-hook sidecar items run verification but skip retries (just continue).
const skipVerification = sidecarItem?.kind === "hook";
const uokFlagsFinalize = resolveUokFlags(ic.prefs);
const runVerifyGate = uokFlagsFinalize.gates &&
iterData.unitType === "execute-task" &&
!skipVerification;
if (!skipVerification) {
if (runVerifyGate) {
const vgRunner = new UokGateRunner();
vgRunner.register({
id: "unit-verification-gate",
type: "verification",
execute: async () => {
const result = await deps.runPostUnitVerification({ s, ctx, pi }, deps.pauseAuto);
if (result === "pause") {
return {
outcome: "fail",
failureClass: "manual-attention",
rationale: "Post-unit verification paused — requires human attention",
};
}
if (result === "retry") {
return {
outcome: "fail",
failureClass: "verification",
rationale: "Post-unit verification failed — retrying unit",
};
}
return {
outcome: "pass",
failureClass: "none",
rationale: "Post-unit verification passed",
};
},
});
const gateResult = await vgRunner.run("unit-verification-gate", {
basePath: s.basePath,
traceId: `finalize:${ic.flowId}`,
turnId: `iter-${ic.iteration}`,
milestoneId: iterData.mid ?? undefined,
unitType: iterData.unitType,
unitId: iterData.unitId,
});
if (gateResult.outcome !== "pass") {
recordLearningOutcomeForUnit(ic, iterData.unitType, iterData.unitId, s.currentUnit?.startedAt, {
succeeded: false,
verificationPassed: false,
});
const reason = gateResult.failureClass === "manual-attention"
? "verification-pause"
: "verification-fail";
debugLog("autoLoop", { phase: "exit", reason });
return { action: "break", reason };
}
}
else {
const verificationResult = await deps.runPostUnitVerification({ s, ctx, pi }, deps.pauseAuto);
if (verificationResult === "pause") {
recordLearningOutcomeForUnit(ic, iterData.unitType, iterData.unitId, s.currentUnit?.startedAt, {
succeeded: false,
verificationPassed: false,
});
debugLog("autoLoop", {
phase: "exit",
reason: "verification-pause",
});
return { action: "break", reason: "verification-pause" };
}
if (verificationResult === "retry") {
recordLearningOutcomeForUnit(ic, iterData.unitType, iterData.unitId, s.currentUnit?.startedAt, {
succeeded: false,
verificationPassed: false,
});
if (sidecarItem) {
// Sidecar verification retries are skipped — just continue
debugLog("autoLoop", {
phase: "sidecar-verification-retry-skipped",
iteration: ic.iteration,
});
}
else {
// s.pendingVerificationRetry was set by runPostUnitVerification.
// Continue the loop — next iteration will inject the retry context into the prompt.
debugLog("autoLoop", {
phase: "verification-retry",
iteration: ic.iteration,
});
return { action: "continue" };
}
}
}
}
// Post-verification processing (DB dual-write, hooks, triage, quick-tasks)
// Timeout guard: if postUnitPostVerification hangs (e.g., module import
// deadlock, SQLite transaction hang), force-continue after timeout so the
// auto-loop is not permanently frozen (#2344).
const postResultGuard = await withTimeout(deps.postUnitPostVerification(postUnitCtx), FINALIZE_POST_TIMEOUT_MS, "postUnitPostVerification");
if (postResultGuard.timedOut) {
// Detach session from the timed-out unit so late async completions
// cannot mutate state for the next unit (#3757).
s.currentUnit = null;
clearCurrentPhase();
// Drop any logger entries from the timed-out unit so they don't bleed
// into the next iteration's drain.
drainLogs();
loopState.consecutiveFinalizeTimeouts++;
debugLog("autoLoop", {
phase: "post-verification-timeout",
iteration: ic.iteration,
unitType: iterData.unitType,
unitId: iterData.unitId,
consecutiveTimeouts: loopState.consecutiveFinalizeTimeouts,
});
if (loopState.consecutiveFinalizeTimeouts >= MAX_FINALIZE_TIMEOUTS) {
ctx.ui.notify(`postUnitPostVerification timed out ${loopState.consecutiveFinalizeTimeouts} consecutive times — stopping auto-mode to prevent budget waste`, "error");
await deps.stopAuto(ctx, pi, `${loopState.consecutiveFinalizeTimeouts} consecutive finalize timeouts`);
return { action: "break", reason: "finalize-timeout-escalation" };
}
ctx.ui.notify(`postUnitPostVerification timed out after ${FINALIZE_POST_TIMEOUT_MS / 1000}s for ${iterData.unitType} ${iterData.unitId} (${loopState.consecutiveFinalizeTimeouts}/${MAX_FINALIZE_TIMEOUTS}) — continuing to next iteration`, "warning");
return { action: "next", data: undefined };
}
const postResult = postResultGuard.value;
if (postResult === "stopped") {
debugLog("autoLoop", {
phase: "exit",
reason: "post-verification-stopped",
});
return { action: "break", reason: "post-verification-stopped" };
}
if (postResult === "step-wizard") {
// Step mode — exit the loop (caller handles wizard)
debugLog("autoLoop", { phase: "exit", reason: "step-wizard" });
return { action: "break", reason: "step-wizard" };
}
// Both pre and post verification completed without timeout — reset counter
loopState.consecutiveFinalizeTimeouts = 0;
// Surface accumulated workflow-logger issues for this unit to the user.
// Warnings/errors logged during the unit are buffered in the logger and
// drained here so the user sees a single consolidated post-unit alert.
const finalizedArtifactVerified = shouldSkipArtifactVerification(iterData.unitType) ||
verifyExpectedArtifact(iterData.unitType, iterData.unitId, s.basePath);
if (finalizedArtifactVerified) {
recordLearningOutcomeForUnit(ic, iterData.unitType, iterData.unitId, s.currentUnit?.startedAt, {
succeeded: true,
verificationPassed: iterData.unitType === "execute-task" ? true : null,
});
// Clear the runtime unit record so it does not linger as a phantom
// "dispatched" unit across session restarts (#sf-moqv2k4g-kbg2nq).
clearUnitRuntimeRecord(s.basePath, iterData.unitType, iterData.unitId);
// Evict this unit from stuck-state recentUnits so a completed unit
// does not pollute the sliding window on restart.
const unitKey = `${iterData.unitType}/${iterData.unitId}`;
const prevLen = loopState.recentUnits.length;
loopState.recentUnits = loopState.recentUnits.filter((u) => u.key !== unitKey);
if (loopState.recentUnits.length < prevLen && loopState.stuckRecoveryAttempts > 0) {
loopState.stuckRecoveryAttempts = 0;
}
}
if (hasAnyIssues()) {
const { logs } = drainAndSummarize();
if (logs.length > 0) {
const severity = logs.some((e) => e.severity === "error")
? "error"
: "warning";
ctx.ui.notify(formatForNotification(logs), severity, {
kind: severity === "error" ? "notice" : "progress",
source: "workflow-logger",
dedupe_key: `workflow-issues:${iterData.unitType}:${iterData.unitId}`,
});
}
}
return { action: "next", data: undefined };
}
// ─── GAP-12: exported alias ───────────────────────────────────────────────────
export const resetSessionTimeoutState = resetConsecutiveSessionTimeouts;