485 lines
16 KiB
JavaScript
485 lines
16 KiB
JavaScript
/**
|
||
* auto/phases-dispatch.js — runDispatch phase.
|
||
*/
|
||
import { basename, dirname, join, parse as parsePath } from "node:path";
|
||
import { importExtensionModule } from "@singularity-forge/coding-agent";
|
||
import {
|
||
clearCurrentPhase,
|
||
setCurrentPhase,
|
||
} from "../../shared/sf-phase-state.js";
|
||
import { atomicWriteSync } from "../atomic-write.js";
|
||
import { resetCompletionNudgeState } from "../auto-completion-nudge.js";
|
||
import {
|
||
isAwaitingUserInput,
|
||
USER_DRIVEN_DEEP_UNITS,
|
||
} from "../auto-post-unit.js";
|
||
import {
|
||
buildLoopRemediationSteps,
|
||
diagnoseExpectedArtifact,
|
||
verifyExpectedArtifact,
|
||
} from "../auto-recovery.js";
|
||
import {
|
||
formatToolCallSummary,
|
||
resetToolCallCounts,
|
||
} from "../auto-tool-tracking.js";
|
||
import {
|
||
appendAutonomousSolverCheckpoint,
|
||
assessAutonomousSolverTurn,
|
||
beginAutonomousSolverIteration,
|
||
buildAutonomousSolverMissingCheckpointRepairPrompt,
|
||
buildAutonomousSolverPromptBlock,
|
||
buildAutonomousSolverSteeringPromptBlock,
|
||
classifyAutonomousSolverMissingCheckpointFailure,
|
||
consumePendingAutonomousSolverSteering,
|
||
getConfiguredAutonomousSolverMaxIterations,
|
||
recordAutonomousSolverMissingCheckpointRetry,
|
||
} from "../autonomous-solver.js";
|
||
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
|
||
import { debugLog } from "../debug-logger.js";
|
||
import { PROJECT_FILES } from "../detection.js";
|
||
import { MergeConflictError } from "../git-service.js";
|
||
import { recordLearnedOutcome } from "../learning/runtime.js";
|
||
import { sfRoot } from "../paths.js";
|
||
import { resolvePersistModelChanges } from "../preferences.js";
|
||
import {
|
||
approveProductionMutationWithLlmPolicy,
|
||
ensureProductionMutationApprovalTemplate,
|
||
readProductionMutationApprovalStatus,
|
||
} from "../production-mutation-approval.js";
|
||
import { pauseAutoForProviderError } from "../provider-error-pause.js";
|
||
import {
|
||
buildReasoningAssistPrompt,
|
||
injectReasoningGuidance,
|
||
isReasoningAssistEnabled,
|
||
} from "../reasoning-assist.js";
|
||
import {
|
||
loadEvidenceFromDisk,
|
||
resetEvidence,
|
||
} from "../safety/evidence-collector.js";
|
||
import { getDirtyFiles } from "../safety/file-change-validator.js";
|
||
import {
|
||
cleanupCheckpoint,
|
||
createCheckpoint,
|
||
rollbackToCheckpoint,
|
||
} from "../safety/git-checkpoint.js";
|
||
import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
|
||
import { recordSelfFeedback } from "../self-feedback.js";
|
||
import {
|
||
checkpointWal,
|
||
getMilestoneSlices,
|
||
getSliceTaskCounts,
|
||
getTask,
|
||
isDbAvailable,
|
||
} from "../sf-db.js";
|
||
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
|
||
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
|
||
import { handleProductAudit } from "../tools/product-audit-tool.js";
|
||
import { parseUnitId } from "../unit-id.js";
|
||
import {
|
||
collectSessionTokenUsage,
|
||
collectWorktreeFingerprint,
|
||
countChangedFiles,
|
||
resetRunawayGuardState,
|
||
} from "../uok/auto-runaway-guard.js";
|
||
import { resolveUokFlags } from "../uok/flags.js";
|
||
import { UokGateRunner } from "../uok/gate-runner.js";
|
||
import { emitModelAutoResolvedEvent } from "../uok/model-route-evidence.js";
|
||
import {
|
||
ensurePlanV2Graph as ensurePlanningFlowGraph,
|
||
isEmptyPlanV2GraphResult,
|
||
isMissingFinalizedContextResult,
|
||
} from "../uok/plan.js";
|
||
import { buildUokProgressEvent } from "../uok/progress-event.js";
|
||
import {
|
||
clearUnitRuntimeRecord,
|
||
writeUnitRuntimeRecord,
|
||
} from "../uok/unit-runtime.js";
|
||
import {
|
||
_resetLogs,
|
||
drainAndSummarize,
|
||
drainLogs,
|
||
formatForNotification,
|
||
hasAnyIssues,
|
||
logError,
|
||
logWarning,
|
||
} from "../workflow-logger.js";
|
||
import {
|
||
getRequiredWorkflowToolsForAutoUnit,
|
||
getWorkflowTransportSupportError,
|
||
} from "../workflow-tools.js";
|
||
import { resolveWorktreeProjectRoot } from "../worktree-root.js";
|
||
import { detectStuck } from "./detect-stuck.js";
|
||
import {
|
||
FINALIZE_POST_TIMEOUT_MS,
|
||
FINALIZE_PRE_TIMEOUT_MS,
|
||
withTimeout,
|
||
} from "./finalize-timeout.js";
|
||
import { runUnit } from "./run-unit.js";
|
||
import { getErrorMessage } from "../error-utils.js";
|
||
import {
|
||
BUDGET_THRESHOLDS,
|
||
MAX_FINALIZE_TIMEOUTS,
|
||
MAX_RECOVERY_CHARS,
|
||
} from "./types.js";
|
||
import { closeoutAndStop, _resolveDispatchGuardBasePath } from "./phases-helpers.js";
|
||
|
||
/**
 * Decide whether the UOK diagnostics verdict may continue into dispatch.
 *
 * Purpose: turn durable UOK self-diagnostics into autonomous control, so SF
 * pauses on split-brain/runtime corruption before spending another LLM turn.
 *
 * Consumer: runDispatch before it starts the next autonomous unit.
 *
 * Dispatch is blocked when the verdict is exactly "degraded" or when any issue
 * carries severity "error"; every other report (including a missing one)
 * proceeds.
 *
 * @param {object|null|undefined} diagnostics - Diagnostics report. Fields read
 *   here: `verdict`, `classification`, `issues` (entries with `severity` and
 *   `code`) and `reportPath`.
 * @returns {{proceed: true} | {proceed: false, reason: string, issueCode: (string|undefined), reportPath: string}}
 *   Gate decision. On a block, `reason` is a human-readable summary,
 *   `issueCode` is the code of the first error-severity issue (falling back to
 *   the first issue of any severity) and `reportPath` points at the evidence.
 */
export function assessUokDiagnosticsDispatchGate(diagnostics) {
  if (!diagnostics) return { proceed: true };

  // A malformed report (issues present but not an array) must not throw here:
  // treat it as "no issues" so a degraded verdict still blocks dispatch.
  const issues = Array.isArray(diagnostics.issues) ? diagnostics.issues : [];
  const blockingIssue = issues.find((issue) => issue?.severity === "error");

  if (diagnostics.verdict !== "degraded" && !blockingIssue) {
    return { proceed: true };
  }

  const issueCode = blockingIssue?.code ?? issues[0]?.code;
  const reportPath =
    diagnostics.reportPath ?? ".sf/runtime/uok-diagnostics.json";

  // A report can block purely on an error-severity issue without carrying a
  // verdict; render "unknown" rather than the literal string "undefined".
  const verdict = diagnostics.verdict ?? "unknown";
  const classification = diagnostics.classification ?? "unknown";

  const reason = [
    `UOK diagnostics blocked dispatch: ${verdict}/${classification}`,
    issueCode ? `issue ${issueCode}` : "",
    `evidence ${reportPath}`,
  ]
    .filter(Boolean)
    .join(" · ");

  return {
    proceed: false,
    reason,
    issueCode,
    reportPath,
  };
}
|
||
// ─── generateMilestoneReport ──────────────────────────────────────────────────
|
||
|
||
// ─── runDispatch ──────────────────────────────────────────────────────────────
|
||
/**
 * Phase 3: Dispatch resolution — resolve next unit, stuck detection, pre-dispatch hooks.
 * Returns break/continue to control the loop, or next with IterationData on success.
 *
 * Steps, in order:
 *  1. Resolve the next dispatch via `deps.resolveDispatch`.
 *  2. Handle "stop" (pause on warning level, close out otherwise) and any
 *     other non-"dispatch" action (yield to the event loop and continue).
 *  3. Run the UOK diagnostics gate; a blocking verdict pauses the session.
 *     Failures while producing or assessing diagnostics fail open.
 *  4. Optionally inject reasoning-assist guidance into the prompt.
 *  5. Sliding-window stuck detection with graduated recovery.
 *  6. Pre-dispatch hooks (may skip the unit or replace prompt / unit type).
 *  7. Prior-slice completion guard.
 *
 * @param {object} ic - Iteration context; reads `ctx`, `pi`, `s`, `deps`,
 *   `prefs`, `iteration`, `flowId` and `nextSeq()`.
 * @param {object} preData - Pre-dispatch data; reads `state`, `mid`, `midTitle`.
 * @param {object} loopState - Mutable loop state; `recentUnits` and
 *   `stuckRecoveryAttempts` are updated in place.
 * @returns {Promise<{action: "break", reason: string} | {action: "continue"} | {action: "next", data: object}>}
 */
export async function runDispatch(ic, preData, loopState) {
  const { ctx, pi, s, deps, prefs } = ic;
  const { state, mid, midTitle } = preData;
  const STUCK_WINDOW_SIZE = 6;

  debugLog("autoLoop", { phase: "dispatch-resolve", iteration: ic.iteration });

  const dispatchResult = await deps.resolveDispatch({
    basePath: s.basePath,
    mid,
    midTitle,
    state,
    prefs,
    session: s,
    runControl: deps.uokRunControl,
    permissionProfile: deps.uokPermissionProfile,
  });

  if (dispatchResult.action === "stop") {
    deps.emitJournalEvent({
      ts: new Date().toISOString(),
      flowId: ic.flowId,
      seq: ic.nextSeq(),
      eventType: "dispatch-stop",
      rule: dispatchResult.matchedRule,
      data: { reason: dispatchResult.reason },
    });
    // Warning-level stops are recoverable human checkpoints (e.g. UAT verdict
    // gate) — pause instead of hard-stopping so the session is resumable with
    // `/autonomous`. Error/info-level stops remain hard stops for infrastructure
    // failures and terminal conditions respectively.
    // See: https://github.com/singularity-forge/sf-run/issues/2474
    if (dispatchResult.level === "warning") {
      ctx.ui.notify(dispatchResult.reason, "warning");
      await deps.pauseAuto(ctx, pi);
    } else {
      await closeoutAndStop(ctx, pi, s, deps, dispatchResult.reason);
    }
    debugLog("autoLoop", { phase: "exit", reason: "dispatch-stop" });
    return { action: "break", reason: "dispatch-stop" };
  }

  if (dispatchResult.action !== "dispatch") {
    // Non-dispatch action (e.g. "skip") — re-derive state
    await new Promise((r) => setImmediate(r));
    return { action: "continue" };
  }

  // ── UOK diagnostics dispatch gate ───────────────────────────────────
  // Producing/assessing diagnostics is best-effort (fails open), but once the
  // gate has decided to block, that decision must not be undone by a failure
  // in the evidence-recording side effect.
  try {
    const diagnostics = deps.writeUokDiagnostics?.(s.basePath, {
      expectedNext: dispatchResult,
    });
    const gate = assessUokDiagnosticsDispatchGate(diagnostics);
    deps.emitJournalEvent({
      ts: new Date().toISOString(),
      flowId: ic.flowId,
      seq: ic.nextSeq(),
      eventType: "uok-diagnostics-dispatch-gate",
      data: {
        verdict: diagnostics?.verdict ?? "unknown",
        classification: diagnostics?.classification ?? "unknown",
        proceed: gate.proceed,
        issueCode: gate.issueCode,
        reportPath: gate.reportPath ?? diagnostics?.reportPath,
      },
    });
    if (!gate.proceed) {
      // NOTE(review): `runPreDispatchGate` is not among this module's visible
      // imports or definitions — confirm where it is meant to come from.
      // Until then the call throws a ReferenceError, which previously escaped
      // to the fail-open catch below and let a blocked dispatch proceed.
      try {
        await runPreDispatchGate({
          gateId: "uok-diagnostics-dispatch-gate",
          gateType: "execution",
          outcome: "manual-attention",
          failureClass: "manual-attention",
          rationale: "uok diagnostics blocked dispatch",
          findings: gate.reason,
          milestoneId: mid,
        });
      } catch (recordErr) {
        logWarning(
          "engine",
          "UOK diagnostics gate evidence recording failed; pausing anyway",
          {
            error: getErrorMessage(recordErr),
            issueCode: gate.issueCode,
          },
        );
      }
      ctx.ui.notify(gate.reason, "error");
      await deps.pauseAuto(ctx, pi);
      debugLog("autoLoop", {
        phase: "exit",
        reason: "uok-diagnostics-pause",
        issueCode: gate.issueCode,
      });
      return { action: "break", reason: "uok-diagnostics-pause" };
    }
  } catch (err) {
    logWarning("engine", "UOK diagnostics dispatch gate failed open", {
      error: getErrorMessage(err),
    });
  }

  deps.emitJournalEvent({
    ts: new Date().toISOString(),
    flowId: ic.flowId,
    seq: ic.nextSeq(),
    eventType: "dispatch-match",
    rule: dispatchResult.matchedRule,
    data: { unitType: dispatchResult.unitType, unitId: dispatchResult.unitId },
  });

  let unitType = dispatchResult.unitType;
  const unitId = dispatchResult.unitId;
  let prompt = dispatchResult.prompt;
  const pauseAfterUatDispatch = dispatchResult.pauseAfterDispatch ?? false;

  // ── Reasoning assist injection ──────────────────────────────────────
  if (isReasoningAssistEnabled(unitType)) {
    try {
      const reasoningPrompt = await buildReasoningAssistPrompt(
        unitType,
        unitId,
        s.basePath,
        ctx,
      );
      if (reasoningPrompt) {
        // Fire-and-forget: reasoning assist is best-effort, non-blocking
        // The actual LLM call would happen here in a full implementation.
        // For now, we prepare the prompt for injection.
        debugLog("autoLoop", {
          phase: "reasoning-assist",
          unitType,
          unitId,
          promptLength: reasoningPrompt.length,
        });
        // Use reasoning prompt context as guidance until a fast model is wired in.
        // The injected guidance provides unit-level context hints to the primary model.
        prompt = injectReasoningGuidance(prompt, reasoningPrompt);
      }
    } catch (err) {
      logWarning("engine", "Reasoning assist failed open", {
        error: getErrorMessage(err),
        unitType,
        unitId,
      });
    }
  }

  // ── Sliding-window stuck detection with graduated recovery ──
  const derivedKey = `${unitType}/${unitId}`;
  const hasTransientTaskCompleteFailure =
    unitType === "execute-task" && !!s.pendingTaskCompleteFailures?.has(unitId);

  // Verification retries and transient task-complete failures re-dispatch the
  // same unit on purpose, so they are kept out of the stuck window.
  if (!s.pendingVerificationRetry && !hasTransientTaskCompleteFailure) {
    loopState.recentUnits.push({ key: derivedKey });
    if (loopState.recentUnits.length > STUCK_WINDOW_SIZE) {
      loopState.recentUnits.shift();
    }

    const stuckSignal = detectStuck(loopState.recentUnits);
    if (stuckSignal) {
      debugLog("autoLoop", {
        phase: "stuck-check",
        unitType,
        unitId,
        reason: stuckSignal.reason,
        recoveryAttempts: loopState.stuckRecoveryAttempts,
      });

      // Graduated stuck recovery (attempt is 1-based after the increment):
      //   Attempt 1:    verify artifact + cache invalidation + retry
      //   Attempts 2–5: rethink + diagnostic injection + retry
      //   Attempt 6+:   exhausted — hard stop
      loopState.stuckRecoveryAttempts++;
      const attempt = loopState.stuckRecoveryAttempts;

      if (attempt === 1) {
        // Attempt 1: verify artifact + cache invalidation + retry
        const artifactExists = verifyExpectedArtifact(
          unitType,
          unitId,
          s.basePath,
        );
        if (artifactExists) {
          debugLog("autoLoop", {
            phase: "stuck-recovery",
            level: 1,
            action: "artifact-found",
          });
          ctx.ui.notify(
            `Stuck recovery: artifact for ${unitType} ${unitId} found on disk. Invalidating caches.`,
            "info",
          );
        } else {
          ctx.ui.notify(
            `Stuck on ${unitType} ${unitId} (${stuckSignal.reason}). Invalidating caches and retrying.`,
            "warning",
          );
        }
        // Both outcomes invalidate caches and re-derive state on the next pass.
        deps.invalidateAllCaches();
        return { action: "continue" };
      }

      if (attempt <= 5) {
        // Attempts 2–5: rethink + diagnostic + retry
        const stuckDiag = diagnoseExpectedArtifact(
          unitType,
          unitId,
          s.basePath,
        );
        const stuckRemediation = buildLoopRemediationSteps(
          unitType,
          unitId,
          s.basePath,
        );
        const diagnostic = deps.getDeepDiagnostic(s.basePath);
        // Cap the diagnostic so the stored rethink payload stays bounded.
        const cappedDiag =
          (diagnostic?.length ?? 0) > MAX_RECOVERY_CHARS
            ? `${diagnostic.slice(0, MAX_RECOVERY_CHARS)}\n\n[...diagnostic truncated]`
            : (diagnostic ?? null);
        // Stored on the session as a JSON string; this function only writes it.
        s.pendingRethinkAttempt = JSON.stringify({
          attempt,
          reason: stuckSignal.reason,
          diagnostic: cappedDiag,
          stuckDiag,
          remediation: stuckRemediation,
          unitType,
          unitId,
        });
        const rt =
          attempt === 5
            ? "**FINAL STUCK ATTEMPT — 5 of 5.** "
            : `**STUCK RECOVERY ATTEMPT ${attempt - 1} of 4.** `;
        ctx.ui.notify(
          `${rt}Stuck on ${unitType} ${unitId} (${stuckSignal.reason}). Injecting diagnostic and retrying.`,
          "warning",
        );
        return { action: "continue" };
      }

      // Attempt 6+: genuinely exhausted — hard stop
      debugLog("autoLoop", {
        phase: "stuck-detected",
        unitType,
        unitId,
        reason: stuckSignal.reason,
      });
      const stuckDiag = diagnoseExpectedArtifact(unitType, unitId, s.basePath);
      const stuckRemediation = buildLoopRemediationSteps(
        unitType,
        unitId,
        s.basePath,
      );
      const stuckParts = [
        `Stuck on ${unitType} ${unitId} — ${stuckSignal.reason}.`,
      ];
      if (stuckDiag) stuckParts.push(`Expected: ${stuckDiag}`);
      if (stuckRemediation) {
        stuckParts.push(`To recover:\n${stuckRemediation}`);
      }
      ctx.ui.notify(stuckParts.join(" "), "error");
      await deps.stopAuto(ctx, pi, `Stuck: ${stuckSignal.reason}`);
      return { action: "break", reason: "stuck-detected" };
    }

    // Progress detected — reset recovery counter
    if (loopState.stuckRecoveryAttempts > 0) {
      debugLog("autoLoop", {
        phase: "stuck-counter-reset",
        from:
          loopState.recentUnits[loopState.recentUnits.length - 2]?.key ?? "",
        to: derivedKey,
      });
      loopState.stuckRecoveryAttempts = 0;
    }
  }

  // Pre-dispatch hooks
  const preDispatchResult = deps.runPreDispatchHooks(
    unitType,
    unitId,
    prompt,
    s.basePath,
  );
  if (preDispatchResult.firedHooks.length > 0) {
    ctx.ui.notify(
      `Pre-dispatch hook${preDispatchResult.firedHooks.length > 1 ? "s" : ""}: ${preDispatchResult.firedHooks.join(", ")}`,
      "info",
    );
    deps.emitJournalEvent({
      ts: new Date().toISOString(),
      flowId: ic.flowId,
      seq: ic.nextSeq(),
      eventType: "pre-dispatch-hook",
      data: {
        firedHooks: preDispatchResult.firedHooks,
        action: preDispatchResult.action,
      },
    });
  }

  if (preDispatchResult.action === "skip") {
    ctx.ui.notify(
      `Skipping ${unitType} ${unitId} (pre-dispatch hook).`,
      "info",
    );
    await new Promise((r) => setImmediate(r));
    return { action: "continue" };
  }

  if (preDispatchResult.action === "replace") {
    // A replacing hook may swap both the prompt and the unit type.
    prompt = preDispatchResult.prompt ?? prompt;
    if (preDispatchResult.unitType) unitType = preDispatchResult.unitType;
  } else if (preDispatchResult.prompt) {
    prompt = preDispatchResult.prompt;
  }

  const guardBasePath = _resolveDispatchGuardBasePath(s);
  const priorSliceBlocker = deps.getPriorSliceCompletionBlocker(
    guardBasePath,
    deps.getMainBranch(guardBasePath),
    unitType,
    unitId,
  );
  if (priorSliceBlocker) {
    await deps.stopAuto(ctx, pi, priorSliceBlocker);
    debugLog("autoLoop", { phase: "exit", reason: "prior-slice-blocker" });
    return { action: "break", reason: "prior-slice-blocker" };
  }

  return {
    action: "next",
    data: {
      unitType,
      unitId,
      prompt,
      finalPrompt: prompt,
      pauseAfterUatDispatch,
      state,
      mid,
      midTitle,
      isRetry: false,
      previousTier: undefined,
      hookModelOverride: preDispatchResult.model,
    },
  };
}
|