singularity-forge/src/resources/extensions/sf/auto-start.js

1231 lines
44 KiB
JavaScript

/**
* Autonomous mode bootstrap — fresh-start initialization path.
*
* Git/state bootstrap, crash lock detection, debug init, worktree recovery,
* guided flow gate, session init, worktree lifecycle, DB lifecycle,
* preflight validation.
*
* Extracted from startAuto() in auto.ts. The resume path (s.paused)
* remains in auto.ts — this module handles only the fresh-start path.
*/
import {
existsSync,
mkdirSync,
readdirSync,
rmSync,
statSync,
unlinkSync,
} from "node:fs";
import { join, sep as pathSep } from "node:path";
import { collectSecretsFromManifest } from "../get-secrets-from-user.js";
import { ensureAgenticDocsScaffold } from "./agentic-docs-scaffold.js";
import { hideFooter } from "./auto-dashboard.js";
import {
cleanStaleRuntimeUnits,
getAutoWorktreePath,
readResourceVersion,
} from "./auto-worktree.js";
import { resolveProjectRootDbPath } from "./bootstrap/dynamic-tools.js";
import { invalidateAllCaches } from "./cache.js";
import { ensureSiftIndexWarmup } from "./code-intelligence.js";
import { clearLock, writeLock } from "./crash-recovery.js";
import {
debugLog,
enableDebug,
getDebugLogPath,
isDebugEnabled,
} from "./debug-logger.js";
import {
resetProactiveHealing,
setLevelChangeCallback,
} from "./doctor-proactive.js";
import { getManifestStatus, loadFile } from "./files.js";
import { GitServiceImpl } from "./git-service.js";
import { ensureGitignore, untrackRuntimeFiles } from "./gitignore.js";
import { initMetrics } from "./metrics.js";
import {
migrateToExternalState,
recoverFailedMigration,
} from "./migrate-external.js";
import {
nativeAddAll,
nativeBranchDelete,
nativeBranchList,
nativeBranchListMerged,
nativeCheckoutBranch,
nativeCommit,
nativeCommitCountBetween,
nativeDetectMainBranch,
nativeGetCurrentBranch,
nativeInit,
nativeIsRepo,
nativeWorktreeRemove,
} from "./native-git-bridge.js";
import { resolveMilestoneFile, sfRoot } from "./paths.js";
import { resetHookState, restoreHookState } from "./post-unit-hooks.js";
import {
getIsolationMode,
loadEffectiveSFPreferences,
resolvePersistModelChanges,
resolveSkillDiscoveryMode,
} from "./preferences.js";
import {
isCustomProvider,
resolveDefaultSessionModel,
resolveDynamicRoutingConfig,
} from "./preferences-models.js";
import {
ensureSfSymlink,
isInheritedRepo,
validateProjectId,
} from "./repo-identity.js";
import { initRoutingHistory } from "./routing-history.js";
import {
acquireSessionLock,
releaseSessionLock,
updateSessionLock,
} from "./session-lock.js";
import { getSessionModelOverride } from "./session-model-override.js";
import { getMilestone, isDbAvailable, openDatabase } from "./sf-db.js";
import { snapshotSkills } from "./skill-discovery.js";
import { deriveState, isGhostMilestone } from "./state.js";
import { isClosedStatus } from "./status-guards.js";
import {
reconcileDurableCompleteUnitRuntimeRecords,
reconcileStaleCompleteSliceRecords,
} from "./uok/unit-runtime.js";
import { safeSetWidget } from "./widget-safe.js";
import { logError, logWarning } from "./workflow-logger.js";
import {
captureIntegrationBranch,
detectWorktreeName,
setActiveMilestoneId,
} from "./worktree.js";
import {
worktreePath as getWorktreeDir,
isInsideWorktreesDir,
} from "./worktree-manager.js";
import { emitWorktreeOrphaned } from "./worktree-telemetry.js";
/**
* Contract for `bootstrapAutoSession`, which is defined further down in this
* file (the guard constant and helper functions below sit in between).
*
* Bootstraps a fresh autonomous mode session. Handles everything from git init
* through secrets collection, returning when ready for the first
* dispatchNextUnit call.
*
* Returns false if the bootstrap aborted (e.g., guided flow returned,
* concurrent session detected). Returns true when ready to dispatch.
*/
// Guard constant for consecutive bootstrap attempts that found phase === "complete"
// (or no active milestone at all). bootstrapAutoSession increments
// AutoSession.consecutiveCompleteBootstraps on each such attempt and aborts with
// a warning once the counter exceeds this value, breaking the recursive dialog
// loop described in #1348.
// The counter lives on AutoSession (not in this module) so s.reset() clears it.
const MAX_CONSECUTIVE_COMPLETE_BOOTSTRAPS = 2;
/**
 * Map the (survivor branch present?, derived phase) pair onto the recovery
 * step that bootstrapAutoSession has to perform. Kept free of side effects so
 * it can be unit-tested on its own.
 *
 * @param {boolean} hasSurvivorBranch - Whether a milestone branch from a prior session was found.
 * @param {string} phase - Phase reported by state derivation.
 * @returns {"none" | "discuss" | "finalize"} "discuss" for a survivor branch in
 *   "needs-discussion", "finalize" for a survivor branch in "complete",
 *   otherwise "none".
 */
export function decideSurvivorAction(hasSurvivorBranch, phase) {
  if (hasSurvivorBranch) {
    switch (phase) {
      case "needs-discussion":
        return "discuss";
      case "complete":
        return "finalize";
      default:
        break;
    }
  }
  // No survivor branch, or a phase that needs no special recovery.
  return "none";
}
/**
 * Open the project-root database when its file already exists on disk and no
 * database is currently available. Does nothing otherwise.
 *
 * A failure to open is reported through logWarning and never thrown, so the
 * returned promise always resolves.
 *
 * @param {string} basePath - Path handed to resolveProjectRootDbPath to locate the DB file.
 * @returns {Promise<void>}
 */
export async function openProjectDbIfPresent(basePath) {
  const dbFile = resolveProjectRootDbPath(basePath);
  // Nothing on disk yet — there is no existing database to open.
  if (!existsSync(dbFile)) return;
  // A database is already available; do not open a second time.
  if (isDbAvailable()) return;
  try {
    openDatabase(dbFile);
  } catch (openErr) {
    const detail = openErr instanceof Error ? openErr.message : String(openErr);
    logWarning("engine", `sf-db: failed to open existing database: ${detail}`);
  }
}
/**
 * Bootstrap-time sweep for `milestone/*` branches left behind when a session
 * ended before milestone teardown (merge → delete branch → remove worktree)
 * could run.
 *
 * The audit returns an empty result without touching anything when isolation
 * mode is "none", when the database is unavailable, when listing branches
 * fails, or when no milestone branches exist. Otherwise each branch whose
 * milestone record is found is classified:
 *
 *  - Open milestone (status not closed), branch not merged and with commits
 *    ahead of main: nothing is modified; a warning is recorded and orphan
 *    telemetry is emitted (#4762, #4764).
 *  - Status "complete" and branch merged into main: the branch is deleted and
 *    any leftover worktree directory is removed — through git first, then by
 *    direct filesystem removal, the latter only inside .sf/worktrees/ (#2365).
 *  - Status "complete" and branch NOT merged: the branch is preserved; a
 *    warning is recorded and orphan telemetry is emitted.
 *  - Any other closed status (parked, etc.): intentionally left alone.
 *
 * @param {string} basePath - Project root passed to the git/worktree helpers.
 * @param {string} isolationMode - Active isolation mode; "none" disables the audit.
 * @returns {{ recovered: string[], warnings: string[] }} Human-readable
 *   messages for the caller to surface via notify.
 */
export function auditOrphanedMilestoneBranches(basePath, isolationMode) {
  const recovered = [];
  const warnings = [];
  const summary = () => ({ recovered, warnings });
  const describeError = (e) => (e instanceof Error ? e.message : String(e));

  // No milestone branches exist in "none" mode, and without the DB the
  // completion status of a milestone cannot be determined.
  if (isolationMode === "none" || !isDbAvailable()) return summary();

  let branchNames;
  try {
    branchNames = nativeBranchList(basePath, "milestone/*");
  } catch {
    // Branch listing failed — skip the audit altogether.
    return summary();
  }
  if (branchNames.length === 0) return summary();

  // Main branch used for the merge checks; defaults to "main" when detection throws.
  let trunk = "main";
  try {
    trunk = nativeDetectMainBranch(basePath);
  } catch {
    // keep the "main" default
  }

  // Branches already merged into the main branch; empty when the lookup fails.
  let mergedIntoTrunk;
  try {
    mergedIntoTrunk = new Set(
      nativeBranchListMerged(basePath, trunk, "milestone/*"),
    );
  } catch {
    mergedIntoTrunk = new Set();
  }

  // #4764 telemetry. The payload is built lazily so that a failure while
  // assembling it is handled exactly like a failure while emitting it.
  const emitOrphanTelemetry = (milestoneId, buildPayload) => {
    try {
      emitWorktreeOrphaned(basePath, milestoneId, buildPayload());
    } catch (telemetryErr) {
      logWarning(
        "engine",
        `worktree-orphaned telemetry failed for ${milestoneId}: ${describeError(telemetryErr)}`,
      );
    }
  };

  // #4762 — pre-completion orphan: autonomous mode exited (pause, stop, crash,
  // merge error, blocker) while the milestone was still open, stranding work
  // on the branch or in the worktree. Data safety first: only warn.
  const flagStrandedOpenMilestone = (branch, milestoneId) => {
    if (mergedIntoTrunk.has(branch)) return; // nothing to recover
    let commitsAhead;
    try {
      commitsAhead = nativeCommitCountBetween(basePath, trunk, branch);
    } catch {
      // Rev-walk failure — skip rather than produce noise.
      return;
    }
    if (commitsAhead === 0) return;
    const worktreeDir = getWorktreeDir(basePath, milestoneId);
    const worktreeDirExists = existsSync(worktreeDir);
    const worktreeHint = worktreeDirExists
      ? ` Worktree directory at .sf/worktrees/${milestoneId}/ holds the live work.`
      : "";
    warnings.push(
      [
        `Branch ${branch} has ${commitsAhead} commit(s) ahead of ${trunk} for in-progress milestone ${milestoneId}.`,
        worktreeHint,
        ` Run \`/sf autonomous\` to resume, or merge manually if abandoning.`,
      ].join(""),
    );
    emitOrphanTelemetry(milestoneId, () => ({
      reason: "in-progress-unmerged",
      commitsAhead,
      worktreeDirExists,
    }));
  };

  // Completed milestone whose branch is merged: delete the branch, then clear
  // out whatever is left of its worktree directory.
  const reclaimMergedMilestone = (branch, milestoneId) => {
    try {
      nativeBranchDelete(basePath, branch, true);
      recovered.push(
        `Deleted merged branch ${branch} for completed milestone ${milestoneId}.`,
      );
    } catch (deleteErr) {
      warnings.push(
        `Failed to delete merged branch ${branch}: ${describeError(deleteErr)}`,
      );
    }

    const worktreeDir = getWorktreeDir(basePath, milestoneId);
    if (!existsSync(worktreeDir)) return;

    // Let git remove it first — this covers registered worktrees. A failure
    // here is expected when the directory is merely an orphaned folder.
    try {
      nativeWorktreeRemove(basePath, worktreeDir, true);
    } catch (removeErr) {
      logWarning(
        "engine",
        `worktree remove failed (expected for orphaned dirs): ${describeError(removeErr)}`,
      );
    }

    if (!existsSync(worktreeDir)) {
      recovered.push(`Removed orphaned worktree directory for ${milestoneId}.`);
      return;
    }

    // Directory survived the git removal (unregistered, or the removal was a
    // no-op). Fall back to deleting it directly — but never outside
    // .sf/worktrees/ (#2365).
    if (!isInsideWorktreesDir(basePath, worktreeDir)) {
      warnings.push(
        `Orphaned worktree directory for ${milestoneId} is outside .sf/worktrees/ — skipping removal for safety.`,
      );
      return;
    }
    try {
      rmSync(worktreeDir, { recursive: true, force: true });
      recovered.push(`Removed orphaned worktree directory for ${milestoneId}.`);
    } catch (rmErr) {
      warnings.push(
        `Failed to remove worktree directory for ${milestoneId}: ${describeError(rmErr)}`,
      );
    }
  };

  // Completed milestone whose branch is NOT merged: keep everything, warn.
  const flagUnmergedCompletedMilestone = (branch, milestoneId) => {
    warnings.push(
      `Branch ${branch} exists for completed milestone ${milestoneId} but is NOT merged into ${trunk}. ` +
        `This may contain unmerged work. Merge manually or run \`/sf health --fix\` to resolve.`,
    );
    emitOrphanTelemetry(milestoneId, () => ({
      reason: "complete-unmerged",
      worktreeDirExists: existsSync(getWorktreeDir(basePath, milestoneId)),
    }));
  };

  for (const branch of branchNames) {
    const milestoneId = branch.replace(/^milestone\//, "");
    const record = getMilestone(milestoneId);
    if (!record) continue;

    if (!isClosedStatus(record.status)) {
      // Genuinely open milestone — warn-only path.
      flagStrandedOpenMilestone(branch, milestoneId);
    } else if (record.status === "complete") {
      // Only "complete" takes part in cleanup; other closed statuses
      // (parked, etc.) are deliberately skipped.
      if (mergedIntoTrunk.has(branch)) {
        reclaimMergedMilestone(branch, milestoneId);
      } else {
        flagUnmergedCompletedMilestone(branch, milestoneId);
      }
    }
  }

  return summary();
}
export async function bootstrapAutoSession(
s,
ctx,
pi,
base,
verboseMode,
requestedStepMode,
deps,
interrupted,
) {
const {
shouldUseWorktreeIsolation,
registerSigtermHandler,
lockBase,
buildResolver,
} = deps;
const lockResult = acquireSessionLock(base, {
sessionId: ctx.sessionManager?.getSessionId?.(),
sessionFile: ctx.sessionManager?.getSessionFile?.(),
});
if (!lockResult.acquired) {
const reason = lockResult.reason;
ctx.ui.notify(reason, "error");
return false;
}
function releaseLockAndReturn() {
releaseSessionLock(base);
clearLock(base);
return false;
}
// Capture the user's session model before guided-flow dispatch can apply a
// phase-specific planning model for a discuss turn (#2829).
//
// Precedence:
// 1) Explicit session override via /sf model (this session)
// 2) SF model preferences from PREFERENCES.md (validated against live auth)
// 3) Current session model from settings/session restore (if provider ready)
//
// This preserves #3517 defaults while honoring explicit runtime model
// selection for subsequent /sf runs in the same session.
//
// Exception (#4122): when the session provider is a custom provider declared
// in ~/.sf/agent/models.json (Ollama, vLLM, OpenAI-compatible proxy, etc.),
// PREFERENCES.md is skipped entirely. PREFERENCES.md cannot reference custom
// providers, so honoring it would silently reroute autonomous mode to a built-in
// provider the user is not logged into and surface as "Not logged in · Please
// run /login" before pausing and resetting to claude-code/claude-sonnet-4-6.
const manualSessionOverride = getSessionModelOverride(
ctx.sessionManager.getSessionId(),
);
const sessionProviderIsCustom = isCustomProvider(ctx.model?.provider);
const preferredModel = sessionProviderIsCustom
? null
: resolveDefaultSessionModel(ctx.model?.provider);
// Validate the preferred model against the live registry + provider auth so
// an unconfigured PREFERENCES.md entry (no API key / OAuth) can't become the
// start-model snapshot. Without this, every subsequent unit would try to
// fall back to an unusable model.
let validatedPreferredModel;
if (preferredModel) {
const { resolveModelId } = await import("./auto-model-selection.js");
const available = ctx.modelRegistry.getAvailable();
const match = resolveModelId(
`${preferredModel.provider}/${preferredModel.id}`,
available,
ctx.model?.provider,
);
if (match) {
validatedPreferredModel = { provider: match.provider, id: match.id };
} else {
ctx.ui.notify(
`Preferred model ${preferredModel.provider}/${preferredModel.id} from PREFERENCES.md is not configured; falling back to session default.`,
"warning",
);
}
}
const sessionModelReady =
ctx.model && ctx.modelRegistry.isProviderRequestReady(ctx.model.provider);
const startModelSnapshot =
manualSessionOverride ??
validatedPreferredModel ??
(sessionModelReady && ctx.model
? { provider: ctx.model.provider, id: ctx.model.id }
: null);
try {
// Validate SF_PROJECT_ID early so the user gets immediate feedback
const customProjectId = process.env.SF_PROJECT_ID;
if (customProjectId && !validateProjectId(customProjectId)) {
ctx.ui.notify(
`SF_PROJECT_ID must contain only alphanumeric characters, hyphens, and underscores. Got: "${customProjectId}"`,
"error",
);
return releaseLockAndReturn();
}
// Ensure git repo exists *locally* at base.
// nativeIsRepo() uses `git rev-parse` which traverses up to parent dirs,
// so a parent repo can make it return true even when base has no .git of
// its own. Check for a local .git instead (defense-in-depth for the case
// where isInheritedRepo() returns a false negative, e.g. stale .sf at
// the parent git root). See #2393 and related issue.
const hasLocalGit = existsSync(join(base, ".git"));
if (!hasLocalGit || isInheritedRepo(base)) {
const mainBranch =
loadEffectiveSFPreferences()?.preferences?.git?.main_branch || "main";
nativeInit(base, mainBranch);
}
// Migrate legacy in-project .sf/ to external state directory.
// Migration MUST run before ensureGitignore to avoid adding ".sf" to
// .gitignore when .sf/ is git-tracked (data-loss bug #1364).
recoverFailedMigration(base);
const migration = migrateToExternalState(base);
if (migration.error) {
ctx.ui.notify(
`External state migration warning: ${migration.error}`,
"warning",
);
}
// Ensure symlink exists (handles fresh projects and post-migration)
ensureSfSymlink(base);
// Ensure .gitignore has baseline patterns.
// ensureGitignore checks for git-tracked .sf/ files and skips the
// ".sf" pattern if the project intentionally tracks .sf/ in git.
const gitPrefs = loadEffectiveSFPreferences()?.preferences?.git;
const manageGitignore = gitPrefs?.manage_gitignore;
ensureGitignore(base, { manageGitignore });
ensureAgenticDocsScaffold(base);
ensureSiftIndexWarmup(
base,
loadEffectiveSFPreferences()?.preferences?.codebase,
);
if (manageGitignore !== false) untrackRuntimeFiles(base);
// Bootstrap milestones/ if it doesn't exist.
// Check milestones/ directly — ensureSfSymlink above already created .sf/,
// so checking .sf/ existence would be dead code (#2942).
const sfDir = join(base, ".sf");
const milestonesPath = join(sfDir, "milestones");
if (!existsSync(milestonesPath)) {
mkdirSync(milestonesPath, { recursive: true });
try {
nativeAddAll(base);
nativeCommit(base, "chore: init sf");
} catch (err) {
/* nothing to commit */
logWarning(
"engine",
`mkdir failed: ${err instanceof Error ? err.message : String(err)}`,
);
}
}
// Initialize GitServiceImpl
s.gitService = new GitServiceImpl(
s.basePath,
loadEffectiveSFPreferences()?.preferences?.git ?? {},
);
// ── Debug mode ──
if (!isDebugEnabled() && process.env.SF_DEBUG === "1") {
enableDebug(base);
}
if (isDebugEnabled()) {
const { isNativeParserAvailable } = await import(
"./native-parser-bridge.js"
);
debugLog("debug-start", {
platform: process.platform,
arch: process.arch,
node: process.version,
model: ctx.model?.id ?? "unknown",
provider: ctx.model?.provider ?? "unknown",
nativeParser: isNativeParserAvailable(),
cwd: base,
});
ctx.ui.notify(`Debug logging enabled → ${getDebugLogPath()}`, "info");
}
if (interrupted.classification !== "recoverable") {
s.pendingCrashRecovery = null;
}
// Invalidate caches before initial state derivation
invalidateAllCaches();
// Clean stale runtime unit files for completed milestones (#887)
cleanStaleRuntimeUnits(
sfRoot(base),
(mid) => !!resolveMilestoneFile(base, mid, "SUMMARY"),
);
// Reconcile stale complete-slice runtime records where the slice
// completed successfully on retry but a prior cancelled/failed record
// persists. Prevents flow-audit false positives (#sf-moqv5o7h-vaabu6).
try {
const reconciled = reconcileStaleCompleteSliceRecords(base);
if (reconciled.cleared > 0) {
debugLog("bootstrap", {
phase: "stale-slice-runtime-reconciled",
cleared: reconciled.cleared,
units: reconciled.details,
});
}
} catch (err) {
// Non-fatal — defensive cleanup, never block bootstrap
logWarning(
"bootstrap",
`stale slice runtime reconciliation failed: ${err instanceof Error ? err.message : String(err)}`,
);
}
// Open the project-root DB before deriveState so DB-backed state
// derivation (queue-order, task status) works on a cold start (#2841).
await openProjectDbIfPresent(base);
try {
const reconciled = await reconcileDurableCompleteUnitRuntimeRecords(base);
if (reconciled.cleared > 0) {
debugLog("bootstrap", {
phase: "durable-complete-runtime-reconciled",
cleared: reconciled.cleared,
units: reconciled.details,
});
}
} catch (err) {
// Non-fatal — defensive cleanup, never block bootstrap
logWarning(
"bootstrap",
`durable complete runtime reconciliation failed: ${err instanceof Error ? err.message : String(err)}`,
);
}
// ── Orphaned milestone branch audit ──
// Catches completed milestones whose teardown (merge + branch delete)
// was lost due to session ending between completion and teardown.
// Must run after DB open and before worktree entry.
try {
const auditResult = auditOrphanedMilestoneBranches(
base,
getIsolationMode(),
);
for (const msg of auditResult.recovered) {
ctx.ui.notify(`Orphan audit: ${msg}`, "info");
}
for (const msg of auditResult.warnings) {
ctx.ui.notify(`Orphan audit: ${msg}`, "warning");
}
if (auditResult.recovered.length > 0) {
debugLog("orphan-audit", {
recovered: auditResult.recovered,
warnings: auditResult.warnings,
});
}
} catch (err) {
// Non-fatal — the audit is defensive, never block bootstrap
logWarning(
"bootstrap",
`orphaned milestone branch audit failed: ${err instanceof Error ? err.message : String(err)}`,
);
}
let state = await deriveState(base);
// Stale worktree state recovery (#654)
if (
state.activeMilestone &&
shouldUseWorktreeIsolation() &&
!detectWorktreeName(base)
) {
const wtPath = getAutoWorktreePath(base, state.activeMilestone.id);
if (wtPath) {
state = await deriveState(wtPath);
}
}
// Milestone branch recovery (#601, #2358)
// Detect survivor milestone branches in both pre-planning and complete phases.
// In phase=complete, the milestone artifacts exist but finalization (merge,
// worktree cleanup) was never run — the survivor branch must be merged.
let hasSurvivorBranch = false;
if (
state.activeMilestone &&
(state.phase === "pre-planning" || state.phase === "complete") &&
shouldUseWorktreeIsolation() &&
!detectWorktreeName(base) &&
!base.includes(`${pathSep}.sf${pathSep}worktrees${pathSep}`)
) {
const milestoneBranch = `milestone/${state.activeMilestone.id}`;
const { nativeBranchExists } = await import("./native-git-bridge.js");
hasSurvivorBranch = nativeBranchExists(base, milestoneBranch);
if (hasSurvivorBranch) {
ctx.ui.notify(
`Found prior session branch ${milestoneBranch}. Resuming.`,
"info",
);
}
}
// Survivor branch exists but milestone still needs discussion (#1726):
// The worktree/branch was created but the milestone only has CONTEXT-DRAFT.md.
// Route to the interactive discussion handler instead of falling through to
// autonomous mode, which would immediately stop with "needs discussion".
if (decideSurvivorAction(hasSurvivorBranch, state.phase) === "discuss") {
const { showWorkflowEntry } = await import("./guided-flow.js");
await showWorkflowEntry(ctx, pi, base, { step: requestedStepMode });
invalidateAllCaches();
const postState = await deriveState(base);
if (postState.activeMilestone && postState.phase !== "needs-discussion") {
state = postState;
// Discussion succeeded — clear survivor flag so normal flow continues
hasSurvivorBranch = false;
} else {
ctx.ui.notify(
"Discussion completed but milestone draft was not promoted. Run /sf to try again.",
"warning",
);
return releaseLockAndReturn();
}
}
// Survivor branch exists and milestone is complete (#2358):
// The milestone artifacts were written but finalization (merge, worktree
// cleanup) never ran. Run mergeAndExit to finalize, then re-derive state
// so the normal "all milestones complete" or "next milestone" path runs.
if (decideSurvivorAction(hasSurvivorBranch, state.phase) === "finalize") {
const mid = state.activeMilestone.id;
ctx.ui.notify(
`Milestone ${mid} is complete but branch/worktree was not finalized. Running merge now.`,
"info",
);
const resolver = buildResolver();
resolver.mergeAndExit(mid, {
notify: ctx.ui.notify.bind(ctx.ui),
});
invalidateAllCaches();
state = await deriveState(base);
// Clear survivor flag — finalization is done
hasSurvivorBranch = false;
}
if (!hasSurvivorBranch) {
// No active work — start a new milestone via discuss flow
if (!state.activeMilestone || state.phase === "complete") {
// Guard against recursive dialog loop (#1348):
// If we've entered this branch multiple times in quick succession,
// the discuss workflow isn't producing a milestone. Break the cycle.
s.consecutiveCompleteBootstraps++;
if (
s.consecutiveCompleteBootstraps > MAX_CONSECUTIVE_COMPLETE_BOOTSTRAPS
) {
s.consecutiveCompleteBootstraps = 0;
ctx.ui.notify(
"All milestones are complete and the discussion didn't produce a new one. " +
"Run /sf to start a new milestone manually.",
"warning",
);
return releaseLockAndReturn();
}
// Autonomous mode: map the codebase and create milestones
// without waiting for user answers. Uses discuss-headless prompt.
ctx.ui.notify(
"No milestones found. Bootstrapping from repo docs and source inventory.",
"info",
);
const { buildAutoBootstrapContext } = await import(
"./auto-bootstrap-context.js"
);
const {
bootstrapNewMilestone,
dispatchNewMilestoneDiscuss,
injectTodoContext,
} = await import("./guided-flow.js");
const bootstrapContext = buildAutoBootstrapContext(base);
const nextId = bootstrapNewMilestone(base);
await dispatchNewMilestoneDiscuss(ctx, pi, base, nextId, {
autonomousBootstrap: true,
preamble: injectTodoContext(base, bootstrapContext),
});
invalidateAllCaches();
let postState = await deriveState(base);
if (!postState.activeMilestone) {
ctx.ui.notify(
`Headless bootstrap for ${nextId} returned without artifacts. Starting roadmap planning repair session.`,
"warning",
);
await dispatchNewMilestoneDiscuss(ctx, pi, base, nextId, {
autonomousBootstrap: true,
preamble: injectTodoContext(
base,
[
`This is an autonomous roadmap bootstrap repair for ${nextId}.`,
"The previous bootstrap turn ended without writing CONTEXT, CONTEXT-DRAFT, or ROADMAP artifacts.",
"Use the repo-doc/source bootstrap context below as the source of truth.",
bootstrapContext,
"Start the roadmap planning session now: build project knowledge, run the planning meeting, and persist artifacts.",
"Do not stop after reflection. At minimum write CONTEXT-DRAFT with evidence and open questions.",
"If confidence is high enough, write CONTEXT and call sf_plan_milestone so autonomous mode can continue.",
].join("\n"),
),
});
invalidateAllCaches();
postState = await deriveState(base);
}
if (
postState.activeMilestone &&
postState.phase !== "complete" &&
postState.phase !== "pre-planning"
) {
s.consecutiveCompleteBootstraps = 0; // Successfully advanced past "complete"
state = postState;
} else if (
postState.activeMilestone &&
postState.phase === "pre-planning"
) {
const contextFile = resolveMilestoneFile(
base,
postState.activeMilestone.id,
"CONTEXT",
);
const hasContext = !!(contextFile && (await loadFile(contextFile)));
if (hasContext) {
state = postState;
} else {
const repairId = postState.activeMilestone.id;
ctx.ui.notify(
`Headless bootstrap created ${repairId} without context. Starting roadmap planning repair session.`,
"warning",
);
await dispatchNewMilestoneDiscuss(ctx, pi, base, repairId, {
autonomousBootstrap: true,
preamble: injectTodoContext(
base,
[
`This is an autonomous roadmap bootstrap repair for existing milestone ${repairId}.`,
"The previous bootstrap created a milestone shell but did not write CONTEXT.md, CONTEXT-DRAFT.md, or ROADMAP.md.",
"Use the repo-doc/source bootstrap context below as the source of truth.",
bootstrapContext,
"Reuse this milestone ID. Do not create a new milestone for the same bootstrap work.",
"Run the roadmap planning session now and persist CONTEXT or CONTEXT-DRAFT at minimum.",
"If confidence is high enough, write CONTEXT and call sf_plan_milestone so autonomous mode can continue.",
].join("\n"),
),
});
invalidateAllCaches();
postState = await deriveState(base);
if (
postState.activeMilestone &&
postState.phase !== "complete" &&
postState.phase !== "pre-planning"
) {
s.consecutiveCompleteBootstraps = 0;
state = postState;
} else if (
postState.activeMilestone &&
postState.phase === "pre-planning"
) {
const repairedContextFile = resolveMilestoneFile(
base,
postState.activeMilestone.id,
"CONTEXT",
);
const repairedHasContext = !!(
repairedContextFile && (await loadFile(repairedContextFile))
);
if (repairedHasContext) {
state = postState;
} else {
ctx.ui.notify(
"Headless bootstrap repair completed but milestone context is still missing.",
"warning",
);
return releaseLockAndReturn();
}
} else {
ctx.ui.notify(
"Headless bootstrap repair completed but no milestone artifacts were written. Auto cannot continue without a context or draft.",
"warning",
);
return releaseLockAndReturn();
}
}
} else {
if (isGhostMilestone(base, nextId)) {
rmSync(join(sfRoot(base), "milestones", nextId), {
recursive: true,
force: true,
});
invalidateAllCaches();
}
ctx.ui.notify(
"Headless bootstrap repair completed but no milestone artifacts were written. Auto cannot continue without a context or draft.",
"warning",
);
return releaseLockAndReturn();
}
}
// Active milestone exists but has no roadmap
if (state.phase === "pre-planning") {
const mid = state.activeMilestone.id;
const contextFile = resolveMilestoneFile(base, mid, "CONTEXT");
const hasContext = !!(contextFile && (await loadFile(contextFile)));
if (!hasContext) {
ctx.ui.notify(
`Milestone ${mid} has no context. Bootstrapping from repo docs and source inventory.`,
"info",
);
const { buildAutoBootstrapContext } = await import(
"./auto-bootstrap-context.js"
);
const { dispatchNewMilestoneDiscuss, injectTodoContext } =
await import("./guided-flow.js");
const bootstrapContext = buildAutoBootstrapContext(base);
await dispatchNewMilestoneDiscuss(ctx, pi, base, mid, {
autonomousBootstrap: true,
preamble: injectTodoContext(
base,
[
`This is an autonomous roadmap bootstrap repair for existing milestone ${mid}.`,
"The milestone exists but has no CONTEXT.md yet.",
"Use the repo-doc/source bootstrap context below as the source of truth.",
bootstrapContext,
"Reuse this milestone ID. Do not create a new milestone for the same bootstrap work.",
"Build project knowledge, run the planning meeting, and persist CONTEXT or CONTEXT-DRAFT.",
].join("\n"),
),
});
invalidateAllCaches();
const postState = await deriveState(base);
if (postState.activeMilestone && postState.phase !== "pre-planning") {
state = postState;
} else if (
postState.activeMilestone &&
postState.phase === "pre-planning"
) {
const repairedContextFile = resolveMilestoneFile(
base,
postState.activeMilestone.id,
"CONTEXT",
);
const repairedHasContext = !!(
repairedContextFile && (await loadFile(repairedContextFile))
);
if (repairedHasContext) {
state = postState;
} else {
ctx.ui.notify(
"Discussion completed but milestone context is still missing. Run /sf to try again.",
"warning",
);
return releaseLockAndReturn();
}
} else {
ctx.ui.notify(
"Discussion completed but milestone context is still missing. Run /sf to try again.",
"warning",
);
return releaseLockAndReturn();
}
}
}
// Active milestone has CONTEXT-DRAFT but no full context — needs discussion
if (state.phase === "needs-discussion") {
const { showWorkflowEntry } = await import("./guided-flow.js");
await showWorkflowEntry(ctx, pi, base, { step: requestedStepMode });
invalidateAllCaches();
const postState = await deriveState(base);
if (
postState.activeMilestone &&
postState.phase !== "needs-discussion"
) {
state = postState;
} else {
ctx.ui.notify(
"Discussion completed but milestone draft was not promoted. Run /sf to try again.",
"warning",
);
return releaseLockAndReturn();
}
}
}
// Unreachable safety check
if (!state.activeMilestone) {
const { showWorkflowEntry } = await import("./guided-flow.js");
await showWorkflowEntry(ctx, pi, base, { step: requestedStepMode });
return releaseLockAndReturn();
}
// Successfully resolved an active milestone — reset the re-entry guard
s.consecutiveCompleteBootstraps = 0;
// ── Initialize session state ──
// Notify shared phase state so subagent conflict checks can fire
const { activateSF: activateSFPhaseState } = await import(
"../shared/sf-phase-state.js"
);
activateSFPhaseState();
s.active = true;
s.stepMode = requestedStepMode;
s.verbose = verboseMode;
s.cmdCtx = ctx;
s.basePath = base;
s.unitDispatchCount.clear();
s.unitRecoveryCount.clear();
s.lastBudgetAlertLevel = 0;
s.unitLifetimeDispatches.clear();
resetHookState();
restoreHookState(base);
resetProactiveHealing();
// Notify user on health level transitions (green→yellow→red and back)
setLevelChangeCallback((_from, to, summary) => {
const level =
to === "red" ? "error" : to === "yellow" ? "warning" : "info";
ctx.ui.notify(summary, level);
});
s.autoStartTime = Date.now();
s.resourceVersionOnStart = readResourceVersion();
s.pendingQuickTasks = [];
s.currentUnit = null;
s.currentMilestoneId = state.activeMilestone?.id ?? null;
s.originalModelId = ctx.model?.id ?? null;
s.originalModelProvider = ctx.model?.provider ?? null;
// Register SIGTERM handler
registerSigtermHandler(base);
// Capture integration branch
if (s.currentMilestoneId) {
if (getIsolationMode() !== "none") {
captureIntegrationBranch(base, s.currentMilestoneId);
}
setActiveMilestoneId(base, s.currentMilestoneId);
}
// Guard against stale milestone branch when isolation:none (#3613).
// A prior session with isolation:branch/worktree may have left HEAD on
// milestone/<MID>. Auto-checkout back to the integration branch.
if (getIsolationMode() === "none" && nativeIsRepo(base)) {
try {
const currentBranch = nativeGetCurrentBranch(base);
if (currentBranch.startsWith("milestone/")) {
const integrationBranch = nativeDetectMainBranch(base);
nativeCheckoutBranch(base, integrationBranch);
logWarning(
"bootstrap",
`Returned to "${integrationBranch}" — HEAD was on stale milestone branch "${currentBranch}" (isolation: none does not use milestone branches).`,
);
}
} catch (err) {
logWarning(
"bootstrap",
`Could not auto-checkout from stale milestone branch: ${err instanceof Error ? err.message : String(err)}`,
);
}
}
// ── Auto-worktree setup ──
s.originalBasePath = base;
/**
 * Reports whether a path lies at or below an SF worktrees directory.
 *
 * Two on-disk layouts are recognised:
 *   - direct:           <root>/.sf/worktrees[/...]
 *   - symlink-resolved: <root>/.sf/projects/<hex-hash>/worktrees[/...]
 *
 * @param {string} p - Path to inspect, using the platform separator.
 * @returns {boolean} True when `p` matches either layout.
 */
const isUnderSfWorktrees = (p) => {
  const directDir = `${pathSep}.sf${pathSep}worktrees`;
  // Direct layout: something below the worktrees dir, or the dir itself.
  if (p.includes(`${directDir}${pathSep}`) || p.endsWith(directDir)) {
    return true;
  }
  // Escape the separator so it is always a literal inside the pattern
  // (a bare backslash on Windows would otherwise start a regex escape).
  const sepLiteral = `\\${pathSep}`;
  // Symlink-resolved layout: the hash segment is lowercase hex only.
  const resolvedLayout = new RegExp(
    `${sepLiteral}\\.sf${sepLiteral}projects${sepLiteral}[a-f0-9]+${sepLiteral}worktrees(?:${sepLiteral}|$)`,
  );
  return resolvedLayout.test(p);
};
if (
s.currentMilestoneId &&
shouldUseWorktreeIsolation() &&
!detectWorktreeName(base) &&
!isUnderSfWorktrees(base)
) {
buildResolver().enterMilestone(s.currentMilestoneId, {
notify: ctx.ui.notify.bind(ctx.ui),
});
if (s.basePath !== base) {
// Successfully entered worktree — re-register SIGTERM handler at original base
registerSigtermHandler(s.originalBasePath);
}
}
// ── DB lifecycle ──
const sfDbPath = resolveProjectRootDbPath(s.basePath);
const sfDirPath = join(s.basePath, ".sf");
if (existsSync(sfDirPath) && !existsSync(sfDbPath)) {
const hasDecisions = existsSync(join(sfDirPath, "DECISIONS.md"));
const hasRequirements = existsSync(join(sfDirPath, "REQUIREMENTS.md"));
const hasMilestones = existsSync(join(sfDirPath, "milestones"));
try {
const { openDatabase: openDb } = await import("./sf-db.js");
openDb(sfDbPath);
if (hasDecisions || hasRequirements || hasMilestones) {
const { migrateFromMarkdown } = await import("./md-importer.js");
migrateFromMarkdown(s.basePath);
}
} catch (err) {
logError("engine", `auto-migration failed: ${err.message}`);
}
}
if (existsSync(sfDbPath) && !isDbAvailable()) {
try {
const { openDatabase: openDb } = await import("./sf-db.js");
openDb(sfDbPath);
} catch (err) {
logError("engine", `failed to open existing database: ${err.message}`);
}
}
// Gate: abort bootstrap if the DB file exists but the provider is
// still unavailable after both open attempts above. Without this,
// autonomous mode starts but every sf_task_complete / sf_slice_complete
// call returns "db_unavailable", triggering artifact-retry which
// re-dispatches the same task — producing an infinite loop (#2419).
if (existsSync(sfDbPath) && !isDbAvailable()) {
ctx.ui.notify(
"SQLite database exists but failed to open. Autonomous mode cannot proceed without a working database provider. " +
"Check for corrupt sf.db or missing native SQLite bindings.",
"error",
);
return releaseLockAndReturn();
}
// Initialize metrics
initMetrics(s.basePath);
// Initialize routing history
initRoutingHistory(s.basePath);
// Restore the model that was active when auto bootstrap began (#650, #2829).
if (startModelSnapshot) {
s.autoModeStartModel = {
provider: startModelSnapshot.provider,
id: startModelSnapshot.id,
};
}
s.manualSessionModelOverride = manualSessionOverride ?? null;
// Apply worker model override from parallel orchestrator (#worker-model).
// SF_WORKER_MODEL is injected by the coordinator when parallel.worker_model
// is configured, so parallel milestone workers use a cheaper model than the
// coordinator session (e.g. Haiku for execution, Sonnet for planning).
const workerModelOverride = process.env.SF_WORKER_MODEL;
if (workerModelOverride && process.env.SF_PARALLEL_WORKER === "1") {
const availableModels = ctx.modelRegistry.getAvailable();
const { resolveModelId } = await import("./auto-model-selection.js");
const overrideModel = resolveModelId(
workerModelOverride,
availableModels,
ctx.model?.provider,
);
if (overrideModel) {
const ok = await pi.setModel(overrideModel, {
persist: resolvePersistModelChanges(),
});
if (ok) {
// Update start model so all subsequent units use this as the baseline
s.autoModeStartModel = {
provider: overrideModel.provider,
id: overrideModel.id,
};
ctx.ui.notify(
`Worker model override: ${overrideModel.provider}/${overrideModel.id}`,
"info",
);
}
}
}
// Snapshot installed skills
if (resolveSkillDiscoveryMode() !== "off") {
snapshotSkills();
}
ctx.ui.setStatus("sf-auto", s.stepMode ? "next" : "auto");
ctx.ui.setFooter(hideFooter);
// Hide sf-health during AUTO — sf-progress is the single source of truth
// for last-commit / cost / health signal while auto is running.
safeSetWidget(ctx, "sf-health", undefined);
const modeLabel = s.stepMode ? "Assisted mode" : "Autonomous mode";
const pendingCount = (state.registry ?? []).filter(
(m) => m.status !== "complete" && m.status !== "parked",
).length;
const scopeMsg =
pendingCount > 1
? `Will loop through ${pendingCount} milestones.`
: "Will loop until milestone complete.";
ctx.ui.notify(`${modeLabel} started. ${scopeMsg}`, "info");
// Show dynamic routing status so users know upfront if models will be
// downgraded for simple tasks (#3962).
// Use the same effective logic as selectAndApplyModel: check flat-rate
// provider suppression and resolve the actual ceiling model.
const routingConfig = resolveDynamicRoutingConfig();
const startModelLabel = s.autoModeStartModel
? `${s.autoModeStartModel.provider}/${s.autoModeStartModel.id}`
: ctx.model
? `${ctx.model.provider}/${ctx.model.id}`
: "default";
// Flat-rate providers (e.g. GitHub Copilot, claude-code, user-declared
// subscription proxies, externalCli CLIs) suppress routing at dispatch
// time (#3453) — reflect that in the banner. Thread the same
// FlatRateContext used by selectAndApplyModel so user-declared
// flat-rate providers and externalCli auto-detection are respected.
const { isFlatRateProvider, buildFlatRateContext } = await import(
"./auto-model-selection.js"
);
const bannerPrefs = loadEffectiveSFPreferences()?.preferences;
const effectiveProvider =
s.autoModeStartModel?.provider ?? ctx.model?.provider;
const effectivelyEnabled =
routingConfig.enabled &&
!(
effectiveProvider &&
isFlatRateProvider(
effectiveProvider,
buildFlatRateContext(effectiveProvider, ctx, bannerPrefs),
)
);
// The actual ceiling may come from tier_models.heavy, not the start model.
const effectiveCeiling =
routingConfig.enabled && routingConfig.tier_models?.heavy
? routingConfig.tier_models.heavy
: startModelLabel;
if (effectivelyEnabled) {
ctx.ui.notify(
`Dynamic routing: enabled — simple tasks may use cheaper models (ceiling: ${effectiveCeiling})`,
"info",
);
} else {
ctx.ui.notify(
`Dynamic routing: disabled — all tasks will use ${startModelLabel}`,
"info",
);
}
updateSessionLock(
lockBase(),
"starting",
s.currentMilestoneId ?? "unknown",
);
writeLock(lockBase(), "starting", s.currentMilestoneId ?? "unknown");
// Secrets collection gate
const mid = state.activeMilestone.id;
try {
const manifestStatus = await getManifestStatus(
base,
mid,
s.originalBasePath || base,
);
if (manifestStatus && manifestStatus.pending.length > 0) {
const result = await collectSecretsFromManifest(base, mid, ctx);
if (
result &&
result.applied &&
result.skipped &&
result.existingSkipped
) {
ctx.ui.notify(
`Secrets collected: ${result.applied.length} applied, ${result.skipped.length} skipped, ${result.existingSkipped.length} already set.`,
"info",
);
} else {
ctx.ui.notify("Secrets collection skipped.", "info");
}
}
} catch (err) {
ctx.ui.notify(
`Secrets collection error: ${err instanceof Error ? err.message : String(err)}. Continuing with next task.`,
"warning",
);
}
// Self-heal: remove stale .git/index.lock
try {
const gitLockFile = join(base, ".git", "index.lock");
if (existsSync(gitLockFile)) {
const lockAge = Date.now() - statSync(gitLockFile).mtimeMs;
if (lockAge > 60_000) {
unlinkSync(gitLockFile);
ctx.ui.notify(
"Removed stale .git/index.lock from prior crash.",
"info",
);
}
}
} catch (e) {
debugLog("git-lock-cleanup-failed", {
error: e instanceof Error ? e.message : String(e),
});
}
// Pre-flight: validate milestone queue
try {
const msDir = join(base, ".sf", "milestones");
if (existsSync(msDir)) {
const milestoneIds = readdirSync(msDir, { withFileTypes: true })
.filter((d) => d.isDirectory() && /^M\d{3}/.test(d.name))
.map((d) => d.name.match(/^(M\d{3})/)?.[1] ?? d.name);
if (milestoneIds.length > 1) {
const issues = [];
for (const id of milestoneIds) {
// Skip completed/parked milestones — a leftover CONTEXT-DRAFT.md
// on a finished milestone is harmless residue, not an actionable warning.
if (isDbAvailable()) {
const ms = getMilestone(id);
if (ms?.status === "complete" || ms?.status === "parked")
continue;
}
const draft = resolveMilestoneFile(base, id, "CONTEXT-DRAFT");
if (draft)
issues.push(
`${id}: has CONTEXT-DRAFT.md (will pause for discussion)`,
);
}
if (issues.length > 0) {
ctx.ui.notify(
`Pre-flight: ${milestoneIds.length} milestones queued.\n${issues.map((i) => `${i}`).join("\n")}`,
"warning",
);
} else {
ctx.ui.notify(
`Pre-flight: ${milestoneIds.length} milestones queued. All have full context.`,
"info",
);
}
}
}
} catch (err) {
/* non-fatal */
logWarning(
"engine",
`preflight validation failed: ${err instanceof Error ? err.message : String(err)}`,
);
}
return true;
} catch (err) {
releaseSessionLock(base);
clearLock(base);
throw err;
}
}