refactor: decompose auto.ts into 6 focused modules (#1088)

Extract 6 cohesive modules from the 3,476-line auto.ts god file,
reducing it to 1,732 lines while preserving all external import paths.

New modules:
- auto-timers.ts (223 lines): Unit supervision timers — soft timeout,
  idle watchdog, hard timeout, context-pressure monitor
- auto-idempotency.ts (150 lines): Completed-key checks, skip loop
  detection, phantom loop handling, fallback persistence
- auto-stuck-detection.ts (220 lines): Dispatch count tracking,
  lifetime cap, MAX_UNIT_DISPATCHES loop detection, stub recovery.
  Uses return values instead of calling stopAuto/dispatchNextUnit.
- auto-verification.ts (195 lines): Post-unit typecheck/lint/test gate,
  runtime error capture, dependency audit, auto-fix retry logic
- auto-post-unit.ts (585 lines): Split into postUnitPreVerification
  and postUnitPostVerification — commit, doctor, state rebuild,
  worktree sync, DB dual-write, hooks, triage, quick-tasks
- auto-start.ts (472 lines): Fresh session bootstrap — git/state init,
  crash lock detection, debug init, worktree setup, DB lifecycle

All extracted functions receive AutoSession + context as parameters.
No circular dependencies — new modules import from leaf dependencies
only, never from ./auto.js. All public exports from auto.ts are
preserved so external import paths continue to work unchanged.

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
TÂCHES 2026-03-17 22:26:05 -06:00 committed by GitHub
parent 05fa939c11
commit 25d5f60836
7 changed files with 1966 additions and 1868 deletions

View file

@ -0,0 +1,150 @@
/**
* Idempotency checks for auto-mode unit dispatch.
*
* Handles completed-key membership, artifact cross-validation,
* consecutive skip counting, phantom skip loop detection, key eviction,
* and fallback persistence.
*
* Extracted from dispatchNextUnit() in auto.ts. Pure decision logic
* with set mutations — does NOT call dispatchNextUnit or stopAuto.
*/
import { invalidateAllCaches } from "./cache.js";
import {
verifyExpectedArtifact,
persistCompletedKey,
removePersistedKey,
} from "./auto-recovery.js";
import { resolveMilestoneFile } from "./paths.js";
import { MAX_CONSECUTIVE_SKIPS, MAX_LIFETIME_DISPATCHES } from "./auto/session.js";
import type { AutoSession } from "./auto/session.js";
/**
* Inputs for checkIdempotency(): the session whose bookkeeping is consulted
* and mutated, the identity of the unit about to be dispatched, and a
* notification sink.
*/
export interface IdempotencyContext {
/** Session whose completedKeySet, unitConsecutiveSkips, unitLifetimeDispatches and recentlyEvictedKeys are read and mutated. */
s: AutoSession;
/** Unit type; combined with unitId as `${unitType}/${unitId}` to form the idempotency key. */
unitType: string;
/** Unit id; its first "/"-separated segment is treated as the milestone id. */
unitId: string;
/** Base path passed through to artifact verification, key persistence and milestone file lookup. */
basePath: string;
/** Notification callback, invoked with a human-readable message and a severity level. */
notify: (message: string, level: "info" | "warning" | "error") => void;
}
/**
* Outcome of checkIdempotency(), discriminated by `action`. The caller is
* expected to act on it; checkIdempotency itself never dispatches or stops.
*/
export type IdempotencyResult =
// Do not dispatch this unit. Reasons produced in this file: "completed",
// "fallback-persisted", "evicted", "phantom-loop-cleared".
| { action: "skip"; reason: string }
// Completion record was stale (artifact missing). Reason produced: "stale-key".
| { action: "rerun"; reason: string }
// No completion record and no blocking artifact: dispatch normally.
| { action: "proceed" }
// Lifetime dispatch cap exceeded while skipping; reason is a human-readable message.
| { action: "stop"; reason: string };
/**
 * Decide whether a unit should be skipped (already completed), re-run
 * (stale completion record), dispatched normally, or whether auto-mode
 * must stop because the unit keeps cycling through skips.
 *
 * Two paths lead to a skip, and both run through the same loop guards:
 *  - Primary: the key is in s.completedKeySet and its artifact exists.
 *  - Fallback: the key is missing but the artifact exists (and the key was
 *    not recently evicted); the key is persisted first, then skipped.
 *
 * Mutates s.completedKeySet, s.unitConsecutiveSkips, s.unitLifetimeDispatches,
 * and s.recentlyEvictedKeys as needed. Never calls dispatchNextUnit or
 * stopAuto; the caller acts on the returned action.
 *
 * @param ictx Session state, unit identity, base path and notify callback.
 * @returns The action the caller should take, with a reason where applicable.
 */
export function checkIdempotency(ictx: IdempotencyContext): IdempotencyResult {
  const { s, unitType, unitId, basePath, notify } = ictx;
  const idempotencyKey = `${unitType}/${unitId}`;

  // ── Primary path: key exists in completed set ──
  if (s.completedKeySet.has(idempotencyKey)) {
    if (!verifyExpectedArtifact(unitType, unitId, basePath)) {
      // Stale completion record — artifact missing. Remove and re-run.
      s.completedKeySet.delete(idempotencyKey);
      removePersistedKey(basePath, idempotencyKey);
      notify(
        `Re-running ${unitType} ${unitId} — marked complete but expected artifact missing.`,
        "warning",
      );
      return { action: "rerun", reason: "stale-key" };
    }
    return applySkipGuards(
      ictx,
      idempotencyKey,
      `Skipping ${unitType} ${unitId} — already completed in a prior session. Advancing.`,
      "completed",
    );
  }

  // ── Fallback: key missing but artifact exists ──
  // Keys evicted by the skip-loop breaker are excluded so that an eviction
  // actually results in a re-dispatch instead of being re-persisted here.
  if (verifyExpectedArtifact(unitType, unitId, basePath) && !s.recentlyEvictedKeys.has(idempotencyKey)) {
    persistCompletedKey(basePath, idempotencyKey);
    s.completedKeySet.add(idempotencyKey);
    invalidateAllCaches();
    return applySkipGuards(
      ictx,
      idempotencyKey,
      `Skipping ${unitType} ${unitId} — artifact exists but completion key was missing. Repaired and advancing.`,
      "fallback-persisted",
    );
  }

  return { action: "proceed" };
}

/**
 * Shared skip bookkeeping for both the primary and fallback paths: bumps the
 * consecutive-skip counter, breaks skip loops by evicting the completion
 * record, and enforces the lifetime dispatch cap.
 *
 * @param ictx Same context that was passed to checkIdempotency.
 * @param idempotencyKey The `${unitType}/${unitId}` key for the unit.
 * @param skipMessage Info-level message to emit when the skip is accepted.
 * @param skipReason Reason to return when the skip is accepted.
 */
function applySkipGuards(
  ictx: IdempotencyContext,
  idempotencyKey: string,
  skipMessage: string,
  skipReason: string,
): IdempotencyResult {
  const { s, unitType, unitId, basePath, notify } = ictx;

  // Guard against infinite skip loops.
  const skipCount = (s.unitConsecutiveSkips.get(idempotencyKey) ?? 0) + 1;
  s.unitConsecutiveSkips.set(idempotencyKey, skipCount);

  if (skipCount > MAX_CONSECUTIVE_SKIPS) {
    // Cross-check: verify the unit's milestone is still active (#790).
    // A milestone that already has a SUMMARY file is treated as complete.
    const milestoneId = unitId.split("/")[0];
    const milestoneComplete = milestoneId
      ? !!resolveMilestoneFile(basePath, milestoneId, "SUMMARY")
      : false;

    s.unitConsecutiveSkips.delete(idempotencyKey);

    if (milestoneComplete) {
      // The completion record is legitimate; only the counter and caches are reset.
      invalidateAllCaches();
      notify(
        `Phantom skip loop cleared: ${unitType} ${unitId} belongs to completed milestone ${milestoneId}. Re-dispatching from fresh state.`,
        "info",
      );
      return { action: "skip", reason: "phantom-loop-cleared" };
    }

    // Evict the completion record. Recording the key in recentlyEvictedKeys
    // is what stops the fallback path from immediately re-persisting it on
    // the next call; without it the eviction would be a no-op.
    s.completedKeySet.delete(idempotencyKey);
    s.recentlyEvictedKeys.add(idempotencyKey);
    removePersistedKey(basePath, idempotencyKey);
    invalidateAllCaches();
    notify(
      `Skip loop detected: ${unitType} ${unitId} skipped ${skipCount} times without advancing. Evicting completion record and forcing reconciliation.`,
      "warning",
    );
    return { action: "skip", reason: "evicted" };
  }

  // Accepted skips count toward the lifetime cap.
  const lifetimeCount = (s.unitLifetimeDispatches.get(idempotencyKey) ?? 0) + 1;
  s.unitLifetimeDispatches.set(idempotencyKey, lifetimeCount);
  if (lifetimeCount > MAX_LIFETIME_DISPATCHES) {
    return { action: "stop", reason: `Hard loop: ${unitType} ${unitId} (skip cycle)` };
  }

  notify(skipMessage, "info");
  return { action: "skip", reason: skipReason };
}

View file

@ -0,0 +1,586 @@
/**
* Post-unit processing for handleAgentEnd — auto-commit, doctor run,
* state rebuild, worktree sync, DB dual-write, hooks, triage, and
* quick-task dispatch.
*
* Split into two functions called sequentially by handleAgentEnd with
* the verification gate between them:
* 1. postUnitPreVerification() — commit, doctor, state rebuild, worktree sync, artifact verification
* 2. postUnitPostVerification() — DB dual-write, hooks, triage, quick-tasks
*
* Extracted from handleAgentEnd() in auto.ts.
*/
import type { ExtensionContext, ExtensionCommandContext, ExtensionAPI } from "@gsd/pi-coding-agent";
import { deriveState } from "./state.js";
import { loadFile, parseSummary, resolveAllOverrides } from "./files.js";
import { loadPrompt } from "./prompt-loader.js";
import {
resolveSliceFile,
resolveTaskFile,
resolveMilestoneFile,
gsdRoot,
} from "./paths.js";
import { invalidateAllCaches } from "./cache.js";
import { closeoutUnit, type CloseoutOptions } from "./auto-unit-closeout.js";
import {
autoCommitCurrentBranch,
type TaskCommitContext,
} from "./worktree.js";
import {
verifyExpectedArtifact,
persistCompletedKey,
removePersistedKey,
} from "./auto-recovery.js";
import { writeUnitRuntimeRecord, clearUnitRuntimeRecord } from "./unit-runtime.js";
import { resolveAutoSupervisorConfig, loadEffectiveGSDPreferences } from "./preferences.js";
import { runGSDDoctor, rebuildState, summarizeDoctorIssues } from "./doctor.js";
import { recordHealthSnapshot, checkHealEscalation } from "./doctor-proactive.js";
import { syncStateToProjectRoot } from "./auto-worktree-sync.js";
import { resetRewriteCircuitBreaker } from "./auto-dispatch.js";
import { isDbAvailable } from "./gsd-db.js";
import { consumeSignal } from "./session-status-io.js";
import {
checkPostUnitHooks,
getActiveHook,
resetHookState,
isRetryPending,
consumeRetryTrigger,
persistHookState,
} from "./post-unit-hooks.js";
import { hasPendingCaptures, loadPendingCaptures, countPendingCaptures } from "./captures.js";
import { writeLock } from "./crash-recovery.js";
import { debugLog } from "./debug-logger.js";
import type { AutoSession } from "./auto/session.js";
import type { WidgetStateAccessors, AutoDashboardData } from "./auto-dashboard.js";
import {
updateProgressWidget as _updateProgressWidget,
updateSliceProgressCache,
unitVerb,
hideFooter,
} from "./auto-dashboard.js";
import { join } from "node:path";
/** Minimum gap between STATE.md rebuilds, in milliseconds (30 seconds). */
const STATE_REBUILD_MIN_INTERVAL_MS = 30 * 1_000;
/**
* Everything the post-unit functions need from auto.ts, passed in explicitly
* so this module does not have to import from it.
*/
export interface PostUnitContext {
/** Mutable auto-mode session state (current unit, base paths, completed keys, timers, queues). */
s: AutoSession;
/** Extension context; used here for UI notifications, the model registry and the session manager. */
ctx: ExtensionContext;
/** Extension API; used here for setModel() and sendMessage(). */
pi: ExtensionAPI;
/** Builds the options object handed to closeoutUnit() when the current unit is closed out before a hook runs. */
buildSnapshotOpts: (unitType: string, unitId: string) => CloseoutOptions & Record<string, unknown>;
/** Returns the base path passed to writeLock(). */
lockBase: () => string;
/** Stops auto-mode; called on stop signals and cancelled sessions, optionally with a reason. */
stopAuto: (ctx?: ExtensionContext, pi?: ExtensionAPI, reason?: string) => Promise<void>;
/** Pauses auto-mode; called on pause signals and when a hook/triage/quick-task timer fires. */
pauseAuto: (ctx?: ExtensionContext, pi?: ExtensionAPI) => Promise<void>;
/** Refreshes the progress widget for the given unit using freshly derived state. */
updateProgressWidget: (ctx: ExtensionContext, unitType: string, unitId: string, state: import("./types.js").GSDState) => void;
}
/**
* Pre-verification processing, run before the verification gate.
*
* Steps, in order: parallel-worker signal check, cache invalidation, a short
* settle delay, then — only when s.currentUnit is set — auto-commit, doctor
* run (with optional heal escalation), throttled state rebuild, bg-shell
* pruning, worktree sync, rewrite-docs / triage-captures follow-ups, and
* artifact verification with completion-key persistence.
*
* From the auto-commit onward, each step sits in its own try/catch, so a
* failure in one step does not prevent the following steps from running.
*
* @param pctx Session, extension handles and callbacks (see PostUnitContext).
* @returns "dispatched" if a stop/pause signal was consumed and acted on,
* otherwise "continue".
*/
export async function postUnitPreVerification(pctx: PostUnitContext): Promise<"dispatched" | "continue"> {
// NOTE(review): buildSnapshotOpts is destructured but not referenced in this function.
const { s, ctx, pi, buildSnapshotOpts, stopAuto, pauseAuto } = pctx;
// ── Parallel worker signal check ──
// Only performed when GSD_MILESTONE_LOCK is set; its value is passed to
// consumeSignal together with the session base path.
const milestoneLock = process.env.GSD_MILESTONE_LOCK;
if (milestoneLock) {
const signal = consumeSignal(s.basePath, milestoneLock);
if (signal) {
if (signal.signal === "stop") {
await stopAuto(ctx, pi);
return "dispatched";
}
if (signal.signal === "pause") {
await pauseAuto(ctx, pi);
return "dispatched";
}
// Any other signal value falls through and processing continues.
}
}
// Invalidate all caches
invalidateAllCaches();
// Small delay (500 ms) to let files settle
await new Promise(r => setTimeout(r, 500));
// Auto-commit
if (s.currentUnit) {
try {
let taskContext: TaskCommitContext | undefined;
// For execute-task units, enrich the commit with details from the task SUMMARY file.
if (s.currentUnit.type === "execute-task") {
// Unit id is expected to have three "/"-separated parts: milestone/slice/task.
const parts = s.currentUnit.id.split("/");
const [mid, sid, tid] = parts;
if (mid && sid && tid) {
const summaryPath = resolveTaskFile(s.basePath, mid, sid, tid, "SUMMARY");
if (summaryPath) {
try {
const summaryContent = await loadFile(summaryPath);
if (summaryContent) {
const summary = parseSummary(summaryContent);
taskContext = {
taskId: `${sid}/${tid}`,
// Strip a leading "T<digits>: " prefix from the title; fall back to the task id.
taskTitle: summary.title?.replace(/^T\d+:\s*/, "") || tid,
oneLiner: summary.oneLiner || undefined,
// Drop entries containing "{{" — presumably unfilled template placeholders (TODO confirm).
keyFiles: summary.frontmatter.key_files?.filter(f => !f.includes("{{")) || undefined,
};
}
} catch {
// Non-fatal: commit proceeds without task context
}
}
}
}
const commitMsg = autoCommitCurrentBranch(s.basePath, s.currentUnit.type, s.currentUnit.id, taskContext);
if (commitMsg) {
// Only the first line of the commit message is shown to the user.
ctx.ui.notify(`Committed: ${commitMsg.split("\n")[0]}`, "info");
}
} catch {
// Non-fatal
}
// Doctor: fix mechanical bookkeeping
try {
// Scope is the first two segments of the unit id (milestone/slice).
const scopeParts = s.currentUnit.id.split("/").slice(0, 2);
const doctorScope = scopeParts.join("/");
// Slice-terminal units get fixLevel "all"; every other unit type gets "task".
const sliceTerminalUnits = new Set(["complete-slice", "run-uat"]);
const effectiveFixLevel = sliceTerminalUnits.has(s.currentUnit.type) ? "all" as const : "task" as const;
const report = await runGSDDoctor(s.basePath, { fix: true, scope: doctorScope, fixLevel: effectiveFixLevel });
if (report.fixesApplied.length > 0) {
ctx.ui.notify(`Post-hook: applied ${report.fixesApplied.length} fix(es).`, "info");
}
// Proactive health tracking
const summary = summarizeDoctorIssues(report.issues);
recordHealthSnapshot(summary.errors, summary.warnings, report.fixesApplied.length);
// Check if we should escalate to LLM-assisted heal
if (summary.errors > 0) {
// Escalation decision is based on error-severity issues that are not fixable.
const unresolvedErrors = report.issues
.filter(i => i.severity === "error" && !i.fixable)
.map(i => ({ code: i.code, message: i.message, unitId: i.unitId }));
const escalation = checkHealEscalation(summary.errors, unresolvedErrors);
if (escalation.shouldEscalate) {
ctx.ui.notify(
`Doctor heal escalation: ${escalation.reason}. Dispatching LLM-assisted heal.`,
"warning",
);
try {
// Loaded lazily via dynamic import, only when an escalation actually happens.
const { formatDoctorIssuesForPrompt, formatDoctorReport } = await import("./doctor.js");
const { dispatchDoctorHeal } = await import("./commands.js");
// The heal prompt includes all error-severity issues, fixable or not.
const actionable = report.issues.filter(i => i.severity === "error");
const reportText = formatDoctorReport(report, { scope: doctorScope, includeWarnings: true });
const structuredIssues = formatDoctorIssuesForPrompt(actionable);
// NOTE(review): result is not awaited — confirm dispatchDoctorHeal is synchronous or intentionally fire-and-forget.
dispatchDoctorHeal(pi, doctorScope, reportText, structuredIssues);
} catch {
// Non-fatal
}
}
}
} catch {
// Non-fatal
}
// Throttled STATE.md rebuild
const now = Date.now();
if (now - s.lastStateRebuildAt >= STATE_REBUILD_MIN_INTERVAL_MS) {
try {
await rebuildState(s.basePath);
// Timestamp is the one captured before the rebuild started, and is only
// recorded when rebuildState did not throw.
s.lastStateRebuildAt = now;
// Commit the rebuilt state under the "state-rebuild" unit type.
autoCommitCurrentBranch(s.basePath, "state-rebuild", s.currentUnit.id);
} catch {
// Non-fatal
}
}
// Prune dead bg-shell processes
try {
const { pruneDeadProcesses } = await import("../bg-shell/process-manager.js");
pruneDeadProcesses();
} catch {
// Non-fatal
}
// Sync worktree state back to project root
// Skipped when originalBasePath is unset or equal to the current base path.
if (s.originalBasePath && s.originalBasePath !== s.basePath) {
try {
syncStateToProjectRoot(s.basePath, s.originalBasePath, s.currentMilestoneId);
} catch {
// Non-fatal
}
}
// Rewrite-docs completion
if (s.currentUnit.type === "rewrite-docs") {
try {
await resolveAllOverrides(s.basePath);
resetRewriteCircuitBreaker();
ctx.ui.notify("Override(s) resolved — rewrite-docs completed.", "info");
} catch {
// Non-fatal
}
}
// Post-triage: execute actionable resolutions
if (s.currentUnit.type === "triage-captures") {
try {
const { executeTriageResolutions } = await import("./triage-resolution.js");
const state = await deriveState(s.basePath);
const mid = state.activeMilestone?.id;
const sid = state.activeSlice?.id;
// Resolutions are only executed when there is an active milestone and slice.
if (mid && sid) {
const triageResult = executeTriageResolutions(s.basePath, mid, sid);
if (triageResult.injected > 0) {
ctx.ui.notify(
`Triage: injected ${triageResult.injected} task${triageResult.injected === 1 ? "" : "s"} into ${sid} plan.`,
"info",
);
}
if (triageResult.replanned > 0) {
ctx.ui.notify(
`Triage: replan trigger written for ${sid} — next dispatch will enter replanning.`,
"info",
);
}
if (triageResult.quickTasks.length > 0) {
// Queue quick-tasks on the session; postUnitPostVerification dispatches them one at a time.
for (const qt of triageResult.quickTasks) {
s.pendingQuickTasks.push(qt);
}
ctx.ui.notify(
`Triage: ${triageResult.quickTasks.length} quick-task${triageResult.quickTasks.length === 1 ? "" : "s"} queued for execution.`,
"info",
);
}
// Each action taken is logged to stderr.
for (const action of triageResult.actions) {
process.stderr.write(`gsd-triage: ${action}\n`);
}
}
} catch (err) {
// Unlike the other steps, triage failures are reported on stderr rather than silently ignored.
process.stderr.write(`gsd-triage: resolution execution failed: ${(err as Error).message}\n`);
}
}
// Artifact verification and completion persistence
let triggerArtifactVerified = false;
// Units whose type starts with "hook/" skip artifact verification entirely.
if (!s.currentUnit.type.startsWith("hook/")) {
try {
triggerArtifactVerified = verifyExpectedArtifact(s.currentUnit.type, s.currentUnit.id, s.basePath);
if (triggerArtifactVerified) {
const completionKey = `${s.currentUnit.type}/${s.currentUnit.id}`;
// Persist the completion key only once per key.
if (!s.completedKeySet.has(completionKey)) {
persistCompletedKey(s.basePath, completionKey);
s.completedKeySet.add(completionKey);
}
// Caches are invalidated whenever the artifact verifies, even if the key was already recorded.
invalidateAllCaches();
}
} catch {
// Non-fatal
}
} else {
// Hook unit completed — finalize its runtime record
try {
// The record is written with phase "finalized" and then immediately cleared.
writeUnitRuntimeRecord(s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt, {
phase: "finalized",
progressCount: 1,
lastProgressKind: "hook-completed",
});
clearUnitRuntimeRecord(s.basePath, s.currentUnit.type, s.currentUnit.id);
} catch {
// Non-fatal
}
}
}
return "continue";
}
/**
* Post-verification processing: DB dual-write, post-unit hooks, triage
* capture dispatch, quick-task dispatch.
*
* At most one follow-up unit (hook, triage, or quick-task, checked in that
* order) is dispatched per call. Each dispatch follows the same sequence:
* close out the current unit, replace s.currentUnit, write a "dispatched"
* runtime record, refresh the progress widget, open a new session, write
* the lock, arm a hard-timeout timer, then send the prompt. The steps
* between opening the session and arming the timer vary slightly per path.
*
* In step mode (s.stepMode) none of the three dispatch paths run.
*
* @param pctx Session, extension handles and callbacks (see PostUnitContext).
* @returns
* - "dispatched" — a hook/triage/quick-task was dispatched (sendMessage sent)
* - "continue" — proceed to normal dispatchNextUnit
* - "step-wizard" — step mode, show wizard instead
* - "stopped" — stopAuto was called after a cancelled session, or the
*   session was found inactive (s.active false) just before sending
*/
export async function postUnitPostVerification(pctx: PostUnitContext): Promise<"dispatched" | "continue" | "step-wizard" | "stopped"> {
const { s, ctx, pi, buildSnapshotOpts, lockBase, stopAuto, pauseAuto, updateProgressWidget } = pctx;
// ── DB dual-write ──
// When the DB is available, re-import from markdown; failures are logged to stderr only.
if (isDbAvailable()) {
try {
const { migrateFromMarkdown } = await import("./md-importer.js");
migrateFromMarkdown(s.basePath);
} catch (err) {
process.stderr.write(`gsd-db: re-import failed: ${(err as Error).message}\n`);
}
}
// ── Post-unit hooks ──
if (s.currentUnit && !s.stepMode) {
const hookUnit = checkPostUnitHooks(s.currentUnit.type, s.currentUnit.id, s.basePath);
if (hookUnit) {
const hookStartedAt = Date.now();
// Close out the unit that just finished before switching to the hook unit.
if (s.currentUnit) {
await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt, buildSnapshotOpts(s.currentUnit.type, s.currentUnit.id));
}
// From here on the hook is the session's current unit.
s.currentUnit = { type: hookUnit.unitType, id: hookUnit.unitId, startedAt: hookStartedAt };
writeUnitRuntimeRecord(s.basePath, hookUnit.unitType, hookUnit.unitId, hookStartedAt, {
phase: "dispatched",
wrapupWarningSent: false,
timeoutAt: null,
lastProgressAt: hookStartedAt,
progressCount: 0,
lastProgressKind: "dispatch",
});
const state = await deriveState(s.basePath);
updateProgressWidget(ctx, hookUnit.unitType, hookUnit.unitId, state);
const hookState = getActiveHook();
ctx.ui.notify(
`Running post-unit hook: ${hookUnit.hookName} (cycle ${hookState?.cycle ?? 1})`,
"info",
);
// Switch model if the hook specifies one
if (hookUnit.model) {
// The hook's model string may be either a bare model id or "provider/id".
const availableModels = ctx.modelRegistry.getAvailable();
const match = availableModels.find(m =>
m.id === hookUnit.model || `${m.provider}/${m.id}` === hookUnit.model,
);
// No match, or a failing setModel, leaves the current model in place.
if (match) {
try {
await pi.setModel(match);
} catch { /* non-fatal */ }
}
}
// NOTE(review): non-null assertion — assumes s.cmdCtx is always set by the time a unit ends; confirm.
const result = await s.cmdCtx!.newSession();
if (result.cancelled) {
resetHookState();
await stopAuto(ctx, pi, "Hook session cancelled");
return "stopped";
}
const sessionFile = ctx.sessionManager.getSessionFile();
writeLock(lockBase(), hookUnit.unitType, hookUnit.unitId, s.completedUnits.length, sessionFile);
persistHookState(s.basePath);
// Start supervision timers for hook units
// Only a hard timeout is armed here; it defaults to 30 minutes when not configured.
const supervisor = resolveAutoSupervisorConfig();
const hookHardTimeoutMs = (supervisor.hard_timeout_minutes ?? 30) * 60 * 1000;
// NOTE(review): any existing s.unitTimeoutHandle is overwritten without clearTimeout —
// presumably the caller clears it before invoking this function; confirm.
s.unitTimeoutHandle = setTimeout(async () => {
s.unitTimeoutHandle = null;
// Ignore the timeout if auto-mode is no longer active.
if (!s.active) return;
if (s.currentUnit) {
writeUnitRuntimeRecord(s.basePath, hookUnit.unitType, hookUnit.unitId, s.currentUnit.startedAt, {
phase: "timeout",
timeoutAt: Date.now(),
});
}
ctx.ui.notify(
`Hook ${hookUnit.hookName} exceeded ${supervisor.hard_timeout_minutes ?? 30}min timeout. Pausing auto-mode.`,
"warning",
);
resetHookState();
await pauseAuto(ctx, pi);
}, hookHardTimeoutMs);
// Session may have been deactivated during the awaits above; do not send the prompt in that case.
if (!s.active) return "stopped";
pi.sendMessage(
{ customType: "gsd-auto", content: hookUnit.prompt, display: s.verbose },
{ triggerTurn: true },
);
return "dispatched";
}
// Check if a hook requested a retry of the trigger unit
if (isRetryPending()) {
const trigger = consumeRetryTrigger();
if (trigger) {
// Drop the trigger unit's completion record so it is no longer treated as completed.
const triggerKey = `${trigger.unitType}/${trigger.unitId}`;
s.completedKeySet.delete(triggerKey);
removePersistedKey(s.basePath, triggerKey);
ctx.ui.notify(
`Hook requested retry of ${trigger.unitType} ${trigger.unitId}.`,
"info",
);
// Fall through to normal dispatch
}
}
}
// ── Triage check ──
// Skipped in step mode and after hook, triage-captures or quick-task units,
// so triage never follows directly after one of those.
if (
!s.stepMode &&
s.currentUnit &&
!s.currentUnit.type.startsWith("hook/") &&
s.currentUnit.type !== "triage-captures" &&
s.currentUnit.type !== "quick-task"
) {
try {
if (hasPendingCaptures(s.basePath)) {
const pending = loadPendingCaptures(s.basePath);
if (pending.length > 0) {
const state = await deriveState(s.basePath);
const mid = state.activeMilestone?.id;
const sid = state.activeSlice?.id;
// Triage needs an active milestone and slice; otherwise pending captures are left for later.
if (mid && sid) {
// Gather the slice plan and milestone roadmap as prompt context; missing files become placeholders.
let currentPlan = "";
let roadmapContext = "";
const planFile = resolveSliceFile(s.basePath, mid, sid, "PLAN");
if (planFile) currentPlan = (await loadFile(planFile)) ?? "";
const roadmapFile = resolveMilestoneFile(s.basePath, mid, "ROADMAP");
if (roadmapFile) roadmapContext = (await loadFile(roadmapFile)) ?? "";
const capturesList = pending.map(c =>
`- **${c.id}**: "${c.text}" (captured: ${c.timestamp})`
).join("\n");
const prompt = loadPrompt("triage-captures", {
pendingCaptures: capturesList,
currentPlan: currentPlan || "(no active slice plan)",
roadmapContext: roadmapContext || "(no active roadmap)",
});
ctx.ui.notify(
`Triaging ${pending.length} pending capture${pending.length === 1 ? "" : "s"}...`,
"info",
);
// Unlike the hook path, closeoutUnit is called here without snapshot options.
if (s.currentUnit) {
await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt);
}
const triageUnitType = "triage-captures";
const triageUnitId = `${mid}/${sid}/triage`;
const triageStartedAt = Date.now();
s.currentUnit = { type: triageUnitType, id: triageUnitId, startedAt: triageStartedAt };
writeUnitRuntimeRecord(s.basePath, triageUnitType, triageUnitId, triageStartedAt, {
phase: "dispatched",
wrapupWarningSent: false,
timeoutAt: null,
lastProgressAt: triageStartedAt,
progressCount: 0,
lastProgressKind: "dispatch",
});
updateProgressWidget(ctx, triageUnitType, triageUnitId, state);
const result = await s.cmdCtx!.newSession();
if (result.cancelled) {
await stopAuto(ctx, pi);
return "stopped";
}
const sessionFile = ctx.sessionManager.getSessionFile();
writeLock(lockBase(), triageUnitType, triageUnitId, s.completedUnits.length, sessionFile);
// Hard timeout for the triage unit; defaults to 30 minutes when not configured.
const supervisor = resolveAutoSupervisorConfig();
const triageTimeoutMs = (supervisor.hard_timeout_minutes ?? 30) * 60 * 1000;
s.unitTimeoutHandle = setTimeout(async () => {
s.unitTimeoutHandle = null;
if (!s.active) return;
ctx.ui.notify(
`Triage unit exceeded timeout. Pausing auto-mode.`,
"warning",
);
await pauseAuto(ctx, pi);
}, triageTimeoutMs);
if (!s.active) return "stopped";
pi.sendMessage(
{ customType: "gsd-auto", content: prompt, display: s.verbose },
{ triggerTurn: true },
);
return "dispatched";
}
}
}
} catch {
// Triage check failure is non-fatal
// NOTE(review): this also swallows errors thrown after s.currentUnit has
// been replaced with the triage unit; execution then continues below.
}
}
// ── Quick-task dispatch ──
// One queued quick-task is dispatched per call, and never directly after another quick-task.
if (
!s.stepMode &&
s.pendingQuickTasks.length > 0 &&
s.currentUnit &&
s.currentUnit.type !== "quick-task"
) {
try {
// NOTE(review): the capture is removed from the queue before the dynamic
// imports below; if anything in this try block throws, the catch swallows
// the error and the capture is not re-queued.
const capture = s.pendingQuickTasks.shift()!;
const { buildQuickTaskPrompt } = await import("./triage-resolution.js");
const { markCaptureExecuted } = await import("./captures.js");
const prompt = buildQuickTaskPrompt(capture);
ctx.ui.notify(
`Executing quick-task: ${capture.id} — "${capture.text}"`,
"info",
);
if (s.currentUnit) {
await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt);
}
const qtUnitType = "quick-task";
// Quick-task unit ids are "<current milestone id>/<capture id>".
const qtUnitId = `${s.currentMilestoneId}/${capture.id}`;
const qtStartedAt = Date.now();
s.currentUnit = { type: qtUnitType, id: qtUnitId, startedAt: qtStartedAt };
writeUnitRuntimeRecord(s.basePath, qtUnitType, qtUnitId, qtStartedAt, {
phase: "dispatched",
wrapupWarningSent: false,
timeoutAt: null,
lastProgressAt: qtStartedAt,
progressCount: 0,
lastProgressKind: "dispatch",
});
const state = await deriveState(s.basePath);
updateProgressWidget(ctx, qtUnitType, qtUnitId, state);
const result = await s.cmdCtx!.newSession();
if (result.cancelled) {
await stopAuto(ctx, pi);
return "stopped";
}
const sessionFile = ctx.sessionManager.getSessionFile();
writeLock(lockBase(), qtUnitType, qtUnitId, s.completedUnits.length, sessionFile);
// The capture is marked executed once the session exists, before the prompt is sent.
markCaptureExecuted(s.basePath, capture.id);
// Hard timeout for the quick-task; defaults to 30 minutes when not configured.
const supervisor = resolveAutoSupervisorConfig();
const qtTimeoutMs = (supervisor.hard_timeout_minutes ?? 30) * 60 * 1000;
s.unitTimeoutHandle = setTimeout(async () => {
s.unitTimeoutHandle = null;
if (!s.active) return;
ctx.ui.notify(
`Quick-task ${capture.id} exceeded timeout. Pausing auto-mode.`,
"warning",
);
await pauseAuto(ctx, pi);
}, qtTimeoutMs);
if (!s.active) return "stopped";
pi.sendMessage(
{ customType: "gsd-auto", content: prompt, display: s.verbose },
{ triggerTurn: true },
);
return "dispatched";
} catch {
// Non-fatal — proceed to normal dispatch
}
}
// Step mode → show wizard instead of dispatch
if (s.stepMode) {
return "step-wizard";
}
return "continue";
}

View file

@ -0,0 +1,472 @@
/**
* Auto-mode bootstrap — fresh-start initialization path.
*
* Git/state bootstrap, crash lock detection, debug init, worktree recovery,
* guided flow gate, session init, worktree lifecycle, DB lifecycle,
* preflight validation.
*
* Extracted from startAuto() in auto.ts. The resume path (s.paused)
* remains in auto.ts — this module handles only the fresh-start path.
*/
import type {
ExtensionAPI,
ExtensionCommandContext,
} from "@gsd/pi-coding-agent";
import { deriveState } from "./state.js";
import { loadFile, getManifestStatus } from "./files.js";
import { loadEffectiveGSDPreferences, resolveSkillDiscoveryMode, getIsolationMode } from "./preferences.js";
import { collectSecretsFromManifest } from "../get-secrets-from-user.js";
import {
gsdRoot,
resolveMilestoneFile,
milestonesDir,
} from "./paths.js";
import { invalidateAllCaches } from "./cache.js";
import { synthesizeCrashRecovery } from "./session-forensics.js";
import { writeLock, clearLock, readCrashLock, formatCrashInfo, isLockProcessAlive } from "./crash-recovery.js";
import { selfHealRuntimeRecords } from "./auto-recovery.js";
import { ensureGitignore, untrackRuntimeFiles } from "./gitignore.js";
import { nativeIsRepo, nativeInit, nativeAddAll, nativeCommit } from "./native-git-bridge.js";
import { GitServiceImpl } from "./git-service.js";
import {
captureIntegrationBranch,
detectWorktreeName,
setActiveMilestoneId,
} from "./worktree.js";
import {
createAutoWorktree,
enterAutoWorktree,
getAutoWorktreePath,
isInAutoWorktree,
} from "./auto-worktree.js";
import { readResourceVersion } from "./auto-worktree-sync.js";
import { initMetrics, getLedger } from "./metrics.js";
import { initRoutingHistory } from "./routing-history.js";
import { restoreHookState, resetHookState, clearPersistedHookState } from "./post-unit-hooks.js";
import { resetProactiveHealing } from "./doctor-proactive.js";
import { snapshotSkills } from "./skill-discovery.js";
import { isDbAvailable } from "./gsd-db.js";
import { loadPersistedKeys } from "./auto-recovery.js";
import { hideFooter } from "./auto-dashboard.js";
import { debugLog, enableDebug, isDebugEnabled, getDebugLogPath } from "./debug-logger.js";
import type { AutoSession } from "./auto/session.js";
import { existsSync, mkdirSync, readdirSync, statSync, unlinkSync } from "node:fs";
import { join } from "node:path";
import { sep as pathSep } from "node:path";
/**
* Callbacks supplied to bootstrapAutoSession() by its caller, so this module
* does not need to import them directly.
*/
export interface BootstrapDeps {
/** Whether worktree isolation applies; gates the stale-worktree and milestone-branch recovery checks. */
shouldUseWorktreeIsolation: () => boolean;
/** Registers a SIGTERM handler for the given base path — presumably for lock cleanup on termination; confirm against the caller. */
registerSigtermHandler: (basePath: string) => void;
/** Returns the base path used for lock files — presumably the same one passed to writeLock elsewhere; confirm. */
lockBase: () => string;
}
/**
* Bootstrap a fresh auto-mode session. Handles everything from git init
* through secrets collection, returning when ready for the first
* dispatchNextUnit call.
*
* Returns false if the bootstrap aborted (e.g., guided flow returned,
* concurrent session detected). Returns true when ready to dispatch.
*/
export async function bootstrapAutoSession(
s: AutoSession,
ctx: ExtensionCommandContext,
pi: ExtensionAPI,
base: string,
verboseMode: boolean,
requestedStepMode: boolean,
deps: BootstrapDeps,
): Promise<boolean> {
const { shouldUseWorktreeIsolation, registerSigtermHandler, lockBase } = deps;
// Ensure git repo exists
if (!nativeIsRepo(base)) {
const mainBranch = loadEffectiveGSDPreferences()?.preferences?.git?.main_branch || "main";
nativeInit(base, mainBranch);
}
// Ensure .gitignore has baseline patterns
const gitPrefs = loadEffectiveGSDPreferences()?.preferences?.git;
const commitDocs = gitPrefs?.commit_docs;
const manageGitignore = gitPrefs?.manage_gitignore;
ensureGitignore(base, { commitDocs, manageGitignore });
if (manageGitignore !== false) untrackRuntimeFiles(base);
// Bootstrap .gsd/ if it doesn't exist
const gsdDir = join(base, ".gsd");
if (!existsSync(gsdDir)) {
mkdirSync(join(gsdDir, "milestones"), { recursive: true });
if (commitDocs !== false) {
try {
nativeAddAll(base);
nativeCommit(base, "chore: init gsd");
} catch { /* nothing to commit */ }
}
}
// Initialize GitServiceImpl
s.gitService = new GitServiceImpl(s.basePath, loadEffectiveGSDPreferences()?.preferences?.git ?? {});
// Check for crash from previous session
const crashLock = readCrashLock(base);
if (crashLock) {
if (isLockProcessAlive(crashLock)) {
ctx.ui.notify(
`Another auto-mode session (PID ${crashLock.pid}) appears to be running.\nStop it with \`kill ${crashLock.pid}\` before starting a new session.`,
"error",
);
return false;
}
const recoveredMid = crashLock.unitId.split("/")[0];
const milestoneAlreadyComplete = recoveredMid
? !!resolveMilestoneFile(base, recoveredMid, "SUMMARY")
: false;
if (milestoneAlreadyComplete) {
ctx.ui.notify(
`Crash recovery: discarding stale context for ${crashLock.unitId} — milestone ${recoveredMid} is already complete.`,
"info",
);
} else {
const activityDir = join(gsdRoot(base), "activity");
const recovery = synthesizeCrashRecovery(
base, crashLock.unitType, crashLock.unitId,
crashLock.sessionFile, activityDir,
);
if (recovery && recovery.trace.toolCallCount > 0) {
s.pendingCrashRecovery = recovery.prompt;
ctx.ui.notify(
`${formatCrashInfo(crashLock)}\nRecovered ${recovery.trace.toolCallCount} tool calls from crashed session. Resuming with full context.`,
"warning",
);
} else {
ctx.ui.notify(
`${formatCrashInfo(crashLock)}\nNo session data recovered. Resuming from disk state.`,
"warning",
);
}
}
clearLock(base);
}
// ── Debug mode ──
if (!isDebugEnabled() && process.env.GSD_DEBUG === "1") {
enableDebug(base);
}
if (isDebugEnabled()) {
const { isNativeParserAvailable } = await import("./native-parser-bridge.js");
debugLog("debug-start", {
platform: process.platform,
arch: process.arch,
node: process.version,
model: ctx.model?.id ?? "unknown",
provider: ctx.model?.provider ?? "unknown",
nativeParser: isNativeParserAvailable(),
cwd: base,
});
ctx.ui.notify(`Debug logging enabled → ${getDebugLogPath()}`, "info");
}
// Invalidate caches before initial state derivation
invalidateAllCaches();
// Clean stale runtime unit files for completed milestones (#887)
try {
const runtimeUnitsDir = join(gsdRoot(base), "runtime", "units");
if (existsSync(runtimeUnitsDir)) {
for (const file of readdirSync(runtimeUnitsDir)) {
if (!file.endsWith(".json")) continue;
const midMatch = file.match(/(M\d+(?:-[a-z0-9]{6})?)/);
if (!midMatch) continue;
const mid = midMatch[1];
if (resolveMilestoneFile(base, mid, "SUMMARY")) {
try { unlinkSync(join(runtimeUnitsDir, file)); } catch (e) { debugLog("stale-unit-cleanup-failed", { file, error: e instanceof Error ? e.message : String(e) }); }
}
}
}
} catch (e) { debugLog("stale-unit-dir-cleanup-failed", { error: e instanceof Error ? e.message : String(e) }); }
let state = await deriveState(base);
// Stale worktree state recovery (#654)
if (
state.activeMilestone &&
shouldUseWorktreeIsolation() &&
!detectWorktreeName(base)
) {
const wtPath = getAutoWorktreePath(base, state.activeMilestone.id);
if (wtPath) {
state = await deriveState(wtPath);
}
}
// Milestone branch recovery (#601)
let hasSurvivorBranch = false;
if (
state.activeMilestone &&
(state.phase === "pre-planning" || state.phase === "needs-discussion") &&
shouldUseWorktreeIsolation() &&
!detectWorktreeName(base) &&
!base.includes(`${pathSep}.gsd${pathSep}worktrees${pathSep}`)
) {
const milestoneBranch = `milestone/${state.activeMilestone.id}`;
const { nativeBranchExists } = await import("./native-git-bridge.js");
hasSurvivorBranch = nativeBranchExists(base, milestoneBranch);
if (hasSurvivorBranch) {
ctx.ui.notify(
`Found prior session branch ${milestoneBranch}. Resuming.`,
"info",
);
}
}
if (!hasSurvivorBranch) {
// No active work — start a new milestone via discuss flow
if (!state.activeMilestone || state.phase === "complete") {
const { showSmartEntry } = await import("./guided-flow.js");
await showSmartEntry(ctx, pi, base, { step: requestedStepMode });
invalidateAllCaches();
const postState = await deriveState(base);
if (postState.activeMilestone && postState.phase !== "complete" && postState.phase !== "pre-planning") {
state = postState;
} else if (postState.activeMilestone && postState.phase === "pre-planning") {
const contextFile = resolveMilestoneFile(base, postState.activeMilestone.id, "CONTEXT");
const hasContext = !!(contextFile && await loadFile(contextFile));
if (hasContext) {
state = postState;
} else {
ctx.ui.notify(
"Discussion completed but no milestone context was written. Run /gsd to try the discussion again, or /gsd auto after creating the milestone manually.",
"warning",
);
return false;
}
} else {
return false;
}
}
// Active milestone exists but has no roadmap
if (state.phase === "pre-planning") {
const mid = state.activeMilestone!.id;
const contextFile = resolveMilestoneFile(base, mid, "CONTEXT");
const hasContext = !!(contextFile && await loadFile(contextFile));
if (!hasContext) {
const { showSmartEntry } = await import("./guided-flow.js");
await showSmartEntry(ctx, pi, base, { step: requestedStepMode });
invalidateAllCaches();
const postState = await deriveState(base);
if (postState.activeMilestone && postState.phase !== "pre-planning") {
state = postState;
} else {
ctx.ui.notify(
"Discussion completed but milestone context is still missing. Run /gsd to try again.",
"warning",
);
return false;
}
}
}
}
// Unreachable safety check
if (!state.activeMilestone) {
const { showSmartEntry } = await import("./guided-flow.js");
await showSmartEntry(ctx, pi, base, { step: requestedStepMode });
return false;
}
// ── Initialize session state ──
s.active = true;
s.stepMode = requestedStepMode;
s.verbose = verboseMode;
s.cmdCtx = ctx;
s.basePath = base;
s.unitDispatchCount.clear();
s.unitRecoveryCount.clear();
s.unitConsecutiveSkips.clear();
s.lastBudgetAlertLevel = 0;
s.unitLifetimeDispatches.clear();
s.completedKeySet.clear();
loadPersistedKeys(base, s.completedKeySet);
resetHookState();
restoreHookState(base);
resetProactiveHealing();
s.autoStartTime = Date.now();
s.resourceVersionOnStart = readResourceVersion();
s.completedUnits = [];
s.pendingQuickTasks = [];
s.currentUnit = null;
s.currentMilestoneId = state.activeMilestone?.id ?? null;
s.originalModelId = ctx.model?.id ?? null;
s.originalModelProvider = ctx.model?.provider ?? null;
// Register SIGTERM handler
registerSigtermHandler(base);
// Capture integration branch
if (s.currentMilestoneId) {
if (getIsolationMode() !== "none") {
captureIntegrationBranch(base, s.currentMilestoneId, { commitDocs });
}
setActiveMilestoneId(base, s.currentMilestoneId);
}
// ── Auto-worktree setup ──
s.originalBasePath = base;
const isUnderGsdWorktrees = (p: string): boolean => {
const marker = `${pathSep}.gsd${pathSep}worktrees${pathSep}`;
if (p.includes(marker)) return true;
const worktreesSuffix = `${pathSep}.gsd${pathSep}worktrees`;
return p.endsWith(worktreesSuffix);
};
if (s.currentMilestoneId && shouldUseWorktreeIsolation() && !detectWorktreeName(base) && !isUnderGsdWorktrees(base)) {
try {
const existingWtPath = getAutoWorktreePath(base, s.currentMilestoneId);
if (existingWtPath) {
const wtPath = enterAutoWorktree(base, s.currentMilestoneId);
s.basePath = wtPath;
s.gitService = new GitServiceImpl(s.basePath, loadEffectiveGSDPreferences()?.preferences?.git ?? {});
ctx.ui.notify(`Entered auto-worktree at ${wtPath}`, "info");
} else {
const wtPath = createAutoWorktree(base, s.currentMilestoneId);
s.basePath = wtPath;
s.gitService = new GitServiceImpl(s.basePath, loadEffectiveGSDPreferences()?.preferences?.git ?? {});
ctx.ui.notify(`Created auto-worktree at ${wtPath}`, "info");
}
registerSigtermHandler(s.originalBasePath);
// Load completed keys from BOTH locations
if (s.basePath !== s.originalBasePath) {
loadPersistedKeys(s.basePath, s.completedKeySet);
}
} catch (err) {
ctx.ui.notify(
`Auto-worktree setup failed: ${err instanceof Error ? err.message : String(err)}. Continuing in project root.`,
"warning",
);
}
}
// ── DB lifecycle ──
const gsdDbPath = join(s.basePath, ".gsd", "gsd.db");
const gsdDirPath = join(s.basePath, ".gsd");
if (existsSync(gsdDirPath) && !existsSync(gsdDbPath)) {
const hasDecisions = existsSync(join(gsdDirPath, "DECISIONS.md"));
const hasRequirements = existsSync(join(gsdDirPath, "REQUIREMENTS.md"));
const hasMilestones = existsSync(join(gsdDirPath, "milestones"));
if (hasDecisions || hasRequirements || hasMilestones) {
try {
const { openDatabase: openDb } = await import("./gsd-db.js");
const { migrateFromMarkdown } = await import("./md-importer.js");
openDb(gsdDbPath);
migrateFromMarkdown(s.basePath);
} catch (err) {
process.stderr.write(`gsd-migrate: auto-migration failed: ${(err as Error).message}\n`);
}
}
}
if (existsSync(gsdDbPath) && !isDbAvailable()) {
try {
const { openDatabase: openDb } = await import("./gsd-db.js");
openDb(gsdDbPath);
} catch (err) {
process.stderr.write(`gsd-db: failed to open existing database: ${(err as Error).message}\n`);
}
}
// Initialize metrics
initMetrics(s.basePath);
// Initialize routing history
initRoutingHistory(s.basePath);
// Capture session's model at auto-mode start (#650)
const currentModel = ctx.model;
if (currentModel) {
s.autoModeStartModel = { provider: currentModel.provider, id: currentModel.id };
}
// Snapshot installed skills
if (resolveSkillDiscoveryMode() !== "off") {
snapshotSkills();
}
ctx.ui.setStatus("gsd-auto", s.stepMode ? "next" : "auto");
ctx.ui.setFooter(hideFooter);
const modeLabel = s.stepMode ? "Step-mode" : "Auto-mode";
const pendingCount = state.registry.filter(m => m.status !== 'complete').length;
const scopeMsg = pendingCount > 1
? `Will loop through ${pendingCount} milestones.`
: "Will loop until milestone complete.";
ctx.ui.notify(`${modeLabel} started. ${scopeMsg}`, "info");
// Write initial lock file
writeLock(lockBase(), "starting", s.currentMilestoneId ?? "unknown", 0);
// Secrets collection gate
const mid = state.activeMilestone!.id;
try {
const manifestStatus = await getManifestStatus(base, mid);
if (manifestStatus && manifestStatus.pending.length > 0) {
const result = await collectSecretsFromManifest(base, mid, ctx);
if (result && result.applied && result.skipped && result.existingSkipped) {
ctx.ui.notify(
`Secrets collected: ${result.applied.length} applied, ${result.skipped.length} skipped, ${result.existingSkipped.length} already set.`,
"info",
);
} else {
ctx.ui.notify("Secrets collection skipped.", "info");
}
}
} catch (err) {
ctx.ui.notify(
`Secrets collection error: ${err instanceof Error ? err.message : String(err)}. Continuing with next task.`,
"warning",
);
}
// Self-heal: clear stale runtime records
await selfHealRuntimeRecords(s.basePath, ctx, s.completedKeySet);
// Self-heal: remove stale .git/index.lock
try {
const gitLockFile = join(base, ".git", "index.lock");
if (existsSync(gitLockFile)) {
const lockAge = Date.now() - statSync(gitLockFile).mtimeMs;
if (lockAge > 60_000) {
unlinkSync(gitLockFile);
ctx.ui.notify("Removed stale .git/index.lock from prior crash.", "info");
}
}
} catch (e) { debugLog("git-lock-cleanup-failed", { error: e instanceof Error ? e.message : String(e) }); }
// Pre-flight: validate milestone queue
try {
const msDir = join(base, ".gsd", "milestones");
if (existsSync(msDir)) {
const milestoneIds = readdirSync(msDir, { withFileTypes: true })
.filter(d => d.isDirectory() && /^M\d{3}/.test(d.name))
.map(d => d.name.match(/^(M\d{3})/)?.[1] ?? d.name);
if (milestoneIds.length > 1) {
const issues: string[] = [];
for (const id of milestoneIds) {
const draft = resolveMilestoneFile(base, id, "CONTEXT-DRAFT");
if (draft) issues.push(`${id}: has CONTEXT-DRAFT.md (will pause for discussion)`);
}
if (issues.length > 0) {
ctx.ui.notify(`Pre-flight: ${milestoneIds.length} milestones queued.\n${issues.map(i => `${i}`).join("\n")}`, "warning");
} else {
ctx.ui.notify(`Pre-flight: ${milestoneIds.length} milestones queued. All have full context.`, "info");
}
}
}
} catch { /* non-fatal */ }
return true;
}

View file

@ -0,0 +1,220 @@
/**
* Stuck detection and loop recovery for auto-mode unit dispatch.
*
* Tracks dispatch counts per unit, enforces lifetime caps, and attempts
* stub/artifact recovery before stopping.
*
* Extracted from dispatchNextUnit() in auto.ts. Returns action values
* instead of calling stopAuto/dispatchNextUnit — the caller handles
* control flow.
*/
import type { ExtensionContext } from "@gsd/pi-coding-agent";
import {
inspectExecuteTaskDurability,
} from "./unit-runtime.js";
import {
verifyExpectedArtifact,
diagnoseExpectedArtifact,
skipExecuteTask,
persistCompletedKey,
buildLoopRemediationSteps,
} from "./auto-recovery.js";
import { closeoutUnit, type CloseoutOptions } from "./auto-unit-closeout.js";
import { saveActivityLog } from "./activity-log.js";
import { invalidateAllCaches } from "./cache.js";
import { sendDesktopNotification } from "./notifications.js";
import { debugLog } from "./debug-logger.js";
import {
resolveMilestonePath,
resolveSlicePath,
resolveTasksDir,
buildTaskFileName,
} from "./paths.js";
import {
MAX_UNIT_DISPATCHES,
STUB_RECOVERY_THRESHOLD,
MAX_LIFETIME_DISPATCHES,
} from "./auto/session.js";
import type { AutoSession } from "./auto/session.js";
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
import { join } from "node:path";
/**
 * Inputs to checkStuckAndRecover() for the unit that is about to be dispatched.
 */
export interface StuckContext {
/** Auto-mode session whose dispatch counters, skip counters and completed-key set are read and mutated. */
s: AutoSession;
/** Extension context; used for UI notifications, unit closeout and activity logging. */
ctx: ExtensionContext;
/** Type of the unit being dispatched; combined with unitId as `${unitType}/${unitId}` to key the counters. */
unitType: string;
/** Unit identifier; for "execute-task" units it is split on "/" into milestone, slice and task ids. */
unitId: string;
/** Directory handed to artifact verification, durability inspection and completed-key persistence. */
basePath: string;
/** Builds the options passed to closeoutUnit() when the session still has a current unit to close out. */
buildSnapshotOpts: () => CloseoutOptions & Record<string, unknown>;
}
/**
 * Outcome of checkStuckAndRecover().
 *
 * - "proceed": no cap was hit and no recovery fired; the dispatch counter has been incremented.
 * - "recovered": the unit was recorded as completed and its dispatch counter was reset;
 *   dispatchAgain is always true (presumably the caller re-runs dispatch — confirm at the call site).
 * - "stop": a dispatch cap was exceeded and recovery did not succeed; reason is a short label and
 *   notifyMessage, when present, is a longer diagnostic.
 */
export type StuckResult =
| { action: "proceed" }
| { action: "recovered"; dispatchAgain: true }
| { action: "stop"; reason: string; notifyMessage?: string };
/**
 * Close out the session's current unit when there is one; otherwise save an
 * activity log for the unit that was about to be dispatched.
 */
async function closeoutOrLogActivity(sctx: StuckContext): Promise<void> {
  const { s, ctx, unitType, unitId, buildSnapshotOpts } = sctx;
  if (s.currentUnit) {
    await closeoutUnit(
      ctx,
      s.basePath,
      s.currentUnit.type,
      s.currentUnit.id,
      s.currentUnit.startedAt,
      buildSnapshotOpts(),
    );
  } else {
    saveActivityLog(ctx, s.basePath, unitType, unitId);
  }
}

/**
 * Record a unit as completed (on disk and in the session), reset its dispatch
 * counter, invalidate caches, and build the "recovered" result.
 */
function markUnitRecovered(s: AutoSession, basePath: string, dispatchKey: string): StuckResult {
  persistCompletedKey(basePath, dispatchKey);
  s.completedKeySet.add(dispatchKey);
  s.unitDispatchCount.delete(dispatchKey);
  invalidateAllCaches();
  return { action: "recovered", dispatchAgain: true };
}

/**
 * Best-effort: write a placeholder task summary for an execute-task unit that
 * has not produced one. Filesystem failures are logged and never thrown, in
 * line with the non-fatal handling of the complete-milestone stub.
 */
function writeExecuteTaskStubSummary(
  sctx: StuckContext,
  mid: string,
  sid: string,
  tid: string,
  prevCount: number,
): void {
  const { ctx, unitId, basePath } = sctx;
  const tasksDir = resolveTasksDir(basePath, mid, sid);
  const sDir = resolveSlicePath(basePath, mid, sid);
  const targetDir = tasksDir ?? (sDir ? join(sDir, "tasks") : null);
  if (!targetDir) return;

  let stubWritten = false;
  try {
    if (!existsSync(targetDir)) mkdirSync(targetDir, { recursive: true });
    const summaryPath = join(targetDir, buildTaskFileName(tid, "SUMMARY"));
    // Never overwrite an existing summary, stub or real.
    if (!existsSync(summaryPath)) {
      const stubContent = [
        `# PARTIAL RECOVERY — attempt ${prevCount + 1} of ${MAX_UNIT_DISPATCHES}`,
        ``,
        `Task \`${tid}\` in slice \`${sid}\` (milestone \`${mid}\`) has not yet produced a real summary.`,
        `This placeholder was written by auto-mode after ${prevCount} dispatch attempts.`,
        ``,
        `The next agent session will retry this task. Replace this file with real work when done.`,
      ].join("\n");
      writeFileSync(summaryPath, stubContent, "utf-8");
      stubWritten = true;
    }
  } catch (e) {
    debugLog("stub-recovery-write-failed", {
      unitId,
      error: e instanceof Error ? e.message : String(e),
    });
  }

  if (stubWritten) {
    ctx.ui.notify(
      `Stub recovery (attempt ${prevCount + 1}/${MAX_UNIT_DISPATCHES}): ${unitId} stub summary placeholder written. Retrying with recovery context.`,
      "warning",
    );
  }
}

/**
 * Check dispatch counts, enforce the lifetime cap and MAX_UNIT_DISPATCHES,
 * and attempt stub/artifact recovery before giving up.
 *
 * Side effects: clears the unit's consecutive-skip counter, increments its
 * lifetime and per-cycle dispatch counters, may close out the current unit,
 * may write stub summaries, and may persist the unit as completed.
 *
 * @param sctx Session, extension context and identity of the unit being dispatched.
 * @returns "proceed" to dispatch normally, "recovered" when the unit was
 *   marked completed and dispatch should be re-run, or "stop" with a reason
 *   and diagnostic message when a cap was exceeded without recovery.
 */
export async function checkStuckAndRecover(sctx: StuckContext): Promise<StuckResult> {
  const { s, ctx, unitType, unitId, basePath } = sctx;
  const dispatchKey = `${unitType}/${unitId}`;
  const prevCount = s.unitDispatchCount.get(dispatchKey) ?? 0;
  const lifetimeCount = (s.unitLifetimeDispatches.get(dispatchKey) ?? 0) + 1;

  // Real dispatch reached — clear the consecutive-skip counter for this unit.
  s.unitConsecutiveSkips.delete(dispatchKey);

  debugLog("dispatch-unit", {
    type: unitType,
    id: unitId,
    cycle: prevCount + 1,
    lifetime: lifetimeCount,
  });

  // Hard lifetime cap — survives counter resets from loop-recovery/self-repair.
  s.unitLifetimeDispatches.set(dispatchKey, lifetimeCount);
  if (lifetimeCount > MAX_LIFETIME_DISPATCHES) {
    await closeoutOrLogActivity(sctx);
    const expected = diagnoseExpectedArtifact(unitType, unitId, basePath);
    return {
      action: "stop",
      reason: `Hard loop: ${unitType} ${unitId}`,
      notifyMessage: `Hard loop detected: ${unitType} ${unitId} dispatched ${lifetimeCount} times total (across reconciliation cycles).${expected ? `\n Expected artifact: ${expected}` : ""}\n This may indicate deriveState() keeps returning the same unit despite artifacts existing.\n Check .gsd/completed-units.json and the slice plan checkbox state.`,
    };
  }

  if (prevCount >= MAX_UNIT_DISPATCHES) {
    await closeoutOrLogActivity(sctx);

    // Final reconciliation pass for execute-task
    if (unitType === "execute-task") {
      const [mid, sid, tid] = unitId.split("/");
      if (mid && sid && tid) {
        const status = await inspectExecuteTaskDurability(basePath, unitId);
        if (status) {
          const reconciled = skipExecuteTask(basePath, mid, sid, tid, status, "loop-recovery", prevCount);
          if (reconciled && verifyExpectedArtifact(unitType, unitId, basePath)) {
            ctx.ui.notify(
              `Loop recovery: ${unitId} reconciled after ${prevCount + 1} dispatches — blocker artifacts written, pipeline advancing.\n Review ${status.summaryPath} and replace the placeholder with real work.`,
              "warning",
            );
            return markUnitRecovered(s, basePath, dispatchKey);
          }
        }
      }
    }

    // General reconciliation: artifact appeared on last attempt
    if (verifyExpectedArtifact(unitType, unitId, basePath)) {
      ctx.ui.notify(
        `Loop recovery: ${unitType} ${unitId} — artifact verified after ${prevCount + 1} dispatches. Advancing.`,
        "info",
      );
      return markUnitRecovered(s, basePath, dispatchKey);
    }

    // Last resort for complete-milestone: generate stub summary
    if (unitType === "complete-milestone") {
      try {
        const mPath = resolveMilestonePath(basePath, unitId);
        if (mPath) {
          const stubPath = join(mPath, `${unitId}-SUMMARY.md`);
          if (!existsSync(stubPath)) {
            writeFileSync(stubPath, `# ${unitId} Summary\n\nAuto-generated stub — milestone tasks completed but summary generation failed after ${prevCount + 1} attempts.\nReview and replace this stub with a proper summary.\n`);
            ctx.ui.notify(`Generated stub summary for ${unitId} to unblock pipeline. Review later.`, "warning");
            return markUnitRecovered(s, basePath, dispatchKey);
          }
        }
      } catch (e) {
        // Non-fatal — fall through to the normal stop, but leave a trace.
        debugLog("milestone-stub-write-failed", {
          unitId,
          error: e instanceof Error ? e.message : String(e),
        });
      }
    }

    const expected = diagnoseExpectedArtifact(unitType, unitId, basePath);
    const remediation = buildLoopRemediationSteps(unitType, unitId, basePath);
    sendDesktopNotification("GSD", `Loop detected: ${unitType} ${unitId}`, "error", "error");
    return {
      action: "stop",
      reason: `Loop: ${unitType} ${unitId}`,
      notifyMessage: `Loop detected: ${unitType} ${unitId} dispatched ${prevCount + 1} times total. Expected artifact not found.${expected ? `\n Expected: ${expected}` : ""}${remediation ? `\n\n Remediation steps:\n${remediation}` : "\n Check branch state and .gsd/ artifacts."}`,
    };
  }

  s.unitDispatchCount.set(dispatchKey, prevCount + 1);

  if (prevCount > 0) {
    // Adaptive self-repair: each retry attempts a different remediation step.
    if (unitType === "execute-task") {
      const status = await inspectExecuteTaskDurability(basePath, unitId);
      const [mid, sid, tid] = unitId.split("/");
      if (status && mid && sid && tid) {
        if (status.summaryExists && !status.taskChecked) {
          const repaired = skipExecuteTask(basePath, mid, sid, tid, status, "self-repair", 0);
          if (repaired && verifyExpectedArtifact(unitType, unitId, basePath)) {
            ctx.ui.notify(
              `Self-repaired ${unitId}: summary existed but checkbox was unmarked. Marked [x] and advancing.`,
              "warning",
            );
            return markUnitRecovered(s, basePath, dispatchKey);
          }
        } else if (prevCount >= STUB_RECOVERY_THRESHOLD && !status.summaryExists) {
          writeExecuteTaskStubSummary(sctx, mid, sid, tid, prevCount);
        }
      }
    }

    ctx.ui.notify(
      `${unitType} ${unitId} didn't produce expected artifact. Retrying (${prevCount + 1}/${MAX_UNIT_DISPATCHES}).`,
      "warning",
    );
  }

  return { action: "proceed" };
}

View file

@ -0,0 +1,223 @@
/**
* Unit supervision timers — soft timeout warning, idle watchdog,
* hard timeout, and context-pressure monitor.
*
* Extracted from dispatchNextUnit() in auto.ts. All timers are set up
* via startUnitSupervision() and torn down by the caller via clearUnitTimeout().
*/
import type { ExtensionAPI, ExtensionContext } from "@gsd/pi-coding-agent";
import { readUnitRuntimeRecord, writeUnitRuntimeRecord } from "./unit-runtime.js";
import { resolveAutoSupervisorConfig } from "./preferences.js";
import type { GSDPreferences } from "./preferences.js";
import { computeBudgets, resolveExecutorContextWindow } from "./context-budget.js";
import {
getInFlightToolCount,
getOldestInFlightToolStart,
} from "./auto-tool-tracking.js";
import { detectWorkingTreeActivity } from "./auto-supervisor.js";
import { closeoutUnit, type CloseoutOptions } from "./auto-unit-closeout.js";
import { saveActivityLog } from "./activity-log.js";
import { recoverTimedOutUnit, type RecoveryContext } from "./auto-timeout-recovery.js";
import type { AutoSession } from "./auto/session.js";
/**
 * Everything startUnitSupervision() needs to supervise one dispatched unit.
 */
export interface SupervisionContext {
/** Auto-mode session; holds the timer handles, current unit, base path and verbose flag the timers use. */
s: AutoSession;
/** Extension context used for UI notifications, closeout/recovery calls and the context-window lookup. */
ctx: ExtensionContext;
/** Extension API used to send wrap-up messages and passed on to recovery and pauseAuto. */
pi: ExtensionAPI;
/** Type of the supervised unit; used to address its runtime record and in notifications. */
unitType: string;
/** Identifier of the supervised unit; used to address its runtime record and in notifications. */
unitId: string;
/** Effective preferences (may be undefined); forwarded to resolveExecutorContextWindow(). */
prefs: GSDPreferences | undefined;
/** Builds the options passed to closeoutUnit() when a timer closes out the current unit. */
buildSnapshotOpts: () => CloseoutOptions & Record<string, unknown>;
/** Builds the context passed to recoverTimedOutUnit() on idle or hard timeout. */
buildRecoveryContext: () => RecoveryContext;
/** Invoked when idle or hard-timeout recovery does not report "recovered". */
pauseAuto: (ctx?: ExtensionContext, pi?: ExtensionAPI) => Promise<void>;
}
/**
 * Set up all four supervision timers for the current unit:
 *   1. Soft timeout warning (wrapup)
 *   2. Idle watchdog (progress polling, stuck tool detection)
 *   3. Hard timeout (pause + recovery)
 *   4. Context-pressure monitor (continue-here)
 *
 * The handles are stored on the session (wrapupWarningHandle,
 * idleWatchdogHandle, unitTimeoutHandle, continueHereHandle). Only the
 * continue-here interval is cleared here before being re-armed.
 *
 * Timeout durations come from resolveAutoSupervisorConfig(); a missing
 * `*_timeout_minutes` value is treated as 0 minutes.
 *
 * @param sctx Session, extension handles, unit identity and the callbacks the
 *   timers invoke (snapshot options, recovery context, pauseAuto).
 */
export function startUnitSupervision(sctx: SupervisionContext): void {
  const { s, ctx, pi, unitType, unitId, prefs, buildSnapshotOpts, buildRecoveryContext, pauseAuto } = sctx;
  const supervisor = resolveAutoSupervisorConfig();
  const softTimeoutMs = (supervisor.soft_timeout_minutes ?? 0) * 60 * 1000;
  const idleTimeoutMs = (supervisor.idle_timeout_minutes ?? 0) * 60 * 1000;
  const hardTimeoutMs = (supervisor.hard_timeout_minutes ?? 0) * 60 * 1000;
  // Tick period shared by the idle watchdog and the context-pressure monitor.
  const pollIntervalMs = 15_000;

  // ── 1. Soft timeout warning ──
  s.wrapupWarningHandle = setTimeout(() => {
    s.wrapupWarningHandle = null;
    if (!s.active || !s.currentUnit) return;
    writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
      phase: "wrapup-warning-sent",
      wrapupWarningSent: true,
    });
    pi.sendMessage(
      {
        customType: "gsd-auto-wrapup",
        display: s.verbose,
        content: [
          "**TIME BUDGET WARNING — keep going only if progress is real.**",
          "This unit crossed the soft time budget.",
          "If you are making progress, continue. If not, switch to wrap-up mode now:",
          "1. rerun the minimal required verification",
          "2. write or update the required durable artifacts",
          "3. mark task or slice state on disk correctly",
          "4. leave precise resume notes if anything remains unfinished",
        ].join("\n"),
      },
      { triggerTurn: true },
    );
  }, softTimeoutMs);

  // ── 2. Idle watchdog ──
  s.idleWatchdogHandle = setInterval(async () => {
    try {
      if (!s.active || !s.currentUnit) return;
      const runtime = readUnitRuntimeRecord(s.basePath, unitType, unitId);
      if (!runtime) return;
      if (Date.now() - runtime.lastProgressAt < idleTimeoutMs) return;

      // Agent has tool calls currently executing — not idle, just waiting.
      // But only suppress recovery if the tool started recently.
      if (getInFlightToolCount() > 0) {
        const oldestStart = getOldestInFlightToolStart()!;
        const toolAgeMs = Date.now() - oldestStart;
        if (toolAgeMs < idleTimeoutMs) {
          writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
            lastProgressAt: Date.now(),
            lastProgressKind: "tool-in-flight",
          });
          return;
        }
        ctx.ui.notify(
          `Stalled tool detected: a tool has been in-flight for ${Math.round(toolAgeMs / 60000)}min. Treating as hung — attempting idle recovery.`,
          "warning",
        );
      }

      // Check if the agent is producing work on disk.
      if (detectWorkingTreeActivity(s.basePath)) {
        writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
          lastProgressAt: Date.now(),
          lastProgressKind: "filesystem-activity",
        });
        return;
      }

      if (s.currentUnit) {
        await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt, buildSnapshotOpts());
      } else {
        saveActivityLog(ctx, s.basePath, unitType, unitId);
      }

      const recovery = await recoverTimedOutUnit(ctx, pi, unitType, unitId, "idle", buildRecoveryContext());
      if (recovery === "recovered") return;

      // The awaits above yield control, so the session may no longer have a
      // current unit. Only stamp the runtime record when one is still set;
      // dereferencing a cleared unit would throw and skip the pause below.
      const unitAfterRecovery = s.currentUnit;
      if (unitAfterRecovery) {
        writeUnitRuntimeRecord(s.basePath, unitType, unitId, unitAfterRecovery.startedAt, {
          phase: "paused",
        });
      }
      ctx.ui.notify(
        `Unit ${unitType} ${unitId} made no meaningful progress for ${supervisor.idle_timeout_minutes}min. Pausing auto-mode.`,
        "warning",
      );
      await pauseAuto(ctx, pi);
    } catch (err) {
      // A rejected promise inside a timer callback has no caller to report to.
      const message = err instanceof Error ? err.message : String(err);
      console.error(`[idle-watchdog] Unhandled error: ${message}`);
      try {
        ctx.ui.notify(`Idle watchdog error: ${message}`, "warning");
      } catch { /* best effort */ }
    }
  }, pollIntervalMs);

  // ── 3. Hard timeout ──
  s.unitTimeoutHandle = setTimeout(async () => {
    try {
      s.unitTimeoutHandle = null;
      if (!s.active) return;
      if (s.currentUnit) {
        writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
          phase: "timeout",
          timeoutAt: Date.now(),
        });
        await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt, buildSnapshotOpts());
      } else {
        saveActivityLog(ctx, s.basePath, unitType, unitId);
      }

      const recovery = await recoverTimedOutUnit(ctx, pi, unitType, unitId, "hard", buildRecoveryContext());
      if (recovery === "recovered") return;

      ctx.ui.notify(
        `Unit ${unitType} ${unitId} exceeded ${supervisor.hard_timeout_minutes}min hard timeout. Pausing auto-mode.`,
        "warning",
      );
      await pauseAuto(ctx, pi);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      console.error(`[hard-timeout] Unhandled error: ${message}`);
      try {
        ctx.ui.notify(`Hard timeout error: ${message}`, "warning");
      } catch { /* best effort */ }
    }
  }, hardTimeoutMs);

  // ── 4. Context-pressure continue-here monitor ──
  if (s.continueHereHandle) {
    clearInterval(s.continueHereHandle);
    s.continueHereHandle = null;
  }
  const executorContextWindow = resolveExecutorContextWindow(
    ctx.modelRegistry as Parameters<typeof resolveExecutorContextWindow>[0],
    prefs as Parameters<typeof resolveExecutorContextWindow>[1],
    ctx.model?.contextWindow,
  );
  const continueHereThreshold = computeBudgets(executorContextWindow).continueThresholdPercent;

  s.continueHereHandle = setInterval(() => {
    if (!s.active || !s.currentUnit || !s.cmdCtx) return;
    const runtime = readUnitRuntimeRecord(s.basePath, unitType, unitId);
    // The wrap-up signal is sent at most once per unit.
    if (runtime?.continueHereFired) return;
    const contextUsage = s.cmdCtx.getContextUsage();
    if (!contextUsage || contextUsage.percent == null || contextUsage.percent < continueHereThreshold) return;

    writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit!.startedAt, {
      continueHereFired: true,
    });
    if (s.verbose) {
      ctx.ui.notify(
        `Context at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%) — sending wrap-up signal.`,
        "info",
      );
    }
    pi.sendMessage(
      {
        customType: "gsd-auto-wrapup",
        display: s.verbose,
        content: [
          "**CONTEXT BUDGET WARNING — wrap up this unit now.**",
          `Context window is at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%).`,
          "The next unit needs a fresh context to work effectively. Wrap up now:",
          "1. Finish any in-progress file writes",
          "2. Write or update the required durable artifacts (summary, checkboxes)",
          "3. Mark task state on disk correctly",
          "4. Leave precise resume notes if anything remains unfinished",
          "Do NOT start new sub-tasks or investigations.",
        ].join("\n"),
      },
      { triggerTurn: true },
    );
    // One-shot: stop polling once the signal has been sent.
    if (s.continueHereHandle) {
      clearInterval(s.continueHereHandle);
      s.continueHereHandle = null;
    }
  }, pollIntervalMs);
}

View file

@ -0,0 +1,195 @@
/**
* Post-unit verification gate for auto-mode.
*
* Runs typecheck/lint/test checks, captures runtime errors, performs
* dependency audits, handles auto-fix retry logic, and writes
* verification evidence JSON.
*
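* Extracted from handleAgentEnd() in auto.ts. Returns a sentinel
* value instead of returning from the caller directly — the caller
* checks the result and handles control flow. dispatchNextUnit,
* startDispatchGapWatchdog and pauseAuto are invoked through callbacks
* supplied by the caller.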
*/
import type { ExtensionContext, ExtensionAPI } from "@gsd/pi-coding-agent";
import { loadFile, parsePlan } from "./files.js";
import { resolveSliceFile, resolveSlicePath } from "./paths.js";
import { loadEffectiveGSDPreferences } from "./preferences.js";
import {
runVerificationGate,
formatFailureContext,
captureRuntimeErrors,
runDependencyAudit,
} from "./verification-gate.js";
import { writeVerificationJSON } from "./verification-evidence.js";
import { removePersistedKey } from "./auto-recovery.js";
import type { AutoSession, PendingVerificationRetry } from "./auto/session.js";
import { join } from "node:path";
/**
 * Handles runPostUnitVerification() needs for the unit that just finished.
 */
export interface VerificationContext {
/** Auto-mode session; supplies currentUnit and basePath and holds the retry counters and completed-key set. */
s: AutoSession;
/** Extension context used for UI notifications and passed to the injected callbacks. */
ctx: ExtensionContext;
/** Extension API passed through to the injected dispatch/watchdog/pause callbacks. */
pi: ExtensionAPI;
}
/** Sentinel returned by runPostUnitVerification(): gate passed or skipped, retry dispatched, or auto-mode paused. */
export type VerificationResult = "continue" | "retry" | "pause";
/**
 * Run the verification gate for the current execute-task unit.
 *
 * Units of any other type (or no current unit) skip the gate. Any error
 * thrown while running the gate is written to stderr and treated as a pass.
 *
 * @param vctx Session plus extension handles for the unit that just finished.
 * @param dispatchNextUnit Called to dispatch the auto-fix retry.
 * @param startDispatchGapWatchdog Called when the retry dispatch itself throws.
 * @param pauseAuto Called when the gate fails and no retry will be attempted.
 * @returns
 *   - "continue" — gate passed (or no checks configured), proceed normally
 *   - "retry" — gate failed with retries remaining, dispatchNextUnit already called
 *   - "pause" — gate failed with retries exhausted, pauseAuto already called
 */
export async function runPostUnitVerification(
  vctx: VerificationContext,
  dispatchNextUnit: (ctx: ExtensionContext, pi: ExtensionAPI) => Promise<void>,
  startDispatchGapWatchdog: (ctx: ExtensionContext, pi: ExtensionAPI) => void,
  pauseAuto: (ctx?: ExtensionContext, pi?: ExtensionAPI) => Promise<void>,
): Promise<VerificationResult> {
  const { s, ctx, pi } = vctx;
  if (!s.currentUnit || s.currentUnit.type !== "execute-task") {
    return "continue";
  }

  try {
    const effectivePrefs = loadEffectiveGSDPreferences();
    const prefs = effectivePrefs?.preferences;

    // Read task plan verify field
    const parts = s.currentUnit.id.split("/");
    let taskPlanVerify: string | undefined;
    if (parts.length >= 3) {
      const [mid, sid, tid] = parts;
      const planFile = resolveSliceFile(s.basePath, mid, sid, "PLAN");
      if (planFile) {
        const planContent = await loadFile(planFile);
        if (planContent) {
          const slicePlan = parsePlan(planContent);
          const taskEntry = slicePlan?.tasks?.find(t => t.id === tid);
          taskPlanVerify = taskEntry?.verify;
        }
      }
    }

    const result = runVerificationGate({
      basePath: s.basePath,
      unitId: s.currentUnit.id,
      cwd: s.basePath,
      preferenceCommands: prefs?.verification_commands,
      taskPlanVerify,
    });

    // Capture runtime errors; a blocking one fails the gate even if all checks passed.
    const runtimeErrors = await captureRuntimeErrors();
    if (runtimeErrors.length > 0) {
      result.runtimeErrors = runtimeErrors;
      if (runtimeErrors.some(e => e.blocking)) {
        result.passed = false;
      }
    }

    // Dependency audit — warnings are recorded and logged but do not fail the gate.
    const auditWarnings = runDependencyAudit(s.basePath);
    if (auditWarnings.length > 0) {
      result.auditWarnings = auditWarnings;
      process.stderr.write(`verification-gate: ${auditWarnings.length} audit warning(s)\n`);
      for (const w of auditWarnings) {
        process.stderr.write(` [${w.severity}] ${w.name}: ${w.title}\n`);
      }
    }

    // Auto-fix retry preferences: enabled unless explicitly false, 2 retries by default.
    const autoFixEnabled = prefs?.verification_auto_fix !== false;
    const maxRetries = typeof prefs?.verification_max_retries === "number" ? prefs.verification_max_retries : 2;
    const completionKey = `${s.currentUnit.type}/${s.currentUnit.id}`;

    if (result.checks.length > 0) {
      const passCount = result.checks.filter(c => c.exitCode === 0).length;
      const total = result.checks.length;
      if (result.passed) {
        ctx.ui.notify(`Verification gate: ${passCount}/${total} checks passed`);
      } else {
        const failures = result.checks.filter(c => c.exitCode !== 0);
        const failNames = failures.map(f => f.command).join(", ");
        ctx.ui.notify(`Verification gate: FAILED — ${failNames}`);
        process.stderr.write(`verification-gate: ${total - passCount}/${total} checks failed\n`);
        for (const f of failures) {
          process.stderr.write(` ${f.command} exited ${f.exitCode}\n`);
          if (f.stderr) process.stderr.write(` stderr: ${f.stderr.slice(0, 500)}\n`);
        }
      }
    }

    // Log blocking runtime errors
    if (result.runtimeErrors?.some(e => e.blocking)) {
      const blockingErrors = result.runtimeErrors.filter(e => e.blocking);
      process.stderr.write(`verification-gate: ${blockingErrors.length} blocking runtime error(s) detected\n`);
      for (const err of blockingErrors) {
        process.stderr.write(` [${err.source}] ${err.severity}: ${err.message.slice(0, 200)}\n`);
      }
    }

    // Number of auto-fix retries already performed for this unit.
    const attempt = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;

    // Write verification evidence JSON (best effort — a write failure never changes the verdict).
    if (parts.length >= 3) {
      try {
        const [mid, sid, tid] = parts;
        const sDir = resolveSlicePath(s.basePath, mid, sid);
        if (sDir) {
          const tasksDir = join(sDir, "tasks");
          if (result.passed) {
            writeVerificationJSON(result, tasksDir, tid, s.currentUnit.id);
          } else {
            const nextAttempt = attempt + 1;
            writeVerificationJSON(result, tasksDir, tid, s.currentUnit.id, nextAttempt, maxRetries);
          }
        }
      } catch (evidenceErr) {
        const msg = evidenceErr instanceof Error ? evidenceErr.message : String(evidenceErr);
        process.stderr.write(`verification-evidence: write error — ${msg}\n`);
      }
    }

    // ── Auto-fix retry logic ──
    if (result.passed) {
      s.verificationRetryCount.delete(s.currentUnit.id);
      s.pendingVerificationRetry = null;
      return "continue";
    } else if (autoFixEnabled && attempt + 1 <= maxRetries) {
      const nextAttempt = attempt + 1;
      s.verificationRetryCount.set(s.currentUnit.id, nextAttempt);
      s.pendingVerificationRetry = {
        unitId: s.currentUnit.id,
        failureContext: formatFailureContext(result),
        attempt: nextAttempt,
      };
      ctx.ui.notify(`Verification failed — auto-fix attempt ${nextAttempt}/${maxRetries}`, "warning");
      // Forget the completion so the same unit is eligible for dispatch again.
      s.completedKeySet.delete(completionKey);
      removePersistedKey(s.basePath, completionKey);
      // Dispatch retry immediately
      try {
        await dispatchNextUnit(ctx, pi);
      } catch (retryDispatchErr) {
        const msg = retryDispatchErr instanceof Error ? retryDispatchErr.message : String(retryDispatchErr);
        ctx.ui.notify(`Verification retry dispatch error: ${msg}`, "error");
        startDispatchGapWatchdog(ctx, pi);
      }
      return "retry";
    } else {
      // Gate failed and no retry will be attempted (retries exhausted or auto-fix disabled).
      s.verificationRetryCount.delete(s.currentUnit.id);
      s.pendingVerificationRetry = null;
      // `attempt` is the number of retries actually performed, including 0
      // when auto-fix is disabled and nothing was retried.
      ctx.ui.notify(
        `Verification gate FAILED after ${attempt} retries — pausing for human review`,
        "error",
      );
      await pauseAuto(ctx, pi);
      return "pause";
    }
  } catch (err) {
    // Gate errors are non-fatal
    const msg = err instanceof Error ? err.message : String(err);
    process.stderr.write(`verification-gate: error — ${msg}\n`);
    return "continue";
  }
}

File diff suppressed because it is too large Load diff