refactor: decompose auto.ts into 6 focused modules (#1088)
Extract 6 cohesive modules from the 3,476-line auto.ts god file, reducing it to 1,732 lines while preserving all external import paths. New modules: - auto-timers.ts (223 lines): Unit supervision timers — soft timeout, idle watchdog, hard timeout, context-pressure monitor - auto-idempotency.ts (150 lines): Completed-key checks, skip loop detection, phantom loop handling, fallback persistence - auto-stuck-detection.ts (220 lines): Dispatch count tracking, lifetime cap, MAX_UNIT_DISPATCHES loop detection, stub recovery. Uses return values instead of calling stopAuto/dispatchNextUnit. - auto-verification.ts (195 lines): Post-unit typecheck/lint/test gate, runtime error capture, dependency audit, auto-fix retry logic - auto-post-unit.ts (585 lines): Split into postUnitPreVerification and postUnitPostVerification — commit, doctor, state rebuild, worktree sync, DB dual-write, hooks, triage, quick-tasks - auto-start.ts (472 lines): Fresh session bootstrap — git/state init, crash lock detection, debug init, worktree setup, DB lifecycle All extracted functions receive AutoSession + context as parameters. No circular dependencies — new modules import from leaf dependencies only, never from ./auto.js. All public exports from auto.ts are preserved so external import paths continue to work unchanged. Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
05fa939c11
commit
25d5f60836
7 changed files with 1966 additions and 1868 deletions
150
src/resources/extensions/gsd/auto-idempotency.ts
Normal file
150
src/resources/extensions/gsd/auto-idempotency.ts
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
/**
|
||||
* Idempotency checks for auto-mode unit dispatch.
|
||||
*
|
||||
* Handles completed-key membership, artifact cross-validation,
|
||||
* consecutive skip counting, phantom skip loop detection, key eviction,
|
||||
* and fallback persistence.
|
||||
*
|
||||
* Extracted from dispatchNextUnit() in auto.ts. Pure decision logic
|
||||
* with set mutations — does NOT call dispatchNextUnit or stopAuto.
|
||||
*/
|
||||
|
||||
import { invalidateAllCaches } from "./cache.js";
|
||||
import {
|
||||
verifyExpectedArtifact,
|
||||
persistCompletedKey,
|
||||
removePersistedKey,
|
||||
} from "./auto-recovery.js";
|
||||
import { resolveMilestoneFile } from "./paths.js";
|
||||
import { MAX_CONSECUTIVE_SKIPS, MAX_LIFETIME_DISPATCHES } from "./auto/session.js";
|
||||
import type { AutoSession } from "./auto/session.js";
|
||||
|
||||
/** Inputs for a single idempotency decision on one dispatchable unit. */
export interface IdempotencyContext {
  /** Mutable auto-mode session state (completed keys, skip counters, eviction set). */
  s: AutoSession;
  /** Unit type (e.g. "execute-task") — first half of the `${unitType}/${unitId}` key. */
  unitType: string;
  /** Unit id — second half of the idempotency key; its first "/" segment is treated as the milestone id. */
  unitId: string;
  /** Project base path used to resolve artifacts and persisted completion keys. */
  basePath: string;
  /** Notification callback */
  notify: (message: string, level: "info" | "warning" | "error") => void;
}

/**
 * Outcome of checkIdempotency():
 * - "skip"    — unit already done (or a loop was broken); advance past it.
 * - "rerun"   — stale completion record (artifact missing); dispatch the unit again.
 * - "proceed" — no completion record and no artifact; dispatch normally.
 * - "stop"    — lifetime dispatch cap exceeded; caller should halt auto-mode.
 */
export type IdempotencyResult =
  | { action: "skip"; reason: string }
  | { action: "rerun"; reason: string }
  | { action: "proceed" }
  | { action: "stop"; reason: string };
|
||||
|
||||
/**
|
||||
* Check whether a unit should be skipped (already completed), rerun
|
||||
* (stale completion record), or dispatched normally.
|
||||
*
|
||||
* Mutates s.completedKeySet, s.unitConsecutiveSkips, s.unitLifetimeDispatches,
|
||||
* and s.recentlyEvictedKeys as needed.
|
||||
*/
|
||||
export function checkIdempotency(ictx: IdempotencyContext): IdempotencyResult {
|
||||
const { s, unitType, unitId, basePath, notify } = ictx;
|
||||
const idempotencyKey = `${unitType}/${unitId}`;
|
||||
|
||||
// ── Primary path: key exists in completed set ──
|
||||
if (s.completedKeySet.has(idempotencyKey)) {
|
||||
const artifactExists = verifyExpectedArtifact(unitType, unitId, basePath);
|
||||
if (artifactExists) {
|
||||
// Guard against infinite skip loops
|
||||
const skipCount = (s.unitConsecutiveSkips.get(idempotencyKey) ?? 0) + 1;
|
||||
s.unitConsecutiveSkips.set(idempotencyKey, skipCount);
|
||||
if (skipCount > MAX_CONSECUTIVE_SKIPS) {
|
||||
// Cross-check: verify the unit's milestone is still active (#790)
|
||||
const skippedMid = unitId.split("/")[0];
|
||||
const skippedMilestoneComplete = skippedMid
|
||||
? !!resolveMilestoneFile(basePath, skippedMid, "SUMMARY")
|
||||
: false;
|
||||
if (skippedMilestoneComplete) {
|
||||
s.unitConsecutiveSkips.delete(idempotencyKey);
|
||||
invalidateAllCaches();
|
||||
notify(
|
||||
`Phantom skip loop cleared: ${unitType} ${unitId} belongs to completed milestone ${skippedMid}. Re-dispatching from fresh state.`,
|
||||
"info",
|
||||
);
|
||||
return { action: "skip", reason: "phantom-loop-cleared" };
|
||||
}
|
||||
s.unitConsecutiveSkips.delete(idempotencyKey);
|
||||
s.completedKeySet.delete(idempotencyKey);
|
||||
s.recentlyEvictedKeys.add(idempotencyKey);
|
||||
removePersistedKey(basePath, idempotencyKey);
|
||||
invalidateAllCaches();
|
||||
notify(
|
||||
`Skip loop detected: ${unitType} ${unitId} skipped ${skipCount} times without advancing. Evicting completion record and forcing reconciliation.`,
|
||||
"warning",
|
||||
);
|
||||
return { action: "skip", reason: "evicted" };
|
||||
}
|
||||
// Count toward lifetime cap
|
||||
const lifeSkip = (s.unitLifetimeDispatches.get(idempotencyKey) ?? 0) + 1;
|
||||
s.unitLifetimeDispatches.set(idempotencyKey, lifeSkip);
|
||||
if (lifeSkip > MAX_LIFETIME_DISPATCHES) {
|
||||
return { action: "stop", reason: `Hard loop: ${unitType} ${unitId} (skip cycle)` };
|
||||
}
|
||||
notify(
|
||||
`Skipping ${unitType} ${unitId} — already completed in a prior session. Advancing.`,
|
||||
"info",
|
||||
);
|
||||
return { action: "skip", reason: "completed" };
|
||||
} else {
|
||||
// Stale completion record — artifact missing. Remove and re-run.
|
||||
s.completedKeySet.delete(idempotencyKey);
|
||||
removePersistedKey(basePath, idempotencyKey);
|
||||
notify(
|
||||
`Re-running ${unitType} ${unitId} — marked complete but expected artifact missing.`,
|
||||
"warning",
|
||||
);
|
||||
return { action: "rerun", reason: "stale-key" };
|
||||
}
|
||||
}
|
||||
|
||||
// ── Fallback: key missing but artifact exists ──
|
||||
if (verifyExpectedArtifact(unitType, unitId, basePath) && !s.recentlyEvictedKeys.has(idempotencyKey)) {
|
||||
persistCompletedKey(basePath, idempotencyKey);
|
||||
s.completedKeySet.add(idempotencyKey);
|
||||
invalidateAllCaches();
|
||||
// Same consecutive-skip guard as the primary path
|
||||
const skipCount2 = (s.unitConsecutiveSkips.get(idempotencyKey) ?? 0) + 1;
|
||||
s.unitConsecutiveSkips.set(idempotencyKey, skipCount2);
|
||||
if (skipCount2 > MAX_CONSECUTIVE_SKIPS) {
|
||||
const skippedMid2 = unitId.split("/")[0];
|
||||
const skippedMilestoneComplete2 = skippedMid2
|
||||
? !!resolveMilestoneFile(basePath, skippedMid2, "SUMMARY")
|
||||
: false;
|
||||
if (skippedMilestoneComplete2) {
|
||||
s.unitConsecutiveSkips.delete(idempotencyKey);
|
||||
invalidateAllCaches();
|
||||
notify(
|
||||
`Phantom skip loop cleared: ${unitType} ${unitId} belongs to completed milestone ${skippedMid2}. Re-dispatching from fresh state.`,
|
||||
"info",
|
||||
);
|
||||
return { action: "skip", reason: "phantom-loop-cleared" };
|
||||
}
|
||||
s.unitConsecutiveSkips.delete(idempotencyKey);
|
||||
s.completedKeySet.delete(idempotencyKey);
|
||||
removePersistedKey(basePath, idempotencyKey);
|
||||
invalidateAllCaches();
|
||||
notify(
|
||||
`Skip loop detected: ${unitType} ${unitId} skipped ${skipCount2} times without advancing. Evicting completion record and forcing reconciliation.`,
|
||||
"warning",
|
||||
);
|
||||
return { action: "skip", reason: "evicted" };
|
||||
}
|
||||
// Count toward lifetime cap
|
||||
const lifeSkip2 = (s.unitLifetimeDispatches.get(idempotencyKey) ?? 0) + 1;
|
||||
s.unitLifetimeDispatches.set(idempotencyKey, lifeSkip2);
|
||||
if (lifeSkip2 > MAX_LIFETIME_DISPATCHES) {
|
||||
return { action: "stop", reason: `Hard loop: ${unitType} ${unitId} (skip cycle)` };
|
||||
}
|
||||
notify(
|
||||
`Skipping ${unitType} ${unitId} — artifact exists but completion key was missing. Repaired and advancing.`,
|
||||
"info",
|
||||
);
|
||||
return { action: "skip", reason: "fallback-persisted" };
|
||||
}
|
||||
|
||||
return { action: "proceed" };
|
||||
}
|
||||
586
src/resources/extensions/gsd/auto-post-unit.ts
Normal file
586
src/resources/extensions/gsd/auto-post-unit.ts
Normal file
|
|
@ -0,0 +1,586 @@
|
|||
/**
|
||||
* Post-unit processing for handleAgentEnd — auto-commit, doctor run,
|
||||
* state rebuild, worktree sync, DB dual-write, hooks, triage, and
|
||||
* quick-task dispatch.
|
||||
*
|
||||
* Split into two functions called sequentially by handleAgentEnd with
|
||||
* the verification gate between them:
|
||||
* 1. postUnitPreVerification() — commit, doctor, state rebuild, worktree sync, artifact verification
|
||||
* 2. postUnitPostVerification() — DB dual-write, hooks, triage, quick-tasks
|
||||
*
|
||||
* Extracted from handleAgentEnd() in auto.ts.
|
||||
*/
|
||||
|
||||
import type { ExtensionContext, ExtensionCommandContext, ExtensionAPI } from "@gsd/pi-coding-agent";
|
||||
import { deriveState } from "./state.js";
|
||||
import { loadFile, parseSummary, resolveAllOverrides } from "./files.js";
|
||||
import { loadPrompt } from "./prompt-loader.js";
|
||||
import {
|
||||
resolveSliceFile,
|
||||
resolveTaskFile,
|
||||
resolveMilestoneFile,
|
||||
gsdRoot,
|
||||
} from "./paths.js";
|
||||
import { invalidateAllCaches } from "./cache.js";
|
||||
import { closeoutUnit, type CloseoutOptions } from "./auto-unit-closeout.js";
|
||||
import {
|
||||
autoCommitCurrentBranch,
|
||||
type TaskCommitContext,
|
||||
} from "./worktree.js";
|
||||
import {
|
||||
verifyExpectedArtifact,
|
||||
persistCompletedKey,
|
||||
removePersistedKey,
|
||||
} from "./auto-recovery.js";
|
||||
import { writeUnitRuntimeRecord, clearUnitRuntimeRecord } from "./unit-runtime.js";
|
||||
import { resolveAutoSupervisorConfig, loadEffectiveGSDPreferences } from "./preferences.js";
|
||||
import { runGSDDoctor, rebuildState, summarizeDoctorIssues } from "./doctor.js";
|
||||
import { recordHealthSnapshot, checkHealEscalation } from "./doctor-proactive.js";
|
||||
import { syncStateToProjectRoot } from "./auto-worktree-sync.js";
|
||||
import { resetRewriteCircuitBreaker } from "./auto-dispatch.js";
|
||||
import { isDbAvailable } from "./gsd-db.js";
|
||||
import { consumeSignal } from "./session-status-io.js";
|
||||
import {
|
||||
checkPostUnitHooks,
|
||||
getActiveHook,
|
||||
resetHookState,
|
||||
isRetryPending,
|
||||
consumeRetryTrigger,
|
||||
persistHookState,
|
||||
} from "./post-unit-hooks.js";
|
||||
import { hasPendingCaptures, loadPendingCaptures, countPendingCaptures } from "./captures.js";
|
||||
import { writeLock } from "./crash-recovery.js";
|
||||
import { debugLog } from "./debug-logger.js";
|
||||
import type { AutoSession } from "./auto/session.js";
|
||||
import type { WidgetStateAccessors, AutoDashboardData } from "./auto-dashboard.js";
|
||||
import {
|
||||
updateProgressWidget as _updateProgressWidget,
|
||||
updateSliceProgressCache,
|
||||
unitVerb,
|
||||
hideFooter,
|
||||
} from "./auto-dashboard.js";
|
||||
import { join } from "node:path";
|
||||
|
||||
/** Throttle STATE.md rebuilds — at most once per 30 seconds */
const STATE_REBUILD_MIN_INTERVAL_MS = 30_000;

/**
 * Shared context handed to both post-unit phases by handleAgentEnd.
 * Bundles the session plus callbacks that still live in auto.ts
 * (stop/pause, widget updates, lock-path resolution), so this module
 * never has to import from ./auto.js.
 */
export interface PostUnitContext {
  /** Mutable auto-mode session state. */
  s: AutoSession;
  /** Host extension context (UI notifications, session manager, model registry). */
  ctx: ExtensionContext;
  /** Extension API — used for sendMessage and setModel. */
  pi: ExtensionAPI;
  /** Builds closeout snapshot options for the given unit (passed to closeoutUnit). */
  buildSnapshotOpts: (unitType: string, unitId: string) => CloseoutOptions & Record<string, unknown>;
  /** Resolves the base directory used for crash-recovery lock files. */
  lockBase: () => string;
  /** Stops auto-mode; optional reason is surfaced to the user. */
  stopAuto: (ctx?: ExtensionContext, pi?: ExtensionAPI, reason?: string) => Promise<void>;
  /** Pauses auto-mode, keeping session state for a later resume. */
  pauseAuto: (ctx?: ExtensionContext, pi?: ExtensionAPI) => Promise<void>;
  /** Refreshes the progress widget for the unit about to run. */
  updateProgressWidget: (ctx: ExtensionContext, unitType: string, unitId: string, state: import("./types.js").GSDState) => void;
}
|
||||
|
||||
/**
 * Pre-verification processing: parallel worker signal check, cache invalidation,
 * auto-commit, doctor run, state rebuild, worktree sync, artifact verification.
 *
 * Phases run strictly in order; almost every phase is individually wrapped in
 * try/catch so a single failure never aborts post-unit processing.
 *
 * Returns "dispatched" if a signal caused stop/pause, "continue" to proceed.
 */
export async function postUnitPreVerification(pctx: PostUnitContext): Promise<"dispatched" | "continue"> {
  // NOTE(review): buildSnapshotOpts is destructured but not used in this
  // function (only postUnitPostVerification uses it) — confirm and drop.
  const { s, ctx, pi, buildSnapshotOpts, stopAuto, pauseAuto } = pctx;

  // ── Parallel worker signal check ──
  // When running under a milestone lock, another worker may have requested
  // stop/pause via the session-status signal file; honor it before any work.
  const milestoneLock = process.env.GSD_MILESTONE_LOCK;
  if (milestoneLock) {
    const signal = consumeSignal(s.basePath, milestoneLock);
    if (signal) {
      if (signal.signal === "stop") {
        await stopAuto(ctx, pi);
        return "dispatched";
      }
      if (signal.signal === "pause") {
        await pauseAuto(ctx, pi);
        return "dispatched";
      }
    }
  }

  // Invalidate all caches
  invalidateAllCaches();

  // Small delay to let files settle
  await new Promise(r => setTimeout(r, 500));

  // Auto-commit
  // Everything below (commit → doctor → rebuild → sync → triage → artifact
  // verification) only runs when a unit was actually in flight.
  if (s.currentUnit) {
    try {
      let taskContext: TaskCommitContext | undefined;

      // For execute-task units, enrich the commit with task metadata parsed
      // from the task's SUMMARY file (title, one-liner, key files).
      if (s.currentUnit.type === "execute-task") {
        const parts = s.currentUnit.id.split("/");
        const [mid, sid, tid] = parts;
        if (mid && sid && tid) {
          const summaryPath = resolveTaskFile(s.basePath, mid, sid, tid, "SUMMARY");
          if (summaryPath) {
            try {
              const summaryContent = await loadFile(summaryPath);
              if (summaryContent) {
                const summary = parseSummary(summaryContent);
                taskContext = {
                  taskId: `${sid}/${tid}`,
                  // Strip a leading "T<n>: " prefix from the title if present.
                  taskTitle: summary.title?.replace(/^T\d+:\s*/, "") || tid,
                  oneLiner: summary.oneLiner || undefined,
                  // Drop unresolved template placeholders like "{{...}}".
                  keyFiles: summary.frontmatter.key_files?.filter(f => !f.includes("{{")) || undefined,
                };
              }
            } catch {
              // Non-fatal
            }
          }
        }
      }

      const commitMsg = autoCommitCurrentBranch(s.basePath, s.currentUnit.type, s.currentUnit.id, taskContext);
      if (commitMsg) {
        ctx.ui.notify(`Committed: ${commitMsg.split("\n")[0]}`, "info");
      }
    } catch {
      // Non-fatal
    }

    // Doctor: fix mechanical bookkeeping
    try {
      // Scope the doctor run to "<milestone>/<slice>" of the current unit.
      const scopeParts = s.currentUnit.id.split("/").slice(0, 2);
      const doctorScope = scopeParts.join("/");
      // Slice-terminal units get the full fix level; others stay task-scoped.
      const sliceTerminalUnits = new Set(["complete-slice", "run-uat"]);
      const effectiveFixLevel = sliceTerminalUnits.has(s.currentUnit.type) ? "all" as const : "task" as const;
      const report = await runGSDDoctor(s.basePath, { fix: true, scope: doctorScope, fixLevel: effectiveFixLevel });
      if (report.fixesApplied.length > 0) {
        ctx.ui.notify(`Post-hook: applied ${report.fixesApplied.length} fix(es).`, "info");
      }

      // Proactive health tracking
      const summary = summarizeDoctorIssues(report.issues);
      recordHealthSnapshot(summary.errors, summary.warnings, report.fixesApplied.length);

      // Check if we should escalate to LLM-assisted heal
      if (summary.errors > 0) {
        // Only errors the doctor could not auto-fix count toward escalation.
        const unresolvedErrors = report.issues
          .filter(i => i.severity === "error" && !i.fixable)
          .map(i => ({ code: i.code, message: i.message, unitId: i.unitId }));
        const escalation = checkHealEscalation(summary.errors, unresolvedErrors);
        if (escalation.shouldEscalate) {
          ctx.ui.notify(
            `Doctor heal escalation: ${escalation.reason}. Dispatching LLM-assisted heal.`,
            "warning",
          );
          try {
            // Lazy imports keep commands.js/doctor.js off the hot path.
            const { formatDoctorIssuesForPrompt, formatDoctorReport } = await import("./doctor.js");
            const { dispatchDoctorHeal } = await import("./commands.js");
            const actionable = report.issues.filter(i => i.severity === "error");
            const reportText = formatDoctorReport(report, { scope: doctorScope, includeWarnings: true });
            const structuredIssues = formatDoctorIssuesForPrompt(actionable);
            dispatchDoctorHeal(pi, doctorScope, reportText, structuredIssues);
          } catch {
            // Non-fatal
          }
        }
      }
    } catch {
      // Non-fatal
    }

    // Throttled STATE.md rebuild
    const now = Date.now();
    if (now - s.lastStateRebuildAt >= STATE_REBUILD_MIN_INTERVAL_MS) {
      try {
        await rebuildState(s.basePath);
        s.lastStateRebuildAt = now;
        // Commit the rebuilt state under a synthetic "state-rebuild" unit type.
        autoCommitCurrentBranch(s.basePath, "state-rebuild", s.currentUnit.id);
      } catch {
        // Non-fatal
      }
    }

    // Prune dead bg-shell processes
    try {
      const { pruneDeadProcesses } = await import("../bg-shell/process-manager.js");
      pruneDeadProcesses();
    } catch {
      // Non-fatal
    }

    // Sync worktree state back to project root
    // Only applies when running inside an isolated worktree (paths differ).
    if (s.originalBasePath && s.originalBasePath !== s.basePath) {
      try {
        syncStateToProjectRoot(s.basePath, s.originalBasePath, s.currentMilestoneId);
      } catch {
        // Non-fatal
      }
    }

    // Rewrite-docs completion
    if (s.currentUnit.type === "rewrite-docs") {
      try {
        await resolveAllOverrides(s.basePath);
        resetRewriteCircuitBreaker();
        ctx.ui.notify("Override(s) resolved — rewrite-docs completed.", "info");
      } catch {
        // Non-fatal
      }
    }

    // Post-triage: execute actionable resolutions
    if (s.currentUnit.type === "triage-captures") {
      try {
        const { executeTriageResolutions } = await import("./triage-resolution.js");
        const state = await deriveState(s.basePath);
        const mid = state.activeMilestone?.id;
        const sid = state.activeSlice?.id;

        if (mid && sid) {
          const triageResult = executeTriageResolutions(s.basePath, mid, sid);

          if (triageResult.injected > 0) {
            ctx.ui.notify(
              `Triage: injected ${triageResult.injected} task${triageResult.injected === 1 ? "" : "s"} into ${sid} plan.`,
              "info",
            );
          }
          if (triageResult.replanned > 0) {
            ctx.ui.notify(
              `Triage: replan trigger written for ${sid} — next dispatch will enter replanning.`,
              "info",
            );
          }
          if (triageResult.quickTasks.length > 0) {
            // Queue quick-tasks for postUnitPostVerification to dispatch.
            for (const qt of triageResult.quickTasks) {
              s.pendingQuickTasks.push(qt);
            }
            ctx.ui.notify(
              `Triage: ${triageResult.quickTasks.length} quick-task${triageResult.quickTasks.length === 1 ? "" : "s"} queued for execution.`,
              "info",
            );
          }
          for (const action of triageResult.actions) {
            process.stderr.write(`gsd-triage: ${action}\n`);
          }
        }
      } catch (err) {
        process.stderr.write(`gsd-triage: resolution execution failed: ${(err as Error).message}\n`);
      }
    }

    // Artifact verification and completion persistence
    let triggerArtifactVerified = false;
    if (!s.currentUnit.type.startsWith("hook/")) {
      try {
        triggerArtifactVerified = verifyExpectedArtifact(s.currentUnit.type, s.currentUnit.id, s.basePath);
        if (triggerArtifactVerified) {
          // Persist the completion key (idempotent: only if not already present).
          const completionKey = `${s.currentUnit.type}/${s.currentUnit.id}`;
          if (!s.completedKeySet.has(completionKey)) {
            persistCompletedKey(s.basePath, completionKey);
            s.completedKeySet.add(completionKey);
          }
          invalidateAllCaches();
        }
      } catch {
        // Non-fatal
      }
    } else {
      // Hook unit completed — finalize its runtime record
      try {
        writeUnitRuntimeRecord(s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt, {
          phase: "finalized",
          progressCount: 1,
          lastProgressKind: "hook-completed",
        });
        clearUnitRuntimeRecord(s.basePath, s.currentUnit.type, s.currentUnit.id);
      } catch {
        // Non-fatal
      }
    }
  }

  return "continue";
}
|
||||
|
||||
/**
 * Post-verification processing: DB dual-write, post-unit hooks, triage
 * capture dispatch, quick-task dispatch.
 *
 * Each dispatch branch (hook, triage, quick-task) follows the same shape:
 * closeout current unit → set s.currentUnit → write runtime record →
 * new session → write crash lock → arm hard-timeout timer → sendMessage.
 *
 * Returns:
 * - "dispatched" — a hook/triage/quick-task was dispatched (sendMessage sent)
 * - "continue" — proceed to normal dispatchNextUnit
 * - "step-wizard" — step mode, show wizard instead
 * - "stopped" — stopAuto was called
 */
export async function postUnitPostVerification(pctx: PostUnitContext): Promise<"dispatched" | "continue" | "step-wizard" | "stopped"> {
  const { s, ctx, pi, buildSnapshotOpts, lockBase, stopAuto, pauseAuto, updateProgressWidget } = pctx;

  // ── DB dual-write ──
  // Re-import markdown state into the DB so both stores stay in sync.
  if (isDbAvailable()) {
    try {
      const { migrateFromMarkdown } = await import("./md-importer.js");
      migrateFromMarkdown(s.basePath);
    } catch (err) {
      process.stderr.write(`gsd-db: re-import failed: ${(err as Error).message}\n`);
    }
  }

  // ── Post-unit hooks ──
  // Step mode never auto-dispatches hooks.
  if (s.currentUnit && !s.stepMode) {
    const hookUnit = checkPostUnitHooks(s.currentUnit.type, s.currentUnit.id, s.basePath);
    if (hookUnit) {
      const hookStartedAt = Date.now();
      if (s.currentUnit) {
        // Close out the trigger unit (with its snapshot) before switching.
        await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt, buildSnapshotOpts(s.currentUnit.type, s.currentUnit.id));
      }
      s.currentUnit = { type: hookUnit.unitType, id: hookUnit.unitId, startedAt: hookStartedAt };
      writeUnitRuntimeRecord(s.basePath, hookUnit.unitType, hookUnit.unitId, hookStartedAt, {
        phase: "dispatched",
        wrapupWarningSent: false,
        timeoutAt: null,
        lastProgressAt: hookStartedAt,
        progressCount: 0,
        lastProgressKind: "dispatch",
      });

      const state = await deriveState(s.basePath);
      updateProgressWidget(ctx, hookUnit.unitType, hookUnit.unitId, state);
      const hookState = getActiveHook();
      ctx.ui.notify(
        `Running post-unit hook: ${hookUnit.hookName} (cycle ${hookState?.cycle ?? 1})`,
        "info",
      );

      // Switch model if the hook specifies one
      if (hookUnit.model) {
        const availableModels = ctx.modelRegistry.getAvailable();
        // Accept either a bare model id or "provider/id".
        const match = availableModels.find(m =>
          m.id === hookUnit.model || `${m.provider}/${m.id}` === hookUnit.model,
        );
        if (match) {
          try {
            await pi.setModel(match);
          } catch { /* non-fatal */ }
        }
      }

      const result = await s.cmdCtx!.newSession();
      if (result.cancelled) {
        resetHookState();
        await stopAuto(ctx, pi, "Hook session cancelled");
        return "stopped";
      }
      const sessionFile = ctx.sessionManager.getSessionFile();
      writeLock(lockBase(), hookUnit.unitType, hookUnit.unitId, s.completedUnits.length, sessionFile);
      persistHookState(s.basePath);

      // Start supervision timers for hook units
      const supervisor = resolveAutoSupervisorConfig();
      const hookHardTimeoutMs = (supervisor.hard_timeout_minutes ?? 30) * 60 * 1000;
      s.unitTimeoutHandle = setTimeout(async () => {
        s.unitTimeoutHandle = null;
        if (!s.active) return;
        if (s.currentUnit) {
          writeUnitRuntimeRecord(s.basePath, hookUnit.unitType, hookUnit.unitId, s.currentUnit.startedAt, {
            phase: "timeout",
            timeoutAt: Date.now(),
          });
        }
        ctx.ui.notify(
          `Hook ${hookUnit.hookName} exceeded ${supervisor.hard_timeout_minutes ?? 30}min timeout. Pausing auto-mode.`,
          "warning",
        );
        resetHookState();
        await pauseAuto(ctx, pi);
      }, hookHardTimeoutMs);

      // Bail if the session was stopped while awaiting newSession above.
      if (!s.active) return "stopped";
      pi.sendMessage(
        { customType: "gsd-auto", content: hookUnit.prompt, display: s.verbose },
        { triggerTurn: true },
      );
      return "dispatched";
    }

    // Check if a hook requested a retry of the trigger unit
    if (isRetryPending()) {
      const trigger = consumeRetryTrigger();
      if (trigger) {
        // Evict the trigger's completion record so it dispatches again.
        const triggerKey = `${trigger.unitType}/${trigger.unitId}`;
        s.completedKeySet.delete(triggerKey);
        removePersistedKey(s.basePath, triggerKey);
        ctx.ui.notify(
          `Hook requested retry of ${trigger.unitType} ${trigger.unitId}.`,
          "info",
        );
        // Fall through to normal dispatch
      }
    }
  }

  // ── Triage check ──
  // Never triage after a hook, a triage unit itself, or a quick-task.
  if (
    !s.stepMode &&
    s.currentUnit &&
    !s.currentUnit.type.startsWith("hook/") &&
    s.currentUnit.type !== "triage-captures" &&
    s.currentUnit.type !== "quick-task"
  ) {
    try {
      if (hasPendingCaptures(s.basePath)) {
        const pending = loadPendingCaptures(s.basePath);
        if (pending.length > 0) {
          const state = await deriveState(s.basePath);
          const mid = state.activeMilestone?.id;
          const sid = state.activeSlice?.id;

          if (mid && sid) {
            // Gather plan/roadmap context for the triage prompt (best effort).
            let currentPlan = "";
            let roadmapContext = "";
            const planFile = resolveSliceFile(s.basePath, mid, sid, "PLAN");
            if (planFile) currentPlan = (await loadFile(planFile)) ?? "";
            const roadmapFile = resolveMilestoneFile(s.basePath, mid, "ROADMAP");
            if (roadmapFile) roadmapContext = (await loadFile(roadmapFile)) ?? "";

            const capturesList = pending.map(c =>
              `- **${c.id}**: "${c.text}" (captured: ${c.timestamp})`
            ).join("\n");

            const prompt = loadPrompt("triage-captures", {
              pendingCaptures: capturesList,
              currentPlan: currentPlan || "(no active slice plan)",
              roadmapContext: roadmapContext || "(no active roadmap)",
            });

            ctx.ui.notify(
              `Triaging ${pending.length} pending capture${pending.length === 1 ? "" : "s"}...`,
              "info",
            );

            if (s.currentUnit) {
              await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt);
            }

            const triageUnitType = "triage-captures";
            const triageUnitId = `${mid}/${sid}/triage`;
            const triageStartedAt = Date.now();
            s.currentUnit = { type: triageUnitType, id: triageUnitId, startedAt: triageStartedAt };
            writeUnitRuntimeRecord(s.basePath, triageUnitType, triageUnitId, triageStartedAt, {
              phase: "dispatched",
              wrapupWarningSent: false,
              timeoutAt: null,
              lastProgressAt: triageStartedAt,
              progressCount: 0,
              lastProgressKind: "dispatch",
            });
            updateProgressWidget(ctx, triageUnitType, triageUnitId, state);

            const result = await s.cmdCtx!.newSession();
            if (result.cancelled) {
              await stopAuto(ctx, pi);
              return "stopped";
            }
            const sessionFile = ctx.sessionManager.getSessionFile();
            writeLock(lockBase(), triageUnitType, triageUnitId, s.completedUnits.length, sessionFile);

            // Hard-timeout supervision for the triage unit.
            const supervisor = resolveAutoSupervisorConfig();
            const triageTimeoutMs = (supervisor.hard_timeout_minutes ?? 30) * 60 * 1000;
            s.unitTimeoutHandle = setTimeout(async () => {
              s.unitTimeoutHandle = null;
              if (!s.active) return;
              ctx.ui.notify(
                `Triage unit exceeded timeout. Pausing auto-mode.`,
                "warning",
              );
              await pauseAuto(ctx, pi);
            }, triageTimeoutMs);

            if (!s.active) return "stopped";
            pi.sendMessage(
              { customType: "gsd-auto", content: prompt, display: s.verbose },
              { triggerTurn: true },
            );
            return "dispatched";
          }
        }
      }
    } catch {
      // Triage check failure is non-fatal
    }
  }

  // ── Quick-task dispatch ──
  // Drain one queued quick-task per post-unit pass (FIFO via shift()).
  if (
    !s.stepMode &&
    s.pendingQuickTasks.length > 0 &&
    s.currentUnit &&
    s.currentUnit.type !== "quick-task"
  ) {
    try {
      const capture = s.pendingQuickTasks.shift()!;
      const { buildQuickTaskPrompt } = await import("./triage-resolution.js");
      const { markCaptureExecuted } = await import("./captures.js");
      const prompt = buildQuickTaskPrompt(capture);

      ctx.ui.notify(
        `Executing quick-task: ${capture.id} — "${capture.text}"`,
        "info",
      );

      if (s.currentUnit) {
        await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt);
      }

      const qtUnitType = "quick-task";
      const qtUnitId = `${s.currentMilestoneId}/${capture.id}`;
      const qtStartedAt = Date.now();
      s.currentUnit = { type: qtUnitType, id: qtUnitId, startedAt: qtStartedAt };
      writeUnitRuntimeRecord(s.basePath, qtUnitType, qtUnitId, qtStartedAt, {
        phase: "dispatched",
        wrapupWarningSent: false,
        timeoutAt: null,
        lastProgressAt: qtStartedAt,
        progressCount: 0,
        lastProgressKind: "dispatch",
      });
      const state = await deriveState(s.basePath);
      updateProgressWidget(ctx, qtUnitType, qtUnitId, state);

      const result = await s.cmdCtx!.newSession();
      if (result.cancelled) {
        await stopAuto(ctx, pi);
        return "stopped";
      }
      const sessionFile = ctx.sessionManager.getSessionFile();
      writeLock(lockBase(), qtUnitType, qtUnitId, s.completedUnits.length, sessionFile);

      // Mark the capture executed now that the dispatch is committed.
      markCaptureExecuted(s.basePath, capture.id);

      const supervisor = resolveAutoSupervisorConfig();
      const qtTimeoutMs = (supervisor.hard_timeout_minutes ?? 30) * 60 * 1000;
      s.unitTimeoutHandle = setTimeout(async () => {
        s.unitTimeoutHandle = null;
        if (!s.active) return;
        ctx.ui.notify(
          `Quick-task ${capture.id} exceeded timeout. Pausing auto-mode.`,
          "warning",
        );
        await pauseAuto(ctx, pi);
      }, qtTimeoutMs);

      if (!s.active) return "stopped";
      pi.sendMessage(
        { customType: "gsd-auto", content: prompt, display: s.verbose },
        { triggerTurn: true },
      );
      return "dispatched";
    } catch {
      // Non-fatal — proceed to normal dispatch
    }
  }

  // Step mode → show wizard instead of dispatch
  if (s.stepMode) {
    return "step-wizard";
  }

  return "continue";
}
|
||||
472
src/resources/extensions/gsd/auto-start.ts
Normal file
472
src/resources/extensions/gsd/auto-start.ts
Normal file
|
|
@ -0,0 +1,472 @@
|
|||
/**
|
||||
* Auto-mode bootstrap — fresh-start initialization path.
|
||||
*
|
||||
* Git/state bootstrap, crash lock detection, debug init, worktree recovery,
|
||||
* guided flow gate, session init, worktree lifecycle, DB lifecycle,
|
||||
* preflight validation.
|
||||
*
|
||||
* Extracted from startAuto() in auto.ts. The resume path (s.paused)
|
||||
* remains in auto.ts — this module handles only the fresh-start path.
|
||||
*/
|
||||
|
||||
import type {
|
||||
ExtensionAPI,
|
||||
ExtensionCommandContext,
|
||||
} from "@gsd/pi-coding-agent";
|
||||
import { deriveState } from "./state.js";
|
||||
import { loadFile, getManifestStatus } from "./files.js";
|
||||
import { loadEffectiveGSDPreferences, resolveSkillDiscoveryMode, getIsolationMode } from "./preferences.js";
|
||||
import { collectSecretsFromManifest } from "../get-secrets-from-user.js";
|
||||
import {
|
||||
gsdRoot,
|
||||
resolveMilestoneFile,
|
||||
milestonesDir,
|
||||
} from "./paths.js";
|
||||
import { invalidateAllCaches } from "./cache.js";
|
||||
import { synthesizeCrashRecovery } from "./session-forensics.js";
|
||||
import { writeLock, clearLock, readCrashLock, formatCrashInfo, isLockProcessAlive } from "./crash-recovery.js";
|
||||
import { selfHealRuntimeRecords } from "./auto-recovery.js";
|
||||
import { ensureGitignore, untrackRuntimeFiles } from "./gitignore.js";
|
||||
import { nativeIsRepo, nativeInit, nativeAddAll, nativeCommit } from "./native-git-bridge.js";
|
||||
import { GitServiceImpl } from "./git-service.js";
|
||||
import {
|
||||
captureIntegrationBranch,
|
||||
detectWorktreeName,
|
||||
setActiveMilestoneId,
|
||||
} from "./worktree.js";
|
||||
import {
|
||||
createAutoWorktree,
|
||||
enterAutoWorktree,
|
||||
getAutoWorktreePath,
|
||||
isInAutoWorktree,
|
||||
} from "./auto-worktree.js";
|
||||
import { readResourceVersion } from "./auto-worktree-sync.js";
|
||||
import { initMetrics, getLedger } from "./metrics.js";
|
||||
import { initRoutingHistory } from "./routing-history.js";
|
||||
import { restoreHookState, resetHookState, clearPersistedHookState } from "./post-unit-hooks.js";
|
||||
import { resetProactiveHealing } from "./doctor-proactive.js";
|
||||
import { snapshotSkills } from "./skill-discovery.js";
|
||||
import { isDbAvailable } from "./gsd-db.js";
|
||||
import { loadPersistedKeys } from "./auto-recovery.js";
|
||||
import { hideFooter } from "./auto-dashboard.js";
|
||||
import { debugLog, enableDebug, isDebugEnabled, getDebugLogPath } from "./debug-logger.js";
|
||||
import type { AutoSession } from "./auto/session.js";
|
||||
import { existsSync, mkdirSync, readdirSync, statSync, unlinkSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { sep as pathSep } from "node:path";
|
||||
|
||||
export interface BootstrapDeps {
|
||||
shouldUseWorktreeIsolation: () => boolean;
|
||||
registerSigtermHandler: (basePath: string) => void;
|
||||
lockBase: () => string;
|
||||
}
|
||||
|
||||
/**
 * Bootstrap a fresh auto-mode session. Handles everything from git init
 * through secrets collection, returning when ready for the first
 * dispatchNextUnit call.
 *
 * Statement order matters throughout: the crash lock is inspected before it
 * is cleared, caches are invalidated before each deriveState() call, and the
 * worktree is entered before any s.basePath-relative work (DB, metrics).
 *
 * @param s - Mutable auto-mode session; fields are reset/initialized here.
 * @param ctx - Command context providing UI notifications and model info.
 * @param pi - Extension API (passed through to the guided discussion flow).
 * @param base - Project root path (pre-worktree; s.basePath may diverge later).
 * @param verboseMode - Whether dispatched messages are displayed to the user.
 * @param requestedStepMode - Step-mode (confirm each unit) vs full auto.
 * @param deps - Caller-supplied hooks; see BootstrapDeps.
 * @returns false if the bootstrap aborted (e.g., guided flow returned,
 *   concurrent session detected). Returns true when ready to dispatch.
 */
export async function bootstrapAutoSession(
  s: AutoSession,
  ctx: ExtensionCommandContext,
  pi: ExtensionAPI,
  base: string,
  verboseMode: boolean,
  requestedStepMode: boolean,
  deps: BootstrapDeps,
): Promise<boolean> {
  const { shouldUseWorktreeIsolation, registerSigtermHandler, lockBase } = deps;

  // Ensure git repo exists (init on the configured main branch if not)
  if (!nativeIsRepo(base)) {
    const mainBranch = loadEffectiveGSDPreferences()?.preferences?.git?.main_branch || "main";
    nativeInit(base, mainBranch);
  }

  // Ensure .gitignore has baseline patterns
  const gitPrefs = loadEffectiveGSDPreferences()?.preferences?.git;
  const commitDocs = gitPrefs?.commit_docs;
  const manageGitignore = gitPrefs?.manage_gitignore;
  ensureGitignore(base, { commitDocs, manageGitignore });
  if (manageGitignore !== false) untrackRuntimeFiles(base);

  // Bootstrap .gsd/ if it doesn't exist; optionally commit the scaffold
  // (commitDocs defaults to on — only `false` opts out)
  const gsdDir = join(base, ".gsd");
  if (!existsSync(gsdDir)) {
    mkdirSync(join(gsdDir, "milestones"), { recursive: true });
    if (commitDocs !== false) {
      try {
        nativeAddAll(base);
        nativeCommit(base, "chore: init gsd");
      } catch { /* nothing to commit */ }
    }
  }

  // Initialize GitServiceImpl
  // NOTE(review): constructed against s.basePath, not `base` — presumably the
  // caller has already set s.basePath; it is re-created after worktree entry.
  s.gitService = new GitServiceImpl(s.basePath, loadEffectiveGSDPreferences()?.preferences?.git ?? {});

  // Check for crash from previous session. If the lock's PID is still alive,
  // refuse to start a concurrent session; otherwise attempt recovery and
  // clear the stale lock.
  const crashLock = readCrashLock(base);
  if (crashLock) {
    if (isLockProcessAlive(crashLock)) {
      ctx.ui.notify(
        `Another auto-mode session (PID ${crashLock.pid}) appears to be running.\nStop it with \`kill ${crashLock.pid}\` before starting a new session.`,
        "error",
      );
      return false;
    }
    // unitId is "<milestone>/<slice>/<task>" shaped; first segment = milestone
    const recoveredMid = crashLock.unitId.split("/")[0];
    const milestoneAlreadyComplete = recoveredMid
      ? !!resolveMilestoneFile(base, recoveredMid, "SUMMARY")
      : false;

    if (milestoneAlreadyComplete) {
      // The crashed unit's milestone already finished — its context is stale.
      ctx.ui.notify(
        `Crash recovery: discarding stale context for ${crashLock.unitId} — milestone ${recoveredMid} is already complete.`,
        "info",
      );
    } else {
      // Rebuild a recovery prompt from the crashed session's tool-call trace.
      const activityDir = join(gsdRoot(base), "activity");
      const recovery = synthesizeCrashRecovery(
        base, crashLock.unitType, crashLock.unitId,
        crashLock.sessionFile, activityDir,
      );
      if (recovery && recovery.trace.toolCallCount > 0) {
        // Stash the prompt; the dispatch path consumes pendingCrashRecovery.
        s.pendingCrashRecovery = recovery.prompt;
        ctx.ui.notify(
          `${formatCrashInfo(crashLock)}\nRecovered ${recovery.trace.toolCallCount} tool calls from crashed session. Resuming with full context.`,
          "warning",
        );
      } else {
        ctx.ui.notify(
          `${formatCrashInfo(crashLock)}\nNo session data recovered. Resuming from disk state.`,
          "warning",
        );
      }
    }
    clearLock(base);
  }

  // ── Debug mode ──
  // GSD_DEBUG=1 enables file logging; log environment fingerprint once.
  if (!isDebugEnabled() && process.env.GSD_DEBUG === "1") {
    enableDebug(base);
  }
  if (isDebugEnabled()) {
    const { isNativeParserAvailable } = await import("./native-parser-bridge.js");
    debugLog("debug-start", {
      platform: process.platform,
      arch: process.arch,
      node: process.version,
      model: ctx.model?.id ?? "unknown",
      provider: ctx.model?.provider ?? "unknown",
      nativeParser: isNativeParserAvailable(),
      cwd: base,
    });
    ctx.ui.notify(`Debug logging enabled → ${getDebugLogPath()}`, "info");
  }

  // Invalidate caches before initial state derivation
  invalidateAllCaches();

  // Clean stale runtime unit files for completed milestones (#887).
  // A runtime record whose milestone already has a SUMMARY is leftover state.
  try {
    const runtimeUnitsDir = join(gsdRoot(base), "runtime", "units");
    if (existsSync(runtimeUnitsDir)) {
      for (const file of readdirSync(runtimeUnitsDir)) {
        if (!file.endsWith(".json")) continue;
        // Extract milestone id, e.g. "M003" or "M003-ab12cd"
        const midMatch = file.match(/(M\d+(?:-[a-z0-9]{6})?)/);
        if (!midMatch) continue;
        const mid = midMatch[1];
        if (resolveMilestoneFile(base, mid, "SUMMARY")) {
          try { unlinkSync(join(runtimeUnitsDir, file)); } catch (e) { debugLog("stale-unit-cleanup-failed", { file, error: e instanceof Error ? e.message : String(e) }); }
        }
      }
    }
  } catch (e) { debugLog("stale-unit-dir-cleanup-failed", { error: e instanceof Error ? e.message : String(e) }); }

  let state = await deriveState(base);

  // Stale worktree state recovery (#654): an auto-worktree exists for the
  // active milestone but we're not inside it — derive state from it instead.
  if (
    state.activeMilestone &&
    shouldUseWorktreeIsolation() &&
    !detectWorktreeName(base)
  ) {
    const wtPath = getAutoWorktreePath(base, state.activeMilestone.id);
    if (wtPath) {
      state = await deriveState(wtPath);
    }
  }

  // Milestone branch recovery (#601): a prior session may have left a
  // milestone branch behind; if so, resume instead of re-entering discussion.
  let hasSurvivorBranch = false;
  if (
    state.activeMilestone &&
    (state.phase === "pre-planning" || state.phase === "needs-discussion") &&
    shouldUseWorktreeIsolation() &&
    !detectWorktreeName(base) &&
    !base.includes(`${pathSep}.gsd${pathSep}worktrees${pathSep}`)
  ) {
    const milestoneBranch = `milestone/${state.activeMilestone.id}`;
    const { nativeBranchExists } = await import("./native-git-bridge.js");
    hasSurvivorBranch = nativeBranchExists(base, milestoneBranch);
    if (hasSurvivorBranch) {
      ctx.ui.notify(
        `Found prior session branch ${milestoneBranch}. Resuming.`,
        "info",
      );
    }
  }

  if (!hasSurvivorBranch) {
    // No active work — start a new milestone via discuss flow
    if (!state.activeMilestone || state.phase === "complete") {
      const { showSmartEntry } = await import("./guided-flow.js");
      await showSmartEntry(ctx, pi, base, { step: requestedStepMode });

      // Re-derive after the guided flow may have written milestone files.
      invalidateAllCaches();
      const postState = await deriveState(base);
      if (postState.activeMilestone && postState.phase !== "complete" && postState.phase !== "pre-planning") {
        state = postState;
      } else if (postState.activeMilestone && postState.phase === "pre-planning") {
        // Pre-planning is acceptable only if the discussion produced CONTEXT.
        const contextFile = resolveMilestoneFile(base, postState.activeMilestone.id, "CONTEXT");
        const hasContext = !!(contextFile && await loadFile(contextFile));
        if (hasContext) {
          state = postState;
        } else {
          ctx.ui.notify(
            "Discussion completed but no milestone context was written. Run /gsd to try the discussion again, or /gsd auto after creating the milestone manually.",
            "warning",
          );
          return false;
        }
      } else {
        return false;
      }
    }

    // Active milestone exists but has no roadmap — require CONTEXT before auto
    if (state.phase === "pre-planning") {
      const mid = state.activeMilestone!.id;
      const contextFile = resolveMilestoneFile(base, mid, "CONTEXT");
      const hasContext = !!(contextFile && await loadFile(contextFile));
      if (!hasContext) {
        const { showSmartEntry } = await import("./guided-flow.js");
        await showSmartEntry(ctx, pi, base, { step: requestedStepMode });

        invalidateAllCaches();
        const postState = await deriveState(base);
        if (postState.activeMilestone && postState.phase !== "pre-planning") {
          state = postState;
        } else {
          ctx.ui.notify(
            "Discussion completed but milestone context is still missing. Run /gsd to try again.",
            "warning",
          );
          return false;
        }
      }
    }
  }

  // Unreachable safety check — all earlier paths either set activeMilestone
  // or returned; fall back to the guided entry if somehow neither happened.
  if (!state.activeMilestone) {
    const { showSmartEntry } = await import("./guided-flow.js");
    await showSmartEntry(ctx, pi, base, { step: requestedStepMode });
    return false;
  }

  // ── Initialize session state ──
  s.active = true;
  s.stepMode = requestedStepMode;
  s.verbose = verboseMode;
  s.cmdCtx = ctx;
  s.basePath = base;
  s.unitDispatchCount.clear();
  s.unitRecoveryCount.clear();
  s.unitConsecutiveSkips.clear();
  s.lastBudgetAlertLevel = 0;
  s.unitLifetimeDispatches.clear();
  s.completedKeySet.clear();
  loadPersistedKeys(base, s.completedKeySet);
  resetHookState();
  restoreHookState(base);
  resetProactiveHealing();
  s.autoStartTime = Date.now();
  s.resourceVersionOnStart = readResourceVersion();
  s.completedUnits = [];
  s.pendingQuickTasks = [];
  s.currentUnit = null;
  s.currentMilestoneId = state.activeMilestone?.id ?? null;
  // Remember the model active at start so it can be restored later.
  s.originalModelId = ctx.model?.id ?? null;
  s.originalModelProvider = ctx.model?.provider ?? null;

  // Register SIGTERM handler
  registerSigtermHandler(base);

  // Capture integration branch (skipped when isolation is disabled)
  if (s.currentMilestoneId) {
    if (getIsolationMode() !== "none") {
      captureIntegrationBranch(base, s.currentMilestoneId, { commitDocs });
    }
    setActiveMilestoneId(base, s.currentMilestoneId);
  }

  // ── Auto-worktree setup ──
  s.originalBasePath = base;

  // True when `p` is inside (or is) a .gsd/worktrees directory.
  const isUnderGsdWorktrees = (p: string): boolean => {
    const marker = `${pathSep}.gsd${pathSep}worktrees${pathSep}`;
    if (p.includes(marker)) return true;
    const worktreesSuffix = `${pathSep}.gsd${pathSep}worktrees`;
    return p.endsWith(worktreesSuffix);
  };

  // Enter (or create) the milestone's worktree; from here on s.basePath may
  // point at the worktree while s.originalBasePath keeps the project root.
  if (s.currentMilestoneId && shouldUseWorktreeIsolation() && !detectWorktreeName(base) && !isUnderGsdWorktrees(base)) {
    try {
      const existingWtPath = getAutoWorktreePath(base, s.currentMilestoneId);
      if (existingWtPath) {
        const wtPath = enterAutoWorktree(base, s.currentMilestoneId);
        s.basePath = wtPath;
        // Re-create the git service rooted at the worktree.
        s.gitService = new GitServiceImpl(s.basePath, loadEffectiveGSDPreferences()?.preferences?.git ?? {});
        ctx.ui.notify(`Entered auto-worktree at ${wtPath}`, "info");
      } else {
        const wtPath = createAutoWorktree(base, s.currentMilestoneId);
        s.basePath = wtPath;
        s.gitService = new GitServiceImpl(s.basePath, loadEffectiveGSDPreferences()?.preferences?.git ?? {});
        ctx.ui.notify(`Created auto-worktree at ${wtPath}`, "info");
      }
      registerSigtermHandler(s.originalBasePath);

      // Load completed keys from BOTH locations (root was loaded above)
      if (s.basePath !== s.originalBasePath) {
        loadPersistedKeys(s.basePath, s.completedKeySet);
      }
    } catch (err) {
      // Worktree setup is best-effort — fall back to running in the root.
      ctx.ui.notify(
        `Auto-worktree setup failed: ${err instanceof Error ? err.message : String(err)}. Continuing in project root.`,
        "warning",
      );
    }
  }

  // ── DB lifecycle ──
  // One-time migration: markdown artifacts exist but no gsd.db yet.
  const gsdDbPath = join(s.basePath, ".gsd", "gsd.db");
  const gsdDirPath = join(s.basePath, ".gsd");
  if (existsSync(gsdDirPath) && !existsSync(gsdDbPath)) {
    const hasDecisions = existsSync(join(gsdDirPath, "DECISIONS.md"));
    const hasRequirements = existsSync(join(gsdDirPath, "REQUIREMENTS.md"));
    const hasMilestones = existsSync(join(gsdDirPath, "milestones"));
    if (hasDecisions || hasRequirements || hasMilestones) {
      try {
        const { openDatabase: openDb } = await import("./gsd-db.js");
        const { migrateFromMarkdown } = await import("./md-importer.js");
        openDb(gsdDbPath);
        migrateFromMarkdown(s.basePath);
      } catch (err) {
        process.stderr.write(`gsd-migrate: auto-migration failed: ${(err as Error).message}\n`);
      }
    }
  }
  // Existing DB but no open handle yet — open it now.
  if (existsSync(gsdDbPath) && !isDbAvailable()) {
    try {
      const { openDatabase: openDb } = await import("./gsd-db.js");
      openDb(gsdDbPath);
    } catch (err) {
      process.stderr.write(`gsd-db: failed to open existing database: ${(err as Error).message}\n`);
    }
  }

  // Initialize metrics
  initMetrics(s.basePath);

  // Initialize routing history
  initRoutingHistory(s.basePath);

  // Capture session's model at auto-mode start (#650)
  const currentModel = ctx.model;
  if (currentModel) {
    s.autoModeStartModel = { provider: currentModel.provider, id: currentModel.id };
  }

  // Snapshot installed skills (unless skill discovery is disabled)
  if (resolveSkillDiscoveryMode() !== "off") {
    snapshotSkills();
  }

  // Announce start mode and remaining-milestone scope in the UI.
  ctx.ui.setStatus("gsd-auto", s.stepMode ? "next" : "auto");
  ctx.ui.setFooter(hideFooter);
  const modeLabel = s.stepMode ? "Step-mode" : "Auto-mode";
  const pendingCount = state.registry.filter(m => m.status !== 'complete').length;
  const scopeMsg = pendingCount > 1
    ? `Will loop through ${pendingCount} milestones.`
    : "Will loop until milestone complete.";
  ctx.ui.notify(`${modeLabel} started. ${scopeMsg}`, "info");

  // Write initial lock file (unit count 0 — nothing dispatched yet)
  writeLock(lockBase(), "starting", s.currentMilestoneId ?? "unknown", 0);

  // Secrets collection gate — prompt for any secrets the milestone manifest
  // still lists as pending. Failures here are non-fatal.
  const mid = state.activeMilestone!.id;
  try {
    const manifestStatus = await getManifestStatus(base, mid);
    if (manifestStatus && manifestStatus.pending.length > 0) {
      const result = await collectSecretsFromManifest(base, mid, ctx);
      if (result && result.applied && result.skipped && result.existingSkipped) {
        ctx.ui.notify(
          `Secrets collected: ${result.applied.length} applied, ${result.skipped.length} skipped, ${result.existingSkipped.length} already set.`,
          "info",
        );
      } else {
        ctx.ui.notify("Secrets collection skipped.", "info");
      }
    }
  } catch (err) {
    ctx.ui.notify(
      `Secrets collection error: ${err instanceof Error ? err.message : String(err)}. Continuing with next task.`,
      "warning",
    );
  }

  // Self-heal: clear stale runtime records
  await selfHealRuntimeRecords(s.basePath, ctx, s.completedKeySet);

  // Self-heal: remove stale .git/index.lock (only if older than 60s, to
  // avoid racing a live git process)
  try {
    const gitLockFile = join(base, ".git", "index.lock");
    if (existsSync(gitLockFile)) {
      const lockAge = Date.now() - statSync(gitLockFile).mtimeMs;
      if (lockAge > 60_000) {
        unlinkSync(gitLockFile);
        ctx.ui.notify("Removed stale .git/index.lock from prior crash.", "info");
      }
    }
  } catch (e) { debugLog("git-lock-cleanup-failed", { error: e instanceof Error ? e.message : String(e) }); }

  // Pre-flight: validate milestone queue — warn about milestones that still
  // have a CONTEXT-DRAFT (they will pause auto-mode for discussion).
  try {
    const msDir = join(base, ".gsd", "milestones");
    if (existsSync(msDir)) {
      const milestoneIds = readdirSync(msDir, { withFileTypes: true })
        .filter(d => d.isDirectory() && /^M\d{3}/.test(d.name))
        .map(d => d.name.match(/^(M\d{3})/)?.[1] ?? d.name);
      if (milestoneIds.length > 1) {
        const issues: string[] = [];
        for (const id of milestoneIds) {
          const draft = resolveMilestoneFile(base, id, "CONTEXT-DRAFT");
          if (draft) issues.push(`${id}: has CONTEXT-DRAFT.md (will pause for discussion)`);
        }
        if (issues.length > 0) {
          ctx.ui.notify(`Pre-flight: ${milestoneIds.length} milestones queued.\n${issues.map(i => ` ⚠ ${i}`).join("\n")}`, "warning");
        } else {
          ctx.ui.notify(`Pre-flight: ${milestoneIds.length} milestones queued. All have full context.`, "info");
        }
      }
    }
  } catch { /* non-fatal */ }

  return true;
}
|
||||
220
src/resources/extensions/gsd/auto-stuck-detection.ts
Normal file
220
src/resources/extensions/gsd/auto-stuck-detection.ts
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
/**
|
||||
* Stuck detection and loop recovery for auto-mode unit dispatch.
|
||||
*
|
||||
* Tracks dispatch counts per unit, enforces lifetime caps, and attempts
|
||||
* stub/artifact recovery before stopping.
|
||||
*
|
||||
* Extracted from dispatchNextUnit() in auto.ts. Returns action values
|
||||
* instead of calling stopAuto/dispatchNextUnit — the caller handles
|
||||
* control flow.
|
||||
*/
|
||||
|
||||
import type { ExtensionContext } from "@gsd/pi-coding-agent";
|
||||
import {
|
||||
inspectExecuteTaskDurability,
|
||||
} from "./unit-runtime.js";
|
||||
import {
|
||||
verifyExpectedArtifact,
|
||||
diagnoseExpectedArtifact,
|
||||
skipExecuteTask,
|
||||
persistCompletedKey,
|
||||
buildLoopRemediationSteps,
|
||||
} from "./auto-recovery.js";
|
||||
import { closeoutUnit, type CloseoutOptions } from "./auto-unit-closeout.js";
|
||||
import { saveActivityLog } from "./activity-log.js";
|
||||
import { invalidateAllCaches } from "./cache.js";
|
||||
import { sendDesktopNotification } from "./notifications.js";
|
||||
import { debugLog } from "./debug-logger.js";
|
||||
import {
|
||||
resolveMilestonePath,
|
||||
resolveSlicePath,
|
||||
resolveTasksDir,
|
||||
buildTaskFileName,
|
||||
} from "./paths.js";
|
||||
import {
|
||||
MAX_UNIT_DISPATCHES,
|
||||
STUB_RECOVERY_THRESHOLD,
|
||||
MAX_LIFETIME_DISPATCHES,
|
||||
} from "./auto/session.js";
|
||||
import type { AutoSession } from "./auto/session.js";
|
||||
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
/**
 * Inputs for checkStuckAndRecover(), bundled so the extracted module
 * does not need to import anything from ./auto.js.
 */
export interface StuckContext {
  // Mutable auto-mode session (dispatch counters, completed-key set, etc.)
  s: AutoSession;
  // Extension context, used for UI notifications and unit closeout
  ctx: ExtensionContext;
  // Unit type, e.g. "execute-task" or "complete-milestone"
  unitType: string;
  // Unit identifier; for execute-task shaped as "<mid>/<sid>/<tid>"
  unitId: string;
  // Path artifacts are resolved against (may be a worktree path)
  basePath: string;
  // Builds the closeout/snapshot options lazily, at the moment of closeout
  buildSnapshotOpts: () => CloseoutOptions & Record<string, unknown>;
}
|
||||
|
||||
/**
 * Outcome of a stuck-detection pass. The caller owns control flow:
 * - "proceed": dispatch the unit normally.
 * - "recovered": the unit was reconciled on disk; re-enter dispatch to
 *   pick the next unit (dispatchAgain is always true).
 * - "stop": a loop was detected and not recoverable; caller stops auto-mode
 *   with `reason` and shows `notifyMessage` when present.
 */
export type StuckResult =
  | { action: "proceed" }
  | { action: "recovered"; dispatchAgain: true }
  | { action: "stop"; reason: string; notifyMessage?: string };
|
||||
|
||||
/**
 * Check dispatch counts, enforce lifetime cap and MAX_UNIT_DISPATCHES,
 * attempt stub/artifact recovery. Returns an action for the caller.
 *
 * Recovery ladder, in order:
 * 1. Hard lifetime cap (survives counter resets) → stop.
 * 2. Per-cycle cap reached → try execute-task reconciliation, then generic
 *    artifact verification, then a stub summary for complete-milestone;
 *    any success returns "recovered", otherwise stop.
 * 3. Retry (prevCount > 0) → adaptive self-repair (checkbox fix, stub
 *    placeholder) before proceeding with a warning.
 *
 * @param sctx - Session, context, and unit identity; see StuckContext.
 * @returns StuckResult describing how the caller should continue.
 */
export async function checkStuckAndRecover(sctx: StuckContext): Promise<StuckResult> {
  const { s, ctx, unitType, unitId, basePath, buildSnapshotOpts } = sctx;
  const dispatchKey = `${unitType}/${unitId}`;
  const prevCount = s.unitDispatchCount.get(dispatchKey) ?? 0;

  // Real dispatch reached — clear the consecutive-skip counter for this unit.
  s.unitConsecutiveSkips.delete(dispatchKey);

  debugLog("dispatch-unit", {
    type: unitType,
    id: unitId,
    cycle: prevCount + 1,
    lifetime: (s.unitLifetimeDispatches.get(dispatchKey) ?? 0) + 1,
  });

  // Hard lifetime cap — survives counter resets from loop-recovery/self-repair.
  const lifetimeCount = (s.unitLifetimeDispatches.get(dispatchKey) ?? 0) + 1;
  s.unitLifetimeDispatches.set(dispatchKey, lifetimeCount);
  if (lifetimeCount > MAX_LIFETIME_DISPATCHES) {
    // Close out (or at least log) the current unit before stopping.
    if (s.currentUnit) {
      await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt, buildSnapshotOpts());
    } else {
      saveActivityLog(ctx, s.basePath, unitType, unitId);
    }
    const expected = diagnoseExpectedArtifact(unitType, unitId, basePath);
    return {
      action: "stop",
      reason: `Hard loop: ${unitType} ${unitId}`,
      notifyMessage: `Hard loop detected: ${unitType} ${unitId} dispatched ${lifetimeCount} times total (across reconciliation cycles).${expected ? `\n  Expected artifact: ${expected}` : ""}\n  This may indicate deriveState() keeps returning the same unit despite artifacts existing.\n  Check .gsd/completed-units.json and the slice plan checkbox state.`,
    };
  }

  // Per-cycle cap: the unit has been dispatched MAX_UNIT_DISPATCHES times
  // without producing its expected artifact. Try reconciliation before stop.
  if (prevCount >= MAX_UNIT_DISPATCHES) {
    if (s.currentUnit) {
      await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt, buildSnapshotOpts());
    } else {
      saveActivityLog(ctx, s.basePath, unitType, unitId);
    }

    // Final reconciliation pass for execute-task: mark the task done on disk
    // with blocker artifacts so the pipeline can advance.
    if (unitType === "execute-task") {
      const [mid, sid, tid] = unitId.split("/");
      if (mid && sid && tid) {
        const status = await inspectExecuteTaskDurability(basePath, unitId);
        if (status) {
          const reconciled = skipExecuteTask(basePath, mid, sid, tid, status, "loop-recovery", prevCount);
          if (reconciled && verifyExpectedArtifact(unitType, unitId, basePath)) {
            ctx.ui.notify(
              `Loop recovery: ${unitId} reconciled after ${prevCount + 1} dispatches — blocker artifacts written, pipeline advancing.\n  Review ${status.summaryPath} and replace the placeholder with real work.`,
              "warning",
            );
            // Persist + reset counters so the next dispatch picks a new unit.
            const reconciledKey = `${unitType}/${unitId}`;
            persistCompletedKey(basePath, reconciledKey);
            s.completedKeySet.add(reconciledKey);
            s.unitDispatchCount.delete(dispatchKey);
            invalidateAllCaches();
            return { action: "recovered", dispatchAgain: true };
          }
        }
      }
    }

    // General reconciliation: artifact appeared on last attempt
    if (verifyExpectedArtifact(unitType, unitId, basePath)) {
      ctx.ui.notify(
        `Loop recovery: ${unitType} ${unitId} — artifact verified after ${prevCount + 1} dispatches. Advancing.`,
        "info",
      );
      persistCompletedKey(basePath, dispatchKey);
      s.completedKeySet.add(dispatchKey);
      s.unitDispatchCount.delete(dispatchKey);
      invalidateAllCaches();
      return { action: "recovered", dispatchAgain: true };
    }

    // Last resort for complete-milestone: generate stub summary so the
    // milestone can close; the stub is flagged for later human review.
    if (unitType === "complete-milestone") {
      try {
        const mPath = resolveMilestonePath(basePath, unitId);
        if (mPath) {
          const stubPath = join(mPath, `${unitId}-SUMMARY.md`);
          if (!existsSync(stubPath)) {
            writeFileSync(stubPath, `# ${unitId} Summary\n\nAuto-generated stub — milestone tasks completed but summary generation failed after ${prevCount + 1} attempts.\nReview and replace this stub with a proper summary.\n`);
            ctx.ui.notify(`Generated stub summary for ${unitId} to unblock pipeline. Review later.`, "warning");
            persistCompletedKey(basePath, dispatchKey);
            s.completedKeySet.add(dispatchKey);
            s.unitDispatchCount.delete(dispatchKey);
            invalidateAllCaches();
            return { action: "recovered", dispatchAgain: true };
          }
        }
      } catch { /* non-fatal — fall through to normal stop */ }
    }

    // All recovery options exhausted — stop with diagnostics.
    const expected = diagnoseExpectedArtifact(unitType, unitId, basePath);
    const remediation = buildLoopRemediationSteps(unitType, unitId, basePath);
    sendDesktopNotification("GSD", `Loop detected: ${unitType} ${unitId}`, "error", "error");
    return {
      action: "stop",
      reason: `Loop: ${unitType} ${unitId}`,
      notifyMessage: `Loop detected: ${unitType} ${unitId} dispatched ${prevCount + 1} times total. Expected artifact not found.${expected ? `\n  Expected: ${expected}` : ""}${remediation ? `\n\n  Remediation steps:\n${remediation}` : "\n  Check branch state and .gsd/ artifacts."}`,
    };
  }

  // Below the cap — record this dispatch attempt.
  s.unitDispatchCount.set(dispatchKey, prevCount + 1);

  if (prevCount > 0) {
    // Adaptive self-repair: each retry attempts a different remediation step.
    if (unitType === "execute-task") {
      const status = await inspectExecuteTaskDurability(basePath, unitId);
      const [mid, sid, tid] = unitId.split("/");
      if (status && mid && sid && tid) {
        if (status.summaryExists && !status.taskChecked) {
          // Summary was written but the plan checkbox wasn't ticked — fix it.
          const repaired = skipExecuteTask(basePath, mid, sid, tid, status, "self-repair", 0);
          if (repaired && verifyExpectedArtifact(unitType, unitId, basePath)) {
            ctx.ui.notify(
              `Self-repaired ${unitId}: summary existed but checkbox was unmarked. Marked [x] and advancing.`,
              "warning",
            );
            const repairedKey = `${unitType}/${unitId}`;
            persistCompletedKey(basePath, repairedKey);
            s.completedKeySet.add(repairedKey);
            s.unitDispatchCount.delete(dispatchKey);
            invalidateAllCaches();
            return { action: "recovered", dispatchAgain: true };
          }
        } else if (prevCount >= STUB_RECOVERY_THRESHOLD && !status.summaryExists) {
          // No summary after several attempts — write a placeholder so the
          // next retry has explicit recovery context on disk.
          const tasksDir = resolveTasksDir(basePath, mid, sid);
          const sDir = resolveSlicePath(basePath, mid, sid);
          const targetDir = tasksDir ?? (sDir ? join(sDir, "tasks") : null);
          if (targetDir) {
            if (!existsSync(targetDir)) mkdirSync(targetDir, { recursive: true });
            const summaryPath = join(targetDir, buildTaskFileName(tid, "SUMMARY"));
            if (!existsSync(summaryPath)) {
              const stubContent = [
                `# PARTIAL RECOVERY — attempt ${prevCount + 1} of ${MAX_UNIT_DISPATCHES}`,
                ``,
                `Task \`${tid}\` in slice \`${sid}\` (milestone \`${mid}\`) has not yet produced a real summary.`,
                `This placeholder was written by auto-mode after ${prevCount} dispatch attempts.`,
                ``,
                `The next agent session will retry this task. Replace this file with real work when done.`,
              ].join("\n");
              writeFileSync(summaryPath, stubContent, "utf-8");
              ctx.ui.notify(
                `Stub recovery (attempt ${prevCount + 1}/${MAX_UNIT_DISPATCHES}): ${unitId} stub summary placeholder written. Retrying with recovery context.`,
                "warning",
              );
            }
          }
        }
      }
    }
    // Still retrying — surface progress to the user.
    ctx.ui.notify(
      `${unitType} ${unitId} didn't produce expected artifact. Retrying (${prevCount + 1}/${MAX_UNIT_DISPATCHES}).`,
      "warning",
    );
  }

  return { action: "proceed" };
}
|
||||
223
src/resources/extensions/gsd/auto-timers.ts
Normal file
223
src/resources/extensions/gsd/auto-timers.ts
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
/**
|
||||
* Unit supervision timers — soft timeout warning, idle watchdog,
|
||||
* hard timeout, and context-pressure monitor.
|
||||
*
|
||||
* Extracted from dispatchNextUnit() in auto.ts. All timers are set up
|
||||
* via startUnitSupervision() and torn down by the caller via clearUnitTimeout().
|
||||
*/
|
||||
|
||||
import type { ExtensionAPI, ExtensionContext } from "@gsd/pi-coding-agent";
|
||||
import { readUnitRuntimeRecord, writeUnitRuntimeRecord } from "./unit-runtime.js";
|
||||
import { resolveAutoSupervisorConfig } from "./preferences.js";
|
||||
import type { GSDPreferences } from "./preferences.js";
|
||||
import { computeBudgets, resolveExecutorContextWindow } from "./context-budget.js";
|
||||
import {
|
||||
getInFlightToolCount,
|
||||
getOldestInFlightToolStart,
|
||||
} from "./auto-tool-tracking.js";
|
||||
import { detectWorkingTreeActivity } from "./auto-supervisor.js";
|
||||
import { closeoutUnit, type CloseoutOptions } from "./auto-unit-closeout.js";
|
||||
import { saveActivityLog } from "./activity-log.js";
|
||||
import { recoverTimedOutUnit, type RecoveryContext } from "./auto-timeout-recovery.js";
|
||||
import type { AutoSession } from "./auto/session.js";
|
||||
|
||||
/**
 * Inputs for startUnitSupervision(), bundled so the extracted timers module
 * does not need to import anything from ./auto.js.
 */
export interface SupervisionContext {
  // Mutable auto-mode session; timer handles are stored on it
  s: AutoSession;
  // Extension context for UI notifications
  ctx: ExtensionContext;
  // Extension API used to send wrapup/continue messages to the agent
  pi: ExtensionAPI;
  // Unit type currently being supervised
  unitType: string;
  // Unit identifier currently being supervised
  unitId: string;
  // Effective preferences; may be undefined when none are configured
  prefs: GSDPreferences | undefined;
  // Builds closeout/snapshot options lazily at timeout time
  buildSnapshotOpts: () => CloseoutOptions & Record<string, unknown>;
  // Builds the recovery context passed to recoverTimedOutUnit
  buildRecoveryContext: () => RecoveryContext;
  // Caller's pause entry point, invoked when recovery fails
  pauseAuto: (ctx?: ExtensionContext, pi?: ExtensionAPI) => Promise<void>;
}
|
||||
|
||||
/**
|
||||
* Set up all four supervision timers for the current unit:
|
||||
* 1. Soft timeout warning (wrapup)
|
||||
* 2. Idle watchdog (progress polling, stuck tool detection)
|
||||
* 3. Hard timeout (pause + recovery)
|
||||
* 4. Context-pressure monitor (continue-here)
|
||||
*/
|
||||
export function startUnitSupervision(sctx: SupervisionContext): void {
|
||||
const { s, ctx, pi, unitType, unitId, prefs, buildSnapshotOpts, buildRecoveryContext, pauseAuto } = sctx;
|
||||
|
||||
const supervisor = resolveAutoSupervisorConfig();
|
||||
const softTimeoutMs = (supervisor.soft_timeout_minutes ?? 0) * 60 * 1000;
|
||||
const idleTimeoutMs = (supervisor.idle_timeout_minutes ?? 0) * 60 * 1000;
|
||||
const hardTimeoutMs = (supervisor.hard_timeout_minutes ?? 0) * 60 * 1000;
|
||||
|
||||
// ── 1. Soft timeout warning ──
|
||||
s.wrapupWarningHandle = setTimeout(() => {
|
||||
s.wrapupWarningHandle = null;
|
||||
if (!s.active || !s.currentUnit) return;
|
||||
writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
|
||||
phase: "wrapup-warning-sent",
|
||||
wrapupWarningSent: true,
|
||||
});
|
||||
pi.sendMessage(
|
||||
{
|
||||
customType: "gsd-auto-wrapup",
|
||||
display: s.verbose,
|
||||
content: [
|
||||
"**TIME BUDGET WARNING — keep going only if progress is real.**",
|
||||
"This unit crossed the soft time budget.",
|
||||
"If you are making progress, continue. If not, switch to wrap-up mode now:",
|
||||
"1. rerun the minimal required verification",
|
||||
"2. write or update the required durable artifacts",
|
||||
"3. mark task or slice state on disk correctly",
|
||||
"4. leave precise resume notes if anything remains unfinished",
|
||||
].join("\n"),
|
||||
},
|
||||
{ triggerTurn: true },
|
||||
);
|
||||
}, softTimeoutMs);
|
||||
|
||||
// ── 2. Idle watchdog ──
|
||||
s.idleWatchdogHandle = setInterval(async () => {
|
||||
try {
|
||||
if (!s.active || !s.currentUnit) return;
|
||||
const runtime = readUnitRuntimeRecord(s.basePath, unitType, unitId);
|
||||
if (!runtime) return;
|
||||
if (Date.now() - runtime.lastProgressAt < idleTimeoutMs) return;
|
||||
|
||||
// Agent has tool calls currently executing — not idle, just waiting.
|
||||
// But only suppress recovery if the tool started recently.
|
||||
if (getInFlightToolCount() > 0) {
|
||||
const oldestStart = getOldestInFlightToolStart()!;
|
||||
const toolAgeMs = Date.now() - oldestStart;
|
||||
if (toolAgeMs < idleTimeoutMs) {
|
||||
writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
|
||||
lastProgressAt: Date.now(),
|
||||
lastProgressKind: "tool-in-flight",
|
||||
});
|
||||
return;
|
||||
}
|
||||
ctx.ui.notify(
|
||||
`Stalled tool detected: a tool has been in-flight for ${Math.round(toolAgeMs / 60000)}min. Treating as hung — attempting idle recovery.`,
|
||||
"warning",
|
||||
);
|
||||
}
|
||||
|
||||
// Check if the agent is producing work on disk.
|
||||
if (detectWorkingTreeActivity(s.basePath)) {
|
||||
writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
|
||||
lastProgressAt: Date.now(),
|
||||
lastProgressKind: "filesystem-activity",
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (s.currentUnit) {
|
||||
await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt, buildSnapshotOpts());
|
||||
} else {
|
||||
saveActivityLog(ctx, s.basePath, unitType, unitId);
|
||||
}
|
||||
|
||||
const recovery = await recoverTimedOutUnit(ctx, pi, unitType, unitId, "idle", buildRecoveryContext());
|
||||
if (recovery === "recovered") return;
|
||||
|
||||
writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
|
||||
phase: "paused",
|
||||
});
|
||||
ctx.ui.notify(
|
||||
`Unit ${unitType} ${unitId} made no meaningful progress for ${supervisor.idle_timeout_minutes}min. Pausing auto-mode.`,
|
||||
"warning",
|
||||
);
|
||||
await pauseAuto(ctx, pi);
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
console.error(`[idle-watchdog] Unhandled error: ${message}`);
|
||||
try {
|
||||
ctx.ui.notify(`Idle watchdog error: ${message}`, "warning");
|
||||
} catch { /* best effort */ }
|
||||
}
|
||||
}, 15000);
|
||||
|
||||
// ── 3. Hard timeout ──
|
||||
s.unitTimeoutHandle = setTimeout(async () => {
|
||||
try {
|
||||
s.unitTimeoutHandle = null;
|
||||
if (!s.active) return;
|
||||
if (s.currentUnit) {
|
||||
writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit.startedAt, {
|
||||
phase: "timeout",
|
||||
timeoutAt: Date.now(),
|
||||
});
|
||||
await closeoutUnit(ctx, s.basePath, s.currentUnit.type, s.currentUnit.id, s.currentUnit.startedAt, buildSnapshotOpts());
|
||||
} else {
|
||||
saveActivityLog(ctx, s.basePath, unitType, unitId);
|
||||
}
|
||||
|
||||
const recovery = await recoverTimedOutUnit(ctx, pi, unitType, unitId, "hard", buildRecoveryContext());
|
||||
if (recovery === "recovered") return;
|
||||
|
||||
ctx.ui.notify(
|
||||
`Unit ${unitType} ${unitId} exceeded ${supervisor.hard_timeout_minutes}min hard timeout. Pausing auto-mode.`,
|
||||
"warning",
|
||||
);
|
||||
await pauseAuto(ctx, pi);
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
console.error(`[hard-timeout] Unhandled error: ${message}`);
|
||||
try {
|
||||
ctx.ui.notify(`Hard timeout error: ${message}`, "warning");
|
||||
} catch { /* best effort */ }
|
||||
}
|
||||
}, hardTimeoutMs);
|
||||
|
||||
// ── 4. Context-pressure continue-here monitor ──
|
||||
if (s.continueHereHandle) {
|
||||
clearInterval(s.continueHereHandle);
|
||||
s.continueHereHandle = null;
|
||||
}
|
||||
const executorContextWindow = resolveExecutorContextWindow(
|
||||
ctx.modelRegistry as Parameters<typeof resolveExecutorContextWindow>[0],
|
||||
prefs as Parameters<typeof resolveExecutorContextWindow>[1],
|
||||
ctx.model?.contextWindow,
|
||||
);
|
||||
const continueHereThreshold = computeBudgets(executorContextWindow).continueThresholdPercent;
|
||||
s.continueHereHandle = setInterval(() => {
|
||||
if (!s.active || !s.currentUnit || !s.cmdCtx) return;
|
||||
const runtime = readUnitRuntimeRecord(s.basePath, unitType, unitId);
|
||||
if (runtime?.continueHereFired) return;
|
||||
|
||||
const contextUsage = s.cmdCtx.getContextUsage();
|
||||
if (!contextUsage || contextUsage.percent == null || contextUsage.percent < continueHereThreshold) return;
|
||||
|
||||
writeUnitRuntimeRecord(s.basePath, unitType, unitId, s.currentUnit!.startedAt, {
|
||||
continueHereFired: true,
|
||||
});
|
||||
|
||||
if (s.verbose) {
|
||||
ctx.ui.notify(
|
||||
`Context at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%) — sending wrap-up signal.`,
|
||||
"info",
|
||||
);
|
||||
}
|
||||
|
||||
pi.sendMessage(
|
||||
{
|
||||
customType: "gsd-auto-wrapup",
|
||||
display: s.verbose,
|
||||
content: [
|
||||
"**CONTEXT BUDGET WARNING — wrap up this unit now.**",
|
||||
`Context window is at ${contextUsage.percent}% (threshold: ${continueHereThreshold}%).`,
|
||||
"The next unit needs a fresh context to work effectively. Wrap up now:",
|
||||
"1. Finish any in-progress file writes",
|
||||
"2. Write or update the required durable artifacts (summary, checkboxes)",
|
||||
"3. Mark task state on disk correctly",
|
||||
"4. Leave precise resume notes if anything remains unfinished",
|
||||
"Do NOT start new sub-tasks or investigations.",
|
||||
].join("\n"),
|
||||
},
|
||||
{ triggerTurn: true },
|
||||
);
|
||||
|
||||
if (s.continueHereHandle) {
|
||||
clearInterval(s.continueHereHandle);
|
||||
s.continueHereHandle = null;
|
||||
}
|
||||
}, 15_000);
|
||||
}
|
||||
195
src/resources/extensions/gsd/auto-verification.ts
Normal file
195
src/resources/extensions/gsd/auto-verification.ts
Normal file
|
|
@ -0,0 +1,195 @@
|
|||
/**
|
||||
* Post-unit verification gate for auto-mode.
|
||||
*
|
||||
* Runs typecheck/lint/test checks, captures runtime errors, performs
|
||||
* dependency audits, handles auto-fix retry logic, and writes
|
||||
* verification evidence JSON.
|
||||
*
|
||||
* Extracted from handleAgentEnd() in auto.ts. Returns a sentinel
|
||||
* value instead of calling return/pauseAuto directly — the caller
|
||||
* checks the result and handles control flow.
|
||||
*/
|
||||
|
||||
import type { ExtensionContext, ExtensionAPI } from "@gsd/pi-coding-agent";
|
||||
import { loadFile, parsePlan } from "./files.js";
|
||||
import { resolveSliceFile, resolveSlicePath } from "./paths.js";
|
||||
import { loadEffectiveGSDPreferences } from "./preferences.js";
|
||||
import {
|
||||
runVerificationGate,
|
||||
formatFailureContext,
|
||||
captureRuntimeErrors,
|
||||
runDependencyAudit,
|
||||
} from "./verification-gate.js";
|
||||
import { writeVerificationJSON } from "./verification-evidence.js";
|
||||
import { removePersistedKey } from "./auto-recovery.js";
|
||||
import type { AutoSession, PendingVerificationRetry } from "./auto/session.js";
|
||||
import { join } from "node:path";
|
||||
|
||||
/**
 * Bundle of long-lived objects the verification gate needs, passed in by
 * the auto-mode orchestrator so this module never imports from ./auto.js.
 */
export interface VerificationContext {
  /** Active auto-mode session: current unit, base path, retry counters, pending-retry slot. */
  s: AutoSession;
  /** Host extension context — used here for `ctx.ui.notify` and passed through to callbacks. */
  ctx: ExtensionContext;
  /** Extension API handle — only forwarded to the dispatch/pause callbacks, not used directly. */
  pi: ExtensionAPI;
}
|
||||
|
||||
/** Sentinel returned to the caller: "continue" = proceed, "retry" = re-dispatch already issued, "pause" = auto-mode already paused. */
export type VerificationResult = "continue" | "retry" | "pause";
|
||||
|
||||
/**
 * Run the verification gate for the current execute-task unit.
 *
 * Pipeline: resolve the task's `verify` command from the slice PLAN →
 * run the configured typecheck/lint/test checks → capture runtime errors →
 * run a dependency audit → write verification evidence JSON → decide
 * between pass, auto-fix retry, or pause.
 *
 * Control flow is signalled via the return value; side effects
 * (dispatchNextUnit / pauseAuto) have ALREADY happened when the
 * corresponding sentinel is returned:
 *  - "continue" — gate passed (or no checks configured, or the gate itself
 *    errored — gate errors are non-fatal), proceed normally
 *  - "retry" — gate failed with retries remaining, dispatchNextUnit already called
 *  - "pause" — gate failed with retries exhausted, pauseAuto already called
 *
 * @param vctx session + extension context bundle
 * @param dispatchNextUnit orchestrator callback used to re-dispatch the failed unit for an auto-fix attempt
 * @param startDispatchGapWatchdog fallback watchdog armed only if the retry dispatch itself throws
 * @param pauseAuto orchestrator callback that pauses auto-mode for human review
 */
export async function runPostUnitVerification(
  vctx: VerificationContext,
  dispatchNextUnit: (ctx: ExtensionContext, pi: ExtensionAPI) => Promise<void>,
  startDispatchGapWatchdog: (ctx: ExtensionContext, pi: ExtensionAPI) => void,
  pauseAuto: (ctx?: ExtensionContext, pi?: ExtensionAPI) => Promise<void>,
): Promise<VerificationResult> {
  const { s, ctx, pi } = vctx;

  // The gate only applies to execute-task units; everything else passes through.
  if (!s.currentUnit || s.currentUnit.type !== "execute-task") {
    return "continue";
  }

  try {
    const effectivePrefs = loadEffectiveGSDPreferences();
    const prefs = effectivePrefs?.preferences;

    // Read task plan verify field.
    // Unit ids appear to be "<milestone>/<slice>/<task>" — the 3-part split
    // below relies on that shape (TODO confirm against dispatch site).
    const parts = s.currentUnit.id.split("/");
    let taskPlanVerify: string | undefined;
    if (parts.length >= 3) {
      const [mid, sid, tid] = parts;
      const planFile = resolveSliceFile(s.basePath, mid, sid, "PLAN");
      if (planFile) {
        const planContent = await loadFile(planFile);
        if (planContent) {
          const slicePlan = parsePlan(planContent);
          const taskEntry = slicePlan?.tasks?.find(t => t.id === tid);
          taskPlanVerify = taskEntry?.verify;
        }
      }
    }

    // Run the configured checks (preference-level commands plus the task's own verify command).
    const result = runVerificationGate({
      basePath: s.basePath,
      unitId: s.currentUnit.id,
      cwd: s.basePath,
      preferenceCommands: prefs?.verification_commands,
      taskPlanVerify,
    });

    // Capture runtime errors; any blocking error flips the overall gate to failed.
    const runtimeErrors = await captureRuntimeErrors();
    if (runtimeErrors.length > 0) {
      result.runtimeErrors = runtimeErrors;
      if (runtimeErrors.some(e => e.blocking)) {
        result.passed = false;
      }
    }

    // Dependency audit — warnings are attached and logged, but never fail the gate.
    const auditWarnings = runDependencyAudit(s.basePath);
    if (auditWarnings.length > 0) {
      result.auditWarnings = auditWarnings;
      process.stderr.write(`verification-gate: ${auditWarnings.length} audit warning(s)\n`);
      for (const w of auditWarnings) {
        process.stderr.write(`  [${w.severity}] ${w.name}: ${w.title}\n`);
      }
    }

    // Auto-fix retry preferences: auto-fix is ON unless explicitly disabled;
    // retry budget defaults to 2 when the preference is absent or non-numeric.
    const autoFixEnabled = prefs?.verification_auto_fix !== false;
    const maxRetries = typeof prefs?.verification_max_retries === "number" ? prefs.verification_max_retries : 2;
    const completionKey = `${s.currentUnit.type}/${s.currentUnit.id}`;

    // Surface a pass/fail summary to the UI and (on failure) detail to stderr.
    if (result.checks.length > 0) {
      const passCount = result.checks.filter(c => c.exitCode === 0).length;
      const total = result.checks.length;
      if (result.passed) {
        ctx.ui.notify(`Verification gate: ${passCount}/${total} checks passed`);
      } else {
        const failures = result.checks.filter(c => c.exitCode !== 0);
        const failNames = failures.map(f => f.command).join(", ");
        ctx.ui.notify(`Verification gate: FAILED — ${failNames}`);
        process.stderr.write(`verification-gate: ${total - passCount}/${total} checks failed\n`);
        for (const f of failures) {
          process.stderr.write(`  ${f.command} exited ${f.exitCode}\n`);
          // stderr excerpts are truncated to keep the log readable.
          if (f.stderr) process.stderr.write(`  stderr: ${f.stderr.slice(0, 500)}\n`);
        }
      }
    }

    // Log blocking runtime errors
    if (result.runtimeErrors?.some(e => e.blocking)) {
      const blockingErrors = result.runtimeErrors.filter(e => e.blocking);
      process.stderr.write(`verification-gate: ${blockingErrors.length} blocking runtime error(s) detected\n`);
      for (const err of blockingErrors) {
        process.stderr.write(`  [${err.source}] ${err.severity}: ${err.message.slice(0, 200)}\n`);
      }
    }

    // Write verification evidence JSON next to the slice's tasks directory.
    // `attempt` is the number of auto-fix retries already consumed for this unit.
    // Evidence-write failures are logged and swallowed — evidence is best-effort.
    const attempt = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;
    if (parts.length >= 3) {
      try {
        const [mid, sid, tid] = parts;
        const sDir = resolveSlicePath(s.basePath, mid, sid);
        if (sDir) {
          const tasksDir = join(sDir, "tasks");
          if (result.passed) {
            writeVerificationJSON(result, tasksDir, tid, s.currentUnit.id);
          } else {
            // On failure, record which retry attempt this evidence belongs to.
            const nextAttempt = attempt + 1;
            writeVerificationJSON(result, tasksDir, tid, s.currentUnit.id, nextAttempt, maxRetries);
          }
        }
      } catch (evidenceErr) {
        process.stderr.write(`verification-evidence: write error — ${(evidenceErr as Error).message}\n`);
      }
    }

    // ── Auto-fix retry logic ──
    if (result.passed) {
      // Pass: clear any retry bookkeeping for this unit.
      s.verificationRetryCount.delete(s.currentUnit.id);
      s.pendingVerificationRetry = null;
      return "continue";
    } else if (autoFixEnabled && attempt + 1 <= maxRetries) {
      // Fail with budget remaining: record the failure context so the retry
      // prompt can include it, then un-mark the unit as completed (both the
      // in-memory set and the persisted key) so it is eligible for re-dispatch.
      const nextAttempt = attempt + 1;
      s.verificationRetryCount.set(s.currentUnit.id, nextAttempt);
      s.pendingVerificationRetry = {
        unitId: s.currentUnit.id,
        failureContext: formatFailureContext(result),
        attempt: nextAttempt,
      };
      ctx.ui.notify(`Verification failed — auto-fix attempt ${nextAttempt}/${maxRetries}`, "warning");
      s.completedKeySet.delete(completionKey);
      removePersistedKey(s.basePath, completionKey);
      // Dispatch retry immediately
      try {
        await dispatchNextUnit(ctx, pi);
      } catch (retryDispatchErr) {
        // If the immediate dispatch blows up, fall back to the gap watchdog
        // so auto-mode doesn't stall silently.
        const msg = retryDispatchErr instanceof Error ? retryDispatchErr.message : String(retryDispatchErr);
        ctx.ui.notify(`Verification retry dispatch error: ${msg}`, "error");
        startDispatchGapWatchdog(ctx, pi);
      }
      return "retry";
    } else {
      // Gate failed, retries exhausted
      // NOTE(review): when auto-fix is disabled, attempt is 0 and this message
      // reports "after 1 retries" even though no retry ran — confirm intent.
      const exhaustedAttempt = attempt + 1;
      s.verificationRetryCount.delete(s.currentUnit.id);
      s.pendingVerificationRetry = null;
      ctx.ui.notify(
        `Verification gate FAILED after ${exhaustedAttempt > maxRetries ? exhaustedAttempt - 1 : exhaustedAttempt} retries — pausing for human review`,
        "error",
      );
      await pauseAuto(ctx, pi);
      return "pause";
    }
  } catch (err) {
    // Gate errors are non-fatal — a broken gate must never block unit progression.
    process.stderr.write(`verification-gate: error — ${(err as Error).message}\n`);
    return "continue";
  }
}
|
||||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Reference in a new issue