Merge pull request #4028 from mastertyko/fix/3838-closeout-cancelled-units

fix(gsd): close out cancelled auto units
This commit is contained in:
Jeremy McSpadden 2026-04-14 18:29:38 -05:00 committed by GitHub
commit f4f365a27a
5 changed files with 174 additions and 58 deletions

View file

@ -262,6 +262,65 @@ export interface PostUnitContext {
updateProgressWidget: (ctx: ExtensionContext, unitType: string, unitId: string, state: import("./types.js").GSDState) => void;
}
export async function autoCommitUnit(
basePath: string,
unitType: string,
unitId: string,
ctx?: ExtensionContext,
): Promise<string | null> {
try {
let taskContext: TaskCommitContext | undefined;
if (unitType === "execute-task") {
const { milestone: mid, slice: sid, task: tid } = parseUnitId(unitId);
if (mid && sid && tid) {
const summaryPath = resolveTaskFile(basePath, mid, sid, tid, "SUMMARY");
if (summaryPath) {
try {
const summaryContent = await loadFile(summaryPath);
if (summaryContent) {
const summary = parseSummary(summaryContent);
let ghIssueNumber: number | undefined;
try {
const { getTaskIssueNumberForCommit } = await import("../github-sync/sync.js");
ghIssueNumber = getTaskIssueNumberForCommit(basePath, mid, sid, tid) ?? undefined;
} catch (err) {
logWarning("engine", `GitHub issue lookup failed: ${err instanceof Error ? err.message : String(err)}`);
}
taskContext = {
taskId: `${sid}/${tid}`,
taskTitle: summary.title?.replace(/^T\d+:\s*/, "") || tid,
oneLiner: summary.oneLiner || undefined,
keyFiles: summary.frontmatter.key_files?.filter(f => !f.includes("{{")) || undefined,
issueNumber: ghIssueNumber,
};
}
} catch (e) {
debugLog("postUnit", { phase: "task-summary-parse", error: String(e) });
}
}
}
}
_resetHasChangesCache();
if (LIFECYCLE_ONLY_UNITS.has(unitType)) {
return null;
}
const commitMsg = autoCommitCurrentBranch(basePath, unitType, unitId, taskContext);
if (commitMsg) {
ctx?.ui.notify(`Committed: ${commitMsg.split("\n")[0]}`, "info");
}
return commitMsg;
} catch (e) {
debugLog("postUnit", { phase: "auto-commit", error: String(e) });
ctx?.ui.notify(`Auto-commit failed: ${String(e).split("\n")[0]}`, "warning");
return null;
}
}
/**
* Pre-verification processing: parallel worker signal check, cache invalidation,
* auto-commit, doctor run, state rebuild, worktree sync, artifact verification.
@ -301,63 +360,7 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
// Auto-commit
if (s.currentUnit) {
const unit = s.currentUnit;
try {
let taskContext: TaskCommitContext | undefined;
if (s.currentUnit.type === "execute-task") {
const { milestone: mid, slice: sid, task: tid } = parseUnitId(s.currentUnit.id);
if (mid && sid && tid) {
const summaryPath = resolveTaskFile(s.basePath, mid, sid, tid, "SUMMARY");
if (summaryPath) {
try {
const summaryContent = await loadFile(summaryPath);
if (summaryContent) {
const summary = parseSummary(summaryContent);
// Look up GitHub issue number for commit linking
let ghIssueNumber: number | undefined;
try {
const { getTaskIssueNumberForCommit } = await import("../github-sync/sync.js");
ghIssueNumber = getTaskIssueNumberForCommit(s.basePath, mid, sid, tid) ?? undefined;
} catch (err) {
// GitHub sync not available — skip
logWarning("engine", `GitHub issue lookup failed: ${err instanceof Error ? err.message : String(err)}`);
}
taskContext = {
taskId: `${sid}/${tid}`,
taskTitle: summary.title?.replace(/^T\d+:\s*/, "") || tid,
oneLiner: summary.oneLiner || undefined,
keyFiles: summary.frontmatter.key_files?.filter(f => !f.includes("{{")) || undefined,
issueNumber: ghIssueNumber,
};
}
} catch (e) {
debugLog("postUnit", { phase: "task-summary-parse", error: String(e) });
}
}
}
}
// Invalidate the nativeHasChanges cache before auto-commit (#1853).
// The cache has a 10-second TTL and is keyed by basePath. A stale
// `false` result causes autoCommit to skip staging entirely, leaving
// code files only in the working tree where they are destroyed by
// `git worktree remove --force` during teardown.
_resetHasChangesCache();
// Skip auto-commit for lifecycle-only units (#2553) — they only touch
// `.gsd/` internal state files. Those files are picked up by the next
// actual task commit via smartStage().
if (!LIFECYCLE_ONLY_UNITS.has(s.currentUnit.type)) {
const commitMsg = autoCommitCurrentBranch(s.basePath, s.currentUnit.type, s.currentUnit.id, taskContext);
if (commitMsg) {
ctx.ui.notify(`Committed: ${commitMsg.split("\n")[0]}`, "info");
}
}
} catch (e) {
debugLog("postUnit", { phase: "auto-commit", error: String(e) });
ctx.ui.notify(`Auto-commit failed: ${String(e).split("\n")[0]}`, "warning");
}
await autoCommitUnit(s.basePath, unit.type, unit.id, ctx);
// GitHub sync (non-blocking, opt-in)
await runSafely("postUnit", "github-sync", async () => {

View file

@ -195,6 +195,7 @@ import { clearCmuxSidebar, logCmuxEvent, syncCmuxSidebar } from "../cmux/index.j
import { startUnitSupervision } from "./auto-timers.js";
import { runPostUnitVerification } from "./auto-verification.js";
import {
autoCommitUnit,
postUnitPreVerification,
postUnitPostVerification,
} from "./auto-post-unit.js";
@ -1180,6 +1181,7 @@ function buildLoopDeps(): LoopDeps {
getMainBranch,
// Unit closeout + runtime records
closeoutUnit,
autoCommitUnit,
recordOutcome,
writeLock,
captureAvailableSkills,

View file

@ -180,6 +180,12 @@ export interface LoopDeps {
startedAt: number,
opts?: CloseoutOptions & Record<string, unknown>,
) => Promise<void>;
autoCommitUnit?: (
basePath: string,
unitType: string,
unitId: string,
ctx?: ExtensionContext,
) => Promise<string | null>;
recordOutcome: (unitType: string, tier: string, success: boolean) => void;
writeLock: (
lockBase: string,

View file

@ -167,6 +167,29 @@ async function closeoutAndStop(
await deps.stopAuto(ctx, pi, reason);
}
async function emitCancelledUnitEnd(
ic: IterationContext,
unitType: string,
unitId: string,
unitStartSeq: number,
errorContext?: { message: string; category: string; stopReason?: string; isTransient?: boolean; retryAfterMs?: number },
): Promise<void> {
ic.deps.emitJournalEvent({
ts: new Date().toISOString(),
flowId: ic.flowId,
seq: ic.nextSeq(),
eventType: "unit-end",
data: {
unitType,
unitId,
status: "cancelled",
artifactVerified: false,
...(errorContext ? { errorContext } : {}),
},
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
});
}
// ─── runPreDispatch ───────────────────────────────────────────────────────────
/**
@ -1359,6 +1382,7 @@ export async function runUnitPhase(
// Provider-error pause: pauseAuto already handled cleanup and scheduled
// recovery. Don't hard-stop — just break out of the loop (#2762).
if (unitResult.errorContext?.category === "provider") {
await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext);
debugLog("autoLoop", { phase: "exit", reason: "provider-pause", isTransient: unitResult.errorContext.isTransient });
return { action: "break", reason: "provider-pause" };
}
@ -1377,9 +1401,23 @@ export async function runUnitPhase(
);
debugLog("autoLoop", { phase: "session-timeout-pause", unitType, unitId });
await deps.pauseAuto(ctx, pi);
await deps.autoCommitUnit?.(s.basePath, unitType, unitId, ctx);
await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext);
return { action: "break", reason: "session-timeout" };
}
// All other cancelled states (structural errors, non-transient failures): hard stop
if (s.currentUnit) {
await deps.closeoutUnit(
ctx,
s.basePath,
unitType,
unitId,
s.currentUnit.startedAt,
deps.buildSnapshotOpts(unitType, unitId),
);
}
await deps.autoCommitUnit?.(s.basePath, unitType, unitId, ctx);
await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext);
ctx.ui.notify(
`Session creation failed for ${unitType} ${unitId}: ${unitResult.errorContext?.message ?? "unknown"}. Stopping auto-mode.`,
"warning",

View file

@ -92,6 +92,7 @@ function makeMockDeps(
getPriorSliceCompletionBlocker: () => null,
getMainBranch: () => "main",
closeoutUnit: async () => {},
autoCommitUnit: async () => null,
recordOutcome: () => {},
writeLock: () => {},
captureAvailableSkills: () => {},
@ -567,7 +568,15 @@ test("unit-end event contains errorContext when unit is cancelled with structure
const { resolveAgentEndCancelled, _resetPendingResolve } = await import("../auto-loop.js");
_resetPendingResolve();
const deps = makeMockDeps(capture);
let pauseCalls = 0;
let commitCalls = 0;
const deps = makeMockDeps(capture, {
pauseAuto: async () => { pauseCalls++; },
autoCommitUnit: async () => {
commitCalls++;
return "commit";
},
});
const ic = makeIC(deps);
const iterData: IterationData = {
unitType: "execute-task",
@ -593,10 +602,68 @@ test("unit-end event contains errorContext when unit is cancelled with structure
// Transient timeout cancellations pause (recoverable) instead of hard-stopping
assert.equal(result.action, "break");
assert.equal((result as any).reason, "session-timeout");
assert.equal(pauseCalls, 1, "timeout cancellations should pause auto-mode exactly once");
assert.equal(commitCalls, 1, "timeout cancellations should flush a unit auto-commit once");
// Verify error classification used structured errorContext on the window entry
const entry = loopState.recentUnits[loopState.recentUnits.length - 1];
assert.ok(entry.error, "window entry must have error set");
assert.ok(entry.error!.startsWith("timeout:"), "error must start with category from errorContext");
assert.ok(entry.error!.includes("Hard timeout error"), "error must include the errorContext message");
const endEvents = capture.events.filter(e => e.eventType === "unit-end");
assert.equal(endEvents.length, 1, "timeout cancellations should still emit unit-end");
assert.equal((endEvents[0].data as any).status, "cancelled");
assert.equal((endEvents[0].data as any).artifactVerified, false);
assert.equal((endEvents[0].data as any).errorContext.category, "timeout");
});
test("session-failed cancellations close out and emit unit-end before hard stop", async () => {
const capture = createEventCapture();
const { resolveAgentEndCancelled, _resetPendingResolve } = await import("../auto-loop.js");
_resetPendingResolve();
let closeoutCalls = 0;
let commitCalls = 0;
let stopCalls = 0;
const deps = makeMockDeps(capture, {
closeoutUnit: async () => { closeoutCalls++; },
autoCommitUnit: async () => {
commitCalls++;
return "commit";
},
stopAuto: async () => { stopCalls++; },
});
const ic = makeIC(deps);
const iterData: IterationData = {
unitType: "execute-task",
unitId: "M001/S01/T01",
prompt: "do stuff",
finalPrompt: "do stuff",
pauseAfterUatDispatch: false,
state: { phase: "executing", activeMilestone: { id: "M001" }, activeSlice: { id: "S01" }, registry: [], blockers: [] } as any,
mid: "M001",
midTitle: "Test",
isRetry: false,
previousTier: undefined,
};
const loopState: LoopState = { recentUnits: [{ key: "execute-task/M001/S01/T01" }], stuckRecoveryAttempts: 0, consecutiveFinalizeTimeouts: 0 };
const unitPromise = runUnitPhase(ic, iterData, loopState);
await new Promise(r => setTimeout(r, 50));
resolveAgentEndCancelled({ message: "session bootstrap exploded", category: "session-failed", isTransient: false });
const result = await unitPromise;
assert.equal(result.action, "break");
assert.equal((result as any).reason, "session-failed");
assert.equal(closeoutCalls, 1, "session-failed cancellations should close out the unit before stopping");
assert.equal(commitCalls, 1, "session-failed cancellations should try one auto-commit flush");
assert.equal(stopCalls, 1, "session-failed cancellations should hard-stop auto-mode");
const endEvents = capture.events.filter(e => e.eventType === "unit-end");
assert.equal(endEvents.length, 1, "session-failed cancellations should emit unit-end");
assert.equal((endEvents[0].data as any).status, "cancelled");
assert.equal((endEvents[0].data as any).artifactVerified, false);
assert.equal((endEvents[0].data as any).errorContext.category, "session-failed");
});