Merge pull request #4028 from mastertyko/fix/3838-closeout-cancelled-units
fix(gsd): close out cancelled auto units
This commit is contained in:
commit
f4f365a27a
5 changed files with 174 additions and 58 deletions
|
|
@ -262,6 +262,65 @@ export interface PostUnitContext {
|
|||
updateProgressWidget: (ctx: ExtensionContext, unitType: string, unitId: string, state: import("./types.js").GSDState) => void;
|
||||
}
|
||||
|
||||
export async function autoCommitUnit(
|
||||
basePath: string,
|
||||
unitType: string,
|
||||
unitId: string,
|
||||
ctx?: ExtensionContext,
|
||||
): Promise<string | null> {
|
||||
try {
|
||||
let taskContext: TaskCommitContext | undefined;
|
||||
|
||||
if (unitType === "execute-task") {
|
||||
const { milestone: mid, slice: sid, task: tid } = parseUnitId(unitId);
|
||||
if (mid && sid && tid) {
|
||||
const summaryPath = resolveTaskFile(basePath, mid, sid, tid, "SUMMARY");
|
||||
if (summaryPath) {
|
||||
try {
|
||||
const summaryContent = await loadFile(summaryPath);
|
||||
if (summaryContent) {
|
||||
const summary = parseSummary(summaryContent);
|
||||
let ghIssueNumber: number | undefined;
|
||||
try {
|
||||
const { getTaskIssueNumberForCommit } = await import("../github-sync/sync.js");
|
||||
ghIssueNumber = getTaskIssueNumberForCommit(basePath, mid, sid, tid) ?? undefined;
|
||||
} catch (err) {
|
||||
logWarning("engine", `GitHub issue lookup failed: ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
|
||||
taskContext = {
|
||||
taskId: `${sid}/${tid}`,
|
||||
taskTitle: summary.title?.replace(/^T\d+:\s*/, "") || tid,
|
||||
oneLiner: summary.oneLiner || undefined,
|
||||
keyFiles: summary.frontmatter.key_files?.filter(f => !f.includes("{{")) || undefined,
|
||||
issueNumber: ghIssueNumber,
|
||||
};
|
||||
}
|
||||
} catch (e) {
|
||||
debugLog("postUnit", { phase: "task-summary-parse", error: String(e) });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_resetHasChangesCache();
|
||||
|
||||
if (LIFECYCLE_ONLY_UNITS.has(unitType)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const commitMsg = autoCommitCurrentBranch(basePath, unitType, unitId, taskContext);
|
||||
if (commitMsg) {
|
||||
ctx?.ui.notify(`Committed: ${commitMsg.split("\n")[0]}`, "info");
|
||||
}
|
||||
return commitMsg;
|
||||
} catch (e) {
|
||||
debugLog("postUnit", { phase: "auto-commit", error: String(e) });
|
||||
ctx?.ui.notify(`Auto-commit failed: ${String(e).split("\n")[0]}`, "warning");
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Pre-verification processing: parallel worker signal check, cache invalidation,
|
||||
* auto-commit, doctor run, state rebuild, worktree sync, artifact verification.
|
||||
|
|
@ -301,63 +360,7 @@ export async function postUnitPreVerification(pctx: PostUnitContext, opts?: PreV
|
|||
// Auto-commit
|
||||
if (s.currentUnit) {
|
||||
const unit = s.currentUnit;
|
||||
try {
|
||||
let taskContext: TaskCommitContext | undefined;
|
||||
|
||||
if (s.currentUnit.type === "execute-task") {
|
||||
const { milestone: mid, slice: sid, task: tid } = parseUnitId(s.currentUnit.id);
|
||||
if (mid && sid && tid) {
|
||||
const summaryPath = resolveTaskFile(s.basePath, mid, sid, tid, "SUMMARY");
|
||||
if (summaryPath) {
|
||||
try {
|
||||
const summaryContent = await loadFile(summaryPath);
|
||||
if (summaryContent) {
|
||||
const summary = parseSummary(summaryContent);
|
||||
// Look up GitHub issue number for commit linking
|
||||
let ghIssueNumber: number | undefined;
|
||||
try {
|
||||
const { getTaskIssueNumberForCommit } = await import("../github-sync/sync.js");
|
||||
ghIssueNumber = getTaskIssueNumberForCommit(s.basePath, mid, sid, tid) ?? undefined;
|
||||
} catch (err) {
|
||||
// GitHub sync not available — skip
|
||||
logWarning("engine", `GitHub issue lookup failed: ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
|
||||
taskContext = {
|
||||
taskId: `${sid}/${tid}`,
|
||||
taskTitle: summary.title?.replace(/^T\d+:\s*/, "") || tid,
|
||||
oneLiner: summary.oneLiner || undefined,
|
||||
keyFiles: summary.frontmatter.key_files?.filter(f => !f.includes("{{")) || undefined,
|
||||
issueNumber: ghIssueNumber,
|
||||
};
|
||||
}
|
||||
} catch (e) {
|
||||
debugLog("postUnit", { phase: "task-summary-parse", error: String(e) });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Invalidate the nativeHasChanges cache before auto-commit (#1853).
|
||||
// The cache has a 10-second TTL and is keyed by basePath. A stale
|
||||
// `false` result causes autoCommit to skip staging entirely, leaving
|
||||
// code files only in the working tree where they are destroyed by
|
||||
// `git worktree remove --force` during teardown.
|
||||
_resetHasChangesCache();
|
||||
|
||||
// Skip auto-commit for lifecycle-only units (#2553) — they only touch
|
||||
// `.gsd/` internal state files. Those files are picked up by the next
|
||||
// actual task commit via smartStage().
|
||||
if (!LIFECYCLE_ONLY_UNITS.has(s.currentUnit.type)) {
|
||||
const commitMsg = autoCommitCurrentBranch(s.basePath, s.currentUnit.type, s.currentUnit.id, taskContext);
|
||||
if (commitMsg) {
|
||||
ctx.ui.notify(`Committed: ${commitMsg.split("\n")[0]}`, "info");
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
debugLog("postUnit", { phase: "auto-commit", error: String(e) });
|
||||
ctx.ui.notify(`Auto-commit failed: ${String(e).split("\n")[0]}`, "warning");
|
||||
}
|
||||
await autoCommitUnit(s.basePath, unit.type, unit.id, ctx);
|
||||
|
||||
// GitHub sync (non-blocking, opt-in)
|
||||
await runSafely("postUnit", "github-sync", async () => {
|
||||
|
|
|
|||
|
|
@ -195,6 +195,7 @@ import { clearCmuxSidebar, logCmuxEvent, syncCmuxSidebar } from "../cmux/index.j
|
|||
import { startUnitSupervision } from "./auto-timers.js";
|
||||
import { runPostUnitVerification } from "./auto-verification.js";
|
||||
import {
|
||||
autoCommitUnit,
|
||||
postUnitPreVerification,
|
||||
postUnitPostVerification,
|
||||
} from "./auto-post-unit.js";
|
||||
|
|
@ -1180,6 +1181,7 @@ function buildLoopDeps(): LoopDeps {
|
|||
getMainBranch,
|
||||
// Unit closeout + runtime records
|
||||
closeoutUnit,
|
||||
autoCommitUnit,
|
||||
recordOutcome,
|
||||
writeLock,
|
||||
captureAvailableSkills,
|
||||
|
|
|
|||
|
|
@ -180,6 +180,12 @@ export interface LoopDeps {
|
|||
startedAt: number,
|
||||
opts?: CloseoutOptions & Record<string, unknown>,
|
||||
) => Promise<void>;
|
||||
autoCommitUnit?: (
|
||||
basePath: string,
|
||||
unitType: string,
|
||||
unitId: string,
|
||||
ctx?: ExtensionContext,
|
||||
) => Promise<string | null>;
|
||||
recordOutcome: (unitType: string, tier: string, success: boolean) => void;
|
||||
writeLock: (
|
||||
lockBase: string,
|
||||
|
|
|
|||
|
|
@ -167,6 +167,29 @@ async function closeoutAndStop(
|
|||
await deps.stopAuto(ctx, pi, reason);
|
||||
}
|
||||
|
||||
async function emitCancelledUnitEnd(
|
||||
ic: IterationContext,
|
||||
unitType: string,
|
||||
unitId: string,
|
||||
unitStartSeq: number,
|
||||
errorContext?: { message: string; category: string; stopReason?: string; isTransient?: boolean; retryAfterMs?: number },
|
||||
): Promise<void> {
|
||||
ic.deps.emitJournalEvent({
|
||||
ts: new Date().toISOString(),
|
||||
flowId: ic.flowId,
|
||||
seq: ic.nextSeq(),
|
||||
eventType: "unit-end",
|
||||
data: {
|
||||
unitType,
|
||||
unitId,
|
||||
status: "cancelled",
|
||||
artifactVerified: false,
|
||||
...(errorContext ? { errorContext } : {}),
|
||||
},
|
||||
causedBy: { flowId: ic.flowId, seq: unitStartSeq },
|
||||
});
|
||||
}
|
||||
|
||||
// ─── runPreDispatch ───────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
|
|
@ -1359,6 +1382,7 @@ export async function runUnitPhase(
|
|||
// Provider-error pause: pauseAuto already handled cleanup and scheduled
|
||||
// recovery. Don't hard-stop — just break out of the loop (#2762).
|
||||
if (unitResult.errorContext?.category === "provider") {
|
||||
await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext);
|
||||
debugLog("autoLoop", { phase: "exit", reason: "provider-pause", isTransient: unitResult.errorContext.isTransient });
|
||||
return { action: "break", reason: "provider-pause" };
|
||||
}
|
||||
|
|
@ -1377,9 +1401,23 @@ export async function runUnitPhase(
|
|||
);
|
||||
debugLog("autoLoop", { phase: "session-timeout-pause", unitType, unitId });
|
||||
await deps.pauseAuto(ctx, pi);
|
||||
await deps.autoCommitUnit?.(s.basePath, unitType, unitId, ctx);
|
||||
await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext);
|
||||
return { action: "break", reason: "session-timeout" };
|
||||
}
|
||||
// All other cancelled states (structural errors, non-transient failures): hard stop
|
||||
if (s.currentUnit) {
|
||||
await deps.closeoutUnit(
|
||||
ctx,
|
||||
s.basePath,
|
||||
unitType,
|
||||
unitId,
|
||||
s.currentUnit.startedAt,
|
||||
deps.buildSnapshotOpts(unitType, unitId),
|
||||
);
|
||||
}
|
||||
await deps.autoCommitUnit?.(s.basePath, unitType, unitId, ctx);
|
||||
await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext);
|
||||
ctx.ui.notify(
|
||||
`Session creation failed for ${unitType} ${unitId}: ${unitResult.errorContext?.message ?? "unknown"}. Stopping auto-mode.`,
|
||||
"warning",
|
||||
|
|
|
|||
|
|
@ -92,6 +92,7 @@ function makeMockDeps(
|
|||
getPriorSliceCompletionBlocker: () => null,
|
||||
getMainBranch: () => "main",
|
||||
closeoutUnit: async () => {},
|
||||
autoCommitUnit: async () => null,
|
||||
recordOutcome: () => {},
|
||||
writeLock: () => {},
|
||||
captureAvailableSkills: () => {},
|
||||
|
|
@ -567,7 +568,15 @@ test("unit-end event contains errorContext when unit is cancelled with structure
|
|||
const { resolveAgentEndCancelled, _resetPendingResolve } = await import("../auto-loop.js");
|
||||
_resetPendingResolve();
|
||||
|
||||
const deps = makeMockDeps(capture);
|
||||
let pauseCalls = 0;
|
||||
let commitCalls = 0;
|
||||
const deps = makeMockDeps(capture, {
|
||||
pauseAuto: async () => { pauseCalls++; },
|
||||
autoCommitUnit: async () => {
|
||||
commitCalls++;
|
||||
return "commit";
|
||||
},
|
||||
});
|
||||
const ic = makeIC(deps);
|
||||
const iterData: IterationData = {
|
||||
unitType: "execute-task",
|
||||
|
|
@ -593,10 +602,68 @@ test("unit-end event contains errorContext when unit is cancelled with structure
|
|||
// Transient timeout cancellations pause (recoverable) instead of hard-stopping
|
||||
assert.equal(result.action, "break");
|
||||
assert.equal((result as any).reason, "session-timeout");
|
||||
assert.equal(pauseCalls, 1, "timeout cancellations should pause auto-mode exactly once");
|
||||
assert.equal(commitCalls, 1, "timeout cancellations should flush a unit auto-commit once");
|
||||
|
||||
// Verify error classification used structured errorContext on the window entry
|
||||
const entry = loopState.recentUnits[loopState.recentUnits.length - 1];
|
||||
assert.ok(entry.error, "window entry must have error set");
|
||||
assert.ok(entry.error!.startsWith("timeout:"), "error must start with category from errorContext");
|
||||
assert.ok(entry.error!.includes("Hard timeout error"), "error must include the errorContext message");
|
||||
|
||||
const endEvents = capture.events.filter(e => e.eventType === "unit-end");
|
||||
assert.equal(endEvents.length, 1, "timeout cancellations should still emit unit-end");
|
||||
assert.equal((endEvents[0].data as any).status, "cancelled");
|
||||
assert.equal((endEvents[0].data as any).artifactVerified, false);
|
||||
assert.equal((endEvents[0].data as any).errorContext.category, "timeout");
|
||||
});
|
||||
|
||||
test("session-failed cancellations close out and emit unit-end before hard stop", async () => {
|
||||
const capture = createEventCapture();
|
||||
const { resolveAgentEndCancelled, _resetPendingResolve } = await import("../auto-loop.js");
|
||||
_resetPendingResolve();
|
||||
|
||||
let closeoutCalls = 0;
|
||||
let commitCalls = 0;
|
||||
let stopCalls = 0;
|
||||
const deps = makeMockDeps(capture, {
|
||||
closeoutUnit: async () => { closeoutCalls++; },
|
||||
autoCommitUnit: async () => {
|
||||
commitCalls++;
|
||||
return "commit";
|
||||
},
|
||||
stopAuto: async () => { stopCalls++; },
|
||||
});
|
||||
const ic = makeIC(deps);
|
||||
const iterData: IterationData = {
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
prompt: "do stuff",
|
||||
finalPrompt: "do stuff",
|
||||
pauseAfterUatDispatch: false,
|
||||
state: { phase: "executing", activeMilestone: { id: "M001" }, activeSlice: { id: "S01" }, registry: [], blockers: [] } as any,
|
||||
mid: "M001",
|
||||
midTitle: "Test",
|
||||
isRetry: false,
|
||||
previousTier: undefined,
|
||||
};
|
||||
const loopState: LoopState = { recentUnits: [{ key: "execute-task/M001/S01/T01" }], stuckRecoveryAttempts: 0, consecutiveFinalizeTimeouts: 0 };
|
||||
|
||||
const unitPromise = runUnitPhase(ic, iterData, loopState);
|
||||
await new Promise(r => setTimeout(r, 50));
|
||||
|
||||
resolveAgentEndCancelled({ message: "session bootstrap exploded", category: "session-failed", isTransient: false });
|
||||
|
||||
const result = await unitPromise;
|
||||
assert.equal(result.action, "break");
|
||||
assert.equal((result as any).reason, "session-failed");
|
||||
assert.equal(closeoutCalls, 1, "session-failed cancellations should close out the unit before stopping");
|
||||
assert.equal(commitCalls, 1, "session-failed cancellations should try one auto-commit flush");
|
||||
assert.equal(stopCalls, 1, "session-failed cancellations should hard-stop auto-mode");
|
||||
|
||||
const endEvents = capture.events.filter(e => e.eventType === "unit-end");
|
||||
assert.equal(endEvents.length, 1, "session-failed cancellations should emit unit-end");
|
||||
assert.equal((endEvents[0].data as any).status, "cancelled");
|
||||
assert.equal((endEvents[0].data as any).artifactVerified, false);
|
||||
assert.equal((endEvents[0].data as any).errorContext.category, "session-failed");
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue