singularity-forge/src/resources/extensions/gsd/tests/auto-loop.test.ts
Lex Christopherson 7ca3ce04a4 fix(gsd): remove stale observability validator + fix greenfield worktree check
The observability validator checked for markdown headings (## Observability / Diagnostics,
## Observability Impact) that the DB-backed renderer never produces, causing false-positive
warnings on every dispatch. Removed entirely — the DB schema enforces structure at write time.

The worktree health check blocked execution in directories without recognized project files
(package.json, Cargo.toml, etc.), preventing greenfield projects from scaffolding. Downgraded
to a warning — .git check remains as the hard gate.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-24 07:27:48 -06:00

2123 lines
66 KiB
TypeScript

import test from "node:test";
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { resolve } from "node:path";
import {
resolveAgentEnd,
resolveAgentEndCancelled,
runUnit,
autoLoop,
detectStuck,
_resetPendingResolve,
_setActiveSession,
isSessionSwitchInFlight,
type UnitResult,
type AgentEndEvent,
type LoopDeps,
} from "../auto-loop.js";
import type { SessionLockStatus } from "../session-lock.js";
// ─── Helpers ─────────────────────────────────────────────────────────────────
/**
 * Build an AgentEndEvent carrying the given messages.
 * When no list is supplied, a single assistant-role message is used.
 */
function makeEvent(
  messages: unknown[] = [{ role: "assistant" }],
): AgentEndEvent {
  const event: AgentEndEvent = { messages };
  return event;
}
/**
 * Create a stripped-down stand-in for AutoSession.
 *
 * The behavior of `cmdCtx.newSession` is driven entirely by `opts`:
 * - `newSessionThrows`   — reject with an Error carrying this message
 * - `newSessionResult`   — value to resolve with (default `{ cancelled: false }`)
 * - `newSessionDelayMs`  — resolve after this many ms instead of immediately
 * - `onNewSessionStart`  — invoked synchronously at the start of every call
 * - `onNewSessionSettle` — invoked right before a successful resolution
 */
function makeMockSession(opts?: {
  newSessionResult?: { cancelled: boolean };
  newSessionThrows?: string;
  newSessionDelayMs?: number;
  onNewSessionStart?: (session: any) => void;
  onNewSessionSettle?: (session: any) => void;
}) {
  // Only successful resolutions report "settle"; the rejection path never does.
  const notifySettled = () => opts?.onNewSessionSettle?.(session);

  const newSession = () => {
    opts?.onNewSessionStart?.(session);

    if (opts?.newSessionThrows) {
      return Promise.reject(new Error(opts.newSessionThrows));
    }

    const outcome = opts?.newSessionResult ?? { cancelled: false };
    const waitMs = opts?.newSessionDelayMs ?? 0;

    if (waitMs > 0) {
      return new Promise<{ cancelled: boolean }>((done) =>
        setTimeout(() => {
          notifySettled();
          done(outcome);
        }, waitMs),
      );
    }

    notifySettled();
    return Promise.resolve(outcome);
  };

  const session: any = {
    active: true,
    verbose: false,
    cmdCtx: { newSession },
    clearTimers: () => {},
  };
  return session;
}
/**
 * Produce a bare-bones ExtensionContext double: a `ui.notify` that does
 * nothing and a `model` with a fixed id.
 */
function makeMockCtx() {
  const ui = { notify: () => {} };
  const model = { id: "test-model" };
  return { ui, model } as any;
}
/**
 * Produce a bare-bones ExtensionAPI double. Each `sendMessage` invocation
 * appends its argument list to the exposed `calls` array.
 */
function makeMockPi() {
  const calls: unknown[] = [];
  const sendMessage = (...args: unknown[]): void => {
    calls.push(args);
  };
  return { sendMessage, calls } as any;
}
// ─── Tests ───────────────────────────────────────────────────────────────────
test("resolveAgentEnd resolves a pending runUnit promise", async () => {
  _resetPendingResolve();

  const agentEnd = makeEvent();

  // Launch the unit without awaiting it; it stays pending until an
  // agent_end event is delivered.
  const pending = runUnit(
    makeMockCtx(),
    makeMockPi(),
    makeMockSession(),
    "task",
    "T01",
    "do stuff",
  );

  // Yield for a short while so runUnit can progress to its await point.
  await new Promise((tick) => setTimeout(tick, 10));

  // Deliver the agent_end and let the unit finish.
  resolveAgentEnd(agentEnd);
  const outcome = await pending;

  assert.equal(outcome.status, "completed");
  assert.deepEqual(outcome.event, agentEnd);
});
test("resolveAgentEnd drops event when no promise is pending", () => {
  _resetPendingResolve();
  // With nothing waiting, the event has nowhere to go; the call must still
  // return normally rather than throw.
  assert.doesNotThrow(() => resolveAgentEnd(makeEvent()));
});
test("double resolveAgentEnd only resolves once (second is dropped)", async () => {
  _resetPendingResolve();

  const winner = makeEvent([{ id: 1 }]);
  const latecomer = makeEvent([{ id: 2 }]);

  const pending = runUnit(
    makeMockCtx(),
    makeMockPi(),
    makeMockSession(),
    "task",
    "T01",
    "prompt",
  );
  await new Promise((tick) => setTimeout(tick, 10));

  // The first delivery is consumed by the waiting unit.
  resolveAgentEnd(winner);

  // The repeat finds no pending resolver and must be discarded quietly.
  assert.doesNotThrow(() => resolveAgentEnd(latecomer));

  const outcome = await pending;
  assert.equal(outcome.status, "completed");
  // The unit must carry the first event; the second never reaches it.
  assert.deepEqual(outcome.event, winner);
});
test("runUnit returns cancelled when session creation fails", async () => {
  _resetPendingResolve();

  const pi = makeMockPi();
  const brokenSession = makeMockSession({ newSessionThrows: "connection refused" });

  const outcome = await runUnit(makeMockCtx(), pi, brokenSession, "task", "T01", "prompt");

  assert.equal(outcome.status, "cancelled");
  assert.equal(outcome.event, undefined);
  // No prompt may be sent once session creation has failed.
  assert.equal(pi.calls.length, 0);
});
test("runUnit returns cancelled when session creation times out", async () => {
  _resetPendingResolve();

  const pi = makeMockPi();
  // A `cancelled: true` result stands in for what the timeout race would yield.
  const cancelledSession = makeMockSession({ newSessionResult: { cancelled: true } });

  const outcome = await runUnit(makeMockCtx(), pi, cancelledSession, "task", "T01", "prompt");

  assert.equal(outcome.status, "cancelled");
  assert.equal(outcome.event, undefined);
  assert.equal(pi.calls.length, 0);
});
test("runUnit returns cancelled when s.active is false before sendMessage", async () => {
  _resetPendingResolve();

  const pi = makeMockPi();
  const idleSession = makeMockSession();
  idleSession.active = false;

  const outcome = await runUnit(makeMockCtx(), pi, idleSession, "task", "T01", "prompt");

  assert.equal(outcome.status, "cancelled");
  assert.equal(pi.calls.length, 0);
});
test("runUnit only arms resolve after newSession completes", async () => {
  _resetPendingResolve();

  // Snapshot of the switch guard taken at the moment newSession is entered.
  const observed = { guardDuringSwitch: false };

  const pi = makeMockPi();
  const slowSession = makeMockSession({
    newSessionDelayMs: 20,
    onNewSessionStart: () => {
      observed.guardDuringSwitch = isSessionSwitchInFlight();
    },
  });

  const pending = runUnit(makeMockCtx(), pi, slowSession, "task", "T01", "prompt");

  // Wait past the 20 ms newSession delay before inspecting the guard.
  await new Promise((tick) => setTimeout(tick, 30));

  assert.equal(
    observed.guardDuringSwitch,
    true,
    "session switch guard should be active during newSession",
  );
  assert.equal(
    isSessionSwitchInFlight(),
    false,
    "session switch guard should clear after newSession settles",
  );

  resolveAgentEnd(makeEvent());
  const outcome = await pending;

  assert.equal(outcome.status, "completed");
  assert.equal(pi.calls.length, 1);
});
// ─── Structural assertions ───────────────────────────────────────────────────
test("auto-loop.ts exports autoLoop, runUnit, resolveAgentEnd", async () => {
  const mod = await import("../auto-loop.js");

  // Each of these names must be present on the module and be callable.
  const expectedFunctions = ["autoLoop", "runUnit", "resolveAgentEnd"] as const;
  for (const name of expectedFunctions) {
    assert.equal(
      typeof mod[name],
      "function",
      `${name} should be exported as a function`,
    );
  }
});
test("auto/loop.ts contains a while keyword", () => {
  const loopPath = resolve(import.meta.dirname, "..", "auto", "loop.ts");
  const hasWhile = readFileSync(loopPath, "utf-8").includes("while");

  // Plain substring check against the source text of auto/loop.ts.
  assert.ok(
    hasWhile,
    "auto/loop.ts should contain a while keyword (loop or placeholder)",
  );
});
test("auto/resolve.ts one-shot pattern: _currentResolve is nulled before calling resolver", () => {
  const src = readFileSync(
    resolve(import.meta.dirname, "..", "auto", "resolve.ts"),
    "utf-8",
  );

  // Locate the function once and fail loudly if it is missing. Without this
  // guard a miss yields index -1, the slice below becomes near-empty, and the
  // test fails with a message about `_currentResolve` instead of the real cause.
  const fnIdx = src.indexOf("export function resolveAgentEnd");
  assert.ok(fnIdx > -1, "resolveAgentEnd must exist in auto/resolve.ts");

  // The one-shot pattern requires: save ref, null the variable, then call.
  // Only the first 600 characters of the function are inspected.
  const resolveBlock = src.slice(fnIdx, fnIdx + 600);
  const nullIdx = resolveBlock.indexOf("_currentResolve = null");
  const callIdx = resolveBlock.indexOf("r({");

  assert.ok(nullIdx > 0, "should null _currentResolve in resolveAgentEnd");
  assert.ok(callIdx > 0, "should call resolver in resolveAgentEnd");
  assert.ok(
    nullIdx < callIdx,
    "_currentResolve should be nulled before calling the resolver (one-shot)",
  );
});
// ─── autoLoop tests (T02) ─────────────────────────────────────────────────
/**
 * Build a mock LoopDeps for autoLoop tests.
 *
 * Every dependency is stubbed with a no-op or a fixed return value. A subset
 * of stubs (stopAuto, pauseAuto, invalidateAllCaches, deriveState,
 * updateSessionLock, handleLostSessionLock, resolveDispatch and the three
 * post-unit phases) additionally push their own name onto `callLog`, so tests
 * can assert on call order.
 *
 * @param overrides Replacement implementations, spread over the defaults.
 *   An override that should appear in `callLog` must push its own entry.
 * @returns The merged deps plus the shared `callLog` array. `callLog` is
 *   spread last, so an override cannot replace it.
 */
function makeMockDeps(
overrides?: Partial<LoopDeps>,
): LoopDeps & { callLog: string[] } {
const callLog: string[] = [];
const baseDeps: LoopDeps = {
lockBase: () => "/tmp/test-lock",
buildSnapshotOpts: () => ({}),
stopAuto: async () => {
callLog.push("stopAuto");
},
pauseAuto: async () => {
callLog.push("pauseAuto");
},
clearUnitTimeout: () => {},
updateProgressWidget: () => {},
syncCmuxSidebar: () => {},
logCmuxEvent: () => {},
invalidateAllCaches: () => {
callLog.push("invalidateAllCaches");
},
// Default derived state: phase "executing" with milestone M001, slice S01, task T01.
deriveState: async () => {
callLog.push("deriveState");
return {
phase: "executing",
activeMilestone: {
id: "M001",
title: "Test Milestone",
status: "active",
},
activeSlice: { id: "S01", title: "Test Slice" },
activeTask: { id: "T01" },
registry: [{ id: "M001", status: "active" }],
blockers: [],
} as any;
},
loadEffectiveGSDPreferences: () => ({ preferences: {} }),
preDispatchHealthGate: async () => ({ proceed: true, fixesApplied: [] }),
syncProjectRootToWorktree: () => {},
checkResourcesStale: () => null,
// The session lock is reported as valid unless a test overrides this.
validateSessionLock: () => ({ valid: true } as SessionLockStatus),
updateSessionLock: () => {
callLog.push("updateSessionLock");
},
handleLostSessionLock: () => {
callLog.push("handleLostSessionLock");
},
sendDesktopNotification: () => {},
setActiveMilestoneId: () => {},
pruneQueueOrder: () => {},
// Worktree / git stubs: worktree isolation is off and the branch is "main".
isInAutoWorktree: () => false,
shouldUseWorktreeIsolation: () => false,
mergeMilestoneToMain: () => ({ pushed: false, codeFilesChanged: true }),
teardownAutoWorktree: () => {},
createAutoWorktree: () => "/tmp/wt",
captureIntegrationBranch: () => {},
getIsolationMode: () => "none",
getCurrentBranch: () => "main",
autoWorktreeBranch: () => "auto/M001",
resolveMilestoneFile: () => null,
reconcileMergeState: () => false,
// Budget stubs: zero cost, alert level 0, enforcement action "none".
getLedger: () => null,
getProjectTotals: () => ({ cost: 0 }),
formatCost: (c: number) => `$${c.toFixed(2)}`,
getBudgetAlertLevel: () => 0,
getNewBudgetAlertLevel: () => 0,
getBudgetEnforcementAction: () => "none",
getManifestStatus: async () => null,
collectSecretsFromManifest: async () => null,
// Default dispatch decision: run execute-task unit M001/S01/T01.
resolveDispatch: async () => {
callLog.push("resolveDispatch");
return {
action: "dispatch" as const,
unitType: "execute-task",
unitId: "M001/S01/T01",
prompt: "do the thing",
};
},
runPreDispatchHooks: () => ({ firedHooks: [], action: "proceed" }),
getPriorSliceCompletionBlocker: () => null,
getMainBranch: () => "main",
closeoutUnit: async () => {},
verifyExpectedArtifact: () => true,
clearUnitRuntimeRecord: () => {},
writeUnitRuntimeRecord: () => {},
recordOutcome: () => {},
writeLock: () => {},
captureAvailableSkills: () => {},
ensurePreconditions: () => {},
updateSliceProgressCache: () => {},
selectAndApplyModel: async () => ({ routing: null }),
startUnitSupervision: () => {},
getDeepDiagnostic: () => null,
isDbAvailable: () => false,
reorderForCaching: (p: string) => p,
// Fake filesystem: only paths ending in ".git" or "package.json" exist, and reads return "".
existsSync: (p: string) => p.endsWith(".git") || p.endsWith("package.json"),
readFileSync: () => "",
atomicWriteSync: () => {},
GitServiceImpl: class {} as any,
// Path resolver stub: every path getter returns "/tmp/project"; transitions are no-ops.
resolver: {
get workPath() {
return "/tmp/project";
},
get projectRoot() {
return "/tmp/project";
},
get lockPath() {
return "/tmp/project";
},
enterMilestone: () => {},
exitMilestone: () => {},
mergeAndExit: () => {},
mergeAndEnterNext: () => {},
} as any,
// Post-unit phases: each logs its name and returns "continue".
postUnitPreVerification: async () => {
callLog.push("postUnitPreVerification");
return "continue" as const;
},
runPostUnitVerification: async () => {
callLog.push("runPostUnitVerification");
return "continue" as const;
},
postUnitPostVerification: async () => {
callLog.push("postUnitPostVerification");
return "continue" as const;
},
getSessionFile: () => "/tmp/session.json",
rebuildState: async () => {},
resolveModelId: (id: string, models: any[]) => models.find((m: any) => m.id === id),
emitJournalEvent: () => {},
};
// Overrides win over defaults; callLog comes last so it is always the shared array.
const merged = { ...baseDeps, ...overrides, callLog };
return merged;
}
/**
 * Create the session double used by the autoLoop tests. It carries more
 * fields than the runUnit mock — per-unit counter maps, milestone state,
 * pending queues — and any field can be replaced through `overrides`.
 */
function makeLoopSession(overrides?: Partial<Record<string, unknown>>) {
  // newSession always succeeds; context usage is a fixed 10% reading.
  const cmdCtx = {
    newSession: () => Promise.resolve({ cancelled: false }),
    getContextUsage: () => ({ percent: 10, tokens: 1000, limit: 10000 }),
  };

  const defaults = {
    active: true,
    verbose: false,
    stepMode: false,
    paused: false,
    basePath: "/tmp/project",
    originalBasePath: "",
    currentMilestoneId: "M001",
    currentUnit: null,
    currentUnitRouting: null,
    completedUnits: [],
    resourceVersionOnStart: null,
    lastPromptCharCount: undefined,
    lastBaselineCharCount: undefined,
    lastBudgetAlertLevel: 0,
    pendingVerificationRetry: null,
    pendingCrashRecovery: null,
    pendingQuickTasks: [],
    sidecarQueue: [],
    autoModeStartModel: null,
    unitDispatchCount: new Map<string, number>(),
    unitLifetimeDispatches: new Map<string, number>(),
    unitRecoveryCount: new Map<string, number>(),
    verificationRetryCount: new Map<string, number>(),
    gitService: null,
    autoStartTime: Date.now(),
    cmdCtx,
    clearTimers: () => {},
  };

  // Caller-supplied fields take precedence over the defaults.
  return { ...defaults, ...overrides } as any;
}
test("autoLoop exits when s.active is set to false", async (_t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  const deps = makeMockDeps();
  const inactiveSession = makeLoopSession({ active: false });

  await autoLoop(ctx, makeMockPi(), inactiveSession, deps);

  // No deriveState entry in the log means the loop body never executed.
  const iterated = deps.callLog.includes("deriveState");
  assert.ok(!iterated, "loop should not have iterated");
});
test("autoLoop exits on terminal complete state", async (_t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};

  // A fresh "complete" snapshot is produced for every deriveState call.
  const finishedState = () =>
    ({
      phase: "complete",
      activeMilestone: { id: "M001", title: "Test", status: "complete" },
      activeSlice: null,
      activeTask: null,
      registry: [{ id: "M001", status: "complete" }],
      blockers: [],
    }) as any;

  const deps = makeMockDeps({
    deriveState: async () => {
      deps.callLog.push("deriveState");
      return finishedState();
    },
  });

  await autoLoop(ctx, makeMockPi(), makeLoopSession(), deps);

  const log = deps.callLog;
  assert.ok(log.includes("deriveState"), "should have derived state");
  assert.ok(
    log.includes("stopAuto"),
    "should have called stopAuto for complete state",
  );
  // A completed project must never reach the dispatch step.
  assert.ok(
    !log.includes("resolveDispatch"),
    "should not dispatch when complete",
  );
});
test("autoLoop passes structured session-lock failure details to the handler", async () => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};

  // Whatever status object the loop hands to the lost-lock handler.
  let received: SessionLockStatus | undefined;

  const deps = makeMockDeps({
    validateSessionLock: () =>
      ({
        valid: false,
        failureReason: "compromised",
        expectedPid: process.pid,
      }) as SessionLockStatus,
    handleLostSessionLock: (_ctx, lockStatus) => {
      received = lockStatus;
      deps.callLog.push("handleLostSessionLock");
    },
  });

  await autoLoop(ctx, makeMockPi(), makeLoopSession(), deps);

  // The handler must see the validator's result field-for-field.
  const expectedStatus = {
    valid: false,
    failureReason: "compromised",
    expectedPid: process.pid,
  };
  assert.deepEqual(received, expectedStatus);

  const dispatched = deps.callLog.includes("resolveDispatch");
  assert.ok(
    !dispatched,
    "should stop before dispatch after lock validation fails",
  );
});
test("autoLoop exits on terminal blocked state", async (_t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};

  // A fresh "blocked" snapshot with a single blocker per deriveState call.
  const blockedState = () =>
    ({
      phase: "blocked",
      activeMilestone: { id: "M001", title: "Test", status: "active" },
      activeSlice: null,
      activeTask: null,
      registry: [{ id: "M001", status: "active" }],
      blockers: ["Missing API key"],
    }) as any;

  const deps = makeMockDeps({
    deriveState: async () => {
      deps.callLog.push("deriveState");
      return blockedState();
    },
  });

  await autoLoop(ctx, makeMockPi(), makeLoopSession(), deps);

  const log = deps.callLog;
  assert.ok(log.includes("deriveState"), "should have derived state");
  assert.ok(
    log.includes("stopAuto"),
    "should have called stopAuto for blocked state",
  );
  assert.ok(
    !log.includes("resolveDispatch"),
    "should not dispatch when blocked",
  );
});
// Runs exactly one loop iteration and checks, via first-occurrence indices in
// callLog, that the phases were logged in this order:
// deriveState → resolveDispatch → postUnitPreVerification →
// runPostUnitVerification → postUnitPostVerification.
test("autoLoop calls deriveState → resolveDispatch → runUnit in sequence", async (t) => {
_resetPendingResolve();
const ctx = makeMockCtx();
ctx.ui.setStatus = () => {};
ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" };
const pi = makeMockPi();
const s = makeLoopSession();
const deps = makeMockDeps({
deriveState: async () => {
deps.callLog.push("deriveState");
return {
phase: "executing",
activeMilestone: { id: "M001", title: "Test", status: "active" },
activeSlice: { id: "S01", title: "Slice 1" },
activeTask: { id: "T01" },
registry: [{ id: "M001", status: "active" }],
blockers: [],
} as any;
},
resolveDispatch: async () => {
deps.callLog.push("resolveDispatch");
return {
action: "dispatch" as const,
unitType: "execute-task",
unitId: "M001/S01/T01",
prompt: "do the thing",
};
},
postUnitPostVerification: async () => {
deps.callLog.push("postUnitPostVerification");
// Deactivate after first iteration to exit the loop
s.active = false;
return "continue" as const;
},
});
// Run autoLoop — it will call runUnit internally which creates a promise.
// We need to resolve the promise from outside via resolveAgentEnd.
const loopPromise = autoLoop(ctx, pi, s, deps);
// Give the loop time to reach runUnit's await
await new Promise((r) => setTimeout(r, 50));
// Resolve the first unit's agent_end
resolveAgentEnd(makeEvent());
await loopPromise;
// Verify the sequence: deriveState → resolveDispatch → then finalize callbacks
// indexOf yields -1 for a missing entry, so each strict ">" comparison below
// also fails when the later phase was never logged.
const deriveIdx = deps.callLog.indexOf("deriveState");
const dispatchIdx = deps.callLog.indexOf("resolveDispatch");
const preVerIdx = deps.callLog.indexOf("postUnitPreVerification");
const verIdx = deps.callLog.indexOf("runPostUnitVerification");
const postVerIdx = deps.callLog.indexOf("postUnitPostVerification");
assert.ok(deriveIdx >= 0, "deriveState should have been called");
assert.ok(
dispatchIdx > deriveIdx,
"resolveDispatch should come after deriveState",
);
assert.ok(
preVerIdx > dispatchIdx,
"postUnitPreVerification should come after resolveDispatch",
);
assert.ok(
verIdx > preVerIdx,
"runPostUnitVerification should come after pre-verification",
);
assert.ok(
postVerIdx > verIdx,
"postUnitPostVerification should come after verification",
);
});
// Regression test for #1710. The mock newSession swaps the session file path,
// and the test records the sessionFile argument of every writeLock /
// updateSessionLock call to check which path each lock write carries.
test("crash lock records session file from AFTER newSession, not before (#1710)", async (t) => {
_resetPendingResolve();
const ctx = makeMockCtx();
ctx.ui.setStatus = () => {};
// Simulate newSession changing the session file path.
// newSession() in runUnit changes the underlying session, so getSessionFile
// returns a different path after newSession completes.
let currentSessionFile = "/tmp/old-session.json";
ctx.sessionManager = {
getSessionFile: () => currentSessionFile,
};
const pi = makeMockPi();
const s = makeLoopSession({
cmdCtx: {
newSession: () => {
// When newSession completes, the session file changes
currentSessionFile = "/tmp/new-session-after-newSession.json";
return Promise.resolve({ cancelled: false });
},
getContextUsage: () => ({ percent: 10, tokens: 1000, limit: 10000 }),
},
});
// Track all writeLock calls with their sessionFile argument
const writeLockCalls: { sessionFile: string | undefined }[] = [];
const updateSessionLockCalls: { sessionFile: string | undefined }[] = [];
const deps = makeMockDeps({
deriveState: async () => {
deps.callLog.push("deriveState");
return {
phase: "executing",
activeMilestone: { id: "M001", title: "Test", status: "active" },
activeSlice: { id: "S01", title: "Slice 1" },
activeTask: { id: "T01" },
registry: [{ id: "M001", status: "active" }],
blockers: [],
} as any;
},
resolveDispatch: async () => {
deps.callLog.push("resolveDispatch");
return {
action: "dispatch" as const,
unitType: "execute-task",
unitId: "M001/S01/T01",
prompt: "do the thing",
};
},
// Only the fifth argument (sessionFile) is recorded; the rest are ignored.
writeLock: (_base: string, _ut: string, _uid: string, _count: number, sessionFile?: string) => {
writeLockCalls.push({ sessionFile });
},
updateSessionLock: (_base: string, _ut: string, _uid: string, _count: number, sessionFile?: string) => {
updateSessionLockCalls.push({ sessionFile });
},
// Reads the live path through the context, so it reflects the swap done by newSession.
getSessionFile: (ctxArg: any) => {
return ctxArg.sessionManager?.getSessionFile() ?? "";
},
postUnitPostVerification: async () => {
deps.callLog.push("postUnitPostVerification");
// Deactivate after first iteration to exit the loop
s.active = false;
return "continue" as const;
},
});
const loopPromise = autoLoop(ctx, pi, s, deps);
// Give the loop time to reach runUnit's await
await new Promise((r) => setTimeout(r, 50));
// Resolve the unit's agent_end
resolveAgentEnd(makeEvent());
await loopPromise;
// The preliminary lock (before runUnit) should have NO session file
assert.ok(
writeLockCalls.length >= 2,
`expected at least 2 writeLock calls, got ${writeLockCalls.length}`,
);
assert.strictEqual(
writeLockCalls[0].sessionFile,
undefined,
"preliminary lock before runUnit should have no session file",
);
// The post-runUnit lock should have the NEW session file path
assert.strictEqual(
writeLockCalls[1].sessionFile,
"/tmp/new-session-after-newSession.json",
"post-runUnit lock should record the session file created by newSession",
);
// updateSessionLock should also have the new session file
assert.ok(
updateSessionLockCalls.length >= 1,
"updateSessionLock should have been called at least once",
);
assert.strictEqual(
updateSessionLockCalls[0].sessionFile,
"/tmp/new-session-after-newSession.json",
"updateSessionLock should record the session file created by newSession",
);
});
// Drives two loop iterations: the first verification reports "retry" (and
// stages a pendingVerificationRetry on the session), the second reports
// "continue", after which postUnitPostVerification deactivates the session.
test("autoLoop handles verification retry by continuing loop", async () => {
  _resetPendingResolve();
  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" };
  const pi = makeMockPi();
  let verifyCallCount = 0;
  const s = makeLoopSession();

  // Pre-queued verification actions: each entry provides a side-effect + return value
  type VerifyAction = { sideEffect?: () => void; response: "retry" | "continue" };
  const verificationActions: VerifyAction[] = [
    {
      sideEffect: () => {
        // Simulate retry — set pendingVerificationRetry on session
        s.pendingVerificationRetry = {
          unitId: "M001/S01/T01",
          failureContext: "test failed: expected X got Y",
          attempt: 1,
        };
      },
      response: "retry",
    },
    { response: "continue" },
  ];

  const deps = makeMockDeps({
    deriveState: async () => {
      deps.callLog.push("deriveState");
      return {
        phase: "executing",
        activeMilestone: { id: "M001", title: "Test", status: "active" },
        activeSlice: { id: "S01", title: "Slice 1" },
        activeTask: { id: "T01" },
        registry: [{ id: "M001", status: "active" }],
        blockers: [],
      } as any;
    },
    runPostUnitVerification: async () => {
      // Calls beyond the scripted list fall back to a plain "continue".
      const action = verificationActions[verifyCallCount] ?? { response: "continue" as const };
      verifyCallCount++;
      deps.callLog.push("runPostUnitVerification");
      action.sideEffect?.();
      return action.response;
    },
    postUnitPostVerification: async () => {
      deps.callLog.push("postUnitPostVerification");
      // After the retry cycle completes, deactivate
      s.active = false;
      return "continue" as const;
    },
  });

  const loopPromise = autoLoop(ctx, pi, s, deps);

  // First iteration: runUnit → verification returns "retry" → loop continues
  await new Promise((r) => setTimeout(r, 50));
  resolveAgentEnd(makeEvent()); // resolve first unit

  // Second iteration: runUnit → verification returns "continue"
  await new Promise((r) => setTimeout(r, 50));
  resolveAgentEnd(makeEvent()); // resolve retry unit

  await loopPromise;

  // Verify deriveState was called twice (two iterations)
  const deriveCount = deps.callLog.filter((c) => c === "deriveState").length;
  assert.ok(
    deriveCount >= 2,
    `deriveState should be called at least 2 times (got ${deriveCount})`,
  );

  // Verify verification was called twice
  assert.equal(
    verifyCallCount,
    2,
    "verification should have been called twice (once retry, once pass)",
  );
});
test("autoLoop handles dispatch stop action", async (_t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};

  const deps = makeMockDeps({
    // Dispatch resolution answers "stop" on every call.
    resolveDispatch: async () => {
      deps.callLog.push("resolveDispatch");
      return {
        action: "stop" as const,
        reason: "test-stop-reason",
        level: "info" as const,
      };
    },
  });

  await autoLoop(ctx, makeMockPi(), makeLoopSession(), deps);

  const log = deps.callLog;
  assert.ok(
    log.includes("resolveDispatch"),
    "should have called resolveDispatch",
  );
  assert.ok(
    log.includes("stopAuto"),
    "should have stopped on dispatch stop action",
  );
});
test("autoLoop handles dispatch skip action by continuing", async (_t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};

  // Scripted answers: "skip" on the first call, "stop" on every later call.
  const scripted = [
    { action: "skip" as const },
    { action: "stop" as const, reason: "done", level: "info" as const },
  ];
  let served = 0;

  const deps = makeMockDeps({
    resolveDispatch: async () => {
      // Clamp to the final entry once the script is exhausted.
      const idx = Math.min(served, scripted.length - 1);
      served += 1;
      deps.callLog.push("resolveDispatch");
      return scripted[idx];
    },
  });

  await autoLoop(ctx, makeMockPi(), makeLoopSession(), deps);

  const countOf = (name: string) =>
    deps.callLog.filter((entry) => entry === name).length;

  // skip → re-derive → stop: exactly two dispatch resolutions
  assert.equal(
    countOf("resolveDispatch"),
    2,
    "resolveDispatch should be called twice (skip then stop)",
  );
  assert.ok(
    countOf("deriveState") >= 2,
    "deriveState should be called at least twice (one per iteration)",
  );
});
// Two-unit scenario: the first postUnitPostVerification call pushes a hook item
// onto s.sidecarQueue, the second call deactivates the session. The test
// resolves two agent_end events and expects exactly two post-verification calls.
test("autoLoop drains sidecar queue after postUnitPostVerification enqueues items", async (t) => {
_resetPendingResolve();
const ctx = makeMockCtx();
ctx.ui.setStatus = () => {};
ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" };
const pi = makeMockPi();
const s = makeLoopSession();
let postVerCallCount = 0;
// One scripted side effect per postUnitPostVerification call, selected by call index.
const postVerActions: Array<() => void> = [
() => {
// First call (main unit): enqueue a sidecar item
s.sidecarQueue.push({
kind: "hook" as const,
unitType: "hook/review",
unitId: "M001/S01/T01/review",
prompt: "review the code",
});
},
() => {
// Second call (sidecar unit completed): deactivate
s.active = false;
},
];
const deps = makeMockDeps({
postUnitPostVerification: async () => {
// Calls beyond the scripted list have no side effect (optional call on undefined).
postVerActions[postVerCallCount]?.();
postVerCallCount++;
deps.callLog.push("postUnitPostVerification");
return "continue" as const;
},
});
const loopPromise = autoLoop(ctx, pi, s, deps);
// Wait for main unit's runUnit to be awaiting
await new Promise((r) => setTimeout(r, 50));
resolveAgentEnd(makeEvent()); // resolve main unit
// Wait for the sidecar unit's runUnit to be awaiting
await new Promise((r) => setTimeout(r, 50));
resolveAgentEnd(makeEvent()); // resolve sidecar unit
await loopPromise;
// postUnitPostVerification should have been called twice (main + sidecar)
assert.equal(
postVerCallCount,
2,
"postUnitPostVerification should be called twice (main + sidecar)",
);
});
test("autoLoop exits when no active milestone found", async (_t) => {
  _resetPendingResolve();

  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  const orphanSession = makeLoopSession({ currentMilestoneId: null });

  // Derived state has no milestone, slice or task, and an empty registry.
  const emptyState = () =>
    ({
      phase: "executing",
      activeMilestone: null,
      activeSlice: null,
      activeTask: null,
      registry: [],
      blockers: [],
    }) as any;

  const deps = makeMockDeps({
    deriveState: async () => {
      deps.callLog.push("deriveState");
      return emptyState();
    },
  });

  await autoLoop(ctx, makeMockPi(), orphanSession, deps);

  const stopped = deps.callLog.includes("stopAuto");
  assert.ok(stopped, "should stop when no milestone and all complete");
});
test("autoLoop exports LoopDeps type", async () => {
  const depsPath = resolve(import.meta.dirname, "..", "auto", "loop-deps.ts");
  const declaresInterface = readFileSync(depsPath, "utf-8").includes(
    "export interface LoopDeps",
  );

  // Substring check on the source text; the type itself is erased at runtime.
  assert.ok(
    declaresInterface,
    "auto/loop-deps.ts should export LoopDeps interface",
  );
});
test("autoLoop signature accepts deps parameter", async () => {
  const loopPath = resolve(import.meta.dirname, "..", "auto", "loop.ts");
  const takesDeps = readFileSync(loopPath, "utf-8").includes("deps: LoopDeps");

  // Looks for the literal parameter declaration in auto/loop.ts.
  assert.ok(
    takesDeps,
    "autoLoop should accept a deps: LoopDeps parameter",
  );
});
test("autoLoop contains while (s.active) loop", () => {
  const loopPath = resolve(import.meta.dirname, "..", "auto", "loop.ts");
  const hasActiveLoop = readFileSync(loopPath, "utf-8").includes("while (s.active)");

  // Exact-text match, so whitespace inside the loop header matters.
  assert.ok(
    hasActiveLoop,
    "autoLoop should contain a while (s.active) loop",
  );
});
// ── T03: End-to-end wiring structural assertions ─────────────────────────────
test("auto-loop.ts barrel re-exports autoLoop, runUnit, and resolveAgentEnd", () => {
  const readSibling = (...segments: string[]) =>
    readFileSync(resolve(import.meta.dirname, "..", ...segments), "utf-8");

  // The barrel only needs to mention each name.
  const barrel = readSibling("auto-loop.ts");
  for (const name of ["autoLoop", "runUnit", "resolveAgentEnd"]) {
    assert.ok(barrel.includes(name), `barrel must re-export ${name}`);
  }

  // Verify the actual function declarations exist in the submodules.
  const definitions = [
    { file: "loop.ts", declaration: "export async function autoLoop", name: "autoLoop" },
    { file: "run-unit.ts", declaration: "export async function runUnit", name: "runUnit" },
    { file: "resolve.ts", declaration: "export function resolveAgentEnd", name: "resolveAgentEnd" },
  ];
  for (const { file, declaration, name } of definitions) {
    const submoduleSrc = readSibling("auto", file);
    assert.ok(
      submoduleSrc.includes(declaration),
      `auto/${file} must define ${name}`,
    );
  }
});
test("auto.ts startAuto calls autoLoop (not dispatchNextUnit as first dispatch)", () => {
  const autoPath = resolve(import.meta.dirname, "..", "auto.ts");
  const src = readFileSync(autoPath, "utf-8");

  const start = src.indexOf("export async function startAuto");
  assert.ok(start > -1, "startAuto must exist in auto.ts");

  // The body is taken to run until the next "// ─── " section banner; when
  // none follows, a 5000-character window is inspected instead.
  const banner = src.indexOf("\n// ─── ", start + 100);
  const end = banner > -1 ? banner : start + 5000;
  const body = src.slice(start, end);

  assert.ok(
    body.includes("autoLoop("),
    "startAuto must call autoLoop() instead of dispatchNextUnit()",
  );
});
test("startAuto calls selfHealRuntimeRecords before autoLoop (#1727)", () => {
  const autoPath = resolve(import.meta.dirname, "..", "auto.ts");
  const src = readFileSync(autoPath, "utf-8");

  const start = src.indexOf("export async function startAuto");
  assert.ok(start > -1, "startAuto must exist in auto.ts");

  // Body = text up to the next section banner, or 5000 characters if none.
  const banner = src.indexOf("\n// ─── ", start + 100);
  const body = src.slice(start, banner > -1 ? banner : start + 5000);

  const HEAL = "selfHealRuntimeRecords";
  const LOOP = "autoLoop(";

  // First pair: the heal call must appear textually ahead of the loop call.
  const firstHeal = body.indexOf(HEAL);
  const firstLoop = body.indexOf(LOOP);
  assert.ok(firstHeal > -1, "startAuto must call selfHealRuntimeRecords");
  assert.ok(firstHeal < firstLoop, "selfHealRuntimeRecords must be called before autoLoop");

  // Second pair (optional): only enforced when a second autoLoop call exists.
  const secondLoop = body.indexOf(LOOP, firstLoop + 1);
  const secondHeal = body.indexOf(HEAL, firstHeal + 1);
  const secondPairOk =
    secondLoop === -1 || (secondHeal > -1 && secondHeal < secondLoop);
  assert.ok(
    secondPairOk,
    "if a second autoLoop call exists, it must also be preceded by selfHealRuntimeRecords",
  );
});
test("agent_end handler calls resolveAgentEnd (not handleAgentEnd)", () => {
  const bootstrapFile = (name: string) =>
    readFileSync(resolve(import.meta.dirname, "..", "bootstrap", name), "utf-8");

  // The hook registration itself lives in register-hooks.ts.
  const hooksSrc = bootstrapFile("register-hooks.ts");
  const registered = hooksSrc.indexOf('pi.on("agent_end"') > -1;
  assert.ok(registered, "register-hooks.ts must have an agent_end handler");

  // The handler logic is checked in agent-end-recovery.ts.
  const recoverySrc = bootstrapFile("agent-end-recovery.ts");
  assert.ok(
    recoverySrc.includes("resolveAgentEnd(event)"),
    "agent_end success path must call resolveAgentEnd(event) instead of handleAgentEnd(ctx, pi)",
  );
  assert.ok(
    recoverySrc.includes("isSessionSwitchInFlight()"),
    "agent_end handler must ignore session-switch agent_end events from cmdCtx.newSession()",
  );
});
test("auto-verification.ts runPostUnitVerification does not take dispatchNextUnit callback", () => {
  const src = readFileSync(
    resolve(import.meta.dirname, "..", "auto-verification.ts"),
    "utf-8",
  );

  const fnIdx = src.indexOf("export async function runPostUnitVerification");
  assert.ok(fnIdx > -1, "runPostUnitVerification must exist");

  // The signature is the text between the function keyword and its return
  // type annotation. If the annotation is not found, indexOf returns -1 and
  // slice(fnIdx, -1) would cover nearly the whole rest of the file, so the
  // checks below would no longer be about the signature. Fail explicitly.
  const sigEnd = src.indexOf("): Promise<VerificationResult>", fnIdx);
  assert.ok(
    sigEnd > fnIdx,
    "runPostUnitVerification signature must end with ): Promise<VerificationResult>",
  );
  const signature = src.slice(fnIdx, sigEnd);

  assert.ok(
    !signature.includes("dispatchNextUnit"),
    "runPostUnitVerification must not take a dispatchNextUnit callback parameter",
  );
  assert.ok(
    !signature.includes("startDispatchGapWatchdog"),
    "runPostUnitVerification must not take a startDispatchGapWatchdog callback parameter",
  );
});
// Structural check on auto-timeout-recovery.ts: it must contain a
// resolveAgentEnd( call and must not contain `await dispatchNextUnit`.
test("auto-timeout-recovery.ts calls resolveAgentEnd instead of dispatchNextUnit", () => {
  const recoveryPath = resolve(import.meta.dirname, "..", "auto-timeout-recovery.ts");
  const recoveryText = readFileSync(recoveryPath, "utf-8");

  const awaitsLegacyDispatch = recoveryText.includes("await dispatchNextUnit");
  assert.ok(
    !awaitsLegacyDispatch,
    "auto-timeout-recovery.ts must not call dispatchNextUnit",
  );

  const callsResolveAgentEnd = recoveryText.includes("resolveAgentEnd(");
  assert.ok(
    callsResolveAgentEnd,
    "auto-timeout-recovery.ts must call resolveAgentEnd to re-iterate the loop on timeout recovery",
  );
});
// Structural check on auto.ts: the text of handleAgentEnd must call
// resolveAgentEnd( and must not mention dispatchNextUnit or either of the
// post-unit verification helpers.
test("handleAgentEnd in auto.ts is a thin wrapper calling resolveAgentEnd", () => {
  const autoText = readFileSync(
    resolve(import.meta.dirname, "..", "auto.ts"),
    "utf-8",
  );
  const start = autoText.indexOf("export async function handleAgentEnd");
  assert.ok(start > -1, "handleAgentEnd must exist");

  // Inspect up to the next section banner, or a 1000-character window when
  // no banner follows the function.
  const nextBanner = autoText.indexOf("\n// ─── ", start + 100);
  const end = nextBanner > -1 ? nextBanner : start + 1000;
  const body = autoText.slice(start, end);

  assert.ok(
    body.includes("resolveAgentEnd("),
    "handleAgentEnd must call resolveAgentEnd",
  );

  // The function should be short — no reentrancy guard, no verification, no dispatch
  const mentionsDispatch = body.includes("dispatchNextUnit");
  assert.ok(
    !mentionsDispatch,
    "handleAgentEnd must not call dispatchNextUnit (it's now a thin wrapper)",
  );

  const mentionsVerification =
    body.includes("postUnitPreVerification") ||
    body.includes("postUnitPostVerification");
  assert.ok(
    !mentionsVerification,
    "handleAgentEnd must not contain verification logic (moved to autoLoop)",
  );
});
// ── Stuck counter tests ──────────────────────────────────────────────────────
// Feeds autoLoop a derived state and a dispatch that never change (always
// execute-task M001/S01/T01) and asserts that the loop ends through stopAuto
// with a reason mentioning "Stuck" and the unit id.
test("stuck detection: stops when sliding window detects same unit 3 consecutive times", async () => {
_resetPendingResolve();
const ctx = makeMockCtx();
// Replace the UI callbacks with no-ops for this test.
ctx.ui.setStatus = () => {};
ctx.ui.notify = () => {};
const pi = makeMockPi();
const s = makeLoopSession();
// Captures the reason string handed to the stopAuto mock below.
let stopReason = "";
const deps = makeMockDeps({
// Every call reports the same executing milestone/slice/task.
deriveState: async () =>
({
phase: "executing",
activeMilestone: { id: "M001", title: "Test", status: "active" },
activeSlice: { id: "S01", title: "Slice 1" },
activeTask: { id: "T01" },
registry: [{ id: "M001", status: "active" }],
blockers: [],
}) as any,
// Every call dispatches the same unit.
resolveDispatch: async () => ({
action: "dispatch" as const,
unitType: "execute-task",
unitId: "M001/S01/T01",
prompt: "do the thing",
}),
// Records the call and reason, then deactivates the session.
stopAuto: async (_ctx?: any, _pi?: any, reason?: string) => {
deps.callLog.push("stopAuto");
stopReason = reason ?? "";
s.active = false;
},
});
const loopPromise = autoLoop(ctx, pi, s, deps);
// Sliding window: iteration 1 pushes [A], iteration 2 pushes [A,A],
// iteration 3 pushes [A,A,A] → Rule 2 fires (3 consecutive) → Level 1 recovery.
// Level 1 invalidates caches and continues. Iteration 4 pushes [A,A,A,A] →
// Rule 2 fires again → Level 2 hard stop.
// Iterations 1-3 each run a unit (3 resolves needed). Iteration 3 triggers
// Level 1 (cache invalidation + continue). Iteration 4 triggers Level 2 (stop
// before runUnit), so no 4th resolve needed.
for (let i = 0; i < 3; i++) {
// Wait 30ms before each resolve — presumably long enough for the loop to
// reach its pending agent_end wait; confirm against autoLoop if flaky.
await new Promise((r) => setTimeout(r, 30));
resolveAgentEnd(makeEvent());
}
await loopPromise;
assert.ok(
deps.callLog.includes("stopAuto"),
"stopAuto should have been called",
);
assert.ok(
stopReason.includes("Stuck"),
`stop reason should mention 'Stuck', got: ${stopReason}`,
);
assert.ok(
stopReason.includes("M001/S01/T01"),
"stop reason should include unitId",
);
});
// Derives task T01 three times and then T02, with dispatch following the
// derived task, and asserts that stopAuto is never called and that
// deriveState ran at least four times.
test("stuck detection: window resets recovery when deriveState returns a different unit", async () => {
_resetPendingResolve();
const ctx = makeMockCtx();
// Replace the UI callbacks with no-ops for this test.
ctx.ui.setStatus = () => {};
ctx.ui.notify = () => {};
const pi = makeMockPi();
const s = makeLoopSession();
let deriveCallCount = 0;
let postVerCallCount = 0;
let stopCalled = false;
// First 3 derives return T01, 4th returns T02; dispatch follows the derived task
const derivedTaskIds = ["T01", "T01", "T01", "T02"];
const deps = makeMockDeps({
deriveState: async () => {
// Clamp the index so calls beyond the 4th keep returning the last entry (T02).
const taskId = derivedTaskIds[Math.min(deriveCallCount, derivedTaskIds.length - 1)];
deriveCallCount++;
deps.callLog.push("deriveState");
return {
phase: "executing",
activeMilestone: { id: "M001", title: "Test", status: "active" },
activeSlice: { id: "S01", title: "Slice 1" },
activeTask: { id: taskId },
registry: [{ id: "M001", status: "active" }],
blockers: [],
} as any;
},
resolveDispatch: async () => {
// deriveCallCount was already incremented by the preceding deriveState
// call, so subtracting 1 selects the task that call returned.
const taskId = derivedTaskIds[Math.min(deriveCallCount - 1, derivedTaskIds.length - 1)];
deps.callLog.push("resolveDispatch");
return {
action: "dispatch" as const,
unitType: "execute-task",
unitId: `M001/S01/${taskId}`,
prompt: "do the thing",
};
},
// The reason argument is accepted but not inspected in this test.
stopAuto: async (_ctx?: any, _pi?: any, reason?: string) => {
deps.callLog.push("stopAuto");
stopCalled = true;
s.active = false;
},
postUnitPostVerification: async () => {
postVerCallCount++;
deps.callLog.push("postUnitPostVerification");
// Exit on the 4th call (after T02 unit completes)
const shouldExit = postVerCallCount >= 4;
s.active = !shouldExit;
return "continue" as const;
},
});
const loopPromise = autoLoop(ctx, pi, s, deps);
// Resolve agent_end for iterations 1-4
for (let i = 0; i < 4; i++) {
await new Promise((r) => setTimeout(r, 30));
resolveAgentEnd(makeEvent());
}
await loopPromise;
// Level 1 recovery fires on iteration 3 (cache invalidation + continue),
// then iteration 4 derives T02 — no Level 2 hard stop.
assert.ok(
!stopCalled,
"stopAuto should NOT have been called — different unit broke stuck pattern",
);
assert.ok(
deriveCallCount >= 4,
`deriveState should have been called at least 4 times (got ${deriveCallCount})`,
);
});
// Runs the same unit four times where the first three verifications return
// "retry", and asserts that the stop reason never mentions "Stuck" and that
// verification ran exactly four times.
test("stuck detection: does not push to window during verification retry", async () => {
_resetPendingResolve();
const ctx = makeMockCtx();
// Replace the UI callbacks with no-ops for this test.
ctx.ui.setStatus = () => {};
ctx.ui.notify = () => {};
const pi = makeMockPi();
const s = makeLoopSession();
let verifyCallCount = 0;
let stopReason = "";
// Pre-queued responses: 3 retries then a continue (exit)
// Each retry entry also sets s.pendingVerificationRetry with an increasing
// attempt number; the final entry deactivates the session.
const verifyActions: Array<() => "retry" | "continue"> = [
() => { s.pendingVerificationRetry = { unitId: "M001/S01/T01", failureContext: "test failed", attempt: 1 }; return "retry"; },
() => { s.pendingVerificationRetry = { unitId: "M001/S01/T01", failureContext: "test failed", attempt: 2 }; return "retry"; },
() => { s.pendingVerificationRetry = { unitId: "M001/S01/T01", failureContext: "test failed", attempt: 3 }; return "retry"; },
() => { s.active = false; return "continue"; },
];
const deps = makeMockDeps({
deriveState: async () =>
({
phase: "executing",
activeMilestone: { id: "M001", title: "Test", status: "active" },
activeSlice: { id: "S01", title: "Slice 1" },
activeTask: { id: "T01" },
registry: [{ id: "M001", status: "active" }],
blockers: [],
}) as any,
resolveDispatch: async () => ({
action: "dispatch" as const,
unitType: "execute-task",
unitId: "M001/S01/T01",
prompt: "do the thing",
}),
runPostUnitVerification: async () => {
// Past the end of the queue, fall back to a deactivating "continue" so the
// loop cannot run on indefinitely.
const action = verifyActions[verifyCallCount] ?? (() => { s.active = false; return "continue" as const; });
verifyCallCount++;
deps.callLog.push("runPostUnitVerification");
return action();
},
stopAuto: async (_ctx?: any, _pi?: any, reason?: string) => {
deps.callLog.push("stopAuto");
stopReason = reason ?? "";
s.active = false;
},
});
const loopPromise = autoLoop(ctx, pi, s, deps);
// Resolve agent_end for 4 iterations (1 initial + 3 retries)
for (let i = 0; i < 4; i++) {
await new Promise((r) => setTimeout(r, 30));
resolveAgentEnd(makeEvent());
}
await loopPromise;
// Even though same unit was derived 4 times, verification retries should
// not push to the sliding window, so stuck detection should not have fired
assert.ok(
!stopReason.includes("Stuck"),
`stuck detection should not fire during verification retries, got: ${stopReason}`,
);
assert.equal(
verifyCallCount,
4,
"verification should have been called 4 times (1 initial + 3 retries)",
);
});
// ── detectStuck unit tests ────────────────────────────────────────────────────
// An empty window and a one-entry window must both yield null.
test("detectStuck: returns null for fewer than 2 entries", () => {
  const emptyVerdict = detectStuck([]);
  assert.equal(emptyVerdict, null);

  const singleVerdict = detectStuck([{ key: "A" }]);
  assert.equal(singleVerdict, null);
});
// Rule 1: two consecutive entries carrying the identical error are stuck.
test("detectStuck: Rule 1 — same error twice in a row", () => {
  const repeatedError = "ENOENT: file not found";
  const history = [
    { key: "A", error: repeatedError },
    { key: "A", error: repeatedError },
  ];
  const verdict = detectStuck(history);
  assert.ok(verdict?.stuck, "should detect same error repeated");
  assert.ok(verdict?.reason.includes("Same error repeated"));
});
// Rule 1 negative case: two entries with differing errors yield null.
test("detectStuck: Rule 1 — different errors do not trigger", () => {
  const history = [
    { key: "A", error: "ENOENT: file not found" },
    { key: "A", error: "EACCES: permission denied" },
  ];
  const verdict = detectStuck(history);
  assert.equal(verdict, null);
});
// Rule 2: the same key in three consecutive entries is reported as stuck.
test("detectStuck: Rule 2 — same unit 3 consecutive times", () => {
  const repeatedKey = "execute-task/M001/S01/T01";
  const history = Array.from({ length: 3 }, () => ({ key: repeatedKey }));
  const verdict = detectStuck(history);
  assert.ok(verdict?.stuck);
  assert.ok(verdict?.reason.includes("3 consecutive times"));
});
// Rule 2 negative case: only two repeats of a key is not enough.
test("detectStuck: Rule 2 — 2 consecutive does not trigger", () => {
  const history = [{ key: "A" }, { key: "A" }];
  const verdict = detectStuck(history);
  assert.equal(verdict, null);
});
// Rule 3: an alternating A, B, A, B window is reported as an oscillation.
test("detectStuck: Rule 3 — oscillation A→B→A→B", () => {
  const history = ["A", "B", "A", "B"].map((key) => ({ key }));
  const verdict = detectStuck(history);
  assert.ok(verdict?.stuck);
  assert.ok(verdict?.reason.includes("Oscillation"));
});
// Rule 3 negative case: A, B, C, B is not an alternating pair, so null.
test("detectStuck: Rule 3 — non-oscillation pattern A→B→C→B", () => {
  const history = ["A", "B", "C", "B"].map((key) => ({ key }));
  const verdict = detectStuck(history);
  assert.equal(verdict, null);
});
// Three identical entries with the same error satisfy both Rule 1 and Rule 2;
// the reported reason must be the Rule 1 wording.
test("detectStuck: Rule 1 takes priority over Rule 2 when both match", () => {
  const history = Array.from({ length: 3 }, () => ({
    key: "A",
    error: "test error",
  }));
  const verdict = detectStuck(history);
  assert.ok(verdict?.stuck);
  // Rule 1 fires first
  assert.ok(verdict?.reason.includes("Same error repeated"));
});
// A 500-character error repeated twice must produce a reason shorter than
// 300 characters.
test("detectStuck: truncates long error strings", () => {
  const oversizedError = "x".repeat(500);
  const history = [
    { key: "A", error: oversizedError },
    { key: "A", error: oversizedError },
  ];
  const verdict = detectStuck(history);
  assert.ok(verdict?.stuck);
  const reasonLength = verdict!.reason.length;
  assert.ok(reasonLength < 300, "reason should be truncated");
});
// Structural test: auto/phases.ts must contain the "stuck-detected" and
// "stuck-counter-reset" debug phase literals and reference detectStuck.
test("stuck detection: logs debug output with stuck-detected phase", () => {
  const phasesPath = resolve(import.meta.dirname, "..", "auto", "phases.ts");
  const phasesText = readFileSync(phasesPath, "utf-8");

  // Checked in order; each entry pairs a required snippet with its message.
  const expectations: Array<[string, string]> = [
    [
      '"stuck-detected"',
      "auto/phases.ts must log phase: 'stuck-detected' when stuck detection fires",
    ],
    [
      '"stuck-counter-reset"',
      "auto/phases.ts must log phase: 'stuck-counter-reset' when recovery resets on new unit",
    ],
    [
      "detectStuck",
      "auto/phases.ts must use detectStuck for sliding window analysis",
    ],
  ];
  for (const [snippet, failureMessage] of expectations) {
    assert.ok(phasesText.includes(snippet), failureMessage);
  }
});
// ── Lifecycle test (S05/T02) ─────────────────────────────────────────────────
// Walks autoLoop through five scripted phases (researching, planning,
// executing, verifying, completing) followed by a terminal "complete" state,
// and asserts that five units were dispatched in that exact order and that
// deriveState and resolveDispatch calls were interleaved.
test("autoLoop lifecycle: advances through research → plan → execute → verify → complete across iterations", async () => {
_resetPendingResolve();
const ctx = makeMockCtx();
// Replace the UI callbacks with no-ops for this test.
ctx.ui.setStatus = () => {};
ctx.ui.notify = () => {};
ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" };
const pi = makeMockPi();
const s = makeLoopSession();
let deriveCallCount = 0;
let dispatchCallCount = 0;
// Unit types in the order resolveDispatch handed them out.
const dispatchedUnitTypes: string[] = [];
// Phase sequence: each deriveState call returns a different phase.
// The 6th entry (index 5) is the terminal "complete" phase that stops the loop.
const phases = [
// Call 1: researching → dispatches research-slice
{
phase: "researching",
activeSlice: { id: "S01", title: "Research Slice" },
activeTask: null,
},
// Call 2: planning → dispatches plan-slice
{
phase: "planning",
activeSlice: { id: "S01", title: "Plan Slice" },
activeTask: null,
},
// Call 3: executing → dispatches execute-task
{
phase: "executing",
activeSlice: { id: "S01", title: "Execute Slice" },
activeTask: { id: "T01" },
},
// Call 4: verifying → dispatches verify-slice
{
phase: "verifying",
activeSlice: { id: "S01", title: "Verify Slice" },
activeTask: null,
},
// Call 5: completing → dispatches complete-slice
{
phase: "completing",
activeSlice: { id: "S01", title: "Complete Slice" },
activeTask: null,
},
// Call 6: terminal — deactivate to exit the loop
{
phase: "complete",
activeSlice: null,
activeTask: null,
},
];
// Dispatch results, consumed one per resolveDispatch call in this order.
const dispatches = [
{ unitType: "research-slice", unitId: "M001/S01", prompt: "research" },
{ unitType: "plan-slice", unitId: "M001/S01", prompt: "plan" },
{ unitType: "execute-task", unitId: "M001/S01/T01", prompt: "execute" },
{ unitType: "verify-slice", unitId: "M001/S01", prompt: "verify" },
{ unitType: "complete-slice", unitId: "M001/S01", prompt: "complete" },
];
const deps = makeMockDeps({
deriveState: async () => {
// Clamp so any call past the scripted list keeps returning the terminal phase.
const p = phases[Math.min(deriveCallCount, phases.length - 1)];
deriveCallCount++;
deps.callLog.push("deriveState");
// Maps a phase to the milestone status it implies; anything absent is "active".
const terminalPhases: Record<string, string> = { complete: "complete" };
// Deactivate the session as soon as the terminal phase is derived.
s.active = p.phase !== "complete";
const milestoneStatus = terminalPhases[p.phase] ?? "active";
return {
phase: p.phase,
activeMilestone: { id: "M001", title: "Test", status: milestoneStatus },
activeSlice: p.activeSlice ?? null,
activeTask: p.activeTask ?? null,
registry: [{ id: "M001", status: milestoneStatus }],
blockers: [],
} as any;
},
resolveDispatch: async () => {
// Clamp so extra calls repeat the last scripted dispatch.
const d = dispatches[Math.min(dispatchCallCount, dispatches.length - 1)];
dispatchCallCount++;
deps.callLog.push("resolveDispatch");
dispatchedUnitTypes.push(d.unitType);
return {
action: "dispatch" as const,
unitType: d.unitType,
unitId: d.unitId,
prompt: d.prompt,
};
},
postUnitPostVerification: async () => {
deps.callLog.push("postUnitPostVerification");
return "continue" as const;
},
});
const loopPromise = autoLoop(ctx, pi, s, deps);
// Resolve each iteration's agent_end — 5 iterations, each dispatches a unit
for (let i = 0; i < 5; i++) {
await new Promise((r) => setTimeout(r, 30));
resolveAgentEnd(makeEvent());
}
await loopPromise;
// Assert deriveState was called at least 5 times (once per iteration)
assert.ok(
deriveCallCount >= 5,
`deriveState should be called at least 5 times (got ${deriveCallCount})`,
);
// Assert the dispatched unit types cover the full lifecycle sequence
assert.ok(
dispatchedUnitTypes.includes("research-slice"),
`should have dispatched research-slice, got: ${dispatchedUnitTypes.join(", ")}`,
);
assert.ok(
dispatchedUnitTypes.includes("plan-slice"),
`should have dispatched plan-slice, got: ${dispatchedUnitTypes.join(", ")}`,
);
assert.ok(
dispatchedUnitTypes.includes("execute-task"),
`should have dispatched execute-task, got: ${dispatchedUnitTypes.join(", ")}`,
);
assert.ok(
dispatchedUnitTypes.includes("verify-slice"),
`should have dispatched verify-slice, got: ${dispatchedUnitTypes.join(", ")}`,
);
assert.ok(
dispatchedUnitTypes.includes("complete-slice"),
`should have dispatched complete-slice, got: ${dispatchedUnitTypes.join(", ")}`,
);
// Assert call sequence: deriveState and resolveDispatch entries are interleaved
const deriveEntries = deps.callLog.filter((c) => c === "deriveState");
const dispatchEntries = deps.callLog.filter((c) => c === "resolveDispatch");
assert.ok(
deriveEntries.length >= 5,
`callLog should have at least 5 deriveState entries (got ${deriveEntries.length})`,
);
assert.ok(
dispatchEntries.length >= 5,
`callLog should have at least 5 resolveDispatch entries (got ${dispatchEntries.length})`,
);
// Verify interleaving: a deriveState must follow a resolveDispatch (confirms loop advanced)
const firstDispatchIdx = deps.callLog.indexOf("resolveDispatch");
const firstDeriveAfterDispatch = deps.callLog.indexOf("deriveState", firstDispatchIdx + 1);
assert.ok(firstDispatchIdx >= 0, "resolveDispatch should appear in callLog");
assert.ok(firstDeriveAfterDispatch > firstDispatchIdx, "deriveState should follow resolveDispatch to confirm loop advanced");
// Assert the exact sequence of dispatched unit types
assert.deepEqual(
dispatchedUnitTypes,
[
"research-slice",
"plan-slice",
"execute-task",
"verify-slice",
"complete-slice",
],
"dispatched unit types should follow the full lifecycle sequence",
);
});
// ─── resolveAgentEndCancelled tests ──────────────────────────────────────────
// A runUnit promise that is still pending must settle with status
// "cancelled" and no event once resolveAgentEndCancelled() is called.
test("resolveAgentEndCancelled resolves a pending promise with cancelled status", async () => {
  _resetPendingResolve();
  const pendingUnit = runUnit(
    makeMockCtx(),
    makeMockPi(),
    makeMockSession(),
    "task",
    "T01",
    "prompt",
  );

  // Short pause before cancelling the in-flight unit.
  await new Promise<void>((wake) => {
    setTimeout(wake, 10);
  });
  resolveAgentEndCancelled();

  const { status, event } = await pendingUnit;
  assert.equal(status, "cancelled");
  assert.equal(event, undefined);
});
// With no unit in flight, calling resolveAgentEndCancelled must not throw.
test("resolveAgentEndCancelled is a no-op when no promise is pending", () => {
  _resetPendingResolve();
  const cancelWithNothingPending = () => resolveAgentEndCancelled();
  assert.doesNotThrow(cancelWithNothingPending);
});
// Abort path: the session is deactivated and then the pending unit is
// cancelled; the runUnit promise must still settle with status "cancelled".
test("resolveAgentEndCancelled prevents orphaned promise after abort path", async () => {
  _resetPendingResolve();
  const ctx = makeMockCtx();
  const pi = makeMockPi();
  const session = makeMockSession();
  const pendingUnit = runUnit(ctx, pi, session, "task", "T01", "prompt");

  await new Promise<void>((wake) => {
    setTimeout(wake, 10);
  });
  session.active = false;
  resolveAgentEndCancelled();

  const { status } = await pendingUnit;
  assert.equal(status, "cancelled");
});
// ─── #1571: artifact verification retry ──────────────────────────────────────
// Regression test for #1571: postUnitPreVerification answers "retry" once and
// then "continue". Asserts that pre-verification ran twice while
// runPostUnitVerification and postUnitPostVerification each appear once in
// the call log.
test("autoLoop re-iterates when postUnitPreVerification returns retry (#1571)", async () => {
_resetPendingResolve();
const ctx = makeMockCtx();
ctx.ui.setStatus = () => {};
const pi = makeMockPi();
const s = makeLoopSession();
let preVerifyCallCount = 0;
// Pre-queued responses: first call returns "retry", second returns "continue"
const preVerifyResponses = ["retry", "continue"] as const;
const deps = makeMockDeps({
deriveState: async () => {
deps.callLog.push("deriveState");
return {
phase: "executing",
activeMilestone: { id: "M001", title: "Test", status: "active" },
activeSlice: { id: "S01", title: "Slice 1" },
activeTask: { id: "T01" },
registry: [{ id: "M001", status: "active" }],
blockers: [],
} as any;
},
postUnitPreVerification: async () => {
deps.callLog.push("postUnitPreVerification");
// The post-increment reads the current index and then advances the counter;
// calls past the queue fall back to "continue".
return preVerifyResponses[preVerifyCallCount++] ?? "continue";
},
// Deactivates the session so the loop ends after the first full pass.
postUnitPostVerification: async () => {
deps.callLog.push("postUnitPostVerification");
s.active = false;
return "continue" as const;
},
});
const loopPromise = autoLoop(ctx, pi, s, deps);
// Two agent_end resolves: one for the retried pass, one for the pass that continues.
await new Promise((r) => setTimeout(r, 50));
resolveAgentEnd(makeEvent());
await new Promise((r) => setTimeout(r, 50));
resolveAgentEnd(makeEvent());
await loopPromise;
assert.equal(preVerifyCallCount, 2, "preVerification should be called twice");
// NOTE(review): relies on the default runPostUnitVerification mock from
// makeMockDeps logging "runPostUnitVerification" to callLog — confirm in the helper.
const postVerifyCalls = deps.callLog.filter(
(c: string) => c === "runPostUnitVerification",
);
const postPostVerifyCalls = deps.callLog.filter(
(c: string) => c === "postUnitPostVerification",
);
assert.equal(postVerifyCalls.length, 1, "runPostUnitVerification should only be called once");
assert.equal(postPostVerifyCalls.length, 1, "postUnitPostVerification should only be called once");
});
// ─── stopAuto unitPromise leak regression (#1799) ────────────────────────────
// Regression test for #1799: resolving agent_end before the pending-resolve
// state is reset and the session is deactivated must let runUnit settle with
// status "completed".
test("resolveAgentEnd unblocks pending runUnit when called before session reset (#1799)", async () => {
  _resetPendingResolve();
  const ctx = makeMockCtx();
  const pi = makeMockPi();
  const session = makeMockSession();
  const pendingUnit = runUnit(ctx, pi, session, "task", "T01", "do work");

  await new Promise<void>((wake) => {
    setTimeout(wake, 10);
  });

  // Resolve first, then perform the reset and deactivation steps.
  resolveAgentEnd({ messages: [] });
  _resetPendingResolve();
  session.active = false;

  const { status } = await pendingUnit;
  assert.equal(status, "completed", "runUnit should resolve, not hang");
});
// ─── Zero tool-call hallucination guard (#1833) ───────────────────────────
// Regression test for #1833: an execute-task unit whose ledger entry records
// 0 tool calls must produce a notification mentioning "0 tool calls" and
// "hallucinated", and the loop must derive state at least twice.
test("autoLoop rejects execute-task with 0 tool calls as hallucinated (#1833)", async () => {
_resetPendingResolve();
const ctx = makeMockCtx();
ctx.ui.setStatus = () => {};
ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" };
const pi = makeMockPi();
let iterationCount = 0;
// Collects every message passed to ctx.ui.notify.
const notifications: string[] = [];
ctx.ui.notify = (msg: string) => { notifications.push(msg); };
const s = makeLoopSession();
// Mock ledger: execute-task completed with 0 tool calls
const mockLedger = {
version: 1,
projectStartedAt: Date.now(),
units: [] as any[],
};
const deps = makeMockDeps({
deriveState: async () => {
deps.callLog.push("deriveState");
return {
phase: "executing",
activeMilestone: { id: "M001", title: "Test", status: "active" },
activeSlice: { id: "S01", title: "Slice 1" },
activeTask: { id: "T01" },
registry: [{ id: "M001", status: "active" }],
blockers: [],
} as any;
},
resolveDispatch: async () => {
deps.callLog.push("resolveDispatch");
return {
action: "dispatch" as const,
unitType: "execute-task",
unitId: "M001/S01/T01",
prompt: "implement the feature",
};
},
closeoutUnit: async () => {
// Simulate snapshotUnitMetrics adding a 0-toolCalls entry to ledger
mockLedger.units.push({
type: "execute-task",
id: "M001/S01/T01",
startedAt: s.currentUnit?.startedAt ?? Date.now(),
toolCalls: 0,
assistantMessages: 1,
tokens: { input: 100, output: 200, total: 300, cacheRead: 0, cacheWrite: 0 },
cost: 0.50,
});
},
getLedger: () => mockLedger,
postUnitPostVerification: async () => {
deps.callLog.push("postUnitPostVerification");
iterationCount++;
// Deactivate after 2nd iteration
s.active = iterationCount < 2;
return "continue" as const;
},
});
const loopPromise = autoLoop(ctx, pi, s, deps);
// First iteration: execute-task with 0 tool calls → rejected
await new Promise((r) => setTimeout(r, 50));
resolveAgentEnd(makeEvent());
// Second iteration: same task re-dispatched, this time with tool calls
await new Promise((r) => setTimeout(r, 50));
mockLedger.units.length = 0; // clear previous entry
// Swap the mock on the deps object itself; assumes autoLoop reads
// deps.closeoutUnit at call time rather than capturing it up front — TODO confirm.
(deps as any).closeoutUnit = async () => {
mockLedger.units.push({
type: "execute-task",
id: "M001/S01/T01",
startedAt: s.currentUnit?.startedAt ?? Date.now(),
toolCalls: 5,
assistantMessages: 3,
tokens: { input: 500, output: 800, total: 1300, cacheRead: 0, cacheWrite: 0 },
cost: 1.00,
});
};
resolveAgentEnd(makeEvent());
await loopPromise;
// The task should NOT have been added to completedUnits on the first iteration
// (0 tool calls), but SHOULD be added on the second iteration (5 tool calls)
// NOTE(review): completedUnits is not asserted below; only the warning
// notification and the deriveState count are checked.
const warningNotification = notifications.find(
(n) => n.includes("0 tool calls") && n.includes("hallucinated"),
);
assert.ok(
warningNotification,
"should notify about 0 tool calls hallucination",
);
// Verify deriveState was called at least twice (two iterations)
const deriveCount = deps.callLog.filter((c) => c === "deriveState").length;
assert.ok(
deriveCount >= 2,
`deriveState should be called at least 2 times for retry (got ${deriveCount})`,
);
});
// Counterpart for #1833: a complete-slice unit whose ledger entry records
// 0 tool calls must not trigger the hallucination warning and must end up in
// s.completedUnits.
test("autoLoop does NOT reject non-execute-task units with 0 tool calls (#1833)", async () => {
_resetPendingResolve();
const ctx = makeMockCtx();
ctx.ui.setStatus = () => {};
ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" };
const pi = makeMockPi();
// Collects every message passed to ctx.ui.notify.
const notifications: string[] = [];
ctx.ui.notify = (msg: string) => { notifications.push(msg); };
const s = makeLoopSession();
const mockLedger = {
version: 1,
projectStartedAt: Date.now(),
units: [] as any[],
};
const deps = makeMockDeps({
deriveState: async () => {
deps.callLog.push("deriveState");
return {
phase: "executing",
activeMilestone: { id: "M001", title: "Test", status: "active" },
activeSlice: { id: "S01", title: "Slice 1" },
activeTask: { id: "T01" },
registry: [{ id: "M001", status: "active" }],
blockers: [],
} as any;
},
// Dispatches a complete-slice unit even though the derived phase is "executing".
resolveDispatch: async () => {
deps.callLog.push("resolveDispatch");
return {
action: "dispatch" as const,
unitType: "complete-slice",
unitId: "M001/S01",
prompt: "complete the slice",
};
},
closeoutUnit: async () => {
// complete-slice with 0 tool calls is fine (e.g. it may just update status)
mockLedger.units.push({
type: "complete-slice",
id: "M001/S01",
startedAt: s.currentUnit?.startedAt ?? Date.now(),
toolCalls: 0,
assistantMessages: 1,
tokens: { input: 50, output: 100, total: 150, cacheRead: 0, cacheWrite: 0 },
cost: 0.10,
});
},
getLedger: () => mockLedger,
// Always report the expected artifact as present.
verifyExpectedArtifact: () => true,
// Deactivates the session so the loop ends after one iteration.
postUnitPostVerification: async () => {
deps.callLog.push("postUnitPostVerification");
s.active = false;
return "continue" as const;
},
});
const loopPromise = autoLoop(ctx, pi, s, deps);
await new Promise((r) => setTimeout(r, 50));
resolveAgentEnd(makeEvent());
await loopPromise;
// Should NOT have a hallucination warning for non-execute-task units
const warningNotification = notifications.find(
(n) => n.includes("0 tool calls") && n.includes("hallucinated"),
);
assert.ok(
!warningNotification,
"should NOT flag non-execute-task units with 0 tool calls",
);
// The unit should have been added to completedUnits normally
assert.ok(
s.completedUnits.length >= 1,
"complete-slice with 0 tool calls should still be marked as completed",
);
});
// ─── Worktree health check (#1833) ────────────────────────────────────────
// Worktree health check (#1833): when the injected existsSync reports that no
// path ending in ".git" exists, autoLoop must call stopAuto and emit a
// notification naming the failed health check and the missing .git.
test("autoLoop stops when worktree has no .git for execute-task (#1833)", async () => {
  _resetPendingResolve();
  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" };
  const pi = makeMockPi();

  // Record every notification so the health-check message can be found later.
  const seenMessages: string[] = [];
  ctx.ui.notify = (msg: string) => {
    seenMessages.push(msg);
  };

  const session = makeLoopSession({ basePath: "/tmp/broken-worktree" });

  // Builds a fresh executing-phase state object on every call.
  const buildExecutingState = () =>
    ({
      phase: "executing",
      activeMilestone: { id: "M001", title: "Test", status: "active" },
      activeSlice: { id: "S01", title: "Slice 1" },
      activeTask: { id: "T01" },
      registry: [{ id: "M001", status: "active" }],
      blockers: [],
    }) as any;

  const deps = makeMockDeps({
    deriveState: async () => {
      deps.callLog.push("deriveState");
      return buildExecutingState();
    },
    // .git does not exist in the broken worktree
    existsSync: (candidatePath: string) => !candidatePath.endsWith(".git"),
  });

  await autoLoop(ctx, pi, session, deps);

  const stopRecorded = deps.callLog.includes("stopAuto");
  assert.ok(stopRecorded, "should stop auto-mode when worktree is invalid");

  const sawMissingGitNotice = seenMessages.some(
    (message) =>
      message.includes("Worktree health check failed") &&
      message.includes("no .git"),
  );
  assert.ok(sawMissingGitNotice, "should notify about missing .git in worktree");
});
// Greenfield case (#1833): when only paths ending in ".git" exist, autoLoop
// must not emit the "Worktree health check failed" notification and must emit
// a warning that mentions "no recognized project files" and "greenfield".
test("autoLoop warns but proceeds for greenfield project (no project files) (#1833)", async () => {
  _resetPendingResolve();
  const ctx = makeMockCtx();
  ctx.ui.setStatus = () => {};
  ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" };
  const pi = makeMockPi();
  const seenMessages: string[] = [];
  const session = makeLoopSession({ basePath: "/tmp/empty-worktree" });

  ctx.ui.notify = (msg: string) => {
    seenMessages.push(msg);
    // Terminate the loop after the greenfield warning fires,
    // so we don't hang waiting for dispatch resolution.
    if (!msg.includes("greenfield")) {
      return;
    }
    session.active = false;
  };

  // Builds a fresh executing-phase state object on every call.
  const buildExecutingState = () =>
    ({
      phase: "executing",
      activeMilestone: { id: "M001", title: "Test", status: "active" },
      activeSlice: { id: "S01", title: "Slice 1" },
      activeTask: { id: "T01" },
      registry: [{ id: "M001", status: "active" }],
      blockers: [],
    }) as any;

  const deps = makeMockDeps({
    deriveState: async () => {
      deps.callLog.push("deriveState");
      return buildExecutingState();
    },
    // Has .git but no package.json or src/
    existsSync: (candidatePath: string) => candidatePath.endsWith(".git"),
  });

  await autoLoop(ctx, pi, session, deps);

  // Should NOT have stopped auto-mode due to health check — greenfield is allowed
  const sawHealthFailure = seenMessages.some((message) =>
    message.includes("Worktree health check failed"),
  );
  assert.ok(
    !sawHealthFailure,
    "should not stop with health check failure for greenfield project",
  );

  const sawGreenfieldWarning = seenMessages.some(
    (message) =>
      message.includes("no recognized project files") &&
      message.includes("greenfield"),
  );
  assert.ok(
    sawGreenfieldWarning,
    "should warn about greenfield project (no project files)",
  );
});