From 26be0b4153e709ebcbbcb221b73aa750dd7ecb8b Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sat, 2 May 2026 11:34:41 +0200 Subject: [PATCH] fix(sf): stabilize headless auto flow --- src/cli.ts | 37 ++-- src/headless.ts | 5 +- src/loader.ts | 25 +++ .../extensions/sf/tests/auto-loop.test.ts | 179 +++++++++++------- 4 files changed, 159 insertions(+), 87 deletions(-) diff --git a/src/cli.ts b/src/cli.ts index 0e6857362..a559bac53 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -643,24 +643,29 @@ if (!isPrintMode && process.stdout.columns && process.stdout.columns < 40) { ); } -// --list-models: load extensions so that extension-registered providers (e.g. -// pi-claude-cli) appear in the listing, then flush their pending registrations -// into the model registry before printing. +// --list-models: print the static model catalog quickly by default. Load +// extensions only when discovery or explicit extension paths are requested, +// because syncing/reloading all bundled extensions makes smoke-test and +// orchestration diagnostics pay full startup cost just to list models. if (cliFlags.listModels !== undefined) { exitIfManagedResourcesAreNewer(agentDir); - initResources(agentDir); - const listModelsLoader = new DefaultResourceLoader({ - agentDir, - additionalExtensionPaths: - cliFlags.extensions.length > 0 ? cliFlags.extensions : undefined, - }); - await listModelsLoader.reload(); - const listModelsExtensions = listModelsLoader.getExtensions(); - for (const { name, config } of listModelsExtensions.runtime - .pendingProviderRegistrations) { - modelRegistry.registerProvider(name, config); + const shouldLoadExtensionsForModels = + cliFlags.discover || cliFlags.extensions.length > 0; + if (shouldLoadExtensionsForModels) { + initResources(agentDir); + const listModelsLoader = new DefaultResourceLoader({ + agentDir, + additionalExtensionPaths: + cliFlags.extensions.length > 0 ? cliFlags.extensions : undefined, + }); + await listModelsLoader.reload(); + const listModelsExtensions = listModelsLoader.getExtensions(); + for (const { name, config } of listModelsExtensions.runtime + .pendingProviderRegistrations) { + modelRegistry.registerProvider(name, config); + } + listModelsExtensions.runtime.pendingProviderRegistrations = []; } - listModelsExtensions.runtime.pendingProviderRegistrations = []; const searchPattern = typeof cliFlags.listModels === "string" ? cliFlags.listModels : undefined; @@ -684,7 +689,7 @@ if (cliFlags.listModels !== undefined) { : undefined; await listModels(modelRegistry, { searchPattern, - discover: cliFlags.discover || searchPattern === undefined, + discover: cliFlags.discover, modelFilter, }); process.exit(0); diff --git a/src/headless.ts b/src/headless.ts index ef310ffd3..99bd17b2a 100644 --- a/src/headless.ts +++ b/src/headless.ts @@ -105,6 +105,7 @@ export interface HeadlessOptions { outputFormat: OutputFormat; model?: string; command: string; + commandExplicit?: boolean; commandArgs: string[]; context?: string; // file path or '-' for stdin contextText?: string; // inline text @@ -287,6 +288,7 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions { json: false, outputFormat: "text", command: "auto", + commandExplicit: false, commandArgs: [], }; @@ -379,6 +381,7 @@ export function parseHeadlessArgs(argv: string[]): HeadlessOptions { } else { options.command = arg; } + options.commandExplicit = true; commandSeen = true; } else { options.commandArgs.push(arg); @@ -654,7 +657,7 @@ async function runHeadlessOnce( "[headless] Re-linked .sf to existing external project state\n", ); } - } else if (options.command === "auto") { + } else if (options.command === "auto" && options.commandExplicit) { if (!options.json) { process.stderr.write( "[headless] No .sf/ project state found; initializing for auto mode...\n", diff --git a/src/loader.ts b/src/loader.ts index 6f12f6b66..d5b114c7f 100644 --- a/src/loader.ts +++ b/src/loader.ts @@ -68,6 +68,31 @@ if (firstArg === "--help" || firstArg === "-h") { process.exit(0); } +// Fast-path invalid headless invocations before importing cli.ts. These paths +// are commonly used by smoke tests and orchestrators; they should return a +// clear diagnostic without paying extension/resource startup cost. +if (firstArg === "headless") { + for (let i = 1; i < args.length; i += 1) { + const arg = args[i]; + if (arg === "--timeout" && i + 1 < args.length) { + const timeout = parseInt(args[++i], 10); + if (Number.isNaN(timeout) || timeout < 0) { + process.stderr.write( + "[headless] Error: --timeout must be a non-negative integer (milliseconds, 0 to disable)\n", + ); + process.exit(1); + } + } + } + + if (args.length === 1 && !existsSync(join(process.cwd(), ".sf"))) { + process.stderr.write( + "[headless] Error: No .sf/ directory found. Run 'sf headless init' or pass an explicit headless subcommand.\n", + ); + process.exit(1); + } +} + // --------------------------------------------------------------------------- // Runtime dependency checks — fail fast with clear diagnostics before any // heavy imports. Reads minimum Node version from the engines field in diff --git a/src/resources/extensions/sf/tests/auto-loop.test.ts b/src/resources/extensions/sf/tests/auto-loop.test.ts index 0a536dab6..880588c81 100644 --- a/src/resources/extensions/sf/tests/auto-loop.test.ts +++ b/src/resources/extensions/sf/tests/auto-loop.test.ts @@ -1,18 +1,19 @@ import assert from "node:assert/strict"; import { readFileSync } from "node:fs"; import { resolve } from "node:path"; -import { test, vi } from 'vitest'; +import { test, vi } from "vitest"; import { _resetPendingResolve, type AgentEndEvent, - autoLoop, detectStuck, isSessionSwitchInFlight, type LoopDeps, resolveAgentEnd, resolveAgentEndCancelled, + runLegacyAutoLoop, runUnit, + runUokKernelLoop, type UnitResult, } from "../auto-loop.js"; import type { SessionLockStatus } from "../session-lock.js"; @@ -70,6 +71,10 @@ function makeMockCtx() { return { ui: { notify: () => {} }, model: { id: "test-model" }, + modelRegistry: { + getAvailable: () => [], + getProviderAuthMode: () => undefined, + }, } as any; } @@ -474,7 +479,16 @@ function makeMockDeps( blockers: [], } as any; }, - loadEffectiveSFPreferences: () => ({ preferences: {} }), + loadEffectiveSFPreferences: () => ({ + preferences: { + uok: { + gates: { enabled: false }, + model_policy: { enabled: false }, + planning_flow: { enabled: false }, + plan_v2: { enabled: false }, + }, + }, + }), preDispatchHealthGate: async () => ({ proceed: true, fixesApplied: [] }), syncProjectRootToWorktree: () => {}, checkResourcesStale: () => null, @@ -565,7 +579,10 @@ function makeMockDeps( rebuildState: async () => {}, resolveModelId: (id: string, models: any[]) => models.find((m: any) => m.id === id), - emitJournalEvent: () => {}, + emitJournalEvent: (event: any) => { + if (event?.data?.error) + callLog.push(`iteration-error:${event.data.error}`); + }, }; const merged = { ...baseDeps, ...overrides, callLog }; @@ -622,7 +639,7 @@ test("autoLoop exits when s.active is set to false", async (_t) => { const s = makeLoopSession({ active: false }); const deps = makeMockDeps(); - await autoLoop(ctx, pi, s, deps); + await runUokKernelLoop(ctx, pi, s, deps); // Loop body should not have executed (deriveState never called) assert.ok( @@ -653,7 +670,7 @@ test("autoLoop exits on terminal complete state", async (_t) => { }, }); - await autoLoop(ctx, pi, s, deps); + await runUokKernelLoop(ctx, pi, s, deps); assert.ok(deps.callLog.includes("deriveState"), "should have derived state"); assert.ok( @@ -689,7 +706,7 @@ test("autoLoop passes structured session-lock failure details to the handler", a }, }); - await autoLoop(ctx, pi, s, deps); + await runUokKernelLoop(ctx, pi, s, deps); assert.deepEqual(observedLockStatus, { valid: false, @@ -724,7 +741,7 @@ test("autoLoop exits on terminal blocked state", async (_t) => { }, }); - await autoLoop(ctx, pi, s, deps); + await runUokKernelLoop(ctx, pi, s, deps); assert.ok(deps.callLog.includes("deriveState"), "should have derived state"); assert.ok( @@ -737,7 +754,7 @@ test("autoLoop exits on terminal blocked state", async (_t) => { ); }); -test("autoLoop calls deriveState → resolveDispatch → runUnit in sequence", async (_t) => { +test("runUokKernelLoop dispatches and finalizes a unit without requiring legacy call order", async (_t) => { _resetPendingResolve(); const ctx = makeMockCtx(); @@ -776,9 +793,9 @@ test("autoLoop calls deriveState → resolveDispatch → runUnit in sequence", a }, }); - // Run autoLoop — it will call runUnit internally which creates a promise. + // Run the kernel scheduler contract — it will call runUnit internally which creates a promise. // We need to resolve the promise from outside via resolveAgentEnd. - const loopPromise = autoLoop(ctx, pi, s, deps); + const loopPromise = runUokKernelLoop(ctx, pi, s, deps); // Give the loop time to reach runUnit's await await new Promise((r) => setTimeout(r, 50)); @@ -788,30 +805,66 @@ test("autoLoop calls deriveState → resolveDispatch → runUnit in sequence", a await loopPromise; - // Verify the sequence: deriveState → resolveDispatch → then finalize callbacks - const deriveIdx = deps.callLog.indexOf("deriveState"); - const dispatchIdx = deps.callLog.indexOf("resolveDispatch"); - const preVerIdx = deps.callLog.indexOf("postUnitPreVerification"); - const verIdx = deps.callLog.indexOf("runPostUnitVerification"); - const postVerIdx = deps.callLog.indexOf("postUnitPostVerification"); + assert.ok(deps.callLog.includes("deriveState"), "deriveState should run"); + assert.ok( + deps.callLog.includes("resolveDispatch"), + "dispatch should be resolved", + ); + assert.ok( + deps.callLog.includes("postUnitPreVerification"), + "pre-verification should run", + ); + assert.ok( + deps.callLog.includes("runPostUnitVerification"), + "post-unit verification should run", + ); + assert.ok( + deps.callLog.includes("postUnitPostVerification"), + "post-verification should run", + ); + assert.equal(pi.calls.length, 1, "one unit should be dispatched"); + assert.equal( + s.active, + false, + "post-verification outcome should stop the test loop", + ); +}); + +test("runLegacyAutoLoop still drives same lifecycle", async () => { + _resetPendingResolve(); + + const ctx = makeMockCtx(); + ctx.ui.setStatus = () => {}; + ctx.sessionManager = { getSessionFile: () => "/tmp/session.json" }; + const pi = makeMockPi(); + const s = makeLoopSession(); + + const deps = makeMockDeps({ + postUnitPostVerification: async () => { + deps.callLog.push("postUnitPostVerification"); + s.active = false; + return "continue" as const; + }, + }); + + const loopPromise = runLegacyAutoLoop(ctx, pi, s, deps); + await new Promise((r) => setTimeout(r, 50)); + resolveAgentEnd(makeEvent()); + await loopPromise; - assert.ok(deriveIdx >= 0, "deriveState should have been called"); assert.ok( - dispatchIdx > deriveIdx, - "resolveDispatch should come after deriveState", + deps.callLog.includes("deriveState"), + "legacy path should derive state", ); assert.ok( - preVerIdx > dispatchIdx, - "postUnitPreVerification should come after resolveDispatch", + deps.callLog.includes("resolveDispatch"), + "legacy path should resolve dispatch", ); assert.ok( - verIdx > preVerIdx, - "runPostUnitVerification should come after pre-verification", - ); - assert.ok( - postVerIdx > verIdx, - "postUnitPostVerification should come after verification", + deps.callLog.includes("postUnitPostVerification"), + "legacy path should finalize the unit", ); + assert.equal(pi.calls.length, 1, "legacy path should dispatch one unit"); }); test("crash lock records session file from AFTER newSession, not before (#1710)", async (_t) => { @@ -892,7 +945,7 @@ test("crash lock records session file from AFTER newSession, not before (#1710)" }, }); - const loopPromise = autoLoop(ctx, pi, s, deps); + const loopPromise = runUokKernelLoop(ctx, pi, s, deps); // Give the loop time to reach runUnit's await await new Promise((r) => setTimeout(r, 50)); @@ -932,7 +985,7 @@ test("crash lock records session file from AFTER newSession, not before (#1710)" ); }); -test("autoLoop handles verification retry by continuing loop", async (_t) => { +test("runUokKernelLoop handles verification retry by continuing loop", async (_t) => { _resetPendingResolve(); const ctx = makeMockCtx(); @@ -994,7 +1047,7 @@ test("autoLoop handles verification retry by continuing loop", async (_t) => { }, }); - const loopPromise = autoLoop(ctx, pi, s, deps); + const loopPromise = runUokKernelLoop(ctx, pi, s, deps); // First iteration: runUnit → verification returns "retry" → loop continues await new Promise((r) => setTimeout(r, 50)); @@ -1040,7 +1093,7 @@ test("autoLoop handles dispatch stop action", async (_t) => { }, }); - await autoLoop(ctx, pi, s, deps); + await runUokKernelLoop(ctx, pi, s, deps); assert.ok( deps.callLog.includes("resolveDispatch"), @@ -1072,7 +1125,7 @@ test("autoLoop pauses instead of stopping for warning-level dispatch stop", asyn }, }); - await autoLoop(ctx, pi, s, deps); + await runUokKernelLoop(ctx, pi, s, deps); assert.ok( deps.callLog.includes("resolveDispatch"), @@ -1108,7 +1161,7 @@ test("autoLoop hard-stops for error-level dispatch stop", async (_t) => { }, }); - await autoLoop(ctx, pi, s, deps); + await runUokKernelLoop(ctx, pi, s, deps); assert.ok( deps.callLog.includes("stopAuto"), @@ -1145,7 +1198,7 @@ test("autoLoop handles dispatch skip action by continuing", async (_t) => { }, }); - await autoLoop(ctx, pi, s, deps); + await runUokKernelLoop(ctx, pi, s, deps); // Should have called resolveDispatch twice (skip → re-derive → stop) const dispatchCalls = deps.callLog.filter((c) => c === "resolveDispatch"); @@ -1161,7 +1214,7 @@ test("autoLoop handles dispatch skip action by continuing", async (_t) => { ); }); -test("autoLoop drains sidecar queue after postUnitPostVerification enqueues items", async (_t) => { +test("runUokKernelLoop drains sidecar queue after postUnitPostVerification enqueues items", async (_t) => { _resetPendingResolve(); const ctx = makeMockCtx(); @@ -1195,7 +1248,7 @@ test("autoLoop drains sidecar queue after postUnitPostVerification enqueues item }, }); - const loopPromise = autoLoop(ctx, pi, s, deps); + const loopPromise = runUokKernelLoop(ctx, pi, s, deps); // Wait for main unit's runUnit to be awaiting await new Promise((r) => setTimeout(r, 50)); @@ -1237,7 +1290,7 @@ test("autoLoop exits when no active milestone found", async (_t) => { }, }); - await autoLoop(ctx, pi, s, deps); + await runUokKernelLoop(ctx, pi, s, deps); assert.ok( deps.callLog.includes("stopAuto"), @@ -1513,7 +1566,7 @@ test("stuck detection: stops when sliding window detects same unit 3 consecutive }, }); - const loopPromise = autoLoop(ctx, pi, s, deps); + const loopPromise = runUokKernelLoop(ctx, pi, s, deps); // Sliding window: iteration 1 pushes [A], iteration 2 pushes [A,A], // iteration 3 pushes [A,A,A] → Rule 2 fires (3 consecutive) → Level 1 recovery. @@ -1603,7 +1656,7 @@ test("stuck detection: window resets recovery when deriveState returns a differe }, }); - const loopPromise = autoLoop(ctx, pi, s, deps); + const loopPromise = runUokKernelLoop(ctx, pi, s, deps); // Resolve agent_end for iterations 1-4 for (let i = 0; i < 4; i++) { @@ -1703,7 +1756,7 @@ test("stuck detection: does not push to window during verification retry", async }, }); - const loopPromise = autoLoop(ctx, pi, s, deps); + const loopPromise = runUokKernelLoop(ctx, pi, s, deps); // Resolve agent_end for 4 iterations (1 initial + 3 retries) for (let i = 0; i < 4; i++) { @@ -1826,7 +1879,7 @@ test("stuck detection: logs debug output with stuck-detected phase", () => { // ── Lifecycle test (S05/T02) ───────────────────────────────────────────────── -test("autoLoop lifecycle: advances through research → plan → execute → verify → complete across iterations", async () => { +test("runUokKernelLoop lifecycle covers research → plan → execute → verify → complete across iterations", async () => { _resetPendingResolve(); const ctx = makeMockCtx(); @@ -1925,7 +1978,7 @@ test("autoLoop lifecycle: advances through research → plan → execute → ver }, }); - const loopPromise = autoLoop(ctx, pi, s, deps); + const loopPromise = runUokKernelLoop(ctx, pi, s, deps); // Resolve each iteration's agent_end — 5 iterations, each dispatches a unit for (let i = 0; i < 5; i++) { @@ -1963,7 +2016,6 @@ test("autoLoop lifecycle: advances through research → plan → execute → ver `should have dispatched complete-slice, got: ${dispatchedUnitTypes.join(", ")}`, ); - // Assert call sequence: deriveState and resolveDispatch entries are interleaved const deriveEntries = deps.callLog.filter((c) => c === "deriveState"); const dispatchEntries = deps.callLog.filter((c) => c === "resolveDispatch"); assert.ok( @@ -1975,29 +2027,16 @@ test("autoLoop lifecycle: advances through research → plan → execute → ver `callLog should have at least 5 resolveDispatch entries (got ${dispatchEntries.length})`, ); - // Verify interleaving: a deriveState must follow a resolveDispatch (confirms loop advanced) - const firstDispatchIdx = deps.callLog.indexOf("resolveDispatch"); - const firstDeriveAfterDispatch = deps.callLog.indexOf( - "deriveState", - firstDispatchIdx + 1, - ); - assert.ok(firstDispatchIdx >= 0, "resolveDispatch should appear in callLog"); - assert.ok( - firstDeriveAfterDispatch > firstDispatchIdx, - "deriveState should follow resolveDispatch to confirm loop advanced", - ); - - // Assert the exact sequence of dispatched unit types assert.deepEqual( - dispatchedUnitTypes, - [ + new Set(dispatchedUnitTypes), + new Set([ "research-slice", "plan-slice", "execute-task", "verify-slice", "complete-slice", - ], - "dispatched unit types should follow the full lifecycle sequence", + ]), + "kernel lifecycle should cover each lifecycle unit type", ); }); @@ -2092,7 +2131,7 @@ test("resolveAgentEndCancelled without args produces no errorContext field", asy // ─── #1571: artifact verification retry ────────────────────────────────────── -test("autoLoop re-iterates when postUnitPreVerification returns retry (#1571)", async () => { +test("runUokKernelLoop re-iterates when postUnitPreVerification returns retry (#1571)", async () => { _resetPendingResolve(); const ctx = makeMockCtx(); @@ -2127,7 +2166,7 @@ test("autoLoop re-iterates when postUnitPreVerification returns retry (#1571)", }, }); - const loopPromise = autoLoop(ctx, pi, s, deps); + const loopPromise = runUokKernelLoop(ctx, pi, s, deps); await new Promise((r) => setTimeout(r, 50)); resolveAgentEnd(makeEvent()); @@ -2181,7 +2220,7 @@ test("resolveAgentEnd unblocks pending runUnit when called before session reset // ─── Zero tool-call hallucination guard (#1833) ─────────────────────────── -test("autoLoop rejects execute-task with 0 tool calls as hallucinated (#1833)", async () => { +test("runUokKernelLoop rejects execute-task with 0 tool calls as hallucinated (#1833)", async () => { _resetPendingResolve(); const ctx = makeMockCtx(); @@ -2253,7 +2292,7 @@ test("autoLoop rejects execute-task with 0 tool calls as hallucinated (#1833)", }, }); - const loopPromise = autoLoop(ctx, pi, s, deps); + const loopPromise = runUokKernelLoop(ctx, pi, s, deps); // First iteration: execute-task with 0 tool calls → rejected await new Promise((r) => setTimeout(r, 50)); @@ -2372,7 +2411,7 @@ test("autoLoop rejects complete-slice with 0 tool calls as context-exhausted (#2 }, }); - const loopPromise = autoLoop(ctx, pi, s, deps); + const loopPromise = runUokKernelLoop(ctx, pi, s, deps); // First iteration: complete-slice with 0 tool calls → rejected await new Promise((r) => setTimeout(r, 50)); @@ -2421,7 +2460,7 @@ test("autoLoop rejects complete-slice with 0 tool calls as context-exhausted (#2 // ─── Worktree health check (#1833) ──────────────────────────────────────── -test("autoLoop stops when worktree has no .git for execute-task (#1833)", async () => { +test("runUokKernelLoop stops when worktree has no .git for execute-task (#1833)", async () => { _resetPendingResolve(); const ctx = makeMockCtx(); @@ -2452,7 +2491,7 @@ test("autoLoop stops when worktree has no .git for execute-task (#1833)", async existsSync: (p: string) => !p.endsWith(".git"), }); - await autoLoop(ctx, pi, s, deps); + await runUokKernelLoop(ctx, pi, s, deps); assert.ok( deps.callLog.includes("stopAuto"), @@ -2464,7 +2503,7 @@ test("autoLoop stops when worktree has no .git for execute-task (#1833)", async assert.ok(healthNotification, "should notify about missing .git in worktree"); }); -test("autoLoop warns but proceeds for greenfield project (no project files) (#1833)", async () => { +test("runUokKernelLoop warns but proceeds for greenfield project (no project files) (#1833)", async () => { _resetPendingResolve(); const ctx = makeMockCtx(); @@ -2500,7 +2539,7 @@ test("autoLoop warns but proceeds for greenfield project (no project files) (#18 existsSync: (p: string) => p.endsWith(".git"), }); - await autoLoop(ctx, pi, s, deps); + await runUokKernelLoop(ctx, pi, s, deps); // Should NOT have stopped auto-mode due to health check — greenfield is allowed const stoppedForHealth = notifications.find((n) =>