From 0037f44677c02607983e92a580cc76a7968ec2c6 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Mon, 4 May 2026 09:47:30 +0200 Subject: [PATCH] sf snapshot: pre-dispatch, uncommitted changes after 83m inactivity --- .../src/core/fallback-resolver.test.ts | 42 +- .../src/core/fallback-resolver.ts | 46 +- .../src/core/retry-handler.test.ts | 73 ++++ .../pi-coding-agent/src/core/retry-handler.ts | 42 ++ .../sf/tests/verification-engine.test.ts | 400 ------------------ 5 files changed, 200 insertions(+), 403 deletions(-) delete mode 100644 src/resources/extensions/sf/tests/verification-engine.test.ts diff --git a/packages/pi-coding-agent/src/core/fallback-resolver.test.ts b/packages/pi-coding-agent/src/core/fallback-resolver.test.ts index c92a999b7..4e9ca7cd9 100644 --- a/packages/pi-coding-agent/src/core/fallback-resolver.test.ts +++ b/packages/pi-coding-agent/src/core/fallback-resolver.test.ts @@ -40,6 +40,7 @@ function createResolver(overrides?: { hasAuth?: (provider: string) => boolean; isProviderRequestReady?: (provider: string) => boolean; find?: (provider: string, modelId: string) => Model | undefined; + getAvailable?: () => Model[]; }) { const settingsManager = { getFallbackSettings: () => ({ @@ -62,6 +63,7 @@ function createResolver(overrides?: { return undefined; }), isProviderRequestReady: overrides?.isProviderRequestReady ?? overrides?.hasAuth ?? (() => true), + getAvailable: overrides?.getAvailable ?? (() => [zaiModel, alibabaModel, openaiModel]), } as unknown as ModelRegistry; return { resolver: new FallbackResolver(settingsManager, authStorage, modelRegistry), authStorage }; @@ -105,6 +107,7 @@ describe("FallbackResolver — findFallback", () => { it("returns null when all providers are backed off", async () => { const { resolver } = createResolver({ isProviderAvailable: () => false, + getAvailable: () => [zaiModel, alibabaModel, openaiModel], }); const result = await resolver.findFallback(zaiModel, "quota_exhausted"); @@ -117,11 +120,46 @@ describe("FallbackResolver — findFallback", () => { assert.equal(result, null); }); - it("returns null when model is not in any chain", async () => { + it("falls back to free selection when model is not in any chain", async () => { const { resolver } = createResolver(); const unknownModel = createMockModel("unknown", "some-model"); const result = await resolver.findFallback(unknownModel, "quota_exhausted"); - assert.equal(result, null); + assert.notEqual(result, null); + assert.equal(result!.chainName, "free-selection"); + // Should pick an available model with different provider + assert.notEqual(result!.model.provider, "unknown"); + }); + + it("free selection prefers models with matching reasoning capability", async () => { + const reasoningModel = createMockModel("openai", "gpt-4.1"); + reasoningModel.reasoning = true; + const nonReasoningModel = createMockModel("alibaba", "glm-5"); + nonReasoningModel.reasoning = false; + + const { resolver } = createResolver({ + getAvailable: () => [nonReasoningModel, reasoningModel], + }); + + const currentModel = createMockModel("unknown", "some-model"); + currentModel.reasoning = true; + + const result = await resolver.findFallback(currentModel, "quota_exhausted"); + assert.notEqual(result, null); + assert.equal(result!.model.provider, "openai"); + assert.equal(result!.model.reasoning, true); + }); + + it("free selection excludes same provider", async () => { + const sameProviderModel = createMockModel("zai", "glm-5-other"); + const differentProviderModel = createMockModel("alibaba", "glm-5"); + + const { resolver } = createResolver({ + getAvailable: () => [sameProviderModel, differentProviderModel], + }); + + const result = await resolver.findFallback(zaiModel, "quota_exhausted"); + assert.notEqual(result, null); + assert.equal(result!.model.provider, "alibaba"); }); it("skips providers that are not request-ready", async () => { diff --git a/packages/pi-coding-agent/src/core/fallback-resolver.ts b/packages/pi-coding-agent/src/core/fallback-resolver.ts index e5532f2f4..690dca75d 100644 --- a/packages/pi-coding-agent/src/core/fallback-resolver.ts +++ b/packages/pi-coding-agent/src/core/fallback-resolver.ts @@ -32,6 +32,9 @@ export class FallbackResolver { * Searches all chains for entries matching the current model's provider+id, * then returns the next available entry with lower priority (higher number). * + * If no chain contains the current model, falls through to free selection: + * picks any available model from the registry with a different provider. + * * @returns FallbackResult if a fallback is available, null otherwise */ async findFallback( @@ -61,7 +64,9 @@ export class FallbackResolver { if (wrapResult) return wrapResult; } - return null; + // No chain contained the current model — fall through to free selection + // from any available model in the registry with a different provider. + return this._findAnyAvailableFallback(currentModel); } /** @@ -161,4 +166,43 @@ export class FallbackResolver { return null; } + + /** + * Free-selection fallback when no chain contains the current model. + * Picks any available model from the registry with a different provider. + * Prefers models with reasoning capability if the current model has it. + */ + private _findAnyAvailableFallback( + currentModel: Model, + ): FallbackResult | null { + const allModels = this.modelRegistry.getAvailable(); + const candidates = allModels.filter((m) => { + // Exclude same provider — credential rotation was already tried + if (m.provider === currentModel.provider) return false; + // Exclude exhausted providers + if (!this.authStorage.isProviderAvailable(m.provider)) return false; + // Exclude models without auth + if (!this.modelRegistry.isProviderRequestReady(m.provider)) return false; + return true; + }); + + if (candidates.length === 0) return null; + + // Sort: prefer models with matching reasoning capability, then by context window + candidates.sort((a, b) => { + const aReasoningMatch = a.reasoning === currentModel.reasoning ? 1 : 0; + const bReasoningMatch = b.reasoning === currentModel.reasoning ? 1 : 0; + if (aReasoningMatch !== bReasoningMatch) { + return bReasoningMatch - aReasoningMatch; + } + return (b.contextWindow ?? 0) - (a.contextWindow ?? 0); + }); + + const chosen = candidates[0]; + return { + model: chosen, + chainName: "free-selection", + reason: `free fallback to ${chosen.provider}/${chosen.id} (no chain configured)`, + }; + } } diff --git a/packages/pi-coding-agent/src/core/retry-handler.test.ts b/packages/pi-coding-agent/src/core/retry-handler.test.ts index 757552b74..71086f202 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.test.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.test.ts @@ -607,6 +607,79 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => { }); }); + describe("quota wait before fallback", () => { + it("waits for retryAfterMs before retrying same provider on quota error", async () => { + const { deps, emittedEvents, continueFn } = createMockDeps({ + model: createMockModel("google-gemini-cli", "gemini-2.5-pro"), + fallbackResult: null, + retrySettings: { maxRetries: 5, baseDelayMs: 1000, maxDelayMs: 60000 }, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage( + "You have exhausted your capacity on this model. Your quota will reset after 59s.", + ); + (msg as any).retryAfterMs = 59000; + + const result = await handler.handleRetryableError(msg); + + // Should wait and retry, not immediately fail + assert.equal(result, true, "should wait and retry on quota reset"); + + const retryStart = emittedEvents.find( + (e) => e.type === "auto_retry_start", + ); + assert.ok(retryStart, "should emit auto_retry_start with wait"); + assert.equal( + retryStart!.delayMs, + 59000, + "should use provider's retry-after delay", + ); + assert.ok( + retryStart!.errorMessage.includes("waiting for quota reset"), + "error message should indicate quota wait", + ); + + // Should NOT have emitted fallback_chain_exhausted + const chainExhausted = emittedEvents.find( + (e) => e.type === "fallback_chain_exhausted", + ); + assert.equal( + chainExhausted, + undefined, + "should not emit fallback_chain_exhausted when waiting for quota reset", + ); + }); + + it("falls through to fallback when retryAfterMs exceeds maxDelayMs", async () => { + const fallbackModel = createMockModel("openai", "gpt-4o"); + const { deps, emittedEvents } = createMockDeps({ + model: createMockModel("google-gemini-cli", "gemini-2.5-pro"), + fallbackResult: { + model: fallbackModel, + reason: "cross-provider fallback", + }, + retrySettings: { maxRetries: 5, baseDelayMs: 1000, maxDelayMs: 30000 }, + }); + + const handler = new RetryHandler(deps); + const msg = errorMessage( + "You have exhausted your capacity on this model. Your quota will reset after 5m.", + ); + (msg as any).retryAfterMs = 300000; // 5 minutes, exceeds maxDelayMs + + const result = await handler.handleRetryableError(msg); + + // Should fall through to fallback since wait is too long + assert.equal(result, true, "should fallback when quota reset is too long"); + + const switchEvent = emittedEvents.find( + (e) => e.type === "fallback_provider_switch", + ); + assert.ok(switchEvent, "should emit fallback_provider_switch"); + }); + }); + describe("quota_exhausted credential backoff (#3430)", () => { it("does NOT call markUsageLimitReached for quota_exhausted errors", async () => { // "Extra usage is required" is an account-level billing gate. diff --git a/packages/pi-coding-agent/src/core/retry-handler.ts b/packages/pi-coding-agent/src/core/retry-handler.ts index f7bb89640..50261b89a 100644 --- a/packages/pi-coding-agent/src/core/retry-handler.ts +++ b/packages/pi-coding-agent/src/core/retry-handler.ts @@ -210,6 +210,48 @@ export class RetryHandler { // quota errors, or auth errors (invalid/expired key — no point retrying). const isAuthError = errorType === "auth_error"; if (isRateLimit || isQuotaError || isAuthError) { + // For quota errors with a retry-after hint, wait before switching providers. + // The quota may reset quickly (e.g. 59s), so waiting is often better than + // switching to a potentially worse model. + if (isQuotaError && message.retryAfterMs !== undefined && message.retryAfterMs > 0) { + const cap = settings.maxDelayMs > 0 ? settings.maxDelayMs : Infinity; + if (message.retryAfterMs <= cap) { + this._deps.emit({ + type: "auto_retry_start", + attempt: this._retryAttempt + 1, + maxAttempts: settings.maxRetries, + delayMs: message.retryAfterMs, + errorMessage: `${message.errorMessage} (waiting for quota reset)`, + }); + + this._removeLastAssistantError(); + this._retryAbortController = new AbortController(); + try { + await sleep(message.retryAfterMs, this._retryAbortController.signal); + } catch { + // Aborted during sleep + if (retryGeneration !== this._retryGeneration) { + this._retryAbortController = undefined; + return false; + } + const attempt = this._retryAttempt; + this._retryAttempt = 0; + this._retryAbortController = undefined; + this._deps.emit({ + type: "auto_retry_end", + success: false, + attempt, + finalError: "Retry cancelled", + }); + this._resolveRetry(); + return false; + } + this._retryAbortController = undefined; + this._scheduleContinue(retryGeneration); + return true; + } + } + const fallbackResult = await this._deps.fallbackResolver.findFallback( this._deps.getModel()!, errorType, diff --git a/src/resources/extensions/sf/tests/verification-engine.test.ts b/src/resources/extensions/sf/tests/verification-engine.test.ts deleted file mode 100644 index 98ca031f7..000000000 --- a/src/resources/extensions/sf/tests/verification-engine.test.ts +++ /dev/null @@ -1,400 +0,0 @@ -import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; -import { describe, expect, test } from "vitest"; -import { - evaluateRunawayGuard, - resetRunawayGuardState, -} from "../auto-runaway-guard.js"; -import { runCustomVerification } from "../custom-verification.js"; -import { getPriorSliceCompletionBlocker } from "../dispatch-guard.js"; -import { checkCrossTaskSignatures } from "../post-execution-checks.js"; -import { - extractPackageReferences, - normalizeFilePath, -} from "../pre-execution-checks.js"; -import { formatFailureContext, isLikelyCommand } from "../verification-gate.js"; - -// ─── Bug 1: formatFailureContext double-truncation ───────────────────────── - -describe("formatFailureContext", () => { - test("does not double-truncate a single check's stderr", () => { - const longStderr = "x".repeat(3000); - const result = { - passed: false, - checks: [ - { - command: "npm test", - exitCode: 1, - stdout: "", - stderr: longStderr, - durationMs: 100, - }, - ], - discoverySource: "preference" as const, - timestamp: Date.now(), - }; - const context = formatFailureContext(result); - // Per-check budget = 10000/1 = 10000, capped to MAX_STDERR_PER_CHECK=2000 - expect(context).toContain("x".repeat(2000)); - expect(context).toContain("…[truncated]"); - // Must NOT contain the total-body truncation marker - expect(context).not.toContain("…[remaining failures truncated]"); - }); - - test("fairly distributes budget across multiple failures", () => { - const stderrA = "a".repeat(3000); - const stderrB = "b".repeat(3000); - const result = { - passed: false, - checks: [ - { - command: "npm run lint", - exitCode: 1, - stdout: "", - stderr: stderrA, - durationMs: 100, - }, - { - command: "npm run test", - exitCode: 1, - stdout: "", - stderr: stderrB, - durationMs: 100, - }, - ], - discoverySource: "preference" as const, - timestamp: Date.now(), - }; - const context = formatFailureContext(result); - // Per-check budget = 10000/2 = 5000, capped to 2000 - expect(context).toContain("a".repeat(2000)); - expect(context).toContain("b".repeat(2000)); - expect(context).not.toContain("…[remaining failures truncated]"); - }); -}); - -// ─── Bug 2: checkCrossTaskSignatures partial-signature-drift ─────────────── - -describe("checkCrossTaskSignatures", () => { - test("detects drift against ALL prior definitions, not just the most recent", () => { - const base = mkdtempSync(join(tmpdir(), "sf-sig-drift-")); - try { - // Create prior task files with different signatures for same function - mkdirSync(join(base, "first"), { recursive: true }); - mkdirSync(join(base, "second"), { recursive: true }); - mkdirSync(join(base, "current"), { recursive: true }); - - writeFileSync( - join(base, "first", "first.ts"), - "export function foo(a: string): void {}", - ); - writeFileSync( - join(base, "second", "second.ts"), - "export function foo(a: string, b: number): void {}", - ); - writeFileSync( - join(base, "current", "current.ts"), - "export function foo(a: string): number { return 1; }", - ); - - const taskRow = { - id: "T03", - key_files: ["current/current.ts"], - }; - const priorTasks = [ - { - id: "T01", - key_files: ["first/first.ts"], - }, - { - id: "T02", - key_files: ["second/second.ts"], - }, - ]; - - const results = checkCrossTaskSignatures(taskRow, priorTasks, base); - - // Should detect drift against BOTH prior definitions - const fooResults = results.filter((r) => r.target === "foo"); - expect(fooResults.length).toBeGreaterThanOrEqual(2); - - // Should report mismatch with T01 (params match but return type differs) - const returnTypeMismatches = fooResults.filter((r) => - r.message.includes("returns"), - ); - expect(returnTypeMismatches.length).toBeGreaterThanOrEqual(1); - - // Should report mismatch with T02 (params differ) - const paramMismatches = fooResults.filter((r) => - r.message.includes("parameters"), - ); - expect(paramMismatches.length).toBeGreaterThanOrEqual(1); - } finally { - rmSync(base, { recursive: true, force: true }); - } - }); -}); - -// ─── Bug 3: dispatch-guard global-positional-fallback ────────────────────── - -describe("getPriorSliceCompletionBlocker", () => { - test("does not block zero-dep slice when earlier incomplete slice has unsatisfied deps", () => { - const base = mkdtempSync(join(tmpdir(), "sf-dispatch-")); - try { - // Create a milestone structure with: - // S01 depends on S02 (circular-ish, but S02 is incomplete) - // S03 has no dependencies - // When dispatching S03, it should NOT be blocked by S01's incompleteness - // because S01 is itself blocked by unsatisfied deps. - mkdirSync(join(base, ".sf", "milestones", "M001"), { recursive: true }); - writeFileSync( - join(base, ".sf", "milestones", "M001", "ROADMAP.md"), - "# M001\n\n" + - "- [ ] S01 — first slice\n" + - " - depends: S02\n" + - "- [ ] S02 — second slice\n" + - "- [ ] S03 — third slice\n", - ); - - const blocker = getPriorSliceCompletionBlocker( - base, - "main", - "execute-task", - "M001/S03/T01", - ); - // S03 should NOT be blocked because S01 has unsatisfied deps (S02 incomplete) - expect(blocker).toBeNull(); - } finally { - rmSync(base, { recursive: true, force: true }); - } - }); -}); - -// ─── Bug 4: extractPackageReferences stopword collision ──────────────────── - -describe("extractPackageReferences", () => { - test("includes 'test' after -D flag", () => { - const packages = extractPackageReferences("npm install -D test"); - expect(packages).toContain("test"); - }); - - test("includes 'test' without flag when it looks like a package name", () => { - // "test" by itself is a stopword, but with package-name chars it should pass - const packages = extractPackageReferences("npm install test-utils"); - expect(packages).toContain("test-utils"); - }); - - test("stops on prose stopwords", () => { - const packages = extractPackageReferences("npm install and then run build"); - // "and" is a stopword and should terminate parsing - expect(packages).not.toContain("then"); - expect(packages).not.toContain("run"); - }); - - test("includes scoped packages regardless of stopwords", () => { - const packages = extractPackageReferences("npm install @types/test"); - expect(packages).toContain("@types/test"); - }); -}); - -// ─── Bug 5: runaway-guard token false-positive ───────────────────────────── - -describe("evaluateRunawayGuard", () => { - test("does not warn on token count alone when other signals are healthy", () => { - // Simulate a unit that has burned 1.2M tokens but only made 25 tool calls - // in 5 minutes with 1 changed file — this is healthy progress, not runaway. - resetRunawayGuardState("execute-task", "M001/S01/T01", { - sessionTokens: 100_000_000, // baseline: session already had 100M tokens - changedFiles: 0, - worktreeFingerprint: "abc123", - }); - - const config = { - enabled: true, - toolCallWarning: 60, - tokenWarning: 1_000_000, - elapsedMs: 20 * 60 * 1000, - changedFilesWarning: 75, - diagnosticTurns: 2, - hardPause: true, - minIntervalMs: 120_000, - }; - - const decision = evaluateRunawayGuard( - "execute-task", - "M001/S01/T01", - { - toolCalls: 25, - sessionTokens: 101_200_000, // delta = 1.2M tokens - elapsedMs: 5 * 60 * 1000, // 5 minutes - changedFiles: 1, - worktreeFingerprint: "def456", - topTools: { read: 10, edit: 5, bash: 5 }, - }, - config, - ); - - // Should NOT warn because token count alone is not enough — - // tool calls (25 < 60), elapsed (5min < 20min), changed files (1 < 75) - // are all well below thresholds. - expect(decision.action).toBe("none"); - }); - - test("warns when token count is accompanied by a primary signal", () => { - resetRunawayGuardState("execute-task", "M001/S01/T01", { - sessionTokens: 100_000_000, - changedFiles: 0, - worktreeFingerprint: "abc123", - }); - - const config = { - enabled: true, - toolCallWarning: 60, - tokenWarning: 1_000_000, - elapsedMs: 20 * 60 * 1000, - changedFilesWarning: 75, - diagnosticTurns: 2, - hardPause: true, - minIntervalMs: 120_000, - }; - - const decision = evaluateRunawayGuard( - "execute-task", - "M001/S01/T01", - { - toolCalls: 65, // exceeds toolCallWarning (60) - sessionTokens: 101_200_000, // delta = 1.2M tokens - elapsedMs: 5 * 60 * 1000, - changedFiles: 1, - worktreeFingerprint: "def456", - topTools: { read: 30, edit: 20, bash: 15 }, - }, - config, - ); - - // Should warn because tool calls (65 >= 60) is a primary signal, - // and tokens (1.2M >= 1M) corroborate. - expect(decision.action).toBe("warn"); - }); - - test("warns on no-progress heuristic regardless of primary signals", () => { - resetRunawayGuardState("execute-task", "M001/S01/T01", { - sessionTokens: 100_000_000, - changedFiles: 0, - worktreeFingerprint: "abc123", - }); - - const config = { - enabled: true, - toolCallWarning: 60, - tokenWarning: 1_000_000, - elapsedMs: 20 * 60 * 1000, - changedFilesWarning: 75, - diagnosticTurns: 2, - hardPause: true, - minIntervalMs: 120_000, - }; - - const decision = evaluateRunawayGuard( - "execute-task", - "M001/S01/T01", - { - toolCalls: 30, // exceeds EXECUTE_NO_PROGRESS_TOOL_WARNING (25) - sessionTokens: 100_600_000, // delta = 600K tokens - elapsedMs: 10 * 60 * 1000, - changedFiles: 0, - worktreeFingerprint: "abc123", // unchanged - topTools: { read: 15, edit: 10, bash: 5 }, - }, - config, - ); - - // Should warn because no-progress heuristic fires: - // 0 changed files, worktree unchanged, 30 tool calls, 600K tokens - expect(decision.action).toBe("warn"); - }); -}); - -// ─── Bug 6: isLikelyCommand prose guard gaps ─────────────────────────────── - -describe("isLikelyCommand", () => { - test("treats single non-command token as prose", () => { - expect(isLikelyCommand("hello")).toBe(false); - expect(isLikelyCommand("world")).toBe(false); - }); - - test("treats single known command token as command", () => { - expect(isLikelyCommand("npm")).toBe(true); - expect(isLikelyCommand("node")).toBe(true); - }); - - test("treats single-letter first token with 2+ words as prose", () => { - expect(isLikelyCommand("a quick test")).toBe(false); - expect(isLikelyCommand("it works now")).toBe(false); - expect(isLikelyCommand("i am here")).toBe(false); - }); - - test("still treats short command-like strings as commands", () => { - expect(isLikelyCommand("npm test")).toBe(true); - expect(isLikelyCommand("./script.sh")).toBe(true); - }); -}); - -// ─── Bug 7: custom-verification standalone shell operators ───────────────── - -describe("runCustomVerification shell-command policy", () => { - test("pauses on standalone pipe operator", () => { - const base = mkdtempSync(join(tmpdir(), "sf-cv-pipe-")); - try { - writeFileSync( - join(base, "DEFINITION.yaml"), - `steps:\n - id: step-1\n produces: []\n verify:\n policy: shell-command\n command: echo hello | cat\n`, - ); - const result = runCustomVerification(base, "step-1"); - expect(result).toBe("pause"); - } finally { - rmSync(base, { recursive: true, force: true }); - } - }); - - test("pauses on background operator", () => { - const base = mkdtempSync(join(tmpdir(), "sf-cv-bg-")); - try { - writeFileSync( - join(base, "DEFINITION.yaml"), - `steps:\n - id: step-1\n produces: []\n verify:\n policy: shell-command\n command: echo hello \u0026\n`, - ); - const result = runCustomVerification(base, "step-1"); - expect(result).toBe("pause"); - } finally { - rmSync(base, { recursive: true, force: true }); - } - }); - - test("allows safe command without pipe or background", () => { - const base = mkdtempSync(join(tmpdir(), "sf-cv-safe-")); - try { - writeFileSync( - join(base, "DEFINITION.yaml"), - `steps:\n - id: step-1\n produces: []\n verify:\n policy: shell-command\n command: echo hello\n`, - ); - const result = runCustomVerification(base, "step-1"); - expect(result).toBe("continue"); - } finally { - rmSync(base, { recursive: true, force: true }); - } - }); -}); - -// ─── Bug 8: normalizeFilePath JSDoc filesystem warning ───────────────────── - -describe("normalizeFilePath", () => { - test("does NOT make path safe for direct filesystem use", () => { - // The function normalizes for comparison only; traversal sequences remain - const normalized = normalizeFilePath("../../etc/passwd"); - expect(normalized).toBe("../../etc/passwd"); - // A consumer that passed this straight to readFileSync would escape - // the intended directory. The JSDoc warns about this explicitly. - }); -});