sf snapshot: pre-dispatch, uncommitted changes after 83m inactivity

2026-05-04 09:47:30 +02:00 · 2026-05-04 09:47:30 +02:00 · 0037f44677
commit 0037f44677
parent 8c66c11131
5 changed files with 200 additions and 403 deletions
--- a/packages/pi-coding-agent/src/core/fallback-resolver.test.ts
+++ b/packages/pi-coding-agent/src/core/fallback-resolver.test.ts
@ -40,6 +40,7 @@ function createResolver(overrides?: {
 	hasAuth?: (provider: string) => boolean;
 	isProviderRequestReady?: (provider: string) => boolean;
 	find?: (provider: string, modelId: string) => Model<Api> | undefined;
+	getAvailable?: () => Model<Api>[];
 }) {
 	const settingsManager = {
 		getFallbackSettings: () => ({
@ -62,6 +63,7 @@ function createResolver(overrides?: {
 			return undefined;
 		}),
 		isProviderRequestReady: overrides?.isProviderRequestReady ?? overrides?.hasAuth ?? (() => true),
+		getAvailable: overrides?.getAvailable ?? (() => [zaiModel, alibabaModel, openaiModel]),
 	} as unknown as ModelRegistry;

 	return { resolver: new FallbackResolver(settingsManager, authStorage, modelRegistry), authStorage };
@ -105,6 +107,7 @@ describe("FallbackResolver — findFallback", () => {
 	it("returns null when all providers are backed off", async () => {
 		const { resolver } = createResolver({
 			isProviderAvailable: () => false,
+			getAvailable: () => [zaiModel, alibabaModel, openaiModel],
 		});

 		const result = await resolver.findFallback(zaiModel, "quota_exhausted");
@ -117,11 +120,46 @@ describe("FallbackResolver — findFallback", () => {
 		assert.equal(result, null);
 	});

-	it("returns null when model is not in any chain", async () => {
+	it("falls back to free selection when model is not in any chain", async () => {
 		const { resolver } = createResolver();
 		const unknownModel = createMockModel("unknown", "some-model");
 		const result = await resolver.findFallback(unknownModel, "quota_exhausted");
-		assert.equal(result, null);
+		assert.notEqual(result, null);
+		assert.equal(result!.chainName, "free-selection");
+		// Should pick an available model with different provider
+		assert.notEqual(result!.model.provider, "unknown");
+	});
+
+	it("free selection prefers models with matching reasoning capability", async () => {
+		const reasoningModel = createMockModel("openai", "gpt-4.1");
+		reasoningModel.reasoning = true;
+		const nonReasoningModel = createMockModel("alibaba", "glm-5");
+		nonReasoningModel.reasoning = false;
+
+		const { resolver } = createResolver({
+			getAvailable: () => [nonReasoningModel, reasoningModel],
+		});
+
+		const currentModel = createMockModel("unknown", "some-model");
+		currentModel.reasoning = true;
+
+		const result = await resolver.findFallback(currentModel, "quota_exhausted");
+		assert.notEqual(result, null);
+		assert.equal(result!.model.provider, "openai");
+		assert.equal(result!.model.reasoning, true);
+	});
+
+	it("free selection excludes same provider", async () => {
+		const sameProviderModel = createMockModel("zai", "glm-5-other");
+		const differentProviderModel = createMockModel("alibaba", "glm-5");
+
+		const { resolver } = createResolver({
+			getAvailable: () => [sameProviderModel, differentProviderModel],
+		});
+
+		const result = await resolver.findFallback(zaiModel, "quota_exhausted");
+		assert.notEqual(result, null);
+		assert.equal(result!.model.provider, "alibaba");
 	});

 	it("skips providers that are not request-ready", async () => {
--- a/packages/pi-coding-agent/src/core/fallback-resolver.ts
+++ b/packages/pi-coding-agent/src/core/fallback-resolver.ts
@ -32,6 +32,9 @@ export class FallbackResolver {
 	 * Searches all chains for entries matching the current model's provider+id,
 	 * then returns the next available entry with lower priority (higher number).
 	 *
+	 * If no chain contains the current model, falls through to free selection:
+	 * picks any available model from the registry with a different provider.
+	 *
 	 * @returns FallbackResult if a fallback is available, null otherwise
 	 */
 	async findFallback(
@ -61,7 +64,9 @@ export class FallbackResolver {
 			if (wrapResult) return wrapResult;
 		}

-		return null;
+		// No chain contained the current model — fall through to free selection
+		// from any available model in the registry with a different provider.
+		return this._findAnyAvailableFallback(currentModel);
 	}

 	/**
@ -161,4 +166,43 @@ export class FallbackResolver {

 		return null;
 	}
+
+	/**
+	 * Free-selection fallback when no chain contains the current model.
+	 * Picks any available model from the registry with a different provider.
+	 * Prefers models with reasoning capability if the current model has it.
+	 */
+	private _findAnyAvailableFallback(
+		currentModel: Model<Api>,
+	): FallbackResult | null {
+		const allModels = this.modelRegistry.getAvailable();
+		const candidates = allModels.filter((m) => {
+			// Exclude same provider — credential rotation was already tried
+			if (m.provider === currentModel.provider) return false;
+			// Exclude exhausted providers
+			if (!this.authStorage.isProviderAvailable(m.provider)) return false;
+			// Exclude models without auth
+			if (!this.modelRegistry.isProviderRequestReady(m.provider)) return false;
+			return true;
+		});
+
+		if (candidates.length === 0) return null;
+
+		// Sort: prefer models with matching reasoning capability, then by context window
+		candidates.sort((a, b) => {
+			const aReasoningMatch = a.reasoning === currentModel.reasoning ? 1 : 0;
+			const bReasoningMatch = b.reasoning === currentModel.reasoning ? 1 : 0;
+			if (aReasoningMatch !== bReasoningMatch) {
+				return bReasoningMatch - aReasoningMatch;
+			}
+			return (b.contextWindow ?? 0) - (a.contextWindow ?? 0);
+		});
+
+		const chosen = candidates[0];
+		return {
+			model: chosen,
+			chainName: "free-selection",
+			reason: `free fallback to ${chosen.provider}/${chosen.id} (no chain configured)`,
+		};
+	}
 }
--- a/packages/pi-coding-agent/src/core/retry-handler.test.ts
+++ b/packages/pi-coding-agent/src/core/retry-handler.test.ts
@ -607,6 +607,79 @@ describe("RetryHandler — long-context entitlement 429 (#2803)", () => {
 		});
 	});

+	describe("quota wait before fallback", () => {
+		it("waits for retryAfterMs before retrying same provider on quota error", async () => {
+			const { deps, emittedEvents, continueFn } = createMockDeps({
+				model: createMockModel("google-gemini-cli", "gemini-2.5-pro"),
+				fallbackResult: null,
+				retrySettings: { maxRetries: 5, baseDelayMs: 1000, maxDelayMs: 60000 },
+			});
+
+			const handler = new RetryHandler(deps);
+			const msg = errorMessage(
+				"You have exhausted your capacity on this model. Your quota will reset after 59s.",
+			);
+			(msg as any).retryAfterMs = 59000;
+
+			const result = await handler.handleRetryableError(msg);
+
+			// Should wait and retry, not immediately fail
+			assert.equal(result, true, "should wait and retry on quota reset");
+
+			const retryStart = emittedEvents.find(
+				(e) => e.type === "auto_retry_start",
+			);
+			assert.ok(retryStart, "should emit auto_retry_start with wait");
+			assert.equal(
+				retryStart!.delayMs,
+				59000,
+				"should use provider's retry-after delay",
+			);
+			assert.ok(
+				retryStart!.errorMessage.includes("waiting for quota reset"),
+				"error message should indicate quota wait",
+			);
+
+			// Should NOT have emitted fallback_chain_exhausted
+			const chainExhausted = emittedEvents.find(
+				(e) => e.type === "fallback_chain_exhausted",
+			);
+			assert.equal(
+				chainExhausted,
+				undefined,
+				"should not emit fallback_chain_exhausted when waiting for quota reset",
+			);
+		});
+
+		it("falls through to fallback when retryAfterMs exceeds maxDelayMs", async () => {
+			const fallbackModel = createMockModel("openai", "gpt-4o");
+			const { deps, emittedEvents } = createMockDeps({
+				model: createMockModel("google-gemini-cli", "gemini-2.5-pro"),
+				fallbackResult: {
+					model: fallbackModel,
+					reason: "cross-provider fallback",
+				},
+				retrySettings: { maxRetries: 5, baseDelayMs: 1000, maxDelayMs: 30000 },
+			});
+
+			const handler = new RetryHandler(deps);
+			const msg = errorMessage(
+				"You have exhausted your capacity on this model. Your quota will reset after 5m.",
+			);
+			(msg as any).retryAfterMs = 300000; // 5 minutes, exceeds maxDelayMs
+
+			const result = await handler.handleRetryableError(msg);
+
+			// Should fall through to fallback since wait is too long
+			assert.equal(result, true, "should fallback when quota reset is too long");
+
+			const switchEvent = emittedEvents.find(
+				(e) => e.type === "fallback_provider_switch",
+			);
+			assert.ok(switchEvent, "should emit fallback_provider_switch");
+		});
+	});
+
 	describe("quota_exhausted credential backoff (#3430)", () => {
 		it("does NOT call markUsageLimitReached for quota_exhausted errors", async () => {
 			// "Extra usage is required" is an account-level billing gate.
--- a/packages/pi-coding-agent/src/core/retry-handler.ts
+++ b/packages/pi-coding-agent/src/core/retry-handler.ts
@ -210,6 +210,48 @@ export class RetryHandler {
 			// quota errors, or auth errors (invalid/expired key — no point retrying).
 			const isAuthError = errorType === "auth_error";
 			if (isRateLimit || isQuotaError || isAuthError) {
+				// For quota errors with a retry-after hint, wait before switching providers.
+				// The quota may reset quickly (e.g. 59s), so waiting is often better than
+				// switching to a potentially worse model.
+				if (isQuotaError && message.retryAfterMs !== undefined && message.retryAfterMs > 0) {
+					const cap = settings.maxDelayMs > 0 ? settings.maxDelayMs : Infinity;
+					if (message.retryAfterMs <= cap) {
+						this._deps.emit({
+							type: "auto_retry_start",
+							attempt: this._retryAttempt + 1,
+							maxAttempts: settings.maxRetries,
+							delayMs: message.retryAfterMs,
+							errorMessage: `${message.errorMessage} (waiting for quota reset)`,
+						});
+
+						this._removeLastAssistantError();
+						this._retryAbortController = new AbortController();
+						try {
+							await sleep(message.retryAfterMs, this._retryAbortController.signal);
+						} catch {
+							// Aborted during sleep
+							if (retryGeneration !== this._retryGeneration) {
+								this._retryAbortController = undefined;
+								return false;
+							}
+							const attempt = this._retryAttempt;
+							this._retryAttempt = 0;
+							this._retryAbortController = undefined;
+							this._deps.emit({
+								type: "auto_retry_end",
+								success: false,
+								attempt,
+								finalError: "Retry cancelled",
+							});
+							this._resolveRetry();
+							return false;
+						}
+						this._retryAbortController = undefined;
+						this._scheduleContinue(retryGeneration);
+						return true;
+					}
+				}
+
 				const fallbackResult = await this._deps.fallbackResolver.findFallback(
 					this._deps.getModel()!,
 					errorType,
--- a/src/resources/extensions/sf/tests/verification-engine.test.ts
+++ b/src/resources/extensions/sf/tests/verification-engine.test.ts
@ -1,400 +0,0 @@
-import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
-import { tmpdir } from "node:os";
-import { join } from "node:path";
-import { describe, expect, test } from "vitest";
-import {
-	evaluateRunawayGuard,
-	resetRunawayGuardState,
-} from "../auto-runaway-guard.js";
-import { runCustomVerification } from "../custom-verification.js";
-import { getPriorSliceCompletionBlocker } from "../dispatch-guard.js";
-import { checkCrossTaskSignatures } from "../post-execution-checks.js";
-import {
-	extractPackageReferences,
-	normalizeFilePath,
-} from "../pre-execution-checks.js";
-import { formatFailureContext, isLikelyCommand } from "../verification-gate.js";
-
-// ─── Bug 1: formatFailureContext double-truncation ─────────────────────────
-
-describe("formatFailureContext", () => {
-	test("does not double-truncate a single check's stderr", () => {
-		const longStderr = "x".repeat(3000);
-		const result = {
-			passed: false,
-			checks: [
-				{
-					command: "npm test",
-					exitCode: 1,
-					stdout: "",
-					stderr: longStderr,
-					durationMs: 100,
-				},
-			],
-			discoverySource: "preference" as const,
-			timestamp: Date.now(),
-		};
-		const context = formatFailureContext(result);
-		// Per-check budget = 10000/1 = 10000, capped to MAX_STDERR_PER_CHECK=2000
-		expect(context).toContain("x".repeat(2000));
-		expect(context).toContain("…[truncated]");
-		// Must NOT contain the total-body truncation marker
-		expect(context).not.toContain("…[remaining failures truncated]");
-	});
-
-	test("fairly distributes budget across multiple failures", () => {
-		const stderrA = "a".repeat(3000);
-		const stderrB = "b".repeat(3000);
-		const result = {
-			passed: false,
-			checks: [
-				{
-					command: "npm run lint",
-					exitCode: 1,
-					stdout: "",
-					stderr: stderrA,
-					durationMs: 100,
-				},
-				{
-					command: "npm run test",
-					exitCode: 1,
-					stdout: "",
-					stderr: stderrB,
-					durationMs: 100,
-				},
-			],
-			discoverySource: "preference" as const,
-			timestamp: Date.now(),
-		};
-		const context = formatFailureContext(result);
-		// Per-check budget = 10000/2 = 5000, capped to 2000
-		expect(context).toContain("a".repeat(2000));
-		expect(context).toContain("b".repeat(2000));
-		expect(context).not.toContain("…[remaining failures truncated]");
-	});
-});
-
-// ─── Bug 2: checkCrossTaskSignatures partial-signature-drift ───────────────
-
-describe("checkCrossTaskSignatures", () => {
-	test("detects drift against ALL prior definitions, not just the most recent", () => {
-		const base = mkdtempSync(join(tmpdir(), "sf-sig-drift-"));
-		try {
-			// Create prior task files with different signatures for same function
-			mkdirSync(join(base, "first"), { recursive: true });
-			mkdirSync(join(base, "second"), { recursive: true });
-			mkdirSync(join(base, "current"), { recursive: true });
-
-			writeFileSync(
-				join(base, "first", "first.ts"),
-				"export function foo(a: string): void {}",
-			);
-			writeFileSync(
-				join(base, "second", "second.ts"),
-				"export function foo(a: string, b: number): void {}",
-			);
-			writeFileSync(
-				join(base, "current", "current.ts"),
-				"export function foo(a: string): number { return 1; }",
-			);
-
-			const taskRow = {
-				id: "T03",
-				key_files: ["current/current.ts"],
-			};
-			const priorTasks = [
-				{
-					id: "T01",
-					key_files: ["first/first.ts"],
-				},
-				{
-					id: "T02",
-					key_files: ["second/second.ts"],
-				},
-			];
-
-			const results = checkCrossTaskSignatures(taskRow, priorTasks, base);
-
-			// Should detect drift against BOTH prior definitions
-			const fooResults = results.filter((r) => r.target === "foo");
-			expect(fooResults.length).toBeGreaterThanOrEqual(2);
-
-			// Should report mismatch with T01 (params match but return type differs)
-			const returnTypeMismatches = fooResults.filter((r) =>
-				r.message.includes("returns"),
-			);
-			expect(returnTypeMismatches.length).toBeGreaterThanOrEqual(1);
-
-			// Should report mismatch with T02 (params differ)
-			const paramMismatches = fooResults.filter((r) =>
-				r.message.includes("parameters"),
-			);
-			expect(paramMismatches.length).toBeGreaterThanOrEqual(1);
-		} finally {
-			rmSync(base, { recursive: true, force: true });
-		}
-	});
-});
-
-// ─── Bug 3: dispatch-guard global-positional-fallback ──────────────────────
-
-describe("getPriorSliceCompletionBlocker", () => {
-	test("does not block zero-dep slice when earlier incomplete slice has unsatisfied deps", () => {
-		const base = mkdtempSync(join(tmpdir(), "sf-dispatch-"));
-		try {
-			// Create a milestone structure with:
-			// S01 depends on S02 (circular-ish, but S02 is incomplete)
-			// S03 has no dependencies
-			// When dispatching S03, it should NOT be blocked by S01's incompleteness
-			// because S01 is itself blocked by unsatisfied deps.
-			mkdirSync(join(base, ".sf", "milestones", "M001"), { recursive: true });
-			writeFileSync(
-				join(base, ".sf", "milestones", "M001", "ROADMAP.md"),
-				"# M001\n\n" +
-					"- [ ] S01 — first slice\n" +
-					"  - depends: S02\n" +
-					"- [ ] S02 — second slice\n" +
-					"- [ ] S03 — third slice\n",
-			);
-
-			const blocker = getPriorSliceCompletionBlocker(
-				base,
-				"main",
-				"execute-task",
-				"M001/S03/T01",
-			);
-			// S03 should NOT be blocked because S01 has unsatisfied deps (S02 incomplete)
-			expect(blocker).toBeNull();
-		} finally {
-			rmSync(base, { recursive: true, force: true });
-		}
-	});
-});
-
-// ─── Bug 4: extractPackageReferences stopword collision ────────────────────
-
-describe("extractPackageReferences", () => {
-	test("includes 'test' after -D flag", () => {
-		const packages = extractPackageReferences("npm install -D test");
-		expect(packages).toContain("test");
-	});
-
-	test("includes 'test' without flag when it looks like a package name", () => {
-		// "test" by itself is a stopword, but with package-name chars it should pass
-		const packages = extractPackageReferences("npm install test-utils");
-		expect(packages).toContain("test-utils");
-	});
-
-	test("stops on prose stopwords", () => {
-		const packages = extractPackageReferences("npm install and then run build");
-		// "and" is a stopword and should terminate parsing
-		expect(packages).not.toContain("then");
-		expect(packages).not.toContain("run");
-	});
-
-	test("includes scoped packages regardless of stopwords", () => {
-		const packages = extractPackageReferences("npm install @types/test");
-		expect(packages).toContain("@types/test");
-	});
-});
-
-// ─── Bug 5: runaway-guard token false-positive ─────────────────────────────
-
-describe("evaluateRunawayGuard", () => {
-	test("does not warn on token count alone when other signals are healthy", () => {
-		// Simulate a unit that has burned 1.2M tokens but only made 25 tool calls
-		// in 5 minutes with 1 changed file — this is healthy progress, not runaway.
-		resetRunawayGuardState("execute-task", "M001/S01/T01", {
-			sessionTokens: 100_000_000, // baseline: session already had 100M tokens
-			changedFiles: 0,
-			worktreeFingerprint: "abc123",
-		});
-
-		const config = {
-			enabled: true,
-			toolCallWarning: 60,
-			tokenWarning: 1_000_000,
-			elapsedMs: 20 * 60 * 1000,
-			changedFilesWarning: 75,
-			diagnosticTurns: 2,
-			hardPause: true,
-			minIntervalMs: 120_000,
-		};
-
-		const decision = evaluateRunawayGuard(
-			"execute-task",
-			"M001/S01/T01",
-			{
-				toolCalls: 25,
-				sessionTokens: 101_200_000, // delta = 1.2M tokens
-				elapsedMs: 5 * 60 * 1000, // 5 minutes
-				changedFiles: 1,
-				worktreeFingerprint: "def456",
-				topTools: { read: 10, edit: 5, bash: 5 },
-			},
-			config,
-		);
-
-		// Should NOT warn because token count alone is not enough —
-		// tool calls (25 < 60), elapsed (5min < 20min), changed files (1 < 75)
-		// are all well below thresholds.
-		expect(decision.action).toBe("none");
-	});
-
-	test("warns when token count is accompanied by a primary signal", () => {
-		resetRunawayGuardState("execute-task", "M001/S01/T01", {
-			sessionTokens: 100_000_000,
-			changedFiles: 0,
-			worktreeFingerprint: "abc123",
-		});
-
-		const config = {
-			enabled: true,
-			toolCallWarning: 60,
-			tokenWarning: 1_000_000,
-			elapsedMs: 20 * 60 * 1000,
-			changedFilesWarning: 75,
-			diagnosticTurns: 2,
-			hardPause: true,
-			minIntervalMs: 120_000,
-		};
-
-		const decision = evaluateRunawayGuard(
-			"execute-task",
-			"M001/S01/T01",
-			{
-				toolCalls: 65, // exceeds toolCallWarning (60)
-				sessionTokens: 101_200_000, // delta = 1.2M tokens
-				elapsedMs: 5 * 60 * 1000,
-				changedFiles: 1,
-				worktreeFingerprint: "def456",
-				topTools: { read: 30, edit: 20, bash: 15 },
-			},
-			config,
-		);
-
-		// Should warn because tool calls (65 >= 60) is a primary signal,
-		// and tokens (1.2M >= 1M) corroborate.
-		expect(decision.action).toBe("warn");
-	});
-
-	test("warns on no-progress heuristic regardless of primary signals", () => {
-		resetRunawayGuardState("execute-task", "M001/S01/T01", {
-			sessionTokens: 100_000_000,
-			changedFiles: 0,
-			worktreeFingerprint: "abc123",
-		});
-
-		const config = {
-			enabled: true,
-			toolCallWarning: 60,
-			tokenWarning: 1_000_000,
-			elapsedMs: 20 * 60 * 1000,
-			changedFilesWarning: 75,
-			diagnosticTurns: 2,
-			hardPause: true,
-			minIntervalMs: 120_000,
-		};
-
-		const decision = evaluateRunawayGuard(
-			"execute-task",
-			"M001/S01/T01",
-			{
-				toolCalls: 30, // exceeds EXECUTE_NO_PROGRESS_TOOL_WARNING (25)
-				sessionTokens: 100_600_000, // delta = 600K tokens
-				elapsedMs: 10 * 60 * 1000,
-				changedFiles: 0,
-				worktreeFingerprint: "abc123", // unchanged
-				topTools: { read: 15, edit: 10, bash: 5 },
-			},
-			config,
-		);
-
-		// Should warn because no-progress heuristic fires:
-		// 0 changed files, worktree unchanged, 30 tool calls, 600K tokens
-		expect(decision.action).toBe("warn");
-	});
-});
-
-// ─── Bug 6: isLikelyCommand prose guard gaps ───────────────────────────────
-
-describe("isLikelyCommand", () => {
-	test("treats single non-command token as prose", () => {
-		expect(isLikelyCommand("hello")).toBe(false);
-		expect(isLikelyCommand("world")).toBe(false);
-	});
-
-	test("treats single known command token as command", () => {
-		expect(isLikelyCommand("npm")).toBe(true);
-		expect(isLikelyCommand("node")).toBe(true);
-	});
-
-	test("treats single-letter first token with 2+ words as prose", () => {
-		expect(isLikelyCommand("a quick test")).toBe(false);
-		expect(isLikelyCommand("it works now")).toBe(false);
-		expect(isLikelyCommand("i am here")).toBe(false);
-	});
-
-	test("still treats short command-like strings as commands", () => {
-		expect(isLikelyCommand("npm test")).toBe(true);
-		expect(isLikelyCommand("./script.sh")).toBe(true);
-	});
-});
-
-// ─── Bug 7: custom-verification standalone shell operators ─────────────────
-
-describe("runCustomVerification shell-command policy", () => {
-	test("pauses on standalone pipe operator", () => {
-		const base = mkdtempSync(join(tmpdir(), "sf-cv-pipe-"));
-		try {
-			writeFileSync(
-				join(base, "DEFINITION.yaml"),
-				`steps:\n  - id: step-1\n    produces: []\n    verify:\n      policy: shell-command\n      command: echo hello | cat\n`,
-			);
-			const result = runCustomVerification(base, "step-1");
-			expect(result).toBe("pause");
-		} finally {
-			rmSync(base, { recursive: true, force: true });
-		}
-	});
-
-	test("pauses on background operator", () => {
-		const base = mkdtempSync(join(tmpdir(), "sf-cv-bg-"));
-		try {
-			writeFileSync(
-				join(base, "DEFINITION.yaml"),
-				`steps:\n  - id: step-1\n    produces: []\n    verify:\n      policy: shell-command\n      command: echo hello \u0026\n`,
-			);
-			const result = runCustomVerification(base, "step-1");
-			expect(result).toBe("pause");
-		} finally {
-			rmSync(base, { recursive: true, force: true });
-		}
-	});
-
-	test("allows safe command without pipe or background", () => {
-		const base = mkdtempSync(join(tmpdir(), "sf-cv-safe-"));
-		try {
-			writeFileSync(
-				join(base, "DEFINITION.yaml"),
-				`steps:\n  - id: step-1\n    produces: []\n    verify:\n      policy: shell-command\n      command: echo hello\n`,
-			);
-			const result = runCustomVerification(base, "step-1");
-			expect(result).toBe("continue");
-		} finally {
-			rmSync(base, { recursive: true, force: true });
-		}
-	});
-});
-
-// ─── Bug 8: normalizeFilePath JSDoc filesystem warning ─────────────────────
-
-describe("normalizeFilePath", () => {
-	test("does NOT make path safe for direct filesystem use", () => {
-		// The function normalizes for comparison only; traversal sequences remain
-		const normalized = normalizeFilePath("../../etc/passwd");
-		expect(normalized).toBe("../../etc/passwd");
-		// A consumer that passed this straight to readFileSync would escape
-		// the intended directory. The JSDoc warns about this explicitly.
-	});
-});