fix(sf): stabilize auto notices and package checks

This commit is contained in:
Mikael Hugo 2026-05-02 12:39:27 +02:00
parent ed2c4af729
commit 85a0188fe1
20 changed files with 697 additions and 178 deletions

View file

@ -2,6 +2,8 @@
actionable: true
kind: design-research
date: 2026-05-02
promoted: true
promoted_to: M012
---
# PDD v2 — Research Findings

48
package-lock.json generated
View file

@ -14,13 +14,13 @@
"studio"
],
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/sdk": "^0.92.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@clack/prompts": "^1.1.0",
"@google/genai": "^1.40.0",
"@mariozechner/jiti": "^2.6.2",
"@mistralai/mistralai": "^1.14.1",
"@mistralai/mistralai": "^2.2.1",
"@modelcontextprotocol/sdk": "^1.27.1",
"@octokit/rest": "^22.0.1",
"@silvia-odwyer/photon-node": "^0.3.4",
@ -72,6 +72,7 @@
"esbuild": "^0.27.4",
"jiti": "^2.6.1",
"typescript": "^5.4.0",
"typescript-language-server": "^5.1.3",
"vitest": "^4.1.5"
},
"engines": {
@ -155,9 +156,9 @@
}
},
"node_modules/@anthropic-ai/sdk": {
"version": "0.73.0",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.73.0.tgz",
"integrity": "sha512-URURVzhxXGJDGUGFunIOtBlSl7KWvZiAAKY/ttTkZAkXT9bTPqdk2eK0b8qqSxXpikh3QKPnPYpiyX98zf5ebw==",
"version": "0.92.0",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.92.0.tgz",
"integrity": "sha512-l653JFC83wCglH8H83t1xpgDurCyPyslYW1maPRdCsfuNuGbLvQjQ81sWd3Go3LWRm0jNspzAhuqAYV8r9joSw==",
"license": "MIT",
"dependencies": {
"json-schema-to-ts": "^3.1.1"
@ -3915,13 +3916,14 @@
}
},
"node_modules/@mistralai/mistralai": {
"version": "1.14.1",
"resolved": "https://registry.npmjs.org/@mistralai/mistralai/-/mistralai-1.14.1.tgz",
"integrity": "sha512-IiLmmZFCCTReQgPAT33r7KQ1nYo5JPdvGkrkZqA8qQ2qB1GHgs5LoP5K2ICyrjnpw2n8oSxMM/VP+liiKcGNlQ==",
"version": "2.2.1",
"resolved": "https://registry.npmjs.org/@mistralai/mistralai/-/mistralai-2.2.1.tgz",
"integrity": "sha512-uKU8CZmL2RzYKmplsU01hii4p3pe4HqJefpWNRWXm1Tcm0Sm4xXfwSLIy4k7ZCPlbETCGcp69E7hZs+WOJ5itQ==",
"license": "Apache-2.0",
"dependencies": {
"ws": "^8.18.0",
"zod": "^3.25.0 || ^4.0.0",
"zod-to-json-schema": "^3.24.1"
"zod-to-json-schema": "^3.25.0"
}
},
"node_modules/@modelcontextprotocol/sdk": {
@ -15498,6 +15500,19 @@
"node": ">=14.17"
}
},
"node_modules/typescript-language-server": {
"version": "5.1.3",
"resolved": "https://registry.npmjs.org/typescript-language-server/-/typescript-language-server-5.1.3.tgz",
"integrity": "sha512-r+pAcYtWdN8tKlYZPwiiHNA2QPjXnI02NrW5Sf2cVM3TRtuQ3V9EKKwOxqwaQ0krsaEXk/CbN90I5erBuf84Vg==",
"dev": true,
"license": "Apache-2.0",
"bin": {
"typescript-language-server": "lib/cli.mjs"
},
"engines": {
"node": ">=20"
}
},
"node_modules/uint8array-extras": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/uint8array-extras/-/uint8array-extras-1.5.0.tgz",
@ -16305,7 +16320,7 @@
"version": "2.75.0",
"license": "MIT",
"dependencies": {
"@anthropic-ai/sdk": "^0.52.0",
"@anthropic-ai/sdk": "^0.92.0",
"@singularity-forge/rpc-client": "^2.75.0",
"discord.js": "^14.25.1",
"yaml": "^2.8.0",
@ -16322,15 +16337,6 @@
"node": ">=24.15.0"
}
},
"packages/daemon/node_modules/@anthropic-ai/sdk": {
"version": "0.52.0",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.52.0.tgz",
"integrity": "sha512-d4c+fg+xy9e46c8+YnrrgIQR45CZlAi7PwdzIfDXDM6ACxEZli1/fxhURsq30ZpMZy6LvSkr41jGq5aF5TD7rQ==",
"license": "MIT",
"bin": {
"anthropic-ai-sdk": "bin/cli"
}
},
"packages/daemon/node_modules/zod": {
"version": "3.25.76",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
@ -16387,12 +16393,12 @@
"name": "@singularity-forge/pi-ai",
"version": "2.75.0",
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/sdk": "^0.92.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@google/gemini-cli-core": "0.38.2",
"@google/genai": "^1.40.0",
"@mistralai/mistralai": "^1.14.1",
"@mistralai/mistralai": "^2.2.1",
"@sinclair/typebox": "^0.34.41",
"ajv": "^8.17.1",
"ajv-formats": "^3.0.1",

View file

@ -102,13 +102,13 @@
"test:live-regression": "node --experimental-strip-types tests/live-regression/run.ts"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/sdk": "^0.92.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@clack/prompts": "^1.1.0",
"@google/genai": "^1.40.0",
"@mariozechner/jiti": "^2.6.2",
"@mistralai/mistralai": "^1.14.1",
"@mistralai/mistralai": "^2.2.1",
"@modelcontextprotocol/sdk": "^1.27.1",
"@octokit/rest": "^22.0.1",
"@silvia-odwyer/photon-node": "^0.3.4",
@ -156,6 +156,7 @@
"esbuild": "^0.27.4",
"jiti": "^2.6.1",
"typescript": "^5.4.0",
"typescript-language-server": "^5.1.3",
"vitest": "^4.1.5"
},
"optionalDependencies": {

View file

@ -28,7 +28,7 @@
"test": "node --test dist/daemon.test.js"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.52.0",
"@anthropic-ai/sdk": "^0.92.0",
"@singularity-forge/rpc-client": "^2.75.0",
"discord.js": "^14.25.1",
"yaml": "^2.8.0",

View file

@ -23,12 +23,12 @@
"build": "tsc -p tsconfig.json"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/sdk": "^0.92.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@google/gemini-cli-core": "0.38.2",
"@google/genai": "^1.40.0",
"@mistralai/mistralai": "^1.14.1",
"@mistralai/mistralai": "^2.2.1",
"@sinclair/typebox": "^0.34.41",
"ajv": "^8.17.1",
"ajv-formats": "^3.0.1",

View file

@ -4,7 +4,7 @@ import type { Mistral } from "@mistralai/mistralai";
import type { RequestOptions } from "@mistralai/mistralai/lib/sdks.js";
import type {
ChatCompletionStreamRequest,
ChatCompletionStreamRequestMessages,
ChatCompletionStreamRequestMessage,
CompletionEvent,
ContentChunk,
FunctionTool,
@ -464,8 +464,8 @@ function toFunctionTools(tools: Tool[]): Array<FunctionTool & { type: "function"
}));
}
function toChatMessages(messages: Message[], supportsImages: boolean): ChatCompletionStreamRequestMessages[] {
const result: ChatCompletionStreamRequestMessages[] = [];
function toChatMessages(messages: Message[], supportsImages: boolean): ChatCompletionStreamRequestMessage[] {
const result: ChatCompletionStreamRequestMessage[] = [];
for (const msg of messages) {
if (msg.role === "user") {
@ -520,7 +520,7 @@ function toChatMessages(messages: Message[], supportsImages: boolean): ChatCompl
});
}
const assistantMessage: ChatCompletionStreamRequestMessages = { role: "assistant" };
const assistantMessage: ChatCompletionStreamRequestMessage = { role: "assistant" };
if (contentParts.length > 0) assistantMessage.content = contentParts;
if (toolCalls.length > 0) assistantMessage.toolCalls = toolCalls;
if (contentParts.length > 0 || toolCalls.length > 0) result.push(assistantMessage);

View file

@ -8,12 +8,24 @@
* Run: node --experimental-strip-types --test src/core/lsp/lsp-integration.test.ts
* (from packages/pi-coding-agent/)
*/
import { describe, test, beforeAll, afterAll } from 'vitest';
import assert from "node:assert/strict";
import { spawn } from "node:child_process";
import { execSync, spawn } from "node:child_process";
import * as fs from "node:fs";
import * as path from "node:path";
import * as os from "node:os";
import * as path from "node:path";
import { afterAll, beforeAll, describe, test } from "vitest";
/**
 * Probe whether `typescript-language-server` is runnable in this environment,
 * so the LSP integration suite can be skipped instead of failing.
 *
 * Uses `npx --no-install` so the check only resolves the locally installed
 * binary (node_modules/.bin): the previous `npx which <name>` form executed
 * the npm `which` *package*, which npx will try to download from the registry
 * when uncached — making a pure availability probe network-dependent.
 *
 * @returns true if the server binary can be invoked, false otherwise.
 */
function hasTypeScriptLanguageServer(): boolean {
	try {
		execSync("npx --no-install typescript-language-server --version", {
			stdio: "ignore",
		});
		return true;
	} catch {
		// Any spawn/exit failure (npx missing, server not installed) means
		// the suite cannot run against a real server.
		return false;
	}
}
const describeOrSkip = hasTypeScriptLanguageServer() ? describe : describe.skip;
// ---------------------------------------------------------------------------
// Helpers — lightweight JSON-RPC over stdio (no dependency on our LSP code)
@ -39,7 +51,9 @@ interface JsonRpcResponse {
error?: { code: number; message: string };
}
function encodeMessage(msg: JsonRpcRequest | JsonRpcNotification | JsonRpcResponse): string {
function encodeMessage(
msg: JsonRpcRequest | JsonRpcNotification | JsonRpcResponse,
): string {
const body = JSON.stringify(msg);
return `Content-Length: ${Buffer.byteLength(body, "utf-8")}\r\n\r\n${body}`;
}
@ -51,7 +65,10 @@ class LspHarness {
private proc;
private nextId = 1;
private buffer = Buffer.alloc(0);
private pending = new Map<number, { resolve: (v: unknown) => void; reject: (e: Error) => void }>();
private pending = new Map<
number,
{ resolve: (v: unknown) => void; reject: (e: Error) => void }
>();
private notifications: Array<{ method: string; params: unknown }> = [];
constructor(command: string, args: string[], cwd: string) {
@ -65,7 +82,7 @@ class LspHarness {
this.drain();
});
this.proc.stderr!.on("data", (chunk: Buffer) => {
this.proc.stderr!.on("data", (_chunk: Buffer) => {
// Swallow stderr (server logs)
});
}
@ -84,16 +101,23 @@ class LspHarness {
const messageEnd = messageStart + contentLength;
if (this.buffer.length < messageEnd) return;
const body = this.buffer.subarray(messageStart, messageEnd).toString("utf-8");
const body = this.buffer
.subarray(messageStart, messageEnd)
.toString("utf-8");
this.buffer = Buffer.from(this.buffer.subarray(messageEnd));
const msg = JSON.parse(body) as JsonRpcResponse & { method?: string; params?: unknown };
const msg = JSON.parse(body) as JsonRpcResponse & {
method?: string;
params?: unknown;
};
if (msg.id !== undefined && this.pending.has(msg.id)) {
const p = this.pending.get(msg.id)!;
this.pending.delete(msg.id);
if (msg.error) {
p.reject(new Error(`LSP error ${msg.error.code}: ${msg.error.message}`));
p.reject(
new Error(`LSP error ${msg.error.code}: ${msg.error.message}`),
);
} else {
p.resolve(msg.result);
}
@ -127,7 +151,11 @@ class LspHarness {
this.proc.stdin!.write(encodeMessage(msg));
}
async request(method: string, params: unknown, timeoutMs = 15000): Promise<unknown> {
async request(
method: string,
params: unknown,
timeoutMs = 15000,
): Promise<unknown> {
const id = this.nextId++;
const msg: JsonRpcRequest = { jsonrpc: "2.0", id, method, params };
this.proc.stdin!.write(encodeMessage(msg));
@ -156,11 +184,27 @@ class LspHarness {
this.proc.stdin!.write(encodeMessage(msg));
}
getNotifications(method?: string): Array<{ method: string; params: unknown }> {
getNotifications(
method?: string,
): Array<{ method: string; params: unknown }> {
if (!method) return this.notifications;
return this.notifications.filter((n) => n.method === method);
}
async waitForNotification(
	method: string,
	predicate: (notification: { method: string; params: unknown }) => boolean,
	timeoutMs = 10_000,
): Promise<{ method: string; params: unknown } | undefined> {
	// Poll the accumulated notification list every 100ms until the predicate
	// matches or the deadline passes; resolves undefined on timeout.
	const deadline = Date.now() + timeoutMs;
	while (Date.now() < deadline) {
		const match = this.getNotifications(method).find(predicate);
		if (match) return match;
		await new Promise((resolve) => setTimeout(resolve, 100));
	}
	return undefined;
}
async shutdown(): Promise<void> {
try {
await this.request("shutdown", null, 5000);
@ -255,7 +299,7 @@ function fileToUri(filePath: string): string {
// Tests
// ---------------------------------------------------------------------------
describe("LSP integration: typescript-language-server", () => {
describeOrSkip("LSP integration: typescript-language-server", () => {
let dir: string;
let cleanup: () => void;
let mainPath: string;
@ -293,8 +337,14 @@ describe("LSP integration: typescript-language-server", () => {
assert.ok(result, "initialize should return a result");
assert.ok(result.capabilities, "result should have capabilities");
assert.ok(result.capabilities.hoverProvider !== undefined, "should support hover");
assert.ok(result.capabilities.definitionProvider !== undefined, "should support definition");
assert.ok(
result.capabilities.hoverProvider !== undefined,
"should support hover",
);
assert.ok(
result.capabilities.definitionProvider !== undefined,
"should support definition",
);
lsp.notify("initialized", {});
@ -303,10 +353,20 @@ describe("LSP integration: typescript-language-server", () => {
const mathContent = fs.readFileSync(mathPath, "utf-8");
lsp.notify("textDocument/didOpen", {
textDocument: { uri: mainUri, languageId: "typescript", version: 1, text: mainContent },
textDocument: {
uri: mainUri,
languageId: "typescript",
version: 1,
text: mainContent,
},
});
lsp.notify("textDocument/didOpen", {
textDocument: { uri: mathUri, languageId: "typescript", version: 1, text: mathContent },
textDocument: {
uri: mathUri,
languageId: "typescript",
version: 1,
text: mathContent,
},
});
// Give the server time to index
@ -352,7 +412,10 @@ describe("LSP integration: typescript-language-server", () => {
// Response can be Location (uri) or LocationLink (targetUri)
const loc = locations[0] as Record<string, unknown>;
const uri = (loc.uri ?? loc.targetUri) as string;
assert.ok(uri, `definition should have uri or targetUri, got keys: ${Object.keys(loc).join(", ")}`);
assert.ok(
uri,
`definition should have uri or targetUri, got keys: ${Object.keys(loc).join(", ")}`,
);
assert.ok(
uri.includes("math.ts"),
`definition should point to math.ts, got: ${uri}`,
@ -368,7 +431,10 @@ describe("LSP integration: typescript-language-server", () => {
})) as Array<{ uri: string; range: unknown }> | null;
assert.ok(result, "references should return a result");
assert.ok(result.length >= 2, `should find at least 2 references (decl + usage), got ${result.length}`);
assert.ok(
result.length >= 2,
`should find at least 2 references (decl + usage), got ${result.length}`,
);
});
// ---- Document Symbols ----
@ -378,27 +444,47 @@ describe("LSP integration: typescript-language-server", () => {
})) as Array<{ name: string; kind: number }> | null;
assert.ok(result, "documentSymbol should return a result");
assert.ok(result.length >= 2, `should find at least 2 symbols, got ${result.length}`);
assert.ok(
result.length >= 2,
`should find at least 2 symbols, got ${result.length}`,
);
const names = result.map((s) => s.name);
assert.ok(names.includes("add"), `symbols should include 'add', got: ${names.join(", ")}`);
assert.ok(names.includes("subtract"), `symbols should include 'subtract', got: ${names.join(", ")}`);
assert.ok(
names.includes("add"),
`symbols should include 'add', got: ${names.join(", ")}`,
);
assert.ok(
names.includes("subtract"),
`symbols should include 'subtract', got: ${names.join(", ")}`,
);
});
// ---- Diagnostics (published via notification) ----
test("diagnostics for type error", async () => {
// Wait a bit more for diagnostics to arrive
await new Promise((r) => setTimeout(r, 2000));
const mainContent = fs.readFileSync(mainPath, "utf-8");
lsp.notify("textDocument/didChange", {
textDocument: { uri: mainUri, version: 2 },
contentChanges: [{ text: mainContent }],
});
const diagNotifications = lsp.getNotifications("textDocument/publishDiagnostics");
const mainDiags = diagNotifications.filter(
(n) => (n.params as { uri: string }).uri === mainUri,
const mainDiagNotification = await lsp.waitForNotification(
"textDocument/publishDiagnostics",
(n) => {
const params = n.params as {
uri: string;
diagnostics?: Array<{ message: string; range: unknown }>;
};
return params.uri === mainUri && (params.diagnostics?.length ?? 0) > 0;
},
);
assert.ok(mainDiags.length > 0, "should receive diagnostics for main.ts");
assert.ok(mainDiagNotification, "should receive diagnostics for main.ts");
const lastDiag = mainDiags[mainDiags.length - 1];
const diagnostics = (lastDiag.params as { diagnostics: Array<{ message: string; range: unknown }> })
.diagnostics;
const diagnostics = (
mainDiagNotification.params as {
diagnostics: Array<{ message: string; range: unknown }>;
}
).diagnostics;
// Should catch the type error: string assigned to number
const typeError = diagnostics.find(

View file

@ -7,6 +7,7 @@ import {
symlinkSync,
} from "node:fs";
import { delimiter, join, relative, resolve } from "node:path";
// SF Startup Loader
// Copyright (c) 2026 Singularity Forge
@ -68,6 +69,18 @@ if (firstArg === "--help" || firstArg === "-h") {
process.exit(0);
}
// Support `sf <subcommand> --help` / `-h`: when a real subcommand (not the
// `--` arg separator) is followed by a help flag anywhere in the remaining
// args, print that subcommand's dedicated help — falling back to the general
// help screen when no subcommand-specific text exists — then exit cleanly.
// help-text.js is imported lazily to keep the startup fast path cheap.
if (
	firstArg &&
	firstArg !== "--" &&
	args.slice(1).some((arg) => arg === "--help" || arg === "-h")
) {
	const { printHelp, printSubcommandHelp } = await import("./help-text.js");
	if (!printSubcommandHelp(firstArg, sfVersion)) {
		printHelp(sfVersion);
	}
	process.exit(0);
}
// Fast-path invalid headless invocations before importing cli.ts. These paths
// are commonly used by smoke tests and orchestrators; they should return a
// clear diagnostic without paying extension/resource startup cost.

View file

@ -12,7 +12,22 @@
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import { after, afterAll, before, beforeAll, describe, it } from 'vitest';
import { afterAll, beforeAll, describe, it } from "vitest";
// Skip the entire suite if Playwright Chromium cannot launch (missing system
// libraries or browser binaries in this environment).
// NOTE(review): a static `import { chromium } from "playwright"` also exists
// later in this file; ESM hoists it, so if the playwright *module* itself is
// absent the file fails to load before this try/catch runs — this probe only
// guards against launch failures (missing browser binaries/system libs).
// Confirm whether module-absence should also be tolerated.
let canLaunchChromium = false;
try {
	const { chromium } = await import("playwright");
	const testBrowser = await chromium.launch({ headless: true });
	await testBrowser.close();
	canLaunchChromium = true;
} catch {
	canLaunchChromium = false;
}
// describe.skip keeps the suites visible in reporter output while skipping.
const describeOrSkip = canLaunchChromium ? describe : describe.skip;
import { fileURLToPath } from "node:url";
import { chromium } from "playwright";
@ -132,7 +147,7 @@ async function injectHelpers() {
// 1. window.__pi utility tests
// =========================================================================
describe("window.__pi utilities", () => {
describeOrSkip("window.__pi utilities", () => {
it("simpleHash — deterministic output for same input", async () => {
await page.setContent("<p>test</p>");
await injectHelpers();
@ -408,7 +423,7 @@ describe("window.__pi utilities", () => {
// 2. Intent scoring tests
// =========================================================================
describe("intent scoring", () => {
describeOrSkip("intent scoring", () => {
it("submit_form — submit button inside form scores higher than outside", async () => {
await page.setContent(`
<form>
@ -585,7 +600,7 @@ describe("intent scoring", () => {
// 3. Form analysis tests
// =========================================================================
describe("form analysis", () => {
describeOrSkip("form analysis", () => {
const COMPLEX_FORM = `
<form id="testform" action="/submit">
<!-- label[for] association -->

View file

@ -17,7 +17,6 @@ import type {
} from "@singularity-forge/pi-coding-agent";
import { detectAbandonMilestone } from "./abandon-detect.js";
import type { AutoSession, SidecarItem } from "./auto/session.js";
import { isDeterministicPolicyError } from "./auto-tool-tracking.js";
import { resolveExpectedArtifactPath as resolveArtifactForContent } from "./auto-artifact-paths.js";
import {
diagnoseExpectedArtifact,
@ -25,6 +24,7 @@ import {
verifyExpectedArtifact,
writeBlockerPlaceholder,
} from "./auto-recovery.js";
import { isDeterministicPolicyError } from "./auto-tool-tracking.js";
import { type CloseoutOptions, closeoutUnit } from "./auto-unit-closeout.js";
import { runSafely } from "./auto-utils.js";
import { syncStateToProjectRoot } from "./auto-worktree.js";
@ -67,13 +67,16 @@ import {
} from "./pre-execution-checks.js";
import { loadEffectiveSFPreferences } from "./preferences.js";
import { loadPrompt } from "./prompt-loader.js";
import { recordSelfFeedback } from "./self-feedback.js";
// crossReferenceEvidence available for future use when verification_evidence is stored in DB
// import { crossReferenceEvidence, type ClaimedEvidence } from "./safety/evidence-cross-ref.js";
import { validateContent } from "./safety/content-validator.js";
import { clearEvidenceFromDisk, getEvidence } from "./safety/evidence-collector.js";
import {
clearEvidenceFromDisk,
getEvidence,
} from "./safety/evidence-collector.js";
import { validateFileChanges } from "./safety/file-change-validator.js";
import { resolveSafetyHarnessConfig } from "./safety/safety-harness.js";
import { recordSelfFeedback } from "./self-feedback.js";
import { consumeSignal } from "./session-status-io.js";
import {
_getAdapter,
@ -87,10 +90,10 @@ import {
} from "./sf-db.js";
import { deriveState } from "./state.js";
import { parseUnitId } from "./unit-id.js";
import { isAwaitingUserInput } from "./user-input-boundary.js";
import { resolveUokFlags } from "./uok/flags.js";
import { UokGateRunner } from "./uok/gate-runner.js";
import { writeTurnGitTransaction } from "./uok/gitops.js";
import { isAwaitingUserInput } from "./user-input-boundary.js";
import { writePreExecutionEvidence } from "./verification-evidence.js";
import { logError, logWarning } from "./workflow-logger.js";
import { regenerateIfMissing } from "./workflow-projections.js";
@ -1073,6 +1076,11 @@ export async function postUnitPreVerification(
ctx.ui.notify(
`Safety: ${warnings.length} unexpected file change(s) outside task plan`,
"warning",
{
kind: "progress",
source: "safety",
dedupe_key: `safety:file-change:${s.currentUnit.id}`,
},
);
}
}
@ -1113,6 +1121,11 @@ export async function postUnitPreVerification(
ctx.ui.notify(
`Safety: task ${sTid} has verification commands but no bash calls were recorded`,
"warning",
{
kind: "progress",
source: "safety",
dedupe_key: `safety:evidence:${s.currentUnit.id}`,
},
);
}
}
@ -1138,7 +1151,11 @@ export async function postUnitPreVerification(
);
for (const v of contentViolations) {
logWarning("safety", `content: ${v.reason}`);
ctx.ui.notify(`Content validation: ${v.reason}`, "warning");
ctx.ui.notify(`Content validation: ${v.reason}`, "warning", {
kind: "progress",
source: "safety",
dedupe_key: `safety:content:${s.currentUnit.id}:${v.reason}`,
});
}
} catch (e) {
debugLog("postUnit", {
@ -1285,7 +1302,12 @@ export async function postUnitPreVerification(
s.lastToolInvocationError = null;
s.pendingVerificationRetry = null;
s.verificationRetryCount.delete(retryKey);
writeBlockerPlaceholder(s.currentUnit.type, s.currentUnit.id, s.basePath, reason);
writeBlockerPlaceholder(
s.currentUnit.type,
s.currentUnit.id,
s.basePath,
reason,
);
ctx.ui.notify(
`${s.currentUnit.type} ${s.currentUnit.id} — deterministic policy rejection, wrote blocker placeholder (no retries) (#4973)`,
"warning",

View file

@ -21,13 +21,11 @@ import {
import { atomicWriteSync } from "../atomic-write.js";
import { resetCompletionNudgeState } from "../auto-completion-nudge.js";
import {
USER_DRIVEN_DEEP_UNITS,
isAwaitingUserInput,
type PostUnitContext,
type PreVerificationOpts,
USER_DRIVEN_DEEP_UNITS,
} from "../auto-post-unit.js";
import { pauseAutoForProviderError } from "../provider-error-pause.js";
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
import {
buildLoopRemediationSteps,
diagnoseExpectedArtifact,
@ -43,23 +41,23 @@ import {
formatToolCallSummary,
resetToolCallCounts,
} from "../auto-tool-tracking.js";
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
import { debugLog } from "../debug-logger.js";
import { PROJECT_FILES } from "../detection.js";
import { MergeConflictError } from "../git-service.js";
import { recordLearnedOutcome } from "../learning/runtime.js";
import {
resolveMilestoneFile,
resolveSliceFile,
sfRoot,
} from "../paths.js";
import { resolveMilestoneFile, resolveSliceFile, sfRoot } from "../paths.js";
import { resolvePersistModelChanges } from "../preferences.js";
import {
approveProductionMutationWithLlmPolicy,
ensureProductionMutationApprovalTemplate,
readProductionMutationApprovalStatus,
} from "../production-mutation-approval.js";
import { loadEvidenceFromDisk, resetEvidence } from "../safety/evidence-collector.js";
import { parseUnitId } from "../unit-id.js";
import { pauseAutoForProviderError } from "../provider-error-pause.js";
import {
loadEvidenceFromDisk,
resetEvidence,
} from "../safety/evidence-collector.js";
import { getDirtyFiles } from "../safety/file-change-validator.js";
import {
cleanupCheckpoint,
@ -67,10 +65,20 @@ import {
rollbackToCheckpoint,
} from "../safety/git-checkpoint.js";
import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
import { getMilestoneSlices, getSliceTaskCounts, getTask, isDbAvailable } from "../sf-db.js";
import {
getMilestoneSlices,
getSliceTaskCounts,
getTask,
isDbAvailable,
} from "../sf-db.js";
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
import {
handleProductAudit,
type ProductAuditParams,
} from "../tools/product-audit-tool.js";
import type { Phase } from "../types.js";
import { parseUnitId } from "../unit-id.js";
import { writeUnitRuntimeRecord } from "../unit-runtime.js";
import { resolveUokFlags } from "../uok/flags.js";
import { UokGateRunner } from "../uok/gate-runner.js";
@ -88,10 +96,6 @@ import {
logError,
logWarning,
} from "../workflow-logger.js";
import {
handleProductAudit,
type ProductAuditParams,
} from "../tools/product-audit-tool.js";
import {
getRequiredWorkflowToolsForAutoUnit,
getWorkflowTransportSupportError,
@ -596,7 +600,11 @@ export async function runPreDispatch(
// Derive state
let state = await deps.deriveState(s.basePath);
if (uokFlags.planningFlow && isDbAvailable() && shouldRunPlanningFlowGate(state.phase)) {
if (
uokFlags.planningFlow &&
isDbAvailable() &&
shouldRunPlanningFlowGate(state.phase)
) {
let compiled = ensurePlanningFlowGraph(s.basePath, state);
// Empty-graph recovery: stale DB caches can yield 0 nodes right after a
// task-complete write. Invalidate caches, re-derive state, and retry once.
@ -1208,8 +1216,7 @@ export async function runDispatch(
const derivedKey = `${unitType}/${unitId}`;
const hasTransientTaskCompleteFailure =
unitType === "execute-task" &&
!!s.pendingTaskCompleteFailures?.has(unitId);
unitType === "execute-task" && !!s.pendingTaskCompleteFailures?.has(unitId);
if (!s.pendingVerificationRetry && !hasTransientTaskCompleteFailure) {
loopState.recentUnits.push({ key: derivedKey });
@ -1276,7 +1283,7 @@ export async function runDispatch(
(diagnostic?.length ?? 0) > MAX_RECOVERY_CHARS
? diagnostic!.slice(0, MAX_RECOVERY_CHARS) +
"\n\n[...diagnostic truncated]"
: diagnostic ?? null;
: (diagnostic ?? null);
s.pendingRethinkAttempt = JSON.stringify({
attempt,
reason: stuckSignal.reason,
@ -1286,9 +1293,10 @@ export async function runDispatch(
unitType,
unitId,
});
const rt = attempt === 5
? "**FINAL STUCK ATTEMPT — 5 of 5.** "
: `**STUCK RECOVERY ATTEMPT ${attempt - 1} of 4.** `;
const rt =
attempt === 5
? "**FINAL STUCK ATTEMPT — 5 of 5.** "
: `**STUCK RECOVERY ATTEMPT ${attempt - 1} of 4.** `;
ctx.ui.notify(
`${rt}Stuck on ${unitType} ${unitId} (${stuckSignal.reason}). Injecting diagnostic and retrying.`,
"warning",
@ -1677,12 +1685,7 @@ export async function runGuards(
// FailureClass "input" → 0 retries (broken plan needs human fix, not
// an LLM retry). Only fires when uok.gates.enabled is true.
const uokFlagsGuards = resolveUokFlags(prefs);
if (
uokFlagsGuards.gates &&
unitType === "execute-task" &&
mid &&
sliceId
) {
if (uokFlagsGuards.gates && unitType === "execute-task" && mid && sliceId) {
const taskCounts = getSliceTaskCounts(mid, sliceId);
const isFirstTaskForSlice = taskCounts.done === 0;
if (isFirstTaskForSlice) {
@ -1814,7 +1817,9 @@ export async function runUnitPhase(
iterData: IterationData,
loopState: LoopState,
sidecarItem?: SidecarItem,
): Promise<PhaseResult<{ unitStartedAt: number; requestDispatchedAt?: number }>> {
): Promise<
PhaseResult<{ unitStartedAt: number; requestDispatchedAt?: number }>
> {
const { ctx, pi, s, deps, prefs } = ic;
const { unitType, unitId, prompt, state, mid } = iterData;
@ -2074,7 +2079,10 @@ export async function runUnitPhase(
lines.push("", `**Suggested remediation:**\n${rethinkCtx.remediation}`);
}
if (rethinkCtx.diagnostic) {
lines.push("", `**Full diagnostic from previous attempt:**\n${rethinkCtx.diagnostic}`);
lines.push(
"",
`**Full diagnostic from previous attempt:**\n${rethinkCtx.diagnostic}`,
);
}
lines.push("", "---", "", finalPrompt);
finalPrompt = lines.join("\n");
@ -2320,13 +2328,16 @@ export async function runUnitPhase(
) {
// Session-timeout cancellations are resumable pauses: pauseAuto below preserves the auto session
// instead of routing the cancelled unit into the hard-stop path.
const isSessionCreationTimeout = unitResult.errorContext.message?.includes("Session creation timed out");
const isSessionCreationTimeout =
unitResult.errorContext.message?.includes("Session creation timed out");
if (isSessionCreationTimeout) {
consecutiveSessionTimeouts += 1;
const baseRetryAfterMs = 30_000;
const retryAfterMs = baseRetryAfterMs * 2 ** Math.max(0, consecutiveSessionTimeouts - 1);
const allowAutoResume = consecutiveSessionTimeouts <= MAX_SESSION_TIMEOUT_AUTO_RESUMES;
const retryAfterMs =
baseRetryAfterMs * 2 ** Math.max(0, consecutiveSessionTimeouts - 1);
const allowAutoResume =
consecutiveSessionTimeouts <= MAX_SESSION_TIMEOUT_AUTO_RESUMES;
if (!allowAutoResume) {
ctx.ui.notify(
@ -2356,7 +2367,8 @@ export async function runUnitPhase(
resume: allowAutoResume
? () => {
void resumeAutoAfterProviderDelay(pi, ctx).catch((err) => {
const message = err instanceof Error ? err.message : String(err);
const message =
err instanceof Error ? err.message : String(err);
ctx.ui.notify(
`Session timeout recovery failed: ${message}`,
"error",
@ -2369,7 +2381,13 @@ export async function runUnitPhase(
if (!allowAutoResume) {
resetConsecutiveSessionTimeouts();
}
await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext);
await emitCancelledUnitEnd(
ic,
unitType,
unitId,
unitStartSeq,
unitResult.errorContext,
);
return { action: "break", reason: "session-timeout" };
}
@ -2378,7 +2396,11 @@ export async function runUnitPhase(
`Unit timed out for ${unitType} ${unitId} (supervision may have failed). Pausing auto-mode.`,
"warning",
);
debugLog("autoLoop", { phase: "unit-hard-timeout-pause", unitType, unitId });
debugLog("autoLoop", {
phase: "unit-hard-timeout-pause",
unitType,
unitId,
});
await deps.pauseAuto(ctx, pi);
await emitCancelledUnitEnd(
ic,
@ -2468,7 +2490,10 @@ export async function runUnitPhase(
u.startedAt === s.currentUnit?.startedAt,
);
if (lastUnit && lastUnit.toolCalls === 0) {
if (USER_DRIVEN_DEEP_UNITS.has(unitType) && isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)) {
if (
USER_DRIVEN_DEEP_UNITS.has(unitType) &&
isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)
) {
debugLog("runUnitPhase", {
phase: "zero-tool-calls-awaiting-user-input",
unitType,
@ -2500,7 +2525,10 @@ export async function runUnitPhase(
// and re-dispatch this unit.
return {
action: "next",
data: { unitStartedAt: s.currentUnit?.startedAt, requestDispatchedAt: unitResult.requestDispatchedAt },
data: {
unitStartedAt: s.currentUnit?.startedAt,
requestDispatchedAt: unitResult.requestDispatchedAt,
},
};
}
}
@ -2517,7 +2545,10 @@ export async function runUnitPhase(
const skipArtifactVerification = shouldSkipArtifactVerification(unitType);
let artifactVerified: boolean;
if (USER_DRIVEN_DEEP_UNITS.has(unitType) && isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)) {
if (
USER_DRIVEN_DEEP_UNITS.has(unitType) &&
isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)
) {
// Skip artifact verification — unit is paused waiting for user input
artifactVerified = false;
} else {
@ -2688,7 +2719,13 @@ export async function runUnitPhase(
}
s.preUnitDirtyFiles = [];
return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt, requestDispatchedAt: unitResult.requestDispatchedAt } };
return {
action: "next",
data: {
unitStartedAt: s.currentUnit?.startedAt,
requestDispatchedAt: unitResult.requestDispatchedAt,
},
};
}
// ─── runFinalize ──────────────────────────────────────────────────────────────
@ -2734,8 +2771,15 @@ export async function runFinalize(
// Sidecar items use lightweight pre-verification opts
const preVerificationOpts: PreVerificationOpts = sidecarItem
? sidecarItem.kind === "hook"
? { skipSettleDelay: true, skipWorktreeSync: true, agentEndMessages: s.lastUnitAgentEndMessages ?? undefined }
: { skipSettleDelay: true, agentEndMessages: s.lastUnitAgentEndMessages ?? undefined }
? {
skipSettleDelay: true,
skipWorktreeSync: true,
agentEndMessages: s.lastUnitAgentEndMessages ?? undefined,
}
: {
skipSettleDelay: true,
agentEndMessages: s.lastUnitAgentEndMessages ?? undefined,
}
: { agentEndMessages: s.lastUnitAgentEndMessages ?? undefined };
const _preUnitSnapshot = s.currentUnit
? {
@ -3079,7 +3123,11 @@ export async function runFinalize(
const severity = logs.some((e) => e.severity === "error")
? "error"
: "warning";
ctx.ui.notify(formatForNotification(logs), severity);
ctx.ui.notify(formatForNotification(logs), severity, {
kind: severity === "error" ? "notice" : "progress",
source: "workflow-logger",
dedupe_key: `workflow-issues:${iterData.unitType}:${iterData.unitId}`,
});
}
}

View file

@ -83,12 +83,15 @@ export const MODEL_CAPABILITY_TIER: Record<string, ComplexityTier> = {
"gpt-5.3-codex-spark": "light",
"gemini-2.0-flash": "light",
"gemini-flash-2.0": "light",
"gemini-3.1-flash-lite-preview": "light",
"gemini-2.5-flash-lite": "light",
"glm-4.7-flash": "light",
"glm-4.7-flashx": "light",
"ministral-3b-latest": "light",
"ministral-8b-latest": "light",
"devstral-small-2505": "light",
"devstral-small-2507": "light",
"labs-devstral-small-2512": "light",
// Standard-tier models
"claude-sonnet-4-6": "standard",
@ -98,8 +101,16 @@ export const MODEL_CAPABILITY_TIER: Record<string, ComplexityTier> = {
"gpt-4.1": "standard",
"gpt-5.1-codex-max": "standard",
"gemini-2.5-pro": "standard",
"gemini-3-flash-preview": "standard",
"gemini-2.5-flash": "standard",
"deepseek-chat": "standard",
"glm-4.7": "standard",
"qwen3-coder:480b": "standard",
"qwen3-coder-next": "standard",
"kimi-k2.6": "standard",
"kimi-for-coding": "standard",
"MiniMax-M2.7": "standard",
"MiniMax-M2.7-highspeed": "standard",
"codestral-latest": "standard",
"devstral-2512": "standard",
"devstral-medium-2507": "standard",
@ -131,6 +142,10 @@ export const MODEL_CAPABILITY_TIER: Record<string, ComplexityTier> = {
o3: "heavy",
"o4-mini": "heavy",
"o4-mini-deep-research": "heavy",
"gemini-3.1-pro-preview": "heavy",
"gemini-3-pro-preview": "heavy",
"kimi-k2-thinking": "heavy",
"qwen3-next:80b": "heavy",
"glm-5": "heavy",
"glm-5-turbo": "heavy",
"glm-5.1": "heavy",
@ -176,6 +191,12 @@ const MODEL_COST_PER_1K_INPUT: Record<string, number> = {
"o4-mini-deep-research": 0.005,
"gemini-2.0-flash": 0.0001,
"gemini-2.5-pro": 0.00125,
"gemini-3.1-pro-preview": 0.00125,
"gemini-3.1-flash-lite-preview": 0.0001,
"gemini-3-pro-preview": 0.00125,
"gemini-3-flash-preview": 0.0001,
"gemini-2.5-flash": 0.0001,
"gemini-2.5-flash-lite": 0.00005,
"deepseek-chat": 0.00014,
"glm-4.7": 0.0006,
"glm-4.7-flash": 0,
@ -184,12 +205,21 @@ const MODEL_COST_PER_1K_INPUT: Record<string, number> = {
"glm-5-turbo": 0.0012,
"glm-5.1": 0.0014,
"glm-5v-turbo": 0.0012,
"qwen3-coder:480b": 0.0004,
"qwen3-coder-next": 0.0004,
"qwen3-next:80b": 0.0002,
"kimi-k2.6": 0.0006,
"kimi-for-coding": 0.0006,
"kimi-k2-thinking": 0.001,
"MiniMax-M2.7": 0.0006,
"MiniMax-M2.7-highspeed": 0.0006,
"codestral-latest": 0.0003,
"devstral-2512": 0.0004,
"devstral-medium-2507": 0.0004,
"devstral-medium-latest": 0.0004,
"devstral-small-2505": 0.0001,
"devstral-small-2507": 0.0001,
"labs-devstral-small-2512": 0.0001,
"magistral-medium-latest": 0.002,
"magistral-small": 0.0005,
"ministral-3b-latest": 0.00004,
@ -523,6 +553,60 @@ export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
longContext: 90,
instruction: 75,
},
"gemini-3.1-pro-preview": {
coding: 82,
debugging: 78,
research: 92,
reasoning: 84,
speed: 48,
longContext: 98,
instruction: 82,
},
"gemini-3-pro-preview": {
coding: 82,
debugging: 78,
research: 90,
reasoning: 84,
speed: 50,
longContext: 96,
instruction: 82,
},
"gemini-3-flash-preview": {
coding: 62,
debugging: 55,
research: 70,
reasoning: 60,
speed: 88,
longContext: 88,
instruction: 72,
},
"gemini-3.1-flash-lite-preview": {
coding: 55,
debugging: 48,
research: 62,
reasoning: 52,
speed: 96,
longContext: 85,
instruction: 68,
},
"gemini-2.5-flash": {
coding: 60,
debugging: 52,
research: 68,
reasoning: 58,
speed: 92,
longContext: 85,
instruction: 70,
},
"gemini-2.5-flash-lite": {
coding: 52,
debugging: 45,
research: 58,
reasoning: 48,
speed: 97,
longContext: 78,
instruction: 65,
},
"gemini-2.0-flash": {
coding: 50,
debugging: 40,
@ -761,6 +845,15 @@ export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
longContext: 45,
instruction: 65,
},
"labs-devstral-small-2512": {
coding: 65,
debugging: 58,
research: 45,
reasoning: 55,
speed: 88,
longContext: 60,
instruction: 68,
},
// ── Zhipu AI (GLM) ─────────────────────────────────────────────────────────
"glm-5": {
@ -826,6 +919,129 @@ export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
longContext: 45,
instruction: 60,
},
// ── Qwen / Ollama Cloud compatible tags ──────────────────────────────────
"qwen3-coder:480b": {
coding: 84,
debugging: 78,
research: 62,
reasoning: 76,
speed: 58,
longContext: 86,
instruction: 78,
},
"qwen3-coder-next": {
coding: 82,
debugging: 76,
research: 60,
reasoning: 74,
speed: 70,
longContext: 86,
instruction: 76,
},
"qwen3-next:80b": {
coding: 70,
debugging: 68,
research: 76,
reasoning: 80,
speed: 62,
longContext: 86,
instruction: 74,
},
// ── Moonshot / Kimi ───────────────────────────────────────────────────────
"kimi-k2.6": {
coding: 88,
debugging: 84,
research: 72,
reasoning: 82,
speed: 55,
longContext: 86,
instruction: 84,
},
"kimi-for-coding": {
coding: 88,
debugging: 84,
research: 72,
reasoning: 82,
speed: 55,
longContext: 86,
instruction: 84,
},
"kimi-k2-thinking": {
coding: 86,
debugging: 88,
research: 78,
reasoning: 92,
speed: 30,
longContext: 86,
instruction: 84,
},
// ── MiniMax ───────────────────────────────────────────────────────────────
"MiniMax-M2.7": {
coding: 84,
debugging: 80,
research: 78,
reasoning: 84,
speed: 52,
longContext: 84,
instruction: 82,
},
"MiniMax-M2.7-highspeed": {
coding: 82,
debugging: 78,
research: 76,
reasoning: 80,
speed: 72,
longContext: 84,
instruction: 80,
},
};
const MODEL_CAPABILITY_ALIASES: Record<string, string> = {
"deepseek-v3.1": "deepseek-chat",
"deepseek-v3.2": "deepseek-chat",
"deepseek-v4-flash": "deepseek-chat",
"deepseek-v4-pro": "deepseek-chat",
"devstral-latest": "devstral-medium-latest",
"devstral-2:123b": "devstral-2512",
"mistral.devstral-2-123b": "devstral-2512",
"devstral-small-2:24b": "devstral-small-2507",
"mistral.devstral-small-2-24b": "labs-devstral-small-2512",
"mistral.mistral-large-3-675b-instruct": "mistral-large-latest",
"mistral.ministral-3-14b-instruct": "mistral-small-latest",
"mistral.ministral-3-3b-instruct": "ministral-3b-latest",
"mistral.ministral-3-8b-instruct": "ministral-8b-latest",
"gemini-3-flash-preview": "gemini-3-flash-preview",
"glm-4.6": "glm-4.7",
"gpt-oss:120b": "gpt-4o",
"gpt-oss:20b": "gpt-4o-mini",
"kimi-k2:1t": "kimi-k2.6",
"kimi-k2.5": "kimi-k2.6",
"kimi-for-coding": "kimi-k2.6",
"kimi-k2.6:cloud": "kimi-k2.6",
"kimi-k2.6-cloud": "kimi-k2.6",
"minimax-m2": "MiniMax-M2.7",
"minimax-m2.1": "MiniMax-M2.7",
"minimax-m2.5": "MiniMax-M2.7",
"minimax-m2.7": "MiniMax-M2.7",
"mistral-large-3:675b": "mistral-large-latest",
"ministral-3:3b": "ministral-3b-latest",
"ministral-3:8b": "ministral-8b-latest",
"ministral-3:14b": "mistral-small-latest",
"nemotron-3-nano:30b": "gpt-4o-mini",
"nemotron-3-super": "gpt-4o",
"qwen3-coder-480b-a35b-v1:0": "qwen3-coder:480b",
"qwen3-coder-480b-a35b": "qwen3-coder:480b",
"qwen3-coder": "qwen3-coder:480b",
"qwen3-coder:free": "qwen3-coder:480b",
"qwen3-coder-30b-a3b-instruct": "qwen3-coder-next",
"qwen3-coder-flash": "qwen3-coder-next",
"qwen3-next-80b-a3b": "qwen3-next:80b",
"qwen3-next-80b-a3b-instruct": "qwen3-next:80b",
"qwen3-next-80b-a3b-instruct:free": "qwen3-next:80b",
"qwen3-next-80b-a3b-thinking": "qwen3-next:80b",
};
// ─── Base Task Requirements Data Table ───────────────────────────────────────
@ -922,8 +1138,10 @@ export function scoreEligibleModels(
capabilityOverrides?: Record<string, Partial<ModelCapabilities>>,
): Array<{ modelId: string; score: number }> {
const scored = eligibleModelIds.map((modelId) => {
const builtin = MODEL_CAPABILITY_PROFILES[modelId];
const override = capabilityOverrides?.[modelId];
const canonicalModelId = canonicalCapabilityModelId(modelId);
const builtin = MODEL_CAPABILITY_PROFILES[canonicalModelId];
const override =
capabilityOverrides?.[modelId] ?? capabilityOverrides?.[canonicalModelId];
const profile: ModelCapabilities = builtin
? override
? { ...builtin, ...override }
@ -950,6 +1168,29 @@ export function scoreEligibleModels(
return scored;
}
function canonicalCapabilityModelId(modelId: string): string {
const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
const normalizedId = bareId.replace(/:cloud$/i, "").replace(/-cloud$/i, "");
const aliased = resolveCapabilityAlias(bareId) ?? resolveCapabilityAlias(normalizedId);
if (aliased) return aliased;
if (MODEL_CAPABILITY_PROFILES[normalizedId]) return normalizedId;
for (const knownId of Object.keys(MODEL_CAPABILITY_PROFILES)) {
if (normalizedId.includes(knownId) || knownId.includes(normalizedId)) {
return knownId;
}
}
return normalizedId;
}
/**
 * Look up a model ID in the capability alias table.
 * Exact-key hits win; otherwise a case-insensitive scan over the table's
 * keys is tried. Returns undefined when no alias applies.
 */
function resolveCapabilityAlias(modelId: string): string | undefined {
  const exact = MODEL_CAPABILITY_ALIASES[modelId];
  if (exact) return exact;
  const needle = modelId.toLowerCase();
  for (const [alias, target] of Object.entries(MODEL_CAPABILITY_ALIASES)) {
    if (alias.toLowerCase() === needle) return target;
  }
  return undefined;
}
/**
* Return all models eligible for a given tier, sorted cheapest first.
* If routingConfig.tier_models[tier] is set and available, returns only that
@ -1193,18 +1434,17 @@ export function defaultRoutingConfig(): DynamicRoutingConfig {
// ─── Internal ────────────────────────────────────────────────────────────────
export function getModelTier(modelId: string): ComplexityTier {
// Strip provider prefix if present
const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
const canonicalId = canonicalCapabilityModelId(modelId);
// Check exact match first
if (MODEL_CAPABILITY_TIER[bareId]) return MODEL_CAPABILITY_TIER[bareId];
if (MODEL_CAPABILITY_TIER[canonicalId]) return MODEL_CAPABILITY_TIER[canonicalId];
const sizeTier = inferTierFromModelSize(bareId);
const sizeTier = inferTierFromModelSize(canonicalId);
if (sizeTier) return sizeTier;
// Check if any known model ID is a prefix/suffix match
for (const [knownId, tier] of Object.entries(MODEL_CAPABILITY_TIER)) {
if (bareId.includes(knownId) || knownId.includes(bareId)) return tier;
if (canonicalId.includes(knownId) || knownId.includes(canonicalId)) return tier;
}
// Unknown models are assumed standard (per D-15: avoids silently ignoring user config)
@ -1223,24 +1463,26 @@ function inferTierFromModelSize(modelId: string): ComplexityTier | null {
/** Check if a model ID has a known capability tier mapping. (#2192) */
function isKnownModel(modelId: string): boolean {
const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
if (MODEL_CAPABILITY_TIER[bareId]) return true;
const canonicalId = canonicalCapabilityModelId(modelId);
if (MODEL_CAPABILITY_TIER[canonicalId]) return true;
for (const knownId of Object.keys(MODEL_CAPABILITY_TIER)) {
if (bareId.includes(knownId) || knownId.includes(bareId)) return true;
if (canonicalId.includes(knownId) || knownId.includes(canonicalId)) return true;
}
return false;
}
function getModelCost(modelId: string): number {
const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
const canonicalId = canonicalCapabilityModelId(modelId);
if (MODEL_COST_PER_1K_INPUT[bareId] !== undefined) {
return MODEL_COST_PER_1K_INPUT[bareId];
if (MODEL_COST_PER_1K_INPUT[canonicalId] !== undefined) {
return MODEL_COST_PER_1K_INPUT[canonicalId];
}
// Check partial matches
for (const [knownId, cost] of Object.entries(MODEL_COST_PER_1K_INPUT)) {
if (bareId.includes(knownId) || knownId.includes(bareId)) return cost;
if (canonicalId.includes(knownId) || knownId.includes(canonicalId)) {
return cost;
}
}
// Unknown cost — assume expensive to avoid routing to unknown cheap models

View file

@ -1,5 +1,5 @@
// SF Extension — Persistent Notification Store
// Captures all ctx.ui.notify() calls and workflow-logger warnings to
// Captures durable ctx.ui.notify() calls and workflow-logger errors to
// .sf/notifications.jsonl so they survive context resets and session restarts.
// Rotates at MAX_ENTRIES to prevent unbounded growth.
@ -99,6 +99,7 @@ export function appendNotification(
): void {
if (!_basePath) return;
if (_suppressCount > 0) return;
if (!shouldPersistNotification(severity, metadata)) return;
const persistedMessage =
message.length > 500 ? message.slice(0, 500) + "…" : message;
// Use explicit dedupe_key when provided; fall back to message-hash based key.
@ -141,6 +142,14 @@ export function appendNotification(
}
}
/**
 * Decide whether a notification should be written to the persistent store.
 * Transient progress notices are dropped; everything else is kept.
 * Severity is currently not part of the decision (parameter kept for the
 * call-site contract).
 */
function shouldPersistNotification(
  _severity: NotifySeverity,
  metadata?: NotificationMetadata,
): boolean {
  return metadata?.kind !== "progress";
}
/**
* Read all notification entries from disk. Returns newest-first.
*/
@ -350,7 +359,10 @@ function _withLock<T>(basePath: string, fn: () => T): T {
const stat = readFileSync(lockPath, "utf-8");
const lockTime = parseInt(stat, 10);
// Treat NaN (creator crashed before writing timestamp) as stale.
if (isNaN(lockTime) || (Number.isFinite(lockTime) && Date.now() - lockTime > 5000)) {
if (
Number.isNaN(lockTime) ||
(Number.isFinite(lockTime) && Date.now() - lockTime > 5000)
) {
try {
unlinkSync(lockPath);
} catch {

View file

@ -271,6 +271,69 @@ test("scoreModel returns 50 for empty requirements", () => {
assert.equal(score, 50);
});
test("scoreEligibleModels treats kimi-for-coding as the Kimi K2.6 capability profile", () => {
  // A provider-prefixed kimi-for-coding route should resolve to the
  // kimi-k2.6 capability profile and outrank a model with no known profile.
  const scored = scoreEligibleModels(
    ["kimi-coding/kimi-for-coding", "unknown-future-model"],
    { coding: 1.0 },
  );
  const top = scored[0];
  assert.equal(top?.modelId, "kimi-coding/kimi-for-coding");
  assert.equal(top?.score, MODEL_CAPABILITY_PROFILES["kimi-k2.6"].coding);
});
test("scoreEligibleModels uses bare model IDs for provider-prefixed GLM routes", () => {
  // Both routes carry a "zai/" prefix; scoring must fall through to the
  // bare GLM profile so glm-5.1 wins on reasoning.
  const scored = scoreEligibleModels(["zai/glm-5.1", "zai/glm-4.7"], {
    reasoning: 1.0,
  });
  const best = scored[0];
  assert.equal(best?.modelId, "zai/glm-5.1");
  assert.equal(best?.score, MODEL_CAPABILITY_PROFILES["glm-5.1"].reasoning);
});
test("scoreEligibleModels keeps Kimi thinking distinct from plain K2.6", () => {
  // Reasoning-weighted scoring should prefer the thinking variant…
  const byReasoning = scoreEligibleModels(
    ["kimi-coding/kimi-k2-thinking", "kimi-coding/kimi-k2.6"],
    { reasoning: 1.0 },
  );
  assert.equal(byReasoning[0]?.modelId, "kimi-coding/kimi-k2-thinking");
  // …while speed-weighted scoring should prefer plain K2.6.
  const bySpeed = scoreEligibleModels(
    ["kimi-coding/kimi-k2-thinking", "kimi-coding/kimi-k2.6"],
    { speed: 1.0 },
  );
  assert.equal(bySpeed[0]?.modelId, "kimi-coding/kimi-k2.6");
});
test("scoreEligibleModels normalizes Ollama Cloud suffix aliases", () => {
  // The ":cloud" suffix must be stripped so the route picks up the
  // kimi-k2.6 profile instead of scoring as an unknown model.
  const ranked = scoreEligibleModels(
    ["ollama-cloud/kimi-k2.6:cloud", "unknown-future-model"],
    { coding: 1.0 },
  );
  const winner = ranked[0];
  assert.equal(winner?.modelId, "ollama-cloud/kimi-k2.6:cloud");
  assert.equal(winner?.score, MODEL_CAPABILITY_PROFILES["kimi-k2.6"].coding);
});
test("scoreEligibleModels normalizes Ollama Cloud family aliases", () => {
  // All three cloud routes should resolve to known standard-tier profiles,
  // so each score must exceed 50 and each tier must be "standard".
  const ranked = scoreEligibleModels(
    [
      "ollama-cloud/minimax-m2.7",
      "ollama-cloud/devstral-2:123b",
      "ollama-cloud/qwen3-coder:480b",
    ],
    { coding: 1.0 },
  );
  assert.ok(ranked.every((entry) => entry.score > 50));
  assert.deepEqual(
    ranked.map((entry) => getModelTier(entry.modelId)),
    ["standard", "standard", "standard"],
  );
});
test("computeTaskRequirements returns base vector for known unit type", () => {
const reqs = computeTaskRequirements("execute-task");
assert.ok(reqs.coding !== undefined && reqs.coding > 0);

View file

@ -12,7 +12,7 @@ import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, test } from 'vitest';
import { afterEach, beforeEach, describe, test } from "vitest";
import {
isBlockedNotification,
@ -126,10 +126,10 @@ describe("isMilestoneReadyNotification — metadata-first", () => {
describe("isPauseNotification — metadata-first", () => {
test("returns true when metadata.kind=terminal and blocking=true", () => {
const event = notifyEvent(
"Autonomous mode paused. Type to interact.",
{ kind: "terminal", blocking: true },
);
const event = notifyEvent("Autonomous mode paused. Type to interact.", {
kind: "terminal",
blocking: true,
});
assert.equal(isPauseNotification(event), true);
});
});
@ -157,7 +157,11 @@ describe("notification-store — dedupe_key", () => {
dedupe_key: "sync:progress",
});
const entries = readNotifications(tmpDir);
assert.equal(entries.length, 1, "second entry with same dedupe_key should be dropped");
assert.equal(
entries.length,
1,
"second entry with same dedupe_key should be dropped",
);
});
test("does not deduplicate across different dedupe_keys", () => {
@ -168,7 +172,11 @@ describe("notification-store — dedupe_key", () => {
dedupe_key: "sync:B",
});
const entries = readNotifications(tmpDir);
assert.equal(entries.length, 2, "different dedupe_keys should produce separate entries");
assert.equal(
entries.length,
2,
"different dedupe_keys should produce separate entries",
);
});
test("stores metadata on the entry", () => {
@ -184,15 +192,13 @@ describe("notification-store — dedupe_key", () => {
assert.equal(entries[0].metadata?.source, "workflow");
});
test("automated progress notice does not affect blocking classification", () => {
test("automated progress notice is not persisted or treated as blocking", () => {
appendNotification("Running checks...", "info", "notify", {
kind: "progress",
source: "workflow",
});
const entries = readNotifications(tmpDir);
assert.equal(entries.length, 1);
// The notice is stored, but kind=progress means headless will not treat it as blocked.
assert.equal(entries[0].metadata?.kind, "progress");
assert.equal(entries.length, 0);
// Confirm headless classification: this event should NOT be blocked
const fakeEvent = notifyEvent("Running checks...", {
kind: "progress",

View file

@ -11,7 +11,7 @@ import {
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, test, vi } from 'vitest';
import { afterEach, beforeEach, describe, test, vi } from "vitest";
import {
_resetNotificationStore,

View file

@ -532,7 +532,7 @@ test("workflow MCP ask_user_questions uses stdio elicitation round-trip", async
},
},
undefined,
{ timeout: 60_000 },
{ timeout: 120_000 },
);
assert.ok(

View file

@ -299,17 +299,18 @@ function _push(
const ctxStr = context ? ` ${JSON.stringify(context)}` : "";
_writeStderr(`[sf:${component}] ${prefix}: ${message}${ctxStr}\n`);
// Persist to notification store (both warnings and errors)
try {
appendNotification(
`[${component}] ${message}`,
severity === "error" ? "error" : "warning",
"workflow-logger",
);
} catch (notifErr) {
_writeStderr(
`[sf:workflow-logger] notification-store append failed: ${(notifErr as Error).message}\n`,
);
if (severity === "error") {
try {
appendNotification(
`[${component}] ${message}`,
"error",
"workflow-logger",
);
} catch (notifErr) {
_writeStderr(
`[sf:workflow-logger] notification-store append failed: ${(notifErr as Error).message}\n`,
);
}
}
// Buffer for auto-loop to drain

View file

@ -19,7 +19,7 @@ import { execFileSync, spawn } from "node:child_process";
import { existsSync, mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { test, afterEach } from 'vitest';
import { afterEach, test } from "vitest";
const projectRoot = process.cwd();
const loaderPath = join(projectRoot, "dist", "loader.js");
@ -43,13 +43,13 @@ type RunResult = {
* Spawn `node dist/loader.js ...args` and collect output.
*
* @param args CLI arguments to pass after the script path
* @param timeoutMs Maximum time to wait before SIGTERM (default 8 s)
* @param timeoutMs Maximum time to wait before SIGTERM (default 15 s)
* @param env Additional / override environment variables
* @param cwd Working directory for the child process (default: projectRoot)
*/
function runSf(
args: string[],
timeoutMs = 8_000,
timeoutMs = 15_000,
env: NodeJS.ProcessEnv = {},
cwd: string = projectRoot,
): Promise<RunResult> {
@ -88,7 +88,6 @@ function runSf(
/** Strip ANSI escape codes from a string. */
function stripAnsi(s: string): string {
// biome-ignore lint/suspicious/noControlCharactersInRegex: ANSI escape sequence
return s.replace(/\x1b\[[0-9;]*[A-Za-z]/g, "");
}
@ -426,7 +425,7 @@ test("sf -h is equivalent to --help", async () => {
// 13. sf headless without .sf/ directory exits 1 with clean error
// ---------------------------------------------------------------------------
test("sf headless without .sf/ directory exits 1 with clean error", async (t) => {
test("sf headless without .sf/ directory exits 1 with clean error", async () => {
const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-no-sf-"));
afterEach(() => {
@ -451,19 +450,14 @@ test("sf headless without .sf/ directory exits 1 with clean error", async (t) =>
// 14. sf headless new-milestone without --context exits 1
// ---------------------------------------------------------------------------
test("sf headless new-milestone without --context exits 1", async (t) => {
test("sf headless new-milestone without --context exits 1", async () => {
const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-no-ctx-"));
afterEach(() => {
rmSync(tmpDir, { recursive: true, force: true });
});
const result = await runSf(
["headless", "new-milestone"],
10_000,
{},
tmpDir,
);
const result = await runSf(["headless", "new-milestone"], 10_000, {}, tmpDir);
assert.ok(!result.timedOut, "process should not hang");
assert.strictEqual(result.code, 1, `expected exit 1, got ${result.code}`);
@ -481,7 +475,7 @@ test("sf headless new-milestone without --context exits 1", async (t) => {
// 15. sf headless --timeout with invalid value exits 1
// ---------------------------------------------------------------------------
test("sf headless --timeout with invalid value exits 1", async (t) => {
test("sf headless --timeout with invalid value exits 1", async () => {
const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-bad-timeout-"));
afterEach(() => {
@ -511,7 +505,7 @@ test("sf headless --timeout with invalid value exits 1", async (t) => {
// 16. sf headless --timeout with negative value exits 1
// ---------------------------------------------------------------------------
test("sf headless --timeout with negative value exits 1", async (t) => {
test("sf headless --timeout with negative value exits 1", async () => {
const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-neg-timeout-"));
afterEach(() => {
@ -537,7 +531,7 @@ test("sf headless --timeout with negative value exits 1", async (t) => {
assertNoCrashMarkers(combined);
});
test("sf headless query returns JSON from the built CLI", async (t) => {
test("sf headless query returns JSON from the built CLI", async () => {
const tmpDir = createTempGitRepo("sf-e2e-query-");
afterEach(() => {
@ -565,7 +559,7 @@ test("sf headless query returns JSON from the built CLI", async (t) => {
);
});
test("sf worktree list loads the built worktree CLI without module errors", async (t) => {
test("sf worktree list loads the built worktree CLI without module errors", async () => {
const tmpDir = createTempGitRepo("sf-e2e-worktree-");
afterEach(() => {

View file

@ -21,11 +21,14 @@ import {
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { test, afterEach } from 'vitest';
import { delimiter, dirname, join } from "node:path";
import { createGunzip } from "node:zlib";
import { afterEach, test } from "vitest";
const projectRoot = process.cwd();
const packageName = JSON.parse(
readFileSync(join(projectRoot, "package.json"), "utf-8"),
).name as string;
if (!existsSync(join(projectRoot, "dist"))) {
throw new Error("dist/ not found — run: npm run build");
@ -50,6 +53,9 @@ function createNpmSandbox(prefix: string): NpmSandbox {
installPrefix,
env: {
...process.env,
PATH: [dirname(process.execPath), process.env.PATH]
.filter(Boolean)
.join(delimiter),
NPM_CONFIG_CACHE: cacheDir,
npm_config_cache: cacheDir,
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: "1",
@ -62,6 +68,8 @@ function buildQuietNpmEnv(sandbox: NpmSandbox): NodeJS.ProcessEnv {
...sandbox.env,
NPM_CONFIG_LOGLEVEL: "error",
npm_config_loglevel: "error",
NPM_CONFIG_ENGINE_STRICT: "false",
npm_config_engine_strict: "false",
NPM_CONFIG_FUND: "false",
npm_config_fund: "false",
NPM_CONFIG_AUDIT: "false",
@ -141,7 +149,7 @@ function listTarEntries(tarballPath: string): Promise<string[]> {
// 1. npm pack produces valid tarball with correct file layout
// ═══════════════════════════════════════════════════════════════════════════
test("npm pack produces tarball with required files", async (t) => {
test("npm pack produces tarball with required files", async () => {
const sandbox = createNpmSandbox("sf-pack-test-");
const tarballPath = packTarball(sandbox);
@ -204,13 +212,13 @@ test("npm pack produces tarball with required files", async (t) => {
".sf",
"pkg/package.json piConfig.configDir is .sf",
);
});
}, 240_000);
// ═══════════════════════════════════════════════════════════════════════════
// 2. npm pack → install → sf binary resolves
// ═══════════════════════════════════════════════════════════════════════════
test("tarball installs and sf binary resolves", async (t) => {
test("tarball installs and sf binary resolves", async () => {
const sandbox = createNpmSandbox("sf-install-test-");
const tarballPath = packTarball(sandbox);
@ -242,7 +250,7 @@ test("tarball installs and sf binary resolves", async (t) => {
const installedLoader = join(
sandbox.installPrefix,
"node_modules",
"sf-run",
packageName,
"dist",
"loader.js",
);
@ -258,7 +266,7 @@ test("tarball installs and sf binary resolves", async (t) => {
const installedSfExt = join(
sandbox.installPrefix,
"node_modules",
"sf-run",
packageName,
"src",
"resources",
"extensions",
@ -269,7 +277,7 @@ test("tarball installs and sf binary resolves", async (t) => {
existsSync(installedSfExt),
"bundled sf extension present in installed package",
);
});
}, 420_000);
// ═══════════════════════════════════════════════════════════════════════════
// 3. Launch → extensions load → no errors on stderr
@ -329,7 +337,7 @@ test("sf launches and loads extensions without errors", async () => {
);
});
test("sf exits early with a clear message when synced resources are newer than the binary", async (t) => {
test("sf exits early with a clear message when synced resources are newer than the binary", async () => {
const fakeHome = mkdtempSync(join(tmpdir(), "sf-version-skew-"));
const fakeAgentDir = join(fakeHome, ".sf", "agent");
mkdirSync(fakeAgentDir, { recursive: true });