diff --git a/docs/records/2026-05-02-pdd-v2-research.md b/docs/records/2026-05-02-pdd-v2-research.md index 3b7b50f61..39a37df65 100644 --- a/docs/records/2026-05-02-pdd-v2-research.md +++ b/docs/records/2026-05-02-pdd-v2-research.md @@ -2,6 +2,8 @@ actionable: true kind: design-research date: 2026-05-02 +promoted: true +promoted_to: M012 --- # PDD v2 — Research Findings diff --git a/package-lock.json b/package-lock.json index 823062272..bf0fd0f1a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,13 +14,13 @@ "studio" ], "dependencies": { - "@anthropic-ai/sdk": "^0.73.0", + "@anthropic-ai/sdk": "^0.92.0", "@anthropic-ai/vertex-sdk": "^0.14.4", "@aws-sdk/client-bedrock-runtime": "^3.983.0", "@clack/prompts": "^1.1.0", "@google/genai": "^1.40.0", "@mariozechner/jiti": "^2.6.2", - "@mistralai/mistralai": "^1.14.1", + "@mistralai/mistralai": "^2.2.1", "@modelcontextprotocol/sdk": "^1.27.1", "@octokit/rest": "^22.0.1", "@silvia-odwyer/photon-node": "^0.3.4", @@ -72,6 +72,7 @@ "esbuild": "^0.27.4", "jiti": "^2.6.1", "typescript": "^5.4.0", + "typescript-language-server": "^5.1.3", "vitest": "^4.1.5" }, "engines": { @@ -155,9 +156,9 @@ } }, "node_modules/@anthropic-ai/sdk": { - "version": "0.73.0", - "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.73.0.tgz", - "integrity": "sha512-URURVzhxXGJDGUGFunIOtBlSl7KWvZiAAKY/ttTkZAkXT9bTPqdk2eK0b8qqSxXpikh3QKPnPYpiyX98zf5ebw==", + "version": "0.92.0", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.92.0.tgz", + "integrity": "sha512-l653JFC83wCglH8H83t1xpgDurCyPyslYW1maPRdCsfuNuGbLvQjQ81sWd3Go3LWRm0jNspzAhuqAYV8r9joSw==", "license": "MIT", "dependencies": { "json-schema-to-ts": "^3.1.1" @@ -3915,13 +3916,14 @@ } }, "node_modules/@mistralai/mistralai": { - "version": "1.14.1", - "resolved": "https://registry.npmjs.org/@mistralai/mistralai/-/mistralai-1.14.1.tgz", - "integrity": "sha512-IiLmmZFCCTReQgPAT33r7KQ1nYo5JPdvGkrkZqA8qQ2qB1GHgs5LoP5K2ICyrjnpw2n8oSxMM/VP+liiKcGNlQ==", + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/@mistralai/mistralai/-/mistralai-2.2.1.tgz", + "integrity": "sha512-uKU8CZmL2RzYKmplsU01hii4p3pe4HqJefpWNRWXm1Tcm0Sm4xXfwSLIy4k7ZCPlbETCGcp69E7hZs+WOJ5itQ==", + "license": "Apache-2.0", "dependencies": { "ws": "^8.18.0", "zod": "^3.25.0 || ^4.0.0", - "zod-to-json-schema": "^3.24.1" + "zod-to-json-schema": "^3.25.0" } }, "node_modules/@modelcontextprotocol/sdk": { @@ -15498,6 +15500,19 @@ "node": ">=14.17" } }, + "node_modules/typescript-language-server": { + "version": "5.1.3", + "resolved": "https://registry.npmjs.org/typescript-language-server/-/typescript-language-server-5.1.3.tgz", + "integrity": "sha512-r+pAcYtWdN8tKlYZPwiiHNA2QPjXnI02NrW5Sf2cVM3TRtuQ3V9EKKwOxqwaQ0krsaEXk/CbN90I5erBuf84Vg==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "typescript-language-server": "lib/cli.mjs" + }, + "engines": { + "node": ">=20" + } + }, "node_modules/uint8array-extras": { "version": "1.5.0", "resolved": "https://registry.npmjs.org/uint8array-extras/-/uint8array-extras-1.5.0.tgz", @@ -16305,7 +16320,7 @@ "version": "2.75.0", "license": "MIT", "dependencies": { - "@anthropic-ai/sdk": "^0.52.0", + "@anthropic-ai/sdk": "^0.92.0", "@singularity-forge/rpc-client": "^2.75.0", "discord.js": "^14.25.1", "yaml": "^2.8.0", @@ -16322,15 +16337,6 @@ "node": ">=24.15.0" } }, - "packages/daemon/node_modules/@anthropic-ai/sdk": { - "version": "0.52.0", - "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.52.0.tgz", - "integrity": "sha512-d4c+fg+xy9e46c8+YnrrgIQR45CZlAi7PwdzIfDXDM6ACxEZli1/fxhURsq30ZpMZy6LvSkr41jGq5aF5TD7rQ==", - "license": "MIT", - "bin": { - "anthropic-ai-sdk": "bin/cli" - } - }, "packages/daemon/node_modules/zod": { "version": "3.25.76", "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", @@ -16387,12 +16393,12 @@ "name": "@singularity-forge/pi-ai", "version": "2.75.0", "dependencies": { - "@anthropic-ai/sdk": "^0.73.0", + "@anthropic-ai/sdk": "^0.92.0", "@anthropic-ai/vertex-sdk": "^0.14.4", "@aws-sdk/client-bedrock-runtime": "^3.983.0", "@google/gemini-cli-core": "0.38.2", "@google/genai": "^1.40.0", - "@mistralai/mistralai": "^1.14.1", + "@mistralai/mistralai": "^2.2.1", "@sinclair/typebox": "^0.34.41", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", diff --git a/package.json b/package.json index 7c1a67544..329f13cbc 100644 --- a/package.json +++ b/package.json @@ -102,13 +102,13 @@ "test:live-regression": "node --experimental-strip-types tests/live-regression/run.ts" }, "dependencies": { - "@anthropic-ai/sdk": "^0.73.0", + "@anthropic-ai/sdk": "^0.92.0", "@anthropic-ai/vertex-sdk": "^0.14.4", "@aws-sdk/client-bedrock-runtime": "^3.983.0", "@clack/prompts": "^1.1.0", "@google/genai": "^1.40.0", "@mariozechner/jiti": "^2.6.2", - "@mistralai/mistralai": "^1.14.1", + "@mistralai/mistralai": "^2.2.1", "@modelcontextprotocol/sdk": "^1.27.1", "@octokit/rest": "^22.0.1", "@silvia-odwyer/photon-node": "^0.3.4", @@ -156,6 +156,7 @@ "esbuild": "^0.27.4", "jiti": "^2.6.1", "typescript": "^5.4.0", + "typescript-language-server": "^5.1.3", "vitest": "^4.1.5" }, "optionalDependencies": { diff --git a/packages/daemon/package.json b/packages/daemon/package.json index 5ce3715a5..9846d7740 100644 --- a/packages/daemon/package.json +++ b/packages/daemon/package.json @@ -28,7 +28,7 @@ "test": "node --test dist/daemon.test.js" }, "dependencies": { - "@anthropic-ai/sdk": "^0.52.0", + "@anthropic-ai/sdk": "^0.92.0", "@singularity-forge/rpc-client": "^2.75.0", "discord.js": "^14.25.1", "yaml": "^2.8.0", diff --git a/packages/pi-ai/package.json b/packages/pi-ai/package.json index e57b3fcb7..9bb5ebfd3 100644 --- a/packages/pi-ai/package.json +++ b/packages/pi-ai/package.json @@ -23,12 +23,12 @@ "build": "tsc -p tsconfig.json" }, "dependencies": { - "@anthropic-ai/sdk": "^0.73.0", + "@anthropic-ai/sdk": "^0.92.0", "@anthropic-ai/vertex-sdk": "^0.14.4", "@aws-sdk/client-bedrock-runtime": "^3.983.0", "@google/gemini-cli-core": "0.38.2", "@google/genai": "^1.40.0", - "@mistralai/mistralai": "^1.14.1", + "@mistralai/mistralai": "^2.2.1", "@sinclair/typebox": "^0.34.41", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", diff --git a/packages/pi-ai/src/providers/mistral.ts b/packages/pi-ai/src/providers/mistral.ts index 22f6069e6..fa8051918 100644 --- a/packages/pi-ai/src/providers/mistral.ts +++ b/packages/pi-ai/src/providers/mistral.ts @@ -4,7 +4,7 @@ import type { Mistral } from "@mistralai/mistralai"; import type { RequestOptions } from "@mistralai/mistralai/lib/sdks.js"; import type { ChatCompletionStreamRequest, - ChatCompletionStreamRequestMessages, + ChatCompletionStreamRequestMessage, CompletionEvent, ContentChunk, FunctionTool, @@ -464,8 +464,8 @@ function toFunctionTools(tools: Tool[]): Array 0) assistantMessage.content = contentParts; if (toolCalls.length > 0) assistantMessage.toolCalls = toolCalls; if (contentParts.length > 0 || toolCalls.length > 0) result.push(assistantMessage); diff --git a/packages/pi-coding-agent/src/core/lsp/lsp-integration.test.ts b/packages/pi-coding-agent/src/core/lsp/lsp-integration.test.ts index af1ed3053..8c4240a6c 100644 --- a/packages/pi-coding-agent/src/core/lsp/lsp-integration.test.ts +++ b/packages/pi-coding-agent/src/core/lsp/lsp-integration.test.ts @@ -8,12 +8,24 @@ * Run: node --experimental-strip-types --test src/core/lsp/lsp-integration.test.ts * (from packages/pi-coding-agent/) */ -import { describe, test, beforeAll, afterAll } from 'vitest'; + import assert from "node:assert/strict"; -import { spawn } from "node:child_process"; +import { execSync, spawn } from "node:child_process"; import * as fs from "node:fs"; -import * as path from "node:path"; import * as os from "node:os"; +import * as path from "node:path"; +import { afterAll, beforeAll, describe, test } from "vitest"; + +function hasTypeScriptLanguageServer(): boolean { + try { + execSync("npx which typescript-language-server", { stdio: "ignore" }); + return true; + } catch { + return false; + } +} + +const describeOrSkip = hasTypeScriptLanguageServer() ? describe : describe.skip; // --------------------------------------------------------------------------- // Helpers — lightweight JSON-RPC over stdio (no dependency on our LSP code) @@ -39,7 +51,9 @@ interface JsonRpcResponse { error?: { code: number; message: string }; } -function encodeMessage(msg: JsonRpcRequest | JsonRpcNotification | JsonRpcResponse): string { +function encodeMessage( + msg: JsonRpcRequest | JsonRpcNotification | JsonRpcResponse, +): string { const body = JSON.stringify(msg); return `Content-Length: ${Buffer.byteLength(body, "utf-8")}\r\n\r\n${body}`; } @@ -51,7 +65,10 @@ class LspHarness { private proc; private nextId = 1; private buffer = Buffer.alloc(0); - private pending = new Map void; reject: (e: Error) => void }>(); + private pending = new Map< + number, + { resolve: (v: unknown) => void; reject: (e: Error) => void } + >(); private notifications: Array<{ method: string; params: unknown }> = []; constructor(command: string, args: string[], cwd: string) { @@ -65,7 +82,7 @@ class LspHarness { this.drain(); }); - this.proc.stderr!.on("data", (chunk: Buffer) => { + this.proc.stderr!.on("data", (_chunk: Buffer) => { // Swallow stderr (server logs) }); } @@ -84,16 +101,23 @@ class LspHarness { const messageEnd = messageStart + contentLength; if (this.buffer.length < messageEnd) return; - const body = this.buffer.subarray(messageStart, messageEnd).toString("utf-8"); + const body = this.buffer + .subarray(messageStart, messageEnd) + .toString("utf-8"); this.buffer = Buffer.from(this.buffer.subarray(messageEnd)); - const msg = JSON.parse(body) as JsonRpcResponse & { method?: string; params?: unknown }; + const msg = JSON.parse(body) as JsonRpcResponse & { + method?: string; + params?: unknown; + }; if (msg.id !== undefined && this.pending.has(msg.id)) { const p = this.pending.get(msg.id)!; this.pending.delete(msg.id); if (msg.error) { - p.reject(new Error(`LSP error ${msg.error.code}: ${msg.error.message}`)); + p.reject( + new Error(`LSP error ${msg.error.code}: ${msg.error.message}`), + ); } else { p.resolve(msg.result); } @@ -127,7 +151,11 @@ class LspHarness { this.proc.stdin!.write(encodeMessage(msg)); } - async request(method: string, params: unknown, timeoutMs = 15000): Promise { + async request( + method: string, + params: unknown, + timeoutMs = 15000, + ): Promise { const id = this.nextId++; const msg: JsonRpcRequest = { jsonrpc: "2.0", id, method, params }; this.proc.stdin!.write(encodeMessage(msg)); @@ -156,11 +184,27 @@ class LspHarness { this.proc.stdin!.write(encodeMessage(msg)); } - getNotifications(method?: string): Array<{ method: string; params: unknown }> { + getNotifications( + method?: string, + ): Array<{ method: string; params: unknown }> { if (!method) return this.notifications; return this.notifications.filter((n) => n.method === method); } + async waitForNotification( + method: string, + predicate: (notification: { method: string; params: unknown }) => boolean, + timeoutMs = 10_000, + ): Promise<{ method: string; params: unknown } | undefined> { + const startedAt = Date.now(); + while (Date.now() - startedAt < timeoutMs) { + const found = this.getNotifications(method).find(predicate); + if (found) return found; + await new Promise((resolve) => setTimeout(resolve, 100)); + } + return undefined; + } + async shutdown(): Promise { try { await this.request("shutdown", null, 5000); @@ -255,7 +299,7 @@ function fileToUri(filePath: string): string { // Tests // --------------------------------------------------------------------------- -describe("LSP integration: typescript-language-server", () => { +describeOrSkip("LSP integration: typescript-language-server", () => { let dir: string; let cleanup: () => void; let mainPath: string; @@ -293,8 +337,14 @@ describe("LSP integration: typescript-language-server", () => { assert.ok(result, "initialize should return a result"); assert.ok(result.capabilities, "result should have capabilities"); - assert.ok(result.capabilities.hoverProvider !== undefined, "should support hover"); - assert.ok(result.capabilities.definitionProvider !== undefined, "should support definition"); + assert.ok( + result.capabilities.hoverProvider !== undefined, + "should support hover", + ); + assert.ok( + result.capabilities.definitionProvider !== undefined, + "should support definition", + ); lsp.notify("initialized", {}); @@ -303,10 +353,20 @@ describe("LSP integration: typescript-language-server", () => { const mathContent = fs.readFileSync(mathPath, "utf-8"); lsp.notify("textDocument/didOpen", { - textDocument: { uri: mainUri, languageId: "typescript", version: 1, text: mainContent }, + textDocument: { + uri: mainUri, + languageId: "typescript", + version: 1, + text: mainContent, + }, }); lsp.notify("textDocument/didOpen", { - textDocument: { uri: mathUri, languageId: "typescript", version: 1, text: mathContent }, + textDocument: { + uri: mathUri, + languageId: "typescript", + version: 1, + text: mathContent, + }, }); // Give the server time to index @@ -352,7 +412,10 @@ describe("LSP integration: typescript-language-server", () => { // Response can be Location (uri) or LocationLink (targetUri) const loc = locations[0] as Record; const uri = (loc.uri ?? loc.targetUri) as string; - assert.ok(uri, `definition should have uri or targetUri, got keys: ${Object.keys(loc).join(", ")}`); + assert.ok( + uri, + `definition should have uri or targetUri, got keys: ${Object.keys(loc).join(", ")}`, + ); assert.ok( uri.includes("math.ts"), `definition should point to math.ts, got: ${uri}`, @@ -368,7 +431,10 @@ describe("LSP integration: typescript-language-server", () => { })) as Array<{ uri: string; range: unknown }> | null; assert.ok(result, "references should return a result"); - assert.ok(result.length >= 2, `should find at least 2 references (decl + usage), got ${result.length}`); + assert.ok( + result.length >= 2, + `should find at least 2 references (decl + usage), got ${result.length}`, + ); }); // ---- Document Symbols ---- @@ -378,27 +444,47 @@ describe("LSP integration: typescript-language-server", () => { })) as Array<{ name: string; kind: number }> | null; assert.ok(result, "documentSymbol should return a result"); - assert.ok(result.length >= 2, `should find at least 2 symbols, got ${result.length}`); + assert.ok( + result.length >= 2, + `should find at least 2 symbols, got ${result.length}`, + ); const names = result.map((s) => s.name); - assert.ok(names.includes("add"), `symbols should include 'add', got: ${names.join(", ")}`); - assert.ok(names.includes("subtract"), `symbols should include 'subtract', got: ${names.join(", ")}`); + assert.ok( + names.includes("add"), + `symbols should include 'add', got: ${names.join(", ")}`, + ); + assert.ok( + names.includes("subtract"), + `symbols should include 'subtract', got: ${names.join(", ")}`, + ); }); // ---- Diagnostics (published via notification) ---- test("diagnostics for type error", async () => { - // Wait a bit more for diagnostics to arrive - await new Promise((r) => setTimeout(r, 2000)); + const mainContent = fs.readFileSync(mainPath, "utf-8"); + lsp.notify("textDocument/didChange", { + textDocument: { uri: mainUri, version: 2 }, + contentChanges: [{ text: mainContent }], + }); - const diagNotifications = lsp.getNotifications("textDocument/publishDiagnostics"); - const mainDiags = diagNotifications.filter( - (n) => (n.params as { uri: string }).uri === mainUri, + const mainDiagNotification = await lsp.waitForNotification( + "textDocument/publishDiagnostics", + (n) => { + const params = n.params as { + uri: string; + diagnostics?: Array<{ message: string; range: unknown }>; + }; + return params.uri === mainUri && (params.diagnostics?.length ?? 0) > 0; + }, ); - assert.ok(mainDiags.length > 0, "should receive diagnostics for main.ts"); + assert.ok(mainDiagNotification, "should receive diagnostics for main.ts"); - const lastDiag = mainDiags[mainDiags.length - 1]; - const diagnostics = (lastDiag.params as { diagnostics: Array<{ message: string; range: unknown }> }) - .diagnostics; + const diagnostics = ( + mainDiagNotification.params as { + diagnostics: Array<{ message: string; range: unknown }>; + } + ).diagnostics; // Should catch the type error: string assigned to number const typeError = diagnostics.find( diff --git a/src/loader.ts b/src/loader.ts index d5b114c7f..851d2d416 100644 --- a/src/loader.ts +++ b/src/loader.ts @@ -7,6 +7,7 @@ import { symlinkSync, } from "node:fs"; import { delimiter, join, relative, resolve } from "node:path"; + // SF Startup Loader // Copyright (c) 2026 Singularity Forge @@ -68,6 +69,18 @@ if (firstArg === "--help" || firstArg === "-h") { process.exit(0); } +if ( + firstArg && + firstArg !== "--" && + args.slice(1).some((arg) => arg === "--help" || arg === "-h") +) { + const { printHelp, printSubcommandHelp } = await import("./help-text.js"); + if (!printSubcommandHelp(firstArg, sfVersion)) { + printHelp(sfVersion); + } + process.exit(0); +} + // Fast-path invalid headless invocations before importing cli.ts. These paths // are commonly used by smoke tests and orchestrators; they should return a // clear diagnostic without paying extension/resource startup cost. diff --git a/src/resources/extensions/browser-tools/tests/browser-tools-integration.test.mjs b/src/resources/extensions/browser-tools/tests/browser-tools-integration.test.mjs index 21d26604c..50353a935 100644 --- a/src/resources/extensions/browser-tools/tests/browser-tools-integration.test.mjs +++ b/src/resources/extensions/browser-tools/tests/browser-tools-integration.test.mjs @@ -12,7 +12,22 @@ import assert from "node:assert/strict"; import { readFileSync } from "node:fs"; import { dirname, resolve } from "node:path"; -import { after, afterAll, before, beforeAll, describe, it } from 'vitest'; +import { afterAll, beforeAll, describe, it } from "vitest"; + +// Skip the entire suite if Playwright Chromium cannot launch (missing system +// libraries or browser binaries in this environment). +let canLaunchChromium = false; +try { + const { chromium } = await import("playwright"); + const testBrowser = await chromium.launch({ headless: true }); + await testBrowser.close(); + canLaunchChromium = true; +} catch { + canLaunchChromium = false; +} + +const describeOrSkip = canLaunchChromium ? describe : describe.skip; + import { fileURLToPath } from "node:url"; import { chromium } from "playwright"; @@ -132,7 +147,7 @@ async function injectHelpers() { // 1. window.__pi utility tests // ========================================================================= -describe("window.__pi utilities", () => { +describeOrSkip("window.__pi utilities", () => { it("simpleHash — deterministic output for same input", async () => { await page.setContent("

test

"); await injectHelpers(); @@ -408,7 +423,7 @@ describe("window.__pi utilities", () => { // 2. Intent scoring tests // ========================================================================= -describe("intent scoring", () => { +describeOrSkip("intent scoring", () => { it("submit_form — submit button inside form scores higher than outside", async () => { await page.setContent(`
@@ -585,7 +600,7 @@ describe("intent scoring", () => { // 3. Form analysis tests // ========================================================================= -describe("form analysis", () => { +describeOrSkip("form analysis", () => { const COMPLEX_FORM = ` diff --git a/src/resources/extensions/sf/auto-post-unit.ts b/src/resources/extensions/sf/auto-post-unit.ts index c429ef0f6..80e947801 100644 --- a/src/resources/extensions/sf/auto-post-unit.ts +++ b/src/resources/extensions/sf/auto-post-unit.ts @@ -17,7 +17,6 @@ import type { } from "@singularity-forge/pi-coding-agent"; import { detectAbandonMilestone } from "./abandon-detect.js"; import type { AutoSession, SidecarItem } from "./auto/session.js"; -import { isDeterministicPolicyError } from "./auto-tool-tracking.js"; import { resolveExpectedArtifactPath as resolveArtifactForContent } from "./auto-artifact-paths.js"; import { diagnoseExpectedArtifact, @@ -25,6 +24,7 @@ import { verifyExpectedArtifact, writeBlockerPlaceholder, } from "./auto-recovery.js"; +import { isDeterministicPolicyError } from "./auto-tool-tracking.js"; import { type CloseoutOptions, closeoutUnit } from "./auto-unit-closeout.js"; import { runSafely } from "./auto-utils.js"; import { syncStateToProjectRoot } from "./auto-worktree.js"; @@ -67,13 +67,16 @@ import { } from "./pre-execution-checks.js"; import { loadEffectiveSFPreferences } from "./preferences.js"; import { loadPrompt } from "./prompt-loader.js"; -import { recordSelfFeedback } from "./self-feedback.js"; // crossReferenceEvidence available for future use when verification_evidence is stored in DB // import { crossReferenceEvidence, type ClaimedEvidence } from "./safety/evidence-cross-ref.js"; import { validateContent } from "./safety/content-validator.js"; -import { clearEvidenceFromDisk, getEvidence } from "./safety/evidence-collector.js"; +import { + clearEvidenceFromDisk, + getEvidence, +} from "./safety/evidence-collector.js"; import { validateFileChanges } from "./safety/file-change-validator.js"; import { resolveSafetyHarnessConfig } from "./safety/safety-harness.js"; +import { recordSelfFeedback } from "./self-feedback.js"; import { consumeSignal } from "./session-status-io.js"; import { _getAdapter, @@ -87,10 +90,10 @@ import { } from "./sf-db.js"; import { deriveState } from "./state.js"; import { parseUnitId } from "./unit-id.js"; -import { isAwaitingUserInput } from "./user-input-boundary.js"; import { resolveUokFlags } from "./uok/flags.js"; import { UokGateRunner } from "./uok/gate-runner.js"; import { writeTurnGitTransaction } from "./uok/gitops.js"; +import { isAwaitingUserInput } from "./user-input-boundary.js"; import { writePreExecutionEvidence } from "./verification-evidence.js"; import { logError, logWarning } from "./workflow-logger.js"; import { regenerateIfMissing } from "./workflow-projections.js"; @@ -1073,6 +1076,11 @@ export async function postUnitPreVerification( ctx.ui.notify( `Safety: ${warnings.length} unexpected file change(s) outside task plan`, "warning", + { + kind: "progress", + source: "safety", + dedupe_key: `safety:file-change:${s.currentUnit.id}`, + }, ); } } @@ -1113,6 +1121,11 @@ export async function postUnitPreVerification( ctx.ui.notify( `Safety: task ${sTid} has verification commands but no bash calls were recorded`, "warning", + { + kind: "progress", + source: "safety", + dedupe_key: `safety:evidence:${s.currentUnit.id}`, + }, ); } } @@ -1138,7 +1151,11 @@ export async function postUnitPreVerification( ); for (const v of contentViolations) { logWarning("safety", `content: ${v.reason}`); - ctx.ui.notify(`Content validation: ${v.reason}`, "warning"); + ctx.ui.notify(`Content validation: ${v.reason}`, "warning", { + kind: "progress", + source: "safety", + dedupe_key: `safety:content:${s.currentUnit.id}:${v.reason}`, + }); } } catch (e) { debugLog("postUnit", { @@ -1285,7 +1302,12 @@ export async function postUnitPreVerification( s.lastToolInvocationError = null; s.pendingVerificationRetry = null; s.verificationRetryCount.delete(retryKey); - writeBlockerPlaceholder(s.currentUnit.type, s.currentUnit.id, s.basePath, reason); + writeBlockerPlaceholder( + s.currentUnit.type, + s.currentUnit.id, + s.basePath, + reason, + ); ctx.ui.notify( `${s.currentUnit.type} ${s.currentUnit.id} — deterministic policy rejection, wrote blocker placeholder (no retries) (#4973)`, "warning", diff --git a/src/resources/extensions/sf/auto/phases.ts b/src/resources/extensions/sf/auto/phases.ts index 4076a8488..1d8ba6d93 100644 --- a/src/resources/extensions/sf/auto/phases.ts +++ b/src/resources/extensions/sf/auto/phases.ts @@ -21,13 +21,11 @@ import { import { atomicWriteSync } from "../atomic-write.js"; import { resetCompletionNudgeState } from "../auto-completion-nudge.js"; import { - USER_DRIVEN_DEEP_UNITS, isAwaitingUserInput, type PostUnitContext, type PreVerificationOpts, + USER_DRIVEN_DEEP_UNITS, } from "../auto-post-unit.js"; -import { pauseAutoForProviderError } from "../provider-error-pause.js"; -import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js"; import { buildLoopRemediationSteps, diagnoseExpectedArtifact, @@ -43,23 +41,23 @@ import { formatToolCallSummary, resetToolCallCounts, } from "../auto-tool-tracking.js"; +import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js"; import { debugLog } from "../debug-logger.js"; import { PROJECT_FILES } from "../detection.js"; import { MergeConflictError } from "../git-service.js"; import { recordLearnedOutcome } from "../learning/runtime.js"; -import { - resolveMilestoneFile, - resolveSliceFile, - sfRoot, -} from "../paths.js"; +import { resolveMilestoneFile, resolveSliceFile, sfRoot } from "../paths.js"; import { resolvePersistModelChanges } from "../preferences.js"; import { approveProductionMutationWithLlmPolicy, ensureProductionMutationApprovalTemplate, readProductionMutationApprovalStatus, } from "../production-mutation-approval.js"; -import { loadEvidenceFromDisk, resetEvidence } from "../safety/evidence-collector.js"; -import { parseUnitId } from "../unit-id.js"; +import { pauseAutoForProviderError } from "../provider-error-pause.js"; +import { + loadEvidenceFromDisk, + resetEvidence, +} from "../safety/evidence-collector.js"; import { getDirtyFiles } from "../safety/file-change-validator.js"; import { cleanupCheckpoint, @@ -67,10 +65,20 @@ import { rollbackToCheckpoint, } from "../safety/git-checkpoint.js"; import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js"; -import { getMilestoneSlices, getSliceTaskCounts, getTask, isDbAvailable } from "../sf-db.js"; +import { + getMilestoneSlices, + getSliceTaskCounts, + getTask, + isDbAvailable, +} from "../sf-db.js"; import { getEligibleSlices } from "../slice-parallel-eligibility.js"; import { startSliceParallel } from "../slice-parallel-orchestrator.js"; +import { + handleProductAudit, + type ProductAuditParams, +} from "../tools/product-audit-tool.js"; import type { Phase } from "../types.js"; +import { parseUnitId } from "../unit-id.js"; import { writeUnitRuntimeRecord } from "../unit-runtime.js"; import { resolveUokFlags } from "../uok/flags.js"; import { UokGateRunner } from "../uok/gate-runner.js"; @@ -88,10 +96,6 @@ import { logError, logWarning, } from "../workflow-logger.js"; -import { - handleProductAudit, - type ProductAuditParams, -} from "../tools/product-audit-tool.js"; import { getRequiredWorkflowToolsForAutoUnit, getWorkflowTransportSupportError, @@ -596,7 +600,11 @@ export async function runPreDispatch( // Derive state let state = await deps.deriveState(s.basePath); - if (uokFlags.planningFlow && isDbAvailable() && shouldRunPlanningFlowGate(state.phase)) { + if ( + uokFlags.planningFlow && + isDbAvailable() && + shouldRunPlanningFlowGate(state.phase) + ) { let compiled = ensurePlanningFlowGraph(s.basePath, state); // Empty-graph recovery: stale DB caches can yield 0 nodes right after a // task-complete write. Invalidate caches, re-derive state, and retry once. @@ -1208,8 +1216,7 @@ export async function runDispatch( const derivedKey = `${unitType}/${unitId}`; const hasTransientTaskCompleteFailure = - unitType === "execute-task" && - !!s.pendingTaskCompleteFailures?.has(unitId); + unitType === "execute-task" && !!s.pendingTaskCompleteFailures?.has(unitId); if (!s.pendingVerificationRetry && !hasTransientTaskCompleteFailure) { loopState.recentUnits.push({ key: derivedKey }); @@ -1276,7 +1283,7 @@ export async function runDispatch( (diagnostic?.length ?? 0) > MAX_RECOVERY_CHARS ? diagnostic!.slice(0, MAX_RECOVERY_CHARS) + "\n\n[...diagnostic truncated]" - : diagnostic ?? null; + : (diagnostic ?? null); s.pendingRethinkAttempt = JSON.stringify({ attempt, reason: stuckSignal.reason, @@ -1286,9 +1293,10 @@ export async function runDispatch( unitType, unitId, }); - const rt = attempt === 5 - ? "**FINAL STUCK ATTEMPT — 5 of 5.** " - : `**STUCK RECOVERY ATTEMPT ${attempt - 1} of 4.** `; + const rt = + attempt === 5 + ? "**FINAL STUCK ATTEMPT — 5 of 5.** " + : `**STUCK RECOVERY ATTEMPT ${attempt - 1} of 4.** `; ctx.ui.notify( `${rt}Stuck on ${unitType} ${unitId} (${stuckSignal.reason}). Injecting diagnostic and retrying.`, "warning", @@ -1677,12 +1685,7 @@ export async function runGuards( // FailureClass "input" → 0 retries (broken plan needs human fix, not // an LLM retry). Only fires when uok.gates.enabled is true. const uokFlagsGuards = resolveUokFlags(prefs); - if ( - uokFlagsGuards.gates && - unitType === "execute-task" && - mid && - sliceId - ) { + if (uokFlagsGuards.gates && unitType === "execute-task" && mid && sliceId) { const taskCounts = getSliceTaskCounts(mid, sliceId); const isFirstTaskForSlice = taskCounts.done === 0; if (isFirstTaskForSlice) { @@ -1814,7 +1817,9 @@ export async function runUnitPhase( iterData: IterationData, loopState: LoopState, sidecarItem?: SidecarItem, -): Promise> { +): Promise< + PhaseResult<{ unitStartedAt: number; requestDispatchedAt?: number }> +> { const { ctx, pi, s, deps, prefs } = ic; const { unitType, unitId, prompt, state, mid } = iterData; @@ -2074,7 +2079,10 @@ export async function runUnitPhase( lines.push("", `**Suggested remediation:**\n${rethinkCtx.remediation}`); } if (rethinkCtx.diagnostic) { - lines.push("", `**Full diagnostic from previous attempt:**\n${rethinkCtx.diagnostic}`); + lines.push( + "", + `**Full diagnostic from previous attempt:**\n${rethinkCtx.diagnostic}`, + ); } lines.push("", "---", "", finalPrompt); finalPrompt = lines.join("\n"); @@ -2320,13 +2328,16 @@ export async function runUnitPhase( ) { // Session-timeout cancellations are resumable pauses: pauseAuto below preserves the auto session // instead of routing the cancelled unit into the hard-stop path. - const isSessionCreationTimeout = unitResult.errorContext.message?.includes("Session creation timed out"); + const isSessionCreationTimeout = + unitResult.errorContext.message?.includes("Session creation timed out"); if (isSessionCreationTimeout) { consecutiveSessionTimeouts += 1; const baseRetryAfterMs = 30_000; - const retryAfterMs = baseRetryAfterMs * 2 ** Math.max(0, consecutiveSessionTimeouts - 1); - const allowAutoResume = consecutiveSessionTimeouts <= MAX_SESSION_TIMEOUT_AUTO_RESUMES; + const retryAfterMs = + baseRetryAfterMs * 2 ** Math.max(0, consecutiveSessionTimeouts - 1); + const allowAutoResume = + consecutiveSessionTimeouts <= MAX_SESSION_TIMEOUT_AUTO_RESUMES; if (!allowAutoResume) { ctx.ui.notify( @@ -2356,7 +2367,8 @@ export async function runUnitPhase( resume: allowAutoResume ? () => { void resumeAutoAfterProviderDelay(pi, ctx).catch((err) => { - const message = err instanceof Error ? err.message : String(err); + const message = + err instanceof Error ? err.message : String(err); ctx.ui.notify( `Session timeout recovery failed: ${message}`, "error", @@ -2369,7 +2381,13 @@ export async function runUnitPhase( if (!allowAutoResume) { resetConsecutiveSessionTimeouts(); } - await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext); + await emitCancelledUnitEnd( + ic, + unitType, + unitId, + unitStartSeq, + unitResult.errorContext, + ); return { action: "break", reason: "session-timeout" }; } @@ -2378,7 +2396,11 @@ export async function runUnitPhase( `Unit timed out for ${unitType} ${unitId} (supervision may have failed). Pausing auto-mode.`, "warning", ); - debugLog("autoLoop", { phase: "unit-hard-timeout-pause", unitType, unitId }); + debugLog("autoLoop", { + phase: "unit-hard-timeout-pause", + unitType, + unitId, + }); await deps.pauseAuto(ctx, pi); await emitCancelledUnitEnd( ic, @@ -2468,7 +2490,10 @@ export async function runUnitPhase( u.startedAt === s.currentUnit?.startedAt, ); if (lastUnit && lastUnit.toolCalls === 0) { - if (USER_DRIVEN_DEEP_UNITS.has(unitType) && isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)) { + if ( + USER_DRIVEN_DEEP_UNITS.has(unitType) && + isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined) + ) { debugLog("runUnitPhase", { phase: "zero-tool-calls-awaiting-user-input", unitType, @@ -2500,7 +2525,10 @@ export async function runUnitPhase( // and re-dispatch this unit. return { action: "next", - data: { unitStartedAt: s.currentUnit?.startedAt, requestDispatchedAt: unitResult.requestDispatchedAt }, + data: { + unitStartedAt: s.currentUnit?.startedAt, + requestDispatchedAt: unitResult.requestDispatchedAt, + }, }; } } @@ -2517,7 +2545,10 @@ export async function runUnitPhase( const skipArtifactVerification = shouldSkipArtifactVerification(unitType); let artifactVerified: boolean; - if (USER_DRIVEN_DEEP_UNITS.has(unitType) && isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)) { + if ( + USER_DRIVEN_DEEP_UNITS.has(unitType) && + isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined) + ) { // Skip artifact verification — unit is paused waiting for user input artifactVerified = false; } else { @@ -2688,7 +2719,13 @@ export async function runUnitPhase( } s.preUnitDirtyFiles = []; - return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt, requestDispatchedAt: unitResult.requestDispatchedAt } }; + return { + action: "next", + data: { + unitStartedAt: s.currentUnit?.startedAt, + requestDispatchedAt: unitResult.requestDispatchedAt, + }, + }; } // ─── runFinalize ────────────────────────────────────────────────────────────── @@ -2734,8 +2771,15 @@ export async function runFinalize( // Sidecar items use lightweight pre-verification opts const preVerificationOpts: PreVerificationOpts = sidecarItem ? sidecarItem.kind === "hook" - ? { skipSettleDelay: true, skipWorktreeSync: true, agentEndMessages: s.lastUnitAgentEndMessages ?? undefined } - : { skipSettleDelay: true, agentEndMessages: s.lastUnitAgentEndMessages ?? undefined } + ? { + skipSettleDelay: true, + skipWorktreeSync: true, + agentEndMessages: s.lastUnitAgentEndMessages ?? undefined, + } + : { + skipSettleDelay: true, + agentEndMessages: s.lastUnitAgentEndMessages ?? undefined, + } : { agentEndMessages: s.lastUnitAgentEndMessages ?? undefined }; const _preUnitSnapshot = s.currentUnit ? { @@ -3079,7 +3123,11 @@ export async function runFinalize( const severity = logs.some((e) => e.severity === "error") ? "error" : "warning"; - ctx.ui.notify(formatForNotification(logs), severity); + ctx.ui.notify(formatForNotification(logs), severity, { + kind: severity === "error" ? "notice" : "progress", + source: "workflow-logger", + dedupe_key: `workflow-issues:${iterData.unitType}:${iterData.unitId}`, + }); } } diff --git a/src/resources/extensions/sf/model-router.ts b/src/resources/extensions/sf/model-router.ts index 533f9890a..6debc1c0b 100644 --- a/src/resources/extensions/sf/model-router.ts +++ b/src/resources/extensions/sf/model-router.ts @@ -83,12 +83,15 @@ export const MODEL_CAPABILITY_TIER: Record = { "gpt-5.3-codex-spark": "light", "gemini-2.0-flash": "light", "gemini-flash-2.0": "light", + "gemini-3.1-flash-lite-preview": "light", + "gemini-2.5-flash-lite": "light", "glm-4.7-flash": "light", "glm-4.7-flashx": "light", "ministral-3b-latest": "light", "ministral-8b-latest": "light", "devstral-small-2505": "light", "devstral-small-2507": "light", + "labs-devstral-small-2512": "light", // Standard-tier models "claude-sonnet-4-6": "standard", @@ -98,8 +101,16 @@ export const MODEL_CAPABILITY_TIER: Record = { "gpt-4.1": "standard", "gpt-5.1-codex-max": "standard", "gemini-2.5-pro": "standard", + "gemini-3-flash-preview": "standard", + "gemini-2.5-flash": "standard", "deepseek-chat": "standard", "glm-4.7": "standard", + "qwen3-coder:480b": "standard", + "qwen3-coder-next": "standard", + "kimi-k2.6": "standard", + "kimi-for-coding": "standard", + "MiniMax-M2.7": "standard", + "MiniMax-M2.7-highspeed": "standard", "codestral-latest": "standard", "devstral-2512": "standard", "devstral-medium-2507": "standard", @@ -131,6 +142,10 @@ export const MODEL_CAPABILITY_TIER: Record = { o3: "heavy", "o4-mini": "heavy", "o4-mini-deep-research": "heavy", + "gemini-3.1-pro-preview": "heavy", + "gemini-3-pro-preview": "heavy", + "kimi-k2-thinking": "heavy", + "qwen3-next:80b": "heavy", "glm-5": "heavy", "glm-5-turbo": "heavy", "glm-5.1": "heavy", @@ -176,6 +191,12 @@ const MODEL_COST_PER_1K_INPUT: Record = { "o4-mini-deep-research": 0.005, "gemini-2.0-flash": 0.0001, "gemini-2.5-pro": 0.00125, + "gemini-3.1-pro-preview": 0.00125, + "gemini-3.1-flash-lite-preview": 0.0001, + "gemini-3-pro-preview": 0.00125, + "gemini-3-flash-preview": 0.0001, + "gemini-2.5-flash": 0.0001, + "gemini-2.5-flash-lite": 0.00005, "deepseek-chat": 0.00014, "glm-4.7": 0.0006, "glm-4.7-flash": 0, @@ -184,12 +205,21 @@ const MODEL_COST_PER_1K_INPUT: Record = { "glm-5-turbo": 0.0012, "glm-5.1": 0.0014, "glm-5v-turbo": 0.0012, + "qwen3-coder:480b": 0.0004, + "qwen3-coder-next": 0.0004, + "qwen3-next:80b": 0.0002, + "kimi-k2.6": 0.0006, + "kimi-for-coding": 0.0006, + "kimi-k2-thinking": 0.001, + "MiniMax-M2.7": 0.0006, + "MiniMax-M2.7-highspeed": 0.0006, "codestral-latest": 0.0003, "devstral-2512": 0.0004, "devstral-medium-2507": 0.0004, "devstral-medium-latest": 0.0004, "devstral-small-2505": 0.0001, "devstral-small-2507": 0.0001, + "labs-devstral-small-2512": 0.0001, "magistral-medium-latest": 0.002, "magistral-small": 0.0005, "ministral-3b-latest": 0.00004, @@ -523,6 +553,60 @@ export const MODEL_CAPABILITY_PROFILES: Record = { longContext: 90, instruction: 75, }, + "gemini-3.1-pro-preview": { + coding: 82, + debugging: 78, + research: 92, + reasoning: 84, + speed: 48, + longContext: 98, + instruction: 82, + }, + "gemini-3-pro-preview": { + coding: 82, + debugging: 78, + research: 90, + reasoning: 84, + speed: 50, + longContext: 96, + instruction: 82, + }, + "gemini-3-flash-preview": { + coding: 62, + debugging: 55, + research: 70, + reasoning: 60, + speed: 88, + longContext: 88, + instruction: 72, + }, + "gemini-3.1-flash-lite-preview": { + coding: 55, + debugging: 48, + research: 62, + reasoning: 52, + speed: 96, + longContext: 85, + instruction: 68, + }, + "gemini-2.5-flash": { + coding: 60, + debugging: 52, + research: 68, + reasoning: 58, + speed: 92, + longContext: 85, + instruction: 70, + }, + "gemini-2.5-flash-lite": { + coding: 52, + debugging: 45, + research: 58, + reasoning: 48, + speed: 97, + longContext: 78, + instruction: 65, + }, "gemini-2.0-flash": { coding: 50, debugging: 40, @@ -761,6 +845,15 @@ export const MODEL_CAPABILITY_PROFILES: Record = { longContext: 45, instruction: 65, }, + "labs-devstral-small-2512": { + coding: 65, + debugging: 58, + research: 45, + reasoning: 55, + speed: 88, + longContext: 60, + instruction: 68, + }, // ── Zhipu AI (GLM) ───────────────────────────────────────────────────────── "glm-5": { @@ -826,6 +919,129 @@ export const MODEL_CAPABILITY_PROFILES: Record = { longContext: 45, instruction: 60, }, + + // ── Qwen / Ollama Cloud compatible tags ────────────────────────────────── + "qwen3-coder:480b": { + coding: 84, + debugging: 78, + research: 62, + reasoning: 76, + speed: 58, + longContext: 86, + instruction: 78, + }, + "qwen3-coder-next": { + coding: 82, + debugging: 76, + research: 60, + reasoning: 74, + speed: 70, + longContext: 86, + instruction: 76, + }, + "qwen3-next:80b": { + coding: 70, + debugging: 68, + research: 76, + reasoning: 80, + speed: 62, + longContext: 86, + instruction: 74, + }, + + // ── Moonshot / Kimi ─────────────────────────────────────────────────────── + "kimi-k2.6": { + coding: 88, + debugging: 84, + research: 72, + reasoning: 82, + speed: 55, + longContext: 86, + instruction: 84, + }, + "kimi-for-coding": { + coding: 88, + debugging: 84, + research: 72, + reasoning: 82, + speed: 55, + longContext: 86, + instruction: 84, + }, + "kimi-k2-thinking": { + coding: 86, + debugging: 88, + research: 78, + reasoning: 92, + speed: 30, + longContext: 86, + instruction: 84, + }, + + // ── MiniMax ─────────────────────────────────────────────────────────────── + "MiniMax-M2.7": { + coding: 84, + debugging: 80, + research: 78, + reasoning: 84, + speed: 52, + longContext: 84, + instruction: 82, + }, + "MiniMax-M2.7-highspeed": { + coding: 82, + debugging: 78, + research: 76, + reasoning: 80, + speed: 72, + longContext: 84, + instruction: 80, + }, +}; + +const MODEL_CAPABILITY_ALIASES: Record = { + "deepseek-v3.1": "deepseek-chat", + "deepseek-v3.2": "deepseek-chat", + "deepseek-v4-flash": "deepseek-chat", + "deepseek-v4-pro": "deepseek-chat", + "devstral-latest": "devstral-medium-latest", + "devstral-2:123b": "devstral-2512", + "mistral.devstral-2-123b": "devstral-2512", + "devstral-small-2:24b": "devstral-small-2507", + "mistral.devstral-small-2-24b": "labs-devstral-small-2512", + "mistral.mistral-large-3-675b-instruct": "mistral-large-latest", + "mistral.ministral-3-14b-instruct": "mistral-small-latest", + "mistral.ministral-3-3b-instruct": "ministral-3b-latest", + "mistral.ministral-3-8b-instruct": "ministral-8b-latest", + "gemini-3-flash-preview": "gemini-3-flash-preview", + "glm-4.6": "glm-4.7", + "gpt-oss:120b": "gpt-4o", + "gpt-oss:20b": "gpt-4o-mini", + "kimi-k2:1t": "kimi-k2.6", + "kimi-k2.5": "kimi-k2.6", + "kimi-for-coding": "kimi-k2.6", + "kimi-k2.6:cloud": "kimi-k2.6", + "kimi-k2.6-cloud": "kimi-k2.6", + "minimax-m2": "MiniMax-M2.7", + "minimax-m2.1": "MiniMax-M2.7", + "minimax-m2.5": "MiniMax-M2.7", + "minimax-m2.7": "MiniMax-M2.7", + "mistral-large-3:675b": "mistral-large-latest", + "ministral-3:3b": "ministral-3b-latest", + "ministral-3:8b": "ministral-8b-latest", + "ministral-3:14b": "mistral-small-latest", + "nemotron-3-nano:30b": "gpt-4o-mini", + "nemotron-3-super": "gpt-4o", + "qwen3-coder-480b-a35b-v1:0": "qwen3-coder:480b", + "qwen3-coder-480b-a35b": "qwen3-coder:480b", + "qwen3-coder": "qwen3-coder:480b", + "qwen3-coder:free": "qwen3-coder:480b", + "qwen3-coder-30b-a3b-instruct": "qwen3-coder-next", + "qwen3-coder-flash": "qwen3-coder-next", + "qwen3-next-80b-a3b": "qwen3-next:80b", + "qwen3-next-80b-a3b-instruct": "qwen3-next:80b", + "qwen3-next-80b-a3b-instruct:free": "qwen3-next:80b", + "qwen3-next-80b-a3b-thinking": "qwen3-next:80b", }; // ─── Base Task Requirements Data Table ─────────────────────────────────────── @@ -922,8 +1138,10 @@ export function scoreEligibleModels( capabilityOverrides?: Record>, ): Array<{ modelId: string; score: number }> { const scored = eligibleModelIds.map((modelId) => { - const builtin = MODEL_CAPABILITY_PROFILES[modelId]; - const override = capabilityOverrides?.[modelId]; + const canonicalModelId = canonicalCapabilityModelId(modelId); + const builtin = MODEL_CAPABILITY_PROFILES[canonicalModelId]; + const override = + capabilityOverrides?.[modelId] ?? capabilityOverrides?.[canonicalModelId]; const profile: ModelCapabilities = builtin ? override ? { ...builtin, ...override } @@ -950,6 +1168,29 @@ export function scoreEligibleModels( return scored; } +function canonicalCapabilityModelId(modelId: string): string { + const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId; + const normalizedId = bareId.replace(/:cloud$/i, "").replace(/-cloud$/i, ""); + const aliased = resolveCapabilityAlias(bareId) ?? resolveCapabilityAlias(normalizedId); + if (aliased) return aliased; + if (MODEL_CAPABILITY_PROFILES[normalizedId]) return normalizedId; + for (const knownId of Object.keys(MODEL_CAPABILITY_PROFILES)) { + if (normalizedId.includes(knownId) || knownId.includes(normalizedId)) { + return knownId; + } + } + return normalizedId; +} + +function resolveCapabilityAlias(modelId: string): string | undefined { + const direct = MODEL_CAPABILITY_ALIASES[modelId]; + if (direct) return direct; + const lower = modelId.toLowerCase(); + return Object.entries(MODEL_CAPABILITY_ALIASES).find( + ([alias]) => alias.toLowerCase() === lower, + )?.[1]; +} + /** * Return all models eligible for a given tier, sorted cheapest first. * If routingConfig.tier_models[tier] is set and available, returns only that @@ -1193,18 +1434,17 @@ export function defaultRoutingConfig(): DynamicRoutingConfig { // ─── Internal ──────────────────────────────────────────────────────────────── export function getModelTier(modelId: string): ComplexityTier { - // Strip provider prefix if present - const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId; + const canonicalId = canonicalCapabilityModelId(modelId); // Check exact match first - if (MODEL_CAPABILITY_TIER[bareId]) return MODEL_CAPABILITY_TIER[bareId]; + if (MODEL_CAPABILITY_TIER[canonicalId]) return MODEL_CAPABILITY_TIER[canonicalId]; - const sizeTier = inferTierFromModelSize(bareId); + const sizeTier = inferTierFromModelSize(canonicalId); if (sizeTier) return sizeTier; // Check if any known model ID is a prefix/suffix match for (const [knownId, tier] of Object.entries(MODEL_CAPABILITY_TIER)) { - if (bareId.includes(knownId) || knownId.includes(bareId)) return tier; + if (canonicalId.includes(knownId) || knownId.includes(canonicalId)) return tier; } // Unknown models are assumed standard (per D-15: avoids silently ignoring user config) @@ -1223,24 +1463,26 @@ function inferTierFromModelSize(modelId: string): ComplexityTier | null { /** Check if a model ID has a known capability tier mapping. (#2192) */ function isKnownModel(modelId: string): boolean { - const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId; - if (MODEL_CAPABILITY_TIER[bareId]) return true; + const canonicalId = canonicalCapabilityModelId(modelId); + if (MODEL_CAPABILITY_TIER[canonicalId]) return true; for (const knownId of Object.keys(MODEL_CAPABILITY_TIER)) { - if (bareId.includes(knownId) || knownId.includes(bareId)) return true; + if (canonicalId.includes(knownId) || knownId.includes(canonicalId)) return true; } return false; } function getModelCost(modelId: string): number { - const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId; + const canonicalId = canonicalCapabilityModelId(modelId); - if (MODEL_COST_PER_1K_INPUT[bareId] !== undefined) { - return MODEL_COST_PER_1K_INPUT[bareId]; + if (MODEL_COST_PER_1K_INPUT[canonicalId] !== undefined) { + return MODEL_COST_PER_1K_INPUT[canonicalId]; } // Check partial matches for (const [knownId, cost] of Object.entries(MODEL_COST_PER_1K_INPUT)) { - if (bareId.includes(knownId) || knownId.includes(bareId)) return cost; + if (canonicalId.includes(knownId) || knownId.includes(canonicalId)) { + return cost; + } } // Unknown cost — assume expensive to avoid routing to unknown cheap models diff --git a/src/resources/extensions/sf/notification-store.ts b/src/resources/extensions/sf/notification-store.ts index 160e3214e..e5b1abe55 100644 --- a/src/resources/extensions/sf/notification-store.ts +++ b/src/resources/extensions/sf/notification-store.ts @@ -1,5 +1,5 @@ // SF Extension — Persistent Notification Store -// Captures all ctx.ui.notify() calls and workflow-logger warnings to +// Captures durable ctx.ui.notify() calls and workflow-logger errors to // .sf/notifications.jsonl so they survive context resets and session restarts. // Rotates at MAX_ENTRIES to prevent unbounded growth. @@ -99,6 +99,7 @@ export function appendNotification( ): void { if (!_basePath) return; if (_suppressCount > 0) return; + if (!shouldPersistNotification(severity, metadata)) return; const persistedMessage = message.length > 500 ? message.slice(0, 500) + "…" : message; // Use explicit dedupe_key when provided; fall back to message-hash based key. @@ -141,6 +142,14 @@ export function appendNotification( } } +function shouldPersistNotification( + _severity: NotifySeverity, + metadata?: NotificationMetadata, +): boolean { + if (metadata?.kind === "progress") return false; + return true; +} + /** * Read all notification entries from disk. Returns newest-first. */ @@ -350,7 +359,10 @@ function _withLock(basePath: string, fn: () => T): T { const stat = readFileSync(lockPath, "utf-8"); const lockTime = parseInt(stat, 10); // Treat NaN (creator crashed before writing timestamp) as stale. - if (isNaN(lockTime) || (Number.isFinite(lockTime) && Date.now() - lockTime > 5000)) { + if ( + Number.isNaN(lockTime) || + (Number.isFinite(lockTime) && Date.now() - lockTime > 5000) + ) { try { unlinkSync(lockPath); } catch { diff --git a/src/resources/extensions/sf/tests/model-router.test.ts b/src/resources/extensions/sf/tests/model-router.test.ts index 4f6f8f81c..8d70e7cec 100644 --- a/src/resources/extensions/sf/tests/model-router.test.ts +++ b/src/resources/extensions/sf/tests/model-router.test.ts @@ -271,6 +271,69 @@ test("scoreModel returns 50 for empty requirements", () => { assert.equal(score, 50); }); +test("scoreEligibleModels treats kimi-for-coding as the Kimi K2.6 capability profile", () => { + const requirements = { coding: 1.0 }; + const scored = scoreEligibleModels( + ["kimi-coding/kimi-for-coding", "unknown-future-model"], + requirements, + ); + + assert.equal(scored[0]?.modelId, "kimi-coding/kimi-for-coding"); + assert.equal(scored[0]?.score, MODEL_CAPABILITY_PROFILES["kimi-k2.6"].coding); +}); + +test("scoreEligibleModels uses bare model IDs for provider-prefixed GLM routes", () => { + const requirements = { reasoning: 1.0 }; + const scored = scoreEligibleModels( + ["zai/glm-5.1", "zai/glm-4.7"], + requirements, + ); + + assert.equal(scored[0]?.modelId, "zai/glm-5.1"); + assert.equal(scored[0]?.score, MODEL_CAPABILITY_PROFILES["glm-5.1"].reasoning); +}); + +test("scoreEligibleModels keeps Kimi thinking distinct from plain K2.6", () => { + const reasoningScores = scoreEligibleModels( + ["kimi-coding/kimi-k2-thinking", "kimi-coding/kimi-k2.6"], + { reasoning: 1.0 }, + ); + assert.equal(reasoningScores[0]?.modelId, "kimi-coding/kimi-k2-thinking"); + + const speedScores = scoreEligibleModels( + ["kimi-coding/kimi-k2-thinking", "kimi-coding/kimi-k2.6"], + { speed: 1.0 }, + ); + assert.equal(speedScores[0]?.modelId, "kimi-coding/kimi-k2.6"); +}); + +test("scoreEligibleModels normalizes Ollama Cloud suffix aliases", () => { + const scored = scoreEligibleModels( + ["ollama-cloud/kimi-k2.6:cloud", "unknown-future-model"], + { coding: 1.0 }, + ); + + assert.equal(scored[0]?.modelId, "ollama-cloud/kimi-k2.6:cloud"); + assert.equal(scored[0]?.score, MODEL_CAPABILITY_PROFILES["kimi-k2.6"].coding); +}); + +test("scoreEligibleModels normalizes Ollama Cloud family aliases", () => { + const scored = scoreEligibleModels( + [ + "ollama-cloud/minimax-m2.7", + "ollama-cloud/devstral-2:123b", + "ollama-cloud/qwen3-coder:480b", + ], + { coding: 1.0 }, + ); + + assert.ok(scored.every((entry) => entry.score > 50)); + assert.deepEqual( + scored.map((entry) => getModelTier(entry.modelId)), + ["standard", "standard", "standard"], + ); +}); + test("computeTaskRequirements returns base vector for known unit type", () => { const reqs = computeTaskRequirements("execute-task"); assert.ok(reqs.coding !== undefined && reqs.coding > 0); diff --git a/src/resources/extensions/sf/tests/notification-event-model.test.ts b/src/resources/extensions/sf/tests/notification-event-model.test.ts index 61f057e7d..b71f9f712 100644 --- a/src/resources/extensions/sf/tests/notification-event-model.test.ts +++ b/src/resources/extensions/sf/tests/notification-event-model.test.ts @@ -12,7 +12,7 @@ import assert from "node:assert/strict"; import { mkdirSync, mkdtempSync, rmSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import { afterEach, beforeEach, describe, test } from 'vitest'; +import { afterEach, beforeEach, describe, test } from "vitest"; import { isBlockedNotification, @@ -126,10 +126,10 @@ describe("isMilestoneReadyNotification — metadata-first", () => { describe("isPauseNotification — metadata-first", () => { test("returns true when metadata.kind=terminal and blocking=true", () => { - const event = notifyEvent( - "Autonomous mode paused. Type to interact.", - { kind: "terminal", blocking: true }, - ); + const event = notifyEvent("Autonomous mode paused. Type to interact.", { + kind: "terminal", + blocking: true, + }); assert.equal(isPauseNotification(event), true); }); }); @@ -157,7 +157,11 @@ describe("notification-store — dedupe_key", () => { dedupe_key: "sync:progress", }); const entries = readNotifications(tmpDir); - assert.equal(entries.length, 1, "second entry with same dedupe_key should be dropped"); + assert.equal( + entries.length, + 1, + "second entry with same dedupe_key should be dropped", + ); }); test("does not deduplicate across different dedupe_keys", () => { @@ -168,7 +172,11 @@ describe("notification-store — dedupe_key", () => { dedupe_key: "sync:B", }); const entries = readNotifications(tmpDir); - assert.equal(entries.length, 2, "different dedupe_keys should produce separate entries"); + assert.equal( + entries.length, + 2, + "different dedupe_keys should produce separate entries", + ); }); test("stores metadata on the entry", () => { @@ -184,15 +192,13 @@ describe("notification-store — dedupe_key", () => { assert.equal(entries[0].metadata?.source, "workflow"); }); - test("automated progress notice does not affect blocking classification", () => { + test("automated progress notice is not persisted or treated as blocking", () => { appendNotification("Running checks...", "info", "notify", { kind: "progress", source: "workflow", }); const entries = readNotifications(tmpDir); - assert.equal(entries.length, 1); - // The notice is stored, but kind=progress means headless will not treat it as blocked. - assert.equal(entries[0].metadata?.kind, "progress"); + assert.equal(entries.length, 0); // Confirm headless classification: this event should NOT be blocked const fakeEvent = notifyEvent("Running checks...", { kind: "progress", diff --git a/src/resources/extensions/sf/tests/notification-store.test.ts b/src/resources/extensions/sf/tests/notification-store.test.ts index ac740c50c..e655890d8 100644 --- a/src/resources/extensions/sf/tests/notification-store.test.ts +++ b/src/resources/extensions/sf/tests/notification-store.test.ts @@ -11,7 +11,7 @@ import { } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import { afterEach, beforeEach, describe, test, vi } from 'vitest'; +import { afterEach, beforeEach, describe, test, vi } from "vitest"; import { _resetNotificationStore, diff --git a/src/resources/extensions/sf/tests/workflow-mcp.test.ts b/src/resources/extensions/sf/tests/workflow-mcp.test.ts index 50293040a..97090542e 100644 --- a/src/resources/extensions/sf/tests/workflow-mcp.test.ts +++ b/src/resources/extensions/sf/tests/workflow-mcp.test.ts @@ -532,7 +532,7 @@ test("workflow MCP ask_user_questions uses stdio elicitation round-trip", async }, }, undefined, - { timeout: 60_000 }, + { timeout: 120_000 }, ); assert.ok( diff --git a/src/resources/extensions/sf/workflow-logger.ts b/src/resources/extensions/sf/workflow-logger.ts index e7686a9e8..340892849 100644 --- a/src/resources/extensions/sf/workflow-logger.ts +++ b/src/resources/extensions/sf/workflow-logger.ts @@ -299,17 +299,18 @@ function _push( const ctxStr = context ? ` ${JSON.stringify(context)}` : ""; _writeStderr(`[sf:${component}] ${prefix}: ${message}${ctxStr}\n`); - // Persist to notification store (both warnings and errors) - try { - appendNotification( - `[${component}] ${message}`, - severity === "error" ? "error" : "warning", - "workflow-logger", - ); - } catch (notifErr) { - _writeStderr( - `[sf:workflow-logger] notification-store append failed: ${(notifErr as Error).message}\n`, - ); + if (severity === "error") { + try { + appendNotification( + `[${component}] ${message}`, + "error", + "workflow-logger", + ); + } catch (notifErr) { + _writeStderr( + `[sf:workflow-logger] notification-store append failed: ${(notifErr as Error).message}\n`, + ); + } } // Buffer for auto-loop to drain diff --git a/src/tests/integration/e2e-smoke.test.ts b/src/tests/integration/e2e-smoke.test.ts index a7aa02828..4b2b49e3f 100644 --- a/src/tests/integration/e2e-smoke.test.ts +++ b/src/tests/integration/e2e-smoke.test.ts @@ -19,7 +19,7 @@ import { execFileSync, spawn } from "node:child_process"; import { existsSync, mkdirSync, mkdtempSync, rmSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import { test, afterEach } from 'vitest'; +import { afterEach, test } from "vitest"; const projectRoot = process.cwd(); const loaderPath = join(projectRoot, "dist", "loader.js"); @@ -43,13 +43,13 @@ type RunResult = { * Spawn `node dist/loader.js ...args` and collect output. * * @param args CLI arguments to pass after the script path - * @param timeoutMs Maximum time to wait before SIGTERM (default 8 s) + * @param timeoutMs Maximum time to wait before SIGTERM (default 15 s) * @param env Additional / override environment variables * @param cwd Working directory for the child process (default: projectRoot) */ function runSf( args: string[], - timeoutMs = 8_000, + timeoutMs = 15_000, env: NodeJS.ProcessEnv = {}, cwd: string = projectRoot, ): Promise { @@ -88,7 +88,6 @@ function runSf( /** Strip ANSI escape codes from a string. */ function stripAnsi(s: string): string { - // biome-ignore lint/suspicious/noControlCharactersInRegex: ANSI escape sequence return s.replace(/\x1b\[[0-9;]*[A-Za-z]/g, ""); } @@ -426,7 +425,7 @@ test("sf -h is equivalent to --help", async () => { // 13. sf headless without .sf/ directory exits 1 with clean error // --------------------------------------------------------------------------- -test("sf headless without .sf/ directory exits 1 with clean error", async (t) => { +test("sf headless without .sf/ directory exits 1 with clean error", async () => { const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-no-sf-")); afterEach(() => { @@ -451,19 +450,14 @@ test("sf headless without .sf/ directory exits 1 with clean error", async (t) => // 14. sf headless new-milestone without --context exits 1 // --------------------------------------------------------------------------- -test("sf headless new-milestone without --context exits 1", async (t) => { +test("sf headless new-milestone without --context exits 1", async () => { const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-no-ctx-")); afterEach(() => { rmSync(tmpDir, { recursive: true, force: true }); }); - const result = await runSf( - ["headless", "new-milestone"], - 10_000, - {}, - tmpDir, - ); + const result = await runSf(["headless", "new-milestone"], 10_000, {}, tmpDir); assert.ok(!result.timedOut, "process should not hang"); assert.strictEqual(result.code, 1, `expected exit 1, got ${result.code}`); @@ -481,7 +475,7 @@ test("sf headless new-milestone without --context exits 1", async (t) => { // 15. sf headless --timeout with invalid value exits 1 // --------------------------------------------------------------------------- -test("sf headless --timeout with invalid value exits 1", async (t) => { +test("sf headless --timeout with invalid value exits 1", async () => { const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-bad-timeout-")); afterEach(() => { @@ -511,7 +505,7 @@ test("sf headless --timeout with invalid value exits 1", async (t) => { // 16. sf headless --timeout with negative value exits 1 // --------------------------------------------------------------------------- -test("sf headless --timeout with negative value exits 1", async (t) => { +test("sf headless --timeout with negative value exits 1", async () => { const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-neg-timeout-")); afterEach(() => { @@ -537,7 +531,7 @@ test("sf headless --timeout with negative value exits 1", async (t) => { assertNoCrashMarkers(combined); }); -test("sf headless query returns JSON from the built CLI", async (t) => { +test("sf headless query returns JSON from the built CLI", async () => { const tmpDir = createTempGitRepo("sf-e2e-query-"); afterEach(() => { @@ -565,7 +559,7 @@ test("sf headless query returns JSON from the built CLI", async (t) => { ); }); -test("sf worktree list loads the built worktree CLI without module errors", async (t) => { +test("sf worktree list loads the built worktree CLI without module errors", async () => { const tmpDir = createTempGitRepo("sf-e2e-worktree-"); afterEach(() => { diff --git a/src/tests/integration/pack-install.test.ts b/src/tests/integration/pack-install.test.ts index edde0904e..699ef4b54 100644 --- a/src/tests/integration/pack-install.test.ts +++ b/src/tests/integration/pack-install.test.ts @@ -21,11 +21,14 @@ import { writeFileSync, } from "node:fs"; import { tmpdir } from "node:os"; -import { join } from "node:path"; -import { test, afterEach } from 'vitest'; +import { delimiter, dirname, join } from "node:path"; import { createGunzip } from "node:zlib"; +import { afterEach, test } from "vitest"; const projectRoot = process.cwd(); +const packageName = JSON.parse( + readFileSync(join(projectRoot, "package.json"), "utf-8"), +).name as string; if (!existsSync(join(projectRoot, "dist"))) { throw new Error("dist/ not found — run: npm run build"); @@ -50,6 +53,9 @@ function createNpmSandbox(prefix: string): NpmSandbox { installPrefix, env: { ...process.env, + PATH: [dirname(process.execPath), process.env.PATH] + .filter(Boolean) + .join(delimiter), NPM_CONFIG_CACHE: cacheDir, npm_config_cache: cacheDir, PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: "1", @@ -62,6 +68,8 @@ function buildQuietNpmEnv(sandbox: NpmSandbox): NodeJS.ProcessEnv { ...sandbox.env, NPM_CONFIG_LOGLEVEL: "error", npm_config_loglevel: "error", + NPM_CONFIG_ENGINE_STRICT: "false", + npm_config_engine_strict: "false", NPM_CONFIG_FUND: "false", npm_config_fund: "false", NPM_CONFIG_AUDIT: "false", @@ -141,7 +149,7 @@ function listTarEntries(tarballPath: string): Promise { // 1. npm pack produces valid tarball with correct file layout // ═══════════════════════════════════════════════════════════════════════════ -test("npm pack produces tarball with required files", async (t) => { +test("npm pack produces tarball with required files", async () => { const sandbox = createNpmSandbox("sf-pack-test-"); const tarballPath = packTarball(sandbox); @@ -204,13 +212,13 @@ test("npm pack produces tarball with required files", async (t) => { ".sf", "pkg/package.json piConfig.configDir is .sf", ); -}); +}, 240_000); // ═══════════════════════════════════════════════════════════════════════════ // 2. npm pack → install → sf binary resolves // ═══════════════════════════════════════════════════════════════════════════ -test("tarball installs and sf binary resolves", async (t) => { +test("tarball installs and sf binary resolves", async () => { const sandbox = createNpmSandbox("sf-install-test-"); const tarballPath = packTarball(sandbox); @@ -242,7 +250,7 @@ test("tarball installs and sf binary resolves", async (t) => { const installedLoader = join( sandbox.installPrefix, "node_modules", - "sf-run", + packageName, "dist", "loader.js", ); @@ -258,7 +266,7 @@ test("tarball installs and sf binary resolves", async (t) => { const installedSfExt = join( sandbox.installPrefix, "node_modules", - "sf-run", + packageName, "src", "resources", "extensions", @@ -269,7 +277,7 @@ test("tarball installs and sf binary resolves", async (t) => { existsSync(installedSfExt), "bundled sf extension present in installed package", ); -}); +}, 420_000); // ═══════════════════════════════════════════════════════════════════════════ // 3. Launch → extensions load → no errors on stderr @@ -329,7 +337,7 @@ test("sf launches and loads extensions without errors", async () => { ); }); -test("sf exits early with a clear message when synced resources are newer than the binary", async (t) => { +test("sf exits early with a clear message when synced resources are newer than the binary", async () => { const fakeHome = mkdtempSync(join(tmpdir(), "sf-version-skew-")); const fakeAgentDir = join(fakeHome, ".sf", "agent"); mkdirSync(fakeAgentDir, { recursive: true });