fix(sf): stabilize auto notices and package checks

This commit is contained in:
Mikael Hugo 2026-05-02 12:39:27 +02:00
parent ed2c4af729
commit 85a0188fe1
20 changed files with 697 additions and 178 deletions

View file

@ -2,6 +2,8 @@
actionable: true
kind: design-research
date: 2026-05-02
promoted: true
promoted_to: M012
---
# PDD v2 — Research Findings

48
package-lock.json generated
View file

@ -14,13 +14,13 @@
"studio"
],
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/sdk": "^0.92.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@clack/prompts": "^1.1.0",
"@google/genai": "^1.40.0",
"@mariozechner/jiti": "^2.6.2",
"@mistralai/mistralai": "^1.14.1",
"@mistralai/mistralai": "^2.2.1",
"@modelcontextprotocol/sdk": "^1.27.1",
"@octokit/rest": "^22.0.1",
"@silvia-odwyer/photon-node": "^0.3.4",
@ -72,6 +72,7 @@
"esbuild": "^0.27.4",
"jiti": "^2.6.1",
"typescript": "^5.4.0",
"typescript-language-server": "^5.1.3",
"vitest": "^4.1.5"
},
"engines": {
@ -155,9 +156,9 @@
}
},
"node_modules/@anthropic-ai/sdk": {
"version": "0.73.0",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.73.0.tgz",
"integrity": "sha512-URURVzhxXGJDGUGFunIOtBlSl7KWvZiAAKY/ttTkZAkXT9bTPqdk2eK0b8qqSxXpikh3QKPnPYpiyX98zf5ebw==",
"version": "0.92.0",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.92.0.tgz",
"integrity": "sha512-l653JFC83wCglH8H83t1xpgDurCyPyslYW1maPRdCsfuNuGbLvQjQ81sWd3Go3LWRm0jNspzAhuqAYV8r9joSw==",
"license": "MIT",
"dependencies": {
"json-schema-to-ts": "^3.1.1"
@ -3915,13 +3916,14 @@
}
},
"node_modules/@mistralai/mistralai": {
"version": "1.14.1",
"resolved": "https://registry.npmjs.org/@mistralai/mistralai/-/mistralai-1.14.1.tgz",
"integrity": "sha512-IiLmmZFCCTReQgPAT33r7KQ1nYo5JPdvGkrkZqA8qQ2qB1GHgs5LoP5K2ICyrjnpw2n8oSxMM/VP+liiKcGNlQ==",
"version": "2.2.1",
"resolved": "https://registry.npmjs.org/@mistralai/mistralai/-/mistralai-2.2.1.tgz",
"integrity": "sha512-uKU8CZmL2RzYKmplsU01hii4p3pe4HqJefpWNRWXm1Tcm0Sm4xXfwSLIy4k7ZCPlbETCGcp69E7hZs+WOJ5itQ==",
"license": "Apache-2.0",
"dependencies": {
"ws": "^8.18.0",
"zod": "^3.25.0 || ^4.0.0",
"zod-to-json-schema": "^3.24.1"
"zod-to-json-schema": "^3.25.0"
}
},
"node_modules/@modelcontextprotocol/sdk": {
@ -15498,6 +15500,19 @@
"node": ">=14.17"
}
},
"node_modules/typescript-language-server": {
"version": "5.1.3",
"resolved": "https://registry.npmjs.org/typescript-language-server/-/typescript-language-server-5.1.3.tgz",
"integrity": "sha512-r+pAcYtWdN8tKlYZPwiiHNA2QPjXnI02NrW5Sf2cVM3TRtuQ3V9EKKwOxqwaQ0krsaEXk/CbN90I5erBuf84Vg==",
"dev": true,
"license": "Apache-2.0",
"bin": {
"typescript-language-server": "lib/cli.mjs"
},
"engines": {
"node": ">=20"
}
},
"node_modules/uint8array-extras": {
"version": "1.5.0",
"resolved": "https://registry.npmjs.org/uint8array-extras/-/uint8array-extras-1.5.0.tgz",
@ -16305,7 +16320,7 @@
"version": "2.75.0",
"license": "MIT",
"dependencies": {
"@anthropic-ai/sdk": "^0.52.0",
"@anthropic-ai/sdk": "^0.92.0",
"@singularity-forge/rpc-client": "^2.75.0",
"discord.js": "^14.25.1",
"yaml": "^2.8.0",
@ -16322,15 +16337,6 @@
"node": ">=24.15.0"
}
},
"packages/daemon/node_modules/@anthropic-ai/sdk": {
"version": "0.52.0",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.52.0.tgz",
"integrity": "sha512-d4c+fg+xy9e46c8+YnrrgIQR45CZlAi7PwdzIfDXDM6ACxEZli1/fxhURsq30ZpMZy6LvSkr41jGq5aF5TD7rQ==",
"license": "MIT",
"bin": {
"anthropic-ai-sdk": "bin/cli"
}
},
"packages/daemon/node_modules/zod": {
"version": "3.25.76",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
@ -16387,12 +16393,12 @@
"name": "@singularity-forge/pi-ai",
"version": "2.75.0",
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/sdk": "^0.92.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@google/gemini-cli-core": "0.38.2",
"@google/genai": "^1.40.0",
"@mistralai/mistralai": "^1.14.1",
"@mistralai/mistralai": "^2.2.1",
"@sinclair/typebox": "^0.34.41",
"ajv": "^8.17.1",
"ajv-formats": "^3.0.1",

View file

@ -102,13 +102,13 @@
"test:live-regression": "node --experimental-strip-types tests/live-regression/run.ts"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/sdk": "^0.92.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@clack/prompts": "^1.1.0",
"@google/genai": "^1.40.0",
"@mariozechner/jiti": "^2.6.2",
"@mistralai/mistralai": "^1.14.1",
"@mistralai/mistralai": "^2.2.1",
"@modelcontextprotocol/sdk": "^1.27.1",
"@octokit/rest": "^22.0.1",
"@silvia-odwyer/photon-node": "^0.3.4",
@ -156,6 +156,7 @@
"esbuild": "^0.27.4",
"jiti": "^2.6.1",
"typescript": "^5.4.0",
"typescript-language-server": "^5.1.3",
"vitest": "^4.1.5"
},
"optionalDependencies": {

View file

@ -28,7 +28,7 @@
"test": "node --test dist/daemon.test.js"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.52.0",
"@anthropic-ai/sdk": "^0.92.0",
"@singularity-forge/rpc-client": "^2.75.0",
"discord.js": "^14.25.1",
"yaml": "^2.8.0",

View file

@ -23,12 +23,12 @@
"build": "tsc -p tsconfig.json"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.73.0",
"@anthropic-ai/sdk": "^0.92.0",
"@anthropic-ai/vertex-sdk": "^0.14.4",
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
"@google/gemini-cli-core": "0.38.2",
"@google/genai": "^1.40.0",
"@mistralai/mistralai": "^1.14.1",
"@mistralai/mistralai": "^2.2.1",
"@sinclair/typebox": "^0.34.41",
"ajv": "^8.17.1",
"ajv-formats": "^3.0.1",

View file

@ -4,7 +4,7 @@ import type { Mistral } from "@mistralai/mistralai";
import type { RequestOptions } from "@mistralai/mistralai/lib/sdks.js";
import type {
ChatCompletionStreamRequest,
ChatCompletionStreamRequestMessages,
ChatCompletionStreamRequestMessage,
CompletionEvent,
ContentChunk,
FunctionTool,
@ -464,8 +464,8 @@ function toFunctionTools(tools: Tool[]): Array<FunctionTool & { type: "function"
}));
}
function toChatMessages(messages: Message[], supportsImages: boolean): ChatCompletionStreamRequestMessages[] {
const result: ChatCompletionStreamRequestMessages[] = [];
function toChatMessages(messages: Message[], supportsImages: boolean): ChatCompletionStreamRequestMessage[] {
const result: ChatCompletionStreamRequestMessage[] = [];
for (const msg of messages) {
if (msg.role === "user") {
@ -520,7 +520,7 @@ function toChatMessages(messages: Message[], supportsImages: boolean): ChatCompl
});
}
const assistantMessage: ChatCompletionStreamRequestMessages = { role: "assistant" };
const assistantMessage: ChatCompletionStreamRequestMessage = { role: "assistant" };
if (contentParts.length > 0) assistantMessage.content = contentParts;
if (toolCalls.length > 0) assistantMessage.toolCalls = toolCalls;
if (contentParts.length > 0 || toolCalls.length > 0) result.push(assistantMessage);

View file

@ -8,12 +8,24 @@
* Run: node --experimental-strip-types --test src/core/lsp/lsp-integration.test.ts
* (from packages/pi-coding-agent/)
*/
import { describe, test, beforeAll, afterAll } from 'vitest';
import assert from "node:assert/strict";
import { spawn } from "node:child_process";
import { execSync, spawn } from "node:child_process";
import * as fs from "node:fs";
import * as path from "node:path";
import * as os from "node:os";
import * as path from "node:path";
import { afterAll, beforeAll, describe, test } from "vitest";
/**
 * Probe whether `typescript-language-server` is runnable in this environment,
 * so the LSP integration suite can be skipped instead of failing.
 *
 * Uses `npx --no-install` so the check only resolves the locally installed
 * binary (node_modules/.bin): the previous `npx which <name>` form executed
 * the npm `which` *package*, which npx will try to download from the registry
 * when uncached — making a pure availability probe network-dependent.
 *
 * @returns true if the server binary can be invoked, false otherwise.
 */
function hasTypeScriptLanguageServer(): boolean {
	try {
		execSync("npx --no-install typescript-language-server --version", {
			stdio: "ignore",
		});
		return true;
	} catch {
		// Any spawn/exit failure (npx missing, server not installed) means
		// the suite cannot run against a real server.
		return false;
	}
}
const describeOrSkip = hasTypeScriptLanguageServer() ? describe : describe.skip;
// ---------------------------------------------------------------------------
// Helpers — lightweight JSON-RPC over stdio (no dependency on our LSP code)
@ -39,7 +51,9 @@ interface JsonRpcResponse {
error?: { code: number; message: string };
}
function encodeMessage(msg: JsonRpcRequest | JsonRpcNotification | JsonRpcResponse): string {
function encodeMessage(
msg: JsonRpcRequest | JsonRpcNotification | JsonRpcResponse,
): string {
const body = JSON.stringify(msg);
return `Content-Length: ${Buffer.byteLength(body, "utf-8")}\r\n\r\n${body}`;
}
@ -51,7 +65,10 @@ class LspHarness {
private proc;
private nextId = 1;
private buffer = Buffer.alloc(0);
private pending = new Map<number, { resolve: (v: unknown) => void; reject: (e: Error) => void }>();
private pending = new Map<
number,
{ resolve: (v: unknown) => void; reject: (e: Error) => void }
>();
private notifications: Array<{ method: string; params: unknown }> = [];
constructor(command: string, args: string[], cwd: string) {
@ -65,7 +82,7 @@ class LspHarness {
this.drain();
});
this.proc.stderr!.on("data", (chunk: Buffer) => {
this.proc.stderr!.on("data", (_chunk: Buffer) => {
// Swallow stderr (server logs)
});
}
@ -84,16 +101,23 @@ class LspHarness {
const messageEnd = messageStart + contentLength;
if (this.buffer.length < messageEnd) return;
const body = this.buffer.subarray(messageStart, messageEnd).toString("utf-8");
const body = this.buffer
.subarray(messageStart, messageEnd)
.toString("utf-8");
this.buffer = Buffer.from(this.buffer.subarray(messageEnd));
const msg = JSON.parse(body) as JsonRpcResponse & { method?: string; params?: unknown };
const msg = JSON.parse(body) as JsonRpcResponse & {
method?: string;
params?: unknown;
};
if (msg.id !== undefined && this.pending.has(msg.id)) {
const p = this.pending.get(msg.id)!;
this.pending.delete(msg.id);
if (msg.error) {
p.reject(new Error(`LSP error ${msg.error.code}: ${msg.error.message}`));
p.reject(
new Error(`LSP error ${msg.error.code}: ${msg.error.message}`),
);
} else {
p.resolve(msg.result);
}
@ -127,7 +151,11 @@ class LspHarness {
this.proc.stdin!.write(encodeMessage(msg));
}
async request(method: string, params: unknown, timeoutMs = 15000): Promise<unknown> {
async request(
method: string,
params: unknown,
timeoutMs = 15000,
): Promise<unknown> {
const id = this.nextId++;
const msg: JsonRpcRequest = { jsonrpc: "2.0", id, method, params };
this.proc.stdin!.write(encodeMessage(msg));
@ -156,11 +184,27 @@ class LspHarness {
this.proc.stdin!.write(encodeMessage(msg));
}
getNotifications(method?: string): Array<{ method: string; params: unknown }> {
getNotifications(
method?: string,
): Array<{ method: string; params: unknown }> {
if (!method) return this.notifications;
return this.notifications.filter((n) => n.method === method);
}
async waitForNotification(
	method: string,
	predicate: (notification: { method: string; params: unknown }) => boolean,
	timeoutMs = 10_000,
): Promise<{ method: string; params: unknown } | undefined> {
	// Poll the accumulated notification list every 100ms until the predicate
	// matches or the deadline passes; resolves undefined on timeout.
	const deadline = Date.now() + timeoutMs;
	while (Date.now() < deadline) {
		const match = this.getNotifications(method).find(predicate);
		if (match) return match;
		await new Promise((resolve) => setTimeout(resolve, 100));
	}
	return undefined;
}
async shutdown(): Promise<void> {
try {
await this.request("shutdown", null, 5000);
@ -255,7 +299,7 @@ function fileToUri(filePath: string): string {
// Tests
// ---------------------------------------------------------------------------
describe("LSP integration: typescript-language-server", () => {
describeOrSkip("LSP integration: typescript-language-server", () => {
let dir: string;
let cleanup: () => void;
let mainPath: string;
@ -293,8 +337,14 @@ describe("LSP integration: typescript-language-server", () => {
assert.ok(result, "initialize should return a result");
assert.ok(result.capabilities, "result should have capabilities");
assert.ok(result.capabilities.hoverProvider !== undefined, "should support hover");
assert.ok(result.capabilities.definitionProvider !== undefined, "should support definition");
assert.ok(
result.capabilities.hoverProvider !== undefined,
"should support hover",
);
assert.ok(
result.capabilities.definitionProvider !== undefined,
"should support definition",
);
lsp.notify("initialized", {});
@ -303,10 +353,20 @@ describe("LSP integration: typescript-language-server", () => {
const mathContent = fs.readFileSync(mathPath, "utf-8");
lsp.notify("textDocument/didOpen", {
textDocument: { uri: mainUri, languageId: "typescript", version: 1, text: mainContent },
textDocument: {
uri: mainUri,
languageId: "typescript",
version: 1,
text: mainContent,
},
});
lsp.notify("textDocument/didOpen", {
textDocument: { uri: mathUri, languageId: "typescript", version: 1, text: mathContent },
textDocument: {
uri: mathUri,
languageId: "typescript",
version: 1,
text: mathContent,
},
});
// Give the server time to index
@ -352,7 +412,10 @@ describe("LSP integration: typescript-language-server", () => {
// Response can be Location (uri) or LocationLink (targetUri)
const loc = locations[0] as Record<string, unknown>;
const uri = (loc.uri ?? loc.targetUri) as string;
assert.ok(uri, `definition should have uri or targetUri, got keys: ${Object.keys(loc).join(", ")}`);
assert.ok(
uri,
`definition should have uri or targetUri, got keys: ${Object.keys(loc).join(", ")}`,
);
assert.ok(
uri.includes("math.ts"),
`definition should point to math.ts, got: ${uri}`,
@ -368,7 +431,10 @@ describe("LSP integration: typescript-language-server", () => {
})) as Array<{ uri: string; range: unknown }> | null;
assert.ok(result, "references should return a result");
assert.ok(result.length >= 2, `should find at least 2 references (decl + usage), got ${result.length}`);
assert.ok(
result.length >= 2,
`should find at least 2 references (decl + usage), got ${result.length}`,
);
});
// ---- Document Symbols ----
@ -378,27 +444,47 @@ describe("LSP integration: typescript-language-server", () => {
})) as Array<{ name: string; kind: number }> | null;
assert.ok(result, "documentSymbol should return a result");
assert.ok(result.length >= 2, `should find at least 2 symbols, got ${result.length}`);
assert.ok(
result.length >= 2,
`should find at least 2 symbols, got ${result.length}`,
);
const names = result.map((s) => s.name);
assert.ok(names.includes("add"), `symbols should include 'add', got: ${names.join(", ")}`);
assert.ok(names.includes("subtract"), `symbols should include 'subtract', got: ${names.join(", ")}`);
assert.ok(
names.includes("add"),
`symbols should include 'add', got: ${names.join(", ")}`,
);
assert.ok(
names.includes("subtract"),
`symbols should include 'subtract', got: ${names.join(", ")}`,
);
});
// ---- Diagnostics (published via notification) ----
test("diagnostics for type error", async () => {
// Wait a bit more for diagnostics to arrive
await new Promise((r) => setTimeout(r, 2000));
const mainContent = fs.readFileSync(mainPath, "utf-8");
lsp.notify("textDocument/didChange", {
textDocument: { uri: mainUri, version: 2 },
contentChanges: [{ text: mainContent }],
});
const diagNotifications = lsp.getNotifications("textDocument/publishDiagnostics");
const mainDiags = diagNotifications.filter(
(n) => (n.params as { uri: string }).uri === mainUri,
const mainDiagNotification = await lsp.waitForNotification(
"textDocument/publishDiagnostics",
(n) => {
const params = n.params as {
uri: string;
diagnostics?: Array<{ message: string; range: unknown }>;
};
return params.uri === mainUri && (params.diagnostics?.length ?? 0) > 0;
},
);
assert.ok(mainDiags.length > 0, "should receive diagnostics for main.ts");
assert.ok(mainDiagNotification, "should receive diagnostics for main.ts");
const lastDiag = mainDiags[mainDiags.length - 1];
const diagnostics = (lastDiag.params as { diagnostics: Array<{ message: string; range: unknown }> })
.diagnostics;
const diagnostics = (
mainDiagNotification.params as {
diagnostics: Array<{ message: string; range: unknown }>;
}
).diagnostics;
// Should catch the type error: string assigned to number
const typeError = diagnostics.find(

View file

@ -7,6 +7,7 @@ import {
symlinkSync,
} from "node:fs";
import { delimiter, join, relative, resolve } from "node:path";
// SF Startup Loader
// Copyright (c) 2026 Singularity Forge
@ -68,6 +69,18 @@ if (firstArg === "--help" || firstArg === "-h") {
process.exit(0);
}
// Support `sf <subcommand> --help` / `-h`: when a real subcommand (not the
// `--` arg separator) is followed by a help flag anywhere in the remaining
// args, print that subcommand's dedicated help — falling back to the general
// help screen when no subcommand-specific text exists — then exit cleanly.
// help-text.js is imported lazily to keep the startup fast path cheap.
if (
	firstArg &&
	firstArg !== "--" &&
	args.slice(1).some((arg) => arg === "--help" || arg === "-h")
) {
	const { printHelp, printSubcommandHelp } = await import("./help-text.js");
	if (!printSubcommandHelp(firstArg, sfVersion)) {
		printHelp(sfVersion);
	}
	process.exit(0);
}
// Fast-path invalid headless invocations before importing cli.ts. These paths
// are commonly used by smoke tests and orchestrators; they should return a
// clear diagnostic without paying extension/resource startup cost.

View file

@ -12,7 +12,22 @@
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import { after, afterAll, before, beforeAll, describe, it } from 'vitest';
import { afterAll, beforeAll, describe, it } from "vitest";
// Skip the entire suite if Playwright Chromium cannot launch (missing system
// libraries or browser binaries in this environment).
// NOTE(review): a static `import { chromium } from "playwright"` also exists
// later in this file; ESM hoists it, so if the playwright *module* itself is
// absent the file fails to load before this try/catch runs — this probe only
// guards against launch failures (missing browser binaries/system libs).
// Confirm whether module-absence should also be tolerated.
let canLaunchChromium = false;
try {
	const { chromium } = await import("playwright");
	const testBrowser = await chromium.launch({ headless: true });
	await testBrowser.close();
	canLaunchChromium = true;
} catch {
	canLaunchChromium = false;
}
// describe.skip keeps the suites visible in reporter output while skipping.
const describeOrSkip = canLaunchChromium ? describe : describe.skip;
import { fileURLToPath } from "node:url";
import { chromium } from "playwright";
@ -132,7 +147,7 @@ async function injectHelpers() {
// 1. window.__pi utility tests
// =========================================================================
describe("window.__pi utilities", () => {
describeOrSkip("window.__pi utilities", () => {
it("simpleHash — deterministic output for same input", async () => {
await page.setContent("<p>test</p>");
await injectHelpers();
@ -408,7 +423,7 @@ describe("window.__pi utilities", () => {
// 2. Intent scoring tests
// =========================================================================
describe("intent scoring", () => {
describeOrSkip("intent scoring", () => {
it("submit_form — submit button inside form scores higher than outside", async () => {
await page.setContent(`
<form>
@ -585,7 +600,7 @@ describe("intent scoring", () => {
// 3. Form analysis tests
// =========================================================================
describe("form analysis", () => {
describeOrSkip("form analysis", () => {
const COMPLEX_FORM = `
<form id="testform" action="/submit">
<!-- label[for] association -->

View file

@ -17,7 +17,6 @@ import type {
} from "@singularity-forge/pi-coding-agent";
import { detectAbandonMilestone } from "./abandon-detect.js";
import type { AutoSession, SidecarItem } from "./auto/session.js";
import { isDeterministicPolicyError } from "./auto-tool-tracking.js";
import { resolveExpectedArtifactPath as resolveArtifactForContent } from "./auto-artifact-paths.js";
import {
diagnoseExpectedArtifact,
@ -25,6 +24,7 @@ import {
verifyExpectedArtifact,
writeBlockerPlaceholder,
} from "./auto-recovery.js";
import { isDeterministicPolicyError } from "./auto-tool-tracking.js";
import { type CloseoutOptions, closeoutUnit } from "./auto-unit-closeout.js";
import { runSafely } from "./auto-utils.js";
import { syncStateToProjectRoot } from "./auto-worktree.js";
@ -67,13 +67,16 @@ import {
} from "./pre-execution-checks.js";
import { loadEffectiveSFPreferences } from "./preferences.js";
import { loadPrompt } from "./prompt-loader.js";
import { recordSelfFeedback } from "./self-feedback.js";
// crossReferenceEvidence available for future use when verification_evidence is stored in DB
// import { crossReferenceEvidence, type ClaimedEvidence } from "./safety/evidence-cross-ref.js";
import { validateContent } from "./safety/content-validator.js";
import { clearEvidenceFromDisk, getEvidence } from "./safety/evidence-collector.js";
import {
clearEvidenceFromDisk,
getEvidence,
} from "./safety/evidence-collector.js";
import { validateFileChanges } from "./safety/file-change-validator.js";
import { resolveSafetyHarnessConfig } from "./safety/safety-harness.js";
import { recordSelfFeedback } from "./self-feedback.js";
import { consumeSignal } from "./session-status-io.js";
import {
_getAdapter,
@ -87,10 +90,10 @@ import {
} from "./sf-db.js";
import { deriveState } from "./state.js";
import { parseUnitId } from "./unit-id.js";
import { isAwaitingUserInput } from "./user-input-boundary.js";
import { resolveUokFlags } from "./uok/flags.js";
import { UokGateRunner } from "./uok/gate-runner.js";
import { writeTurnGitTransaction } from "./uok/gitops.js";
import { isAwaitingUserInput } from "./user-input-boundary.js";
import { writePreExecutionEvidence } from "./verification-evidence.js";
import { logError, logWarning } from "./workflow-logger.js";
import { regenerateIfMissing } from "./workflow-projections.js";
@ -1073,6 +1076,11 @@ export async function postUnitPreVerification(
ctx.ui.notify(
`Safety: ${warnings.length} unexpected file change(s) outside task plan`,
"warning",
{
kind: "progress",
source: "safety",
dedupe_key: `safety:file-change:${s.currentUnit.id}`,
},
);
}
}
@ -1113,6 +1121,11 @@ export async function postUnitPreVerification(
ctx.ui.notify(
`Safety: task ${sTid} has verification commands but no bash calls were recorded`,
"warning",
{
kind: "progress",
source: "safety",
dedupe_key: `safety:evidence:${s.currentUnit.id}`,
},
);
}
}
@ -1138,7 +1151,11 @@ export async function postUnitPreVerification(
);
for (const v of contentViolations) {
logWarning("safety", `content: ${v.reason}`);
ctx.ui.notify(`Content validation: ${v.reason}`, "warning");
ctx.ui.notify(`Content validation: ${v.reason}`, "warning", {
kind: "progress",
source: "safety",
dedupe_key: `safety:content:${s.currentUnit.id}:${v.reason}`,
});
}
} catch (e) {
debugLog("postUnit", {
@ -1285,7 +1302,12 @@ export async function postUnitPreVerification(
s.lastToolInvocationError = null;
s.pendingVerificationRetry = null;
s.verificationRetryCount.delete(retryKey);
writeBlockerPlaceholder(s.currentUnit.type, s.currentUnit.id, s.basePath, reason);
writeBlockerPlaceholder(
s.currentUnit.type,
s.currentUnit.id,
s.basePath,
reason,
);
ctx.ui.notify(
`${s.currentUnit.type} ${s.currentUnit.id} — deterministic policy rejection, wrote blocker placeholder (no retries) (#4973)`,
"warning",

View file

@ -21,13 +21,11 @@ import {
import { atomicWriteSync } from "../atomic-write.js";
import { resetCompletionNudgeState } from "../auto-completion-nudge.js";
import {
USER_DRIVEN_DEEP_UNITS,
isAwaitingUserInput,
type PostUnitContext,
type PreVerificationOpts,
USER_DRIVEN_DEEP_UNITS,
} from "../auto-post-unit.js";
import { pauseAutoForProviderError } from "../provider-error-pause.js";
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
import {
buildLoopRemediationSteps,
diagnoseExpectedArtifact,
@ -43,23 +41,23 @@ import {
formatToolCallSummary,
resetToolCallCounts,
} from "../auto-tool-tracking.js";
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
import { debugLog } from "../debug-logger.js";
import { PROJECT_FILES } from "../detection.js";
import { MergeConflictError } from "../git-service.js";
import { recordLearnedOutcome } from "../learning/runtime.js";
import {
resolveMilestoneFile,
resolveSliceFile,
sfRoot,
} from "../paths.js";
import { resolveMilestoneFile, resolveSliceFile, sfRoot } from "../paths.js";
import { resolvePersistModelChanges } from "../preferences.js";
import {
approveProductionMutationWithLlmPolicy,
ensureProductionMutationApprovalTemplate,
readProductionMutationApprovalStatus,
} from "../production-mutation-approval.js";
import { loadEvidenceFromDisk, resetEvidence } from "../safety/evidence-collector.js";
import { parseUnitId } from "../unit-id.js";
import { pauseAutoForProviderError } from "../provider-error-pause.js";
import {
loadEvidenceFromDisk,
resetEvidence,
} from "../safety/evidence-collector.js";
import { getDirtyFiles } from "../safety/file-change-validator.js";
import {
cleanupCheckpoint,
@ -67,10 +65,20 @@ import {
rollbackToCheckpoint,
} from "../safety/git-checkpoint.js";
import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
import { getMilestoneSlices, getSliceTaskCounts, getTask, isDbAvailable } from "../sf-db.js";
import {
getMilestoneSlices,
getSliceTaskCounts,
getTask,
isDbAvailable,
} from "../sf-db.js";
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
import {
handleProductAudit,
type ProductAuditParams,
} from "../tools/product-audit-tool.js";
import type { Phase } from "../types.js";
import { parseUnitId } from "../unit-id.js";
import { writeUnitRuntimeRecord } from "../unit-runtime.js";
import { resolveUokFlags } from "../uok/flags.js";
import { UokGateRunner } from "../uok/gate-runner.js";
@ -88,10 +96,6 @@ import {
logError,
logWarning,
} from "../workflow-logger.js";
import {
handleProductAudit,
type ProductAuditParams,
} from "../tools/product-audit-tool.js";
import {
getRequiredWorkflowToolsForAutoUnit,
getWorkflowTransportSupportError,
@ -596,7 +600,11 @@ export async function runPreDispatch(
// Derive state
let state = await deps.deriveState(s.basePath);
if (uokFlags.planningFlow && isDbAvailable() && shouldRunPlanningFlowGate(state.phase)) {
if (
uokFlags.planningFlow &&
isDbAvailable() &&
shouldRunPlanningFlowGate(state.phase)
) {
let compiled = ensurePlanningFlowGraph(s.basePath, state);
// Empty-graph recovery: stale DB caches can yield 0 nodes right after a
// task-complete write. Invalidate caches, re-derive state, and retry once.
@ -1208,8 +1216,7 @@ export async function runDispatch(
const derivedKey = `${unitType}/${unitId}`;
const hasTransientTaskCompleteFailure =
unitType === "execute-task" &&
!!s.pendingTaskCompleteFailures?.has(unitId);
unitType === "execute-task" && !!s.pendingTaskCompleteFailures?.has(unitId);
if (!s.pendingVerificationRetry && !hasTransientTaskCompleteFailure) {
loopState.recentUnits.push({ key: derivedKey });
@ -1276,7 +1283,7 @@ export async function runDispatch(
(diagnostic?.length ?? 0) > MAX_RECOVERY_CHARS
? diagnostic!.slice(0, MAX_RECOVERY_CHARS) +
"\n\n[...diagnostic truncated]"
: diagnostic ?? null;
: (diagnostic ?? null);
s.pendingRethinkAttempt = JSON.stringify({
attempt,
reason: stuckSignal.reason,
@ -1286,9 +1293,10 @@ export async function runDispatch(
unitType,
unitId,
});
const rt = attempt === 5
? "**FINAL STUCK ATTEMPT — 5 of 5.** "
: `**STUCK RECOVERY ATTEMPT ${attempt - 1} of 4.** `;
const rt =
attempt === 5
? "**FINAL STUCK ATTEMPT — 5 of 5.** "
: `**STUCK RECOVERY ATTEMPT ${attempt - 1} of 4.** `;
ctx.ui.notify(
`${rt}Stuck on ${unitType} ${unitId} (${stuckSignal.reason}). Injecting diagnostic and retrying.`,
"warning",
@ -1677,12 +1685,7 @@ export async function runGuards(
// FailureClass "input" → 0 retries (broken plan needs human fix, not
// an LLM retry). Only fires when uok.gates.enabled is true.
const uokFlagsGuards = resolveUokFlags(prefs);
if (
uokFlagsGuards.gates &&
unitType === "execute-task" &&
mid &&
sliceId
) {
if (uokFlagsGuards.gates && unitType === "execute-task" && mid && sliceId) {
const taskCounts = getSliceTaskCounts(mid, sliceId);
const isFirstTaskForSlice = taskCounts.done === 0;
if (isFirstTaskForSlice) {
@ -1814,7 +1817,9 @@ export async function runUnitPhase(
iterData: IterationData,
loopState: LoopState,
sidecarItem?: SidecarItem,
): Promise<PhaseResult<{ unitStartedAt: number; requestDispatchedAt?: number }>> {
): Promise<
PhaseResult<{ unitStartedAt: number; requestDispatchedAt?: number }>
> {
const { ctx, pi, s, deps, prefs } = ic;
const { unitType, unitId, prompt, state, mid } = iterData;
@ -2074,7 +2079,10 @@ export async function runUnitPhase(
lines.push("", `**Suggested remediation:**\n${rethinkCtx.remediation}`);
}
if (rethinkCtx.diagnostic) {
lines.push("", `**Full diagnostic from previous attempt:**\n${rethinkCtx.diagnostic}`);
lines.push(
"",
`**Full diagnostic from previous attempt:**\n${rethinkCtx.diagnostic}`,
);
}
lines.push("", "---", "", finalPrompt);
finalPrompt = lines.join("\n");
@ -2320,13 +2328,16 @@ export async function runUnitPhase(
) {
// Session-timeout cancellations are resumable pauses: pauseAuto below preserves the auto session
// instead of routing the cancelled unit into the hard-stop path.
const isSessionCreationTimeout = unitResult.errorContext.message?.includes("Session creation timed out");
const isSessionCreationTimeout =
unitResult.errorContext.message?.includes("Session creation timed out");
if (isSessionCreationTimeout) {
consecutiveSessionTimeouts += 1;
const baseRetryAfterMs = 30_000;
const retryAfterMs = baseRetryAfterMs * 2 ** Math.max(0, consecutiveSessionTimeouts - 1);
const allowAutoResume = consecutiveSessionTimeouts <= MAX_SESSION_TIMEOUT_AUTO_RESUMES;
const retryAfterMs =
baseRetryAfterMs * 2 ** Math.max(0, consecutiveSessionTimeouts - 1);
const allowAutoResume =
consecutiveSessionTimeouts <= MAX_SESSION_TIMEOUT_AUTO_RESUMES;
if (!allowAutoResume) {
ctx.ui.notify(
@ -2356,7 +2367,8 @@ export async function runUnitPhase(
resume: allowAutoResume
? () => {
void resumeAutoAfterProviderDelay(pi, ctx).catch((err) => {
const message = err instanceof Error ? err.message : String(err);
const message =
err instanceof Error ? err.message : String(err);
ctx.ui.notify(
`Session timeout recovery failed: ${message}`,
"error",
@ -2369,7 +2381,13 @@ export async function runUnitPhase(
if (!allowAutoResume) {
resetConsecutiveSessionTimeouts();
}
await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext);
await emitCancelledUnitEnd(
ic,
unitType,
unitId,
unitStartSeq,
unitResult.errorContext,
);
return { action: "break", reason: "session-timeout" };
}
@ -2378,7 +2396,11 @@ export async function runUnitPhase(
`Unit timed out for ${unitType} ${unitId} (supervision may have failed). Pausing auto-mode.`,
"warning",
);
debugLog("autoLoop", { phase: "unit-hard-timeout-pause", unitType, unitId });
debugLog("autoLoop", {
phase: "unit-hard-timeout-pause",
unitType,
unitId,
});
await deps.pauseAuto(ctx, pi);
await emitCancelledUnitEnd(
ic,
@ -2468,7 +2490,10 @@ export async function runUnitPhase(
u.startedAt === s.currentUnit?.startedAt,
);
if (lastUnit && lastUnit.toolCalls === 0) {
if (USER_DRIVEN_DEEP_UNITS.has(unitType) && isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)) {
if (
USER_DRIVEN_DEEP_UNITS.has(unitType) &&
isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)
) {
debugLog("runUnitPhase", {
phase: "zero-tool-calls-awaiting-user-input",
unitType,
@ -2500,7 +2525,10 @@ export async function runUnitPhase(
// and re-dispatch this unit.
return {
action: "next",
data: { unitStartedAt: s.currentUnit?.startedAt, requestDispatchedAt: unitResult.requestDispatchedAt },
data: {
unitStartedAt: s.currentUnit?.startedAt,
requestDispatchedAt: unitResult.requestDispatchedAt,
},
};
}
}
@ -2517,7 +2545,10 @@ export async function runUnitPhase(
const skipArtifactVerification = shouldSkipArtifactVerification(unitType);
let artifactVerified: boolean;
if (USER_DRIVEN_DEEP_UNITS.has(unitType) && isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)) {
if (
USER_DRIVEN_DEEP_UNITS.has(unitType) &&
isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)
) {
// Skip artifact verification — unit is paused waiting for user input
artifactVerified = false;
} else {
@ -2688,7 +2719,13 @@ export async function runUnitPhase(
}
s.preUnitDirtyFiles = [];
return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt, requestDispatchedAt: unitResult.requestDispatchedAt } };
return {
action: "next",
data: {
unitStartedAt: s.currentUnit?.startedAt,
requestDispatchedAt: unitResult.requestDispatchedAt,
},
};
}
// ─── runFinalize ──────────────────────────────────────────────────────────────
@ -2734,8 +2771,15 @@ export async function runFinalize(
// Sidecar items use lightweight pre-verification opts
const preVerificationOpts: PreVerificationOpts = sidecarItem
? sidecarItem.kind === "hook"
? { skipSettleDelay: true, skipWorktreeSync: true, agentEndMessages: s.lastUnitAgentEndMessages ?? undefined }
: { skipSettleDelay: true, agentEndMessages: s.lastUnitAgentEndMessages ?? undefined }
? {
skipSettleDelay: true,
skipWorktreeSync: true,
agentEndMessages: s.lastUnitAgentEndMessages ?? undefined,
}
: {
skipSettleDelay: true,
agentEndMessages: s.lastUnitAgentEndMessages ?? undefined,
}
: { agentEndMessages: s.lastUnitAgentEndMessages ?? undefined };
const _preUnitSnapshot = s.currentUnit
? {
@ -3079,7 +3123,11 @@ export async function runFinalize(
const severity = logs.some((e) => e.severity === "error")
? "error"
: "warning";
ctx.ui.notify(formatForNotification(logs), severity);
ctx.ui.notify(formatForNotification(logs), severity, {
kind: severity === "error" ? "notice" : "progress",
source: "workflow-logger",
dedupe_key: `workflow-issues:${iterData.unitType}:${iterData.unitId}`,
});
}
}

View file

@ -83,12 +83,15 @@ export const MODEL_CAPABILITY_TIER: Record<string, ComplexityTier> = {
"gpt-5.3-codex-spark": "light",
"gemini-2.0-flash": "light",
"gemini-flash-2.0": "light",
"gemini-3.1-flash-lite-preview": "light",
"gemini-2.5-flash-lite": "light",
"glm-4.7-flash": "light",
"glm-4.7-flashx": "light",
"ministral-3b-latest": "light",
"ministral-8b-latest": "light",
"devstral-small-2505": "light",
"devstral-small-2507": "light",
"labs-devstral-small-2512": "light",
// Standard-tier models
"claude-sonnet-4-6": "standard",
@ -98,8 +101,16 @@ export const MODEL_CAPABILITY_TIER: Record<string, ComplexityTier> = {
"gpt-4.1": "standard",
"gpt-5.1-codex-max": "standard",
"gemini-2.5-pro": "standard",
"gemini-3-flash-preview": "standard",
"gemini-2.5-flash": "standard",
"deepseek-chat": "standard",
"glm-4.7": "standard",
"qwen3-coder:480b": "standard",
"qwen3-coder-next": "standard",
"kimi-k2.6": "standard",
"kimi-for-coding": "standard",
"MiniMax-M2.7": "standard",
"MiniMax-M2.7-highspeed": "standard",
"codestral-latest": "standard",
"devstral-2512": "standard",
"devstral-medium-2507": "standard",
@ -131,6 +142,10 @@ export const MODEL_CAPABILITY_TIER: Record<string, ComplexityTier> = {
o3: "heavy",
"o4-mini": "heavy",
"o4-mini-deep-research": "heavy",
"gemini-3.1-pro-preview": "heavy",
"gemini-3-pro-preview": "heavy",
"kimi-k2-thinking": "heavy",
"qwen3-next:80b": "heavy",
"glm-5": "heavy",
"glm-5-turbo": "heavy",
"glm-5.1": "heavy",
@ -176,6 +191,12 @@ const MODEL_COST_PER_1K_INPUT: Record<string, number> = {
"o4-mini-deep-research": 0.005,
"gemini-2.0-flash": 0.0001,
"gemini-2.5-pro": 0.00125,
"gemini-3.1-pro-preview": 0.00125,
"gemini-3.1-flash-lite-preview": 0.0001,
"gemini-3-pro-preview": 0.00125,
"gemini-3-flash-preview": 0.0001,
"gemini-2.5-flash": 0.0001,
"gemini-2.5-flash-lite": 0.00005,
"deepseek-chat": 0.00014,
"glm-4.7": 0.0006,
"glm-4.7-flash": 0,
@ -184,12 +205,21 @@ const MODEL_COST_PER_1K_INPUT: Record<string, number> = {
"glm-5-turbo": 0.0012,
"glm-5.1": 0.0014,
"glm-5v-turbo": 0.0012,
"qwen3-coder:480b": 0.0004,
"qwen3-coder-next": 0.0004,
"qwen3-next:80b": 0.0002,
"kimi-k2.6": 0.0006,
"kimi-for-coding": 0.0006,
"kimi-k2-thinking": 0.001,
"MiniMax-M2.7": 0.0006,
"MiniMax-M2.7-highspeed": 0.0006,
"codestral-latest": 0.0003,
"devstral-2512": 0.0004,
"devstral-medium-2507": 0.0004,
"devstral-medium-latest": 0.0004,
"devstral-small-2505": 0.0001,
"devstral-small-2507": 0.0001,
"labs-devstral-small-2512": 0.0001,
"magistral-medium-latest": 0.002,
"magistral-small": 0.0005,
"ministral-3b-latest": 0.00004,
@ -523,6 +553,60 @@ export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
longContext: 90,
instruction: 75,
},
"gemini-3.1-pro-preview": {
coding: 82,
debugging: 78,
research: 92,
reasoning: 84,
speed: 48,
longContext: 98,
instruction: 82,
},
"gemini-3-pro-preview": {
coding: 82,
debugging: 78,
research: 90,
reasoning: 84,
speed: 50,
longContext: 96,
instruction: 82,
},
"gemini-3-flash-preview": {
coding: 62,
debugging: 55,
research: 70,
reasoning: 60,
speed: 88,
longContext: 88,
instruction: 72,
},
"gemini-3.1-flash-lite-preview": {
coding: 55,
debugging: 48,
research: 62,
reasoning: 52,
speed: 96,
longContext: 85,
instruction: 68,
},
"gemini-2.5-flash": {
coding: 60,
debugging: 52,
research: 68,
reasoning: 58,
speed: 92,
longContext: 85,
instruction: 70,
},
"gemini-2.5-flash-lite": {
coding: 52,
debugging: 45,
research: 58,
reasoning: 48,
speed: 97,
longContext: 78,
instruction: 65,
},
"gemini-2.0-flash": {
coding: 50,
debugging: 40,
@ -761,6 +845,15 @@ export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
longContext: 45,
instruction: 65,
},
"labs-devstral-small-2512": {
coding: 65,
debugging: 58,
research: 45,
reasoning: 55,
speed: 88,
longContext: 60,
instruction: 68,
},
// ── Zhipu AI (GLM) ─────────────────────────────────────────────────────────
"glm-5": {
@ -826,6 +919,129 @@ export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
longContext: 45,
instruction: 60,
},
// ── Qwen / Ollama Cloud compatible tags ──────────────────────────────────
"qwen3-coder:480b": {
coding: 84,
debugging: 78,
research: 62,
reasoning: 76,
speed: 58,
longContext: 86,
instruction: 78,
},
"qwen3-coder-next": {
coding: 82,
debugging: 76,
research: 60,
reasoning: 74,
speed: 70,
longContext: 86,
instruction: 76,
},
"qwen3-next:80b": {
coding: 70,
debugging: 68,
research: 76,
reasoning: 80,
speed: 62,
longContext: 86,
instruction: 74,
},
// ── Moonshot / Kimi ───────────────────────────────────────────────────────
"kimi-k2.6": {
coding: 88,
debugging: 84,
research: 72,
reasoning: 82,
speed: 55,
longContext: 86,
instruction: 84,
},
"kimi-for-coding": {
coding: 88,
debugging: 84,
research: 72,
reasoning: 82,
speed: 55,
longContext: 86,
instruction: 84,
},
"kimi-k2-thinking": {
coding: 86,
debugging: 88,
research: 78,
reasoning: 92,
speed: 30,
longContext: 86,
instruction: 84,
},
// ── MiniMax ───────────────────────────────────────────────────────────────
"MiniMax-M2.7": {
coding: 84,
debugging: 80,
research: 78,
reasoning: 84,
speed: 52,
longContext: 84,
instruction: 82,
},
"MiniMax-M2.7-highspeed": {
coding: 82,
debugging: 78,
research: 76,
reasoning: 80,
speed: 72,
longContext: 84,
instruction: 80,
},
};
const MODEL_CAPABILITY_ALIASES: Record<string, string> = {
"deepseek-v3.1": "deepseek-chat",
"deepseek-v3.2": "deepseek-chat",
"deepseek-v4-flash": "deepseek-chat",
"deepseek-v4-pro": "deepseek-chat",
"devstral-latest": "devstral-medium-latest",
"devstral-2:123b": "devstral-2512",
"mistral.devstral-2-123b": "devstral-2512",
"devstral-small-2:24b": "devstral-small-2507",
"mistral.devstral-small-2-24b": "labs-devstral-small-2512",
"mistral.mistral-large-3-675b-instruct": "mistral-large-latest",
"mistral.ministral-3-14b-instruct": "mistral-small-latest",
"mistral.ministral-3-3b-instruct": "ministral-3b-latest",
"mistral.ministral-3-8b-instruct": "ministral-8b-latest",
"gemini-3-flash-preview": "gemini-3-flash-preview",
"glm-4.6": "glm-4.7",
"gpt-oss:120b": "gpt-4o",
"gpt-oss:20b": "gpt-4o-mini",
"kimi-k2:1t": "kimi-k2.6",
"kimi-k2.5": "kimi-k2.6",
"kimi-for-coding": "kimi-k2.6",
"kimi-k2.6:cloud": "kimi-k2.6",
"kimi-k2.6-cloud": "kimi-k2.6",
"minimax-m2": "MiniMax-M2.7",
"minimax-m2.1": "MiniMax-M2.7",
"minimax-m2.5": "MiniMax-M2.7",
"minimax-m2.7": "MiniMax-M2.7",
"mistral-large-3:675b": "mistral-large-latest",
"ministral-3:3b": "ministral-3b-latest",
"ministral-3:8b": "ministral-8b-latest",
"ministral-3:14b": "mistral-small-latest",
"nemotron-3-nano:30b": "gpt-4o-mini",
"nemotron-3-super": "gpt-4o",
"qwen3-coder-480b-a35b-v1:0": "qwen3-coder:480b",
"qwen3-coder-480b-a35b": "qwen3-coder:480b",
"qwen3-coder": "qwen3-coder:480b",
"qwen3-coder:free": "qwen3-coder:480b",
"qwen3-coder-30b-a3b-instruct": "qwen3-coder-next",
"qwen3-coder-flash": "qwen3-coder-next",
"qwen3-next-80b-a3b": "qwen3-next:80b",
"qwen3-next-80b-a3b-instruct": "qwen3-next:80b",
"qwen3-next-80b-a3b-instruct:free": "qwen3-next:80b",
"qwen3-next-80b-a3b-thinking": "qwen3-next:80b",
};
// ─── Base Task Requirements Data Table ───────────────────────────────────────
@ -922,8 +1138,10 @@ export function scoreEligibleModels(
capabilityOverrides?: Record<string, Partial<ModelCapabilities>>,
): Array<{ modelId: string; score: number }> {
const scored = eligibleModelIds.map((modelId) => {
const builtin = MODEL_CAPABILITY_PROFILES[modelId];
const override = capabilityOverrides?.[modelId];
const canonicalModelId = canonicalCapabilityModelId(modelId);
const builtin = MODEL_CAPABILITY_PROFILES[canonicalModelId];
const override =
capabilityOverrides?.[modelId] ?? capabilityOverrides?.[canonicalModelId];
const profile: ModelCapabilities = builtin
? override
? { ...builtin, ...override }
@ -950,6 +1168,29 @@ export function scoreEligibleModels(
return scored;
}
function canonicalCapabilityModelId(modelId: string): string {
const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
const normalizedId = bareId.replace(/:cloud$/i, "").replace(/-cloud$/i, "");
const aliased = resolveCapabilityAlias(bareId) ?? resolveCapabilityAlias(normalizedId);
if (aliased) return aliased;
if (MODEL_CAPABILITY_PROFILES[normalizedId]) return normalizedId;
for (const knownId of Object.keys(MODEL_CAPABILITY_PROFILES)) {
if (normalizedId.includes(knownId) || knownId.includes(normalizedId)) {
return knownId;
}
}
return normalizedId;
}
/**
 * Look up a model ID in the capability alias table.
 * Exact-key hits win; otherwise a case-insensitive scan over the table's
 * keys is tried. Returns undefined when no alias applies.
 */
function resolveCapabilityAlias(modelId: string): string | undefined {
  const exact = MODEL_CAPABILITY_ALIASES[modelId];
  if (exact) return exact;
  const needle = modelId.toLowerCase();
  for (const [alias, target] of Object.entries(MODEL_CAPABILITY_ALIASES)) {
    if (alias.toLowerCase() === needle) return target;
  }
  return undefined;
}
/**
* Return all models eligible for a given tier, sorted cheapest first.
* If routingConfig.tier_models[tier] is set and available, returns only that
@ -1193,18 +1434,17 @@ export function defaultRoutingConfig(): DynamicRoutingConfig {
// ─── Internal ────────────────────────────────────────────────────────────────
export function getModelTier(modelId: string): ComplexityTier {
// Strip provider prefix if present
const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
const canonicalId = canonicalCapabilityModelId(modelId);
// Check exact match first
if (MODEL_CAPABILITY_TIER[bareId]) return MODEL_CAPABILITY_TIER[bareId];
if (MODEL_CAPABILITY_TIER[canonicalId]) return MODEL_CAPABILITY_TIER[canonicalId];
const sizeTier = inferTierFromModelSize(bareId);
const sizeTier = inferTierFromModelSize(canonicalId);
if (sizeTier) return sizeTier;
// Check if any known model ID is a prefix/suffix match
for (const [knownId, tier] of Object.entries(MODEL_CAPABILITY_TIER)) {
if (bareId.includes(knownId) || knownId.includes(bareId)) return tier;
if (canonicalId.includes(knownId) || knownId.includes(canonicalId)) return tier;
}
// Unknown models are assumed standard (per D-15: avoids silently ignoring user config)
@ -1223,24 +1463,26 @@ function inferTierFromModelSize(modelId: string): ComplexityTier | null {
/** Check if a model ID has a known capability tier mapping. (#2192) */
function isKnownModel(modelId: string): boolean {
const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
if (MODEL_CAPABILITY_TIER[bareId]) return true;
const canonicalId = canonicalCapabilityModelId(modelId);
if (MODEL_CAPABILITY_TIER[canonicalId]) return true;
for (const knownId of Object.keys(MODEL_CAPABILITY_TIER)) {
if (bareId.includes(knownId) || knownId.includes(bareId)) return true;
if (canonicalId.includes(knownId) || knownId.includes(canonicalId)) return true;
}
return false;
}
function getModelCost(modelId: string): number {
const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
const canonicalId = canonicalCapabilityModelId(modelId);
if (MODEL_COST_PER_1K_INPUT[bareId] !== undefined) {
return MODEL_COST_PER_1K_INPUT[bareId];
if (MODEL_COST_PER_1K_INPUT[canonicalId] !== undefined) {
return MODEL_COST_PER_1K_INPUT[canonicalId];
}
// Check partial matches
for (const [knownId, cost] of Object.entries(MODEL_COST_PER_1K_INPUT)) {
if (bareId.includes(knownId) || knownId.includes(bareId)) return cost;
if (canonicalId.includes(knownId) || knownId.includes(canonicalId)) {
return cost;
}
}
// Unknown cost — assume expensive to avoid routing to unknown cheap models

View file

@ -1,5 +1,5 @@
// SF Extension — Persistent Notification Store
// Captures all ctx.ui.notify() calls and workflow-logger warnings to
// Captures durable ctx.ui.notify() calls and workflow-logger errors to
// .sf/notifications.jsonl so they survive context resets and session restarts.
// Rotates at MAX_ENTRIES to prevent unbounded growth.
@ -99,6 +99,7 @@ export function appendNotification(
): void {
if (!_basePath) return;
if (_suppressCount > 0) return;
if (!shouldPersistNotification(severity, metadata)) return;
const persistedMessage =
message.length > 500 ? message.slice(0, 500) + "…" : message;
// Use explicit dedupe_key when provided; fall back to message-hash based key.
@ -141,6 +142,14 @@ export function appendNotification(
}
}
/**
 * Decide whether a notification should be written to the persistent store.
 * Transient progress notices are dropped; everything else is kept.
 * Severity is currently not part of the decision (parameter kept for the
 * call-site contract).
 */
function shouldPersistNotification(
  _severity: NotifySeverity,
  metadata?: NotificationMetadata,
): boolean {
  return metadata?.kind !== "progress";
}
/**
* Read all notification entries from disk. Returns newest-first.
*/
@ -350,7 +359,10 @@ function _withLock<T>(basePath: string, fn: () => T): T {
const stat = readFileSync(lockPath, "utf-8");
const lockTime = parseInt(stat, 10);
// Treat NaN (creator crashed before writing timestamp) as stale.
if (isNaN(lockTime) || (Number.isFinite(lockTime) && Date.now() - lockTime > 5000)) {
if (
Number.isNaN(lockTime) ||
(Number.isFinite(lockTime) && Date.now() - lockTime > 5000)
) {
try {
unlinkSync(lockPath);
} catch {

View file

@ -271,6 +271,69 @@ test("scoreModel returns 50 for empty requirements", () => {
assert.equal(score, 50);
});
test("scoreEligibleModels treats kimi-for-coding as the Kimi K2.6 capability profile", () => {
  // A provider-prefixed kimi-for-coding route should resolve to the
  // kimi-k2.6 capability profile and outrank a model with no known profile.
  const scored = scoreEligibleModels(
    ["kimi-coding/kimi-for-coding", "unknown-future-model"],
    { coding: 1.0 },
  );
  const top = scored[0];
  assert.equal(top?.modelId, "kimi-coding/kimi-for-coding");
  assert.equal(top?.score, MODEL_CAPABILITY_PROFILES["kimi-k2.6"].coding);
});
test("scoreEligibleModels uses bare model IDs for provider-prefixed GLM routes", () => {
  // Both routes carry a "zai/" prefix; scoring must fall through to the
  // bare GLM profile so glm-5.1 wins on reasoning.
  const scored = scoreEligibleModels(["zai/glm-5.1", "zai/glm-4.7"], {
    reasoning: 1.0,
  });
  const best = scored[0];
  assert.equal(best?.modelId, "zai/glm-5.1");
  assert.equal(best?.score, MODEL_CAPABILITY_PROFILES["glm-5.1"].reasoning);
});
test("scoreEligibleModels keeps Kimi thinking distinct from plain K2.6", () => {
  // Reasoning-weighted scoring should prefer the thinking variant…
  const byReasoning = scoreEligibleModels(
    ["kimi-coding/kimi-k2-thinking", "kimi-coding/kimi-k2.6"],
    { reasoning: 1.0 },
  );
  assert.equal(byReasoning[0]?.modelId, "kimi-coding/kimi-k2-thinking");
  // …while speed-weighted scoring should prefer plain K2.6.
  const bySpeed = scoreEligibleModels(
    ["kimi-coding/kimi-k2-thinking", "kimi-coding/kimi-k2.6"],
    { speed: 1.0 },
  );
  assert.equal(bySpeed[0]?.modelId, "kimi-coding/kimi-k2.6");
});
test("scoreEligibleModels normalizes Ollama Cloud suffix aliases", () => {
  // The ":cloud" suffix must be stripped so the route picks up the
  // kimi-k2.6 profile instead of scoring as an unknown model.
  const ranked = scoreEligibleModels(
    ["ollama-cloud/kimi-k2.6:cloud", "unknown-future-model"],
    { coding: 1.0 },
  );
  const winner = ranked[0];
  assert.equal(winner?.modelId, "ollama-cloud/kimi-k2.6:cloud");
  assert.equal(winner?.score, MODEL_CAPABILITY_PROFILES["kimi-k2.6"].coding);
});
test("scoreEligibleModels normalizes Ollama Cloud family aliases", () => {
  // All three cloud routes should resolve to known standard-tier profiles,
  // so each score must exceed 50 and each tier must be "standard".
  const ranked = scoreEligibleModels(
    [
      "ollama-cloud/minimax-m2.7",
      "ollama-cloud/devstral-2:123b",
      "ollama-cloud/qwen3-coder:480b",
    ],
    { coding: 1.0 },
  );
  assert.ok(ranked.every((entry) => entry.score > 50));
  assert.deepEqual(
    ranked.map((entry) => getModelTier(entry.modelId)),
    ["standard", "standard", "standard"],
  );
});
test("computeTaskRequirements returns base vector for known unit type", () => {
const reqs = computeTaskRequirements("execute-task");
assert.ok(reqs.coding !== undefined && reqs.coding > 0);

View file

@ -12,7 +12,7 @@ import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, test } from 'vitest';
import { afterEach, beforeEach, describe, test } from "vitest";
import {
isBlockedNotification,
@ -126,10 +126,10 @@ describe("isMilestoneReadyNotification — metadata-first", () => {
describe("isPauseNotification — metadata-first", () => {
test("returns true when metadata.kind=terminal and blocking=true", () => {
const event = notifyEvent(
"Autonomous mode paused. Type to interact.",
{ kind: "terminal", blocking: true },
);
const event = notifyEvent("Autonomous mode paused. Type to interact.", {
kind: "terminal",
blocking: true,
});
assert.equal(isPauseNotification(event), true);
});
});
@ -157,7 +157,11 @@ describe("notification-store — dedupe_key", () => {
dedupe_key: "sync:progress",
});
const entries = readNotifications(tmpDir);
assert.equal(entries.length, 1, "second entry with same dedupe_key should be dropped");
assert.equal(
entries.length,
1,
"second entry with same dedupe_key should be dropped",
);
});
test("does not deduplicate across different dedupe_keys", () => {
@ -168,7 +172,11 @@ describe("notification-store — dedupe_key", () => {
dedupe_key: "sync:B",
});
const entries = readNotifications(tmpDir);
assert.equal(entries.length, 2, "different dedupe_keys should produce separate entries");
assert.equal(
entries.length,
2,
"different dedupe_keys should produce separate entries",
);
});
test("stores metadata on the entry", () => {
@ -184,15 +192,13 @@ describe("notification-store — dedupe_key", () => {
assert.equal(entries[0].metadata?.source, "workflow");
});
test("automated progress notice does not affect blocking classification", () => {
test("automated progress notice is not persisted or treated as blocking", () => {
appendNotification("Running checks...", "info", "notify", {
kind: "progress",
source: "workflow",
});
const entries = readNotifications(tmpDir);
assert.equal(entries.length, 1);
// The notice is stored, but kind=progress means headless will not treat it as blocked.
assert.equal(entries[0].metadata?.kind, "progress");
assert.equal(entries.length, 0);
// Confirm headless classification: this event should NOT be blocked
const fakeEvent = notifyEvent("Running checks...", {
kind: "progress",

View file

@ -11,7 +11,7 @@ import {
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, test, vi } from 'vitest';
import { afterEach, beforeEach, describe, test, vi } from "vitest";
import {
_resetNotificationStore,

View file

@ -532,7 +532,7 @@ test("workflow MCP ask_user_questions uses stdio elicitation round-trip", async
},
},
undefined,
{ timeout: 60_000 },
{ timeout: 120_000 },
);
assert.ok(

View file

@ -299,17 +299,18 @@ function _push(
const ctxStr = context ? ` ${JSON.stringify(context)}` : "";
_writeStderr(`[sf:${component}] ${prefix}: ${message}${ctxStr}\n`);
// Persist to notification store (both warnings and errors)
try {
appendNotification(
`[${component}] ${message}`,
severity === "error" ? "error" : "warning",
"workflow-logger",
);
} catch (notifErr) {
_writeStderr(
`[sf:workflow-logger] notification-store append failed: ${(notifErr as Error).message}\n`,
);
if (severity === "error") {
try {
appendNotification(
`[${component}] ${message}`,
"error",
"workflow-logger",
);
} catch (notifErr) {
_writeStderr(
`[sf:workflow-logger] notification-store append failed: ${(notifErr as Error).message}\n`,
);
}
}
// Buffer for auto-loop to drain

View file

@ -19,7 +19,7 @@ import { execFileSync, spawn } from "node:child_process";
import { existsSync, mkdirSync, mkdtempSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { test, afterEach } from 'vitest';
import { afterEach, test } from "vitest";
const projectRoot = process.cwd();
const loaderPath = join(projectRoot, "dist", "loader.js");
@ -43,13 +43,13 @@ type RunResult = {
* Spawn `node dist/loader.js ...args` and collect output.
*
* @param args CLI arguments to pass after the script path
* @param timeoutMs Maximum time to wait before SIGTERM (default 8 s)
* @param timeoutMs Maximum time to wait before SIGTERM (default 15 s)
* @param env Additional / override environment variables
* @param cwd Working directory for the child process (default: projectRoot)
*/
function runSf(
args: string[],
timeoutMs = 8_000,
timeoutMs = 15_000,
env: NodeJS.ProcessEnv = {},
cwd: string = projectRoot,
): Promise<RunResult> {
@ -88,7 +88,6 @@ function runSf(
/** Strip ANSI escape codes from a string. */
function stripAnsi(s: string): string {
// biome-ignore lint/suspicious/noControlCharactersInRegex: ANSI escape sequence
return s.replace(/\x1b\[[0-9;]*[A-Za-z]/g, "");
}
@ -426,7 +425,7 @@ test("sf -h is equivalent to --help", async () => {
// 13. sf headless without .sf/ directory exits 1 with clean error
// ---------------------------------------------------------------------------
test("sf headless without .sf/ directory exits 1 with clean error", async (t) => {
test("sf headless without .sf/ directory exits 1 with clean error", async () => {
const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-no-sf-"));
afterEach(() => {
@ -451,19 +450,14 @@ test("sf headless without .sf/ directory exits 1 with clean error", async (t) =>
// 14. sf headless new-milestone without --context exits 1
// ---------------------------------------------------------------------------
test("sf headless new-milestone without --context exits 1", async (t) => {
test("sf headless new-milestone without --context exits 1", async () => {
const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-no-ctx-"));
afterEach(() => {
rmSync(tmpDir, { recursive: true, force: true });
});
const result = await runSf(
["headless", "new-milestone"],
10_000,
{},
tmpDir,
);
const result = await runSf(["headless", "new-milestone"], 10_000, {}, tmpDir);
assert.ok(!result.timedOut, "process should not hang");
assert.strictEqual(result.code, 1, `expected exit 1, got ${result.code}`);
@ -481,7 +475,7 @@ test("sf headless new-milestone without --context exits 1", async (t) => {
// 15. sf headless --timeout with invalid value exits 1
// ---------------------------------------------------------------------------
test("sf headless --timeout with invalid value exits 1", async (t) => {
test("sf headless --timeout with invalid value exits 1", async () => {
const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-bad-timeout-"));
afterEach(() => {
@ -511,7 +505,7 @@ test("sf headless --timeout with invalid value exits 1", async (t) => {
// 16. sf headless --timeout with negative value exits 1
// ---------------------------------------------------------------------------
test("sf headless --timeout with negative value exits 1", async (t) => {
test("sf headless --timeout with negative value exits 1", async () => {
const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-neg-timeout-"));
afterEach(() => {
@ -537,7 +531,7 @@ test("sf headless --timeout with negative value exits 1", async (t) => {
assertNoCrashMarkers(combined);
});
test("sf headless query returns JSON from the built CLI", async (t) => {
test("sf headless query returns JSON from the built CLI", async () => {
const tmpDir = createTempGitRepo("sf-e2e-query-");
afterEach(() => {
@ -565,7 +559,7 @@ test("sf headless query returns JSON from the built CLI", async (t) => {
);
});
test("sf worktree list loads the built worktree CLI without module errors", async (t) => {
test("sf worktree list loads the built worktree CLI without module errors", async () => {
const tmpDir = createTempGitRepo("sf-e2e-worktree-");
afterEach(() => {

View file

@ -21,11 +21,14 @@ import {
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { test, afterEach } from 'vitest';
import { delimiter, dirname, join } from "node:path";
import { createGunzip } from "node:zlib";
import { afterEach, test } from "vitest";
const projectRoot = process.cwd();
const packageName = JSON.parse(
readFileSync(join(projectRoot, "package.json"), "utf-8"),
).name as string;
if (!existsSync(join(projectRoot, "dist"))) {
throw new Error("dist/ not found — run: npm run build");
@ -50,6 +53,9 @@ function createNpmSandbox(prefix: string): NpmSandbox {
installPrefix,
env: {
...process.env,
PATH: [dirname(process.execPath), process.env.PATH]
.filter(Boolean)
.join(delimiter),
NPM_CONFIG_CACHE: cacheDir,
npm_config_cache: cacheDir,
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: "1",
@ -62,6 +68,8 @@ function buildQuietNpmEnv(sandbox: NpmSandbox): NodeJS.ProcessEnv {
...sandbox.env,
NPM_CONFIG_LOGLEVEL: "error",
npm_config_loglevel: "error",
NPM_CONFIG_ENGINE_STRICT: "false",
npm_config_engine_strict: "false",
NPM_CONFIG_FUND: "false",
npm_config_fund: "false",
NPM_CONFIG_AUDIT: "false",
@ -141,7 +149,7 @@ function listTarEntries(tarballPath: string): Promise<string[]> {
// 1. npm pack produces valid tarball with correct file layout
// ═══════════════════════════════════════════════════════════════════════════
test("npm pack produces tarball with required files", async (t) => {
test("npm pack produces tarball with required files", async () => {
const sandbox = createNpmSandbox("sf-pack-test-");
const tarballPath = packTarball(sandbox);
@ -204,13 +212,13 @@ test("npm pack produces tarball with required files", async (t) => {
".sf",
"pkg/package.json piConfig.configDir is .sf",
);
});
}, 240_000);
// ═══════════════════════════════════════════════════════════════════════════
// 2. npm pack → install → sf binary resolves
// ═══════════════════════════════════════════════════════════════════════════
test("tarball installs and sf binary resolves", async (t) => {
test("tarball installs and sf binary resolves", async () => {
const sandbox = createNpmSandbox("sf-install-test-");
const tarballPath = packTarball(sandbox);
@ -242,7 +250,7 @@ test("tarball installs and sf binary resolves", async (t) => {
const installedLoader = join(
sandbox.installPrefix,
"node_modules",
"sf-run",
packageName,
"dist",
"loader.js",
);
@ -258,7 +266,7 @@ test("tarball installs and sf binary resolves", async (t) => {
const installedSfExt = join(
sandbox.installPrefix,
"node_modules",
"sf-run",
packageName,
"src",
"resources",
"extensions",
@ -269,7 +277,7 @@ test("tarball installs and sf binary resolves", async (t) => {
existsSync(installedSfExt),
"bundled sf extension present in installed package",
);
});
}, 420_000);
// ═══════════════════════════════════════════════════════════════════════════
// 3. Launch → extensions load → no errors on stderr
@ -329,7 +337,7 @@ test("sf launches and loads extensions without errors", async () => {
);
});
test("sf exits early with a clear message when synced resources are newer than the binary", async (t) => {
test("sf exits early with a clear message when synced resources are newer than the binary", async () => {
const fakeHome = mkdtempSync(join(tmpdir(), "sf-version-skew-"));
const fakeAgentDir = join(fakeHome, ".sf", "agent");
mkdirSync(fakeAgentDir, { recursive: true });