fix(sf): stabilize auto notices and package checks
This commit is contained in:
parent
ed2c4af729
commit
85a0188fe1
20 changed files with 697 additions and 178 deletions
|
|
@ -2,6 +2,8 @@
|
|||
actionable: true
|
||||
kind: design-research
|
||||
date: 2026-05-02
|
||||
promoted: true
|
||||
promoted_to: M012
|
||||
---
|
||||
|
||||
# PDD v2 — Research Findings
|
||||
|
|
|
|||
48
package-lock.json
generated
48
package-lock.json
generated
|
|
@ -14,13 +14,13 @@
|
|||
"studio"
|
||||
],
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.73.0",
|
||||
"@anthropic-ai/sdk": "^0.92.0",
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.4",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
|
||||
"@clack/prompts": "^1.1.0",
|
||||
"@google/genai": "^1.40.0",
|
||||
"@mariozechner/jiti": "^2.6.2",
|
||||
"@mistralai/mistralai": "^1.14.1",
|
||||
"@mistralai/mistralai": "^2.2.1",
|
||||
"@modelcontextprotocol/sdk": "^1.27.1",
|
||||
"@octokit/rest": "^22.0.1",
|
||||
"@silvia-odwyer/photon-node": "^0.3.4",
|
||||
|
|
@ -72,6 +72,7 @@
|
|||
"esbuild": "^0.27.4",
|
||||
"jiti": "^2.6.1",
|
||||
"typescript": "^5.4.0",
|
||||
"typescript-language-server": "^5.1.3",
|
||||
"vitest": "^4.1.5"
|
||||
},
|
||||
"engines": {
|
||||
|
|
@ -155,9 +156,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/sdk": {
|
||||
"version": "0.73.0",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.73.0.tgz",
|
||||
"integrity": "sha512-URURVzhxXGJDGUGFunIOtBlSl7KWvZiAAKY/ttTkZAkXT9bTPqdk2eK0b8qqSxXpikh3QKPnPYpiyX98zf5ebw==",
|
||||
"version": "0.92.0",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.92.0.tgz",
|
||||
"integrity": "sha512-l653JFC83wCglH8H83t1xpgDurCyPyslYW1maPRdCsfuNuGbLvQjQ81sWd3Go3LWRm0jNspzAhuqAYV8r9joSw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"json-schema-to-ts": "^3.1.1"
|
||||
|
|
@ -3915,13 +3916,14 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@mistralai/mistralai": {
|
||||
"version": "1.14.1",
|
||||
"resolved": "https://registry.npmjs.org/@mistralai/mistralai/-/mistralai-1.14.1.tgz",
|
||||
"integrity": "sha512-IiLmmZFCCTReQgPAT33r7KQ1nYo5JPdvGkrkZqA8qQ2qB1GHgs5LoP5K2ICyrjnpw2n8oSxMM/VP+liiKcGNlQ==",
|
||||
"version": "2.2.1",
|
||||
"resolved": "https://registry.npmjs.org/@mistralai/mistralai/-/mistralai-2.2.1.tgz",
|
||||
"integrity": "sha512-uKU8CZmL2RzYKmplsU01hii4p3pe4HqJefpWNRWXm1Tcm0Sm4xXfwSLIy4k7ZCPlbETCGcp69E7hZs+WOJ5itQ==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"ws": "^8.18.0",
|
||||
"zod": "^3.25.0 || ^4.0.0",
|
||||
"zod-to-json-schema": "^3.24.1"
|
||||
"zod-to-json-schema": "^3.25.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@modelcontextprotocol/sdk": {
|
||||
|
|
@ -15498,6 +15500,19 @@
|
|||
"node": ">=14.17"
|
||||
}
|
||||
},
|
||||
"node_modules/typescript-language-server": {
|
||||
"version": "5.1.3",
|
||||
"resolved": "https://registry.npmjs.org/typescript-language-server/-/typescript-language-server-5.1.3.tgz",
|
||||
"integrity": "sha512-r+pAcYtWdN8tKlYZPwiiHNA2QPjXnI02NrW5Sf2cVM3TRtuQ3V9EKKwOxqwaQ0krsaEXk/CbN90I5erBuf84Vg==",
|
||||
"dev": true,
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"typescript-language-server": "lib/cli.mjs"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
}
|
||||
},
|
||||
"node_modules/uint8array-extras": {
|
||||
"version": "1.5.0",
|
||||
"resolved": "https://registry.npmjs.org/uint8array-extras/-/uint8array-extras-1.5.0.tgz",
|
||||
|
|
@ -16305,7 +16320,7 @@
|
|||
"version": "2.75.0",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.52.0",
|
||||
"@anthropic-ai/sdk": "^0.92.0",
|
||||
"@singularity-forge/rpc-client": "^2.75.0",
|
||||
"discord.js": "^14.25.1",
|
||||
"yaml": "^2.8.0",
|
||||
|
|
@ -16322,15 +16337,6 @@
|
|||
"node": ">=24.15.0"
|
||||
}
|
||||
},
|
||||
"packages/daemon/node_modules/@anthropic-ai/sdk": {
|
||||
"version": "0.52.0",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.52.0.tgz",
|
||||
"integrity": "sha512-d4c+fg+xy9e46c8+YnrrgIQR45CZlAi7PwdzIfDXDM6ACxEZli1/fxhURsq30ZpMZy6LvSkr41jGq5aF5TD7rQ==",
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"anthropic-ai-sdk": "bin/cli"
|
||||
}
|
||||
},
|
||||
"packages/daemon/node_modules/zod": {
|
||||
"version": "3.25.76",
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
|
||||
|
|
@ -16387,12 +16393,12 @@
|
|||
"name": "@singularity-forge/pi-ai",
|
||||
"version": "2.75.0",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.73.0",
|
||||
"@anthropic-ai/sdk": "^0.92.0",
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.4",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
|
||||
"@google/gemini-cli-core": "0.38.2",
|
||||
"@google/genai": "^1.40.0",
|
||||
"@mistralai/mistralai": "^1.14.1",
|
||||
"@mistralai/mistralai": "^2.2.1",
|
||||
"@sinclair/typebox": "^0.34.41",
|
||||
"ajv": "^8.17.1",
|
||||
"ajv-formats": "^3.0.1",
|
||||
|
|
|
|||
|
|
@ -102,13 +102,13 @@
|
|||
"test:live-regression": "node --experimental-strip-types tests/live-regression/run.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.73.0",
|
||||
"@anthropic-ai/sdk": "^0.92.0",
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.4",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
|
||||
"@clack/prompts": "^1.1.0",
|
||||
"@google/genai": "^1.40.0",
|
||||
"@mariozechner/jiti": "^2.6.2",
|
||||
"@mistralai/mistralai": "^1.14.1",
|
||||
"@mistralai/mistralai": "^2.2.1",
|
||||
"@modelcontextprotocol/sdk": "^1.27.1",
|
||||
"@octokit/rest": "^22.0.1",
|
||||
"@silvia-odwyer/photon-node": "^0.3.4",
|
||||
|
|
@ -156,6 +156,7 @@
|
|||
"esbuild": "^0.27.4",
|
||||
"jiti": "^2.6.1",
|
||||
"typescript": "^5.4.0",
|
||||
"typescript-language-server": "^5.1.3",
|
||||
"vitest": "^4.1.5"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@
|
|||
"test": "node --test dist/daemon.test.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.52.0",
|
||||
"@anthropic-ai/sdk": "^0.92.0",
|
||||
"@singularity-forge/rpc-client": "^2.75.0",
|
||||
"discord.js": "^14.25.1",
|
||||
"yaml": "^2.8.0",
|
||||
|
|
|
|||
|
|
@ -23,12 +23,12 @@
|
|||
"build": "tsc -p tsconfig.json"
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.73.0",
|
||||
"@anthropic-ai/sdk": "^0.92.0",
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.4",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.983.0",
|
||||
"@google/gemini-cli-core": "0.38.2",
|
||||
"@google/genai": "^1.40.0",
|
||||
"@mistralai/mistralai": "^1.14.1",
|
||||
"@mistralai/mistralai": "^2.2.1",
|
||||
"@sinclair/typebox": "^0.34.41",
|
||||
"ajv": "^8.17.1",
|
||||
"ajv-formats": "^3.0.1",
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import type { Mistral } from "@mistralai/mistralai";
|
|||
import type { RequestOptions } from "@mistralai/mistralai/lib/sdks.js";
|
||||
import type {
|
||||
ChatCompletionStreamRequest,
|
||||
ChatCompletionStreamRequestMessages,
|
||||
ChatCompletionStreamRequestMessage,
|
||||
CompletionEvent,
|
||||
ContentChunk,
|
||||
FunctionTool,
|
||||
|
|
@ -464,8 +464,8 @@ function toFunctionTools(tools: Tool[]): Array<FunctionTool & { type: "function"
|
|||
}));
|
||||
}
|
||||
|
||||
function toChatMessages(messages: Message[], supportsImages: boolean): ChatCompletionStreamRequestMessages[] {
|
||||
const result: ChatCompletionStreamRequestMessages[] = [];
|
||||
function toChatMessages(messages: Message[], supportsImages: boolean): ChatCompletionStreamRequestMessage[] {
|
||||
const result: ChatCompletionStreamRequestMessage[] = [];
|
||||
|
||||
for (const msg of messages) {
|
||||
if (msg.role === "user") {
|
||||
|
|
@ -520,7 +520,7 @@ function toChatMessages(messages: Message[], supportsImages: boolean): ChatCompl
|
|||
});
|
||||
}
|
||||
|
||||
const assistantMessage: ChatCompletionStreamRequestMessages = { role: "assistant" };
|
||||
const assistantMessage: ChatCompletionStreamRequestMessage = { role: "assistant" };
|
||||
if (contentParts.length > 0) assistantMessage.content = contentParts;
|
||||
if (toolCalls.length > 0) assistantMessage.toolCalls = toolCalls;
|
||||
if (contentParts.length > 0 || toolCalls.length > 0) result.push(assistantMessage);
|
||||
|
|
|
|||
|
|
@ -8,12 +8,24 @@
|
|||
* Run: node --experimental-strip-types --test src/core/lsp/lsp-integration.test.ts
|
||||
* (from packages/pi-coding-agent/)
|
||||
*/
|
||||
import { describe, test, beforeAll, afterAll } from 'vitest';
|
||||
|
||||
import assert from "node:assert/strict";
|
||||
import { spawn } from "node:child_process";
|
||||
import { execSync, spawn } from "node:child_process";
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import * as os from "node:os";
|
||||
import * as path from "node:path";
|
||||
import { afterAll, beforeAll, describe, test } from "vitest";
|
||||
|
||||
function hasTypeScriptLanguageServer(): boolean {
|
||||
try {
|
||||
execSync("npx which typescript-language-server", { stdio: "ignore" });
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const describeOrSkip = hasTypeScriptLanguageServer() ? describe : describe.skip;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers — lightweight JSON-RPC over stdio (no dependency on our LSP code)
|
||||
|
|
@ -39,7 +51,9 @@ interface JsonRpcResponse {
|
|||
error?: { code: number; message: string };
|
||||
}
|
||||
|
||||
function encodeMessage(msg: JsonRpcRequest | JsonRpcNotification | JsonRpcResponse): string {
|
||||
function encodeMessage(
|
||||
msg: JsonRpcRequest | JsonRpcNotification | JsonRpcResponse,
|
||||
): string {
|
||||
const body = JSON.stringify(msg);
|
||||
return `Content-Length: ${Buffer.byteLength(body, "utf-8")}\r\n\r\n${body}`;
|
||||
}
|
||||
|
|
@ -51,7 +65,10 @@ class LspHarness {
|
|||
private proc;
|
||||
private nextId = 1;
|
||||
private buffer = Buffer.alloc(0);
|
||||
private pending = new Map<number, { resolve: (v: unknown) => void; reject: (e: Error) => void }>();
|
||||
private pending = new Map<
|
||||
number,
|
||||
{ resolve: (v: unknown) => void; reject: (e: Error) => void }
|
||||
>();
|
||||
private notifications: Array<{ method: string; params: unknown }> = [];
|
||||
|
||||
constructor(command: string, args: string[], cwd: string) {
|
||||
|
|
@ -65,7 +82,7 @@ class LspHarness {
|
|||
this.drain();
|
||||
});
|
||||
|
||||
this.proc.stderr!.on("data", (chunk: Buffer) => {
|
||||
this.proc.stderr!.on("data", (_chunk: Buffer) => {
|
||||
// Swallow stderr (server logs)
|
||||
});
|
||||
}
|
||||
|
|
@ -84,16 +101,23 @@ class LspHarness {
|
|||
const messageEnd = messageStart + contentLength;
|
||||
if (this.buffer.length < messageEnd) return;
|
||||
|
||||
const body = this.buffer.subarray(messageStart, messageEnd).toString("utf-8");
|
||||
const body = this.buffer
|
||||
.subarray(messageStart, messageEnd)
|
||||
.toString("utf-8");
|
||||
this.buffer = Buffer.from(this.buffer.subarray(messageEnd));
|
||||
|
||||
const msg = JSON.parse(body) as JsonRpcResponse & { method?: string; params?: unknown };
|
||||
const msg = JSON.parse(body) as JsonRpcResponse & {
|
||||
method?: string;
|
||||
params?: unknown;
|
||||
};
|
||||
|
||||
if (msg.id !== undefined && this.pending.has(msg.id)) {
|
||||
const p = this.pending.get(msg.id)!;
|
||||
this.pending.delete(msg.id);
|
||||
if (msg.error) {
|
||||
p.reject(new Error(`LSP error ${msg.error.code}: ${msg.error.message}`));
|
||||
p.reject(
|
||||
new Error(`LSP error ${msg.error.code}: ${msg.error.message}`),
|
||||
);
|
||||
} else {
|
||||
p.resolve(msg.result);
|
||||
}
|
||||
|
|
@ -127,7 +151,11 @@ class LspHarness {
|
|||
this.proc.stdin!.write(encodeMessage(msg));
|
||||
}
|
||||
|
||||
async request(method: string, params: unknown, timeoutMs = 15000): Promise<unknown> {
|
||||
async request(
|
||||
method: string,
|
||||
params: unknown,
|
||||
timeoutMs = 15000,
|
||||
): Promise<unknown> {
|
||||
const id = this.nextId++;
|
||||
const msg: JsonRpcRequest = { jsonrpc: "2.0", id, method, params };
|
||||
this.proc.stdin!.write(encodeMessage(msg));
|
||||
|
|
@ -156,11 +184,27 @@ class LspHarness {
|
|||
this.proc.stdin!.write(encodeMessage(msg));
|
||||
}
|
||||
|
||||
getNotifications(method?: string): Array<{ method: string; params: unknown }> {
|
||||
getNotifications(
|
||||
method?: string,
|
||||
): Array<{ method: string; params: unknown }> {
|
||||
if (!method) return this.notifications;
|
||||
return this.notifications.filter((n) => n.method === method);
|
||||
}
|
||||
|
||||
async waitForNotification(
|
||||
method: string,
|
||||
predicate: (notification: { method: string; params: unknown }) => boolean,
|
||||
timeoutMs = 10_000,
|
||||
): Promise<{ method: string; params: unknown } | undefined> {
|
||||
const startedAt = Date.now();
|
||||
while (Date.now() - startedAt < timeoutMs) {
|
||||
const found = this.getNotifications(method).find(predicate);
|
||||
if (found) return found;
|
||||
await new Promise((resolve) => setTimeout(resolve, 100));
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
async shutdown(): Promise<void> {
|
||||
try {
|
||||
await this.request("shutdown", null, 5000);
|
||||
|
|
@ -255,7 +299,7 @@ function fileToUri(filePath: string): string {
|
|||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("LSP integration: typescript-language-server", () => {
|
||||
describeOrSkip("LSP integration: typescript-language-server", () => {
|
||||
let dir: string;
|
||||
let cleanup: () => void;
|
||||
let mainPath: string;
|
||||
|
|
@ -293,8 +337,14 @@ describe("LSP integration: typescript-language-server", () => {
|
|||
|
||||
assert.ok(result, "initialize should return a result");
|
||||
assert.ok(result.capabilities, "result should have capabilities");
|
||||
assert.ok(result.capabilities.hoverProvider !== undefined, "should support hover");
|
||||
assert.ok(result.capabilities.definitionProvider !== undefined, "should support definition");
|
||||
assert.ok(
|
||||
result.capabilities.hoverProvider !== undefined,
|
||||
"should support hover",
|
||||
);
|
||||
assert.ok(
|
||||
result.capabilities.definitionProvider !== undefined,
|
||||
"should support definition",
|
||||
);
|
||||
|
||||
lsp.notify("initialized", {});
|
||||
|
||||
|
|
@ -303,10 +353,20 @@ describe("LSP integration: typescript-language-server", () => {
|
|||
const mathContent = fs.readFileSync(mathPath, "utf-8");
|
||||
|
||||
lsp.notify("textDocument/didOpen", {
|
||||
textDocument: { uri: mainUri, languageId: "typescript", version: 1, text: mainContent },
|
||||
textDocument: {
|
||||
uri: mainUri,
|
||||
languageId: "typescript",
|
||||
version: 1,
|
||||
text: mainContent,
|
||||
},
|
||||
});
|
||||
lsp.notify("textDocument/didOpen", {
|
||||
textDocument: { uri: mathUri, languageId: "typescript", version: 1, text: mathContent },
|
||||
textDocument: {
|
||||
uri: mathUri,
|
||||
languageId: "typescript",
|
||||
version: 1,
|
||||
text: mathContent,
|
||||
},
|
||||
});
|
||||
|
||||
// Give the server time to index
|
||||
|
|
@ -352,7 +412,10 @@ describe("LSP integration: typescript-language-server", () => {
|
|||
// Response can be Location (uri) or LocationLink (targetUri)
|
||||
const loc = locations[0] as Record<string, unknown>;
|
||||
const uri = (loc.uri ?? loc.targetUri) as string;
|
||||
assert.ok(uri, `definition should have uri or targetUri, got keys: ${Object.keys(loc).join(", ")}`);
|
||||
assert.ok(
|
||||
uri,
|
||||
`definition should have uri or targetUri, got keys: ${Object.keys(loc).join(", ")}`,
|
||||
);
|
||||
assert.ok(
|
||||
uri.includes("math.ts"),
|
||||
`definition should point to math.ts, got: ${uri}`,
|
||||
|
|
@ -368,7 +431,10 @@ describe("LSP integration: typescript-language-server", () => {
|
|||
})) as Array<{ uri: string; range: unknown }> | null;
|
||||
|
||||
assert.ok(result, "references should return a result");
|
||||
assert.ok(result.length >= 2, `should find at least 2 references (decl + usage), got ${result.length}`);
|
||||
assert.ok(
|
||||
result.length >= 2,
|
||||
`should find at least 2 references (decl + usage), got ${result.length}`,
|
||||
);
|
||||
});
|
||||
|
||||
// ---- Document Symbols ----
|
||||
|
|
@ -378,27 +444,47 @@ describe("LSP integration: typescript-language-server", () => {
|
|||
})) as Array<{ name: string; kind: number }> | null;
|
||||
|
||||
assert.ok(result, "documentSymbol should return a result");
|
||||
assert.ok(result.length >= 2, `should find at least 2 symbols, got ${result.length}`);
|
||||
assert.ok(
|
||||
result.length >= 2,
|
||||
`should find at least 2 symbols, got ${result.length}`,
|
||||
);
|
||||
const names = result.map((s) => s.name);
|
||||
assert.ok(names.includes("add"), `symbols should include 'add', got: ${names.join(", ")}`);
|
||||
assert.ok(names.includes("subtract"), `symbols should include 'subtract', got: ${names.join(", ")}`);
|
||||
assert.ok(
|
||||
names.includes("add"),
|
||||
`symbols should include 'add', got: ${names.join(", ")}`,
|
||||
);
|
||||
assert.ok(
|
||||
names.includes("subtract"),
|
||||
`symbols should include 'subtract', got: ${names.join(", ")}`,
|
||||
);
|
||||
});
|
||||
|
||||
// ---- Diagnostics (published via notification) ----
|
||||
test("diagnostics for type error", async () => {
|
||||
// Wait a bit more for diagnostics to arrive
|
||||
await new Promise((r) => setTimeout(r, 2000));
|
||||
const mainContent = fs.readFileSync(mainPath, "utf-8");
|
||||
lsp.notify("textDocument/didChange", {
|
||||
textDocument: { uri: mainUri, version: 2 },
|
||||
contentChanges: [{ text: mainContent }],
|
||||
});
|
||||
|
||||
const diagNotifications = lsp.getNotifications("textDocument/publishDiagnostics");
|
||||
const mainDiags = diagNotifications.filter(
|
||||
(n) => (n.params as { uri: string }).uri === mainUri,
|
||||
const mainDiagNotification = await lsp.waitForNotification(
|
||||
"textDocument/publishDiagnostics",
|
||||
(n) => {
|
||||
const params = n.params as {
|
||||
uri: string;
|
||||
diagnostics?: Array<{ message: string; range: unknown }>;
|
||||
};
|
||||
return params.uri === mainUri && (params.diagnostics?.length ?? 0) > 0;
|
||||
},
|
||||
);
|
||||
|
||||
assert.ok(mainDiags.length > 0, "should receive diagnostics for main.ts");
|
||||
assert.ok(mainDiagNotification, "should receive diagnostics for main.ts");
|
||||
|
||||
const lastDiag = mainDiags[mainDiags.length - 1];
|
||||
const diagnostics = (lastDiag.params as { diagnostics: Array<{ message: string; range: unknown }> })
|
||||
.diagnostics;
|
||||
const diagnostics = (
|
||||
mainDiagNotification.params as {
|
||||
diagnostics: Array<{ message: string; range: unknown }>;
|
||||
}
|
||||
).diagnostics;
|
||||
|
||||
// Should catch the type error: string assigned to number
|
||||
const typeError = diagnostics.find(
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import {
|
|||
symlinkSync,
|
||||
} from "node:fs";
|
||||
import { delimiter, join, relative, resolve } from "node:path";
|
||||
|
||||
// SF Startup Loader
|
||||
// Copyright (c) 2026 Singularity Forge
|
||||
|
||||
|
|
@ -68,6 +69,18 @@ if (firstArg === "--help" || firstArg === "-h") {
|
|||
process.exit(0);
|
||||
}
|
||||
|
||||
if (
|
||||
firstArg &&
|
||||
firstArg !== "--" &&
|
||||
args.slice(1).some((arg) => arg === "--help" || arg === "-h")
|
||||
) {
|
||||
const { printHelp, printSubcommandHelp } = await import("./help-text.js");
|
||||
if (!printSubcommandHelp(firstArg, sfVersion)) {
|
||||
printHelp(sfVersion);
|
||||
}
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
// Fast-path invalid headless invocations before importing cli.ts. These paths
|
||||
// are commonly used by smoke tests and orchestrators; they should return a
|
||||
// clear diagnostic without paying extension/resource startup cost.
|
||||
|
|
|
|||
|
|
@ -12,7 +12,22 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { dirname, resolve } from "node:path";
|
||||
import { after, afterAll, before, beforeAll, describe, it } from 'vitest';
|
||||
import { afterAll, beforeAll, describe, it } from "vitest";
|
||||
|
||||
// Skip the entire suite if Playwright Chromium cannot launch (missing system
|
||||
// libraries or browser binaries in this environment).
|
||||
let canLaunchChromium = false;
|
||||
try {
|
||||
const { chromium } = await import("playwright");
|
||||
const testBrowser = await chromium.launch({ headless: true });
|
||||
await testBrowser.close();
|
||||
canLaunchChromium = true;
|
||||
} catch {
|
||||
canLaunchChromium = false;
|
||||
}
|
||||
|
||||
const describeOrSkip = canLaunchChromium ? describe : describe.skip;
|
||||
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { chromium } from "playwright";
|
||||
|
||||
|
|
@ -132,7 +147,7 @@ async function injectHelpers() {
|
|||
// 1. window.__pi utility tests
|
||||
// =========================================================================
|
||||
|
||||
describe("window.__pi utilities", () => {
|
||||
describeOrSkip("window.__pi utilities", () => {
|
||||
it("simpleHash — deterministic output for same input", async () => {
|
||||
await page.setContent("<p>test</p>");
|
||||
await injectHelpers();
|
||||
|
|
@ -408,7 +423,7 @@ describe("window.__pi utilities", () => {
|
|||
// 2. Intent scoring tests
|
||||
// =========================================================================
|
||||
|
||||
describe("intent scoring", () => {
|
||||
describeOrSkip("intent scoring", () => {
|
||||
it("submit_form — submit button inside form scores higher than outside", async () => {
|
||||
await page.setContent(`
|
||||
<form>
|
||||
|
|
@ -585,7 +600,7 @@ describe("intent scoring", () => {
|
|||
// 3. Form analysis tests
|
||||
// =========================================================================
|
||||
|
||||
describe("form analysis", () => {
|
||||
describeOrSkip("form analysis", () => {
|
||||
const COMPLEX_FORM = `
|
||||
<form id="testform" action="/submit">
|
||||
<!-- label[for] association -->
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@ import type {
|
|||
} from "@singularity-forge/pi-coding-agent";
|
||||
import { detectAbandonMilestone } from "./abandon-detect.js";
|
||||
import type { AutoSession, SidecarItem } from "./auto/session.js";
|
||||
import { isDeterministicPolicyError } from "./auto-tool-tracking.js";
|
||||
import { resolveExpectedArtifactPath as resolveArtifactForContent } from "./auto-artifact-paths.js";
|
||||
import {
|
||||
diagnoseExpectedArtifact,
|
||||
|
|
@ -25,6 +24,7 @@ import {
|
|||
verifyExpectedArtifact,
|
||||
writeBlockerPlaceholder,
|
||||
} from "./auto-recovery.js";
|
||||
import { isDeterministicPolicyError } from "./auto-tool-tracking.js";
|
||||
import { type CloseoutOptions, closeoutUnit } from "./auto-unit-closeout.js";
|
||||
import { runSafely } from "./auto-utils.js";
|
||||
import { syncStateToProjectRoot } from "./auto-worktree.js";
|
||||
|
|
@ -67,13 +67,16 @@ import {
|
|||
} from "./pre-execution-checks.js";
|
||||
import { loadEffectiveSFPreferences } from "./preferences.js";
|
||||
import { loadPrompt } from "./prompt-loader.js";
|
||||
import { recordSelfFeedback } from "./self-feedback.js";
|
||||
// crossReferenceEvidence available for future use when verification_evidence is stored in DB
|
||||
// import { crossReferenceEvidence, type ClaimedEvidence } from "./safety/evidence-cross-ref.js";
|
||||
import { validateContent } from "./safety/content-validator.js";
|
||||
import { clearEvidenceFromDisk, getEvidence } from "./safety/evidence-collector.js";
|
||||
import {
|
||||
clearEvidenceFromDisk,
|
||||
getEvidence,
|
||||
} from "./safety/evidence-collector.js";
|
||||
import { validateFileChanges } from "./safety/file-change-validator.js";
|
||||
import { resolveSafetyHarnessConfig } from "./safety/safety-harness.js";
|
||||
import { recordSelfFeedback } from "./self-feedback.js";
|
||||
import { consumeSignal } from "./session-status-io.js";
|
||||
import {
|
||||
_getAdapter,
|
||||
|
|
@ -87,10 +90,10 @@ import {
|
|||
} from "./sf-db.js";
|
||||
import { deriveState } from "./state.js";
|
||||
import { parseUnitId } from "./unit-id.js";
|
||||
import { isAwaitingUserInput } from "./user-input-boundary.js";
|
||||
import { resolveUokFlags } from "./uok/flags.js";
|
||||
import { UokGateRunner } from "./uok/gate-runner.js";
|
||||
import { writeTurnGitTransaction } from "./uok/gitops.js";
|
||||
import { isAwaitingUserInput } from "./user-input-boundary.js";
|
||||
import { writePreExecutionEvidence } from "./verification-evidence.js";
|
||||
import { logError, logWarning } from "./workflow-logger.js";
|
||||
import { regenerateIfMissing } from "./workflow-projections.js";
|
||||
|
|
@ -1073,6 +1076,11 @@ export async function postUnitPreVerification(
|
|||
ctx.ui.notify(
|
||||
`Safety: ${warnings.length} unexpected file change(s) outside task plan`,
|
||||
"warning",
|
||||
{
|
||||
kind: "progress",
|
||||
source: "safety",
|
||||
dedupe_key: `safety:file-change:${s.currentUnit.id}`,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1113,6 +1121,11 @@ export async function postUnitPreVerification(
|
|||
ctx.ui.notify(
|
||||
`Safety: task ${sTid} has verification commands but no bash calls were recorded`,
|
||||
"warning",
|
||||
{
|
||||
kind: "progress",
|
||||
source: "safety",
|
||||
dedupe_key: `safety:evidence:${s.currentUnit.id}`,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1138,7 +1151,11 @@ export async function postUnitPreVerification(
|
|||
);
|
||||
for (const v of contentViolations) {
|
||||
logWarning("safety", `content: ${v.reason}`);
|
||||
ctx.ui.notify(`Content validation: ${v.reason}`, "warning");
|
||||
ctx.ui.notify(`Content validation: ${v.reason}`, "warning", {
|
||||
kind: "progress",
|
||||
source: "safety",
|
||||
dedupe_key: `safety:content:${s.currentUnit.id}:${v.reason}`,
|
||||
});
|
||||
}
|
||||
} catch (e) {
|
||||
debugLog("postUnit", {
|
||||
|
|
@ -1285,7 +1302,12 @@ export async function postUnitPreVerification(
|
|||
s.lastToolInvocationError = null;
|
||||
s.pendingVerificationRetry = null;
|
||||
s.verificationRetryCount.delete(retryKey);
|
||||
writeBlockerPlaceholder(s.currentUnit.type, s.currentUnit.id, s.basePath, reason);
|
||||
writeBlockerPlaceholder(
|
||||
s.currentUnit.type,
|
||||
s.currentUnit.id,
|
||||
s.basePath,
|
||||
reason,
|
||||
);
|
||||
ctx.ui.notify(
|
||||
`${s.currentUnit.type} ${s.currentUnit.id} — deterministic policy rejection, wrote blocker placeholder (no retries) (#4973)`,
|
||||
"warning",
|
||||
|
|
|
|||
|
|
@ -21,13 +21,11 @@ import {
|
|||
import { atomicWriteSync } from "../atomic-write.js";
|
||||
import { resetCompletionNudgeState } from "../auto-completion-nudge.js";
|
||||
import {
|
||||
USER_DRIVEN_DEEP_UNITS,
|
||||
isAwaitingUserInput,
|
||||
type PostUnitContext,
|
||||
type PreVerificationOpts,
|
||||
USER_DRIVEN_DEEP_UNITS,
|
||||
} from "../auto-post-unit.js";
|
||||
import { pauseAutoForProviderError } from "../provider-error-pause.js";
|
||||
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
|
||||
import {
|
||||
buildLoopRemediationSteps,
|
||||
diagnoseExpectedArtifact,
|
||||
|
|
@ -43,23 +41,23 @@ import {
|
|||
formatToolCallSummary,
|
||||
resetToolCallCounts,
|
||||
} from "../auto-tool-tracking.js";
|
||||
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.js";
|
||||
import { debugLog } from "../debug-logger.js";
|
||||
import { PROJECT_FILES } from "../detection.js";
|
||||
import { MergeConflictError } from "../git-service.js";
|
||||
import { recordLearnedOutcome } from "../learning/runtime.js";
|
||||
import {
|
||||
resolveMilestoneFile,
|
||||
resolveSliceFile,
|
||||
sfRoot,
|
||||
} from "../paths.js";
|
||||
import { resolveMilestoneFile, resolveSliceFile, sfRoot } from "../paths.js";
|
||||
import { resolvePersistModelChanges } from "../preferences.js";
|
||||
import {
|
||||
approveProductionMutationWithLlmPolicy,
|
||||
ensureProductionMutationApprovalTemplate,
|
||||
readProductionMutationApprovalStatus,
|
||||
} from "../production-mutation-approval.js";
|
||||
import { loadEvidenceFromDisk, resetEvidence } from "../safety/evidence-collector.js";
|
||||
import { parseUnitId } from "../unit-id.js";
|
||||
import { pauseAutoForProviderError } from "../provider-error-pause.js";
|
||||
import {
|
||||
loadEvidenceFromDisk,
|
||||
resetEvidence,
|
||||
} from "../safety/evidence-collector.js";
|
||||
import { getDirtyFiles } from "../safety/file-change-validator.js";
|
||||
import {
|
||||
cleanupCheckpoint,
|
||||
|
|
@ -67,10 +65,20 @@ import {
|
|||
rollbackToCheckpoint,
|
||||
} from "../safety/git-checkpoint.js";
|
||||
import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
|
||||
import { getMilestoneSlices, getSliceTaskCounts, getTask, isDbAvailable } from "../sf-db.js";
|
||||
import {
|
||||
getMilestoneSlices,
|
||||
getSliceTaskCounts,
|
||||
getTask,
|
||||
isDbAvailable,
|
||||
} from "../sf-db.js";
|
||||
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
|
||||
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
|
||||
import {
|
||||
handleProductAudit,
|
||||
type ProductAuditParams,
|
||||
} from "../tools/product-audit-tool.js";
|
||||
import type { Phase } from "../types.js";
|
||||
import { parseUnitId } from "../unit-id.js";
|
||||
import { writeUnitRuntimeRecord } from "../unit-runtime.js";
|
||||
import { resolveUokFlags } from "../uok/flags.js";
|
||||
import { UokGateRunner } from "../uok/gate-runner.js";
|
||||
|
|
@ -88,10 +96,6 @@ import {
|
|||
logError,
|
||||
logWarning,
|
||||
} from "../workflow-logger.js";
|
||||
import {
|
||||
handleProductAudit,
|
||||
type ProductAuditParams,
|
||||
} from "../tools/product-audit-tool.js";
|
||||
import {
|
||||
getRequiredWorkflowToolsForAutoUnit,
|
||||
getWorkflowTransportSupportError,
|
||||
|
|
@ -596,7 +600,11 @@ export async function runPreDispatch(
|
|||
|
||||
// Derive state
|
||||
let state = await deps.deriveState(s.basePath);
|
||||
if (uokFlags.planningFlow && isDbAvailable() && shouldRunPlanningFlowGate(state.phase)) {
|
||||
if (
|
||||
uokFlags.planningFlow &&
|
||||
isDbAvailable() &&
|
||||
shouldRunPlanningFlowGate(state.phase)
|
||||
) {
|
||||
let compiled = ensurePlanningFlowGraph(s.basePath, state);
|
||||
// Empty-graph recovery: stale DB caches can yield 0 nodes right after a
|
||||
// task-complete write. Invalidate caches, re-derive state, and retry once.
|
||||
|
|
@ -1208,8 +1216,7 @@ export async function runDispatch(
|
|||
const derivedKey = `${unitType}/${unitId}`;
|
||||
|
||||
const hasTransientTaskCompleteFailure =
|
||||
unitType === "execute-task" &&
|
||||
!!s.pendingTaskCompleteFailures?.has(unitId);
|
||||
unitType === "execute-task" && !!s.pendingTaskCompleteFailures?.has(unitId);
|
||||
|
||||
if (!s.pendingVerificationRetry && !hasTransientTaskCompleteFailure) {
|
||||
loopState.recentUnits.push({ key: derivedKey });
|
||||
|
|
@ -1276,7 +1283,7 @@ export async function runDispatch(
|
|||
(diagnostic?.length ?? 0) > MAX_RECOVERY_CHARS
|
||||
? diagnostic!.slice(0, MAX_RECOVERY_CHARS) +
|
||||
"\n\n[...diagnostic truncated]"
|
||||
: diagnostic ?? null;
|
||||
: (diagnostic ?? null);
|
||||
s.pendingRethinkAttempt = JSON.stringify({
|
||||
attempt,
|
||||
reason: stuckSignal.reason,
|
||||
|
|
@ -1286,9 +1293,10 @@ export async function runDispatch(
|
|||
unitType,
|
||||
unitId,
|
||||
});
|
||||
const rt = attempt === 5
|
||||
? "**FINAL STUCK ATTEMPT — 5 of 5.** "
|
||||
: `**STUCK RECOVERY ATTEMPT ${attempt - 1} of 4.** `;
|
||||
const rt =
|
||||
attempt === 5
|
||||
? "**FINAL STUCK ATTEMPT — 5 of 5.** "
|
||||
: `**STUCK RECOVERY ATTEMPT ${attempt - 1} of 4.** `;
|
||||
ctx.ui.notify(
|
||||
`${rt}Stuck on ${unitType} ${unitId} (${stuckSignal.reason}). Injecting diagnostic and retrying.`,
|
||||
"warning",
|
||||
|
|
@ -1677,12 +1685,7 @@ export async function runGuards(
|
|||
// FailureClass "input" → 0 retries (broken plan needs human fix, not
|
||||
// an LLM retry). Only fires when uok.gates.enabled is true.
|
||||
const uokFlagsGuards = resolveUokFlags(prefs);
|
||||
if (
|
||||
uokFlagsGuards.gates &&
|
||||
unitType === "execute-task" &&
|
||||
mid &&
|
||||
sliceId
|
||||
) {
|
||||
if (uokFlagsGuards.gates && unitType === "execute-task" && mid && sliceId) {
|
||||
const taskCounts = getSliceTaskCounts(mid, sliceId);
|
||||
const isFirstTaskForSlice = taskCounts.done === 0;
|
||||
if (isFirstTaskForSlice) {
|
||||
|
|
@ -1814,7 +1817,9 @@ export async function runUnitPhase(
|
|||
iterData: IterationData,
|
||||
loopState: LoopState,
|
||||
sidecarItem?: SidecarItem,
|
||||
): Promise<PhaseResult<{ unitStartedAt: number; requestDispatchedAt?: number }>> {
|
||||
): Promise<
|
||||
PhaseResult<{ unitStartedAt: number; requestDispatchedAt?: number }>
|
||||
> {
|
||||
const { ctx, pi, s, deps, prefs } = ic;
|
||||
const { unitType, unitId, prompt, state, mid } = iterData;
|
||||
|
||||
|
|
@ -2074,7 +2079,10 @@ export async function runUnitPhase(
|
|||
lines.push("", `**Suggested remediation:**\n${rethinkCtx.remediation}`);
|
||||
}
|
||||
if (rethinkCtx.diagnostic) {
|
||||
lines.push("", `**Full diagnostic from previous attempt:**\n${rethinkCtx.diagnostic}`);
|
||||
lines.push(
|
||||
"",
|
||||
`**Full diagnostic from previous attempt:**\n${rethinkCtx.diagnostic}`,
|
||||
);
|
||||
}
|
||||
lines.push("", "---", "", finalPrompt);
|
||||
finalPrompt = lines.join("\n");
|
||||
|
|
@ -2320,13 +2328,16 @@ export async function runUnitPhase(
|
|||
) {
|
||||
// Session-timeout cancellations are resumable pauses: pauseAuto below preserves the auto session
|
||||
// instead of routing the cancelled unit into the hard-stop path.
|
||||
const isSessionCreationTimeout = unitResult.errorContext.message?.includes("Session creation timed out");
|
||||
const isSessionCreationTimeout =
|
||||
unitResult.errorContext.message?.includes("Session creation timed out");
|
||||
|
||||
if (isSessionCreationTimeout) {
|
||||
consecutiveSessionTimeouts += 1;
|
||||
const baseRetryAfterMs = 30_000;
|
||||
const retryAfterMs = baseRetryAfterMs * 2 ** Math.max(0, consecutiveSessionTimeouts - 1);
|
||||
const allowAutoResume = consecutiveSessionTimeouts <= MAX_SESSION_TIMEOUT_AUTO_RESUMES;
|
||||
const retryAfterMs =
|
||||
baseRetryAfterMs * 2 ** Math.max(0, consecutiveSessionTimeouts - 1);
|
||||
const allowAutoResume =
|
||||
consecutiveSessionTimeouts <= MAX_SESSION_TIMEOUT_AUTO_RESUMES;
|
||||
|
||||
if (!allowAutoResume) {
|
||||
ctx.ui.notify(
|
||||
|
|
@ -2356,7 +2367,8 @@ export async function runUnitPhase(
|
|||
resume: allowAutoResume
|
||||
? () => {
|
||||
void resumeAutoAfterProviderDelay(pi, ctx).catch((err) => {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
const message =
|
||||
err instanceof Error ? err.message : String(err);
|
||||
ctx.ui.notify(
|
||||
`Session timeout recovery failed: ${message}`,
|
||||
"error",
|
||||
|
|
@ -2369,7 +2381,13 @@ export async function runUnitPhase(
|
|||
if (!allowAutoResume) {
|
||||
resetConsecutiveSessionTimeouts();
|
||||
}
|
||||
await emitCancelledUnitEnd(ic, unitType, unitId, unitStartSeq, unitResult.errorContext);
|
||||
await emitCancelledUnitEnd(
|
||||
ic,
|
||||
unitType,
|
||||
unitId,
|
||||
unitStartSeq,
|
||||
unitResult.errorContext,
|
||||
);
|
||||
return { action: "break", reason: "session-timeout" };
|
||||
}
|
||||
|
||||
|
|
@ -2378,7 +2396,11 @@ export async function runUnitPhase(
|
|||
`Unit timed out for ${unitType} ${unitId} (supervision may have failed). Pausing auto-mode.`,
|
||||
"warning",
|
||||
);
|
||||
debugLog("autoLoop", { phase: "unit-hard-timeout-pause", unitType, unitId });
|
||||
debugLog("autoLoop", {
|
||||
phase: "unit-hard-timeout-pause",
|
||||
unitType,
|
||||
unitId,
|
||||
});
|
||||
await deps.pauseAuto(ctx, pi);
|
||||
await emitCancelledUnitEnd(
|
||||
ic,
|
||||
|
|
@ -2468,7 +2490,10 @@ export async function runUnitPhase(
|
|||
u.startedAt === s.currentUnit?.startedAt,
|
||||
);
|
||||
if (lastUnit && lastUnit.toolCalls === 0) {
|
||||
if (USER_DRIVEN_DEEP_UNITS.has(unitType) && isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)) {
|
||||
if (
|
||||
USER_DRIVEN_DEEP_UNITS.has(unitType) &&
|
||||
isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)
|
||||
) {
|
||||
debugLog("runUnitPhase", {
|
||||
phase: "zero-tool-calls-awaiting-user-input",
|
||||
unitType,
|
||||
|
|
@ -2500,7 +2525,10 @@ export async function runUnitPhase(
|
|||
// and re-dispatch this unit.
|
||||
return {
|
||||
action: "next",
|
||||
data: { unitStartedAt: s.currentUnit?.startedAt, requestDispatchedAt: unitResult.requestDispatchedAt },
|
||||
data: {
|
||||
unitStartedAt: s.currentUnit?.startedAt,
|
||||
requestDispatchedAt: unitResult.requestDispatchedAt,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -2517,7 +2545,10 @@ export async function runUnitPhase(
|
|||
|
||||
const skipArtifactVerification = shouldSkipArtifactVerification(unitType);
|
||||
let artifactVerified: boolean;
|
||||
if (USER_DRIVEN_DEEP_UNITS.has(unitType) && isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)) {
|
||||
if (
|
||||
USER_DRIVEN_DEEP_UNITS.has(unitType) &&
|
||||
isAwaitingUserInput(s.lastUnitAgentEndMessages ?? undefined)
|
||||
) {
|
||||
// Skip artifact verification — unit is paused waiting for user input
|
||||
artifactVerified = false;
|
||||
} else {
|
||||
|
|
@ -2688,7 +2719,13 @@ export async function runUnitPhase(
|
|||
}
|
||||
s.preUnitDirtyFiles = [];
|
||||
|
||||
return { action: "next", data: { unitStartedAt: s.currentUnit?.startedAt, requestDispatchedAt: unitResult.requestDispatchedAt } };
|
||||
return {
|
||||
action: "next",
|
||||
data: {
|
||||
unitStartedAt: s.currentUnit?.startedAt,
|
||||
requestDispatchedAt: unitResult.requestDispatchedAt,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// ─── runFinalize ──────────────────────────────────────────────────────────────
|
||||
|
|
@ -2734,8 +2771,15 @@ export async function runFinalize(
|
|||
// Sidecar items use lightweight pre-verification opts
|
||||
const preVerificationOpts: PreVerificationOpts = sidecarItem
|
||||
? sidecarItem.kind === "hook"
|
||||
? { skipSettleDelay: true, skipWorktreeSync: true, agentEndMessages: s.lastUnitAgentEndMessages ?? undefined }
|
||||
: { skipSettleDelay: true, agentEndMessages: s.lastUnitAgentEndMessages ?? undefined }
|
||||
? {
|
||||
skipSettleDelay: true,
|
||||
skipWorktreeSync: true,
|
||||
agentEndMessages: s.lastUnitAgentEndMessages ?? undefined,
|
||||
}
|
||||
: {
|
||||
skipSettleDelay: true,
|
||||
agentEndMessages: s.lastUnitAgentEndMessages ?? undefined,
|
||||
}
|
||||
: { agentEndMessages: s.lastUnitAgentEndMessages ?? undefined };
|
||||
const _preUnitSnapshot = s.currentUnit
|
||||
? {
|
||||
|
|
@ -3079,7 +3123,11 @@ export async function runFinalize(
|
|||
const severity = logs.some((e) => e.severity === "error")
|
||||
? "error"
|
||||
: "warning";
|
||||
ctx.ui.notify(formatForNotification(logs), severity);
|
||||
ctx.ui.notify(formatForNotification(logs), severity, {
|
||||
kind: severity === "error" ? "notice" : "progress",
|
||||
source: "workflow-logger",
|
||||
dedupe_key: `workflow-issues:${iterData.unitType}:${iterData.unitId}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -83,12 +83,15 @@ export const MODEL_CAPABILITY_TIER: Record<string, ComplexityTier> = {
|
|||
"gpt-5.3-codex-spark": "light",
|
||||
"gemini-2.0-flash": "light",
|
||||
"gemini-flash-2.0": "light",
|
||||
"gemini-3.1-flash-lite-preview": "light",
|
||||
"gemini-2.5-flash-lite": "light",
|
||||
"glm-4.7-flash": "light",
|
||||
"glm-4.7-flashx": "light",
|
||||
"ministral-3b-latest": "light",
|
||||
"ministral-8b-latest": "light",
|
||||
"devstral-small-2505": "light",
|
||||
"devstral-small-2507": "light",
|
||||
"labs-devstral-small-2512": "light",
|
||||
|
||||
// Standard-tier models
|
||||
"claude-sonnet-4-6": "standard",
|
||||
|
|
@ -98,8 +101,16 @@ export const MODEL_CAPABILITY_TIER: Record<string, ComplexityTier> = {
|
|||
"gpt-4.1": "standard",
|
||||
"gpt-5.1-codex-max": "standard",
|
||||
"gemini-2.5-pro": "standard",
|
||||
"gemini-3-flash-preview": "standard",
|
||||
"gemini-2.5-flash": "standard",
|
||||
"deepseek-chat": "standard",
|
||||
"glm-4.7": "standard",
|
||||
"qwen3-coder:480b": "standard",
|
||||
"qwen3-coder-next": "standard",
|
||||
"kimi-k2.6": "standard",
|
||||
"kimi-for-coding": "standard",
|
||||
"MiniMax-M2.7": "standard",
|
||||
"MiniMax-M2.7-highspeed": "standard",
|
||||
"codestral-latest": "standard",
|
||||
"devstral-2512": "standard",
|
||||
"devstral-medium-2507": "standard",
|
||||
|
|
@ -131,6 +142,10 @@ export const MODEL_CAPABILITY_TIER: Record<string, ComplexityTier> = {
|
|||
o3: "heavy",
|
||||
"o4-mini": "heavy",
|
||||
"o4-mini-deep-research": "heavy",
|
||||
"gemini-3.1-pro-preview": "heavy",
|
||||
"gemini-3-pro-preview": "heavy",
|
||||
"kimi-k2-thinking": "heavy",
|
||||
"qwen3-next:80b": "heavy",
|
||||
"glm-5": "heavy",
|
||||
"glm-5-turbo": "heavy",
|
||||
"glm-5.1": "heavy",
|
||||
|
|
@ -176,6 +191,12 @@ const MODEL_COST_PER_1K_INPUT: Record<string, number> = {
|
|||
"o4-mini-deep-research": 0.005,
|
||||
"gemini-2.0-flash": 0.0001,
|
||||
"gemini-2.5-pro": 0.00125,
|
||||
"gemini-3.1-pro-preview": 0.00125,
|
||||
"gemini-3.1-flash-lite-preview": 0.0001,
|
||||
"gemini-3-pro-preview": 0.00125,
|
||||
"gemini-3-flash-preview": 0.0001,
|
||||
"gemini-2.5-flash": 0.0001,
|
||||
"gemini-2.5-flash-lite": 0.00005,
|
||||
"deepseek-chat": 0.00014,
|
||||
"glm-4.7": 0.0006,
|
||||
"glm-4.7-flash": 0,
|
||||
|
|
@ -184,12 +205,21 @@ const MODEL_COST_PER_1K_INPUT: Record<string, number> = {
|
|||
"glm-5-turbo": 0.0012,
|
||||
"glm-5.1": 0.0014,
|
||||
"glm-5v-turbo": 0.0012,
|
||||
"qwen3-coder:480b": 0.0004,
|
||||
"qwen3-coder-next": 0.0004,
|
||||
"qwen3-next:80b": 0.0002,
|
||||
"kimi-k2.6": 0.0006,
|
||||
"kimi-for-coding": 0.0006,
|
||||
"kimi-k2-thinking": 0.001,
|
||||
"MiniMax-M2.7": 0.0006,
|
||||
"MiniMax-M2.7-highspeed": 0.0006,
|
||||
"codestral-latest": 0.0003,
|
||||
"devstral-2512": 0.0004,
|
||||
"devstral-medium-2507": 0.0004,
|
||||
"devstral-medium-latest": 0.0004,
|
||||
"devstral-small-2505": 0.0001,
|
||||
"devstral-small-2507": 0.0001,
|
||||
"labs-devstral-small-2512": 0.0001,
|
||||
"magistral-medium-latest": 0.002,
|
||||
"magistral-small": 0.0005,
|
||||
"ministral-3b-latest": 0.00004,
|
||||
|
|
@ -523,6 +553,60 @@ export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
|
|||
longContext: 90,
|
||||
instruction: 75,
|
||||
},
|
||||
"gemini-3.1-pro-preview": {
|
||||
coding: 82,
|
||||
debugging: 78,
|
||||
research: 92,
|
||||
reasoning: 84,
|
||||
speed: 48,
|
||||
longContext: 98,
|
||||
instruction: 82,
|
||||
},
|
||||
"gemini-3-pro-preview": {
|
||||
coding: 82,
|
||||
debugging: 78,
|
||||
research: 90,
|
||||
reasoning: 84,
|
||||
speed: 50,
|
||||
longContext: 96,
|
||||
instruction: 82,
|
||||
},
|
||||
"gemini-3-flash-preview": {
|
||||
coding: 62,
|
||||
debugging: 55,
|
||||
research: 70,
|
||||
reasoning: 60,
|
||||
speed: 88,
|
||||
longContext: 88,
|
||||
instruction: 72,
|
||||
},
|
||||
"gemini-3.1-flash-lite-preview": {
|
||||
coding: 55,
|
||||
debugging: 48,
|
||||
research: 62,
|
||||
reasoning: 52,
|
||||
speed: 96,
|
||||
longContext: 85,
|
||||
instruction: 68,
|
||||
},
|
||||
"gemini-2.5-flash": {
|
||||
coding: 60,
|
||||
debugging: 52,
|
||||
research: 68,
|
||||
reasoning: 58,
|
||||
speed: 92,
|
||||
longContext: 85,
|
||||
instruction: 70,
|
||||
},
|
||||
"gemini-2.5-flash-lite": {
|
||||
coding: 52,
|
||||
debugging: 45,
|
||||
research: 58,
|
||||
reasoning: 48,
|
||||
speed: 97,
|
||||
longContext: 78,
|
||||
instruction: 65,
|
||||
},
|
||||
"gemini-2.0-flash": {
|
||||
coding: 50,
|
||||
debugging: 40,
|
||||
|
|
@ -761,6 +845,15 @@ export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
|
|||
longContext: 45,
|
||||
instruction: 65,
|
||||
},
|
||||
"labs-devstral-small-2512": {
|
||||
coding: 65,
|
||||
debugging: 58,
|
||||
research: 45,
|
||||
reasoning: 55,
|
||||
speed: 88,
|
||||
longContext: 60,
|
||||
instruction: 68,
|
||||
},
|
||||
|
||||
// ── Zhipu AI (GLM) ─────────────────────────────────────────────────────────
|
||||
"glm-5": {
|
||||
|
|
@ -826,6 +919,129 @@ export const MODEL_CAPABILITY_PROFILES: Record<string, ModelCapabilities> = {
|
|||
longContext: 45,
|
||||
instruction: 60,
|
||||
},
|
||||
|
||||
// ── Qwen / Ollama Cloud compatible tags ──────────────────────────────────
|
||||
"qwen3-coder:480b": {
|
||||
coding: 84,
|
||||
debugging: 78,
|
||||
research: 62,
|
||||
reasoning: 76,
|
||||
speed: 58,
|
||||
longContext: 86,
|
||||
instruction: 78,
|
||||
},
|
||||
"qwen3-coder-next": {
|
||||
coding: 82,
|
||||
debugging: 76,
|
||||
research: 60,
|
||||
reasoning: 74,
|
||||
speed: 70,
|
||||
longContext: 86,
|
||||
instruction: 76,
|
||||
},
|
||||
"qwen3-next:80b": {
|
||||
coding: 70,
|
||||
debugging: 68,
|
||||
research: 76,
|
||||
reasoning: 80,
|
||||
speed: 62,
|
||||
longContext: 86,
|
||||
instruction: 74,
|
||||
},
|
||||
|
||||
// ── Moonshot / Kimi ───────────────────────────────────────────────────────
|
||||
"kimi-k2.6": {
|
||||
coding: 88,
|
||||
debugging: 84,
|
||||
research: 72,
|
||||
reasoning: 82,
|
||||
speed: 55,
|
||||
longContext: 86,
|
||||
instruction: 84,
|
||||
},
|
||||
"kimi-for-coding": {
|
||||
coding: 88,
|
||||
debugging: 84,
|
||||
research: 72,
|
||||
reasoning: 82,
|
||||
speed: 55,
|
||||
longContext: 86,
|
||||
instruction: 84,
|
||||
},
|
||||
"kimi-k2-thinking": {
|
||||
coding: 86,
|
||||
debugging: 88,
|
||||
research: 78,
|
||||
reasoning: 92,
|
||||
speed: 30,
|
||||
longContext: 86,
|
||||
instruction: 84,
|
||||
},
|
||||
|
||||
// ── MiniMax ───────────────────────────────────────────────────────────────
|
||||
"MiniMax-M2.7": {
|
||||
coding: 84,
|
||||
debugging: 80,
|
||||
research: 78,
|
||||
reasoning: 84,
|
||||
speed: 52,
|
||||
longContext: 84,
|
||||
instruction: 82,
|
||||
},
|
||||
"MiniMax-M2.7-highspeed": {
|
||||
coding: 82,
|
||||
debugging: 78,
|
||||
research: 76,
|
||||
reasoning: 80,
|
||||
speed: 72,
|
||||
longContext: 84,
|
||||
instruction: 80,
|
||||
},
|
||||
};
|
||||
|
||||
/**
 * Maps alternate model IDs (provider-specific tags, older versions, cloud
 * variants) to the ID used as the key in the capability tables.
 *
 * canonicalCapabilityModelId() consults this table before the profile table,
 * so an alias here takes precedence over a same-named profile entry.
 * resolveCapabilityAlias() also matches keys case-insensitively.
 */
const MODEL_CAPABILITY_ALIASES: Record<string, string> = {
	// DeepSeek
	"deepseek-v3.1": "deepseek-chat",
	"deepseek-v3.2": "deepseek-chat",
	"deepseek-v4-flash": "deepseek-chat",
	"deepseek-v4-pro": "deepseek-chat",

	// Mistral: Devstral / Mistral Large / Ministral (provider-tagged IDs)
	"devstral-latest": "devstral-medium-latest",
	"devstral-2:123b": "devstral-2512",
	"mistral.devstral-2-123b": "devstral-2512",
	"devstral-small-2:24b": "devstral-small-2507",
	"mistral.devstral-small-2-24b": "labs-devstral-small-2512",
	"mistral.mistral-large-3-675b-instruct": "mistral-large-latest",
	"mistral.ministral-3-14b-instruct": "mistral-small-latest",
	"mistral.ministral-3-3b-instruct": "ministral-3b-latest",
	"mistral.ministral-3-8b-instruct": "ministral-8b-latest",

	// Gemini: self-alias; presumably kept so the case-insensitive alias lookup
	// normalizes differently-cased spellings of this ID.
	"gemini-3-flash-preview": "gemini-3-flash-preview",

	// GLM
	"glm-4.6": "glm-4.7",

	// GPT-OSS (scored with the closest GPT profile)
	"gpt-oss:120b": "gpt-4o",
	"gpt-oss:20b": "gpt-4o-mini",

	// Kimi
	"kimi-k2:1t": "kimi-k2.6",
	"kimi-k2.5": "kimi-k2.6",
	"kimi-for-coding": "kimi-k2.6",
	"kimi-k2.6:cloud": "kimi-k2.6",
	"kimi-k2.6-cloud": "kimi-k2.6",

	// MiniMax
	"minimax-m2": "MiniMax-M2.7",
	"minimax-m2.1": "MiniMax-M2.7",
	"minimax-m2.5": "MiniMax-M2.7",
	"minimax-m2.7": "MiniMax-M2.7",

	// Mistral: size-tagged IDs
	"mistral-large-3:675b": "mistral-large-latest",
	"ministral-3:3b": "ministral-3b-latest",
	"ministral-3:8b": "ministral-8b-latest",
	"ministral-3:14b": "mistral-small-latest",

	// Nemotron (scored with the closest GPT profile)
	"nemotron-3-nano:30b": "gpt-4o-mini",
	"nemotron-3-super": "gpt-4o",

	// Qwen3 Coder
	"qwen3-coder-480b-a35b-v1:0": "qwen3-coder:480b",
	"qwen3-coder-480b-a35b": "qwen3-coder:480b",
	"qwen3-coder": "qwen3-coder:480b",
	"qwen3-coder:free": "qwen3-coder:480b",
	"qwen3-coder-30b-a3b-instruct": "qwen3-coder-next",
	"qwen3-coder-flash": "qwen3-coder-next",

	// Qwen3 Next
	"qwen3-next-80b-a3b": "qwen3-next:80b",
	"qwen3-next-80b-a3b-instruct": "qwen3-next:80b",
	"qwen3-next-80b-a3b-instruct:free": "qwen3-next:80b",
	"qwen3-next-80b-a3b-thinking": "qwen3-next:80b",
};
|
||||
|
||||
// ─── Base Task Requirements Data Table ───────────────────────────────────────
|
||||
|
|
@ -922,8 +1138,10 @@ export function scoreEligibleModels(
|
|||
capabilityOverrides?: Record<string, Partial<ModelCapabilities>>,
|
||||
): Array<{ modelId: string; score: number }> {
|
||||
const scored = eligibleModelIds.map((modelId) => {
|
||||
const builtin = MODEL_CAPABILITY_PROFILES[modelId];
|
||||
const override = capabilityOverrides?.[modelId];
|
||||
const canonicalModelId = canonicalCapabilityModelId(modelId);
|
||||
const builtin = MODEL_CAPABILITY_PROFILES[canonicalModelId];
|
||||
const override =
|
||||
capabilityOverrides?.[modelId] ?? capabilityOverrides?.[canonicalModelId];
|
||||
const profile: ModelCapabilities = builtin
|
||||
? override
|
||||
? { ...builtin, ...override }
|
||||
|
|
@ -950,6 +1168,29 @@ export function scoreEligibleModels(
|
|||
return scored;
|
||||
}
|
||||
|
||||
/**
 * Resolve a model ID to the key used by the capability tables.
 *
 * Resolution order:
 *   1. alias lookup on the bare ID, then on the cloud-suffix-stripped ID
 *   2. exact profile key match on the stripped ID
 *   3. substring match (either direction) against known profile keys
 *   4. the stripped ID unchanged
 *
 * @param modelId Model ID, optionally prefixed with `provider/` and/or
 *   suffixed with `:cloud` / `-cloud`.
 * @returns The canonical ID, or the stripped input when nothing matches.
 */
function canonicalCapabilityModelId(modelId: string): string {
	// Keep only the segment after the last "/" (drops any provider prefix).
	const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
	const normalizedId = bareId.replace(/:cloud$/i, "").replace(/-cloud$/i, "");

	const aliased =
		resolveCapabilityAlias(bareId) ?? resolveCapabilityAlias(normalizedId);
	if (aliased) return aliased;
	if (MODEL_CAPABILITY_PROFILES[normalizedId]) return normalizedId;

	// Every string includes "", so without this guard an empty ID (e.g. from
	// "provider/" or ":cloud") would be resolved to the first profile key.
	if (normalizedId === "") return normalizedId;

	for (const knownId of Object.keys(MODEL_CAPABILITY_PROFILES)) {
		if (normalizedId.includes(knownId) || knownId.includes(normalizedId)) {
			return knownId;
		}
	}
	return normalizedId;
}
|
||||
|
||||
/**
 * Look up a model ID in MODEL_CAPABILITY_ALIASES.
 *
 * Tries an exact key match first, then a case-insensitive match over all
 * alias keys.
 *
 * @param modelId Bare model ID (no provider prefix).
 * @returns The alias target, or `undefined` when no alias key matches.
 */
function resolveCapabilityAlias(modelId: string): string | undefined {
	// Own-property check: a plain index lookup would return inherited
	// Object.prototype members (e.g. for "constructor" or "toString"),
	// which are not strings.
	if (Object.prototype.hasOwnProperty.call(MODEL_CAPABILITY_ALIASES, modelId)) {
		const direct = MODEL_CAPABILITY_ALIASES[modelId];
		if (direct) return direct;
	}
	const lower = modelId.toLowerCase();
	return Object.entries(MODEL_CAPABILITY_ALIASES).find(
		([alias]) => alias.toLowerCase() === lower,
	)?.[1];
}
|
||||
|
||||
/**
|
||||
* Return all models eligible for a given tier, sorted cheapest first.
|
||||
* If routingConfig.tier_models[tier] is set and available, returns only that
|
||||
|
|
@ -1193,18 +1434,17 @@ export function defaultRoutingConfig(): DynamicRoutingConfig {
|
|||
// ─── Internal ────────────────────────────────────────────────────────────────
|
||||
|
||||
export function getModelTier(modelId: string): ComplexityTier {
|
||||
// Strip provider prefix if present
|
||||
const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
|
||||
const canonicalId = canonicalCapabilityModelId(modelId);
|
||||
|
||||
// Check exact match first
|
||||
if (MODEL_CAPABILITY_TIER[bareId]) return MODEL_CAPABILITY_TIER[bareId];
|
||||
if (MODEL_CAPABILITY_TIER[canonicalId]) return MODEL_CAPABILITY_TIER[canonicalId];
|
||||
|
||||
const sizeTier = inferTierFromModelSize(bareId);
|
||||
const sizeTier = inferTierFromModelSize(canonicalId);
|
||||
if (sizeTier) return sizeTier;
|
||||
|
||||
// Check if any known model ID is a prefix/suffix match
|
||||
for (const [knownId, tier] of Object.entries(MODEL_CAPABILITY_TIER)) {
|
||||
if (bareId.includes(knownId) || knownId.includes(bareId)) return tier;
|
||||
if (canonicalId.includes(knownId) || knownId.includes(canonicalId)) return tier;
|
||||
}
|
||||
|
||||
// Unknown models are assumed standard (per D-15: avoids silently ignoring user config)
|
||||
|
|
@ -1223,24 +1463,26 @@ function inferTierFromModelSize(modelId: string): ComplexityTier | null {
|
|||
|
||||
/** Check if a model ID has a known capability tier mapping. (#2192) */
|
||||
function isKnownModel(modelId: string): boolean {
|
||||
const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
|
||||
if (MODEL_CAPABILITY_TIER[bareId]) return true;
|
||||
const canonicalId = canonicalCapabilityModelId(modelId);
|
||||
if (MODEL_CAPABILITY_TIER[canonicalId]) return true;
|
||||
for (const knownId of Object.keys(MODEL_CAPABILITY_TIER)) {
|
||||
if (bareId.includes(knownId) || knownId.includes(bareId)) return true;
|
||||
if (canonicalId.includes(knownId) || knownId.includes(canonicalId)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
function getModelCost(modelId: string): number {
|
||||
const bareId = modelId.includes("/") ? modelId.split("/").pop()! : modelId;
|
||||
const canonicalId = canonicalCapabilityModelId(modelId);
|
||||
|
||||
if (MODEL_COST_PER_1K_INPUT[bareId] !== undefined) {
|
||||
return MODEL_COST_PER_1K_INPUT[bareId];
|
||||
if (MODEL_COST_PER_1K_INPUT[canonicalId] !== undefined) {
|
||||
return MODEL_COST_PER_1K_INPUT[canonicalId];
|
||||
}
|
||||
|
||||
// Check partial matches
|
||||
for (const [knownId, cost] of Object.entries(MODEL_COST_PER_1K_INPUT)) {
|
||||
if (bareId.includes(knownId) || knownId.includes(bareId)) return cost;
|
||||
if (canonicalId.includes(knownId) || knownId.includes(canonicalId)) {
|
||||
return cost;
|
||||
}
|
||||
}
|
||||
|
||||
// Unknown cost — assume expensive to avoid routing to unknown cheap models
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
// SF Extension — Persistent Notification Store
|
||||
// Captures all ctx.ui.notify() calls and workflow-logger warnings to
|
||||
// Captures durable ctx.ui.notify() calls and workflow-logger errors to
|
||||
// .sf/notifications.jsonl so they survive context resets and session restarts.
|
||||
// Rotates at MAX_ENTRIES to prevent unbounded growth.
|
||||
|
||||
|
|
@ -99,6 +99,7 @@ export function appendNotification(
|
|||
): void {
|
||||
if (!_basePath) return;
|
||||
if (_suppressCount > 0) return;
|
||||
if (!shouldPersistNotification(severity, metadata)) return;
|
||||
const persistedMessage =
|
||||
message.length > 500 ? message.slice(0, 500) + "…" : message;
|
||||
// Use explicit dedupe_key when provided; fall back to message-hash based key.
|
||||
|
|
@ -141,6 +142,14 @@ export function appendNotification(
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Decide whether a notification is written to the persistent store.
 *
 * Notifications whose metadata kind is "progress" are skipped; everything
 * else is persisted. The severity argument is currently not consulted.
 */
function shouldPersistNotification(
	_severity: NotifySeverity,
	metadata?: NotificationMetadata,
): boolean {
	return metadata?.kind !== "progress";
}
|
||||
|
||||
/**
|
||||
* Read all notification entries from disk. Returns newest-first.
|
||||
*/
|
||||
|
|
@ -350,7 +359,10 @@ function _withLock<T>(basePath: string, fn: () => T): T {
|
|||
const stat = readFileSync(lockPath, "utf-8");
|
||||
const lockTime = parseInt(stat, 10);
|
||||
// Treat NaN (creator crashed before writing timestamp) as stale.
|
||||
if (isNaN(lockTime) || (Number.isFinite(lockTime) && Date.now() - lockTime > 5000)) {
|
||||
if (
|
||||
Number.isNaN(lockTime) ||
|
||||
(Number.isFinite(lockTime) && Date.now() - lockTime > 5000)
|
||||
) {
|
||||
try {
|
||||
unlinkSync(lockPath);
|
||||
} catch {
|
||||
|
|
|
|||
|
|
@ -271,6 +271,69 @@ test("scoreModel returns 50 for empty requirements", () => {
|
|||
assert.equal(score, 50);
|
||||
});
|
||||
|
||||
test("scoreEligibleModels treats kimi-for-coding as the Kimi K2.6 capability profile", () => {
|
||||
const requirements = { coding: 1.0 };
|
||||
const scored = scoreEligibleModels(
|
||||
["kimi-coding/kimi-for-coding", "unknown-future-model"],
|
||||
requirements,
|
||||
);
|
||||
|
||||
assert.equal(scored[0]?.modelId, "kimi-coding/kimi-for-coding");
|
||||
assert.equal(scored[0]?.score, MODEL_CAPABILITY_PROFILES["kimi-k2.6"].coding);
|
||||
});
|
||||
|
||||
test("scoreEligibleModels uses bare model IDs for provider-prefixed GLM routes", () => {
|
||||
const requirements = { reasoning: 1.0 };
|
||||
const scored = scoreEligibleModels(
|
||||
["zai/glm-5.1", "zai/glm-4.7"],
|
||||
requirements,
|
||||
);
|
||||
|
||||
assert.equal(scored[0]?.modelId, "zai/glm-5.1");
|
||||
assert.equal(scored[0]?.score, MODEL_CAPABILITY_PROFILES["glm-5.1"].reasoning);
|
||||
});
|
||||
|
||||
test("scoreEligibleModels keeps Kimi thinking distinct from plain K2.6", () => {
|
||||
const reasoningScores = scoreEligibleModels(
|
||||
["kimi-coding/kimi-k2-thinking", "kimi-coding/kimi-k2.6"],
|
||||
{ reasoning: 1.0 },
|
||||
);
|
||||
assert.equal(reasoningScores[0]?.modelId, "kimi-coding/kimi-k2-thinking");
|
||||
|
||||
const speedScores = scoreEligibleModels(
|
||||
["kimi-coding/kimi-k2-thinking", "kimi-coding/kimi-k2.6"],
|
||||
{ speed: 1.0 },
|
||||
);
|
||||
assert.equal(speedScores[0]?.modelId, "kimi-coding/kimi-k2.6");
|
||||
});
|
||||
|
||||
test("scoreEligibleModels normalizes Ollama Cloud suffix aliases", () => {
|
||||
const scored = scoreEligibleModels(
|
||||
["ollama-cloud/kimi-k2.6:cloud", "unknown-future-model"],
|
||||
{ coding: 1.0 },
|
||||
);
|
||||
|
||||
assert.equal(scored[0]?.modelId, "ollama-cloud/kimi-k2.6:cloud");
|
||||
assert.equal(scored[0]?.score, MODEL_CAPABILITY_PROFILES["kimi-k2.6"].coding);
|
||||
});
|
||||
|
||||
test("scoreEligibleModels normalizes Ollama Cloud family aliases", () => {
|
||||
const scored = scoreEligibleModels(
|
||||
[
|
||||
"ollama-cloud/minimax-m2.7",
|
||||
"ollama-cloud/devstral-2:123b",
|
||||
"ollama-cloud/qwen3-coder:480b",
|
||||
],
|
||||
{ coding: 1.0 },
|
||||
);
|
||||
|
||||
assert.ok(scored.every((entry) => entry.score > 50));
|
||||
assert.deepEqual(
|
||||
scored.map((entry) => getModelTier(entry.modelId)),
|
||||
["standard", "standard", "standard"],
|
||||
);
|
||||
});
|
||||
|
||||
test("computeTaskRequirements returns base vector for known unit type", () => {
|
||||
const reqs = computeTaskRequirements("execute-task");
|
||||
assert.ok(reqs.coding !== undefined && reqs.coding > 0);
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import assert from "node:assert/strict";
|
|||
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, test } from 'vitest';
|
||||
import { afterEach, beforeEach, describe, test } from "vitest";
|
||||
|
||||
import {
|
||||
isBlockedNotification,
|
||||
|
|
@ -126,10 +126,10 @@ describe("isMilestoneReadyNotification — metadata-first", () => {
|
|||
|
||||
describe("isPauseNotification — metadata-first", () => {
|
||||
test("returns true when metadata.kind=terminal and blocking=true", () => {
|
||||
const event = notifyEvent(
|
||||
"Autonomous mode paused. Type to interact.",
|
||||
{ kind: "terminal", blocking: true },
|
||||
);
|
||||
const event = notifyEvent("Autonomous mode paused. Type to interact.", {
|
||||
kind: "terminal",
|
||||
blocking: true,
|
||||
});
|
||||
assert.equal(isPauseNotification(event), true);
|
||||
});
|
||||
});
|
||||
|
|
@ -157,7 +157,11 @@ describe("notification-store — dedupe_key", () => {
|
|||
dedupe_key: "sync:progress",
|
||||
});
|
||||
const entries = readNotifications(tmpDir);
|
||||
assert.equal(entries.length, 1, "second entry with same dedupe_key should be dropped");
|
||||
assert.equal(
|
||||
entries.length,
|
||||
1,
|
||||
"second entry with same dedupe_key should be dropped",
|
||||
);
|
||||
});
|
||||
|
||||
test("does not deduplicate across different dedupe_keys", () => {
|
||||
|
|
@ -168,7 +172,11 @@ describe("notification-store — dedupe_key", () => {
|
|||
dedupe_key: "sync:B",
|
||||
});
|
||||
const entries = readNotifications(tmpDir);
|
||||
assert.equal(entries.length, 2, "different dedupe_keys should produce separate entries");
|
||||
assert.equal(
|
||||
entries.length,
|
||||
2,
|
||||
"different dedupe_keys should produce separate entries",
|
||||
);
|
||||
});
|
||||
|
||||
test("stores metadata on the entry", () => {
|
||||
|
|
@ -184,15 +192,13 @@ describe("notification-store — dedupe_key", () => {
|
|||
assert.equal(entries[0].metadata?.source, "workflow");
|
||||
});
|
||||
|
||||
test("automated progress notice does not affect blocking classification", () => {
|
||||
test("automated progress notice is not persisted or treated as blocking", () => {
|
||||
appendNotification("Running checks...", "info", "notify", {
|
||||
kind: "progress",
|
||||
source: "workflow",
|
||||
});
|
||||
const entries = readNotifications(tmpDir);
|
||||
assert.equal(entries.length, 1);
|
||||
// The notice is stored, but kind=progress means headless will not treat it as blocked.
|
||||
assert.equal(entries[0].metadata?.kind, "progress");
|
||||
assert.equal(entries.length, 0);
|
||||
// Confirm headless classification: this event should NOT be blocked
|
||||
const fakeEvent = notifyEvent("Running checks...", {
|
||||
kind: "progress",
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import {
|
|||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, test, vi } from 'vitest';
|
||||
import { afterEach, beforeEach, describe, test, vi } from "vitest";
|
||||
|
||||
import {
|
||||
_resetNotificationStore,
|
||||
|
|
|
|||
|
|
@ -532,7 +532,7 @@ test("workflow MCP ask_user_questions uses stdio elicitation round-trip", async
|
|||
},
|
||||
},
|
||||
undefined,
|
||||
{ timeout: 60_000 },
|
||||
{ timeout: 120_000 },
|
||||
);
|
||||
|
||||
assert.ok(
|
||||
|
|
|
|||
|
|
@ -299,17 +299,18 @@ function _push(
|
|||
const ctxStr = context ? ` ${JSON.stringify(context)}` : "";
|
||||
_writeStderr(`[sf:${component}] ${prefix}: ${message}${ctxStr}\n`);
|
||||
|
||||
// Persist to notification store (both warnings and errors)
|
||||
try {
|
||||
appendNotification(
|
||||
`[${component}] ${message}`,
|
||||
severity === "error" ? "error" : "warning",
|
||||
"workflow-logger",
|
||||
);
|
||||
} catch (notifErr) {
|
||||
_writeStderr(
|
||||
`[sf:workflow-logger] notification-store append failed: ${(notifErr as Error).message}\n`,
|
||||
);
|
||||
if (severity === "error") {
|
||||
try {
|
||||
appendNotification(
|
||||
`[${component}] ${message}`,
|
||||
"error",
|
||||
"workflow-logger",
|
||||
);
|
||||
} catch (notifErr) {
|
||||
_writeStderr(
|
||||
`[sf:workflow-logger] notification-store append failed: ${(notifErr as Error).message}\n`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Buffer for auto-loop to drain
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ import { execFileSync, spawn } from "node:child_process";
|
|||
import { existsSync, mkdirSync, mkdtempSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { test, afterEach } from 'vitest';
|
||||
import { afterEach, test } from "vitest";
|
||||
|
||||
const projectRoot = process.cwd();
|
||||
const loaderPath = join(projectRoot, "dist", "loader.js");
|
||||
|
|
@ -43,13 +43,13 @@ type RunResult = {
|
|||
* Spawn `node dist/loader.js ...args` and collect output.
|
||||
*
|
||||
* @param args CLI arguments to pass after the script path
|
||||
* @param timeoutMs Maximum time to wait before SIGTERM (default 8 s)
|
||||
* @param timeoutMs Maximum time to wait before SIGTERM (default 15 s)
|
||||
* @param env Additional / override environment variables
|
||||
* @param cwd Working directory for the child process (default: projectRoot)
|
||||
*/
|
||||
function runSf(
|
||||
args: string[],
|
||||
timeoutMs = 8_000,
|
||||
timeoutMs = 15_000,
|
||||
env: NodeJS.ProcessEnv = {},
|
||||
cwd: string = projectRoot,
|
||||
): Promise<RunResult> {
|
||||
|
|
@ -88,7 +88,6 @@ function runSf(
|
|||
|
||||
/** Strip ANSI escape codes from a string. */
|
||||
function stripAnsi(s: string): string {
	// ESC "[" + optional digits/semicolons + one terminating letter.
	const ansiSequence = /\x1b\[[0-9;]*[A-Za-z]/g;
	return s.replace(ansiSequence, "");
}
|
||||
|
||||
|
|
@ -426,7 +425,7 @@ test("sf -h is equivalent to --help", async () => {
|
|||
// 13. sf headless without .sf/ directory exits 1 with clean error
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
test("sf headless without .sf/ directory exits 1 with clean error", async (t) => {
|
||||
test("sf headless without .sf/ directory exits 1 with clean error", async () => {
|
||||
const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-no-sf-"));
|
||||
|
||||
afterEach(() => {
|
||||
|
|
@ -451,19 +450,14 @@ test("sf headless without .sf/ directory exits 1 with clean error", async (t) =>
|
|||
// 14. sf headless new-milestone without --context exits 1
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
test("sf headless new-milestone without --context exits 1", async (t) => {
|
||||
test("sf headless new-milestone without --context exits 1", async () => {
|
||||
const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-no-ctx-"));
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
const result = await runSf(
|
||||
["headless", "new-milestone"],
|
||||
10_000,
|
||||
{},
|
||||
tmpDir,
|
||||
);
|
||||
const result = await runSf(["headless", "new-milestone"], 10_000, {}, tmpDir);
|
||||
|
||||
assert.ok(!result.timedOut, "process should not hang");
|
||||
assert.strictEqual(result.code, 1, `expected exit 1, got ${result.code}`);
|
||||
|
|
@ -481,7 +475,7 @@ test("sf headless new-milestone without --context exits 1", async (t) => {
|
|||
// 15. sf headless --timeout with invalid value exits 1
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
test("sf headless --timeout with invalid value exits 1", async (t) => {
|
||||
test("sf headless --timeout with invalid value exits 1", async () => {
|
||||
const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-bad-timeout-"));
|
||||
|
||||
afterEach(() => {
|
||||
|
|
@ -511,7 +505,7 @@ test("sf headless --timeout with invalid value exits 1", async (t) => {
|
|||
// 16. sf headless --timeout with negative value exits 1
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
test("sf headless --timeout with negative value exits 1", async (t) => {
|
||||
test("sf headless --timeout with negative value exits 1", async () => {
|
||||
const tmpDir = mkdtempSync(join(tmpdir(), "sf-e2e-neg-timeout-"));
|
||||
|
||||
afterEach(() => {
|
||||
|
|
@ -537,7 +531,7 @@ test("sf headless --timeout with negative value exits 1", async (t) => {
|
|||
assertNoCrashMarkers(combined);
|
||||
});
|
||||
|
||||
test("sf headless query returns JSON from the built CLI", async (t) => {
|
||||
test("sf headless query returns JSON from the built CLI", async () => {
|
||||
const tmpDir = createTempGitRepo("sf-e2e-query-");
|
||||
|
||||
afterEach(() => {
|
||||
|
|
@ -565,7 +559,7 @@ test("sf headless query returns JSON from the built CLI", async (t) => {
|
|||
);
|
||||
});
|
||||
|
||||
test("sf worktree list loads the built worktree CLI without module errors", async (t) => {
|
||||
test("sf worktree list loads the built worktree CLI without module errors", async () => {
|
||||
const tmpDir = createTempGitRepo("sf-e2e-worktree-");
|
||||
|
||||
afterEach(() => {
|
||||
|
|
|
|||
|
|
@ -21,11 +21,14 @@ import {
|
|||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { test, afterEach } from 'vitest';
|
||||
import { delimiter, dirname, join } from "node:path";
|
||||
import { createGunzip } from "node:zlib";
|
||||
import { afterEach, test } from "vitest";
|
||||
|
||||
const projectRoot = process.cwd();
|
||||
const packageName = JSON.parse(
|
||||
readFileSync(join(projectRoot, "package.json"), "utf-8"),
|
||||
).name as string;
|
||||
|
||||
if (!existsSync(join(projectRoot, "dist"))) {
|
||||
throw new Error("dist/ not found — run: npm run build");
|
||||
|
|
@ -50,6 +53,9 @@ function createNpmSandbox(prefix: string): NpmSandbox {
|
|||
installPrefix,
|
||||
env: {
|
||||
...process.env,
|
||||
PATH: [dirname(process.execPath), process.env.PATH]
|
||||
.filter(Boolean)
|
||||
.join(delimiter),
|
||||
NPM_CONFIG_CACHE: cacheDir,
|
||||
npm_config_cache: cacheDir,
|
||||
PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: "1",
|
||||
|
|
@ -62,6 +68,8 @@ function buildQuietNpmEnv(sandbox: NpmSandbox): NodeJS.ProcessEnv {
|
|||
...sandbox.env,
|
||||
NPM_CONFIG_LOGLEVEL: "error",
|
||||
npm_config_loglevel: "error",
|
||||
NPM_CONFIG_ENGINE_STRICT: "false",
|
||||
npm_config_engine_strict: "false",
|
||||
NPM_CONFIG_FUND: "false",
|
||||
npm_config_fund: "false",
|
||||
NPM_CONFIG_AUDIT: "false",
|
||||
|
|
@ -141,7 +149,7 @@ function listTarEntries(tarballPath: string): Promise<string[]> {
|
|||
// 1. npm pack produces valid tarball with correct file layout
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
test("npm pack produces tarball with required files", async (t) => {
|
||||
test("npm pack produces tarball with required files", async () => {
|
||||
const sandbox = createNpmSandbox("sf-pack-test-");
|
||||
const tarballPath = packTarball(sandbox);
|
||||
|
||||
|
|
@ -204,13 +212,13 @@ test("npm pack produces tarball with required files", async (t) => {
|
|||
".sf",
|
||||
"pkg/package.json piConfig.configDir is .sf",
|
||||
);
|
||||
});
|
||||
}, 240_000);
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 2. npm pack → install → sf binary resolves
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
test("tarball installs and sf binary resolves", async (t) => {
|
||||
test("tarball installs and sf binary resolves", async () => {
|
||||
const sandbox = createNpmSandbox("sf-install-test-");
|
||||
const tarballPath = packTarball(sandbox);
|
||||
|
||||
|
|
@ -242,7 +250,7 @@ test("tarball installs and sf binary resolves", async (t) => {
|
|||
const installedLoader = join(
|
||||
sandbox.installPrefix,
|
||||
"node_modules",
|
||||
"sf-run",
|
||||
packageName,
|
||||
"dist",
|
||||
"loader.js",
|
||||
);
|
||||
|
|
@ -258,7 +266,7 @@ test("tarball installs and sf binary resolves", async (t) => {
|
|||
const installedSfExt = join(
|
||||
sandbox.installPrefix,
|
||||
"node_modules",
|
||||
"sf-run",
|
||||
packageName,
|
||||
"src",
|
||||
"resources",
|
||||
"extensions",
|
||||
|
|
@ -269,7 +277,7 @@ test("tarball installs and sf binary resolves", async (t) => {
|
|||
existsSync(installedSfExt),
|
||||
"bundled sf extension present in installed package",
|
||||
);
|
||||
});
|
||||
}, 420_000);
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// 3. Launch → extensions load → no errors on stderr
|
||||
|
|
@ -329,7 +337,7 @@ test("sf launches and loads extensions without errors", async () => {
|
|||
);
|
||||
});
|
||||
|
||||
test("sf exits early with a clear message when synced resources are newer than the binary", async (t) => {
|
||||
test("sf exits early with a clear message when synced resources are newer than the binary", async () => {
|
||||
const fakeHome = mkdtempSync(join(tmpdir(), "sf-version-skew-"));
|
||||
const fakeAgentDir = join(fakeHome, ".sf", "agent");
|
||||
mkdirSync(fakeAgentDir, { recursive: true });
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue