fix: harden sf server control loop
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions
Some checks are pending
CI / detect-changes (push) Waiting to run
CI / docs-check (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / integration-tests (push) Blocked by required conditions
CI / windows-portability (push) Blocked by required conditions
CI / rtk-portability (linux, blacksmith-4vcpu-ubuntu-2404) (push) Blocked by required conditions
CI / rtk-portability (macos, macos-15) (push) Blocked by required conditions
CI / rtk-portability (windows, blacksmith-4vcpu-windows-2025) (push) Blocked by required conditions
This commit is contained in:
parent
70d89eebec
commit
acd907fec2
33 changed files with 1602 additions and 192 deletions
|
|
@ -1,8 +1,6 @@
|
|||
---
|
||||
version: 1
|
||||
experimental:
|
||||
smoke_gate: false
|
||||
---
|
||||
# SF Preferences
|
||||
|
||||
See `~/.sf/agent/extensions/sf/docs/preferences-reference.md` for full documentation.
|
||||
# See `~/.sf/agent/extensions/sf/docs/preferences-reference.md` for full documentation.
|
||||
|
|
|
|||
21
AGENTS.md
21
AGENTS.md
|
|
@ -98,6 +98,27 @@ npm run release:changelog
|
|||
npm run release:bump
|
||||
```
|
||||
|
||||
## Running SF Locally
|
||||
|
||||
The server surface is the default local dogfooding surface for web/RPC/autonomous
|
||||
control. The TUI still exists, but do not use it as the default way to run or
|
||||
verify autonomous mode.
|
||||
|
||||
```bash
|
||||
# Source/dev server
|
||||
npm run sf:server -- --port 4000 --host 127.0.0.1
|
||||
|
||||
# Built server after npm run build:core or npm run build
|
||||
npm run sf:server:dist -- --port 4000 --host 127.0.0.1
|
||||
```
|
||||
|
||||
Bind only to trusted interfaces. For this workstation, localhost plus Tailscale is
|
||||
acceptable; public `0.0.0.0` is not the default. If a server is already running,
|
||||
use `sf headless ...` as the machine/control surface instead of starting a
|
||||
second writer. Server-forwarded feedback writes are queued and drained by the
|
||||
server before autonomous dispatch, so CLI control does not block behind a busy
|
||||
unit.
|
||||
|
||||
## Coding Style & Naming Conventions
|
||||
|
||||
- **Language**: TypeScript with `"strict": true` enabled in all packages
|
||||
|
|
|
|||
31
CLAUDE.md
31
CLAUDE.md
|
|
@ -98,3 +98,34 @@ When adding a new `{{variable}}` to a prompt template in `prompts/`, you must:
|
|||
`loadPrompt` throws at runtime if any `{{var}}` in the template has no
|
||||
corresponding key in the vars object — this is intentional to catch
|
||||
template/code drift early.
|
||||
|
||||
## Running the SF server in this repo
|
||||
|
||||
Use the server surface for dogfooding and browser/RPC control. Do not start the
|
||||
TUI as the default way to exercise autonomous mode.
|
||||
|
||||
```bash
|
||||
# source/dev server, with resource redirect and restart support
|
||||
npm run sf:server -- --port 4000 --host 127.0.0.1
|
||||
|
||||
# built server, after npm run build:core or npm run build
|
||||
npm run sf:server:dist -- --port 4000 --host 127.0.0.1
|
||||
```
|
||||
|
||||
If the server is already running, prefer `sf headless ...` control commands
|
||||
rather than starting a second writer. Feedback add/resolve commands are
|
||||
forwarded to the active server and queued there so CLI control does not hang
|
||||
behind an autonomous unit.
|
||||
|
||||
For remote local-network access, bind an additional trusted interface such as a
|
||||
Tailscale address. Do not bind `0.0.0.0` for the dev server unless an explicit
|
||||
fronting proxy/firewall decision is in place.
|
||||
|
||||
Before assuming a source edit is live, rebuild the relevant output:
|
||||
|
||||
```bash
|
||||
npm run build:core
|
||||
```
|
||||
|
||||
Then restart the server. Stale `dist/` or stale `~/.sf/agent/extensions/sf/`
|
||||
copies can make fixed source look broken.
|
||||
|
|
|
|||
|
|
@ -409,12 +409,19 @@ export class RpcClient {
|
|||
subcommand: "add" | "resolve",
|
||||
args: string[],
|
||||
json = false,
|
||||
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
|
||||
options: { queued?: boolean } = {},
|
||||
): Promise<{
|
||||
exitCode: number | null;
|
||||
stdout: string;
|
||||
stderr: string;
|
||||
queued?: boolean;
|
||||
}> {
|
||||
const response = await this.send({
|
||||
type: "sf_feedback",
|
||||
subcommand,
|
||||
args,
|
||||
json,
|
||||
queued: options.queued,
|
||||
});
|
||||
return this.getData(response);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,16 @@
|
|||
*/
|
||||
|
||||
import * as crypto from "node:crypto";
|
||||
import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
|
||||
import {
|
||||
appendFileSync,
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
readdirSync,
|
||||
readFileSync,
|
||||
renameSync,
|
||||
statSync,
|
||||
unlinkSync,
|
||||
} from "node:fs";
|
||||
import type { WriteStream } from "node:tty";
|
||||
import { pathToFileURL } from "node:url";
|
||||
import { dirname, join, resolve } from "node:path";
|
||||
|
|
@ -42,6 +51,142 @@ const RUNTIME_HEARTBEAT_INTERVAL_MS = Number(
|
|||
process.env.SF_RUNTIME_HEARTBEAT_INTERVAL_MS ?? 10_000,
|
||||
);
|
||||
|
||||
const SF_FEEDBACK_QUEUE_FILE = "sf-feedback-queue.jsonl";
const SF_FEEDBACK_FAILED_QUEUE_FILE = "sf-feedback-queue-failed.jsonl";

/**
 * Append one sf_feedback RPC command to the on-disk feedback queue.
 *
 * The command is serialized as a single JSON line under
 * `<cwd>/.sf/runtime/`, creating the directory when it is missing.
 *
 * @param cwd - Project root that owns the `.sf/runtime` directory.
 * @param command - The sf_feedback RPC command to persist.
 * @returns Absolute or cwd-relative path of the queue file that was appended to.
 */
function queueSfFeedbackCommand(
	cwd: string,
	command: Extract<RpcCommand, { type: "sf_feedback" }>,
): string {
	const runtimeDir = join(cwd, ".sf", "runtime");
	const queueFile = join(runtimeDir, SF_FEEDBACK_QUEUE_FILE);

	// One self-describing row per command; `json` is normalized to a boolean.
	const record = {
		schemaVersion: 1,
		queuedAt: new Date().toISOString(),
		id: command.id,
		subcommand: command.subcommand,
		args: command.args,
		json: command.json === true,
		source: "rpc",
	};

	mkdirSync(runtimeDir, { recursive: true });
	appendFileSync(queueFile, `${JSON.stringify(record)}\n`, "utf-8");
	return queueFile;
}
|
||||
|
||||
/** One persisted row of the sf_feedback queue file (JSON Lines, schema v1). */
type QueuedSfFeedbackCommand = {
	schemaVersion: 1;
	queuedAt: string;
	id?: string;
	subcommand: "add" | "list" | "resolve";
	args: string[];
	json: boolean;
	source: "rpc";
};

/**
 * Parse and validate one line of the sf_feedback queue file.
 *
 * The line must be a JSON object with `schemaVersion === 1`, a known
 * subcommand, and an `args` array. String args are kept as-is; number and
 * boolean args are stringified. Any other arg value (object, array, null)
 * rejects the whole row instead of being coerced into text such as
 * "[object Object]" and executed as a feedback argument.
 *
 * @param line - A single raw line from the queue file.
 * @returns The normalized command, or `null` when the line is not valid JSON
 *   or does not match the expected shape.
 */
function parseQueuedSfFeedbackLine(
	line: string,
): QueuedSfFeedbackCommand | null {
	let parsed: unknown;
	try {
		parsed = JSON.parse(line);
	} catch {
		return null;
	}
	// JSON scalars, null and arrays are never valid queue rows.
	if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
		return null;
	}
	const row = parsed as Record<string, unknown>;
	if (row.schemaVersion !== 1) return null;

	const subcommand = row.subcommand;
	if (
		subcommand !== "add" &&
		subcommand !== "list" &&
		subcommand !== "resolve"
	) {
		return null;
	}

	if (!Array.isArray(row.args)) return null;
	const args: string[] = [];
	for (const arg of row.args as unknown[]) {
		if (typeof arg === "string") {
			args.push(arg);
		} else if (typeof arg === "number" || typeof arg === "boolean") {
			args.push(String(arg));
		} else {
			return null;
		}
	}

	return {
		schemaVersion: 1,
		// A missing or malformed timestamp is replaced with the parse time.
		queuedAt:
			typeof row.queuedAt === "string"
				? row.queuedAt
				: new Date().toISOString(),
		id: typeof row.id === "string" ? row.id : undefined,
		subcommand,
		args,
		json: row.json === true,
		source: "rpc",
	};
}
|
||||
|
||||
/**
 * Apply queued sf_feedback commands through the headless feedback handler.
 *
 * Purpose: keep CLI/RPC control commands non-blocking while preserving a single
 * server-owned writer for self-feedback mutations.
 *
 * The queue file is claimed by renaming it to a uniquely named `.draining`
 * file, so overlapping drains in the same process never share or clobber a
 * claim. Lines that do not parse as queue rows are skipped. Commands whose
 * handler throws or returns a non-zero exit code, and commands that could not
 * be attempted because the handler failed to load, are appended to the failed
 * queue file before the claim is removed.
 *
 * Consumer: start_autonomous RPC command in the SF server session.
 *
 * @param cwd - Project root that owns the `.sf/runtime` directory.
 * @throws Propagates read errors on the claimed file and handler-load errors.
 */
async function drainQueuedSfFeedbackCommands(cwd: string): Promise<void> {
	const runtimeDir = join(cwd, ".sf", "runtime");
	const queuePath = join(runtimeDir, SF_FEEDBACK_QUEUE_FILE);
	if (!existsSync(queuePath)) return;

	// Unique per invocation: a pid-only name lets a second drain overwrite the
	// first claim and makes the loser's final unlink throw ENOENT.
	const drainingPath = join(
		runtimeDir,
		`${SF_FEEDBACK_QUEUE_FILE}.${process.pid}.${crypto.randomUUID()}.draining`,
	);
	try {
		renameSync(queuePath, drainingPath);
	} catch {
		// Another drain claimed the queue first, or it vanished; nothing to do.
		return;
	}

	// If this read throws, the claimed batch stays on disk untouched.
	const queued = readFileSync(drainingPath, "utf-8")
		.split("\n")
		.map((line) => line.trim())
		.filter(Boolean)
		.map(parseQueuedSfFeedbackLine)
		.filter((row): row is QueuedSfFeedbackCommand => row !== null);

	const failed: QueuedSfFeedbackCommand[] = [];
	let attempted = 0;
	try {
		if (queued.length > 0) {
			const { handleFeedback } = await loadHeadlessFeedbackHandler();
			for (const command of queued) {
				attempted += 1;
				try {
					const captured = await captureProcessWrites(() =>
						handleFeedback(cwd, {
							subcommand: command.subcommand,
							args: command.args,
							json: command.json,
						}),
					);
					if (captured.result.exitCode !== 0) failed.push(command);
				} catch {
					failed.push(command);
				}
			}
		}
	} finally {
		// Commands never attempted (handler load failed) must not be lost.
		failed.push(...queued.slice(attempted));
		if (failed.length > 0) {
			// If this append throws, the claim file is intentionally left in place.
			appendFileSync(
				join(runtimeDir, SF_FEEDBACK_FAILED_QUEUE_FILE),
				failed.map((row) => JSON.stringify(row)).join("\n") + "\n",
				"utf-8",
			);
		}
		try {
			unlinkSync(drainingPath);
		} catch {
			// Claim file already gone; the batch has been fully accounted for.
		}
	}
}
|
||||
|
||||
/**
 * Schedule a deferred, fire-and-forget drain of the sf_feedback queue.
 *
 * The drain runs on a zero-delay timer so the caller can return first. The
 * timer is unref'd so it does not keep the process alive. A failed drain is
 * reported on stderr instead of surfacing as an unhandled promise rejection.
 *
 * @param cwd - Project root that owns the `.sf/runtime` directory.
 */
function scheduleQueuedSfFeedbackDrain(cwd: string): void {
	const timer = setTimeout(() => {
		drainQueuedSfFeedbackCommands(cwd).catch((error: unknown) => {
			const detail = error instanceof Error ? error.message : String(error);
			process.stderr.write(`[sf] queued sf_feedback drain failed: ${detail}\n`);
		});
	}, 0);
	timer.unref?.();
}
|
||||
|
||||
async function captureProcessWrites<T>(
|
||||
run: () => Promise<T>,
|
||||
): Promise<{ result: T; stdout: string; stderr: string }> {
|
||||
|
|
@ -853,6 +998,7 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
|
|||
const previousHeadless = process.env.SF_HEADLESS;
|
||||
process.env.SF_HEADLESS = "1";
|
||||
try {
|
||||
await drainQueuedSfFeedbackCommands(process.cwd());
|
||||
await session.prompt("/autonomous", {
|
||||
source: "rpc",
|
||||
});
|
||||
|
|
@ -882,6 +1028,16 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
|
|||
}
|
||||
|
||||
case "sf_feedback": {
|
||||
if (command.queued === true) {
|
||||
const queuePath = queueSfFeedbackCommand(process.cwd(), command);
|
||||
scheduleQueuedSfFeedbackDrain(process.cwd());
|
||||
return success(id, "sf_feedback", {
|
||||
exitCode: null,
|
||||
stdout: JSON.stringify({ ok: true, queued: true, queuePath }),
|
||||
stderr: "",
|
||||
queued: true,
|
||||
});
|
||||
}
|
||||
const { handleFeedback } = await loadHeadlessFeedbackHandler();
|
||||
const captured = await captureProcessWrites(() =>
|
||||
handleFeedback(process.cwd(), {
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ export type RpcCommand =
|
|||
subcommand: "add" | "resolve";
|
||||
args: string[];
|
||||
json?: boolean;
|
||||
queued?: boolean;
|
||||
}
|
||||
|
||||
// State
|
||||
|
|
@ -185,7 +186,12 @@ export type RpcResponse =
|
|||
type: "response";
|
||||
command: "sf_feedback";
|
||||
success: true;
|
||||
data: { exitCode: number; stdout: string; stderr: string };
|
||||
data: {
|
||||
exitCode: number | null;
|
||||
stdout: string;
|
||||
stderr: string;
|
||||
queued?: boolean;
|
||||
};
|
||||
}
|
||||
| {
|
||||
id?: string;
|
||||
|
|
|
|||
|
|
@ -482,12 +482,19 @@ export class RpcClient {
|
|||
subcommand: "add" | "resolve",
|
||||
args: string[],
|
||||
json = false,
|
||||
): Promise<{ exitCode: number; stdout: string; stderr: string }> {
|
||||
options: { queued?: boolean } = {},
|
||||
): Promise<{
|
||||
exitCode: number | null;
|
||||
stdout: string;
|
||||
stderr: string;
|
||||
queued?: boolean;
|
||||
}> {
|
||||
const response = await this.send({
|
||||
type: "sf_feedback",
|
||||
subcommand,
|
||||
args,
|
||||
json,
|
||||
queued: options.queued,
|
||||
});
|
||||
return this.getData(response);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -113,6 +113,7 @@ export type RpcCommand =
|
|||
subcommand: "add" | "resolve";
|
||||
args: string[];
|
||||
json?: boolean;
|
||||
queued?: boolean;
|
||||
}
|
||||
|
||||
// State
|
||||
|
|
@ -251,7 +252,12 @@ export type RpcResponse =
|
|||
type: "response";
|
||||
command: "sf_feedback";
|
||||
success: true;
|
||||
data: { exitCode: number; stdout: string; stderr: string };
|
||||
data: {
|
||||
exitCode: number | null;
|
||||
stdout: string;
|
||||
stderr: string;
|
||||
queued?: boolean;
|
||||
};
|
||||
}
|
||||
| {
|
||||
id?: string;
|
||||
|
|
|
|||
|
|
@ -227,7 +227,7 @@ export type RunWebCliBranchResult =
|
|||
| {
|
||||
handled: true;
|
||||
exitCode: number;
|
||||
action: "start";
|
||||
action: "start" | "reload";
|
||||
status: WebModeLaunchStatus;
|
||||
launchInputs: {
|
||||
cwd: string;
|
||||
|
|
@ -270,8 +270,8 @@ export async function runWebCliBranch(
|
|||
};
|
||||
}
|
||||
|
||||
// `sf server [start] [path]` starts the full operator server for one repo.
|
||||
// Matches: `sf server`, `sf server start`, `sf server start <path>`, `sf server <path>`
|
||||
// `sf server [start|reload] [path]` starts the full operator server for one repo.
|
||||
// Matches: `sf server`, `sf server start`, `sf server reload`, `sf server <path>`
|
||||
const isWebSubcommand =
|
||||
flags.messages[0] === "server" && flags.messages[1] !== "stop";
|
||||
if (!isWebSubcommand) {
|
||||
|
|
@ -286,7 +286,7 @@ export async function runWebCliBranch(
|
|||
// sf server <path> → messages[1] (when not "start")
|
||||
let webPath = flags.webPath;
|
||||
if (!webPath && isWebSubcommand) {
|
||||
if (flags.messages[1] === "start") {
|
||||
if (flags.messages[1] === "start" || flags.messages[1] === "reload") {
|
||||
webPath = flags.messages[2];
|
||||
} else if (flags.messages[1]) {
|
||||
webPath = flags.messages[1];
|
||||
|
|
@ -346,6 +346,7 @@ export async function runWebCliBranch(
|
|||
agentDir,
|
||||
host: flags.webHost,
|
||||
port: flags.webPort,
|
||||
...(flags.messages[1] === "reload" ? { reload: true } : {}),
|
||||
allowedOrigins: flags.webAllowedOrigins,
|
||||
});
|
||||
|
||||
|
|
@ -356,7 +357,7 @@ export async function runWebCliBranch(
|
|||
return {
|
||||
handled: true,
|
||||
exitCode: status.ok ? 0 : 1,
|
||||
action: "start",
|
||||
action: flags.messages[1] === "reload" ? "reload" : "start",
|
||||
status,
|
||||
launchInputs: {
|
||||
cwd: currentCwd,
|
||||
|
|
|
|||
|
|
@ -12,9 +12,10 @@ import { resolve } from "node:path";
|
|||
import { readInstanceRegistry, type WebInstanceEntry } from "./web-mode.js";
|
||||
|
||||
export interface ForwardedHeadlessResult {
|
||||
exitCode: number;
|
||||
exitCode: number | null;
|
||||
stdout: string;
|
||||
stderr: string;
|
||||
queued?: boolean;
|
||||
}
|
||||
|
||||
type SfFeedbackResponse =
|
||||
|
|
@ -109,6 +110,7 @@ export async function forwardFeedbackToActiveServer(
|
|||
subcommand: options.subcommand,
|
||||
args: options.args,
|
||||
json: options.json,
|
||||
queued: true,
|
||||
},
|
||||
);
|
||||
if (response.statusCode === 404) return null;
|
||||
|
|
|
|||
|
|
@ -67,6 +67,7 @@ export interface HandleTriageOptions {
|
|||
max?: number;
|
||||
run?: boolean;
|
||||
apply?: boolean;
|
||||
urgentOnly?: boolean;
|
||||
model?: string;
|
||||
agentRunner?: AgentRunner;
|
||||
}
|
||||
|
|
@ -1166,6 +1167,13 @@ export async function handleTriage(
|
|||
return { exitCode: 1 };
|
||||
}
|
||||
|
||||
if (options.urgentOnly) {
|
||||
candidates = candidates.filter(
|
||||
(candidate) =>
|
||||
candidate.severity === "high" || candidate.severity === "critical",
|
||||
);
|
||||
}
|
||||
|
||||
if (typeof options.max === "number" && options.max > 0) {
|
||||
candidates = candidates.slice(0, options.max);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -105,6 +105,13 @@ import {
|
|||
|
||||
const HEADLESS_HEARTBEAT_INTERVAL_MS = 60_000;
|
||||
|
||||
/**
 * Severity label carried by a self-feedback row. The named levels are listed
 * for readers; any other string is accepted as well.
 */
type SelfFeedbackSeverity = "low" | "medium" | "high" | "critical" | string;

/** The subset of a self-feedback row that the urgent pre-triage count reads. */
interface SelfFeedbackRowForTriage {
	/** Severity label; only "high" and "critical" count as urgent. */
	severity?: SelfFeedbackSeverity;
	/** Resolution timestamp; a falsy value means the row is still open. */
	resolvedAt?: string | null;
}
|
||||
|
||||
interface HeadlessTimeoutSolverEvalRecord {
|
||||
runId: string;
|
||||
reportPath: string;
|
||||
|
|
@ -577,6 +584,31 @@ export async function runHeadless(options: HeadlessOptions): Promise<void> {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Count open self-feedback rows whose severity is "high" or "critical".
 *
 * Purpose: let urgent operator/detector findings bypass the normal triage
 * cadence without making the TypeScript headless surface depend on JS
 * extension declarations.
 *
 * The self-feedback module is loaded lazily through a non-literal path. Any
 * failure while loading or reading it is treated as "no urgent rows".
 *
 * Consumer: runHeadlessOnce before autonomous dispatch.
 *
 * @param basePath - Project root passed through to `readAllSelfFeedback`.
 * @returns Number of unresolved high/critical rows, or 0 on any error.
 */
async function countUrgentSelfFeedbackRows(basePath: string): Promise<number> {
	type SelfFeedbackModule = {
		readAllSelfFeedback?: (basePath: string) => SelfFeedbackRowForTriage[];
	};
	const modulePath = "./resources/extensions/sf/self-feedback.js";
	let urgent = 0;
	try {
		const feedback = (await import(modulePath)) as SelfFeedbackModule;
		const rows = feedback.readAllSelfFeedback?.(basePath) ?? [];
		for (const row of rows) {
			if (row.resolvedAt) continue;
			if (row.severity === "high" || row.severity === "critical") {
				urgent += 1;
			}
		}
	} catch {
		return 0;
	}
	return urgent;
}
|
||||
|
||||
async function runHeadlessOnce(
|
||||
options: HeadlessOptions,
|
||||
restartCount: number,
|
||||
|
|
@ -660,12 +692,19 @@ async function runHeadlessOnce(
|
|||
"last-triage-at",
|
||||
);
|
||||
let shouldRunTriage = true;
|
||||
const urgentTriageCount = await countUrgentSelfFeedbackRows(
|
||||
process.cwd(),
|
||||
);
|
||||
try {
|
||||
if (existsSync(triageMarkerPath)) {
|
||||
const last = Date.parse(
|
||||
readFileSync(triageMarkerPath, "utf8").trim(),
|
||||
);
|
||||
if (Number.isFinite(last) && Date.now() - last < triageIntervalMs) {
|
||||
if (
|
||||
urgentTriageCount === 0 &&
|
||||
Number.isFinite(last) &&
|
||||
Date.now() - last < triageIntervalMs
|
||||
) {
|
||||
shouldRunTriage = false;
|
||||
if (!options.json) {
|
||||
process.stderr.write(
|
||||
|
|
@ -687,13 +726,16 @@ async function runHeadlessOnce(
|
|||
const { handleTriage } = await import("./headless-triage.js");
|
||||
if (!options.json) {
|
||||
process.stderr.write(
|
||||
`[headless] autonomous: draining self-feedback triage queue first (max=${triageMaxBatch})...\n`,
|
||||
urgentTriageCount > 0
|
||||
? `[headless] autonomous: draining ${urgentTriageCount} high/critical self-feedback entr${urgentTriageCount === 1 ? "y" : "ies"} before dispatch (max=${triageMaxBatch})...\n`
|
||||
: `[headless] autonomous: draining self-feedback triage queue first (max=${triageMaxBatch})...\n`,
|
||||
);
|
||||
}
|
||||
await handleTriage(process.cwd(), {
|
||||
apply: true,
|
||||
json: !!options.json,
|
||||
max: triageMaxBatch,
|
||||
urgentOnly: urgentTriageCount > 0,
|
||||
});
|
||||
try {
|
||||
const runtimeDir = join(process.cwd(), ".sf", "runtime");
|
||||
|
|
@ -971,7 +1013,7 @@ async function runHeadlessOnce(
|
|||
if (forwarded.stdout) process.stdout.write(forwarded.stdout);
|
||||
if (forwarded.stderr) process.stderr.write(forwarded.stderr);
|
||||
return {
|
||||
exitCode: forwarded.exitCode,
|
||||
exitCode: forwarded.exitCode ?? EXIT_SUCCESS,
|
||||
interrupted: false,
|
||||
timedOut: false,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
* via startUnitSupervision() and torn down by the caller via clearUnitTimeout().
|
||||
*/
|
||||
import { saveActivityLog } from "./activity-log.js";
|
||||
import { resolveAgentEnd } from "./auto/resolve.js";
|
||||
import { resolveAgentEndCancelled } from "./auto/resolve.js";
|
||||
import { detectWorkingTreeActivity } from "./auto-supervisor.js";
|
||||
import { blockModel } from "./blocked-models.js";
|
||||
|
|
@ -40,6 +41,124 @@ import {
|
|||
writeUnitRuntimeRecord,
|
||||
} from "./uok/unit-runtime.js";
|
||||
import { logError, logWarning } from "./workflow-logger.js";
|
||||
|
||||
/**
|
||||
* Clear active supervision handles for the current unit attempt.
|
||||
*
|
||||
* Purpose: stop one runaway-guard terminal decision from being emitted repeatedly
|
||||
* while the autonomous loop is being unblocked.
|
||||
*
|
||||
* Consumer: finalizeRunawayGuardFailure() when zero-progress or silent-worker
|
||||
* detection has already converted the current unit attempt into a failed record.
|
||||
*/
|
||||
function clearSupervisionHandles(s) {
|
||||
if (s.unitTimeoutHandle) {
|
||||
clearTimeout(s.unitTimeoutHandle);
|
||||
s.unitTimeoutHandle = null;
|
||||
}
|
||||
if (s.wrapupWarningHandle) {
|
||||
clearTimeout(s.wrapupWarningHandle);
|
||||
s.wrapupWarningHandle = null;
|
||||
}
|
||||
if (s.idleWatchdogHandle) {
|
||||
clearInterval(s.idleWatchdogHandle);
|
||||
s.idleWatchdogHandle = null;
|
||||
}
|
||||
if (s.continueHereHandle) {
|
||||
clearInterval(s.continueHereHandle);
|
||||
s.continueHereHandle = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Finish a runaway-guard failure as one terminal unit-attempt event.
 *
 * Purpose: convert zero-progress and silent-worker supervision failures into a
 * retryable failed runtime record, close the worker lineage, stop supervision
 * timers, and unblock the unit promise so the autonomous loop can select the
 * next eligible model instead of repeating the same warning.
 *
 * Timer cleanup and unit resolution run in a `finally` block. If closeout, the
 * runtime write, or feedback recording throws, the unit is still unblocked and
 * the timers are still stopped; the original error then propagates to the
 * caller.
 *
 * Consumer: startUnitSupervision() idle watchdog fail branch.
 *
 * @param sctx - Supervision context; reads `s`, `ctx`, `unitType`, `unitId`
 *   and `buildSnapshotOpts`.
 * @param decision - Runaway-guard decision; reads `reason` and `metadata`.
 * @param helpers - Optional overrides for the collaborators (`closeoutUnit`,
 *   `writeUnitRuntimeRecord`, `blockModel`, `recordSelfFeedback`, `notify`,
 *   `resolveAgentEnd`); each defaults to the module-level implementation.
 * @returns Resolves once the attempt is finalized; a no-op when there is no
 *   current unit.
 */
export async function finalizeRunawayGuardFailure(sctx, decision, helpers = {}) {
	const { s, ctx, unitType, unitId, buildSnapshotOpts } = sctx;
	const currentUnit = s.currentUnit;
	if (!currentUnit) return;
	const closeout = helpers.closeoutUnit ?? closeoutUnit;
	const writeRuntime = helpers.writeUnitRuntimeRecord ?? writeUnitRuntimeRecord;
	const block = helpers.blockModel ?? blockModel;
	const recordFeedback = helpers.recordSelfFeedback ?? recordSelfFeedback;
	const notify =
		helpers.notify ?? ((message, level) => ctx.ui.notify(message, level));
	const resolveUnit =
		helpers.resolveAgentEnd ??
		((event) => {
			resolveAgentEnd(event);
		});
	try {
		const failedModel = s.currentUnitModel;
		if (
			decision.reason === "zero-progress" &&
			failedModel?.provider &&
			failedModel?.id
		) {
			// Block the model for one hour so the retry picks a fallback.
			block(
				s.basePath,
				failedModel.provider,
				failedModel.id,
				`zero-progress on ${unitType} ${unitId}`,
				{ expiresAt: Date.now() + 60 * 60 * 1000 },
			);
			notify(
				`Temporarily blocked ${failedModel.provider}/${failedModel.id} after zero-progress on ${unitType} ${unitId}; retry will choose a fallback.`,
				"warning",
			);
		}
		await closeout(
			ctx,
			s.basePath,
			currentUnit.type,
			currentUnit.id,
			currentUnit.startedAt,
			buildSnapshotOpts(),
		);
		writeRuntime(s.basePath, unitType, unitId, currentUnit.startedAt, {
			phase: "failed-silent-worker",
			status: "failed",
			lastProgressAt: Date.now(),
			lastProgressKind: "runaway-guard-fail",
			runawayGuardFail: decision.metadata,
			lineageEvent: {
				status: "failed",
				workerSessionId: ctx.sessionManager?.getSessionId?.(),
				note: `${decision.reason ?? "runaway-guard"} failed current attempt`,
			},
		});
		// unitId is split on "/" into milestone, slice, and the remaining task path.
		const unitParts = unitId.split("/");
		recordFeedback(
			{
				kind: "runaway-loop:silent-worker-failure",
				severity: "high",
				summary: decision.reason,
				evidence: JSON.stringify(decision.metadata, null, 2),
				suggestedFix:
					"LLM session never produced an assistant message — check session-manager.ts:1086-1096 (silent _persist skip) and verify the model/provider is responding. The dispatcher will attempt retry within maxRetries; if persistent, transitions to blocked.",
				occurredIn: {
					unitType,
					milestone: unitParts[0],
					slice: unitParts[1],
					task: unitParts.slice(2).join("/") || undefined,
				},
				source: "detector",
			},
			s.basePath,
		);
	} finally {
		// Always stop the timers and release the unit promise, even if
		// bookkeeping above failed; otherwise the loop stays blocked.
		clearSupervisionHandles(s);
		try {
			notify(decision.reason, "error");
		} finally {
			resolveUnit({
				messages: [],
				_synthetic: "runaway-guard-fail",
				reason: decision.reason,
			});
		}
	}
}
|
||||
/**
|
||||
* Set up all four supervision timers for the current unit:
|
||||
* 1. Soft timeout warning (wrapup)
|
||||
|
|
@ -271,65 +390,7 @@ export function startUnitSupervision(sctx) {
|
|||
}
|
||||
if (decision.action === "fail") {
|
||||
if (getInFlightToolCount() > 0) return;
|
||||
const failedModel = s.currentUnitModel;
|
||||
if (
|
||||
decision.reason === "zero-progress" &&
|
||||
failedModel?.provider &&
|
||||
failedModel?.id
|
||||
) {
|
||||
blockModel(
|
||||
s.basePath,
|
||||
failedModel.provider,
|
||||
failedModel.id,
|
||||
`zero-progress on ${unitType} ${unitId}`,
|
||||
{ expiresAt: Date.now() + 60 * 60 * 1000 },
|
||||
);
|
||||
ctx.ui.notify(
|
||||
`Temporarily blocked ${failedModel.provider}/${failedModel.id} after zero-progress on ${unitType} ${unitId}; retry will choose a fallback.`,
|
||||
"warning",
|
||||
);
|
||||
}
|
||||
await closeoutUnit(
|
||||
ctx,
|
||||
s.basePath,
|
||||
s.currentUnit.type,
|
||||
s.currentUnit.id,
|
||||
s.currentUnit.startedAt,
|
||||
buildSnapshotOpts(),
|
||||
);
|
||||
writeUnitRuntimeRecord(
|
||||
s.basePath,
|
||||
unitType,
|
||||
unitId,
|
||||
s.currentUnit.startedAt,
|
||||
{
|
||||
phase: "failed-silent-worker",
|
||||
status: "failed",
|
||||
lastProgressAt: Date.now(),
|
||||
lastProgressKind: "runaway-guard-fail",
|
||||
runawayGuardFail: decision.metadata,
|
||||
},
|
||||
);
|
||||
const unitParts = unitId.split("/");
|
||||
recordSelfFeedback(
|
||||
{
|
||||
kind: "runaway-loop:silent-worker-failure",
|
||||
severity: "high",
|
||||
summary: decision.reason,
|
||||
evidence: JSON.stringify(decision.metadata, null, 2),
|
||||
suggestedFix:
|
||||
"LLM session never produced an assistant message — check session-manager.ts:1086-1096 (silent _persist skip) and verify the model/provider is responding. The dispatcher will attempt retry within maxRetries; if persistent, transitions to blocked.",
|
||||
occurredIn: {
|
||||
unitType,
|
||||
milestone: unitParts[0],
|
||||
slice: unitParts[1],
|
||||
task: unitParts.slice(2).join("/") || undefined,
|
||||
},
|
||||
source: "detector",
|
||||
},
|
||||
s.basePath,
|
||||
);
|
||||
ctx.ui.notify(decision.reason, "error");
|
||||
await finalizeRunawayGuardFailure(sctx, decision);
|
||||
return;
|
||||
}
|
||||
if (decision.action === "pause") {
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ export { periodicDetectorSweepGate } from "./periodic-runner.js";
|
|||
export { productionPlateauGate } from "./production-plateau.js";
|
||||
export { repeatedFeedbackKindGate } from "./repeated-feedback-kind.js";
|
||||
export { sameUnitLoopGate } from "./same-unit-loop.js";
|
||||
export { serverDirectionDriftGate } from "./server-direction-drift.js";
|
||||
export { staleLockGate } from "./stale-lock.js";
|
||||
export { statusCompletionDriftGate } from "./status-completion-drift.js";
|
||||
export { zeroProgressGate } from "./zero-progress.js";
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import { detectCrashLoop } from "./crash-loop-classifier.js";
|
|||
import { detectProductionPlateau } from "./production-plateau.js";
|
||||
import { detectRepeatedFeedbackKind } from "./repeated-feedback-kind.js";
|
||||
import { detectSameUnitLoop } from "./same-unit-loop.js";
|
||||
import { detectServerDirectionDrift } from "./server-direction-drift.js";
|
||||
import { detectStaleLock } from "./stale-lock.js";
|
||||
import { detectStatusCompletionDrift } from "./status-completion-drift.js";
|
||||
import { detectZeroProgress } from "./zero-progress.js";
|
||||
|
|
@ -74,6 +75,10 @@ function defaultDetectors(ctx, options) {
|
|||
name: "production-plateau",
|
||||
run: () => detectProductionPlateau(ctx?.unitMetrics, ctx, options),
|
||||
},
|
||||
{
|
||||
name: "server-direction-drift",
|
||||
run: () => detectServerDirectionDrift(ctx, options),
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
|
|
|
|||
132
src/resources/extensions/sf/detectors/server-direction-drift.js
Normal file
132
src/resources/extensions/sf/detectors/server-direction-drift.js
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
/**
|
||||
* server-direction-drift.js — detect obsolete server architecture in live work.
|
||||
*
|
||||
* Purpose: stop SF from planning queued work against superseded server shapes
|
||||
* after the product direction moves to one embedded `sf server` control plane.
|
||||
*
|
||||
* Consumer: Wiggums periodic detector sweep and UOK detector gate registry.
|
||||
*/
|
||||
|
||||
const DEFAULT_DEPRECATED_PATTERNS = [
|
||||
/\bsf serve\b/i,
|
||||
/\bA2A\b/i,
|
||||
/\bJSON-RPC API\b/i,
|
||||
/\bper-repo systemd unit\b/i,
|
||||
/\bper-repo web servers?\b/i,
|
||||
/\bseparate standalone daemon brain\b/i,
|
||||
];
|
||||
|
||||
const ACTIVE_STATUSES = new Set(["queued", "active", "planned", "pending"]);
|
||||
const CLOSED_STATUSES = new Set([
|
||||
"cancelled",
|
||||
"canceled",
|
||||
"complete",
|
||||
"completed",
|
||||
"done",
|
||||
"superseded",
|
||||
"parked",
|
||||
]);
|
||||
|
||||
/**
|
||||
* Detect queued milestone/slice work that still targets a deprecated server path.
|
||||
*
|
||||
* Purpose: make stale roadmap/server-direction drift visible before autonomous
|
||||
* planning spends turns on obsolete `sf serve`, A2A, or per-repo server work.
|
||||
*
|
||||
* Consumer: periodic-runner.js default detector list.
|
||||
*/
|
||||
export function detectServerDirectionDrift(ctx = {}, options = {}) {
|
||||
const rows = [
|
||||
...normalizeRows(ctx.milestones, "milestone"),
|
||||
...normalizeRows(ctx.slices, "slice"),
|
||||
...normalizeRows(ctx.requirements, "requirement"),
|
||||
];
|
||||
const patterns =
|
||||
options.deprecatedServerPatterns ?? DEFAULT_DEPRECATED_PATTERNS;
|
||||
const matches = [];
|
||||
|
||||
for (const row of rows) {
|
||||
if (!isActiveRow(row)) continue;
|
||||
const text = searchableText(row);
|
||||
const pattern = patterns.find((candidate) => candidate.test(text));
|
||||
if (!pattern) continue;
|
||||
matches.push({
|
||||
kind: row.kind,
|
||||
id: row.id,
|
||||
milestoneId: row.milestoneId ?? row.milestone_id ?? null,
|
||||
status: row.status ?? null,
|
||||
pattern: pattern.source,
|
||||
title: row.title ?? "",
|
||||
});
|
||||
}
|
||||
|
||||
if (matches.length === 0) {
|
||||
return { stuck: false, reason: "", signature: { checked: rows.length } };
|
||||
}
|
||||
return {
|
||||
stuck: true,
|
||||
reason: "server-direction-drift",
|
||||
signature: {
|
||||
matches,
|
||||
expectedDirection:
|
||||
"sf server is the single operator server; web/Next.js embeds daemon lifecycle",
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Run server-direction drift as a UOK verification gate.
|
||||
*
|
||||
* Purpose: make superseded server architecture detectable through the common
|
||||
* gate runner, not only through ad hoc roadmap review.
|
||||
*
|
||||
* Consumer: detector gate registry and periodicDetectorSweepGate.
|
||||
*/
|
||||
export const serverDirectionDriftGate = {
|
||||
id: "server-direction-drift",
|
||||
type: "verification",
|
||||
async execute(ctx = {}) {
|
||||
const result = detectServerDirectionDrift(ctx, ctx.options);
|
||||
if (result.stuck) {
|
||||
return {
|
||||
outcome: "manual-attention",
|
||||
failureClass: "verification",
|
||||
rationale: result.reason,
|
||||
findings: result.signature,
|
||||
};
|
||||
}
|
||||
return {
|
||||
outcome: "pass",
|
||||
failureClass: null,
|
||||
rationale: "no server-direction drift",
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
/**
 * Tag every row with the entity kind it came from.
 *
 * Returns an empty list when `rows` is not an array; otherwise returns shallow
 * copies of the rows with `kind` set (overriding any existing `kind`).
 */
function normalizeRows(rows, kind) {
  return Array.isArray(rows) ? rows.map((entry) => ({ ...entry, kind })) : [];
}
|
||||
|
||||
/**
 * Decide whether a planning row should be scanned for drift.
 *
 * The status is compared case-insensitively. Rows with a closed status are
 * never active; rows with no status at all are treated as active.
 */
function isActiveRow(row) {
  const normalized = String(row.status ?? "").toLowerCase();
  const closed = CLOSED_STATUSES.has(normalized);
  return !closed && (normalized === "" || ACTIVE_STATUSES.has(normalized));
}
|
||||
|
||||
/**
 * Build the text blob that drift patterns are matched against.
 *
 * Collects the row's descriptive fields in a fixed order, keeps only the ones
 * that are strings, and joins them with newlines.
 */
function searchableText(row) {
  const fieldNames = [
    "id",
    "title",
    "description",
    "why",
    "goal",
    "successCriteria",
    "success_criteria",
    "notes",
    "full_content",
    "vision",
  ];
  const parts = [];
  for (const name of fieldNames) {
    const value = row[name];
    // Non-string values (arrays, numbers, null) are not searched.
    if (typeof value === "string") parts.push(value);
  }
  return parts.join("\n");
}
|
||||
|
|
@ -17,12 +17,35 @@ import {
|
|||
loadProjectSFPreferences,
|
||||
} from "./preferences.js";
|
||||
|
||||
/** Extract the body section that follows a YAML frontmatter block. */
|
||||
function extractBodyAfterFrontmatter(content) {
|
||||
const closingIdx = content.indexOf("\n---", content.indexOf("---"));
|
||||
if (closingIdx === -1) return null;
|
||||
const afterFrontmatter = content.slice(closingIdx + 4);
|
||||
return afterFrontmatter.trim() ? afterFrontmatter : null;
|
||||
/**
 * Return the preferences documentation block from a preferences file, with
 * every non-comment line turned into a YAML comment.
 *
 * The block starts at the `# SF Preferences` heading. Both lookup paths run the
 * result through `commentPreferencesBody`, so whatever is re-appended after the
 * serialized preferences can never introduce stray YAML or Markdown lines.
 *
 * @param content Full text of the existing preferences file.
 * @returns The commented block, or `null` when the file has no heading.
 */
function extractPreferencesCommentBlock(content) {
  const heading = "# SF Preferences";
  const idx = content.indexOf(`\n${heading}`);
  if (idx >= 0) return commentPreferencesBody(content.slice(idx));
  // Heading on the very first line: there is no preceding newline to anchor
  // on, and the whole file is the block. Comment it out as well, otherwise raw
  // lines from a hand-edited file would be written back uncommented.
  if (content.startsWith(heading)) return commentPreferencesBody(content);
  return null;
}
|
||||
|
||||
/**
 * Build the default reference block appended to a freshly written
 * preferences file. Every non-empty line is a YAML comment, and the block both
 * starts and ends with a newline.
 */
function defaultPreferencesCommentBlock() {
  const heading = "# SF Preferences";
  const pointer =
    "# See `~/.sf/agent/extensions/sf/docs/preferences-reference.md` for full documentation.";
  return `\n${heading}\n#\n${pointer}\n`;
}
|
||||
|
||||
/**
 * Turn a human-written reference body into YAML comments so the preferences
 * file stays a single YAML document.
 *
 * Empty lines and lines that already start with `#` are kept verbatim; every
 * other line is prefixed with `# `.
 */
function commentPreferencesBody(body) {
  const commented = [];
  for (const line of body.split("\n")) {
    const alreadySafe = line.length === 0 || line.startsWith("#");
    commented.push(alreadySafe ? line : `# ${line}`);
  }
  return commented.join("\n");
}
|
||||
|
||||
/** All recognized experimental feature flags with descriptions. */
|
||||
|
|
@ -81,14 +104,15 @@ export function setExperimentalFlag(name, value) {
|
|||
prefs.experimental = { ...(prefs.experimental ?? {}), [name]: value };
|
||||
|
||||
const frontmatter = serializePreferencesToFrontmatter(prefs);
|
||||
let body =
|
||||
"\n# SF Preferences\n\nSee `~/.sf/agent/extensions/sf/docs/preferences-reference.md` for full documentation.\n";
|
||||
let body = defaultPreferencesCommentBlock();
|
||||
if (existsSync(path)) {
|
||||
const preserved = extractBodyAfterFrontmatter(readFileSync(path, "utf-8"));
|
||||
const preserved = extractPreferencesCommentBlock(
|
||||
readFileSync(path, "utf-8"),
|
||||
);
|
||||
if (preserved) body = preserved;
|
||||
}
|
||||
mkdirSync(dirname(path), { recursive: true });
|
||||
writeFileSync(path, `---\n${frontmatter}---${body}`, "utf-8");
|
||||
writeFileSync(path, `${frontmatter}${body}`, "utf-8");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -194,7 +194,7 @@ export function _resetParseWarningFlag() {
|
|||
*/
|
||||
export function parsePreferencesYaml(content) {
|
||||
try {
|
||||
const parsed = parseYaml(content);
|
||||
const parsed = parseYaml(stripPreferencesYamlDocument(content));
|
||||
if (typeof parsed !== "object" || parsed === null) return {};
|
||||
return parsed;
|
||||
} catch (e) {
|
||||
|
|
@ -203,6 +203,22 @@ export function parsePreferencesYaml(content) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Cut a preferences file down to its machine-readable YAML part.
 *
 * Purpose: older files carry a raw Markdown reference body after a
 * `# SF Preferences` heading; dropping everything from that heading onward
 * lets them parse, while canonical files (YAML plus comments) are unaffected
 * in meaning.
 *
 * Consumer: parsePreferencesYaml before handing content to the YAML parser.
 */
function stripPreferencesYamlDocument(content) {
  const cut = content.indexOf("\n# SF Preferences");
  // No heading preceded by a newline: the whole file is YAML.
  return cut === -1 ? content : content.slice(0, cut);
}
|
||||
|
||||
/**
|
||||
* Parse legacy frontmatter-style preference content.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ export * from "./sf-db/sf-db-memory.js";
|
|||
export * from "./sf-db/sf-db-milestones.js";
|
||||
export * from "./sf-db/sf-db-mode-state.js";
|
||||
export * from "./sf-db/sf-db-profile.js";
|
||||
export * from "./sf-db/roadmap-projection-sync.js";
|
||||
export * from "./sf-db/sf-db-self-feedback.js";
|
||||
export * from "./sf-db/sf-db-session-store.js";
|
||||
export * from "./sf-db/sf-db-slices.js";
|
||||
|
|
|
|||
85
src/resources/extensions/sf/sf-db/roadmap-projection-sync.js
Normal file
85
src/resources/extensions/sf/sf-db/roadmap-projection-sync.js
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
/**
|
||||
* roadmap-projection-sync.js - schedule DB-backed roadmap projection refreshes.
|
||||
*
|
||||
* Purpose: keep M###-ROADMAP.md and M###-ROADMAP.json as generated views of
|
||||
* canonical SQLite planning state after milestone or slice mutations.
|
||||
*
|
||||
* Consumer: sf-db milestone/slice write wrappers and projection-sync tests.
|
||||
*/
|
||||
import { logWarning } from "../workflow-logger.js";
|
||||
|
||||
/** Queued refresh requests, keyed by `${basePath}\0${milestoneId}`. */
const pending = new Map();

/** Promises of refreshes that are currently rendering, keyed like `pending`. */
const inFlight = new Map();

/**
 * Queue a best-effort ROADMAP.md/json refresh for one milestone.
 *
 * Purpose: make roadmap files server-maintained projections instead of stale
 * manually rendered artifacts while keeping DB writes synchronous and durable.
 *
 * Requests are coalesced per (basePath, milestoneId). A request that arrives
 * while a refresh for the same key is still rendering is kept queued and
 * re-run once that refresh settles, so a write that lands after the renderer
 * has read the DB is not lost.
 *
 * Consumer: insert/update milestone and slice DB wrappers.
 *
 * @param basePath Project root; defaults to the current working directory.
 * @param milestoneId Milestone whose projection should be refreshed. Falsy
 *   values make the call a no-op.
 */
export function scheduleRoadmapProjectionRefresh(
  basePath = process.cwd(),
  milestoneId,
) {
  if (!milestoneId || roadmapProjectionSyncDisabled()) return;
  const key = `${basePath}\0${milestoneId}`;
  // Already queued: the queued refresh will observe this write too.
  if (pending.has(key)) return;
  pending.set(key, { basePath, milestoneId });
  // A refresh is rendering right now; it re-arms the flush when it finishes.
  if (inFlight.has(key)) return;
  armRoadmapProjectionFlush(key);
}

/**
 * Refresh one roadmap projection immediately.
 *
 * Purpose: provide an explicit, awaitable projection path for tests and repair
 * tools while sharing the same renderer used by the asynchronous scheduler.
 *
 * Consumer: roadmap projection sync tests and future server repair jobs.
 */
export async function refreshRoadmapProjectionNow(basePath, milestoneId) {
  // Loaded lazily, on first use, rather than at module load time.
  const { renderRoadmapFromDb } = await import("../markdown-renderer.js");
  return renderRoadmapFromDb(basePath, milestoneId);
}

/**
 * Drain queued projection refreshes.
 *
 * Purpose: let tests prove DB writes schedule real roadmap projection updates
 * without waiting on wall-clock timers. Resolves only once nothing is queued
 * and nothing is still rendering.
 *
 * Consumer: roadmap-projection-sync.test.mjs.
 */
export async function flushRoadmapProjectionRefreshesForTests() {
  while (pending.size > 0 || inFlight.size > 0) {
    const keys = new Set([...pending.keys(), ...inFlight.keys()]);
    await Promise.all(
      [...keys].map((key) => flushOneRoadmapProjection(key)),
    );
  }
}

/**
 * Report whether projection sync is switched off.
 *
 * `SF_ROADMAP_PROJECTION_SYNC=0` disables it and `=1` enables it; with any
 * other value it is disabled only when `VITEST` is `"true"`.
 */
function roadmapProjectionSyncDisabled() {
  const override = process.env.SF_ROADMAP_PROJECTION_SYNC;
  if (override === "0") return true;
  if (override === "1") return false;
  return process.env.VITEST === "true";
}

/** Arm a zero-delay timer that flushes `key` without keeping the process alive. */
function armRoadmapProjectionFlush(key) {
  const timer = setTimeout(() => {
    void flushOneRoadmapProjection(key);
  }, 0);
  timer.unref?.();
}

/**
 * Start the queued refresh for `key`, or join the one already rendering.
 *
 * @returns A promise that settles when the refresh for `key` has finished;
 *   resolves immediately when nothing is queued or running for that key.
 */
function flushOneRoadmapProjection(key) {
  const running = inFlight.get(key);
  if (running) return running;
  const entry = pending.get(key);
  if (!entry) return Promise.resolve();
  pending.delete(key);
  const run = runRoadmapProjectionRefresh(key, entry);
  inFlight.set(key, run);
  return run;
}

/** Render one projection, log failures, and re-arm if new work was queued. */
async function runRoadmapProjectionRefresh(key, entry) {
  try {
    // The await guarantees the `finally` below runs after the caller has
    // recorded this promise in `inFlight`.
    await refreshRoadmapProjectionNow(entry.basePath, entry.milestoneId);
  } catch (err) {
    logWarning("roadmap-projection-sync", "projection refresh failed", {
      milestoneId: entry.milestoneId,
      error: err instanceof Error ? err.message : String(err),
    });
  } finally {
    inFlight.delete(key);
    // A write arrived while rendering: project it as well.
    if (pending.has(key)) armRoadmapProjectionFlush(key);
  }
}
|
||||
|
|
@ -11,6 +11,7 @@ import {
|
|||
rowToMilestone,
|
||||
transaction,
|
||||
} from "./sf-db-core.js";
|
||||
import { scheduleRoadmapProjectionRefresh } from "./roadmap-projection-sync.js";
|
||||
|
||||
export function insertMilestone(m) {
|
||||
const currentDb = _getAdapter();
|
||||
|
|
@ -57,6 +58,7 @@ export function insertMilestone(m) {
|
|||
if (hasPlanningPayload(m.planning)) {
|
||||
insertMilestoneSpecIfAbsent(m.id, m.planning ?? {});
|
||||
}
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), m.id);
|
||||
}
|
||||
|
||||
export function upsertMilestonePlanning(milestoneId, planning) {
|
||||
|
|
@ -111,6 +113,7 @@ export function upsertMilestonePlanning(milestoneId, planning) {
|
|||
? JSON.stringify(planning.productResearch)
|
||||
: null,
|
||||
});
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
|
||||
export function getAllMilestones() {
|
||||
|
|
@ -146,6 +149,7 @@ export function updateMilestoneStatus(milestoneId, status, completedAt) {
|
|||
":completed_at": completedAt ?? null,
|
||||
":id": milestoneId,
|
||||
});
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
|
||||
export function updateMilestoneQueueOrder(order) {
|
||||
|
|
@ -159,6 +163,9 @@ export function updateMilestoneQueueOrder(order) {
|
|||
stmt.run({ ":sequence": i + 1, ":id": order[i] });
|
||||
}
|
||||
});
|
||||
for (const milestoneId of order) {
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
}
|
||||
|
||||
export function getActiveMilestoneFromDb() {
|
||||
|
|
@ -274,6 +281,9 @@ export function bulkInsertLegacyHierarchy(payload) {
|
|||
);
|
||||
}
|
||||
});
|
||||
for (const milestoneId of clearMilestoneIds) {
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
}
|
||||
|
||||
export function clearEngineHierarchy() {
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import {
|
|||
safeParseJsonArray,
|
||||
transaction,
|
||||
} from "./sf-db-core.js";
|
||||
import { scheduleRoadmapProjectionRefresh } from "./roadmap-projection-sync.js";
|
||||
|
||||
export function insertSlice(s) {
|
||||
const currentDb = _getAdapter();
|
||||
|
|
@ -95,6 +96,7 @@ export function insertSlice(s) {
|
|||
":raw_traces_vision_fragment": s.tracesVisionFragment ?? null,
|
||||
});
|
||||
insertSliceSpecIfAbsent(s.milestoneId, s.id, s.planning ?? {});
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), s.milestoneId);
|
||||
}
|
||||
|
||||
export function insertOrIgnoreSlice(args) {
|
||||
|
|
@ -109,6 +111,7 @@ export function insertOrIgnoreSlice(args) {
|
|||
":title": args.title,
|
||||
":ts": args.createdAt,
|
||||
});
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), args.milestoneId);
|
||||
}
|
||||
|
||||
export function clearSliceSketch(milestoneId, sliceId) {
|
||||
|
|
@ -127,6 +130,7 @@ export function setSliceSketchFlag(milestoneId, sliceId, isSketch) {
|
|||
":mid": milestoneId,
|
||||
":sid": sliceId,
|
||||
});
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
|
||||
export function autoHealSketchFlags(milestoneId, hasPlanFile) {
|
||||
|
|
@ -178,6 +182,7 @@ export function upsertSlicePlanning(milestoneId, sliceId, planning) {
|
|||
// ADR-0000 P2 (schema v69): vision trace fragment is part of planning.
|
||||
":traces_vision_fragment": planning.tracesVisionFragment ?? null,
|
||||
});
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
|
||||
// ADR-0000 P2 (schema v69): focused setter so callers that already have a
|
||||
|
|
@ -195,6 +200,7 @@ export function updateSliceVisionTrace(milestoneId, sliceId, fragment) {
|
|||
":mid": milestoneId,
|
||||
":sid": sliceId,
|
||||
});
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
|
||||
export function getSlice(milestoneId, sliceId) {
|
||||
|
|
@ -219,6 +225,7 @@ export function updateSliceStatus(milestoneId, sliceId, status, completedAt) {
|
|||
":milestone_id": milestoneId,
|
||||
":id": sliceId,
|
||||
});
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
|
||||
export function setSliceUatVerdict(milestoneId, sliceId, verdict) {
|
||||
|
|
@ -229,6 +236,7 @@ export function setSliceUatVerdict(milestoneId, sliceId, verdict) {
|
|||
`UPDATE slices SET uat_verdict = :verdict WHERE milestone_id = :mid AND id = :sid`,
|
||||
)
|
||||
.run({ ":mid": milestoneId, ":sid": sliceId, ":verdict": verdict });
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
|
||||
export function getSliceUatVerdict(milestoneId, sliceId) {
|
||||
|
|
@ -312,6 +320,7 @@ export function setSliceSummaryMd(milestoneId, sliceId, summaryMd, uatMd) {
|
|||
":summary_md": summaryMd,
|
||||
":uat_md": uatMd,
|
||||
});
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
|
||||
export function getMilestoneSlices(milestoneId) {
|
||||
|
|
@ -369,6 +378,7 @@ export function syncSliceDependencies(milestoneId, sliceId, depends) {
|
|||
)
|
||||
.run({ ":mid": milestoneId, ":sid": sliceId, ":dep": dep });
|
||||
}
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
|
||||
export function getDependentSlices(milestoneId, sliceId) {
|
||||
|
|
@ -452,6 +462,7 @@ export function updateSliceFields(milestoneId, sliceId, fields) {
|
|||
":depends": fields.depends ? JSON.stringify(fields.depends) : null,
|
||||
":demo": fields.demo ?? null,
|
||||
});
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
|
||||
export function setSliceReplanTriggeredAt(milestoneId, sliceId, ts) {
|
||||
|
|
@ -462,6 +473,7 @@ export function setSliceReplanTriggeredAt(milestoneId, sliceId, ts) {
|
|||
"UPDATE slices SET replan_triggered_at = :ts WHERE milestone_id = :mid AND id = :sid",
|
||||
)
|
||||
.run({ ":ts": ts, ":mid": milestoneId, ":sid": sliceId });
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
|
||||
export function deleteSlice(milestoneId, sliceId) {
|
||||
|
|
@ -493,4 +505,5 @@ export function deleteSlice(milestoneId, sliceId) {
|
|||
.prepare(`DELETE FROM slices WHERE milestone_id = :mid AND id = :sid`)
|
||||
.run({ ":mid": milestoneId, ":sid": sliceId });
|
||||
});
|
||||
scheduleRoadmapProjectionRefresh(process.cwd(), milestoneId);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,82 @@
|
|||
/**
|
||||
* detector-server-direction-drift.test.mjs — server direction drift contracts.
|
||||
*
|
||||
* Purpose: prove Wiggums catches queued work that revives superseded server
|
||||
* architecture while ignoring cancelled historical slices.
|
||||
*/
|
||||
import assert from "node:assert/strict";
|
||||
import { test } from "vitest";
|
||||
import {
|
||||
detectServerDirectionDrift,
|
||||
serverDirectionDriftGate,
|
||||
} from "../detectors/server-direction-drift.js";
|
||||
import { runDetectorSweep } from "../detectors/periodic-runner.js";
|
||||
|
||||
test("detectServerDirectionDrift_when_queued_slice_mentions_sf_serve_flags_drift", () => {
  // A queued slice that revives the `sf serve` daemon must be reported.
  const queuedSlice = {
    milestone_id: "M053",
    id: "S01",
    status: "queued",
    title: "`sf serve` daemon scaffold + JSON-RPC API",
    goal: "Create a separate JSON-RPC API.",
  };

  const detection = detectServerDirectionDrift({ slices: [queuedSlice] });

  assert.equal(detection.stuck, true);
  assert.equal(detection.reason, "server-direction-drift");
  assert.equal(detection.signature.matches[0].id, "S01");
});
|
||||
|
||||
test("detectServerDirectionDrift_when_cancelled_slice_mentions_sf_serve_ignores_history", () => {
  // Same wording as the queued case, but the slice is cancelled history.
  const cancelledSlice = {
    milestone_id: "M053",
    id: "S01",
    status: "cancelled",
    title: "`sf serve` daemon scaffold + JSON-RPC API",
  };

  const detection = detectServerDirectionDrift({ slices: [cancelledSlice] });

  assert.equal(detection.stuck, false);
});
|
||||
|
||||
test("serverDirectionDriftGate_when_drift_exists_returns_manual_attention", async () => {
  // An active requirement that contradicts the server direction.
  const driftingRequirement = {
    id: "R999",
    status: "active",
    description: "Add A2A as the primary server control plane.",
  };

  const verdict = await serverDirectionDriftGate.execute({
    requirements: [driftingRequirement],
  });

  assert.equal(verdict.outcome, "manual-attention");
  assert.equal(verdict.rationale, "server-direction-drift");
});
|
||||
|
||||
test("runDetectorSweep_includes_server_direction_drift_detector", async () => {
  const sweepContext = {
    slices: [
      {
        id: "S99",
        status: "queued",
        title: "Per-repo systemd unit for another server",
      },
    ],
  };

  const sweep = await runDetectorSweep(sweepContext, { throttleMs: 0 });

  // The sweep must report the drift detector among the detectors that fired.
  const firedDrift = sweep.detectorsFired.some(
    (detector) => detector.name === "server-direction-drift",
  );
  assert.ok(firedDrift);
});
|
||||
|
|
@ -2,6 +2,7 @@ import { describe, expect, test } from "vitest";
|
|||
import {
|
||||
BASE_REQUIREMENTS,
|
||||
MODEL_CAPABILITY_PROFILES,
|
||||
resolveModelForComplexity,
|
||||
scoreEligibleModels,
|
||||
scoreModel,
|
||||
} from "../model-router.js";
|
||||
|
|
@ -16,6 +17,11 @@ describe("agentic capability axis (ADR-0079)", () => {
|
|||
);
|
||||
});
|
||||
|
||||
test("challenge base requirements weight adversarial agentic reasoning", () => {
  // The challenge unit must demand strong reasoning and agentic capability.
  const challenge = BASE_REQUIREMENTS.challenge;
  expect(challenge.reasoning).toBeGreaterThanOrEqual(0.8);
  expect(challenge.agentic).toBeGreaterThanOrEqual(0.85);
});
|
||||
|
||||
test("known agentic-capable models score higher than coding-completion models on execute-task", () => {
|
||||
const codestralScore = scoreModel(
|
||||
MODEL_CAPABILITY_PROFILES["codestral-latest"],
|
||||
|
|
@ -34,6 +40,45 @@ describe("agentic capability axis (ADR-0079)", () => {
|
|||
expect(sonnetScore).toBeGreaterThan(codestralScore);
|
||||
});
|
||||
|
||||
test("challenge routing ignores sticky model unless explicitly enabled", () => {
  const phaseConfig = {
    primary: "openai/gpt-5.5",
    fallbacks: ["minimax/MiniMax-M2.7"],
  };
  const routingConfig = {
    enabled: true,
    capability_routing: true,
  };
  const stickyRoutingConfig = { ...routingConfig, sticky_routing: true };
  const availableModels = ["kimi-coding/kimi-k2.6", "minimax/MiniMax-M2.7"];
  const stickyHint = { provider: "minimax", id: "MiniMax-M2.7" };

  // Default routing: the sticky hint is supplied but must not win.
  const capabilityPick = resolveModelForComplexity(
    { tier: "standard" },
    phaseConfig,
    routingConfig,
    availableModels,
    "challenge",
    {},
    {},
    stickyHint,
  );
  expect(capabilityPick.selectionMethod).toBe("capability-scored");
  expect(capabilityPick.modelId).toBe("kimi-coding/kimi-k2.6");

  // Opt-in sticky routing: the hinted model is selected.
  const stickyPick = resolveModelForComplexity(
    { tier: "standard" },
    phaseConfig,
    stickyRoutingConfig,
    availableModels,
    "challenge",
    {},
    {},
    stickyHint,
  );
  expect(stickyPick.selectionMethod).toBe("slice-sticky");
  expect(stickyPick.modelId).toBe("minimax/MiniMax-M2.7");
});
|
||||
|
||||
test("devstral variants score below agentic models on execute-task", () => {
|
||||
const devstralScore = scoreModel(
|
||||
MODEL_CAPABILITY_PROFILES["devstral-2512"],
|
||||
|
|
|
|||
|
|
@ -110,6 +110,25 @@ describe("preferences model resolution", () => {
|
|||
});
|
||||
});
|
||||
|
||||
test("resolveModelWithFallbacksForUnit_when_challenge_uses_validation_model", () => {
  // Preferences that configure different models for planning and validation.
  const preferenceLines = [
    "version: 1",
    "models:",
    " planning: minimax/MiniMax-M2.7",
    " validation: kimi-coding/kimi-k2.6",
    "",
  ];
  makePreferencesProject(preferenceLines.join("\n"));

  const resolved = resolveModelWithFallbacksForUnit("challenge");

  // The challenge unit must resolve to the validation model, not planning.
  const expected = {
    primary: "kimi-coding/kimi-k2.6",
    fallbacks: [],
  };
  assert.deepEqual(resolved, expected);
});
|
||||
|
||||
test("isModelInEnabledList_when_list_empty_allows_any_model", () => {
|
||||
assert.equal(isModelInEnabledList("kimi-coding", "kimi-k2.6", []), true);
|
||||
assert.equal(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,106 @@
|
|||
import assert from "node:assert/strict";
|
||||
import {
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
mkdtempSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, describe, test } from "vitest";
|
||||
import {
|
||||
closeDatabase,
|
||||
flushRoadmapProjectionRefreshesForTests,
|
||||
insertMilestone,
|
||||
insertSlice,
|
||||
openDatabase,
|
||||
updateSliceStatus,
|
||||
upsertMilestonePlanning,
|
||||
} from "../sf-db.js";
|
||||
|
||||
const originalCwd = process.cwd();
|
||||
const originalEnv = { ...process.env };
|
||||
const tmpDirs = [];
|
||||
|
||||
// Restore process-wide state touched by makeProject and delete temp projects.
afterEach(() => {
  closeDatabase();
  process.chdir(originalCwd);
  process.env = { ...originalEnv };
  // Remove the most recently created directory first, until none are left.
  for (let dir = tmpDirs.pop(); dir !== undefined; dir = tmpDirs.pop()) {
    rmSync(dir, { recursive: true, force: true });
  }
});
|
||||
|
||||
/**
 * Create a throwaway project with an open database and projection sync
 * force-enabled, and switch the process into it.
 *
 * @returns Absolute path of the temporary project directory.
 */
function makeProject() {
  const projectDir = mkdtempSync(join(tmpdir(), "sf-roadmap-sync-"));
  tmpDirs.push(projectDir);

  const stateDir = join(projectDir, ".sf");
  mkdirSync(stateDir, { recursive: true });

  // Opt in explicitly for this test.
  process.env.SF_ROADMAP_PROJECTION_SYNC = "1";
  process.chdir(projectDir);
  openDatabase(join(stateDir, "sf.db"));
  return projectDir;
}
|
||||
|
||||
describe("roadmap projection sync", () => {
  test("db_writes_refresh_roadmap_projection", async () => {
    const project = makeProject();
    const milestoneDir = join(project, ".sf", "milestones", "M777");
    const roadmapPath = join(milestoneDir, "M777-ROADMAP.md");
    const jsonPath = join(milestoneDir, "M777-ROADMAP.json");

    // Phase 1: initial inserts must produce both projection files.
    insertMilestone({
      id: "M777",
      title: "Initial server plan",
      status: "queued",
      planning: {
        vision: "Keep planning state in SQLite.",
        successCriteria: ["Projection exists."],
      },
    });
    insertSlice({
      milestoneId: "M777",
      id: "S01",
      title: "Render projection",
      status: "pending",
      sequence: 1,
      planning: {
        goal: "Write ROADMAP.md and ROADMAP.json from DB state.",
      },
    });
    await flushRoadmapProjectionRefreshesForTests();

    assert.equal(existsSync(roadmapPath), true);
    assert.equal(existsSync(jsonPath), true);
    assert.match(readFileSync(roadmapPath, "utf-8"), /Initial server plan/);

    // Phase 2: later updates must be reflected in the regenerated files.
    upsertMilestonePlanning("M777", {
      title: "Server-owned roadmap projection",
      vision: "The server refreshes generated roadmap files after DB writes.",
    });
    updateSliceStatus("M777", "S01", "complete", "2026-05-17T20:00:00.000Z");
    await flushRoadmapProjectionRefreshesForTests();

    const roadmapText = readFileSync(roadmapPath, "utf-8");
    const projectionJson = JSON.parse(readFileSync(jsonPath, "utf-8"));
    assert.match(roadmapText, /Server-owned roadmap projection/);
    assert.match(
      roadmapText,
      /The server refreshes generated roadmap files after DB writes/,
    );
    assert.match(roadmapText, /- \[x\] \*\*S01: Render projection\*\*/);
    assert.equal(projectionJson.origin, "db-projection");
    assert.equal(projectionJson.slices[0].status, "complete");
  });
});
|
||||
|
|
@ -11,6 +11,7 @@ import { repeatedFeedbackKindGate } from "../detectors/repeated-feedback-kind.js
|
|||
import { artifactFlapGate } from "../detectors/artifact-flap.js";
|
||||
import { staleLockGate } from "../detectors/stale-lock.js";
|
||||
import { periodicDetectorSweepGate } from "../detectors/periodic-runner.js";
|
||||
import { serverDirectionDriftGate } from "../detectors/server-direction-drift.js";
|
||||
import { inlineRuntimeGate } from "./inline-runtime-gate.js";
|
||||
|
||||
/**
|
||||
|
|
@ -41,6 +42,7 @@ registry.register(zeroProgressGate);
|
|||
registry.register(repeatedFeedbackKindGate);
|
||||
registry.register(artifactFlapGate);
|
||||
registry.register(staleLockGate);
|
||||
registry.register(serverDirectionDriftGate);
|
||||
registry.register(periodicDetectorSweepGate);
|
||||
registry.register(inlineRuntimeGate);
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,24 @@ const handlerSrc = readFileSync(
|
|||
join(__dirname, "..", "headless-feedback.ts"),
|
||||
"utf-8",
|
||||
);
|
||||
// Source text of the headless forwarding client, one directory above this test.
const forwardSrc = readFileSync(
  join(__dirname, "..", "headless-server-forward.ts"),
  "utf-8",
);

// Source text of the RPC server mode inside the coding-agent package.
const rpcModePath = join(
  __dirname,
  "..",
  "..",
  "packages",
  "coding-agent",
  "src",
  "modes",
  "rpc",
  "rpc-mode.ts",
);
const rpcModeSrc = readFileSync(rpcModePath, "utf-8");
|
||||
|
||||
test("headless.ts dispatches feedback command to handleFeedback", () => {
|
||||
assert.match(
|
||||
|
|
@ -72,5 +90,29 @@ test("add path defaults blocking from severity, doesn't require it", () => {
|
|||
// readBoolFlag(--blocking) OR severity === high|critical → blocking=true.
|
||||
// The behaviour is documented in self-feedback.js (deriveBlocking),
|
||||
// mirror it so operator-filed entries have consistent semantics.
|
||||
assert.match(handlerSrc, /severity === "high" \|\| severity === "critical"/);
|
||||
assert.match(handlerSrc, /severity === "high"/);
|
||||
assert.match(handlerSrc, /severity === "critical"/);
|
||||
});
|
||||
|
||||
test("active-server feedback forwarding queues writes instead of blocking RPC", () => {
  // Each entry pins one piece of the queue contract to the source it lives in.
  const expectations = [
    {
      source: forwardSrc,
      pattern: /queued:\s*true/,
      message:
        "forwarded add/resolve commands must ask the active RPC server to queue writes",
    },
    {
      source: rpcModeSrc,
      pattern: /SF_FEEDBACK_QUEUE_FILE = "sf-feedback-queue\.jsonl"/,
      message: "RPC server must persist queued feedback commands durably",
    },
    {
      source: rpcModeSrc,
      pattern: /await drainQueuedSfFeedbackCommands\(process\.cwd\(\)\)/,
      message:
        "server-owned autonomous startup must drain queued feedback before running",
    },
    {
      source: rpcModeSrc,
      pattern: /scheduleQueuedSfFeedbackDrain\(process\.cwd\(\)\)/,
      message:
        "queued feedback commands should also drain from the server control lane",
    },
  ];

  for (const { source, pattern, message } of expectations) {
    assert.match(source, pattern, message);
  }
});
|
||||
|
|
|
|||
|
|
@ -954,6 +954,51 @@ test("sf server stop <path> is parsed and dispatched with resolved path", async
|
|||
assert.equal(stopOptions?.all, false);
|
||||
});
|
||||
|
||||
test("sf server reload <path> is parsed as reload launch", async (_t) => {
  const projectDir = mkdtempSync(join(tmpdir(), "sf-web-reload-path-"));
  // Captures the options the CLI branch hands to the web-mode launcher.
  let launchOptions: Record<string, unknown> | undefined;

  afterEach(() => {
    rmSync(projectDir, { recursive: true, force: true });
  });

  mkdirSync(projectDir, { recursive: true });
  const argv = ["node", "dist/loader.js", "server", "reload", projectDir];
  const flags = cliWeb.parseCliArgs(argv);
  assert.deepEqual(flags.messages, ["server", "reload", projectDir]);

  const outcome = await cliWeb.runWebCliBranch(flags, {
    cwd: () => "/",
    runWebMode: async (options) => {
      launchOptions = options as unknown as Record<string, unknown>;
      // Minimal successful launch status echoing the requested project.
      return {
        mode: "web" as const,
        ok: true as const,
        cwd: options.cwd,
        projectSessionsDir: options.projectSessionsDir,
        host: "127.0.0.1",
        port: 4000,
        url: "http://127.0.0.1:4000",
        hostKind: "packaged-standalone" as const,
        hostPath: "/tmp/server.js",
        hostRoot: "/tmp",
      };
    },
    stderr: { write: () => true },
  });

  assert.equal(outcome.handled, true);
  if (!outcome.handled) throw new Error("expected handled");
  assert.equal(outcome.action, "reload");
  // The path argument, not the fake cwd, selects the project, and reload is set.
  assert.equal(launchOptions?.cwd, projectDir);
  assert.equal(launchOptions?.reload, true);
});
|
||||
|
||||
// ─── Context-aware launch detection tests ──────────────────────────────
|
||||
|
||||
test("resolveContextAwareCwd returns project cwd when inside a project under dev root", (_t) => {
|
||||
|
|
@ -1137,12 +1182,94 @@ test("launchWebMode kills stale instance for same cwd before spawning", async (_
|
|||
assert.equal(status.ok, true);
|
||||
assert.equal(spawnCalled, true);
|
||||
// Stale instance for same cwd should have been cleaned up
|
||||
assert.match(stderrOutput, /Cleaning up stale/);
|
||||
assert.match(stderrOutput, /Stale SF server was already stopped/);
|
||||
// New instance should be registered
|
||||
const registry = webMode.readInstanceRegistry(registryPath);
|
||||
assert.equal(registry[resolve(cwd)]?.pid, 88888);
|
||||
});
|
||||
|
||||
test("launchWebMode reload proves candidate before replacing fixed-port server", async (_t) => {
  const tmp = mkdtempSync(join(tmpdir(), "sf-web-reload-"));
  const standaloneRoot = join(tmp, "dist", "web", "standalone");
  mkdirSync(standaloneRoot, { recursive: true });
  writeFileSync(join(standaloneRoot, "server.js"), 'console.log("stub")\n');

  const registryPath = join(tmp, "web-instances.json");
  const pidFilePath = join(tmp, "web-server.pid");
  const cwd = "/tmp/reload-project";
  const oldPid = 77777;
  const fixedPort = 4000;
  const candidatePort = 45123;

  // Pre-register the instance that the reload is expected to replace.
  webMode.registerInstance(
    cwd,
    { pid: oldPid, port: fixedPort, url: "http://127.0.0.1:4000" },
    registryPath,
  );

  const spawnedPorts: string[] = [];
  const probedUrls: string[] = [];
  let nextPid = 90000;
  let stderrLog = "";

  afterEach(() => {
    rmSync(tmp, { recursive: true, force: true });
  });

  const status = await webMode.launchWebMode(
    {
      cwd,
      projectSessionsDir: "/tmp/.sf/sessions/reload",
      agentDir: "/tmp/.sf/agent",
      packageRoot: tmp,
      port: fixedPort,
      reload: true,
    },
    {
      initResources: () => {},
      resolvePort: async () => candidatePort,
      execPath: "/custom/node",
      env: { TEST_ENV: "1" },
      // Only a signal-0 probe of the old pid succeeds; every other call
      // fails with ESRCH.
      kill: ((pid: number, signal?: string | number) => {
        if (pid === oldPid && signal === 0) return true;
        const missing = new Error("no such process") as NodeJS.ErrnoException;
        missing.code = "ESRCH";
        throw missing;
      }) as typeof process.kill,
      spawn: (_command, _args, options) => {
        spawnedPorts.push(String(options.env?.PORT));
        const pid = nextPid++;
        return {
          pid,
          once: () => undefined,
          unref: () => {},
        } as any;
      },
      waitForBootReady: async (url) => {
        probedUrls.push(url);
      },
      openBrowser: () => {},
      pidFilePath,
      writePidFile: webMode.writePidFile,
      registryPath,
      stderr: {
        write(chunk: string) {
          stderrLog += chunk;
          return true;
        },
      },
    },
  );

  assert.equal(status.ok, true);
  // First spawn is on the resolved candidate port, second on the fixed port.
  assert.deepEqual(spawnedPorts, ["45123", "4000"]);
  assert.deepEqual(probedUrls, [
    "http://127.0.0.1:45123",
    "http://127.0.0.1:4000",
  ]);
  assert.match(stderrLog, /Proving reload candidate/);
  assert.match(stderrLog, /Reload candidate passed boot check/);

  // The registry must point at the second spawned process on the fixed port.
  const registered = webMode.readInstanceRegistry(registryPath)[resolve(cwd)];
  assert.equal(registered?.pid, 90001);
  assert.equal(registered?.port, 4000);
});
|
||||
|
||||
test("launchWebMode does not log cleanup when no stale instance exists", async (_t) => {
|
||||
const tmp = mkdtempSync(join(tmpdir(), "sf-web-no-stale-"));
|
||||
const standaloneRoot = join(tmp, "dist", "web", "standalone");
|
||||
|
|
|
|||
349
src/web-mode.ts
349
src/web-mode.ts
|
|
@ -56,6 +56,16 @@ export interface WebModeLaunchOptions {
|
|||
packageRoot?: string;
|
||||
host?: string;
|
||||
port?: number;
|
||||
/**
|
||||
* Reload an existing registered server after the replacement passes boot.
|
||||
*
|
||||
* Purpose: keep `sf server` upgrades graceful by proving the candidate host
|
||||
* is healthy before terminating the old process bound to the project.
|
||||
*
|
||||
* Consumer: `sf server reload` and default `sf server start` behavior when a
|
||||
* live same-project instance already exists.
|
||||
*/
|
||||
reload?: boolean;
|
||||
/** Additional allowed origins for CORS (forwarded as SF_WEB_ALLOWED_ORIGINS). */
|
||||
allowedOrigins?: string[];
|
||||
}
|
||||
|
|
@ -128,6 +138,7 @@ export interface WebModeDeps {
|
|||
writePidFile?: (path: string, pid: number) => void;
|
||||
readPidFile?: (path: string) => number | null;
|
||||
deletePidFile?: (path: string) => void;
|
||||
kill?: typeof process.kill;
|
||||
/** Path to the multi-instance registry JSON (for testing). */
|
||||
registryPath?: string;
|
||||
}
|
||||
|
|
@ -146,6 +157,11 @@ export interface WebModeStopResult {
|
|||
stoppedCount?: number;
|
||||
}
|
||||
|
||||
/**
 * Result of looking up the registered server instance for a project.
 *
 * - `none`: the registry has no entry for the project.
 * - `dead`: an entry exists but its pid no longer exists.
 * - `live`: an entry exists and its pid still exists.
 */
type ExistingServerInstance =
|
||||
| { state: "none" }
|
||||
| { state: "dead"; entry: WebInstanceEntry }
|
||||
| { state: "live"; entry: WebInstanceEntry };
|
||||
|
||||
// ─── Instance Registry ──────────────────────────────────────────────────────
|
||||
|
||||
export interface WebInstanceEntry {
|
||||
|
|
@ -831,6 +847,57 @@ function cleanupStaleInstance(
|
|||
unregisterInstance(cwd, registryPath);
|
||||
}
|
||||
|
||||
/**
 * Classify the registry entry (if any) recorded for a project directory.
 *
 * Reads the instance registry, looks up the entry keyed by the resolved
 * `cwd`, and probes its pid via `pidExists`. This function only inspects
 * state; it does not modify the registry or signal the process itself.
 *
 * @param cwd - Project directory; resolved to an absolute path for the lookup.
 * @param registryPath - Optional registry file path, forwarded to `readInstanceRegistry`.
 * @param kill - Kill implementation forwarded to `pidExists`; defaults to `process.kill`.
 * @returns `none` when no entry exists, `dead` when the entry's pid is gone,
 *          otherwise `live`. The entry is included for `dead` and `live`.
 */
function getRegisteredServerInstance(
|
||||
cwd: string,
|
||||
registryPath?: string,
|
||||
kill: typeof process.kill = process.kill,
|
||||
): ExistingServerInstance {
|
||||
const registry = readInstanceRegistry(registryPath);
|
||||
const entry = registry[resolve(cwd)];
|
||||
// No row for this project at all.
if (!entry) return { state: "none" };
|
||||
// A row exists but the recorded process is gone.
if (!pidExists(entry.pid, kill)) return { state: "dead", entry };
|
||||
return { state: "live", entry };
|
||||
}
|
||||
|
||||
/**
 * Remove the registry entry of a server whose process has already exited.
 *
 * Writes a one-line notice naming the stale pid to `stderr`, then
 * unregisters the project's entry. No process is signalled here.
 *
 * @param cwd - Project directory whose registry entry is cleared.
 * @param stderr - Sink for the diagnostic message.
 * @param entry - The stale registry entry; only its `pid` is used, for the message.
 * @param registryPath - Optional registry file path, forwarded to `unregisterInstance`.
 */
function cleanupDeadRegisteredInstance(
|
||||
cwd: string,
|
||||
stderr: WritableLike,
|
||||
entry: WebInstanceEntry,
|
||||
registryPath?: string,
|
||||
): void {
|
||||
stderr.write(
|
||||
`[forge] Stale SF server was already stopped (pid=${entry.pid}) — clearing entry.\n`,
|
||||
);
|
||||
unregisterInstance(cwd, registryPath);
|
||||
}
|
||||
|
||||
/**
 * Stop the previous server instance during a reload and clear its registry row.
 *
 * Terminates the process tree rooted at `entry.pid` and reports the outcome
 * on `stderr`. When termination fails, the function returns early and leaves
 * the registry entry in place; otherwise the entry for `cwd` is removed.
 *
 * @param cwd - Project directory whose previous instance is being replaced.
 * @param stderr - Sink for progress and diagnostic messages.
 * @param entry - Registry entry of the previous instance; only its `pid` is used.
 * @param registryPath - Optional registry file path, forwarded to `unregisterInstance`.
 */
function stopReloadedInstance(
|
||||
cwd: string,
|
||||
stderr: WritableLike,
|
||||
entry: WebInstanceEntry,
|
||||
registryPath?: string,
|
||||
): void {
|
||||
const result = terminateWebServerProcessTree(entry.pid);
|
||||
if (result === "killed" || result === "force-killed") {
|
||||
stderr.write(
|
||||
`[forge] Reloaded SF server for ${resolve(cwd)}; stopped previous pid=${entry.pid}.\n`,
|
||||
);
|
||||
} else if (result === "already-dead") {
|
||||
stderr.write(
|
||||
`[forge] Previous SF server already exited during reload (pid=${entry.pid}).\n`,
|
||||
);
|
||||
} else {
|
||||
stderr.write(
|
||||
`[forge] Reload candidate is running, but previous SF server pid=${entry.pid} did not stop: ${result.error}\n`,
|
||||
);
|
||||
// The old process is still running, so keep its registry row.
return;
|
||||
}
|
||||
// Drop the previous instance's registry row now that the process is gone.
|
||||
// unregisterInstance deletes by cwd, so callers must invoke this before
|
||||
// registering the replacement, or the replacement's row would be removed.
|
||||
unregisterInstance(cwd, registryPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect and reap orphaned next-server processes that outlived their parent
|
||||
* web host. These orphans have cwd under dist/web/standalone (or a deleted
|
||||
|
|
@ -951,10 +1018,35 @@ export async function launchWebMode(
|
|||
|
||||
stderr.write(`[forge] Starting server mode…\n`);
|
||||
|
||||
// Kill any stale server instance for this project before reserving a port.
|
||||
// This prevents EADDRINUSE when the previous `sf server` was terminated
|
||||
// without a clean shutdown (e.g. terminal closed, crash).
|
||||
cleanupStaleInstance(options.cwd, stderr, deps.registryPath);
|
||||
const existing = getRegisteredServerInstance(
|
||||
options.cwd,
|
||||
deps.registryPath,
|
||||
deps.kill,
|
||||
);
|
||||
let reloadPrevious: WebInstanceEntry | null = null;
|
||||
if (
|
||||
existing.state === "live" &&
|
||||
(options.reload === true ||
|
||||
!options.port ||
|
||||
options.port === existing.entry.port)
|
||||
) {
|
||||
reloadPrevious = existing.entry;
|
||||
stderr.write(
|
||||
`[forge] Existing SF server found for ${resolve(options.cwd)} (pid=${existing.entry.pid}, port=${existing.entry.port}); launching replacement before shutdown.\n`,
|
||||
);
|
||||
} else if (existing.state === "dead") {
|
||||
cleanupDeadRegisteredInstance(
|
||||
options.cwd,
|
||||
stderr,
|
||||
existing.entry,
|
||||
deps.registryPath,
|
||||
);
|
||||
} else if (existing.state === "live") {
|
||||
// Explicit fixed-port start cannot bind beside a live same-port process.
|
||||
// Stop it before launch so legacy `sf server start --port 4000` keeps
|
||||
// working, while normal starts use reload-first behavior.
|
||||
cleanupStaleInstance(options.cwd, stderr, deps.registryPath);
|
||||
}
|
||||
|
||||
// Also reap orphaned next-server processes from prior unclean shutdowns
|
||||
// (sf-mooe4m5k-6fm7z9): orphaned next-server processes with cwd under
|
||||
|
|
@ -969,28 +1061,11 @@ export async function launchWebMode(
|
|||
);
|
||||
}
|
||||
|
||||
const port =
|
||||
const targetPort =
|
||||
options.port ??
|
||||
reloadPrevious?.port ??
|
||||
(deps.resolvePort ? await deps.resolvePort(host) : DEFAULT_PORT);
|
||||
const authToken = randomBytes(32).toString("hex");
|
||||
const url = `http://${host}:${port}`;
|
||||
const env = {
|
||||
...(deps.env ?? process.env),
|
||||
HOSTNAME: host,
|
||||
PORT: String(port),
|
||||
SF_WEB_HOST: host,
|
||||
SF_WEB_PORT: String(port),
|
||||
SF_WEB_AUTH_TOKEN: authToken,
|
||||
SF_WEB_PROJECT_CWD: options.cwd,
|
||||
SF_WEB_PROJECT_SESSIONS_DIR: options.projectSessionsDir,
|
||||
SF_WEB_PACKAGE_ROOT: resolution.packageRoot,
|
||||
SF_WEB_HOST_KIND: resolution.kind,
|
||||
SF_WEB_AUTO_START_AUTONOMOUS: "1",
|
||||
...(resolution.kind === "source-dev" ? { NEXT_PUBLIC_SF_DEV: "1" } : {}),
|
||||
...(options.allowedOrigins?.length
|
||||
? { SF_WEB_ALLOWED_ORIGINS: options.allowedOrigins.join(",") }
|
||||
: {}),
|
||||
};
|
||||
const targetUrl = `http://${host}:${targetPort}`;
|
||||
|
||||
try {
|
||||
stderr.write(`[forge] Initialising resources…\n`);
|
||||
|
|
@ -1005,8 +1080,8 @@ export async function launchWebMode(
|
|||
cwd: options.cwd,
|
||||
projectSessionsDir: options.projectSessionsDir,
|
||||
host,
|
||||
port,
|
||||
url,
|
||||
port: targetPort,
|
||||
url: targetUrl,
|
||||
hostKind: resolution.kind,
|
||||
hostPath: resolution.entryPath,
|
||||
hostRoot: resolution.hostRoot,
|
||||
|
|
@ -1016,89 +1091,163 @@ export async function launchWebMode(
|
|||
return failure;
|
||||
}
|
||||
|
||||
const spawnSpec = buildSpawnSpec(
|
||||
resolution,
|
||||
host,
|
||||
port,
|
||||
deps.platform ?? process.platform,
|
||||
deps.execPath ?? process.execPath,
|
||||
);
|
||||
|
||||
stderr.write(`[forge] Launching web host on port ${port}…\n`);
|
||||
|
||||
const spawnResult = await spawnDetachedProcess(
|
||||
deps.spawn ??
|
||||
((command, args, spawnOptions) => spawn(command, args, spawnOptions)),
|
||||
spawnSpec.command,
|
||||
spawnSpec.args,
|
||||
{
|
||||
cwd: spawnSpec.cwd,
|
||||
detached: true,
|
||||
stdio: "ignore",
|
||||
windowsHide: true,
|
||||
shell: needsWindowsShell(
|
||||
spawnSpec.command,
|
||||
deps.platform ?? process.platform,
|
||||
),
|
||||
env,
|
||||
},
|
||||
);
|
||||
|
||||
if (!spawnResult.ok) {
|
||||
const failure: WebModeLaunchFailure = {
|
||||
mode: "web",
|
||||
ok: false,
|
||||
cwd: options.cwd,
|
||||
projectSessionsDir: options.projectSessionsDir,
|
||||
const spawnVerifiedHost = async (
|
||||
port: number,
|
||||
label: "candidate" | "web host",
|
||||
autoStartAutonomous: boolean,
|
||||
): Promise<
|
||||
| {
|
||||
ok: true;
|
||||
child: SpawnedChildLike;
|
||||
authToken: string;
|
||||
url: string;
|
||||
}
|
||||
| { ok: false; failure: WebModeLaunchFailure }
|
||||
> => {
|
||||
const authToken = randomBytes(32).toString("hex");
|
||||
const url = `http://${host}:${port}`;
|
||||
const env = {
|
||||
...(deps.env ?? process.env),
|
||||
HOSTNAME: host,
|
||||
PORT: String(port),
|
||||
SF_WEB_HOST: host,
|
||||
SF_WEB_PORT: String(port),
|
||||
SF_WEB_AUTH_TOKEN: authToken,
|
||||
SF_WEB_PROJECT_CWD: options.cwd,
|
||||
SF_WEB_PROJECT_SESSIONS_DIR: options.projectSessionsDir,
|
||||
SF_WEB_PACKAGE_ROOT: resolution.packageRoot,
|
||||
SF_WEB_HOST_KIND: resolution.kind,
|
||||
SF_WEB_AUTO_START_AUTONOMOUS: autoStartAutonomous ? "1" : "0",
|
||||
...(resolution.kind === "source-dev" ? { NEXT_PUBLIC_SF_DEV: "1" } : {}),
|
||||
...(options.allowedOrigins?.length
|
||||
? { SF_WEB_ALLOWED_ORIGINS: options.allowedOrigins.join(",") }
|
||||
: {}),
|
||||
};
|
||||
const spawnSpec = buildSpawnSpec(
|
||||
resolution,
|
||||
host,
|
||||
port,
|
||||
url,
|
||||
hostKind: resolution.kind,
|
||||
hostPath: resolution.entryPath,
|
||||
hostRoot: resolution.hostRoot,
|
||||
failureReason: `launch:${spawnResult.error instanceof Error ? spawnResult.error.message : String(spawnResult.error)}`,
|
||||
};
|
||||
emitLaunchStatus(stderr, failure);
|
||||
return failure;
|
||||
deps.platform ?? process.platform,
|
||||
deps.execPath ?? process.execPath,
|
||||
);
|
||||
stderr.write(`[forge] Launching ${label} on port ${port}…\n`);
|
||||
const spawnResult = await spawnDetachedProcess(
|
||||
deps.spawn ??
|
||||
((command, args, spawnOptions) => spawn(command, args, spawnOptions)),
|
||||
spawnSpec.command,
|
||||
spawnSpec.args,
|
||||
{
|
||||
cwd: spawnSpec.cwd,
|
||||
detached: true,
|
||||
stdio: "ignore",
|
||||
windowsHide: true,
|
||||
shell: needsWindowsShell(
|
||||
spawnSpec.command,
|
||||
deps.platform ?? process.platform,
|
||||
),
|
||||
env,
|
||||
},
|
||||
);
|
||||
if (!spawnResult.ok) {
|
||||
return {
|
||||
ok: false,
|
||||
failure: {
|
||||
mode: "web",
|
||||
ok: false,
|
||||
cwd: options.cwd,
|
||||
projectSessionsDir: options.projectSessionsDir,
|
||||
host,
|
||||
port,
|
||||
url,
|
||||
hostKind: resolution.kind,
|
||||
hostPath: resolution.entryPath,
|
||||
hostRoot: resolution.hostRoot,
|
||||
failureReason: `launch:${spawnResult.error instanceof Error ? spawnResult.error.message : String(spawnResult.error)}`,
|
||||
},
|
||||
};
|
||||
}
|
||||
try {
|
||||
const bootReadyFn =
|
||||
deps.waitForBootReady ??
|
||||
((u: string) => waitForBootReady(u, 180_000, stderr, authToken));
|
||||
await bootReadyFn(url);
|
||||
} catch (error) {
|
||||
if (spawnResult.child.pid !== undefined) {
|
||||
terminateWebServerProcessTree(spawnResult.child.pid);
|
||||
}
|
||||
return {
|
||||
ok: false,
|
||||
failure: {
|
||||
mode: "web",
|
||||
ok: false,
|
||||
cwd: options.cwd,
|
||||
projectSessionsDir: options.projectSessionsDir,
|
||||
host,
|
||||
port,
|
||||
url,
|
||||
hostKind: resolution.kind,
|
||||
hostPath: resolution.entryPath,
|
||||
hostRoot: resolution.hostRoot,
|
||||
failureReason: `boot-ready:${error instanceof Error ? error.message : String(error)}`,
|
||||
},
|
||||
};
|
||||
}
|
||||
return { ok: true, child: spawnResult.child, authToken, url };
|
||||
};
|
||||
|
||||
if (reloadPrevious) {
|
||||
const candidatePort = deps.resolvePort
|
||||
? await deps.resolvePort(host)
|
||||
: await reserveWebPort(host);
|
||||
stderr.write(
|
||||
`[forge] Proving reload candidate on temporary port ${candidatePort} before touching fixed port ${targetPort}…\n`,
|
||||
);
|
||||
const candidate = await spawnVerifiedHost(
|
||||
candidatePort,
|
||||
"candidate",
|
||||
false,
|
||||
);
|
||||
if (!candidate.ok) {
|
||||
emitLaunchStatus(stderr, candidate.failure);
|
||||
return candidate.failure;
|
||||
}
|
||||
if (candidate.child.pid !== undefined) {
|
||||
terminateWebServerProcessTree(candidate.child.pid);
|
||||
}
|
||||
stderr.write(`[forge] Reload candidate passed boot check.\n`);
|
||||
stopReloadedInstance(
|
||||
options.cwd,
|
||||
stderr,
|
||||
reloadPrevious,
|
||||
deps.registryPath,
|
||||
);
|
||||
}
|
||||
|
||||
const finalHost = await spawnVerifiedHost(targetPort, "web host", true);
|
||||
if (!finalHost.ok) {
|
||||
emitLaunchStatus(stderr, finalHost.failure);
|
||||
return finalHost.failure;
|
||||
}
|
||||
|
||||
try {
|
||||
const bootReadyFn =
|
||||
deps.waitForBootReady ??
|
||||
((u: string) => waitForBootReady(u, 180_000, stderr, authToken));
|
||||
await bootReadyFn(url);
|
||||
} catch (error) {
|
||||
const failure: WebModeLaunchFailure = {
|
||||
mode: "web",
|
||||
ok: false,
|
||||
cwd: options.cwd,
|
||||
projectSessionsDir: options.projectSessionsDir,
|
||||
host,
|
||||
port,
|
||||
url,
|
||||
hostKind: resolution.kind,
|
||||
hostPath: resolution.entryPath,
|
||||
hostRoot: resolution.hostRoot,
|
||||
failureReason: `boot-ready:${error instanceof Error ? error.message : String(error)}`,
|
||||
};
|
||||
emitLaunchStatus(stderr, failure);
|
||||
return failure;
|
||||
}
|
||||
|
||||
try {
|
||||
spawnResult.child.unref?.();
|
||||
const pid = spawnResult.child.pid;
|
||||
finalHost.child.unref?.();
|
||||
const pid = finalHost.child.pid;
|
||||
if (pid !== undefined) {
|
||||
const pidFilePath = deps.pidFilePath ?? defaultWebPidFilePath;
|
||||
(deps.writePidFile ?? writePidFile)(pidFilePath, pid);
|
||||
// Register in multi-instance registry
|
||||
registerInstance(
|
||||
options.cwd,
|
||||
{ pid, port, url, authToken },
|
||||
{
|
||||
pid,
|
||||
port: targetPort,
|
||||
url: targetUrl,
|
||||
authToken: finalHost.authToken,
|
||||
},
|
||||
deps.registryPath,
|
||||
);
|
||||
}
|
||||
const authenticatedUrl = `${url}/#token=${authToken}`;
|
||||
const authenticatedUrl = `${targetUrl}/#token=${finalHost.authToken}`;
|
||||
try {
|
||||
(deps.openBrowser ?? openBrowser)(authenticatedUrl);
|
||||
} catch (browserError) {
|
||||
|
|
@ -1113,8 +1262,8 @@ export async function launchWebMode(
|
|||
cwd: options.cwd,
|
||||
projectSessionsDir: options.projectSessionsDir,
|
||||
host,
|
||||
port,
|
||||
url,
|
||||
port: targetPort,
|
||||
url: targetUrl,
|
||||
hostKind: resolution.kind,
|
||||
hostPath: resolution.entryPath,
|
||||
hostRoot: resolution.hostRoot,
|
||||
|
|
@ -1124,15 +1273,15 @@ export async function launchWebMode(
|
|||
return failure;
|
||||
}
|
||||
|
||||
const authenticatedUrl = `${url}/#token=${authToken}`;
|
||||
const authenticatedUrl = `${targetUrl}/#token=${finalHost.authToken}`;
|
||||
const success: WebModeLaunchSuccess = {
|
||||
mode: "web",
|
||||
ok: true,
|
||||
cwd: options.cwd,
|
||||
projectSessionsDir: options.projectSessionsDir,
|
||||
host,
|
||||
port,
|
||||
url,
|
||||
port: targetPort,
|
||||
url: targetUrl,
|
||||
hostKind: resolution.kind,
|
||||
hostPath: resolution.entryPath,
|
||||
hostRoot: resolution.hostRoot,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { execFile } from "node:child_process";
|
||||
import { existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { dirname, join } from "node:path";
|
||||
import { pathToFileURL } from "node:url";
|
||||
import type { SettingsData } from "../../web/lib/settings-types.ts";
|
||||
import { resolveBridgeRuntimeConfig } from "./bridge-service.ts";
|
||||
|
|
@ -65,6 +65,13 @@ export async function collectSettingsData(
|
|||
const budgetPath = budgetResolution.modulePath;
|
||||
const historyPath = historyResolution.modulePath;
|
||||
const metricsPath = metricsResolution.modulePath;
|
||||
const benchmarksPath = join(
|
||||
dirname(routerPath),
|
||||
"learning",
|
||||
"data",
|
||||
"model-benchmarks.json",
|
||||
);
|
||||
const performancePath = join(projectCwd, ".sf", "model-performance.json");
|
||||
|
||||
// All modules share the same compiled-vs-source mode (they're all from the same package)
|
||||
const useCompiledJs = prefsResolution.useCompiledJs;
|
||||
|
|
@ -102,6 +109,7 @@ export async function collectSettingsData(
|
|||
// and writes a combined JSON payload to stdout.
|
||||
const script = [
|
||||
'const { pathToFileURL } = await import("node:url");',
|
||||
'const { existsSync, readFileSync } = await import("node:fs");',
|
||||
"const prefsMod = await import(pathToFileURL(process.env.SF_SETTINGS_PREFS_MODULE).href);",
|
||||
"const routerMod = await import(pathToFileURL(process.env.SF_SETTINGS_ROUTER_MODULE).href);",
|
||||
"const budgetMod = await import(pathToFileURL(process.env.SF_SETTINGS_BUDGET_MODULE).href);",
|
||||
|
|
@ -172,8 +180,45 @@ export async function collectSettingsData(
|
|||
"const ledger = metricsMod.loadLedgerFromDisk(process.env.SF_SETTINGS_BASE);",
|
||||
"const projectTotals = ledger ? metricsMod.getProjectTotals(ledger.units) : null;",
|
||||
|
||||
// 6. Published benchmark table and local learned model outcomes
|
||||
"function readJson(path) {",
|
||||
" if (!path || !existsSync(path)) return null;",
|
||||
" try { return JSON.parse(readFileSync(path, 'utf-8')); } catch { return null; }",
|
||||
"}",
|
||||
"function benchmarkRows(raw) {",
|
||||
" if (!raw || typeof raw !== 'object') return [];",
|
||||
" return Object.entries(raw)",
|
||||
" .filter(([modelId]) => !modelId.startsWith('_'))",
|
||||
" .map(([modelId, row]) => ({ modelId, ...(row && typeof row === 'object' ? row : {}) }))",
|
||||
" .sort((a, b) => String(a.modelId).localeCompare(String(b.modelId)));",
|
||||
"}",
|
||||
"function performanceRows(raw) {",
|
||||
" if (!raw || typeof raw !== 'object') return [];",
|
||||
" const rows = [];",
|
||||
" for (const [unitType, models] of Object.entries(raw)) {",
|
||||
" if (!models || typeof models !== 'object') continue;",
|
||||
" for (const [modelId, value] of Object.entries(models)) {",
|
||||
" if (!value || typeof value !== 'object') continue;",
|
||||
" const aggregate = value.aggregate && typeof value.aggregate === 'object' ? value.aggregate : {};",
|
||||
" rows.push({",
|
||||
" unitType,",
|
||||
" modelId,",
|
||||
" successes: Number(aggregate.successes ?? 0),",
|
||||
" failures: Number(aggregate.failures ?? 0),",
|
||||
" timeouts: Number(aggregate.timeouts ?? 0),",
|
||||
" totalTokens: Number(aggregate.totalTokens ?? 0),",
|
||||
" totalCost: Number(aggregate.totalCost ?? 0),",
|
||||
" lastUsed: aggregate.lastUsed ?? null,",
|
||||
" });",
|
||||
" }",
|
||||
" }",
|
||||
" return rows.sort((a, b) => String(b.lastUsed ?? '').localeCompare(String(a.lastUsed ?? '')));",
|
||||
"}",
|
||||
"const modelBenchmarks = benchmarkRows(readJson(process.env.SF_SETTINGS_BENCHMARKS_PATH));",
|
||||
"const modelPerformance = performanceRows(readJson(process.env.SF_SETTINGS_MODEL_PERFORMANCE_PATH));",
|
||||
|
||||
// Write combined payload
|
||||
"process.stdout.write(JSON.stringify({ preferences, routingConfig, budgetAllocation, routingHistory, projectTotals }));",
|
||||
"process.stdout.write(JSON.stringify({ preferences, routingConfig, budgetAllocation, routingHistory, projectTotals, modelBenchmarks, modelPerformance }));",
|
||||
].join(" ");
|
||||
|
||||
const prefixArgs = buildSubprocessPrefixArgs(
|
||||
|
|
@ -196,6 +241,8 @@ export async function collectSettingsData(
|
|||
SF_SETTINGS_HISTORY_MODULE: historyPath,
|
||||
SF_SETTINGS_METRICS_MODULE: metricsPath,
|
||||
SF_SETTINGS_BASE: projectCwd,
|
||||
SF_SETTINGS_BENCHMARKS_PATH: benchmarksPath,
|
||||
SF_SETTINGS_MODEL_PERFORMANCE_PATH: performancePath,
|
||||
},
|
||||
maxBuffer: SETTINGS_MAX_BUFFER,
|
||||
windowsHide: true,
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@ import { Button } from "@/components/ui/button";
|
|||
import { authFetch } from "@/lib/auth";
|
||||
import type {
|
||||
SettingsData,
|
||||
SettingsModelBenchmark,
|
||||
SettingsModelPerformance,
|
||||
SettingsPatternHistory,
|
||||
SettingsRoutingHistory,
|
||||
} from "@/lib/settings-types";
|
||||
|
|
@ -438,10 +440,68 @@ function TierOutcomeBadge({
|
|||
);
|
||||
}
|
||||
|
||||
/**
 * Strip any provider prefix from a model identifier.
 *
 * Returns the text after the last `/` (for example `"openai/gpt-4o"` becomes
 * `"gpt-4o"`). Identifiers without a `/` are returned unchanged. An
 * identifier that ends in `/` yields an empty string.
 *
 * @param id - Model identifier, optionally prefixed with `provider/`.
 * @returns The final path segment of `id`.
 */
function normalizeModelId(id: string): string {
  // A single scan replaces the includes + split + pop sequence; the result
  // is identical, and no intermediate array or unreachable fallback is needed.
  const lastSlash = id.lastIndexOf("/");
  return lastSlash === -1 ? id : id.slice(lastSlash + 1);
}
|
||||
|
||||
/**
 * Format a benchmark score for display.
 *
 * @param value - Score to format; may be missing.
 * @returns The score with one decimal place when `value` is a finite number,
 *          otherwise an en dash placeholder.
 */
function formatBenchmarkScore(value: number | null | undefined): string {
|
||||
// Number.isFinite also rejects NaN and ±Infinity, not just null/undefined.
return typeof value === "number" && Number.isFinite(value)
|
||||
? value.toFixed(1)
|
||||
: "–";
|
||||
}
|
||||
|
||||
/**
 * Summarise locally recorded outcomes for one model across all unit types.
 *
 * Rows are matched on the provider-less model id, so `"openai/gpt-4o"` and
 * `"gpt-4o"` refer to the same model. This single comparison covers the
 * exact-match and `endsWith("/<bare>")` cases as well, because the bare id
 * never contains a `/`.
 *
 * @param rows - Per-unit-type, per-model outcome rows.
 * @param modelId - Model to summarise, with or without a provider prefix.
 * @returns `runs` (successes + failures + timeouts), plus display strings for
 *          the success rate (rounded percentage) and the total cost. Both
 *          strings are an en dash when there are no runs.
 */
function aggregateModelPerformance(
  rows: SettingsModelPerformance[],
  modelId: string,
): { runs: number; successRate: string; cost: string } {
  const bare = normalizeModelId(modelId);

  let successes = 0;
  let failures = 0;
  let timeouts = 0;
  let cost = 0;
  for (const row of rows) {
    if (normalizeModelId(row.modelId) !== bare) continue;
    successes += row.successes;
    failures += row.failures;
    timeouts += row.timeouts;
    cost += row.totalCost;
  }

  const runs = successes + failures + timeouts;
  if (!(runs > 0)) {
    // Nothing recorded for this model (or unusable counters): show placeholders.
    return { runs, successRate: "–", cost: "–" };
  }
  return {
    runs,
    successRate: `${Math.round((successes / runs) * 100)}%`,
    cost: formatCost(cost),
  };
}
|
||||
|
||||
/**
 * Rank benchmark rows by a weighted composite score and keep the top entries.
 *
 * Composite weights: SWE-bench (verified preferred, falling back to the
 * unverified score) 0.35, LiveCodeBench 0.25, HLE 0.15, GPQA 0.15,
 * instruction following 0.10. Missing scores count as 0. The input array is
 * not mutated, and rows with equal scores keep their input order.
 *
 * @param benchmarks - Rows to rank.
 * @param limit - Maximum number of rows to return; defaults to 12. Values
 *                below zero are treated as zero.
 * @returns A new array of at most `limit` rows, highest composite score first.
 */
function rankedBenchmarks(
  benchmarks: SettingsModelBenchmark[],
  limit = 12,
): SettingsModelBenchmark[] {
  const compositeScore = (row: SettingsModelBenchmark): number =>
    (row.swe_bench_verified ?? row.swe_bench ?? 0) * 0.35 +
    (row.live_code_bench ?? 0) * 0.25 +
    (row.hle ?? 0) * 0.15 +
    (row.gpqa ?? 0) * 0.15 +
    (row.instruction_following ?? 0) * 0.1;

  // Score each row once instead of on every comparator call.
  return benchmarks
    .map((row) => ({ row, score: compositeScore(row) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, Math.max(0, limit))
    .map((scored) => scored.row);
}
|
||||
|
||||
export function ModelRoutingPanel() {
|
||||
const { state, data, busy, refresh } = useSettingsData();
|
||||
const routingConfig = data?.routingConfig ?? null;
|
||||
const routingHistory = data?.routingHistory ?? null;
|
||||
const modelBenchmarks = rankedBenchmarks(data?.modelBenchmarks ?? []);
|
||||
const modelPerformance = data?.modelPerformance ?? [];
|
||||
|
||||
return (
|
||||
<div className="space-y-4" data-testid="settings-model-routing">
|
||||
|
|
@ -569,6 +629,73 @@ export function ModelRoutingPanel() {
|
|||
) : (
|
||||
<SettingsEmpty message="No routing history yet" />
|
||||
)}
|
||||
|
||||
{/* Model benchmarks */}
|
||||
{modelBenchmarks.length > 0 ? (
|
||||
<div className="space-y-2">
|
||||
<h4 className="text-[11px] font-medium text-muted-foreground">
|
||||
Model Benchmarks
|
||||
</h4>
|
||||
<div className="overflow-x-auto rounded-lg border border-border/50 bg-card/50">
|
||||
<table className="w-full text-left text-xs">
|
||||
<thead className="border-b border-border/50 text-[10px] uppercase text-muted-foreground">
|
||||
<tr>
|
||||
<th className="px-3 py-2 font-medium">Model</th>
|
||||
<th className="px-2 py-2 font-medium">SWE</th>
|
||||
<th className="px-2 py-2 font-medium">LCB</th>
|
||||
<th className="px-2 py-2 font-medium">HLE</th>
|
||||
<th className="px-2 py-2 font-medium">GPQA</th>
|
||||
<th className="px-2 py-2 font-medium">Local</th>
|
||||
<th className="px-3 py-2 font-medium">Cost</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{modelBenchmarks.map((row) => {
|
||||
const local = aggregateModelPerformance(
|
||||
modelPerformance,
|
||||
row.modelId,
|
||||
);
|
||||
return (
|
||||
<tr
|
||||
key={row.modelId}
|
||||
className="border-b border-border/30 last:border-0"
|
||||
title={row.source ?? undefined}
|
||||
>
|
||||
<td className="max-w-[180px] truncate px-3 py-2 font-mono text-[11px] text-foreground/85">
|
||||
{row.modelId}
|
||||
</td>
|
||||
<td className="px-2 py-2 tabular-nums">
|
||||
{formatBenchmarkScore(
|
||||
row.swe_bench_verified ?? row.swe_bench,
|
||||
)}
|
||||
</td>
|
||||
<td className="px-2 py-2 tabular-nums">
|
||||
{formatBenchmarkScore(row.live_code_bench)}
|
||||
</td>
|
||||
<td className="px-2 py-2 tabular-nums">
|
||||
{formatBenchmarkScore(row.hle)}
|
||||
</td>
|
||||
<td className="px-2 py-2 tabular-nums">
|
||||
{formatBenchmarkScore(row.gpqa)}
|
||||
</td>
|
||||
<td className="px-2 py-2 tabular-nums">
|
||||
{local.runs > 0
|
||||
? `${local.successRate} / ${local.runs}`
|
||||
: "–"}
|
||||
</td>
|
||||
<td className="px-3 py-2 tabular-nums">
|
||||
{local.cost}
|
||||
</td>
|
||||
</tr>
|
||||
);
|
||||
})}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<SettingsEmpty message="No benchmark data available" />
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
|
@ -775,7 +902,7 @@ export function RemoteQuestionsPanel() {
|
|||
const { data, busy, refresh } = useSettingsData();
|
||||
const existingConfig = data?.preferences?.remoteQuestions ?? null;
|
||||
|
||||
const [_envVarSet, setEnvVarSet] = useState(false);
|
||||
const [, setEnvVarSet] = useState(false);
|
||||
const [envVarName, setEnvVarName] = useState<string | null>(null);
|
||||
const [apiLoading, setApiLoading] = useState(true);
|
||||
const [tokenSet, setTokenSet] = useState(false);
|
||||
|
|
|
|||
|
|
@ -83,6 +83,35 @@ export interface SettingsProjectTotals {
|
|||
userMessages: number;
|
||||
}
|
||||
|
||||
// ─── Model Benchmark And Local Outcome Data ─────────────────────────────────
|
||||
|
||||
/**
 * One row of the published model benchmark table, keyed by model id.
 *
 * Every field other than `modelId` is optional and may be null, so
 * consumers must treat each score as possibly missing.
 * NOTE(review): score units and scale are not established here; the field
 * names suggest per-benchmark scores — confirm against the data file.
 */
export interface SettingsModelBenchmark {
|
||||
modelId: string;
|
||||
swe_bench?: number | null;
|
||||
swe_bench_verified?: number | null;
|
||||
live_code_bench?: number | null;
|
||||
human_eval?: number | null;
|
||||
hle?: number | null;
|
||||
aime_2026?: number | null;
|
||||
gpqa?: number | null;
|
||||
mmlu_pro?: number | null;
|
||||
instruction_following?: number | null;
|
||||
context_window?: number | null;
|
||||
max_output_tokens?: number | null;
|
||||
source?: string | null;
|
||||
}
|
||||
|
||||
/**
 * Locally recorded outcome counters for one model within one unit type.
 *
 * The same `modelId` can appear in several rows, one per `unitType`.
 * NOTE(review): `lastUsed` is presumably a timestamp string — confirm the
 * format against the producer.
 */
export interface SettingsModelPerformance {
|
||||
unitType: string;
|
||||
modelId: string;
|
||||
successes: number;
|
||||
failures: number;
|
||||
timeouts: number;
|
||||
totalTokens: number;
|
||||
totalCost: number;
|
||||
lastUsed: string | null;
|
||||
}
|
||||
|
||||
// ─── Effective Preferences ────────────────────────────────────────────────────
|
||||
|
||||
export interface SettingsPreferencesData {
|
||||
|
|
@ -124,4 +153,6 @@ export interface SettingsData {
|
|||
budgetAllocation: SettingsBudgetAllocation;
|
||||
routingHistory: SettingsRoutingHistory | null;
|
||||
projectTotals: SettingsProjectTotals | null;
|
||||
modelBenchmarks: SettingsModelBenchmark[];
|
||||
modelPerformance: SettingsModelPerformance[];
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue