fix(sf): recover model routes and self-feedback

This commit is contained in:
Mikael Hugo 2026-05-02 22:07:10 +02:00
parent c308a492d7
commit dd126ddc8b
37 changed files with 4295 additions and 563 deletions

View file

@ -27,6 +27,7 @@ class MockRpcClient {
stopped = false;
aborted = false;
prompted: string[] = [];
switchedSessions: string[] = [];
private eventListeners: Array<(event: Record<string, unknown>) => void> = [];
uiResponses: Array<{ requestId: string; response: Record<string, unknown> }> = [];
@ -69,6 +70,16 @@ class MockRpcClient {
/** Record the prompt; a '/sf pause' prompt asynchronously emits the paused notice. */
async prompt(message: string): Promise<void> {
  this.prompted.push(message);
  if (message !== '/sf pause') return;
  const pauseNotice = {
    type: 'extension_ui_request',
    id: 'pause-notice',
    method: 'notify',
    message: 'Auto-mode paused: daemon reload requested',
  };
  queueMicrotask(() => this.emitEvent(pauseNotice));
}
async abort(): Promise<void> {
@ -79,6 +90,18 @@ class MockRpcClient {
this.uiResponses.push({ requestId, response });
}
/** Report a mock session file/id pair derived from initSessionId. */
async getState(): Promise<{ sessionFile: string; sessionId: string }> {
  const sessionId = this.initSessionId;
  const sessionFile = `/tmp/${sessionId}.jsonl`;
  return { sessionFile, sessionId };
}
/** Record the requested session path; the mock never cancels a switch. */
async switchSession(sessionPath: string): Promise<{ cancelled: boolean }> {
  this.switchedSessions.push(sessionPath);
  const outcome = { cancelled: false };
  return outcome;
}
/** Test helper — emit an event to all listeners */
emitEvent(event: Record<string, unknown>): void {
for (const listener of this.eventListeners) {
@ -98,6 +121,15 @@ class TestableSessionManager extends SessionManager {
nextInitError: Error | null = null;
nextStartError: Error | null = null;
/** Inject a MockRpcClient with a sequential mock-session id instead of a real child. */
protected override createRpcClient(_cliPath: string, cwd: string, args: string[]): any {
  this.sessionCounter += 1;
  const paddedId = String(this.sessionCounter).padStart(3, '0');
  const client = new MockRpcClient({ cwd, args });
  client.initSessionId = `mock-session-${paddedId}`;
  this.allClients.push(client);
  this.lastClient = client;
  return client;
}
override async startSession(options: { projectDir: string; command?: string; model?: string; bare?: boolean; cliPath?: string }): Promise<string> {
const { projectDir } = options;
@ -116,7 +148,7 @@ class TestableSessionManager extends SessionManager {
);
}
const client = new MockRpcClient({ cwd: resolvedDir, args: [] });
const client = this.createRpcClient('mock-sf', resolvedDir, []);
if (this.nextStartError) {
client.startError = this.nextStartError;
this.nextStartError = null;
@ -126,22 +158,19 @@ class TestableSessionManager extends SessionManager {
this.nextInitError = null;
}
this.sessionCounter++;
client.initSessionId = `mock-session-${String(this.sessionCounter).padStart(3, '0')}`;
this.lastClient = client;
this.allClients.push(client);
// Build session shell
const session: ManagedSession = {
sessionId: '',
projectDir: resolvedDir,
projectName,
status: 'starting',
reloadState: 'running',
client: client as any, // duck-typed mock
events: [],
pendingBlocker: null,
cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } },
startTime: Date.now(),
startOptions: { ...options, projectDir: resolvedDir },
};
// Insert into internal sessions map
@ -300,6 +329,38 @@ describe('SessionManager', () => {
assert.equal(completedLogs.length, 1);
});
it('runtime epoch mismatch restarts child and resumes prior session file', async () => {
  const { manager } = createManager();
  const sessionId = await manager.startSession({ projectDir: '/tmp/reload-project' });
  const originalClient = manager.lastClient!;
  // Resolves once the manager announces the replacement child.
  const restarted = new Promise<void>((resolve) => {
    manager.once('session:restarted', () => resolve());
  });
  // sourceEpoch (200) newer than runtimeEpoch (100) should trigger a reload.
  originalClient.emitEvent({
    type: 'runtime_heartbeat',
    sessionId,
    sessionFile: '/tmp/reload-session.jsonl',
    unitType: 'execute-task',
    unitId: 'M001/S01/T01',
    runtimeEpoch: 100,
    sourceEpoch: 200,
    emittedAt: Date.now(),
  });
  await restarted;
  // The replacement child re-inits under the next sequential mock session id.
  const session = manager.getSession('mock-session-002')!;
  assert.ok(session);
  // Old child was stopped; exactly one replacement client was created.
  assert.equal(originalClient.stopped, true);
  assert.equal(manager.allClients.length, 2);
  const replacement = manager.allClients[1];
  // Replacement resumed the original session file and re-entered auto mode.
  assert.deepEqual(replacement.switchedSessions, ['/tmp/mock-session-001.jsonl']);
  assert.deepEqual(replacement.prompted, ['/sf autonomous']);
  assert.equal(session.reloadState, 'running');
});
// ---- Lifecycle: start → running → blocked → resolve → running → completed ----
it('start → blocked → resolve → running → completed lifecycle', async () => {
@ -723,8 +784,10 @@ describe('SessionManager', () => {
assert.equal(result.sessionId, sessionId);
assert.equal(result.status, 'running');
assert.equal(result.reloadState, 'running');
assert.equal(result.projectName, 'result-test');
assert.equal(result.error, null);
assert.equal(result.lastHeartbeat, null);
assert.equal(result.pendingBlocker, null);
assert.ok(typeof result.durationMs === 'number');
assert.ok(result.cost);

View file

@ -22,6 +22,7 @@ import type {
ManagedSession,
StartSessionOptions,
PendingBlocker,
RuntimeHeartbeat,
} from './types.js';
import { MAX_EVENTS, INIT_TIMEOUT_MS } from './types.js';
import type { Logger } from './logger.js';
@ -34,7 +35,8 @@ const FIRE_AND_FORGET_METHODS = new Set([
'notify', 'setStatus', 'setWidget', 'setTitle', 'set_editor_text',
]);
const TERMINAL_PREFIXES = ['auto-mode stopped', 'step-mode stopped'];
const TERMINAL_PREFIXES = ['auto-mode stopped', 'auto-mode paused', 'step-mode stopped'];
const RELOAD_PAUSE_TIMEOUT_MS = 5_000;
function isTerminalNotification(event: Record<string, unknown>): boolean {
if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false;
@ -45,7 +47,7 @@ function isTerminalNotification(event: Record<string, unknown>): boolean {
function isBlockedNotification(event: Record<string, unknown>): boolean {
if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false;
const message = String(event.message ?? '').toLowerCase();
return message.includes('blocked:');
return message.includes('blocked:') || message.startsWith('auto-mode paused');
}
function isBlockingUIRequest(event: Record<string, unknown>): boolean {
@ -96,11 +98,7 @@ export class SessionManager extends EventEmitter {
if (options.model) args.push('--model', options.model);
if (options.bare) args.push('--bare');
const client = new RpcClient({
cliPath,
cwd: resolvedDir,
args,
});
const client = this.createRpcClient(cliPath, resolvedDir, args);
// Build the session shell before async operations so we can track state
const session: ManagedSession = {
@ -108,11 +106,13 @@ export class SessionManager extends EventEmitter {
projectDir: resolvedDir,
projectName,
status: 'starting',
reloadState: 'running',
client,
events: [],
pendingBlocker: null,
cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } },
startTime: Date.now(),
startOptions: { ...options, projectDir: resolvedDir },
};
// Insert into map early (keyed by dir) so concurrent starts are rejected
@ -231,6 +231,18 @@ export class SessionManager extends EventEmitter {
this.logger.info('session cancelled', { sessionId, projectDir: session.projectDir });
}
/**
* Restart a managed RPC child and resume the same persisted session when possible.
*
* Purpose: make daemon-managed auto sessions pick up changed runtime/source
* files at process boundaries instead of trying unsafe in-process hot reload.
*/
async reloadSession(sessionId: string, reason = 'runtime epoch changed'): Promise<void> {
const session = this.getSession(sessionId);
if (!session) throw new Error(`Session not found: ${sessionId}`);
await this.restartSession(session, reason);
}
/**
* Build a HeadlessJsonResult-shaped object from accumulated session state.
*/
@ -245,9 +257,11 @@ export class SessionManager extends EventEmitter {
projectDir: session.projectDir,
projectName: session.projectName,
status: session.status,
reloadState: session.reloadState ?? 'running',
durationMs,
cost: session.cost,
recentEvents: session.events.slice(-10),
lastHeartbeat: session.lastHeartbeat ?? null,
pendingBlocker: session.pendingBlocker
? { id: session.pendingBlocker.id, method: session.pendingBlocker.method, message: session.pendingBlocker.message }
: null,
@ -311,6 +325,10 @@ export class SessionManager extends EventEmitter {
this.logger.debug('session event', { sessionId: session.sessionId, type: (event as Record<string, unknown>).type as string });
this.emit('session:event', { sessionId: session.sessionId, projectDir: session.projectDir, event });
if ((event as Record<string, unknown>).type === 'runtime_heartbeat') {
this.handleRuntimeHeartbeat(session, event as unknown as RuntimeHeartbeat);
}
// Cost tracking (K004 — cumulative-max)
if ((event as Record<string, unknown>).type === 'cost_update') {
const costEvent = event as unknown as RpcCostUpdateEvent;
@ -371,6 +389,135 @@ export class SessionManager extends EventEmitter {
});
}
}
/**
 * Track heartbeats from the RPC child and kick off a restart when the
 * child's runtime epoch no longer matches the observed source epoch.
 * Restart failures mark the session as errored and emit 'session:error'.
 */
private handleRuntimeHeartbeat(session: ManagedSession, heartbeat: RuntimeHeartbeat): void {
  // Always record the latest heartbeat, even when no reload is needed.
  session.lastHeartbeat = heartbeat;
  const epochsAgree = heartbeat.runtimeEpoch === heartbeat.sourceEpoch;
  const reloadInFlight = session.reloadState === 'reloading';
  const restartable = session.status === 'running' || session.status === 'blocked';
  if (epochsAgree || reloadInFlight || !restartable) return;
  this.logger.info('runtime epoch mismatch detected', {
    sessionId: session.sessionId,
    projectDir: session.projectDir,
    unitType: heartbeat.unitType,
    unitId: heartbeat.unitId,
    runtimeEpoch: heartbeat.runtimeEpoch,
    sourceEpoch: heartbeat.sourceEpoch,
  });
  // Fire-and-forget: failure is reported via state + the error event.
  void this.restartSession(session, 'runtime epoch changed').catch((err) => {
    const message = err instanceof Error ? err.message : String(err);
    session.reloadState = 'reload_failed';
    session.status = 'error';
    session.error = message;
    this.logger.error('session reload failed', {
      sessionId: session.sessionId,
      projectDir: session.projectDir,
      error: message,
    });
    this.emit('session:error', {
      sessionId: session.sessionId,
      projectDir: session.projectDir,
      projectName: session.projectName,
      error: message,
    });
  });
}
/**
 * Stop the current RPC child and start a fresh one for the same managed
 * session, resuming the prior persisted session file when it can be found.
 *
 * Sequence matters: capture session file → ask auto-mode to pause →
 * unsubscribe → stop old child → spawn/init new child → re-subscribe →
 * switch to prior session file → re-issue the start command. On success
 * reloadState ends at 'running'; errors propagate to the caller
 * (handleRuntimeHeartbeat marks the session 'reload_failed').
 */
private async restartSession(session: ManagedSession, reason: string): Promise<void> {
  // Re-entrancy guard: a second mismatch heartbeat must not double-restart.
  if (session.reloadState === 'reloading') return;
  session.reloadState = 'reloading';
  // Prefer the child's own notion of its session file; fall back to the
  // value seen in the last heartbeat.
  let sessionFile = session.lastHeartbeat?.sessionFile;
  try {
    const state = await session.client.getState();
    sessionFile = state.sessionFile ?? sessionFile;
  } catch {
    // Best effort: a wedged child may not answer state requests.
  }
  // Ask auto-mode to pause so the child reaches a safe point before stop.
  try {
    await session.client.prompt('/sf pause');
    await waitFor(
      () => session.status === 'blocked' || session.status === 'completed' || session.status === 'cancelled',
      RELOAD_PAUSE_TIMEOUT_MS,
    );
  } catch {
    // Timeout or prompt failure: stop() escalates SIGTERM to SIGKILL.
  }
  // Detach the old event subscription first so stale events from the dying
  // child cannot mutate session state mid-restart.
  session.unsubscribe?.();
  try {
    await session.client.stop();
  } catch {
    // stop() is best-effort; subsequent start creates a new child.
  }
  // Rebuild the child from the original start options (model/bare/cliPath).
  const opts = session.startOptions ?? { projectDir: session.projectDir };
  const cliPath = opts.cliPath ?? SessionManager.resolveCLIPath();
  const args: string[] = ['--mode', 'rpc'];
  if (opts.model) args.push('--model', opts.model);
  if (opts.bare) args.push('--bare');
  const client = this.createRpcClient(cliPath, session.projectDir, args);
  await Promise.race([
    client.start(),
    timeout(INIT_TIMEOUT_MS, `RpcClient.start() timed out after ${INIT_TIMEOUT_MS}ms`),
  ]);
  const initResult: RpcInitResult = await Promise.race([
    client.init(),
    timeout(INIT_TIMEOUT_MS, `RpcClient.init() timed out after ${INIT_TIMEOUT_MS}ms`),
  ]) as RpcInitResult;
  // Swap the session over to the new child and clear stale error state.
  session.client = client;
  session.sessionId = initResult.sessionId;
  session.status = 'running';
  session.pendingBlocker = null;
  session.reloadState = 'restarted';
  session.error = undefined;
  session.startOptions = { ...opts, projectDir: session.projectDir };
  session.unsubscribe = client.onEvent((event: SdkAgentEvent) => {
    this.handleEvent(session, event);
  });
  // Resume the prior persisted conversation when we know its file.
  if (sessionFile) {
    try {
      await client.switchSession(sessionFile);
    } catch (err) {
      this.logger.warn('session reload could not switch to previous session file', {
        sessionId: session.sessionId,
        projectDir: session.projectDir,
        sessionFile,
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }
  // Re-issue the original command (default: autonomous auto-mode).
  await client.prompt(opts.command ?? '/sf autonomous');
  session.reloadState = 'running';
  this.logger.info('session reloaded', {
    sessionId: session.sessionId,
    projectDir: session.projectDir,
    reason,
    resumedSessionFile: sessionFile,
  });
  this.emit('session:restarted', {
    sessionId: session.sessionId,
    projectDir: session.projectDir,
    projectName: session.projectName,
    reason,
    sessionFile,
  });
}
/** Factory seam for RPC children — tests override this to inject mocks. */
protected createRpcClient(cliPath: string, cwd: string, args: string[]): RpcClient {
  return new RpcClient({ cliPath, cwd, args });
}
}
// ---------------------------------------------------------------------------
@ -383,6 +530,24 @@ function timeout(ms: number, message: string): Promise<never> {
});
}
/**
 * Poll `predicate` every 100ms until it returns true or `timeoutMs` elapses.
 *
 * The predicate is checked synchronously first, so an already-true condition
 * resolves without waiting. Rejects with a timeout Error when the deadline
 * passes. Fix: a predicate that throws inside the interval previously left
 * the timer running forever with an uncaught exception and a promise that
 * never settled; it now clears the interval and rejects with that error.
 */
function waitFor(predicate: () => boolean, timeoutMs: number): Promise<void> {
  if (predicate()) return Promise.resolve();
  return new Promise((resolve, reject) => {
    const startedAt = Date.now();
    const interval = setInterval(() => {
      try {
        if (predicate()) {
          clearInterval(interval);
          resolve();
          return;
        }
      } catch (err) {
        // A throwing predicate must settle the promise, not crash the process.
        clearInterval(interval);
        reject(err instanceof Error ? err : new Error(String(err)));
        return;
      }
      if (Date.now() - startedAt >= timeoutMs) {
        clearInterval(interval);
        reject(new Error(`Timed out after ${timeoutMs}ms`));
      }
    }, 100);
  });
}
function extractBlocker(event: SdkAgentEvent): PendingBlocker {
const uiEvent = event as unknown as RpcExtensionUIRequest;
return {

View file

@ -57,6 +57,17 @@ export interface DaemonConfig {
// ---------------------------------------------------------------------------
export type SessionStatus = 'starting' | 'running' | 'blocked' | 'completed' | 'error' | 'cancelled';
export type ReloadState = 'running' | 'reloading' | 'restarted' | 'reload_failed';
export interface RuntimeHeartbeat {
  /** Session id reported by the emitting RPC child */
  sessionId: string;
  /** Persisted session file path, when the child knows it */
  sessionFile?: string;
  /** Active unit type (e.g. "execute-task"), when a unit lock is present */
  unitType?: string;
  /** Active unit id (e.g. "M001/S01/T01"), when a unit lock is present */
  unitId?: string;
  /** Source epoch captured when the child process started */
  runtimeEpoch: number;
  /** Source epoch currently observed on disk by the child */
  sourceEpoch: number;
  /** Wall-clock timestamp (ms) when the heartbeat was emitted */
  emittedAt: number;
}
// ---------------------------------------------------------------------------
// Managed Session
@ -78,6 +89,9 @@ export interface ManagedSession {
/** Current lifecycle status */
status: SessionStatus;
/** Daemon-managed runtime reload state */
reloadState?: ReloadState;
/** The RpcClient instance managing the agent process */
client: RpcClient;
@ -96,6 +110,12 @@ export interface ManagedSession {
/** Error message if status is 'error' */
error?: string;
/** Latest runtime heartbeat received from the RPC child */
lastHeartbeat?: RuntimeHeartbeat;
/** Original session start options used for daemon-managed restarts */
startOptions?: StartSessionOptions;
/** Cleanup function to unsubscribe from events */
unsubscribe?: () => void;
}

View file

@ -0,0 +1,70 @@
import assert from "node:assert/strict";
import { describe, test, vi } from "vitest";
import type { Context, Model } from "../types.js";
// Shared mutable state for the hoisted @google/gemini-cli-core mock: tests
// set retryError and read back the options the provider passed to retry.
const geminiCliCore = vi.hoisted(() => ({
  retryError: undefined as Error | undefined,
  retryOptions: undefined as Record<string, unknown> | undefined,
}));

// Stub cli-core: retryWithBackoff records its options and always throws, so
// tests can observe how the provider configures cli-core's retry loop.
vi.mock("@google/gemini-cli-core", () => ({
  AuthType: { LOGIN_WITH_GOOGLE: "LOGIN_WITH_GOOGLE" },
  CodeAssistServer: class {
    async generateContentStream(): Promise<AsyncGenerator<unknown>> {
      return (async function* emptyStream() {})();
    }
  },
  getOauthClient: vi.fn(async () => ({})),
  makeFakeConfig: vi.fn(() => ({})),
  retryWithBackoff: vi.fn(async (_fn: unknown, options: Record<string, unknown>) => {
    geminiCliCore.retryOptions = options;
    throw geminiCliCore.retryError ?? new Error("quota exhausted");
  }),
  setupUser: vi.fn(async () => ({ projectId: "test-project" })),
}));
import { streamGoogleGeminiCli } from "./google-gemini-cli.js";
/** Minimal gemini-cli model fixture used by these tests. */
function makeModel(): Model<"google-gemini-cli"> {
  const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
  return {
    id: "gemini-3-flash-preview",
    name: "Gemini 3 Flash Preview",
    api: "google-gemini-cli",
    provider: "google-gemini-cli",
    baseUrl: "",
    reasoning: true,
    input: ["text"],
    cost: zeroCost,
    contextWindow: 1_000_000,
    maxTokens: 8192,
  };
}
/** Context fixture holding a single user message. */
function makeContext(): Context {
  const userMessage = { role: "user" as const, content: "hello", timestamp: 0 };
  return { messages: [userMessage] };
}
describe("google-gemini-cli provider retry ownership", () => {
  test("google_gemini_cli_when_quota_resets_soon_returns_error_to_caller_without_cli_retry_loop", async () => {
    geminiCliCore.retryOptions = undefined;
    // Simulate cli-core classifying a quota error with a parsed reset delay.
    geminiCliCore.retryError = Object.assign(
      new Error(
        "You have exhausted your capacity on this model. Your quota will reset after 54s.",
      ),
      { retryDelayMs: 54_000 },
    );
    const stream = streamGoogleGeminiCli(makeModel(), makeContext());
    const result = await stream.result();
    const retryOptions = geminiCliCore.retryOptions as
      | { maxAttempts?: unknown }
      | undefined;
    // The provider must hand retryWithBackoff a single attempt — the caller
    // owns cross-model retry/fallback.
    assert.equal(retryOptions?.maxAttempts, 1);
    // The quota error surfaces as a normal error result with the reset delay
    // preserved so the caller can schedule the next attempt.
    assert.equal(result.stopReason, "error");
    assert.match(result.errorMessage ?? "", /exhausted your capacity/i);
    assert.equal(result.retryAfterMs, 54_000);
  });
});

View file

@ -5,7 +5,8 @@
* @google/gemini-cli-core the same library the real `gemini` CLI uses.
* cli-core reads ~/.gemini/oauth_creds.json itself, refreshes tokens,
* discovers the project (free-tier or whatever's onboarded server-side)
* via setupUser(), and handles all the User-Agent / retry / 429 details.
* via setupUser(), and handles all the User-Agent / quota-classification details.
* Request retry/fallback stays in the caller so SF can move to the next model.
*/
import {
@ -227,6 +228,9 @@ export const streamGoogleGeminiCli: StreamFunction<
() => server.generateContentStream(req as any, promptId, "USER" as any),
{
authType: AuthType.LOGIN_WITH_GOOGLE,
// SF owns cross-model fallback. Let cli-core classify quota errors,
// but do not let it hold the turn through its 10-attempt retry loop.
maxAttempts: 1,
signal: options?.signal,
},
);

View file

@ -12,6 +12,8 @@
*/
import * as crypto from "node:crypto";
import { existsSync, readFileSync, readdirSync, statSync } from "node:fs";
import { dirname, join, resolve } from "node:path";
import type { AgentSession } from "../../core/agent-session.js";
import { killTrackedDetachedChildren } from "../../utils/shell.js";
import type {
@ -34,6 +36,110 @@ import type {
RpcSlashCommand,
} from "./rpc-types.js";
const RUNTIME_HEARTBEAT_INTERVAL_MS = Number(
process.env.SF_RUNTIME_HEARTBEAT_INTERVAL_MS ?? 10_000,
);
/**
 * Locate the SF source checkout backing the running process.
 *
 * Honors SF_RUNTIME_SOURCE_ROOT / SF_SOURCE_ROOT when set; otherwise walks
 * upward from the entry script looking for a directory containing both
 * package.json and src/, falling back to cwd at the filesystem root.
 */
function findRuntimeSourceRoot(): string {
  const override =
    process.env.SF_RUNTIME_SOURCE_ROOT ?? process.env.SF_SOURCE_ROOT;
  if (override) return resolve(override);
  const entry = process.argv[1] ?? process.cwd();
  for (let dir = resolve(dirname(entry)); ; ) {
    const looksLikeRoot =
      existsSync(join(dir, "package.json")) && existsSync(join(dir, "src"));
    if (looksLikeRoot) return dir;
    const parent = dirname(dir);
    if (parent === dir) return process.cwd();
    dir = parent;
  }
}
/**
 * Newest modification time (ms) across TypeScript sources under `root`.
 *
 * Iteratively walks the tree, skipping VCS/build/dependency directories, and
 * ignores entries that vanish or are unreadable mid-scan so a concurrent
 * edit can never break the scan. Returns 0 when no matching file exists.
 */
function newestSourceMtimeMs(root: string): number {
  const SKIP_NAMES = new Set([
    ".git",
    ".sf",
    "dist",
    "node_modules",
    "target",
    ".next",
    "coverage",
  ]);
  const TS_FILE = /\.(?:ts|tsx|mts|cts)$/;
  const pending: string[] = [root];
  let newest = 0;
  while (pending.length > 0) {
    const dir = pending.pop()!;
    let entries: import("node:fs").Dirent[] = [];
    try {
      entries = readdirSync(dir, { withFileTypes: true });
    } catch {
      continue; // directory vanished or is unreadable — skip it
    }
    for (const entry of entries) {
      if (SKIP_NAMES.has(entry.name)) continue;
      const fullPath = join(dir, entry.name);
      if (entry.isDirectory()) {
        pending.push(fullPath);
      } else if (entry.isFile() && TS_FILE.test(entry.name)) {
        try {
          const { mtimeMs } = statSync(fullPath);
          if (mtimeMs > newest) newest = mtimeMs;
        } catch {
          // Ignore files that disappear during the scan.
        }
      }
    }
  }
  return newest;
}
/** Unit metadata read from the auto-mode lock file, used to enrich heartbeats. */
interface RuntimeUnitState {
  /** e.g. "execute-task"; absent when no lock file is readable */
  unitType?: string;
  /** e.g. "M001/S01/T01"; absent when no lock file is readable */
  unitId?: string;
  /** Session file recorded in the lock, when present */
  sessionFile?: string;
}
/**
 * Name of the auto-mode lock file for this process. Parallel workers with a
 * milestone lock use a per-milestone file (auto-<lock>.lock); otherwise the
 * shared auto.lock.
 */
function effectiveAutoLockFile(): string {
  if (!process.env.SF_PARALLEL_WORKER) return "auto.lock";
  const milestoneLock = process.env.SF_MILESTONE_LOCK;
  return milestoneLock ? `auto-${milestoneLock}.lock` : "auto.lock";
}
/**
 * Best-effort read of the active unit (type/id/session file) from the
 * auto-mode lock file under <root>/.sf. Checks SF_PROJECT_ROOT first, then
 * the working directory; returns {} when no readable lock exists.
 */
function readRuntimeUnitState(): RuntimeUnitState {
  const candidates = [process.env.SF_PROJECT_ROOT, process.cwd()]
    .filter((root): root is string => Boolean(root))
    .map((root) => resolve(root));
  const visited = new Set<string>();
  for (const root of candidates) {
    if (visited.has(root)) continue;
    visited.add(root);
    const lockPath = join(root, ".sf", effectiveAutoLockFile());
    try {
      if (!existsSync(lockPath)) continue;
      const raw = JSON.parse(readFileSync(lockPath, "utf-8")) as Record<
        string,
        unknown
      >;
      const pick = (value: unknown): string | undefined =>
        typeof value === "string" ? value : undefined;
      return {
        unitType: pick(raw.unitType),
        unitId: pick(raw.unitId),
        sessionFile: pick(raw.sessionFile),
      };
    } catch {
      // Heartbeats must never fail because lock metadata is temporarily
      // absent or being rewritten.
    }
  }
  return {};
}
// Re-export types for consumers
export type {
RpcCommand,
@ -519,6 +625,32 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
}
});
const runtimeSourceRoot = findRuntimeSourceRoot();
const runtimeEpoch = newestSourceMtimeMs(runtimeSourceRoot);
const emitRuntimeHeartbeat = () => {
const runtimeUnit = readRuntimeUnitState();
const heartbeat = {
type: "runtime_heartbeat" as const,
sessionId: session.sessionId,
sessionFile: runtimeUnit.sessionFile ?? session.sessionFile,
unitType: runtimeUnit.unitType,
unitId: runtimeUnit.unitId,
runtimeEpoch,
sourceEpoch: newestSourceMtimeMs(runtimeSourceRoot),
emittedAt: Date.now(),
};
if (!eventFilter || eventFilter.has("runtime_heartbeat")) {
output(heartbeat);
}
};
const runtimeHeartbeatTimer =
RUNTIME_HEARTBEAT_INTERVAL_MS > 0
? setInterval(emitRuntimeHeartbeat, RUNTIME_HEARTBEAT_INTERVAL_MS)
: undefined;
if (runtimeHeartbeatTimer) {
signalCleanupHandlers.push(() => clearInterval(runtimeHeartbeatTimer));
}
// Handle a single command
const handleCommand = async (command: RpcCommand): Promise<RpcResponse> => {
const id = command.id;
@ -901,7 +1033,7 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
protocolVersion: 2,
sessionId: session.sessionId,
capabilities: {
events: ["execution_complete", "cost_update"],
events: ["execution_complete", "cost_update", "runtime_heartbeat"],
commands: ["init", "shutdown", "subscribe"],
},
};

View file

@ -148,7 +148,7 @@ describe("v2 type shapes", () => {
protocolVersion: 2,
sessionId: "test-session-123",
capabilities: {
events: ["execution_complete", "cost_update"],
events: ["execution_complete", "cost_update", "runtime_heartbeat"],
commands: ["init", "shutdown", "subscribe"],
},
};
@ -158,6 +158,7 @@ describe("v2 type shapes", () => {
assert.ok(Array.isArray(initResult.capabilities.commands));
assert.ok(initResult.capabilities.events.includes("execution_complete"));
assert.ok(initResult.capabilities.events.includes("cost_update"));
assert.ok(initResult.capabilities.events.includes("runtime_heartbeat"));
assert.ok(initResult.capabilities.commands.includes("init"));
assert.ok(initResult.capabilities.commands.includes("shutdown"));
assert.ok(initResult.capabilities.commands.includes("subscribe"));
@ -231,6 +232,16 @@ describe("v2 type shapes", () => {
cumulativeCost: 0.03,
tokens: { input: 100, output: 50, cacheRead: 10, cacheWrite: 5 },
},
{
type: "runtime_heartbeat",
sessionId: "s1",
sessionFile: "/tmp/s1.jsonl",
unitType: "execute-task",
unitId: "M001/S01/T01",
runtimeEpoch: 100,
sourceEpoch: 101,
emittedAt: 123,
},
];
for (const event of events) {
@ -242,6 +253,9 @@ describe("v2 type shapes", () => {
// TypeScript narrows to RpcCostUpdateEvent
assert.ok("turnCost" in event);
assert.ok("tokens" in event);
} else if (event.type === "runtime_heartbeat") {
assert.ok("runtimeEpoch" in event);
assert.ok("sourceEpoch" in event);
} else {
assert.fail(`Unexpected event type: ${(event as any).type}`);
}
@ -569,7 +583,7 @@ describe("Client ↔ Mock server protocol exchange", () => {
protocolVersion: 2,
sessionId: "sess-abc",
capabilities: {
events: ["execution_complete", "cost_update"],
events: ["execution_complete", "cost_update", "runtime_heartbeat"],
commands: ["init", "shutdown", "subscribe"],
},
};

View file

@ -273,8 +273,23 @@ export interface RpcCostUpdateEvent {
};
}
/** Runtime heartbeat emitted by long-lived RPC children for daemon reload supervision. */
export interface RpcRuntimeHeartbeatEvent {
  type: "runtime_heartbeat";
  /** Session id of the emitting child */
  sessionId: string;
  /** Persisted session file path, when known */
  sessionFile?: string;
  /** Active unit type from the auto-mode lock, when present */
  unitType?: string;
  /** Active unit id from the auto-mode lock, when present */
  unitId?: string;
  /** Source epoch captured when the child started */
  runtimeEpoch: number;
  /** Source epoch currently observed on disk */
  sourceEpoch: number;
  /** Emission timestamp (ms) */
  emittedAt: number;
}
/** Discriminated union of all v2-only event types */
export type RpcV2Event = RpcExecutionCompleteEvent | RpcCostUpdateEvent;
export type RpcV2Event =
| RpcExecutionCompleteEvent
| RpcCostUpdateEvent
| RpcRuntimeHeartbeatEvent;
// ============================================================================
// Extension UI Events (stdout)

View file

@ -216,7 +216,7 @@ describe("type shapes", () => {
assert.equal(v2, 2);
});
it("RpcV2Event discriminated union covers both event types", () => {
it("RpcV2Event discriminated union covers protocol event types", () => {
const events: RpcV2Event[] = [
{
type: "execution_complete",
@ -241,10 +241,19 @@ describe("type shapes", () => {
cumulativeCost: 0.001,
tokens: { input: 100, output: 50, cacheRead: 0, cacheWrite: 0 },
},
{
type: "runtime_heartbeat",
sessionId: "s1",
sessionFile: "/tmp/s1.jsonl",
runtimeEpoch: 100,
sourceEpoch: 100,
emittedAt: 123,
},
];
assert.equal(events.length, 2);
assert.equal(events.length, 3);
assert.equal(events[0].type, "execution_complete");
assert.equal(events[1].type, "cost_update");
assert.equal(events[2].type, "runtime_heartbeat");
});
});

View file

@ -336,8 +336,23 @@ export interface RpcCostUpdateEvent {
};
}
/** Runtime heartbeat emitted by long-lived RPC children for daemon reload supervision. */
export interface RpcRuntimeHeartbeatEvent {
  type: "runtime_heartbeat";
  /** Session id of the emitting child */
  sessionId: string;
  /** Persisted session file path, when known */
  sessionFile?: string;
  /** Active unit type from the auto-mode lock, when present */
  unitType?: string;
  /** Active unit id from the auto-mode lock, when present */
  unitId?: string;
  /** Source epoch captured when the child started */
  runtimeEpoch: number;
  /** Source epoch currently observed on disk */
  sourceEpoch: number;
  /** Emission timestamp (ms) */
  emittedAt: number;
}
/** Discriminated union of all v2-only event types */
export type RpcV2Event = RpcExecutionCompleteEvent | RpcCostUpdateEvent;
export type RpcV2Event =
| RpcExecutionCompleteEvent
| RpcCostUpdateEvent
| RpcRuntimeHeartbeatEvent;
// ============================================================================
// Extension UI Events (stdout)

View file

@ -15,8 +15,9 @@
* bypassing the extension loader's jiti setup (#1137).
*/
import { existsSync, readdirSync, readFileSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { dirname, join } from "node:path";
import { createJiti } from "@mariozechner/jiti";
import { resolveBundledSourceResource } from "./bundled-resource-path.js";
import type { SFState } from "./resources/extensions/sf/types.js";
@ -33,7 +34,6 @@ const agentExtensionsDir = join(
"extensions",
"sf",
);
const { existsSync } = await import("node:fs");
const useAgentDir = existsSync(join(agentExtensionsDir, "state.js"));
const sfExtensionPath = (moduleName: string) =>
useAgentDir
@ -46,10 +46,7 @@ const sfExtensionPath = (moduleName: string) =>
);
async function loadExtensionModules() {
const stateModule = (await jiti.import(
sfExtensionPath("state"),
{},
)) as any;
const stateModule = (await jiti.import(sfExtensionPath("state"), {})) as any;
const dispatchModule = (await jiti.import(
sfExtensionPath("auto-dispatch"),
{},
@ -86,6 +83,43 @@ async function loadExtensionModules() {
// ─── Types ──────────────────────────────────────────────────────────────────
// Summary of the dispatch decision the runtime would take for a unit.
type RuntimeDispatchDecisionSummary = {
  action: "dispatch" | "retry" | "notify" | "block" | "skip";
  // Machine-readable justification for `action`.
  reasonCode:
    | "no-runtime-record"
    | "queued"
    | "retry-budget-available"
    | "terminal-ready-to-notify"
    | "retry-budget-exhausted"
    | "synthetic-reset-required"
    | "already-notified"
    | "active-or-claimed"
    | "notified"
    | "terminal-nonretryable";
  retryCount: number;
  maxRetries: number;
  // max(0, maxRetries - retryCount)
  retryBudgetRemaining: number;
};

// Flattened view of a persisted runtime unit record for query snapshots.
type RuntimeUnitSummary = {
  unitType: string;
  unitId: string;
  // Raw lifecycle phase from the unit record (defaults to "dispatched").
  phase: string;
  // Externally visible status; derived from phase when not stored directly.
  status: string;
  startedAt: number | null;
  updatedAt: number | null;
  retryCount: number;
  maxRetries: number;
  retryBudgetRemaining: number;
  lastHeartbeatAt: number | null;
  lastProgressAt: number | null;
  lastOutputAt: number | null;
  outputPath: string | null;
  watchdogReason: string | null;
  notifiedAt: number | null;
  dispatchDecision: RuntimeDispatchDecisionSummary;
};
export interface QuerySnapshot {
schemaVersion: 1;
state: SFState;
@ -105,6 +139,9 @@ export interface QuerySnapshot {
}>;
total: number;
};
runtime: {
units: RuntimeUnitSummary[];
};
}
export interface QueryResult {
@ -114,6 +151,192 @@ export interface QueryResult {
// ─── Implementation ─────────────────────────────────────────────────────────
// Unit statuses that represent a finished run (successful or otherwise).
const QUERY_TERMINAL_STATUSES = new Set([
  "completed",
  "failed",
  "blocked",
  "cancelled",
  "stale",
  "runaway-recovered",
]);

// Terminal statuses that may be retried while retry budget remains.
const QUERY_RETRYABLE_TERMINAL_STATUSES = new Set([
  "failed",
  "stale",
  "runaway-recovered",
]);

// Fallback when a unit record does not declare maxRetries.
const DEFAULT_QUERY_MAX_RETRIES = 1;
/**
 * Walk upward from `basePath` to the nearest directory containing `.sf`.
 * Falls back to `<basePath>/.sf` when no ancestor has one.
 */
function resolveSfRootForQuery(basePath: string): string {
  for (let dir = basePath; ; ) {
    const sfDir = join(dir, ".sf");
    if (existsSync(sfDir)) return sfDir;
    const parent = dirname(dir);
    if (parent === dir) break;
    dir = parent;
  }
  return join(basePath, ".sf");
}
/** Coerce an unknown JSON field to a string, using `fallback` for non-strings. */
function stringField(value: unknown, fallback = ""): string {
  if (typeof value === "string") return value;
  return fallback;
}

/** Coerce an unknown JSON field to a finite number, or null otherwise. */
function numberField(value: unknown): number | null {
  if (typeof value !== "number") return null;
  return Number.isFinite(value) ? value : null;
}
/**
 * Derive a unit's externally visible status from its lifecycle phase.
 * Phases that are themselves statuses pass through; transitional phases map
 * onto the closest status; unknown phases are treated as "running".
 */
function inferQueryStatus(
  phase: string,
  record: Record<string, unknown>,
): string {
  const passthrough = new Set([
    "queued",
    "claimed",
    "running",
    "progress",
    "completed",
    "failed",
    "blocked",
    "cancelled",
    "stale",
    "runaway-recovered",
    "notified",
  ]);
  if (passthrough.has(phase)) return phase;
  if (phase === "paused") {
    // A runaway-guard pause reads as a recovery, not an ordinary block.
    return record.runawayGuardPause ? "runaway-recovered" : "blocked";
  }
  const remapped: Record<string, string> = {
    dispatched: "running",
    "wrapup-warning-sent": "progress",
    "runaway-warning-sent": "progress",
    "runaway-final-warning-sent": "progress",
    recovered: "progress",
    timeout: "stale",
    finalized: "completed",
    skipped: "blocked",
  };
  return remapped[phase] ?? "running";
}
/**
 * Decide what the dispatcher would do with a unit in the given state:
 * skip already-notified/active units, dispatch queued work, retry failed
 * work while budget remains, block synthetic units that ended abnormally,
 * and notify for plain terminal outcomes.
 */
function queryRuntimeDecision(input: {
  unitType: string;
  unitId: string;
  status: string;
  retryCount: number;
  maxRetries: number;
  notifiedAt: number | null;
}): RuntimeDispatchDecisionSummary {
  const { status, notifiedAt } = input;
  const retryBudgetRemaining = Math.max(0, input.maxRetries - input.retryCount);
  const base = {
    retryCount: input.retryCount,
    maxRetries: input.maxRetries,
    retryBudgetRemaining,
  };
  const decide = (
    action: RuntimeDispatchDecisionSummary["action"],
    reasonCode: RuntimeDispatchDecisionSummary["reasonCode"],
  ): RuntimeDispatchDecisionSummary => ({ action, reasonCode, ...base });

  if (notifiedAt !== null) return decide("skip", "already-notified");
  if (status === "notified") return decide("skip", "notified");
  if (status === "queued") return decide("dispatch", "queued");
  if (!QUERY_TERMINAL_STATUSES.has(status)) {
    return decide("skip", "active-or-claimed");
  }
  const isSynthetic =
    input.unitType === "synthetic" || input.unitId.includes("parallel-research");
  if (isSynthetic && status !== "completed") {
    return decide("block", "synthetic-reset-required");
  }
  if (QUERY_RETRYABLE_TERMINAL_STATUSES.has(status)) {
    return retryBudgetRemaining > 0
      ? decide("retry", "retry-budget-available")
      : decide("block", "retry-budget-exhausted");
  }
  if (status === "completed" || status === "blocked" || status === "cancelled") {
    return decide("notify", "terminal-ready-to-notify");
  }
  return decide("skip", "terminal-nonretryable");
}
/**
 * Read every unit record under <sfRoot>/runtime/units and summarize it for
 * the query snapshot. Unreadable or incomplete records are skipped so the
 * query stays best-effort.
 */
function readRuntimeUnitSummaries(basePath: string): RuntimeUnitSummary[] {
  const unitsDir = join(resolveSfRootForQuery(basePath), "runtime", "units");
  if (!existsSync(unitsDir)) return [];
  const summaries: RuntimeUnitSummary[] = [];
  for (const fileName of readdirSync(unitsDir)) {
    if (!fileName.endsWith(".json")) continue;
    let record: Record<string, unknown>;
    try {
      record = JSON.parse(
        readFileSync(join(unitsDir, fileName), "utf-8"),
      ) as Record<string, unknown>;
    } catch {
      // Runtime query must stay best-effort; malformed unit files are ignored.
      continue;
    }
    const unitType = stringField(record.unitType);
    const unitId = stringField(record.unitId);
    if (!unitType || !unitId) continue;
    const phase = stringField(record.phase, "dispatched");
    const status = stringField(record.status, inferQueryStatus(phase, record));
    // Legacy records carried recoveryAttempts instead of retryCount.
    const retryCount =
      numberField(record.retryCount) ?? numberField(record.recoveryAttempts) ?? 0;
    const maxRetries =
      numberField(record.maxRetries) ?? DEFAULT_QUERY_MAX_RETRIES;
    const notifiedAt = numberField(record.notifiedAt);
    const dispatchDecision = queryRuntimeDecision({
      unitType,
      unitId,
      status,
      retryCount,
      maxRetries,
      notifiedAt,
    });
    summaries.push({
      unitType,
      unitId,
      phase,
      status,
      startedAt: numberField(record.startedAt),
      updatedAt: numberField(record.updatedAt),
      retryCount,
      maxRetries,
      retryBudgetRemaining: dispatchDecision.retryBudgetRemaining,
      lastHeartbeatAt: numberField(record.lastHeartbeatAt),
      lastProgressAt: numberField(record.lastProgressAt),
      lastOutputAt: numberField(record.lastOutputAt),
      outputPath:
        typeof record.outputPath === "string" ? record.outputPath : null,
      watchdogReason:
        typeof record.watchdogReason === "string" ? record.watchdogReason : null,
      notifiedAt,
      dispatchDecision,
    });
  }
  return summaries;
}
export async function buildQuerySnapshot(
basePath: string,
): Promise<QuerySnapshot> {
@ -169,6 +392,7 @@ export async function buildQuerySnapshot(
state,
next,
cost: { workers, total: workers.reduce((sum, w) => sum + w.cost, 0) },
runtime: { units: readRuntimeUnitSummaries(basePath) },
};
return snapshot;

View file

@ -15,6 +15,7 @@ import type {
ExtensionCommandContext,
ExtensionContext,
} from "@singularity-forge/pi-coding-agent";
import type { Api, Model } from "@singularity-forge/pi-ai";
import { getManifestStatus } from "./files.js";
import {
assessInterruptedSession,
@ -47,7 +48,11 @@ import { getRtkSessionSavings } from "../shared/rtk-session-stats.js";
import { deactivateSF } from "../shared/sf-phase-state.js";
import { clearActivityLogState } from "./activity-log.js";
import { atomicWriteSync } from "./atomic-write.js";
import { AutoSession, getAutoSession } from "./auto/session.js";
import {
AutoSession,
getAutoSession,
type ModelFailureRecord,
} from "./auto/session.js";
// import { startSliceParallel } from "./slice-parallel-orchestrator.js"; (decoy for legacy regex tests)
import {
getBudgetAlertLevel,
@ -542,6 +547,64 @@ export function setCurrentDispatchedModelId(
s.currentDispatchedModelId = model ? `${model.provider}/${model.id}` : null;
}
/**
 * Update the concrete model tracked for the currently running unit.
 *
 * Purpose: keep fresh-session restoration and dashboard state aligned after
 * runtime provider recovery switches models mid-unit. Also mirrors the model
 * into the dispatched-model id via setCurrentDispatchedModelId, so passing
 * null clears both fields.
 *
 * Consumer: bootstrap/agent-end-recovery.ts after a configured fallback route
 * is successfully applied.
 *
 * @param model the newly active model, or null to clear tracking state.
 */
export function setCurrentUnitModel(model: Model<Api> | null): void {
  s.currentUnitModel = model;
  setCurrentDispatchedModelId(model);
}
/**
 * Record that a provider/model route failed for the current auto unit.
 *
 * Purpose: prevent retry loops on quota/rate-limit/server failures by making
 * subsequent recovery skip the failed route for this unit. No-ops when no
 * unit is active, since there is nothing to scope the failure to.
 *
 * Consumer: bootstrap/agent-end-recovery.ts before selecting the next
 * configured fallback route.
 *
 * @param input failed route details; timestamp defaults to Date.now().
 */
export function recordCurrentModelFailure(input: {
  provider: string;
  modelId: string;
  reason: string;
  timestamp?: number;
}): void {
  const unit = s.currentUnit;
  if (!unit) return;
  const { provider, modelId, reason, timestamp } = input;
  s.modelFailures.push({
    unitType: unit.type,
    unitId: unit.id,
    provider,
    modelId,
    reason,
    timestamp: timestamp ?? Date.now(),
  });
}
/**
 * Return model failures scoped to the currently running auto unit.
 *
 * Purpose: keep recovery decisions unit-local so a quota failure in one unit
 * does not permanently suppress a model in later work. Returns an empty list
 * when no unit is active.
 *
 * Consumer: bootstrap/agent-end-recovery.ts when resolving the next
 * configured fallback route.
 */
export function getCurrentUnitModelFailures(): ModelFailureRecord[] {
  const unit = s.currentUnit;
  if (!unit) return [];
  return s.modelFailures.filter(
    (failure) => failure.unitType === unit.type && failure.unitId === unit.id,
  );
}
/**
* Mark the current research unit as terminal after saving its RESEARCH artifact.
*

View file

@ -47,6 +47,15 @@ export interface StartModel {
id: string;
}
/**
 * A provider/model route that failed while a specific auto unit was running.
 * Recovery reads these records to skip already-failed routes when picking the
 * next fallback for that same unit.
 */
export interface ModelFailureRecord {
  /** Type of the unit the failure occurred in (scopes suppression per unit). */
  unitType: string;
  /** Id of the unit the failure occurred in. */
  unitId: string;
  /** Provider of the failed route. */
  provider: string;
  /** Model id of the failed route. */
  modelId: string;
  /** Human-readable failure reason (e.g. the raw provider error message). */
  reason: string;
  /** Epoch milliseconds when the failure was recorded. */
  timestamp: number;
}
export interface PendingVerificationRetry {
unitId: string;
failureContext: string;
@ -156,6 +165,8 @@ export class AutoSession {
currentUnitModel: Model<Api> | null = null;
/** Fully-qualified model ID (provider/id) set after selectAndApplyModel + hook overrides (#2899). */
currentDispatchedModelId: string | null = null;
/** Per-session, per-unit failed model routes skipped by runtime recovery. */
readonly modelFailures: ModelFailureRecord[] = [];
originalModelId: string | null = null;
originalModelProvider: string | null = null;
lastBudgetAlertLevel: BudgetAlertLevel = 0;
@ -348,6 +359,7 @@ export class AutoSession {
this.manualSessionModelOverride = null;
this.currentUnitModel = null;
this.currentDispatchedModelId = null;
this.modelFailures.length = 0;
this.originalModelId = null;
this.originalModelProvider = null;
this.lastBudgetAlertLevel = 0;

View file

@ -4,13 +4,13 @@ import type {
} from "@singularity-forge/pi-coding-agent";
import {
getAutoDashboardData,
getAutoModeStartModel,
getCurrentUnitModelFailures,
isAutoActive,
pauseAuto,
setCurrentDispatchedModelId,
recordCurrentModelFailure,
setCurrentUnitModel,
} from "../auto.js";
import { isSessionSwitchInFlight, resolveAgentEnd } from "../auto-loop.js";
import { resolveModelId } from "../auto-model-selection.js";
import { blockModel, isModelBlocked } from "../blocked-models.js";
import {
classifyError,
@ -21,76 +21,122 @@ import {
} from "../error-classifier.js";
import { checkAutoStartAfterDiscuss } from "../guided-flow.js";
import {
getNextFallbackModel,
type ModelRouteRef,
resolveNextModelRoute,
} from "../model-route-failure.js";
import {
resolveModelWithFallbacksForUnit,
resolvePersistModelChanges,
} from "../preferences.js";
import { pauseAutoForProviderError } from "../provider-error-pause.js";
import { logWarning } from "../workflow-logger.js";
import { resumeAutoAfterProviderDelay } from "./provider-error-resume.js";
import { clearDiscussionFlowState } from "./write-gate.js";
const retryState = createRetryState();
const MAX_NETWORK_RETRIES = 2;
const MAX_TRANSIENT_AUTO_RESUMES = 8;
/**
 * Reset the module-level retry state so a resumed auto-session starts fresh.
 *
 * Called by provider-error-resume.ts before startAuto(): without this reset,
 * consecutiveTransientCount would accumulate across pause/resume cycles, and
 * legacy paused provider recovery would inherit stale transient counters.
 */
export function resetTransientRetryState(): void {
  resetRetryState(retryState);
}
async function pauseTransientWithBackoff(
cls: ErrorClass,
pi: ExtensionAPI,
function getCurrentRouteFromMessage(
lastMsg: unknown,
ctx: ExtensionContext,
errorDetail: string,
isRateLimit: boolean,
): Promise<void> {
retryState.consecutiveTransientCount += 1;
const baseRetryAfterMs = "retryAfterMs" in cls ? cls.retryAfterMs : 15_000;
const retryAfterMs =
baseRetryAfterMs *
2 ** Math.max(0, retryState.consecutiveTransientCount - 1);
const allowAutoResume =
retryState.consecutiveTransientCount <= MAX_TRANSIENT_AUTO_RESUMES;
if (!allowAutoResume) {
ctx.ui.notify(
`Transient provider errors persisted after ${MAX_TRANSIENT_AUTO_RESUMES} auto-resume attempts. Pausing for manual review.`,
): ModelRouteRef | undefined {
const msg = lastMsg as Record<string, unknown> | undefined;
const provider =
typeof msg?.provider === "string" ? msg.provider : ctx.model?.provider;
const id = typeof msg?.model === "string" ? msg.model : ctx.model?.id;
return provider && id ? { provider, id } : undefined;
}
/**
 * Decide whether an error class indicates a failed provider/model route.
 *
 * Rate-limit, network, server, connection, and stream failures all warrant
 * leaving the current route and attempting a fallback model; every other
 * class is handled elsewhere.
 */
function isModelRouteFailure(cls: ErrorClass): boolean {
  switch (cls.kind) {
    case "rate-limit":
    case "network":
    case "server":
    case "connection":
    case "stream":
      return true;
    default:
      return false;
  }
}
async function trySwitchToFallbackModel(args: {
pi: ExtensionAPI;
ctx: ExtensionContext;
current: ModelRouteRef | undefined;
reason: string;
unitType: string;
basePath: string | undefined;
errorDetail: string;
persistModelChanges: boolean;
}): Promise<boolean> {
const modelConfig = resolveModelWithFallbacksForUnit(args.unitType, {
autoBenchmark: true,
});
if (args.current) {
recordCurrentModelFailure({
provider: args.current.provider,
modelId: args.current.id,
reason: args.reason,
});
}
const availableModels = args.ctx.modelRegistry.getAvailable();
const isBlocked = args.basePath
? (model: { provider: string; id: string }) =>
isModelBlocked(args.basePath!, model.provider, model.id)
: undefined;
for (
let attempt = 0;
attempt < availableModels.length + (modelConfig?.fallbacks.length ?? 0) + 1;
attempt++
) {
const nextRoute = resolveNextModelRoute({
current: args.current,
modelConfig,
availableModels,
failedRoutes: getCurrentUnitModelFailures(),
isBlocked,
});
if (!nextRoute) return false;
const ok = await args.pi.setModel(nextRoute.model, {
persist: args.persistModelChanges,
});
if (!ok) {
recordCurrentModelFailure({
provider: nextRoute.model.provider,
modelId: nextRoute.model.id,
reason: "setModel failed during provider recovery",
});
continue;
}
resetRetryState(retryState);
setCurrentUnitModel(nextRoute.model);
args.ctx.ui.notify(
`Model route failed${args.errorDetail}. Switched to ${nextRoute.source === "configured" ? "configured fallback" : "available fallback"}: ${nextRoute.model.provider}/${nextRoute.model.id}.`,
"warning",
);
args.pi.sendMessage(
{
customType: "sf-auto-timeout-recovery",
content: "Continue execution.",
display: false,
},
{ triggerTurn: true },
);
return true;
}
await pauseAutoForProviderError(
ctx.ui,
errorDetail,
() =>
pauseAuto(ctx, pi, {
message: `Provider error: ${errorDetail}`,
category: "provider",
isTransient: allowAutoResume,
retryAfterMs,
}),
{
isRateLimit,
isTransient: allowAutoResume,
retryAfterMs,
resume: allowAutoResume
? () => {
void resumeAutoAfterProviderDelay(pi, ctx).catch((err) => {
const message = err instanceof Error ? err.message : String(err);
ctx.ui.notify(
`Provider error recovery delay elapsed, but auto-mode failed to resume: ${message}`,
"error",
);
});
}
: undefined,
},
);
return false;
}
export async function handleAgentEnd(
@ -172,14 +218,12 @@ export async function handleAgentEnd(
// ── 1. Classify using rawErrorMsg to avoid prose false-positives ────
const cls = classifyError(rawErrorMsg, explicitRetryAfterMs);
const currentRoute = getCurrentRouteFromMessage(lastMsg, ctx);
const dash = getAutoDashboardData();
// ── 1b. Defer to Core RetryHandler for most transient errors ────────
// Core retries transient failures in-session after this handler.
// Keep that behavior for non-rate-limit classes to avoid pause/retry races,
// but let rate-limit continue into model fallback logic below (#4373).
if (isTransient(cls) && cls.kind !== "rate-limit") {
return;
}
// SF owns provider-route recovery in auto-mode. Quota/rate-limit/server/
// stream/connection failures must leave the failed provider/model route
// immediately instead of sleeping or waiting for same-model retry loops.
// Cap rate-limit backoff for CLI-style providers (openai-codex, google-gemini-cli)
// which use per-user quotas with shorter windows (#2922).
@ -198,9 +242,8 @@ export async function handleAgentEnd(
// same dead model isn't reselected on the next /sf auto restart,
// then try a fallback before pausing.
if (cls.kind === "unsupported-model") {
const dash = getAutoDashboardData();
const rejectedProvider = ctx.model?.provider;
const rejectedId = ctx.model?.id;
const rejectedProvider = currentRoute?.provider;
const rejectedId = currentRoute?.id;
if (dash.basePath && rejectedProvider && rejectedId) {
try {
blockModel(
@ -219,62 +262,18 @@ export async function handleAgentEnd(
}
}
// Try configured fallback chain, skipping anything already blocked.
if (dash.currentUnit && dash.basePath) {
const modelConfig = resolveModelWithFallbacksForUnit(
dash.currentUnit.type,
);
if (modelConfig && modelConfig.fallbacks.length > 0) {
const availableModels = ctx.modelRegistry.getAvailable();
let cursorModelId: string | undefined = ctx.model?.id;
while (true) {
const nextModelId = getNextFallbackModel(
cursorModelId,
modelConfig,
);
if (!nextModelId) break;
if (
isModelBlocked(dash.basePath, ctx.model?.provider, nextModelId)
) {
cursorModelId = nextModelId;
continue;
}
const modelToSet = resolveModelId(
nextModelId,
availableModels,
ctx.model?.provider,
);
if (
modelToSet &&
!isModelBlocked(dash.basePath, modelToSet.provider, modelToSet.id)
) {
const persistModelChanges = resolvePersistModelChanges();
const ok = await pi.setModel(modelToSet, {
persist: persistModelChanges,
});
if (ok) {
setCurrentDispatchedModelId({
provider: modelToSet.provider,
id: modelToSet.id,
});
ctx.ui.notify(
`Switched to unblocked fallback: ${nextModelId} and resuming.`,
"info",
);
pi.sendMessage(
{
customType: "sf-auto-timeout-recovery",
content: "Continue execution.",
display: false,
},
{ triggerTurn: true },
);
return;
}
}
cursorModelId = nextModelId;
}
}
const switched = await trySwitchToFallbackModel({
pi,
ctx,
current: currentRoute,
reason: rawErrorMsg || "unsupported for account",
unitType: dash.currentUnit.type,
basePath: dash.basePath,
errorDetail,
persistModelChanges,
});
if (switched) return;
}
// No usable fallback — pause
@ -292,150 +291,42 @@ export async function handleAgentEnd(
// ── 2. Decide & Act ──────────────────────────────────────────────────
// --- Network errors: same-model retry with backoff ---
if (cls.kind === "network") {
const currentModelId = ctx.model?.id ?? "unknown";
if (retryState.currentRetryModelId !== currentModelId) {
retryState.networkRetryCount = 0;
retryState.currentRetryModelId = currentModelId;
}
if (retryState.networkRetryCount < MAX_NETWORK_RETRIES) {
retryState.networkRetryCount += 1;
retryState.consecutiveTransientCount += 1;
const attempt = retryState.networkRetryCount;
const delayMs = attempt * cls.retryAfterMs;
ctx.ui.notify(
`Network error on ${currentModelId}${errorDetail}. Retry ${attempt}/${MAX_NETWORK_RETRIES} in ${delayMs / 1000}s...`,
"warning",
);
setTimeout(() => {
pi.sendMessage(
{
customType: "sf-auto-timeout-recovery",
content:
"Continue execution — retrying after transient network error.",
display: false,
},
{ triggerTurn: true },
);
}, delayMs);
return;
}
// Network retries exhausted — fall through to model fallback
retryState.networkRetryCount = 0;
retryState.currentRetryModelId = undefined;
ctx.ui.notify(
`Network retries exhausted for ${currentModelId}. Attempting model fallback.`,
"warning",
);
}
// --- Transient errors: try model fallback first, then pause ---
// Rate limits are often per-model, so switching models can bypass them.
if (
cls.kind === "rate-limit" ||
cls.kind === "network" ||
cls.kind === "server" ||
cls.kind === "connection" ||
cls.kind === "stream"
) {
// Try model fallback
const dash = getAutoDashboardData();
if (dash.currentUnit) {
const modelConfig = resolveModelWithFallbacksForUnit(
dash.currentUnit.type,
);
if (modelConfig && modelConfig.fallbacks.length > 0) {
const availableModels = ctx.modelRegistry.getAvailable();
const nextModelId = getNextFallbackModel(ctx.model?.id, modelConfig);
if (nextModelId) {
retryState.networkRetryCount = 0;
retryState.currentRetryModelId = undefined;
const modelToSet = resolveModelId(
nextModelId,
availableModels,
ctx.model?.provider,
);
if (modelToSet) {
const ok = await pi.setModel(modelToSet, {
persist: persistModelChanges,
});
if (ok) {
setCurrentDispatchedModelId({
provider: modelToSet.provider,
id: modelToSet.id,
});
ctx.ui.notify(
`Model error${errorDetail}. Switched to fallback: ${nextModelId} and resuming.`,
"warning",
);
pi.sendMessage(
{
customType: "sf-auto-timeout-recovery",
content: "Continue execution.",
display: false,
},
{ triggerTurn: true },
);
return;
}
}
}
}
}
// Try restoring session model
const sessionModel = getAutoModeStartModel();
if (sessionModel) {
if (
ctx.model?.id !== sessionModel.id ||
ctx.model?.provider !== sessionModel.provider
) {
const startModel = ctx.modelRegistry
.getAvailable()
.find(
(m) =>
m.provider === sessionModel.provider &&
m.id === sessionModel.id,
);
if (startModel) {
const ok = await pi.setModel(startModel, {
persist: persistModelChanges,
});
if (ok) {
setCurrentDispatchedModelId({
provider: startModel.provider,
id: startModel.id,
});
retryState.networkRetryCount = 0;
retryState.currentRetryModelId = undefined;
ctx.ui.notify(
`Model error${errorDetail}. Restored session model: ${sessionModel.provider}/${sessionModel.id} and resuming.`,
"warning",
);
pi.sendMessage(
{
customType: "sf-auto-timeout-recovery",
content: "Continue execution.",
display: false,
},
{ triggerTurn: true },
);
return;
}
}
}
}
}
// --- Transient fallback: pause with auto-resume ---
if (isTransient(cls)) {
await pauseTransientWithBackoff(
cls,
// --- Route failures: try configured fallback first, then any available route ---
if (isModelRouteFailure(cls) && dash.currentUnit) {
const switched = await trySwitchToFallbackModel({
pi,
ctx,
current: currentRoute,
reason: rawErrorMsg || cls.kind,
unitType: dash.currentUnit.type,
basePath: dash.basePath,
errorDetail,
cls.kind === "rate-limit",
persistModelChanges,
});
if (switched) return;
}
// --- Transient fallback exhausted: pause without same-route auto-resume ---
if (isTransient(cls)) {
const message =
isModelRouteFailure(cls) && dash.currentUnit
? `Provider route failed and no usable fallback model remains${errorDetail}`
: `Provider error${errorDetail}`;
await pauseAutoForProviderError(
ctx.ui,
errorDetail,
() =>
pauseAuto(ctx, pi, {
message,
category: "provider",
isTransient: false,
retryAfterMs: "retryAfterMs" in cls ? cls.retryAfterMs : undefined,
}),
{
isRateLimit: cls.kind === "rate-limit",
isTransient: false,
retryAfterMs: "retryAfterMs" in cls ? cls.retryAfterMs : 0,
},
);
return;
}

View file

@ -10,7 +10,7 @@ import {
nextMilestoneId,
} from "../guided-flow.js";
import { loadEffectiveSFPreferences } from "../preferences.js";
import { recordSelfFeedback } from "../self-feedback.js";
import { markResolved, recordSelfFeedback } from "../self-feedback.js";
import {
executeCompleteMilestone,
executePlanMilestone,
@ -687,7 +687,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
promptGuidelines: [
"Use sf_self_report for ANY sf-internal observation — not just bugs. Acceptable kinds include: 'prompt-quality-issue' (you found a prompt ambiguous, contradictory, or missing context), 'improvement-idea' (a non-bug enhancement that would help), 'agent-friction' (workflow friction you worked around), 'design-thought' (broader speculation), 'missing-feature' (capability you wished sf had), as well as classic bug kinds like 'brittle-predicate' or 'git-empty-pathspec'.",
"Do NOT use this for bugs in the user's project, for your own task work, or to track your task's todo list. ONLY for observations about sf-the-tool itself.",
"This tool FILES new entries; it does not address or resolve existing ones. Self-feedback is a triage inbox awaiting human/triage-agent review — do NOT autonomously pick entries off self-feedback and try to fix them. Treat existing entries as out of scope unless your task plan explicitly names a self-feedback entry id as the work.",
"This tool FILES new entries; it does not resolve existing ones. High/critical forge self-feedback may be queued autonomously at startup or an idle turn boundary as repair work. Use sf_self_feedback_resolve after fixing an entry; do not hand-edit the JSONL.",
"Over-reporting is preferred to under-reporting at this stage. If you noticed it about sf, file it. Dedup and threshold-to-roadmap promotion are tracked as their own self-feedback items and will eventually clean noise.",
"Severity guide: low = cosmetic / nice-to-have / improvement idea. medium = noisy or imperfect or recurring friction. high = blocked the unit (sf-the-tool prevented you from completing the task). critical = needs immediate fix (currently treated as high until inline-fix dispatch lands).",
"high/critical entries mark the originating unit as blocked: it will not seal as success, and will be re-queued only after sf is bumped past the recorded version.",
@ -780,6 +780,145 @@ export function registerDbTools(pi: ExtensionAPI): void {
pi.registerTool(selfReportTool);
// ─── sf_self_feedback_resolve ────────────────────────────────────────
// Agent-callable resolver for inline self-feedback repair turns. The
// inline-fix prompt must not rely on hand-editing JSONL: the tool updates
// the structured source of truth and regenerates the markdown view.
const selfFeedbackResolveExecute = async (
_toolCallId: string,
params: any,
_signal: AbortSignal | undefined,
_onUpdate: unknown,
_ctx: unknown,
): Promise<AgentToolResult<Record<string, unknown>>> => {
try {
const ok = markResolved(
params.id,
{
reason: params.reason,
evidence: {
kind: "agent-fix",
commitSha: params.commit_sha,
testPath: params.test_path,
summaryNarrative: params.summary_narrative,
},
criteriaMet: params.criteria_met,
},
process.cwd(),
);
if (!ok) {
return {
content: [
{
type: "text" as const,
text: `Error: unresolved self-feedback entry not found: ${params.id}`,
},
],
details: {
operation: "self_feedback_resolve",
id: params.id,
error: "not_found_or_already_resolved",
},
};
}
return {
content: [
{
type: "text" as const,
text: `Resolved self-feedback ${params.id}`,
},
],
details: {
operation: "self_feedback_resolve",
id: params.id,
resolved: true,
},
};
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
logError("tool", `sf_self_feedback_resolve tool failed: ${msg}`, {
tool: "sf_self_feedback_resolve",
error: String(err),
});
return {
content: [
{
type: "text" as const,
text: `Error in sf_self_feedback_resolve: ${msg}`,
},
],
details: {
operation: "self_feedback_resolve",
id: params.id,
error: msg,
},
};
}
};
// Register the agent-facing resolver tool. Parameter names use snake_case to
// match the wire format of the other sf_* tools.
pi.registerTool({
  name: "sf_self_feedback_resolve",
  label: "Resolve Self Feedback",
  description:
    "Mark a repaired SF self-feedback entry resolved with structured agent-fix evidence. " +
    "Use this only after verifying the entry no longer applies, landing the fix, and citing the commit or verification evidence.",
  promptSnippet:
    "Resolve a repaired SF self-feedback entry with commit/test evidence",
  promptGuidelines: [
    "Use sf_self_feedback_resolve during self-feedback inline-fix repair turns after the fix is implemented and verified.",
    "Do not hand-edit `.sf/self-feedback.jsonl`; this tool updates the JSONL source of truth and regenerates `.sf/SELF-FEEDBACK.md`.",
    "If the entry has acceptance criteria, pass criteria_met with the criteria that were satisfied.",
    "Pass commit_sha when a commit exists. If an entry was already fixed, cite the existing commit or include summary_narrative and test_path.",
  ],
  parameters: Type.Object({
    id: Type.String({
      description: "Self-feedback entry id, e.g. sf-moocz9so-4ffov2",
    }),
    reason: Type.String({
      description: "Short explanation of why the entry is resolved",
    }),
    commit_sha: Type.Optional(
      Type.String({ description: "Commit SHA containing the fix" }),
    ),
    test_path: Type.Optional(
      Type.String({ description: "Focused test or verification path" }),
    ),
    summary_narrative: Type.Optional(
      Type.String({
        description:
          "Concise verification summary when a commit/test path alone is not enough",
      }),
    ),
    criteria_met: Type.Optional(
      Type.Array(Type.String(), {
        description:
          "Acceptance criteria satisfied by this fix, if the entry provided criteria",
      }),
    ),
  }),
  execute: selfFeedbackResolveExecute,
  // Compact call rendering: tool name plus the target entry id, if provided.
  renderCall(args: any, theme: any) {
    let text = theme.fg("toolTitle", theme.bold("sf_self_feedback_resolve "));
    if (args.id) text += theme.fg("muted", args.id);
    return new Text(text, 0, 0);
  },
  // Result rendering: error line when the tool reported a failure, otherwise
  // a success line naming the resolved entry.
  renderResult(result: any, _options: any, theme: any) {
    const d = result.details;
    if (result.isError || d?.error) {
      return new Text(
        theme.fg("error", `Error: ${d?.error ?? "unknown"}`),
        0,
        0,
      );
    }
    return new Text(
      theme.fg("success", `Resolved ${d?.id ?? "self-feedback"}`),
      0,
      0,
    );
  },
});
// ─── sf_plan_milestone ────────────────────────────────────────────────
const planMilestoneExecute = async (

View file

@ -13,9 +13,9 @@ import {
hasResearchTerminalTransition,
isAutoActive,
isAutoPaused,
markResearchTerminalTransition,
markToolEnd,
markToolStart,
markResearchTerminalTransition,
recordToolInvocationError,
} from "../auto.js";
import {
@ -194,6 +194,18 @@ export function registerHooks(
}
}
loadToolApiKeys();
// Flow audit is read-only by default: surface stale dispatched units,
// missing session pointers, runaway history, and optional child hangs at
// startup before another auto unit compounds the same milestone failure.
try {
const { runFlowAudit } = await import("../doctor.js");
const flow = await runFlowAudit(process.cwd());
if (!flow.ok) {
ctx.ui?.notify?.(`Flow audit: ${flow.recommendedAction}`, "warning");
}
} catch {
/* non-fatal — flow audit must never block session start */
}
// Drain self-feedback: auto-resolve entries whose blocking
// sf-version constraint has been satisfied by the current sf bump,
// and surface entries that remain blocked to the operator. Done after
@ -239,9 +251,9 @@ export function registerHooks(
"warning",
);
}
// Forge-only: surface high/critical entries as inline-fix candidates so
// the operator (or a follow-up dispatcher) can drain self-reported bugs
// without leaving the session. Read-only signal for now — no auto-dispatch.
// Forge-only: high/critical entries are queued as hidden follow-up repair
// work on startup, even outside /sf auto. The drain helper owns claim TTL
// and delivery failure retry, so this is safe to call opportunistically.
const highBlocked = triage.stillBlocked.filter(
(e) => e.severity === "high" || e.severity === "critical",
);
@ -366,6 +378,16 @@ export function registerHooks(
resetToolCallLoopGuard();
resetAskUserQuestionsCache();
await handleAgentEnd(pi, event, ctx);
// Best-effort embedding backfill: when SF_LLM_GATEWAY_KEY is set and the
// gateway has an embed worker online, embed any memories that don't yet
// have a vector. Bounded per invocation; logs once-per-minute when the
// gateway is unavailable so we don't spam the journal.
try {
const { runEmbeddingBackfill } = await import("../memory-embeddings.js");
await runEmbeddingBackfill();
} catch {
// Never break agent_end on backfill issues.
}
});
// Squash-merge quick-task branch back to the original branch after the
@ -378,9 +400,10 @@ export function registerHooks(
// Best-effort: don't break the turn lifecycle if cleanup fails.
}
try {
const { consumeCompletedInlineFixClaim } = await import(
"../self-feedback-drain.js"
);
const {
consumeCompletedInlineFixClaim,
dispatchSelfFeedbackInlineFixIfNeeded,
} = await import("../self-feedback-drain.js");
const resolvedIds = consumeCompletedInlineFixClaim(process.cwd());
if (resolvedIds.length > 0) {
const requestReload = (
@ -391,7 +414,9 @@ export function registerHooks(
requestReload?.(
`self-feedback inline fix resolved ${resolvedIds.length} entr${resolvedIds.length === 1 ? "y" : "ies"}`,
);
return;
}
dispatchSelfFeedbackInlineFixIfNeeded(process.cwd(), ctx, pi);
} catch {
// Best-effort: stale code should not break normal turn completion.
}
@ -511,6 +536,7 @@ export function registerHooks(
block: true,
reason:
`Research unit terminal transition: ${currentUnit.type} ${currentUnit.id} has already completed its RESEARCH artifact. ` +
`Post-artifact drift is blocked before runaway supervision treats it as legitimate large research. ` +
`Planning tools (${event.toolName}) are blocked. The orchestrator will dispatch planner units after research.`,
};
}

View file

@ -131,25 +131,65 @@ export async function handleDoctor(
// ── Flow audit subcommand (sf-moocz9so-4ffov2) ─────────────────────────
if (trimmed === "flow" || trimmed.startsWith("flow ")) {
const flowResult = await runFlowAudit(projectRoot());
const flowResult = await runFlowAudit(projectRoot(), {
killOverBudgetChildren: /\b(--kill-children|kill-children|kill)\b/.test(
trimmed,
),
});
const lines: string[] = ["## SF Flow Audit", ""];
if (flowResult.activeMilestone) {
lines.push(
`**Active milestone:** ${flowResult.activeMilestone.id}${flowResult.activeMilestone.title ? `${flowResult.activeMilestone.title}` : ""}`,
flowResult.activeMilestone.phase
? `- Phase: ${flowResult.activeMilestone.phase}`
: "",
"",
);
} else {
lines.push("**Active milestone:** none", "");
}
if (flowResult.activeUnit) {
const ageMin = Math.round(flowResult.activeUnit.ageMs / 60000);
const progressAgeMin = Math.round(
flowResult.activeUnit.progressAgeMs / 60000,
);
lines.push(
`**Active unit:** ${flowResult.activeUnit.unitType} ${flowResult.activeUnit.unitId}`,
`- Phase: ${flowResult.activeUnit.phase}`,
`- Started: ${flowResult.activeUnit.startedAt}`,
`- Age: ${ageMin} minutes`,
`- Progress age: ${progressAgeMin} minutes`,
flowResult.activeUnit.lastProgressAt
? `- Last progress: ${flowResult.activeUnit.lastProgressAt}`
: "",
"",
);
} else {
lines.push("**Active unit:** none", "");
}
lines.push(
`**Session pointer:** ${
flowResult.sessionPointer?.sessionFile ??
flowResult.sessionPointer?.sessionId ??
"none recorded"
}`,
`**Recommended action:** ${flowResult.recommendedAction}`,
"",
);
if (flowResult.warnings.length > 0) {
lines.push("**Warnings:**");
for (const w of flowResult.warnings) lines.push(`- ${w}`);
lines.push("");
}
if (flowResult.staleDispatchedUnits.length > 0) {
lines.push("**Stale dispatched units:**");
for (const unit of flowResult.staleDispatchedUnits.slice(0, 5)) {
lines.push(
`- ${unit.unitType} ${unit.unitId}: progress age ${Math.round(unit.progressAgeMs / 60000)} minutes`,
);
}
lines.push("");
}
if (flowResult.recommendations.length > 0) {
lines.push("**Recommendations:**");
for (const r of flowResult.recommendations) lines.push(`- ${r}`);
@ -158,7 +198,19 @@ export async function handleDoctor(
if (flowResult.childProcesses.length > 0) {
lines.push("**Child processes:**");
for (const cp of flowResult.childProcesses.slice(0, 10)) {
lines.push(`- pid=${cp.pid} [${cp.classification}] ${cp.cmd.slice(0, 60)}`);
const age =
cp.ageMs === undefined ? "" : ` age=${Math.round(cp.ageMs / 60000)}m`;
const nonBlocking = cp.nonBlocking ? " non-blocking" : "";
lines.push(
`- pid=${cp.pid} ppid=${cp.ppid} [${cp.classification}]${age}${nonBlocking} action=${cp.action} ${cp.cmd.slice(0, 80)}`,
);
}
lines.push("");
}
if (flowResult.runawayHistory.length > 0) {
lines.push("**Runaway history:**");
for (const event of flowResult.runawayHistory.slice(-5)) {
lines.push(`- ${event}`);
}
lines.push("");
}

View file

@ -7,13 +7,15 @@
* tracked docs artifacts (sf-moocr4rv-au7r3l).
*/
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
import { mkdirSync, writeFileSync } from "node:fs";
import { join, resolve } from "node:path";
import type { ExtensionCommandContext } from "@singularity-forge/pi-coding-agent";
import { ensureDbOpen } from "./bootstrap/dynamic-tools.js";
import { projectRoot } from "./commands/context.js";
import { profileRepository } from "./repo-profiler.js";
import { recordRepoProfile } from "./sf-db.js";
import { profileRepository, type RepoProfile } from "./repo-profiler.js";
import { getLatestRepoProfile, recordRepoProfile } from "./sf-db.js";
const HARNESS_PROMOTION_REPO_DIR = "docs/exec-plans/active";
/**
* Format a repo profile summary for user notification.
@ -47,10 +49,91 @@ function formatProfileSummary(
`Stacks: ${stacks}`,
`Risk hints: ${risks}`,
"",
"Untracked files were recorded as observations only; SF did not stage or adopt them.",
"Runtime observation boundary:",
"- Profile state was stored only in .sf runtime state.",
"- No repo-committable artifact was written by profiling.",
"- Use /sf harness promote <finding-id> after review to create a tracked docs artifact.",
"- Untracked files remain observed_only; SF did not stage or adopt them.",
].join("\n");
}
/**
 * Derive a filesystem-safe filename segment from a finding id.
 *
 * Lowercases the trimmed input, collapses each run of disallowed characters
 * into a single dash, strips leading/trailing dashes, and caps the result at
 * 120 characters; falls back to "finding" when nothing usable remains. This
 * keeps promotion artifact names deterministic while preventing path
 * traversal through user-provided finding IDs.
 *
 * Consumer: `/sf harness promote <finding-id>`.
 */
function findingIdSlug(findingId: string): string {
  let slug = findingId.trim().toLowerCase();
  slug = slug.replace(/[^a-z0-9._-]+/g, "-");
  slug = slug.replace(/^-+|-+$/g, "");
  slug = slug.slice(0, 120);
  return slug.length > 0 ? slug : "finding";
}
/**
 * Parse the persisted repo profile JSON from .sf runtime state.
 *
 * Purpose: promotion must be a writeback from recorded observations, not a new
 * profiler run that can observe its own artifact or introduce timestamps.
 *
 * Consumer: `/sf harness promote <finding-id>`.
 */
function parseRecordedProfile(profileJson: string): RepoProfile | null {
  let candidate: Partial<RepoProfile>;
  try {
    candidate = JSON.parse(profileJson) as Partial<RepoProfile>;
  } catch {
    // Malformed JSON: caller falls back to row-level metadata.
    return null;
  }
  const hasIdentity =
    typeof candidate.profileId === "string" &&
    typeof candidate.createdAt === "string";
  const hasGitObservations =
    candidate.git != null && Array.isArray(candidate.git.changedFiles);
  if (hasIdentity && hasGitObservations) {
    return candidate as RepoProfile;
  }
  return null;
}
/**
 * Build the stable JSON payload embedded in a promotion artifact.
 *
 * Purpose: document the recorded observation facts without leaking absolute
 * runtime paths or adding promotion-time fields.
 *
 * Consumer: `/sf harness promote <finding-id>`.
 */
function profilePromotionPayload(
  profile: RepoProfile | null,
  fallback: {
    profileId: string;
    branch: string | null;
    dirty: boolean;
    createdAt: string;
  },
): Record<string, unknown> {
  // `??` intentionally falls through on null as well as undefined, so a null
  // recorded value still surfaces the row-level fallback where one exists.
  const pick = <T>(recorded: T | null | undefined, substitute: T): T =>
    recorded ?? substitute;
  return {
    profileId: pick(profile?.profileId, fallback.profileId),
    profileCapturedAt: pick(profile?.createdAt, fallback.createdAt),
    branch: pick(profile?.git.branch, fallback.branch),
    dirty: pick(profile?.git.dirty, fallback.dirty),
    // Observation lists default to empty so the artifact shape stays stable.
    changedFiles: pick(profile?.git.changedFiles, []),
    stacks: pick(profile?.stacks, []),
    entrypoints: pick(profile?.entrypoints, []),
    tests: pick(profile?.tests, []),
    ci: pick(profile?.ci, []),
    docs: pick(profile?.docs, []),
    dataStores: pick(profile?.dataStores, []),
    networkSurfaces: pick(profile?.networkSurfaces, []),
    riskHints: pick(profile?.riskHints, []),
  };
}
/**
* Promote a harness/profile finding from .sf runtime observations into a
* tracked docs artifact. This is the writeback path that turns operational
@ -80,42 +163,57 @@ export async function handleHarnessPromote(
return;
}
// Determine the target tracked-docs path
const displayFindingId = findingId.trim();
const latestProfile = getLatestRepoProfile();
if (!latestProfile) {
ctx.ui.notify(
"No recorded harness profile found. Run /sf harness profile first; promotion writes tracked docs only from .sf runtime observations.",
"warning",
);
return;
}
const slug = findingIdSlug(displayFindingId);
const relativePath = `${HARNESS_PROMOTION_REPO_DIR}/harness-promotion-${slug}.md`;
const trackedDir = resolve(basePath, "docs", "exec-plans", "active");
const targetPath = join(trackedDir, `harness-promotion-${findingId}.md`);
const targetPath = join(trackedDir, `harness-promotion-${slug}.md`);
// Ensure the tracked directory exists (creates under the repo, not .sf)
mkdirSync(trackedDir, { recursive: true });
// Read the latest profile from DB to include in the promotion
const profile = profileRepository(basePath);
const recordedProfile = parseRecordedProfile(latestProfile.profileJson);
const payload = profilePromotionPayload(recordedProfile, {
profileId: latestProfile.profileId,
branch: latestProfile.branch,
dirty: latestProfile.dirty,
createdAt: latestProfile.createdAt,
});
// Build the promoted artifact content
const content = [
`# Harness Promotion: ${findingId}`,
`# Harness Promotion: ${displayFindingId}`,
"",
`Promoted from: \`.sf\` runtime observations`,
`Promoted at: ${new Date().toISOString()}`,
`Source profile: ${profile.profileId}`,
`Source branch: ${profile.git.branch ?? "unknown"}`,
`Finding ID: ${displayFindingId}`,
`Repo artifact: \`${relativePath}\``,
"Source: `.sf` runtime observations",
`Source profile: ${latestProfile.profileId}`,
`Source profile captured at: ${latestProfile.createdAt}`,
`Source branch: ${latestProfile.branch ?? "unknown"}`,
"",
"## Observed State",
"## Runtime Boundary",
"",
"- `.sf` remains operational runtime state and is not repo output.",
"- Unpromoted .sf runtime observations remain `observed_only`.",
"- This Markdown file is the repo-committable artifact created by promotion.",
"- Promotion does not stage or claim untracked observed files.",
"",
"## Observed Profile",
"",
"```json",
JSON.stringify(
{
profileId: profile.profileId,
branch: profile.git.branch,
changedFiles: profile.git.changedFiles,
stacks: profile.stacks,
riskHints: profile.riskHints,
},
null,
2,
),
JSON.stringify(payload, null, 2),
"```",
"",
"## Status",
"## Review Checklist",
"",
"- [ ] Reviewed by human",
"- [ ] Adopted into milestone plan",
@ -131,10 +229,10 @@ export async function handleHarnessPromote(
ctx.ui.notify(
[
`Harness finding '${findingId}' promoted to tracked docs.`,
`Path: ${targetPath}`,
`Harness finding '${displayFindingId}' promoted to tracked docs.`,
`Path: ${relativePath}`,
"",
"This artifact is now part of the repo's tracked documentation.",
"This Markdown file is now the repo-committable artifact for review.",
"Unpromoted .sf runtime state remains observed_only.",
].join("\n"),
"info",
@ -161,7 +259,7 @@ export async function handleHarness(
}
if (!["profile", "snapshot", "status"].includes(subcommand)) {
ctx.ui.notify(
"Usage: /sf harness profile | /sf harness promote <finding-id>\nRecords a read-only repo profile or promotes a finding to tracked docs.",
"Usage: /sf harness profile | /sf harness promote <finding-id>\nRecords a read-only .sf runtime profile or promotes a reviewed finding to tracked docs.",
"warning",
);
return;

View file

@ -50,34 +50,486 @@ import {
loadEffectiveSFPreferences,
type SFPreferences,
} from "./preferences.js";
import {
type PersistedSelfFeedbackEntry,
readAllSelfFeedback,
recordSelfFeedback,
} from "./self-feedback.js";
import { getMilestoneSlices, getSliceTasks, isDbAvailable } from "./sf-db.js";
import { deriveState, isMilestoneComplete } from "./state.js";
import { isClosedStatus } from "./status-guards.js";
import type { RoadmapSliceEntry } from "./types.js";
import { parseUnitId } from "./unit-id.js";
// ─── Flow Audit Types (sf-moocz9so-4ffov2) ────────────────────────────────
export type FlowAuditChildClassification =
| "active-session"
| "warmup"
| "background"
| "orphan"
| "unknown";
export type FlowAuditChildAction = "observe" | "non-blocking" | "kill";
/**
* Configure `runFlowAudit` for deterministic tests and explicit recovery mode.
*
* Purpose: keep the default auditor read-only during startup while allowing
* `/sf doctor flow --kill-children` and tests to exercise bounded child cleanup.
*
* Consumer: session_start, `/sf doctor flow`, and flow-audit regression tests.
*/
export interface FlowAuditOptions {
nowMs?: number;
staleProgressMs?: number;
optionalChildBudgetMs?: number;
psOutput?: string;
killOverBudgetChildren?: boolean;
killProcess?: (pid: number) => void;
recordSelfFeedback?: boolean;
}
/**
* Flow-audit output returned to commands and startup hooks.
*
* Purpose: preserve enough structured evidence for operators and tests to avoid
* reconstructing stuck auto-mode state from locks, runtime files, sessions, and ps.
*
* Consumer: `/sf doctor flow`, session_start notifications, and regression tests.
*/
export interface FlowAuditResult {
ok: boolean;
activeMilestone?: {
id: string;
title?: string;
phase?: string;
};
activeUnit?: {
unitType: string;
unitId: string;
phase: string;
startedAt: string;
ageMs: number;
progressAgeMs: number;
lastProgressAt?: string;
};
sessionPointer?: {
sessionId?: string;
sessionFile?: string;
source: "auto.lock" | "runtime-unit";
};
recommendations: string[];
recommendedAction: string;
warnings: string[];
childProcesses: Array<{
pid: number;
ppid: number;
cmd: string;
classification: "active-session" | "warmup" | "orphan" | "unknown";
classification: FlowAuditChildClassification;
ageMs?: number;
nonBlocking: boolean;
overBudget: boolean;
action: FlowAuditChildAction;
killed?: boolean;
killError?: string;
}>;
lastErrors: string[];
staleDispatchedUnits: Array<{
unitType: string;
unitId: string;
phase: string;
progressAgeMs: number;
lastProgressAt?: string;
}>;
runawayHistory: string[];
loopEvidence?: {
milestoneId: string;
sliceId?: string;
taskId?: string;
completedPriorTasks: string[];
missingSummaries: string[];
};
repeatedFailureRollup?: {
filed: boolean;
milestoneId: string;
count: number;
entryId?: string;
};
}
// ─── Flow Audit Implementation ────────────────────────────────────────────
const DEFAULT_STALE_PROGRESS_MS = 20 * 60 * 1000;
const DEFAULT_OPTIONAL_CHILD_BUDGET_MS = 30 * 60 * 1000;
const REPEATED_FAILURE_THRESHOLD = 3;
const FLOW_AUDIT_ROLLUP_KIND = "flow-audit:repeated-milestone-failure";
interface AutoLockAuditRecord {
pid?: number;
unitType?: string;
unitId?: string;
startedAt?: string | number;
phase?: string;
sessionId?: string;
sessionFile?: string;
}
interface RuntimeUnitAuditRecord {
unitType?: string;
unitId?: string;
phase?: string;
startedAt?: number | string;
updatedAt?: number | string;
lastProgressAt?: number | string;
lastProgressKind?: string;
progressCount?: number;
sessionId?: string;
sessionFile?: string;
runawayGuardPause?: {
reason?: string;
unitType?: string;
unitId?: string;
pausedAt?: number;
};
}
interface PsAuditRow {
pid: number;
ppid: number;
ageMs?: number;
cmd: string;
}
/**
 * Normalize a persisted timestamp (epoch seconds, epoch millis, or a date
 * string) into epoch milliseconds, returning the fallback when unusable.
 */
function parseEpochMs(value: unknown, fallbackMs: number): number {
  if (typeof value === "number" && Number.isFinite(value)) {
    // Heuristic: numbers below 10^10 are epoch seconds, not milliseconds.
    if (value < 10_000_000_000) return value * 1000;
    return value;
  }
  if (typeof value === "string") {
    const nonEmpty = value.trim().length > 0;
    if (nonEmpty) {
      const parsedMs = new Date(value).getTime();
      if (Number.isFinite(parsedMs)) return parsedMs;
    }
  }
  return fallbackMs;
}
/** Render epoch millis as an ISO-8601 string; undefined when absent/invalid. */
function formatIso(ms: number | undefined): string | undefined {
  if (ms === undefined) return undefined;
  if (!Number.isFinite(ms)) return undefined;
  return new Date(ms).toISOString();
}
/** Convert milliseconds to whole minutes, clamped at zero for negatives. */
function minutes(ms: number): number {
  const wholeMinutes = Math.round(ms / 60_000);
  return wholeMinutes > 0 ? wholeMinutes : 0;
}
/**
 * Read and parse a JSON file, returning null on any I/O or parse failure.
 * Audit reads must never throw — missing/corrupt runtime files are expected.
 */
function readJsonFile<T>(path: string): T | null {
  try {
    if (!existsSync(path)) return null;
    const raw = readFileSync(path, "utf8");
    return JSON.parse(raw) as T;
  } catch {
    return null;
  }
}
/**
 * Load every runtime unit record (*.json) from the units directory.
 * Best-effort: an unreadable directory or malformed file is skipped silently
 * because the audit must never fail on damaged runtime state.
 */
function readRuntimeUnits(runtimeUnitsDir: string): RuntimeUnitAuditRecord[] {
  const units: RuntimeUnitAuditRecord[] = [];
  if (!existsSync(runtimeUnitsDir)) return units;
  try {
    const jsonFiles = readdirSync(runtimeUnitsDir).filter((name) =>
      name.endsWith(".json"),
    );
    for (const name of jsonFiles) {
      const parsed = readJsonFile<RuntimeUnitAuditRecord>(
        join(runtimeUnitsDir, name),
      );
      if (parsed) units.push(parsed);
    }
  } catch {
    // Runtime audit must stay best-effort.
  }
  return units;
}
/**
 * Parse `ps -eo pid,ppid[,etimes],cmd` output into structured rows.
 * Lines that do not match the expected column layout are dropped.
 */
function parsePsOutput(psOutput: string): PsAuditRow[] {
  const rowPattern = /^(\d+)\s+(\d+)(?:\s+(\d+))?\s+(.+)$/;
  const parsed: PsAuditRow[] = [];
  for (const rawLine of psOutput.split("\n")) {
    const line = rawLine.trim();
    if (!line) continue;
    const columns = line.match(rowPattern);
    if (!columns) continue;
    const pid = Number.parseInt(columns[1], 10);
    const ppid = Number.parseInt(columns[2], 10);
    if (!Number.isFinite(pid) || !Number.isFinite(ppid)) continue;
    // The etimes column is optional: some ps invocations omit elapsed seconds.
    let ageMs: number | undefined;
    if (columns[3] !== undefined) {
      const elapsedSeconds = Number.parseInt(columns[3], 10);
      if (Number.isFinite(elapsedSeconds)) ageMs = elapsedSeconds * 1000;
    }
    parsed.push({ pid, ppid, ageMs, cmd: columns[4] });
  }
  return parsed;
}
/**
 * Obtain process rows: injected psOutput (tests) or a live `ps` sweep.
 * Windows and any ps failure yield an empty list rather than throwing.
 */
async function readPsRows(options: FlowAuditOptions): Promise<PsAuditRow[]> {
  const injected = options.psOutput;
  if (injected !== undefined) return parsePsOutput(injected);
  if (process.platform === "win32") return [];
  try {
    const { execSync } = await import("node:child_process");
    const output = execSync("ps -eo pid,ppid,etimes,cmd --no-headers", {
      encoding: "utf8",
      timeout: 5000,
    });
    return parsePsOutput(output);
  } catch {
    // No usable ps (or it timed out): audit proceeds without process data.
    return [];
  }
}
/**
 * Classify a process row by its command line.
 *
 * Order matters: warmup wins over everything, a PID-1-parented next-server is
 * an orphan before it can be called background, and only then do we look for
 * SF/agent session processes.
 */
function classifyProcess(row: PsAuditRow): FlowAuditChildClassification {
  const command = row.cmd.toLowerCase();
  const matchesAny = (needles: string[]): boolean =>
    needles.some((needle) => command.includes(needle));
  if (matchesAny(["sift", "warmup"])) return "warmup";
  if (row.ppid === 1 && command.includes("next-server")) return "orphan";
  if (matchesAny(["next-server", "vite", "turbopack"])) return "background";
  const looksLikeRuntime = matchesAny(["node", "sf-run", "codex"]);
  const looksLikeSession = matchesAny([
    " sf",
    "/sf",
    "dist/loader",
    "tool-session",
    "headless",
  ]);
  if (looksLikeRuntime && looksLikeSession) return "active-session";
  return "unknown";
}
/** Optional children (warmup/background/orphan) never block the flow. */
function isOptionalChild(
  classification: FlowAuditChildClassification,
): boolean {
  switch (classification) {
    case "warmup":
    case "background":
    case "orphan":
      return true;
    default:
      return false;
  }
}
/**
 * Decide whether a process row belongs in the audit report.
 * Classified processes always appear; unknowns appear only when they are the
 * active unit's own pid or a direct child of it.
 */
function shouldIncludeProcess(
  row: PsAuditRow,
  classification: FlowAuditChildClassification,
  activePid: number | undefined,
): boolean {
  if (classification !== "unknown") return true;
  if (activePid === undefined) return false;
  const relatedToActiveUnit = row.pid === activePid || row.ppid === activePid;
  return relatedToActiveUnit;
}
/**
 * Pull recent error-like messages from the runtime notifications log.
 * Scans only the last 20 JSONL rows; malformed rows and read failures are
 * ignored because the audit is best-effort.
 */
function readRecentErrors(runtimeRoot: string): string[] {
  const logPath = join(runtimeRoot, "notifications.jsonl");
  if (!existsSync(logPath)) return [];
  const collected: string[] = [];
  try {
    const rows = readFileSync(logPath, "utf8")
      .split("\n")
      .filter((row) => row.trim());
    for (const row of rows.slice(-20)) {
      try {
        const record = JSON.parse(row) as {
          severity?: string;
          message?: string;
          text?: string;
        };
        const text = record.message ?? record.text ?? "";
        const lowered = text.toLowerCase();
        const errorLike =
          record.severity === "error" ||
          lowered.includes("error") ||
          lowered.includes("failed");
        if (errorLike) collected.push(text || "Unknown error");
      } catch {
        // skip malformed notification rows
      }
    }
  } catch {
    // non-fatal
  }
  return collected;
}
/**
 * Gather loop evidence for a dispatched execute-task unit: which prior tasks
 * in the slice PLAN are already done, and which SUMMARY artifacts are missing.
 * Completed work next to missing summaries is the classic dispatch-loop sign.
 *
 * Returns undefined when the unit is not an execute-task, its id cannot be
 * parsed into milestone/slice/task, the PLAN is unavailable, or parsing fails.
 */
function buildLoopEvidence(
  basePath: string,
  unitType: string,
  unitId: string,
): FlowAuditResult["loopEvidence"] | undefined {
  if (unitType !== "execute-task") return undefined;
  const { milestone, slice, task } = parseUnitId(unitId);
  if (!milestone || !slice || !task) return undefined;
  const planPath = resolveSliceFile(basePath, milestone, slice, "PLAN");
  if (!planPath || !existsSync(planPath)) return undefined;
  const completedPriorTasks: string[] = [];
  const missingSummaries: string[] = [];
  try {
    const plan = parsePlan(readFileSync(planPath, "utf8"));
    const taskIndex = plan.tasks.findIndex((entry) => entry.id === task);
    // Collect tasks before the current one that are already marked done.
    if (taskIndex > 0) {
      for (const earlier of plan.tasks.slice(0, taskIndex)) {
        if (earlier.done) completedPriorTasks.push(earlier.id);
      }
    }
    if (!resolveTaskFile(basePath, milestone, slice, task, "SUMMARY")) {
      missingSummaries.push(`${milestone}/${slice}/${task} task SUMMARY`);
    }
    // A slice with every task done should also have a slice-level SUMMARY.
    const sliceComplete =
      plan.tasks.length > 0 && plan.tasks.every((entry) => entry.done);
    if (
      sliceComplete &&
      !resolveSliceFile(basePath, milestone, slice, "SUMMARY")
    ) {
      missingSummaries.push(`${milestone}/${slice} slice SUMMARY`);
    }
  } catch {
    return undefined;
  }
  return {
    milestoneId: milestone,
    sliceId: slice,
    taskId: task,
    completedPriorTasks,
    missingSummaries,
  };
}
/**
 * Merge runaway evidence from runtime unit pause markers and unresolved
 * self-feedback entries, scoped to the active milestone when one is known.
 * Result is deduplicated (order-preserving) and capped to the last 10 items.
 */
function collectRunawayHistory(
  runtimeUnits: RuntimeUnitAuditRecord[],
  feedback: PersistedSelfFeedbackEntry[],
  milestoneId: string | undefined,
): string[] {
  const collected: string[] = [];
  for (const unit of runtimeUnits) {
    const pause = unit.runawayGuardPause;
    if (!pause) continue;
    const pausedUnitId = pause.unitId ?? unit.unitId ?? "unknown";
    // Scope to the active milestone when one is known.
    if (milestoneId && !pausedUnitId.startsWith(`${milestoneId}/`)) continue;
    collected.push(pause.reason ?? `Runaway guard paused ${pausedUnitId}`);
  }
  for (const entry of feedback) {
    if (entry.resolvedAt) continue;
    if (milestoneId && entry.occurredIn?.milestone !== milestoneId) continue;
    const mentionsRunaway =
      entry.kind.includes("runaway") ||
      entry.summary.toLowerCase().includes("runaway");
    if (mentionsRunaway) collected.push(`${entry.kind}: ${entry.summary}`);
  }
  return [...new Set(collected)].slice(-10);
}
/**
 * File (at most once) a rollup self-feedback item when a milestone accumulates
 * repeated unresolved flow failures.
 *
 * Dedup contract: if an open rollup already exists for the milestone, report
 * it instead of filing another. Returns undefined when below threshold, when
 * recording is disabled via options, or when persistence fails.
 */
function maybeRecordRepeatedFailureRollup(
  basePath: string,
  milestoneId: string | undefined,
  feedback: PersistedSelfFeedbackEntry[],
  options: FlowAuditOptions,
): FlowAuditResult["repeatedFailureRollup"] | undefined {
  if (!milestoneId || options.recordSelfFeedback === false) return undefined;
  const openForMilestone = (entry: PersistedSelfFeedbackEntry): boolean =>
    !entry.resolvedAt && entry.occurredIn?.milestone === milestoneId;
  const failures = feedback.filter(
    (entry) => openForMilestone(entry) && entry.kind !== FLOW_AUDIT_ROLLUP_KIND,
  );
  if (failures.length < REPEATED_FAILURE_THRESHOLD) return undefined;
  const existingRollup = feedback.find(
    (entry) => openForMilestone(entry) && entry.kind === FLOW_AUDIT_ROLLUP_KIND,
  );
  if (existingRollup) {
    return {
      filed: false,
      milestoneId,
      count: failures.length,
      entryId: existingRollup.id,
    };
  }
  // Cap evidence at the most recent 8 failures to keep the rollup readable.
  const evidence = failures
    .slice(-8)
    .map((entry) => {
      const location = [
        entry.occurredIn?.milestone,
        entry.occurredIn?.slice,
        entry.occurredIn?.task,
      ]
        .filter(Boolean)
        .join("/");
      return `[${entry.id}] ${entry.kind} ${location}: ${entry.summary}`;
    })
    .join("\n");
  const recorded = recordSelfFeedback(
    {
      kind: FLOW_AUDIT_ROLLUP_KIND,
      severity: "high",
      summary: `${failures.length} unresolved flow failures on ${milestoneId} need one recovery fix`,
      evidence,
      suggestedFix:
        "Fix the shared milestone-flow failure instead of filing one item per failed unit. Use the flow audit evidence to repair stale dispatch, missing summary, runaway, or child-process handling.",
      acceptanceCriteria:
        "AC1: flow audit reports the active milestone/unit and session pointer. AC2: stale dispatched unit with no progress is flagged. AC3: runaway history and child-process hang evidence are preserved. AC4: repeated same-milestone failures stay deduplicated into one open item.",
      source: "detector",
      occurredIn: { milestone: milestoneId, unitType: "flow-audit" },
    },
    basePath,
  );
  if (!recorded) return undefined;
  return {
    filed: true,
    milestoneId,
    count: failures.length,
    entryId: recorded.entry.id,
  };
}
/**
 * Pick the single most actionable recommendation from audit evidence.
 *
 * Priority: stale dispatched unit > over-budget optional child > recent
 * errors > idle milestone with no active unit > nothing to do.
 */
function chooseRecommendedAction(args: {
  activeUnit?: FlowAuditResult["activeUnit"];
  sessionPointer?: FlowAuditResult["sessionPointer"];
  staleDispatchedUnits: FlowAuditResult["staleDispatchedUnits"];
  childProcesses: FlowAuditResult["childProcesses"];
  lastErrors: string[];
  activeMilestone?: FlowAuditResult["activeMilestone"];
}): string {
  const [staleUnit] = args.staleDispatchedUnits;
  if (staleUnit) {
    // Prefer the session file over the id so operators can open it directly.
    const pointer = args.sessionPointer;
    let session = "";
    if (pointer?.sessionFile) session = ` ${pointer.sessionFile}`;
    else if (pointer?.sessionId) session = ` ${pointer.sessionId}`;
    return `Inspect session${session} for ${staleUnit.unitType} ${staleUnit.unitId}; if no new output exists, stop/requeue the stale dispatched unit before continuing.`;
  }
  const hungOptionalChild = args.childProcesses.find(
    (child) => child.nonBlocking && child.overBudget,
  );
  if (hungOptionalChild) {
    return `Optional ${hungOptionalChild.classification} child pid ${hungOptionalChild.pid} is over budget; it is non-blocking, or rerun with --kill-children to terminate it.`;
  }
  if (args.lastErrors.length > 0) {
    return "Review recent errors before dispatching another unit.";
  }
  if (args.activeMilestone && !args.activeUnit) {
    return `Dispatch or resume the next unit for ${args.activeMilestone.id}.`;
  }
  return "No flow-auditor action needed.";
}
/**
* Run a flow audit: inspect active unit state, auto.lock, runtime artifacts,
* and child processes to diagnose stuck milestones without human forensic work.
@ -86,165 +538,228 @@ export interface FlowAuditResult {
* milestone/unit, progress age, session pointer, child processes, last errors,
* and recommended action.
*
* Consumer: `/sf doctor flow` command.
* Consumer: `/sf doctor flow` command and session_start startup health sweep.
*/
export async function runFlowAudit(basePath: string): Promise<FlowAuditResult> {
export async function runFlowAudit(
basePath: string,
options: FlowAuditOptions = {},
): Promise<FlowAuditResult> {
const nowMs = options.nowMs ?? Date.now();
const staleProgressMs = options.staleProgressMs ?? DEFAULT_STALE_PROGRESS_MS;
const optionalChildBudgetMs =
options.optionalChildBudgetMs ?? DEFAULT_OPTIONAL_CHILD_BUDGET_MS;
const runtimeRoot = sfRoot(basePath);
const warnings: string[] = [];
const recommendations: string[] = [];
const childProcesses: FlowAuditResult["childProcesses"] = [];
const lastErrors: string[] = [];
const lastErrors = readRecentErrors(runtimeRoot);
const staleDispatchedUnits: FlowAuditResult["staleDispatchedUnits"] = [];
let sessionPointer: FlowAuditResult["sessionPointer"] | undefined;
let activeMilestone: FlowAuditResult["activeMilestone"] | undefined;
// Read auto.lock for active unit info
const autoLockPath = join(basePath, ".sf", "auto.lock");
const autoLockPath = join(runtimeRoot, "auto.lock");
let activeUnit: FlowAuditResult["activeUnit"] | undefined;
if (existsSync(autoLockPath)) {
try {
const lockContent = readFileSync(autoLockPath, "utf8");
const lockData = JSON.parse(lockContent) as {
unitType?: string;
unitId?: string;
startedAt?: string;
phase?: string;
let activePid: number | undefined;
const lockData = readJsonFile<AutoLockAuditRecord>(autoLockPath);
if (lockData) {
if (lockData.unitType && lockData.unitId) {
const startedAtMs = parseEpochMs(lockData.startedAt, nowMs);
const parsed = parseUnitId(lockData.unitId);
activeMilestone = { id: parsed.milestone };
activePid =
typeof lockData.pid === "number" && Number.isFinite(lockData.pid)
? lockData.pid
: undefined;
activeUnit = {
unitType: lockData.unitType,
unitId: lockData.unitId,
phase: lockData.phase ?? "unknown",
startedAt: formatIso(startedAtMs) ?? new Date(nowMs).toISOString(),
ageMs: Math.max(0, nowMs - startedAtMs),
progressAgeMs: Math.max(0, nowMs - startedAtMs),
};
if (lockData.unitType && lockData.unitId) {
const startedAt = lockData.startedAt
? new Date(lockData.startedAt).getTime()
: Date.now();
const ageMs = Date.now() - startedAt;
activeUnit = {
unitType: lockData.unitType,
unitId: lockData.unitId,
phase: lockData.phase ?? "unknown",
startedAt: lockData.startedAt ?? new Date().toISOString(),
ageMs,
if (lockData.sessionId || lockData.sessionFile) {
sessionPointer = {
sessionId: lockData.sessionId,
sessionFile: lockData.sessionFile,
source: "auto.lock",
};
if (ageMs > 30 * 60 * 1000) {
warnings.push(
`Active unit ${lockData.unitId} has been running for ${Math.round(ageMs / 60000)} minutes.`,
);
recommendations.push(
`Consider checking if ${lockData.unitId} is stuck or making progress.`,
);
}
}
} catch {
warnings.push("Could not parse .sf/auto.lock");
}
} else if (existsSync(autoLockPath)) {
warnings.push("Could not parse .sf/auto.lock");
}
// Read runtime units directory
const runtimeUnitsDir = join(basePath, ".sf", "runtime", "units");
if (existsSync(runtimeUnitsDir)) {
try {
const files = readdirSync(runtimeUnitsDir);
let dispatchedCount = 0;
for (const file of files) {
if (!file.endsWith(".json")) continue;
try {
const content = readFileSync(
join(runtimeUnitsDir, file),
"utf8",
);
const unit = JSON.parse(content) as {
phase?: string;
unitType?: string;
unitId?: string;
};
if (unit.phase === "dispatched") dispatchedCount++;
} catch {
// skip malformed
}
}
if (dispatchedCount > 1) {
warnings.push(
`${dispatchedCount} units are in dispatched phase simultaneously.`,
);
}
} catch {
// ignore
}
}
// Read notifications for recent errors
const notificationsPath = join(basePath, ".sf", "notifications.jsonl");
if (existsSync(notificationsPath)) {
try {
const lines = readFileSync(notificationsPath, "utf8")
.split("\n")
.filter((l) => l.trim());
const recentLines = lines.slice(-20);
for (const line of recentLines) {
try {
const entry = JSON.parse(line) as {
severity?: string;
message?: string;
};
if (
entry.severity === "error" ||
entry.message?.toLowerCase().includes("error")
) {
lastErrors.push(entry.message ?? "Unknown error");
}
} catch {
// skip malformed
}
}
} catch {
// ignore
}
}
// Scan child processes (Linux/macOS only)
if (process.platform !== "win32") {
try {
const { execSync } = await import("node:child_process");
const psOutput = execSync("ps -eo pid,ppid,cmd --no-headers", {
encoding: "utf8",
timeout: 5000,
const runtimeUnits = readRuntimeUnits(join(runtimeRoot, "runtime", "units"));
let dispatchedCount = 0;
for (const unit of runtimeUnits) {
if (unit.phase === "dispatched") dispatchedCount++;
if (!unit.unitType || !unit.unitId) continue;
const progressBaseMs = parseEpochMs(
unit.lastProgressAt ?? unit.updatedAt ?? unit.startedAt,
nowMs,
);
const progressAgeMs = Math.max(0, nowMs - progressBaseMs);
const lastProgressAt = formatIso(progressBaseMs);
const stale =
unit.phase === "dispatched" && progressAgeMs > staleProgressMs;
if (stale) {
staleDispatchedUnits.push({
unitType: unit.unitType,
unitId: unit.unitId,
phase: unit.phase ?? "unknown",
progressAgeMs,
lastProgressAt,
});
const lines = psOutput.split("\n").filter((l) => l.trim());
for (const line of lines) {
const parts = line.trim().split(/\s+/);
if (parts.length < 3) continue;
const pid = Number.parseInt(parts[0], 10);
const ppid = Number.parseInt(parts[1], 10);
const cmd = parts.slice(2).join(" ");
if (!Number.isFinite(pid)) continue;
// Classify processes
let classification: FlowAuditResult["childProcesses"][0]["classification"] = "unknown";
if (cmd.includes("sift") || cmd.includes("warmup")) {
classification = "warmup";
} else if (cmd.includes("node") && cmd.includes("sf")) {
classification = "active-session";
} else if (ppid === 1 && cmd.includes("next-server")) {
classification = "orphan";
}
childProcesses.push({ pid, cmd, classification });
warnings.push(
`Unit ${unit.unitId} has no progress for ${minutes(progressAgeMs)} minutes (phase=${unit.phase}).`,
);
}
if (
activeUnit &&
unit.unitType === activeUnit.unitType &&
unit.unitId === activeUnit.unitId
) {
activeUnit.phase = unit.phase ?? activeUnit.phase;
activeUnit.progressAgeMs = progressAgeMs;
activeUnit.lastProgressAt = lastProgressAt;
if (!sessionPointer && (unit.sessionId || unit.sessionFile)) {
sessionPointer = {
sessionId: unit.sessionId,
sessionFile: unit.sessionFile,
source: "runtime-unit",
};
}
} catch {
// ignore on platforms without ps
}
}
if (dispatchedCount > 1) {
warnings.push(
`${dispatchedCount} units are in dispatched phase simultaneously.`,
);
}
const psRows = await readPsRows(options);
for (const row of psRows) {
const classification = classifyProcess(row);
if (!shouldIncludeProcess(row, classification, activePid)) continue;
const nonBlocking = isOptionalChild(classification);
const overBudget =
nonBlocking &&
row.ageMs !== undefined &&
row.ageMs > optionalChildBudgetMs;
let action: FlowAuditChildAction = nonBlocking ? "non-blocking" : "observe";
let killed = false;
let killError: string | undefined;
if (overBudget) {
warnings.push(
`${classification} child pid ${row.pid} is over budget (${minutes(row.ageMs ?? 0)} minutes).`,
);
if (options.killOverBudgetChildren) {
action = "kill";
try {
if (options.killProcess) options.killProcess(row.pid);
else process.kill(row.pid, "SIGTERM");
killed = true;
} catch (err) {
killError = err instanceof Error ? err.message : String(err);
warnings.push(
`Failed to kill over-budget ${classification} child pid ${row.pid}: ${killError}`,
);
}
}
}
childProcesses.push({
pid: row.pid,
ppid: row.ppid,
cmd: row.cmd,
classification,
ageMs: row.ageMs,
nonBlocking,
overBudget,
action,
killed: killed || undefined,
killError,
});
}
// Derive state for milestone context
try {
const state = await deriveState(basePath);
if (state.activeMilestone) {
activeMilestone = {
id: state.activeMilestone.id,
title: state.activeMilestone.title,
phase: state.phase,
};
}
if (state.activeMilestone && !activeUnit) {
recommendations.push(
`No active unit detected, but milestone ${state.activeMilestone.id} is active. Consider dispatching the next unit.`,
);
}
} catch {
// ignore
// State derivation is useful context but not required for the audit.
}
const loopEvidence =
activeUnit &&
buildLoopEvidence(basePath, activeUnit.unitType, activeUnit.unitId);
if (
loopEvidence?.completedPriorTasks.length &&
loopEvidence.missingSummaries.length
) {
warnings.push(
`${loopEvidence.milestoneId}/${loopEvidence.sliceId} has ${loopEvidence.completedPriorTasks.length} completed prior tasks but missing final summary evidence for ${loopEvidence.missingSummaries.join(", ")}.`,
);
}
const feedback = readAllSelfFeedback(basePath);
const milestoneId = activeMilestone?.id;
const runawayHistory = collectRunawayHistory(
runtimeUnits,
feedback,
milestoneId,
);
const repeatedFailureRollup = maybeRecordRepeatedFailureRollup(
basePath,
milestoneId,
feedback,
options,
);
if (repeatedFailureRollup?.filed) {
recommendations.push(
`Filed ${FLOW_AUDIT_ROLLUP_KIND} for ${milestoneId} after ${repeatedFailureRollup.count} repeated failures.`,
);
}
const recommendedAction = chooseRecommendedAction({
activeUnit,
sessionPointer,
staleDispatchedUnits,
childProcesses,
lastErrors,
activeMilestone,
});
if (!recommendations.includes(recommendedAction)) {
recommendations.unshift(recommendedAction);
}
return {
ok: warnings.length === 0 && lastErrors.length === 0,
ok:
warnings.length === 0 &&
lastErrors.length === 0 &&
staleDispatchedUnits.length === 0,
activeMilestone,
activeUnit,
sessionPointer,
recommendations,
recommendedAction,
warnings,
childProcesses,
lastErrors,
staleDispatchedUnits,
runawayHistory,
loopEvidence,
repeatedFailureRollup,
};
}

View file

@ -15,7 +15,8 @@
"sf_summary_save",
"sf_requirement_update",
"sf_milestone_generate_id",
"sf_self_report"
"sf_self_report",
"sf_self_feedback_resolve"
],
"commands": ["sf", "kill", "worktree", "exit"],
"hooks": [
@ -25,6 +26,7 @@
"session_fork",
"before_agent_start",
"agent_end",
"turn_end",
"session_before_compact",
"session_shutdown",
"tool_call",

View file

@ -0,0 +1,179 @@
import type { Api, Model } from "@singularity-forge/pi-ai";
import type { ModelFailureRecord } from "./auto/session.js";
import { resolveModelId } from "./auto-model-selection.js";
import type { ResolvedModelConfig } from "./preferences.js";
export interface ModelRouteRef {
provider: string;
id: string;
}
export interface NextModelRouteResult {
model: Model<Api>;
route: string;
source: "configured" | "available";
}
/**
 * Build the stable identity key for a concrete provider route.
 *
 * Purpose: make fallback recovery compare full provider/model routes instead of
 * ambiguous bare model ids.
 *
 * Consumer: resolveNextConfiguredModelRoute() when skipping failed and current
 * runtime routes.
 */
export function modelRouteKey(route: ModelRouteRef): string {
  const provider = route.provider.toLowerCase();
  const modelId = route.id.toLowerCase();
  return [provider, modelId].join("/");
}
/**
 * Flatten primary + fallbacks into a unique, order-preserving route list.
 * Comparison is case-insensitive; the first spelling of a route wins.
 */
function dedupeConfiguredRoutes(modelConfig: ResolvedModelConfig): string[] {
  const unique = new Map<string, string>();
  for (const route of [modelConfig.primary, ...modelConfig.fallbacks]) {
    const key = route.toLowerCase();
    if (!unique.has(key)) unique.set(key, route);
  }
  return [...unique.values()];
}
/**
 * Resolve the next configured model route after a provider/model failure.
 *
 * Purpose: keep auto-mode recovery inside the user's explicit primary/fallback
 * chain, skip routes already failed for this unit, and avoid returning the same
 * provider/model again.
 *
 * Consumer: bootstrap/agent-end-recovery.ts when a provider returns quota,
 * rate-limit, server, stream, or connection failures during a unit.
 */
export function resolveNextConfiguredModelRoute(args: {
  current: ModelRouteRef | undefined;
  modelConfig: ResolvedModelConfig;
  availableModels: Model<Api>[];
  failedRoutes: readonly ModelFailureRecord[];
  isBlocked?: (model: Model<Api>) => boolean;
}): NextModelRouteResult | undefined {
  const currentKey = args.current ? modelRouteKey(args.current) : undefined;
  const failedKeys = new Set<string>();
  for (const failure of args.failedRoutes) {
    failedKeys.add(
      modelRouteKey({ provider: failure.provider, id: failure.modelId }),
    );
  }
  // Resolve every configured route against the live model list up front so
  // the scan below reasons about concrete provider/model identities.
  const resolvedRoutes = dedupeConfiguredRoutes(args.modelConfig).map(
    (configuredRoute) => ({
      configuredRoute,
      model: resolveModelId(
        configuredRoute,
        args.availableModels,
        args.current?.provider,
      ) as Model<Api> | undefined,
    }),
  );
  // Start scanning after the current route when it appears in the chain;
  // otherwise consider the whole chain.
  let startIndex = 0;
  if (currentKey !== undefined) {
    const position = resolvedRoutes.findIndex(
      (entry) => entry.model && modelRouteKey(entry.model) === currentKey,
    );
    if (position >= 0) startIndex = position + 1;
  }
  for (const candidate of resolvedRoutes.slice(startIndex)) {
    if (!candidate.model) continue;
    const candidateKey = modelRouteKey(candidate.model);
    if (candidateKey === currentKey) continue;
    if (failedKeys.has(candidateKey)) continue;
    if (args.isBlocked?.(candidate.model)) continue;
    return {
      model: candidate.model,
      route: candidate.configuredRoute,
      source: "configured",
    };
  }
  return undefined;
}
/**
 * Resolve another currently available provider/model route when configured
 * fallbacks are missing or exhausted.
 *
 * Purpose: keep auto-mode moving on quota/rate-limit/server failures instead
 * of pausing just because the configured fallback chain did not cover every
 * live provider route.
 *
 * Consumer: bootstrap/agent-end-recovery.ts after configured fallback lookup
 * fails for a model-route failure.
 */
export function resolveNextAvailableModelRoute(args: {
  current: ModelRouteRef | undefined;
  availableModels: Model<Api>[];
  failedRoutes: readonly ModelFailureRecord[];
  isBlocked?: (model: Model<Api>) => boolean;
}): NextModelRouteResult | undefined {
  const currentKey = args.current ? modelRouteKey(args.current) : undefined;
  const failedKeys = new Set(
    args.failedRoutes.map((failure) =>
      modelRouteKey({ provider: failure.provider, id: failure.modelId }),
    ),
  );
  const usable = (model: Model<Api>): boolean => {
    const key = modelRouteKey(model);
    if (key === currentKey) return false;
    if (failedKeys.has(key)) return false;
    return !args.isBlocked?.(model);
  };
  const candidates = args.availableModels.filter(usable);
  if (candidates.length === 0) return undefined;
  // Prefer a different provider than the one that just failed, when possible.
  let chosen = candidates[0];
  if (args.current) {
    const failedProvider = args.current.provider.toLowerCase();
    const crossProvider = candidates.find(
      (model) => model.provider.toLowerCase() !== failedProvider,
    );
    if (crossProvider) chosen = crossProvider;
  }
  return {
    model: chosen,
    route: `${chosen.provider}/${chosen.id}`,
    source: "available",
  };
}
/**
 * Resolve the next model route by trying configured policy first, then any
 * other live route.
 *
 * Purpose: preserve configured fallback ordering when it exists while still
 * enforcing the no-pause contract for transient provider/model failures.
 *
 * Consumer: bootstrap/agent-end-recovery.ts during provider-route recovery.
 */
export function resolveNextModelRoute(args: {
  current: ModelRouteRef | undefined;
  modelConfig: ResolvedModelConfig | undefined;
  availableModels: Model<Api>[];
  failedRoutes: readonly ModelFailureRecord[];
  isBlocked?: (model: Model<Api>) => boolean;
}): NextModelRouteResult | undefined {
  // Configured fallback ordering wins whenever a config is present and still
  // has an unfailed, unblocked route left.
  const configured = args.modelConfig
    ? resolveNextConfiguredModelRoute({
        current: args.current,
        modelConfig: args.modelConfig,
        availableModels: args.availableModels,
        failedRoutes: args.failedRoutes,
        isBlocked: args.isBlocked,
      })
    : undefined;
  if (configured) return configured;
  // Otherwise take any other live route rather than pausing.
  return resolveNextAvailableModelRoute({
    current: args.current,
    availableModels: args.availableModels,
    failedRoutes: args.failedRoutes,
    isBlocked: args.isBlocked,
  });
}

View file

@ -21,6 +21,7 @@ import type {
ExtensionAPI,
ExtensionContext,
} from "@singularity-forge/pi-coding-agent";
import { getErrorMessage } from "./error-utils.js";
import { sfRuntimeRoot } from "./paths.js";
import type { PersistedSelfFeedbackEntry } from "./self-feedback.js";
import {
@ -33,6 +34,7 @@ const CLAIM_TTL_MS = 30 * 60 * 1000;
// Persisted claim that marks which self-feedback entries an inline-fix
// dispatch currently owns, so concurrent idle checks do not double-dispatch.
interface InlineFixClaim {
  // Self-feedback entry ids covered by this dispatch.
  ids: string[];
  // ISO timestamp used with CLAIM_TTL_MS to decide when the claim expires.
  dispatchedAt: string;
  // Set when the last dispatch attempt failed; presumably surfaced for
  // diagnostics on retry — TODO confirm against readers of the claim file.
  lastDispatchError?: string;
}
function claimPath(basePath: string): string {
@ -63,6 +65,28 @@ function writeClaim(basePath: string, ids: string[]): void {
);
}
/**
 * Persist a claim for a failed inline-fix dispatch.
 *
 * The claim is backdated past CLAIM_TTL_MS so the next idle check treats it
 * as already expired and retries immediately, while lastDispatchError keeps
 * the failure reason on disk for diagnostics.
 */
function writeFailedClaim(
  basePath: string,
  ids: string[],
  error: string,
): void {
  const expiredDispatchedAt = new Date(
    Date.now() - CLAIM_TTL_MS - 1,
  ).toISOString();
  const claim = {
    ids,
    dispatchedAt: expiredDispatchedAt,
    lastDispatchError: error,
  };
  const target = claimPath(basePath);
  mkdirSync(dirname(target), { recursive: true });
  writeFileSync(target, JSON.stringify(claim, null, 2), "utf-8");
}
function clearClaim(basePath: string): void {
try {
unlinkSync(claimPath(basePath));
@ -147,10 +171,10 @@ function buildInlineFixPrompt(entries: PersistedSelfFeedbackEntry[]): string {
)
.join("\n\n");
return [
"You are executing SF self-feedback inline-fix mode.",
"",
"These high/critical self-feedback entries are unresolved sf defects. Do not only triage them; repair the current codebase directly.",
return [
"You are executing SF self-feedback inline-fix mode.",
"",
"These high/critical self-feedback entries are unresolved sf defects. Do not only triage them; repair the current codebase directly.",
"",
rendered,
"",
@ -159,8 +183,9 @@ function buildInlineFixPrompt(entries: PersistedSelfFeedbackEntry[]): string {
"2. Fix the smallest coherent set of code/docs/tests needed to satisfy the acceptance criteria.",
"3. Run focused verification and typecheck for touched areas.",
"4. Commit the fix with a conventional commit message.",
"5. Mark the repaired entries resolved in `.sf/self-feedback.jsonl` with agent-fix evidence and the commit SHA.",
"6. If an entry is already fixed, mark it resolved with agent-fix evidence and explain the verification.",
"5. Call `sf_self_feedback_resolve` for each repaired entry with agent-fix evidence and the commit SHA.",
"6. If an entry is already fixed, verify it and call `sf_self_feedback_resolve` with the verification evidence.",
"7. Do not hand-edit `.sf/self-feedback.jsonl`; use the resolver tool so markdown, JSONL, and reload detection stay consistent.",
"",
"When done, say: Self-feedback inline fix complete.",
].join("\n");
@ -195,17 +220,25 @@ export function dispatchSelfFeedbackInlineFixIfNeeded(
writeClaim(basePath, ids);
const prompt = buildInlineFixPrompt(candidates);
ctx.ui.notify(
`Dispatching self-feedback inline fix for ${ids.length} high/critical entr${ids.length === 1 ? "y" : "ies"}.`,
`Queueing self-feedback inline fix for ${ids.length} high/critical entr${ids.length === 1 ? "y" : "ies"}.`,
"warning",
);
pi.sendMessage(
const dispatch = pi.sendMessage(
{
customType: "sf-self-feedback-inline-fix",
content: prompt,
display: false,
},
{ triggerTurn: true },
{ triggerTurn: true, deliverAs: "followUp" },
);
void Promise.resolve(dispatch).catch((error) => {
const message = getErrorMessage(error);
writeFailedClaim(basePath, ids, message);
ctx.ui.notify(
`Self-feedback inline fix dispatch failed; will retry at the next idle point: ${message}`,
"warning",
);
});
return candidates.length;
}

View file

@ -0,0 +1,171 @@
import assert from "node:assert/strict";
import { execFileSync } from "node:child_process";
import {
appendFileSync,
existsSync,
mkdirSync,
mkdtempSync,
readFileSync,
realpathSync,
rmSync,
symlinkSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, test } from "vitest";
import { handleHarness } from "../commands-harness.ts";
import { profileRepository } from "../repo-profiler.ts";
import {
closeDatabase,
getRepoFileObservations,
openDatabase,
recordRepoProfile,
} from "../sf-db.ts";
// Snapshot process-global state so each test can chdir and set
// SF_PROJECT_ROOT freely; afterEach restores both and removes temp roots.
const originalCwd = process.cwd();
const originalProjectRoot = process.env.SF_PROJECT_ROOT;
let roots: string[] = [];
afterEach(() => {
  process.chdir(originalCwd);
  closeDatabase();
  for (const root of roots) rmSync(root, { recursive: true, force: true });
  roots = [];
  // Restore (or clear) SF_PROJECT_ROOT exactly as it was before the test.
  if (originalProjectRoot === undefined) delete process.env.SF_PROJECT_ROOT;
  else process.env.SF_PROJECT_ROOT = originalProjectRoot;
});
/** Run a git command in `cwd` and return its trimmed stdout; throws on failure. */
function runGit(args: string[], cwd: string): string {
  const stdout = execFileSync("git", args, {
    cwd,
    encoding: "utf-8",
    stdio: ["ignore", "pipe", "pipe"],
  });
  return stdout.trim();
}
/**
 * Create a throwaway git repo with one commit (README + package.json) and
 * register it for afterEach cleanup. Returns the realpath of the repo root.
 */
function makeRepo(prefix: string): string {
  const repoRoot = realpathSync(mkdtempSync(join(tmpdir(), prefix)));
  roots.push(repoRoot);
  runGit(["init", "-b", "main"], repoRoot);
  runGit(["config", "user.email", "test@example.com"], repoRoot);
  runGit(["config", "user.name", "SF Test"], repoRoot);
  // Minimal committed baseline: a README plus a package.json test script.
  writeFileSync(join(repoRoot, "README.md"), "# Repo\n", "utf8");
  writeFileSync(
    join(repoRoot, "package.json"),
    '{"scripts":{"test":"node --test"}}\n',
    "utf8",
  );
  runGit(["add", "README.md", "package.json"], repoRoot);
  runGit(["commit", "-m", "init"], repoRoot);
  return repoRoot;
}
// Replace the repo's .sf directory with a symlink to an external state dir,
// mirroring setups where runtime state lives outside the working tree.
// NOTE(review): the "junction" symlink type only has effect on Windows and is
// ignored on POSIX — confirm that is intended for cross-platform test runs.
function makeExternalSfState(repo: string): string {
  const externalState = realpathSync(mkdtempSync(join(tmpdir(), "sf-state-")));
  roots.push(externalState);
  symlinkSync(externalState, join(repo, ".sf"), "junction");
  // Hide .sf from git status via the local exclude file rather than the
  // tracked .gitignore, so the repo's committed content stays untouched.
  appendFileSync(join(repo, ".git", "info", "exclude"), "\n.sf\n", "utf8");
  return externalState;
}
/**
 * Build a minimal UI context stub that records every notify() call so tests
 * can assert on the messages and severity levels the code under test emits.
 */
function makeMockCtx(): {
  notifications: Array<{ message: string; level?: string }>;
  ui: { notify(message: string, level?: string): void };
} {
  const notifications: Array<{ message: string; level?: string }> = [];
  const ui = {
    notify: (message: string, level?: string): void => {
      notifications.push({ message, level });
    },
  };
  return { notifications, ui };
}
test("harnessPromote_when_sf_is_external_symlink_writes_tracked_docs_not_runtime_target", async () => {
  // Arrange: repo whose .sf runtime state lives behind an external symlink,
  // with a recorded profile in the sf database for a fixed timestamp.
  const repo = makeRepo("sf-harness-promote-");
  const externalState = makeExternalSfState(repo);
  mkdirSync(join(repo, "notes"), { recursive: true });
  writeFileSync(join(repo, "notes", "local-finding.md"), "# Finding\n", "utf8");
  closeDatabase();
  assert.equal(openDatabase(join(repo, ".sf", "sf.db")), true);
  recordRepoProfile(
    profileRepository(repo, {
      now: () => "2026-05-02T10:00:00.000Z",
    }),
  );
  closeDatabase();
  delete process.env.SF_PROJECT_ROOT;
  process.chdir(repo);
  const ctx = makeMockCtx();
  // Act: promote a finding; the artifact must land in the repo's docs tree.
  await handleHarness("promote sf-moocr4rv-au7r3l", ctx as any);
  const relativeArtifact =
    "docs/exec-plans/active/harness-promotion-sf-moocr4rv-au7r3l.md";
  const artifact = join(repo, relativeArtifact);
  assert.ok(existsSync(artifact), "promotion writes a repo docs artifact");
  // The symlinked .sf target must not receive the promoted artifact.
  assert.ok(
    !existsSync(join(externalState, relativeArtifact)),
    "promotion must not write into the external .sf symlink target",
  );
  assert.equal(
    runGit(["status", "--short", "--", relativeArtifact], repo),
    `?? ${relativeArtifact}`,
    "promoted docs artifact is visible to git as repo output",
  );
  const firstContent = readFileSync(artifact, "utf8");
  // Promoting the same finding again must be idempotent (same bytes).
  await handleHarness("promote sf-moocr4rv-au7r3l", ctx as any);
  assert.equal(
    readFileSync(artifact, "utf8"),
    firstContent,
    "promotion content is deterministic for the same recorded profile",
  );
  assert.doesNotMatch(firstContent, /Promoted at:/);
  assert.match(
    firstContent,
    /Unpromoted \.sf runtime observations remain `observed_only`/,
  );
  assert.match(firstContent, /"ownership": "observed_only"/);
  assert.match(
    firstContent,
    new RegExp(`Repo artifact: \`${relativeArtifact}\``),
  );
  // The user-facing notification restates the observation boundary.
  assert.match(
    ctx.notifications.at(-1)?.message ?? "",
    /Unpromoted \.sf runtime state remains observed_only/,
  );
});
test("harnessProfile_when_recording_runtime_state_reports_no_repo_artifact", async () => {
  // Arrange: repo with external .sf state and one untracked scratch note.
  const repo = makeRepo("sf-harness-profile-");
  makeExternalSfState(repo);
  mkdirSync(join(repo, "notes"), { recursive: true });
  writeFileSync(join(repo, "notes", "scratch.md"), "# Scratch\n", "utf8");
  delete process.env.SF_PROJECT_ROOT;
  process.chdir(repo);
  const ctx = makeMockCtx();
  // Act: profiling records observations only; it must not emit repo docs.
  await handleHarness("profile", ctx as any);
  const observations = getRepoFileObservations();
  const scratch = observations.find((obs) => obs.path === "notes/scratch.md");
  assert.equal(scratch?.ownership, "observed_only");
  assert.ok(
    !existsSync(join(repo, "docs", "exec-plans", "active")),
    "profile does not create repo-committable docs output",
  );
  // The first notification states the boundary and points at promote.
  const notice = ctx.notifications[0]?.message ?? "";
  assert.match(notice, /Runtime observation boundary:/);
  assert.match(notice, /No repo-committable artifact was written/);
  assert.match(notice, /\/sf harness promote <finding-id>/);
  assert.doesNotMatch(notice, /tracked documentation artifact created/);
});

View file

@ -0,0 +1,15 @@
import assert from "node:assert/strict";
import { test } from "vitest";
import { classifyError } from "../error-classifier.ts";
// A quota message with a "reset after Ns" hint must classify as a rate-limit
// and carry the parsed delay in milliseconds.
test("quota_reset_after_seconds_is_rate_limit_with_retry_delay", () => {
  const message =
    "You have exhausted your capacity on this model. Your quota will reset after 33s.";
  const classified = classifyError(message);
  assert.equal(classified.kind, "rate-limit");
  // Narrow on the discriminant before reading the kind-specific field.
  if (classified.kind !== "rate-limit") return;
  assert.equal(classified.retryAfterMs, 33_000);
});

View file

@ -0,0 +1,255 @@
import assert from "node:assert/strict";
import {
mkdirSync,
mkdtempSync,
readFileSync,
rmSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, test } from "vitest";
import { runFlowAudit } from "../doctor.ts";
import { readAllSelfFeedback, recordSelfFeedback } from "../self-feedback.ts";
// Temp project roots created by the tests; removed after each test so the
// suite leaves no residue in the OS temp directory.
const roots: string[] = [];
afterEach(() => {
  for (const root of roots) rmSync(root, { recursive: true, force: true });
  roots.length = 0;
});
function makeForgeProject(): string {
const root = mkdtempSync(join(tmpdir(), "sf-flow-audit-"));
roots.push(root);
mkdirSync(join(root, ".sf"), { recursive: true });
writeFileSync(
join(root, "package.json"),
JSON.stringify({ name: "singularity-forge", version: "0.0.1" }),
"utf-8",
);
return root;
}
// Write a full on-disk fixture that reproduces a stuck M007 auto-mode loop:
// a dispatched unit with stale progress (31 min old), a prior runaway-guard
// pause, nine completed tasks with summaries, one error notification, and
// three open self-feedback entries. `nowMs` anchors all relative timestamps.
function writeM007LoopFixture(root: string, nowMs: number): void {
  const sf = join(root, ".sf");
  const unitId = "M007/S01/T10";
  // Unit started 45 min ago; last progress 31 min ago — past the 20 min
  // staleProgressMs threshold the audit test passes in.
  const startedAt = nowMs - 45 * 60 * 1000;
  const lastProgressAt = nowMs - 31 * 60 * 1000;
  const sliceDir = join(sf, "milestones", "M007", "slices", "S01");
  const tasksDir = join(sliceDir, "tasks");
  const unitsDir = join(sf, "runtime", "units");
  mkdirSync(tasksDir, { recursive: true });
  mkdirSync(unitsDir, { recursive: true });
  // auto.lock: the live dispatch pointer, including the session pointer the
  // audit is expected to surface.
  writeFileSync(
    join(sf, "auto.lock"),
    JSON.stringify(
      {
        pid: 5000,
        unitType: "execute-task",
        unitId,
        phase: "dispatched",
        startedAt: new Date(startedAt).toISOString(),
        sessionId: "sess-m007",
        sessionFile: "/tmp/sessions/m007.jsonl",
      },
      null,
      2,
    ),
    "utf-8",
  );
  // Unit state file: dispatched with no progress and an earlier
  // runaway-guard hard pause recorded for the previous task (T09).
  writeFileSync(
    join(unitsDir, "execute-task-M007-S01-T10.json"),
    JSON.stringify(
      {
        version: 1,
        unitType: "execute-task",
        unitId,
        startedAt,
        updatedAt: lastProgressAt,
        phase: "dispatched",
        wrapupWarningSent: false,
        continueHereFired: false,
        timeoutAt: null,
        lastProgressAt,
        progressCount: 0,
        lastProgressKind: "dispatch",
        runawayGuardPause: {
          reason: "Runaway guard paused execute-task M007/S01/T09",
          pausedAt: lastProgressAt - 60_000,
          unitType: "execute-task",
          unitId: "M007/S01/T09",
          diagnosticTurns: 2,
          warningsSent: 2,
          thresholdReasons: ["budget kept growing"],
          metrics: {
            toolCalls: 90,
            sessionTokens: 1_200_000,
            elapsedMs: 2_000_000,
            changedFiles: 0,
            worktreeChangedSinceStart: false,
            topTools: { read: 80, bash: 10 },
          },
          thresholds: {
            toolCallWarning: 60,
            tokenWarning: 1_000_000,
            elapsedMs: 1_200_000,
            changedFilesWarning: 75,
            minIntervalMs: 120_000,
          },
        },
      },
      null,
      2,
    ),
    "utf-8",
  );
  // Plan checklist: T01-T09 checked with summary files, T10 open with no
  // summary — the loop evidence the audit should report.
  const taskLines: string[] = [];
  for (let i = 1; i <= 10; i++) {
    const id = `T${String(i).padStart(2, "0")}`;
    taskLines.push(
      `- [${i < 10 ? "x" : " "}] **${id}: Task ${i}** \`est:10m\``,
    );
    if (i < 10) {
      writeFileSync(
        join(tasksDir, `${id}-SUMMARY.md`),
        `# ${id} summary\n\nDone.\n`,
        "utf-8",
      );
    }
  }
  writeFileSync(
    join(sliceDir, "S01-PLAN.md"),
    `# S01: Loop Evidence\n\n## Tasks\n\n${taskLines.join("\n")}\n`,
    "utf-8",
  );
  // One recent error notification the audit should surface as a last error.
  writeFileSync(
    join(sf, "notifications.jsonl"),
    JSON.stringify({
      severity: "error",
      message: "session creation failed before final summary",
    }) + "\n",
    "utf-8",
  );
  // Open self-feedback entries for the last three tasks, feeding the
  // repeated-milestone-failure rollup check.
  for (const task of ["T08", "T09", "T10"]) {
    recordSelfFeedback(
      {
        kind: "runaway-guard-hard-pause",
        severity: "medium",
        summary: `Runaway guard paused execute-task M007/S01/${task}`,
        evidence: `${task} had no final closure`,
        source: "detector",
        occurredIn: {
          milestone: "M007",
          slice: "S01",
          task,
          unitType: "execute-task",
        },
      },
      root,
    );
  }
}
describe("flow audit", () => {
  test("audit_when_m007_loop_evidence_exists_reports_actionable_stale_flow", async () => {
    const root = makeForgeProject();
    const nowMs = Date.UTC(2026, 4, 2, 13, 45, 0);
    writeM007LoopFixture(root, nowMs);
    // psOutput fakes the process table: the auto driver (5000), a long-lived
    // warmup child (5100), and a fresh tool-session child (5200).
    const result = await runFlowAudit(root, {
      nowMs,
      psOutput:
        "5000 1 2700 node dist/loader.js sf headless auto\n" +
        "5100 5000 2400 sift search --json --strategy page-index-hybrid warmup\n" +
        "5200 5000 120 node dist/loader.js sf tool-session\n",
      staleProgressMs: 20 * 60 * 1000,
      optionalChildBudgetMs: 30 * 60 * 1000,
    });
    assert.equal(result.ok, false);
    assert.equal(result.activeMilestone?.id, "M007");
    assert.equal(result.activeUnit?.unitId, "M007/S01/T10");
    // Fixture pinned last progress at exactly 31 minutes before nowMs.
    assert.equal(result.activeUnit?.progressAgeMs, 31 * 60 * 1000);
    assert.equal(result.sessionPointer?.sessionId, "sess-m007");
    assert.equal(
      result.sessionPointer?.sessionFile,
      "/tmp/sessions/m007.jsonl",
    );
    assert.equal(result.staleDispatchedUnits.length, 1);
    assert.match(result.warnings.join("\n"), /no progress for 31 minutes/);
    assert.deepEqual(result.loopEvidence?.completedPriorTasks.slice(-2), [
      "T08",
      "T09",
    ]);
    assert.match(result.loopEvidence?.missingSummaries.join("\n") ?? "", /T10/);
    assert.match(result.lastErrors.join("\n"), /session creation failed/);
    assert.match(result.runawayHistory.join("\n"), /M007\/S01\/T09/);
    assert.match(result.recommendedAction, /Inspect session/);
    // The warmup child is over its 30-minute budget but classified as
    // non-blocking, so the audit reports it without killing it.
    const warmup = result.childProcesses.find((p) => p.pid === 5100);
    assert.ok(warmup, "warmup child should be reported");
    assert.equal(warmup.classification, "warmup");
    assert.equal(warmup.nonBlocking, true);
    assert.equal(warmup.overBudget, true);
    assert.equal(warmup.action, "non-blocking");
    const active = result.childProcesses.find((p) => p.pid === 5200);
    assert.ok(active, "active tool child should be reported");
    assert.equal(active.classification, "active-session");
    assert.equal(active.nonBlocking, false);
    // A single high-severity rollup entry is recorded for the milestone …
    const entries = readAllSelfFeedback(root);
    const rollups = entries.filter(
      (e) =>
        e.kind === "flow-audit:repeated-milestone-failure" && !e.resolvedAt,
    );
    assert.equal(rollups.length, 1);
    assert.equal(rollups[0]?.severity, "high");
    assert.match(rollups[0]?.summary ?? "", /M007/);
    assert.match(rollups[0]?.acceptanceCriteria ?? "", /stale dispatched unit/);
    // … and a second audit run while it is open must not duplicate it.
    await runFlowAudit(root, { nowMs, psOutput: "" });
    assert.equal(
      readAllSelfFeedback(root).filter(
        (e) => e.kind === "flow-audit:repeated-milestone-failure",
      ).length,
      1,
      "same milestone rollup stays single while open",
    );
  });
  test("audit_when_optional_child_is_over_budget_can_kill_it_explicitly", async () => {
    const root = makeForgeProject();
    const killed: number[] = [];
    // With killOverBudgetChildren the injected killProcess must be invoked
    // for the over-budget optional child and the action recorded as "kill".
    const result = await runFlowAudit(root, {
      nowMs: Date.UTC(2026, 4, 2, 13, 45, 0),
      psOutput:
        "5100 5000 2400 sift search --json --strategy page-index-hybrid warmup\n",
      optionalChildBudgetMs: 60_000,
      killOverBudgetChildren: true,
      killProcess: (pid) => {
        killed.push(pid);
      },
    });
    assert.deepEqual(killed, [5100]);
    assert.equal(result.childProcesses[0]?.classification, "warmup");
    assert.equal(result.childProcesses[0]?.action, "kill");
    assert.equal(result.childProcesses[0]?.killed, true);
  });
  // Structural check: the session_start hook must wire in the flow auditor.
  test("session_start_when_registered_runs_flow_auditor", () => {
    const source = readFileSync(
      join(import.meta.dirname, "..", "bootstrap", "register-hooks.ts"),
      "utf-8",
    );
    assert.match(source, /pi\.on\("session_start"/);
    assert.match(source, /runFlowAudit/);
    assert.match(source, /Flow audit:/);
  });
});

View file

@ -0,0 +1,155 @@
import assert from "node:assert/strict";
import { describe, test } from "vitest";
import {
modelRouteKey,
resolveNextAvailableModelRoute,
resolveNextConfiguredModelRoute,
resolveNextModelRoute,
} from "../model-route-failure.ts";
// Shared fixture of live routes: the same model id appears under two
// providers (google-gemini-cli and google) so tests can verify that route
// identity is provider/model, not model id alone.
const models = [
  { provider: "google-gemini-cli", id: "gemini-3-flash-preview" },
  { provider: "google", id: "gemini-3-flash-preview" },
  { provider: "anthropic", id: "claude-sonnet-4-6" },
  { provider: "zai", id: "glm-5.1" },
] as any[];
describe("configured model route failure recovery", () => {
  test("quota_when_current_route_fails_returns_next_configured_fallback", () => {
    // Current route is the configured primary and has a recorded failure,
    // so recovery must advance to the first configured fallback.
    const next = resolveNextConfiguredModelRoute({
      current: {
        provider: "google-gemini-cli",
        id: "gemini-3-flash-preview",
      },
      modelConfig: {
        primary: "google-gemini-cli/gemini-3-flash-preview",
        fallbacks: ["anthropic/claude-sonnet-4-6", "zai/glm-5.1"],
      },
      availableModels: models,
      failedRoutes: [
        {
          unitType: "execute-task",
          unitId: "M001/S01/T01",
          provider: "google-gemini-cli",
          modelId: "gemini-3-flash-preview",
          reason: "quota reset after 33s",
          timestamp: 1,
        },
      ],
    });
    assert.equal(next?.model.provider, "anthropic");
    assert.equal(next?.model.id, "claude-sonnet-4-6");
  });
  test("current_model_not_in_config_starts_at_configured_primary", () => {
    // When the current route is outside the configured chain, the walk
    // starts at the configured primary rather than skipping it.
    const next = resolveNextConfiguredModelRoute({
      current: { provider: "google-gemini-cli", id: "gemini-3-flash-preview" },
      modelConfig: {
        primary: "anthropic/claude-sonnet-4-6",
        fallbacks: ["zai/glm-5.1"],
      },
      availableModels: models,
      failedRoutes: [],
    });
    assert.equal(next?.model.provider, "anthropic");
    assert.equal(next?.model.id, "claude-sonnet-4-6");
  });
  test("exhausted_chain_returns_undefined", () => {
    // Current route is the last configured entry and it already failed:
    // the configured resolver alone yields nothing.
    const next = resolveNextConfiguredModelRoute({
      current: { provider: "zai", id: "glm-5.1" },
      modelConfig: {
        primary: "anthropic/claude-sonnet-4-6",
        fallbacks: ["zai/glm-5.1"],
      },
      availableModels: models,
      failedRoutes: [
        {
          unitType: "execute-task",
          unitId: "M001/S01/T01",
          provider: "zai",
          modelId: "glm-5.1",
          reason: "server overloaded",
          timestamp: 1,
        },
      ],
    });
    assert.equal(next, undefined);
  });
  test("exhausted_configured_chain_uses_available_route_before_pause", () => {
    // Same exhausted chain, but the combined resolver must fall through to
    // an "available" route instead of returning undefined.
    const next = resolveNextModelRoute({
      current: { provider: "zai", id: "glm-5.1" },
      modelConfig: {
        primary: "anthropic/claude-sonnet-4-6",
        fallbacks: ["zai/glm-5.1"],
      },
      availableModels: models,
      failedRoutes: [
        {
          unitType: "execute-task",
          unitId: "M001/S01/T01",
          provider: "zai",
          modelId: "glm-5.1",
          reason: "server overloaded",
          timestamp: 1,
        },
      ],
    });
    assert.equal(next?.source, "available");
    assert.equal(next?.model.provider, "google-gemini-cli");
    assert.equal(next?.model.id, "gemini-3-flash-preview");
  });
  test("missing_config_uses_available_route_and_prefers_different_provider", () => {
    const next = resolveNextAvailableModelRoute({
      current: { provider: "google-gemini-cli", id: "gemini-3-flash-preview" },
      availableModels: models,
      failedRoutes: [
        {
          unitType: "execute-task",
          unitId: "M001/S01/T01",
          provider: "google-gemini-cli",
          modelId: "gemini-3-flash-preview",
          reason: "quota",
          timestamp: 1,
        },
      ],
    });
    assert.equal(next?.source, "available");
    assert.notEqual(next?.model.provider, "google-gemini-cli");
    assert.notEqual(
      modelRouteKey(next!.model),
      "google-gemini-cli/gemini-3-flash-preview",
    );
  });
  test("provider_model_identity_skips_only_the_failed_route", () => {
    // Same model id under a different provider is a distinct route and must
    // remain eligible when only the google-gemini-cli route failed.
    const next = resolveNextConfiguredModelRoute({
      current: { provider: "google-gemini-cli", id: "gemini-3-flash-preview" },
      modelConfig: {
        primary: "google-gemini-cli/gemini-3-flash-preview",
        fallbacks: ["google/gemini-3-flash-preview"],
      },
      availableModels: models,
      failedRoutes: [
        {
          unitType: "execute-task",
          unitId: "M001/S01/T01",
          provider: "google-gemini-cli",
          modelId: "gemini-3-flash-preview",
          reason: "quota",
          timestamp: 1,
        },
      ],
    });
    assert.equal(modelRouteKey(next!.model), "google/gemini-3-flash-preview");
  });
});

View file

@ -8,8 +8,8 @@
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { dirname, join } from "node:path";
import { test } from 'vitest';
import { fileURLToPath } from "node:url";
import { test } from "vitest";
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.ts";
import {
classifyError,
@ -388,22 +388,18 @@ test("resumeAutoAfterProviderDelay restarts paused auto-mode from the recorded b
ui: { notify() {} },
newSession: async () => ({ cancelled: false }),
} as any;
const result = await resumeAutoAfterProviderDelay(
{} as any,
commandCtx,
{
getSnapshot: () => ({
active: false,
paused: true,
stepMode: true,
basePath: "/tmp/project",
}),
resetTransientRetryState: () => {},
startAuto: async (_ctx, _pi, base, verboseMode, options) => {
startCalls.push({ base, verboseMode, step: options?.step });
},
const result = await resumeAutoAfterProviderDelay({} as any, commandCtx, {
getSnapshot: () => ({
active: false,
paused: true,
stepMode: true,
basePath: "/tmp/project",
}),
resetTransientRetryState: () => {},
startAuto: async (_ctx, _pi, base, verboseMode, options) => {
startCalls.push({ base, verboseMode, step: options?.step });
},
);
});
assert.equal(result, "resumed");
assert.deepEqual(startCalls, [
@ -545,21 +541,21 @@ test("resumeAutoAfterProviderDelay leaves paused when no command context is avai
]);
});
// ── Escalating backoff for transient errors (#1166) ─────────────────────────
// ── Configured model-route recovery for provider failures ───────────────────
test("agent-end-recovery.ts tracks consecutive transient errors for escalating backoff", () => {
test("agent-end-recovery.ts records failed provider routes for configured fallback", () => {
const src = readFileSync(
join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"),
"utf-8",
);
assert.ok(
src.includes("consecutiveTransientCount"),
"agent-end-recovery.ts must track consecutiveTransientCount for escalating backoff (#1166)",
src.includes("recordCurrentModelFailure"),
"agent-end-recovery.ts must record failed provider/model routes before resolving fallbacks",
);
assert.ok(
src.includes("MAX_TRANSIENT_AUTO_RESUMES"),
"agent-end-recovery.ts must define MAX_TRANSIENT_AUTO_RESUMES to cap infinite retries (#1166)",
src.includes("getCurrentUnitModelFailures"),
"agent-end-recovery.ts must skip routes already failed for the current unit",
);
});
@ -576,34 +572,35 @@ test("agent-end-recovery.ts resets retry state before resolveAgentEnd on success
);
});
test("agent-end-recovery.ts applies escalating delay for repeated transient errors", () => {
test("agent-end-recovery.ts does not sleep or same-route retry model-route failures", () => {
const src = readFileSync(
join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"),
"utf-8",
);
// Must contain the exponential backoff formula (may span multiple lines)
assert.ok(
src.includes("2 ** Math.max(0, retryState.consecutiveTransientCount"),
"agent-end-recovery.ts must escalate retryAfterMs exponentially for consecutive transient errors (#1166)",
!src.includes("pauseTransientWithBackoff"),
"model-route failures must not enter same-model transient backoff",
);
assert.ok(
!src.includes("resumeAutoAfterProviderDelay"),
"model-route failures must not schedule same-model auto-resume",
);
});
test("agent-end-recovery.ts resumes transient provider pauses through startAuto instead of a hidden prompt", () => {
test("agent-end-recovery.ts sends hidden continue after any successful fallback switch", () => {
const src = readFileSync(
join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"),
"utf-8",
);
assert.ok(
src.includes("resumeAutoAfterProviderDelay"),
"agent-end-recovery.ts must resume paused auto-mode through resumeAutoAfterProviderDelay (#2813)",
src.includes('customType: "sf-auto-timeout-recovery"'),
"successful fallback switches should continue the active unit with a hidden message",
);
assert.ok(
!src.includes(
"Continue execution — provider error recovery delay elapsed.",
),
"transient provider resume must not rely on a hidden continue prompt (#2813)",
src.includes("configured fallback") && src.includes("available fallback"),
"hidden continue must be tied to a successful model switch, whether configured or available",
);
});
@ -613,8 +610,9 @@ test("agent-end-recovery.ts does not defer rate-limit errors to core retry handl
"utf-8",
);
assert.ok(
src.includes('if (isTransient(cls) && cls.kind !== "rate-limit")'),
"rate-limit errors must bypass transient core-retry deferral so fallback can execute (#4373)",
src.includes("isModelRouteFailure(cls)") &&
src.includes('cls.kind === "rate-limit"'),
"rate-limit errors must enter model-route recovery before pausing (#4373)",
);
});
@ -624,8 +622,8 @@ test("agent-end-recovery.ts updates dashboard dispatched model after fallback sw
"utf-8",
);
assert.ok(
src.includes("setCurrentDispatchedModelId"),
"agent-end-recovery.ts should update currentDispatchedModelId when recovery switches model",
src.includes("setCurrentUnitModel"),
"agent-end-recovery.ts should update current unit/dashboard model state when recovery switches model",
);
});
@ -704,19 +702,17 @@ test("phases.ts handles timeout session-creation failures with pause instead of
);
});
// ── Fix 3: MAX_TRANSIENT_AUTO_RESUMES raised to 8 ───────────────────────────
// ── Fix 3: same-route transient retry cap removed for route failures ────────
test("MAX_TRANSIENT_AUTO_RESUMES is at least 8 for sustained overload resilience", () => {
test("agent-end-recovery.ts does not keep a same-route transient resume cap", () => {
const src = readFileSync(
join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"),
"utf-8",
);
const match = src.match(/MAX_TRANSIENT_AUTO_RESUMES\s*=\s*(\d+)/);
assert.ok(match, "MAX_TRANSIENT_AUTO_RESUMES must be defined");
const value = Number(match![1]);
assert.ok(
value >= 8,
`MAX_TRANSIENT_AUTO_RESUMES must be >= 8 for sustained overload resilience, got ${value}`,
!src.includes("MAX_TRANSIENT_AUTO_RESUMES"),
"provider route failures should switch explicit routes or pause, not count same-route resumes",
);
});

View file

@ -1,15 +1,16 @@
/**
* rate-limit-model-fallback.test.ts Regression test for #2770.
*
* Rate-limit errors enter the model fallback path before falling through
* to pause. This verifies the structural contract in agent-end-recovery.ts.
* Rate-limit errors enter model-route fallback before pausing.
* Recovery must switch to configured fallbacks first, then any other available
* route before pausing.
*/
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { dirname, join } from "node:path";
import { test } from 'vitest';
import { fileURLToPath } from "node:url";
import { test } from "vitest";
const __dirname = dirname(fileURLToPath(import.meta.url));
const RECOVERY_PATH = join(
@ -28,16 +29,10 @@ function getRecoverySource(): string {
test("rate-limit errors enter the model fallback branch alongside other transient errors", () => {
const src = getRecoverySource();
// The condition that gates model fallback must include rate-limit.
// Match the if-condition that contains both "rate-limit" and fallback-related kinds.
const fallbackConditionRe =
/if\s*\([^)]*cls\.kind\s*===\s*"rate-limit"[^)]*cls\.kind\s*===\s*"network"/;
const fallbackConditionReAlt =
/if\s*\([^)]*cls\.kind\s*===\s*"network"[^)]*cls\.kind\s*===\s*"rate-limit"/;
assert.ok(
fallbackConditionRe.test(src) || fallbackConditionReAlt.test(src),
"rate-limit must appear in the same if-condition as network/server for model fallback (#2770)",
src.includes('cls.kind === "rate-limit"') &&
src.includes("isModelRouteFailure(cls)"),
"rate-limit must enter the configured model-route failure path (#2770)",
);
});
@ -54,23 +49,50 @@ test("rate-limit errors are NOT short-circuited to pause before model fallback",
);
});
test("rate-limit errors fall through to pause if no fallback model is available", () => {
test("model fallback uses configured routes first then automatic available routes", () => {
const src = getRecoverySource();
// After the fallback block, the transient fallback pause must still fire for rate-limit.
// The isTransient check covers rate-limit (verified by error-classifier tests).
// Verify pauseTransientWithBackoff is called with isRateLimit derived from cls.kind.
assert.ok(
src.includes('cls.kind === "rate-limit"'),
'agent-end-recovery.ts must reference cls.kind === "rate-limit" for fallback and pause paths (#2770)',
src.includes("resolveNextModelRoute"),
"agent-end-recovery.ts must route through the configured-or-available route helper",
);
// The transient fallback pause must pass the isRateLimit flag correctly.
const pauseCallRe =
/pauseTransientWithBackoff\([^)]*cls\.kind\s*===\s*"rate-limit"/;
assert.ok(
pauseCallRe.test(src),
'pauseTransientWithBackoff must receive isRateLimit based on cls.kind === "rate-limit" (#2770)',
src.includes("autoBenchmark: true"),
"runtime recovery must allow benchmark-provided fallbacks when preferences do not pin the full chain",
);
assert.ok(
!src.includes("getAutoModeStartModel"),
"runtime recovery must not restore a session/system model as an inferred fallback",
);
});
test("rate-limit errors pause only when no configured_or_available fallback remains", () => {
const src = getRecoverySource();
assert.ok(
src.includes("available fallback"),
"exhausted configured fallback chain should try another available model before pausing",
);
assert.ok(
src.includes("no usable fallback model remains"),
"only complete fallback exhaustion should pause with a clear provider error",
);
assert.ok(
/isTransient:\s*false/.test(src),
"complete provider route exhaustion must not same-route auto-resume",
);
});
test("setModel failure advances to the next configured fallback", () => {
const src = getRecoverySource();
assert.ok(
src.includes('reason: "setModel failed during provider recovery"'),
"failed fallback routes should be recorded so the next configured route can be tried",
);
assert.ok(
/if\s*\(!ok\)\s*\{[\s\S]{0,300}continue;/.test(src),
"setModel failure should continue walking the configured fallback chain",
);
});

View file

@ -4,14 +4,12 @@ import { mkdirSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { test } from "vitest";
import {
getAutoSession,
resetAutoSession,
} from "../auto/session.js";
import { getAutoSession } from "../auto/session.js";
import {
hasResearchTerminalTransition,
markResearchTerminalTransition,
} from "../auto.js";
import { registerHooks } from "../bootstrap/register-hooks.ts";
function makeTmpBase(): string {
const base = join(tmpdir(), `sf-research-terminal-${randomUUID()}`);
@ -96,6 +94,69 @@ test("research terminal transition blocks planning tools", async () => {
}
});
// After a terminal research summary is saved, any planning-tool call must be
// blocked in place — without sendMessage queueing an extra agent turn.
test("post_summary_planning_tool_attempt_is_blocked_without_followup_turn", async () => {
  // Arrange: put the auto session into an active research-slice unit.
  const session = getAutoSession();
  session.reset();
  session.active = true;
  session.currentUnit = {
    type: "research-slice",
    id: "M001/S01",
    startedAt: Date.now(),
  };
  const sentMessages: unknown[] = [];
  // Minimal plugin-interface stub: records handlers per event name and
  // captures any messages the hooks try to enqueue.
  const handlers = new Map<string, Array<(event: any, ctx?: any) => any>>();
  const pi = {
    on(event: string, handler: (event: any, ctx?: any) => any) {
      const existing = handlers.get(event) ?? [];
      existing.push(handler);
      handlers.set(event, existing);
    },
    sendMessage(message: unknown) {
      sentMessages.push(message);
    },
  } as any;
  registerHooks(pi);
  const toolResultHandlers = handlers.get("tool_result") ?? [];
  const toolCallHandlers = handlers.get("tool_call") ?? [];
  assert.ok(toolResultHandlers.length, "tool_result handler should register");
  assert.ok(toolCallHandlers.length, "tool_call handler should register");
  // Act 1: simulate the summary-save tool result that marks the research
  // unit's terminal transition.
  for (const handler of toolResultHandlers) {
    await handler({
      toolName: "sf_summary_save",
      content: [{ type: "text", text: "Saved RESEARCH" }],
      details: {
        terminal_transition: true,
        unit_type: "research",
      },
    });
  }
  assert.equal(hasResearchTerminalTransition(), true);
  // Act 2: attempt a planning tool after the terminal transition.
  const planningAttempt = {
    toolName: "sf_plan_milestone",
    input: {},
  };
  const results = [];
  for (const handler of toolCallHandlers) {
    results.push(await handler(planningAttempt));
  }
  // Assert: the call is blocked with a drift explanation naming the tool,
  // and no follow-up agent turn was enqueued by the block.
  const block = results.find((result) => result?.block === true);
  assert.ok(block, "post-summary planning attempt should be blocked");
  assert.match(block.reason, /Post-artifact drift/);
  assert.match(block.reason, /sf_plan_milestone/);
  assert.equal(
    sentMessages.length,
    0,
    "blocking the tool call must not enqueue another agent turn",
  );
  session.reset();
});
test("research terminal transition does not block non-planning tools", () => {
const session = getAutoSession();
// Reset to known state
@ -113,7 +174,7 @@ test("research terminal transition does not block non-planning tools", () => {
// Non-planning tools should not be blocked by the research terminal transition
// (the actual blocking logic only checks planning tools)
const nonPlanningTools = [
const _nonPlanningTools = [
"read",
"write",
"edit",

View file

@ -1,6 +1,12 @@
import assert from "node:assert/strict";
import { execFileSync } from "node:child_process";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import {
mkdirSync,
mkdtempSync,
readFileSync,
rmSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, it } from "vitest";
@ -90,7 +96,7 @@ describe("self-feedback inline drain", () => {
root,
);
const messages: unknown[] = [];
const messages: Array<{ message: unknown; options: unknown }> = [];
const notifications: string[] = [];
const ctx = {
ui: {
@ -100,18 +106,72 @@ describe("self-feedback inline drain", () => {
},
} as any;
const pi = {
sendMessage(message: unknown, options: unknown) {
messages.push({ message, options });
},
} as any;
assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 1);
assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 0);
assert.equal(messages.length, 1);
assert.equal(notifications.length, 2);
assert.match(
JSON.stringify(messages[0]?.message),
/sf-self-feedback-inline-fix/,
);
assert.match(
JSON.stringify(messages[0]?.message),
/sf_self_feedback_resolve/,
);
assert.deepEqual(messages[0]?.options, {
triggerTurn: true,
deliverAs: "followUp",
});
assert.match(notifications[1], /already dispatched/);
});
it("dispatch_failure_expires_claim_so_next_idle_turn_can_retry", async () => {
const root = makeForgeProject();
recordSelfFeedback(
{
kind: "startup-dispatch-race",
severity: "critical",
summary: "Startup dispatch can fail before the turn is accepted",
source: "detector",
},
root,
);
const notifications: string[] = [];
const ctx = {
ui: {
notify(message: string) {
notifications.push(message);
},
},
} as any;
const failingPi = {
sendMessage() {
return Promise.reject(new Error("agent busy"));
},
} as any;
assert.equal(
dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, failingPi),
1,
);
await Promise.resolve();
await Promise.resolve();
const messages: unknown[] = [];
const retryPi = {
sendMessage(message: unknown) {
messages.push(message);
},
} as any;
assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 1);
assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 0);
assert.equal(messages.length, 1);
assert.equal(notifications.length, 2);
assert.match(JSON.stringify(messages[0]), /sf-self-feedback-inline-fix/);
assert.match(notifications[1], /already dispatched/);
});
assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, retryPi), 1);
assert.equal(messages.length, 1);
assert.match(notifications.join("\n"), /will retry at the next idle point/);
});
it("consumes the claim after the inline-fix entries are resolved", () => {
const root = makeForgeProject();
@ -162,7 +222,11 @@ describe("self-feedback inline drain", () => {
const ctx = { ui: { notify() {} } } as any;
const pi = { sendMessage() {} } as any;
assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 1);
writeFileSync(join(root, "dirty.ts"), "export const dirty = true;\n", "utf-8");
writeFileSync(
join(root, "dirty.ts"),
"export const dirty = true;\n",
"utf-8",
);
assert.equal(
markResolved(
recorded.entry.id,
@ -199,4 +263,20 @@ describe("self-feedback inline drain", () => {
);
assert.equal(selected[0]?.repoIdentity, "external");
});
// Source-level check on register-hooks.ts: the session_start handler must
// actually dispatch the inline fix (not merely warn about pending feedback).
it("session_start_hook_queues_inline_fix_followup_not_only_warning", () => {
  const source = readFileSync(
    join(import.meta.dirname, "..", "bootstrap", "register-hooks.ts"),
    "utf-8",
  );
  // Scope the scan to the session_start registration up to where the
  // before-agent-start result is built.
  const start = source.indexOf('pi.on("session_start"');
  const end = source.indexOf("return buildBeforeAgentStartResult", start);
  assert.notEqual(start, -1);
  assert.notEqual(end, -1);
  const sessionStartBlock = source.slice(start, end);
  assert.match(sessionStartBlock, /dispatchSelfFeedbackInlineFixIfNeeded/);
  assert.match(sessionStartBlock, /even outside \/sf auto/);
  // The old "no auto-dispatch" comment marked the warning-only behavior;
  // its presence would indicate a regression.
  assert.doesNotMatch(sessionStartBlock, /no auto-dispatch/);
});
});

View file

@ -0,0 +1,106 @@
import assert from "node:assert/strict";
import {
mkdirSync,
mkdtempSync,
readFileSync,
rmSync,
writeFileSync,
} from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, describe, test } from "vitest";
import { registerDbTools } from "../bootstrap/db-tools.ts";
import { readAllSelfFeedback, recordSelfFeedback } from "../self-feedback.ts";
// Captured process-level state so each test can mutate cwd/SF_HOME freely.
const originalCwd = process.cwd();
const originalSfHome = process.env.SF_HOME;
// Temp project roots created during a test; removed in afterEach.
let roots: string[] = [];
afterEach(() => {
  // Restore cwd first so the temp roots can be deleted safely.
  process.chdir(originalCwd);
  for (const root of roots) rmSync(root, { recursive: true, force: true });
  roots = [];
  // Restore SF_HOME exactly (including the "was unset" case).
  if (originalSfHome === undefined) delete process.env.SF_HOME;
  else process.env.SF_HOME = originalSfHome;
});
function makeForgeProject(): string {
const root = mkdtempSync(join(tmpdir(), "sf-self-feedback-resolve-"));
roots.push(root);
mkdirSync(join(root, ".sf"), { recursive: true });
process.env.SF_HOME = join(root, "sf-home");
writeFileSync(
join(root, "package.json"),
JSON.stringify({ name: "singularity-forge", version: "0.0.1" }),
"utf-8",
);
return root;
}
/**
 * Builds a minimal plugin-interface stub that records every registered tool
 * on an exposed `tools` array for later inspection.
 */
function makeMockPi() {
  const registered: any[] = [];
  const registerTool = (tool: any): void => {
    registered.push(tool);
  };
  return { registerTool, tools: registered } as any;
}
describe("sf_self_feedback_resolve", () => {
  // End-to-end: record an entry, resolve it via the registered tool, then
  // verify the JSONL entry and the regenerated markdown both reflect it.
  test("resolve_when_entry_is_fixed_sets_resolved_evidence_and_regenerates_markdown", async () => {
    const root = makeForgeProject();
    const recorded = recordSelfFeedback(
      {
        kind: "inline-fix-resolution-gap",
        severity: "high",
        summary: "Inline fix landed but entry stayed unresolved",
        acceptanceCriteria: "1. Resolver tool exists. 2. JSONL is updated.",
        source: "detector",
      },
      root,
    );
    assert.ok(recorded);
    // The tool resolves the project from cwd, so chdir into the fixture.
    process.chdir(root);
    const pi = makeMockPi();
    registerDbTools(pi);
    const tool = pi.tools.find(
      (t: any) => t.name === "sf_self_feedback_resolve",
    );
    assert.ok(tool, "resolver tool should be registered");
    // Invoke the tool directly; trailing undefineds fill the optional
    // execute() context arguments the real runtime would supply.
    const result = await tool.execute(
      "call-1",
      {
        id: recorded.entry.id,
        reason: "resolver tool verified",
        commit_sha: "abc1234",
        test_path:
          "src/resources/extensions/sf/tests/self-feedback-resolve-tool.test.ts",
        criteria_met: ["Resolver tool exists", "JSONL is updated"],
      },
      undefined,
      undefined,
      undefined,
    );
    assert.equal(result.details?.resolved, true);
    // The persisted entry carries resolution timestamp, evidence, and the
    // criteria the agent claimed were met.
    const [entry] = readAllSelfFeedback(root).filter(
      (e) => e.id === recorded.entry.id,
    );
    assert.ok(entry?.resolvedAt);
    assert.equal(entry.resolvedEvidence?.kind, "agent-fix");
    assert.equal(entry.resolvedEvidence?.commitSha, "abc1234");
    assert.deepEqual(entry.resolvedCriteriaMet, [
      "Resolver tool exists",
      "JSONL is updated",
    ]);
    // Resolution should also regenerate the human-readable markdown view.
    const markdown = readFileSync(
      join(root, ".sf", "SELF-FEEDBACK.md"),
      "utf-8",
    );
    assert.match(markdown, /Recently Resolved/);
    assert.match(markdown, /inline-fix-resolution-gap/);
  });
});

View file

@ -26,6 +26,7 @@ const CANONICAL_DB_TOOLS = [
"sf_summary_save",
"sf_milestone_generate_id",
"sf_self_report",
"sf_self_feedback_resolve",
"sf_plan_milestone",
"sf_plan_slice",
"sf_plan_task",

View file

@ -0,0 +1,401 @@
/**
* Triage protocol registry integration tests.
*
* Purpose: Validate that every finding in the M008 bug registry conforms to
* the triage protocol definitions (severity, status, cluster routing), and
* that the systematic-debugging skill correctly references the protocol.
*
* Consumer: CI gate that blocks milestone completion when registry and
* protocol drift out of sync.
*/
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { dirname, join } from "node:path";
import { describe, test } from "vitest";
import { fileURLToPath } from "node:url";
const __dirname = dirname(fileURLToPath(import.meta.url));
const repoRoot = join(__dirname, "..", "..", "..", "..", "..");
// ─── Load canonical artifacts ────────────────────────────────────────────────
// Paths to the three artifacts this suite keeps in sync: the bug registry,
// the triage protocol, and the systematic-debugging skill.
const registryPath = join(repoRoot, ".sf", "milestones", "M008", "bugs", "bug-registry.json");
const protocolPath = join(repoRoot, ".sf", "milestones", "M008", "triage-protocol.md");
const skillPath = join(repoRoot, "src", "resources", "extensions", "sf", "skills", "systematic-debugging", "SKILL.md");
// NOTE(review): the parse result is cast, not validated — a malformed
// registry would surface as confusing assertion failures rather than a
// schema error. Acceptable for a CI gate; the shape below is the contract.
const registry = JSON.parse(readFileSync(registryPath, "utf-8")) as {
  schema_version: string;
  meta: {
    source: string;
    date: string;
    totalFindings: number;
    clusters: string[];
  };
  findings: Array<{
    id: string;
    file: string;
    lines: string;
    category: string;
    severity: string;
    status: string;
    description: string;
    suggestedFix: string;
    cluster: string;
    fixedByTaskId?: string;
  }>;
  summary: {
    severity: Record<string, number>;
    status: Record<string, number>;
    cluster: Record<string, number>;
  };
};
const protocol = readFileSync(protocolPath, "utf-8");
// The skill file is optional: if missing, skill-related tests assert against
// the empty string and fail with their own messages.
const skill = (() => {
  try {
    return readFileSync(skillPath, "utf-8");
  } catch {
    return "";
  }
})();
// ─── Severity definitions from protocol ──────────────────────────────────────
// Canonical severity and status vocabularies; every registry finding must use
// one of these values.
const VALID_SEVERITIES = ["HIGH", "MEDIUM", "LOW", "FALSE_POSITIVE"] as const;
const VALID_STATUSES = ["CONFIRMED", "FALSE_POSITIVE", "FIXED", "WONTFIX", "IN_PROGRESS"] as const;
// Cluster routing table from protocol
const PROTOCOL_CLUSTERS = [
  "engine + verification",
  "scaffold + doctor",
  "worktree + git",
  "memory + state + cache",
  "bootstrap + workflow",
  "notification + detection + headless",
] as const;
// ─── Helpers ─────────────────────────────────────────────────────────────────
/**
 * Asserts a per-finding invariant, prefixing the failure message with the
 * finding id so registry violations are easy to locate.
 */
function assertFinding(
  condition: boolean,
  findingId: string,
  message: string,
): void {
  if (!condition) {
    assert.fail(`Finding ${findingId}: ${message}`);
  }
}
// ─── Registry structural validity ────────────────────────────────────────────
describe("triage-protocol-registry", () => {
  // Registry-level structural checks.
  test("registry_schema_version_is_1_0_0", () => {
    assert.strictEqual(registry.schema_version, "1.0.0", "schema_version must be 1.0.0");
  });
  test("registry_meta_totalFindings_matches_actual_count", () => {
    assert.strictEqual(
      registry.meta.totalFindings,
      registry.findings.length,
      `meta.totalFindings (${registry.meta.totalFindings}) must equal actual findings count (${registry.findings.length})`,
    );
  });
  test("registry_meta_clusters_match_protocol_clusters", () => {
    // Set comparison ignores ordering and duplicates on both sides.
    const registryClusters = new Set(registry.meta.clusters);
    const protocolClusterSet = new Set(PROTOCOL_CLUSTERS);
    assert.deepStrictEqual(
      registryClusters,
      protocolClusterSet,
      "registry meta.clusters must exactly match protocol cluster routing table",
    );
  });
  // ─── Per-finding validation ──────────────────────────────────────────────
  test("every_finding_has_valid_severity", () => {
    for (const f of registry.findings) {
      assertFinding(
        VALID_SEVERITIES.includes(f.severity as (typeof VALID_SEVERITIES)[number]),
        f.id,
        `severity "${f.severity}" is not one of ${VALID_SEVERITIES.join(", ")}`,
      );
    }
  });
  test("every_finding_has_valid_status", () => {
    for (const f of registry.findings) {
      assertFinding(
        VALID_STATUSES.includes(f.status as (typeof VALID_STATUSES)[number]),
        f.id,
        `status "${f.status}" is not one of ${VALID_STATUSES.join(", ")}`,
      );
    }
  });
  test("every_finding_belongs_to_protocol_cluster", () => {
    for (const f of registry.findings) {
      assertFinding(
        PROTOCOL_CLUSTERS.includes(f.cluster as (typeof PROTOCOL_CLUSTERS)[number]),
        f.id,
        `cluster "${f.cluster}" is not in the protocol routing table`,
      );
    }
  });
  test("every_finding_has_non_empty_id", () => {
    for (const f of registry.findings) {
      assertFinding(
        f.id.length > 0,
        f.id,
        "finding id must not be empty",
      );
    }
  });
  test("every_finding_has_non_empty_description", () => {
    for (const f of registry.findings) {
      assertFinding(
        f.description.length > 0,
        f.id,
        "description must not be empty",
      );
    }
  });
  test("every_finding_has_non_empty_suggestedFix", () => {
    for (const f of registry.findings) {
      assertFinding(
        f.suggestedFix.length > 0,
        f.id,
        "suggestedFix must not be empty",
      );
    }
  });
  // ─── Severity / status consistency rules ─────────────────────────────────
  // FALSE_POSITIVE must be consistent across both fields (checked both ways).
  test("severity_FALSE_POSITIVE_implies_status_FALSE_POSITIVE", () => {
    for (const f of registry.findings) {
      if (f.severity === "FALSE_POSITIVE") {
        assertFinding(
          f.status === "FALSE_POSITIVE",
          f.id,
          `severity=FALSE_POSITIVE requires status=FALSE_POSITIVE, got status=${f.status}`,
        );
      }
    }
  });
  test("status_FALSE_POSITIVE_implies_severity_FALSE_POSITIVE", () => {
    for (const f of registry.findings) {
      if (f.status === "FALSE_POSITIVE") {
        assertFinding(
          f.severity === "FALSE_POSITIVE",
          f.id,
          `status=FALSE_POSITIVE requires severity=FALSE_POSITIVE, got severity=${f.severity}`,
        );
      }
    }
  });
  // fixedByTaskId and status=FIXED must imply each other.
  test("status_FIXED_implies_fixedByTaskId_present", () => {
    for (const f of registry.findings) {
      if (f.status === "FIXED") {
        assertFinding(
          f.fixedByTaskId !== undefined && f.fixedByTaskId.length > 0,
          f.id,
          `status=FIXED requires fixedByTaskId to be set`,
        );
      }
    }
  });
  test("fixedByTaskId_present_only_when_status_FIXED", () => {
    for (const f of registry.findings) {
      if (f.fixedByTaskId !== undefined) {
        assertFinding(
          f.status === "FIXED",
          f.id,
          `fixedByTaskId (${f.fixedByTaskId}) should only be present when status=FIXED, got status=${f.status}`,
        );
      }
    }
  });
  // ─── Summary statistics accuracy ─────────────────────────────────────────
  test("summary_severity_counts_match_actual", () => {
    const actual: Record<string, number> = {};
    for (const f of registry.findings) {
      actual[f.severity] = (actual[f.severity] ?? 0) + 1;
    }
    assert.deepStrictEqual(
      registry.summary.severity,
      actual,
      "summary.severity counts must match actual finding severities",
    );
  });
  test("summary_status_counts_match_actual", () => {
    const actual: Record<string, number> = {};
    for (const f of registry.findings) {
      actual[f.status] = (actual[f.status] ?? 0) + 1;
    }
    // Compare only keys that exist in either object; zero-count keys in summary are allowed
    const allKeys = new Set([...Object.keys(registry.summary.status), ...Object.keys(actual)]);
    for (const key of allKeys) {
      const expectedCount = registry.summary.status[key] ?? 0;
      const actualCount = actual[key] ?? 0;
      assert.strictEqual(
        actualCount,
        expectedCount,
        `summary.status["${key}"]: expected ${expectedCount}, got ${actualCount}`,
      );
    }
  });
  test("summary_cluster_counts_match_actual", () => {
    const actual: Record<string, number> = {};
    for (const f of registry.findings) {
      actual[f.cluster] = (actual[f.cluster] ?? 0) + 1;
    }
    assert.deepStrictEqual(
      registry.summary.cluster,
      actual,
      "summary.cluster counts must match actual finding clusters",
    );
  });
  // ─── Protocol content validation ─────────────────────────────────────────
  // These are substring checks against the markdown, so they are resilient
  // to formatting changes but not to renamed vocabulary.
  test("protocol_defines_all_severity_levels", () => {
    for (const sev of VALID_SEVERITIES) {
      assert.ok(
        protocol.includes(sev),
        `triage-protocol.md must mention severity level ${sev}`,
      );
    }
  });
  test("protocol_defines_all_status_values", () => {
    for (const st of VALID_STATUSES) {
      assert.ok(
        protocol.includes(st),
        `triage-protocol.md must mention status value ${st}`,
      );
    }
  });
  test("protocol_defines_all_clusters_in_routing_table", () => {
    for (const cluster of PROTOCOL_CLUSTERS) {
      assert.ok(
        protocol.includes(cluster),
        `triage-protocol.md cluster routing table must include "${cluster}"`,
      );
    }
  });
  test("protocol_contains_confidence_gate_table", () => {
    assert.ok(
      protocol.includes("Confidence Gate Requirements"),
      "protocol must contain Confidence Gate Requirements section",
    );
    assert.ok(
      protocol.includes("0.90") || protocol.includes("0.95") || protocol.includes("0.80"),
      "protocol must list numeric confidence thresholds",
    );
  });
  test("protocol_contains_escalation_rules", () => {
    assert.ok(
      protocol.includes("Escalation Rules"),
      "protocol must contain Escalation Rules section",
    );
  });
  // ─── Skill references protocol correctly ─────────────────────────────────
  test("skill_references_triage_protocol_file", () => {
    assert.ok(
      skill.includes("triage-protocol.md") || skill.includes("triage protocol"),
      "systematic-debugging SKILL.md must reference the triage protocol",
    );
  });
  test("skill_references_bug_registry", () => {
    assert.ok(
      skill.includes("bug-registry.json"),
      "systematic-debugging SKILL.md must reference bug-registry.json",
    );
  });
  test("skill_lists_severity_values", () => {
    // Accept either JSON-quoted or markdown-backtick forms.
    assert.ok(
      (skill.includes('"HIGH"') || skill.includes('`HIGH`')) &&
      (skill.includes('"MEDIUM"') || skill.includes('`MEDIUM`')) &&
      (skill.includes('"LOW"') || skill.includes('`LOW`')),
      "systematic-debugging SKILL.md must list HIGH / MEDIUM / LOW severity values",
    );
  });
  test("skill_mentions_confidence_gate_thresholds", () => {
    assert.ok(
      skill.includes("0.80") || skill.includes("0.85") || skill.includes("0.90") || skill.includes("0.95"),
      "systematic-debugging SKILL.md must mention confidence gate thresholds",
    );
  });
  test("skill_mentions_cluster_aware_fixes", () => {
    assert.ok(
      skill.includes("cluster-aware") || skill.includes("Cluster-aware"),
      "systematic-debugging SKILL.md must mention cluster-aware fixes",
    );
  });
  test("skill_mentions_registry_update_after_fix", () => {
    assert.ok(
      skill.includes("Update the registry") || skill.includes("update the registry") || skill.includes("bug-registry.json"),
      "systematic-debugging SKILL.md must instruct updating registry after fix",
    );
  });
  // ─── Protocol decision flow integrity ────────────────────────────────────
  test("protocol_decision_flow_has_all_severity_branches", () => {
    // The decision flow should branch on HIGH, MEDIUM, and LOW
    assert.ok(
      protocol.includes("severity = HIGH") || protocol.includes("Is severity = HIGH"),
      "protocol decision flow must branch on HIGH severity",
    );
    assert.ok(
      protocol.includes("severity = MEDIUM") || protocol.includes("Is severity = MEDIUM"),
      "protocol decision flow must branch on MEDIUM severity",
    );
    assert.ok(
      protocol.includes("severity = LOW") || protocol.includes("Is severity = LOW"),
      "protocol decision flow must branch on LOW severity",
    );
  });
  test("protocol_high_severity_requires_regression_test", () => {
    // Scan only from the HIGH branch onward (indexOf -1 would scan the whole
    // doc; the earlier branch test already guards against that case).
    const highSection = protocol.slice(protocol.indexOf("severity = HIGH"));
    assert.ok(
      highSection.includes("regression test") || protocol.includes("Require regression test"),
      "protocol must require regression test for HIGH severity",
    );
  });
  test("protocol_medium_severity_has_confidence_gate_0_85", () => {
    assert.ok(
      protocol.includes("0.85"),
      "protocol must specify 0.85 confidence gate for MEDIUM severity",
    );
  });
  test("protocol_low_severity_has_confidence_gate_0_80", () => {
    assert.ok(
      protocol.includes("0.80"),
      "protocol must specify 0.80 confidence gate for LOW severity",
    );
  });
});

View file

@ -0,0 +1,264 @@
import assert from "node:assert/strict";
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, test } from "vitest";
import { buildQuerySnapshot } from "../../../../headless-query.ts";
import { resolveDispatch } from "../auto-dispatch.ts";
import {
clearUnitRuntimeRecord,
decideUnitRuntimeDispatch,
readUnitRuntimeRecord,
UNIT_RUNTIME_STATUSES,
UNIT_RUNTIME_TERMINAL_STATUSES,
UNIT_RUNTIME_TRANSITIONS,
writeUnitRuntimeRecord,
} from "../unit-runtime.ts";
// Temp directories created during a test; removed in afterEach.
const tmpDirs: string[] = [];
/**
 * Creates a temp project base with an empty `.sf/milestones` tree and
 * registers it for cleanup.
 */
function makeTmpBase(prefix = "sf-unit-runtime-fsm-"): string {
  const base = mkdtempSync(join(tmpdir(), prefix));
  tmpDirs.push(base);
  mkdirSync(join(base, ".sf", "milestones"), { recursive: true });
  return base;
}
/**
 * Creates a temp project containing an M001 roadmap with two independent
 * (`depends:[]`) low-risk slices, suitable for parallel-research dispatch.
 */
function makeParallelResearchProject(): string {
  const base = makeTmpBase("sf-unit-runtime-parallel-");
  const milestoneDir = join(base, ".sf", "milestones", "M001");
  mkdirSync(milestoneDir, { recursive: true });
  // Fixture roadmap: the exact markdown shape the dispatcher parses.
  writeFileSync(
    join(milestoneDir, "M001-ROADMAP.md"),
    [
      "# M001: Parallel Research Milestone",
      "",
      "**Vision:** Research-ready slices.",
      "",
      "## Slices",
      "",
      "- [ ] **S01: Alpha** `risk:low` `depends:[]`",
      "- [ ] **S02: Beta** `risk:low` `depends:[]`",
      "",
    ].join("\n"),
    "utf-8",
  );
  return base;
}
/**
 * Runs resolveDispatch for the fixture milestone in the planning phase with
 * slice S01 active and no registry entries or blockers.
 */
async function resolvePlanningDispatch(base: string) {
  return resolveDispatch({
    basePath: base,
    mid: "M001",
    midTitle: "Parallel Research Milestone",
    // Minimal state object; cast because the tests only populate the fields
    // the dispatcher reads for this scenario.
    state: {
      phase: "planning",
      activeMilestone: {
        id: "M001",
        title: "Parallel Research Milestone",
        status: "active",
      },
      activeSlice: { id: "S01", title: "Alpha" },
      activeTask: null,
      registry: [],
      blockers: [],
    } as any,
    prefs: undefined,
  });
}
// Remove every temp directory a test created and reset the registry.
afterEach(() => {
  for (const dir of tmpDirs) {
    rmSync(dir, { recursive: true, force: true });
  }
  tmpDirs.length = 0;
});
// Pins the exact FSM vocabulary and transition table: any change to the
// runtime state machine must be made deliberately, here and in the module.
test("unit_runtime_transitions_when_enumerated_cover_all_statuses", () => {
  assert.deepEqual(UNIT_RUNTIME_STATUSES, [
    "queued",
    "claimed",
    "running",
    "progress",
    "completed",
    "failed",
    "blocked",
    "cancelled",
    "stale",
    "runaway-recovered",
    "notified",
  ]);
  assert.deepEqual(UNIT_RUNTIME_TERMINAL_STATUSES, [
    "completed",
    "failed",
    "blocked",
    "cancelled",
    "stale",
    "runaway-recovered",
  ]);
  assert.deepEqual(UNIT_RUNTIME_TRANSITIONS, {
    queued: ["claimed", "cancelled"],
    claimed: ["running", "stale", "cancelled"],
    running: [
      "progress",
      "completed",
      "failed",
      "blocked",
      "cancelled",
      "stale",
      "runaway-recovered",
    ],
    progress: [
      "running",
      "completed",
      "failed",
      "blocked",
      "cancelled",
      "stale",
      "runaway-recovered",
    ],
    completed: ["notified"],
    failed: ["queued", "notified"],
    blocked: ["notified"],
    cancelled: ["notified"],
    stale: ["queued", "notified"],
    "runaway-recovered": ["queued", "notified"],
    notified: ["queued"],
  });
});
// A failed synthetic unit must block redispatch of that unit until its
// runtime record is explicitly cleared; dispatch then resumes.
test("synthetic_failed_unit_when_not_reset_cannot_redispatch", async () => {
  const base = makeParallelResearchProject();
  writeUnitRuntimeRecord(
    base,
    "research-slice",
    "M001/parallel-research",
    1000,
    {
      status: "failed",
      retryCount: 0,
      maxRetries: 2,
    },
  );
  const record = readUnitRuntimeRecord(
    base,
    "research-slice",
    "M001/parallel-research",
  );
  const decision = decideUnitRuntimeDispatch(record);
  assert.equal(decision.action, "block");
  assert.equal(decision.reasonCode, "synthetic-reset-required");
  assert.equal(decision.retryCount, 0);
  assert.equal(decision.maxRetries, 2);
  // While the synthetic unit is blocked, the planner falls back to
  // dispatching the concrete slice (M001/S01) instead.
  const blockedDispatch = await resolvePlanningDispatch(base);
  assert.equal(blockedDispatch.action, "dispatch");
  if (blockedDispatch.action === "dispatch") {
    assert.equal(blockedDispatch.unitType, "research-slice");
    assert.equal(blockedDispatch.unitId, "M001/S01");
  }
  // Clearing the record resets the decision to a fresh dispatch, and the
  // planner returns to the parallel-research unit.
  clearUnitRuntimeRecord(base, "research-slice", "M001/parallel-research");
  const resetDecision = decideUnitRuntimeDispatch(
    readUnitRuntimeRecord(base, "research-slice", "M001/parallel-research"),
  );
  assert.equal(resetDecision.action, "dispatch");
  assert.equal(resetDecision.reasonCode, "no-runtime-record");
  const resetDispatch = await resolvePlanningDispatch(base);
  assert.equal(resetDispatch.action, "dispatch");
  if (resetDispatch.action === "dispatch") {
    assert.equal(resetDispatch.unitType, "research-slice");
    assert.equal(resetDispatch.unitId, "M001/parallel-research");
  }
});
// Table-driven: with retry budget remaining, retryable terminal statuses map
// to "retry" and non-retryable ones to "notify".
test("terminal_status_when_budget_available_produces_expected_dispatch_decision", () => {
  const base = makeTmpBase();
  const cases = [
    ["completed", "notify", "terminal-ready-to-notify"],
    ["failed", "retry", "retry-budget-available"],
    ["blocked", "notify", "terminal-ready-to-notify"],
    ["cancelled", "notify", "terminal-ready-to-notify"],
    ["stale", "retry", "retry-budget-available"],
    ["runaway-recovered", "retry", "retry-budget-available"],
  ] as const;
  for (const [status, expectedAction, expectedReason] of cases) {
    // Distinct unit id per status so records do not overwrite each other.
    writeUnitRuntimeRecord(base, "execute-task", `M001/S01/${status}`, 1000, {
      status,
      retryCount: 0,
      maxRetries: 2,
    });
    const record = readUnitRuntimeRecord(
      base,
      "execute-task",
      `M001/S01/${status}`,
    );
    const decision = decideUnitRuntimeDispatch(record);
    assert.equal(decision.action, expectedAction, status);
    assert.equal(decision.reasonCode, expectedReason, status);
  }
});
// Once retryCount reaches maxRetries, retryable terminal statuses must block
// instead of retrying.
test("retryable_terminal_status_when_budget_exhausted_blocks_dispatch", () => {
  const base = makeTmpBase();
  for (const status of ["failed", "stale", "runaway-recovered"] as const) {
    writeUnitRuntimeRecord(base, "execute-task", `M001/S01/${status}`, 1000, {
      status,
      retryCount: 2,
      maxRetries: 2,
    });
    const decision = decideUnitRuntimeDispatch(
      readUnitRuntimeRecord(base, "execute-task", `M001/S01/${status}`),
    );
    assert.equal(decision.action, "block", status);
    assert.equal(decision.reasonCode, "retry-budget-exhausted", status);
    assert.equal(decision.retryCount, 2, status);
    assert.equal(decision.maxRetries, 2, status);
  }
});
// A terminal unit that already carries notifiedAt is skipped — no duplicate
// retry or notification.
test("terminal_status_when_already_notified_skips_dispatch", () => {
  const base = makeTmpBase();
  writeUnitRuntimeRecord(base, "execute-task", "M001/S01/T01", 1000, {
    status: "failed",
    retryCount: 0,
    maxRetries: 2,
    notifiedAt: 2000,
  });
  const decision = decideUnitRuntimeDispatch(
    readUnitRuntimeRecord(base, "execute-task", "M001/S01/T01"),
  );
  assert.equal(decision.action, "skip");
  assert.equal(decision.reasonCode, "already-notified");
});
// The headless query snapshot must surface the runtime record's retry
// budget, dispatch decision, and diagnostic fields verbatim.
test("headless_query_when_runtime_record_exists_shows_retry_budget", async () => {
  const base = makeTmpBase();
  writeUnitRuntimeRecord(base, "execute-task", "M001/S01/T01", 1000, {
    status: "failed",
    retryCount: 1,
    maxRetries: 2,
    watchdogReason: "no heartbeat",
    outputPath: ".sf/runtime/units/M001-S01-T01.log",
  });
  const snapshot = await buildQuerySnapshot(base);
  const unit = snapshot.runtime.units.find(
    (item) =>
      item.unitType === "execute-task" && item.unitId === "M001/S01/T01",
  );
  assert.ok(unit);
  assert.equal(unit.status, "failed");
  assert.equal(unit.retryCount, 1);
  assert.equal(unit.maxRetries, 2);
  // Budget remaining is derived (maxRetries - retryCount).
  assert.equal(unit.retryBudgetRemaining, 1);
  assert.equal(unit.dispatchDecision.action, "retry");
  assert.equal(unit.dispatchDecision.reasonCode, "retry-budget-available");
  assert.equal(unit.watchdogReason, "no heartbeat");
  assert.equal(unit.outputPath, ".sf/runtime/units/M001-S01-T01.log");
});

View file

@ -22,7 +22,126 @@ import {
} from "./paths.js";
import { parseUnitId } from "./unit-id.js";
/**
* Lists every durable unit runtime status in FSM order.
*
* Purpose: give dispatch, recovery, and query surfaces one canonical state
* vocabulary so terminal units cannot be redispatched by ambiguous legacy phases.
*
* Consumer: auto runtime persistence, unit-runtime tests, headless query summaries.
*/
export const UNIT_RUNTIME_STATUSES = [
  // Pre-execution
  "queued",
  "claimed",
  // Active
  "running",
  "progress",
  // Terminal
  "completed",
  "failed",
  "blocked",
  "cancelled",
  "stale",
  "runaway-recovered",
  // Post-terminal acknowledgement
  "notified",
] as const;
/**
 * Names the unit statuses that end an execution attempt.
 *
 * Purpose: centralize the terminal-state union so retry and notification policy
 * does not drift between watchdog recovery and dispatch preview logic.
 *
 * Consumer: decideUnitRuntimeDispatch and operator-facing query summaries.
 */
export const UNIT_RUNTIME_TERMINAL_STATUSES = [
  "completed",
  "failed",
  "blocked",
  "cancelled",
  "stale",
  "runaway-recovered",
] as const;
/**
 * Describes the explicit unit runtime finite-state-machine transitions.
 *
 * Purpose: make retry, notification, and reset transitions reviewable as data
 * instead of implied by ad hoc marker files or legacy phase strings.
 *
 * Consumer: unit runtime tests, future dispatch/reconciler guards.
 */
export const UNIT_RUNTIME_TRANSITIONS = {
  queued: ["claimed", "cancelled"],
  claimed: ["running", "stale", "cancelled"],
  running: [
    "progress",
    "completed",
    "failed",
    "blocked",
    "cancelled",
    "stale",
    "runaway-recovered",
  ],
  // "progress" can return to "running"; otherwise it shares running's exits.
  progress: [
    "running",
    "completed",
    "failed",
    "blocked",
    "cancelled",
    "stale",
    "runaway-recovered",
  ],
  completed: ["notified"],
  // Retryable terminal states may re-enter the queue or be notified.
  failed: ["queued", "notified"],
  blocked: ["notified"],
  cancelled: ["notified"],
  stale: ["queued", "notified"],
  "runaway-recovered": ["queued", "notified"],
  // After notification the only way forward is a fresh queue cycle.
  notified: ["queued"],
} as const satisfies Record<UnitRuntimeStatus, readonly UnitRuntimeStatus[]>;
/**
 * Enumerates every durable unit runtime status.
 *
 * Purpose: let persistence and dispatch decisions share one exhaustive status
 * type while legacy `phase` remains available for older call sites.
 *
 * Consumer: AutoUnitRuntimeRecord.status, retry decisions, query summaries.
 */
export type UnitRuntimeStatus = (typeof UNIT_RUNTIME_STATUSES)[number];
/**
 * Enumerates statuses that end a unit execution attempt.
 *
 * Purpose: distinguish states that need notify/retry/block policy from active
 * states that should not start a second copy of the same unit.
 *
 * Consumer: decideUnitRuntimeDispatch.
 */
export type UnitRuntimeTerminalStatus =
  (typeof UNIT_RUNTIME_TERMINAL_STATUSES)[number];
/**
 * Captures the durable FSM state embedded in a unit runtime record.
 *
 * Purpose: expose retry budget, liveness, and notification fields together so
 * callers can decide whether a unit should run, retry, block, or notify.
 *
 * Consumer: writeUnitRuntimeRecord, decideUnitRuntimeDispatch, headless query.
 */
export interface UnitRuntimeState {
  // Current FSM status of the unit.
  status: UnitRuntimeStatus;
  // Retry budget: attempts consumed vs. allowed.
  retryCount: number;
  maxRetries: number;
  // Liveness timestamps (epoch ms); null when never observed.
  lastHeartbeatAt: number | null;
  lastProgressAt: number;
  lastOutputAt: number | null;
  // Path to the unit's captured output, if any.
  outputPath: string | null;
  // Why the watchdog intervened, if it did.
  watchdogReason: string | null;
  // When the terminal outcome was surfaced to the operator; null if pending.
  notifiedAt: number | null;
}
export type UnitRuntimePhase =
| UnitRuntimeStatus
| "dispatched"
| "wrapup-warning-sent"
| "runaway-warning-sent"
@ -33,6 +152,14 @@ export type UnitRuntimePhase =
| "paused"
| "skipped";
// Default retry budget when a record does not specify maxRetries.
const DEFAULT_UNIT_RUNTIME_MAX_RETRIES = 1;
// Terminal statuses that may consume retry budget and re-enter the queue;
// the remaining terminal statuses (completed/blocked/cancelled) only notify.
const RETRYABLE_TERMINAL_STATUSES = new Set<UnitRuntimeStatus>([
  "failed",
  "stale",
  "runaway-recovered",
]);
export interface ExecuteTaskRecoveryStatus {
planPath: string;
summaryPath: string;
@ -50,18 +177,263 @@ export interface AutoUnitRuntimeRecord {
startedAt: number;
updatedAt: number;
phase: UnitRuntimePhase;
status: UnitRuntimeStatus;
wrapupWarningSent: boolean;
continueHereFired: boolean;
timeoutAt: number | null;
lastHeartbeatAt?: number | null;
lastProgressAt: number;
progressCount: number;
lastProgressKind: string;
lastOutputAt?: number | null;
outputPath?: string | null;
watchdogReason?: string | null;
notifiedAt?: number | null;
recovery?: ExecuteTaskRecoveryStatus;
recoveryAttempts?: number;
retryCount?: number;
maxRetries?: number;
lastRecoveryReason?: "idle" | "hard";
runawayGuardPause?: RunawayGuardPauseMetadata;
}
/**
 * Describes whether dispatch may run a unit from its runtime record.
 *
 * Purpose: surface the same retry-budget decision to tests, dispatch preview,
 * and operator diagnostics without reinterpreting terminal states ad hoc.
 *
 * Every variant carries the same retry-budget triple so callers can log or
 * display budget state regardless of the chosen action.
 *
 * Consumer: unit-runtime FSM tests and headless query runtime summaries.
 */
export type UnitRuntimeDispatchDecision =
  // Start the unit: it has never run, or its record is explicitly queued.
  | {
      action: "dispatch";
      reasonCode: "no-runtime-record" | "queued";
      retryCount: number;
      maxRetries: number;
      retryBudgetRemaining: number;
    }
  // Re-run a retryable terminal unit while budget remains.
  | {
      action: "retry";
      reasonCode: "retry-budget-available";
      retryCount: number;
      maxRetries: number;
      retryBudgetRemaining: number;
    }
  // Terminal outcome that should be surfaced to the operator.
  | {
      action: "notify";
      reasonCode: "terminal-ready-to-notify";
      retryCount: number;
      maxRetries: number;
      retryBudgetRemaining: number;
    }
  // Hold the unit: budget exhausted, or a synthetic unit needs manual reset.
  | {
      action: "block";
      reasonCode: "retry-budget-exhausted" | "synthetic-reset-required";
      retryCount: number;
      maxRetries: number;
      retryBudgetRemaining: number;
    }
  // Take no action: already notified, still active, or nothing to do.
  | {
      action: "skip";
      reasonCode:
        | "already-notified"
        | "active-or-claimed"
        | "notified"
        | "terminal-nonretryable";
      retryCount: number;
      maxRetries: number;
      retryBudgetRemaining: number;
    };
/**
 * Reports whether `updates` explicitly carries `key` as an own property.
 *
 * Purpose: distinguish "caller set this field (possibly to undefined/null)"
 * from "caller omitted this field" when merging partial runtime updates.
 */
function hasUpdate<K extends keyof AutoUnitRuntimeRecord>(
  updates: Partial<AutoUnitRuntimeRecord>,
  key: K,
): boolean {
  return Object.prototype.hasOwnProperty.call(updates, key);
}
/**
 * Maps a durable FSM status onto the legacy `phase` field.
 *
 * Purpose: keep older call sites that still read `phase` coherent when a
 * record is written with only the new `status` value.
 */
function phaseForStatus(status: UnitRuntimeStatus): UnitRuntimePhase {
  if (status === "queued" || status === "claimed" || status === "running") {
    return "dispatched";
  }
  if (status === "progress") {
    return "wrapup-warning-sent";
  }
  if (status === "completed") {
    return "finalized";
  }
  // Remaining statuses share their name with a legacy phase value.
  return status;
}
/**
 * Derives an FSM status from a legacy `phase` value.
 *
 * Purpose: let records written before `status` existed still participate in
 * retry and dispatch policy.
 *
 * `record` supplies runaway-guard context so a paused unit can be classified
 * as retryable ("runaway-recovered") rather than blocked.
 */
function inferStatusFromPhase(
  phase: UnitRuntimePhase,
  record?: Pick<AutoUnitRuntimeRecord, "runawayGuardPause"> | null,
): UnitRuntimeStatus {
  // Phases that already name a status pass through unchanged.
  if ((UNIT_RUNTIME_STATUSES as readonly string[]).includes(phase)) {
    return phase as UnitRuntimeStatus;
  }
  if (
    phase === "wrapup-warning-sent" ||
    phase === "runaway-warning-sent" ||
    phase === "runaway-final-warning-sent" ||
    phase === "recovered"
  ) {
    return "progress";
  }
  if (phase === "timeout") {
    return "stale";
  }
  if (phase === "finalized") {
    return "completed";
  }
  if (phase === "paused") {
    // A runaway-guard pause is retryable; a plain pause is blocked.
    return record?.runawayGuardPause ? "runaway-recovered" : "blocked";
  }
  if (phase === "skipped") {
    return "blocked";
  }
  // "dispatched" and any unrecognized phase count as actively running.
  return "running";
}
/** Remaining retry attempts, clamped so an overspent budget reads as zero. */
function retryBudgetRemaining(retryCount: number, maxRetries: number): number {
  const remaining = maxRetries - retryCount;
  return remaining > 0 ? remaining : 0;
}
/**
 * Returns true when a runtime status is terminal for one execution attempt.
 *
 * Purpose: keep terminal-state checks exhaustive against the exported terminal
 * union rather than hard-coded differently at each caller.
 *
 * Consumer: decideUnitRuntimeDispatch and query summary generation.
 */
export function isTerminalUnitRuntimeStatus(
  status: UnitRuntimeStatus,
): status is UnitRuntimeTerminalStatus {
  const terminal: readonly string[] = UNIT_RUNTIME_TERMINAL_STATUSES;
  return terminal.includes(status);
}
/**
 * Returns the normalized FSM state embedded in a runtime record.
 *
 * Purpose: let legacy records with only `phase` still participate in retry and
 * query policy while new records persist explicit FSM fields.
 *
 * Fallbacks: `status` is inferred from `phase` when absent, `retryCount`
 * falls back to the older `recoveryAttempts` counter, and `maxRetries`
 * defaults to the module-wide budget.
 *
 * Consumer: decideUnitRuntimeDispatch and headless query summaries.
 */
export function getUnitRuntimeState(
  record: AutoUnitRuntimeRecord,
): UnitRuntimeState {
  const {
    status: explicitStatus,
    retryCount: explicitRetries,
    recoveryAttempts,
    maxRetries,
    lastHeartbeatAt,
    lastProgressAt,
    lastOutputAt,
    outputPath,
    watchdogReason,
    notifiedAt,
  } = record;
  return {
    status: explicitStatus ?? inferStatusFromPhase(record.phase, record),
    retryCount: explicitRetries ?? recoveryAttempts ?? 0,
    maxRetries: maxRetries ?? DEFAULT_UNIT_RUNTIME_MAX_RETRIES,
    lastHeartbeatAt: lastHeartbeatAt ?? null,
    lastProgressAt,
    lastOutputAt: lastOutputAt ?? null,
    outputPath: outputPath ?? null,
    watchdogReason: watchdogReason ?? null,
    notifiedAt: notifiedAt ?? null,
  };
}
/**
 * Returns true for synthetic units that must be reset before rerun.
 *
 * Purpose: prevent synthetic orchestration units such as parallel research from
 * looping after failure while preserving normal task retry behavior.
 *
 * Consumer: decideUnitRuntimeDispatch.
 */
export function isSyntheticUnitRuntime(record: AutoUnitRuntimeRecord): boolean {
  if (record.unitType === "synthetic") {
    return true;
  }
  // Parallel-research units are synthetic even when typed as regular units.
  return record.unitId.includes("parallel-research");
}
/**
 * Decides whether a unit runtime record permits dispatch, retry, notify, or block.
 *
 * Purpose: enforce retry budgets and explicit reset requirements before callers
 * schedule another copy of a failed or stale unit.
 *
 * Guard order matters: notification state is checked before status, queued
 * before terminal classification, and synthetic reset before retry budget.
 *
 * Consumer: unit-runtime FSM tests and headless query runtime summaries.
 */
export function decideUnitRuntimeDispatch(
  record: AutoUnitRuntimeRecord | null,
  options: { synthetic?: boolean } = {},
): UnitRuntimeDispatchDecision {
  if (record == null) {
    // Never-run units get a fresh budget and dispatch immediately.
    return {
      action: "dispatch",
      reasonCode: "no-runtime-record",
      retryCount: 0,
      maxRetries: DEFAULT_UNIT_RUNTIME_MAX_RETRIES,
      retryBudgetRemaining: DEFAULT_UNIT_RUNTIME_MAX_RETRIES,
    };
  }
  const state = getUnitRuntimeState(record);
  // Every decision variant carries the same budget triple.
  const budget = {
    retryCount: state.retryCount,
    maxRetries: state.maxRetries,
    retryBudgetRemaining: retryBudgetRemaining(
      state.retryCount,
      state.maxRetries,
    ),
  };
  // Notification already happened, either recorded as a timestamp or as the
  // status itself — nothing left to do.
  if (state.notifiedAt !== null) {
    return { action: "skip", reasonCode: "already-notified", ...budget };
  }
  if (state.status === "notified") {
    return { action: "skip", reasonCode: "notified", ...budget };
  }
  // Queued units are always eligible to start.
  if (state.status === "queued") {
    return { action: "dispatch", reasonCode: "queued", ...budget };
  }
  // Active or claimed units must not get a second concurrent copy.
  if (!isTerminalUnitRuntimeStatus(state.status)) {
    return { action: "skip", reasonCode: "active-or-claimed", ...budget };
  }
  // Synthetic orchestration units require an explicit reset after any
  // non-completed terminal state instead of an automatic rerun.
  const synthetic = options.synthetic ?? isSyntheticUnitRuntime(record);
  if (synthetic && state.status !== "completed") {
    return {
      action: "block",
      reasonCode: "synthetic-reset-required",
      ...budget,
    };
  }
  // Retryable failures consume budget; once exhausted they block.
  if (RETRYABLE_TERMINAL_STATUSES.has(state.status)) {
    return budget.retryBudgetRemaining > 0
      ? { action: "retry", reasonCode: "retry-budget-available", ...budget }
      : { action: "block", reasonCode: "retry-budget-exhausted", ...budget };
  }
  const readyToNotify =
    state.status === "completed" ||
    state.status === "blocked" ||
    state.status === "cancelled";
  if (readyToNotify) {
    return {
      action: "notify",
      reasonCode: "terminal-ready-to-notify",
      ...budget,
    };
  }
  // Defensive fall-through for any future terminal status.
  return { action: "skip", reasonCode: "terminal-nonretryable", ...budget };
}
function runtimeDir(basePath: string): string {
return join(sfRoot(basePath), "runtime", "units");
}
@ -105,25 +477,68 @@ export function writeUnitRuntimeRecord(
mkdirSync(dir, { recursive: true });
const path = runtimePath(basePath, unitType, unitId);
const prev = _runtimeCache.get(path) ?? null;
const phase =
updates.phase ??
(updates.status ? phaseForStatus(updates.status) : prev?.phase) ??
"dispatched";
const status =
updates.status ??
(updates.phase || !prev?.status
? inferStatusFromPhase(phase, {
runawayGuardPause:
updates.runawayGuardPause ?? prev?.runawayGuardPause,
})
: prev.status);
const recoveryAttempts = hasUpdate(updates, "recoveryAttempts")
? (updates.recoveryAttempts ?? 0)
: (prev?.recoveryAttempts ?? 0);
const retryCount = hasUpdate(updates, "retryCount")
? (updates.retryCount ?? 0)
: hasUpdate(updates, "recoveryAttempts")
? (updates.recoveryAttempts ?? 0)
: (prev?.retryCount ?? recoveryAttempts ?? 0);
const next: AutoUnitRuntimeRecord = {
version: 1,
unitType,
unitId,
startedAt,
updatedAt: Date.now(),
phase: updates.phase ?? prev?.phase ?? "dispatched",
phase,
status,
wrapupWarningSent:
updates.wrapupWarningSent ?? prev?.wrapupWarningSent ?? false,
continueHereFired:
updates.continueHereFired ?? prev?.continueHereFired ?? false,
timeoutAt: updates.timeoutAt ?? prev?.timeoutAt ?? null,
timeoutAt: hasUpdate(updates, "timeoutAt")
? (updates.timeoutAt ?? null)
: (prev?.timeoutAt ?? null),
lastHeartbeatAt: hasUpdate(updates, "lastHeartbeatAt")
? (updates.lastHeartbeatAt ?? null)
: (prev?.lastHeartbeatAt ?? startedAt),
lastProgressAt:
updates.lastProgressAt ?? prev?.lastProgressAt ?? Date.now(),
progressCount: updates.progressCount ?? prev?.progressCount ?? 0,
lastProgressKind:
updates.lastProgressKind ?? prev?.lastProgressKind ?? "dispatch",
lastOutputAt: hasUpdate(updates, "lastOutputAt")
? (updates.lastOutputAt ?? null)
: (prev?.lastOutputAt ?? null),
outputPath: hasUpdate(updates, "outputPath")
? (updates.outputPath ?? null)
: (prev?.outputPath ?? null),
watchdogReason: hasUpdate(updates, "watchdogReason")
? (updates.watchdogReason ?? null)
: (prev?.watchdogReason ?? null),
notifiedAt: hasUpdate(updates, "notifiedAt")
? (updates.notifiedAt ?? null)
: (prev?.notifiedAt ?? null),
recovery: updates.recovery ?? prev?.recovery,
recoveryAttempts: updates.recoveryAttempts ?? prev?.recoveryAttempts ?? 0,
recoveryAttempts,
retryCount,
maxRetries:
updates.maxRetries ??
prev?.maxRetries ??
DEFAULT_UNIT_RUNTIME_MAX_RETRIES,
lastRecoveryReason: updates.lastRecoveryReason ?? prev?.lastRecoveryReason,
runawayGuardPause: updates.runawayGuardPause ?? prev?.runawayGuardPause,
};

View file

@ -8,7 +8,7 @@ import {
} from "node:fs";
import { tmpdir } from "node:os";
import { join, resolve } from "node:path";
import { test, afterEach } from 'vitest';
import { afterEach, test } from "vitest";
const projectRoot = process.cwd();
@ -954,3 +954,42 @@ test("reapOrphanedNextServerProcesses returns zero reaped on non-Linux platforms
test("reapOrphanedNextServerProcesses is exported and callable", () => {
  // Guards against the export being renamed or dropped during refactors.
  const reap = webMode.reapOrphanedNextServerProcesses;
  assert.equal(typeof reap, "function");
});
test("reapOrphanedNextServerProcesses kills orphaned standalone next-server", () => {
  const killCalls: Array<{ pid: number; signal: string }> = [];
  const stderrText: string[] = [];
  const fakePackageRoot = "/tmp/sf-package";
  // Fake `ps` output: pid 123 is an orphan (ppid 1) inside our standalone
  // dir, pid 124 has a live parent, pid 125 runs elsewhere entirely.
  const psLines = [
    "123 1 node /tmp/sf-package/dist/web/standalone/node_modules/next/dist/server/next-server.js node",
    "124 999 node /tmp/sf-package/dist/web/standalone/node_modules/next/dist/server/next-server.js node",
    "125 1 node /elsewhere/next-server.js node",
  ].join("\n");
  const fakeStderr = {
    write: (chunk: string) => {
      stderrText.push(chunk);
      return true;
    },
  };
  const result = webMode.reapOrphanedNextServerProcesses(
    fakeStderr,
    fakePackageRoot,
    {
      platform: "linux",
      execSync: (() => psLines) as any,
      readlinkSync: ((path: string) => {
        if (path === "/proc/123/cwd")
          return "/tmp/sf-package/dist/web/standalone";
        if (path === "/proc/124/cwd")
          return "/tmp/sf-package/dist/web/standalone";
        return "/elsewhere";
      }) as any,
      kill: ((pid: number, signal: string) => {
        killCalls.push({ pid, signal });
        return true;
      }) as any,
    },
  );
  // Only the orphaned process in the standalone dir should be reaped.
  assert.equal(result.reaped, 1);
  assert.deepEqual(killCalls, [{ pid: 123, signal: "SIGTERM" }]);
  assert.match(stderrText.join(""), /Reaped orphaned next-server/);
});

View file

@ -6,7 +6,13 @@ import {
spawn,
} from "node:child_process";
import { randomBytes } from "node:crypto";
import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
import {
existsSync,
readFileSync,
readlinkSync,
unlinkSync,
writeFileSync,
} from "node:fs";
import { request as httpRequest } from "node:http";
import { createServer } from "node:net";
import { join, resolve } from "node:path";
@ -16,10 +22,7 @@ import {
} from "./app-paths.js";
const DEFAULT_HOST = "127.0.0.1";
const DEFAULT_PACKAGE_ROOT = resolve(
import.meta.dirname,
"..",
);
const DEFAULT_PACKAGE_ROOT = resolve(import.meta.dirname, "..");
/** Open a URL in the user's default browser. */
function openBrowser(url: string): void {
@ -685,10 +688,17 @@ function cleanupStaleInstance(
export function reapOrphanedNextServerProcesses(
stderr: WritableLike,
packageRoot = DEFAULT_PACKAGE_ROOT,
deps: {
execSync?: typeof execSync;
readlinkSync?: typeof readlinkSync;
kill?: typeof process.kill;
platform?: NodeJS.Platform;
} = {},
): { reaped: number; errors: string[] } {
const errors: string[] = [];
let reaped = 0;
if (process.platform === "win32") {
const platform = deps.platform ?? process.platform;
if (platform === "win32") {
// Windows orphan detection not implemented; rely on port-kill fallback
return { reaped: 0, errors: [] };
}
@ -696,10 +706,10 @@ export function reapOrphanedNextServerProcesses(
// Find next-server processes with cwd matching our standalone host path
const standalonePath = resolve(packageRoot, "dist", "web", "standalone");
// Use ps to find node processes with next-server in their command line
const psOutput = execSync(
const psOutput = (deps.execSync ?? execSync)(
"ps -eo pid,ppid,cmd,comm --no-headers",
{ encoding: "utf8", timeout: 5000 },
);
) as string;
const lines = psOutput.split("\n").filter((line) => line.trim());
for (const line of lines) {
const parts = line.trim().split(/\s+/);
@ -715,7 +725,7 @@ export function reapOrphanedNextServerProcesses(
// Check if the process cwd matches our standalone path (or deleted variant)
let cwd: string | null = null;
try {
cwd = readFileSync(`/proc/${pid}/cwd`, "utf8").trim();
cwd = (deps.readlinkSync ?? readlinkSync)(`/proc/${pid}/cwd`);
} catch {
// Process may have exited between ps and readlink
continue;
@ -728,7 +738,7 @@ export function reapOrphanedNextServerProcesses(
const isOrphan = ppid === 1;
if (isOrphan) {
try {
process.kill(pid, "SIGTERM");
(deps.kill ?? process.kill)(pid, "SIGTERM");
reaped++;
stderr.write(
`[forge] Reaped orphaned next-server (pid=${pid}, cwd=${cwd})\n`,