fix(sf): recover model routes and self-feedback
This commit is contained in:
parent
c308a492d7
commit
dd126ddc8b
37 changed files with 4295 additions and 563 deletions
|
|
@ -27,6 +27,7 @@ class MockRpcClient {
|
|||
stopped = false;
|
||||
aborted = false;
|
||||
prompted: string[] = [];
|
||||
switchedSessions: string[] = [];
|
||||
private eventListeners: Array<(event: Record<string, unknown>) => void> = [];
|
||||
uiResponses: Array<{ requestId: string; response: Record<string, unknown> }> = [];
|
||||
|
||||
|
|
@ -69,6 +70,16 @@ class MockRpcClient {
|
|||
|
||||
async prompt(message: string): Promise<void> {
|
||||
this.prompted.push(message);
|
||||
if (message === '/sf pause') {
|
||||
queueMicrotask(() => {
|
||||
this.emitEvent({
|
||||
type: 'extension_ui_request',
|
||||
id: 'pause-notice',
|
||||
method: 'notify',
|
||||
message: 'Auto-mode paused: daemon reload requested',
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async abort(): Promise<void> {
|
||||
|
|
@ -79,6 +90,18 @@ class MockRpcClient {
|
|||
this.uiResponses.push({ requestId, response });
|
||||
}
|
||||
|
||||
async getState(): Promise<{ sessionFile: string; sessionId: string }> {
|
||||
return {
|
||||
sessionFile: `/tmp/${this.initSessionId}.jsonl`,
|
||||
sessionId: this.initSessionId,
|
||||
};
|
||||
}
|
||||
|
||||
async switchSession(sessionPath: string): Promise<{ cancelled: boolean }> {
|
||||
this.switchedSessions.push(sessionPath);
|
||||
return { cancelled: false };
|
||||
}
|
||||
|
||||
/** Test helper — emit an event to all listeners */
|
||||
emitEvent(event: Record<string, unknown>): void {
|
||||
for (const listener of this.eventListeners) {
|
||||
|
|
@ -98,6 +121,15 @@ class TestableSessionManager extends SessionManager {
|
|||
nextInitError: Error | null = null;
|
||||
nextStartError: Error | null = null;
|
||||
|
||||
protected override createRpcClient(_cliPath: string, cwd: string, args: string[]): any {
|
||||
this.sessionCounter++;
|
||||
const client = new MockRpcClient({ cwd, args });
|
||||
client.initSessionId = `mock-session-${String(this.sessionCounter).padStart(3, '0')}`;
|
||||
this.lastClient = client;
|
||||
this.allClients.push(client);
|
||||
return client;
|
||||
}
|
||||
|
||||
override async startSession(options: { projectDir: string; command?: string; model?: string; bare?: boolean; cliPath?: string }): Promise<string> {
|
||||
const { projectDir } = options;
|
||||
|
||||
|
|
@ -116,7 +148,7 @@ class TestableSessionManager extends SessionManager {
|
|||
);
|
||||
}
|
||||
|
||||
const client = new MockRpcClient({ cwd: resolvedDir, args: [] });
|
||||
const client = this.createRpcClient('mock-sf', resolvedDir, []);
|
||||
if (this.nextStartError) {
|
||||
client.startError = this.nextStartError;
|
||||
this.nextStartError = null;
|
||||
|
|
@ -126,22 +158,19 @@ class TestableSessionManager extends SessionManager {
|
|||
this.nextInitError = null;
|
||||
}
|
||||
|
||||
this.sessionCounter++;
|
||||
client.initSessionId = `mock-session-${String(this.sessionCounter).padStart(3, '0')}`;
|
||||
this.lastClient = client;
|
||||
this.allClients.push(client);
|
||||
|
||||
// Build session shell
|
||||
const session: ManagedSession = {
|
||||
sessionId: '',
|
||||
projectDir: resolvedDir,
|
||||
projectName,
|
||||
status: 'starting',
|
||||
reloadState: 'running',
|
||||
client: client as any, // duck-typed mock
|
||||
events: [],
|
||||
pendingBlocker: null,
|
||||
cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } },
|
||||
startTime: Date.now(),
|
||||
startOptions: { ...options, projectDir: resolvedDir },
|
||||
};
|
||||
|
||||
// Insert into internal sessions map
|
||||
|
|
@ -300,6 +329,38 @@ describe('SessionManager', () => {
|
|||
assert.equal(completedLogs.length, 1);
|
||||
});
|
||||
|
||||
it('runtime epoch mismatch restarts child and resumes prior session file', async () => {
|
||||
const { manager } = createManager();
|
||||
|
||||
const sessionId = await manager.startSession({ projectDir: '/tmp/reload-project' });
|
||||
const originalClient = manager.lastClient!;
|
||||
const restarted = new Promise<void>((resolve) => {
|
||||
manager.once('session:restarted', () => resolve());
|
||||
});
|
||||
|
||||
originalClient.emitEvent({
|
||||
type: 'runtime_heartbeat',
|
||||
sessionId,
|
||||
sessionFile: '/tmp/reload-session.jsonl',
|
||||
unitType: 'execute-task',
|
||||
unitId: 'M001/S01/T01',
|
||||
runtimeEpoch: 100,
|
||||
sourceEpoch: 200,
|
||||
emittedAt: Date.now(),
|
||||
});
|
||||
|
||||
await restarted;
|
||||
|
||||
const session = manager.getSession('mock-session-002')!;
|
||||
assert.ok(session);
|
||||
assert.equal(originalClient.stopped, true);
|
||||
assert.equal(manager.allClients.length, 2);
|
||||
const replacement = manager.allClients[1];
|
||||
assert.deepEqual(replacement.switchedSessions, ['/tmp/mock-session-001.jsonl']);
|
||||
assert.deepEqual(replacement.prompted, ['/sf autonomous']);
|
||||
assert.equal(session.reloadState, 'running');
|
||||
});
|
||||
|
||||
// ---- Lifecycle: start → running → blocked → resolve → running → completed ----
|
||||
|
||||
it('start → blocked → resolve → running → completed lifecycle', async () => {
|
||||
|
|
@ -723,8 +784,10 @@ describe('SessionManager', () => {
|
|||
|
||||
assert.equal(result.sessionId, sessionId);
|
||||
assert.equal(result.status, 'running');
|
||||
assert.equal(result.reloadState, 'running');
|
||||
assert.equal(result.projectName, 'result-test');
|
||||
assert.equal(result.error, null);
|
||||
assert.equal(result.lastHeartbeat, null);
|
||||
assert.equal(result.pendingBlocker, null);
|
||||
assert.ok(typeof result.durationMs === 'number');
|
||||
assert.ok(result.cost);
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ import type {
|
|||
ManagedSession,
|
||||
StartSessionOptions,
|
||||
PendingBlocker,
|
||||
RuntimeHeartbeat,
|
||||
} from './types.js';
|
||||
import { MAX_EVENTS, INIT_TIMEOUT_MS } from './types.js';
|
||||
import type { Logger } from './logger.js';
|
||||
|
|
@ -34,7 +35,8 @@ const FIRE_AND_FORGET_METHODS = new Set([
|
|||
'notify', 'setStatus', 'setWidget', 'setTitle', 'set_editor_text',
|
||||
]);
|
||||
|
||||
const TERMINAL_PREFIXES = ['auto-mode stopped', 'step-mode stopped'];
|
||||
const TERMINAL_PREFIXES = ['auto-mode stopped', 'auto-mode paused', 'step-mode stopped'];
|
||||
const RELOAD_PAUSE_TIMEOUT_MS = 5_000;
|
||||
|
||||
function isTerminalNotification(event: Record<string, unknown>): boolean {
|
||||
if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false;
|
||||
|
|
@ -45,7 +47,7 @@ function isTerminalNotification(event: Record<string, unknown>): boolean {
|
|||
function isBlockedNotification(event: Record<string, unknown>): boolean {
|
||||
if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false;
|
||||
const message = String(event.message ?? '').toLowerCase();
|
||||
return message.includes('blocked:');
|
||||
return message.includes('blocked:') || message.startsWith('auto-mode paused');
|
||||
}
|
||||
|
||||
function isBlockingUIRequest(event: Record<string, unknown>): boolean {
|
||||
|
|
@ -96,11 +98,7 @@ export class SessionManager extends EventEmitter {
|
|||
if (options.model) args.push('--model', options.model);
|
||||
if (options.bare) args.push('--bare');
|
||||
|
||||
const client = new RpcClient({
|
||||
cliPath,
|
||||
cwd: resolvedDir,
|
||||
args,
|
||||
});
|
||||
const client = this.createRpcClient(cliPath, resolvedDir, args);
|
||||
|
||||
// Build the session shell before async operations so we can track state
|
||||
const session: ManagedSession = {
|
||||
|
|
@ -108,11 +106,13 @@ export class SessionManager extends EventEmitter {
|
|||
projectDir: resolvedDir,
|
||||
projectName,
|
||||
status: 'starting',
|
||||
reloadState: 'running',
|
||||
client,
|
||||
events: [],
|
||||
pendingBlocker: null,
|
||||
cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } },
|
||||
startTime: Date.now(),
|
||||
startOptions: { ...options, projectDir: resolvedDir },
|
||||
};
|
||||
|
||||
// Insert into map early (keyed by dir) so concurrent starts are rejected
|
||||
|
|
@ -231,6 +231,18 @@ export class SessionManager extends EventEmitter {
|
|||
this.logger.info('session cancelled', { sessionId, projectDir: session.projectDir });
|
||||
}
|
||||
|
||||
/**
|
||||
* Restart a managed RPC child and resume the same persisted session when possible.
|
||||
*
|
||||
* Purpose: make daemon-managed auto sessions pick up changed runtime/source
|
||||
* files at process boundaries instead of trying unsafe in-process hot reload.
|
||||
*/
|
||||
async reloadSession(sessionId: string, reason = 'runtime epoch changed'): Promise<void> {
|
||||
const session = this.getSession(sessionId);
|
||||
if (!session) throw new Error(`Session not found: ${sessionId}`);
|
||||
await this.restartSession(session, reason);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a HeadlessJsonResult-shaped object from accumulated session state.
|
||||
*/
|
||||
|
|
@ -245,9 +257,11 @@ export class SessionManager extends EventEmitter {
|
|||
projectDir: session.projectDir,
|
||||
projectName: session.projectName,
|
||||
status: session.status,
|
||||
reloadState: session.reloadState ?? 'running',
|
||||
durationMs,
|
||||
cost: session.cost,
|
||||
recentEvents: session.events.slice(-10),
|
||||
lastHeartbeat: session.lastHeartbeat ?? null,
|
||||
pendingBlocker: session.pendingBlocker
|
||||
? { id: session.pendingBlocker.id, method: session.pendingBlocker.method, message: session.pendingBlocker.message }
|
||||
: null,
|
||||
|
|
@ -311,6 +325,10 @@ export class SessionManager extends EventEmitter {
|
|||
this.logger.debug('session event', { sessionId: session.sessionId, type: (event as Record<string, unknown>).type as string });
|
||||
this.emit('session:event', { sessionId: session.sessionId, projectDir: session.projectDir, event });
|
||||
|
||||
if ((event as Record<string, unknown>).type === 'runtime_heartbeat') {
|
||||
this.handleRuntimeHeartbeat(session, event as unknown as RuntimeHeartbeat);
|
||||
}
|
||||
|
||||
// Cost tracking (K004 — cumulative-max)
|
||||
if ((event as Record<string, unknown>).type === 'cost_update') {
|
||||
const costEvent = event as unknown as RpcCostUpdateEvent;
|
||||
|
|
@ -371,6 +389,135 @@ export class SessionManager extends EventEmitter {
|
|||
});
|
||||
}
|
||||
}
|
||||
|
||||
private handleRuntimeHeartbeat(session: ManagedSession, heartbeat: RuntimeHeartbeat): void {
|
||||
session.lastHeartbeat = heartbeat;
|
||||
if (heartbeat.runtimeEpoch === heartbeat.sourceEpoch) return;
|
||||
if (session.reloadState === 'reloading') return;
|
||||
if (session.status !== 'running' && session.status !== 'blocked') return;
|
||||
|
||||
this.logger.info('runtime epoch mismatch detected', {
|
||||
sessionId: session.sessionId,
|
||||
projectDir: session.projectDir,
|
||||
unitType: heartbeat.unitType,
|
||||
unitId: heartbeat.unitId,
|
||||
runtimeEpoch: heartbeat.runtimeEpoch,
|
||||
sourceEpoch: heartbeat.sourceEpoch,
|
||||
});
|
||||
|
||||
void this.restartSession(session, 'runtime epoch changed').catch((err) => {
|
||||
session.reloadState = 'reload_failed';
|
||||
session.status = 'error';
|
||||
session.error = err instanceof Error ? err.message : String(err);
|
||||
this.logger.error('session reload failed', {
|
||||
sessionId: session.sessionId,
|
||||
projectDir: session.projectDir,
|
||||
error: session.error,
|
||||
});
|
||||
this.emit('session:error', {
|
||||
sessionId: session.sessionId,
|
||||
projectDir: session.projectDir,
|
||||
projectName: session.projectName,
|
||||
error: session.error,
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
private async restartSession(session: ManagedSession, reason: string): Promise<void> {
|
||||
if (session.reloadState === 'reloading') return;
|
||||
session.reloadState = 'reloading';
|
||||
|
||||
let sessionFile = session.lastHeartbeat?.sessionFile;
|
||||
try {
|
||||
const state = await session.client.getState();
|
||||
sessionFile = state.sessionFile ?? sessionFile;
|
||||
} catch {
|
||||
// Best effort: a wedged child may not answer state requests.
|
||||
}
|
||||
|
||||
try {
|
||||
await session.client.prompt('/sf pause');
|
||||
await waitFor(
|
||||
() => session.status === 'blocked' || session.status === 'completed' || session.status === 'cancelled',
|
||||
RELOAD_PAUSE_TIMEOUT_MS,
|
||||
);
|
||||
} catch {
|
||||
// Timeout or prompt failure: stop() escalates SIGTERM to SIGKILL.
|
||||
}
|
||||
|
||||
session.unsubscribe?.();
|
||||
try {
|
||||
await session.client.stop();
|
||||
} catch {
|
||||
// stop() is best-effort; subsequent start creates a new child.
|
||||
}
|
||||
|
||||
const opts = session.startOptions ?? { projectDir: session.projectDir };
|
||||
const cliPath = opts.cliPath ?? SessionManager.resolveCLIPath();
|
||||
const args: string[] = ['--mode', 'rpc'];
|
||||
if (opts.model) args.push('--model', opts.model);
|
||||
if (opts.bare) args.push('--bare');
|
||||
|
||||
const client = this.createRpcClient(cliPath, session.projectDir, args);
|
||||
|
||||
await Promise.race([
|
||||
client.start(),
|
||||
timeout(INIT_TIMEOUT_MS, `RpcClient.start() timed out after ${INIT_TIMEOUT_MS}ms`),
|
||||
]);
|
||||
|
||||
const initResult: RpcInitResult = await Promise.race([
|
||||
client.init(),
|
||||
timeout(INIT_TIMEOUT_MS, `RpcClient.init() timed out after ${INIT_TIMEOUT_MS}ms`),
|
||||
]) as RpcInitResult;
|
||||
|
||||
session.client = client;
|
||||
session.sessionId = initResult.sessionId;
|
||||
session.status = 'running';
|
||||
session.pendingBlocker = null;
|
||||
session.reloadState = 'restarted';
|
||||
session.error = undefined;
|
||||
session.startOptions = { ...opts, projectDir: session.projectDir };
|
||||
session.unsubscribe = client.onEvent((event: SdkAgentEvent) => {
|
||||
this.handleEvent(session, event);
|
||||
});
|
||||
|
||||
if (sessionFile) {
|
||||
try {
|
||||
await client.switchSession(sessionFile);
|
||||
} catch (err) {
|
||||
this.logger.warn('session reload could not switch to previous session file', {
|
||||
sessionId: session.sessionId,
|
||||
projectDir: session.projectDir,
|
||||
sessionFile,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
await client.prompt(opts.command ?? '/sf autonomous');
|
||||
session.reloadState = 'running';
|
||||
this.logger.info('session reloaded', {
|
||||
sessionId: session.sessionId,
|
||||
projectDir: session.projectDir,
|
||||
reason,
|
||||
resumedSessionFile: sessionFile,
|
||||
});
|
||||
this.emit('session:restarted', {
|
||||
sessionId: session.sessionId,
|
||||
projectDir: session.projectDir,
|
||||
projectName: session.projectName,
|
||||
reason,
|
||||
sessionFile,
|
||||
});
|
||||
}
|
||||
|
||||
protected createRpcClient(cliPath: string, cwd: string, args: string[]): RpcClient {
|
||||
return new RpcClient({
|
||||
cliPath,
|
||||
cwd,
|
||||
args,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
|
@ -383,6 +530,24 @@ function timeout(ms: number, message: string): Promise<never> {
|
|||
});
|
||||
}
|
||||
|
||||
function waitFor(predicate: () => boolean, timeoutMs: number): Promise<void> {
|
||||
if (predicate()) return Promise.resolve();
|
||||
return new Promise((resolve, reject) => {
|
||||
const startedAt = Date.now();
|
||||
const interval = setInterval(() => {
|
||||
if (predicate()) {
|
||||
clearInterval(interval);
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
if (Date.now() - startedAt >= timeoutMs) {
|
||||
clearInterval(interval);
|
||||
reject(new Error(`Timed out after ${timeoutMs}ms`));
|
||||
}
|
||||
}, 100);
|
||||
});
|
||||
}
|
||||
|
||||
function extractBlocker(event: SdkAgentEvent): PendingBlocker {
|
||||
const uiEvent = event as unknown as RpcExtensionUIRequest;
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -57,6 +57,17 @@ export interface DaemonConfig {
|
|||
// ---------------------------------------------------------------------------
|
||||
|
||||
export type SessionStatus = 'starting' | 'running' | 'blocked' | 'completed' | 'error' | 'cancelled';
|
||||
export type ReloadState = 'running' | 'reloading' | 'restarted' | 'reload_failed';
|
||||
|
||||
export interface RuntimeHeartbeat {
|
||||
sessionId: string;
|
||||
sessionFile?: string;
|
||||
unitType?: string;
|
||||
unitId?: string;
|
||||
runtimeEpoch: number;
|
||||
sourceEpoch: number;
|
||||
emittedAt: number;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Managed Session
|
||||
|
|
@ -78,6 +89,9 @@ export interface ManagedSession {
|
|||
/** Current lifecycle status */
|
||||
status: SessionStatus;
|
||||
|
||||
/** Daemon-managed runtime reload state */
|
||||
reloadState?: ReloadState;
|
||||
|
||||
/** The RpcClient instance managing the agent process */
|
||||
client: RpcClient;
|
||||
|
||||
|
|
@ -96,6 +110,12 @@ export interface ManagedSession {
|
|||
/** Error message if status is 'error' */
|
||||
error?: string;
|
||||
|
||||
/** Latest runtime heartbeat received from the RPC child */
|
||||
lastHeartbeat?: RuntimeHeartbeat;
|
||||
|
||||
/** Original session start options used for daemon-managed restarts */
|
||||
startOptions?: StartSessionOptions;
|
||||
|
||||
/** Cleanup function to unsubscribe from events */
|
||||
unsubscribe?: () => void;
|
||||
}
|
||||
|
|
|
|||
70
packages/pi-ai/src/providers/google-gemini-cli.test.ts
Normal file
70
packages/pi-ai/src/providers/google-gemini-cli.test.ts
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { describe, test, vi } from "vitest";
|
||||
import type { Context, Model } from "../types.js";
|
||||
|
||||
const geminiCliCore = vi.hoisted(() => ({
|
||||
retryError: undefined as Error | undefined,
|
||||
retryOptions: undefined as Record<string, unknown> | undefined,
|
||||
}));
|
||||
|
||||
vi.mock("@google/gemini-cli-core", () => ({
|
||||
AuthType: { LOGIN_WITH_GOOGLE: "LOGIN_WITH_GOOGLE" },
|
||||
CodeAssistServer: class {
|
||||
async generateContentStream(): Promise<AsyncGenerator<unknown>> {
|
||||
return (async function* emptyStream() {})();
|
||||
}
|
||||
},
|
||||
getOauthClient: vi.fn(async () => ({})),
|
||||
makeFakeConfig: vi.fn(() => ({})),
|
||||
retryWithBackoff: vi.fn(async (_fn: unknown, options: Record<string, unknown>) => {
|
||||
geminiCliCore.retryOptions = options;
|
||||
throw geminiCliCore.retryError ?? new Error("quota exhausted");
|
||||
}),
|
||||
setupUser: vi.fn(async () => ({ projectId: "test-project" })),
|
||||
}));
|
||||
|
||||
import { streamGoogleGeminiCli } from "./google-gemini-cli.js";
|
||||
|
||||
function makeModel(): Model<"google-gemini-cli"> {
|
||||
return {
|
||||
id: "gemini-3-flash-preview",
|
||||
name: "Gemini 3 Flash Preview",
|
||||
api: "google-gemini-cli",
|
||||
provider: "google-gemini-cli",
|
||||
baseUrl: "",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 1_000_000,
|
||||
maxTokens: 8192,
|
||||
};
|
||||
}
|
||||
|
||||
function makeContext(): Context {
|
||||
return {
|
||||
messages: [{ role: "user", content: "hello", timestamp: 0 }],
|
||||
};
|
||||
}
|
||||
|
||||
describe("google-gemini-cli provider retry ownership", () => {
|
||||
test("google_gemini_cli_when_quota_resets_soon_returns_error_to_caller_without_cli_retry_loop", async () => {
|
||||
geminiCliCore.retryOptions = undefined;
|
||||
geminiCliCore.retryError = Object.assign(
|
||||
new Error(
|
||||
"You have exhausted your capacity on this model. Your quota will reset after 54s.",
|
||||
),
|
||||
{ retryDelayMs: 54_000 },
|
||||
);
|
||||
|
||||
const stream = streamGoogleGeminiCli(makeModel(), makeContext());
|
||||
const result = await stream.result();
|
||||
|
||||
const retryOptions = geminiCliCore.retryOptions as
|
||||
| { maxAttempts?: unknown }
|
||||
| undefined;
|
||||
assert.equal(retryOptions?.maxAttempts, 1);
|
||||
assert.equal(result.stopReason, "error");
|
||||
assert.match(result.errorMessage ?? "", /exhausted your capacity/i);
|
||||
assert.equal(result.retryAfterMs, 54_000);
|
||||
});
|
||||
});
|
||||
|
|
@ -5,7 +5,8 @@
|
|||
* @google/gemini-cli-core — the same library the real `gemini` CLI uses.
|
||||
* cli-core reads ~/.gemini/oauth_creds.json itself, refreshes tokens,
|
||||
* discovers the project (free-tier or whatever's onboarded server-side)
|
||||
* via setupUser(), and handles all the User-Agent / retry / 429 details.
|
||||
* via setupUser(), and handles all the User-Agent / quota-classification details.
|
||||
* Request retry/fallback stays in the caller so SF can move to the next model.
|
||||
*/
|
||||
|
||||
import {
|
||||
|
|
@ -227,6 +228,9 @@ export const streamGoogleGeminiCli: StreamFunction<
|
|||
() => server.generateContentStream(req as any, promptId, "USER" as any),
|
||||
{
|
||||
authType: AuthType.LOGIN_WITH_GOOGLE,
|
||||
// SF owns cross-model fallback. Let cli-core classify quota errors,
|
||||
// but do not let it hold the turn through its 10-attempt retry loop.
|
||||
maxAttempts: 1,
|
||||
signal: options?.signal,
|
||||
},
|
||||
);
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@
|
|||
*/
|
||||
|
||||
import * as crypto from "node:crypto";
|
||||
import { existsSync, readFileSync, readdirSync, statSync } from "node:fs";
|
||||
import { dirname, join, resolve } from "node:path";
|
||||
import type { AgentSession } from "../../core/agent-session.js";
|
||||
import { killTrackedDetachedChildren } from "../../utils/shell.js";
|
||||
import type {
|
||||
|
|
@ -34,6 +36,110 @@ import type {
|
|||
RpcSlashCommand,
|
||||
} from "./rpc-types.js";
|
||||
|
||||
const RUNTIME_HEARTBEAT_INTERVAL_MS = Number(
|
||||
process.env.SF_RUNTIME_HEARTBEAT_INTERVAL_MS ?? 10_000,
|
||||
);
|
||||
|
||||
function findRuntimeSourceRoot(): string {
|
||||
const explicit =
|
||||
process.env.SF_RUNTIME_SOURCE_ROOT ?? process.env.SF_SOURCE_ROOT;
|
||||
if (explicit) return resolve(explicit);
|
||||
|
||||
let dir = resolve(dirname(process.argv[1] ?? process.cwd()));
|
||||
while (true) {
|
||||
if (existsSync(join(dir, "package.json")) && existsSync(join(dir, "src"))) {
|
||||
return dir;
|
||||
}
|
||||
const parent = dirname(dir);
|
||||
if (parent === dir) return process.cwd();
|
||||
dir = parent;
|
||||
}
|
||||
}
|
||||
|
||||
function newestSourceMtimeMs(root: string): number {
|
||||
let newest = 0;
|
||||
const skip = new Set([
|
||||
".git",
|
||||
".sf",
|
||||
"dist",
|
||||
"node_modules",
|
||||
"target",
|
||||
".next",
|
||||
"coverage",
|
||||
]);
|
||||
const stack = [root];
|
||||
while (stack.length > 0) {
|
||||
const dir = stack.pop()!;
|
||||
let entries: import("node:fs").Dirent[];
|
||||
try {
|
||||
entries = readdirSync(dir, { withFileTypes: true });
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
for (const entry of entries) {
|
||||
if (skip.has(entry.name)) continue;
|
||||
const full = join(dir, entry.name);
|
||||
if (entry.isDirectory()) {
|
||||
stack.push(full);
|
||||
continue;
|
||||
}
|
||||
if (!entry.isFile() || !/\.(?:ts|tsx|mts|cts)$/.test(entry.name)) {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
newest = Math.max(newest, statSync(full).mtimeMs);
|
||||
} catch {
|
||||
// Ignore files that disappear during a scan.
|
||||
}
|
||||
}
|
||||
}
|
||||
return newest;
|
||||
}
|
||||
|
||||
interface RuntimeUnitState {
|
||||
unitType?: string;
|
||||
unitId?: string;
|
||||
sessionFile?: string;
|
||||
}
|
||||
|
||||
function effectiveAutoLockFile(): string {
|
||||
const milestoneLock = process.env.SF_PARALLEL_WORKER
|
||||
? process.env.SF_MILESTONE_LOCK
|
||||
: undefined;
|
||||
return milestoneLock ? `auto-${milestoneLock}.lock` : "auto.lock";
|
||||
}
|
||||
|
||||
function readRuntimeUnitState(): RuntimeUnitState {
|
||||
const roots = [process.env.SF_PROJECT_ROOT, process.cwd()].filter(
|
||||
(root): root is string => Boolean(root),
|
||||
);
|
||||
const seen = new Set<string>();
|
||||
for (const root of roots) {
|
||||
const resolvedRoot = resolve(root);
|
||||
if (seen.has(resolvedRoot)) continue;
|
||||
seen.add(resolvedRoot);
|
||||
const lockPath = join(resolvedRoot, ".sf", effectiveAutoLockFile());
|
||||
try {
|
||||
if (!existsSync(lockPath)) continue;
|
||||
const data = JSON.parse(readFileSync(lockPath, "utf-8")) as Record<
|
||||
string,
|
||||
unknown
|
||||
>;
|
||||
return {
|
||||
unitType:
|
||||
typeof data.unitType === "string" ? data.unitType : undefined,
|
||||
unitId: typeof data.unitId === "string" ? data.unitId : undefined,
|
||||
sessionFile:
|
||||
typeof data.sessionFile === "string" ? data.sessionFile : undefined,
|
||||
};
|
||||
} catch {
|
||||
// Heartbeats should never fail because lock metadata is temporarily absent
|
||||
// or being rewritten.
|
||||
}
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
// Re-export types for consumers
|
||||
export type {
|
||||
RpcCommand,
|
||||
|
|
@ -519,6 +625,32 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
|
|||
}
|
||||
});
|
||||
|
||||
const runtimeSourceRoot = findRuntimeSourceRoot();
|
||||
const runtimeEpoch = newestSourceMtimeMs(runtimeSourceRoot);
|
||||
const emitRuntimeHeartbeat = () => {
|
||||
const runtimeUnit = readRuntimeUnitState();
|
||||
const heartbeat = {
|
||||
type: "runtime_heartbeat" as const,
|
||||
sessionId: session.sessionId,
|
||||
sessionFile: runtimeUnit.sessionFile ?? session.sessionFile,
|
||||
unitType: runtimeUnit.unitType,
|
||||
unitId: runtimeUnit.unitId,
|
||||
runtimeEpoch,
|
||||
sourceEpoch: newestSourceMtimeMs(runtimeSourceRoot),
|
||||
emittedAt: Date.now(),
|
||||
};
|
||||
if (!eventFilter || eventFilter.has("runtime_heartbeat")) {
|
||||
output(heartbeat);
|
||||
}
|
||||
};
|
||||
const runtimeHeartbeatTimer =
|
||||
RUNTIME_HEARTBEAT_INTERVAL_MS > 0
|
||||
? setInterval(emitRuntimeHeartbeat, RUNTIME_HEARTBEAT_INTERVAL_MS)
|
||||
: undefined;
|
||||
if (runtimeHeartbeatTimer) {
|
||||
signalCleanupHandlers.push(() => clearInterval(runtimeHeartbeatTimer));
|
||||
}
|
||||
|
||||
// Handle a single command
|
||||
const handleCommand = async (command: RpcCommand): Promise<RpcResponse> => {
|
||||
const id = command.id;
|
||||
|
|
@ -901,7 +1033,7 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
|
|||
protocolVersion: 2,
|
||||
sessionId: session.sessionId,
|
||||
capabilities: {
|
||||
events: ["execution_complete", "cost_update"],
|
||||
events: ["execution_complete", "cost_update", "runtime_heartbeat"],
|
||||
commands: ["init", "shutdown", "subscribe"],
|
||||
},
|
||||
};
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ describe("v2 type shapes", () => {
|
|||
protocolVersion: 2,
|
||||
sessionId: "test-session-123",
|
||||
capabilities: {
|
||||
events: ["execution_complete", "cost_update"],
|
||||
events: ["execution_complete", "cost_update", "runtime_heartbeat"],
|
||||
commands: ["init", "shutdown", "subscribe"],
|
||||
},
|
||||
};
|
||||
|
|
@ -158,6 +158,7 @@ describe("v2 type shapes", () => {
|
|||
assert.ok(Array.isArray(initResult.capabilities.commands));
|
||||
assert.ok(initResult.capabilities.events.includes("execution_complete"));
|
||||
assert.ok(initResult.capabilities.events.includes("cost_update"));
|
||||
assert.ok(initResult.capabilities.events.includes("runtime_heartbeat"));
|
||||
assert.ok(initResult.capabilities.commands.includes("init"));
|
||||
assert.ok(initResult.capabilities.commands.includes("shutdown"));
|
||||
assert.ok(initResult.capabilities.commands.includes("subscribe"));
|
||||
|
|
@ -231,6 +232,16 @@ describe("v2 type shapes", () => {
|
|||
cumulativeCost: 0.03,
|
||||
tokens: { input: 100, output: 50, cacheRead: 10, cacheWrite: 5 },
|
||||
},
|
||||
{
|
||||
type: "runtime_heartbeat",
|
||||
sessionId: "s1",
|
||||
sessionFile: "/tmp/s1.jsonl",
|
||||
unitType: "execute-task",
|
||||
unitId: "M001/S01/T01",
|
||||
runtimeEpoch: 100,
|
||||
sourceEpoch: 101,
|
||||
emittedAt: 123,
|
||||
},
|
||||
];
|
||||
|
||||
for (const event of events) {
|
||||
|
|
@ -242,6 +253,9 @@ describe("v2 type shapes", () => {
|
|||
// TypeScript narrows to RpcCostUpdateEvent
|
||||
assert.ok("turnCost" in event);
|
||||
assert.ok("tokens" in event);
|
||||
} else if (event.type === "runtime_heartbeat") {
|
||||
assert.ok("runtimeEpoch" in event);
|
||||
assert.ok("sourceEpoch" in event);
|
||||
} else {
|
||||
assert.fail(`Unexpected event type: ${(event as any).type}`);
|
||||
}
|
||||
|
|
@ -569,7 +583,7 @@ describe("Client ↔ Mock server protocol exchange", () => {
|
|||
protocolVersion: 2,
|
||||
sessionId: "sess-abc",
|
||||
capabilities: {
|
||||
events: ["execution_complete", "cost_update"],
|
||||
events: ["execution_complete", "cost_update", "runtime_heartbeat"],
|
||||
commands: ["init", "shutdown", "subscribe"],
|
||||
},
|
||||
};
|
||||
|
|
|
|||
|
|
@ -273,8 +273,23 @@ export interface RpcCostUpdateEvent {
|
|||
};
|
||||
}
|
||||
|
||||
/** Runtime heartbeat emitted by long-lived RPC children for daemon reload supervision. */
|
||||
export interface RpcRuntimeHeartbeatEvent {
|
||||
type: "runtime_heartbeat";
|
||||
sessionId: string;
|
||||
sessionFile?: string;
|
||||
unitType?: string;
|
||||
unitId?: string;
|
||||
runtimeEpoch: number;
|
||||
sourceEpoch: number;
|
||||
emittedAt: number;
|
||||
}
|
||||
|
||||
/** Discriminated union of all v2-only event types */
|
||||
export type RpcV2Event = RpcExecutionCompleteEvent | RpcCostUpdateEvent;
|
||||
export type RpcV2Event =
|
||||
| RpcExecutionCompleteEvent
|
||||
| RpcCostUpdateEvent
|
||||
| RpcRuntimeHeartbeatEvent;
|
||||
|
||||
// ============================================================================
|
||||
// Extension UI Events (stdout)
|
||||
|
|
|
|||
|
|
@ -216,7 +216,7 @@ describe("type shapes", () => {
|
|||
assert.equal(v2, 2);
|
||||
});
|
||||
|
||||
it("RpcV2Event discriminated union covers both event types", () => {
|
||||
it("RpcV2Event discriminated union covers protocol event types", () => {
|
||||
const events: RpcV2Event[] = [
|
||||
{
|
||||
type: "execution_complete",
|
||||
|
|
@ -241,10 +241,19 @@ describe("type shapes", () => {
|
|||
cumulativeCost: 0.001,
|
||||
tokens: { input: 100, output: 50, cacheRead: 0, cacheWrite: 0 },
|
||||
},
|
||||
{
|
||||
type: "runtime_heartbeat",
|
||||
sessionId: "s1",
|
||||
sessionFile: "/tmp/s1.jsonl",
|
||||
runtimeEpoch: 100,
|
||||
sourceEpoch: 100,
|
||||
emittedAt: 123,
|
||||
},
|
||||
];
|
||||
assert.equal(events.length, 2);
|
||||
assert.equal(events.length, 3);
|
||||
assert.equal(events[0].type, "execution_complete");
|
||||
assert.equal(events[1].type, "cost_update");
|
||||
assert.equal(events[2].type, "runtime_heartbeat");
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -336,8 +336,23 @@ export interface RpcCostUpdateEvent {
|
|||
};
|
||||
}
|
||||
|
||||
/** Runtime heartbeat emitted by long-lived RPC children for daemon reload supervision. */
|
||||
export interface RpcRuntimeHeartbeatEvent {
|
||||
type: "runtime_heartbeat";
|
||||
sessionId: string;
|
||||
sessionFile?: string;
|
||||
unitType?: string;
|
||||
unitId?: string;
|
||||
runtimeEpoch: number;
|
||||
sourceEpoch: number;
|
||||
emittedAt: number;
|
||||
}
|
||||
|
||||
/** Discriminated union of all v2-only event types */
|
||||
export type RpcV2Event = RpcExecutionCompleteEvent | RpcCostUpdateEvent;
|
||||
export type RpcV2Event =
|
||||
| RpcExecutionCompleteEvent
|
||||
| RpcCostUpdateEvent
|
||||
| RpcRuntimeHeartbeatEvent;
|
||||
|
||||
// ============================================================================
|
||||
// Extension UI Events (stdout)
|
||||
|
|
|
|||
|
|
@ -15,8 +15,9 @@
|
|||
* bypassing the extension loader's jiti setup (#1137).
|
||||
*/
|
||||
|
||||
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { dirname, join } from "node:path";
|
||||
import { createJiti } from "@mariozechner/jiti";
|
||||
import { resolveBundledSourceResource } from "./bundled-resource-path.js";
|
||||
import type { SFState } from "./resources/extensions/sf/types.js";
|
||||
|
|
@ -33,7 +34,6 @@ const agentExtensionsDir = join(
|
|||
"extensions",
|
||||
"sf",
|
||||
);
|
||||
const { existsSync } = await import("node:fs");
|
||||
const useAgentDir = existsSync(join(agentExtensionsDir, "state.js"));
|
||||
const sfExtensionPath = (moduleName: string) =>
|
||||
useAgentDir
|
||||
|
|
@ -46,10 +46,7 @@ const sfExtensionPath = (moduleName: string) =>
|
|||
);
|
||||
|
||||
async function loadExtensionModules() {
|
||||
const stateModule = (await jiti.import(
|
||||
sfExtensionPath("state"),
|
||||
{},
|
||||
)) as any;
|
||||
const stateModule = (await jiti.import(sfExtensionPath("state"), {})) as any;
|
||||
const dispatchModule = (await jiti.import(
|
||||
sfExtensionPath("auto-dispatch"),
|
||||
{},
|
||||
|
|
@ -86,6 +83,43 @@ async function loadExtensionModules() {
|
|||
|
||||
// ─── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
type RuntimeDispatchDecisionSummary = {
|
||||
action: "dispatch" | "retry" | "notify" | "block" | "skip";
|
||||
reasonCode:
|
||||
| "no-runtime-record"
|
||||
| "queued"
|
||||
| "retry-budget-available"
|
||||
| "terminal-ready-to-notify"
|
||||
| "retry-budget-exhausted"
|
||||
| "synthetic-reset-required"
|
||||
| "already-notified"
|
||||
| "active-or-claimed"
|
||||
| "notified"
|
||||
| "terminal-nonretryable";
|
||||
retryCount: number;
|
||||
maxRetries: number;
|
||||
retryBudgetRemaining: number;
|
||||
};
|
||||
|
||||
type RuntimeUnitSummary = {
|
||||
unitType: string;
|
||||
unitId: string;
|
||||
phase: string;
|
||||
status: string;
|
||||
startedAt: number | null;
|
||||
updatedAt: number | null;
|
||||
retryCount: number;
|
||||
maxRetries: number;
|
||||
retryBudgetRemaining: number;
|
||||
lastHeartbeatAt: number | null;
|
||||
lastProgressAt: number | null;
|
||||
lastOutputAt: number | null;
|
||||
outputPath: string | null;
|
||||
watchdogReason: string | null;
|
||||
notifiedAt: number | null;
|
||||
dispatchDecision: RuntimeDispatchDecisionSummary;
|
||||
};
|
||||
|
||||
export interface QuerySnapshot {
|
||||
schemaVersion: 1;
|
||||
state: SFState;
|
||||
|
|
@ -105,6 +139,9 @@ export interface QuerySnapshot {
|
|||
}>;
|
||||
total: number;
|
||||
};
|
||||
runtime: {
|
||||
units: RuntimeUnitSummary[];
|
||||
};
|
||||
}
|
||||
|
||||
export interface QueryResult {
|
||||
|
|
@ -114,6 +151,192 @@ export interface QueryResult {
|
|||
|
||||
// ─── Implementation ─────────────────────────────────────────────────────────
|
||||
|
||||
const QUERY_TERMINAL_STATUSES = new Set([
|
||||
"completed",
|
||||
"failed",
|
||||
"blocked",
|
||||
"cancelled",
|
||||
"stale",
|
||||
"runaway-recovered",
|
||||
]);
|
||||
const QUERY_RETRYABLE_TERMINAL_STATUSES = new Set([
|
||||
"failed",
|
||||
"stale",
|
||||
"runaway-recovered",
|
||||
]);
|
||||
const DEFAULT_QUERY_MAX_RETRIES = 1;
|
||||
|
||||
function resolveSfRootForQuery(basePath: string): string {
|
||||
let current = basePath;
|
||||
while (true) {
|
||||
const candidate = join(current, ".sf");
|
||||
if (existsSync(candidate)) return candidate;
|
||||
const parent = dirname(current);
|
||||
if (parent === current) return join(basePath, ".sf");
|
||||
current = parent;
|
||||
}
|
||||
}
|
||||
|
||||
function stringField(value: unknown, fallback = ""): string {
|
||||
return typeof value === "string" ? value : fallback;
|
||||
}
|
||||
|
||||
function numberField(value: unknown): number | null {
|
||||
return typeof value === "number" && Number.isFinite(value) ? value : null;
|
||||
}
|
||||
|
||||
function inferQueryStatus(
|
||||
phase: string,
|
||||
record: Record<string, unknown>,
|
||||
): string {
|
||||
switch (phase) {
|
||||
case "queued":
|
||||
case "claimed":
|
||||
case "running":
|
||||
case "progress":
|
||||
case "completed":
|
||||
case "failed":
|
||||
case "blocked":
|
||||
case "cancelled":
|
||||
case "stale":
|
||||
case "runaway-recovered":
|
||||
case "notified":
|
||||
return phase;
|
||||
case "dispatched":
|
||||
return "running";
|
||||
case "wrapup-warning-sent":
|
||||
case "runaway-warning-sent":
|
||||
case "runaway-final-warning-sent":
|
||||
case "recovered":
|
||||
return "progress";
|
||||
case "timeout":
|
||||
return "stale";
|
||||
case "finalized":
|
||||
return "completed";
|
||||
case "paused":
|
||||
return record.runawayGuardPause ? "runaway-recovered" : "blocked";
|
||||
case "skipped":
|
||||
return "blocked";
|
||||
default:
|
||||
return "running";
|
||||
}
|
||||
}
|
||||
|
||||
function queryRuntimeDecision(input: {
|
||||
unitType: string;
|
||||
unitId: string;
|
||||
status: string;
|
||||
retryCount: number;
|
||||
maxRetries: number;
|
||||
notifiedAt: number | null;
|
||||
}): RuntimeDispatchDecisionSummary {
|
||||
const retryBudgetRemaining = Math.max(0, input.maxRetries - input.retryCount);
|
||||
const common = {
|
||||
retryCount: input.retryCount,
|
||||
maxRetries: input.maxRetries,
|
||||
retryBudgetRemaining,
|
||||
};
|
||||
if (input.notifiedAt !== null) {
|
||||
return { action: "skip", reasonCode: "already-notified", ...common };
|
||||
}
|
||||
if (input.status === "notified") {
|
||||
return { action: "skip", reasonCode: "notified", ...common };
|
||||
}
|
||||
if (input.status === "queued") {
|
||||
return { action: "dispatch", reasonCode: "queued", ...common };
|
||||
}
|
||||
if (!QUERY_TERMINAL_STATUSES.has(input.status)) {
|
||||
return { action: "skip", reasonCode: "active-or-claimed", ...common };
|
||||
}
|
||||
const synthetic =
|
||||
input.unitType === "synthetic" ||
|
||||
input.unitId.includes("parallel-research");
|
||||
if (synthetic && input.status !== "completed") {
|
||||
return {
|
||||
action: "block",
|
||||
reasonCode: "synthetic-reset-required",
|
||||
...common,
|
||||
};
|
||||
}
|
||||
if (QUERY_RETRYABLE_TERMINAL_STATUSES.has(input.status)) {
|
||||
return retryBudgetRemaining > 0
|
||||
? { action: "retry", reasonCode: "retry-budget-available", ...common }
|
||||
: { action: "block", reasonCode: "retry-budget-exhausted", ...common };
|
||||
}
|
||||
if (
|
||||
input.status === "completed" ||
|
||||
input.status === "blocked" ||
|
||||
input.status === "cancelled"
|
||||
) {
|
||||
return {
|
||||
action: "notify",
|
||||
reasonCode: "terminal-ready-to-notify",
|
||||
...common,
|
||||
};
|
||||
}
|
||||
return { action: "skip", reasonCode: "terminal-nonretryable", ...common };
|
||||
}
|
||||
|
||||
function readRuntimeUnitSummaries(basePath: string): RuntimeUnitSummary[] {
|
||||
const unitsDir = join(resolveSfRootForQuery(basePath), "runtime", "units");
|
||||
if (!existsSync(unitsDir)) return [];
|
||||
const results: RuntimeUnitSummary[] = [];
|
||||
for (const file of readdirSync(unitsDir)) {
|
||||
if (!file.endsWith(".json")) continue;
|
||||
try {
|
||||
const record = JSON.parse(
|
||||
readFileSync(join(unitsDir, file), "utf-8"),
|
||||
) as Record<string, unknown>;
|
||||
const unitType = stringField(record.unitType);
|
||||
const unitId = stringField(record.unitId);
|
||||
if (!unitType || !unitId) continue;
|
||||
const phase = stringField(record.phase, "dispatched");
|
||||
const status = stringField(
|
||||
record.status,
|
||||
inferQueryStatus(phase, record),
|
||||
);
|
||||
const recoveryAttempts = numberField(record.recoveryAttempts) ?? 0;
|
||||
const retryCount = numberField(record.retryCount) ?? recoveryAttempts;
|
||||
const maxRetries =
|
||||
numberField(record.maxRetries) ?? DEFAULT_QUERY_MAX_RETRIES;
|
||||
const notifiedAt = numberField(record.notifiedAt);
|
||||
const dispatchDecision = queryRuntimeDecision({
|
||||
unitType,
|
||||
unitId,
|
||||
status,
|
||||
retryCount,
|
||||
maxRetries,
|
||||
notifiedAt,
|
||||
});
|
||||
results.push({
|
||||
unitType,
|
||||
unitId,
|
||||
phase,
|
||||
status,
|
||||
startedAt: numberField(record.startedAt),
|
||||
updatedAt: numberField(record.updatedAt),
|
||||
retryCount,
|
||||
maxRetries,
|
||||
retryBudgetRemaining: dispatchDecision.retryBudgetRemaining,
|
||||
lastHeartbeatAt: numberField(record.lastHeartbeatAt),
|
||||
lastProgressAt: numberField(record.lastProgressAt),
|
||||
lastOutputAt: numberField(record.lastOutputAt),
|
||||
outputPath:
|
||||
typeof record.outputPath === "string" ? record.outputPath : null,
|
||||
watchdogReason:
|
||||
typeof record.watchdogReason === "string"
|
||||
? record.watchdogReason
|
||||
: null,
|
||||
notifiedAt,
|
||||
dispatchDecision,
|
||||
});
|
||||
} catch {
|
||||
// Runtime query must stay best-effort; malformed unit files are ignored.
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
export async function buildQuerySnapshot(
|
||||
basePath: string,
|
||||
): Promise<QuerySnapshot> {
|
||||
|
|
@ -169,6 +392,7 @@ export async function buildQuerySnapshot(
|
|||
state,
|
||||
next,
|
||||
cost: { workers, total: workers.reduce((sum, w) => sum + w.cost, 0) },
|
||||
runtime: { units: readRuntimeUnitSummaries(basePath) },
|
||||
};
|
||||
|
||||
return snapshot;
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import type {
|
|||
ExtensionCommandContext,
|
||||
ExtensionContext,
|
||||
} from "@singularity-forge/pi-coding-agent";
|
||||
import type { Api, Model } from "@singularity-forge/pi-ai";
|
||||
import { getManifestStatus } from "./files.js";
|
||||
import {
|
||||
assessInterruptedSession,
|
||||
|
|
@ -47,7 +48,11 @@ import { getRtkSessionSavings } from "../shared/rtk-session-stats.js";
|
|||
import { deactivateSF } from "../shared/sf-phase-state.js";
|
||||
import { clearActivityLogState } from "./activity-log.js";
|
||||
import { atomicWriteSync } from "./atomic-write.js";
|
||||
import { AutoSession, getAutoSession } from "./auto/session.js";
|
||||
import {
|
||||
AutoSession,
|
||||
getAutoSession,
|
||||
type ModelFailureRecord,
|
||||
} from "./auto/session.js";
|
||||
// import { startSliceParallel } from "./slice-parallel-orchestrator.js"; (decoy for legacy regex tests)
|
||||
import {
|
||||
getBudgetAlertLevel,
|
||||
|
|
@ -542,6 +547,64 @@ export function setCurrentDispatchedModelId(
|
|||
s.currentDispatchedModelId = model ? `${model.provider}/${model.id}` : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the concrete model tracked for the currently running unit.
|
||||
*
|
||||
* Purpose: keep fresh-session restoration and dashboard state aligned after
|
||||
* runtime provider recovery switches models mid-unit.
|
||||
*
|
||||
* Consumer: bootstrap/agent-end-recovery.ts after a configured fallback route
|
||||
* is successfully applied.
|
||||
*/
|
||||
export function setCurrentUnitModel(model: Model<Api> | null): void {
|
||||
s.currentUnitModel = model;
|
||||
setCurrentDispatchedModelId(model);
|
||||
}
|
||||
|
||||
/**
|
||||
* Record that a provider/model route failed for the current auto unit.
|
||||
*
|
||||
* Purpose: prevent retry loops on quota/rate-limit/server failures by making
|
||||
* subsequent recovery skip the failed route for this unit.
|
||||
*
|
||||
* Consumer: bootstrap/agent-end-recovery.ts before selecting the next configured
|
||||
* fallback route.
|
||||
*/
|
||||
export function recordCurrentModelFailure(input: {
|
||||
provider: string;
|
||||
modelId: string;
|
||||
reason: string;
|
||||
timestamp?: number;
|
||||
}): void {
|
||||
if (!s.currentUnit) return;
|
||||
s.modelFailures.push({
|
||||
unitType: s.currentUnit.type,
|
||||
unitId: s.currentUnit.id,
|
||||
provider: input.provider,
|
||||
modelId: input.modelId,
|
||||
reason: input.reason,
|
||||
timestamp: input.timestamp ?? Date.now(),
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Return model failures scoped to the currently running auto unit.
|
||||
*
|
||||
* Purpose: keep recovery decisions unit-local so a quota failure in one unit
|
||||
* does not permanently suppress a model in later work.
|
||||
*
|
||||
* Consumer: bootstrap/agent-end-recovery.ts when resolving the next configured
|
||||
* fallback route.
|
||||
*/
|
||||
export function getCurrentUnitModelFailures(): ModelFailureRecord[] {
|
||||
if (!s.currentUnit) return [];
|
||||
return s.modelFailures.filter(
|
||||
(failure) =>
|
||||
failure.unitType === s.currentUnit?.type &&
|
||||
failure.unitId === s.currentUnit?.id,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark the current research unit as terminal after saving its RESEARCH artifact.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -47,6 +47,15 @@ export interface StartModel {
|
|||
id: string;
|
||||
}
|
||||
|
||||
export interface ModelFailureRecord {
|
||||
unitType: string;
|
||||
unitId: string;
|
||||
provider: string;
|
||||
modelId: string;
|
||||
reason: string;
|
||||
timestamp: number;
|
||||
}
|
||||
|
||||
export interface PendingVerificationRetry {
|
||||
unitId: string;
|
||||
failureContext: string;
|
||||
|
|
@ -156,6 +165,8 @@ export class AutoSession {
|
|||
currentUnitModel: Model<Api> | null = null;
|
||||
/** Fully-qualified model ID (provider/id) set after selectAndApplyModel + hook overrides (#2899). */
|
||||
currentDispatchedModelId: string | null = null;
|
||||
/** Per-session, per-unit failed model routes skipped by runtime recovery. */
|
||||
readonly modelFailures: ModelFailureRecord[] = [];
|
||||
originalModelId: string | null = null;
|
||||
originalModelProvider: string | null = null;
|
||||
lastBudgetAlertLevel: BudgetAlertLevel = 0;
|
||||
|
|
@ -348,6 +359,7 @@ export class AutoSession {
|
|||
this.manualSessionModelOverride = null;
|
||||
this.currentUnitModel = null;
|
||||
this.currentDispatchedModelId = null;
|
||||
this.modelFailures.length = 0;
|
||||
this.originalModelId = null;
|
||||
this.originalModelProvider = null;
|
||||
this.lastBudgetAlertLevel = 0;
|
||||
|
|
|
|||
|
|
@ -4,13 +4,13 @@ import type {
|
|||
} from "@singularity-forge/pi-coding-agent";
|
||||
import {
|
||||
getAutoDashboardData,
|
||||
getAutoModeStartModel,
|
||||
getCurrentUnitModelFailures,
|
||||
isAutoActive,
|
||||
pauseAuto,
|
||||
setCurrentDispatchedModelId,
|
||||
recordCurrentModelFailure,
|
||||
setCurrentUnitModel,
|
||||
} from "../auto.js";
|
||||
import { isSessionSwitchInFlight, resolveAgentEnd } from "../auto-loop.js";
|
||||
import { resolveModelId } from "../auto-model-selection.js";
|
||||
import { blockModel, isModelBlocked } from "../blocked-models.js";
|
||||
import {
|
||||
classifyError,
|
||||
|
|
@ -21,76 +21,122 @@ import {
|
|||
} from "../error-classifier.js";
|
||||
import { checkAutoStartAfterDiscuss } from "../guided-flow.js";
|
||||
import {
|
||||
getNextFallbackModel,
|
||||
type ModelRouteRef,
|
||||
resolveNextModelRoute,
|
||||
} from "../model-route-failure.js";
|
||||
import {
|
||||
resolveModelWithFallbacksForUnit,
|
||||
resolvePersistModelChanges,
|
||||
} from "../preferences.js";
|
||||
import { pauseAutoForProviderError } from "../provider-error-pause.js";
|
||||
import { logWarning } from "../workflow-logger.js";
|
||||
import { resumeAutoAfterProviderDelay } from "./provider-error-resume.js";
|
||||
import { clearDiscussionFlowState } from "./write-gate.js";
|
||||
|
||||
const retryState = createRetryState();
|
||||
const MAX_NETWORK_RETRIES = 2;
|
||||
const MAX_TRANSIENT_AUTO_RESUMES = 8;
|
||||
|
||||
/**
|
||||
* Reset the module-level retry state so a resumed auto-session starts fresh.
|
||||
* Called by provider-error-resume.ts before startAuto() — without this, the
|
||||
* consecutiveTransientCount accumulates across pause/resume cycles and locks
|
||||
* out auto-resume after MAX_TRANSIENT_AUTO_RESUMES total (not consecutive) errors.
|
||||
* Called by provider-error-resume.ts before startAuto() so legacy paused
|
||||
* provider recovery does not inherit stale transient counters.
|
||||
*/
|
||||
export function resetTransientRetryState(): void {
|
||||
resetRetryState(retryState);
|
||||
}
|
||||
|
||||
async function pauseTransientWithBackoff(
|
||||
cls: ErrorClass,
|
||||
pi: ExtensionAPI,
|
||||
function getCurrentRouteFromMessage(
|
||||
lastMsg: unknown,
|
||||
ctx: ExtensionContext,
|
||||
errorDetail: string,
|
||||
isRateLimit: boolean,
|
||||
): Promise<void> {
|
||||
retryState.consecutiveTransientCount += 1;
|
||||
const baseRetryAfterMs = "retryAfterMs" in cls ? cls.retryAfterMs : 15_000;
|
||||
const retryAfterMs =
|
||||
baseRetryAfterMs *
|
||||
2 ** Math.max(0, retryState.consecutiveTransientCount - 1);
|
||||
const allowAutoResume =
|
||||
retryState.consecutiveTransientCount <= MAX_TRANSIENT_AUTO_RESUMES;
|
||||
if (!allowAutoResume) {
|
||||
ctx.ui.notify(
|
||||
`Transient provider errors persisted after ${MAX_TRANSIENT_AUTO_RESUMES} auto-resume attempts. Pausing for manual review.`,
|
||||
): ModelRouteRef | undefined {
|
||||
const msg = lastMsg as Record<string, unknown> | undefined;
|
||||
const provider =
|
||||
typeof msg?.provider === "string" ? msg.provider : ctx.model?.provider;
|
||||
const id = typeof msg?.model === "string" ? msg.model : ctx.model?.id;
|
||||
return provider && id ? { provider, id } : undefined;
|
||||
}
|
||||
|
||||
function isModelRouteFailure(cls: ErrorClass): boolean {
|
||||
return (
|
||||
cls.kind === "rate-limit" ||
|
||||
cls.kind === "network" ||
|
||||
cls.kind === "server" ||
|
||||
cls.kind === "connection" ||
|
||||
cls.kind === "stream"
|
||||
);
|
||||
}
|
||||
|
||||
async function trySwitchToFallbackModel(args: {
|
||||
pi: ExtensionAPI;
|
||||
ctx: ExtensionContext;
|
||||
current: ModelRouteRef | undefined;
|
||||
reason: string;
|
||||
unitType: string;
|
||||
basePath: string | undefined;
|
||||
errorDetail: string;
|
||||
persistModelChanges: boolean;
|
||||
}): Promise<boolean> {
|
||||
const modelConfig = resolveModelWithFallbacksForUnit(args.unitType, {
|
||||
autoBenchmark: true,
|
||||
});
|
||||
|
||||
if (args.current) {
|
||||
recordCurrentModelFailure({
|
||||
provider: args.current.provider,
|
||||
modelId: args.current.id,
|
||||
reason: args.reason,
|
||||
});
|
||||
}
|
||||
|
||||
const availableModels = args.ctx.modelRegistry.getAvailable();
|
||||
const isBlocked = args.basePath
|
||||
? (model: { provider: string; id: string }) =>
|
||||
isModelBlocked(args.basePath!, model.provider, model.id)
|
||||
: undefined;
|
||||
|
||||
for (
|
||||
let attempt = 0;
|
||||
attempt < availableModels.length + (modelConfig?.fallbacks.length ?? 0) + 1;
|
||||
attempt++
|
||||
) {
|
||||
const nextRoute = resolveNextModelRoute({
|
||||
current: args.current,
|
||||
modelConfig,
|
||||
availableModels,
|
||||
failedRoutes: getCurrentUnitModelFailures(),
|
||||
isBlocked,
|
||||
});
|
||||
|
||||
if (!nextRoute) return false;
|
||||
|
||||
const ok = await args.pi.setModel(nextRoute.model, {
|
||||
persist: args.persistModelChanges,
|
||||
});
|
||||
if (!ok) {
|
||||
recordCurrentModelFailure({
|
||||
provider: nextRoute.model.provider,
|
||||
modelId: nextRoute.model.id,
|
||||
reason: "setModel failed during provider recovery",
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
resetRetryState(retryState);
|
||||
setCurrentUnitModel(nextRoute.model);
|
||||
args.ctx.ui.notify(
|
||||
`Model route failed${args.errorDetail}. Switched to ${nextRoute.source === "configured" ? "configured fallback" : "available fallback"}: ${nextRoute.model.provider}/${nextRoute.model.id}.`,
|
||||
"warning",
|
||||
);
|
||||
args.pi.sendMessage(
|
||||
{
|
||||
customType: "sf-auto-timeout-recovery",
|
||||
content: "Continue execution.",
|
||||
display: false,
|
||||
},
|
||||
{ triggerTurn: true },
|
||||
);
|
||||
return true;
|
||||
}
|
||||
await pauseAutoForProviderError(
|
||||
ctx.ui,
|
||||
errorDetail,
|
||||
() =>
|
||||
pauseAuto(ctx, pi, {
|
||||
message: `Provider error: ${errorDetail}`,
|
||||
category: "provider",
|
||||
isTransient: allowAutoResume,
|
||||
retryAfterMs,
|
||||
}),
|
||||
{
|
||||
isRateLimit,
|
||||
isTransient: allowAutoResume,
|
||||
retryAfterMs,
|
||||
resume: allowAutoResume
|
||||
? () => {
|
||||
void resumeAutoAfterProviderDelay(pi, ctx).catch((err) => {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
ctx.ui.notify(
|
||||
`Provider error recovery delay elapsed, but auto-mode failed to resume: ${message}`,
|
||||
"error",
|
||||
);
|
||||
});
|
||||
}
|
||||
: undefined,
|
||||
},
|
||||
);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
export async function handleAgentEnd(
|
||||
|
|
@ -172,14 +218,12 @@ export async function handleAgentEnd(
|
|||
|
||||
// ── 1. Classify using rawErrorMsg to avoid prose false-positives ────
|
||||
const cls = classifyError(rawErrorMsg, explicitRetryAfterMs);
|
||||
const currentRoute = getCurrentRouteFromMessage(lastMsg, ctx);
|
||||
const dash = getAutoDashboardData();
|
||||
|
||||
// ── 1b. Defer to Core RetryHandler for most transient errors ────────
|
||||
// Core retries transient failures in-session after this handler.
|
||||
// Keep that behavior for non-rate-limit classes to avoid pause/retry races,
|
||||
// but let rate-limit continue into model fallback logic below (#4373).
|
||||
if (isTransient(cls) && cls.kind !== "rate-limit") {
|
||||
return;
|
||||
}
|
||||
// SF owns provider-route recovery in auto-mode. Quota/rate-limit/server/
|
||||
// stream/connection failures must leave the failed provider/model route
|
||||
// immediately instead of sleeping or waiting for same-model retry loops.
|
||||
|
||||
// Cap rate-limit backoff for CLI-style providers (openai-codex, google-gemini-cli)
|
||||
// which use per-user quotas with shorter windows (#2922).
|
||||
|
|
@ -198,9 +242,8 @@ export async function handleAgentEnd(
|
|||
// same dead model isn't reselected on the next /sf auto restart,
|
||||
// then try a fallback before pausing.
|
||||
if (cls.kind === "unsupported-model") {
|
||||
const dash = getAutoDashboardData();
|
||||
const rejectedProvider = ctx.model?.provider;
|
||||
const rejectedId = ctx.model?.id;
|
||||
const rejectedProvider = currentRoute?.provider;
|
||||
const rejectedId = currentRoute?.id;
|
||||
if (dash.basePath && rejectedProvider && rejectedId) {
|
||||
try {
|
||||
blockModel(
|
||||
|
|
@ -219,62 +262,18 @@ export async function handleAgentEnd(
|
|||
}
|
||||
}
|
||||
|
||||
// Try configured fallback chain, skipping anything already blocked.
|
||||
if (dash.currentUnit && dash.basePath) {
|
||||
const modelConfig = resolveModelWithFallbacksForUnit(
|
||||
dash.currentUnit.type,
|
||||
);
|
||||
if (modelConfig && modelConfig.fallbacks.length > 0) {
|
||||
const availableModels = ctx.modelRegistry.getAvailable();
|
||||
let cursorModelId: string | undefined = ctx.model?.id;
|
||||
while (true) {
|
||||
const nextModelId = getNextFallbackModel(
|
||||
cursorModelId,
|
||||
modelConfig,
|
||||
);
|
||||
if (!nextModelId) break;
|
||||
if (
|
||||
isModelBlocked(dash.basePath, ctx.model?.provider, nextModelId)
|
||||
) {
|
||||
cursorModelId = nextModelId;
|
||||
continue;
|
||||
}
|
||||
const modelToSet = resolveModelId(
|
||||
nextModelId,
|
||||
availableModels,
|
||||
ctx.model?.provider,
|
||||
);
|
||||
if (
|
||||
modelToSet &&
|
||||
!isModelBlocked(dash.basePath, modelToSet.provider, modelToSet.id)
|
||||
) {
|
||||
const persistModelChanges = resolvePersistModelChanges();
|
||||
const ok = await pi.setModel(modelToSet, {
|
||||
persist: persistModelChanges,
|
||||
});
|
||||
if (ok) {
|
||||
setCurrentDispatchedModelId({
|
||||
provider: modelToSet.provider,
|
||||
id: modelToSet.id,
|
||||
});
|
||||
ctx.ui.notify(
|
||||
`Switched to unblocked fallback: ${nextModelId} and resuming.`,
|
||||
"info",
|
||||
);
|
||||
pi.sendMessage(
|
||||
{
|
||||
customType: "sf-auto-timeout-recovery",
|
||||
content: "Continue execution.",
|
||||
display: false,
|
||||
},
|
||||
{ triggerTurn: true },
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
cursorModelId = nextModelId;
|
||||
}
|
||||
}
|
||||
const switched = await trySwitchToFallbackModel({
|
||||
pi,
|
||||
ctx,
|
||||
current: currentRoute,
|
||||
reason: rawErrorMsg || "unsupported for account",
|
||||
unitType: dash.currentUnit.type,
|
||||
basePath: dash.basePath,
|
||||
errorDetail,
|
||||
persistModelChanges,
|
||||
});
|
||||
if (switched) return;
|
||||
}
|
||||
|
||||
// No usable fallback — pause
|
||||
|
|
@ -292,150 +291,42 @@ export async function handleAgentEnd(
|
|||
|
||||
// ── 2. Decide & Act ──────────────────────────────────────────────────
|
||||
|
||||
// --- Network errors: same-model retry with backoff ---
|
||||
if (cls.kind === "network") {
|
||||
const currentModelId = ctx.model?.id ?? "unknown";
|
||||
if (retryState.currentRetryModelId !== currentModelId) {
|
||||
retryState.networkRetryCount = 0;
|
||||
retryState.currentRetryModelId = currentModelId;
|
||||
}
|
||||
if (retryState.networkRetryCount < MAX_NETWORK_RETRIES) {
|
||||
retryState.networkRetryCount += 1;
|
||||
retryState.consecutiveTransientCount += 1;
|
||||
const attempt = retryState.networkRetryCount;
|
||||
const delayMs = attempt * cls.retryAfterMs;
|
||||
ctx.ui.notify(
|
||||
`Network error on ${currentModelId}${errorDetail}. Retry ${attempt}/${MAX_NETWORK_RETRIES} in ${delayMs / 1000}s...`,
|
||||
"warning",
|
||||
);
|
||||
setTimeout(() => {
|
||||
pi.sendMessage(
|
||||
{
|
||||
customType: "sf-auto-timeout-recovery",
|
||||
content:
|
||||
"Continue execution — retrying after transient network error.",
|
||||
display: false,
|
||||
},
|
||||
{ triggerTurn: true },
|
||||
);
|
||||
}, delayMs);
|
||||
return;
|
||||
}
|
||||
// Network retries exhausted — fall through to model fallback
|
||||
retryState.networkRetryCount = 0;
|
||||
retryState.currentRetryModelId = undefined;
|
||||
ctx.ui.notify(
|
||||
`Network retries exhausted for ${currentModelId}. Attempting model fallback.`,
|
||||
"warning",
|
||||
);
|
||||
}
|
||||
|
||||
// --- Transient errors: try model fallback first, then pause ---
|
||||
// Rate limits are often per-model, so switching models can bypass them.
|
||||
if (
|
||||
cls.kind === "rate-limit" ||
|
||||
cls.kind === "network" ||
|
||||
cls.kind === "server" ||
|
||||
cls.kind === "connection" ||
|
||||
cls.kind === "stream"
|
||||
) {
|
||||
// Try model fallback
|
||||
const dash = getAutoDashboardData();
|
||||
if (dash.currentUnit) {
|
||||
const modelConfig = resolveModelWithFallbacksForUnit(
|
||||
dash.currentUnit.type,
|
||||
);
|
||||
if (modelConfig && modelConfig.fallbacks.length > 0) {
|
||||
const availableModels = ctx.modelRegistry.getAvailable();
|
||||
const nextModelId = getNextFallbackModel(ctx.model?.id, modelConfig);
|
||||
if (nextModelId) {
|
||||
retryState.networkRetryCount = 0;
|
||||
retryState.currentRetryModelId = undefined;
|
||||
const modelToSet = resolveModelId(
|
||||
nextModelId,
|
||||
availableModels,
|
||||
ctx.model?.provider,
|
||||
);
|
||||
if (modelToSet) {
|
||||
const ok = await pi.setModel(modelToSet, {
|
||||
persist: persistModelChanges,
|
||||
});
|
||||
if (ok) {
|
||||
setCurrentDispatchedModelId({
|
||||
provider: modelToSet.provider,
|
||||
id: modelToSet.id,
|
||||
});
|
||||
ctx.ui.notify(
|
||||
`Model error${errorDetail}. Switched to fallback: ${nextModelId} and resuming.`,
|
||||
"warning",
|
||||
);
|
||||
pi.sendMessage(
|
||||
{
|
||||
customType: "sf-auto-timeout-recovery",
|
||||
content: "Continue execution.",
|
||||
display: false,
|
||||
},
|
||||
{ triggerTurn: true },
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try restoring session model
|
||||
const sessionModel = getAutoModeStartModel();
|
||||
if (sessionModel) {
|
||||
if (
|
||||
ctx.model?.id !== sessionModel.id ||
|
||||
ctx.model?.provider !== sessionModel.provider
|
||||
) {
|
||||
const startModel = ctx.modelRegistry
|
||||
.getAvailable()
|
||||
.find(
|
||||
(m) =>
|
||||
m.provider === sessionModel.provider &&
|
||||
m.id === sessionModel.id,
|
||||
);
|
||||
if (startModel) {
|
||||
const ok = await pi.setModel(startModel, {
|
||||
persist: persistModelChanges,
|
||||
});
|
||||
if (ok) {
|
||||
setCurrentDispatchedModelId({
|
||||
provider: startModel.provider,
|
||||
id: startModel.id,
|
||||
});
|
||||
retryState.networkRetryCount = 0;
|
||||
retryState.currentRetryModelId = undefined;
|
||||
ctx.ui.notify(
|
||||
`Model error${errorDetail}. Restored session model: ${sessionModel.provider}/${sessionModel.id} and resuming.`,
|
||||
"warning",
|
||||
);
|
||||
pi.sendMessage(
|
||||
{
|
||||
customType: "sf-auto-timeout-recovery",
|
||||
content: "Continue execution.",
|
||||
display: false,
|
||||
},
|
||||
{ triggerTurn: true },
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --- Transient fallback: pause with auto-resume ---
|
||||
if (isTransient(cls)) {
|
||||
await pauseTransientWithBackoff(
|
||||
cls,
|
||||
// --- Route failures: try configured fallback first, then any available route ---
|
||||
if (isModelRouteFailure(cls) && dash.currentUnit) {
|
||||
const switched = await trySwitchToFallbackModel({
|
||||
pi,
|
||||
ctx,
|
||||
current: currentRoute,
|
||||
reason: rawErrorMsg || cls.kind,
|
||||
unitType: dash.currentUnit.type,
|
||||
basePath: dash.basePath,
|
||||
errorDetail,
|
||||
cls.kind === "rate-limit",
|
||||
persistModelChanges,
|
||||
});
|
||||
if (switched) return;
|
||||
}
|
||||
|
||||
// --- Transient fallback exhausted: pause without same-route auto-resume ---
|
||||
if (isTransient(cls)) {
|
||||
const message =
|
||||
isModelRouteFailure(cls) && dash.currentUnit
|
||||
? `Provider route failed and no usable fallback model remains${errorDetail}`
|
||||
: `Provider error${errorDetail}`;
|
||||
await pauseAutoForProviderError(
|
||||
ctx.ui,
|
||||
errorDetail,
|
||||
() =>
|
||||
pauseAuto(ctx, pi, {
|
||||
message,
|
||||
category: "provider",
|
||||
isTransient: false,
|
||||
retryAfterMs: "retryAfterMs" in cls ? cls.retryAfterMs : undefined,
|
||||
}),
|
||||
{
|
||||
isRateLimit: cls.kind === "rate-limit",
|
||||
isTransient: false,
|
||||
retryAfterMs: "retryAfterMs" in cls ? cls.retryAfterMs : 0,
|
||||
},
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import {
|
|||
nextMilestoneId,
|
||||
} from "../guided-flow.js";
|
||||
import { loadEffectiveSFPreferences } from "../preferences.js";
|
||||
import { recordSelfFeedback } from "../self-feedback.js";
|
||||
import { markResolved, recordSelfFeedback } from "../self-feedback.js";
|
||||
import {
|
||||
executeCompleteMilestone,
|
||||
executePlanMilestone,
|
||||
|
|
@ -687,7 +687,7 @@ export function registerDbTools(pi: ExtensionAPI): void {
|
|||
promptGuidelines: [
|
||||
"Use sf_self_report for ANY sf-internal observation — not just bugs. Acceptable kinds include: 'prompt-quality-issue' (you found a prompt ambiguous, contradictory, or missing context), 'improvement-idea' (a non-bug enhancement that would help), 'agent-friction' (workflow friction you worked around), 'design-thought' (broader speculation), 'missing-feature' (capability you wished sf had), as well as classic bug kinds like 'brittle-predicate' or 'git-empty-pathspec'.",
|
||||
"Do NOT use this for bugs in the user's project, for your own task work, or to track your task's todo list. ONLY for observations about sf-the-tool itself.",
|
||||
"This tool FILES new entries; it does not address or resolve existing ones. Self-feedback is a triage inbox awaiting human/triage-agent review — do NOT autonomously pick entries off self-feedback and try to fix them. Treat existing entries as out of scope unless your task plan explicitly names a self-feedback entry id as the work.",
|
||||
"This tool FILES new entries; it does not resolve existing ones. High/critical forge self-feedback may be queued autonomously at startup or an idle turn boundary as repair work. Use sf_self_feedback_resolve after fixing an entry; do not hand-edit the JSONL.",
|
||||
"Over-reporting is preferred to under-reporting at this stage. If you noticed it about sf, file it. Dedup and threshold-to-roadmap promotion are tracked as their own self-feedback items and will eventually clean noise.",
|
||||
"Severity guide: low = cosmetic / nice-to-have / improvement idea. medium = noisy or imperfect or recurring friction. high = blocked the unit (sf-the-tool prevented you from completing the task). critical = needs immediate fix (currently treated as high until inline-fix dispatch lands).",
|
||||
"high/critical entries mark the originating unit as blocked: it will not seal as success, and will be re-queued only after sf is bumped past the recorded version.",
|
||||
|
|
@ -780,6 +780,145 @@ export function registerDbTools(pi: ExtensionAPI): void {
|
|||
|
||||
pi.registerTool(selfReportTool);
|
||||
|
||||
// ─── sf_self_feedback_resolve ────────────────────────────────────────
|
||||
// Agent-callable resolver for inline self-feedback repair turns. The
|
||||
// inline-fix prompt must not rely on hand-editing JSONL: the tool updates
|
||||
// the structured source of truth and regenerates the markdown view.
|
||||
const selfFeedbackResolveExecute = async (
|
||||
_toolCallId: string,
|
||||
params: any,
|
||||
_signal: AbortSignal | undefined,
|
||||
_onUpdate: unknown,
|
||||
_ctx: unknown,
|
||||
): Promise<AgentToolResult<Record<string, unknown>>> => {
|
||||
try {
|
||||
const ok = markResolved(
|
||||
params.id,
|
||||
{
|
||||
reason: params.reason,
|
||||
evidence: {
|
||||
kind: "agent-fix",
|
||||
commitSha: params.commit_sha,
|
||||
testPath: params.test_path,
|
||||
summaryNarrative: params.summary_narrative,
|
||||
},
|
||||
criteriaMet: params.criteria_met,
|
||||
},
|
||||
process.cwd(),
|
||||
);
|
||||
if (!ok) {
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text" as const,
|
||||
text: `Error: unresolved self-feedback entry not found: ${params.id}`,
|
||||
},
|
||||
],
|
||||
details: {
|
||||
operation: "self_feedback_resolve",
|
||||
id: params.id,
|
||||
error: "not_found_or_already_resolved",
|
||||
},
|
||||
};
|
||||
}
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text" as const,
|
||||
text: `Resolved self-feedback ${params.id}`,
|
||||
},
|
||||
],
|
||||
details: {
|
||||
operation: "self_feedback_resolve",
|
||||
id: params.id,
|
||||
resolved: true,
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
const msg = err instanceof Error ? err.message : String(err);
|
||||
logError("tool", `sf_self_feedback_resolve tool failed: ${msg}`, {
|
||||
tool: "sf_self_feedback_resolve",
|
||||
error: String(err),
|
||||
});
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text" as const,
|
||||
text: `Error in sf_self_feedback_resolve: ${msg}`,
|
||||
},
|
||||
],
|
||||
details: {
|
||||
operation: "self_feedback_resolve",
|
||||
id: params.id,
|
||||
error: msg,
|
||||
},
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
pi.registerTool({
|
||||
name: "sf_self_feedback_resolve",
|
||||
label: "Resolve Self Feedback",
|
||||
description:
|
||||
"Mark a repaired SF self-feedback entry resolved with structured agent-fix evidence. " +
|
||||
"Use this only after verifying the entry no longer applies, landing the fix, and citing the commit or verification evidence.",
|
||||
promptSnippet:
|
||||
"Resolve a repaired SF self-feedback entry with commit/test evidence",
|
||||
promptGuidelines: [
|
||||
"Use sf_self_feedback_resolve during self-feedback inline-fix repair turns after the fix is implemented and verified.",
|
||||
"Do not hand-edit `.sf/self-feedback.jsonl`; this tool updates the JSONL source of truth and regenerates `.sf/SELF-FEEDBACK.md`.",
|
||||
"If the entry has acceptance criteria, pass criteria_met with the criteria that were satisfied.",
|
||||
"Pass commit_sha when a commit exists. If an entry was already fixed, cite the existing commit or include summary_narrative and test_path.",
|
||||
],
|
||||
parameters: Type.Object({
|
||||
id: Type.String({
|
||||
description: "Self-feedback entry id, e.g. sf-moocz9so-4ffov2",
|
||||
}),
|
||||
reason: Type.String({
|
||||
description: "Short explanation of why the entry is resolved",
|
||||
}),
|
||||
commit_sha: Type.Optional(
|
||||
Type.String({ description: "Commit SHA containing the fix" }),
|
||||
),
|
||||
test_path: Type.Optional(
|
||||
Type.String({ description: "Focused test or verification path" }),
|
||||
),
|
||||
summary_narrative: Type.Optional(
|
||||
Type.String({
|
||||
description:
|
||||
"Concise verification summary when a commit/test path alone is not enough",
|
||||
}),
|
||||
),
|
||||
criteria_met: Type.Optional(
|
||||
Type.Array(Type.String(), {
|
||||
description:
|
||||
"Acceptance criteria satisfied by this fix, if the entry provided criteria",
|
||||
}),
|
||||
),
|
||||
}),
|
||||
execute: selfFeedbackResolveExecute,
|
||||
renderCall(args: any, theme: any) {
|
||||
let text = theme.fg("toolTitle", theme.bold("sf_self_feedback_resolve "));
|
||||
if (args.id) text += theme.fg("muted", args.id);
|
||||
return new Text(text, 0, 0);
|
||||
},
|
||||
renderResult(result: any, _options: any, theme: any) {
|
||||
const d = result.details;
|
||||
if (result.isError || d?.error) {
|
||||
return new Text(
|
||||
theme.fg("error", `Error: ${d?.error ?? "unknown"}`),
|
||||
0,
|
||||
0,
|
||||
);
|
||||
}
|
||||
return new Text(
|
||||
theme.fg("success", `Resolved ${d?.id ?? "self-feedback"}`),
|
||||
0,
|
||||
0,
|
||||
);
|
||||
},
|
||||
});
|
||||
|
||||
// ─── sf_plan_milestone ────────────────────────────────────────────────
|
||||
|
||||
const planMilestoneExecute = async (
|
||||
|
|
|
|||
|
|
@ -13,9 +13,9 @@ import {
|
|||
hasResearchTerminalTransition,
|
||||
isAutoActive,
|
||||
isAutoPaused,
|
||||
markResearchTerminalTransition,
|
||||
markToolEnd,
|
||||
markToolStart,
|
||||
markResearchTerminalTransition,
|
||||
recordToolInvocationError,
|
||||
} from "../auto.js";
|
||||
import {
|
||||
|
|
@ -194,6 +194,18 @@ export function registerHooks(
|
|||
}
|
||||
}
|
||||
loadToolApiKeys();
|
||||
// Flow audit is read-only by default: surface stale dispatched units,
|
||||
// missing session pointers, runaway history, and optional child hangs at
|
||||
// startup before another auto unit compounds the same milestone failure.
|
||||
try {
|
||||
const { runFlowAudit } = await import("../doctor.js");
|
||||
const flow = await runFlowAudit(process.cwd());
|
||||
if (!flow.ok) {
|
||||
ctx.ui?.notify?.(`Flow audit: ${flow.recommendedAction}`, "warning");
|
||||
}
|
||||
} catch {
|
||||
/* non-fatal — flow audit must never block session start */
|
||||
}
|
||||
// Drain self-feedback: auto-resolve entries whose blocking
|
||||
// sf-version constraint has been satisfied by the current sf bump,
|
||||
// and surface entries that remain blocked to the operator. Done after
|
||||
|
|
@ -239,9 +251,9 @@ export function registerHooks(
|
|||
"warning",
|
||||
);
|
||||
}
|
||||
// Forge-only: surface high/critical entries as inline-fix candidates so
|
||||
// the operator (or a follow-up dispatcher) can drain self-reported bugs
|
||||
// without leaving the session. Read-only signal for now — no auto-dispatch.
|
||||
// Forge-only: high/critical entries are queued as hidden follow-up repair
|
||||
// work on startup, even outside /sf auto. The drain helper owns claim TTL
|
||||
// and delivery failure retry, so this is safe to call opportunistically.
|
||||
const highBlocked = triage.stillBlocked.filter(
|
||||
(e) => e.severity === "high" || e.severity === "critical",
|
||||
);
|
||||
|
|
@ -366,6 +378,16 @@ export function registerHooks(
|
|||
resetToolCallLoopGuard();
|
||||
resetAskUserQuestionsCache();
|
||||
await handleAgentEnd(pi, event, ctx);
|
||||
// Best-effort embedding backfill: when SF_LLM_GATEWAY_KEY is set and the
|
||||
// gateway has an embed worker online, embed any memories that don't yet
|
||||
// have a vector. Bounded per invocation; logs once-per-minute when the
|
||||
// gateway is unavailable so we don't spam the journal.
|
||||
try {
|
||||
const { runEmbeddingBackfill } = await import("../memory-embeddings.js");
|
||||
await runEmbeddingBackfill();
|
||||
} catch {
|
||||
// Never break agent_end on backfill issues.
|
||||
}
|
||||
});
|
||||
|
||||
// Squash-merge quick-task branch back to the original branch after the
|
||||
|
|
@ -378,9 +400,10 @@ export function registerHooks(
|
|||
// Best-effort: don't break the turn lifecycle if cleanup fails.
|
||||
}
|
||||
try {
|
||||
const { consumeCompletedInlineFixClaim } = await import(
|
||||
"../self-feedback-drain.js"
|
||||
);
|
||||
const {
|
||||
consumeCompletedInlineFixClaim,
|
||||
dispatchSelfFeedbackInlineFixIfNeeded,
|
||||
} = await import("../self-feedback-drain.js");
|
||||
const resolvedIds = consumeCompletedInlineFixClaim(process.cwd());
|
||||
if (resolvedIds.length > 0) {
|
||||
const requestReload = (
|
||||
|
|
@ -391,7 +414,9 @@ export function registerHooks(
|
|||
requestReload?.(
|
||||
`self-feedback inline fix resolved ${resolvedIds.length} entr${resolvedIds.length === 1 ? "y" : "ies"}`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
dispatchSelfFeedbackInlineFixIfNeeded(process.cwd(), ctx, pi);
|
||||
} catch {
|
||||
// Best-effort: stale code should not break normal turn completion.
|
||||
}
|
||||
|
|
@ -511,6 +536,7 @@ export function registerHooks(
|
|||
block: true,
|
||||
reason:
|
||||
`Research unit terminal transition: ${currentUnit.type} ${currentUnit.id} has already completed its RESEARCH artifact. ` +
|
||||
`Post-artifact drift is blocked before runaway supervision treats it as legitimate large research. ` +
|
||||
`Planning tools (${event.toolName}) are blocked. The orchestrator will dispatch planner units after research.`,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -131,25 +131,65 @@ export async function handleDoctor(
|
|||
|
||||
// ── Flow audit subcommand (sf-moocz9so-4ffov2) ─────────────────────────
|
||||
if (trimmed === "flow" || trimmed.startsWith("flow ")) {
|
||||
const flowResult = await runFlowAudit(projectRoot());
|
||||
const flowResult = await runFlowAudit(projectRoot(), {
|
||||
killOverBudgetChildren: /\b(--kill-children|kill-children|kill)\b/.test(
|
||||
trimmed,
|
||||
),
|
||||
});
|
||||
const lines: string[] = ["## SF Flow Audit", ""];
|
||||
if (flowResult.activeMilestone) {
|
||||
lines.push(
|
||||
`**Active milestone:** ${flowResult.activeMilestone.id}${flowResult.activeMilestone.title ? ` — ${flowResult.activeMilestone.title}` : ""}`,
|
||||
flowResult.activeMilestone.phase
|
||||
? `- Phase: ${flowResult.activeMilestone.phase}`
|
||||
: "",
|
||||
"",
|
||||
);
|
||||
} else {
|
||||
lines.push("**Active milestone:** none", "");
|
||||
}
|
||||
if (flowResult.activeUnit) {
|
||||
const ageMin = Math.round(flowResult.activeUnit.ageMs / 60000);
|
||||
const progressAgeMin = Math.round(
|
||||
flowResult.activeUnit.progressAgeMs / 60000,
|
||||
);
|
||||
lines.push(
|
||||
`**Active unit:** ${flowResult.activeUnit.unitType} ${flowResult.activeUnit.unitId}`,
|
||||
`- Phase: ${flowResult.activeUnit.phase}`,
|
||||
`- Started: ${flowResult.activeUnit.startedAt}`,
|
||||
`- Age: ${ageMin} minutes`,
|
||||
`- Progress age: ${progressAgeMin} minutes`,
|
||||
flowResult.activeUnit.lastProgressAt
|
||||
? `- Last progress: ${flowResult.activeUnit.lastProgressAt}`
|
||||
: "",
|
||||
"",
|
||||
);
|
||||
} else {
|
||||
lines.push("**Active unit:** none", "");
|
||||
}
|
||||
lines.push(
|
||||
`**Session pointer:** ${
|
||||
flowResult.sessionPointer?.sessionFile ??
|
||||
flowResult.sessionPointer?.sessionId ??
|
||||
"none recorded"
|
||||
}`,
|
||||
`**Recommended action:** ${flowResult.recommendedAction}`,
|
||||
"",
|
||||
);
|
||||
if (flowResult.warnings.length > 0) {
|
||||
lines.push("**Warnings:**");
|
||||
for (const w of flowResult.warnings) lines.push(`- ${w}`);
|
||||
lines.push("");
|
||||
}
|
||||
if (flowResult.staleDispatchedUnits.length > 0) {
|
||||
lines.push("**Stale dispatched units:**");
|
||||
for (const unit of flowResult.staleDispatchedUnits.slice(0, 5)) {
|
||||
lines.push(
|
||||
`- ${unit.unitType} ${unit.unitId}: progress age ${Math.round(unit.progressAgeMs / 60000)} minutes`,
|
||||
);
|
||||
}
|
||||
lines.push("");
|
||||
}
|
||||
if (flowResult.recommendations.length > 0) {
|
||||
lines.push("**Recommendations:**");
|
||||
for (const r of flowResult.recommendations) lines.push(`- ${r}`);
|
||||
|
|
@ -158,7 +198,19 @@ export async function handleDoctor(
|
|||
if (flowResult.childProcesses.length > 0) {
|
||||
lines.push("**Child processes:**");
|
||||
for (const cp of flowResult.childProcesses.slice(0, 10)) {
|
||||
lines.push(`- pid=${cp.pid} [${cp.classification}] ${cp.cmd.slice(0, 60)}`);
|
||||
const age =
|
||||
cp.ageMs === undefined ? "" : ` age=${Math.round(cp.ageMs / 60000)}m`;
|
||||
const nonBlocking = cp.nonBlocking ? " non-blocking" : "";
|
||||
lines.push(
|
||||
`- pid=${cp.pid} ppid=${cp.ppid} [${cp.classification}]${age}${nonBlocking} action=${cp.action} ${cp.cmd.slice(0, 80)}`,
|
||||
);
|
||||
}
|
||||
lines.push("");
|
||||
}
|
||||
if (flowResult.runawayHistory.length > 0) {
|
||||
lines.push("**Runaway history:**");
|
||||
for (const event of flowResult.runawayHistory.slice(-5)) {
|
||||
lines.push(`- ${event}`);
|
||||
}
|
||||
lines.push("");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,13 +7,15 @@
|
|||
* tracked docs artifacts (sf-moocr4rv-au7r3l).
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
||||
import { mkdirSync, writeFileSync } from "node:fs";
|
||||
import { join, resolve } from "node:path";
|
||||
import type { ExtensionCommandContext } from "@singularity-forge/pi-coding-agent";
|
||||
import { ensureDbOpen } from "./bootstrap/dynamic-tools.js";
|
||||
import { projectRoot } from "./commands/context.js";
|
||||
import { profileRepository } from "./repo-profiler.js";
|
||||
import { recordRepoProfile } from "./sf-db.js";
|
||||
import { profileRepository, type RepoProfile } from "./repo-profiler.js";
|
||||
import { getLatestRepoProfile, recordRepoProfile } from "./sf-db.js";
|
||||
|
||||
const HARNESS_PROMOTION_REPO_DIR = "docs/exec-plans/active";
|
||||
|
||||
/**
|
||||
* Format a repo profile summary for user notification.
|
||||
|
|
@ -47,10 +49,91 @@ function formatProfileSummary(
|
|||
`Stacks: ${stacks}`,
|
||||
`Risk hints: ${risks}`,
|
||||
"",
|
||||
"Untracked files were recorded as observations only; SF did not stage or adopt them.",
|
||||
"Runtime observation boundary:",
|
||||
"- Profile state was stored only in .sf runtime state.",
|
||||
"- No repo-committable artifact was written by profiling.",
|
||||
"- Use /sf harness promote <finding-id> after review to create a tracked docs artifact.",
|
||||
"- Untracked files remain observed_only; SF did not stage or adopt them.",
|
||||
].join("\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert a finding id into a stable filename segment.
|
||||
*
|
||||
* Purpose: keep promotion artifacts deterministic while preventing path
|
||||
* traversal through user-provided finding IDs.
|
||||
*
|
||||
* Consumer: `/sf harness promote <finding-id>`.
|
||||
*/
|
||||
function findingIdSlug(findingId: string): string {
|
||||
const slug = findingId
|
||||
.trim()
|
||||
.toLowerCase()
|
||||
.replace(/[^a-z0-9._-]+/g, "-")
|
||||
.replace(/^-+|-+$/g, "")
|
||||
.slice(0, 120);
|
||||
return slug || "finding";
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse the persisted repo profile JSON from .sf runtime state.
|
||||
*
|
||||
* Purpose: promotion must be a writeback from recorded observations, not a new
|
||||
* profiler run that can observe its own artifact or introduce timestamps.
|
||||
*
|
||||
* Consumer: `/sf harness promote <finding-id>`.
|
||||
*/
|
||||
function parseRecordedProfile(profileJson: string): RepoProfile | null {
|
||||
try {
|
||||
const parsed = JSON.parse(profileJson) as Partial<RepoProfile>;
|
||||
if (
|
||||
typeof parsed.profileId === "string" &&
|
||||
typeof parsed.createdAt === "string" &&
|
||||
parsed.git &&
|
||||
Array.isArray(parsed.git.changedFiles)
|
||||
) {
|
||||
return parsed as RepoProfile;
|
||||
}
|
||||
} catch {
|
||||
// Fall back to row-level metadata below.
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the stable JSON payload embedded in a promotion artifact.
|
||||
*
|
||||
* Purpose: document the recorded observation facts without leaking absolute
|
||||
* runtime paths or adding promotion-time fields.
|
||||
*
|
||||
* Consumer: `/sf harness promote <finding-id>`.
|
||||
*/
|
||||
function profilePromotionPayload(
|
||||
profile: RepoProfile | null,
|
||||
fallback: {
|
||||
profileId: string;
|
||||
branch: string | null;
|
||||
dirty: boolean;
|
||||
createdAt: string;
|
||||
},
|
||||
): Record<string, unknown> {
|
||||
return {
|
||||
profileId: profile?.profileId ?? fallback.profileId,
|
||||
profileCapturedAt: profile?.createdAt ?? fallback.createdAt,
|
||||
branch: profile?.git.branch ?? fallback.branch,
|
||||
dirty: profile?.git.dirty ?? fallback.dirty,
|
||||
changedFiles: profile?.git.changedFiles ?? [],
|
||||
stacks: profile?.stacks ?? [],
|
||||
entrypoints: profile?.entrypoints ?? [],
|
||||
tests: profile?.tests ?? [],
|
||||
ci: profile?.ci ?? [],
|
||||
docs: profile?.docs ?? [],
|
||||
dataStores: profile?.dataStores ?? [],
|
||||
networkSurfaces: profile?.networkSurfaces ?? [],
|
||||
riskHints: profile?.riskHints ?? [],
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Promote a harness/profile finding from .sf runtime observations into a
|
||||
* tracked docs artifact. This is the writeback path that turns operational
|
||||
|
|
@ -80,42 +163,57 @@ export async function handleHarnessPromote(
|
|||
return;
|
||||
}
|
||||
|
||||
// Determine the target tracked-docs path
|
||||
const displayFindingId = findingId.trim();
|
||||
const latestProfile = getLatestRepoProfile();
|
||||
if (!latestProfile) {
|
||||
ctx.ui.notify(
|
||||
"No recorded harness profile found. Run /sf harness profile first; promotion writes tracked docs only from .sf runtime observations.",
|
||||
"warning",
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const slug = findingIdSlug(displayFindingId);
|
||||
const relativePath = `${HARNESS_PROMOTION_REPO_DIR}/harness-promotion-${slug}.md`;
|
||||
const trackedDir = resolve(basePath, "docs", "exec-plans", "active");
|
||||
const targetPath = join(trackedDir, `harness-promotion-${findingId}.md`);
|
||||
const targetPath = join(trackedDir, `harness-promotion-${slug}.md`);
|
||||
|
||||
// Ensure the tracked directory exists (creates under the repo, not .sf)
|
||||
mkdirSync(trackedDir, { recursive: true });
|
||||
|
||||
// Read the latest profile from DB to include in the promotion
|
||||
const profile = profileRepository(basePath);
|
||||
const recordedProfile = parseRecordedProfile(latestProfile.profileJson);
|
||||
const payload = profilePromotionPayload(recordedProfile, {
|
||||
profileId: latestProfile.profileId,
|
||||
branch: latestProfile.branch,
|
||||
dirty: latestProfile.dirty,
|
||||
createdAt: latestProfile.createdAt,
|
||||
});
|
||||
|
||||
// Build the promoted artifact content
|
||||
const content = [
|
||||
`# Harness Promotion: ${findingId}`,
|
||||
`# Harness Promotion: ${displayFindingId}`,
|
||||
"",
|
||||
`Promoted from: \`.sf\` runtime observations`,
|
||||
`Promoted at: ${new Date().toISOString()}`,
|
||||
`Source profile: ${profile.profileId}`,
|
||||
`Source branch: ${profile.git.branch ?? "unknown"}`,
|
||||
`Finding ID: ${displayFindingId}`,
|
||||
`Repo artifact: \`${relativePath}\``,
|
||||
"Source: `.sf` runtime observations",
|
||||
`Source profile: ${latestProfile.profileId}`,
|
||||
`Source profile captured at: ${latestProfile.createdAt}`,
|
||||
`Source branch: ${latestProfile.branch ?? "unknown"}`,
|
||||
"",
|
||||
"## Observed State",
|
||||
"## Runtime Boundary",
|
||||
"",
|
||||
"- `.sf` remains operational runtime state and is not repo output.",
|
||||
"- Unpromoted .sf runtime observations remain `observed_only`.",
|
||||
"- This Markdown file is the repo-committable artifact created by promotion.",
|
||||
"- Promotion does not stage or claim untracked observed files.",
|
||||
"",
|
||||
"## Observed Profile",
|
||||
"",
|
||||
"```json",
|
||||
JSON.stringify(
|
||||
{
|
||||
profileId: profile.profileId,
|
||||
branch: profile.git.branch,
|
||||
changedFiles: profile.git.changedFiles,
|
||||
stacks: profile.stacks,
|
||||
riskHints: profile.riskHints,
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
JSON.stringify(payload, null, 2),
|
||||
"```",
|
||||
"",
|
||||
"## Status",
|
||||
"## Review Checklist",
|
||||
"",
|
||||
"- [ ] Reviewed by human",
|
||||
"- [ ] Adopted into milestone plan",
|
||||
|
|
@ -131,10 +229,10 @@ export async function handleHarnessPromote(
|
|||
|
||||
ctx.ui.notify(
|
||||
[
|
||||
`Harness finding '${findingId}' promoted to tracked docs.`,
|
||||
`Path: ${targetPath}`,
|
||||
`Harness finding '${displayFindingId}' promoted to tracked docs.`,
|
||||
`Path: ${relativePath}`,
|
||||
"",
|
||||
"This artifact is now part of the repo's tracked documentation.",
|
||||
"This Markdown file is now the repo-committable artifact for review.",
|
||||
"Unpromoted .sf runtime state remains observed_only.",
|
||||
].join("\n"),
|
||||
"info",
|
||||
|
|
@ -161,7 +259,7 @@ export async function handleHarness(
|
|||
}
|
||||
if (!["profile", "snapshot", "status"].includes(subcommand)) {
|
||||
ctx.ui.notify(
|
||||
"Usage: /sf harness profile | /sf harness promote <finding-id>\nRecords a read-only repo profile or promotes a finding to tracked docs.",
|
||||
"Usage: /sf harness profile | /sf harness promote <finding-id>\nRecords a read-only .sf runtime profile or promotes a reviewed finding to tracked docs.",
|
||||
"warning",
|
||||
);
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -50,34 +50,486 @@ import {
|
|||
loadEffectiveSFPreferences,
|
||||
type SFPreferences,
|
||||
} from "./preferences.js";
|
||||
import {
|
||||
type PersistedSelfFeedbackEntry,
|
||||
readAllSelfFeedback,
|
||||
recordSelfFeedback,
|
||||
} from "./self-feedback.js";
|
||||
import { getMilestoneSlices, getSliceTasks, isDbAvailable } from "./sf-db.js";
|
||||
import { deriveState, isMilestoneComplete } from "./state.js";
|
||||
import { isClosedStatus } from "./status-guards.js";
|
||||
import type { RoadmapSliceEntry } from "./types.js";
|
||||
import { parseUnitId } from "./unit-id.js";
|
||||
|
||||
// ─── Flow Audit Types (sf-moocz9so-4ffov2) ────────────────────────────────
|
||||
|
||||
export type FlowAuditChildClassification =
|
||||
| "active-session"
|
||||
| "warmup"
|
||||
| "background"
|
||||
| "orphan"
|
||||
| "unknown";
|
||||
|
||||
export type FlowAuditChildAction = "observe" | "non-blocking" | "kill";
|
||||
|
||||
/**
|
||||
* Configure `runFlowAudit` for deterministic tests and explicit recovery mode.
|
||||
*
|
||||
* Purpose: keep the default auditor read-only during startup while allowing
|
||||
* `/sf doctor flow --kill-children` and tests to exercise bounded child cleanup.
|
||||
*
|
||||
* Consumer: session_start, `/sf doctor flow`, and flow-audit regression tests.
|
||||
*/
|
||||
export interface FlowAuditOptions {
|
||||
nowMs?: number;
|
||||
staleProgressMs?: number;
|
||||
optionalChildBudgetMs?: number;
|
||||
psOutput?: string;
|
||||
killOverBudgetChildren?: boolean;
|
||||
killProcess?: (pid: number) => void;
|
||||
recordSelfFeedback?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Flow-audit output returned to commands and startup hooks.
|
||||
*
|
||||
* Purpose: preserve enough structured evidence for operators and tests to avoid
|
||||
* reconstructing stuck auto-mode state from locks, runtime files, sessions, and ps.
|
||||
*
|
||||
* Consumer: `/sf doctor flow`, session_start notifications, and regression tests.
|
||||
*/
|
||||
export interface FlowAuditResult {
|
||||
ok: boolean;
|
||||
activeMilestone?: {
|
||||
id: string;
|
||||
title?: string;
|
||||
phase?: string;
|
||||
};
|
||||
activeUnit?: {
|
||||
unitType: string;
|
||||
unitId: string;
|
||||
phase: string;
|
||||
startedAt: string;
|
||||
ageMs: number;
|
||||
progressAgeMs: number;
|
||||
lastProgressAt?: string;
|
||||
};
|
||||
sessionPointer?: {
|
||||
sessionId?: string;
|
||||
sessionFile?: string;
|
||||
source: "auto.lock" | "runtime-unit";
|
||||
};
|
||||
recommendations: string[];
|
||||
recommendedAction: string;
|
||||
warnings: string[];
|
||||
childProcesses: Array<{
|
||||
pid: number;
|
||||
ppid: number;
|
||||
cmd: string;
|
||||
classification: "active-session" | "warmup" | "orphan" | "unknown";
|
||||
classification: FlowAuditChildClassification;
|
||||
ageMs?: number;
|
||||
nonBlocking: boolean;
|
||||
overBudget: boolean;
|
||||
action: FlowAuditChildAction;
|
||||
killed?: boolean;
|
||||
killError?: string;
|
||||
}>;
|
||||
lastErrors: string[];
|
||||
staleDispatchedUnits: Array<{
|
||||
unitType: string;
|
||||
unitId: string;
|
||||
phase: string;
|
||||
progressAgeMs: number;
|
||||
lastProgressAt?: string;
|
||||
}>;
|
||||
runawayHistory: string[];
|
||||
loopEvidence?: {
|
||||
milestoneId: string;
|
||||
sliceId?: string;
|
||||
taskId?: string;
|
||||
completedPriorTasks: string[];
|
||||
missingSummaries: string[];
|
||||
};
|
||||
repeatedFailureRollup?: {
|
||||
filed: boolean;
|
||||
milestoneId: string;
|
||||
count: number;
|
||||
entryId?: string;
|
||||
};
|
||||
}
|
||||
|
||||
// ─── Flow Audit Implementation ────────────────────────────────────────────
|
||||
|
||||
const DEFAULT_STALE_PROGRESS_MS = 20 * 60 * 1000;
|
||||
const DEFAULT_OPTIONAL_CHILD_BUDGET_MS = 30 * 60 * 1000;
|
||||
const REPEATED_FAILURE_THRESHOLD = 3;
|
||||
const FLOW_AUDIT_ROLLUP_KIND = "flow-audit:repeated-milestone-failure";
|
||||
|
||||
interface AutoLockAuditRecord {
|
||||
pid?: number;
|
||||
unitType?: string;
|
||||
unitId?: string;
|
||||
startedAt?: string | number;
|
||||
phase?: string;
|
||||
sessionId?: string;
|
||||
sessionFile?: string;
|
||||
}
|
||||
|
||||
interface RuntimeUnitAuditRecord {
|
||||
unitType?: string;
|
||||
unitId?: string;
|
||||
phase?: string;
|
||||
startedAt?: number | string;
|
||||
updatedAt?: number | string;
|
||||
lastProgressAt?: number | string;
|
||||
lastProgressKind?: string;
|
||||
progressCount?: number;
|
||||
sessionId?: string;
|
||||
sessionFile?: string;
|
||||
runawayGuardPause?: {
|
||||
reason?: string;
|
||||
unitType?: string;
|
||||
unitId?: string;
|
||||
pausedAt?: number;
|
||||
};
|
||||
}
|
||||
|
||||
interface PsAuditRow {
|
||||
pid: number;
|
||||
ppid: number;
|
||||
ageMs?: number;
|
||||
cmd: string;
|
||||
}
|
||||
|
||||
function parseEpochMs(value: unknown, fallbackMs: number): number {
|
||||
if (typeof value === "number" && Number.isFinite(value)) {
|
||||
return value < 10_000_000_000 ? value * 1000 : value;
|
||||
}
|
||||
if (typeof value === "string" && value.trim()) {
|
||||
const parsed = new Date(value).getTime();
|
||||
if (Number.isFinite(parsed)) return parsed;
|
||||
}
|
||||
return fallbackMs;
|
||||
}
|
||||
|
||||
function formatIso(ms: number | undefined): string | undefined {
|
||||
if (ms === undefined || !Number.isFinite(ms)) return undefined;
|
||||
return new Date(ms).toISOString();
|
||||
}
|
||||
|
||||
function minutes(ms: number): number {
|
||||
return Math.max(0, Math.round(ms / 60_000));
|
||||
}
|
||||
|
||||
function readJsonFile<T>(path: string): T | null {
|
||||
try {
|
||||
if (!existsSync(path)) return null;
|
||||
return JSON.parse(readFileSync(path, "utf8")) as T;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function readRuntimeUnits(runtimeUnitsDir: string): RuntimeUnitAuditRecord[] {
|
||||
if (!existsSync(runtimeUnitsDir)) return [];
|
||||
const records: RuntimeUnitAuditRecord[] = [];
|
||||
try {
|
||||
for (const file of readdirSync(runtimeUnitsDir)) {
|
||||
if (!file.endsWith(".json")) continue;
|
||||
const record = readJsonFile<RuntimeUnitAuditRecord>(
|
||||
join(runtimeUnitsDir, file),
|
||||
);
|
||||
if (record) records.push(record);
|
||||
}
|
||||
} catch {
|
||||
// Runtime audit must stay best-effort.
|
||||
}
|
||||
return records;
|
||||
}
|
||||
|
||||
function parsePsOutput(psOutput: string): PsAuditRow[] {
|
||||
const rows: PsAuditRow[] = [];
|
||||
for (const line of psOutput.split("\n")) {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed) continue;
|
||||
const match = trimmed.match(/^(\d+)\s+(\d+)(?:\s+(\d+))?\s+(.+)$/);
|
||||
if (!match) continue;
|
||||
const pid = Number.parseInt(match[1], 10);
|
||||
const ppid = Number.parseInt(match[2], 10);
|
||||
if (!Number.isFinite(pid) || !Number.isFinite(ppid)) continue;
|
||||
const elapsedSeconds =
|
||||
match[3] === undefined ? undefined : Number.parseInt(match[3], 10);
|
||||
rows.push({
|
||||
pid,
|
||||
ppid,
|
||||
ageMs:
|
||||
elapsedSeconds !== undefined && Number.isFinite(elapsedSeconds)
|
||||
? elapsedSeconds * 1000
|
||||
: undefined,
|
||||
cmd: match[4],
|
||||
});
|
||||
}
|
||||
return rows;
|
||||
}
|
||||
|
||||
async function readPsRows(options: FlowAuditOptions): Promise<PsAuditRow[]> {
|
||||
if (options.psOutput !== undefined) return parsePsOutput(options.psOutput);
|
||||
if (process.platform === "win32") return [];
|
||||
try {
|
||||
const { execSync } = await import("node:child_process");
|
||||
const psOutput = execSync("ps -eo pid,ppid,etimes,cmd --no-headers", {
|
||||
encoding: "utf8",
|
||||
timeout: 5000,
|
||||
});
|
||||
return parsePsOutput(psOutput);
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
function classifyProcess(row: PsAuditRow): FlowAuditChildClassification {
|
||||
const cmd = row.cmd.toLowerCase();
|
||||
if (cmd.includes("sift") || cmd.includes("warmup")) return "warmup";
|
||||
if (row.ppid === 1 && cmd.includes("next-server")) return "orphan";
|
||||
if (
|
||||
cmd.includes("next-server") ||
|
||||
cmd.includes("vite") ||
|
||||
cmd.includes("turbopack")
|
||||
) {
|
||||
return "background";
|
||||
}
|
||||
if (
|
||||
(cmd.includes("node") || cmd.includes("sf-run") || cmd.includes("codex")) &&
|
||||
(cmd.includes(" sf") ||
|
||||
cmd.includes("/sf") ||
|
||||
cmd.includes("dist/loader") ||
|
||||
cmd.includes("tool-session") ||
|
||||
cmd.includes("headless"))
|
||||
) {
|
||||
return "active-session";
|
||||
}
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
function isOptionalChild(
|
||||
classification: FlowAuditChildClassification,
|
||||
): boolean {
|
||||
return (
|
||||
classification === "warmup" ||
|
||||
classification === "background" ||
|
||||
classification === "orphan"
|
||||
);
|
||||
}
|
||||
|
||||
function shouldIncludeProcess(
|
||||
row: PsAuditRow,
|
||||
classification: FlowAuditChildClassification,
|
||||
activePid: number | undefined,
|
||||
): boolean {
|
||||
if (classification !== "unknown") return true;
|
||||
if (activePid === undefined) return false;
|
||||
return row.pid === activePid || row.ppid === activePid;
|
||||
}
|
||||
|
||||
function readRecentErrors(runtimeRoot: string): string[] {
|
||||
const notificationsPath = join(runtimeRoot, "notifications.jsonl");
|
||||
if (!existsSync(notificationsPath)) return [];
|
||||
const errors: string[] = [];
|
||||
try {
|
||||
const lines = readFileSync(notificationsPath, "utf8")
|
||||
.split("\n")
|
||||
.filter((l) => l.trim());
|
||||
for (const line of lines.slice(-20)) {
|
||||
try {
|
||||
const entry = JSON.parse(line) as {
|
||||
severity?: string;
|
||||
message?: string;
|
||||
text?: string;
|
||||
};
|
||||
const message = entry.message ?? entry.text ?? "";
|
||||
if (
|
||||
entry.severity === "error" ||
|
||||
message.toLowerCase().includes("error") ||
|
||||
message.toLowerCase().includes("failed")
|
||||
) {
|
||||
errors.push(message || "Unknown error");
|
||||
}
|
||||
} catch {
|
||||
// skip malformed notification rows
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// non-fatal
|
||||
}
|
||||
return errors;
|
||||
}
|
||||
|
||||
function buildLoopEvidence(
|
||||
basePath: string,
|
||||
unitType: string,
|
||||
unitId: string,
|
||||
): FlowAuditResult["loopEvidence"] | undefined {
|
||||
if (unitType !== "execute-task") return undefined;
|
||||
const { milestone, slice, task } = parseUnitId(unitId);
|
||||
if (!milestone || !slice || !task) return undefined;
|
||||
const planPath = resolveSliceFile(basePath, milestone, slice, "PLAN");
|
||||
if (!planPath || !existsSync(planPath)) return undefined;
|
||||
const completedPriorTasks: string[] = [];
|
||||
const missingSummaries: string[] = [];
|
||||
try {
|
||||
const plan = parsePlan(readFileSync(planPath, "utf8"));
|
||||
const currentIndex = plan.tasks.findIndex((t) => t.id === task);
|
||||
if (currentIndex > 0) {
|
||||
for (const prior of plan.tasks.slice(0, currentIndex)) {
|
||||
if (prior.done) completedPriorTasks.push(prior.id);
|
||||
}
|
||||
}
|
||||
if (!resolveTaskFile(basePath, milestone, slice, task, "SUMMARY")) {
|
||||
missingSummaries.push(`${milestone}/${slice}/${task} task SUMMARY`);
|
||||
}
|
||||
const allTasksDone =
|
||||
plan.tasks.length > 0 && plan.tasks.every((t) => t.done);
|
||||
if (
|
||||
allTasksDone &&
|
||||
!resolveSliceFile(basePath, milestone, slice, "SUMMARY")
|
||||
) {
|
||||
missingSummaries.push(`${milestone}/${slice} slice SUMMARY`);
|
||||
}
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
return {
|
||||
milestoneId: milestone,
|
||||
sliceId: slice,
|
||||
taskId: task,
|
||||
completedPriorTasks,
|
||||
missingSummaries,
|
||||
};
|
||||
}
|
||||
|
||||
function collectRunawayHistory(
|
||||
runtimeUnits: RuntimeUnitAuditRecord[],
|
||||
feedback: PersistedSelfFeedbackEntry[],
|
||||
milestoneId: string | undefined,
|
||||
): string[] {
|
||||
const history: string[] = [];
|
||||
for (const unit of runtimeUnits) {
|
||||
const pause = unit.runawayGuardPause;
|
||||
if (!pause) continue;
|
||||
const id = pause.unitId ?? unit.unitId ?? "unknown";
|
||||
if (milestoneId && !id.startsWith(`${milestoneId}/`)) continue;
|
||||
history.push(pause.reason ?? `Runaway guard paused ${id}`);
|
||||
}
|
||||
for (const entry of feedback) {
|
||||
if (entry.resolvedAt) continue;
|
||||
if (milestoneId && entry.occurredIn?.milestone !== milestoneId) continue;
|
||||
if (
|
||||
entry.kind.includes("runaway") ||
|
||||
entry.summary.toLowerCase().includes("runaway")
|
||||
) {
|
||||
history.push(`${entry.kind}: ${entry.summary}`);
|
||||
}
|
||||
}
|
||||
return Array.from(new Set(history)).slice(-10);
|
||||
}
|
||||
|
||||
function maybeRecordRepeatedFailureRollup(
|
||||
basePath: string,
|
||||
milestoneId: string | undefined,
|
||||
feedback: PersistedSelfFeedbackEntry[],
|
||||
options: FlowAuditOptions,
|
||||
): FlowAuditResult["repeatedFailureRollup"] | undefined {
|
||||
if (!milestoneId || options.recordSelfFeedback === false) return undefined;
|
||||
const failures = feedback.filter(
|
||||
(e) =>
|
||||
!e.resolvedAt &&
|
||||
e.occurredIn?.milestone === milestoneId &&
|
||||
e.kind !== FLOW_AUDIT_ROLLUP_KIND,
|
||||
);
|
||||
if (failures.length < REPEATED_FAILURE_THRESHOLD) return undefined;
|
||||
const openRollup = feedback.find(
|
||||
(e) =>
|
||||
!e.resolvedAt &&
|
||||
e.kind === FLOW_AUDIT_ROLLUP_KIND &&
|
||||
e.occurredIn?.milestone === milestoneId,
|
||||
);
|
||||
if (openRollup) {
|
||||
return {
|
||||
filed: false,
|
||||
milestoneId,
|
||||
count: failures.length,
|
||||
entryId: openRollup.id,
|
||||
};
|
||||
}
|
||||
const evidence = failures
|
||||
.slice(-8)
|
||||
.map(
|
||||
(e) =>
|
||||
`[${e.id}] ${e.kind} ${[
|
||||
e.occurredIn?.milestone,
|
||||
e.occurredIn?.slice,
|
||||
e.occurredIn?.task,
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join("/")}: ${e.summary}`,
|
||||
)
|
||||
.join("\n");
|
||||
const recorded = recordSelfFeedback(
|
||||
{
|
||||
kind: FLOW_AUDIT_ROLLUP_KIND,
|
||||
severity: "high",
|
||||
summary: `${failures.length} unresolved flow failures on ${milestoneId} need one recovery fix`,
|
||||
evidence,
|
||||
suggestedFix:
|
||||
"Fix the shared milestone-flow failure instead of filing one item per failed unit. Use the flow audit evidence to repair stale dispatch, missing summary, runaway, or child-process handling.",
|
||||
acceptanceCriteria:
|
||||
"AC1: flow audit reports the active milestone/unit and session pointer. AC2: stale dispatched unit with no progress is flagged. AC3: runaway history and child-process hang evidence are preserved. AC4: repeated same-milestone failures stay deduplicated into one open item.",
|
||||
source: "detector",
|
||||
occurredIn: { milestone: milestoneId, unitType: "flow-audit" },
|
||||
},
|
||||
basePath,
|
||||
);
|
||||
if (!recorded) return undefined;
|
||||
return {
|
||||
filed: true,
|
||||
milestoneId,
|
||||
count: failures.length,
|
||||
entryId: recorded.entry.id,
|
||||
};
|
||||
}
|
||||
|
||||
function chooseRecommendedAction(args: {
|
||||
activeUnit?: FlowAuditResult["activeUnit"];
|
||||
sessionPointer?: FlowAuditResult["sessionPointer"];
|
||||
staleDispatchedUnits: FlowAuditResult["staleDispatchedUnits"];
|
||||
childProcesses: FlowAuditResult["childProcesses"];
|
||||
lastErrors: string[];
|
||||
activeMilestone?: FlowAuditResult["activeMilestone"];
|
||||
}): string {
|
||||
if (args.staleDispatchedUnits.length > 0) {
|
||||
const unit = args.staleDispatchedUnits[0];
|
||||
const session = args.sessionPointer?.sessionFile
|
||||
? ` ${args.sessionPointer.sessionFile}`
|
||||
: args.sessionPointer?.sessionId
|
||||
? ` ${args.sessionPointer.sessionId}`
|
||||
: "";
|
||||
return `Inspect session${session} for ${unit.unitType} ${unit.unitId}; if no new output exists, stop/requeue the stale dispatched unit before continuing.`;
|
||||
}
|
||||
const overBudgetOptional = args.childProcesses.find(
|
||||
(p) => p.nonBlocking && p.overBudget,
|
||||
);
|
||||
if (overBudgetOptional) {
|
||||
return `Optional ${overBudgetOptional.classification} child pid ${overBudgetOptional.pid} is over budget; it is non-blocking, or rerun with --kill-children to terminate it.`;
|
||||
}
|
||||
if (args.lastErrors.length > 0) {
|
||||
return "Review recent errors before dispatching another unit.";
|
||||
}
|
||||
if (args.activeMilestone && !args.activeUnit) {
|
||||
return `Dispatch or resume the next unit for ${args.activeMilestone.id}.`;
|
||||
}
|
||||
return "No flow-auditor action needed.";
|
||||
}
|
||||
|
||||
/**
|
||||
* Run a flow audit: inspect active unit state, auto.lock, runtime artifacts,
|
||||
* and child processes to diagnose stuck milestones without human forensic work.
|
||||
|
|
@ -86,165 +538,228 @@ export interface FlowAuditResult {
|
|||
* milestone/unit, progress age, session pointer, child processes, last errors,
|
||||
* and recommended action.
|
||||
*
|
||||
* Consumer: `/sf doctor flow` command.
|
||||
* Consumer: `/sf doctor flow` command and session_start startup health sweep.
|
||||
*/
|
||||
export async function runFlowAudit(basePath: string): Promise<FlowAuditResult> {
|
||||
export async function runFlowAudit(
|
||||
basePath: string,
|
||||
options: FlowAuditOptions = {},
|
||||
): Promise<FlowAuditResult> {
|
||||
const nowMs = options.nowMs ?? Date.now();
|
||||
const staleProgressMs = options.staleProgressMs ?? DEFAULT_STALE_PROGRESS_MS;
|
||||
const optionalChildBudgetMs =
|
||||
options.optionalChildBudgetMs ?? DEFAULT_OPTIONAL_CHILD_BUDGET_MS;
|
||||
const runtimeRoot = sfRoot(basePath);
|
||||
const warnings: string[] = [];
|
||||
const recommendations: string[] = [];
|
||||
const childProcesses: FlowAuditResult["childProcesses"] = [];
|
||||
const lastErrors: string[] = [];
|
||||
const lastErrors = readRecentErrors(runtimeRoot);
|
||||
const staleDispatchedUnits: FlowAuditResult["staleDispatchedUnits"] = [];
|
||||
let sessionPointer: FlowAuditResult["sessionPointer"] | undefined;
|
||||
let activeMilestone: FlowAuditResult["activeMilestone"] | undefined;
|
||||
|
||||
// Read auto.lock for active unit info
|
||||
const autoLockPath = join(basePath, ".sf", "auto.lock");
|
||||
const autoLockPath = join(runtimeRoot, "auto.lock");
|
||||
let activeUnit: FlowAuditResult["activeUnit"] | undefined;
|
||||
if (existsSync(autoLockPath)) {
|
||||
try {
|
||||
const lockContent = readFileSync(autoLockPath, "utf8");
|
||||
const lockData = JSON.parse(lockContent) as {
|
||||
unitType?: string;
|
||||
unitId?: string;
|
||||
startedAt?: string;
|
||||
phase?: string;
|
||||
let activePid: number | undefined;
|
||||
const lockData = readJsonFile<AutoLockAuditRecord>(autoLockPath);
|
||||
if (lockData) {
|
||||
if (lockData.unitType && lockData.unitId) {
|
||||
const startedAtMs = parseEpochMs(lockData.startedAt, nowMs);
|
||||
const parsed = parseUnitId(lockData.unitId);
|
||||
activeMilestone = { id: parsed.milestone };
|
||||
activePid =
|
||||
typeof lockData.pid === "number" && Number.isFinite(lockData.pid)
|
||||
? lockData.pid
|
||||
: undefined;
|
||||
activeUnit = {
|
||||
unitType: lockData.unitType,
|
||||
unitId: lockData.unitId,
|
||||
phase: lockData.phase ?? "unknown",
|
||||
startedAt: formatIso(startedAtMs) ?? new Date(nowMs).toISOString(),
|
||||
ageMs: Math.max(0, nowMs - startedAtMs),
|
||||
progressAgeMs: Math.max(0, nowMs - startedAtMs),
|
||||
};
|
||||
if (lockData.unitType && lockData.unitId) {
|
||||
const startedAt = lockData.startedAt
|
||||
? new Date(lockData.startedAt).getTime()
|
||||
: Date.now();
|
||||
const ageMs = Date.now() - startedAt;
|
||||
activeUnit = {
|
||||
unitType: lockData.unitType,
|
||||
unitId: lockData.unitId,
|
||||
phase: lockData.phase ?? "unknown",
|
||||
startedAt: lockData.startedAt ?? new Date().toISOString(),
|
||||
ageMs,
|
||||
if (lockData.sessionId || lockData.sessionFile) {
|
||||
sessionPointer = {
|
||||
sessionId: lockData.sessionId,
|
||||
sessionFile: lockData.sessionFile,
|
||||
source: "auto.lock",
|
||||
};
|
||||
if (ageMs > 30 * 60 * 1000) {
|
||||
warnings.push(
|
||||
`Active unit ${lockData.unitId} has been running for ${Math.round(ageMs / 60000)} minutes.`,
|
||||
);
|
||||
recommendations.push(
|
||||
`Consider checking if ${lockData.unitId} is stuck or making progress.`,
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
warnings.push("Could not parse .sf/auto.lock");
|
||||
}
|
||||
} else if (existsSync(autoLockPath)) {
|
||||
warnings.push("Could not parse .sf/auto.lock");
|
||||
}
|
||||
|
||||
// Read runtime units directory
|
||||
const runtimeUnitsDir = join(basePath, ".sf", "runtime", "units");
|
||||
if (existsSync(runtimeUnitsDir)) {
|
||||
try {
|
||||
const files = readdirSync(runtimeUnitsDir);
|
||||
let dispatchedCount = 0;
|
||||
for (const file of files) {
|
||||
if (!file.endsWith(".json")) continue;
|
||||
try {
|
||||
const content = readFileSync(
|
||||
join(runtimeUnitsDir, file),
|
||||
"utf8",
|
||||
);
|
||||
const unit = JSON.parse(content) as {
|
||||
phase?: string;
|
||||
unitType?: string;
|
||||
unitId?: string;
|
||||
};
|
||||
if (unit.phase === "dispatched") dispatchedCount++;
|
||||
} catch {
|
||||
// skip malformed
|
||||
}
|
||||
}
|
||||
if (dispatchedCount > 1) {
|
||||
warnings.push(
|
||||
`${dispatchedCount} units are in dispatched phase simultaneously.`,
|
||||
);
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
|
||||
// Read notifications for recent errors
|
||||
const notificationsPath = join(basePath, ".sf", "notifications.jsonl");
|
||||
if (existsSync(notificationsPath)) {
|
||||
try {
|
||||
const lines = readFileSync(notificationsPath, "utf8")
|
||||
.split("\n")
|
||||
.filter((l) => l.trim());
|
||||
const recentLines = lines.slice(-20);
|
||||
for (const line of recentLines) {
|
||||
try {
|
||||
const entry = JSON.parse(line) as {
|
||||
severity?: string;
|
||||
message?: string;
|
||||
};
|
||||
if (
|
||||
entry.severity === "error" ||
|
||||
entry.message?.toLowerCase().includes("error")
|
||||
) {
|
||||
lastErrors.push(entry.message ?? "Unknown error");
|
||||
}
|
||||
} catch {
|
||||
// skip malformed
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
|
||||
// Scan child processes (Linux/macOS only)
|
||||
if (process.platform !== "win32") {
|
||||
try {
|
||||
const { execSync } = await import("node:child_process");
|
||||
const psOutput = execSync("ps -eo pid,ppid,cmd --no-headers", {
|
||||
encoding: "utf8",
|
||||
timeout: 5000,
|
||||
const runtimeUnits = readRuntimeUnits(join(runtimeRoot, "runtime", "units"));
|
||||
let dispatchedCount = 0;
|
||||
for (const unit of runtimeUnits) {
|
||||
if (unit.phase === "dispatched") dispatchedCount++;
|
||||
if (!unit.unitType || !unit.unitId) continue;
|
||||
const progressBaseMs = parseEpochMs(
|
||||
unit.lastProgressAt ?? unit.updatedAt ?? unit.startedAt,
|
||||
nowMs,
|
||||
);
|
||||
const progressAgeMs = Math.max(0, nowMs - progressBaseMs);
|
||||
const lastProgressAt = formatIso(progressBaseMs);
|
||||
const stale =
|
||||
unit.phase === "dispatched" && progressAgeMs > staleProgressMs;
|
||||
if (stale) {
|
||||
staleDispatchedUnits.push({
|
||||
unitType: unit.unitType,
|
||||
unitId: unit.unitId,
|
||||
phase: unit.phase ?? "unknown",
|
||||
progressAgeMs,
|
||||
lastProgressAt,
|
||||
});
|
||||
const lines = psOutput.split("\n").filter((l) => l.trim());
|
||||
for (const line of lines) {
|
||||
const parts = line.trim().split(/\s+/);
|
||||
if (parts.length < 3) continue;
|
||||
const pid = Number.parseInt(parts[0], 10);
|
||||
const ppid = Number.parseInt(parts[1], 10);
|
||||
const cmd = parts.slice(2).join(" ");
|
||||
if (!Number.isFinite(pid)) continue;
|
||||
// Classify processes
|
||||
let classification: FlowAuditResult["childProcesses"][0]["classification"] = "unknown";
|
||||
if (cmd.includes("sift") || cmd.includes("warmup")) {
|
||||
classification = "warmup";
|
||||
} else if (cmd.includes("node") && cmd.includes("sf")) {
|
||||
classification = "active-session";
|
||||
} else if (ppid === 1 && cmd.includes("next-server")) {
|
||||
classification = "orphan";
|
||||
}
|
||||
childProcesses.push({ pid, cmd, classification });
|
||||
warnings.push(
|
||||
`Unit ${unit.unitId} has no progress for ${minutes(progressAgeMs)} minutes (phase=${unit.phase}).`,
|
||||
);
|
||||
}
|
||||
if (
|
||||
activeUnit &&
|
||||
unit.unitType === activeUnit.unitType &&
|
||||
unit.unitId === activeUnit.unitId
|
||||
) {
|
||||
activeUnit.phase = unit.phase ?? activeUnit.phase;
|
||||
activeUnit.progressAgeMs = progressAgeMs;
|
||||
activeUnit.lastProgressAt = lastProgressAt;
|
||||
if (!sessionPointer && (unit.sessionId || unit.sessionFile)) {
|
||||
sessionPointer = {
|
||||
sessionId: unit.sessionId,
|
||||
sessionFile: unit.sessionFile,
|
||||
source: "runtime-unit",
|
||||
};
|
||||
}
|
||||
} catch {
|
||||
// ignore on platforms without ps
|
||||
}
|
||||
}
|
||||
if (dispatchedCount > 1) {
|
||||
warnings.push(
|
||||
`${dispatchedCount} units are in dispatched phase simultaneously.`,
|
||||
);
|
||||
}
|
||||
|
||||
const psRows = await readPsRows(options);
|
||||
for (const row of psRows) {
|
||||
const classification = classifyProcess(row);
|
||||
if (!shouldIncludeProcess(row, classification, activePid)) continue;
|
||||
const nonBlocking = isOptionalChild(classification);
|
||||
const overBudget =
|
||||
nonBlocking &&
|
||||
row.ageMs !== undefined &&
|
||||
row.ageMs > optionalChildBudgetMs;
|
||||
let action: FlowAuditChildAction = nonBlocking ? "non-blocking" : "observe";
|
||||
let killed = false;
|
||||
let killError: string | undefined;
|
||||
if (overBudget) {
|
||||
warnings.push(
|
||||
`${classification} child pid ${row.pid} is over budget (${minutes(row.ageMs ?? 0)} minutes).`,
|
||||
);
|
||||
if (options.killOverBudgetChildren) {
|
||||
action = "kill";
|
||||
try {
|
||||
if (options.killProcess) options.killProcess(row.pid);
|
||||
else process.kill(row.pid, "SIGTERM");
|
||||
killed = true;
|
||||
} catch (err) {
|
||||
killError = err instanceof Error ? err.message : String(err);
|
||||
warnings.push(
|
||||
`Failed to kill over-budget ${classification} child pid ${row.pid}: ${killError}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
childProcesses.push({
|
||||
pid: row.pid,
|
||||
ppid: row.ppid,
|
||||
cmd: row.cmd,
|
||||
classification,
|
||||
ageMs: row.ageMs,
|
||||
nonBlocking,
|
||||
overBudget,
|
||||
action,
|
||||
killed: killed || undefined,
|
||||
killError,
|
||||
});
|
||||
}
|
||||
|
||||
// Derive state for milestone context
|
||||
try {
|
||||
const state = await deriveState(basePath);
|
||||
if (state.activeMilestone) {
|
||||
activeMilestone = {
|
||||
id: state.activeMilestone.id,
|
||||
title: state.activeMilestone.title,
|
||||
phase: state.phase,
|
||||
};
|
||||
}
|
||||
if (state.activeMilestone && !activeUnit) {
|
||||
recommendations.push(
|
||||
`No active unit detected, but milestone ${state.activeMilestone.id} is active. Consider dispatching the next unit.`,
|
||||
);
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
// State derivation is useful context but not required for the audit.
|
||||
}
|
||||
|
||||
const loopEvidence =
|
||||
activeUnit &&
|
||||
buildLoopEvidence(basePath, activeUnit.unitType, activeUnit.unitId);
|
||||
if (
|
||||
loopEvidence?.completedPriorTasks.length &&
|
||||
loopEvidence.missingSummaries.length
|
||||
) {
|
||||
warnings.push(
|
||||
`${loopEvidence.milestoneId}/${loopEvidence.sliceId} has ${loopEvidence.completedPriorTasks.length} completed prior tasks but missing final summary evidence for ${loopEvidence.missingSummaries.join(", ")}.`,
|
||||
);
|
||||
}
|
||||
|
||||
const feedback = readAllSelfFeedback(basePath);
|
||||
const milestoneId = activeMilestone?.id;
|
||||
const runawayHistory = collectRunawayHistory(
|
||||
runtimeUnits,
|
||||
feedback,
|
||||
milestoneId,
|
||||
);
|
||||
const repeatedFailureRollup = maybeRecordRepeatedFailureRollup(
|
||||
basePath,
|
||||
milestoneId,
|
||||
feedback,
|
||||
options,
|
||||
);
|
||||
if (repeatedFailureRollup?.filed) {
|
||||
recommendations.push(
|
||||
`Filed ${FLOW_AUDIT_ROLLUP_KIND} for ${milestoneId} after ${repeatedFailureRollup.count} repeated failures.`,
|
||||
);
|
||||
}
|
||||
|
||||
const recommendedAction = chooseRecommendedAction({
|
||||
activeUnit,
|
||||
sessionPointer,
|
||||
staleDispatchedUnits,
|
||||
childProcesses,
|
||||
lastErrors,
|
||||
activeMilestone,
|
||||
});
|
||||
if (!recommendations.includes(recommendedAction)) {
|
||||
recommendations.unshift(recommendedAction);
|
||||
}
|
||||
|
||||
return {
|
||||
ok: warnings.length === 0 && lastErrors.length === 0,
|
||||
ok:
|
||||
warnings.length === 0 &&
|
||||
lastErrors.length === 0 &&
|
||||
staleDispatchedUnits.length === 0,
|
||||
activeMilestone,
|
||||
activeUnit,
|
||||
sessionPointer,
|
||||
recommendations,
|
||||
recommendedAction,
|
||||
warnings,
|
||||
childProcesses,
|
||||
lastErrors,
|
||||
staleDispatchedUnits,
|
||||
runawayHistory,
|
||||
loopEvidence,
|
||||
repeatedFailureRollup,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,8 @@
|
|||
"sf_summary_save",
|
||||
"sf_requirement_update",
|
||||
"sf_milestone_generate_id",
|
||||
"sf_self_report"
|
||||
"sf_self_report",
|
||||
"sf_self_feedback_resolve"
|
||||
],
|
||||
"commands": ["sf", "kill", "worktree", "exit"],
|
||||
"hooks": [
|
||||
|
|
@ -25,6 +26,7 @@
|
|||
"session_fork",
|
||||
"before_agent_start",
|
||||
"agent_end",
|
||||
"turn_end",
|
||||
"session_before_compact",
|
||||
"session_shutdown",
|
||||
"tool_call",
|
||||
|
|
|
|||
179
src/resources/extensions/sf/model-route-failure.ts
Normal file
179
src/resources/extensions/sf/model-route-failure.ts
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
import type { Api, Model } from "@singularity-forge/pi-ai";
|
||||
import type { ModelFailureRecord } from "./auto/session.js";
|
||||
import { resolveModelId } from "./auto-model-selection.js";
|
||||
import type { ResolvedModelConfig } from "./preferences.js";
|
||||
|
||||
export interface ModelRouteRef {
|
||||
provider: string;
|
||||
id: string;
|
||||
}
|
||||
|
||||
export interface NextModelRouteResult {
|
||||
model: Model<Api>;
|
||||
route: string;
|
||||
source: "configured" | "available";
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the stable identity key for a concrete provider route.
|
||||
*
|
||||
* Purpose: make fallback recovery compare full provider/model routes instead of
|
||||
* ambiguous bare model ids.
|
||||
*
|
||||
* Consumer: resolveNextConfiguredModelRoute() when skipping failed and current
|
||||
* runtime routes.
|
||||
*/
|
||||
export function modelRouteKey(route: ModelRouteRef): string {
|
||||
return `${route.provider.toLowerCase()}/${route.id.toLowerCase()}`;
|
||||
}
|
||||
|
||||
function dedupeConfiguredRoutes(modelConfig: ResolvedModelConfig): string[] {
|
||||
const seen = new Set<string>();
|
||||
const routes: string[] = [];
|
||||
for (const route of [modelConfig.primary, ...modelConfig.fallbacks]) {
|
||||
const key = route.toLowerCase();
|
||||
if (seen.has(key)) continue;
|
||||
seen.add(key);
|
||||
routes.push(route);
|
||||
}
|
||||
return routes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the next configured model route after a provider/model failure.
|
||||
*
|
||||
* Purpose: keep auto-mode recovery inside the user's explicit primary/fallback
|
||||
* chain, skip routes already failed for this unit, and avoid returning the same
|
||||
* provider/model again.
|
||||
*
|
||||
* Consumer: bootstrap/agent-end-recovery.ts when a provider returns quota,
|
||||
* rate-limit, server, stream, or connection failures during a unit.
|
||||
*/
|
||||
export function resolveNextConfiguredModelRoute(args: {
|
||||
current: ModelRouteRef | undefined;
|
||||
modelConfig: ResolvedModelConfig;
|
||||
availableModels: Model<Api>[];
|
||||
failedRoutes: readonly ModelFailureRecord[];
|
||||
isBlocked?: (model: Model<Api>) => boolean;
|
||||
}): NextModelRouteResult | undefined {
|
||||
const routes = dedupeConfiguredRoutes(args.modelConfig);
|
||||
const currentKey = args.current ? modelRouteKey(args.current) : undefined;
|
||||
const failedKeys = new Set(
|
||||
args.failedRoutes.map((failure) =>
|
||||
modelRouteKey({ provider: failure.provider, id: failure.modelId }),
|
||||
),
|
||||
);
|
||||
|
||||
const resolvedRoutes = routes.map((configuredRoute) => ({
|
||||
configuredRoute,
|
||||
model: resolveModelId(
|
||||
configuredRoute,
|
||||
args.availableModels,
|
||||
args.current?.provider,
|
||||
) as Model<Api> | undefined,
|
||||
}));
|
||||
|
||||
const currentIndex =
|
||||
currentKey === undefined
|
||||
? -1
|
||||
: resolvedRoutes.findIndex(
|
||||
(route) => route.model && modelRouteKey(route.model) === currentKey,
|
||||
);
|
||||
const candidates =
|
||||
currentIndex >= 0 ? resolvedRoutes.slice(currentIndex + 1) : resolvedRoutes;
|
||||
|
||||
for (const candidate of candidates) {
|
||||
if (!candidate.model) continue;
|
||||
const candidateKey = modelRouteKey(candidate.model);
|
||||
if (candidateKey === currentKey) continue;
|
||||
if (failedKeys.has(candidateKey)) continue;
|
||||
if (args.isBlocked?.(candidate.model)) continue;
|
||||
return {
|
||||
model: candidate.model,
|
||||
route: candidate.configuredRoute,
|
||||
source: "configured",
|
||||
};
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve another currently available provider/model route when configured
|
||||
* fallbacks are missing or exhausted.
|
||||
*
|
||||
* Purpose: keep auto-mode moving on quota/rate-limit/server failures instead
|
||||
* of pausing just because the configured fallback chain did not cover every
|
||||
* live provider route.
|
||||
*
|
||||
* Consumer: bootstrap/agent-end-recovery.ts after configured fallback lookup
|
||||
* fails for a model-route failure.
|
||||
*/
|
||||
export function resolveNextAvailableModelRoute(args: {
|
||||
current: ModelRouteRef | undefined;
|
||||
availableModels: Model<Api>[];
|
||||
failedRoutes: readonly ModelFailureRecord[];
|
||||
isBlocked?: (model: Model<Api>) => boolean;
|
||||
}): NextModelRouteResult | undefined {
|
||||
const currentKey = args.current ? modelRouteKey(args.current) : undefined;
|
||||
const failedKeys = new Set(
|
||||
args.failedRoutes.map((failure) =>
|
||||
modelRouteKey({ provider: failure.provider, id: failure.modelId }),
|
||||
),
|
||||
);
|
||||
const candidates = args.availableModels.filter((model) => {
|
||||
const key = modelRouteKey(model);
|
||||
if (key === currentKey) return false;
|
||||
if (failedKeys.has(key)) return false;
|
||||
if (args.isBlocked?.(model)) return false;
|
||||
return true;
|
||||
});
|
||||
if (candidates.length === 0) return undefined;
|
||||
|
||||
const differentProvider =
|
||||
args.current &&
|
||||
candidates.find(
|
||||
(model) =>
|
||||
model.provider.toLowerCase() !== args.current!.provider.toLowerCase(),
|
||||
);
|
||||
const model = differentProvider ?? candidates[0];
|
||||
return {
|
||||
model,
|
||||
route: `${model.provider}/${model.id}`,
|
||||
source: "available",
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the next model route by trying configured policy first, then any
|
||||
* other live route.
|
||||
*
|
||||
* Purpose: preserve configured fallback ordering when it exists while still
|
||||
* enforcing the no-pause contract for transient provider/model failures.
|
||||
*
|
||||
* Consumer: bootstrap/agent-end-recovery.ts during provider-route recovery.
|
||||
*/
|
||||
export function resolveNextModelRoute(args: {
|
||||
current: ModelRouteRef | undefined;
|
||||
modelConfig: ResolvedModelConfig | undefined;
|
||||
availableModels: Model<Api>[];
|
||||
failedRoutes: readonly ModelFailureRecord[];
|
||||
isBlocked?: (model: Model<Api>) => boolean;
|
||||
}): NextModelRouteResult | undefined {
|
||||
if (args.modelConfig) {
|
||||
const configured = resolveNextConfiguredModelRoute({
|
||||
current: args.current,
|
||||
modelConfig: args.modelConfig,
|
||||
availableModels: args.availableModels,
|
||||
failedRoutes: args.failedRoutes,
|
||||
isBlocked: args.isBlocked,
|
||||
});
|
||||
if (configured) return configured;
|
||||
}
|
||||
return resolveNextAvailableModelRoute({
|
||||
current: args.current,
|
||||
availableModels: args.availableModels,
|
||||
failedRoutes: args.failedRoutes,
|
||||
isBlocked: args.isBlocked,
|
||||
});
|
||||
}
|
||||
|
|
@ -21,6 +21,7 @@ import type {
|
|||
ExtensionAPI,
|
||||
ExtensionContext,
|
||||
} from "@singularity-forge/pi-coding-agent";
|
||||
import { getErrorMessage } from "./error-utils.js";
|
||||
import { sfRuntimeRoot } from "./paths.js";
|
||||
import type { PersistedSelfFeedbackEntry } from "./self-feedback.js";
|
||||
import {
|
||||
|
|
@ -33,6 +34,7 @@ const CLAIM_TTL_MS = 30 * 60 * 1000;
|
|||
interface InlineFixClaim {
|
||||
ids: string[];
|
||||
dispatchedAt: string;
|
||||
lastDispatchError?: string;
|
||||
}
|
||||
|
||||
function claimPath(basePath: string): string {
|
||||
|
|
@ -63,6 +65,28 @@ function writeClaim(basePath: string, ids: string[]): void {
|
|||
);
|
||||
}
|
||||
|
||||
function writeFailedClaim(
|
||||
basePath: string,
|
||||
ids: string[],
|
||||
error: string,
|
||||
): void {
|
||||
const path = claimPath(basePath);
|
||||
mkdirSync(dirname(path), { recursive: true });
|
||||
writeFileSync(
|
||||
path,
|
||||
JSON.stringify(
|
||||
{
|
||||
ids,
|
||||
dispatchedAt: new Date(Date.now() - CLAIM_TTL_MS - 1).toISOString(),
|
||||
lastDispatchError: error,
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
"utf-8",
|
||||
);
|
||||
}
|
||||
|
||||
function clearClaim(basePath: string): void {
|
||||
try {
|
||||
unlinkSync(claimPath(basePath));
|
||||
|
|
@ -147,10 +171,10 @@ function buildInlineFixPrompt(entries: PersistedSelfFeedbackEntry[]): string {
|
|||
)
|
||||
.join("\n\n");
|
||||
|
||||
return [
|
||||
"You are executing SF self-feedback inline-fix mode.",
|
||||
"",
|
||||
"These high/critical self-feedback entries are unresolved sf defects. Do not only triage them; repair the current codebase directly.",
|
||||
return [
|
||||
"You are executing SF self-feedback inline-fix mode.",
|
||||
"",
|
||||
"These high/critical self-feedback entries are unresolved sf defects. Do not only triage them; repair the current codebase directly.",
|
||||
"",
|
||||
rendered,
|
||||
"",
|
||||
|
|
@ -159,8 +183,9 @@ function buildInlineFixPrompt(entries: PersistedSelfFeedbackEntry[]): string {
|
|||
"2. Fix the smallest coherent set of code/docs/tests needed to satisfy the acceptance criteria.",
|
||||
"3. Run focused verification and typecheck for touched areas.",
|
||||
"4. Commit the fix with a conventional commit message.",
|
||||
"5. Mark the repaired entries resolved in `.sf/self-feedback.jsonl` with agent-fix evidence and the commit SHA.",
|
||||
"6. If an entry is already fixed, mark it resolved with agent-fix evidence and explain the verification.",
|
||||
"5. Call `sf_self_feedback_resolve` for each repaired entry with agent-fix evidence and the commit SHA.",
|
||||
"6. If an entry is already fixed, verify it and call `sf_self_feedback_resolve` with the verification evidence.",
|
||||
"7. Do not hand-edit `.sf/self-feedback.jsonl`; use the resolver tool so markdown, JSONL, and reload detection stay consistent.",
|
||||
"",
|
||||
"When done, say: Self-feedback inline fix complete.",
|
||||
].join("\n");
|
||||
|
|
@ -195,17 +220,25 @@ export function dispatchSelfFeedbackInlineFixIfNeeded(
|
|||
writeClaim(basePath, ids);
|
||||
const prompt = buildInlineFixPrompt(candidates);
|
||||
ctx.ui.notify(
|
||||
`Dispatching self-feedback inline fix for ${ids.length} high/critical entr${ids.length === 1 ? "y" : "ies"}.`,
|
||||
`Queueing self-feedback inline fix for ${ids.length} high/critical entr${ids.length === 1 ? "y" : "ies"}.`,
|
||||
"warning",
|
||||
);
|
||||
pi.sendMessage(
|
||||
const dispatch = pi.sendMessage(
|
||||
{
|
||||
customType: "sf-self-feedback-inline-fix",
|
||||
content: prompt,
|
||||
display: false,
|
||||
},
|
||||
{ triggerTurn: true },
|
||||
{ triggerTurn: true, deliverAs: "followUp" },
|
||||
);
|
||||
void Promise.resolve(dispatch).catch((error) => {
|
||||
const message = getErrorMessage(error);
|
||||
writeFailedClaim(basePath, ids, message);
|
||||
ctx.ui.notify(
|
||||
`Self-feedback inline fix dispatch failed; will retry at the next idle point: ${message}`,
|
||||
"warning",
|
||||
);
|
||||
});
|
||||
return candidates.length;
|
||||
}
|
||||
|
||||
|
|
|
|||
171
src/resources/extensions/sf/tests/commands-harness.test.ts
Normal file
171
src/resources/extensions/sf/tests/commands-harness.test.ts
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { execFileSync } from "node:child_process";
|
||||
import {
|
||||
appendFileSync,
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
mkdtempSync,
|
||||
readFileSync,
|
||||
realpathSync,
|
||||
rmSync,
|
||||
symlinkSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, test } from "vitest";
|
||||
|
||||
import { handleHarness } from "../commands-harness.ts";
|
||||
import { profileRepository } from "../repo-profiler.ts";
|
||||
import {
|
||||
closeDatabase,
|
||||
getRepoFileObservations,
|
||||
openDatabase,
|
||||
recordRepoProfile,
|
||||
} from "../sf-db.ts";
|
||||
|
||||
const originalCwd = process.cwd();
|
||||
const originalProjectRoot = process.env.SF_PROJECT_ROOT;
|
||||
let roots: string[] = [];
|
||||
|
||||
afterEach(() => {
|
||||
process.chdir(originalCwd);
|
||||
closeDatabase();
|
||||
for (const root of roots) rmSync(root, { recursive: true, force: true });
|
||||
roots = [];
|
||||
if (originalProjectRoot === undefined) delete process.env.SF_PROJECT_ROOT;
|
||||
else process.env.SF_PROJECT_ROOT = originalProjectRoot;
|
||||
});
|
||||
|
||||
function runGit(args: string[], cwd: string): string {
|
||||
return execFileSync("git", args, {
|
||||
cwd,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
encoding: "utf-8",
|
||||
}).trim();
|
||||
}
|
||||
|
||||
function makeRepo(prefix: string): string {
|
||||
const repo = realpathSync(mkdtempSync(join(tmpdir(), prefix)));
|
||||
roots.push(repo);
|
||||
runGit(["init", "-b", "main"], repo);
|
||||
runGit(["config", "user.email", "test@example.com"], repo);
|
||||
runGit(["config", "user.name", "SF Test"], repo);
|
||||
writeFileSync(join(repo, "README.md"), "# Repo\n", "utf8");
|
||||
writeFileSync(
|
||||
join(repo, "package.json"),
|
||||
'{"scripts":{"test":"node --test"}}\n',
|
||||
"utf8",
|
||||
);
|
||||
runGit(["add", "README.md", "package.json"], repo);
|
||||
runGit(["commit", "-m", "init"], repo);
|
||||
return repo;
|
||||
}
|
||||
|
||||
// Replace the repo's .sf directory with a symlink to state stored outside
// the worktree, mimicking setups where runtime state lives off-repo.
function makeExternalSfState(repo: string): string {
  const externalState = realpathSync(mkdtempSync(join(tmpdir(), "sf-state-")));
  roots.push(externalState); // cleaned up by afterEach
  // NOTE(review): the "junction" type argument is only honored on Windows
  // and is ignored on POSIX — confirm that is intentional here.
  symlinkSync(externalState, join(repo, ".sf"), "junction");
  // Exclude the symlink locally so `git status` in the tests stays clean.
  appendFileSync(join(repo, ".git", "info", "exclude"), "\n.sf\n", "utf8");
  return externalState;
}
|
||||
|
||||
function makeMockCtx(): {
|
||||
notifications: Array<{ message: string; level?: string }>;
|
||||
ui: { notify(message: string, level?: string): void };
|
||||
} {
|
||||
const notifications: Array<{ message: string; level?: string }> = [];
|
||||
return {
|
||||
notifications,
|
||||
ui: {
|
||||
notify(message: string, level?: string) {
|
||||
notifications.push({ message, level });
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Promoting a finding must write a tracked docs artifact inside the repo
// worktree — never into the external runtime state that .sf symlinks to —
// and must be deterministic across repeated promotions.
test("harnessPromote_when_sf_is_external_symlink_writes_tracked_docs_not_runtime_target", async () => {
  const repo = makeRepo("sf-harness-promote-");
  const externalState = makeExternalSfState(repo);
  mkdirSync(join(repo, "notes"), { recursive: true });
  writeFileSync(join(repo, "notes", "local-finding.md"), "# Finding\n", "utf8");

  // Seed the runtime DB (through the .sf symlink) with a recorded profile
  // so the promote command has a finding to materialize. The fixed `now`
  // keeps the recorded profile — and the promotion output — deterministic.
  closeDatabase();
  assert.equal(openDatabase(join(repo, ".sf", "sf.db")), true);
  recordRepoProfile(
    profileRepository(repo, {
      now: () => "2026-05-02T10:00:00.000Z",
    }),
  );
  closeDatabase();

  // Resolve the project root from cwd rather than the env override.
  delete process.env.SF_PROJECT_ROOT;
  process.chdir(repo);
  const ctx = makeMockCtx();

  await handleHarness("promote sf-moocr4rv-au7r3l", ctx as any);

  const relativeArtifact =
    "docs/exec-plans/active/harness-promotion-sf-moocr4rv-au7r3l.md";
  const artifact = join(repo, relativeArtifact);
  assert.ok(existsSync(artifact), "promotion writes a repo docs artifact");
  assert.ok(
    !existsSync(join(externalState, relativeArtifact)),
    "promotion must not write into the external .sf symlink target",
  );
  // `?? <path>` from git status means untracked-but-visible repo output.
  assert.equal(
    runGit(["status", "--short", "--", relativeArtifact], repo),
    `?? ${relativeArtifact}`,
    "promoted docs artifact is visible to git as repo output",
  );

  // Re-promoting the same finding must be idempotent and timestamp-free.
  const firstContent = readFileSync(artifact, "utf8");
  await handleHarness("promote sf-moocr4rv-au7r3l", ctx as any);
  assert.equal(
    readFileSync(artifact, "utf8"),
    firstContent,
    "promotion content is deterministic for the same recorded profile",
  );
  assert.doesNotMatch(firstContent, /Promoted at:/);
  assert.match(
    firstContent,
    /Unpromoted \.sf runtime observations remain `observed_only`/,
  );
  assert.match(firstContent, /"ownership": "observed_only"/);
  assert.match(
    firstContent,
    new RegExp(`Repo artifact: \`${relativeArtifact}\``),
  );
  // The final user-facing notice restates the observation boundary.
  assert.match(
    ctx.notifications.at(-1)?.message ?? "",
    /Unpromoted \.sf runtime state remains observed_only/,
  );
});
|
||||
|
||||
// Profiling records observations in runtime state only; it must not create
// anything under docs/ and must tell the user how to promote findings.
test("harnessProfile_when_recording_runtime_state_reports_no_repo_artifact", async () => {
  const repo = makeRepo("sf-harness-profile-");
  makeExternalSfState(repo);
  mkdirSync(join(repo, "notes"), { recursive: true });
  writeFileSync(join(repo, "notes", "scratch.md"), "# Scratch\n", "utf8");

  // Resolve the project root from cwd rather than the env override.
  delete process.env.SF_PROJECT_ROOT;
  process.chdir(repo);
  const ctx = makeMockCtx();

  await handleHarness("profile", ctx as any);

  // The profile run lands in the runtime DB as observed-only state …
  const observations = getRepoFileObservations();
  const scratch = observations.find((obs) => obs.path === "notes/scratch.md");
  assert.equal(scratch?.ownership, "observed_only");
  // … and produces no repo-committable output.
  assert.ok(
    !existsSync(join(repo, "docs", "exec-plans", "active")),
    "profile does not create repo-committable docs output",
  );

  // The notice must state the boundary and point at the promote command.
  const notice = ctx.notifications[0]?.message ?? "";
  assert.match(notice, /Runtime observation boundary:/);
  assert.match(notice, /No repo-committable artifact was written/);
  assert.match(notice, /\/sf harness promote <finding-id>/);
  assert.doesNotMatch(notice, /tracked documentation artifact created/);
});
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { test } from "vitest";
|
||||
|
||||
import { classifyError } from "../error-classifier.ts";
|
||||
|
||||
test("quota_reset_after_seconds_is_rate_limit_with_retry_delay", () => {
|
||||
const result = classifyError(
|
||||
"You have exhausted your capacity on this model. Your quota will reset after 33s.",
|
||||
);
|
||||
|
||||
assert.equal(result.kind, "rate-limit");
|
||||
if (result.kind === "rate-limit") {
|
||||
assert.equal(result.retryAfterMs, 33_000);
|
||||
}
|
||||
});
|
||||
255
src/resources/extensions/sf/tests/flow-audit.test.ts
Normal file
255
src/resources/extensions/sf/tests/flow-audit.test.ts
Normal file
|
|
@ -0,0 +1,255 @@
|
|||
import assert from "node:assert/strict";
|
||||
import {
|
||||
mkdirSync,
|
||||
mkdtempSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, describe, test } from "vitest";
|
||||
import { runFlowAudit } from "../doctor.ts";
|
||||
import { readAllSelfFeedback, recordSelfFeedback } from "../self-feedback.ts";
|
||||
|
||||
const roots: string[] = [];
|
||||
|
||||
// Remove all temp project roots created during the test run.
afterEach(() => {
  for (const root of roots) rmSync(root, { recursive: true, force: true });
  roots.length = 0; // keep the same array instance; just empty it
});
|
||||
|
||||
function makeForgeProject(): string {
|
||||
const root = mkdtempSync(join(tmpdir(), "sf-flow-audit-"));
|
||||
roots.push(root);
|
||||
mkdirSync(join(root, ".sf"), { recursive: true });
|
||||
writeFileSync(
|
||||
join(root, "package.json"),
|
||||
JSON.stringify({ name: "singularity-forge", version: "0.0.1" }),
|
||||
"utf-8",
|
||||
);
|
||||
return root;
|
||||
}
|
||||
|
||||
// Lay down on-disk evidence of a stalled M007/S01/T10 execute-task loop:
// an auto.lock pointing at a live session, a runtime unit record whose
// last progress is 31 minutes old, a slice plan with 9 of 10 tasks done
// (T10's summary missing), one error notification, and runaway-guard
// self-feedback for the last three tasks. `nowMs` anchors all timestamps.
function writeM007LoopFixture(root: string, nowMs: number): void {
  const sf = join(root, ".sf");
  const unitId = "M007/S01/T10";
  const startedAt = nowMs - 45 * 60 * 1000; // unit dispatched 45m ago
  const lastProgressAt = nowMs - 31 * 60 * 1000; // last progress 31m ago
  const sliceDir = join(sf, "milestones", "M007", "slices", "S01");
  const tasksDir = join(sliceDir, "tasks");
  const unitsDir = join(sf, "runtime", "units");
  mkdirSync(tasksDir, { recursive: true });
  mkdirSync(unitsDir, { recursive: true });

  // auto.lock: the daemon claims a dispatched unit with a session pointer
  // the audit is expected to surface for inspection.
  writeFileSync(
    join(sf, "auto.lock"),
    JSON.stringify(
      {
        pid: 5000,
        unitType: "execute-task",
        unitId,
        phase: "dispatched",
        startedAt: new Date(startedAt).toISOString(),
        sessionId: "sess-m007",
        sessionFile: "/tmp/sessions/m007.jsonl",
      },
      null,
      2,
    ),
    "utf-8",
  );
  // Runtime unit record, including a prior runaway-guard pause on T09 so
  // the audit can report repeated-failure history for this milestone.
  writeFileSync(
    join(unitsDir, "execute-task-M007-S01-T10.json"),
    JSON.stringify(
      {
        version: 1,
        unitType: "execute-task",
        unitId,
        startedAt,
        updatedAt: lastProgressAt,
        phase: "dispatched",
        wrapupWarningSent: false,
        continueHereFired: false,
        timeoutAt: null,
        lastProgressAt,
        progressCount: 0,
        lastProgressKind: "dispatch",
        runawayGuardPause: {
          reason: "Runaway guard paused execute-task M007/S01/T09",
          pausedAt: lastProgressAt - 60_000,
          unitType: "execute-task",
          unitId: "M007/S01/T09",
          diagnosticTurns: 2,
          warningsSent: 2,
          thresholdReasons: ["budget kept growing"],
          metrics: {
            toolCalls: 90,
            sessionTokens: 1_200_000,
            elapsedMs: 2_000_000,
            changedFiles: 0,
            worktreeChangedSinceStart: false,
            topTools: { read: 80, bash: 10 },
          },
          thresholds: {
            toolCallWarning: 60,
            tokenWarning: 1_000_000,
            elapsedMs: 1_200_000,
            changedFilesWarning: 75,
            minIntervalMs: 120_000,
          },
        },
      },
      null,
      2,
    ),
    "utf-8",
  );

  // Slice plan: T01..T09 checked off with summaries on disk; T10 unchecked
  // and missing its summary — the loop evidence the audit must report.
  const taskLines: string[] = [];
  for (let i = 1; i <= 10; i++) {
    const id = `T${String(i).padStart(2, "0")}`;
    taskLines.push(
      `- [${i < 10 ? "x" : " "}] **${id}: Task ${i}** \`est:10m\``,
    );
    if (i < 10) {
      writeFileSync(
        join(tasksDir, `${id}-SUMMARY.md`),
        `# ${id} summary\n\nDone.\n`,
        "utf-8",
      );
    }
  }
  writeFileSync(
    join(sliceDir, "S01-PLAN.md"),
    `# S01: Loop Evidence\n\n## Tasks\n\n${taskLines.join("\n")}\n`,
    "utf-8",
  );
  // One recorded error notification the audit should echo back.
  writeFileSync(
    join(sf, "notifications.jsonl"),
    JSON.stringify({
      severity: "error",
      message: "session creation failed before final summary",
    }) + "\n",
    "utf-8",
  );

  // Runaway-guard self-feedback for the tail tasks feeds the
  // repeated-milestone-failure rollup checked by the audit test.
  for (const task of ["T08", "T09", "T10"]) {
    recordSelfFeedback(
      {
        kind: "runaway-guard-hard-pause",
        severity: "medium",
        summary: `Runaway guard paused execute-task M007/S01/${task}`,
        evidence: `${task} had no final closure`,
        source: "detector",
        occurredIn: {
          milestone: "M007",
          slice: "S01",
          task,
          unitType: "execute-task",
        },
      },
      root,
    );
  }
}
|
||||
|
||||
describe("flow audit", () => {
  // End-to-end over the M007 fixture: the audit must pinpoint the stale
  // dispatched unit, expose the session pointer for inspection, classify
  // child processes, and record a single open self-feedback rollup.
  test("audit_when_m007_loop_evidence_exists_reports_actionable_stale_flow", async () => {
    const root = makeForgeProject();
    const nowMs = Date.UTC(2026, 4, 2, 13, 45, 0); // 2026-05-02T13:45Z
    writeM007LoopFixture(root, nowMs);

    // Injected ps output: the daemon (5000), an over-budget warmup child
    // (5100, 2400s elapsed), and a young tool-session child (5200, 120s).
    const result = await runFlowAudit(root, {
      nowMs,
      psOutput:
        "5000 1 2700 node dist/loader.js sf headless auto\n" +
        "5100 5000 2400 sift search --json --strategy page-index-hybrid warmup\n" +
        "5200 5000 120 node dist/loader.js sf tool-session\n",
      staleProgressMs: 20 * 60 * 1000,
      optionalChildBudgetMs: 30 * 60 * 1000,
    });

    assert.equal(result.ok, false);
    assert.equal(result.activeMilestone?.id, "M007");
    assert.equal(result.activeUnit?.unitId, "M007/S01/T10");
    assert.equal(result.activeUnit?.progressAgeMs, 31 * 60 * 1000);
    assert.equal(result.sessionPointer?.sessionId, "sess-m007");
    assert.equal(
      result.sessionPointer?.sessionFile,
      "/tmp/sessions/m007.jsonl",
    );
    assert.equal(result.staleDispatchedUnits.length, 1);
    assert.match(result.warnings.join("\n"), /no progress for 31 minutes/);
    assert.deepEqual(result.loopEvidence?.completedPriorTasks.slice(-2), [
      "T08",
      "T09",
    ]);
    assert.match(result.loopEvidence?.missingSummaries.join("\n") ?? "", /T10/);
    assert.match(result.lastErrors.join("\n"), /session creation failed/);
    assert.match(result.runawayHistory.join("\n"), /M007\/S01\/T09/);
    assert.match(result.recommendedAction, /Inspect session/);

    // Over-budget warmup work is reported but must never block the flow.
    const warmup = result.childProcesses.find((p) => p.pid === 5100);
    assert.ok(warmup, "warmup child should be reported");
    assert.equal(warmup.classification, "warmup");
    assert.equal(warmup.nonBlocking, true);
    assert.equal(warmup.overBudget, true);
    assert.equal(warmup.action, "non-blocking");

    const active = result.childProcesses.find((p) => p.pid === 5200);
    assert.ok(active, "active tool child should be reported");
    assert.equal(active.classification, "active-session");
    assert.equal(active.nonBlocking, false);

    // Exactly one open repeated-failure rollup for the milestone …
    const entries = readAllSelfFeedback(root);
    const rollups = entries.filter(
      (e) =>
        e.kind === "flow-audit:repeated-milestone-failure" && !e.resolvedAt,
    );
    assert.equal(rollups.length, 1);
    assert.equal(rollups[0]?.severity, "high");
    assert.match(rollups[0]?.summary ?? "", /M007/);
    assert.match(rollups[0]?.acceptanceCriteria ?? "", /stale dispatched unit/);

    // … and a second audit run must not duplicate it while it stays open.
    await runFlowAudit(root, { nowMs, psOutput: "" });
    assert.equal(
      readAllSelfFeedback(root).filter(
        (e) => e.kind === "flow-audit:repeated-milestone-failure",
      ).length,
      1,
      "same milestone rollup stays single while open",
    );
  });

  // Killing over-budget optional children is opt-in and goes through the
  // injected killProcess, so no real signal is sent from the test.
  test("audit_when_optional_child_is_over_budget_can_kill_it_explicitly", async () => {
    const root = makeForgeProject();
    const killed: number[] = [];
    const result = await runFlowAudit(root, {
      nowMs: Date.UTC(2026, 4, 2, 13, 45, 0),
      psOutput:
        "5100 5000 2400 sift search --json --strategy page-index-hybrid warmup\n",
      optionalChildBudgetMs: 60_000,
      killOverBudgetChildren: true,
      killProcess: (pid) => {
        killed.push(pid);
      },
    });

    assert.deepEqual(killed, [5100]);
    assert.equal(result.childProcesses[0]?.classification, "warmup");
    assert.equal(result.childProcesses[0]?.action, "kill");
    assert.equal(result.childProcesses[0]?.killed, true);
  });

  // Structural check: the session_start hook must wire the auditor in.
  // NOTE(review): source-grep tests are brittle; they pin names, not behavior.
  test("session_start_when_registered_runs_flow_auditor", () => {
    const source = readFileSync(
      join(import.meta.dirname, "..", "bootstrap", "register-hooks.ts"),
      "utf-8",
    );

    assert.match(source, /pi\.on\("session_start"/);
    assert.match(source, /runFlowAudit/);
    assert.match(source, /Flow audit:/);
  });
});
|
||||
155
src/resources/extensions/sf/tests/model-route-failure.test.ts
Normal file
155
src/resources/extensions/sf/tests/model-route-failure.test.ts
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { describe, test } from "vitest";
|
||||
|
||||
import {
|
||||
modelRouteKey,
|
||||
resolveNextAvailableModelRoute,
|
||||
resolveNextConfiguredModelRoute,
|
||||
resolveNextModelRoute,
|
||||
} from "../model-route-failure.ts";
|
||||
|
||||
// Candidate routes visible to the recovery helpers. Two entries share the
// model id "gemini-3-flash-preview" across different providers on purpose:
// route identity is provider+id, not id alone.
// NOTE(review): `as any[]` bypasses the real model type — consider typing
// this against the helpers' parameter type instead.
const models = [
  { provider: "google-gemini-cli", id: "gemini-3-flash-preview" },
  { provider: "google", id: "gemini-3-flash-preview" },
  { provider: "anthropic", id: "claude-sonnet-4-6" },
  { provider: "zai", id: "glm-5.1" },
] as any[];
|
||||
|
||||
describe("configured model route failure recovery", () => {
  // A failure on the current route advances to the first configured
  // fallback that has not itself failed for this unit.
  test("quota_when_current_route_fails_returns_next_configured_fallback", () => {
    const next = resolveNextConfiguredModelRoute({
      current: {
        provider: "google-gemini-cli",
        id: "gemini-3-flash-preview",
      },
      modelConfig: {
        primary: "google-gemini-cli/gemini-3-flash-preview",
        fallbacks: ["anthropic/claude-sonnet-4-6", "zai/glm-5.1"],
      },
      availableModels: models,
      failedRoutes: [
        {
          unitType: "execute-task",
          unitId: "M001/S01/T01",
          provider: "google-gemini-cli",
          modelId: "gemini-3-flash-preview",
          reason: "quota reset after 33s",
          timestamp: 1,
        },
      ],
    });

    assert.equal(next?.model.provider, "anthropic");
    assert.equal(next?.model.id, "claude-sonnet-4-6");
  });

  // A current route outside the configured chain starts recovery at the
  // configured primary rather than skipping the chain.
  test("current_model_not_in_config_starts_at_configured_primary", () => {
    const next = resolveNextConfiguredModelRoute({
      current: { provider: "google-gemini-cli", id: "gemini-3-flash-preview" },
      modelConfig: {
        primary: "anthropic/claude-sonnet-4-6",
        fallbacks: ["zai/glm-5.1"],
      },
      availableModels: models,
      failedRoutes: [],
    });

    assert.equal(next?.model.provider, "anthropic");
    assert.equal(next?.model.id, "claude-sonnet-4-6");
  });

  // With the current route at the tail of the chain and marked failed,
  // the configured-only resolver yields nothing (it evidently does not
  // wrap back to earlier chain entries — see the next test for the
  // available-route escape hatch).
  test("exhausted_chain_returns_undefined", () => {
    const next = resolveNextConfiguredModelRoute({
      current: { provider: "zai", id: "glm-5.1" },
      modelConfig: {
        primary: "anthropic/claude-sonnet-4-6",
        fallbacks: ["zai/glm-5.1"],
      },
      availableModels: models,
      failedRoutes: [
        {
          unitType: "execute-task",
          unitId: "M001/S01/T01",
          provider: "zai",
          modelId: "glm-5.1",
          reason: "server overloaded",
          timestamp: 1,
        },
      ],
    });

    assert.equal(next, undefined);
  });

  // The combined resolver falls back to any other available route (marked
  // source "available") before giving up, once the chain is exhausted.
  test("exhausted_configured_chain_uses_available_route_before_pause", () => {
    const next = resolveNextModelRoute({
      current: { provider: "zai", id: "glm-5.1" },
      modelConfig: {
        primary: "anthropic/claude-sonnet-4-6",
        fallbacks: ["zai/glm-5.1"],
      },
      availableModels: models,
      failedRoutes: [
        {
          unitType: "execute-task",
          unitId: "M001/S01/T01",
          provider: "zai",
          modelId: "glm-5.1",
          reason: "server overloaded",
          timestamp: 1,
        },
      ],
    });

    assert.equal(next?.source, "available");
    assert.equal(next?.model.provider, "google-gemini-cli");
    assert.equal(next?.model.id, "gemini-3-flash-preview");
  });

  // With no model config at all, the available-route resolver avoids the
  // provider that just failed.
  test("missing_config_uses_available_route_and_prefers_different_provider", () => {
    const next = resolveNextAvailableModelRoute({
      current: { provider: "google-gemini-cli", id: "gemini-3-flash-preview" },
      availableModels: models,
      failedRoutes: [
        {
          unitType: "execute-task",
          unitId: "M001/S01/T01",
          provider: "google-gemini-cli",
          modelId: "gemini-3-flash-preview",
          reason: "quota",
          timestamp: 1,
        },
      ],
    });

    assert.equal(next?.source, "available");
    assert.notEqual(next?.model.provider, "google-gemini-cli");
    assert.notEqual(
      modelRouteKey(next!.model),
      "google-gemini-cli/gemini-3-flash-preview",
    );
  });

  // Route identity is provider+id: the same model id under a different
  // provider is a distinct, still-usable route.
  test("provider_model_identity_skips_only_the_failed_route", () => {
    const next = resolveNextConfiguredModelRoute({
      current: { provider: "google-gemini-cli", id: "gemini-3-flash-preview" },
      modelConfig: {
        primary: "google-gemini-cli/gemini-3-flash-preview",
        fallbacks: ["google/gemini-3-flash-preview"],
      },
      availableModels: models,
      failedRoutes: [
        {
          unitType: "execute-task",
          unitId: "M001/S01/T01",
          provider: "google-gemini-cli",
          modelId: "gemini-3-flash-preview",
          reason: "quota",
          timestamp: 1,
        },
      ],
    });

    assert.equal(modelRouteKey(next!.model), "google/gemini-3-flash-preview");
  });
});
|
||||
|
|
@ -8,8 +8,8 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
import { test } from 'vitest';
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { test } from "vitest";
|
||||
import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.ts";
|
||||
import {
|
||||
classifyError,
|
||||
|
|
@ -388,22 +388,18 @@ test("resumeAutoAfterProviderDelay restarts paused auto-mode from the recorded b
|
|||
ui: { notify() {} },
|
||||
newSession: async () => ({ cancelled: false }),
|
||||
} as any;
|
||||
const result = await resumeAutoAfterProviderDelay(
|
||||
{} as any,
|
||||
commandCtx,
|
||||
{
|
||||
getSnapshot: () => ({
|
||||
active: false,
|
||||
paused: true,
|
||||
stepMode: true,
|
||||
basePath: "/tmp/project",
|
||||
}),
|
||||
resetTransientRetryState: () => {},
|
||||
startAuto: async (_ctx, _pi, base, verboseMode, options) => {
|
||||
startCalls.push({ base, verboseMode, step: options?.step });
|
||||
},
|
||||
const result = await resumeAutoAfterProviderDelay({} as any, commandCtx, {
|
||||
getSnapshot: () => ({
|
||||
active: false,
|
||||
paused: true,
|
||||
stepMode: true,
|
||||
basePath: "/tmp/project",
|
||||
}),
|
||||
resetTransientRetryState: () => {},
|
||||
startAuto: async (_ctx, _pi, base, verboseMode, options) => {
|
||||
startCalls.push({ base, verboseMode, step: options?.step });
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
assert.equal(result, "resumed");
|
||||
assert.deepEqual(startCalls, [
|
||||
|
|
@ -545,21 +541,21 @@ test("resumeAutoAfterProviderDelay leaves paused when no command context is avai
|
|||
]);
|
||||
});
|
||||
|
||||
// ── Escalating backoff for transient errors (#1166) ─────────────────────────
|
||||
// ── Configured model-route recovery for provider failures ───────────────────
|
||||
|
||||
test("agent-end-recovery.ts tracks consecutive transient errors for escalating backoff", () => {
|
||||
test("agent-end-recovery.ts records failed provider routes for configured fallback", () => {
|
||||
const src = readFileSync(
|
||||
join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"),
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
assert.ok(
|
||||
src.includes("consecutiveTransientCount"),
|
||||
"agent-end-recovery.ts must track consecutiveTransientCount for escalating backoff (#1166)",
|
||||
src.includes("recordCurrentModelFailure"),
|
||||
"agent-end-recovery.ts must record failed provider/model routes before resolving fallbacks",
|
||||
);
|
||||
assert.ok(
|
||||
src.includes("MAX_TRANSIENT_AUTO_RESUMES"),
|
||||
"agent-end-recovery.ts must define MAX_TRANSIENT_AUTO_RESUMES to cap infinite retries (#1166)",
|
||||
src.includes("getCurrentUnitModelFailures"),
|
||||
"agent-end-recovery.ts must skip routes already failed for the current unit",
|
||||
);
|
||||
});
|
||||
|
||||
|
|
@ -576,34 +572,35 @@ test("agent-end-recovery.ts resets retry state before resolveAgentEnd on success
|
|||
);
|
||||
});
|
||||
|
||||
test("agent-end-recovery.ts applies escalating delay for repeated transient errors", () => {
|
||||
test("agent-end-recovery.ts does not sleep or same-route retry model-route failures", () => {
|
||||
const src = readFileSync(
|
||||
join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"),
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
// Must contain the exponential backoff formula (may span multiple lines)
|
||||
assert.ok(
|
||||
src.includes("2 ** Math.max(0, retryState.consecutiveTransientCount"),
|
||||
"agent-end-recovery.ts must escalate retryAfterMs exponentially for consecutive transient errors (#1166)",
|
||||
!src.includes("pauseTransientWithBackoff"),
|
||||
"model-route failures must not enter same-model transient backoff",
|
||||
);
|
||||
assert.ok(
|
||||
!src.includes("resumeAutoAfterProviderDelay"),
|
||||
"model-route failures must not schedule same-model auto-resume",
|
||||
);
|
||||
});
|
||||
|
||||
test("agent-end-recovery.ts resumes transient provider pauses through startAuto instead of a hidden prompt", () => {
|
||||
test("agent-end-recovery.ts sends hidden continue after any successful fallback switch", () => {
|
||||
const src = readFileSync(
|
||||
join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"),
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
assert.ok(
|
||||
src.includes("resumeAutoAfterProviderDelay"),
|
||||
"agent-end-recovery.ts must resume paused auto-mode through resumeAutoAfterProviderDelay (#2813)",
|
||||
src.includes('customType: "sf-auto-timeout-recovery"'),
|
||||
"successful fallback switches should continue the active unit with a hidden message",
|
||||
);
|
||||
assert.ok(
|
||||
!src.includes(
|
||||
"Continue execution — provider error recovery delay elapsed.",
|
||||
),
|
||||
"transient provider resume must not rely on a hidden continue prompt (#2813)",
|
||||
src.includes("configured fallback") && src.includes("available fallback"),
|
||||
"hidden continue must be tied to a successful model switch, whether configured or available",
|
||||
);
|
||||
});
|
||||
|
||||
|
|
@ -613,8 +610,9 @@ test("agent-end-recovery.ts does not defer rate-limit errors to core retry handl
|
|||
"utf-8",
|
||||
);
|
||||
assert.ok(
|
||||
src.includes('if (isTransient(cls) && cls.kind !== "rate-limit")'),
|
||||
"rate-limit errors must bypass transient core-retry deferral so fallback can execute (#4373)",
|
||||
src.includes("isModelRouteFailure(cls)") &&
|
||||
src.includes('cls.kind === "rate-limit"'),
|
||||
"rate-limit errors must enter model-route recovery before pausing (#4373)",
|
||||
);
|
||||
});
|
||||
|
||||
|
|
@ -624,8 +622,8 @@ test("agent-end-recovery.ts updates dashboard dispatched model after fallback sw
|
|||
"utf-8",
|
||||
);
|
||||
assert.ok(
|
||||
src.includes("setCurrentDispatchedModelId"),
|
||||
"agent-end-recovery.ts should update currentDispatchedModelId when recovery switches model",
|
||||
src.includes("setCurrentUnitModel"),
|
||||
"agent-end-recovery.ts should update current unit/dashboard model state when recovery switches model",
|
||||
);
|
||||
});
|
||||
|
||||
|
|
@ -704,19 +702,17 @@ test("phases.ts handles timeout session-creation failures with pause instead of
|
|||
);
|
||||
});
|
||||
|
||||
// ── Fix 3: MAX_TRANSIENT_AUTO_RESUMES raised to 8 ───────────────────────────
|
||||
// ── Fix 3: same-route transient retry cap removed for route failures ────────
|
||||
|
||||
test("MAX_TRANSIENT_AUTO_RESUMES is at least 8 for sustained overload resilience", () => {
|
||||
test("agent-end-recovery.ts does not keep a same-route transient resume cap", () => {
|
||||
const src = readFileSync(
|
||||
join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"),
|
||||
"utf-8",
|
||||
);
|
||||
const match = src.match(/MAX_TRANSIENT_AUTO_RESUMES\s*=\s*(\d+)/);
|
||||
assert.ok(match, "MAX_TRANSIENT_AUTO_RESUMES must be defined");
|
||||
const value = Number(match![1]);
|
||||
|
||||
assert.ok(
|
||||
value >= 8,
|
||||
`MAX_TRANSIENT_AUTO_RESUMES must be >= 8 for sustained overload resilience, got ${value}`,
|
||||
!src.includes("MAX_TRANSIENT_AUTO_RESUMES"),
|
||||
"provider route failures should switch explicit routes or pause, not count same-route resumes",
|
||||
);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,16 @@
|
|||
/**
|
||||
* rate-limit-model-fallback.test.ts — Regression test for #2770.
|
||||
*
|
||||
* Rate-limit errors enter the model fallback path before falling through
|
||||
* to pause. This verifies the structural contract in agent-end-recovery.ts.
|
||||
* Rate-limit errors enter model-route fallback before pausing.
|
||||
* Recovery must switch to configured fallbacks first, then any other available
|
||||
* route before pausing.
|
||||
*/
|
||||
|
||||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
import { test } from 'vitest';
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { test } from "vitest";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const RECOVERY_PATH = join(
|
||||
|
|
@ -28,16 +29,10 @@ function getRecoverySource(): string {
|
|||
test("rate-limit errors enter the model fallback branch alongside other transient errors", () => {
|
||||
const src = getRecoverySource();
|
||||
|
||||
// The condition that gates model fallback must include rate-limit.
|
||||
// Match the if-condition that contains both "rate-limit" and fallback-related kinds.
|
||||
const fallbackConditionRe =
|
||||
/if\s*\([^)]*cls\.kind\s*===\s*"rate-limit"[^)]*cls\.kind\s*===\s*"network"/;
|
||||
const fallbackConditionReAlt =
|
||||
/if\s*\([^)]*cls\.kind\s*===\s*"network"[^)]*cls\.kind\s*===\s*"rate-limit"/;
|
||||
|
||||
assert.ok(
|
||||
fallbackConditionRe.test(src) || fallbackConditionReAlt.test(src),
|
||||
"rate-limit must appear in the same if-condition as network/server for model fallback (#2770)",
|
||||
src.includes('cls.kind === "rate-limit"') &&
|
||||
src.includes("isModelRouteFailure(cls)"),
|
||||
"rate-limit must enter the configured model-route failure path (#2770)",
|
||||
);
|
||||
});
|
||||
|
||||
|
|
@ -54,23 +49,50 @@ test("rate-limit errors are NOT short-circuited to pause before model fallback",
|
|||
);
|
||||
});
|
||||
|
||||
test("rate-limit errors fall through to pause if no fallback model is available", () => {
|
||||
test("model fallback uses configured routes first then automatic available routes", () => {
|
||||
const src = getRecoverySource();
|
||||
|
||||
// After the fallback block, the transient fallback pause must still fire for rate-limit.
|
||||
// The isTransient check covers rate-limit (verified by error-classifier tests).
|
||||
// Verify pauseTransientWithBackoff is called with isRateLimit derived from cls.kind.
|
||||
assert.ok(
|
||||
src.includes('cls.kind === "rate-limit"'),
|
||||
'agent-end-recovery.ts must reference cls.kind === "rate-limit" for fallback and pause paths (#2770)',
|
||||
src.includes("resolveNextModelRoute"),
|
||||
"agent-end-recovery.ts must route through the configured-or-available route helper",
|
||||
);
|
||||
|
||||
// The transient fallback pause must pass the isRateLimit flag correctly.
|
||||
const pauseCallRe =
|
||||
/pauseTransientWithBackoff\([^)]*cls\.kind\s*===\s*"rate-limit"/;
|
||||
assert.ok(
|
||||
pauseCallRe.test(src),
|
||||
'pauseTransientWithBackoff must receive isRateLimit based on cls.kind === "rate-limit" (#2770)',
|
||||
src.includes("autoBenchmark: true"),
|
||||
"runtime recovery must allow benchmark-provided fallbacks when preferences do not pin the full chain",
|
||||
);
|
||||
assert.ok(
|
||||
!src.includes("getAutoModeStartModel"),
|
||||
"runtime recovery must not restore a session/system model as an inferred fallback",
|
||||
);
|
||||
});
|
||||
|
||||
test("rate-limit errors pause only when no configured_or_available fallback remains", () => {
|
||||
const src = getRecoverySource();
|
||||
|
||||
assert.ok(
|
||||
src.includes("available fallback"),
|
||||
"exhausted configured fallback chain should try another available model before pausing",
|
||||
);
|
||||
assert.ok(
|
||||
src.includes("no usable fallback model remains"),
|
||||
"only complete fallback exhaustion should pause with a clear provider error",
|
||||
);
|
||||
assert.ok(
|
||||
/isTransient:\s*false/.test(src),
|
||||
"complete provider route exhaustion must not same-route auto-resume",
|
||||
);
|
||||
});
|
||||
|
||||
test("setModel failure advances to the next configured fallback", () => {
|
||||
const src = getRecoverySource();
|
||||
|
||||
assert.ok(
|
||||
src.includes('reason: "setModel failed during provider recovery"'),
|
||||
"failed fallback routes should be recorded so the next configured route can be tried",
|
||||
);
|
||||
assert.ok(
|
||||
/if\s*\(!ok\)\s*\{[\s\S]{0,300}continue;/.test(src),
|
||||
"setModel failure should continue walking the configured fallback chain",
|
||||
);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -4,14 +4,12 @@ import { mkdirSync, rmSync } from "node:fs";
|
|||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { test } from "vitest";
|
||||
import {
|
||||
getAutoSession,
|
||||
resetAutoSession,
|
||||
} from "../auto/session.js";
|
||||
import { getAutoSession } from "../auto/session.js";
|
||||
import {
|
||||
hasResearchTerminalTransition,
|
||||
markResearchTerminalTransition,
|
||||
} from "../auto.js";
|
||||
import { registerHooks } from "../bootstrap/register-hooks.ts";
|
||||
|
||||
function makeTmpBase(): string {
|
||||
const base = join(tmpdir(), `sf-research-terminal-${randomUUID()}`);
|
||||
|
|
@ -96,6 +94,69 @@ test("research terminal transition blocks planning tools", async () => {
|
|||
}
|
||||
});
|
||||
|
||||
// After a research unit saves its terminal summary, planning tools must be
// blocked synchronously by the tool_call hook — without enqueuing an extra
// agent turn via sendMessage.
test("post_summary_planning_tool_attempt_is_blocked_without_followup_turn", async () => {
  const session = getAutoSession();
  session.reset();
  session.active = true;
  session.currentUnit = {
    type: "research-slice",
    id: "M001/S01",
    startedAt: Date.now(),
  };

  // Minimal pi stub: capture handlers by event name plus any queued messages.
  const sentMessages: unknown[] = [];
  const handlers = new Map<string, Array<(event: any, ctx?: any) => any>>();
  const pi = {
    on(event: string, handler: (event: any, ctx?: any) => any) {
      const existing = handlers.get(event) ?? [];
      existing.push(handler);
      handlers.set(event, existing);
    },
    sendMessage(message: unknown) {
      sentMessages.push(message);
    },
  } as any;

  registerHooks(pi);
  const toolResultHandlers = handlers.get("tool_result") ?? [];
  const toolCallHandlers = handlers.get("tool_call") ?? [];
  assert.ok(toolResultHandlers.length, "tool_result handler should register");
  assert.ok(toolCallHandlers.length, "tool_call handler should register");

  // Simulate the summary save whose details mark the terminal transition.
  for (const handler of toolResultHandlers) {
    await handler({
      toolName: "sf_summary_save",
      content: [{ type: "text", text: "Saved RESEARCH" }],
      details: {
        terminal_transition: true,
        unit_type: "research",
      },
    });
  }
  assert.equal(hasResearchTerminalTransition(), true);

  // A planning tool call after the transition must be rejected in-place.
  const planningAttempt = {
    toolName: "sf_plan_milestone",
    input: {},
  };
  const results = [];
  for (const handler of toolCallHandlers) {
    results.push(await handler(planningAttempt));
  }

  const block = results.find((result) => result?.block === true);
  assert.ok(block, "post-summary planning attempt should be blocked");
  assert.match(block.reason, /Post-artifact drift/);
  assert.match(block.reason, /sf_plan_milestone/);
  assert.equal(
    sentMessages.length,
    0,
    "blocking the tool call must not enqueue another agent turn",
  );

  session.reset();
});
|
||||
|
||||
test("research terminal transition does not block non-planning tools", () => {
|
||||
const session = getAutoSession();
|
||||
// Reset to known state
|
||||
|
|
@ -113,7 +174,7 @@ test("research terminal transition does not block non-planning tools", () => {
|
|||
|
||||
// Non-planning tools should not be blocked by the research terminal transition
|
||||
// (the actual blocking logic only checks planning tools)
|
||||
const nonPlanningTools = [
|
||||
const _nonPlanningTools = [
|
||||
"read",
|
||||
"write",
|
||||
"edit",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,12 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import {
|
||||
mkdirSync,
|
||||
mkdtempSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, describe, it } from "vitest";
|
||||
|
|
@ -90,7 +96,7 @@ describe("self-feedback inline drain", () => {
|
|||
root,
|
||||
);
|
||||
|
||||
const messages: unknown[] = [];
|
||||
const messages: Array<{ message: unknown; options: unknown }> = [];
|
||||
const notifications: string[] = [];
|
||||
const ctx = {
|
||||
ui: {
|
||||
|
|
@ -100,18 +106,72 @@ describe("self-feedback inline drain", () => {
|
|||
},
|
||||
} as any;
|
||||
const pi = {
|
||||
sendMessage(message: unknown, options: unknown) {
|
||||
messages.push({ message, options });
|
||||
},
|
||||
} as any;
|
||||
|
||||
assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 1);
|
||||
assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 0);
|
||||
assert.equal(messages.length, 1);
|
||||
assert.equal(notifications.length, 2);
|
||||
assert.match(
|
||||
JSON.stringify(messages[0]?.message),
|
||||
/sf-self-feedback-inline-fix/,
|
||||
);
|
||||
assert.match(
|
||||
JSON.stringify(messages[0]?.message),
|
||||
/sf_self_feedback_resolve/,
|
||||
);
|
||||
assert.deepEqual(messages[0]?.options, {
|
||||
triggerTurn: true,
|
||||
deliverAs: "followUp",
|
||||
});
|
||||
assert.match(notifications[1], /already dispatched/);
|
||||
});
|
||||
|
||||
it("dispatch_failure_expires_claim_so_next_idle_turn_can_retry", async () => {
|
||||
const root = makeForgeProject();
|
||||
recordSelfFeedback(
|
||||
{
|
||||
kind: "startup-dispatch-race",
|
||||
severity: "critical",
|
||||
summary: "Startup dispatch can fail before the turn is accepted",
|
||||
source: "detector",
|
||||
},
|
||||
root,
|
||||
);
|
||||
|
||||
const notifications: string[] = [];
|
||||
const ctx = {
|
||||
ui: {
|
||||
notify(message: string) {
|
||||
notifications.push(message);
|
||||
},
|
||||
},
|
||||
} as any;
|
||||
const failingPi = {
|
||||
sendMessage() {
|
||||
return Promise.reject(new Error("agent busy"));
|
||||
},
|
||||
} as any;
|
||||
assert.equal(
|
||||
dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, failingPi),
|
||||
1,
|
||||
);
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
|
||||
const messages: unknown[] = [];
|
||||
const retryPi = {
|
||||
sendMessage(message: unknown) {
|
||||
messages.push(message);
|
||||
},
|
||||
} as any;
|
||||
|
||||
assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 1);
|
||||
assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 0);
|
||||
assert.equal(messages.length, 1);
|
||||
assert.equal(notifications.length, 2);
|
||||
assert.match(JSON.stringify(messages[0]), /sf-self-feedback-inline-fix/);
|
||||
assert.match(notifications[1], /already dispatched/);
|
||||
});
|
||||
assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, retryPi), 1);
|
||||
assert.equal(messages.length, 1);
|
||||
assert.match(notifications.join("\n"), /will retry at the next idle point/);
|
||||
});
|
||||
|
||||
it("consumes the claim after the inline-fix entries are resolved", () => {
|
||||
const root = makeForgeProject();
|
||||
|
|
@ -162,7 +222,11 @@ describe("self-feedback inline drain", () => {
|
|||
const ctx = { ui: { notify() {} } } as any;
|
||||
const pi = { sendMessage() {} } as any;
|
||||
assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 1);
|
||||
writeFileSync(join(root, "dirty.ts"), "export const dirty = true;\n", "utf-8");
|
||||
writeFileSync(
|
||||
join(root, "dirty.ts"),
|
||||
"export const dirty = true;\n",
|
||||
"utf-8",
|
||||
);
|
||||
assert.equal(
|
||||
markResolved(
|
||||
recorded.entry.id,
|
||||
|
|
@ -199,4 +263,20 @@ describe("self-feedback inline drain", () => {
|
|||
);
|
||||
assert.equal(selected[0]?.repoIdentity, "external");
|
||||
});
|
||||
|
||||
it("session_start_hook_queues_inline_fix_followup_not_only_warning", () => {
|
||||
const source = readFileSync(
|
||||
join(import.meta.dirname, "..", "bootstrap", "register-hooks.ts"),
|
||||
"utf-8",
|
||||
);
|
||||
const start = source.indexOf('pi.on("session_start"');
|
||||
const end = source.indexOf("return buildBeforeAgentStartResult", start);
|
||||
assert.notEqual(start, -1);
|
||||
assert.notEqual(end, -1);
|
||||
const sessionStartBlock = source.slice(start, end);
|
||||
|
||||
assert.match(sessionStartBlock, /dispatchSelfFeedbackInlineFixIfNeeded/);
|
||||
assert.match(sessionStartBlock, /even outside \/sf auto/);
|
||||
assert.doesNotMatch(sessionStartBlock, /no auto-dispatch/);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -0,0 +1,106 @@
|
|||
import assert from "node:assert/strict";
|
||||
import {
|
||||
mkdirSync,
|
||||
mkdtempSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, describe, test } from "vitest";
|
||||
import { registerDbTools } from "../bootstrap/db-tools.ts";
|
||||
import { readAllSelfFeedback, recordSelfFeedback } from "../self-feedback.ts";
|
||||
|
||||
const originalCwd = process.cwd();
|
||||
const originalSfHome = process.env.SF_HOME;
|
||||
let roots: string[] = [];
|
||||
|
||||
afterEach(() => {
|
||||
process.chdir(originalCwd);
|
||||
for (const root of roots) rmSync(root, { recursive: true, force: true });
|
||||
roots = [];
|
||||
if (originalSfHome === undefined) delete process.env.SF_HOME;
|
||||
else process.env.SF_HOME = originalSfHome;
|
||||
});
|
||||
|
||||
function makeForgeProject(): string {
|
||||
const root = mkdtempSync(join(tmpdir(), "sf-self-feedback-resolve-"));
|
||||
roots.push(root);
|
||||
mkdirSync(join(root, ".sf"), { recursive: true });
|
||||
process.env.SF_HOME = join(root, "sf-home");
|
||||
writeFileSync(
|
||||
join(root, "package.json"),
|
||||
JSON.stringify({ name: "singularity-forge", version: "0.0.1" }),
|
||||
"utf-8",
|
||||
);
|
||||
return root;
|
||||
}
|
||||
|
||||
function makeMockPi() {
|
||||
const tools: any[] = [];
|
||||
return {
|
||||
registerTool(tool: any) {
|
||||
tools.push(tool);
|
||||
},
|
||||
tools,
|
||||
} as any;
|
||||
}
|
||||
|
||||
describe("sf_self_feedback_resolve", () => {
|
||||
test("resolve_when_entry_is_fixed_sets_resolved_evidence_and_regenerates_markdown", async () => {
|
||||
const root = makeForgeProject();
|
||||
const recorded = recordSelfFeedback(
|
||||
{
|
||||
kind: "inline-fix-resolution-gap",
|
||||
severity: "high",
|
||||
summary: "Inline fix landed but entry stayed unresolved",
|
||||
acceptanceCriteria: "1. Resolver tool exists. 2. JSONL is updated.",
|
||||
source: "detector",
|
||||
},
|
||||
root,
|
||||
);
|
||||
assert.ok(recorded);
|
||||
process.chdir(root);
|
||||
|
||||
const pi = makeMockPi();
|
||||
registerDbTools(pi);
|
||||
const tool = pi.tools.find(
|
||||
(t: any) => t.name === "sf_self_feedback_resolve",
|
||||
);
|
||||
assert.ok(tool, "resolver tool should be registered");
|
||||
|
||||
const result = await tool.execute(
|
||||
"call-1",
|
||||
{
|
||||
id: recorded.entry.id,
|
||||
reason: "resolver tool verified",
|
||||
commit_sha: "abc1234",
|
||||
test_path:
|
||||
"src/resources/extensions/sf/tests/self-feedback-resolve-tool.test.ts",
|
||||
criteria_met: ["Resolver tool exists", "JSONL is updated"],
|
||||
},
|
||||
undefined,
|
||||
undefined,
|
||||
undefined,
|
||||
);
|
||||
|
||||
assert.equal(result.details?.resolved, true);
|
||||
const [entry] = readAllSelfFeedback(root).filter(
|
||||
(e) => e.id === recorded.entry.id,
|
||||
);
|
||||
assert.ok(entry?.resolvedAt);
|
||||
assert.equal(entry.resolvedEvidence?.kind, "agent-fix");
|
||||
assert.equal(entry.resolvedEvidence?.commitSha, "abc1234");
|
||||
assert.deepEqual(entry.resolvedCriteriaMet, [
|
||||
"Resolver tool exists",
|
||||
"JSONL is updated",
|
||||
]);
|
||||
const markdown = readFileSync(
|
||||
join(root, ".sf", "SELF-FEEDBACK.md"),
|
||||
"utf-8",
|
||||
);
|
||||
assert.match(markdown, /Recently Resolved/);
|
||||
assert.match(markdown, /inline-fix-resolution-gap/);
|
||||
});
|
||||
});
|
||||
|
|
@ -26,6 +26,7 @@ const CANONICAL_DB_TOOLS = [
|
|||
"sf_summary_save",
|
||||
"sf_milestone_generate_id",
|
||||
"sf_self_report",
|
||||
"sf_self_feedback_resolve",
|
||||
"sf_plan_milestone",
|
||||
"sf_plan_slice",
|
||||
"sf_plan_task",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,401 @@
|
|||
/**
|
||||
* Triage protocol — registry integration tests.
|
||||
*
|
||||
* Purpose: Validate that every finding in the M008 bug registry conforms to
|
||||
* the triage protocol definitions (severity, status, cluster routing), and
|
||||
* that the systematic-debugging skill correctly references the protocol.
|
||||
*
|
||||
* Consumer: CI gate that blocks milestone completion when registry and
|
||||
* protocol drift out of sync.
|
||||
*/
|
||||
|
||||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
import { describe, test } from "vitest";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const repoRoot = join(__dirname, "..", "..", "..", "..", "..");
|
||||
|
||||
// ─── Load canonical artifacts ────────────────────────────────────────────────
|
||||
|
||||
const registryPath = join(repoRoot, ".sf", "milestones", "M008", "bugs", "bug-registry.json");
|
||||
const protocolPath = join(repoRoot, ".sf", "milestones", "M008", "triage-protocol.md");
|
||||
const skillPath = join(repoRoot, "src", "resources", "extensions", "sf", "skills", "systematic-debugging", "SKILL.md");
|
||||
|
||||
const registry = JSON.parse(readFileSync(registryPath, "utf-8")) as {
|
||||
schema_version: string;
|
||||
meta: {
|
||||
source: string;
|
||||
date: string;
|
||||
totalFindings: number;
|
||||
clusters: string[];
|
||||
};
|
||||
findings: Array<{
|
||||
id: string;
|
||||
file: string;
|
||||
lines: string;
|
||||
category: string;
|
||||
severity: string;
|
||||
status: string;
|
||||
description: string;
|
||||
suggestedFix: string;
|
||||
cluster: string;
|
||||
fixedByTaskId?: string;
|
||||
}>;
|
||||
summary: {
|
||||
severity: Record<string, number>;
|
||||
status: Record<string, number>;
|
||||
cluster: Record<string, number>;
|
||||
};
|
||||
};
|
||||
|
||||
const protocol = readFileSync(protocolPath, "utf-8");
|
||||
const skill = (() => {
|
||||
try {
|
||||
return readFileSync(skillPath, "utf-8");
|
||||
} catch {
|
||||
return "";
|
||||
}
|
||||
})();
|
||||
|
||||
// ─── Severity definitions from protocol ──────────────────────────────────────
|
||||
|
||||
const VALID_SEVERITIES = ["HIGH", "MEDIUM", "LOW", "FALSE_POSITIVE"] as const;
|
||||
const VALID_STATUSES = ["CONFIRMED", "FALSE_POSITIVE", "FIXED", "WONTFIX", "IN_PROGRESS"] as const;
|
||||
|
||||
// Cluster routing table from protocol
|
||||
const PROTOCOL_CLUSTERS = [
|
||||
"engine + verification",
|
||||
"scaffold + doctor",
|
||||
"worktree + git",
|
||||
"memory + state + cache",
|
||||
"bootstrap + workflow",
|
||||
"notification + detection + headless",
|
||||
] as const;
|
||||
|
||||
// ─── Helpers ─────────────────────────────────────────────────────────────────
|
||||
|
||||
function assertFinding(
|
||||
condition: boolean,
|
||||
findingId: string,
|
||||
message: string,
|
||||
): void {
|
||||
assert.ok(condition, `Finding ${findingId}: ${message}`);
|
||||
}
|
||||
|
||||
// ─── Registry structural validity ────────────────────────────────────────────
|
||||
|
||||
describe("triage-protocol-registry", () => {
|
||||
test("registry_schema_version_is_1_0_0", () => {
|
||||
assert.strictEqual(registry.schema_version, "1.0.0", "schema_version must be 1.0.0");
|
||||
});
|
||||
|
||||
test("registry_meta_totalFindings_matches_actual_count", () => {
|
||||
assert.strictEqual(
|
||||
registry.meta.totalFindings,
|
||||
registry.findings.length,
|
||||
`meta.totalFindings (${registry.meta.totalFindings}) must equal actual findings count (${registry.findings.length})`,
|
||||
);
|
||||
});
|
||||
|
||||
test("registry_meta_clusters_match_protocol_clusters", () => {
|
||||
const registryClusters = new Set(registry.meta.clusters);
|
||||
const protocolClusterSet = new Set(PROTOCOL_CLUSTERS);
|
||||
assert.deepStrictEqual(
|
||||
registryClusters,
|
||||
protocolClusterSet,
|
||||
"registry meta.clusters must exactly match protocol cluster routing table",
|
||||
);
|
||||
});
|
||||
|
||||
// ─── Per-finding validation ──────────────────────────────────────────────
|
||||
|
||||
test("every_finding_has_valid_severity", () => {
|
||||
for (const f of registry.findings) {
|
||||
assertFinding(
|
||||
VALID_SEVERITIES.includes(f.severity as (typeof VALID_SEVERITIES)[number]),
|
||||
f.id,
|
||||
`severity "${f.severity}" is not one of ${VALID_SEVERITIES.join(", ")}`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test("every_finding_has_valid_status", () => {
|
||||
for (const f of registry.findings) {
|
||||
assertFinding(
|
||||
VALID_STATUSES.includes(f.status as (typeof VALID_STATUSES)[number]),
|
||||
f.id,
|
||||
`status "${f.status}" is not one of ${VALID_STATUSES.join(", ")}`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test("every_finding_belongs_to_protocol_cluster", () => {
|
||||
for (const f of registry.findings) {
|
||||
assertFinding(
|
||||
PROTOCOL_CLUSTERS.includes(f.cluster as (typeof PROTOCOL_CLUSTERS)[number]),
|
||||
f.id,
|
||||
`cluster "${f.cluster}" is not in the protocol routing table`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test("every_finding_has_non_empty_id", () => {
|
||||
for (const f of registry.findings) {
|
||||
assertFinding(
|
||||
f.id.length > 0,
|
||||
f.id,
|
||||
"finding id must not be empty",
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test("every_finding_has_non_empty_description", () => {
|
||||
for (const f of registry.findings) {
|
||||
assertFinding(
|
||||
f.description.length > 0,
|
||||
f.id,
|
||||
"description must not be empty",
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test("every_finding_has_non_empty_suggestedFix", () => {
|
||||
for (const f of registry.findings) {
|
||||
assertFinding(
|
||||
f.suggestedFix.length > 0,
|
||||
f.id,
|
||||
"suggestedFix must not be empty",
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Severity / status consistency rules ─────────────────────────────────
|
||||
|
||||
test("severity_FALSE_POSITIVE_implies_status_FALSE_POSITIVE", () => {
|
||||
for (const f of registry.findings) {
|
||||
if (f.severity === "FALSE_POSITIVE") {
|
||||
assertFinding(
|
||||
f.status === "FALSE_POSITIVE",
|
||||
f.id,
|
||||
`severity=FALSE_POSITIVE requires status=FALSE_POSITIVE, got status=${f.status}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
test("status_FALSE_POSITIVE_implies_severity_FALSE_POSITIVE", () => {
|
||||
for (const f of registry.findings) {
|
||||
if (f.status === "FALSE_POSITIVE") {
|
||||
assertFinding(
|
||||
f.severity === "FALSE_POSITIVE",
|
||||
f.id,
|
||||
`status=FALSE_POSITIVE requires severity=FALSE_POSITIVE, got severity=${f.severity}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
test("status_FIXED_implies_fixedByTaskId_present", () => {
|
||||
for (const f of registry.findings) {
|
||||
if (f.status === "FIXED") {
|
||||
assertFinding(
|
||||
f.fixedByTaskId !== undefined && f.fixedByTaskId.length > 0,
|
||||
f.id,
|
||||
`status=FIXED requires fixedByTaskId to be set`,
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
test("fixedByTaskId_present_only_when_status_FIXED", () => {
|
||||
for (const f of registry.findings) {
|
||||
if (f.fixedByTaskId !== undefined) {
|
||||
assertFinding(
|
||||
f.status === "FIXED",
|
||||
f.id,
|
||||
`fixedByTaskId (${f.fixedByTaskId}) should only be present when status=FIXED, got status=${f.status}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// ─── Summary statistics accuracy ─────────────────────────────────────────
|
||||
|
||||
test("summary_severity_counts_match_actual", () => {
|
||||
const actual: Record<string, number> = {};
|
||||
for (const f of registry.findings) {
|
||||
actual[f.severity] = (actual[f.severity] ?? 0) + 1;
|
||||
}
|
||||
assert.deepStrictEqual(
|
||||
registry.summary.severity,
|
||||
actual,
|
||||
"summary.severity counts must match actual finding severities",
|
||||
);
|
||||
});
|
||||
|
||||
test("summary_status_counts_match_actual", () => {
|
||||
const actual: Record<string, number> = {};
|
||||
for (const f of registry.findings) {
|
||||
actual[f.status] = (actual[f.status] ?? 0) + 1;
|
||||
}
|
||||
// Compare only keys that exist in either object; zero-count keys in summary are allowed
|
||||
const allKeys = new Set([...Object.keys(registry.summary.status), ...Object.keys(actual)]);
|
||||
for (const key of allKeys) {
|
||||
const expectedCount = registry.summary.status[key] ?? 0;
|
||||
const actualCount = actual[key] ?? 0;
|
||||
assert.strictEqual(
|
||||
actualCount,
|
||||
expectedCount,
|
||||
`summary.status["${key}"]: expected ${expectedCount}, got ${actualCount}`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test("summary_cluster_counts_match_actual", () => {
|
||||
const actual: Record<string, number> = {};
|
||||
for (const f of registry.findings) {
|
||||
actual[f.cluster] = (actual[f.cluster] ?? 0) + 1;
|
||||
}
|
||||
assert.deepStrictEqual(
|
||||
registry.summary.cluster,
|
||||
actual,
|
||||
"summary.cluster counts must match actual finding clusters",
|
||||
);
|
||||
});
|
||||
|
||||
// ─── Protocol content validation ─────────────────────────────────────────
|
||||
|
||||
test("protocol_defines_all_severity_levels", () => {
|
||||
for (const sev of VALID_SEVERITIES) {
|
||||
assert.ok(
|
||||
protocol.includes(sev),
|
||||
`triage-protocol.md must mention severity level ${sev}`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test("protocol_defines_all_status_values", () => {
|
||||
for (const st of VALID_STATUSES) {
|
||||
assert.ok(
|
||||
protocol.includes(st),
|
||||
`triage-protocol.md must mention status value ${st}`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test("protocol_defines_all_clusters_in_routing_table", () => {
|
||||
for (const cluster of PROTOCOL_CLUSTERS) {
|
||||
assert.ok(
|
||||
protocol.includes(cluster),
|
||||
`triage-protocol.md cluster routing table must include "${cluster}"`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
test("protocol_contains_confidence_gate_table", () => {
|
||||
assert.ok(
|
||||
protocol.includes("Confidence Gate Requirements"),
|
||||
"protocol must contain Confidence Gate Requirements section",
|
||||
);
|
||||
assert.ok(
|
||||
protocol.includes("0.90") || protocol.includes("0.95") || protocol.includes("0.80"),
|
||||
"protocol must list numeric confidence thresholds",
|
||||
);
|
||||
});
|
||||
|
||||
test("protocol_contains_escalation_rules", () => {
|
||||
assert.ok(
|
||||
protocol.includes("Escalation Rules"),
|
||||
"protocol must contain Escalation Rules section",
|
||||
);
|
||||
});
|
||||
|
||||
// ─── Skill references protocol correctly ─────────────────────────────────
|
||||
|
||||
test("skill_references_triage_protocol_file", () => {
|
||||
assert.ok(
|
||||
skill.includes("triage-protocol.md") || skill.includes("triage protocol"),
|
||||
"systematic-debugging SKILL.md must reference the triage protocol",
|
||||
);
|
||||
});
|
||||
|
||||
test("skill_references_bug_registry", () => {
|
||||
assert.ok(
|
||||
skill.includes("bug-registry.json"),
|
||||
"systematic-debugging SKILL.md must reference bug-registry.json",
|
||||
);
|
||||
});
|
||||
|
||||
test("skill_lists_severity_values", () => {
|
||||
assert.ok(
|
||||
(skill.includes('"HIGH"') || skill.includes('`HIGH`')) &&
|
||||
(skill.includes('"MEDIUM"') || skill.includes('`MEDIUM`')) &&
|
||||
(skill.includes('"LOW"') || skill.includes('`LOW`')),
|
||||
"systematic-debugging SKILL.md must list HIGH / MEDIUM / LOW severity values",
|
||||
);
|
||||
});
|
||||
|
||||
test("skill_mentions_confidence_gate_thresholds", () => {
|
||||
assert.ok(
|
||||
skill.includes("0.80") || skill.includes("0.85") || skill.includes("0.90") || skill.includes("0.95"),
|
||||
"systematic-debugging SKILL.md must mention confidence gate thresholds",
|
||||
);
|
||||
});
|
||||
|
||||
test("skill_mentions_cluster_aware_fixes", () => {
|
||||
assert.ok(
|
||||
skill.includes("cluster-aware") || skill.includes("Cluster-aware"),
|
||||
"systematic-debugging SKILL.md must mention cluster-aware fixes",
|
||||
);
|
||||
});
|
||||
|
||||
test("skill_mentions_registry_update_after_fix", () => {
|
||||
assert.ok(
|
||||
skill.includes("Update the registry") || skill.includes("update the registry") || skill.includes("bug-registry.json"),
|
||||
"systematic-debugging SKILL.md must instruct updating registry after fix",
|
||||
);
|
||||
});
|
||||
|
||||
// ─── Protocol decision flow integrity ────────────────────────────────────
|
||||
|
||||
test("protocol_decision_flow_has_all_severity_branches", () => {
|
||||
// The decision flow should branch on HIGH, MEDIUM, and LOW
|
||||
assert.ok(
|
||||
protocol.includes("severity = HIGH") || protocol.includes("Is severity = HIGH"),
|
||||
"protocol decision flow must branch on HIGH severity",
|
||||
);
|
||||
assert.ok(
|
||||
protocol.includes("severity = MEDIUM") || protocol.includes("Is severity = MEDIUM"),
|
||||
"protocol decision flow must branch on MEDIUM severity",
|
||||
);
|
||||
assert.ok(
|
||||
protocol.includes("severity = LOW") || protocol.includes("Is severity = LOW"),
|
||||
"protocol decision flow must branch on LOW severity",
|
||||
);
|
||||
});
|
||||
|
||||
test("protocol_high_severity_requires_regression_test", () => {
|
||||
const highSection = protocol.slice(protocol.indexOf("severity = HIGH"));
|
||||
assert.ok(
|
||||
highSection.includes("regression test") || protocol.includes("Require regression test"),
|
||||
"protocol must require regression test for HIGH severity",
|
||||
);
|
||||
});
|
||||
|
||||
test("protocol_medium_severity_has_confidence_gate_0_85", () => {
|
||||
assert.ok(
|
||||
protocol.includes("0.85"),
|
||||
"protocol must specify 0.85 confidence gate for MEDIUM severity",
|
||||
);
|
||||
});
|
||||
|
||||
test("protocol_low_severity_has_confidence_gate_0_80", () => {
|
||||
assert.ok(
|
||||
protocol.includes("0.80"),
|
||||
"protocol must specify 0.80 confidence gate for LOW severity",
|
||||
);
|
||||
});
|
||||
});
|
||||
264
src/resources/extensions/sf/tests/unit-runtime-fsm.test.ts
Normal file
264
src/resources/extensions/sf/tests/unit-runtime-fsm.test.ts
Normal file
|
|
@ -0,0 +1,264 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, test } from "vitest";
|
||||
import { buildQuerySnapshot } from "../../../../headless-query.ts";
|
||||
import { resolveDispatch } from "../auto-dispatch.ts";
|
||||
import {
|
||||
clearUnitRuntimeRecord,
|
||||
decideUnitRuntimeDispatch,
|
||||
readUnitRuntimeRecord,
|
||||
UNIT_RUNTIME_STATUSES,
|
||||
UNIT_RUNTIME_TERMINAL_STATUSES,
|
||||
UNIT_RUNTIME_TRANSITIONS,
|
||||
writeUnitRuntimeRecord,
|
||||
} from "../unit-runtime.ts";
|
||||
|
||||
const tmpDirs: string[] = [];
|
||||
|
||||
function makeTmpBase(prefix = "sf-unit-runtime-fsm-"): string {
|
||||
const base = mkdtempSync(join(tmpdir(), prefix));
|
||||
tmpDirs.push(base);
|
||||
mkdirSync(join(base, ".sf", "milestones"), { recursive: true });
|
||||
return base;
|
||||
}
|
||||
|
||||
function makeParallelResearchProject(): string {
|
||||
const base = makeTmpBase("sf-unit-runtime-parallel-");
|
||||
const milestoneDir = join(base, ".sf", "milestones", "M001");
|
||||
mkdirSync(milestoneDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(milestoneDir, "M001-ROADMAP.md"),
|
||||
[
|
||||
"# M001: Parallel Research Milestone",
|
||||
"",
|
||||
"**Vision:** Research-ready slices.",
|
||||
"",
|
||||
"## Slices",
|
||||
"",
|
||||
"- [ ] **S01: Alpha** `risk:low` `depends:[]`",
|
||||
"- [ ] **S02: Beta** `risk:low` `depends:[]`",
|
||||
"",
|
||||
].join("\n"),
|
||||
"utf-8",
|
||||
);
|
||||
return base;
|
||||
}
|
||||
|
||||
async function resolvePlanningDispatch(base: string) {
|
||||
return resolveDispatch({
|
||||
basePath: base,
|
||||
mid: "M001",
|
||||
midTitle: "Parallel Research Milestone",
|
||||
state: {
|
||||
phase: "planning",
|
||||
activeMilestone: {
|
||||
id: "M001",
|
||||
title: "Parallel Research Milestone",
|
||||
status: "active",
|
||||
},
|
||||
activeSlice: { id: "S01", title: "Alpha" },
|
||||
activeTask: null,
|
||||
registry: [],
|
||||
blockers: [],
|
||||
} as any,
|
||||
prefs: undefined,
|
||||
});
|
||||
}
|
||||
|
||||
afterEach(() => {
|
||||
for (const dir of tmpDirs) {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
tmpDirs.length = 0;
|
||||
});
|
||||
|
||||
test("unit_runtime_transitions_when_enumerated_cover_all_statuses", () => {
|
||||
assert.deepEqual(UNIT_RUNTIME_STATUSES, [
|
||||
"queued",
|
||||
"claimed",
|
||||
"running",
|
||||
"progress",
|
||||
"completed",
|
||||
"failed",
|
||||
"blocked",
|
||||
"cancelled",
|
||||
"stale",
|
||||
"runaway-recovered",
|
||||
"notified",
|
||||
]);
|
||||
assert.deepEqual(UNIT_RUNTIME_TERMINAL_STATUSES, [
|
||||
"completed",
|
||||
"failed",
|
||||
"blocked",
|
||||
"cancelled",
|
||||
"stale",
|
||||
"runaway-recovered",
|
||||
]);
|
||||
assert.deepEqual(UNIT_RUNTIME_TRANSITIONS, {
|
||||
queued: ["claimed", "cancelled"],
|
||||
claimed: ["running", "stale", "cancelled"],
|
||||
running: [
|
||||
"progress",
|
||||
"completed",
|
||||
"failed",
|
||||
"blocked",
|
||||
"cancelled",
|
||||
"stale",
|
||||
"runaway-recovered",
|
||||
],
|
||||
progress: [
|
||||
"running",
|
||||
"completed",
|
||||
"failed",
|
||||
"blocked",
|
||||
"cancelled",
|
||||
"stale",
|
||||
"runaway-recovered",
|
||||
],
|
||||
completed: ["notified"],
|
||||
failed: ["queued", "notified"],
|
||||
blocked: ["notified"],
|
||||
cancelled: ["notified"],
|
||||
stale: ["queued", "notified"],
|
||||
"runaway-recovered": ["queued", "notified"],
|
||||
notified: ["queued"],
|
||||
});
|
||||
});
|
||||
|
||||
test("synthetic_failed_unit_when_not_reset_cannot_redispatch", async () => {
|
||||
const base = makeParallelResearchProject();
|
||||
writeUnitRuntimeRecord(
|
||||
base,
|
||||
"research-slice",
|
||||
"M001/parallel-research",
|
||||
1000,
|
||||
{
|
||||
status: "failed",
|
||||
retryCount: 0,
|
||||
maxRetries: 2,
|
||||
},
|
||||
);
|
||||
|
||||
const record = readUnitRuntimeRecord(
|
||||
base,
|
||||
"research-slice",
|
||||
"M001/parallel-research",
|
||||
);
|
||||
const decision = decideUnitRuntimeDispatch(record);
|
||||
assert.equal(decision.action, "block");
|
||||
assert.equal(decision.reasonCode, "synthetic-reset-required");
|
||||
assert.equal(decision.retryCount, 0);
|
||||
assert.equal(decision.maxRetries, 2);
|
||||
|
||||
const blockedDispatch = await resolvePlanningDispatch(base);
|
||||
assert.equal(blockedDispatch.action, "dispatch");
|
||||
if (blockedDispatch.action === "dispatch") {
|
||||
assert.equal(blockedDispatch.unitType, "research-slice");
|
||||
assert.equal(blockedDispatch.unitId, "M001/S01");
|
||||
}
|
||||
|
||||
clearUnitRuntimeRecord(base, "research-slice", "M001/parallel-research");
|
||||
const resetDecision = decideUnitRuntimeDispatch(
|
||||
readUnitRuntimeRecord(base, "research-slice", "M001/parallel-research"),
|
||||
);
|
||||
assert.equal(resetDecision.action, "dispatch");
|
||||
assert.equal(resetDecision.reasonCode, "no-runtime-record");
|
||||
|
||||
const resetDispatch = await resolvePlanningDispatch(base);
|
||||
assert.equal(resetDispatch.action, "dispatch");
|
||||
if (resetDispatch.action === "dispatch") {
|
||||
assert.equal(resetDispatch.unitType, "research-slice");
|
||||
assert.equal(resetDispatch.unitId, "M001/parallel-research");
|
||||
}
|
||||
});
|
||||
|
||||
test("terminal_status_when_budget_available_produces_expected_dispatch_decision", () => {
|
||||
const base = makeTmpBase();
|
||||
const cases = [
|
||||
["completed", "notify", "terminal-ready-to-notify"],
|
||||
["failed", "retry", "retry-budget-available"],
|
||||
["blocked", "notify", "terminal-ready-to-notify"],
|
||||
["cancelled", "notify", "terminal-ready-to-notify"],
|
||||
["stale", "retry", "retry-budget-available"],
|
||||
["runaway-recovered", "retry", "retry-budget-available"],
|
||||
] as const;
|
||||
|
||||
for (const [status, expectedAction, expectedReason] of cases) {
|
||||
writeUnitRuntimeRecord(base, "execute-task", `M001/S01/${status}`, 1000, {
|
||||
status,
|
||||
retryCount: 0,
|
||||
maxRetries: 2,
|
||||
});
|
||||
const record = readUnitRuntimeRecord(
|
||||
base,
|
||||
"execute-task",
|
||||
`M001/S01/${status}`,
|
||||
);
|
||||
const decision = decideUnitRuntimeDispatch(record);
|
||||
assert.equal(decision.action, expectedAction, status);
|
||||
assert.equal(decision.reasonCode, expectedReason, status);
|
||||
}
|
||||
});
|
||||
|
||||
test("retryable_terminal_status_when_budget_exhausted_blocks_dispatch", () => {
|
||||
const base = makeTmpBase();
|
||||
for (const status of ["failed", "stale", "runaway-recovered"] as const) {
|
||||
writeUnitRuntimeRecord(base, "execute-task", `M001/S01/${status}`, 1000, {
|
||||
status,
|
||||
retryCount: 2,
|
||||
maxRetries: 2,
|
||||
});
|
||||
const decision = decideUnitRuntimeDispatch(
|
||||
readUnitRuntimeRecord(base, "execute-task", `M001/S01/${status}`),
|
||||
);
|
||||
assert.equal(decision.action, "block", status);
|
||||
assert.equal(decision.reasonCode, "retry-budget-exhausted", status);
|
||||
assert.equal(decision.retryCount, 2, status);
|
||||
assert.equal(decision.maxRetries, 2, status);
|
||||
}
|
||||
});
|
||||
|
||||
test("terminal_status_when_already_notified_skips_dispatch", () => {
|
||||
const base = makeTmpBase();
|
||||
writeUnitRuntimeRecord(base, "execute-task", "M001/S01/T01", 1000, {
|
||||
status: "failed",
|
||||
retryCount: 0,
|
||||
maxRetries: 2,
|
||||
notifiedAt: 2000,
|
||||
});
|
||||
|
||||
const decision = decideUnitRuntimeDispatch(
|
||||
readUnitRuntimeRecord(base, "execute-task", "M001/S01/T01"),
|
||||
);
|
||||
assert.equal(decision.action, "skip");
|
||||
assert.equal(decision.reasonCode, "already-notified");
|
||||
});
|
||||
|
||||
test("headless_query_when_runtime_record_exists_shows_retry_budget", async () => {
|
||||
const base = makeTmpBase();
|
||||
writeUnitRuntimeRecord(base, "execute-task", "M001/S01/T01", 1000, {
|
||||
status: "failed",
|
||||
retryCount: 1,
|
||||
maxRetries: 2,
|
||||
watchdogReason: "no heartbeat",
|
||||
outputPath: ".sf/runtime/units/M001-S01-T01.log",
|
||||
});
|
||||
|
||||
const snapshot = await buildQuerySnapshot(base);
|
||||
const unit = snapshot.runtime.units.find(
|
||||
(item) =>
|
||||
item.unitType === "execute-task" && item.unitId === "M001/S01/T01",
|
||||
);
|
||||
|
||||
assert.ok(unit);
|
||||
assert.equal(unit.status, "failed");
|
||||
assert.equal(unit.retryCount, 1);
|
||||
assert.equal(unit.maxRetries, 2);
|
||||
assert.equal(unit.retryBudgetRemaining, 1);
|
||||
assert.equal(unit.dispatchDecision.action, "retry");
|
||||
assert.equal(unit.dispatchDecision.reasonCode, "retry-budget-available");
|
||||
assert.equal(unit.watchdogReason, "no heartbeat");
|
||||
assert.equal(unit.outputPath, ".sf/runtime/units/M001-S01-T01.log");
|
||||
});
|
||||
|
|
@ -22,7 +22,126 @@ import {
|
|||
} from "./paths.js";
|
||||
import { parseUnitId } from "./unit-id.js";
|
||||
|
||||
/**
 * Lists every durable unit runtime status in FSM order.
 *
 * Purpose: give dispatch, recovery, and query surfaces one canonical state
 * vocabulary so terminal units cannot be redispatched by ambiguous legacy phases.
 *
 * Ordering: active lifecycle first, then the terminal outcomes of a single
 * execution attempt, then "notified" (the operator has been told about a
 * terminal unit; see UNIT_RUNTIME_TRANSITIONS).
 *
 * Consumer: auto runtime persistence, unit-runtime tests, headless query summaries.
 */
export const UNIT_RUNTIME_STATUSES = [
  // Active lifecycle of one execution attempt.
  "queued",
  "claimed",
  "running",
  "progress",
  // Terminal outcomes (see UNIT_RUNTIME_TERMINAL_STATUSES).
  "completed",
  "failed",
  "blocked",
  "cancelled",
  "stale",
  "runaway-recovered",
  // Post-terminal: the outcome has been surfaced to the operator.
  "notified",
] as const;
|
||||
|
||||
/**
 * Names the unit statuses that end an execution attempt.
 *
 * Purpose: centralize the terminal-state union so retry and notification policy
 * does not drift between watchdog recovery and dispatch preview logic.
 *
 * Of these, only "failed", "stale", and "runaway-recovered" are eligible for
 * automatic retry (see RETRYABLE_TERMINAL_STATUSES); the others proceed
 * directly to operator notification.
 *
 * Consumer: decideUnitRuntimeDispatch and operator-facing query summaries.
 */
export const UNIT_RUNTIME_TERMINAL_STATUSES = [
  "completed",
  "failed",
  "blocked",
  "cancelled",
  "stale",
  "runaway-recovered",
] as const;
|
||||
|
||||
/**
 * Describes the explicit unit runtime finite-state-machine transitions.
 *
 * Purpose: make retry, notification, and reset transitions reviewable as data
 * instead of implied by ad hoc marker files or legacy phase strings.
 *
 * Shape: each key is a status; its value lists the statuses that may directly
 * follow it. The `satisfies` clause keeps the table exhaustive over
 * UnitRuntimeStatus at compile time.
 *
 * Consumer: unit runtime tests, future dispatch/reconciler guards.
 */
export const UNIT_RUNTIME_TRANSITIONS = {
  queued: ["claimed", "cancelled"],
  claimed: ["running", "stale", "cancelled"],
  // An active unit may report progress or land on any terminal outcome.
  running: [
    "progress",
    "completed",
    "failed",
    "blocked",
    "cancelled",
    "stale",
    "runaway-recovered",
  ],
  progress: [
    "running",
    "completed",
    "failed",
    "blocked",
    "cancelled",
    "stale",
    "runaway-recovered",
  ],
  // Terminal outcomes either notify the operator or (when retryable) re-queue.
  completed: ["notified"],
  failed: ["queued", "notified"],
  blocked: ["notified"],
  cancelled: ["notified"],
  stale: ["queued", "notified"],
  "runaway-recovered": ["queued", "notified"],
  // A notified unit can only be explicitly reset back into the queue.
  notified: ["queued"],
} as const satisfies Record<UnitRuntimeStatus, readonly UnitRuntimeStatus[]>;
|
||||
|
||||
/**
 * Enumerates every durable unit runtime status.
 *
 * Purpose: let persistence and dispatch decisions share one exhaustive status
 * type while legacy `phase` remains available for older call sites.
 *
 * Derived from UNIT_RUNTIME_STATUSES so the type and the runtime list can
 * never disagree.
 *
 * Consumer: AutoUnitRuntimeRecord.status, retry decisions, query summaries.
 */
export type UnitRuntimeStatus = (typeof UNIT_RUNTIME_STATUSES)[number];
|
||||
|
||||
/**
 * Enumerates statuses that end a unit execution attempt.
 *
 * Purpose: distinguish states that need notify/retry/block policy from active
 * states that should not start a second copy of the same unit.
 *
 * Derived from UNIT_RUNTIME_TERMINAL_STATUSES, the single source of truth for
 * which statuses are terminal.
 *
 * Consumer: decideUnitRuntimeDispatch.
 */
export type UnitRuntimeTerminalStatus =
  (typeof UNIT_RUNTIME_TERMINAL_STATUSES)[number];
|
||||
|
||||
/**
 * Captures the durable FSM state embedded in a unit runtime record.
 *
 * Purpose: expose retry budget, liveness, and notification fields together so
 * callers can decide whether a unit should run, retry, block, or notify.
 *
 * Consumer: writeUnitRuntimeRecord, decideUnitRuntimeDispatch, headless query.
 */
export interface UnitRuntimeState {
  // Canonical FSM status; inferred from legacy `phase` when not persisted.
  status: UnitRuntimeStatus;
  // Retries consumed so far; falls back to legacy `recoveryAttempts`.
  retryCount: number;
  // Retry budget; defaults to DEFAULT_UNIT_RUNTIME_MAX_RETRIES when absent.
  maxRetries: number;
  // Liveness timestamps; null when the record never reported them.
  // NOTE(review): presumably epoch milliseconds (records elsewhere use Date.now()) — confirm.
  lastHeartbeatAt: number | null;
  lastProgressAt: number;
  lastOutputAt: number | null;
  // Path to the unit's captured output, if one was recorded.
  outputPath: string | null;
  // Reason the watchdog intervened, if it did.
  watchdogReason: string | null;
  // When the terminal outcome was surfaced to the operator; null = not yet.
  notifiedAt: number | null;
}
|
||||
|
||||
export type UnitRuntimePhase =
|
||||
| UnitRuntimeStatus
|
||||
| "dispatched"
|
||||
| "wrapup-warning-sent"
|
||||
| "runaway-warning-sent"
|
||||
|
|
@ -33,6 +152,14 @@ export type UnitRuntimePhase =
|
|||
| "paused"
|
||||
| "skipped";
|
||||
|
||||
/** Retry budget assumed when a record does not persist `maxRetries` (one retry). */
const DEFAULT_UNIT_RUNTIME_MAX_RETRIES = 1;
|
||||
|
||||
/**
 * Terminal statuses eligible for automatic retry while budget remains.
 * Completed/blocked/cancelled are terminal too, but they notify instead of retrying.
 */
const RETRYABLE_TERMINAL_STATUSES = new Set<UnitRuntimeStatus>([
  "failed",
  "stale",
  "runaway-recovered",
]);
|
||||
|
||||
export interface ExecuteTaskRecoveryStatus {
|
||||
planPath: string;
|
||||
summaryPath: string;
|
||||
|
|
@ -50,18 +177,263 @@ export interface AutoUnitRuntimeRecord {
|
|||
startedAt: number;
|
||||
updatedAt: number;
|
||||
phase: UnitRuntimePhase;
|
||||
status: UnitRuntimeStatus;
|
||||
wrapupWarningSent: boolean;
|
||||
continueHereFired: boolean;
|
||||
timeoutAt: number | null;
|
||||
lastHeartbeatAt?: number | null;
|
||||
lastProgressAt: number;
|
||||
progressCount: number;
|
||||
lastProgressKind: string;
|
||||
lastOutputAt?: number | null;
|
||||
outputPath?: string | null;
|
||||
watchdogReason?: string | null;
|
||||
notifiedAt?: number | null;
|
||||
recovery?: ExecuteTaskRecoveryStatus;
|
||||
recoveryAttempts?: number;
|
||||
retryCount?: number;
|
||||
maxRetries?: number;
|
||||
lastRecoveryReason?: "idle" | "hard";
|
||||
runawayGuardPause?: RunawayGuardPauseMetadata;
|
||||
}
|
||||
|
||||
/**
 * Describes whether dispatch may run a unit from its runtime record.
 *
 * Purpose: surface the same retry-budget decision to tests, dispatch preview,
 * and operator diagnostics without reinterpreting terminal states ad hoc.
 *
 * Every variant carries the same budget fields so callers can report
 * retryCount/maxRetries uniformly regardless of the chosen action.
 *
 * Consumer: unit-runtime FSM tests and headless query runtime summaries.
 */
export type UnitRuntimeDispatchDecision =
  // Safe to start: no record exists yet, or the record is explicitly queued.
  | {
      action: "dispatch";
      reasonCode: "no-runtime-record" | "queued";
      retryCount: number;
      maxRetries: number;
      retryBudgetRemaining: number;
    }
  // Retryable terminal status with budget left.
  | {
      action: "retry";
      reasonCode: "retry-budget-available";
      retryCount: number;
      maxRetries: number;
      retryBudgetRemaining: number;
    }
  // Non-retryable terminal status awaiting operator notification.
  | {
      action: "notify";
      reasonCode: "terminal-ready-to-notify";
      retryCount: number;
      maxRetries: number;
      retryBudgetRemaining: number;
    }
  // Must not run again: budget spent, or a synthetic unit needs an explicit reset.
  | {
      action: "block";
      reasonCode: "retry-budget-exhausted" | "synthetic-reset-required";
      retryCount: number;
      maxRetries: number;
      retryBudgetRemaining: number;
    }
  // Nothing to do: already surfaced, still active, or otherwise not actionable.
  | {
      action: "skip";
      reasonCode:
        | "already-notified"
        | "active-or-claimed"
        | "notified"
        | "terminal-nonretryable";
      retryCount: number;
      maxRetries: number;
      retryBudgetRemaining: number;
    };
|
||||
|
||||
function hasUpdate<K extends keyof AutoUnitRuntimeRecord>(
|
||||
updates: Partial<AutoUnitRuntimeRecord>,
|
||||
key: K,
|
||||
): boolean {
|
||||
return Object.hasOwn(updates, key);
|
||||
}
|
||||
|
||||
function phaseForStatus(status: UnitRuntimeStatus): UnitRuntimePhase {
|
||||
switch (status) {
|
||||
case "queued":
|
||||
case "claimed":
|
||||
case "running":
|
||||
return "dispatched";
|
||||
case "progress":
|
||||
return "wrapup-warning-sent";
|
||||
case "completed":
|
||||
return "finalized";
|
||||
default:
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
function inferStatusFromPhase(
|
||||
phase: UnitRuntimePhase,
|
||||
record?: Pick<AutoUnitRuntimeRecord, "runawayGuardPause"> | null,
|
||||
): UnitRuntimeStatus {
|
||||
if ((UNIT_RUNTIME_STATUSES as readonly string[]).includes(phase)) {
|
||||
return phase as UnitRuntimeStatus;
|
||||
}
|
||||
switch (phase) {
|
||||
case "dispatched":
|
||||
return "running";
|
||||
case "wrapup-warning-sent":
|
||||
case "runaway-warning-sent":
|
||||
case "runaway-final-warning-sent":
|
||||
case "recovered":
|
||||
return "progress";
|
||||
case "timeout":
|
||||
return "stale";
|
||||
case "finalized":
|
||||
return "completed";
|
||||
case "paused":
|
||||
return record?.runawayGuardPause ? "runaway-recovered" : "blocked";
|
||||
case "skipped":
|
||||
return "blocked";
|
||||
default:
|
||||
return "running";
|
||||
}
|
||||
}
|
||||
|
||||
function retryBudgetRemaining(retryCount: number, maxRetries: number): number {
|
||||
return Math.max(0, maxRetries - retryCount);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true when a runtime status is terminal for one execution attempt.
|
||||
*
|
||||
* Purpose: keep terminal-state checks exhaustive against the exported terminal
|
||||
* union rather than hard-coded differently at each caller.
|
||||
*
|
||||
* Consumer: decideUnitRuntimeDispatch and query summary generation.
|
||||
*/
|
||||
export function isTerminalUnitRuntimeStatus(
|
||||
status: UnitRuntimeStatus,
|
||||
): status is UnitRuntimeTerminalStatus {
|
||||
return (UNIT_RUNTIME_TERMINAL_STATUSES as readonly string[]).includes(status);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the normalized FSM state embedded in a runtime record.
|
||||
*
|
||||
* Purpose: let legacy records with only `phase` still participate in retry and
|
||||
* query policy while new records persist explicit FSM fields.
|
||||
*
|
||||
* Consumer: decideUnitRuntimeDispatch and headless query summaries.
|
||||
*/
|
||||
export function getUnitRuntimeState(
|
||||
record: AutoUnitRuntimeRecord,
|
||||
): UnitRuntimeState {
|
||||
const status = record.status ?? inferStatusFromPhase(record.phase, record);
|
||||
const retryCount = record.retryCount ?? record.recoveryAttempts ?? 0;
|
||||
const maxRetries = record.maxRetries ?? DEFAULT_UNIT_RUNTIME_MAX_RETRIES;
|
||||
return {
|
||||
status,
|
||||
retryCount,
|
||||
maxRetries,
|
||||
lastHeartbeatAt: record.lastHeartbeatAt ?? null,
|
||||
lastProgressAt: record.lastProgressAt,
|
||||
lastOutputAt: record.lastOutputAt ?? null,
|
||||
outputPath: record.outputPath ?? null,
|
||||
watchdogReason: record.watchdogReason ?? null,
|
||||
notifiedAt: record.notifiedAt ?? null,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true for synthetic units that must be reset before rerun.
|
||||
*
|
||||
* Purpose: prevent synthetic orchestration units such as parallel research from
|
||||
* looping after failure while preserving normal task retry behavior.
|
||||
*
|
||||
* Consumer: decideUnitRuntimeDispatch.
|
||||
*/
|
||||
export function isSyntheticUnitRuntime(record: AutoUnitRuntimeRecord): boolean {
|
||||
return (
|
||||
record.unitType === "synthetic" ||
|
||||
record.unitId.includes("parallel-research")
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Decides whether a unit runtime record permits dispatch, retry, notify, or block.
 *
 * Purpose: enforce retry budgets and explicit reset requirements before callers
 * schedule another copy of a failed or stale unit.
 *
 * Guard ordering is the contract: already-notified → notified status → queued →
 * still active → synthetic reset → retry budget → plain terminal notify.
 *
 * @param record - Persisted runtime record, or null when the unit never ran.
 * @param options - `synthetic` overrides the heuristic synthetic-unit detection.
 * @returns A decision variant carrying the retry-budget fields for reporting.
 *
 * Consumer: unit-runtime FSM tests and headless query runtime summaries.
 */
export function decideUnitRuntimeDispatch(
  record: AutoUnitRuntimeRecord | null,
  options: { synthetic?: boolean } = {},
): UnitRuntimeDispatchDecision {
  // No record on disk: the unit has never been dispatched.
  if (!record) {
    return {
      action: "dispatch",
      reasonCode: "no-runtime-record",
      retryCount: 0,
      maxRetries: DEFAULT_UNIT_RUNTIME_MAX_RETRIES,
      retryBudgetRemaining: DEFAULT_UNIT_RUNTIME_MAX_RETRIES,
    };
  }

  const state = getUnitRuntimeState(record);
  const remaining = retryBudgetRemaining(state.retryCount, state.maxRetries);
  // Budget fields shared by every decision variant.
  const common = {
    retryCount: state.retryCount,
    maxRetries: state.maxRetries,
    retryBudgetRemaining: remaining,
  };

  // Notification wins over everything: a surfaced unit must never re-run
  // implicitly, regardless of its current status.
  if (state.notifiedAt !== null) {
    return { action: "skip", reasonCode: "already-notified", ...common };
  }
  if (state.status === "notified") {
    return { action: "skip", reasonCode: "notified", ...common };
  }
  // Explicitly queued records are cleared to run.
  if (state.status === "queued") {
    return { action: "dispatch", reasonCode: "queued", ...common };
  }
  // Non-terminal (claimed/running/progress): don't start a second copy.
  if (!isTerminalUnitRuntimeStatus(state.status)) {
    return { action: "skip", reasonCode: "active-or-claimed", ...common };
  }

  // Synthetic orchestration units may not auto-retry; they require a reset.
  const synthetic = options.synthetic ?? isSyntheticUnitRuntime(record);
  if (synthetic && state.status !== "completed") {
    return {
      action: "block",
      reasonCode: "synthetic-reset-required",
      ...common,
    };
  }

  // Retryable terminal outcomes consume budget; exhausted budget blocks.
  if (RETRYABLE_TERMINAL_STATUSES.has(state.status)) {
    if (remaining > 0) {
      return {
        action: "retry",
        reasonCode: "retry-budget-available",
        ...common,
      };
    }
    return { action: "block", reasonCode: "retry-budget-exhausted", ...common };
  }

  // Non-retryable terminal outcomes go to operator notification.
  if (
    state.status === "completed" ||
    state.status === "blocked" ||
    state.status === "cancelled"
  ) {
    return {
      action: "notify",
      reasonCode: "terminal-ready-to-notify",
      ...common,
    };
  }

  // Defensive default: all current terminal statuses are handled above.
  return { action: "skip", reasonCode: "terminal-nonretryable", ...common };
}
|
||||
|
||||
function runtimeDir(basePath: string): string {
|
||||
return join(sfRoot(basePath), "runtime", "units");
|
||||
}
|
||||
|
|
@ -105,25 +477,68 @@ export function writeUnitRuntimeRecord(
|
|||
mkdirSync(dir, { recursive: true });
|
||||
const path = runtimePath(basePath, unitType, unitId);
|
||||
const prev = _runtimeCache.get(path) ?? null;
|
||||
const phase =
|
||||
updates.phase ??
|
||||
(updates.status ? phaseForStatus(updates.status) : prev?.phase) ??
|
||||
"dispatched";
|
||||
const status =
|
||||
updates.status ??
|
||||
(updates.phase || !prev?.status
|
||||
? inferStatusFromPhase(phase, {
|
||||
runawayGuardPause:
|
||||
updates.runawayGuardPause ?? prev?.runawayGuardPause,
|
||||
})
|
||||
: prev.status);
|
||||
const recoveryAttempts = hasUpdate(updates, "recoveryAttempts")
|
||||
? (updates.recoveryAttempts ?? 0)
|
||||
: (prev?.recoveryAttempts ?? 0);
|
||||
const retryCount = hasUpdate(updates, "retryCount")
|
||||
? (updates.retryCount ?? 0)
|
||||
: hasUpdate(updates, "recoveryAttempts")
|
||||
? (updates.recoveryAttempts ?? 0)
|
||||
: (prev?.retryCount ?? recoveryAttempts ?? 0);
|
||||
const next: AutoUnitRuntimeRecord = {
|
||||
version: 1,
|
||||
unitType,
|
||||
unitId,
|
||||
startedAt,
|
||||
updatedAt: Date.now(),
|
||||
phase: updates.phase ?? prev?.phase ?? "dispatched",
|
||||
phase,
|
||||
status,
|
||||
wrapupWarningSent:
|
||||
updates.wrapupWarningSent ?? prev?.wrapupWarningSent ?? false,
|
||||
continueHereFired:
|
||||
updates.continueHereFired ?? prev?.continueHereFired ?? false,
|
||||
timeoutAt: updates.timeoutAt ?? prev?.timeoutAt ?? null,
|
||||
timeoutAt: hasUpdate(updates, "timeoutAt")
|
||||
? (updates.timeoutAt ?? null)
|
||||
: (prev?.timeoutAt ?? null),
|
||||
lastHeartbeatAt: hasUpdate(updates, "lastHeartbeatAt")
|
||||
? (updates.lastHeartbeatAt ?? null)
|
||||
: (prev?.lastHeartbeatAt ?? startedAt),
|
||||
lastProgressAt:
|
||||
updates.lastProgressAt ?? prev?.lastProgressAt ?? Date.now(),
|
||||
progressCount: updates.progressCount ?? prev?.progressCount ?? 0,
|
||||
lastProgressKind:
|
||||
updates.lastProgressKind ?? prev?.lastProgressKind ?? "dispatch",
|
||||
lastOutputAt: hasUpdate(updates, "lastOutputAt")
|
||||
? (updates.lastOutputAt ?? null)
|
||||
: (prev?.lastOutputAt ?? null),
|
||||
outputPath: hasUpdate(updates, "outputPath")
|
||||
? (updates.outputPath ?? null)
|
||||
: (prev?.outputPath ?? null),
|
||||
watchdogReason: hasUpdate(updates, "watchdogReason")
|
||||
? (updates.watchdogReason ?? null)
|
||||
: (prev?.watchdogReason ?? null),
|
||||
notifiedAt: hasUpdate(updates, "notifiedAt")
|
||||
? (updates.notifiedAt ?? null)
|
||||
: (prev?.notifiedAt ?? null),
|
||||
recovery: updates.recovery ?? prev?.recovery,
|
||||
recoveryAttempts: updates.recoveryAttempts ?? prev?.recoveryAttempts ?? 0,
|
||||
recoveryAttempts,
|
||||
retryCount,
|
||||
maxRetries:
|
||||
updates.maxRetries ??
|
||||
prev?.maxRetries ??
|
||||
DEFAULT_UNIT_RUNTIME_MAX_RETRIES,
|
||||
lastRecoveryReason: updates.lastRecoveryReason ?? prev?.lastRecoveryReason,
|
||||
runawayGuardPause: updates.runawayGuardPause ?? prev?.runawayGuardPause,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import {
|
|||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join, resolve } from "node:path";
|
||||
import { test, afterEach } from 'vitest';
|
||||
import { afterEach, test } from "vitest";
|
||||
|
||||
const projectRoot = process.cwd();
|
||||
|
||||
|
|
@ -954,3 +954,42 @@ test("reapOrphanedNextServerProcesses returns zero reaped on non-Linux platforms
|
|||
test("reapOrphanedNextServerProcesses is exported and callable", () => {
|
||||
assert.equal(typeof webMode.reapOrphanedNextServerProcesses, "function");
|
||||
});
|
||||
|
||||
// Verifies orphan reaping with injected deps: pid 123 is an orphan (ppid 1)
// whose cwd is our standalone dir → reaped; pid 124 has a live parent (ppid
// 999) → spared; pid 125 is an orphan but runs from another cwd → spared.
test("reapOrphanedNextServerProcesses kills orphaned standalone next-server", () => {
  const killed: Array<{ pid: number; signal: string }> = [];
  const stderrChunks: string[] = [];
  const packageRoot = "/tmp/sf-package";
  const result = webMode.reapOrphanedNextServerProcesses(
    {
      // Capture stderr output so the log line can be asserted below.
      write: (chunk: string) => {
        stderrChunks.push(chunk);
        return true;
      },
    },
    packageRoot,
    {
      platform: "linux",
      // Fake `ps` output: "<pid> <ppid> <cmd...> <comm>" per line.
      execSync: (() =>
        [
          "123 1 node /tmp/sf-package/dist/web/standalone/node_modules/next/dist/server/next-server.js node",
          "124 999 node /tmp/sf-package/dist/web/standalone/node_modules/next/dist/server/next-server.js node",
          "125 1 node /elsewhere/next-server.js node",
        ].join("\n")) as any,
      // Fake /proc/<pid>/cwd resolution.
      readlinkSync: ((path: string) => {
        if (path === "/proc/123/cwd")
          return "/tmp/sf-package/dist/web/standalone";
        if (path === "/proc/124/cwd")
          return "/tmp/sf-package/dist/web/standalone";
        return "/elsewhere";
      }) as any,
      // Record kill calls instead of signaling real processes.
      kill: ((pid: number, signal: string) => {
        killed.push({ pid, signal });
        return true;
      }) as any,
    },
  );

  assert.equal(result.reaped, 1);
  assert.deepEqual(killed, [{ pid: 123, signal: "SIGTERM" }]);
  assert.match(stderrChunks.join(""), /Reaped orphaned next-server/);
});
|
||||
|
|
|
|||
|
|
@ -6,7 +6,13 @@ import {
|
|||
spawn,
|
||||
} from "node:child_process";
|
||||
import { randomBytes } from "node:crypto";
|
||||
import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs";
|
||||
import {
|
||||
existsSync,
|
||||
readFileSync,
|
||||
readlinkSync,
|
||||
unlinkSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { request as httpRequest } from "node:http";
|
||||
import { createServer } from "node:net";
|
||||
import { join, resolve } from "node:path";
|
||||
|
|
@ -16,10 +22,7 @@ import {
|
|||
} from "./app-paths.js";
|
||||
|
||||
const DEFAULT_HOST = "127.0.0.1";
|
||||
const DEFAULT_PACKAGE_ROOT = resolve(
|
||||
import.meta.dirname,
|
||||
"..",
|
||||
);
|
||||
const DEFAULT_PACKAGE_ROOT = resolve(import.meta.dirname, "..");
|
||||
|
||||
/** Open a URL in the user's default browser. */
|
||||
function openBrowser(url: string): void {
|
||||
|
|
@ -685,10 +688,17 @@ function cleanupStaleInstance(
|
|||
export function reapOrphanedNextServerProcesses(
|
||||
stderr: WritableLike,
|
||||
packageRoot = DEFAULT_PACKAGE_ROOT,
|
||||
deps: {
|
||||
execSync?: typeof execSync;
|
||||
readlinkSync?: typeof readlinkSync;
|
||||
kill?: typeof process.kill;
|
||||
platform?: NodeJS.Platform;
|
||||
} = {},
|
||||
): { reaped: number; errors: string[] } {
|
||||
const errors: string[] = [];
|
||||
let reaped = 0;
|
||||
if (process.platform === "win32") {
|
||||
const platform = deps.platform ?? process.platform;
|
||||
if (platform === "win32") {
|
||||
// Windows orphan detection not implemented; rely on port-kill fallback
|
||||
return { reaped: 0, errors: [] };
|
||||
}
|
||||
|
|
@ -696,10 +706,10 @@ export function reapOrphanedNextServerProcesses(
|
|||
// Find next-server processes with cwd matching our standalone host path
|
||||
const standalonePath = resolve(packageRoot, "dist", "web", "standalone");
|
||||
// Use ps to find node processes with next-server in their command line
|
||||
const psOutput = execSync(
|
||||
const psOutput = (deps.execSync ?? execSync)(
|
||||
"ps -eo pid,ppid,cmd,comm --no-headers",
|
||||
{ encoding: "utf8", timeout: 5000 },
|
||||
);
|
||||
) as string;
|
||||
const lines = psOutput.split("\n").filter((line) => line.trim());
|
||||
for (const line of lines) {
|
||||
const parts = line.trim().split(/\s+/);
|
||||
|
|
@ -715,7 +725,7 @@ export function reapOrphanedNextServerProcesses(
|
|||
// Check if the process cwd matches our standalone path (or deleted variant)
|
||||
let cwd: string | null = null;
|
||||
try {
|
||||
cwd = readFileSync(`/proc/${pid}/cwd`, "utf8").trim();
|
||||
cwd = (deps.readlinkSync ?? readlinkSync)(`/proc/${pid}/cwd`);
|
||||
} catch {
|
||||
// Process may have exited between ps and readlink
|
||||
continue;
|
||||
|
|
@ -728,7 +738,7 @@ export function reapOrphanedNextServerProcesses(
|
|||
const isOrphan = ppid === 1;
|
||||
if (isOrphan) {
|
||||
try {
|
||||
process.kill(pid, "SIGTERM");
|
||||
(deps.kill ?? process.kill)(pid, "SIGTERM");
|
||||
reaped++;
|
||||
stderr.write(
|
||||
`[forge] Reaped orphaned next-server (pid=${pid}, cwd=${cwd})\n`,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue