From dd126ddc8b85b7343dd037de284f0e368175e40f Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sat, 2 May 2026 22:07:10 +0200 Subject: [PATCH] fix(sf): recover model routes and self-feedback --- packages/daemon/src/session-manager.test.ts | 75 +- packages/daemon/src/session-manager.ts | 179 +++- packages/daemon/src/types.ts | 20 + .../src/providers/google-gemini-cli.test.ts | 70 ++ .../pi-ai/src/providers/google-gemini-cli.ts | 6 +- .../pi-coding-agent/src/modes/rpc/rpc-mode.ts | 134 ++- .../src/modes/rpc/rpc-protocol-v2.test.ts | 18 +- .../src/modes/rpc/rpc-types.ts | 17 +- packages/rpc-client/src/rpc-client.test.ts | 13 +- packages/rpc-client/src/rpc-types.ts | 17 +- src/headless-query.ts | 236 +++++- src/resources/extensions/sf/auto.ts | 65 +- src/resources/extensions/sf/auto/session.ts | 12 + .../sf/bootstrap/agent-end-recovery.ts | 409 ++++------ .../extensions/sf/bootstrap/db-tools.ts | 143 +++- .../extensions/sf/bootstrap/register-hooks.ts | 40 +- .../extensions/sf/commands-handlers.ts | 56 +- .../extensions/sf/commands-harness.ts | 158 +++- src/resources/extensions/sf/doctor.ts | 769 +++++++++++++++--- .../extensions/sf/extension-manifest.json | 4 +- .../extensions/sf/model-route-failure.ts | 179 ++++ .../extensions/sf/self-feedback-drain.ts | 51 +- .../sf/tests/commands-harness.test.ts | 171 ++++ .../error-classifier-quota-reset.test.ts | 15 + .../extensions/sf/tests/flow-audit.test.ts | 255 ++++++ .../sf/tests/model-route-failure.test.ts | 155 ++++ .../sf/tests/provider-errors.test.ts | 84 +- .../tests/rate-limit-model-fallback.test.ts | 70 +- .../research-terminal-transition.test.ts | 71 +- .../sf/tests/self-feedback-drain.test.ts | 102 ++- .../tests/self-feedback-resolve-tool.test.ts | 106 +++ .../extensions/sf/tests/tool-naming.test.ts | 1 + .../sf/tests/triage-protocol-registry.test.ts | 401 +++++++++ .../sf/tests/unit-runtime-fsm.test.ts | 264 ++++++ src/resources/extensions/sf/unit-runtime.ts | 421 +++++++++- 
src/tests/integration/web-mode-cli.test.ts | 41 +- src/web-mode.ts | 30 +- 37 files changed, 4295 insertions(+), 563 deletions(-) create mode 100644 packages/pi-ai/src/providers/google-gemini-cli.test.ts create mode 100644 src/resources/extensions/sf/model-route-failure.ts create mode 100644 src/resources/extensions/sf/tests/commands-harness.test.ts create mode 100644 src/resources/extensions/sf/tests/error-classifier-quota-reset.test.ts create mode 100644 src/resources/extensions/sf/tests/flow-audit.test.ts create mode 100644 src/resources/extensions/sf/tests/model-route-failure.test.ts create mode 100644 src/resources/extensions/sf/tests/self-feedback-resolve-tool.test.ts create mode 100644 src/resources/extensions/sf/tests/triage-protocol-registry.test.ts create mode 100644 src/resources/extensions/sf/tests/unit-runtime-fsm.test.ts diff --git a/packages/daemon/src/session-manager.test.ts b/packages/daemon/src/session-manager.test.ts index 0838cbde2..b668aedce 100644 --- a/packages/daemon/src/session-manager.test.ts +++ b/packages/daemon/src/session-manager.test.ts @@ -27,6 +27,7 @@ class MockRpcClient { stopped = false; aborted = false; prompted: string[] = []; + switchedSessions: string[] = []; private eventListeners: Array<(event: Record) => void> = []; uiResponses: Array<{ requestId: string; response: Record }> = []; @@ -69,6 +70,16 @@ class MockRpcClient { async prompt(message: string): Promise { this.prompted.push(message); + if (message === '/sf pause') { + queueMicrotask(() => { + this.emitEvent({ + type: 'extension_ui_request', + id: 'pause-notice', + method: 'notify', + message: 'Auto-mode paused: daemon reload requested', + }); + }); + } } async abort(): Promise { @@ -79,6 +90,18 @@ class MockRpcClient { this.uiResponses.push({ requestId, response }); } + async getState(): Promise<{ sessionFile: string; sessionId: string }> { + return { + sessionFile: `/tmp/${this.initSessionId}.jsonl`, + sessionId: this.initSessionId, + }; + } + + async 
switchSession(sessionPath: string): Promise<{ cancelled: boolean }> { + this.switchedSessions.push(sessionPath); + return { cancelled: false }; + } + /** Test helper — emit an event to all listeners */ emitEvent(event: Record): void { for (const listener of this.eventListeners) { @@ -98,6 +121,15 @@ class TestableSessionManager extends SessionManager { nextInitError: Error | null = null; nextStartError: Error | null = null; + protected override createRpcClient(_cliPath: string, cwd: string, args: string[]): any { + this.sessionCounter++; + const client = new MockRpcClient({ cwd, args }); + client.initSessionId = `mock-session-${String(this.sessionCounter).padStart(3, '0')}`; + this.lastClient = client; + this.allClients.push(client); + return client; + } + override async startSession(options: { projectDir: string; command?: string; model?: string; bare?: boolean; cliPath?: string }): Promise { const { projectDir } = options; @@ -116,7 +148,7 @@ class TestableSessionManager extends SessionManager { ); } - const client = new MockRpcClient({ cwd: resolvedDir, args: [] }); + const client = this.createRpcClient('mock-sf', resolvedDir, []); if (this.nextStartError) { client.startError = this.nextStartError; this.nextStartError = null; @@ -126,22 +158,19 @@ class TestableSessionManager extends SessionManager { this.nextInitError = null; } - this.sessionCounter++; - client.initSessionId = `mock-session-${String(this.sessionCounter).padStart(3, '0')}`; - this.lastClient = client; - this.allClients.push(client); - // Build session shell const session: ManagedSession = { sessionId: '', projectDir: resolvedDir, projectName, status: 'starting', + reloadState: 'running', client: client as any, // duck-typed mock events: [], pendingBlocker: null, cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } }, startTime: Date.now(), + startOptions: { ...options, projectDir: resolvedDir }, }; // Insert into internal sessions map @@ -300,6 +329,38 @@ 
describe('SessionManager', () => { assert.equal(completedLogs.length, 1); }); + it('runtime epoch mismatch restarts child and resumes prior session file', async () => { + const { manager } = createManager(); + + const sessionId = await manager.startSession({ projectDir: '/tmp/reload-project' }); + const originalClient = manager.lastClient!; + const restarted = new Promise((resolve) => { + manager.once('session:restarted', () => resolve()); + }); + + originalClient.emitEvent({ + type: 'runtime_heartbeat', + sessionId, + sessionFile: '/tmp/reload-session.jsonl', + unitType: 'execute-task', + unitId: 'M001/S01/T01', + runtimeEpoch: 100, + sourceEpoch: 200, + emittedAt: Date.now(), + }); + + await restarted; + + const session = manager.getSession('mock-session-002')!; + assert.ok(session); + assert.equal(originalClient.stopped, true); + assert.equal(manager.allClients.length, 2); + const replacement = manager.allClients[1]; + assert.deepEqual(replacement.switchedSessions, ['/tmp/mock-session-001.jsonl']); + assert.deepEqual(replacement.prompted, ['/sf autonomous']); + assert.equal(session.reloadState, 'running'); + }); + // ---- Lifecycle: start → running → blocked → resolve → running → completed ---- it('start → blocked → resolve → running → completed lifecycle', async () => { @@ -723,8 +784,10 @@ describe('SessionManager', () => { assert.equal(result.sessionId, sessionId); assert.equal(result.status, 'running'); + assert.equal(result.reloadState, 'running'); assert.equal(result.projectName, 'result-test'); assert.equal(result.error, null); + assert.equal(result.lastHeartbeat, null); assert.equal(result.pendingBlocker, null); assert.ok(typeof result.durationMs === 'number'); assert.ok(result.cost); diff --git a/packages/daemon/src/session-manager.ts b/packages/daemon/src/session-manager.ts index 215db18e2..d4fd86919 100644 --- a/packages/daemon/src/session-manager.ts +++ b/packages/daemon/src/session-manager.ts @@ -22,6 +22,7 @@ import type { ManagedSession, 
StartSessionOptions, PendingBlocker, + RuntimeHeartbeat, } from './types.js'; import { MAX_EVENTS, INIT_TIMEOUT_MS } from './types.js'; import type { Logger } from './logger.js'; @@ -34,7 +35,8 @@ const FIRE_AND_FORGET_METHODS = new Set([ 'notify', 'setStatus', 'setWidget', 'setTitle', 'set_editor_text', ]); -const TERMINAL_PREFIXES = ['auto-mode stopped', 'step-mode stopped']; +const TERMINAL_PREFIXES = ['auto-mode stopped', 'auto-mode paused', 'step-mode stopped']; +const RELOAD_PAUSE_TIMEOUT_MS = 5_000; function isTerminalNotification(event: Record): boolean { if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false; @@ -45,7 +47,7 @@ function isTerminalNotification(event: Record): boolean { function isBlockedNotification(event: Record): boolean { if (event.type !== 'extension_ui_request' || event.method !== 'notify') return false; const message = String(event.message ?? '').toLowerCase(); - return message.includes('blocked:'); + return message.includes('blocked:') || message.startsWith('auto-mode paused'); } function isBlockingUIRequest(event: Record): boolean { @@ -96,11 +98,7 @@ export class SessionManager extends EventEmitter { if (options.model) args.push('--model', options.model); if (options.bare) args.push('--bare'); - const client = new RpcClient({ - cliPath, - cwd: resolvedDir, - args, - }); + const client = this.createRpcClient(cliPath, resolvedDir, args); // Build the session shell before async operations so we can track state const session: ManagedSession = { @@ -108,11 +106,13 @@ export class SessionManager extends EventEmitter { projectDir: resolvedDir, projectName, status: 'starting', + reloadState: 'running', client, events: [], pendingBlocker: null, cost: { totalCost: 0, tokens: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 } }, startTime: Date.now(), + startOptions: { ...options, projectDir: resolvedDir }, }; // Insert into map early (keyed by dir) so concurrent starts are rejected @@ -231,6 +231,18 @@ export 
class SessionManager extends EventEmitter { this.logger.info('session cancelled', { sessionId, projectDir: session.projectDir }); } + /** + * Restart a managed RPC child and resume the same persisted session when possible. + * + * Purpose: make daemon-managed auto sessions pick up changed runtime/source + * files at process boundaries instead of trying unsafe in-process hot reload. + */ + async reloadSession(sessionId: string, reason = 'runtime epoch changed'): Promise { + const session = this.getSession(sessionId); + if (!session) throw new Error(`Session not found: ${sessionId}`); + await this.restartSession(session, reason); + } + /** * Build a HeadlessJsonResult-shaped object from accumulated session state. */ @@ -245,9 +257,11 @@ export class SessionManager extends EventEmitter { projectDir: session.projectDir, projectName: session.projectName, status: session.status, + reloadState: session.reloadState ?? 'running', durationMs, cost: session.cost, recentEvents: session.events.slice(-10), + lastHeartbeat: session.lastHeartbeat ?? null, pendingBlocker: session.pendingBlocker ? 
{ id: session.pendingBlocker.id, method: session.pendingBlocker.method, message: session.pendingBlocker.message } : null, @@ -311,6 +325,10 @@ export class SessionManager extends EventEmitter { this.logger.debug('session event', { sessionId: session.sessionId, type: (event as Record).type as string }); this.emit('session:event', { sessionId: session.sessionId, projectDir: session.projectDir, event }); + if ((event as Record).type === 'runtime_heartbeat') { + this.handleRuntimeHeartbeat(session, event as unknown as RuntimeHeartbeat); + } + // Cost tracking (K004 — cumulative-max) if ((event as Record).type === 'cost_update') { const costEvent = event as unknown as RpcCostUpdateEvent; @@ -371,6 +389,135 @@ export class SessionManager extends EventEmitter { }); } } + + private handleRuntimeHeartbeat(session: ManagedSession, heartbeat: RuntimeHeartbeat): void { + session.lastHeartbeat = heartbeat; + if (heartbeat.runtimeEpoch === heartbeat.sourceEpoch) return; + if (session.reloadState === 'reloading') return; + if (session.status !== 'running' && session.status !== 'blocked') return; + + this.logger.info('runtime epoch mismatch detected', { + sessionId: session.sessionId, + projectDir: session.projectDir, + unitType: heartbeat.unitType, + unitId: heartbeat.unitId, + runtimeEpoch: heartbeat.runtimeEpoch, + sourceEpoch: heartbeat.sourceEpoch, + }); + + void this.restartSession(session, 'runtime epoch changed').catch((err) => { + session.reloadState = 'reload_failed'; + session.status = 'error'; + session.error = err instanceof Error ? 
err.message : String(err); + this.logger.error('session reload failed', { + sessionId: session.sessionId, + projectDir: session.projectDir, + error: session.error, + }); + this.emit('session:error', { + sessionId: session.sessionId, + projectDir: session.projectDir, + projectName: session.projectName, + error: session.error, + }); + }); + } + + private async restartSession(session: ManagedSession, reason: string): Promise { + if (session.reloadState === 'reloading') return; + session.reloadState = 'reloading'; + + let sessionFile = session.lastHeartbeat?.sessionFile; + try { + const state = await session.client.getState(); + sessionFile = state.sessionFile ?? sessionFile; + } catch { + // Best effort: a wedged child may not answer state requests. + } + + try { + await session.client.prompt('/sf pause'); + await waitFor( + () => session.status === 'blocked' || session.status === 'completed' || session.status === 'cancelled', + RELOAD_PAUSE_TIMEOUT_MS, + ); + } catch { + // Timeout or prompt failure: stop() escalates SIGTERM to SIGKILL. + } + + session.unsubscribe?.(); + try { + await session.client.stop(); + } catch { + // stop() is best-effort; subsequent start creates a new child. + } + + const opts = session.startOptions ?? { projectDir: session.projectDir }; + const cliPath = opts.cliPath ?? 
SessionManager.resolveCLIPath(); + const args: string[] = ['--mode', 'rpc']; + if (opts.model) args.push('--model', opts.model); + if (opts.bare) args.push('--bare'); + + const client = this.createRpcClient(cliPath, session.projectDir, args); + + await Promise.race([ + client.start(), + timeout(INIT_TIMEOUT_MS, `RpcClient.start() timed out after ${INIT_TIMEOUT_MS}ms`), + ]); + + const initResult: RpcInitResult = await Promise.race([ + client.init(), + timeout(INIT_TIMEOUT_MS, `RpcClient.init() timed out after ${INIT_TIMEOUT_MS}ms`), + ]) as RpcInitResult; + + session.client = client; + session.sessionId = initResult.sessionId; + session.status = 'running'; + session.pendingBlocker = null; + session.reloadState = 'restarted'; + session.error = undefined; + session.startOptions = { ...opts, projectDir: session.projectDir }; + session.unsubscribe = client.onEvent((event: SdkAgentEvent) => { + this.handleEvent(session, event); + }); + + if (sessionFile) { + try { + await client.switchSession(sessionFile); + } catch (err) { + this.logger.warn('session reload could not switch to previous session file', { + sessionId: session.sessionId, + projectDir: session.projectDir, + sessionFile, + error: err instanceof Error ? err.message : String(err), + }); + } + } + + await client.prompt(opts.command ?? 
'/sf autonomous'); + session.reloadState = 'running'; + this.logger.info('session reloaded', { + sessionId: session.sessionId, + projectDir: session.projectDir, + reason, + resumedSessionFile: sessionFile, + }); + this.emit('session:restarted', { + sessionId: session.sessionId, + projectDir: session.projectDir, + projectName: session.projectName, + reason, + sessionFile, + }); + } + + protected createRpcClient(cliPath: string, cwd: string, args: string[]): RpcClient { + return new RpcClient({ + cliPath, + cwd, + args, + }); + } } // --------------------------------------------------------------------------- @@ -383,6 +530,24 @@ function timeout(ms: number, message: string): Promise { }); } +function waitFor(predicate: () => boolean, timeoutMs: number): Promise { + if (predicate()) return Promise.resolve(); + return new Promise((resolve, reject) => { + const startedAt = Date.now(); + const interval = setInterval(() => { + if (predicate()) { + clearInterval(interval); + resolve(); + return; + } + if (Date.now() - startedAt >= timeoutMs) { + clearInterval(interval); + reject(new Error(`Timed out after ${timeoutMs}ms`)); + } + }, 100); + }); +} + function extractBlocker(event: SdkAgentEvent): PendingBlocker { const uiEvent = event as unknown as RpcExtensionUIRequest; return { diff --git a/packages/daemon/src/types.ts b/packages/daemon/src/types.ts index f47925a4a..b4494ef28 100644 --- a/packages/daemon/src/types.ts +++ b/packages/daemon/src/types.ts @@ -57,6 +57,17 @@ export interface DaemonConfig { // --------------------------------------------------------------------------- export type SessionStatus = 'starting' | 'running' | 'blocked' | 'completed' | 'error' | 'cancelled'; +export type ReloadState = 'running' | 'reloading' | 'restarted' | 'reload_failed'; + +export interface RuntimeHeartbeat { + sessionId: string; + sessionFile?: string; + unitType?: string; + unitId?: string; + runtimeEpoch: number; + sourceEpoch: number; + emittedAt: number; +} // 
--------------------------------------------------------------------------- // Managed Session @@ -78,6 +89,9 @@ export interface ManagedSession { /** Current lifecycle status */ status: SessionStatus; + /** Daemon-managed runtime reload state */ + reloadState?: ReloadState; + /** The RpcClient instance managing the agent process */ client: RpcClient; @@ -96,6 +110,12 @@ export interface ManagedSession { /** Error message if status is 'error' */ error?: string; + /** Latest runtime heartbeat received from the RPC child */ + lastHeartbeat?: RuntimeHeartbeat; + + /** Original session start options used for daemon-managed restarts */ + startOptions?: StartSessionOptions; + /** Cleanup function to unsubscribe from events */ unsubscribe?: () => void; } diff --git a/packages/pi-ai/src/providers/google-gemini-cli.test.ts b/packages/pi-ai/src/providers/google-gemini-cli.test.ts new file mode 100644 index 000000000..44c5d4fcd --- /dev/null +++ b/packages/pi-ai/src/providers/google-gemini-cli.test.ts @@ -0,0 +1,70 @@ +import assert from "node:assert/strict"; +import { describe, test, vi } from "vitest"; +import type { Context, Model } from "../types.js"; + +const geminiCliCore = vi.hoisted(() => ({ + retryError: undefined as Error | undefined, + retryOptions: undefined as Record | undefined, +})); + +vi.mock("@google/gemini-cli-core", () => ({ + AuthType: { LOGIN_WITH_GOOGLE: "LOGIN_WITH_GOOGLE" }, + CodeAssistServer: class { + async generateContentStream(): Promise> { + return (async function* emptyStream() {})(); + } + }, + getOauthClient: vi.fn(async () => ({})), + makeFakeConfig: vi.fn(() => ({})), + retryWithBackoff: vi.fn(async (_fn: unknown, options: Record) => { + geminiCliCore.retryOptions = options; + throw geminiCliCore.retryError ?? 
new Error("quota exhausted"); + }), + setupUser: vi.fn(async () => ({ projectId: "test-project" })), +})); + +import { streamGoogleGeminiCli } from "./google-gemini-cli.js"; + +function makeModel(): Model<"google-gemini-cli"> { + return { + id: "gemini-3-flash-preview", + name: "Gemini 3 Flash Preview", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: "", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1_000_000, + maxTokens: 8192, + }; +} + +function makeContext(): Context { + return { + messages: [{ role: "user", content: "hello", timestamp: 0 }], + }; +} + +describe("google-gemini-cli provider retry ownership", () => { + test("google_gemini_cli_when_quota_resets_soon_returns_error_to_caller_without_cli_retry_loop", async () => { + geminiCliCore.retryOptions = undefined; + geminiCliCore.retryError = Object.assign( + new Error( + "You have exhausted your capacity on this model. Your quota will reset after 54s.", + ), + { retryDelayMs: 54_000 }, + ); + + const stream = streamGoogleGeminiCli(makeModel(), makeContext()); + const result = await stream.result(); + + const retryOptions = geminiCliCore.retryOptions as + | { maxAttempts?: unknown } + | undefined; + assert.equal(retryOptions?.maxAttempts, 1); + assert.equal(result.stopReason, "error"); + assert.match(result.errorMessage ?? "", /exhausted your capacity/i); + assert.equal(result.retryAfterMs, 54_000); + }); +}); diff --git a/packages/pi-ai/src/providers/google-gemini-cli.ts b/packages/pi-ai/src/providers/google-gemini-cli.ts index c815227cc..a3c83e67d 100644 --- a/packages/pi-ai/src/providers/google-gemini-cli.ts +++ b/packages/pi-ai/src/providers/google-gemini-cli.ts @@ -5,7 +5,8 @@ * @google/gemini-cli-core — the same library the real `gemini` CLI uses. 
* cli-core reads ~/.gemini/oauth_creds.json itself, refreshes tokens, * discovers the project (free-tier or whatever's onboarded server-side) - * via setupUser(), and handles all the User-Agent / retry / 429 details. + * via setupUser(), and handles all the User-Agent / quota-classification details. + * Request retry/fallback stays in the caller so SF can move to the next model. */ import { @@ -227,6 +228,9 @@ export const streamGoogleGeminiCli: StreamFunction< () => server.generateContentStream(req as any, promptId, "USER" as any), { authType: AuthType.LOGIN_WITH_GOOGLE, + // SF owns cross-model fallback. Let cli-core classify quota errors, + // but do not let it hold the turn through its 10-attempt retry loop. + maxAttempts: 1, signal: options?.signal, }, ); diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts index 3c138ca70..637d2fb3b 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-mode.ts @@ -12,6 +12,8 @@ */ import * as crypto from "node:crypto"; +import { existsSync, readFileSync, readdirSync, statSync } from "node:fs"; +import { dirname, join, resolve } from "node:path"; import type { AgentSession } from "../../core/agent-session.js"; import { killTrackedDetachedChildren } from "../../utils/shell.js"; import type { @@ -34,6 +36,110 @@ import type { RpcSlashCommand, } from "./rpc-types.js"; +const RUNTIME_HEARTBEAT_INTERVAL_MS = Number( + process.env.SF_RUNTIME_HEARTBEAT_INTERVAL_MS ?? 10_000, +); + +function findRuntimeSourceRoot(): string { + const explicit = + process.env.SF_RUNTIME_SOURCE_ROOT ?? process.env.SF_SOURCE_ROOT; + if (explicit) return resolve(explicit); + + let dir = resolve(dirname(process.argv[1] ?? 
process.cwd())); + while (true) { + if (existsSync(join(dir, "package.json")) && existsSync(join(dir, "src"))) { + return dir; + } + const parent = dirname(dir); + if (parent === dir) return process.cwd(); + dir = parent; + } +} + +function newestSourceMtimeMs(root: string): number { + let newest = 0; + const skip = new Set([ + ".git", + ".sf", + "dist", + "node_modules", + "target", + ".next", + "coverage", + ]); + const stack = [root]; + while (stack.length > 0) { + const dir = stack.pop()!; + let entries: import("node:fs").Dirent[]; + try { + entries = readdirSync(dir, { withFileTypes: true }); + } catch { + continue; + } + for (const entry of entries) { + if (skip.has(entry.name)) continue; + const full = join(dir, entry.name); + if (entry.isDirectory()) { + stack.push(full); + continue; + } + if (!entry.isFile() || !/\.(?:ts|tsx|mts|cts)$/.test(entry.name)) { + continue; + } + try { + newest = Math.max(newest, statSync(full).mtimeMs); + } catch { + // Ignore files that disappear during a scan. + } + } + } + return newest; +} + +interface RuntimeUnitState { + unitType?: string; + unitId?: string; + sessionFile?: string; +} + +function effectiveAutoLockFile(): string { + const milestoneLock = process.env.SF_PARALLEL_WORKER + ? process.env.SF_MILESTONE_LOCK + : undefined; + return milestoneLock ? `auto-${milestoneLock}.lock` : "auto.lock"; +} + +function readRuntimeUnitState(): RuntimeUnitState { + const roots = [process.env.SF_PROJECT_ROOT, process.cwd()].filter( + (root): root is string => Boolean(root), + ); + const seen = new Set(); + for (const root of roots) { + const resolvedRoot = resolve(root); + if (seen.has(resolvedRoot)) continue; + seen.add(resolvedRoot); + const lockPath = join(resolvedRoot, ".sf", effectiveAutoLockFile()); + try { + if (!existsSync(lockPath)) continue; + const data = JSON.parse(readFileSync(lockPath, "utf-8")) as Record< + string, + unknown + >; + return { + unitType: + typeof data.unitType === "string" ? 
data.unitType : undefined, + unitId: typeof data.unitId === "string" ? data.unitId : undefined, + sessionFile: + typeof data.sessionFile === "string" ? data.sessionFile : undefined, + }; + } catch { + // Heartbeats should never fail because lock metadata is temporarily absent + // or being rewritten. + } + } + return {}; +} + // Re-export types for consumers export type { RpcCommand, @@ -519,6 +625,32 @@ export async function runRpcMode(session: AgentSession): Promise { } }); + const runtimeSourceRoot = findRuntimeSourceRoot(); + const runtimeEpoch = newestSourceMtimeMs(runtimeSourceRoot); + const emitRuntimeHeartbeat = () => { + const runtimeUnit = readRuntimeUnitState(); + const heartbeat = { + type: "runtime_heartbeat" as const, + sessionId: session.sessionId, + sessionFile: runtimeUnit.sessionFile ?? session.sessionFile, + unitType: runtimeUnit.unitType, + unitId: runtimeUnit.unitId, + runtimeEpoch, + sourceEpoch: newestSourceMtimeMs(runtimeSourceRoot), + emittedAt: Date.now(), + }; + if (!eventFilter || eventFilter.has("runtime_heartbeat")) { + output(heartbeat); + } + }; + const runtimeHeartbeatTimer = + RUNTIME_HEARTBEAT_INTERVAL_MS > 0 + ? 
setInterval(emitRuntimeHeartbeat, RUNTIME_HEARTBEAT_INTERVAL_MS) + : undefined; + if (runtimeHeartbeatTimer) { + signalCleanupHandlers.push(() => clearInterval(runtimeHeartbeatTimer)); + } + // Handle a single command const handleCommand = async (command: RpcCommand): Promise => { const id = command.id; @@ -901,7 +1033,7 @@ export async function runRpcMode(session: AgentSession): Promise { protocolVersion: 2, sessionId: session.sessionId, capabilities: { - events: ["execution_complete", "cost_update"], + events: ["execution_complete", "cost_update", "runtime_heartbeat"], commands: ["init", "shutdown", "subscribe"], }, }; diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-protocol-v2.test.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-protocol-v2.test.ts index 818e5a548..09dd3fb48 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-protocol-v2.test.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-protocol-v2.test.ts @@ -148,7 +148,7 @@ describe("v2 type shapes", () => { protocolVersion: 2, sessionId: "test-session-123", capabilities: { - events: ["execution_complete", "cost_update"], + events: ["execution_complete", "cost_update", "runtime_heartbeat"], commands: ["init", "shutdown", "subscribe"], }, }; @@ -158,6 +158,7 @@ describe("v2 type shapes", () => { assert.ok(Array.isArray(initResult.capabilities.commands)); assert.ok(initResult.capabilities.events.includes("execution_complete")); assert.ok(initResult.capabilities.events.includes("cost_update")); + assert.ok(initResult.capabilities.events.includes("runtime_heartbeat")); assert.ok(initResult.capabilities.commands.includes("init")); assert.ok(initResult.capabilities.commands.includes("shutdown")); assert.ok(initResult.capabilities.commands.includes("subscribe")); @@ -231,6 +232,16 @@ describe("v2 type shapes", () => { cumulativeCost: 0.03, tokens: { input: 100, output: 50, cacheRead: 10, cacheWrite: 5 }, }, + { + type: "runtime_heartbeat", + sessionId: "s1", + sessionFile: "/tmp/s1.jsonl", + 
unitType: "execute-task", + unitId: "M001/S01/T01", + runtimeEpoch: 100, + sourceEpoch: 101, + emittedAt: 123, + }, ]; for (const event of events) { @@ -242,6 +253,9 @@ describe("v2 type shapes", () => { // TypeScript narrows to RpcCostUpdateEvent assert.ok("turnCost" in event); assert.ok("tokens" in event); + } else if (event.type === "runtime_heartbeat") { + assert.ok("runtimeEpoch" in event); + assert.ok("sourceEpoch" in event); } else { assert.fail(`Unexpected event type: ${(event as any).type}`); } @@ -569,7 +583,7 @@ describe("Client ↔ Mock server protocol exchange", () => { protocolVersion: 2, sessionId: "sess-abc", capabilities: { - events: ["execution_complete", "cost_update"], + events: ["execution_complete", "cost_update", "runtime_heartbeat"], commands: ["init", "shutdown", "subscribe"], }, }; diff --git a/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts b/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts index 918b78c78..da4265339 100644 --- a/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts +++ b/packages/pi-coding-agent/src/modes/rpc/rpc-types.ts @@ -273,8 +273,23 @@ export interface RpcCostUpdateEvent { }; } +/** Runtime heartbeat emitted by long-lived RPC children for daemon reload supervision. 
*/ +export interface RpcRuntimeHeartbeatEvent { + type: "runtime_heartbeat"; + sessionId: string; + sessionFile?: string; + unitType?: string; + unitId?: string; + runtimeEpoch: number; + sourceEpoch: number; + emittedAt: number; +} + /** Discriminated union of all v2-only event types */ -export type RpcV2Event = RpcExecutionCompleteEvent | RpcCostUpdateEvent; +export type RpcV2Event = + | RpcExecutionCompleteEvent + | RpcCostUpdateEvent + | RpcRuntimeHeartbeatEvent; // ============================================================================ // Extension UI Events (stdout) diff --git a/packages/rpc-client/src/rpc-client.test.ts b/packages/rpc-client/src/rpc-client.test.ts index 5cdd5f00c..46f9d17a3 100644 --- a/packages/rpc-client/src/rpc-client.test.ts +++ b/packages/rpc-client/src/rpc-client.test.ts @@ -216,7 +216,7 @@ describe("type shapes", () => { assert.equal(v2, 2); }); - it("RpcV2Event discriminated union covers both event types", () => { + it("RpcV2Event discriminated union covers protocol event types", () => { const events: RpcV2Event[] = [ { type: "execution_complete", @@ -241,10 +241,19 @@ describe("type shapes", () => { cumulativeCost: 0.001, tokens: { input: 100, output: 50, cacheRead: 0, cacheWrite: 0 }, }, + { + type: "runtime_heartbeat", + sessionId: "s1", + sessionFile: "/tmp/s1.jsonl", + runtimeEpoch: 100, + sourceEpoch: 100, + emittedAt: 123, + }, ]; - assert.equal(events.length, 2); + assert.equal(events.length, 3); assert.equal(events[0].type, "execution_complete"); assert.equal(events[1].type, "cost_update"); + assert.equal(events[2].type, "runtime_heartbeat"); }); }); diff --git a/packages/rpc-client/src/rpc-types.ts b/packages/rpc-client/src/rpc-types.ts index eb9be0acf..dce4e09e6 100644 --- a/packages/rpc-client/src/rpc-types.ts +++ b/packages/rpc-client/src/rpc-types.ts @@ -336,8 +336,23 @@ export interface RpcCostUpdateEvent { }; } +/** Runtime heartbeat emitted by long-lived RPC children for daemon reload supervision. 
*/ +export interface RpcRuntimeHeartbeatEvent { + type: "runtime_heartbeat"; + sessionId: string; + sessionFile?: string; + unitType?: string; + unitId?: string; + runtimeEpoch: number; + sourceEpoch: number; + emittedAt: number; +} + /** Discriminated union of all v2-only event types */ -export type RpcV2Event = RpcExecutionCompleteEvent | RpcCostUpdateEvent; +export type RpcV2Event = + | RpcExecutionCompleteEvent + | RpcCostUpdateEvent + | RpcRuntimeHeartbeatEvent; // ============================================================================ // Extension UI Events (stdout) diff --git a/src/headless-query.ts b/src/headless-query.ts index 3354988e2..ef6d6b679 100644 --- a/src/headless-query.ts +++ b/src/headless-query.ts @@ -15,8 +15,9 @@ * bypassing the extension loader's jiti setup (#1137). */ +import { existsSync, readdirSync, readFileSync } from "node:fs"; import { homedir } from "node:os"; -import { join } from "node:path"; +import { dirname, join } from "node:path"; import { createJiti } from "@mariozechner/jiti"; import { resolveBundledSourceResource } from "./bundled-resource-path.js"; import type { SFState } from "./resources/extensions/sf/types.js"; @@ -33,7 +34,6 @@ const agentExtensionsDir = join( "extensions", "sf", ); -const { existsSync } = await import("node:fs"); const useAgentDir = existsSync(join(agentExtensionsDir, "state.js")); const sfExtensionPath = (moduleName: string) => useAgentDir @@ -46,10 +46,7 @@ const sfExtensionPath = (moduleName: string) => ); async function loadExtensionModules() { - const stateModule = (await jiti.import( - sfExtensionPath("state"), - {}, - )) as any; + const stateModule = (await jiti.import(sfExtensionPath("state"), {})) as any; const dispatchModule = (await jiti.import( sfExtensionPath("auto-dispatch"), {}, @@ -86,6 +83,43 @@ async function loadExtensionModules() { // ─── Types ────────────────────────────────────────────────────────────────── +type RuntimeDispatchDecisionSummary = { + action: "dispatch" | 
"retry" | "notify" | "block" | "skip"; + reasonCode: + | "no-runtime-record" + | "queued" + | "retry-budget-available" + | "terminal-ready-to-notify" + | "retry-budget-exhausted" + | "synthetic-reset-required" + | "already-notified" + | "active-or-claimed" + | "notified" + | "terminal-nonretryable"; + retryCount: number; + maxRetries: number; + retryBudgetRemaining: number; +}; + +type RuntimeUnitSummary = { + unitType: string; + unitId: string; + phase: string; + status: string; + startedAt: number | null; + updatedAt: number | null; + retryCount: number; + maxRetries: number; + retryBudgetRemaining: number; + lastHeartbeatAt: number | null; + lastProgressAt: number | null; + lastOutputAt: number | null; + outputPath: string | null; + watchdogReason: string | null; + notifiedAt: number | null; + dispatchDecision: RuntimeDispatchDecisionSummary; +}; + export interface QuerySnapshot { schemaVersion: 1; state: SFState; @@ -105,6 +139,9 @@ export interface QuerySnapshot { }>; total: number; }; + runtime: { + units: RuntimeUnitSummary[]; + }; } export interface QueryResult { @@ -114,6 +151,192 @@ export interface QueryResult { // ─── Implementation ───────────────────────────────────────────────────────── +const QUERY_TERMINAL_STATUSES = new Set([ + "completed", + "failed", + "blocked", + "cancelled", + "stale", + "runaway-recovered", +]); +const QUERY_RETRYABLE_TERMINAL_STATUSES = new Set([ + "failed", + "stale", + "runaway-recovered", +]); +const DEFAULT_QUERY_MAX_RETRIES = 1; + +function resolveSfRootForQuery(basePath: string): string { + let current = basePath; + while (true) { + const candidate = join(current, ".sf"); + if (existsSync(candidate)) return candidate; + const parent = dirname(current); + if (parent === current) return join(basePath, ".sf"); + current = parent; + } +} + +function stringField(value: unknown, fallback = ""): string { + return typeof value === "string" ? 
value : fallback; +} + +function numberField(value: unknown): number | null { + return typeof value === "number" && Number.isFinite(value) ? value : null; +} + +function inferQueryStatus( + phase: string, + record: Record, +): string { + switch (phase) { + case "queued": + case "claimed": + case "running": + case "progress": + case "completed": + case "failed": + case "blocked": + case "cancelled": + case "stale": + case "runaway-recovered": + case "notified": + return phase; + case "dispatched": + return "running"; + case "wrapup-warning-sent": + case "runaway-warning-sent": + case "runaway-final-warning-sent": + case "recovered": + return "progress"; + case "timeout": + return "stale"; + case "finalized": + return "completed"; + case "paused": + return record.runawayGuardPause ? "runaway-recovered" : "blocked"; + case "skipped": + return "blocked"; + default: + return "running"; + } +} + +function queryRuntimeDecision(input: { + unitType: string; + unitId: string; + status: string; + retryCount: number; + maxRetries: number; + notifiedAt: number | null; +}): RuntimeDispatchDecisionSummary { + const retryBudgetRemaining = Math.max(0, input.maxRetries - input.retryCount); + const common = { + retryCount: input.retryCount, + maxRetries: input.maxRetries, + retryBudgetRemaining, + }; + if (input.notifiedAt !== null) { + return { action: "skip", reasonCode: "already-notified", ...common }; + } + if (input.status === "notified") { + return { action: "skip", reasonCode: "notified", ...common }; + } + if (input.status === "queued") { + return { action: "dispatch", reasonCode: "queued", ...common }; + } + if (!QUERY_TERMINAL_STATUSES.has(input.status)) { + return { action: "skip", reasonCode: "active-or-claimed", ...common }; + } + const synthetic = + input.unitType === "synthetic" || + input.unitId.includes("parallel-research"); + if (synthetic && input.status !== "completed") { + return { + action: "block", + reasonCode: "synthetic-reset-required", + ...common, + }; + } 
+ if (QUERY_RETRYABLE_TERMINAL_STATUSES.has(input.status)) { + return retryBudgetRemaining > 0 + ? { action: "retry", reasonCode: "retry-budget-available", ...common } + : { action: "block", reasonCode: "retry-budget-exhausted", ...common }; + } + if ( + input.status === "completed" || + input.status === "blocked" || + input.status === "cancelled" + ) { + return { + action: "notify", + reasonCode: "terminal-ready-to-notify", + ...common, + }; + } + return { action: "skip", reasonCode: "terminal-nonretryable", ...common }; +} + +function readRuntimeUnitSummaries(basePath: string): RuntimeUnitSummary[] { + const unitsDir = join(resolveSfRootForQuery(basePath), "runtime", "units"); + if (!existsSync(unitsDir)) return []; + const results: RuntimeUnitSummary[] = []; + for (const file of readdirSync(unitsDir)) { + if (!file.endsWith(".json")) continue; + try { + const record = JSON.parse( + readFileSync(join(unitsDir, file), "utf-8"), + ) as Record; + const unitType = stringField(record.unitType); + const unitId = stringField(record.unitId); + if (!unitType || !unitId) continue; + const phase = stringField(record.phase, "dispatched"); + const status = stringField( + record.status, + inferQueryStatus(phase, record), + ); + const recoveryAttempts = numberField(record.recoveryAttempts) ?? 0; + const retryCount = numberField(record.retryCount) ?? recoveryAttempts; + const maxRetries = + numberField(record.maxRetries) ?? 
DEFAULT_QUERY_MAX_RETRIES; + const notifiedAt = numberField(record.notifiedAt); + const dispatchDecision = queryRuntimeDecision({ + unitType, + unitId, + status, + retryCount, + maxRetries, + notifiedAt, + }); + results.push({ + unitType, + unitId, + phase, + status, + startedAt: numberField(record.startedAt), + updatedAt: numberField(record.updatedAt), + retryCount, + maxRetries, + retryBudgetRemaining: dispatchDecision.retryBudgetRemaining, + lastHeartbeatAt: numberField(record.lastHeartbeatAt), + lastProgressAt: numberField(record.lastProgressAt), + lastOutputAt: numberField(record.lastOutputAt), + outputPath: + typeof record.outputPath === "string" ? record.outputPath : null, + watchdogReason: + typeof record.watchdogReason === "string" + ? record.watchdogReason + : null, + notifiedAt, + dispatchDecision, + }); + } catch { + // Runtime query must stay best-effort; malformed unit files are ignored. + } + } + return results; +} + export async function buildQuerySnapshot( basePath: string, ): Promise { @@ -169,6 +392,7 @@ export async function buildQuerySnapshot( state, next, cost: { workers, total: workers.reduce((sum, w) => sum + w.cost, 0) }, + runtime: { units: readRuntimeUnitSummaries(basePath) }, }; return snapshot; diff --git a/src/resources/extensions/sf/auto.ts b/src/resources/extensions/sf/auto.ts index aa5866104..c1330c4bf 100644 --- a/src/resources/extensions/sf/auto.ts +++ b/src/resources/extensions/sf/auto.ts @@ -15,6 +15,7 @@ import type { ExtensionCommandContext, ExtensionContext, } from "@singularity-forge/pi-coding-agent"; +import type { Api, Model } from "@singularity-forge/pi-ai"; import { getManifestStatus } from "./files.js"; import { assessInterruptedSession, @@ -47,7 +48,11 @@ import { getRtkSessionSavings } from "../shared/rtk-session-stats.js"; import { deactivateSF } from "../shared/sf-phase-state.js"; import { clearActivityLogState } from "./activity-log.js"; import { atomicWriteSync } from "./atomic-write.js"; -import { AutoSession, 
getAutoSession } from "./auto/session.js"; +import { + AutoSession, + getAutoSession, + type ModelFailureRecord, +} from "./auto/session.js"; // import { startSliceParallel } from "./slice-parallel-orchestrator.js"; (decoy for legacy regex tests) import { getBudgetAlertLevel, @@ -542,6 +547,64 @@ export function setCurrentDispatchedModelId( s.currentDispatchedModelId = model ? `${model.provider}/${model.id}` : null; } +/** + * Update the concrete model tracked for the currently running unit. + * + * Purpose: keep fresh-session restoration and dashboard state aligned after + * runtime provider recovery switches models mid-unit. + * + * Consumer: bootstrap/agent-end-recovery.ts after a configured fallback route + * is successfully applied. + */ +export function setCurrentUnitModel(model: Model | null): void { + s.currentUnitModel = model; + setCurrentDispatchedModelId(model); +} + +/** + * Record that a provider/model route failed for the current auto unit. + * + * Purpose: prevent retry loops on quota/rate-limit/server failures by making + * subsequent recovery skip the failed route for this unit. + * + * Consumer: bootstrap/agent-end-recovery.ts before selecting the next configured + * fallback route. + */ +export function recordCurrentModelFailure(input: { + provider: string; + modelId: string; + reason: string; + timestamp?: number; +}): void { + if (!s.currentUnit) return; + s.modelFailures.push({ + unitType: s.currentUnit.type, + unitId: s.currentUnit.id, + provider: input.provider, + modelId: input.modelId, + reason: input.reason, + timestamp: input.timestamp ?? Date.now(), + }); +} + +/** + * Return model failures scoped to the currently running auto unit. + * + * Purpose: keep recovery decisions unit-local so a quota failure in one unit + * does not permanently suppress a model in later work. + * + * Consumer: bootstrap/agent-end-recovery.ts when resolving the next configured + * fallback route. 
+ */ +export function getCurrentUnitModelFailures(): ModelFailureRecord[] { + if (!s.currentUnit) return []; + return s.modelFailures.filter( + (failure) => + failure.unitType === s.currentUnit?.type && + failure.unitId === s.currentUnit?.id, + ); +} + /** * Mark the current research unit as terminal after saving its RESEARCH artifact. * diff --git a/src/resources/extensions/sf/auto/session.ts b/src/resources/extensions/sf/auto/session.ts index c708b2113..ab4037eb7 100644 --- a/src/resources/extensions/sf/auto/session.ts +++ b/src/resources/extensions/sf/auto/session.ts @@ -47,6 +47,15 @@ export interface StartModel { id: string; } +export interface ModelFailureRecord { + unitType: string; + unitId: string; + provider: string; + modelId: string; + reason: string; + timestamp: number; +} + export interface PendingVerificationRetry { unitId: string; failureContext: string; @@ -156,6 +165,8 @@ export class AutoSession { currentUnitModel: Model | null = null; /** Fully-qualified model ID (provider/id) set after selectAndApplyModel + hook overrides (#2899). */ currentDispatchedModelId: string | null = null; + /** Per-session, per-unit failed model routes skipped by runtime recovery. 
*/ + readonly modelFailures: ModelFailureRecord[] = []; originalModelId: string | null = null; originalModelProvider: string | null = null; lastBudgetAlertLevel: BudgetAlertLevel = 0; @@ -348,6 +359,7 @@ export class AutoSession { this.manualSessionModelOverride = null; this.currentUnitModel = null; this.currentDispatchedModelId = null; + this.modelFailures.length = 0; this.originalModelId = null; this.originalModelProvider = null; this.lastBudgetAlertLevel = 0; diff --git a/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts b/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts index 52a4c07ea..577b80109 100644 --- a/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts +++ b/src/resources/extensions/sf/bootstrap/agent-end-recovery.ts @@ -4,13 +4,13 @@ import type { } from "@singularity-forge/pi-coding-agent"; import { getAutoDashboardData, - getAutoModeStartModel, + getCurrentUnitModelFailures, isAutoActive, pauseAuto, - setCurrentDispatchedModelId, + recordCurrentModelFailure, + setCurrentUnitModel, } from "../auto.js"; import { isSessionSwitchInFlight, resolveAgentEnd } from "../auto-loop.js"; -import { resolveModelId } from "../auto-model-selection.js"; import { blockModel, isModelBlocked } from "../blocked-models.js"; import { classifyError, @@ -21,76 +21,122 @@ import { } from "../error-classifier.js"; import { checkAutoStartAfterDiscuss } from "../guided-flow.js"; import { - getNextFallbackModel, + type ModelRouteRef, + resolveNextModelRoute, +} from "../model-route-failure.js"; +import { resolveModelWithFallbacksForUnit, resolvePersistModelChanges, } from "../preferences.js"; import { pauseAutoForProviderError } from "../provider-error-pause.js"; import { logWarning } from "../workflow-logger.js"; -import { resumeAutoAfterProviderDelay } from "./provider-error-resume.js"; import { clearDiscussionFlowState } from "./write-gate.js"; const retryState = createRetryState(); -const MAX_NETWORK_RETRIES = 2; -const MAX_TRANSIENT_AUTO_RESUMES 
= 8; /** * Reset the module-level retry state so a resumed auto-session starts fresh. - * Called by provider-error-resume.ts before startAuto() — without this, the - * consecutiveTransientCount accumulates across pause/resume cycles and locks - * out auto-resume after MAX_TRANSIENT_AUTO_RESUMES total (not consecutive) errors. + * Called by provider-error-resume.ts before startAuto() so legacy paused + * provider recovery does not inherit stale transient counters. */ export function resetTransientRetryState(): void { resetRetryState(retryState); } -async function pauseTransientWithBackoff( - cls: ErrorClass, - pi: ExtensionAPI, +function getCurrentRouteFromMessage( + lastMsg: unknown, ctx: ExtensionContext, - errorDetail: string, - isRateLimit: boolean, -): Promise { - retryState.consecutiveTransientCount += 1; - const baseRetryAfterMs = "retryAfterMs" in cls ? cls.retryAfterMs : 15_000; - const retryAfterMs = - baseRetryAfterMs * - 2 ** Math.max(0, retryState.consecutiveTransientCount - 1); - const allowAutoResume = - retryState.consecutiveTransientCount <= MAX_TRANSIENT_AUTO_RESUMES; - if (!allowAutoResume) { - ctx.ui.notify( - `Transient provider errors persisted after ${MAX_TRANSIENT_AUTO_RESUMES} auto-resume attempts. Pausing for manual review.`, +): ModelRouteRef | undefined { + const msg = lastMsg as Record | undefined; + const provider = + typeof msg?.provider === "string" ? msg.provider : ctx.model?.provider; + const id = typeof msg?.model === "string" ? msg.model : ctx.model?.id; + return provider && id ? 
{ provider, id } : undefined; +} + +function isModelRouteFailure(cls: ErrorClass): boolean { + return ( + cls.kind === "rate-limit" || + cls.kind === "network" || + cls.kind === "server" || + cls.kind === "connection" || + cls.kind === "stream" + ); +} + +async function trySwitchToFallbackModel(args: { + pi: ExtensionAPI; + ctx: ExtensionContext; + current: ModelRouteRef | undefined; + reason: string; + unitType: string; + basePath: string | undefined; + errorDetail: string; + persistModelChanges: boolean; +}): Promise { + const modelConfig = resolveModelWithFallbacksForUnit(args.unitType, { + autoBenchmark: true, + }); + + if (args.current) { + recordCurrentModelFailure({ + provider: args.current.provider, + modelId: args.current.id, + reason: args.reason, + }); + } + + const availableModels = args.ctx.modelRegistry.getAvailable(); + const isBlocked = args.basePath + ? (model: { provider: string; id: string }) => + isModelBlocked(args.basePath!, model.provider, model.id) + : undefined; + + for ( + let attempt = 0; + attempt < availableModels.length + (modelConfig?.fallbacks.length ?? 0) + 1; + attempt++ + ) { + const nextRoute = resolveNextModelRoute({ + current: args.current, + modelConfig, + availableModels, + failedRoutes: getCurrentUnitModelFailures(), + isBlocked, + }); + + if (!nextRoute) return false; + + const ok = await args.pi.setModel(nextRoute.model, { + persist: args.persistModelChanges, + }); + if (!ok) { + recordCurrentModelFailure({ + provider: nextRoute.model.provider, + modelId: nextRoute.model.id, + reason: "setModel failed during provider recovery", + }); + continue; + } + + resetRetryState(retryState); + setCurrentUnitModel(nextRoute.model); + args.ctx.ui.notify( + `Model route failed${args.errorDetail}. Switched to ${nextRoute.source === "configured" ? 
"configured fallback" : "available fallback"}: ${nextRoute.model.provider}/${nextRoute.model.id}.`, "warning", ); + args.pi.sendMessage( + { + customType: "sf-auto-timeout-recovery", + content: "Continue execution.", + display: false, + }, + { triggerTurn: true }, + ); + return true; } - await pauseAutoForProviderError( - ctx.ui, - errorDetail, - () => - pauseAuto(ctx, pi, { - message: `Provider error: ${errorDetail}`, - category: "provider", - isTransient: allowAutoResume, - retryAfterMs, - }), - { - isRateLimit, - isTransient: allowAutoResume, - retryAfterMs, - resume: allowAutoResume - ? () => { - void resumeAutoAfterProviderDelay(pi, ctx).catch((err) => { - const message = err instanceof Error ? err.message : String(err); - ctx.ui.notify( - `Provider error recovery delay elapsed, but auto-mode failed to resume: ${message}`, - "error", - ); - }); - } - : undefined, - }, - ); + + return false; } export async function handleAgentEnd( @@ -172,14 +218,12 @@ export async function handleAgentEnd( // ── 1. Classify using rawErrorMsg to avoid prose false-positives ──── const cls = classifyError(rawErrorMsg, explicitRetryAfterMs); + const currentRoute = getCurrentRouteFromMessage(lastMsg, ctx); + const dash = getAutoDashboardData(); - // ── 1b. Defer to Core RetryHandler for most transient errors ──────── - // Core retries transient failures in-session after this handler. - // Keep that behavior for non-rate-limit classes to avoid pause/retry races, - // but let rate-limit continue into model fallback logic below (#4373). - if (isTransient(cls) && cls.kind !== "rate-limit") { - return; - } + // SF owns provider-route recovery in auto-mode. Quota/rate-limit/server/ + // stream/connection failures must leave the failed provider/model route + // immediately instead of sleeping or waiting for same-model retry loops. // Cap rate-limit backoff for CLI-style providers (openai-codex, google-gemini-cli) // which use per-user quotas with shorter windows (#2922). 
@@ -198,9 +242,8 @@ export async function handleAgentEnd( // same dead model isn't reselected on the next /sf auto restart, // then try a fallback before pausing. if (cls.kind === "unsupported-model") { - const dash = getAutoDashboardData(); - const rejectedProvider = ctx.model?.provider; - const rejectedId = ctx.model?.id; + const rejectedProvider = currentRoute?.provider; + const rejectedId = currentRoute?.id; if (dash.basePath && rejectedProvider && rejectedId) { try { blockModel( @@ -219,62 +262,18 @@ export async function handleAgentEnd( } } - // Try configured fallback chain, skipping anything already blocked. if (dash.currentUnit && dash.basePath) { - const modelConfig = resolveModelWithFallbacksForUnit( - dash.currentUnit.type, - ); - if (modelConfig && modelConfig.fallbacks.length > 0) { - const availableModels = ctx.modelRegistry.getAvailable(); - let cursorModelId: string | undefined = ctx.model?.id; - while (true) { - const nextModelId = getNextFallbackModel( - cursorModelId, - modelConfig, - ); - if (!nextModelId) break; - if ( - isModelBlocked(dash.basePath, ctx.model?.provider, nextModelId) - ) { - cursorModelId = nextModelId; - continue; - } - const modelToSet = resolveModelId( - nextModelId, - availableModels, - ctx.model?.provider, - ); - if ( - modelToSet && - !isModelBlocked(dash.basePath, modelToSet.provider, modelToSet.id) - ) { - const persistModelChanges = resolvePersistModelChanges(); - const ok = await pi.setModel(modelToSet, { - persist: persistModelChanges, - }); - if (ok) { - setCurrentDispatchedModelId({ - provider: modelToSet.provider, - id: modelToSet.id, - }); - ctx.ui.notify( - `Switched to unblocked fallback: ${nextModelId} and resuming.`, - "info", - ); - pi.sendMessage( - { - customType: "sf-auto-timeout-recovery", - content: "Continue execution.", - display: false, - }, - { triggerTurn: true }, - ); - return; - } - } - cursorModelId = nextModelId; - } - } + const switched = await trySwitchToFallbackModel({ + pi, + ctx, + 
current: currentRoute, + reason: rawErrorMsg || "unsupported for account", + unitType: dash.currentUnit.type, + basePath: dash.basePath, + errorDetail, + persistModelChanges, + }); + if (switched) return; } // No usable fallback — pause @@ -292,150 +291,42 @@ export async function handleAgentEnd( // ── 2. Decide & Act ────────────────────────────────────────────────── - // --- Network errors: same-model retry with backoff --- - if (cls.kind === "network") { - const currentModelId = ctx.model?.id ?? "unknown"; - if (retryState.currentRetryModelId !== currentModelId) { - retryState.networkRetryCount = 0; - retryState.currentRetryModelId = currentModelId; - } - if (retryState.networkRetryCount < MAX_NETWORK_RETRIES) { - retryState.networkRetryCount += 1; - retryState.consecutiveTransientCount += 1; - const attempt = retryState.networkRetryCount; - const delayMs = attempt * cls.retryAfterMs; - ctx.ui.notify( - `Network error on ${currentModelId}${errorDetail}. Retry ${attempt}/${MAX_NETWORK_RETRIES} in ${delayMs / 1000}s...`, - "warning", - ); - setTimeout(() => { - pi.sendMessage( - { - customType: "sf-auto-timeout-recovery", - content: - "Continue execution — retrying after transient network error.", - display: false, - }, - { triggerTurn: true }, - ); - }, delayMs); - return; - } - // Network retries exhausted — fall through to model fallback - retryState.networkRetryCount = 0; - retryState.currentRetryModelId = undefined; - ctx.ui.notify( - `Network retries exhausted for ${currentModelId}. Attempting model fallback.`, - "warning", - ); - } - - // --- Transient errors: try model fallback first, then pause --- - // Rate limits are often per-model, so switching models can bypass them. 
- if ( - cls.kind === "rate-limit" || - cls.kind === "network" || - cls.kind === "server" || - cls.kind === "connection" || - cls.kind === "stream" - ) { - // Try model fallback - const dash = getAutoDashboardData(); - if (dash.currentUnit) { - const modelConfig = resolveModelWithFallbacksForUnit( - dash.currentUnit.type, - ); - if (modelConfig && modelConfig.fallbacks.length > 0) { - const availableModels = ctx.modelRegistry.getAvailable(); - const nextModelId = getNextFallbackModel(ctx.model?.id, modelConfig); - if (nextModelId) { - retryState.networkRetryCount = 0; - retryState.currentRetryModelId = undefined; - const modelToSet = resolveModelId( - nextModelId, - availableModels, - ctx.model?.provider, - ); - if (modelToSet) { - const ok = await pi.setModel(modelToSet, { - persist: persistModelChanges, - }); - if (ok) { - setCurrentDispatchedModelId({ - provider: modelToSet.provider, - id: modelToSet.id, - }); - ctx.ui.notify( - `Model error${errorDetail}. Switched to fallback: ${nextModelId} and resuming.`, - "warning", - ); - pi.sendMessage( - { - customType: "sf-auto-timeout-recovery", - content: "Continue execution.", - display: false, - }, - { triggerTurn: true }, - ); - return; - } - } - } - } - } - - // Try restoring session model - const sessionModel = getAutoModeStartModel(); - if (sessionModel) { - if ( - ctx.model?.id !== sessionModel.id || - ctx.model?.provider !== sessionModel.provider - ) { - const startModel = ctx.modelRegistry - .getAvailable() - .find( - (m) => - m.provider === sessionModel.provider && - m.id === sessionModel.id, - ); - if (startModel) { - const ok = await pi.setModel(startModel, { - persist: persistModelChanges, - }); - if (ok) { - setCurrentDispatchedModelId({ - provider: startModel.provider, - id: startModel.id, - }); - retryState.networkRetryCount = 0; - retryState.currentRetryModelId = undefined; - ctx.ui.notify( - `Model error${errorDetail}. 
Restored session model: ${sessionModel.provider}/${sessionModel.id} and resuming.`, - "warning", - ); - pi.sendMessage( - { - customType: "sf-auto-timeout-recovery", - content: "Continue execution.", - display: false, - }, - { triggerTurn: true }, - ); - return; - } - } - } - } - } - - // --- Transient fallback: pause with auto-resume --- - if (isTransient(cls)) { - await pauseTransientWithBackoff( - cls, + // --- Route failures: try configured fallback first, then any available route --- + if (isModelRouteFailure(cls) && dash.currentUnit) { + const switched = await trySwitchToFallbackModel({ pi, ctx, + current: currentRoute, + reason: rawErrorMsg || cls.kind, + unitType: dash.currentUnit.type, + basePath: dash.basePath, errorDetail, - cls.kind === "rate-limit", + persistModelChanges, + }); + if (switched) return; + } + + // --- Transient fallback exhausted: pause without same-route auto-resume --- + if (isTransient(cls)) { + const message = + isModelRouteFailure(cls) && dash.currentUnit + ? `Provider route failed and no usable fallback model remains${errorDetail}` + : `Provider error${errorDetail}`; + await pauseAutoForProviderError( + ctx.ui, + errorDetail, + () => + pauseAuto(ctx, pi, { + message, + category: "provider", + isTransient: false, + retryAfterMs: "retryAfterMs" in cls ? cls.retryAfterMs : undefined, + }), + { + isRateLimit: cls.kind === "rate-limit", + isTransient: false, + retryAfterMs: "retryAfterMs" in cls ? 
cls.retryAfterMs : 0, + }, ); return; } diff --git a/src/resources/extensions/sf/bootstrap/db-tools.ts b/src/resources/extensions/sf/bootstrap/db-tools.ts index 48df9c4ec..9bfefb90f 100644 --- a/src/resources/extensions/sf/bootstrap/db-tools.ts +++ b/src/resources/extensions/sf/bootstrap/db-tools.ts @@ -10,7 +10,7 @@ import { nextMilestoneId, } from "../guided-flow.js"; import { loadEffectiveSFPreferences } from "../preferences.js"; -import { recordSelfFeedback } from "../self-feedback.js"; +import { markResolved, recordSelfFeedback } from "../self-feedback.js"; import { executeCompleteMilestone, executePlanMilestone, @@ -687,7 +687,7 @@ export function registerDbTools(pi: ExtensionAPI): void { promptGuidelines: [ "Use sf_self_report for ANY sf-internal observation — not just bugs. Acceptable kinds include: 'prompt-quality-issue' (you found a prompt ambiguous, contradictory, or missing context), 'improvement-idea' (a non-bug enhancement that would help), 'agent-friction' (workflow friction you worked around), 'design-thought' (broader speculation), 'missing-feature' (capability you wished sf had), as well as classic bug kinds like 'brittle-predicate' or 'git-empty-pathspec'.", "Do NOT use this for bugs in the user's project, for your own task work, or to track your task's todo list. ONLY for observations about sf-the-tool itself.", - "This tool FILES new entries; it does not address or resolve existing ones. Self-feedback is a triage inbox awaiting human/triage-agent review — do NOT autonomously pick entries off self-feedback and try to fix them. Treat existing entries as out of scope unless your task plan explicitly names a self-feedback entry id as the work.", + "This tool FILES new entries; it does not resolve existing ones. High/critical forge self-feedback may be queued autonomously at startup or an idle turn boundary as repair work. 
Use sf_self_feedback_resolve after fixing an entry; do not hand-edit the JSONL.", "Over-reporting is preferred to under-reporting at this stage. If you noticed it about sf, file it. Dedup and threshold-to-roadmap promotion are tracked as their own self-feedback items and will eventually clean noise.", "Severity guide: low = cosmetic / nice-to-have / improvement idea. medium = noisy or imperfect or recurring friction. high = blocked the unit (sf-the-tool prevented you from completing the task). critical = needs immediate fix (currently treated as high until inline-fix dispatch lands).", "high/critical entries mark the originating unit as blocked: it will not seal as success, and will be re-queued only after sf is bumped past the recorded version.", @@ -780,6 +780,145 @@ export function registerDbTools(pi: ExtensionAPI): void { pi.registerTool(selfReportTool); + // ─── sf_self_feedback_resolve ──────────────────────────────────────── + // Agent-callable resolver for inline self-feedback repair turns. The + // inline-fix prompt must not rely on hand-editing JSONL: the tool updates + // the structured source of truth and regenerates the markdown view. 
+ const selfFeedbackResolveExecute = async ( + _toolCallId: string, + params: any, + _signal: AbortSignal | undefined, + _onUpdate: unknown, + _ctx: unknown, + ): Promise>> => { + try { + const ok = markResolved( + params.id, + { + reason: params.reason, + evidence: { + kind: "agent-fix", + commitSha: params.commit_sha, + testPath: params.test_path, + summaryNarrative: params.summary_narrative, + }, + criteriaMet: params.criteria_met, + }, + process.cwd(), + ); + if (!ok) { + return { + content: [ + { + type: "text" as const, + text: `Error: unresolved self-feedback entry not found: ${params.id}`, + }, + ], + details: { + operation: "self_feedback_resolve", + id: params.id, + error: "not_found_or_already_resolved", + }, + }; + } + return { + content: [ + { + type: "text" as const, + text: `Resolved self-feedback ${params.id}`, + }, + ], + details: { + operation: "self_feedback_resolve", + id: params.id, + resolved: true, + }, + }; + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + logError("tool", `sf_self_feedback_resolve tool failed: ${msg}`, { + tool: "sf_self_feedback_resolve", + error: String(err), + }); + return { + content: [ + { + type: "text" as const, + text: `Error in sf_self_feedback_resolve: ${msg}`, + }, + ], + details: { + operation: "self_feedback_resolve", + id: params.id, + error: msg, + }, + }; + } + }; + + pi.registerTool({ + name: "sf_self_feedback_resolve", + label: "Resolve Self Feedback", + description: + "Mark a repaired SF self-feedback entry resolved with structured agent-fix evidence. 
" + + "Use this only after verifying the entry no longer applies, landing the fix, and citing the commit or verification evidence.", + promptSnippet: + "Resolve a repaired SF self-feedback entry with commit/test evidence", + promptGuidelines: [ + "Use sf_self_feedback_resolve during self-feedback inline-fix repair turns after the fix is implemented and verified.", + "Do not hand-edit `.sf/self-feedback.jsonl`; this tool updates the JSONL source of truth and regenerates `.sf/SELF-FEEDBACK.md`.", + "If the entry has acceptance criteria, pass criteria_met with the criteria that were satisfied.", + "Pass commit_sha when a commit exists. If an entry was already fixed, cite the existing commit or include summary_narrative and test_path.", + ], + parameters: Type.Object({ + id: Type.String({ + description: "Self-feedback entry id, e.g. sf-moocz9so-4ffov2", + }), + reason: Type.String({ + description: "Short explanation of why the entry is resolved", + }), + commit_sha: Type.Optional( + Type.String({ description: "Commit SHA containing the fix" }), + ), + test_path: Type.Optional( + Type.String({ description: "Focused test or verification path" }), + ), + summary_narrative: Type.Optional( + Type.String({ + description: + "Concise verification summary when a commit/test path alone is not enough", + }), + ), + criteria_met: Type.Optional( + Type.Array(Type.String(), { + description: + "Acceptance criteria satisfied by this fix, if the entry provided criteria", + }), + ), + }), + execute: selfFeedbackResolveExecute, + renderCall(args: any, theme: any) { + let text = theme.fg("toolTitle", theme.bold("sf_self_feedback_resolve ")); + if (args.id) text += theme.fg("muted", args.id); + return new Text(text, 0, 0); + }, + renderResult(result: any, _options: any, theme: any) { + const d = result.details; + if (result.isError || d?.error) { + return new Text( + theme.fg("error", `Error: ${d?.error ?? 
"unknown"}`), + 0, + 0, + ); + } + return new Text( + theme.fg("success", `Resolved ${d?.id ?? "self-feedback"}`), + 0, + 0, + ); + }, + }); + // ─── sf_plan_milestone ──────────────────────────────────────────────── const planMilestoneExecute = async ( diff --git a/src/resources/extensions/sf/bootstrap/register-hooks.ts b/src/resources/extensions/sf/bootstrap/register-hooks.ts index 96c6ddeff..884e0a440 100644 --- a/src/resources/extensions/sf/bootstrap/register-hooks.ts +++ b/src/resources/extensions/sf/bootstrap/register-hooks.ts @@ -13,9 +13,9 @@ import { hasResearchTerminalTransition, isAutoActive, isAutoPaused, + markResearchTerminalTransition, markToolEnd, markToolStart, - markResearchTerminalTransition, recordToolInvocationError, } from "../auto.js"; import { @@ -194,6 +194,18 @@ export function registerHooks( } } loadToolApiKeys(); + // Flow audit is read-only by default: surface stale dispatched units, + // missing session pointers, runaway history, and optional child hangs at + // startup before another auto unit compounds the same milestone failure. + try { + const { runFlowAudit } = await import("../doctor.js"); + const flow = await runFlowAudit(process.cwd()); + if (!flow.ok) { + ctx.ui?.notify?.(`Flow audit: ${flow.recommendedAction}`, "warning"); + } + } catch { + /* non-fatal — flow audit must never block session start */ + } // Drain self-feedback: auto-resolve entries whose blocking // sf-version constraint has been satisfied by the current sf bump, // and surface entries that remain blocked to the operator. Done after @@ -239,9 +251,9 @@ export function registerHooks( "warning", ); } - // Forge-only: surface high/critical entries as inline-fix candidates so - // the operator (or a follow-up dispatcher) can drain self-reported bugs - // without leaving the session. Read-only signal for now — no auto-dispatch. + // Forge-only: high/critical entries are queued as hidden follow-up repair + // work on startup, even outside /sf auto. 
The drain helper owns claim TTL + // and delivery failure retry, so this is safe to call opportunistically. const highBlocked = triage.stillBlocked.filter( (e) => e.severity === "high" || e.severity === "critical", ); @@ -366,6 +378,16 @@ export function registerHooks( resetToolCallLoopGuard(); resetAskUserQuestionsCache(); await handleAgentEnd(pi, event, ctx); + // Best-effort embedding backfill: when SF_LLM_GATEWAY_KEY is set and the + // gateway has an embed worker online, embed any memories that don't yet + // have a vector. Bounded per invocation; logs once-per-minute when the + // gateway is unavailable so we don't spam the journal. + try { + const { runEmbeddingBackfill } = await import("../memory-embeddings.js"); + await runEmbeddingBackfill(); + } catch { + // Never break agent_end on backfill issues. + } }); // Squash-merge quick-task branch back to the original branch after the @@ -378,9 +400,10 @@ export function registerHooks( // Best-effort: don't break the turn lifecycle if cleanup fails. } try { - const { consumeCompletedInlineFixClaim } = await import( - "../self-feedback-drain.js" - ); + const { + consumeCompletedInlineFixClaim, + dispatchSelfFeedbackInlineFixIfNeeded, + } = await import("../self-feedback-drain.js"); const resolvedIds = consumeCompletedInlineFixClaim(process.cwd()); if (resolvedIds.length > 0) { const requestReload = ( @@ -391,7 +414,9 @@ export function registerHooks( requestReload?.( `self-feedback inline fix resolved ${resolvedIds.length} entr${resolvedIds.length === 1 ? "y" : "ies"}`, ); + return; } + dispatchSelfFeedbackInlineFixIfNeeded(process.cwd(), ctx, pi); } catch { // Best-effort: stale code should not break normal turn completion. } @@ -511,6 +536,7 @@ export function registerHooks( block: true, reason: `Research unit terminal transition: ${currentUnit.type} ${currentUnit.id} has already completed its RESEARCH artifact. 
` + + `Post-artifact drift is blocked before runaway supervision treats it as legitimate large research. ` + `Planning tools (${event.toolName}) are blocked. The orchestrator will dispatch planner units after research.`, }; } diff --git a/src/resources/extensions/sf/commands-handlers.ts b/src/resources/extensions/sf/commands-handlers.ts index 09cc3f173..50228c848 100644 --- a/src/resources/extensions/sf/commands-handlers.ts +++ b/src/resources/extensions/sf/commands-handlers.ts @@ -131,25 +131,65 @@ export async function handleDoctor( // ── Flow audit subcommand (sf-moocz9so-4ffov2) ───────────────────────── if (trimmed === "flow" || trimmed.startsWith("flow ")) { - const flowResult = await runFlowAudit(projectRoot()); + const flowResult = await runFlowAudit(projectRoot(), { + killOverBudgetChildren: /\b(--kill-children|kill-children|kill)\b/.test( + trimmed, + ), + }); const lines: string[] = ["## SF Flow Audit", ""]; + if (flowResult.activeMilestone) { + lines.push( + `**Active milestone:** ${flowResult.activeMilestone.id}${flowResult.activeMilestone.title ? ` — ${flowResult.activeMilestone.title}` : ""}`, + flowResult.activeMilestone.phase + ? `- Phase: ${flowResult.activeMilestone.phase}` + : "", + "", + ); + } else { + lines.push("**Active milestone:** none", ""); + } if (flowResult.activeUnit) { const ageMin = Math.round(flowResult.activeUnit.ageMs / 60000); + const progressAgeMin = Math.round( + flowResult.activeUnit.progressAgeMs / 60000, + ); lines.push( `**Active unit:** ${flowResult.activeUnit.unitType} ${flowResult.activeUnit.unitId}`, `- Phase: ${flowResult.activeUnit.phase}`, `- Started: ${flowResult.activeUnit.startedAt}`, `- Age: ${ageMin} minutes`, + `- Progress age: ${progressAgeMin} minutes`, + flowResult.activeUnit.lastProgressAt + ? `- Last progress: ${flowResult.activeUnit.lastProgressAt}` + : "", "", ); } else { lines.push("**Active unit:** none", ""); } + lines.push( + `**Session pointer:** ${ + flowResult.sessionPointer?.sessionFile ?? 
+ flowResult.sessionPointer?.sessionId ?? + "none recorded" + }`, + `**Recommended action:** ${flowResult.recommendedAction}`, + "", + ); if (flowResult.warnings.length > 0) { lines.push("**Warnings:**"); for (const w of flowResult.warnings) lines.push(`- ${w}`); lines.push(""); } + if (flowResult.staleDispatchedUnits.length > 0) { + lines.push("**Stale dispatched units:**"); + for (const unit of flowResult.staleDispatchedUnits.slice(0, 5)) { + lines.push( + `- ${unit.unitType} ${unit.unitId}: progress age ${Math.round(unit.progressAgeMs / 60000)} minutes`, + ); + } + lines.push(""); + } if (flowResult.recommendations.length > 0) { lines.push("**Recommendations:**"); for (const r of flowResult.recommendations) lines.push(`- ${r}`); @@ -158,7 +198,19 @@ export async function handleDoctor( if (flowResult.childProcesses.length > 0) { lines.push("**Child processes:**"); for (const cp of flowResult.childProcesses.slice(0, 10)) { - lines.push(`- pid=${cp.pid} [${cp.classification}] ${cp.cmd.slice(0, 60)}`); + const age = + cp.ageMs === undefined ? "" : ` age=${Math.round(cp.ageMs / 60000)}m`; + const nonBlocking = cp.nonBlocking ? " non-blocking" : ""; + lines.push( + `- pid=${cp.pid} ppid=${cp.ppid} [${cp.classification}]${age}${nonBlocking} action=${cp.action} ${cp.cmd.slice(0, 80)}`, + ); + } + lines.push(""); + } + if (flowResult.runawayHistory.length > 0) { + lines.push("**Runaway history:**"); + for (const event of flowResult.runawayHistory.slice(-5)) { + lines.push(`- ${event}`); } lines.push(""); } diff --git a/src/resources/extensions/sf/commands-harness.ts b/src/resources/extensions/sf/commands-harness.ts index b7c676ab2..a024042b0 100644 --- a/src/resources/extensions/sf/commands-harness.ts +++ b/src/resources/extensions/sf/commands-harness.ts @@ -7,13 +7,15 @@ * tracked docs artifacts (sf-moocr4rv-au7r3l). 
*/ -import { existsSync, mkdirSync, writeFileSync } from "node:fs"; +import { mkdirSync, writeFileSync } from "node:fs"; import { join, resolve } from "node:path"; import type { ExtensionCommandContext } from "@singularity-forge/pi-coding-agent"; import { ensureDbOpen } from "./bootstrap/dynamic-tools.js"; import { projectRoot } from "./commands/context.js"; -import { profileRepository } from "./repo-profiler.js"; -import { recordRepoProfile } from "./sf-db.js"; +import { profileRepository, type RepoProfile } from "./repo-profiler.js"; +import { getLatestRepoProfile, recordRepoProfile } from "./sf-db.js"; + +const HARNESS_PROMOTION_REPO_DIR = "docs/exec-plans/active"; /** * Format a repo profile summary for user notification. @@ -47,10 +49,91 @@ function formatProfileSummary( `Stacks: ${stacks}`, `Risk hints: ${risks}`, "", - "Untracked files were recorded as observations only; SF did not stage or adopt them.", + "Runtime observation boundary:", + "- Profile state was stored only in .sf runtime state.", + "- No repo-committable artifact was written by profiling.", + "- Use /sf harness promote after review to create a tracked docs artifact.", + "- Untracked files remain observed_only; SF did not stage or adopt them.", ].join("\n"); } +/** + * Convert a finding id into a stable filename segment. + * + * Purpose: keep promotion artifacts deterministic while preventing path + * traversal through user-provided finding IDs. + * + * Consumer: `/sf harness promote `. + */ +function findingIdSlug(findingId: string): string { + const slug = findingId + .trim() + .toLowerCase() + .replace(/[^a-z0-9._-]+/g, "-") + .replace(/^-+|-+$/g, "") + .slice(0, 120); + return slug || "finding"; +} + +/** + * Parse the persisted repo profile JSON from .sf runtime state. + * + * Purpose: promotion must be a writeback from recorded observations, not a new + * profiler run that can observe its own artifact or introduce timestamps. + * + * Consumer: `/sf harness promote `. 
+ */ +function parseRecordedProfile(profileJson: string): RepoProfile | null { + try { + const parsed = JSON.parse(profileJson) as Partial; + if ( + typeof parsed.profileId === "string" && + typeof parsed.createdAt === "string" && + parsed.git && + Array.isArray(parsed.git.changedFiles) + ) { + return parsed as RepoProfile; + } + } catch { + // Fall back to row-level metadata below. + } + return null; +} + +/** + * Build the stable JSON payload embedded in a promotion artifact. + * + * Purpose: document the recorded observation facts without leaking absolute + * runtime paths or adding promotion-time fields. + * + * Consumer: `/sf harness promote `. + */ +function profilePromotionPayload( + profile: RepoProfile | null, + fallback: { + profileId: string; + branch: string | null; + dirty: boolean; + createdAt: string; + }, +): Record { + return { + profileId: profile?.profileId ?? fallback.profileId, + profileCapturedAt: profile?.createdAt ?? fallback.createdAt, + branch: profile?.git.branch ?? fallback.branch, + dirty: profile?.git.dirty ?? fallback.dirty, + changedFiles: profile?.git.changedFiles ?? [], + stacks: profile?.stacks ?? [], + entrypoints: profile?.entrypoints ?? [], + tests: profile?.tests ?? [], + ci: profile?.ci ?? [], + docs: profile?.docs ?? [], + dataStores: profile?.dataStores ?? [], + networkSurfaces: profile?.networkSurfaces ?? [], + riskHints: profile?.riskHints ?? [], + }; +} + /** * Promote a harness/profile finding from .sf runtime observations into a * tracked docs artifact. This is the writeback path that turns operational @@ -80,42 +163,57 @@ export async function handleHarnessPromote( return; } - // Determine the target tracked-docs path + const displayFindingId = findingId.trim(); + const latestProfile = getLatestRepoProfile(); + if (!latestProfile) { + ctx.ui.notify( + "No recorded harness profile found. 
Run /sf harness profile first; promotion writes tracked docs only from .sf runtime observations.", + "warning", + ); + return; + } + + const slug = findingIdSlug(displayFindingId); + const relativePath = `${HARNESS_PROMOTION_REPO_DIR}/harness-promotion-${slug}.md`; const trackedDir = resolve(basePath, "docs", "exec-plans", "active"); - const targetPath = join(trackedDir, `harness-promotion-${findingId}.md`); + const targetPath = join(trackedDir, `harness-promotion-${slug}.md`); // Ensure the tracked directory exists (creates under the repo, not .sf) mkdirSync(trackedDir, { recursive: true }); - // Read the latest profile from DB to include in the promotion - const profile = profileRepository(basePath); + const recordedProfile = parseRecordedProfile(latestProfile.profileJson); + const payload = profilePromotionPayload(recordedProfile, { + profileId: latestProfile.profileId, + branch: latestProfile.branch, + dirty: latestProfile.dirty, + createdAt: latestProfile.createdAt, + }); // Build the promoted artifact content const content = [ - `# Harness Promotion: ${findingId}`, + `# Harness Promotion: ${displayFindingId}`, "", - `Promoted from: \`.sf\` runtime observations`, - `Promoted at: ${new Date().toISOString()}`, - `Source profile: ${profile.profileId}`, - `Source branch: ${profile.git.branch ?? "unknown"}`, + `Finding ID: ${displayFindingId}`, + `Repo artifact: \`${relativePath}\``, + "Source: `.sf` runtime observations", + `Source profile: ${latestProfile.profileId}`, + `Source profile captured at: ${latestProfile.createdAt}`, + `Source branch: ${latestProfile.branch ?? 
"unknown"}`, "", - "## Observed State", + "## Runtime Boundary", + "", + "- `.sf` remains operational runtime state and is not repo output.", + "- Unpromoted .sf runtime observations remain `observed_only`.", + "- This Markdown file is the repo-committable artifact created by promotion.", + "- Promotion does not stage or claim untracked observed files.", + "", + "## Observed Profile", "", "```json", - JSON.stringify( - { - profileId: profile.profileId, - branch: profile.git.branch, - changedFiles: profile.git.changedFiles, - stacks: profile.stacks, - riskHints: profile.riskHints, - }, - null, - 2, - ), + JSON.stringify(payload, null, 2), "```", "", - "## Status", + "## Review Checklist", "", "- [ ] Reviewed by human", "- [ ] Adopted into milestone plan", @@ -131,10 +229,10 @@ export async function handleHarnessPromote( ctx.ui.notify( [ - `Harness finding '${findingId}' promoted to tracked docs.`, - `Path: ${targetPath}`, + `Harness finding '${displayFindingId}' promoted to tracked docs.`, + `Path: ${relativePath}`, "", - "This artifact is now part of the repo's tracked documentation.", + "This Markdown file is now the repo-committable artifact for review.", "Unpromoted .sf runtime state remains observed_only.", ].join("\n"), "info", @@ -161,7 +259,7 @@ export async function handleHarness( } if (!["profile", "snapshot", "status"].includes(subcommand)) { ctx.ui.notify( - "Usage: /sf harness profile | /sf harness promote \nRecords a read-only repo profile or promotes a finding to tracked docs.", + "Usage: /sf harness profile | /sf harness promote \nRecords a read-only .sf runtime profile or promotes a reviewed finding to tracked docs.", "warning", ); return; diff --git a/src/resources/extensions/sf/doctor.ts b/src/resources/extensions/sf/doctor.ts index c1cde1836..c8f6981a7 100644 --- a/src/resources/extensions/sf/doctor.ts +++ b/src/resources/extensions/sf/doctor.ts @@ -50,34 +50,486 @@ import { loadEffectiveSFPreferences, type SFPreferences, } from 
"./preferences.js"; +import { + type PersistedSelfFeedbackEntry, + readAllSelfFeedback, + recordSelfFeedback, +} from "./self-feedback.js"; import { getMilestoneSlices, getSliceTasks, isDbAvailable } from "./sf-db.js"; import { deriveState, isMilestoneComplete } from "./state.js"; import { isClosedStatus } from "./status-guards.js"; import type { RoadmapSliceEntry } from "./types.js"; +import { parseUnitId } from "./unit-id.js"; // ─── Flow Audit Types (sf-moocz9so-4ffov2) ──────────────────────────────── +export type FlowAuditChildClassification = + | "active-session" + | "warmup" + | "background" + | "orphan" + | "unknown"; + +export type FlowAuditChildAction = "observe" | "non-blocking" | "kill"; + +/** + * Configure `runFlowAudit` for deterministic tests and explicit recovery mode. + * + * Purpose: keep the default auditor read-only during startup while allowing + * `/sf doctor flow --kill-children` and tests to exercise bounded child cleanup. + * + * Consumer: session_start, `/sf doctor flow`, and flow-audit regression tests. + */ +export interface FlowAuditOptions { + nowMs?: number; + staleProgressMs?: number; + optionalChildBudgetMs?: number; + psOutput?: string; + killOverBudgetChildren?: boolean; + killProcess?: (pid: number) => void; + recordSelfFeedback?: boolean; +} + +/** + * Flow-audit output returned to commands and startup hooks. + * + * Purpose: preserve enough structured evidence for operators and tests to avoid + * reconstructing stuck auto-mode state from locks, runtime files, sessions, and ps. + * + * Consumer: `/sf doctor flow`, session_start notifications, and regression tests. 
+ */ export interface FlowAuditResult { ok: boolean; + activeMilestone?: { + id: string; + title?: string; + phase?: string; + }; activeUnit?: { unitType: string; unitId: string; phase: string; startedAt: string; ageMs: number; + progressAgeMs: number; + lastProgressAt?: string; + }; + sessionPointer?: { + sessionId?: string; + sessionFile?: string; + source: "auto.lock" | "runtime-unit"; }; recommendations: string[]; + recommendedAction: string; warnings: string[]; childProcesses: Array<{ pid: number; + ppid: number; cmd: string; - classification: "active-session" | "warmup" | "orphan" | "unknown"; + classification: FlowAuditChildClassification; + ageMs?: number; + nonBlocking: boolean; + overBudget: boolean; + action: FlowAuditChildAction; + killed?: boolean; + killError?: string; }>; lastErrors: string[]; + staleDispatchedUnits: Array<{ + unitType: string; + unitId: string; + phase: string; + progressAgeMs: number; + lastProgressAt?: string; + }>; + runawayHistory: string[]; + loopEvidence?: { + milestoneId: string; + sliceId?: string; + taskId?: string; + completedPriorTasks: string[]; + missingSummaries: string[]; + }; + repeatedFailureRollup?: { + filed: boolean; + milestoneId: string; + count: number; + entryId?: string; + }; } // ─── Flow Audit Implementation ──────────────────────────────────────────── +const DEFAULT_STALE_PROGRESS_MS = 20 * 60 * 1000; +const DEFAULT_OPTIONAL_CHILD_BUDGET_MS = 30 * 60 * 1000; +const REPEATED_FAILURE_THRESHOLD = 3; +const FLOW_AUDIT_ROLLUP_KIND = "flow-audit:repeated-milestone-failure"; + +interface AutoLockAuditRecord { + pid?: number; + unitType?: string; + unitId?: string; + startedAt?: string | number; + phase?: string; + sessionId?: string; + sessionFile?: string; +} + +interface RuntimeUnitAuditRecord { + unitType?: string; + unitId?: string; + phase?: string; + startedAt?: number | string; + updatedAt?: number | string; + lastProgressAt?: number | string; + lastProgressKind?: string; + progressCount?: number; + 
sessionId?: string; + sessionFile?: string; + runawayGuardPause?: { + reason?: string; + unitType?: string; + unitId?: string; + pausedAt?: number; + }; +} + +interface PsAuditRow { + pid: number; + ppid: number; + ageMs?: number; + cmd: string; +} + +function parseEpochMs(value: unknown, fallbackMs: number): number { + if (typeof value === "number" && Number.isFinite(value)) { + return value < 10_000_000_000 ? value * 1000 : value; + } + if (typeof value === "string" && value.trim()) { + const parsed = new Date(value).getTime(); + if (Number.isFinite(parsed)) return parsed; + } + return fallbackMs; +} + +function formatIso(ms: number | undefined): string | undefined { + if (ms === undefined || !Number.isFinite(ms)) return undefined; + return new Date(ms).toISOString(); +} + +function minutes(ms: number): number { + return Math.max(0, Math.round(ms / 60_000)); +} + +function readJsonFile(path: string): T | null { + try { + if (!existsSync(path)) return null; + return JSON.parse(readFileSync(path, "utf8")) as T; + } catch { + return null; + } +} + +function readRuntimeUnits(runtimeUnitsDir: string): RuntimeUnitAuditRecord[] { + if (!existsSync(runtimeUnitsDir)) return []; + const records: RuntimeUnitAuditRecord[] = []; + try { + for (const file of readdirSync(runtimeUnitsDir)) { + if (!file.endsWith(".json")) continue; + const record = readJsonFile( + join(runtimeUnitsDir, file), + ); + if (record) records.push(record); + } + } catch { + // Runtime audit must stay best-effort. 
+ } + return records; +} + +function parsePsOutput(psOutput: string): PsAuditRow[] { + const rows: PsAuditRow[] = []; + for (const line of psOutput.split("\n")) { + const trimmed = line.trim(); + if (!trimmed) continue; + const match = trimmed.match(/^(\d+)\s+(\d+)(?:\s+(\d+))?\s+(.+)$/); + if (!match) continue; + const pid = Number.parseInt(match[1], 10); + const ppid = Number.parseInt(match[2], 10); + if (!Number.isFinite(pid) || !Number.isFinite(ppid)) continue; + const elapsedSeconds = + match[3] === undefined ? undefined : Number.parseInt(match[3], 10); + rows.push({ + pid, + ppid, + ageMs: + elapsedSeconds !== undefined && Number.isFinite(elapsedSeconds) + ? elapsedSeconds * 1000 + : undefined, + cmd: match[4], + }); + } + return rows; +} + +async function readPsRows(options: FlowAuditOptions): Promise { + if (options.psOutput !== undefined) return parsePsOutput(options.psOutput); + if (process.platform === "win32") return []; + try { + const { execSync } = await import("node:child_process"); + const psOutput = execSync("ps -eo pid,ppid,etimes,cmd --no-headers", { + encoding: "utf8", + timeout: 5000, + }); + return parsePsOutput(psOutput); + } catch { + return []; + } +} + +function classifyProcess(row: PsAuditRow): FlowAuditChildClassification { + const cmd = row.cmd.toLowerCase(); + if (cmd.includes("sift") || cmd.includes("warmup")) return "warmup"; + if (row.ppid === 1 && cmd.includes("next-server")) return "orphan"; + if ( + cmd.includes("next-server") || + cmd.includes("vite") || + cmd.includes("turbopack") + ) { + return "background"; + } + if ( + (cmd.includes("node") || cmd.includes("sf-run") || cmd.includes("codex")) && + (cmd.includes(" sf") || + cmd.includes("/sf") || + cmd.includes("dist/loader") || + cmd.includes("tool-session") || + cmd.includes("headless")) + ) { + return "active-session"; + } + return "unknown"; +} + +function isOptionalChild( + classification: FlowAuditChildClassification, +): boolean { + return ( + classification === 
"warmup" || + classification === "background" || + classification === "orphan" + ); +} + +function shouldIncludeProcess( + row: PsAuditRow, + classification: FlowAuditChildClassification, + activePid: number | undefined, +): boolean { + if (classification !== "unknown") return true; + if (activePid === undefined) return false; + return row.pid === activePid || row.ppid === activePid; +} + +function readRecentErrors(runtimeRoot: string): string[] { + const notificationsPath = join(runtimeRoot, "notifications.jsonl"); + if (!existsSync(notificationsPath)) return []; + const errors: string[] = []; + try { + const lines = readFileSync(notificationsPath, "utf8") + .split("\n") + .filter((l) => l.trim()); + for (const line of lines.slice(-20)) { + try { + const entry = JSON.parse(line) as { + severity?: string; + message?: string; + text?: string; + }; + const message = entry.message ?? entry.text ?? ""; + if ( + entry.severity === "error" || + message.toLowerCase().includes("error") || + message.toLowerCase().includes("failed") + ) { + errors.push(message || "Unknown error"); + } + } catch { + // skip malformed notification rows + } + } + } catch { + // non-fatal + } + return errors; +} + +function buildLoopEvidence( + basePath: string, + unitType: string, + unitId: string, +): FlowAuditResult["loopEvidence"] | undefined { + if (unitType !== "execute-task") return undefined; + const { milestone, slice, task } = parseUnitId(unitId); + if (!milestone || !slice || !task) return undefined; + const planPath = resolveSliceFile(basePath, milestone, slice, "PLAN"); + if (!planPath || !existsSync(planPath)) return undefined; + const completedPriorTasks: string[] = []; + const missingSummaries: string[] = []; + try { + const plan = parsePlan(readFileSync(planPath, "utf8")); + const currentIndex = plan.tasks.findIndex((t) => t.id === task); + if (currentIndex > 0) { + for (const prior of plan.tasks.slice(0, currentIndex)) { + if (prior.done) completedPriorTasks.push(prior.id); + } 
+ } + if (!resolveTaskFile(basePath, milestone, slice, task, "SUMMARY")) { + missingSummaries.push(`${milestone}/${slice}/${task} task SUMMARY`); + } + const allTasksDone = + plan.tasks.length > 0 && plan.tasks.every((t) => t.done); + if ( + allTasksDone && + !resolveSliceFile(basePath, milestone, slice, "SUMMARY") + ) { + missingSummaries.push(`${milestone}/${slice} slice SUMMARY`); + } + } catch { + return undefined; + } + return { + milestoneId: milestone, + sliceId: slice, + taskId: task, + completedPriorTasks, + missingSummaries, + }; +} + +function collectRunawayHistory( + runtimeUnits: RuntimeUnitAuditRecord[], + feedback: PersistedSelfFeedbackEntry[], + milestoneId: string | undefined, +): string[] { + const history: string[] = []; + for (const unit of runtimeUnits) { + const pause = unit.runawayGuardPause; + if (!pause) continue; + const id = pause.unitId ?? unit.unitId ?? "unknown"; + if (milestoneId && !id.startsWith(`${milestoneId}/`)) continue; + history.push(pause.reason ?? 
`Runaway guard paused ${id}`); + } + for (const entry of feedback) { + if (entry.resolvedAt) continue; + if (milestoneId && entry.occurredIn?.milestone !== milestoneId) continue; + if ( + entry.kind.includes("runaway") || + entry.summary.toLowerCase().includes("runaway") + ) { + history.push(`${entry.kind}: ${entry.summary}`); + } + } + return Array.from(new Set(history)).slice(-10); +} + +function maybeRecordRepeatedFailureRollup( + basePath: string, + milestoneId: string | undefined, + feedback: PersistedSelfFeedbackEntry[], + options: FlowAuditOptions, +): FlowAuditResult["repeatedFailureRollup"] | undefined { + if (!milestoneId || options.recordSelfFeedback === false) return undefined; + const failures = feedback.filter( + (e) => + !e.resolvedAt && + e.occurredIn?.milestone === milestoneId && + e.kind !== FLOW_AUDIT_ROLLUP_KIND, + ); + if (failures.length < REPEATED_FAILURE_THRESHOLD) return undefined; + const openRollup = feedback.find( + (e) => + !e.resolvedAt && + e.kind === FLOW_AUDIT_ROLLUP_KIND && + e.occurredIn?.milestone === milestoneId, + ); + if (openRollup) { + return { + filed: false, + milestoneId, + count: failures.length, + entryId: openRollup.id, + }; + } + const evidence = failures + .slice(-8) + .map( + (e) => + `[${e.id}] ${e.kind} ${[ + e.occurredIn?.milestone, + e.occurredIn?.slice, + e.occurredIn?.task, + ] + .filter(Boolean) + .join("/")}: ${e.summary}`, + ) + .join("\n"); + const recorded = recordSelfFeedback( + { + kind: FLOW_AUDIT_ROLLUP_KIND, + severity: "high", + summary: `${failures.length} unresolved flow failures on ${milestoneId} need one recovery fix`, + evidence, + suggestedFix: + "Fix the shared milestone-flow failure instead of filing one item per failed unit. Use the flow audit evidence to repair stale dispatch, missing summary, runaway, or child-process handling.", + acceptanceCriteria: + "AC1: flow audit reports the active milestone/unit and session pointer. AC2: stale dispatched unit with no progress is flagged. 
AC3: runaway history and child-process hang evidence are preserved. AC4: repeated same-milestone failures stay deduplicated into one open item.", + source: "detector", + occurredIn: { milestone: milestoneId, unitType: "flow-audit" }, + }, + basePath, + ); + if (!recorded) return undefined; + return { + filed: true, + milestoneId, + count: failures.length, + entryId: recorded.entry.id, + }; +} + +function chooseRecommendedAction(args: { + activeUnit?: FlowAuditResult["activeUnit"]; + sessionPointer?: FlowAuditResult["sessionPointer"]; + staleDispatchedUnits: FlowAuditResult["staleDispatchedUnits"]; + childProcesses: FlowAuditResult["childProcesses"]; + lastErrors: string[]; + activeMilestone?: FlowAuditResult["activeMilestone"]; +}): string { + if (args.staleDispatchedUnits.length > 0) { + const unit = args.staleDispatchedUnits[0]; + const session = args.sessionPointer?.sessionFile + ? ` ${args.sessionPointer.sessionFile}` + : args.sessionPointer?.sessionId + ? ` ${args.sessionPointer.sessionId}` + : ""; + return `Inspect session${session} for ${unit.unitType} ${unit.unitId}; if no new output exists, stop/requeue the stale dispatched unit before continuing.`; + } + const overBudgetOptional = args.childProcesses.find( + (p) => p.nonBlocking && p.overBudget, + ); + if (overBudgetOptional) { + return `Optional ${overBudgetOptional.classification} child pid ${overBudgetOptional.pid} is over budget; it is non-blocking, or rerun with --kill-children to terminate it.`; + } + if (args.lastErrors.length > 0) { + return "Review recent errors before dispatching another unit."; + } + if (args.activeMilestone && !args.activeUnit) { + return `Dispatch or resume the next unit for ${args.activeMilestone.id}.`; + } + return "No flow-auditor action needed."; +} + /** * Run a flow audit: inspect active unit state, auto.lock, runtime artifacts, * and child processes to diagnose stuck milestones without human forensic work. 
@@ -86,165 +538,228 @@ export interface FlowAuditResult { * milestone/unit, progress age, session pointer, child processes, last errors, * and recommended action. * - * Consumer: `/sf doctor flow` command. + * Consumer: `/sf doctor flow` command and session_start startup health sweep. */ -export async function runFlowAudit(basePath: string): Promise { +export async function runFlowAudit( + basePath: string, + options: FlowAuditOptions = {}, +): Promise { + const nowMs = options.nowMs ?? Date.now(); + const staleProgressMs = options.staleProgressMs ?? DEFAULT_STALE_PROGRESS_MS; + const optionalChildBudgetMs = + options.optionalChildBudgetMs ?? DEFAULT_OPTIONAL_CHILD_BUDGET_MS; + const runtimeRoot = sfRoot(basePath); const warnings: string[] = []; const recommendations: string[] = []; const childProcesses: FlowAuditResult["childProcesses"] = []; - const lastErrors: string[] = []; + const lastErrors = readRecentErrors(runtimeRoot); + const staleDispatchedUnits: FlowAuditResult["staleDispatchedUnits"] = []; + let sessionPointer: FlowAuditResult["sessionPointer"] | undefined; + let activeMilestone: FlowAuditResult["activeMilestone"] | undefined; - // Read auto.lock for active unit info - const autoLockPath = join(basePath, ".sf", "auto.lock"); + const autoLockPath = join(runtimeRoot, "auto.lock"); let activeUnit: FlowAuditResult["activeUnit"] | undefined; - if (existsSync(autoLockPath)) { - try { - const lockContent = readFileSync(autoLockPath, "utf8"); - const lockData = JSON.parse(lockContent) as { - unitType?: string; - unitId?: string; - startedAt?: string; - phase?: string; + let activePid: number | undefined; + const lockData = readJsonFile(autoLockPath); + if (lockData) { + if (lockData.unitType && lockData.unitId) { + const startedAtMs = parseEpochMs(lockData.startedAt, nowMs); + const parsed = parseUnitId(lockData.unitId); + activeMilestone = { id: parsed.milestone }; + activePid = + typeof lockData.pid === "number" && Number.isFinite(lockData.pid) + ? 
lockData.pid + : undefined; + activeUnit = { + unitType: lockData.unitType, + unitId: lockData.unitId, + phase: lockData.phase ?? "unknown", + startedAt: formatIso(startedAtMs) ?? new Date(nowMs).toISOString(), + ageMs: Math.max(0, nowMs - startedAtMs), + progressAgeMs: Math.max(0, nowMs - startedAtMs), }; - if (lockData.unitType && lockData.unitId) { - const startedAt = lockData.startedAt - ? new Date(lockData.startedAt).getTime() - : Date.now(); - const ageMs = Date.now() - startedAt; - activeUnit = { - unitType: lockData.unitType, - unitId: lockData.unitId, - phase: lockData.phase ?? "unknown", - startedAt: lockData.startedAt ?? new Date().toISOString(), - ageMs, + if (lockData.sessionId || lockData.sessionFile) { + sessionPointer = { + sessionId: lockData.sessionId, + sessionFile: lockData.sessionFile, + source: "auto.lock", }; - if (ageMs > 30 * 60 * 1000) { - warnings.push( - `Active unit ${lockData.unitId} has been running for ${Math.round(ageMs / 60000)} minutes.`, - ); - recommendations.push( - `Consider checking if ${lockData.unitId} is stuck or making progress.`, - ); - } } - } catch { - warnings.push("Could not parse .sf/auto.lock"); } + } else if (existsSync(autoLockPath)) { + warnings.push("Could not parse .sf/auto.lock"); } - // Read runtime units directory - const runtimeUnitsDir = join(basePath, ".sf", "runtime", "units"); - if (existsSync(runtimeUnitsDir)) { - try { - const files = readdirSync(runtimeUnitsDir); - let dispatchedCount = 0; - for (const file of files) { - if (!file.endsWith(".json")) continue; - try { - const content = readFileSync( - join(runtimeUnitsDir, file), - "utf8", - ); - const unit = JSON.parse(content) as { - phase?: string; - unitType?: string; - unitId?: string; - }; - if (unit.phase === "dispatched") dispatchedCount++; - } catch { - // skip malformed - } - } - if (dispatchedCount > 1) { - warnings.push( - `${dispatchedCount} units are in dispatched phase simultaneously.`, - ); - } - } catch { - // ignore - } - } - - // 
Read notifications for recent errors - const notificationsPath = join(basePath, ".sf", "notifications.jsonl"); - if (existsSync(notificationsPath)) { - try { - const lines = readFileSync(notificationsPath, "utf8") - .split("\n") - .filter((l) => l.trim()); - const recentLines = lines.slice(-20); - for (const line of recentLines) { - try { - const entry = JSON.parse(line) as { - severity?: string; - message?: string; - }; - if ( - entry.severity === "error" || - entry.message?.toLowerCase().includes("error") - ) { - lastErrors.push(entry.message ?? "Unknown error"); - } - } catch { - // skip malformed - } - } - } catch { - // ignore - } - } - - // Scan child processes (Linux/macOS only) - if (process.platform !== "win32") { - try { - const { execSync } = await import("node:child_process"); - const psOutput = execSync("ps -eo pid,ppid,cmd --no-headers", { - encoding: "utf8", - timeout: 5000, + const runtimeUnits = readRuntimeUnits(join(runtimeRoot, "runtime", "units")); + let dispatchedCount = 0; + for (const unit of runtimeUnits) { + if (unit.phase === "dispatched") dispatchedCount++; + if (!unit.unitType || !unit.unitId) continue; + const progressBaseMs = parseEpochMs( + unit.lastProgressAt ?? unit.updatedAt ?? unit.startedAt, + nowMs, + ); + const progressAgeMs = Math.max(0, nowMs - progressBaseMs); + const lastProgressAt = formatIso(progressBaseMs); + const stale = + unit.phase === "dispatched" && progressAgeMs > staleProgressMs; + if (stale) { + staleDispatchedUnits.push({ + unitType: unit.unitType, + unitId: unit.unitId, + phase: unit.phase ?? 
"unknown", + progressAgeMs, + lastProgressAt, }); - const lines = psOutput.split("\n").filter((l) => l.trim()); - for (const line of lines) { - const parts = line.trim().split(/\s+/); - if (parts.length < 3) continue; - const pid = Number.parseInt(parts[0], 10); - const ppid = Number.parseInt(parts[1], 10); - const cmd = parts.slice(2).join(" "); - if (!Number.isFinite(pid)) continue; - // Classify processes - let classification: FlowAuditResult["childProcesses"][0]["classification"] = "unknown"; - if (cmd.includes("sift") || cmd.includes("warmup")) { - classification = "warmup"; - } else if (cmd.includes("node") && cmd.includes("sf")) { - classification = "active-session"; - } else if (ppid === 1 && cmd.includes("next-server")) { - classification = "orphan"; - } - childProcesses.push({ pid, cmd, classification }); + warnings.push( + `Unit ${unit.unitId} has no progress for ${minutes(progressAgeMs)} minutes (phase=${unit.phase}).`, + ); + } + if ( + activeUnit && + unit.unitType === activeUnit.unitType && + unit.unitId === activeUnit.unitId + ) { + activeUnit.phase = unit.phase ?? activeUnit.phase; + activeUnit.progressAgeMs = progressAgeMs; + activeUnit.lastProgressAt = lastProgressAt; + if (!sessionPointer && (unit.sessionId || unit.sessionFile)) { + sessionPointer = { + sessionId: unit.sessionId, + sessionFile: unit.sessionFile, + source: "runtime-unit", + }; } - } catch { - // ignore on platforms without ps } } + if (dispatchedCount > 1) { + warnings.push( + `${dispatchedCount} units are in dispatched phase simultaneously.`, + ); + } + + const psRows = await readPsRows(options); + for (const row of psRows) { + const classification = classifyProcess(row); + if (!shouldIncludeProcess(row, classification, activePid)) continue; + const nonBlocking = isOptionalChild(classification); + const overBudget = + nonBlocking && + row.ageMs !== undefined && + row.ageMs > optionalChildBudgetMs; + let action: FlowAuditChildAction = nonBlocking ? 
"non-blocking" : "observe"; + let killed = false; + let killError: string | undefined; + if (overBudget) { + warnings.push( + `${classification} child pid ${row.pid} is over budget (${minutes(row.ageMs ?? 0)} minutes).`, + ); + if (options.killOverBudgetChildren) { + action = "kill"; + try { + if (options.killProcess) options.killProcess(row.pid); + else process.kill(row.pid, "SIGTERM"); + killed = true; + } catch (err) { + killError = err instanceof Error ? err.message : String(err); + warnings.push( + `Failed to kill over-budget ${classification} child pid ${row.pid}: ${killError}`, + ); + } + } + } + childProcesses.push({ + pid: row.pid, + ppid: row.ppid, + cmd: row.cmd, + classification, + ageMs: row.ageMs, + nonBlocking, + overBudget, + action, + killed: killed || undefined, + killError, + }); + } - // Derive state for milestone context try { const state = await deriveState(basePath); + if (state.activeMilestone) { + activeMilestone = { + id: state.activeMilestone.id, + title: state.activeMilestone.title, + phase: state.phase, + }; + } if (state.activeMilestone && !activeUnit) { recommendations.push( `No active unit detected, but milestone ${state.activeMilestone.id} is active. Consider dispatching the next unit.`, ); } } catch { - // ignore + // State derivation is useful context but not required for the audit. 
+ } + + const loopEvidence = + activeUnit && + buildLoopEvidence(basePath, activeUnit.unitType, activeUnit.unitId); + if ( + loopEvidence?.completedPriorTasks.length && + loopEvidence.missingSummaries.length + ) { + warnings.push( + `${loopEvidence.milestoneId}/${loopEvidence.sliceId} has ${loopEvidence.completedPriorTasks.length} completed prior tasks but missing final summary evidence for ${loopEvidence.missingSummaries.join(", ")}.`, + ); + } + + const feedback = readAllSelfFeedback(basePath); + const milestoneId = activeMilestone?.id; + const runawayHistory = collectRunawayHistory( + runtimeUnits, + feedback, + milestoneId, + ); + const repeatedFailureRollup = maybeRecordRepeatedFailureRollup( + basePath, + milestoneId, + feedback, + options, + ); + if (repeatedFailureRollup?.filed) { + recommendations.push( + `Filed ${FLOW_AUDIT_ROLLUP_KIND} for ${milestoneId} after ${repeatedFailureRollup.count} repeated failures.`, + ); + } + + const recommendedAction = chooseRecommendedAction({ + activeUnit, + sessionPointer, + staleDispatchedUnits, + childProcesses, + lastErrors, + activeMilestone, + }); + if (!recommendations.includes(recommendedAction)) { + recommendations.unshift(recommendedAction); } return { - ok: warnings.length === 0 && lastErrors.length === 0, + ok: + warnings.length === 0 && + lastErrors.length === 0 && + staleDispatchedUnits.length === 0, + activeMilestone, activeUnit, + sessionPointer, recommendations, + recommendedAction, warnings, childProcesses, lastErrors, + staleDispatchedUnits, + runawayHistory, + loopEvidence, + repeatedFailureRollup, }; } diff --git a/src/resources/extensions/sf/extension-manifest.json b/src/resources/extensions/sf/extension-manifest.json index 0834a0349..99a42ece3 100644 --- a/src/resources/extensions/sf/extension-manifest.json +++ b/src/resources/extensions/sf/extension-manifest.json @@ -15,7 +15,8 @@ "sf_summary_save", "sf_requirement_update", "sf_milestone_generate_id", - "sf_self_report" + "sf_self_report", + 
"sf_self_feedback_resolve" ], "commands": ["sf", "kill", "worktree", "exit"], "hooks": [ @@ -25,6 +26,7 @@ "session_fork", "before_agent_start", "agent_end", + "turn_end", "session_before_compact", "session_shutdown", "tool_call", diff --git a/src/resources/extensions/sf/model-route-failure.ts b/src/resources/extensions/sf/model-route-failure.ts new file mode 100644 index 000000000..0acb30660 --- /dev/null +++ b/src/resources/extensions/sf/model-route-failure.ts @@ -0,0 +1,179 @@ +import type { Api, Model } from "@singularity-forge/pi-ai"; +import type { ModelFailureRecord } from "./auto/session.js"; +import { resolveModelId } from "./auto-model-selection.js"; +import type { ResolvedModelConfig } from "./preferences.js"; + +export interface ModelRouteRef { + provider: string; + id: string; +} + +export interface NextModelRouteResult { + model: Model; + route: string; + source: "configured" | "available"; +} + +/** + * Build the stable identity key for a concrete provider route. + * + * Purpose: make fallback recovery compare full provider/model routes instead of + * ambiguous bare model ids. + * + * Consumer: resolveNextConfiguredModelRoute() when skipping failed and current + * runtime routes. + */ +export function modelRouteKey(route: ModelRouteRef): string { + return `${route.provider.toLowerCase()}/${route.id.toLowerCase()}`; +} + +function dedupeConfiguredRoutes(modelConfig: ResolvedModelConfig): string[] { + const seen = new Set(); + const routes: string[] = []; + for (const route of [modelConfig.primary, ...modelConfig.fallbacks]) { + const key = route.toLowerCase(); + if (seen.has(key)) continue; + seen.add(key); + routes.push(route); + } + return routes; +} + +/** + * Resolve the next configured model route after a provider/model failure. + * + * Purpose: keep auto-mode recovery inside the user's explicit primary/fallback + * chain, skip routes already failed for this unit, and avoid returning the same + * provider/model again. 
+ * + * Consumer: bootstrap/agent-end-recovery.ts when a provider returns quota, + * rate-limit, server, stream, or connection failures during a unit. + */ +export function resolveNextConfiguredModelRoute(args: { + current: ModelRouteRef | undefined; + modelConfig: ResolvedModelConfig; + availableModels: Model[]; + failedRoutes: readonly ModelFailureRecord[]; + isBlocked?: (model: Model) => boolean; +}): NextModelRouteResult | undefined { + const routes = dedupeConfiguredRoutes(args.modelConfig); + const currentKey = args.current ? modelRouteKey(args.current) : undefined; + const failedKeys = new Set( + args.failedRoutes.map((failure) => + modelRouteKey({ provider: failure.provider, id: failure.modelId }), + ), + ); + + const resolvedRoutes = routes.map((configuredRoute) => ({ + configuredRoute, + model: resolveModelId( + configuredRoute, + args.availableModels, + args.current?.provider, + ) as Model | undefined, + })); + + const currentIndex = + currentKey === undefined + ? -1 + : resolvedRoutes.findIndex( + (route) => route.model && modelRouteKey(route.model) === currentKey, + ); + const candidates = + currentIndex >= 0 ? resolvedRoutes.slice(currentIndex + 1) : resolvedRoutes; + + for (const candidate of candidates) { + if (!candidate.model) continue; + const candidateKey = modelRouteKey(candidate.model); + if (candidateKey === currentKey) continue; + if (failedKeys.has(candidateKey)) continue; + if (args.isBlocked?.(candidate.model)) continue; + return { + model: candidate.model, + route: candidate.configuredRoute, + source: "configured", + }; + } + + return undefined; +} + +/** + * Resolve another currently available provider/model route when configured + * fallbacks are missing or exhausted. + * + * Purpose: keep auto-mode moving on quota/rate-limit/server failures instead + * of pausing just because the configured fallback chain did not cover every + * live provider route. 
+ * + * Consumer: bootstrap/agent-end-recovery.ts after configured fallback lookup + * fails for a model-route failure. + */ +export function resolveNextAvailableModelRoute(args: { + current: ModelRouteRef | undefined; + availableModels: Model[]; + failedRoutes: readonly ModelFailureRecord[]; + isBlocked?: (model: Model) => boolean; +}): NextModelRouteResult | undefined { + const currentKey = args.current ? modelRouteKey(args.current) : undefined; + const failedKeys = new Set( + args.failedRoutes.map((failure) => + modelRouteKey({ provider: failure.provider, id: failure.modelId }), + ), + ); + const candidates = args.availableModels.filter((model) => { + const key = modelRouteKey(model); + if (key === currentKey) return false; + if (failedKeys.has(key)) return false; + if (args.isBlocked?.(model)) return false; + return true; + }); + if (candidates.length === 0) return undefined; + + const differentProvider = + args.current && + candidates.find( + (model) => + model.provider.toLowerCase() !== args.current!.provider.toLowerCase(), + ); + const model = differentProvider ?? candidates[0]; + return { + model, + route: `${model.provider}/${model.id}`, + source: "available", + }; +} + +/** + * Resolve the next model route by trying configured policy first, then any + * other live route. + * + * Purpose: preserve configured fallback ordering when it exists while still + * enforcing the no-pause contract for transient provider/model failures. + * + * Consumer: bootstrap/agent-end-recovery.ts during provider-route recovery. 
+ */ +export function resolveNextModelRoute(args: { + current: ModelRouteRef | undefined; + modelConfig: ResolvedModelConfig | undefined; + availableModels: Model[]; + failedRoutes: readonly ModelFailureRecord[]; + isBlocked?: (model: Model) => boolean; +}): NextModelRouteResult | undefined { + if (args.modelConfig) { + const configured = resolveNextConfiguredModelRoute({ + current: args.current, + modelConfig: args.modelConfig, + availableModels: args.availableModels, + failedRoutes: args.failedRoutes, + isBlocked: args.isBlocked, + }); + if (configured) return configured; + } + return resolveNextAvailableModelRoute({ + current: args.current, + availableModels: args.availableModels, + failedRoutes: args.failedRoutes, + isBlocked: args.isBlocked, + }); +} diff --git a/src/resources/extensions/sf/self-feedback-drain.ts b/src/resources/extensions/sf/self-feedback-drain.ts index 323d5f5d3..27843c750 100644 --- a/src/resources/extensions/sf/self-feedback-drain.ts +++ b/src/resources/extensions/sf/self-feedback-drain.ts @@ -21,6 +21,7 @@ import type { ExtensionAPI, ExtensionContext, } from "@singularity-forge/pi-coding-agent"; +import { getErrorMessage } from "./error-utils.js"; import { sfRuntimeRoot } from "./paths.js"; import type { PersistedSelfFeedbackEntry } from "./self-feedback.js"; import { @@ -33,6 +34,7 @@ const CLAIM_TTL_MS = 30 * 60 * 1000; interface InlineFixClaim { ids: string[]; dispatchedAt: string; + lastDispatchError?: string; } function claimPath(basePath: string): string { @@ -63,6 +65,28 @@ function writeClaim(basePath: string, ids: string[]): void { ); } +function writeFailedClaim( + basePath: string, + ids: string[], + error: string, +): void { + const path = claimPath(basePath); + mkdirSync(dirname(path), { recursive: true }); + writeFileSync( + path, + JSON.stringify( + { + ids, + dispatchedAt: new Date(Date.now() - CLAIM_TTL_MS - 1).toISOString(), + lastDispatchError: error, + }, + null, + 2, + ), + "utf-8", + ); +} + function 
clearClaim(basePath: string): void { try { unlinkSync(claimPath(basePath)); @@ -147,10 +171,10 @@ function buildInlineFixPrompt(entries: PersistedSelfFeedbackEntry[]): string { ) .join("\n\n"); - return [ - "You are executing SF self-feedback inline-fix mode.", - "", - "These high/critical self-feedback entries are unresolved sf defects. Do not only triage them; repair the current codebase directly.", + return [ + "You are executing SF self-feedback inline-fix mode.", + "", + "These high/critical self-feedback entries are unresolved sf defects. Do not only triage them; repair the current codebase directly.", "", rendered, "", @@ -159,8 +183,9 @@ function buildInlineFixPrompt(entries: PersistedSelfFeedbackEntry[]): string { "2. Fix the smallest coherent set of code/docs/tests needed to satisfy the acceptance criteria.", "3. Run focused verification and typecheck for touched areas.", "4. Commit the fix with a conventional commit message.", - "5. Mark the repaired entries resolved in `.sf/self-feedback.jsonl` with agent-fix evidence and the commit SHA.", - "6. If an entry is already fixed, mark it resolved with agent-fix evidence and explain the verification.", + "5. Call `sf_self_feedback_resolve` for each repaired entry with agent-fix evidence and the commit SHA.", + "6. If an entry is already fixed, verify it and call `sf_self_feedback_resolve` with the verification evidence.", + "7. Do not hand-edit `.sf/self-feedback.jsonl`; use the resolver tool so markdown, JSONL, and reload detection stay consistent.", "", "When done, say: Self-feedback inline fix complete.", ].join("\n"); @@ -195,17 +220,25 @@ export function dispatchSelfFeedbackInlineFixIfNeeded( writeClaim(basePath, ids); const prompt = buildInlineFixPrompt(candidates); ctx.ui.notify( - `Dispatching self-feedback inline fix for ${ids.length} high/critical entr${ids.length === 1 ? "y" : "ies"}.`, + `Queueing self-feedback inline fix for ${ids.length} high/critical entr${ids.length === 1 ? 
"y" : "ies"}.`, "warning", ); - pi.sendMessage( + const dispatch = pi.sendMessage( { customType: "sf-self-feedback-inline-fix", content: prompt, display: false, }, - { triggerTurn: true }, + { triggerTurn: true, deliverAs: "followUp" }, ); + void Promise.resolve(dispatch).catch((error) => { + const message = getErrorMessage(error); + writeFailedClaim(basePath, ids, message); + ctx.ui.notify( + `Self-feedback inline fix dispatch failed; will retry at the next idle point: ${message}`, + "warning", + ); + }); return candidates.length; } diff --git a/src/resources/extensions/sf/tests/commands-harness.test.ts b/src/resources/extensions/sf/tests/commands-harness.test.ts new file mode 100644 index 000000000..6c3cf6309 --- /dev/null +++ b/src/resources/extensions/sf/tests/commands-harness.test.ts @@ -0,0 +1,171 @@ +import assert from "node:assert/strict"; +import { execFileSync } from "node:child_process"; +import { + appendFileSync, + existsSync, + mkdirSync, + mkdtempSync, + readFileSync, + realpathSync, + rmSync, + symlinkSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, test } from "vitest"; + +import { handleHarness } from "../commands-harness.ts"; +import { profileRepository } from "../repo-profiler.ts"; +import { + closeDatabase, + getRepoFileObservations, + openDatabase, + recordRepoProfile, +} from "../sf-db.ts"; + +const originalCwd = process.cwd(); +const originalProjectRoot = process.env.SF_PROJECT_ROOT; +let roots: string[] = []; + +afterEach(() => { + process.chdir(originalCwd); + closeDatabase(); + for (const root of roots) rmSync(root, { recursive: true, force: true }); + roots = []; + if (originalProjectRoot === undefined) delete process.env.SF_PROJECT_ROOT; + else process.env.SF_PROJECT_ROOT = originalProjectRoot; +}); + +function runGit(args: string[], cwd: string): string { + return execFileSync("git", args, { + cwd, + stdio: ["ignore", "pipe", "pipe"], + encoding: 
"utf-8", + }).trim(); +} + +function makeRepo(prefix: string): string { + const repo = realpathSync(mkdtempSync(join(tmpdir(), prefix))); + roots.push(repo); + runGit(["init", "-b", "main"], repo); + runGit(["config", "user.email", "test@example.com"], repo); + runGit(["config", "user.name", "SF Test"], repo); + writeFileSync(join(repo, "README.md"), "# Repo\n", "utf8"); + writeFileSync( + join(repo, "package.json"), + '{"scripts":{"test":"node --test"}}\n', + "utf8", + ); + runGit(["add", "README.md", "package.json"], repo); + runGit(["commit", "-m", "init"], repo); + return repo; +} + +function makeExternalSfState(repo: string): string { + const externalState = realpathSync(mkdtempSync(join(tmpdir(), "sf-state-"))); + roots.push(externalState); + symlinkSync(externalState, join(repo, ".sf"), "junction"); + appendFileSync(join(repo, ".git", "info", "exclude"), "\n.sf\n", "utf8"); + return externalState; +} + +function makeMockCtx(): { + notifications: Array<{ message: string; level?: string }>; + ui: { notify(message: string, level?: string): void }; +} { + const notifications: Array<{ message: string; level?: string }> = []; + return { + notifications, + ui: { + notify(message: string, level?: string) { + notifications.push({ message, level }); + }, + }, + }; +} + +test("harnessPromote_when_sf_is_external_symlink_writes_tracked_docs_not_runtime_target", async () => { + const repo = makeRepo("sf-harness-promote-"); + const externalState = makeExternalSfState(repo); + mkdirSync(join(repo, "notes"), { recursive: true }); + writeFileSync(join(repo, "notes", "local-finding.md"), "# Finding\n", "utf8"); + + closeDatabase(); + assert.equal(openDatabase(join(repo, ".sf", "sf.db")), true); + recordRepoProfile( + profileRepository(repo, { + now: () => "2026-05-02T10:00:00.000Z", + }), + ); + closeDatabase(); + + delete process.env.SF_PROJECT_ROOT; + process.chdir(repo); + const ctx = makeMockCtx(); + + await handleHarness("promote sf-moocr4rv-au7r3l", ctx as any); + + 
const relativeArtifact = + "docs/exec-plans/active/harness-promotion-sf-moocr4rv-au7r3l.md"; + const artifact = join(repo, relativeArtifact); + assert.ok(existsSync(artifact), "promotion writes a repo docs artifact"); + assert.ok( + !existsSync(join(externalState, relativeArtifact)), + "promotion must not write into the external .sf symlink target", + ); + assert.equal( + runGit(["status", "--short", "--", relativeArtifact], repo), + `?? ${relativeArtifact}`, + "promoted docs artifact is visible to git as repo output", + ); + + const firstContent = readFileSync(artifact, "utf8"); + await handleHarness("promote sf-moocr4rv-au7r3l", ctx as any); + assert.equal( + readFileSync(artifact, "utf8"), + firstContent, + "promotion content is deterministic for the same recorded profile", + ); + assert.doesNotMatch(firstContent, /Promoted at:/); + assert.match( + firstContent, + /Unpromoted \.sf runtime observations remain `observed_only`/, + ); + assert.match(firstContent, /"ownership": "observed_only"/); + assert.match( + firstContent, + new RegExp(`Repo artifact: \`${relativeArtifact}\``), + ); + assert.match( + ctx.notifications.at(-1)?.message ?? 
"", + /Unpromoted \.sf runtime state remains observed_only/, + ); +}); + +test("harnessProfile_when_recording_runtime_state_reports_no_repo_artifact", async () => { + const repo = makeRepo("sf-harness-profile-"); + makeExternalSfState(repo); + mkdirSync(join(repo, "notes"), { recursive: true }); + writeFileSync(join(repo, "notes", "scratch.md"), "# Scratch\n", "utf8"); + + delete process.env.SF_PROJECT_ROOT; + process.chdir(repo); + const ctx = makeMockCtx(); + + await handleHarness("profile", ctx as any); + + const observations = getRepoFileObservations(); + const scratch = observations.find((obs) => obs.path === "notes/scratch.md"); + assert.equal(scratch?.ownership, "observed_only"); + assert.ok( + !existsSync(join(repo, "docs", "exec-plans", "active")), + "profile does not create repo-committable docs output", + ); + + const notice = ctx.notifications[0]?.message ?? ""; + assert.match(notice, /Runtime observation boundary:/); + assert.match(notice, /No repo-committable artifact was written/); + assert.match(notice, /\/sf harness promote /); + assert.doesNotMatch(notice, /tracked documentation artifact created/); +}); diff --git a/src/resources/extensions/sf/tests/error-classifier-quota-reset.test.ts b/src/resources/extensions/sf/tests/error-classifier-quota-reset.test.ts new file mode 100644 index 000000000..ad3ea800d --- /dev/null +++ b/src/resources/extensions/sf/tests/error-classifier-quota-reset.test.ts @@ -0,0 +1,15 @@ +import assert from "node:assert/strict"; +import { test } from "vitest"; + +import { classifyError } from "../error-classifier.ts"; + +test("quota_reset_after_seconds_is_rate_limit_with_retry_delay", () => { + const result = classifyError( + "You have exhausted your capacity on this model. 
Your quota will reset after 33s.", + ); + + assert.equal(result.kind, "rate-limit"); + if (result.kind === "rate-limit") { + assert.equal(result.retryAfterMs, 33_000); + } +}); diff --git a/src/resources/extensions/sf/tests/flow-audit.test.ts b/src/resources/extensions/sf/tests/flow-audit.test.ts new file mode 100644 index 000000000..2a2f70223 --- /dev/null +++ b/src/resources/extensions/sf/tests/flow-audit.test.ts @@ -0,0 +1,255 @@ +import assert from "node:assert/strict"; +import { + mkdirSync, + mkdtempSync, + readFileSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, describe, test } from "vitest"; +import { runFlowAudit } from "../doctor.ts"; +import { readAllSelfFeedback, recordSelfFeedback } from "../self-feedback.ts"; + +const roots: string[] = []; + +afterEach(() => { + for (const root of roots) rmSync(root, { recursive: true, force: true }); + roots.length = 0; +}); + +function makeForgeProject(): string { + const root = mkdtempSync(join(tmpdir(), "sf-flow-audit-")); + roots.push(root); + mkdirSync(join(root, ".sf"), { recursive: true }); + writeFileSync( + join(root, "package.json"), + JSON.stringify({ name: "singularity-forge", version: "0.0.1" }), + "utf-8", + ); + return root; +} + +function writeM007LoopFixture(root: string, nowMs: number): void { + const sf = join(root, ".sf"); + const unitId = "M007/S01/T10"; + const startedAt = nowMs - 45 * 60 * 1000; + const lastProgressAt = nowMs - 31 * 60 * 1000; + const sliceDir = join(sf, "milestones", "M007", "slices", "S01"); + const tasksDir = join(sliceDir, "tasks"); + const unitsDir = join(sf, "runtime", "units"); + mkdirSync(tasksDir, { recursive: true }); + mkdirSync(unitsDir, { recursive: true }); + + writeFileSync( + join(sf, "auto.lock"), + JSON.stringify( + { + pid: 5000, + unitType: "execute-task", + unitId, + phase: "dispatched", + startedAt: new Date(startedAt).toISOString(), + sessionId: "sess-m007", + 
sessionFile: "/tmp/sessions/m007.jsonl", + }, + null, + 2, + ), + "utf-8", + ); + writeFileSync( + join(unitsDir, "execute-task-M007-S01-T10.json"), + JSON.stringify( + { + version: 1, + unitType: "execute-task", + unitId, + startedAt, + updatedAt: lastProgressAt, + phase: "dispatched", + wrapupWarningSent: false, + continueHereFired: false, + timeoutAt: null, + lastProgressAt, + progressCount: 0, + lastProgressKind: "dispatch", + runawayGuardPause: { + reason: "Runaway guard paused execute-task M007/S01/T09", + pausedAt: lastProgressAt - 60_000, + unitType: "execute-task", + unitId: "M007/S01/T09", + diagnosticTurns: 2, + warningsSent: 2, + thresholdReasons: ["budget kept growing"], + metrics: { + toolCalls: 90, + sessionTokens: 1_200_000, + elapsedMs: 2_000_000, + changedFiles: 0, + worktreeChangedSinceStart: false, + topTools: { read: 80, bash: 10 }, + }, + thresholds: { + toolCallWarning: 60, + tokenWarning: 1_000_000, + elapsedMs: 1_200_000, + changedFilesWarning: 75, + minIntervalMs: 120_000, + }, + }, + }, + null, + 2, + ), + "utf-8", + ); + + const taskLines: string[] = []; + for (let i = 1; i <= 10; i++) { + const id = `T${String(i).padStart(2, "0")}`; + taskLines.push( + `- [${i < 10 ? 
"x" : " "}] **${id}: Task ${i}** \`est:10m\``, + ); + if (i < 10) { + writeFileSync( + join(tasksDir, `${id}-SUMMARY.md`), + `# ${id} summary\n\nDone.\n`, + "utf-8", + ); + } + } + writeFileSync( + join(sliceDir, "S01-PLAN.md"), + `# S01: Loop Evidence\n\n## Tasks\n\n${taskLines.join("\n")}\n`, + "utf-8", + ); + writeFileSync( + join(sf, "notifications.jsonl"), + JSON.stringify({ + severity: "error", + message: "session creation failed before final summary", + }) + "\n", + "utf-8", + ); + + for (const task of ["T08", "T09", "T10"]) { + recordSelfFeedback( + { + kind: "runaway-guard-hard-pause", + severity: "medium", + summary: `Runaway guard paused execute-task M007/S01/${task}`, + evidence: `${task} had no final closure`, + source: "detector", + occurredIn: { + milestone: "M007", + slice: "S01", + task, + unitType: "execute-task", + }, + }, + root, + ); + } +} + +describe("flow audit", () => { + test("audit_when_m007_loop_evidence_exists_reports_actionable_stale_flow", async () => { + const root = makeForgeProject(); + const nowMs = Date.UTC(2026, 4, 2, 13, 45, 0); + writeM007LoopFixture(root, nowMs); + + const result = await runFlowAudit(root, { + nowMs, + psOutput: + "5000 1 2700 node dist/loader.js sf headless auto\n" + + "5100 5000 2400 sift search --json --strategy page-index-hybrid warmup\n" + + "5200 5000 120 node dist/loader.js sf tool-session\n", + staleProgressMs: 20 * 60 * 1000, + optionalChildBudgetMs: 30 * 60 * 1000, + }); + + assert.equal(result.ok, false); + assert.equal(result.activeMilestone?.id, "M007"); + assert.equal(result.activeUnit?.unitId, "M007/S01/T10"); + assert.equal(result.activeUnit?.progressAgeMs, 31 * 60 * 1000); + assert.equal(result.sessionPointer?.sessionId, "sess-m007"); + assert.equal( + result.sessionPointer?.sessionFile, + "/tmp/sessions/m007.jsonl", + ); + assert.equal(result.staleDispatchedUnits.length, 1); + assert.match(result.warnings.join("\n"), /no progress for 31 minutes/); + 
assert.deepEqual(result.loopEvidence?.completedPriorTasks.slice(-2), [ + "T08", + "T09", + ]); + assert.match(result.loopEvidence?.missingSummaries.join("\n") ?? "", /T10/); + assert.match(result.lastErrors.join("\n"), /session creation failed/); + assert.match(result.runawayHistory.join("\n"), /M007\/S01\/T09/); + assert.match(result.recommendedAction, /Inspect session/); + + const warmup = result.childProcesses.find((p) => p.pid === 5100); + assert.ok(warmup, "warmup child should be reported"); + assert.equal(warmup.classification, "warmup"); + assert.equal(warmup.nonBlocking, true); + assert.equal(warmup.overBudget, true); + assert.equal(warmup.action, "non-blocking"); + + const active = result.childProcesses.find((p) => p.pid === 5200); + assert.ok(active, "active tool child should be reported"); + assert.equal(active.classification, "active-session"); + assert.equal(active.nonBlocking, false); + + const entries = readAllSelfFeedback(root); + const rollups = entries.filter( + (e) => + e.kind === "flow-audit:repeated-milestone-failure" && !e.resolvedAt, + ); + assert.equal(rollups.length, 1); + assert.equal(rollups[0]?.severity, "high"); + assert.match(rollups[0]?.summary ?? "", /M007/); + assert.match(rollups[0]?.acceptanceCriteria ?? 
"", /stale dispatched unit/); + + await runFlowAudit(root, { nowMs, psOutput: "" }); + assert.equal( + readAllSelfFeedback(root).filter( + (e) => e.kind === "flow-audit:repeated-milestone-failure", + ).length, + 1, + "same milestone rollup stays single while open", + ); + }); + + test("audit_when_optional_child_is_over_budget_can_kill_it_explicitly", async () => { + const root = makeForgeProject(); + const killed: number[] = []; + const result = await runFlowAudit(root, { + nowMs: Date.UTC(2026, 4, 2, 13, 45, 0), + psOutput: + "5100 5000 2400 sift search --json --strategy page-index-hybrid warmup\n", + optionalChildBudgetMs: 60_000, + killOverBudgetChildren: true, + killProcess: (pid) => { + killed.push(pid); + }, + }); + + assert.deepEqual(killed, [5100]); + assert.equal(result.childProcesses[0]?.classification, "warmup"); + assert.equal(result.childProcesses[0]?.action, "kill"); + assert.equal(result.childProcesses[0]?.killed, true); + }); + + test("session_start_when_registered_runs_flow_auditor", () => { + const source = readFileSync( + join(import.meta.dirname, "..", "bootstrap", "register-hooks.ts"), + "utf-8", + ); + + assert.match(source, /pi\.on\("session_start"/); + assert.match(source, /runFlowAudit/); + assert.match(source, /Flow audit:/); + }); +}); diff --git a/src/resources/extensions/sf/tests/model-route-failure.test.ts b/src/resources/extensions/sf/tests/model-route-failure.test.ts new file mode 100644 index 000000000..28ca39e88 --- /dev/null +++ b/src/resources/extensions/sf/tests/model-route-failure.test.ts @@ -0,0 +1,155 @@ +import assert from "node:assert/strict"; +import { describe, test } from "vitest"; + +import { + modelRouteKey, + resolveNextAvailableModelRoute, + resolveNextConfiguredModelRoute, + resolveNextModelRoute, +} from "../model-route-failure.ts"; + +const models = [ + { provider: "google-gemini-cli", id: "gemini-3-flash-preview" }, + { provider: "google", id: "gemini-3-flash-preview" }, + { provider: "anthropic", id: 
"claude-sonnet-4-6" }, + { provider: "zai", id: "glm-5.1" }, +] as any[]; + +describe("configured model route failure recovery", () => { + test("quota_when_current_route_fails_returns_next_configured_fallback", () => { + const next = resolveNextConfiguredModelRoute({ + current: { + provider: "google-gemini-cli", + id: "gemini-3-flash-preview", + }, + modelConfig: { + primary: "google-gemini-cli/gemini-3-flash-preview", + fallbacks: ["anthropic/claude-sonnet-4-6", "zai/glm-5.1"], + }, + availableModels: models, + failedRoutes: [ + { + unitType: "execute-task", + unitId: "M001/S01/T01", + provider: "google-gemini-cli", + modelId: "gemini-3-flash-preview", + reason: "quota reset after 33s", + timestamp: 1, + }, + ], + }); + + assert.equal(next?.model.provider, "anthropic"); + assert.equal(next?.model.id, "claude-sonnet-4-6"); + }); + + test("current_model_not_in_config_starts_at_configured_primary", () => { + const next = resolveNextConfiguredModelRoute({ + current: { provider: "google-gemini-cli", id: "gemini-3-flash-preview" }, + modelConfig: { + primary: "anthropic/claude-sonnet-4-6", + fallbacks: ["zai/glm-5.1"], + }, + availableModels: models, + failedRoutes: [], + }); + + assert.equal(next?.model.provider, "anthropic"); + assert.equal(next?.model.id, "claude-sonnet-4-6"); + }); + + test("exhausted_chain_returns_undefined", () => { + const next = resolveNextConfiguredModelRoute({ + current: { provider: "zai", id: "glm-5.1" }, + modelConfig: { + primary: "anthropic/claude-sonnet-4-6", + fallbacks: ["zai/glm-5.1"], + }, + availableModels: models, + failedRoutes: [ + { + unitType: "execute-task", + unitId: "M001/S01/T01", + provider: "zai", + modelId: "glm-5.1", + reason: "server overloaded", + timestamp: 1, + }, + ], + }); + + assert.equal(next, undefined); + }); + + test("exhausted_configured_chain_uses_available_route_before_pause", () => { + const next = resolveNextModelRoute({ + current: { provider: "zai", id: "glm-5.1" }, + modelConfig: { + primary: 
"anthropic/claude-sonnet-4-6", + fallbacks: ["zai/glm-5.1"], + }, + availableModels: models, + failedRoutes: [ + { + unitType: "execute-task", + unitId: "M001/S01/T01", + provider: "zai", + modelId: "glm-5.1", + reason: "server overloaded", + timestamp: 1, + }, + ], + }); + + assert.equal(next?.source, "available"); + assert.equal(next?.model.provider, "google-gemini-cli"); + assert.equal(next?.model.id, "gemini-3-flash-preview"); + }); + + test("missing_config_uses_available_route_and_prefers_different_provider", () => { + const next = resolveNextAvailableModelRoute({ + current: { provider: "google-gemini-cli", id: "gemini-3-flash-preview" }, + availableModels: models, + failedRoutes: [ + { + unitType: "execute-task", + unitId: "M001/S01/T01", + provider: "google-gemini-cli", + modelId: "gemini-3-flash-preview", + reason: "quota", + timestamp: 1, + }, + ], + }); + + assert.equal(next?.source, "available"); + assert.notEqual(next?.model.provider, "google-gemini-cli"); + assert.notEqual( + modelRouteKey(next!.model), + "google-gemini-cli/gemini-3-flash-preview", + ); + }); + + test("provider_model_identity_skips_only_the_failed_route", () => { + const next = resolveNextConfiguredModelRoute({ + current: { provider: "google-gemini-cli", id: "gemini-3-flash-preview" }, + modelConfig: { + primary: "google-gemini-cli/gemini-3-flash-preview", + fallbacks: ["google/gemini-3-flash-preview"], + }, + availableModels: models, + failedRoutes: [ + { + unitType: "execute-task", + unitId: "M001/S01/T01", + provider: "google-gemini-cli", + modelId: "gemini-3-flash-preview", + reason: "quota", + timestamp: 1, + }, + ], + }); + + assert.equal(modelRouteKey(next!.model), "google/gemini-3-flash-preview"); + }); +}); diff --git a/src/resources/extensions/sf/tests/provider-errors.test.ts b/src/resources/extensions/sf/tests/provider-errors.test.ts index d81d6b2c1..3adaed254 100644 --- a/src/resources/extensions/sf/tests/provider-errors.test.ts +++ 
b/src/resources/extensions/sf/tests/provider-errors.test.ts @@ -8,8 +8,8 @@ import assert from "node:assert/strict"; import { readFileSync } from "node:fs"; import { dirname, join } from "node:path"; -import { test } from 'vitest'; import { fileURLToPath } from "node:url"; +import { test } from "vitest"; import { resumeAutoAfterProviderDelay } from "../bootstrap/provider-error-resume.ts"; import { classifyError, @@ -388,22 +388,18 @@ test("resumeAutoAfterProviderDelay restarts paused auto-mode from the recorded b ui: { notify() {} }, newSession: async () => ({ cancelled: false }), } as any; - const result = await resumeAutoAfterProviderDelay( - {} as any, - commandCtx, - { - getSnapshot: () => ({ - active: false, - paused: true, - stepMode: true, - basePath: "/tmp/project", - }), - resetTransientRetryState: () => {}, - startAuto: async (_ctx, _pi, base, verboseMode, options) => { - startCalls.push({ base, verboseMode, step: options?.step }); - }, + const result = await resumeAutoAfterProviderDelay({} as any, commandCtx, { + getSnapshot: () => ({ + active: false, + paused: true, + stepMode: true, + basePath: "/tmp/project", + }), + resetTransientRetryState: () => {}, + startAuto: async (_ctx, _pi, base, verboseMode, options) => { + startCalls.push({ base, verboseMode, step: options?.step }); }, - ); + }); assert.equal(result, "resumed"); assert.deepEqual(startCalls, [ @@ -545,21 +541,21 @@ test("resumeAutoAfterProviderDelay leaves paused when no command context is avai ]); }); -// ── Escalating backoff for transient errors (#1166) ───────────────────────── +// ── Configured model-route recovery for provider failures ─────────────────── -test("agent-end-recovery.ts tracks consecutive transient errors for escalating backoff", () => { +test("agent-end-recovery.ts records failed provider routes for configured fallback", () => { const src = readFileSync( join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"), "utf-8", ); assert.ok( - 
src.includes("consecutiveTransientCount"), - "agent-end-recovery.ts must track consecutiveTransientCount for escalating backoff (#1166)", + src.includes("recordCurrentModelFailure"), + "agent-end-recovery.ts must record failed provider/model routes before resolving fallbacks", ); assert.ok( - src.includes("MAX_TRANSIENT_AUTO_RESUMES"), - "agent-end-recovery.ts must define MAX_TRANSIENT_AUTO_RESUMES to cap infinite retries (#1166)", + src.includes("getCurrentUnitModelFailures"), + "agent-end-recovery.ts must skip routes already failed for the current unit", ); }); @@ -576,34 +572,35 @@ test("agent-end-recovery.ts resets retry state before resolveAgentEnd on success ); }); -test("agent-end-recovery.ts applies escalating delay for repeated transient errors", () => { +test("agent-end-recovery.ts does not sleep or same-route retry model-route failures", () => { const src = readFileSync( join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"), "utf-8", ); - // Must contain the exponential backoff formula (may span multiple lines) assert.ok( - src.includes("2 ** Math.max(0, retryState.consecutiveTransientCount"), - "agent-end-recovery.ts must escalate retryAfterMs exponentially for consecutive transient errors (#1166)", + !src.includes("pauseTransientWithBackoff"), + "model-route failures must not enter same-model transient backoff", + ); + assert.ok( + !src.includes("resumeAutoAfterProviderDelay"), + "model-route failures must not schedule same-model auto-resume", ); }); -test("agent-end-recovery.ts resumes transient provider pauses through startAuto instead of a hidden prompt", () => { +test("agent-end-recovery.ts sends hidden continue after any successful fallback switch", () => { const src = readFileSync( join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"), "utf-8", ); assert.ok( - src.includes("resumeAutoAfterProviderDelay"), - "agent-end-recovery.ts must resume paused auto-mode through resumeAutoAfterProviderDelay (#2813)", + src.includes('customType: 
"sf-auto-timeout-recovery"'), + "successful fallback switches should continue the active unit with a hidden message", ); assert.ok( - !src.includes( - "Continue execution — provider error recovery delay elapsed.", - ), - "transient provider resume must not rely on a hidden continue prompt (#2813)", + src.includes("configured fallback") && src.includes("available fallback"), + "hidden continue must be tied to a successful model switch, whether configured or available", ); }); @@ -613,8 +610,9 @@ test("agent-end-recovery.ts does not defer rate-limit errors to core retry handl "utf-8", ); assert.ok( - src.includes('if (isTransient(cls) && cls.kind !== "rate-limit")'), - "rate-limit errors must bypass transient core-retry deferral so fallback can execute (#4373)", + src.includes("isModelRouteFailure(cls)") && + src.includes('cls.kind === "rate-limit"'), + "rate-limit errors must enter model-route recovery before pausing (#4373)", ); }); @@ -624,8 +622,8 @@ test("agent-end-recovery.ts updates dashboard dispatched model after fallback sw "utf-8", ); assert.ok( - src.includes("setCurrentDispatchedModelId"), - "agent-end-recovery.ts should update currentDispatchedModelId when recovery switches model", + src.includes("setCurrentUnitModel"), + "agent-end-recovery.ts should update current unit/dashboard model state when recovery switches model", ); }); @@ -704,19 +702,17 @@ test("phases.ts handles timeout session-creation failures with pause instead of ); }); -// ── Fix 3: MAX_TRANSIENT_AUTO_RESUMES raised to 8 ─────────────────────────── +// ── Fix 3: same-route transient retry cap removed for route failures ──────── -test("MAX_TRANSIENT_AUTO_RESUMES is at least 8 for sustained overload resilience", () => { +test("agent-end-recovery.ts does not keep a same-route transient resume cap", () => { const src = readFileSync( join(__dirname, "..", "bootstrap", "agent-end-recovery.ts"), "utf-8", ); - const match = src.match(/MAX_TRANSIENT_AUTO_RESUMES\s*=\s*(\d+)/); - 
assert.ok(match, "MAX_TRANSIENT_AUTO_RESUMES must be defined"); - const value = Number(match![1]); + assert.ok( - value >= 8, - `MAX_TRANSIENT_AUTO_RESUMES must be >= 8 for sustained overload resilience, got ${value}`, + !src.includes("MAX_TRANSIENT_AUTO_RESUMES"), + "provider route failures should switch explicit routes or pause, not count same-route resumes", ); }); diff --git a/src/resources/extensions/sf/tests/rate-limit-model-fallback.test.ts b/src/resources/extensions/sf/tests/rate-limit-model-fallback.test.ts index fa63f7324..7730d1890 100644 --- a/src/resources/extensions/sf/tests/rate-limit-model-fallback.test.ts +++ b/src/resources/extensions/sf/tests/rate-limit-model-fallback.test.ts @@ -1,15 +1,16 @@ /** * rate-limit-model-fallback.test.ts — Regression test for #2770. * - * Rate-limit errors enter the model fallback path before falling through - * to pause. This verifies the structural contract in agent-end-recovery.ts. + * Rate-limit errors enter model-route fallback before pausing. + * Recovery must switch to configured fallbacks first, then any other available + * route before pausing. */ import assert from "node:assert/strict"; import { readFileSync } from "node:fs"; import { dirname, join } from "node:path"; -import { test } from 'vitest'; import { fileURLToPath } from "node:url"; +import { test } from "vitest"; const __dirname = dirname(fileURLToPath(import.meta.url)); const RECOVERY_PATH = join( @@ -28,16 +29,10 @@ function getRecoverySource(): string { test("rate-limit errors enter the model fallback branch alongside other transient errors", () => { const src = getRecoverySource(); - // The condition that gates model fallback must include rate-limit. - // Match the if-condition that contains both "rate-limit" and fallback-related kinds. 
- const fallbackConditionRe = - /if\s*\([^)]*cls\.kind\s*===\s*"rate-limit"[^)]*cls\.kind\s*===\s*"network"/; - const fallbackConditionReAlt = - /if\s*\([^)]*cls\.kind\s*===\s*"network"[^)]*cls\.kind\s*===\s*"rate-limit"/; - assert.ok( - fallbackConditionRe.test(src) || fallbackConditionReAlt.test(src), - "rate-limit must appear in the same if-condition as network/server for model fallback (#2770)", + src.includes('cls.kind === "rate-limit"') && + src.includes("isModelRouteFailure(cls)"), + "rate-limit must enter the configured model-route failure path (#2770)", ); }); @@ -54,23 +49,50 @@ test("rate-limit errors are NOT short-circuited to pause before model fallback", ); }); -test("rate-limit errors fall through to pause if no fallback model is available", () => { +test("model fallback uses configured routes first then automatic available routes", () => { const src = getRecoverySource(); - // After the fallback block, the transient fallback pause must still fire for rate-limit. - // The isTransient check covers rate-limit (verified by error-classifier tests). - // Verify pauseTransientWithBackoff is called with isRateLimit derived from cls.kind. assert.ok( - src.includes('cls.kind === "rate-limit"'), - 'agent-end-recovery.ts must reference cls.kind === "rate-limit" for fallback and pause paths (#2770)', + src.includes("resolveNextModelRoute"), + "agent-end-recovery.ts must route through the configured-or-available route helper", ); - - // The transient fallback pause must pass the isRateLimit flag correctly. 
- const pauseCallRe = - /pauseTransientWithBackoff\([^)]*cls\.kind\s*===\s*"rate-limit"/; assert.ok( - pauseCallRe.test(src), - 'pauseTransientWithBackoff must receive isRateLimit based on cls.kind === "rate-limit" (#2770)', + src.includes("autoBenchmark: true"), + "runtime recovery must allow benchmark-provided fallbacks when preferences do not pin the full chain", + ); + assert.ok( + !src.includes("getAutoModeStartModel"), + "runtime recovery must not restore a session/system model as an inferred fallback", + ); +}); + +test("rate-limit errors pause only when no configured_or_available fallback remains", () => { + const src = getRecoverySource(); + + assert.ok( + src.includes("available fallback"), + "exhausted configured fallback chain should try another available model before pausing", + ); + assert.ok( + src.includes("no usable fallback model remains"), + "only complete fallback exhaustion should pause with a clear provider error", + ); + assert.ok( + /isTransient:\s*false/.test(src), + "complete provider route exhaustion must not same-route auto-resume", + ); +}); + +test("setModel failure advances to the next configured fallback", () => { + const src = getRecoverySource(); + + assert.ok( + src.includes('reason: "setModel failed during provider recovery"'), + "failed fallback routes should be recorded so the next configured route can be tried", + ); + assert.ok( + /if\s*\(!ok\)\s*\{[\s\S]{0,300}continue;/.test(src), + "setModel failure should continue walking the configured fallback chain", ); }); diff --git a/src/resources/extensions/sf/tests/research-terminal-transition.test.ts b/src/resources/extensions/sf/tests/research-terminal-transition.test.ts index 10464d2e2..015949b90 100644 --- a/src/resources/extensions/sf/tests/research-terminal-transition.test.ts +++ b/src/resources/extensions/sf/tests/research-terminal-transition.test.ts @@ -4,14 +4,12 @@ import { mkdirSync, rmSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from 
"node:path"; import { test } from "vitest"; -import { - getAutoSession, - resetAutoSession, -} from "../auto/session.js"; +import { getAutoSession } from "../auto/session.js"; import { hasResearchTerminalTransition, markResearchTerminalTransition, } from "../auto.js"; +import { registerHooks } from "../bootstrap/register-hooks.ts"; function makeTmpBase(): string { const base = join(tmpdir(), `sf-research-terminal-${randomUUID()}`); @@ -96,6 +94,69 @@ test("research terminal transition blocks planning tools", async () => { } }); +test("post_summary_planning_tool_attempt_is_blocked_without_followup_turn", async () => { + const session = getAutoSession(); + session.reset(); + session.active = true; + session.currentUnit = { + type: "research-slice", + id: "M001/S01", + startedAt: Date.now(), + }; + + const sentMessages: unknown[] = []; + const handlers = new Map any>>(); + const pi = { + on(event: string, handler: (event: any, ctx?: any) => any) { + const existing = handlers.get(event) ?? []; + existing.push(handler); + handlers.set(event, existing); + }, + sendMessage(message: unknown) { + sentMessages.push(message); + }, + } as any; + + registerHooks(pi); + const toolResultHandlers = handlers.get("tool_result") ?? []; + const toolCallHandlers = handlers.get("tool_call") ?? 
[]; + assert.ok(toolResultHandlers.length, "tool_result handler should register"); + assert.ok(toolCallHandlers.length, "tool_call handler should register"); + + for (const handler of toolResultHandlers) { + await handler({ + toolName: "sf_summary_save", + content: [{ type: "text", text: "Saved RESEARCH" }], + details: { + terminal_transition: true, + unit_type: "research", + }, + }); + } + assert.equal(hasResearchTerminalTransition(), true); + + const planningAttempt = { + toolName: "sf_plan_milestone", + input: {}, + }; + const results = []; + for (const handler of toolCallHandlers) { + results.push(await handler(planningAttempt)); + } + + const block = results.find((result) => result?.block === true); + assert.ok(block, "post-summary planning attempt should be blocked"); + assert.match(block.reason, /Post-artifact drift/); + assert.match(block.reason, /sf_plan_milestone/); + assert.equal( + sentMessages.length, + 0, + "blocking the tool call must not enqueue another agent turn", + ); + + session.reset(); +}); + test("research terminal transition does not block non-planning tools", () => { const session = getAutoSession(); // Reset to known state @@ -113,7 +174,7 @@ test("research terminal transition does not block non-planning tools", () => { // Non-planning tools should not be blocked by the research terminal transition // (the actual blocking logic only checks planning tools) - const nonPlanningTools = [ + const _nonPlanningTools = [ "read", "write", "edit", diff --git a/src/resources/extensions/sf/tests/self-feedback-drain.test.ts b/src/resources/extensions/sf/tests/self-feedback-drain.test.ts index 9df9bc043..08ce29258 100644 --- a/src/resources/extensions/sf/tests/self-feedback-drain.test.ts +++ b/src/resources/extensions/sf/tests/self-feedback-drain.test.ts @@ -1,6 +1,12 @@ import assert from "node:assert/strict"; import { execFileSync } from "node:child_process"; -import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { + 
mkdirSync, + mkdtempSync, + readFileSync, + rmSync, + writeFileSync, +} from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import { afterEach, describe, it } from "vitest"; @@ -90,7 +96,7 @@ describe("self-feedback inline drain", () => { root, ); - const messages: unknown[] = []; + const messages: Array<{ message: unknown; options: unknown }> = []; const notifications: string[] = []; const ctx = { ui: { @@ -100,18 +106,72 @@ describe("self-feedback inline drain", () => { }, } as any; const pi = { + sendMessage(message: unknown, options: unknown) { + messages.push({ message, options }); + }, + } as any; + + assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 1); + assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 0); + assert.equal(messages.length, 1); + assert.equal(notifications.length, 2); + assert.match( + JSON.stringify(messages[0]?.message), + /sf-self-feedback-inline-fix/, + ); + assert.match( + JSON.stringify(messages[0]?.message), + /sf_self_feedback_resolve/, + ); + assert.deepEqual(messages[0]?.options, { + triggerTurn: true, + deliverAs: "followUp", + }); + assert.match(notifications[1], /already dispatched/); + }); + + it("dispatch_failure_expires_claim_so_next_idle_turn_can_retry", async () => { + const root = makeForgeProject(); + recordSelfFeedback( + { + kind: "startup-dispatch-race", + severity: "critical", + summary: "Startup dispatch can fail before the turn is accepted", + source: "detector", + }, + root, + ); + + const notifications: string[] = []; + const ctx = { + ui: { + notify(message: string) { + notifications.push(message); + }, + }, + } as any; + const failingPi = { + sendMessage() { + return Promise.reject(new Error("agent busy")); + }, + } as any; + assert.equal( + dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, failingPi), + 1, + ); + await Promise.resolve(); + await Promise.resolve(); + + const messages: unknown[] = []; + const retryPi = { sendMessage(message: unknown) 
{ messages.push(message); }, } as any; - - assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 1); - assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 0); - assert.equal(messages.length, 1); - assert.equal(notifications.length, 2); - assert.match(JSON.stringify(messages[0]), /sf-self-feedback-inline-fix/); - assert.match(notifications[1], /already dispatched/); - }); + assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, retryPi), 1); + assert.equal(messages.length, 1); + assert.match(notifications.join("\n"), /will retry at the next idle point/); + }); it("consumes the claim after the inline-fix entries are resolved", () => { const root = makeForgeProject(); @@ -162,7 +222,11 @@ describe("self-feedback inline drain", () => { const ctx = { ui: { notify() {} } } as any; const pi = { sendMessage() {} } as any; assert.equal(dispatchSelfFeedbackInlineFixIfNeeded(root, ctx, pi), 1); - writeFileSync(join(root, "dirty.ts"), "export const dirty = true;\n", "utf-8"); + writeFileSync( + join(root, "dirty.ts"), + "export const dirty = true;\n", + "utf-8", + ); assert.equal( markResolved( recorded.entry.id, @@ -199,4 +263,20 @@ describe("self-feedback inline drain", () => { ); assert.equal(selected[0]?.repoIdentity, "external"); }); + + it("session_start_hook_queues_inline_fix_followup_not_only_warning", () => { + const source = readFileSync( + join(import.meta.dirname, "..", "bootstrap", "register-hooks.ts"), + "utf-8", + ); + const start = source.indexOf('pi.on("session_start"'); + const end = source.indexOf("return buildBeforeAgentStartResult", start); + assert.notEqual(start, -1); + assert.notEqual(end, -1); + const sessionStartBlock = source.slice(start, end); + + assert.match(sessionStartBlock, /dispatchSelfFeedbackInlineFixIfNeeded/); + assert.match(sessionStartBlock, /even outside \/sf auto/); + assert.doesNotMatch(sessionStartBlock, /no auto-dispatch/); + }); }); diff --git 
a/src/resources/extensions/sf/tests/self-feedback-resolve-tool.test.ts b/src/resources/extensions/sf/tests/self-feedback-resolve-tool.test.ts new file mode 100644 index 000000000..6fe44aa1a --- /dev/null +++ b/src/resources/extensions/sf/tests/self-feedback-resolve-tool.test.ts @@ -0,0 +1,106 @@ +import assert from "node:assert/strict"; +import { + mkdirSync, + mkdtempSync, + readFileSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, describe, test } from "vitest"; +import { registerDbTools } from "../bootstrap/db-tools.ts"; +import { readAllSelfFeedback, recordSelfFeedback } from "../self-feedback.ts"; + +const originalCwd = process.cwd(); +const originalSfHome = process.env.SF_HOME; +let roots: string[] = []; + +afterEach(() => { + process.chdir(originalCwd); + for (const root of roots) rmSync(root, { recursive: true, force: true }); + roots = []; + if (originalSfHome === undefined) delete process.env.SF_HOME; + else process.env.SF_HOME = originalSfHome; +}); + +function makeForgeProject(): string { + const root = mkdtempSync(join(tmpdir(), "sf-self-feedback-resolve-")); + roots.push(root); + mkdirSync(join(root, ".sf"), { recursive: true }); + process.env.SF_HOME = join(root, "sf-home"); + writeFileSync( + join(root, "package.json"), + JSON.stringify({ name: "singularity-forge", version: "0.0.1" }), + "utf-8", + ); + return root; +} + +function makeMockPi() { + const tools: any[] = []; + return { + registerTool(tool: any) { + tools.push(tool); + }, + tools, + } as any; +} + +describe("sf_self_feedback_resolve", () => { + test("resolve_when_entry_is_fixed_sets_resolved_evidence_and_regenerates_markdown", async () => { + const root = makeForgeProject(); + const recorded = recordSelfFeedback( + { + kind: "inline-fix-resolution-gap", + severity: "high", + summary: "Inline fix landed but entry stayed unresolved", + acceptanceCriteria: "1. Resolver tool exists. 2. 
JSONL is updated.", + source: "detector", + }, + root, + ); + assert.ok(recorded); + process.chdir(root); + + const pi = makeMockPi(); + registerDbTools(pi); + const tool = pi.tools.find( + (t: any) => t.name === "sf_self_feedback_resolve", + ); + assert.ok(tool, "resolver tool should be registered"); + + const result = await tool.execute( + "call-1", + { + id: recorded.entry.id, + reason: "resolver tool verified", + commit_sha: "abc1234", + test_path: + "src/resources/extensions/sf/tests/self-feedback-resolve-tool.test.ts", + criteria_met: ["Resolver tool exists", "JSONL is updated"], + }, + undefined, + undefined, + undefined, + ); + + assert.equal(result.details?.resolved, true); + const [entry] = readAllSelfFeedback(root).filter( + (e) => e.id === recorded.entry.id, + ); + assert.ok(entry?.resolvedAt); + assert.equal(entry.resolvedEvidence?.kind, "agent-fix"); + assert.equal(entry.resolvedEvidence?.commitSha, "abc1234"); + assert.deepEqual(entry.resolvedCriteriaMet, [ + "Resolver tool exists", + "JSONL is updated", + ]); + const markdown = readFileSync( + join(root, ".sf", "SELF-FEEDBACK.md"), + "utf-8", + ); + assert.match(markdown, /Recently Resolved/); + assert.match(markdown, /inline-fix-resolution-gap/); + }); +}); diff --git a/src/resources/extensions/sf/tests/tool-naming.test.ts b/src/resources/extensions/sf/tests/tool-naming.test.ts index 603419ba9..21b545759 100644 --- a/src/resources/extensions/sf/tests/tool-naming.test.ts +++ b/src/resources/extensions/sf/tests/tool-naming.test.ts @@ -26,6 +26,7 @@ const CANONICAL_DB_TOOLS = [ "sf_summary_save", "sf_milestone_generate_id", "sf_self_report", + "sf_self_feedback_resolve", "sf_plan_milestone", "sf_plan_slice", "sf_plan_task", diff --git a/src/resources/extensions/sf/tests/triage-protocol-registry.test.ts b/src/resources/extensions/sf/tests/triage-protocol-registry.test.ts new file mode 100644 index 000000000..355e0b61a --- /dev/null +++ 
b/src/resources/extensions/sf/tests/triage-protocol-registry.test.ts @@ -0,0 +1,401 @@ +/** + * Triage protocol — registry integration tests. + * + * Purpose: Validate that every finding in the M008 bug registry conforms to + * the triage protocol definitions (severity, status, cluster routing), and + * that the systematic-debugging skill correctly references the protocol. + * + * Consumer: CI gate that blocks milestone completion when registry and + * protocol drift out of sync. + */ + +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { describe, test } from "vitest"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const repoRoot = join(__dirname, "..", "..", "..", "..", ".."); + +// ─── Load canonical artifacts ──────────────────────────────────────────────── + +const registryPath = join(repoRoot, ".sf", "milestones", "M008", "bugs", "bug-registry.json"); +const protocolPath = join(repoRoot, ".sf", "milestones", "M008", "triage-protocol.md"); +const skillPath = join(repoRoot, "src", "resources", "extensions", "sf", "skills", "systematic-debugging", "SKILL.md"); + +const registry = JSON.parse(readFileSync(registryPath, "utf-8")) as { + schema_version: string; + meta: { + source: string; + date: string; + totalFindings: number; + clusters: string[]; + }; + findings: Array<{ + id: string; + file: string; + lines: string; + category: string; + severity: string; + status: string; + description: string; + suggestedFix: string; + cluster: string; + fixedByTaskId?: string; + }>; + summary: { + severity: Record; + status: Record; + cluster: Record; + }; +}; + +const protocol = readFileSync(protocolPath, "utf-8"); +const skill = (() => { + try { + return readFileSync(skillPath, "utf-8"); + } catch { + return ""; + } +})(); + +// ─── Severity definitions from protocol ────────────────────────────────────── + +const VALID_SEVERITIES = 
["HIGH", "MEDIUM", "LOW", "FALSE_POSITIVE"] as const; +const VALID_STATUSES = ["CONFIRMED", "FALSE_POSITIVE", "FIXED", "WONTFIX", "IN_PROGRESS"] as const; + +// Cluster routing table from protocol +const PROTOCOL_CLUSTERS = [ + "engine + verification", + "scaffold + doctor", + "worktree + git", + "memory + state + cache", + "bootstrap + workflow", + "notification + detection + headless", +] as const; + +// ─── Helpers ───────────────────────────────────────────────────────────────── + +function assertFinding( + condition: boolean, + findingId: string, + message: string, +): void { + assert.ok(condition, `Finding ${findingId}: ${message}`); +} + +// ─── Registry structural validity ──────────────────────────────────────────── + +describe("triage-protocol-registry", () => { + test("registry_schema_version_is_1_0_0", () => { + assert.strictEqual(registry.schema_version, "1.0.0", "schema_version must be 1.0.0"); + }); + + test("registry_meta_totalFindings_matches_actual_count", () => { + assert.strictEqual( + registry.meta.totalFindings, + registry.findings.length, + `meta.totalFindings (${registry.meta.totalFindings}) must equal actual findings count (${registry.findings.length})`, + ); + }); + + test("registry_meta_clusters_match_protocol_clusters", () => { + const registryClusters = new Set(registry.meta.clusters); + const protocolClusterSet = new Set(PROTOCOL_CLUSTERS); + assert.deepStrictEqual( + registryClusters, + protocolClusterSet, + "registry meta.clusters must exactly match protocol cluster routing table", + ); + }); + + // ─── Per-finding validation ────────────────────────────────────────────── + + test("every_finding_has_valid_severity", () => { + for (const f of registry.findings) { + assertFinding( + VALID_SEVERITIES.includes(f.severity as (typeof VALID_SEVERITIES)[number]), + f.id, + `severity "${f.severity}" is not one of ${VALID_SEVERITIES.join(", ")}`, + ); + } + }); + + test("every_finding_has_valid_status", () => { + for (const f of 
registry.findings) { + assertFinding( + VALID_STATUSES.includes(f.status as (typeof VALID_STATUSES)[number]), + f.id, + `status "${f.status}" is not one of ${VALID_STATUSES.join(", ")}`, + ); + } + }); + + test("every_finding_belongs_to_protocol_cluster", () => { + for (const f of registry.findings) { + assertFinding( + PROTOCOL_CLUSTERS.includes(f.cluster as (typeof PROTOCOL_CLUSTERS)[number]), + f.id, + `cluster "${f.cluster}" is not in the protocol routing table`, + ); + } + }); + + test("every_finding_has_non_empty_id", () => { + for (const f of registry.findings) { + assertFinding( + f.id.length > 0, + f.id, + "finding id must not be empty", + ); + } + }); + + test("every_finding_has_non_empty_description", () => { + for (const f of registry.findings) { + assertFinding( + f.description.length > 0, + f.id, + "description must not be empty", + ); + } + }); + + test("every_finding_has_non_empty_suggestedFix", () => { + for (const f of registry.findings) { + assertFinding( + f.suggestedFix.length > 0, + f.id, + "suggestedFix must not be empty", + ); + } + }); + + // ─── Severity / status consistency rules ───────────────────────────────── + + test("severity_FALSE_POSITIVE_implies_status_FALSE_POSITIVE", () => { + for (const f of registry.findings) { + if (f.severity === "FALSE_POSITIVE") { + assertFinding( + f.status === "FALSE_POSITIVE", + f.id, + `severity=FALSE_POSITIVE requires status=FALSE_POSITIVE, got status=${f.status}`, + ); + } + } + }); + + test("status_FALSE_POSITIVE_implies_severity_FALSE_POSITIVE", () => { + for (const f of registry.findings) { + if (f.status === "FALSE_POSITIVE") { + assertFinding( + f.severity === "FALSE_POSITIVE", + f.id, + `status=FALSE_POSITIVE requires severity=FALSE_POSITIVE, got severity=${f.severity}`, + ); + } + } + }); + + test("status_FIXED_implies_fixedByTaskId_present", () => { + for (const f of registry.findings) { + if (f.status === "FIXED") { + assertFinding( + f.fixedByTaskId !== undefined && f.fixedByTaskId.length 
> 0, + f.id, + `status=FIXED requires fixedByTaskId to be set`, + ); + } + } + }); + + test("fixedByTaskId_present_only_when_status_FIXED", () => { + for (const f of registry.findings) { + if (f.fixedByTaskId !== undefined) { + assertFinding( + f.status === "FIXED", + f.id, + `fixedByTaskId (${f.fixedByTaskId}) should only be present when status=FIXED, got status=${f.status}`, + ); + } + } + }); + + // ─── Summary statistics accuracy ───────────────────────────────────────── + + test("summary_severity_counts_match_actual", () => { + const actual: Record = {}; + for (const f of registry.findings) { + actual[f.severity] = (actual[f.severity] ?? 0) + 1; + } + assert.deepStrictEqual( + registry.summary.severity, + actual, + "summary.severity counts must match actual finding severities", + ); + }); + + test("summary_status_counts_match_actual", () => { + const actual: Record = {}; + for (const f of registry.findings) { + actual[f.status] = (actual[f.status] ?? 0) + 1; + } + // Compare only keys that exist in either object; zero-count keys in summary are allowed + const allKeys = new Set([...Object.keys(registry.summary.status), ...Object.keys(actual)]); + for (const key of allKeys) { + const expectedCount = registry.summary.status[key] ?? 0; + const actualCount = actual[key] ?? 0; + assert.strictEqual( + actualCount, + expectedCount, + `summary.status["${key}"]: expected ${expectedCount}, got ${actualCount}`, + ); + } + }); + + test("summary_cluster_counts_match_actual", () => { + const actual: Record = {}; + for (const f of registry.findings) { + actual[f.cluster] = (actual[f.cluster] ?? 
0) + 1; + } + assert.deepStrictEqual( + registry.summary.cluster, + actual, + "summary.cluster counts must match actual finding clusters", + ); + }); + + // ─── Protocol content validation ───────────────────────────────────────── + + test("protocol_defines_all_severity_levels", () => { + for (const sev of VALID_SEVERITIES) { + assert.ok( + protocol.includes(sev), + `triage-protocol.md must mention severity level ${sev}`, + ); + } + }); + + test("protocol_defines_all_status_values", () => { + for (const st of VALID_STATUSES) { + assert.ok( + protocol.includes(st), + `triage-protocol.md must mention status value ${st}`, + ); + } + }); + + test("protocol_defines_all_clusters_in_routing_table", () => { + for (const cluster of PROTOCOL_CLUSTERS) { + assert.ok( + protocol.includes(cluster), + `triage-protocol.md cluster routing table must include "${cluster}"`, + ); + } + }); + + test("protocol_contains_confidence_gate_table", () => { + assert.ok( + protocol.includes("Confidence Gate Requirements"), + "protocol must contain Confidence Gate Requirements section", + ); + assert.ok( + protocol.includes("0.90") || protocol.includes("0.95") || protocol.includes("0.80"), + "protocol must list numeric confidence thresholds", + ); + }); + + test("protocol_contains_escalation_rules", () => { + assert.ok( + protocol.includes("Escalation Rules"), + "protocol must contain Escalation Rules section", + ); + }); + + // ─── Skill references protocol correctly ───────────────────────────────── + + test("skill_references_triage_protocol_file", () => { + assert.ok( + skill.includes("triage-protocol.md") || skill.includes("triage protocol"), + "systematic-debugging SKILL.md must reference the triage protocol", + ); + }); + + test("skill_references_bug_registry", () => { + assert.ok( + skill.includes("bug-registry.json"), + "systematic-debugging SKILL.md must reference bug-registry.json", + ); + }); + + test("skill_lists_severity_values", () => { + assert.ok( + (skill.includes('"HIGH"') || 
skill.includes('`HIGH`')) && + (skill.includes('"MEDIUM"') || skill.includes('`MEDIUM`')) && + (skill.includes('"LOW"') || skill.includes('`LOW`')), + "systematic-debugging SKILL.md must list HIGH / MEDIUM / LOW severity values", + ); + }); + + test("skill_mentions_confidence_gate_thresholds", () => { + assert.ok( + skill.includes("0.80") || skill.includes("0.85") || skill.includes("0.90") || skill.includes("0.95"), + "systematic-debugging SKILL.md must mention confidence gate thresholds", + ); + }); + + test("skill_mentions_cluster_aware_fixes", () => { + assert.ok( + skill.includes("cluster-aware") || skill.includes("Cluster-aware"), + "systematic-debugging SKILL.md must mention cluster-aware fixes", + ); + }); + + test("skill_mentions_registry_update_after_fix", () => { + assert.ok( + skill.includes("Update the registry") || skill.includes("update the registry") || skill.includes("bug-registry.json"), + "systematic-debugging SKILL.md must instruct updating registry after fix", + ); + }); + + // ─── Protocol decision flow integrity ──────────────────────────────────── + + test("protocol_decision_flow_has_all_severity_branches", () => { + // The decision flow should branch on HIGH, MEDIUM, and LOW + assert.ok( + protocol.includes("severity = HIGH") || protocol.includes("Is severity = HIGH"), + "protocol decision flow must branch on HIGH severity", + ); + assert.ok( + protocol.includes("severity = MEDIUM") || protocol.includes("Is severity = MEDIUM"), + "protocol decision flow must branch on MEDIUM severity", + ); + assert.ok( + protocol.includes("severity = LOW") || protocol.includes("Is severity = LOW"), + "protocol decision flow must branch on LOW severity", + ); + }); + + test("protocol_high_severity_requires_regression_test", () => { + const highSection = protocol.slice(protocol.indexOf("severity = HIGH")); + assert.ok( + highSection.includes("regression test") || protocol.includes("Require regression test"), + "protocol must require regression test for HIGH 
severity", + ); + }); + + test("protocol_medium_severity_has_confidence_gate_0_85", () => { + assert.ok( + protocol.includes("0.85"), + "protocol must specify 0.85 confidence gate for MEDIUM severity", + ); + }); + + test("protocol_low_severity_has_confidence_gate_0_80", () => { + assert.ok( + protocol.includes("0.80"), + "protocol must specify 0.80 confidence gate for LOW severity", + ); + }); +}); diff --git a/src/resources/extensions/sf/tests/unit-runtime-fsm.test.ts b/src/resources/extensions/sf/tests/unit-runtime-fsm.test.ts new file mode 100644 index 000000000..8a8d9bced --- /dev/null +++ b/src/resources/extensions/sf/tests/unit-runtime-fsm.test.ts @@ -0,0 +1,264 @@ +import assert from "node:assert/strict"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, test } from "vitest"; +import { buildQuerySnapshot } from "../../../../headless-query.ts"; +import { resolveDispatch } from "../auto-dispatch.ts"; +import { + clearUnitRuntimeRecord, + decideUnitRuntimeDispatch, + readUnitRuntimeRecord, + UNIT_RUNTIME_STATUSES, + UNIT_RUNTIME_TERMINAL_STATUSES, + UNIT_RUNTIME_TRANSITIONS, + writeUnitRuntimeRecord, +} from "../unit-runtime.ts"; + +const tmpDirs: string[] = []; + +function makeTmpBase(prefix = "sf-unit-runtime-fsm-"): string { + const base = mkdtempSync(join(tmpdir(), prefix)); + tmpDirs.push(base); + mkdirSync(join(base, ".sf", "milestones"), { recursive: true }); + return base; +} + +function makeParallelResearchProject(): string { + const base = makeTmpBase("sf-unit-runtime-parallel-"); + const milestoneDir = join(base, ".sf", "milestones", "M001"); + mkdirSync(milestoneDir, { recursive: true }); + writeFileSync( + join(milestoneDir, "M001-ROADMAP.md"), + [ + "# M001: Parallel Research Milestone", + "", + "**Vision:** Research-ready slices.", + "", + "## Slices", + "", + "- [ ] **S01: Alpha** `risk:low` `depends:[]`", + "- [ ] **S02: Beta** 
`risk:low` `depends:[]`", + "", + ].join("\n"), + "utf-8", + ); + return base; +} + +async function resolvePlanningDispatch(base: string) { + return resolveDispatch({ + basePath: base, + mid: "M001", + midTitle: "Parallel Research Milestone", + state: { + phase: "planning", + activeMilestone: { + id: "M001", + title: "Parallel Research Milestone", + status: "active", + }, + activeSlice: { id: "S01", title: "Alpha" }, + activeTask: null, + registry: [], + blockers: [], + } as any, + prefs: undefined, + }); +} + +afterEach(() => { + for (const dir of tmpDirs) { + rmSync(dir, { recursive: true, force: true }); + } + tmpDirs.length = 0; +}); + +test("unit_runtime_transitions_when_enumerated_cover_all_statuses", () => { + assert.deepEqual(UNIT_RUNTIME_STATUSES, [ + "queued", + "claimed", + "running", + "progress", + "completed", + "failed", + "blocked", + "cancelled", + "stale", + "runaway-recovered", + "notified", + ]); + assert.deepEqual(UNIT_RUNTIME_TERMINAL_STATUSES, [ + "completed", + "failed", + "blocked", + "cancelled", + "stale", + "runaway-recovered", + ]); + assert.deepEqual(UNIT_RUNTIME_TRANSITIONS, { + queued: ["claimed", "cancelled"], + claimed: ["running", "stale", "cancelled"], + running: [ + "progress", + "completed", + "failed", + "blocked", + "cancelled", + "stale", + "runaway-recovered", + ], + progress: [ + "running", + "completed", + "failed", + "blocked", + "cancelled", + "stale", + "runaway-recovered", + ], + completed: ["notified"], + failed: ["queued", "notified"], + blocked: ["notified"], + cancelled: ["notified"], + stale: ["queued", "notified"], + "runaway-recovered": ["queued", "notified"], + notified: ["queued"], + }); +}); + +test("synthetic_failed_unit_when_not_reset_cannot_redispatch", async () => { + const base = makeParallelResearchProject(); + writeUnitRuntimeRecord( + base, + "research-slice", + "M001/parallel-research", + 1000, + { + status: "failed", + retryCount: 0, + maxRetries: 2, + }, + ); + + const record = 
readUnitRuntimeRecord( + base, + "research-slice", + "M001/parallel-research", + ); + const decision = decideUnitRuntimeDispatch(record); + assert.equal(decision.action, "block"); + assert.equal(decision.reasonCode, "synthetic-reset-required"); + assert.equal(decision.retryCount, 0); + assert.equal(decision.maxRetries, 2); + + const blockedDispatch = await resolvePlanningDispatch(base); + assert.equal(blockedDispatch.action, "dispatch"); + if (blockedDispatch.action === "dispatch") { + assert.equal(blockedDispatch.unitType, "research-slice"); + assert.equal(blockedDispatch.unitId, "M001/S01"); + } + + clearUnitRuntimeRecord(base, "research-slice", "M001/parallel-research"); + const resetDecision = decideUnitRuntimeDispatch( + readUnitRuntimeRecord(base, "research-slice", "M001/parallel-research"), + ); + assert.equal(resetDecision.action, "dispatch"); + assert.equal(resetDecision.reasonCode, "no-runtime-record"); + + const resetDispatch = await resolvePlanningDispatch(base); + assert.equal(resetDispatch.action, "dispatch"); + if (resetDispatch.action === "dispatch") { + assert.equal(resetDispatch.unitType, "research-slice"); + assert.equal(resetDispatch.unitId, "M001/parallel-research"); + } +}); + +test("terminal_status_when_budget_available_produces_expected_dispatch_decision", () => { + const base = makeTmpBase(); + const cases = [ + ["completed", "notify", "terminal-ready-to-notify"], + ["failed", "retry", "retry-budget-available"], + ["blocked", "notify", "terminal-ready-to-notify"], + ["cancelled", "notify", "terminal-ready-to-notify"], + ["stale", "retry", "retry-budget-available"], + ["runaway-recovered", "retry", "retry-budget-available"], + ] as const; + + for (const [status, expectedAction, expectedReason] of cases) { + writeUnitRuntimeRecord(base, "execute-task", `M001/S01/${status}`, 1000, { + status, + retryCount: 0, + maxRetries: 2, + }); + const record = readUnitRuntimeRecord( + base, + "execute-task", + `M001/S01/${status}`, + ); + const decision = 
decideUnitRuntimeDispatch(record); + assert.equal(decision.action, expectedAction, status); + assert.equal(decision.reasonCode, expectedReason, status); + } +}); + +test("retryable_terminal_status_when_budget_exhausted_blocks_dispatch", () => { + const base = makeTmpBase(); + for (const status of ["failed", "stale", "runaway-recovered"] as const) { + writeUnitRuntimeRecord(base, "execute-task", `M001/S01/${status}`, 1000, { + status, + retryCount: 2, + maxRetries: 2, + }); + const decision = decideUnitRuntimeDispatch( + readUnitRuntimeRecord(base, "execute-task", `M001/S01/${status}`), + ); + assert.equal(decision.action, "block", status); + assert.equal(decision.reasonCode, "retry-budget-exhausted", status); + assert.equal(decision.retryCount, 2, status); + assert.equal(decision.maxRetries, 2, status); + } +}); + +test("terminal_status_when_already_notified_skips_dispatch", () => { + const base = makeTmpBase(); + writeUnitRuntimeRecord(base, "execute-task", "M001/S01/T01", 1000, { + status: "failed", + retryCount: 0, + maxRetries: 2, + notifiedAt: 2000, + }); + + const decision = decideUnitRuntimeDispatch( + readUnitRuntimeRecord(base, "execute-task", "M001/S01/T01"), + ); + assert.equal(decision.action, "skip"); + assert.equal(decision.reasonCode, "already-notified"); +}); + +test("headless_query_when_runtime_record_exists_shows_retry_budget", async () => { + const base = makeTmpBase(); + writeUnitRuntimeRecord(base, "execute-task", "M001/S01/T01", 1000, { + status: "failed", + retryCount: 1, + maxRetries: 2, + watchdogReason: "no heartbeat", + outputPath: ".sf/runtime/units/M001-S01-T01.log", + }); + + const snapshot = await buildQuerySnapshot(base); + const unit = snapshot.runtime.units.find( + (item) => + item.unitType === "execute-task" && item.unitId === "M001/S01/T01", + ); + + assert.ok(unit); + assert.equal(unit.status, "failed"); + assert.equal(unit.retryCount, 1); + assert.equal(unit.maxRetries, 2); + assert.equal(unit.retryBudgetRemaining, 1); + 
assert.equal(unit.dispatchDecision.action, "retry"); + assert.equal(unit.dispatchDecision.reasonCode, "retry-budget-available"); + assert.equal(unit.watchdogReason, "no heartbeat"); + assert.equal(unit.outputPath, ".sf/runtime/units/M001-S01-T01.log"); +}); diff --git a/src/resources/extensions/sf/unit-runtime.ts b/src/resources/extensions/sf/unit-runtime.ts index 284b6498a..fff290269 100644 --- a/src/resources/extensions/sf/unit-runtime.ts +++ b/src/resources/extensions/sf/unit-runtime.ts @@ -22,7 +22,126 @@ import { } from "./paths.js"; import { parseUnitId } from "./unit-id.js"; +/** + * Lists every durable unit runtime status in FSM order. + * + * Purpose: give dispatch, recovery, and query surfaces one canonical state + * vocabulary so terminal units cannot be redispatched by ambiguous legacy phases. + * + * Consumer: auto runtime persistence, unit-runtime tests, headless query summaries. + */ +export const UNIT_RUNTIME_STATUSES = [ + "queued", + "claimed", + "running", + "progress", + "completed", + "failed", + "blocked", + "cancelled", + "stale", + "runaway-recovered", + "notified", +] as const; + +/** + * Names the unit statuses that end an execution attempt. + * + * Purpose: centralize the terminal-state union so retry and notification policy + * does not drift between watchdog recovery and dispatch preview logic. + * + * Consumer: decideUnitRuntimeDispatch and operator-facing query summaries. + */ +export const UNIT_RUNTIME_TERMINAL_STATUSES = [ + "completed", + "failed", + "blocked", + "cancelled", + "stale", + "runaway-recovered", +] as const; + +/** + * Describes the explicit unit runtime finite-state-machine transitions. + * + * Purpose: make retry, notification, and reset transitions reviewable as data + * instead of implied by ad hoc marker files or legacy phase strings. + * + * Consumer: unit runtime tests, future dispatch/reconciler guards. 
+ */ +export const UNIT_RUNTIME_TRANSITIONS = { + queued: ["claimed", "cancelled"], + claimed: ["running", "stale", "cancelled"], + running: [ + "progress", + "completed", + "failed", + "blocked", + "cancelled", + "stale", + "runaway-recovered", + ], + progress: [ + "running", + "completed", + "failed", + "blocked", + "cancelled", + "stale", + "runaway-recovered", + ], + completed: ["notified"], + failed: ["queued", "notified"], + blocked: ["notified"], + cancelled: ["notified"], + stale: ["queued", "notified"], + "runaway-recovered": ["queued", "notified"], + notified: ["queued"], +} as const satisfies Record; + +/** + * Enumerates every durable unit runtime status. + * + * Purpose: let persistence and dispatch decisions share one exhaustive status + * type while legacy `phase` remains available for older call sites. + * + * Consumer: AutoUnitRuntimeRecord.status, retry decisions, query summaries. + */ +export type UnitRuntimeStatus = (typeof UNIT_RUNTIME_STATUSES)[number]; + +/** + * Enumerates statuses that end a unit execution attempt. + * + * Purpose: distinguish states that need notify/retry/block policy from active + * states that should not start a second copy of the same unit. + * + * Consumer: decideUnitRuntimeDispatch. + */ +export type UnitRuntimeTerminalStatus = + (typeof UNIT_RUNTIME_TERMINAL_STATUSES)[number]; + +/** + * Captures the durable FSM state embedded in a unit runtime record. + * + * Purpose: expose retry budget, liveness, and notification fields together so + * callers can decide whether a unit should run, retry, block, or notify. + * + * Consumer: writeUnitRuntimeRecord, decideUnitRuntimeDispatch, headless query. 
+ */ +export interface UnitRuntimeState { + status: UnitRuntimeStatus; + retryCount: number; + maxRetries: number; + lastHeartbeatAt: number | null; + lastProgressAt: number; + lastOutputAt: number | null; + outputPath: string | null; + watchdogReason: string | null; + notifiedAt: number | null; +} + export type UnitRuntimePhase = + | UnitRuntimeStatus | "dispatched" | "wrapup-warning-sent" | "runaway-warning-sent" @@ -33,6 +152,14 @@ export type UnitRuntimePhase = | "paused" | "skipped"; +const DEFAULT_UNIT_RUNTIME_MAX_RETRIES = 1; + +const RETRYABLE_TERMINAL_STATUSES = new Set([ + "failed", + "stale", + "runaway-recovered", +]); + export interface ExecuteTaskRecoveryStatus { planPath: string; summaryPath: string; @@ -50,18 +177,263 @@ export interface AutoUnitRuntimeRecord { startedAt: number; updatedAt: number; phase: UnitRuntimePhase; + status: UnitRuntimeStatus; wrapupWarningSent: boolean; continueHereFired: boolean; timeoutAt: number | null; + lastHeartbeatAt?: number | null; lastProgressAt: number; progressCount: number; lastProgressKind: string; + lastOutputAt?: number | null; + outputPath?: string | null; + watchdogReason?: string | null; + notifiedAt?: number | null; recovery?: ExecuteTaskRecoveryStatus; recoveryAttempts?: number; + retryCount?: number; + maxRetries?: number; lastRecoveryReason?: "idle" | "hard"; runawayGuardPause?: RunawayGuardPauseMetadata; } +/** + * Describes whether dispatch may run a unit from its runtime record. + * + * Purpose: surface the same retry-budget decision to tests, dispatch preview, + * and operator diagnostics without reinterpreting terminal states ad hoc. + * + * Consumer: unit-runtime FSM tests and headless query runtime summaries. 
+ */ +export type UnitRuntimeDispatchDecision = + | { + action: "dispatch"; + reasonCode: "no-runtime-record" | "queued"; + retryCount: number; + maxRetries: number; + retryBudgetRemaining: number; + } + | { + action: "retry"; + reasonCode: "retry-budget-available"; + retryCount: number; + maxRetries: number; + retryBudgetRemaining: number; + } + | { + action: "notify"; + reasonCode: "terminal-ready-to-notify"; + retryCount: number; + maxRetries: number; + retryBudgetRemaining: number; + } + | { + action: "block"; + reasonCode: "retry-budget-exhausted" | "synthetic-reset-required"; + retryCount: number; + maxRetries: number; + retryBudgetRemaining: number; + } + | { + action: "skip"; + reasonCode: + | "already-notified" + | "active-or-claimed" + | "notified" + | "terminal-nonretryable"; + retryCount: number; + maxRetries: number; + retryBudgetRemaining: number; + }; + +function hasUpdate( + updates: Partial, + key: K, +): boolean { + return Object.hasOwn(updates, key); +} + +function phaseForStatus(status: UnitRuntimeStatus): UnitRuntimePhase { + switch (status) { + case "queued": + case "claimed": + case "running": + return "dispatched"; + case "progress": + return "wrapup-warning-sent"; + case "completed": + return "finalized"; + default: + return status; + } +} + +function inferStatusFromPhase( + phase: UnitRuntimePhase, + record?: Pick | null, +): UnitRuntimeStatus { + if ((UNIT_RUNTIME_STATUSES as readonly string[]).includes(phase)) { + return phase as UnitRuntimeStatus; + } + switch (phase) { + case "dispatched": + return "running"; + case "wrapup-warning-sent": + case "runaway-warning-sent": + case "runaway-final-warning-sent": + case "recovered": + return "progress"; + case "timeout": + return "stale"; + case "finalized": + return "completed"; + case "paused": + return record?.runawayGuardPause ? 
"runaway-recovered" : "blocked"; + case "skipped": + return "blocked"; + default: + return "running"; + } +} + +function retryBudgetRemaining(retryCount: number, maxRetries: number): number { + return Math.max(0, maxRetries - retryCount); +} + +/** + * Returns true when a runtime status is terminal for one execution attempt. + * + * Purpose: keep terminal-state checks exhaustive against the exported terminal + * union rather than hard-coded differently at each caller. + * + * Consumer: decideUnitRuntimeDispatch and query summary generation. + */ +export function isTerminalUnitRuntimeStatus( + status: UnitRuntimeStatus, +): status is UnitRuntimeTerminalStatus { + return (UNIT_RUNTIME_TERMINAL_STATUSES as readonly string[]).includes(status); +} + +/** + * Returns the normalized FSM state embedded in a runtime record. + * + * Purpose: let legacy records with only `phase` still participate in retry and + * query policy while new records persist explicit FSM fields. + * + * Consumer: decideUnitRuntimeDispatch and headless query summaries. + */ +export function getUnitRuntimeState( + record: AutoUnitRuntimeRecord, +): UnitRuntimeState { + const status = record.status ?? inferStatusFromPhase(record.phase, record); + const retryCount = record.retryCount ?? record.recoveryAttempts ?? 0; + const maxRetries = record.maxRetries ?? DEFAULT_UNIT_RUNTIME_MAX_RETRIES; + return { + status, + retryCount, + maxRetries, + lastHeartbeatAt: record.lastHeartbeatAt ?? null, + lastProgressAt: record.lastProgressAt, + lastOutputAt: record.lastOutputAt ?? null, + outputPath: record.outputPath ?? null, + watchdogReason: record.watchdogReason ?? null, + notifiedAt: record.notifiedAt ?? null, + }; +} + +/** + * Returns true for synthetic units that must be reset before rerun. + * + * Purpose: prevent synthetic orchestration units such as parallel research from + * looping after failure while preserving normal task retry behavior. + * + * Consumer: decideUnitRuntimeDispatch. 
+ */ +export function isSyntheticUnitRuntime(record: AutoUnitRuntimeRecord): boolean { + return ( + record.unitType === "synthetic" || + record.unitId.includes("parallel-research") + ); +} + +/** + * Decides whether a unit runtime record permits dispatch, retry, notify, block, or skip. + * + * Purpose: enforce retry budgets and explicit reset requirements before callers + * schedule another copy of a failed or stale unit. + * + * Consumer: unit-runtime FSM tests and headless query runtime summaries. + */ +export function decideUnitRuntimeDispatch( + record: AutoUnitRuntimeRecord | null, + options: { synthetic?: boolean } = {}, +): UnitRuntimeDispatchDecision { + if (!record) { + return { + action: "dispatch", + reasonCode: "no-runtime-record", + retryCount: 0, + maxRetries: DEFAULT_UNIT_RUNTIME_MAX_RETRIES, + retryBudgetRemaining: DEFAULT_UNIT_RUNTIME_MAX_RETRIES, + }; + } + + const state = getUnitRuntimeState(record); + const remaining = retryBudgetRemaining(state.retryCount, state.maxRetries); + const common = { + retryCount: state.retryCount, + maxRetries: state.maxRetries, + retryBudgetRemaining: remaining, + }; + + if (state.notifiedAt !== null) { + return { action: "skip", reasonCode: "already-notified", ...common }; + } + if (state.status === "notified") { + return { action: "skip", reasonCode: "notified", ...common }; + } + if (state.status === "queued") { + return { action: "dispatch", reasonCode: "queued", ...common }; + } + if (!isTerminalUnitRuntimeStatus(state.status)) { + return { action: "skip", reasonCode: "active-or-claimed", ...common }; + } + + const synthetic = options.synthetic ??
isSyntheticUnitRuntime(record); + if (synthetic && state.status !== "completed") { + return { + action: "block", + reasonCode: "synthetic-reset-required", + ...common, + }; + } + + if (RETRYABLE_TERMINAL_STATUSES.has(state.status)) { + if (remaining > 0) { + return { + action: "retry", + reasonCode: "retry-budget-available", + ...common, + }; + } + return { action: "block", reasonCode: "retry-budget-exhausted", ...common }; + } + + if ( + state.status === "completed" || + state.status === "blocked" || + state.status === "cancelled" + ) { + return { + action: "notify", + reasonCode: "terminal-ready-to-notify", + ...common, + }; + } + + return { action: "skip", reasonCode: "terminal-nonretryable", ...common }; +} + function runtimeDir(basePath: string): string { return join(sfRoot(basePath), "runtime", "units"); } @@ -105,25 +477,68 @@ export function writeUnitRuntimeRecord( mkdirSync(dir, { recursive: true }); const path = runtimePath(basePath, unitType, unitId); const prev = _runtimeCache.get(path) ?? null; + const phase = + updates.phase ?? + (updates.status ? phaseForStatus(updates.status) : prev?.phase) ?? + "dispatched"; + const status = + updates.status ?? + (updates.phase || !prev?.status + ? inferStatusFromPhase(phase, { + runawayGuardPause: + updates.runawayGuardPause ?? prev?.runawayGuardPause, + }) + : prev.status); + const recoveryAttempts = hasUpdate(updates, "recoveryAttempts") + ? (updates.recoveryAttempts ?? 0) + : (prev?.recoveryAttempts ?? 0); + const retryCount = hasUpdate(updates, "retryCount") + ? (updates.retryCount ?? 0) + : hasUpdate(updates, "recoveryAttempts") + ? (updates.recoveryAttempts ?? 0) + : (prev?.retryCount ?? recoveryAttempts ?? 0); const next: AutoUnitRuntimeRecord = { version: 1, unitType, unitId, startedAt, updatedAt: Date.now(), - phase: updates.phase ?? prev?.phase ?? "dispatched", + phase, + status, wrapupWarningSent: updates.wrapupWarningSent ?? prev?.wrapupWarningSent ?? 
false, continueHereFired: updates.continueHereFired ?? prev?.continueHereFired ?? false, - timeoutAt: updates.timeoutAt ?? prev?.timeoutAt ?? null, + timeoutAt: hasUpdate(updates, "timeoutAt") + ? (updates.timeoutAt ?? null) + : (prev?.timeoutAt ?? null), + lastHeartbeatAt: hasUpdate(updates, "lastHeartbeatAt") + ? (updates.lastHeartbeatAt ?? null) + : (prev?.lastHeartbeatAt ?? startedAt), lastProgressAt: updates.lastProgressAt ?? prev?.lastProgressAt ?? Date.now(), progressCount: updates.progressCount ?? prev?.progressCount ?? 0, lastProgressKind: updates.lastProgressKind ?? prev?.lastProgressKind ?? "dispatch", + lastOutputAt: hasUpdate(updates, "lastOutputAt") + ? (updates.lastOutputAt ?? null) + : (prev?.lastOutputAt ?? null), + outputPath: hasUpdate(updates, "outputPath") + ? (updates.outputPath ?? null) + : (prev?.outputPath ?? null), + watchdogReason: hasUpdate(updates, "watchdogReason") + ? (updates.watchdogReason ?? null) + : (prev?.watchdogReason ?? null), + notifiedAt: hasUpdate(updates, "notifiedAt") + ? (updates.notifiedAt ?? null) + : (prev?.notifiedAt ?? null), recovery: updates.recovery ?? prev?.recovery, - recoveryAttempts: updates.recoveryAttempts ?? prev?.recoveryAttempts ?? 0, + recoveryAttempts, + retryCount, + maxRetries: + updates.maxRetries ?? + prev?.maxRetries ?? + DEFAULT_UNIT_RUNTIME_MAX_RETRIES, lastRecoveryReason: updates.lastRecoveryReason ?? prev?.lastRecoveryReason, runawayGuardPause: updates.runawayGuardPause ?? 
prev?.runawayGuardPause, }; diff --git a/src/tests/integration/web-mode-cli.test.ts b/src/tests/integration/web-mode-cli.test.ts index 7a81bef7a..bd4cddd3f 100644 --- a/src/tests/integration/web-mode-cli.test.ts +++ b/src/tests/integration/web-mode-cli.test.ts @@ -8,7 +8,7 @@ import { } from "node:fs"; import { tmpdir } from "node:os"; import { join, resolve } from "node:path"; -import { test, afterEach } from 'vitest'; +import { afterEach, test } from "vitest"; const projectRoot = process.cwd(); @@ -954,3 +954,42 @@ test("reapOrphanedNextServerProcesses returns zero reaped on non-Linux platforms test("reapOrphanedNextServerProcesses is exported and callable", () => { assert.equal(typeof webMode.reapOrphanedNextServerProcesses, "function"); }); + +test("reapOrphanedNextServerProcesses kills orphaned standalone next-server", () => { + const killed: Array<{ pid: number; signal: string }> = []; + const stderrChunks: string[] = []; + const packageRoot = "/tmp/sf-package"; + const result = webMode.reapOrphanedNextServerProcesses( + { + write: (chunk: string) => { + stderrChunks.push(chunk); + return true; + }, + }, + packageRoot, + { + platform: "linux", + execSync: (() => + [ + "123 1 node /tmp/sf-package/dist/web/standalone/node_modules/next/dist/server/next-server.js node", + "124 999 node /tmp/sf-package/dist/web/standalone/node_modules/next/dist/server/next-server.js node", + "125 1 node /elsewhere/next-server.js node", + ].join("\n")) as any, + readlinkSync: ((path: string) => { + if (path === "/proc/123/cwd") + return "/tmp/sf-package/dist/web/standalone"; + if (path === "/proc/124/cwd") + return "/tmp/sf-package/dist/web/standalone"; + return "/elsewhere"; + }) as any, + kill: ((pid: number, signal: string) => { + killed.push({ pid, signal }); + return true; + }) as any, + }, + ); + + assert.equal(result.reaped, 1); + assert.deepEqual(killed, [{ pid: 123, signal: "SIGTERM" }]); + assert.match(stderrChunks.join(""), /Reaped orphaned next-server/); +}); diff --git 
a/src/web-mode.ts b/src/web-mode.ts index a1a755928..9822866b4 100644 --- a/src/web-mode.ts +++ b/src/web-mode.ts @@ -6,7 +6,13 @@ import { spawn, } from "node:child_process"; import { randomBytes } from "node:crypto"; -import { existsSync, readFileSync, unlinkSync, writeFileSync } from "node:fs"; +import { + existsSync, + readFileSync, + readlinkSync, + unlinkSync, + writeFileSync, +} from "node:fs"; import { request as httpRequest } from "node:http"; import { createServer } from "node:net"; import { join, resolve } from "node:path"; @@ -16,10 +22,7 @@ import { } from "./app-paths.js"; const DEFAULT_HOST = "127.0.0.1"; -const DEFAULT_PACKAGE_ROOT = resolve( - import.meta.dirname, - "..", -); +const DEFAULT_PACKAGE_ROOT = resolve(import.meta.dirname, ".."); /** Open a URL in the user's default browser. */ function openBrowser(url: string): void { @@ -685,10 +688,17 @@ function cleanupStaleInstance( export function reapOrphanedNextServerProcesses( stderr: WritableLike, packageRoot = DEFAULT_PACKAGE_ROOT, + deps: { + execSync?: typeof execSync; + readlinkSync?: typeof readlinkSync; + kill?: typeof process.kill; + platform?: NodeJS.Platform; + } = {}, ): { reaped: number; errors: string[] } { const errors: string[] = []; let reaped = 0; - if (process.platform === "win32") { + const platform = deps.platform ?? process.platform; + if (platform === "win32") { // Windows orphan detection not implemented; rely on port-kill fallback return { reaped: 0, errors: [] }; } @@ -696,10 +706,10 @@ export function reapOrphanedNextServerProcesses( // Find next-server processes with cwd matching our standalone host path const standalonePath = resolve(packageRoot, "dist", "web", "standalone"); // Use ps to find node processes with next-server in their command line - const psOutput = execSync( + const psOutput = (deps.execSync ?? 
execSync)( "ps -eo pid,ppid,cmd,comm --no-headers", { encoding: "utf8", timeout: 5000 }, - ); + ) as string; const lines = psOutput.split("\n").filter((line) => line.trim()); for (const line of lines) { const parts = line.trim().split(/\s+/); @@ -715,7 +725,7 @@ export function reapOrphanedNextServerProcesses( // Check if the process cwd matches our standalone path (or deleted variant) let cwd: string | null = null; try { - cwd = readFileSync(`/proc/${pid}/cwd`, "utf8").trim(); + cwd = (deps.readlinkSync ?? readlinkSync)(`/proc/${pid}/cwd`); } catch { // Process may have exited between ps and readlink continue; @@ -728,7 +738,7 @@ export function reapOrphanedNextServerProcesses( const isOrphan = ppid === 1; if (isOrphan) { try { - process.kill(pid, "SIGTERM"); + (deps.kill ?? process.kill)(pid, "SIGTERM"); reaped++; stderr.write( `[forge] Reaped orphaned next-server (pid=${pid}, cwd=${cwd})\n`,