refactor(uok): move auto-dispatch, auto-verification, auto-runaway-guard, auto-unit-closeout into sf/uok/

Per checkpoint-008/009 next-steps: these 4 autonomous-loop modules belong in
the UOK subsystem alongside the other orchestration primitives.

- auto-dispatch.js → uok/auto-dispatch.js
  - Dispatch table + resolveDispatch() is a core UOK orchestration primitive
  - Updated 3 static importers + 1 dynamic await import + 3 test files
- auto-verification.js → uok/auto-verification.js
  - Post-unit verification gate delegates to UOK gates (ChaosMonkey, Security,
    CostGuard, OutcomeLearning, etc.)
  - Updated 1 importer (auto.js)
- auto-runaway-guard.js → uok/auto-runaway-guard.js
  - Diagnostic budget guard; no local relative imports
  - Updated 4 importers (auto-timers.js, preferences-models.js, auto/phases.js,
    auto/run-unit.js)
- auto-unit-closeout.js → uok/auto-unit-closeout.js
  - Unit metrics snapshot + activity log + memory extraction helper
  - Updated 3 importers (auto-timers.js, auto-post-unit.js, auto.js)

Each original file is now a 1-line re-export shim preserving public API.
All 4 are added to uok/index.js as the UOK barrel.

26 dispatch tests pass; full unit suite 4374 tests pass.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-11 03:02:52 +02:00
parent adb449d642
commit 70afabedb7
19 changed files with 3504 additions and 3412 deletions

File diff suppressed because it is too large Load diff

View file

@ -19,7 +19,7 @@ import {
writeBlockerPlaceholder,
} from "./auto-recovery.js";
import { isDeterministicPolicyError } from "./auto-tool-tracking.js";
import { closeoutUnit } from "./auto-unit-closeout.js";
import { closeoutUnit } from "./uok/auto-unit-closeout.js";
import { runSafely } from "./auto-utils.js";
import { syncStateToProjectRoot } from "./auto-worktree.js";
import { invalidateAllCaches } from "./cache.js";
@ -754,7 +754,7 @@ export async function postUnitPreVerification(pctx, opts) {
await resolveAllOverrides(s.basePath);
// Reset both disk and in-memory counters. Disk counter is authoritative
// (survives restarts); in-memory is kept in sync for the current session.
const { setRewriteCount } = await import("./auto-dispatch.js");
const { setRewriteCount } = await import("./uok/auto-dispatch.js");
setRewriteCount(s.basePath, 0);
s.rewriteAttemptCount = 0;
ctx.ui.notify("Override(s) resolved — rewrite-docs completed.", "info");

View file

@ -1,433 +1,22 @@
/**
* Diagnostic budget guard for unusually long autonomous mode units.
* auto-runaway-guard.ts — Barrel re-export for the UOK runaway guard.
*
* This is intentionally not a blind tool-count kill switch. It gives the agent
* explicit turns to explain whether the unit is legitimately large, stuck, or
* churning, then pauses only if the unit keeps consuming budget afterward.
* The implementation has moved into the UOK subsystem under uok/auto-runaway-guard.js.
* This file preserves the original public API so external consumers
* continue to work without changes.
*/
import { execFileSync } from "node:child_process";
import { createHash } from "node:crypto";
import { existsSync, lstatSync, readdirSync, readFileSync } from "node:fs";
import { formatTokenCount } from "@singularity-forge/coding-agent";
// Default per-unit budget thresholds. Crossing them produces diagnostic
// warnings, not an immediate kill (see evaluateRunawayGuard).
export const DEFAULT_RUNAWAY_TOOL_CALL_WARNING = 60;
export const DEFAULT_RUNAWAY_TOKEN_WARNING = 1_000_000;
// Elapsed wall-clock budget, in minutes (converted to ms in resolveRunawayGuardConfig).
export const DEFAULT_RUNAWAY_ELAPSED_MINUTES = 20;
export const DEFAULT_RUNAWAY_CHANGED_FILES_WARNING = 75;
// Number of "explain yourself" warning turns before a hard pause is possible.
export const DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS = 2;
// Minimum gap between repeat warnings unless budgets grow meaningfully.
export const DEFAULT_RUNAWAY_MIN_INTERVAL_MS = 120_000;
// Stricter execute-task "no progress" thresholds used by thresholdReasons.
const EXECUTE_NO_PROGRESS_TOOL_WARNING = 25;
const EXECUTE_NO_PROGRESS_TOKEN_WARNING = 500_000;
// Durable .sf artifact paths folded into the worktree fingerprint so artifact
// writes register as worktree change even when no git-tracked file changed.
const DURABLE_SF_ARTIFACT_PATHS = [
".sf/milestones",
".sf/approvals",
];
// Module-level tracking state for the currently monitored unit (null = idle).
let state = null;
/**
 * Start (or restart) guard tracking for a unit. Baseline values captured here
 * let later evaluations measure growth relative to the unit's start.
 */
export function resetRunawayGuardState(unitType, unitId, baseline) {
  const baselines = {
    baselineSessionTokens: baseline?.sessionTokens ?? 0,
    baselineChangedFiles: baseline?.changedFiles ?? 0,
    baselineWorktreeFingerprint: baseline?.worktreeFingerprint ?? null,
  };
  const counters = {
    warningsSent: 0,
    lastWarningAt: 0,
    lastToolCalls: 0,
    lastSessionTokens: 0,
    lastElapsedMs: 0,
    finalWarningSent: false,
  };
  state = { unitKey: `${unitType}/${unitId}`, ...baselines, ...counters };
}
/** Deactivate the guard by discarding its module-level tracking state. */
export function clearRunawayGuardState() {
state = null;
}
/**
 * Build the effective guard config from supervisor preferences, falling back
 * to the module defaults. `enabled`/`hardPause` default to true unless the
 * preference is explicitly `false`.
 */
export function resolveRunawayGuardConfig(supervisor) {
  const sv = supervisor ?? {};
  const elapsedMinutes =
    sv.runaway_elapsed_minutes ?? DEFAULT_RUNAWAY_ELAPSED_MINUTES;
  return {
    enabled: sv.runaway_guard_enabled !== false,
    toolCallWarning:
      sv.runaway_tool_call_warning ?? DEFAULT_RUNAWAY_TOOL_CALL_WARNING,
    tokenWarning: sv.runaway_token_warning ?? DEFAULT_RUNAWAY_TOKEN_WARNING,
    // Preferences express the elapsed budget in minutes; convert to ms.
    elapsedMs: elapsedMinutes * 60 * 1000,
    changedFilesWarning:
      sv.runaway_changed_files_warning ?? DEFAULT_RUNAWAY_CHANGED_FILES_WARNING,
    diagnosticTurns:
      sv.runaway_diagnostic_turns ?? DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,
    hardPause: sv.runaway_hard_pause !== false,
    minIntervalMs: DEFAULT_RUNAWAY_MIN_INTERVAL_MS,
  };
}
/**
 * Sum assistant-message token usage across the current session. Prefers the
 * entry's total-token figure; falls back to summing the individual buckets.
 * Best-effort: any failure yields 0 rather than throwing.
 */
export function collectSessionTokenUsage(ctx) {
  try {
    const entries = ctx.sessionManager?.getEntries?.() ?? [];
    return entries.reduce((sum, entry) => {
      const message = entry.message;
      if (message?.role !== "assistant" || !message.usage) return sum;
      const usage = message.usage;
      const totalTokens = numeric(usage.totalTokens ?? usage.total);
      if (totalTokens > 0) return sum + totalTokens;
      // No precomputed total — add the component counts instead.
      return (
        sum +
        numeric(usage.input) +
        numeric(usage.output) +
        numeric(usage.cacheRead) +
        numeric(usage.cacheWrite)
      );
    }, 0);
  } catch {
    return 0;
  }
}
/**
 * Count non-empty lines of `git status --short` in `cwd` — i.e. the number of
 * changed/untracked paths. Returns 0 on any failure (not a repo, git missing,
 * timeout, bad cwd).
 */
export function countChangedFiles(cwd) {
  try {
    const output = execFileSync("git", ["status", "--short"], {
      cwd,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "ignore"],
      timeout: 2000,
    });
    let count = 0;
    for (const rawLine of output.split("\n")) {
      if (rawLine.trim() !== "") count += 1;
    }
    return count;
  } catch {
    return 0;
  }
}
/**
 * Produce a sha256 fingerprint of the dirty worktree: every porcelain status
 * line plus the content of each referenced file, plus the durable .sf
 * artifacts. A clean tree hashes the sentinel "git-clean". Returns null when
 * git fails (not a repo, timeout, etc.).
 */
export function collectWorktreeFingerprint(cwd) {
  try {
    const porcelain = execFileSync(
      "git",
      ["status", "--porcelain=v1", "--untracked-files=all"],
      {
        cwd,
        encoding: "utf8",
        stdio: ["ignore", "pipe", "ignore"],
        timeout: 2000,
      },
    );
    const entries = porcelain
      .split("\n")
      .map((raw) => raw.trimEnd())
      .filter(Boolean);
    const digest = createHash("sha256");
    if (entries.length === 0) {
      digest.update("git-clean");
      digest.update("\0");
    }
    for (const entry of entries) {
      digest.update(entry);
      digest.update("\0");
      const filePath = parsePorcelainPath(entry);
      if (filePath) appendFileFingerprint(digest, cwd, filePath);
    }
    // Fold in durable .sf artifacts so artifact-only progress changes the hash.
    appendDurableSfArtifactFingerprint(digest, cwd);
    return digest.digest("hex");
  } catch {
    return null;
  }
}
// Hash the durable .sf artifact trees (milestones, approvals) into `hash`,
// prefixed by a section marker so the segment is unambiguous.
function appendDurableSfArtifactFingerprint(hash, cwd) {
  hash.update("sf-artifacts");
  hash.update("\0");
  DURABLE_SF_ARTIFACT_PATHS.forEach((artifactPath) => {
    appendPathFingerprint(hash, cwd, artifactPath);
  });
}
// Recursively hash a path under `cwd` into `hash`. Each state (missing,
// unreadable, dir, file) contributes a distinct NUL-terminated marker so
// structurally different trees cannot collide. Directory entries are sorted
// for determinism.
function appendPathFingerprint(hash, cwd, relativePath) {
  const mark = (tag) => {
    hash.update(tag);
    hash.update("\0");
  };
  const fullPath = `${cwd}/${relativePath}`;
  if (!existsSync(fullPath)) {
    mark(`missing:${relativePath}`);
    return;
  }
  let stat;
  try {
    stat = lstatSync(fullPath);
  } catch {
    mark(`unreadable:${relativePath}`);
    return;
  }
  if (!stat.isDirectory()) {
    appendFileFingerprint(hash, cwd, relativePath);
    return;
  }
  mark(`dir:${relativePath}`);
  let children;
  try {
    children = readdirSync(fullPath).sort();
  } catch {
    mark(`unreadable-dir:${relativePath}`);
    return;
  }
  for (const child of children) {
    appendPathFingerprint(hash, cwd, `${relativePath}/${child}`);
  }
}
// Hash one file's identity and content into `hash`. Non-file paths get a
// `type:` marker, unreadable or vanished paths an `unreadable-or-deleted:`
// marker — every branch writes something so the fingerprint stays stable.
function appendFileFingerprint(hash, cwd, relativePath) {
  const record = (chunk) => {
    hash.update(chunk);
    hash.update("\0");
  };
  const fullPath = `${cwd}/${relativePath}`;
  try {
    const stat = lstatSync(fullPath);
    if (!stat.isFile()) {
      record(`type:${relativePath}:${stat.isDirectory() ? "dir" : "other"}`);
      return;
    }
    record(`file:${relativePath}`);
    record(readFileSync(fullPath));
  } catch {
    record(`unreadable-or-deleted:${relativePath}`);
  }
}
// Decide whether the current unit should be left alone ("none"), sent a
// diagnostic budget warning ("warn"), or hard-paused ("pause").
// State machine: thresholds fire -> up to config.diagnosticTurns warnings are
// issued (rate-limited by minIntervalMs unless budgets keep growing) -> after
// the final warning, continued meaningful growth with no file progress
// triggers a pause (when config.hardPause is on).
export function evaluateRunawayGuard(
unitType,
unitId,
metrics,
config,
now = Date.now(),
) {
if (!config.enabled) return { action: "none" };
if (config.diagnosticTurns <= 0) return { action: "none" };
const unitKey = `${unitType}/${unitId}`;
// A new unit resets the per-unit tracking (warnings sent, last-seen budgets).
if (!state || state.unitKey !== unitKey)
resetRunawayGuardState(unitType, unitId);
const s = state;
// Convert absolute session metrics into unit-relative deltas against the
// baselines captured at unit start.
const unitMetrics = normalizeMetricsToUnit(metrics, s);
const reasons = thresholdReasons(unitType, unitMetrics, config);
if (reasons.length === 0) return { action: "none" };
// Rate-limit repeat warnings: stay silent inside minIntervalMs unless the
// budget has grown meaningfully since the last warning.
if (
s.lastWarningAt > 0 &&
now - s.lastWarningAt < config.minIntervalMs &&
!hasMeaningfulGrowth(unitMetrics, s, config)
) {
return { action: "none" };
}
// Skip hard-pause if the unit is making file-change progress — growth with
// changes is legitimate diagnostic/planning work, not a stuck loop.
// Without this check, discuss/plan phases that legitimately consume tokens
// while writing summaries/plans would be hard-paused despite making progress.
if (
(unitMetrics.changedFiles ?? 0) > 0 ||
unitMetrics.worktreeChangedSinceStart === true
) {
return { action: "none" };
}
// Final warning already delivered and budgets still growing with zero file
// progress: escalate to a pause, with full metadata for the pause record.
if (
config.hardPause &&
s.finalWarningSent &&
hasMeaningfulGrowth(unitMetrics, s, config)
) {
const reason =
`Runaway guard paused ${unitType} ${unitId}: budget kept growing after ` +
`${config.diagnosticTurns} diagnostic turn(s). ` +
formatMetricSummary(unitMetrics);
return {
action: "pause",
reason,
metadata: {
reason,
pausedAt: now,
unitType,
unitId,
diagnosticTurns: config.diagnosticTurns,
warningsSent: s.warningsSent,
thresholdReasons: reasons,
metrics: unitMetrics,
lastWarningMetrics: {
toolCalls: s.lastToolCalls,
sessionTokens: s.lastSessionTokens,
elapsedMs: s.lastElapsedMs,
},
thresholds: {
toolCallWarning: config.toolCallWarning,
tokenWarning: config.tokenWarning,
elapsedMs: config.elapsedMs,
changedFilesWarning: config.changedFilesWarning,
minIntervalMs: config.minIntervalMs,
},
},
};
}
// Otherwise issue (another) warning and record the budget levels it was
// issued at, so future growth is measured against them.
const final = s.warningsSent + 1 >= config.diagnosticTurns;
s.warningsSent++;
s.lastWarningAt = now;
s.lastToolCalls = unitMetrics.toolCalls;
s.lastSessionTokens = unitMetrics.sessionTokens;
s.lastElapsedMs = unitMetrics.elapsedMs;
if (final) s.finalWarningSent = true;
return {
action: "warn",
final,
message: buildRunawayGuardMessage(
unitType,
unitId,
unitMetrics,
reasons,
final,
),
};
}
// Rebase absolute session metrics onto the unit's starting baselines so every
// count is "since this unit began". When both fingerprints are available,
// worktree change is derived from fingerprint inequality; otherwise the
// caller-provided flag passes through unchanged.
function normalizeMetricsToUnit(metrics, state) {
  const hasFingerprints =
    metrics.worktreeFingerprint !== undefined &&
    metrics.worktreeFingerprint !== null &&
    state.baselineWorktreeFingerprint !== null;
  const worktreeChangedSinceStart = hasFingerprints
    ? metrics.worktreeFingerprint !== state.baselineWorktreeFingerprint
    : metrics.worktreeChangedSinceStart;
  const normalized = { ...metrics, worktreeChangedSinceStart };
  normalized.sessionTokens = Math.max(
    0,
    metrics.sessionTokens - state.baselineSessionTokens,
  );
  normalized.changedFiles =
    metrics.changedFiles === undefined
      ? undefined
      : Math.max(0, metrics.changedFiles - state.baselineChangedFiles);
  return normalized;
}
// Collect human-readable reasons for which budget thresholds this unit has
// crossed. Tool calls, elapsed time, and changed-file count are primary
// signals; token usage is secondary and only reported alongside a primary
// signal — except for the execute-task no-progress heuristic, where many
// tool calls + many tokens + zero file changes is itself a strong signal.
function thresholdReasons(unitType, metrics, config) {
  const reasons = [];
  const pushIf = (condition, text) => {
    if (condition) reasons.push(text);
  };
  // Primary signal: high tool call count — strong indicator of runaway/churn.
  pushIf(
    config.toolCallWarning > 0 && metrics.toolCalls >= config.toolCallWarning,
    `${metrics.toolCalls} tool calls (warning ${config.toolCallWarning})`,
  );
  // Primary signal: long elapsed time — unit may be stuck.
  pushIf(
    config.elapsedMs > 0 && metrics.elapsedMs >= config.elapsedMs,
    `${Math.round(metrics.elapsedMs / 60000)}min elapsed (warning ${Math.round(config.elapsedMs / 60000)}min)`,
  );
  // Primary signal: many changed files — possible churn/duplication.
  pushIf(
    config.changedFilesWarning > 0 &&
      (metrics.changedFiles ?? 0) >= config.changedFilesWarning,
    `${metrics.changedFiles} new changed files (warning ${config.changedFilesWarning})`,
  );
  // Secondary signal: tokens only count when a primary signal is present.
  // This prevents false positives on units that do real work with large
  // context models (a 25-tool-call unit can legitimately burn 1M+ tokens).
  const hasPrimarySignal = reasons.length > 0;
  if (
    hasPrimarySignal &&
    config.tokenWarning > 0 &&
    metrics.sessionTokens >= config.tokenWarning
  ) {
    reasons.push(
      `${formatTokenCount(metrics.sessionTokens)} unit tokens (warning ${formatTokenCount(config.tokenWarning)})`,
    );
  }
  // Exception: execute-task no-progress heuristic — no file changes despite
  // many tool calls and tokens can trigger without any primary signal.
  if (
    unitType === "execute-task" &&
    (metrics.changedFiles ?? 0) === 0 &&
    metrics.worktreeChangedSinceStart !== true &&
    metrics.toolCalls >= EXECUTE_NO_PROGRESS_TOOL_WARNING &&
    metrics.sessionTokens >= EXECUTE_NO_PROGRESS_TOKEN_WARNING
  ) {
    reasons.push(
      `no new file changes after ${metrics.toolCalls} tool calls and ${formatTokenCount(metrics.sessionTokens)} tokens`,
    );
  }
  return reasons;
}
// True when the unit's budget grew meaningfully since the last warning:
// tool calls by at least a quarter of the warning threshold (min 5), tokens
// by a quarter of the token threshold (min 50k), or elapsed time by one full
// warning interval.
function hasMeaningfulGrowth(metrics, state, config) {
  const toolGrowth = Math.max(5, Math.floor(config.toolCallWarning / 4));
  const tokenGrowth = Math.max(50_000, Math.floor(config.tokenWarning / 4));
  if (metrics.toolCalls - state.lastToolCalls >= toolGrowth) return true;
  if (metrics.sessionTokens - state.lastSessionTokens >= tokenGrowth) {
    return true;
  }
  return metrics.elapsedMs - state.lastElapsedMs >= config.minIntervalMs;
}
// Compose the warning message shown to the agent: a title (final vs. first
// warning), the triggering budget signals, the top-5 tool mix, a changed-files
// progress line, and explicit guidance on what to do next.
function buildRunawayGuardMessage(unitType, unitId, metrics, reasons, final) {
  let topTools = "";
  if (metrics.topTools) {
    topTools = Object.entries(metrics.topTools)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5)
      .map((pair) => `${pair[0]}x${pair[1]}`)
      .join(", ");
  }
  const title = final
    ? "**RUNAWAY UNIT FINAL WARNING - write diagnosis and handoff now.**"
    : "**RUNAWAY UNIT BUDGET WARNING - diagnose before continuing.**";
  const guidance = final
    ? "You have already received a budget warning. Do not start new exploration. Write or update the durable artifact/handoff now, explicitly stating whether the unit was legitimately large, blocked, or stuck in a loop."
    : "Before more exploration or broad edits, state why this unit is still running: legitimately large, blocked, or stuck/churning. Then either finish the required artifact or write a precise handoff.";
  const lines = [
    title,
    `Unit: ${unitType} ${unitId}`,
    `Budget signals: ${reasons.join("; ")}.`,
    topTools ? `Tool mix: ${topTools}.` : "",
    formatChangedFilesLine(unitType, metrics),
    "",
    guidance,
  ];
  return lines.filter(Boolean).join("\n");
}

// One line describing file-change progress. Execute-task units get stricter
// wording since zero changed files means no durable implementation progress.
function formatChangedFilesLine(unitType, metrics) {
  if ((metrics.changedFiles ?? 0) > 0) {
    return `Working tree has ${metrics.changedFiles} new changed file(s) since this unit started. Active edits are not automatically healthy progress; check for repeated or broad churn.`;
  }
  if (unitType !== "execute-task") return "";
  return metrics.worktreeChangedSinceStart
    ? "Working tree has 0 new changed file paths, but dirty file content changed since this execute-task started."
    : "Working tree has 0 new changed files since this execute-task started. For implementation work, that is no durable progress yet.";
}
// Compact, comma-joined one-line summary of the unit's budget metrics for use
// in pause reasons.
function formatMetricSummary(metrics) {
  const parts = [
    `${metrics.toolCalls} tool calls`,
    `${formatTokenCount(metrics.sessionTokens)} tokens`,
    `${Math.round(metrics.elapsedMs / 60000)}min elapsed`,
  ];
  if (metrics.changedFiles !== undefined) {
    parts.push(`${metrics.changedFiles} new changed files`);
  }
  if (metrics.worktreeChangedSinceStart) {
    parts.push("dirty file content changed");
  }
  return parts.join(", ");
}
// Extract the file path from a `git status --porcelain=v1` line ("XY path").
// Rename lines ("old -> new") yield the new path; surrounding quotes added by
// git for special characters are stripped. Returns null when no path remains.
function parsePorcelainPath(line) {
  if (line.length < 4) return null;
  let candidate = line.slice(3);
  const arrow = " -> ";
  const arrowAt = candidate.lastIndexOf(arrow);
  if (arrowAt !== -1) {
    candidate = candidate.slice(arrowAt + arrow.length);
  }
  if (candidate.startsWith('"') && candidate.endsWith('"')) {
    candidate = candidate.slice(1, -1);
  }
  return candidate === "" ? null : candidate;
}
// Coerce a value to a usable number: finite numbers pass through, everything
// else (NaN, Infinity, non-numbers, undefined) becomes 0.
function numeric(value) {
  return Number.isFinite(value) ? value : 0;
}
// Public-API re-export shim: the implementation now lives in
// uok/auto-runaway-guard.js. This barrel keeps every original export name
// available so existing importers of auto-runaway-guard.js keep working.
export {
clearRunawayGuardState,
collectSessionTokenUsage,
collectWorktreeFingerprint,
countChangedFiles,
DEFAULT_RUNAWAY_CHANGED_FILES_WARNING,
DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,
DEFAULT_RUNAWAY_ELAPSED_MINUTES,
DEFAULT_RUNAWAY_MIN_INTERVAL_MS,
DEFAULT_RUNAWAY_TOOL_CALL_WARNING,
DEFAULT_RUNAWAY_TOKEN_WARNING,
evaluateRunawayGuard,
resetRunawayGuardState,
resolveRunawayGuardConfig,
} from "./uok/auto-runaway-guard.js";

View file

@ -13,7 +13,7 @@ import {
countChangedFiles,
evaluateRunawayGuard,
resolveRunawayGuardConfig,
} from "./auto-runaway-guard.js";
} from "./uok/auto-runaway-guard.js";
import { detectWorkingTreeActivity } from "./auto-supervisor.js";
import { recoverTimedOutUnit } from "./auto-timeout-recovery.js";
import {
@ -24,7 +24,7 @@ import {
getTotalToolCallCount,
hasInteractiveToolInFlight,
} from "./auto-tool-tracking.js";
import { closeoutUnit } from "./auto-unit-closeout.js";
import { closeoutUnit } from "./uok/auto-unit-closeout.js";
import {
computeBudgets,
resolveExecutorContextWindow,

View file

@ -1,85 +1,8 @@
/**
* Unit closeout helper consolidates the repeated pattern of
* snapshotting metrics + saving activity log + extracting memories
* that appears 6+ times in auto.ts.
* auto-unit-closeout.ts — Barrel re-export for the UOK unit closeout helper.
*
* The implementation has moved into the UOK subsystem under uok/auto-unit-closeout.js.
* This file preserves the original public API so external consumers
* continue to work without changes.
*/
import { saveActivityLog } from "./activity-log.js";
import { snapshotUnitMetrics } from "./metrics.js";
import { updateSubscriptionTokensUsed } from "./preferences-models.js";
import { writeTurnGitTransaction } from "./uok/gitops.js";
import { logWarning } from "./workflow-logger.js";
/**
* Snapshot metrics, save activity log, and fire-and-forget memory extraction
* for a completed unit. Returns the activity log file path (if any).
*
* @param ctx agent context; ctx.model supplies provider/id for the snapshot
* @param basePath project root for activity-log and git-transaction writes
* @param unitType / unitId identify the completed unit
* @param startedAt unit start timestamp, passed through to snapshotUnitMetrics
* @param opts optional: metrics options plus trace/turn ids and git
*   action/status used to record a turn git-transaction
* @returns the activity log file path, or undefined when none was written
*/
export async function closeoutUnit(
ctx,
basePath,
unitType,
unitId,
startedAt,
opts,
) {
const provider = ctx.model?.provider;
const id = ctx.model?.id;
// Prefer "provider/id"; fall back to the bare id, then "unknown".
const modelId = provider && id ? `${provider}/${id}` : (id ?? "unknown");
const unit = snapshotUnitMetrics(
ctx,
unitType,
unitId,
startedAt,
modelId,
opts,
);
// Track subscription token consumption for amortized cost reporting.
// Fire-and-forget: updateSubscriptionTokensUsed is already best-effort.
if (provider && unit && unit.tokens.total > 0) {
updateSubscriptionTokensUsed(provider, unit.tokens.total);
}
const activityFile = saveActivityLog(ctx, basePath, unitType, unitId);
if (activityFile) {
// Memory extraction is optional: the extractor is loaded lazily, and both
// the load and the extraction log a warning on failure instead of throwing.
try {
const { buildMemoryLLMCall, extractMemoriesFromUnit } = await import(
"./memory-extractor.js"
);
const llmCallFn = buildMemoryLLMCall(ctx);
if (llmCallFn) {
extractMemoriesFromUnit(
activityFile,
unitType,
unitId,
llmCallFn,
).catch((err) => {
logWarning(
"engine",
`memory extraction failed for ${unitType}/${unitId}: ${err.message}`,
);
});
}
} catch (err) {
/* non-fatal */
logWarning(
"engine",
`operation failed: ${err instanceof Error ? err.message : String(err)}`,
);
}
}
// Record a turn git-transaction only when the caller supplied the complete
// trace/turn/action/status tuple.
if (opts?.traceId && opts.turnId && opts.gitAction && opts.gitStatus) {
writeTurnGitTransaction({
basePath,
traceId: opts.traceId,
turnId: opts.turnId,
unitType,
unitId,
stage: "record",
action: opts.gitAction,
push: opts.gitPush === true,
status: opts.gitStatus,
error: opts.gitError,
metadata: {
activityFile,
},
});
}
return activityFile ?? undefined;
}
export { closeoutUnit } from "./uok/auto-unit-closeout.js";

View file

@ -1,824 +1,8 @@
/**
* Post-unit verification gate for autonomous mode.
* auto-verification.ts — Barrel re-export for the UOK post-unit verification gate.
*
* Runs typecheck/lint/test checks, captures runtime errors, performs
* dependency audits, handles auto-fix retry logic, and writes
* verification evidence JSON.
*
* Extracted from handleAgentEnd() in auto.ts. Returns a sentinel
* value instead of calling return/pauseAuto directly — the caller
* checks the result and handles control flow.
* The implementation has moved into the UOK subsystem under uok/auto-verification.js.
* This file preserves the original public API so external consumers
* continue to work without changes.
*/
import { mkdirSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { loadFile } from "./files.js";
import { parseRoadmap } from "./parsers.js";
import { resolveMilestoneFile, resolveSlicePath } from "./paths.js";
import { runPostExecutionChecks } from "./post-execution-checks.js";
import { loadEffectiveSFPreferences } from "./preferences.js";
import {
getMilestoneSlices,
getSliceTasks,
getTask,
isDbAvailable,
} from "./sf-db.js";
import { isMilestoneComplete } from "./state.js";
import { isClosedStatus } from "./status-guards.js";
import { parseUnitId } from "./unit-id.js";
import { ChaosMonkeyGate } from "./uok/chaos-monkey.js";
import { CostGuardGate } from "./uok/cost-guard-gate.js";
import { resolveUokFlags } from "./uok/flags.js";
import { UokGateRunner } from "./uok/gate-runner.js";
import { MultiPackageGate } from "./uok/multi-package-gate.js";
import { OutcomeLearningGate } from "./uok/outcome-learning-gate.js";
import { SecurityGate } from "./uok/security-gate.js";
import {
formatExecuteTaskRecoveryStatus,
inspectExecuteTaskDurability,
} from "./uok/unit-runtime.js";
import { extractVerdict } from "./verdict-parser.js";
import { writeVerificationJSON } from "./verification-evidence.js";
import {
captureRuntimeErrors,
formatFailureContext,
runDependencyAudit,
runVerificationGate,
} from "./verification-gate.js";
import { logError, logWarning } from "./workflow-logger.js";
// Sum totalTokens across assistant message entries in the current session.
// Entries without a message, non-assistant messages, and messages lacking a
// usage.totalTokens figure contribute nothing.
function computeTokenCountFromSession(ctx) {
  const entries = ctx.sessionManager?.getEntries?.() ?? [];
  return entries
    .filter((entry) => entry.type === "message")
    .map((entry) => entry.message)
    .filter((msg) => msg?.role === "assistant" && msg.usage?.totalTokens != null)
    .reduce((sum, msg) => sum + msg.usage.totalTokens, 0);
}
// Current V8 heap usage rounded to whole megabytes; undefined if the runtime
// refuses to report memory usage.
function getMemoryPressureMB() {
  try {
    const { heapUsed } = process.memoryUsage();
    return Math.round(heapUsed / (1024 * 1024));
  } catch {
    return undefined;
  }
}
// Map each gate id to its result outcome ("unknown" when the result is
// missing or has no outcome). Returns undefined when there are no gates or
// no results to summarize.
function buildGateOutcomesSummary(gateIds, gateResults) {
  if (!gateIds?.length || !gateResults) return undefined;
  return Object.fromEntries(
    gateIds.map((id, index) => [id, gateResults[index]?.outcome ?? "unknown"]),
  );
}
// Heuristic: does this stderr text look like an infrastructure failure
// (missing binary, DNS/network error, timeout) rather than a genuine
// verification failure? Matches common errno codes and shell error phrases.
function isInfraVerificationFailure(stderr) {
  const infraPattern =
    /\b(ENOENT|ENOTFOUND|ETIMEDOUT|ECONNRESET|EAI_AGAIN|spawn\s+\S+\s+ENOENT|command not found)\b/i;
  return infraPattern.test(stderr);
}
/**
* Post-unit guard for `validate-milestone` units (#4094).
*
* When validate-milestone writes verdict=needs-remediation, the agent is
* expected to also call reassess_roadmap in the same turn to add
* remediation slices. If they don't, the state machine re-derives
* `phase: validating-milestone` indefinitely (all slices still complete +
* verdict still needs-remediation), wasting ~3 dispatches before the stuck
* detector fires.
*
* This guard fires immediately on the first occurrence: if VALIDATION.md
* verdict is needs-remediation and no incomplete slices exist for the
* milestone, pause the auto-loop with a clear blocker.
*
* @returns "continue" when the loop may proceed, "pause" after pauseAuto was
*   invoked for the no-remediation-slices case.
*/
async function runValidateMilestonePostCheck(vctx, pauseAuto) {
const { s, ctx, pi } = vctx;
const prefs = loadEffectiveSFPreferences()?.preferences;
const uokFlags = resolveUokFlags(prefs);
// Persist the outcome of this post-check as a UOK gate record; no-op when
// gates are disabled or no unit is active.
const persistMilestoneValidationGate = async (
outcome,
failureClass,
rationale,
findings = "",
milestoneId,
) => {
if (!uokFlags.gates || !s.currentUnit) return;
const gateRunner = new UokGateRunner();
gateRunner.register({
id: "milestone-validation-post-check",
type: "verification",
execute: async () => ({
outcome,
failureClass,
rationale,
findings,
}),
});
await gateRunner.run("milestone-validation-post-check", {
basePath: s.basePath,
traceId: `validation-post-check:${s.currentUnit.id}`,
turnId: s.currentUnit.id,
milestoneId,
unitType: s.currentUnit.type,
unitId: s.currentUnit.id,
});
};
// Bail out early whenever the unit, milestone id, or VALIDATION.md content
// cannot be resolved — the guard only applies when all three exist.
if (!s.currentUnit) return "continue";
const { milestone: mid } = parseUnitId(s.currentUnit.id);
if (!mid) return "continue";
const validationFile = resolveMilestoneFile(s.basePath, mid, "VALIDATION");
if (!validationFile) return "continue";
const validationContent = await loadFile(validationFile);
if (!validationContent) return "continue";
const verdict = extractVerdict(validationContent);
if (verdict !== "needs-remediation") {
await persistMilestoneValidationGate(
"pass",
"none",
`milestone validation verdict is ${verdict}; no remediation loop risk`,
"",
mid,
);
return "continue";
}
const incompleteSliceCount = await countIncompleteSlices(s.basePath, mid);
// If any non-closed slices exist, the agent successfully queued remediation
// work — proceed normally. The state machine will execute those slices and
// re-validate per the #3596/#3670 fix.
if (incompleteSliceCount > 0) {
await persistMilestoneValidationGate(
"pass",
"none",
`remediation slices present (${incompleteSliceCount}); validation can continue`,
"",
mid,
);
return "continue";
}
// needs-remediation with zero queued slices: notify, record the gate as
// manual-attention, and pause the auto-loop for human review.
ctx.ui.notify(
`Milestone ${mid} validation returned verdict=needs-remediation but no remediation slices were added. Pausing for human review.`,
"error",
);
process.stderr.write(
`validate-milestone: pausing — verdict=needs-remediation with no incomplete slices for ${mid}. ` +
`The agent must call reassess_roadmap to add remediation slices before re-validation.\n`,
);
await persistMilestoneValidationGate(
"manual-attention",
"manual-attention",
"needs-remediation verdict without queued remediation slices",
`No incomplete slices found for ${mid} while verdict=needs-remediation`,
mid,
);
await pauseAuto(ctx, pi);
return "pause";
}
/**
 * Count slices for a milestone that are not in a closed status.
 * DB-backed projects are authoritative (#4094 peer review); falls back to
 * roadmap parsing only when the DB is unavailable. Returns 1 ("unknown,
 * assume incomplete") whenever the data cannot be read, so the caller never
 * pauses on missing information.
 */
async function countIncompleteSlices(basePath, milestoneId) {
  if (isDbAvailable()) {
    const slices = getMilestoneSlices(milestoneId);
    // No DB rows — treat as "unknown", do not pause.
    if (slices.length === 0) return 1;
    let open = 0;
    for (const slice of slices) {
      if (!isClosedStatus(slice.status)) open += 1;
    }
    return open;
  }
  // Filesystem fallback: parse the roadmap markdown.
  try {
    const roadmapFile = resolveMilestoneFile(basePath, milestoneId, "ROADMAP");
    if (!roadmapFile) return 1;
    const roadmapContent = await loadFile(roadmapFile);
    if (!roadmapContent) return 1;
    const roadmap = parseRoadmap(roadmapContent);
    if (roadmap.slices.length === 0) return 1;
    return isMilestoneComplete(roadmap) ? 0 : 1;
  } catch {
    // Parsing failures should not cause false-positive pauses.
    return 1;
  }
}
/**
* Run the verification gate for the current execute-task unit.
* Returns:
* - "continue" gate passed (or no checks configured), proceed normally
* - "retry" gate failed with retries remaining, s.pendingVerificationRetry set for loop re-iteration
* - "pause" gate failed with retries exhausted, pauseAuto already called
*/
export async function runPostUnitVerification(vctx, pauseAuto) {
const { s, ctx, pi } = vctx;
if (!s.currentUnit) {
return "continue";
}
if (s.currentUnit.type === "validate-milestone") {
return await runValidateMilestonePostCheck(vctx, pauseAuto);
}
if (s.currentUnit.type !== "execute-task") {
return "continue";
}
// ── Zone 1: Gate machinery (outer try) ──────────────────────────────────
// Failures here indicate broken infrastructure — pause for human review.
let prefs;
let uokFlags;
let mid;
let sid;
let tid;
let result;
try {
const effectivePrefs = loadEffectiveSFPreferences();
prefs = effectivePrefs?.preferences;
uokFlags = resolveUokFlags(prefs);
// Read task plan verify field
({ milestone: mid, slice: sid, task: tid } = parseUnitId(s.currentUnit.id));
let taskPlanVerify;
if (mid && sid && tid) {
if (isDbAvailable()) {
taskPlanVerify = getTask(mid, sid, tid)?.verify;
}
// When DB unavailable, taskPlanVerify stays undefined — gate runs without task-specific checks
}
result = runVerificationGate({
cwd: s.basePath,
preferenceCommands: prefs?.verification_commands,
taskPlanVerify,
});
// Handle skipped gate (no commands discovered) — fail-closed but not a hard failure
if (result.skipped === true) {
process.stderr.write(
"verification-gate: no commands discovered — gate skipped, not passed\n",
);
ctx.ui.notify(
"[verify] SKIP — no verification commands configured",
"warning",
);
return "continue";
}
// Capture runtime errors
const runtimeErrors = await captureRuntimeErrors();
if (runtimeErrors.length > 0) {
result.runtimeErrors = runtimeErrors;
if (runtimeErrors.some((e) => e.blocking)) {
result.passed = false;
}
}
// Dependency audit
const auditWarnings = runDependencyAudit(s.basePath);
if (auditWarnings.length > 0) {
result.auditWarnings = auditWarnings;
process.stderr.write(
`verification-gate: ${auditWarnings.length} audit warning(s)\n`,
);
for (const w of auditWarnings) {
process.stderr.write(` [${w.severity}] ${w.name}: ${w.title}\n`);
}
}
} catch (machineryErr) {
logError(
"engine",
`verification-gate machinery error — pausing for human review: ${machineryErr.message}`,
);
ctx.ui.notify(
"verification-gate machinery error — pausing for human review",
"error",
);
await pauseAuto(ctx, pi);
return "pause";
}
// ── Zone 2: Ancillary post-gate work (inner try) ─────────────────────────
// Failures here are non-fatal — evidence writes, UOK gate calls, notifications, retry logic.
let gateIds = [];
let gateResults = [];
try {
if (uokFlags.gates) {
const gateRunner = new UokGateRunner();
gateRunner.register({
id: "verification-gate",
type: "verification",
execute: async () => ({
outcome: result.passed ? "pass" : "fail",
failureClass: result.runtimeErrors?.some((e) => e.blocking)
? "execution"
: "verification",
rationale: result.passed
? "verification checks passed"
: "verification checks failed",
findings: result.passed ? "" : formatFailureContext(result),
}),
});
if (uokFlags.securityGuard) {
gateRunner.register(new SecurityGate());
}
if (uokFlags.multiPackageHealing) {
gateRunner.register(new MultiPackageGate());
}
if (uokFlags.autonomousCostGuard) {
gateRunner.register(new CostGuardGate());
}
if (uokFlags.outcomeLearning) {
gateRunner.register(new OutcomeLearningGate());
}
if (uokFlags.chaosMonkey) {
gateRunner.register(new ChaosMonkeyGate({ active: true }));
}
const baseCtx = {
basePath: s.basePath,
traceId: `verification:${s.currentUnit.id}`,
turnId: s.currentUnit.id,
milestoneId: mid ?? undefined,
sliceId: sid ?? undefined,
taskId: tid ?? undefined,
unitType: s.currentUnit.type,
unitId: s.currentUnit.id,
iteration: s.verificationRetryCount.get(s.currentUnit.id) ?? 0,
};
gateIds = gateRunner.list().map((g) => g.id);
gateResults = await Promise.all(
gateIds.map((id) =>
gateRunner
.run(id, {
...baseCtx,
traceId: `${id}:${s.currentUnit.id}`,
})
.catch((err) => ({
outcome: "fail",
failureClass: "unknown",
rationale: `Gate ${id} threw: ${err instanceof Error ? err.message : String(err)}`,
})),
),
);
for (let i = 0; i < gateIds.length; i++) {
const id = gateIds[i];
const res = gateResults[i];
if (res.outcome !== "fail") continue;
result.passed = false;
if (id === "security-guard") {
result.securityFailure = true;
result.securityRationale = res.rationale;
result.securityFindings = res.findings;
} else if (id === "multi-package-healing") {
result.multiPackageFailure = true;
result.multiPackageRationale = res.rationale;
result.multiPackageFindings = res.findings;
} else if (id === "cost-guard") {
result.costGuardFailure = true;
result.costGuardRationale = res.rationale;
} else if (id === "chaos-monkey") {
result.chaosMonkeyFailure = true;
result.chaosMonkeyRationale = res.rationale;
}
}
}
// Auto-fix retry preferences
const autoFixEnabled = prefs?.verification_auto_fix !== false;
const maxRetries =
typeof prefs?.verification_max_retries === "number"
? prefs.verification_max_retries
: 2;
if (result.checks.length > 0) {
const passCount = result.checks.filter((c) => c.exitCode === 0).length;
const total = result.checks.length;
const commandList = result.checks.map((c) => c.command).join(" | ");
ctx.ui.notify(`[verify] running: ${commandList}`, "info");
const attemptSoFar = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;
if (result.passed) {
ctx.ui.notify(`[verify] PASS - ${passCount}/${total} checks`, "info");
} else {
const failures = result.checks.filter((c) => c.exitCode !== 0);
const failNames = failures.map((f) => f.command).join(", ");
const nextAttempt = attemptSoFar + 1;
ctx.ui.notify(
`[verify] FAIL - ${failNames} (auto-fix attempt ${nextAttempt}/${maxRetries})`,
"info",
);
process.stderr.write(
`verification-gate: ${total - passCount}/${total} checks failed\n`,
);
for (const f of failures) {
process.stderr.write(` ${f.command} exited ${f.exitCode}\n`);
if (f.stderr)
process.stderr.write(` stderr: ${f.stderr.slice(0, 500)}\n`);
}
}
}
// Log blocking runtime errors
if (result.runtimeErrors?.some((e) => e.blocking)) {
const blockingErrors = result.runtimeErrors.filter((e) => e.blocking);
process.stderr.write(
`verification-gate: ${blockingErrors.length} blocking runtime error(s) detected\n`,
);
for (const err of blockingErrors) {
process.stderr.write(
` [${err.source}] ${err.severity}: ${err.message.slice(0, 200)}\n`,
);
}
}
// Log security failures
if (result.securityFailure) {
ctx.ui.notify(
`[verify] SECURITY FAIL — ${result.securityRationale}`,
"error",
);
process.stderr.write(
`verification-gate: security failure: ${result.securityRationale}\n`,
);
if (result.securityFindings) {
process.stderr.write(`${result.securityFindings}\n`);
}
}
// Log multi-package failures
if (result.multiPackageFailure) {
ctx.ui.notify(
`[verify] MULTI-PACKAGE FAIL — ${result.multiPackageRationale}`,
"error",
);
process.stderr.write(
`verification-gate: multi-package healing failure: ${result.multiPackageRationale}\n`,
);
if (result.multiPackageFindings) {
process.stderr.write(`${result.multiPackageFindings}\n`);
}
}
// Log cost-guard failures
if (result.costGuardFailure) {
ctx.ui.notify(
`[verify] COST-GUARD FAIL — ${result.costGuardRationale}`,
"error",
);
process.stderr.write(
`verification-gate: cost-guard failure: ${result.costGuardRationale}\n`,
);
}
// Log chaos-monkey failures
if (result.chaosMonkeyFailure) {
ctx.ui.notify(
`[verify] CHAOS-MONKEY FAIL — ${result.chaosMonkeyRationale}`,
"error",
);
process.stderr.write(
`verification-gate: chaos-monkey injected failure: ${result.chaosMonkeyRationale}\n`,
);
}
// Write verification evidence JSON
const attempt = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;
const tokenCount = computeTokenCountFromSession(ctx);
const memoryPressureMB = getMemoryPressureMB();
const gateOutcomes = buildGateOutcomesSummary(gateIds, gateResults);
let recoveryStatus;
try {
const durability = await inspectExecuteTaskDurability(
s.basePath,
s.currentUnit.id,
);
if (durability) {
recoveryStatus = formatExecuteTaskRecoveryStatus(durability);
}
} catch {
recoveryStatus = undefined;
}
if (mid && sid && tid) {
try {
const sDir = resolveSlicePath(s.basePath, mid, sid);
if (sDir) {
const tasksDir = join(sDir, "tasks");
if (result.passed) {
writeVerificationJSON(
result,
tasksDir,
tid,
s.currentUnit.id,
undefined,
undefined,
tokenCount,
memoryPressureMB,
gateOutcomes,
recoveryStatus,
);
} else {
const nextAttempt = attempt + 1;
writeVerificationJSON(
result,
tasksDir,
tid,
s.currentUnit.id,
nextAttempt,
maxRetries,
tokenCount,
memoryPressureMB,
gateOutcomes,
recoveryStatus,
);
}
}
} catch (evidenceErr) {
logWarning(
"engine",
`verification-evidence write error: ${evidenceErr.message}`,
);
}
}
const advisoryFailure =
!result.passed &&
(result.discoverySource === "package-json" ||
result.checks.some((check) =>
isInfraVerificationFailure(check.stderr),
));
if (advisoryFailure) {
s.verificationRetryCount.delete(s.currentUnit.id);
s.pendingVerificationRetry = null;
ctx.ui.notify(
result.discoverySource === "package-json"
? "Verification failed in auto-discovered package.json checks — treating as advisory."
: "Verification failed due to infrastructure/runtime environment issues — treating as advisory.",
"warning",
);
return "continue";
}
// ── Post-execution checks (run after main verification passes for execute-task units) ──
let postExecChecks;
let postExecBlockingFailure = false;
if (result.passed && mid && sid && tid) {
// Check preferences — respect enhanced_verification and enhanced_verification_post
const enhancedEnabled = prefs?.enhanced_verification !== false; // default true
const postEnabled = prefs?.enhanced_verification_post !== false; // default true
if (enhancedEnabled && postEnabled && isDbAvailable()) {
try {
// Get the completed task from DB
const taskRow = getTask(mid, sid, tid);
if (taskRow && taskRow.key_files && taskRow.key_files.length > 0) {
// Get all tasks in the slice
const allTasks = getSliceTasks(mid, sid);
// Filter to prior completed tasks (status = 'complete' or 'done', before current task)
const priorTasks = allTasks.filter(
(t) =>
(t.status === "complete" || t.status === "done") &&
t.id !== tid &&
t.sequence < taskRow.sequence,
);
// Run post-execution checks
const postExecResult = runPostExecutionChecks(
taskRow,
priorTasks,
s.basePath,
);
// Store checks for evidence JSON
postExecChecks = postExecResult.checks;
// Log summary to stderr with sf-post-exec: prefix
const emoji =
postExecResult.status === "pass"
? "✅"
: postExecResult.status === "warn"
? "⚠️"
: "❌";
process.stderr.write(
`sf-post-exec: ${emoji} Post-execution checks ${postExecResult.status} for ${mid}/${sid}/${tid} (${postExecResult.durationMs}ms)\n`,
);
// Log individual check results
for (const check of postExecResult.checks) {
const checkEmoji = check.passed
? "✓"
: check.blocking
? "✗"
: "⚠";
process.stderr.write(
`sf-post-exec: ${checkEmoji} [${check.category}] ${check.target}: ${check.message}\n`,
);
}
if (uokFlags.gates) {
const strictMode = prefs?.enhanced_verification_strict === true;
const warnEscalated =
postExecResult.status === "warn" && strictMode;
const blockingFailure =
postExecResult.status === "fail" || warnEscalated;
const findings = postExecResult.checks
.filter((check) => !check.passed)
.map(
(check) =>
`[${check.category}] ${check.target}: ${check.message}`,
)
.join("\n");
const gateRunner = new UokGateRunner();
gateRunner.register({
id: "post-execution-checks",
type: "artifact",
execute: async () => ({
outcome: blockingFailure ? "fail" : "pass",
failureClass:
postExecResult.status === "fail"
? "artifact"
: warnEscalated
? "policy"
: "none",
rationale: blockingFailure
? `post-execution checks ${postExecResult.status}${warnEscalated ? " (strict)" : ""}`
: "post-execution checks passed",
findings,
}),
});
await gateRunner.run("post-execution-checks", {
basePath: s.basePath,
traceId: `verification:${s.currentUnit.id}`,
turnId: s.currentUnit.id,
milestoneId: mid,
sliceId: sid,
taskId: tid,
unitType: s.currentUnit.type,
unitId: s.currentUnit.id,
});
}
// Check for blocking failures
if (postExecResult.status === "fail") {
postExecBlockingFailure = true;
const blockingCount = postExecResult.checks.filter(
(c) => !c.passed && c.blocking,
).length;
ctx.ui.notify(
`Post-execution checks failed: ${blockingCount} blocking issue${blockingCount === 1 ? "" : "s"} found`,
"error",
);
} else if (postExecResult.status === "warn") {
ctx.ui.notify(
`Post-execution checks passed with warnings`,
"warning",
);
// Strict mode: treat warnings as blocking
if (prefs?.enhanced_verification_strict === true) {
postExecBlockingFailure = true;
}
}
}
} catch (postExecErr) {
// Post-execution check errors are non-fatal — log and continue
logWarning("engine", `sf-post-exec: error — ${postExecErr.message}`);
}
}
}
// Re-write verification evidence JSON with post-execution checks
if (postExecChecks && postExecChecks.length > 0 && mid && sid && tid) {
try {
const sDir = resolveSlicePath(s.basePath, mid, sid);
if (sDir) {
const tasksDir = join(sDir, "tasks");
// Add postExecutionChecks to the result for the JSON write
const resultWithPostExec = {
...result,
// Mark as failed if there was a blocking post-exec failure
passed: result.passed && !postExecBlockingFailure,
};
// Manually write with postExecutionChecks field
writeVerificationJSONWithPostExec(
resultWithPostExec,
tasksDir,
tid,
s.currentUnit.id,
postExecChecks,
postExecBlockingFailure ? attempt + 1 : undefined,
postExecBlockingFailure ? maxRetries : undefined,
tokenCount,
memoryPressureMB,
gateOutcomes,
recoveryStatus,
);
}
} catch (evidenceErr) {
logWarning(
"engine",
`verification-evidence: post-exec write error — ${evidenceErr.message}`,
);
}
}
// Update result.passed based on post-execution checks
if (postExecBlockingFailure) {
result.passed = false;
}
// ── Auto-fix retry logic ──
if (result.passed) {
s.verificationRetryCount.delete(s.currentUnit.id);
s.pendingVerificationRetry = null;
return "continue";
} else if (postExecBlockingFailure) {
// Post-execution failures are cross-task consistency issues — retrying the same task won't fix them.
// Skip retry and pause immediately for human review.
s.verificationRetryCount.delete(s.currentUnit.id);
s.pendingVerificationRetry = null;
ctx.ui.notify(
`Post-execution checks failed — cross-task consistency issue detected, pausing for human review`,
"error",
);
await pauseAuto(ctx, pi);
return "pause";
} else if (autoFixEnabled && attempt + 1 <= maxRetries) {
const nextAttempt = attempt + 1;
s.verificationRetryCount.set(s.currentUnit.id, nextAttempt);
s.pendingVerificationRetry = {
unitId: s.currentUnit.id,
failureContext: formatFailureContext(result),
attempt: nextAttempt,
};
const failedCmds = result.checks
.filter((c) => c.exitCode !== 0)
.map((c) => c.command);
const cmdSummary =
failedCmds.length <= 3
? failedCmds.join(", ")
: `${failedCmds.slice(0, 3).join(", ")}... and ${failedCmds.length - 3} more`;
ctx.ui.notify(
`Verification failed (${cmdSummary}) — auto-fix attempt ${nextAttempt}/${maxRetries}`,
"warning",
);
// Return "retry" — the autoLoop while loop will re-iterate with the retry context
return "retry";
} else {
// Gate failed, retries exhausted
s.verificationRetryCount.delete(s.currentUnit.id);
s.pendingVerificationRetry = null;
const exhaustedFails = result.checks
.filter((c) => c.exitCode !== 0)
.map((c) => c.command);
const exhaustedSummary =
exhaustedFails.length <= 3
? exhaustedFails.join(", ")
: `${exhaustedFails.slice(0, 3).join(", ")}... and ${exhaustedFails.length - 3} more`;
ctx.ui.notify(
`Verification gate FAILED after ${attempt} ${attempt === 1 ? "retry" : "retries"} (${exhaustedSummary}) — pausing for human review`,
"error",
);
await pauseAuto(ctx, pi);
return "pause";
}
} catch (err) {
// Ancillary post-gate errors are non-fatal — log warning and continue
logWarning("engine", `verification-gate error: ${err.message}`);
return "continue";
}
}
/**
 * Write verification evidence JSON for a task, including post-execution
 * check results. This is a variant of writeVerificationJSON that appends
 * the postExecutionChecks field (plus optional runtime errors and
 * dependency-audit warnings when present on the result).
 *
 * Writes `<tasksDir>/<taskId>-VERIFY.json`, creating tasksDir if needed.
 */
function writeVerificationJSONWithPostExec(
  result,
  tasksDir,
  taskId,
  unitId,
  postExecutionChecks,
  retryAttempt,
  maxRetries,
  tokenCount,
  memoryPressureMB,
  gateOutcomes,
  recoveryStatus,
) {
  mkdirSync(tasksDir, { recursive: true });
  // Spread-if-defined keeps optional keys out of the JSON entirely when
  // absent, while preserving the key insertion order of the evidence object.
  const optional = (key, value) =>
    value === undefined ? {} : { [key]: value };
  const checkSummaries = result.checks.map((check) => ({
    command: check.command,
    exitCode: check.exitCode,
    durationMs: check.durationMs,
    verdict: check.exitCode === 0 ? "pass" : "fail",
  }));
  const evidence = {
    schemaVersion: 1,
    taskId,
    unitId: unitId ?? taskId,
    timestamp: result.timestamp,
    passed: result.passed,
    discoverySource: result.discoverySource,
    checks: checkSummaries,
    ...optional("retryAttempt", retryAttempt),
    ...optional("maxRetries", maxRetries),
    ...optional("tokenCount", tokenCount),
    ...optional("memoryPressureMB", memoryPressureMB),
    ...optional("gateOutcomes", gateOutcomes),
    ...optional("recoveryStatus", recoveryStatus),
    postExecutionChecks,
  };
  const runtimeErrors = result.runtimeErrors ?? [];
  if (runtimeErrors.length > 0) {
    evidence.runtimeErrors = runtimeErrors.map((e) => ({
      source: e.source,
      severity: e.severity,
      message: e.message,
      blocking: e.blocking,
    }));
  }
  const auditWarnings = result.auditWarnings ?? [];
  if (auditWarnings.length > 0) {
    evidence.auditWarnings = auditWarnings.map((w) => ({
      name: w.name,
      severity: w.severity,
      title: w.title,
      url: w.url,
      fixAvailable: w.fixAvailable,
    }));
  }
  writeFileSync(
    join(tasksDir, `${taskId}-VERIFY.json`),
    JSON.stringify(evidence, null, 2) + "\n",
    "utf-8",
  );
}
export { runPostUnitVerification } from "./uok/auto-verification.js";

View file

@ -52,7 +52,7 @@ import {
clearSliceProgressCache,
updateSliceProgressCache,
} from "./auto-dashboard.js";
import { DISPATCH_RULES, resolveDispatch } from "./auto-dispatch.js";
import { DISPATCH_RULES, resolveDispatch } from "./uok/auto-dispatch.js";
import {
_resetPendingResolve,
isSessionSwitchInFlight,
@ -86,8 +86,8 @@ import {
isQueuedUserMessageSkip,
isToolInvocationError,
} from "./auto-tool-tracking.js";
import { closeoutUnit } from "./auto-unit-closeout.js";
import { runPostUnitVerification } from "./auto-verification.js";
import { closeoutUnit } from "./uok/auto-unit-closeout.js";
import { runPostUnitVerification } from "./uok/auto-verification.js";
import {
autoWorktreeBranch,
checkResourcesStale,

View file

@ -29,7 +29,7 @@ import {
collectWorktreeFingerprint,
countChangedFiles,
resetRunawayGuardState,
} from "../auto-runaway-guard.js";
} from "../uok/auto-runaway-guard.js";
import {
formatToolCallSummary,
resetToolCallCounts,

View file

@ -8,7 +8,7 @@ import {
collectWorktreeFingerprint,
countChangedFiles,
resetRunawayGuardState,
} from "../auto-runaway-guard.js";
} from "../uok/auto-runaway-guard.js";
import { scopeActiveToolsForUnitType } from "../constants.js";
import { debugLog } from "../debug-logger.js";
import {

View file

@ -5,7 +5,7 @@
* and dispatch logic. This is the "dev" engine it wraps the current SF
* autonomous mode behavior behind the engine-polymorphic interface.
*/
import { resolveDispatch } from "./auto-dispatch.js";
import { resolveDispatch } from "./uok/auto-dispatch.js";
import { loadEffectiveSFPreferences } from "./preferences.js";
import { deriveState } from "./state.js";
// ─── Bridge: DispatchAction → EngineDispatchAction ────────────────────────

View file

@ -15,7 +15,7 @@ import {
DEFAULT_RUNAWAY_ELAPSED_MINUTES,
DEFAULT_RUNAWAY_TOKEN_WARNING,
DEFAULT_RUNAWAY_TOOL_CALL_WARNING,
} from "./auto-runaway-guard.js";
} from "./uok/auto-runaway-guard.js";
import { selectByBenchmarks } from "./benchmark-selector.js";
import { defaultRoutingConfig, MODEL_CAPABILITY_TIER } from "./model-router.js";

View file

@ -41,7 +41,7 @@ vi.mock("../auto-prompts.js", () => ({
checkNeedsRunUat: vi.fn(async () => null),
}));
import { resolveDispatch } from "../auto-dispatch.js";
import { resolveDispatch } from "../uok/auto-dispatch.js";
import {
closeDatabase,
insertMilestone,

View file

@ -5,7 +5,7 @@
*/
import { beforeEach, describe, expect, it, vi } from "vitest";
import { enhanceUnitRankingWithMemory } from "../auto-dispatch.js";
import { enhanceUnitRankingWithMemory } from "../uok/auto-dispatch.js";
// Mock memory store
vi.mock("../memory-store.js", () => ({

View file

@ -14,7 +14,7 @@ import { existsSync, mkdirSync, readFileSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { afterEach, beforeEach, describe, it } from "vitest";
import { DISPATCH_RULES } from "../auto-dispatch.js";
import { DISPATCH_RULES } from "../uok/auto-dispatch.js";
import { createScheduleStore } from "../schedule/schedule-store.js";
import { generateULID } from "../schedule/schedule-ulid.js";

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,433 @@
/**
* Diagnostic budget guard for unusually long autonomous mode units.
*
* This is intentionally not a blind tool-count kill switch. It gives the agent
* explicit turns to explain whether the unit is legitimately large, stuck, or
* churning, then pauses only if the unit keeps consuming budget afterward.
*/
import { execFileSync } from "node:child_process";
import { createHash } from "node:crypto";
import { existsSync, lstatSync, readdirSync, readFileSync } from "node:fs";
import { formatTokenCount } from "@singularity-forge/coding-agent";
// Default budget thresholds at which a unit is flagged as unusually large.
// All of these can be overridden via supervisor preferences — see
// resolveRunawayGuardConfig().
export const DEFAULT_RUNAWAY_TOOL_CALL_WARNING = 60;
export const DEFAULT_RUNAWAY_TOKEN_WARNING = 1_000_000;
export const DEFAULT_RUNAWAY_ELAPSED_MINUTES = 20;
export const DEFAULT_RUNAWAY_CHANGED_FILES_WARNING = 75;
// How many explicit diagnostic turns the agent gets before a hard pause
// becomes possible.
export const DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS = 2;
// Minimum spacing between warnings so the agent is not re-warned every turn
// unless its budget grows meaningfully in between.
export const DEFAULT_RUNAWAY_MIN_INTERVAL_MS = 120_000;
// Stricter "no progress" thresholds applied only to execute-task units
// (see thresholdReasons()).
const EXECUTE_NO_PROGRESS_TOOL_WARNING = 25;
const EXECUTE_NO_PROGRESS_TOKEN_WARNING = 500_000;
// Paths under .sf/ whose contents count as durable progress and are folded
// into the worktree fingerprint (see appendDurableSfArtifactFingerprint()).
const DURABLE_SF_ARTIFACT_PATHS = [
  ".sf/milestones",
  ".sf/approvals",
];
// Per-unit mutable guard state; null until resetRunawayGuardState() runs.
let state = null;
/**
 * Start tracking a new unit, replacing any previous guard state.
 *
 * Baseline values are captured here and later subtracted from cumulative
 * session readings (see normalizeMetricsToUnit()) so that thresholds apply
 * to this unit's own consumption only.
 *
 * @param {string} unitType - Unit kind (e.g. "execute-task").
 * @param {string} unitId - Unit identifier; combined with unitType as the key.
 * @param {{sessionTokens?: number, changedFiles?: number, worktreeFingerprint?: string|null}} [baseline]
 *   Metrics observed at unit start; missing fields default to 0 / null.
 */
export function resetRunawayGuardState(unitType, unitId, baseline) {
  state = {
    unitKey: `${unitType}/${unitId}`,
    baselineSessionTokens: baseline?.sessionTokens ?? 0,
    baselineChangedFiles: baseline?.changedFiles ?? 0,
    baselineWorktreeFingerprint: baseline?.worktreeFingerprint ?? null,
    warningsSent: 0,
    lastWarningAt: 0,
    lastToolCalls: 0,
    lastSessionTokens: 0,
    lastElapsedMs: 0,
    finalWarningSent: false,
  };
}
/** Drop all per-unit guard state so no unit is being tracked. */
export function clearRunawayGuardState() {
  state = null;
}
/**
 * Build the effective runaway-guard configuration from supervisor
 * preferences, falling back to module defaults for any unset field.
 *
 * @param {object|null|undefined} supervisor - Supervisor preference object;
 *   may be absent entirely.
 * @returns {object} Resolved config: enabled/hardPause booleans, warning
 *   thresholds, elapsed budget in ms, diagnostic turn count, and the fixed
 *   minimum warning interval.
 */
export function resolveRunawayGuardConfig(supervisor) {
  const sup = supervisor ?? {};
  const elapsedMinutes =
    sup.runaway_elapsed_minutes ?? DEFAULT_RUNAWAY_ELAPSED_MINUTES;
  return {
    // Both toggles default to on; only an explicit `false` disables them.
    enabled: sup.runaway_guard_enabled !== false,
    toolCallWarning:
      sup.runaway_tool_call_warning ?? DEFAULT_RUNAWAY_TOOL_CALL_WARNING,
    tokenWarning: sup.runaway_token_warning ?? DEFAULT_RUNAWAY_TOKEN_WARNING,
    elapsedMs: elapsedMinutes * 60 * 1000,
    changedFilesWarning:
      sup.runaway_changed_files_warning ??
      DEFAULT_RUNAWAY_CHANGED_FILES_WARNING,
    diagnosticTurns:
      sup.runaway_diagnostic_turns ?? DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,
    hardPause: sup.runaway_hard_pause !== false,
    minIntervalMs: DEFAULT_RUNAWAY_MIN_INTERVAL_MS,
  };
}
/**
 * Sum token usage across all assistant messages in the current session.
 *
 * Prefers the pre-computed total (usage.totalTokens / usage.total); when
 * that is missing or zero, falls back to summing the individual
 * input/output/cacheRead/cacheWrite components. Returns 0 on any error.
 */
export function collectSessionTokenUsage(ctx) {
  try {
    const entries = ctx.sessionManager?.getEntries?.() ?? [];
    return entries.reduce((sum, entry) => {
      const message = entry.message;
      if (message?.role !== "assistant" || !message.usage) return sum;
      const usage = message.usage;
      const totalTokens = numeric(usage.totalTokens ?? usage.total);
      if (totalTokens > 0) return sum + totalTokens;
      return (
        sum +
        numeric(usage.input) +
        numeric(usage.output) +
        numeric(usage.cacheRead) +
        numeric(usage.cacheWrite)
      );
    }, 0);
  } catch {
    return 0;
  }
}
/**
 * Count the number of changed/untracked files reported by `git status
 * --short` in the given directory. Returns 0 when git fails (not a repo,
 * git missing, timeout, bad cwd).
 */
export function countChangedFiles(cwd) {
  try {
    const output = execFileSync("git", ["status", "--short"], {
      cwd,
      encoding: "utf8",
      stdio: ["ignore", "pipe", "ignore"],
      timeout: 2000,
    });
    let count = 0;
    for (const rawLine of output.split("\n")) {
      if (rawLine.trim() !== "") count += 1;
    }
    return count;
  } catch {
    return 0;
  }
}
/**
 * Compute a sha256 fingerprint of the dirty working tree plus durable .sf
 * artifacts, used to detect whether file content actually changed during a
 * unit even when the set of changed paths stayed the same.
 *
 * Each porcelain status line is hashed, then the content of the referenced
 * file, then the .sf artifact trees. Returns null when `git status` cannot
 * be run (not a repo, git missing, timeout).
 */
export function collectWorktreeFingerprint(cwd) {
  try {
    const status = execFileSync(
      "git",
      ["status", "--porcelain=v1", "--untracked-files=all"],
      {
        cwd,
        encoding: "utf8",
        stdio: ["ignore", "pipe", "ignore"],
        timeout: 2000,
      },
    );
    const lines = status
      .split("\n")
      .map((line) => line.trimEnd())
      .filter(Boolean);
    const hash = createHash("sha256");
    // Hash an explicit marker for a clean tree so "clean" is distinguishable
    // from other states rather than being an empty digest input.
    if (lines.length === 0) {
      hash.update("git-clean");
      hash.update("\0");
    }
    for (const line of lines) {
      hash.update(line);
      hash.update("\0");
      const filePath = parsePorcelainPath(line);
      if (!filePath) continue;
      // Fold file content (not just the status line) into the digest.
      appendFileFingerprint(hash, cwd, filePath);
    }
    appendDurableSfArtifactFingerprint(hash, cwd);
    return hash.digest("hex");
  } catch {
    return null;
  }
}
/**
 * Fold the durable .sf artifact trees into the worktree fingerprint.
 * A section marker is hashed first so artifact data cannot collide with
 * porcelain status entries.
 */
function appendDurableSfArtifactFingerprint(hash, cwd) {
  hash.update("sf-artifacts");
  hash.update("\0");
  DURABLE_SF_ARTIFACT_PATHS.forEach((artifactPath) => {
    appendPathFingerprint(hash, cwd, artifactPath);
  });
}
/**
 * Recursively fold a path (file or directory) into the fingerprint hash.
 *
 * Missing or unreadable entries hash a sentinel string instead of throwing,
 * so the digest stays stable and comparable across runs. Directory entries
 * are traversed in sorted order to keep the digest independent of readdir
 * ordering.
 */
function appendPathFingerprint(hash, cwd, relativePath) {
  const fullPath = `${cwd}/${relativePath}`;
  if (!existsSync(fullPath)) {
    hash.update(`missing:${relativePath}`);
    hash.update("\0");
    return;
  }
  let stat;
  try {
    stat = lstatSync(fullPath);
  } catch {
    hash.update(`unreadable:${relativePath}`);
    hash.update("\0");
    return;
  }
  if (stat.isDirectory()) {
    hash.update(`dir:${relativePath}`);
    hash.update("\0");
    let entries;
    try {
      entries = readdirSync(fullPath).sort();
    } catch {
      hash.update(`unreadable-dir:${relativePath}`);
      hash.update("\0");
      return;
    }
    // Recurse into each child in deterministic (sorted) order.
    for (const entry of entries) {
      appendPathFingerprint(hash, cwd, `${relativePath}/${entry}`);
    }
    return;
  }
  // Non-directory: delegate to the file fingerprint (which also handles
  // symlinks/other types via its own lstat check).
  appendFileFingerprint(hash, cwd, relativePath);
}
/**
 * Fold a single file's identity and content into the fingerprint hash.
 *
 * Regular files hash `file:<path>` followed by their bytes; non-file entries
 * hash a `type:` tag; anything that cannot be stat'ed or read hashes an
 * `unreadable-or-deleted:` sentinel so the digest never throws.
 */
function appendFileFingerprint(hash, cwd, relativePath) {
  const fullPath = `${cwd}/${relativePath}`;
  try {
    const stat = lstatSync(fullPath);
    if (!stat.isFile()) {
      const typeTag = stat.isDirectory() ? "dir" : "other";
      hash.update(`type:${relativePath}:${typeTag}`);
      hash.update("\0");
      return;
    }
    hash.update(`file:${relativePath}`);
    hash.update("\0");
    hash.update(readFileSync(fullPath));
    hash.update("\0");
  } catch {
    hash.update(`unreadable-or-deleted:${relativePath}`);
    hash.update("\0");
  }
}
/**
 * Evaluate whether the current unit has exceeded its diagnostic budget.
 *
 * Returns one of:
 * - `{ action: "none" }` — guard disabled, under budget, throttled, or the
 *   unit shows file-change progress;
 * - `{ action: "warn", final, message }` — inject a diagnostic prompt; at
 *   most `config.diagnosticTurns` warnings are issued, the last marked final;
 * - `{ action: "pause", reason, metadata }` — hard pause, only after the
 *   final warning was sent AND budget kept growing with no progress.
 *
 * Mutates the module-level guard state (warning counters, last-seen metrics).
 *
 * @param {string} unitType
 * @param {string} unitId
 * @param {object} metrics - Cumulative session metrics; normalized against
 *   the unit baseline internally via normalizeMetricsToUnit().
 * @param {object} config - Output of resolveRunawayGuardConfig().
 * @param {number} [now] - Clock override (ms epoch), mainly for tests.
 */
export function evaluateRunawayGuard(
  unitType,
  unitId,
  metrics,
  config,
  now = Date.now(),
) {
  if (!config.enabled) return { action: "none" };
  if (config.diagnosticTurns <= 0) return { action: "none" };
  // Re-baseline automatically if the tracked unit changed since last call.
  const unitKey = `${unitType}/${unitId}`;
  if (!state || state.unitKey !== unitKey)
    resetRunawayGuardState(unitType, unitId);
  const s = state;
  const unitMetrics = normalizeMetricsToUnit(metrics, s);
  const reasons = thresholdReasons(unitType, unitMetrics, config);
  if (reasons.length === 0) return { action: "none" };
  // Throttle: no repeat warning inside minIntervalMs unless the budget grew
  // meaningfully since the last one.
  if (
    s.lastWarningAt > 0 &&
    now - s.lastWarningAt < config.minIntervalMs &&
    !hasMeaningfulGrowth(unitMetrics, s, config)
  ) {
    return { action: "none" };
  }
  // Skip hard-pause if the unit is making file-change progress — growth with
  // changes is legitimate diagnostic/planning work, not a stuck loop.
  // Without this check, discuss/plan phases that legitimately consume tokens
  // while writing summaries/plans would be hard-paused despite making progress.
  if (
    (unitMetrics.changedFiles ?? 0) > 0 ||
    unitMetrics.worktreeChangedSinceStart === true
  ) {
    return { action: "none" };
  }
  // Hard pause: only after the final warning was delivered AND consumption
  // kept growing afterwards. Metadata captures the full decision context.
  if (
    config.hardPause &&
    s.finalWarningSent &&
    hasMeaningfulGrowth(unitMetrics, s, config)
  ) {
    const reason =
      `Runaway guard paused ${unitType} ${unitId}: budget kept growing after ` +
      `${config.diagnosticTurns} diagnostic turn(s). ` +
      formatMetricSummary(unitMetrics);
    return {
      action: "pause",
      reason,
      metadata: {
        reason,
        pausedAt: now,
        unitType,
        unitId,
        diagnosticTurns: config.diagnosticTurns,
        warningsSent: s.warningsSent,
        thresholdReasons: reasons,
        metrics: unitMetrics,
        lastWarningMetrics: {
          toolCalls: s.lastToolCalls,
          sessionTokens: s.lastSessionTokens,
          elapsedMs: s.lastElapsedMs,
        },
        thresholds: {
          toolCallWarning: config.toolCallWarning,
          tokenWarning: config.tokenWarning,
          elapsedMs: config.elapsedMs,
          changedFilesWarning: config.changedFilesWarning,
          minIntervalMs: config.minIntervalMs,
        },
      },
    };
  }
  // Otherwise emit a diagnostic warning and record this turn's metrics so
  // the next evaluation can measure growth against them.
  const final = s.warningsSent + 1 >= config.diagnosticTurns;
  s.warningsSent++;
  s.lastWarningAt = now;
  s.lastToolCalls = unitMetrics.toolCalls;
  s.lastSessionTokens = unitMetrics.sessionTokens;
  s.lastElapsedMs = unitMetrics.elapsedMs;
  if (final) s.finalWarningSent = true;
  return {
    action: "warn",
    final,
    message: buildRunawayGuardMessage(
      unitType,
      unitId,
      unitMetrics,
      reasons,
      final,
    ),
  };
}
/**
 * Convert cumulative session metrics into unit-relative metrics by
 * subtracting the baselines captured at unit start (clamped at 0).
 *
 * When both a current and baseline worktree fingerprint are available,
 * worktreeChangedSinceStart is derived from their comparison; otherwise the
 * caller-provided flag is passed through unchanged.
 */
function normalizeMetricsToUnit(metrics, guardState) {
  const hasComparableFingerprints =
    metrics.worktreeFingerprint !== undefined &&
    metrics.worktreeFingerprint !== null &&
    guardState.baselineWorktreeFingerprint !== null;
  const worktreeChangedSinceStart = hasComparableFingerprints
    ? metrics.worktreeFingerprint !== guardState.baselineWorktreeFingerprint
    : metrics.worktreeChangedSinceStart;
  const sessionTokens = Math.max(
    0,
    metrics.sessionTokens - guardState.baselineSessionTokens,
  );
  let changedFiles;
  if (metrics.changedFiles !== undefined) {
    changedFiles = Math.max(
      0,
      metrics.changedFiles - guardState.baselineChangedFiles,
    );
  }
  return {
    ...metrics,
    sessionTokens,
    changedFiles,
    worktreeChangedSinceStart,
  };
}
/**
 * Collect human-readable reasons why the unit's budget looks runaway.
 *
 * Primary signals (tool calls, elapsed time, changed files) can each fire on
 * their own. Token usage is secondary: it is only reported alongside a
 * primary signal, except for the execute-task "no progress" heuristic which
 * can fire on tool+token volume alone when zero file changes were made.
 * Returns an empty array when no threshold is exceeded.
 */
function thresholdReasons(unitType, metrics, config) {
  const reasons = [];
  const changed = metrics.changedFiles ?? 0;
  // Primary signal: high tool call count — strong indicator of runaway/churn
  if (
    config.toolCallWarning > 0 &&
    metrics.toolCalls >= config.toolCallWarning
  ) {
    reasons.push(
      `${metrics.toolCalls} tool calls (warning ${config.toolCallWarning})`,
    );
  }
  // Primary signal: long elapsed time — unit may be stuck
  if (config.elapsedMs > 0 && metrics.elapsedMs >= config.elapsedMs) {
    const elapsedMin = Math.round(metrics.elapsedMs / 60000);
    const warningMin = Math.round(config.elapsedMs / 60000);
    reasons.push(`${elapsedMin}min elapsed (warning ${warningMin}min)`);
  }
  // Primary signal: many changed files — possible churn/duplication
  if (config.changedFilesWarning > 0 && changed >= config.changedFilesWarning) {
    reasons.push(
      `${metrics.changedFiles} new changed files (warning ${config.changedFilesWarning})`,
    );
  }
  // Tokens are secondary: only report them when a primary signal already
  // fired, so units doing real work with large context models (which can
  // legitimately burn 1M+ tokens in ~25 tool calls) are not flagged on
  // token volume alone.
  const hasPrimarySignal = reasons.length > 0;
  const tokensHigh =
    config.tokenWarning > 0 && metrics.sessionTokens >= config.tokenWarning;
  if (tokensHigh && hasPrimarySignal) {
    reasons.push(
      `${formatTokenCount(metrics.sessionTokens)} unit tokens (warning ${formatTokenCount(config.tokenWarning)})`,
    );
  }
  // Exception: execute-task with zero file progress despite heavy tool and
  // token use is a strong runaway indicator regardless of primary signals.
  if (
    unitType === "execute-task" &&
    changed === 0 &&
    metrics.worktreeChangedSinceStart !== true &&
    metrics.toolCalls >= EXECUTE_NO_PROGRESS_TOOL_WARNING &&
    metrics.sessionTokens >= EXECUTE_NO_PROGRESS_TOKEN_WARNING
  ) {
    reasons.push(
      `no new file changes after ${metrics.toolCalls} tool calls and ${formatTokenCount(metrics.sessionTokens)} tokens`,
    );
  }
  return reasons;
}
/**
 * True when consumption grew meaningfully since the last warning: at least
 * a quarter of the tool-call warning (min 5 calls), a quarter of the token
 * warning (min 50k tokens), or one full minIntervalMs of additional elapsed
 * time.
 */
function hasMeaningfulGrowth(metrics, guardState, config) {
  const toolDelta = metrics.toolCalls - guardState.lastToolCalls;
  const tokenDelta = metrics.sessionTokens - guardState.lastSessionTokens;
  const elapsedDelta = metrics.elapsedMs - guardState.lastElapsedMs;
  const toolThreshold = Math.max(5, Math.floor(config.toolCallWarning / 4));
  const tokenThreshold = Math.max(50_000, Math.floor(config.tokenWarning / 4));
  if (toolDelta >= toolThreshold) return true;
  if (tokenDelta >= tokenThreshold) return true;
  return elapsedDelta >= config.minIntervalMs;
}
/**
 * Build the diagnostic warning injected into the agent conversation.
 *
 * Includes the budget signals, the five most-used tools (by call count),
 * and a changed-files summary; the instruction paragraph differs between
 * the first warning and the final one.
 */
function buildRunawayGuardMessage(unitType, unitId, metrics, reasons, final) {
  let topTools = "";
  if (metrics.topTools) {
    topTools = Object.entries(metrics.topTools)
      .sort(([, a], [, b]) => b - a)
      .slice(0, 5)
      .map(([name, count]) => `${name}x${count}`)
      .join(", ");
  }
  const title = final
    ? "**RUNAWAY UNIT FINAL WARNING - write diagnosis and handoff now.**"
    : "**RUNAWAY UNIT BUDGET WARNING - diagnose before continuing.**";
  const instruction = final
    ? "You have already received a budget warning. Do not start new exploration. Write or update the durable artifact/handoff now, explicitly stating whether the unit was legitimately large, blocked, or stuck in a loop."
    : "Before more exploration or broad edits, state why this unit is still running: legitimately large, blocked, or stuck/churning. Then either finish the required artifact or write a precise handoff.";
  const lines = [
    title,
    `Unit: ${unitType} ${unitId}`,
    `Budget signals: ${reasons.join("; ")}.`,
    topTools ? `Tool mix: ${topTools}.` : "",
    formatChangedFilesLine(unitType, metrics),
    "",
    instruction,
  ];
  // filter(Boolean) drops empty segments (including the spacer entry).
  return lines.filter(Boolean).join("\n");
}
/**
 * One-line summary of file-change progress for the warning message.
 * Returns "" for non-execute-task units with no changed files.
 */
function formatChangedFilesLine(unitType, metrics) {
  const changed = metrics.changedFiles ?? 0;
  if (changed > 0) {
    return `Working tree has ${metrics.changedFiles} new changed file(s) since this unit started. Active edits are not automatically healthy progress; check for repeated or broad churn.`;
  }
  const isExecuteTask = unitType === "execute-task";
  if (isExecuteTask && metrics.worktreeChangedSinceStart) {
    return "Working tree has 0 new changed file paths, but dirty file content changed since this execute-task started.";
  }
  if (isExecuteTask) {
    return "Working tree has 0 new changed files since this execute-task started. For implementation work, that is no durable progress yet.";
  }
  return "";
}
/**
 * Compact comma-separated summary of unit metrics for the pause reason:
 * tool calls, tokens, elapsed minutes, plus optional changed-files and
 * dirty-content notes when those signals are present.
 */
function formatMetricSummary(metrics) {
  const parts = [
    `${metrics.toolCalls} tool calls`,
    `${formatTokenCount(metrics.sessionTokens)} tokens`,
    `${Math.round(metrics.elapsedMs / 60000)}min elapsed`,
  ];
  if (metrics.changedFiles !== undefined) {
    parts.push(`${metrics.changedFiles} new changed files`);
  }
  if (metrics.worktreeChangedSinceStart) {
    parts.push("dirty file content changed");
  }
  return parts.join(", ");
}
/**
 * Extract the file path from one `git status --porcelain=v1` line.
 *
 * Strips the two-character status code + separator, takes the destination
 * side of a rename ("old -> new"), and removes surrounding quotes that git
 * adds for paths with special characters. Returns null when no path can be
 * extracted.
 */
function parsePorcelainPath(line) {
  if (line.length < 4) return null;
  const renameSeparator = " -> ";
  let filePath = line.slice(3);
  const separatorIndex = filePath.lastIndexOf(renameSeparator);
  if (separatorIndex !== -1) {
    filePath = filePath.slice(separatorIndex + renameSeparator.length);
  }
  if (filePath.startsWith('"') && filePath.endsWith('"')) {
    filePath = filePath.slice(1, -1);
  }
  return filePath === "" ? null : filePath;
}
/** Coerce any value to a safe number: finite numbers pass through, everything else becomes 0. */
function numeric(value) {
  if (typeof value !== "number") return 0;
  return Number.isFinite(value) ? value : 0;
}

View file

@ -0,0 +1,85 @@
/**
* Unit closeout helper consolidates the repeated pattern of
* snapshotting metrics + saving activity log + extracting memories
* that appears 6+ times in auto.ts.
*/
import { saveActivityLog } from "../activity-log.js";
import { snapshotUnitMetrics } from "../metrics.js";
import { updateSubscriptionTokensUsed } from "../preferences-models.js";
import { writeTurnGitTransaction } from "./gitops.js";
import { logWarning } from "../workflow-logger.js";
/**
 * Close out a completed unit: snapshot unit metrics, record subscription
 * token usage, save the activity log, kick off fire-and-forget memory
 * extraction, and (when git options are supplied) record the unit's git
 * transaction.
 *
 * @param {object} ctx - Agent context providing model info and the session manager.
 * @param {string} basePath - Project base path used for logs/artifacts.
 * @param {string} unitType - Unit kind (e.g. "execute-task").
 * @param {string} unitId - Unit identifier.
 * @param {number} startedAt - Unit start time, passed to the metrics snapshot.
 * @param {object} [opts] - Snapshot options; may also carry
 *   traceId/turnId/gitAction/gitStatus (all required together) plus optional
 *   gitPush/gitError to record a turn git transaction.
 * @returns {Promise<string|undefined>} The activity log file path, if one was written.
 */
export async function closeoutUnit(
  ctx,
  basePath,
  unitType,
  unitId,
  startedAt,
  opts,
) {
  // Model id is "provider/id" when both are known, bare id otherwise.
  const provider = ctx.model?.provider;
  const id = ctx.model?.id;
  const modelId = provider && id ? `${provider}/${id}` : (id ?? "unknown");
  const unit = snapshotUnitMetrics(
    ctx,
    unitType,
    unitId,
    startedAt,
    modelId,
    opts,
  );
  // Track subscription token consumption for amortized cost reporting.
  // Fire-and-forget: updateSubscriptionTokensUsed is already best-effort.
  if (provider && unit && unit.tokens.total > 0) {
    updateSubscriptionTokensUsed(provider, unit.tokens.total);
  }
  const activityFile = saveActivityLog(ctx, basePath, unitType, unitId);
  if (activityFile) {
    try {
      // Lazy dynamic import keeps the memory extractor off the closeout hot path.
      // NOTE(review): after the move into sf/uok/, verify "./memory-extractor.js"
      // still resolves from this directory — non-uok siblings are imported via
      // "../" above. TODO confirm memory-extractor.js lives in uok/.
      const { buildMemoryLLMCall, extractMemoriesFromUnit } = await import(
        "./memory-extractor.js"
      );
      const llmCallFn = buildMemoryLLMCall(ctx);
      if (llmCallFn) {
        // Deliberately not awaited — extraction failures only log a warning.
        extractMemoriesFromUnit(
          activityFile,
          unitType,
          unitId,
          llmCallFn,
        ).catch((err) => {
          logWarning(
            "engine",
            `memory extraction failed for ${unitType}/${unitId}: ${err.message}`,
          );
        });
      }
    } catch (err) {
      /* non-fatal */
      logWarning(
        "engine",
        `operation failed: ${err instanceof Error ? err.message : String(err)}`,
      );
    }
  }
  // Record the unit's git transaction only when the caller supplied the full
  // trace/turn/action/status quartet.
  if (opts?.traceId && opts.turnId && opts.gitAction && opts.gitStatus) {
    writeTurnGitTransaction({
      basePath,
      traceId: opts.traceId,
      turnId: opts.turnId,
      unitType,
      unitId,
      stage: "record",
      action: opts.gitAction,
      push: opts.gitPush === true,
      status: opts.gitStatus,
      error: opts.gitError,
      metadata: {
        activityFile,
      },
    });
  }
  return activityFile ?? undefined;
}

View file

@ -0,0 +1,824 @@
/**
* Post-unit verification gate for autonomous mode.
*
* Runs typecheck/lint/test checks, captures runtime errors, performs
* dependency audits, handles auto-fix retry logic, and writes
* verification evidence JSON.
*
 * Extracted from handleAgentEnd() in auto.ts. Returns a sentinel
 * value instead of calling return/pauseAuto directly — the caller
 * checks the result and handles control flow.
*/
import { mkdirSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { loadFile } from "../files.js";
import { parseRoadmap } from "../parsers.js";
import { resolveMilestoneFile, resolveSlicePath } from "../paths.js";
import { runPostExecutionChecks } from "../post-execution-checks.js";
import { loadEffectiveSFPreferences } from "../preferences.js";
import {
getMilestoneSlices,
getSliceTasks,
getTask,
isDbAvailable,
} from "../sf-db.js";
import { isMilestoneComplete } from "../state.js";
import { isClosedStatus } from "../status-guards.js";
import { parseUnitId } from "../unit-id.js";
import { ChaosMonkeyGate } from "./chaos-monkey.js";
import { CostGuardGate } from "./cost-guard-gate.js";
import { resolveUokFlags } from "./flags.js";
import { UokGateRunner } from "./gate-runner.js";
import { MultiPackageGate } from "./multi-package-gate.js";
import { OutcomeLearningGate } from "./outcome-learning-gate.js";
import { SecurityGate } from "./security-gate.js";
import {
formatExecuteTaskRecoveryStatus,
inspectExecuteTaskDurability,
} from "./unit-runtime.js";
import { extractVerdict } from "../verdict-parser.js";
import { writeVerificationJSON } from "../verification-evidence.js";
import {
captureRuntimeErrors,
formatFailureContext,
runDependencyAudit,
runVerificationGate,
} from "../verification-gate.js";
import { logError, logWarning } from "../workflow-logger.js";
/**
 * Sum total token usage across all assistant messages in the session.
 *
 * Only `message`-type entries whose message role is "assistant" and whose
 * usage carries a non-nullish `totalTokens` contribute to the sum.
 *
 * @param {object} ctx - Host context; `ctx.sessionManager.getEntries()` is
 *   optional — a missing manager or method yields 0.
 * @returns {number} Accumulated token count (0 when nothing matches).
 */
function computeTokenCountFromSession(ctx) {
  const sessionEntries = ctx.sessionManager?.getEntries?.() ?? [];
  return sessionEntries
    .filter((entry) => entry.type === "message")
    .map((entry) => entry.message)
    .filter((msg) => msg?.role === "assistant" && msg.usage?.totalTokens != null)
    .reduce((sum, msg) => sum + msg.usage.totalTokens, 0);
}
/**
 * Current V8 heap usage in whole megabytes.
 *
 * @returns {number|undefined} Rounded heapUsed in MB, or undefined when
 *   `process.memoryUsage()` is unavailable/throws.
 */
function getMemoryPressureMB() {
  try {
    const { heapUsed } = process.memoryUsage();
    return Math.round(heapUsed / (1024 * 1024));
  } catch {
    return undefined;
  }
}
/**
 * Pair each gate id with its result outcome for the evidence summary.
 *
 * @param {string[]|undefined} gateIds - Gate identifiers, index-aligned with results.
 * @param {Array<{outcome?: string}|undefined>|undefined} gateResults - Per-gate results.
 * @returns {Record<string, string>|undefined} id → outcome map ("unknown" when a
 *   result is missing), or undefined when either input is absent or ids are empty.
 */
function buildGateOutcomesSummary(gateIds, gateResults) {
  if (!gateIds?.length || !gateResults) return undefined;
  return Object.fromEntries(
    gateIds.map((gateId, idx) => [gateId, gateResults[idx]?.outcome ?? "unknown"]),
  );
}
/**
 * Heuristic: does this stderr text look like an infrastructure/environment
 * failure (missing binary, DNS/network error, spawn failure) rather than a
 * genuine verification failure?
 *
 * @param {string} stderr - Captured stderr of a verification check.
 * @returns {boolean} true when a known infra error token is present.
 */
function isInfraVerificationFailure(stderr) {
  const infraSignals =
    /\b(ENOENT|ENOTFOUND|ETIMEDOUT|ECONNRESET|EAI_AGAIN|spawn\s+\S+\s+ENOENT|command not found)\b/i;
  return infraSignals.test(stderr);
}
/**
 * Post-unit guard for `validate-milestone` units (#4094).
 *
 * When validate-milestone writes verdict=needs-remediation, the agent is
 * expected to also call reassess_roadmap in the same turn to add
 * remediation slices. If they don't, the state machine re-derives
 * `phase: validating-milestone` indefinitely (all slices still complete +
 * verdict still needs-remediation), wasting ~3 dispatches before the stuck
 * detector fires.
 *
 * This guard fires immediately on the first occurrence: if VALIDATION.md
 * verdict is needs-remediation and no incomplete slices exist for the
 * milestone, pause the auto-loop with a clear blocker.
 *
 * @param {object} vctx - Verification context ({ s: loop state, ctx: host
 *   context with ui.notify, pi: pause info forwarded to pauseAuto }).
 * @param {Function} pauseAuto - async (ctx, pi) callback invoked before
 *   this function returns "pause".
 * @returns {Promise<"continue"|"pause">} "pause" only when the remediation
 *   loop risk is detected; "continue" in every other case.
 */
async function runValidateMilestonePostCheck(vctx, pauseAuto) {
  const { s, ctx, pi } = vctx;
  const prefs = loadEffectiveSFPreferences()?.preferences;
  const uokFlags = resolveUokFlags(prefs);
  // Best-effort persistence of this post-check's outcome via the UOK gate
  // runner; a no-op when UOK gates are disabled or no unit is active.
  const persistMilestoneValidationGate = async (
    outcome,
    failureClass,
    rationale,
    findings = "",
    milestoneId,
  ) => {
    if (!uokFlags.gates || !s.currentUnit) return;
    const gateRunner = new UokGateRunner();
    gateRunner.register({
      id: "milestone-validation-post-check",
      type: "verification",
      execute: async () => ({
        outcome,
        failureClass,
        rationale,
        findings,
      }),
    });
    await gateRunner.run("milestone-validation-post-check", {
      basePath: s.basePath,
      traceId: `validation-post-check:${s.currentUnit.id}`,
      turnId: s.currentUnit.id,
      milestoneId,
      unitType: s.currentUnit.type,
      unitId: s.currentUnit.id,
    });
  };
  // Prerequisite guards: without a unit, a milestone id, or readable
  // VALIDATION.md content there is nothing to check — proceed normally.
  if (!s.currentUnit) return "continue";
  const { milestone: mid } = parseUnitId(s.currentUnit.id);
  if (!mid) return "continue";
  const validationFile = resolveMilestoneFile(s.basePath, mid, "VALIDATION");
  if (!validationFile) return "continue";
  const validationContent = await loadFile(validationFile);
  if (!validationContent) return "continue";
  const verdict = extractVerdict(validationContent);
  if (verdict !== "needs-remediation") {
    await persistMilestoneValidationGate(
      "pass",
      "none",
      `milestone validation verdict is ${verdict}; no remediation loop risk`,
      "",
      mid,
    );
    return "continue";
  }
  const incompleteSliceCount = await countIncompleteSlices(s.basePath, mid);
  // If any non-closed slices exist, the agent successfully queued remediation
  // work — proceed normally. The state machine will execute those slices and
  // re-validate per the #3596/#3670 fix.
  if (incompleteSliceCount > 0) {
    await persistMilestoneValidationGate(
      "pass",
      "none",
      `remediation slices present (${incompleteSliceCount}); validation can continue`,
      "",
      mid,
    );
    return "continue";
  }
  // Remediation loop detected: needs-remediation verdict with zero open
  // slices. Notify, record the gate outcome, then pause the auto-loop.
  ctx.ui.notify(
    `Milestone ${mid} validation returned verdict=needs-remediation but no remediation slices were added. Pausing for human review.`,
    "error",
  );
  process.stderr.write(
    `validate-milestone: pausing — verdict=needs-remediation with no incomplete slices for ${mid}. ` +
      `The agent must call reassess_roadmap to add remediation slices before re-validation.\n`,
  );
  await persistMilestoneValidationGate(
    "manual-attention",
    "manual-attention",
    "needs-remediation verdict without queued remediation slices",
    `No incomplete slices found for ${mid} while verdict=needs-remediation`,
    mid,
  );
  await pauseAuto(ctx, pi);
  return "pause";
}
/**
 * Count slices for a milestone that are not in a closed status.
 *
 * DB-backed projects are authoritative (#4094 peer review); falls back to
 * roadmap parsing only when the DB is unavailable. Any "can't tell" case
 * (no DB rows, missing/unparseable roadmap, parse error) returns 1 so the
 * caller treats the state as unknown and does NOT pause.
 *
 * @param {string} basePath - Project base path.
 * @param {string} milestoneId - Milestone identifier.
 * @returns {Promise<number>} Number of open slices (DB path), or 0/1 from
 *   the roadmap fallback.
 */
async function countIncompleteSlices(basePath, milestoneId) {
  if (isDbAvailable()) {
    const dbSlices = getMilestoneSlices(milestoneId);
    // No DB rows — treat as "unknown", do not pause.
    if (dbSlices.length === 0) return 1;
    let openCount = 0;
    for (const slice of dbSlices) {
      if (!isClosedStatus(slice.status)) openCount += 1;
    }
    return openCount;
  }
  // Filesystem fallback: parse the roadmap markdown.
  try {
    const roadmapFile = resolveMilestoneFile(basePath, milestoneId, "ROADMAP");
    const roadmapContent = roadmapFile ? await loadFile(roadmapFile) : null;
    if (!roadmapContent) return 1;
    const roadmap = parseRoadmap(roadmapContent);
    if (roadmap.slices.length === 0) return 1;
    return isMilestoneComplete(roadmap) ? 0 : 1;
  } catch {
    // Parsing failures should not cause false-positive pauses.
    return 1;
  }
}
/**
 * Run the verification gate for the current execute-task unit.
 *
 * `validate-milestone` units are routed to runValidateMilestonePostCheck();
 * all other non-execute-task unit types are a no-op ("continue").
 *
 * Work is split into two failure zones:
 *  - Zone 1 (gate machinery): preference load, task-plan lookup, gate run,
 *    runtime-error capture, dependency audit. Any throw here pauses the loop.
 *  - Zone 2 (ancillary): UOK gates, notifications, evidence JSON writes,
 *    post-execution checks, retry bookkeeping. Any throw here is non-fatal.
 *
 * @param {object} vctx - Verification context ({ s: loop state, ctx: host
 *   context with ui/sessionManager, pi: pause info forwarded to pauseAuto }).
 * @param {Function} pauseAuto - async (ctx, pi) callback; always invoked
 *   before this function returns "pause".
 * @returns {Promise<"continue"|"retry"|"pause">}
 *   - "continue" gate passed (or no checks configured), proceed normally
 *   - "retry" gate failed with retries remaining, s.pendingVerificationRetry set for loop re-iteration
 *   - "pause" gate failed with retries exhausted, pauseAuto already called
 */
export async function runPostUnitVerification(vctx, pauseAuto) {
  const { s, ctx, pi } = vctx;
  if (!s.currentUnit) {
    return "continue";
  }
  if (s.currentUnit.type === "validate-milestone") {
    return await runValidateMilestonePostCheck(vctx, pauseAuto);
  }
  if (s.currentUnit.type !== "execute-task") {
    return "continue";
  }
  // ── Zone 1: Gate machinery (outer try) ──────────────────────────────────
  // Failures here indicate broken infrastructure — pause for human review.
  let prefs;
  let uokFlags;
  let mid;
  let sid;
  let tid;
  let result;
  try {
    const effectivePrefs = loadEffectiveSFPreferences();
    prefs = effectivePrefs?.preferences;
    uokFlags = resolveUokFlags(prefs);
    // Read task plan verify field
    ({ milestone: mid, slice: sid, task: tid } = parseUnitId(s.currentUnit.id));
    let taskPlanVerify;
    if (mid && sid && tid) {
      if (isDbAvailable()) {
        taskPlanVerify = getTask(mid, sid, tid)?.verify;
      }
      // When DB unavailable, taskPlanVerify stays undefined — gate runs without task-specific checks
    }
    result = runVerificationGate({
      cwd: s.basePath,
      preferenceCommands: prefs?.verification_commands,
      taskPlanVerify,
    });
    // Handle skipped gate (no commands discovered) — fail-closed but not a hard failure
    if (result.skipped === true) {
      process.stderr.write(
        "verification-gate: no commands discovered — gate skipped, not passed\n",
      );
      ctx.ui.notify(
        "[verify] SKIP — no verification commands configured",
        "warning",
      );
      return "continue";
    }
    // Capture runtime errors; any blocking one flips the gate to failed.
    const runtimeErrors = await captureRuntimeErrors();
    if (runtimeErrors.length > 0) {
      result.runtimeErrors = runtimeErrors;
      if (runtimeErrors.some((e) => e.blocking)) {
        result.passed = false;
      }
    }
    // Dependency audit — advisory only; warnings never flip result.passed.
    const auditWarnings = runDependencyAudit(s.basePath);
    if (auditWarnings.length > 0) {
      result.auditWarnings = auditWarnings;
      process.stderr.write(
        `verification-gate: ${auditWarnings.length} audit warning(s)\n`,
      );
      for (const w of auditWarnings) {
        process.stderr.write(` [${w.severity}] ${w.name}: ${w.title}\n`);
      }
    }
  } catch (machineryErr) {
    logError(
      "engine",
      `verification-gate machinery error — pausing for human review: ${machineryErr.message}`,
    );
    ctx.ui.notify(
      "verification-gate machinery error — pausing for human review",
      "error",
    );
    await pauseAuto(ctx, pi);
    return "pause";
  }
  // ── Zone 2: Ancillary post-gate work (inner try) ─────────────────────────
  // Failures here are non-fatal — evidence writes, UOK gate calls, notifications, retry logic.
  let gateIds = [];
  let gateResults = [];
  try {
    if (uokFlags.gates) {
      const gateRunner = new UokGateRunner();
      gateRunner.register({
        id: "verification-gate",
        type: "verification",
        execute: async () => ({
          outcome: result.passed ? "pass" : "fail",
          failureClass: result.runtimeErrors?.some((e) => e.blocking)
            ? "execution"
            : "verification",
          rationale: result.passed
            ? "verification checks passed"
            : "verification checks failed",
          findings: result.passed ? "" : formatFailureContext(result),
        }),
      });
      // Optional gates — each registered only when its feature flag is on.
      if (uokFlags.securityGuard) {
        gateRunner.register(new SecurityGate());
      }
      if (uokFlags.multiPackageHealing) {
        gateRunner.register(new MultiPackageGate());
      }
      if (uokFlags.autonomousCostGuard) {
        gateRunner.register(new CostGuardGate());
      }
      if (uokFlags.outcomeLearning) {
        gateRunner.register(new OutcomeLearningGate());
      }
      if (uokFlags.chaosMonkey) {
        gateRunner.register(new ChaosMonkeyGate({ active: true }));
      }
      const baseCtx = {
        basePath: s.basePath,
        traceId: `verification:${s.currentUnit.id}`,
        turnId: s.currentUnit.id,
        milestoneId: mid ?? undefined,
        sliceId: sid ?? undefined,
        taskId: tid ?? undefined,
        unitType: s.currentUnit.type,
        unitId: s.currentUnit.id,
        iteration: s.verificationRetryCount.get(s.currentUnit.id) ?? 0,
      };
      gateIds = gateRunner.list().map((g) => g.id);
      // Run all gates in parallel; a throwing gate is mapped to a synthetic
      // "fail" result so Promise.all never rejects.
      gateResults = await Promise.all(
        gateIds.map((id) =>
          gateRunner
            .run(id, {
              ...baseCtx,
              traceId: `${id}:${s.currentUnit.id}`,
            })
            .catch((err) => ({
              outcome: "fail",
              failureClass: "unknown",
              rationale: `Gate ${id} threw: ${err instanceof Error ? err.message : String(err)}`,
            })),
        ),
      );
      // Fold failing gate results back onto `result` (index-aligned with gateIds).
      for (let i = 0; i < gateIds.length; i++) {
        const id = gateIds[i];
        const res = gateResults[i];
        if (res.outcome !== "fail") continue;
        result.passed = false;
        if (id === "security-guard") {
          result.securityFailure = true;
          result.securityRationale = res.rationale;
          result.securityFindings = res.findings;
        } else if (id === "multi-package-healing") {
          result.multiPackageFailure = true;
          result.multiPackageRationale = res.rationale;
          result.multiPackageFindings = res.findings;
        } else if (id === "cost-guard") {
          result.costGuardFailure = true;
          result.costGuardRationale = res.rationale;
        } else if (id === "chaos-monkey") {
          result.chaosMonkeyFailure = true;
          result.chaosMonkeyRationale = res.rationale;
        }
      }
    }
    // Auto-fix retry preferences (defaults: enabled, 2 retries)
    const autoFixEnabled = prefs?.verification_auto_fix !== false;
    const maxRetries =
      typeof prefs?.verification_max_retries === "number"
        ? prefs.verification_max_retries
        : 2;
    if (result.checks.length > 0) {
      const passCount = result.checks.filter((c) => c.exitCode === 0).length;
      const total = result.checks.length;
      const commandList = result.checks.map((c) => c.command).join(" | ");
      ctx.ui.notify(`[verify] running: ${commandList}`, "info");
      const attemptSoFar = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;
      if (result.passed) {
        ctx.ui.notify(`[verify] PASS - ${passCount}/${total} checks`, "info");
      } else {
        const failures = result.checks.filter((c) => c.exitCode !== 0);
        const failNames = failures.map((f) => f.command).join(", ");
        const nextAttempt = attemptSoFar + 1;
        ctx.ui.notify(
          `[verify] FAIL - ${failNames} (auto-fix attempt ${nextAttempt}/${maxRetries})`,
          "info",
        );
        process.stderr.write(
          `verification-gate: ${total - passCount}/${total} checks failed\n`,
        );
        for (const f of failures) {
          process.stderr.write(` ${f.command} exited ${f.exitCode}\n`);
          if (f.stderr)
            process.stderr.write(` stderr: ${f.stderr.slice(0, 500)}\n`);
        }
      }
    }
    // Log blocking runtime errors
    if (result.runtimeErrors?.some((e) => e.blocking)) {
      const blockingErrors = result.runtimeErrors.filter((e) => e.blocking);
      process.stderr.write(
        `verification-gate: ${blockingErrors.length} blocking runtime error(s) detected\n`,
      );
      for (const err of blockingErrors) {
        process.stderr.write(
          ` [${err.source}] ${err.severity}: ${err.message.slice(0, 200)}\n`,
        );
      }
    }
    // Log security failures
    if (result.securityFailure) {
      ctx.ui.notify(
        `[verify] SECURITY FAIL — ${result.securityRationale}`,
        "error",
      );
      process.stderr.write(
        `verification-gate: security failure: ${result.securityRationale}\n`,
      );
      if (result.securityFindings) {
        process.stderr.write(`${result.securityFindings}\n`);
      }
    }
    // Log multi-package failures
    if (result.multiPackageFailure) {
      ctx.ui.notify(
        `[verify] MULTI-PACKAGE FAIL — ${result.multiPackageRationale}`,
        "error",
      );
      process.stderr.write(
        `verification-gate: multi-package healing failure: ${result.multiPackageRationale}\n`,
      );
      if (result.multiPackageFindings) {
        process.stderr.write(`${result.multiPackageFindings}\n`);
      }
    }
    // Log cost-guard failures
    if (result.costGuardFailure) {
      ctx.ui.notify(
        `[verify] COST-GUARD FAIL — ${result.costGuardRationale}`,
        "error",
      );
      process.stderr.write(
        `verification-gate: cost-guard failure: ${result.costGuardRationale}\n`,
      );
    }
    // Log chaos-monkey failures
    if (result.chaosMonkeyFailure) {
      ctx.ui.notify(
        `[verify] CHAOS-MONKEY FAIL — ${result.chaosMonkeyRationale}`,
        "error",
      );
      process.stderr.write(
        `verification-gate: chaos-monkey injected failure: ${result.chaosMonkeyRationale}\n`,
      );
    }
    // Write verification evidence JSON
    const attempt = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;
    const tokenCount = computeTokenCountFromSession(ctx);
    const memoryPressureMB = getMemoryPressureMB();
    const gateOutcomes = buildGateOutcomesSummary(gateIds, gateResults);
    let recoveryStatus;
    try {
      const durability = await inspectExecuteTaskDurability(
        s.basePath,
        s.currentUnit.id,
      );
      if (durability) {
        recoveryStatus = formatExecuteTaskRecoveryStatus(durability);
      }
    } catch {
      // Durability inspection is best-effort; absence is recorded as undefined.
      recoveryStatus = undefined;
    }
    if (mid && sid && tid) {
      try {
        const sDir = resolveSlicePath(s.basePath, mid, sid);
        if (sDir) {
          const tasksDir = join(sDir, "tasks");
          if (result.passed) {
            writeVerificationJSON(
              result,
              tasksDir,
              tid,
              s.currentUnit.id,
              undefined,
              undefined,
              tokenCount,
              memoryPressureMB,
              gateOutcomes,
              recoveryStatus,
            );
          } else {
            const nextAttempt = attempt + 1;
            writeVerificationJSON(
              result,
              tasksDir,
              tid,
              s.currentUnit.id,
              nextAttempt,
              maxRetries,
              tokenCount,
              memoryPressureMB,
              gateOutcomes,
              recoveryStatus,
            );
          }
        }
      } catch (evidenceErr) {
        logWarning(
          "engine",
          `verification-evidence write error: ${evidenceErr.message}`,
        );
      }
    }
    // Advisory failures: auto-discovered package.json checks, or checks whose
    // stderr matches known infrastructure errors, do not block — clear the
    // retry state and continue.
    const advisoryFailure =
      !result.passed &&
      (result.discoverySource === "package-json" ||
        result.checks.some((check) =>
          isInfraVerificationFailure(check.stderr),
        ));
    if (advisoryFailure) {
      s.verificationRetryCount.delete(s.currentUnit.id);
      s.pendingVerificationRetry = null;
      ctx.ui.notify(
        result.discoverySource === "package-json"
          ? "Verification failed in auto-discovered package.json checks — treating as advisory."
          : "Verification failed due to infrastructure/runtime environment issues — treating as advisory.",
        "warning",
      );
      return "continue";
    }
    // ── Post-execution checks (run after main verification passes for execute-task units) ──
    let postExecChecks;
    let postExecBlockingFailure = false;
    if (result.passed && mid && sid && tid) {
      // Check preferences — respect enhanced_verification and enhanced_verification_post
      const enhancedEnabled = prefs?.enhanced_verification !== false; // default true
      const postEnabled = prefs?.enhanced_verification_post !== false; // default true
      if (enhancedEnabled && postEnabled && isDbAvailable()) {
        try {
          // Get the completed task from DB
          const taskRow = getTask(mid, sid, tid);
          if (taskRow && taskRow.key_files && taskRow.key_files.length > 0) {
            // Get all tasks in the slice
            const allTasks = getSliceTasks(mid, sid);
            // Filter to prior completed tasks (status = 'complete' or 'done', before current task)
            const priorTasks = allTasks.filter(
              (t) =>
                (t.status === "complete" || t.status === "done") &&
                t.id !== tid &&
                t.sequence < taskRow.sequence,
            );
            // Run post-execution checks
            const postExecResult = runPostExecutionChecks(
              taskRow,
              priorTasks,
              s.basePath,
            );
            // Store checks for evidence JSON
            postExecChecks = postExecResult.checks;
            // Log summary to stderr with sf-post-exec: prefix
            const emoji =
              postExecResult.status === "pass"
                ? "✅"
                : postExecResult.status === "warn"
                  ? "⚠️"
                  : "❌";
            process.stderr.write(
              `sf-post-exec: ${emoji} Post-execution checks ${postExecResult.status} for ${mid}/${sid}/${tid} (${postExecResult.durationMs}ms)\n`,
            );
            // Log individual check results
            for (const check of postExecResult.checks) {
              const checkEmoji = check.passed
                ? "✓"
                : check.blocking
                  ? "✗"
                  : "⚠";
              process.stderr.write(
                `sf-post-exec: ${checkEmoji} [${check.category}] ${check.target}: ${check.message}\n`,
              );
            }
            if (uokFlags.gates) {
              // In strict mode a "warn" outcome is escalated to blocking.
              const strictMode = prefs?.enhanced_verification_strict === true;
              const warnEscalated =
                postExecResult.status === "warn" && strictMode;
              const blockingFailure =
                postExecResult.status === "fail" || warnEscalated;
              const findings = postExecResult.checks
                .filter((check) => !check.passed)
                .map(
                  (check) =>
                    `[${check.category}] ${check.target}: ${check.message}`,
                )
                .join("\n");
              const gateRunner = new UokGateRunner();
              gateRunner.register({
                id: "post-execution-checks",
                type: "artifact",
                execute: async () => ({
                  outcome: blockingFailure ? "fail" : "pass",
                  failureClass:
                    postExecResult.status === "fail"
                      ? "artifact"
                      : warnEscalated
                        ? "policy"
                        : "none",
                  rationale: blockingFailure
                    ? `post-execution checks ${postExecResult.status}${warnEscalated ? " (strict)" : ""}`
                    : "post-execution checks passed",
                  findings,
                }),
              });
              await gateRunner.run("post-execution-checks", {
                basePath: s.basePath,
                traceId: `verification:${s.currentUnit.id}`,
                turnId: s.currentUnit.id,
                milestoneId: mid,
                sliceId: sid,
                taskId: tid,
                unitType: s.currentUnit.type,
                unitId: s.currentUnit.id,
              });
            }
            // Check for blocking failures
            if (postExecResult.status === "fail") {
              postExecBlockingFailure = true;
              const blockingCount = postExecResult.checks.filter(
                (c) => !c.passed && c.blocking,
              ).length;
              ctx.ui.notify(
                `Post-execution checks failed: ${blockingCount} blocking issue${blockingCount === 1 ? "" : "s"} found`,
                "error",
              );
            } else if (postExecResult.status === "warn") {
              ctx.ui.notify(
                `Post-execution checks passed with warnings`,
                "warning",
              );
              // Strict mode: treat warnings as blocking
              if (prefs?.enhanced_verification_strict === true) {
                postExecBlockingFailure = true;
              }
            }
          }
        } catch (postExecErr) {
          // Post-execution check errors are non-fatal — log and continue
          logWarning("engine", `sf-post-exec: error — ${postExecErr.message}`);
        }
      }
    }
    // Re-write verification evidence JSON with post-execution checks
    if (postExecChecks && postExecChecks.length > 0 && mid && sid && tid) {
      try {
        const sDir = resolveSlicePath(s.basePath, mid, sid);
        if (sDir) {
          const tasksDir = join(sDir, "tasks");
          // Add postExecutionChecks to the result for the JSON write
          const resultWithPostExec = {
            ...result,
            // Mark as failed if there was a blocking post-exec failure
            passed: result.passed && !postExecBlockingFailure,
          };
          // Manually write with postExecutionChecks field
          writeVerificationJSONWithPostExec(
            resultWithPostExec,
            tasksDir,
            tid,
            s.currentUnit.id,
            postExecChecks,
            postExecBlockingFailure ? attempt + 1 : undefined,
            postExecBlockingFailure ? maxRetries : undefined,
            tokenCount,
            memoryPressureMB,
            gateOutcomes,
            recoveryStatus,
          );
        }
      } catch (evidenceErr) {
        logWarning(
          "engine",
          `verification-evidence: post-exec write error — ${evidenceErr.message}`,
        );
      }
    }
    // Update result.passed based on post-execution checks
    if (postExecBlockingFailure) {
      result.passed = false;
    }
    // ── Auto-fix retry logic ──
    if (result.passed) {
      s.verificationRetryCount.delete(s.currentUnit.id);
      s.pendingVerificationRetry = null;
      return "continue";
    } else if (postExecBlockingFailure) {
      // Post-execution failures are cross-task consistency issues — retrying the same task won't fix them.
      // Skip retry and pause immediately for human review.
      s.verificationRetryCount.delete(s.currentUnit.id);
      s.pendingVerificationRetry = null;
      ctx.ui.notify(
        `Post-execution checks failed — cross-task consistency issue detected, pausing for human review`,
        "error",
      );
      await pauseAuto(ctx, pi);
      return "pause";
    } else if (autoFixEnabled && attempt + 1 <= maxRetries) {
      const nextAttempt = attempt + 1;
      s.verificationRetryCount.set(s.currentUnit.id, nextAttempt);
      s.pendingVerificationRetry = {
        unitId: s.currentUnit.id,
        failureContext: formatFailureContext(result),
        attempt: nextAttempt,
      };
      const failedCmds = result.checks
        .filter((c) => c.exitCode !== 0)
        .map((c) => c.command);
      const cmdSummary =
        failedCmds.length <= 3
          ? failedCmds.join(", ")
          : `${failedCmds.slice(0, 3).join(", ")}... and ${failedCmds.length - 3} more`;
      ctx.ui.notify(
        `Verification failed (${cmdSummary}) — auto-fix attempt ${nextAttempt}/${maxRetries}`,
        "warning",
      );
      // Return "retry" — the autoLoop while loop will re-iterate with the retry context
      return "retry";
    } else {
      // Gate failed, retries exhausted
      s.verificationRetryCount.delete(s.currentUnit.id);
      s.pendingVerificationRetry = null;
      const exhaustedFails = result.checks
        .filter((c) => c.exitCode !== 0)
        .map((c) => c.command);
      const exhaustedSummary =
        exhaustedFails.length <= 3
          ? exhaustedFails.join(", ")
          : `${exhaustedFails.slice(0, 3).join(", ")}... and ${exhaustedFails.length - 3} more`;
      ctx.ui.notify(
        `Verification gate FAILED after ${attempt} ${attempt === 1 ? "retry" : "retries"} (${exhaustedSummary}) — pausing for human review`,
        "error",
      );
      await pauseAuto(ctx, pi);
      return "pause";
    }
  } catch (err) {
    // Ancillary post-gate errors are non-fatal — log warning and continue
    logWarning("engine", `verification-gate error: ${err.message}`);
    return "continue";
  }
}
/**
 * Write verification evidence JSON with post-execution checks included.
 *
 * This is a variant of writeVerificationJSON that adds the
 * postExecutionChecks field. The evidence file is written to
 * `<tasksDir>/<taskId>-VERIFY.json` (directory created if missing).
 *
 * @param {object} result - Verification gate result (timestamp, passed,
 *   discoverySource, checks, optional runtimeErrors/auditWarnings).
 * @param {string} tasksDir - Directory to write the evidence file into.
 * @param {string} taskId - Task identifier (also the file-name stem).
 * @param {string|undefined} unitId - Unit id; falls back to taskId.
 * @param {Array} postExecutionChecks - Post-execution check entries.
 * @param {number|undefined} retryAttempt - Current retry attempt, if any.
 * @param {number|undefined} maxRetries - Retry ceiling, if any.
 * @param {number|undefined} tokenCount - Session token usage snapshot.
 * @param {number|undefined} memoryPressureMB - Heap usage snapshot in MB.
 * @param {object|undefined} gateOutcomes - Gate id → outcome summary.
 * @param {string|undefined} recoveryStatus - Durability/recovery summary.
 */
function writeVerificationJSONWithPostExec(
  result,
  tasksDir,
  taskId,
  unitId,
  postExecutionChecks,
  retryAttempt,
  maxRetries,
  tokenCount,
  memoryPressureMB,
  gateOutcomes,
  recoveryStatus,
) {
  mkdirSync(tasksDir, { recursive: true });
  const checks = result.checks.map(({ command, exitCode, durationMs }) => ({
    command,
    exitCode,
    durationMs,
    verdict: exitCode === 0 ? "pass" : "fail",
  }));
  const evidence = {
    schemaVersion: 1,
    taskId,
    unitId: unitId ?? taskId,
    timestamp: result.timestamp,
    passed: result.passed,
    discoverySource: result.discoverySource,
    checks,
  };
  // Optional scalar fields are only written when defined, in this exact
  // insertion order (JSON.stringify preserves it).
  const optionalFields = {
    retryAttempt,
    maxRetries,
    tokenCount,
    memoryPressureMB,
    gateOutcomes,
    recoveryStatus,
  };
  for (const [key, value] of Object.entries(optionalFields)) {
    if (value !== undefined) evidence[key] = value;
  }
  evidence.postExecutionChecks = postExecutionChecks;
  if (result.runtimeErrors?.length) {
    evidence.runtimeErrors = result.runtimeErrors.map(
      ({ source, severity, message, blocking }) => ({
        source,
        severity,
        message,
        blocking,
      }),
    );
  }
  if (result.auditWarnings?.length) {
    evidence.auditWarnings = result.auditWarnings.map(
      ({ name, severity, title, url, fixAvailable }) => ({
        name,
        severity,
        title,
        url,
        fixAvailable,
      }),
    );
  }
  const filePath = join(tasksDir, `${taskId}-VERIFY.json`);
  writeFileSync(filePath, `${JSON.stringify(evidence, null, 2)}\n`, "utf-8");
}

View file

@ -222,3 +222,37 @@ export {
nextWriteRecord,
releaseWriterToken,
} from "./writer.js";
// ─── Autonomous Dispatch ───────────────────────────────────────────────────
export {
DISPATCH_RULES,
enhanceUnitRankingWithMemory,
extractValidationAttentionPlan,
formatTaskCompleteFailurePrompt,
getDispatchRuleNames,
getRewriteCount,
getUatCount,
incrementUatCount,
isVerificationNotApplicable,
resolveDispatch,
setRewriteCount,
} from "./auto-dispatch.js";
// ─── Runaway Guard ────────────────────────────────────────────────────────
export {
clearRunawayGuardState,
collectSessionTokenUsage,
collectWorktreeFingerprint,
countChangedFiles,
DEFAULT_RUNAWAY_CHANGED_FILES_WARNING,
DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,
DEFAULT_RUNAWAY_ELAPSED_MINUTES,
DEFAULT_RUNAWAY_MIN_INTERVAL_MS,
DEFAULT_RUNAWAY_TOOL_CALL_WARNING,
DEFAULT_RUNAWAY_TOKEN_WARNING,
evaluateRunawayGuard,
resetRunawayGuardState,
resolveRunawayGuardConfig,
} from "./auto-runaway-guard.js";
// ─── Unit Closeout ────────────────────────────────────────────────────────
export { closeoutUnit } from "./auto-unit-closeout.js";
// ─── Post-Unit Verification ────────────────────────────────────────────────
export { runPostUnitVerification } from "./auto-verification.js";