refactor(uok): move auto-dispatch, auto-verification, auto-runaway-guard, auto-unit-closeout into sf/uok/
Per checkpoint-008/009 next-steps: these 4 autonomous-loop modules belong in
the UOK subsystem alongside the other orchestration primitives.
- auto-dispatch.js → uok/auto-dispatch.js
- Dispatch table + resolveDispatch() is a core UOK orchestration primitive
- Updated 3 static importers + 1 dynamic await import + 3 test files
- auto-verification.js → uok/auto-verification.js
- Post-unit verification gate delegates to UOK gates (ChaosMonkey, Security,
CostGuard, OutcomeLearning, etc.)
- Updated 1 importer (auto.js)
- auto-runaway-guard.js → uok/auto-runaway-guard.js
- Diagnostic budget guard; no local relative imports
- Updated 4 importers (auto-timers.js, preferences-models.js, auto/phases.js,
auto/run-unit.js)
- auto-unit-closeout.js → uok/auto-unit-closeout.js
- Unit metrics snapshot + activity log + memory extraction helper
- Updated 3 importers (auto-timers.js, auto-post-unit.js, auto.js)
Each original file is now a 1-line re-export shim preserving public API.
All 4 are added to uok/index.js as the UOK barrel.
26 dispatch tests pass; full unit suite 4374 tests pass.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
adb449d642
commit
70afabedb7
19 changed files with 3504 additions and 3412 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -19,7 +19,7 @@ import {
|
|||
writeBlockerPlaceholder,
|
||||
} from "./auto-recovery.js";
|
||||
import { isDeterministicPolicyError } from "./auto-tool-tracking.js";
|
||||
import { closeoutUnit } from "./auto-unit-closeout.js";
|
||||
import { closeoutUnit } from "./uok/auto-unit-closeout.js";
|
||||
import { runSafely } from "./auto-utils.js";
|
||||
import { syncStateToProjectRoot } from "./auto-worktree.js";
|
||||
import { invalidateAllCaches } from "./cache.js";
|
||||
|
|
@ -754,7 +754,7 @@ export async function postUnitPreVerification(pctx, opts) {
|
|||
await resolveAllOverrides(s.basePath);
|
||||
// Reset both disk and in-memory counters. Disk counter is authoritative
|
||||
// (survives restarts); in-memory is kept in sync for the current session.
|
||||
const { setRewriteCount } = await import("./auto-dispatch.js");
|
||||
const { setRewriteCount } = await import("./uok/auto-dispatch.js");
|
||||
setRewriteCount(s.basePath, 0);
|
||||
s.rewriteAttemptCount = 0;
|
||||
ctx.ui.notify("Override(s) resolved — rewrite-docs completed.", "info");
|
||||
|
|
|
|||
|
|
@ -1,433 +1,22 @@
|
|||
/**
|
||||
* Diagnostic budget guard for unusually long autonomous mode units.
|
||||
* auto-runaway-guard.ts — Barrel re-export for the UOK runaway guard.
|
||||
*
|
||||
* This is intentionally not a blind tool-count kill switch. It gives the agent
|
||||
* explicit turns to explain whether the unit is legitimately large, stuck, or
|
||||
* churning, then pauses only if the unit keeps consuming budget afterward.
|
||||
* The implementation has moved into the UOK subsystem under uok/auto-runaway-guard.js.
|
||||
* This file preserves the original public API so external consumers
|
||||
* continue to work without changes.
|
||||
*/
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { createHash } from "node:crypto";
|
||||
import { existsSync, lstatSync, readdirSync, readFileSync } from "node:fs";
|
||||
import { formatTokenCount } from "@singularity-forge/coding-agent";
|
||||
export const DEFAULT_RUNAWAY_TOOL_CALL_WARNING = 60;
|
||||
export const DEFAULT_RUNAWAY_TOKEN_WARNING = 1_000_000;
|
||||
export const DEFAULT_RUNAWAY_ELAPSED_MINUTES = 20;
|
||||
export const DEFAULT_RUNAWAY_CHANGED_FILES_WARNING = 75;
|
||||
export const DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS = 2;
|
||||
export const DEFAULT_RUNAWAY_MIN_INTERVAL_MS = 120_000;
|
||||
const EXECUTE_NO_PROGRESS_TOOL_WARNING = 25;
|
||||
const EXECUTE_NO_PROGRESS_TOKEN_WARNING = 500_000;
|
||||
const DURABLE_SF_ARTIFACT_PATHS = [
|
||||
".sf/milestones",
|
||||
".sf/approvals",
|
||||
];
|
||||
let state = null;
|
||||
export function resetRunawayGuardState(unitType, unitId, baseline) {
|
||||
state = {
|
||||
unitKey: `${unitType}/${unitId}`,
|
||||
baselineSessionTokens: baseline?.sessionTokens ?? 0,
|
||||
baselineChangedFiles: baseline?.changedFiles ?? 0,
|
||||
baselineWorktreeFingerprint: baseline?.worktreeFingerprint ?? null,
|
||||
warningsSent: 0,
|
||||
lastWarningAt: 0,
|
||||
lastToolCalls: 0,
|
||||
lastSessionTokens: 0,
|
||||
lastElapsedMs: 0,
|
||||
finalWarningSent: false,
|
||||
};
|
||||
}
|
||||
export function clearRunawayGuardState() {
|
||||
state = null;
|
||||
}
|
||||
export function resolveRunawayGuardConfig(supervisor) {
|
||||
return {
|
||||
enabled: supervisor?.runaway_guard_enabled !== false,
|
||||
toolCallWarning:
|
||||
supervisor?.runaway_tool_call_warning ??
|
||||
DEFAULT_RUNAWAY_TOOL_CALL_WARNING,
|
||||
tokenWarning:
|
||||
supervisor?.runaway_token_warning ?? DEFAULT_RUNAWAY_TOKEN_WARNING,
|
||||
elapsedMs:
|
||||
(supervisor?.runaway_elapsed_minutes ?? DEFAULT_RUNAWAY_ELAPSED_MINUTES) *
|
||||
60 *
|
||||
1000,
|
||||
changedFilesWarning:
|
||||
supervisor?.runaway_changed_files_warning ??
|
||||
DEFAULT_RUNAWAY_CHANGED_FILES_WARNING,
|
||||
diagnosticTurns:
|
||||
supervisor?.runaway_diagnostic_turns ?? DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,
|
||||
hardPause: supervisor?.runaway_hard_pause !== false,
|
||||
minIntervalMs: DEFAULT_RUNAWAY_MIN_INTERVAL_MS,
|
||||
};
|
||||
}
|
||||
export function collectSessionTokenUsage(ctx) {
|
||||
try {
|
||||
const entries = ctx.sessionManager?.getEntries?.() ?? [];
|
||||
let total = 0;
|
||||
for (const entry of entries) {
|
||||
const message = entry.message;
|
||||
if (message?.role !== "assistant" || !message.usage) continue;
|
||||
const usage = message.usage;
|
||||
const totalTokens = numeric(usage.totalTokens ?? usage.total);
|
||||
if (totalTokens > 0) {
|
||||
total += totalTokens;
|
||||
continue;
|
||||
}
|
||||
total +=
|
||||
numeric(usage.input) +
|
||||
numeric(usage.output) +
|
||||
numeric(usage.cacheRead) +
|
||||
numeric(usage.cacheWrite);
|
||||
}
|
||||
return total;
|
||||
} catch {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
export function countChangedFiles(cwd) {
|
||||
try {
|
||||
const out = execFileSync("git", ["status", "--short"], {
|
||||
cwd,
|
||||
encoding: "utf8",
|
||||
stdio: ["ignore", "pipe", "ignore"],
|
||||
timeout: 2000,
|
||||
});
|
||||
return out
|
||||
.split("\n")
|
||||
.map((line) => line.trim())
|
||||
.filter(Boolean).length;
|
||||
} catch {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
export function collectWorktreeFingerprint(cwd) {
|
||||
try {
|
||||
const status = execFileSync(
|
||||
"git",
|
||||
["status", "--porcelain=v1", "--untracked-files=all"],
|
||||
{
|
||||
cwd,
|
||||
encoding: "utf8",
|
||||
stdio: ["ignore", "pipe", "ignore"],
|
||||
timeout: 2000,
|
||||
},
|
||||
);
|
||||
const lines = status
|
||||
.split("\n")
|
||||
.map((line) => line.trimEnd())
|
||||
.filter(Boolean);
|
||||
const hash = createHash("sha256");
|
||||
if (lines.length === 0) {
|
||||
hash.update("git-clean");
|
||||
hash.update("\0");
|
||||
}
|
||||
for (const line of lines) {
|
||||
hash.update(line);
|
||||
hash.update("\0");
|
||||
const filePath = parsePorcelainPath(line);
|
||||
if (!filePath) continue;
|
||||
appendFileFingerprint(hash, cwd, filePath);
|
||||
}
|
||||
appendDurableSfArtifactFingerprint(hash, cwd);
|
||||
return hash.digest("hex");
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
function appendDurableSfArtifactFingerprint(hash, cwd) {
|
||||
hash.update("sf-artifacts");
|
||||
hash.update("\0");
|
||||
for (const artifactPath of DURABLE_SF_ARTIFACT_PATHS) {
|
||||
appendPathFingerprint(hash, cwd, artifactPath);
|
||||
}
|
||||
}
|
||||
function appendPathFingerprint(hash, cwd, relativePath) {
|
||||
const fullPath = `${cwd}/${relativePath}`;
|
||||
if (!existsSync(fullPath)) {
|
||||
hash.update(`missing:${relativePath}`);
|
||||
hash.update("\0");
|
||||
return;
|
||||
}
|
||||
let stat;
|
||||
try {
|
||||
stat = lstatSync(fullPath);
|
||||
} catch {
|
||||
hash.update(`unreadable:${relativePath}`);
|
||||
hash.update("\0");
|
||||
return;
|
||||
}
|
||||
if (stat.isDirectory()) {
|
||||
hash.update(`dir:${relativePath}`);
|
||||
hash.update("\0");
|
||||
let entries;
|
||||
try {
|
||||
entries = readdirSync(fullPath).sort();
|
||||
} catch {
|
||||
hash.update(`unreadable-dir:${relativePath}`);
|
||||
hash.update("\0");
|
||||
return;
|
||||
}
|
||||
for (const entry of entries) {
|
||||
appendPathFingerprint(hash, cwd, `${relativePath}/${entry}`);
|
||||
}
|
||||
return;
|
||||
}
|
||||
appendFileFingerprint(hash, cwd, relativePath);
|
||||
}
|
||||
function appendFileFingerprint(hash, cwd, relativePath) {
|
||||
try {
|
||||
const stat = lstatSync(`${cwd}/${relativePath}`);
|
||||
if (!stat.isFile()) {
|
||||
hash.update(
|
||||
`type:${relativePath}:${stat.isDirectory() ? "dir" : "other"}`,
|
||||
);
|
||||
hash.update("\0");
|
||||
return;
|
||||
}
|
||||
hash.update(`file:${relativePath}`);
|
||||
hash.update("\0");
|
||||
hash.update(readFileSync(`${cwd}/${relativePath}`));
|
||||
hash.update("\0");
|
||||
} catch {
|
||||
hash.update(`unreadable-or-deleted:${relativePath}`);
|
||||
hash.update("\0");
|
||||
}
|
||||
}
|
||||
export function evaluateRunawayGuard(
|
||||
unitType,
|
||||
unitId,
|
||||
metrics,
|
||||
config,
|
||||
now = Date.now(),
|
||||
) {
|
||||
if (!config.enabled) return { action: "none" };
|
||||
if (config.diagnosticTurns <= 0) return { action: "none" };
|
||||
const unitKey = `${unitType}/${unitId}`;
|
||||
if (!state || state.unitKey !== unitKey)
|
||||
resetRunawayGuardState(unitType, unitId);
|
||||
const s = state;
|
||||
const unitMetrics = normalizeMetricsToUnit(metrics, s);
|
||||
const reasons = thresholdReasons(unitType, unitMetrics, config);
|
||||
if (reasons.length === 0) return { action: "none" };
|
||||
if (
|
||||
s.lastWarningAt > 0 &&
|
||||
now - s.lastWarningAt < config.minIntervalMs &&
|
||||
!hasMeaningfulGrowth(unitMetrics, s, config)
|
||||
) {
|
||||
return { action: "none" };
|
||||
}
|
||||
// Skip hard-pause if the unit is making file-change progress — growth with
|
||||
// changes is legitimate diagnostic/planning work, not a stuck loop.
|
||||
// Without this check, discuss/plan phases that legitimately consume tokens
|
||||
// while writing summaries/plans would be hard-paused despite making progress.
|
||||
if (
|
||||
(unitMetrics.changedFiles ?? 0) > 0 ||
|
||||
unitMetrics.worktreeChangedSinceStart === true
|
||||
) {
|
||||
return { action: "none" };
|
||||
}
|
||||
if (
|
||||
config.hardPause &&
|
||||
s.finalWarningSent &&
|
||||
hasMeaningfulGrowth(unitMetrics, s, config)
|
||||
) {
|
||||
const reason =
|
||||
`Runaway guard paused ${unitType} ${unitId}: budget kept growing after ` +
|
||||
`${config.diagnosticTurns} diagnostic turn(s). ` +
|
||||
formatMetricSummary(unitMetrics);
|
||||
return {
|
||||
action: "pause",
|
||||
reason,
|
||||
metadata: {
|
||||
reason,
|
||||
pausedAt: now,
|
||||
unitType,
|
||||
unitId,
|
||||
diagnosticTurns: config.diagnosticTurns,
|
||||
warningsSent: s.warningsSent,
|
||||
thresholdReasons: reasons,
|
||||
metrics: unitMetrics,
|
||||
lastWarningMetrics: {
|
||||
toolCalls: s.lastToolCalls,
|
||||
sessionTokens: s.lastSessionTokens,
|
||||
elapsedMs: s.lastElapsedMs,
|
||||
},
|
||||
thresholds: {
|
||||
toolCallWarning: config.toolCallWarning,
|
||||
tokenWarning: config.tokenWarning,
|
||||
elapsedMs: config.elapsedMs,
|
||||
changedFilesWarning: config.changedFilesWarning,
|
||||
minIntervalMs: config.minIntervalMs,
|
||||
},
|
||||
},
|
||||
};
|
||||
}
|
||||
const final = s.warningsSent + 1 >= config.diagnosticTurns;
|
||||
s.warningsSent++;
|
||||
s.lastWarningAt = now;
|
||||
s.lastToolCalls = unitMetrics.toolCalls;
|
||||
s.lastSessionTokens = unitMetrics.sessionTokens;
|
||||
s.lastElapsedMs = unitMetrics.elapsedMs;
|
||||
if (final) s.finalWarningSent = true;
|
||||
return {
|
||||
action: "warn",
|
||||
final,
|
||||
message: buildRunawayGuardMessage(
|
||||
unitType,
|
||||
unitId,
|
||||
unitMetrics,
|
||||
reasons,
|
||||
final,
|
||||
),
|
||||
};
|
||||
}
|
||||
function normalizeMetricsToUnit(metrics, state) {
|
||||
const worktreeChangedSinceStart =
|
||||
metrics.worktreeFingerprint !== undefined &&
|
||||
metrics.worktreeFingerprint !== null &&
|
||||
state.baselineWorktreeFingerprint !== null
|
||||
? metrics.worktreeFingerprint !== state.baselineWorktreeFingerprint
|
||||
: metrics.worktreeChangedSinceStart;
|
||||
return {
|
||||
...metrics,
|
||||
sessionTokens: Math.max(
|
||||
0,
|
||||
metrics.sessionTokens - state.baselineSessionTokens,
|
||||
),
|
||||
changedFiles:
|
||||
metrics.changedFiles === undefined
|
||||
? undefined
|
||||
: Math.max(0, metrics.changedFiles - state.baselineChangedFiles),
|
||||
worktreeChangedSinceStart,
|
||||
};
|
||||
}
|
||||
function thresholdReasons(unitType, metrics, config) {
|
||||
const reasons = [];
|
||||
// Primary signal: high tool call count — strong indicator of runaway/churn
|
||||
if (
|
||||
config.toolCallWarning > 0 &&
|
||||
metrics.toolCalls >= config.toolCallWarning
|
||||
) {
|
||||
reasons.push(
|
||||
`${metrics.toolCalls} tool calls (warning ${config.toolCallWarning})`,
|
||||
);
|
||||
}
|
||||
// Primary signal: long elapsed time — unit may be stuck
|
||||
if (config.elapsedMs > 0 && metrics.elapsedMs >= config.elapsedMs) {
|
||||
reasons.push(
|
||||
`${Math.round(metrics.elapsedMs / 60000)}min elapsed (warning ${Math.round(config.elapsedMs / 60000)}min)`,
|
||||
);
|
||||
}
|
||||
// Primary signal: many changed files — possible churn/duplication
|
||||
if (
|
||||
config.changedFilesWarning > 0 &&
|
||||
(metrics.changedFiles ?? 0) >= config.changedFilesWarning
|
||||
) {
|
||||
reasons.push(
|
||||
`${metrics.changedFiles} new changed files (warning ${config.changedFilesWarning})`,
|
||||
);
|
||||
}
|
||||
// Token count is a secondary signal: only fire when at least one primary
|
||||
// signal is also present, OR when the no-progress heuristic fires.
|
||||
// This prevents false positives on units that do real work with large
|
||||
// context models (a 25-tool-call unit can legitimately burn 1M+ tokens).
|
||||
const hasPrimarySignal = reasons.length > 0;
|
||||
if (config.tokenWarning > 0 && metrics.sessionTokens >= config.tokenWarning) {
|
||||
if (hasPrimarySignal) {
|
||||
reasons.push(
|
||||
`${formatTokenCount(metrics.sessionTokens)} unit tokens (warning ${formatTokenCount(config.tokenWarning)})`,
|
||||
);
|
||||
}
|
||||
}
|
||||
// No-progress heuristic for execute-task: no file changes despite many
|
||||
// tool calls and tokens — strong runaway indicator regardless of primary
|
||||
// signals. This is the exception where tokens alone can trigger.
|
||||
if (
|
||||
unitType === "execute-task" &&
|
||||
(metrics.changedFiles ?? 0) === 0 &&
|
||||
metrics.worktreeChangedSinceStart !== true &&
|
||||
metrics.toolCalls >= EXECUTE_NO_PROGRESS_TOOL_WARNING &&
|
||||
metrics.sessionTokens >= EXECUTE_NO_PROGRESS_TOKEN_WARNING
|
||||
) {
|
||||
reasons.push(
|
||||
`no new file changes after ${metrics.toolCalls} tool calls and ${formatTokenCount(metrics.sessionTokens)} tokens`,
|
||||
);
|
||||
}
|
||||
return reasons;
|
||||
}
|
||||
function hasMeaningfulGrowth(metrics, state, config) {
|
||||
const toolGrowth = Math.max(5, Math.floor(config.toolCallWarning / 4));
|
||||
const tokenGrowth = Math.max(50_000, Math.floor(config.tokenWarning / 4));
|
||||
return (
|
||||
metrics.toolCalls - state.lastToolCalls >= toolGrowth ||
|
||||
metrics.sessionTokens - state.lastSessionTokens >= tokenGrowth ||
|
||||
metrics.elapsedMs - state.lastElapsedMs >= config.minIntervalMs
|
||||
);
|
||||
}
|
||||
function buildRunawayGuardMessage(unitType, unitId, metrics, reasons, final) {
|
||||
const topTools = metrics.topTools
|
||||
? Object.entries(metrics.topTools)
|
||||
.sort(([, a], [, b]) => b - a)
|
||||
.slice(0, 5)
|
||||
.map(([name, count]) => `${name}x${count}`)
|
||||
.join(", ")
|
||||
: "";
|
||||
const title = final
|
||||
? "**RUNAWAY UNIT FINAL WARNING - write diagnosis and handoff now.**"
|
||||
: "**RUNAWAY UNIT BUDGET WARNING - diagnose before continuing.**";
|
||||
return [
|
||||
title,
|
||||
`Unit: ${unitType} ${unitId}`,
|
||||
`Budget signals: ${reasons.join("; ")}.`,
|
||||
topTools ? `Tool mix: ${topTools}.` : "",
|
||||
formatChangedFilesLine(unitType, metrics),
|
||||
"",
|
||||
final
|
||||
? "You have already received a budget warning. Do not start new exploration. Write or update the durable artifact/handoff now, explicitly stating whether the unit was legitimately large, blocked, or stuck in a loop."
|
||||
: "Before more exploration or broad edits, state why this unit is still running: legitimately large, blocked, or stuck/churning. Then either finish the required artifact or write a precise handoff.",
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join("\n");
|
||||
}
|
||||
function formatChangedFilesLine(unitType, metrics) {
|
||||
if ((metrics.changedFiles ?? 0) > 0) {
|
||||
return `Working tree has ${metrics.changedFiles} new changed file(s) since this unit started. Active edits are not automatically healthy progress; check for repeated or broad churn.`;
|
||||
}
|
||||
if (unitType === "execute-task" && metrics.worktreeChangedSinceStart) {
|
||||
return "Working tree has 0 new changed file paths, but dirty file content changed since this execute-task started.";
|
||||
}
|
||||
if (unitType === "execute-task") {
|
||||
return "Working tree has 0 new changed files since this execute-task started. For implementation work, that is no durable progress yet.";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
function formatMetricSummary(metrics) {
|
||||
return [
|
||||
`${metrics.toolCalls} tool calls`,
|
||||
`${formatTokenCount(metrics.sessionTokens)} tokens`,
|
||||
`${Math.round(metrics.elapsedMs / 60000)}min elapsed`,
|
||||
metrics.changedFiles !== undefined
|
||||
? `${metrics.changedFiles} new changed files`
|
||||
: "",
|
||||
metrics.worktreeChangedSinceStart ? "dirty file content changed" : "",
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(", ");
|
||||
}
|
||||
function parsePorcelainPath(line) {
|
||||
if (line.length < 4) return null;
|
||||
let filePath = line.slice(3);
|
||||
const renameSeparator = " -> ";
|
||||
if (filePath.includes(renameSeparator)) {
|
||||
filePath = filePath.slice(
|
||||
filePath.lastIndexOf(renameSeparator) + renameSeparator.length,
|
||||
);
|
||||
}
|
||||
if (filePath.startsWith('"') && filePath.endsWith('"')) {
|
||||
filePath = filePath.slice(1, -1);
|
||||
}
|
||||
return filePath || null;
|
||||
}
|
||||
function numeric(value) {
|
||||
return typeof value === "number" && Number.isFinite(value) ? value : 0;
|
||||
}
|
||||
export {
|
||||
clearRunawayGuardState,
|
||||
collectSessionTokenUsage,
|
||||
collectWorktreeFingerprint,
|
||||
countChangedFiles,
|
||||
DEFAULT_RUNAWAY_CHANGED_FILES_WARNING,
|
||||
DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,
|
||||
DEFAULT_RUNAWAY_ELAPSED_MINUTES,
|
||||
DEFAULT_RUNAWAY_MIN_INTERVAL_MS,
|
||||
DEFAULT_RUNAWAY_TOOL_CALL_WARNING,
|
||||
DEFAULT_RUNAWAY_TOKEN_WARNING,
|
||||
evaluateRunawayGuard,
|
||||
resetRunawayGuardState,
|
||||
resolveRunawayGuardConfig,
|
||||
} from "./uok/auto-runaway-guard.js";
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ import {
|
|||
countChangedFiles,
|
||||
evaluateRunawayGuard,
|
||||
resolveRunawayGuardConfig,
|
||||
} from "./auto-runaway-guard.js";
|
||||
} from "./uok/auto-runaway-guard.js";
|
||||
import { detectWorkingTreeActivity } from "./auto-supervisor.js";
|
||||
import { recoverTimedOutUnit } from "./auto-timeout-recovery.js";
|
||||
import {
|
||||
|
|
@ -24,7 +24,7 @@ import {
|
|||
getTotalToolCallCount,
|
||||
hasInteractiveToolInFlight,
|
||||
} from "./auto-tool-tracking.js";
|
||||
import { closeoutUnit } from "./auto-unit-closeout.js";
|
||||
import { closeoutUnit } from "./uok/auto-unit-closeout.js";
|
||||
import {
|
||||
computeBudgets,
|
||||
resolveExecutorContextWindow,
|
||||
|
|
|
|||
|
|
@ -1,85 +1,8 @@
|
|||
/**
|
||||
* Unit closeout helper — consolidates the repeated pattern of
|
||||
* snapshotting metrics + saving activity log + extracting memories
|
||||
* that appears 6+ times in auto.ts.
|
||||
* auto-unit-closeout.ts — Barrel re-export for the UOK unit closeout helper.
|
||||
*
|
||||
* The implementation has moved into the UOK subsystem under uok/auto-unit-closeout.js.
|
||||
* This file preserves the original public API so external consumers
|
||||
* continue to work without changes.
|
||||
*/
|
||||
import { saveActivityLog } from "./activity-log.js";
|
||||
import { snapshotUnitMetrics } from "./metrics.js";
|
||||
import { updateSubscriptionTokensUsed } from "./preferences-models.js";
|
||||
import { writeTurnGitTransaction } from "./uok/gitops.js";
|
||||
import { logWarning } from "./workflow-logger.js";
|
||||
/**
|
||||
* Snapshot metrics, save activity log, and fire-and-forget memory extraction
|
||||
* for a completed unit. Returns the activity log file path (if any).
|
||||
*/
|
||||
export async function closeoutUnit(
|
||||
ctx,
|
||||
basePath,
|
||||
unitType,
|
||||
unitId,
|
||||
startedAt,
|
||||
opts,
|
||||
) {
|
||||
const provider = ctx.model?.provider;
|
||||
const id = ctx.model?.id;
|
||||
const modelId = provider && id ? `${provider}/${id}` : (id ?? "unknown");
|
||||
const unit = snapshotUnitMetrics(
|
||||
ctx,
|
||||
unitType,
|
||||
unitId,
|
||||
startedAt,
|
||||
modelId,
|
||||
opts,
|
||||
);
|
||||
// Track subscription token consumption for amortized cost reporting.
|
||||
// Fire-and-forget: updateSubscriptionTokensUsed is already best-effort.
|
||||
if (provider && unit && unit.tokens.total > 0) {
|
||||
updateSubscriptionTokensUsed(provider, unit.tokens.total);
|
||||
}
|
||||
const activityFile = saveActivityLog(ctx, basePath, unitType, unitId);
|
||||
if (activityFile) {
|
||||
try {
|
||||
const { buildMemoryLLMCall, extractMemoriesFromUnit } = await import(
|
||||
"./memory-extractor.js"
|
||||
);
|
||||
const llmCallFn = buildMemoryLLMCall(ctx);
|
||||
if (llmCallFn) {
|
||||
extractMemoriesFromUnit(
|
||||
activityFile,
|
||||
unitType,
|
||||
unitId,
|
||||
llmCallFn,
|
||||
).catch((err) => {
|
||||
logWarning(
|
||||
"engine",
|
||||
`memory extraction failed for ${unitType}/${unitId}: ${err.message}`,
|
||||
);
|
||||
});
|
||||
}
|
||||
} catch (err) {
|
||||
/* non-fatal */
|
||||
logWarning(
|
||||
"engine",
|
||||
`operation failed: ${err instanceof Error ? err.message : String(err)}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
if (opts?.traceId && opts.turnId && opts.gitAction && opts.gitStatus) {
|
||||
writeTurnGitTransaction({
|
||||
basePath,
|
||||
traceId: opts.traceId,
|
||||
turnId: opts.turnId,
|
||||
unitType,
|
||||
unitId,
|
||||
stage: "record",
|
||||
action: opts.gitAction,
|
||||
push: opts.gitPush === true,
|
||||
status: opts.gitStatus,
|
||||
error: opts.gitError,
|
||||
metadata: {
|
||||
activityFile,
|
||||
},
|
||||
});
|
||||
}
|
||||
return activityFile ?? undefined;
|
||||
}
|
||||
export { closeoutUnit } from "./uok/auto-unit-closeout.js";
|
||||
|
|
|
|||
|
|
@ -1,824 +1,8 @@
|
|||
/**
|
||||
* Post-unit verification gate for autonomous mode.
|
||||
* auto-verification.ts — Barrel re-export for the UOK post-unit verification gate.
|
||||
*
|
||||
* Runs typecheck/lint/test checks, captures runtime errors, performs
|
||||
* dependency audits, handles auto-fix retry logic, and writes
|
||||
* verification evidence JSON.
|
||||
*
|
||||
* Extracted from handleAgentEnd() in auto.ts. Returns a sentinel
|
||||
* value instead of calling return/pauseAuto directly — the caller
|
||||
* checks the result and handles control flow.
|
||||
* The implementation has moved into the UOK subsystem under uok/auto-verification.js.
|
||||
* This file preserves the original public API so external consumers
|
||||
* continue to work without changes.
|
||||
*/
|
||||
import { mkdirSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { loadFile } from "./files.js";
|
||||
import { parseRoadmap } from "./parsers.js";
|
||||
import { resolveMilestoneFile, resolveSlicePath } from "./paths.js";
|
||||
import { runPostExecutionChecks } from "./post-execution-checks.js";
|
||||
import { loadEffectiveSFPreferences } from "./preferences.js";
|
||||
import {
|
||||
getMilestoneSlices,
|
||||
getSliceTasks,
|
||||
getTask,
|
||||
isDbAvailable,
|
||||
} from "./sf-db.js";
|
||||
import { isMilestoneComplete } from "./state.js";
|
||||
import { isClosedStatus } from "./status-guards.js";
|
||||
import { parseUnitId } from "./unit-id.js";
|
||||
import { ChaosMonkeyGate } from "./uok/chaos-monkey.js";
|
||||
import { CostGuardGate } from "./uok/cost-guard-gate.js";
|
||||
import { resolveUokFlags } from "./uok/flags.js";
|
||||
import { UokGateRunner } from "./uok/gate-runner.js";
|
||||
import { MultiPackageGate } from "./uok/multi-package-gate.js";
|
||||
import { OutcomeLearningGate } from "./uok/outcome-learning-gate.js";
|
||||
import { SecurityGate } from "./uok/security-gate.js";
|
||||
import {
|
||||
formatExecuteTaskRecoveryStatus,
|
||||
inspectExecuteTaskDurability,
|
||||
} from "./uok/unit-runtime.js";
|
||||
import { extractVerdict } from "./verdict-parser.js";
|
||||
import { writeVerificationJSON } from "./verification-evidence.js";
|
||||
import {
|
||||
captureRuntimeErrors,
|
||||
formatFailureContext,
|
||||
runDependencyAudit,
|
||||
runVerificationGate,
|
||||
} from "./verification-gate.js";
|
||||
import { logError, logWarning } from "./workflow-logger.js";
|
||||
|
||||
function computeTokenCountFromSession(ctx) {
|
||||
const entries = ctx.sessionManager?.getEntries?.() ?? [];
|
||||
let total = 0;
|
||||
for (const entry of entries) {
|
||||
if (entry.type !== "message") continue;
|
||||
const msg = entry.message;
|
||||
if (!msg || msg.role !== "assistant") continue;
|
||||
if (msg.usage?.totalTokens != null) {
|
||||
total += msg.usage.totalTokens;
|
||||
}
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
function getMemoryPressureMB() {
|
||||
try {
|
||||
const mem = process.memoryUsage();
|
||||
return Math.round(mem.heapUsed / 1024 / 1024);
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
function buildGateOutcomesSummary(gateIds, gateResults) {
|
||||
if (!gateIds || !gateResults || gateIds.length === 0) return undefined;
|
||||
const outcomes = {};
|
||||
for (let i = 0; i < gateIds.length; i++) {
|
||||
outcomes[gateIds[i]] = gateResults[i]?.outcome ?? "unknown";
|
||||
}
|
||||
return outcomes;
|
||||
}
|
||||
|
||||
function isInfraVerificationFailure(stderr) {
|
||||
return /\b(ENOENT|ENOTFOUND|ETIMEDOUT|ECONNRESET|EAI_AGAIN|spawn\s+\S+\s+ENOENT|command not found)\b/i.test(
|
||||
stderr,
|
||||
);
|
||||
}
|
||||
/**
|
||||
* Post-unit guard for `validate-milestone` units (#4094).
|
||||
*
|
||||
* When validate-milestone writes verdict=needs-remediation, the agent is
|
||||
* expected to also call reassess_roadmap in the same turn to add
|
||||
* remediation slices. If they don't, the state machine re-derives
|
||||
* `phase: validating-milestone` indefinitely (all slices still complete +
|
||||
* verdict still needs-remediation), wasting ~3 dispatches before the stuck
|
||||
* detector fires.
|
||||
*
|
||||
* This guard fires immediately on the first occurrence: if VALIDATION.md
|
||||
* verdict is needs-remediation and no incomplete slices exist for the
|
||||
* milestone, pause the auto-loop with a clear blocker.
|
||||
*/
|
||||
async function runValidateMilestonePostCheck(vctx, pauseAuto) {
|
||||
const { s, ctx, pi } = vctx;
|
||||
const prefs = loadEffectiveSFPreferences()?.preferences;
|
||||
const uokFlags = resolveUokFlags(prefs);
|
||||
const persistMilestoneValidationGate = async (
|
||||
outcome,
|
||||
failureClass,
|
||||
rationale,
|
||||
findings = "",
|
||||
milestoneId,
|
||||
) => {
|
||||
if (!uokFlags.gates || !s.currentUnit) return;
|
||||
const gateRunner = new UokGateRunner();
|
||||
gateRunner.register({
|
||||
id: "milestone-validation-post-check",
|
||||
type: "verification",
|
||||
execute: async () => ({
|
||||
outcome,
|
||||
failureClass,
|
||||
rationale,
|
||||
findings,
|
||||
}),
|
||||
});
|
||||
await gateRunner.run("milestone-validation-post-check", {
|
||||
basePath: s.basePath,
|
||||
traceId: `validation-post-check:${s.currentUnit.id}`,
|
||||
turnId: s.currentUnit.id,
|
||||
milestoneId,
|
||||
unitType: s.currentUnit.type,
|
||||
unitId: s.currentUnit.id,
|
||||
});
|
||||
};
|
||||
if (!s.currentUnit) return "continue";
|
||||
const { milestone: mid } = parseUnitId(s.currentUnit.id);
|
||||
if (!mid) return "continue";
|
||||
const validationFile = resolveMilestoneFile(s.basePath, mid, "VALIDATION");
|
||||
if (!validationFile) return "continue";
|
||||
const validationContent = await loadFile(validationFile);
|
||||
if (!validationContent) return "continue";
|
||||
const verdict = extractVerdict(validationContent);
|
||||
if (verdict !== "needs-remediation") {
|
||||
await persistMilestoneValidationGate(
|
||||
"pass",
|
||||
"none",
|
||||
`milestone validation verdict is ${verdict}; no remediation loop risk`,
|
||||
"",
|
||||
mid,
|
||||
);
|
||||
return "continue";
|
||||
}
|
||||
const incompleteSliceCount = await countIncompleteSlices(s.basePath, mid);
|
||||
// If any non-closed slices exist, the agent successfully queued remediation
|
||||
// work — proceed normally. The state machine will execute those slices and
|
||||
// re-validate per the #3596/#3670 fix.
|
||||
if (incompleteSliceCount > 0) {
|
||||
await persistMilestoneValidationGate(
|
||||
"pass",
|
||||
"none",
|
||||
`remediation slices present (${incompleteSliceCount}); validation can continue`,
|
||||
"",
|
||||
mid,
|
||||
);
|
||||
return "continue";
|
||||
}
|
||||
ctx.ui.notify(
|
||||
`Milestone ${mid} validation returned verdict=needs-remediation but no remediation slices were added. Pausing for human review.`,
|
||||
"error",
|
||||
);
|
||||
process.stderr.write(
|
||||
`validate-milestone: pausing — verdict=needs-remediation with no incomplete slices for ${mid}. ` +
|
||||
`The agent must call reassess_roadmap to add remediation slices before re-validation.\n`,
|
||||
);
|
||||
await persistMilestoneValidationGate(
|
||||
"manual-attention",
|
||||
"manual-attention",
|
||||
"needs-remediation verdict without queued remediation slices",
|
||||
`No incomplete slices found for ${mid} while verdict=needs-remediation`,
|
||||
mid,
|
||||
);
|
||||
await pauseAuto(ctx, pi);
|
||||
return "pause";
|
||||
}
|
||||
/**
|
||||
* Count slices for a milestone that are not in a closed status.
|
||||
* DB-backed projects are authoritative (#4094 peer review); falls back to
|
||||
* roadmap parsing only when the DB is unavailable.
|
||||
*/
|
||||
async function countIncompleteSlices(basePath, milestoneId) {
|
||||
if (isDbAvailable()) {
|
||||
const slices = getMilestoneSlices(milestoneId);
|
||||
if (slices.length === 0) {
|
||||
// No DB rows — treat as "unknown", do not pause.
|
||||
return 1;
|
||||
}
|
||||
return slices.filter((slice) => !isClosedStatus(slice.status)).length;
|
||||
}
|
||||
// Filesystem fallback: parse the roadmap markdown.
|
||||
try {
|
||||
const roadmapFile = resolveMilestoneFile(basePath, milestoneId, "ROADMAP");
|
||||
if (!roadmapFile) return 1;
|
||||
const roadmapContent = await loadFile(roadmapFile);
|
||||
if (!roadmapContent) return 1;
|
||||
const roadmap = parseRoadmap(roadmapContent);
|
||||
if (roadmap.slices.length === 0) return 1;
|
||||
return isMilestoneComplete(roadmap) ? 0 : 1;
|
||||
} catch {
|
||||
// Parsing failures should not cause false-positive pauses.
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Run the verification gate for the current execute-task unit.
|
||||
* Returns:
|
||||
* - "continue" — gate passed (or no checks configured), proceed normally
|
||||
* - "retry" — gate failed with retries remaining, s.pendingVerificationRetry set for loop re-iteration
|
||||
* - "pause" — gate failed with retries exhausted, pauseAuto already called
|
||||
*/
|
||||
export async function runPostUnitVerification(vctx, pauseAuto) {
|
||||
const { s, ctx, pi } = vctx;
|
||||
if (!s.currentUnit) {
|
||||
return "continue";
|
||||
}
|
||||
if (s.currentUnit.type === "validate-milestone") {
|
||||
return await runValidateMilestonePostCheck(vctx, pauseAuto);
|
||||
}
|
||||
if (s.currentUnit.type !== "execute-task") {
|
||||
return "continue";
|
||||
}
|
||||
// ── Zone 1: Gate machinery (outer try) ──────────────────────────────────
|
||||
// Failures here indicate broken infrastructure — pause for human review.
|
||||
let prefs;
|
||||
let uokFlags;
|
||||
let mid;
|
||||
let sid;
|
||||
let tid;
|
||||
let result;
|
||||
try {
|
||||
const effectivePrefs = loadEffectiveSFPreferences();
|
||||
prefs = effectivePrefs?.preferences;
|
||||
uokFlags = resolveUokFlags(prefs);
|
||||
// Read task plan verify field
|
||||
({ milestone: mid, slice: sid, task: tid } = parseUnitId(s.currentUnit.id));
|
||||
let taskPlanVerify;
|
||||
if (mid && sid && tid) {
|
||||
if (isDbAvailable()) {
|
||||
taskPlanVerify = getTask(mid, sid, tid)?.verify;
|
||||
}
|
||||
// When DB unavailable, taskPlanVerify stays undefined — gate runs without task-specific checks
|
||||
}
|
||||
result = runVerificationGate({
|
||||
cwd: s.basePath,
|
||||
preferenceCommands: prefs?.verification_commands,
|
||||
taskPlanVerify,
|
||||
});
|
||||
// Handle skipped gate (no commands discovered) — fail-closed but not a hard failure
|
||||
if (result.skipped === true) {
|
||||
process.stderr.write(
|
||||
"verification-gate: no commands discovered — gate skipped, not passed\n",
|
||||
);
|
||||
ctx.ui.notify(
|
||||
"[verify] SKIP — no verification commands configured",
|
||||
"warning",
|
||||
);
|
||||
return "continue";
|
||||
}
|
||||
// Capture runtime errors
|
||||
const runtimeErrors = await captureRuntimeErrors();
|
||||
if (runtimeErrors.length > 0) {
|
||||
result.runtimeErrors = runtimeErrors;
|
||||
if (runtimeErrors.some((e) => e.blocking)) {
|
||||
result.passed = false;
|
||||
}
|
||||
}
|
||||
// Dependency audit
|
||||
const auditWarnings = runDependencyAudit(s.basePath);
|
||||
if (auditWarnings.length > 0) {
|
||||
result.auditWarnings = auditWarnings;
|
||||
process.stderr.write(
|
||||
`verification-gate: ${auditWarnings.length} audit warning(s)\n`,
|
||||
);
|
||||
for (const w of auditWarnings) {
|
||||
process.stderr.write(` [${w.severity}] ${w.name}: ${w.title}\n`);
|
||||
}
|
||||
}
|
||||
} catch (machineryErr) {
|
||||
logError(
|
||||
"engine",
|
||||
`verification-gate machinery error — pausing for human review: ${machineryErr.message}`,
|
||||
);
|
||||
ctx.ui.notify(
|
||||
"verification-gate machinery error — pausing for human review",
|
||||
"error",
|
||||
);
|
||||
await pauseAuto(ctx, pi);
|
||||
return "pause";
|
||||
}
|
||||
// ── Zone 2: Ancillary post-gate work (inner try) ─────────────────────────
|
||||
// Failures here are non-fatal — evidence writes, UOK gate calls, notifications, retry logic.
|
||||
let gateIds = [];
|
||||
let gateResults = [];
|
||||
try {
|
||||
if (uokFlags.gates) {
|
||||
const gateRunner = new UokGateRunner();
|
||||
gateRunner.register({
|
||||
id: "verification-gate",
|
||||
type: "verification",
|
||||
execute: async () => ({
|
||||
outcome: result.passed ? "pass" : "fail",
|
||||
failureClass: result.runtimeErrors?.some((e) => e.blocking)
|
||||
? "execution"
|
||||
: "verification",
|
||||
rationale: result.passed
|
||||
? "verification checks passed"
|
||||
: "verification checks failed",
|
||||
findings: result.passed ? "" : formatFailureContext(result),
|
||||
}),
|
||||
});
|
||||
if (uokFlags.securityGuard) {
|
||||
gateRunner.register(new SecurityGate());
|
||||
}
|
||||
if (uokFlags.multiPackageHealing) {
|
||||
gateRunner.register(new MultiPackageGate());
|
||||
}
|
||||
if (uokFlags.autonomousCostGuard) {
|
||||
gateRunner.register(new CostGuardGate());
|
||||
}
|
||||
if (uokFlags.outcomeLearning) {
|
||||
gateRunner.register(new OutcomeLearningGate());
|
||||
}
|
||||
if (uokFlags.chaosMonkey) {
|
||||
gateRunner.register(new ChaosMonkeyGate({ active: true }));
|
||||
}
|
||||
|
||||
const baseCtx = {
|
||||
basePath: s.basePath,
|
||||
traceId: `verification:${s.currentUnit.id}`,
|
||||
turnId: s.currentUnit.id,
|
||||
milestoneId: mid ?? undefined,
|
||||
sliceId: sid ?? undefined,
|
||||
taskId: tid ?? undefined,
|
||||
unitType: s.currentUnit.type,
|
||||
unitId: s.currentUnit.id,
|
||||
iteration: s.verificationRetryCount.get(s.currentUnit.id) ?? 0,
|
||||
};
|
||||
|
||||
gateIds = gateRunner.list().map((g) => g.id);
|
||||
gateResults = await Promise.all(
|
||||
gateIds.map((id) =>
|
||||
gateRunner
|
||||
.run(id, {
|
||||
...baseCtx,
|
||||
traceId: `${id}:${s.currentUnit.id}`,
|
||||
})
|
||||
.catch((err) => ({
|
||||
outcome: "fail",
|
||||
failureClass: "unknown",
|
||||
rationale: `Gate ${id} threw: ${err instanceof Error ? err.message : String(err)}`,
|
||||
})),
|
||||
),
|
||||
);
|
||||
|
||||
for (let i = 0; i < gateIds.length; i++) {
|
||||
const id = gateIds[i];
|
||||
const res = gateResults[i];
|
||||
if (res.outcome !== "fail") continue;
|
||||
result.passed = false;
|
||||
if (id === "security-guard") {
|
||||
result.securityFailure = true;
|
||||
result.securityRationale = res.rationale;
|
||||
result.securityFindings = res.findings;
|
||||
} else if (id === "multi-package-healing") {
|
||||
result.multiPackageFailure = true;
|
||||
result.multiPackageRationale = res.rationale;
|
||||
result.multiPackageFindings = res.findings;
|
||||
} else if (id === "cost-guard") {
|
||||
result.costGuardFailure = true;
|
||||
result.costGuardRationale = res.rationale;
|
||||
} else if (id === "chaos-monkey") {
|
||||
result.chaosMonkeyFailure = true;
|
||||
result.chaosMonkeyRationale = res.rationale;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Auto-fix retry preferences
|
||||
const autoFixEnabled = prefs?.verification_auto_fix !== false;
|
||||
const maxRetries =
|
||||
typeof prefs?.verification_max_retries === "number"
|
||||
? prefs.verification_max_retries
|
||||
: 2;
|
||||
if (result.checks.length > 0) {
|
||||
const passCount = result.checks.filter((c) => c.exitCode === 0).length;
|
||||
const total = result.checks.length;
|
||||
const commandList = result.checks.map((c) => c.command).join(" | ");
|
||||
ctx.ui.notify(`[verify] running: ${commandList}`, "info");
|
||||
const attemptSoFar = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;
|
||||
if (result.passed) {
|
||||
ctx.ui.notify(`[verify] PASS - ${passCount}/${total} checks`, "info");
|
||||
} else {
|
||||
const failures = result.checks.filter((c) => c.exitCode !== 0);
|
||||
const failNames = failures.map((f) => f.command).join(", ");
|
||||
const nextAttempt = attemptSoFar + 1;
|
||||
ctx.ui.notify(
|
||||
`[verify] FAIL - ${failNames} (auto-fix attempt ${nextAttempt}/${maxRetries})`,
|
||||
"info",
|
||||
);
|
||||
process.stderr.write(
|
||||
`verification-gate: ${total - passCount}/${total} checks failed\n`,
|
||||
);
|
||||
for (const f of failures) {
|
||||
process.stderr.write(` ${f.command} exited ${f.exitCode}\n`);
|
||||
if (f.stderr)
|
||||
process.stderr.write(` stderr: ${f.stderr.slice(0, 500)}\n`);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Log blocking runtime errors
|
||||
if (result.runtimeErrors?.some((e) => e.blocking)) {
|
||||
const blockingErrors = result.runtimeErrors.filter((e) => e.blocking);
|
||||
process.stderr.write(
|
||||
`verification-gate: ${blockingErrors.length} blocking runtime error(s) detected\n`,
|
||||
);
|
||||
for (const err of blockingErrors) {
|
||||
process.stderr.write(
|
||||
` [${err.source}] ${err.severity}: ${err.message.slice(0, 200)}\n`,
|
||||
);
|
||||
}
|
||||
}
|
||||
// Log security failures
|
||||
if (result.securityFailure) {
|
||||
ctx.ui.notify(
|
||||
`[verify] SECURITY FAIL — ${result.securityRationale}`,
|
||||
"error",
|
||||
);
|
||||
process.stderr.write(
|
||||
`verification-gate: security failure: ${result.securityRationale}\n`,
|
||||
);
|
||||
if (result.securityFindings) {
|
||||
process.stderr.write(`${result.securityFindings}\n`);
|
||||
}
|
||||
}
|
||||
// Log multi-package failures
|
||||
if (result.multiPackageFailure) {
|
||||
ctx.ui.notify(
|
||||
`[verify] MULTI-PACKAGE FAIL — ${result.multiPackageRationale}`,
|
||||
"error",
|
||||
);
|
||||
process.stderr.write(
|
||||
`verification-gate: multi-package healing failure: ${result.multiPackageRationale}\n`,
|
||||
);
|
||||
if (result.multiPackageFindings) {
|
||||
process.stderr.write(`${result.multiPackageFindings}\n`);
|
||||
}
|
||||
}
|
||||
// Log cost-guard failures
|
||||
if (result.costGuardFailure) {
|
||||
ctx.ui.notify(
|
||||
`[verify] COST-GUARD FAIL — ${result.costGuardRationale}`,
|
||||
"error",
|
||||
);
|
||||
process.stderr.write(
|
||||
`verification-gate: cost-guard failure: ${result.costGuardRationale}\n`,
|
||||
);
|
||||
}
|
||||
// Log chaos-monkey failures
|
||||
if (result.chaosMonkeyFailure) {
|
||||
ctx.ui.notify(
|
||||
`[verify] CHAOS-MONKEY FAIL — ${result.chaosMonkeyRationale}`,
|
||||
"error",
|
||||
);
|
||||
process.stderr.write(
|
||||
`verification-gate: chaos-monkey injected failure: ${result.chaosMonkeyRationale}\n`,
|
||||
);
|
||||
}
|
||||
// Write verification evidence JSON
|
||||
const attempt = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;
|
||||
const tokenCount = computeTokenCountFromSession(ctx);
|
||||
const memoryPressureMB = getMemoryPressureMB();
|
||||
const gateOutcomes = buildGateOutcomesSummary(gateIds, gateResults);
|
||||
let recoveryStatus;
|
||||
try {
|
||||
const durability = await inspectExecuteTaskDurability(
|
||||
s.basePath,
|
||||
s.currentUnit.id,
|
||||
);
|
||||
if (durability) {
|
||||
recoveryStatus = formatExecuteTaskRecoveryStatus(durability);
|
||||
}
|
||||
} catch {
|
||||
recoveryStatus = undefined;
|
||||
}
|
||||
if (mid && sid && tid) {
|
||||
try {
|
||||
const sDir = resolveSlicePath(s.basePath, mid, sid);
|
||||
if (sDir) {
|
||||
const tasksDir = join(sDir, "tasks");
|
||||
if (result.passed) {
|
||||
writeVerificationJSON(
|
||||
result,
|
||||
tasksDir,
|
||||
tid,
|
||||
s.currentUnit.id,
|
||||
undefined,
|
||||
undefined,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
);
|
||||
} else {
|
||||
const nextAttempt = attempt + 1;
|
||||
writeVerificationJSON(
|
||||
result,
|
||||
tasksDir,
|
||||
tid,
|
||||
s.currentUnit.id,
|
||||
nextAttempt,
|
||||
maxRetries,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (evidenceErr) {
|
||||
logWarning(
|
||||
"engine",
|
||||
`verification-evidence write error: ${evidenceErr.message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
const advisoryFailure =
|
||||
!result.passed &&
|
||||
(result.discoverySource === "package-json" ||
|
||||
result.checks.some((check) =>
|
||||
isInfraVerificationFailure(check.stderr),
|
||||
));
|
||||
if (advisoryFailure) {
|
||||
s.verificationRetryCount.delete(s.currentUnit.id);
|
||||
s.pendingVerificationRetry = null;
|
||||
ctx.ui.notify(
|
||||
result.discoverySource === "package-json"
|
||||
? "Verification failed in auto-discovered package.json checks — treating as advisory."
|
||||
: "Verification failed due to infrastructure/runtime environment issues — treating as advisory.",
|
||||
"warning",
|
||||
);
|
||||
return "continue";
|
||||
}
|
||||
// ── Post-execution checks (run after main verification passes for execute-task units) ──
|
||||
let postExecChecks;
|
||||
let postExecBlockingFailure = false;
|
||||
if (result.passed && mid && sid && tid) {
|
||||
// Check preferences — respect enhanced_verification and enhanced_verification_post
|
||||
const enhancedEnabled = prefs?.enhanced_verification !== false; // default true
|
||||
const postEnabled = prefs?.enhanced_verification_post !== false; // default true
|
||||
if (enhancedEnabled && postEnabled && isDbAvailable()) {
|
||||
try {
|
||||
// Get the completed task from DB
|
||||
const taskRow = getTask(mid, sid, tid);
|
||||
if (taskRow && taskRow.key_files && taskRow.key_files.length > 0) {
|
||||
// Get all tasks in the slice
|
||||
const allTasks = getSliceTasks(mid, sid);
|
||||
// Filter to prior completed tasks (status = 'complete' or 'done', before current task)
|
||||
const priorTasks = allTasks.filter(
|
||||
(t) =>
|
||||
(t.status === "complete" || t.status === "done") &&
|
||||
t.id !== tid &&
|
||||
t.sequence < taskRow.sequence,
|
||||
);
|
||||
// Run post-execution checks
|
||||
const postExecResult = runPostExecutionChecks(
|
||||
taskRow,
|
||||
priorTasks,
|
||||
s.basePath,
|
||||
);
|
||||
// Store checks for evidence JSON
|
||||
postExecChecks = postExecResult.checks;
|
||||
// Log summary to stderr with sf-post-exec: prefix
|
||||
const emoji =
|
||||
postExecResult.status === "pass"
|
||||
? "✅"
|
||||
: postExecResult.status === "warn"
|
||||
? "⚠️"
|
||||
: "❌";
|
||||
process.stderr.write(
|
||||
`sf-post-exec: ${emoji} Post-execution checks ${postExecResult.status} for ${mid}/${sid}/${tid} (${postExecResult.durationMs}ms)\n`,
|
||||
);
|
||||
// Log individual check results
|
||||
for (const check of postExecResult.checks) {
|
||||
const checkEmoji = check.passed
|
||||
? "✓"
|
||||
: check.blocking
|
||||
? "✗"
|
||||
: "⚠";
|
||||
process.stderr.write(
|
||||
`sf-post-exec: ${checkEmoji} [${check.category}] ${check.target}: ${check.message}\n`,
|
||||
);
|
||||
}
|
||||
if (uokFlags.gates) {
|
||||
const strictMode = prefs?.enhanced_verification_strict === true;
|
||||
const warnEscalated =
|
||||
postExecResult.status === "warn" && strictMode;
|
||||
const blockingFailure =
|
||||
postExecResult.status === "fail" || warnEscalated;
|
||||
const findings = postExecResult.checks
|
||||
.filter((check) => !check.passed)
|
||||
.map(
|
||||
(check) =>
|
||||
`[${check.category}] ${check.target}: ${check.message}`,
|
||||
)
|
||||
.join("\n");
|
||||
const gateRunner = new UokGateRunner();
|
||||
gateRunner.register({
|
||||
id: "post-execution-checks",
|
||||
type: "artifact",
|
||||
execute: async () => ({
|
||||
outcome: blockingFailure ? "fail" : "pass",
|
||||
failureClass:
|
||||
postExecResult.status === "fail"
|
||||
? "artifact"
|
||||
: warnEscalated
|
||||
? "policy"
|
||||
: "none",
|
||||
rationale: blockingFailure
|
||||
? `post-execution checks ${postExecResult.status}${warnEscalated ? " (strict)" : ""}`
|
||||
: "post-execution checks passed",
|
||||
findings,
|
||||
}),
|
||||
});
|
||||
await gateRunner.run("post-execution-checks", {
|
||||
basePath: s.basePath,
|
||||
traceId: `verification:${s.currentUnit.id}`,
|
||||
turnId: s.currentUnit.id,
|
||||
milestoneId: mid,
|
||||
sliceId: sid,
|
||||
taskId: tid,
|
||||
unitType: s.currentUnit.type,
|
||||
unitId: s.currentUnit.id,
|
||||
});
|
||||
}
|
||||
// Check for blocking failures
|
||||
if (postExecResult.status === "fail") {
|
||||
postExecBlockingFailure = true;
|
||||
const blockingCount = postExecResult.checks.filter(
|
||||
(c) => !c.passed && c.blocking,
|
||||
).length;
|
||||
ctx.ui.notify(
|
||||
`Post-execution checks failed: ${blockingCount} blocking issue${blockingCount === 1 ? "" : "s"} found`,
|
||||
"error",
|
||||
);
|
||||
} else if (postExecResult.status === "warn") {
|
||||
ctx.ui.notify(
|
||||
`Post-execution checks passed with warnings`,
|
||||
"warning",
|
||||
);
|
||||
// Strict mode: treat warnings as blocking
|
||||
if (prefs?.enhanced_verification_strict === true) {
|
||||
postExecBlockingFailure = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (postExecErr) {
|
||||
// Post-execution check errors are non-fatal — log and continue
|
||||
logWarning("engine", `sf-post-exec: error — ${postExecErr.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Re-write verification evidence JSON with post-execution checks
|
||||
if (postExecChecks && postExecChecks.length > 0 && mid && sid && tid) {
|
||||
try {
|
||||
const sDir = resolveSlicePath(s.basePath, mid, sid);
|
||||
if (sDir) {
|
||||
const tasksDir = join(sDir, "tasks");
|
||||
// Add postExecutionChecks to the result for the JSON write
|
||||
const resultWithPostExec = {
|
||||
...result,
|
||||
// Mark as failed if there was a blocking post-exec failure
|
||||
passed: result.passed && !postExecBlockingFailure,
|
||||
};
|
||||
// Manually write with postExecutionChecks field
|
||||
writeVerificationJSONWithPostExec(
|
||||
resultWithPostExec,
|
||||
tasksDir,
|
||||
tid,
|
||||
s.currentUnit.id,
|
||||
postExecChecks,
|
||||
postExecBlockingFailure ? attempt + 1 : undefined,
|
||||
postExecBlockingFailure ? maxRetries : undefined,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
);
|
||||
}
|
||||
} catch (evidenceErr) {
|
||||
logWarning(
|
||||
"engine",
|
||||
`verification-evidence: post-exec write error — ${evidenceErr.message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
// Update result.passed based on post-execution checks
|
||||
if (postExecBlockingFailure) {
|
||||
result.passed = false;
|
||||
}
|
||||
// ── Auto-fix retry logic ──
|
||||
if (result.passed) {
|
||||
s.verificationRetryCount.delete(s.currentUnit.id);
|
||||
s.pendingVerificationRetry = null;
|
||||
return "continue";
|
||||
} else if (postExecBlockingFailure) {
|
||||
// Post-execution failures are cross-task consistency issues — retrying the same task won't fix them.
|
||||
// Skip retry and pause immediately for human review.
|
||||
s.verificationRetryCount.delete(s.currentUnit.id);
|
||||
s.pendingVerificationRetry = null;
|
||||
ctx.ui.notify(
|
||||
`Post-execution checks failed — cross-task consistency issue detected, pausing for human review`,
|
||||
"error",
|
||||
);
|
||||
await pauseAuto(ctx, pi);
|
||||
return "pause";
|
||||
} else if (autoFixEnabled && attempt + 1 <= maxRetries) {
|
||||
const nextAttempt = attempt + 1;
|
||||
s.verificationRetryCount.set(s.currentUnit.id, nextAttempt);
|
||||
s.pendingVerificationRetry = {
|
||||
unitId: s.currentUnit.id,
|
||||
failureContext: formatFailureContext(result),
|
||||
attempt: nextAttempt,
|
||||
};
|
||||
const failedCmds = result.checks
|
||||
.filter((c) => c.exitCode !== 0)
|
||||
.map((c) => c.command);
|
||||
const cmdSummary =
|
||||
failedCmds.length <= 3
|
||||
? failedCmds.join(", ")
|
||||
: `${failedCmds.slice(0, 3).join(", ")}... and ${failedCmds.length - 3} more`;
|
||||
ctx.ui.notify(
|
||||
`Verification failed (${cmdSummary}) — auto-fix attempt ${nextAttempt}/${maxRetries}`,
|
||||
"warning",
|
||||
);
|
||||
// Return "retry" — the autoLoop while loop will re-iterate with the retry context
|
||||
return "retry";
|
||||
} else {
|
||||
// Gate failed, retries exhausted
|
||||
s.verificationRetryCount.delete(s.currentUnit.id);
|
||||
s.pendingVerificationRetry = null;
|
||||
const exhaustedFails = result.checks
|
||||
.filter((c) => c.exitCode !== 0)
|
||||
.map((c) => c.command);
|
||||
const exhaustedSummary =
|
||||
exhaustedFails.length <= 3
|
||||
? exhaustedFails.join(", ")
|
||||
: `${exhaustedFails.slice(0, 3).join(", ")}... and ${exhaustedFails.length - 3} more`;
|
||||
ctx.ui.notify(
|
||||
`Verification gate FAILED after ${attempt} ${attempt === 1 ? "retry" : "retries"} (${exhaustedSummary}) — pausing for human review`,
|
||||
"error",
|
||||
);
|
||||
await pauseAuto(ctx, pi);
|
||||
return "pause";
|
||||
}
|
||||
} catch (err) {
|
||||
// Ancillary post-gate errors are non-fatal — log warning and continue
|
||||
logWarning("engine", `verification-gate error: ${err.message}`);
|
||||
return "continue";
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Write verification evidence JSON with post-execution checks included.
|
||||
* This is a variant of writeVerificationJSON that adds the postExecutionChecks field.
|
||||
*/
|
||||
function writeVerificationJSONWithPostExec(
|
||||
result,
|
||||
tasksDir,
|
||||
taskId,
|
||||
unitId,
|
||||
postExecutionChecks,
|
||||
retryAttempt,
|
||||
maxRetries,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
) {
|
||||
mkdirSync(tasksDir, { recursive: true });
|
||||
const evidence = {
|
||||
schemaVersion: 1,
|
||||
taskId,
|
||||
unitId: unitId ?? taskId,
|
||||
timestamp: result.timestamp,
|
||||
passed: result.passed,
|
||||
discoverySource: result.discoverySource,
|
||||
checks: result.checks.map((check) => ({
|
||||
command: check.command,
|
||||
exitCode: check.exitCode,
|
||||
durationMs: check.durationMs,
|
||||
verdict: check.exitCode === 0 ? "pass" : "fail",
|
||||
})),
|
||||
...(retryAttempt !== undefined ? { retryAttempt } : {}),
|
||||
...(maxRetries !== undefined ? { maxRetries } : {}),
|
||||
...(tokenCount !== undefined ? { tokenCount } : {}),
|
||||
...(memoryPressureMB !== undefined ? { memoryPressureMB } : {}),
|
||||
...(gateOutcomes !== undefined ? { gateOutcomes } : {}),
|
||||
...(recoveryStatus !== undefined ? { recoveryStatus } : {}),
|
||||
postExecutionChecks,
|
||||
};
|
||||
if (result.runtimeErrors && result.runtimeErrors.length > 0) {
|
||||
evidence.runtimeErrors = result.runtimeErrors.map((e) => ({
|
||||
source: e.source,
|
||||
severity: e.severity,
|
||||
message: e.message,
|
||||
blocking: e.blocking,
|
||||
}));
|
||||
}
|
||||
if (result.auditWarnings && result.auditWarnings.length > 0) {
|
||||
evidence.auditWarnings = result.auditWarnings.map((w) => ({
|
||||
name: w.name,
|
||||
severity: w.severity,
|
||||
title: w.title,
|
||||
url: w.url,
|
||||
fixAvailable: w.fixAvailable,
|
||||
}));
|
||||
}
|
||||
const filePath = join(tasksDir, `${taskId}-VERIFY.json`);
|
||||
writeFileSync(filePath, JSON.stringify(evidence, null, 2) + "\n", "utf-8");
|
||||
}
|
||||
export { runPostUnitVerification } from "./uok/auto-verification.js";
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ import {
|
|||
clearSliceProgressCache,
|
||||
updateSliceProgressCache,
|
||||
} from "./auto-dashboard.js";
|
||||
import { DISPATCH_RULES, resolveDispatch } from "./auto-dispatch.js";
|
||||
import { DISPATCH_RULES, resolveDispatch } from "./uok/auto-dispatch.js";
|
||||
import {
|
||||
_resetPendingResolve,
|
||||
isSessionSwitchInFlight,
|
||||
|
|
@ -86,8 +86,8 @@ import {
|
|||
isQueuedUserMessageSkip,
|
||||
isToolInvocationError,
|
||||
} from "./auto-tool-tracking.js";
|
||||
import { closeoutUnit } from "./auto-unit-closeout.js";
|
||||
import { runPostUnitVerification } from "./auto-verification.js";
|
||||
import { closeoutUnit } from "./uok/auto-unit-closeout.js";
|
||||
import { runPostUnitVerification } from "./uok/auto-verification.js";
|
||||
import {
|
||||
autoWorktreeBranch,
|
||||
checkResourcesStale,
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import {
|
|||
collectWorktreeFingerprint,
|
||||
countChangedFiles,
|
||||
resetRunawayGuardState,
|
||||
} from "../auto-runaway-guard.js";
|
||||
} from "../uok/auto-runaway-guard.js";
|
||||
import {
|
||||
formatToolCallSummary,
|
||||
resetToolCallCounts,
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import {
|
|||
collectWorktreeFingerprint,
|
||||
countChangedFiles,
|
||||
resetRunawayGuardState,
|
||||
} from "../auto-runaway-guard.js";
|
||||
} from "../uok/auto-runaway-guard.js";
|
||||
import { scopeActiveToolsForUnitType } from "../constants.js";
|
||||
import { debugLog } from "../debug-logger.js";
|
||||
import {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
* and dispatch logic. This is the "dev" engine — it wraps the current SF
|
||||
* autonomous mode behavior behind the engine-polymorphic interface.
|
||||
*/
|
||||
import { resolveDispatch } from "./auto-dispatch.js";
|
||||
import { resolveDispatch } from "./uok/auto-dispatch.js";
|
||||
import { loadEffectiveSFPreferences } from "./preferences.js";
|
||||
import { deriveState } from "./state.js";
|
||||
// ─── Bridge: DispatchAction → EngineDispatchAction ────────────────────────
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ import {
|
|||
DEFAULT_RUNAWAY_ELAPSED_MINUTES,
|
||||
DEFAULT_RUNAWAY_TOKEN_WARNING,
|
||||
DEFAULT_RUNAWAY_TOOL_CALL_WARNING,
|
||||
} from "./auto-runaway-guard.js";
|
||||
} from "./uok/auto-runaway-guard.js";
|
||||
import { selectByBenchmarks } from "./benchmark-selector.js";
|
||||
import { defaultRoutingConfig, MODEL_CAPABILITY_TIER } from "./model-router.js";
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ vi.mock("../auto-prompts.js", () => ({
|
|||
checkNeedsRunUat: vi.fn(async () => null),
|
||||
}));
|
||||
|
||||
import { resolveDispatch } from "../auto-dispatch.js";
|
||||
import { resolveDispatch } from "../uok/auto-dispatch.js";
|
||||
import {
|
||||
closeDatabase,
|
||||
insertMilestone,
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { enhanceUnitRankingWithMemory } from "../auto-dispatch.js";
|
||||
import { enhanceUnitRankingWithMemory } from "../uok/auto-dispatch.js";
|
||||
|
||||
// Mock memory store
|
||||
vi.mock("../memory-store.js", () => ({
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ import { existsSync, mkdirSync, readFileSync, rmSync } from "node:fs";
|
|||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, it } from "vitest";
|
||||
import { DISPATCH_RULES } from "../auto-dispatch.js";
|
||||
import { DISPATCH_RULES } from "../uok/auto-dispatch.js";
|
||||
import { createScheduleStore } from "../schedule/schedule-store.js";
|
||||
import { generateULID } from "../schedule/schedule-ulid.js";
|
||||
|
||||
|
|
|
|||
2067
src/resources/extensions/sf/uok/auto-dispatch.js
Normal file
2067
src/resources/extensions/sf/uok/auto-dispatch.js
Normal file
File diff suppressed because it is too large
Load diff
433
src/resources/extensions/sf/uok/auto-runaway-guard.js
Normal file
433
src/resources/extensions/sf/uok/auto-runaway-guard.js
Normal file
|
|
@ -0,0 +1,433 @@
|
|||
/**
|
||||
* Diagnostic budget guard for unusually long autonomous mode units.
|
||||
*
|
||||
* This is intentionally not a blind tool-count kill switch. It gives the agent
|
||||
* explicit turns to explain whether the unit is legitimately large, stuck, or
|
||||
* churning, then pauses only if the unit keeps consuming budget afterward.
|
||||
*/
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { createHash } from "node:crypto";
|
||||
import { existsSync, lstatSync, readdirSync, readFileSync } from "node:fs";
|
||||
import { formatTokenCount } from "@singularity-forge/coding-agent";
|
||||
export const DEFAULT_RUNAWAY_TOOL_CALL_WARNING = 60;
|
||||
export const DEFAULT_RUNAWAY_TOKEN_WARNING = 1_000_000;
|
||||
export const DEFAULT_RUNAWAY_ELAPSED_MINUTES = 20;
|
||||
export const DEFAULT_RUNAWAY_CHANGED_FILES_WARNING = 75;
|
||||
export const DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS = 2;
|
||||
export const DEFAULT_RUNAWAY_MIN_INTERVAL_MS = 120_000;
|
||||
const EXECUTE_NO_PROGRESS_TOOL_WARNING = 25;
|
||||
const EXECUTE_NO_PROGRESS_TOKEN_WARNING = 500_000;
|
||||
const DURABLE_SF_ARTIFACT_PATHS = [
|
||||
".sf/milestones",
|
||||
".sf/approvals",
|
||||
];
|
||||
let state = null;
|
||||
export function resetRunawayGuardState(unitType, unitId, baseline) {
|
||||
state = {
|
||||
unitKey: `${unitType}/${unitId}`,
|
||||
baselineSessionTokens: baseline?.sessionTokens ?? 0,
|
||||
baselineChangedFiles: baseline?.changedFiles ?? 0,
|
||||
baselineWorktreeFingerprint: baseline?.worktreeFingerprint ?? null,
|
||||
warningsSent: 0,
|
||||
lastWarningAt: 0,
|
||||
lastToolCalls: 0,
|
||||
lastSessionTokens: 0,
|
||||
lastElapsedMs: 0,
|
||||
finalWarningSent: false,
|
||||
};
|
||||
}
|
||||
export function clearRunawayGuardState() {
|
||||
state = null;
|
||||
}
|
||||
export function resolveRunawayGuardConfig(supervisor) {
|
||||
return {
|
||||
enabled: supervisor?.runaway_guard_enabled !== false,
|
||||
toolCallWarning:
|
||||
supervisor?.runaway_tool_call_warning ??
|
||||
DEFAULT_RUNAWAY_TOOL_CALL_WARNING,
|
||||
tokenWarning:
|
||||
supervisor?.runaway_token_warning ?? DEFAULT_RUNAWAY_TOKEN_WARNING,
|
||||
elapsedMs:
|
||||
(supervisor?.runaway_elapsed_minutes ?? DEFAULT_RUNAWAY_ELAPSED_MINUTES) *
|
||||
60 *
|
||||
1000,
|
||||
changedFilesWarning:
|
||||
supervisor?.runaway_changed_files_warning ??
|
||||
DEFAULT_RUNAWAY_CHANGED_FILES_WARNING,
|
||||
diagnosticTurns:
|
||||
supervisor?.runaway_diagnostic_turns ?? DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,
|
||||
hardPause: supervisor?.runaway_hard_pause !== false,
|
||||
minIntervalMs: DEFAULT_RUNAWAY_MIN_INTERVAL_MS,
|
||||
};
|
||||
}
|
||||
export function collectSessionTokenUsage(ctx) {
|
||||
try {
|
||||
const entries = ctx.sessionManager?.getEntries?.() ?? [];
|
||||
let total = 0;
|
||||
for (const entry of entries) {
|
||||
const message = entry.message;
|
||||
if (message?.role !== "assistant" || !message.usage) continue;
|
||||
const usage = message.usage;
|
||||
const totalTokens = numeric(usage.totalTokens ?? usage.total);
|
||||
if (totalTokens > 0) {
|
||||
total += totalTokens;
|
||||
continue;
|
||||
}
|
||||
total +=
|
||||
numeric(usage.input) +
|
||||
numeric(usage.output) +
|
||||
numeric(usage.cacheRead) +
|
||||
numeric(usage.cacheWrite);
|
||||
}
|
||||
return total;
|
||||
} catch {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
export function countChangedFiles(cwd) {
|
||||
try {
|
||||
const out = execFileSync("git", ["status", "--short"], {
|
||||
cwd,
|
||||
encoding: "utf8",
|
||||
stdio: ["ignore", "pipe", "ignore"],
|
||||
timeout: 2000,
|
||||
});
|
||||
return out
|
||||
.split("\n")
|
||||
.map((line) => line.trim())
|
||||
.filter(Boolean).length;
|
||||
} catch {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
export function collectWorktreeFingerprint(cwd) {
|
||||
try {
|
||||
const status = execFileSync(
|
||||
"git",
|
||||
["status", "--porcelain=v1", "--untracked-files=all"],
|
||||
{
|
||||
cwd,
|
||||
encoding: "utf8",
|
||||
stdio: ["ignore", "pipe", "ignore"],
|
||||
timeout: 2000,
|
||||
},
|
||||
);
|
||||
const lines = status
|
||||
.split("\n")
|
||||
.map((line) => line.trimEnd())
|
||||
.filter(Boolean);
|
||||
const hash = createHash("sha256");
|
||||
if (lines.length === 0) {
|
||||
hash.update("git-clean");
|
||||
hash.update("\0");
|
||||
}
|
||||
for (const line of lines) {
|
||||
hash.update(line);
|
||||
hash.update("\0");
|
||||
const filePath = parsePorcelainPath(line);
|
||||
if (!filePath) continue;
|
||||
appendFileFingerprint(hash, cwd, filePath);
|
||||
}
|
||||
appendDurableSfArtifactFingerprint(hash, cwd);
|
||||
return hash.digest("hex");
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
function appendDurableSfArtifactFingerprint(hash, cwd) {
|
||||
hash.update("sf-artifacts");
|
||||
hash.update("\0");
|
||||
for (const artifactPath of DURABLE_SF_ARTIFACT_PATHS) {
|
||||
appendPathFingerprint(hash, cwd, artifactPath);
|
||||
}
|
||||
}
|
||||
function appendPathFingerprint(hash, cwd, relativePath) {
|
||||
const fullPath = `${cwd}/${relativePath}`;
|
||||
if (!existsSync(fullPath)) {
|
||||
hash.update(`missing:${relativePath}`);
|
||||
hash.update("\0");
|
||||
return;
|
||||
}
|
||||
let stat;
|
||||
try {
|
||||
stat = lstatSync(fullPath);
|
||||
} catch {
|
||||
hash.update(`unreadable:${relativePath}`);
|
||||
hash.update("\0");
|
||||
return;
|
||||
}
|
||||
if (stat.isDirectory()) {
|
||||
hash.update(`dir:${relativePath}`);
|
||||
hash.update("\0");
|
||||
let entries;
|
||||
try {
|
||||
entries = readdirSync(fullPath).sort();
|
||||
} catch {
|
||||
hash.update(`unreadable-dir:${relativePath}`);
|
||||
hash.update("\0");
|
||||
return;
|
||||
}
|
||||
for (const entry of entries) {
|
||||
appendPathFingerprint(hash, cwd, `${relativePath}/${entry}`);
|
||||
}
|
||||
return;
|
||||
}
|
||||
appendFileFingerprint(hash, cwd, relativePath);
|
||||
}
|
||||
/**
 * Fold one regular file into `hash`: a "file:<path>" marker, then the raw
 * file bytes, each NUL-terminated. Non-file paths are encoded as
 * "type:<path>:dir|other"; any stat/read failure (including deletion races)
 * is encoded as "unreadable-or-deleted:<path>" so the digest still changes.
 */
function appendFileFingerprint(hash, cwd, relativePath) {
  const fullPath = `${cwd}/${relativePath}`;
  try {
    const stat = lstatSync(fullPath);
    if (stat.isFile()) {
      hash.update(`file:${relativePath}`);
      hash.update("\0");
      hash.update(readFileSync(fullPath));
      hash.update("\0");
      return;
    }
    const kind = stat.isDirectory() ? "dir" : "other";
    hash.update(`type:${relativePath}:${kind}`);
    hash.update("\0");
  } catch {
    // File vanished or is unreadable — record that fact rather than throwing.
    hash.update(`unreadable-or-deleted:${relativePath}`);
    hash.update("\0");
  }
}
|
||||
/**
 * Evaluate the runaway-budget guard for the currently running unit.
 *
 * Reads and mutates the module-level guard `state` (declared above this
 * view — TODO confirm shape): state is reset whenever the unit key changes,
 * and each warning issued updates the "last warning" snapshot used by the
 * growth heuristic.
 *
 * @param {string} unitType - unit kind, e.g. "execute-task".
 * @param {string} unitId - unit identifier; `${unitType}/${unitId}` keys guard state.
 * @param {object} metrics - raw cumulative metrics (toolCalls, sessionTokens,
 *   elapsedMs, changedFiles?, worktreeFingerprint?, topTools?).
 * @param {object} config - guard config (enabled, diagnosticTurns, hardPause,
 *   toolCallWarning, tokenWarning, elapsedMs, changedFilesWarning, minIntervalMs).
 * @param {number} [now=Date.now()] - injectable clock, for tests.
 * @returns {{action: "none"} | {action: "warn", final: boolean, message: string}
 *   | {action: "pause", reason: string, metadata: object}} guard decision.
 */
export function evaluateRunawayGuard(
  unitType,
  unitId,
  metrics,
  config,
  now = Date.now(),
) {
  // Guard disabled (or configured with no diagnostic turns) — never fires.
  if (!config.enabled) return { action: "none" };
  if (config.diagnosticTurns <= 0) return { action: "none" };
  const unitKey = `${unitType}/${unitId}`;
  // New unit (or first call) — start from a fresh baseline.
  if (!state || state.unitKey !== unitKey)
    resetRunawayGuardState(unitType, unitId);
  const s = state;
  // Convert cumulative session metrics into deltas since this unit started.
  const unitMetrics = normalizeMetricsToUnit(metrics, s);
  const reasons = thresholdReasons(unitType, unitMetrics, config);
  if (reasons.length === 0) return { action: "none" };
  // Debounce: within the min interval, only re-warn if the budget grew
  // meaningfully since the last warning.
  if (
    s.lastWarningAt > 0 &&
    now - s.lastWarningAt < config.minIntervalMs &&
    !hasMeaningfulGrowth(unitMetrics, s, config)
  ) {
    return { action: "none" };
  }
  // Skip hard-pause if the unit is making file-change progress — growth with
  // changes is legitimate diagnostic/planning work, not a stuck loop.
  // Without this check, discuss/plan phases that legitimately consume tokens
  // while writing summaries/plans would be hard-paused despite making progress.
  if (
    (unitMetrics.changedFiles ?? 0) > 0 ||
    unitMetrics.worktreeChangedSinceStart === true
  ) {
    return { action: "none" };
  }
  // Hard pause: the final warning was already sent and the budget kept
  // growing anyway — stop the unit and surface full diagnostics.
  if (
    config.hardPause &&
    s.finalWarningSent &&
    hasMeaningfulGrowth(unitMetrics, s, config)
  ) {
    const reason =
      `Runaway guard paused ${unitType} ${unitId}: budget kept growing after ` +
      `${config.diagnosticTurns} diagnostic turn(s). ` +
      formatMetricSummary(unitMetrics);
    return {
      action: "pause",
      reason,
      metadata: {
        reason,
        pausedAt: now,
        unitType,
        unitId,
        diagnosticTurns: config.diagnosticTurns,
        warningsSent: s.warningsSent,
        thresholdReasons: reasons,
        metrics: unitMetrics,
        // Snapshot taken at the previous warning, for post-mortem comparison.
        lastWarningMetrics: {
          toolCalls: s.lastToolCalls,
          sessionTokens: s.lastSessionTokens,
          elapsedMs: s.lastElapsedMs,
        },
        thresholds: {
          toolCallWarning: config.toolCallWarning,
          tokenWarning: config.tokenWarning,
          elapsedMs: config.elapsedMs,
          changedFilesWarning: config.changedFilesWarning,
          minIntervalMs: config.minIntervalMs,
        },
      },
    };
  }
  // Otherwise issue a warning; it becomes "final" on the last diagnostic turn.
  const final = s.warningsSent + 1 >= config.diagnosticTurns;
  s.warningsSent++;
  s.lastWarningAt = now;
  // Record the current deltas so the next call can measure growth.
  s.lastToolCalls = unitMetrics.toolCalls;
  s.lastSessionTokens = unitMetrics.sessionTokens;
  s.lastElapsedMs = unitMetrics.elapsedMs;
  if (final) s.finalWarningSent = true;
  return {
    action: "warn",
    final,
    message: buildRunawayGuardMessage(
      unitType,
      unitId,
      unitMetrics,
      reasons,
      final,
    ),
  };
}
|
||||
/**
 * Convert cumulative session metrics into per-unit deltas by subtracting the
 * baselines captured when this unit started. Worktree change detection
 * prefers fingerprint comparison when both fingerprints exist; otherwise it
 * falls back to the caller-supplied boolean.
 */
function normalizeMetricsToUnit(metrics, state) {
  let worktreeChangedSinceStart;
  const fingerprintsComparable =
    metrics.worktreeFingerprint != null &&
    state.baselineWorktreeFingerprint !== null;
  if (fingerprintsComparable) {
    worktreeChangedSinceStart =
      metrics.worktreeFingerprint !== state.baselineWorktreeFingerprint;
  } else {
    worktreeChangedSinceStart = metrics.worktreeChangedSinceStart;
  }
  // Clamp at zero so a baseline captured mid-flight never yields negatives.
  const unitTokens = Math.max(
    0,
    metrics.sessionTokens - state.baselineSessionTokens,
  );
  let unitChangedFiles;
  if (metrics.changedFiles !== undefined) {
    unitChangedFiles = Math.max(
      0,
      metrics.changedFiles - state.baselineChangedFiles,
    );
  }
  return {
    ...metrics,
    sessionTokens: unitTokens,
    changedFiles: unitChangedFiles,
    worktreeChangedSinceStart,
  };
}
|
||||
/**
 * Build the list of human-readable budget-threshold reasons for a unit.
 *
 * Tool calls, elapsed time, and changed-file count are primary signals and
 * fire independently. Token volume is secondary: it only adds a reason when
 * a primary signal already fired, preventing false positives on units that
 * legitimately burn large-context tokens. The one exception is the
 * execute-task no-progress heuristic, where high tool/token use with zero
 * file changes is itself a strong runaway indicator.
 */
function thresholdReasons(unitType, metrics, config) {
  const reasons = [];
  const tooManyToolCalls =
    config.toolCallWarning > 0 && metrics.toolCalls >= config.toolCallWarning;
  if (tooManyToolCalls) {
    reasons.push(
      `${metrics.toolCalls} tool calls (warning ${config.toolCallWarning})`,
    );
  }
  const tooLongElapsed =
    config.elapsedMs > 0 && metrics.elapsedMs >= config.elapsedMs;
  if (tooLongElapsed) {
    reasons.push(
      `${Math.round(metrics.elapsedMs / 60000)}min elapsed (warning ${Math.round(config.elapsedMs / 60000)}min)`,
    );
  }
  const tooManyChangedFiles =
    config.changedFilesWarning > 0 &&
    (metrics.changedFiles ?? 0) >= config.changedFilesWarning;
  if (tooManyChangedFiles) {
    reasons.push(
      `${metrics.changedFiles} new changed files (warning ${config.changedFilesWarning})`,
    );
  }
  // Secondary token signal: requires a primary signal to already be present.
  const hasPrimarySignal = reasons.length > 0;
  if (
    hasPrimarySignal &&
    config.tokenWarning > 0 &&
    metrics.sessionTokens >= config.tokenWarning
  ) {
    reasons.push(
      `${formatTokenCount(metrics.sessionTokens)} unit tokens (warning ${formatTokenCount(config.tokenWarning)})`,
    );
  }
  // Execute-task no-progress heuristic — the only case where tokens alone
  // (with tool calls but no file changes) can trigger a reason.
  const noProgressRunaway =
    unitType === "execute-task" &&
    (metrics.changedFiles ?? 0) === 0 &&
    metrics.worktreeChangedSinceStart !== true &&
    metrics.toolCalls >= EXECUTE_NO_PROGRESS_TOOL_WARNING &&
    metrics.sessionTokens >= EXECUTE_NO_PROGRESS_TOKEN_WARNING;
  if (noProgressRunaway) {
    reasons.push(
      `no new file changes after ${metrics.toolCalls} tool calls and ${formatTokenCount(metrics.sessionTokens)} tokens`,
    );
  }
  return reasons;
}
|
||||
/**
 * True when the unit's budget grew meaningfully since the last warning:
 * tool calls up by at least a quarter of the warning threshold (min 5),
 * tokens up by a quarter of the token threshold (min 50k), or elapsed time
 * advanced by at least the minimum warning interval.
 */
function hasMeaningfulGrowth(metrics, state, config) {
  const minToolDelta = Math.max(5, Math.floor(config.toolCallWarning / 4));
  if (metrics.toolCalls - state.lastToolCalls >= minToolDelta) return true;
  const minTokenDelta = Math.max(50_000, Math.floor(config.tokenWarning / 4));
  if (metrics.sessionTokens - state.lastSessionTokens >= minTokenDelta) {
    return true;
  }
  return metrics.elapsedMs - state.lastElapsedMs >= config.minIntervalMs;
}
|
||||
/**
 * Compose the warning message injected into the agent's context when the
 * runaway guard fires. Includes the triggering budget signals, the top five
 * tools by call count (when available), and escalated instructions on the
 * final warning. Empty segments are dropped before joining.
 */
function buildRunawayGuardMessage(unitType, unitId, metrics, reasons, final) {
  let topTools = "";
  if (metrics.topTools) {
    topTools = Object.entries(metrics.topTools)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5)
      .map((pair) => `${pair[0]}x${pair[1]}`)
      .join(", ");
  }
  const title = final
    ? "**RUNAWAY UNIT FINAL WARNING - write diagnosis and handoff now.**"
    : "**RUNAWAY UNIT BUDGET WARNING - diagnose before continuing.**";
  const instruction = final
    ? "You have already received a budget warning. Do not start new exploration. Write or update the durable artifact/handoff now, explicitly stating whether the unit was legitimately large, blocked, or stuck in a loop."
    : "Before more exploration or broad edits, state why this unit is still running: legitimately large, blocked, or stuck/churning. Then either finish the required artifact or write a precise handoff.";
  const lines = [
    title,
    `Unit: ${unitType} ${unitId}`,
    `Budget signals: ${reasons.join("; ")}.`,
  ];
  if (topTools) lines.push(`Tool mix: ${topTools}.`);
  lines.push(formatChangedFilesLine(unitType, metrics));
  lines.push("");
  lines.push(instruction);
  return lines.filter(Boolean).join("\n");
}
|
||||
/**
 * One-line description of working-tree change progress for the guard
 * message. Non-execute-task units with zero new changed files yield the
 * empty string (the message builder filters it out).
 */
function formatChangedFilesLine(unitType, metrics) {
  const newChangedFiles = metrics.changedFiles ?? 0;
  if (newChangedFiles > 0) {
    return `Working tree has ${metrics.changedFiles} new changed file(s) since this unit started. Active edits are not automatically healthy progress; check for repeated or broad churn.`;
  }
  if (unitType !== "execute-task") return "";
  if (metrics.worktreeChangedSinceStart) {
    return "Working tree has 0 new changed file paths, but dirty file content changed since this execute-task started.";
  }
  return "Working tree has 0 new changed files since this execute-task started. For implementation work, that is no durable progress yet.";
}
|
||||
/**
 * Comma-separated summary of a unit's budget metrics for pause reasons:
 * tool calls, tokens, elapsed minutes, plus optional changed-file count and
 * a dirty-worktree note when those signals are present.
 */
function formatMetricSummary(metrics) {
  const parts = [
    `${metrics.toolCalls} tool calls`,
    `${formatTokenCount(metrics.sessionTokens)} tokens`,
    `${Math.round(metrics.elapsedMs / 60000)}min elapsed`,
  ];
  if (metrics.changedFiles !== undefined) {
    parts.push(`${metrics.changedFiles} new changed files`);
  }
  if (metrics.worktreeChangedSinceStart) {
    parts.push("dirty file content changed");
  }
  return parts.join(", ");
}
|
||||
/**
 * Extract the file path from one `git status --porcelain` line.
 * Rename entries ("R  old -> new") yield the destination path; surrounding
 * quotes (git's quoting for special characters) are stripped. Returns null
 * when the line is too short to carry a path or the path is empty.
 */
function parsePorcelainPath(line) {
  // Porcelain v1 format is "XY <path>" — anything shorter has no path.
  if (line.length < 4) return null;
  const renameSeparator = " -> ";
  let path = line.slice(3);
  const sepIndex = path.lastIndexOf(renameSeparator);
  if (sepIndex !== -1) {
    path = path.slice(sepIndex + renameSeparator.length);
  }
  if (path.startsWith('"') && path.endsWith('"')) {
    path = path.slice(1, -1);
  }
  return path === "" ? null : path;
}
|
||||
/** Coerce a value to a finite number, falling back to 0 for anything else. */
function numeric(value) {
  if (typeof value !== "number") return 0;
  return Number.isFinite(value) ? value : 0;
}
|
||||
85
src/resources/extensions/sf/uok/auto-unit-closeout.js
Normal file
85
src/resources/extensions/sf/uok/auto-unit-closeout.js
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
/**
|
||||
* Unit closeout helper — consolidates the repeated pattern of
|
||||
* snapshotting metrics + saving activity log + extracting memories
|
||||
* that appears 6+ times in auto.ts.
|
||||
*/
|
||||
import { saveActivityLog } from "../activity-log.js";
|
||||
import { snapshotUnitMetrics } from "../metrics.js";
|
||||
import { updateSubscriptionTokensUsed } from "../preferences-models.js";
|
||||
import { writeTurnGitTransaction } from "./gitops.js";
|
||||
import { logWarning } from "../workflow-logger.js";
|
||||
/**
 * Close out a completed unit: snapshot its metrics, persist the activity
 * log, and fire-and-forget memory extraction from that log.
 *
 * Also records subscription token consumption (best-effort) and, when git
 * transaction details are supplied in `opts`, writes a turn-level git
 * transaction record.
 *
 * @returns {Promise<string|undefined>} the activity log file path, if one
 *   was written.
 */
export async function closeoutUnit(
  ctx,
  basePath,
  unitType,
  unitId,
  startedAt,
  opts,
) {
  const provider = ctx.model?.provider;
  const id = ctx.model?.id;
  let modelId;
  if (provider && id) {
    modelId = `${provider}/${id}`;
  } else {
    modelId = id ?? "unknown";
  }
  const unit = snapshotUnitMetrics(
    ctx,
    unitType,
    unitId,
    startedAt,
    modelId,
    opts,
  );
  // Subscription token accounting for amortized cost reporting.
  // Fire-and-forget: updateSubscriptionTokensUsed is already best-effort.
  if (provider && unit && unit.tokens.total > 0) {
    updateSubscriptionTokensUsed(provider, unit.tokens.total);
  }
  const activityFile = saveActivityLog(ctx, basePath, unitType, unitId);
  if (activityFile) {
    try {
      // Lazy-load the extractor so closeout never pays its import cost when
      // no activity log was produced.
      const { buildMemoryLLMCall, extractMemoriesFromUnit } = await import(
        "./memory-extractor.js"
      );
      const llmCallFn = buildMemoryLLMCall(ctx);
      if (llmCallFn) {
        // Deliberately not awaited — extraction runs in the background and
        // only logs a warning on failure.
        extractMemoriesFromUnit(
          activityFile,
          unitType,
          unitId,
          llmCallFn,
        ).catch((err) => {
          logWarning(
            "engine",
            `memory extraction failed for ${unitType}/${unitId}: ${err.message}`,
          );
        });
      }
    } catch (err) {
      // Non-fatal: unit closeout must not fail because extraction broke.
      logWarning(
        "engine",
        `operation failed: ${err instanceof Error ? err.message : String(err)}`,
      );
    }
  }
  const hasGitTransaction =
    opts?.traceId && opts.turnId && opts.gitAction && opts.gitStatus;
  if (hasGitTransaction) {
    writeTurnGitTransaction({
      basePath,
      traceId: opts.traceId,
      turnId: opts.turnId,
      unitType,
      unitId,
      stage: "record",
      action: opts.gitAction,
      push: opts.gitPush === true,
      status: opts.gitStatus,
      error: opts.gitError,
      metadata: {
        activityFile,
      },
    });
  }
  return activityFile ?? undefined;
}
|
||||
824
src/resources/extensions/sf/uok/auto-verification.js
Normal file
824
src/resources/extensions/sf/uok/auto-verification.js
Normal file
|
|
@ -0,0 +1,824 @@
|
|||
/**
|
||||
* Post-unit verification gate for autonomous mode.
|
||||
*
|
||||
* Runs typecheck/lint/test checks, captures runtime errors, performs
|
||||
* dependency audits, handles auto-fix retry logic, and writes
|
||||
* verification evidence JSON.
|
||||
*
|
||||
* Extracted from handleAgentEnd() in auto.ts. Returns a sentinel
|
||||
* value instead of calling return/pauseAuto directly — the caller
|
||||
* checks the result and handles control flow.
|
||||
*/
|
||||
import { mkdirSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { loadFile } from "../files.js";
|
||||
import { parseRoadmap } from "../parsers.js";
|
||||
import { resolveMilestoneFile, resolveSlicePath } from "../paths.js";
|
||||
import { runPostExecutionChecks } from "../post-execution-checks.js";
|
||||
import { loadEffectiveSFPreferences } from "../preferences.js";
|
||||
import {
|
||||
getMilestoneSlices,
|
||||
getSliceTasks,
|
||||
getTask,
|
||||
isDbAvailable,
|
||||
} from "../sf-db.js";
|
||||
import { isMilestoneComplete } from "../state.js";
|
||||
import { isClosedStatus } from "../status-guards.js";
|
||||
import { parseUnitId } from "../unit-id.js";
|
||||
import { ChaosMonkeyGate } from "./chaos-monkey.js";
|
||||
import { CostGuardGate } from "./cost-guard-gate.js";
|
||||
import { resolveUokFlags } from "./flags.js";
|
||||
import { UokGateRunner } from "./gate-runner.js";
|
||||
import { MultiPackageGate } from "./multi-package-gate.js";
|
||||
import { OutcomeLearningGate } from "./outcome-learning-gate.js";
|
||||
import { SecurityGate } from "./security-gate.js";
|
||||
import {
|
||||
formatExecuteTaskRecoveryStatus,
|
||||
inspectExecuteTaskDurability,
|
||||
} from "./unit-runtime.js";
|
||||
import { extractVerdict } from "../verdict-parser.js";
|
||||
import { writeVerificationJSON } from "../verification-evidence.js";
|
||||
import {
|
||||
captureRuntimeErrors,
|
||||
formatFailureContext,
|
||||
runDependencyAudit,
|
||||
runVerificationGate,
|
||||
} from "../verification-gate.js";
|
||||
import { logError, logWarning } from "../workflow-logger.js";
|
||||
|
||||
/**
 * Sum total token usage across all assistant messages in the session.
 * Entries that are not messages, not assistant-authored, or missing a
 * usage.totalTokens figure contribute nothing. Returns 0 when the session
 * manager (or its entries accessor) is unavailable.
 */
function computeTokenCountFromSession(ctx) {
  const entries = ctx.sessionManager?.getEntries?.() ?? [];
  return entries.reduce((sum, entry) => {
    if (entry.type !== "message") return sum;
    const msg = entry.message;
    if (!msg || msg.role !== "assistant") return sum;
    const tokens = msg.usage?.totalTokens;
    return tokens != null ? sum + tokens : sum;
  }, 0);
}
|
||||
|
||||
/**
 * Current V8 heap usage in whole megabytes, or undefined if the process
 * memory API is unavailable or throws.
 */
function getMemoryPressureMB() {
  try {
    const { heapUsed } = process.memoryUsage();
    return Math.round(heapUsed / (1024 * 1024));
  } catch {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Map gate ids to their outcomes for evidence JSON. Ids without a matching
 * result entry (or whose result lacks an outcome) report "unknown".
 * Returns undefined when either list is missing or there are no gates.
 */
function buildGateOutcomesSummary(gateIds, gateResults) {
  if (!gateIds || !gateResults || gateIds.length === 0) return undefined;
  return Object.fromEntries(
    gateIds.map((id, i) => [id, gateResults[i]?.outcome ?? "unknown"]),
  );
}
|
||||
|
||||
/**
 * True when stderr output looks like an infrastructure/environment failure
 * (missing binary, DNS/network error, timeout) rather than a genuine
 * verification failure of the code under test.
 */
function isInfraVerificationFailure(stderr) {
  const infraErrorPattern =
    /\b(ENOENT|ENOTFOUND|ETIMEDOUT|ECONNRESET|EAI_AGAIN|spawn\s+\S+\s+ENOENT|command not found)\b/i;
  return infraErrorPattern.test(stderr);
}
|
||||
/**
 * Post-unit guard for `validate-milestone` units (#4094).
 *
 * When validate-milestone writes verdict=needs-remediation, the agent is
 * expected to also call reassess_roadmap in the same turn to add
 * remediation slices. If they don't, the state machine re-derives
 * `phase: validating-milestone` indefinitely (all slices still complete +
 * verdict still needs-remediation), wasting ~3 dispatches before the stuck
 * detector fires.
 *
 * This guard fires immediately on the first occurrence: if VALIDATION.md
 * verdict is needs-remediation and no incomplete slices exist for the
 * milestone, pause the auto-loop with a clear blocker.
 *
 * @param {object} vctx - verification context carrying `s` (auto-loop
 *   state), `ctx` (agent context with ui/model), and `pi` (plugin instance
 *   — TODO confirm; passed straight through to pauseAuto).
 * @param {Function} pauseAuto - async pause callback, invoked as
 *   pauseAuto(ctx, pi) when the guard fires.
 * @returns {Promise<"continue"|"pause">} sentinel for the caller's control flow.
 */
async function runValidateMilestonePostCheck(vctx, pauseAuto) {
  const { s, ctx, pi } = vctx;
  const prefs = loadEffectiveSFPreferences()?.preferences;
  const uokFlags = resolveUokFlags(prefs);
  // Best-effort persistence of this check's outcome as a UOK gate record.
  // No-op when gates are disabled or no unit is active.
  const persistMilestoneValidationGate = async (
    outcome,
    failureClass,
    rationale,
    findings = "",
    milestoneId,
  ) => {
    if (!uokFlags.gates || !s.currentUnit) return;
    const gateRunner = new UokGateRunner();
    gateRunner.register({
      id: "milestone-validation-post-check",
      type: "verification",
      // The gate simply reports the already-computed outcome.
      execute: async () => ({
        outcome,
        failureClass,
        rationale,
        findings,
      }),
    });
    await gateRunner.run("milestone-validation-post-check", {
      basePath: s.basePath,
      traceId: `validation-post-check:${s.currentUnit.id}`,
      turnId: s.currentUnit.id,
      milestoneId,
      unitType: s.currentUnit.type,
      unitId: s.currentUnit.id,
    });
  };
  // Guard preconditions — anything missing means there is nothing to check.
  if (!s.currentUnit) return "continue";
  const { milestone: mid } = parseUnitId(s.currentUnit.id);
  if (!mid) return "continue";
  const validationFile = resolveMilestoneFile(s.basePath, mid, "VALIDATION");
  if (!validationFile) return "continue";
  const validationContent = await loadFile(validationFile);
  if (!validationContent) return "continue";
  const verdict = extractVerdict(validationContent);
  // Any verdict other than needs-remediation cannot produce the stuck loop.
  if (verdict !== "needs-remediation") {
    await persistMilestoneValidationGate(
      "pass",
      "none",
      `milestone validation verdict is ${verdict}; no remediation loop risk`,
      "",
      mid,
    );
    return "continue";
  }
  const incompleteSliceCount = await countIncompleteSlices(s.basePath, mid);
  // If any non-closed slices exist, the agent successfully queued remediation
  // work — proceed normally. The state machine will execute those slices and
  // re-validate per the #3596/#3670 fix.
  if (incompleteSliceCount > 0) {
    await persistMilestoneValidationGate(
      "pass",
      "none",
      `remediation slices present (${incompleteSliceCount}); validation can continue`,
      "",
      mid,
    );
    return "continue";
  }
  // needs-remediation with zero queued slices: surface the blocker and pause.
  ctx.ui.notify(
    `Milestone ${mid} validation returned verdict=needs-remediation but no remediation slices were added. Pausing for human review.`,
    "error",
  );
  process.stderr.write(
    `validate-milestone: pausing — verdict=needs-remediation with no incomplete slices for ${mid}. ` +
      `The agent must call reassess_roadmap to add remediation slices before re-validation.\n`,
  );
  await persistMilestoneValidationGate(
    "manual-attention",
    "manual-attention",
    "needs-remediation verdict without queued remediation slices",
    `No incomplete slices found for ${mid} while verdict=needs-remediation`,
    mid,
  );
  await pauseAuto(ctx, pi);
  return "pause";
}
|
||||
/**
 * Count slices for a milestone that are not in a closed status.
 *
 * DB-backed projects are authoritative (#4094 peer review); the roadmap
 * markdown is parsed only as a filesystem fallback when the DB is
 * unavailable. Every "unknown" situation (no DB rows, missing/unreadable
 * roadmap, empty roadmap, parse failure) deliberately reports 1 so the
 * caller never pauses on a false positive.
 */
async function countIncompleteSlices(basePath, milestoneId) {
  if (isDbAvailable()) {
    const dbSlices = getMilestoneSlices(milestoneId);
    // No DB rows — treat as "unknown", do not pause.
    if (dbSlices.length === 0) return 1;
    let openCount = 0;
    for (const slice of dbSlices) {
      if (!isClosedStatus(slice.status)) openCount++;
    }
    return openCount;
  }
  // Filesystem fallback: parse the roadmap markdown.
  try {
    const roadmapFile = resolveMilestoneFile(basePath, milestoneId, "ROADMAP");
    if (!roadmapFile) return 1;
    const roadmapContent = await loadFile(roadmapFile);
    if (!roadmapContent) return 1;
    const roadmap = parseRoadmap(roadmapContent);
    if (roadmap.slices.length === 0) return 1;
    if (isMilestoneComplete(roadmap)) return 0;
    return 1;
  } catch {
    // Parsing failures should not cause false-positive pauses.
    return 1;
  }
}
|
||||
/**
|
||||
* Run the verification gate for the current execute-task unit.
|
||||
* Returns:
|
||||
* - "continue" — gate passed (or no checks configured), proceed normally
|
||||
* - "retry" — gate failed with retries remaining, s.pendingVerificationRetry set for loop re-iteration
|
||||
* - "pause" — gate failed with retries exhausted, pauseAuto already called
|
||||
*/
|
||||
export async function runPostUnitVerification(vctx, pauseAuto) {
|
||||
const { s, ctx, pi } = vctx;
|
||||
if (!s.currentUnit) {
|
||||
return "continue";
|
||||
}
|
||||
if (s.currentUnit.type === "validate-milestone") {
|
||||
return await runValidateMilestonePostCheck(vctx, pauseAuto);
|
||||
}
|
||||
if (s.currentUnit.type !== "execute-task") {
|
||||
return "continue";
|
||||
}
|
||||
// ── Zone 1: Gate machinery (outer try) ──────────────────────────────────
|
||||
// Failures here indicate broken infrastructure — pause for human review.
|
||||
let prefs;
|
||||
let uokFlags;
|
||||
let mid;
|
||||
let sid;
|
||||
let tid;
|
||||
let result;
|
||||
try {
|
||||
const effectivePrefs = loadEffectiveSFPreferences();
|
||||
prefs = effectivePrefs?.preferences;
|
||||
uokFlags = resolveUokFlags(prefs);
|
||||
// Read task plan verify field
|
||||
({ milestone: mid, slice: sid, task: tid } = parseUnitId(s.currentUnit.id));
|
||||
let taskPlanVerify;
|
||||
if (mid && sid && tid) {
|
||||
if (isDbAvailable()) {
|
||||
taskPlanVerify = getTask(mid, sid, tid)?.verify;
|
||||
}
|
||||
// When DB unavailable, taskPlanVerify stays undefined — gate runs without task-specific checks
|
||||
}
|
||||
result = runVerificationGate({
|
||||
cwd: s.basePath,
|
||||
preferenceCommands: prefs?.verification_commands,
|
||||
taskPlanVerify,
|
||||
});
|
||||
// Handle skipped gate (no commands discovered) — fail-closed but not a hard failure
|
||||
if (result.skipped === true) {
|
||||
process.stderr.write(
|
||||
"verification-gate: no commands discovered — gate skipped, not passed\n",
|
||||
);
|
||||
ctx.ui.notify(
|
||||
"[verify] SKIP — no verification commands configured",
|
||||
"warning",
|
||||
);
|
||||
return "continue";
|
||||
}
|
||||
// Capture runtime errors
|
||||
const runtimeErrors = await captureRuntimeErrors();
|
||||
if (runtimeErrors.length > 0) {
|
||||
result.runtimeErrors = runtimeErrors;
|
||||
if (runtimeErrors.some((e) => e.blocking)) {
|
||||
result.passed = false;
|
||||
}
|
||||
}
|
||||
// Dependency audit
|
||||
const auditWarnings = runDependencyAudit(s.basePath);
|
||||
if (auditWarnings.length > 0) {
|
||||
result.auditWarnings = auditWarnings;
|
||||
process.stderr.write(
|
||||
`verification-gate: ${auditWarnings.length} audit warning(s)\n`,
|
||||
);
|
||||
for (const w of auditWarnings) {
|
||||
process.stderr.write(` [${w.severity}] ${w.name}: ${w.title}\n`);
|
||||
}
|
||||
}
|
||||
} catch (machineryErr) {
|
||||
logError(
|
||||
"engine",
|
||||
`verification-gate machinery error — pausing for human review: ${machineryErr.message}`,
|
||||
);
|
||||
ctx.ui.notify(
|
||||
"verification-gate machinery error — pausing for human review",
|
||||
"error",
|
||||
);
|
||||
await pauseAuto(ctx, pi);
|
||||
return "pause";
|
||||
}
|
||||
// ── Zone 2: Ancillary post-gate work (inner try) ─────────────────────────
|
||||
// Failures here are non-fatal — evidence writes, UOK gate calls, notifications, retry logic.
|
||||
let gateIds = [];
|
||||
let gateResults = [];
|
||||
try {
|
||||
if (uokFlags.gates) {
|
||||
const gateRunner = new UokGateRunner();
|
||||
gateRunner.register({
|
||||
id: "verification-gate",
|
||||
type: "verification",
|
||||
execute: async () => ({
|
||||
outcome: result.passed ? "pass" : "fail",
|
||||
failureClass: result.runtimeErrors?.some((e) => e.blocking)
|
||||
? "execution"
|
||||
: "verification",
|
||||
rationale: result.passed
|
||||
? "verification checks passed"
|
||||
: "verification checks failed",
|
||||
findings: result.passed ? "" : formatFailureContext(result),
|
||||
}),
|
||||
});
|
||||
if (uokFlags.securityGuard) {
|
||||
gateRunner.register(new SecurityGate());
|
||||
}
|
||||
if (uokFlags.multiPackageHealing) {
|
||||
gateRunner.register(new MultiPackageGate());
|
||||
}
|
||||
if (uokFlags.autonomousCostGuard) {
|
||||
gateRunner.register(new CostGuardGate());
|
||||
}
|
||||
if (uokFlags.outcomeLearning) {
|
||||
gateRunner.register(new OutcomeLearningGate());
|
||||
}
|
||||
if (uokFlags.chaosMonkey) {
|
||||
gateRunner.register(new ChaosMonkeyGate({ active: true }));
|
||||
}
|
||||
|
||||
const baseCtx = {
|
||||
basePath: s.basePath,
|
||||
traceId: `verification:${s.currentUnit.id}`,
|
||||
turnId: s.currentUnit.id,
|
||||
milestoneId: mid ?? undefined,
|
||||
sliceId: sid ?? undefined,
|
||||
taskId: tid ?? undefined,
|
||||
unitType: s.currentUnit.type,
|
||||
unitId: s.currentUnit.id,
|
||||
iteration: s.verificationRetryCount.get(s.currentUnit.id) ?? 0,
|
||||
};
|
||||
|
||||
gateIds = gateRunner.list().map((g) => g.id);
|
||||
gateResults = await Promise.all(
|
||||
gateIds.map((id) =>
|
||||
gateRunner
|
||||
.run(id, {
|
||||
...baseCtx,
|
||||
traceId: `${id}:${s.currentUnit.id}`,
|
||||
})
|
||||
.catch((err) => ({
|
||||
outcome: "fail",
|
||||
failureClass: "unknown",
|
||||
rationale: `Gate ${id} threw: ${err instanceof Error ? err.message : String(err)}`,
|
||||
})),
|
||||
),
|
||||
);
|
||||
|
||||
for (let i = 0; i < gateIds.length; i++) {
|
||||
const id = gateIds[i];
|
||||
const res = gateResults[i];
|
||||
if (res.outcome !== "fail") continue;
|
||||
result.passed = false;
|
||||
if (id === "security-guard") {
|
||||
result.securityFailure = true;
|
||||
result.securityRationale = res.rationale;
|
||||
result.securityFindings = res.findings;
|
||||
} else if (id === "multi-package-healing") {
|
||||
result.multiPackageFailure = true;
|
||||
result.multiPackageRationale = res.rationale;
|
||||
result.multiPackageFindings = res.findings;
|
||||
} else if (id === "cost-guard") {
|
||||
result.costGuardFailure = true;
|
||||
result.costGuardRationale = res.rationale;
|
||||
} else if (id === "chaos-monkey") {
|
||||
result.chaosMonkeyFailure = true;
|
||||
result.chaosMonkeyRationale = res.rationale;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Auto-fix retry preferences
|
||||
const autoFixEnabled = prefs?.verification_auto_fix !== false;
|
||||
const maxRetries =
|
||||
typeof prefs?.verification_max_retries === "number"
|
||||
? prefs.verification_max_retries
|
||||
: 2;
|
||||
if (result.checks.length > 0) {
|
||||
const passCount = result.checks.filter((c) => c.exitCode === 0).length;
|
||||
const total = result.checks.length;
|
||||
const commandList = result.checks.map((c) => c.command).join(" | ");
|
||||
ctx.ui.notify(`[verify] running: ${commandList}`, "info");
|
||||
const attemptSoFar = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;
|
||||
if (result.passed) {
|
||||
ctx.ui.notify(`[verify] PASS - ${passCount}/${total} checks`, "info");
|
||||
} else {
|
||||
const failures = result.checks.filter((c) => c.exitCode !== 0);
|
||||
const failNames = failures.map((f) => f.command).join(", ");
|
||||
const nextAttempt = attemptSoFar + 1;
|
||||
ctx.ui.notify(
|
||||
`[verify] FAIL - ${failNames} (auto-fix attempt ${nextAttempt}/${maxRetries})`,
|
||||
"info",
|
||||
);
|
||||
process.stderr.write(
|
||||
`verification-gate: ${total - passCount}/${total} checks failed\n`,
|
||||
);
|
||||
for (const f of failures) {
|
||||
process.stderr.write(` ${f.command} exited ${f.exitCode}\n`);
|
||||
if (f.stderr)
|
||||
process.stderr.write(` stderr: ${f.stderr.slice(0, 500)}\n`);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Log blocking runtime errors
|
||||
if (result.runtimeErrors?.some((e) => e.blocking)) {
|
||||
const blockingErrors = result.runtimeErrors.filter((e) => e.blocking);
|
||||
process.stderr.write(
|
||||
`verification-gate: ${blockingErrors.length} blocking runtime error(s) detected\n`,
|
||||
);
|
||||
for (const err of blockingErrors) {
|
||||
process.stderr.write(
|
||||
` [${err.source}] ${err.severity}: ${err.message.slice(0, 200)}\n`,
|
||||
);
|
||||
}
|
||||
}
|
||||
// Log security failures
|
||||
if (result.securityFailure) {
|
||||
ctx.ui.notify(
|
||||
`[verify] SECURITY FAIL — ${result.securityRationale}`,
|
||||
"error",
|
||||
);
|
||||
process.stderr.write(
|
||||
`verification-gate: security failure: ${result.securityRationale}\n`,
|
||||
);
|
||||
if (result.securityFindings) {
|
||||
process.stderr.write(`${result.securityFindings}\n`);
|
||||
}
|
||||
}
|
||||
// Log multi-package failures
|
||||
if (result.multiPackageFailure) {
|
||||
ctx.ui.notify(
|
||||
`[verify] MULTI-PACKAGE FAIL — ${result.multiPackageRationale}`,
|
||||
"error",
|
||||
);
|
||||
process.stderr.write(
|
||||
`verification-gate: multi-package healing failure: ${result.multiPackageRationale}\n`,
|
||||
);
|
||||
if (result.multiPackageFindings) {
|
||||
process.stderr.write(`${result.multiPackageFindings}\n`);
|
||||
}
|
||||
}
|
||||
// Log cost-guard failures
|
||||
if (result.costGuardFailure) {
|
||||
ctx.ui.notify(
|
||||
`[verify] COST-GUARD FAIL — ${result.costGuardRationale}`,
|
||||
"error",
|
||||
);
|
||||
process.stderr.write(
|
||||
`verification-gate: cost-guard failure: ${result.costGuardRationale}\n`,
|
||||
);
|
||||
}
|
||||
// Log chaos-monkey failures
|
||||
if (result.chaosMonkeyFailure) {
|
||||
ctx.ui.notify(
|
||||
`[verify] CHAOS-MONKEY FAIL — ${result.chaosMonkeyRationale}`,
|
||||
"error",
|
||||
);
|
||||
process.stderr.write(
|
||||
`verification-gate: chaos-monkey injected failure: ${result.chaosMonkeyRationale}\n`,
|
||||
);
|
||||
}
|
||||
// Write verification evidence JSON
|
||||
const attempt = s.verificationRetryCount.get(s.currentUnit.id) ?? 0;
|
||||
const tokenCount = computeTokenCountFromSession(ctx);
|
||||
const memoryPressureMB = getMemoryPressureMB();
|
||||
const gateOutcomes = buildGateOutcomesSummary(gateIds, gateResults);
|
||||
let recoveryStatus;
|
||||
try {
|
||||
const durability = await inspectExecuteTaskDurability(
|
||||
s.basePath,
|
||||
s.currentUnit.id,
|
||||
);
|
||||
if (durability) {
|
||||
recoveryStatus = formatExecuteTaskRecoveryStatus(durability);
|
||||
}
|
||||
} catch {
|
||||
recoveryStatus = undefined;
|
||||
}
|
||||
if (mid && sid && tid) {
|
||||
try {
|
||||
const sDir = resolveSlicePath(s.basePath, mid, sid);
|
||||
if (sDir) {
|
||||
const tasksDir = join(sDir, "tasks");
|
||||
if (result.passed) {
|
||||
writeVerificationJSON(
|
||||
result,
|
||||
tasksDir,
|
||||
tid,
|
||||
s.currentUnit.id,
|
||||
undefined,
|
||||
undefined,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
);
|
||||
} else {
|
||||
const nextAttempt = attempt + 1;
|
||||
writeVerificationJSON(
|
||||
result,
|
||||
tasksDir,
|
||||
tid,
|
||||
s.currentUnit.id,
|
||||
nextAttempt,
|
||||
maxRetries,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (evidenceErr) {
|
||||
logWarning(
|
||||
"engine",
|
||||
`verification-evidence write error: ${evidenceErr.message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
const advisoryFailure =
|
||||
!result.passed &&
|
||||
(result.discoverySource === "package-json" ||
|
||||
result.checks.some((check) =>
|
||||
isInfraVerificationFailure(check.stderr),
|
||||
));
|
||||
if (advisoryFailure) {
|
||||
s.verificationRetryCount.delete(s.currentUnit.id);
|
||||
s.pendingVerificationRetry = null;
|
||||
ctx.ui.notify(
|
||||
result.discoverySource === "package-json"
|
||||
? "Verification failed in auto-discovered package.json checks — treating as advisory."
|
||||
: "Verification failed due to infrastructure/runtime environment issues — treating as advisory.",
|
||||
"warning",
|
||||
);
|
||||
return "continue";
|
||||
}
|
||||
// ── Post-execution checks (run after main verification passes for execute-task units) ──
|
||||
let postExecChecks;
|
||||
let postExecBlockingFailure = false;
|
||||
if (result.passed && mid && sid && tid) {
|
||||
// Check preferences — respect enhanced_verification and enhanced_verification_post
|
||||
const enhancedEnabled = prefs?.enhanced_verification !== false; // default true
|
||||
const postEnabled = prefs?.enhanced_verification_post !== false; // default true
|
||||
if (enhancedEnabled && postEnabled && isDbAvailable()) {
|
||||
try {
|
||||
// Get the completed task from DB
|
||||
const taskRow = getTask(mid, sid, tid);
|
||||
if (taskRow && taskRow.key_files && taskRow.key_files.length > 0) {
|
||||
// Get all tasks in the slice
|
||||
const allTasks = getSliceTasks(mid, sid);
|
||||
// Filter to prior completed tasks (status = 'complete' or 'done', before current task)
|
||||
const priorTasks = allTasks.filter(
|
||||
(t) =>
|
||||
(t.status === "complete" || t.status === "done") &&
|
||||
t.id !== tid &&
|
||||
t.sequence < taskRow.sequence,
|
||||
);
|
||||
// Run post-execution checks
|
||||
const postExecResult = runPostExecutionChecks(
|
||||
taskRow,
|
||||
priorTasks,
|
||||
s.basePath,
|
||||
);
|
||||
// Store checks for evidence JSON
|
||||
postExecChecks = postExecResult.checks;
|
||||
// Log summary to stderr with sf-post-exec: prefix
|
||||
const emoji =
|
||||
postExecResult.status === "pass"
|
||||
? "✅"
|
||||
: postExecResult.status === "warn"
|
||||
? "⚠️"
|
||||
: "❌";
|
||||
process.stderr.write(
|
||||
`sf-post-exec: ${emoji} Post-execution checks ${postExecResult.status} for ${mid}/${sid}/${tid} (${postExecResult.durationMs}ms)\n`,
|
||||
);
|
||||
// Log individual check results
|
||||
for (const check of postExecResult.checks) {
|
||||
const checkEmoji = check.passed
|
||||
? "✓"
|
||||
: check.blocking
|
||||
? "✗"
|
||||
: "⚠";
|
||||
process.stderr.write(
|
||||
`sf-post-exec: ${checkEmoji} [${check.category}] ${check.target}: ${check.message}\n`,
|
||||
);
|
||||
}
|
||||
if (uokFlags.gates) {
|
||||
const strictMode = prefs?.enhanced_verification_strict === true;
|
||||
const warnEscalated =
|
||||
postExecResult.status === "warn" && strictMode;
|
||||
const blockingFailure =
|
||||
postExecResult.status === "fail" || warnEscalated;
|
||||
const findings = postExecResult.checks
|
||||
.filter((check) => !check.passed)
|
||||
.map(
|
||||
(check) =>
|
||||
`[${check.category}] ${check.target}: ${check.message}`,
|
||||
)
|
||||
.join("\n");
|
||||
const gateRunner = new UokGateRunner();
|
||||
gateRunner.register({
|
||||
id: "post-execution-checks",
|
||||
type: "artifact",
|
||||
execute: async () => ({
|
||||
outcome: blockingFailure ? "fail" : "pass",
|
||||
failureClass:
|
||||
postExecResult.status === "fail"
|
||||
? "artifact"
|
||||
: warnEscalated
|
||||
? "policy"
|
||||
: "none",
|
||||
rationale: blockingFailure
|
||||
? `post-execution checks ${postExecResult.status}${warnEscalated ? " (strict)" : ""}`
|
||||
: "post-execution checks passed",
|
||||
findings,
|
||||
}),
|
||||
});
|
||||
await gateRunner.run("post-execution-checks", {
|
||||
basePath: s.basePath,
|
||||
traceId: `verification:${s.currentUnit.id}`,
|
||||
turnId: s.currentUnit.id,
|
||||
milestoneId: mid,
|
||||
sliceId: sid,
|
||||
taskId: tid,
|
||||
unitType: s.currentUnit.type,
|
||||
unitId: s.currentUnit.id,
|
||||
});
|
||||
}
|
||||
// Check for blocking failures
|
||||
if (postExecResult.status === "fail") {
|
||||
postExecBlockingFailure = true;
|
||||
const blockingCount = postExecResult.checks.filter(
|
||||
(c) => !c.passed && c.blocking,
|
||||
).length;
|
||||
ctx.ui.notify(
|
||||
`Post-execution checks failed: ${blockingCount} blocking issue${blockingCount === 1 ? "" : "s"} found`,
|
||||
"error",
|
||||
);
|
||||
} else if (postExecResult.status === "warn") {
|
||||
ctx.ui.notify(
|
||||
`Post-execution checks passed with warnings`,
|
||||
"warning",
|
||||
);
|
||||
// Strict mode: treat warnings as blocking
|
||||
if (prefs?.enhanced_verification_strict === true) {
|
||||
postExecBlockingFailure = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (postExecErr) {
|
||||
// Post-execution check errors are non-fatal — log and continue
|
||||
logWarning("engine", `sf-post-exec: error — ${postExecErr.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Re-write verification evidence JSON with post-execution checks
|
||||
if (postExecChecks && postExecChecks.length > 0 && mid && sid && tid) {
|
||||
try {
|
||||
const sDir = resolveSlicePath(s.basePath, mid, sid);
|
||||
if (sDir) {
|
||||
const tasksDir = join(sDir, "tasks");
|
||||
// Add postExecutionChecks to the result for the JSON write
|
||||
const resultWithPostExec = {
|
||||
...result,
|
||||
// Mark as failed if there was a blocking post-exec failure
|
||||
passed: result.passed && !postExecBlockingFailure,
|
||||
};
|
||||
// Manually write with postExecutionChecks field
|
||||
writeVerificationJSONWithPostExec(
|
||||
resultWithPostExec,
|
||||
tasksDir,
|
||||
tid,
|
||||
s.currentUnit.id,
|
||||
postExecChecks,
|
||||
postExecBlockingFailure ? attempt + 1 : undefined,
|
||||
postExecBlockingFailure ? maxRetries : undefined,
|
||||
tokenCount,
|
||||
memoryPressureMB,
|
||||
gateOutcomes,
|
||||
recoveryStatus,
|
||||
);
|
||||
}
|
||||
} catch (evidenceErr) {
|
||||
logWarning(
|
||||
"engine",
|
||||
`verification-evidence: post-exec write error — ${evidenceErr.message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
// Update result.passed based on post-execution checks
|
||||
if (postExecBlockingFailure) {
|
||||
result.passed = false;
|
||||
}
|
||||
// ── Auto-fix retry logic ──
|
||||
if (result.passed) {
|
||||
s.verificationRetryCount.delete(s.currentUnit.id);
|
||||
s.pendingVerificationRetry = null;
|
||||
return "continue";
|
||||
} else if (postExecBlockingFailure) {
|
||||
// Post-execution failures are cross-task consistency issues — retrying the same task won't fix them.
|
||||
// Skip retry and pause immediately for human review.
|
||||
s.verificationRetryCount.delete(s.currentUnit.id);
|
||||
s.pendingVerificationRetry = null;
|
||||
ctx.ui.notify(
|
||||
`Post-execution checks failed — cross-task consistency issue detected, pausing for human review`,
|
||||
"error",
|
||||
);
|
||||
await pauseAuto(ctx, pi);
|
||||
return "pause";
|
||||
} else if (autoFixEnabled && attempt + 1 <= maxRetries) {
|
||||
const nextAttempt = attempt + 1;
|
||||
s.verificationRetryCount.set(s.currentUnit.id, nextAttempt);
|
||||
s.pendingVerificationRetry = {
|
||||
unitId: s.currentUnit.id,
|
||||
failureContext: formatFailureContext(result),
|
||||
attempt: nextAttempt,
|
||||
};
|
||||
const failedCmds = result.checks
|
||||
.filter((c) => c.exitCode !== 0)
|
||||
.map((c) => c.command);
|
||||
const cmdSummary =
|
||||
failedCmds.length <= 3
|
||||
? failedCmds.join(", ")
|
||||
: `${failedCmds.slice(0, 3).join(", ")}... and ${failedCmds.length - 3} more`;
|
||||
ctx.ui.notify(
|
||||
`Verification failed (${cmdSummary}) — auto-fix attempt ${nextAttempt}/${maxRetries}`,
|
||||
"warning",
|
||||
);
|
||||
// Return "retry" — the autoLoop while loop will re-iterate with the retry context
|
||||
return "retry";
|
||||
} else {
|
||||
// Gate failed, retries exhausted
|
||||
s.verificationRetryCount.delete(s.currentUnit.id);
|
||||
s.pendingVerificationRetry = null;
|
||||
const exhaustedFails = result.checks
|
||||
.filter((c) => c.exitCode !== 0)
|
||||
.map((c) => c.command);
|
||||
const exhaustedSummary =
|
||||
exhaustedFails.length <= 3
|
||||
? exhaustedFails.join(", ")
|
||||
: `${exhaustedFails.slice(0, 3).join(", ")}... and ${exhaustedFails.length - 3} more`;
|
||||
ctx.ui.notify(
|
||||
`Verification gate FAILED after ${attempt} ${attempt === 1 ? "retry" : "retries"} (${exhaustedSummary}) — pausing for human review`,
|
||||
"error",
|
||||
);
|
||||
await pauseAuto(ctx, pi);
|
||||
return "pause";
|
||||
}
|
||||
} catch (err) {
|
||||
// Ancillary post-gate errors are non-fatal — log warning and continue
|
||||
logWarning("engine", `verification-gate error: ${err.message}`);
|
||||
return "continue";
|
||||
}
|
||||
}
|
||||
/**
 * Persist verification evidence JSON for a unit, including the
 * post-execution consistency checks.
 *
 * Variant of writeVerificationJSON: identical payload shape plus the
 * `postExecutionChecks` field. Writes `<tasksDir>/<taskId>-VERIFY.json`
 * (pretty-printed, trailing newline), creating `tasksDir` if needed.
 *
 * @param {object} result - verification result (timestamp, passed, discoverySource, checks; optionally runtimeErrors, auditWarnings)
 * @param {string} tasksDir - directory the evidence file is written into
 * @param {string} taskId - task identifier; also names the output file
 * @param {string|undefined} unitId - unit identifier; falls back to taskId
 * @param {Array} postExecutionChecks - post-execution check records (always written)
 * @param {number} [retryAttempt] - current auto-fix attempt; omitted from JSON when undefined
 * @param {number} [maxRetries] - retry budget; omitted when undefined
 * @param {number} [tokenCount] - session token usage; omitted when undefined
 * @param {number} [memoryPressureMB] - memory pressure sample; omitted when undefined
 * @param {object} [gateOutcomes] - gate outcome summary; omitted when undefined
 * @param {string} [recoveryStatus] - execute-task recovery status; omitted when undefined
 */
function writeVerificationJSONWithPostExec(
  result,
  tasksDir,
  taskId,
  unitId,
  postExecutionChecks,
  retryAttempt,
  maxRetries,
  tokenCount,
  memoryPressureMB,
  gateOutcomes,
  recoveryStatus,
) {
  mkdirSync(tasksDir, { recursive: true });

  // Core payload; property insertion order is fixed so the emitted JSON
  // matches the plain writeVerificationJSON layout byte-for-byte.
  const payload = {
    schemaVersion: 1,
    taskId,
    unitId: unitId ?? taskId,
    timestamp: result.timestamp,
    passed: result.passed,
    discoverySource: result.discoverySource,
    checks: result.checks.map(({ command, exitCode, durationMs }) => ({
      command,
      exitCode,
      durationMs,
      verdict: exitCode === 0 ? "pass" : "fail",
    })),
  };

  // Optional scalar fields appear only when a value was supplied.
  const optionalFields = {
    retryAttempt,
    maxRetries,
    tokenCount,
    memoryPressureMB,
    gateOutcomes,
    recoveryStatus,
  };
  for (const [key, value] of Object.entries(optionalFields)) {
    if (value !== undefined) {
      payload[key] = value;
    }
  }

  payload.postExecutionChecks = postExecutionChecks;

  // Runtime errors / audit warnings are appended only when non-empty, and
  // each record is narrowed to its whitelisted properties.
  if (result.runtimeErrors && result.runtimeErrors.length > 0) {
    payload.runtimeErrors = result.runtimeErrors.map(
      ({ source, severity, message, blocking }) => ({
        source,
        severity,
        message,
        blocking,
      }),
    );
  }
  if (result.auditWarnings && result.auditWarnings.length > 0) {
    payload.auditWarnings = result.auditWarnings.map(
      ({ name, severity, title, url, fixAvailable }) => ({
        name,
        severity,
        title,
        url,
        fixAvailable,
      }),
    );
  }

  const outputPath = join(tasksDir, `${taskId}-VERIFY.json`);
  writeFileSync(outputPath, `${JSON.stringify(payload, null, 2)}\n`, "utf-8");
}
|
||||
|
|
@ -222,3 +222,37 @@ export {
|
|||
nextWriteRecord,
|
||||
releaseWriterToken,
|
||||
} from "./writer.js";
|
||||
// ─── Autonomous Dispatch ───────────────────────────────────────────────────
// Dispatch table + resolveDispatch(): core UOK orchestration primitives.
// Export names are kept in case-insensitive alphabetical order.
export {
  DISPATCH_RULES,
  enhanceUnitRankingWithMemory,
  extractValidationAttentionPlan,
  formatTaskCompleteFailurePrompt,
  getDispatchRuleNames,
  getRewriteCount,
  getUatCount,
  incrementUatCount,
  isVerificationNotApplicable,
  resolveDispatch,
  setRewriteCount,
} from "./auto-dispatch.js";
// ─── Runaway Guard ────────────────────────────────────────────────────────
// Diagnostic budget guard for the autonomous loop.
export {
  clearRunawayGuardState,
  collectSessionTokenUsage,
  collectWorktreeFingerprint,
  countChangedFiles,
  DEFAULT_RUNAWAY_CHANGED_FILES_WARNING,
  DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,
  DEFAULT_RUNAWAY_ELAPSED_MINUTES,
  DEFAULT_RUNAWAY_MIN_INTERVAL_MS,
  DEFAULT_RUNAWAY_TOKEN_WARNING,
  DEFAULT_RUNAWAY_TOOL_CALL_WARNING,
  evaluateRunawayGuard,
  resetRunawayGuardState,
  resolveRunawayGuardConfig,
} from "./auto-runaway-guard.js";
// ─── Unit Closeout ────────────────────────────────────────────────────────
export { closeoutUnit } from "./auto-unit-closeout.js";
// ─── Post-Unit Verification ────────────────────────────────────────────────
export { runPostUnitVerification } from "./auto-verification.js";
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue