fix: Let safe smoke tasks use LLM approval
This commit is contained in:
parent
085d3b7705
commit
0f27ffe865
5 changed files with 392 additions and 54 deletions
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { createHash } from "node:crypto";
|
||||
import { lstatSync, readFileSync } from "node:fs";
|
||||
import { existsSync, lstatSync, readdirSync, readFileSync } from "node:fs";
|
||||
import type { ExtensionContext } from "@singularity-forge/pi-coding-agent";
|
||||
import { formatTokenCount } from "../shared/format-utils.js";
|
||||
import type { AutoSupervisorConfig } from "./preferences-types.js";
|
||||
|
|
@ -21,6 +21,13 @@ export const DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS = 2;
|
|||
export const DEFAULT_RUNAWAY_MIN_INTERVAL_MS = 120_000;
|
||||
const EXECUTE_NO_PROGRESS_TOOL_WARNING = 25;
|
||||
const EXECUTE_NO_PROGRESS_TOKEN_WARNING = 500_000;
|
||||
const DURABLE_SF_ARTIFACT_PATHS = [
|
||||
".sf/milestones",
|
||||
".sf/approvals",
|
||||
".sf/DECISIONS.md",
|
||||
".sf/KNOWLEDGE.md",
|
||||
".sf/STATE.md",
|
||||
];
|
||||
|
||||
export interface RunawayGuardConfig {
|
||||
enabled: boolean;
|
||||
|
|
@ -119,16 +126,14 @@ export function resolveRunawayGuardConfig(
|
|||
tokenWarning:
|
||||
supervisor?.runaway_token_warning ?? DEFAULT_RUNAWAY_TOKEN_WARNING,
|
||||
elapsedMs:
|
||||
(supervisor?.runaway_elapsed_minutes ??
|
||||
DEFAULT_RUNAWAY_ELAPSED_MINUTES) *
|
||||
(supervisor?.runaway_elapsed_minutes ?? DEFAULT_RUNAWAY_ELAPSED_MINUTES) *
|
||||
60 *
|
||||
1000,
|
||||
changedFilesWarning:
|
||||
supervisor?.runaway_changed_files_warning ??
|
||||
DEFAULT_RUNAWAY_CHANGED_FILES_WARNING,
|
||||
diagnosticTurns:
|
||||
supervisor?.runaway_diagnostic_turns ??
|
||||
DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,
|
||||
supervisor?.runaway_diagnostic_turns ?? DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,
|
||||
hardPause: supervisor?.runaway_hard_pause !== false,
|
||||
minIntervalMs: DEFAULT_RUNAWAY_MIN_INTERVAL_MS,
|
||||
};
|
||||
|
|
@ -194,34 +199,102 @@ export function collectWorktreeFingerprint(cwd: string): string | null {
|
|||
.split("\n")
|
||||
.map((line) => line.trimEnd())
|
||||
.filter(Boolean);
|
||||
if (lines.length === 0) return "clean";
|
||||
|
||||
const hash = createHash("sha256");
|
||||
if (lines.length === 0) {
|
||||
hash.update("git-clean");
|
||||
hash.update("\0");
|
||||
}
|
||||
for (const line of lines) {
|
||||
hash.update(line);
|
||||
hash.update("\0");
|
||||
const filePath = parsePorcelainPath(line);
|
||||
if (!filePath) continue;
|
||||
try {
|
||||
const stat = lstatSync(`${cwd}/${filePath}`);
|
||||
if (!stat.isFile()) {
|
||||
hash.update(`type:${stat.isDirectory() ? "dir" : "other"}`);
|
||||
hash.update("\0");
|
||||
continue;
|
||||
}
|
||||
hash.update(readFileSync(`${cwd}/${filePath}`));
|
||||
hash.update("\0");
|
||||
} catch {
|
||||
hash.update("unreadable-or-deleted");
|
||||
hash.update("\0");
|
||||
}
|
||||
appendFileFingerprint(hash, cwd, filePath);
|
||||
}
|
||||
appendDurableSfArtifactFingerprint(hash, cwd);
|
||||
return hash.digest("hex");
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function appendDurableSfArtifactFingerprint(
|
||||
hash: ReturnType<typeof createHash>,
|
||||
cwd: string,
|
||||
): void {
|
||||
hash.update("sf-artifacts");
|
||||
hash.update("\0");
|
||||
for (const artifactPath of DURABLE_SF_ARTIFACT_PATHS) {
|
||||
appendPathFingerprint(hash, cwd, artifactPath);
|
||||
}
|
||||
}
|
||||
|
||||
function appendPathFingerprint(
|
||||
hash: ReturnType<typeof createHash>,
|
||||
cwd: string,
|
||||
relativePath: string,
|
||||
): void {
|
||||
const fullPath = `${cwd}/${relativePath}`;
|
||||
if (!existsSync(fullPath)) {
|
||||
hash.update(`missing:${relativePath}`);
|
||||
hash.update("\0");
|
||||
return;
|
||||
}
|
||||
|
||||
let stat: ReturnType<typeof lstatSync>;
|
||||
try {
|
||||
stat = lstatSync(fullPath);
|
||||
} catch {
|
||||
hash.update(`unreadable:${relativePath}`);
|
||||
hash.update("\0");
|
||||
return;
|
||||
}
|
||||
|
||||
if (stat.isDirectory()) {
|
||||
hash.update(`dir:${relativePath}`);
|
||||
hash.update("\0");
|
||||
let entries: string[];
|
||||
try {
|
||||
entries = readdirSync(fullPath).sort();
|
||||
} catch {
|
||||
hash.update(`unreadable-dir:${relativePath}`);
|
||||
hash.update("\0");
|
||||
return;
|
||||
}
|
||||
for (const entry of entries) {
|
||||
appendPathFingerprint(hash, cwd, `${relativePath}/${entry}`);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
appendFileFingerprint(hash, cwd, relativePath);
|
||||
}
|
||||
|
||||
function appendFileFingerprint(
|
||||
hash: ReturnType<typeof createHash>,
|
||||
cwd: string,
|
||||
relativePath: string,
|
||||
): void {
|
||||
try {
|
||||
const stat = lstatSync(`${cwd}/${relativePath}`);
|
||||
if (!stat.isFile()) {
|
||||
hash.update(
|
||||
`type:${relativePath}:${stat.isDirectory() ? "dir" : "other"}`,
|
||||
);
|
||||
hash.update("\0");
|
||||
return;
|
||||
}
|
||||
hash.update(`file:${relativePath}`);
|
||||
hash.update("\0");
|
||||
hash.update(readFileSync(`${cwd}/${relativePath}`));
|
||||
hash.update("\0");
|
||||
} catch {
|
||||
hash.update(`unreadable-or-deleted:${relativePath}`);
|
||||
hash.update("\0");
|
||||
}
|
||||
}
|
||||
|
||||
export type RunawayGuardDecision =
|
||||
| { action: "none" }
|
||||
| { action: "warn"; message: string; final: boolean }
|
||||
|
|
@ -238,7 +311,8 @@ export function evaluateRunawayGuard(
|
|||
if (config.diagnosticTurns <= 0) return { action: "none" };
|
||||
|
||||
const unitKey = `${unitType}/${unitId}`;
|
||||
if (!state || state.unitKey !== unitKey) resetRunawayGuardState(unitType, unitId);
|
||||
if (!state || state.unitKey !== unitKey)
|
||||
resetRunawayGuardState(unitType, unitId);
|
||||
const s = state!;
|
||||
const unitMetrics = normalizeMetricsToUnit(metrics, s);
|
||||
|
||||
|
|
@ -349,10 +423,7 @@ function thresholdReasons(
|
|||
`${metrics.toolCalls} tool calls (warning ${config.toolCallWarning})`,
|
||||
);
|
||||
}
|
||||
if (
|
||||
config.tokenWarning > 0 &&
|
||||
metrics.sessionTokens >= config.tokenWarning
|
||||
) {
|
||||
if (config.tokenWarning > 0 && metrics.sessionTokens >= config.tokenWarning) {
|
||||
reasons.push(
|
||||
`${formatTokenCount(metrics.sessionTokens)} session tokens (warning ${formatTokenCount(config.tokenWarning)})`,
|
||||
);
|
||||
|
|
@ -425,7 +496,9 @@ function buildRunawayGuardMessage(
|
|||
final
|
||||
? "You have already received a budget warning. Do not start new exploration. Write or update the durable artifact/handoff now, explicitly stating whether the unit was legitimately large, blocked, or stuck in a loop."
|
||||
: "Before more exploration or broad edits, state why this unit is still running: legitimately large, blocked, or stuck/churning. Then either finish the required artifact or write a precise handoff.",
|
||||
].filter(Boolean).join("\n");
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join("\n");
|
||||
}
|
||||
|
||||
function formatChangedFilesLine(
|
||||
|
|
@ -463,7 +536,9 @@ function parsePorcelainPath(line: string): string | null {
|
|||
let filePath = line.slice(3);
|
||||
const renameSeparator = " -> ";
|
||||
if (filePath.includes(renameSeparator)) {
|
||||
filePath = filePath.slice(filePath.lastIndexOf(renameSeparator) + renameSeparator.length);
|
||||
filePath = filePath.slice(
|
||||
filePath.lastIndexOf(renameSeparator) + renameSeparator.length,
|
||||
);
|
||||
}
|
||||
if (filePath.startsWith('"') && filePath.endsWith('"')) {
|
||||
filePath = filePath.slice(1, -1);
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ import { recordLearnedOutcome } from "../learning/runtime.js";
|
|||
import { sfRoot } from "../paths.js";
|
||||
import { resolvePersistModelChanges } from "../preferences.js";
|
||||
import {
|
||||
approveProductionMutationWithLlmPolicy,
|
||||
ensureProductionMutationApprovalTemplate,
|
||||
readProductionMutationApprovalStatus,
|
||||
} from "../production-mutation-approval.js";
|
||||
|
|
@ -1379,27 +1380,45 @@ export async function runGuards(
|
|||
"warning",
|
||||
);
|
||||
} else {
|
||||
const template = ensureProductionMutationApprovalTemplate(
|
||||
const llmApproval = approveProductionMutationWithLlmPolicy(
|
||||
approvalBasePath,
|
||||
approvalUnit,
|
||||
);
|
||||
const reasons = approval.reasons.length
|
||||
? ` Missing/invalid fields: ${approval.reasons.join("; ")}.`
|
||||
: "";
|
||||
const msg =
|
||||
`Production mutation guard: ${activeMilestone.id}/${activeSlice.id}/${activeTask.id} asks to POST unified failover against production. ` +
|
||||
`${template.created ? "Created" : "Reusing"} approval gate at ${template.path}. ` +
|
||||
`Fill it with an explicit safe server/VM target, cleanup/rollback path, and operator approval, then rerun sf headless auto.${reasons}`;
|
||||
ctx.ui.notify(msg, "error");
|
||||
deps.sendDesktopNotification(
|
||||
"SF",
|
||||
"Production mutation guard paused auto-mode",
|
||||
"warning",
|
||||
"safety",
|
||||
basename(s.originalBasePath || s.basePath),
|
||||
);
|
||||
await deps.pauseAuto(ctx, pi);
|
||||
return { action: "break", reason: "production-mutation-guard" };
|
||||
if (llmApproval.approved) {
|
||||
ctx.ui.notify(
|
||||
`Production mutation LLM approval accepted for pending-command-only smoke test ${approvalUnit.milestoneId}/${approvalUnit.sliceId}/${approvalUnit.taskId}: ${llmApproval.path}`,
|
||||
"warning",
|
||||
);
|
||||
} else {
|
||||
const template = ensureProductionMutationApprovalTemplate(
|
||||
approvalBasePath,
|
||||
approvalUnit,
|
||||
);
|
||||
const blockerReasons = [
|
||||
...approval.reasons,
|
||||
...llmApproval.reasons.map((reason) => `LLM: ${reason}`),
|
||||
];
|
||||
const reasons = blockerReasons.length
|
||||
? ` Missing/invalid fields: ${blockerReasons.join("; ")}.`
|
||||
: "";
|
||||
const msg =
|
||||
`Production mutation guard: ${activeMilestone.id}/${activeSlice.id}/${activeTask.id} asks to POST unified failover against production. ` +
|
||||
`${template.created ? "Created" : "Reusing"} approval gate at ${template.path}. ` +
|
||||
`Fill it with an explicit safe server/VM target, cleanup/rollback path, and human or LLM approval, then rerun sf headless auto.${reasons}`;
|
||||
ctx.ui.notify(msg, "error");
|
||||
deps.sendDesktopNotification(
|
||||
"SF",
|
||||
"Production mutation guard paused auto-mode",
|
||||
"warning",
|
||||
"safety",
|
||||
basename(s.originalBasePath || s.basePath),
|
||||
);
|
||||
await deps.pauseAuto(ctx, pi);
|
||||
return {
|
||||
action: "break",
|
||||
reason: "production-mutation-guard",
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,9 +25,12 @@ export interface ProductionMutationApproval {
|
|||
approval: {
|
||||
approved: boolean;
|
||||
approvedBy: string;
|
||||
approverType: "" | "human" | "llm";
|
||||
approvedAt: string;
|
||||
safeServerId: string;
|
||||
safeVmNames: string[];
|
||||
targetSelectionPlan: string;
|
||||
preMutationChecks: string[];
|
||||
cleanupPlan: string;
|
||||
rollbackPlan: string;
|
||||
notes: string;
|
||||
|
|
@ -81,21 +84,129 @@ export function buildProductionMutationApprovalTemplate(
|
|||
approval: {
|
||||
approved: false,
|
||||
approvedBy: "",
|
||||
approverType: "",
|
||||
approvedAt: "",
|
||||
safeServerId: "",
|
||||
safeVmNames: [],
|
||||
targetSelectionPlan: "",
|
||||
preMutationChecks: [],
|
||||
cleanupPlan: "",
|
||||
rollbackPlan: "",
|
||||
notes: "",
|
||||
},
|
||||
instructions: [
|
||||
"Set status to approved and approval.approved to true only after selecting a safe non-customer-impacting target.",
|
||||
"Fill approvedBy, approvedAt, safeServerId, safeVmNames, cleanupPlan, and rollbackPlan.",
|
||||
"Human and LLM approvals are both valid when they name a safe non-customer-impacting target or a constrained target-selection plan.",
|
||||
"Fill status, approval.approved, approvedBy, approverType, approvedAt, safeServerId or targetSelectionPlan, safeVmNames or targetSelectionPlan, cleanupPlan, and rollbackPlan.",
|
||||
"Then rerun sf headless auto.",
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
function normalizedTask(unit: ProductionMutationUnit): string {
|
||||
return [unit.taskTitle, unit.taskText].join("\n").toLowerCase();
|
||||
}
|
||||
|
||||
export function assessLlmProductionMutationApproval(
|
||||
unit: ProductionMutationUnit,
|
||||
): { approved: boolean; reasons: string[] } {
|
||||
const text = normalizedTask(unit);
|
||||
const reasons: string[] = [];
|
||||
|
||||
if (
|
||||
!(
|
||||
text.includes("status=pending") ||
|
||||
text.includes("pending state") ||
|
||||
text.includes("pending command") ||
|
||||
text.includes("command row")
|
||||
)
|
||||
) {
|
||||
reasons.push("task is not constrained to pending command verification");
|
||||
}
|
||||
if (
|
||||
!(
|
||||
text.includes("do not trigger actual failover") ||
|
||||
text.includes("do not trigger real failover")
|
||||
)
|
||||
) {
|
||||
reasons.push("task does not explicitly forbid actual failover");
|
||||
}
|
||||
if (
|
||||
!(
|
||||
text.includes("routing table is unchanged") ||
|
||||
text.includes("gateway routing table is unchanged") ||
|
||||
text.includes("no active_dr")
|
||||
)
|
||||
) {
|
||||
reasons.push("task does not require no routing/runtime failover changes");
|
||||
}
|
||||
if (!(text.includes("test server_id") || text.includes("test server id"))) {
|
||||
reasons.push("task does not require a test server target");
|
||||
}
|
||||
if (!(text.includes("vm_names") || text.includes("test vm"))) {
|
||||
reasons.push("task does not require a test VM target");
|
||||
}
|
||||
|
||||
return { approved: reasons.length === 0, reasons };
|
||||
}
|
||||
|
||||
export function buildLlmProductionMutationApproval(
|
||||
unit: ProductionMutationUnit,
|
||||
approvedAt: Date = new Date(),
|
||||
): ProductionMutationApproval {
|
||||
return {
|
||||
...buildProductionMutationApprovalTemplate(unit),
|
||||
status: "approved",
|
||||
approval: {
|
||||
approved: true,
|
||||
approvedBy: "sf-llm:auto-production-mutation-policy",
|
||||
approverType: "llm",
|
||||
approvedAt: approvedAt.toISOString(),
|
||||
safeServerId: "agent-selected-non-customer-impacting-test-server",
|
||||
safeVmNames: ["agent-selected-non-customer-impacting-test-vm"],
|
||||
targetSelectionPlan:
|
||||
"Before POST, inspect portal/DB state and select only a non-customer-impacting test server_id and test VM. If no safe test target exists, stop and report instead of posting.",
|
||||
preMutationChecks: [
|
||||
"Confirm the task is pending-command-only validation.",
|
||||
"Confirm the selected target is not a real customer workload.",
|
||||
"Confirm no actual failover, active_dr update, or gateway route change is expected.",
|
||||
"Capture the command id created by the smoke POST for cleanup/audit.",
|
||||
],
|
||||
cleanupPlan:
|
||||
"After evidence capture, leave no actionable production failover command behind. If the row remains pending and a supported cancel/delete path exists, cancel/delete that smoke command; otherwise stop and escalate with the command id before any agent can execute it.",
|
||||
rollbackPlan:
|
||||
"Do not mutate gateway routing or active_dr state. If any route or active_dr state changes, stop immediately, restore the previous route/state from captured pre-checks, and report the incident.",
|
||||
notes:
|
||||
"LLM auto-approval is limited to this pending-command-only smoke test contract. It is not approval for real VM failover.",
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
export function approveProductionMutationWithLlmPolicy(
|
||||
basePath: string,
|
||||
unit: ProductionMutationUnit,
|
||||
approvedAt: Date = new Date(),
|
||||
): { path: string; approved: boolean; reasons: string[]; wrote: boolean } {
|
||||
const path = productionMutationApprovalPath(basePath, unit);
|
||||
const assessment = assessLlmProductionMutationApproval(unit);
|
||||
if (!assessment.approved) {
|
||||
return {
|
||||
path,
|
||||
approved: false,
|
||||
reasons: assessment.reasons,
|
||||
wrote: false,
|
||||
};
|
||||
}
|
||||
atomicWriteSync(
|
||||
path,
|
||||
JSON.stringify(
|
||||
buildLlmProductionMutationApproval(unit, approvedAt),
|
||||
null,
|
||||
2,
|
||||
) + "\n",
|
||||
);
|
||||
return { path, approved: true, reasons: [], wrote: true };
|
||||
}
|
||||
|
||||
export function ensureProductionMutationApprovalTemplate(
|
||||
basePath: string,
|
||||
unit: ProductionMutationUnit,
|
||||
|
|
@ -158,10 +269,17 @@ export function validateProductionMutationApproval(
|
|||
if (approval.approved !== true) {
|
||||
reasons.push("approval.approved must be true");
|
||||
}
|
||||
if (
|
||||
"approverType" in approval &&
|
||||
approval.approverType !== "" &&
|
||||
approval.approverType !== "human" &&
|
||||
approval.approverType !== "llm"
|
||||
) {
|
||||
reasons.push("approval.approverType must be human or llm");
|
||||
}
|
||||
for (const field of [
|
||||
"approvedBy",
|
||||
"approvedAt",
|
||||
"safeServerId",
|
||||
"cleanupPlan",
|
||||
"rollbackPlan",
|
||||
] as const) {
|
||||
|
|
@ -169,8 +287,14 @@ export function validateProductionMutationApproval(
|
|||
reasons.push(`approval.${field} is required`);
|
||||
}
|
||||
}
|
||||
if (!nonEmptyStringArray(approval.safeVmNames)) {
|
||||
reasons.push("approval.safeVmNames must contain at least one VM name");
|
||||
const hasConcreteTarget =
|
||||
nonEmptyString(approval.safeServerId) &&
|
||||
nonEmptyStringArray(approval.safeVmNames);
|
||||
const hasTargetSelectionPlan = nonEmptyString(approval.targetSelectionPlan);
|
||||
if (!hasConcreteTarget && !hasTargetSelectionPlan) {
|
||||
reasons.push(
|
||||
"approval must include safeServerId and safeVmNames, or targetSelectionPlan",
|
||||
);
|
||||
}
|
||||
|
||||
return { approved: reasons.length === 0, reasons };
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import test from "node:test";
|
||||
|
|
@ -13,7 +13,9 @@ import {
|
|||
resetRunawayGuardState,
|
||||
} from "../auto-runaway-guard.ts";
|
||||
|
||||
function config(overrides: Partial<RunawayGuardConfig> = {}): RunawayGuardConfig {
|
||||
function config(
|
||||
overrides: Partial<RunawayGuardConfig> = {},
|
||||
): RunawayGuardConfig {
|
||||
return {
|
||||
enabled: true,
|
||||
toolCallWarning: 60,
|
||||
|
|
@ -53,7 +55,10 @@ test("runaway guard warns on first unit-relative threshold crossing", () => {
|
|||
assert.equal(decision.final, false);
|
||||
assert.match(decision.message, /61 tool calls/);
|
||||
assert.match(decision.message, /1\.00M session tokens/);
|
||||
assert.match(decision.message, /legitimately large, blocked, or stuck\/churning/);
|
||||
assert.match(
|
||||
decision.message,
|
||||
/legitimately large, blocked, or stuck\/churning/,
|
||||
);
|
||||
});
|
||||
|
||||
test("runaway guard pauses after diagnostic turns when budget keeps growing", () => {
|
||||
|
|
@ -141,7 +146,10 @@ test("runaway guard catches changed-file churn relative to unit start", () => {
|
|||
assert.equal(decision.action, "warn");
|
||||
if (decision.action !== "warn") throw new Error("expected warning");
|
||||
assert.match(decision.message, /76 new changed files/);
|
||||
assert.match(decision.message, /Active edits are not automatically healthy progress/);
|
||||
assert.match(
|
||||
decision.message,
|
||||
/Active edits are not automatically healthy progress/,
|
||||
);
|
||||
});
|
||||
|
||||
test("runaway guard warns early when execute-task spends budget with no new file changes", () => {
|
||||
|
|
@ -252,6 +260,52 @@ test("worktree fingerprint changes when untracked file content changes", () => {
|
|||
}
|
||||
});
|
||||
|
||||
test("worktree fingerprint changes when ignored durable sf artifacts change", () => {
|
||||
const dir = mkdtempSync(join(tmpdir(), "sf-runaway-"));
|
||||
try {
|
||||
execFileSync("git", ["init"], {
|
||||
cwd: dir,
|
||||
stdio: ["ignore", "ignore", "ignore"],
|
||||
});
|
||||
writeFileSync(join(dir, ".gitignore"), ".sf/\n");
|
||||
execFileSync("git", ["add", ".gitignore"], {
|
||||
cwd: dir,
|
||||
stdio: ["ignore", "ignore", "ignore"],
|
||||
});
|
||||
execFileSync("git", ["commit", "-m", "init"], {
|
||||
cwd: dir,
|
||||
env: {
|
||||
...process.env,
|
||||
GIT_AUTHOR_NAME: "SF Test",
|
||||
GIT_AUTHOR_EMAIL: "sf@example.com",
|
||||
GIT_COMMITTER_NAME: "SF Test",
|
||||
GIT_COMMITTER_EMAIL: "sf@example.com",
|
||||
},
|
||||
stdio: ["ignore", "ignore", "ignore"],
|
||||
});
|
||||
mkdirSync(join(dir, ".sf", "milestones", "M004", "slices", "S03"), {
|
||||
recursive: true,
|
||||
});
|
||||
writeFileSync(
|
||||
join(dir, ".sf", "milestones", "M004", "slices", "S03", "evidence.md"),
|
||||
"before\n",
|
||||
);
|
||||
|
||||
const before = collectWorktreeFingerprint(dir);
|
||||
writeFileSync(
|
||||
join(dir, ".sf", "milestones", "M004", "slices", "S03", "evidence.md"),
|
||||
"after\n",
|
||||
);
|
||||
const after = collectWorktreeFingerprint(dir);
|
||||
|
||||
assert.ok(before);
|
||||
assert.ok(after);
|
||||
assert.notEqual(after, before);
|
||||
} finally {
|
||||
rmSync(dir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("runaway guard is disabled when configured off", () => {
|
||||
clearRunawayGuardState();
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ import { join } from "node:path";
|
|||
import test from "node:test";
|
||||
|
||||
import {
|
||||
approveProductionMutationWithLlmPolicy,
|
||||
assessLlmProductionMutationApproval,
|
||||
ensureProductionMutationApprovalTemplate,
|
||||
type ProductionMutationApproval,
|
||||
type ProductionMutationUnit,
|
||||
|
|
@ -32,6 +34,8 @@ const unit: ProductionMutationUnit = {
|
|||
"Execute smoke test against production NixOS Hetzner infrastructure.",
|
||||
"POST to /action/unified-failover with test server_id and vm_names.",
|
||||
"Verify command row was created with status=pending.",
|
||||
"Verify gateway routing table is unchanged and no active_dr updates occur.",
|
||||
"NOTE: Do NOT trigger actual failover on real VMs.",
|
||||
].join("\n"),
|
||||
};
|
||||
|
||||
|
|
@ -50,7 +54,9 @@ test("production mutation approval writes a pending template once", () => {
|
|||
assert.equal(parsed.unitId, "M004/S03/T01");
|
||||
assert.equal(parsed.status, "pending");
|
||||
assert.equal(parsed.approval.approved, false);
|
||||
assert.equal(parsed.approval.approverType, "");
|
||||
assert.deepEqual(parsed.approval.safeVmNames, []);
|
||||
assert.deepEqual(parsed.approval.preMutationChecks, []);
|
||||
|
||||
const second = ensureProductionMutationApprovalTemplate(basePath, unit);
|
||||
assert.equal(second.created, false);
|
||||
|
|
@ -70,9 +76,12 @@ test("production mutation approval rejects incomplete approval files", () => {
|
|||
approval: {
|
||||
approved: true,
|
||||
approvedBy: "",
|
||||
approverType: "",
|
||||
approvedAt: "",
|
||||
safeServerId: "",
|
||||
safeVmNames: [],
|
||||
targetSelectionPlan: "",
|
||||
preMutationChecks: [],
|
||||
cleanupPlan: "",
|
||||
rollbackPlan: "",
|
||||
},
|
||||
|
|
@ -85,7 +94,7 @@ test("production mutation approval rejects incomplete approval files", () => {
|
|||
assert.ok(result.reasons.includes("approval.approvedBy is required"));
|
||||
assert.ok(
|
||||
result.reasons.includes(
|
||||
"approval.safeVmNames must contain at least one VM name",
|
||||
"approval must include safeServerId and safeVmNames, or targetSelectionPlan",
|
||||
),
|
||||
);
|
||||
});
|
||||
|
|
@ -106,9 +115,12 @@ test("production mutation approval accepts complete explicit approval", () => {
|
|||
approval: {
|
||||
approved: true,
|
||||
approvedBy: "operator@example.com",
|
||||
approverType: "human",
|
||||
approvedAt: "2026-04-30T10:00:00.000Z",
|
||||
safeServerId: "test-server-id",
|
||||
safeVmNames: ["pilot-smoke-vm"],
|
||||
targetSelectionPlan: "",
|
||||
preMutationChecks: ["Confirm target is non-customer-impacting."],
|
||||
cleanupPlan: "Delete the test pending command after verification.",
|
||||
rollbackPlan: "Abort the test failover and restore the previous route.",
|
||||
notes: "Smoke target is non-customer-impacting.",
|
||||
|
|
@ -125,3 +137,57 @@ test("production mutation approval accepts complete explicit approval", () => {
|
|||
rmSync(basePath, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("production mutation approval auto-fills LLM approval for pending-command-only smoke tests", () => {
|
||||
const basePath = tempBase();
|
||||
try {
|
||||
const approvedAt = new Date("2026-04-30T10:05:00.000Z");
|
||||
const result = approveProductionMutationWithLlmPolicy(
|
||||
basePath,
|
||||
unit,
|
||||
approvedAt,
|
||||
);
|
||||
assert.equal(result.approved, true);
|
||||
assert.equal(result.wrote, true);
|
||||
assert.deepEqual(result.reasons, []);
|
||||
|
||||
const status = readProductionMutationApprovalStatus(basePath, unit);
|
||||
assert.equal(status.approved, true);
|
||||
|
||||
const parsed = JSON.parse(
|
||||
readFileSync(result.path, "utf-8"),
|
||||
) as ProductionMutationApproval;
|
||||
assert.equal(parsed.status, "approved");
|
||||
assert.equal(parsed.approval.approverType, "llm");
|
||||
assert.equal(
|
||||
parsed.approval.approvedBy,
|
||||
"sf-llm:auto-production-mutation-policy",
|
||||
);
|
||||
assert.equal(parsed.approval.approvedAt, "2026-04-30T10:05:00.000Z");
|
||||
assert.ok(parsed.approval.targetSelectionPlan.length > 0);
|
||||
assert.ok(parsed.approval.preMutationChecks.length >= 3);
|
||||
} finally {
|
||||
rmSync(basePath, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("production mutation approval does not auto-approve real failover tasks", () => {
|
||||
const realFailoverUnit: ProductionMutationUnit = {
|
||||
...unit,
|
||||
taskText:
|
||||
"POST to /action/unified-failover against production and verify the VM fails over.",
|
||||
};
|
||||
|
||||
const assessment = assessLlmProductionMutationApproval(realFailoverUnit);
|
||||
assert.equal(assessment.approved, false);
|
||||
assert.ok(
|
||||
assessment.reasons.includes(
|
||||
"task is not constrained to pending command verification",
|
||||
),
|
||||
);
|
||||
assert.ok(
|
||||
assessment.reasons.includes(
|
||||
"task does not explicitly forbid actual failover",
|
||||
),
|
||||
);
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue