fix: Let safe smoke tasks use LLM approval

2026-04-30 13:11:26 +02:00 · 2026-04-30 13:11:26 +02:00 · 0f27ffe865
commit 0f27ffe865
parent 085d3b7705
5 changed files with 392 additions and 54 deletions
--- a/src/resources/extensions/sf/auto-runaway-guard.ts
+++ b/src/resources/extensions/sf/auto-runaway-guard.ts
@ -8,7 +8,7 @@

 import { execFileSync } from "node:child_process";
 import { createHash } from "node:crypto";
-import { lstatSync, readFileSync } from "node:fs";
+import { existsSync, lstatSync, readdirSync, readFileSync } from "node:fs";
 import type { ExtensionContext } from "@singularity-forge/pi-coding-agent";
 import { formatTokenCount } from "../shared/format-utils.js";
 import type { AutoSupervisorConfig } from "./preferences-types.js";
@ -21,6 +21,13 @@ export const DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS = 2;
 export const DEFAULT_RUNAWAY_MIN_INTERVAL_MS = 120_000;
 const EXECUTE_NO_PROGRESS_TOOL_WARNING = 25;
 const EXECUTE_NO_PROGRESS_TOKEN_WARNING = 500_000;
+const DURABLE_SF_ARTIFACT_PATHS = [
+	".sf/milestones",
+	".sf/approvals",
+	".sf/DECISIONS.md",
+	".sf/KNOWLEDGE.md",
+	".sf/STATE.md",
+];

 export interface RunawayGuardConfig {
 	enabled: boolean;
@ -119,16 +126,14 @@ export function resolveRunawayGuardConfig(
 		tokenWarning:
 			supervisor?.runaway_token_warning ?? DEFAULT_RUNAWAY_TOKEN_WARNING,
 		elapsedMs:
-			(supervisor?.runaway_elapsed_minutes ??
-				DEFAULT_RUNAWAY_ELAPSED_MINUTES) *
+			(supervisor?.runaway_elapsed_minutes ?? DEFAULT_RUNAWAY_ELAPSED_MINUTES) *
 			60 *
 			1000,
 		changedFilesWarning:
 			supervisor?.runaway_changed_files_warning ??
 			DEFAULT_RUNAWAY_CHANGED_FILES_WARNING,
 		diagnosticTurns:
-			supervisor?.runaway_diagnostic_turns ??
-			DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,
+			supervisor?.runaway_diagnostic_turns ?? DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS,
 		hardPause: supervisor?.runaway_hard_pause !== false,
 		minIntervalMs: DEFAULT_RUNAWAY_MIN_INTERVAL_MS,
 	};
@ -194,34 +199,102 @@ export function collectWorktreeFingerprint(cwd: string): string | null {
 			.split("\n")
 			.map((line) => line.trimEnd())
 			.filter(Boolean);
-		if (lines.length === 0) return "clean";

 		const hash = createHash("sha256");
+		if (lines.length === 0) {
+			hash.update("git-clean");
+			hash.update("\0");
+		}
 		for (const line of lines) {
 			hash.update(line);
 			hash.update("\0");
 			const filePath = parsePorcelainPath(line);
 			if (!filePath) continue;
-			try {
-				const stat = lstatSync(`${cwd}/${filePath}`);
-				if (!stat.isFile()) {
-					hash.update(`type:${stat.isDirectory() ? "dir" : "other"}`);
-					hash.update("\0");
-					continue;
-				}
-				hash.update(readFileSync(`${cwd}/${filePath}`));
-				hash.update("\0");
-			} catch {
-				hash.update("unreadable-or-deleted");
-				hash.update("\0");
-			}
+			appendFileFingerprint(hash, cwd, filePath);
 		}
+		appendDurableSfArtifactFingerprint(hash, cwd);
 		return hash.digest("hex");
 	} catch {
 		return null;
 	}
 }

+function appendDurableSfArtifactFingerprint(
+	hash: ReturnType<typeof createHash>,
+	cwd: string,
+): void {
+	hash.update("sf-artifacts");
+	hash.update("\0");
+	for (const artifactPath of DURABLE_SF_ARTIFACT_PATHS) {
+		appendPathFingerprint(hash, cwd, artifactPath);
+	}
+}
+
+function appendPathFingerprint(
+	hash: ReturnType<typeof createHash>,
+	cwd: string,
+	relativePath: string,
+): void {
+	const fullPath = `${cwd}/${relativePath}`;
+	if (!existsSync(fullPath)) {
+		hash.update(`missing:${relativePath}`);
+		hash.update("\0");
+		return;
+	}
+
+	let stat: ReturnType<typeof lstatSync>;
+	try {
+		stat = lstatSync(fullPath);
+	} catch {
+		hash.update(`unreadable:${relativePath}`);
+		hash.update("\0");
+		return;
+	}
+
+	if (stat.isDirectory()) {
+		hash.update(`dir:${relativePath}`);
+		hash.update("\0");
+		let entries: string[];
+		try {
+			entries = readdirSync(fullPath).sort();
+		} catch {
+			hash.update(`unreadable-dir:${relativePath}`);
+			hash.update("\0");
+			return;
+		}
+		for (const entry of entries) {
+			appendPathFingerprint(hash, cwd, `${relativePath}/${entry}`);
+		}
+		return;
+	}
+
+	appendFileFingerprint(hash, cwd, relativePath);
+}
+
+function appendFileFingerprint(
+	hash: ReturnType<typeof createHash>,
+	cwd: string,
+	relativePath: string,
+): void {
+	try {
+		const stat = lstatSync(`${cwd}/${relativePath}`);
+		if (!stat.isFile()) {
+			hash.update(
+				`type:${relativePath}:${stat.isDirectory() ? "dir" : "other"}`,
+			);
+			hash.update("\0");
+			return;
+		}
+		hash.update(`file:${relativePath}`);
+		hash.update("\0");
+		hash.update(readFileSync(`${cwd}/${relativePath}`));
+		hash.update("\0");
+	} catch {
+		hash.update(`unreadable-or-deleted:${relativePath}`);
+		hash.update("\0");
+	}
+}
+
 export type RunawayGuardDecision =
 	| { action: "none" }
 	| { action: "warn"; message: string; final: boolean }
@ -238,7 +311,8 @@ export function evaluateRunawayGuard(
 	if (config.diagnosticTurns <= 0) return { action: "none" };

 	const unitKey = `${unitType}/${unitId}`;
-	if (!state || state.unitKey !== unitKey) resetRunawayGuardState(unitType, unitId);
+	if (!state || state.unitKey !== unitKey)
+		resetRunawayGuardState(unitType, unitId);
 	const s = state!;
 	const unitMetrics = normalizeMetricsToUnit(metrics, s);

@ -349,10 +423,7 @@ function thresholdReasons(
 			`${metrics.toolCalls} tool calls (warning ${config.toolCallWarning})`,
 		);
 	}
-	if (
-		config.tokenWarning > 0 &&
-		metrics.sessionTokens >= config.tokenWarning
-	) {
+	if (config.tokenWarning > 0 && metrics.sessionTokens >= config.tokenWarning) {
 		reasons.push(
 			`${formatTokenCount(metrics.sessionTokens)} session tokens (warning ${formatTokenCount(config.tokenWarning)})`,
 		);
@ -425,7 +496,9 @@ function buildRunawayGuardMessage(
 		final
 			? "You have already received a budget warning. Do not start new exploration. Write or update the durable artifact/handoff now, explicitly stating whether the unit was legitimately large, blocked, or stuck in a loop."
 			: "Before more exploration or broad edits, state why this unit is still running: legitimately large, blocked, or stuck/churning. Then either finish the required artifact or write a precise handoff.",
-	].filter(Boolean).join("\n");
+	]
+		.filter(Boolean)
+		.join("\n");
 }

 function formatChangedFilesLine(
@ -463,7 +536,9 @@ function parsePorcelainPath(line: string): string | null {
 	let filePath = line.slice(3);
 	const renameSeparator = " -> ";
 	if (filePath.includes(renameSeparator)) {
-		filePath = filePath.slice(filePath.lastIndexOf(renameSeparator) + renameSeparator.length);
+		filePath = filePath.slice(
+			filePath.lastIndexOf(renameSeparator) + renameSeparator.length,
+		);
 	}
 	if (filePath.startsWith('"') && filePath.endsWith('"')) {
 		filePath = filePath.slice(1, -1);
--- a/src/resources/extensions/sf/auto/phases.ts
+++ b/src/resources/extensions/sf/auto/phases.ts
@ -46,6 +46,7 @@ import { recordLearnedOutcome } from "../learning/runtime.js";
 import { sfRoot } from "../paths.js";
 import { resolvePersistModelChanges } from "../preferences.js";
 import {
+	approveProductionMutationWithLlmPolicy,
 	ensureProductionMutationApprovalTemplate,
 	readProductionMutationApprovalStatus,
 } from "../production-mutation-approval.js";
@ -1379,27 +1380,45 @@ export async function runGuards(
 								"warning",
 							);
 						} else {
-							const template = ensureProductionMutationApprovalTemplate(
+							const llmApproval = approveProductionMutationWithLlmPolicy(
 								approvalBasePath,
 								approvalUnit,
 							);
-							const reasons = approval.reasons.length
-								? ` Missing/invalid fields: ${approval.reasons.join("; ")}.`
-								: "";
-							const msg =
-								`Production mutation guard: ${activeMilestone.id}/${activeSlice.id}/${activeTask.id} asks to POST unified failover against production. ` +
-								`${template.created ? "Created" : "Reusing"} approval gate at ${template.path}. ` +
-								`Fill it with an explicit safe server/VM target, cleanup/rollback path, and operator approval, then rerun sf headless auto.${reasons}`;
-							ctx.ui.notify(msg, "error");
-							deps.sendDesktopNotification(
-								"SF",
-								"Production mutation guard paused auto-mode",
-								"warning",
-								"safety",
-								basename(s.originalBasePath || s.basePath),
-							);
-							await deps.pauseAuto(ctx, pi);
-							return { action: "break", reason: "production-mutation-guard" };
+							if (llmApproval.approved) {
+								ctx.ui.notify(
+									`Production mutation LLM approval accepted for pending-command-only smoke test ${approvalUnit.milestoneId}/${approvalUnit.sliceId}/${approvalUnit.taskId}: ${llmApproval.path}`,
+									"warning",
+								);
+							} else {
+								const template = ensureProductionMutationApprovalTemplate(
+									approvalBasePath,
+									approvalUnit,
+								);
+								const blockerReasons = [
+									...approval.reasons,
+									...llmApproval.reasons.map((reason) => `LLM: ${reason}`),
+								];
+								const reasons = blockerReasons.length
+									? ` Missing/invalid fields: ${blockerReasons.join("; ")}.`
+									: "";
+								const msg =
+									`Production mutation guard: ${activeMilestone.id}/${activeSlice.id}/${activeTask.id} asks to POST unified failover against production. ` +
+									`${template.created ? "Created" : "Reusing"} approval gate at ${template.path}. ` +
+									`Fill it with an explicit safe server/VM target, cleanup/rollback path, and human or LLM approval, then rerun sf headless auto.${reasons}`;
+								ctx.ui.notify(msg, "error");
+								deps.sendDesktopNotification(
+									"SF",
+									"Production mutation guard paused auto-mode",
+									"warning",
+									"safety",
+									basename(s.originalBasePath || s.basePath),
+								);
+								await deps.pauseAuto(ctx, pi);
+								return {
+									action: "break",
+									reason: "production-mutation-guard",
+								};
+							}
 						}
 					}
 				}
--- a/src/resources/extensions/sf/production-mutation-approval.ts
+++ b/src/resources/extensions/sf/production-mutation-approval.ts
@ -25,9 +25,12 @@ export interface ProductionMutationApproval {
 	approval: {
 		approved: boolean;
 		approvedBy: string;
+		approverType: "" | "human" | "llm";
 		approvedAt: string;
 		safeServerId: string;
 		safeVmNames: string[];
+		targetSelectionPlan: string;
+		preMutationChecks: string[];
 		cleanupPlan: string;
 		rollbackPlan: string;
 		notes: string;
@ -81,21 +84,129 @@ export function buildProductionMutationApprovalTemplate(
 		approval: {
 			approved: false,
 			approvedBy: "",
+			approverType: "",
 			approvedAt: "",
 			safeServerId: "",
 			safeVmNames: [],
+			targetSelectionPlan: "",
+			preMutationChecks: [],
 			cleanupPlan: "",
 			rollbackPlan: "",
 			notes: "",
 		},
 		instructions: [
-			"Set status to approved and approval.approved to true only after selecting a safe non-customer-impacting target.",
-			"Fill approvedBy, approvedAt, safeServerId, safeVmNames, cleanupPlan, and rollbackPlan.",
+			"Human and LLM approvals are both valid when they name a safe non-customer-impacting target or a constrained target-selection plan.",
+			"Fill status, approval.approved, approvedBy, approverType, approvedAt, safeServerId or targetSelectionPlan, safeVmNames or targetSelectionPlan, cleanupPlan, and rollbackPlan.",
 			"Then rerun sf headless auto.",
 		],
 	};
 }

+function normalizedTask(unit: ProductionMutationUnit): string {
+	return [unit.taskTitle, unit.taskText].join("\n").toLowerCase();
+}
+
+export function assessLlmProductionMutationApproval(
+	unit: ProductionMutationUnit,
+): { approved: boolean; reasons: string[] } {
+	const text = normalizedTask(unit);
+	const reasons: string[] = [];
+
+	if (
+		!(
+			text.includes("status=pending") ||
+			text.includes("pending state") ||
+			text.includes("pending command") ||
+			text.includes("command row")
+		)
+	) {
+		reasons.push("task is not constrained to pending command verification");
+	}
+	if (
+		!(
+			text.includes("do not trigger actual failover") ||
+			text.includes("do not trigger real failover")
+		)
+	) {
+		reasons.push("task does not explicitly forbid actual failover");
+	}
+	if (
+		!(
+			text.includes("routing table is unchanged") ||
+			text.includes("gateway routing table is unchanged") ||
+			text.includes("no active_dr")
+		)
+	) {
+		reasons.push("task does not require no routing/runtime failover changes");
+	}
+	if (!(text.includes("test server_id") || text.includes("test server id"))) {
+		reasons.push("task does not require a test server target");
+	}
+	if (!(text.includes("vm_names") || text.includes("test vm"))) {
+		reasons.push("task does not require a test VM target");
+	}
+
+	return { approved: reasons.length === 0, reasons };
+}
+
+export function buildLlmProductionMutationApproval(
+	unit: ProductionMutationUnit,
+	approvedAt: Date = new Date(),
+): ProductionMutationApproval {
+	return {
+		...buildProductionMutationApprovalTemplate(unit),
+		status: "approved",
+		approval: {
+			approved: true,
+			approvedBy: "sf-llm:auto-production-mutation-policy",
+			approverType: "llm",
+			approvedAt: approvedAt.toISOString(),
+			safeServerId: "agent-selected-non-customer-impacting-test-server",
+			safeVmNames: ["agent-selected-non-customer-impacting-test-vm"],
+			targetSelectionPlan:
+				"Before POST, inspect portal/DB state and select only a non-customer-impacting test server_id and test VM. If no safe test target exists, stop and report instead of posting.",
+			preMutationChecks: [
+				"Confirm the task is pending-command-only validation.",
+				"Confirm the selected target is not a real customer workload.",
+				"Confirm no actual failover, active_dr update, or gateway route change is expected.",
+				"Capture the command id created by the smoke POST for cleanup/audit.",
+			],
+			cleanupPlan:
+				"After evidence capture, leave no actionable production failover command behind. If the row remains pending and a supported cancel/delete path exists, cancel/delete that smoke command; otherwise stop and escalate with the command id before any agent can execute it.",
+			rollbackPlan:
+				"Do not mutate gateway routing or active_dr state. If any route or active_dr state changes, stop immediately, restore the previous route/state from captured pre-checks, and report the incident.",
+			notes:
+				"LLM auto-approval is limited to this pending-command-only smoke test contract. It is not approval for real VM failover.",
+		},
+	};
+}
+
+export function approveProductionMutationWithLlmPolicy(
+	basePath: string,
+	unit: ProductionMutationUnit,
+	approvedAt: Date = new Date(),
+): { path: string; approved: boolean; reasons: string[]; wrote: boolean } {
+	const path = productionMutationApprovalPath(basePath, unit);
+	const assessment = assessLlmProductionMutationApproval(unit);
+	if (!assessment.approved) {
+		return {
+			path,
+			approved: false,
+			reasons: assessment.reasons,
+			wrote: false,
+		};
+	}
+	atomicWriteSync(
+		path,
+		JSON.stringify(
+			buildLlmProductionMutationApproval(unit, approvedAt),
+			null,
+			2,
+		) + "\n",
+	);
+	return { path, approved: true, reasons: [], wrote: true };
+}
+
 export function ensureProductionMutationApprovalTemplate(
 	basePath: string,
 	unit: ProductionMutationUnit,
@ -158,10 +269,17 @@ export function validateProductionMutationApproval(
 	if (approval.approved !== true) {
 		reasons.push("approval.approved must be true");
 	}
+	if (
+		"approverType" in approval &&
+		approval.approverType !== "" &&
+		approval.approverType !== "human" &&
+		approval.approverType !== "llm"
+	) {
+		reasons.push("approval.approverType must be human or llm");
+	}
 	for (const field of [
 		"approvedBy",
 		"approvedAt",
-		"safeServerId",
 		"cleanupPlan",
 		"rollbackPlan",
 	] as const) {
@ -169,8 +287,14 @@ export function validateProductionMutationApproval(
 			reasons.push(`approval.${field} is required`);
 		}
 	}
-	if (!nonEmptyStringArray(approval.safeVmNames)) {
-		reasons.push("approval.safeVmNames must contain at least one VM name");
+	const hasConcreteTarget =
+		nonEmptyString(approval.safeServerId) &&
+		nonEmptyStringArray(approval.safeVmNames);
+	const hasTargetSelectionPlan = nonEmptyString(approval.targetSelectionPlan);
+	if (!hasConcreteTarget && !hasTargetSelectionPlan) {
+		reasons.push(
+			"approval must include safeServerId and safeVmNames, or targetSelectionPlan",
+		);
 	}

 	return { approved: reasons.length === 0, reasons };
--- a/src/resources/extensions/sf/tests/auto-runaway-guard.test.ts
+++ b/src/resources/extensions/sf/tests/auto-runaway-guard.test.ts
@ -1,6 +1,6 @@
 import assert from "node:assert/strict";
 import { execFileSync } from "node:child_process";
-import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import test from "node:test";
@ -13,7 +13,9 @@ import {
 	resetRunawayGuardState,
 } from "../auto-runaway-guard.ts";

-function config(overrides: Partial<RunawayGuardConfig> = {}): RunawayGuardConfig {
+function config(
+	overrides: Partial<RunawayGuardConfig> = {},
+): RunawayGuardConfig {
 	return {
 		enabled: true,
 		toolCallWarning: 60,
@ -53,7 +55,10 @@ test("runaway guard warns on first unit-relative threshold crossing", () => {
 	assert.equal(decision.final, false);
 	assert.match(decision.message, /61 tool calls/);
 	assert.match(decision.message, /1\.00M session tokens/);
-	assert.match(decision.message, /legitimately large, blocked, or stuck\/churning/);
+	assert.match(
+		decision.message,
+		/legitimately large, blocked, or stuck\/churning/,
+	);
 });

 test("runaway guard pauses after diagnostic turns when budget keeps growing", () => {
@ -141,7 +146,10 @@ test("runaway guard catches changed-file churn relative to unit start", () => {
 	assert.equal(decision.action, "warn");
 	if (decision.action !== "warn") throw new Error("expected warning");
 	assert.match(decision.message, /76 new changed files/);
-	assert.match(decision.message, /Active edits are not automatically healthy progress/);
+	assert.match(
+		decision.message,
+		/Active edits are not automatically healthy progress/,
+	);
 });

 test("runaway guard warns early when execute-task spends budget with no new file changes", () => {
@ -252,6 +260,52 @@ test("worktree fingerprint changes when untracked file content changes", () => {
 	}
 });

+test("worktree fingerprint changes when ignored durable sf artifacts change", () => {
+	const dir = mkdtempSync(join(tmpdir(), "sf-runaway-"));
+	try {
+		execFileSync("git", ["init"], {
+			cwd: dir,
+			stdio: ["ignore", "ignore", "ignore"],
+		});
+		writeFileSync(join(dir, ".gitignore"), ".sf/\n");
+		execFileSync("git", ["add", ".gitignore"], {
+			cwd: dir,
+			stdio: ["ignore", "ignore", "ignore"],
+		});
+		execFileSync("git", ["commit", "-m", "init"], {
+			cwd: dir,
+			env: {
+				...process.env,
+				GIT_AUTHOR_NAME: "SF Test",
+				GIT_AUTHOR_EMAIL: "sf@example.com",
+				GIT_COMMITTER_NAME: "SF Test",
+				GIT_COMMITTER_EMAIL: "sf@example.com",
+			},
+			stdio: ["ignore", "ignore", "ignore"],
+		});
+		mkdirSync(join(dir, ".sf", "milestones", "M004", "slices", "S03"), {
+			recursive: true,
+		});
+		writeFileSync(
+			join(dir, ".sf", "milestones", "M004", "slices", "S03", "evidence.md"),
+			"before\n",
+		);
+
+		const before = collectWorktreeFingerprint(dir);
+		writeFileSync(
+			join(dir, ".sf", "milestones", "M004", "slices", "S03", "evidence.md"),
+			"after\n",
+		);
+		const after = collectWorktreeFingerprint(dir);
+
+		assert.ok(before);
+		assert.ok(after);
+		assert.notEqual(after, before);
+	} finally {
+		rmSync(dir, { recursive: true, force: true });
+	}
+});
+
 test("runaway guard is disabled when configured off", () => {
 	clearRunawayGuardState();

--- a/src/resources/extensions/sf/tests/production-mutation-approval.test.ts
+++ b/src/resources/extensions/sf/tests/production-mutation-approval.test.ts
@ -11,6 +11,8 @@ import { join } from "node:path";
 import test from "node:test";

 import {
+	approveProductionMutationWithLlmPolicy,
+	assessLlmProductionMutationApproval,
 	ensureProductionMutationApprovalTemplate,
 	type ProductionMutationApproval,
 	type ProductionMutationUnit,
@ -32,6 +34,8 @@ const unit: ProductionMutationUnit = {
 		"Execute smoke test against production NixOS Hetzner infrastructure.",
 		"POST to /action/unified-failover with test server_id and vm_names.",
 		"Verify command row was created with status=pending.",
+		"Verify gateway routing table is unchanged and no active_dr updates occur.",
+		"NOTE: Do NOT trigger actual failover on real VMs.",
 	].join("\n"),
 };

@ -50,7 +54,9 @@ test("production mutation approval writes a pending template once", () => {
 		assert.equal(parsed.unitId, "M004/S03/T01");
 		assert.equal(parsed.status, "pending");
 		assert.equal(parsed.approval.approved, false);
+		assert.equal(parsed.approval.approverType, "");
 		assert.deepEqual(parsed.approval.safeVmNames, []);
+		assert.deepEqual(parsed.approval.preMutationChecks, []);

 		const second = ensureProductionMutationApprovalTemplate(basePath, unit);
 		assert.equal(second.created, false);
@ -70,9 +76,12 @@ test("production mutation approval rejects incomplete approval files", () => {
 			approval: {
 				approved: true,
 				approvedBy: "",
+				approverType: "",
 				approvedAt: "",
 				safeServerId: "",
 				safeVmNames: [],
+				targetSelectionPlan: "",
+				preMutationChecks: [],
 				cleanupPlan: "",
 				rollbackPlan: "",
 			},
@ -85,7 +94,7 @@ test("production mutation approval rejects incomplete approval files", () => {
 	assert.ok(result.reasons.includes("approval.approvedBy is required"));
 	assert.ok(
 		result.reasons.includes(
-			"approval.safeVmNames must contain at least one VM name",
+			"approval must include safeServerId and safeVmNames, or targetSelectionPlan",
 		),
 	);
 });
@ -106,9 +115,12 @@ test("production mutation approval accepts complete explicit approval", () => {
 			approval: {
 				approved: true,
 				approvedBy: "operator@example.com",
+				approverType: "human",
 				approvedAt: "2026-04-30T10:00:00.000Z",
 				safeServerId: "test-server-id",
 				safeVmNames: ["pilot-smoke-vm"],
+				targetSelectionPlan: "",
+				preMutationChecks: ["Confirm target is non-customer-impacting."],
 				cleanupPlan: "Delete the test pending command after verification.",
 				rollbackPlan: "Abort the test failover and restore the previous route.",
 				notes: "Smoke target is non-customer-impacting.",
@ -125,3 +137,57 @@ test("production mutation approval accepts complete explicit approval", () => {
 		rmSync(basePath, { recursive: true, force: true });
 	}
 });
+
+test("production mutation approval auto-fills LLM approval for pending-command-only smoke tests", () => {
+	const basePath = tempBase();
+	try {
+		const approvedAt = new Date("2026-04-30T10:05:00.000Z");
+		const result = approveProductionMutationWithLlmPolicy(
+			basePath,
+			unit,
+			approvedAt,
+		);
+		assert.equal(result.approved, true);
+		assert.equal(result.wrote, true);
+		assert.deepEqual(result.reasons, []);
+
+		const status = readProductionMutationApprovalStatus(basePath, unit);
+		assert.equal(status.approved, true);
+
+		const parsed = JSON.parse(
+			readFileSync(result.path, "utf-8"),
+		) as ProductionMutationApproval;
+		assert.equal(parsed.status, "approved");
+		assert.equal(parsed.approval.approverType, "llm");
+		assert.equal(
+			parsed.approval.approvedBy,
+			"sf-llm:auto-production-mutation-policy",
+		);
+		assert.equal(parsed.approval.approvedAt, "2026-04-30T10:05:00.000Z");
+		assert.ok(parsed.approval.targetSelectionPlan.length > 0);
+		assert.ok(parsed.approval.preMutationChecks.length >= 3);
+	} finally {
+		rmSync(basePath, { recursive: true, force: true });
+	}
+});
+
+test("production mutation approval does not auto-approve real failover tasks", () => {
+	const realFailoverUnit: ProductionMutationUnit = {
+		...unit,
+		taskText:
+			"POST to /action/unified-failover against production and verify the VM fails over.",
+	};
+
+	const assessment = assessLlmProductionMutationApproval(realFailoverUnit);
+	assert.equal(assessment.approved, false);
+	assert.ok(
+		assessment.reasons.includes(
+			"task is not constrained to pending command verification",
+		),
+	);
+	assert.ok(
+		assessment.reasons.includes(
+			"task does not explicitly forbid actual failover",
+		),
+	);
+});