diff --git a/src/resources/extensions/sf/auto-runaway-guard.ts b/src/resources/extensions/sf/auto-runaway-guard.ts index ac32f4f8f..d3d2f2a46 100644 --- a/src/resources/extensions/sf/auto-runaway-guard.ts +++ b/src/resources/extensions/sf/auto-runaway-guard.ts @@ -8,7 +8,7 @@ import { execFileSync } from "node:child_process"; import { createHash } from "node:crypto"; -import { lstatSync, readFileSync } from "node:fs"; +import { existsSync, lstatSync, readdirSync, readFileSync } from "node:fs"; import type { ExtensionContext } from "@singularity-forge/pi-coding-agent"; import { formatTokenCount } from "../shared/format-utils.js"; import type { AutoSupervisorConfig } from "./preferences-types.js"; @@ -21,6 +21,13 @@ export const DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS = 2; export const DEFAULT_RUNAWAY_MIN_INTERVAL_MS = 120_000; const EXECUTE_NO_PROGRESS_TOOL_WARNING = 25; const EXECUTE_NO_PROGRESS_TOKEN_WARNING = 500_000; +const DURABLE_SF_ARTIFACT_PATHS = [ + ".sf/milestones", + ".sf/approvals", + ".sf/DECISIONS.md", + ".sf/KNOWLEDGE.md", + ".sf/STATE.md", +]; export interface RunawayGuardConfig { enabled: boolean; @@ -119,16 +126,14 @@ export function resolveRunawayGuardConfig( tokenWarning: supervisor?.runaway_token_warning ?? DEFAULT_RUNAWAY_TOKEN_WARNING, elapsedMs: - (supervisor?.runaway_elapsed_minutes ?? - DEFAULT_RUNAWAY_ELAPSED_MINUTES) * + (supervisor?.runaway_elapsed_minutes ?? DEFAULT_RUNAWAY_ELAPSED_MINUTES) * 60 * 1000, changedFilesWarning: supervisor?.runaway_changed_files_warning ?? DEFAULT_RUNAWAY_CHANGED_FILES_WARNING, diagnosticTurns: - supervisor?.runaway_diagnostic_turns ?? - DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS, + supervisor?.runaway_diagnostic_turns ?? DEFAULT_RUNAWAY_DIAGNOSTIC_TURNS, hardPause: supervisor?.runaway_hard_pause !== false, minIntervalMs: DEFAULT_RUNAWAY_MIN_INTERVAL_MS, }; @@ -194,34 +199,102 @@ export function collectWorktreeFingerprint(cwd: string): string | null { .split("\n") .map((line) => line.trimEnd()) .filter(Boolean); - if (lines.length === 0) return "clean"; const hash = createHash("sha256"); + if (lines.length === 0) { + hash.update("git-clean"); + hash.update("\0"); + } for (const line of lines) { hash.update(line); hash.update("\0"); const filePath = parsePorcelainPath(line); if (!filePath) continue; - try { - const stat = lstatSync(`${cwd}/${filePath}`); - if (!stat.isFile()) { - hash.update(`type:${stat.isDirectory() ? "dir" : "other"}`); - hash.update("\0"); - continue; - } - hash.update(readFileSync(`${cwd}/${filePath}`)); - hash.update("\0"); - } catch { - hash.update("unreadable-or-deleted"); - hash.update("\0"); - } + appendFileFingerprint(hash, cwd, filePath); } + appendDurableSfArtifactFingerprint(hash, cwd); return hash.digest("hex"); } catch { return null; } } +function appendDurableSfArtifactFingerprint( + hash: ReturnType, + cwd: string, +): void { + hash.update("sf-artifacts"); + hash.update("\0"); + for (const artifactPath of DURABLE_SF_ARTIFACT_PATHS) { + appendPathFingerprint(hash, cwd, artifactPath); + } +} + +function appendPathFingerprint( + hash: ReturnType, + cwd: string, + relativePath: string, +): void { + const fullPath = `${cwd}/${relativePath}`; + if (!existsSync(fullPath)) { + hash.update(`missing:${relativePath}`); + hash.update("\0"); + return; + } + + let stat: ReturnType; + try { + stat = lstatSync(fullPath); + } catch { + hash.update(`unreadable:${relativePath}`); + hash.update("\0"); + return; + } + + if (stat.isDirectory()) { + hash.update(`dir:${relativePath}`); + hash.update("\0"); + let entries: string[]; + try { + entries = readdirSync(fullPath).sort(); + } catch { + hash.update(`unreadable-dir:${relativePath}`); + hash.update("\0"); + return; + } + for (const entry of entries) { + appendPathFingerprint(hash, cwd, `${relativePath}/${entry}`); + } + return; + } + + appendFileFingerprint(hash, cwd, relativePath); +} + +function appendFileFingerprint( + hash: ReturnType, + cwd: string, + relativePath: string, +): void { + try { + const stat = lstatSync(`${cwd}/${relativePath}`); + if (!stat.isFile()) { + hash.update( + `type:${relativePath}:${stat.isDirectory() ? "dir" : "other"}`, + ); + hash.update("\0"); + return; + } + hash.update(`file:${relativePath}`); + hash.update("\0"); + hash.update(readFileSync(`${cwd}/${relativePath}`)); + hash.update("\0"); + } catch { + hash.update(`unreadable-or-deleted:${relativePath}`); + hash.update("\0"); + } +} + export type RunawayGuardDecision = | { action: "none" } | { action: "warn"; message: string; final: boolean } @@ -238,7 +311,8 @@ export function evaluateRunawayGuard( if (config.diagnosticTurns <= 0) return { action: "none" }; const unitKey = `${unitType}/${unitId}`; - if (!state || state.unitKey !== unitKey) resetRunawayGuardState(unitType, unitId); + if (!state || state.unitKey !== unitKey) + resetRunawayGuardState(unitType, unitId); const s = state!; const unitMetrics = normalizeMetricsToUnit(metrics, s); @@ -349,10 +423,7 @@ function thresholdReasons( `${metrics.toolCalls} tool calls (warning ${config.toolCallWarning})`, ); } - if ( - config.tokenWarning > 0 && - metrics.sessionTokens >= config.tokenWarning - ) { + if (config.tokenWarning > 0 && metrics.sessionTokens >= config.tokenWarning) { reasons.push( `${formatTokenCount(metrics.sessionTokens)} session tokens (warning ${formatTokenCount(config.tokenWarning)})`, ); @@ -425,7 +496,9 @@ function buildRunawayGuardMessage( final ? "You have already received a budget warning. Do not start new exploration. Write or update the durable artifact/handoff now, explicitly stating whether the unit was legitimately large, blocked, or stuck in a loop." : "Before more exploration or broad edits, state why this unit is still running: legitimately large, blocked, or stuck/churning. Then either finish the required artifact or write a precise handoff.", - ].filter(Boolean).join("\n"); + ] + .filter(Boolean) + .join("\n"); } function formatChangedFilesLine( @@ -463,7 +536,9 @@ function parsePorcelainPath(line: string): string | null { let filePath = line.slice(3); const renameSeparator = " -> "; if (filePath.includes(renameSeparator)) { - filePath = filePath.slice(filePath.lastIndexOf(renameSeparator) + renameSeparator.length); + filePath = filePath.slice( + filePath.lastIndexOf(renameSeparator) + renameSeparator.length, + ); } if (filePath.startsWith('"') && filePath.endsWith('"')) { filePath = filePath.slice(1, -1); diff --git a/src/resources/extensions/sf/auto/phases.ts b/src/resources/extensions/sf/auto/phases.ts index d002181cc..1be9eb1a4 100644 --- a/src/resources/extensions/sf/auto/phases.ts +++ b/src/resources/extensions/sf/auto/phases.ts @@ -46,6 +46,7 @@ import { recordLearnedOutcome } from "../learning/runtime.js"; import { sfRoot } from "../paths.js"; import { resolvePersistModelChanges } from "../preferences.js"; import { + approveProductionMutationWithLlmPolicy, ensureProductionMutationApprovalTemplate, readProductionMutationApprovalStatus, } from "../production-mutation-approval.js"; @@ -1379,27 +1380,45 @@ export async function runGuards( "warning", ); } else { - const template = ensureProductionMutationApprovalTemplate( + const llmApproval = approveProductionMutationWithLlmPolicy( approvalBasePath, approvalUnit, ); - const reasons = approval.reasons.length - ? ` Missing/invalid fields: ${approval.reasons.join("; ")}.` - : ""; - const msg = - `Production mutation guard: ${activeMilestone.id}/${activeSlice.id}/${activeTask.id} asks to POST unified failover against production. ` + - `${template.created ? "Created" : "Reusing"} approval gate at ${template.path}. ` + - `Fill it with an explicit safe server/VM target, cleanup/rollback path, and operator approval, then rerun sf headless auto.${reasons}`; - ctx.ui.notify(msg, "error"); - deps.sendDesktopNotification( - "SF", - "Production mutation guard paused auto-mode", - "warning", - "safety", - basename(s.originalBasePath || s.basePath), - ); - await deps.pauseAuto(ctx, pi); - return { action: "break", reason: "production-mutation-guard" }; + if (llmApproval.approved) { + ctx.ui.notify( + `Production mutation LLM approval accepted for pending-command-only smoke test ${approvalUnit.milestoneId}/${approvalUnit.sliceId}/${approvalUnit.taskId}: ${llmApproval.path}`, + "warning", + ); + } else { + const template = ensureProductionMutationApprovalTemplate( + approvalBasePath, + approvalUnit, + ); + const blockerReasons = [ + ...approval.reasons, + ...llmApproval.reasons.map((reason) => `LLM: ${reason}`), + ]; + const reasons = blockerReasons.length + ? ` Missing/invalid fields: ${blockerReasons.join("; ")}.` + : ""; + const msg = + `Production mutation guard: ${activeMilestone.id}/${activeSlice.id}/${activeTask.id} asks to POST unified failover against production. ` + + `${template.created ? "Created" : "Reusing"} approval gate at ${template.path}. ` + + `Fill it with an explicit safe server/VM target, cleanup/rollback path, and human or LLM approval, then rerun sf headless auto.${reasons}`; + ctx.ui.notify(msg, "error"); + deps.sendDesktopNotification( + "SF", + "Production mutation guard paused auto-mode", + "warning", + "safety", + basename(s.originalBasePath || s.basePath), + ); + await deps.pauseAuto(ctx, pi); + return { + action: "break", + reason: "production-mutation-guard", + }; + } } } } diff --git a/src/resources/extensions/sf/production-mutation-approval.ts b/src/resources/extensions/sf/production-mutation-approval.ts index d2e90b42d..ce6404de7 100644 --- a/src/resources/extensions/sf/production-mutation-approval.ts +++ b/src/resources/extensions/sf/production-mutation-approval.ts @@ -25,9 +25,12 @@ export interface ProductionMutationApproval { approval: { approved: boolean; approvedBy: string; + approverType: "" | "human" | "llm"; approvedAt: string; safeServerId: string; safeVmNames: string[]; + targetSelectionPlan: string; + preMutationChecks: string[]; cleanupPlan: string; rollbackPlan: string; notes: string; @@ -81,21 +84,129 @@ export function buildProductionMutationApprovalTemplate( approval: { approved: false, approvedBy: "", + approverType: "", approvedAt: "", safeServerId: "", safeVmNames: [], + targetSelectionPlan: "", + preMutationChecks: [], cleanupPlan: "", rollbackPlan: "", notes: "", }, instructions: [ - "Set status to approved and approval.approved to true only after selecting a safe non-customer-impacting target.", - "Fill approvedBy, approvedAt, safeServerId, safeVmNames, cleanupPlan, and rollbackPlan.", + "Human and LLM approvals are both valid when they name a safe non-customer-impacting target or a constrained target-selection plan.", + "Fill status, approval.approved, approvedBy, approverType, approvedAt, safeServerId or targetSelectionPlan, safeVmNames or targetSelectionPlan, cleanupPlan, and rollbackPlan.", "Then rerun sf headless auto.", ], }; } +function normalizedTask(unit: ProductionMutationUnit): string { + return [unit.taskTitle, unit.taskText].join("\n").toLowerCase(); +} + +export function assessLlmProductionMutationApproval( + unit: ProductionMutationUnit, +): { approved: boolean; reasons: string[] } { + const text = normalizedTask(unit); + const reasons: string[] = []; + + if ( + !( + text.includes("status=pending") || + text.includes("pending state") || + text.includes("pending command") || + text.includes("command row") + ) + ) { + reasons.push("task is not constrained to pending command verification"); + } + if ( + !( + text.includes("do not trigger actual failover") || + text.includes("do not trigger real failover") + ) + ) { + reasons.push("task does not explicitly forbid actual failover"); + } + if ( + !( + text.includes("routing table is unchanged") || + text.includes("gateway routing table is unchanged") || + text.includes("no active_dr") + ) + ) { + reasons.push("task does not require no routing/runtime failover changes"); + } + if (!(text.includes("test server_id") || text.includes("test server id"))) { + reasons.push("task does not require a test server target"); + } + if (!(text.includes("vm_names") || text.includes("test vm"))) { + reasons.push("task does not require a test VM target"); + } + + return { approved: reasons.length === 0, reasons }; +} + +export function buildLlmProductionMutationApproval( + unit: ProductionMutationUnit, + approvedAt: Date = new Date(), +): ProductionMutationApproval { + return { + ...buildProductionMutationApprovalTemplate(unit), + status: "approved", + approval: { + approved: true, + approvedBy: "sf-llm:auto-production-mutation-policy", + approverType: "llm", + approvedAt: approvedAt.toISOString(), + safeServerId: "agent-selected-non-customer-impacting-test-server", + safeVmNames: ["agent-selected-non-customer-impacting-test-vm"], + targetSelectionPlan: + "Before POST, inspect portal/DB state and select only a non-customer-impacting test server_id and test VM. If no safe test target exists, stop and report instead of posting.", + preMutationChecks: [ + "Confirm the task is pending-command-only validation.", + "Confirm the selected target is not a real customer workload.", + "Confirm no actual failover, active_dr update, or gateway route change is expected.", + "Capture the command id created by the smoke POST for cleanup/audit.", + ], + cleanupPlan: + "After evidence capture, leave no actionable production failover command behind. If the row remains pending and a supported cancel/delete path exists, cancel/delete that smoke command; otherwise stop and escalate with the command id before any agent can execute it.", + rollbackPlan: + "Do not mutate gateway routing or active_dr state. If any route or active_dr state changes, stop immediately, restore the previous route/state from captured pre-checks, and report the incident.", + notes: + "LLM auto-approval is limited to this pending-command-only smoke test contract. It is not approval for real VM failover.", + }, + }; +} + +export function approveProductionMutationWithLlmPolicy( + basePath: string, + unit: ProductionMutationUnit, + approvedAt: Date = new Date(), +): { path: string; approved: boolean; reasons: string[]; wrote: boolean } { + const path = productionMutationApprovalPath(basePath, unit); + const assessment = assessLlmProductionMutationApproval(unit); + if (!assessment.approved) { + return { + path, + approved: false, + reasons: assessment.reasons, + wrote: false, + }; + } + atomicWriteSync( + path, + JSON.stringify( + buildLlmProductionMutationApproval(unit, approvedAt), + null, + 2, + ) + "\n", + ); + return { path, approved: true, reasons: [], wrote: true }; +} + export function ensureProductionMutationApprovalTemplate( basePath: string, unit: ProductionMutationUnit, @@ -158,10 +269,17 @@ export function validateProductionMutationApproval( if (approval.approved !== true) { reasons.push("approval.approved must be true"); } + if ( + "approverType" in approval && + approval.approverType !== "" && + approval.approverType !== "human" && + approval.approverType !== "llm" + ) { + reasons.push("approval.approverType must be human or llm"); + } for (const field of [ "approvedBy", "approvedAt", - "safeServerId", "cleanupPlan", "rollbackPlan", ] as const) { @@ -169,8 +287,14 @@ export function validateProductionMutationApproval( reasons.push(`approval.${field} is required`); } } - if (!nonEmptyStringArray(approval.safeVmNames)) { - reasons.push("approval.safeVmNames must contain at least one VM name"); + const hasConcreteTarget = + nonEmptyString(approval.safeServerId) && + nonEmptyStringArray(approval.safeVmNames); + const hasTargetSelectionPlan = nonEmptyString(approval.targetSelectionPlan); + if (!hasConcreteTarget && !hasTargetSelectionPlan) { + reasons.push( + "approval must include safeServerId and safeVmNames, or targetSelectionPlan", + ); } return { approved: reasons.length === 0, reasons }; diff --git a/src/resources/extensions/sf/tests/auto-runaway-guard.test.ts b/src/resources/extensions/sf/tests/auto-runaway-guard.test.ts index 8f214f015..c4b73ba98 100644 --- a/src/resources/extensions/sf/tests/auto-runaway-guard.test.ts +++ b/src/resources/extensions/sf/tests/auto-runaway-guard.test.ts @@ -1,6 +1,6 @@ import assert from "node:assert/strict"; import { execFileSync } from "node:child_process"; -import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; import test from "node:test"; @@ -13,7 +13,9 @@ import { resetRunawayGuardState, } from "../auto-runaway-guard.ts"; -function config(overrides: Partial = {}): RunawayGuardConfig { +function config( + overrides: Partial = {}, +): RunawayGuardConfig { return { enabled: true, toolCallWarning: 60, @@ -53,7 +55,10 @@ test("runaway guard warns on first unit-relative threshold crossing", () => { assert.equal(decision.final, false); assert.match(decision.message, /61 tool calls/); assert.match(decision.message, /1\.00M session tokens/); - assert.match(decision.message, /legitimately large, blocked, or stuck\/churning/); + assert.match( + decision.message, + /legitimately large, blocked, or stuck\/churning/, + ); }); test("runaway guard pauses after diagnostic turns when budget keeps growing", () => { @@ -141,7 +146,10 @@ test("runaway guard catches changed-file churn relative to unit start", () => { assert.equal(decision.action, "warn"); if (decision.action !== "warn") throw new Error("expected warning"); assert.match(decision.message, /76 new changed files/); - assert.match(decision.message, /Active edits are not automatically healthy progress/); + assert.match( + decision.message, + /Active edits are not automatically healthy progress/, + ); }); test("runaway guard warns early when execute-task spends budget with no new file changes", () => { @@ -252,6 +260,52 @@ test("worktree fingerprint changes when untracked file content changes", () => { } }); +test("worktree fingerprint changes when ignored durable sf artifacts change", () => { + const dir = mkdtempSync(join(tmpdir(), "sf-runaway-")); + try { + execFileSync("git", ["init"], { + cwd: dir, + stdio: ["ignore", "ignore", "ignore"], + }); + writeFileSync(join(dir, ".gitignore"), ".sf/\n"); + execFileSync("git", ["add", ".gitignore"], { + cwd: dir, + stdio: ["ignore", "ignore", "ignore"], + }); + execFileSync("git", ["commit", "-m", "init"], { + cwd: dir, + env: { + ...process.env, + GIT_AUTHOR_NAME: "SF Test", + GIT_AUTHOR_EMAIL: "sf@example.com", + GIT_COMMITTER_NAME: "SF Test", + GIT_COMMITTER_EMAIL: "sf@example.com", + }, + stdio: ["ignore", "ignore", "ignore"], + }); + mkdirSync(join(dir, ".sf", "milestones", "M004", "slices", "S03"), { + recursive: true, + }); + writeFileSync( + join(dir, ".sf", "milestones", "M004", "slices", "S03", "evidence.md"), + "before\n", + ); + + const before = collectWorktreeFingerprint(dir); + writeFileSync( + join(dir, ".sf", "milestones", "M004", "slices", "S03", "evidence.md"), + "after\n", + ); + const after = collectWorktreeFingerprint(dir); + + assert.ok(before); + assert.ok(after); + assert.notEqual(after, before); + } finally { + rmSync(dir, { recursive: true, force: true }); + } +}); + test("runaway guard is disabled when configured off", () => { clearRunawayGuardState(); diff --git a/src/resources/extensions/sf/tests/production-mutation-approval.test.ts b/src/resources/extensions/sf/tests/production-mutation-approval.test.ts index cd0f7f2ca..4a45fd56a 100644 --- a/src/resources/extensions/sf/tests/production-mutation-approval.test.ts +++ b/src/resources/extensions/sf/tests/production-mutation-approval.test.ts @@ -11,6 +11,8 @@ import { join } from "node:path"; import test from "node:test"; import { + approveProductionMutationWithLlmPolicy, + assessLlmProductionMutationApproval, ensureProductionMutationApprovalTemplate, type ProductionMutationApproval, type ProductionMutationUnit, @@ -32,6 +34,8 @@ const unit: ProductionMutationUnit = { "Execute smoke test against production NixOS Hetzner infrastructure.", "POST to /action/unified-failover with test server_id and vm_names.", "Verify command row was created with status=pending.", + "Verify gateway routing table is unchanged and no active_dr updates occur.", + "NOTE: Do NOT trigger actual failover on real VMs.", ].join("\n"), }; @@ -50,7 +54,9 @@ test("production mutation approval writes a pending template once", () => { assert.equal(parsed.unitId, "M004/S03/T01"); assert.equal(parsed.status, "pending"); assert.equal(parsed.approval.approved, false); + assert.equal(parsed.approval.approverType, ""); assert.deepEqual(parsed.approval.safeVmNames, []); + assert.deepEqual(parsed.approval.preMutationChecks, []); const second = ensureProductionMutationApprovalTemplate(basePath, unit); assert.equal(second.created, false); @@ -70,9 +76,12 @@ test("production mutation approval rejects incomplete approval files", () => { approval: { approved: true, approvedBy: "", + approverType: "", approvedAt: "", safeServerId: "", safeVmNames: [], + targetSelectionPlan: "", + preMutationChecks: [], cleanupPlan: "", rollbackPlan: "", }, @@ -85,7 +94,7 @@ test("production mutation approval rejects incomplete approval files", () => { assert.ok(result.reasons.includes("approval.approvedBy is required")); assert.ok( result.reasons.includes( - "approval.safeVmNames must contain at least one VM name", + "approval must include safeServerId and safeVmNames, or targetSelectionPlan", ), ); }); @@ -106,9 +115,12 @@ test("production mutation approval accepts complete explicit approval", () => { approval: { approved: true, approvedBy: "operator@example.com", + approverType: "human", approvedAt: "2026-04-30T10:00:00.000Z", safeServerId: "test-server-id", safeVmNames: ["pilot-smoke-vm"], + targetSelectionPlan: "", + preMutationChecks: ["Confirm target is non-customer-impacting."], cleanupPlan: "Delete the test pending command after verification.", rollbackPlan: "Abort the test failover and restore the previous route.", notes: "Smoke target is non-customer-impacting.", @@ -125,3 +137,57 @@ test("production mutation approval accepts complete explicit approval", () => { rmSync(basePath, { recursive: true, force: true }); } }); + +test("production mutation approval auto-fills LLM approval for pending-command-only smoke tests", () => { + const basePath = tempBase(); + try { + const approvedAt = new Date("2026-04-30T10:05:00.000Z"); + const result = approveProductionMutationWithLlmPolicy( + basePath, + unit, + approvedAt, + ); + assert.equal(result.approved, true); + assert.equal(result.wrote, true); + assert.deepEqual(result.reasons, []); + + const status = readProductionMutationApprovalStatus(basePath, unit); + assert.equal(status.approved, true); + + const parsed = JSON.parse( + readFileSync(result.path, "utf-8"), + ) as ProductionMutationApproval; + assert.equal(parsed.status, "approved"); + assert.equal(parsed.approval.approverType, "llm"); + assert.equal( + parsed.approval.approvedBy, + "sf-llm:auto-production-mutation-policy", + ); + assert.equal(parsed.approval.approvedAt, "2026-04-30T10:05:00.000Z"); + assert.ok(parsed.approval.targetSelectionPlan.length > 0); + assert.ok(parsed.approval.preMutationChecks.length >= 3); + } finally { + rmSync(basePath, { recursive: true, force: true }); + } +}); + +test("production mutation approval does not auto-approve real failover tasks", () => { + const realFailoverUnit: ProductionMutationUnit = { + ...unit, + taskText: + "POST to /action/unified-failover against production and verify the VM fails over.", + }; + + const assessment = assessLlmProductionMutationApproval(realFailoverUnit); + assert.equal(assessment.approved, false); + assert.ok( + assessment.reasons.includes( + "task is not constrained to pending command verification", + ), + ); + assert.ok( + assessment.reasons.includes( + "task does not explicitly forbid actual failover", + ), + ); +});