From 08ea92b072cd73686e4476ead664592e8a70ac92 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Thu, 30 Apr 2026 11:35:16 +0200 Subject: [PATCH] fix: Harden auto recovery and production guards --- src/resources/extensions/sf/auto.ts | 18 +--- src/resources/extensions/sf/auto/phases.ts | 82 +++++++++++++++++-- src/resources/extensions/sf/git-service.ts | 12 ++- .../extensions/sf/markdown-renderer.ts | 10 ++- .../extensions/sf/pre-execution-checks.ts | 7 ++ .../extensions/sf/tests/auto-loop.test.ts | 30 +++++++ .../auto-paused-session-validation.test.ts | 4 + .../sf/tests/integration/git-service.test.ts | 24 ++++++ .../sf/tests/pre-execution-checks.test.ts | 13 +++ .../tests/production-mutation-guard.test.ts | 29 +++++++ 10 files changed, 203 insertions(+), 26 deletions(-) create mode 100644 src/resources/extensions/sf/tests/production-mutation-guard.test.ts diff --git a/src/resources/extensions/sf/auto.ts b/src/resources/extensions/sf/auto.ts index 6ead72628..480119de7 100644 --- a/src/resources/extensions/sf/auto.ts +++ b/src/resources/extensions/sf/auto.ts @@ -1648,21 +1648,9 @@ export async function startAuto( // the provider JSONL must remain available for synthesizeCrashRecovery(). const resumeSessionFile = s.pausedSessionFile; - // Lock acquired — now safe to delete the pause file - if (resumeSessionFile) { - try { - unlinkSync(resumeSessionFile); - } catch (err) { - if ((err as NodeJS.ErrnoException).code !== "ENOENT") { - logWarning( - "session", - `pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`, - { file: "auto.ts" }, - ); - } - } - s.pausedSessionFile = null; - } + // Clear mutable resume metadata without deleting the provider session JSONL: + // synthesizeCrashRecovery() still needs that trace to avoid restarting blind. + s.pausedSessionFile = null; s.paused = false; s.active = true; diff --git a/src/resources/extensions/sf/auto/phases.ts b/src/resources/extensions/sf/auto/phases.ts index 10aa01d91..41398ebf7 100644 --- a/src/resources/extensions/sf/auto/phases.ts +++ b/src/resources/extensions/sf/auto/phases.ts @@ -52,7 +52,7 @@ import { rollbackToCheckpoint, } from "../safety/git-checkpoint.js"; import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js"; -import { getMilestoneSlices, isDbAvailable } from "../sf-db.js"; +import { getMilestoneSlices, getTask, isDbAvailable } from "../sf-db.js"; import { getEligibleSlices } from "../slice-parallel-eligibility.js"; import { startSliceParallel } from "../slice-parallel-orchestrator.js"; import type { Phase } from "../types.js"; @@ -131,6 +131,24 @@ function clearDeferredCommitAfterCancelledUnit( ); } +export function requiresHumanProductionMutationApproval(text: string): boolean { + const normalized = text.toLowerCase(); + const mentionsProduction = + /\b(production|prod|live|hetzner)\b/.test(normalized) || + normalized.includes("centralcloud.com"); + if (!mentionsProduction) return false; + + const mentionsUnifiedFailover = + normalized.includes("unified_failover") || + normalized.includes("unified-failover") || + normalized.includes("/action/unified"); + if (!mentionsUnifiedFailover) return false; + + return /\b(post|enqueue|create|insert|command row|pending command)\b/.test( + normalized, + ); +} + /** * Resolve the authoritative project base for dispatch guards. * Prior-milestone completion lives at the project root, even when the active @@ -1320,6 +1338,49 @@ export async function runGuards( return { action: "break", reason: "stop-guard-error" }; } + // Production mutation guard — headless auto must not enqueue live failover + // commands without a human-provided safe target and cleanup plan. + try { + if (isDbAvailable()) { + const state = await deps.deriveState(s.basePath); + const activeTask = state.activeTask; + const activeSlice = state.activeSlice; + const activeMilestone = state.activeMilestone; + if (activeMilestone?.id && activeSlice?.id && activeTask?.id) { + const task = getTask(activeMilestone.id, activeSlice.id, activeTask.id); + if (task) { + const taskText = [ + task.title, + task.description, + task.verify, + ...task.inputs, + ...task.expected_output, + ].join("\n"); + if (requiresHumanProductionMutationApproval(taskText)) { + const msg = + `Production mutation guard: ${activeMilestone.id}/${activeSlice.id}/${activeTask.id} asks to POST unified failover against production. ` + + "Pause for an explicit safe server/VM target, cleanup/rollback path, and operator approval."; + ctx.ui.notify(msg, "error"); + deps.sendDesktopNotification( + "SF", + "Production mutation guard paused auto-mode", + "warning", + "safety", + basename(s.originalBasePath || s.basePath), + ); + await deps.pauseAuto(ctx, pi); + return { action: "break", reason: "production-mutation-guard" }; + } + } + } + } + } catch (e) { + debugLog("guards", { + phase: "production-mutation-guard-error", + error: String(e), + }); + } + // Budget ceiling guard const budgetCeiling = prefs?.budget_ceiling; if (budgetCeiling !== undefined && budgetCeiling > 0) { @@ -1589,12 +1650,23 @@ export async function runUnitPhase( } // Detect retry and capture previous tier for escalation + const isPausedUnitResume = + s.pausedUnitType === unitType && s.pausedUnitId === unitId; const isRetry = !!( - s.currentUnit && - s.currentUnit.type === unitType && - s.currentUnit.id === unitId + (s.currentUnit && + s.currentUnit.type === unitType && + s.currentUnit.id === unitId) || + isPausedUnitResume ); - const previousTier = s.currentUnitRouting?.tier; + const previousTier = + s.currentUnitRouting?.tier ?? + (isPausedUnitResume && unitType === "execute-task" + ? "standard" + : undefined); + if (isPausedUnitResume) { + s.pausedUnitType = null; + s.pausedUnitId = null; + } // Scope workflow-logger buffer to this unit so post-finalize drains are // per-unit. Without this, the module-level _buffer accumulates across every diff --git a/src/resources/extensions/sf/git-service.ts b/src/resources/extensions/sf/git-service.ts index ccf52a4b6..f91ada3eb 100644 --- a/src/resources/extensions/sf/git-service.ts +++ b/src/resources/extensions/sf/git-service.ts @@ -167,8 +167,11 @@ export function buildTaskCommitMessage(ctx: TaskCommitContext): string { // Build body with key files if available const bodyParts: string[] = []; - if (ctx.keyFiles && ctx.keyFiles.length > 0) { - const fileLines = ctx.keyFiles + const keyFiles = ctx.keyFiles?.filter( + (file) => normalizeExplicitStagePath(file) !== null, + ); + if (keyFiles && keyFiles.length > 0) { + const fileLines = keyFiles .slice(0, 8) // cap at 8 files to keep commit concise .map((f) => `- ${f}`) .join("\n"); @@ -269,9 +272,14 @@ function normalizeExplicitStagePath(path: string): string | null { ) .replace(/\\/g, "/") .replace(/^\.\//, ""); + const lower = normalized.toLowerCase(); if ( !normalized || normalized === "." || + lower === "(none)" || + lower === "none." || + lower === "n/a" || + lower === "-" || normalized.includes("\0") || isAbsolute(normalized) || /^[A-Za-z]:\//.test(normalized) || diff --git a/src/resources/extensions/sf/markdown-renderer.ts b/src/resources/extensions/sf/markdown-renderer.ts index ebca7ed18..ce0e5dac0 100644 --- a/src/resources/extensions/sf/markdown-renderer.ts +++ b/src/resources/extensions/sf/markdown-renderer.ts @@ -544,14 +544,16 @@ function renderSlicePlanMarkdown( const filesLikelyTouched = Array.from( new Set(tasks.flatMap((task) => task.files)), ); - if (filesLikelyTouched.length > 0) { - lines.push("## Files Likely Touched"); - lines.push(""); + lines.push("## Files Likely Touched"); + lines.push(""); + if (filesLikelyTouched.length === 0) { + lines.push("- (none)"); + } else { for (const file of filesLikelyTouched) { lines.push(`- ${file}`); } - lines.push(""); } + lines.push(""); return `${lines.join("\n").trimEnd()}\n`; } diff --git a/src/resources/extensions/sf/pre-execution-checks.ts b/src/resources/extensions/sf/pre-execution-checks.ts index 3ad7f57a7..3664ba5d8 100644 --- a/src/resources/extensions/sf/pre-execution-checks.ts +++ b/src/resources/extensions/sf/pre-execution-checks.ts @@ -324,6 +324,13 @@ function shouldValidateInputAsPath(raw: string): boolean { const candidate = extractPathFromAnnotation(trimmed); if (!candidate) return false; + if ( + /\s/.test(candidate) && + /^[A-Za-z][A-Za-z0-9 _-]{0,48}:\s+/.test(candidate) + ) { + return false; + } + if (!/\s/.test(candidate)) { return true; } diff --git a/src/resources/extensions/sf/tests/auto-loop.test.ts b/src/resources/extensions/sf/tests/auto-loop.test.ts index cfea113e3..2a9514c13 100644 --- a/src/resources/extensions/sf/tests/auto-loop.test.ts +++ b/src/resources/extensions/sf/tests/auto-loop.test.ts @@ -402,6 +402,36 @@ test("auto/phases.ts: selectAndApplyModel called exactly once and before updateP ); }); +test("auto/phases.ts: paused resumed units count as retries for tier escalation", () => { + const src = readFileSync( + resolve(import.meta.dirname, "..", "auto", "phases.ts"), + "utf-8", + ); + const fnStart = src.indexOf("export async function runUnitPhase"); + assert.ok(fnStart > 0, "runUnitPhase should exist in phases.ts"); + const fnBody = src.slice(fnStart, fnStart + 12000); + + assert.ok( + fnBody.includes("const isPausedUnitResume"), + "runUnitPhase should detect when a persisted paused unit is being resumed", + ); + assert.ok( + /s\.pausedUnitType\s*===\s*unitType/.test(fnBody) && + /s\.pausedUnitId\s*===\s*unitId/.test(fnBody), + "paused resume detection must compare the paused unit type and id with the dispatched unit", + ); + assert.ok( + /isRetry[\s\S]*isPausedUnitResume/.test(fnBody), + "paused resumed units must be included in retry detection", + ); + assert.ok( + /unitType\s*===\s*"execute-task"[\s\S]*\?\s*"standard"[\s\S]*:\s*undefined/.test( + fnBody, + ), + "paused execute-task resumes should supply a standard previous tier so retry escalation can reach heavy after restart", + ); +}); + // ─── autoLoop tests (T02) ───────────────────────────────────────────────── /** diff --git a/src/resources/extensions/sf/tests/auto-paused-session-validation.test.ts b/src/resources/extensions/sf/tests/auto-paused-session-validation.test.ts index ee4d5340f..72ea74449 100644 --- a/src/resources/extensions/sf/tests/auto-paused-session-validation.test.ts +++ b/src/resources/extensions/sf/tests/auto-paused-session-validation.test.ts @@ -61,6 +61,10 @@ test("auto.ts validates milestone before restoring paused session (#1664)", () = source.includes("resumeSessionFile ?? undefined"), "auto.ts must pass the captured paused session file into synthesizeCrashRecovery", ); + assert.ok( + !source.includes("unlinkSync(resumeSessionFile)"), + "auto.ts must not delete the provider session JSONL before recovery can read it", + ); // Pause path must sanitize live session file path before persisting metadata. assert.ok( diff --git a/src/resources/extensions/sf/tests/integration/git-service.test.ts b/src/resources/extensions/sf/tests/integration/git-service.test.ts index cbcfea98b..70e730efb 100644 --- a/src/resources/extensions/sf/tests/integration/git-service.test.ts +++ b/src/resources/extensions/sf/tests/integration/git-service.test.ts @@ -1779,6 +1779,30 @@ describe("git-service", async () => { rmSync(externalGsd, { recursive: true, force: true }); }); + test("GitServiceImpl: stageOnly ignores summary none placeholders", () => { + const repo = initTempRepo(); + createFile(repo, "portal/failover-handlers_test.go", "package main\n"); + run("git add -A", repo); + run('git commit -m "add portal test"', repo); + + writeFileSync( + join(repo, "portal/failover-handlers_test.go"), + "package main\n// gate tests\n", + ); + + const svc = new GitServiceImpl(repo); + assert.equal( + svc.stageOnly([], ["(none)"]), + true, + "tracked implementation edits should still stage when task summary key_files contains the placeholder", + ); + + const staged = run("git diff --cached --name-only", repo); + assert.equal(staged, "portal/failover-handlers_test.go"); + + rmSync(repo, { recursive: true, force: true }); + }); + // ─── nativeAddAllWithExclusions: non-symlinked .sf still works ─────── test("nativeAddAllWithExclusions: non-symlinked .sf still works", () => { diff --git a/src/resources/extensions/sf/tests/pre-execution-checks.test.ts b/src/resources/extensions/sf/tests/pre-execution-checks.test.ts index bfa3f5227..302be875e 100644 --- a/src/resources/extensions/sf/tests/pre-execution-checks.test.ts +++ b/src/resources/extensions/sf/tests/pre-execution-checks.test.ts @@ -141,6 +141,19 @@ import type { Request } from 'express'; describe("checkFilePathConsistency", () => { let tempDir: string; + test("ignores labeled operational inputs that are not filesystem paths", () => { + const task = createTask({ + inputs: [ + "Portal endpoint: POST /action/unified-failover on da.centralcloud.com", + "DB: commands table on db.centralcloud.com", + "Gateway: dr.centralcloud.com routing table", + ], + }); + + const results = checkFilePathConsistency([task], tmpdir()); + assert.deepEqual(results, []); + }); + test("passes when files exist on disk", () => { tempDir = join(tmpdir(), `pre-exec-test-${Date.now()}`); mkdirSync(tempDir, { recursive: true }); diff --git a/src/resources/extensions/sf/tests/production-mutation-guard.test.ts b/src/resources/extensions/sf/tests/production-mutation-guard.test.ts new file mode 100644 index 000000000..e27f8541b --- /dev/null +++ b/src/resources/extensions/sf/tests/production-mutation-guard.test.ts @@ -0,0 +1,29 @@ +import assert from "node:assert/strict"; +import test from "node:test"; + +import { requiresHumanProductionMutationApproval } from "../auto/phases.ts"; + +test("production mutation guard flags live unified failover POST tasks", () => { + assert.equal( + requiresHumanProductionMutationApproval( + [ + "Execute smoke test against production NixOS Hetzner infrastructure.", + "POST to /action/unified-failover with test server_id and vm_names.", + "Verify command row was created with status=pending.", + ].join("\n"), + ), + true, + ); +}); + +test("production mutation guard allows read-only production checks", () => { + assert.equal( + requiresHumanProductionMutationApproval( + [ + "Run read-only smoke checks against production.", + "GET /health and query current gateway routing table.", + ].join("\n"), + ), + false, + ); +});