fix: Harden auto recovery and production guards

This commit is contained in:
Mikael Hugo 2026-04-30 11:35:16 +02:00
parent e60882efc7
commit 08ea92b072
10 changed files with 203 additions and 26 deletions

View file

@ -1648,21 +1648,9 @@ export async function startAuto(
// the provider JSONL must remain available for synthesizeCrashRecovery().
const resumeSessionFile = s.pausedSessionFile;
// Lock acquired — now safe to delete the pause file
if (resumeSessionFile) {
try {
unlinkSync(resumeSessionFile);
} catch (err) {
if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
logWarning(
"session",
`pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`,
{ file: "auto.ts" },
);
}
}
s.pausedSessionFile = null;
}
// Clear mutable resume metadata without deleting the provider session JSONL:
// synthesizeCrashRecovery() still needs that trace to avoid restarting blind.
s.pausedSessionFile = null;
s.paused = false;
s.active = true;

View file

@ -52,7 +52,7 @@ import {
rollbackToCheckpoint,
} from "../safety/git-checkpoint.js";
import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
import { getMilestoneSlices, isDbAvailable } from "../sf-db.js";
import { getMilestoneSlices, getTask, isDbAvailable } from "../sf-db.js";
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
import type { Phase } from "../types.js";
@ -131,6 +131,24 @@ function clearDeferredCommitAfterCancelledUnit(
);
}
export function requiresHumanProductionMutationApproval(text: string): boolean {
const normalized = text.toLowerCase();
const mentionsProduction =
/\b(production|prod|live|hetzner)\b/.test(normalized) ||
normalized.includes("centralcloud.com");
if (!mentionsProduction) return false;
const mentionsUnifiedFailover =
normalized.includes("unified_failover") ||
normalized.includes("unified-failover") ||
normalized.includes("/action/unified");
if (!mentionsUnifiedFailover) return false;
return /\b(post|enqueue|create|insert|command row|pending command)\b/.test(
normalized,
);
}
/**
* Resolve the authoritative project base for dispatch guards.
* Prior-milestone completion lives at the project root, even when the active
@ -1320,6 +1338,49 @@ export async function runGuards(
return { action: "break", reason: "stop-guard-error" };
}
// Production mutation guard — headless auto must not enqueue live failover
// commands without a human-provided safe target and cleanup plan.
try {
if (isDbAvailable()) {
const state = await deps.deriveState(s.basePath);
const activeTask = state.activeTask;
const activeSlice = state.activeSlice;
const activeMilestone = state.activeMilestone;
if (activeMilestone?.id && activeSlice?.id && activeTask?.id) {
const task = getTask(activeMilestone.id, activeSlice.id, activeTask.id);
if (task) {
const taskText = [
task.title,
task.description,
task.verify,
...task.inputs,
...task.expected_output,
].join("\n");
if (requiresHumanProductionMutationApproval(taskText)) {
const msg =
`Production mutation guard: ${activeMilestone.id}/${activeSlice.id}/${activeTask.id} asks to POST unified failover against production. ` +
"Pause for an explicit safe server/VM target, cleanup/rollback path, and operator approval.";
ctx.ui.notify(msg, "error");
deps.sendDesktopNotification(
"SF",
"Production mutation guard paused auto-mode",
"warning",
"safety",
basename(s.originalBasePath || s.basePath),
);
await deps.pauseAuto(ctx, pi);
return { action: "break", reason: "production-mutation-guard" };
}
}
}
}
} catch (e) {
debugLog("guards", {
phase: "production-mutation-guard-error",
error: String(e),
});
}
// Budget ceiling guard
const budgetCeiling = prefs?.budget_ceiling;
if (budgetCeiling !== undefined && budgetCeiling > 0) {
@ -1589,12 +1650,23 @@ export async function runUnitPhase(
}
// Detect retry and capture previous tier for escalation
const isPausedUnitResume =
s.pausedUnitType === unitType && s.pausedUnitId === unitId;
const isRetry = !!(
s.currentUnit &&
s.currentUnit.type === unitType &&
s.currentUnit.id === unitId
(s.currentUnit &&
s.currentUnit.type === unitType &&
s.currentUnit.id === unitId) ||
isPausedUnitResume
);
const previousTier = s.currentUnitRouting?.tier;
const previousTier =
s.currentUnitRouting?.tier ??
(isPausedUnitResume && unitType === "execute-task"
? "standard"
: undefined);
if (isPausedUnitResume) {
s.pausedUnitType = null;
s.pausedUnitId = null;
}
// Scope workflow-logger buffer to this unit so post-finalize drains are
// per-unit. Without this, the module-level _buffer accumulates across every

View file

@ -167,8 +167,11 @@ export function buildTaskCommitMessage(ctx: TaskCommitContext): string {
// Build body with key files if available
const bodyParts: string[] = [];
if (ctx.keyFiles && ctx.keyFiles.length > 0) {
const fileLines = ctx.keyFiles
const keyFiles = ctx.keyFiles?.filter(
(file) => normalizeExplicitStagePath(file) !== null,
);
if (keyFiles && keyFiles.length > 0) {
const fileLines = keyFiles
.slice(0, 8) // cap at 8 files to keep commit concise
.map((f) => `- ${f}`)
.join("\n");
@ -269,9 +272,14 @@ function normalizeExplicitStagePath(path: string): string | null {
)
.replace(/\\/g, "/")
.replace(/^\.\//, "");
const lower = normalized.toLowerCase();
if (
!normalized ||
normalized === "." ||
lower === "(none)" ||
lower === "none." ||
lower === "n/a" ||
lower === "-" ||
normalized.includes("\0") ||
isAbsolute(normalized) ||
/^[A-Za-z]:\//.test(normalized) ||

View file

@ -544,14 +544,16 @@ function renderSlicePlanMarkdown(
const filesLikelyTouched = Array.from(
new Set(tasks.flatMap((task) => task.files)),
);
if (filesLikelyTouched.length > 0) {
lines.push("## Files Likely Touched");
lines.push("");
lines.push("## Files Likely Touched");
lines.push("");
if (filesLikelyTouched.length === 0) {
lines.push("- (none)");
} else {
for (const file of filesLikelyTouched) {
lines.push(`- ${file}`);
}
lines.push("");
}
lines.push("");
return `${lines.join("\n").trimEnd()}\n`;
}

View file

@ -324,6 +324,13 @@ function shouldValidateInputAsPath(raw: string): boolean {
const candidate = extractPathFromAnnotation(trimmed);
if (!candidate) return false;
if (
/\s/.test(candidate) &&
/^[A-Za-z][A-Za-z0-9 _-]{0,48}:\s+/.test(candidate)
) {
return false;
}
if (!/\s/.test(candidate)) {
return true;
}

View file

@ -402,6 +402,36 @@ test("auto/phases.ts: selectAndApplyModel called exactly once and before updateP
);
});
test("auto/phases.ts: paused resumed units count as retries for tier escalation", () => {
const src = readFileSync(
resolve(import.meta.dirname, "..", "auto", "phases.ts"),
"utf-8",
);
const fnStart = src.indexOf("export async function runUnitPhase");
assert.ok(fnStart > 0, "runUnitPhase should exist in phases.ts");
const fnBody = src.slice(fnStart, fnStart + 12000);
assert.ok(
fnBody.includes("const isPausedUnitResume"),
"runUnitPhase should detect when a persisted paused unit is being resumed",
);
assert.ok(
/s\.pausedUnitType\s*===\s*unitType/.test(fnBody) &&
/s\.pausedUnitId\s*===\s*unitId/.test(fnBody),
"paused resume detection must compare the paused unit type and id with the dispatched unit",
);
assert.ok(
/isRetry[\s\S]*isPausedUnitResume/.test(fnBody),
"paused resumed units must be included in retry detection",
);
assert.ok(
/unitType\s*===\s*"execute-task"[\s\S]*\?\s*"standard"[\s\S]*:\s*undefined/.test(
fnBody,
),
"paused execute-task resumes should supply a standard previous tier so retry escalation can reach heavy after restart",
);
});
// ─── autoLoop tests (T02) ─────────────────────────────────────────────────
/**

View file

@ -61,6 +61,10 @@ test("auto.ts validates milestone before restoring paused session (#1664)", () =
source.includes("resumeSessionFile ?? undefined"),
"auto.ts must pass the captured paused session file into synthesizeCrashRecovery",
);
assert.ok(
!source.includes("unlinkSync(resumeSessionFile)"),
"auto.ts must not delete the provider session JSONL before recovery can read it",
);
// Pause path must sanitize live session file path before persisting metadata.
assert.ok(

View file

@ -1779,6 +1779,30 @@ describe("git-service", async () => {
rmSync(externalGsd, { recursive: true, force: true });
});
test("GitServiceImpl: stageOnly ignores summary none placeholders", () => {
const repo = initTempRepo();
createFile(repo, "portal/failover-handlers_test.go", "package main\n");
run("git add -A", repo);
run('git commit -m "add portal test"', repo);
writeFileSync(
join(repo, "portal/failover-handlers_test.go"),
"package main\n// gate tests\n",
);
const svc = new GitServiceImpl(repo);
assert.equal(
svc.stageOnly([], ["(none)"]),
true,
"tracked implementation edits should still stage when task summary key_files contains the placeholder",
);
const staged = run("git diff --cached --name-only", repo);
assert.equal(staged, "portal/failover-handlers_test.go");
rmSync(repo, { recursive: true, force: true });
});
// ─── nativeAddAllWithExclusions: non-symlinked .sf still works ───────
test("nativeAddAllWithExclusions: non-symlinked .sf still works", () => {

View file

@ -141,6 +141,19 @@ import type { Request } from 'express';
describe("checkFilePathConsistency", () => {
let tempDir: string;
test("ignores labeled operational inputs that are not filesystem paths", () => {
const task = createTask({
inputs: [
"Portal endpoint: POST /action/unified-failover on da.centralcloud.com",
"DB: commands table on db.centralcloud.com",
"Gateway: dr.centralcloud.com routing table",
],
});
const results = checkFilePathConsistency([task], tmpdir());
assert.deepEqual(results, []);
});
test("passes when files exist on disk", () => {
tempDir = join(tmpdir(), `pre-exec-test-${Date.now()}`);
mkdirSync(tempDir, { recursive: true });

View file

@ -0,0 +1,29 @@
import assert from "node:assert/strict";
import test from "node:test";
import { requiresHumanProductionMutationApproval } from "../auto/phases.ts";
test("production mutation guard flags live unified failover POST tasks", () => {
assert.equal(
requiresHumanProductionMutationApproval(
[
"Execute smoke test against production NixOS Hetzner infrastructure.",
"POST to /action/unified-failover with test server_id and vm_names.",
"Verify command row was created with status=pending.",
].join("\n"),
),
true,
);
});
test("production mutation guard allows read-only production checks", () => {
assert.equal(
requiresHumanProductionMutationApproval(
[
"Run read-only smoke checks against production.",
"GET /health and query current gateway routing table.",
].join("\n"),
),
false,
);
});