fix: Harden auto recovery and production guards
This commit is contained in:
parent
e60882efc7
commit
08ea92b072
10 changed files with 203 additions and 26 deletions
|
|
@ -1648,21 +1648,9 @@ export async function startAuto(
|
|||
// the provider JSONL must remain available for synthesizeCrashRecovery().
|
||||
const resumeSessionFile = s.pausedSessionFile;
|
||||
|
||||
// Lock acquired — now safe to delete the pause file
|
||||
if (resumeSessionFile) {
|
||||
try {
|
||||
unlinkSync(resumeSessionFile);
|
||||
} catch (err) {
|
||||
if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
|
||||
logWarning(
|
||||
"session",
|
||||
`pause file cleanup failed: ${err instanceof Error ? err.message : String(err)}`,
|
||||
{ file: "auto.ts" },
|
||||
);
|
||||
}
|
||||
}
|
||||
s.pausedSessionFile = null;
|
||||
}
|
||||
// Clear mutable resume metadata without deleting the provider session JSONL:
|
||||
// synthesizeCrashRecovery() still needs that trace to avoid restarting blind.
|
||||
s.pausedSessionFile = null;
|
||||
|
||||
s.paused = false;
|
||||
s.active = true;
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ import {
|
|||
rollbackToCheckpoint,
|
||||
} from "../safety/git-checkpoint.js";
|
||||
import { resolveSafetyHarnessConfig } from "../safety/safety-harness.js";
|
||||
import { getMilestoneSlices, isDbAvailable } from "../sf-db.js";
|
||||
import { getMilestoneSlices, getTask, isDbAvailable } from "../sf-db.js";
|
||||
import { getEligibleSlices } from "../slice-parallel-eligibility.js";
|
||||
import { startSliceParallel } from "../slice-parallel-orchestrator.js";
|
||||
import type { Phase } from "../types.js";
|
||||
|
|
@ -131,6 +131,24 @@ function clearDeferredCommitAfterCancelledUnit(
|
|||
);
|
||||
}
|
||||
|
||||
export function requiresHumanProductionMutationApproval(text: string): boolean {
|
||||
const normalized = text.toLowerCase();
|
||||
const mentionsProduction =
|
||||
/\b(production|prod|live|hetzner)\b/.test(normalized) ||
|
||||
normalized.includes("centralcloud.com");
|
||||
if (!mentionsProduction) return false;
|
||||
|
||||
const mentionsUnifiedFailover =
|
||||
normalized.includes("unified_failover") ||
|
||||
normalized.includes("unified-failover") ||
|
||||
normalized.includes("/action/unified");
|
||||
if (!mentionsUnifiedFailover) return false;
|
||||
|
||||
return /\b(post|enqueue|create|insert|command row|pending command)\b/.test(
|
||||
normalized,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the authoritative project base for dispatch guards.
|
||||
* Prior-milestone completion lives at the project root, even when the active
|
||||
|
|
@ -1320,6 +1338,49 @@ export async function runGuards(
|
|||
return { action: "break", reason: "stop-guard-error" };
|
||||
}
|
||||
|
||||
// Production mutation guard — headless auto must not enqueue live failover
|
||||
// commands without a human-provided safe target and cleanup plan.
|
||||
try {
|
||||
if (isDbAvailable()) {
|
||||
const state = await deps.deriveState(s.basePath);
|
||||
const activeTask = state.activeTask;
|
||||
const activeSlice = state.activeSlice;
|
||||
const activeMilestone = state.activeMilestone;
|
||||
if (activeMilestone?.id && activeSlice?.id && activeTask?.id) {
|
||||
const task = getTask(activeMilestone.id, activeSlice.id, activeTask.id);
|
||||
if (task) {
|
||||
const taskText = [
|
||||
task.title,
|
||||
task.description,
|
||||
task.verify,
|
||||
...task.inputs,
|
||||
...task.expected_output,
|
||||
].join("\n");
|
||||
if (requiresHumanProductionMutationApproval(taskText)) {
|
||||
const msg =
|
||||
`Production mutation guard: ${activeMilestone.id}/${activeSlice.id}/${activeTask.id} asks to POST unified failover against production. ` +
|
||||
"Pause for an explicit safe server/VM target, cleanup/rollback path, and operator approval.";
|
||||
ctx.ui.notify(msg, "error");
|
||||
deps.sendDesktopNotification(
|
||||
"SF",
|
||||
"Production mutation guard paused auto-mode",
|
||||
"warning",
|
||||
"safety",
|
||||
basename(s.originalBasePath || s.basePath),
|
||||
);
|
||||
await deps.pauseAuto(ctx, pi);
|
||||
return { action: "break", reason: "production-mutation-guard" };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
debugLog("guards", {
|
||||
phase: "production-mutation-guard-error",
|
||||
error: String(e),
|
||||
});
|
||||
}
|
||||
|
||||
// Budget ceiling guard
|
||||
const budgetCeiling = prefs?.budget_ceiling;
|
||||
if (budgetCeiling !== undefined && budgetCeiling > 0) {
|
||||
|
|
@ -1589,12 +1650,23 @@ export async function runUnitPhase(
|
|||
}
|
||||
|
||||
// Detect retry and capture previous tier for escalation
|
||||
const isPausedUnitResume =
|
||||
s.pausedUnitType === unitType && s.pausedUnitId === unitId;
|
||||
const isRetry = !!(
|
||||
s.currentUnit &&
|
||||
s.currentUnit.type === unitType &&
|
||||
s.currentUnit.id === unitId
|
||||
(s.currentUnit &&
|
||||
s.currentUnit.type === unitType &&
|
||||
s.currentUnit.id === unitId) ||
|
||||
isPausedUnitResume
|
||||
);
|
||||
const previousTier = s.currentUnitRouting?.tier;
|
||||
const previousTier =
|
||||
s.currentUnitRouting?.tier ??
|
||||
(isPausedUnitResume && unitType === "execute-task"
|
||||
? "standard"
|
||||
: undefined);
|
||||
if (isPausedUnitResume) {
|
||||
s.pausedUnitType = null;
|
||||
s.pausedUnitId = null;
|
||||
}
|
||||
|
||||
// Scope workflow-logger buffer to this unit so post-finalize drains are
|
||||
// per-unit. Without this, the module-level _buffer accumulates across every
|
||||
|
|
|
|||
|
|
@ -167,8 +167,11 @@ export function buildTaskCommitMessage(ctx: TaskCommitContext): string {
|
|||
// Build body with key files if available
|
||||
const bodyParts: string[] = [];
|
||||
|
||||
if (ctx.keyFiles && ctx.keyFiles.length > 0) {
|
||||
const fileLines = ctx.keyFiles
|
||||
const keyFiles = ctx.keyFiles?.filter(
|
||||
(file) => normalizeExplicitStagePath(file) !== null,
|
||||
);
|
||||
if (keyFiles && keyFiles.length > 0) {
|
||||
const fileLines = keyFiles
|
||||
.slice(0, 8) // cap at 8 files to keep commit concise
|
||||
.map((f) => `- ${f}`)
|
||||
.join("\n");
|
||||
|
|
@ -269,9 +272,14 @@ function normalizeExplicitStagePath(path: string): string | null {
|
|||
)
|
||||
.replace(/\\/g, "/")
|
||||
.replace(/^\.\//, "");
|
||||
const lower = normalized.toLowerCase();
|
||||
if (
|
||||
!normalized ||
|
||||
normalized === "." ||
|
||||
lower === "(none)" ||
|
||||
lower === "none." ||
|
||||
lower === "n/a" ||
|
||||
lower === "-" ||
|
||||
normalized.includes("\0") ||
|
||||
isAbsolute(normalized) ||
|
||||
/^[A-Za-z]:\//.test(normalized) ||
|
||||
|
|
|
|||
|
|
@ -544,14 +544,16 @@ function renderSlicePlanMarkdown(
|
|||
const filesLikelyTouched = Array.from(
|
||||
new Set(tasks.flatMap((task) => task.files)),
|
||||
);
|
||||
if (filesLikelyTouched.length > 0) {
|
||||
lines.push("## Files Likely Touched");
|
||||
lines.push("");
|
||||
lines.push("## Files Likely Touched");
|
||||
lines.push("");
|
||||
if (filesLikelyTouched.length === 0) {
|
||||
lines.push("- (none)");
|
||||
} else {
|
||||
for (const file of filesLikelyTouched) {
|
||||
lines.push(`- ${file}`);
|
||||
}
|
||||
lines.push("");
|
||||
}
|
||||
lines.push("");
|
||||
|
||||
return `${lines.join("\n").trimEnd()}\n`;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -324,6 +324,13 @@ function shouldValidateInputAsPath(raw: string): boolean {
|
|||
const candidate = extractPathFromAnnotation(trimmed);
|
||||
if (!candidate) return false;
|
||||
|
||||
if (
|
||||
/\s/.test(candidate) &&
|
||||
/^[A-Za-z][A-Za-z0-9 _-]{0,48}:\s+/.test(candidate)
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!/\s/.test(candidate)) {
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -402,6 +402,36 @@ test("auto/phases.ts: selectAndApplyModel called exactly once and before updateP
|
|||
);
|
||||
});
|
||||
|
||||
test("auto/phases.ts: paused resumed units count as retries for tier escalation", () => {
|
||||
const src = readFileSync(
|
||||
resolve(import.meta.dirname, "..", "auto", "phases.ts"),
|
||||
"utf-8",
|
||||
);
|
||||
const fnStart = src.indexOf("export async function runUnitPhase");
|
||||
assert.ok(fnStart > 0, "runUnitPhase should exist in phases.ts");
|
||||
const fnBody = src.slice(fnStart, fnStart + 12000);
|
||||
|
||||
assert.ok(
|
||||
fnBody.includes("const isPausedUnitResume"),
|
||||
"runUnitPhase should detect when a persisted paused unit is being resumed",
|
||||
);
|
||||
assert.ok(
|
||||
/s\.pausedUnitType\s*===\s*unitType/.test(fnBody) &&
|
||||
/s\.pausedUnitId\s*===\s*unitId/.test(fnBody),
|
||||
"paused resume detection must compare the paused unit type and id with the dispatched unit",
|
||||
);
|
||||
assert.ok(
|
||||
/isRetry[\s\S]*isPausedUnitResume/.test(fnBody),
|
||||
"paused resumed units must be included in retry detection",
|
||||
);
|
||||
assert.ok(
|
||||
/unitType\s*===\s*"execute-task"[\s\S]*\?\s*"standard"[\s\S]*:\s*undefined/.test(
|
||||
fnBody,
|
||||
),
|
||||
"paused execute-task resumes should supply a standard previous tier so retry escalation can reach heavy after restart",
|
||||
);
|
||||
});
|
||||
|
||||
// ─── autoLoop tests (T02) ─────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -61,6 +61,10 @@ test("auto.ts validates milestone before restoring paused session (#1664)", () =
|
|||
source.includes("resumeSessionFile ?? undefined"),
|
||||
"auto.ts must pass the captured paused session file into synthesizeCrashRecovery",
|
||||
);
|
||||
assert.ok(
|
||||
!source.includes("unlinkSync(resumeSessionFile)"),
|
||||
"auto.ts must not delete the provider session JSONL before recovery can read it",
|
||||
);
|
||||
|
||||
// Pause path must sanitize live session file path before persisting metadata.
|
||||
assert.ok(
|
||||
|
|
|
|||
|
|
@ -1779,6 +1779,30 @@ describe("git-service", async () => {
|
|||
rmSync(externalGsd, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test("GitServiceImpl: stageOnly ignores summary none placeholders", () => {
|
||||
const repo = initTempRepo();
|
||||
createFile(repo, "portal/failover-handlers_test.go", "package main\n");
|
||||
run("git add -A", repo);
|
||||
run('git commit -m "add portal test"', repo);
|
||||
|
||||
writeFileSync(
|
||||
join(repo, "portal/failover-handlers_test.go"),
|
||||
"package main\n// gate tests\n",
|
||||
);
|
||||
|
||||
const svc = new GitServiceImpl(repo);
|
||||
assert.equal(
|
||||
svc.stageOnly([], ["(none)"]),
|
||||
true,
|
||||
"tracked implementation edits should still stage when task summary key_files contains the placeholder",
|
||||
);
|
||||
|
||||
const staged = run("git diff --cached --name-only", repo);
|
||||
assert.equal(staged, "portal/failover-handlers_test.go");
|
||||
|
||||
rmSync(repo, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
// ─── nativeAddAllWithExclusions: non-symlinked .sf still works ───────
|
||||
|
||||
test("nativeAddAllWithExclusions: non-symlinked .sf still works", () => {
|
||||
|
|
|
|||
|
|
@ -141,6 +141,19 @@ import type { Request } from 'express';
|
|||
describe("checkFilePathConsistency", () => {
|
||||
let tempDir: string;
|
||||
|
||||
test("ignores labeled operational inputs that are not filesystem paths", () => {
|
||||
const task = createTask({
|
||||
inputs: [
|
||||
"Portal endpoint: POST /action/unified-failover on da.centralcloud.com",
|
||||
"DB: commands table on db.centralcloud.com",
|
||||
"Gateway: dr.centralcloud.com routing table",
|
||||
],
|
||||
});
|
||||
|
||||
const results = checkFilePathConsistency([task], tmpdir());
|
||||
assert.deepEqual(results, []);
|
||||
});
|
||||
|
||||
test("passes when files exist on disk", () => {
|
||||
tempDir = join(tmpdir(), `pre-exec-test-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
|
|
|
|||
|
|
@ -0,0 +1,29 @@
|
|||
import assert from "node:assert/strict";
|
||||
import test from "node:test";
|
||||
|
||||
import { requiresHumanProductionMutationApproval } from "../auto/phases.ts";
|
||||
|
||||
test("production mutation guard flags live unified failover POST tasks", () => {
|
||||
assert.equal(
|
||||
requiresHumanProductionMutationApproval(
|
||||
[
|
||||
"Execute smoke test against production NixOS Hetzner infrastructure.",
|
||||
"POST to /action/unified-failover with test server_id and vm_names.",
|
||||
"Verify command row was created with status=pending.",
|
||||
].join("\n"),
|
||||
),
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test("production mutation guard allows read-only production checks", () => {
|
||||
assert.equal(
|
||||
requiresHumanProductionMutationApproval(
|
||||
[
|
||||
"Run read-only smoke checks against production.",
|
||||
"GET /health and query current gateway routing table.",
|
||||
].join("\n"),
|
||||
),
|
||||
false,
|
||||
);
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue