Merge auto-hardening: 10 structural fixes for reliable multi-day auto operation
Merges the auto-hardening branch which implements all audit-identified structural holes in the SF auto-mode loop, memory, verification, health, and parallel systems. See individual commits for detailed change descriptions. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
9724cb437a
16 changed files with 319 additions and 19 deletions
|
|
@ -12,8 +12,8 @@
|
|||
import type { SFState } from "./types.js";
|
||||
import type { SFPreferences } from "./preferences.js";
|
||||
import type { UatType } from "./files.js";
|
||||
import { loadFile, extractUatType, loadActiveOverrides } from "./files.js";
|
||||
import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone } from "./sf-db.js";
|
||||
import { loadFile, extractUatType, loadActiveOverrides, parseDeferredRequirements } from "./files.js";
|
||||
import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone, getSliceTasks } from "./sf-db.js";
|
||||
import { extractVerdict, isAcceptableUatVerdict } from "./verdict-parser.js";
|
||||
|
||||
import {
|
||||
|
|
@ -684,6 +684,30 @@ export const DISPATCH_RULES: DispatchRule[] = [
|
|||
return null;
|
||||
},
|
||||
},
|
||||
{
  // Guard rule: while executing, refuse to dispatch the active task when the
  // task immediately before it in slice order recorded verification_status
  // "all_fail". Returns null (no match) in every other case so later rules run.
  name: "executing → prior-task verification all-fail guard",
  match: async ({ state, mid }) => {
    // Only relevant while a task inside a slice is being executed, and only
    // when task rows can be read from the database.
    if (state.phase !== "executing" || !state.activeTask) return null;
    if (!state.activeSlice) return null;
    if (!isDbAvailable()) return null;

    const sid = state.activeSlice.id;
    const tid = state.activeTask.id;

    // Array.prototype.sort() sorts in place; order a copy so the array handed
    // back by getSliceTasks() is never mutated from inside a dispatch rule.
    // Order: ascending sequence (missing sequence counts as 0), ties by id.
    const sortedTasks = [...getSliceTasks(mid, sid)].sort(
      (a, b) => (a.sequence ?? 0) - (b.sequence ?? 0) || a.id.localeCompare(b.id),
    );

    // An index of 0 means there is no prior task; -1 means the active task
    // was not found among the slice's rows. Neither case can trip the guard.
    const currentIdx = sortedTasks.findIndex((t) => t.id === tid);
    if (currentIdx <= 0) return null;

    const priorTask = sortedTasks[currentIdx - 1];
    if (priorTask?.verification_status !== "all_fail") return null;

    return {
      action: "stop",
      reason: `Task ${priorTask.id} in slice ${sid} had all verification checks fail — stopping before dispatching ${tid}. Fix verification in the prior task or re-run it.`,
      level: "error",
    };
  },
},
|
||||
{
|
||||
name: "executing → execute-task",
|
||||
match: async ({ state, mid, basePath }) => {
|
||||
|
|
@ -843,6 +867,18 @@ export const DISPATCH_RULES: DispatchRule[] = [
|
|||
logWarning("dispatch", `verification class check failed: ${err instanceof Error ? err.message : String(err)}`);
|
||||
}
|
||||
|
||||
// P5-A: Advisory check for deferred requirements targeting this milestone
|
||||
try {
|
||||
const deferred = parseDeferredRequirements(basePath);
|
||||
const unaddressed = deferred.filter((r) => r.deferredTo === mid);
|
||||
if (unaddressed.length > 0) {
|
||||
const ids = unaddressed.map((r) => r.id).join(", ");
|
||||
logWarning("dispatch", `Milestone ${mid} has ${unaddressed.length} deferred requirement(s) (${ids}) that were not validated. Review before completing.`);
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal advisory
|
||||
}
|
||||
|
||||
return {
|
||||
action: "dispatch",
|
||||
unitType: "complete-milestone",
|
||||
|
|
|
|||
|
|
@ -36,6 +36,18 @@ import { readPhaseAnchor, formatAnchorForPrompt } from "./phase-anchor.js";
|
|||
import { logWarning } from "./workflow-logger.js";
|
||||
import { inlineGraphSubgraph } from "./graph-context.js";
|
||||
|
||||
// ─── Memory Injection ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Build the memories block that gets inlined into a prompt.
 *
 * Lazily imports `./memory-store.js`, passes `limit` to
 * `getActiveMemoriesRanked`, and hands that result to
 * `formatMemoriesForPrompt`.
 *
 * @param limit Value forwarded to `getActiveMemoriesRanked` (default 5).
 * @returns The formatted block, or an empty string if the import or either
 *   call throws — the block is optional, so failures are swallowed here.
 */
async function buildMemoriesBlock(limit = 5): Promise<string> {
  let block = "";
  try {
    const store = await import("./memory-store.js");
    const ranked = store.getActiveMemoriesRanked(limit);
    block = store.formatMemoriesForPrompt(ranked);
  } catch {
    // Best-effort: callers skip the section when the block is empty.
    block = "";
  }
  return block;
}
|
||||
|
||||
// ─── Preamble Cap ─────────────────────────────────────────────────────────────
|
||||
|
||||
const MAX_PREAMBLE_CHARS = 30_000;
|
||||
|
|
@ -1095,6 +1107,8 @@ export async function buildPlanMilestonePrompt(mid: string, midTitle: string, ba
|
|||
}
|
||||
const knowledgeInlinePM = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
|
||||
if (knowledgeInlinePM) inlined.push(knowledgeInlinePM);
|
||||
const memoriesBlockPM = await buildMemoriesBlock(5);
|
||||
if (memoriesBlockPM) inlined.push(memoriesBlockPM);
|
||||
inlined.push(inlineTemplate("roadmap", "Roadmap"));
|
||||
if (inlineLevel === "full") {
|
||||
inlined.push(inlineTemplate("decisions", "Decisions"));
|
||||
|
|
@ -1413,6 +1427,7 @@ export async function buildExecuteTaskPrompt(
|
|||
: "";
|
||||
|
||||
const phaseAnchorSection = planAnchor ? formatAnchorForPrompt(planAnchor) : "";
|
||||
const memoriesSection = await buildMemoriesBlock(3);
|
||||
|
||||
// Task-scoped gates owned by execute-task (Q5/Q6/Q7). Pull only the
|
||||
// gates that plan-slice actually seeded for this task — tasks with no
|
||||
|
|
@ -1428,6 +1443,7 @@ export async function buildExecuteTaskPrompt(
|
|||
return loadPrompt("execute-task", {
|
||||
overridesSection,
|
||||
runtimeContext,
|
||||
memoriesSection,
|
||||
phaseAnchorSection,
|
||||
workingDirectory: base,
|
||||
milestoneId: mid, sliceId: sid, sliceTitle: sTitle, taskId: tid, taskTitle: tTitle,
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ export interface LoopDeps {
|
|||
// Pre-dispatch health gate
|
||||
preDispatchHealthGate: (
|
||||
basePath: string,
|
||||
) => Promise<{ proceed: boolean; reason?: string; fixesApplied: string[] }>;
|
||||
) => Promise<{ proceed: boolean; reason?: string; issues?: string[]; fixesApplied: string[] }>;
|
||||
|
||||
// Worktree sync
|
||||
syncProjectRootToWorktree: (
|
||||
|
|
|
|||
|
|
@ -95,6 +95,45 @@ function checkMemoryPressure(): { pressured: boolean; heapMB: number; limitMB: n
|
|||
return { pressured: pct > MEMORY_PRESSURE_THRESHOLD, heapMB, limitMB, pct };
|
||||
}
|
||||
|
||||
/**
 * Tracks the dangling phase promise from the most recent timeout so the next
 * iteration can drain it before proceeding. Promise.race() rejects on timeout
 * but does not cancel the underlying async work; draining here prevents the
 * timed-out phase from mutating state concurrently with the next iteration.
 */
let _danglingPhasePromise: Promise<unknown> | null = null;

/**
 * Wrap a phase function with a timeout.
 *
 * On timeout the returned promise rejects with an Error whose message is
 * `phase-timeout:<name>` so the blanket catch can handle it specially, and the
 * still-running phase promise is stored in `_danglingPhasePromise` so the
 * caller can drain it before starting a new iteration.
 *
 * A timeout is recognised by comparing the caught error against the exact
 * Error object created by this call's timer, not by inspecting the message.
 * A phase that itself rejects with a "phase-timeout:…" error (for example a
 * nested timeout propagating outward) has already settled and is therefore
 * not recorded as dangling.
 *
 * @param name Phase label embedded in the timeout error message.
 * @param fn Starts the phase; invoked exactly once, before the timer is armed.
 * @param timeoutMs Delay passed to setTimeout before the timeout rejection fires.
 * @returns The value `fn()` resolves with when it settles before the timer.
 * @throws Whatever `fn()` rejects with, or the `phase-timeout:<name>` Error.
 */
async function withPhaseTimeout<T>(
  name: string,
  fn: () => Promise<T>,
  timeoutMs: number,
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  // Set only when this call's own timer fires; used for the identity check below.
  let timeoutError: Error | undefined;
  const phasePromise = fn();
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      timeoutError = new Error(`phase-timeout:${name}`);
      reject(timeoutError);
    }, timeoutMs);
  });
  try {
    return await Promise.race([phasePromise, timeout]);
  } catch (err) {
    // Only a rejection produced by our own timer means the phase is still
    // running in the background and must be drained by the caller.
    if (timeoutError !== undefined && err === timeoutError) {
      _danglingPhasePromise = phasePromise;
    }
    throw err;
  } finally {
    // Always disarm the timer so a finished phase does not keep it pending.
    if (timer !== undefined) clearTimeout(timer);
  }
}
|
||||
|
||||
/**
|
||||
* Main auto-mode execution loop. Iterates: derive → dispatch → guards →
|
||||
* runUnit → finalize → repeat. Exits when s.active becomes false or a
|
||||
|
|
@ -208,10 +247,21 @@ export async function autoLoop(
|
|||
break;
|
||||
}
|
||||
|
||||
// ── Drain any dangling phase promise before starting new work ──
|
||||
// Promise.race() on timeout does not cancel the underlying async fn; that
|
||||
// fn keeps running and may mutate state after the loop has advanced.
|
||||
// Awaiting its completion here ensures no concurrent state writes.
|
||||
if (_danglingPhasePromise !== null) {
|
||||
const dangling = _danglingPhasePromise;
|
||||
_danglingPhasePromise = null;
|
||||
try { await dangling; } catch { /* ignore — result is irrelevant */ }
|
||||
}
|
||||
|
||||
try {
|
||||
// ── Blanket try/catch: one bad iteration must not kill the session
|
||||
const prefs = deps.loadEffectiveSFPreferences()?.preferences;
|
||||
const uokFlags = resolveUokFlags(prefs);
|
||||
const phaseTimeoutMs = ((prefs?.auto_supervisor?.phase_timeout_minutes ?? 10) * 60_000);
|
||||
|
||||
// ── Check sidecar queue before deriveState ──
|
||||
let sidecarItem: SidecarItem | undefined;
|
||||
|
|
@ -410,8 +460,42 @@ export async function autoLoop(
|
|||
}
|
||||
|
||||
if (!sidecarItem) {
|
||||
// ── P4-A: Doctor issues → reassess escalation ─────────────────────
|
||||
// If the health gate detects issues that mention slice IDs (state
|
||||
// inconsistencies that reassessment can fix), queue reassess instead
|
||||
// of pausing auto-mode. This runs separately from the gate inside
|
||||
// runPreDispatch so we can intercept *before* the break path.
|
||||
try {
|
||||
const healthCheck = await deps.preDispatchHealthGate(s.basePath);
|
||||
if (!healthCheck.proceed && healthCheck.issues && healthCheck.issues.length > 0) {
|
||||
const sliceRefPattern = /\bS\d+\b/;
|
||||
const hasSliceRef = healthCheck.issues.some((issue) => sliceRefPattern.test(issue));
|
||||
if (hasSliceRef) {
|
||||
const sfState = await deps.deriveState(s.basePath);
|
||||
const mid = sfState.activeMilestone?.id;
|
||||
const midTitle = sfState.activeMilestone?.title ?? "";
|
||||
const sliceId = sfState.activeSlice?.id ?? "reassess";
|
||||
if (mid) {
|
||||
ctx.ui.notify(`Health issues detected with slice references — queuing reassess-roadmap instead of pausing.`, "warning");
|
||||
const { buildReassessRoadmapPrompt } = await import("../auto-prompts.js");
|
||||
const reassessPrompt = await buildReassessRoadmapPrompt(mid, midTitle, sliceId, s.basePath);
|
||||
s.sidecarQueue.unshift({
|
||||
kind: "hook",
|
||||
unitType: "reassess-roadmap",
|
||||
unitId: `${mid}/${sliceId}`,
|
||||
prompt: `## Doctor Health Issues\n\n${healthCheck.issues.map(i => `- ${i}`).join("\n")}\n\n${reassessPrompt}`,
|
||||
});
|
||||
finishTurn("retry");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// Non-fatal — fall through to normal runPreDispatch path
|
||||
}
|
||||
|
||||
// ── Phase 1: Pre-dispatch ─────────────────────────────────────────
|
||||
const preDispatchResult = await runPreDispatch(ic, loopState);
|
||||
const preDispatchResult = await withPhaseTimeout("preDispatch", () => runPreDispatch(ic, loopState), phaseTimeoutMs / 2);
|
||||
deps.uokObserver?.onPhaseResult("pre-dispatch", preDispatchResult.action);
|
||||
if (preDispatchResult.action === "break") {
|
||||
finishTurn("stopped", "manual-attention", "pre-dispatch-break");
|
||||
|
|
@ -433,7 +517,7 @@ export async function autoLoop(
|
|||
}
|
||||
|
||||
// ── Phase 3: Dispatch ─────────────────────────────────────────────
|
||||
const dispatchResult = await runDispatch(ic, preData, loopState);
|
||||
const dispatchResult = await withPhaseTimeout("dispatch", () => runDispatch(ic, preData, loopState), phaseTimeoutMs);
|
||||
deps.uokObserver?.onPhaseResult("dispatch", dispatchResult.action);
|
||||
if (dispatchResult.action === "break") {
|
||||
finishTurn("stopped", "manual-attention", "dispatch-break");
|
||||
|
|
@ -481,7 +565,7 @@ export async function autoLoop(
|
|||
|
||||
// ── Phase 5: Finalize ───────────────────────────────────────────────
|
||||
|
||||
const finalizeResult = await runFinalize(ic, iterData, loopState, sidecarItem);
|
||||
const finalizeResult = await withPhaseTimeout("finalize", () => runFinalize(ic, iterData, loopState, sidecarItem), phaseTimeoutMs);
|
||||
deps.uokObserver?.onPhaseResult("finalize", finalizeResult.action, {
|
||||
unitType: iterData.unitType,
|
||||
unitId: iterData.unitId,
|
||||
|
|
@ -537,6 +621,19 @@ export async function autoLoop(
|
|||
break;
|
||||
}
|
||||
|
||||
// ── Phase timeout: log, increment counter, continue ──
|
||||
if (msg.startsWith("phase-timeout:")) {
|
||||
const phaseName = msg.slice("phase-timeout:".length);
|
||||
loopState.consecutiveFinalizeTimeouts++;
|
||||
ctx.ui.notify(
|
||||
`Phase "${phaseName}" timed out (${loopState.consecutiveFinalizeTimeouts} consecutive) — skipping iteration and continuing.`,
|
||||
"warning",
|
||||
);
|
||||
debugLog("autoLoop", { phase: "phase-timeout", phaseName, consecutiveFinalizeTimeouts: loopState.consecutiveFinalizeTimeouts, iteration });
|
||||
finishTurn("retry", "timeout", msg);
|
||||
continue;
|
||||
}
|
||||
|
||||
// ── Credential cooldown: wait and retry with bounded budget ──
|
||||
// A 429 triggers a 30s credential backoff in AuthStorage. If the SDK's
|
||||
// getApiKey() retries couldn't outlast the window, the error surfaces
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
// Used by state derivation and the status widget.
|
||||
// Pure functions, zero Pi dependencies - uses only Node built-ins.
|
||||
|
||||
import { promises as fs } from 'node:fs';
|
||||
import { promises as fs, readFileSync } from 'node:fs';
|
||||
import { resolve } from 'node:path';
|
||||
import { atomicWriteAsync } from './atomic-write.js';
|
||||
import { resolveMilestoneFile, relMilestoneFile, resolveSfRootFile } from './paths.js';
|
||||
|
|
@ -531,6 +531,35 @@ export function parseRequirementCounts(content: string | null): RequirementCount
|
|||
return counts;
|
||||
}
|
||||
|
||||
// ─── Deferred Requirement Parser ──────────────────────────────────────────
|
||||
|
||||
/**
 * Parse requirement entries under the "## Deferred" section of REQUIREMENTS.md.
 *
 * An entry is an H3 heading of the form `### <ID> — <title>` (em dash). The
 * target milestone is taken from the title when it contains `→ M001` or
 * `deferred to M001` (for example `(deferred to M001)`). The phrase is matched
 * case-insensitively and the captured milestone id is upper-cased, so
 * `deferred to m001` yields `M001` and can be compared directly against a
 * milestone id.
 *
 * @param basePath Passed to `resolveSfRootFile` to locate the REQUIREMENTS file.
 * @returns One record per matching heading; `deferredTo` is null when the title
 *   names no milestone. Returns an empty array when the file cannot be
 *   resolved or read, or when there is no "Deferred" section.
 */
export function parseDeferredRequirements(basePath: string): Array<{ id: string; text: string; deferredTo: string | null }> {
  // Compiled once per call instead of once per line.
  const entryHeading = /###\s+([A-Z][\w-]*\d+)\s+—\s+(.+)/;
  const deferralTarget = /(?:→|deferred to)\s+(M\d+)/i;
  try {
    const reqPath = resolveSfRootFile(basePath, "REQUIREMENTS");
    if (!reqPath) return [];
    const content = readFileSync(reqPath, "utf-8");
    const deferredSection = extractSection(content, "Deferred", 2);
    if (!deferredSection) return [];
    const results: Array<{ id: string; text: string; deferredTo: string | null }> = [];
    for (const line of deferredSection.split("\n")) {
      const idMatch = line.match(entryHeading);
      if (!idMatch) continue;
      const id = idMatch[1];
      const text = idMatch[2].trim();
      const deferMatch = text.match(deferralTarget);
      // The /i flag also lets a lower-case "m001" through; normalise it so the
      // value compares equal to the milestone id it refers to.
      results.push({ id, text, deferredTo: deferMatch ? deferMatch[1].toUpperCase() : null });
    }
    return results;
  } catch {
    // Best-effort parser feeding an advisory check: any read/parse failure
    // is reported as "no deferred requirements".
    return [];
  }
}
|
||||
|
||||
// ─── Task Plan Must-Haves Parser ───────────────────────────────────────────
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -40,6 +40,8 @@ export interface GateDefinition {
|
|||
/** H3 section header used in the artifact the turn writes
|
||||
* (e.g. "Operational Readiness" for Q8 in the slice summary). */
|
||||
promptSection: string;
|
||||
/** Minimum word count required in the rationale when verdict is "omitted". 0 = no minimum. */
|
||||
minOmissionWords: number;
|
||||
}
|
||||
|
||||
export const GATE_REGISTRY = {
|
||||
|
|
@ -55,6 +57,7 @@ export const GATE_REGISTRY = {
|
|||
"If none apply, return verdict 'omitted' with rationale explaining why.",
|
||||
].join("\n"),
|
||||
promptSection: "Abuse Surface",
|
||||
minOmissionWords: 20,
|
||||
},
|
||||
Q4: {
|
||||
id: "Q4",
|
||||
|
|
@ -68,6 +71,7 @@ export const GATE_REGISTRY = {
|
|||
"If no existing requirements are affected, return verdict 'omitted'.",
|
||||
].join("\n"),
|
||||
promptSection: "Broken Promises",
|
||||
minOmissionWords: 0,
|
||||
},
|
||||
Q5: {
|
||||
id: "Q5",
|
||||
|
|
@ -81,6 +85,7 @@ export const GATE_REGISTRY = {
|
|||
"Return verdict 'omitted' only if the task has no external dependencies.",
|
||||
].join("\n"),
|
||||
promptSection: "Failure Modes",
|
||||
minOmissionWords: 15,
|
||||
},
|
||||
Q6: {
|
||||
id: "Q6",
|
||||
|
|
@ -93,6 +98,7 @@ export const GATE_REGISTRY = {
|
|||
"Return verdict 'omitted' if the task has no runtime load dimension.",
|
||||
].join("\n"),
|
||||
promptSection: "Load Profile",
|
||||
minOmissionWords: 10,
|
||||
},
|
||||
Q7: {
|
||||
id: "Q7",
|
||||
|
|
@ -105,6 +111,7 @@ export const GATE_REGISTRY = {
|
|||
"Return verdict 'omitted' only if the task has no meaningful negative surface.",
|
||||
].join("\n"),
|
||||
promptSection: "Negative Tests",
|
||||
minOmissionWords: 15,
|
||||
},
|
||||
Q8: {
|
||||
id: "Q8",
|
||||
|
|
@ -118,6 +125,7 @@ export const GATE_REGISTRY = {
|
|||
"Return verdict 'omitted' only for slices with no runtime behavior at all.",
|
||||
].join("\n"),
|
||||
promptSection: "Operational Readiness",
|
||||
minOmissionWords: 0,
|
||||
},
|
||||
MV01: {
|
||||
id: "MV01",
|
||||
|
|
@ -130,6 +138,7 @@ export const GATE_REGISTRY = {
|
|||
"Return verdict 'flag' if any criterion is unmet or unverifiable.",
|
||||
].join("\n"),
|
||||
promptSection: "Success Criteria Checklist",
|
||||
minOmissionWords: 0,
|
||||
},
|
||||
MV02: {
|
||||
id: "MV02",
|
||||
|
|
@ -142,6 +151,7 @@ export const GATE_REGISTRY = {
|
|||
"Flag missing artifacts and slices with outstanding follow-ups or known limitations.",
|
||||
].join("\n"),
|
||||
promptSection: "Slice Delivery Audit",
|
||||
minOmissionWords: 0,
|
||||
},
|
||||
MV03: {
|
||||
id: "MV03",
|
||||
|
|
@ -153,6 +163,7 @@ export const GATE_REGISTRY = {
|
|||
"Flag gaps where two slices were built in isolation with no integration evidence.",
|
||||
].join("\n"),
|
||||
promptSection: "Cross-Slice Integration",
|
||||
minOmissionWords: 0,
|
||||
},
|
||||
MV04: {
|
||||
id: "MV04",
|
||||
|
|
@ -164,6 +175,7 @@ export const GATE_REGISTRY = {
|
|||
"Flag requirements that slices claim to advance but no artifact proves.",
|
||||
].join("\n"),
|
||||
promptSection: "Requirement Coverage",
|
||||
minOmissionWords: 0,
|
||||
},
|
||||
} as const satisfies Record<GateId, GateDefinition>;
|
||||
|
||||
|
|
|
|||
|
|
@ -309,6 +309,7 @@ export async function extractMemoriesFromUnit(
|
|||
_extracting = true;
|
||||
_lastExtractionTime = now;
|
||||
|
||||
let userPrompt: string | undefined;
|
||||
try {
|
||||
// Read and parse activity file
|
||||
const raw = readFileSync(activityFile, 'utf-8');
|
||||
|
|
@ -326,7 +327,7 @@ export async function extractMemoriesFromUnit(
|
|||
}));
|
||||
|
||||
// Build prompts
|
||||
const userPrompt = buildExtractionUserPrompt(unitType, unitId, activeMemories, safeTranscript);
|
||||
userPrompt = buildExtractionUserPrompt(unitType, unitId, activeMemories, safeTranscript);
|
||||
|
||||
// Call LLM
|
||||
const response = await llmCallFn(EXTRACTION_SYSTEM, userPrompt);
|
||||
|
|
@ -345,7 +346,18 @@ export async function extractMemoriesFromUnit(
|
|||
// Mark unit as processed
|
||||
markUnitProcessed(unitKey, activityFile);
|
||||
} catch {
|
||||
// Non-fatal — memory extraction failure should never affect auto-mode
|
||||
// Retry once after a brief delay
|
||||
if (userPrompt) {
|
||||
try {
|
||||
await new Promise<void>((r) => setTimeout(r, 2000));
|
||||
const response2 = await llmCallFn(EXTRACTION_SYSTEM, userPrompt);
|
||||
const actions2 = parseMemoryResponse(response2);
|
||||
if (actions2.length > 0) applyMemoryActions(actions2, unitType, unitId);
|
||||
markUnitProcessed(unitKey, activityFile);
|
||||
} catch {
|
||||
// Non-fatal — memory extraction failure should never affect auto-mode
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
_extracting = false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import { deriveState } from "./state.js";
|
|||
import { resolveMilestoneFile, resolveSliceFile } from "./paths.js";
|
||||
import { findMilestoneIds } from "./guided-flow.js";
|
||||
import { isDbAvailable, getMilestoneSlices, getSliceTasks } from "./sf-db.js";
|
||||
import { getWorkerStatuses } from "./parallel-orchestrator.js";
|
||||
import type { MilestoneRegistryEntry } from "./types.js";
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────────────────────────
|
||||
|
|
@ -179,9 +180,21 @@ export async function analyzeParallelEligibility(
|
|||
overlappingIds.add(overlap.mid2);
|
||||
}
|
||||
|
||||
const runningWorkerIds = new Set(
|
||||
getWorkerStatuses(basePath)
|
||||
.filter((w) => w.state === "running")
|
||||
.map((w) => w.milestoneId),
|
||||
);
|
||||
|
||||
for (const result of eligible) {
|
||||
if (overlappingIds.has(result.milestoneId)) {
|
||||
result.reason = "All dependencies satisfied. WARNING: has file overlap with another eligible milestone.";
|
||||
if (!overlappingIds.has(result.milestoneId)) continue;
|
||||
const overlap = fileOverlaps.find((o) => o.mid1 === result.milestoneId || o.mid2 === result.milestoneId);
|
||||
const overlappingWith = overlap ? (overlap.mid1 === result.milestoneId ? overlap.mid2 : overlap.mid1) : undefined;
|
||||
if (overlappingWith && runningWorkerIds.has(overlappingWith)) {
|
||||
result.eligible = false;
|
||||
result.reason = `File overlap with running milestone ${overlappingWith} — will dispatch after it completes.`;
|
||||
} else {
|
||||
result.reason = "All dependencies satisfied. NOTE: file overlap with another eligible milestone.";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1015,6 +1015,18 @@ export function refreshWorkerStatuses(
|
|||
state.totalCost += worker.cost;
|
||||
}
|
||||
|
||||
// Kill workers that have exceeded their timeout
|
||||
const workerTimeoutMs = ((state.config.worker_timeout_minutes ?? 120) * 60_000);
|
||||
for (const [, worker] of state.workers) {
|
||||
if (worker.state === "running" && Date.now() - worker.startedAt > workerTimeoutMs) {
|
||||
if (worker.process) worker.process.kill("SIGTERM");
|
||||
worker.cleanup?.();
|
||||
worker.cleanup = undefined;
|
||||
worker.state = "error";
|
||||
worker.process = null;
|
||||
}
|
||||
}
|
||||
|
||||
// If all workers are in a terminal state (error/stopped), the orchestration
|
||||
// is finished — deactivate and clean up so zombie workers don't persist.
|
||||
const allDead = [...state.workers.values()].every(
|
||||
|
|
|
|||
|
|
@ -193,6 +193,7 @@ export interface AutoSupervisorConfig {
|
|||
soft_timeout_minutes?: number;
|
||||
idle_timeout_minutes?: number;
|
||||
hard_timeout_minutes?: number;
|
||||
phase_timeout_minutes?: number;
|
||||
}
|
||||
|
||||
export interface RemoteQuestionsConfig {
|
||||
|
|
|
|||
|
|
@ -630,5 +630,6 @@ export function resolveParallelConfig(prefs: SFPreferences | undefined): import(
|
|||
merge_strategy: prefs?.parallel?.merge_strategy ?? "per-milestone",
|
||||
auto_merge: prefs?.parallel?.auto_merge ?? "confirm",
|
||||
worker_model: prefs?.parallel?.worker_model,
|
||||
worker_timeout_minutes: prefs?.parallel?.worker_timeout_minutes,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@ A researcher explored the codebase and a planner decomposed the work — you are
|
|||
|
||||
{{runtimeContext}}
|
||||
|
||||
{{memoriesSection}}
|
||||
|
||||
{{phaseAnchorSection}}
|
||||
|
||||
{{resumeSection}}
|
||||
|
|
|
|||
|
|
@ -1001,6 +1001,40 @@ function migrateSchema(db: DbAdapter): void {
|
|||
});
|
||||
}
|
||||
|
||||
if (currentVersion < 17) {
|
||||
ensureColumn(db, "tasks", "verification_status", `ALTER TABLE tasks ADD COLUMN verification_status TEXT NOT NULL DEFAULT ''`);
|
||||
// Backfill verification_status from existing verification_evidence rows so the
|
||||
// prior-task guard works on databases upgraded mid-project (not just new ones).
|
||||
db.exec(`
|
||||
UPDATE tasks
|
||||
SET verification_status = CASE
|
||||
WHEN (SELECT COUNT(*) FROM verification_evidence ve
|
||||
WHERE ve.milestone_id = tasks.milestone_id
|
||||
AND ve.slice_id = tasks.slice_id
|
||||
AND ve.task_id = tasks.id) = 0
|
||||
THEN ''
|
||||
WHEN (SELECT COUNT(*) FROM verification_evidence ve
|
||||
WHERE ve.milestone_id = tasks.milestone_id
|
||||
AND ve.slice_id = tasks.slice_id
|
||||
AND ve.task_id = tasks.id
|
||||
AND ve.exit_code != 0) = 0
|
||||
THEN 'all_pass'
|
||||
WHEN (SELECT COUNT(*) FROM verification_evidence ve
|
||||
WHERE ve.milestone_id = tasks.milestone_id
|
||||
AND ve.slice_id = tasks.slice_id
|
||||
AND ve.task_id = tasks.id
|
||||
AND ve.exit_code = 0) > 0
|
||||
THEN 'partial'
|
||||
ELSE 'all_fail'
|
||||
END
|
||||
WHERE tasks.status IN ('complete', 'done')
|
||||
`);
|
||||
db.prepare("INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)").run({
|
||||
":version": 17,
|
||||
":applied_at": new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
|
||||
db.exec("COMMIT");
|
||||
} catch (err) {
|
||||
db.exec("ROLLBACK");
|
||||
|
|
@ -1599,17 +1633,18 @@ export function insertTask(t: {
|
|||
fullSummaryMd?: string;
|
||||
sequence?: number;
|
||||
planning?: Partial<TaskPlanningRecord>;
|
||||
verificationStatus?: "all_pass" | "partial" | "all_fail" | "";
|
||||
}): void {
|
||||
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
|
||||
currentDb.prepare(
|
||||
`INSERT INTO tasks (
|
||||
milestone_id, slice_id, id, title, status, one_liner, narrative,
|
||||
verification_result, duration, completed_at, blocker_discovered,
|
||||
verification_result, verification_status, duration, completed_at, blocker_discovered,
|
||||
deviations, known_issues, key_files, key_decisions, full_summary_md,
|
||||
description, estimate, files, verify, inputs, expected_output, observability_impact, sequence
|
||||
) VALUES (
|
||||
:milestone_id, :slice_id, :id, :title, :status, :one_liner, :narrative,
|
||||
:verification_result, :duration, :completed_at, :blocker_discovered,
|
||||
:verification_result, :verification_status, :duration, :completed_at, :blocker_discovered,
|
||||
:deviations, :known_issues, :key_files, :key_decisions, :full_summary_md,
|
||||
:description, :estimate, :files, :verify, :inputs, :expected_output, :observability_impact, :sequence
|
||||
)
|
||||
|
|
@ -1619,6 +1654,7 @@ export function insertTask(t: {
|
|||
one_liner = :one_liner,
|
||||
narrative = :narrative,
|
||||
verification_result = :verification_result,
|
||||
verification_status = :verification_status,
|
||||
duration = :duration,
|
||||
completed_at = :completed_at,
|
||||
blocker_discovered = :blocker_discovered,
|
||||
|
|
@ -1644,6 +1680,7 @@ export function insertTask(t: {
|
|||
":one_liner": t.oneLiner ?? "",
|
||||
":narrative": t.narrative ?? "",
|
||||
":verification_result": t.verificationResult ?? "",
|
||||
":verification_status": t.verificationStatus ?? "",
|
||||
":duration": t.duration ?? "",
|
||||
":completed_at": t.status === "done" || t.status === "complete" ? new Date().toISOString() : null,
|
||||
":blocker_discovered": t.blockerDiscovered ? 1 : 0,
|
||||
|
|
@ -1818,6 +1855,7 @@ export interface TaskRow {
|
|||
observability_impact: string;
|
||||
full_plan_md: string;
|
||||
sequence: number;
|
||||
verification_status?: string;
|
||||
}
|
||||
|
||||
function parseTaskArrayColumn(raw: unknown): string[] {
|
||||
|
|
@ -1888,6 +1926,7 @@ function rowToTask(row: Record<string, unknown>): TaskRow {
|
|||
observability_impact: (row["observability_impact"] as string) ?? "",
|
||||
full_plan_md: (row["full_plan_md"] as string) ?? "",
|
||||
sequence: (row["sequence"] as number) ?? 0,
|
||||
verification_status: (row["verification_status"] as string) ?? "",
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -111,6 +111,7 @@ function paramsToTaskRow(params: CompleteTaskParams, completedAt: string): TaskR
|
|||
observability_impact: "",
|
||||
full_plan_md: "",
|
||||
sequence: 0,
|
||||
verification_status: "",
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -178,6 +179,10 @@ export async function handleCompleteTask(
|
|||
// All guards passed — perform writes
|
||||
insertMilestone({ id: params.milestoneId, title: params.milestoneId });
|
||||
insertSlice({ id: params.sliceId, milestoneId: params.milestoneId, title: params.sliceId });
|
||||
const evidence = params.verificationEvidence ?? [];
|
||||
const verificationStatus = evidence.length === 0 ? "" :
|
||||
evidence.every((c) => c.exitCode === 0) ? "all_pass" :
|
||||
evidence.some((c) => c.exitCode === 0) ? "partial" : "all_fail";
|
||||
insertTask({
|
||||
id: params.taskId,
|
||||
sliceId: params.sliceId,
|
||||
|
|
@ -193,6 +198,7 @@ export async function handleCompleteTask(
|
|||
knownIssues: params.knownIssues ?? "None.",
|
||||
keyFiles: params.keyFiles ?? [],
|
||||
keyDecisions: params.keyDecisions ?? [],
|
||||
verificationStatus,
|
||||
});
|
||||
|
||||
for (const evidence of (params.verificationEvidence ?? [])) {
|
||||
|
|
@ -279,15 +285,24 @@ export async function handleCompleteTask(
|
|||
if (!def) continue;
|
||||
const field = taskGateFieldForId(def.id, params);
|
||||
const hasContent = typeof field === "string" && field.trim().length > 0;
|
||||
let verdict: import("../types.js").GateVerdict = hasContent ? "pass" : "omitted";
|
||||
let rationale = hasContent
|
||||
? `${def.promptSection} section populated in task summary`
|
||||
: `${def.promptSection} section left empty — recorded as omitted`;
|
||||
if (verdict === "omitted" && def.minOmissionWords > 0) {
|
||||
const wordCount = rationale.trim().split(/\s+/).filter(Boolean).length;
|
||||
if (wordCount < def.minOmissionWords) {
|
||||
verdict = "flag";
|
||||
rationale = `[⚠ Rationale too short — ${wordCount} words, ${def.minOmissionWords} required for omission] ${rationale}`;
|
||||
}
|
||||
}
|
||||
saveGateResult({
|
||||
milestoneId: params.milestoneId,
|
||||
sliceId: params.sliceId,
|
||||
taskId: params.taskId,
|
||||
gateId: def.id,
|
||||
verdict: hasContent ? "pass" : "omitted",
|
||||
rationale: hasContent
|
||||
? `${def.promptSection} section populated in task summary`
|
||||
: `${def.promptSection} section left empty — recorded as omitted`,
|
||||
verdict,
|
||||
rationale,
|
||||
findings: hasContent ? (field as string).trim() : "",
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -449,13 +449,26 @@ export async function executeSaveGateResult(
|
|||
}
|
||||
|
||||
try {
|
||||
let effectiveVerdict: string = params.verdict;
|
||||
let effectiveRationale = params.rationale;
|
||||
if (effectiveVerdict === "omitted") {
|
||||
const def = (GATE_REGISTRY as Record<string, { minOmissionWords?: number }>)[params.gateId];
|
||||
const minWords = def?.minOmissionWords ?? 0;
|
||||
if (minWords > 0) {
|
||||
const wordCount = effectiveRationale.trim().split(/\s+/).filter(Boolean).length;
|
||||
if (wordCount < minWords) {
|
||||
effectiveVerdict = "flag";
|
||||
effectiveRationale = `[⚠ Rationale too short — ${wordCount} words, ${minWords} required for omission] ${effectiveRationale}`;
|
||||
}
|
||||
}
|
||||
}
|
||||
saveGateResult({
|
||||
milestoneId: params.milestoneId,
|
||||
sliceId: params.sliceId,
|
||||
gateId: params.gateId,
|
||||
taskId: params.taskId ?? "",
|
||||
verdict: params.verdict,
|
||||
rationale: params.rationale,
|
||||
verdict: effectiveVerdict as import("../types.js").GateVerdict,
|
||||
rationale: effectiveRationale,
|
||||
findings: params.findings ?? "",
|
||||
});
|
||||
invalidateStateCache();
|
||||
|
|
|
|||
|
|
@ -455,6 +455,8 @@ export interface ParallelConfig {
|
|||
auto_merge: AutoMergeMode;
|
||||
/** Optional model override for parallel milestone workers (e.g. "claude-haiku-4-5"). */
|
||||
worker_model?: string;
|
||||
/** Minutes before a running worker is killed as hung. Default: 120. */
|
||||
worker_timeout_minutes?: number;
|
||||
}
|
||||
|
||||
// ─── Reactive Task Execution Types ───────────────────────────────────────
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue