Merge auto-hardening: 10 structural fixes for reliable multi-day auto operation

Merges the auto-hardening branch which closes all audit-identified structural
holes in the SF auto-mode loop, memory, verification, health, and parallel systems.

See individual commits for detailed change descriptions.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Mikael Hugo 2026-04-18 16:48:38 +02:00
commit 9724cb437a
16 changed files with 319 additions and 19 deletions

View file

@ -12,8 +12,8 @@
import type { SFState } from "./types.js";
import type { SFPreferences } from "./preferences.js";
import type { UatType } from "./files.js";
import { loadFile, extractUatType, loadActiveOverrides } from "./files.js";
import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone } from "./sf-db.js";
import { loadFile, extractUatType, loadActiveOverrides, parseDeferredRequirements } from "./files.js";
import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone, getSliceTasks } from "./sf-db.js";
import { extractVerdict, isAcceptableUatVerdict } from "./verdict-parser.js";
import {
@ -684,6 +684,30 @@ export const DISPATCH_RULES: DispatchRule[] = [
return null;
},
},
{
  name: "executing → prior-task verification all-fail guard",
  // Blocks dispatch of the current task when the immediately preceding task
  // (by sequence, then id) in the same slice recorded an all-fail
  // verification status, so auto-mode stops instead of building on top of
  // unverified work. Returns null (no opinion) in every other case.
  match: async ({ state, mid }) => {
    if (state.phase !== "executing" || !state.activeTask) return null;
    if (!state.activeSlice) return null;
    if (!isDbAvailable()) return null;
    const sid = state.activeSlice.id;
    const tid = state.activeTask.id;
    // Copy before sorting — Array.prototype.sort mutates in place, and the
    // array returned by getSliceTasks may be shared or cached by the DB layer.
    const sortedTasks = [...getSliceTasks(mid, sid)].sort(
      (a, b) => (a.sequence ?? 0) - (b.sequence ?? 0) || a.id.localeCompare(b.id),
    );
    const currentIdx = sortedTasks.findIndex((t) => t.id === tid);
    if (currentIdx > 0) {
      const priorTask = sortedTasks[currentIdx - 1];
      if (priorTask?.verification_status === "all_fail") {
        return {
          action: "stop",
          reason: `Task ${priorTask.id} in slice ${sid} had all verification checks fail — stopping before dispatching ${tid}. Fix verification in the prior task or re-run it.`,
          level: "error",
        };
      }
    }
    return null;
  },
},
{
name: "executing → execute-task",
match: async ({ state, mid, basePath }) => {
@ -843,6 +867,18 @@ export const DISPATCH_RULES: DispatchRule[] = [
logWarning("dispatch", `verification class check failed: ${err instanceof Error ? err.message : String(err)}`);
}
// P5-A: Advisory check for deferred requirements targeting this milestone
try {
const deferred = parseDeferredRequirements(basePath);
const unaddressed = deferred.filter((r) => r.deferredTo === mid);
if (unaddressed.length > 0) {
const ids = unaddressed.map((r) => r.id).join(", ");
logWarning("dispatch", `Milestone ${mid} has ${unaddressed.length} deferred requirement(s) (${ids}) that were not validated. Review before completing.`);
}
} catch {
// Non-fatal advisory
}
return {
action: "dispatch",
unitType: "complete-milestone",

View file

@ -36,6 +36,18 @@ import { readPhaseAnchor, formatAnchorForPrompt } from "./phase-anchor.js";
import { logWarning } from "./workflow-logger.js";
import { inlineGraphSubgraph } from "./graph-context.js";
// ─── Memory Injection ─────────────────────────────────────────────────────────
async function buildMemoriesBlock(limit = 5): Promise<string> {
  // The memory store is loaded lazily and treated as best-effort: any failure
  // (module unavailable, store unreadable) degrades to an empty block rather
  // than breaking prompt assembly.
  try {
    const store = await import("./memory-store.js");
    const ranked = store.getActiveMemoriesRanked(limit);
    return store.formatMemoriesForPrompt(ranked);
  } catch {
    return "";
  }
}
// ─── Preamble Cap ─────────────────────────────────────────────────────────────
const MAX_PREAMBLE_CHARS = 30_000;
@ -1095,6 +1107,8 @@ export async function buildPlanMilestonePrompt(mid: string, midTitle: string, ba
}
const knowledgeInlinePM = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
if (knowledgeInlinePM) inlined.push(knowledgeInlinePM);
const memoriesBlockPM = await buildMemoriesBlock(5);
if (memoriesBlockPM) inlined.push(memoriesBlockPM);
inlined.push(inlineTemplate("roadmap", "Roadmap"));
if (inlineLevel === "full") {
inlined.push(inlineTemplate("decisions", "Decisions"));
@ -1413,6 +1427,7 @@ export async function buildExecuteTaskPrompt(
: "";
const phaseAnchorSection = planAnchor ? formatAnchorForPrompt(planAnchor) : "";
const memoriesSection = await buildMemoriesBlock(3);
// Task-scoped gates owned by execute-task (Q5/Q6/Q7). Pull only the
// gates that plan-slice actually seeded for this task — tasks with no
@ -1428,6 +1443,7 @@ export async function buildExecuteTaskPrompt(
return loadPrompt("execute-task", {
overridesSection,
runtimeContext,
memoriesSection,
phaseAnchorSection,
workingDirectory: base,
milestoneId: mid, sliceId: sid, sliceTitle: sTitle, taskId: tid, taskTitle: tTitle,

View file

@ -64,7 +64,7 @@ export interface LoopDeps {
// Pre-dispatch health gate
preDispatchHealthGate: (
basePath: string,
) => Promise<{ proceed: boolean; reason?: string; fixesApplied: string[] }>;
) => Promise<{ proceed: boolean; reason?: string; issues?: string[]; fixesApplied: string[] }>;
// Worktree sync
syncProjectRootToWorktree: (

View file

@ -95,6 +95,45 @@ function checkMemoryPressure(): { pressured: boolean; heapMB: number; limitMB: n
return { pressured: pct > MEMORY_PRESSURE_THRESHOLD, heapMB, limitMB, pct };
}
/**
 * Holds the still-running promise of the most recently timed-out phase so the
 * next loop iteration can drain it before doing new work. Promise.race()
 * rejects on timeout but cannot cancel the losing promise; that phase keeps
 * executing and could otherwise mutate state concurrently with the next
 * iteration.
 */
let _danglingPhasePromise: Promise<unknown> | null = null;

/**
 * Run a phase function under a timeout.
 *
 * On timeout the returned promise rejects with an Error whose message is
 * "phase-timeout:<name>" (the blanket catch keys off that prefix), and the
 * still-running phase promise is parked in _danglingPhasePromise for the
 * caller to drain before the next iteration.
 */
async function withPhaseTimeout<T>(
  name: string,
  fn: () => Promise<T>,
  timeoutMs: number,
): Promise<T> {
  const running = fn();
  let handle: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_, reject) => {
    handle = setTimeout(() => {
      reject(new Error(`phase-timeout:${name}`));
    }, timeoutMs);
  });
  try {
    const winner = await Promise.race([running, deadline]);
    return winner;
  } catch (err) {
    const timedOut = err instanceof Error && err.message.startsWith("phase-timeout:");
    if (timedOut) {
      _danglingPhasePromise = running;
    }
    throw err;
  } finally {
    // Always release the timer so a fast phase does not keep the process alive.
    if (handle !== undefined) clearTimeout(handle);
  }
}
/**
* Main auto-mode execution loop. Iterates: derive dispatch guards
* runUnit finalize repeat. Exits when s.active becomes false or a
@ -208,10 +247,21 @@ export async function autoLoop(
break;
}
// ── Drain any dangling phase promise before starting new work ──
// Promise.race() on timeout does not cancel the underlying async fn; that
// fn keeps running and may mutate state after the loop has advanced.
// Awaiting its completion here ensures no concurrent state writes.
if (_danglingPhasePromise !== null) {
const dangling = _danglingPhasePromise;
_danglingPhasePromise = null;
try { await dangling; } catch { /* ignore — result is irrelevant */ }
}
try {
// ── Blanket try/catch: one bad iteration must not kill the session
const prefs = deps.loadEffectiveSFPreferences()?.preferences;
const uokFlags = resolveUokFlags(prefs);
const phaseTimeoutMs = ((prefs?.auto_supervisor?.phase_timeout_minutes ?? 10) * 60_000);
// ── Check sidecar queue before deriveState ──
let sidecarItem: SidecarItem | undefined;
@ -410,8 +460,42 @@ export async function autoLoop(
}
if (!sidecarItem) {
// ── P4-A: Doctor issues → reassess escalation ─────────────────────
// If the health gate detects issues that mention slice IDs (state
// inconsistencies that reassessment can fix), queue reassess instead
// of pausing auto-mode. This runs separately from the gate inside
// runPreDispatch so we can intercept *before* the break path.
try {
const healthCheck = await deps.preDispatchHealthGate(s.basePath);
if (!healthCheck.proceed && healthCheck.issues && healthCheck.issues.length > 0) {
const sliceRefPattern = /\bS\d+\b/;
const hasSliceRef = healthCheck.issues.some((issue) => sliceRefPattern.test(issue));
if (hasSliceRef) {
const sfState = await deps.deriveState(s.basePath);
const mid = sfState.activeMilestone?.id;
const midTitle = sfState.activeMilestone?.title ?? "";
const sliceId = sfState.activeSlice?.id ?? "reassess";
if (mid) {
ctx.ui.notify(`Health issues detected with slice references — queuing reassess-roadmap instead of pausing.`, "warning");
const { buildReassessRoadmapPrompt } = await import("../auto-prompts.js");
const reassessPrompt = await buildReassessRoadmapPrompt(mid, midTitle, sliceId, s.basePath);
s.sidecarQueue.unshift({
kind: "hook",
unitType: "reassess-roadmap",
unitId: `${mid}/${sliceId}`,
prompt: `## Doctor Health Issues\n\n${healthCheck.issues.map(i => `- ${i}`).join("\n")}\n\n${reassessPrompt}`,
});
finishTurn("retry");
continue;
}
}
}
} catch {
// Non-fatal — fall through to normal runPreDispatch path
}
// ── Phase 1: Pre-dispatch ─────────────────────────────────────────
const preDispatchResult = await runPreDispatch(ic, loopState);
const preDispatchResult = await withPhaseTimeout("preDispatch", () => runPreDispatch(ic, loopState), phaseTimeoutMs / 2);
deps.uokObserver?.onPhaseResult("pre-dispatch", preDispatchResult.action);
if (preDispatchResult.action === "break") {
finishTurn("stopped", "manual-attention", "pre-dispatch-break");
@ -433,7 +517,7 @@ export async function autoLoop(
}
// ── Phase 3: Dispatch ─────────────────────────────────────────────
const dispatchResult = await runDispatch(ic, preData, loopState);
const dispatchResult = await withPhaseTimeout("dispatch", () => runDispatch(ic, preData, loopState), phaseTimeoutMs);
deps.uokObserver?.onPhaseResult("dispatch", dispatchResult.action);
if (dispatchResult.action === "break") {
finishTurn("stopped", "manual-attention", "dispatch-break");
@ -481,7 +565,7 @@ export async function autoLoop(
// ── Phase 5: Finalize ───────────────────────────────────────────────
const finalizeResult = await runFinalize(ic, iterData, loopState, sidecarItem);
const finalizeResult = await withPhaseTimeout("finalize", () => runFinalize(ic, iterData, loopState, sidecarItem), phaseTimeoutMs);
deps.uokObserver?.onPhaseResult("finalize", finalizeResult.action, {
unitType: iterData.unitType,
unitId: iterData.unitId,
@ -537,6 +621,19 @@ export async function autoLoop(
break;
}
// ── Phase timeout: log, increment counter, continue ──
if (msg.startsWith("phase-timeout:")) {
const phaseName = msg.slice("phase-timeout:".length);
loopState.consecutiveFinalizeTimeouts++;
ctx.ui.notify(
`Phase "${phaseName}" timed out (${loopState.consecutiveFinalizeTimeouts} consecutive) — skipping iteration and continuing.`,
"warning",
);
debugLog("autoLoop", { phase: "phase-timeout", phaseName, consecutiveFinalizeTimeouts: loopState.consecutiveFinalizeTimeouts, iteration });
finishTurn("retry", "timeout", msg);
continue;
}
// ── Credential cooldown: wait and retry with bounded budget ──
// A 429 triggers a 30s credential backoff in AuthStorage. If the SDK's
// getApiKey() retries couldn't outlast the window, the error surfaces

View file

@ -3,7 +3,7 @@
// Used by state derivation and the status widget.
// Pure functions, zero Pi dependencies - uses only Node built-ins.
import { promises as fs } from 'node:fs';
import { promises as fs, readFileSync } from 'node:fs';
import { resolve } from 'node:path';
import { atomicWriteAsync } from './atomic-write.js';
import { resolveMilestoneFile, relMilestoneFile, resolveSfRootFile } from './paths.js';
@ -531,6 +531,35 @@ export function parseRequirementCounts(content: string | null): RequirementCount
return counts;
}
// ─── Deferred Requirement Parser ──────────────────────────────────────────
/**
 * Parse requirement entries under the "## Deferred" section of REQUIREMENTS.md.
 * Looks for milestone references in the form `→ M001` or `(deferred to M001)`
 * in the entry text.
 *
 * @param basePath Project root used to resolve REQUIREMENTS.md.
 * @returns One entry per `### <ID> — <text>` heading in the Deferred section.
 *   `deferredTo` is the referenced milestone id, normalized to uppercase, or
 *   null when no target milestone is named. Returns [] on any error (missing
 *   file, no Deferred section) — callers treat this as best-effort advisory.
 */
export function parseDeferredRequirements(basePath: string): Array<{ id: string; text: string; deferredTo: string | null }> {
  try {
    const reqPath = resolveSfRootFile(basePath, "REQUIREMENTS");
    if (!reqPath) return [];
    const content = readFileSync(reqPath, "utf-8");
    const deferredSection = extractSection(content, "Deferred", 2);
    if (!deferredSection) return [];
    const results: Array<{ id: string; text: string; deferredTo: string | null }> = [];
    for (const line of deferredSection.split("\n")) {
      const idMatch = line.match(/###\s+([A-Z][\w-]*\d+)\s+—\s+(.+)/);
      if (!idMatch) continue;
      const id = idMatch[1];
      const text = idMatch[2].trim();
      const deferMatch = text.match(/(?:→|deferred to)\s+(M\d+)/i);
      // The milestone match is case-insensitive, so normalize to uppercase —
      // consumers compare deferredTo against canonical ids like "M001", and an
      // unnormalized "m001" would otherwise silently never match.
      results.push({ id, text, deferredTo: deferMatch ? deferMatch[1].toUpperCase() : null });
    }
    return results;
  } catch {
    return [];
  }
}
// ─── Task Plan Must-Haves Parser ───────────────────────────────────────────
/**

View file

@ -40,6 +40,8 @@ export interface GateDefinition {
/** H3 section header used in the artifact the turn writes
* (e.g. "Operational Readiness" for Q8 in the slice summary). */
promptSection: string;
/** Minimum word count required in the rationale when verdict is "omitted". 0 = no minimum. */
minOmissionWords: number;
}
export const GATE_REGISTRY = {
@ -55,6 +57,7 @@ export const GATE_REGISTRY = {
"If none apply, return verdict 'omitted' with rationale explaining why.",
].join("\n"),
promptSection: "Abuse Surface",
minOmissionWords: 20,
},
Q4: {
id: "Q4",
@ -68,6 +71,7 @@ export const GATE_REGISTRY = {
"If no existing requirements are affected, return verdict 'omitted'.",
].join("\n"),
promptSection: "Broken Promises",
minOmissionWords: 0,
},
Q5: {
id: "Q5",
@ -81,6 +85,7 @@ export const GATE_REGISTRY = {
"Return verdict 'omitted' only if the task has no external dependencies.",
].join("\n"),
promptSection: "Failure Modes",
minOmissionWords: 15,
},
Q6: {
id: "Q6",
@ -93,6 +98,7 @@ export const GATE_REGISTRY = {
"Return verdict 'omitted' if the task has no runtime load dimension.",
].join("\n"),
promptSection: "Load Profile",
minOmissionWords: 10,
},
Q7: {
id: "Q7",
@ -105,6 +111,7 @@ export const GATE_REGISTRY = {
"Return verdict 'omitted' only if the task has no meaningful negative surface.",
].join("\n"),
promptSection: "Negative Tests",
minOmissionWords: 15,
},
Q8: {
id: "Q8",
@ -118,6 +125,7 @@ export const GATE_REGISTRY = {
"Return verdict 'omitted' only for slices with no runtime behavior at all.",
].join("\n"),
promptSection: "Operational Readiness",
minOmissionWords: 0,
},
MV01: {
id: "MV01",
@ -130,6 +138,7 @@ export const GATE_REGISTRY = {
"Return verdict 'flag' if any criterion is unmet or unverifiable.",
].join("\n"),
promptSection: "Success Criteria Checklist",
minOmissionWords: 0,
},
MV02: {
id: "MV02",
@ -142,6 +151,7 @@ export const GATE_REGISTRY = {
"Flag missing artifacts and slices with outstanding follow-ups or known limitations.",
].join("\n"),
promptSection: "Slice Delivery Audit",
minOmissionWords: 0,
},
MV03: {
id: "MV03",
@ -153,6 +163,7 @@ export const GATE_REGISTRY = {
"Flag gaps where two slices were built in isolation with no integration evidence.",
].join("\n"),
promptSection: "Cross-Slice Integration",
minOmissionWords: 0,
},
MV04: {
id: "MV04",
@ -164,6 +175,7 @@ export const GATE_REGISTRY = {
"Flag requirements that slices claim to advance but no artifact proves.",
].join("\n"),
promptSection: "Requirement Coverage",
minOmissionWords: 0,
},
} as const satisfies Record<GateId, GateDefinition>;

View file

@ -309,6 +309,7 @@ export async function extractMemoriesFromUnit(
_extracting = true;
_lastExtractionTime = now;
let userPrompt: string | undefined;
try {
// Read and parse activity file
const raw = readFileSync(activityFile, 'utf-8');
@ -326,7 +327,7 @@ export async function extractMemoriesFromUnit(
}));
// Build prompts
const userPrompt = buildExtractionUserPrompt(unitType, unitId, activeMemories, safeTranscript);
userPrompt = buildExtractionUserPrompt(unitType, unitId, activeMemories, safeTranscript);
// Call LLM
const response = await llmCallFn(EXTRACTION_SYSTEM, userPrompt);
@ -345,7 +346,18 @@ export async function extractMemoriesFromUnit(
// Mark unit as processed
markUnitProcessed(unitKey, activityFile);
} catch {
// Non-fatal — memory extraction failure should never affect auto-mode
// Retry once after a brief delay
if (userPrompt) {
try {
await new Promise<void>((r) => setTimeout(r, 2000));
const response2 = await llmCallFn(EXTRACTION_SYSTEM, userPrompt);
const actions2 = parseMemoryResponse(response2);
if (actions2.length > 0) applyMemoryActions(actions2, unitType, unitId);
markUnitProcessed(unitKey, activityFile);
} catch {
// Non-fatal — memory extraction failure should never affect auto-mode
}
}
} finally {
_extracting = false;
}

View file

@ -9,6 +9,7 @@ import { deriveState } from "./state.js";
import { resolveMilestoneFile, resolveSliceFile } from "./paths.js";
import { findMilestoneIds } from "./guided-flow.js";
import { isDbAvailable, getMilestoneSlices, getSliceTasks } from "./sf-db.js";
import { getWorkerStatuses } from "./parallel-orchestrator.js";
import type { MilestoneRegistryEntry } from "./types.js";
// ─── Types ───────────────────────────────────────────────────────────────────
@ -179,9 +180,21 @@ export async function analyzeParallelEligibility(
overlappingIds.add(overlap.mid2);
}
const runningWorkerIds = new Set(
getWorkerStatuses(basePath)
.filter((w) => w.state === "running")
.map((w) => w.milestoneId),
);
for (const result of eligible) {
if (overlappingIds.has(result.milestoneId)) {
result.reason = "All dependencies satisfied. WARNING: has file overlap with another eligible milestone.";
if (!overlappingIds.has(result.milestoneId)) continue;
const overlap = fileOverlaps.find((o) => o.mid1 === result.milestoneId || o.mid2 === result.milestoneId);
const overlappingWith = overlap ? (overlap.mid1 === result.milestoneId ? overlap.mid2 : overlap.mid1) : undefined;
if (overlappingWith && runningWorkerIds.has(overlappingWith)) {
result.eligible = false;
result.reason = `File overlap with running milestone ${overlappingWith} — will dispatch after it completes.`;
} else {
result.reason = "All dependencies satisfied. NOTE: file overlap with another eligible milestone.";
}
}

View file

@ -1015,6 +1015,18 @@ export function refreshWorkerStatuses(
state.totalCost += worker.cost;
}
// Kill workers that have exceeded their timeout
const workerTimeoutMs = ((state.config.worker_timeout_minutes ?? 120) * 60_000);
for (const [, worker] of state.workers) {
if (worker.state === "running" && Date.now() - worker.startedAt > workerTimeoutMs) {
if (worker.process) worker.process.kill("SIGTERM");
worker.cleanup?.();
worker.cleanup = undefined;
worker.state = "error";
worker.process = null;
}
}
// If all workers are in a terminal state (error/stopped), the orchestration
// is finished — deactivate and clean up so zombie workers don't persist.
const allDead = [...state.workers.values()].every(

View file

@ -193,6 +193,7 @@ export interface AutoSupervisorConfig {
soft_timeout_minutes?: number;
idle_timeout_minutes?: number;
hard_timeout_minutes?: number;
phase_timeout_minutes?: number;
}
export interface RemoteQuestionsConfig {

View file

@ -630,5 +630,6 @@ export function resolveParallelConfig(prefs: SFPreferences | undefined): import(
merge_strategy: prefs?.parallel?.merge_strategy ?? "per-milestone",
auto_merge: prefs?.parallel?.auto_merge ?? "confirm",
worker_model: prefs?.parallel?.worker_model,
worker_timeout_minutes: prefs?.parallel?.worker_timeout_minutes,
};
}

View file

@ -12,6 +12,8 @@ A researcher explored the codebase and a planner decomposed the work — you are
{{runtimeContext}}
{{memoriesSection}}
{{phaseAnchorSection}}
{{resumeSection}}

View file

@ -1001,6 +1001,40 @@ function migrateSchema(db: DbAdapter): void {
});
}
if (currentVersion < 17) {
ensureColumn(db, "tasks", "verification_status", `ALTER TABLE tasks ADD COLUMN verification_status TEXT NOT NULL DEFAULT ''`);
// Backfill verification_status from existing verification_evidence rows so the
// prior-task guard works on databases upgraded mid-project (not just new ones).
db.exec(`
UPDATE tasks
SET verification_status = CASE
WHEN (SELECT COUNT(*) FROM verification_evidence ve
WHERE ve.milestone_id = tasks.milestone_id
AND ve.slice_id = tasks.slice_id
AND ve.task_id = tasks.id) = 0
THEN ''
WHEN (SELECT COUNT(*) FROM verification_evidence ve
WHERE ve.milestone_id = tasks.milestone_id
AND ve.slice_id = tasks.slice_id
AND ve.task_id = tasks.id
AND ve.exit_code != 0) = 0
THEN 'all_pass'
WHEN (SELECT COUNT(*) FROM verification_evidence ve
WHERE ve.milestone_id = tasks.milestone_id
AND ve.slice_id = tasks.slice_id
AND ve.task_id = tasks.id
AND ve.exit_code = 0) > 0
THEN 'partial'
ELSE 'all_fail'
END
WHERE tasks.status IN ('complete', 'done')
`);
db.prepare("INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)").run({
":version": 17,
":applied_at": new Date().toISOString(),
});
}
db.exec("COMMIT");
} catch (err) {
db.exec("ROLLBACK");
@ -1599,17 +1633,18 @@ export function insertTask(t: {
fullSummaryMd?: string;
sequence?: number;
planning?: Partial<TaskPlanningRecord>;
verificationStatus?: "all_pass" | "partial" | "all_fail" | "";
}): void {
if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
currentDb.prepare(
`INSERT INTO tasks (
milestone_id, slice_id, id, title, status, one_liner, narrative,
verification_result, duration, completed_at, blocker_discovered,
verification_result, verification_status, duration, completed_at, blocker_discovered,
deviations, known_issues, key_files, key_decisions, full_summary_md,
description, estimate, files, verify, inputs, expected_output, observability_impact, sequence
) VALUES (
:milestone_id, :slice_id, :id, :title, :status, :one_liner, :narrative,
:verification_result, :duration, :completed_at, :blocker_discovered,
:verification_result, :verification_status, :duration, :completed_at, :blocker_discovered,
:deviations, :known_issues, :key_files, :key_decisions, :full_summary_md,
:description, :estimate, :files, :verify, :inputs, :expected_output, :observability_impact, :sequence
)
@ -1619,6 +1654,7 @@ export function insertTask(t: {
one_liner = :one_liner,
narrative = :narrative,
verification_result = :verification_result,
verification_status = :verification_status,
duration = :duration,
completed_at = :completed_at,
blocker_discovered = :blocker_discovered,
@ -1644,6 +1680,7 @@ export function insertTask(t: {
":one_liner": t.oneLiner ?? "",
":narrative": t.narrative ?? "",
":verification_result": t.verificationResult ?? "",
":verification_status": t.verificationStatus ?? "",
":duration": t.duration ?? "",
":completed_at": t.status === "done" || t.status === "complete" ? new Date().toISOString() : null,
":blocker_discovered": t.blockerDiscovered ? 1 : 0,
@ -1818,6 +1855,7 @@ export interface TaskRow {
observability_impact: string;
full_plan_md: string;
sequence: number;
verification_status?: string;
}
function parseTaskArrayColumn(raw: unknown): string[] {
@ -1888,6 +1926,7 @@ function rowToTask(row: Record<string, unknown>): TaskRow {
observability_impact: (row["observability_impact"] as string) ?? "",
full_plan_md: (row["full_plan_md"] as string) ?? "",
sequence: (row["sequence"] as number) ?? 0,
verification_status: (row["verification_status"] as string) ?? "",
};
}

View file

@ -111,6 +111,7 @@ function paramsToTaskRow(params: CompleteTaskParams, completedAt: string): TaskR
observability_impact: "",
full_plan_md: "",
sequence: 0,
verification_status: "",
};
}
@ -178,6 +179,10 @@ export async function handleCompleteTask(
// All guards passed — perform writes
insertMilestone({ id: params.milestoneId, title: params.milestoneId });
insertSlice({ id: params.sliceId, milestoneId: params.milestoneId, title: params.sliceId });
const evidence = params.verificationEvidence ?? [];
const verificationStatus = evidence.length === 0 ? "" :
evidence.every((c) => c.exitCode === 0) ? "all_pass" :
evidence.some((c) => c.exitCode === 0) ? "partial" : "all_fail";
insertTask({
id: params.taskId,
sliceId: params.sliceId,
@ -193,6 +198,7 @@ export async function handleCompleteTask(
knownIssues: params.knownIssues ?? "None.",
keyFiles: params.keyFiles ?? [],
keyDecisions: params.keyDecisions ?? [],
verificationStatus,
});
for (const evidence of (params.verificationEvidence ?? [])) {
@ -279,15 +285,24 @@ export async function handleCompleteTask(
if (!def) continue;
const field = taskGateFieldForId(def.id, params);
const hasContent = typeof field === "string" && field.trim().length > 0;
let verdict: import("../types.js").GateVerdict = hasContent ? "pass" : "omitted";
let rationale = hasContent
? `${def.promptSection} section populated in task summary`
: `${def.promptSection} section left empty — recorded as omitted`;
if (verdict === "omitted" && def.minOmissionWords > 0) {
const wordCount = rationale.trim().split(/\s+/).filter(Boolean).length;
if (wordCount < def.minOmissionWords) {
verdict = "flag";
rationale = `[⚠ Rationale too short — ${wordCount} words, ${def.minOmissionWords} required for omission] ${rationale}`;
}
}
saveGateResult({
milestoneId: params.milestoneId,
sliceId: params.sliceId,
taskId: params.taskId,
gateId: def.id,
verdict: hasContent ? "pass" : "omitted",
rationale: hasContent
? `${def.promptSection} section populated in task summary`
: `${def.promptSection} section left empty — recorded as omitted`,
verdict,
rationale,
findings: hasContent ? (field as string).trim() : "",
});
}

View file

@ -449,13 +449,26 @@ export async function executeSaveGateResult(
}
try {
let effectiveVerdict: string = params.verdict;
let effectiveRationale = params.rationale;
if (effectiveVerdict === "omitted") {
const def = (GATE_REGISTRY as Record<string, { minOmissionWords?: number }>)[params.gateId];
const minWords = def?.minOmissionWords ?? 0;
if (minWords > 0) {
const wordCount = effectiveRationale.trim().split(/\s+/).filter(Boolean).length;
if (wordCount < minWords) {
effectiveVerdict = "flag";
effectiveRationale = `[⚠ Rationale too short — ${wordCount} words, ${minWords} required for omission] ${effectiveRationale}`;
}
}
}
saveGateResult({
milestoneId: params.milestoneId,
sliceId: params.sliceId,
gateId: params.gateId,
taskId: params.taskId ?? "",
verdict: params.verdict,
rationale: params.rationale,
verdict: effectiveVerdict as import("../types.js").GateVerdict,
rationale: effectiveRationale,
findings: params.findings ?? "",
});
invalidateStateCache();

View file

@ -455,6 +455,8 @@ export interface ParallelConfig {
auto_merge: AutoMergeMode;
/** Optional model override for parallel milestone workers (e.g. "claude-haiku-4-5"). */
worker_model?: string;
/** Minutes before a running worker is killed as hung. Default: 120. */
worker_timeout_minutes?: number;
}
// ─── Reactive Task Execution Types ───────────────────────────────────────