Merge auto-hardening: 10 structural fixes for reliable multi-day auto operation

Merges the auto-hardening branch, which closes all audit-identified structural holes in the SF auto-mode loop, memory, verification, health, and parallel systems. See the individual commits for detailed change descriptions. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-18 16:48:38 +02:00 · 2026-04-18 16:48:38 +02:00 · 9724cb437a
commit 9724cb437a
parent 4ee188e43e 9a04fef925
16 changed files with 319 additions and 19 deletions
--- a/src/resources/extensions/sf/auto-dispatch.ts
+++ b/src/resources/extensions/sf/auto-dispatch.ts
@ -12,8 +12,8 @@
 import type { SFState } from "./types.js";
 import type { SFPreferences } from "./preferences.js";
 import type { UatType } from "./files.js";
-import { loadFile, extractUatType, loadActiveOverrides } from "./files.js";
-import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone } from "./sf-db.js";
+import { loadFile, extractUatType, loadActiveOverrides, parseDeferredRequirements } from "./files.js";
+import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone, getSliceTasks } from "./sf-db.js";
 import { extractVerdict, isAcceptableUatVerdict } from "./verdict-parser.js";

 import {
@ -684,6 +684,30 @@ export const DISPATCH_RULES: DispatchRule[] = [
      return null;
    },
  },
+  {
+    // Guard evaluated while a slice is executing: returns a "stop" action when
+    // the task immediately before the active one (ordered by sequence, then id)
+    // has verification_status "all_fail". Returns null in every other case so
+    // the remaining dispatch rules are evaluated.
+    name: "executing → prior-task verification all-fail guard",
+    match: async ({ state, mid }) => {
+      if (state.phase !== "executing" || !state.activeTask) return null;
+      if (!state.activeSlice) return null;
+      // Task rows come from the DB; without it there is nothing to inspect.
+      if (!isDbAvailable()) return null;
+      const sid = state.activeSlice.id;
+      const tid = state.activeTask.id;
+      // Sort a copy: Array.prototype.sort reorders in place and would otherwise
+      // mutate the array handed back by getSliceTasks.
+      const sortedTasks = [...getSliceTasks(mid, sid)].sort(
+        (a, b) => (a.sequence ?? 0) - (b.sequence ?? 0) || a.id.localeCompare(b.id),
+      );
+      const currentIdx = sortedTasks.findIndex((t) => t.id === tid);
+      // Index 0 (first task) and -1 (active task not found) have no predecessor.
+      if (currentIdx <= 0) return null;
+      const priorTask = sortedTasks[currentIdx - 1];
+      if (priorTask?.verification_status !== "all_fail") return null;
+      return {
+        action: "stop",
+        reason: `Task ${priorTask.id} in slice ${sid} had all verification checks fail — stopping before dispatching ${tid}. Fix verification in the prior task or re-run it.`,
+        level: "error",
+      };
+    },
+  },
  {
    name: "executing → execute-task",
    match: async ({ state, mid, basePath }) => {
@ -843,6 +867,18 @@ export const DISPATCH_RULES: DispatchRule[] = [
        logWarning("dispatch", `verification class check failed: ${err instanceof Error ? err.message : String(err)}`);
      }

+      // P5-A: Advisory check for deferred requirements targeting this milestone
+      try {
+        const deferred = parseDeferredRequirements(basePath);
+        const unaddressed = deferred.filter((r) => r.deferredTo === mid);
+        if (unaddressed.length > 0) {
+          const ids = unaddressed.map((r) => r.id).join(", ");
+          logWarning("dispatch", `Milestone ${mid} has ${unaddressed.length} deferred requirement(s) (${ids}) that were not validated. Review before completing.`);
+        }
+      } catch {
+        // Non-fatal advisory
+      }
+
      return {
        action: "dispatch",
        unitType: "complete-milestone",
--- a/src/resources/extensions/sf/auto-prompts.ts
+++ b/src/resources/extensions/sf/auto-prompts.ts
@ -36,6 +36,18 @@ import { readPhaseAnchor, formatAnchorForPrompt } from "./phase-anchor.js";
 import { logWarning } from "./workflow-logger.js";
 import { inlineGraphSubgraph } from "./graph-context.js";

+// ─── Memory Injection ─────────────────────────────────────────────────────────
+
+/**
+ * Build the prompt block listing the highest-ranked active memories.
+ *
+ * The memory store is loaded lazily via a dynamic import. Any failure while
+ * loading, ranking, or formatting yields an empty string, so prompt
+ * construction proceeds without a memories block.
+ *
+ * @param limit Maximum number of memories requested from the store (default 5).
+ * @returns The formatted block, or "" when anything goes wrong.
+ */
+async function buildMemoriesBlock(limit = 5): Promise<string> {
+  try {
+    const store = await import("./memory-store.js");
+    const { getActiveMemoriesRanked: rankActive, formatMemoriesForPrompt: render } = store;
+    const ranked = rankActive(limit);
+    return render(ranked);
+  } catch {
+    return "";
+  }
+}
+
 // ─── Preamble Cap ─────────────────────────────────────────────────────────────

 const MAX_PREAMBLE_CHARS = 30_000;
@ -1095,6 +1107,8 @@ export async function buildPlanMilestonePrompt(mid: string, midTitle: string, ba
  }
  const knowledgeInlinePM = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge");
  if (knowledgeInlinePM) inlined.push(knowledgeInlinePM);
+  const memoriesBlockPM = await buildMemoriesBlock(5);
+  if (memoriesBlockPM) inlined.push(memoriesBlockPM);
  inlined.push(inlineTemplate("roadmap", "Roadmap"));
  if (inlineLevel === "full") {
    inlined.push(inlineTemplate("decisions", "Decisions"));
@ -1413,6 +1427,7 @@ export async function buildExecuteTaskPrompt(
    : "";

  const phaseAnchorSection = planAnchor ? formatAnchorForPrompt(planAnchor) : "";
+  const memoriesSection = await buildMemoriesBlock(3);

  // Task-scoped gates owned by execute-task (Q5/Q6/Q7). Pull only the
  // gates that plan-slice actually seeded for this task — tasks with no
@ -1428,6 +1443,7 @@ export async function buildExecuteTaskPrompt(
  return loadPrompt("execute-task", {
    overridesSection,
    runtimeContext,
+    memoriesSection,
    phaseAnchorSection,
    workingDirectory: base,
    milestoneId: mid, sliceId: sid, sliceTitle: sTitle, taskId: tid, taskTitle: tTitle,
--- a/src/resources/extensions/sf/auto/loop-deps.ts
+++ b/src/resources/extensions/sf/auto/loop-deps.ts
@ -64,7 +64,7 @@ export interface LoopDeps {
  // Pre-dispatch health gate
  preDispatchHealthGate: (
    basePath: string,
-  ) => Promise<{ proceed: boolean; reason?: string; fixesApplied: string[] }>;
+  ) => Promise<{ proceed: boolean; reason?: string; issues?: string[]; fixesApplied: string[] }>;

  // Worktree sync
  syncProjectRootToWorktree: (
--- a/src/resources/extensions/sf/auto/loop.ts
+++ b/src/resources/extensions/sf/auto/loop.ts
@ -95,6 +95,45 @@ function checkMemoryPressure(): { pressured: boolean; heapMB: number; limitMB: n
  return { pressured: pct > MEMORY_PRESSURE_THRESHOLD, heapMB, limitMB, pct };
 }

+/**
+ * Holds the phase promise that was still pending when the most recent phase
+ * timeout fired. Promise.race() only stops waiting; it does not cancel the
+ * losing promise, so the loop awaits this value before starting new work to
+ * keep the timed-out phase from writing state alongside the next iteration.
+ * Null when there is nothing to drain.
+ */
+let _danglingPhasePromise: Promise<unknown> | null = null;
+
+/**
+ * Run a phase with a deadline.
+ *
+ * @param name Phase label; embedded in the timeout error message.
+ * @param fn Starts the phase and returns its promise.
+ * @param timeoutMs Deadline in milliseconds.
+ * @returns Whatever the phase resolves to, if it settles before the deadline.
+ * @throws Error with message `phase-timeout:<name>` when the deadline wins the
+ *   race; otherwise rethrows the phase's own rejection unchanged.
+ *
+ * When the rethrown error's message starts with "phase-timeout:", the phase
+ * promise is recorded in _danglingPhasePromise so the caller can drain it.
+ * The timer is always cleared, whichever side wins.
+ */
+async function withPhaseTimeout<T>(
+  name: string,
+  fn: () => Promise<T>,
+  timeoutMs: number,
+): Promise<T> {
+  let handle: ReturnType<typeof setTimeout> | undefined;
+  const pending = fn();
+  const deadline = new Promise<never>((_resolve, reject) => {
+    handle = setTimeout(() => {
+      reject(new Error(`phase-timeout:${name}`));
+    }, timeoutMs);
+  });
+  try {
+    const outcome = await Promise.race([pending, deadline]);
+    return outcome;
+  } catch (failure) {
+    const isPhaseTimeout =
+      failure instanceof Error && failure.message.startsWith("phase-timeout:");
+    if (isPhaseTimeout) {
+      // Remember the still-running work so the next iteration can await it.
+      _danglingPhasePromise = pending;
+    }
+    throw failure;
+  } finally {
+    if (handle !== undefined) clearTimeout(handle);
+  }
+}
+
 /**
 * Main auto-mode execution loop. Iterates: derive → dispatch → guards →
 * runUnit → finalize → repeat. Exits when s.active becomes false or a
@ -208,10 +247,21 @@ export async function autoLoop(
      break;
    }

+    // ── Drain any dangling phase promise before starting new work ──
+    // Promise.race() on timeout does not cancel the underlying async fn; that
+    // fn keeps running and may mutate state after the loop has advanced.
+    // Awaiting its completion here ensures no concurrent state writes.
+    if (_danglingPhasePromise !== null) {
+      const dangling = _danglingPhasePromise;
+      _danglingPhasePromise = null;
+      try { await dangling; } catch { /* ignore — result is irrelevant */ }
+    }
+
    try {
      // ── Blanket try/catch: one bad iteration must not kill the session
      const prefs = deps.loadEffectiveSFPreferences()?.preferences;
      const uokFlags = resolveUokFlags(prefs);
+      const phaseTimeoutMs = ((prefs?.auto_supervisor?.phase_timeout_minutes ?? 10) * 60_000);

      // ── Check sidecar queue before deriveState ──
      let sidecarItem: SidecarItem | undefined;
@ -410,8 +460,42 @@ export async function autoLoop(
      }

      if (!sidecarItem) {
+        // ── P4-A: Doctor issues → reassess escalation ─────────────────────
+        // If the health gate detects issues that mention slice IDs (state
+        // inconsistencies that reassessment can fix), queue reassess instead
+        // of pausing auto-mode. This runs separately from the gate inside
+        // runPreDispatch so we can intercept *before* the break path.
+        try {
+          const healthCheck = await deps.preDispatchHealthGate(s.basePath);
+          if (!healthCheck.proceed && healthCheck.issues && healthCheck.issues.length > 0) {
+            const sliceRefPattern = /\bS\d+\b/;
+            const hasSliceRef = healthCheck.issues.some((issue) => sliceRefPattern.test(issue));
+            if (hasSliceRef) {
+              const sfState = await deps.deriveState(s.basePath);
+              const mid = sfState.activeMilestone?.id;
+              const midTitle = sfState.activeMilestone?.title ?? "";
+              const sliceId = sfState.activeSlice?.id ?? "reassess";
+              if (mid) {
+                ctx.ui.notify(`Health issues detected with slice references — queuing reassess-roadmap instead of pausing.`, "warning");
+                const { buildReassessRoadmapPrompt } = await import("../auto-prompts.js");
+                const reassessPrompt = await buildReassessRoadmapPrompt(mid, midTitle, sliceId, s.basePath);
+                s.sidecarQueue.unshift({
+                  kind: "hook",
+                  unitType: "reassess-roadmap",
+                  unitId: `${mid}/${sliceId}`,
+                  prompt: `## Doctor Health Issues\n\n${healthCheck.issues.map(i => `- ${i}`).join("\n")}\n\n${reassessPrompt}`,
+                });
+                finishTurn("retry");
+                continue;
+              }
+            }
+          }
+        } catch {
+          // Non-fatal — fall through to normal runPreDispatch path
+        }
+
        // ── Phase 1: Pre-dispatch ─────────────────────────────────────────
-        const preDispatchResult = await runPreDispatch(ic, loopState);
+        const preDispatchResult = await withPhaseTimeout("preDispatch", () => runPreDispatch(ic, loopState), phaseTimeoutMs / 2);
        deps.uokObserver?.onPhaseResult("pre-dispatch", preDispatchResult.action);
        if (preDispatchResult.action === "break") {
          finishTurn("stopped", "manual-attention", "pre-dispatch-break");
@ -433,7 +517,7 @@ export async function autoLoop(
        }

        // ── Phase 3: Dispatch ─────────────────────────────────────────────
-        const dispatchResult = await runDispatch(ic, preData, loopState);
+        const dispatchResult = await withPhaseTimeout("dispatch", () => runDispatch(ic, preData, loopState), phaseTimeoutMs);
        deps.uokObserver?.onPhaseResult("dispatch", dispatchResult.action);
        if (dispatchResult.action === "break") {
          finishTurn("stopped", "manual-attention", "dispatch-break");
@ -481,7 +565,7 @@ export async function autoLoop(

      // ── Phase 5: Finalize ───────────────────────────────────────────────

-      const finalizeResult = await runFinalize(ic, iterData, loopState, sidecarItem);
+      const finalizeResult = await withPhaseTimeout("finalize", () => runFinalize(ic, iterData, loopState, sidecarItem), phaseTimeoutMs);
      deps.uokObserver?.onPhaseResult("finalize", finalizeResult.action, {
        unitType: iterData.unitType,
        unitId: iterData.unitId,
@ -537,6 +621,19 @@ export async function autoLoop(
        break;
      }

+      // ── Phase timeout: log, increment counter, continue ──
+      if (msg.startsWith("phase-timeout:")) {
+        const phaseName = msg.slice("phase-timeout:".length);
+        loopState.consecutiveFinalizeTimeouts++;
+        ctx.ui.notify(
+          `Phase "${phaseName}" timed out (${loopState.consecutiveFinalizeTimeouts} consecutive) — skipping iteration and continuing.`,
+          "warning",
+        );
+        debugLog("autoLoop", { phase: "phase-timeout", phaseName, consecutiveFinalizeTimeouts: loopState.consecutiveFinalizeTimeouts, iteration });
+        finishTurn("retry", "timeout", msg);
+        continue;
+      }
+
      // ── Credential cooldown: wait and retry with bounded budget ──
      // A 429 triggers a 30s credential backoff in AuthStorage. If the SDK's
      // getApiKey() retries couldn't outlast the window, the error surfaces
--- a/src/resources/extensions/sf/files.ts
+++ b/src/resources/extensions/sf/files.ts
@ -3,7 +3,7 @@
 // Used by state derivation and the status widget.
 // Pure functions, zero Pi dependencies - uses only Node built-ins.

-import { promises as fs } from 'node:fs';
+import { promises as fs, readFileSync } from 'node:fs';
 import { resolve } from 'node:path';
 import { atomicWriteAsync } from './atomic-write.js';
 import { resolveMilestoneFile, relMilestoneFile, resolveSfRootFile } from './paths.js';
@ -531,6 +531,35 @@ export function parseRequirementCounts(content: string | null): RequirementCount
  return counts;
 }

+// ─── Deferred Requirement Parser ──────────────────────────────────────────
+
+/**
+ * Parse the requirement headings under the "## Deferred" section of REQUIREMENTS.md.
+ *
+ * Only heading lines of the form `### <ID> — <title>` are considered; every
+ * other line in the section is skipped. A target milestone is read from the
+ * title when it contains `→ M001` or `deferred to M001` (matched
+ * case-insensitively). The captured id is upper-cased so callers can compare
+ * it to a milestone id with `===`.
+ *
+ * @param basePath Base path handed to resolveSfRootFile to locate REQUIREMENTS.
+ * @returns One entry per matching heading; `deferredTo` is null when the title
+ *   names no milestone. Returns [] when the file or the section is missing, or
+ *   when reading/parsing throws (best-effort: this function never throws).
+ */
+export function parseDeferredRequirements(basePath: string): Array<{ id: string; text: string; deferredTo: string | null }> {
+  try {
+    const reqPath = resolveSfRootFile(basePath, "REQUIREMENTS");
+    if (!reqPath) return [];
+    const content = readFileSync(reqPath, "utf-8");
+    const deferredSection = extractSection(content, "Deferred", 2);
+    if (!deferredSection) return [];
+    const results: Array<{ id: string; text: string; deferredTo: string | null }> = [];
+    for (const line of deferredSection.split("\n")) {
+      const idMatch = line.match(/###\s+([A-Z][\w-]*\d+)\s+—\s+(.+)/);
+      if (!idMatch) continue;
+      const id = idMatch[1];
+      const text = idMatch[2].trim();
+      const deferMatch = text.match(/(?:→|deferred to)\s+(M\d+)/i);
+      // The /i flag also accepts "deferred to m001"; normalize the capture so
+      // it compares equal to the canonical "M001" form.
+      results.push({ id, text, deferredTo: deferMatch ? deferMatch[1].toUpperCase() : null });
+    }
+    return results;
+  } catch {
+    return [];
+  }
+}
+
 // ─── Task Plan Must-Haves Parser ───────────────────────────────────────────

 /**
--- a/src/resources/extensions/sf/gate-registry.ts
+++ b/src/resources/extensions/sf/gate-registry.ts
@ -40,6 +40,8 @@ export interface GateDefinition {
  /** H3 section header used in the artifact the turn writes
   *  (e.g. "Operational Readiness" for Q8 in the slice summary). */
  promptSection: string;
+  /** Minimum word count required in the rationale when verdict is "omitted". 0 = no minimum. */
+  minOmissionWords: number;
 }

 export const GATE_REGISTRY = {
@ -55,6 +57,7 @@ export const GATE_REGISTRY = {
      "If none apply, return verdict 'omitted' with rationale explaining why.",
    ].join("\n"),
    promptSection: "Abuse Surface",
+    minOmissionWords: 20,
  },
  Q4: {
    id: "Q4",
@ -68,6 +71,7 @@ export const GATE_REGISTRY = {
      "If no existing requirements are affected, return verdict 'omitted'.",
    ].join("\n"),
    promptSection: "Broken Promises",
+    minOmissionWords: 0,
  },
  Q5: {
    id: "Q5",
@ -81,6 +85,7 @@ export const GATE_REGISTRY = {
      "Return verdict 'omitted' only if the task has no external dependencies.",
    ].join("\n"),
    promptSection: "Failure Modes",
+    minOmissionWords: 15,
  },
  Q6: {
    id: "Q6",
@ -93,6 +98,7 @@ export const GATE_REGISTRY = {
      "Return verdict 'omitted' if the task has no runtime load dimension.",
    ].join("\n"),
    promptSection: "Load Profile",
+    minOmissionWords: 10,
  },
  Q7: {
    id: "Q7",
@ -105,6 +111,7 @@ export const GATE_REGISTRY = {
      "Return verdict 'omitted' only if the task has no meaningful negative surface.",
    ].join("\n"),
    promptSection: "Negative Tests",
+    minOmissionWords: 15,
  },
  Q8: {
    id: "Q8",
@ -118,6 +125,7 @@ export const GATE_REGISTRY = {
      "Return verdict 'omitted' only for slices with no runtime behavior at all.",
    ].join("\n"),
    promptSection: "Operational Readiness",
+    minOmissionWords: 0,
  },
  MV01: {
    id: "MV01",
@ -130,6 +138,7 @@ export const GATE_REGISTRY = {
      "Return verdict 'flag' if any criterion is unmet or unverifiable.",
    ].join("\n"),
    promptSection: "Success Criteria Checklist",
+    minOmissionWords: 0,
  },
  MV02: {
    id: "MV02",
@ -142,6 +151,7 @@ export const GATE_REGISTRY = {
      "Flag missing artifacts and slices with outstanding follow-ups or known limitations.",
    ].join("\n"),
    promptSection: "Slice Delivery Audit",
+    minOmissionWords: 0,
  },
  MV03: {
    id: "MV03",
@ -153,6 +163,7 @@ export const GATE_REGISTRY = {
      "Flag gaps where two slices were built in isolation with no integration evidence.",
    ].join("\n"),
    promptSection: "Cross-Slice Integration",
+    minOmissionWords: 0,
  },
  MV04: {
    id: "MV04",
@ -164,6 +175,7 @@ export const GATE_REGISTRY = {
      "Flag requirements that slices claim to advance but no artifact proves.",
    ].join("\n"),
    promptSection: "Requirement Coverage",
+    minOmissionWords: 0,
  },
 } as const satisfies Record<GateId, GateDefinition>;

--- a/src/resources/extensions/sf/memory-extractor.ts
+++ b/src/resources/extensions/sf/memory-extractor.ts
@ -309,6 +309,7 @@ export async function extractMemoriesFromUnit(
  _extracting = true;
  _lastExtractionTime = now;

+  let userPrompt: string | undefined;
  try {
    // Read and parse activity file
    const raw = readFileSync(activityFile, 'utf-8');
@ -326,7 +327,7 @@ export async function extractMemoriesFromUnit(
    }));

    // Build prompts
-    const userPrompt = buildExtractionUserPrompt(unitType, unitId, activeMemories, safeTranscript);
+    userPrompt = buildExtractionUserPrompt(unitType, unitId, activeMemories, safeTranscript);

    // Call LLM
    const response = await llmCallFn(EXTRACTION_SYSTEM, userPrompt);
@ -345,7 +346,18 @@ export async function extractMemoriesFromUnit(
    // Mark unit as processed
    markUnitProcessed(unitKey, activityFile);
  } catch {
-    // Non-fatal — memory extraction failure should never affect auto-mode
+    // Retry once after a brief delay
+    if (userPrompt) {
+      try {
+        await new Promise<void>((r) => setTimeout(r, 2000));
+        const response2 = await llmCallFn(EXTRACTION_SYSTEM, userPrompt);
+        const actions2 = parseMemoryResponse(response2);
+        if (actions2.length > 0) applyMemoryActions(actions2, unitType, unitId);
+        markUnitProcessed(unitKey, activityFile);
+      } catch {
+        // Non-fatal — memory extraction failure should never affect auto-mode
+      }
+    }
  } finally {
    _extracting = false;
  }
--- a/src/resources/extensions/sf/parallel-eligibility.ts
+++ b/src/resources/extensions/sf/parallel-eligibility.ts
@ -9,6 +9,7 @@ import { deriveState } from "./state.js";
 import { resolveMilestoneFile, resolveSliceFile } from "./paths.js";
 import { findMilestoneIds } from "./guided-flow.js";
 import { isDbAvailable, getMilestoneSlices, getSliceTasks } from "./sf-db.js";
+import { getWorkerStatuses } from "./parallel-orchestrator.js";
 import type { MilestoneRegistryEntry } from "./types.js";

 // ─── Types ───────────────────────────────────────────────────────────────────
@ -179,9 +180,21 @@ export async function analyzeParallelEligibility(
    overlappingIds.add(overlap.mid2);
  }

+  const runningWorkerIds = new Set(
+    getWorkerStatuses(basePath)
+      .filter((w) => w.state === "running")
+      .map((w) => w.milestoneId),
+  );
+
  for (const result of eligible) {
-    if (overlappingIds.has(result.milestoneId)) {
-      result.reason = "All dependencies satisfied. WARNING: has file overlap with another eligible milestone.";
+    if (!overlappingIds.has(result.milestoneId)) continue;
+    const overlap = fileOverlaps.find((o) => o.mid1 === result.milestoneId || o.mid2 === result.milestoneId);
+    const overlappingWith = overlap ? (overlap.mid1 === result.milestoneId ? overlap.mid2 : overlap.mid1) : undefined;
+    if (overlappingWith && runningWorkerIds.has(overlappingWith)) {
+      result.eligible = false;
+      result.reason = `File overlap with running milestone ${overlappingWith} — will dispatch after it completes.`;
+    } else {
+      result.reason = "All dependencies satisfied. NOTE: file overlap with another eligible milestone.";
    }
  }

--- a/src/resources/extensions/sf/parallel-orchestrator.ts
+++ b/src/resources/extensions/sf/parallel-orchestrator.ts
@ -1015,6 +1015,18 @@ export function refreshWorkerStatuses(
    state.totalCost += worker.cost;
  }

+  // Kill workers that have exceeded their timeout
+  const workerTimeoutMs = ((state.config.worker_timeout_minutes ?? 120) * 60_000);
+  for (const [, worker] of state.workers) {
+    if (worker.state === "running" && Date.now() - worker.startedAt > workerTimeoutMs) {
+      if (worker.process) worker.process.kill("SIGTERM");
+      worker.cleanup?.();
+      worker.cleanup = undefined;
+      worker.state = "error";
+      worker.process = null;
+    }
+  }
+
  // If all workers are in a terminal state (error/stopped), the orchestration
  // is finished — deactivate and clean up so zombie workers don't persist.
  const allDead = [...state.workers.values()].every(
--- a/src/resources/extensions/sf/preferences-types.ts
+++ b/src/resources/extensions/sf/preferences-types.ts
@ -193,6 +193,7 @@ export interface AutoSupervisorConfig {
  soft_timeout_minutes?: number;
  idle_timeout_minutes?: number;
  hard_timeout_minutes?: number;
+  phase_timeout_minutes?: number;
 }

 export interface RemoteQuestionsConfig {
--- a/src/resources/extensions/sf/preferences.ts
+++ b/src/resources/extensions/sf/preferences.ts
@ -630,5 +630,6 @@ export function resolveParallelConfig(prefs: SFPreferences | undefined): import(
    merge_strategy: prefs?.parallel?.merge_strategy ?? "per-milestone",
    auto_merge: prefs?.parallel?.auto_merge ?? "confirm",
    worker_model: prefs?.parallel?.worker_model,
+    worker_timeout_minutes: prefs?.parallel?.worker_timeout_minutes,
  };
 }
--- a/src/resources/extensions/sf/prompts/execute-task.md
+++ b/src/resources/extensions/sf/prompts/execute-task.md
@ -12,6 +12,8 @@ A researcher explored the codebase and a planner decomposed the work — you are

 {{runtimeContext}}

+{{memoriesSection}}
+
 {{phaseAnchorSection}}

 {{resumeSection}}
--- a/src/resources/extensions/sf/sf-db.ts
+++ b/src/resources/extensions/sf/sf-db.ts
@ -1001,6 +1001,40 @@ function migrateSchema(db: DbAdapter): void {
      });
    }

+    if (currentVersion < 17) {
+      ensureColumn(db, "tasks", "verification_status", `ALTER TABLE tasks ADD COLUMN verification_status TEXT NOT NULL DEFAULT ''`);
+      // Backfill verification_status from existing verification_evidence rows so the
+      // prior-task guard works on databases upgraded mid-project (not just new ones).
+      db.exec(`
+        UPDATE tasks
+        SET verification_status = CASE
+          WHEN (SELECT COUNT(*) FROM verification_evidence ve
+                WHERE ve.milestone_id = tasks.milestone_id
+                  AND ve.slice_id    = tasks.slice_id
+                  AND ve.task_id     = tasks.id) = 0
+            THEN ''
+          WHEN (SELECT COUNT(*) FROM verification_evidence ve
+                WHERE ve.milestone_id = tasks.milestone_id
+                  AND ve.slice_id    = tasks.slice_id
+                  AND ve.task_id     = tasks.id
+                  AND ve.exit_code  != 0) = 0
+            THEN 'all_pass'
+          WHEN (SELECT COUNT(*) FROM verification_evidence ve
+                WHERE ve.milestone_id = tasks.milestone_id
+                  AND ve.slice_id    = tasks.slice_id
+                  AND ve.task_id     = tasks.id
+                  AND ve.exit_code   = 0) > 0
+            THEN 'partial'
+          ELSE 'all_fail'
+        END
+        WHERE tasks.status IN ('complete', 'done')
+      `);
+      db.prepare("INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)").run({
+        ":version": 17,
+        ":applied_at": new Date().toISOString(),
+      });
+    }
+
    db.exec("COMMIT");
  } catch (err) {
    db.exec("ROLLBACK");
@ -1599,17 +1633,18 @@ export function insertTask(t: {
  fullSummaryMd?: string;
  sequence?: number;
  planning?: Partial<TaskPlanningRecord>;
+  verificationStatus?: "all_pass" | "partial" | "all_fail" | "";
 }): void {
  if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open");
  currentDb.prepare(
    `INSERT INTO tasks (
      milestone_id, slice_id, id, title, status, one_liner, narrative,
-      verification_result, duration, completed_at, blocker_discovered,
+      verification_result, verification_status, duration, completed_at, blocker_discovered,
      deviations, known_issues, key_files, key_decisions, full_summary_md,
      description, estimate, files, verify, inputs, expected_output, observability_impact, sequence
    ) VALUES (
      :milestone_id, :slice_id, :id, :title, :status, :one_liner, :narrative,
-      :verification_result, :duration, :completed_at, :blocker_discovered,
+      :verification_result, :verification_status, :duration, :completed_at, :blocker_discovered,
      :deviations, :known_issues, :key_files, :key_decisions, :full_summary_md,
      :description, :estimate, :files, :verify, :inputs, :expected_output, :observability_impact, :sequence
    )
@ -1619,6 +1654,7 @@ export function insertTask(t: {
      one_liner = :one_liner,
      narrative = :narrative,
      verification_result = :verification_result,
+      verification_status = :verification_status,
      duration = :duration,
      completed_at = :completed_at,
      blocker_discovered = :blocker_discovered,
@ -1644,6 +1680,7 @@ export function insertTask(t: {
    ":one_liner": t.oneLiner ?? "",
    ":narrative": t.narrative ?? "",
    ":verification_result": t.verificationResult ?? "",
+    ":verification_status": t.verificationStatus ?? "",
    ":duration": t.duration ?? "",
    ":completed_at": t.status === "done" || t.status === "complete" ? new Date().toISOString() : null,
    ":blocker_discovered": t.blockerDiscovered ? 1 : 0,
@ -1818,6 +1855,7 @@ export interface TaskRow {
  observability_impact: string;
  full_plan_md: string;
  sequence: number;
+  verification_status?: string;
 }

 function parseTaskArrayColumn(raw: unknown): string[] {
@ -1888,6 +1926,7 @@ function rowToTask(row: Record<string, unknown>): TaskRow {
    observability_impact: (row["observability_impact"] as string) ?? "",
    full_plan_md: (row["full_plan_md"] as string) ?? "",
    sequence: (row["sequence"] as number) ?? 0,
+    verification_status: (row["verification_status"] as string) ?? "",
  };
 }

--- a/src/resources/extensions/sf/tools/complete-task.ts
+++ b/src/resources/extensions/sf/tools/complete-task.ts
@ -111,6 +111,7 @@ function paramsToTaskRow(params: CompleteTaskParams, completedAt: string): TaskR
    observability_impact: "",
    full_plan_md: "",
    sequence: 0,
+    verification_status: "",
  };
 }

@ -178,6 +179,10 @@ export async function handleCompleteTask(
    // All guards passed — perform writes
    insertMilestone({ id: params.milestoneId, title: params.milestoneId });
    insertSlice({ id: params.sliceId, milestoneId: params.milestoneId, title: params.sliceId });
+    const evidence = params.verificationEvidence ?? [];
+    const verificationStatus = evidence.length === 0 ? "" :
+      evidence.every((c) => c.exitCode === 0) ? "all_pass" :
+      evidence.some((c) => c.exitCode === 0) ? "partial" : "all_fail";
    insertTask({
      id: params.taskId,
      sliceId: params.sliceId,
@ -193,6 +198,7 @@ export async function handleCompleteTask(
      knownIssues: params.knownIssues ?? "None.",
      keyFiles: params.keyFiles ?? [],
      keyDecisions: params.keyDecisions ?? [],
+      verificationStatus,
    });

    for (const evidence of (params.verificationEvidence ?? [])) {
@ -279,15 +285,24 @@ export async function handleCompleteTask(
        if (!def) continue;
        const field = taskGateFieldForId(def.id, params);
        const hasContent = typeof field === "string" && field.trim().length > 0;
+        let verdict: import("../types.js").GateVerdict = hasContent ? "pass" : "omitted";
+        let rationale = hasContent
+          ? `${def.promptSection} section populated in task summary`
+          : `${def.promptSection} section left empty — recorded as omitted`;
+        if (verdict === "omitted" && def.minOmissionWords > 0) {
+          const wordCount = rationale.trim().split(/\s+/).filter(Boolean).length;
+          if (wordCount < def.minOmissionWords) {
+            verdict = "flag";
+            rationale = `[⚠ Rationale too short — ${wordCount} words, ${def.minOmissionWords} required for omission] ${rationale}`;
+          }
+        }
        saveGateResult({
          milestoneId: params.milestoneId,
          sliceId: params.sliceId,
          taskId: params.taskId,
          gateId: def.id,
-          verdict: hasContent ? "pass" : "omitted",
-          rationale: hasContent
-            ? `${def.promptSection} section populated in task summary`
-            : `${def.promptSection} section left empty — recorded as omitted`,
+          verdict,
+          rationale,
          findings: hasContent ? (field as string).trim() : "",
        });
      }
--- a/src/resources/extensions/sf/tools/workflow-tool-executors.ts
+++ b/src/resources/extensions/sf/tools/workflow-tool-executors.ts
@ -449,13 +449,26 @@ export async function executeSaveGateResult(
  }

  try {
+    let effectiveVerdict: string = params.verdict;
+    let effectiveRationale = params.rationale;
+    if (effectiveVerdict === "omitted") {
+      const def = (GATE_REGISTRY as Record<string, { minOmissionWords?: number }>)[params.gateId];
+      const minWords = def?.minOmissionWords ?? 0;
+      if (minWords > 0) {
+        const wordCount = effectiveRationale.trim().split(/\s+/).filter(Boolean).length;
+        if (wordCount < minWords) {
+          effectiveVerdict = "flag";
+          effectiveRationale = `[⚠ Rationale too short — ${wordCount} words, ${minWords} required for omission] ${effectiveRationale}`;
+        }
+      }
+    }
    saveGateResult({
      milestoneId: params.milestoneId,
      sliceId: params.sliceId,
      gateId: params.gateId,
      taskId: params.taskId ?? "",
-      verdict: params.verdict,
-      rationale: params.rationale,
+      verdict: effectiveVerdict as import("../types.js").GateVerdict,
+      rationale: effectiveRationale,
      findings: params.findings ?? "",
    });
    invalidateStateCache();
--- a/src/resources/extensions/sf/types.ts
+++ b/src/resources/extensions/sf/types.ts
@ -455,6 +455,8 @@ export interface ParallelConfig {
  auto_merge: AutoMergeMode;
  /** Optional model override for parallel milestone workers (e.g. "claude-haiku-4-5"). */
  worker_model?: string;
+  /** Minutes before a running worker is killed as hung. Default: 120. */
+  worker_timeout_minutes?: number;
 }

 // ─── Reactive Task Execution Types ───────────────────────────────────────