From ca5890df2e2582ffa42eddf41d5d04ba56d5b249 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sat, 18 Apr 2026 16:26:25 +0200 Subject: [PATCH] Auto-hardening: 10 structural fixes for reliable multi-day autonomous operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements all fixes from the auto-hardening audit plan: P1-A: Per-phase timeout watchdog — withPhaseTimeout() wraps preDispatch/dispatch/finalize; on timeout emits warning, increments consecutiveFinalizeTimeouts, continues loop. Configurable via preferences.auto_supervisor.phase_timeout_minutes (default: 10). P1-B: Verified already wired (MAX_COOLDOWN_RETRIES → stopAuto+break). No change needed. P1-C: Worker timeout in parallel orchestrator — kills workers running beyond parallel.worker_timeout_minutes (default: 120 min) in refreshWorkerStatuses(). P2-A: Memory injection into dispatch prompts — buildMemoriesBlock() appended to plan-milestone inlined[] context and added as memoriesSection in execute-task. P2-B: Memory extraction retry — one 2s-delayed retry in the catch block of extractMemoriesFromUnit(); second failure is silently swallowed (non-fatal). P3-A: Partial verification state in DB — verificationStatus ("all_pass"/"partial"/"all_fail") derived from verificationEvidence.exitCode array and stored in new tasks column. New dispatch rule blocks next task when prior task has all_fail status. P3-B: Gate omission rationale enforcement — minOmissionWords added to GateDefinition (Q3=20, Q5=15, Q6=10, Q7=15). Short rationale upgrades verdict "omitted" → "flag". P4-A: Doctor issues → reassess escalation — pre-dispatch health check in loop.ts detects issues referencing slice IDs and queues reassess-roadmap sidecar instead of pausing. P4-B: File overlap preemption — analyzeParallelEligibility() sets eligible:false when the overlapping milestone is currently running (not just eligible/queued). 
P5-A: Deferred requirement tracking — parseDeferredRequirements() added to files.ts; completing-milestone rule warns (via logWarning) when deferred reqs targeting the milestone were not validated before completion. Co-Authored-By: Claude Sonnet 4.6 --- src/resources/extensions/sf/auto-dispatch.ts | 40 +++++++++- src/resources/extensions/sf/auto-prompts.ts | 16 ++++ src/resources/extensions/sf/auto/loop-deps.ts | 2 +- src/resources/extensions/sf/auto/loop.ts | 78 ++++++++++++++++++- src/resources/extensions/sf/files.ts | 31 +++++++- src/resources/extensions/sf/gate-registry.ts | 12 +++ .../extensions/sf/memory-extractor.ts | 16 +++- .../extensions/sf/parallel-eligibility.ts | 17 +++- .../extensions/sf/parallel-orchestrator.ts | 12 +++ .../extensions/sf/preferences-types.ts | 1 + src/resources/extensions/sf/preferences.ts | 1 + .../extensions/sf/prompts/execute-task.md | 2 + src/resources/extensions/sf/sf-db.ts | 17 +++- .../extensions/sf/tools/complete-task.ts | 23 +++++- .../sf/tools/workflow-tool-executors.ts | 17 +++- src/resources/extensions/sf/types.ts | 2 + 16 files changed, 268 insertions(+), 19 deletions(-) diff --git a/src/resources/extensions/sf/auto-dispatch.ts b/src/resources/extensions/sf/auto-dispatch.ts index 2d12a8bc1..066137234 100644 --- a/src/resources/extensions/sf/auto-dispatch.ts +++ b/src/resources/extensions/sf/auto-dispatch.ts @@ -12,8 +12,8 @@ import type { SFState } from "./types.js"; import type { SFPreferences } from "./preferences.js"; import type { UatType } from "./files.js"; -import { loadFile, extractUatType, loadActiveOverrides } from "./files.js"; -import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone } from "./sf-db.js"; +import { loadFile, extractUatType, loadActiveOverrides, parseDeferredRequirements } from "./files.js"; +import { isDbAvailable, getMilestoneSlices, getPendingGates, markAllGatesOmitted, getMilestone, getSliceTasks } from "./sf-db.js"; import { extractVerdict, 
isAcceptableUatVerdict } from "./verdict-parser.js"; import { @@ -684,6 +684,30 @@ export const DISPATCH_RULES: DispatchRule[] = [ return null; }, }, + { + name: "executing → prior-task verification all-fail guard", + match: async ({ state, mid }) => { + if (state.phase !== "executing" || !state.activeTask) return null; + if (!state.activeSlice) return null; + if (!isDbAvailable()) return null; + const sid = state.activeSlice.id; + const tid = state.activeTask.id; + const sliceTasks = getSliceTasks(mid, sid); + const sortedTasks = sliceTasks.sort((a, b) => (a.sequence ?? 0) - (b.sequence ?? 0) || a.id.localeCompare(b.id)); + const currentIdx = sortedTasks.findIndex((t) => t.id === tid); + if (currentIdx > 0) { + const priorTask = sortedTasks[currentIdx - 1]; + if (priorTask?.verification_status === "all_fail") { + return { + action: "stop", + reason: `Task ${priorTask.id} in slice ${sid} had all verification checks fail — stopping before dispatching ${tid}. Fix verification in the prior task or re-run it.`, + level: "error", + }; + } + } + return null; + }, + }, { name: "executing → execute-task", match: async ({ state, mid, basePath }) => { @@ -843,6 +867,18 @@ export const DISPATCH_RULES: DispatchRule[] = [ logWarning("dispatch", `verification class check failed: ${err instanceof Error ? err.message : String(err)}`); } + // P5-A: Advisory check for deferred requirements targeting this milestone + try { + const deferred = parseDeferredRequirements(basePath); + const unaddressed = deferred.filter((r) => r.deferredTo === mid); + if (unaddressed.length > 0) { + const ids = unaddressed.map((r) => r.id).join(", "); + logWarning("dispatch", `Milestone ${mid} has ${unaddressed.length} deferred requirement(s) (${ids}) that were not validated. 
Review before completing.`); + } + } catch { + // Non-fatal advisory + } + return { action: "dispatch", unitType: "complete-milestone", diff --git a/src/resources/extensions/sf/auto-prompts.ts b/src/resources/extensions/sf/auto-prompts.ts index b55d743fd..9ed65f427 100644 --- a/src/resources/extensions/sf/auto-prompts.ts +++ b/src/resources/extensions/sf/auto-prompts.ts @@ -36,6 +36,18 @@ import { readPhaseAnchor, formatAnchorForPrompt } from "./phase-anchor.js"; import { logWarning } from "./workflow-logger.js"; import { inlineGraphSubgraph } from "./graph-context.js"; +// ─── Memory Injection ───────────────────────────────────────────────────────── + +async function buildMemoriesBlock(limit = 5): Promise { + try { + const { getActiveMemoriesRanked, formatMemoriesForPrompt } = await import("./memory-store.js"); + const memories = getActiveMemoriesRanked(limit); + return formatMemoriesForPrompt(memories); + } catch { + return ""; + } +} + // ─── Preamble Cap ───────────────────────────────────────────────────────────── const MAX_PREAMBLE_CHARS = 30_000; @@ -1095,6 +1107,8 @@ export async function buildPlanMilestonePrompt(mid: string, midTitle: string, ba } const knowledgeInlinePM = await inlineGsdRootFile(base, "knowledge.md", "Project Knowledge"); if (knowledgeInlinePM) inlined.push(knowledgeInlinePM); + const memoriesBlockPM = await buildMemoriesBlock(5); + if (memoriesBlockPM) inlined.push(memoriesBlockPM); inlined.push(inlineTemplate("roadmap", "Roadmap")); if (inlineLevel === "full") { inlined.push(inlineTemplate("decisions", "Decisions")); @@ -1413,6 +1427,7 @@ export async function buildExecuteTaskPrompt( : ""; const phaseAnchorSection = planAnchor ? formatAnchorForPrompt(planAnchor) : ""; + const memoriesSection = await buildMemoriesBlock(3); // Task-scoped gates owned by execute-task (Q5/Q6/Q7). 
Pull only the // gates that plan-slice actually seeded for this task — tasks with no @@ -1428,6 +1443,7 @@ export async function buildExecuteTaskPrompt( return loadPrompt("execute-task", { overridesSection, runtimeContext, + memoriesSection, phaseAnchorSection, workingDirectory: base, milestoneId: mid, sliceId: sid, sliceTitle: sTitle, taskId: tid, taskTitle: tTitle, diff --git a/src/resources/extensions/sf/auto/loop-deps.ts b/src/resources/extensions/sf/auto/loop-deps.ts index 2d69f6a88..5138eafe1 100644 --- a/src/resources/extensions/sf/auto/loop-deps.ts +++ b/src/resources/extensions/sf/auto/loop-deps.ts @@ -64,7 +64,7 @@ export interface LoopDeps { // Pre-dispatch health gate preDispatchHealthGate: ( basePath: string, - ) => Promise<{ proceed: boolean; reason?: string; fixesApplied: string[] }>; + ) => Promise<{ proceed: boolean; reason?: string; issues?: string[]; fixesApplied: string[] }>; // Worktree sync syncProjectRootToWorktree: ( diff --git a/src/resources/extensions/sf/auto/loop.ts b/src/resources/extensions/sf/auto/loop.ts index 6ae953250..28eeae2d6 100644 --- a/src/resources/extensions/sf/auto/loop.ts +++ b/src/resources/extensions/sf/auto/loop.ts @@ -95,6 +95,30 @@ function checkMemoryPressure(): { pressured: boolean; heapMB: number; limitMB: n return { pressured: pct > MEMORY_PRESSURE_THRESHOLD, heapMB, limitMB, pct }; } +/** + * Wrap a phase function with a timeout. Rejects with an Error whose message + * starts with "phase-timeout:" so the blanket catch can handle it specially. 
+ */ +async function withPhaseTimeout( + name: string, + fn: () => Promise, + timeoutMs: number, +): Promise { + let timer: ReturnType | undefined; + const timeout = new Promise((_, reject) => { + timer = setTimeout( + () => reject(new Error(`phase-timeout:${name}`)), + timeoutMs, + ); + }); + try { + const result = await Promise.race([fn(), timeout]); + return result; + } finally { + if (timer !== undefined) clearTimeout(timer); + } +} + /** * Main auto-mode execution loop. Iterates: derive → dispatch → guards → * runUnit → finalize → repeat. Exits when s.active becomes false or a @@ -212,6 +236,7 @@ export async function autoLoop( // ── Blanket try/catch: one bad iteration must not kill the session const prefs = deps.loadEffectiveSFPreferences()?.preferences; const uokFlags = resolveUokFlags(prefs); + const phaseTimeoutMs = ((prefs?.auto_supervisor?.phase_timeout_minutes ?? 10) * 60_000); // ── Check sidecar queue before deriveState ── let sidecarItem: SidecarItem | undefined; @@ -410,8 +435,42 @@ export async function autoLoop( } if (!sidecarItem) { + // ── P4-A: Doctor issues → reassess escalation ───────────────────── + // If the health gate detects issues that mention slice IDs (state + // inconsistencies that reassessment can fix), queue reassess instead + // of pausing auto-mode. This runs separately from the gate inside + // runPreDispatch so we can intercept *before* the break path. + try { + const healthCheck = await deps.preDispatchHealthGate(s.basePath); + if (!healthCheck.proceed && healthCheck.issues && healthCheck.issues.length > 0) { + const sliceRefPattern = /\bS\d+\b/; + const hasSliceRef = healthCheck.issues.some((issue) => sliceRefPattern.test(issue)); + if (hasSliceRef) { + const sfState = await deps.deriveState(s.basePath); + const mid = sfState.activeMilestone?.id; + const midTitle = sfState.activeMilestone?.title ?? ""; + const sliceId = sfState.activeSlice?.id ?? 
"reassess"; + if (mid) { + ctx.ui.notify(`Health issues detected with slice references — queuing reassess-roadmap instead of pausing.`, "warning"); + const { buildReassessRoadmapPrompt } = await import("../auto-prompts.js"); + const reassessPrompt = await buildReassessRoadmapPrompt(mid, midTitle, sliceId, s.basePath); + s.sidecarQueue.unshift({ + kind: "hook", + unitType: "reassess-roadmap", + unitId: `${mid}/${sliceId}`, + prompt: `## Doctor Health Issues\n\n${healthCheck.issues.map(i => `- ${i}`).join("\n")}\n\n${reassessPrompt}`, + }); + finishTurn("retry"); + continue; + } + } + } + } catch { + // Non-fatal — fall through to normal runPreDispatch path + } + // ── Phase 1: Pre-dispatch ───────────────────────────────────────── - const preDispatchResult = await runPreDispatch(ic, loopState); + const preDispatchResult = await withPhaseTimeout("preDispatch", () => runPreDispatch(ic, loopState), phaseTimeoutMs / 2); deps.uokObserver?.onPhaseResult("pre-dispatch", preDispatchResult.action); if (preDispatchResult.action === "break") { finishTurn("stopped", "manual-attention", "pre-dispatch-break"); @@ -433,7 +492,7 @@ export async function autoLoop( } // ── Phase 3: Dispatch ───────────────────────────────────────────── - const dispatchResult = await runDispatch(ic, preData, loopState); + const dispatchResult = await withPhaseTimeout("dispatch", () => runDispatch(ic, preData, loopState), phaseTimeoutMs); deps.uokObserver?.onPhaseResult("dispatch", dispatchResult.action); if (dispatchResult.action === "break") { finishTurn("stopped", "manual-attention", "dispatch-break"); @@ -481,7 +540,7 @@ export async function autoLoop( // ── Phase 5: Finalize ─────────────────────────────────────────────── - const finalizeResult = await runFinalize(ic, iterData, loopState, sidecarItem); + const finalizeResult = await withPhaseTimeout("finalize", () => runFinalize(ic, iterData, loopState, sidecarItem), phaseTimeoutMs); deps.uokObserver?.onPhaseResult("finalize", 
finalizeResult.action, { unitType: iterData.unitType, unitId: iterData.unitId, @@ -537,6 +596,19 @@ export async function autoLoop( break; } + // ── Phase timeout: log, increment counter, continue ── + if (msg.startsWith("phase-timeout:")) { + const phaseName = msg.slice("phase-timeout:".length); + loopState.consecutiveFinalizeTimeouts++; + ctx.ui.notify( + `Phase "${phaseName}" timed out (${loopState.consecutiveFinalizeTimeouts} consecutive) — skipping iteration and continuing.`, + "warning", + ); + debugLog("autoLoop", { phase: "phase-timeout", phaseName, consecutiveFinalizeTimeouts: loopState.consecutiveFinalizeTimeouts, iteration }); + finishTurn("retry", "timeout", msg); + continue; + } + // ── Credential cooldown: wait and retry with bounded budget ── // A 429 triggers a 30s credential backoff in AuthStorage. If the SDK's // getApiKey() retries couldn't outlast the window, the error surfaces diff --git a/src/resources/extensions/sf/files.ts b/src/resources/extensions/sf/files.ts index f64d92146..3bfceb4d9 100644 --- a/src/resources/extensions/sf/files.ts +++ b/src/resources/extensions/sf/files.ts @@ -3,7 +3,7 @@ // Used by state derivation and the status widget. // Pure functions, zero Pi dependencies - uses only Node built-ins. -import { promises as fs } from 'node:fs'; +import { promises as fs, readFileSync } from 'node:fs'; import { resolve } from 'node:path'; import { atomicWriteAsync } from './atomic-write.js'; import { resolveMilestoneFile, relMilestoneFile, resolveSfRootFile } from './paths.js'; @@ -531,6 +531,35 @@ export function parseRequirementCounts(content: string | null): RequirementCount return counts; } +// ─── Deferred Requirement Parser ────────────────────────────────────────── + +/** + * Parse requirement entries under the "## Deferred" section of REQUIREMENTS.md. + * Looks for milestone references in the form `→ M001` or `deferred to M001` + * (matched case-insensitively) in the entry text. 
+ */ +export function parseDeferredRequirements(basePath: string): Array<{ id: string; text: string; deferredTo: string | null }> { + try { + const reqPath = resolveSfRootFile(basePath, "REQUIREMENTS"); + if (!reqPath) return []; + const content = readFileSync(reqPath, "utf-8"); + const deferredSection = extractSection(content, "Deferred", 2); + if (!deferredSection) return []; + const results: Array<{ id: string; text: string; deferredTo: string | null }> = []; + for (const line of deferredSection.split("\n")) { + const idMatch = line.match(/###\s+([A-Z][\w-]*\d+)\s+—\s+(.+)/); + if (!idMatch) continue; + const id = idMatch[1]; + const text = idMatch[2].trim(); + const deferMatch = text.match(/(?:→|deferred to)\s+(M\d+)/i); + results.push({ id, text, deferredTo: deferMatch ? deferMatch[1] : null }); + } + return results; + } catch { + return []; + } +} + // ─── Task Plan Must-Haves Parser ─────────────────────────────────────────── /** diff --git a/src/resources/extensions/sf/gate-registry.ts b/src/resources/extensions/sf/gate-registry.ts index 0ceb5ef4d..3824a519b 100644 --- a/src/resources/extensions/sf/gate-registry.ts +++ b/src/resources/extensions/sf/gate-registry.ts @@ -40,6 +40,8 @@ export interface GateDefinition { /** H3 section header used in the artifact the turn writes * (e.g. "Operational Readiness" for Q8 in the slice summary). */ promptSection: string; + /** Minimum word count required in the rationale when verdict is "omitted". 0 = no minimum. 
*/ + minOmissionWords: number; } export const GATE_REGISTRY = { @@ -55,6 +57,7 @@ export const GATE_REGISTRY = { "If none apply, return verdict 'omitted' with rationale explaining why.", ].join("\n"), promptSection: "Abuse Surface", + minOmissionWords: 20, }, Q4: { id: "Q4", @@ -68,6 +71,7 @@ export const GATE_REGISTRY = { "If no existing requirements are affected, return verdict 'omitted'.", ].join("\n"), promptSection: "Broken Promises", + minOmissionWords: 0, }, Q5: { id: "Q5", @@ -81,6 +85,7 @@ export const GATE_REGISTRY = { "Return verdict 'omitted' only if the task has no external dependencies.", ].join("\n"), promptSection: "Failure Modes", + minOmissionWords: 15, }, Q6: { id: "Q6", @@ -93,6 +98,7 @@ export const GATE_REGISTRY = { "Return verdict 'omitted' if the task has no runtime load dimension.", ].join("\n"), promptSection: "Load Profile", + minOmissionWords: 10, }, Q7: { id: "Q7", @@ -105,6 +111,7 @@ export const GATE_REGISTRY = { "Return verdict 'omitted' only if the task has no meaningful negative surface.", ].join("\n"), promptSection: "Negative Tests", + minOmissionWords: 15, }, Q8: { id: "Q8", @@ -118,6 +125,7 @@ export const GATE_REGISTRY = { "Return verdict 'omitted' only for slices with no runtime behavior at all.", ].join("\n"), promptSection: "Operational Readiness", + minOmissionWords: 0, }, MV01: { id: "MV01", @@ -130,6 +138,7 @@ export const GATE_REGISTRY = { "Return verdict 'flag' if any criterion is unmet or unverifiable.", ].join("\n"), promptSection: "Success Criteria Checklist", + minOmissionWords: 0, }, MV02: { id: "MV02", @@ -142,6 +151,7 @@ export const GATE_REGISTRY = { "Flag missing artifacts and slices with outstanding follow-ups or known limitations.", ].join("\n"), promptSection: "Slice Delivery Audit", + minOmissionWords: 0, }, MV03: { id: "MV03", @@ -153,6 +163,7 @@ export const GATE_REGISTRY = { "Flag gaps where two slices were built in isolation with no integration evidence.", ].join("\n"), promptSection: "Cross-Slice 
Integration", + minOmissionWords: 0, }, MV04: { id: "MV04", @@ -164,6 +175,7 @@ export const GATE_REGISTRY = { "Flag requirements that slices claim to advance but no artifact proves.", ].join("\n"), promptSection: "Requirement Coverage", + minOmissionWords: 0, }, } as const satisfies Record; diff --git a/src/resources/extensions/sf/memory-extractor.ts b/src/resources/extensions/sf/memory-extractor.ts index 4ceaefda3..55bcbf791 100644 --- a/src/resources/extensions/sf/memory-extractor.ts +++ b/src/resources/extensions/sf/memory-extractor.ts @@ -309,6 +309,7 @@ export async function extractMemoriesFromUnit( _extracting = true; _lastExtractionTime = now; + let userPrompt: string | undefined; try { // Read and parse activity file const raw = readFileSync(activityFile, 'utf-8'); @@ -326,7 +327,7 @@ export async function extractMemoriesFromUnit( })); // Build prompts - const userPrompt = buildExtractionUserPrompt(unitType, unitId, activeMemories, safeTranscript); + userPrompt = buildExtractionUserPrompt(unitType, unitId, activeMemories, safeTranscript); // Call LLM const response = await llmCallFn(EXTRACTION_SYSTEM, userPrompt); @@ -345,7 +346,18 @@ export async function extractMemoriesFromUnit( // Mark unit as processed markUnitProcessed(unitKey, activityFile); } catch { - // Non-fatal — memory extraction failure should never affect auto-mode + // Retry once after a brief delay + if (userPrompt) { + try { + await new Promise((r) => setTimeout(r, 2000)); + const response2 = await llmCallFn(EXTRACTION_SYSTEM, userPrompt); + const actions2 = parseMemoryResponse(response2); + if (actions2.length > 0) applyMemoryActions(actions2, unitType, unitId); + markUnitProcessed(unitKey, activityFile); + } catch { + // Non-fatal — memory extraction failure should never affect auto-mode + } + } } finally { _extracting = false; } diff --git a/src/resources/extensions/sf/parallel-eligibility.ts b/src/resources/extensions/sf/parallel-eligibility.ts index 002a921ef..b6ee73106 100644 --- 
a/src/resources/extensions/sf/parallel-eligibility.ts +++ b/src/resources/extensions/sf/parallel-eligibility.ts @@ -9,6 +9,7 @@ import { deriveState } from "./state.js"; import { resolveMilestoneFile, resolveSliceFile } from "./paths.js"; import { findMilestoneIds } from "./guided-flow.js"; import { isDbAvailable, getMilestoneSlices, getSliceTasks } from "./sf-db.js"; +import { getWorkerStatuses } from "./parallel-orchestrator.js"; import type { MilestoneRegistryEntry } from "./types.js"; // ─── Types ─────────────────────────────────────────────────────────────────── @@ -179,9 +180,21 @@ export async function analyzeParallelEligibility( overlappingIds.add(overlap.mid2); } + const runningWorkerIds = new Set( + getWorkerStatuses(basePath) + .filter((w) => w.state === "running") + .map((w) => w.milestoneId), + ); + for (const result of eligible) { - if (overlappingIds.has(result.milestoneId)) { - result.reason = "All dependencies satisfied. WARNING: has file overlap with another eligible milestone."; + if (!overlappingIds.has(result.milestoneId)) continue; + const overlap = fileOverlaps.find((o) => o.mid1 === result.milestoneId || o.mid2 === result.milestoneId); + const overlappingWith = overlap ? (overlap.mid1 === result.milestoneId ? overlap.mid2 : overlap.mid1) : undefined; + if (overlappingWith && runningWorkerIds.has(overlappingWith)) { + result.eligible = false; + result.reason = `File overlap with running milestone ${overlappingWith} — will dispatch after it completes.`; + } else { + result.reason = "All dependencies satisfied. 
NOTE: file overlap with another eligible milestone."; } } diff --git a/src/resources/extensions/sf/parallel-orchestrator.ts b/src/resources/extensions/sf/parallel-orchestrator.ts index da66422ae..e1381d303 100644 --- a/src/resources/extensions/sf/parallel-orchestrator.ts +++ b/src/resources/extensions/sf/parallel-orchestrator.ts @@ -1015,6 +1015,18 @@ export function refreshWorkerStatuses( state.totalCost += worker.cost; } + // Kill workers that have exceeded their timeout + const workerTimeoutMs = ((state.config.worker_timeout_minutes ?? 120) * 60_000); + for (const [, worker] of state.workers) { + if (worker.state === "running" && Date.now() - worker.startedAt > workerTimeoutMs) { + if (worker.process) worker.process.kill("SIGTERM"); + worker.cleanup?.(); + worker.cleanup = undefined; + worker.state = "error"; + worker.process = null; + } + } + // If all workers are in a terminal state (error/stopped), the orchestration // is finished — deactivate and clean up so zombie workers don't persist. const allDead = [...state.workers.values()].every( diff --git a/src/resources/extensions/sf/preferences-types.ts b/src/resources/extensions/sf/preferences-types.ts index b52237736..8c81c5988 100644 --- a/src/resources/extensions/sf/preferences-types.ts +++ b/src/resources/extensions/sf/preferences-types.ts @@ -193,6 +193,7 @@ export interface AutoSupervisorConfig { soft_timeout_minutes?: number; idle_timeout_minutes?: number; hard_timeout_minutes?: number; + phase_timeout_minutes?: number; } export interface RemoteQuestionsConfig { diff --git a/src/resources/extensions/sf/preferences.ts b/src/resources/extensions/sf/preferences.ts index 61317111a..8210bf3ef 100644 --- a/src/resources/extensions/sf/preferences.ts +++ b/src/resources/extensions/sf/preferences.ts @@ -630,5 +630,6 @@ export function resolveParallelConfig(prefs: SFPreferences | undefined): import( merge_strategy: prefs?.parallel?.merge_strategy ?? "per-milestone", auto_merge: prefs?.parallel?.auto_merge ?? 
"confirm", worker_model: prefs?.parallel?.worker_model, + worker_timeout_minutes: prefs?.parallel?.worker_timeout_minutes, }; } diff --git a/src/resources/extensions/sf/prompts/execute-task.md b/src/resources/extensions/sf/prompts/execute-task.md index fd3b7c55c..1406e3f9a 100644 --- a/src/resources/extensions/sf/prompts/execute-task.md +++ b/src/resources/extensions/sf/prompts/execute-task.md @@ -12,6 +12,8 @@ A researcher explored the codebase and a planner decomposed the work — you are {{runtimeContext}} +{{memoriesSection}} + {{phaseAnchorSection}} {{resumeSection}} diff --git a/src/resources/extensions/sf/sf-db.ts b/src/resources/extensions/sf/sf-db.ts index 2ce22b269..7ec954303 100644 --- a/src/resources/extensions/sf/sf-db.ts +++ b/src/resources/extensions/sf/sf-db.ts @@ -1001,6 +1001,14 @@ function migrateSchema(db: DbAdapter): void { }); } + if (currentVersion < 17) { + ensureColumn(db, "tasks", "verification_status", `ALTER TABLE tasks ADD COLUMN verification_status TEXT NOT NULL DEFAULT ''`); + db.prepare("INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)").run({ + ":version": 17, + ":applied_at": new Date().toISOString(), + }); + } + db.exec("COMMIT"); } catch (err) { db.exec("ROLLBACK"); @@ -1599,17 +1607,18 @@ export function insertTask(t: { fullSummaryMd?: string; sequence?: number; planning?: Partial; + verificationStatus?: "all_pass" | "partial" | "all_fail" | ""; }): void { if (!currentDb) throw new SFError(SF_STALE_STATE, "sf-db: No database open"); currentDb.prepare( `INSERT INTO tasks ( milestone_id, slice_id, id, title, status, one_liner, narrative, - verification_result, duration, completed_at, blocker_discovered, + verification_result, verification_status, duration, completed_at, blocker_discovered, deviations, known_issues, key_files, key_decisions, full_summary_md, description, estimate, files, verify, inputs, expected_output, observability_impact, sequence ) VALUES ( :milestone_id, :slice_id, :id, :title, 
:status, :one_liner, :narrative, - :verification_result, :duration, :completed_at, :blocker_discovered, + :verification_result, :verification_status, :duration, :completed_at, :blocker_discovered, :deviations, :known_issues, :key_files, :key_decisions, :full_summary_md, :description, :estimate, :files, :verify, :inputs, :expected_output, :observability_impact, :sequence ) @@ -1619,6 +1628,7 @@ export function insertTask(t: { one_liner = :one_liner, narrative = :narrative, verification_result = :verification_result, + verification_status = :verification_status, duration = :duration, completed_at = :completed_at, blocker_discovered = :blocker_discovered, @@ -1644,6 +1654,7 @@ export function insertTask(t: { ":one_liner": t.oneLiner ?? "", ":narrative": t.narrative ?? "", ":verification_result": t.verificationResult ?? "", + ":verification_status": t.verificationStatus ?? "", ":duration": t.duration ?? "", ":completed_at": t.status === "done" || t.status === "complete" ? new Date().toISOString() : null, ":blocker_discovered": t.blockerDiscovered ? 1 : 0, @@ -1818,6 +1829,7 @@ export interface TaskRow { observability_impact: string; full_plan_md: string; sequence: number; + verification_status?: string; } function parseTaskArrayColumn(raw: unknown): string[] { @@ -1888,6 +1900,7 @@ function rowToTask(row: Record): TaskRow { observability_impact: (row["observability_impact"] as string) ?? "", full_plan_md: (row["full_plan_md"] as string) ?? "", sequence: (row["sequence"] as number) ?? 0, + verification_status: (row["verification_status"] as string) ?? 
"", }; } diff --git a/src/resources/extensions/sf/tools/complete-task.ts b/src/resources/extensions/sf/tools/complete-task.ts index 35e1d912f..b34f89dbf 100644 --- a/src/resources/extensions/sf/tools/complete-task.ts +++ b/src/resources/extensions/sf/tools/complete-task.ts @@ -111,6 +111,7 @@ function paramsToTaskRow(params: CompleteTaskParams, completedAt: string): TaskR observability_impact: "", full_plan_md: "", sequence: 0, + verification_status: "", }; } @@ -178,6 +179,10 @@ export async function handleCompleteTask( // All guards passed — perform writes insertMilestone({ id: params.milestoneId, title: params.milestoneId }); insertSlice({ id: params.sliceId, milestoneId: params.milestoneId, title: params.sliceId }); + const evidence = params.verificationEvidence ?? []; + const verificationStatus = evidence.length === 0 ? "" : + evidence.every((c) => c.exitCode === 0) ? "all_pass" : + evidence.some((c) => c.exitCode === 0) ? "partial" : "all_fail"; insertTask({ id: params.taskId, sliceId: params.sliceId, @@ -193,6 +198,7 @@ export async function handleCompleteTask( knownIssues: params.knownIssues ?? "None.", keyFiles: params.keyFiles ?? [], keyDecisions: params.keyDecisions ?? [], + verificationStatus, }); for (const evidence of (params.verificationEvidence ?? [])) { @@ -279,15 +285,24 @@ export async function handleCompleteTask( if (!def) continue; const field = taskGateFieldForId(def.id, params); const hasContent = typeof field === "string" && field.trim().length > 0; + let verdict: import("../types.js").GateVerdict = hasContent ? "pass" : "omitted"; + let rationale = hasContent + ? 
`${def.promptSection} section populated in task summary` + : `${def.promptSection} section left empty — recorded as omitted`; + if (verdict === "omitted" && def.minOmissionWords > 0) { + const wordCount = rationale.trim().split(/\s+/).filter(Boolean).length; + if (wordCount < def.minOmissionWords) { + verdict = "flag"; + rationale = `[⚠ Rationale too short — ${wordCount} words, ${def.minOmissionWords} required for omission] ${rationale}`; + } + } saveGateResult({ milestoneId: params.milestoneId, sliceId: params.sliceId, taskId: params.taskId, gateId: def.id, - verdict: hasContent ? "pass" : "omitted", - rationale: hasContent - ? `${def.promptSection} section populated in task summary` - : `${def.promptSection} section left empty — recorded as omitted`, + verdict, + rationale, findings: hasContent ? (field as string).trim() : "", }); } diff --git a/src/resources/extensions/sf/tools/workflow-tool-executors.ts b/src/resources/extensions/sf/tools/workflow-tool-executors.ts index 17d7691ff..9430095fe 100644 --- a/src/resources/extensions/sf/tools/workflow-tool-executors.ts +++ b/src/resources/extensions/sf/tools/workflow-tool-executors.ts @@ -449,13 +449,26 @@ export async function executeSaveGateResult( } try { + let effectiveVerdict: string = params.verdict; + let effectiveRationale = params.rationale; + if (effectiveVerdict === "omitted") { + const def = (GATE_REGISTRY as Record)[params.gateId]; + const minWords = def?.minOmissionWords ?? 0; + if (minWords > 0) { + const wordCount = effectiveRationale.trim().split(/\s+/).filter(Boolean).length; + if (wordCount < minWords) { + effectiveVerdict = "flag"; + effectiveRationale = `[⚠ Rationale too short — ${wordCount} words, ${minWords} required for omission] ${effectiveRationale}`; + } + } + } saveGateResult({ milestoneId: params.milestoneId, sliceId: params.sliceId, gateId: params.gateId, taskId: params.taskId ?? 
"", - verdict: params.verdict, - rationale: params.rationale, + verdict: effectiveVerdict as import("../types.js").GateVerdict, + rationale: effectiveRationale, findings: params.findings ?? "", }); invalidateStateCache(); diff --git a/src/resources/extensions/sf/types.ts b/src/resources/extensions/sf/types.ts index be735e6a7..b6a5b78fd 100644 --- a/src/resources/extensions/sf/types.ts +++ b/src/resources/extensions/sf/types.ts @@ -455,6 +455,8 @@ export interface ParallelConfig { auto_merge: AutoMergeMode; /** Optional model override for parallel milestone workers (e.g. "claude-haiku-4-5"). */ worker_model?: string; + /** Minutes before a running worker is killed as hung. Default: 120. */ + worker_timeout_minutes?: number; } // ─── Reactive Task Execution Types ───────────────────────────────────────