From 9a04fef9256a4efec0a99e9b811ad5e4857c331b Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Sat, 18 Apr 2026 16:48:00 +0200 Subject: [PATCH] Fix Codex review issues: phase-timeout mutation race and missing backfill P1 (phase-timeout mutation race): withPhaseTimeout now stores the still-running phase promise in _danglingPhasePromise when a timeout fires. Each loop iteration drains that promise (with try/catch) before starting new work, preventing the timed-out phase from mutating state concurrently with the next iteration. P2 (verification_status backfill): Schema migration v17 now runs a backfill UPDATE after adding the new column, deriving verification_status from existing verification_evidence rows. Projects upgraded mid-slice will have correct all_pass/partial/all_fail values immediately rather than empty strings that bypass the prior-task guard. Co-Authored-By: Claude Sonnet 4.6 --- src/resources/extensions/sf/auto/loop.ts | 29 ++++++++++++++++++++++-- src/resources/extensions/sf/sf-db.ts | 26 +++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/src/resources/extensions/sf/auto/loop.ts b/src/resources/extensions/sf/auto/loop.ts index 28eeae2d6..edf3b8231 100644 --- a/src/resources/extensions/sf/auto/loop.ts +++ b/src/resources/extensions/sf/auto/loop.ts @@ -95,9 +95,19 @@ function checkMemoryPressure(): { pressured: boolean; heapMB: number; limitMB: n return { pressured: pct > MEMORY_PRESSURE_THRESHOLD, heapMB, limitMB, pct }; } +/** + * Tracks the dangling phase promise from the most recent timeout so the next + * iteration can drain it before proceeding. Promise.race() rejects on timeout + * but does not cancel the underlying async work; draining here prevents the + * timed-out phase from mutating state concurrently with the next iteration. + */ +let _danglingPhasePromise: Promise | null = null; + /** * Wrap a phase function with a timeout. Rejects with an Error whose message * starts with "phase-timeout:" so the blanket catch can handle it specially. + * Stores the still-running phase promise in _danglingPhasePromise so the caller + * can drain it before starting a new iteration. */ async function withPhaseTimeout( name: string, @@ -105,6 +115,7 @@ async function withPhaseTimeout( timeoutMs: number, ): Promise { let timer: ReturnType | undefined; + const phasePromise = fn(); const timeout = new Promise((_, reject) => { timer = setTimeout( () => reject(new Error(`phase-timeout:${name}`)), @@ -112,8 +123,12 @@ async function withPhaseTimeout( ); }); try { - const result = await Promise.race([fn(), timeout]); - return result; + return await Promise.race([phasePromise, timeout]); + } catch (err) { + if (err instanceof Error && err.message.startsWith("phase-timeout:")) { + _danglingPhasePromise = phasePromise; + } + throw err; } finally { if (timer !== undefined) clearTimeout(timer); } @@ -232,6 +247,16 @@ export async function autoLoop( break; } + // ── Drain any dangling phase promise before starting new work ── + // Promise.race() on timeout does not cancel the underlying async fn; that + // fn keeps running and may mutate state after the loop has advanced. + // Awaiting its completion here ensures no concurrent state writes. + if (_danglingPhasePromise !== null) { + const dangling = _danglingPhasePromise; + _danglingPhasePromise = null; + try { await dangling; } catch { /* ignore — result is irrelevant */ } + } + try { // ── Blanket try/catch: one bad iteration must not kill the session const prefs = deps.loadEffectiveSFPreferences()?.preferences; diff --git a/src/resources/extensions/sf/sf-db.ts b/src/resources/extensions/sf/sf-db.ts index 7ec954303..8c58ee291 100644 --- a/src/resources/extensions/sf/sf-db.ts +++ b/src/resources/extensions/sf/sf-db.ts @@ -1003,6 +1003,32 @@ function migrateSchema(db: DbAdapter): void { if (currentVersion < 17) { ensureColumn(db, "tasks", "verification_status", `ALTER TABLE tasks ADD COLUMN verification_status TEXT NOT NULL DEFAULT ''`); + // Backfill verification_status from existing verification_evidence rows so the + // prior-task guard works on databases upgraded mid-project (not just new ones). + db.exec(` + UPDATE tasks + SET verification_status = CASE + WHEN (SELECT COUNT(*) FROM verification_evidence ve + WHERE ve.milestone_id = tasks.milestone_id + AND ve.slice_id = tasks.slice_id + AND ve.task_id = tasks.id) = 0 + THEN '' + WHEN (SELECT COUNT(*) FROM verification_evidence ve + WHERE ve.milestone_id = tasks.milestone_id + AND ve.slice_id = tasks.slice_id + AND ve.task_id = tasks.id + AND ve.exit_code != 0) = 0 + THEN 'all_pass' + WHEN (SELECT COUNT(*) FROM verification_evidence ve + WHERE ve.milestone_id = tasks.milestone_id + AND ve.slice_id = tasks.slice_id + AND ve.task_id = tasks.id + AND ve.exit_code = 0) > 0 + THEN 'partial' + ELSE 'all_fail' + END + WHERE tasks.status IN ('complete', 'done') + `); db.prepare("INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)").run({ ":version": 17, ":applied_at": new Date().toISOString(),