fix(gsd): reconcile stale slice rows and rebuild STATE.md before DB close (#3658)

* fix(gsd): reconcile stale slice rows and rebuild STATE.md before DB close

Two coupled defects caused auto-mode split-brain where dispatch falsely
reported "No slice eligible" while STATE.md showed executable work:

1. deriveStateFromDb() reconciled missing slice rows but not stale
   existing ones. A slice with status "pending" in the DB but a SUMMARY
   file on disk was never repaired, permanently blocking downstream
   slices. Added slice-level stale reconciliation matching the existing
   task-level pattern.

2. stopAuto() closed the DB before rebuilding STATE.md, forcing
   deriveState() into filesystem fallback mode. Moved rebuildState()
   before closeDatabase() so stop-time STATE.md uses the same
   authoritative DB backend as dispatch.

Fixes #3599

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* test: add regression test for stale slice row reconciliation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Tibsfox 2026-04-13 05:17:06 -07:00 committed by GitHub
parent 05edc2f484
commit 2978bacb74
3 changed files with 79 additions and 14 deletions

View file

@ -795,7 +795,22 @@ export async function stopAuto(
debugLog("stop-cleanup-worktree", { error: e instanceof Error ? e.message : String(e) });
}
// ── Step 5: DB cleanup ──
// ── Step 5: Rebuild state while DB is still open (#3599) ──
// rebuildState() calls deriveState() which needs the DB for authoritative
// state. Previously this ran after closeDatabase(), forcing a filesystem
// fallback that could disagree with the DB-backed dispatch decisions —
// a split-brain where dispatch says "blocked" but STATE.md shows work.
if (s.basePath) {
try {
await rebuildState(s.basePath);
} catch (e) {
debugLog("stop-rebuild-state-failed", {
error: e instanceof Error ? e.message : String(e),
});
}
}
// ── Step 6: DB cleanup ──
if (isDbAvailable()) {
try {
const { closeDatabase } = await import("./gsd-db.js");
@ -807,7 +822,7 @@ export async function stopAuto(
}
}
// ── Step 6: Restore basePath and chdir ──
// ── Step 7: Restore basePath and chdir ──
try {
if (s.originalBasePath) {
s.basePath = s.originalBasePath;
@ -822,7 +837,7 @@ export async function stopAuto(
debugLog("stop-cleanup-basepath", { error: e instanceof Error ? e.message : String(e) });
}
// ── Step 7: Ledger notification ──
// ── Step 8: Ledger notification ──
try {
const ledger = getLedger();
if (ledger && ledger.units.length > 0) {
@ -838,17 +853,6 @@ export async function stopAuto(
debugLog("stop-cleanup-ledger", { error: e instanceof Error ? e.message : String(e) });
}
// ── Step 8: Rebuild state ──
if (s.basePath) {
try {
await rebuildState(s.basePath);
} catch (e) {
debugLog("stop-rebuild-state-failed", {
error: e instanceof Error ? e.message : String(e),
});
}
}
// ── Step 9: Cmux sidebar / event log ──
try {
clearCmuxSidebar(loadedPreferences);

View file

@ -57,6 +57,7 @@ import {
insertMilestone,
insertSlice,
insertTask,
updateSliceStatus,
updateTaskStatus,
getPendingGateCountForTurn,
type MilestoneRow,
@ -358,6 +359,25 @@ function reconcileDiskToDb(basePath: string): MilestoneRow[] {
depends: s.depends, demo: s.demo,
});
}
// Reconcile stale *existing* slice rows (#3599): a slice row may exist in
// the DB with status "pending" even though disk artifacts (SUMMARY) prove
// completion — the same class of desync that task-level reconciliation
// (further below) already handles. Without this, the dependency resolver
// builds doneSliceIds from stale DB rows and downstream slices stay blocked
// forever with "No slice eligible".
for (const dbSlice of dbSlices) {
if (isStatusDone(dbSlice.status)) continue;
const summaryPath = resolveSliceFile(basePath, mid, dbSlice.id, "SUMMARY");
if (summaryPath) {
try {
updateSliceStatus(mid, dbSlice.id, "complete");
logWarning("reconcile", `slice ${mid}/${dbSlice.id} status reconciled from "${dbSlice.status}" to "complete" (#3599)`, { mid, sid: dbSlice.id });
} catch (e) {
logError("reconcile", `failed to update slice ${dbSlice.id}`, { sid: dbSlice.id, error: (e as Error).message });
}
}
}
}
return allMilestones;
}

View file

@ -0,0 +1,41 @@
/**
* stale-slice-rows.test.ts #3658
*
* Verify that state.ts contains slice-level status reconciliation that
* updates stale DB rows (status "pending") when disk artifacts (SUMMARY)
* prove the slice is complete. Without this, the dependency resolver builds
* doneSliceIds from stale DB rows and downstream slices stay blocked.
*/
import { describe, test } from "node:test";
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
// ES modules have no built-in __dirname; derive it from this module's URL so
// the path to state.ts is resolved relative to the test file, not the CWD.
const __dirname = dirname(fileURLToPath(import.meta.url));
const sourceFile = join(__dirname, "..", "state.ts");

/**
 * Source-text regression checks for stale slice row reconciliation.
 *
 * These tests read state.ts as plain text and pattern-match it. They do NOT
 * execute the reconciliation; they only guard against the relevant code
 * being removed or renamed.
 */
describe("stale slice row reconciliation (#3658)", () => {
  // Read once while the suite is being defined; every test below matches
  // against this same snapshot of the file.
  const source = readFileSync(sourceFile, "utf-8");

  test("imports updateSliceStatus from gsd-db", () => {
    // The module specifier is checked as well as the imported name, so the
    // assertion verifies what the test title claims. Word boundaries stop a
    // longer identifier that merely contains "updateSliceStatus" from
    // matching. The extension is optional (.js/.ts/.mjs/.cjs/...).
    assert.match(
      source,
      /import\s*\{[^}]*\bupdateSliceStatus\b[^}]*\}\s*from\s*["'][^"']*gsd-db(?:\.[cm]?[jt]s)?["']/,
    );
  });

  test("checks isStatusDone before reconciling slice rows", () => {
    assert.match(source, /isStatusDone\(dbSlice\.status\)/);
  });

  test("resolves SUMMARY file to detect completed slices on disk", () => {
    assert.match(
      source,
      /resolveSliceFile\(basePath,\s*mid,\s*dbSlice\.id,\s*["']SUMMARY["']\)/,
    );
  });

  test("calls updateSliceStatus to reconcile stale rows", () => {
    assert.match(
      source,
      /updateSliceStatus\(mid,\s*dbSlice\.id,\s*["']complete["']\)/,
    );
  });

  test("references issue #3599 in reconciliation comment", () => {
    // NOTE(review): this matches "#3599" anywhere in state.ts, not only in
    // the reconciliation comment.
    assert.match(source, /#3599/);
  });
});