Merge pull request #3164 from jeremymcs/fix/3161-state-machine-fixes

fix(state-machine): 9 resilience fixes from validation audit
This commit is contained in:
Jeremy McSpadden 2026-04-07 07:57:48 -05:00 committed by GitHub
commit c3485820ca
13 changed files with 283 additions and 45 deletions

View file

@ -108,9 +108,19 @@ export async function runUnit(
{ triggerTurn: true },
);
// ── Await agent_end ──
// ── Await agent_end with absolute timeout (H4 fix) ──
// If supervision fails to resolve unitPromise within 30s, treat as cancelled.
// Without this, a crashed agent that never emits agent_end hangs the loop (#3161).
debugLog("runUnit", { phase: "awaiting-agent-end", unitType, unitId });
const result = await unitPromise;
const UNIT_HARD_TIMEOUT_MS = 30_000;
let unitTimeoutHandle: ReturnType<typeof setTimeout> | undefined;
const timeoutResult = new Promise<UnitResult>((resolve) => {
unitTimeoutHandle = setTimeout(() => {
resolve({ status: "cancelled", errorContext: { message: "Unit hard timeout — supervision may have failed", category: "timeout", isTransient: true } });
}, UNIT_HARD_TIMEOUT_MS);
});
const result = await Promise.race([unitPromise, timeoutResult]);
if (unitTimeoutHandle) clearTimeout(unitTimeoutHandle);
debugLog("runUnit", {
phase: "agent-end-received",
unitType,

View file

@ -409,6 +409,7 @@ function initSchema(db: DbAdapter, fileBacked: boolean): void {
db.exec("CREATE INDEX IF NOT EXISTS idx_milestones_status ON milestones(status)");
db.exec("CREATE INDEX IF NOT EXISTS idx_quality_gates_pending ON quality_gates(milestone_id, slice_id, status)");
db.exec("CREATE INDEX IF NOT EXISTS idx_verification_evidence_task ON verification_evidence(milestone_id, slice_id, task_id)");
db.exec("CREATE UNIQUE INDEX IF NOT EXISTS idx_verification_evidence_dedup ON verification_evidence(task_id, slice_id, milestone_id, command, verdict)");
// v14 index — slice dependency lookups
db.exec("CREATE INDEX IF NOT EXISTS idx_slice_deps_target ON slice_dependencies(milestone_id, depends_on_slice_id)");
@ -722,6 +723,7 @@ function migrateSchema(db: DbAdapter): void {
db.exec("CREATE INDEX IF NOT EXISTS idx_milestones_status ON milestones(status)");
db.exec("CREATE INDEX IF NOT EXISTS idx_quality_gates_pending ON quality_gates(milestone_id, slice_id, status)");
db.exec("CREATE INDEX IF NOT EXISTS idx_verification_evidence_task ON verification_evidence(milestone_id, slice_id, task_id)");
db.exec("CREATE UNIQUE INDEX IF NOT EXISTS idx_verification_evidence_dedup ON verification_evidence(task_id, slice_id, milestone_id, command, verdict)");
db.prepare("INSERT INTO schema_version (version, applied_at) VALUES (:version, :applied_at)").run({
":version": 13,
":applied_at": new Date().toISOString(),
@ -1351,6 +1353,13 @@ export function updateTaskStatus(milestoneId: string, sliceId: string, taskId: s
});
}
export function setTaskBlockerDiscovered(milestoneId: string, sliceId: string, taskId: string, discovered: boolean): void {
if (!currentDb) return;
currentDb.prepare(
`UPDATE tasks SET blocker_discovered = :discovered WHERE milestone_id = :mid AND slice_id = :sid AND id = :tid`,
).run({ ":discovered": discovered ? 1 : 0, ":mid": milestoneId, ":sid": sliceId, ":tid": taskId });
}
export function upsertTaskPlanning(milestoneId: string, sliceId: string, taskId: string, planning: Partial<TaskPlanningRecord>): void {
if (!currentDb) throw new GSDError(GSD_STALE_STATE, "gsd-db: No database open");
currentDb.prepare(
@ -1545,7 +1554,7 @@ export function insertVerificationEvidence(e: {
}): void {
if (!currentDb) throw new GSDError(GSD_STALE_STATE, "gsd-db: No database open");
currentDb.prepare(
`INSERT INTO verification_evidence (task_id, slice_id, milestone_id, command, exit_code, verdict, duration_ms, created_at)
`INSERT OR IGNORE INTO verification_evidence (task_id, slice_id, milestone_id, command, exit_code, verdict, duration_ms, created_at)
VALUES (:task_id, :slice_id, :milestone_id, :command, :exit_code, :verdict, :duration_ms, :created_at)`,
).run({
":task_id": e.taskId,

View file

@ -468,9 +468,10 @@ export async function deriveStateFromDb(basePath: string): Promise<GSDState> {
continue;
}
// Ghost milestone check: no slices in DB AND no substantive files on disk
// Ghost milestone check: no slices in DB AND no substantive files on disk.
// Skip queued milestones — they are handled by the deferred-shell logic below (#3470).
const slices = getMilestoneSlices(m.id);
if (slices.length === 0 && !isStatusDone(m.status)) {
if (slices.length === 0 && !isStatusDone(m.status) && m.status !== 'queued') {
// Check disk for ghost detection
if (isGhostMilestone(basePath, m.id)) continue;
}

View file

@ -11,6 +11,8 @@
// GSD State Machine Live Validation (#3161)
import { describe, test, beforeEach, afterEach } from "node:test";
import assert from "node:assert/strict";
import {
@ -721,10 +723,10 @@ describe("state-machine-live-validation", () => {
const result = await handleCompleteTask(makeTaskParams("T01", "S99", "M099") as any, base);
assert.ok(!("error" in result), `expected success: ${JSON.stringify(result)}`);
// Phantom milestone created
// Phantom milestone created — H6 fix: now uses ID as title instead of empty string
const milestone = getMilestone("M099");
assert.ok(milestone, "phantom milestone M099 should exist");
assert.equal(milestone!.title, "", "phantom milestone has empty title");
assert.equal(milestone!.title, "M099", "H6 fix: phantom milestone uses ID as title");
// Phantom slice created
const slice = getSlice("M099", "S99");
@ -895,13 +897,10 @@ describe("state-machine-live-validation", () => {
// ─────────────────────────────────────────────────────────────────────────
describe("reopen-then-redo cycle", () => {
test("complete → reopen → M12: stale SUMMARY causes immediate auto-reconcile", async () => {
// Finding M12: reopen-task does NOT delete the SUMMARY.md from disk.
// The reopen handler's own post-mutation hook calls renderAllProjections
// which triggers deriveStateFromDb, which sees the stale SUMMARY.md and
// auto-reconciles the task BACK to "complete" (#2514) within the same call.
//
// Result: the reopen is effectively a no-op when filesystem artifacts exist.
test("complete → reopen → re-complete task works end-to-end (M12 fixed)", async () => {
// M12 fix: reopen-task now deletes SUMMARY.md from disk before the
// post-mutation hook runs, preventing the reconciler from auto-correcting
// the task back to "complete".
base = createFullFixture();
openDatabase(join(base, ".gsd", "gsd.db"));
insertMilestone({ id: "M001", title: "Active", status: "active" });
@ -915,23 +914,23 @@ describe("state-machine-live-validation", () => {
const summaryPath = join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks", "T01-SUMMARY.md");
assert.ok(existsSync(summaryPath), "SUMMARY.md exists after completion");
// Reopen — handler sets DB to "pending" in transaction, but post-mutation
// hook triggers reconciler which immediately sets it back to "complete"
// Reopen — now deletes SUMMARY.md from disk (M12 fix)
const r2 = await handleReopenTask({ milestoneId: "M001", sliceId: "S01", taskId: "T01" }, base);
assert.ok(!("error" in r2), `reopen handler succeeded: ${JSON.stringify(r2)}`);
assert.ok(!("error" in r2), `reopen: ${JSON.stringify(r2)}`);
// M12: After reopen completes, DB shows "complete" not "pending" because
// the reconciler auto-corrected it from the stale SUMMARY.md
const task = getTask("M001", "S01", "T01");
assert.equal(task!.status, "complete", "M12: reconciler overrides reopen — task is back to complete");
assert.ok(existsSync(summaryPath), "M12: SUMMARY.md was never cleaned up");
// Task is now properly pending — SUMMARY.md was cleaned up
assert.equal(getTask("M001", "S01", "T01")!.status, "pending");
assert.ok(!existsSync(summaryPath), "M12 fix: SUMMARY.md cleaned up by reopen");
// Re-complete succeeds
const r3 = await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base);
assert.ok(!("error" in r3), `re-complete: ${JSON.stringify(r3)}`);
assert.ok(isClosedStatus(getTask("M001", "S01", "T01")!.status));
});
test("complete slice → reopen → M12: reconciler overrides task reset via stale SUMMARY", async () => {
// Same M12 pattern at the slice level: reopen-slice resets all tasks to
// "pending" in DB, but task SUMMARY.md artifacts remain on disk. The
// reopen handler's post-mutation hook triggers reconciler which sees the
// stale artifacts and auto-corrects tasks back to "complete".
test("complete slice → reopen → re-complete all works end-to-end (M12 fixed)", async () => {
// M12 fix: reopen-slice now deletes all SUMMARY.md and UAT.md artifacts
// from disk, preventing reconciler interference.
base = createFullFixture();
openDatabase(join(base, ".gsd", "gsd.db"));
insertMilestone({ id: "M001", title: "Active", status: "active" });
@ -943,17 +942,16 @@ describe("state-machine-live-validation", () => {
await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base);
assert.ok(isClosedStatus(getSlice("M001", "S01")!.status));
// Reopen slice — transaction resets slice to in_progress and task to pending,
// but post-mutation hook triggers reconciler which sees stale SUMMARY.md
// Reopen slice — now cleans up all artifacts (M12 fix)
await handleReopenSlice({ milestoneId: "M001", sliceId: "S01" }, base);
// Slice status is correctly in_progress (no slice SUMMARY reconciliation)
assert.equal(getSlice("M001", "S01")!.status, "in_progress");
assert.equal(getTask("M001", "S01", "T01")!.status, "pending");
// M12: Task was reset to "pending" in the transaction, but reconciler
// already corrected it back to "complete" from the stale SUMMARY.md
const task = getTask("M001", "S01", "T01");
assert.equal(task!.status, "complete", "M12: reconciler overrides reopen — task back to complete");
// Re-complete task + slice succeeds
await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base);
const r = await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base);
assert.ok(!("error" in r), `re-complete slice: ${JSON.stringify(r)}`);
assert.ok(isClosedStatus(getSlice("M001", "S01")!.status));
});
});
});

View file

@ -165,6 +165,7 @@ test("Rule 4: ENOENT paths non-consecutive still triggers", () => {
assert.ok(result!.reason.includes("/missing/skill"), `reason was: ${result!.reason}`);
});
// ─── Gap documentation: 3-unit cycle evades detection ────────────────────────
test("Three-unit cycle A-B-C-A-B-C does NOT trigger stuck (documents gap L13)", () => {

View file

@ -244,6 +244,7 @@ export async function handleCompleteSlice(
// ── Guards + DB writes inside a single transaction (prevents TOCTOU) ───
const completedAt = new Date().toISOString();
const originalSliceStatus = getSlice(params.milestoneId, params.sliceId)?.status ?? "pending";
let guardError: string | null = null;
transaction(() => {
@ -277,8 +278,8 @@ export async function handleCompleteSlice(
}
// All guards passed — perform writes
insertMilestone({ id: params.milestoneId });
insertSlice({ id: params.sliceId, milestoneId: params.milestoneId });
insertMilestone({ id: params.milestoneId, title: params.milestoneId });
insertSlice({ id: params.sliceId, milestoneId: params.milestoneId, title: params.sliceId });
updateSliceStatus(params.milestoneId, params.sliceId, "complete", completedAt);
});
@ -321,7 +322,7 @@ export async function handleCompleteSlice(
} catch (renderErr) {
// Disk render failed — roll back DB status so state stays consistent
logWarning("tool", `complete_slice — disk render failed for ${params.milestoneId}/${params.sliceId}, rolling back DB status`, { error: (renderErr as Error).message });
updateSliceStatus(params.milestoneId, params.sliceId, 'pending');
updateSliceStatus(params.milestoneId, params.sliceId, originalSliceStatus);
invalidateStateCache();
return { error: `disk render failed: ${(renderErr as Error).message}` };
}

View file

@ -152,8 +152,8 @@ export async function handleCompleteTask(
}
// All guards passed — perform writes
insertMilestone({ id: params.milestoneId });
insertSlice({ id: params.sliceId, milestoneId: params.milestoneId });
insertMilestone({ id: params.milestoneId, title: params.milestoneId });
insertSlice({ id: params.sliceId, milestoneId: params.milestoneId, title: params.sliceId });
insertTask({
id: params.taskId,
sliceId: params.sliceId,

View file

@ -0,0 +1,152 @@
// GSD — reopen-milestone tool handler
/**
* reopen-milestone handler the core operation behind gsd_milestone_reopen.
*
* Resets a closed milestone back to "active", all of its slices to
* "in_progress", and all tasks to "pending". Cleans up stale filesystem
* artifacts so the DB-filesystem reconciler does not auto-correct
* entities back to "complete".
*/
import {
getMilestone,
getMilestoneSlices,
getSliceTasks,
updateMilestoneStatus,
updateSliceStatus,
updateTaskStatus,
transaction,
} from "../gsd-db.js";
import { invalidateStateCache } from "../state.js";
import { isClosedStatus } from "../status-guards.js";
import { renderAllProjections } from "../workflow-projections.js";
import { writeManifest } from "../workflow-manifest.js";
import { appendEvent } from "../workflow-events.js";
import { logWarning } from "../workflow-logger.js";
import { debugLog } from "../debug-logger.js";
import { existsSync, unlinkSync } from "node:fs";
import { join } from "node:path";
import { resolveMilestonePath, resolveSlicePath, resolveTasksDir, clearPathCache } from "../paths.js";
export interface ReopenMilestoneParams {
milestoneId: string;
reason?: string;
/** Optional caller-provided identity for audit trail */
actorName?: string;
/** Optional caller-provided reason this action was triggered */
triggerReason?: string;
}
export interface ReopenMilestoneResult {
milestoneId: string;
slicesReset: number;
tasksReset: number;
}
export async function handleReopenMilestone(
params: ReopenMilestoneParams,
basePath: string,
): Promise<ReopenMilestoneResult | { error: string }> {
// ── Validate required fields ────────────────────────────────────────────
if (!params.milestoneId || typeof params.milestoneId !== "string" || params.milestoneId.trim() === "") {
return { error: "milestoneId is required and must be a non-empty string" };
}
// ── Guards + DB writes inside a single transaction (prevents TOCTOU) ───
let guardError: string | null = null;
let slicesResetCount = 0;
let tasksResetCount = 0;
transaction(() => {
const milestone = getMilestone(params.milestoneId);
if (!milestone) {
guardError = `milestone not found: ${params.milestoneId}`;
return;
}
if (!isClosedStatus(milestone.status)) {
guardError = `milestone ${params.milestoneId} is not closed (status: ${milestone.status}) — nothing to reopen`;
return;
}
updateMilestoneStatus(params.milestoneId, "active", null);
const slices = getMilestoneSlices(params.milestoneId);
slicesResetCount = slices.length;
for (const slice of slices) {
updateSliceStatus(params.milestoneId, slice.id, "in_progress");
const tasks = getSliceTasks(params.milestoneId, slice.id);
tasksResetCount += tasks.length;
for (const task of tasks) {
updateTaskStatus(params.milestoneId, slice.id, task.id, "pending");
}
}
});
if (guardError) {
return { error: guardError };
}
// ── Invalidate caches ────────────────────────────────────────────────────
invalidateStateCache();
// ── Clean up stale filesystem artifacts (M12 fix) ────────────────────────
// Without this, the DB-filesystem reconciler sees SUMMARY.md files and
// auto-corrects entities back to "complete", making reopen a no-op (#3161).
try {
const milestoneDir = resolveMilestonePath(basePath, params.milestoneId);
if (milestoneDir) {
const milestoneSummary = join(milestoneDir, `${params.milestoneId}-SUMMARY.md`);
if (existsSync(milestoneSummary)) unlinkSync(milestoneSummary);
}
const slices = getMilestoneSlices(params.milestoneId);
for (const slice of slices) {
const sliceDir = resolveSlicePath(basePath, params.milestoneId, slice.id);
if (sliceDir) {
const sliceSummary = join(sliceDir, `${slice.id}-SUMMARY.md`);
if (existsSync(sliceSummary)) unlinkSync(sliceSummary);
const sliceUat = join(sliceDir, `${slice.id}-UAT.md`);
if (existsSync(sliceUat)) unlinkSync(sliceUat);
}
const tasksDir = resolveTasksDir(basePath, params.milestoneId, slice.id);
if (tasksDir) {
const tasks = getSliceTasks(params.milestoneId, slice.id);
for (const task of tasks) {
const taskSummary = join(tasksDir, `${task.id}-SUMMARY.md`);
if (existsSync(taskSummary)) unlinkSync(taskSummary);
}
}
}
} catch (err) { debugLog("reopen-milestone-cleanup-failed", { milestoneId: params.milestoneId, error: String(err) }); }
clearPathCache();
// ── Post-mutation hook ───────────────────────────────────────────────────
try {
await renderAllProjections(basePath, params.milestoneId);
writeManifest(basePath);
appendEvent(basePath, {
cmd: "reopen-milestone",
params: {
milestoneId: params.milestoneId,
reason: params.reason ?? null,
slicesReset: slicesResetCount,
tasksReset: tasksResetCount,
},
ts: new Date().toISOString(),
actor: "agent",
actor_name: params.actorName,
trigger_reason: params.triggerReason,
});
} catch (hookErr) {
logWarning("tool", `reopen-milestone post-mutation hook warning: ${(hookErr as Error).message}`);
}
return {
milestoneId: params.milestoneId,
slicesReset: slicesResetCount,
tasksReset: tasksResetCount,
};
}

View file

@ -25,6 +25,9 @@ import { renderAllProjections } from "../workflow-projections.js";
import { writeManifest } from "../workflow-manifest.js";
import { appendEvent } from "../workflow-events.js";
import { logWarning } from "../workflow-logger.js";
import { existsSync, unlinkSync } from "node:fs";
import { join } from "node:path";
import { resolveTasksDir, resolveSlicePath, clearPathCache } from "../paths.js";
export interface ReopenSliceParams {
milestoneId: string;
@ -96,6 +99,30 @@ export async function handleReopenSlice(
// ── Invalidate caches ────────────────────────────────────────────────────
invalidateStateCache();
// ── Clean up stale filesystem artifacts (M12 fix) ────────────────────────
// Without this, the DB-filesystem reconciler sees SUMMARY.md files and
// auto-corrects tasks back to "complete", making reopen a no-op (#3161).
try {
const tasksDir = resolveTasksDir(basePath, params.milestoneId, params.sliceId);
if (tasksDir) {
const tasks = getSliceTasks(params.milestoneId, params.sliceId);
for (const task of tasks) {
const summaryPath = join(tasksDir, `${task.id}-SUMMARY.md`);
if (existsSync(summaryPath)) unlinkSync(summaryPath);
}
}
const sliceDir = resolveSlicePath(basePath, params.milestoneId, params.sliceId);
if (sliceDir) {
const sliceSummary = join(sliceDir, `${params.sliceId}-SUMMARY.md`);
if (existsSync(sliceSummary)) unlinkSync(sliceSummary);
const sliceUat = join(sliceDir, `${params.sliceId}-UAT.md`);
if (existsSync(sliceUat)) unlinkSync(sliceUat);
}
} catch (cleanupErr) {
logWarning("tool", `reopen-slice artifact cleanup warning: ${(cleanupErr as Error).message}`);
}
clearPathCache();
// ── Post-mutation hook ───────────────────────────────────────────────────
try {
await renderAllProjections(basePath, params.milestoneId);

View file

@ -23,6 +23,9 @@ import { renderAllProjections } from "../workflow-projections.js";
import { writeManifest } from "../workflow-manifest.js";
import { appendEvent } from "../workflow-events.js";
import { logWarning } from "../workflow-logger.js";
import { existsSync, unlinkSync } from "node:fs";
import { join } from "node:path";
import { resolveTasksDir, clearPathCache } from "../paths.js";
export interface ReopenTaskParams {
milestoneId: string;
@ -100,6 +103,20 @@ export async function handleReopenTask(
// ── Invalidate caches ────────────────────────────────────────────────────
invalidateStateCache();
// ── Clean up stale filesystem artifacts (M12 fix) ────────────────────────
// Without this, the DB-filesystem reconciler sees the SUMMARY.md and
// auto-corrects the task back to "complete", making reopen a no-op (#3161).
try {
const tasksDir = resolveTasksDir(basePath, params.milestoneId, params.sliceId);
if (tasksDir) {
const summaryPath = join(tasksDir, `${params.taskId}-SUMMARY.md`);
if (existsSync(summaryPath)) unlinkSync(summaryPath);
}
} catch (cleanupErr) {
logWarning("tool", `reopen-task artifact cleanup warning: ${(cleanupErr as Error).message}`);
}
clearPathCache();
// ── Post-mutation hook ───────────────────────────────────────────────────
try {
await renderAllProjections(basePath, params.milestoneId);

View file

@ -11,6 +11,7 @@ import {
insertVerificationEvidence,
upsertDecision,
openDatabase,
setTaskBlockerDiscovered,
} from "./gsd-db.js";
import { isClosedStatus } from "./status-guards.js";
import { writeManifest } from "./workflow-manifest.js";
@ -89,13 +90,11 @@ function replayEvents(events: WorkflowEvent[]): void {
break;
}
case "report_blocker": {
// report_blocker marks the task with blocker_discovered = 1
// The DB helper updateTaskStatus doesn't handle blockers,
// so we just update status to "blocked" as a best-effort replay.
const milestoneId = p["milestoneId"] as string;
const sliceId = p["sliceId"] as string;
const taskId = p["taskId"] as string;
updateTaskStatus(milestoneId, sliceId, taskId, "blocked");
setTaskBlockerDiscovered(milestoneId, sliceId, taskId, true);
break;
}
case "record_verification": {

View file

@ -58,8 +58,17 @@ let cachedRegistry: TemplateRegistry | null = null;
export function loadRegistry(): TemplateRegistry {
if (cachedRegistry) return cachedRegistry;
const content = readFileSync(registryPath, "utf-8");
cachedRegistry = JSON.parse(content) as TemplateRegistry;
if (!existsSync(registryPath)) {
cachedRegistry = { version: 1, templates: {} };
return cachedRegistry;
}
try {
const content = readFileSync(registryPath, "utf-8");
cachedRegistry = JSON.parse(content) as TemplateRegistry;
} catch {
cachedRegistry = { version: 1, templates: {} };
}
return cachedRegistry;
}

View file

@ -79,6 +79,20 @@ export function deriveWorkflowAction(input: WorkflowActionInput): WorkflowAction
primary = { label: "Start Auto", command: "/gsd auto", variant: "default" }
} else if (phase === "pre-planning" && !hasMilestones) {
primary = { label: "Initialize Project", command: "/gsd", variant: "default" }
} else if (phase === "blocked") {
primary = { label: "Blocked", command: "/gsd", variant: "default" }
disabled = true
disabledReason = "Project is blocked — check blockers"
} else if (phase === "paused") {
primary = { label: "Resume", command: "/gsd auto", variant: "default" }
} else if (phase === "validating-milestone") {
primary = { label: "Validate", command: "/gsd", variant: "default" }
} else if (phase === "completing-milestone") {
primary = { label: "Complete Milestone", command: "/gsd", variant: "default" }
} else if (phase === "needs-discussion") {
primary = { label: "Discuss", command: "/gsd", variant: "default" }
} else if (phase === "replanning-slice") {
primary = { label: "Replan", command: "/gsd", variant: "default" }
} else {
primary = { label: "Continue", command: "/gsd", variant: "default" }
}