test(state-machine): add regression suite — 86 tests across 6 files (#3161) (#3162)

Comprehensive validation of the GSD state machine identified 7 HIGH, 14 MEDIUM, and 16 LOW findings. This adds regression and integration tests covering: Unit tests (49): Event replay idempotency (M4 lossy blocker replay, M5 duplicate evidence); Reconciliation edge cases (fork detection, entity keys, conflict detection); Completion hierarchy guards (vacuous truth, phantom parents, rollback fidelity); State derivation parity (ghost milestones, phase transitions, DB/FS consistency); Stuck detection coverage (all 3 rules + documented gap for 3-unit cycles). Integration tests (37): Full happy-path lifecycle (pre-planning → complete); 12 completion guard edge cases with real handlers; 7 reopen operations, including H5 (no reopen-milestone exists); Phantom parent auto-creation (H6); State derivation consistency with live DB; Event log integrity across operations; M12 (stale SUMMARY.md causes reconciler to override reopen). Closes #3161
2026-03-31 12:54:30 -05:00 · 2026-03-31 12:54:30 -05:00 · 1e89090136
commit 1e89090136
parent fbb67f15f8
6 changed files with 1884 additions and 0 deletions
--- a/src/resources/extensions/gsd/tests/completion-hierarchy-guards.test.ts
+++ b/src/resources/extensions/gsd/tests/completion-hierarchy-guards.test.ts
@ -0,0 +1,192 @@
+// GSD State Machine Regression Tests — Completion Hierarchy & State Derivation (#3161)
+
+import { describe, test, beforeEach, afterEach } from "node:test";
+import assert from "node:assert/strict";
+
+import {
+  openDatabase,
+  closeDatabase,
+  insertMilestone,
+  insertSlice,
+  insertTask,
+  getTask,
+  getSlice,
+  getMilestone,
+  getSliceTasks,
+  updateTaskStatus,
+  updateSliceStatus,
+} from "../gsd-db.ts";
+import { isClosedStatus } from "../status-guards.ts";
+
+// ─── Setup / Teardown ──────────────────────────────────────────────────────
+
+beforeEach(() => {
+  openDatabase(":memory:"); // a new database is opened before every test
+});
+
+afterEach(() => {
+  try { closeDatabase(); } catch { /* best-effort teardown: any error thrown by closeDatabase() is ignored */ }
+});
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Tests
+// ═══════════════════════════════════════════════════════════════════════════
+
+describe("completion-hierarchy-guards", () => {
+
+  // ─── Test 1: isClosedStatus ─────────────────────────────────────────────
+  test("isClosedStatus returns true for 'complete' and 'done'", () => {
+    // Statuses that must be reported as closed, paired with their failure messages.
+    const closedCases: Array<[string, string]> = [
+      ["complete", "'complete' should be closed"],
+      ["done", "'done' should be closed"],
+    ];
+    // Statuses that must be reported as still open.
+    const openCases: Array<[string, string]> = [
+      ["pending", "'pending' should not be closed"],
+      ["in-progress", "'in-progress' should not be closed"],
+      ["blocked", "'blocked' should not be closed"],
+      ["", "empty string should not be closed"],
+      ["active", "'active' should not be closed"],
+    ];
+
+    for (const [status, message] of closedCases) {
+      assert.ok(isClosedStatus(status), message);
+    }
+    for (const [status, message] of openCases) {
+      assert.ok(!isClosedStatus(status), message);
+    }
+  });
+
+  // ─── Test 2: vacuous truth guard — slice with zero tasks ───────────────
+  test("cannot complete slice with zero tasks — vacuous truth guard", () => {
+    insertMilestone({ id: "M001" });
+    insertSlice({ id: "S01", milestoneId: "M001" });
+
+    const sliceTasks = getSliceTasks("M001", "S01");
+    assert.equal(sliceTasks.length, 0, "newly inserted slice has zero tasks");
+
+    // Array.prototype.every is vacuously true on an empty array, so the
+    // completability check replicated here also requires at least one task.
+    const hasTasks = sliceTasks.length > 0;
+    const allTasksClosed = sliceTasks.every(task => isClosedStatus(task.status));
+    assert.equal(
+      hasTasks && allTasksClosed,
+      false,
+      "vacuous truth guard: zero tasks → not completable",
+    );
+  });
+
+  // ─── Test 3: cannot complete slice with incomplete tasks ─────────────────
+  test("cannot complete slice with incomplete tasks", () => {
+    insertMilestone({ id: "M001" });
+    insertSlice({ id: "S01", milestoneId: "M001" });
+    // One closed task and one open task under the same slice.
+    insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "done" });
+    insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", status: "pending" });
+
+    const sliceTasks = getSliceTasks("M001", "S01");
+    assert.equal(sliceTasks.length, 2, "slice has 2 tasks");
+
+    // Only the open task should survive the filter.
+    const openTasks = sliceTasks.filter(task => !isClosedStatus(task.status));
+    const [firstOpen] = openTasks;
+    assert.equal(openTasks.length, 1, "exactly one task is not closed");
+    assert.equal(firstOpen?.id, "T02", "the incomplete task is T02");
+    assert.equal(firstOpen?.status, "pending", "incomplete task status is 'pending'");
+  });
+
+  // ─── Test 4: phantom parent milestone and slice (H6) ────────────────────
+  test("task completion auto-creates phantom parent milestone and slice (H6)", () => {
+    // H6 finding: insertMilestone/insertSlice accept empty titles — phantom
+    // parents can be created without substantive content.
+    // NOTE(review): the parents are inserted directly here; no task-completion
+    // handler is called, so the auto-creation path named in the title is not
+    // exercised by this unit test.
+    insertMilestone({ id: "M001" });
+    insertSlice({ id: "S01", milestoneId: "M001" });
+
+    // Existence is asserted by truthiness rather than `!== null`, so a missing
+    // row fails here whether the getter reports it as null or undefined.
+    const milestone = getMilestone("M001");
+    assert.ok(milestone, "phantom milestone M001 should exist in DB");
+    assert.equal(milestone!.title, "", "phantom milestone has empty title by default");
+
+    const slice = getSlice("M001", "S01");
+    assert.ok(slice, "phantom slice S01 should exist in DB");
+    assert.equal(slice!.title, "", "phantom slice has empty title by default");
+
+    // This documents the H6 finding: the DB allows phantom parents with
+    // no meaningful content, which can silently accept task completion calls.
+  });
+
+  // ─── Test 5: double task completion is detectable via isClosedStatus ────
+  test("double task completion is detectable via isClosedStatus", () => {
+    insertMilestone({ id: "M001" });
+    insertSlice({ id: "S01", milestoneId: "M001" });
+    insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "done" });
+
+    // Truthiness check: fails for a missing row whether it comes back as null
+    // or undefined.
+    const task = getTask("M001", "S01", "T01");
+    assert.ok(task, "task T01 should exist");
+
+    // The guard that prevents double completion: check isClosedStatus before
+    // calling updateTaskStatus again. Asserted once — a second assertion on the
+    // same expression would add no coverage.
+    assert.ok(
+      isClosedStatus(task!.status),
+      "isClosedStatus detects already-closed task — prevents double completion",
+    );
+  });
+
+  // ─── Test 6: updateSliceStatus rollback loses original status (M11) ─────
+  test("updateSliceStatus rollback goes to 'pending' not original status (M11)", () => {
+    insertMilestone({ id: "M001" });
+    // Insert with an explicit "pending" status; the in-progress state is set by the update below
+    insertSlice({ id: "S01", milestoneId: "M001", status: "pending" });
+
+    // Manually advance to "in_progress" equivalent via updateSliceStatus
+    updateSliceStatus("M001", "S01", "in_progress");
+    const afterProgress = getSlice("M001", "S01");
+    assert.equal(afterProgress!.status, "in_progress", "slice is in_progress after update");
+
+    // Simulate completion (the fourth argument is a completion timestamp)
+    updateSliceStatus("M001", "S01", "complete", new Date().toISOString());
+    const afterComplete = getSlice("M001", "S01");
+    assert.equal(afterComplete!.status, "complete", "slice is complete after completion");
+
+    // Simulate rollback by writing "pending" directly — nothing in this test
+    // restores the earlier "in_progress" value, so it is lost.
+    updateSliceStatus("M001", "S01", "pending");
+    const afterRollback = getSlice("M001", "S01");
+    assert.equal(
+      afterRollback!.status,
+      "pending",
+      "M11: rollback sets status to 'pending', original 'in_progress' is lost",
+    );
+    // NOTE(review): a completion timestamp was passed above; whether it is cleared
+    // by the rollback is not asserted here. Only the status field is checked.
+  });
+
+  // ─── Test 7: milestone completion requires all slices closed ─────────────
+  test("milestone completion requires all slices closed", () => {
+    insertMilestone({ id: "M001" });
+    // One closed slice and one open slice under the same milestone.
+    insertSlice({ id: "S01", milestoneId: "M001", status: "done" });
+    insertSlice({ id: "S02", milestoneId: "M001", status: "pending" });
+
+    const s01 = getSlice("M001", "S01");
+    const s02 = getSlice("M001", "S02");
+
+    // Truthiness (not `!== null`) so a missing row fails here whether the
+    // getter reports it as null or undefined.
+    assert.ok(s01, "S01 exists");
+    assert.ok(s02, "S02 exists");
+
+    const slices = [s01!, s02!];
+    const incompleteSlices = slices.filter(s => !isClosedStatus(s.status));
+    // Exactly one slice (S02) is open; `> 0` alone would also accept S01
+    // being wrongly reported as open.
+    assert.equal(
+      incompleteSlices.length,
+      1,
+      "milestone is not completable — has incomplete slices",
+    );
+    assert.equal(incompleteSlices[0]?.id, "S02", "S02 is the incomplete slice");
+    assert.equal(incompleteSlices[0]?.status, "pending", "S02 status is 'pending'");
+  });
+
+  // ─── Test 8: closed parent blocks child completion ───────────────────────
+  test("closed parent blocks child completion", () => {
+    // Insert a milestone already in 'complete' state
+    insertMilestone({ id: "M001", status: "complete" });
+    insertSlice({ id: "S01", milestoneId: "M001" });
+
+    // Truthiness check: fails for a missing row whether it comes back as null
+    // or undefined.
+    const milestone = getMilestone("M001");
+    assert.ok(milestone, "milestone M001 exists");
+
+    // The precondition a parent-status guard would test: the parent reads as
+    // closed. Asserted once; the handler-level guard itself is not invoked here.
+    assert.ok(
+      isClosedStatus(milestone!.status),
+      "parent milestone is closed — isClosedStatus returns true",
+    );
+
+    // Verify the slice itself is not yet closed
+    const slice = getSlice("M001", "S01");
+    assert.ok(slice, "slice S01 exists");
+    assert.ok(!isClosedStatus(slice!.status), "slice S01 is not yet closed (parent is already closed)");
+  });
+
+});
--- a/src/resources/extensions/gsd/tests/event-replay-idempotency.test.ts
+++ b/src/resources/extensions/gsd/tests/event-replay-idempotency.test.ts
@ -0,0 +1,140 @@
+// GSD State Machine Regression Tests — Event Replay & Reconciliation (#3161)
+
+import { describe, test, beforeEach, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import {
+  openDatabase,
+  closeDatabase,
+  insertMilestone,
+  insertSlice,
+  insertTask,
+  getTask,
+  updateTaskStatus,
+  insertVerificationEvidence,
+  upsertDecision,
+} from "../gsd-db.ts";
+import { extractEntityKey } from "../workflow-reconcile.ts";
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
+const MID = "M001";
+const SID = "S01";
+const TID = "T01";
+const TS = new Date().toISOString();
+
+/**
+ * Open a database at ":memory:" and seed a single milestone → slice → task
+ * chain (MID / SID / TID), inserting parents before children.
+ */
+function setupDb(): void {
+  openDatabase(":memory:");
+
+  const milestoneRow = { id: MID, title: "Test Milestone" };
+  const sliceRow = { id: SID, milestoneId: MID, title: "Test Slice" };
+  const taskRow = { id: TID, sliceId: SID, milestoneId: MID, title: "Test Task" };
+
+  insertMilestone(milestoneRow);
+  insertSlice(sliceRow);
+  insertTask(taskRow);
+}
+
+// ─── Tests ────────────────────────────────────────────────────────────────────
+
+describe("event-replay-idempotency", () => {
+  // Every test starts from a freshly seeded database…
+  beforeEach(setupDb);
+
+  // …and closes it again afterwards.
+  afterEach(() => {
+    closeDatabase();
+  });
+
+  test("updateTaskStatus is idempotent for complete_task replay", () => {
+    // Apply the same completion twice, as a crash-recovery replay of a
+    // complete_task event would.
+    const replayCount = 2;
+    for (let pass = 0; pass < replayCount; pass += 1) {
+      updateTaskStatus(MID, SID, TID, "done", TS);
+    }
+
+    const replayed = getTask(MID, SID, TID);
+    assert.ok(replayed !== null, "task should exist after status update");
+    assert.equal(replayed!.status, "done", "status should be 'done' after double replay");
+  });
+
+  test("updateTaskStatus is idempotent for start_task replay", () => {
+    // Apply the same start twice, as a replay of a start_task event would.
+    const replayCount = 2;
+    for (let pass = 0; pass < replayCount; pass += 1) {
+      updateTaskStatus(MID, SID, TID, "in-progress");
+    }
+
+    const replayed = getTask(MID, SID, TID);
+    assert.ok(replayed !== null, "task should exist after status update");
+    assert.equal(replayed!.status, "in-progress", "status should be 'in-progress' after double replay");
+  });
+
+  test("updateTaskStatus for report_blocker does not set blocker_discovered flag (M4)", () => {
+    // M4 finding: replaying report_blocker only moves the status to "blocked".
+    // This test pins that the blocker_discovered field stays false afterwards,
+    // i.e. the replay recovers the status but not the flag.
+    updateTaskStatus(MID, SID, TID, "blocked");
+
+    const blockedTask = getTask(MID, SID, TID);
+    assert.ok(blockedTask !== null, "task should exist after blocked status update");
+
+    const { status, blocker_discovered: blockerFlag } = blockedTask!;
+    assert.equal(status, "blocked", "status should be 'blocked'");
+    assert.equal(
+      blockerFlag,
+      false,
+      "blocker_discovered should remain false — report_blocker replay is lossy (M4 finding)",
+    );
+  });
+
+  test("insertVerificationEvidence is NOT idempotent — duplicates accumulate (M5)", () => {
+    // M5 finding: replaying the same record_verification event inserts the
+    // same evidence again. Both inserts must succeed without throwing.
+    // NOTE(review): the resulting row count is not asserted here — only that
+    // neither call throws.
+    const evidence = {
+      taskId: TID,
+      sliceId: SID,
+      milestoneId: MID,
+      command: "npm test",
+      exitCode: 0,
+      verdict: "pass",
+      durationMs: 1200,
+    };
+
+    const failureMessages = [
+      "first insertVerificationEvidence call should not throw",
+      "second insertVerificationEvidence call should not throw — duplicates accumulate silently (M5 finding)",
+    ];
+
+    for (const message of failureMessages) {
+      assert.doesNotThrow(() => insertVerificationEvidence(evidence), message);
+    }
+  });
+
+  test("upsertDecision is idempotent via INSERT OR REPLACE", () => {
+    // save_decision replay goes through upsertDecision; replaying the same
+    // decision id twice must not raise.
+    const base = {
+      id: "arch:logging",
+      when_context: "during planning",
+      scope: "arch",
+      decision: "logging",
+      rationale: "structured logs",
+      revisable: "yes" as const,
+      made_by: "agent" as const,
+      superseded_by: null,
+    };
+
+    // Explicit assertions (matching the M5 test's style) so the test states
+    // what it checks instead of relying on an uncaught throw.
+    assert.doesNotThrow(
+      () => upsertDecision({ ...base, choice: "structured" }),
+      "first upsertDecision call should not throw",
+    );
+    assert.doesNotThrow(
+      () => upsertDecision({ ...base, choice: "unstructured" }),
+      "second upsertDecision call with the same id should not throw",
+    );
+
+    // NOTE(review): the stored row is not read back, so the expectation that
+    // the final choice is "unstructured" is not verified by this test.
+  });
+
+  test("unknown event commands in replayEvents are silently skipped — extractEntityKey returns null for unknown commands", () => {
+    // replayEvents itself is not called here; extractEntityKey stands in for
+    // it on the assumption that both recognise the same command set.
+    // A command neither knows must yield null rather than throw.
+    const unknownCommand = "future_command";
+    const timestamp = new Date().toISOString();
+
+    const entityKey = extractEntityKey({
+      cmd: unknownCommand,
+      params: { foo: "bar" },
+      ts: timestamp,
+      hash: "0000000000000000",
+      actor: "agent" as const,
+      session_id: "test-session",
+    });
+
+    assert.equal(entityKey, null, "extractEntityKey should return null for unknown commands");
+  });
+});
--- a/src/resources/extensions/gsd/tests/integration/state-machine-live-validation.test.ts
+++ b/src/resources/extensions/gsd/tests/integration/state-machine-live-validation.test.ts
@ -0,0 +1,959 @@
+/**
+ * state-machine-live-validation.test.ts — Live operational validation of the
+ * GSD state machine with real handlers, real DB, and real filesystem.
+ *
+ * Exercises every phase transition, completion guard, edge case, and reopen
+ * path end-to-end. This is NOT a unit test — it drives the actual tool handlers
+ * against a real temp directory with a real SQLite database.
+ *
+ * Findings reference: #3161 (state machine validation report)
+ */
+
+// GSD State Machine Live Validation (#3161)
+
+import { describe, test, beforeEach, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import {
+  mkdtempSync,
+  mkdirSync,
+  writeFileSync,
+  readFileSync,
+  rmSync,
+  existsSync,
+} from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+
+// ── DB layer ──────────────────────────────────────────────────────────────
+import {
+  openDatabase,
+  closeDatabase,
+  insertMilestone,
+  insertSlice,
+  insertTask,
+  getTask,
+  getSlice,
+  getMilestone,
+  getSliceTasks,
+  getMilestoneSlices,
+  updateTaskStatus,
+  updateSliceStatus,
+  updateMilestoneStatus,
+} from "../../gsd-db.ts";
+
+// ── Tool handlers ─────────────────────────────────────────────────────────
+import { handleCompleteTask } from "../../tools/complete-task.ts";
+import { handleCompleteSlice } from "../../tools/complete-slice.ts";
+import { handleCompleteMilestone } from "../../tools/complete-milestone.ts";
+import { handleReopenTask } from "../../tools/reopen-task.ts";
+import { handleReopenSlice } from "../../tools/reopen-slice.ts";
+
+// ── State derivation ──────────────────────────────────────────────────────
+import {
+  deriveState,
+  deriveStateFromDb,
+  invalidateStateCache,
+  isGhostMilestone,
+} from "../../state.ts";
+
+// ── Status guards ─────────────────────────────────────────────────────────
+import { isClosedStatus } from "../../status-guards.ts";
+
+// ── Events ────────────────────────────────────────────────────────────────
+import { readEvents } from "../../workflow-events.ts";
+
+// ── Cache invalidation ───────────────────────────────────────────────────
+import { invalidateAllCaches } from "../../cache.ts";
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Fixture Helpers
+// ═══════════════════════════════════════════════════════════════════════════
+
+/** Create a uniquely named directory under the OS temp dir and return its path. */
+function makeTempDir(): string {
+  const prefix = join(tmpdir(), "gsd-live-validation-");
+  return mkdtempSync(prefix);
+}
+
+/**
+ * Build a temporary project with a populated .gsd/ tree and return its root.
+ *
+ * Files written, in order:
+ * - milestones/M001/M001-CONTEXT.md and M001-ROADMAP.md
+ * - slices/S01/S01-PLAN.md (tasks T01, T02) plus tasks/T01-PLAN.md, tasks/T02-PLAN.md
+ * - slices/S02/S02-PLAN.md (task T01) plus tasks/T01-PLAN.md
+ * - REQUIREMENTS.md and DECISIONS.md directly under .gsd/
+ */
+function createFullFixture(): string {
+  const root = makeTempDir();
+  const gsdRoot = join(root, ".gsd");
+  const milestoneDir = join(gsdRoot, "milestones", "M001");
+  const firstSliceDir = join(milestoneDir, "slices", "S01");
+  const firstSliceTasks = join(firstSliceDir, "tasks");
+  const secondSliceDir = join(milestoneDir, "slices", "S02");
+  const secondSliceTasks = join(secondSliceDir, "tasks");
+
+  // Write a file whose content is the given lines joined by "\n" (no trailing newline).
+  const writeLines = (path: string, lines: string[]): void => {
+    writeFileSync(path, lines.join("\n"));
+  };
+
+  // Creating the deepest directories also creates every parent.
+  for (const dir of [firstSliceTasks, secondSliceTasks]) {
+    mkdirSync(dir, { recursive: true });
+  }
+
+  // CONTEXT.md — needed to get past needs-discussion
+  writeLines(join(milestoneDir, "M001-CONTEXT.md"), [
+    "# M001: Live Validation Milestone",
+    "",
+    "## Purpose",
+    "Validate the state machine end-to-end.",
+  ]);
+
+  // ROADMAP.md — two unchecked slices plus a boundary map
+  writeLines(join(milestoneDir, "M001-ROADMAP.md"), [
+    "# M001: Live Validation Milestone",
+    "",
+    "## Vision",
+    "Prove state machine correctness.",
+    "",
+    "## Success Criteria",
+    "- All operations succeed",
+    "",
+    "## Slices",
+    "",
+    "- [ ] **S01: First Feature** `risk:low` `depends:[]`",
+    "  - After this: First feature proven.",
+    "",
+    "- [ ] **S02: Second Feature** `risk:low` `depends:[]`",
+    "  - After this: Second feature proven.",
+    "",
+    "## Boundary Map",
+    "",
+    "| From | To | Produces | Consumes |",
+    "|------|----|----------|----------|",
+    "| S01 | terminal | feature-a | nothing |",
+    "| S02 | terminal | feature-b | nothing |",
+  ]);
+
+  // S01 plan with two unchecked tasks
+  writeLines(join(firstSliceDir, "S01-PLAN.md"), [
+    "# S01: First Feature",
+    "",
+    "**Goal:** Implement first feature.",
+    "",
+    "## Tasks",
+    "",
+    "- [ ] **T01: Implementation** `est:30m`",
+    "  - Do: Build it",
+    "  - Verify: Run tests",
+    "",
+    "- [ ] **T02: Testing** `est:30m`",
+    "  - Do: Write tests",
+    "  - Verify: Run tests",
+  ]);
+
+  // S01 task plan stubs
+  writeFileSync(join(firstSliceTasks, "T01-PLAN.md"), "# T01 Plan\nImplement.\n");
+  writeFileSync(join(firstSliceTasks, "T02-PLAN.md"), "# T02 Plan\nTest.\n");
+
+  // S02 plan with a single unchecked task
+  writeLines(join(secondSliceDir, "S02-PLAN.md"), [
+    "# S02: Second Feature",
+    "",
+    "**Goal:** Implement second feature.",
+    "",
+    "## Tasks",
+    "",
+    "- [ ] **T01: Implementation** `est:30m`",
+    "  - Do: Build it",
+    "  - Verify: Run tests",
+  ]);
+
+  // S02 task plan stub
+  writeFileSync(join(secondSliceTasks, "T01-PLAN.md"), "# T01 Plan\nBuild.\n");
+
+  // REQUIREMENTS.md — one active requirement owned by S01
+  writeLines(join(gsdRoot, "REQUIREMENTS.md"), [
+    "# Requirements",
+    "",
+    "## Active",
+    "",
+    "| ID | Description | Owner |",
+    "|----|-------------|-------|",
+    "| R001 | Feature works | S01 |",
+  ]);
+
+  // DECISIONS.md — table header only, no rows
+  writeLines(join(gsdRoot, "DECISIONS.md"), [
+    "# Decisions",
+    "",
+    "| ID | Decision | Choice | Rationale |",
+    "|----|----------|--------|-----------|",
+  ]);
+
+  return root;
+}
+
+/**
+ * Build a parameter object for a task-completion call.
+ *
+ * @param taskId - task identifier; also interpolated into oneLiner/narrative
+ * @param sliceId - owning slice identifier
+ * @param milestoneId - owning milestone identifier
+ * @param overrides - optional fields that replace the defaults of the same name
+ * @returns the default fields with `overrides` spread on top
+ */
+function makeTaskParams(
+  taskId: string,
+  sliceId: string,
+  milestoneId: string,
+  overrides?: Partial<Record<string, unknown>>,
+): Record<string, unknown> {
+  const passingEvidence = { command: "npm test", exitCode: 0, verdict: "pass", durationMs: 1000 };
+
+  const defaults: Record<string, unknown> = {
+    taskId,
+    sliceId,
+    milestoneId,
+    oneLiner: `Completed ${taskId}`,
+    narrative: `Implemented ${taskId} with full coverage.`,
+    verification: "All tests pass.",
+    keyFiles: ["src/feature.ts"],
+    keyDecisions: [],
+    deviations: "None.",
+    knownIssues: "None.",
+    blockerDiscovered: false,
+    verificationEvidence: [passingEvidence],
+  };
+
+  // Spreading undefined adds nothing, so omitted overrides leave the defaults intact.
+  return { ...defaults, ...overrides };
+}
+
+/**
+ * Build a parameter object for a slice-completion call.
+ *
+ * @param sliceId - slice identifier; also interpolated into sliceTitle/oneLiner
+ * @param milestoneId - owning milestone identifier
+ * @returns a fresh object on every call (no arrays are shared between calls)
+ */
+function makeSliceParams(
+  sliceId: string,
+  milestoneId: string,
+): Record<string, unknown> {
+  const featureFile = "src/feature.ts";
+  const nothingToReport = "None.";
+
+  const params: Record<string, unknown> = {
+    sliceId,
+    milestoneId,
+    sliceTitle: `${sliceId} Feature`,
+    oneLiner: `${sliceId} proven`,
+    narrative: "All tasks completed.",
+    verification: "Tests pass.",
+    keyFiles: [featureFile],
+    keyDecisions: [],
+    patternsEstablished: [],
+    observabilitySurfaces: [],
+    deviations: nothingToReport,
+    knownLimitations: nothingToReport,
+    followUps: nothingToReport,
+    requirementsAdvanced: [],
+    requirementsValidated: [],
+    requirementsSurfaced: [],
+    requirementsInvalidated: [],
+    filesModified: [{ path: featureFile, description: "Feature" }],
+    uatContent: "Acceptance criteria met.",
+    provides: ["feature"],
+    requires: [],
+    affects: [],
+    drillDownPaths: [],
+  };
+
+  return params;
+}
+
+/**
+ * Build a parameter object for a milestone-completion call.
+ *
+ * @param milestoneId - milestone identifier placed in the result
+ * @returns a fresh object on every call with verificationPassed set to true
+ */
+function makeMilestoneParams(milestoneId: string): Record<string, unknown> {
+  const nothingToReport = "None.";
+
+  const params: Record<string, unknown> = {
+    milestoneId,
+    title: "Live Validation Milestone",
+    oneLiner: "Milestone proven end-to-end",
+    narrative: "All slices completed and verified.",
+    successCriteriaResults: "All criteria met.",
+    definitionOfDoneResults: "All items checked.",
+    requirementOutcomes: "All requirements satisfied.",
+    keyDecisions: ["Chose approach A"],
+    keyFiles: ["src/feature.ts"],
+    lessonsLearned: ["Integration testing is valuable"],
+    followUps: nothingToReport,
+    deviations: nothingToReport,
+    verificationPassed: true,
+  };
+
+  return params;
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Test Suite
+// ═══════════════════════════════════════════════════════════════════════════
+
+describe("state-machine-live-validation", () => {
+  let base: string;
+
+  afterEach(() => {
+    // Remove the temp project even when closeDatabase() throws, so a failing
+    // teardown cannot leak the directory. The error itself still propagates.
+    try {
+      closeDatabase();
+    } finally {
+      // `base` is unset only if the very first test failed before assigning it.
+      if (base) rmSync(base, { recursive: true, force: true });
+    }
+  });
+
+  // ─────────────────────────────────────────────────────────────────────────
+  // PHASE 1: Full happy-path lifecycle
+  // ─────────────────────────────────────────────────────────────────────────
+
+  describe("happy path: full lifecycle M001 → complete", () => {
+    // Shared arrange step: build the on-disk fixture and open its database file.
+    // The root is stored in the suite-level `base` so teardown can remove it.
+    const openFixtureDb = (): void => {
+      base = createFullFixture();
+      openDatabase(join(base, ".gsd", "gsd.db"));
+    };
+
+    test("step 1: empty project derives pre-planning", async () => {
+      base = makeTempDir();
+      const milestonesDir = join(base, ".gsd", "milestones");
+      mkdirSync(milestonesDir, { recursive: true });
+
+      const derived = await deriveState(base);
+      assert.equal(derived.phase, "pre-planning");
+      assert.equal(derived.activeMilestone, null);
+    });
+
+    test("step 2: milestone with CONTEXT-DRAFT derives needs-discussion", async () => {
+      base = makeTempDir();
+      const milestoneDir = join(base, ".gsd", "milestones", "M001");
+      mkdirSync(milestoneDir, { recursive: true });
+      // Only a draft context exists — no CONTEXT, ROADMAP or PLAN files.
+      writeFileSync(join(milestoneDir, "M001-CONTEXT-DRAFT.md"), "# Draft\nDraft context.\n");
+      invalidateStateCache();
+
+      const derived = await deriveState(base);
+      assert.equal(derived.phase, "needs-discussion");
+      assert.equal(derived.activeMilestone?.id, "M001");
+    });
+
+    test("step 3: full fixture with ROADMAP+PLAN derives planning or executing", async () => {
+      openFixtureDb();
+      invalidateStateCache();
+
+      const derived = await deriveState(base);
+      // The DB is opened but no rows are inserted in this step.
+      // NOTE(review): the title names planning/executing only, yet the
+      // accepted set below also admits "pre-planning".
+      const acceptedPhases = ["planning", "executing", "pre-planning"];
+      assert.ok(
+        acceptedPhases.includes(derived.phase),
+        `expected planning/executing/pre-planning, got: ${derived.phase}`,
+      );
+    });
+
+    test("step 4: complete T01 in S01 — handler succeeds, DB reflects completion", async () => {
+      openFixtureDb();
+      // Seed the DB hierarchy, parents first.
+      insertMilestone({ id: "M001", title: "Live Validation", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", title: "First Feature", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "Implementation", status: "pending" });
+      insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", title: "Testing", status: "pending" });
+
+      const outcome = await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base);
+      assert.ok(!("error" in outcome), `expected success, got: ${JSON.stringify(outcome)}`);
+
+      // DB: the task row must now read as closed.
+      const completedTask = getTask("M001", "S01", "T01");
+      assert.ok(completedTask, "T01 should exist in DB");
+      assert.ok(
+        isClosedStatus(completedTask!.status),
+        `T01 status should be closed, got: ${completedTask!.status}`,
+      );
+
+      // Disk: a task summary must have been written next to the task plan.
+      const taskSummary = join(
+        base, ".gsd", "milestones", "M001", "slices", "S01", "tasks", "T01-SUMMARY.md",
+      );
+      assert.ok(existsSync(taskSummary), "T01-SUMMARY.md should exist on disk");
+
+      // Event log: a complete-task entry for T01 must be present.
+      const loggedEvents = readEvents(join(base, ".gsd", "event-log.jsonl"));
+      const completionEvent = loggedEvents.find(
+        e => e.cmd === "complete-task" && (e.params as any).taskId === "T01",
+      );
+      assert.ok(completionEvent, "event log should contain complete-task for T01");
+    });
+
+    test("step 5: complete T02 in S01 — both tasks now done", async () => {
+      openFixtureDb();
+      insertMilestone({ id: "M001", title: "Live Validation", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", title: "First Feature", status: "in_progress" });
+      // T01 is seeded as already complete; only T02 is left to finish.
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "Implementation", status: "complete" });
+      insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", title: "Testing", status: "pending" });
+
+      const outcome = await handleCompleteTask(makeTaskParams("T02", "S01", "M001") as any, base);
+      assert.ok(!("error" in outcome), `expected success, got: ${JSON.stringify(outcome)}`);
+
+      // Both tasks must now read as closed.
+      const sliceTasks = getSliceTasks("M001", "S01");
+      assert.equal(sliceTasks.length, 2);
+      assert.ok(sliceTasks.every(t => isClosedStatus(t.status)), "all tasks should be closed");
+    });
+
+    test("step 6: complete slice S01 — all tasks done, slice closes", async () => {
+      openFixtureDb();
+      insertMilestone({ id: "M001", title: "Live Validation", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", title: "First Feature", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "Impl", status: "complete" });
+      insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", title: "Test", status: "complete" });
+
+      const outcome = await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base);
+      assert.ok(!("error" in outcome), `expected success, got: ${JSON.stringify(outcome)}`);
+
+      const closedSlice = getSlice("M001", "S01");
+      assert.ok(closedSlice, "S01 should exist");
+      assert.ok(
+        isClosedStatus(closedSlice!.status),
+        `S01 should be closed, got: ${closedSlice!.status}`,
+      );
+
+      // Disk: the slice summary must have been written.
+      const sliceSummary = join(base, ".gsd", "milestones", "M001", "slices", "S01", "S01-SUMMARY.md");
+      assert.ok(existsSync(sliceSummary), "S01-SUMMARY.md should exist");
+    });
+
+    test("step 7: complete S02 task + slice — both slices done", async () => {
+      openFixtureDb();
+      insertMilestone({ id: "M001", title: "Live Validation", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "complete" });
+      insertSlice({ id: "S02", milestoneId: "M001", title: "Second", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "Impl", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S02", milestoneId: "M001", title: "Impl", status: "pending" });
+
+      // First the remaining task…
+      const taskOutcome = await handleCompleteTask(makeTaskParams("T01", "S02", "M001") as any, base);
+      assert.ok(!("error" in taskOutcome), `task: ${JSON.stringify(taskOutcome)}`);
+
+      // …then the slice that owns it.
+      const sliceOutcome = await handleCompleteSlice(makeSliceParams("S02", "M001") as any, base);
+      assert.ok(!("error" in sliceOutcome), `slice: ${JSON.stringify(sliceOutcome)}`);
+
+      // Every slice of the milestone must now read as closed.
+      const milestoneSlices = getMilestoneSlices("M001");
+      assert.ok(milestoneSlices.length >= 2, "should have 2+ slices");
+      assert.ok(milestoneSlices.every(s => isClosedStatus(s.status)), "all slices should be closed");
+    });
+
+    test("step 8: complete milestone M001 — full lifecycle done", async () => {
+      openFixtureDb();
+      insertMilestone({ id: "M001", title: "Live Validation", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "complete" });
+      insertSlice({ id: "S02", milestoneId: "M001", title: "Second", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", title: "Impl", status: "complete" });
+      insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", title: "Test", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S02", milestoneId: "M001", title: "Impl", status: "complete" });
+
+      const outcome = await handleCompleteMilestone(makeMilestoneParams("M001") as any, base);
+      assert.ok(!("error" in outcome), `expected success, got: ${JSON.stringify(outcome)}`);
+
+      const closedMilestone = getMilestone("M001");
+      assert.ok(closedMilestone, "M001 should exist");
+      assert.ok(
+        isClosedStatus(closedMilestone!.status),
+        `M001 should be closed, got: ${closedMilestone!.status}`,
+      );
+
+      // Disk: the milestone summary must have been written.
+      const milestoneSummary = join(base, ".gsd", "milestones", "M001", "M001-SUMMARY.md");
+      assert.ok(existsSync(milestoneSummary), "M001-SUMMARY.md should exist");
+    });
+  });
+
+  // ─────────────────────────────────────────────────────────────────────────
+  // PHASE 2: Completion guard edge cases
+  // ─────────────────────────────────────────────────────────────────────────
+
+  describe("completion guards — edge cases", () => {
+    test("cannot complete task with empty taskId", async () => {
+      // Nothing is seeded: the blank taskId by itself has to be refused.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+
+      const outcome = await handleCompleteTask(makeTaskParams("", "S01", "M001") as any, base);
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /taskId is required/);
+    });
+
+    test("cannot complete task in closed milestone", async () => {
+      // Pending task under a milestone that is already complete.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Done", status: "complete" });
+      insertSlice({ id: "S01", milestoneId: "M001" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" });
+
+      const outcome = await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base);
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /closed milestone/);
+    });
+
+    test("cannot complete task in closed slice", async () => {
+      // Pending task under an active milestone whose slice is already complete.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" });
+
+      const outcome = await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base);
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /closed slice/);
+    });
+
+    test("double task completion returns error (H5-related)", async () => {
+      // The task row is seeded as complete, so this call is a second completion.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" });
+
+      const outcome = await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base);
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /already complete/);
+    });
+
+    test("cannot complete slice with zero tasks — vacuous truth guard", async () => {
+      // The slice deliberately has no task rows at all.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "in_progress" });
+
+      const outcome = await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base);
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /no tasks found/);
+    });
+
+    test("cannot complete slice with incomplete tasks", async () => {
+      // One complete task and one pending task in the same slice.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" });
+      insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", status: "pending" });
+
+      const outcome = await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base);
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /incomplete tasks/);
+    });
+
+    test("double slice completion returns error", async () => {
+      // The slice row is seeded as complete, so this call is a second completion.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" });
+
+      const outcome = await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base);
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /already complete/);
+    });
+
+    test("cannot complete milestone with zero slices", async () => {
+      // Only the milestone row exists; it has no slices.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+
+      const outcome = await handleCompleteMilestone(makeMilestoneParams("M001") as any, base);
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /no slices found/);
+    });
+
+    test("cannot complete milestone with incomplete slices", async () => {
+      // S01 is complete, S02 is still in progress with a pending task.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "complete" });
+      insertSlice({ id: "S02", milestoneId: "M001", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S02", milestoneId: "M001", status: "pending" });
+
+      const outcome = await handleCompleteMilestone(makeMilestoneParams("M001") as any, base);
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /incomplete slices/);
+    });
+
+    test("cannot complete milestone with incomplete tasks in complete slice (deep check)", async () => {
+      // Inconsistent fixture on purpose: the slice says "complete" while its
+      // only task is still pending.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" });
+
+      const outcome = await handleCompleteMilestone(makeMilestoneParams("M001") as any, base);
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /incomplete tasks/);
+    });
+
+    test("cannot complete milestone without verificationPassed=true", async () => {
+      // Hierarchy is fully complete; only the verification flag is negative.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" });
+
+      const unverifiedParams = makeMilestoneParams("M001");
+      unverifiedParams.verificationPassed = false;
+
+      const outcome = await handleCompleteMilestone(unverifiedParams as any, base);
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /verification did not pass/);
+    });
+
+    test("double milestone completion returns error", async () => {
+      // The milestone row is seeded as complete, so this call is a second completion.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Done", status: "complete" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" });
+
+      const outcome = await handleCompleteMilestone(makeMilestoneParams("M001") as any, base);
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /already complete/);
+    });
+  });
+
+  // ─────────────────────────────────────────────────────────────────────────
+  // PHASE 3: Reopen operations
+  // ─────────────────────────────────────────────────────────────────────────
+
+  describe("reopen operations", () => {
+    test("reopen task: resets completed task to pending", async () => {
+      // Complete task inside an open slice of an active milestone.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" });
+
+      const outcome = await handleReopenTask(
+        { milestoneId: "M001", sliceId: "S01", taskId: "T01", reason: "Need to redo" },
+        base,
+      );
+      assert.ok(!("error" in outcome), `expected success: ${JSON.stringify(outcome)}`);
+
+      // The stored status must be back to "pending".
+      const reopened = getTask("M001", "S01", "T01");
+      assert.equal(reopened!.status, "pending");
+    });
+
+    test("cannot reopen task that is not complete", async () => {
+      // The task is still pending, so there is nothing to reopen.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" });
+
+      const outcome = await handleReopenTask(
+        { milestoneId: "M001", sliceId: "S01", taskId: "T01" },
+        base,
+      );
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /not complete/);
+    });
+
+    test("cannot reopen task in closed slice — must reopen slice first", async () => {
+      // Complete task whose parent slice is also complete.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" });
+
+      const outcome = await handleReopenTask(
+        { milestoneId: "M001", sliceId: "S01", taskId: "T01" },
+        base,
+      );
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /closed slice/);
+    });
+
+    test("cannot reopen task in closed milestone", async () => {
+      // Entire hierarchy is complete, including the milestone.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Done", status: "complete" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" });
+
+      const outcome = await handleReopenTask(
+        { milestoneId: "M001", sliceId: "S01", taskId: "T01" },
+        base,
+      );
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /closed milestone/);
+    });
+
+    test("reopen slice: resets slice to in_progress and all tasks to pending", async () => {
+      // Complete slice with two complete tasks under an active milestone.
+      base = createFullFixture();
+      openDatabase(join(base, ".gsd", "gsd.db"));
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" });
+      insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", status: "complete" });
+
+      const result = await handleReopenSlice(
+        { milestoneId: "M001", sliceId: "S01", reason: "Need rework" },
+        base,
+      );
+      assert.ok(!("error" in result), `expected success: ${JSON.stringify(result)}`);
+      assert.equal((result as any).tasksReset, 2);
+
+      // Verify slice state
+      const slice = getSlice("M001", "S01");
+      assert.equal(slice!.status, "in_progress");
+
+      // Verify all tasks reset to pending. The length check comes first because
+      // `every` is vacuously true on an empty list and would hide a lookup that
+      // returned no rows.
+      const tasks = getSliceTasks("M001", "S01");
+      assert.equal(tasks.length, 2, "both seeded tasks should still be listed for the slice");
+      assert.ok(tasks.every(t => t.status === "pending"), "all tasks should be pending after slice reopen");
+    });
+
+    test("cannot reopen slice in closed milestone", async () => {
+      // Complete slice under a milestone that is itself complete.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Done", status: "complete" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "complete" });
+
+      const outcome = await handleReopenSlice(
+        { milestoneId: "M001", sliceId: "S01" },
+        base,
+      );
+      assert.ok("error" in outcome);
+      const { error } = outcome as any;
+      assert.match(error, /closed milestone/);
+    });
+
+    test("no reopen-milestone tool exists — milestone completion is irrevocable (H5)", async () => {
+      // Documents finding H5: no handleReopenMilestone function is available,
+      // so a completed milestone can only be reverted by writing to the DB.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Done", status: "complete" });
+
+      const before = getMilestone("M001");
+      assert.ok(isClosedStatus(before!.status), "milestone is closed");
+
+      // Direct status write is the only escape hatch exercised here.
+      updateMilestoneStatus("M001", "active", null);
+
+      const after = getMilestone("M001");
+      assert.equal(after!.status, "active", "direct DB manipulation can reopen, but no tool exposes this");
+    });
+  });
+
+  // ─────────────────────────────────────────────────────────────────────────
+  // PHASE 4: Phantom parents and auto-creation (H6)
+  // ─────────────────────────────────────────────────────────────────────────
+
+  describe("phantom parent auto-creation (H6)", () => {
+    test("completing task for non-existent milestone/slice auto-creates them", async () => {
+      // Empty database: neither M099 nor S99 is inserted before the call.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+
+      const outcome = await handleCompleteTask(makeTaskParams("T01", "S99", "M099") as any, base);
+      assert.ok(!("error" in outcome), `expected success: ${JSON.stringify(outcome)}`);
+
+      // A milestone row now exists, with an empty title.
+      const phantomMilestone = getMilestone("M099");
+      assert.ok(phantomMilestone, "phantom milestone M099 should exist");
+      assert.equal(phantomMilestone!.title, "", "phantom milestone has empty title");
+
+      // A slice row now exists as well.
+      const phantomSlice = getSlice("M099", "S99");
+      assert.ok(phantomSlice, "phantom slice S99 should exist");
+    });
+
+    test("completing slice for non-existent milestone auto-creates it", async () => {
+      base = createFullFixture();
+      openDatabase(join(base, ".gsd", "gsd.db"));
+      // NOTE(review): despite the title, the milestone and slice are seeded
+      // explicitly here, so the auto-creation path is not exercised by this
+      // test. The parents are presumably needed so the complete task row that
+      // satisfies the slice completion guard can be inserted — confirm.
+      insertMilestone({ id: "M099" });
+      insertSlice({ id: "S99", milestoneId: "M099" });
+      insertTask({ id: "T01", sliceId: "S99", milestoneId: "M099", status: "complete" });
+
+      const result = await handleCompleteSlice(makeSliceParams("S99", "M099") as any, base);
+      assert.ok(!("error" in result), `expected success: ${JSON.stringify(result)}`);
+
+      // Verify the outcome in the DB, not just the absence of an error.
+      const milestone = getMilestone("M099");
+      assert.ok(milestone, "milestone M099 should exist after slice completion");
+      const slice = getSlice("M099", "S99");
+      assert.ok(slice, "slice S99 should exist after completion");
+      assert.ok(isClosedStatus(slice!.status), `S99 should be closed, got: ${slice!.status}`);
+    });
+  });
+
+  // ─────────────────────────────────────────────────────────────────────────
+  // PHASE 5: State derivation consistency
+  // ─────────────────────────────────────────────────────────────────────────
+
+  describe("state derivation with live DB", () => {
+    test("deriveStateFromDb reflects task completion immediately", async () => {
+      // In-progress slice with two pending tasks.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" });
+      insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", status: "pending" });
+
+      // Baseline: nothing completed yet.
+      invalidateStateCache();
+      const initial = await deriveStateFromDb(base);
+      assert.equal(initial.phase, "executing", `before: expected executing, got ${initial.phase}`);
+
+      // Completing T01 alone keeps the phase at executing because T02 is pending.
+      updateTaskStatus("M001", "S01", "T01", "complete", new Date().toISOString());
+      invalidateStateCache();
+      const afterFirst = await deriveStateFromDb(base);
+      assert.equal(afterFirst.phase, "executing", `after T01: expected executing, got ${afterFirst.phase}`);
+
+      // Completing T02 as well moves the phase to summarizing.
+      updateTaskStatus("M001", "S01", "T02", "complete", new Date().toISOString());
+      invalidateStateCache();
+      const afterSecond = await deriveStateFromDb(base);
+      assert.equal(afterSecond.phase, "summarizing", `after T02: expected summarizing, got ${afterSecond.phase}`);
+    });
+
+    test("deriveStateFromDb reflects slice completion → next slice or validating", async () => {
+      // S01 is finished; S02 is in progress with one pending task.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "complete" });
+      insertSlice({ id: "S02", milestoneId: "M001", title: "Second", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S02", milestoneId: "M001", status: "pending" });
+
+      invalidateStateCache();
+      const derived = await deriveStateFromDb(base);
+
+      // The derived state must be executing, with S02 as the active slice.
+      assert.equal(derived.phase, "executing", `expected executing for S02, got ${derived.phase}`);
+      assert.equal(derived.activeSlice?.id, "S02", "active slice should be S02");
+    });
+
+    test("deriveStateFromDb with all slices done → validating-milestone", async () => {
+      // Both slices and their tasks are complete; the milestone is still active.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "complete" });
+      insertSlice({ id: "S02", milestoneId: "M001", title: "Second", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" });
+      insertTask({ id: "T01", sliceId: "S02", milestoneId: "M001", status: "complete" });
+
+      invalidateStateCache();
+      const derived = await deriveStateFromDb(base);
+      assert.equal(derived.phase, "validating-milestone", `expected validating-milestone, got ${derived.phase}`);
+    });
+
+    test("ghost milestone is skipped by deriveState", async () => {
+      base = makeTempDir();
+      const milestonesRoot = join(base, ".gsd", "milestones");
+      const ghostDir = join(milestonesRoot, "M001");
+      const realDir = join(milestonesRoot, "M002");
+
+      // M001 is only an empty directory; M002 carries a context draft file.
+      mkdirSync(ghostDir, { recursive: true });
+      mkdirSync(realDir, { recursive: true });
+      writeFileSync(join(realDir, "M002-CONTEXT-DRAFT.md"), "# Draft\nContent.\n");
+
+      assert.ok(isGhostMilestone(base, "M001"), "M001 should be ghost");
+      assert.ok(!isGhostMilestone(base, "M002"), "M002 should not be ghost");
+
+      // The derived active milestone must be M002, not the ghost.
+      invalidateStateCache();
+      const derived = await deriveState(base);
+      assert.equal(derived.activeMilestone?.id, "M002", "should skip ghost M001 and use M002");
+    });
+  });
+
+  // ─────────────────────────────────────────────────────────────────────────
+  // PHASE 6: Event log integrity
+  // ─────────────────────────────────────────────────────────────────────────
+
+  describe("event log integrity across operations", () => {
+    test("full operation sequence produces correct event log", async () => {
+      // In-progress slice with two pending tasks, driven through the real handlers.
+      base = createFullFixture();
+      openDatabase(join(base, ".gsd", "gsd.db"));
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" });
+      insertTask({ id: "T02", sliceId: "S01", milestoneId: "M001", status: "pending" });
+
+      // Each handler result is checked so that a failing operation is reported
+      // here rather than as a confusing event-count mismatch further down.
+      const t1 = await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base);
+      assert.ok(!("error" in t1), `complete T01: ${JSON.stringify(t1)}`);
+      const t2 = await handleCompleteTask(makeTaskParams("T02", "S01", "M001") as any, base);
+      assert.ok(!("error" in t2), `complete T02: ${JSON.stringify(t2)}`);
+      const s1 = await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base);
+      assert.ok(!("error" in s1), `complete S01: ${JSON.stringify(s1)}`);
+
+      const events = readEvents(join(base, ".gsd", "event-log.jsonl"));
+
+      // Should have at least 3 events: 2 task completions + 1 slice completion
+      assert.ok(events.length >= 3, `expected ≥3 events, got ${events.length}`);
+
+      const taskEvents = events.filter(e => e.cmd === "complete-task");
+      assert.equal(taskEvents.length, 2, "2 task completion events");
+
+      const sliceEvents = events.filter(e => e.cmd === "complete-slice");
+      assert.equal(sliceEvents.length, 1, "1 slice completion event");
+
+      // Events are ordered chronologically (each ts is compared with its predecessor)
+      for (let i = 1; i < events.length; i++) {
+        assert.ok(
+          events[i]!.ts >= events[i - 1]!.ts,
+          `events should be chronologically ordered: ${events[i - 1]!.ts} <= ${events[i]!.ts}`,
+        );
+      }
+
+      // All events have hashes and session IDs
+      for (const event of events) {
+        assert.ok(event.hash, "event should have hash");
+        assert.ok(event.session_id, "event should have session_id");
+      }
+    });
+
+    test("reopen operations produce events", async () => {
+      // Complete task inside an open slice, reopened through the handler.
+      base = createFullFixture();
+      openDatabase(join(base, ".gsd", "gsd.db"));
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "complete" });
+
+      // Check the handler result so a rejected reopen is reported directly
+      // instead of surfacing as a missing event below.
+      const result = await handleReopenTask(
+        { milestoneId: "M001", sliceId: "S01", taskId: "T01", reason: "redo" },
+        base,
+      );
+      assert.ok(!("error" in result), `reopen task: ${JSON.stringify(result)}`);
+
+      // The log must contain a reopen-task event carrying the request params.
+      const events = readEvents(join(base, ".gsd", "event-log.jsonl"));
+      const reopenEvent = events.find(e => e.cmd === "reopen-task");
+      assert.ok(reopenEvent, "should have reopen-task event");
+      assert.equal((reopenEvent!.params as any).taskId, "T01");
+      assert.equal((reopenEvent!.params as any).reason, "redo");
+    });
+  });
+
+  // ─────────────────────────────────────────────────────────────────────────
+  // PHASE 7: Reopen-then-redo cycle
+  // ─────────────────────────────────────────────────────────────────────────
+
+  describe("reopen-then-redo cycle", () => {
+    test("complete → reopen → M12: stale SUMMARY causes immediate auto-reconcile", async () => {
+      // Finding M12: reopen-task leaves the task SUMMARY.md on disk. Per the
+      // finding, the reopen handler's post-mutation hook calls
+      // renderAllProjections, which triggers deriveStateFromDb; that sees the
+      // stale SUMMARY.md and auto-reconciles the task BACK to "complete"
+      // (#2514) within the same call.
+      //
+      // Net effect: with filesystem artifacts present, the reopen is a no-op.
+      base = createFullFixture();
+      const dbPath = join(base, ".gsd", "gsd.db");
+      openDatabase(dbPath);
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" });
+
+      // Step 1: complete through the handler, which writes T01-SUMMARY.md.
+      const completed = await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base);
+      assert.ok(!("error" in completed), `first complete: ${JSON.stringify(completed)}`);
+
+      const summaryFile = join(base, ".gsd", "milestones", "M001", "slices", "S01", "tasks", "T01-SUMMARY.md");
+      assert.ok(existsSync(summaryFile), "SUMMARY.md exists after completion");
+
+      // Step 2: reopen. The handler reports success even though, per M12, the
+      // reconciler flips the status back to "complete" right afterwards.
+      const reopened = await handleReopenTask({ milestoneId: "M001", sliceId: "S01", taskId: "T01" }, base);
+      assert.ok(!("error" in reopened), `reopen handler succeeded: ${JSON.stringify(reopened)}`);
+
+      // Step 3: the DB row reads "complete", and the summary file is still there.
+      const finalTask = getTask("M001", "S01", "T01");
+      assert.equal(finalTask!.status, "complete", "M12: reconciler overrides reopen — task is back to complete");
+      assert.ok(existsSync(summaryFile), "M12: SUMMARY.md was never cleaned up");
+    });
+
+    test("complete slice → reopen → M12: reconciler overrides task reset via stale SUMMARY", async () => {
+      // Same M12 pattern at the slice level: reopen-slice resets all tasks to
+      // "pending" in DB, but task SUMMARY.md artifacts remain on disk. The
+      // reopen handler's post-mutation hook triggers reconciler which sees the
+      // stale artifacts and auto-corrects tasks back to "complete".
+      base = createFullFixture();
+      openDatabase(join(base, ".gsd", "gsd.db"));
+      insertMilestone({ id: "M001", title: "Active", status: "active" });
+      insertSlice({ id: "S01", milestoneId: "M001", title: "First", status: "in_progress" });
+      insertTask({ id: "T01", sliceId: "S01", milestoneId: "M001", status: "pending" });
+
+      // Complete task + slice. Each handler result is checked so a setup
+      // failure is reported at its source rather than by a later assertion.
+      const taskDone = await handleCompleteTask(makeTaskParams("T01", "S01", "M001") as any, base);
+      assert.ok(!("error" in taskDone), `complete task: ${JSON.stringify(taskDone)}`);
+      const sliceDone = await handleCompleteSlice(makeSliceParams("S01", "M001") as any, base);
+      assert.ok(!("error" in sliceDone), `complete slice: ${JSON.stringify(sliceDone)}`);
+      assert.ok(isClosedStatus(getSlice("M001", "S01")!.status));
+
+      // Reopen slice — transaction resets slice to in_progress and task to pending,
+      // but post-mutation hook triggers reconciler which sees stale SUMMARY.md
+      const reopened = await handleReopenSlice({ milestoneId: "M001", sliceId: "S01" }, base);
+      assert.ok(!("error" in reopened), `reopen slice: ${JSON.stringify(reopened)}`);
+
+      // Slice status is correctly in_progress (no slice SUMMARY reconciliation)
+      assert.equal(getSlice("M001", "S01")!.status, "in_progress");
+
+      // M12: Task was reset to "pending" in the transaction, but reconciler
+      // already corrected it back to "complete" from the stale SUMMARY.md
+      const task = getTask("M001", "S01", "T01");
+      assert.equal(task!.status, "complete", "M12: reconciler overrides reopen — task back to complete");
+    });
+  });
+});
--- a/src/resources/extensions/gsd/tests/reconciliation-edge-cases.test.ts
+++ b/src/resources/extensions/gsd/tests/reconciliation-edge-cases.test.ts
@ -0,0 +1,162 @@
+// GSD State Machine Regression Tests — Event Replay & Reconciliation (#3161)
+
+import { describe, test, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import { createHash } from "node:crypto";
+import * as fs from "node:fs";
+import * as path from "node:path";
+import * as os from "node:os";
+import { findForkPoint, readEvents, appendEvent } from "../workflow-events.ts";
+import type { WorkflowEvent } from "../workflow-events.ts";
+import { extractEntityKey, detectConflicts } from "../workflow-reconcile.ts";
+
+// ─── Helper: build a full WorkflowEvent from cmd + params ────────────────────
+
+/**
+ * Builds a complete WorkflowEvent for tests.
+ *
+ * The hash is the first 16 hex characters of the SHA-256 of the JSON-encoded
+ * `{ cmd, params }` pair, so it does not depend on the timestamp.
+ *
+ * @param cmd - Command name stored on the event.
+ * @param params - Command parameters stored on the event.
+ * @param ts - Optional timestamp; defaults to the current time in ISO format.
+ * @returns An event with actor "agent" and session id "test-session".
+ */
+function makeEvent(cmd: string, params: Record<string, unknown>, ts?: string): WorkflowEvent {
+  const payload = JSON.stringify({ cmd, params });
+  const digest = createHash("sha256").update(payload).digest("hex");
+  return {
+    cmd,
+    params,
+    ts: ts ?? new Date().toISOString(),
+    hash: digest.slice(0, 16),
+    actor: "agent",
+    session_id: "test-session",
+  };
+}
+
+// ─── Temp dir management ─────────────────────────────────────────────────────
+
+// Every directory handed out by tempDir(), kept so afterEach can remove them.
+const tempDirs: string[] = [];
+
+/** Creates a fresh temporary directory and records it for later removal. */
+function tempDir(): string {
+  const prefix = path.join(os.tmpdir(), "gsd-recon-test-");
+  const created = fs.mkdtempSync(prefix);
+  tempDirs.push(created);
+  return created;
+}
+
+// Drain the list of recorded temp directories and delete each one.
+afterEach(() => {
+  tempDirs.splice(0).forEach((dir) => {
+    try {
+      fs.rmSync(dir, { recursive: true, force: true });
+    } catch {
+      /* best effort */
+    }
+  });
+});
+
+// ─── Tests ────────────────────────────────────────────────────────────────────
+
+describe("reconciliation-edge-cases", () => {
+
+  // findForkPoint
+  test("findForkPoint returns -1 for completely diverged logs", () => {
+    // Two single-event logs whose events differ only by taskId.
+    const logA: WorkflowEvent[] = [
+      makeEvent("complete_task", { milestoneId: "M001", sliceId: "S01", taskId: "T01" }),
+    ];
+    const logB: WorkflowEvent[] = [
+      makeEvent("complete_task", { milestoneId: "M001", sliceId: "S01", taskId: "T02" }),
+    ];
+
+    const forkIndex = findForkPoint(logA, logB);
+    assert.equal(forkIndex, -1, "completely diverged logs should return -1");
+  });
+
+  test("findForkPoint returns last index when one log is prefix of another", () => {
+    const started = makeEvent("start_task", { milestoneId: "M001", sliceId: "S01", taskId: "T01" });
+    const taskDone = makeEvent("complete_task", { milestoneId: "M001", sliceId: "S01", taskId: "T01" });
+    const sliceDone = makeEvent("complete_slice", { milestoneId: "M001", sliceId: "S01" });
+
+    // The shorter log holds exactly the first two events of the longer one.
+    const shorter: WorkflowEvent[] = [started, taskDone];
+    const longer: WorkflowEvent[] = [started, taskDone, sliceDone];
+
+    const forkIndex = findForkPoint(shorter, longer);
+    assert.equal(forkIndex, 1, "prefix log should fork at last shared index");
+  });
+
+  test("findForkPoint returns -1 for empty logs", () => {
+    // Two distinct empty logs have no shared event at all.
+    const logA: WorkflowEvent[] = [];
+    const logB: WorkflowEvent[] = [];
+    const forkIndex = findForkPoint(logA, logB);
+    assert.equal(forkIndex, -1, "two empty logs should return -1");
+  });
+
+  // extractEntityKey
+  test("extractEntityKey returns null for malformed events (missing taskId)", () => {
+    // A complete_task event with empty params carries no taskId, so the
+    // expected key is null rather than a malformed one.
+    const malformed = makeEvent("complete_task", {});
+    const key = extractEntityKey(malformed);
+    assert.equal(key, null, "missing taskId should yield null entity key");
+  });
+
+  test("extractEntityKey returns null for unknown commands", () => {
+    // "future_cmd" is used here as a command the extractor does not know.
+    const unknown = makeEvent("future_cmd", { foo: "bar" });
+    const key = extractEntityKey(unknown);
+    assert.equal(key, null, "unknown command should yield null entity key");
+  });
+
+  test("plan_slice and complete_slice use different entity types", () => {
+    // Same sliceId on both events; only the command differs.
+    const planKey = extractEntityKey(makeEvent("plan_slice", { sliceId: "S01" }));
+    const completeKey = extractEntityKey(makeEvent("complete_slice", { sliceId: "S01" }));
+
+    // Both commands must produce a key ...
+    assert.ok(planKey !== null, "plan_slice should produce an entity key");
+    assert.ok(completeKey !== null, "complete_slice should produce an entity key");
+
+    // ... with the expected, and therefore distinct, entity types.
+    const planType = planKey!.type;
+    const completeType = completeKey!.type;
+    assert.equal(planType, "slice_plan", "plan_slice entity type should be 'slice_plan'");
+    assert.equal(completeType, "slice", "complete_slice entity type should be 'slice'");
+    assert.notEqual(
+      planType,
+      completeType,
+      "plan_slice and complete_slice must map to different entity types",
+    );
+  });
+
+  // detectConflicts
+  test("detectConflicts finds no conflicts when entities do not overlap", () => {
+    // Main completes T01 while the worktree completes T02.
+    const mainSide: WorkflowEvent[] = [
+      makeEvent("complete_task", { milestoneId: "M001", sliceId: "S01", taskId: "T01" }),
+    ];
+    const worktreeSide: WorkflowEvent[] = [
+      makeEvent("complete_task", { milestoneId: "M001", sliceId: "S01", taskId: "T02" }),
+    ];
+
+    const found = detectConflicts(mainSide, worktreeSide);
+    assert.equal(found.length, 0, "non-overlapping task edits should produce no conflicts");
+  });
+
+  test("detectConflicts flags conflict when both sides touch the same task", () => {
+    // Main starts T01 while the worktree completes the same T01.
+    const mainSide: WorkflowEvent[] = [
+      makeEvent("start_task", { milestoneId: "M001", sliceId: "S01", taskId: "T01" }),
+    ];
+    const worktreeSide: WorkflowEvent[] = [
+      makeEvent("complete_task", { milestoneId: "M001", sliceId: "S01", taskId: "T01" }),
+    ];
+
+    const found = detectConflicts(mainSide, worktreeSide);
+    assert.equal(found.length, 1, "same task touched by both sides should produce exactly one conflict");
+
+    // The single conflict must identify task T01.
+    const onlyConflict = found[0]!;
+    assert.equal(onlyConflict.entityType, "task", "conflict entityType should be 'task'");
+    assert.equal(onlyConflict.entityId, "T01", "conflict entityId should be 'T01'");
+  });
+
+  test("detectConflicts ignores events with null entity keys", () => {
+    // Both sides carry only unknown commands; such events yield null keys and
+    // must not be reported as conflicting with each other.
+    const mainSide: WorkflowEvent[] = [
+      makeEvent("unknown_future_cmd", { milestoneId: "M001" }),
+    ];
+    const worktreeSide: WorkflowEvent[] = [
+      makeEvent("another_unknown_cmd", { milestoneId: "M001" }),
+    ];
+
+    const found = detectConflicts(mainSide, worktreeSide);
+    assert.equal(found.length, 0, "unknown commands with null entity keys should not produce conflicts");
+  });
+
+  // appendEvent — filesystem creation
+  test("appendEvent creates event log if directory does not exist", () => {
+    const base = tempDir();
+    const gsdDir = path.join(base, ".gsd");
+
+    // Make sure no .gsd directory is present before the call.
+    if (fs.existsSync(gsdDir)) {
+      fs.rmSync(gsdDir, { recursive: true, force: true });
+    }
+
+    appendEvent(base, {
+      cmd: "complete_task",
+      params: { milestoneId: "M001", sliceId: "S01", taskId: "T01" },
+      ts: new Date().toISOString(),
+      actor: "agent",
+    });
+
+    // The log file must now exist and hold exactly the appended event.
+    const logFile = path.join(base, ".gsd", "event-log.jsonl");
+    assert.ok(fs.existsSync(logFile), "event-log.jsonl should be created by appendEvent");
+
+    const persisted = readEvents(logFile);
+    assert.equal(persisted.length, 1, "event log should contain exactly one event");
+    assert.equal(persisted[0]!.cmd, "complete_task", "persisted event should have the correct cmd");
+  });
+});
--- a/src/resources/extensions/gsd/tests/state-derivation-parity.test.ts
+++ b/src/resources/extensions/gsd/tests/state-derivation-parity.test.ts
@ -0,0 +1,257 @@
+// GSD State Machine Regression Tests — State Derivation Parity (#3161)
+
+import { describe, test, beforeEach, afterEach } from "node:test";
+import assert from "node:assert/strict";
+import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+
+import { deriveState, isGhostMilestone, invalidateStateCache } from "../state.ts";
+
+// ─── Fixture Helpers ───────────────────────────────────────────────────────
+
+function createFixtureBase(): string { // Returns the path of a fresh temp dir (prefix "gsd-parity-test-") under os.tmpdir().
+  const base = mkdtempSync(join(tmpdir(), "gsd-parity-test-"));
+  mkdirSync(join(base, ".gsd", "milestones"), { recursive: true }); // start with an empty .gsd/milestones tree
+  return base;
+}
+
+function cleanup(base: string): void { // Deletes the fixture tree rooted at `base`.
+  rmSync(base, { recursive: true, force: true }); // force: a path that is already gone does not throw
+}
+
+function writeMilestoneFile(base: string, mid: string, suffix: string, content: string): void { // Writes `content` to .gsd/milestones/<mid>/<mid>-<suffix>.md.
+  const dir = join(base, ".gsd", "milestones", mid);
+  mkdirSync(dir, { recursive: true }); // create the milestone directory on demand
+  writeFileSync(join(dir, `${mid}-${suffix}.md`), content);
+}
+
+function writeMilestoneValidation(base: string, mid: string, verdict: string = "pass"): void { // Writes <mid>-VALIDATION.md with a front-matter block carrying `verdict` and remediation_round: 0.
+  const dir = join(base, ".gsd", "milestones", mid);
+  mkdirSync(dir, { recursive: true });
+  writeFileSync( // NOTE(review): no test in this file calls this helper — confirm whether it is still needed
+    join(dir, `${mid}-VALIDATION.md`),
+    `---\nverdict: ${verdict}\nremediation_round: 0\n---\n\n# Validation\nValidated.`,
+  );
+}
+
+// ─── Setup / Teardown ──────────────────────────────────────────────────────
+
+beforeEach(() => { // NOTE(review): presumably drops cached deriveState results so each test reads its own fixture — confirm in ../state.ts
+  invalidateStateCache();
+});
+
+afterEach(() => { // invalidate again once the test is done, under the same assumption about the cache
+  invalidateStateCache();
+});
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Tests
+// ═══════════════════════════════════════════════════════════════════════════
+
+describe("state-derivation-parity", () => {
+
+  // ─── Test 1: ghost milestone with only META.json ─────────────────────────
+  test("ghost milestone with only META.json is correctly detected", () => {
+    const base = createFixtureBase();
+    try {
+      const dir = join(base, ".gsd", "milestones", "M001");
+      mkdirSync(dir, { recursive: true });
+      // META.json is the only file in M001: no CONTEXT, CONTEXT-DRAFT, ROADMAP or SUMMARY is written.
+      writeFileSync(join(dir, "META.json"), JSON.stringify({ id: "M001", createdAt: new Date().toISOString() }));
+
+      assert.ok(
+        isGhostMilestone(base, "M001"),
+        "milestone with only META.json is a ghost",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Test 2: a milestone that has a CONTEXT file is not a ghost ──────────
+  test("non-ghost milestone with CONTEXT is not ghost", () => {
+    const base = createFixtureBase();
+    try {
+      writeMilestoneFile(base, "M001", "CONTEXT", "# M001 Context\n\nThis milestone has real content."); // creates .gsd/milestones/M001/M001-CONTEXT.md
+
+      assert.ok(
+        !isGhostMilestone(base, "M001"),
+        "milestone with CONTEXT.md is not a ghost",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Test 3: empty milestones dir derives pre-planning phase ─────────────
+  test("empty milestones dir derives pre-planning phase", async () => {
+    const base = createFixtureBase();
+    try {
+      const state = await deriveState(base); // fixture holds .gsd/milestones but no milestone directories
+      assert.equal(state.phase, "pre-planning", "empty milestones dir yields pre-planning phase");
+      assert.equal(state.activeMilestone, null, "no active milestone for empty dir");
+      assert.equal(state.activeSlice, null, "no active slice for empty dir");
+      assert.deepEqual(state.registry, [], "registry is empty for empty dir");
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Test 4: derived state exposes phase, nextAction, activeMilestone and registry ──
+  test("deriveState result always includes a defined phase and nextAction", async () => {
+    // Shape check only: `phase` and `nextAction` must be strings on the derived state.
+    // Triggering "blocked" via filesystem alone requires circular dep setup which
+    // is outside the scope of these parity tests, so no blocked-phase behavior is exercised.
+    const base = createFixtureBase();
+    try {
+      // M001 gets only a ROADMAP listing one unchecked slice (S01).
+      const dir = join(base, ".gsd", "milestones", "M001");
+      mkdirSync(dir, { recursive: true });
+      writeFileSync(
+        join(dir, "M001-ROADMAP.md"),
+        `# M001: Test\n\n**Vision:** Parity check.\n\n## Slices\n\n- [ ] **S01: First Slice** \`risk:low\` \`depends:[]\`\n  > After this: First slice done.\n`,
+      );
+
+      const state = await deriveState(base);
+
+      assert.ok(typeof state.phase === "string", "state.phase is a string");
+      assert.ok(typeof state.nextAction === "string", "state.nextAction is a string");
+      // Nothing about blockers is asserted, because this fixture does not try to reach the "blocked" phase.
+      // Only the presence of the activeMilestone and registry keys is checked below.
+      assert.ok("activeMilestone" in state, "state has activeMilestone field");
+      assert.ok("registry" in state, "state has registry field");
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Test 5: CONTEXT-DRAFT but no CONTEXT returns needs-discussion ────────
+  test("deriveState with CONTEXT-DRAFT but no CONTEXT returns needs-discussion", async () => {
+    const base = createFixtureBase();
+    try {
+      writeMilestoneFile(
+        base,
+        "M001",
+        "CONTEXT-DRAFT",
+        "# Draft Context\n\nSeed discussion material for M001.",
+      );
+
+      const state = await deriveState(base); // M001-CONTEXT-DRAFT.md is the only file in the fixture at this point
+      assert.equal(
+        state.phase,
+        "needs-discussion",
+        "CONTEXT-DRAFT with no CONTEXT yields needs-discussion phase",
+      );
+      assert.equal(state.activeMilestone?.id, "M001", "active milestone is M001");
+      assert.equal(state.activeSlice, null, "no active slice in needs-discussion phase");
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Test 6: deriveState skips ghost milestones when finding active milestone ──
+  test("deriveState skips ghost milestones when finding active milestone", async () => {
+    const base = createFixtureBase();
+    try {
+      // M001: directory exists but holds no files, which this suite treats as a ghost
+      mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true });
+
+      // M002: carries a CONTEXT-DRAFT file, so it is the one expected to become active
+      writeMilestoneFile(
+        base,
+        "M002",
+        "CONTEXT-DRAFT",
+        "# Draft for M002\n\nThis is the real milestone.",
+      );
+
+      const state = await deriveState(base);
+
+      // Expectation: the empty M001 is passed over and M002 is reported as active
+      assert.equal(
+        state.activeMilestone?.id,
+        "M002",
+        "ghost M001 is skipped; M002 is the active milestone",
+      );
+      assert.equal(
+        state.phase,
+        "needs-discussion",
+        "phase is needs-discussion because M002 has only CONTEXT-DRAFT",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Bonus: isGhostMilestone returns true for fully empty directory ───────
+  test("isGhostMilestone returns true for milestone directory with no files", () => {
+    const base = createFixtureBase();
+    try {
+      mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true });
+      // The M001 directory exists but nothing is written into it.
+      assert.ok(
+        isGhostMilestone(base, "M001"),
+        "milestone directory with no files is a ghost",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Bonus: isGhostMilestone returns false when ROADMAP exists ────────────
+  test("isGhostMilestone returns false when ROADMAP exists", () => {
+    const base = createFixtureBase();
+    try {
+      writeMilestoneFile(base, "M001", "ROADMAP", "# M001\n\n## Slices\n\n- [ ] **S01: First** `risk:low` `depends:[]`\n  > After this: done.\n"); // M001-ROADMAP.md with one unchecked slice is the only file
+      assert.ok(
+        !isGhostMilestone(base, "M001"),
+        "milestone with ROADMAP is not a ghost",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Bonus: isGhostMilestone returns false when CONTEXT-DRAFT exists ──────
+  test("isGhostMilestone returns false when CONTEXT-DRAFT exists", () => {
+    const base = createFixtureBase();
+    try {
+      writeMilestoneFile(base, "M001", "CONTEXT-DRAFT", "# Draft\n\nSeed material."); // M001-CONTEXT-DRAFT.md is the only file
+      assert.ok(
+        !isGhostMilestone(base, "M001"),
+        "milestone with CONTEXT-DRAFT is not a ghost",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+  // ─── Bonus: multiple ghost milestones before a real one are all skipped ───
+  test("deriveState skips multiple ghost milestones to find the first real one", async () => {
+    const base = createFixtureBase();
+    try {
+      // M001 and M002: empty directories, i.e. the ghost case from the tests above
+      mkdirSync(join(base, ".gsd", "milestones", "M001"), { recursive: true });
+      mkdirSync(join(base, ".gsd", "milestones", "M002"), { recursive: true });
+
+      // M003: the only milestone with any file (a CONTEXT-DRAFT), so it should be picked
+      writeMilestoneFile(base, "M003", "CONTEXT-DRAFT", "# M003 Draft\n\nFirst substantive milestone.");
+
+      const state = await deriveState(base);
+
+      assert.equal(
+        state.activeMilestone?.id,
+        "M003",
+        "both ghost milestones skipped; M003 is active",
+      );
+      assert.equal(
+        state.phase,
+        "needs-discussion",
+        "phase is needs-discussion for M003 with CONTEXT-DRAFT",
+      );
+    } finally {
+      cleanup(base);
+    }
+  });
+
+});
--- a/src/resources/extensions/gsd/tests/stuck-detection-coverage.test.ts
+++ b/src/resources/extensions/gsd/tests/stuck-detection-coverage.test.ts
@ -0,0 +1,174 @@
+// GSD State Machine Regression Tests — Stuck Detection Coverage (#3161)
+
+import test from "node:test";
+import assert from "node:assert/strict";
+
+import { detectStuck } from "../auto/detect-stuck.ts";
+
+// ─── Baseline: windows of 0, 1 and 2 entries that should not be flagged ──────
+
+test("returns null for empty window", () => {
+  assert.equal(detectStuck([]), null);
+});
+
+test("returns null for single entry", () => {
+  assert.equal(detectStuck([{ key: "A" }]), null);
+});
+
+test("returns null for two different entries without errors", () => {
+  assert.equal(detectStuck([{ key: "A" }, { key: "B" }]), null); // no error fields and no repeated key
+});
+
+// ─── Rule 1: Same error repeated consecutively ───────────────────────────────
+
+test("Rule 1: same error twice consecutively triggers stuck", () => {
+  const result = detectStuck([
+    { key: "A", error: "ENOENT: no such file" },
+    { key: "A", error: "ENOENT: no such file" },
+  ]);
+  assert.notEqual(result, null);
+  assert.equal(result!.stuck, true);
+  assert.ok(result!.reason.includes("Same error"), `reason was: ${result!.reason}`);
+});
+
+test("Rule 1: different errors do not trigger stuck", () => {
+  // The two errors differ, and with only two entries the 3-in-a-row key rule cannot apply either, so null is expected.
+  const result = detectStuck([
+    { key: "A", error: "err1" },
+    { key: "A", error: "err2" },
+  ]);
+  assert.equal(result, null);
+});
+
+test("Rule 1: only last two entries matter for error check", () => {
+  // The first two entries share an error, but the final two do not, and keys A, A, B, C form neither a 3-repeat nor an A-B-A-B pattern.
+  const result = detectStuck([
+    { key: "A", error: "same-error" },
+    { key: "A", error: "same-error" },
+    { key: "B", error: "different-error-1" },
+    { key: "C", error: "different-error-2" },
+  ]);
+  assert.equal(result, null);
+});
+
+// ─── Rule 2: Same unit key 3+ consecutive times ───────────────────────────────
+
+test("Rule 2: same unit key 3 consecutive times triggers stuck", () => {
+  const result = detectStuck([
+    { key: "A" },
+    { key: "A" },
+    { key: "A" },
+  ]);
+  assert.notEqual(result, null);
+  assert.equal(result!.stuck, true);
+  assert.ok(
+    result!.reason.includes("3 consecutive times"),
+    `reason was: ${result!.reason}`,
+  );
+});
+
+test("Rule 2: same key twice is not enough", () => {
+  assert.equal(detectStuck([{ key: "A" }, { key: "A" }]), null); // two repeats, no errors: below the 3-repeat threshold
+});
+
+test("Rule 2: interrupted sequence does not trigger", () => {
+  // A, B, A: the B in the middle breaks the run, so the last three keys are not identical.
+  assert.equal(
+    detectStuck([{ key: "A" }, { key: "B" }, { key: "A" }]),
+    null,
+  );
+});
+
+// ─── Rule 3: Oscillation A→B→A→B ─────────────────────────────────────────────
+
+test("Rule 3: A-B-A-B oscillation triggers stuck", () => {
+  const result = detectStuck([
+    { key: "A" },
+    { key: "B" },
+    { key: "A" },
+    { key: "B" },
+  ]);
+  assert.notEqual(result, null);
+  assert.equal(result!.stuck, true);
+  assert.ok(
+    result!.reason.includes("Oscillation"),
+    `reason was: ${result!.reason}`,
+  );
+});
+
+test("Rule 3: A-B-A-C does not trigger oscillation", () => {
+  assert.equal( // the fourth key is C rather than B, so the alternation is broken
+    detectStuck([{ key: "A" }, { key: "B" }, { key: "A" }, { key: "C" }]),
+    null,
+  );
+});
+
+test("Rule 3: A-A-A-A triggers Rule 2 not Rule 3", () => {
+  // Four identical keys: the reported reason must be the 3-consecutive one, not the oscillation one.
+  const result = detectStuck([
+    { key: "A" },
+    { key: "A" },
+    { key: "A" },
+    { key: "A" },
+  ]);
+  assert.notEqual(result, null);
+  assert.equal(result!.stuck, true);
+  assert.ok(
+    result!.reason.includes("3 consecutive times"),
+    `expected Rule 2 reason but got: ${result!.reason}`,
+  );
+  assert.ok(
+    !result!.reason.includes("Oscillation"),
+    `unexpectedly matched Rule 3: ${result!.reason}`,
+  );
+});
+
+// ─── Gap documentation: 3-unit cycle evades detection ────────────────────────
+
+test("Three-unit cycle A-B-C-A-B-C does NOT trigger stuck (documents gap L13)", () => {
+  // A repeating A-B-C cycle is expected to return null: it pins today's behavior, not desired behavior.
+  // If cycle detection is added later, this assertion is the one to flip.
+  // NOTE(review): "L13" is presumably the LOW-severity finding number from #3161 — confirm.
+  const result = detectStuck([
+    { key: "A" },
+    { key: "B" },
+    { key: "C" },
+    { key: "A" },
+    { key: "B" },
+    { key: "C" },
+  ]);
+  assert.equal(result, null);
+});
+
+// ─── Window boundary: earlier patterns do not contaminate recent check ─────────
+
+test("window bounded: detection uses last N entries correctly", () => {
+  // X, X, X at the start would match the 3-repeat rule on their own, but they are
+  // followed by A and B, and the expectation is that only the tail is evaluated.
+  const result = detectStuck([
+    { key: "X" },
+    { key: "X" },
+    { key: "X" }, // a window ending here would be reported as stuck
+    { key: "A" },
+    { key: "B" }, // tail: two different keys and no error fields
+  ]);
+  assert.equal(result, null);
+});
+
+// ─── Rule priority: Rule 1 before Rule 2 ─────────────────────────────────────
+
+test("Rule 1 takes priority over Rule 2 when both match", () => {
+  // All three entries share key "A" (a Rule 2 match) and error "boom" (a Rule 1 match).
+  // The reported reason must be the same-error one, which pins the evaluation order.
+  const result = detectStuck([
+    { key: "A", error: "boom" },
+    { key: "A", error: "boom" },
+    { key: "A", error: "boom" },
+  ]);
+  assert.notEqual(result, null);
+  assert.equal(result!.stuck, true);
+  assert.ok(
+    result!.reason.includes("Same error"),
+    `expected Rule 1 reason but got: ${result!.reason}`,
+  );
+});