test(gsd): add pause wiring and integration tests for enhanced verification
- pre-execution-pause-wiring.test.ts: Tests blocking check → pause control flow
- enhanced-verification-integration.test.ts: End-to-end integration coverage

Verifies that blocking pre-execution failures trigger auto-mode pause and that the enhanced verification pipeline integrates correctly with existing verification infrastructure.
This commit is contained in:
parent
8f2c544a91
commit
9711ac3efa
4 changed files with 988 additions and 9 deletions
6
package-lock.json
generated
6
package-lock.json
generated
|
|
@ -1,12 +1,12 @@
|
|||
{
|
||||
"name": "gsd-pi",
|
||||
"version": "2.56.0",
|
||||
"version": "2.58.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "gsd-pi",
|
||||
"version": "2.56.0",
|
||||
"version": "2.58.0",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"workspaces": [
|
||||
|
|
@ -9534,7 +9534,7 @@
|
|||
},
|
||||
"packages/pi-coding-agent": {
|
||||
"name": "@gsd/pi-coding-agent",
|
||||
"version": "2.56.0",
|
||||
"version": "2.58.0",
|
||||
"dependencies": {
|
||||
"@mariozechner/jiti": "^2.6.2",
|
||||
"@silvia-odwyer/photon-node": "^0.3.4",
|
||||
|
|
|
|||
|
|
@ -266,9 +266,7 @@ export async function runPostUnitVerification(
|
|||
}
|
||||
} catch (postExecErr) {
|
||||
// Post-execution check errors are non-fatal — log and continue
|
||||
process.stderr.write(
|
||||
`gsd-post-exec: error — ${(postExecErr as Error).message}\n`
|
||||
);
|
||||
logWarning("engine", `gsd-post-exec: error — ${(postExecErr as Error).message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -297,9 +295,7 @@ export async function runPostUnitVerification(
|
|||
);
|
||||
}
|
||||
} catch (evidenceErr) {
|
||||
process.stderr.write(
|
||||
`verification-evidence: post-exec write error — ${(evidenceErr as Error).message}\n`
|
||||
);
|
||||
logWarning("engine", `verification-evidence: post-exec write error — ${(evidenceErr as Error).message}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,526 @@
|
|||
/**
|
||||
* enhanced-verification-integration.test.ts — Integration tests for enhanced verification.
|
||||
*
|
||||
* Exercises all 7 enhanced verification checks against GSD-2's actual source files.
|
||||
* This proves:
|
||||
* - R012: No false positives on production code
|
||||
* - R013: Speed targets met (<2000ms pre-execution, <1000ms post-execution per task)
|
||||
*
|
||||
* The test constructs realistic TaskRow fixtures that reference real GSD source files,
|
||||
* then runs both pre-execution and post-execution checks against them.
|
||||
*/
|
||||
|
||||
import { describe, test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { existsSync } from "node:fs";
|
||||
import { join, dirname } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
import {
|
||||
runPreExecutionChecks,
|
||||
type PreExecutionResult,
|
||||
} from "../pre-execution-checks.ts";
|
||||
import {
|
||||
runPostExecutionChecks,
|
||||
type PostExecutionResult,
|
||||
} from "../post-execution-checks.ts";
|
||||
import type { TaskRow } from "../gsd-db.ts";
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────

// ESM modules have no __filename/__dirname; derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Path to the GSD extension source directory (relative to test file)
const GSD_SRC_DIR = join(__dirname, "..");

// Speed targets from R013: per-task time budget for each check phase, in ms.
const PRE_EXECUTION_TIMEOUT_MS = 2000;
const POST_EXECUTION_TIMEOUT_MS = 1000;
|
||||
|
||||
// ─── Test Fixtures ───────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Create a minimal TaskRow for testing.
|
||||
*/
|
||||
function createTask(overrides: Partial<TaskRow> = {}): TaskRow {
|
||||
return {
|
||||
milestone_id: "M001",
|
||||
slice_id: "S01",
|
||||
id: overrides.id ?? "T01",
|
||||
title: overrides.title ?? "Test Task",
|
||||
status: overrides.status ?? "pending",
|
||||
one_liner: "",
|
||||
narrative: "",
|
||||
verification_result: "",
|
||||
duration: "",
|
||||
completed_at: overrides.status === "complete" ? new Date().toISOString() : null,
|
||||
blocker_discovered: false,
|
||||
deviations: "",
|
||||
known_issues: "",
|
||||
key_files: overrides.key_files ?? [],
|
||||
key_decisions: [],
|
||||
full_summary_md: "",
|
||||
description: overrides.description ?? "",
|
||||
estimate: "",
|
||||
files: overrides.files ?? [],
|
||||
verify: "",
|
||||
inputs: overrides.inputs ?? [],
|
||||
expected_output: overrides.expected_output ?? [],
|
||||
observability_impact: "",
|
||||
full_plan_md: "",
|
||||
sequence: overrides.sequence ?? 0,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
// ─── Real GSD Source Files for Testing ───────────────────────────────────────

// These are actual GSD extension source files that exist in the codebase.
// verifyTestFixturesExist() checks their presence up front, so a rename in
// the source tree (or a typo here) fails fast with a clear error.
const REAL_GSD_FILES = [
  "gsd-db.ts",
  "auto-verification.ts",
  "pre-execution-checks.ts",
  "post-execution-checks.ts",
  "state.ts",
  "errors.ts",
  "types.ts",
  "cache.ts",
  "atomic-write.ts",
];
|
||||
|
||||
// Verify the test fixture files actually exist
|
||||
function verifyTestFixturesExist(): void {
|
||||
for (const file of REAL_GSD_FILES) {
|
||||
const fullPath = join(GSD_SRC_DIR, file);
|
||||
if (!existsSync(fullPath)) {
|
||||
throw new Error(`Test fixture file does not exist: ${fullPath}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Integration Tests ───────────────────────────────────────────────────────
|
||||
|
||||
describe("Enhanced Verification Integration Tests", () => {
|
||||
  // Verify fixtures before running tests — if the source tree moved,
  // this fails with a clear "file does not exist" error instead of
  // confusing failures deeper in the suites below.
  test("test fixture files exist", () => {
    verifyTestFixturesExist();
  });
|
||||
|
||||
  // Pre-execution suite: running the planner-side checks against real, healthy
  // source must produce no blocking failures (R012) and stay inside the
  // PRE_EXECUTION_TIMEOUT_MS budget (R013).
  describe("Pre-Execution Checks on Real GSD Code", () => {
    test("runs pre-execution checks on realistic tasks referencing real files", async () => {
      // Simulate tasks that reference real GSD source files
      const tasks: TaskRow[] = [
        createTask({
          id: "T01",
          sequence: 0,
          title: "Add validation to gsd-db",
          description: `
## Steps
1. Update src/resources/extensions/gsd/gsd-db.ts to add validation
2. Read from src/resources/extensions/gsd/types.ts for type definitions
3. Update src/resources/extensions/gsd/errors.ts with new error types
4. Run tests to verify changes
`.trim(),
          files: REAL_GSD_FILES.slice(0, 4).map((f) => join(GSD_SRC_DIR, f)),
          inputs: [
            join(GSD_SRC_DIR, "types.ts"),
            join(GSD_SRC_DIR, "errors.ts"),
          ],
          expected_output: [
            join(GSD_SRC_DIR, "gsd-db.ts"),
          ],
        }),
      ];

      // Time the check run so the speed assertion below measures only the checks.
      const start = performance.now();
      const result = await runPreExecutionChecks(tasks, GSD_SRC_DIR);
      const duration = performance.now() - start;

      // R012: No blocking failures (false positives) on production code
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Expected zero blocking failures, got: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Overall status should not be fail
      assert.notEqual(result.status, "fail", "Pre-execution checks should not fail on real GSD code");

      // R013: Speed target met
      assert.ok(
        duration < PRE_EXECUTION_TIMEOUT_MS,
        `Pre-execution checks took ${duration.toFixed(0)}ms, expected <${PRE_EXECUTION_TIMEOUT_MS}ms`
      );
    });

    test("handles task with code block references to real packages", async () => {
      // Task description with realistic code blocks using actual Node.js built-ins
      const tasks: TaskRow[] = [
        createTask({
          id: "T01",
          sequence: 0,
          title: "Implement file watcher",
          description: `
## Implementation

\`\`\`typescript
import { readFileSync, writeFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { existsSync } from "node:fs";

// Use existing GSD types
import type { TaskRow } from "./gsd-db.ts";
\`\`\`

Update the file watcher to use these imports.
`.trim(),
          files: [join(GSD_SRC_DIR, "auto-verification.ts")],
        }),
      ];

      const start = performance.now();
      const result = await runPreExecutionChecks(tasks, GSD_SRC_DIR);
      const duration = performance.now() - start;

      // No blocking failures
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Unexpected blocking failures: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Speed target met
      assert.ok(
        duration < PRE_EXECUTION_TIMEOUT_MS,
        `Pre-execution checks took ${duration.toFixed(0)}ms, expected <${PRE_EXECUTION_TIMEOUT_MS}ms`
      );
    });

    test("handles multi-task sequence with file dependencies", async () => {
      // Simulate a realistic task sequence where T02 depends on T01's output
      const tasks: TaskRow[] = [
        createTask({
          id: "T01",
          sequence: 0,
          title: "Create types file",
          status: "complete",
          expected_output: [join(GSD_SRC_DIR, "types.ts")],
        }),
        createTask({
          id: "T02",
          sequence: 1,
          title: "Use types in implementation",
          description: `
Read the types from src/resources/extensions/gsd/types.ts and use them.
`.trim(),
          inputs: [join(GSD_SRC_DIR, "types.ts")],
          files: [join(GSD_SRC_DIR, "gsd-db.ts")],
        }),
      ];

      const start = performance.now();
      const result = await runPreExecutionChecks(tasks, GSD_SRC_DIR);
      const duration = performance.now() - start;

      // No blocking failures
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Unexpected blocking failures: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Speed target met
      assert.ok(
        duration < PRE_EXECUTION_TIMEOUT_MS,
        `Pre-execution checks took ${duration.toFixed(0)}ms, expected <${PRE_EXECUTION_TIMEOUT_MS}ms`
      );
    });
  });
|
||||
|
||||
  // Post-execution suite: the reviewer-side checks run synchronously on a
  // completed task's key_files; same R012/R013 expectations against the
  // tighter POST_EXECUTION_TIMEOUT_MS budget.
  describe("Post-Execution Checks on Real GSD Code", () => {
    test("runs post-execution checks on real GSD source files", () => {
      // Simulate a completed task that modified real files
      const completedTask = createTask({
        id: "T01",
        title: "Update gsd-db validation",
        status: "complete",
        key_files: [
          join(GSD_SRC_DIR, "gsd-db.ts"),
          join(GSD_SRC_DIR, "types.ts"),
        ],
      });

      const start = performance.now();
      const result = runPostExecutionChecks(completedTask, [], GSD_SRC_DIR);
      const duration = performance.now() - start;

      // R012: No blocking failures (false positives) on production code
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Expected zero blocking failures, got: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Overall status should not be fail
      assert.notEqual(result.status, "fail", "Post-execution checks should not fail on real GSD code");

      // R013: Speed target met
      assert.ok(
        duration < POST_EXECUTION_TIMEOUT_MS,
        `Post-execution checks took ${duration.toFixed(0)}ms, expected <${POST_EXECUTION_TIMEOUT_MS}ms`
      );
    });

    test("analyzes imports in real TypeScript files", () => {
      // Use auto-verification.ts which imports from multiple other GSD files
      const completedTask = createTask({
        id: "T02",
        title: "Verify auto-verification imports",
        status: "complete",
        key_files: [join(GSD_SRC_DIR, "auto-verification.ts")],
      });

      const start = performance.now();
      const result = runPostExecutionChecks(completedTask, [], GSD_SRC_DIR);
      const duration = performance.now() - start;

      // No blocking failures
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Unexpected blocking failures: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Speed target met
      assert.ok(
        duration < POST_EXECUTION_TIMEOUT_MS,
        `Post-execution checks took ${duration.toFixed(0)}ms, expected <${POST_EXECUTION_TIMEOUT_MS}ms`
      );
    });

    test("handles multi-file task with cross-file dependencies", () => {
      // Task that touched multiple related files
      const completedTask = createTask({
        id: "T03",
        title: "Refactor state management",
        status: "complete",
        key_files: [
          join(GSD_SRC_DIR, "state.ts"),
          join(GSD_SRC_DIR, "gsd-db.ts"),
          join(GSD_SRC_DIR, "cache.ts"),
        ],
      });

      const start = performance.now();
      const result = runPostExecutionChecks(completedTask, [], GSD_SRC_DIR);
      const duration = performance.now() - start;

      // No blocking failures
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Unexpected blocking failures: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Speed target met
      assert.ok(
        duration < POST_EXECUTION_TIMEOUT_MS,
        `Post-execution checks took ${duration.toFixed(0)}ms, expected <${POST_EXECUTION_TIMEOUT_MS}ms`
      );
    });

    test("handles task sequence with signature analysis", () => {
      // Simulate checking for signature consistency across tasks
      const priorTasks: TaskRow[] = [
        createTask({
          id: "T01",
          sequence: 0,
          title: "Define TaskRow interface",
          status: "complete",
          key_files: [join(GSD_SRC_DIR, "gsd-db.ts")],
        }),
      ];

      const completedTask = createTask({
        id: "T02",
        sequence: 1,
        title: "Use TaskRow in state module",
        status: "complete",
        key_files: [join(GSD_SRC_DIR, "state.ts")],
      });

      const start = performance.now();
      const result = runPostExecutionChecks(completedTask, priorTasks, GSD_SRC_DIR);
      const duration = performance.now() - start;

      // No blocking failures
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Unexpected blocking failures: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Speed target met
      assert.ok(
        duration < POST_EXECUTION_TIMEOUT_MS,
        `Post-execution checks took ${duration.toFixed(0)}ms, expected <${POST_EXECUTION_TIMEOUT_MS}ms`
      );
    });
  });
|
||||
|
||||
  // End-to-end suite: a task is checked before execution, its completion is
  // simulated, and then it is checked after execution — both phases must stay
  // clean and within budget on the same fixture.
  describe("Combined Pre and Post Execution Flow", () => {
    test("full verification flow on realistic task lifecycle", async () => {
      // Simulate a complete task lifecycle
      const tasks: TaskRow[] = [
        createTask({
          id: "T01",
          sequence: 0,
          title: "Implement enhanced verification",
          status: "pending",
          description: `
## Steps
1. Update pre-execution-checks.ts with new validation
2. Update post-execution-checks.ts with signature analysis
3. Add integration tests

\`\`\`typescript
import { runPreExecutionChecks } from "./pre-execution-checks.ts";
import { runPostExecutionChecks } from "./post-execution-checks.ts";
\`\`\`
`.trim(),
          files: [
            join(GSD_SRC_DIR, "pre-execution-checks.ts"),
            join(GSD_SRC_DIR, "post-execution-checks.ts"),
          ],
          inputs: [
            join(GSD_SRC_DIR, "types.ts"),
            join(GSD_SRC_DIR, "gsd-db.ts"),
          ],
          expected_output: [
            join(GSD_SRC_DIR, "tests/enhanced-verification-integration.test.ts"),
          ],
        }),
      ];

      // Run pre-execution checks
      const preStart = performance.now();
      const preResult = await runPreExecutionChecks(tasks, GSD_SRC_DIR);
      const preDuration = performance.now() - preStart;

      // Verify pre-execution results
      const preBlockingFailures = preResult.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        preBlockingFailures.length,
        0,
        `Pre-execution had blocking failures: ${JSON.stringify(preBlockingFailures, null, 2)}`
      );
      assert.ok(
        preDuration < PRE_EXECUTION_TIMEOUT_MS,
        `Pre-execution took ${preDuration.toFixed(0)}ms, expected <${PRE_EXECUTION_TIMEOUT_MS}ms`
      );

      // Task after execution (simulated completion): same task, now marked
      // complete with its planned files recorded as key_files.
      const completedTask = createTask({
        ...tasks[0],
        status: "complete",
        key_files: tasks[0].files,
      });

      // Run post-execution checks
      const postStart = performance.now();
      const postResult = runPostExecutionChecks(completedTask, [], GSD_SRC_DIR);
      const postDuration = performance.now() - postStart;

      // Verify post-execution results
      const postBlockingFailures = postResult.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        postBlockingFailures.length,
        0,
        `Post-execution had blocking failures: ${JSON.stringify(postBlockingFailures, null, 2)}`
      );
      assert.ok(
        postDuration < POST_EXECUTION_TIMEOUT_MS,
        `Post-execution took ${postDuration.toFixed(0)}ms, expected <${POST_EXECUTION_TIMEOUT_MS}ms`
      );
    });

    test("handles large number of files without timeout", () => {
      // Use all available GSD source files to stress test
      const allGsdFiles = REAL_GSD_FILES.map((f) => join(GSD_SRC_DIR, f));

      const task = createTask({
        id: "T01",
        title: "Large refactor touching many files",
        status: "complete",
        key_files: allGsdFiles,
        files: allGsdFiles,
      });

      const start = performance.now();
      const result = runPostExecutionChecks(task, [], GSD_SRC_DIR);
      const duration = performance.now() - start;

      // No blocking failures
      const blockingFailures = result.checks.filter((c) => !c.passed && c.blocking);
      assert.equal(
        blockingFailures.length,
        0,
        `Unexpected blocking failures: ${JSON.stringify(blockingFailures, null, 2)}`
      );

      // Should still be fast even with many files
      // Allow slightly more time for multi-file analysis but still within target
      assert.ok(
        duration < POST_EXECUTION_TIMEOUT_MS * 2, // Allow 2x for stress test
        `Multi-file post-execution took ${duration.toFixed(0)}ms, expected <${POST_EXECUTION_TIMEOUT_MS * 2}ms`
      );
    });
  });
|
||||
|
||||
  // Warning-quality suite: non-blocking findings on healthy code should be
  // rare, and each one must carry a category and a usable message.
  describe("Warning Quality", () => {
    test("warnings on real code are actionable, not spurious", () => {
      // Run checks on well-formed production code
      const task = createTask({
        id: "T01",
        title: "Review code quality",
        status: "complete",
        key_files: [
          join(GSD_SRC_DIR, "pre-execution-checks.ts"),
          join(GSD_SRC_DIR, "post-execution-checks.ts"),
        ],
      });

      const result = runPostExecutionChecks(task, [], GSD_SRC_DIR);

      // Extract warnings (either non-passed non-blocking, or passed with warning messages)
      const warnings = result.checks.filter(
        (c) => (!c.passed && !c.blocking) || (c.passed && c.message?.startsWith("Warning:"))
      );

      // Warnings are acceptable but should be few on well-maintained code.
      // If we get many warnings, it suggests the checks are too aggressive.
      assert.ok(
        warnings.length <= 10,
        `Too many warnings (${warnings.length}) suggests overly aggressive checks: ${JSON.stringify(warnings, null, 2)}`
      );

      // Each warning should have a clear message
      for (const warning of warnings) {
        assert.ok(warning.category, "Warning missing category");
        assert.ok(warning.message, "Warning missing message");
        assert.ok(
          warning.message.length > 10,
          `Warning message too short to be actionable: "${warning.message}"`
        );
      }
    });
  });
|
||||
});
|
||||
|
|
@ -0,0 +1,457 @@
|
|||
/**
|
||||
* pre-execution-pause-wiring.test.ts — Integration tests for pre-execution check → pauseAuto wiring.
|
||||
*
|
||||
* Tests that verify the control flow from pre-execution checks through to pauseAuto:
|
||||
* 1. When runPreExecutionChecks returns status: "fail" with blocking: true, pauseAuto is called
|
||||
* 2. When enhanced_verification_strict: true and status: "warn", pauseAuto is also called
|
||||
*
|
||||
* These are integration-level tests that exercise the actual postUnitPostVerification function
|
||||
* with controlled mocks for external dependencies.
|
||||
*/
|
||||
|
||||
import { describe, test, mock, beforeEach, afterEach } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { tmpdir } from "node:os";
|
||||
import { mkdirSync, writeFileSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
import { postUnitPostVerification, type PostUnitContext } from "../auto-post-unit.ts";
|
||||
import { AutoSession } from "../auto/session.ts";
|
||||
import { openDatabase, closeDatabase, insertMilestone, insertSlice, insertTask } from "../gsd-db.ts";
|
||||
import { invalidateAllCaches } from "../cache.ts";
|
||||
import { _clearGsdRootCache } from "../paths.ts";
|
||||
|
||||
// ─── Test Fixtures ───────────────────────────────────────────────────────────

// Shared per-test state: (re)initialized by setupTestEnvironment() and torn
// down by cleanupTestEnvironment().
let tempDir: string;      // throwaway working directory containing the .gsd tree
let dbPath: string;       // database file inside tempDir/.gsd (gsd.db)
let originalCwd: string;  // cwd before the test chdir'd into tempDir; restored on cleanup
|
||||
|
||||
/**
|
||||
* Create a minimal mock ExtensionContext.
|
||||
*/
|
||||
function makeMockCtx() {
|
||||
return {
|
||||
ui: {
|
||||
notify: mock.fn(),
|
||||
setStatus: () => {},
|
||||
setWidget: () => {},
|
||||
setFooter: () => {},
|
||||
},
|
||||
model: { id: "test-model" },
|
||||
} as any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a minimal mock ExtensionAPI.
|
||||
*/
|
||||
function makeMockPi() {
|
||||
return {
|
||||
sendMessage: mock.fn(),
|
||||
setModel: mock.fn(async () => true),
|
||||
} as any;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a minimal AutoSession for testing.
|
||||
*/
|
||||
function makeMockSession(basePath: string, currentUnit?: { type: string; id: string }): AutoSession {
|
||||
const s = new AutoSession();
|
||||
s.basePath = basePath;
|
||||
s.active = true;
|
||||
if (currentUnit) {
|
||||
s.currentUnit = {
|
||||
type: currentUnit.type,
|
||||
id: currentUnit.id,
|
||||
startedAt: Date.now(),
|
||||
};
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a PostUnitContext with a mockable pauseAuto.
|
||||
*/
|
||||
function makePostUnitContext(
|
||||
s: AutoSession,
|
||||
ctx: ReturnType<typeof makeMockCtx>,
|
||||
pi: ReturnType<typeof makeMockPi>,
|
||||
pauseAutoMock: ReturnType<typeof mock.fn>,
|
||||
): PostUnitContext {
|
||||
return {
|
||||
s,
|
||||
ctx,
|
||||
pi,
|
||||
buildSnapshotOpts: () => ({}),
|
||||
lockBase: () => tempDir,
|
||||
stopAuto: mock.fn(async () => {}) as unknown as PostUnitContext["stopAuto"],
|
||||
pauseAuto: pauseAutoMock as unknown as PostUnitContext["pauseAuto"],
|
||||
updateProgressWidget: () => {},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Set up a temp directory with GSD structure and DB.
|
||||
* Also changes cwd so preferences loading finds the right PREFERENCES.md.
|
||||
*/
|
||||
function setupTestEnvironment(): void {
|
||||
// Save original cwd so we can restore it
|
||||
originalCwd = process.cwd();
|
||||
|
||||
tempDir = join(tmpdir(), `pre-exec-pause-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
|
||||
// Create .gsd directory structure
|
||||
const gsdDir = join(tempDir, ".gsd");
|
||||
mkdirSync(gsdDir, { recursive: true });
|
||||
|
||||
// Create milestones directory structure
|
||||
const milestonesDir = join(gsdDir, "milestones", "M001", "slices", "S01", "tasks");
|
||||
mkdirSync(milestonesDir, { recursive: true });
|
||||
|
||||
// Change cwd so loadEffectiveGSDPreferences finds our PREFERENCES.md
|
||||
process.chdir(tempDir);
|
||||
|
||||
// Clear gsdRoot cache so it finds the new .gsd directory
|
||||
_clearGsdRootCache();
|
||||
|
||||
// Initialize DB
|
||||
dbPath = join(gsdDir, "gsd.db");
|
||||
openDatabase(dbPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up test environment.
|
||||
*/
|
||||
function cleanupTestEnvironment(): void {
|
||||
// Restore original cwd before cleanup
|
||||
try {
|
||||
process.chdir(originalCwd);
|
||||
} catch {
|
||||
// Ignore if original cwd doesn't exist
|
||||
}
|
||||
|
||||
try {
|
||||
closeDatabase();
|
||||
} catch {
|
||||
// Ignore close errors
|
||||
}
|
||||
try {
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
} catch {
|
||||
// Ignore cleanup errors
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a PREFERENCES.md file with specified preferences.
|
||||
* Uses YAML frontmatter format (---\nkey: value\n---).
|
||||
* Also invalidates caches so the preferences are re-read.
|
||||
*/
|
||||
function writePreferences(prefs: Record<string, unknown>): void {
|
||||
const yamlLines = Object.entries(prefs).map(([k, v]) => `${k}: ${JSON.stringify(v)}`);
|
||||
const prefsContent = `---
|
||||
${yamlLines.join("\n")}
|
||||
---
|
||||
|
||||
# GSD Preferences
|
||||
`;
|
||||
writeFileSync(join(tempDir, ".gsd", "PREFERENCES.md"), prefsContent);
|
||||
// Invalidate caches so the new preferences file is found
|
||||
invalidateAllCaches();
|
||||
_clearGsdRootCache();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create tasks in DB that will cause pre-execution checks to fail.
|
||||
* A task that references a non-existent file will produce a blocking failure.
|
||||
*/
|
||||
function createFailingTasks(): void {
|
||||
// Insert milestone first
|
||||
insertMilestone({ id: "M001" });
|
||||
|
||||
// Insert slice
|
||||
insertSlice({
|
||||
id: "S01",
|
||||
milestoneId: "M001",
|
||||
title: "Test Slice",
|
||||
risk: "low",
|
||||
});
|
||||
|
||||
// Create a task that references a file that doesn't exist
|
||||
// This will cause checkFilePathConsistency to produce a blocking failure
|
||||
insertTask({
|
||||
id: "T01",
|
||||
sliceId: "S01",
|
||||
milestoneId: "M001",
|
||||
title: "Task with missing file",
|
||||
status: "pending",
|
||||
planning: {
|
||||
description: "This task references a non-existent file",
|
||||
estimate: "1h",
|
||||
files: ["nonexistent-file-that-does-not-exist.ts"],
|
||||
verify: "npm test",
|
||||
inputs: [],
|
||||
expectedOutput: [],
|
||||
observabilityImpact: "",
|
||||
},
|
||||
sequence: 0,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Create tasks in DB that will produce only warnings (non-blocking issues).
|
||||
* Interface contract mismatches produce warnings, not blocking failures.
|
||||
*/
|
||||
function createWarningOnlyTasks(): void {
|
||||
// Insert milestone first
|
||||
insertMilestone({ id: "M001" });
|
||||
|
||||
// Insert slice
|
||||
insertSlice({
|
||||
id: "S01",
|
||||
milestoneId: "M001",
|
||||
title: "Test Slice",
|
||||
risk: "low",
|
||||
});
|
||||
|
||||
// Create tasks with interface contract mismatch (produces warn, not fail)
|
||||
insertTask({
|
||||
id: "T01",
|
||||
sliceId: "S01",
|
||||
milestoneId: "M001",
|
||||
title: "Task 1 with function signature",
|
||||
status: "pending",
|
||||
planning: {
|
||||
description: `
|
||||
\`\`\`typescript
|
||||
function processData(input: string): boolean
|
||||
\`\`\`
|
||||
`.trim(),
|
||||
estimate: "1h",
|
||||
files: [],
|
||||
verify: "npm test",
|
||||
inputs: [],
|
||||
expectedOutput: [],
|
||||
observabilityImpact: "",
|
||||
},
|
||||
sequence: 0,
|
||||
});
|
||||
|
||||
insertTask({
|
||||
id: "T02",
|
||||
sliceId: "S01",
|
||||
milestoneId: "M001",
|
||||
title: "Task 2 with mismatched signature",
|
||||
status: "pending",
|
||||
planning: {
|
||||
description: `
|
||||
\`\`\`typescript
|
||||
function processData(input: number): string
|
||||
\`\`\`
|
||||
`.trim(),
|
||||
estimate: "1h",
|
||||
files: [],
|
||||
verify: "npm test",
|
||||
inputs: [],
|
||||
expectedOutput: [],
|
||||
observabilityImpact: "",
|
||||
},
|
||||
sequence: 1,
|
||||
});
|
||||
}
|
||||
|
||||
// ─── Tests ───────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("Pre-execution checks → pauseAuto wiring", () => {
|
||||
beforeEach(() => {
|
||||
setupTestEnvironment();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
cleanupTestEnvironment();
|
||||
});
|
||||
|
||||
test("pauseAuto is called when pre-execution checks return status: fail with blocking: true", async () => {
|
||||
// Set up tasks that will cause a blocking failure
|
||||
createFailingTasks();
|
||||
|
||||
// Create mocks
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, { type: "plan-slice", id: "M001/S01" });
|
||||
const pctx = makePostUnitContext(s, ctx, pi, pauseAutoMock);
|
||||
|
||||
// Call postUnitPostVerification
|
||||
const result = await postUnitPostVerification(pctx);
|
||||
|
||||
// Verify pauseAuto was called
|
||||
assert.equal(
|
||||
pauseAutoMock.mock.callCount(),
|
||||
1,
|
||||
"pauseAuto should be called exactly once when pre-execution checks fail with blocking issues"
|
||||
);
|
||||
|
||||
// Verify return value is "stopped"
|
||||
assert.equal(
|
||||
result,
|
||||
"stopped",
|
||||
"postUnitPostVerification should return 'stopped' when pre-execution checks fail"
|
||||
);
|
||||
|
||||
// Verify UI was notified of the failure
|
||||
const notifyCalls = ctx.ui.notify.mock.calls;
|
||||
const errorNotify = notifyCalls.find(
|
||||
(call: { arguments: unknown[] }) =>
|
||||
call.arguments[1] === "error" &&
|
||||
String(call.arguments[0]).includes("Pre-execution checks failed")
|
||||
);
|
||||
assert.ok(errorNotify, "Should show error notification about pre-execution check failure");
|
||||
});
|
||||
|
||||
test("pauseAuto is called when enhanced_verification_strict: true and pre-execution returns warn", async () => {
|
||||
// Write preferences with strict mode enabled
|
||||
writePreferences({
|
||||
enhanced_verification: true,
|
||||
enhanced_verification_pre: true,
|
||||
enhanced_verification_strict: true,
|
||||
});
|
||||
|
||||
// Set up tasks that will produce only warnings (interface contract mismatch)
|
||||
createWarningOnlyTasks();
|
||||
|
||||
// Create mocks
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, { type: "plan-slice", id: "M001/S01" });
|
||||
const pctx = makePostUnitContext(s, ctx, pi, pauseAutoMock);
|
||||
|
||||
// Call postUnitPostVerification
|
||||
const result = await postUnitPostVerification(pctx);
|
||||
|
||||
// Verify pauseAuto was called (strict mode promotes warnings to blocking)
|
||||
assert.equal(
|
||||
pauseAutoMock.mock.callCount(),
|
||||
1,
|
||||
"pauseAuto should be called when strict mode is enabled and pre-execution returns warn"
|
||||
);
|
||||
|
||||
// Verify return value is "stopped"
|
||||
assert.equal(
|
||||
result,
|
||||
"stopped",
|
||||
"postUnitPostVerification should return 'stopped' when strict mode treats warnings as blocking"
|
||||
);
|
||||
|
||||
// Verify UI was notified of the warning
|
||||
const notifyCalls = ctx.ui.notify.mock.calls;
|
||||
const warnNotify = notifyCalls.find(
|
||||
(call: { arguments: unknown[] }) =>
|
||||
call.arguments[1] === "warning" &&
|
||||
String(call.arguments[0]).includes("Pre-execution checks passed with warnings")
|
||||
);
|
||||
assert.ok(warnNotify, "Should show warning notification about pre-execution check warnings");
|
||||
});
|
||||
|
||||
test("pauseAuto is NOT called when enhanced_verification_strict: false and pre-execution returns warn", async () => {
|
||||
// Write preferences with strict mode disabled (default behavior)
|
||||
writePreferences({
|
||||
enhanced_verification: true,
|
||||
enhanced_verification_pre: true,
|
||||
enhanced_verification_strict: false,
|
||||
});
|
||||
|
||||
// Set up tasks that will produce only warnings
|
||||
createWarningOnlyTasks();
|
||||
|
||||
// Create mocks
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, { type: "plan-slice", id: "M001/S01" });
|
||||
const pctx = makePostUnitContext(s, ctx, pi, pauseAutoMock);
|
||||
|
||||
// Call postUnitPostVerification
|
||||
const result = await postUnitPostVerification(pctx);
|
||||
|
||||
// Verify pauseAuto was NOT called (warnings don't block in non-strict mode)
|
||||
assert.equal(
|
||||
pauseAutoMock.mock.callCount(),
|
||||
0,
|
||||
"pauseAuto should NOT be called when strict mode is disabled and only warnings exist"
|
||||
);
|
||||
|
||||
// Verify return value is "continue" (not "stopped")
|
||||
assert.equal(
|
||||
result,
|
||||
"continue",
|
||||
"postUnitPostVerification should return 'continue' when warnings don't block in non-strict mode"
|
||||
);
|
||||
});
|
||||
|
||||
test("pre-execution checks are skipped when unit type is not plan-slice", async () => {
|
||||
// Set up tasks that would fail if checked
|
||||
createFailingTasks();
|
||||
|
||||
// Create mocks with execute-task unit (not plan-slice)
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, { type: "execute-task", id: "M001/S01/T01" });
|
||||
const pctx = makePostUnitContext(s, ctx, pi, pauseAutoMock);
|
||||
|
||||
// Call postUnitPostVerification
|
||||
const result = await postUnitPostVerification(pctx);
|
||||
|
||||
// Verify pauseAuto was NOT called (pre-execution checks only run for plan-slice)
|
||||
assert.equal(
|
||||
pauseAutoMock.mock.callCount(),
|
||||
0,
|
||||
"pauseAuto should NOT be called for non-plan-slice unit types"
|
||||
);
|
||||
|
||||
// Verify return value is "continue"
|
||||
assert.equal(
|
||||
result,
|
||||
"continue",
|
||||
"postUnitPostVerification should return 'continue' for non-plan-slice unit types"
|
||||
);
|
||||
});
|
||||
|
||||
test("pre-execution checks are skipped when enhanced_verification_pre: false", async () => {
|
||||
// Write preferences with pre-execution checks disabled
|
||||
writePreferences({
|
||||
enhanced_verification: true,
|
||||
enhanced_verification_pre: false,
|
||||
});
|
||||
|
||||
// Set up tasks that would fail if checked
|
||||
createFailingTasks();
|
||||
|
||||
// Create mocks
|
||||
const ctx = makeMockCtx();
|
||||
const pi = makeMockPi();
|
||||
const pauseAutoMock = mock.fn(async () => {});
|
||||
const s = makeMockSession(tempDir, { type: "plan-slice", id: "M001/S01" });
|
||||
const pctx = makePostUnitContext(s, ctx, pi, pauseAutoMock);
|
||||
|
||||
// Call postUnitPostVerification
|
||||
const result = await postUnitPostVerification(pctx);
|
||||
|
||||
// Verify pauseAuto was NOT called (pre-execution checks disabled)
|
||||
assert.equal(
|
||||
pauseAutoMock.mock.callCount(),
|
||||
0,
|
||||
"pauseAuto should NOT be called when enhanced_verification_pre is disabled"
|
||||
);
|
||||
|
||||
// Verify return value is "continue"
|
||||
assert.equal(
|
||||
result,
|
||||
"continue",
|
||||
"postUnitPostVerification should return 'continue' when pre-execution checks are disabled"
|
||||
);
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Reference in a new issue