Merge pull request #3546 from jeremymcs/worktree-issue-3541-ollama-native

fix(gsd): prevent LLM from querying gsd.db directly via bash
2026-04-05 10:51:01 -05:00 · 2026-04-05 10:51:01 -05:00 · 092d1c0a9e
commit 092d1c0a9e
parent dcf41154b8 563fdae8e2
11 changed files with 424 additions and 0 deletions
--- a/.prompt-injection-scanignore
+++ b/.prompt-injection-scanignore
@ -0,0 +1,2 @@
+# False positives in GSD prompt templates — these are legitimate LLM instructions, not injection
+src/resources/extensions/gsd/prompts/doctor-heal.md:You are now responsible
--- a/src/resources/extensions/gsd/bootstrap/query-tools.ts
+++ b/src/resources/extensions/gsd/bootstrap/query-tools.ts
@ -0,0 +1,98 @@
+// GSD2 — Read-only query tools exposing DB state to the LLM via the WAL connection
+
+import { Type } from "@sinclair/typebox";
+import type { ExtensionAPI } from "@gsd/pi-coding-agent";
+
+import { logWarning } from "../workflow-logger.js";
+
+/**
+ * Registers the read-only GSD query tools with the extension API.
+ *
+ * Registers one tool, `gsd_milestone_status`, which returns a milestone's
+ * metadata together with the status and task counts of each of its slices.
+ * The tool reads through the already-open DB connection and does not call
+ * `ensureDbOpen()`.
+ *
+ * @param pi - Extension API on which the tool is registered.
+ */
+export function registerQueryTools(pi: ExtensionAPI): void {
+  // Every tool result shares one shape: a single text item plus a details payload.
+  const reply = (text: string, details: Record<string, unknown>) => ({
+    content: [{ type: "text" as const, text }],
+    details: details as any,
+  });
+
+  pi.registerTool({
+    name: "gsd_milestone_status",
+    label: "Milestone Status",
+    description:
+      "Read the current status of a milestone and all its slices from the GSD database. " +
+      "Returns milestone metadata, per-slice status, and task counts per slice. " +
+      "Use this instead of querying .gsd/gsd.db directly via sqlite3 or better-sqlite3.",
+    promptSnippet: "Get milestone status, slice statuses, and task counts for a given milestoneId",
+    promptGuidelines: [
+      "Use this tool — not sqlite3 or better-sqlite3 — to inspect milestone or slice state from the DB.",
+    ],
+    parameters: Type.Object({
+      milestoneId: Type.String({ description: "Milestone ID to query (e.g. M001)" }),
+    }),
+    /**
+     * Looks up `params.milestoneId` and resolves to a text result: the JSON
+     * status report, a not-found message, or an error message. Failures are
+     * caught, logged via `logWarning`, and reported in the result.
+     */
+    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
+      try {
+        // Strictly read-only: only use an already-open DB connection.
+        // Do NOT call ensureDbOpen() — it can create/migrate the DB as a side effect.
+        const {
+          isDbAvailable,
+          getMilestone,
+          getSliceStatusSummary,
+          getSliceTaskCounts,
+          _getAdapter,
+        } = await import("../gsd-db.js");
+
+        // Guard the adapter at runtime rather than asserting it non-null: a
+        // missing connection is then reported as "unavailable" instead of
+        // surfacing as a TypeError from the first exec() call.
+        const adapter = isDbAvailable() ? _getAdapter() : null;
+        if (!adapter) {
+          return reply("Error: GSD database is not available.", {
+            operation: "milestone_status",
+            error: "db_unavailable",
+          });
+        }
+
+        const { milestoneId } = params;
+
+        // Collects the milestone row and per-slice data; null when the milestone is absent.
+        const readSnapshot = () => {
+          const milestone = getMilestone(milestoneId);
+          if (!milestone) return null;
+          const slices = getSliceStatusSummary(milestoneId).map((slice) => ({
+            id: slice.id,
+            status: slice.status,
+            taskCounts: getSliceTaskCounts(milestoneId, slice.id),
+          }));
+          return { milestone, slices };
+        };
+
+        // Wrap all reads in a single transaction for snapshot consistency.
+        // SQLite WAL mode guarantees reads within a transaction see a single
+        // consistent snapshot, preventing torn reads from concurrent writes.
+        // COMMIT is issued in exactly one place; any failure rolls back and rethrows.
+        let snapshot: ReturnType<typeof readSnapshot>;
+        adapter.exec("BEGIN");  // eslint-disable-line -- SQLite exec, not child_process
+        try {
+          snapshot = readSnapshot();
+          adapter.exec("COMMIT");  // eslint-disable-line
+        } catch (txErr) {
+          try { adapter.exec("ROLLBACK"); } catch { /* swallow */ }  // eslint-disable-line
+          throw txErr;
+        }
+
+        if (!snapshot) {
+          return reply(`Milestone ${milestoneId} not found in database.`, {
+            operation: "milestone_status",
+            milestoneId,
+            found: false,
+          });
+        }
+
+        const { milestone, slices } = snapshot;
+        const result = {
+          milestoneId: milestone.id,
+          title: milestone.title,
+          status: milestone.status,
+          createdAt: milestone.created_at,
+          completedAt: milestone.completed_at,
+          sliceCount: slices.length,
+          slices,
+        };
+
+        return reply(JSON.stringify(result, null, 2), {
+          operation: "milestone_status",
+          milestoneId: milestone.id,
+          sliceCount: slices.length,
+        });
+      } catch (err) {
+        const msg = err instanceof Error ? err.message : String(err);
+        logWarning("tool", `gsd_milestone_status tool failed: ${msg}`);
+        return reply(`Error querying milestone status: ${msg}`, {
+          operation: "milestone_status",
+          error: msg,
+        });
+      }
+    },
+  });
+}
--- a/src/resources/extensions/gsd/bootstrap/register-extension.ts
+++ b/src/resources/extensions/gsd/bootstrap/register-extension.ts
@ -1,3 +1,5 @@
+// GSD2 — Extension registration: wires all GSD tools, commands, and hooks into pi
+
 import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent";

 import { registerGSDCommand } from "../commands.js";
@ -6,6 +8,7 @@ import { registerWorktreeCommand } from "../worktree-command.js";
 import { registerDbTools } from "./db-tools.js";
 import { registerDynamicTools } from "./dynamic-tools.js";
 import { registerJournalTools } from "./journal-tools.js";
+import { registerQueryTools } from "./query-tools.js";
 import { registerHooks } from "./register-hooks.js";
 import { registerShortcuts } from "./register-shortcuts.js";

@ -56,6 +59,7 @@ export function registerGsdExtension(pi: ExtensionAPI): void {
  registerDynamicTools(pi);
  registerDbTools(pi);
  registerJournalTools(pi);
+  registerQueryTools(pi);
  registerShortcuts(pi);
  registerHooks(pi);
 }
--- a/src/resources/extensions/gsd/prompts/complete-milestone.md
+++ b/src/resources/extensions/gsd/prompts/complete-milestone.md
@ -24,6 +24,8 @@ Then:
 7. Fill the **Decision Re-evaluation** table in the milestone summary. For each key decision from `.gsd/DECISIONS.md` made during this milestone, evaluate whether it is still valid given what was actually built. Flag decisions that should be revisited next milestone.
 8. Validate **requirement status transitions**. For each requirement that changed status during this milestone, confirm the transition is supported by evidence. Requirements can move between Active, Validated, Deferred, Blocked, or Out of Scope — but only with proof.

+**DB access safety:** Do NOT query `.gsd/gsd.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — the engine owns the WAL connection. Use `gsd_milestone_status` to read milestone and slice state. All data you need is already inlined in the context above or accessible via the `gsd_*` tools — never via direct SQL.
+
 ### Verification Gate — STOP if verification failed

 **If ANY verification failure was recorded in steps 3, 4, or 5, you MUST follow the failure path below. Do NOT proceed to step 9.**
--- a/src/resources/extensions/gsd/prompts/doctor-heal.md
+++ b/src/resources/extensions/gsd/prompts/doctor-heal.md
@ -9,6 +9,7 @@ Rules:
 4. For missing summaries or UAT files, generate the real artifact from existing slice/task context when possible — do not leave placeholders if you can reconstruct the real content.
 5. After each repair cluster, verify the relevant invariant directly from disk.
 6. When done, rerun `/gsd doctor {{doctorCommandSuffix}}` mentally by ensuring the remaining issue set for this scope is reduced or cleared.
+7. Do NOT query `.gsd/gsd.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — use `gsd_milestone_status` to inspect DB state. Direct access bypasses the WAL connection owned by the engine and can corrupt in-flight writes.

 ## Doctor Summary

--- a/src/resources/extensions/gsd/prompts/forensics.md
+++ b/src/resources/extensions/gsd/prompts/forensics.md
@ -116,6 +116,8 @@ A unit dispatched more than once (`type/id` appears multiple times) indicates a

 5. **Read the actual GSD source code** at `{{gsdSourceDir}}` to confirm or deny each hypothesis. Do not guess what code does — read it.

+   **DB inspection:** If you need to check DB state as part of investigation, use `gsd_milestone_status` — never run `sqlite3 .gsd/gsd.db` or `node -e require('better-sqlite3')` directly. The engine holds a WAL write lock; direct access will either fail or return stale data.
+
 6. **Trace the code path** from the entry point (usually `auto-loop.ts` dispatch or `auto-dispatch.ts`) through to the failure point. Follow function calls across files.

 7. **Identify the specific file and line** where the bug lives. Determine what kind of defect it is:
--- a/src/resources/extensions/gsd/prompts/reassess-roadmap.md
+++ b/src/resources/extensions/gsd/prompts/reassess-roadmap.md
@ -63,4 +63,6 @@ If `.gsd/REQUIREMENTS.md` exists and requirement ownership or status changed, up

 {{commitInstruction}}

+**DB access safety:** Do NOT query `.gsd/gsd.db` directly via `sqlite3` or `node -e require('better-sqlite3')`. Use `gsd_milestone_status` to read current milestone and slice state. All roadmap mutations go through `gsd_reassess_roadmap` — the tool writes to the DB and re-renders ROADMAP.md atomically.
+
 When done, say: "Roadmap reassessed."
--- a/src/resources/extensions/gsd/prompts/system.md
+++ b/src/resources/extensions/gsd/prompts/system.md
@ -175,6 +175,7 @@ Templates showing the expected format for each artifact type are in:
 - Never guess at library APIs from training data — use `get_library_docs`.
 - Never ask the user to run a command, set a variable, or check something you can check yourself.
 - Never await stale async jobs after editing source — `cancel_job` them first, then re-run.
+- Never query `.gsd/gsd.db` directly via `sqlite3`, `better-sqlite3`, or `node -e require('better-sqlite3')` — the database uses a single-writer WAL connection managed by the engine. Direct access causes reader/writer conflicts and bypasses validation logic. Use `gsd_milestone_status`, `gsd_journal_query`, or other `gsd_*` tools exclusively for all DB reads and writes.

 ### Ask vs infer

--- a/src/resources/extensions/gsd/prompts/validate-milestone.md
+++ b/src/resources/extensions/gsd/prompts/validate-milestone.md
@ -38,6 +38,8 @@ All relevant context has been preloaded below — the roadmap, all slice summari

 **Persist validation results through `gsd_validate_milestone`.** Call it with: `milestoneId`, `verdict`, `remediationRound`, `successCriteriaChecklist`, `sliceDeliveryAudit`, `crossSliceIntegration`, `requirementCoverage`, `verificationClasses` (when non-empty), `verdictRationale`, and `remediationPlan` (if verdict is `needs-remediation`). The tool writes the validation to the DB and renders VALIDATION.md to disk.

+**DB access safety:** Do NOT query `.gsd/gsd.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — the engine owns the WAL connection. Use `gsd_milestone_status` to read milestone and slice state. All data you need is already inlined in the context above or accessible via the `gsd_*` tools. Direct DB access can corrupt in-flight writes and bypasses tool-level validation.
+
 If verdict is `needs-remediation`:
 - After calling `gsd_validate_milestone`, use `gsd_reassess_roadmap` to add remediation slices. Pass `milestoneId`, a synthetic `completedSliceId` (e.g. "VALIDATION"), `verdict: "roadmap-adjusted"`, `assessment` text, and `sliceChanges` with the new slices in the `added` array. The tool persists the changes to the DB and re-renders ROADMAP.md.
 - These remediation slices will be planned and executed before validation re-runs.
--- a/src/resources/extensions/gsd/tests/db-access-guardrails.test.ts
+++ b/src/resources/extensions/gsd/tests/db-access-guardrails.test.ts
@ -0,0 +1,109 @@
+// GSD2 — Regression tests: DB anti-pattern guardrails in prompt templates
+
+import test from "node:test";
+import assert from "node:assert/strict";
+import { readFileSync, readdirSync } from "node:fs";
+import { join } from "node:path";
+
+const promptsDir = join(process.cwd(), "src/resources/extensions/gsd/prompts");
+
+/** Returns the UTF-8 text of the prompt template `<name>.md` in the prompts directory. */
+function readPrompt(name: string): string {
+  const promptPath = join(promptsDir, `${name}.md`);
+  return readFileSync(promptPath, "utf-8");
+}
+
+// ─── Layer 1: system.md global guardrail ──────────────────────────────────────
+
+test("system.md anti-patterns section prohibits direct .gsd/gsd.db access", () => {
+  const systemPrompt = readPrompt("system");
+  // Required patterns paired with the message reported when one is missing; checked in order.
+  const required: Array<[RegExp, string]> = [
+    [
+      /Never query.*\.gsd\/gsd\.db.*directly/i,
+      "system.md must prohibit direct .gsd/gsd.db access in the anti-patterns section",
+    ],
+    [/sqlite3/, "system.md DB guardrail must name the sqlite3 CLI"],
+    [/better-sqlite3/, "system.md DB guardrail must name better-sqlite3"],
+    [/gsd_\*/, "system.md DB guardrail must redirect to gsd_* tools"],
+  ];
+  for (const [pattern, failureMessage] of required) {
+    assert.match(systemPrompt, pattern, failureMessage);
+  }
+});
+
+test("system.md DB guardrail explains single-writer WAL risk", () => {
+  const systemPrompt = readPrompt("system");
+  const walPattern = /single-writer WAL/i;
+  assert.match(systemPrompt, walPattern, "system.md must explain the WAL architecture risk");
+});
+
+// ─── Layer 2: high-risk prompt guardrails ─────────────────────────────────────
+
+/** Reads prompt `<name>.md` and asserts each [pattern, message] pair against it, in order. */
+function assertPromptGuardrails(name: string, checks: ReadonlyArray<readonly [RegExp, string]>): void {
+  const body = readPrompt(name);
+  for (const [pattern, failureMessage] of checks) {
+    assert.match(body, pattern, failureMessage);
+  }
+}
+
+test("validate-milestone.md contains DB access safety guardrail with tool redirect", () => {
+  assertPromptGuardrails("validate-milestone", [
+    [/DB access safety/i, "validate-milestone.md must have DB access safety section"],
+    [/gsd_milestone_status/, "validate-milestone.md must name gsd_milestone_status as alternative"],
+    [/Do NOT query.*\.gsd\/gsd\.db/i, "validate-milestone.md must prohibit direct DB queries"],
+  ]);
+});
+
+test("complete-milestone.md contains DB access safety guardrail with tool redirect", () => {
+  assertPromptGuardrails("complete-milestone", [
+    [/DB access safety/i, "complete-milestone.md must have DB access safety section"],
+    [/gsd_milestone_status/, "complete-milestone.md must name gsd_milestone_status as alternative"],
+    [/Do NOT query.*\.gsd\/gsd\.db/i, "complete-milestone.md must prohibit direct DB queries"],
+  ]);
+});
+
+test("doctor-heal.md contains DB access guardrail naming gsd_milestone_status", () => {
+  assertPromptGuardrails("doctor-heal", [
+    [/gsd_milestone_status/, "doctor-heal.md must name gsd_milestone_status as the DB inspection tool"],
+    [/Do NOT query.*\.gsd\/gsd\.db/i, "doctor-heal.md must prohibit direct DB queries"],
+  ]);
+});
+
+test("forensics.md contains DB inspection guardrail", () => {
+  assertPromptGuardrails("forensics", [
+    [/gsd_milestone_status/, "forensics.md must name gsd_milestone_status as the DB inspection tool"],
+    [/sqlite3.*\.gsd\/gsd\.db/i, "forensics.md must prohibit sqlite3 against .gsd/gsd.db"],
+  ]);
+});
+
+test("reassess-roadmap.md contains DB access safety guardrail", () => {
+  assertPromptGuardrails("reassess-roadmap", [
+    [/DB access safety/i, "reassess-roadmap.md must have DB access safety section"],
+    [/gsd_milestone_status/, "reassess-roadmap.md must name gsd_milestone_status as alternative"],
+  ]);
+});
+
+// ─── Negative assertion: no prompt instructs running sqlite3 as a command ─────
+
+test("no prompt file contains an unguarded sqlite3 command invocation", () => {
+  const promptFiles = readdirSync(promptsDir).filter((entry) => entry.endsWith(".md"));
+  assert.ok(promptFiles.length >= 35, `Expected at least 35 prompt files, found ${promptFiles.length}`);
+
+  // A hit is tolerated when the matching line, or one of the three lines before
+  // it, contains a prohibition keyword ("Never", "Do NOT", ...).
+  const prohibition = /Never|Do NOT|do not|don't|prohibited|forbidden|never run/i;
+
+  // Detectors run in this order for every line:
+  //   1. sqlite3 targeting gsd.db in any common form, e.g. sqlite3 .gsd/gsd.db,
+  //      sqlite3 ./.gsd/gsd.db, sqlite3 "/path/.gsd/gsd.db", sqlite3 -header .gsd/gsd.db
+  //   2. node -e with a better-sqlite3 require/import in any quoting style
+  const detectors: Array<{ pattern: RegExp; report: (where: string, text: string) => string }> = [
+    {
+      pattern: /sqlite3\b.*gsd\.db/,
+      report: (where, text) => `${where} — unguarded sqlite3 command: ${text}`,
+    },
+    {
+      pattern: /node\s+-e\s+.*(?:require|import).*better-sqlite3/,
+      report: (where, text) => `${where} — unguarded node -e require command: ${text}`,
+    },
+  ];
+
+  const violations: string[] = [];
+
+  for (const file of promptFiles) {
+    const lines = readFileSync(join(promptsDir, file), "utf-8").split("\n");
+
+    for (const [index, rawLine] of lines.entries()) {
+      const trimmed = rawLine.trim();
+
+      for (const { pattern, report } of detectors) {
+        if (!pattern.test(trimmed)) continue;
+
+        const context = lines.slice(Math.max(0, index - 3), index + 1).join(" ");
+        if (!prohibition.test(context)) {
+          violations.push(report(`${file}:${index + 1}`, trimmed));
+        }
+      }
+    }
+  }
+
+  assert.deepEqual(
+    violations,
+    [],
+    `Found prompts with unguarded sqlite3/better-sqlite3 invocations:\n${violations.join("\n")}`,
+  );
+});
--- a/src/resources/extensions/gsd/tests/milestone-status-tool.test.ts
+++ b/src/resources/extensions/gsd/tests/milestone-status-tool.test.ts
@ -0,0 +1,201 @@
+// GSD2 — Tests for gsd_milestone_status read-only query tool
+
+import test from "node:test";
+import assert from "node:assert/strict";
+import { mkdirSync, rmSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { randomUUID } from "node:crypto";
+
+import { registerQueryTools } from "../bootstrap/query-tools.ts";
+import {
+  openDatabase,
+  closeDatabase,
+  _getAdapter,
+} from "../gsd-db.ts";
+
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+
+/** Builds a minimal stand-in for the extension API that records each registered tool in `tools`. */
+function makeMockPi() {
+  const registered: any[] = [];
+  const registerTool = (tool: any) => registered.push(tool);
+  return { registerTool, tools: registered } as any;
+}
+
+/** Creates a uniquely named temp directory containing an empty `.gsd/` folder and returns its path. */
+function makeTmpBase(): string {
+  const root = join(tmpdir(), `gsd-query-tool-test-${randomUUID()}`);
+  const gsdDir = join(root, ".gsd");
+  mkdirSync(gsdDir, { recursive: true });
+  return root;
+}
+
+/** Best-effort recursive removal of a test directory; any failure is ignored. */
+function cleanup(base: string): void {
+  try {
+    rmSync(base, { force: true, recursive: true });
+  } catch {
+    /* swallow */
+  }
+}
+
+/** Opens the database file `<base>/.gsd/gsd.db` via `openDatabase`. */
+function openTestDb(base: string): void {
+  const dbPath = join(base, ".gsd", "gsd.db");
+  openDatabase(dbPath);
+}
+
+/**
+ * Runs `tool.execute` with `dir` as the process working directory and restores
+ * the previous working directory afterwards, whether or not the call succeeds.
+ */
+async function executeToolInDir(tool: any, params: Record<string, unknown>, dir: string) {
+  const previousCwd = process.cwd();
+  try {
+    process.chdir(dir);
+    const outcome = await tool.execute("test-call-id", params, undefined, undefined, undefined);
+    return outcome;
+  } finally {
+    process.chdir(previousCwd);
+  }
+}
+
+// ─── Seed helpers ─────────────────────────────────────────────────────────────
+
+/** Inserts (or replaces) a milestone row, stamping `created_at` with the current time. Throws if no DB is open. */
+function seedMilestone(milestoneId: string, title: string, status = "active"): void {
+  const adapter = _getAdapter();
+  if (!adapter) {
+    throw new Error("DB not open");
+  }
+  const insert = adapter.prepare(
+    "INSERT OR REPLACE INTO milestones (id, title, status, created_at) VALUES (?, ?, ?, ?)",
+  );
+  insert.run(milestoneId, title, status, new Date().toISOString());
+}
+
+/** Inserts (or replaces) a slice row titled `Slice <sliceId>` under the given milestone. Throws if no DB is open. */
+function seedSlice(milestoneId: string, sliceId: string, status: string): void {
+  const adapter = _getAdapter();
+  if (!adapter) {
+    throw new Error("DB not open");
+  }
+  const insert = adapter.prepare(
+    "INSERT OR REPLACE INTO slices (milestone_id, id, title, status, created_at) VALUES (?, ?, ?, ?, ?)",
+  );
+  insert.run(milestoneId, sliceId, `Slice ${sliceId}`, status, new Date().toISOString());
+}
+
+/** Inserts (or replaces) a task row titled `Task <taskId>` under the given milestone and slice. Throws if no DB is open. */
+function seedTask(milestoneId: string, sliceId: string, taskId: string, status: string): void {
+  const adapter = _getAdapter();
+  if (!adapter) {
+    throw new Error("DB not open");
+  }
+  const insert = adapter.prepare(
+    "INSERT OR REPLACE INTO tasks (milestone_id, slice_id, id, title, status) VALUES (?, ?, ?, ?, ?)",
+  );
+  insert.run(milestoneId, sliceId, taskId, `Task ${taskId}`, status);
+}
+
+// ─── Registration ─────────────────────────────────────────────────────────────
+
+test("registerQueryTools registers gsd_milestone_status tool", () => {
+  const mockPi = makeMockPi();
+  registerQueryTools(mockPi);
+  const { tools } = mockPi;
+  assert.equal(tools.length, 1, "Should register exactly one tool");
+  assert.equal(tools[0].name, "gsd_milestone_status");
+});
+
+test("gsd_milestone_status has promptGuidelines mentioning prohibited alternatives", () => {
+  const mockPi = makeMockPi();
+  registerQueryTools(mockPi);
+  const guidelines = mockPi.tools[0].promptGuidelines;
+  assert.ok(Array.isArray(guidelines), "promptGuidelines must be an array");
+  assert.ok(guidelines.length >= 1, "Must have at least one guideline");
+  // Join so the pattern may match in any of the guideline strings.
+  const combined = guidelines.join(" ");
+  assert.match(combined, /sqlite3|better-sqlite3/, "Guidelines must mention prohibited alternatives");
+});
+
+// ─── Happy path: milestone with slices and tasks ──────────────────────────────
+
+test("gsd_milestone_status returns milestone metadata and slice statuses", async () => {
+  const tmpBase = makeTmpBase();
+  try {
+    openTestDb(tmpBase);
+
+    // Fixture: M001 with S01 (complete, two done tasks) and S02 (active, one pending task).
+    seedMilestone("M001", "Test Milestone");
+    const sliceRows: Array<[string, string]> = [
+      ["S01", "complete"],
+      ["S02", "active"],
+    ];
+    for (const [sliceId, status] of sliceRows) {
+      seedSlice("M001", sliceId, status);
+    }
+    const taskRows: Array<[string, string, string]> = [
+      ["S01", "T01", "done"],
+      ["S01", "T02", "done"],
+      ["S02", "T01", "pending"],
+    ];
+    for (const [sliceId, taskId, status] of taskRows) {
+      seedTask("M001", sliceId, taskId, status);
+    }
+
+    const mockPi = makeMockPi();
+    registerQueryTools(mockPi);
+    const statusTool = mockPi.tools[0];
+
+    const response = await executeToolInDir(statusTool, { milestoneId: "M001" }, tmpBase);
+    const report = JSON.parse(response.content[0].text);
+
+    assert.equal(report.milestoneId, "M001");
+    assert.equal(report.title, "Test Milestone");
+    assert.equal(report.status, "active");
+    assert.equal(report.sliceCount, 2);
+    assert.equal(report.slices.length, 2);
+
+    const findSlice = (id: string) => report.slices.find((slice: any) => slice.id === id);
+
+    const firstSlice = findSlice("S01");
+    assert.ok(firstSlice, "S01 should be in slices");
+    assert.equal(firstSlice.status, "complete");
+    assert.equal(firstSlice.taskCounts.total, 2);
+    assert.equal(firstSlice.taskCounts.done, 2);
+
+    const secondSlice = findSlice("S02");
+    assert.ok(secondSlice, "S02 should be in slices");
+    assert.equal(secondSlice.status, "active");
+    assert.equal(secondSlice.taskCounts.pending, 1);
+  } finally {
+    closeDatabase();
+    cleanup(tmpBase);
+  }
+});
+
+// ─── Milestone with no slices ─────────────────────────────────────────────────
+
+test("gsd_milestone_status returns empty slices array for milestone with no slices", async () => {
+  const tmpBase = makeTmpBase();
+  try {
+    openTestDb(tmpBase);
+    // Only the milestone row is seeded; it has no slices or tasks.
+    seedMilestone("M002", "Empty Milestone");
+
+    const mockPi = makeMockPi();
+    registerQueryTools(mockPi);
+    const statusTool = mockPi.tools[0];
+
+    const response = await executeToolInDir(statusTool, { milestoneId: "M002" }, tmpBase);
+    const report = JSON.parse(response.content[0].text);
+
+    assert.equal(report.milestoneId, "M002");
+    assert.equal(report.sliceCount, 0);
+    assert.deepEqual(report.slices, []);
+  } finally {
+    closeDatabase();
+    cleanup(tmpBase);
+  }
+});
+
+// ─── Missing milestone ────────────────────────────────────────────────────────
+
+test("gsd_milestone_status returns not-found for missing milestone", async () => {
+  const tmpBase = makeTmpBase();
+  try {
+    // The DB is opened but no milestone is seeded, so M999 cannot exist.
+    openTestDb(tmpBase);
+
+    const mockPi = makeMockPi();
+    registerQueryTools(mockPi);
+    const statusTool = mockPi.tools[0];
+
+    const response = await executeToolInDir(statusTool, { milestoneId: "M999" }, tmpBase);
+    const { content, details } = response;
+    assert.match(content[0].text, /M999.*not found/i);
+    assert.equal(details.found, false);
+  } finally {
+    closeDatabase();
+    cleanup(tmpBase);
+  }
+});
+
+// ─── DB unavailable ───────────────────────────────────────────────────────────
+
+test("gsd_milestone_status handles missing DB gracefully", async () => {
+  // The tool only uses an already-open connection and never opens a database
+  // itself, so closing any open DB is what puts it on the "unavailable" path.
+  // The scratch directory (deliberately without .gsd/) is just the working
+  // directory for the call.
+  const scratchDir = join(tmpdir(), `gsd-no-db-${randomUUID()}`);
+  mkdirSync(scratchDir, { recursive: true });
+  closeDatabase(); // ensure no prior DB is open
+  try {
+    const mockPi = makeMockPi();
+    registerQueryTools(mockPi);
+    const statusTool = mockPi.tools[0];
+
+    const response = await executeToolInDir(statusTool, { milestoneId: "M001" }, scratchDir);
+    const { content, details } = response;
+    assert.match(content[0].text, /GSD database is not available/);
+    assert.equal(details.error, "db_unavailable");
+  } finally {
+    closeDatabase();
+    cleanup(scratchDir);
+  }
+});