Merge pull request #3546 from jeremymcs/worktree-issue-3541-ollama-native

fix(gsd): prevent LLM from querying gsd.db directly via bash
This commit is contained in:
Jeremy McSpadden 2026-04-05 10:51:01 -05:00 committed by GitHub
commit 092d1c0a9e
11 changed files with 424 additions and 0 deletions

View file

@ -0,0 +1,2 @@
# False positives in GSD prompt templates — these are legitimate LLM instructions, not injection
src/resources/extensions/gsd/prompts/doctor-heal.md:You are now responsible

View file

@ -0,0 +1,98 @@
// GSD2 — Read-only query tools exposing DB state to the LLM via the WAL connection
import { Type } from "@sinclair/typebox";
import type { ExtensionAPI } from "@gsd/pi-coding-agent";
import { logWarning } from "../workflow-logger.js";
/**
 * Registers the read-only GSD query tools on the given extension API.
 *
 * Currently this registers one tool, `gsd_milestone_status`, which returns a
 * milestone's metadata together with the status and task counts of each of its
 * slices, serialized as pretty-printed JSON text.
 *
 * Failures inside the tool (missing DB, failed import, failed query) are caught
 * and returned as an error result rather than thrown to the caller.
 *
 * @param pi - Extension API whose `registerTool` receives the tool definition.
 */
export function registerQueryTools(pi: ExtensionAPI): void {
  pi.registerTool({
    name: "gsd_milestone_status",
    label: "Milestone Status",
    description:
      "Read the current status of a milestone and all its slices from the GSD database. " +
      "Returns milestone metadata, per-slice status, and task counts per slice. " +
      "Use this instead of querying .gsd/gsd.db directly via sqlite3 or better-sqlite3.",
    promptSnippet: "Get milestone status, slice statuses, and task counts for a given milestoneId",
    promptGuidelines: [
      "Use this tool — not sqlite3 or better-sqlite3 — to inspect milestone or slice state from the DB.",
    ],
    parameters: Type.Object({
      milestoneId: Type.String({ description: "Milestone ID to query (e.g. M001)" }),
    }),
    async execute(_toolCallId, params, _signal, _onUpdate, _ctx) {
      try {
        // Strictly read-only: only use an already-open DB connection.
        // Do NOT call ensureDbOpen() — it can create/migrate the DB as a side effect.
        const {
          isDbAvailable,
          getMilestone,
          getSliceStatusSummary,
          getSliceTaskCounts,
          _getAdapter,
        } = await import("../gsd-db.js");

        // Guard the adapter explicitly instead of asserting it non-null: a missing
        // adapter is reported the same way as an unavailable database rather than
        // surfacing later as an opaque TypeError on `adapter.exec`.
        const adapter = isDbAvailable() ? _getAdapter() : null;
        if (!adapter) {
          return {
            content: [{ type: "text" as const, text: "Error: GSD database is not available." }],
            details: { operation: "milestone_status", error: "db_unavailable" } as any,
          };
        }

        // Wrap all reads in a single transaction for snapshot consistency.
        // SQLite WAL mode guarantees reads within a transaction see a single
        // consistent snapshot, preventing torn reads from concurrent writes.
        adapter.exec("BEGIN"); // eslint-disable-line -- SQLite exec, not child_process
        try {
          const milestone = getMilestone(params.milestoneId);
          if (!milestone) {
            // Close the read transaction before the early return.
            adapter.exec("COMMIT"); // eslint-disable-line
            return {
              content: [{ type: "text" as const, text: `Milestone ${params.milestoneId} not found in database.` }],
              details: { operation: "milestone_status", milestoneId: params.milestoneId, found: false } as any,
            };
          }

          const sliceStatuses = getSliceStatusSummary(params.milestoneId);
          const slices = sliceStatuses.map((s) => {
            const counts = getSliceTaskCounts(params.milestoneId, s.id);
            return {
              id: s.id,
              status: s.status,
              taskCounts: counts,
            };
          });
          adapter.exec("COMMIT"); // eslint-disable-line

          const result = {
            milestoneId: milestone.id,
            title: milestone.title,
            status: milestone.status,
            createdAt: milestone.created_at,
            completedAt: milestone.completed_at,
            sliceCount: slices.length,
            slices,
          };
          return {
            content: [{ type: "text" as const, text: JSON.stringify(result, null, 2) }],
            details: { operation: "milestone_status", milestoneId: milestone.id, sliceCount: slices.length } as any,
          };
        } catch (txErr) {
          // Best-effort rollback so the connection is not left inside a transaction;
          // the original error is what gets reported.
          try { adapter.exec("ROLLBACK"); } catch { /* swallow */ } // eslint-disable-line
          throw txErr;
        }
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        logWarning("tool", `gsd_milestone_status tool failed: ${msg}`);
        return {
          content: [{ type: "text" as const, text: `Error querying milestone status: ${msg}` }],
          details: { operation: "milestone_status", error: msg } as any,
        };
      }
    },
  });
}

View file

@ -1,3 +1,5 @@
// GSD2 — Extension registration: wires all GSD tools, commands, and hooks into pi
import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent";
import { registerGSDCommand } from "../commands.js";
@ -6,6 +8,7 @@ import { registerWorktreeCommand } from "../worktree-command.js";
import { registerDbTools } from "./db-tools.js";
import { registerDynamicTools } from "./dynamic-tools.js";
import { registerJournalTools } from "./journal-tools.js";
import { registerQueryTools } from "./query-tools.js";
import { registerHooks } from "./register-hooks.js";
import { registerShortcuts } from "./register-shortcuts.js";
@ -56,6 +59,7 @@ export function registerGsdExtension(pi: ExtensionAPI): void {
registerDynamicTools(pi);
registerDbTools(pi);
registerJournalTools(pi);
registerQueryTools(pi);
registerShortcuts(pi);
registerHooks(pi);
}

View file

@ -24,6 +24,8 @@ Then:
7. Fill the **Decision Re-evaluation** table in the milestone summary. For each key decision from `.gsd/DECISIONS.md` made during this milestone, evaluate whether it is still valid given what was actually built. Flag decisions that should be revisited next milestone.
8. Validate **requirement status transitions**. For each requirement that changed status during this milestone, confirm the transition is supported by evidence. Requirements can move between Active, Validated, Deferred, Blocked, or Out of Scope — but only with proof.
**DB access safety:** Do NOT query `.gsd/gsd.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — the engine owns the WAL connection. Use `gsd_milestone_status` to read milestone and slice state. All data you need is already inlined in the context above or accessible via the `gsd_*` tools — never via direct SQL.
### Verification Gate — STOP if verification failed
**If ANY verification failure was recorded in steps 3, 4, or 5, you MUST follow the failure path below. Do NOT proceed to step 9.**

View file

@ -9,6 +9,7 @@ Rules:
4. For missing summaries or UAT files, generate the real artifact from existing slice/task context when possible — do not leave placeholders if you can reconstruct the real content.
5. After each repair cluster, verify the relevant invariant directly from disk.
6. When done, rerun `/gsd doctor {{doctorCommandSuffix}}` mentally by ensuring the remaining issue set for this scope is reduced or cleared.
7. Do NOT query `.gsd/gsd.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — use `gsd_milestone_status` to inspect DB state. Direct access bypasses the WAL connection owned by the engine and can corrupt in-flight writes.
## Doctor Summary

View file

@ -116,6 +116,8 @@ A unit dispatched more than once (`type/id` appears multiple times) indicates a
5. **Read the actual GSD source code** at `{{gsdSourceDir}}` to confirm or deny each hypothesis. Do not guess what code does — read it.
**DB inspection:** If you need to check DB state as part of investigation, use `gsd_milestone_status` — never run `sqlite3 .gsd/gsd.db` or `node -e require('better-sqlite3')` directly. The engine holds a WAL write lock; direct access will either fail or return stale data.
6. **Trace the code path** from the entry point (usually `auto-loop.ts` dispatch or `auto-dispatch.ts`) through to the failure point. Follow function calls across files.
7. **Identify the specific file and line** where the bug lives. Determine what kind of defect it is:

View file

@ -63,4 +63,6 @@ If `.gsd/REQUIREMENTS.md` exists and requirement ownership or status changed, up
{{commitInstruction}}
**DB access safety:** Do NOT query `.gsd/gsd.db` directly via `sqlite3` or `node -e require('better-sqlite3')`. Use `gsd_milestone_status` to read current milestone and slice state. All roadmap mutations go through `gsd_reassess_roadmap` — the tool writes to the DB and re-renders ROADMAP.md atomically.
When done, say: "Roadmap reassessed."

View file

@ -175,6 +175,7 @@ Templates showing the expected format for each artifact type are in:
- Never guess at library APIs from training data — use `get_library_docs`.
- Never ask the user to run a command, set a variable, or check something you can check yourself.
- Never await stale async jobs after editing source — `cancel_job` them first, then re-run.
- Never query `.gsd/gsd.db` directly via `sqlite3`, `better-sqlite3`, or `node -e require('better-sqlite3')` — the database uses a single-writer WAL connection managed by the engine. Direct access causes reader/writer conflicts and bypasses validation logic. Use `gsd_milestone_status`, `gsd_journal_query`, or other `gsd_*` tools exclusively for all DB reads and writes.
### Ask vs infer

View file

@ -38,6 +38,8 @@ All relevant context has been preloaded below — the roadmap, all slice summari
**Persist validation results through `gsd_validate_milestone`.** Call it with: `milestoneId`, `verdict`, `remediationRound`, `successCriteriaChecklist`, `sliceDeliveryAudit`, `crossSliceIntegration`, `requirementCoverage`, `verificationClasses` (when non-empty), `verdictRationale`, and `remediationPlan` (if verdict is `needs-remediation`). The tool writes the validation to the DB and renders VALIDATION.md to disk.
**DB access safety:** Do NOT query `.gsd/gsd.db` directly via `sqlite3` or `node -e require('better-sqlite3')` — the engine owns the WAL connection. Use `gsd_milestone_status` to read milestone and slice state. All data you need is already inlined in the context above or accessible via the `gsd_*` tools. Direct DB access corrupts the WAL and bypasses tool-level validation.
If verdict is `needs-remediation`:
- After calling `gsd_validate_milestone`, use `gsd_reassess_roadmap` to add remediation slices. Pass `milestoneId`, a synthetic `completedSliceId` (e.g. "VALIDATION"), `verdict: "roadmap-adjusted"`, `assessment` text, and `sliceChanges` with the new slices in the `added` array. The tool persists the changes to the DB and re-renders ROADMAP.md.
- These remediation slices will be planned and executed before validation re-runs.

View file

@ -0,0 +1,109 @@
// GSD2 — Regression tests: DB anti-pattern guardrails in prompt templates
import test from "node:test";
import assert from "node:assert/strict";
import { readFileSync, readdirSync } from "node:fs";
import { join } from "node:path";
// Directory holding the GSD prompt templates, resolved relative to the working directory.
const promptsDir = join(process.cwd(), "src", "resources", "extensions", "gsd", "prompts");

/**
 * Loads a prompt template as UTF-8 text.
 *
 * @param name - Template base name, without the `.md` extension.
 * @returns The full contents of `<promptsDir>/<name>.md`.
 */
function readPrompt(name: string): string {
  const promptPath = join(promptsDir, `${name}.md`);
  return readFileSync(promptPath, "utf-8");
}
// ─── Layer 1: system.md global guardrail ──────────────────────────────────────
test("system.md anti-patterns section prohibits direct .gsd/gsd.db access", () => {
  const systemPrompt = readPrompt("system");
  // Each entry pairs a required pattern with the failure message shown when it is missing.
  const expectations: Array<[RegExp, string]> = [
    [
      /Never query.*\.gsd\/gsd\.db.*directly/i,
      "system.md must prohibit direct .gsd/gsd.db access in the anti-patterns section",
    ],
    [/sqlite3/, "system.md DB guardrail must name the sqlite3 CLI"],
    [/better-sqlite3/, "system.md DB guardrail must name better-sqlite3"],
    [/gsd_\*/, "system.md DB guardrail must redirect to gsd_* tools"],
  ];
  for (const [pattern, message] of expectations) {
    assert.match(systemPrompt, pattern, message);
  }
});

test("system.md DB guardrail explains single-writer WAL risk", () => {
  const systemPrompt = readPrompt("system");
  assert.match(systemPrompt, /single-writer WAL/i, "system.md must explain the WAL architecture risk");
});

// ─── Layer 2: high-risk prompt guardrails ─────────────────────────────────────

test("validate-milestone.md contains DB access safety guardrail with tool redirect", () => {
  const text = readPrompt("validate-milestone");
  const expectations: Array<[RegExp, string]> = [
    [/DB access safety/i, "validate-milestone.md must have DB access safety section"],
    [/gsd_milestone_status/, "validate-milestone.md must name gsd_milestone_status as alternative"],
    [/Do NOT query.*\.gsd\/gsd\.db/i, "validate-milestone.md must prohibit direct DB queries"],
  ];
  for (const [pattern, message] of expectations) {
    assert.match(text, pattern, message);
  }
});

test("complete-milestone.md contains DB access safety guardrail with tool redirect", () => {
  const text = readPrompt("complete-milestone");
  const expectations: Array<[RegExp, string]> = [
    [/DB access safety/i, "complete-milestone.md must have DB access safety section"],
    [/gsd_milestone_status/, "complete-milestone.md must name gsd_milestone_status as alternative"],
    [/Do NOT query.*\.gsd\/gsd\.db/i, "complete-milestone.md must prohibit direct DB queries"],
  ];
  for (const [pattern, message] of expectations) {
    assert.match(text, pattern, message);
  }
});

test("doctor-heal.md contains DB access guardrail naming gsd_milestone_status", () => {
  const text = readPrompt("doctor-heal");
  assert.match(
    text,
    /gsd_milestone_status/,
    "doctor-heal.md must name gsd_milestone_status as the DB inspection tool",
  );
  assert.match(
    text,
    /Do NOT query.*\.gsd\/gsd\.db/i,
    "doctor-heal.md must prohibit direct DB queries",
  );
});

test("forensics.md contains DB inspection guardrail", () => {
  const text = readPrompt("forensics");
  assert.match(
    text,
    /gsd_milestone_status/,
    "forensics.md must name gsd_milestone_status as the DB inspection tool",
  );
  assert.match(
    text,
    /sqlite3.*\.gsd\/gsd\.db/i,
    "forensics.md must prohibit sqlite3 against .gsd/gsd.db",
  );
});

test("reassess-roadmap.md contains DB access safety guardrail", () => {
  const text = readPrompt("reassess-roadmap");
  assert.match(
    text,
    /DB access safety/i,
    "reassess-roadmap.md must have DB access safety section",
  );
  assert.match(
    text,
    /gsd_milestone_status/,
    "reassess-roadmap.md must name gsd_milestone_status as alternative",
  );
});
// ─── Negative assertion: no prompt instructs running sqlite3 as a command ─────
test("no prompt file contains an unguarded sqlite3 command invocation", () => {
  const promptFiles = readdirSync(promptsDir).filter((f) => f.endsWith(".md"));
  assert.ok(promptFiles.length >= 35, `Expected at least 35 prompt files, found ${promptFiles.length}`);

  // A hit is acceptable when a prohibition keyword appears on the same line or
  // within the three lines before it (guardrail text such as "Never run" or
  // "Do NOT query").
  const prohibition = /Never|Do NOT|do not|don't|prohibited|forbidden|never run/i;

  // Checked in this order for every line, so the reported violations keep a stable order.
  const detectors: Array<{ pattern: RegExp; label: string }> = [
    // sqlite3 targeting gsd.db in any common form:
    //   sqlite3 .gsd/gsd.db, sqlite3 ./.gsd/gsd.db, sqlite3 "/path/.gsd/gsd.db",
    //   sqlite3 -header .gsd/gsd.db, etc.
    { pattern: /sqlite3\b.*gsd\.db/, label: "unguarded sqlite3 command" },
    // node -e with a better-sqlite3 require/import in any quoting style.
    {
      pattern: /node\s+-e\s+.*(?:require|import).*better-sqlite3/,
      label: "unguarded node -e require command",
    },
  ];

  const violations: string[] = [];
  for (const file of promptFiles) {
    const lines = readFileSync(join(promptsDir, file), "utf-8").split("\n");
    lines.forEach((line, index) => {
      const trimmed = line.trim();
      for (const { pattern, label } of detectors) {
        if (!pattern.test(trimmed)) continue;
        const context = lines.slice(Math.max(0, index - 3), index + 1).join(" ");
        if (prohibition.test(context)) continue;
        violations.push(`${file}:${index + 1} — ${label}: ${trimmed}`);
      }
    });
  }

  assert.deepEqual(
    violations,
    [],
    `Found prompts with unguarded sqlite3/better-sqlite3 invocations:\n${violations.join("\n")}`,
  );
});

View file

@ -0,0 +1,201 @@
// GSD2 — Tests for gsd_milestone_status read-only query tool
import test from "node:test";
import assert from "node:assert/strict";
import { mkdirSync, rmSync } from "node:fs";
import { join } from "node:path";
import { tmpdir } from "node:os";
import { randomUUID } from "node:crypto";
import { registerQueryTools } from "../bootstrap/query-tools.ts";
import {
openDatabase,
closeDatabase,
_getAdapter,
} from "../gsd-db.ts";
// ─── Helpers ──────────────────────────────────────────────────────────────────
/**
 * Builds a minimal stand-in for the extension API.
 *
 * Every tool passed to `registerTool` is appended to the returned `tools`
 * array so tests can inspect what was registered.
 */
function makeMockPi() {
  const registered: any[] = [];
  const registerTool = (tool: any) => registered.push(tool);
  return { registerTool, tools: registered } as any;
}
/**
 * Creates a uniquely named scratch project directory under the OS temp dir,
 * including its `.gsd` subdirectory.
 *
 * @returns The path of the new base directory (the parent of `.gsd`).
 */
function makeTmpBase(): string {
  const dirName = `gsd-query-tool-test-${randomUUID()}`;
  const base = join(tmpdir(), dirName);
  const gsdDir = join(base, ".gsd");
  mkdirSync(gsdDir, { recursive: true });
  return base;
}
/**
 * Recursively removes a scratch directory created for a test.
 *
 * Removal is best-effort: any error raised while deleting is ignored so that
 * cleanup never masks the outcome of the test itself.
 *
 * @param base - Directory to delete.
 */
function cleanup(base: string): void {
  const removal = { recursive: true, force: true };
  try {
    rmSync(base, removal);
  } catch {
    /* swallow */
  }
}
/**
 * Opens the GSD database file located at `<base>/.gsd/gsd.db`.
 *
 * @param base - Scratch project directory that contains the `.gsd` folder.
 */
function openTestDb(base: string): void {
  const dbPath = join(base, ".gsd", "gsd.db");
  openDatabase(dbPath);
}
/**
 * Runs a tool's `execute` with the process working directory temporarily
 * switched to `dir`.
 *
 * The previous working directory is restored once the tool's promise settles,
 * whether it resolves or rejects.
 *
 * @param tool - Registered tool object exposing an `execute` method.
 * @param params - Parameters forwarded to the tool.
 * @param dir - Directory to run the tool in.
 * @returns Whatever the tool's `execute` resolves to.
 */
async function executeToolInDir(tool: any, params: Record<string, unknown>, dir: string) {
  const previousDir = process.cwd();
  try {
    process.chdir(dir);
    // Await here (rather than returning the promise) so the directory is only
    // restored after the tool has finished.
    const outcome = await tool.execute("test-call-id", params, undefined, undefined, undefined);
    return outcome;
  } finally {
    process.chdir(previousDir);
  }
}
// ─── Seed helpers ─────────────────────────────────────────────────────────────
/**
 * Inserts (or replaces) a milestone row in the currently open test database.
 *
 * @param milestoneId - Value for the `id` column.
 * @param title - Value for the `title` column.
 * @param status - Value for the `status` column; defaults to `"active"`.
 * @throws Error if no database adapter is available.
 */
function seedMilestone(milestoneId: string, title: string, status = "active"): void {
  const adapter = _getAdapter();
  if (!adapter) {
    throw new Error("DB not open");
  }
  const insert = adapter.prepare(
    "INSERT OR REPLACE INTO milestones (id, title, status, created_at) VALUES (?, ?, ?, ?)",
  );
  // created_at is stamped with the current time in ISO-8601 form.
  insert.run(milestoneId, title, status, new Date().toISOString());
}
/**
 * Inserts (or replaces) a slice row in the currently open test database.
 *
 * The slice title is derived from its id as `Slice <sliceId>`.
 *
 * @param milestoneId - Value for the `milestone_id` column.
 * @param sliceId - Value for the `id` column.
 * @param status - Value for the `status` column.
 * @throws Error if no database adapter is available.
 */
function seedSlice(milestoneId: string, sliceId: string, status: string): void {
  const adapter = _getAdapter();
  if (!adapter) {
    throw new Error("DB not open");
  }
  const insert = adapter.prepare(
    "INSERT OR REPLACE INTO slices (milestone_id, id, title, status, created_at) VALUES (?, ?, ?, ?, ?)",
  );
  insert.run(milestoneId, sliceId, `Slice ${sliceId}`, status, new Date().toISOString());
}
/**
 * Inserts (or replaces) a task row in the currently open test database.
 *
 * The task title is derived from its id as `Task <taskId>`.
 *
 * @param milestoneId - Value for the `milestone_id` column.
 * @param sliceId - Value for the `slice_id` column.
 * @param taskId - Value for the `id` column.
 * @param status - Value for the `status` column.
 * @throws Error if no database adapter is available.
 */
function seedTask(milestoneId: string, sliceId: string, taskId: string, status: string): void {
  const adapter = _getAdapter();
  if (!adapter) {
    throw new Error("DB not open");
  }
  const insert = adapter.prepare(
    "INSERT OR REPLACE INTO tasks (milestone_id, slice_id, id, title, status) VALUES (?, ?, ?, ?, ?)",
  );
  insert.run(milestoneId, sliceId, taskId, `Task ${taskId}`, status);
}
// ─── Registration ─────────────────────────────────────────────────────────────
test("registerQueryTools registers gsd_milestone_status tool", () => {
  const mockPi = makeMockPi();
  registerQueryTools(mockPi);

  const registered = mockPi.tools;
  assert.equal(registered.length, 1, "Should register exactly one tool");
  assert.equal(registered[0].name, "gsd_milestone_status");
});

test("gsd_milestone_status has promptGuidelines mentioning prohibited alternatives", () => {
  const mockPi = makeMockPi();
  registerQueryTools(mockPi);

  const { promptGuidelines } = mockPi.tools[0];
  assert.ok(Array.isArray(promptGuidelines), "promptGuidelines must be an array");
  assert.ok(promptGuidelines.length >= 1, "Must have at least one guideline");

  // The guidelines, taken together, must name at least one of the direct-access routes.
  const guidelineText = promptGuidelines.join(" ");
  assert.match(guidelineText, /sqlite3|better-sqlite3/, "Guidelines must mention prohibited alternatives");
});
// ─── Happy path: milestone with slices and tasks ──────────────────────────────
test("gsd_milestone_status returns milestone metadata and slice statuses", async () => {
  const base = makeTmpBase();
  try {
    // Seed one milestone with a finished slice (two done tasks) and an active
    // slice (one pending task).
    openTestDb(base);
    seedMilestone("M001", "Test Milestone");
    seedSlice("M001", "S01", "complete");
    seedSlice("M001", "S02", "active");
    seedTask("M001", "S01", "T01", "done");
    seedTask("M001", "S01", "T02", "done");
    seedTask("M001", "S02", "T01", "pending");

    const mockPi = makeMockPi();
    registerQueryTools(mockPi);
    const [statusTool] = mockPi.tools;

    const response = await executeToolInDir(statusTool, { milestoneId: "M001" }, base);
    const report = JSON.parse(response.content[0].text);

    assert.equal(report.milestoneId, "M001");
    assert.equal(report.title, "Test Milestone");
    assert.equal(report.status, "active");
    assert.equal(report.sliceCount, 2);
    assert.equal(report.slices.length, 2);

    const sliceById = (id: string) => report.slices.find((s: any) => s.id === id);

    const finished = sliceById("S01");
    assert.ok(finished, "S01 should be in slices");
    assert.equal(finished.status, "complete");
    assert.equal(finished.taskCounts.total, 2);
    assert.equal(finished.taskCounts.done, 2);

    const inProgress = sliceById("S02");
    assert.ok(inProgress, "S02 should be in slices");
    assert.equal(inProgress.status, "active");
    assert.equal(inProgress.taskCounts.pending, 1);
  } finally {
    closeDatabase();
    cleanup(base);
  }
});
// ─── Milestone with no slices ─────────────────────────────────────────────────
test("gsd_milestone_status returns empty slices array for milestone with no slices", async () => {
  const base = makeTmpBase();
  try {
    // Only the milestone row exists; no slices or tasks are seeded.
    openTestDb(base);
    seedMilestone("M002", "Empty Milestone");

    const mockPi = makeMockPi();
    registerQueryTools(mockPi);
    const [statusTool] = mockPi.tools;

    const response = await executeToolInDir(statusTool, { milestoneId: "M002" }, base);
    const report = JSON.parse(response.content[0].text);

    assert.equal(report.milestoneId, "M002");
    assert.equal(report.sliceCount, 0);
    assert.deepEqual(report.slices, []);
  } finally {
    closeDatabase();
    cleanup(base);
  }
});
// ─── Missing milestone ────────────────────────────────────────────────────────
test("gsd_milestone_status returns not-found for missing milestone", async () => {
  const base = makeTmpBase();
  try {
    // The database is opened but left empty, so any milestone id is unknown.
    openTestDb(base);

    const mockPi = makeMockPi();
    registerQueryTools(mockPi);
    const [statusTool] = mockPi.tools;

    const response = await executeToolInDir(statusTool, { milestoneId: "M999" }, base);
    const message = response.content[0].text;

    assert.match(message, /M999.*not found/i);
    assert.equal(response.details.found, false);
  } finally {
    closeDatabase();
    cleanup(base);
  }
});
// ─── DB unavailable ───────────────────────────────────────────────────────────
test("gsd_milestone_status handles missing DB gracefully", async () => {
  // Use a directory that has no .gsd/ folder and make sure no connection is
  // open: the tool only works with an already-open database, so it is expected
  // to report the database as unavailable.
  const base = join(tmpdir(), `gsd-no-db-${randomUUID()}`);
  mkdirSync(base, { recursive: true });
  closeDatabase(); // ensure no prior DB is open
  try {
    const mockPi = makeMockPi();
    registerQueryTools(mockPi);
    const [statusTool] = mockPi.tools;

    const response = await executeToolInDir(statusTool, { milestoneId: "M001" }, base);

    assert.match(response.content[0].text, /GSD database is not available/);
    assert.equal(response.details.error, "db_unavailable");
  } finally {
    closeDatabase();
    cleanup(base);
  }
});