From 85ece7ea69458b798edb8989949e012b97fa4869 Mon Sep 17 00:00:00 2001 From: Tom Boucher Date: Sun, 5 Apr 2026 01:04:48 -0400 Subject: [PATCH] fix: scope tools during discuss flows to prevent grammar overflow (#3307) * fix: scope tools during discuss flows to prevent grammar overflow (#2949) xAI/Grok (and other providers using grammar-based constrained decoding) reject tool schemas that exceed their grammar complexity limit with HTTP 400 "Grammar is too complex." The full GSD tool set registers ~33 tools with deeply nested schemas; discuss flows only need a small subset. Add DISCUSS_TOOLS_ALLOWLIST to constants.ts listing the 10 GSD tools (5 canonical + 5 aliases) actually referenced by discuss prompts. In dispatchWorkflow, when unitType starts with "discuss-", filter active tools to exclude heavy planning/execution/completion tools before dispatching. Closes #2949 Co-Authored-By: Claude Opus 4.6 * chore: retrigger CI * fix: remove resolveModelWithFallbacksForUnit import from guided-flow.ts This import was incorrectly added as part of the discuss grammar changes. The regression guard test (#2958) requires this function not be imported in guided-flow.ts. --------- Co-authored-by: Claude Opus 4.6 Co-authored-by: trek-e --- src/resources/extensions/gsd/constants.ts | 44 ++++++ src/resources/extensions/gsd/guided-flow.ts | 22 +++ .../gsd/tests/discuss-tool-scoping.test.ts | 130 ++++++++++++++++++ 3 files changed, 196 insertions(+) create mode 100644 src/resources/extensions/gsd/tests/discuss-tool-scoping.test.ts diff --git a/src/resources/extensions/gsd/constants.ts b/src/resources/extensions/gsd/constants.ts index 636f2d808..15812dc93 100644 --- a/src/resources/extensions/gsd/constants.ts +++ b/src/resources/extensions/gsd/constants.ts @@ -19,3 +19,47 @@ export const DIR_CACHE_MAX = 200; /** Max parse-cache entries before eviction. 
*/ export const CACHE_MAX = 50; + +// ─── Tool Scoping ───────────────────────────────────────────────────────────── + +/** + * GSD tools allowed during discuss flows (#2949). + * + * xAI/Grok (and potentially other providers with grammar-based constrained + * decoding) return "Grammar is too complex" (HTTP 400) when the combined + * tool schemas exceed their internal grammar limit. The full GSD tool set + * registers ~33 tools with deeply nested schemas; discuss flows only need + * a small subset. + * + * By scoping tools to this allowlist during discuss dispatches, the grammar + * sent to the provider stays well under provider limits. + * + * Included tools and why: + * - gsd_summary_save: writes CONTEXT.md artifacts (all discuss prompts) + * - gsd_save_summary: alias for above + * - gsd_decision_save: records decisions (discuss.md output phase) + * - gsd_save_decision: alias for above + * - gsd_plan_milestone: writes roadmap (discuss.md single/multi milestone) + * - gsd_milestone_plan: alias for above + * - gsd_milestone_generate_id: generates milestone IDs (discuss.md multi-milestone) + * - gsd_generate_milestone_id: alias for above + * - gsd_requirement_update: updates requirements during discuss + * - gsd_update_requirement: alias for above + */ +export const DISCUSS_TOOLS_ALLOWLIST: readonly string[] = [ + // Context / summary writing + "gsd_summary_save", + "gsd_save_summary", + // Decision recording + "gsd_decision_save", + "gsd_save_decision", + // Milestone planning (needed for discuss.md output phase) + "gsd_plan_milestone", + "gsd_milestone_plan", + // Milestone ID generation (multi-milestone flow) + "gsd_milestone_generate_id", + "gsd_generate_milestone_id", + // Requirement updates + "gsd_requirement_update", + "gsd_update_requirement", +]; diff --git a/src/resources/extensions/gsd/guided-flow.ts b/src/resources/extensions/gsd/guided-flow.ts index 6b5f0d30b..d0f400448 100644 --- a/src/resources/extensions/gsd/guided-flow.ts +++ 
b/src/resources/extensions/gsd/guided-flow.ts @@ -39,6 +39,7 @@ import { debugLog } from "./debug-logger.js"; import { findMilestoneIds, nextMilestoneId, reserveMilestoneId, getReservedMilestoneIds, clearReservedMilestoneIds } from "./milestone-ids.js"; import { parkMilestone, discardMilestone } from "./milestone-actions.js"; import { selectAndApplyModel } from "./auto-model-selection.js"; +import { DISCUSS_TOOLS_ALLOWLIST } from "./constants.js"; // ─── Re-exports (preserve public API for existing importers) ──────────────── export { @@ -289,6 +290,27 @@ async function dispatchWorkflow( } } + // Scope tools for discuss flows (#2949). + // Providers with grammar-based constrained decoding (xAI/Grok) return + // "Grammar is too complex" when the combined tool schema is too large. + // Discuss flows only need a small subset of GSD tools — strip the heavy + // planning/execution/completion tools to keep the grammar within limits. + if (unitType?.startsWith("discuss-")) { + const currentTools = pi.getActiveTools(); + // Keep all non-GSD tools (builtins, other extensions) and only the + // GSD tools on the discuss allowlist. + const scopedTools = currentTools.filter( + (t) => !t.startsWith("gsd_") || DISCUSS_TOOLS_ALLOWLIST.includes(t), + ); + pi.setActiveTools(scopedTools); + debugLog("discuss-tool-scoping", { + unitType, + before: currentTools.length, + after: scopedTools.length, + removed: currentTools.length - scopedTools.length, + }); + } + const workflowPath = process.env.GSD_WORKFLOW_PATH ?? join(process.env.HOME ?? "~", ".gsd", "agent", "GSD-WORKFLOW.md"); const workflow = readFileSync(workflowPath, "utf-8"); diff --git a/src/resources/extensions/gsd/tests/discuss-tool-scoping.test.ts b/src/resources/extensions/gsd/tests/discuss-tool-scoping.test.ts new file mode 100644 index 000000000..36fc332c9 --- /dev/null +++ b/src/resources/extensions/gsd/tests/discuss-tool-scoping.test.ts @@ -0,0 +1,130 @@ +/** + * discuss-tool-scoping.test.ts — Tests for #2949. 
+ * + * xAI/Grok returns "Grammar is too complex" (400) when the combined tool + * schemas exceed the provider's grammar limit. The GSD discuss flow only + * needs a small subset of tools (summary_save, decision_save, etc.), but + * was sending ALL ~33 tools to the provider. + * + * These tests verify: + * 1. DISCUSS_TOOLS_ALLOWLIST is exported and contains only the tools + * needed during discuss flows (no heavy planning/execution/completion tools). + * 2. Heavy execution tools are NOT in the allowlist. + * 3. The allowlist includes the tools actually referenced by discuss prompts. + * 4. The guided-flow.ts source references DISCUSS_TOOLS_ALLOWLIST and setActiveTools (static text check; dispatchWorkflow itself is not executed). + */ + +import { describe, test } from "node:test"; +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { DISCUSS_TOOLS_ALLOWLIST } from "../constants.ts"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const promptsDir = join(__dirname, "..", "prompts"); +const guidedFlowPath = join(__dirname, "..", "guided-flow.ts"); + +// ─── Heavy tools that should NOT be in discuss scope ───────────────────────── + +/** Tools that are only needed during planning, execution, or completion phases */ +const HEAVY_TOOLS = [ + "gsd_plan_slice", + "gsd_slice_plan", + "gsd_plan_task", + "gsd_task_plan", + "gsd_task_complete", + "gsd_complete_task", + "gsd_slice_complete", + "gsd_complete_slice", + "gsd_complete_milestone", + "gsd_milestone_complete", + "gsd_validate_milestone", + "gsd_milestone_validate", + "gsd_replan_slice", + "gsd_slice_replan", + "gsd_reassess_roadmap", + "gsd_roadmap_reassess", + "gsd_save_gate_result", +]; + +// ─── Tools that discuss prompts reference ──────────────────────────────────── + +/** Tools explicitly called by discuss prompt templates */ +const DISCUSS_REQUIRED_TOOLS = [ + "gsd_summary_save", // guided-discuss-slice.md, guided-discuss-milestone.md, 
discuss.md + "gsd_decision_save", // discuss.md output phase + "gsd_plan_milestone", // discuss.md output phase (single + multi milestone) + "gsd_milestone_generate_id", // discuss.md multi-milestone Phase 1 + "gsd_requirement_update", // used during discuss for requirement updates +]; + +// ─── Tests ─────────────────────────────────────────────────────────────────── + +describe("discuss tool scoping (#2949)", () => { + test("DISCUSS_TOOLS_ALLOWLIST is exported and non-empty", () => { + assert.ok(Array.isArray(DISCUSS_TOOLS_ALLOWLIST), "should be an array"); + assert.ok(DISCUSS_TOOLS_ALLOWLIST.length > 0, "should not be empty"); + }); + + test("DISCUSS_TOOLS_ALLOWLIST excludes heavy execution/completion tools", () => { + for (const heavy of HEAVY_TOOLS) { + assert.ok( + !DISCUSS_TOOLS_ALLOWLIST.includes(heavy), + `allowlist should NOT include heavy tool "${heavy}"`, + ); + } + }); + + test("DISCUSS_TOOLS_ALLOWLIST includes tools referenced by discuss prompts", () => { + for (const required of DISCUSS_REQUIRED_TOOLS) { + assert.ok( + DISCUSS_TOOLS_ALLOWLIST.includes(required), + `allowlist should include "${required}" (used by discuss prompts)`, + ); + } + }); + + test("DISCUSS_TOOLS_ALLOWLIST is significantly smaller than full tool set", () => { + // Full set is 27 DB tools + dynamic + journal = 33+ + // Discuss set should be roughly 10 GSD tools (5 canonical + 5 aliases) + assert.ok( + DISCUSS_TOOLS_ALLOWLIST.length <= 12, + `allowlist should have at most 12 GSD tools, got ${DISCUSS_TOOLS_ALLOWLIST.length}`, + ); + }); + + test("guided-discuss-slice.md references gsd_summary_save", () => { + const prompt = readFileSync(join(promptsDir, "guided-discuss-slice.md"), "utf-8"); + assert.ok( + prompt.includes("gsd_summary_save"), + "guided-discuss-slice.md should reference gsd_summary_save", + ); + }); + + test("discuss.md references gsd_plan_milestone and gsd_decision_save", () => { + const prompt = readFileSync(join(promptsDir, "discuss.md"), "utf-8"); + assert.ok( + 
prompt.includes("gsd_plan_milestone"), + "discuss.md should reference gsd_plan_milestone", + ); + assert.ok( + prompt.includes("gsd_decision_save"), + "discuss.md should reference gsd_decision_save", + ); + }); + + test("dispatchWorkflow source code scopes tools for discuss unit types", () => { + const source = readFileSync(guidedFlowPath, "utf-8"); + // Verify that dispatchWorkflow references the allowlist for tool scoping + assert.ok( + source.includes("DISCUSS_TOOLS_ALLOWLIST"), + "guided-flow.ts should reference DISCUSS_TOOLS_ALLOWLIST for tool scoping", + ); + assert.ok( + source.includes("setActiveTools"), + "guided-flow.ts should call setActiveTools to scope tools during discuss", + ); + }); +});