fix: scope tools during discuss flows to prevent grammar overflow (#3307)

* fix: scope tools during discuss flows to prevent grammar overflow (#2949)

xAI/Grok (and other providers using grammar-based constrained decoding)
reject tool schemas that exceed their grammar complexity limit with HTTP
400 "Grammar is too complex." The full GSD tool set registers ~33 tools
with deeply nested schemas; discuss flows only need a small subset.

Add DISCUSS_TOOLS_ALLOWLIST to constants.ts listing the 10 GSD tools
(5 canonical + 5 aliases) actually referenced by discuss prompts. In
dispatchWorkflow, when unitType starts with "discuss-", filter the active
tools before dispatching: non-GSD tools are kept, and every gsd_* tool
that is not on the allowlist (the heavy planning/execution/completion
tools) is removed.

Closes #2949

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* chore: retrigger CI

* fix: remove resolveModelWithFallbacksForUnit import from guided-flow.ts

This import was incorrectly added as part of the discuss grammar changes.
The regression guard test (#2958) requires this function not be imported
in guided-flow.ts.

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: trek-e <trek-e@users.noreply.github.com>
This commit is contained in:
Tom Boucher 2026-04-05 01:04:48 -04:00 committed by GitHub
parent 1fe316477d
commit 85ece7ea69
3 changed files with 196 additions and 0 deletions

View file

@ -19,3 +19,47 @@ export const DIR_CACHE_MAX = 200;
/** Max parse-cache entries before eviction. */
export const CACHE_MAX = 50;
// ─── Tool Scoping ─────────────────────────────────────────────────────────────
/**
 * [canonical name, alias] pairs for every GSD tool a discuss flow may call.
 * Kept as pairs so a canonical tool and its alias are always added or
 * removed together.
 */
const DISCUSS_TOOL_NAME_PAIRS: ReadonlyArray<readonly [string, string]> = [
  // Context / summary writing — writes CONTEXT.md artifacts (all discuss prompts)
  ["gsd_summary_save", "gsd_save_summary"],
  // Decision recording (discuss.md output phase)
  ["gsd_decision_save", "gsd_save_decision"],
  // Milestone planning — writes the roadmap (discuss.md single/multi milestone)
  ["gsd_plan_milestone", "gsd_milestone_plan"],
  // Milestone ID generation (discuss.md multi-milestone flow)
  ["gsd_milestone_generate_id", "gsd_generate_milestone_id"],
  // Requirement updates during discuss
  ["gsd_requirement_update", "gsd_update_requirement"],
];

/**
 * GSD tools that remain available during discuss flows (#2949).
 *
 * Why this exists: xAI/Grok (and potentially other providers with
 * grammar-based constrained decoding) respond with HTTP 400 "Grammar is too
 * complex" when the combined tool schemas exceed their internal grammar
 * limit. The full GSD tool set registers ~33 tools with deeply nested
 * schemas, while discuss flows only need a small subset. Restricting discuss
 * dispatches to this allowlist keeps the grammar sent to the provider well
 * under provider limits.
 *
 * Contents, in order (each canonical name is immediately followed by its alias):
 * - gsd_summary_save / gsd_save_summary
 * - gsd_decision_save / gsd_save_decision
 * - gsd_plan_milestone / gsd_milestone_plan
 * - gsd_milestone_generate_id / gsd_generate_milestone_id
 * - gsd_requirement_update / gsd_update_requirement
 */
export const DISCUSS_TOOLS_ALLOWLIST: readonly string[] = DISCUSS_TOOL_NAME_PAIRS.flatMap(
  ([canonicalName, aliasName]) => [canonicalName, aliasName],
);

View file

@ -39,6 +39,7 @@ import { debugLog } from "./debug-logger.js";
import { findMilestoneIds, nextMilestoneId, reserveMilestoneId, getReservedMilestoneIds, clearReservedMilestoneIds } from "./milestone-ids.js";
import { parkMilestone, discardMilestone } from "./milestone-actions.js";
import { selectAndApplyModel } from "./auto-model-selection.js";
import { DISCUSS_TOOLS_ALLOWLIST } from "./constants.js";
// ─── Re-exports (preserve public API for existing importers) ────────────────
export {
@ -289,6 +290,27 @@ async function dispatchWorkflow(
}
}
// Discuss-flow tool scoping (#2949).
// Providers that use grammar-based constrained decoding (xAI/Grok) reject
// requests with "Grammar is too complex" when the combined tool schema is
// too large. Discuss flows only need a small subset of the GSD tools, so the
// remaining gsd_* tools are deactivated before the workflow is dispatched.
// NOTE(review): this narrows the active tool list in place and nothing in
// this block restores it — confirm that later non-discuss dispatches
// re-enable the removed GSD tools.
if (unitType?.startsWith("discuss-")) {
  const toolsBeforeScoping = pi.getActiveTools();
  const toolsAfterScoping = toolsBeforeScoping.filter((toolName) => {
    // Only gsd_* tools are subject to the allowlist; builtins and tools from
    // other extensions always pass through untouched.
    return toolName.startsWith("gsd_") ? DISCUSS_TOOLS_ALLOWLIST.includes(toolName) : true;
  });
  pi.setActiveTools(toolsAfterScoping);
  // Record how many tools were dropped so grammar-size issues can be diagnosed.
  debugLog("discuss-tool-scoping", {
    unitType,
    before: toolsBeforeScoping.length,
    after: toolsAfterScoping.length,
    removed: toolsBeforeScoping.length - toolsAfterScoping.length,
  });
}
const workflowPath = process.env.GSD_WORKFLOW_PATH ?? join(process.env.HOME ?? "~", ".gsd", "agent", "GSD-WORKFLOW.md");
const workflow = readFileSync(workflowPath, "utf-8");

View file

@ -0,0 +1,130 @@
/**
* discuss-tool-scoping.test.ts — Tests for #2949.
*
* xAI/Grok returns "Grammar is too complex" (400) when the combined tool
* schemas exceed the provider's grammar limit. The GSD discuss flow only
* needs a small subset of tools (summary_save, decision_save, etc.), but
* was sending ALL ~30+ tools to the provider.
*
* These tests verify:
* 1. DISCUSS_TOOLS_ALLOWLIST is exported, non-empty, and small (at most
* 12 entries).
* 2. Heavy planning/execution/completion tools are NOT in the allowlist.
* 3. The allowlist includes the tools actually referenced by discuss prompts,
* and the prompt files still mention those tools.
* 4. guided-flow.ts references DISCUSS_TOOLS_ALLOWLIST and setActiveTools
* (a source-text check, not a behavioral test of dispatchWorkflow).
*/
import { describe, test } from "node:test";
import assert from "node:assert/strict";
import { readFileSync } from "node:fs";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
import { DISCUSS_TOOLS_ALLOWLIST } from "../constants.ts";
// Directory containing this test file, derived from the module's own URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Prompt templates are read from the `prompts` directory one level above this file.
const promptsDir = join(__dirname, "..", "prompts");
// Path to guided-flow.ts one level above this file; the suite reads it as plain text.
const guidedFlowPath = join(__dirname, "..", "guided-flow.ts");
// ─── Heavy tools that should NOT be in discuss scope ─────────────────────────
/**
 * GSD tools that belong to the planning, execution, or completion phases and
 * therefore must never appear in the discuss allowlist. Grouped by purpose
 * and flattened into a single list of tool names.
 */
const HEAVY_TOOLS = [
  // Slice / task planning
  ["gsd_plan_slice", "gsd_slice_plan", "gsd_plan_task", "gsd_task_plan"],
  // Task / slice / milestone completion
  [
    "gsd_task_complete",
    "gsd_complete_task",
    "gsd_slice_complete",
    "gsd_complete_slice",
    "gsd_complete_milestone",
    "gsd_milestone_complete",
  ],
  // Milestone validation
  ["gsd_validate_milestone", "gsd_milestone_validate"],
  // Replanning / roadmap reassessment
  ["gsd_replan_slice", "gsd_slice_replan", "gsd_reassess_roadmap", "gsd_roadmap_reassess"],
  // Gate results
  ["gsd_save_gate_result"],
].flat();
// ─── Tools that discuss prompts reference ────────────────────────────────────
/**
 * Where each canonical discuss tool is used, keyed by tool name.
 * Key order is the order in which the tools are checked.
 */
const DISCUSS_TOOL_USAGE = {
  gsd_summary_save: "guided-discuss-slice.md, guided-discuss-milestone.md, discuss.md",
  gsd_decision_save: "discuss.md output phase",
  gsd_plan_milestone: "discuss.md output phase (single + multi milestone)",
  gsd_milestone_generate_id: "discuss.md multi-milestone Phase 1",
  gsd_requirement_update: "used during discuss for requirement updates",
};

/** Tools explicitly called by discuss prompt templates (canonical names only). */
const DISCUSS_REQUIRED_TOOLS = Object.keys(DISCUSS_TOOL_USAGE);
// ─── Tests ───────────────────────────────────────────────────────────────────
// Suite for #2949: checks the allowlist's contents and size, that the discuss
// prompts still mention the tools it keeps, and that guided-flow.ts wires the
// allowlist into tool scoping (verified by inspecting the source text).
describe("discuss tool scoping (#2949)", () => {
  /** Reads a file as UTF-8 text. */
  const readText = (filePath: string): string => readFileSync(filePath, "utf-8");

  test("DISCUSS_TOOLS_ALLOWLIST is exported and non-empty", () => {
    const isArray = Array.isArray(DISCUSS_TOOLS_ALLOWLIST);
    assert.ok(isArray, "should be an array");
    const hasEntries = DISCUSS_TOOLS_ALLOWLIST.length > 0;
    assert.ok(hasEntries, "should not be empty");
  });

  test("DISCUSS_TOOLS_ALLOWLIST excludes heavy execution/completion tools", () => {
    HEAVY_TOOLS.forEach((heavyName) => {
      const isAllowed = DISCUSS_TOOLS_ALLOWLIST.includes(heavyName);
      assert.ok(!isAllowed, `allowlist should NOT include heavy tool "${heavyName}"`);
    });
  });

  test("DISCUSS_TOOLS_ALLOWLIST includes tools referenced by discuss prompts", () => {
    DISCUSS_REQUIRED_TOOLS.forEach((requiredName) => {
      const isAllowed = DISCUSS_TOOLS_ALLOWLIST.includes(requiredName);
      assert.ok(isAllowed, `allowlist should include "${requiredName}" (used by discuss prompts)`);
    });
  });

  test("DISCUSS_TOOLS_ALLOWLIST is significantly smaller than full tool set", () => {
    // The full set is 27 DB tools + dynamic + journal = 33+, whereas the
    // discuss set should be roughly 10 GSD tools (5 canonical + 5 aliases).
    const allowlistSize = DISCUSS_TOOLS_ALLOWLIST.length;
    assert.ok(
      allowlistSize <= 12,
      `allowlist should have at most 12 GSD tools, got ${allowlistSize}`,
    );
  });

  test("guided-discuss-slice.md references gsd_summary_save", () => {
    const promptText = readText(join(promptsDir, "guided-discuss-slice.md"));
    const mentionsSummarySave = promptText.includes("gsd_summary_save");
    assert.ok(mentionsSummarySave, "guided-discuss-slice.md should reference gsd_summary_save");
  });

  test("discuss.md references gsd_plan_milestone and gsd_decision_save", () => {
    const promptText = readText(join(promptsDir, "discuss.md"));
    // Checked in this order so the first missing tool is the one reported.
    for (const toolName of ["gsd_plan_milestone", "gsd_decision_save"]) {
      assert.ok(promptText.includes(toolName), `discuss.md should reference ${toolName}`);
    }
  });

  test("dispatchWorkflow source code scopes tools for discuss unit types", () => {
    const flowSource = readText(guidedFlowPath);
    // Each entry pairs a substring that must appear in guided-flow.ts with the
    // message reported when it is missing.
    const expectations: ReadonlyArray<readonly [string, string]> = [
      [
        "DISCUSS_TOOLS_ALLOWLIST",
        "guided-flow.ts should reference DISCUSS_TOOLS_ALLOWLIST for tool scoping",
      ],
      [
        "setActiveTools",
        "guided-flow.ts should call setActiveTools to scope tools during discuss",
      ],
    ];
    for (const [needle, failureMessage] of expectations) {
      assert.ok(flowSource.includes(needle), failureMessage);
    }
  });
});