fix: scope tools during discuss flows to prevent grammar overflow (#3307)
* fix: scope tools during discuss flows to prevent grammar overflow (#2949) xAI/Grok (and other providers using grammar-based constrained decoding) reject tool schemas that exceed their grammar complexity limit with HTTP 400 "Grammar is too complex." The full GSD tool set registers ~33 tools with deeply nested schemas; discuss flows only need a small subset. Add DISCUSS_TOOLS_ALLOWLIST to constants.ts listing the 10 GSD tools (5 canonical + 5 aliases) actually referenced by discuss prompts. In dispatchWorkflow, when unitType starts with "discuss-", filter active tools to exclude heavy planning/execution/completion tools before dispatching. Closes #2949 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * chore: retrigger CI * fix: remove resolveModelWithFallbacksForUnit import from guided-flow.ts This import was incorrectly added as part of the discuss grammar changes. The regression guard test (#2958) requires this function not be imported in guided-flow.ts. --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com> Co-authored-by: trek-e <trek-e@users.noreply.github.com>
This commit is contained in:
parent
1fe316477d
commit
85ece7ea69
3 changed files with 196 additions and 0 deletions
|
|
@ -19,3 +19,47 @@ export const DIR_CACHE_MAX = 200;
|
|||
|
||||
/** Max parse-cache entries before eviction. */
|
||||
export const CACHE_MAX = 50;
|
||||
|
||||
// ─── Tool Scoping ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Names of the GSD tools that remain active during discuss flows (#2949).
 *
 * Background: providers using grammar-based constrained decoding (xAI/Grok,
 * and potentially others) respond with HTTP 400 "Grammar is too complex"
 * once the combined tool schemas exceed their internal grammar limit. The
 * full GSD tool set registers ~33 tools with deeply nested schemas, whereas
 * discuss flows only need a small subset. Scoping discuss dispatches to this
 * allowlist keeps the grammar sent to the provider well under those limits.
 *
 * The list is written as [canonical, alias] rows and flattened in row order:
 *   - gsd_summary_save / gsd_save_summary:
 *       writes CONTEXT.md artifacts (all discuss prompts)
 *   - gsd_decision_save / gsd_save_decision:
 *       records decisions (discuss.md output phase)
 *   - gsd_plan_milestone / gsd_milestone_plan:
 *       writes roadmap (discuss.md single/multi milestone)
 *   - gsd_milestone_generate_id / gsd_generate_milestone_id:
 *       generates milestone IDs (discuss.md multi-milestone)
 *   - gsd_requirement_update / gsd_update_requirement:
 *       updates requirements during discuss
 */
export const DISCUSS_TOOLS_ALLOWLIST: readonly string[] = [
  // Context / summary writing
  ["gsd_summary_save", "gsd_save_summary"],
  // Decision recording
  ["gsd_decision_save", "gsd_save_decision"],
  // Milestone planning (needed for discuss.md output phase)
  ["gsd_plan_milestone", "gsd_milestone_plan"],
  // Milestone ID generation (multi-milestone flow)
  ["gsd_milestone_generate_id", "gsd_generate_milestone_id"],
  // Requirement updates
  ["gsd_requirement_update", "gsd_update_requirement"],
].flat();
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ import { debugLog } from "./debug-logger.js";
|
|||
import { findMilestoneIds, nextMilestoneId, reserveMilestoneId, getReservedMilestoneIds, clearReservedMilestoneIds } from "./milestone-ids.js";
|
||||
import { parkMilestone, discardMilestone } from "./milestone-actions.js";
|
||||
import { selectAndApplyModel } from "./auto-model-selection.js";
|
||||
import { DISCUSS_TOOLS_ALLOWLIST } from "./constants.js";
|
||||
|
||||
// ─── Re-exports (preserve public API for existing importers) ────────────────
|
||||
export {
|
||||
|
|
@ -289,6 +290,27 @@ async function dispatchWorkflow(
|
|||
}
|
||||
}
|
||||
|
||||
// Scope tools for discuss flows (#2949).
|
||||
// Providers with grammar-based constrained decoding (xAI/Grok) return
|
||||
// "Grammar is too complex" when the combined tool schema is too large.
|
||||
// Discuss flows only need a small subset of GSD tools — strip the heavy
|
||||
// planning/execution/completion tools to keep the grammar within limits.
|
||||
if (unitType?.startsWith("discuss-")) {
|
||||
const currentTools = pi.getActiveTools();
|
||||
// Keep all non-GSD tools (builtins, other extensions) and only the
|
||||
// GSD tools on the discuss allowlist.
|
||||
const scopedTools = currentTools.filter(
|
||||
(t) => !t.startsWith("gsd_") || DISCUSS_TOOLS_ALLOWLIST.includes(t),
|
||||
);
|
||||
pi.setActiveTools(scopedTools);
|
||||
debugLog("discuss-tool-scoping", {
|
||||
unitType,
|
||||
before: currentTools.length,
|
||||
after: scopedTools.length,
|
||||
removed: currentTools.length - scopedTools.length,
|
||||
});
|
||||
}
|
||||
|
||||
const workflowPath = process.env.GSD_WORKFLOW_PATH ?? join(process.env.HOME ?? "~", ".gsd", "agent", "GSD-WORKFLOW.md");
|
||||
const workflow = readFileSync(workflowPath, "utf-8");
|
||||
|
||||
|
|
|
|||
130
src/resources/extensions/gsd/tests/discuss-tool-scoping.test.ts
Normal file
130
src/resources/extensions/gsd/tests/discuss-tool-scoping.test.ts
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
/**
|
||||
* discuss-tool-scoping.test.ts — Tests for #2949.
|
||||
*
|
||||
* xAI/Grok returns "Grammar is too complex" (400) when the combined tool
|
||||
* schemas exceed the provider's grammar limit. The GSD discuss flow only
|
||||
* needs a small subset of tools (summary_save, decision_save, etc.), but
|
||||
* was sending ALL ~30+ tools to the provider.
|
||||
*
|
||||
* These tests verify:
|
||||
* 1. DISCUSS_TOOLS_ALLOWLIST is exported and contains only the tools
|
||||
* needed during discuss flows (no heavy planning/execution/completion tools).
|
||||
* 2. Heavy execution tools are NOT in the allowlist.
|
||||
* 3. The allowlist includes the tools actually referenced by discuss prompts.
|
||||
* 4. dispatchWorkflow scopes tools when unitType is a discuss variant.
|
||||
*/
|
||||
|
||||
import { describe, test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { join, dirname } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
import { DISCUSS_TOOLS_ALLOWLIST } from "../constants.ts";
|
||||
|
||||
// Directory containing this test file, derived from the module URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Parent of the tests directory; the prompt templates and guided-flow.ts
// are both resolved relative to it.
const extensionRoot = join(__dirname, "..");
const promptsDir = join(extensionRoot, "prompts");
const guidedFlowPath = join(extensionRoot, "guided-flow.ts");
|
||||
|
||||
// ─── Heavy tools that should NOT be in discuss scope ─────────────────────────
|
||||
|
||||
/**
 * GSD tools used only by the planning, execution, or completion phases.
 * None of these may appear in the discuss allowlist.
 */
const HEAVY_TOOLS = [
  // Slice / task planning
  "gsd_plan_slice", "gsd_slice_plan",
  "gsd_plan_task", "gsd_task_plan",
  // Task / slice / milestone completion
  "gsd_task_complete", "gsd_complete_task",
  "gsd_slice_complete", "gsd_complete_slice",
  "gsd_complete_milestone", "gsd_milestone_complete",
  // Milestone validation
  "gsd_validate_milestone", "gsd_milestone_validate",
  // Replanning / roadmap reassessment
  "gsd_replan_slice", "gsd_slice_replan",
  "gsd_reassess_roadmap", "gsd_roadmap_reassess",
  // Gate results
  "gsd_save_gate_result",
];
|
||||
|
||||
// ─── Tools that discuss prompts reference ────────────────────────────────────
|
||||
|
||||
/**
 * Tool names the discuss prompt templates call explicitly; each one must be
 * present in the discuss allowlist.
 */
const DISCUSS_REQUIRED_TOOLS = [
  // guided-discuss-slice.md, guided-discuss-milestone.md, discuss.md
  "gsd_summary_save",
  // discuss.md output phase
  "gsd_decision_save",
  // discuss.md output phase (single + multi milestone)
  "gsd_plan_milestone",
  // discuss.md multi-milestone Phase 1
  "gsd_milestone_generate_id",
  // used during discuss for requirement updates
  "gsd_requirement_update",
];
|
||||
|
||||
// ─── Tests ───────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("discuss tool scoping (#2949)", () => {
  /** Reads a prompt template from the prompts directory as UTF-8 text. */
  const readPrompt = (fileName: string): string =>
    readFileSync(join(promptsDir, fileName), "utf-8");

  test("DISCUSS_TOOLS_ALLOWLIST is exported and non-empty", () => {
    const isArray = Array.isArray(DISCUSS_TOOLS_ALLOWLIST);
    assert.ok(isArray, "should be an array");

    const hasEntries = DISCUSS_TOOLS_ALLOWLIST.length > 0;
    assert.ok(hasEntries, "should not be empty");
  });

  test("DISCUSS_TOOLS_ALLOWLIST excludes heavy execution/completion tools", () => {
    // Every heavy tool is checked individually so a failure names the offender.
    HEAVY_TOOLS.forEach((heavy) => {
      const allowed = DISCUSS_TOOLS_ALLOWLIST.includes(heavy);
      assert.ok(!allowed, `allowlist should NOT include heavy tool "${heavy}"`);
    });
  });

  test("DISCUSS_TOOLS_ALLOWLIST includes tools referenced by discuss prompts", () => {
    DISCUSS_REQUIRED_TOOLS.forEach((required) => {
      const allowed = DISCUSS_TOOLS_ALLOWLIST.includes(required);
      assert.ok(allowed, `allowlist should include "${required}" (used by discuss prompts)`);
    });
  });

  test("DISCUSS_TOOLS_ALLOWLIST is significantly smaller than full tool set", () => {
    // Full set is 27 DB tools + dynamic + journal = 33+
    // Discuss set should be roughly 10 GSD tools (5 canonical + 5 aliases)
    const count = DISCUSS_TOOLS_ALLOWLIST.length;
    assert.ok(count <= 12, `allowlist should have at most 12 GSD tools, got ${count}`);
  });

  test("guided-discuss-slice.md references gsd_summary_save", () => {
    const promptText = readPrompt("guided-discuss-slice.md");
    const mentionsTool = promptText.includes("gsd_summary_save");
    assert.ok(mentionsTool, "guided-discuss-slice.md should reference gsd_summary_save");
  });

  test("discuss.md references gsd_plan_milestone and gsd_decision_save", () => {
    const promptText = readPrompt("discuss.md");
    // [tool name, failure message] — checked in this order.
    const expectations: Array<[string, string]> = [
      ["gsd_plan_milestone", "discuss.md should reference gsd_plan_milestone"],
      ["gsd_decision_save", "discuss.md should reference gsd_decision_save"],
    ];
    expectations.forEach(([toolName, failureMessage]) => {
      assert.ok(promptText.includes(toolName), failureMessage);
    });
  });

  test("dispatchWorkflow source code scopes tools for discuss unit types", () => {
    // Source-level check: guided-flow.ts must mention both the allowlist and
    // the setActiveTools call used to apply it.
    const flowSource = readFileSync(guidedFlowPath, "utf-8");
    const expectations: Array<[string, string]> = [
      [
        "DISCUSS_TOOLS_ALLOWLIST",
        "guided-flow.ts should reference DISCUSS_TOOLS_ALLOWLIST for tool scoping",
      ],
      [
        "setActiveTools",
        "guided-flow.ts should call setActiveTools to scope tools during discuss",
      ],
    ];
    expectations.forEach(([needle, failureMessage]) => {
      assert.ok(flowSource.includes(needle), failureMessage);
    });
  });
});
|
||||
Loading…
Add table
Reference in a new issue