From f0759086e41750af22dc869ea5a560404a8548ed Mon Sep 17 00:00:00 2001 From: Jeremy Date: Sat, 4 Apr 2026 16:33:36 -0500 Subject: [PATCH] fix(gsd): prevent duplicate ask_user_questions dispatches with per-turn dedup cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #3513 - Add per-turn dedup cache in ask_user_questions keyed by hashed question IDs; duplicate calls return cached results instead of re-dispatching to Discord - Lower loop guard threshold for interactive tools (ask_user_questions) from 4 to 1 — blocks 2nd identical call immediately - Wire cache reset into session_start, session_switch, and agent_end hooks - Harden discuss prompts with "call exactly once per turn" instruction - Add regression tests covering strict threshold and dedup behavior --- .../extensions/ask-user-questions.ts | 53 ++++++++++++-- .../gsd/bootstrap/register-hooks.ts | 4 ++ .../gsd/bootstrap/tool-call-loop-guard.ts | 14 +++- .../gsd/prompts/guided-discuss-milestone.md | 2 +- .../gsd/prompts/guided-discuss-slice.md | 2 +- .../tests/ask-user-questions-dedup.test.ts | 71 +++++++++++++++++++ .../gsd/tests/tool-call-loop-guard.test.ts | 27 +++++-- 7 files changed, 159 insertions(+), 14 deletions(-) create mode 100644 src/resources/extensions/gsd/tests/ask-user-questions-dedup.test.ts diff --git a/src/resources/extensions/ask-user-questions.ts b/src/resources/extensions/ask-user-questions.ts index 8178fd1b0..19f8746cd 100644 --- a/src/resources/extensions/ask-user-questions.ts +++ b/src/resources/extensions/ask-user-questions.ts @@ -72,6 +72,30 @@ const AskUserQuestionsParams = Type.Object({ }), }); +// ─── Per-turn deduplication ────────────────────────────────────────────────── +// Prevents duplicate question dispatches (especially to remote channels like +// Discord) when the LLM calls ask_user_questions multiple times with the same +// questions in a single turn. Keyed by sorted question IDs. + +import { createHash } from "node:crypto"; + +interface CachedResult { + content: { type: "text"; text: string }[]; + details: AskUserQuestionsDetails; +} + +const turnCache = new Map(); + +function questionSignature(questions: Array<{ id: string }>): string { + const ids = questions.map((q) => q.id).sort().join("|"); + return createHash("sha256").update(ids).digest("hex").slice(0, 16); +} + +/** Reset the dedup cache. Called on session boundaries. */ +export function resetAskUserQuestionsCache(): void { + turnCache.clear(); +} + // ─── Helpers ────────────────────────────────────────────────────────────────── const OTHER_OPTION_LABEL = "None of the above"; @@ -121,6 +145,16 @@ export default function AskUserQuestions(pi: ExtensionAPI) { parameters: AskUserQuestionsParams, async execute(_toolCallId, params, signal, _onUpdate, ctx) { + // ── Per-turn dedup: return cached result for identical question sets ── + const sig = questionSignature(params.questions); + const cached = turnCache.get(sig); + if (cached) { + return { + content: [{ type: "text" as const, text: cached.content[0].text + "\n(Returned cached answer — this question set was already asked this turn.)" }], + details: cached.details, + }; + } + // Validation if (params.questions.length === 0 || params.questions.length > 3) { return errorResult("Error: questions must contain 1-3 items", params.questions); @@ -140,7 +174,14 @@ export default function AskUserQuestions(pi: ExtensionAPI) { // this is a no-op when the user has not set up Slack/Discord/Telegram. const { tryRemoteQuestions } = await import("./remote-questions/manager.js"); const remoteResult = await tryRemoteQuestions(params.questions, signal); - if (remoteResult) return { ...remoteResult, details: remoteResult.details as unknown }; + if (remoteResult) { + // Cache successful remote results to prevent duplicate Discord dispatches + const remoteDetails = remoteResult.details as Record | undefined; + if (remoteDetails && !remoteDetails.timed_out && !remoteDetails.error) { + turnCache.set(sig, remoteResult as unknown as CachedResult); + } + return { ...remoteResult, details: remoteResult.details as unknown }; + } if (!ctx.hasUI) { return errorResult("Error: UI not available (non-interactive mode)", params.questions); @@ -197,7 +238,7 @@ export default function AskUserQuestions(pi: ExtensionAPI) { ]), ), }; - return { + const fallbackResult = { content: [{ type: "text" as const, text: JSON.stringify({ answers }) }], details: { questions: params.questions, @@ -205,6 +246,8 @@ export default function AskUserQuestions(pi: ExtensionAPI) { cancelled: false, } satisfies LocalResultDetails, }; + turnCache.set(sig, fallbackResult); + return fallbackResult; } // Check if cancelled (empty answers = user exited) @@ -216,10 +259,12 @@ export default function AskUserQuestions(pi: ExtensionAPI) { }; } - return { - content: [{ type: "text", text: formatForLLM(result) }], + const successResult = { + content: [{ type: "text" as const, text: formatForLLM(result) }], details: { questions: params.questions, response: result, cancelled: false } satisfies LocalResultDetails, }; + turnCache.set(sig, successResult); + return successResult; }, // ─── Rendering ──────────────────────────────────────────────────────── diff --git a/src/resources/extensions/gsd/bootstrap/register-hooks.ts b/src/resources/extensions/gsd/bootstrap/register-hooks.ts index 537ebea63..cfa020b93 100644 --- a/src/resources/extensions/gsd/bootstrap/register-hooks.ts +++ b/src/resources/extensions/gsd/bootstrap/register-hooks.ts @@ -17,6 +17,7 @@ import { getAutoDashboardData, isAutoActive, isAutoPaused, markToolEnd, markTool import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js"; import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js"; import { saveActivityLog } from "../activity-log.js"; +import { resetAskUserQuestionsCache } from "../../ask-user-questions.js"; // Skip the welcome screen on the very first session_start — cli.ts already // printed it before the TUI launched. Only re-print on /clear (subsequent sessions). @@ -31,6 +32,7 @@ export function registerHooks(pi: ExtensionAPI): void { pi.on("session_start", async (_event, ctx) => { resetWriteGateState(); resetToolCallLoopGuard(); + resetAskUserQuestionsCache(); await syncServiceTierStatus(ctx); // Apply show_token_cost preference (#1515) @@ -67,6 +69,7 @@ export function registerHooks(pi: ExtensionAPI): void { pi.on("session_switch", async (_event, ctx) => { resetWriteGateState(); resetToolCallLoopGuard(); + resetAskUserQuestionsCache(); clearDiscussionFlowState(); await syncServiceTierStatus(ctx); loadToolApiKeys(); @@ -78,6 +81,7 @@ export function registerHooks(pi: ExtensionAPI): void { pi.on("agent_end", async (event, ctx: ExtensionContext) => { resetToolCallLoopGuard(); + resetAskUserQuestionsCache(); await handleAgentEnd(pi, event, ctx); }); diff --git a/src/resources/extensions/gsd/bootstrap/tool-call-loop-guard.ts b/src/resources/extensions/gsd/bootstrap/tool-call-loop-guard.ts index 695c7e746..4d325fbf1 100644 --- a/src/resources/extensions/gsd/bootstrap/tool-call-loop-guard.ts +++ b/src/resources/extensions/gsd/bootstrap/tool-call-loop-guard.ts @@ -16,8 +16,13 @@ import { createHash } from "node:crypto"; const MAX_CONSECUTIVE_IDENTICAL_CALLS = 4; +/** Interactive/user-facing tools where even 1 duplicate is confusing. */ +const STRICT_LOOP_TOOLS = new Set(["ask_user_questions"]); +const MAX_CONSECUTIVE_STRICT = 1; + let consecutiveCount = 0; let lastSignature = ""; +let lastToolName = ""; let enabled = true; /** Hash tool name + args into a compact signature for comparison. */ @@ -55,9 +60,14 @@ export function checkToolCallLoop( } else { consecutiveCount = 1; lastSignature = sig; + lastToolName = toolName; } - if (consecutiveCount > MAX_CONSECUTIVE_IDENTICAL_CALLS) { + const threshold = STRICT_LOOP_TOOLS.has(toolName) + ? MAX_CONSECUTIVE_STRICT + : MAX_CONSECUTIVE_IDENTICAL_CALLS; + + if (consecutiveCount > threshold) { return { block: true, reason: @@ -75,6 +85,7 @@ export function checkToolCallLoop( export function resetToolCallLoopGuard(): void { consecutiveCount = 0; lastSignature = ""; + lastToolName = ""; enabled = true; } @@ -83,6 +94,7 @@ export function disableToolCallLoopGuard(): void { enabled = false; consecutiveCount = 0; lastSignature = ""; + lastToolName = ""; } /** Get current consecutive count for diagnostics. */ diff --git a/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md b/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md index 4abf22606..01d10f258 100644 --- a/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md +++ b/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md @@ -30,7 +30,7 @@ Ask **1–3 questions per round**. Keep each question focused on one of: - **The biggest technical unknowns / risks** — what could fail, what hasn't been proven - **What external systems/services this touches** — APIs, databases, third-party services -**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. +**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. **IMPORTANT: Call `ask_user_questions` exactly once per turn. Never make multiple calls with the same or overlapping questions — wait for the user's response before asking the next round.** **If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1–3 focused questions. Wait for answers before asking the next round. diff --git a/src/resources/extensions/gsd/prompts/guided-discuss-slice.md b/src/resources/extensions/gsd/prompts/guided-discuss-slice.md index d27487336..2fb94d2a8 100644 --- a/src/resources/extensions/gsd/prompts/guided-discuss-slice.md +++ b/src/resources/extensions/gsd/prompts/guided-discuss-slice.md @@ -22,7 +22,7 @@ Do **not** go deep — just enough that your questions reflect what's actually t ### Question rounds -Ask **1–3 questions per round** using `ask_user_questions`. Keep each question focused on one of: +Ask **1–3 questions per round** using `ask_user_questions`. **Call `ask_user_questions` exactly once per turn — never make multiple calls with the same or overlapping questions. Wait for the user's response before asking the next round.** Keep each question focused on one of: - **UX and user-facing behaviour** — what does the user see, click, trigger, or experience? - **Edge cases and failure states** — what happens when things go wrong or are in unusual states? - **Scope boundaries** — what is explicitly in vs out for this slice? What deferred to later? diff --git a/src/resources/extensions/gsd/tests/ask-user-questions-dedup.test.ts b/src/resources/extensions/gsd/tests/ask-user-questions-dedup.test.ts new file mode 100644 index 000000000..98c9e35f7 --- /dev/null +++ b/src/resources/extensions/gsd/tests/ask-user-questions-dedup.test.ts @@ -0,0 +1,71 @@ +// ask-user-questions-dedup — Regression tests for per-turn deduplication +// +// Verifies that duplicate ask_user_questions calls within a single turn +// return cached results instead of re-dispatching (especially to remote +// channels like Discord). Also verifies the strict loop guard threshold +// for interactive tools. +// +// Regression: duplicate questions were sent to Discord when the LLM called +// ask_user_questions multiple times with the same question set in one turn, +// causing user confusion and tool failure cascading to plain text fallback. + +import { describe, test, beforeEach } from "node:test"; +import assert from "node:assert/strict"; +import { + checkToolCallLoop, + resetToolCallLoopGuard, +} from "../bootstrap/tool-call-loop-guard.ts"; +import { + resetAskUserQuestionsCache, +} from "../../ask-user-questions.ts"; + +// ═══════════════════════════════════════════════════════════════════════════ +// Strict loop guard: ask_user_questions blocks on 2nd identical call +// ═══════════════════════════════════════════════════════════════════════════ + +describe("ask_user_questions dedup", () => { + beforeEach(() => { + resetToolCallLoopGuard(); + resetAskUserQuestionsCache(); + }); + + test("loop guard blocks 2nd identical ask_user_questions call", () => { + const args = { questions: [{ id: "app_coverage", question: "Which apps?" }] }; + + const first = checkToolCallLoop("ask_user_questions", args); + assert.equal(first.block, false, "First call should be allowed"); + + const second = checkToolCallLoop("ask_user_questions", args); + assert.equal(second.block, true, "2nd identical call should be blocked"); + assert.ok(second.reason!.includes("ask_user_questions"), "Reason should name the tool"); + }); + + test("loop guard allows different ask_user_questions calls", () => { + const args1 = { questions: [{ id: "app_coverage", question: "Which apps?" }] }; + const args2 = { questions: [{ id: "testing_focus", question: "What priority?" }] }; + + const first = checkToolCallLoop("ask_user_questions", args1); + assert.equal(first.block, false, "First call allowed"); + + const second = checkToolCallLoop("ask_user_questions", args2); + assert.equal(second.block, false, "Different question set should be allowed"); + }); + + test("non-interactive tools still use normal threshold of 4", () => { + const args = { query: "same query" }; + + for (let i = 1; i <= 4; i++) { + const result = checkToolCallLoop("web_search", args); + assert.equal(result.block, false, `web_search call ${i} should be allowed`); + } + + const fifth = checkToolCallLoop("web_search", args); + assert.equal(fifth.block, true, "5th identical web_search should be blocked"); + }); + + test("cache resets independently from loop guard", () => { + // Verify the reset function exists and is callable + resetAskUserQuestionsCache(); + // No error means the cache module is properly exported and functional + }); +}); diff --git a/src/resources/extensions/gsd/tests/tool-call-loop-guard.test.ts b/src/resources/extensions/gsd/tests/tool-call-loop-guard.test.ts index c1fcecd2c..ab82b3a5e 100644 --- a/src/resources/extensions/gsd/tests/tool-call-loop-guard.test.ts +++ b/src/resources/extensions/gsd/tests/tool-call-loop-guard.test.ts @@ -136,17 +136,30 @@ console.log('\n── Loop guard: nested args are not stripped ──'); assert.deepStrictEqual(getToolCallLoopCount(), 1, `Each unique nested call should reset count to 1`); } - // Truly identical nested calls should still be detected + // Truly identical nested calls should still be detected. + // ask_user_questions has a strict threshold of 1, so the 2nd identical call is blocked. resetToolCallLoopGuard(); - for (let i = 1; i <= 4; i++) { - checkToolCallLoop('ask_user_questions', { - questions: [{ id: 'same', question: 'Same?' }], - }); - } + const first = checkToolCallLoop('ask_user_questions', { + questions: [{ id: 'same', question: 'Same?' }], + }); + assert.ok(first.block === false, 'First ask_user_questions call should be allowed'); const blocked = checkToolCallLoop('ask_user_questions', { questions: [{ id: 'same', question: 'Same?' }], }); - assert.ok(blocked.block === true, 'Identical nested calls should still be blocked'); + assert.ok(blocked.block === true, '2nd identical ask_user_questions call should be blocked (strict threshold)'); + + // Non-strict tools still allow up to 4 identical calls + resetToolCallLoopGuard(); + for (let i = 1; i <= 4; i++) { + const r = checkToolCallLoop('web_search', { + questions: [{ id: 'same', question: 'Same?' }], + }); + assert.ok(r.block === false, `web_search call ${i} should be allowed (normal threshold)`); + } + const blockedNormal = checkToolCallLoop('web_search', { + questions: [{ id: 'same', question: 'Same?' }], + }); + assert.ok(blockedNormal.block === true, '5th identical web_search call should be blocked'); } // ═══════════════════════════════════════════════════════════════════════════