fix(gsd): prevent duplicate ask_user_questions dispatches with per-turn dedup cache

Closes #3513 - Add per-turn dedup cache in ask_user_questions keyed by hashed question IDs; duplicate calls return cached results instead of re-dispatching to Discord - Lower loop guard threshold for interactive tools (ask_user_questions) from 4 to 1 — blocks 2nd identical call immediately - Wire cache reset into session_start, session_switch, and agent_end hooks - Harden discuss prompts with "call exactly once per turn" instruction - Add regression tests covering strict threshold and dedup behavior
2026-04-04 16:33:36 -05:00 · 2026-04-04 16:33:36 -05:00 · f0759086e4
commit f0759086e4
parent 099e6f3120
7 changed files with 159 additions and 14 deletions
--- a/src/resources/extensions/ask-user-questions.ts
+++ b/src/resources/extensions/ask-user-questions.ts
@ -72,6 +72,30 @@ const AskUserQuestionsParams = Type.Object({
 	}),
 });

+// ─── Per-turn deduplication ──────────────────────────────────────────────────
+// Prevents duplicate question dispatches (especially to remote channels like
+// Discord) when the LLM calls ask_user_questions multiple times with the same
+// questions in a single turn. Keyed by sorted question IDs.
+
+import { createHash } from "node:crypto";
+
+interface CachedResult {
+	content: { type: "text"; text: string }[];
+	details: AskUserQuestionsDetails;
+}
+
+const turnCache = new Map<string, CachedResult>();
+
+function questionSignature(questions: Array<{ id: string }>): string {
+	const ids = questions.map((q) => q.id).sort().join("|");
+	return createHash("sha256").update(ids).digest("hex").slice(0, 16);
+}
+
+/** Reset the dedup cache. Called on session boundaries. */
+export function resetAskUserQuestionsCache(): void {
+	turnCache.clear();
+}
+
 // ─── Helpers ──────────────────────────────────────────────────────────────────

 const OTHER_OPTION_LABEL = "None of the above";
@ -121,6 +145,16 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
 		parameters: AskUserQuestionsParams,

 		async execute(_toolCallId, params, signal, _onUpdate, ctx) {
+			// ── Per-turn dedup: return cached result for identical question sets ──
+			const sig = questionSignature(params.questions);
+			const cached = turnCache.get(sig);
+			if (cached) {
+				return {
+					content: [{ type: "text" as const, text: cached.content[0].text + "\n(Returned cached answer — this question set was already asked this turn.)" }],
+					details: cached.details,
+				};
+			}
+
 			// Validation
 			if (params.questions.length === 0 || params.questions.length > 3) {
 				return errorResult("Error: questions must contain 1-3 items", params.questions);
@ -140,7 +174,14 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
 			// this is a no-op when the user has not set up Slack/Discord/Telegram.
 			const { tryRemoteQuestions } = await import("./remote-questions/manager.js");
 			const remoteResult = await tryRemoteQuestions(params.questions, signal);
-			if (remoteResult) return { ...remoteResult, details: remoteResult.details as unknown };
+			if (remoteResult) {
+				// Cache successful remote results to prevent duplicate Discord dispatches
+				const remoteDetails = remoteResult.details as Record<string, unknown> | undefined;
+				if (remoteDetails && !remoteDetails.timed_out && !remoteDetails.error) {
+					turnCache.set(sig, remoteResult as unknown as CachedResult);
+				}
+				return { ...remoteResult, details: remoteResult.details as unknown };
+			}

 			if (!ctx.hasUI) {
 				return errorResult("Error: UI not available (non-interactive mode)", params.questions);
@ -197,7 +238,7 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
 						]),
 					),
 				};
-				return {
+				const fallbackResult = {
 					content: [{ type: "text" as const, text: JSON.stringify({ answers }) }],
 					details: {
 						questions: params.questions,
@ -205,6 +246,8 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
 						cancelled: false,
 					} satisfies LocalResultDetails,
 				};
+				turnCache.set(sig, fallbackResult);
+				return fallbackResult;
 			}

 			// Check if cancelled (empty answers = user exited)
@ -216,10 +259,12 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
 				};
 			}

-			return {
-				content: [{ type: "text", text: formatForLLM(result) }],
+			const successResult = {
+				content: [{ type: "text" as const, text: formatForLLM(result) }],
 				details: { questions: params.questions, response: result, cancelled: false } satisfies LocalResultDetails,
 			};
+			turnCache.set(sig, successResult);
+			return successResult;
 		},

 		// ─── Rendering ────────────────────────────────────────────────────────
--- a/src/resources/extensions/gsd/bootstrap/register-hooks.ts
+++ b/src/resources/extensions/gsd/bootstrap/register-hooks.ts
@ -17,6 +17,7 @@ import { getAutoDashboardData, isAutoActive, isAutoPaused, markToolEnd, markTool
 import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js";
 import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js";
 import { saveActivityLog } from "../activity-log.js";
+import { resetAskUserQuestionsCache } from "../../ask-user-questions.js";

 // Skip the welcome screen on the very first session_start — cli.ts already
 // printed it before the TUI launched. Only re-print on /clear (subsequent sessions).
@ -31,6 +32,7 @@ export function registerHooks(pi: ExtensionAPI): void {
  pi.on("session_start", async (_event, ctx) => {
    resetWriteGateState();
    resetToolCallLoopGuard();
+    resetAskUserQuestionsCache();
    await syncServiceTierStatus(ctx);

    // Apply show_token_cost preference (#1515)
@ -67,6 +69,7 @@ export function registerHooks(pi: ExtensionAPI): void {
  pi.on("session_switch", async (_event, ctx) => {
    resetWriteGateState();
    resetToolCallLoopGuard();
+    resetAskUserQuestionsCache();
    clearDiscussionFlowState();
    await syncServiceTierStatus(ctx);
    loadToolApiKeys();
@ -78,6 +81,7 @@ export function registerHooks(pi: ExtensionAPI): void {

  pi.on("agent_end", async (event, ctx: ExtensionContext) => {
    resetToolCallLoopGuard();
+    resetAskUserQuestionsCache();
    await handleAgentEnd(pi, event, ctx);
  });

--- a/src/resources/extensions/gsd/bootstrap/tool-call-loop-guard.ts
+++ b/src/resources/extensions/gsd/bootstrap/tool-call-loop-guard.ts
@ -16,8 +16,13 @@ import { createHash } from "node:crypto";

 const MAX_CONSECUTIVE_IDENTICAL_CALLS = 4;

+/** Interactive/user-facing tools where even 1 duplicate is confusing. */
+const STRICT_LOOP_TOOLS = new Set(["ask_user_questions"]);
+const MAX_CONSECUTIVE_STRICT = 1;
+
 let consecutiveCount = 0;
 let lastSignature = "";
+let lastToolName = "";
 let enabled = true;

 /** Hash tool name + args into a compact signature for comparison. */
@ -55,9 +60,14 @@ export function checkToolCallLoop(
  } else {
    consecutiveCount = 1;
    lastSignature = sig;
+    lastToolName = toolName;
  }

-  if (consecutiveCount > MAX_CONSECUTIVE_IDENTICAL_CALLS) {
+  const threshold = STRICT_LOOP_TOOLS.has(toolName)
+    ? MAX_CONSECUTIVE_STRICT
+    : MAX_CONSECUTIVE_IDENTICAL_CALLS;
+
+  if (consecutiveCount > threshold) {
    return {
      block: true,
      reason:
@ -75,6 +85,7 @@ export function checkToolCallLoop(
 export function resetToolCallLoopGuard(): void {
  consecutiveCount = 0;
  lastSignature = "";
+  lastToolName = "";
  enabled = true;
 }

@ -83,6 +94,7 @@ export function disableToolCallLoopGuard(): void {
  enabled = false;
  consecutiveCount = 0;
  lastSignature = "";
+  lastToolName = "";
 }

 /** Get current consecutive count for diagnostics. */
--- a/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md
+++ b/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md
@ -30,7 +30,7 @@ Ask **1–3 questions per round**. Keep each question focused on one of:
 - **The biggest technical unknowns / risks** — what could fail, what hasn't been proven
 - **What external systems/services this touches** — APIs, databases, third-party services

-**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions.
+**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. **IMPORTANT: Call `ask_user_questions` exactly once per turn. Never make multiple calls with the same or overlapping questions — wait for the user's response before asking the next round.**

 **If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1–3 focused questions. Wait for answers before asking the next round.

--- a/src/resources/extensions/gsd/prompts/guided-discuss-slice.md
+++ b/src/resources/extensions/gsd/prompts/guided-discuss-slice.md
@ -22,7 +22,7 @@ Do **not** go deep — just enough that your questions reflect what's actually t

 ### Question rounds

-Ask **1–3 questions per round** using `ask_user_questions`. Keep each question focused on one of:
+Ask **1–3 questions per round** using `ask_user_questions`. **Call `ask_user_questions` exactly once per turn — never make multiple calls with the same or overlapping questions. Wait for the user's response before asking the next round.** Keep each question focused on one of:
 - **UX and user-facing behaviour** — what does the user see, click, trigger, or experience?
 - **Edge cases and failure states** — what happens when things go wrong or are in unusual states?
 - **Scope boundaries** — what is explicitly in vs out for this slice? What deferred to later?
--- a/src/resources/extensions/gsd/tests/ask-user-questions-dedup.test.ts
+++ b/src/resources/extensions/gsd/tests/ask-user-questions-dedup.test.ts
@ -0,0 +1,71 @@
+// ask-user-questions-dedup — Regression tests for per-turn deduplication
+//
+// Verifies that duplicate ask_user_questions calls within a single turn
+// return cached results instead of re-dispatching (especially to remote
+// channels like Discord). Also verifies the strict loop guard threshold
+// for interactive tools.
+//
+// Regression: duplicate questions were sent to Discord when the LLM called
+// ask_user_questions multiple times with the same question set in one turn,
+// causing user confusion and tool failure cascading to plain text fallback.
+
+import { describe, test, beforeEach } from "node:test";
+import assert from "node:assert/strict";
+import {
+  checkToolCallLoop,
+  resetToolCallLoopGuard,
+} from "../bootstrap/tool-call-loop-guard.ts";
+import {
+  resetAskUserQuestionsCache,
+} from "../../ask-user-questions.ts";
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Strict loop guard: ask_user_questions blocks on 2nd identical call
+// ═══════════════════════════════════════════════════════════════════════════
+
+describe("ask_user_questions dedup", () => {
+  beforeEach(() => {
+    resetToolCallLoopGuard();
+    resetAskUserQuestionsCache();
+  });
+
+  test("loop guard blocks 2nd identical ask_user_questions call", () => {
+    const args = { questions: [{ id: "app_coverage", question: "Which apps?" }] };
+
+    const first = checkToolCallLoop("ask_user_questions", args);
+    assert.equal(first.block, false, "First call should be allowed");
+
+    const second = checkToolCallLoop("ask_user_questions", args);
+    assert.equal(second.block, true, "2nd identical call should be blocked");
+    assert.ok(second.reason!.includes("ask_user_questions"), "Reason should name the tool");
+  });
+
+  test("loop guard allows different ask_user_questions calls", () => {
+    const args1 = { questions: [{ id: "app_coverage", question: "Which apps?" }] };
+    const args2 = { questions: [{ id: "testing_focus", question: "What priority?" }] };
+
+    const first = checkToolCallLoop("ask_user_questions", args1);
+    assert.equal(first.block, false, "First call allowed");
+
+    const second = checkToolCallLoop("ask_user_questions", args2);
+    assert.equal(second.block, false, "Different question set should be allowed");
+  });
+
+  test("non-interactive tools still use normal threshold of 4", () => {
+    const args = { query: "same query" };
+
+    for (let i = 1; i <= 4; i++) {
+      const result = checkToolCallLoop("web_search", args);
+      assert.equal(result.block, false, `web_search call ${i} should be allowed`);
+    }
+
+    const fifth = checkToolCallLoop("web_search", args);
+    assert.equal(fifth.block, true, "5th identical web_search should be blocked");
+  });
+
+  test("cache resets independently from loop guard", () => {
+    // Verify the reset function exists and is callable
+    resetAskUserQuestionsCache();
+    // No error means the cache module is properly exported and functional
+  });
+});
--- a/src/resources/extensions/gsd/tests/tool-call-loop-guard.test.ts
+++ b/src/resources/extensions/gsd/tests/tool-call-loop-guard.test.ts
@ -136,17 +136,30 @@ console.log('\n── Loop guard: nested args are not stripped ──');
    assert.deepStrictEqual(getToolCallLoopCount(), 1, `Each unique nested call should reset count to 1`);
  }

-  // Truly identical nested calls should still be detected
+  // Truly identical nested calls should still be detected.
+  // ask_user_questions has a strict threshold of 1, so the 2nd identical call is blocked.
  resetToolCallLoopGuard();
-  for (let i = 1; i <= 4; i++) {
-    checkToolCallLoop('ask_user_questions', {
-      questions: [{ id: 'same', question: 'Same?' }],
-    });
-  }
+  const first = checkToolCallLoop('ask_user_questions', {
+    questions: [{ id: 'same', question: 'Same?' }],
+  });
+  assert.ok(first.block === false, 'First ask_user_questions call should be allowed');
  const blocked = checkToolCallLoop('ask_user_questions', {
    questions: [{ id: 'same', question: 'Same?' }],
  });
-  assert.ok(blocked.block === true, 'Identical nested calls should still be blocked');
+  assert.ok(blocked.block === true, '2nd identical ask_user_questions call should be blocked (strict threshold)');
+
+  // Non-strict tools still allow up to 4 identical calls
+  resetToolCallLoopGuard();
+  for (let i = 1; i <= 4; i++) {
+    const r = checkToolCallLoop('web_search', {
+      questions: [{ id: 'same', question: 'Same?' }],
+    });
+    assert.ok(r.block === false, `web_search call ${i} should be allowed (normal threshold)`);
+  }
+  const blockedNormal = checkToolCallLoop('web_search', {
+    questions: [{ id: 'same', question: 'Same?' }],
+  });
+  assert.ok(blockedNormal.block === true, '5th identical web_search call should be blocked');
 }

 // ═══════════════════════════════════════════════════════════════════════════