fix(gsd): prevent duplicate ask_user_questions dispatches with per-turn dedup cache

Closes #3513

- Add per-turn dedup cache in ask_user_questions keyed by hashed question IDs;
  duplicate calls return cached results instead of re-dispatching to Discord
- Lower loop guard threshold for interactive tools (ask_user_questions) from 4
  to 1 — blocks 2nd identical call immediately
- Wire cache reset into session_start, session_switch, and agent_end hooks
- Harden discuss prompts with "call exactly once per turn" instruction
- Add regression tests covering strict threshold and dedup behavior
This commit is contained in:
Jeremy 2026-04-04 16:33:36 -05:00
parent 099e6f3120
commit f0759086e4
7 changed files with 159 additions and 14 deletions

View file

@ -72,6 +72,30 @@ const AskUserQuestionsParams = Type.Object({
}),
});
// ─── Per-turn deduplication ──────────────────────────────────────────────────
// Prevents duplicate question dispatches (especially to remote channels like
// Discord) when the LLM calls ask_user_questions multiple times with the same
// questions in a single turn. Keyed by sorted question IDs.
import { createHash } from "node:crypto";
interface CachedResult {
content: { type: "text"; text: string }[];
details: AskUserQuestionsDetails;
}
const turnCache = new Map<string, CachedResult>();
function questionSignature(questions: Array<{ id: string }>): string {
const ids = questions.map((q) => q.id).sort().join("|");
return createHash("sha256").update(ids).digest("hex").slice(0, 16);
}
/** Reset the dedup cache. Called on session boundaries. */
export function resetAskUserQuestionsCache(): void {
turnCache.clear();
}
// ─── Helpers ──────────────────────────────────────────────────────────────────
const OTHER_OPTION_LABEL = "None of the above";
@ -121,6 +145,16 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
parameters: AskUserQuestionsParams,
async execute(_toolCallId, params, signal, _onUpdate, ctx) {
// ── Per-turn dedup: return cached result for identical question sets ──
const sig = questionSignature(params.questions);
const cached = turnCache.get(sig);
if (cached) {
return {
content: [{ type: "text" as const, text: cached.content[0].text + "\n(Returned cached answer — this question set was already asked this turn.)" }],
details: cached.details,
};
}
// Validation
if (params.questions.length === 0 || params.questions.length > 3) {
return errorResult("Error: questions must contain 1-3 items", params.questions);
@ -140,7 +174,14 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
// this is a no-op when the user has not set up Slack/Discord/Telegram.
const { tryRemoteQuestions } = await import("./remote-questions/manager.js");
const remoteResult = await tryRemoteQuestions(params.questions, signal);
if (remoteResult) return { ...remoteResult, details: remoteResult.details as unknown };
if (remoteResult) {
// Cache successful remote results to prevent duplicate Discord dispatches
const remoteDetails = remoteResult.details as Record<string, unknown> | undefined;
if (remoteDetails && !remoteDetails.timed_out && !remoteDetails.error) {
turnCache.set(sig, remoteResult as unknown as CachedResult);
}
return { ...remoteResult, details: remoteResult.details as unknown };
}
if (!ctx.hasUI) {
return errorResult("Error: UI not available (non-interactive mode)", params.questions);
@ -197,7 +238,7 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
]),
),
};
return {
const fallbackResult = {
content: [{ type: "text" as const, text: JSON.stringify({ answers }) }],
details: {
questions: params.questions,
@ -205,6 +246,8 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
cancelled: false,
} satisfies LocalResultDetails,
};
turnCache.set(sig, fallbackResult);
return fallbackResult;
}
// Check if cancelled (empty answers = user exited)
@ -216,10 +259,12 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
};
}
return {
content: [{ type: "text", text: formatForLLM(result) }],
const successResult = {
content: [{ type: "text" as const, text: formatForLLM(result) }],
details: { questions: params.questions, response: result, cancelled: false } satisfies LocalResultDetails,
};
turnCache.set(sig, successResult);
return successResult;
},
// ─── Rendering ────────────────────────────────────────────────────────

View file

@ -17,6 +17,7 @@ import { getAutoDashboardData, isAutoActive, isAutoPaused, markToolEnd, markTool
import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js";
import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js";
import { saveActivityLog } from "../activity-log.js";
import { resetAskUserQuestionsCache } from "../../ask-user-questions.js";
// Skip the welcome screen on the very first session_start — cli.ts already
// printed it before the TUI launched. Only re-print on /clear (subsequent sessions).
@ -31,6 +32,7 @@ export function registerHooks(pi: ExtensionAPI): void {
pi.on("session_start", async (_event, ctx) => {
resetWriteGateState();
resetToolCallLoopGuard();
resetAskUserQuestionsCache();
await syncServiceTierStatus(ctx);
// Apply show_token_cost preference (#1515)
@ -67,6 +69,7 @@ export function registerHooks(pi: ExtensionAPI): void {
pi.on("session_switch", async (_event, ctx) => {
resetWriteGateState();
resetToolCallLoopGuard();
resetAskUserQuestionsCache();
clearDiscussionFlowState();
await syncServiceTierStatus(ctx);
loadToolApiKeys();
@ -78,6 +81,7 @@ export function registerHooks(pi: ExtensionAPI): void {
pi.on("agent_end", async (event, ctx: ExtensionContext) => {
resetToolCallLoopGuard();
resetAskUserQuestionsCache();
await handleAgentEnd(pi, event, ctx);
});

View file

@ -16,8 +16,13 @@ import { createHash } from "node:crypto";
const MAX_CONSECUTIVE_IDENTICAL_CALLS = 4;
/** Interactive/user-facing tools where even 1 duplicate is confusing. */
const STRICT_LOOP_TOOLS = new Set(["ask_user_questions"]);
const MAX_CONSECUTIVE_STRICT = 1;
let consecutiveCount = 0;
let lastSignature = "";
let lastToolName = "";
let enabled = true;
/** Hash tool name + args into a compact signature for comparison. */
@ -55,9 +60,14 @@ export function checkToolCallLoop(
} else {
consecutiveCount = 1;
lastSignature = sig;
lastToolName = toolName;
}
if (consecutiveCount > MAX_CONSECUTIVE_IDENTICAL_CALLS) {
const threshold = STRICT_LOOP_TOOLS.has(toolName)
? MAX_CONSECUTIVE_STRICT
: MAX_CONSECUTIVE_IDENTICAL_CALLS;
if (consecutiveCount > threshold) {
return {
block: true,
reason:
@ -75,6 +85,7 @@ export function checkToolCallLoop(
export function resetToolCallLoopGuard(): void {
consecutiveCount = 0;
lastSignature = "";
lastToolName = "";
enabled = true;
}
@ -83,6 +94,7 @@ export function disableToolCallLoopGuard(): void {
enabled = false;
consecutiveCount = 0;
lastSignature = "";
lastToolName = "";
}
/** Get current consecutive count for diagnostics. */

View file

@ -30,7 +30,7 @@ Ask **13 questions per round**. Keep each question focused on one of:
- **The biggest technical unknowns / risks** — what could fail, what hasn't been proven
- **What external systems/services this touches** — APIs, databases, third-party services
**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 13 questions per call, each as a separate question object. Keep option labels short (35 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions.
**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 13 questions per call, each as a separate question object. Keep option labels short (35 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. **IMPORTANT: Call `ask_user_questions` exactly once per turn. Never make multiple calls with the same or overlapping questions — wait for the user's response before asking the next round.**
**If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 13 focused questions. Wait for answers before asking the next round.

View file

@ -22,7 +22,7 @@ Do **not** go deep — just enough that your questions reflect what's actually t
### Question rounds
Ask **13 questions per round** using `ask_user_questions`. Keep each question focused on one of:
Ask **13 questions per round** using `ask_user_questions`. **Call `ask_user_questions` exactly once per turn — never make multiple calls with the same or overlapping questions. Wait for the user's response before asking the next round.** Keep each question focused on one of:
- **UX and user-facing behaviour** — what does the user see, click, trigger, or experience?
- **Edge cases and failure states** — what happens when things go wrong or are in unusual states?
- **Scope boundaries** — what is explicitly in vs out for this slice? What deferred to later?

View file

@ -0,0 +1,71 @@
// ask-user-questions-dedup — Regression tests for per-turn deduplication
//
// Verifies that duplicate ask_user_questions calls within a single turn
// return cached results instead of re-dispatching (especially to remote
// channels like Discord). Also verifies the strict loop guard threshold
// for interactive tools.
//
// Regression: duplicate questions were sent to Discord when the LLM called
// ask_user_questions multiple times with the same question set in one turn,
// causing user confusion and tool failure cascading to plain text fallback.
import { describe, test, beforeEach } from "node:test";
import assert from "node:assert/strict";
import {
checkToolCallLoop,
resetToolCallLoopGuard,
} from "../bootstrap/tool-call-loop-guard.ts";
import {
resetAskUserQuestionsCache,
} from "../../ask-user-questions.ts";
// ═══════════════════════════════════════════════════════════════════════════
// Strict loop guard: ask_user_questions blocks on 2nd identical call
// ═══════════════════════════════════════════════════════════════════════════
describe("ask_user_questions dedup", () => {
beforeEach(() => {
resetToolCallLoopGuard();
resetAskUserQuestionsCache();
});
test("loop guard blocks 2nd identical ask_user_questions call", () => {
const args = { questions: [{ id: "app_coverage", question: "Which apps?" }] };
const first = checkToolCallLoop("ask_user_questions", args);
assert.equal(first.block, false, "First call should be allowed");
const second = checkToolCallLoop("ask_user_questions", args);
assert.equal(second.block, true, "2nd identical call should be blocked");
assert.ok(second.reason!.includes("ask_user_questions"), "Reason should name the tool");
});
test("loop guard allows different ask_user_questions calls", () => {
const args1 = { questions: [{ id: "app_coverage", question: "Which apps?" }] };
const args2 = { questions: [{ id: "testing_focus", question: "What priority?" }] };
const first = checkToolCallLoop("ask_user_questions", args1);
assert.equal(first.block, false, "First call allowed");
const second = checkToolCallLoop("ask_user_questions", args2);
assert.equal(second.block, false, "Different question set should be allowed");
});
test("non-interactive tools still use normal threshold of 4", () => {
const args = { query: "same query" };
for (let i = 1; i <= 4; i++) {
const result = checkToolCallLoop("web_search", args);
assert.equal(result.block, false, `web_search call ${i} should be allowed`);
}
const fifth = checkToolCallLoop("web_search", args);
assert.equal(fifth.block, true, "5th identical web_search should be blocked");
});
test("cache resets independently from loop guard", () => {
// Verify the reset function exists and is callable
resetAskUserQuestionsCache();
// No error means the cache module is properly exported and functional
});
});

View file

@ -136,17 +136,30 @@ console.log('\n── Loop guard: nested args are not stripped ──');
assert.deepStrictEqual(getToolCallLoopCount(), 1, `Each unique nested call should reset count to 1`);
}
// Truly identical nested calls should still be detected
// Truly identical nested calls should still be detected.
// ask_user_questions has a strict threshold of 1, so the 2nd identical call is blocked.
resetToolCallLoopGuard();
for (let i = 1; i <= 4; i++) {
checkToolCallLoop('ask_user_questions', {
questions: [{ id: 'same', question: 'Same?' }],
});
}
const first = checkToolCallLoop('ask_user_questions', {
questions: [{ id: 'same', question: 'Same?' }],
});
assert.ok(first.block === false, 'First ask_user_questions call should be allowed');
const blocked = checkToolCallLoop('ask_user_questions', {
questions: [{ id: 'same', question: 'Same?' }],
});
assert.ok(blocked.block === true, 'Identical nested calls should still be blocked');
assert.ok(blocked.block === true, '2nd identical ask_user_questions call should be blocked (strict threshold)');
// Non-strict tools still allow up to 4 identical calls
resetToolCallLoopGuard();
for (let i = 1; i <= 4; i++) {
const r = checkToolCallLoop('web_search', {
questions: [{ id: 'same', question: 'Same?' }],
});
assert.ok(r.block === false, `web_search call ${i} should be allowed (normal threshold)`);
}
const blockedNormal = checkToolCallLoop('web_search', {
questions: [{ id: 'same', question: 'Same?' }],
});
assert.ok(blockedNormal.block === true, '5th identical web_search call should be blocked');
}
// ═══════════════════════════════════════════════════════════════════════════