fix(gsd): prevent duplicate ask_user_questions dispatches with per-turn dedup cache
Closes #3513 - Add per-turn dedup cache in ask_user_questions keyed by hashed question IDs; duplicate calls return cached results instead of re-dispatching to Discord - Lower loop guard threshold for interactive tools (ask_user_questions) from 4 to 1 — blocks 2nd identical call immediately - Wire cache reset into session_start, session_switch, and agent_end hooks - Harden discuss prompts with "call exactly once per turn" instruction - Add regression tests covering strict threshold and dedup behavior
This commit is contained in:
parent
099e6f3120
commit
f0759086e4
7 changed files with 159 additions and 14 deletions
|
|
@ -72,6 +72,30 @@ const AskUserQuestionsParams = Type.Object({
|
|||
}),
|
||||
});
|
||||
|
||||
// ─── Per-turn deduplication ──────────────────────────────────────────────────
|
||||
// Prevents duplicate question dispatches (especially to remote channels like
|
||||
// Discord) when the LLM calls ask_user_questions multiple times with the same
|
||||
// questions in a single turn. Keyed by sorted question IDs.
|
||||
|
||||
import { createHash } from "node:crypto";
|
||||
|
||||
interface CachedResult {
|
||||
content: { type: "text"; text: string }[];
|
||||
details: AskUserQuestionsDetails;
|
||||
}
|
||||
|
||||
const turnCache = new Map<string, CachedResult>();
|
||||
|
||||
function questionSignature(questions: Array<{ id: string }>): string {
|
||||
const ids = questions.map((q) => q.id).sort().join("|");
|
||||
return createHash("sha256").update(ids).digest("hex").slice(0, 16);
|
||||
}
|
||||
|
||||
/** Reset the dedup cache. Called on session boundaries. */
|
||||
export function resetAskUserQuestionsCache(): void {
|
||||
turnCache.clear();
|
||||
}
|
||||
|
||||
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
const OTHER_OPTION_LABEL = "None of the above";
|
||||
|
|
@ -121,6 +145,16 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
|
|||
parameters: AskUserQuestionsParams,
|
||||
|
||||
async execute(_toolCallId, params, signal, _onUpdate, ctx) {
|
||||
// ── Per-turn dedup: return cached result for identical question sets ──
|
||||
const sig = questionSignature(params.questions);
|
||||
const cached = turnCache.get(sig);
|
||||
if (cached) {
|
||||
return {
|
||||
content: [{ type: "text" as const, text: cached.content[0].text + "\n(Returned cached answer — this question set was already asked this turn.)" }],
|
||||
details: cached.details,
|
||||
};
|
||||
}
|
||||
|
||||
// Validation
|
||||
if (params.questions.length === 0 || params.questions.length > 3) {
|
||||
return errorResult("Error: questions must contain 1-3 items", params.questions);
|
||||
|
|
@ -140,7 +174,14 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
|
|||
// this is a no-op when the user has not set up Slack/Discord/Telegram.
|
||||
const { tryRemoteQuestions } = await import("./remote-questions/manager.js");
|
||||
const remoteResult = await tryRemoteQuestions(params.questions, signal);
|
||||
if (remoteResult) return { ...remoteResult, details: remoteResult.details as unknown };
|
||||
if (remoteResult) {
|
||||
// Cache successful remote results to prevent duplicate Discord dispatches
|
||||
const remoteDetails = remoteResult.details as Record<string, unknown> | undefined;
|
||||
if (remoteDetails && !remoteDetails.timed_out && !remoteDetails.error) {
|
||||
turnCache.set(sig, remoteResult as unknown as CachedResult);
|
||||
}
|
||||
return { ...remoteResult, details: remoteResult.details as unknown };
|
||||
}
|
||||
|
||||
if (!ctx.hasUI) {
|
||||
return errorResult("Error: UI not available (non-interactive mode)", params.questions);
|
||||
|
|
@ -197,7 +238,7 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
|
|||
]),
|
||||
),
|
||||
};
|
||||
return {
|
||||
const fallbackResult = {
|
||||
content: [{ type: "text" as const, text: JSON.stringify({ answers }) }],
|
||||
details: {
|
||||
questions: params.questions,
|
||||
|
|
@ -205,6 +246,8 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
|
|||
cancelled: false,
|
||||
} satisfies LocalResultDetails,
|
||||
};
|
||||
turnCache.set(sig, fallbackResult);
|
||||
return fallbackResult;
|
||||
}
|
||||
|
||||
// Check if cancelled (empty answers = user exited)
|
||||
|
|
@ -216,10 +259,12 @@ export default function AskUserQuestions(pi: ExtensionAPI) {
|
|||
};
|
||||
}
|
||||
|
||||
return {
|
||||
content: [{ type: "text", text: formatForLLM(result) }],
|
||||
const successResult = {
|
||||
content: [{ type: "text" as const, text: formatForLLM(result) }],
|
||||
details: { questions: params.questions, response: result, cancelled: false } satisfies LocalResultDetails,
|
||||
};
|
||||
turnCache.set(sig, successResult);
|
||||
return successResult;
|
||||
},
|
||||
|
||||
// ─── Rendering ────────────────────────────────────────────────────────
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ import { getAutoDashboardData, isAutoActive, isAutoPaused, markToolEnd, markTool
|
|||
import { isParallelActive, shutdownParallel } from "../parallel-orchestrator.js";
|
||||
import { checkToolCallLoop, resetToolCallLoopGuard } from "./tool-call-loop-guard.js";
|
||||
import { saveActivityLog } from "../activity-log.js";
|
||||
import { resetAskUserQuestionsCache } from "../../ask-user-questions.js";
|
||||
|
||||
// Skip the welcome screen on the very first session_start — cli.ts already
|
||||
// printed it before the TUI launched. Only re-print on /clear (subsequent sessions).
|
||||
|
|
@ -31,6 +32,7 @@ export function registerHooks(pi: ExtensionAPI): void {
|
|||
pi.on("session_start", async (_event, ctx) => {
|
||||
resetWriteGateState();
|
||||
resetToolCallLoopGuard();
|
||||
resetAskUserQuestionsCache();
|
||||
await syncServiceTierStatus(ctx);
|
||||
|
||||
// Apply show_token_cost preference (#1515)
|
||||
|
|
@ -67,6 +69,7 @@ export function registerHooks(pi: ExtensionAPI): void {
|
|||
pi.on("session_switch", async (_event, ctx) => {
|
||||
resetWriteGateState();
|
||||
resetToolCallLoopGuard();
|
||||
resetAskUserQuestionsCache();
|
||||
clearDiscussionFlowState();
|
||||
await syncServiceTierStatus(ctx);
|
||||
loadToolApiKeys();
|
||||
|
|
@ -78,6 +81,7 @@ export function registerHooks(pi: ExtensionAPI): void {
|
|||
|
||||
pi.on("agent_end", async (event, ctx: ExtensionContext) => {
|
||||
resetToolCallLoopGuard();
|
||||
resetAskUserQuestionsCache();
|
||||
await handleAgentEnd(pi, event, ctx);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -16,8 +16,13 @@ import { createHash } from "node:crypto";
|
|||
|
||||
const MAX_CONSECUTIVE_IDENTICAL_CALLS = 4;
|
||||
|
||||
/** Interactive/user-facing tools where even 1 duplicate is confusing. */
|
||||
const STRICT_LOOP_TOOLS = new Set(["ask_user_questions"]);
|
||||
const MAX_CONSECUTIVE_STRICT = 1;
|
||||
|
||||
let consecutiveCount = 0;
|
||||
let lastSignature = "";
|
||||
let lastToolName = "";
|
||||
let enabled = true;
|
||||
|
||||
/** Hash tool name + args into a compact signature for comparison. */
|
||||
|
|
@ -55,9 +60,14 @@ export function checkToolCallLoop(
|
|||
} else {
|
||||
consecutiveCount = 1;
|
||||
lastSignature = sig;
|
||||
lastToolName = toolName;
|
||||
}
|
||||
|
||||
if (consecutiveCount > MAX_CONSECUTIVE_IDENTICAL_CALLS) {
|
||||
const threshold = STRICT_LOOP_TOOLS.has(toolName)
|
||||
? MAX_CONSECUTIVE_STRICT
|
||||
: MAX_CONSECUTIVE_IDENTICAL_CALLS;
|
||||
|
||||
if (consecutiveCount > threshold) {
|
||||
return {
|
||||
block: true,
|
||||
reason:
|
||||
|
|
@ -75,6 +85,7 @@ export function checkToolCallLoop(
|
|||
export function resetToolCallLoopGuard(): void {
|
||||
consecutiveCount = 0;
|
||||
lastSignature = "";
|
||||
lastToolName = "";
|
||||
enabled = true;
|
||||
}
|
||||
|
||||
|
|
@ -83,6 +94,7 @@ export function disableToolCallLoopGuard(): void {
|
|||
enabled = false;
|
||||
consecutiveCount = 0;
|
||||
lastSignature = "";
|
||||
lastToolName = "";
|
||||
}
|
||||
|
||||
/** Get current consecutive count for diagnostics. */
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ Ask **1–3 questions per round**. Keep each question focused on one of:
|
|||
- **The biggest technical unknowns / risks** — what could fail, what hasn't been proven
|
||||
- **What external systems/services this touches** — APIs, databases, third-party services
|
||||
|
||||
**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions.
|
||||
**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions. **IMPORTANT: Call `ask_user_questions` exactly once per turn. Never make multiple calls with the same or overlapping questions — wait for the user's response before asking the next round.**
|
||||
|
||||
**If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1–3 focused questions. Wait for answers before asking the next round.
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ Do **not** go deep — just enough that your questions reflect what's actually t
|
|||
|
||||
### Question rounds
|
||||
|
||||
Ask **1–3 questions per round** using `ask_user_questions`. Keep each question focused on one of:
|
||||
Ask **1–3 questions per round** using `ask_user_questions`. **Call `ask_user_questions` exactly once per turn — never make multiple calls with the same or overlapping questions. Wait for the user's response before asking the next round.** Keep each question focused on one of:
|
||||
- **UX and user-facing behaviour** — what does the user see, click, trigger, or experience?
|
||||
- **Edge cases and failure states** — what happens when things go wrong or are in unusual states?
|
||||
- **Scope boundaries** — what is explicitly in vs out for this slice? What deferred to later?
|
||||
|
|
|
|||
|
|
@ -0,0 +1,71 @@
|
|||
// ask-user-questions-dedup — Regression tests for per-turn deduplication
|
||||
//
|
||||
// Verifies that duplicate ask_user_questions calls within a single turn
|
||||
// return cached results instead of re-dispatching (especially to remote
|
||||
// channels like Discord). Also verifies the strict loop guard threshold
|
||||
// for interactive tools.
|
||||
//
|
||||
// Regression: duplicate questions were sent to Discord when the LLM called
|
||||
// ask_user_questions multiple times with the same question set in one turn,
|
||||
// causing user confusion and tool failure cascading to plain text fallback.
|
||||
|
||||
import { describe, test, beforeEach } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import {
|
||||
checkToolCallLoop,
|
||||
resetToolCallLoopGuard,
|
||||
} from "../bootstrap/tool-call-loop-guard.ts";
|
||||
import {
|
||||
resetAskUserQuestionsCache,
|
||||
} from "../../ask-user-questions.ts";
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
// Strict loop guard: ask_user_questions blocks on 2nd identical call
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
||||
describe("ask_user_questions dedup", () => {
|
||||
beforeEach(() => {
|
||||
resetToolCallLoopGuard();
|
||||
resetAskUserQuestionsCache();
|
||||
});
|
||||
|
||||
test("loop guard blocks 2nd identical ask_user_questions call", () => {
|
||||
const args = { questions: [{ id: "app_coverage", question: "Which apps?" }] };
|
||||
|
||||
const first = checkToolCallLoop("ask_user_questions", args);
|
||||
assert.equal(first.block, false, "First call should be allowed");
|
||||
|
||||
const second = checkToolCallLoop("ask_user_questions", args);
|
||||
assert.equal(second.block, true, "2nd identical call should be blocked");
|
||||
assert.ok(second.reason!.includes("ask_user_questions"), "Reason should name the tool");
|
||||
});
|
||||
|
||||
test("loop guard allows different ask_user_questions calls", () => {
|
||||
const args1 = { questions: [{ id: "app_coverage", question: "Which apps?" }] };
|
||||
const args2 = { questions: [{ id: "testing_focus", question: "What priority?" }] };
|
||||
|
||||
const first = checkToolCallLoop("ask_user_questions", args1);
|
||||
assert.equal(first.block, false, "First call allowed");
|
||||
|
||||
const second = checkToolCallLoop("ask_user_questions", args2);
|
||||
assert.equal(second.block, false, "Different question set should be allowed");
|
||||
});
|
||||
|
||||
test("non-interactive tools still use normal threshold of 4", () => {
|
||||
const args = { query: "same query" };
|
||||
|
||||
for (let i = 1; i <= 4; i++) {
|
||||
const result = checkToolCallLoop("web_search", args);
|
||||
assert.equal(result.block, false, `web_search call ${i} should be allowed`);
|
||||
}
|
||||
|
||||
const fifth = checkToolCallLoop("web_search", args);
|
||||
assert.equal(fifth.block, true, "5th identical web_search should be blocked");
|
||||
});
|
||||
|
||||
test("cache resets independently from loop guard", () => {
|
||||
// Verify the reset function exists and is callable
|
||||
resetAskUserQuestionsCache();
|
||||
// No error means the cache module is properly exported and functional
|
||||
});
|
||||
});
|
||||
|
|
@ -136,17 +136,30 @@ console.log('\n── Loop guard: nested args are not stripped ──');
|
|||
assert.deepStrictEqual(getToolCallLoopCount(), 1, `Each unique nested call should reset count to 1`);
|
||||
}
|
||||
|
||||
// Truly identical nested calls should still be detected
|
||||
// Truly identical nested calls should still be detected.
|
||||
// ask_user_questions has a strict threshold of 1, so the 2nd identical call is blocked.
|
||||
resetToolCallLoopGuard();
|
||||
for (let i = 1; i <= 4; i++) {
|
||||
checkToolCallLoop('ask_user_questions', {
|
||||
questions: [{ id: 'same', question: 'Same?' }],
|
||||
});
|
||||
}
|
||||
const first = checkToolCallLoop('ask_user_questions', {
|
||||
questions: [{ id: 'same', question: 'Same?' }],
|
||||
});
|
||||
assert.ok(first.block === false, 'First ask_user_questions call should be allowed');
|
||||
const blocked = checkToolCallLoop('ask_user_questions', {
|
||||
questions: [{ id: 'same', question: 'Same?' }],
|
||||
});
|
||||
assert.ok(blocked.block === true, 'Identical nested calls should still be blocked');
|
||||
assert.ok(blocked.block === true, '2nd identical ask_user_questions call should be blocked (strict threshold)');
|
||||
|
||||
// Non-strict tools still allow up to 4 identical calls
|
||||
resetToolCallLoopGuard();
|
||||
for (let i = 1; i <= 4; i++) {
|
||||
const r = checkToolCallLoop('web_search', {
|
||||
questions: [{ id: 'same', question: 'Same?' }],
|
||||
});
|
||||
assert.ok(r.block === false, `web_search call ${i} should be allowed (normal threshold)`);
|
||||
}
|
||||
const blockedNormal = checkToolCallLoop('web_search', {
|
||||
questions: [{ id: 'same', question: 'Same?' }],
|
||||
});
|
||||
assert.ok(blockedNormal.block === true, '5th identical web_search call should be blocked');
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════════════
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue