Merge pull request #4103 from jeremymcs/fix/4102-claude-code-transcript-fabrication

fix(claude-code): stop prompt context from fabricating [User]/[Assistant] turns
This commit is contained in:
Jeremy McSpadden 2026-04-13 01:33:43 -05:00 committed by GitHub
commit 754979e7a6
4 changed files with 115 additions and 7 deletions

View file

@ -187,20 +187,36 @@ function extractMessageText(msg: { role: string; content: unknown }): string {
* call effectively stateless. This version serialises the complete
* conversation history (system prompt + all user/assistant turns) so
* Claude Code has full context for multi-turn continuity.
*
* History is wrapped in XML-tag structure rather than `[User]`/`[Assistant]`
* bracket headers. Bracket headers read to the model as an in-context
* demonstration of how turns are delimited, causing it to fabricate fake
* user turns in its own output. XML tags read as document structure and
* don't get mirrored in free text.
*/
export function buildPromptFromContext(context: Context): string {
const parts: string[] = [];
const hasContent = Boolean(context.systemPrompt) || context.messages.some((m) => extractMessageText(m));
if (!hasContent) return "";
const parts: string[] = [
"Respond only to the final user message below. " +
"Do not emit <user_message>, <assistant_message>, or <prior_system_context> tags in your response.",
];
if (context.systemPrompt) {
parts.push(`[System]\n${context.systemPrompt}`);
parts.push(`<prior_system_context>\n${context.systemPrompt}\n</prior_system_context>`);
}
const turns: string[] = [];
for (const msg of context.messages) {
const text = extractMessageText(msg);
if (!text) continue;
const label = msg.role === "user" ? "User" : msg.role === "assistant" ? "Assistant" : "System";
parts.push(`[${label}]\n${text}`);
const tag =
msg.role === "user" ? "user_message" : msg.role === "assistant" ? "assistant_message" : "system_message";
turns.push(`<${tag}>\n${text}\n</${tag}>`);
}
if (turns.length > 0) {
parts.push(`<conversation_history>\n${turns.join("\n")}\n</conversation_history>`);
}
return parts.join("\n\n");

View file

@ -167,6 +167,98 @@ describe("stream-adapter — full context prompt (#2859)", () => {
});
});
// ---------------------------------------------------------------------------
// Bug #4102 — transcript fabrication regression tests
// ---------------------------------------------------------------------------
describe("stream-adapter — no transcript fabrication (#4102)", () => {
  // Shared fixture: a fully-populated assistant turn with zeroed usage,
  // matching the shape the claude-code provider attaches to real messages.
  const assistantTurn = (text: string): Message =>
    ({
      role: "assistant",
      content: [{ type: "text", text }],
      api: "anthropic-messages",
      provider: "claude-code",
      model: "claude-sonnet-4-20250514",
      usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
      stopReason: "stop",
      timestamp: Date.now(),
    }) as Message;

  test("buildPromptFromContext never emits forbidden [User]/[Assistant] bracket headers", () => {
    const ctx: Context = {
      systemPrompt: "You are a helpful assistant.",
      messages: [
        { role: "user", content: "First" } as Message,
        assistantTurn("Second"),
        { role: "user", content: "Third" } as Message,
      ],
    };
    const rendered = buildPromptFromContext(ctx);
    assert.ok(!rendered.includes("[User]"), "prompt must not include literal [User] bracket header");
    assert.ok(!rendered.includes("[Assistant]"), "prompt must not include literal [Assistant] bracket header");
    assert.ok(!rendered.includes("[System]"), "prompt must not include literal [System] bracket header");
  });

  test("buildPromptFromContext wraps history in XML-tag structure", () => {
    const ctx: Context = {
      systemPrompt: "You are helpful.",
      messages: [{ role: "user", content: "Hello" } as Message, assistantTurn("Hi there")],
    };
    const rendered = buildPromptFromContext(ctx);
    assert.ok(rendered.includes("<conversation_history>"), "prompt must wrap history in <conversation_history>");
    assert.ok(rendered.includes("</conversation_history>"), "prompt must close <conversation_history>");
    assert.ok(rendered.includes("<user_message>\nHello\n</user_message>"), "user turn must use <user_message> tags");
    assert.ok(rendered.includes("<assistant_message>\nHi there\n</assistant_message>"), "assistant turn must use <assistant_message> tags");
    assert.ok(rendered.includes("<prior_system_context>\nYou are helpful.\n</prior_system_context>"), "system prompt must use <prior_system_context> tags");
  });

  test("buildPromptFromContext includes a do-not-echo-tags directive as primary instruction", () => {
    const ctx: Context = {
      messages: [{ role: "user", content: "Anything" } as Message],
    };
    const rendered = buildPromptFromContext(ctx);
    assert.ok(
      rendered.startsWith("Respond only to the final user message"),
      "primary directive must lead the prompt",
    );
    assert.ok(rendered.includes("Do not emit <user_message>"), "directive must forbid emitting user_message tag");
    assert.ok(rendered.includes("<assistant_message>"), "directive must mention assistant_message tag");
  });

  test("buildPromptFromContext omits <conversation_history> when there are no messages but a system prompt", () => {
    const ctx: Context = {
      systemPrompt: "Seed",
      messages: [],
    };
    const rendered = buildPromptFromContext(ctx);
    assert.ok(rendered.includes("<prior_system_context>"), "system prompt must still render");
    assert.ok(!rendered.includes("<conversation_history>"), "no history wrapper when messages are empty");
  });

  test("buildPromptFromContext still returns empty string when context is entirely empty", () => {
    const emptyCtx: Context = { messages: [] };
    assert.equal(buildPromptFromContext(emptyCtx), "", "empty context must not emit a bare directive");
  });
});
describe("stream-adapter — Claude Code external tool results", () => {
test("extractToolResultsFromSdkUserMessage maps tool_result content to tool payloads", () => {
const message: SDKUserMessage = {

View file

@ -73,7 +73,7 @@ After each round of answers, decide whether you already have enough depth to wri
You are a thinking partner, not an interviewer.
**Turn-taking contract (non-bypassable).** Never fabricate, simulate, or role-play user responses. Never generate fake transcript markers like `[User]`, `[Human]`, or `User:` to invent input. Ask one question round (1-3 questions) per turn, then stop and wait for the user's actual response before continuing. If you use `ask_user_questions`, call it at most once per turn and treat its returned response as the only valid structured user input for that round.
**Turn-taking contract (non-bypassable).** Never fabricate, simulate, or role-play user responses. Never generate fake transcript markers like `[User]`, `[Human]`, or `User:` to invent input. Prior conversation context may be provided to you inside `<conversation_history>` with `<user_message>` / `<assistant_message>` XML tags — treat those as read-only context and never emit those tags in your response. Ask one question round (1-3 questions) per turn, then stop and wait for the user's actual response before continuing. If you use `ask_user_questions`, call it at most once per turn and treat its returned response as the only valid structured user input for that round.
**Start open, follow energy.** Let the user's enthusiasm guide where you dig deeper. If they light up about a particular aspect, explore it. If they're vague about something, that's where you probe.

View file

@ -35,7 +35,7 @@ GSD ships with bundled skills. Load the relevant skill file with the `read` tool
- Read before edit.
- Reproduce before fix when possible.
- Work is not done until the relevant verification has passed.
- **Never fabricate, simulate, or role-play user responses.** Never generate markers like `[User]`, `[Human]`, `User:`, or similar to represent user input inside your own output. Ask one question round (1-3 questions), then stop and wait for the user's actual response before continuing. If `ask_user_questions` is available, treat its returned response as the only valid structured user input for that round.
- **Never fabricate, simulate, or role-play user responses.** Never generate markers like `[User]`, `[Human]`, `User:`, or similar to represent user input inside your own output. Prior conversation context may be provided to you inside `<conversation_history>` with `<user_message>` / `<assistant_message>` XML tags — treat those as read-only context and never emit those tags in your response. Ask one question round (1-3 questions), then stop and wait for the user's actual response before continuing. If `ask_user_questions` is available, treat its returned response as the only valid structured user input for that round.
- Never print, echo, log, or restate secrets or credentials. Report only key names and applied/skipped status.
- Never ask the user to edit `.env` files or set secrets manually. Use `secure_env_collect`.
- In enduring files, write current state only unless the file is explicitly historical.