Merge pull request #4103 from jeremymcs/fix/4102-claude-code-transcript-fabrication

fix(claude-code): stop prompt context from fabricating [User]/[Assistant] turns
This commit is contained in:
Jeremy McSpadden 2026-04-13 01:33:43 -05:00 committed by GitHub
commit 754979e7a6
4 changed files with 115 additions and 7 deletions

View file

@ -187,20 +187,36 @@ function extractMessageText(msg: { role: string; content: unknown }): string {
* call effectively stateless. This version serialises the complete
* conversation history (system prompt + all user/assistant turns) so
* Claude Code has full context for multi-turn continuity.
*
* History is wrapped in XML-tag structure rather than `[User]`/`[Assistant]`
* bracket headers. Bracket headers read to the model as an in-context
* demonstration of how turns are delimited, causing it to fabricate fake
* user turns in its own output. XML tags read as document structure and
* don't get mirrored in free text.
*/
export function buildPromptFromContext(context: Context): string {
const parts: string[] = [];
const hasContent = Boolean(context.systemPrompt) || context.messages.some((m) => extractMessageText(m));
if (!hasContent) return "";
const parts: string[] = [
"Respond only to the final user message below. " +
"Do not emit <user_message>, <assistant_message>, or <prior_system_context> tags in your response.",
];
if (context.systemPrompt) {
parts.push(`[System]\n${context.systemPrompt}`);
parts.push(`<prior_system_context>\n${context.systemPrompt}\n</prior_system_context>`);
}
const turns: string[] = [];
for (const msg of context.messages) {
const text = extractMessageText(msg);
if (!text) continue;
const label = msg.role === "user" ? "User" : msg.role === "assistant" ? "Assistant" : "System";
parts.push(`[${label}]\n${text}`);
const tag =
msg.role === "user" ? "user_message" : msg.role === "assistant" ? "assistant_message" : "system_message";
turns.push(`<${tag}>\n${text}\n</${tag}>`);
}
if (turns.length > 0) {
parts.push(`<conversation_history>\n${turns.join("\n")}\n</conversation_history>`);
}
return parts.join("\n\n");

View file

@ -167,6 +167,98 @@ describe("stream-adapter — full context prompt (#2859)", () => {
});
});
// ---------------------------------------------------------------------------
// Bug #4102 — transcript fabrication regression tests
// ---------------------------------------------------------------------------
describe("stream-adapter — no transcript fabrication (#4102)", () => {
  // Shared fixture: a fully-populated assistant turn with zeroed usage,
  // matching the shape the claude-code provider attaches to real messages.
  const assistantTurn = (text: string): Message =>
    ({
      role: "assistant",
      content: [{ type: "text", text }],
      api: "anthropic-messages",
      provider: "claude-code",
      model: "claude-sonnet-4-20250514",
      usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
      stopReason: "stop",
      timestamp: Date.now(),
    }) as Message;

  test("buildPromptFromContext never emits forbidden [User]/[Assistant] bracket headers", () => {
    const ctx: Context = {
      systemPrompt: "You are a helpful assistant.",
      messages: [
        { role: "user", content: "First" } as Message,
        assistantTurn("Second"),
        { role: "user", content: "Third" } as Message,
      ],
    };
    const rendered = buildPromptFromContext(ctx);
    assert.ok(!rendered.includes("[User]"), "prompt must not include literal [User] bracket header");
    assert.ok(!rendered.includes("[Assistant]"), "prompt must not include literal [Assistant] bracket header");
    assert.ok(!rendered.includes("[System]"), "prompt must not include literal [System] bracket header");
  });

  test("buildPromptFromContext wraps history in XML-tag structure", () => {
    const ctx: Context = {
      systemPrompt: "You are helpful.",
      messages: [{ role: "user", content: "Hello" } as Message, assistantTurn("Hi there")],
    };
    const rendered = buildPromptFromContext(ctx);
    assert.ok(rendered.includes("<conversation_history>"), "prompt must wrap history in <conversation_history>");
    assert.ok(rendered.includes("</conversation_history>"), "prompt must close <conversation_history>");
    assert.ok(rendered.includes("<user_message>\nHello\n</user_message>"), "user turn must use <user_message> tags");
    assert.ok(rendered.includes("<assistant_message>\nHi there\n</assistant_message>"), "assistant turn must use <assistant_message> tags");
    assert.ok(rendered.includes("<prior_system_context>\nYou are helpful.\n</prior_system_context>"), "system prompt must use <prior_system_context> tags");
  });

  test("buildPromptFromContext includes a do-not-echo-tags directive as primary instruction", () => {
    const ctx: Context = {
      messages: [{ role: "user", content: "Anything" } as Message],
    };
    const rendered = buildPromptFromContext(ctx);
    assert.ok(
      rendered.startsWith("Respond only to the final user message"),
      "primary directive must lead the prompt",
    );
    assert.ok(rendered.includes("Do not emit <user_message>"), "directive must forbid emitting user_message tag");
    assert.ok(rendered.includes("<assistant_message>"), "directive must mention assistant_message tag");
  });

  test("buildPromptFromContext omits <conversation_history> when there are no messages but a system prompt", () => {
    const ctx: Context = {
      systemPrompt: "Seed",
      messages: [],
    };
    const rendered = buildPromptFromContext(ctx);
    assert.ok(rendered.includes("<prior_system_context>"), "system prompt must still render");
    assert.ok(!rendered.includes("<conversation_history>"), "no history wrapper when messages are empty");
  });

  test("buildPromptFromContext still returns empty string when context is entirely empty", () => {
    const emptyCtx: Context = { messages: [] };
    assert.equal(buildPromptFromContext(emptyCtx), "", "empty context must not emit a bare directive");
  });
});
describe("stream-adapter — Claude Code external tool results", () => {
test("extractToolResultsFromSdkUserMessage maps tool_result content to tool payloads", () => {
const message: SDKUserMessage = {

View file

@ -73,7 +73,7 @@ After each round of answers, decide whether you already have enough depth to wri
You are a thinking partner, not an interviewer.
**Turn-taking contract (non-bypassable).** Never fabricate, simulate, or role-play user responses. Never generate fake transcript markers like `[User]`, `[Human]`, or `User:` to invent input. Ask one question round (1-3 questions) per turn, then stop and wait for the user's actual response before continuing. If you use `ask_user_questions`, call it at most once per turn and treat its returned response as the only valid structured user input for that round.
**Turn-taking contract (non-bypassable).** Never fabricate, simulate, or role-play user responses. Never generate fake transcript markers like `[User]`, `[Human]`, or `User:` to invent input. Prior conversation context may be provided to you inside `<conversation_history>` with `<user_message>` / `<assistant_message>` XML tags — treat those as read-only context and never emit those tags in your response. Ask one question round (1-3 questions) per turn, then stop and wait for the user's actual response before continuing. If you use `ask_user_questions`, call it at most once per turn and treat its returned response as the only valid structured user input for that round.
**Start open, follow energy.** Let the user's enthusiasm guide where you dig deeper. If they light up about a particular aspect, explore it. If they're vague about something, that's where you probe.

View file

@ -35,7 +35,7 @@ GSD ships with bundled skills. Load the relevant skill file with the `read` tool
- Read before edit.
- Reproduce before fix when possible.
- Work is not done until the relevant verification has passed.
- **Never fabricate, simulate, or role-play user responses.** Never generate markers like `[User]`, `[Human]`, `User:`, or similar to represent user input inside your own output. Ask one question round (1-3 questions), then stop and wait for the user's actual response before continuing. If `ask_user_questions` is available, treat its returned response as the only valid structured user input for that round.
- **Never fabricate, simulate, or role-play user responses.** Never generate markers like `[User]`, `[Human]`, `User:`, or similar to represent user input inside your own output. Prior conversation context may be provided to you inside `<conversation_history>` with `<user_message>` / `<assistant_message>` XML tags — treat those as read-only context and never emit those tags in your response. Ask one question round (1-3 questions), then stop and wait for the user's actual response before continuing. If `ask_user_questions` is available, treat its returned response as the only valid structured user input for that round.
- Never print, echo, log, or restate secrets or credentials. Report only key names and applied/skipped status.
- Never ask the user to edit `.env` files or set secrets manually. Use `secure_env_collect`.
- In enduring files, write current state only unless the file is explicitly historical.