Merge pull request #4103 from jeremymcs/fix/4102-claude-code-transcript-fabrication
fix(claude-code): stop prompt context from fabricating [User]/[Assistant] turns
This commit is contained in:
commit
754979e7a6
4 changed files with 115 additions and 7 deletions
|
|
@@ -187,20 +187,36 @@ function extractMessageText(msg: { role: string; content: unknown }): string {
|
|||
* call effectively stateless. This version serialises the complete
|
||||
* conversation history (system prompt + all user/assistant turns) so
|
||||
* Claude Code has full context for multi-turn continuity.
|
||||
*
|
||||
* History is wrapped in XML-tag structure rather than `[User]`/`[Assistant]`
|
||||
* bracket headers. Bracket headers read to the model as an in-context
|
||||
* demonstration of how turns are delimited, causing it to fabricate fake
|
||||
* user turns in its own output. XML tags read as document structure and
|
||||
* don't get mirrored in free text.
|
||||
*/
|
||||
export function buildPromptFromContext(context: Context): string {
|
||||
const parts: string[] = [];
|
||||
const hasContent = Boolean(context.systemPrompt) || context.messages.some((m) => extractMessageText(m));
|
||||
if (!hasContent) return "";
|
||||
|
||||
const parts: string[] = [
|
||||
"Respond only to the final user message below. " +
|
||||
"Do not emit <user_message>, <assistant_message>, or <prior_system_context> tags in your response.",
|
||||
];
|
||||
|
||||
if (context.systemPrompt) {
|
||||
parts.push(`[System]\n${context.systemPrompt}`);
|
||||
parts.push(`<prior_system_context>\n${context.systemPrompt}\n</prior_system_context>`);
|
||||
}
|
||||
|
||||
const turns: string[] = [];
|
||||
for (const msg of context.messages) {
|
||||
const text = extractMessageText(msg);
|
||||
if (!text) continue;
|
||||
|
||||
const label = msg.role === "user" ? "User" : msg.role === "assistant" ? "Assistant" : "System";
|
||||
parts.push(`[${label}]\n${text}`);
|
||||
const tag =
|
||||
msg.role === "user" ? "user_message" : msg.role === "assistant" ? "assistant_message" : "system_message";
|
||||
turns.push(`<${tag}>\n${text}\n</${tag}>`);
|
||||
}
|
||||
if (turns.length > 0) {
|
||||
parts.push(`<conversation_history>\n${turns.join("\n")}\n</conversation_history>`);
|
||||
}
|
||||
|
||||
return parts.join("\n\n");
|
||||
|
|
|
|||
|
|
@@ -167,6 +167,98 @@ describe("stream-adapter — full context prompt (#2859)", () => {
|
|||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Bug #4102 — transcript fabrication regression tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("stream-adapter — no transcript fabrication (#4102)", () => {
|
||||
test("buildPromptFromContext never emits forbidden [User]/[Assistant] bracket headers", () => {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant.",
|
||||
messages: [
|
||||
{ role: "user", content: "First" } as Message,
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "Second" }],
|
||||
api: "anthropic-messages",
|
||||
provider: "claude-code",
|
||||
model: "claude-sonnet-4-20250514",
|
||||
usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
} as Message,
|
||||
{ role: "user", content: "Third" } as Message,
|
||||
],
|
||||
};
|
||||
|
||||
const prompt = buildPromptFromContext(context);
|
||||
|
||||
assert.ok(!prompt.includes("[User]"), "prompt must not include literal [User] bracket header");
|
||||
assert.ok(!prompt.includes("[Assistant]"), "prompt must not include literal [Assistant] bracket header");
|
||||
assert.ok(!prompt.includes("[System]"), "prompt must not include literal [System] bracket header");
|
||||
});
|
||||
|
||||
test("buildPromptFromContext wraps history in XML-tag structure", () => {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are helpful.",
|
||||
messages: [
|
||||
{ role: "user", content: "Hello" } as Message,
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "Hi there" }],
|
||||
api: "anthropic-messages",
|
||||
provider: "claude-code",
|
||||
model: "claude-sonnet-4-20250514",
|
||||
usage: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
} as Message,
|
||||
],
|
||||
};
|
||||
|
||||
const prompt = buildPromptFromContext(context);
|
||||
|
||||
assert.ok(prompt.includes("<conversation_history>"), "prompt must wrap history in <conversation_history>");
|
||||
assert.ok(prompt.includes("</conversation_history>"), "prompt must close <conversation_history>");
|
||||
assert.ok(prompt.includes("<user_message>\nHello\n</user_message>"), "user turn must use <user_message> tags");
|
||||
assert.ok(prompt.includes("<assistant_message>\nHi there\n</assistant_message>"), "assistant turn must use <assistant_message> tags");
|
||||
assert.ok(prompt.includes("<prior_system_context>\nYou are helpful.\n</prior_system_context>"), "system prompt must use <prior_system_context> tags");
|
||||
});
|
||||
|
||||
test("buildPromptFromContext includes a do-not-echo-tags directive as primary instruction", () => {
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: "Anything" } as Message],
|
||||
};
|
||||
|
||||
const prompt = buildPromptFromContext(context);
|
||||
|
||||
assert.ok(
|
||||
prompt.startsWith("Respond only to the final user message"),
|
||||
"primary directive must lead the prompt",
|
||||
);
|
||||
assert.ok(prompt.includes("Do not emit <user_message>"), "directive must forbid emitting user_message tag");
|
||||
assert.ok(prompt.includes("<assistant_message>"), "directive must mention assistant_message tag");
|
||||
});
|
||||
|
||||
test("buildPromptFromContext omits <conversation_history> when there are no messages but a system prompt", () => {
|
||||
const context: Context = {
|
||||
systemPrompt: "Seed",
|
||||
messages: [],
|
||||
};
|
||||
|
||||
const prompt = buildPromptFromContext(context);
|
||||
|
||||
assert.ok(prompt.includes("<prior_system_context>"), "system prompt must still render");
|
||||
assert.ok(!prompt.includes("<conversation_history>"), "no history wrapper when messages are empty");
|
||||
});
|
||||
|
||||
test("buildPromptFromContext still returns empty string when context is entirely empty", () => {
|
||||
const context: Context = { messages: [] };
|
||||
const prompt = buildPromptFromContext(context);
|
||||
assert.equal(prompt, "", "empty context must not emit a bare directive");
|
||||
});
|
||||
});
|
||||
|
||||
describe("stream-adapter — Claude Code external tool results", () => {
|
||||
test("extractToolResultsFromSdkUserMessage maps tool_result content to tool payloads", () => {
|
||||
const message: SDKUserMessage = {
|
||||
|
|
|
|||
|
|
@@ -73,7 +73,7 @@ After each round of answers, decide whether you already have enough depth to wri
|
|||
|
||||
You are a thinking partner, not an interviewer.
|
||||
|
||||
**Turn-taking contract (non-bypassable).** Never fabricate, simulate, or role-play user responses. Never generate fake transcript markers like `[User]`, `[Human]`, or `User:` to invent input. Ask one question round (1-3 questions) per turn, then stop and wait for the user's actual response before continuing. If you use `ask_user_questions`, call it at most once per turn and treat its returned response as the only valid structured user input for that round.
|
||||
**Turn-taking contract (non-bypassable).** Never fabricate, simulate, or role-play user responses. Never generate fake transcript markers like `[User]`, `[Human]`, or `User:` to invent input. Prior conversation context may be provided to you inside `<conversation_history>` with `<user_message>` / `<assistant_message>` XML tags — treat those as read-only context and never emit those tags in your response. Ask one question round (1-3 questions) per turn, then stop and wait for the user's actual response before continuing. If you use `ask_user_questions`, call it at most once per turn and treat its returned response as the only valid structured user input for that round.
|
||||
|
||||
**Start open, follow energy.** Let the user's enthusiasm guide where you dig deeper. If they light up about a particular aspect, explore it. If they're vague about something, that's where you probe.
|
||||
|
||||
|
|
|
|||
|
|
@@ -35,7 +35,7 @@ GSD ships with bundled skills. Load the relevant skill file with the `read` tool
|
|||
- Read before edit.
|
||||
- Reproduce before fix when possible.
|
||||
- Work is not done until the relevant verification has passed.
|
||||
- **Never fabricate, simulate, or role-play user responses.** Never generate markers like `[User]`, `[Human]`, `User:`, or similar to represent user input inside your own output. Ask one question round (1-3 questions), then stop and wait for the user's actual response before continuing. If `ask_user_questions` is available, treat its returned response as the only valid structured user input for that round.
|
||||
- **Never fabricate, simulate, or role-play user responses.** Never generate markers like `[User]`, `[Human]`, `User:`, or similar to represent user input inside your own output. Prior conversation context may be provided to you inside `<conversation_history>` with `<user_message>` / `<assistant_message>` XML tags — treat those as read-only context and never emit those tags in your response. Ask one question round (1-3 questions), then stop and wait for the user's actual response before continuing. If `ask_user_questions` is available, treat its returned response as the only valid structured user input for that round.
|
||||
- Never print, echo, log, or restate secrets or credentials. Report only key names and applied/skipped status.
|
||||
- Never ask the user to edit `.env` files or set secrets manually. Use `secure_env_collect`.
|
||||
- In enduring files, write current state only unless the file is explicitly historical.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue