feat(sf/prompts): Phase 4 — cache_control breakpoints at static/dynamic boundary

Add reorderAndSplitForCaching, a structured counterpart to reorderForCaching
(which is kept as the fallback) that returns {before, after} split at the
semi-static→dynamic section boundary.

- prompt-ordering.js: export reorderAndSplitForCaching — returns null if no
  dynamic sections, otherwise {before: static+semi-static, after: dynamic}
- auto.js: import and wire reorderAndSplitForCaching into deps
- phases-unit.js: use split function; pass promptParts to runUnit when split
  succeeds; fall back to flat reorderForCaching when null
- run-unit.js: when promptParts is present, send a two-block content array
  [{type:text, text:before, cache_control:{type:ephemeral}}, {type:text, text:after}]
  so Anthropic-compatible providers cache the stable prefix
- openai-completions.ts: preserve cache_control on text parts in convertMessages;
  skip maybeAddOpenRouterAnthropicCacheControl if any part already has cache_control

Tests: 5 new contract tests for reorderAndSplitForCaching; all 4502 unit tests pass.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Mikael Hugo 2026-05-13 01:36:22 +02:00
parent 3b83d09692
commit a49ea1da87
6 changed files with 237 additions and 9 deletions

View file

@ -528,6 +528,16 @@ function maybeAddOpenRouterAnthropicCacheControl(
if (!Array.isArray(content)) continue;
// Skip if any part already has cache_control — the prompt pipeline already
// placed breakpoints at the correct static/dynamic boundary.
const alreadyMarked = content.some(
(p) =>
p &&
typeof p === "object" &&
(p as unknown as Record<string, unknown>).cache_control !== undefined,
);
if (alreadyMarked) return;
// Find last text part and add cache_control
for (let j = content.length - 1; j >= 0; j--) {
const part = content[j];
@ -605,10 +615,21 @@ export function convertMessages(
const content: ChatCompletionContentPart[] = msg.content.map(
(item): ChatCompletionContentPart => {
if (item.type === "text") {
return {
const part: ChatCompletionContentPartText = {
type: "text",
text: sanitizeSurrogates(item.text),
} satisfies ChatCompletionContentPartText;
};
// Preserve cache_control if present (set upstream for Anthropic prompt caching).
// The property is not in the OpenAI SDK type but is accepted by providers
// that support Anthropic-style caching (openrouter/anthropic/*).
const cacheControl = (
item as unknown as Record<string, unknown>
).cache_control;
if (cacheControl) {
(part as unknown as Record<string, unknown>).cache_control =
cacheControl;
}
return part;
} else {
return {
type: "image_url",

View file

@ -154,7 +154,10 @@ import {
loadEffectiveSFPreferences,
resolveAutoSupervisorConfig,
} from "./preferences.js";
import { reorderForCaching } from "./prompt-ordering.js";
import {
reorderAndSplitForCaching,
reorderForCaching,
} from "./prompt-ordering.js";
import { pruneQueueOrder } from "./queue-order.js";
import { recordOutcome, resetRoutingHistory } from "./routing-history.js";
import { convertDispatchRules, initRegistry } from "./rule-registry.js";
@ -1394,6 +1397,7 @@ function buildLoopDeps() {
},
isDbAvailable,
reorderForCaching,
reorderAndSplitForCaching,
// Filesystem
existsSync,
readFileSync: (path, encoding) => readFileSync(path, encoding),

View file

@ -35,7 +35,6 @@ import {
classifyExecutorRefusal,
consumePendingAutonomousSolverSteering,
getConfiguredAutonomousSolverMaxIterations,
isNoOpExecutorTranscript,
readAutonomousSolverState,
recordAutonomousSolverMissingCheckpointRetry,
} from "../autonomous-solver.js";
@ -518,9 +517,16 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
});
}
}
// Cache-optimize prompt section ordering
// Cache-optimize prompt section ordering; split at the semi-static→dynamic
// boundary so providers can mark the stable prefix with cache_control:ephemeral.
let promptParts = null;
try {
finalPrompt = deps.reorderForCaching(finalPrompt);
promptParts = deps.reorderAndSplitForCaching?.(finalPrompt) ?? null;
if (promptParts) {
finalPrompt = promptParts.before + "\n" + promptParts.after;
} else {
finalPrompt = deps.reorderForCaching(finalPrompt);
}
} catch (reorderErr) {
const msg = getErrorMessage(reorderErr);
logWarning("engine", "Prompt reorder failed", { error: msg });
@ -719,7 +725,9 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
unitType,
unitId,
});
const unitResult = await runUnit(ctx, pi, s, unitType, unitId, finalPrompt);
const unitResult = await runUnit(ctx, pi, s, unitType, unitId, finalPrompt, {
promptParts: promptParts ?? undefined,
});
s.lastUnitAgentEndMessages = unitResult.event?.messages ?? null;
let currentUnitResult = unitResult;
const executorMessages = unitResult.event?.messages ?? [];

View file

@ -48,6 +48,10 @@ let sessionSwitchGeneration = 0;
*/
export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
const keepSession = options?.keepSession === true;
// promptParts: {before, after} — stable prefix (to cache) + dynamic suffix.
// When present, passes the content as a two-block array so providers can mark
// the stable prefix with cache_control:ephemeral.
const promptParts = options?.promptParts ?? null;
debugLog("runUnit", { phase: "start", unitType, unitId, keepSession });
// GAP-10: Ensure cwd matches basePath BEFORE newSession() captures it. The
// new session reads process.cwd() during construction to anchor its tool
@ -257,8 +261,21 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
}
}
try {
// When promptParts is available, send structured content so the provider can
// apply cache_control:ephemeral to the stable prefix (before) while leaving
// the dynamic suffix (after) uncached.
const messageContent = promptParts
? [
{
type: "text",
text: promptParts.before,
cache_control: { type: "ephemeral" },
},
{ type: "text", text: promptParts.after },
]
: prompt;
await pi.sendMessage(
{ customType: "sf-auto", content: prompt, display: s.verbose },
{ customType: "sf-auto", content: messageContent, display: s.verbose },
{ triggerTurn: true },
);
} finally {

View file

@ -132,6 +132,51 @@ export function reorderForCaching(prompt) {
}
return parts.join("\n");
}
/**
 * Sort a prompt's sections by role and cut the result in two at the
 * semi-static → dynamic boundary, so the leading half can be tagged with
 * `cache_control: ephemeral` on Anthropic-compatible providers.
 *
 * Result shape is `{before: string, after: string}`:
 *  - `before` — the preamble (when non-empty) followed by every non-dynamic
 *    section, ordered by `ROLE_ORDER`; this is the part intended for caching
 *  - `after`  — every section whose role is "dynamic"; not intended for caching
 *
 * `null` is returned when there is nothing useful to split: the prompt has no
 * sections, none of them is dynamic, or nothing would land in `before`.
 *
 * @param prompt The assembled prompt string
 * @returns The two halves joined with "\n" each, or null when no split applies
 */
export function reorderAndSplitForCaching(prompt) {
	const { preamble, sections } = splitSections(prompt);

	// No dynamic section means no boundary to cut at. An empty section list
	// falls out here too, since `some` is false on an empty array.
	const isDynamic = (entry) => entry.role === "dynamic";
	if (!sections.some(isDynamic)) return null;

	// Sort a copy so the caller's section list is left untouched.
	const ordered = sections
		.slice()
		.sort((left, right) => ROLE_ORDER[left.role] - ROLE_ORDER[right.role]);

	const stableText = ordered
		.filter((entry) => !isDynamic(entry))
		.map((entry) => entry.content);
	const dynamicText = ordered.filter(isDynamic).map((entry) => entry.content);

	const prefixParts = preamble ? [preamble, ...stableText] : stableText;

	// With neither a preamble nor any non-dynamic section there is no stable
	// prefix worth caching.
	if (prefixParts.length === 0) return null;

	return {
		before: prefixParts.join("\n"),
		after: dynamicText.join("\n"),
	};
}
/**
* Analyze a prompt's cache efficiency without reordering.
* Returns stats about how much of the prompt is cacheable.

View file

@ -1,7 +1,140 @@
import assert from "node:assert/strict";
import { test } from "vitest";
import { reorderForCaching } from "../prompt-ordering.js";
import {
reorderAndSplitForCaching,
reorderForCaching,
} from "../prompt-ordering.js";
// A "## Requirements Advanced" heading that appears inside inlined context
// must still come after "## Mission" and "## Context" once reordered.
test("reorderForCaching_when_inlined_slice_summary_has_requirements_advanced_keeps_it_after_mission", () => {
	const promptLines = [
		"# Milestone Validation",
		"",
		"## Working Directory",
		"/repo",
		"",
		"## Mission",
		"Dispatch reviewers.",
		"",
		"## Context",
		"Inlined below.",
		"",
		"## Inlined Context",
		"### S01 Summary",
		"# S01",
		"",
		"## Requirements Advanced",
		"- R1",
		"",
		"## Requirements Validated",
		"None.",
	];
	const output = reorderForCaching(promptLines.join("\n"));

	const missionAt = output.indexOf("## Mission");
	const nestedHeadingAt = output.indexOf("## Requirements Advanced");
	assert.ok(missionAt < nestedHeadingAt);

	const contextAt = output.indexOf("## Context");
	assert.ok(contextAt < nestedHeadingAt);
});
// A top-level "## Requirements" block is expected to be moved ahead of
// "## Mission" in the reordered prompt.
test("reorderForCaching_when_top_level_requirements_exists_still_hoists_exact_requirements_block", () => {
	const promptLines = [
		"# Execute",
		"",
		"## Mission",
		"Do work.",
		"",
		"## Requirements",
		"- R1",
		"",
		"## Verification",
		"Run tests.",
	];
	const output = reorderForCaching(promptLines.join("\n"));

	const requirementsAt = output.indexOf("## Requirements");
	const missionAt = output.indexOf("## Mission");
	assert.ok(requirementsAt < missionAt);
});
// With a preamble, two non-dynamic sections and one "## Inlined Task Plan"
// section, the split must be non-null, keep the first two sections in
// `before`, and put the task plan only in `after`.
test("reorderAndSplitForCaching_when_prompt_has_dynamic_section_returns_split", () => {
	const promptLines = [
		"Preamble text.",
		"",
		"## Working Directory",
		"/repo",
		"",
		"## Requirements",
		"- R1",
		"",
		"## Inlined Task Plan",
		"Task: do something.",
	];
	const split = reorderAndSplitForCaching(promptLines.join("\n"));
	assert.ok(split !== null, "expected non-null split result");

	const { before, after } = split;
	assert.ok(
		before.includes("## Working Directory"),
		"before should have static section",
	);
	assert.ok(
		before.includes("## Requirements"),
		"before should have semi-static section",
	);
	assert.ok(
		after.includes("## Inlined Task Plan"),
		"after should have dynamic section",
	);
	assert.ok(
		!before.includes("## Inlined Task Plan"),
		"before should not have dynamic section",
	);
});
// A prompt containing only "## Working Directory" and "## Requirements"
// is expected to yield null rather than a split.
test("reorderAndSplitForCaching_when_no_dynamic_sections_returns_null", () => {
	const promptLines = [
		"## Working Directory",
		"/repo",
		"",
		"## Requirements",
		"- R1",
	];
	const outcome = reorderAndSplitForCaching(promptLines.join("\n"));
	assert.strictEqual(outcome, null, "expected null when no dynamic sections");
});
// Text that precedes the first heading must end up in `before`, while the
// "## Resume State" section must end up in `after`.
test("reorderAndSplitForCaching_preamble_goes_into_before", () => {
	const promptLines = [
		"System instructions here.",
		"",
		"## Decisions",
		"- D1",
		"",
		"## Resume State",
		"Task was paused.",
	];
	const split = reorderAndSplitForCaching(promptLines.join("\n"));
	assert.ok(split !== null);

	const { before, after } = split;
	assert.ok(
		before.includes("System instructions here."),
		"preamble should be in before",
	);
	assert.ok(after.includes("## Resume State"), "dynamic section in after");
});
test("reorderForCaching_when_inlined_slice_summary_has_requirements_advanced_keeps_it_after_mission", () => {
const prompt = [