feat(sf/prompts): Phase 4 — cache_control breakpoints at static/dynamic boundary
Add reorderAndSplitForCaching, a structured companion to reorderForCaching that
returns {before, after} split at the semi-static→dynamic section boundary.
- prompt-ordering.js: export reorderAndSplitForCaching — returns null if there are no
sections, no dynamic sections, or no stable prefix; otherwise {before: preamble+static+semi-static, after: dynamic}
- auto.js: import and wire reorderAndSplitForCaching into deps
- phases-unit.js: use split function; pass promptParts to runUnit when split
succeeds; fall back to flat reorderForCaching when null
- run-unit.js: when promptParts is present, send a two-block content array
[{type:text, text:before, cache_control:{type:ephemeral}}, {type:text, text:after}]
so Anthropic-compatible providers cache the stable prefix
- openai-completions.ts: preserve cache_control on text parts in convertMessages;
skip maybeAddOpenRouterAnthropicCacheControl if any part already has cache_control
Tests: 5 new contract tests (3 for reorderAndSplitForCaching, 2 for reorderForCaching); all 4502 unit tests pass.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
3b83d09692
commit
a49ea1da87
6 changed files with 237 additions and 9 deletions
|
|
@ -528,6 +528,16 @@ function maybeAddOpenRouterAnthropicCacheControl(
|
||||||
|
|
||||||
if (!Array.isArray(content)) continue;
|
if (!Array.isArray(content)) continue;
|
||||||
|
|
||||||
|
// Skip if any part already has cache_control — the prompt pipeline already
|
||||||
|
// placed breakpoints at the correct static/dynamic boundary.
|
||||||
|
const alreadyMarked = content.some(
|
||||||
|
(p) =>
|
||||||
|
p &&
|
||||||
|
typeof p === "object" &&
|
||||||
|
(p as unknown as Record<string, unknown>).cache_control !== undefined,
|
||||||
|
);
|
||||||
|
if (alreadyMarked) return;
|
||||||
|
|
||||||
// Find last text part and add cache_control
|
// Find last text part and add cache_control
|
||||||
for (let j = content.length - 1; j >= 0; j--) {
|
for (let j = content.length - 1; j >= 0; j--) {
|
||||||
const part = content[j];
|
const part = content[j];
|
||||||
|
|
@ -605,10 +615,21 @@ export function convertMessages(
|
||||||
const content: ChatCompletionContentPart[] = msg.content.map(
|
const content: ChatCompletionContentPart[] = msg.content.map(
|
||||||
(item): ChatCompletionContentPart => {
|
(item): ChatCompletionContentPart => {
|
||||||
if (item.type === "text") {
|
if (item.type === "text") {
|
||||||
return {
|
const part: ChatCompletionContentPartText = {
|
||||||
type: "text",
|
type: "text",
|
||||||
text: sanitizeSurrogates(item.text),
|
text: sanitizeSurrogates(item.text),
|
||||||
} satisfies ChatCompletionContentPartText;
|
};
|
||||||
|
// Preserve cache_control if present (set upstream for Anthropic prompt caching).
|
||||||
|
// The property is not in the OpenAI SDK type but is accepted by providers
|
||||||
|
// that support Anthropic-style caching (openrouter/anthropic/*).
|
||||||
|
const cacheControl = (
|
||||||
|
item as unknown as Record<string, unknown>
|
||||||
|
).cache_control;
|
||||||
|
if (cacheControl) {
|
||||||
|
(part as unknown as Record<string, unknown>).cache_control =
|
||||||
|
cacheControl;
|
||||||
|
}
|
||||||
|
return part;
|
||||||
} else {
|
} else {
|
||||||
return {
|
return {
|
||||||
type: "image_url",
|
type: "image_url",
|
||||||
|
|
|
||||||
|
|
@ -154,7 +154,10 @@ import {
|
||||||
loadEffectiveSFPreferences,
|
loadEffectiveSFPreferences,
|
||||||
resolveAutoSupervisorConfig,
|
resolveAutoSupervisorConfig,
|
||||||
} from "./preferences.js";
|
} from "./preferences.js";
|
||||||
import { reorderForCaching } from "./prompt-ordering.js";
|
import {
|
||||||
|
reorderAndSplitForCaching,
|
||||||
|
reorderForCaching,
|
||||||
|
} from "./prompt-ordering.js";
|
||||||
import { pruneQueueOrder } from "./queue-order.js";
|
import { pruneQueueOrder } from "./queue-order.js";
|
||||||
import { recordOutcome, resetRoutingHistory } from "./routing-history.js";
|
import { recordOutcome, resetRoutingHistory } from "./routing-history.js";
|
||||||
import { convertDispatchRules, initRegistry } from "./rule-registry.js";
|
import { convertDispatchRules, initRegistry } from "./rule-registry.js";
|
||||||
|
|
@ -1394,6 +1397,7 @@ function buildLoopDeps() {
|
||||||
},
|
},
|
||||||
isDbAvailable,
|
isDbAvailable,
|
||||||
reorderForCaching,
|
reorderForCaching,
|
||||||
|
reorderAndSplitForCaching,
|
||||||
// Filesystem
|
// Filesystem
|
||||||
existsSync,
|
existsSync,
|
||||||
readFileSync: (path, encoding) => readFileSync(path, encoding),
|
readFileSync: (path, encoding) => readFileSync(path, encoding),
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,6 @@ import {
|
||||||
classifyExecutorRefusal,
|
classifyExecutorRefusal,
|
||||||
consumePendingAutonomousSolverSteering,
|
consumePendingAutonomousSolverSteering,
|
||||||
getConfiguredAutonomousSolverMaxIterations,
|
getConfiguredAutonomousSolverMaxIterations,
|
||||||
isNoOpExecutorTranscript,
|
|
||||||
readAutonomousSolverState,
|
readAutonomousSolverState,
|
||||||
recordAutonomousSolverMissingCheckpointRetry,
|
recordAutonomousSolverMissingCheckpointRetry,
|
||||||
} from "../autonomous-solver.js";
|
} from "../autonomous-solver.js";
|
||||||
|
|
@ -518,9 +517,16 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Cache-optimize prompt section ordering
|
// Cache-optimize prompt section ordering; split at the semi-static→dynamic
|
||||||
|
// boundary so providers can mark the stable prefix with cache_control:ephemeral.
|
||||||
|
let promptParts = null;
|
||||||
try {
|
try {
|
||||||
finalPrompt = deps.reorderForCaching(finalPrompt);
|
promptParts = deps.reorderAndSplitForCaching?.(finalPrompt) ?? null;
|
||||||
|
if (promptParts) {
|
||||||
|
finalPrompt = promptParts.before + "\n" + promptParts.after;
|
||||||
|
} else {
|
||||||
|
finalPrompt = deps.reorderForCaching(finalPrompt);
|
||||||
|
}
|
||||||
} catch (reorderErr) {
|
} catch (reorderErr) {
|
||||||
const msg = getErrorMessage(reorderErr);
|
const msg = getErrorMessage(reorderErr);
|
||||||
logWarning("engine", "Prompt reorder failed", { error: msg });
|
logWarning("engine", "Prompt reorder failed", { error: msg });
|
||||||
|
|
@ -719,7 +725,9 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
||||||
unitType,
|
unitType,
|
||||||
unitId,
|
unitId,
|
||||||
});
|
});
|
||||||
const unitResult = await runUnit(ctx, pi, s, unitType, unitId, finalPrompt);
|
const unitResult = await runUnit(ctx, pi, s, unitType, unitId, finalPrompt, {
|
||||||
|
promptParts: promptParts ?? undefined,
|
||||||
|
});
|
||||||
s.lastUnitAgentEndMessages = unitResult.event?.messages ?? null;
|
s.lastUnitAgentEndMessages = unitResult.event?.messages ?? null;
|
||||||
let currentUnitResult = unitResult;
|
let currentUnitResult = unitResult;
|
||||||
const executorMessages = unitResult.event?.messages ?? [];
|
const executorMessages = unitResult.event?.messages ?? [];
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,10 @@ let sessionSwitchGeneration = 0;
|
||||||
*/
|
*/
|
||||||
export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
|
export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
|
||||||
const keepSession = options?.keepSession === true;
|
const keepSession = options?.keepSession === true;
|
||||||
|
// promptParts: {before, after} — stable prefix (to cache) + dynamic suffix.
|
||||||
|
// When present, passes the content as a two-block array so providers can mark
|
||||||
|
// the stable prefix with cache_control:ephemeral.
|
||||||
|
const promptParts = options?.promptParts ?? null;
|
||||||
debugLog("runUnit", { phase: "start", unitType, unitId, keepSession });
|
debugLog("runUnit", { phase: "start", unitType, unitId, keepSession });
|
||||||
// GAP-10: Ensure cwd matches basePath BEFORE newSession() captures it. The
|
// GAP-10: Ensure cwd matches basePath BEFORE newSession() captures it. The
|
||||||
// new session reads process.cwd() during construction to anchor its tool
|
// new session reads process.cwd() during construction to anchor its tool
|
||||||
|
|
@ -257,8 +261,21 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
|
// When promptParts is available, send structured content so the provider can
|
||||||
|
// apply cache_control:ephemeral to the stable prefix (before) while leaving
|
||||||
|
// the dynamic suffix (after) uncached.
|
||||||
|
const messageContent = promptParts
|
||||||
|
? [
|
||||||
|
{
|
||||||
|
type: "text",
|
||||||
|
text: promptParts.before,
|
||||||
|
cache_control: { type: "ephemeral" },
|
||||||
|
},
|
||||||
|
{ type: "text", text: promptParts.after },
|
||||||
|
]
|
||||||
|
: prompt;
|
||||||
await pi.sendMessage(
|
await pi.sendMessage(
|
||||||
{ customType: "sf-auto", content: prompt, display: s.verbose },
|
{ customType: "sf-auto", content: messageContent, display: s.verbose },
|
||||||
{ triggerTurn: true },
|
{ triggerTurn: true },
|
||||||
);
|
);
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
||||||
|
|
@ -132,6 +132,51 @@ export function reorderForCaching(prompt) {
|
||||||
}
|
}
|
||||||
return parts.join("\n");
|
return parts.join("\n");
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Reorder a prompt and split it at the semi-static → dynamic boundary so the
|
||||||
|
* static+semi-static prefix can be marked with cache_control: ephemeral on
|
||||||
|
* Anthropic-compatible providers.
|
||||||
|
*
|
||||||
|
* Returns `{before: string, after: string}` where:
|
||||||
|
* - `before` = preamble + all static + all semi-static sections (cache this)
|
||||||
|
* - `after` = all dynamic sections (do not cache)
|
||||||
|
*
|
||||||
|
* Returns `null` if the prompt has no dynamic sections (nothing to split on).
|
||||||
|
*
|
||||||
|
* @param prompt The assembled prompt string
|
||||||
|
* @returns Split prompt or null if no dynamic content exists
|
||||||
|
*/
|
||||||
|
export function reorderAndSplitForCaching(prompt) {
|
||||||
|
const { preamble, sections } = splitSections(prompt);
|
||||||
|
if (sections.length === 0) return null;
|
||||||
|
|
||||||
|
const hasDynamic = sections.some((s) => s.role === "dynamic");
|
||||||
|
if (!hasDynamic) return null;
|
||||||
|
|
||||||
|
const sorted = [...sections].sort((a, b) => {
|
||||||
|
return ROLE_ORDER[a.role] - ROLE_ORDER[b.role];
|
||||||
|
});
|
||||||
|
|
||||||
|
const beforeParts = preamble ? [preamble] : [];
|
||||||
|
const afterParts = [];
|
||||||
|
for (const section of sorted) {
|
||||||
|
if (section.role === "dynamic") {
|
||||||
|
afterParts.push(section.content);
|
||||||
|
} else {
|
||||||
|
beforeParts.push(section.content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If nothing ended up in before (no static/semi-static sections), return null —
|
||||||
|
// there is no stable prefix to cache.
|
||||||
|
if (beforeParts.length === 0) return null;
|
||||||
|
|
||||||
|
return {
|
||||||
|
before: beforeParts.join("\n"),
|
||||||
|
after: afterParts.join("\n"),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Analyze a prompt's cache efficiency without reordering.
|
* Analyze a prompt's cache efficiency without reordering.
|
||||||
* Returns stats about how much of the prompt is cacheable.
|
* Returns stats about how much of the prompt is cacheable.
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,140 @@
|
||||||
import assert from "node:assert/strict";
|
import assert from "node:assert/strict";
|
||||||
import { test } from "vitest";
|
import { test } from "vitest";
|
||||||
|
|
||||||
import { reorderForCaching } from "../prompt-ordering.js";
|
import {
|
||||||
|
reorderAndSplitForCaching,
|
||||||
|
reorderForCaching,
|
||||||
|
} from "../prompt-ordering.js";
|
||||||
|
|
||||||
|
test("reorderForCaching_when_inlined_slice_summary_has_requirements_advanced_keeps_it_after_mission", () => {
|
||||||
|
const prompt = [
|
||||||
|
"# Milestone Validation",
|
||||||
|
"",
|
||||||
|
"## Working Directory",
|
||||||
|
"/repo",
|
||||||
|
"",
|
||||||
|
"## Mission",
|
||||||
|
"Dispatch reviewers.",
|
||||||
|
"",
|
||||||
|
"## Context",
|
||||||
|
"Inlined below.",
|
||||||
|
"",
|
||||||
|
"## Inlined Context",
|
||||||
|
"### S01 Summary",
|
||||||
|
"# S01",
|
||||||
|
"",
|
||||||
|
"## Requirements Advanced",
|
||||||
|
"- R1",
|
||||||
|
"",
|
||||||
|
"## Requirements Validated",
|
||||||
|
"None.",
|
||||||
|
].join("\n");
|
||||||
|
|
||||||
|
const reordered = reorderForCaching(prompt);
|
||||||
|
|
||||||
|
assert.ok(
|
||||||
|
reordered.indexOf("## Mission") <
|
||||||
|
reordered.indexOf("## Requirements Advanced"),
|
||||||
|
);
|
||||||
|
assert.ok(
|
||||||
|
reordered.indexOf("## Context") <
|
||||||
|
reordered.indexOf("## Requirements Advanced"),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("reorderForCaching_when_top_level_requirements_exists_still_hoists_exact_requirements_block", () => {
|
||||||
|
const prompt = [
|
||||||
|
"# Execute",
|
||||||
|
"",
|
||||||
|
"## Mission",
|
||||||
|
"Do work.",
|
||||||
|
"",
|
||||||
|
"## Requirements",
|
||||||
|
"- R1",
|
||||||
|
"",
|
||||||
|
"## Verification",
|
||||||
|
"Run tests.",
|
||||||
|
].join("\n");
|
||||||
|
|
||||||
|
const reordered = reorderForCaching(prompt);
|
||||||
|
|
||||||
|
assert.ok(
|
||||||
|
reordered.indexOf("## Requirements") < reordered.indexOf("## Mission"),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("reorderAndSplitForCaching_when_prompt_has_dynamic_section_returns_split", () => {
|
||||||
|
const prompt = [
|
||||||
|
"Preamble text.",
|
||||||
|
"",
|
||||||
|
"## Working Directory",
|
||||||
|
"/repo",
|
||||||
|
"",
|
||||||
|
"## Requirements",
|
||||||
|
"- R1",
|
||||||
|
"",
|
||||||
|
"## Inlined Task Plan",
|
||||||
|
"Task: do something.",
|
||||||
|
].join("\n");
|
||||||
|
|
||||||
|
const result = reorderAndSplitForCaching(prompt);
|
||||||
|
|
||||||
|
assert.ok(result !== null, "expected non-null split result");
|
||||||
|
assert.ok(
|
||||||
|
result.before.includes("## Working Directory"),
|
||||||
|
"before should have static section",
|
||||||
|
);
|
||||||
|
assert.ok(
|
||||||
|
result.before.includes("## Requirements"),
|
||||||
|
"before should have semi-static section",
|
||||||
|
);
|
||||||
|
assert.ok(
|
||||||
|
result.after.includes("## Inlined Task Plan"),
|
||||||
|
"after should have dynamic section",
|
||||||
|
);
|
||||||
|
assert.ok(
|
||||||
|
!result.before.includes("## Inlined Task Plan"),
|
||||||
|
"before should not have dynamic section",
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("reorderAndSplitForCaching_when_no_dynamic_sections_returns_null", () => {
|
||||||
|
const prompt = [
|
||||||
|
"## Working Directory",
|
||||||
|
"/repo",
|
||||||
|
"",
|
||||||
|
"## Requirements",
|
||||||
|
"- R1",
|
||||||
|
].join("\n");
|
||||||
|
|
||||||
|
const result = reorderAndSplitForCaching(prompt);
|
||||||
|
assert.strictEqual(result, null, "expected null when no dynamic sections");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("reorderAndSplitForCaching_preamble_goes_into_before", () => {
|
||||||
|
const prompt = [
|
||||||
|
"System instructions here.",
|
||||||
|
"",
|
||||||
|
"## Decisions",
|
||||||
|
"- D1",
|
||||||
|
"",
|
||||||
|
"## Resume State",
|
||||||
|
"Task was paused.",
|
||||||
|
].join("\n");
|
||||||
|
|
||||||
|
const result = reorderAndSplitForCaching(prompt);
|
||||||
|
|
||||||
|
assert.ok(result !== null);
|
||||||
|
assert.ok(
|
||||||
|
result.before.includes("System instructions here."),
|
||||||
|
"preamble should be in before",
|
||||||
|
);
|
||||||
|
assert.ok(
|
||||||
|
result.after.includes("## Resume State"),
|
||||||
|
"dynamic section in after",
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
test("reorderForCaching_when_inlined_slice_summary_has_requirements_advanced_keeps_it_after_mission", () => {
|
test("reorderForCaching_when_inlined_slice_summary_has_requirements_advanced_keeps_it_after_mission", () => {
|
||||||
const prompt = [
|
const prompt = [
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue