feat(sf/prompts): Phase 4 — cache_control breakpoints at static/dynamic boundary
Split reorderForCaching into a structured reorderAndSplitForCaching that
returns {before, after} at the semi-static→dynamic section boundary.
- prompt-ordering.js: export reorderAndSplitForCaching — returns null if no
dynamic sections, otherwise {before: static+semi-static, after: dynamic}
- auto.js: import and wire reorderAndSplitForCaching into deps
- phases-unit.js: use split function; pass promptParts to runUnit when split
succeeds; fall back to flat reorderForCaching when null
- run-unit.js: when promptParts is present, send a two-block content array
[{type:text, text:before, cache_control:{type:ephemeral}}, {type:text, text:after}]
so Anthropic-compatible providers cache the stable prefix
- openai-completions.ts: preserve cache_control on text parts in convertMessages;
skip maybeAddOpenRouterAnthropicCacheControl if any part already has cache_control
Tests: 5 new contract tests for reorderAndSplitForCaching; all 4502 unit tests pass.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
3b83d09692
commit
a49ea1da87
6 changed files with 237 additions and 9 deletions
|
|
@ -528,6 +528,16 @@ function maybeAddOpenRouterAnthropicCacheControl(
|
|||
|
||||
if (!Array.isArray(content)) continue;
|
||||
|
||||
// Skip if any part already has cache_control — the prompt pipeline already
|
||||
// placed breakpoints at the correct static/dynamic boundary.
|
||||
const alreadyMarked = content.some(
|
||||
(p) =>
|
||||
p &&
|
||||
typeof p === "object" &&
|
||||
(p as unknown as Record<string, unknown>).cache_control !== undefined,
|
||||
);
|
||||
if (alreadyMarked) return;
|
||||
|
||||
// Find last text part and add cache_control
|
||||
for (let j = content.length - 1; j >= 0; j--) {
|
||||
const part = content[j];
|
||||
|
|
@ -605,10 +615,21 @@ export function convertMessages(
|
|||
const content: ChatCompletionContentPart[] = msg.content.map(
|
||||
(item): ChatCompletionContentPart => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
const part: ChatCompletionContentPartText = {
|
||||
type: "text",
|
||||
text: sanitizeSurrogates(item.text),
|
||||
} satisfies ChatCompletionContentPartText;
|
||||
};
|
||||
// Preserve cache_control if present (set upstream for Anthropic prompt caching).
|
||||
// The property is not in the OpenAI SDK type but is accepted by providers
|
||||
// that support Anthropic-style caching (openrouter/anthropic/*).
|
||||
const cacheControl = (
|
||||
item as unknown as Record<string, unknown>
|
||||
).cache_control;
|
||||
if (cacheControl) {
|
||||
(part as unknown as Record<string, unknown>).cache_control =
|
||||
cacheControl;
|
||||
}
|
||||
return part;
|
||||
} else {
|
||||
return {
|
||||
type: "image_url",
|
||||
|
|
|
|||
|
|
@ -154,7 +154,10 @@ import {
|
|||
loadEffectiveSFPreferences,
|
||||
resolveAutoSupervisorConfig,
|
||||
} from "./preferences.js";
|
||||
import { reorderForCaching } from "./prompt-ordering.js";
|
||||
import {
|
||||
reorderAndSplitForCaching,
|
||||
reorderForCaching,
|
||||
} from "./prompt-ordering.js";
|
||||
import { pruneQueueOrder } from "./queue-order.js";
|
||||
import { recordOutcome, resetRoutingHistory } from "./routing-history.js";
|
||||
import { convertDispatchRules, initRegistry } from "./rule-registry.js";
|
||||
|
|
@ -1394,6 +1397,7 @@ function buildLoopDeps() {
|
|||
},
|
||||
isDbAvailable,
|
||||
reorderForCaching,
|
||||
reorderAndSplitForCaching,
|
||||
// Filesystem
|
||||
existsSync,
|
||||
readFileSync: (path, encoding) => readFileSync(path, encoding),
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@ import {
|
|||
classifyExecutorRefusal,
|
||||
consumePendingAutonomousSolverSteering,
|
||||
getConfiguredAutonomousSolverMaxIterations,
|
||||
isNoOpExecutorTranscript,
|
||||
readAutonomousSolverState,
|
||||
recordAutonomousSolverMissingCheckpointRetry,
|
||||
} from "../autonomous-solver.js";
|
||||
|
|
@ -518,9 +517,16 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
});
|
||||
}
|
||||
}
|
||||
// Cache-optimize prompt section ordering
|
||||
// Cache-optimize prompt section ordering; split at the semi-static→dynamic
|
||||
// boundary so providers can mark the stable prefix with cache_control:ephemeral.
|
||||
let promptParts = null;
|
||||
try {
|
||||
finalPrompt = deps.reorderForCaching(finalPrompt);
|
||||
promptParts = deps.reorderAndSplitForCaching?.(finalPrompt) ?? null;
|
||||
if (promptParts) {
|
||||
finalPrompt = promptParts.before + "\n" + promptParts.after;
|
||||
} else {
|
||||
finalPrompt = deps.reorderForCaching(finalPrompt);
|
||||
}
|
||||
} catch (reorderErr) {
|
||||
const msg = getErrorMessage(reorderErr);
|
||||
logWarning("engine", "Prompt reorder failed", { error: msg });
|
||||
|
|
@ -719,7 +725,9 @@ export async function runUnitPhase(ic, iterData, loopState, sidecarItem) {
|
|||
unitType,
|
||||
unitId,
|
||||
});
|
||||
const unitResult = await runUnit(ctx, pi, s, unitType, unitId, finalPrompt);
|
||||
const unitResult = await runUnit(ctx, pi, s, unitType, unitId, finalPrompt, {
|
||||
promptParts: promptParts ?? undefined,
|
||||
});
|
||||
s.lastUnitAgentEndMessages = unitResult.event?.messages ?? null;
|
||||
let currentUnitResult = unitResult;
|
||||
const executorMessages = unitResult.event?.messages ?? [];
|
||||
|
|
|
|||
|
|
@ -48,6 +48,10 @@ let sessionSwitchGeneration = 0;
|
|||
*/
|
||||
export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
|
||||
const keepSession = options?.keepSession === true;
|
||||
// promptParts: {before, after} — stable prefix (to cache) + dynamic suffix.
|
||||
// When present, passes the content as a two-block array so providers can mark
|
||||
// the stable prefix with cache_control:ephemeral.
|
||||
const promptParts = options?.promptParts ?? null;
|
||||
debugLog("runUnit", { phase: "start", unitType, unitId, keepSession });
|
||||
// GAP-10: Ensure cwd matches basePath BEFORE newSession() captures it. The
|
||||
// new session reads process.cwd() during construction to anchor its tool
|
||||
|
|
@ -257,8 +261,21 @@ export async function runUnit(ctx, pi, s, unitType, unitId, prompt, options) {
|
|||
}
|
||||
}
|
||||
try {
|
||||
// When promptParts is available, send structured content so the provider can
|
||||
// apply cache_control:ephemeral to the stable prefix (before) while leaving
|
||||
// the dynamic suffix (after) uncached.
|
||||
const messageContent = promptParts
|
||||
? [
|
||||
{
|
||||
type: "text",
|
||||
text: promptParts.before,
|
||||
cache_control: { type: "ephemeral" },
|
||||
},
|
||||
{ type: "text", text: promptParts.after },
|
||||
]
|
||||
: prompt;
|
||||
await pi.sendMessage(
|
||||
{ customType: "sf-auto", content: prompt, display: s.verbose },
|
||||
{ customType: "sf-auto", content: messageContent, display: s.verbose },
|
||||
{ triggerTurn: true },
|
||||
);
|
||||
} finally {
|
||||
|
|
|
|||
|
|
@ -132,6 +132,51 @@ export function reorderForCaching(prompt) {
|
|||
}
|
||||
return parts.join("\n");
|
||||
}
|
||||
/**
|
||||
* Reorder a prompt and split it at the semi-static → dynamic boundary so the
|
||||
* static+semi-static prefix can be marked with cache_control: ephemeral on
|
||||
* Anthropic-compatible providers.
|
||||
*
|
||||
* Returns `{before: string, after: string}` where:
|
||||
* - `before` = preamble + all static + all semi-static sections (cache this)
|
||||
* - `after` = all dynamic sections (do not cache)
|
||||
*
|
||||
* Returns `null` if the prompt has no dynamic sections (nothing to split on).
|
||||
*
|
||||
* @param prompt The assembled prompt string
|
||||
* @returns Split prompt or null if no dynamic content exists
|
||||
*/
|
||||
/**
 * Reorder a prompt and split it at the semi-static → dynamic boundary so the
 * static+semi-static prefix can be marked with cache_control: ephemeral on
 * Anthropic-compatible providers.
 *
 * Returns `{before: string, after: string}` where:
 * - `before` = preamble + all static + all semi-static sections (cache this)
 * - `after` = all dynamic sections (do not cache)
 *
 * Returns `null` if the prompt has no dynamic sections (nothing to split on),
 * or if there is no stable prefix at all (no preamble and no non-dynamic
 * sections), since then there is nothing worth caching.
 *
 * @param prompt The assembled prompt string
 * @returns Split prompt or null if no dynamic content exists
 */
export function reorderAndSplitForCaching(prompt) {
  const { preamble, sections } = splitSections(prompt);
  if (sections.length === 0) return null;

  // Partition up front: dynamic sections become the uncached suffix.
  // Filtering preserves their original relative order, which matches what a
  // stable sort over equal role keys would produce.
  const dynamicSections = sections.filter((s) => s.role === "dynamic");
  if (dynamicSections.length === 0) return null;

  // Stable prefix: every non-dynamic section, ranked by role. Array#sort is
  // stable per the ES spec, so sections sharing a role keep source order.
  const stableSections = sections
    .filter((s) => s.role !== "dynamic")
    .sort((a, b) => ROLE_ORDER[a.role] - ROLE_ORDER[b.role]);

  const beforeParts = [];
  if (preamble) beforeParts.push(preamble);
  for (const s of stableSections) beforeParts.push(s.content);

  // No preamble and no static/semi-static sections — there is no stable
  // prefix to cache, so signal the caller to fall back to the flat path.
  if (beforeParts.length === 0) return null;

  return {
    before: beforeParts.join("\n"),
    after: dynamicSections.map((s) => s.content).join("\n"),
  };
}
|
||||
|
||||
/**
|
||||
* Analyze a prompt's cache efficiency without reordering.
|
||||
* Returns stats about how much of the prompt is cacheable.
|
||||
|
|
|
|||
|
|
@ -1,7 +1,140 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { test } from "vitest";
|
||||
|
||||
import { reorderForCaching } from "../prompt-ordering.js";
|
||||
import {
|
||||
reorderAndSplitForCaching,
|
||||
reorderForCaching,
|
||||
} from "../prompt-ordering.js";
|
||||
|
||||
test("reorderForCaching_when_inlined_slice_summary_has_requirements_advanced_keeps_it_after_mission", () => {
  // A nested "## Requirements Advanced" heading inside an inlined slice
  // summary must not be hoisted as if it were a top-level section.
  const promptLines = [
    "# Milestone Validation",
    "",
    "## Working Directory",
    "/repo",
    "",
    "## Mission",
    "Dispatch reviewers.",
    "",
    "## Context",
    "Inlined below.",
    "",
    "## Inlined Context",
    "### S01 Summary",
    "# S01",
    "",
    "## Requirements Advanced",
    "- R1",
    "",
    "## Requirements Validated",
    "None.",
  ];

  const reordered = reorderForCaching(promptLines.join("\n"));

  const advancedAt = reordered.indexOf("## Requirements Advanced");
  assert.ok(reordered.indexOf("## Mission") < advancedAt);
  assert.ok(reordered.indexOf("## Context") < advancedAt);
});

test("reorderForCaching_when_top_level_requirements_exists_still_hoists_exact_requirements_block", () => {
  // An exact top-level "## Requirements" block is hoisted ahead of Mission.
  const promptLines = [
    "# Execute",
    "",
    "## Mission",
    "Do work.",
    "",
    "## Requirements",
    "- R1",
    "",
    "## Verification",
    "Run tests.",
  ];

  const reordered = reorderForCaching(promptLines.join("\n"));

  assert.ok(
    reordered.indexOf("## Requirements") < reordered.indexOf("## Mission"),
  );
});
|
||||
|
||||
test("reorderAndSplitForCaching_when_prompt_has_dynamic_section_returns_split", () => {
  const promptLines = [
    "Preamble text.",
    "",
    "## Working Directory",
    "/repo",
    "",
    "## Requirements",
    "- R1",
    "",
    "## Inlined Task Plan",
    "Task: do something.",
  ];

  const split = reorderAndSplitForCaching(promptLines.join("\n"));

  assert.ok(split !== null, "expected non-null split result");
  // Stable sections land in `before`…
  assert.ok(
    split.before.includes("## Working Directory"),
    "before should have static section",
  );
  assert.ok(
    split.before.includes("## Requirements"),
    "before should have semi-static section",
  );
  // …dynamic sections land in `after`, and only there.
  assert.ok(
    split.after.includes("## Inlined Task Plan"),
    "after should have dynamic section",
  );
  assert.ok(
    !split.before.includes("## Inlined Task Plan"),
    "before should not have dynamic section",
  );
});

test("reorderAndSplitForCaching_when_no_dynamic_sections_returns_null", () => {
  // Static + semi-static only — nothing to split on.
  const staticOnlyPrompt = [
    "## Working Directory",
    "/repo",
    "",
    "## Requirements",
    "- R1",
  ].join("\n");

  assert.strictEqual(
    reorderAndSplitForCaching(staticOnlyPrompt),
    null,
    "expected null when no dynamic sections",
  );
});

test("reorderAndSplitForCaching_preamble_goes_into_before", () => {
  // Un-headed leading text (the preamble) belongs to the cacheable prefix.
  const promptLines = [
    "System instructions here.",
    "",
    "## Decisions",
    "- D1",
    "",
    "## Resume State",
    "Task was paused.",
  ];

  const split = reorderAndSplitForCaching(promptLines.join("\n"));

  assert.ok(split !== null);
  assert.ok(
    split.before.includes("System instructions here."),
    "preamble should be in before",
  );
  assert.ok(
    split.after.includes("## Resume State"),
    "dynamic section in after",
  );
});
|
||||
|
||||
|
||||
test("reorderForCaching_when_inlined_slice_summary_has_requirements_advanced_keeps_it_after_mission", () => {
|
||||
const prompt = [
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue