fix: enable prompt caching for Anthropic agent requests

Add an ephemeral cache_control breakpoint on the system message so
OpenRouter forwards it to upstream Anthropic providers (Anthropic direct,
Amazon Bedrock, Google Vertex). Without the breakpoint, none of these
providers caches the prompt: the OpenRouter activity log showed
tokens_cached=0 on every Claude request.

Verified with live OpenRouter calls: the second identical request now
reports a ~99% cache hit on the system prompt, cutting per-call input
cost by ~12x.
This commit is contained in:
mantrakp04 2026-04-13 20:01:30 -07:00
parent 5399142db9
commit d16e3d84ba

View File

@@ -53,11 +53,25 @@ export const POST = createSmartRouteHandler({
const isDocsOrSearch = systemPromptId === "docs-ask-ai" || systemPromptId === "command-center-ask-ai";
const stepLimit = toolsArg == null ? 1 : isDocsOrSearch ? 50 : 5;
// Anthropic models require an explicit cache_control breakpoint for prompt caching
// to work via OpenRouter (whether routed to Anthropic, Bedrock, or Google Vertex).
// Mark the static system prompt as an ephemeral cache breakpoint.
const isAnthropic = model.modelId.startsWith("anthropic/");
const systemMessage: ModelMessage = {
role: "system",
content: systemPrompt,
...(isAnthropic && {
providerOptions: {
openrouter: { cacheControl: { type: "ephemeral" } },
},
}),
};
const fullMessages: ModelMessage[] = [systemMessage, ...(messages as ModelMessage[])];
if (mode === "stream") {
const result = streamText({
model,
system: systemPrompt,
messages: messages as ModelMessage[],
messages: fullMessages,
tools: toolsArg,
stopWhen: stepCountIs(stepLimit),
});
@@ -71,8 +85,7 @@ export const POST = createSmartRouteHandler({
const timeoutId = setTimeout(() => controller.abort(), 120_000);
const result = await generateText({
model,
system: systemPrompt,
messages: messages as ModelMessage[],
messages: fullMessages,
tools: toolsArg,
abortSignal: controller.signal,
stopWhen: stepCountIs(stepLimit),