mirror of
https://github.com/stack-auth/stack.git
synced 2026-06-19 21:00:40 +08:00
fix: enable prompt caching for Anthropic agent requests
Add an ephemeral cache_control breakpoint on the system message so that OpenRouter forwards it to upstream Anthropic providers (Anthropic direct, Amazon Bedrock, Google Vertex). Without the breakpoint, none of them caches the prompt — the OpenRouter activity log showed tokens_cached=0 on every Claude request. Verified with live OpenRouter calls: the second identical request now reports a ~99% cache hit on the system prompt, cutting per-call input cost by ~12x.
This commit is contained in:
parent
5399142db9
commit
d16e3d84ba
@@ -53,11 +53,25 @@ export const POST = createSmartRouteHandler({
|
||||
const isDocsOrSearch = systemPromptId === "docs-ask-ai" || systemPromptId === "command-center-ask-ai";
|
||||
const stepLimit = toolsArg == null ? 1 : isDocsOrSearch ? 50 : 5;
|
||||
|
||||
+ // Anthropic models require an explicit cache_control breakpoint for prompt caching
|
||||
+ // to work via OpenRouter (whether routed to Anthropic, Bedrock, or Google Vertex).
|
||||
+ // Mark the static system prompt as an ephemeral cache breakpoint.
|
||||
+ const isAnthropic = model.modelId.startsWith("anthropic/");
|
||||
+ const systemMessage: ModelMessage = {
|
||||
+ role: "system",
|
||||
+ content: systemPrompt,
|
||||
+ ...(isAnthropic && {
|
||||
+ providerOptions: {
|
||||
+ openrouter: { cacheControl: { type: "ephemeral" } },
|
||||
+ },
|
||||
+ }),
|
||||
+ };
|
||||
+ const fullMessages: ModelMessage[] = [systemMessage, ...(messages as ModelMessage[])];
|
||||
|
||||
if (mode === "stream") {
|
||||
const result = streamText({
|
||||
model,
|
||||
- system: systemPrompt,
|
||||
- messages: messages as ModelMessage[],
|
||||
+ messages: fullMessages,
|
||||
tools: toolsArg,
|
||||
stopWhen: stepCountIs(stepLimit),
|
||||
});
|
||||
@@ -71,8 +85,7 @@ export const POST = createSmartRouteHandler({
|
||||
const timeoutId = setTimeout(() => controller.abort(), 120_000);
|
||||
const result = await generateText({
|
||||
model,
|
||||
- system: systemPrompt,
|
||||
- messages: messages as ModelMessage[],
|
||||
+ messages: fullMessages,
|
||||
tools: toolsArg,
|
||||
abortSignal: controller.signal,
|
||||
stopWhen: stepCountIs(stepLimit),
|
||||
|
||||
Loading…
Reference in New Issue
Block a user