From d16e3d84ba65335c27132088aebcb5fda0121a2f Mon Sep 17 00:00:00 2001 From: mantrakp04 Date: Mon, 13 Apr 2026 20:01:30 -0700 Subject: [PATCH] fix: enable prompt caching for Anthropic agent requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an ephemeral cache_control breakpoint on the system message so OpenRouter forwards it to upstream Anthropic providers (Anthropic direct, Amazon Bedrock, Google Vertex). Without the breakpoint, none of them caches the prompt — the OpenRouter activity log showed tokens_cached=0 on every Claude request. Verified with live OpenRouter calls: the second identical request now reports a ~99% cache hit on the system prompt, cutting per-call input cost by roughly 12x. --- .../app/api/latest/ai/query/[mode]/route.ts | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/apps/backend/src/app/api/latest/ai/query/[mode]/route.ts b/apps/backend/src/app/api/latest/ai/query/[mode]/route.ts index c578f7723..e8826294e 100644 --- a/apps/backend/src/app/api/latest/ai/query/[mode]/route.ts +++ b/apps/backend/src/app/api/latest/ai/query/[mode]/route.ts @@ -53,11 +53,25 @@ export const POST = createSmartRouteHandler({ const isDocsOrSearch = systemPromptId === "docs-ask-ai" || systemPromptId === "command-center-ask-ai"; const stepLimit = toolsArg == null ? 1 : isDocsOrSearch ? 50 : 5; + // Anthropic models require an explicit cache_control breakpoint for prompt caching + // to work via OpenRouter (whether routed to Anthropic, Bedrock, or Google Vertex). + // Mark the static system prompt as an ephemeral cache breakpoint. 
+ const isAnthropic = model.modelId.startsWith("anthropic/"); + const systemMessage: ModelMessage = { + role: "system", + content: systemPrompt, + ...(isAnthropic && { + providerOptions: { + openrouter: { cacheControl: { type: "ephemeral" } }, + }, + }), + }; + const fullMessages: ModelMessage[] = [systemMessage, ...(messages as ModelMessage[])]; + if (mode === "stream") { const result = streamText({ model, - system: systemPrompt, - messages: messages as ModelMessage[], + messages: fullMessages, tools: toolsArg, stopWhen: stepCountIs(stepLimit), }); @@ -71,8 +85,7 @@ export const POST = createSmartRouteHandler({ const timeoutId = setTimeout(() => controller.abort(), 120_000); const result = await generateText({ model, - system: systemPrompt, - messages: messages as ModelMessage[], + messages: fullMessages, tools: toolsArg, abortSignal: controller.signal, stopWhen: stepCountIs(stepLimit),