universal ai route through ai-proxy (#1312)
Some checks failed
all-good: Did all the other checks pass? / all-good (push) Has been cancelled
Ensure Prisma migrations are in sync with the schema / check_prisma_migrations (22.x) (push) Has been cancelled
DB migration compat / Check if migrations changed (push) Has been cancelled
Docker Server Build and Push / Docker Build and Push Server (push) Has been cancelled
Docker Server Build and Run / docker (push) Has been cancelled
Runs E2E API Tests (Local Emulator) / E2E Tests (Local Emulator, Node ${{ matrix.node-version }}) (22.x) (push) Has been cancelled
Runs E2E API Tests / E2E Tests (Node ${{ matrix.node-version }}, Freestyle ${{ matrix.freestyle-mode }}) (mock, 22.x) (push) Has been cancelled
Runs E2E API Tests / E2E Tests (Node ${{ matrix.node-version }}, Freestyle ${{ matrix.freestyle-mode }}) (prod, 22.x) (push) Has been cancelled
Runs E2E API Tests with custom port prefix / build (22.x) (push) Has been cancelled
Lint & build / lint_and_build (24) (push) Has been cancelled
Dev Environment Test With Custom Base Port / restart-dev-and-test-with-custom-base-port (push) Has been cancelled
Dev Environment Test / restart-dev-and-test (push) Has been cancelled
Run setup tests with custom base port / setup-tests-with-custom-base-port (push) Has been cancelled
Run setup tests / setup-tests (push) Has been cancelled
TOC Generator / TOC Generator (push) Has been cancelled
DB migration compat / Back-compat — Current branch migrations with ${{ needs.check-migrations-changed.outputs.base_branch }} branch code (push) Has been cancelled
DB migration compat / Forward-compat — Current branch code with ${{ needs.check-migrations-changed.outputs.base_branch }} branch migrations (push) Has been cancelled
DB migration compat / No migration changes (skipped) (push) Has been cancelled

<!--

Make sure you've read the CONTRIBUTING.md guidelines:
https://github.com/stack-auth/stack-auth/blob/dev/CONTRIBUTING.md

-->


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

* **Performance Improvements**
  * Redesigned AI request processing to operate efficiently at the backend
    level, providing faster responses and improved reliability for
    AI-powered features

* **Enhanced AI Capabilities**
  * Improved model validation and selection with explicit allowlist of
    supported models, enabling broader model availability and better feature
    stability

* **Developer Updates**
  * Refreshed local development environment configuration

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
BilalG1 2026-04-07 11:09:23 -07:00 committed by GitHub
parent e8428f57c0
commit c2a5b5b40a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 26 additions and 39 deletions

View File

@ -1,4 +1,3 @@
import { forwardToProduction } from "@/lib/ai/forward";
import { selectModel } from "@/lib/ai/models";
import { getFullSystemPrompt } from "@/lib/ai/prompts";
import { requestBodySchema } from "@/lib/ai/schema";
@ -7,7 +6,6 @@ import { listManagedProjectIds } from "@/lib/projects";
import { SmartResponse } from "@/route-handlers/smart-response";
import { createSmartRouteHandler } from "@/route-handlers/smart-route-handler";
import { yupMixed, yupObject, yupString } from "@stackframe/stack-shared/dist/schema-fields";
import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env";
import { StatusError } from "@stackframe/stack-shared/dist/utils/errors";
import { Json } from "@stackframe/stack-shared/dist/utils/json";
import { generateText, ModelMessage, stepCountIs, streamText } from "ai";
@ -30,18 +28,6 @@ export const POST = createSmartRouteHandler({
throw new StatusError(StatusError.BadRequest, `Invalid tool names in request.`);
}
const apiKey = getEnvVariable("STACK_OPENROUTER_API_KEY");
if (apiKey === "FORWARD_TO_PRODUCTION") {
const prodResponse = await forwardToProduction(mode, body);
return {
statusCode: prodResponse.status,
bodyType: "response" as const,
body: prodResponse,
};
}
const isAuthenticated = fullReq.auth != null;
const { quality, speed, systemPrompt: systemPromptId, tools: toolNames, messages, projectId } = body;

View File

@ -1,10 +1,11 @@
import { ALLOWED_MODEL_IDS } from "@/lib/ai/models";
import { handleApiRequest } from "@/route-handlers/smart-route-handler";
import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env";
import { StatusError } from "@stackframe/stack-shared/dist/utils/errors";
import { NextRequest } from "next/server";
const OPENROUTER_BASE_URL = "https://openrouter.ai/api";
const OPENROUTER_MODEL = "anthropic/claude-sonnet-4.6";
const OPENROUTER_DEFAULT_MODEL = "anthropic/claude-sonnet-4.6";
function sanitizeBody(raw: ArrayBuffer): Uint8Array {
const text = new TextDecoder().decode(raw);
@ -19,7 +20,9 @@ function sanitizeBody(raw: ArrayBuffer): Uint8Array {
throw new StatusError(400, "Request body must be a JSON object");
}
parsed.model = OPENROUTER_MODEL;
if (!parsed.model || !ALLOWED_MODEL_IDS.has(parsed.model)) {
parsed.model = OPENROUTER_DEFAULT_MODEL;
}
// OpenRouter limits metadata.user_id to 128 characters
if (parsed.metadata?.user_id && parsed.metadata.user_id.length > 128) {

View File

@ -1,17 +0,0 @@
import { type RequestBody } from "@/lib/ai/schema";
/**
 * Forwards an AI query request to the hosted Stack Auth API.
 *
 * Serializes `body` as JSON and POSTs it to
 * `https://api.stack-auth.com/api/latest/ai/query/<mode>`.
 *
 * @param mode - Which query endpoint to hit; becomes the last path segment of the URL.
 * @param body - The request body, sent as-is via `JSON.stringify`.
 * @returns The `Response` from `fetch`, returned unmodified (status is not checked here).
 */
export async function forwardToProduction(
mode: "stream" | "generate",
body: RequestBody,
): Promise<Response> {
const productionUrl = `https://api.stack-auth.com/api/latest/ai/query/${mode}`;
// Only these two headers are sent; nothing from an incoming request is copied over.
const forwardHeaders = new Headers();
forwardHeaders.set("content-type", "application/json");
// NOTE(review): presumably asks for an uncompressed response so the body can be passed through unchanged — confirm.
forwardHeaders.set("accept-encoding", "identity");
return await fetch(productionUrl, {
method: "POST",
headers: forwardHeaders,
body: JSON.stringify(body),
});
}

View File

@ -1,5 +1,5 @@
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env";
import { getNodeEnvironment } from "@stackframe/stack-shared/dist/utils/env";
export type ModelQuality = "dumb" | "smart" | "smartest";
export type ModelSpeed = "slow" | "fast";
@ -48,9 +48,24 @@ const MODEL_SELECTION_MATRIX: Record<
},
};
/**
 * Allowlist of model IDs.
 *
 * Contains every `modelId` found in `MODEL_SELECTION_MATRIX`, plus
 * "anthropic/claude-sonnet-4.6", which is listed explicitly because it is the
 * proxy default. Using a `Set` removes duplicates.
 */
export const ALLOWED_MODEL_IDS: ReadonlySet<string> = new Set([
"anthropic/claude-sonnet-4.6",
// Walk the three nesting levels of the matrix and collect each config's modelId.
...Object.values(MODEL_SELECTION_MATRIX).flatMap(quality =>
Object.values(quality).flatMap(speed =>
Object.values(speed).map(config => config.modelId)
)
),
]);
export function createOpenRouterProvider() {
const apiKey = getEnvVariable("STACK_OPENROUTER_API_KEY");
return createOpenRouter({ apiKey });
const baseURL = getNodeEnvironment() === "development"
? "http://localhost:8102/api/latest/integrations/ai-proxy/v1"
: "https://api.stack-auth.com/api/latest/integrations/ai-proxy/v1";
return createOpenRouter({
apiKey: "forwarded",
baseURL,
});
}
export function selectModel(

View File

@ -4,13 +4,13 @@ import { getNodeEnvironment } from "@stackframe/stack-shared/dist/utils/env";
/**
* Creates an MCP client connected to the Stack Auth documentation server.
*
* In development: connects to local docs server at http://localhost:8104
* In development: connects to local docs server at http://localhost:8126
* In production: connects to production docs server at https://mcp.stack-auth.com
*/
export async function createDocsTools() {
const mcpUrl =
getNodeEnvironment() === "development"
? new URL("/api/internal/mcp", "http://localhost:8104")
? new URL("/api/internal/mcp", "http://localhost:8126")
: new URL("/api/internal/mcp", "https://mcp.stack-auth.com");
const stackAuthMcp = await createMCPClient({

View File

@ -51,7 +51,7 @@
"db:migrate": "pnpm pre && pnpm run --filter=@stackframe/backend db:migrate",
"fern": "pnpm pre && pnpm run --filter=@stackframe/docs fern",
"dev:full": "pnpm pre && concurrently -k \"pnpm run generate-sdks:watch\" \"turbo run dev --concurrency 99999\"",
"dev": "pnpm pre && concurrently -k \"pnpm run generate-sdks:watch\" \"pnpm run generate-openapi-docs:watch\" \"turbo run dev --concurrency 99999 --filter=./apps/* --filter=@stackframe/docs-mintlify --filter=./packages/* --filter=./examples/demo \"",
"dev": "pnpm pre && concurrently -k \"pnpm run generate-sdks:watch\" \"pnpm run generate-openapi-docs:watch\" \"turbo run dev --concurrency 99999 --filter=./apps/* --filter=@stackframe/docs-mintlify --filter=@stackframe/stack-docs --filter=./packages/* --filter=./examples/demo \"",
"dev:tui": "pnpm pre && (trap 'kill 0' EXIT; pnpm run generate-sdks:watch & pnpm run generate-openapi-docs:watch & turbo run dev --ui tui --concurrency 99999 --filter=./apps/* --filter=@stackframe/stack-docs --filter=./packages/* --filter=./examples/demo)",
"dev:inspect": "pnpm pre && STACK_BACKEND_DEV_EXTRA_ARGS=\"--inspect\" pnpm run dev",
"dev:profile": "pnpm pre && STACK_BACKEND_DEV_EXTRA_ARGS=\"--experimental-cpu-prof\" pnpm run dev",