From 0621ad2032708b2ad64eb163fb6c7d83f3db0afa Mon Sep 17 00:00:00 2001 From: BilalG1 Date: Sun, 19 Apr 2026 22:57:38 -0700 Subject: [PATCH 1/3] ai proxy fix (#1343) ## Summary by CodeRabbit * **Refactor** * Request sanitization now includes an extra proxy-specific preprocessing step for safer AI proxying. * **New Features** * Initialization prompts centralized into a shared helper, with a web-specific prompt variant. * Authenticated requests can optionally route via a provided external API key to access alternate models. * **Chores** * Added and exposed a preprocessing hook with a default no-op implementation. --- .../app/api/latest/ai/query/[mode]/route.ts | 6 +- .../ai-proxy/[[...path]]/route.ts | 5 + apps/backend/src/lib/ai/models.ts | 11 +- .../backend/src/lib/ai/proxy-preprocessing.ts | 8 ++ apps/backend/src/private/implementation | 2 +- .../private/implementation-fallback/index.ts | 3 + apps/backend/src/private/index.ts | 2 +- packages/stack-cli/src/lib/init-prompt.ts | 127 +----------------- .../stack-shared/src/helpers/init-prompt.ts | 126 +++++++++++++++++ 9 files changed, 159 insertions(+), 131 deletions(-) create mode 100644 apps/backend/src/lib/ai/proxy-preprocessing.ts create mode 100644 packages/stack-shared/src/helpers/init-prompt.ts diff --git a/apps/backend/src/app/api/latest/ai/query/[mode]/route.ts b/apps/backend/src/app/api/latest/ai/query/[mode]/route.ts index 959a859d9..7c94fd035 100644 --- a/apps/backend/src/app/api/latest/ai/query/[mode]/route.ts +++ b/apps/backend/src/app/api/latest/ai/query/[mode]/route.ts @@ -11,6 +11,7 @@ import { createSmartRouteHandler } from "@/route-handlers/smart-route-handler"; import { runAsynchronouslyAndWaitUntil } from "@/utils/background-tasks"; import { validateImageAttachments } from "@stackframe/stack-shared/dist/ai/image-limits"; import { yupMixed, yupObject, yupString } from "@stackframe/stack-shared/dist/schema-fields"; +import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env"; import { StatusError } from "@stackframe/stack-shared/dist/utils/errors"; import { Json } from "@stackframe/stack-shared/dist/utils/json"; import { generateText, ModelMessage, stepCountIs, streamText } from "ai"; @@ -55,7 +56,10 @@ export const POST = createSmartRouteHandler({ throw new StatusError(StatusError.BadRequest, imageValidationResult.reason); } - const model = selectModel(quality, speed, isAuthenticated); + const authenticatedApiKey = isAuthenticated + ? getEnvVariable("STACK_OPENROUTER_AUTHENTICATED_API_KEY", "") + : ""; + const model = selectModel(quality, speed, isAuthenticated, authenticatedApiKey || undefined); const isDocsOrSearch = systemPromptId === "docs-ask-ai" || systemPromptId === "command-center-ask-ai"; let systemPrompt = getFullSystemPrompt(systemPromptId); if (isDocsOrSearch) { diff --git a/apps/backend/src/app/api/latest/integrations/ai-proxy/[[...path]]/route.ts b/apps/backend/src/app/api/latest/integrations/ai-proxy/[[...path]]/route.ts index 9a8205dd9..a45d49cc6 100644 --- a/apps/backend/src/app/api/latest/integrations/ai-proxy/[[...path]]/route.ts +++ b/apps/backend/src/app/api/latest/integrations/ai-proxy/[[...path]]/route.ts @@ -1,4 +1,5 @@ import { ALLOWED_MODEL_IDS } from "@/lib/ai/models"; +import { preprocessProxyBody } from "@/private"; import { handleApiRequest } from "@/route-handlers/smart-route-handler"; import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env"; import { StatusError } from "@stackframe/stack-shared/dist/utils/errors"; @@ -30,6 +31,10 @@ function sanitizeBody(raw: ArrayBuffer): Uint8Array { parsed.metadata.user_id = parsed.metadata.user_id.slice(0, 128); } + parsed = preprocessProxyBody({ + parsedBody: parsed, + }); + return new TextEncoder().encode(JSON.stringify(parsed)); } diff --git a/apps/backend/src/lib/ai/models.ts b/apps/backend/src/lib/ai/models.ts index 71693d6d7..0ab743461 100644 --- a/apps/backend/src/lib/ai/models.ts +++ b/apps/backend/src/lib/ai/models.ts @@ -69,15 +69,22 @@ export function createOpenRouterProvider() { }); } +export function createDirectOpenRouterProvider(apiKey: string) { + return createOpenRouter({ apiKey }); +} + export function selectModel( quality: ModelQuality, speed: ModelSpeed, - isAuthenticated: boolean + isAuthenticated: boolean, + directApiKey?: string, ) { const config = MODEL_SELECTION_MATRIX[quality][speed][isAuthenticated ? "authenticated" : "unauthenticated"]; - const openrouter = createOpenRouterProvider(); + const openrouter = directApiKey + ? createDirectOpenRouterProvider(directApiKey) + : createOpenRouterProvider(); const model = openrouter(config.modelId); return model; } diff --git a/apps/backend/src/lib/ai/proxy-preprocessing.ts b/apps/backend/src/lib/ai/proxy-preprocessing.ts new file mode 100644 index 000000000..3dc8945ec --- /dev/null +++ b/apps/backend/src/lib/ai/proxy-preprocessing.ts @@ -0,0 +1,8 @@ +/** + * Opaque preprocessing step applied to every parsed request body that + * flows through the AI proxy. The concrete behavior lives in the + * private implementation; the fallback is an identity function. + */ +export type AiProxyBodyProcessor = (input: { + parsedBody: Record, +}) => Record; diff --git a/apps/backend/src/private/implementation b/apps/backend/src/private/implementation index 576f383b6..73d5adbbc 160000 --- a/apps/backend/src/private/implementation +++ b/apps/backend/src/private/implementation @@ -1 +1 @@ -Subproject commit 576f383b69a9593a9cff8d755c64c810aeeae239 +Subproject commit 73d5adbbc1843fae71483b4143d44e97f00fee1b diff --git a/apps/backend/src/private/implementation-fallback/index.ts b/apps/backend/src/private/implementation-fallback/index.ts index 71e416ea5..69c6ddec9 100644 --- a/apps/backend/src/private/implementation-fallback/index.ts +++ b/apps/backend/src/private/implementation-fallback/index.ts @@ -1,3 +1,4 @@ +import { AiProxyBodyProcessor } from "@/lib/ai/proxy-preprocessing"; import { SignUpRiskEngine } from "@/lib/risk-scores"; import { createNeutralSignUpHeuristicFacts } from "@/lib/sign-up-heuristics"; @@ -9,3 +10,5 @@ export const signUpRiskEngine: SignUpRiskEngine = { }; }, }; + +export const preprocessProxyBody: AiProxyBodyProcessor = ({ parsedBody }) => parsedBody; diff --git a/apps/backend/src/private/index.ts b/apps/backend/src/private/index.ts index 371f3710c..159e7607c 100644 --- a/apps/backend/src/private/index.ts +++ b/apps/backend/src/private/index.ts @@ -1 +1 @@ -export { signUpRiskEngine } from "./implementation.generated"; +export { signUpRiskEngine, preprocessProxyBody } from "./implementation.generated"; diff --git a/packages/stack-cli/src/lib/init-prompt.ts b/packages/stack-cli/src/lib/init-prompt.ts index b4076e5dc..f4f5801dc 100644 --- a/packages/stack-cli/src/lib/init-prompt.ts +++ b/packages/stack-cli/src/lib/init-prompt.ts @@ -1,126 +1 @@ -// TODO: Use configPath in the prompt once local emulator is set up: -// Add "npx @stackframe/stack-cli emulator run --config-file ${configPath}" to project dev command -export const createInitPrompt = (web: boolean, configPath?: string) => `============================= -STACK AUTH SETUP INSTRUCTIONS -============================= - -These instructions describe how to set up Stack Auth. -${web ? ` -First of all, if you have access to a terminal, it is better to use the \`npx @stackframe/stack-cli init\`` : ""} - -Install mcp server from https://mcp.stack-auth.com if not already installed - -For reference, questions, or information on Stack Auth, you can query the docs on https://docs.stack-auth.com via curl or any tools that you have - -## Setup - -### 1) Install the package - -Run the install command using whatever package manager the project uses (npm, yarn, pnpm, bun): - -| Framework | Package | -|-----------|---------| -| Next.js | \`@stackframe/stack\` | -| React | \`@stackframe/react\` | -| Vanilla JS | \`@stackframe/js\` | - -### 2) Create the Stack apps - -Depending on whether you're on a client or a server, you will want to create stackClientApp or stackServerApp. Some environments, like Next.js, have both, so create both files. - -The stack client app has client-level permissions. It contains most of the useful methods and hooks for your client-side code. -The stack server app has full read and write access to all users. It requires STACK_SECRET_SERVER_KEY env variable and should only be used in secure context - -In Next.js, env vars are auto-detected (NEXT_PUBLIC_STACK_PROJECT_ID etc.), so the constructor needs no explicit config. For other frameworks, you must pass projectId explicitly using the framework's env var access method. Pass publishableClientKey only if your project is configured to require publishable client keys. - -The tokenStore should be "nextjs-cookie" for Next.js, or "cookie" for all other frameworks. - -Make sure to set redirectMethod on non next.js frameworks. For example for tanstack router import like so: -import { useNavigate } from '@tanstack/react-router' - -\`\`\`ts -// src/stack/client.ts -import { StackClientApp } from "@stackframe/stack"; // or "@stackframe/react" or "@stackframe/js" - -export const stackClientApp = new StackClientApp({ - // Next.js: omit projectId/publishableClientKey (auto-detected from NEXT_PUBLIC_ env vars) - // Other frameworks: pass projectId explicitly, and publishableClientKey only if required by your project. For Vite: - // projectId: import.meta.env.VITE_STACK_PROJECT_ID, - // publishableClientKey: import.meta.env.VITE_STACK_PUBLISHABLE_CLIENT_KEY, - tokenStore: "nextjs-cookie", // or "cookie" for non-Next.js, - // redirectMethod: { useNavigate } // or "window" -}); -\`\`\` - -If the framework has server-side support (e.g. Next.js), also create a server app: - -\`\`\`ts -// src/stack/server.ts -import "server-only"; -import { StackServerApp } from "@stackframe/stack"; -import { stackClientApp } from "./client"; - -export const stackServerApp = new StackServerApp({ - inheritsFrom: stackClientApp, -}); -\`\`\` - -### 3) Create the Stack handler (if available in framework) - -This sets up pages for sign in, sign up, password reset, etc. - -\`\`\`tsx -import { StackHandler } from "@stackframe/stack"; // Next.js -// import { StackHandler } from "@stackframe/react"; // React - -export default function Handler() { - return ; -} -\`\`\` - -### 4) Create a Suspense boundary - -Suspense is necessary for many stack auth hooks such as useUser to function. Add a loading component with a custom loading indicator for the current project. Don't add if one already exists - -For example: -\`\`\`tsx -//src/loading.tsx - -export default function Loading() { - return

Loading...

-} -\`\`\` - -### 5) Link environment variables - -This is only necessary if not using local emulator. Ensure these are ignored by git. - -Rename the env var keys in .env to match the framework's convention for client-exposed variables. For example, Vite requires VITE_ prefix, Next.js uses NEXT_PUBLIC_, etc. The values should stay the same — only rename the keys. - -The required variables are: -- Project ID (e.g. NEXT_PUBLIC_STACK_PROJECT_ID, VITE_STACK_PROJECT_ID, etc.) -- Secret server key: STACK_SECRET_SERVER_KEY (only for frameworks with server-side support, no prefix needed) - -The publishable client key (e.g. NEXT_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY, VITE_STACK_PUBLISHABLE_CLIENT_KEY, etc.) is only required if your project has publishable client keys enabled as a requirement. - -### 6) React only: Wrap the entire page in a Stack provider - -This is used for the useUser and useStackApp hooks. - -\`\`\`tsx -import { StackProvider, StackTheme } from "@stackframe/stack"; -import { stackClientApp } from "../stack/client"; // adjust relative path -\`\`\` - -Then wrap the body content: - -\`\`\`tsx -return ( - - - {children} - - -); -\`\`\` -`; +export { createInitPrompt } from "@stackframe/stack-shared/dist/helpers/init-prompt"; diff --git a/packages/stack-shared/src/helpers/init-prompt.ts b/packages/stack-shared/src/helpers/init-prompt.ts new file mode 100644 index 000000000..f1156f000 --- /dev/null +++ b/packages/stack-shared/src/helpers/init-prompt.ts @@ -0,0 +1,126 @@ +// TODO: Use configPath in the prompt once local emulator is set up: +// Add "npx @stackframe/stack-cli emulator run --config-file ${configPath}" to project dev command +export const createInitPrompt = (web: boolean, configPath?: string) => `============================= +STACK AUTH SETUP INSTRUCTIONS +============================= + +These instructions describe how to set up Stack Auth. +${web ? ` +First of all, if you have access to a terminal, it is better to use the \`npx @stackframe/stack-cli init\`` : ""} + +Install mcp server from https://mcp.stack-auth.com if not already installed + +For reference, questions, or information on Stack Auth, you can query the docs on https://docs.stack-auth.com via curl or any tools that you have + +## Setup + +### 1) Install the package + +Run the install command using whatever package manager the project uses (npm, yarn, pnpm, bun): + +| Framework | Package | +|-----------|---------| +| Next.js | \`@stackframe/stack\` | +| React | \`@stackframe/react\` | +| Vanilla JS | \`@stackframe/js\` | + +### 2) Create the Stack apps + +Depending on whether you're on a client or a server, you will want to create stackClientApp or stackServerApp. Some environments, like Next.js, have both, so create both files. + +The stack client app has client-level permissions. It contains most of the useful methods and hooks for your client-side code. +The stack server app has full read and write access to all users. It requires STACK_SECRET_SERVER_KEY env variable and should only be used in secure context + +In Next.js, env vars are auto-detected (NEXT_PUBLIC_STACK_PROJECT_ID etc.), so the constructor needs no explicit config. For other frameworks, you must pass projectId explicitly using the framework's env var access method. Pass publishableClientKey only if your project is configured to require publishable client keys. + +The tokenStore should be "nextjs-cookie" for Next.js, or "cookie" for all other frameworks. + +Make sure to set redirectMethod on non next.js frameworks. For example for tanstack router import like so: +import { useNavigate } from '@tanstack/react-router' + +\`\`\`ts +// src/stack/client.ts +import { StackClientApp } from "@stackframe/stack"; // or "@stackframe/react" or "@stackframe/js" + +export const stackClientApp = new StackClientApp({ + // Next.js: omit projectId/publishableClientKey (auto-detected from NEXT_PUBLIC_ env vars) + // Other frameworks: pass projectId explicitly, and publishableClientKey only if required by your project. For Vite: + // projectId: import.meta.env.VITE_STACK_PROJECT_ID, + // publishableClientKey: import.meta.env.VITE_STACK_PUBLISHABLE_CLIENT_KEY, + tokenStore: "nextjs-cookie", // or "cookie" for non-Next.js, + // redirectMethod: { useNavigate } // or "window" +}); +\`\`\` + +If the framework has server-side support (e.g. Next.js), also create a server app: + +\`\`\`ts +// src/stack/server.ts +import "server-only"; +import { StackServerApp } from "@stackframe/stack"; +import { stackClientApp } from "./client"; + +export const stackServerApp = new StackServerApp({ + inheritsFrom: stackClientApp, +}); +\`\`\` + +### 3) Create the Stack handler (if available in framework) + +This sets up pages for sign in, sign up, password reset, etc. + +\`\`\`tsx +import { StackHandler } from "@stackframe/stack"; // Next.js +// import { StackHandler } from "@stackframe/react"; // React + +export default function Handler() { + return ; +} +\`\`\` + +### 4) Create a Suspense boundary + +Suspense is necessary for many stack auth hooks such as useUser to function. Add a loading component with a custom loading indicator for the current project. Don't add if one already exists + +For example: +\`\`\`tsx +//src/loading.tsx + +export default function Loading() { + return

Loading...

+} +\`\`\` + +### 5) Link environment variables + +This is only necessary if not using local emulator. Ensure these are ignored by git. + +Rename the env var keys in .env to match the framework's convention for client-exposed variables. For example, Vite requires VITE_ prefix, Next.js uses NEXT_PUBLIC_, etc. The values should stay the same — only rename the keys. + +The required variables are: +- Project ID (e.g. NEXT_PUBLIC_STACK_PROJECT_ID, VITE_STACK_PROJECT_ID, etc.) +- Secret server key: STACK_SECRET_SERVER_KEY (only for frameworks with server-side support, no prefix needed) + +The publishable client key (e.g. NEXT_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY, VITE_STACK_PUBLISHABLE_CLIENT_KEY, etc.) is only required if your project has publishable client keys enabled as a requirement. + +### 6) React only: Wrap the entire page in a Stack provider + +This is used for the useUser and useStackApp hooks. + +\`\`\`tsx +import { StackProvider, StackTheme } from "@stackframe/stack"; +import { stackClientApp } from "../stack/client"; // adjust relative path +\`\`\` + +Then wrap the body content: + +\`\`\`tsx +return ( + + + {children} + + +); +\`\`\` +`; From 85ae4b1c9e69a791db4ea356ac7c6df2519c3dc5 Mon Sep 17 00:00:00 2001 From: BilalG1 Date: Sun, 19 Apr 2026 22:57:46 -0700 Subject: [PATCH 2/3] Fix ClickHouse OOM in MAU query + optimize /internal/metrics route (#1344) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Fixes the Sentry `StackAssertionError: Failed to load monthly active users for internal metrics` crash (ClickHouse OOM at the 7.2 GiB per-query cap) and applies two related optimizations to other queries in the same route while here. Adds a local benchmark harness that validates correctness and measures peak memory / duration before & after. ## Root cause (the original Sentry error) `loadMonthlyActiveUsers` was written as `SELECT user_id … GROUP BY user_id` and then counting in Node via a `Set`. On a large project that ships back millions of user_ids. Two failure modes stacked: 1. **Result materialization** — every distinct user_id had to be buffered in the server before streaming to Node (~20 MiB of result for 450k users; much more at real scale). 2. **`JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8')`** — the `toJSONString(data)` per-row re-serialization of the entire nested JSON column, billions of times, just to pull one boolean. Dominates bytes-read. Combined, on a single partition read from S3-backed MergeTree, this can exceed ClickHouse's 7.2 GiB per-query memory cap. That's exactly what the Sentry trace showed. ## Changes ### 1. Fix MAU query (`loadMonthlyActiveUsers`) Moved counting to the server with `uniqExact(sipHash64(normalized_user_id))` and pulled the JS-side normalization (`lower`, `trim`, `isUuid`) into SQL. Picked `sipHash64` after benchmarking 7 variants — it's exact (at <<2³² users) and halves the uniqExact hash-state vs. raw string keys. ### 2. Fix 1 — `JSONExtract(toJSONString(data), …)` → direct `CAST(data.is_anonymous, …)` Applied everywhere the pattern appeared in the metrics route: - `loadDailyActiveUsers` - the `analyticsUserJoin` subquery - the `nonAnonymousAnalyticsUserFilter` - `analyticsOverview:topRegion` - `analyticsOverview:online` Semantics preserved (`coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0)` matches `JSONExtract(…, 'UInt8')` behavior when the field is missing). ### 3. Fix 3 — server-aggregate the split queries `loadDailyActiveUsersSplit` and `loadDailyActiveTeamsSplit` used to ship 1.2M+ `(day, user_id)` rows back to Node just so the JS could bucket them into new / retained / reactivated. Rewrote both as one CTE-style query that returns 31 rows (one per day in the 30-day window) with the counts precomputed. **Minor semantic shift** (documented inline in `route.tsx`): \"new\" is now based on the user's first-ever `\$token-refresh` event rather than their Postgres `signedUpAt`. Agrees for users who log in immediately after sign-up (the common case). Disagrees for the rare edge case of an account that existed pre-window but never generated a `\$token-refresh` until now — old code classified as \"reactivated,\" new code classifies as \"new.\" Judged acceptable; can be revisited. Postgres round-trips for `ProjectUser.signedUpAt` / `Team.createdAt` are no longer needed for the split, and the 76 MiB-ish wire ship is gone. ### 4. Benchmark harness (`apps/backend/scripts/benchmark-internal-metrics.ts`) Local-only tool. Three modes: - **MAU equivalence matrix** — 13 edge cases (empty, dedup, anonymous filter, window boundary, null user_id, non-UUID user_id, case variation, project isolation, missing/null `is_anonymous`, wrong event_type). Asserts OLD pipeline and NEW query return the **same set** of users, not just the same count. - **MAU perf** — OLD vs NEW plus 6 other candidate variants (inline regex, UUID keys, sipHash64, HLL sketches), reads `memory_usage` / `read_rows` / `result_bytes` from `system.query_log` for each, prints a ranked table. - **Full-route benchmark** (`BENCH_ROUTE_QUERIES=1`) — runs every ClickHouse query in `/internal/metrics` in three stages (BEFORE, AFTER, candidate OPTIMIZED) against the same seed and prints per-query deltas plus endpoint-level totals. Seeds under a synthetic `project_id` so real data is never touched; cleans up on exit via `ALTER TABLE … DELETE`. ## Benchmark results ### MAU query alone Ran at two scales; set-equality verified (new query identifies the same individual users, not just the same count). | seed | MAU | peak memory (old → new) | bytes read | duration | |---|---|---|---|---| | 500k events | 89,939 | 158.7 MiB → 46.7 MiB (**3.4×**, −70%) | 175.7 MiB → 63.0 MiB (2.8×) | 483 ms → 76 ms (**6.4×**) | | 2.5M events | 449,990 | 439.2 MiB → 281.4 MiB (1.56×, −36%) | 865.0 MiB → 310.9 MiB (2.8×) | 783 ms → 126 ms (**6.2×**) | MAU variant bake-off at 2.5M events (all exact, all set-equal to OLD): | variant | memory | duration | notes | |---|---|---|---| | v0_old (baseline) | 440 MiB | 567 ms | — | | v1_uniqExact_string | 284 MiB | 110 ms | naive fix | | v3_uniqExact_toUUID | 244 MiB | 153 ms | UUID keys, slower per-row | | **v4_uniqExact_sipHash64** | **125 MiB** | **95 ms** | **shipped** | | v5_uniq (HLL) ~approx | 30 MiB | 86 ms | −0.25% error | | v6_uniqCombined ~approx | 31 MiB | 67 ms | −0.15% error | ### Full `/internal/metrics` route (2.7M events, 300k users + page-views + clicks + teams) Ranked by BEFORE peak memory: | query | mem BEFORE | mem AFTER | Δ mem | dur BEFORE | dur AFTER | Δ dur | |---|---|---|---|---|---|---| | analyticsOverview:topReferrers | 588.1 MiB | 411.1 MiB | 1.43× | 1833 ms | 110 ms | **16.66×** | | analyticsOverview:totalVisitors | 584.3 MiB | 403.5 MiB | 1.45× | 1829 ms | 121 ms | 15.12× | | analyticsOverview:dailyEvents | 584.1 MiB | 403.7 MiB | 1.45× | 1897 ms | 140 ms | 13.55× | | loadUsersByCountry | 393.1 MiB | 385.4 MiB | ≈same | 74 ms | 80 ms | ≈same | | loadDailyActiveUsersSplit | 363.4 MiB | 396.8 MiB | *+9%* | 1966 ms | 356 ms | 5.52× | | analyticsOverview:topRegion | 269.9 MiB | 106.4 MiB | 2.54× | 1602 ms | 65 ms | 24.65× | | loadDailyActiveUsers | 268.3 MiB | 84.0 MiB | 3.19× | 1111 ms | 44 ms | 25.25× | | loadDailyActiveTeamsSplit | 59.6 MiB | 78.1 MiB | *+31%* | 70 ms | 123 ms | *+76%* | | loadMonthlyActiveUsers | 54.9 MiB | 54.9 MiB | ≈same | 68 ms | 56 ms | ≈same | | analyticsOverview:online | 18.4 MiB | 5.8 MiB | 3.17× | 58 ms | 4 ms | 14.50× | **Endpoint-level totals** | metric | BEFORE | AFTER | Δ | |---|---|---|---| | Sum peak ClickHouse memory | 3.11 GiB | 2.28 GiB | **−27%** | | **Max query duration** (endpoint wall-clock floor) | **1966 ms** | **356 ms** | **−82%** (5.5×) | | Sum query duration (total CPU) | 10508 ms | 1099 ms | **−90%** (9.6×) | | Bytes read | 10.70 GiB | 4.55 GiB | −57% | | Bytes shipped to Node | 94.8 MiB | 44.2 KiB | **−99.95%** | Both split queries show a small memory *regression* at this seed size (the new server-side window-function + self-join has its own state cost that's near break-even with \"materialize + ship\" at 300k users); at prod scale the 76 MiB-ship saving dominates. Duration is unambiguously better. ## Why we don't need to drop the `analyticsUserJoin` in this PR The benchmark includes an OPTIMIZED stage that drops the LEFT JOIN and trusts `e.data.is_anonymous` directly, which would shave another **1.2 GiB / 1.9× duration** off the endpoint. **But we can't ship that here** — an audit of the client tracker (`packages/js/src/lib/stack-app/apps/implementations/event-tracker.ts`) confirmed `is_anonymous` is never set on client-emitted `$page-view` / `$click` events. The JOIN is currently load-bearing. A follow-up PR will enrich `is_anonymous` at the batch ingest endpoint using `auth.user.is_anonymous`; after one metrics-window cycle (~30 days) the JOIN can be dropped. ## Follow-up work (out of scope for this PR) - **Batch-endpoint enrichment** + drop the analytics-overview LEFT JOIN (est. further −53% endpoint memory, −46% duration per the benchmark). - **Teams-split hash-variant count mismatch** — `sipHash64(team_id)` variant of the teams split shows a count discrepancy vs. the string-keyed version in the benchmark. Not blocking since teams-split is only #8 by memory; needs a root-cause pass before shipping that particular optimization. - **`loadUsersByCountry` window bound** — currently scans every `$token-refresh` event ever for the tenancy (no time filter). Bounding to 30 days would bound memory growth with project age, but changes semantics (\"country of latest login ever\" → \"in last 30 days\"). Deferred because it's product-facing. ## Snapshot changes in `internal-metrics.test.ts.snap` The `should return metrics data with users` test signs in 10 users today, then deletes one of them mid-test. Two small snapshot values change on today's date; both are just a reclassification of that single deleted user — the total (10 active users) is unchanged. - **`daily_active_users_split.new[today]`: 9 → 10** All 10 users really did sign in for the first time today. The old code only counted 9 because the deleted user's Postgres row was gone by the time the metrics query ran, so the old classifier couldn't see they were created today. The new query looks at ClickHouse events directly, sees the deleted user's first event was today, and counts them as new like everyone else. - **`daily_active_users_split.reactivated[today]`: 1 → 0** No user was "reactivated" today — nobody was active on an earlier day and came back. The old "1" was the deleted user falling into this bucket by default (the old classifier had no other rule that fit them). The new code correctly reports zero. Totals match either way (9 + 1 = 10 + 0). We're moving one deleted user out of the "returning visitor" bucket and into the "brand-new user" bucket, which is what they actually were. ## Test plan - [x] `pnpm typecheck` and `pnpm lint` pass on the backend package - [x] MAU equivalence matrix: 13/13 cases return the same set of users (not just the same count) between OLD and NEW pipelines - [x] Set-equality verified at 500k-MAU perf scale - [x] Full-route benchmark confirms the expected memory / duration improvements - [ ] Sanity-check the dashboard rendering after deploy (split charts, MAU counter, analytics overview) - [ ] Monitor Sentry for the assertion error — should drop to zero ## Summary by CodeRabbit * **Performance Improvements** * Monthly and daily active metrics are now computed entirely server-side for faster queries and reduced client-side processing. * **Bug Fixes** * More consistent handling of anonymous/missing IDs and stricter ID filtering to improve accuracy across edge cases. * **Tests** * Added a comprehensive benchmark and validation harness to measure query performance and verify result equivalence across variants. --- .../scripts/benchmark-internal-metrics.ts | 1843 +++++++++++++++++ .../app/api/latest/internal/metrics/route.tsx | 265 +-- .../internal-metrics.test.ts.snap | 4 +- 3 files changed, 1948 insertions(+), 164 deletions(-) create mode 100644 apps/backend/scripts/benchmark-internal-metrics.ts diff --git a/apps/backend/scripts/benchmark-internal-metrics.ts b/apps/backend/scripts/benchmark-internal-metrics.ts new file mode 100644 index 000000000..605a98830 --- /dev/null +++ b/apps/backend/scripts/benchmark-internal-metrics.ts @@ -0,0 +1,1843 @@ +/** + * Local-only benchmark + equivalence harness for the ClickHouse queries in + * apps/backend/src/app/api/latest/internal/metrics/route.tsx. + * + * Three modes, selected via env flags (all run by default): + * + * 1. MAU equivalence matrix (default ON; set BENCH_SKIP_MATRIX=1 to skip) + * Small-data test cases for loadMonthlyActiveUsers. Asserts that the + * BEFORE (pre-fix) and AFTER (current) queries return the same MAU + * count AND the same set of individual users across 13 edge cases: + * empty, dedup, anonymous filter, window boundary, null user_id, + * non-UUID user_id, case variation, project isolation, etc. + * + * 2. MAU perf run (default ON; set BENCH_SKIP_PERF=1 to skip) + * Runs OLD vs NEW MAU query on the heavy seed. Reads + * memory_usage/read_rows/result_bytes from system.query_log and prints + * a comparison table plus all candidate variants (v1 uniqExact strings, + * v2 inline regex, v3 UUID keys, v4 sipHash64 [shipped], v5-v7 HLL + * sketches). Also includes a set-equality check so "same count, + * different users" can't slip through. + * + * 3. Full-route benchmark (set BENCH_ROUTE_QUERIES=1) + * Runs every ClickHouse query in the internal-metrics route + * (loadUsersByCountry, loadDailyActiveUsers, the splits, + * loadMonthlyActiveUsers, analyticsOverview:{dailyEvents, + * totalVisitors, topReferrers, topRegion, online}) in three stages: + * BEFORE (pre-fix), AFTER (current: fixes 1 + 3), and OPTIMIZED + * (further candidate opts not yet shipped — e.g. dropping the + * analyticsOverview LEFT JOIN, hashed split partition keys, + * loadUsersByCountry time window). Prints ranked per-query deltas and + * endpoint-level totals (sum peak memory, max duration). + * + * Seeds synthetic events under a unique project_id so real data is never + * touched; cleans up via ALTER TABLE ... DELETE on exit. + * + * Run: pnpm --filter @stackframe/backend run with-env:dev tsx scripts/benchmark-internal-metrics.ts + * Env knobs: + * BENCH_USERS (default 200_000) – distinct users in the perf seed + * BENCH_EVENTS_USER (default 5) – $token-refresh events per user + * BENCH_ANON_RATIO (default 0.1) – fraction flagged is_anonymous + * BENCH_BATCH (default 50_000) – insert batch size + * BENCH_SKIP_PERF=1 – skip the heavy MAU perf run + * BENCH_SKIP_MATRIX=1 – skip the equivalence matrix + * BENCH_ROUTE_QUERIES=1 – also run the full-route + * BEFORE/AFTER/OPTIMIZED suite + * BENCH_PAGE_VIEWS_USER (default 3) – $page-view events per user + * BENCH_CLICKS_USER (default 1) – $click events per user + * BENCH_TEAM_RATIO (default 0.3) – fraction of users with a team + */ + +import { getClickhouseAdminClient } from "@/lib/clickhouse"; +import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env"; +import { randomUUID } from "node:crypto"; + +const RUN_ID = randomUUID(); +const BENCH_PROJECT_ID = `bench-mau-${RUN_ID}`; +const PERF_BRANCH_ID = "perf"; + +const METRICS_WINDOW_DAYS = 30; +const METRICS_WINDOW_MS = METRICS_WINDOW_DAYS * 24 * 60 * 60 * 1000; +const ONE_DAY_MS = 24 * 60 * 60 * 1000; + +const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/; +const UUID_RE_CH = "^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$"; + +function envInt(name: string, fallback: number): number { + const v = getEnvVariable(name, ""); + if (v === "") return fallback; + const n = Number(v); + if (!Number.isFinite(n)) throw new Error(`bad ${name}: ${v}`); + return n; +} +function envFloat(name: string, fallback: number): number { + const v = getEnvVariable(name, ""); + if (v === "") return fallback; + const n = Number(v); + if (!Number.isFinite(n)) throw new Error(`bad ${name}: ${v}`); + return n; +} +function envBool(name: string): boolean { + const v = getEnvVariable(name, ""); + return v === "1" || v === "true"; +} + +function formatCh(date: Date): string { + return date.toISOString().slice(0, 19); +} + +function normalizeUuidFromEvent(value: string): string | null { + const n = value.trim().toLowerCase(); + return UUID_RE.test(n) ? n : null; +} + +type EventRow = { + event_type: string, + event_at: string, + data: Record, + project_id: string, + branch_id: string, + user_id: string | null, + team_id: string | null, +}; + +const OLD_QUERY = ` + SELECT assumeNotNull(user_id) AS user_id + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + GROUP BY user_id +`; + +// Proposed query. Counts on the server (so we never ship N user_ids back to +// the client) and filters via direct JSON path access (skips the per-row +// toJSONString → JSONExtract round-trip that blows up memory in prod). +// Matches the old JS normalization: lower/trim + isUuid regex. +const NEW_QUERY = ` + SELECT uniqExact(normalized_user_id) AS mau + FROM ( + SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) + WHERE match(normalized_user_id, {uuidRe:String}) +`; + +type QueryParams = { + projectId: string, + branchId: string, + since: Date, + untilExclusive: Date, + includeAnonymous: boolean, +}; + +async function runOld(p: QueryParams): Promise<{ count: number, set: Set, queryId: string }> { + const client = getClickhouseAdminClient(); + const queryId = `bench-old-${randomUUID()}`; + const res = await client.query({ + query: OLD_QUERY, + query_params: { + projectId: p.projectId, + branchId: p.branchId, + since: formatCh(p.since), + untilExclusive: formatCh(p.untilExclusive), + includeAnonymous: p.includeAnonymous ? 1 : 0, + }, + query_id: queryId, + format: "JSONEachRow", + }); + const rows = (await res.json()) as { user_id: string }[]; + const set = new Set(); + for (const r of rows) { + const n = normalizeUuidFromEvent(r.user_id); + if (n != null) set.add(n); + } + return { count: set.size, set, queryId }; +} + +async function runNew(p: QueryParams): Promise<{ count: number, queryId: string }> { + const client = getClickhouseAdminClient(); + const queryId = `bench-new-${randomUUID()}`; + const res = await client.query({ + query: NEW_QUERY, + query_params: { + projectId: p.projectId, + branchId: p.branchId, + since: formatCh(p.since), + untilExclusive: formatCh(p.untilExclusive), + includeAnonymous: p.includeAnonymous ? 1 : 0, + uuidRe: UUID_RE_CH, + }, + query_id: queryId, + format: "JSONEachRow", + }); + const rows = (await res.json()) as { mau: string | number }[]; + return { count: Number(rows[0]?.mau ?? 0), queryId }; +} + +// Diagnostic-only variant of the NEW query that returns the set of distinct +// normalized user_ids instead of just the count. Used by the equivalence +// check to prove the OLD pipeline and the NEW pipeline would have counted +// the *same users*, not just the same number of users. +const NEW_QUERY_SET = ` + SELECT DISTINCT normalized_user_id + FROM ( + SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) + WHERE match(normalized_user_id, {uuidRe:String}) +`; + +async function runNewSet(p: QueryParams): Promise> { + const client = getClickhouseAdminClient(); + const res = await client.query({ + query: NEW_QUERY_SET, + query_params: { + projectId: p.projectId, + branchId: p.branchId, + since: formatCh(p.since), + untilExclusive: formatCh(p.untilExclusive), + includeAnonymous: p.includeAnonymous ? 1 : 0, + uuidRe: UUID_RE_CH, + }, + format: "JSONEachRow", + }); + const rows = (await res.json()) as { normalized_user_id: string }[]; + return new Set(rows.map((r) => r.normalized_user_id)); +} + +function setDiff(a: Set, b: Set): { onlyInA: string[], onlyInB: string[] } { + const onlyInA: string[] = []; + const onlyInB: string[] = []; + for (const x of a) if (!b.has(x)) onlyInA.push(x); + for (const x of b) if (!a.has(x)) onlyInB.push(x); + return { onlyInA, onlyInB }; +} + +// ── Alternate query variants explored for further memory/duration wins ────── +// Each variant returns a single row with `mau` (count). The equivalence check +// compares against the OLD pipeline's exact count. + +type Variant = { + name: string, + description: string, + approximate?: boolean, + sql: string, +}; + +const COMMON_FILTERS = ` + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) +`; + +const VARIANTS: Variant[] = [ + { + name: "v1_uniqExact_string", + description: "uniqExact on lower(trim(user_id)) string; regex filter in outer WHERE (current fix)", + sql: ` + SELECT uniqExact(normalized_user_id) AS mau + FROM ( + SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + ${COMMON_FILTERS} + ) + WHERE match(normalized_user_id, {uuidRe:String}) + `, + }, + { + name: "v2_uniqExact_inline", + description: "Same as v1 but regex + normalization folded into inner WHERE (no subquery)", + sql: ` + SELECT uniqExact(lower(trim(assumeNotNull(user_id)))) AS mau + FROM analytics_internal.events + ${COMMON_FILTERS} + AND match(lower(trim(assumeNotNull(user_id))), {uuidRe:String}) + `, + }, + { + name: "v3_uniqExact_toUUID", + description: "uniqExact on toUUIDOrNull(...) — 16-byte native UUID keys instead of 36-byte strings", + sql: ` + SELECT uniqExact(uid) AS mau + FROM ( + SELECT toUUIDOrNull(lower(trim(assumeNotNull(user_id)))) AS uid + FROM analytics_internal.events + ${COMMON_FILTERS} + ) + WHERE uid IS NOT NULL + AND match(toString(uid), {uuidRe:String}) + `, + }, + { + name: "v4_uniqExact_sipHash64", + description: "uniqExact on sipHash64(...) — 8-byte keys; collision prob negligible at <<2^32 users", + sql: ` + SELECT uniqExact(h) AS mau + FROM ( + SELECT sipHash64(lower(trim(assumeNotNull(user_id)))) AS h, + lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + ${COMMON_FILTERS} + ) + WHERE match(normalized_user_id, {uuidRe:String}) + `, + }, + { + name: "v5_uniq_hll", + description: "uniq() HyperLogLog — bounded ~16 KiB state, typical error ~0.5% (APPROXIMATE)", + approximate: true, + sql: ` + SELECT uniq(normalized_user_id) AS mau + FROM ( + SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + ${COMMON_FILTERS} + ) + WHERE match(normalized_user_id, {uuidRe:String}) + `, + }, + { + name: "v6_uniqCombined", + description: "uniqCombined(17) — exact for small N, HLL after threshold; ~96 KiB state (APPROXIMATE)", + approximate: true, + sql: ` + SELECT uniqCombined(17)(normalized_user_id) AS mau + FROM ( + SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + ${COMMON_FILTERS} + ) + WHERE match(normalized_user_id, {uuidRe:String}) + `, + }, + { + name: "v7_uniqHLL12", + description: "uniqHLL12 — ~4 KiB state, typical error ~2% (APPROXIMATE)", + approximate: true, + sql: ` + SELECT uniqHLL12(normalized_user_id) AS mau + FROM ( + SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + ${COMMON_FILTERS} + ) + WHERE match(normalized_user_id, {uuidRe:String}) + `, + }, +]; + +// ── Route-wide query census ───────────────────────────────────────────────── +// Every ClickHouse query from apps/backend/src/app/api/latest/internal/metrics/route.tsx, +// captured verbatim so we can measure the full shape of the endpoint. + +type RouteQuery = { + name: string, + desc: string, + sql: string, + extraParams?: (now: Date, untilExclusive: Date) => Record, +}; + +const ANALYTICS_USER_JOIN = ` + LEFT JOIN ( + SELECT + user_id, + argMax(JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8'), event_at) AS latest_is_anonymous + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at < {untilExclusive:DateTime} + GROUP BY user_id + ) AS token_refresh_users + ON e.user_id = token_refresh_users.user_id +`; +const NON_ANON_FILTER = "({includeAnonymous:UInt8} = 1 OR coalesce(JSONExtract(toJSONString(e.data), 'is_anonymous', 'Nullable(UInt8)'), token_refresh_users.latest_is_anonymous, 0) = 0)"; + +// Same joins/filters after fix 1 (direct CAST instead of JSONExtract(toJSONString(...))) +const ANALYTICS_USER_JOIN_AFTER = ` + LEFT JOIN ( + SELECT + user_id, + argMax(coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0), event_at) AS latest_is_anonymous + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at < {untilExclusive:DateTime} + GROUP BY user_id + ) AS token_refresh_users + ON e.user_id = token_refresh_users.user_id +`; +const NON_ANON_FILTER_AFTER = "({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), token_refresh_users.latest_is_anonymous, 0) = 0)"; + +const ROUTE_QUERIES_BEFORE: RouteQuery[] = [ + { + name: "loadUsersByCountry", + desc: "argMax country per user over all $token-refresh events (no window)", + sql: ` + SELECT + country_code, + count() AS userCount + FROM ( + SELECT + user_id, + argMax(cc, event_at) AS country_code + FROM ( + SELECT + user_id, + event_at, + CAST(data.ip_info.country_code, 'Nullable(String)') AS cc, + CAST(data.is_anonymous, 'UInt8') AS is_anonymous + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + ) + WHERE cc IS NOT NULL + AND ({includeAnonymous:UInt8} = 1 OR is_anonymous = 0) + GROUP BY user_id + ) + WHERE country_code IS NOT NULL + GROUP BY country_code + ORDER BY userCount DESC + `, + }, + { + name: "loadDailyActiveUsers", + desc: "DAU per day over 30d (uniqExact on raw user_id)", + sql: ` + SELECT + toDate(event_at) AS day, + uniqExact(assumeNotNull(user_id)) AS dau + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + GROUP BY day + ORDER BY day ASC + `, + }, + { + name: "loadDailyActiveUsersSplit", + desc: "All (day, user_id) pairs — ships N rows back to Node for split processing", + sql: ` + SELECT + toDate(event_at) AS day, + assumeNotNull(user_id) AS user_id + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + GROUP BY day, user_id + `, + }, + { + name: "loadDailyActiveTeamsSplit", + desc: "All (day, team_id) pairs — same shape as DAU split, team side", + sql: ` + SELECT + toDate(event_at) AS day, + assumeNotNull(team_id) AS team_id + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND team_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + GROUP BY day, team_id + `, + }, + { + name: "loadMonthlyActiveUsers (FIXED: v4)", + desc: "NEW: uniqExact(sipHash64(normalized)) — what we just shipped", + sql: ` + SELECT uniqExact(sipHash64(normalized_user_id)) AS mau + FROM ( + SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) + WHERE match(normalized_user_id, {uuidRe:String}) + `, + }, + { + name: "analyticsOverview:dailyEvents", + desc: "page-view+click daily counts, visitors/day — with LEFT JOIN to token_refresh_users", + sql: ` + SELECT + toDate(e.event_at) AS day, + countIf( + e.event_type = '$page-view' + AND e.user_id IS NOT NULL + AND ${NON_ANON_FILTER} + ) AS pv, + countIf( + e.event_type = '$click' + AND e.user_id IS NOT NULL + AND ${NON_ANON_FILTER} + ) AS cl, + uniqExactIf( + assumeNotNull(e.user_id), + e.event_type = '$page-view' + AND e.user_id IS NOT NULL + AND ${NON_ANON_FILTER} + ) AS visitors + FROM analytics_internal.events AS e + ${ANALYTICS_USER_JOIN} + WHERE e.event_type IN ('$page-view', '$click') + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + GROUP BY day + ORDER BY day ASC + `, + }, + { + name: "analyticsOverview:totalVisitors", + desc: "uniq visitors over 30d (page-view + join)", + sql: ` + SELECT + uniqExactIf( + assumeNotNull(e.user_id), + e.user_id IS NOT NULL + AND ${NON_ANON_FILTER} + ) AS visitors + FROM analytics_internal.events AS e + ${ANALYTICS_USER_JOIN} + WHERE e.event_type = '$page-view' + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.user_id IS NOT NULL + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + `, + }, + { + name: "analyticsOverview:topReferrers", + desc: "top 100 referrers by uniq visitors (GROUP BY referrer)", + sql: ` + SELECT + nullIf(CAST(e.data.referrer, 'String'), '') AS referrer, + uniqExactIf( + assumeNotNull(e.user_id), + e.user_id IS NOT NULL + AND ${NON_ANON_FILTER} + ) AS visitors + FROM analytics_internal.events AS e + ${ANALYTICS_USER_JOIN} + WHERE e.event_type = '$page-view' + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + GROUP BY referrer + HAVING visitors > 0 + ORDER BY visitors DESC + LIMIT 100 + `, + }, + { + name: "analyticsOverview:topRegion", + desc: "top (country, region) by uniq visitors (LIMIT 1)", + sql: ` + SELECT + CAST(data.ip_info.country_code, 'Nullable(String)') AS country_code, + CAST(data.ip_info.region_code, 'Nullable(String)') AS region_code, + uniqExactIf( + assumeNotNull(user_id), + user_id IS NOT NULL + AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + ) AS visitors + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + GROUP BY country_code, region_code + HAVING visitors > 0 + ORDER BY visitors DESC + LIMIT 1 + `, + }, + { + name: "analyticsOverview:online", + desc: "uniq users active in last 5 minutes", + sql: ` + SELECT + uniqExact(assumeNotNull(user_id)) AS online + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {onlineSince:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + `, + extraParams: (now, untilExclusive) => ({ + onlineSince: formatCh(new Date(now.getTime() - 5 * 60 * 1000)), + untilExclusive: formatCh(untilExclusive), + }), + }, +]; + +// After fixes 1 + 3. Same names, updated SQL. Queries not touched by either +// fix (loadUsersByCountry, loadMonthlyActiveUsers-FIXED) reuse the BEFORE entry. +function splitSqlAfter(idCol: "user_id" | "team_id", withAnonFilter: boolean): string { + const anonFilter = withAnonFilter + ? "AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0)" + : ""; + return ` + SELECT + toString(w.day) AS day, + count() AS total_count, + countIf(f.first_date = w.day) AS new_count, + countIf(f.first_date < w.day AND w.prev_day = addDays(w.day, -1)) AS retained_count, + countIf(f.first_date < w.day AND (isNull(w.prev_day) OR w.prev_day < addDays(w.day, -1))) AS reactivated_count + FROM ( + SELECT + day, + ${idCol}, + lagInFrame(day, 1) OVER (PARTITION BY ${idCol} ORDER BY day) AS prev_day + FROM ( + SELECT DISTINCT + toDate(event_at) AS day, + assumeNotNull(${idCol}) AS ${idCol} + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND ${idCol} IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + ${anonFilter} + ) + ) AS w + LEFT JOIN ( + SELECT + assumeNotNull(${idCol}) AS ${idCol}, + toDate(min(event_at)) AS first_date + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND ${idCol} IS NOT NULL + AND event_at < {untilExclusive:DateTime} + ${anonFilter} + GROUP BY ${idCol} + ) AS f USING (${idCol}) + GROUP BY w.day + ORDER BY w.day ASC + `; +} + +const ROUTE_QUERIES_AFTER: RouteQuery[] = [ + // Unchanged by fix 1/3 (already uses CAST). + ROUTE_QUERIES_BEFORE[0], // loadUsersByCountry + { + name: "loadDailyActiveUsers", + desc: "DAU per day (fix 1: CAST instead of JSONExtract)", + sql: ` + SELECT + toDate(event_at) AS day, + uniqExact(assumeNotNull(user_id)) AS dau + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + GROUP BY day + ORDER BY day ASC + `, + }, + { + name: "loadDailyActiveUsersSplit", + desc: "fix 3: server-side new/retained/reactivated (no PG join, 31-row result)", + sql: splitSqlAfter("user_id", true), + }, + { + name: "loadDailyActiveTeamsSplit", + desc: "fix 3: server-side new/retained/reactivated (no PG join, 31-row result)", + sql: splitSqlAfter("team_id", false), + }, + // Unchanged — already v4 fixed. + ROUTE_QUERIES_BEFORE[4], // loadMonthlyActiveUsers + { + name: "analyticsOverview:dailyEvents", + desc: "fix 1: direct CAST in join + non-anon filter", + sql: ` + SELECT + toDate(e.event_at) AS day, + countIf( + e.event_type = '$page-view' + AND e.user_id IS NOT NULL + AND ${NON_ANON_FILTER_AFTER} + ) AS pv, + countIf( + e.event_type = '$click' + AND e.user_id IS NOT NULL + AND ${NON_ANON_FILTER_AFTER} + ) AS cl, + uniqExactIf( + assumeNotNull(e.user_id), + e.event_type = '$page-view' + AND e.user_id IS NOT NULL + AND ${NON_ANON_FILTER_AFTER} + ) AS visitors + FROM analytics_internal.events AS e + ${ANALYTICS_USER_JOIN_AFTER} + WHERE e.event_type IN ('$page-view', '$click') + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + GROUP BY day + ORDER BY day ASC + `, + }, + { + name: "analyticsOverview:totalVisitors", + desc: "fix 1: direct CAST in join + non-anon filter", + sql: ` + SELECT + uniqExactIf( + assumeNotNull(e.user_id), + e.user_id IS NOT NULL + AND ${NON_ANON_FILTER_AFTER} + ) AS visitors + FROM analytics_internal.events AS e + ${ANALYTICS_USER_JOIN_AFTER} + WHERE e.event_type = '$page-view' + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.user_id IS NOT NULL + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + `, + }, + { + name: "analyticsOverview:topReferrers", + desc: "fix 1: direct CAST in join + non-anon filter", + sql: ` + SELECT + nullIf(CAST(e.data.referrer, 'String'), '') AS referrer, + uniqExactIf( + assumeNotNull(e.user_id), + e.user_id IS NOT NULL + AND ${NON_ANON_FILTER_AFTER} + ) AS visitors + FROM analytics_internal.events AS e + ${ANALYTICS_USER_JOIN_AFTER} + WHERE e.event_type = '$page-view' + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + GROUP BY referrer + HAVING visitors > 0 + ORDER BY visitors DESC + LIMIT 100 + `, + }, + { + name: "analyticsOverview:topRegion", + desc: "fix 1: direct CAST", + sql: ` + SELECT + CAST(data.ip_info.country_code, 'Nullable(String)') AS country_code, + CAST(data.ip_info.region_code, 'Nullable(String)') AS region_code, + uniqExactIf( + assumeNotNull(user_id), + user_id IS NOT NULL + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) AS visitors + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + GROUP BY country_code, region_code + HAVING visitors > 0 + ORDER BY visitors DESC + LIMIT 1 + `, + }, + { + name: "analyticsOverview:online", + desc: "fix 1: direct CAST", + sql: ` + SELECT + uniqExact(assumeNotNull(user_id)) AS online + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {onlineSince:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + `, + extraParams: (now, untilExclusive) => ({ + onlineSince: formatCh(new Date(now.getTime() - 5 * 60 * 1000)), + untilExclusive: formatCh(untilExclusive), + }), + }, +]; + +// More aggressive optimizations stacked on top of fixes 1+3. Each entry is +// paired with its BEFORE/AFTER counterpart by name (normalized) so the +// comparator can line them up. +const ROUTE_QUERIES_OPTIMIZED: RouteQuery[] = [ + { + name: "loadUsersByCountry", + desc: "opt: add 30-day event_at window (was unbounded)", + sql: ` + SELECT + country_code, + count() AS userCount + FROM ( + SELECT + user_id, + argMax(cc, event_at) AS country_code + FROM ( + SELECT + user_id, + event_at, + CAST(data.ip_info.country_code, 'Nullable(String)') AS cc, + CAST(data.is_anonymous, 'UInt8') AS is_anonymous + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + ) + WHERE cc IS NOT NULL + AND ({includeAnonymous:UInt8} = 1 OR is_anonymous = 0) + GROUP BY user_id + ) + WHERE country_code IS NOT NULL + GROUP BY country_code + ORDER BY userCount DESC + `, + }, + { + name: "loadDailyActiveUsersSplit", + desc: "opt: sipHash64(user_id) as window partition key + join key", + sql: ` + SELECT + toString(w.day) AS day, + count() AS total_count, + countIf(f.first_date = w.day) AS new_count, + countIf(f.first_date < w.day AND w.prev_day = addDays(w.day, -1)) AS retained_count, + countIf(f.first_date < w.day AND (isNull(w.prev_day) OR w.prev_day < addDays(w.day, -1))) AS reactivated_count + FROM ( + SELECT + day, + user_hash, + lagInFrame(day, 1) OVER (PARTITION BY user_hash ORDER BY day) AS prev_day + FROM ( + SELECT DISTINCT + toDate(event_at) AS day, + sipHash64(assumeNotNull(user_id)) AS user_hash + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) + ) AS w + LEFT JOIN ( + SELECT + sipHash64(assumeNotNull(user_id)) AS user_hash, + toDate(min(event_at)) AS first_date + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + GROUP BY user_hash + ) AS f USING (user_hash) + GROUP BY w.day + ORDER BY w.day ASC + `, + }, + { + name: "loadDailyActiveTeamsSplit", + desc: "opt: sipHash64(team_id) as window partition key + join key", + sql: ` + SELECT + toString(w.day) AS day, + count() AS total_count, + countIf(f.first_date = w.day) AS new_count, + countIf(f.first_date < w.day AND w.prev_day = addDays(w.day, -1)) AS retained_count, + countIf(f.first_date < w.day AND (isNull(w.prev_day) OR w.prev_day < addDays(w.day, -1))) AS reactivated_count + FROM ( + SELECT + day, + team_hash, + lagInFrame(day, 1) OVER (PARTITION BY team_hash ORDER BY day) AS prev_day + FROM ( + SELECT DISTINCT + toDate(event_at) AS day, + sipHash64(assumeNotNull(team_id)) AS team_hash + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND team_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + ) + ) AS w + LEFT JOIN ( + SELECT + sipHash64(assumeNotNull(team_id)) AS team_hash, + toDate(min(event_at)) AS first_date + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND team_id IS NOT NULL + AND event_at < {untilExclusive:DateTime} + GROUP BY team_hash + ) AS f USING (team_hash) + GROUP BY w.day + ORDER BY w.day ASC + `, + }, + { + name: "analyticsOverview:dailyEvents", + desc: "opt: drop LEFT JOIN, trust e.data.is_anonymous", + sql: ` + SELECT + toDate(e.event_at) AS day, + countIf( + e.event_type = '$page-view' + AND e.user_id IS NOT NULL + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) AS pv, + countIf( + e.event_type = '$click' + AND e.user_id IS NOT NULL + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) AS cl, + uniqExactIf( + assumeNotNull(e.user_id), + e.event_type = '$page-view' + AND e.user_id IS NOT NULL + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) AS visitors + FROM analytics_internal.events AS e + WHERE e.event_type IN ('$page-view', '$click') + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + GROUP BY day + ORDER BY day ASC + `, + }, + { + name: "analyticsOverview:totalVisitors", + desc: "opt: drop LEFT JOIN, trust e.data.is_anonymous", + sql: ` + SELECT + uniqExactIf( + assumeNotNull(e.user_id), + e.user_id IS NOT NULL + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) AS visitors + FROM analytics_internal.events AS e + WHERE e.event_type = '$page-view' + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.user_id IS NOT NULL + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + `, + }, + { + name: "analyticsOverview:topReferrers", + desc: "opt: drop LEFT JOIN, trust e.data.is_anonymous", + sql: ` + SELECT + nullIf(CAST(e.data.referrer, 'String'), '') AS referrer, + uniqExactIf( + assumeNotNull(e.user_id), + e.user_id IS NOT NULL + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) AS visitors + FROM analytics_internal.events AS e + WHERE e.event_type = '$page-view' + AND e.project_id = {projectId:String} + AND e.branch_id = {branchId:String} + AND e.event_at >= {since:DateTime} + AND e.event_at < {untilExclusive:DateTime} + GROUP BY referrer + HAVING visitors > 0 + ORDER BY visitors DESC + LIMIT 100 + `, + }, +]; + +async function runRouteQuery(rq: RouteQuery, p: QueryParams, now: Date): Promise { + const client = getClickhouseAdminClient(); + const queryId = `bench-route-${rq.name.replace(/[^a-z0-9]/gi, "-")}-${randomUUID()}`; + const baseParams: Record = { + projectId: p.projectId, + branchId: p.branchId, + since: formatCh(p.since), + untilExclusive: formatCh(p.untilExclusive), + includeAnonymous: p.includeAnonymous ? 1 : 0, + uuidRe: UUID_RE_CH, + }; + const extra = rq.extraParams ? rq.extraParams(now, p.untilExclusive) : {}; + await client.query({ + query: rq.sql, + query_params: { ...baseParams, ...extra }, + query_id: queryId, + format: "JSONEachRow", + }).then((r) => r.json()); // drain stream + return queryId; +} + +async function benchmarkRouteQueries(now: Date): Promise { + const untilExclusive = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()) + ONE_DAY_MS); + const since = new Date(untilExclusive.getTime() - METRICS_WINDOW_MS); + const params: QueryParams = { + projectId: BENCH_PROJECT_ID, + branchId: PERF_BRANCH_ID, + since, + untilExclusive, + includeAnonymous: false, + }; + + console.log("\n── Route-wide query benchmark (every ClickHouse query in /internal/metrics) ──"); + + // Warm cache once. + await runRouteQuery(ROUTE_QUERIES_BEFORE[1], params, now); + + async function runAll(list: RouteQuery[]): Promise> { + const out = new Map(); + for (const rq of list) { + const qid = await runRouteQuery(rq, params, now); + out.set(rq.name, await readStats(qid)); + } + return out; + } + + // Also capture the actual row payload so we can check correctness for OPT + // variants (e.g., dropping the LEFT JOIN on analyticsOverview must not change counts). + async function runAndCollect(list: RouteQuery[]): Promise<{ stats: Map, payloads: Map }> { + const stats = new Map(); + const payloads = new Map(); + for (const rq of list) { + const client = getClickhouseAdminClient(); + const queryId = `bench-route-${rq.name.replace(/[^a-z0-9]/gi, "-")}-${randomUUID()}`; + const baseParams: Record = { + projectId: params.projectId, + branchId: params.branchId, + since: formatCh(params.since), + untilExclusive: formatCh(params.untilExclusive), + includeAnonymous: params.includeAnonymous ? 1 : 0, + uuidRe: UUID_RE_CH, + }; + const extra = rq.extraParams ? rq.extraParams(now, params.untilExclusive) : {}; + const res = await client.query({ + query: rq.sql, + query_params: { ...baseParams, ...extra }, + query_id: queryId, + format: "JSONEachRow", + }); + const rows = (await res.json()) as unknown[]; + payloads.set(rq.name, rows); + stats.set(rq.name, await readStats(queryId)); + } + return { stats, payloads }; + } + + const before = await runAndCollect(ROUTE_QUERIES_BEFORE); + const after = await runAndCollect(ROUTE_QUERIES_AFTER); + const opt = await runAndCollect(ROUTE_QUERIES_OPTIMIZED); + const beforeStats = before.stats; + const afterStats = after.stats; + + // Normalize query names for the comparison table. Some AFTER queries have + // the same name as BEFORE so they line up; loadMonthlyActiveUsers's BEFORE + // entry is labeled "(FIXED: v4)" — normalize to match. + const normalize = (n: string) => n.replace(/\s*\(FIXED.*\)$/, "").trim(); + + const padL = (s: string, n: number) => s.padEnd(n); + const padR = (s: string, n: number) => s.padStart(n); + const fmtDelta = (a: number, b: number): string => { + if (a === 0) return "—"; + const ratio = a / Math.max(b, 1); + if (ratio >= 1) return `${ratio.toFixed(2)}× less`; + return `${(1 / ratio).toFixed(2)}× more`; + }; + + console.log("\n Per-query comparison (BEFORE → AFTER):"); + console.log(" " + [ + padL("query", 36), + padR("mem BEFORE", 12), + padR("mem AFTER", 12), + padR("Δ mem", 10), + padR("dur BEFORE", 11), + padR("dur AFTER", 11), + padR("Δ dur", 10), + ].join(" ")); + console.log(" " + "─".repeat(130)); + + const pairs: { name: string, before: QueryStats, after: QueryStats }[] = []; + for (const rq of ROUTE_QUERIES_BEFORE) { + const name = normalize(rq.name); + const b = beforeStats.get(rq.name); + const a = afterStats.get(rq.name) ?? afterStats.get(name) ?? + [...afterStats.entries()].find(([k]) => normalize(k) === name)?.[1]; + if (!b || !a) continue; + pairs.push({ name, before: b, after: a }); + } + // Sort by BEFORE memory descending for readability. + pairs.sort((x, y) => y.before.memory_usage - x.before.memory_usage); + + for (const { name, before, after } of pairs) { + console.log(" " + [ + padL(name, 36), + padR(fmtBytes(before.memory_usage), 12), + padR(fmtBytes(after.memory_usage), 12), + padR(fmtDelta(before.memory_usage, after.memory_usage), 10), + padR(`${before.query_duration_ms} ms`, 11), + padR(`${after.query_duration_ms} ms`, 11), + padR(fmtDelta(before.query_duration_ms, after.query_duration_ms), 10), + ].join(" ")); + } + + const sumMemBefore = pairs.reduce((a, b) => a + b.before.memory_usage, 0); + const sumMemAfter = pairs.reduce((a, b) => a + b.after.memory_usage, 0); + const maxDurBefore = Math.max(...pairs.map((p) => p.before.query_duration_ms)); + const maxDurAfter = Math.max(...pairs.map((p) => p.after.query_duration_ms)); + const sumDurBefore = pairs.reduce((a, b) => a + b.before.query_duration_ms, 0); + const sumDurAfter = pairs.reduce((a, b) => a + b.after.query_duration_ms, 0); + const sumReadBefore = pairs.reduce((a, b) => a + b.before.read_bytes, 0); + const sumReadAfter = pairs.reduce((a, b) => a + b.after.read_bytes, 0); + const sumResultBefore = pairs.reduce((a, b) => a + b.before.result_bytes, 0); + const sumResultAfter = pairs.reduce((a, b) => a + b.after.result_bytes, 0); + + console.log("\n Totals (BEFORE → AFTER):"); + console.log(` Sum peak memory: ${fmtBytes(sumMemBefore)} → ${fmtBytes(sumMemAfter)} (${fmtDelta(sumMemBefore, sumMemAfter)})`); + console.log(` Max query dur: ${maxDurBefore} ms → ${maxDurAfter} ms (${fmtDelta(maxDurBefore, maxDurAfter)}) [endpoint wall-clock floor]`); + console.log(` Sum query dur: ${sumDurBefore} ms → ${sumDurAfter} ms (${fmtDelta(sumDurBefore, sumDurAfter)}) [total CPU work]`); + console.log(` Sum bytes read: ${fmtBytes(sumReadBefore)} → ${fmtBytes(sumReadAfter)} (${fmtDelta(sumReadBefore, sumReadAfter)})`); + console.log(` Sum result ship: ${fmtBytes(sumResultBefore)} → ${fmtBytes(sumResultAfter)} (${fmtDelta(sumResultBefore, sumResultAfter)})`); + + // ── AFTER vs OPTIMIZED (additional peak-memory work) ─────────────────────── + console.log("\n AFTER vs OPTIMIZED (stacked on top of fixes 1+3):"); + console.log(" " + [ + padL("query", 36), + padR("mem AFTER", 12), + padR("mem OPT", 12), + padR("Δ mem", 12), + padR("dur AFTER", 11), + padR("dur OPT", 11), + padR("Δ dur", 10), + padL("counts=", 10), + ].join(" ")); + console.log(" " + "─".repeat(140)); + + type OptRow = { name: string, after: QueryStats, optStats: QueryStats, countsMatch: boolean | null }; + const optRows: OptRow[] = []; + for (const rq of ROUTE_QUERIES_OPTIMIZED) { + const optStats = opt.stats.get(rq.name); + const afterS = after.stats.get(rq.name); + const optPayload = opt.payloads.get(rq.name); + const afterPayload = after.payloads.get(rq.name); + if (!optStats || !afterS || !optPayload || !afterPayload) continue; + // Deep-equal JSON of both sets (ordered matters for top-N, fine otherwise). + const countsMatch = JSON.stringify(optPayload) === JSON.stringify(afterPayload); + optRows.push({ name: rq.name, after: afterS, optStats, countsMatch }); + } + optRows.sort((a, b) => b.after.memory_usage - a.after.memory_usage); + for (const r of optRows) { + console.log(" " + [ + padL(r.name, 36), + padR(fmtBytes(r.after.memory_usage), 12), + padR(fmtBytes(r.optStats.memory_usage), 12), + padR(fmtDelta(r.after.memory_usage, r.optStats.memory_usage), 12), + padR(`${r.after.query_duration_ms} ms`, 11), + padR(`${r.optStats.query_duration_ms} ms`, 11), + padR(fmtDelta(r.after.query_duration_ms, r.optStats.query_duration_ms), 10), + padL(r.countsMatch ? "yes" : "NO", 10), + ].join(" ")); + } + + // Totals if we stack OPTIMIZED on top (using OPT for queries that have an + // OPT variant, AFTER for queries that don't). + const optByName = new Map(ROUTE_QUERIES_OPTIMIZED.map((q) => [q.name, q])); + let sumMemStacked = 0; + let maxDurStacked = 0; + let sumDurStacked = 0; + for (const rq of ROUTE_QUERIES_AFTER) { + const nm = rq.name; + const optHasIt = optByName.has(nm); + const s = optHasIt ? opt.stats.get(nm) : after.stats.get(nm); + if (!s) continue; + sumMemStacked += s.memory_usage; + sumDurStacked += s.query_duration_ms; + maxDurStacked = Math.max(maxDurStacked, s.query_duration_ms); + } + console.log("\n Totals (AFTER → OPTIMIZED-stacked):"); + console.log(` Sum peak memory: ${fmtBytes(sumMemAfter)} → ${fmtBytes(sumMemStacked)} (${fmtDelta(sumMemAfter, sumMemStacked)})`); + console.log(` Max query dur: ${maxDurAfter} ms → ${maxDurStacked} ms (${fmtDelta(maxDurAfter, maxDurStacked)})`); + console.log(` Sum query dur: ${sumDurAfter} ms → ${sumDurStacked} ms (${fmtDelta(sumDurAfter, sumDurStacked)})`); +} + +async function runVariant(v: Variant, p: QueryParams): Promise<{ count: number, queryId: string }> { + const client = getClickhouseAdminClient(); + const queryId = `bench-${v.name}-${randomUUID()}`; + const res = await client.query({ + query: v.sql, + query_params: { + projectId: p.projectId, + branchId: p.branchId, + since: formatCh(p.since), + untilExclusive: formatCh(p.untilExclusive), + includeAnonymous: p.includeAnonymous ? 1 : 0, + uuidRe: UUID_RE_CH, + }, + query_id: queryId, + format: "JSONEachRow", + }); + const rows = (await res.json()) as { mau: string | number }[]; + return { count: Number(rows[0]?.mau ?? 0), queryId }; +} + +type QueryStats = { + memory_usage: number, + read_rows: number, + read_bytes: number, + result_rows: number, + result_bytes: number, + query_duration_ms: number, +}; + +async function readStats(queryId: string): Promise { + const client = getClickhouseAdminClient(); + await client.command({ query: "SYSTEM FLUSH LOGS" }); + const delays = [100, 200, 400, 800, 1600]; + for (let i = 0; i <= delays.length; i++) { + const res = await client.query({ + query: ` + SELECT + toUInt64(memory_usage) AS memory_usage, + toUInt64(read_rows) AS read_rows, + toUInt64(read_bytes) AS read_bytes, + toUInt64(result_rows) AS result_rows, + toUInt64(result_bytes) AS result_bytes, + toUInt64(query_duration_ms) AS query_duration_ms + FROM system.query_log + WHERE query_id = {qid:String} AND type = 'QueryFinish' + ORDER BY event_time DESC + LIMIT 1 + `, + query_params: { qid: queryId }, + format: "JSONEachRow", + }); + const rows = (await res.json()) as Array>; + if (rows.length === 1) { + const r = rows[0]; + return { + memory_usage: Number(r.memory_usage), + read_rows: Number(r.read_rows), + read_bytes: Number(r.read_bytes), + result_rows: Number(r.result_rows), + result_bytes: Number(r.result_bytes), + query_duration_ms: Number(r.query_duration_ms), + }; + } + if (i < delays.length) await new Promise((r) => setTimeout(r, delays[i])); + } + throw new Error(`no query_log row for ${queryId}`); +} + +async function seed(rows: EventRow[], batch = envInt("BENCH_BATCH", 50_000)): Promise { + const client = getClickhouseAdminClient(); + for (let i = 0; i < rows.length; i += batch) { + const chunk = rows.slice(i, i + batch); + await client.insert({ + table: "analytics_internal.events", + values: chunk, + format: "JSONEachRow", + clickhouse_settings: { date_time_input_format: "best_effort" }, + }); + } +} + +async function cleanup(): Promise { + const client = getClickhouseAdminClient(); + await client.command({ + query: `ALTER TABLE analytics_internal.events DELETE WHERE project_id = {p:String}`, + query_params: { p: BENCH_PROJECT_ID }, + // Block until the mutation is applied so the script exits clean. + clickhouse_settings: { mutations_sync: "2" }, + }); +} + +// ── Edge-case matrix ───────────────────────────────────────────────────────── + +type Case = { + name: string, + branchId: string, + includeAnonymous: boolean, + expected: number, + buildEvents: (windowStart: Date, windowEnd: Date) => EventRow[], +}; + +function mkUuid(): string { + // randomUUID is v4, matches isUuid regex. + return randomUUID(); +} + +function mkEvent(opts: { + branchId: string, + at: Date, + userId: string | null, + isAnonymous?: boolean | null, + eventType?: string, + projectId?: string, + teamId?: string | null, + extraData?: Record, +}): EventRow { + const eventType = opts.eventType ?? "$token-refresh"; + let data: Record; + if (eventType === "$token-refresh") { + data = { + refresh_token_id: mkUuid(), + ip_info: null, + }; + if (opts.isAnonymous !== undefined) data.is_anonymous = opts.isAnonymous; + } else { + data = {}; + if (opts.isAnonymous !== undefined) data.is_anonymous = opts.isAnonymous; + } + if (opts.extraData) Object.assign(data, opts.extraData); + return { + event_type: eventType, + event_at: formatCh(opts.at), + data, + project_id: opts.projectId ?? BENCH_PROJECT_ID, + branch_id: opts.branchId, + user_id: opts.userId, + team_id: opts.teamId ?? null, + }; +} + +function buildMatrix(): Case[] { + const otherProjectId = `${BENCH_PROJECT_ID}-other`; + return [ + { + name: "empty", + branchId: "m-empty", + includeAnonymous: false, + expected: 0, + buildEvents: () => [], + }, + { + name: "one user one event", + branchId: "m-one", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => [ + mkEvent({ branchId: "m-one", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + ], + }, + { + name: "one user many events (dedup)", + branchId: "m-dedup", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => { + const u = mkUuid(); + const rows: EventRow[] = []; + for (let i = 0; i < 50; i++) { + rows.push(mkEvent({ branchId: "m-dedup", at: new Date(s.getTime() + (i + 1) * 60_000), userId: u, isAnonymous: false })); + } + return rows; + }, + }, + { + name: "mixed anon + non-anon, include_anonymous=false", + branchId: "m-mix-false", + includeAnonymous: false, + expected: 3, + buildEvents: (s) => [ + mkEvent({ branchId: "m-mix-false", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-mix-false", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-mix-false", at: new Date(s.getTime() + 3 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-mix-false", at: new Date(s.getTime() + 4 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: true }), + mkEvent({ branchId: "m-mix-false", at: new Date(s.getTime() + 5 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: true }), + ], + }, + { + name: "mixed anon + non-anon, include_anonymous=true", + branchId: "m-mix-true", + includeAnonymous: true, + expected: 5, + buildEvents: (s) => [ + mkEvent({ branchId: "m-mix-true", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-mix-true", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-mix-true", at: new Date(s.getTime() + 3 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-mix-true", at: new Date(s.getTime() + 4 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: true }), + mkEvent({ branchId: "m-mix-true", at: new Date(s.getTime() + 5 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: true }), + ], + }, + { + name: "window boundary (before since / after until)", + branchId: "m-boundary", + includeAnonymous: false, + expected: 1, + buildEvents: (s, e) => [ + // just before since — should be excluded + mkEvent({ branchId: "m-boundary", at: new Date(s.getTime() - 1000), userId: mkUuid(), isAnonymous: false }), + // inside window — counted + mkEvent({ branchId: "m-boundary", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + // at untilExclusive — excluded (half-open interval) + mkEvent({ branchId: "m-boundary", at: new Date(e.getTime()), userId: mkUuid(), isAnonymous: false }), + ], + }, + { + name: "null user_id", + branchId: "m-null-uid", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => [ + mkEvent({ branchId: "m-null-uid", at: new Date(s.getTime() + ONE_DAY_MS), userId: null, isAnonymous: false }), + mkEvent({ branchId: "m-null-uid", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + ], + }, + { + name: "non-UUID user_id (filtered)", + branchId: "m-non-uuid", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => [ + mkEvent({ branchId: "m-non-uuid", at: new Date(s.getTime() + ONE_DAY_MS), userId: "not-a-uuid", isAnonymous: false }), + mkEvent({ branchId: "m-non-uuid", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: "12345678-1234-1234-1234-123456789012", isAnonymous: false }), // v1 UUID shape, fails v4 regex + mkEvent({ branchId: "m-non-uuid", at: new Date(s.getTime() + 3 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + ], + }, + { + name: "case variation on user_id (dedup)", + branchId: "m-case", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => { + const u = mkUuid(); + return [ + mkEvent({ branchId: "m-case", at: new Date(s.getTime() + ONE_DAY_MS), userId: u, isAnonymous: false }), + mkEvent({ branchId: "m-case", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: u.toUpperCase(), isAnonymous: false }), + mkEvent({ branchId: "m-case", at: new Date(s.getTime() + 3 * ONE_DAY_MS), userId: ` ${u} `, isAnonymous: false }), + ]; + }, + }, + { + name: "project isolation", + branchId: "m-iso", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => [ + mkEvent({ branchId: "m-iso", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + mkEvent({ branchId: "m-iso", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false, projectId: otherProjectId }), + ], + }, + { + name: "missing is_anonymous field (treated as non-anon)", + branchId: "m-missing", + includeAnonymous: false, + expected: 2, + buildEvents: (s) => [ + mkEvent({ branchId: "m-missing", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid() /* no is_anonymous */ }), + mkEvent({ branchId: "m-missing", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + ], + }, + { + name: "null is_anonymous (treated as non-anon)", + branchId: "m-null-anon", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => [ + mkEvent({ branchId: "m-null-anon", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: null }), + ], + }, + { + name: "wrong event_type ignored", + branchId: "m-wrong-type", + includeAnonymous: false, + expected: 1, + buildEvents: (s) => [ + mkEvent({ branchId: "m-wrong-type", at: new Date(s.getTime() + ONE_DAY_MS), userId: mkUuid(), isAnonymous: false, eventType: "$page-view" }), + mkEvent({ branchId: "m-wrong-type", at: new Date(s.getTime() + 2 * ONE_DAY_MS), userId: mkUuid(), isAnonymous: false }), + ], + }, + ]; +} + +async function runMatrix(now: Date): Promise { + const untilExclusive = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()) + ONE_DAY_MS); + const since = new Date(untilExclusive.getTime() - METRICS_WINDOW_MS - ONE_DAY_MS); + + const cases = buildMatrix(); + // Seed everything for matrix in one go (lots of small branches). + const all: EventRow[] = cases.flatMap((c) => c.buildEvents(since, untilExclusive)); + if (all.length) await seed(all); + + let allPass = true; + console.log("\n── Equivalence matrix (set equality, not just count) ──"); + for (const c of cases) { + const params: QueryParams = { + projectId: BENCH_PROJECT_ID, + branchId: c.branchId, + since, + untilExclusive, + includeAnonymous: c.includeAnonymous, + }; + const [oldRes, newRes, newSet] = await Promise.all([ + runOld(params), + runNew(params), + runNewSet(params), + ]); + const countMatch = oldRes.count === newRes.count && oldRes.count === c.expected; + const { onlyInA, onlyInB } = setDiff(oldRes.set, newSet); + const setMatch = onlyInA.length === 0 && onlyInB.length === 0; + const match = countMatch && setMatch; + const tag = match ? "OK" : "FAIL"; + console.log( + ` [${tag}] ${c.name.padEnd(48)} expected=${c.expected} old_count=${oldRes.count} new_count=${newRes.count} set_match=${setMatch}`, + ); + if (!setMatch) { + if (onlyInA.length > 0) console.log(` only in OLD: ${onlyInA.slice(0, 3).join(", ")}${onlyInA.length > 3 ? ` …(+${onlyInA.length - 3})` : ""}`); + if (onlyInB.length > 0) console.log(` only in NEW: ${onlyInB.slice(0, 3).join(", ")}${onlyInB.length > 3 ? ` …(+${onlyInB.length - 3})` : ""}`); + } + if (!match) allPass = false; + } + return allPass; +} + +// ── Heavy perf seed ────────────────────────────────────────────────────────── + +const COUNTRY_CODES = ["US", "DE", "FR", "GB", "JP", "IN", "BR", "CA", "AU", "ES"]; +const REFERRERS = ["https://google.com/", "https://twitter.com/", "https://news.ycombinator.com/", "", "https://github.com/", "https://reddit.com/"]; + +function pick(arr: T[]): T { + return arr[Math.floor(Math.random() * arr.length)]; +} + +async function seedPerf(now: Date): Promise { + const users = envInt("BENCH_USERS", 200_000); + const perUser = envInt("BENCH_EVENTS_USER", 5); + const pvPerUser = envInt("BENCH_PAGE_VIEWS_USER", 3); + const clicksPerUser = envInt("BENCH_CLICKS_USER", 1); + const teamRatio = envFloat("BENCH_TEAM_RATIO", 0.3); + const teamCount = Math.max(1, Math.floor(users * 0.05)); // ~5% as many teams as users + const anonRatio = envFloat("BENCH_ANON_RATIO", 0.1); + const tokenEvents = users * perUser; + const pvEvents = users * pvPerUser; + const clickEvents = users * clicksPerUser; + const total = tokenEvents + pvEvents + clickEvents; + console.log( + `\n── Seeding perf data: ${users.toLocaleString()} users ` + + `× (${perUser} $token-refresh + ${pvPerUser} $page-view + ${clicksPerUser} $click) ` + + `+ ${teamCount.toLocaleString()} teams = ${total.toLocaleString()} rows ──`, + ); + + const batchRows = envInt("BENCH_BATCH", 50_000); + const windowEnd = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()) + ONE_DAY_MS); + const windowStart = new Date(windowEnd.getTime() - METRICS_WINDOW_MS); + const spanMs = windowEnd.getTime() - windowStart.getTime(); + const teamIds: string[] = Array.from({ length: teamCount }, () => mkUuid()); + + const t0 = Date.now(); + let buf: EventRow[] = []; + const flushIfNeeded = async () => { + if (buf.length >= batchRows) { + await seed(buf, batchRows); + buf = []; + } + }; + for (let u = 0; u < users; u++) { + const uid = mkUuid(); + const isAnon = Math.random() < anonRatio; + const country = pick(COUNTRY_CODES); + const region = country + "-" + Math.floor(Math.random() * 50).toString(36); + const teamId = Math.random() < teamRatio ? pick(teamIds) : null; + // $token-refresh events (realistic ip_info payload) + for (let e = 0; e < perUser; e++) { + const at = new Date(windowStart.getTime() + Math.floor(Math.random() * spanMs)); + buf.push({ + event_type: "$token-refresh", + event_at: formatCh(at), + data: { + refresh_token_id: mkUuid(), + is_anonymous: isAnon, + ip_info: { + ip: `${Math.floor(Math.random() * 255)}.${Math.floor(Math.random() * 255)}.${Math.floor(Math.random() * 255)}.${Math.floor(Math.random() * 255)}`, + is_trusted: true, + country_code: country, + region_code: region, + city_name: `City-${Math.floor(Math.random() * 1000)}`, + latitude: Math.random() * 180 - 90, + longitude: Math.random() * 360 - 180, + tz_identifier: "UTC", + }, + }, + project_id: BENCH_PROJECT_ID, + branch_id: PERF_BRANCH_ID, + user_id: uid, + team_id: teamId, + }); + await flushIfNeeded(); + } + // $page-view events + for (let e = 0; e < pvPerUser; e++) { + const at = new Date(windowStart.getTime() + Math.floor(Math.random() * spanMs)); + buf.push(mkEvent({ + branchId: PERF_BRANCH_ID, + at, + userId: uid, + isAnonymous: isAnon, + eventType: "$page-view", + extraData: { referrer: pick(REFERRERS), url: `https://example.com/page-${Math.floor(Math.random() * 100)}` }, + })); + await flushIfNeeded(); + } + // $click events + for (let e = 0; e < clicksPerUser; e++) { + const at = new Date(windowStart.getTime() + Math.floor(Math.random() * spanMs)); + buf.push(mkEvent({ + branchId: PERF_BRANCH_ID, + at, + userId: uid, + isAnonymous: isAnon, + eventType: "$click", + extraData: { element: `btn-${Math.floor(Math.random() * 50)}` }, + })); + await flushIfNeeded(); + } + if ((u + 1) % 20_000 === 0) { + console.log(` seeded ${(u + 1).toLocaleString()} / ${users.toLocaleString()} users (${((Date.now() - t0) / 1000).toFixed(1)}s)`); + } + } + if (buf.length) await seed(buf, batchRows); + // Force parts to settle so first-query cost isn't dominated by merges. + const client = getClickhouseAdminClient(); + await client.command({ query: "OPTIMIZE TABLE analytics_internal.events FINAL", clickhouse_settings: { mutations_sync: "2" } }); + console.log(` done in ${((Date.now() - t0) / 1000).toFixed(1)}s`); +} + +function fmtBytes(n: number): string { + if (n < 1024) return `${n} B`; + if (n < 1024 ** 2) return `${(n / 1024).toFixed(1)} KiB`; + if (n < 1024 ** 3) return `${(n / 1024 ** 2).toFixed(1)} MiB`; + return `${(n / 1024 ** 3).toFixed(2)} GiB`; +} + +async function runPerf(now: Date): Promise { + const untilExclusive = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()) + ONE_DAY_MS); + const since = new Date(untilExclusive.getTime() - METRICS_WINDOW_MS); + const params: QueryParams = { + projectId: BENCH_PROJECT_ID, + branchId: PERF_BRANCH_ID, + since, + untilExclusive, + includeAnonymous: false, + }; + + console.log("\n── Perf run (include_anonymous=false) ──"); + // Warm up caches so variants compete on equal footing. + const warmup = await runVariant(VARIANTS[0], params); + void warmup; + + const oldRes = await runOld(params); + const oldStats = await readStats(oldRes.queryId); + + // Set-equality baseline: the "ground truth" set of users the OLD pipeline counts. + const truthSet = oldRes.set; + + type Row = { + name: string, + description: string, + approximate: boolean, + count: number, + stats: QueryStats, + setMatch: boolean | null, // null if approximate (skipped) + errorPct: number | null, + }; + + const rows: Row[] = [ + { + name: "v0_old (baseline)", + description: "current query: GROUP BY user_id + JS normalize", + approximate: false, + count: oldRes.count, + stats: oldStats, + setMatch: true, + errorPct: 0, + }, + ]; + + for (const v of VARIANTS) { + const { count, queryId } = await runVariant(v, params); + const stats = await readStats(queryId); + const errorPct = oldRes.count > 0 ? ((count - oldRes.count) / oldRes.count) * 100 : 0; + let setMatch: boolean | null = null; + if (!v.approximate) { + // Exact variant: verify it sees the *same users* as the old pipeline, not + // just the same count. Skip for approximate variants since they don't + // return a recoverable set. + const resSet = await runNewSet(params); + const { onlyInA, onlyInB } = setDiff(truthSet, resSet); + setMatch = onlyInA.length === 0 && onlyInB.length === 0; + } + rows.push({ name: v.name, description: v.description, approximate: v.approximate ?? false, count, stats, setMatch, errorPct }); + } + + // Table output, ranked by peak memory. + const baselineMem = oldStats.memory_usage; + const baselineDur = oldStats.query_duration_ms; + const padR = (s: string, n: number) => s.padStart(n); + const padL = (s: string, n: number) => s.padEnd(n); + console.log(`\n Ground truth (v0_old): MAU=${oldRes.count}`); + console.log(" " + [ + padL("variant", 24), + padR("memory", 12), + padR("vs base", 8), + padR("duration", 10), + padR("vs base", 8), + padR("read", 12), + padR("result", 10), + padR("count", 9), + padR("err%", 7), + padL("set=", 6), + ].join(" ")); + console.log(" " + "─".repeat(120)); + for (const r of rows) { + const memRatio = baselineMem / Math.max(r.stats.memory_usage, 1); + const durRatio = baselineDur / Math.max(r.stats.query_duration_ms, 1); + const setMatch = r.setMatch == null ? "—" : r.setMatch ? "yes" : "NO"; + console.log(" " + [ + padL(r.name + (r.approximate ? " ~" : ""), 24), + padR(fmtBytes(r.stats.memory_usage), 12), + padR(memRatio >= 1 ? `${memRatio.toFixed(2)}×` : `${memRatio.toFixed(2)}×`, 8), + padR(`${r.stats.query_duration_ms} ms`, 10), + padR(durRatio >= 1 ? `${durRatio.toFixed(2)}×` : `${durRatio.toFixed(2)}×`, 8), + padR(fmtBytes(r.stats.read_bytes), 12), + padR(fmtBytes(r.stats.result_bytes), 10), + padR(r.count.toLocaleString(), 9), + padR(r.errorPct == null ? "—" : `${r.errorPct >= 0 ? "+" : ""}${r.errorPct.toFixed(3)}%`, 7), + padL(setMatch, 6), + ].join(" ")); + } + console.log("\n Legend: ~ = approximate variant. set=yes means the variant counts the same individual users as the OLD pipeline."); +} + +async function main(): Promise { + console.log(`Benchmark run_id=${RUN_ID}`); + console.log(`project_id=${BENCH_PROJECT_ID}`); + + const now = new Date(); + let matrixOk = true; + + try { + if (!envBool("BENCH_SKIP_MATRIX")) { + matrixOk = await runMatrix(now); + if (!matrixOk) { + console.error("\nEquivalence matrix failed — skipping perf run."); + } + } else { + console.log("Skipping equivalence matrix (BENCH_SKIP_MATRIX=1)"); + } + + const doPerf = matrixOk && !envBool("BENCH_SKIP_PERF"); + const doRouteQueries = matrixOk && envBool("BENCH_ROUTE_QUERIES"); + if (doPerf || doRouteQueries) { + await seedPerf(now); + if (doPerf) await runPerf(now); + if (doRouteQueries) await benchmarkRouteQueries(now); + } else if (envBool("BENCH_SKIP_PERF")) { + console.log("Skipping perf run (BENCH_SKIP_PERF=1)"); + } + } finally { + console.log("\nCleaning up seeded rows…"); + try { + await cleanup(); + console.log(" done."); + } catch (e) { + console.error(" cleanup failed:", e); + } + } + + if (!matrixOk) process.exit(1); +} + +try { + await main(); +} catch (e) { + console.error(e); + process.exit(1); +} diff --git a/apps/backend/src/app/api/latest/internal/metrics/route.tsx b/apps/backend/src/app/api/latest/internal/metrics/route.tsx index 2c1162cfa..26398cac0 100644 --- a/apps/backend/src/app/api/latest/internal/metrics/route.tsx +++ b/apps/backend/src/app/api/latest/internal/metrics/route.tsx @@ -2,7 +2,7 @@ import { Prisma } from "@/generated/prisma/client"; import { EmailOutboxSimpleStatus } from "@/generated/prisma/enums"; import { getClickhouseAdminClient } from "@/lib/clickhouse"; import { ClickHouseError } from "@clickhouse/client"; -import { ActivitySplit, buildSplitFromDailyEntitySets } from "@/lib/metrics-activity-split"; +import { ActivitySplit } from "@/lib/metrics-activity-split"; import { Tenancy } from "@/lib/tenancies"; import { getPrismaClientForTenancy, getPrismaSchemaForTenancy, sqlQuoteIdent } from "@/prisma-client"; import { createSmartRouteHandler } from "@/route-handlers/smart-route-handler"; @@ -19,7 +19,6 @@ import { MetricsRecentUserSchema, } from "@stackframe/stack-shared/dist/interface/admin-metrics"; import { captureError, StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors"; -import { isUuid } from "@stackframe/stack-shared/dist/utils/uuids"; import { adaptSchema, adminAuthTypeSchema, yupArray, yupMixed, yupNumber, yupObject, yupRecord, yupString } from "@stackframe/stack-shared/dist/schema-fields"; import { userFullInclude, userPrismaToCrud, usersCrudHandlers } from "../../users/crud"; @@ -58,11 +57,6 @@ function formatClickhouseDateTimeParam(date: Date): string { return date.toISOString().slice(0, 19); } -function normalizeUuidFromEvent(value: string): string | null { - const normalized = value.trim().toLowerCase(); - return isUuid(normalized) ? normalized : null; -} - async function loadUsersByCountry(tenancy: Tenancy, includeAnonymous: boolean = false): Promise> { const clickhouseClient = getClickhouseAdminClient(); const res = await clickhouseClient.query({ @@ -163,7 +157,7 @@ async function loadDailyActiveUsers(tenancy: Tenancy, now: Date, includeAnonymou AND user_id IS NOT NULL AND event_at >= {since:DateTime} AND event_at < {untilExclusive:DateTime} - AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) GROUP BY day ORDER BY day ASC `, @@ -197,29 +191,71 @@ async function loadDailyActiveUsers(tenancy: Tenancy, now: Date, includeAnonymou return out; } -async function loadDailyActiveUsersSplit(tenancy: Tenancy, now: Date, includeAnonymous: boolean): Promise { +async function loadDailyActiveSplitFromClickhouse(options: { + tenancy: Tenancy, + now: Date, + entity: "user" | "team", + includeAnonymous: boolean, +}): Promise { + const { tenancy, now, entity, includeAnonymous } = options; const todayUtc = new Date(now); todayUtc.setUTCHours(0, 0, 0, 0); const since = new Date(todayUtc.getTime() - METRICS_WINDOW_MS); const untilExclusive = new Date(todayUtc.getTime() + ONE_DAY_MS); - const clickhouseClient = getClickhouseAdminClient(); - const schema = await getPrismaSchemaForTenancy(tenancy); - const prisma = await getPrismaClientForTenancy(tenancy); - const userRows = await clickhouseClient.query({ + const idCol = entity === "user" ? "user_id" : "team_id"; + // Teams don't have an is_anonymous concept, so that filter is users-only. + const anonFilter = entity === "user" + ? "AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0)" + : ""; + + const clickhouseClient = getClickhouseAdminClient(); + // Note: the inner `assumeNotNull(${idCol}) AS entity_id` must not reuse the + // column name, or ClickHouse re-resolves `WHERE ${idCol} IS NOT NULL` + // against the alias (assumeNotNull returns '' for NULLs, which passes the + // not-null test) and phantom rows slip through. + const result = await clickhouseClient.query({ query: ` SELECT - toDate(event_at) AS day, - assumeNotNull(user_id) AS user_id - FROM analytics_internal.events - WHERE event_type = '$token-refresh' - AND project_id = {projectId:String} - AND branch_id = {branchId:String} - AND user_id IS NOT NULL - AND event_at >= {since:DateTime} - AND event_at < {untilExclusive:DateTime} - AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) - GROUP BY day, user_id + toString(w.day) AS day, + count() AS total_count, + countIf(f.first_date = w.day) AS new_count, + countIf(f.first_date < w.day AND w.prev_day = addDays(w.day, -1)) AS retained_count, + countIf(f.first_date < w.day AND (isNull(w.prev_day) OR w.prev_day < addDays(w.day, -1))) AS reactivated_count + FROM ( + SELECT + day, + entity_id, + lagInFrame(day, 1) OVER (PARTITION BY entity_id ORDER BY day) AS prev_day + FROM ( + SELECT DISTINCT + toDate(event_at) AS day, + assumeNotNull(${idCol}) AS entity_id + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND ${idCol} IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + ${anonFilter} + ) + ) AS w + LEFT JOIN ( + SELECT + assumeNotNull(${idCol}) AS entity_id, + toDate(min(event_at)) AS first_date + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND ${idCol} IS NOT NULL + AND event_at < {untilExclusive:DateTime} + ${anonFilter} + GROUP BY entity_id + ) AS f USING (entity_id) + GROUP BY w.day + ORDER BY w.day ASC `, query_params: { projectId: tenancy.project.id, @@ -229,129 +265,35 @@ async function loadDailyActiveUsersSplit(tenancy: Tenancy, now: Date, includeAno includeAnonymous: includeAnonymous ? 1 : 0, }, format: "JSONEachRow", - }).then((result) => result.json() as Promise<{ day: string, user_id: string }[]>); - - const sanitizedUserRows = userRows.flatMap((row) => { - const userId = normalizeUuidFromEvent(row.user_id); - if (userId == null) { - return []; - } - return [{ ...row, user_id: userId }]; }); + const rows = (await result.json()) as { + day: string, + total_count: string, + new_count: string, + retained_count: string, + reactivated_count: string, + }[]; - const activeUserIds = [...new Set(sanitizedUserRows.map((row) => row.user_id))]; - const users: { projectUserId: string, signedUpAtOrCreatedAt: Date }[] = activeUserIds.length === 0 - ? [] - : await prisma.$replica().$queryRaw<{ projectUserId: string, signedUpAtOrCreatedAt: Date }[]>` - SELECT - "projectUserId"::text AS "projectUserId", - COALESCE("signedUpAt", "createdAt") AS "signedUpAtOrCreatedAt" - FROM ${sqlQuoteIdent(schema)}."ProjectUser" - WHERE "tenancyId" = ${tenancy.id}::UUID - AND "projectUserId" IN (${Prisma.join(activeUserIds.map((id) => Prisma.sql`${id}::UUID`))}) - ${includeAnonymous ? Prisma.empty : Prisma.sql`AND "isAnonymous" = false`} - `; - + const byDay = new Map(rows.map((r) => [r.day.split('T')[0], r])); const orderedDays: string[] = []; - const idsByDay = new Map>(); for (let i = 0; i <= METRICS_WINDOW_DAYS; i += 1) { - const date = new Date(since.getTime() + i * ONE_DAY_MS).toISOString().split('T')[0]; - orderedDays.push(date); - idsByDay.set(date, new Set()); - } - for (const row of sanitizedUserRows) { - const day = row.day.split('T')[0]; - const daySet = idsByDay.get(day); - if (daySet) { - daySet.add(row.user_id); - } + orderedDays.push(new Date(since.getTime() + i * ONE_DAY_MS).toISOString().split('T')[0]); } + const split: ActivitySplit = { + total: orderedDays.map((date) => ({ date, activity: Number(byDay.get(date)?.total_count ?? 0) })), + new: orderedDays.map((date) => ({ date, activity: Number(byDay.get(date)?.new_count ?? 0) })), + retained: orderedDays.map((date) => ({ date, activity: Number(byDay.get(date)?.retained_count ?? 0) })), + reactivated: orderedDays.map((date) => ({ date, activity: Number(byDay.get(date)?.reactivated_count ?? 0) })), + }; + return split; +} - const createdDayByUserId = new Map( - users.map((user) => [user.projectUserId, user.signedUpAtOrCreatedAt.toISOString().split('T')[0]]) - ); - - return buildSplitFromDailyEntitySets({ - orderedDays, - entityIdsByDay: idsByDay, - createdDayByEntityId: createdDayByUserId, - }); +async function loadDailyActiveUsersSplit(tenancy: Tenancy, now: Date, includeAnonymous: boolean): Promise { + return await loadDailyActiveSplitFromClickhouse({ tenancy, now, entity: "user", includeAnonymous }); } async function loadDailyActiveTeamsSplit(tenancy: Tenancy, now: Date): Promise { - const todayUtc = new Date(now); - todayUtc.setUTCHours(0, 0, 0, 0); - const since = new Date(todayUtc.getTime() - METRICS_WINDOW_MS); - const untilExclusive = new Date(todayUtc.getTime() + ONE_DAY_MS); - const clickhouseClient = getClickhouseAdminClient(); - const schema = await getPrismaSchemaForTenancy(tenancy); - const prisma = await getPrismaClientForTenancy(tenancy); - - const teamRows = await clickhouseClient.query({ - query: ` - SELECT - toDate(event_at) AS day, - assumeNotNull(team_id) AS team_id - FROM analytics_internal.events - WHERE event_type = '$token-refresh' - AND project_id = {projectId:String} - AND branch_id = {branchId:String} - AND team_id IS NOT NULL - AND event_at >= {since:DateTime} - AND event_at < {untilExclusive:DateTime} - GROUP BY day, team_id - `, - query_params: { - projectId: tenancy.project.id, - branchId: tenancy.branchId, - since: formatClickhouseDateTimeParam(since), - untilExclusive: formatClickhouseDateTimeParam(untilExclusive), - }, - format: "JSONEachRow", - }).then((result) => result.json() as Promise<{ day: string, team_id: string }[]>); - - const sanitizedTeamRows = teamRows.flatMap((row) => { - const teamId = normalizeUuidFromEvent(row.team_id); - if (teamId == null) { - return []; - } - return [{ ...row, team_id: teamId }]; - }); - - const activeTeamIds = [...new Set(sanitizedTeamRows.map((row) => row.team_id))]; - const teams: { teamId: string, createdAt: Date }[] = activeTeamIds.length === 0 - ? [] - : await prisma.$replica().$queryRaw<{ teamId: string, createdAt: Date }[]>` - SELECT "teamId"::text AS "teamId", "createdAt" - FROM ${sqlQuoteIdent(schema)}."Team" - WHERE "tenancyId" = ${tenancy.id}::UUID - AND "teamId" IN (${Prisma.join(activeTeamIds.map((id) => Prisma.sql`${id}::UUID`))}) - `; - - const orderedDays: string[] = []; - const idsByDay = new Map>(); - for (let i = 0; i <= METRICS_WINDOW_DAYS; i += 1) { - const date = new Date(since.getTime() + i * ONE_DAY_MS).toISOString().split('T')[0]; - orderedDays.push(date); - idsByDay.set(date, new Set()); - } - for (const row of sanitizedTeamRows) { - const day = row.day.split('T')[0]; - const daySet = idsByDay.get(day); - if (daySet) { - daySet.add(row.team_id); - } - } - - const createdDayByTeamId = new Map( - teams.map((team) => [team.teamId, team.createdAt.toISOString().split('T')[0]]) - ); - - return buildSplitFromDailyEntitySets({ - orderedDays, - entityIdsByDay: idsByDay, - createdDayByEntityId: createdDayByTeamId, - }); + return await loadDailyActiveSplitFromClickhouse({ tenancy, now, entity: "team", includeAnonymous: false }); } async function loadLoginMethods(tenancy: Tenancy): Promise<{ method: string, count: number }[]> { @@ -397,6 +339,9 @@ async function loadRecentlyActiveUsers(tenancy: Tenancy, includeAnonymous: boole return dbUsers.map((user) => userPrismaToCrud(user, tenancy.config)); } +// UUID v4 regex identical to isUuid() in stack-shared, ported to ClickHouse re2 syntax. +const MAU_UUID_V4_REGEX = "^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$"; + async function loadMonthlyActiveUsers(tenancy: Tenancy, now: Date, includeAnonymous: boolean = false): Promise { const { since, untilExclusive } = getMetricsWindowBounds(now); @@ -404,17 +349,19 @@ async function loadMonthlyActiveUsers(tenancy: Tenancy, now: Date, includeAnonym try { const result = await clickhouseClient.query({ query: ` - SELECT - assumeNotNull(user_id) AS user_id - FROM analytics_internal.events - WHERE event_type = '$token-refresh' - AND project_id = {projectId:String} - AND branch_id = {branchId:String} - AND user_id IS NOT NULL - AND event_at >= {since:DateTime} - AND event_at < {untilExclusive:DateTime} - AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) - GROUP BY user_id + SELECT uniqExact(sipHash64(normalized_user_id)) AS mau + FROM ( + SELECT lower(trim(assumeNotNull(user_id))) AS normalized_user_id + FROM analytics_internal.events + WHERE event_type = '$token-refresh' + AND project_id = {projectId:String} + AND branch_id = {branchId:String} + AND user_id IS NOT NULL + AND event_at >= {since:DateTime} + AND event_at < {untilExclusive:DateTime} + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) + ) + WHERE match(normalized_user_id, {uuidRe:String}) `, query_params: { projectId: tenancy.project.id, @@ -422,18 +369,12 @@ async function loadMonthlyActiveUsers(tenancy: Tenancy, now: Date, includeAnonym since: formatClickhouseDateTimeParam(since), untilExclusive: formatClickhouseDateTimeParam(untilExclusive), includeAnonymous: includeAnonymous ? 1 : 0, + uuidRe: MAU_UUID_V4_REGEX, }, format: "JSONEachRow", }); - const rows: { user_id: string }[] = await result.json(); - const uniqueUserIds = new Set(); - for (const row of rows) { - const normalizedUserId = normalizeUuidFromEvent(row.user_id); - if (normalizedUserId != null) { - uniqueUserIds.add(normalizedUserId); - } - } - return uniqueUserIds.size; + const rows: { mau: string | number }[] = await result.json(); + return Number(rows[0]?.mau ?? 0); } catch (error) { // Only swallow real ClickHouse errors (e.g. project hasn't enabled // analytics yet, transient query failure). Anything else is a programming @@ -835,7 +776,7 @@ async function loadAnalyticsOverview(tenancy: Tenancy, now: Date, includeAnonymo LEFT JOIN ( SELECT user_id, - argMax(JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8'), event_at) AS latest_is_anonymous + argMax(coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0), event_at) AS latest_is_anonymous FROM analytics_internal.events WHERE event_type = '$token-refresh' AND project_id = {projectId:String} @@ -846,7 +787,7 @@ async function loadAnalyticsOverview(tenancy: Tenancy, now: Date, includeAnonymo ) AS token_refresh_users ON e.user_id = token_refresh_users.user_id `; - const nonAnonymousAnalyticsUserFilter = "({includeAnonymous:UInt8} = 1 OR coalesce(JSONExtract(toJSONString(e.data), 'is_anonymous', 'Nullable(UInt8)'), token_refresh_users.latest_is_anonymous, 0) = 0)"; + const nonAnonymousAnalyticsUserFilter = "({includeAnonymous:UInt8} = 1 OR coalesce(CAST(e.data.is_anonymous, 'Nullable(UInt8)'), token_refresh_users.latest_is_anonymous, 0) = 0)"; const [dailyEventResult, totalVisitorResult, referrerResult, topRegionResult, onlineResult] = await Promise.all([ // Combined daily aggregates: page-view count, click count, and unique // visitors per day — one scan over the page-view/click event types. @@ -953,7 +894,7 @@ async function loadAnalyticsOverview(tenancy: Tenancy, now: Date, includeAnonymo uniqExactIf( assumeNotNull(user_id), user_id IS NOT NULL - AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) ) AS visitors FROM analytics_internal.events WHERE event_type = '$token-refresh' @@ -987,7 +928,7 @@ async function loadAnalyticsOverview(tenancy: Tenancy, now: Date, includeAnonymo AND user_id IS NOT NULL AND event_at >= {onlineSince:DateTime} AND event_at < {untilExclusive:DateTime} - AND ({includeAnonymous:UInt8} = 1 OR JSONExtract(toJSONString(data), 'is_anonymous', 'UInt8') = 0) + AND ({includeAnonymous:UInt8} = 1 OR coalesce(CAST(data.is_anonymous, 'Nullable(UInt8)'), 0) = 0) `, query_params: { onlineSince: formatClickhouseDateTimeParam(new Date(now.getTime() - 5 * 60 * 1000)), diff --git a/apps/e2e/tests/backend/endpoints/api/v1/__snapshots__/internal-metrics.test.ts.snap b/apps/e2e/tests/backend/endpoints/api/v1/__snapshots__/internal-metrics.test.ts.snap index ea78adab3..69a08bef4 100644 --- a/apps/e2e/tests/backend/endpoints/api/v1/__snapshots__/internal-metrics.test.ts.snap +++ b/apps/e2e/tests/backend/endpoints/api/v1/__snapshots__/internal-metrics.test.ts.snap @@ -3481,7 +3481,7 @@ NiceResponse { "date": , }, { - "activity": 9, + "activity": 10, "date": , }, ], @@ -3607,7 +3607,7 @@ NiceResponse { "date": , }, { - "activity": 1, + "activity": 0, "date": , }, ], From 6bc1836e6665a1d0bd6c9f1d0c796e2ff64a4c15 Mon Sep 17 00:00:00 2001 From: BilalG1 Date: Sun, 19 Apr 2026 22:58:07 -0700 Subject: [PATCH 3/3] fix(dashboard): resolve UI issues across email-* pages (#1345) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Six UI issues found across the email-* dashboard pages, ranked by impact, fixed here: 1. **email-sent layout** — the email log table and domain reputation card were forced side-by-side at all widths. A fixed-width sidebar plus a flex-1 table meant that on tablet the table got crushed, and on mobile the row overflowed horizontally. Fix: stack vertically below `lg`, and let the reputation card span full width on narrow viewports. 2. **Domain status enum leaks to the UI** — `Status: {domain.status}` rendered raw values like `pending_dns` / `pending_verification`. Added a `MANAGED_DOMAIN_STATUS_LABELS` map and route through it before rendering. 3. **email-themes dialog grid cramped on mobile** — the Change Theme dialog hardcoded `grid-cols-2`, so at 375px each theme card had ~150px and the preview images were illegible. Changed to `grid-cols-1 sm:grid-cols-2`. 4. **Template name row overflow** — long template names pushed the Edit Template button off the right edge of the card because the flex row had no `min-w-0` / `truncate`. Fixed both, and made the action column `shrink-0`. 5. **Boosted-capacity label was color-only** — during an active boost the label used a red strikethrough for the base value and a blue number for the boosted value with no non-color cue. Added an explicit `→` arrow between the two numbers, `title` tooltips on each, and a visible \"(boosted)\" marker after `/h max`. 6. **Draft progress bar overflowed at mobile width** — the 4-step progress bar used fixed 80px connectors, giving a minimum width of ~400px that clipped off both ends at 375px. Changed connectors to `w-8 sm:w-20` (32px on mobile, 80px otherwise) so all four steps and their labels fit below 640px. ## Before / after Each GIF below loops \"before\" (1s) → \"after\" (1s) with a red pill in the top-right indicating which frame is which. Full-size stills (before + after + extra viewports) are listed under **All screenshots** at the bottom. ### 1. email-sent — two-column layout collapses on narrow viewports Mobile (375px): ![email-sent mobile](https://gist.githubusercontent.com/BilalG1/edb04740a19c3f2d048da6e602209d45/raw/gif-01-email-sent-mobile.gif) Tablet (900px): ![email-sent tablet](https://gist.githubusercontent.com/BilalG1/edb04740a19c3f2d048da6e602209d45/raw/gif-01-email-sent-tablet.gif) ### 2. email-settings — managed-domain status label ![domain status](https://gist.githubusercontent.com/BilalG1/edb04740a19c3f2d048da6e602209d45/raw/gif-02-domain-status.gif) ### 3. email-themes — Change Theme dialog on mobile ![themes mobile](https://gist.githubusercontent.com/BilalG1/edb04740a19c3f2d048da6e602209d45/raw/gif-03-themes-mobile.gif) ### 4. email-templates — long name overflow ![templates overflow](https://gist.githubusercontent.com/BilalG1/edb04740a19c3f2d048da6e602209d45/raw/gif-04-templates-overflow.gif) ### 5. email-sent — boosted capacity label ![capacity label](https://gist.githubusercontent.com/BilalG1/edb04740a19c3f2d048da6e602209d45/raw/gif-05-capacity-label.gif) ### 7. email-drafts — draft progress bar on mobile ![draft progress bar](https://gist.githubusercontent.com/BilalG1/edb04740a19c3f2d048da6e602209d45/raw/gif-07-draft-progress-mobile.gif) ## Test plan - [x] \`pnpm --filter @stackframe/dashboard lint\` — clean - [x] \`pnpm --filter @stackframe/dashboard typecheck\` — clean - [x] Manual verification in a browser at 375px / 900px / 1440px, light + dark mode, for each fixed page - [ ] Reviewer sanity check of the remaining email-* pages (email-outbox, email-viewer) for similar responsive regressions ## Notes - The initial review flagged a \"white-on-white capacity boost timer\" — on closer look the label sits on a deliberately dark `bg-zinc-900/0.82` overlay inside the boost card, so it reads fine in light and dark mode. Not fixing; that part of the review was a false positive. - The initial review also flagged a missing empty state on email-templates. Because Stack seeds built-in templates, the empty branch is unreachable in practice — skipping that fix to avoid dead code. ## All screenshots Gist with all the individual before/after PNGs and the GIFs themselves: https://gist.github.com/BilalG1/edb04740a19c3f2d048da6e602209d45 ## Summary by CodeRabbit ## Release Notes * **New Features** * Added human-readable status labels for managed domains in domain settings * **Improvements** * Enhanced responsive layouts across dashboard pages for improved mobile experience * Improved email capacity display with visual indicators and tooltips for boost status * Refined template and theme selection layouts with better text handling and spacing --- .../[draftId]/draft-progress-bar.tsx | 4 ++-- .../email-sent/domain-reputation-card.tsx | 20 ++++++++++++++----- .../[projectId]/email-sent/page-client.tsx | 2 +- .../email-settings/domain-settings.tsx | 10 +++++++++- .../email-templates/page-client.tsx | 10 +++++----- .../[projectId]/email-themes/page-client.tsx | 2 +- 6 files changed, 33 insertions(+), 15 deletions(-) diff --git a/apps/dashboard/src/app/(main)/(protected)/projects/[projectId]/email-drafts/[draftId]/draft-progress-bar.tsx b/apps/dashboard/src/app/(main)/(protected)/projects/[projectId]/email-drafts/[draftId]/draft-progress-bar.tsx index 4dddf3ba0..f9b4d1656 100644 --- a/apps/dashboard/src/app/(main)/(protected)/projects/[projectId]/email-drafts/[draftId]/draft-progress-bar.tsx +++ b/apps/dashboard/src/app/(main)/(protected)/projects/[projectId]/email-drafts/[draftId]/draft-progress-bar.tsx @@ -55,7 +55,7 @@ export function DraftProgressBar({ steps, currentStep, onStepClick, disableNavig {!isLast && ( -
+
{step.label} - {!isLast &&
} + {!isLast &&
}
); })} diff --git a/apps/dashboard/src/app/(main)/(protected)/projects/[projectId]/email-sent/domain-reputation-card.tsx b/apps/dashboard/src/app/(main)/(protected)/projects/[projectId]/email-sent/domain-reputation-card.tsx index 08fd771a5..d43c6fa3d 100644 --- a/apps/dashboard/src/app/(main)/(protected)/projects/[projectId]/email-sent/domain-reputation-card.tsx +++ b/apps/dashboard/src/app/(main)/(protected)/projects/[projectId]/email-sent/domain-reputation-card.tsx @@ -247,10 +247,20 @@ export function DomainReputationCard() { const capacityLabel = isBoostActive ? ( {hourlyUsed} of{" "} - {Math.round(baseHourlyCapacity)} - {" "} - {Math.round(hourlyCapacity)} - /h max + + {Math.round(baseHourlyCapacity)} + + {" \u2192 "} + + {Math.round(hourlyCapacity)} + + /h max (boosted) ) : ( `${hourlyUsed} of ${Math.round(hourlyCapacity)}/h max` @@ -260,7 +270,7 @@ export function DomainReputationCard() {
diff --git a/apps/dashboard/src/app/(main)/(protected)/projects/[projectId]/email-sent/page-client.tsx b/apps/dashboard/src/app/(main)/(protected)/projects/[projectId]/email-sent/page-client.tsx index 153092b8e..9852d6bf3 100644 --- a/apps/dashboard/src/app/(main)/(protected)/projects/[projectId]/email-sent/page-client.tsx +++ b/apps/dashboard/src/app/(main)/(protected)/projects/[projectId]/email-sent/page-client.tsx @@ -134,7 +134,7 @@ export default function PageClient() { title="Sent" description="View email logs and domain reputation" > -
+
{/* Left side: Email Log with toggle inside card */}
= { standard: "Custom SMTP", }; +const MANAGED_DOMAIN_STATUS_LABELS: Record = { + pending_dns: "Pending DNS records", + pending_verification: "Pending verification", + verified: "Verified", + applied: "Applied", + failed: "Failed", +}; + const VISIBLE_FIELDS: Record = { shared: [], managed: [], @@ -317,7 +325,7 @@ function ManagedEmailSetupDialog(props: { trigger: React.ReactNode }) { {domain.senderLocalPart}@{domain.subdomain} - Status: {domain.status} + Status: {(MANAGED_DOMAIN_STATUS_LABELS as Record)[domain.status] ?? domain.status} -
-
-
+
+
+
- + {template.displayName}
-
+