From 0886586aa7e21d9e69da9431b26f4c824d1fe98f Mon Sep 17 00:00:00 2001 From: Mantra <87142457+mantrakp04@users.noreply.github.com> Date: Fri, 20 Mar 2026 22:26:28 -0700 Subject: [PATCH 1/3] risk score calculation debug logs (#1275) - Updated pnpm-lock.yaml to include 'rolldown' as a new optional dependency and upgraded 'minimatch' to version 10.2.4. - Added a debug log statement in risk-scores.tsx to indicate when the sign-up risk engine is disabled in the public build. ## Summary by CodeRabbit * **Bug Fixes** * Signup risk scoring now returns a neutral (zero) score when the scoring engine is unavailable; scoring errors are logged and surfaced consistently instead of being silently swallowed. * Invalid engine shapes now fail loudly rather than falling back silently. * **Chores** * Updated private engine reference and adjusted tests to reflect the new loading/resolution behavior. * Expanded Next.js output tracing to include private package files used by API routes. --- apps/backend/next.config.mjs | 4 + apps/backend/src/lib/risk-scores.tsx | 326 +++++++++++++-------------- packages/private | 2 +- 3 files changed, 164 insertions(+), 168 deletions(-) diff --git a/apps/backend/next.config.mjs b/apps/backend/next.config.mjs index d8b22a527..c2f48de7e 100644 --- a/apps/backend/next.config.mjs +++ b/apps/backend/next.config.mjs @@ -57,6 +57,10 @@ const nextConfig = { serverMinification: false, // needs to be disabled for oidc-provider to work, which relies on the original constructor names }, + outputFileTracingIncludes: { + "/api/**": ["../../packages/private/dist/**"], + }, + serverExternalPackages: [ 'oidc-provider', ], diff --git a/apps/backend/src/lib/risk-scores.tsx b/apps/backend/src/lib/risk-scores.tsx index f661f46f4..a04174ae5 100644 --- a/apps/backend/src/lib/risk-scores.tsx +++ b/apps/backend/src/lib/risk-scores.tsx @@ -2,10 +2,9 @@ import { getPrismaClientForTenancy, getPrismaSchemaForTenancy, sqlQuoteIdent } f import type { SignUpRiskScoresCrud } from "@stackframe/stack-shared/dist/interface/crud/users"; import type { SignUpAuthMethod } from "@stackframe/stack-shared/dist/utils/auth-methods"; import { captureError, StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors"; -import { isIpAddress } from "@stackframe/stack-shared/dist/utils/ips"; +import fs from "node:fs"; import path from "node:path"; import { checkEmailWithEmailable } from "./emailable"; -import { normalizeEmail } from "./emails"; import { createNeutralSignUpHeuristicFacts, type DerivedSignUpHeuristicFacts } from "./sign-up-heuristics"; import type { Tenancy } from "./tenancies"; import type { SignUpTurnstileAssessment } from "./turnstile"; @@ -44,144 +43,95 @@ export type SignUpRiskRecentStats = { similarEmailCount: number, }; -export type SignUpRiskEngineDependencies = { - now: () => Date, - normalizeEmail: (email: string) => string, - isIpAddress: (ipAddress: string) => boolean, - createAssertionError: (message: string, details: Record) => Error, - checkPrimaryEmailRisk: (email: string) => Promise<{ emailableScore: number | null }>, - loadRecentSignUpStats: (request: SignUpRiskRecentStatsRequest) => Promise, -}; - -export type SignUpRiskEngine = { +type SignUpRiskEngine = { calculateRiskAssessment: ( context: SignUpRiskScoreContext, - dependencies: SignUpRiskEngineDependencies, + dependencies: { + checkPrimaryEmailRisk: (email: string) => Promise<{ emailableScore: number | null }>, + loadRecentSignUpStats: (request: SignUpRiskRecentStatsRequest) => Promise, + }, ) => Promise, }; -// ── Fallback engine (zero scores) ────────────────────────────────────── +// ── Private engine ───────────────────────────────────────────────────── const ZERO_SCORES: SignUpRiskScores = { bot: 0, free_trial_abuse: 0 }; -const fallbackEngine: SignUpRiskEngine = { - async calculateRiskAssessment(_context, deps) { - return { scores: ZERO_SCORES, heuristicFacts: createNeutralSignUpHeuristicFacts(deps.now()) }; +export const PRIVATE_ENGINE_PATH: string | null = (() => { + const cwd = process.cwd(); + for (const relative of ["packages/private/dist/index.js", "../../packages/private/dist/index.js"]) { + const resolved = path.resolve(cwd, relative); + if (fs.existsSync(resolved)) return resolved; + } + return null; +})(); + +function createZeroRiskAssessment(now: Date): SignUpRiskAssessment { + return { scores: ZERO_SCORES, heuristicFacts: createNeutralSignUpHeuristicFacts(now) }; +} + +const ZERO_SCORE_ENGINE: SignUpRiskEngine = { + async calculateRiskAssessment() { + return createZeroRiskAssessment(new Date()); }, }; +let cachedEnginePromise: Promise | null = null; -// ── Private engine loader ────────────────────────────────────────────── - -const PRIVATE_MODULE_PATH = "dist/sign-up-risk-engine.js"; -const PRIVATE_PACKAGE_IMPORT = "@stackframe/private/dist/sign-up-risk-engine.js"; - -const _testOverrides = { - rootPath: null as string | null, - importer: null as ((modulePath: string) => Promise) | null, -}; - -let cachedEngine: Promise | null = null; - -function isEngine(value: unknown): value is SignUpRiskEngine { - return typeof value === "object" && value !== null - && "calculateRiskAssessment" in value - && typeof (value as Record).calculateRiskAssessment === "function"; -} - -function extractEngine(mod: unknown): SignUpRiskEngine { - const nested = (obj: unknown, key: string): unknown => - typeof obj === "object" && obj !== null && key in obj ? (obj as Record)[key] : undefined; - - const defaultExport = nested(mod, "default"); - for (const candidate of [mod, nested(mod, "signUpRiskEngine"), defaultExport, nested(defaultExport, "signUpRiskEngine")]) { - if (isEngine(candidate)) return candidate; - } - - throw new StackAssertionError("Private sign-up risk module does not export a valid signUpRiskEngine"); -} - -function getFallbackPaths(): string[] { - if (_testOverrides.rootPath != null) { - return [path.join(_testOverrides.rootPath, PRIVATE_MODULE_PATH)]; - } - const cwd = process.cwd(); - return [ - path.join(cwd, "packages/private", PRIVATE_MODULE_PATH), // monorepo root - path.join(cwd, "../../packages/private", PRIVATE_MODULE_PATH), // workspace dir (e.g. apps/backend) - ]; -} - -function isModuleNotFound(e: unknown): boolean { - if (typeof e === "object" && e !== null && "code" in e) { - const code = (e as { code: unknown }).code; - return code === "MODULE_NOT_FOUND" || code === "ERR_MODULE_NOT_FOUND"; - } - return false; -} - -// Native dynamic import — the webpackIgnore comment prevents bundler transformation. -function nativeImport(modulePath: string): Promise { - return import(/* webpackIgnore: true */ modulePath); +function isSignUpRiskEngine(value: unknown): value is SignUpRiskEngine { + return value != null && typeof value === "object" && typeof (value as Record).calculateRiskAssessment === "function"; } async function loadEngine(): Promise { - const importer = _testOverrides.importer ?? nativeImport; - const fallbackPaths = getFallbackPaths(); + if (PRIVATE_ENGINE_PATH == null) { + console.debug("[risk-scores] Private sign-up risk engine not found; using zero scores"); + return ZERO_SCORE_ENGINE; + } - // 1. Try package-name resolution (works when @stackframe/private is a proper dependency) + return await loadEngineFromPath(PRIVATE_ENGINE_PATH); +} + +async function loadEngineFromPath(privateEnginePath: string): Promise { + let mod: Record; try { - const engine = extractEngine(await importer(PRIVATE_PACKAGE_IMPORT)); - console.info("[risk-scores] Loaded private sign-up risk engine via package import"); - return engine; - } catch (e: unknown) { - if (!isModuleNotFound(e)) { - captureError("sign-up-risk-engine-load", new StackAssertionError( - "Failed to load private sign-up risk engine via package import", - { importPath: PRIVATE_PACKAGE_IMPORT, cause: e }, - )); - } + mod = await import(/* webpackIgnore: true */ privateEnginePath) as Record; + } catch (error) { + captureError("sign-up-risk-engine-load", new StackAssertionError( + "Failed to import private sign-up risk engine; using zero scores fallback", + { + cause: error, + path: privateEnginePath, + }, + )); + return ZERO_SCORE_ENGINE; } - - // 2. Fall back to path-based resolution for monorepo setups - for (const fullPath of fallbackPaths) { - try { - const engine = extractEngine(await importer(fullPath)); - console.info("[risk-scores] Loaded private sign-up risk engine from path:", fullPath); - return engine; - } catch (e: unknown) { - if (!isModuleNotFound(e)) { - captureError("sign-up-risk-engine-load", new StackAssertionError( - "Failed to load private sign-up risk engine from path", - { fullPath, cause: e }, - )); - } - } + const engine = mod.signUpRiskEngine; + if (!isSignUpRiskEngine(engine)) { + captureError("sign-up-risk-engine-invalid", new StackAssertionError( + "Private engine does not export a valid signUpRiskEngine; using zero scores fallback", + { path: privateEnginePath }, + )); + return ZERO_SCORE_ENGINE; } - - // 3. No engine found — fall back to zero scores - captureError("sign-up-risk-engine-not-found", new StackAssertionError( - "Private sign-up risk engine not found — using fallback (zero scores)", - { searchedPaths: [PRIVATE_PACKAGE_IMPORT, ...fallbackPaths] }, - )); - return fallbackEngine; + console.info("[risk-scores] Loaded private sign-up risk engine from", privateEnginePath); + return engine; } -function getEngine(): Promise { - if (cachedEngine == null) { - cachedEngine = loadEngine().catch((e) => { - cachedEngine = null; // clear so next call retries - throw e; - }); - } - return cachedEngine; -} +async function getEngine(): Promise { + if (cachedEnginePromise != null) return await cachedEnginePromise; -function resetForTests() { - cachedEngine = null; - _testOverrides.rootPath = null; - _testOverrides.importer = null; + const enginePromise = loadEngine(); + cachedEnginePromise = enginePromise; + + try { + return await enginePromise; + } catch (error) { + if (cachedEnginePromise === enginePromise) { + cachedEnginePromise = null; + } + throw error; + } } @@ -227,19 +177,43 @@ async function loadRecentSignUpStats( }; } -function createDependencies(tenancy: Tenancy): SignUpRiskEngineDependencies { +function createDependencies(tenancy: Tenancy) { return { - now: () => new Date(), - normalizeEmail, - isIpAddress, - createAssertionError: (message, details) => new StackAssertionError(message, details), - checkPrimaryEmailRisk: async (email) => ({ + checkPrimaryEmailRisk: async (email: string) => ({ emailableScore: (await checkEmailWithEmailable(email)).emailableScore, }), - loadRecentSignUpStats: (request) => loadRecentSignUpStats(tenancy, request), + loadRecentSignUpStats: (request: SignUpRiskRecentStatsRequest) => loadRecentSignUpStats(tenancy, request), }; } +async function calculateRiskAssessmentWithFallback( + engine: SignUpRiskEngine, + context: SignUpRiskScoreContext, + dependencies: Parameters[1], +): Promise { + try { + return await engine.calculateRiskAssessment(context, dependencies); + } catch (error) { + captureError("sign-up-risk-assessment-failed", new StackAssertionError( + "Sign-up risk assessment failed; using zero scores fallback", + { + cause: error, + privateEnginePath: PRIVATE_ENGINE_PATH, + context: { + authMethod: context.authMethod, + oauthProvider: context.oauthProvider, + hasPrimaryEmail: context.primaryEmail != null, + primaryEmailVerified: context.primaryEmailVerified, + hasIpAddress: context.ipAddress != null, + ipTrusted: context.ipTrusted, + turnstileAssessment: context.turnstileAssessment, + }, + }, + )); + return createZeroRiskAssessment(new Date()); + } +} + // ── Public API ───────────────────────────────────────────────────────── @@ -248,12 +222,7 @@ export async function calculateSignUpRiskAssessment( context: SignUpRiskScoreContext, ): Promise { const engine = await getEngine(); - try { - return await engine.calculateRiskAssessment(context, createDependencies(tenancy)); - } catch (error) { - captureError("sign-up-risk-engine-error", error); - return { scores: ZERO_SCORES, heuristicFacts: createNeutralSignUpHeuristicFacts(new Date()) }; - } + return await calculateRiskAssessmentWithFallback(engine, context, createDependencies(tenancy)); } export async function calculateSignUpRiskScores( @@ -266,52 +235,75 @@ export async function calculateSignUpRiskScores( // ── Tests ────────────────────────────────────────────────────────────── -import.meta.vitest?.test("fallback engine returns zero scores", async ({ expect }) => { - const now = new Date("2026-03-11T00:00:00.000Z"); - const assessment = await fallbackEngine.calculateRiskAssessment({ - primaryEmail: "user@example.com", - primaryEmailVerified: false, - authMethod: "password", - oauthProvider: null, - ipAddress: "127.0.0.1", - ipTrusted: true, - turnstileAssessment: { status: "invalid" }, - }, { - now: () => now, - normalizeEmail, - isIpAddress, - createAssertionError: (msg, details) => new StackAssertionError(msg, details), - checkPrimaryEmailRisk: async () => ({ emailableScore: 100 }), - loadRecentSignUpStats: async () => ({ sameIpCount: 10, similarEmailCount: 10 }), - }); - - expect(assessment).toEqual({ - scores: ZERO_SCORES, - heuristicFacts: createNeutralSignUpHeuristicFacts(now), - }); +import.meta.vitest?.test.skipIf(!PRIVATE_ENGINE_PATH)("PRIVATE_ENGINE_PATH resolves in the monorepo", ({ expect }) => { + expect(PRIVATE_ENGINE_PATH).toMatch(/packages\/private\/dist\/index\.js$/); }); -import.meta.vitest?.test("loader falls back when private submodule is absent", async ({ expect }) => { - resetForTests(); - _testOverrides.rootPath = path.join(process.cwd(), "packages", `private-missing-${Date.now()}`); - +import.meta.vitest?.test.skipIf(!PRIVATE_ENGINE_PATH)("getEngine loads the real engine when available", async ({ expect }) => { + cachedEnginePromise = null; try { - expect(await getEngine()).toBe(fallbackEngine); + const engine = await getEngine(); + await engine.calculateRiskAssessment({ + primaryEmail: null, + primaryEmailVerified: false, + authMethod: "password", + oauthProvider: null, + ipAddress: null, + ipTrusted: null, + turnstileAssessment: { status: "ok" }, + }, { + checkPrimaryEmailRisk: async () => ({ emailableScore: null }), + loadRecentSignUpStats: async () => ({ sameIpCount: 0, similarEmailCount: 0 }), + }); + expect(typeof engine.calculateRiskAssessment).toBe("function"); + expect(engine).not.toBe(ZERO_SCORE_ENGINE); } finally { - resetForTests(); + cachedEnginePromise = null; } }); -import.meta.vitest?.test("loader falls back when private engine import fails", async ({ expect }) => { - resetForTests(); - _testOverrides.rootPath = path.join(process.cwd(), "packages", "private"); - _testOverrides.importer = async () => { - throw new Error("private engine exploded"); - }; +import.meta.vitest?.test("loadEngine returns zero-score engine when private engine import fails", async ({ expect }) => { + const missingPrivateEnginePath = path.join(process.cwd(), "__missing-risk-engine__.js"); + const engine = await loadEngineFromPath(missingPrivateEnginePath); + expect(engine).toBe(ZERO_SCORE_ENGINE); +}); + +import.meta.vitest?.test("loadEngineFromPath returns zero-score engine when private engine export is invalid", async ({ expect }) => { + const invalidPrivateEnginePath = path.join(process.cwd(), "__invalid-risk-engine__.mjs"); + const invalidPrivateEngineSource = "export const signUpRiskEngine = {};\n"; + fs.writeFileSync(invalidPrivateEnginePath, invalidPrivateEngineSource); try { - expect(await getEngine()).toBe(fallbackEngine); + const engine = await loadEngineFromPath(invalidPrivateEnginePath); + expect(engine).toBe(ZERO_SCORE_ENGINE); } finally { - resetForTests(); + fs.unlinkSync(invalidPrivateEnginePath); + } +}); + +import.meta.vitest?.test("calculateRiskAssessmentWithFallback returns zero scores on engine error", async ({ expect }) => { + const { vi } = import.meta.vitest!; + vi.useFakeTimers(); + vi.setSystemTime(new Date("2026-03-20T00:00:00.000Z")); + + try { + const assessment = await calculateRiskAssessmentWithFallback({ + async calculateRiskAssessment() { throw new Error("boom"); }, + }, { + primaryEmail: "user@example.com", + primaryEmailVerified: false, + authMethod: "password", + oauthProvider: null, + ipAddress: "127.0.0.1", + ipTrusted: true, + turnstileAssessment: { status: "ok" }, + }, { + checkPrimaryEmailRisk: async () => ({ emailableScore: null }), + loadRecentSignUpStats: async () => ({ sameIpCount: 0, similarEmailCount: 0 }), + }); + + expect(assessment).toEqual(createZeroRiskAssessment(new Date("2026-03-20T00:00:00.000Z"))); + } finally { + vi.useRealTimers(); } }); diff --git a/packages/private b/packages/private index 6f708e420..2f2c03135 160000 --- a/packages/private +++ b/packages/private @@ -1 +1 @@ -Subproject commit 6f708e4206abc6ec0903dd93629c7bd137dbcb0b +Subproject commit 2f2c03135725c2cf8273304725a93d12f2bc45ec From 1d00ed2c64c6503fd4312c9775b5fbbce23dc46b Mon Sep 17 00:00:00 2001 From: Aman Ganapathy <84686202+nams1570@users.noreply.github.com> Date: Mon, 23 Mar 2026 08:55:10 -0700 Subject: [PATCH 2/3] [Fix]: Investigate Memory Leak on Verify Data Integrity (#1269) ### Context We encountered an out of memory error when running verify-data-integrity against the prod database. This was the error: `FATAL ERROR: Ineffective mark-compacts near heap limit Allocation failed - JavaScript heap out of memory`. This was one of the things preventing verify-data-integrity from running successfully in prod. ### Summary of Changes Local stress testing with constrained heap and memory telemetry revealed that the rise in used heap memory was directly proportional to the number of api calls. Investigation revealed that the `currentOutputData` array was growing with each api call and was kept in memory. Since it was still being appended to, it was actively kept in the heap. We refactor the script to no longer use it, and for the two flags `--save-output` and `--verify-output` that used it before, we refactor them to not need to. `--save-output` now streams responses to disk as JSONL and `--verify-output` now compares each response immediately and discards it. We also note a potential source of a future memory leak in the `allUsers` array that is populated in memory for each project. We refactor to paginate instead. Note that this didn't cause a memory leak on local, this is a preventive measure. ### Out of Scope fetching all transactions in the payments section of the script is another potential cause for concern, but since the payments section of the script will be refactored soon, we defer that discussion. --- .../scripts/verify-data-integrity/api.ts | 99 ++++++-- .../scripts/verify-data-integrity/index.ts | 233 +++++++++--------- 2 files changed, 201 insertions(+), 131 deletions(-) diff --git a/apps/backend/scripts/verify-data-integrity/api.ts b/apps/backend/scripts/verify-data-integrity/api.ts index 17ffac578..ad5f5945c 100644 --- a/apps/backend/scripts/verify-data-integrity/api.ts +++ b/apps/backend/scripts/verify-data-integrity/api.ts @@ -1,3 +1,4 @@ +import fs from "fs"; import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env"; import { StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors"; import { deepPlainEquals, filterUndefined } from "@stackframe/stack-shared/dist/utils/objects"; @@ -8,7 +9,7 @@ export type EndpointOutput = { responseJson: any, }; -export type OutputData = Record; +export type OutputData = Map; export type ExpectStatusCode = ( expectedStatusCode: number, @@ -16,40 +17,109 @@ export type ExpectStatusCode = ( request: RequestInit, ) => Promise; +/** + * Reads an output file that may be in either format: + * - Legacy: a single JSON object keyed by endpoint. This was old + * - JSONL: one JSON object per line, each `{ endpoint, output }` + */ +export function loadOutputData(filePath: string): OutputData { + const content = fs.readFileSync(filePath, "utf8").trim(); + const data: OutputData = new Map(); + if (!content) return data; + + const lines = content.split(/\r?\n/); + const firstLine = lines[0]; + try { + const parsed = JSON.parse(firstLine); + if (typeof parsed === "object" && parsed !== null && "endpoint" in parsed && "output" in parsed) { + for (const line of lines) { + if (!line.trim()) continue; + const { endpoint, output } = JSON.parse(line); + if (!data.has(endpoint)) data.set(endpoint, []); + data.get(endpoint)!.push(output); + } + return data; + } + } catch { + // Not JSONL — fall through to legacy parse + } + + const legacy = JSON.parse(content) as Record; + for (const [endpoint, outputs] of Object.entries(legacy)) { + data.set(endpoint, outputs); + } + return data; +} + export function createApiHelpers(options: { - currentOutputData: OutputData, targetOutputData?: OutputData, + /** + * When set, each API response is streamed to this file as JSONL + * (one `{ endpoint, output }` object per line). This avoids + * accumulating all responses in memory. Writes go to a temporary + * file first; call `finalizeOutput()` to rename it to the final path. + */ + outputFilePath?: string, }) { - const { currentOutputData, targetOutputData } = options; + const { targetOutputData, outputFilePath } = options; + const outputCountByEndpoint = new Map(); + const tmpFilePath = outputFilePath ? `${outputFilePath}.tmp` : undefined; + + if (tmpFilePath) { + fs.writeFileSync(tmpFilePath, ""); + } function appendOutputData(endpoint: string, output: EndpointOutput) { - if (!(endpoint in currentOutputData)) { - currentOutputData[endpoint] = []; - } - const newLength = currentOutputData[endpoint].push(output); + const count = (outputCountByEndpoint.get(endpoint) ?? 0) + 1; + outputCountByEndpoint.set(endpoint, count); + if (targetOutputData) { - if (!(endpoint in targetOutputData)) { + const targetEndpointOutputs = targetOutputData.get(endpoint); + if (!targetEndpointOutputs) { throw new StackAssertionError(deindent` Output data mismatch for endpoint ${endpoint}: Expected ${endpoint} to be in targetOutputData, but it is not. `, { endpoint }); } - if (targetOutputData[endpoint].length < newLength) { + if (targetEndpointOutputs.length < count) { throw new StackAssertionError(deindent` Output data mismatch for endpoint ${endpoint}: - Expected ${targetOutputData[endpoint].length} outputs but got at least ${newLength}. + Expected ${targetEndpointOutputs.length} outputs but got at least ${count}. `, { endpoint }); } - if (!(deepPlainEquals(targetOutputData[endpoint][newLength - 1], output))) { + if (!(deepPlainEquals(targetEndpointOutputs[count - 1], output))) { throw new StackAssertionError(deindent` Output data mismatch for endpoint ${endpoint}: - Expected output[${JSON.stringify(endpoint)}][${newLength - 1}] to be: - ${JSON.stringify(targetOutputData[endpoint][newLength - 1], null, 2)} + Expected output[${JSON.stringify(endpoint)}][${count - 1}] to be: + ${JSON.stringify(targetEndpointOutputs[count - 1], null, 2)} but got: ${JSON.stringify(output, null, 2)}. `, { endpoint }); } } + + if (tmpFilePath) { + fs.appendFileSync(tmpFilePath, JSON.stringify({ endpoint, output }) + "\n"); + } + } + + function verifyOutputCompleteness() { + if (!targetOutputData) return; + for (const [endpoint, expectedOutputs] of targetOutputData) { + const actualCount = outputCountByEndpoint.get(endpoint) ?? 0; + if (actualCount !== expectedOutputs.length) { + throw new StackAssertionError(deindent` + Output data mismatch for endpoint ${endpoint}: + Expected ${expectedOutputs.length} outputs but got ${actualCount}. + `, { endpoint, expectedCount: expectedOutputs.length, actualCount }); + } + } + } + + function finalizeOutput() { + if (tmpFilePath && outputFilePath) { + fs.renameSync(tmpFilePath, outputFilePath); + } } const expectStatusCode: ExpectStatusCode = async (expectedStatusCode, endpoint, request) => { @@ -87,6 +157,7 @@ export function createApiHelpers(options: { return { appendOutputData, expectStatusCode, + verifyOutputCompleteness, + finalizeOutput, }; } - diff --git a/apps/backend/scripts/verify-data-integrity/index.ts b/apps/backend/scripts/verify-data-integrity/index.ts index 3ab0ed2f0..ea1f01d70 100644 --- a/apps/backend/scripts/verify-data-integrity/index.ts +++ b/apps/backend/scripts/verify-data-integrity/index.ts @@ -3,12 +3,12 @@ import { getPrismaClientForTenancy, globalPrismaClient } from "@/prisma-client"; import type { OrganizationRenderedConfig } from "@stackframe/stack-shared/dist/config/schema"; import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env"; import { StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors"; -import { deepPlainEquals, omit } from "@stackframe/stack-shared/dist/utils/objects"; +import { omit } from "@stackframe/stack-shared/dist/utils/objects"; import { wait } from "@stackframe/stack-shared/dist/utils/promises"; import { deindent } from "@stackframe/stack-shared/dist/utils/strings"; import fs from "fs"; -import { createApiHelpers, type OutputData } from "./api"; +import { createApiHelpers, loadOutputData, type OutputData } from "./api"; import { createPaymentsVerifier } from "./payments-verifier"; import { createRecurse } from "./recurse"; import { verifyStripePayoutIntegrity } from "./stripe-payout-integrity"; @@ -19,7 +19,6 @@ const STRIPE_SECRET_KEY = getEnvVariable("STACK_STRIPE_SECRET_KEY", ""); const USE_MOCK_STRIPE_API = STRIPE_SECRET_KEY === "sk_test_mockstripekey"; let targetOutputData: OutputData | undefined = undefined; -const currentOutputData: OutputData = {}; async function main() { console.log(); @@ -83,9 +82,12 @@ async function main() { const maxUsersPerProject = maxUsersPerProjectFlag ? parseInt(maxUsersPerProjectFlag.split("=")[1], 10) : Infinity; - const { recurse, collectedErrors } = createRecurse({ noBail }); + if (shouldSaveOutput && shouldVerifyOutput) { + throw new Error("Cannot use --save-output and --verify-output at the same time."); + } + if (noBail) { console.log(`Running in no-bail mode: will continue on errors and report all at the end.`); } @@ -102,11 +104,12 @@ async function main() { throw new Error(`Cannot verify output: ${OUTPUT_FILE_PATH} does not exist`); } try { - targetOutputData = JSON.parse(fs.readFileSync(OUTPUT_FILE_PATH, "utf8")); + targetOutputData = loadOutputData(OUTPUT_FILE_PATH); // TODO next-release these are hacks for the migration, delete them - if (targetOutputData) { - targetOutputData["/api/v1/internal/projects/current"] = targetOutputData["/api/v1/internal/projects/current"].map(output => { + const projectCurrentOutputs = targetOutputData.get("/api/v1/internal/projects/current"); + if (projectCurrentOutputs) { + targetOutputData.set("/api/v1/internal/projects/current", projectCurrentOutputs.map(output => { if ("config" in output.responseJson) { delete output.responseJson.config.id; output.responseJson.config.oauth_providers = output.responseJson.config.oauth_providers @@ -116,7 +119,7 @@ async function main() { .map((provider: any) => omit(provider, ["enabled"])); } return output; - }); + })); } console.log(`Loaded previous output data for verification`); @@ -125,9 +128,9 @@ async function main() { } } - const { expectStatusCode } = createApiHelpers({ - currentOutputData, + const { expectStatusCode, verifyOutputCompleteness, finalizeOutput } = createApiHelpers({ targetOutputData, + outputFilePath: shouldSaveOutput ? OUTPUT_FILE_PATH : undefined, }); const projects = await prismaClient.project.findMany({ @@ -191,30 +194,6 @@ async function main() { ]); void currentProject; - // Fetch users with pagination - const PAGE_LIMIT = 1000; - const allUsers: any[] = []; - let cursor: string | undefined = undefined; - while (allUsers.length < maxUsersPerProject) { - const remainingToFetch = maxUsersPerProject - allUsers.length; - const limit = Math.min(PAGE_LIMIT, remainingToFetch); - const cursorParam: string = cursor ? `&cursor=${encodeURIComponent(cursor)}` : ""; - const usersPage = await expectStatusCode(200, `/api/v1/users?limit=${limit}${cursorParam}`, { - method: "GET", - headers: { - "x-stack-project-id": projectId, - "x-stack-access-type": "admin", - "x-stack-development-override-key": getEnvVariable("STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY"), - }, - }); - allUsers.push(...usersPage.items); - if (!usersPage.pagination?.next_cursor) { - break; - } - cursor = usersPage.pagination.next_cursor; - } - const users = { items: allUsers.slice(0, maxUsersPerProject) }; - const tenancy = await getSoleTenancyFromProjectBranch(projectId, DEFAULT_BRANCH_ID, true); const paymentsConfig = tenancy ? (tenancy.config as OrganizationRenderedConfig).payments : undefined; const paymentsVerifier = tenancy && paymentsConfig @@ -241,86 +220,110 @@ async function main() { const verifiedTeams = new Set(); if (!skipUsers) { - for (let j = 0; j < users.items.length; j++) { - const user = users.items[j]; - await recurse(`[user ${j + 1}/${users.items.length}] ${user.display_name ?? user.primary_email}`, async (recurse) => { - // get user individually - await expectStatusCode(200, `/api/v1/users/${user.id}`, { - method: "GET", - headers: { - "x-stack-project-id": projectId, - "x-stack-access-type": "admin", - "x-stack-development-override-key": getEnvVariable("STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY"), - }, - }); + const userCount = tenancy + ? await (await getPrismaClientForTenancy(tenancy)).projectUser.count({ where: { tenancyId: tenancy.id } }) + : 0; - // list project permissions - const projectPermissions = await expectStatusCode(200, `/api/v1/project-permissions?user_id=${user.id}`, { - method: "GET", - headers: { - "x-stack-project-id": projectId, - "x-stack-access-type": "admin", - "x-stack-development-override-key": getEnvVariable("STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY"), - }, - }); - for (const projectPermission of projectPermissions.items) { - // `any` because these endpoint response types aren't imported here, - // and this script is intentionally tolerant of response shape changes. - if (!projectPermissionDefinitions.items.some((p: any) => p.id === projectPermission.id)) { - throw new StackAssertionError(deindent` - Project permission ${projectPermission.id} not found in project permission definitions. - `); - } - } + // Process users page-by-page to avoid holding all users in memory at once + const PAGE_LIMIT = 1000; + let userCursor: string | undefined = undefined; + let usersProcessed = 0; + let hasMore = true; - // list teams - const teams = await expectStatusCode(200, `/api/v1/teams?user_id=${user.id}`, { - method: "GET", - headers: { - "x-stack-project-id": projectId, - "x-stack-access-type": "admin", - "x-stack-development-override-key": getEnvVariable("STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY"), - }, - }); - - for (const team of teams.items) { - await recurse(`[team ${team.id}] ${team.name}`, async (recurse) => { - // list team permissions - const teamPermissions = await expectStatusCode(200, `/api/v1/team-permissions?team_id=${team.id}`, { - method: "GET", - headers: { - "x-stack-project-id": projectId, - "x-stack-access-type": "admin", - "x-stack-development-override-key": getEnvVariable("STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY"), - }, - }); - for (const teamPermission of teamPermissions.items) { - // `any` because these endpoint response types aren't imported here, - // and this script is intentionally tolerant of response shape changes. - if (!teamPermissionDefinitions.items.some((p: any) => p.id === teamPermission.id)) { - throw new StackAssertionError(deindent` - Team permission ${teamPermission.id} not found in team permission definitions. - `); - } - } - }); - - if (paymentsVerifier && !verifiedTeams.has(team.id)) { - await paymentsVerifier.verifyCustomerPayments({ - customerType: "team", - customerId: team.id, - }); - verifiedTeams.add(team.id); - } - } - - if (paymentsVerifier) { - await paymentsVerifier.verifyCustomerPayments({ - customerType: "user", - customerId: user.id, - }); - } + while (hasMore && usersProcessed < maxUsersPerProject) { + const remainingToFetch = maxUsersPerProject - usersProcessed; + const limit = Math.min(PAGE_LIMIT, remainingToFetch); + const cursorParam: string = userCursor ? `&cursor=${encodeURIComponent(userCursor)}` : ""; + const usersPage = await expectStatusCode(200, `/api/v1/users?limit=${limit}${cursorParam}`, { + method: "GET", + headers: { + "x-stack-project-id": projectId, + "x-stack-access-type": "admin", + "x-stack-development-override-key": getEnvVariable("STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY"), + }, }); + + for (const user of usersPage.items) { + if (usersProcessed >= maxUsersPerProject) break; + usersProcessed++; + await recurse(`[user ${usersProcessed}/${Math.min(userCount, maxUsersPerProject)}] ${user.display_name ?? user.primary_email}`, async (recurse) => { + await expectStatusCode(200, `/api/v1/users/${user.id}`, { + method: "GET", + headers: { + "x-stack-project-id": projectId, + "x-stack-access-type": "admin", + "x-stack-development-override-key": getEnvVariable("STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY"), + }, + }); + + const projectPermissions = await expectStatusCode(200, `/api/v1/project-permissions?user_id=${user.id}`, { + method: "GET", + headers: { + "x-stack-project-id": projectId, + "x-stack-access-type": "admin", + "x-stack-development-override-key": getEnvVariable("STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY"), + }, + }); + for (const projectPermission of projectPermissions.items) { + // `any` because these endpoint response types aren't imported here, + // and this script is intentionally tolerant of response shape changes. + if (!projectPermissionDefinitions.items.some((p: any) => p.id === projectPermission.id)) { + throw new StackAssertionError(deindent` + Project permission ${projectPermission.id} not found in project permission definitions. + `); + } + } + + const teams = await expectStatusCode(200, `/api/v1/teams?user_id=${user.id}`, { + method: "GET", + headers: { + "x-stack-project-id": projectId, + "x-stack-access-type": "admin", + "x-stack-development-override-key": getEnvVariable("STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY"), + }, + }); + + for (const team of teams.items) { + await recurse(`[team ${team.id}] ${team.name}`, async (recurse) => { + const teamPermissions = await expectStatusCode(200, `/api/v1/team-permissions?team_id=${team.id}`, { + method: "GET", + headers: { + "x-stack-project-id": projectId, + "x-stack-access-type": "admin", + "x-stack-development-override-key": getEnvVariable("STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY"), + }, + }); + for (const teamPermission of teamPermissions.items) { + // `any` because these endpoint response types aren't imported here, + // and this script is intentionally tolerant of response shape changes. + if (!teamPermissionDefinitions.items.some((p: any) => p.id === teamPermission.id)) { + throw new StackAssertionError(deindent` + Team permission ${teamPermission.id} not found in team permission definitions. + `); + } + } + }); + + if (paymentsVerifier && !verifiedTeams.has(team.id)) { + await paymentsVerifier.verifyCustomerPayments({ + customerType: "team", + customerId: team.id, + }); + verifiedTeams.add(team.id); + } + } + + if (paymentsVerifier) { + await paymentsVerifier.verifyCustomerPayments({ + customerType: "user", + customerId: user.id, + }); + } + }); + } + + hasMore = !!usersPage.pagination?.next_cursor; + userCursor = usersPage.pagination?.next_cursor ?? undefined; } if (paymentsVerifier) { @@ -335,13 +338,9 @@ async function main() { }); } - if (targetOutputData && !deepPlainEquals(currentOutputData, targetOutputData)) { - throw new StackAssertionError(deindent` - Output data mismatch between final and target output data. - `); - } + verifyOutputCompleteness(); if (shouldSaveOutput) { - fs.writeFileSync(OUTPUT_FILE_PATH, JSON.stringify(currentOutputData, null, 2)); + finalizeOutput(); console.log(`Output saved to ${OUTPUT_FILE_PATH}`); } From d51c303fb01fee34fa2fd4da46a2ffaddc6df9d6 Mon Sep 17 00:00:00 2001 From: BilalG1 Date: Mon, 23 Mar 2026 10:09:04 -0700 Subject: [PATCH 3/3] fix clickhouse surrogate pair bug (#1270) ## Summary by CodeRabbit * **Bug Fixes** * Enhanced analytics event processing to properly handle edge cases when data contains certain truncated special characters or emoji sequences, ensuring data integrity. * **Tests** * Added coverage for analytics data edge case handling. --- .../latest/analytics/events/batch/route.tsx | 24 +++++++++- .../api/v1/analytics-events-batch.test.ts | 45 +++++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/apps/backend/src/app/api/latest/analytics/events/batch/route.tsx b/apps/backend/src/app/api/latest/analytics/events/batch/route.tsx index 683b1d8bc..d1403c87f 100644 --- a/apps/backend/src/app/api/latest/analytics/events/batch/route.tsx +++ b/apps/backend/src/app/api/latest/analytics/events/batch/route.tsx @@ -10,6 +10,28 @@ const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[089ab][0-9a-f]{3}-[0 const MAX_EVENTS = 500; +// Lone surrogates (\uD800-\uDFFF not part of a valid pair) are technically +// representable in JS strings but rejected by ClickHouse's JSON parser. +// The client-side event tracker can produce these when .substring() truncates +// text in the middle of a surrogate pair (e.g. emoji characters). +// eslint-disable-next-line no-control-regex +const LONE_SURROGATE_RE = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(? [k, stripLoneSurrogates(v)]) + ); + } + return value; +} + export const POST = createSmartRouteHandler({ metadata: { summary: "Upload analytics event batch", @@ -69,7 +91,7 @@ export const POST = createSmartRouteHandler({ const rows = body.events.map((event) => ({ event_type: event.event_type, event_at: new Date(event.event_at_ms), - data: event.data, + data: stripLoneSurrogates(event.data), project_id: projectId, branch_id: branchId, user_id: userId, diff --git a/apps/e2e/tests/backend/endpoints/api/v1/analytics-events-batch.test.ts b/apps/e2e/tests/backend/endpoints/api/v1/analytics-events-batch.test.ts index 5813cd96d..c47f9a6f5 100644 --- a/apps/e2e/tests/backend/endpoints/api/v1/analytics-events-batch.test.ts +++ b/apps/e2e/tests/backend/endpoints/api/v1/analytics-events-batch.test.ts @@ -160,6 +160,51 @@ it("accepts valid $click events", async ({ expect }) => { `); }); +it("handles click event data containing a truncated surrogate pair (lone high surrogate)", async ({ expect }) => { + await Project.createAndSwitch({ config: { magic_link_enabled: true } }); + await Project.updateConfig({ apps: { installed: { analytics: { enabled: true } } } }); + await Auth.Otp.signIn(); + + // Simulate what the client-side event tracker does: .substring(0, 200) can + // cut a string in the middle of a surrogate pair when emoji characters are + // near the boundary. For example, 🍉 is "\uD83C\uDF49" in UTF-16; cutting + // after the high surrogate leaves a lone "\uD83C" that ClickHouse cannot parse. + const paddedText = "a".repeat(199) + "\uD83C"; // lone high surrogate at position 199 + + const now = Date.now(); + const res = await uploadEventBatch({ + sessionReplaySegmentId: randomUUID(), + batchId: randomUUID(), + sentAtMs: now, + events: [ + { + event_type: "$click", + event_at_ms: now - 50, + data: { + tag_name: "div", + text: paddedText, + href: null, + selector: "div.container", + x: 100, + y: 200, + page_x: 100, + page_y: 500, + viewport_width: 375, + viewport_height: 647, + }, + }, + ], + }); + + expect(res).toMatchInlineSnapshot(` + NiceResponse { + "status": 200, + "body": { "inserted": 1 }, + "headers": Headers {