stack/apps/backend/scripts/verify-data-integrity/api.ts
Aman Ganapathy 1d00ed2c64
[Fix]: Investigate Memory Leak on Verify Data Integrity (#1269)
### Context
We encountered an out of memory error when running verify-data-integrity
against the prod database. This was the error:
`FATAL ERROR: Ineffective mark-compacts near heap limit Allocation
failed - JavaScript heap out of memory`. This was one of the things
preventing verify-data-integrity from running successfully in prod.

### Summary of Changes
Local stress testing with constrained heap and memory telemetry revealed
that the rise in used heap memory was directly proportional to the
number of api calls. Investigation revealed that the `currentOutputData`
array was growing with each api call and was kept in memory. Since it
was still being appended to, it was actively kept in the heap. We
refactor the script to no longer use it, and for the two flags
`--save-output` and `--verify-output` that used it before, we refactor
them to not need to. `--save-output` now streams responses to disk as
JSONL and `--verify-output` now compares each response immediately and
discards it.
We also note a potential source of a future memory leak in the
`allUsers` array that is populated in memory for each project. We
refactor to paginate instead. Note that this didn't cause a memory leak
on local, this is a preventive measure.

### Out of Scope
fetching all transactions in the payments section of the script is
another potential cause for concern, but since the payments section of
the script will be refactored soon, we defer that discussion.
2026-03-23 08:55:10 -07:00

164 lines
5.4 KiB
TypeScript

import fs from "fs";
import { getEnvVariable } from "@stackframe/stack-shared/dist/utils/env";
import { StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors";
import { deepPlainEquals, filterUndefined } from "@stackframe/stack-shared/dist/utils/objects";
import { deindent } from "@stackframe/stack-shared/dist/utils/strings";
export type EndpointOutput = {
status: number,
responseJson: any,
};
export type OutputData = Map<string, EndpointOutput[]>;
export type ExpectStatusCode = <T = any>(
expectedStatusCode: number,
endpoint: string,
request: RequestInit,
) => Promise<T>;
/**
* Reads an output file that may be in either format:
* - Legacy: a single JSON object keyed by endpoint. This was old
* - JSONL: one JSON object per line, each `{ endpoint, output }`
*/
export function loadOutputData(filePath: string): OutputData {
const content = fs.readFileSync(filePath, "utf8").trim();
const data: OutputData = new Map();
if (!content) return data;
const lines = content.split(/\r?\n/);
const firstLine = lines[0];
try {
const parsed = JSON.parse(firstLine);
if (typeof parsed === "object" && parsed !== null && "endpoint" in parsed && "output" in parsed) {
for (const line of lines) {
if (!line.trim()) continue;
const { endpoint, output } = JSON.parse(line);
if (!data.has(endpoint)) data.set(endpoint, []);
data.get(endpoint)!.push(output);
}
return data;
}
} catch {
// Not JSONL — fall through to legacy parse
}
const legacy = JSON.parse(content) as Record<string, EndpointOutput[]>;
for (const [endpoint, outputs] of Object.entries(legacy)) {
data.set(endpoint, outputs);
}
return data;
}
export function createApiHelpers(options: {
targetOutputData?: OutputData,
/**
* When set, each API response is streamed to this file as JSONL
* (one `{ endpoint, output }` object per line). This avoids
* accumulating all responses in memory. Writes go to a temporary
* file first; call `finalizeOutput()` to rename it to the final path.
*/
outputFilePath?: string,
}) {
const { targetOutputData, outputFilePath } = options;
const outputCountByEndpoint = new Map<string, number>();
const tmpFilePath = outputFilePath ? `${outputFilePath}.tmp` : undefined;
if (tmpFilePath) {
fs.writeFileSync(tmpFilePath, "");
}
function appendOutputData(endpoint: string, output: EndpointOutput) {
const count = (outputCountByEndpoint.get(endpoint) ?? 0) + 1;
outputCountByEndpoint.set(endpoint, count);
if (targetOutputData) {
const targetEndpointOutputs = targetOutputData.get(endpoint);
if (!targetEndpointOutputs) {
throw new StackAssertionError(deindent`
Output data mismatch for endpoint ${endpoint}:
Expected ${endpoint} to be in targetOutputData, but it is not.
`, { endpoint });
}
if (targetEndpointOutputs.length < count) {
throw new StackAssertionError(deindent`
Output data mismatch for endpoint ${endpoint}:
Expected ${targetEndpointOutputs.length} outputs but got at least ${count}.
`, { endpoint });
}
if (!(deepPlainEquals(targetEndpointOutputs[count - 1], output))) {
throw new StackAssertionError(deindent`
Output data mismatch for endpoint ${endpoint}:
Expected output[${JSON.stringify(endpoint)}][${count - 1}] to be:
${JSON.stringify(targetEndpointOutputs[count - 1], null, 2)}
but got:
${JSON.stringify(output, null, 2)}.
`, { endpoint });
}
}
if (tmpFilePath) {
fs.appendFileSync(tmpFilePath, JSON.stringify({ endpoint, output }) + "\n");
}
}
function verifyOutputCompleteness() {
if (!targetOutputData) return;
for (const [endpoint, expectedOutputs] of targetOutputData) {
const actualCount = outputCountByEndpoint.get(endpoint) ?? 0;
if (actualCount !== expectedOutputs.length) {
throw new StackAssertionError(deindent`
Output data mismatch for endpoint ${endpoint}:
Expected ${expectedOutputs.length} outputs but got ${actualCount}.
`, { endpoint, expectedCount: expectedOutputs.length, actualCount });
}
}
}
function finalizeOutput() {
if (tmpFilePath && outputFilePath) {
fs.renameSync(tmpFilePath, outputFilePath);
}
}
const expectStatusCode: ExpectStatusCode = async (expectedStatusCode, endpoint, request) => {
const apiUrl = new URL(getEnvVariable("NEXT_PUBLIC_STACK_API_URL"));
const response = await fetch(new URL(endpoint, apiUrl), {
...request,
headers: {
"x-stack-disable-artificial-development-delay": "yes",
"x-stack-development-disable-extended-logging": "yes",
...filterUndefined(request.headers ?? {}),
},
});
const responseText = await response.text();
if (response.status !== expectedStatusCode) {
throw new StackAssertionError(deindent`
Expected status code ${expectedStatusCode} but got ${response.status} for ${endpoint}:
${responseText}
`, { request, response });
}
const responseJson = JSON.parse(responseText);
const currentOutput: EndpointOutput = {
status: response.status,
responseJson,
};
appendOutputData(endpoint, currentOutput);
return responseJson;
};
return {
appendOutputData,
expectStatusCode,
verifyOutputCompleteness,
finalizeOutput,
};
}