chore(internal-analytics): move benchmark scripts under internal-analytics/ and fix lint

Move the platform-analytics benchmark/optimization harnesses into
scripts/internal-analytics/ and resolve eslint failures (no-unnecessary-condition
on query_log row access, max-statements-per-line, and the .catch(console.error)
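restriction). Lint and backend typecheck both pass.

Behavior note: with the `?.` / `?? 0` fallbacks on query_log rows removed, a run
whose query_log lookup returns no row now raises an error instead of being
recorded with zeroed metrics.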
This commit is contained in:
Bilal Godil 2026-06-19 11:52:33 -07:00
parent 252c5651a0
commit 60ccf1a06f
3 changed files with 87 additions and 27 deletions

View File

@ -49,13 +49,16 @@ const OUT = getEnvVariable("PA_OUT", "/tmp/platform-analytics-bench.json");
const chAdmin = getClickhouseAdminClient();
const chMetrics = getClickhouseAdminClientForMetrics();
function log(...a: unknown[]) { console.log(`[${new Date().toISOString().slice(11, 19)}]`, ...a); }
/** Logs the given values, prefixed with the current UTC time as `[HH:MM:SS]`. */
function log(...a: unknown[]) {
  // Characters 11..18 of an ISO-8601 timestamp are its HH:MM:SS portion.
  const clock = new Date().toISOString().slice(11, 19);
  console.log(`[${clock}]`, ...a);
}
// ---------- window math (mirror the route) ----------
const ONE_DAY_MS = 24 * 60 * 60 * 1000;
const WINDOW_DAYS = 30;
const now = new Date();
const todayUtc = new Date(now); todayUtc.setUTCHours(0, 0, 0, 0);
// Midnight (00:00:00.000) UTC of the current day, derived from `now`.
const todayUtc = new Date(now);
todayUtc.setUTCHours(0, 0, 0, 0);
// Window start: WINDOW_DAYS - 1 days before today's UTC midnight, so windowStart..untilExclusive spans exactly WINDOW_DAYS days.
const windowStart = new Date(todayUtc.getTime() - (WINDOW_DAYS - 1) * ONE_DAY_MS);
// 2 * WINDOW_DAYS - 1 days before today's UTC midnight, i.e. WINDOW_DAYS days before windowStart
// (presumably the start of the preceding comparison window — confirm against the route this mirrors).
const priorStart = new Date(todayUtc.getTime() - (2 * WINDOW_DAYS - 1) * ONE_DAY_MS);
// Exclusive upper bound: UTC midnight at the start of tomorrow.
const untilExclusive = new Date(todayUtc.getTime() + ONE_DAY_MS);
@ -438,7 +441,7 @@ async function runChQuery(q: ChQ): Promise<ChResult> {
const queryId = `pa-${q.name}-${randomUUID()}`;
try {
const r = await chMetrics.query({ query: q.sql, query_params: q.params, query_id: queryId, format: "JSONEachRow" });
const rows = await r.json();
await r.json();
await chMetrics.command({ query: "SYSTEM FLUSH LOGS" });
const stat = await (await chMetrics.query({
query: `SELECT query_duration_ms, memory_usage, read_rows, read_bytes, result_rows
@ -448,9 +451,9 @@ async function runChQuery(q: ChQ): Promise<ChResult> {
const s = stat[0];
// Indexing an empty result yields undefined at runtime even though the element type says otherwise.
// Fail with a descriptive message instead of a TypeError on the property reads below.
// The widening cast keeps the check from tripping no-unnecessary-condition.
if ((s as typeof s | undefined) === undefined) {
  throw new Error(`no system.query_log row found for query_id ${queryId}`);
}
const res: ChResult = {
name: q.name, what: q.what,
durationMs: Number(s?.query_duration_ms ?? 0), memMiB: Number(s?.memory_usage ?? 0) / 1048576,
readRows: Number(s?.read_rows ?? 0), readMiB: Number(s?.read_bytes ?? 0) / 1048576,
resultRows: Number(s?.result_rows ?? rows.length),
durationMs: Number(s.query_duration_ms), memMiB: Number(s.memory_usage) / 1048576,
readRows: Number(s.read_rows), readMiB: Number(s.read_bytes) / 1048576,
resultRows: Number(s.result_rows),
};
if (!best || res.durationMs < best.durationMs) best = res;
} catch (e) {
@ -488,7 +491,7 @@ async function runPgQuery(q: PgQ): Promise<PgResult> {
return best!;
}
function sumChildren(node: Record<string, unknown>, key: string): number {
const plans = (node.Plans as Array<Record<string, unknown>>) ?? [];
const plans = (node.Plans as Array<Record<string, unknown>> | undefined) ?? [];
let s = 0;
for (const c of plans) s += Number(c[key] ?? 0) + sumChildren(c, key);
return s;
@ -497,7 +500,7 @@ function topNodes(node: Record<string, unknown>, depth = 0): string {
const t = String(node["Node Type"] ?? "");
const rel = node["Relation Name"] ? ` ${node["Relation Name"]}` : "";
let s = `${" ".repeat(depth)}${t}${rel} (rows=${node["Actual Rows"]})`;
const plans = (node.Plans as Array<Record<string, unknown>>) ?? [];
const plans = (node.Plans as Array<Record<string, unknown>> | undefined) ?? [];
for (const c of plans.slice(0, 3)) s += "\n" + topNodes(c, depth + 1);
return s;
}
@ -515,11 +518,19 @@ async function main() {
log("running ClickHouse queries...");
const chResults: ChResult[] = [];
for (const q of CH_QUERIES) { const r = await runChQuery(q); chResults.push(r); log(` CH ${q.name}: ${r.error ? "ERR " + r.error : `${r.durationMs}ms, ${r.memMiB.toFixed(0)}MiB, read ${r.readRows.toLocaleString()} rows`}`); }
for (const q of CH_QUERIES) {
const r = await runChQuery(q);
chResults.push(r);
log(` CH ${q.name}: ${r.error ? "ERR " + r.error : `${r.durationMs}ms, ${r.memMiB.toFixed(0)}MiB, read ${r.readRows.toLocaleString()} rows`}`);
}
log("running Postgres queries...");
const pgResults: PgResult[] = [];
for (const q of PG_QUERIES) { const r = await runPgQuery(q); pgResults.push(r); log(` PG ${q.name}: ${r.error ? "ERR " + r.error : `${r.durationMs.toFixed(0)}ms, ${(r.sharedHitMiB + r.sharedReadMiB).toFixed(0)}MiB buffers`}`); }
for (const q of PG_QUERIES) {
const r = await runPgQuery(q);
pgResults.push(r);
log(` PG ${q.name}: ${r.error ? "ERR " + r.error : `${r.durationMs.toFixed(0)}ms, ${(r.sharedHitMiB + r.sharedReadMiB).toFixed(0)}MiB buffers`}`);
}
const out = {
generatedAt: new Date().toISOString(),
@ -539,4 +550,10 @@ async function main() {
}
process.exit(0);
}
main().catch((e) => { console.error("BENCH FAILED:", e); process.exit(1); });
// Script entry point: run the benchmark; any failure is logged and becomes exit code 1.
try {
  await main();
} catch (failure) {
  console.error("BENCH FAILED:", failure);
  process.exit(1);
}

View File

@ -19,7 +19,10 @@ import { getEnvVariable } from "@hexclave/shared/dist/utils/env";
import { randomUUID } from "node:crypto";
import { writeFileSync } from "node:fs";
const envInt = (n: string, f: number) => { const v = getEnvVariable(n, ""); return v === "" ? f : Number(v); };
/**
 * Reads an integer setting via `getEnvVariable`.
 *
 * @param n Name of the environment variable.
 * @param f Fallback returned when the lookup yields an empty or whitespace-only string
 *   (the default passed to `getEnvVariable` here is `""`).
 * @returns The parsed integer, or `f` when no value is provided.
 * @throws Error when the value does not parse to an integer, so a typo such as
 *   `1_000` fails fast instead of silently turning into `NaN`.
 */
const envInt = (n: string, f: number) => {
  const raw = getEnvVariable(n, "").trim();
  if (raw === "") return f;
  const parsed = Number(raw);
  if (!Number.isInteger(parsed)) {
    throw new Error(`${n} must be an integer, got ${JSON.stringify(raw)}`);
  }
  return parsed;
};
/** True only when the named variable's value is exactly "1" or "true". */
const envBool = (n: string) => {
  const raw = getEnvVariable(n, "");
  return raw === "1" || raw === "true";
};
const NUM_PROJECTS = envInt("PA_PROJECTS", 10_000);
const NUM_USERS = envInt("PA_USERS", 1_000_000);
@ -30,7 +33,9 @@ const chMetrics = getClickhouseAdminClientForMetrics();
/** Logs the given values, prefixed with the current UTC time as `[HH:MM:SS]`. */
const log = (...a: unknown[]) => {
  // Characters 11..18 of an ISO-8601 timestamp are its HH:MM:SS portion.
  const clock = new Date().toISOString().slice(11, 19);
  console.log(`[${clock}]`, ...a);
};
const ONE_DAY_MS = 86400000, WINDOW_DAYS = 30;
const now = new Date(); const todayUtc = new Date(now); todayUtc.setUTCHours(0, 0, 0, 0);
// Single reference instant for the whole script run.
const now = new Date();
// Midnight (00:00:00.000) UTC of the current day, derived from `now`.
const todayUtc = new Date(now);
todayUtc.setUTCHours(0, 0, 0, 0);
// Window start: WINDOW_DAYS - 1 days before today's UTC midnight, so windowStart..untilExclusive spans exactly WINDOW_DAYS days.
const windowStart = new Date(todayUtc.getTime() - (WINDOW_DAYS - 1) * ONE_DAY_MS);
// 2 * WINDOW_DAYS - 1 days before today's UTC midnight, i.e. WINDOW_DAYS days before windowStart
// (presumably the start of the preceding comparison window — confirm against the queries that use it).
const priorStart = new Date(todayUtc.getTime() - (2 * WINDOW_DAYS - 1) * ONE_DAY_MS);
// Exclusive upper bound: UTC midnight at the start of tomorrow.
const untilExclusive = new Date(todayUtc.getTime() + ONE_DAY_MS);
@ -103,7 +108,7 @@ async function runCh(sql: string, params: Record<string, unknown>): Promise<Run>
const rows = await (await chMetrics.query({ query: sql, query_params: params, query_id: qid, format: "JSONEachRow" })).json<Record<string, unknown>>();
await chMetrics.command({ query: "SYSTEM FLUSH LOGS" });
const s = (await (await chMetrics.query({ query: `SELECT query_duration_ms d, memory_usage m, read_rows r FROM system.query_log WHERE query_id={q:String} AND type='QueryFinish' ORDER BY event_time DESC LIMIT 1`, query_params: { q: qid }, format: "JSONEachRow" })).json<{ d: string, m: string, r: string }>())[0];
const run: Run = { ms: Number(s?.d ?? 0), memMiB: Number(s?.m ?? 0) / 1048576, readRows: Number(s?.r ?? 0), canon: canonRows(rows) };
// `[0]` on an empty result is undefined at runtime (no QueryFinish row matched this query_id).
// Fail with a clear message rather than a TypeError on the property reads below.
// The widening cast keeps the check from tripping no-unnecessary-condition.
const logRow = s as typeof s | undefined;
if (logRow === undefined) {
  throw new Error(`no QueryFinish row in system.query_log for query_id ${qid}`);
}
const run: Run = { ms: Number(logRow.d), memMiB: Number(logRow.m) / 1048576, readRows: Number(logRow.r), canon: canonRows(rows) };
if (!best || run.memMiB < best.memMiB) best = run;
}
return best!;
@ -118,7 +123,9 @@ async function runPg(sql: string, params: unknown[]): Promise<PgRun> {
const plan = (await globalPrismaClient.$queryRawUnsafe<Array<{ "QUERY PLAN": Array<{ Plan: Record<string, unknown>, "Execution Time": number }> }>>(`EXPLAIN (ANALYZE,BUFFERS,FORMAT JSON) ${sql}`, ...params))[0]["QUERY PLAN"][0];
const sumBuf = (node: Record<string, unknown>): number => {
let s = Number(node["Shared Hit Blocks"] ?? 0) + Number(node["Shared Read Blocks"] ?? 0) + Number(node["Temp Written Blocks"] ?? 0) + Number(node["Temp Read Blocks"] ?? 0);
for (const c of (node.Plans as Array<Record<string, unknown>> ?? [])) s += sumBuf(c);
for (const c of ((node.Plans as Array<Record<string, unknown>> | undefined) ?? [])) {
s += sumBuf(c);
}
return s;
};
const run = { ms: Number(plan["Execution Time"]), bufMiB: sumBuf(plan.Plan) * 8192 / 1048576, rows: Number(plan.Plan["Actual Rows"] ?? 0) };
@ -204,7 +211,11 @@ const PG_CASES: PgCase[] = [
];
async function main() {
if (!envBool("PA_SKIP_SEED")) await seed(); else log("reusing bench_pa");
if (!envBool("PA_SKIP_SEED")) {
await seed();
} else {
log("reusing bench_pa");
}
const report: { ch: unknown[], pg: unknown[] } = { ch: [], pg: [] };
log("=== ClickHouse ===");
@ -241,7 +252,16 @@ async function main() {
writeFileSync("/tmp/platform-analytics-optimize.json", JSON.stringify({ generatedAt: new Date().toISOString(), scale: { NUM_PROJECTS, NUM_USERS, NUM_EVENTS }, ...report }, null, 2));
log("wrote /tmp/platform-analytics-optimize.json");
if (!envBool("PA_KEEP")) { await chAdmin.command({ query: "DROP DATABASE IF EXISTS bench_pa" }); await globalPrismaClient.$executeRawUnsafe("DROP SCHEMA IF EXISTS bench_pa CASCADE"); }
if (!envBool("PA_KEEP")) {
await chAdmin.command({ query: "DROP DATABASE IF EXISTS bench_pa" });
await globalPrismaClient.$executeRawUnsafe("DROP SCHEMA IF EXISTS bench_pa CASCADE");
}
process.exit(0);
}
main().catch((e) => { console.error("FAILED:", e); process.exit(1); });
// Script entry point: run the optimization harness; any failure is logged and becomes exit code 1.
try {
  await main();
} catch (failure) {
  console.error("FAILED:", failure);
  process.exit(1);
}

View File

@ -15,7 +15,10 @@ import { getEnvVariable } from "@hexclave/shared/dist/utils/env";
import { randomUUID } from "node:crypto";
import { writeFileSync } from "node:fs";
const envInt = (n: string, f: number) => { const v = getEnvVariable(n, ""); return v === "" ? f : Number(v); };
/**
 * Reads an integer setting via `getEnvVariable`.
 *
 * @param n Name of the environment variable.
 * @param f Fallback returned when the lookup yields an empty or whitespace-only string
 *   (the default passed to `getEnvVariable` here is `""`).
 * @returns The parsed integer, or `f` when no value is provided.
 * @throws Error when the value does not parse to an integer, so a typo such as
 *   `1_000` fails fast instead of silently turning into `NaN`.
 */
const envInt = (n: string, f: number) => {
  const raw = getEnvVariable(n, "").trim();
  if (raw === "") return f;
  const parsed = Number(raw);
  if (!Number.isInteger(parsed)) {
    throw new Error(`${n} must be an integer, got ${JSON.stringify(raw)}`);
  }
  return parsed;
};
/** True only when the named variable's value is exactly "1" or "true". */
const envBool = (n: string) => {
  const raw = getEnvVariable(n, "");
  return raw === "1" || raw === "true";
};
const NUM_PROJECTS = envInt("PA_PROJECTS", 10_000), NUM_USERS = envInt("PA_USERS", 1_000_000), NUM_EVENTS = envInt("PA_EVENTS", 50_000_000);
const ZIPF_K = 4, BRANCH = "main", INTERNAL = "internal";
@ -24,7 +27,9 @@ const chMetrics = getClickhouseAdminClientForMetrics();
/** Logs the given values, prefixed with the current UTC time as `[HH:MM:SS]`. */
const log = (...a: unknown[]) => {
  // Characters 11..18 of an ISO-8601 timestamp are its HH:MM:SS portion.
  const clock = new Date().toISOString().slice(11, 19);
  console.log(`[${clock}]`, ...a);
};
const ONE_DAY_MS = 86400000, WINDOW_DAYS = 30;
const now = new Date(); const todayUtc = new Date(now); todayUtc.setUTCHours(0, 0, 0, 0);
// Single reference instant for the whole script run.
const now = new Date();
// Midnight (00:00:00.000) UTC of the current day, derived from `now`.
const todayUtc = new Date(now);
todayUtc.setUTCHours(0, 0, 0, 0);
// Window start: WINDOW_DAYS - 1 days before today's UTC midnight, so windowStart..untilExclusive spans exactly WINDOW_DAYS days.
const windowStart = new Date(todayUtc.getTime() - (WINDOW_DAYS - 1) * ONE_DAY_MS);
// Exclusive upper bound: UTC midnight at the start of tomorrow.
const untilExclusive = new Date(todayUtc.getTime() + ONE_DAY_MS);
/** Formats a date as `YYYY-MM-DDTHH:MM:SS` (UTC, seconds precision) by truncating its ISO-8601 string. */
const chDT = (d: Date) => {
  const iso = d.toISOString();
  return iso.substring(0, 19);
};
@ -105,7 +110,7 @@ async function run(sql: string, settings?: Record<string, string>): Promise<Run>
const rows = (await r.json<Record<string, string>>()).map((x) => ({ day: x.day, total_count: Number(x.total_count), new_count: Number(x.new_count), retained_count: Number(x.retained_count), reactivated_count: Number(x.reactivated_count) }));
await chMetrics.command({ query: "SYSTEM FLUSH LOGS" });
const s = (await (await chMetrics.query({ query: `SELECT query_duration_ms d, memory_usage m, read_rows rr FROM system.query_log WHERE query_id={q:String} AND type='QueryFinish' ORDER BY event_time DESC LIMIT 1`, query_params: { q: qid }, format: "JSONEachRow" })).json<{ d: string, m: string, rr: string }>())[0];
const run = { mem: Number(s?.m ?? 0) / 1048576, ms: Number(s?.d ?? 0), readRows: Number(s?.rr ?? 0), rows };
// `[0]` on an empty result is undefined at runtime (no QueryFinish row matched this query_id).
// Fail with a clear message rather than a TypeError on the property reads below.
// The widening cast keeps the check from tripping no-unnecessary-condition.
const logRow = s as typeof s | undefined;
if (logRow === undefined) {
  throw new Error(`no QueryFinish row in system.query_log for query_id ${qid}`);
}
const run = { mem: Number(logRow.m) / 1048576, ms: Number(logRow.d), readRows: Number(logRow.rr), rows };
if (!best || run.mem < best.mem) best = run;
}
return best!;
@ -120,10 +125,16 @@ function accuracy(truth: Day[], approx: Day[]) {
for (const m of metrics) {
const errs: number[] = [];
for (const a of approx) {
const t = tm.get(a.day); if (!t) continue;
const t = tm.get(a.day);
if (!t) continue;
const tv = t[m], av = a[m];
if (tv === 0) { if (av !== 0) errs.push(100); continue; }
const e = Math.abs(av - tv) / tv * 100; errs.push(e); errsAll.push(e);
if (tv === 0) {
if (av !== 0) errs.push(100);
continue;
}
const e = Math.abs(av - tv) / tv * 100;
errs.push(e);
errsAll.push(e);
}
per[m] = { mean: errs.reduce((s, x) => s + x, 0) / Math.max(1, errs.length), max: Math.max(0, ...errs) };
}
@ -131,7 +142,11 @@ function accuracy(truth: Day[], approx: Day[]) {
}
async function main() {
if (!envBool("PA_SKIP_SEED")) await seed(); else log("reusing bench_pa.events");
if (!envBool("PA_SKIP_SEED")) {
await seed();
} else {
log("reusing bench_pa.events");
}
const cases: Array<{ name: string, kind: "exact" | "approx", sql: string, settings?: Record<string, string> }> = [
{ name: "original (string entity)", kind: "exact", sql: exactSql("assumeNotNull(user_id)") },
@ -157,7 +172,15 @@ async function main() {
}
writeFileSync("/tmp/split-optimize.json", JSON.stringify({ generatedAt: new Date().toISOString(), scale: { NUM_PROJECTS, NUM_USERS, NUM_EVENTS }, cases: out }, null, 2));
log("wrote /tmp/split-optimize.json");
if (!envBool("PA_KEEP")) await chAdmin.command({ query: "DROP DATABASE IF EXISTS bench_pa" });
if (!envBool("PA_KEEP")) {
await chAdmin.command({ query: "DROP DATABASE IF EXISTS bench_pa" });
}
process.exit(0);
}
main().catch((e) => { console.error("FAILED:", e); process.exit(1); });
// Script entry point: run the split-optimization harness; any failure is logged and becomes exit code 1.
try {
  await main();
} catch (failure) {
  console.error("FAILED:", failure);
  process.exit(1);
}