diff --git a/apps/backend/scripts/benchmark-platform-analytics.ts b/apps/backend/scripts/internal-analytics/benchmark-platform-analytics.ts similarity index 96% rename from apps/backend/scripts/benchmark-platform-analytics.ts rename to apps/backend/scripts/internal-analytics/benchmark-platform-analytics.ts index bcccc7535..7b86b5f48 100644 --- a/apps/backend/scripts/benchmark-platform-analytics.ts +++ b/apps/backend/scripts/internal-analytics/benchmark-platform-analytics.ts @@ -49,13 +49,16 @@ const OUT = getEnvVariable("PA_OUT", "/tmp/platform-analytics-bench.json"); const chAdmin = getClickhouseAdminClient(); const chMetrics = getClickhouseAdminClientForMetrics(); -function log(...a: unknown[]) { console.log(`[${new Date().toISOString().slice(11, 19)}]`, ...a); } +function log(...a: unknown[]) { + console.log(`[${new Date().toISOString().slice(11, 19)}]`, ...a); +} // ---------- window math (mirror the route) ---------- const ONE_DAY_MS = 24 * 60 * 60 * 1000; const WINDOW_DAYS = 30; const now = new Date(); -const todayUtc = new Date(now); todayUtc.setUTCHours(0, 0, 0, 0); +const todayUtc = new Date(now); +todayUtc.setUTCHours(0, 0, 0, 0); const windowStart = new Date(todayUtc.getTime() - (WINDOW_DAYS - 1) * ONE_DAY_MS); const priorStart = new Date(todayUtc.getTime() - (2 * WINDOW_DAYS - 1) * ONE_DAY_MS); const untilExclusive = new Date(todayUtc.getTime() + ONE_DAY_MS); @@ -438,7 +441,7 @@ async function runChQuery(q: ChQ): Promise { const queryId = `pa-${q.name}-${randomUUID()}`; try { const r = await chMetrics.query({ query: q.sql, query_params: q.params, query_id: queryId, format: "JSONEachRow" }); - const rows = await r.json(); + await r.json(); await chMetrics.command({ query: "SYSTEM FLUSH LOGS" }); const stat = await (await chMetrics.query({ query: `SELECT query_duration_ms, memory_usage, read_rows, read_bytes, result_rows @@ -448,9 +451,9 @@ async function runChQuery(q: ChQ): Promise { const s = stat[0]; const res: ChResult = { name: q.name, what: q.what, - durationMs: Number(s?.query_duration_ms ?? 0), memMiB: Number(s?.memory_usage ?? 0) / 1048576, - readRows: Number(s?.read_rows ?? 0), readMiB: Number(s?.read_bytes ?? 0) / 1048576, - resultRows: Number(s?.result_rows ?? rows.length), + durationMs: Number(s.query_duration_ms), memMiB: Number(s.memory_usage) / 1048576, + readRows: Number(s.read_rows), readMiB: Number(s.read_bytes) / 1048576, + resultRows: Number(s.result_rows), }; if (!best || res.durationMs < best.durationMs) best = res; } catch (e) { @@ -488,7 +491,7 @@ async function runPgQuery(q: PgQ): Promise { return best!; } function sumChildren(node: Record, key: string): number { - const plans = (node.Plans as Array>) ?? []; + const plans = (node.Plans as Array> | undefined) ?? []; let s = 0; for (const c of plans) s += Number(c[key] ?? 0) + sumChildren(c, key); return s; @@ -497,7 +500,7 @@ function topNodes(node: Record, depth = 0): string { const t = String(node["Node Type"] ?? ""); const rel = node["Relation Name"] ? ` ${node["Relation Name"]}` : ""; let s = `${" ".repeat(depth)}${t}${rel} (rows=${node["Actual Rows"]})`; - const plans = (node.Plans as Array>) ?? []; + const plans = (node.Plans as Array> | undefined) ?? []; for (const c of plans.slice(0, 3)) s += "\n" + topNodes(c, depth + 1); return s; } @@ -515,11 +518,19 @@ async function main() { log("running ClickHouse queries..."); const chResults: ChResult[] = []; - for (const q of CH_QUERIES) { const r = await runChQuery(q); chResults.push(r); log(` CH ${q.name}: ${r.error ? "ERR " + r.error : `${r.durationMs}ms, ${r.memMiB.toFixed(0)}MiB, read ${r.readRows.toLocaleString()} rows`}`); } + for (const q of CH_QUERIES) { + const r = await runChQuery(q); + chResults.push(r); + log(` CH ${q.name}: ${r.error ? "ERR " + r.error : `${r.durationMs}ms, ${r.memMiB.toFixed(0)}MiB, read ${r.readRows.toLocaleString()} rows`}`); + } log("running Postgres queries..."); const pgResults: PgResult[] = []; - for (const q of PG_QUERIES) { const r = await runPgQuery(q); pgResults.push(r); log(` PG ${q.name}: ${r.error ? "ERR " + r.error : `${r.durationMs.toFixed(0)}ms, ${(r.sharedHitMiB + r.sharedReadMiB).toFixed(0)}MiB buffers`}`); } + for (const q of PG_QUERIES) { + const r = await runPgQuery(q); + pgResults.push(r); + log(` PG ${q.name}: ${r.error ? "ERR " + r.error : `${r.durationMs.toFixed(0)}ms, ${(r.sharedHitMiB + r.sharedReadMiB).toFixed(0)}MiB buffers`}`); + } const out = { generatedAt: new Date().toISOString(), @@ -539,4 +550,10 @@ async function main() { } process.exit(0); } -main().catch((e) => { console.error("BENCH FAILED:", e); process.exit(1); }); + +try { + await main(); +} catch (e) { + console.error("BENCH FAILED:", e); + process.exit(1); +} diff --git a/apps/backend/scripts/optimize-platform-analytics.ts b/apps/backend/scripts/internal-analytics/optimize-platform-analytics.ts similarity index 96% rename from apps/backend/scripts/optimize-platform-analytics.ts rename to apps/backend/scripts/internal-analytics/optimize-platform-analytics.ts index c09f363b5..97c2b08bb 100644 --- a/apps/backend/scripts/optimize-platform-analytics.ts +++ b/apps/backend/scripts/internal-analytics/optimize-platform-analytics.ts @@ -19,7 +19,10 @@ import { getEnvVariable } from "@hexclave/shared/dist/utils/env"; import { randomUUID } from "node:crypto"; import { writeFileSync } from "node:fs"; -const envInt = (n: string, f: number) => { const v = getEnvVariable(n, ""); return v === "" ? f : Number(v); }; +const envInt = (n: string, f: number) => { + const v = getEnvVariable(n, ""); + return v === "" ? f : Number(v); +}; const envBool = (n: string) => ["1", "true"].includes(getEnvVariable(n, "")); const NUM_PROJECTS = envInt("PA_PROJECTS", 10_000); const NUM_USERS = envInt("PA_USERS", 1_000_000); @@ -30,7 +33,9 @@ const chMetrics = getClickhouseAdminClientForMetrics(); const log = (...a: unknown[]) => console.log(`[${new Date().toISOString().slice(11, 19)}]`, ...a); const ONE_DAY_MS = 86400000, WINDOW_DAYS = 30; -const now = new Date(); const todayUtc = new Date(now); todayUtc.setUTCHours(0, 0, 0, 0); +const now = new Date(); +const todayUtc = new Date(now); +todayUtc.setUTCHours(0, 0, 0, 0); const windowStart = new Date(todayUtc.getTime() - (WINDOW_DAYS - 1) * ONE_DAY_MS); const priorStart = new Date(todayUtc.getTime() - (2 * WINDOW_DAYS - 1) * ONE_DAY_MS); const untilExclusive = new Date(todayUtc.getTime() + ONE_DAY_MS); @@ -103,7 +108,7 @@ async function runCh(sql: string, params: Record): Promise const rows = await (await chMetrics.query({ query: sql, query_params: params, query_id: qid, format: "JSONEachRow" })).json>(); await chMetrics.command({ query: "SYSTEM FLUSH LOGS" }); const s = (await (await chMetrics.query({ query: `SELECT query_duration_ms d, memory_usage m, read_rows r FROM system.query_log WHERE query_id={q:String} AND type='QueryFinish' ORDER BY event_time DESC LIMIT 1`, query_params: { q: qid }, format: "JSONEachRow" })).json<{ d: string, m: string, r: string }>())[0]; - const run: Run = { ms: Number(s?.d ?? 0), memMiB: Number(s?.m ?? 0) / 1048576, readRows: Number(s?.r ?? 0), canon: canonRows(rows) }; + const run: Run = { ms: Number(s.d), memMiB: Number(s.m) / 1048576, readRows: Number(s.r), canon: canonRows(rows) }; if (!best || run.memMiB < best.memMiB) best = run; } return best!; @@ -118,7 +123,9 @@ async function runPg(sql: string, params: unknown[]): Promise { const plan = (await globalPrismaClient.$queryRawUnsafe, "Execution Time": number }> }>>(`EXPLAIN (ANALYZE,BUFFERS,FORMAT JSON) ${sql}`, ...params))[0]["QUERY PLAN"][0]; const sumBuf = (node: Record): number => { let s = Number(node["Shared Hit Blocks"] ?? 0) + Number(node["Shared Read Blocks"] ?? 0) + Number(node["Temp Written Blocks"] ?? 0) + Number(node["Temp Read Blocks"] ?? 0); - for (const c of (node.Plans as Array> ?? [])) s += sumBuf(c); + for (const c of ((node.Plans as Array> | undefined) ?? [])) { + s += sumBuf(c); + } return s; }; const run = { ms: Number(plan["Execution Time"]), bufMiB: sumBuf(plan.Plan) * 8192 / 1048576, rows: Number(plan.Plan["Actual Rows"] ?? 0) }; @@ -204,7 +211,11 @@ const PG_CASES: PgCase[] = [ ]; async function main() { - if (!envBool("PA_SKIP_SEED")) await seed(); else log("reusing bench_pa"); + if (!envBool("PA_SKIP_SEED")) { + await seed(); + } else { + log("reusing bench_pa"); + } const report: { ch: unknown[], pg: unknown[] } = { ch: [], pg: [] }; log("=== ClickHouse ==="); @@ -241,7 +252,16 @@ async function main() { writeFileSync("/tmp/platform-analytics-optimize.json", JSON.stringify({ generatedAt: new Date().toISOString(), scale: { NUM_PROJECTS, NUM_USERS, NUM_EVENTS }, ...report }, null, 2)); log("wrote /tmp/platform-analytics-optimize.json"); - if (!envBool("PA_KEEP")) { await chAdmin.command({ query: "DROP DATABASE IF EXISTS bench_pa" }); await globalPrismaClient.$executeRawUnsafe("DROP SCHEMA IF EXISTS bench_pa CASCADE"); } + if (!envBool("PA_KEEP")) { + await chAdmin.command({ query: "DROP DATABASE IF EXISTS bench_pa" }); + await globalPrismaClient.$executeRawUnsafe("DROP SCHEMA IF EXISTS bench_pa CASCADE"); + } process.exit(0); } -main().catch((e) => { console.error("FAILED:", e); process.exit(1); }); + +try { + await main(); +} catch (e) { + console.error("FAILED:", e); + process.exit(1); +} diff --git a/apps/backend/scripts/optimize-split.ts b/apps/backend/scripts/internal-analytics/optimize-split.ts similarity index 92% rename from apps/backend/scripts/optimize-split.ts rename to apps/backend/scripts/internal-analytics/optimize-split.ts index 4cdfcced3..62991dc55 100644 --- a/apps/backend/scripts/optimize-split.ts +++ b/apps/backend/scripts/internal-analytics/optimize-split.ts @@ -15,7 +15,10 @@ import { getEnvVariable } from "@hexclave/shared/dist/utils/env"; import { randomUUID } from "node:crypto"; import { writeFileSync } from "node:fs"; -const envInt = (n: string, f: number) => { const v = getEnvVariable(n, ""); return v === "" ? f : Number(v); }; +const envInt = (n: string, f: number) => { + const v = getEnvVariable(n, ""); + return v === "" ? f : Number(v); +}; const envBool = (n: string) => ["1", "true"].includes(getEnvVariable(n, "")); const NUM_PROJECTS = envInt("PA_PROJECTS", 10_000), NUM_USERS = envInt("PA_USERS", 1_000_000), NUM_EVENTS = envInt("PA_EVENTS", 50_000_000); const ZIPF_K = 4, BRANCH = "main", INTERNAL = "internal"; @@ -24,7 +27,9 @@ const chMetrics = getClickhouseAdminClientForMetrics(); const log = (...a: unknown[]) => console.log(`[${new Date().toISOString().slice(11, 19)}]`, ...a); const ONE_DAY_MS = 86400000, WINDOW_DAYS = 30; -const now = new Date(); const todayUtc = new Date(now); todayUtc.setUTCHours(0, 0, 0, 0); +const now = new Date(); +const todayUtc = new Date(now); +todayUtc.setUTCHours(0, 0, 0, 0); const windowStart = new Date(todayUtc.getTime() - (WINDOW_DAYS - 1) * ONE_DAY_MS); const untilExclusive = new Date(todayUtc.getTime() + ONE_DAY_MS); const chDT = (d: Date) => d.toISOString().slice(0, 19); @@ -105,7 +110,7 @@ async function run(sql: string, settings?: Record): Promise const rows = (await r.json>()).map((x) => ({ day: x.day, total_count: Number(x.total_count), new_count: Number(x.new_count), retained_count: Number(x.retained_count), reactivated_count: Number(x.reactivated_count) })); await chMetrics.command({ query: "SYSTEM FLUSH LOGS" }); const s = (await (await chMetrics.query({ query: `SELECT query_duration_ms d, memory_usage m, read_rows rr FROM system.query_log WHERE query_id={q:String} AND type='QueryFinish' ORDER BY event_time DESC LIMIT 1`, query_params: { q: qid }, format: "JSONEachRow" })).json<{ d: string, m: string, rr: string }>())[0]; - const run = { mem: Number(s?.m ?? 0) / 1048576, ms: Number(s?.d ?? 0), readRows: Number(s?.rr ?? 0), rows }; + const run = { mem: Number(s.m) / 1048576, ms: Number(s.d), readRows: Number(s.rr), rows }; if (!best || run.mem < best.mem) best = run; } return best!; @@ -120,10 +125,16 @@ function accuracy(truth: Day[], approx: Day[]) { for (const m of metrics) { const errs: number[] = []; for (const a of approx) { - const t = tm.get(a.day); if (!t) continue; + const t = tm.get(a.day); + if (!t) continue; const tv = t[m], av = a[m]; - if (tv === 0) { if (av !== 0) errs.push(100); continue; } - const e = Math.abs(av - tv) / tv * 100; errs.push(e); errsAll.push(e); + if (tv === 0) { + if (av !== 0) errs.push(100); + continue; + } + const e = Math.abs(av - tv) / tv * 100; + errs.push(e); + errsAll.push(e); } per[m] = { mean: errs.reduce((s, x) => s + x, 0) / Math.max(1, errs.length), max: Math.max(0, ...errs) }; } @@ -131,7 +142,11 @@ function accuracy(truth: Day[], approx: Day[]) { } async function main() { - if (!envBool("PA_SKIP_SEED")) await seed(); else log("reusing bench_pa.events"); + if (!envBool("PA_SKIP_SEED")) { + await seed(); + } else { + log("reusing bench_pa.events"); + } const cases: Array<{ name: string, kind: "exact" | "approx", sql: string, settings?: Record }> = [ { name: "original (string entity)", kind: "exact", sql: exactSql("assumeNotNull(user_id)") }, @@ -157,7 +172,15 @@ async function main() { } writeFileSync("/tmp/split-optimize.json", JSON.stringify({ generatedAt: new Date().toISOString(), scale: { NUM_PROJECTS, NUM_USERS, NUM_EVENTS }, cases: out }, null, 2)); log("wrote /tmp/split-optimize.json"); - if (!envBool("PA_KEEP")) await chAdmin.command({ query: "DROP DATABASE IF EXISTS bench_pa" }); + if (!envBool("PA_KEEP")) { + await chAdmin.command({ query: "DROP DATABASE IF EXISTS bench_pa" }); + } process.exit(0); } -main().catch((e) => { console.error("FAILED:", e); process.exit(1); }); + +try { + await main(); +} catch (e) { + console.error("FAILED:", e); + process.exit(1); +}