Speed up seed script by a lot

This commit is contained in:
Konstantin Wohlwend 2026-04-18 17:29:20 -07:00
parent f85b4f3997
commit 1594ed94d5
2 changed files with 248 additions and 149 deletions

View File

@ -20,7 +20,35 @@ import { typedEntries, typedFromEntries } from '@stackframe/stack-shared/dist/ut
const DUMMY_PROJECT_ID = '6fbbf22e-f4b2-4c6e-95a1-beab6fa41063';
// Guards against wrapping the console methods more than once (which would
// stack multiple timestamps on every line).
let didEnableSeedLogTimestamps = false;

/**
 * Patches `console.log`, `console.info`, `console.warn` and `console.error`
 * so that every message is prefixed with an ISO-8601 timestamp in square
 * brackets, e.g. `[2026-04-18T17:29:20.000Z] Seeding database...`.
 *
 * The patch is process-wide and is applied at most once; subsequent calls are
 * no-ops.
 */
function enableSeedLogTimestamps() {
  if (didEnableSeedLogTimestamps) return;
  didEnableSeedLogTimestamps = true;

  // Builds the argument list forwarded to the original console method.
  const withTimestamp = (data: unknown[]): unknown[] => {
    const stamp = `[${new Date().toISOString()}]`;
    const [first, ...rest] = data;
    if (typeof first === 'string') {
      // Keep the caller's first argument in the format-string position so
      // printf-style specifiers (`%s`, `%d`, `%o`, ...) are still substituted.
      // The stamp itself never contains `%`, so it is safe to merge in.
      return [`${stamp} ${first}`, ...rest];
    }
    // Non-string (or missing) first argument: pass the stamp as its own
    // argument so objects are still inspected rather than stringified.
    return [stamp, ...data];
  };

  // Wraps one console method; `original` must already be bound to `console`.
  const wrap = (original: (...data: unknown[]) => void) => (...data: unknown[]): void => {
    original(...withTimestamp(data));
  };

  console.log = wrap(console.log.bind(console));
  console.info = wrap(console.info.bind(console));
  console.warn = wrap(console.warn.bind(console));
  console.error = wrap(console.error.bind(console));
}
export async function seed() {
enableSeedLogTimestamps();
process.env.STACK_SEED_MODE = 'true';
console.log('Seeding database...');

View File

@ -1292,8 +1292,6 @@ const BULK_LAST_NAMES = [
'Moore', 'Hall', 'King', 'Wright', 'Green', 'Baker', 'Turner', 'Okafor',
'Suzuki', 'Schneider', 'Dubois', 'Rossi', 'Nakamura', 'Silva', 'Ivanov',
];
const BULK_OAUTH_PROVIDERS = ['google', 'github', 'microsoft'];
const BULK_REFERRERS = [
{ url: 'https://www.google.com/', weight: 32 },
{ url: 'https://github.com/', weight: 18 },
@ -1374,17 +1372,23 @@ async function seedDummySessionActivityEvents(options: SessionActivityEventSeedO
twoMonthsAgo.setMonth(twoMonthsAgo.getMonth() - 2);
const windowMs = todayUtc.getTime() - twoMonthsAgo.getTime();
const userEmails = Array.from(userEmailToId.keys());
const userIds = Array.from(userEmailToId.values());
const systemEventTypeIds = ['$session-activity', '$user-activity', '$project-activity', '$project'];
console.log(`Seeding session activity events for ${userEmails.length} users...`);
console.log(`Seeding session activity events for ${userIds.length} users...`);
for (const email of userEmails) {
const userId = userEmailToId.get(email);
if (!userId) continue;
const eventIpInfos: Prisma.EventIpInfoCreateManyInput[] = [];
const events: Prisma.EventCreateManyInput[] = [];
const clickhouseRows: Array<Record<string, unknown>> = [];
const clickhouseUrl = getEnvVariable('STACK_CLICKHOUSE_URL', '');
const shouldSeedClickhouse = clickhouseUrl !== '';
const clickhouseClient = shouldSeedClickhouse ? getClickhouseAdminClient() : null;
for (const userId of userIds) {
// Per-user seeded PRNG so event count, timestamps, and locations are
// deterministic across re-runs. Deterministic IDs mean upserts hit the
// same rows instead of duplicating them.
// deterministic across re-runs. Deterministic IDs mean seeded rows can be
// replaced in bulk while staying idempotent across runs.
const userRand = deterministicPrng(seedFromString(`session-events:${tenancyId}:${userId}`));
const eventCount = 15 + Math.floor(userRand() * 11); // 15-25 events
@ -1398,111 +1402,107 @@ async function seedDummySessionActivityEvents(options: SessionActivityEventSeedO
const ipInfoId = deterministicUuid(`event-ip-info:${tenancyId}:${userId}:${i}`);
const eventId = deterministicUuid(`event:${tenancyId}:${userId}:${i}`);
await globalPrismaClient.eventIpInfo.upsert({
where: { id: ipInfoId },
update: {
ip: ipAddress,
countryCode: location.countryCode,
regionCode: location.regionCode,
cityName: location.cityName,
latitude: location.latitude,
longitude: location.longitude,
tzIdentifier: location.tzIdentifier,
updatedAt: randomTime,
},
create: {
id: ipInfoId,
ip: ipAddress,
countryCode: location.countryCode,
regionCode: location.regionCode,
cityName: location.cityName,
latitude: location.latitude,
longitude: location.longitude,
tzIdentifier: location.tzIdentifier,
createdAt: randomTime,
updatedAt: randomTime,
},
eventIpInfos.push({
id: ipInfoId,
ip: ipAddress,
countryCode: location.countryCode,
regionCode: location.regionCode,
cityName: location.cityName,
latitude: location.latitude,
longitude: location.longitude,
tzIdentifier: location.tzIdentifier,
createdAt: randomTime,
updatedAt: randomTime,
});
await globalPrismaClient.event.upsert({
where: { id: eventId },
update: {
systemEventTypeIds: ['$session-activity', '$user-activity', '$project-activity', '$project'],
data: {
projectId,
branchId: DEFAULT_BRANCH_ID,
userId,
sessionId,
isAnonymous: false,
},
isEndUserIpInfoGuessTrusted: true,
endUserIpInfoGuessId: ipInfoId,
isWide: false,
eventStartedAt: randomTime,
eventEndedAt: randomTime,
updatedAt: randomTime,
},
create: {
id: eventId,
systemEventTypeIds: ['$session-activity', '$user-activity', '$project-activity', '$project'],
data: {
projectId,
branchId: DEFAULT_BRANCH_ID,
userId,
sessionId,
isAnonymous: false,
},
isEndUserIpInfoGuessTrusted: true,
endUserIpInfoGuessId: ipInfoId,
isWide: false,
eventStartedAt: randomTime,
eventEndedAt: randomTime,
createdAt: randomTime,
updatedAt: randomTime,
events.push({
id: eventId,
systemEventTypeIds,
data: {
projectId,
branchId: DEFAULT_BRANCH_ID,
userId,
sessionId,
isAnonymous: false,
},
isEndUserIpInfoGuessTrusted: true,
endUserIpInfoGuessId: ipInfoId,
isWide: false,
eventStartedAt: randomTime,
eventEndedAt: randomTime,
createdAt: randomTime,
updatedAt: randomTime,
});
// Also create $token-refresh events for ClickHouse (used by globe + analytics)
const clickhouseUrl = getEnvVariable("STACK_CLICKHOUSE_URL", "");
if (clickhouseUrl) {
const clickhouseClient = getClickhouseAdminClient();
await clickhouseClient.insert({
table: "analytics_internal.events",
values: [{
event_type: '$token-refresh',
event_at: randomTime,
data: {
refresh_token_id: refreshTokenId,
is_anonymous: false,
ip_info: {
ip: ipAddress,
is_trusted: true,
country_code: location.countryCode,
region_code: location.regionCode,
city_name: location.cityName,
latitude: location.latitude,
longitude: location.longitude,
tz_identifier: location.tzIdentifier,
},
},
project_id: projectId,
branch_id: DEFAULT_BRANCH_ID,
user_id: userId,
team_id: null,
if (clickhouseClient) {
clickhouseRows.push({
event_type: '$token-refresh',
event_at: randomTime,
data: {
refresh_token_id: refreshTokenId,
session_replay_id: null,
session_replay_segment_id: null,
}],
format: "JSONEachRow",
clickhouse_settings: {
date_time_input_format: "best_effort",
is_anonymous: false,
ip_info: {
ip: ipAddress,
is_trusted: true,
country_code: location.countryCode,
region_code: location.regionCode,
city_name: location.cityName,
latitude: location.latitude,
longitude: location.longitude,
tz_identifier: location.tzIdentifier,
},
},
project_id: projectId,
branch_id: DEFAULT_BRANCH_ID,
user_id: userId,
team_id: null,
refresh_token_id: refreshTokenId,
session_replay_id: null,
session_replay_segment_id: null,
});
}
}
}
console.log('Finished seeding session activity events');
await globalPrismaClient.$transaction(async (tx) => {
const eventIds = events.map((event) => event.id ?? throwErr('Seeded event row is missing id'));
const ipInfoIds = eventIpInfos.map((info) => info.id ?? throwErr('Seeded event IP info row is missing id'));
await tx.event.deleteMany({
where: {
id: { in: eventIds },
},
});
await tx.eventIpInfo.deleteMany({
where: {
id: { in: ipInfoIds },
},
});
await tx.eventIpInfo.createMany({
data: eventIpInfos,
});
await tx.event.createMany({
data: events,
});
});
if (clickhouseClient && clickhouseRows.length > 0) {
const BATCH_SIZE = 500;
for (let i = 0; i < clickhouseRows.length; i += BATCH_SIZE) {
await clickhouseClient.insert({
table: 'analytics_internal.events',
values: clickhouseRows.slice(i, i + BATCH_SIZE),
format: 'JSONEachRow',
clickhouse_settings: {
date_time_input_format: 'best_effort',
async_insert: 1,
},
});
}
}
console.log(`Finished seeding session activity events (${events.length} events)`);
}
/**
@ -1536,8 +1536,16 @@ async function seedBulkSignupsAndActivity(options: {
let created = 0;
let updated = 0;
const userActivity: Array<{ userId: string, signupDaysAgo: number, region: BulkActivityRegion }> = [];
const seedUsers: Array<{
index: number,
email: string,
displayName: string,
signedUpAt: Date,
signupDaysAgo: number,
region: BulkActivityRegion,
primaryEmailVerified: boolean,
projectUserId: string,
}> = [];
for (let i = 0; i < count; i++) {
const firstName = BULK_FIRST_NAMES[Math.floor(rand() * BULK_FIRST_NAMES.length)]!;
const lastName = BULK_LAST_NAMES[Math.floor(rand() * BULK_LAST_NAMES.length)]!;
@ -1545,62 +1553,100 @@ async function seedBulkSignupsAndActivity(options: {
const email = `${firstName.toLowerCase()}.${lastName.toLowerCase()}.signupseed${i}@dummy.dev`;
const signedUpAt = bulkRandomTimestampOnDay(now, dayOffsets[i]!, rand);
const region = pickBulkActivityRegion(rand);
const hasOauth = rand() > 0.55;
const oauthProvider = hasOauth
? [{ id: BULK_OAUTH_PROVIDERS[Math.floor(rand() * BULK_OAUTH_PROVIDERS.length)]!, account_id: `${email}-oauth`, email }]
: [];
const existing = await prisma.projectUser.findFirst({
where: {
tenancyId: tenancy.id,
contactChannels: { some: { type: 'EMAIL', value: email } },
},
select: { projectUserId: true },
const primaryEmailVerified = rand() > 0.25;
seedUsers.push({
index: i,
email,
displayName,
signedUpAt,
signupDaysAgo: dayOffsets[i]!,
region,
primaryEmailVerified,
projectUserId: deterministicUuid(`bulk-signup-user:${tenancy.id}:${email}`),
});
}
let userId: string;
if (existing) {
userId = existing.projectUserId;
updated++;
} else {
const createdUser = await usersCrudHandlers.adminCreate({
tenancy,
data: {
display_name: displayName,
primary_email: email,
primary_email_auth_enabled: true,
primary_email_verified: rand() > 0.25,
otp_auth_enabled: false,
is_anonymous: false,
oauth_providers: oauthProvider,
profile_image_url: null,
},
});
userId = createdUser.id;
const existingContactChannels = await prisma.contactChannel.findMany({
where: {
tenancyId: tenancy.id,
type: 'EMAIL',
isPrimary: 'TRUE',
usedForAuth: 'TRUE',
value: { in: seedUsers.map((seedUser) => seedUser.email) },
},
select: {
value: true,
projectUserId: true,
},
});
const existingUserIdByEmail = new Map<string, string>();
for (const existingContactChannel of existingContactChannels) {
const existingUserId = existingUserIdByEmail.get(existingContactChannel.value);
if (existingUserId != null && existingUserId !== existingContactChannel.projectUserId) {
throwErr(`Expected one authenticated user per seed email (${existingContactChannel.value}), found multiple project users`);
}
existingUserIdByEmail.set(existingContactChannel.value, existingContactChannel.projectUserId);
}
const projectUsersToCreate: Prisma.ProjectUserCreateManyInput[] = [];
const contactChannelsToCreate: Prisma.ContactChannelCreateManyInput[] = [];
const userActivity: Array<{ userId: string, signupDaysAgo: number, region: BulkActivityRegion, signedUpAt: Date }> = [];
for (const seedUser of seedUsers) {
const userId = existingUserIdByEmail.get(seedUser.email) ?? seedUser.projectUserId;
const existingUserId = existingUserIdByEmail.get(seedUser.email);
if (existingUserId == null) {
created++;
projectUsersToCreate.push({
tenancyId: tenancy.id,
projectUserId: userId,
mirroredProjectId: tenancy.project.id,
mirroredBranchId: tenancy.branchId,
displayName: seedUser.displayName,
isAnonymous: false,
createdAt: seedUser.signedUpAt,
lastActiveAt: seedUser.signedUpAt,
signedUpAt: seedUser.signedUpAt,
signUpRiskScoreBot: 0,
signUpRiskScoreFreeTrialAbuse: 0,
});
contactChannelsToCreate.push({
tenancyId: tenancy.id,
projectUserId: userId,
type: 'EMAIL',
isPrimary: 'TRUE',
usedForAuth: 'TRUE',
isVerified: seedUser.primaryEmailVerified,
value: seedUser.email,
createdAt: seedUser.signedUpAt,
updatedAt: seedUser.signedUpAt,
});
} else {
updated++;
}
await prisma.projectUser.updateMany({
where: { tenancyId: tenancy.id, projectUserId: userId },
data: { createdAt: signedUpAt, signedUpAt },
userActivity.push({
userId,
signupDaysAgo: seedUser.signupDaysAgo,
region: seedUser.region,
signedUpAt: seedUser.signedUpAt,
});
userActivity.push({ userId, signupDaysAgo: dayOffsets[i]!, region });
const ipInfoForUser = {
ip: bulkFakeIp(region.ipPrefix, rand),
ip: bulkFakeIp(seedUser.region.ipPrefix, rand),
is_trusted: true,
country_code: region.country,
region_code: region.region,
city_name: region.city,
latitude: region.lat,
longitude: region.lon,
tz_identifier: region.tz,
country_code: seedUser.region.country,
region_code: seedUser.region.region,
city_name: seedUser.region.city,
latitude: seedUser.region.lat,
longitude: seedUser.region.lon,
tz_identifier: seedUser.region.tz,
};
clickhouseRows.push({
event_type: '$token-refresh',
event_at: formatClickhouseTimestamp(signedUpAt),
event_at: formatClickhouseTimestamp(seedUser.signedUpAt),
data: {
refresh_token_id: generateUuid(),
is_anonymous: false,
@ -1612,11 +1658,36 @@ async function seedBulkSignupsAndActivity(options: {
team_id: null,
});
if ((i + 1) % 100 === 0) {
console.log(`[seed-activity] ${i + 1}/${count} users processed (${created} new, ${updated} updated)`);
if ((seedUser.index + 1) % 100 === 0) {
console.log(`[seed-activity] ${seedUser.index + 1}/${count} users processed (${created} new, ${updated} updated)`);
}
}
if (projectUsersToCreate.length > 0) {
await prisma.projectUser.createMany({
data: projectUsersToCreate,
skipDuplicates: true,
});
}
if (contactChannelsToCreate.length > 0) {
await prisma.contactChannel.createMany({
data: contactChannelsToCreate,
skipDuplicates: true,
});
}
if (userActivity.length > 0) {
const seededTimestampRows = userActivity.map((activity) => Prisma.sql`(${activity.userId}::uuid, ${activity.signedUpAt}::timestamptz)`);
await prisma.$executeRaw`
UPDATE "ProjectUser" AS pu
SET "createdAt" = seeded.signed_up_at,
"signedUpAt" = seeded.signed_up_at
FROM (VALUES ${Prisma.join(seededTimestampRows)}) AS seeded(project_user_id, signed_up_at)
WHERE pu."tenancyId" = ${tenancy.id}
AND pu."projectUserId" = seeded.project_user_id
`;
}
console.log(`[seed-activity] Generating multi-day activity events for ${userActivity.length} users...`);
for (const { userId, signupDaysAgo, region } of userActivity) {