mirror of
https://github.com/stack-auth/stack.git
synced 2026-06-04 21:04:37 +08:00
fix: refresh-token P2025 race with concurrent sign-out (#1372)
## Summary - Fixes Sentry [STACK-BACKEND-146](https://stackframe-pw.sentry.io/issues/7377768662/): `PrismaClientKnownRequestError` P2025 on `projectUserRefreshToken.update()` during token refresh. - Root cause: `generateAccessTokenFromRefreshTokenIfValid` (`apps/backend/src/lib/tokens.tsx`) reads the refresh-token row upstream, then issues `.update(...)` on it (and on `projectUser`) inside a `Promise.all`. If a concurrent sign-out (`DELETE /auth/sessions/current`), session revoke, password change, or user deletion removes the row between the read and the update, Prisma throws P2025 and the refresh endpoint 500s. ## Changes - `apps/backend/src/lib/tokens.tsx` — swap the two `.update(...)`s for `.updateMany(...)` so a missing row is a no-op, then re-check the refresh token still exists; return `null` if it doesn't. The refresh route already maps `null` -> `KnownErrors.RefreshTokenNotFoundOrExpired` (401), which is the correct user-facing behavior for a just-revoked session. - `apps/backend/src/oauth/model.tsx` — in `generateAccessToken`, replace the "ultra-rare race condition" `throwErr` fallback with `throw new KnownErrors.RefreshTokenNotFoundOrExpired()` so concurrent sign-out during an OAuth `refresh_token` grant returns a clean 401 instead of 500. - `apps/e2e/tests/backend/endpoints/api/v1/auth/sessions/current/refresh-race.test.ts` — new regression test that fires `POST /auth/sessions/current/refresh` and `DELETE /auth/sessions/current` concurrently with the same refresh token. Before the fix it 500s on the first iteration; after, it passes in ~12s. ## Test plan - [x] New regression test passes locally. - [x] Existing `auth/sessions/**` + `auth/oauth/token.test.ts` still pass (27 tests, 3 todo, 0 failed). - [ ] CI green. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Bug Fixes** * Refresh flows now detect a revoked or removed refresh token during concurrent operations and stop cleanly, preventing issuance of an access token from stale data. * A specific refresh-token-not-found/expired error is returned instead of a generic failure when refresh cannot proceed. * **Tests** * Added E2E tests exercising concurrent refresh vs sign-out to prevent race-condition crashes and validate safe handling of competing requests. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
parent
982b8fb2d9
commit
a132dd23f9
@ -248,31 +248,32 @@ export async function generateAccessTokenFromRefreshTokenIfValid(options: Refres
|
||||
// Get end user IP info for session tracking and event logging
|
||||
const ipInfo = await getEndUserIpInfoForEvent();
|
||||
|
||||
await Promise.all([
|
||||
prisma.projectUser.update({
|
||||
where: {
|
||||
tenancyId_projectUserId: {
|
||||
tenancyId: options.tenancy.id,
|
||||
projectUserId: options.refreshTokenObj.projectUserId,
|
||||
},
|
||||
},
|
||||
data: withExternalDbSyncUpdate({
|
||||
lastActiveAt: now,
|
||||
}),
|
||||
// updateMany (instead of update) so a concurrent sign-out / session revocation
|
||||
// that deletes the row between the caller's read and this write does not
|
||||
// surface as a P2025 500. Update the refresh-token row first so a revoked
|
||||
// session stops before touching projectUser.lastActiveAt.
|
||||
const refreshTokenUpdate = await globalPrismaClient.projectUserRefreshToken.updateMany({
|
||||
where: {
|
||||
tenancyId: options.tenancy.id,
|
||||
id: options.refreshTokenObj.id,
|
||||
},
|
||||
data: withExternalDbSyncUpdate({
|
||||
lastActiveAt: now,
|
||||
lastActiveAtIpInfo: ipInfo ?? undefined,
|
||||
}),
|
||||
globalPrismaClient.projectUserRefreshToken.update({
|
||||
where: {
|
||||
tenancyId_id: {
|
||||
tenancyId: options.tenancy.id,
|
||||
id: options.refreshTokenObj.id,
|
||||
},
|
||||
},
|
||||
data: withExternalDbSyncUpdate({
|
||||
lastActiveAt: now,
|
||||
lastActiveAtIpInfo: ipInfo ?? undefined,
|
||||
}),
|
||||
});
|
||||
if (refreshTokenUpdate.count === 0) return null;
|
||||
|
||||
const projectUserUpdate = await prisma.projectUser.updateMany({
|
||||
where: {
|
||||
tenancyId: options.tenancy.id,
|
||||
projectUserId: options.refreshTokenObj.projectUserId,
|
||||
},
|
||||
data: withExternalDbSyncUpdate({
|
||||
lastActiveAt: now,
|
||||
}),
|
||||
]);
|
||||
});
|
||||
if (projectUserUpdate.count === 0) return null;
|
||||
|
||||
// Log session activity event (used for metrics, geo info, etc.)
|
||||
await logEvent(
|
||||
|
||||
@ -9,7 +9,7 @@ import { createRefreshTokenObj, decodeAccessToken, generateAccessTokenFromRefres
|
||||
import { getPrismaClientForTenancy, globalPrismaClient } from "@/prisma-client";
|
||||
import { AuthorizationCode, AuthorizationCodeModel, Client, Falsey, RefreshToken, Token, User } from "@node-oauth/oauth2-server";
|
||||
import { KnownErrors } from "@stackframe/stack-shared";
|
||||
import { StackAssertionError, StatusError, captureError, throwErr } from "@stackframe/stack-shared/dist/utils/errors";
|
||||
import { StackAssertionError, StatusError, captureError } from "@stackframe/stack-shared/dist/utils/errors";
|
||||
import { getProjectBranchFromClientId } from ".";
|
||||
const PrismaClientKnownRequestError = Prisma.PrismaClientKnownRequestError;
|
||||
|
||||
@ -105,10 +105,16 @@ export class OAuthModel implements AuthorizationCodeModel {
|
||||
|
||||
const refreshTokenObj = await this._getOrCreateRefreshTokenObj(client, user, scope);
|
||||
|
||||
return await generateAccessTokenFromRefreshTokenIfValid({
|
||||
const accessToken = await generateAccessTokenFromRefreshTokenIfValid({
|
||||
tenancy,
|
||||
refreshTokenObj,
|
||||
}) ?? throwErr("Get or create refresh token failed; returned refreshTokenObj that's invalid (or maybe it's an ultra-rare race condition and it became invalid in since the function call?)", { refreshTokenObj }); // TODO fix the ultra-rare race condition — although unless we're at gigascale this should basically never happen
|
||||
});
|
||||
if (!accessToken) {
|
||||
// Either the refresh token became invalid between _getOrCreateRefreshTokenObj and now
|
||||
// (e.g. a concurrent sign-out deleted the row), or the user was deleted mid-flight.
|
||||
throw new KnownErrors.RefreshTokenNotFoundOrExpired();
|
||||
}
|
||||
return accessToken;
|
||||
}
|
||||
|
||||
async _getOrCreateRefreshTokenObj(client: Client, user: User, scope: string[]) {
|
||||
|
||||
@ -0,0 +1,116 @@
|
||||
import { randomUUID } from "node:crypto";
|
||||
import { generatedEmailSuffix, it } from "../../../../../../../helpers";
|
||||
import { Auth, backendContext, createMailbox, niceBackendFetch } from "../../../../../../backend-helpers";
|
||||
|
||||
type RaceFailure = {
|
||||
readonly request: "refresh" | "sign-out",
|
||||
readonly status?: number,
|
||||
readonly body?: unknown,
|
||||
readonly error?: unknown,
|
||||
};
|
||||
|
||||
function collectUnexpectedRaceResponseFailures(options: {
|
||||
readonly refreshResult: PromiseSettledResult<Awaited<ReturnType<typeof niceBackendFetch>>>,
|
||||
readonly signOutResult: PromiseSettledResult<Awaited<ReturnType<typeof niceBackendFetch>>>,
|
||||
}): RaceFailure[] {
|
||||
const failures: RaceFailure[] = [];
|
||||
const results: [RaceFailure["request"], PromiseSettledResult<Awaited<ReturnType<typeof niceBackendFetch>>>][] = [
|
||||
["refresh", options.refreshResult],
|
||||
["sign-out", options.signOutResult],
|
||||
];
|
||||
for (const [request, result] of results) {
|
||||
if (result.status === "rejected") {
|
||||
failures.push({ request, error: result.reason });
|
||||
continue;
|
||||
}
|
||||
if (result.value.status >= 500 || JSON.stringify(result.value.body).includes("P2025")) {
|
||||
failures.push({ request, status: result.value.status, body: result.value.body });
|
||||
}
|
||||
}
|
||||
return failures;
|
||||
}
|
||||
|
||||
// Guards Sentry STACK-BACKEND-146:
|
||||
// PrismaClientKnownRequestError P2025 on projectUserRefreshToken.update()
|
||||
// caused by the refresh endpoint reading the token, then calling update()
|
||||
// after a concurrent sign-out has deleted the row.
|
||||
it("does not 500 when a refresh races with a sign-out of the same session", { timeout: 120_000 }, async ({ expect }) => {
|
||||
// Fire many refresh+signout pairs concurrently to hit the race window
|
||||
// between findFirst(refreshToken) and projectUserRefreshToken.update().
|
||||
const ATTEMPTS = 10;
|
||||
const failures: RaceFailure[] = [];
|
||||
|
||||
for (let i = 0; i < ATTEMPTS; i++) {
|
||||
backendContext.set({
|
||||
mailbox: createMailbox(`refresh-race--${randomUUID()}${generatedEmailSuffix}`),
|
||||
userAuth: null,
|
||||
});
|
||||
await Auth.Password.signUpWithEmail();
|
||||
const rt = backendContext.value.userAuth!.refreshToken!;
|
||||
|
||||
const refreshP = niceBackendFetch("/api/v1/auth/sessions/current/refresh", {
|
||||
method: "POST",
|
||||
accessType: "client",
|
||||
headers: { "x-stack-refresh-token": rt },
|
||||
});
|
||||
const signOutP = niceBackendFetch("/api/v1/auth/sessions/current", {
|
||||
method: "DELETE",
|
||||
accessType: "client",
|
||||
});
|
||||
|
||||
const [refreshResult, signOutResult] = await Promise.allSettled([refreshP, signOutP]);
|
||||
failures.push(...collectUnexpectedRaceResponseFailures({ refreshResult, signOutResult }));
|
||||
|
||||
// Acceptable outcomes:
|
||||
// 200 (refresh won the race)
|
||||
// 401 REFRESH_TOKEN_NOT_FOUND_OR_EXPIRED (sign-out won cleanly)
|
||||
// Bug outcome: 500 with Prisma P2025 bubbling out as an unhandled error.
|
||||
if (refreshResult.status === "fulfilled" && refreshResult.value.status !== 200 && refreshResult.value.status !== 401) {
|
||||
failures.push({ request: "refresh", status: refreshResult.value.status, body: refreshResult.value.body });
|
||||
}
|
||||
}
|
||||
|
||||
expect(failures).toEqual([]);
|
||||
});
|
||||
|
||||
it("does not 500 when an OAuth refresh-token grant races with a sign-out of the same session", { timeout: 120_000 }, async ({ expect }) => {
|
||||
// The OAuth token endpoint uses the same refresh-token helper as the direct
|
||||
// session refresh endpoint, so keep this regression covered on both callers.
|
||||
const ATTEMPTS = 10;
|
||||
const failures: RaceFailure[] = [];
|
||||
|
||||
for (let i = 0; i < ATTEMPTS; i++) {
|
||||
backendContext.set({
|
||||
mailbox: createMailbox(`oauth-refresh-race--${randomUUID()}${generatedEmailSuffix}`),
|
||||
userAuth: null,
|
||||
});
|
||||
await Auth.Password.signUpWithEmail();
|
||||
const rt = backendContext.value.userAuth!.refreshToken!;
|
||||
const projectKeys = backendContext.value.projectKeys;
|
||||
if (projectKeys === "no-project") throw new Error("No project keys found in the backend context");
|
||||
|
||||
const refreshP = niceBackendFetch("/api/v1/auth/oauth/token", {
|
||||
method: "POST",
|
||||
accessType: "client",
|
||||
body: {
|
||||
grant_type: "refresh_token",
|
||||
client_id: projectKeys.projectId,
|
||||
client_secret: projectKeys.publishableClientKey,
|
||||
refresh_token: rt,
|
||||
},
|
||||
});
|
||||
const signOutP = niceBackendFetch("/api/v1/auth/sessions/current", {
|
||||
method: "DELETE",
|
||||
accessType: "client",
|
||||
});
|
||||
|
||||
const [refreshResult, signOutResult] = await Promise.allSettled([refreshP, signOutP]);
|
||||
failures.push(...collectUnexpectedRaceResponseFailures({ refreshResult, signOutResult }));
|
||||
|
||||
if (refreshResult.status === "fulfilled" && refreshResult.value.status !== 200 && refreshResult.value.status !== 401) {
|
||||
failures.push({ request: "refresh", status: refreshResult.value.status, body: refreshResult.value.body });
|
||||
}
|
||||
}
|
||||
|
||||
expect(failures).toEqual([]);
|
||||
});
|
||||
Loading…
Reference in New Issue
Block a user