mirror of
https://github.com/stack-auth/stack.git
synced 2026-06-04 21:04:37 +08:00
Merge branch 'dev' into promptless/document-developer-tools
This commit is contained in:
commit
a2d92a55e8
119
apps/backend/src/lib/email-queue-step.test.tsx
Normal file
119
apps/backend/src/lib/email-queue-step.test.tsx
Normal file
@ -0,0 +1,119 @@
|
||||
import { EmailOutboxCreatedWith } from "@/generated/prisma/client";
|
||||
import { globalPrismaClient } from "@/prisma-client";
|
||||
import { afterAll, describe, expect, it } from "vitest";
|
||||
import { _forTesting } from "./email-queue-step";
|
||||
import { DEFAULT_BRANCH_ID, getSoleTenancyFromProjectBranch } from "./tenancies";
|
||||
|
||||
const { failEmailsStuckInSending, STUCK_EMAIL_TIMEOUT_MS } = _forTesting;
|
||||
|
||||
// These tests connect to the real dev DB (like payments.test.tsx) and create real EmailOutbox
|
||||
// rows against the seeded `internal` tenancy. Each row is tagged with a unique tsxSource so we
|
||||
// can find and clean up just our test rows.
|
||||
describe.sequential("failEmailsStuckInSending", () => {
|
||||
const testRunTag = `stuck-in-sending-test-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
||||
const createdIds: { tenancyId: string, id: string }[] = [];
|
||||
|
||||
const recoveryTestFilter = { tsxSource: `/* ${testRunTag} */` };
|
||||
|
||||
const makeRow = async (params: {
|
||||
startedSendingAt: Date | null,
|
||||
finishedSendingAt?: Date | null,
|
||||
isPaused?: boolean,
|
||||
sendRetries?: number,
|
||||
nextSendRetryAt?: Date | null,
|
||||
}) => {
|
||||
const tenancy = await getSoleTenancyFromProjectBranch("internal", DEFAULT_BRANCH_ID);
|
||||
const created = await globalPrismaClient.emailOutbox.create({
|
||||
data: {
|
||||
tenancyId: tenancy.id,
|
||||
tsxSource: recoveryTestFilter.tsxSource,
|
||||
themeId: null,
|
||||
isHighPriority: false,
|
||||
to: { type: "custom-emails", emails: ["stuck-test@example.com"] },
|
||||
extraRenderVariables: {},
|
||||
shouldSkipDeliverabilityCheck: true,
|
||||
createdWith: EmailOutboxCreatedWith.PROGRAMMATIC_CALL,
|
||||
scheduledAt: new Date(0),
|
||||
isQueued: true,
|
||||
renderedByWorkerId: "00000000-0000-0000-0000-000000000000",
|
||||
startedRenderingAt: new Date(0),
|
||||
finishedRenderingAt: new Date(0),
|
||||
renderedHtml: "<p>stuck</p>",
|
||||
renderedText: "stuck",
|
||||
renderedSubject: "stuck",
|
||||
renderedIsTransactional: false,
|
||||
startedSendingAt: params.startedSendingAt,
|
||||
finishedSendingAt: params.finishedSendingAt ?? null,
|
||||
sendRetries: params.sendRetries ?? 0,
|
||||
nextSendRetryAt: params.nextSendRetryAt ?? null,
|
||||
isPaused: params.isPaused ?? false,
|
||||
},
|
||||
});
|
||||
createdIds.push({ tenancyId: created.tenancyId, id: created.id });
|
||||
return created;
|
||||
};
|
||||
|
||||
afterAll(async () => {
|
||||
for (const { tenancyId, id } of createdIds) {
|
||||
await globalPrismaClient.emailOutbox.deleteMany({ where: { tenancyId, id } });
|
||||
}
|
||||
});
|
||||
|
||||
it("marks a row as failed when startedSendingAt is older than the stuck timeout", async () => {
|
||||
const longAgo = new Date(Date.now() - STUCK_EMAIL_TIMEOUT_MS - 60_000);
|
||||
const row = await makeRow({
|
||||
startedSendingAt: longAgo,
|
||||
sendRetries: 1,
|
||||
nextSendRetryAt: new Date(Date.now() + 60_000),
|
||||
});
|
||||
|
||||
await failEmailsStuckInSending(recoveryTestFilter);
|
||||
|
||||
const after = await globalPrismaClient.emailOutbox.findUniqueOrThrow({
|
||||
where: { tenancyId_id: { tenancyId: row.tenancyId, id: row.id } },
|
||||
});
|
||||
expect(after.finishedSendingAt).not.toBeNull();
|
||||
expect(after.startedSendingAt?.toISOString()).toBe(row.startedSendingAt?.toISOString());
|
||||
expect(after.canHaveDeliveryInfo).toBe(false);
|
||||
expect(after.sendServerErrorExternalMessage).toMatch(/timed out/i);
|
||||
expect(after.sendServerErrorInternalMessage).toMatch(/stuck in sending/i);
|
||||
expect(after.sendServerErrorInternalMessage).toMatch(/terminal server error/i);
|
||||
// Must be a terminal state — no retry scheduled.
|
||||
expect(after.nextSendRetryAt).toBeNull();
|
||||
// sendRetries is not bumped by this path (we never attempted the send again).
|
||||
expect(after.sendRetries).toBe(row.sendRetries);
|
||||
// Status must be SERVER_ERROR, not SENDING.
|
||||
expect(after.status).toBe("SERVER_ERROR");
|
||||
});
|
||||
|
||||
it("does not touch a row that started sending recently", async () => {
|
||||
const recently = new Date(Date.now() - 1000);
|
||||
const row = await makeRow({ startedSendingAt: recently });
|
||||
|
||||
await failEmailsStuckInSending(recoveryTestFilter);
|
||||
|
||||
const after = await globalPrismaClient.emailOutbox.findUniqueOrThrow({
|
||||
where: { tenancyId_id: { tenancyId: row.tenancyId, id: row.id } },
|
||||
});
|
||||
expect(after.finishedSendingAt).toBeNull();
|
||||
expect(after.sendServerErrorExternalMessage).toBeNull();
|
||||
expect(after.status).toBe("SENDING");
|
||||
});
|
||||
|
||||
it("does not re-queue rows already marked failed for another send attempt", async () => {
|
||||
const longAgo = new Date(Date.now() - STUCK_EMAIL_TIMEOUT_MS - 60_000);
|
||||
const row = await makeRow({ startedSendingAt: longAgo });
|
||||
|
||||
await failEmailsStuckInSending(recoveryTestFilter);
|
||||
// A second pass should be a no-op for this row: it's already terminal, so it must not
|
||||
// become a candidate for re-sending (which could duplicate an already-accepted delivery).
|
||||
await failEmailsStuckInSending(recoveryTestFilter);
|
||||
|
||||
const after = await globalPrismaClient.emailOutbox.findUniqueOrThrow({
|
||||
where: { tenancyId_id: { tenancyId: row.tenancyId, id: row.id } },
|
||||
});
|
||||
expect(after.nextSendRetryAt).toBeNull();
|
||||
expect(after.isQueued).toBe(true); // unchanged: we do not unclaim stuck rows
|
||||
expect(after.status).toBe("SERVER_ERROR");
|
||||
});
|
||||
});
|
||||
@ -97,7 +97,7 @@ export const runEmailQueueStep = withTraceSpan("runEmailQueueStep", async () =>
|
||||
|
||||
const sendPlan = await withTraceSpan("runEmailQueueStep-prepareSendPlan", prepareSendPlan)(deltaSeconds);
|
||||
await withTraceSpan("runEmailQueueStep-processSendPlan", processSendPlan)(sendPlan);
|
||||
await withTraceSpan("runEmailQueueStep-logEmailsStuckInSending", logEmailsStuckInSending)();
|
||||
await withTraceSpan("runEmailQueueStep-failEmailsStuckInSending", failEmailsStuckInSending)();
|
||||
const sendEnd = performance.now();
|
||||
|
||||
if (sendPlan.length > 0 || queuedCount > 0 || pendingRender.length > 0) {
|
||||
@ -134,25 +134,73 @@ async function retryEmailsStuckInRendering(): Promise<void> {
|
||||
}
|
||||
}
|
||||
|
||||
async function logEmailsStuckInSending(): Promise<void> {
|
||||
const res = await globalPrismaClient.emailOutbox.findMany({
|
||||
where: {
|
||||
startedSendingAt: {
|
||||
lte: new Date(Date.now() - STUCK_EMAIL_TIMEOUT_MS),
|
||||
},
|
||||
finishedSendingAt: null,
|
||||
skippedReason: null,
|
||||
isPaused: false,
|
||||
/**
|
||||
* Mark rows stuck in SENDING for longer than STUCK_EMAIL_TIMEOUT_MS as failed (terminal server error).
|
||||
*
|
||||
* These rows can happen when sending hangs or fails after `startedSendingAt` was set but before
|
||||
* the send attempt was finalized. We deliberately do NOT retry automatically: the upstream send
|
||||
* may have actually been accepted by the SMTP/provider, and retrying would risk a duplicate
|
||||
* delivery. Instead, we mark the row as a terminal server error with delivery status unknown,
|
||||
* and emit a Sentry signal so a human can investigate if desired.
|
||||
*/
|
||||
async function failEmailsStuckInSending(additionalWhere?: Prisma.EmailOutboxWhereInput): Promise<void> {
|
||||
const stuckCutoff = new Date(Date.now() - STUCK_EMAIL_TIMEOUT_MS);
|
||||
const failedAt = new Date();
|
||||
const externalMessage = "Email sending timed out before we could confirm whether it was delivered.";
|
||||
const internalDetails: Prisma.InputJsonObject = {
|
||||
errorType: "stuck-in-sending-marked-failed",
|
||||
stuckTimeoutMs: STUCK_EMAIL_TIMEOUT_MS,
|
||||
failedAt: failedAt.toISOString(),
|
||||
};
|
||||
const externalDetails: Prisma.InputJsonObject = {
|
||||
errorType: "stuck-in-sending-marked-failed",
|
||||
};
|
||||
|
||||
const baseWhere: Prisma.EmailOutboxWhereInput = {
|
||||
startedSendingAt: { lte: stuckCutoff },
|
||||
finishedSendingAt: null,
|
||||
skippedReason: null,
|
||||
isPaused: false,
|
||||
};
|
||||
|
||||
const failed = await globalPrismaClient.emailOutbox.updateManyAndReturn({
|
||||
where: additionalWhere == null ? baseWhere : { AND: [baseWhere, additionalWhere] },
|
||||
data: {
|
||||
finishedSendingAt: failedAt,
|
||||
// canHaveDeliveryInfo must be non-null when finishedSendingAt is set (see schema).
|
||||
// We set it to false because we have no webhook/provider signal we can correlate: delivery
|
||||
// status is effectively unknown, and treating it as "no delivery info available" is the
|
||||
// honest representation.
|
||||
canHaveDeliveryInfo: false,
|
||||
sendServerErrorExternalMessage: externalMessage,
|
||||
sendServerErrorExternalDetails: externalDetails,
|
||||
sendServerErrorInternalMessage: `Email was stuck in sending (startedSendingAt <= ${stuckCutoff.toISOString()}) and was marked as a terminal server error by the email queue step (delivery status unknown). Not retried to avoid duplicate delivery.`,
|
||||
sendServerErrorInternalDetails: internalDetails,
|
||||
nextSendRetryAt: null,
|
||||
// Do not append to sendAttemptErrors here: this path did not observe a provider response
|
||||
// or application error for this attempt, and the terminal server-error fields already
|
||||
// record the failure without duplicating potentially sensitive details.
|
||||
shouldUpdateSequenceId: true,
|
||||
},
|
||||
select: { id: true, tenancyId: true, startedSendingAt: true, to: true, sentAt: true, sendAttemptErrors: true },
|
||||
select: { id: true, tenancyId: true, startedSendingAt: true },
|
||||
});
|
||||
if (res.length > 0) {
|
||||
captureError("email-queue-step-stuck-in-sending", new StackAssertionError(`${res.length} emails stuck in sending! This should never happen. It was NOT correctly marked as an error! Manual intervention is required.`, {
|
||||
emails: res,
|
||||
}));
|
||||
|
||||
if (failed.length > 0) {
|
||||
captureError(
|
||||
"email-queue-step-stuck-in-sending",
|
||||
new StackAssertionError(
|
||||
`${failed.length} emails were stuck in sending and were marked as failed (terminal server errors; delivery status unknown, not retried to avoid duplicate sends). Manual investigation is recommended.`,
|
||||
{ emails: failed.map(({ id, tenancyId, startedSendingAt }) => ({ id, tenancyId, startedSendingAt })) },
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
export const _forTesting = {
|
||||
failEmailsStuckInSending,
|
||||
STUCK_EMAIL_TIMEOUT_MS,
|
||||
};
|
||||
|
||||
async function updateLastExecutionTime(): Promise<number> {
|
||||
const key = "EMAIL_QUEUE_METADATA_KEY";
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user