Merge branch 'dev' into promptless/document-custom-dashboards

This commit is contained in:
promptless[bot] 2026-04-24 18:01:39 +00:00
commit 4e34ba1a29
2 changed files with 182 additions and 15 deletions

View File

@ -0,0 +1,119 @@
import { EmailOutboxCreatedWith } from "@/generated/prisma/client";
import { globalPrismaClient } from "@/prisma-client";
import { afterAll, describe, expect, it } from "vitest";
import { _forTesting } from "./email-queue-step";
import { DEFAULT_BRANCH_ID, getSoleTenancyFromProjectBranch } from "./tenancies";
const { failEmailsStuckInSending, STUCK_EMAIL_TIMEOUT_MS } = _forTesting;
// These tests connect to the real dev DB (like payments.test.tsx) and create real EmailOutbox
// rows against the seeded `internal` tenancy. Each row is tagged with a unique tsxSource so we
// can find and clean up just our test rows.
describe.sequential("failEmailsStuckInSending", () => {
const testRunTag = `stuck-in-sending-test-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
const createdIds: { tenancyId: string, id: string }[] = [];
const recoveryTestFilter = { tsxSource: `/* ${testRunTag} */` };
const makeRow = async (params: {
startedSendingAt: Date | null,
finishedSendingAt?: Date | null,
isPaused?: boolean,
sendRetries?: number,
nextSendRetryAt?: Date | null,
}) => {
const tenancy = await getSoleTenancyFromProjectBranch("internal", DEFAULT_BRANCH_ID);
const created = await globalPrismaClient.emailOutbox.create({
data: {
tenancyId: tenancy.id,
tsxSource: recoveryTestFilter.tsxSource,
themeId: null,
isHighPriority: false,
to: { type: "custom-emails", emails: ["stuck-test@example.com"] },
extraRenderVariables: {},
shouldSkipDeliverabilityCheck: true,
createdWith: EmailOutboxCreatedWith.PROGRAMMATIC_CALL,
scheduledAt: new Date(0),
isQueued: true,
renderedByWorkerId: "00000000-0000-0000-0000-000000000000",
startedRenderingAt: new Date(0),
finishedRenderingAt: new Date(0),
renderedHtml: "<p>stuck</p>",
renderedText: "stuck",
renderedSubject: "stuck",
renderedIsTransactional: false,
startedSendingAt: params.startedSendingAt,
finishedSendingAt: params.finishedSendingAt ?? null,
sendRetries: params.sendRetries ?? 0,
nextSendRetryAt: params.nextSendRetryAt ?? null,
isPaused: params.isPaused ?? false,
},
});
createdIds.push({ tenancyId: created.tenancyId, id: created.id });
return created;
};
afterAll(async () => {
for (const { tenancyId, id } of createdIds) {
await globalPrismaClient.emailOutbox.deleteMany({ where: { tenancyId, id } });
}
});
it("marks a row as failed when startedSendingAt is older than the stuck timeout", async () => {
const longAgo = new Date(Date.now() - STUCK_EMAIL_TIMEOUT_MS - 60_000);
const row = await makeRow({
startedSendingAt: longAgo,
sendRetries: 1,
nextSendRetryAt: new Date(Date.now() + 60_000),
});
await failEmailsStuckInSending(recoveryTestFilter);
const after = await globalPrismaClient.emailOutbox.findUniqueOrThrow({
where: { tenancyId_id: { tenancyId: row.tenancyId, id: row.id } },
});
expect(after.finishedSendingAt).not.toBeNull();
expect(after.startedSendingAt?.toISOString()).toBe(row.startedSendingAt?.toISOString());
expect(after.canHaveDeliveryInfo).toBe(false);
expect(after.sendServerErrorExternalMessage).toMatch(/timed out/i);
expect(after.sendServerErrorInternalMessage).toMatch(/stuck in sending/i);
expect(after.sendServerErrorInternalMessage).toMatch(/terminal server error/i);
// Must be a terminal state — no retry scheduled.
expect(after.nextSendRetryAt).toBeNull();
// sendRetries is not bumped by this path (we never attempted the send again).
expect(after.sendRetries).toBe(row.sendRetries);
// Status must be SERVER_ERROR, not SENDING.
expect(after.status).toBe("SERVER_ERROR");
});
it("does not touch a row that started sending recently", async () => {
const recently = new Date(Date.now() - 1000);
const row = await makeRow({ startedSendingAt: recently });
await failEmailsStuckInSending(recoveryTestFilter);
const after = await globalPrismaClient.emailOutbox.findUniqueOrThrow({
where: { tenancyId_id: { tenancyId: row.tenancyId, id: row.id } },
});
expect(after.finishedSendingAt).toBeNull();
expect(after.sendServerErrorExternalMessage).toBeNull();
expect(after.status).toBe("SENDING");
});
it("does not re-queue rows already marked failed for another send attempt", async () => {
const longAgo = new Date(Date.now() - STUCK_EMAIL_TIMEOUT_MS - 60_000);
const row = await makeRow({ startedSendingAt: longAgo });
await failEmailsStuckInSending(recoveryTestFilter);
// A second pass should be a no-op for this row: it's already terminal, so it must not
// become a candidate for re-sending (which could duplicate an already-accepted delivery).
await failEmailsStuckInSending(recoveryTestFilter);
const after = await globalPrismaClient.emailOutbox.findUniqueOrThrow({
where: { tenancyId_id: { tenancyId: row.tenancyId, id: row.id } },
});
expect(after.nextSendRetryAt).toBeNull();
expect(after.isQueued).toBe(true); // unchanged: we do not unclaim stuck rows
expect(after.status).toBe("SERVER_ERROR");
});
});

View File

@ -97,7 +97,7 @@ export const runEmailQueueStep = withTraceSpan("runEmailQueueStep", async () =>
const sendPlan = await withTraceSpan("runEmailQueueStep-prepareSendPlan", prepareSendPlan)(deltaSeconds);
await withTraceSpan("runEmailQueueStep-processSendPlan", processSendPlan)(sendPlan);
await withTraceSpan("runEmailQueueStep-logEmailsStuckInSending", logEmailsStuckInSending)();
await withTraceSpan("runEmailQueueStep-failEmailsStuckInSending", failEmailsStuckInSending)();
const sendEnd = performance.now();
if (sendPlan.length > 0 || queuedCount > 0 || pendingRender.length > 0) {
@ -134,25 +134,73 @@ async function retryEmailsStuckInRendering(): Promise<void> {
}
}
async function logEmailsStuckInSending(): Promise<void> {
const res = await globalPrismaClient.emailOutbox.findMany({
where: {
startedSendingAt: {
lte: new Date(Date.now() - STUCK_EMAIL_TIMEOUT_MS),
},
finishedSendingAt: null,
skippedReason: null,
isPaused: false,
/**
* Mark rows stuck in SENDING for longer than STUCK_EMAIL_TIMEOUT_MS as failed (terminal server error).
*
* These rows can happen when sending hangs or fails after `startedSendingAt` was set but before
* the send attempt was finalized. We deliberately do NOT retry automatically: the upstream send
* may have actually been accepted by the SMTP/provider, and retrying would risk a duplicate
* delivery. Instead, we mark the row as a terminal server error with delivery status unknown,
* and emit a Sentry signal so a human can investigate if desired.
*/
async function failEmailsStuckInSending(additionalWhere?: Prisma.EmailOutboxWhereInput): Promise<void> {
const stuckCutoff = new Date(Date.now() - STUCK_EMAIL_TIMEOUT_MS);
const failedAt = new Date();
const externalMessage = "Email sending timed out before we could confirm whether it was delivered.";
const internalDetails: Prisma.InputJsonObject = {
errorType: "stuck-in-sending-marked-failed",
stuckTimeoutMs: STUCK_EMAIL_TIMEOUT_MS,
failedAt: failedAt.toISOString(),
};
const externalDetails: Prisma.InputJsonObject = {
errorType: "stuck-in-sending-marked-failed",
};
const baseWhere: Prisma.EmailOutboxWhereInput = {
startedSendingAt: { lte: stuckCutoff },
finishedSendingAt: null,
skippedReason: null,
isPaused: false,
};
const failed = await globalPrismaClient.emailOutbox.updateManyAndReturn({
where: additionalWhere == null ? baseWhere : { AND: [baseWhere, additionalWhere] },
data: {
finishedSendingAt: failedAt,
// canHaveDeliveryInfo must be non-null when finishedSendingAt is set (see schema).
// We set it to false because we have no webhook/provider signal we can correlate: delivery
// status is effectively unknown, and treating it as "no delivery info available" is the
// honest representation.
canHaveDeliveryInfo: false,
sendServerErrorExternalMessage: externalMessage,
sendServerErrorExternalDetails: externalDetails,
sendServerErrorInternalMessage: `Email was stuck in sending (startedSendingAt <= ${stuckCutoff.toISOString()}) and was marked as a terminal server error by the email queue step (delivery status unknown). Not retried to avoid duplicate delivery.`,
sendServerErrorInternalDetails: internalDetails,
nextSendRetryAt: null,
// Do not append to sendAttemptErrors here: this path did not observe a provider response
// or application error for this attempt, and the terminal server-error fields already
// record the failure without duplicating potentially sensitive details.
shouldUpdateSequenceId: true,
},
select: { id: true, tenancyId: true, startedSendingAt: true, to: true, sentAt: true, sendAttemptErrors: true },
select: { id: true, tenancyId: true, startedSendingAt: true },
});
if (res.length > 0) {
captureError("email-queue-step-stuck-in-sending", new StackAssertionError(`${res.length} emails stuck in sending! This should never happen. It was NOT correctly marked as an error! Manual intervention is required.`, {
emails: res,
}));
if (failed.length > 0) {
captureError(
"email-queue-step-stuck-in-sending",
new StackAssertionError(
`${failed.length} emails were stuck in sending and were marked as failed (terminal server errors; delivery status unknown, not retried to avoid duplicate sends). Manual investigation is recommended.`,
{ emails: failed.map(({ id, tenancyId, startedSendingAt }) => ({ id, tenancyId, startedSendingAt })) },
),
);
}
}
export const _forTesting = {
failEmailsStuckInSending,
STUCK_EMAIL_TIMEOUT_MS,
};
async function updateLastExecutionTime(): Promise<number> {
const key = "EMAIL_QUEUE_METADATA_KEY";