From 43d3dcef80d3e7b2e8e762b7c008bd608986ad8d Mon Sep 17 00:00:00 2001 From: Ian Chen Date: Thu, 10 Oct 2024 13:42:01 +0800 Subject: [PATCH] PLU-330: Auto-retry cloudflare intermittent error codes 502,504,520 (#769) ## Problem Postman email is returning 502, 504 and 520 intermittently. ## Solution Auto-retry for users --- .../actions/send-transactional-email.test.ts | 90 +++++++++++++++++++ .../apps/postman/common/data-out-validator.ts | 1 + .../src/apps/postman/common/email-helper.ts | 15 ++-- .../src/apps/postman/common/throw-errors.ts | 15 ++++ 4 files changed, 116 insertions(+), 5 deletions(-) diff --git a/packages/backend/src/apps/postman/__tests__/actions/send-transactional-email.test.ts b/packages/backend/src/apps/postman/__tests__/actions/send-transactional-email.test.ts index 1915a06db..3157bbef8 100644 --- a/packages/backend/src/apps/postman/__tests__/actions/send-transactional-email.test.ts +++ b/packages/backend/src/apps/postman/__tests__/actions/send-transactional-email.test.ts @@ -5,6 +5,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import HttpError from '@/errors/http' import PartialStepError from '@/errors/partial-error' +import RetriableError from '@/errors/retriable-error' import StepError from '@/errors/step' import sendTransactionalEmail from '../../actions/send-transactional-email' @@ -316,6 +317,7 @@ describe('send transactional email', () => { }, } as AxiosError), ) + await expect(sendTransactionalEmail.run($)).rejects.toThrowError(StepError) expect($.setActionItem).toHaveBeenCalledWith({ raw: { @@ -337,6 +339,57 @@ describe('send transactional email', () => { }) }) + it('should retry on 502, 504, 520', async () => { + const recipients = [ + 'recipient1@open.gov.sg', + 'recipient2@open.gov.sg', + 'recipient3@open.gov.sg', + ] + $.step.parameters.destinationEmail = recipients.join(',') + $.http.post = vi + .fn() + .mockResolvedValueOnce({ + data: { + params: { + body: 'test body', + subject: 'test subject', + from: 'jack', + reply_to: 'replyTo@open.gov.sg', + }, + }, + }) + .mockRejectedValueOnce( + new HttpError({ + response: { + data: 'cloudflare error', + status: 500, + statusText: 'Too Many Requests', + }, + } as AxiosError), + ) + .mockRejectedValueOnce( + new HttpError({ + response: { + data: 'cloudflare error', + status: 502, + statusText: 'Too Many Requests', + }, + } as AxiosError), + ) + + await expect(sendTransactionalEmail.run($)).rejects.toThrow(RetriableError) + expect($.setActionItem).toHaveBeenCalledWith({ + raw: { + status: ['ACCEPTED', 'ERROR', 'INTERMITTENT-ERROR'], + recipient: recipients, + subject: 'test subject', + body: 'test body', + from: 'jack', + reply_to: 'replyTo@open.gov.sg', + }, + }) + }) + it('should only retry to non-accepted emails', async () => { const recipients = [ 'recipient1@open.gov.sg', @@ -367,4 +420,41 @@ describe('send transactional email', () => { }) expect(mocks.sendBlacklistEmail).not.toHaveBeenCalled() }) + + it('should only retry to non-accepted emails', async () => { + const recipients = [ + 'recipient1@open.gov.sg', + 'recipient2@open.gov.sg', + 'recipient3@open.gov.sg', + 'recipient4@open.gov.sg', + 'recipient5@open.gov.sg', + ] + $.step.parameters.destinationEmail = recipients.join(',') + $.getLastExecutionStep = vi.fn().mockResolvedValueOnce({ + status: 'success', + errorDetails: 'error error', + dataOut: { + status: [ + 'BLACKLISTED', + 'ACCEPTED', + 'INTERMITTENT-ERROR', + 'ERROR', + 'RATE-LIMITED', + ], + recipient: recipients, + }, + }) + await expect(sendTransactionalEmail.run($)).resolves.not.toThrow() + expect($.http.post).toBeCalledTimes(4) + expect($.setActionItem).toHaveBeenCalledWith({ + raw: { + status: ['ACCEPTED', 'ACCEPTED', 'ACCEPTED', 'ACCEPTED', 'ACCEPTED'], + recipient: recipients, + subject: 'test subject', + body: 'test body', + from: 'jack', + reply_to: 'replyTo@open.gov.sg', + }, + }) + }) }) diff --git a/packages/backend/src/apps/postman/common/data-out-validator.ts b/packages/backend/src/apps/postman/common/data-out-validator.ts index 226df7993..2439f74cc 100644 --- a/packages/backend/src/apps/postman/common/data-out-validator.ts +++ b/packages/backend/src/apps/postman/common/data-out-validator.ts @@ -8,6 +8,7 @@ export const dataOutSchema = z.object( 'BLACKLISTED', 'RATE-LIMITED', 'INVALID-ATTACHMENT', + 'INTERMITTENT-ERROR', 'ERROR', ]), ), diff --git a/packages/backend/src/apps/postman/common/email-helper.ts b/packages/backend/src/apps/postman/common/email-helper.ts index 3c9fa4e28..905bfd93b 100644 --- a/packages/backend/src/apps/postman/common/email-helper.ts +++ b/packages/backend/src/apps/postman/common/email-helper.ts @@ -159,13 +159,18 @@ export async function sendTransactionalEmails( * Since we can only return one error per postman step, we have to select in terms of priority * 1. RATE-LIMITED (so we can auto-retry) * 2. INVALID-ATTACHMENT (probably all recipients should fail) - * 3. ERROR (same as above, probably all recipients should fail) - * 4. BLACKLISTED (blacklisted errors are returned even if there are other errors like invalid attachment) + * 3. INTERMITTENT-ERROR (some recipients failed, auto-retry) + * 4. ERROR (probably all recipients should fail) + * 5. BLACKLISTED (blacklisted errors are returned even if there are other errors like invalid attachment) */ const sortedErrors = sortBy(errors, (error) => - ['RATE-LIMITED', 'INVALID-ATTACHMENT', 'ERROR', 'BLACKLISTED'].indexOf( - error.status, - ), + [ + 'RATE-LIMITED', + 'INVALID-ATTACHMENT', + 'INTERMITTENT-ERROR', + 'ERROR', + 'BLACKLISTED', + ].indexOf(error.status), ) const dataOut = { diff --git a/packages/backend/src/apps/postman/common/throw-errors.ts b/packages/backend/src/apps/postman/common/throw-errors.ts index 2707cc680..02bec0ec8 100644 --- a/packages/backend/src/apps/postman/common/throw-errors.ts +++ b/packages/backend/src/apps/postman/common/throw-errors.ts @@ -2,6 +2,7 @@ import { IGlobalVariable } from '@plumber/types' import HttpError from '@/errors/http' import PartialStepError from '@/errors/partial-error' +import RetriableError from '@/errors/retriable-error' import StepError from '@/errors/step' import { PostmanEmailSendStatus } from './data-out-validator' @@ -12,6 +13,11 @@ type PostmanApiErrorData = { message: string } +// These are HTTP error codes returned by Cloudflare, which likely indicate +// that Postman's origin server did not receive the request. +// Until this is fixed, we will retry these requests on behalf of the user +const POSTMAN_RETRIABLE_HTTP_CODES = [502, 504, 520] + export function getPostmanErrorStatus( error: HttpError, ): PostmanEmailSendStatus { @@ -31,6 +37,9 @@ export function getPostmanErrorStatus( case 'rate_limit': return 'RATE-LIMITED' default: + if (POSTMAN_RETRIABLE_HTTP_CODES.includes(error.response?.status)) { + return 'INTERMITTENT-ERROR' + } // return original error if not caught return 'ERROR' } @@ -96,6 +105,12 @@ export function throwPostmanStepError({ appName, error, ) + case 'INTERMITTENT-ERROR': + throw new RetriableError({ + error: error.details, + delayInMs: 'default', + delayType: 'step', + }) case 'ERROR': default: throw new StepError(