diff --git a/proxy/fails/basic_classifiers.go b/proxy/fails/basic_classifiers.go index d54d4465..5875406f 100644 --- a/proxy/fails/basic_classifiers.go +++ b/proxy/fails/basic_classifiers.go @@ -9,10 +9,6 @@ import ( "strings" ) -var IdempotentRequestEOFError = errors.New("EOF (via idempotent request)") - -var IncompleteRequestError = errors.New("incomplete request") - var AttemptedTLSWithNonTLSBackend = ClassifierFunc(func(err error) bool { return errors.As(err, &tls.RecordHeaderError{}) }) @@ -78,11 +74,3 @@ var UntrustedCert = ClassifierFunc(func(err error) bool { return false } }) - -var IdempotentRequestEOF = ClassifierFunc(func(err error) bool { - return errors.Is(err, IdempotentRequestEOFError) -}) - -var IncompleteRequest = ClassifierFunc(func(err error) bool { - return errors.Is(err, IncompleteRequestError) -}) diff --git a/proxy/fails/classifier_group.go b/proxy/fails/classifier_group.go index d0a1da5d..87f7b76e 100644 --- a/proxy/fails/classifier_group.go +++ b/proxy/fails/classifier_group.go @@ -10,6 +10,11 @@ type ClassifierGroup []Classifier // // Otherwise, there’s risk of a mutating non-idempotent request (e.g. send // payment) being silently retried without the client knowing. +// +// IMPORTANT: to truly determine whether a request is retry-able the function +// round_tripper.isRetrieable must be used. It includes additional checks that +// allow requests to be retried more often than it is allowed by the +// classifiers. var RetriableClassifiers = ClassifierGroup{ Dial, AttemptedTLSWithNonTLSBackend, @@ -19,18 +24,42 @@ var RetriableClassifiers = ClassifierGroup{ RemoteHandshakeTimeout, UntrustedCert, ExpiredOrNotYetValidCertFailure, - IdempotentRequestEOF, - IncompleteRequest, } +// FailableClassifiers match all errors that should result in the endpoint +// being marked as failed and taken out of the available pool. These endpoints +// will be cleaned up automatically by the route-pruning in case they have +// become stale, therefore there is no need to prune those endpoints +// proactively. var FailableClassifiers = ClassifierGroup{ - RetriableClassifiers, + Dial, + AttemptedTLSWithNonTLSBackend, + HostnameMismatch, + RemoteFailedCertCheck, + RemoteHandshakeFailure, + RemoteHandshakeTimeout, + UntrustedCert, + ExpiredOrNotYetValidCertFailure, ConnectionResetOnRead, } -var PrunableClassifiers = RetriableClassifiers +// PrunableClassifiers match all errors that should result in the endpoint +// being pruned. This applies only if the connection to the backend is using +// TLS since the route-integrity prevents routes from being pruned +// automatically if they are configured with TLS. +var PrunableClassifiers = ClassifierGroup{ + Dial, + AttemptedTLSWithNonTLSBackend, + HostnameMismatch, + RemoteFailedCertCheck, + RemoteHandshakeFailure, + RemoteHandshakeTimeout, + UntrustedCert, + ExpiredOrNotYetValidCertFailure, +} -// Classify returns true on errors that are retryable +// Classify returns true on errors that match the at least one Classifier from +// the ClassifierGroup it is called on. func (cg ClassifierGroup) Classify(err error) bool { for _, classifier := range cg { if classifier.Classify(err) { diff --git a/proxy/fails/classifier_group_test.go b/proxy/fails/classifier_group_test.go index 8beebccb..3ca5f712 100644 --- a/proxy/fails/classifier_group_test.go +++ b/proxy/fails/classifier_group_test.go @@ -4,7 +4,6 @@ import ( "crypto/tls" "crypto/x509" "errors" - "fmt" "net" "code.cloudfoundry.org/gorouter/proxy/fails" @@ -33,30 +32,19 @@ var _ = Describe("ClassifierGroup", func() { rc := fails.RetriableClassifiers Expect(rc.Classify(&net.OpError{Op: "dial"})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "dial"}))).To(BeTrue()) Expect(rc.Classify(&net.OpError{Op: "remote error", Err: errors.New("tls: bad certificate")})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "remote error", Err: errors.New("tls: bad certificate")}))).To(BeTrue()) Expect(rc.Classify(&net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")}))).To(BeTrue()) Expect(rc.Classify(errors.New("net/http: TLS handshake timeout"))).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, errors.New("net/http: TLS handshake timeout")))).To(BeTrue()) Expect(rc.Classify(tls.RecordHeaderError{})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, tls.RecordHeaderError{}))).To(BeTrue()) Expect(rc.Classify(x509.HostnameError{})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.HostnameError{}))).To(BeTrue()) Expect(rc.Classify(x509.UnknownAuthorityError{})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.UnknownAuthorityError{}))).To(BeTrue()) Expect(rc.Classify(x509.CertificateInvalidError{Reason: x509.Expired})).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.CertificateInvalidError{Reason: x509.Expired}))).To(BeTrue()) Expect(rc.Classify(errors.New("i'm a potato"))).To(BeFalse()) - Expect(rc.Classify(fails.IdempotentRequestEOFError)).To(BeTrue()) - Expect(rc.Classify(fails.IncompleteRequestError)).To(BeTrue()) - Expect(rc.Classify(fmt.Errorf("%w (%w)", fails.IncompleteRequestError, x509.HostnameError{}))).To(BeTrue()) }) }) Describe("prunable", func() { - It("matches hostname mismatch", func() { + It("matches prunable errors", func() { pc := fails.PrunableClassifiers Expect(pc.Classify(&net.OpError{Op: "dial"})).To(BeTrue()) @@ -69,8 +57,6 @@ var _ = Describe("ClassifierGroup", func() { Expect(pc.Classify(x509.UnknownAuthorityError{})).To(BeTrue()) Expect(pc.Classify(x509.CertificateInvalidError{Reason: x509.Expired})).To(BeTrue()) Expect(pc.Classify(errors.New("i'm a potato"))).To(BeFalse()) - Expect(pc.Classify(fails.IdempotentRequestEOFError)).To(BeTrue()) - Expect(pc.Classify(fails.IncompleteRequestError)).To(BeTrue()) }) }) }) diff --git a/proxy/fails/fails.go b/proxy/fails/fails.go new file mode 100644 index 00000000..7eb2f416 --- /dev/null +++ b/proxy/fails/fails.go @@ -0,0 +1,4 @@ +// Package fails provides error classifiers that allow the user to check if a +// certain error fulfills certain requirements. See the individual +// documentation of each ClassifierGroup for details. +package fails diff --git a/proxy/round_tripper/error_handler_test.go b/proxy/round_tripper/error_handler_test.go index e082790e..78140113 100644 --- a/proxy/round_tripper/error_handler_test.go +++ b/proxy/round_tripper/error_handler_test.go @@ -5,7 +5,6 @@ import ( "crypto/tls" "crypto/x509" "errors" - "fmt" "net" "net/http/httptest" @@ -137,22 +136,6 @@ var _ = Describe("HandleError", func() { }) }) - Context("HostnameMismatch wrapped in IncompleteRequestError", func() { - BeforeEach(func() { - wrappedErr := x509.HostnameError{Host: "the wrong one"} - err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr) - errorHandler.HandleError(responseWriter, err) - }) - - It("has a 503 Status Code", func() { - Expect(responseWriter.Status()).To(Equal(503)) - }) - - It("emits a backend_invalid_id metric", func() { - Expect(metricReporter.CaptureBackendInvalidIDCallCount()).To(Equal(1)) - }) - }) - Context("Untrusted Cert", func() { BeforeEach(func() { err = x509.UnknownAuthorityError{} @@ -168,22 +151,6 @@ var _ = Describe("HandleError", func() { }) }) - Context("Untrusted Cert wrapped in IncompleteRequestError", func() { - BeforeEach(func() { - wrappedErr := x509.UnknownAuthorityError{} - err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr) - errorHandler.HandleError(responseWriter, err) - }) - - It("has a 526 Status Code", func() { - Expect(responseWriter.Status()).To(Equal(526)) - }) - - It("emits a backend_invalid_tls_cert metric", func() { - Expect(metricReporter.CaptureBackendInvalidTLSCertCallCount()).To(Equal(1)) - }) - }) - Context("Attempted TLS with non-TLS backend error", func() { BeforeEach(func() { err = tls.RecordHeaderError{Msg: "bad handshake"} @@ -199,22 +166,6 @@ var _ = Describe("HandleError", func() { }) }) - Context("Attempted TLS with non-TLS backend error wrapped in IncompleteRequestError", func() { - BeforeEach(func() { - wrappedErr := tls.RecordHeaderError{Msg: "bad handshake"} - err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr) - errorHandler.HandleError(responseWriter, err) - }) - - It("has a 525 Status Code", func() { - Expect(responseWriter.Status()).To(Equal(525)) - }) - - It("emits a backend_tls_handshake_failed metric", func() { - Expect(metricReporter.CaptureBackendTLSHandshakeFailedCallCount()).To(Equal(1)) - }) - }) - Context("Remote handshake failure", func() { BeforeEach(func() { err = &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")} @@ -230,22 +181,6 @@ var _ = Describe("HandleError", func() { }) }) - Context("Remote handshake failure wrapped in IncompleteRequestError", func() { - BeforeEach(func() { - wrappedErr := &net.OpError{Op: "remote error", Err: errors.New("tls: handshake failure")} - err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, wrappedErr) - errorHandler.HandleError(responseWriter, err) - }) - - It("has a 525 Status Code", func() { - Expect(responseWriter.Status()).To(Equal(525)) - }) - - It("emits a backend_tls_handshake_failed metric", func() { - Expect(metricReporter.CaptureBackendTLSHandshakeFailedCallCount()).To(Equal(1)) - }) - }) - Context("Context Cancelled Error", func() { BeforeEach(func() { err = context.Canceled diff --git a/proxy/round_tripper/proxy_round_tripper.go b/proxy/round_tripper/proxy_round_tripper.go index b3a94c93..66742595 100644 --- a/proxy/round_tripper/proxy_round_tripper.go +++ b/proxy/round_tripper/proxy_round_tripper.go @@ -3,7 +3,6 @@ package round_tripper import ( "context" "errors" - "fmt" "io" "io/ioutil" "net/http" @@ -177,7 +176,7 @@ func (rt *roundTripper) RoundTrip(originalRequest *http.Request) (*http.Response if err != nil { reqInfo.FailedAttempts++ reqInfo.LastFailedAttemptFinishedAt = time.Now() - retriable, err := rt.isRetriable(request, err, trace) + retriable := rt.isRetriable(request, err, trace) logger.Error("backend-endpoint-failed", zap.Error(err), @@ -226,7 +225,7 @@ func (rt *roundTripper) RoundTrip(originalRequest *http.Request) (*http.Response if err != nil { reqInfo.FailedAttempts++ reqInfo.LastFailedAttemptFinishedAt = time.Now() - retriable, err := rt.isRetriable(request, err, trace) + retriable := rt.isRetriable(request, err, trace) logger.Error( "route-service-connection-failed", @@ -482,24 +481,23 @@ func isIdempotent(request *http.Request) bool { return false } -func (rt *roundTripper) isRetriable(request *http.Request, err error, trace *requestTracer) (bool, error) { +func (rt *roundTripper) isRetriable(request *http.Request, err error, trace *requestTracer) bool { // if the context has been cancelled we do not perform further retries if request.Context().Err() != nil { - return false, fmt.Errorf("%w (%w)", request.Context().Err(), err) + return false } // io.EOF errors are considered safe to retry for certain requests // Replace the error here to track this state when classifying later. if err == io.EOF && isIdempotent(request) { - err = fails.IdempotentRequestEOFError + return true } // We can retry for sure if we never obtained a connection // since there is no way any data was transmitted. If headers could not // be written in full, the request should also be safe to retry. if !trace.GotConn() || !trace.WroteHeaders() { - err = fmt.Errorf("%w (%w)", fails.IncompleteRequestError, err) + return true } - retriable := rt.retriableClassifier.Classify(err) - return retriable, err + return rt.retriableClassifier.Classify(err) } diff --git a/proxy/round_tripper/proxy_round_tripper_test.go b/proxy/round_tripper/proxy_round_tripper_test.go index 92543131..c4178f83 100644 --- a/proxy/round_tripper/proxy_round_tripper_test.go +++ b/proxy/round_tripper/proxy_round_tripper_test.go @@ -4,7 +4,6 @@ import ( "bytes" "errors" "fmt" - "io" "net" "net/http" "net/http/httptest" @@ -23,7 +22,6 @@ import ( "code.cloudfoundry.org/gorouter/config" "code.cloudfoundry.org/gorouter/handlers" "code.cloudfoundry.org/gorouter/metrics/fakes" - "code.cloudfoundry.org/gorouter/proxy/fails" "code.cloudfoundry.org/gorouter/proxy/handler" "code.cloudfoundry.org/gorouter/proxy/round_tripper" "code.cloudfoundry.org/gorouter/proxy/utils" @@ -434,94 +432,6 @@ var _ = Describe("ProxyRoundTripper", func() { }) }) - DescribeTable("when the backend fails with an empty response error (io.EOF)", - func(reqBody io.ReadCloser, getBodyIsNil bool, reqMethod string, headers map[string]string, classify fails.ClassifierFunc, expectRetry bool) { - badResponse := &http.Response{ - Header: make(map[string][]string), - } - badResponse.Header.Add(handlers.VcapRequestIdHeader, "some-request-id") - - // The first request fails with io.EOF, the second (if retried) succeeds - transport.RoundTripStub = func(*http.Request) (*http.Response, error) { - switch transport.RoundTripCallCount() { - case 1: - return nil, io.EOF - case 2: - return &http.Response{StatusCode: http.StatusTeapot}, nil - default: - return nil, nil - } - } - - retriableClassifier.ClassifyStub = classify - req.Method = reqMethod - req.Body = reqBody - if !getBodyIsNil { - req.GetBody = func() (io.ReadCloser, error) { - return new(testBody), nil - } - } - if headers != nil { - for key, value := range headers { - req.Header.Add(key, value) - } - } - - res, err := proxyRoundTripper.RoundTrip(req) - - if expectRetry { - Expect(err).NotTo(HaveOccurred()) - Expect(transport.RoundTripCallCount()).To(Equal(2)) - Expect(retriableClassifier.ClassifyCallCount()).To(Equal(1)) - Expect(res.StatusCode).To(Equal(http.StatusTeapot)) - } else { - Expect(errors.Is(err, io.EOF)).To(BeTrue()) - Expect(transport.RoundTripCallCount()).To(Equal(1)) - Expect(retriableClassifier.ClassifyCallCount()).To(Equal(1)) - } - }, - - Entry("POST, body is empty: does not retry", nil, true, "POST", nil, fails.IdempotentRequestEOF, false), - Entry("POST, body is not empty and GetBody is non-nil: does not retry", reqBody, false, "POST", nil, fails.IdempotentRequestEOF, false), - Entry("POST, body is not empty: does not retry", reqBody, true, "POST", nil, fails.IdempotentRequestEOF, false), - Entry("POST, body is http.NoBody: does not retry", http.NoBody, true, "POST", nil, fails.IdempotentRequestEOF, false), - - Entry("POST, body is empty, X-Idempotency-Key header: attempts retry", nil, true, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IncompleteRequest, true), - Entry("POST, body is not empty and GetBody is non-nil, X-Idempotency-Key header: attempts retry", reqBody, false, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IncompleteRequest, true), - Entry("POST, body is not empty, X-Idempotency-Key header: does not retry", reqBody, true, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false), - Entry("POST, body is http.NoBody, X-Idempotency-Key header: does not retry", http.NoBody, true, "POST", map[string]string{"X-Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false), - - Entry("POST, body is empty, Idempotency-Key header: attempts retry", nil, true, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IncompleteRequest, true), - Entry("POST, body is not empty and GetBody is non-nil, Idempotency-Key header: attempts retry", reqBody, false, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IncompleteRequest, true), - Entry("POST, body is not empty, Idempotency-Key header: does not retry", reqBody, true, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false), - Entry("POST, body is http.NoBody, Idempotency-Key header: does not retry", http.NoBody, true, "POST", map[string]string{"Idempotency-Key": "abc123"}, fails.IdempotentRequestEOF, false), - - Entry("GET, body is empty: attempts retry", nil, true, "GET", nil, fails.IncompleteRequest, true), - Entry("GET, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "GET", nil, fails.IncompleteRequest, true), - Entry("GET, body is not empty: does not retry", reqBody, true, "GET", nil, fails.IdempotentRequestEOF, false), - Entry("GET, body is http.NoBody: does not retry", http.NoBody, true, "GET", nil, fails.IdempotentRequestEOF, false), - - Entry("TRACE, body is empty: attempts retry", nil, true, "TRACE", nil, fails.IncompleteRequest, true), - Entry("TRACE, body is not empty: does not retry", reqBody, true, "TRACE", nil, fails.IdempotentRequestEOF, false), - Entry("TRACE, body is http.NoBody: does not retry", http.NoBody, true, "TRACE", nil, fails.IdempotentRequestEOF, false), - Entry("TRACE, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "TRACE", nil, fails.IncompleteRequest, true), - - Entry("HEAD, body is empty: attempts retry", nil, true, "HEAD", nil, fails.IncompleteRequest, true), - Entry("HEAD, body is not empty: does not retry", reqBody, true, "HEAD", nil, fails.IdempotentRequestEOF, false), - Entry("HEAD, body is http.NoBody: does not retry", http.NoBody, true, "HEAD", nil, fails.IdempotentRequestEOF, false), - Entry("HEAD, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "HEAD", nil, fails.IncompleteRequest, true), - - Entry("OPTIONS, body is empty: attempts retry", nil, true, "OPTIONS", nil, fails.IncompleteRequest, true), - Entry("OPTIONS, body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "OPTIONS", nil, fails.IncompleteRequest, true), - Entry("OPTIONS, body is not empty: does not retry", reqBody, true, "OPTIONS", nil, fails.IdempotentRequestEOF, false), - Entry("OPTIONS, body is http.NoBody: does not retry", http.NoBody, true, "OPTIONS", nil, fails.IdempotentRequestEOF, false), - - Entry(", body is empty: attempts retry", nil, true, "", nil, fails.IncompleteRequest, true), - Entry(", body is not empty and GetBody is non-nil: attempts retry", reqBody, false, "", nil, fails.IncompleteRequest, true), - Entry(", body is not empty: does not retry", reqBody, true, "", nil, fails.IdempotentRequestEOF, false), - Entry(", body is http.NoBody: does not retry", http.NoBody, true, "", nil, fails.IdempotentRequestEOF, false), - ) - Context("when there are no more endpoints available", func() { BeforeEach(func() { removed := routePool.Remove(endpoint)