From 7899e20004da2ff568eb7561cf2f419202a9f251 Mon Sep 17 00:00:00 2001 From: Brandur Leach Date: Thu, 4 Jul 2024 07:30:54 -0700 Subject: [PATCH] Add stack trace to error handler's `HandlePanicFunc` (#423) This one in response to #418 in which although we persist a stack trace to a job row's errors property, we don't reveal it in a panic handler, which is quite inconvenient for purposes of logging or other telemetry (e.g. sending to Sentry). Here, `HandlePanic`'s signature changes to (`trace` is added): HandlePanic(ctx context.Context, job *rivertype.JobRow, panicVal any, trace string) *ErrorHandlerResult A couple notes on choices: * The naming of `trace` is reused from `AttemptError`. * The value type is `string`. This is a little non-obvious because Go exposes it as a `[]byte`, something I've never quite understood as to why, but I did `string` because for one it's more convenient to use, but more importantly, it's the same type on `AttemptError`. This is a breaking change, but it seems like being able to get a stack trace during panic is important enough that it's worth it, and with any luck there's few enough people using this feature that it won't break that many people. The fix is quite easy regardless and will easily be caught by the compiler. Fixes #418. --- CHANGELOG.md | 14 ++++++++++++++ client_test.go | 2 +- error_handler.go | 2 +- example_error_handler_test.go | 2 +- job_executor.go | 8 ++++---- job_executor_test.go | 18 +++++++++++------- 6 files changed, 32 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf96ccb2..e9224429 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `Config.TestOnly` has been added. It disables various features in the River client like staggered maintenance service start that are useful in production, but may be somewhat harmful in tests because they make start/stop slower. [PR #414](https://github.com/riverqueue/river/pull/414). +### Changed + +⚠️ Version 0.9.0 has a small breaking change in `ErrorHandler`. As before, we try never to make breaking changes, but this one was deemed quite important because `ErrorHandler` was fundamentally lacking important functionality. + +- **Breaking change:** Add stack trace to `ErrorHandler.HandlePanicFunc`. Fixing code only requires adding a new `trace string` argument to `HandlePanicFunc`. [PR #423](https://github.com/riverqueue/river/pull/423). + + ``` go + # before + HandlePanic(ctx context.Context, job *rivertype.JobRow, panicVal any) *ErrorHandlerResult + + # after + HandlePanic(ctx context.Context, job *rivertype.JobRow, panicVal any, trace string) *ErrorHandlerResult + ``` + ### Fixed - Pausing or resuming a queue that was already paused or not paused respectively no longer returns `rivertype.ErrNotFound`. The same goes for pausing or resuming using the all queues string (`*`) when no queues are in the database (previously that also returned `rivertype.ErrNotFound`). [PR #408](https://github.com/riverqueue/river/pull/408). diff --git a/client_test.go b/client_test.go index e89a3890..5f2852c2 100644 --- a/client_test.go +++ b/client_test.go @@ -2300,7 +2300,7 @@ func Test_Client_ErrorHandler(t *testing.T) { var panicHandlerCalled bool config.ErrorHandler = &testErrorHandler{ - HandlePanicFunc: func(ctx context.Context, job *rivertype.JobRow, panicVal any) *ErrorHandlerResult { + HandlePanicFunc: func(ctx context.Context, job *rivertype.JobRow, panicVal any, trace string) *ErrorHandlerResult { require.Equal(t, "panic val", panicVal) panicHandlerCalled = true return &ErrorHandlerResult{} diff --git a/error_handler.go b/error_handler.go index 9b6b7050..a80f0233 100644 --- a/error_handler.go +++ b/error_handler.go @@ -20,7 +20,7 @@ type ErrorHandler interface { // // Context is descended from the one used to start the River client that // worked the job. - HandlePanic(ctx context.Context, job *rivertype.JobRow, panicVal any) *ErrorHandlerResult + HandlePanic(ctx context.Context, job *rivertype.JobRow, panicVal any, trace string) *ErrorHandlerResult } type ErrorHandlerResult struct { diff --git a/example_error_handler_test.go b/example_error_handler_test.go index 90f8dcf3..d268c150 100644 --- a/example_error_handler_test.go +++ b/example_error_handler_test.go @@ -22,7 +22,7 @@ func (*CustomErrorHandler) HandleError(ctx context.Context, job *rivertype.JobRo return nil } -func (*CustomErrorHandler) HandlePanic(ctx context.Context, job *rivertype.JobRow, panicVal any) *river.ErrorHandlerResult { +func (*CustomErrorHandler) HandlePanic(ctx context.Context, job *rivertype.JobRow, panicVal any, trace string) *river.ErrorHandlerResult { fmt.Printf("Job panicked with: %v\n", panicVal) // Either function can also set the job to be immediately cancelled. diff --git a/job_executor.go b/job_executor.go index 177caab9..400f6e46 100644 --- a/job_executor.go +++ b/job_executor.go @@ -100,7 +100,7 @@ var ErrJobCancelledRemotely = JobCancel(errors.New("job cancelled remotely")) type jobExecutorResult struct { Err error NextRetry time.Time - PanicTrace []byte + PanicTrace string PanicVal any } @@ -175,7 +175,7 @@ func (e *jobExecutor) execute(ctx context.Context) (res *jobExecutorResult) { ) res = &jobExecutorResult{ - PanicTrace: debug.Stack(), + PanicTrace: string(debug.Stack()), PanicVal: recovery, } } @@ -234,7 +234,7 @@ func (e *jobExecutor) invokeErrorHandler(ctx context.Context, res *jobExecutorRe case res.PanicVal != nil: errorHandlerRes = invokeAndHandlePanic("HandlePanic", func() *ErrorHandlerResult { - return e.ErrorHandler.HandlePanic(ctx, e.JobRow, res.PanicVal) + return e.ErrorHandler.HandlePanic(ctx, e.JobRow, res.PanicVal, res.PanicTrace) }) } @@ -316,7 +316,7 @@ func (e *jobExecutor) reportError(ctx context.Context, res *jobExecutorResult) { At: e.start, Attempt: e.JobRow.Attempt, Error: res.ErrorStr(), - Trace: string(res.PanicTrace), + Trace: res.PanicTrace, } errData, err := json.Marshal(attemptErr) diff --git a/job_executor_test.go b/job_executor_test.go index e1ed00b3..101e7e9e 100644 --- a/job_executor_test.go +++ b/job_executor_test.go @@ -89,14 +89,16 @@ type testErrorHandler struct { HandleErrorFunc func(ctx context.Context, job *rivertype.JobRow, err error) *ErrorHandlerResult HandlePanicCalled bool - HandlePanicFunc func(ctx context.Context, job *rivertype.JobRow, panicVal any) *ErrorHandlerResult + HandlePanicFunc func(ctx context.Context, job *rivertype.JobRow, panicVal any, trace string) *ErrorHandlerResult } // Test handler with no-ops for both error handling functions. func newTestErrorHandler() *testErrorHandler { return &testErrorHandler{ HandleErrorFunc: func(ctx context.Context, job *rivertype.JobRow, err error) *ErrorHandlerResult { return nil }, - HandlePanicFunc: func(ctx context.Context, job *rivertype.JobRow, panicVal any) *ErrorHandlerResult { return nil }, + HandlePanicFunc: func(ctx context.Context, job *rivertype.JobRow, panicVal any, trace string) *ErrorHandlerResult { + return nil + }, } } @@ -105,9 +107,9 @@ func (h *testErrorHandler) HandleError(ctx context.Context, job *rivertype.JobRo return h.HandleErrorFunc(ctx, job, err) } -func (h *testErrorHandler) HandlePanic(ctx context.Context, job *rivertype.JobRow, panicVal any) *ErrorHandlerResult { +func (h *testErrorHandler) HandlePanic(ctx context.Context, job *rivertype.JobRow, panicVal any, trace string) *ErrorHandlerResult { h.HandlePanicCalled = true - return h.HandlePanicFunc(ctx, job, panicVal) + return h.HandlePanicFunc(ctx, job, panicVal, trace) } func TestJobExecutor_Execute(t *testing.T) { @@ -565,8 +567,10 @@ func TestJobExecutor_Execute(t *testing.T) { executor, bundle := setup(t) executor.WorkUnit = newWorkUnitFactoryWithCustomRetry(func() error { panic("panic val") }, nil).MakeUnit(bundle.jobRow) - bundle.errorHandler.HandlePanicFunc = func(ctx context.Context, job *rivertype.JobRow, panicVal any) *ErrorHandlerResult { + bundle.errorHandler.HandlePanicFunc = func(ctx context.Context, job *rivertype.JobRow, panicVal any, trace string) *ErrorHandlerResult { require.Equal(t, "panic val", panicVal) + require.Contains(t, trace, "runtime/debug.Stack()\n") + return nil } @@ -586,7 +590,7 @@ func TestJobExecutor_Execute(t *testing.T) { executor, bundle := setup(t) executor.WorkUnit = newWorkUnitFactoryWithCustomRetry(func() error { panic("panic val") }, nil).MakeUnit(bundle.jobRow) - bundle.errorHandler.HandlePanicFunc = func(ctx context.Context, job *rivertype.JobRow, panicVal any) *ErrorHandlerResult { + bundle.errorHandler.HandlePanicFunc = func(ctx context.Context, job *rivertype.JobRow, panicVal any, trace string) *ErrorHandlerResult { return &ErrorHandlerResult{SetCancelled: true} } @@ -606,7 +610,7 @@ func TestJobExecutor_Execute(t *testing.T) { executor, bundle := setup(t) executor.WorkUnit = newWorkUnitFactoryWithCustomRetry(func() error { panic("panic val") }, nil).MakeUnit(bundle.jobRow) - bundle.errorHandler.HandlePanicFunc = func(ctx context.Context, job *rivertype.JobRow, panicVal any) *ErrorHandlerResult { + bundle.errorHandler.HandlePanicFunc = func(ctx context.Context, job *rivertype.JobRow, panicVal any, trace string) *ErrorHandlerResult { panic("panic handler panicked!") }