From 01183bc3321d730b0a6377cc6ea6ce8bfaf57add Mon Sep 17 00:00:00 2001 From: Nodar Ambroladze Date: Mon, 3 Jun 2024 21:01:41 +0200 Subject: [PATCH 1/7] Profile only SequenceTransactions, increase threshold to 5 sec --- execution/gethexec/executionengine.go | 44 +++++++++++++++++++ execution/gethexec/sequencer.go | 62 +++++---------------------- 2 files changed, 54 insertions(+), 52 deletions(-) diff --git a/execution/gethexec/executionengine.go b/execution/gethexec/executionengine.go index e841a93529..f0f4551790 100644 --- a/execution/gethexec/executionengine.go +++ b/execution/gethexec/executionengine.go @@ -13,10 +13,15 @@ package gethexec */ import "C" import ( + "bytes" "context" "encoding/binary" "errors" "fmt" + "os" + "path" + "runtime/pprof" + "runtime/trace" "sync" "testing" "time" @@ -27,6 +32,7 @@ import ( "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/params" + "github.com/google/uuid" "github.com/offchainlabs/nitro/arbos" "github.com/offchainlabs/nitro/arbos/arbosState" "github.com/offchainlabs/nitro/arbos/arbostypes" @@ -334,6 +340,44 @@ func (s *ExecutionEngine) SequenceTransactions(header *arbostypes.L1IncomingMess }) } +// SequenceTransactionsWithProfiling runs SequenceTransactions with tracing and +// CPU profiling enabled. If the block creation takes longer than 5 seconds, it +// keeps both and prints out filenames in an error log line. +func (s *ExecutionEngine) SequenceTransactionsWithProfiling(header *arbostypes.L1IncomingMessageHeader, txes types.Transactions, hooks *arbos.SequencingHooks) (*types.Block, error) { + pprofBuf, traceBuf := bytes.NewBuffer(nil), bytes.NewBuffer(nil) + if err := pprof.StartCPUProfile(pprofBuf); err != nil { + log.Error("Starting CPU profiling", "error", err) + } + if err := trace.Start(traceBuf); err != nil { + log.Error("Starting tracing", "error", err) + } + start := time.Now() + res, err := s.SequenceTransactions(header, txes, hooks) + elapsed := time.Since(start) + pprof.StopCPUProfile() + trace.Stop() + if elapsed > 5*time.Second { + writeAndLog(pprofBuf, traceBuf) + return res, err + } + return res, err +} + +func writeAndLog(pprof, trace *bytes.Buffer) { + id := uuid.NewString() + pprofFile := path.Join(os.TempDir(), id+".pprof") + if err := os.WriteFile(pprofFile, pprof.Bytes(), 0o600); err != nil { + log.Error("Creating temporary file for pprof", "fileName", pprofFile, "error", err) + return + } + traceFile := path.Join(os.TempDir(), id+".trace") + if err := os.WriteFile(traceFile, trace.Bytes(), 0o600); err != nil { + log.Error("Creating temporary file for trace", "fileName", traceFile, "error", err) + return + } + log.Info("Block creation took longer than 5 seconds, created pprof and trace files", "pprof", pprofFile, "traceFile", traceFile) +} + func (s *ExecutionEngine) sequenceTransactionsWithBlockMutex(header *arbostypes.L1IncomingMessageHeader, txes types.Transactions, hooks *arbos.SequencingHooks) (*types.Block, error) { lastBlockHeader, err := s.getCurrentHeader() if err != nil { diff --git a/execution/gethexec/sequencer.go b/execution/gethexec/sequencer.go index a0623a8aed..059c7c7553 100644 --- a/execution/gethexec/sequencer.go +++ b/execution/gethexec/sequencer.go @@ -4,24 +4,18 @@ package gethexec import ( - "bytes" "context" "errors" "fmt" "math" "math/big" - "os" - "path" "runtime/debug" - "runtime/pprof" - "runtime/trace" "strconv" "strings" "sync" "sync/atomic" "time" - "github.com/google/uuid" "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/execution" "github.com/offchainlabs/nitro/util/arbmath" @@ -784,44 +778,6 @@ func (s *Sequencer) precheckNonces(queueItems []txQueueItem, totalBlockSize int) return outputQueueItems } -// createBlockWithProfiling runs create block with tracing and CPU profiling -// enabled. If the block creation takes longer than 5 seconds, it keeps both -// and prints out filenames in an error log line. -func (s *Sequencer) createBlockWithProfiling(ctx context.Context) bool { - pprofBuf, traceBuf := bytes.NewBuffer(nil), bytes.NewBuffer(nil) - if err := pprof.StartCPUProfile(pprofBuf); err != nil { - log.Error("Starting CPU profiling", "error", err) - } - if err := trace.Start(traceBuf); err != nil { - log.Error("Starting tracing", "error", err) - } - start := time.Now() - res := s.createBlock(ctx) - elapsed := time.Since(start) - pprof.StopCPUProfile() - trace.Stop() - if elapsed > 2*time.Second { - writeAndLog(pprofBuf, traceBuf) - return res - } - return res -} - -func writeAndLog(pprof, trace *bytes.Buffer) { - id := uuid.NewString() - pprofFile := path.Join(os.TempDir(), id+".pprof") - if err := os.WriteFile(pprofFile, pprof.Bytes(), 0o600); err != nil { - log.Error("Creating temporary file for pprof", "fileName", pprofFile, "error", err) - return - } - traceFile := path.Join(os.TempDir(), id+".trace") - if err := os.WriteFile(traceFile, trace.Bytes(), 0o600); err != nil { - log.Error("Creating temporary file for trace", "fileName", traceFile, "error", err) - return - } - log.Debug("Block creation took longer than 5 seconds, created pprof and trace files", "pprof", pprofFile, "traceFile", traceFile) -} - func (s *Sequencer) createBlock(ctx context.Context) (returnValue bool) { var queueItems []txQueueItem var totalBlockSize int @@ -969,7 +925,15 @@ func (s *Sequencer) createBlock(ctx context.Context) (returnValue bool) { } start := time.Now() - block, err := s.execEngine.SequenceTransactions(header, txes, hooks) + var ( + block *types.Block + err error + ) + if s.enableProfiling { + block, err = s.execEngine.SequenceTransactionsWithProfiling(header, txes, hooks) + } else { + block, err = s.execEngine.SequenceTransactions(header, txes, hooks) + } elapsed := time.Since(start) blockCreationTimer.Update(elapsed) if elapsed >= time.Second*5 { @@ -1165,13 +1129,7 @@ func (s *Sequencer) Start(ctxIn context.Context) error { s.CallIteratively(func(ctx context.Context) time.Duration { nextBlock := time.Now().Add(s.config().MaxBlockSpeed) - var madeBlock bool - if s.enableProfiling { - madeBlock = s.createBlockWithProfiling(ctx) - } else { - madeBlock = s.createBlock(ctx) - } - if madeBlock { + if s.createBlock(ctx) { // Note: this may return a negative duration, but timers are fine with that (they treat negative durations as 0). return time.Until(nextBlock) } From 5368e2fcf65628a76328f0d6eb68e3c8acb5ecaa Mon Sep 17 00:00:00 2001 From: Nodar Ambroladze Date: Tue, 4 Jun 2024 09:14:50 +0200 Subject: [PATCH 2/7] Reduce threshold back to 2 seconds --- execution/gethexec/executionengine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/execution/gethexec/executionengine.go b/execution/gethexec/executionengine.go index f0f4551790..6923a7c2a5 100644 --- a/execution/gethexec/executionengine.go +++ b/execution/gethexec/executionengine.go @@ -356,7 +356,7 @@ func (s *ExecutionEngine) SequenceTransactionsWithProfiling(header *arbostypes.L elapsed := time.Since(start) pprof.StopCPUProfile() trace.Stop() - if elapsed > 5*time.Second { + if elapsed > 2*time.Second { writeAndLog(pprofBuf, traceBuf) return res, err } From 0006d1509af11d6d5aa1d4579eb5b98a9f827242 Mon Sep 17 00:00:00 2001 From: Nodar Date: Tue, 4 Jun 2024 17:32:34 +0200 Subject: [PATCH 3/7] Update execution/gethexec/executionengine.go Co-authored-by: Maciej Kulawik <10907694+magicxyyz@users.noreply.github.com> --- execution/gethexec/executionengine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/execution/gethexec/executionengine.go b/execution/gethexec/executionengine.go index 6923a7c2a5..d1df8653b0 100644 --- a/execution/gethexec/executionengine.go +++ b/execution/gethexec/executionengine.go @@ -375,7 +375,7 @@ func writeAndLog(pprof, trace *bytes.Buffer) { log.Error("Creating temporary file for trace", "fileName", traceFile, "error", err) return } - log.Info("Block creation took longer than 5 seconds, created pprof and trace files", "pprof", pprofFile, "traceFile", traceFile) + log.Info("Transactions sequencing took longer than 2 seconds, created pprof and trace files", "pprof", pprofFile, "traceFile", traceFile) } func (s *ExecutionEngine) sequenceTransactionsWithBlockMutex(header *arbostypes.L1IncomingMessageHeader, txes types.Transactions, hooks *arbos.SequencingHooks) (*types.Block, error) { From d91ea0bafecf2501ec5377e5c3f08493081eb403 Mon Sep 17 00:00:00 2001 From: Nodar Date: Tue, 4 Jun 2024 17:32:41 +0200 Subject: [PATCH 4/7] Update execution/gethexec/executionengine.go Co-authored-by: Maciej Kulawik <10907694+magicxyyz@users.noreply.github.com> --- execution/gethexec/executionengine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/execution/gethexec/executionengine.go b/execution/gethexec/executionengine.go index d1df8653b0..083f59dfab 100644 --- a/execution/gethexec/executionengine.go +++ b/execution/gethexec/executionengine.go @@ -341,7 +341,7 @@ func (s *ExecutionEngine) SequenceTransactions(header *arbostypes.L1IncomingMess } // SequenceTransactionsWithProfiling runs SequenceTransactions with tracing and -// CPU profiling enabled. If the block creation takes longer than 5 seconds, it +// CPU profiling enabled. If the block creation takes longer than 2 seconds, it // keeps both and prints out filenames in an error log line. func (s *ExecutionEngine) SequenceTransactionsWithProfiling(header *arbostypes.L1IncomingMessageHeader, txes types.Transactions, hooks *arbos.SequencingHooks) (*types.Block, error) { pprofBuf, traceBuf := bytes.NewBuffer(nil), bytes.NewBuffer(nil) From bb3117e7d256bc87214e6f378d1ea48c58651654 Mon Sep 17 00:00:00 2001 From: Nodar Ambroladze Date: Tue, 4 Jun 2024 17:36:49 +0200 Subject: [PATCH 5/7] Use enableProfiling directly from the configFetcher to make it hot-reloadable --- execution/gethexec/sequencer.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/execution/gethexec/sequencer.go b/execution/gethexec/sequencer.go index 059c7c7553..e1d1e5e9f6 100644 --- a/execution/gethexec/sequencer.go +++ b/execution/gethexec/sequencer.go @@ -76,7 +76,7 @@ type SequencerConfig struct { NonceFailureCacheExpiry time.Duration `koanf:"nonce-failure-cache-expiry" reload:"hot"` ExpectedSurplusSoftThreshold string `koanf:"expected-surplus-soft-threshold" reload:"hot"` ExpectedSurplusHardThreshold string `koanf:"expected-surplus-hard-threshold" reload:"hot"` - EnableProfiling bool `koanf:"enable-profiling"` + EnableProfiling bool `koanf:"enable-profiling" reload:"hot"` expectedSurplusSoftThreshold int expectedSurplusHardThreshold int } @@ -332,7 +332,6 @@ type Sequencer struct { expectedSurplusMutex sync.RWMutex expectedSurplus int64 expectedSurplusUpdated bool - enableProfiling bool } func NewSequencer(execEngine *ExecutionEngine, l1Reader *headerreader.HeaderReader, configFetcher SequencerConfigFetcher) (*Sequencer, error) { @@ -359,7 +358,6 @@ func NewSequencer(execEngine *ExecutionEngine, l1Reader *headerreader.HeaderRead l1Timestamp: 0, pauseChan: nil, onForwarderSet: make(chan struct{}, 1), - enableProfiling: config.EnableProfiling, } s.nonceFailures = &nonceFailureCache{ containers.NewLruCacheWithOnEvict(config.NonceCacheSize, s.onNonceFailureEvict), @@ -929,7 +927,7 @@ func (s *Sequencer) createBlock(ctx context.Context) (returnValue bool) { block *types.Block err error ) - if s.enableProfiling { + if s.config().EnableProfiling { block, err = s.execEngine.SequenceTransactionsWithProfiling(header, txes, hooks) } else { block, err = s.execEngine.SequenceTransactions(header, txes, hooks) From 4737526865c2f6719dfd58cfad5376af9c47987f Mon Sep 17 00:00:00 2001 From: Nodar Ambroladze Date: Wed, 5 Jun 2024 10:45:16 +0200 Subject: [PATCH 6/7] Don't fetch config multiple times in the same function --- execution/gethexec/sequencer.go | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/execution/gethexec/sequencer.go b/execution/gethexec/sequencer.go index 167f8d5a87..10939cda3a 100644 --- a/execution/gethexec/sequencer.go +++ b/execution/gethexec/sequencer.go @@ -408,11 +408,12 @@ func ctxWithTimeout(ctx context.Context, timeout time.Duration) (context.Context } func (s *Sequencer) PublishTransaction(parentCtx context.Context, tx *types.Transaction, options *arbitrum_types.ConditionalOptions) error { + config := s.config() // Only try to acquire Rlock and check for hard threshold if l1reader is not nil // And hard threshold was enabled, this prevents spamming of read locks when not needed - if s.l1Reader != nil && s.config().ExpectedSurplusHardThreshold != "default" { + if s.l1Reader != nil && config.ExpectedSurplusHardThreshold != "default" { s.expectedSurplusMutex.RLock() - if s.expectedSurplusUpdated && s.expectedSurplus < int64(s.config().expectedSurplusHardThreshold) { + if s.expectedSurplusUpdated && s.expectedSurplus < int64(config.expectedSurplusHardThreshold) { return errors.New("currently not accepting transactions due to expected surplus being below threshold") } s.expectedSurplusMutex.RUnlock() @@ -451,7 +452,7 @@ func (s *Sequencer) PublishTransaction(parentCtx context.Context, tx *types.Tran return err } - queueTimeout := s.config().QueueTimeout + queueTimeout := config.QueueTimeout queueCtx, cancelFunc := ctxWithTimeout(parentCtx, queueTimeout) defer cancelFunc() @@ -930,7 +931,7 @@ func (s *Sequencer) createBlock(ctx context.Context) (returnValue bool) { block *types.Block err error ) - if s.config().EnableProfiling { + if config.EnableProfiling { block, err = s.execEngine.SequenceTransactionsWithProfiling(header, txes, hooks) } else { block, err = s.execEngine.SequenceTransactions(header, txes, hooks) @@ -1066,16 +1067,17 @@ func (s *Sequencer) updateExpectedSurplus(ctx context.Context) (int64, error) { unusedL1GasChargeGauge.Update(backlogL1GasCharged) currentSurplusGauge.Update(surplus) expectedSurplusGauge.Update(expectedSurplus) - if s.config().ExpectedSurplusSoftThreshold != "default" && expectedSurplus < int64(s.config().expectedSurplusSoftThreshold) { - log.Warn("expected surplus is below soft threshold", "value", expectedSurplus, "threshold", s.config().expectedSurplusSoftThreshold) + config := s.config() + if config.ExpectedSurplusSoftThreshold != "default" && expectedSurplus < int64(config.expectedSurplusSoftThreshold) { + log.Warn("expected surplus is below soft threshold", "value", expectedSurplus, "threshold", config.expectedSurplusSoftThreshold) } return expectedSurplus, nil } func (s *Sequencer) Start(ctxIn context.Context) error { s.StopWaiter.Start(ctxIn, s) - - if (s.config().ExpectedSurplusHardThreshold != "default" || s.config().ExpectedSurplusSoftThreshold != "default") && s.l1Reader == nil { + config := s.config() + if (config.ExpectedSurplusHardThreshold != "default" || config.ExpectedSurplusSoftThreshold != "default") && s.l1Reader == nil { return errors.New("expected surplus soft/hard thresholds are enabled but l1Reader is nil") } @@ -1087,7 +1089,7 @@ func (s *Sequencer) Start(ctxIn context.Context) error { expectedSurplus, err := s.updateExpectedSurplus(ctxIn) if err != nil { - if s.config().ExpectedSurplusHardThreshold != "default" { + if config.ExpectedSurplusHardThreshold != "default" { return fmt.Errorf("expected-surplus-hard-threshold is enabled but error fetching initial expected surplus value: %w", err) } log.Error("expected-surplus-soft-threshold is enabled but error fetching initial expected surplus value", "err", err) @@ -1129,7 +1131,7 @@ func (s *Sequencer) Start(ctxIn context.Context) error { } s.CallIteratively(func(ctx context.Context) time.Duration { - nextBlock := time.Now().Add(s.config().MaxBlockSpeed) + nextBlock := time.Now().Add(config.MaxBlockSpeed) if s.createBlock(ctx) { // Note: this may return a negative duration, but timers are fine with that (they treat negative durations as 0). return time.Until(nextBlock) From d77f4057e4871657feccebdbd730ea37be4e2733 Mon Sep 17 00:00:00 2001 From: Nodar Ambroladze Date: Wed, 5 Jun 2024 14:42:47 +0200 Subject: [PATCH 7/7] Don't use cached config in iteratively called function --- execution/gethexec/sequencer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/execution/gethexec/sequencer.go b/execution/gethexec/sequencer.go index 10939cda3a..2ba286aac7 100644 --- a/execution/gethexec/sequencer.go +++ b/execution/gethexec/sequencer.go @@ -1131,7 +1131,7 @@ func (s *Sequencer) Start(ctxIn context.Context) error { } s.CallIteratively(func(ctx context.Context) time.Duration { - nextBlock := time.Now().Add(config.MaxBlockSpeed) + nextBlock := time.Now().Add(s.config().MaxBlockSpeed) if s.createBlock(ctx) { // Note: this may return a negative duration, but timers are fine with that (they treat negative durations as 0). return time.Until(nextBlock)