diff --git a/execution/gethexec/sequencer.go b/execution/gethexec/sequencer.go index 23340594c4..da816c212f 100644 --- a/execution/gethexec/sequencer.go +++ b/execution/gethexec/sequencer.go @@ -9,13 +9,17 @@ import ( "fmt" "math" "math/big" + "os" "runtime/debug" + "runtime/pprof" + "runtime/trace" "strconv" "strings" "sync" "sync/atomic" "time" + "github.com/google/uuid" "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/execution" "github.com/offchainlabs/nitro/util/arbmath" @@ -76,6 +80,7 @@ type SequencerConfig struct { NonceFailureCacheExpiry time.Duration `koanf:"nonce-failure-cache-expiry" reload:"hot"` ExpectedSurplusSoftThreshold string `koanf:"expected-surplus-soft-threshold" reload:"hot"` ExpectedSurplusHardThreshold string `koanf:"expected-surplus-hard-threshold" reload:"hot"` + EnableProfiling bool `koanf:"enable-profiling"` expectedSurplusSoftThreshold int expectedSurplusHardThreshold int } @@ -125,6 +130,7 @@ var DefaultSequencerConfig = SequencerConfig{ NonceFailureCacheExpiry: time.Second, ExpectedSurplusSoftThreshold: "default", ExpectedSurplusHardThreshold: "default", + EnableProfiling: true, } var TestSequencerConfig = SequencerConfig{ @@ -142,6 +148,7 @@ var TestSequencerConfig = SequencerConfig{ NonceFailureCacheExpiry: time.Second, ExpectedSurplusSoftThreshold: "default", ExpectedSurplusHardThreshold: "default", + EnableProfiling: false, } func SequencerConfigAddOptions(prefix string, f *flag.FlagSet) { @@ -159,6 +166,7 @@ func SequencerConfigAddOptions(prefix string, f *flag.FlagSet) { f.Duration(prefix+".nonce-failure-cache-expiry", DefaultSequencerConfig.NonceFailureCacheExpiry, "maximum amount of time to wait for a predecessor before rejecting a tx with nonce too high") f.String(prefix+".expected-surplus-soft-threshold", DefaultSequencerConfig.ExpectedSurplusSoftThreshold, "if expected surplus is lower than this value, warnings are posted") f.String(prefix+".expected-surplus-hard-threshold", DefaultSequencerConfig.ExpectedSurplusHardThreshold, "if expected surplus is lower than this value, new incoming transactions will be denied") + f.Bool(prefix+".enable-profiling", DefaultSequencerConfig.EnableProfiling, "enable CPU profiling and tracing") } type txQueueItem struct { @@ -327,6 +335,7 @@ type Sequencer struct { expectedSurplusMutex sync.RWMutex expectedSurplus int64 expectedSurplusUpdated bool + enableProfiling bool } func NewSequencer(execEngine *ExecutionEngine, l1Reader *headerreader.HeaderReader, configFetcher SequencerConfigFetcher) (*Sequencer, error) { @@ -353,6 +362,7 @@ func NewSequencer(execEngine *ExecutionEngine, l1Reader *headerreader.HeaderRead l1Timestamp: 0, pauseChan: nil, onForwarderSet: make(chan struct{}, 1), + enableProfiling: config.EnableProfiling, } s.nonceFailures = &nonceFailureCache{ containers.NewLruCacheWithOnEvict(config.NonceCacheSize, s.onNonceFailureEvict), @@ -758,6 +768,59 @@ func (s *Sequencer) precheckNonces(queueItems []txQueueItem) []txQueueItem { return outputQueueItems } +func deleteFiles(files ...*os.File) { + for _, f := range files { + if err := os.Remove(f.Name()); err != nil { + log.Error("Error removing file", "name", f.Name()) + } + } +} + +func closeFiles(files ...*os.File) { + for _, f := range files { + if err := os.Remove(f.Name()); err != nil { + log.Error("Error closing file", "name", f.Name()) + } + } +} + +// createBlockWithProfiling runs create block with tracing and CPU profiling +// enabled. If the block creation takes longer than 5 seconds, it keeps both +// and prints out filenames in an error log line. +func (s *Sequencer) createBlockWithProfiling(ctx context.Context) bool { + id := uuid.NewString() + pprofFile, err := os.CreateTemp("", id+".pprof") + if err != nil { + log.Error("Creating temporary file for profiling CPU", "error", err) + return false + } + traceFile, err := os.CreateTemp("", id+".trace") + if err != nil { + log.Error("Creating temporary file for tracing", "error", err) + return false + } + if err := pprof.StartCPUProfile(pprofFile); err != nil { + log.Error("Starting CPU profiling", "error", err) + deleteFiles(pprofFile, traceFile) + return false + } + if err := trace.Start(traceFile); err != nil { + log.Error("Starting tracing", "error", err) + } + start := time.Now() + res := s.createBlock(ctx) + elapsed := time.Since(start) + pprof.StopCPUProfile() + trace.Stop() + closeFiles(pprofFile, traceFile) + if elapsed > 5*time.Second { + log.Error("Block creation took longer than 5 seconds", "pprof", pprofFile.Name()) + return res + } + deleteFiles(pprofFile, traceFile) + return res +} + func (s *Sequencer) createBlock(ctx context.Context) (returnValue bool) { var queueItems []txQueueItem var totalBatchSize int @@ -1088,7 +1151,12 @@ func (s *Sequencer) Start(ctxIn context.Context) error { s.CallIteratively(func(ctx context.Context) time.Duration { nextBlock := time.Now().Add(s.config().MaxBlockSpeed) - madeBlock := s.createBlock(ctx) + var madeBlock bool + if s.enableProfiling { + s.createBlockWithProfiling(ctx) + } else { + madeBlock = s.createBlock(ctx) + } if madeBlock { // Note: this may return a negative duration, but timers are fine with that (they treat negative durations as 0). return time.Until(nextBlock)