From 935cb216402c9693faf86d75a7fbb045109ed4a3 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Sat, 20 Apr 2024 02:24:38 +0200 Subject: [PATCH 01/12] expose more pebble open options, add compact/debt and compact/inprogress metrics --- core/rawdb/database.go | 10 ++++--- ethdb/pebble/extraoptions.go | 27 +++++++++++++++++ ethdb/pebble/pebble.go | 53 ++++++++++++++++++++++++++++----- ethdb/pebble/pebble_non64bit.go | 2 +- node/node.go | 37 +++++++++++++++-------- 5 files changed, 104 insertions(+), 25 deletions(-) create mode 100644 ethdb/pebble/extraoptions.go diff --git a/core/rawdb/database.go b/core/rawdb/database.go index 1d7b7d1ca8..7b04af9a83 100644 --- a/core/rawdb/database.go +++ b/core/rawdb/database.go @@ -324,8 +324,8 @@ func NewLevelDBDatabase(file string, cache int, handles int, namespace string, r // NewPebbleDBDatabase creates a persistent key-value database without a freezer // moving immutable chain segments into cold storage. -func NewPebbleDBDatabase(file string, cache int, handles int, namespace string, readonly, ephemeral bool) (ethdb.Database, error) { - db, err := pebble.New(file, cache, handles, namespace, readonly, ephemeral) +func NewPebbleDBDatabase(file string, cache int, handles int, namespace string, readonly, ephemeral bool, extraOptions *pebble.ExtraOptions) (ethdb.Database, error) { + db, err := pebble.New(file, cache, handles, namespace, readonly, ephemeral, extraOptions) if err != nil { return nil, err } @@ -366,6 +366,8 @@ type OpenOptions struct { // Ephemeral means that filesystem sync operations should be avoided: data integrity in the face of // a crash is not important. This option should typically be used in tests. Ephemeral bool + + PebbleExtraOptions *pebble.ExtraOptions } // openKeyValueDatabase opens a disk-based key-value database, e.g. leveldb or pebble. @@ -387,7 +389,7 @@ func openKeyValueDatabase(o OpenOptions) (ethdb.Database, error) { } if o.Type == dbPebble || existingDb == dbPebble { log.Info("Using pebble as the backing database") - return NewPebbleDBDatabase(o.Directory, o.Cache, o.Handles, o.Namespace, o.ReadOnly, o.Ephemeral) + return NewPebbleDBDatabase(o.Directory, o.Cache, o.Handles, o.Namespace, o.ReadOnly, o.Ephemeral, o.PebbleExtraOptions) } if o.Type == dbLeveldb || existingDb == dbLeveldb { log.Info("Using leveldb as the backing database") @@ -395,7 +397,7 @@ func openKeyValueDatabase(o OpenOptions) (ethdb.Database, error) { } // No pre-existing database, no user-requested one either. Default to Pebble. log.Info("Defaulting to pebble as the backing database") - return NewPebbleDBDatabase(o.Directory, o.Cache, o.Handles, o.Namespace, o.ReadOnly, o.Ephemeral) + return NewPebbleDBDatabase(o.Directory, o.Cache, o.Handles, o.Namespace, o.ReadOnly, o.Ephemeral, o.PebbleExtraOptions) } // Open opens both a disk-based key-value database such as leveldb or pebble, but also diff --git a/ethdb/pebble/extraoptions.go b/ethdb/pebble/extraoptions.go new file mode 100644 index 0000000000..cafc809e6b --- /dev/null +++ b/ethdb/pebble/extraoptions.go @@ -0,0 +1,27 @@ +package pebble + +import "time" + +type ExtraOptions struct { + BytesPerSync int + L0CompactionFileThreshold int + L0CompactionThreshold int + L0StopWritesThreshold int + LBaseMaxBytes int64 + MaxConcurrentCompactions func() int + DisableAutomaticCompactions bool + WALBytesPerSync int + WALDir string + WALMinSyncInterval func() time.Duration + TargetByteDeletionRate int + Experimental ExtraOptionsExperimental +} + +type ExtraOptionsExperimental struct { + L0CompactionConcurrency int + CompactionDebtConcurrency uint64 + ReadCompactionRate int64 + ReadSamplingMultiplier int64 + MaxWriterConcurrency int + ForceWriterParallelism bool +} diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index 9c2d95a077..e53046b882 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -47,7 +47,7 @@ const ( // metricsGatheringInterval specifies the interval to retrieve pebble database // compaction, io and pause stats to report to the user. - metricsGatheringInterval = 3 * time.Second + metricsGatheringInterval = 100 * time.Millisecond //3 * time.Second ) // Database is a persistent key-value store based on the pebble storage engine. @@ -71,6 +71,9 @@ type Database struct { seekCompGauge metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt manualMemAllocGauge metrics.Gauge // Gauge for tracking amount of non-managed memory currently allocated + compDebtGauge metrics.Gauge + compInProgressGauge metrics.Gauge + levelsGauge []metrics.Gauge // Gauge for tracking the number of tables in levels quitLock sync.RWMutex // Mutex protecting the quit channel and the closed flag @@ -138,7 +141,7 @@ func (l panicLogger) Fatalf(format string, args ...interface{}) { // New returns a wrapped pebble DB object. The namespace is the prefix that the // metrics reporting should use for surfacing internal stats. -func New(file string, cache int, handles int, namespace string, readonly bool, ephemeral bool) (*Database, error) { +func New(file string, cache int, handles int, namespace string, readonly bool, ephemeral bool, extraOptions *ExtraOptions) (*Database, error) { // Ensure we have some minimal caching and file guarantees if cache < minCache { cache = minCache @@ -181,7 +184,16 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e quitChan: make(chan chan error), writeOptions: &pebble.WriteOptions{Sync: !ephemeral}, } - opt := &pebble.Options{ + + if extraOptions == nil { + extraOptions = &ExtraOptions{} + } + if extraOptions.MaxConcurrentCompactions == nil { + extraOptions.MaxConcurrentCompactions = func() int { return runtime.NumCPU() } + } + + var opt *pebble.Options + opt = &pebble.Options{ // Pebble has a single combined cache area and the write // buffers are taken from this too. Assign all available // memory allowance for cache. @@ -195,16 +207,16 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e // MemTableStopWritesThreshold places a hard limit on the size // of the existent MemTables(including the frozen one). // Note, this must be the number of tables not the size of all memtables - // according to https://github.com/cockroachdb/pebble/blob/master/options.go#L738-L742 + // according to https://github.com/cockroachdb/pebble/blob/master/extraOptions.go#L738-L742 // and to https://github.com/cockroachdb/pebble/blob/master/db.go#L1892-L1903. MemTableStopWritesThreshold: memTableLimit, // The default compaction concurrency(1 thread), // Here use all available CPUs for faster compaction. - MaxConcurrentCompactions: func() int { return runtime.NumCPU() }, + MaxConcurrentCompactions: extraOptions.MaxConcurrentCompactions, - // Per-level options. Options for at least one level must be specified. The - // options for the last level are used for all subsequent levels. + // Per-level extraOptions. Options for at least one level must be specified. The + // extraOptions for the last level are used for all subsequent levels. Levels: []pebble.LevelOptions{ {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, @@ -222,11 +234,32 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e WriteStallEnd: db.onWriteStallEnd, }, Logger: panicLogger{}, // TODO(karalabe): Delete when this is upstreamed in Pebble + + BytesPerSync: extraOptions.BytesPerSync, + L0CompactionFileThreshold: extraOptions.L0CompactionFileThreshold, + L0CompactionThreshold: extraOptions.L0CompactionThreshold, + L0StopWritesThreshold: extraOptions.L0StopWritesThreshold, + LBaseMaxBytes: extraOptions.LBaseMaxBytes, + DisableAutomaticCompactions: extraOptions.DisableAutomaticCompactions, + WALBytesPerSync: extraOptions.WALBytesPerSync, + WALDir: extraOptions.WALDir, + WALMinSyncInterval: extraOptions.WALMinSyncInterval, + TargetByteDeletionRate: extraOptions.TargetByteDeletionRate, } + // Disable seek compaction explicitly. Check https://github.com/ethereum/go-ethereum/pull/20130 // for more details. opt.Experimental.ReadSamplingMultiplier = -1 + if opt.Experimental.ReadSamplingMultiplier != 0 { + opt.Experimental.ReadSamplingMultiplier = extraOptions.Experimental.ReadSamplingMultiplier + } + opt.Experimental.L0CompactionConcurrency = extraOptions.Experimental.L0CompactionConcurrency + opt.Experimental.CompactionDebtConcurrency = extraOptions.Experimental.CompactionDebtConcurrency + opt.Experimental.ReadCompactionRate = extraOptions.Experimental.ReadCompactionRate + opt.Experimental.MaxWriterConcurrency = extraOptions.Experimental.MaxWriterConcurrency + opt.Experimental.ForceWriterParallelism = extraOptions.Experimental.ForceWriterParallelism + // Open the db and recover any potential corruptions innerDB, err := pebble.Open(file, opt) if err != nil { @@ -248,6 +281,9 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e db.seekCompGauge = metrics.NewRegisteredGauge(namespace+"compact/seek", nil) db.manualMemAllocGauge = metrics.NewRegisteredGauge(namespace+"memory/manualalloc", nil) + db.compDebtGauge = metrics.NewRegisteredGauge(namespace+"compact/debt", nil) + db.compInProgressGauge = metrics.NewRegisteredGauge(namespace+"compact/inprogress", nil) + // Start up the metrics gathering and return go db.meter(metricsGatheringInterval, namespace) return db, nil @@ -525,6 +561,9 @@ func (d *Database) meter(refresh time.Duration, namespace string) { d.level0CompGauge.Update(level0CompCount) d.seekCompGauge.Update(stats.Compact.ReadCount) + d.compDebtGauge.Update(int64(stats.Compact.EstimatedDebt)) + d.compInProgressGauge.Update(stats.Compact.NumInProgress) + for i, level := range stats.Levels { // Append metrics for additional layers if i >= len(d.levelsGauge) { diff --git a/ethdb/pebble/pebble_non64bit.go b/ethdb/pebble/pebble_non64bit.go index b028c7e2e9..bd503aadea 100644 --- a/ethdb/pebble/pebble_non64bit.go +++ b/ethdb/pebble/pebble_non64bit.go @@ -8,6 +8,6 @@ import ( "github.com/ethereum/go-ethereum/ethdb" ) -func New(file string, cache int, handles int, namespace string, readonly bool, ephemeral bool) (ethdb.Database, error) { +func New(file string, cache int, handles int, namespace string, readonly bool, ephemeral bool, extraOptions *ExtraOptions) (ethdb.Database, error) { return nil, errors.New("pebble is not supported on this platform") } diff --git a/node/node.go b/node/node.go index 8b6d28e769..a0e1c84b56 100644 --- a/node/node.go +++ b/node/node.go @@ -34,6 +34,7 @@ import ( "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/core/rawdb" "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/ethdb/pebble" "github.com/ethereum/go-ethereum/event" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/p2p" @@ -744,6 +745,10 @@ func (n *Node) EventMux() *event.TypeMux { // previous can be found) from within the node's instance directory. If the node is // ephemeral, a memory database is returned. func (n *Node) OpenDatabase(name string, cache, handles int, namespace string, readonly bool) (ethdb.Database, error) { + return n.OpenDatabaseWithExtraOptions(name, cache, handles, namespace, readonly, nil) +} + +func (n *Node) OpenDatabaseWithExtraOptions(name string, cache, handles int, namespace string, readonly bool, pebbleExtraOptions *pebble.ExtraOptions) (ethdb.Database, error) { n.lock.Lock() defer n.lock.Unlock() if n.state == closedState { @@ -756,12 +761,13 @@ func (n *Node) OpenDatabase(name string, cache, handles int, namespace string, r db = rawdb.NewMemoryDatabase() } else { db, err = rawdb.Open(rawdb.OpenOptions{ - Type: n.config.DBEngine, - Directory: n.ResolvePath(name), - Namespace: namespace, - Cache: cache, - Handles: handles, - ReadOnly: readonly, + Type: n.config.DBEngine, + Directory: n.ResolvePath(name), + Namespace: namespace, + Cache: cache, + Handles: handles, + ReadOnly: readonly, + PebbleExtraOptions: pebbleExtraOptions, }) } @@ -777,6 +783,10 @@ func (n *Node) OpenDatabase(name string, cache, handles int, namespace string, r // database to immutable append-only files. If the node is an ephemeral one, a // memory database is returned. func (n *Node) OpenDatabaseWithFreezer(name string, cache, handles int, ancient string, namespace string, readonly bool) (ethdb.Database, error) { + return n.OpenDatabaseWithFreezerWithExtraOptions(name, cache, handles, ancient, namespace, readonly, nil) +} + +func (n *Node) OpenDatabaseWithFreezerWithExtraOptions(name string, cache, handles int, ancient string, namespace string, readonly bool, pebbleExtraOptions *pebble.ExtraOptions) (ethdb.Database, error) { n.lock.Lock() defer n.lock.Unlock() if n.state == closedState { @@ -788,13 +798,14 @@ func (n *Node) OpenDatabaseWithFreezer(name string, cache, handles int, ancient db = rawdb.NewMemoryDatabase() } else { db, err = rawdb.Open(rawdb.OpenOptions{ - Type: n.config.DBEngine, - Directory: n.ResolvePath(name), - AncientsDirectory: n.ResolveAncient(name, ancient), - Namespace: namespace, - Cache: cache, - Handles: handles, - ReadOnly: readonly, + Type: n.config.DBEngine, + Directory: n.ResolvePath(name), + AncientsDirectory: n.ResolveAncient(name, ancient), + Namespace: namespace, + Cache: cache, + Handles: handles, + ReadOnly: readonly, + PebbleExtraOptions: pebbleExtraOptions, }) } From 9e62e652e211a47ad1c71a428b4a7ea6b96ae710 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Mon, 22 Apr 2024 16:20:18 +0200 Subject: [PATCH 02/12] fix lint err --- ethdb/pebble/pebble.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index e53046b882..2ac400d32f 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -192,8 +192,7 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e extraOptions.MaxConcurrentCompactions = func() int { return runtime.NumCPU() } } - var opt *pebble.Options - opt = &pebble.Options{ + opt := &pebble.Options{ // Pebble has a single combined cache area and the write // buffers are taken from this too. Assign all available // memory allowance for cache. From d6428a6842a8c7d39821e74662fe3e0af34babd7 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Tue, 23 Apr 2024 19:39:43 +0200 Subject: [PATCH 03/12] add pebble layers extra options --- ethdb/pebble/extraoptions.go | 5 +++++ ethdb/pebble/pebble.go | 27 +++++++++++++++++---------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/ethdb/pebble/extraoptions.go b/ethdb/pebble/extraoptions.go index cafc809e6b..7fdea8dbcb 100644 --- a/ethdb/pebble/extraoptions.go +++ b/ethdb/pebble/extraoptions.go @@ -15,6 +15,7 @@ type ExtraOptions struct { WALMinSyncInterval func() time.Duration TargetByteDeletionRate int Experimental ExtraOptionsExperimental + Levels []ExtraLevelOptions } type ExtraOptionsExperimental struct { @@ -25,3 +26,7 @@ type ExtraOptionsExperimental struct { MaxWriterConcurrency int ForceWriterParallelism bool } + +type ExtraLevelOptions struct { + TargetFileSize int64 +} diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index 2ac400d32f..662a739039 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -191,7 +191,22 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e if extraOptions.MaxConcurrentCompactions == nil { extraOptions.MaxConcurrentCompactions = func() int { return runtime.NumCPU() } } - + var levels []pebble.LevelOptions + if len(extraOptions.Levels) == 0 { + levels = []pebble.LevelOptions{ + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + } + } else { + for _, level := range extraOptions.Levels { + levels = append(levels, pebble.LevelOptions{TargetFileSize: level.TargetFileSize, FilterPolicy: bloom.FilterPolicy(10)}) + } + } opt := &pebble.Options{ // Pebble has a single combined cache area and the write // buffers are taken from this too. Assign all available @@ -216,15 +231,7 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e // Per-level extraOptions. Options for at least one level must be specified. The // extraOptions for the last level are used for all subsequent levels. - Levels: []pebble.LevelOptions{ - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - }, + Levels: levels, ReadOnly: readonly, EventListener: &pebble.EventListener{ CompactionBegin: db.onCompactionBegin, From 509f1114edd9d4e367cedfe4011ceed5766e3f07 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 24 Apr 2024 15:11:05 +0200 Subject: [PATCH 04/12] add block size and index block size pebble options --- ethdb/pebble/extraoptions.go | 2 ++ ethdb/pebble/pebble.go | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ethdb/pebble/extraoptions.go b/ethdb/pebble/extraoptions.go index 7fdea8dbcb..bef902f495 100644 --- a/ethdb/pebble/extraoptions.go +++ b/ethdb/pebble/extraoptions.go @@ -28,5 +28,7 @@ type ExtraOptionsExperimental struct { } type ExtraLevelOptions struct { + BlockSize int + IndexBlockSize int TargetFileSize int64 } diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index 662a739039..03047d027d 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -204,7 +204,12 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e } } else { for _, level := range extraOptions.Levels { - levels = append(levels, pebble.LevelOptions{TargetFileSize: level.TargetFileSize, FilterPolicy: bloom.FilterPolicy(10)}) + levels = append(levels, pebble.LevelOptions{ + BlockSize: level.BlockSize, + IndexBlockSize: level.IndexBlockSize, + TargetFileSize: level.TargetFileSize, + FilterPolicy: bloom.FilterPolicy(10), + }) } } opt := &pebble.Options{ From 040c6f787056826112340ce0b4e5b8d43503f20a Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 24 Apr 2024 15:37:40 +0200 Subject: [PATCH 05/12] add mem-table-stop-writes-threshold pebble extra option --- ethdb/pebble/extraoptions.go | 1 + ethdb/pebble/pebble.go | 57 ++++++++++++++++++------------------ 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/ethdb/pebble/extraoptions.go b/ethdb/pebble/extraoptions.go index bef902f495..787167c1cc 100644 --- a/ethdb/pebble/extraoptions.go +++ b/ethdb/pebble/extraoptions.go @@ -8,6 +8,7 @@ type ExtraOptions struct { L0CompactionThreshold int L0StopWritesThreshold int LBaseMaxBytes int64 + MemTableStopWritesThreshold int MaxConcurrentCompactions func() int DisableAutomaticCompactions bool WALBytesPerSync int diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index 03047d027d..28cc1f78fe 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -142,6 +142,34 @@ func (l panicLogger) Fatalf(format string, args ...interface{}) { // New returns a wrapped pebble DB object. The namespace is the prefix that the // metrics reporting should use for surfacing internal stats. func New(file string, cache int, handles int, namespace string, readonly bool, ephemeral bool, extraOptions *ExtraOptions) (*Database, error) { + if extraOptions == nil { + extraOptions = &ExtraOptions{} + } + if extraOptions.MaxConcurrentCompactions == nil { + extraOptions.MaxConcurrentCompactions = func() int { return runtime.NumCPU() } + } + var levels []pebble.LevelOptions + if len(extraOptions.Levels) == 0 { + levels = []pebble.LevelOptions{ + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, + } + } else { + for _, level := range extraOptions.Levels { + levels = append(levels, pebble.LevelOptions{ + BlockSize: level.BlockSize, + IndexBlockSize: level.IndexBlockSize, + TargetFileSize: level.TargetFileSize, + FilterPolicy: bloom.FilterPolicy(10), + }) + } + } + // Ensure we have some minimal caching and file guarantees if cache < minCache { cache = minCache @@ -166,7 +194,7 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e // Two memory tables is configured which is identical to leveldb, // including a frozen memory table and another live one. - memTableLimit := 2 + memTableLimit := extraOptions.MemTableStopWritesThreshold memTableSize := cache * 1024 * 1024 / 2 / memTableLimit // The memory table size is currently capped at maxMemTableSize-1 due to a @@ -185,33 +213,6 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e writeOptions: &pebble.WriteOptions{Sync: !ephemeral}, } - if extraOptions == nil { - extraOptions = &ExtraOptions{} - } - if extraOptions.MaxConcurrentCompactions == nil { - extraOptions.MaxConcurrentCompactions = func() int { return runtime.NumCPU() } - } - var levels []pebble.LevelOptions - if len(extraOptions.Levels) == 0 { - levels = []pebble.LevelOptions{ - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - {TargetFileSize: 2 * 1024 * 1024, FilterPolicy: bloom.FilterPolicy(10)}, - } - } else { - for _, level := range extraOptions.Levels { - levels = append(levels, pebble.LevelOptions{ - BlockSize: level.BlockSize, - IndexBlockSize: level.IndexBlockSize, - TargetFileSize: level.TargetFileSize, - FilterPolicy: bloom.FilterPolicy(10), - }) - } - } opt := &pebble.Options{ // Pebble has a single combined cache area and the write // buffers are taken from this too. Assign all available From 5e8d11c191c4b88e53ca53e69b7854efe89487fd Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 24 Apr 2024 16:00:16 +0200 Subject: [PATCH 06/12] fix default MemTableStopWritesThreshold --- ethdb/pebble/pebble.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index 28cc1f78fe..c86b9b9a1a 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -145,6 +145,9 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e if extraOptions == nil { extraOptions = &ExtraOptions{} } + if extraOptions.MemTableStopWritesThreshold <= 0 { + extraOptions.MemTableStopWritesThreshold = 2 + } if extraOptions.MaxConcurrentCompactions == nil { extraOptions.MaxConcurrentCompactions = func() int { return runtime.NumCPU() } } From 76745ffed5649d82beea1766aadeb4420aa62d48 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 25 Apr 2024 00:47:05 +0200 Subject: [PATCH 07/12] add pebble commit delay metrics --- ethdb/pebble/pebble.go | 78 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index c86b9b9a1a..ae0a9ffda3 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -74,6 +74,24 @@ type Database struct { compDebtGauge metrics.Gauge compInProgressGauge metrics.Gauge + commitCountMeter metrics.Meter + commitTotalDurationMeter metrics.Meter + commitSemaphoreWaitMeter metrics.Meter + commitWALQueueWaitMeter metrics.Meter + commitMemTableWriteStallMeter metrics.Meter + commitL0ReadAmpWriteStallMeter metrics.Meter + commitWALRotationMeter metrics.Meter + commitWaitMeter metrics.Meter + + commitCount atomic.Int64 + commitTotalDuration atomic.Int64 + commitSemaphoreWait atomic.Int64 + commitWALQueueWait atomic.Int64 + commitMemTableWriteStall atomic.Int64 + commitL0ReadAmpWriteStall atomic.Int64 + commitWALRotation atomic.Int64 + commitWait atomic.Int64 + levelsGauge []metrics.Gauge // Gauge for tracking the number of tables in levels quitLock sync.RWMutex // Mutex protecting the quit channel and the closed flag @@ -299,6 +317,15 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e db.compDebtGauge = metrics.NewRegisteredGauge(namespace+"compact/debt", nil) db.compInProgressGauge = metrics.NewRegisteredGauge(namespace+"compact/inprogress", nil) + db.commitCountMeter = metrics.NewRegisteredMeter(namespace+"commit/counter", nil) + db.commitTotalDurationMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/total", nil) + db.commitSemaphoreWaitMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/semaphorewait", nil) + db.commitWALQueueWaitMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/walqueuewait", nil) + db.commitMemTableWriteStallMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/memtablewritestall", nil) + db.commitL0ReadAmpWriteStallMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/l0readampwritestall", nil) + db.commitWALRotationMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/walrotation", nil) + db.commitWaitMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/commitwait", nil) + // Start up the metrics gathering and return go db.meter(metricsGatheringInterval, namespace) return db, nil @@ -511,6 +538,15 @@ func (d *Database) meter(refresh time.Duration, namespace string) { compReads [2]int64 nWrites [2]int64 + + commitCounts [2]int64 + commitTotalDurations [2]int64 + commitSemaphoreWaits [2]int64 + commitWALQueueWaits [2]int64 + commitMemTableWriteStalls [2]int64 + commitL0ReadAmpWriteStalls [2]int64 + commitWALRotations [2]int64 + commitWaits [2]int64 ) // Iterate ad infinitum and collect the stats @@ -526,6 +562,15 @@ func (d *Database) meter(refresh time.Duration, namespace string) { writeDelayTime = d.writeDelayTime.Load() nonLevel0CompCount = int64(d.nonLevel0Comp.Load()) level0CompCount = int64(d.level0Comp.Load()) + + commitCount = d.commitCount.Load() + commitTotalDuration = d.commitTotalDuration.Load() + commitSemaphoreWait = d.commitSemaphoreWait.Load() + commitWALQueueWait = d.commitWALQueueWait.Load() + commitMemTableWriteStall = d.commitMemTableWriteStall.Load() + commitL0ReadAmpWriteStall = d.commitL0ReadAmpWriteStall.Load() + commitWALRotation = d.commitWALRotation.Load() + commitWait = d.commitWait.Load() ) writeDelayTimes[i%2] = writeDelayTime writeDelayCounts[i%2] = writeDelayCount @@ -576,6 +621,24 @@ func (d *Database) meter(refresh time.Duration, namespace string) { d.level0CompGauge.Update(level0CompCount) d.seekCompGauge.Update(stats.Compact.ReadCount) + commitCounts[i%2] = commitCount + commitTotalDurations[i%2] = commitTotalDuration + commitSemaphoreWaits[i%2] = commitSemaphoreWait + commitWALQueueWaits[i%2] = commitWALQueueWait + commitMemTableWriteStalls[i%2] = commitMemTableWriteStall + commitL0ReadAmpWriteStalls[i%2] = commitL0ReadAmpWriteStall + commitWALRotations[i%2] = commitWALRotation + commitWaits[i%2] = commitWait + + d.commitCountMeter.Mark(commitCounts[i%2] - commitCounts[(i-1)%2]) + d.commitTotalDurationMeter.Mark(commitTotalDurations[i%2] - commitTotalDurations[(i-1)%2]) + d.commitSemaphoreWaitMeter.Mark(commitSemaphoreWaits[i%2] - commitSemaphoreWaits[(i-1)%2]) + d.commitWALQueueWaitMeter.Mark(commitWALQueueWaits[i%2] - commitWALQueueWaits[(i-1)%2]) + d.commitMemTableWriteStallMeter.Mark(commitMemTableWriteStalls[i%2] - commitMemTableWriteStalls[(i-1)%2]) + d.commitL0ReadAmpWriteStallMeter.Mark(commitL0ReadAmpWriteStalls[i%2] - commitL0ReadAmpWriteStalls[(i-1)%2]) + d.commitWALRotationMeter.Mark(commitWALRotations[i%2] - commitWALRotations[(i-1)%2]) + d.commitWaitMeter.Mark(commitWaits[i%2] - commitWaits[(i-1)%2]) + d.compDebtGauge.Update(int64(stats.Compact.EstimatedDebt)) d.compInProgressGauge.Update(stats.Compact.NumInProgress) @@ -633,7 +696,20 @@ func (b *batch) Write() error { if b.db.closed { return pebble.ErrClosed } - return b.b.Commit(b.db.writeOptions) + err := b.b.Commit(b.db.writeOptions) + if err != nil { + return err + } + stats := b.b.CommitStats() + b.db.commitCount.Add(1) + b.db.commitTotalDuration.Add(int64(stats.TotalDuration)) + b.db.commitSemaphoreWait.Add(int64(stats.SemaphoreWaitDuration)) + b.db.commitWALQueueWait.Add(int64(stats.WALQueueWaitDuration)) + b.db.commitMemTableWriteStall.Add(int64(stats.MemTableWriteStallDuration)) + b.db.commitL0ReadAmpWriteStall.Add(int64(stats.L0ReadAmpWriteStallDuration)) + b.db.commitWALRotation.Add(int64(stats.WALRotationDuration)) + b.db.commitWait.Add(int64(stats.CommitWaitDuration)) + return nil } // Reset resets the batch for reuse. From 31dcc54970876a09e13820a4a7334f39af38157d Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 25 Apr 2024 00:54:11 +0200 Subject: [PATCH 08/12] revert metrics interval change --- ethdb/pebble/pebble.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index ae0a9ffda3..5d043f4ee8 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -47,7 +47,7 @@ const ( // metricsGatheringInterval specifies the interval to retrieve pebble database // compaction, io and pause stats to report to the user. - metricsGatheringInterval = 100 * time.Millisecond //3 * time.Second + metricsGatheringInterval = 3 * time.Second ) // Database is a persistent key-value store based on the pebble storage engine. From 9f39f194d0a5b1ab1a47b1d4f83cd112f18dc4b3 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Fri, 26 Apr 2024 22:50:02 +0200 Subject: [PATCH 09/12] remove wal queue wait metric as it's not used by pebble yet --- ethdb/pebble/pebble.go | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index 5d043f4ee8..04bda86acc 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -77,7 +77,6 @@ type Database struct { commitCountMeter metrics.Meter commitTotalDurationMeter metrics.Meter commitSemaphoreWaitMeter metrics.Meter - commitWALQueueWaitMeter metrics.Meter commitMemTableWriteStallMeter metrics.Meter commitL0ReadAmpWriteStallMeter metrics.Meter commitWALRotationMeter metrics.Meter @@ -86,7 +85,6 @@ type Database struct { commitCount atomic.Int64 commitTotalDuration atomic.Int64 commitSemaphoreWait atomic.Int64 - commitWALQueueWait atomic.Int64 commitMemTableWriteStall atomic.Int64 commitL0ReadAmpWriteStall atomic.Int64 commitWALRotation atomic.Int64 @@ -320,7 +318,6 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e db.commitCountMeter = metrics.NewRegisteredMeter(namespace+"commit/counter", nil) db.commitTotalDurationMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/total", nil) db.commitSemaphoreWaitMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/semaphorewait", nil) - db.commitWALQueueWaitMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/walqueuewait", nil) db.commitMemTableWriteStallMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/memtablewritestall", nil) db.commitL0ReadAmpWriteStallMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/l0readampwritestall", nil) db.commitWALRotationMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/walrotation", nil) @@ -542,7 +539,6 @@ func (d *Database) meter(refresh time.Duration, namespace string) { commitCounts [2]int64 commitTotalDurations [2]int64 commitSemaphoreWaits [2]int64 - commitWALQueueWaits [2]int64 commitMemTableWriteStalls [2]int64 commitL0ReadAmpWriteStalls [2]int64 commitWALRotations [2]int64 @@ -566,7 +562,6 @@ func (d *Database) meter(refresh time.Duration, namespace string) { commitCount = d.commitCount.Load() commitTotalDuration = d.commitTotalDuration.Load() commitSemaphoreWait = d.commitSemaphoreWait.Load() - commitWALQueueWait = d.commitWALQueueWait.Load() commitMemTableWriteStall = d.commitMemTableWriteStall.Load() commitL0ReadAmpWriteStall = d.commitL0ReadAmpWriteStall.Load() commitWALRotation = d.commitWALRotation.Load() @@ -624,7 +619,6 @@ func (d *Database) meter(refresh time.Duration, namespace string) { commitCounts[i%2] = commitCount commitTotalDurations[i%2] = commitTotalDuration commitSemaphoreWaits[i%2] = commitSemaphoreWait - commitWALQueueWaits[i%2] = commitWALQueueWait commitMemTableWriteStalls[i%2] = commitMemTableWriteStall commitL0ReadAmpWriteStalls[i%2] = commitL0ReadAmpWriteStall commitWALRotations[i%2] = commitWALRotation @@ -633,7 +627,6 @@ func (d *Database) meter(refresh time.Duration, namespace string) { d.commitCountMeter.Mark(commitCounts[i%2] - commitCounts[(i-1)%2]) d.commitTotalDurationMeter.Mark(commitTotalDurations[i%2] - commitTotalDurations[(i-1)%2]) d.commitSemaphoreWaitMeter.Mark(commitSemaphoreWaits[i%2] - commitSemaphoreWaits[(i-1)%2]) - d.commitWALQueueWaitMeter.Mark(commitWALQueueWaits[i%2] - commitWALQueueWaits[(i-1)%2]) d.commitMemTableWriteStallMeter.Mark(commitMemTableWriteStalls[i%2] - commitMemTableWriteStalls[(i-1)%2]) d.commitL0ReadAmpWriteStallMeter.Mark(commitL0ReadAmpWriteStalls[i%2] - commitL0ReadAmpWriteStalls[(i-1)%2]) d.commitWALRotationMeter.Mark(commitWALRotations[i%2] - commitWALRotations[(i-1)%2]) @@ -704,11 +697,11 @@ func (b *batch) Write() error { b.db.commitCount.Add(1) b.db.commitTotalDuration.Add(int64(stats.TotalDuration)) b.db.commitSemaphoreWait.Add(int64(stats.SemaphoreWaitDuration)) - b.db.commitWALQueueWait.Add(int64(stats.WALQueueWaitDuration)) b.db.commitMemTableWriteStall.Add(int64(stats.MemTableWriteStallDuration)) b.db.commitL0ReadAmpWriteStall.Add(int64(stats.L0ReadAmpWriteStallDuration)) b.db.commitWALRotation.Add(int64(stats.WALRotationDuration)) b.db.commitWait.Add(int64(stats.CommitWaitDuration)) + // TODO add metric for stats.WALQueueWaitDuration when it will be used by pebble (currently it is always 0) return nil } From 3ecb5979ae489902c97d7146209c35071d167be6 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Thu, 2 May 2024 17:38:21 +0200 Subject: [PATCH 10/12] use GetOrRegister* for new metrics --- ethdb/pebble/pebble.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index 621dc59109..42f2c2ca3d 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -311,16 +311,16 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e db.seekCompGauge = metrics.GetOrRegisterGauge(namespace+"compact/seek", nil) db.manualMemAllocGauge = metrics.GetOrRegisterGauge(namespace+"memory/manualalloc", nil) - db.compDebtGauge = metrics.NewRegisteredGauge(namespace+"compact/debt", nil) - db.compInProgressGauge = metrics.NewRegisteredGauge(namespace+"compact/inprogress", nil) - - db.commitCountMeter = metrics.NewRegisteredMeter(namespace+"commit/counter", nil) - db.commitTotalDurationMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/total", nil) - db.commitSemaphoreWaitMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/semaphorewait", nil) - db.commitMemTableWriteStallMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/memtablewritestall", nil) - db.commitL0ReadAmpWriteStallMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/l0readampwritestall", nil) - db.commitWALRotationMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/walrotation", nil) - db.commitWaitMeter = metrics.NewRegisteredMeter(namespace+"commit/duration/commitwait", nil) + db.compDebtGauge = metrics.GetOrRegisterGauge(namespace+"compact/debt", nil) + db.compInProgressGauge = metrics.GetOrRegisterGauge(namespace+"compact/inprogress", nil) + + db.commitCountMeter = metrics.GetOrRegisterMeter(namespace+"commit/counter", nil) + db.commitTotalDurationMeter = metrics.GetOrRegisterMeter(namespace+"commit/duration/total", nil) + db.commitSemaphoreWaitMeter = metrics.GetOrRegisterMeter(namespace+"commit/duration/semaphorewait", nil) + db.commitMemTableWriteStallMeter = metrics.GetOrRegisterMeter(namespace+"commit/duration/memtablewritestall", nil) + db.commitL0ReadAmpWriteStallMeter = metrics.GetOrRegisterMeter(namespace+"commit/duration/l0readampwritestall", nil) + db.commitWALRotationMeter = metrics.GetOrRegisterMeter(namespace+"commit/duration/walrotation", nil) + db.commitWaitMeter = metrics.GetOrRegisterMeter(namespace+"commit/duration/commitwait", nil) // Start up the metrics gathering and return go db.meter(metricsGatheringInterval, namespace) From bb30724a7b0ba4cd7b2f57a7a2c964087f97f068 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 15 May 2024 21:21:01 +0200 Subject: [PATCH 11/12] revert accidental comment change --- ethdb/pebble/pebble.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index 42f2c2ca3d..d7b96c3942 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -245,7 +245,7 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e // MemTableStopWritesThreshold places a hard limit on the size // of the existent MemTables(including the frozen one). // Note, this must be the number of tables not the size of all memtables - // according to https://github.com/cockroachdb/pebble/blob/master/extraOptions.go#L738-L742 + // according to https://github.com/cockroachdb/pebble/blob/master/options.go#L738-L742 // and to https://github.com/cockroachdb/pebble/blob/master/db.go#L1892-L1903. MemTableStopWritesThreshold: memTableLimit, From 5b7b36a339ac28d708bca072dc5ec8189ceadac2 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Wed, 15 May 2024 21:23:29 +0200 Subject: [PATCH 12/12] remove unnecessary new lines --- ethdb/pebble/pebble.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/ethdb/pebble/pebble.go b/ethdb/pebble/pebble.go index d7b96c3942..ab6bb1d72d 100644 --- a/ethdb/pebble/pebble.go +++ b/ethdb/pebble/pebble.go @@ -230,7 +230,6 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e quitChan: make(chan chan error), writeOptions: &pebble.WriteOptions{Sync: !ephemeral}, } - opt := &pebble.Options{ // Pebble has a single combined cache area and the write // buffers are taken from this too. Assign all available @@ -276,7 +275,6 @@ func New(file string, cache int, handles int, namespace string, readonly bool, e WALMinSyncInterval: extraOptions.WALMinSyncInterval, TargetByteDeletionRate: extraOptions.TargetByteDeletionRate, } - // Disable seek compaction explicitly. Check https://github.com/ethereum/go-ethereum/pull/20130 // for more details. opt.Experimental.ReadSamplingMultiplier = -1