Add tool for database conversion #2061

Merged Aug 17, 2024 (75 commits)
Changes from 46 commits

Commits
b6ae081
add dbconv draft
magicxyyz Dec 28, 2023
291214b
Merge branch 'master' into db-conversion
magicxyyz Dec 28, 2023
a52d9ac
make lint happy
magicxyyz Dec 28, 2023
67922b5
add database conversion system test draft
magicxyyz Dec 28, 2023
5f61bc3
improve db conversion test
magicxyyz Dec 28, 2023
689e653
use start key instead of prefix, add main draft
magicxyyz Dec 30, 2023
83a22e1
fix middle key lookup
magicxyyz Jan 3, 2024
a1ea3ae
fix lint
magicxyyz Jan 3, 2024
0868fa2
Merge branch 'master' into db-conversion
magicxyyz Jan 3, 2024
d14a201
add initial progress reporting
magicxyyz Jan 4, 2024
dd4ec96
remove debug log from stats
magicxyyz Jan 4, 2024
c81e6c8
fix forking, add more stats
magicxyyz Jan 4, 2024
97ea9b7
add verification option
magicxyyz Jan 4, 2024
1e7041e
reformat progress string, add log-level option
magicxyyz Jan 4, 2024
c735874
add compaction option
magicxyyz Jan 4, 2024
83f1f57
clean ':' from log
magicxyyz Jan 4, 2024
31e5b8c
stop progress printing during compaction
magicxyyz Jan 5, 2024
e779fe7
change unit of entries per second
magicxyyz Jan 8, 2024
d7f5360
Merge branch 'master' into db-conversion
magicxyyz Jan 8, 2024
bc41c66
shorten dbconv test
magicxyyz Jan 8, 2024
62c86a5
Merge branch 'master' into db-conversion
magicxyyz Jan 10, 2024
39a5311
add dbconv to Makefile
magicxyyz Jan 10, 2024
dd33ae2
add dbconv to docker
magicxyyz Jan 11, 2024
c82675a
Merge branch 'master' into db-conversion
magicxyyz Apr 2, 2024
48f5c84
cmd/dbconv: add metrics
magicxyyz Apr 5, 2024
7cb6417
Merge branch 'pebble-extra-options' into db-conversion
magicxyyz Apr 22, 2024
59bdbfc
Merge remote-tracking branch 'origin/pebble-extra-options' into db-co…
magicxyyz Apr 23, 2024
c3b4a19
dbconv: add pebble config options
magicxyyz Apr 24, 2024
c3c6aff
Merge branch 'pebble-extra-options' into db-conversion
magicxyyz Apr 24, 2024
7e6e2d1
Merge branch 'pebble-extra-options' into db-conversion
magicxyyz Apr 24, 2024
0062b76
Merge branch 'pebble-extra-options' into db-conversion
magicxyyz Apr 24, 2024
9daf165
Merge branch 'pebble-extra-options' into db-conversion
magicxyyz Apr 24, 2024
d7dfdb8
Merge branch 'pebble-extra-options' into db-conversion
magicxyyz Apr 24, 2024
073c40e
Merge remote-tracking branch 'origin/pebble-extra-options' into db-co…
magicxyyz Apr 26, 2024
b213597
Merge branch 'master' into db-conversion
magicxyyz Jun 3, 2024
fc98de6
cmd/dbconv: remove multithreading option, update pebble extra options…
magicxyyz Jun 4, 2024
d7f5ea9
system_tests: update db_conversion_test
magicxyyz Jun 4, 2024
5eac1d4
cmd/dbconv: format numbers in progress message
magicxyyz Jun 4, 2024
88e8221
Merge branch 'master' into db-conversion
magicxyyz Jun 5, 2024
e308cf6
scripts: add initial version of convert-databases.bash
magicxyyz Jun 6, 2024
8657e51
scripts: improve convert-database script
magicxyyz Jun 11, 2024
09d0371
cmd/dbconv: return 1 on error from main binary
magicxyyz Jun 11, 2024
c47ee34
scripts: add --help flag to convert-databases.bash
magicxyyz Jun 11, 2024
bc8b85d
Merge branch 'master' into db-conversion
magicxyyz Jun 11, 2024
25ab55d
scripts: add extra flags check in convert-databases.bash
magicxyyz Jun 11, 2024
897a33e
Merge branch 'master' into db-conversion
magicxyyz Jun 11, 2024
7f9f7ef
Update cmd/dbconv/dbconv/config.go
magicxyyz Jun 21, 2024
6595a44
Merge branch 'master' into db-conversion
magicxyyz Jun 21, 2024
cfb1393
dbconv: address review comments
magicxyyz Jun 17, 2024
6a22f1d
refactor convert-databases script
magicxyyz Jun 25, 2024
a2d3507
Merge branch 'master' into db-conversion
magicxyyz Jun 25, 2024
dc24202
retab convert-databases script
magicxyyz Jun 25, 2024
bc3d784
pass default config to DBConfigAddOptions
magicxyyz Jun 28, 2024
37a826e
dbconv/stats: rename AddBytes/AddEntries to LogBytes/LogEntries
magicxyyz Jun 28, 2024
0ca882a
remove dst dirs when conversion fails
magicxyyz Jul 1, 2024
a5ebac9
Merge branch 'master' into db-conversion
magicxyyz Jul 1, 2024
1d69881
clean up conver-databases script
magicxyyz Jul 1, 2024
7527848
add unfinished convertion canary key
magicxyyz Jul 3, 2024
77eb4da
Merge branch 'master' into db-conversion
magicxyyz Jul 25, 2024
8422e35
Merge branch 'master' into db-conversion
magicxyyz Aug 7, 2024
21f2ee4
enable archive mode for HashScheme only in db conversion system test
magicxyyz Aug 7, 2024
7895656
check for canary key when initializing databases
magicxyyz Aug 13, 2024
8ec8389
fix db_conversion_test for PathScheme
magicxyyz Aug 13, 2024
ee7cba8
convert-databases: by default on conversion failure remove only unfin…
magicxyyz Aug 13, 2024
6a9e8ff
Merge branch 'master' into db-conversion
magicxyyz Aug 13, 2024
4ebfd7a
fix NodeBuilder.RestartL2Node - use l2StackConfig from builder
magicxyyz Aug 14, 2024
b0484c5
add extra checks to db conversion system test
magicxyyz Aug 14, 2024
53c448f
move UnfinishedConversionCheck to dbutil package
magicxyyz Aug 14, 2024
4d79dfc
Merge branch 'master' into db-conversion
magicxyyz Aug 14, 2024
5b61070
convert-databases.bash: fix handling directories containing spaces
magicxyyz Aug 15, 2024
3fdab93
remove comment
magicxyyz Aug 15, 2024
a78fb97
copy convert-databases script to docker
magicxyyz Aug 15, 2024
44f0e18
Merge branch 'master' into db-conversion
magicxyyz Aug 15, 2024
bc8803a
fix RestartL2Node - pass initMessage to createL2BlockChainWithStackCo…
magicxyyz Aug 16, 2024
93aaaef
Merge branch 'master' into db-conversion
tsahee Aug 16, 2024
1 change: 1 addition & 0 deletions Dockerfile
@@ -249,6 +249,7 @@ COPY --from=node-builder /workspace/target/bin/nitro /usr/local/bin/
COPY --from=node-builder /workspace/target/bin/relay /usr/local/bin/
COPY --from=node-builder /workspace/target/bin/nitro-val /usr/local/bin/
COPY --from=node-builder /workspace/target/bin/seq-coordinator-manager /usr/local/bin/
COPY --from=node-builder /workspace/target/bin/dbconv /usr/local/bin/
COPY --from=machine-versions /workspace/machines /home/user/target/machines
USER root
RUN export DEBIAN_FRONTEND=noninteractive && \
5 changes: 4 additions & 1 deletion Makefile
@@ -153,7 +153,7 @@ push: lint test-go .make/fmt
all: build build-replay-env test-gen-proofs
@touch .make/all

build: $(patsubst %,$(output_root)/bin/%, nitro deploy relay daserver datool seq-coordinator-invalidate nitro-val seq-coordinator-manager)
build: $(patsubst %,$(output_root)/bin/%, nitro deploy relay daserver datool seq-coordinator-invalidate nitro-val seq-coordinator-manager dbconv)
@printf $(done)

build-node-deps: $(go_source) build-prover-header build-prover-lib build-jit .make/solgen .make/cbrotli-lib
@@ -266,6 +266,9 @@ $(output_root)/bin/nitro-val: $(DEP_PREDICATE) build-node-deps
$(output_root)/bin/seq-coordinator-manager: $(DEP_PREDICATE) build-node-deps
go build $(GOLANG_PARAMS) -o $@ "$(CURDIR)/cmd/seq-coordinator-manager"

$(output_root)/bin/dbconv: $(DEP_PREDICATE) build-node-deps
go build $(GOLANG_PARAMS) -o $@ "$(CURDIR)/cmd/dbconv"

# recompile wasm, but don't change timestamp unless files differ
$(replay_wasm): $(DEP_PREDICATE) $(go_source) .make/solgen
mkdir -p `dirname $(replay_wasm)`
82 changes: 82 additions & 0 deletions cmd/dbconv/dbconv/config.go
@@ -0,0 +1,82 @@
package dbconv

import (
"errors"
"fmt"

"github.com/offchainlabs/nitro/cmd/conf"
"github.com/offchainlabs/nitro/cmd/genericconf"
"github.com/offchainlabs/nitro/execution/gethexec"
flag "github.com/spf13/pflag"
)

type DBConfig struct {
Data string `koanf:"data"`
DBEngine string `koanf:"db-engine"`
Handles int `koanf:"handles"`
Cache int `koanf:"cache"`
Namespace string `koanf:"namespace"`
Pebble conf.PebbleConfig `koanf:"pebble"`
}

var DBConfigDefault = DBConfig{
Handles: conf.PersistentConfigDefault.Handles,
Cache: gethexec.DefaultCachingConfig.DatabaseCache,
Pebble: conf.PebbleConfigDefault,
}

func DBConfigAddOptions(prefix string, f *flag.FlagSet, defaultNamespace string) {
f.String(prefix+".data", DBConfigDefault.Data, "directory of stored chain state")
f.String(prefix+".db-engine", DBConfigDefault.DBEngine, "backing database implementation to use ('leveldb' or 'pebble')")
f.Int(prefix+".handles", DBConfigDefault.Handles, "number of file descriptor handles to use for the database")
f.Int(prefix+".cache", DBConfigDefault.Cache, "the capacity(in megabytes) of the data caching")
f.String(prefix+".namespace", defaultNamespace, "metrics namespace")
conf.PebbleConfigAddOptions(prefix+".pebble", f)
}

type DBConvConfig struct {
Src DBConfig `koanf:"src"`
Dst DBConfig `koanf:"dst"`
IdealBatchSize int `koanf:"ideal-batch-size"`
Convert bool `koanf:"convert"`
Compact bool `koanf:"compact"`
Verify int `koanf:"verify"`
LogLevel string `koanf:"log-level"`
LogType string `koanf:"log-type"`
Metrics bool `koanf:"metrics"`
MetricsServer genericconf.MetricsServerConfig `koanf:"metrics-server"`
}

var DefaultDBConvConfig = DBConvConfig{
IdealBatchSize: 100 * 1024 * 1024, // 100 MB
Convert: false,
Compact: false,
Verify: 0,
LogLevel: "INFO",
LogType: "plaintext",
Metrics: false,
MetricsServer: genericconf.MetricsServerConfigDefault,
}

func DBConvConfigAddOptions(f *flag.FlagSet) {
DBConfigAddOptions("src", f, "srcdb/")
DBConfigAddOptions("dst", f, "destdb/")
f.Int("ideal-batch-size", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size")
Contributor:

nitpick:

Suggested change
- f.Int("ideal-batch-size", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size")
+ f.Int("ideal-batch-size", DefaultDBConvConfig.IdealBatchSize, "ideal write batch size in bytes")

Contributor Author:

included in: #2591

f.Bool("convert", DefaultDBConvConfig.Convert, "enables conversion step")
f.Bool("compact", DefaultDBConvConfig.Compact, "enables compaction step")
f.Int("verify", DefaultDBConvConfig.Verify, "enables verification step (0 = disabled, 1 = only keys, 2 = keys and values)")
f.String("log-level", DefaultDBConvConfig.LogLevel, "log level, valid values are CRIT, ERROR, WARN, INFO, DEBUG, TRACE")
f.String("log-type", DefaultDBConvConfig.LogType, "log type (plaintext or json)")
f.Bool("metrics", DefaultDBConvConfig.Metrics, "enable metrics")
genericconf.MetricsServerAddOptions("metrics-server", f)
}

func (c *DBConvConfig) Validate() error {
if c.Verify < 0 || c.Verify > 2 {
return fmt.Errorf("Invalid verify config value: %v", c.Verify)
}
if !c.Convert && c.Verify == 0 && !c.Compact {
return errors.New("nothing to be done, conversion, verification and compaction disabled")
}
return nil
}
152 changes: 152 additions & 0 deletions cmd/dbconv/dbconv/dbconv.go
@@ -0,0 +1,152 @@
package dbconv

import (
"bytes"
"context"
"fmt"
"time"

"github.com/ethereum/go-ethereum/core/rawdb"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
)

type DBConverter struct {
config *DBConvConfig
stats Stats

src ethdb.Database
dst ethdb.Database
Contributor:

I'm not sure why it is useful to add these fields to this struct. These values are built and closed within the same function call; e.g., Convert opens `dst` at the beginning of its execution and closes it at the end.

Contributor Author:

good catch, removed those

}
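
For context on the exchange above, a rough sketch of the local-handle approach the reviewer is pointing at (illustrative only; the helper name is made up, and the fields were removed in later commits of this PR):

func (c *DBConverter) convertWithLocalHandles(ctx context.Context) error {
	// Open both databases as locals and close them when the function returns,
	// instead of keeping them as struct fields.
	src, err := openDB(&c.config.Src, "src", true)
	if err != nil {
		return err
	}
	defer src.Close()
	dst, err := openDB(&c.config.Dst, "dst", false)
	if err != nil {
		return err
	}
	defer dst.Close()
	// ... copy loop as in Convert below ...
	return nil
}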

func NewDBConverter(config *DBConvConfig) *DBConverter {
return &DBConverter{
config: config,
}
}

func openDB(config *DBConfig, name string, readonly bool) (ethdb.Database, error) {
return rawdb.Open(rawdb.OpenOptions{
Type: config.DBEngine,
Directory: config.Data,
AncientsDirectory: "", // don't open freezer
Contributor:

Why don't we convert the freezer? It would be interesting to add a comment here explaining that.

Contributor Author:

the freezer format is independent of the db-engine, it only needs to be copied or moved

Namespace: config.Namespace,
Cache: config.Cache,
Handles: config.Handles,
ReadOnly: readonly,
PebbleExtraOptions: config.Pebble.ExtraOptions(name),
})
}
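
Regarding the freezer discussion above: since the ancients/freezer data format does not depend on the db-engine, a migration can copy that directory verbatim rather than converting it. A minimal sketch of such a copy (assumes Go 1.23+ for os.CopyFS and an "os" import; the helper name and paths are illustrative, not part of this PR):

// copyFreezer copies the source ancients directory into the destination as-is.
func copyFreezer(srcAncientsDir, dstAncientsDir string) error {
	return os.CopyFS(dstAncientsDir, os.DirFS(srcAncientsDir))
}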

func (c *DBConverter) Convert(ctx context.Context) error {
var err error
defer c.Close()
c.src, err = openDB(&c.config.Src, "src", true)
if err != nil {
return err
}
c.dst, err = openDB(&c.config.Dst, "dst", false)
if err != nil {
return err
}
c.stats.Reset()
it := c.src.NewIterator(nil, nil)
defer it.Release()
batch := c.dst.NewBatch()
entriesInBatch := 0
for it.Next() && ctx.Err() == nil {
if err = batch.Put(it.Key(), it.Value()); err != nil {
return err
}
entriesInBatch++
if batchSize := batch.ValueSize(); batchSize >= c.config.IdealBatchSize {
if err = batch.Write(); err != nil {
return err
}
c.stats.AddEntries(int64(entriesInBatch))
c.stats.AddBytes(int64(batchSize))
batch.Reset()
entriesInBatch = 0
}
}
if err = ctx.Err(); err == nil {
batchSize := batch.ValueSize()
err = batch.Write()
Contributor:

Return err here if it is non-nil, before updating the stats.
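
An illustrative version of that suggestion (not necessarily the code that was merged):

Suggested change
-	err = batch.Write()
-	c.stats.AddEntries(int64(entriesInBatch))
-	c.stats.AddBytes(int64(batchSize))
+	if err = batch.Write(); err != nil {
+		return err
+	}
+	c.stats.AddEntries(int64(entriesInBatch))
+	c.stats.AddBytes(int64(batchSize))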

c.stats.AddEntries(int64(entriesInBatch))
c.stats.AddBytes(int64(batchSize))
}
return err
}

func (c *DBConverter) CompactDestination() error {
var err error
c.dst, err = openDB(&c.config.Dst, "dst", false)
if err != nil {
return err
}
defer c.dst.Close()
start := time.Now()
log.Info("Compacting destination database", "data", c.config.Dst.Data)
if err := c.dst.Compact(nil, nil); err != nil {
return err
}
log.Info("Compaction done", "elapsed", time.Since(start))
return nil
}

func (c *DBConverter) Verify(ctx context.Context) error {
if c.config.Verify == 1 {
Contributor:

nitpick: converting config.Verify from an int to an enum here could improve code readability in the places where config.Verify is used

Contributor Author:

I've changed the config to a string, and since it's not used in many places I didn't convert it to an enum, I just use it as a string
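
Purely for illustration, a string-valued verify mode could be validated along these lines (the mode names "keys" and "full" are assumptions, not taken from the merged PR):

// validVerifyMode reports whether the given verification mode is recognized.
func validVerifyMode(mode string) bool {
	switch mode {
	case "", "keys", "full": // disabled, keys only, keys and values
		return true
	}
	return false
}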

log.Info("Starting quick verification - verifying only keys existence")
} else {
log.Info("Starting full verification - verifying keys and values")
}
var err error
defer c.Close()
c.src, err = openDB(&c.config.Src, "src", true)
if err != nil {
return err
}
c.dst, err = openDB(&c.config.Dst, "dst", true)
if err != nil {
return err
}

c.stats.Reset()
it := c.src.NewIterator(nil, nil)
defer it.Release()
for it.Next() && ctx.Err() == nil {
switch c.config.Verify {
case 1:
if has, err := c.dst.Has(it.Key()); !has {
Contributor:

We should check whether err is non-nil here.
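
A sketch of the check the reviewer is asking for (illustrative, not necessarily the merged code):

Suggested change
-	if has, err := c.dst.Has(it.Key()); !has {
-		return fmt.Errorf("Missing key in destination db, key: %v, err: %w", it.Key(), err)
-	}
+	has, err := c.dst.Has(it.Key())
+	if err != nil {
+		return fmt.Errorf("error checking key in destination db, key: %v, err: %w", it.Key(), err)
+	}
+	if !has {
+		return fmt.Errorf("missing key in destination db, key: %v", it.Key())
+	}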

return fmt.Errorf("Missing key in destination db, key: %v, err: %w", it.Key(), err)
}
c.stats.AddBytes(int64(len(it.Key())))
case 2:
dstValue, err := c.dst.Get(it.Key())
if err != nil {
return err
}
if !bytes.Equal(dstValue, it.Value()) {
return fmt.Errorf("Value mismatch for key: %v, src value: %v, dst value: %s", it.Key(), it.Value(), dstValue)
}
c.stats.AddBytes(int64(len(it.Key()) + len(dstValue)))
default:
return fmt.Errorf("Invalid verify config value: %v", c.config.Verify)
}
c.stats.AddEntries(1)
}
return ctx.Err()
}

func (c *DBConverter) Stats() *Stats {
return &c.stats
}

func (c *DBConverter) Close() {
if c.src != nil {
c.src.Close()
}
if c.dst != nil {
c.dst.Close()
}
}
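
For reference, a minimal, self-contained driver for the DBConverter API added in this file. This is only an illustrative sketch: the real entry point lives in cmd/dbconv (not shown in this diff), the import path is inferred from the file paths above, and the data directories are placeholders.

package main

import (
	"context"

	"github.com/ethereum/go-ethereum/log"
	"github.com/offchainlabs/nitro/cmd/dbconv/dbconv"
)

func main() {
	// Start from the defaults defined in config.go and point the source and
	// destination at concrete databases.
	config := dbconv.DefaultDBConvConfig
	config.Src = dbconv.DBConfigDefault
	config.Src.Data = "/data/nitro/l2chaindata" // placeholder source path
	config.Src.DBEngine = "leveldb"
	config.Dst = dbconv.DBConfigDefault
	config.Dst.Data = "/data/nitro/l2chaindata-pebble" // placeholder destination path
	config.Dst.DBEngine = "pebble"
	config.Convert = true
	config.Compact = true
	config.Verify = 1 // verify key existence only

	if err := config.Validate(); err != nil {
		log.Crit("invalid dbconv configuration", "err", err)
	}

	conv := dbconv.NewDBConverter(&config)
	ctx := context.Background()
	if err := conv.Convert(ctx); err != nil {
		log.Crit("conversion failed", "err", err)
	}
	if err := conv.CompactDestination(); err != nil {
		log.Crit("compaction failed", "err", err)
	}
	if err := conv.Verify(ctx); err != nil {
		log.Crit("verification failed", "err", err)
	}
}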
85 changes: 85 additions & 0 deletions cmd/dbconv/dbconv/dbconv_test.go
@@ -0,0 +1,85 @@
package dbconv

import (
"bytes"
"context"
"testing"

"github.com/ethereum/go-ethereum/log"
"github.com/offchainlabs/nitro/util/testhelpers"
)

func TestConversion(t *testing.T) {
_ = testhelpers.InitTestLog(t, log.LvlTrace)
Contributor:

nitpick: unnecessary line

Contributor Author:

addressed in: #2591

oldDBConfig := DBConfigDefault
oldDBConfig.Data = t.TempDir()
oldDBConfig.DBEngine = "leveldb"

newDBConfig := DBConfigDefault
newDBConfig.Data = t.TempDir()
newDBConfig.DBEngine = "pebble"

func() {
oldDb, err := openDB(&oldDBConfig, "", false)
defer oldDb.Close()
Require(t, err)
err = oldDb.Put([]byte{}, []byte{0xde, 0xed, 0xbe, 0xef})
Require(t, err)
for i := 0; i < 20; i++ {
err = oldDb.Put([]byte{byte(i)}, []byte{byte(i + 1)})
Require(t, err)
}
}()

config := DefaultDBConvConfig
config.Src = oldDBConfig
config.Dst = newDBConfig
config.IdealBatchSize = 5
conv := NewDBConverter(&config)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
err := conv.Convert(ctx)
Require(t, err)
conv.Close()
Contributor:

nitpick: unnecessary, since conv.Convert already has a defer c.Close


oldDb, err := openDB(&oldDBConfig, "", true)
Require(t, err)
defer oldDb.Close()
newDb, err := openDB(&newDBConfig, "", true)
Require(t, err)
defer newDb.Close()

func() {
it := oldDb.NewIterator(nil, nil)
defer it.Release()
for it.Next() {
if has, _ := newDb.Has(it.Key()); !has {
t.Log("Missing key in the converted db, key:", it.Key())
}
newValue, err := newDb.Get(it.Key())
Require(t, err)
if !bytes.Equal(newValue, it.Value()) {
Fail(t, "Value mismatch, old:", it.Value(), "new:", newValue)
}
}
}()
func() {
it := newDb.NewIterator(nil, nil)
defer it.Release()
for it.Next() {
if has, _ := oldDb.Has(it.Key()); !has {
Fail(t, "Unexpected key in the converted db, key:", it.Key())
}
}
}()
}

func Require(t *testing.T, err error, printables ...interface{}) {
t.Helper()
testhelpers.RequireImpl(t, err, printables...)
}

func Fail(t *testing.T, printables ...interface{}) {
t.Helper()
testhelpers.FailImpl(t, printables...)
}