From 1f399badd0357200b2bedbac06066db6a6888094 Mon Sep 17 00:00:00 2001 From: Maciej Kulawik Date: Mon, 29 Jan 2024 21:41:07 +0000 Subject: [PATCH] add state recovery option --- cmd/conf/init.go | 59 +++++++++--------- cmd/nitro/init.go | 8 +++ cmd/staterecovery/staterecovery.go | 83 +++++++++++++++++++++++++ execution/gethexec/blockchain.go | 1 + system_tests/staterecovery_test.go | 99 ++++++++++++++++++++++++++++++ 5 files changed, 222 insertions(+), 28 deletions(-) create mode 100644 cmd/staterecovery/staterecovery.go create mode 100644 system_tests/staterecovery_test.go diff --git a/cmd/conf/init.go b/cmd/conf/init.go index bebf1955b7..313e5bbee5 100644 --- a/cmd/conf/init.go +++ b/cmd/conf/init.go @@ -7,37 +7,39 @@ import ( ) type InitConfig struct { - Force bool `koanf:"force"` - Url string `koanf:"url"` - DownloadPath string `koanf:"download-path"` - DownloadPoll time.Duration `koanf:"download-poll"` - DevInit bool `koanf:"dev-init"` - DevInitAddress string `koanf:"dev-init-address"` - DevInitBlockNum uint64 `koanf:"dev-init-blocknum"` - Empty bool `koanf:"empty"` - AccountsPerSync uint `koanf:"accounts-per-sync"` - ImportFile string `koanf:"import-file"` - ThenQuit bool `koanf:"then-quit"` - Prune string `koanf:"prune"` - PruneBloomSize uint64 `koanf:"prune-bloom-size"` - ResetToMessage int64 `koanf:"reset-to-message"` + Force bool `koanf:"force"` + Url string `koanf:"url"` + DownloadPath string `koanf:"download-path"` + DownloadPoll time.Duration `koanf:"download-poll"` + DevInit bool `koanf:"dev-init"` + DevInitAddress string `koanf:"dev-init-address"` + DevInitBlockNum uint64 `koanf:"dev-init-blocknum"` + Empty bool `koanf:"empty"` + AccountsPerSync uint `koanf:"accounts-per-sync"` + ImportFile string `koanf:"import-file"` + ThenQuit bool `koanf:"then-quit"` + Prune string `koanf:"prune"` + PruneBloomSize uint64 `koanf:"prune-bloom-size"` + ResetToMessage int64 `koanf:"reset-to-message"` + RecreateMissingState bool `koanf:"recreate-missing-state"` } var InitConfigDefault = InitConfig{ - Force: false, - Url: "", - DownloadPath: "/tmp/", - DownloadPoll: time.Minute, - DevInit: false, - DevInitAddress: "", - DevInitBlockNum: 0, - Empty: false, - ImportFile: "", - AccountsPerSync: 100000, - ThenQuit: false, - Prune: "", - PruneBloomSize: 2048, - ResetToMessage: -1, + Force: false, + Url: "", + DownloadPath: "/tmp/", + DownloadPoll: time.Minute, + DevInit: false, + DevInitAddress: "", + DevInitBlockNum: 0, + Empty: false, + ImportFile: "", + AccountsPerSync: 100000, + ThenQuit: false, + Prune: "", + PruneBloomSize: 2048, + ResetToMessage: -1, + RecreateMissingState: false, } func InitConfigAddOptions(prefix string, f *pflag.FlagSet) { @@ -55,4 +57,5 @@ func InitConfigAddOptions(prefix string, f *pflag.FlagSet) { f.String(prefix+".prune", InitConfigDefault.Prune, "pruning for a given use: \"full\" for full nodes serving RPC requests, or \"validator\" for validators") f.Uint64(prefix+".prune-bloom-size", InitConfigDefault.PruneBloomSize, "the amount of memory in megabytes to use for the pruning bloom filter (higher values prune better)") f.Int64(prefix+".reset-to-message", InitConfigDefault.ResetToMessage, "forces a reset to an old message height. Also set max-reorg-resequence-depth=0 to force re-reading messages") + f.Bool(prefix+".recreate-missing-state", InitConfigDefault.RecreateMissingState, "if true: in case database exists and force=false, missing state will be recreated and committed to disk") } diff --git a/cmd/nitro/init.go b/cmd/nitro/init.go index 4cf5dcda06..bab15b6157 100644 --- a/cmd/nitro/init.go +++ b/cmd/nitro/init.go @@ -34,6 +34,7 @@ import ( "github.com/offchainlabs/nitro/cmd/conf" "github.com/offchainlabs/nitro/cmd/ipfshelper" "github.com/offchainlabs/nitro/cmd/pruning" + "github.com/offchainlabs/nitro/cmd/staterecovery" "github.com/offchainlabs/nitro/cmd/util" "github.com/offchainlabs/nitro/execution/gethexec" "github.com/offchainlabs/nitro/statetransfer" @@ -183,6 +184,13 @@ func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeCo if err != nil { return chainDb, l2BlockChain, err } + if config.Init.RecreateMissingState { + err = staterecovery.RecreateMissingStates(chainDb, l2BlockChain, cachingConfig) + if err != nil { + return chainDb, l2BlockChain, err + } + } + return chainDb, l2BlockChain, nil } readOnlyDb.Close() diff --git a/cmd/staterecovery/staterecovery.go b/cmd/staterecovery/staterecovery.go new file mode 100644 index 0000000000..0f2eba5c60 --- /dev/null +++ b/cmd/staterecovery/staterecovery.go @@ -0,0 +1,83 @@ +package staterecovery + +import ( + "fmt" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core" + "github.com/ethereum/go-ethereum/core/state" + "github.com/ethereum/go-ethereum/core/vm" + "github.com/ethereum/go-ethereum/ethdb" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/trie" + "github.com/ethereum/go-ethereum/trie/triedb/hashdb" +) + +func RecreateMissingStates(chainDb ethdb.Database, bc *core.BlockChain, cacheConfig *core.CacheConfig) error { + log.Info("Recreating missing states...") + start := time.Now() + current := bc.Genesis().NumberU64() + 1 + last := bc.CurrentBlock().Number.Uint64() + + previousBlock := bc.GetBlockByNumber(current - 1) + if previousBlock == nil { + return fmt.Errorf("genesis block is missing") + } + hashConfig := *hashdb.Defaults + hashConfig.CleanCacheSize = cacheConfig.TrieCleanLimit + trieConfig := &trie.Config{ + Preimages: false, + HashDB: &hashConfig, + } + database := state.NewDatabaseWithConfig(chainDb, trieConfig) + defer database.TrieDB().Close() + previousState, err := state.New(previousBlock.Root(), database, nil) + if err != nil { + return fmt.Errorf("genesis state is missing: %w", err) + } + database.TrieDB().Reference(previousBlock.Root(), common.Hash{}) + logged := time.Now() + recreated := 0 + for current <= last { + if time.Since(logged) > 1*time.Minute { + log.Info("Recreating missing states", "block", current, "target", last, "remaining", last-current, "elapsed", time.Since(start), "recreated", recreated) + logged = time.Now() + } + currentBlock := bc.GetBlockByNumber(current) + if currentBlock == nil { + return fmt.Errorf("missing block %d", current) + } + currentState, err := state.New(currentBlock.Root(), database, nil) + if err != nil { + _, _, _, err := bc.Processor().Process(currentBlock, previousState, vm.Config{}) + if err != nil { + return fmt.Errorf("processing block %d failed: %v", current, err) + } + root, err := previousState.Commit(current, bc.Config().IsEIP158(currentBlock.Number())) + if err != nil { + return fmt.Errorf("StateDB commit failed, number %d root %v: %w", current, currentBlock.Root().Hex(), err) + } + if root.Cmp(currentBlock.Root()) != 0 { + return fmt.Errorf("reached different state root after processing block %d, want %v, have %v", current, currentBlock.Root(), root) + } + // commit to disk + err = database.TrieDB().Commit(root, false) // TODO report = true, do we want this many logs? + if err != nil { + return fmt.Errorf("TrieDB commit failed, number %d root %v: %w", current, root, err) + } + currentState, err = state.New(currentBlock.Root(), database, nil) + if err != nil { + return fmt.Errorf("state reset after block %d failed: %v", current, err) + } + database.TrieDB().Reference(currentBlock.Root(), common.Hash{}) + database.TrieDB().Dereference(previousBlock.Root()) + recreated++ + } + current++ + previousState = currentState + previousBlock = currentBlock + } + log.Info("Finished recreating missing states", "elapsed", time.Since(start), "recreated", recreated) + return nil +} diff --git a/execution/gethexec/blockchain.go b/execution/gethexec/blockchain.go index a85224b635..2a20c3da26 100644 --- a/execution/gethexec/blockchain.go +++ b/execution/gethexec/blockchain.go @@ -67,6 +67,7 @@ var DefaultCachingConfig = CachingConfig{ MaxAmountOfGasToSkipStateSaving: 0, } +// TODO remove stack from parameters as it is no longer needed here func DefaultCacheConfigFor(stack *node.Node, cachingConfig *CachingConfig) *core.CacheConfig { baseConf := ethconfig.Defaults if cachingConfig.Archive { diff --git a/system_tests/staterecovery_test.go b/system_tests/staterecovery_test.go new file mode 100644 index 0000000000..561e889153 --- /dev/null +++ b/system_tests/staterecovery_test.go @@ -0,0 +1,99 @@ +package arbtest + +import ( + "context" + "testing" + "time" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/node" + "github.com/ethereum/go-ethereum/trie" + "github.com/offchainlabs/nitro/cmd/staterecovery" + "github.com/offchainlabs/nitro/execution/gethexec" +) + +func TestRectreateMissingStates(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + builder := NewNodeBuilder(ctx).DefaultConfig(t, true) + builder.execConfig.Caching.MaxNumberOfBlocksToSkipStateSaving = 16 + _ = builder.Build(t) + l2cleanupDone := false + defer func() { + if !l2cleanupDone { + builder.L2.cleanup() + } + builder.L1.cleanup() + }() + builder.L2Info.GenerateAccount("User2") + var txs []*types.Transaction + for i := uint64(0); i < 200; i++ { + tx := builder.L2Info.PrepareTx("Owner", "User2", builder.L2Info.TransferGas, common.Big1, nil) + txs = append(txs, tx) + err := builder.L2.Client.SendTransaction(ctx, tx) + Require(t, err) + } + for _, tx := range txs { + _, err := builder.L2.EnsureTxSucceeded(tx) + Require(t, err) + } + lastBlock, err := builder.L2.Client.BlockNumber(ctx) + Require(t, err) + l2cleanupDone = true + builder.L2.cleanup() + t.Log("stopped l2 node") + func() { + stack, err := node.New(builder.l2StackConfig) + Require(t, err) + defer stack.Close() + chainDb, err := stack.OpenDatabase("chaindb", 0, 0, "", false) + Require(t, err) + defer chainDb.Close() + cacheConfig := gethexec.DefaultCacheConfigFor(stack, &gethexec.DefaultCachingConfig) + bc, err := gethexec.GetBlockChain(chainDb, cacheConfig, builder.chainConfig, builder.execConfig.TxLookupLimit) + Require(t, err) + err = staterecovery.RecreateMissingStates(chainDb, bc, cacheConfig) + Require(t, err) + }() + + testClient, cleanup := builder.Build2ndNode(t, &SecondNodeParams{stackConfig: builder.l2StackConfig}) + defer cleanup() + + currentBlock := uint64(0) + // wait for the chain to catch up + for currentBlock < lastBlock { + currentBlock, err = testClient.Client.BlockNumber(ctx) + Require(t, err) + time.Sleep(20 * time.Millisecond) + } + + currentBlock, err = testClient.Client.BlockNumber(ctx) + Require(t, err) + bc := testClient.ExecNode.Backend.ArbInterface().BlockChain() + triedb := bc.StateCache().TrieDB() + var start uint64 + if currentBlock+1 >= builder.execConfig.Caching.BlockCount { + start = currentBlock + 1 - builder.execConfig.Caching.BlockCount + } else { + start = 0 + } + for i := start; i <= currentBlock; i++ { + header := bc.GetHeaderByNumber(i) + _, err := bc.StateAt(header.Root) + Require(t, err) + tr, err := trie.New(trie.TrieID(header.Root), triedb) + Require(t, err) + it, err := tr.NodeIterator(nil) + Require(t, err) + for it.Next(true) { + } + Require(t, it.Error()) + } + + tx := builder.L2Info.PrepareTx("Owner", "User2", builder.L2Info.TransferGas, common.Big1, nil) + err = testClient.Client.SendTransaction(ctx, tx) + Require(t, err) + _, err = testClient.EnsureTxSucceeded(tx) + Require(t, err) +}