diff --git a/Dockerfile b/Dockerfile index 5c56b60cc0..88c34cec44 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,7 +41,8 @@ RUN apt-get update && apt-get install -y curl build-essential=12.9 FROM wasm-base as wasm-libs-builder # clang / lld used by soft-float wasm -RUN apt-get install -y clang=1:14.0-55.7~deb12u1 lld=1:14.0-55.7~deb12u1 wabt +RUN apt-get update && \ + apt-get install -y clang=1:14.0-55.7~deb12u1 lld=1:14.0-55.7~deb12u1 wabt # pinned rust 1.75.0 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.75.0 --target x86_64-unknown-linux-gnu wasm32-unknown-unknown wasm32-wasi COPY ./Makefile ./ @@ -203,6 +204,7 @@ COPY ./scripts/download-machine.sh . #RUN ./download-machine.sh consensus-v11 0xf4389b835497a910d7ba3ebfb77aa93da985634f3c052de1290360635be40c4a #RUN ./download-machine.sh consensus-v11.1 0x68e4fe5023f792d4ef584796c84d710303a5e12ea02d6e37e2b5e9c4332507c4 #RUN ./download-machine.sh consensus-v20 0x8b104a2e80ac6165dc58b9048de12f301d70b02a0ab51396c22b4b4b802a16a4 +RUN ./download-machine.sh consensus-v30-rc.2 0xb0de9cb89e4d944ae6023a3b62276e54804c242fd8c4c2d8e6cc4450f5fa8b1b FROM golang:1.21-bookworm as node-builder WORKDIR /workspace @@ -268,11 +270,15 @@ USER user WORKDIR /home/user/ ENTRYPOINT [ "/usr/local/bin/nitro" ] +FROM offchainlabs/nitro-node:v2.3.4-rc.5-b4cc111 as nitro-legacy + FROM nitro-node-slim as nitro-node USER root COPY --from=prover-export /bin/jit /usr/local/bin/ COPY --from=node-builder /workspace/target/bin/daserver /usr/local/bin/ COPY --from=node-builder /workspace/target/bin/datool /usr/local/bin/ +COPY --from=nitro-legacy /home/user/target/machines /home/user/nitro-legacy/machines +RUN rm -rf /workspace/target/legacy-machines/latest RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && \ apt-get install -y \ @@ -282,10 +288,23 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* /usr/share/doc/* /var/cache/ldconfig/aux-cache /usr/lib/python3.9/__pycache__/ /usr/lib/python3.9/*/__pycache__/ /var/log/* && \ nitro --version +ENTRYPOINT [ "/usr/local/bin/nitro" , "--validation.wasm.allowed-wasm-module-roots", "/home/user/nitro-legacy/machines,/home/user/target/machines"] + +USER user +FROM nitro-node as nitro-node-validator +USER root +COPY --from=nitro-legacy /usr/local/bin/nitro-val /home/user/nitro-legacy/bin/nitro-val +COPY --from=nitro-legacy /usr/local/bin/jit /home/user/nitro-legacy/bin/jit +RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get update && \ + apt-get install -y xxd netcat-traditional && \ + rm -rf /var/lib/apt/lists/* /usr/share/doc/* /var/cache/ldconfig/aux-cache /usr/lib/python3.9/__pycache__/ /usr/lib/python3.9/*/__pycache__/ /var/log/* +COPY scripts/split-val-entry.sh /usr/local/bin +ENTRYPOINT [ "/usr/local/bin/split-val-entry.sh" ] USER user -FROM nitro-node as nitro-node-dev-base +FROM nitro-node-validator as nitro-node-dev USER root # Copy in latest WASM module root RUN rm -f /home/user/target/machines/latest @@ -309,22 +328,5 @@ RUN export DEBIAN_FRONTEND=noninteractive && \ USER user -FROM offchainlabs/nitro-node:v2.3.4-rc.5-b4cc111 as nitro-legacy - -FROM nitro-node-dev-base as nitro-node-dev -USER root - -RUN export DEBIAN_FRONTEND=noninteractive && \ - apt-get update && \ - apt-get install -y xxd netcat-traditional && \ - rm -rf /var/lib/apt/lists/* /usr/share/doc/* /var/cache/ldconfig/aux-cache /usr/lib/python3.9/__pycache__/ /usr/lib/python3.9/*/__pycache__/ /var/log/* -COPY scripts/split-val-entry.sh 
/usr/local/bin -COPY --from=nitro-legacy /home/user/target/machines /home/user/nitro-legacy/machines -RUN rm -rf /workspace/target/legacy-machines/latest -COPY --from=nitro-legacy /usr/local/bin/nitro-val /home/user/nitro-legacy/bin/nitro-val -COPY --from=nitro-legacy /usr/local/bin/jit /home/user/nitro-legacy/bin/jit -ENTRYPOINT [ "/usr/local/bin/split-val-entry.sh" ] -USER user - FROM nitro-node as nitro-node-default # Just to ensure nitro-node-dist is default diff --git a/arbitrator/stylus/src/cache.rs b/arbitrator/stylus/src/cache.rs index 2b83c6152f..06739f2219 100644 --- a/arbitrator/stylus/src/cache.rs +++ b/arbitrator/stylus/src/cache.rs @@ -21,7 +21,7 @@ macro_rules! cache { } pub struct InitCache { - arbos: HashMap, + long_term: HashMap, lru: LruCache, } @@ -59,20 +59,31 @@ impl CacheItem { } impl InitCache { + // current implementation only has one tag that stores to the long_term + // future implementations might have more, but 0 is a reserved tag + // that will never modify long_term state + const ARBOS_TAG: u32 = 1; + fn new(size: usize) -> Self { Self { - arbos: HashMap::new(), + long_term: HashMap::new(), lru: LruCache::new(NonZeroUsize::new(size).unwrap()), } } + pub fn set_lru_size(size: u32) { + cache!() + .lru + .resize(NonZeroUsize::new(size.try_into().unwrap()).unwrap()) + } + /// Retrieves a cached value, updating items as necessary. pub fn get(module_hash: Bytes32, version: u16, debug: bool) -> Option<(Module, Store)> { let mut cache = cache!(); let key = CacheKey::new(module_hash, version, debug); // See if the item is in the long term cache - if let Some(item) = cache.arbos.get(&key) { + if let Some(item) = cache.long_term.get(&key) { return Some(item.data()); } @@ -84,18 +95,27 @@ impl InitCache { } /// Inserts an item into the long term cache, cloning from the LRU cache if able. + /// If long_term_tag is 0 will only insert to LRU pub fn insert( module_hash: Bytes32, module: &[u8], version: u16, + long_term_tag: u32, debug: bool, ) -> Result<(Module, Store)> { let key = CacheKey::new(module_hash, version, debug); // if in LRU, add to ArbOS let mut cache = cache!(); + if let Some(item) = cache.long_term.get(&key) { + return Ok(item.data()); + } if let Some(item) = cache.lru.peek(&key).cloned() { - cache.arbos.insert(key, item.clone()); + if long_term_tag == Self::ARBOS_TAG { + cache.long_term.insert(key, item.clone()); + } else { + cache.lru.promote(&key) + } return Ok(item.data()); } drop(cache); @@ -105,37 +125,34 @@ impl InitCache { let item = CacheItem::new(module, engine); let data = item.data(); - cache!().arbos.insert(key, item); + let mut cache = cache!(); + if long_term_tag != Self::ARBOS_TAG { + cache.lru.put(key, item); + } else { + cache.long_term.insert(key, item); + } Ok(data) } - /// Inserts an item into the short-lived LRU cache. - pub fn insert_lru( - module_hash: Bytes32, - module: &[u8], - version: u16, - debug: bool, - ) -> Result<(Module, Store)> { - let engine = CompileConfig::version(version, debug).engine(); - let module = unsafe { Module::deserialize_unchecked(&engine, module)? }; - - let key = CacheKey::new(module_hash, version, debug); - let item = CacheItem::new(module, engine); - cache!().lru.put(key, item.clone()); - Ok(item.data()) - } - /// Evicts an item in the long-term cache. 
- pub fn evict(module_hash: Bytes32, version: u16, debug: bool) { + pub fn evict(module_hash: Bytes32, version: u16, long_term_tag: u32, debug: bool) { + if long_term_tag != Self::ARBOS_TAG { + return; + } let key = CacheKey::new(module_hash, version, debug); - cache!().arbos.remove(&key); + let mut cache = cache!(); + if let Some(item) = cache.long_term.remove(&key) { + cache.lru.put(key, item); + } } - /// Modifies the cache for reorg, dropping the long-term cache. - pub fn reorg(_block: u64) { + pub fn clear_long_term(long_term_tag: u32) { + if long_term_tag != Self::ARBOS_TAG { + return; + } let mut cache = cache!(); let cache = &mut *cache; - for (key, item) in cache.arbos.drain() { + for (key, item) in cache.long_term.drain() { cache.lru.put(key, item); // not all will fit, just a heuristic } } diff --git a/arbitrator/stylus/src/lib.rs b/arbitrator/stylus/src/lib.rs index 7abfb98bf5..3c53359f8b 100644 --- a/arbitrator/stylus/src/lib.rs +++ b/arbitrator/stylus/src/lib.rs @@ -183,6 +183,7 @@ pub unsafe extern "C" fn stylus_call( debug_chain: bool, output: *mut RustBytes, gas: *mut u64, + long_term_tag: u32, ) -> UserOutcomeKind { let module = module.slice(); let calldata = calldata.slice().to_vec(); @@ -193,7 +194,14 @@ pub unsafe extern "C" fn stylus_call( // Safety: module came from compile_user_wasm and we've paid for memory expansion let instance = unsafe { - NativeInstance::deserialize_cached(module, config.version, evm_api, evm_data, debug_chain) + NativeInstance::deserialize_cached( + module, + config.version, + evm_api, + evm_data, + long_term_tag, + debug_chain, + ) }; let mut instance = match instance { Ok(instance) => instance, @@ -212,33 +220,47 @@ pub unsafe extern "C" fn stylus_call( status } +/// resize lru +#[no_mangle] +pub extern "C" fn stylus_cache_lru_resize(size: u32) { + InitCache::set_lru_size(size); +} + /// Caches an activated user program. /// /// # Safety /// /// `module` must represent a valid module produced from `stylus_activate`. +/// arbos_tag: a tag for arbos cache. 0 won't affect real caching +/// currently only if tag==1 caching will be affected #[no_mangle] pub unsafe extern "C" fn stylus_cache_module( module: GoSliceData, module_hash: Bytes32, version: u16, + arbos_tag: u32, debug: bool, ) { - if let Err(error) = InitCache::insert(module_hash, module.slice(), version, debug) { + if let Err(error) = InitCache::insert(module_hash, module.slice(), version, arbos_tag, debug) { panic!("tried to cache invalid asm!: {error}"); } } /// Evicts an activated user program from the init cache. #[no_mangle] -pub extern "C" fn stylus_evict_module(module_hash: Bytes32, version: u16, debug: bool) { - InitCache::evict(module_hash, version, debug); +pub extern "C" fn stylus_evict_module( + module_hash: Bytes32, + version: u16, + arbos_tag: u32, + debug: bool, +) { + InitCache::evict(module_hash, version, arbos_tag, debug); } /// Reorgs the init cache. This will likely never happen. #[no_mangle] -pub extern "C" fn stylus_reorg_vm(block: u64) { - InitCache::reorg(block); +pub extern "C" fn stylus_reorg_vm(_block: u64, arbos_tag: u32) { + InitCache::clear_long_term(arbos_tag); } /// Frees the vector. Does nothing when the vector is null. 
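The tag plumbing introduced above is easier to follow with a small model. The sketch below is not the Rust implementation; it is a self-contained Go approximation of the InitCache behaviour this diff sets up: tag 1 (the ArbOS tag) is the only tag allowed to touch the long-term map, any other tag stays in the LRU, and eviction demotes a long-term entry into the LRU rather than dropping it. The hashicorp/golang-lru dependency and the string keys are purely illustrative.

```go
package main

import (
	"fmt"

	lru "github.com/hashicorp/golang-lru/v2"
)

// arbosTag mirrors InitCache::ARBOS_TAG; tag 0 is reserved and never
// modifies long-term state.
const arbosTag uint32 = 1

type cacheModel struct {
	longTerm map[string][]byte
	lru      *lru.Cache[string, []byte]
}

func newCacheModel(size int) *cacheModel {
	l, _ := lru.New[string, []byte](size)
	return &cacheModel{longTerm: map[string][]byte{}, lru: l}
}

// insert stores long-term only for the ArbOS tag, otherwise LRU-only.
func (c *cacheModel) insert(key string, module []byte, tag uint32) {
	if tag == arbosTag {
		c.longTerm[key] = module
		return
	}
	c.lru.Add(key, module)
}

// evict demotes a long-term entry into the LRU rather than dropping it,
// and is a no-op for any tag other than the ArbOS tag.
func (c *cacheModel) evict(key string, tag uint32) {
	if tag != arbosTag {
		return
	}
	if item, ok := c.longTerm[key]; ok {
		delete(c.longTerm, key)
		c.lru.Add(key, item)
	}
}

func main() {
	c := newCacheModel(16)
	c.insert("module-a", []byte{1}, arbosTag) // committed block: long-term
	c.insert("module-b", []byte{2}, 0)        // eth_call / estimation: LRU only
	c.evict("module-a", arbosTag)             // demoted into the LRU
	fmt.Println(len(c.longTerm), c.lru.Len()) // prints: 0 2
}
```

The real `get` path in cache.rs checks `long_term` first and only then falls back to the LRU, as the hunk above shows.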
diff --git a/arbitrator/stylus/src/native.rs b/arbitrator/stylus/src/native.rs index 6d5e4cd2e9..2858d59fdc 100644 --- a/arbitrator/stylus/src/native.rs +++ b/arbitrator/stylus/src/native.rs @@ -113,6 +113,7 @@ impl> NativeInstance { version: u16, evm: E, evm_data: EvmData, + mut long_term_tag: u32, debug: bool, ) -> Result { let compile = CompileConfig::version(version, debug); @@ -122,10 +123,11 @@ impl> NativeInstance { if let Some((module, store)) = InitCache::get(module_hash, version, debug) { return Self::from_module(module, store, env); } - let (module, store) = match env.evm_data.cached { - true => InitCache::insert(module_hash, module, version, debug)?, - false => InitCache::insert_lru(module_hash, module, version, debug)?, - }; + if !env.evm_data.cached { + long_term_tag = 0; + } + let (module, store) = + InitCache::insert(module_hash, module, version, long_term_tag, debug)?; Self::from_module(module, store, env) } diff --git a/arbnode/dataposter/data_poster.go b/arbnode/dataposter/data_poster.go index fb35ac3c8d..5aaef959d8 100644 --- a/arbnode/dataposter/data_poster.go +++ b/arbnode/dataposter/data_poster.go @@ -851,31 +851,31 @@ func (p *DataPoster) sendTx(ctx context.Context, prevTx *storage.QueuedTransacti // different type with a lower nonce. // If we decide not to send this tx yet, just leave it queued and with Sent set to false. // The resending/repricing loop in DataPoster.Start will keep trying. - if !newTx.Sent && newTx.FullTx.Nonce() > 0 { + previouslySent := newTx.Sent || (prevTx != nil && prevTx.Sent) // if we've previously sent this nonce + if !previouslySent && newTx.FullTx.Nonce() > 0 { precedingTx, err := p.queue.Get(ctx, arbmath.SaturatingUSub(newTx.FullTx.Nonce(), 1)) if err != nil { return fmt.Errorf("couldn't get preceding tx in DataPoster to check if should send tx with nonce %d: %w", newTx.FullTx.Nonce(), err) } if precedingTx != nil { // precedingTx == nil -> the actual preceding tx was already confirmed - var latestBlockNumber, prevBlockNumber, reorgResistantNonce uint64 + var latestBlockNumber, prevBlockNumber, reorgResistantTxCount uint64 if precedingTx.FullTx.Type() != newTx.FullTx.Type() || !precedingTx.Sent { latestBlockNumber, err = p.client.BlockNumber(ctx) if err != nil { return fmt.Errorf("couldn't get block number in DataPoster to check if should send tx with nonce %d: %w", newTx.FullTx.Nonce(), err) } prevBlockNumber = arbmath.SaturatingUSub(latestBlockNumber, 1) - reorgResistantNonce, err = p.client.NonceAt(ctx, p.Sender(), new(big.Int).SetUint64(prevBlockNumber)) + reorgResistantTxCount, err = p.client.NonceAt(ctx, p.Sender(), new(big.Int).SetUint64(prevBlockNumber)) if err != nil { return fmt.Errorf("couldn't determine reorg resistant nonce in DataPoster to check if should send tx with nonce %d: %w", newTx.FullTx.Nonce(), err) } - if precedingTx.FullTx.Nonce() > reorgResistantNonce { - log.Info("DataPoster is avoiding creating a mempool nonce gap (the tx remains queued and will be retried)", "nonce", newTx.FullTx.Nonce(), "prevType", precedingTx.FullTx.Type(), "type", newTx.FullTx.Type(), "prevSent", precedingTx.Sent) + if newTx.FullTx.Nonce() > reorgResistantTxCount { + log.Info("DataPoster is avoiding creating a mempool nonce gap (the tx remains queued and will be retried)", "nonce", newTx.FullTx.Nonce(), "prevType", precedingTx.FullTx.Type(), "type", newTx.FullTx.Type(), "prevSent", precedingTx.Sent, "latestBlockNumber", latestBlockNumber, "prevBlockNumber", prevBlockNumber, "reorgResistantTxCount", reorgResistantTxCount) return nil } - 
} else { - log.Info("DataPoster will send previously unsent batch tx", "nonce", newTx.FullTx.Nonce(), "prevType", precedingTx.FullTx.Type(), "type", newTx.FullTx.Type(), "prevSent", precedingTx.Sent, "latestBlockNumber", latestBlockNumber, "prevBlockNumber", prevBlockNumber, "reorgResistantNonce", reorgResistantNonce) } + log.Debug("DataPoster will send previously unsent batch tx", "nonce", newTx.FullTx.Nonce(), "prevType", precedingTx.FullTx.Type(), "type", newTx.FullTx.Type(), "prevSent", precedingTx.Sent, "latestBlockNumber", latestBlockNumber, "prevBlockNumber", prevBlockNumber, "reorgResistantTxCount", reorgResistantTxCount) } } diff --git a/arbnode/dataposter/storage_test.go b/arbnode/dataposter/storage_test.go index f98c120f38..e2aa321e0d 100644 --- a/arbnode/dataposter/storage_test.go +++ b/arbnode/dataposter/storage_test.go @@ -19,6 +19,7 @@ import ( "github.com/offchainlabs/nitro/arbnode/dataposter/redis" "github.com/offchainlabs/nitro/arbnode/dataposter/slice" "github.com/offchainlabs/nitro/arbnode/dataposter/storage" + "github.com/offchainlabs/nitro/cmd/conf" "github.com/offchainlabs/nitro/util/arbmath" "github.com/offchainlabs/nitro/util/redisutil" "github.com/offchainlabs/nitro/util/signature" @@ -44,7 +45,7 @@ func newLevelDBStorage(t *testing.T, encF storage.EncoderDecoderF) *dbstorage.St func newPebbleDBStorage(t *testing.T, encF storage.EncoderDecoderF) *dbstorage.Storage { t.Helper() - db, err := rawdb.NewPebbleDBDatabase(path.Join(t.TempDir(), "pebble.db"), 0, 0, "default", false, true) + db, err := rawdb.NewPebbleDBDatabase(path.Join(t.TempDir(), "pebble.db"), 0, 0, "default", false, true, conf.PersistentConfigDefault.Pebble.ExtraOptions("pebble")) if err != nil { t.Fatalf("NewPebbleDBDatabase() unexpected error: %v", err) } diff --git a/arbnode/inbox_test.go b/arbnode/inbox_test.go index 5c879743a4..594e0cedb5 100644 --- a/arbnode/inbox_test.go +++ b/arbnode/inbox_test.go @@ -65,6 +65,7 @@ func NewTransactionStreamerForTest(t *testing.T, ownerAddress common.Address) (* if err != nil { Fail(t, err) } + execEngine.Initialize(gethexec.DefaultCachingConfig.StylusLRUCache) execSeq := &execClientWrapper{execEngine, t} inbox, err := NewTransactionStreamer(arbDb, bc.Config(), execSeq, nil, make(chan error, 1), transactionStreamerConfigFetcher) if err != nil { diff --git a/arbos/programs/native.go b/arbos/programs/native.go index 7a6c16d866..f24dcac64d 100644 --- a/arbos/programs/native.go +++ b/arbos/programs/native.go @@ -172,6 +172,7 @@ func callProgram( evmData *EvmData, stylusParams *ProgParams, memoryModel *MemoryModel, + arbos_tag uint32, ) ([]byte, error) { db := interpreter.Evm().StateDB debug := stylusParams.DebugMode @@ -198,6 +199,7 @@ func callProgram( cbool(debug), output, (*u64)(&scope.Contract.Gas), + u32(arbos_tag), )) depth := interpreter.Depth() @@ -228,8 +230,9 @@ func cacheProgram(db vm.StateDB, module common.Hash, program Program, params *St if err != nil { panic("unable to recreate wasm") } - state.CacheWasmRust(asm, module, program.version, debug) - db.RecordCacheWasm(state.CacheWasm{ModuleHash: module, Version: program.version, Debug: debug}) + tag := db.Database().WasmCacheTag() + state.CacheWasmRust(asm, module, program.version, tag, debug) + db.RecordCacheWasm(state.CacheWasm{ModuleHash: module, Version: program.version, Tag: tag, Debug: debug}) } } @@ -237,22 +240,27 @@ func cacheProgram(db vm.StateDB, module common.Hash, program Program, params *St // For gas estimation and eth_call, we ignore permanent updates and rely on Rust's LRU. 
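Tying the comment above together with the new `arbos_tag` parameter: only commit-mode execution is meant to populate the shared long-term cache, while gas estimation and eth_call runs pass tag 0 and stay in the Rust LRU. A minimal sketch of that dispatch, with hypothetical stand-ins for core.MessageRunMode and db.Database().WasmCacheTag():

```go
package main

import "fmt"

// messageRunMode stands in for core.MessageRunMode; only commit mode is
// allowed to touch the long-term (ArbOS-tagged) cache.
type messageRunMode int

const (
	messageCommitMode messageRunMode = iota
	messageGasEstimationMode
	messageEthcallMode
)

// cacheTagFor stands in for the run-mode branch: commit mode uses the
// database's wasm cache tag, everything else gets tag 0 (LRU only).
func cacheTagFor(runMode messageRunMode, wasmCacheTag uint32) uint32 {
	if runMode == messageCommitMode {
		return wasmCacheTag // typically 1, the ArbOS tag
	}
	return 0 // never modifies long-term state
}

func main() {
	fmt.Println(cacheTagFor(messageCommitMode, 1))        // 1
	fmt.Println(cacheTagFor(messageGasEstimationMode, 1)) // 0
}
```

This is the same decision the Programs.CallProgram hunk in arbos/programs/programs.go makes later in this diff.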
func evictProgram(db vm.StateDB, module common.Hash, version uint16, debug bool, runMode core.MessageRunMode, forever bool) { if runMode == core.MessageCommitMode { - state.EvictWasmRust(module, version, debug) + tag := db.Database().WasmCacheTag() + state.EvictWasmRust(module, version, tag, debug) if !forever { - db.RecordEvictWasm(state.EvictWasm{ModuleHash: module, Version: version, Debug: debug}) + db.RecordEvictWasm(state.EvictWasm{ModuleHash: module, Version: version, Tag: tag, Debug: debug}) } } } func init() { - state.CacheWasmRust = func(asm []byte, moduleHash common.Hash, version uint16, debug bool) { - C.stylus_cache_module(goSlice(asm), hashToBytes32(moduleHash), u16(version), cbool(debug)) + state.CacheWasmRust = func(asm []byte, moduleHash common.Hash, version uint16, tag uint32, debug bool) { + C.stylus_cache_module(goSlice(asm), hashToBytes32(moduleHash), u16(version), u32(tag), cbool(debug)) } - state.EvictWasmRust = func(moduleHash common.Hash, version uint16, debug bool) { - C.stylus_evict_module(hashToBytes32(moduleHash), u16(version), cbool(debug)) + state.EvictWasmRust = func(moduleHash common.Hash, version uint16, tag uint32, debug bool) { + C.stylus_evict_module(hashToBytes32(moduleHash), u16(version), u32(tag), cbool(debug)) } } +func ResizeWasmLruCache(size uint32) { + C.stylus_cache_lru_resize(u32(size)) +} + func (value bytes32) toHash() common.Hash { hash := common.Hash{} for index, b := range value.bytes { diff --git a/arbos/programs/programs.go b/arbos/programs/programs.go index 6f73e16b85..97b5477afc 100644 --- a/arbos/programs/programs.go +++ b/arbos/programs/programs.go @@ -166,6 +166,7 @@ func (p Programs) CallProgram( tracingInfo *util.TracingInfo, calldata []byte, reentrant bool, + runmode core.MessageRunMode, ) ([]byte, error) { evm := interpreter.Evm() contract := scope.Contract @@ -237,7 +238,11 @@ func (p Programs) CallProgram( if contract.CodeAddr != nil { address = *contract.CodeAddr } - return callProgram(address, moduleHash, localAsm, scope, interpreter, tracingInfo, calldata, evmData, goParams, model) + var arbos_tag uint32 + if runmode == core.MessageCommitMode { + arbos_tag = statedb.Database().WasmCacheTag() + } + return callProgram(address, moduleHash, localAsm, scope, interpreter, tracingInfo, calldata, evmData, goParams, model, arbos_tag) } func getWasm(statedb vm.StateDB, program common.Address) ([]byte, error) { diff --git a/arbos/programs/wasm.go b/arbos/programs/wasm.go index 95f30e83b6..4bc978a2b6 100644 --- a/arbos/programs/wasm.go +++ b/arbos/programs/wasm.go @@ -143,6 +143,7 @@ func callProgram( evmData *EvmData, params *ProgParams, memoryModel *MemoryModel, + _arbos_tag uint32, ) ([]byte, error) { reqHandler := newApiClosures(interpreter, tracingInfo, scope, memoryModel) gasLeft, retData, err := CallProgramLoop(moduleHash, calldata, scope.Contract.Gas, evmData, params, reqHandler) diff --git a/arbos/tx_processor.go b/arbos/tx_processor.go index b5fb64f695..65762fd2d1 100644 --- a/arbos/tx_processor.go +++ b/arbos/tx_processor.go @@ -127,6 +127,7 @@ func (p *TxProcessor) ExecuteWASM(scope *vm.ScopeContext, input []byte, interpre tracingInfo, input, reentrant, + p.RunMode(), ) } diff --git a/blocks_reexecutor/blocks_reexecutor.go b/blocks_reexecutor/blocks_reexecutor.go index bb6de00cad..1e4a06fe90 100644 --- a/blocks_reexecutor/blocks_reexecutor.go +++ b/blocks_reexecutor/blocks_reexecutor.go @@ -35,17 +35,16 @@ func (c *Config) Validate() error { if c.EndBlock < c.StartBlock { return errors.New("invalid block range for blocks 
re-execution") } - if c.Room == 0 { - return errors.New("room for blocks re-execution cannot be zero") + if c.Room <= 0 { + return errors.New("room for blocks re-execution should be greater than 0") } return nil } var DefaultConfig = Config{ - Enable: false, - Mode: "random", - Room: runtime.NumCPU(), - BlocksPerThread: 10000, + Enable: false, + Mode: "random", + Room: runtime.NumCPU(), } var TestConfig = Config{ @@ -66,13 +65,14 @@ func ConfigAddOptions(prefix string, f *flag.FlagSet) { type BlocksReExecutor struct { stopwaiter.StopWaiter - config *Config - blockchain *core.BlockChain - stateFor arbitrum.StateForHeaderFunction - done chan struct{} - fatalErrChan chan error - startBlock uint64 - currentBlock uint64 + config *Config + blockchain *core.BlockChain + stateFor arbitrum.StateForHeaderFunction + done chan struct{} + fatalErrChan chan error + startBlock uint64 + currentBlock uint64 + blocksPerThread uint64 } func New(c *Config, blockchain *core.BlockChain, fatalErrChan chan error) *BlocksReExecutor { @@ -84,32 +84,47 @@ func New(c *Config, blockchain *core.BlockChain, fatalErrChan chan error) *Block start = chainStart end = chainEnd } - if start < chainStart { - log.Warn("state reexecutor's start block number is lower than genesis, resetting to genesis") + if start < chainStart || start > chainEnd { + log.Warn("invalid state reexecutor's start block number, resetting to genesis", "start", start, "genesis", chainStart) start = chainStart } - if end > chainEnd { - log.Warn("state reexecutor's end block number is greater than latest, resetting to latest") + if end > chainEnd || end < chainStart { + log.Warn("invalid state reexecutor's end block number, resetting to latest", "end", end, "latest", chainEnd) end = chainEnd } + blocksPerThread := uint64(10000) + if c.BlocksPerThread != 0 { + blocksPerThread = c.BlocksPerThread + } if c.Mode == "random" && end != start { - if c.BlocksPerThread > end-start { - c.BlocksPerThread = end - start + // Reexecute a range of 10000 or (non-zero) c.BlocksPerThread number of blocks between start to end picked randomly + rng := blocksPerThread + if rng > end-start { + rng = end - start } - start += uint64(rand.Intn(int(end - start - c.BlocksPerThread + 1))) - end = start + c.BlocksPerThread + start += uint64(rand.Intn(int(end - start - rng + 1))) + end = start + rng } - // inclusive of block reexecution [start, end] - if start > 0 { + // Inclusive of block reexecution [start, end] + // Do not reexecute genesis block i,e chainStart + if start > 0 && start != chainStart { start-- } + // Divide work equally among available threads when BlocksPerThread is zero + if c.BlocksPerThread == 0 { + work := (end - start) / uint64(c.Room) + if work > 0 { + blocksPerThread = work + } + } return &BlocksReExecutor{ - config: c, - blockchain: blockchain, - currentBlock: end, - startBlock: start, - done: make(chan struct{}, c.Room), - fatalErrChan: fatalErrChan, + config: c, + blockchain: blockchain, + currentBlock: end, + startBlock: start, + blocksPerThread: blocksPerThread, + done: make(chan struct{}, c.Room), + fatalErrChan: fatalErrChan, stateFor: func(header *types.Header) (*state.StateDB, arbitrum.StateReleaseFunc, error) { state, err := blockchain.StateAt(header.Root) return state, arbitrum.NoopStateRelease, err @@ -119,17 +134,17 @@ func New(c *Config, blockchain *core.BlockChain, fatalErrChan chan error) *Block // LaunchBlocksReExecution launches the thread to apply blocks of range [currentBlock-s.config.BlocksPerThread, currentBlock] to the last available 
valid state func (s *BlocksReExecutor) LaunchBlocksReExecution(ctx context.Context, currentBlock uint64) uint64 { - start := arbmath.SaturatingUSub(currentBlock, s.config.BlocksPerThread) + start := arbmath.SaturatingUSub(currentBlock, s.blocksPerThread) if start < s.startBlock { start = s.startBlock } - // we don't use state release pattern here - // TODO do we want to use release pattern here? - startState, startHeader, _, err := arbitrum.FindLastAvailableState(ctx, s.blockchain, s.stateFor, s.blockchain.GetHeaderByNumber(start), nil, -1) + startState, startHeader, release, err := arbitrum.FindLastAvailableState(ctx, s.blockchain, s.stateFor, s.blockchain.GetHeaderByNumber(start), nil, -1) if err != nil { s.fatalErrChan <- fmt.Errorf("blocksReExecutor failed to get last available state while searching for state at %d, err: %w", start, err) return s.startBlock } + // NoOp + defer release() start = startHeader.Number.Uint64() s.LaunchThread(func(ctx context.Context) { _, err := arbitrum.AdvanceStateUpToBlock(ctx, s.blockchain, startState, s.blockchain.GetHeaderByNumber(currentBlock), startHeader, nil) @@ -169,9 +184,14 @@ func (s *BlocksReExecutor) Impl(ctx context.Context) { log.Info("BlocksReExecutor successfully completed re-execution of blocks against historic state", "stateAt", s.startBlock, "startBlock", s.startBlock+1, "endBlock", end) } -func (s *BlocksReExecutor) Start(ctx context.Context) { +func (s *BlocksReExecutor) Start(ctx context.Context, done chan struct{}) { s.StopWaiter.Start(ctx, s) - s.LaunchThread(s.Impl) + s.LaunchThread(func(ctx context.Context) { + s.Impl(ctx) + if done != nil { + close(done) + } + }) } func (s *BlocksReExecutor) StopAndWait() { diff --git a/cmd/conf/chain.go b/cmd/conf/chain.go index 531945b4d6..ab9a713287 100644 --- a/cmd/conf/chain.go +++ b/cmd/conf/chain.go @@ -20,11 +20,12 @@ type ParentChainConfig struct { } var L1ConnectionConfigDefault = rpcclient.ClientConfig{ - URL: "", - Retries: 2, - Timeout: time.Minute, - ConnectionWait: time.Minute, - ArgLogLimit: 2048, + URL: "", + Retries: 2, + Timeout: time.Minute, + ConnectionWait: time.Minute, + ArgLogLimit: 2048, + WebsocketMessageSizeLimit: 256 * 1024 * 1024, } var L1ConfigDefault = ParentChainConfig{ diff --git a/cmd/conf/database.go b/cmd/conf/database.go index b049375d66..57674ba7f1 100644 --- a/cmd/conf/database.go +++ b/cmd/conf/database.go @@ -5,20 +5,25 @@ package conf import ( "fmt" + "math" "os" "path" "path/filepath" + "runtime" + "time" + "github.com/ethereum/go-ethereum/ethdb/pebble" flag "github.com/spf13/pflag" ) type PersistentConfig struct { - GlobalConfig string `koanf:"global-config"` - Chain string `koanf:"chain"` - LogDir string `koanf:"log-dir"` - Handles int `koanf:"handles"` - Ancient string `koanf:"ancient"` - DBEngine string `koanf:"db-engine"` + GlobalConfig string `koanf:"global-config"` + Chain string `koanf:"chain"` + LogDir string `koanf:"log-dir"` + Handles int `koanf:"handles"` + Ancient string `koanf:"ancient"` + DBEngine string `koanf:"db-engine"` + Pebble PebbleConfig `koanf:"pebble"` } var PersistentConfigDefault = PersistentConfig{ @@ -28,6 +33,7 @@ var PersistentConfigDefault = PersistentConfig{ Handles: 512, Ancient: "", DBEngine: "leveldb", + Pebble: PebbleConfigDefault, } func PersistentConfigAddOptions(prefix string, f *flag.FlagSet) { @@ -37,6 +43,7 @@ func PersistentConfigAddOptions(prefix string, f *flag.FlagSet) { f.Int(prefix+".handles", PersistentConfigDefault.Handles, "number of file descriptor handles to use for the database") 
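The pebble knobs registered in this file are consumed by the `ExtraOptions` helper added at the end of the file. The one that is easiest to misread is `target-file-size-equal-levels`: when it is false the per-level target doubles at each level (layer n = 2 × layer n-1), when true every level reuses the same base target. A small, self-contained sketch of that computation (illustrative only, not the actual helper):

```go
package main

import "fmt"

// perLevelTargetFileSizes mirrors the per-level loop in PebbleConfig.ExtraOptions
// below: with target-file-size-equal-levels=false the target doubles per level,
// otherwise every level reuses the same base target.
func perLevelTargetFileSizes(base int64, equalLevels bool, levels int) []int64 {
	out := make([]int64, 0, levels)
	for i := 0; i < levels; i++ {
		size := base
		if !equalLevels {
			size = base << i
		}
		out = append(out, size)
	}
	return out
}

func main() {
	fmt.Println(perLevelTargetFileSizes(2<<20, true, 7))  // defaults: 2 MiB for every level
	fmt.Println(perLevelTargetFileSizes(2<<20, false, 7)) // doubling: 2, 4, 8, ... 128 MiB
}
```

With the defaults (2 MiB target, equal levels) all seven levels use the same target; flipping the flag yields 2 MiB through 128 MiB.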
f.String(prefix+".ancient", PersistentConfigDefault.Ancient, "directory of ancient where the chain freezer can be opened") f.String(prefix+".db-engine", PersistentConfigDefault.DBEngine, "backing database implementation to use ('leveldb' or 'pebble')") + PebbleConfigAddOptions(prefix+".pebble", f) } func (c *PersistentConfig) ResolveDirectoryNames() error { @@ -94,5 +101,174 @@ func (c *PersistentConfig) Validate() error { if c.DBEngine != "leveldb" && c.DBEngine != "pebble" { return fmt.Errorf(`invalid .db-engine choice: %q, allowed "leveldb" or "pebble"`, c.DBEngine) } + if c.DBEngine == "pebble" { + if err := c.Pebble.Validate(); err != nil { + return err + } + } return nil } + +type PebbleConfig struct { + MaxConcurrentCompactions int `koanf:"max-concurrent-compactions"` + Experimental PebbleExperimentalConfig `koanf:"experimental"` +} + +var PebbleConfigDefault = PebbleConfig{ + MaxConcurrentCompactions: runtime.NumCPU(), + Experimental: PebbleExperimentalConfigDefault, +} + +func PebbleConfigAddOptions(prefix string, f *flag.FlagSet) { + f.Int(prefix+".max-concurrent-compactions", PebbleConfigDefault.MaxConcurrentCompactions, "maximum number of concurrent compactions") + PebbleExperimentalConfigAddOptions(prefix+".experimental", f) +} + +func (c *PebbleConfig) Validate() error { + if c.MaxConcurrentCompactions < 1 { + return fmt.Errorf("invalid .max-concurrent-compactions value: %d, has to be greater then 0", c.MaxConcurrentCompactions) + } + if err := c.Experimental.Validate(); err != nil { + return err + } + return nil +} + +type PebbleExperimentalConfig struct { + BytesPerSync int `koanf:"bytes-per-sync"` + L0CompactionFileThreshold int `koanf:"l0-compaction-file-threshold"` + L0CompactionThreshold int `koanf:"l0-compaction-threshold"` + L0StopWritesThreshold int `koanf:"l0-stop-writes-threshold"` + LBaseMaxBytes int64 `koanf:"l-base-max-bytes"` + MemTableStopWritesThreshold int `koanf:"mem-table-stop-writes-threshold"` + DisableAutomaticCompactions bool `koanf:"disable-automatic-compactions"` + WALBytesPerSync int `koanf:"wal-bytes-per-sync"` + WALDir string `koanf:"wal-dir"` + WALMinSyncInterval int `koanf:"wal-min-sync-interval"` + TargetByteDeletionRate int `koanf:"target-byte-deletion-rate"` + + // level specific + BlockSize int `koanf:"block-size"` + IndexBlockSize int `koanf:"index-block-size"` + TargetFileSize int64 `koanf:"target-file-size"` + TargetFileSizeEqualLevels bool `koanf:"target-file-size-equal-levels"` + + // pebble experimental + L0CompactionConcurrency int `koanf:"l0-compaction-concurrency"` + CompactionDebtConcurrency uint64 `koanf:"compaction-debt-concurrency"` + ReadCompactionRate int64 `koanf:"read-compaction-rate"` + ReadSamplingMultiplier int64 `koanf:"read-sampling-multiplier"` + MaxWriterConcurrency int `koanf:"max-writer-concurrency"` + ForceWriterParallelism bool `koanf:"force-writer-parallelism"` +} + +var PebbleExperimentalConfigDefault = PebbleExperimentalConfig{ + BytesPerSync: 512 << 10, // 512 KB + L0CompactionFileThreshold: 500, + L0CompactionThreshold: 4, + L0StopWritesThreshold: 12, + LBaseMaxBytes: 64 << 20, // 64 MB + MemTableStopWritesThreshold: 2, + DisableAutomaticCompactions: false, + WALBytesPerSync: 0, // no background syncing + WALDir: "", // use same dir as for sstables + WALMinSyncInterval: 0, // no artificial delay + TargetByteDeletionRate: 0, // deletion pacing disabled + + BlockSize: 4 << 10, // 4 KB + IndexBlockSize: 4 << 10, // 4 KB + TargetFileSize: 2 << 20, // 2 MB + TargetFileSizeEqualLevels: true, + + 
L0CompactionConcurrency: 10, + CompactionDebtConcurrency: 1 << 30, // 1GB + ReadCompactionRate: 16000, // see ReadSamplingMultiplier comment + ReadSamplingMultiplier: -1, // geth default, disables read sampling and disables read triggered compaction + MaxWriterConcurrency: 0, + ForceWriterParallelism: false, +} + +func PebbleExperimentalConfigAddOptions(prefix string, f *flag.FlagSet) { + f.Int(prefix+".bytes-per-sync", PebbleExperimentalConfigDefault.BytesPerSync, "number of bytes to write to a SSTable before calling Sync on it in the background") + f.Int(prefix+".l0-compaction-file-threshold", PebbleExperimentalConfigDefault.L0CompactionFileThreshold, "count of L0 files necessary to trigger an L0 compaction") + f.Int(prefix+".l0-compaction-threshold", PebbleExperimentalConfigDefault.L0CompactionThreshold, "amount of L0 read-amplification necessary to trigger an L0 compaction") + f.Int(prefix+".l0-stop-writes-threshold", PebbleExperimentalConfigDefault.L0StopWritesThreshold, "hard limit on L0 read-amplification, computed as the number of L0 sublevels. Writes are stopped when this threshold is reached") + f.Int64(prefix+".l-base-max-bytes", PebbleExperimentalConfigDefault.LBaseMaxBytes, "The maximum number of bytes for LBase. The base level is the level which L0 is compacted into. The base level is determined dynamically based on the existing data in the LSM. The maximum number of bytes for other levels is computed dynamically based on the base level's maximum size. When the maximum number of bytes for a level is exceeded, compaction is requested.") + f.Int(prefix+".mem-table-stop-writes-threshold", PebbleExperimentalConfigDefault.MemTableStopWritesThreshold, "hard limit on the number of queued of MemTables") + f.Bool(prefix+".disable-automatic-compactions", PebbleExperimentalConfigDefault.DisableAutomaticCompactions, "disables automatic compactions") + f.Int(prefix+".wal-bytes-per-sync", PebbleExperimentalConfigDefault.WALBytesPerSync, "number of bytes to write to a write-ahead log (WAL) before calling Sync on it in the background") + f.String(prefix+".wal-dir", PebbleExperimentalConfigDefault.WALDir, "absolute path of directory to store write-ahead logs (WALs) in. If empty, WALs will be stored in the same directory as sstables") + f.Int(prefix+".wal-min-sync-interval", PebbleExperimentalConfigDefault.WALMinSyncInterval, "minimum duration in microseconds between syncs of the WAL. If WAL syncs are requested faster than this interval, they will be artificially delayed.") + f.Int(prefix+".target-byte-deletion-rate", PebbleExperimentalConfigDefault.TargetByteDeletionRate, "rate (in bytes per second) at which sstable file deletions are limited to (under normal circumstances).") + f.Int(prefix+".block-size", PebbleExperimentalConfigDefault.BlockSize, "target uncompressed size in bytes of each table block") + f.Int(prefix+".index-block-size", PebbleExperimentalConfigDefault.IndexBlockSize, fmt.Sprintf("target uncompressed size in bytes of each index block. When the index block size is larger than this target, two-level indexes are automatically enabled. 
Setting this option to a large value (such as %d) disables the automatic creation of two-level indexes.", math.MaxInt32)) + f.Int64(prefix+".target-file-size", PebbleExperimentalConfigDefault.TargetFileSize, "target file size for the level 0") + f.Bool(prefix+".target-file-size-equal-levels", PebbleExperimentalConfigDefault.TargetFileSizeEqualLevels, "if true same target-file-size will be uses for all levels, otherwise target size for layer n = 2 * target size for layer n - 1") + + f.Int(prefix+".l0-compaction-concurrency", PebbleExperimentalConfigDefault.L0CompactionConcurrency, "threshold of L0 read-amplification at which compaction concurrency is enabled (if compaction-debt-concurrency was not already exceeded). Every multiple of this value enables another concurrent compaction up to max-concurrent-compactions.") + f.Uint64(prefix+".compaction-debt-concurrency", PebbleExperimentalConfigDefault.CompactionDebtConcurrency, "controls the threshold of compaction debt at which additional compaction concurrency slots are added. For every multiple of this value in compaction debt bytes, an additional concurrent compaction is added. This works \"on top\" of l0-compaction-concurrency, so the higher of the count of compaction concurrency slots as determined by the two options is chosen.") + f.Int64(prefix+".read-compaction-rate", PebbleExperimentalConfigDefault.ReadCompactionRate, "controls the frequency of read triggered compactions by adjusting `AllowedSeeks` in manifest.FileMetadata: AllowedSeeks = FileSize / ReadCompactionRate") + f.Int64(prefix+".read-sampling-multiplier", PebbleExperimentalConfigDefault.ReadSamplingMultiplier, "a multiplier for the readSamplingPeriod in iterator.maybeSampleRead() to control the frequency of read sampling to trigger a read triggered compaction. A value of -1 prevents sampling and disables read triggered compactions. Geth default is -1. The pebble default is 1 << 4. which gets multiplied with a constant of 1 << 16 to yield 1 << 20 (1MB).") + f.Int(prefix+".max-writer-concurrency", PebbleExperimentalConfigDefault.MaxWriterConcurrency, "maximum number of compression workers the compression queue is allowed to use. If max-writer-concurrency > 0, then the Writer will use parallelism, to compress and write blocks to disk. Otherwise, the writer will compress and write blocks to disk synchronously.") + f.Bool(prefix+".force-writer-parallelism", PebbleExperimentalConfigDefault.ForceWriterParallelism, "force parallelism in the sstable Writer for the metamorphic tests. 
Even with the MaxWriterConcurrency option set, pebble only enables parallelism in the sstable Writer if there is enough CPU available, and this option bypasses that.") +} + +func (c *PebbleExperimentalConfig) Validate() error { + if !filepath.IsAbs(c.WALDir) { + return fmt.Errorf("invalid .wal-dir directory (%s) - has to be an absolute path", c.WALDir) + } + // TODO + return nil +} + +func (c *PebbleConfig) ExtraOptions(namespace string) *pebble.ExtraOptions { + var maxConcurrentCompactions func() int + if c.MaxConcurrentCompactions > 0 { + maxConcurrentCompactions = func() int { return c.MaxConcurrentCompactions } + } + var walMinSyncInterval func() time.Duration + if c.Experimental.WALMinSyncInterval > 0 { + walMinSyncInterval = func() time.Duration { + return time.Microsecond * time.Duration(c.Experimental.WALMinSyncInterval) + } + } + var levels []pebble.ExtraLevelOptions + for i := 0; i < 7; i++ { + targetFileSize := c.Experimental.TargetFileSize + if !c.Experimental.TargetFileSizeEqualLevels { + targetFileSize = targetFileSize << i + } + levels = append(levels, pebble.ExtraLevelOptions{ + BlockSize: c.Experimental.BlockSize, + IndexBlockSize: c.Experimental.IndexBlockSize, + TargetFileSize: targetFileSize, + }) + } + walDir := c.Experimental.WALDir + if walDir != "" { + walDir = path.Join(walDir, namespace) + } + return &pebble.ExtraOptions{ + BytesPerSync: c.Experimental.BytesPerSync, + L0CompactionFileThreshold: c.Experimental.L0CompactionFileThreshold, + L0CompactionThreshold: c.Experimental.L0CompactionThreshold, + L0StopWritesThreshold: c.Experimental.L0StopWritesThreshold, + LBaseMaxBytes: c.Experimental.LBaseMaxBytes, + MemTableStopWritesThreshold: c.Experimental.MemTableStopWritesThreshold, + MaxConcurrentCompactions: maxConcurrentCompactions, + DisableAutomaticCompactions: c.Experimental.DisableAutomaticCompactions, + WALBytesPerSync: c.Experimental.WALBytesPerSync, + WALDir: walDir, + WALMinSyncInterval: walMinSyncInterval, + TargetByteDeletionRate: c.Experimental.TargetByteDeletionRate, + Experimental: pebble.ExtraOptionsExperimental{ + L0CompactionConcurrency: c.Experimental.L0CompactionConcurrency, + CompactionDebtConcurrency: c.Experimental.CompactionDebtConcurrency, + ReadCompactionRate: c.Experimental.ReadCompactionRate, + ReadSamplingMultiplier: c.Experimental.ReadSamplingMultiplier, + MaxWriterConcurrency: c.Experimental.MaxWriterConcurrency, + ForceWriterParallelism: c.Experimental.ForceWriterParallelism, + }, + Levels: levels, + } +} diff --git a/cmd/nitro/init.go b/cmd/nitro/init.go index c52c87732c..6305c41155 100644 --- a/cmd/nitro/init.go +++ b/cmd/nitro/init.go @@ -170,15 +170,15 @@ func validateBlockChain(blockChain *core.BlockChain, chainConfig *params.ChainCo return nil } -func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeConfig, chainId *big.Int, cacheConfig *core.CacheConfig, l1Client arbutil.L1Interface, rollupAddrs chaininfo.RollupAddresses) (ethdb.Database, *core.BlockChain, error) { +func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeConfig, chainId *big.Int, cacheConfig *core.CacheConfig, persistentConfig *conf.PersistentConfig, l1Client arbutil.L1Interface, rollupAddrs chaininfo.RollupAddresses) (ethdb.Database, *core.BlockChain, error) { if !config.Init.Force { - if readOnlyDb, err := stack.OpenDatabaseWithFreezer("l2chaindata", 0, 0, "", "l2chaindata/", true); err == nil { + if readOnlyDb, err := stack.OpenDatabaseWithFreezerWithExtraOptions("l2chaindata", 0, 0, "", "l2chaindata/", true, 
persistentConfig.Pebble.ExtraOptions("l2chaindata")); err == nil { if chainConfig := gethexec.TryReadStoredChainConfig(readOnlyDb); chainConfig != nil { readOnlyDb.Close() if !arbmath.BigEquals(chainConfig.ChainID, chainId) { return nil, nil, fmt.Errorf("database has chain ID %v but config has chain ID %v (are you sure this database is for the right chain?)", chainConfig.ChainID, chainId) } - chainData, err := stack.OpenDatabaseWithFreezer("l2chaindata", config.Execution.Caching.DatabaseCache, config.Persistent.Handles, config.Persistent.Ancient, "l2chaindata/", false) + chainData, err := stack.OpenDatabaseWithFreezerWithExtraOptions("l2chaindata", config.Execution.Caching.DatabaseCache, config.Persistent.Handles, config.Persistent.Ancient, "l2chaindata/", false, persistentConfig.Pebble.ExtraOptions("l2chaindata")) if err != nil { return nil, nil, err } @@ -186,8 +186,8 @@ func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeCo if err != nil { return nil, nil, err } - chainDb := rawdb.WrapDatabaseWithWasm(chainData, wasmDb) - err = pruning.PruneChainDb(ctx, chainDb, stack, &config.Init, cacheConfig, l1Client, rollupAddrs, config.Node.ValidatorRequired()) + chainDb := rawdb.WrapDatabaseWithWasm(chainData, wasmDb, 1) + err = pruning.PruneChainDb(ctx, chainDb, stack, &config.Init, cacheConfig, persistentConfig, l1Client, rollupAddrs, config.Node.ValidatorRequired()) if err != nil { return chainDb, nil, fmt.Errorf("error pruning: %w", err) } @@ -235,7 +235,7 @@ func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeCo var initDataReader statetransfer.InitDataReader = nil - chainData, err := stack.OpenDatabaseWithFreezer("l2chaindata", config.Execution.Caching.DatabaseCache, config.Persistent.Handles, config.Persistent.Ancient, "l2chaindata/", false) + chainData, err := stack.OpenDatabaseWithFreezerWithExtraOptions("l2chaindata", config.Execution.Caching.DatabaseCache, config.Persistent.Handles, config.Persistent.Ancient, "l2chaindata/", false, persistentConfig.Pebble.ExtraOptions("l2chaindata")) if err != nil { return nil, nil, err } @@ -243,7 +243,7 @@ func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeCo if err != nil { return nil, nil, err } - chainDb := rawdb.WrapDatabaseWithWasm(chainData, wasmDb) + chainDb := rawdb.WrapDatabaseWithWasm(chainData, wasmDb, 1) if config.Init.ImportFile != "" { initDataReader, err = statetransfer.NewJsonInitDataReader(config.Init.ImportFile) @@ -388,7 +388,7 @@ func openInitializeChainDb(ctx context.Context, stack *node.Node, config *NodeCo return chainDb, l2BlockChain, err } - err = pruning.PruneChainDb(ctx, chainDb, stack, &config.Init, cacheConfig, l1Client, rollupAddrs, config.Node.ValidatorRequired()) + err = pruning.PruneChainDb(ctx, chainDb, stack, &config.Init, cacheConfig, persistentConfig, l1Client, rollupAddrs, config.Node.ValidatorRequired()) if err != nil { return chainDb, nil, fmt.Errorf("error pruning: %w", err) } diff --git a/cmd/nitro/nitro.go b/cmd/nitro/nitro.go index 9280c3af02..427974b34f 100644 --- a/cmd/nitro/nitro.go +++ b/cmd/nitro/nitro.go @@ -6,6 +6,7 @@ package main import ( "context" "crypto/ecdsa" + "encoding/hex" "errors" "fmt" "io" @@ -452,7 +453,21 @@ func mainImpl() int { if len(allowedWasmModuleRoots) > 0 { moduleRootMatched := false for _, root := range allowedWasmModuleRoots { - if common.HexToHash(root) == moduleRoot { + bytes, err := hex.DecodeString(strings.TrimPrefix(root, "0x")) + if err == nil { + if common.HexToHash(root) == 
common.BytesToHash(bytes) { + moduleRootMatched = true + break + } + continue + } + locator, locatorErr := server_common.NewMachineLocator(root) + if locatorErr != nil { + log.Warn("allowed-wasm-module-roots: value not a hex nor valid path:", "value", root, "locatorErr", locatorErr, "decodeErr", err) + continue + } + path := locator.GetMachinePath(moduleRoot) + if _, err := os.Stat(path); err == nil { moduleRootMatched = true break } @@ -476,7 +491,7 @@ func mainImpl() int { } } - chainDb, l2BlockChain, err := openInitializeChainDb(ctx, stack, nodeConfig, new(big.Int).SetUint64(nodeConfig.Chain.ID), gethexec.DefaultCacheConfigFor(stack, &nodeConfig.Execution.Caching), l1Client, rollupAddrs) + chainDb, l2BlockChain, err := openInitializeChainDb(ctx, stack, nodeConfig, new(big.Int).SetUint64(nodeConfig.Chain.ID), gethexec.DefaultCacheConfigFor(stack, &nodeConfig.Execution.Caching), &nodeConfig.Persistent, l1Client, rollupAddrs) if l2BlockChain != nil { deferFuncs = append(deferFuncs, func() { l2BlockChain.Stop() }) } @@ -487,13 +502,32 @@ func mainImpl() int { return 1 } - arbDb, err := stack.OpenDatabase("arbitrumdata", 0, 0, "arbitrumdata/", false) + arbDb, err := stack.OpenDatabaseWithExtraOptions("arbitrumdata", 0, 0, "arbitrumdata/", false, nodeConfig.Persistent.Pebble.ExtraOptions("arbitrumdata")) deferFuncs = append(deferFuncs, func() { closeDb(arbDb, "arbDb") }) if err != nil { log.Error("failed to open database", "err", err) return 1 } + fatalErrChan := make(chan error, 10) + + var blocksReExecutor *blocksreexecutor.BlocksReExecutor + if nodeConfig.BlocksReExecutor.Enable && l2BlockChain != nil { + blocksReExecutor = blocksreexecutor.New(&nodeConfig.BlocksReExecutor, l2BlockChain, fatalErrChan) + if nodeConfig.Init.ThenQuit { + success := make(chan struct{}) + blocksReExecutor.Start(ctx, success) + deferFuncs = append(deferFuncs, func() { blocksReExecutor.StopAndWait() }) + select { + case err := <-fatalErrChan: + log.Error("shutting down due to fatal error", "err", err) + defer log.Error("shut down due to fatal error", "err", err) + return 1 + case <-success: + } + } + } + if nodeConfig.Init.ThenQuit && nodeConfig.Init.ResetToMessage < 0 { return 0 } @@ -514,8 +548,6 @@ func mainImpl() int { return 1 } - fatalErrChan := make(chan error, 10) - var valNode *valnode.ValidationNode if sameProcessValidationNodeEnabled { valNode, err = valnode.CreateValidationNode( @@ -644,9 +676,8 @@ func mainImpl() int { // remove previous deferFuncs, StopAndWait closes database and blockchain. 
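Stepping back to the allowed-wasm-module-roots handling added earlier in this mainImpl hunk: each configured entry is now either a hex module-root hash or a path that a MachineLocator resolves on disk. A minimal sketch of just the classification step (the hash comparison and os.Stat check from the real code are omitted; the helper name is hypothetical):

```go
package main

import (
	"encoding/hex"
	"fmt"
	"strings"
)

// classifyAllowedRoot models how mainImpl treats each allowed-wasm-module-roots
// entry: values that decode as hex are handled as module-root hashes, anything
// else is handed to a MachineLocator and checked as a machine directory.
func classifyAllowedRoot(entry string) string {
	if _, err := hex.DecodeString(strings.TrimPrefix(entry, "0x")); err == nil {
		return "hash"
	}
	return "path"
}

func main() {
	fmt.Println(classifyAllowedRoot("0x8b104a2e80ac6165dc58b9048de12f301d70b02a0ab51396c22b4b4b802a16a4")) // hash
	fmt.Println(classifyAllowedRoot("/home/user/nitro-legacy/machines"))                                    // path
}
```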
deferFuncs = []func(){func() { currentNode.StopAndWait() }} } - if nodeConfig.BlocksReExecutor.Enable && l2BlockChain != nil { - blocksReExecutor := blocksreexecutor.New(&nodeConfig.BlocksReExecutor, l2BlockChain, fatalErrChan) - blocksReExecutor.Start(ctx) + if blocksReExecutor != nil && !nodeConfig.Init.ThenQuit { + blocksReExecutor.Start(ctx, nil) deferFuncs = append(deferFuncs, func() { blocksReExecutor.StopAndWait() }) } diff --git a/cmd/pruning/pruning.go b/cmd/pruning/pruning.go index 3ef888e897..1b25811d6a 100644 --- a/cmd/pruning/pruning.go +++ b/cmd/pruning/pruning.go @@ -80,12 +80,12 @@ func (r *importantRoots) addHeader(header *types.Header, overwrite bool) error { var hashListRegex = regexp.MustCompile("^(0x)?[0-9a-fA-F]{64}(,(0x)?[0-9a-fA-F]{64})*$") // Finds important roots to retain while proving -func findImportantRoots(ctx context.Context, chainDb ethdb.Database, stack *node.Node, initConfig *conf.InitConfig, cacheConfig *core.CacheConfig, l1Client arbutil.L1Interface, rollupAddrs chaininfo.RollupAddresses, validatorRequired bool) ([]common.Hash, error) { +func findImportantRoots(ctx context.Context, chainDb ethdb.Database, stack *node.Node, initConfig *conf.InitConfig, cacheConfig *core.CacheConfig, persistentConfig *conf.PersistentConfig, l1Client arbutil.L1Interface, rollupAddrs chaininfo.RollupAddresses, validatorRequired bool) ([]common.Hash, error) { chainConfig := gethexec.TryReadStoredChainConfig(chainDb) if chainConfig == nil { return nil, errors.New("database doesn't have a chain config (was this node initialized?)") } - arbDb, err := stack.OpenDatabase("arbitrumdata", 0, 0, "arbitrumdata/", true) + arbDb, err := stack.OpenDatabaseWithExtraOptions("arbitrumdata", 0, 0, "arbitrumdata/", true, persistentConfig.Pebble.ExtraOptions("arbitrumdata")) if err != nil { return nil, err } @@ -232,11 +232,11 @@ func findImportantRoots(ctx context.Context, chainDb ethdb.Database, stack *node return roots.roots, nil } -func PruneChainDb(ctx context.Context, chainDb ethdb.Database, stack *node.Node, initConfig *conf.InitConfig, cacheConfig *core.CacheConfig, l1Client arbutil.L1Interface, rollupAddrs chaininfo.RollupAddresses, validatorRequired bool) error { +func PruneChainDb(ctx context.Context, chainDb ethdb.Database, stack *node.Node, initConfig *conf.InitConfig, cacheConfig *core.CacheConfig, persistentConfig *conf.PersistentConfig, l1Client arbutil.L1Interface, rollupAddrs chaininfo.RollupAddresses, validatorRequired bool) error { if initConfig.Prune == "" { return pruner.RecoverPruning(stack.InstanceDir(), chainDb) } - root, err := findImportantRoots(ctx, chainDb, stack, initConfig, cacheConfig, l1Client, rollupAddrs, validatorRequired) + root, err := findImportantRoots(ctx, chainDb, stack, initConfig, cacheConfig, persistentConfig, l1Client, rollupAddrs, validatorRequired) if err != nil { return fmt.Errorf("failed to find root to retain for pruning: %w", err) } diff --git a/execution/gethexec/blockchain.go b/execution/gethexec/blockchain.go index 2a20c3da26..1d5060ca8a 100644 --- a/execution/gethexec/blockchain.go +++ b/execution/gethexec/blockchain.go @@ -37,6 +37,7 @@ type CachingConfig struct { SnapshotRestoreGasLimit uint64 `koanf:"snapshot-restore-gas-limit"` MaxNumberOfBlocksToSkipStateSaving uint32 `koanf:"max-number-of-blocks-to-skip-state-saving"` MaxAmountOfGasToSkipStateSaving uint64 `koanf:"max-amount-of-gas-to-skip-state-saving"` + StylusLRUCache uint32 `koanf:"stylus-lru-cache"` } func CachingConfigAddOptions(prefix string, f *flag.FlagSet) { @@ -51,6 
+52,7 @@ func CachingConfigAddOptions(prefix string, f *flag.FlagSet) { f.Uint64(prefix+".snapshot-restore-gas-limit", DefaultCachingConfig.SnapshotRestoreGasLimit, "maximum gas rolled back to recover snapshot") f.Uint32(prefix+".max-number-of-blocks-to-skip-state-saving", DefaultCachingConfig.MaxNumberOfBlocksToSkipStateSaving, "maximum number of blocks to skip state saving to persistent storage (archive node only) -- warning: this option seems to cause issues") f.Uint64(prefix+".max-amount-of-gas-to-skip-state-saving", DefaultCachingConfig.MaxAmountOfGasToSkipStateSaving, "maximum amount of gas in blocks to skip saving state to Persistent storage (archive node only) -- warning: this option seems to cause issues") + f.Uint32(prefix+".stylus-lru-cache", DefaultCachingConfig.StylusLRUCache, "initialized stylus programs to keep in LRU cache") } var DefaultCachingConfig = CachingConfig{ @@ -65,6 +67,22 @@ var DefaultCachingConfig = CachingConfig{ SnapshotRestoreGasLimit: 300_000_000_000, MaxNumberOfBlocksToSkipStateSaving: 0, MaxAmountOfGasToSkipStateSaving: 0, + StylusLRUCache: 256, +} + +var TestCachingConfig = CachingConfig{ + Archive: false, + BlockCount: 128, + BlockAge: 30 * time.Minute, + TrieTimeLimit: time.Hour, + TrieDirtyCache: 1024, + TrieCleanCache: 600, + SnapshotCache: 400, + DatabaseCache: 2048, + SnapshotRestoreGasLimit: 300_000_000_000, + MaxNumberOfBlocksToSkipStateSaving: 0, + MaxAmountOfGasToSkipStateSaving: 0, + StylusLRUCache: 0, } // TODO remove stack from parameters as it is no longer needed here diff --git a/execution/gethexec/executionengine.go b/execution/gethexec/executionengine.go index 96dca6c63e..3ef894d402 100644 --- a/execution/gethexec/executionengine.go +++ b/execution/gethexec/executionengine.go @@ -1,6 +1,9 @@ // Copyright 2022-2024, Offchain Labs, Inc. 
// For license information, see https://github.com/OffchainLabs/nitro/blob/master/LICENSE +//go:build !wasm +// +build !wasm + package gethexec /* @@ -28,6 +31,7 @@ import ( "github.com/offchainlabs/nitro/arbos/arbosState" "github.com/offchainlabs/nitro/arbos/arbostypes" "github.com/offchainlabs/nitro/arbos/l1pricing" + "github.com/offchainlabs/nitro/arbos/programs" "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/execution" "github.com/offchainlabs/nitro/util/arbmath" @@ -72,6 +76,12 @@ func NewExecutionEngine(bc *core.BlockChain) (*ExecutionEngine, error) { }, nil } +func (n *ExecutionEngine) Initialize(rustCacheSize uint32) { + if rustCacheSize != 0 { + programs.ResizeWasmLruCache(rustCacheSize) + } +} + func (s *ExecutionEngine) SetRecorder(recorder *BlockRecorder) { if s.Started() { panic("trying to set recorder after start") @@ -137,8 +147,9 @@ func (s *ExecutionEngine) Reorg(count arbutil.MessageIndex, newMessages []arbost return nil, nil } + tag := s.bc.StateCache().WasmCacheTag() // reorg Rust-side VM state - C.stylus_reorg_vm(C.uint64_t(blockNum)) + C.stylus_reorg_vm(C.uint64_t(blockNum), C.uint32_t(tag)) err := s.bc.ReorgToOldBlock(targetBlock) if err != nil { diff --git a/execution/gethexec/node.go b/execution/gethexec/node.go index 458d6601c5..eb0d39d253 100644 --- a/execution/gethexec/node.go +++ b/execution/gethexec/node.go @@ -107,6 +107,7 @@ var ConfigDefault = Config{ func ConfigDefaultNonSequencerTest() *Config { config := ConfigDefault + config.Caching = TestCachingConfig config.ParentChainReader = headerreader.TestConfig config.Sequencer.Enable = false config.Forwarder = DefaultTestForwarderConfig @@ -119,6 +120,7 @@ func ConfigDefaultNonSequencerTest() *Config { func ConfigDefaultTest() *Config { config := ConfigDefault + config.Caching = TestCachingConfig config.Sequencer = TestSequencerConfig config.ParentChainReader = headerreader.TestConfig config.ForwardingTarget = "null" @@ -216,7 +218,7 @@ func CreateExecutionNode( var classicOutbox *ClassicOutboxRetriever if l2BlockChain.Config().ArbitrumChainParams.GenesisBlockNum > 0 { - classicMsgDb, err := stack.OpenDatabase("classic-msg", 0, 0, "classicmsg/", true) + classicMsgDb, err := stack.OpenDatabase("classic-msg", 0, 0, "classicmsg/", true) // TODO can we skip using ExtraOptions here? 
if err != nil { log.Warn("Classic Msg Database not found", "err", err) classicOutbox = nil @@ -280,6 +282,7 @@ func (n *ExecutionNode) GetL1GasPriceEstimate() (uint64, error) { } func (n *ExecutionNode) Initialize(ctx context.Context) error { + n.ExecEngine.Initialize(n.ConfigFetcher().Caching.StylusLRUCache) n.ArbInterface.Initialize(n) err := n.Backend.Start() if err != nil { diff --git a/execution/gethexec/sequencer.go b/execution/gethexec/sequencer.go index 23340594c4..dd84c352a4 100644 --- a/execution/gethexec/sequencer.go +++ b/execution/gethexec/sequencer.go @@ -4,18 +4,24 @@ package gethexec import ( + "bytes" "context" "errors" "fmt" "math" "math/big" + "os" + "path" "runtime/debug" + "runtime/pprof" + "runtime/trace" "strconv" "strings" "sync" "sync/atomic" "time" + "github.com/google/uuid" "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/execution" "github.com/offchainlabs/nitro/util/arbmath" @@ -76,6 +82,7 @@ type SequencerConfig struct { NonceFailureCacheExpiry time.Duration `koanf:"nonce-failure-cache-expiry" reload:"hot"` ExpectedSurplusSoftThreshold string `koanf:"expected-surplus-soft-threshold" reload:"hot"` ExpectedSurplusHardThreshold string `koanf:"expected-surplus-hard-threshold" reload:"hot"` + EnableProfiling bool `koanf:"enable-profiling"` expectedSurplusSoftThreshold int expectedSurplusHardThreshold int } @@ -125,6 +132,7 @@ var DefaultSequencerConfig = SequencerConfig{ NonceFailureCacheExpiry: time.Second, ExpectedSurplusSoftThreshold: "default", ExpectedSurplusHardThreshold: "default", + EnableProfiling: true, } var TestSequencerConfig = SequencerConfig{ @@ -142,6 +150,7 @@ var TestSequencerConfig = SequencerConfig{ NonceFailureCacheExpiry: time.Second, ExpectedSurplusSoftThreshold: "default", ExpectedSurplusHardThreshold: "default", + EnableProfiling: false, } func SequencerConfigAddOptions(prefix string, f *flag.FlagSet) { @@ -159,6 +168,7 @@ func SequencerConfigAddOptions(prefix string, f *flag.FlagSet) { f.Duration(prefix+".nonce-failure-cache-expiry", DefaultSequencerConfig.NonceFailureCacheExpiry, "maximum amount of time to wait for a predecessor before rejecting a tx with nonce too high") f.String(prefix+".expected-surplus-soft-threshold", DefaultSequencerConfig.ExpectedSurplusSoftThreshold, "if expected surplus is lower than this value, warnings are posted") f.String(prefix+".expected-surplus-hard-threshold", DefaultSequencerConfig.ExpectedSurplusHardThreshold, "if expected surplus is lower than this value, new incoming transactions will be denied") + f.Bool(prefix+".enable-profiling", DefaultSequencerConfig.EnableProfiling, "enable CPU profiling and tracing") } type txQueueItem struct { @@ -327,6 +337,7 @@ type Sequencer struct { expectedSurplusMutex sync.RWMutex expectedSurplus int64 expectedSurplusUpdated bool + enableProfiling bool } func NewSequencer(execEngine *ExecutionEngine, l1Reader *headerreader.HeaderReader, configFetcher SequencerConfigFetcher) (*Sequencer, error) { @@ -353,6 +364,7 @@ func NewSequencer(execEngine *ExecutionEngine, l1Reader *headerreader.HeaderRead l1Timestamp: 0, pauseChan: nil, onForwarderSet: make(chan struct{}, 1), + enableProfiling: config.EnableProfiling, } s.nonceFailures = &nonceFailureCache{ containers.NewLruCacheWithOnEvict(config.NonceCacheSize, s.onNonceFailureEvict), @@ -758,6 +770,44 @@ func (s *Sequencer) precheckNonces(queueItems []txQueueItem) []txQueueItem { return outputQueueItems } +// createBlockWithProfiling runs create block with tracing and CPU profiling +// enabled. 
+// createBlockWithProfiling runs createBlock with tracing and CPU profiling
+// enabled. If the block creation takes longer than 2 seconds, it keeps both
+// profiles and logs the file names.
+func (s *Sequencer) createBlockWithProfiling(ctx context.Context) bool {
+	pprofBuf, traceBuf := bytes.NewBuffer(nil), bytes.NewBuffer(nil)
+	if err := pprof.StartCPUProfile(pprofBuf); err != nil {
+		log.Error("Starting CPU profiling", "error", err)
+	}
+	if err := trace.Start(traceBuf); err != nil {
+		log.Error("Starting tracing", "error", err)
+	}
+	start := time.Now()
+	res := s.createBlock(ctx)
+	elapsed := time.Since(start)
+	pprof.StopCPUProfile()
+	trace.Stop()
+	if elapsed > 2*time.Second {
+		writeAndLog(pprofBuf, traceBuf)
+		return res
+	}
+	return res
+}
+
+func writeAndLog(pprof, trace *bytes.Buffer) {
+	id := uuid.NewString()
+	pprofFile := path.Join(os.TempDir(), id+".pprof")
+	if err := os.WriteFile(pprofFile, pprof.Bytes(), 0o600); err != nil {
+		log.Error("Creating temporary file for pprof", "fileName", pprofFile, "error", err)
+		return
+	}
+	traceFile := path.Join(os.TempDir(), id+".trace")
+	if err := os.WriteFile(traceFile, trace.Bytes(), 0o600); err != nil {
+		log.Error("Creating temporary file for trace", "fileName", traceFile, "error", err)
+		return
+	}
+	log.Debug("Block creation took longer than 2 seconds, created pprof and trace files", "pprof", pprofFile, "traceFile", traceFile)
+}
+
func (s *Sequencer) createBlock(ctx context.Context) (returnValue bool) {
	var queueItems []txQueueItem
	var totalBatchSize int
@@ -1088,7 +1138,12 @@ func (s *Sequencer) Start(ctxIn context.Context) error {
		s.CallIteratively(func(ctx context.Context) time.Duration {
			nextBlock := time.Now().Add(s.config().MaxBlockSpeed)
-			madeBlock := s.createBlock(ctx)
+			var madeBlock bool
+			if s.enableProfiling {
+				madeBlock = s.createBlockWithProfiling(ctx)
+			} else {
+				madeBlock = s.createBlock(ctx)
+			}
			if madeBlock {
				// Note: this may return a negative duration, but timers are fine with that (they treat negative durations as 0).
				return time.Until(nextBlock)
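The createBlockWithProfiling/writeAndLog pair above captures a CPU profile and an execution trace into memory on every block and persists them only when block creation is slow. A minimal standalone sketch of the same pattern using only the standard library (the function names, threshold, and file naming here are illustrative, not the sequencer's actual code):

```go
package main

import (
	"bytes"
	"log"
	"os"
	"path/filepath"
	"runtime/pprof"
	"runtime/trace"
	"time"
)

// profileSlowStep runs step with CPU profiling and execution tracing enabled
// and writes the captured data to tmpDir only if step took longer than threshold.
func profileSlowStep(step func(), threshold time.Duration, tmpDir string) error {
	var pprofBuf, traceBuf bytes.Buffer
	if err := pprof.StartCPUProfile(&pprofBuf); err != nil {
		return err
	}
	if err := trace.Start(&traceBuf); err != nil {
		pprof.StopCPUProfile()
		return err
	}
	start := time.Now()
	step()
	elapsed := time.Since(start)
	pprof.StopCPUProfile()
	trace.Stop()
	if elapsed <= threshold {
		return nil // fast enough: discard the buffers
	}
	if err := os.WriteFile(filepath.Join(tmpDir, "step.pprof"), pprofBuf.Bytes(), 0o600); err != nil {
		return err
	}
	return os.WriteFile(filepath.Join(tmpDir, "step.trace"), traceBuf.Bytes(), 0o600)
}

func main() {
	slow := func() { time.Sleep(50 * time.Millisecond) }
	if err := profileSlowStep(slow, 10*time.Millisecond, os.TempDir()); err != nil {
		log.Fatal(err)
	}
}
```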
diff --git a/go-ethereum b/go-ethereum
index 8048ac4bed..f45f6d7560 160000
--- a/go-ethereum
+++ b/go-ethereum
@@ -1 +1 @@
-Subproject commit 8048ac4bed2eda18284e3c022ea5ee4cce771134
+Subproject commit f45f6d75601626daf108aa62ea6cb1549d91c528
diff --git a/nitro-testnode b/nitro-testnode
index e530842e58..c334820b2d 160000
--- a/nitro-testnode
+++ b/nitro-testnode
@@ -1 +1 @@
-Subproject commit e530842e583e2f3543f97a71c3a7cb53f8a10814
+Subproject commit c334820b2dba6dfa4078f81ed242afbbccc19c91
diff --git a/pubsub/common.go b/pubsub/common.go
new file mode 100644
index 0000000000..9f05304e46
--- /dev/null
+++ b/pubsub/common.go
@@ -0,0 +1,29 @@
+package pubsub
+
+import (
+	"context"
+
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/go-redis/redis/v8"
+)
+
+// CreateStream tries to create a stream with the given name; if the stream
+// already exists, no error is returned.
+func CreateStream(ctx context.Context, streamName string, client redis.UniversalClient) error {
+	_, err := client.XGroupCreateMkStream(ctx, streamName, streamName, "$").Result()
+	if err != nil && !StreamExists(ctx, streamName, client) {
+		return err
+	}
+	return nil
+}
+
+// StreamExists reports whether the specified redis stream exists, based on
+// an XINFO STREAM query.
+func StreamExists(ctx context.Context, streamName string, client redis.UniversalClient) bool {
+	got, err := client.Do(ctx, "XINFO", "STREAM", streamName).Result()
+	if err != nil {
+		log.Error("Reading redis streams", "error", err)
+		return false
+	}
+	return got != nil
+}
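CreateStream above treats "stream already exists" as success by re-checking with StreamExists. An alternative, shown below as a hedged sketch against the same go-redis v8 API, is to inspect redis's BUSYGROUP reply directly; the address, stream, and group names are placeholders:

```go
package main

import (
	"context"
	"fmt"
	"strings"

	"github.com/go-redis/redis/v8"
)

// ensureStreamGroup creates the stream and its consumer group if they are missing.
// Redis replies BUSYGROUP when the group already exists, which we treat as success.
func ensureStreamGroup(ctx context.Context, client redis.UniversalClient, stream, group string) error {
	err := client.XGroupCreateMkStream(ctx, stream, group, "$").Err()
	if err != nil && !strings.Contains(err.Error(), "BUSYGROUP") {
		return fmt.Errorf("creating stream %q: %w", stream, err)
	}
	return nil
}

func main() {
	client := redis.NewClient(&redis.Options{Addr: "localhost:6379"})
	defer client.Close()
	if err := ensureStreamGroup(context.Background(), client, "validation-stream", "validation-stream"); err != nil {
		fmt.Println("error:", err)
	}
}
```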
diff --git a/pubsub/consumer.go b/pubsub/consumer.go
index 7a5078ee00..df3695606d 100644
--- a/pubsub/consumer.go
+++ b/pubsub/consumer.go
@@ -57,14 +57,13 @@ func NewConsumer[Request any, Response any](client redis.UniversalClient, stream
	if streamName == "" {
		return nil, fmt.Errorf("redis stream name cannot be empty")
	}
-	consumer := &Consumer[Request, Response]{
+	return &Consumer[Request, Response]{
		id:          uuid.NewString(),
		client:      client,
		redisStream: streamName,
		redisGroup:  streamName, // There is 1-1 mapping of redis stream and consumer group.
		cfg:         cfg,
-	}
-	return consumer, nil
+	}, nil
}

// Start starts the consumer to iteratively perform heartbeat in configured intervals.
@@ -80,16 +79,36 @@ func (c *Consumer[Request, Response]) Start(ctx context.Context) {

func (c *Consumer[Request, Response]) StopAndWait() {
	c.StopWaiter.StopAndWait()
+	c.deleteHeartBeat(c.GetParentContext())
}

func heartBeatKey(id string) string {
	return fmt.Sprintf("consumer:%s:heartbeat", id)
}

+func (c *Consumer[Request, Response]) RedisClient() redis.UniversalClient {
+	return c.client
+}
+
+func (c *Consumer[Request, Response]) StreamName() string {
+	return c.redisStream
+}
+
func (c *Consumer[Request, Response]) heartBeatKey() string {
	return heartBeatKey(c.id)
}

+// deleteHeartBeat deletes the heartbeat key to indicate that the consumer is shutting down.
+func (c *Consumer[Request, Response]) deleteHeartBeat(ctx context.Context) {
+	if err := c.client.Del(ctx, c.heartBeatKey()).Err(); err != nil {
+		l := log.Info
+		if ctx.Err() != nil {
+			l = log.Error
+		}
+		l("Deleting heartbeat", "consumer", c.id, "error", err)
+	}
+}
+
// heartBeat updates the heartBeat key indicating aliveness.
func (c *Consumer[Request, Response]) heartBeat(ctx context.Context) {
	if err := c.client.Set(ctx, c.heartBeatKey(), time.Now().UnixMilli(), 2*c.cfg.KeepAliveTimeout).Err(); err != nil {
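The consumer now advertises liveness with a TTL'd heartbeat key and deletes it in StopAndWait so pending work can be reclaimed promptly instead of waiting for the key to expire. A self-contained sketch of that lifecycle (the types and names below are illustrative, not the pubsub package's API):

```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/go-redis/redis/v8"
)

// heartbeater mirrors the consumer's liveness scheme: a TTL'd key is refreshed
// while it runs and removed explicitly on shutdown.
type heartbeater struct {
	client    redis.UniversalClient
	id        string
	keepAlive time.Duration
}

func (h *heartbeater) key() string { return fmt.Sprintf("consumer:%s:heartbeat", h.id) }

func (h *heartbeater) run(ctx context.Context) {
	ticker := time.NewTicker(h.keepAlive)
	defer ticker.Stop()
	for {
		// TTL is twice the refresh interval so one missed beat does not mark us dead.
		if err := h.client.Set(ctx, h.key(), time.Now().UnixMilli(), 2*h.keepAlive).Err(); err != nil {
			fmt.Println("heartbeat failed:", err)
		}
		select {
		case <-ctx.Done():
			// Graceful shutdown: delete the heartbeat key explicitly.
			_ = h.client.Del(context.Background(), h.key()).Err()
			return
		case <-ticker.C:
		}
	}
}

func main() {
	client := redis.NewClient(&redis.Options{Addr: "localhost:6379"}) // placeholder address
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	(&heartbeater{client: client, id: "consumer-1", keepAlive: time.Second}).run(ctx)
}
```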
diff --git a/pubsub/pubsub_test.go b/pubsub/pubsub_test.go
index 31f6d9e20a..72504602e3 100644
--- a/pubsub/pubsub_test.go
+++ b/pubsub/pubsub_test.go
@@ -4,8 +4,10 @@ import (
	"context"
	"errors"
	"fmt"
+	"os"
	"sort"
	"testing"
+	"time"

	"github.com/ethereum/go-ethereum/log"
	"github.com/go-redis/redis/v8"
@@ -201,6 +203,7 @@ func consume(ctx context.Context, t *testing.T, consumers []*Consumer[testReques
}

func TestRedisProduce(t *testing.T) {
+	log.SetDefault(log.NewLogger(log.NewTerminalHandlerWithLevel(os.Stderr, log.LevelTrace, true)))
	t.Parallel()
	for _, tc := range []struct {
		name          string
		killConsumers bool
	}{
		{
			name:          "all consumers are active",
			killConsumers: false,
		},
		{
			name:          "some consumers killed, others should take over their work",
-			killConsumers: false,
+			killConsumers: true,
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
@@ -229,6 +232,7 @@ func TestRedisProduce(t *testing.T) {
				// Consumer messages in every third consumer but don't ack them to check
				// that other consumers will claim ownership on those messages.
				for i := 0; i < len(consumers); i += 3 {
+					consumers[i].Start(ctx)
					if _, err := consumers[i].Consume(ctx); err != nil {
						t.Errorf("Error consuming message: %v", err)
					}
@@ -236,6 +240,7 @@
				}
			}

+			time.Sleep(time.Second)
			gotMessages, wantResponses := consume(ctx, t, consumers)
			gotResponses, err := awaitResponses(ctx, promises)
			if err != nil {
@@ -243,7 +248,7 @@
			}
			producer.StopAndWait()
			for _, c := range consumers {
-				c.StopWaiter.StopAndWait()
+				c.StopAndWait()
			}
			got, err := mergeValues(gotMessages)
			if err != nil {
@@ -280,6 +285,7 @@ func TestRedisReproduceDisabled(t *testing.T) {
			// Consumer messages in every third consumer but don't ack them to check
			// that other consumers will claim ownership on those messages.
			for i := 0; i < len(consumers); i += 3 {
+				consumers[i].Start(ctx)
				if _, err := consumers[i].Consume(ctx); err != nil {
					t.Errorf("Error consuming message: %v", err)
				}
diff --git a/scripts/split-val-entry.sh b/scripts/split-val-entry.sh
index 6f56a8ec46..8e1be0f6cc 100755
--- a/scripts/split-val-entry.sh
+++ b/scripts/split-val-entry.sh
@@ -16,4 +16,4 @@ for port in 52000 52001; do
	done
done
echo launching nitro-node
-/usr/local/bin/nitro --node.block-validator.validation-server-configs-list='[{"jwtsecret":"/tmp/nitro-val.jwt","url":"http://127.0.0.10:52000"}, {"jwtsecret":"/tmp/nitro-val.jwt","url":"http://127.0.0.10:52001"}]' "$@"
+/usr/local/bin/nitro --validation.wasm.allowed-wasm-module-roots /home/user/nitro-legacy/machines,/home/user/target/machines --node.block-validator.validation-server-configs-list='[{"jwtsecret":"/tmp/nitro-val.jwt","url":"http://127.0.0.10:52000"}, {"jwtsecret":"/tmp/nitro-val.jwt","url":"http://127.0.0.10:52001"}]' "$@"
diff --git a/staker/block_validator.go b/staker/block_validator.go
index e494b3da10..0fea05469f 100644
--- a/staker/block_validator.go
+++ b/staker/block_validator.go
@@ -791,8 +791,9 @@ validationsLoop:
	}
	for _, moduleRoot := range wasmRoots {
		if v.chosenValidator[moduleRoot] == nil {
-			v.possiblyFatal(fmt.Errorf("did not find spawner for moduleRoot :%v", moduleRoot))
-			continue
+			notFoundErr := fmt.Errorf("did not find spawner for moduleRoot :%v", moduleRoot)
+			v.possiblyFatal(notFoundErr)
+			return nil, notFoundErr
		}
		if v.chosenValidator[moduleRoot].Room() == 0 {
			log.Trace("advanceValidations: no more room", "moduleRoot", moduleRoot)
@@ -1088,7 +1089,7 @@ func (v *BlockValidator) Initialize(ctx context.Context) error {
	}
	// First spawner is always RedisValidationClient if RedisStreams are enabled.
	if v.redisValidator != nil {
-		err := v.redisValidator.Initialize(moduleRoots)
+		err := v.redisValidator.Initialize(ctx, moduleRoots)
		if err != nil {
			return err
		}
@@ -1097,13 +1098,18 @@ func (v *BlockValidator) Initialize(ctx context.Context) error {
	for _, root := range moduleRoots {
		if v.redisValidator != nil && validator.SpawnerSupportsModule(v.redisValidator, root) {
			v.chosenValidator[root] = v.redisValidator
+			log.Info("validator chosen", "WasmModuleRoot", root, "chosen", "redis")
		} else {
			for _, spawner := range v.execSpawners {
				if validator.SpawnerSupportsModule(spawner, root) {
					v.chosenValidator[root] = spawner
+					log.Info("validator chosen", "WasmModuleRoot", root, "chosen", spawner.Name())
					break
				}
			}
+			if v.chosenValidator[root] == nil {
+				return fmt.Errorf("cannot validate WasmModuleRoot %v", root)
+			}
		}
	}
	return nil
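The Initialize changes above make spawner selection strict: every on-chain module root must end up with a validator, with the redis client preferred over local execution spawners. The sketch below restates that selection loop with simplified stand-in types (the interface is hypothetical; only the prefer/fallback/fail logic mirrors the diff):

```go
package main

import "fmt"

// spawner is a trimmed-down stand-in for the validation spawner used by the
// block validator; the real interface in this codebase has more methods.
type spawner interface {
	Name() string
	SupportsModule(root string) bool
}

type namedSpawner struct {
	name  string
	roots map[string]bool
}

func (s namedSpawner) Name() string                    { return s.name }
func (s namedSpawner) SupportsModule(root string) bool { return s.roots[root] }

// chooseValidators mirrors the selection loop in BlockValidator.Initialize:
// prefer the redis spawner when it supports a module root, otherwise fall back
// to the first execution spawner that does, and fail if nothing can validate a root.
func chooseValidators(roots []string, redisSpawner spawner, execSpawners []spawner) (map[string]spawner, error) {
	chosen := make(map[string]spawner, len(roots))
	for _, root := range roots {
		if redisSpawner != nil && redisSpawner.SupportsModule(root) {
			chosen[root] = redisSpawner
			continue
		}
		for _, s := range execSpawners {
			if s.SupportsModule(root) {
				chosen[root] = s
				break
			}
		}
		if chosen[root] == nil {
			return nil, fmt.Errorf("cannot validate WasmModuleRoot %v", root)
		}
	}
	return chosen, nil
}

func main() {
	redisSpawner := namedSpawner{name: "redis", roots: map[string]bool{"rootA": true}}
	jitSpawner := namedSpawner{name: "jit", roots: map[string]bool{"rootA": true, "rootB": true}}
	chosen, err := chooseValidators([]string{"rootA", "rootB"}, redisSpawner, []spawner{jitSpawner})
	fmt.Println(chosen, err)
}
```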
"github.com/offchainlabs/nitro/cmd/genericconf" "github.com/offchainlabs/nitro/das" "github.com/offchainlabs/nitro/deploy" @@ -773,12 +774,12 @@ func createL2BlockChainWithStackConfig( stack, err = node.New(stackConfig) Require(t, err) - chainData, err := stack.OpenDatabase("l2chaindata", 0, 0, "l2chaindata/", false) + chainData, err := stack.OpenDatabaseWithExtraOptions("l2chaindata", 0, 0, "l2chaindata/", false, conf.PersistentConfigDefault.Pebble.ExtraOptions("l2chaindata")) Require(t, err) wasmData, err := stack.OpenDatabase("wasm", 0, 0, "wasm/", false) Require(t, err) - chainDb := rawdb.WrapDatabaseWithWasm(chainData, wasmData) - arbDb, err := stack.OpenDatabase("arbitrumdata", 0, 0, "arbitrumdata/", false) + chainDb := rawdb.WrapDatabaseWithWasm(chainData, wasmData, 0) + arbDb, err := stack.OpenDatabaseWithExtraOptions("arbitrumdata", 0, 0, "arbitrumdata/", false, conf.PersistentConfigDefault.Pebble.ExtraOptions("arbitrumdata")) Require(t, err) initReader := statetransfer.NewMemoryInitDataReader(&l2info.ArbInitData) @@ -980,13 +981,13 @@ func Create2ndNodeWithConfig( l2stack, err := node.New(stackConfig) Require(t, err) - l2chainData, err := l2stack.OpenDatabase("l2chaindata", 0, 0, "l2chaindata/", false) + l2chainData, err := l2stack.OpenDatabaseWithExtraOptions("l2chaindata", 0, 0, "l2chaindata/", false, conf.PersistentConfigDefault.Pebble.ExtraOptions("l2chaindata")) Require(t, err) wasmData, err := l2stack.OpenDatabase("wasm", 0, 0, "wasm/", false) Require(t, err) - l2chainDb := rawdb.WrapDatabaseWithWasm(l2chainData, wasmData) + l2chainDb := rawdb.WrapDatabaseWithWasm(l2chainData, wasmData, 0) - l2arbDb, err := l2stack.OpenDatabase("arbitrumdata", 0, 0, "arbitrumdata/", false) + l2arbDb, err := l2stack.OpenDatabaseWithExtraOptions("arbitrumdata", 0, 0, "arbitrumdata/", false, conf.PersistentConfigDefault.Pebble.ExtraOptions("arbitrumdata")) Require(t, err) initReader := statetransfer.NewMemoryInitDataReader(l2InitData) diff --git a/system_tests/das_test.go b/system_tests/das_test.go index a5ce02d87b..d15c5e6ba6 100644 --- a/system_tests/das_test.go +++ b/system_tests/das_test.go @@ -26,6 +26,7 @@ import ( "github.com/offchainlabs/nitro/arbnode" "github.com/offchainlabs/nitro/arbutil" "github.com/offchainlabs/nitro/blsSignatures" + "github.com/offchainlabs/nitro/cmd/conf" "github.com/offchainlabs/nitro/cmd/genericconf" "github.com/offchainlabs/nitro/das" "github.com/offchainlabs/nitro/execution/gethexec" @@ -177,10 +178,10 @@ func TestDASRekey(t *testing.T) { l2stackA, err := node.New(stackConfig) Require(t, err) - l2chainDb, err := l2stackA.OpenDatabase("l2chaindata", 0, 0, "l2chaindata/", false) + l2chainDb, err := l2stackA.OpenDatabaseWithExtraOptions("l2chaindata", 0, 0, "l2chaindata/", false, conf.PersistentConfigDefault.Pebble.ExtraOptions("l2chaindata")) Require(t, err) - l2arbDb, err := l2stackA.OpenDatabase("arbitrumdata", 0, 0, "arbitrumdata/", false) + l2arbDb, err := l2stackA.OpenDatabaseWithExtraOptions("arbitrumdata", 0, 0, "arbitrumdata/", false, conf.PersistentConfigDefault.Pebble.ExtraOptions("arbitrumdata")) Require(t, err) l2blockchain, err := gethexec.GetBlockChain(l2chainDb, nil, chainConfig, gethexec.ConfigDefaultTest().TxLookupLimit) diff --git a/system_tests/pruning_test.go b/system_tests/pruning_test.go index 8efc8653e6..041781ac48 100644 --- a/system_tests/pruning_test.go +++ b/system_tests/pruning_test.go @@ -65,7 +65,7 @@ func TestPruning(t *testing.T) { stack, err := node.New(builder.l2StackConfig) Require(t, err) defer stack.Close() - chainDb, err 
diff --git a/system_tests/pruning_test.go b/system_tests/pruning_test.go
index 8efc8653e6..041781ac48 100644
--- a/system_tests/pruning_test.go
+++ b/system_tests/pruning_test.go
@@ -65,7 +65,7 @@ func TestPruning(t *testing.T) {
	stack, err := node.New(builder.l2StackConfig)
	Require(t, err)
	defer stack.Close()
-	chainDb, err := stack.OpenDatabase("l2chaindata", 0, 0, "l2chaindata/", false)
+	chainDb, err := stack.OpenDatabaseWithExtraOptions("l2chaindata", 0, 0, "l2chaindata/", false, conf.PersistentConfigDefault.Pebble.ExtraOptions("l2chaindata"))
	Require(t, err)
	defer chainDb.Close()
	chainDbEntriesBeforePruning := countStateEntries(chainDb)
@@ -89,7 +89,8 @@ func TestPruning(t *testing.T) {
	initConfig := conf.InitConfigDefault
	initConfig.Prune = "full"
	coreCacheConfig := gethexec.DefaultCacheConfigFor(stack, &builder.execConfig.Caching)
-	err = pruning.PruneChainDb(ctx, chainDb, stack, &initConfig, coreCacheConfig, builder.L1.Client, *builder.L2.ConsensusNode.DeployInfo, false)
+	persistentConfig := conf.PersistentConfigDefault
+	err = pruning.PruneChainDb(ctx, chainDb, stack, &initConfig, coreCacheConfig, &persistentConfig, builder.L1.Client, *builder.L2.ConsensusNode.DeployInfo, false)
	Require(t, err)

	for _, key := range testKeys {
diff --git a/system_tests/recreatestate_rpc_test.go b/system_tests/recreatestate_rpc_test.go
index 777ed17961..bf321808de 100644
--- a/system_tests/recreatestate_rpc_test.go
+++ b/system_tests/recreatestate_rpc_test.go
@@ -449,7 +449,7 @@ func testSkippingSavingStateAndRecreatingAfterRestart(t *testing.T, cacheConfig
}

func TestSkippingSavingStateAndRecreatingAfterRestart(t *testing.T) {
-	cacheConfig := gethexec.DefaultCachingConfig
+	cacheConfig := gethexec.TestCachingConfig
	cacheConfig.Archive = true
	cacheConfig.SnapshotCache = 0 // disable snapshots
	cacheConfig.BlockAge = 0      // use only Caching.BlockCount to keep only last N blocks in dirties cache, no matter how new they are
diff --git a/system_tests/staterecovery_test.go b/system_tests/staterecovery_test.go
index 632e748da8..1dd4be2444 100644
--- a/system_tests/staterecovery_test.go
+++ b/system_tests/staterecovery_test.go
@@ -9,6 +9,7 @@ import (
	"github.com/ethereum/go-ethereum/core/types"
	"github.com/ethereum/go-ethereum/node"
	"github.com/ethereum/go-ethereum/trie"
+	"github.com/offchainlabs/nitro/cmd/conf"
	"github.com/offchainlabs/nitro/cmd/staterecovery"
	"github.com/offchainlabs/nitro/execution/gethexec"
)
@@ -49,10 +50,10 @@ func TestRectreateMissingStates(t *testing.T) {
	stack, err := node.New(builder.l2StackConfig)
	Require(t, err)
	defer stack.Close()
-	chainDb, err := stack.OpenDatabase("l2chaindata", 0, 0, "l2chaindata/", false)
+	chainDb, err := stack.OpenDatabaseWithExtraOptions("l2chaindata", 0, 0, "l2chaindata/", false, conf.PersistentConfigDefault.Pebble.ExtraOptions("l2chaindata"))
	Require(t, err)
	defer chainDb.Close()
-	cacheConfig := gethexec.DefaultCacheConfigFor(stack, &gethexec.DefaultCachingConfig)
+	cacheConfig := gethexec.DefaultCacheConfigFor(stack, &gethexec.TestCachingConfig)
	bc, err := gethexec.GetBlockChain(chainDb, cacheConfig, builder.chainConfig, builder.execConfig.TxLookupLimit)
	Require(t, err)
	err = staterecovery.RecreateMissingStates(chainDb, bc, cacheConfig, 1)
koanf:"arg-log-limit" reload:"hot"` - RetryErrors string `json:"retry-errors,omitempty" koanf:"retry-errors" reload:"hot"` - RetryDelay time.Duration `json:"retry-delay,omitempty" koanf:"retry-delay"` + URL string `json:"url,omitempty" koanf:"url"` + JWTSecret string `json:"jwtsecret,omitempty" koanf:"jwtsecret"` + Timeout time.Duration `json:"timeout,omitempty" koanf:"timeout" reload:"hot"` + Retries uint `json:"retries,omitempty" koanf:"retries" reload:"hot"` + ConnectionWait time.Duration `json:"connection-wait,omitempty" koanf:"connection-wait"` + ArgLogLimit uint `json:"arg-log-limit,omitempty" koanf:"arg-log-limit" reload:"hot"` + RetryErrors string `json:"retry-errors,omitempty" koanf:"retry-errors" reload:"hot"` + RetryDelay time.Duration `json:"retry-delay,omitempty" koanf:"retry-delay"` + WebsocketMessageSizeLimit int64 `json:"websocket-message-size-limit,omitempty" koanf:"websocket-message-size-limit"` retryErrors *regexp.Regexp } @@ -46,16 +47,18 @@ func (c *ClientConfig) Validate() error { type ClientConfigFetcher func() *ClientConfig var TestClientConfig = ClientConfig{ - URL: "self", - JWTSecret: "", + URL: "self", + JWTSecret: "", + WebsocketMessageSizeLimit: 256 * 1024 * 1024, } var DefaultClientConfig = ClientConfig{ - URL: "self-auth", - JWTSecret: "", - Retries: 3, - RetryErrors: "websocket: close.*|dial tcp .*|.*i/o timeout|.*connection reset by peer|.*connection refused", - ArgLogLimit: 2048, + URL: "self-auth", + JWTSecret: "", + Retries: 3, + RetryErrors: "websocket: close.*|dial tcp .*|.*i/o timeout|.*connection reset by peer|.*connection refused", + ArgLogLimit: 2048, + WebsocketMessageSizeLimit: 256 * 1024 * 1024, } func RPCClientAddOptions(prefix string, f *flag.FlagSet, defaultConfig *ClientConfig) { @@ -67,6 +70,7 @@ func RPCClientAddOptions(prefix string, f *flag.FlagSet, defaultConfig *ClientCo f.Uint(prefix+".retries", defaultConfig.Retries, "number of retries in case of failure(0 mean one attempt)") f.String(prefix+".retry-errors", defaultConfig.RetryErrors, "Errors matching this regular expression are automatically retried") f.Duration(prefix+".retry-delay", defaultConfig.RetryDelay, "delay between retries") + f.Int64(prefix+".websocket-message-size-limit", defaultConfig.WebsocketMessageSizeLimit, "websocket message size limit used by the RPC client. 
}

type RpcClient struct {
@@ -256,9 +260,9 @@ func (c *RpcClient) Start(ctx_in context.Context) error {
		var err error
		var client *rpc.Client
		if jwt == nil {
-			client, err = rpc.DialContext(ctx, url)
+			client, err = rpc.DialOptions(ctx, url, rpc.WithWebsocketMessageSizeLimit(c.config().WebsocketMessageSizeLimit))
		} else {
-			client, err = rpc.DialOptions(ctx, url, rpc.WithHTTPAuth(node.NewJWTAuth([32]byte(*jwt))))
+			client, err = rpc.DialOptions(ctx, url, rpc.WithHTTPAuth(node.NewJWTAuth([32]byte(*jwt))), rpc.WithWebsocketMessageSizeLimit(c.config().WebsocketMessageSizeLimit))
		}
		cancelCtx()
		if err == nil {
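The new websocket-message-size-limit option is applied through rpc.WithWebsocketMessageSizeLimit when dialing. A minimal usage sketch against the go-ethereum rpc client (the endpoint URL is a placeholder; 256 MiB matches the new default above):

```go
package main

import (
	"context"
	"log"

	"github.com/ethereum/go-ethereum/rpc"
)

func main() {
	ctx := context.Background()
	// Raise the websocket frame limit to 256 MiB so large responses are not dropped.
	client, err := rpc.DialOptions(ctx, "ws://127.0.0.1:8548",
		rpc.WithWebsocketMessageSizeLimit(256*1024*1024))
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	var blockNumber string
	if err := client.CallContext(ctx, &blockNumber, "eth_blockNumber"); err != nil {
		log.Fatal(err)
	}
	log.Println("latest block:", blockNumber)
}
```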
diff --git a/validator/client/redis/producer.go b/validator/client/redis/producer.go
index 1055d93968..41ae100954 100644
--- a/validator/client/redis/producer.go
+++ b/validator/client/redis/producer.go
@@ -23,6 +23,7 @@ type ValidationClientConfig struct {
	Room           int32                 `koanf:"room"`
	RedisURL       string                `koanf:"redis-url"`
	ProducerConfig pubsub.ProducerConfig `koanf:"producer-config"`
+	CreateStreams  bool                  `koanf:"create-streams"`
}

func (c ValidationClientConfig) Enabled() bool {
@@ -34,6 +35,7 @@ var DefaultValidationClientConfig = ValidationClientConfig{
	Room:           2,
	RedisURL:       "",
	ProducerConfig: pubsub.DefaultProducerConfig,
+	CreateStreams:  true,
}

var TestValidationClientConfig = ValidationClientConfig{
@@ -41,12 +43,14 @@ var TestValidationClientConfig = ValidationClientConfig{
	Room:           2,
	RedisURL:       "",
	ProducerConfig: pubsub.TestProducerConfig,
+	CreateStreams:  false,
}

func ValidationClientConfigAddOptions(prefix string, f *pflag.FlagSet) {
	f.String(prefix+".name", DefaultValidationClientConfig.Name, "validation client name")
	f.Int32(prefix+".room", DefaultValidationClientConfig.Room, "validation client room")
	pubsub.ProducerAddConfigAddOptions(prefix+".producer-config", f)
+	f.Bool(prefix+".create-streams", DefaultValidationClientConfig.CreateStreams, "create redis streams if they do not exist")
}

// ValidationClient implements validation client through redis streams.
@@ -59,6 +63,7 @@ type ValidationClient struct {
	producerConfig pubsub.ProducerConfig
	redisClient    redis.UniversalClient
	moduleRoots    []common.Hash
+	createStreams  bool
}

func NewValidationClient(cfg *ValidationClientConfig) (*ValidationClient, error) {
@@ -75,11 +80,17 @@ func NewValidationClient(cfg *ValidationClientConfig) (*ValidationClient, error)
		producers:      make(map[common.Hash]*pubsub.Producer[*validator.ValidationInput, validator.GoGlobalState]),
		producerConfig: cfg.ProducerConfig,
		redisClient:    redisClient,
+		createStreams:  cfg.CreateStreams,
	}, nil
}

-func (c *ValidationClient) Initialize(moduleRoots []common.Hash) error {
+func (c *ValidationClient) Initialize(ctx context.Context, moduleRoots []common.Hash) error {
	for _, mr := range moduleRoots {
+		if c.createStreams {
+			if err := pubsub.CreateStream(ctx, server_api.RedisStreamForRoot(mr), c.redisClient); err != nil {
+				return fmt.Errorf("creating redis stream: %w", err)
+			}
+		}
		if _, exists := c.producers[mr]; exists {
			log.Warn("Producer already existsw for module root", "hash", mr)
			continue
diff --git a/validator/server_common/machine_locator.go b/validator/server_common/machine_locator.go
index 28093c30f0..71f6af60b6 100644
--- a/validator/server_common/machine_locator.go
+++ b/validator/server_common/machine_locator.go
@@ -58,7 +58,7 @@ func NewMachineLocator(rootPath string) (*MachineLocator, error) {
	for _, dir := range dirs {
		fInfo, err := os.Stat(dir)
		if err != nil {
-			log.Warn("Getting file info", "error", err)
+			log.Warn("Getting file info", "dir", dir, "error", err)
			continue
		}
		if !fInfo.IsDir() {
diff --git a/validator/valnode/redis/consumer.go b/validator/valnode/redis/consumer.go
index 1cadaf7c9a..3569e78b5c 100644
--- a/validator/valnode/redis/consumer.go
+++ b/validator/valnode/redis/consumer.go
@@ -22,7 +22,8 @@ type ValidationServer struct {
	spawner validator.ValidationSpawner

	// consumers stores moduleRoot to consumer mapping.
-	consumers map[common.Hash]*pubsub.Consumer[*validator.ValidationInput, validator.GoGlobalState]
+	consumers     map[common.Hash]*pubsub.Consumer[*validator.ValidationInput, validator.GoGlobalState]
+	streamTimeout time.Duration
}

func NewValidationServer(cfg *ValidationServerConfig, spawner validator.ValidationSpawner) (*ValidationServer, error) {
@@ -43,39 +44,83 @@ func NewValidationServer(cfg *ValidationServerConfig, spawner validator.Validati
		consumers[mr] = c
	}
	return &ValidationServer{
-		consumers: consumers,
-		spawner:   spawner,
+		consumers:     consumers,
+		spawner:       spawner,
+		streamTimeout: cfg.StreamTimeout,
	}, nil
}

func (s *ValidationServer) Start(ctx_in context.Context) {
	s.StopWaiter.Start(ctx_in, s)
+	// Channel that all consumers use to indicate their readiness.
+	readyStreams := make(chan struct{}, len(s.consumers))
	for moduleRoot, c := range s.consumers {
		c := c
+		moduleRoot := moduleRoot
		c.Start(ctx_in)
-		s.StopWaiter.CallIteratively(func(ctx context.Context) time.Duration {
-			req, err := c.Consume(ctx)
-			if err != nil {
-				log.Error("Consuming request", "error", err)
-				return 0
+		// Channel for a single consumer; once readiness is indicated here,
+		// the consumer starts consuming iteratively.
+		ready := make(chan struct{}, 1)
+		s.StopWaiter.LaunchThread(func(ctx context.Context) {
+			for {
+				if pubsub.StreamExists(ctx, c.StreamName(), c.RedisClient()) {
+					ready <- struct{}{}
+					readyStreams <- struct{}{}
+					return
+				}
+				select {
+				case <-ctx.Done():
+					log.Info("Context done", "error", ctx.Err().Error())
+					return
+				case <-time.After(time.Millisecond * 100):
+				}
			}
-			if req == nil {
-				// There's nothing in the queue.
-				return time.Second
-			}
-			valRun := s.spawner.Launch(req.Value, moduleRoot)
-			res, err := valRun.Await(ctx)
-			if err != nil {
-				log.Error("Error validating", "request value", req.Value, "error", err)
-				return 0
-			}
-			if err := c.SetResult(ctx, req.ID, res); err != nil {
-				log.Error("Error setting result for request", "id", req.ID, "result", res, "error", err)
-				return 0
+		})
+		s.StopWaiter.LaunchThread(func(ctx context.Context) {
+			select {
+			case <-ctx.Done():
+				log.Info("Context done", "error", ctx.Err().Error())
+				return
+			case <-ready: // Wait until the stream exists and start consuming iteratively.
			}
-			return time.Second
+			s.StopWaiter.CallIteratively(func(ctx context.Context) time.Duration {
+				req, err := c.Consume(ctx)
+				if err != nil {
+					log.Error("Consuming request", "error", err)
+					return 0
+				}
+				if req == nil {
+					// There's nothing in the queue.
+					return time.Second
+				}
+				valRun := s.spawner.Launch(req.Value, moduleRoot)
+				res, err := valRun.Await(ctx)
+				if err != nil {
+					log.Error("Error validating", "request value", req.Value, "error", err)
+					return 0
+				}
+				if err := c.SetResult(ctx, req.ID, res); err != nil {
+					log.Error("Error setting result for request", "id", req.ID, "result", res, "error", err)
+					return 0
+				}
+				return time.Second
+			})
		})
	}
+	s.StopWaiter.LaunchThread(func(ctx context.Context) {
+		for {
+			select {
+			case <-readyStreams:
+				log.Trace("At least one stream is ready")
+				return // Don't block Start if at least one of the streams is ready.
+			case <-time.After(s.streamTimeout):
+				log.Error("Waiting for redis streams timed out")
+			case <-ctx.Done():
+				log.Info("Context expired, failed to start")
+				return
+			}
+		}
+	})
}

type ValidationServerConfig struct {
@@ -83,23 +128,28 @@ type ValidationServerConfig struct {
	ConsumerConfig pubsub.ConsumerConfig `koanf:"consumer-config"`
	// Supported wasm module roots.
	ModuleRoots []string `koanf:"module-roots"`
+	// Timeout on polling for existence of each redis stream.
+	StreamTimeout time.Duration `koanf:"stream-timeout"`
}

var DefaultValidationServerConfig = ValidationServerConfig{
	RedisURL:       "",
	ConsumerConfig: pubsub.DefaultConsumerConfig,
	ModuleRoots:    []string{},
+	StreamTimeout:  10 * time.Minute,
}

var TestValidationServerConfig = ValidationServerConfig{
	RedisURL:       "",
	ConsumerConfig: pubsub.TestConsumerConfig,
	ModuleRoots:    []string{},
+	StreamTimeout:  time.Minute,
}

func ValidationServerConfigAddOptions(prefix string, f *pflag.FlagSet) {
	pubsub.ConsumerConfigAddOptions(prefix+".consumer-config", f)
	f.StringSlice(prefix+".module-roots", nil, "Supported module root hashes")
+	f.Duration(prefix+".stream-timeout", DefaultValidationServerConfig.StreamTimeout, "Timeout on polling for existence of redis streams")
}

func (cfg *ValidationServerConfig) Enabled() bool {
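Start now gates each consumer behind a readiness poll: consumption begins only once its redis stream exists, and a separate watchdog logs an error if no stream appears within stream-timeout. A condensed, self-contained sketch of that polling gate (the checkExists callback stands in for pubsub.StreamExists; timings are illustrative):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// waitForStream polls checkExists until the stream shows up, the timeout fires,
// or ctx is cancelled, mirroring the readiness gate above: consumption only
// starts once the producer side has created the stream.
func waitForStream(ctx context.Context, checkExists func(context.Context) bool, timeout, pollEvery time.Duration) error {
	deadline := time.NewTimer(timeout)
	defer deadline.Stop()
	ticker := time.NewTicker(pollEvery)
	defer ticker.Stop()
	for {
		if checkExists(ctx) {
			return nil
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-deadline.C:
			return fmt.Errorf("stream did not appear within %s", timeout)
		case <-ticker.C:
		}
	}
}

func main() {
	// Pretend the stream appears after roughly 300ms.
	start := time.Now()
	exists := func(context.Context) bool { return time.Since(start) > 300*time.Millisecond }
	if err := waitForStream(context.Background(), exists, time.Minute, 100*time.Millisecond); err != nil {
		panic(err)
	}
	fmt.Println("stream is ready, consumer can start")
}
```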
diff --git a/validator/valnode/valnode.go b/validator/valnode/valnode.go
index 93a5b37238..972e11189d 100644
--- a/validator/valnode/valnode.go
+++ b/validator/valnode/valnode.go
@@ -25,7 +25,7 @@ type WasmConfig struct {
func WasmConfigAddOptions(prefix string, f *pflag.FlagSet) {
	f.String(prefix+".root-path", DefaultWasmConfig.RootPath, "path to machine folders, each containing wasm files (machine.wavm.br, replay.wasm)")
	f.Bool(prefix+".enable-wasmroots-check", DefaultWasmConfig.EnableWasmrootsCheck, "enable check for compatibility of on-chain WASM module root with node")
-	f.StringSlice(prefix+".allowed-wasm-module-roots", DefaultWasmConfig.AllowedWasmModuleRoots, "list of WASM module roots to check if the on-chain WASM module root belongs to on node startup")
+	f.StringSlice(prefix+".allowed-wasm-module-roots", DefaultWasmConfig.AllowedWasmModuleRoots, "list of WASM module roots or machine base paths to match against on-chain WasmModuleRoot")
}

var DefaultWasmConfig = WasmConfig{