diff --git a/Cargo.lock b/Cargo.lock index 07887f3..bf73280 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5240,6 +5240,7 @@ dependencies = [ "reth-primitives", "reth-trie", "revm-primitives", + "rsp-mpt", "rsp-primitives", "tracing-subscriber", ] @@ -5270,9 +5271,11 @@ dependencies = [ "alloy-rpc-types", "alloy-transport", "futures", + "rayon", "reth-primitives", "reth-revm", "reth-storage-errors", + "reth-trie", "revm-primitives", "rsp-primitives", "rsp-witness-db", diff --git a/Cargo.toml b/Cargo.toml index 3eec31f..fa6a991 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ futures = "0.3" url = "2.3" thiserror = "1.0.61" hex-literal = "0.4.1" +rayon = "1.10.0" # workspace rsp-rpc-db = { path = "./crates/storage/rpc-db" } diff --git a/README.md b/README.md index c69e65b..6f19a98 100644 --- a/README.md +++ b/README.md @@ -22,9 +22,9 @@ and the command `rsp` will be installed. ### RPC Node Requirement -RSP fetches block and state data from a JSON-RPC node. **But, you must use a RPC node that supports the `debug_dbGet` endpoint.** +RSP fetches block and state data from a JSON-RPC node. It's recommended that you use an RPC node that supports the `debug_dbGet` endpoint. -This is required because in some cases the host needs to recover the preimage of a [Merkle Patricia Trie](https://ethereum.org/en/developers/docs/data-structures-and-encoding/patricia-merkle-trie/) node that's referenced by hash. +This is recommended because in some cases the host needs to recover the preimage of a [Merkle Patricia Trie](https://ethereum.org/en/developers/docs/data-structures-and-encoding/patricia-merkle-trie/) node that's referenced by hash. 
To do this, the host utilizes the [`debug_dbGet` endpoint](https://geth.ethereum.org/docs/interacting-with-geth/rpc/ns-debug#debugdbget) of a Geth node running with options `--state.scheme=hash`, which is the default, and `--gcmode=archive`. An example command for running the node is: ```bash geth \ @@ -33,7 +33,7 @@ geth \ --http.api=eth,debug ``` -When running the host CLI or integration tests, **make sure to use an RPC URL pointing to a Geth node running with said options**, or errors will arise when preimage recovery is needed. You can reach out to the Succinct team to access an RPC URL that supports this endpoint. +However, in the absence of the `debug_dbGet` method, the host is able to fall back to a less efficient process of recovering the preimages via the standard `eth_getProof`. The fallback works in most cases but not all, so if you encounter a preimage recovery failure, you can reach out to the Succinct team to access an RPC URL that supports `debug_dbGet`. > [!TIP] > diff --git a/crates/executor/host/Cargo.toml b/crates/executor/host/Cargo.toml index 87eb415..663726d 100644 --- a/crates/executor/host/Cargo.toml +++ b/crates/executor/host/Cargo.toml @@ -22,7 +22,7 @@ tracing.workspace = true rsp-rpc-db.workspace = true rsp-witness-db.workspace = true rsp-client-executor.workspace = true -rsp-mpt.workspace = true +rsp-mpt = { workspace = true, features = ["preimage_context"] } rsp-primitives.workspace = true # reth diff --git a/crates/mpt/Cargo.toml b/crates/mpt/Cargo.toml index af7e4d1..bbe06a0 100644 --- a/crates/mpt/Cargo.toml +++ b/crates/mpt/Cargo.toml @@ -34,3 +34,9 @@ alloy-rpc-types.workspace = true alloy-trie.workspace = true hex-literal.workspace = true tracing-subscriber = "0.3.18" + +rsp-mpt = { path = ".", features = ["preimage_context"] } + +[features] +default = [] +preimage_context = [] diff --git a/crates/mpt/src/lib.rs b/crates/mpt/src/lib.rs index acc97d4..3d676fc 100644 --- a/crates/mpt/src/lib.rs +++ b/crates/mpt/src/lib.rs @@ -13,6 
+13,18 @@ use reth_trie::{ use revm_primitives::{keccak256, HashMap}; use rsp_primitives::storage::ExtDatabaseRef; +#[cfg(feature = "preimage_context")] +use rsp_primitives::storage::PreimageContext; + +/// Additional context for preimage recovery when calculating trie root. `Some` when calculating +/// storage trie root and `None` when calculating state trie root. +#[cfg(feature = "preimage_context")] +type RootContext = Option
; + +/// No additional context is needed since the `preimage_context` feature is disabled. +#[cfg(not(feature = "preimage_context"))] +type RootContext = (); + /// Computes the state root of a block's Merkle Patricia Trie given an [ExecutionOutcome] and a list /// of [EIP1186AccountProofResponse] storage proofs. pub fn compute_state_root( @@ -57,6 +69,11 @@ where let root = if proof.storage_proofs.is_empty() { proof.storage_root } else { + #[cfg(feature = "preimage_context")] + let context = Some(address); + #[cfg(not(feature = "preimage_context"))] + let context = (); + compute_root_from_proofs( storage_prefix_sets.freeze().iter().map(|storage_nibbles| { let hashed_slot = B256::from_slice(&storage_nibbles.pack()); @@ -75,11 +92,17 @@ where (storage_nibbles.clone(), encoded, storage_proof.proof.clone()) }), db, + context, )? }; storage_roots.insert(hashed_address, root); } + #[cfg(feature = "preimage_context")] + let context = None; + #[cfg(not(feature = "preimage_context"))] + let context = (); + // Compute the state root of the entire trie. let mut rlp_buf = Vec::with_capacity(128); compute_root_from_proofs( @@ -101,6 +124,7 @@ where (account_nibbles.clone(), encoded, proof.proof.clone()) }), db, + context, ) } @@ -108,6 +132,7 @@ where fn compute_root_from_proofs( items: impl IntoIterator>, Vec)>, db: &DB, + #[allow(unused)] root_context: RootContext, ) -> eyre::Result where DB: ExtDatabaseRef, @@ -277,7 +302,16 @@ where // technically have to modify this branch node, but the `alloy-trie` hash // builder handles this automatically when supplying child nodes. 
+ #[cfg(feature = "preimage_context")] + let preimage = db + .trie_node_ref_with_context( + branch_hash, + PreimageContext { address: &root_context, branch_path: &path }, + ) + .unwrap(); + #[cfg(not(feature = "preimage_context"))] let preimage = db.trie_node_ref(branch_hash).unwrap(); + match TrieNode::decode(&mut &preimage[..]).unwrap() { TrieNode::Branch(_) => { // This node is a branch node that's referenced by hash. There's no need @@ -373,6 +407,14 @@ mod tests { panic!("missing preimage for test") } + + fn trie_node_ref_with_context( + &self, + hash: B256, + _context: PreimageContext<'_>, + ) -> Result { + self.trie_node_ref(hash) + } } #[test] @@ -468,6 +510,7 @@ cb10a951f0e82cf2e461b98c4e5afb0348ccab5bb42180808080808080808080808080" ], )], &TestTrieDb::new(), + None, ) .unwrap(); @@ -586,6 +629,7 @@ f2e461b98c4e5afb0348ccab5bb421808080808080808080808080" ), ], &TestTrieDb::new(), + None, ) .unwrap(); @@ -652,6 +696,7 @@ f2e461b98c4e5afb0348ccab5bb421808080808080808080808080" ), ], &TestTrieDb::new(), + None, ) .unwrap(); @@ -712,6 +757,7 @@ f2e461b98c4e5afb0348ccab5bb421808080808080808080808080" ), ], &TestTrieDb::new(), + None, ) .unwrap(); @@ -756,6 +802,7 @@ f2e461b98c4e5afb0348ccab5bb421808080808080808080808080" ], )], &TestTrieDb::new(), + None, ) .unwrap(); diff --git a/crates/primitives/src/storage.rs b/crates/primitives/src/storage.rs index 4b8ac42..008ca9c 100644 --- a/crates/primitives/src/storage.rs +++ b/crates/primitives/src/storage.rs @@ -1,4 +1,5 @@ -use reth_primitives::{Bytes, B256}; +use reth_primitives::{Address, Bytes, B256}; +use reth_trie::Nibbles; /// Custom database access methods implemented by RSP storage backends. pub trait ExtDatabaseRef { @@ -7,4 +8,22 @@ pub trait ExtDatabaseRef { /// Gets the preimage of a trie node given its Keccak hash. 
fn trie_node_ref(&self, hash: B256) -> Result; + + /// Gets the preimage of a trie node given its Keccak hash, with additional context that could + /// be helpful when the program is not running in a constrained environment. + fn trie_node_ref_with_context( + &self, + hash: B256, + context: PreimageContext, + ) -> Result; +} + +/// Additional context for retrieving trie node preimages. These are useful when the JSON-RPC node +/// does not serve the `debug_dbGet`. +pub struct PreimageContext<'a> { + /// The account address if calculating a storage trie root; `None` if calculating the state + /// root. + pub address: &'a Option
, + /// The trie key path of the branch child containing the hash whose preimage is being fetched. + pub branch_path: &'a Nibbles, } diff --git a/crates/storage/rpc-db/Cargo.toml b/crates/storage/rpc-db/Cargo.toml index b507df6..403b490 100644 --- a/crates/storage/rpc-db/Cargo.toml +++ b/crates/storage/rpc-db/Cargo.toml @@ -12,6 +12,7 @@ tokio.workspace = true futures.workspace = true thiserror.workspace = true tracing.workspace = true +rayon.workspace = true # workspace rsp-witness-db.workspace = true @@ -21,6 +22,7 @@ rsp-primitives.workspace = true reth-primitives.workspace = true reth-storage-errors.workspace = true reth-revm.workspace = true +reth-trie.workspace = true # revm revm-primitives.workspace = true diff --git a/crates/storage/rpc-db/src/lib.rs b/crates/storage/rpc-db/src/lib.rs index 49a76a8..b5865a2 100644 --- a/crates/storage/rpc-db/src/lib.rs +++ b/crates/storage/rpc-db/src/lib.rs @@ -1,19 +1,28 @@ -use std::{cell::RefCell, marker::PhantomData}; +use std::{cell::RefCell, iter::once, marker::PhantomData}; use alloy_provider::Provider; use alloy_rpc_types::BlockId; use alloy_transport::Transport; use futures::future::join_all; +use rayon::prelude::*; use reth_primitives::{ revm_primitives::{AccountInfo, Bytecode}, Address, Bytes, B256, U256, }; use reth_revm::DatabaseRef; use reth_storage_errors::{db::DatabaseError, provider::ProviderError}; -use revm_primitives::{HashMap, HashSet}; -use rsp_primitives::{account_proof::AccountProofWithBytecode, storage::ExtDatabaseRef}; +use reth_trie::Nibbles; +use revm_primitives::{keccak256, HashMap, HashSet}; +use rsp_primitives::{ + account_proof::AccountProofWithBytecode, + storage::{ExtDatabaseRef, PreimageContext}, +}; use rsp_witness_db::WitnessDb; +/// The maximum number of addresses/slots to attempt for brute-forcing the key to be used for +/// fetching trie node preimage via `eth_getProof`. 
+const BRUTE_FORCE_LIMIT: u64 = 0xffffffff_u64; + /// A database that fetches data from a [Provider] over a [Transport]. #[derive(Debug, Clone)] pub struct RpcDb { @@ -42,6 +51,8 @@ pub enum RpcDbError { RpcError(String), #[error("failed to find block")] BlockNotFound, + #[error("failed to find trie node preimage")] + PreimageNotFound, } impl + Clone> RpcDb { @@ -138,16 +149,33 @@ impl + Clone> RpcDb { } /// Fetch a trie node based on its Keccak hash using the `debug_dbGet` method. - pub async fn fetch_trie_node(&self, hash: B256) -> Result { + pub async fn fetch_trie_node( + &self, + hash: B256, + context: Option>, + ) -> Result { tracing::info!("fetching trie node {}", hash); // Fetch the trie node value from a geth node with `state.scheme=hash`. - let value = self - .provider - .client() - .request::<_, Bytes>("debug_dbGet", (hash,)) - .await - .map_err(|e| RpcDbError::RpcError(e.to_string()))?; + let value = match self.provider.client().request::<_, Bytes>("debug_dbGet", (hash,)).await { + Ok(value) => value, + Err(err) => match context { + Some(context) => { + // The `debug_dbGet` method failed for some reason. Fall back to brute-forcing + // the slot/address needed to recover the preimage via the `eth_getProof` method + // instead. + tracing::debug!( + "failed to fetch preimage from debug_dbGet; \ + falling back to using eth_getProof: address={:?}, prefix={:?}", + context.address, + context.branch_path + ); + + self.fetch_trie_node_via_proof(hash, context).await? + } + None => return Err(RpcDbError::RpcError(err.to_string())), + }, + }; // Record the trie node value to the state. self.trie_nodes.borrow_mut().insert(hash, value.clone()); @@ -224,6 +252,68 @@ impl + Clone> RpcDb { account_proofs } + + /// Fetches a trie node via `eth_getProof` with a hacky workaround when `debug_dbGet` is not + /// available. 
+ async fn fetch_trie_node_via_proof( + &self, + hash: B256, + context: PreimageContext<'_>, + ) -> Result { + let (address, storage_keys) = match context.address { + Some(address) => { + // Computing storage root. Brute force the slot. + let slot = Self::find_key_preimage::<32>(context.branch_path) + .ok_or(RpcDbError::PreimageNotFound)?; + + (address.to_owned(), vec![slot.into()]) + } + None => { + // Computing state root. Brute force the address. + let address = Self::find_key_preimage::<20>(context.branch_path) + .ok_or(RpcDbError::PreimageNotFound)?; + + (address.into(), vec![]) + } + }; + + let account_proof = self + .provider + .get_proof(address, storage_keys) + .block_id(self.block) + .await + .map_err(|e| RpcDbError::RpcError(e.to_string()))?; + + for proof in account_proof + .storage_proof + .into_iter() + .map(|storage_proof| storage_proof.proof) + .chain(once(account_proof.account_proof)) + { + // The preimage we're looking for is more likely to be at the end of the proof. + for node in proof.into_iter().rev() { + if hash == keccak256(&node) { + return Ok(node) + } + } + } + + Err(RpcDbError::PreimageNotFound) + } + + /// Uses brute force to locate a key path preimage that contains a certain prefix. 
+ fn find_key_preimage(prefix: &Nibbles) -> Option<[u8; BYTES]> { + (0..BRUTE_FORCE_LIMIT).into_par_iter().find_map_any(|nonce| { + let mut buffer = [0u8; BYTES]; + buffer[(BYTES - 8)..].copy_from_slice(&nonce.to_be_bytes()); + + if Nibbles::unpack(keccak256(buffer)).starts_with(prefix) { + Some(buffer) + } else { + None + } + }) + } } impl + Clone> DatabaseRef for RpcDb { @@ -273,7 +363,24 @@ impl + Clone> ExtDatabaseRef for RpcDb Result { + let handle = tokio::runtime::Handle::try_current().map_err(|_| { + ProviderError::Database(DatabaseError::Other("no tokio runtime found".to_string())) + })?; + let result = tokio::task::block_in_place(|| { + handle.block_on(self.fetch_trie_node(hash, Some(context))) + }); let value = result.map_err(|e| ProviderError::Database(DatabaseError::Other(e.to_string())))?; Ok(value) diff --git a/crates/storage/witness-db/src/lib.rs b/crates/storage/witness-db/src/lib.rs index a16e858..6e1c4e8 100644 --- a/crates/storage/witness-db/src/lib.rs +++ b/crates/storage/witness-db/src/lib.rs @@ -4,7 +4,7 @@ use reth_primitives::{ }; use reth_storage_errors::provider::ProviderError; use revm_primitives::{Address, HashMap, U256}; -use rsp_primitives::storage::ExtDatabaseRef; +use rsp_primitives::storage::{ExtDatabaseRef, PreimageContext}; use serde::{Deserialize, Serialize}; /// A database used to witness state inside the zkVM. @@ -49,4 +49,12 @@ impl ExtDatabaseRef for WitnessDb { // TODO: avoid cloning Ok(self.trie_nodes.get(&hash).unwrap().to_owned()) } + + fn trie_node_ref_with_context( + &self, + hash: B256, + _context: PreimageContext, + ) -> Result { + self.trie_node_ref(hash) + } }