From a7502f3dcf71f1e2b75f6f940b15aa51fc32fdb4 Mon Sep 17 00:00:00 2001 From: Jonathan LEI Date: Fri, 23 Aug 2024 07:20:28 +0800 Subject: [PATCH 1/2] feat: client input caching --- README.md | 12 ++- bin/host/src/main.rs | 107 ++++++++++++++++++---- crates/executor/host/src/lib.rs | 16 +--- crates/executor/host/tests/integration.rs | 13 +-- 4 files changed, 112 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index b66570f..0c09493 100644 --- a/README.md +++ b/README.md @@ -66,12 +66,22 @@ You can also run the CLI directly by running the following command: cargo run --bin rsp --release -- --block-number 18884864 --rpc-url <RPC> ``` -or by providing the RPC URL in the `.env` file and specifying the chain id in the CLI command like this: +or by providing the RPC URL in the `.env` file (or otherwise setting the relevant env vars) and specifying the chain id in the CLI command like this: ```bash cargo run --bin rsp --release -- --block-number 18884864 --chain-id <chain-id> ``` +#### Using cached client input + +The client input (witness) generated by executing against RPC can be cached to speed up iteration of the client program by supplying the `--cache-dir` option: + +```bash +cargo run --bin rsp --release -- --block-number 18884864 --chain-id <chain-id> --cache-dir /path/to/cache +``` + +Note that even when utilizing a cached input, the host still needs access to the chain ID to identify the network type, either through `--rpc-url` or `--chain-id`. To run the host completely offline, pass `--chain-id` explicitly; when only `--rpc-url` is given, the chain ID is fetched from the RPC endpoint. + ## Running Tests End-to-end integration tests are available. 
To run these tests, utilize the `.env` file (see [example](./.env.example)) or manually set these environment variables: diff --git a/bin/host/src/main.rs b/bin/host/src/main.rs index 81425fd..b583d30 100644 --- a/bin/host/src/main.rs +++ b/bin/host/src/main.rs @@ -1,7 +1,9 @@ -use alloy_provider::ReqwestProvider; +use alloy_provider::{network::AnyNetwork, Provider, ReqwestProvider}; use clap::Parser; use reth_primitives::B256; -use rsp_client_executor::ChainVariant; +use rsp_client_executor::{ + io::ClientExecutorInput, ChainVariant, CHAIN_ID_ETH_MAINNET, CHAIN_ID_OP_MAINNET, +}; use rsp_host_executor::HostExecutor; use sp1_sdk::{ProverClient, SP1Stdin}; use std::path::PathBuf; @@ -29,6 +31,10 @@ struct HostArgs { /// Whether to generate a proof or just execute the block. #[clap(long)] prove: bool, + /// Optional path to the directory containing cached client input. A new cache file will be + /// created from RPC data if it doesn't already exist. + #[clap(long)] + cache_dir: Option, /// The path to the CSV file containing the execution data. #[clap(long, default_value = "report.csv")] report_path: PathBuf, @@ -49,25 +55,94 @@ async fn main() -> eyre::Result<()> { // Parse the command line arguments. let args = HostArgs::parse(); - let rpc_url = if let Some(rpc_url) = args.rpc_url { - rpc_url + // We don't need RPC when using cache with known chain ID, so we leave it as `Option` here + // and decide on whether to panic later. + // + // On the other hand chain ID is always needed. + let (rpc_url, chain_id) = match (args.rpc_url, args.chain_id) { + (Some(rpc_url), Some(chain_id)) => (Some(rpc_url), chain_id), + (None, Some(chain_id)) => { + match std::env::var(format!("RPC_{}", chain_id)) { + Ok(rpc_env_var) => { + // We don't always need it but if the value exists it has to be valid. + (Some(Url::parse(rpc_env_var.as_str()).expect("invalid rpc url")), chain_id) + } + Err(_) => { + // Not having RPC is okay because we know chain ID. 
+ (None, chain_id) + } + } + } + (Some(rpc_url), None) => { + // We can find out about chain ID from RPC. + let provider: ReqwestProvider = ReqwestProvider::new_http(rpc_url.clone()); + let chain_id = provider.get_chain_id().await?; + + (Some(rpc_url), chain_id) + } + (None, None) => { + eyre::bail!("either --rpc-url or --chain-id must be used") + } + }; + + let variant = match chain_id { + CHAIN_ID_ETH_MAINNET => ChainVariant::Ethereum, + CHAIN_ID_OP_MAINNET => ChainVariant::Optimism, + _ => { + eyre::bail!("unknown chain ID: {}", chain_id); + } + }; + + let client_input_from_cache = if let Some(cache_dir) = args.cache_dir.as_ref() { + let cache_path = cache_dir.join(format!("input/{}/{}.bin", chain_id, args.block_number)); + + if cache_path.exists() { + // TODO: prune the cache if invalid instead + let mut cache_file = std::fs::File::open(cache_path)?; + let client_input: ClientExecutorInput = bincode::deserialize_from(&mut cache_file)?; + + Some(client_input) + } else { + None + } } else { - let chain_id = args.chain_id.expect("If rpc_url is not provided, chain_id must be."); - let env_var_key = - std::env::var(format!("RPC_{}", chain_id)).expect("Could not find RPC_{} in .env"); - let rpc_url = Url::parse(env_var_key.as_str()).expect("invalid rpc url"); - rpc_url + None }; - // Setup the provider. - let provider = ReqwestProvider::new_http(rpc_url); + let client_input = match (client_input_from_cache, rpc_url) { + (Some(client_input_from_cache), _) => client_input_from_cache, + (None, Some(rpc_url)) => { + // Cache not found but we have RPC + // Setup the provider. + let provider = ReqwestProvider::new_http(rpc_url); + + // Setup the host executor. + let host_executor = HostExecutor::new(provider); + + // Execute the host. + let client_input = host_executor + .execute(args.block_number, variant) + .await + .expect("failed to execute host"); - // Setup the host executor. 
- let host_executor = HostExecutor::new(provider); + if let Some(cache_dir) = args.cache_dir { + let input_folder = cache_dir.join(format!("input/{}", chain_id)); + if !input_folder.exists() { + std::fs::create_dir_all(&input_folder)?; + } - // Execute the host. - let (client_input, variant) = - host_executor.execute(args.block_number).await.expect("failed to execute host"); + let input_path = input_folder.join(format!("{}.bin", args.block_number)); + let mut cache_file = std::fs::File::create(input_path)?; + + bincode::serialize_into(&mut cache_file, &client_input)?; + } + + client_input + } + (None, None) => { + eyre::bail!("cache not found and RPC URL not provided") + } + }; // Generate the proof. let client = ProverClient::new(); diff --git a/crates/executor/host/src/lib.rs b/crates/executor/host/src/lib.rs index 722314d..e04e9f8 100644 --- a/crates/executor/host/src/lib.rs +++ b/crates/executor/host/src/lib.rs @@ -9,7 +9,6 @@ use reth_primitives::{proofs, Block, Bloom, Receipts, B256}; use revm::db::CacheDB; use rsp_client_executor::{ io::ClientExecutorInput, ChainVariant, EthereumVariant, OptimismVariant, Variant, - CHAIN_ID_ETH_MAINNET, CHAIN_ID_OP_MAINNET, }; use rsp_primitives::account_proof::eip1186_proof_to_account_proof; use rsp_rpc_db::RpcDb; @@ -33,23 +32,14 @@ impl + Clone> HostExecutor { pub async fn execute( &self, block_number: u64, - ) -> eyre::Result<(ClientExecutorInput, ChainVariant)> { - tracing::info!("fetching chain ID to identify chain variant"); - let chain_id = self.provider.get_chain_id().await?; - let variant = match chain_id { - CHAIN_ID_ETH_MAINNET => ChainVariant::Ethereum, - CHAIN_ID_OP_MAINNET => ChainVariant::Optimism, - _ => { - eyre::bail!("unknown chain ID: {}", chain_id); - } - }; - + variant: ChainVariant, + ) -> eyre::Result { let client_input = match variant { ChainVariant::Ethereum => self.execute_variant::(block_number).await, ChainVariant::Optimism => self.execute_variant::(block_number).await, }?; - Ok((client_input, 
variant)) + Ok(client_input) } async fn execute_variant(&self, block_number: u64) -> eyre::Result diff --git a/crates/executor/host/tests/integration.rs b/crates/executor/host/tests/integration.rs index 6c89129..418d7e9 100644 --- a/crates/executor/host/tests/integration.rs +++ b/crates/executor/host/tests/integration.rs @@ -1,6 +1,7 @@ use alloy_provider::ReqwestProvider; use rsp_client_executor::{ - io::ClientExecutorInput, ClientExecutor, EthereumVariant, OptimismVariant, Variant, + io::ClientExecutorInput, ChainVariant, ClientExecutor, EthereumVariant, OptimismVariant, + Variant, }; use rsp_host_executor::HostExecutor; use tracing_subscriber::{ @@ -10,15 +11,15 @@ use url::Url; #[tokio::test(flavor = "multi_thread")] async fn test_e2e_ethereum() { - run_e2e::("RPC_1", 18884864).await; + run_e2e::(ChainVariant::Ethereum, "RPC_1", 18884864).await; } #[tokio::test(flavor = "multi_thread")] async fn test_e2e_optimism() { - run_e2e::("RPC_10", 122853660).await; + run_e2e::(ChainVariant::Optimism, "RPC_10", 122853660).await; } -async fn run_e2e(env_var_key: &str, block_number: u64) +async fn run_e2e(variant: ChainVariant, env_var_key: &str, block_number: u64) where V: Variant, { @@ -40,8 +41,8 @@ where let host_executor = HostExecutor::new(provider); // Execute the host. - let (client_input, _) = - host_executor.execute(block_number).await.expect("failed to execute host"); + let client_input = + host_executor.execute(block_number, variant).await.expect("failed to execute host"); // Setup the client executor. 
let client_executor = ClientExecutor; From 2c7e5986c991561d95ad7548c8dd3ca57781e1b3 Mon Sep 17 00:00:00 2001 From: Jonathan LEI Date: Tue, 27 Aug 2024 08:46:09 +0800 Subject: [PATCH 2/2] refactor: move code out of main --- bin/host/src/cli.rs | 57 ++++++++++++++++++++++++++ bin/host/src/main.rs | 96 ++++++++++++++++++-------------------------- 2 files changed, 95 insertions(+), 58 deletions(-) create mode 100644 bin/host/src/cli.rs diff --git a/bin/host/src/cli.rs b/bin/host/src/cli.rs new file mode 100644 index 0000000..f25222e --- /dev/null +++ b/bin/host/src/cli.rs @@ -0,0 +1,57 @@ +use alloy_provider::{network::AnyNetwork, Provider as _, ReqwestProvider}; +use clap::Parser; +use url::Url; + +/// The arguments for configuring the chain data provider. +#[derive(Debug, Clone, Parser)] +pub struct ProviderArgs { + /// The rpc url used to fetch data about the block. If not provided, will use the + /// RPC_{chain_id} env var. + #[clap(long)] + rpc_url: Option, + /// The chain ID. If not provided, requires the rpc_url argument to be provided. + #[clap(long)] + chain_id: Option, +} + +pub struct ProviderConfig { + pub rpc_url: Option, + pub chain_id: u64, +} + +impl ProviderArgs { + pub async fn into_provider(self) -> eyre::Result { + // We don't need RPC when using cache with known chain ID, so we leave it as `Option` + // here and decide on whether to panic later. + // + // On the other hand chain ID is always needed. + let (rpc_url, chain_id) = match (self.rpc_url, self.chain_id) { + (Some(rpc_url), Some(chain_id)) => (Some(rpc_url), chain_id), + (None, Some(chain_id)) => { + match std::env::var(format!("RPC_{}", chain_id)) { + Ok(rpc_env_var) => { + // We don't always need it but if the value exists it has to be valid. + (Some(Url::parse(rpc_env_var.as_str()).expect("invalid rpc url")), chain_id) + } + Err(_) => { + // Not having RPC is okay because we know chain ID. 
+ (None, chain_id) + } + } + } + (Some(rpc_url), None) => { + // We can find out about chain ID from RPC. + let provider: ReqwestProvider = + ReqwestProvider::new_http(rpc_url.clone()); + let chain_id = provider.get_chain_id().await?; + + (Some(rpc_url), chain_id) + } + (None, None) => { + eyre::bail!("either --rpc-url or --chain-id must be used") + } + }; + + Ok(ProviderConfig { rpc_url, chain_id }) + } +} diff --git a/bin/host/src/main.rs b/bin/host/src/main.rs index b583d30..cd06661 100644 --- a/bin/host/src/main.rs +++ b/bin/host/src/main.rs @@ -1,4 +1,4 @@ -use alloy_provider::{network::AnyNetwork, Provider, ReqwestProvider}; +use alloy_provider::ReqwestProvider; use clap::Parser; use reth_primitives::B256; use rsp_client_executor::{ @@ -10,24 +10,21 @@ use std::path::PathBuf; use tracing_subscriber::{ filter::EnvFilter, fmt, prelude::__tracing_subscriber_SubscriberExt, util::SubscriberInitExt, }; -use url::Url; mod execute; use execute::process_execution_report; +mod cli; +use cli::ProviderArgs; + /// The arguments for the host executable. #[derive(Debug, Clone, Parser)] struct HostArgs { /// The block number of the block to execute. #[clap(long)] block_number: u64, - /// The rpc url used to fetch data about the block. If not provided, will use the - /// RPC_{chain_id} env var. - #[clap(long)] - rpc_url: Option, - /// The chain ID. If not provided, requires the rpc_url argument to be provided. - #[clap(long)] - chain_id: Option, + #[clap(flatten)] + provider: ProviderArgs, /// Whether to generate a proof or just execute the block. #[clap(long)] prove: bool, @@ -54,62 +51,23 @@ async fn main() -> eyre::Result<()> { // Parse the command line arguments. let args = HostArgs::parse(); + let provider_config = args.provider.into_provider().await?; - // We don't need RPC when using cache with known chain ID, so we leave it as `Option` here - // and decide on whether to panic later. - // - // On the other hand chain ID is always needed. 
- let (rpc_url, chain_id) = match (args.rpc_url, args.chain_id) { - (Some(rpc_url), Some(chain_id)) => (Some(rpc_url), chain_id), - (None, Some(chain_id)) => { - match std::env::var(format!("RPC_{}", chain_id)) { - Ok(rpc_env_var) => { - // We don't always need it but if the value exists it has to be valid. - (Some(Url::parse(rpc_env_var.as_str()).expect("invalid rpc url")), chain_id) - } - Err(_) => { - // Not having RPC is okay because we know chain ID. - (None, chain_id) - } - } - } - (Some(rpc_url), None) => { - // We can find out about chain ID from RPC. - let provider: ReqwestProvider = ReqwestProvider::new_http(rpc_url.clone()); - let chain_id = provider.get_chain_id().await?; - - (Some(rpc_url), chain_id) - } - (None, None) => { - eyre::bail!("either --rpc-url or --chain-id must be used") - } - }; - - let variant = match chain_id { + let variant = match provider_config.chain_id { CHAIN_ID_ETH_MAINNET => ChainVariant::Ethereum, CHAIN_ID_OP_MAINNET => ChainVariant::Optimism, _ => { - eyre::bail!("unknown chain ID: {}", chain_id); + eyre::bail!("unknown chain ID: {}", provider_config.chain_id); } }; - let client_input_from_cache = if let Some(cache_dir) = args.cache_dir.as_ref() { - let cache_path = cache_dir.join(format!("input/{}/{}.bin", chain_id, args.block_number)); - - if cache_path.exists() { - // TODO: prune the cache if invalid instead - let mut cache_file = std::fs::File::open(cache_path)?; - let client_input: ClientExecutorInput = bincode::deserialize_from(&mut cache_file)?; + let client_input_from_cache = try_load_input_from_cache( + args.cache_dir.as_ref(), + provider_config.chain_id, + args.block_number, + )?; - Some(client_input) - } else { - None - } - } else { - None - }; - - let client_input = match (client_input_from_cache, rpc_url) { + let client_input = match (client_input_from_cache, provider_config.rpc_url) { (Some(client_input_from_cache), _) => client_input_from_cache, (None, Some(rpc_url)) => { // Cache not found but we have RPC @@ 
-126,7 +84,7 @@ async fn main() -> eyre::Result<()> { .expect("failed to execute host"); if let Some(cache_dir) = args.cache_dir { - let input_folder = cache_dir.join(format!("input/{}", chain_id)); + let input_folder = cache_dir.join(format!("input/{}", provider_config.chain_id)); if !input_folder.exists() { std::fs::create_dir_all(&input_folder)?; } @@ -184,3 +142,25 @@ async fn main() -> eyre::Result<()> { Ok(()) } + +fn try_load_input_from_cache( + cache_dir: Option<&PathBuf>, + chain_id: u64, + block_number: u64, +) -> eyre::Result> { + Ok(if let Some(cache_dir) = cache_dir { + let cache_path = cache_dir.join(format!("input/{}/{}.bin", chain_id, block_number)); + + if cache_path.exists() { + // TODO: prune the cache if invalid instead + let mut cache_file = std::fs::File::open(cache_path)?; + let client_input: ClientExecutorInput = bincode::deserialize_from(&mut cache_file)?; + + Some(client_input) + } else { + None + } + } else { + None + }) +}