succinctlabs · xJonathanLEI · Aug 27, 2024 · Aug 22, 2024 · Aug 27, 2024
diff --git a/README.md b/README.md
@@ -66,12 +66,22 @@ You can also run the CLI directly by running the following command:
 cargo run --bin rsp --release -- --block-number 18884864 --rpc-url <RPC>
 ```
 
-or by providing the RPC URL in the `.env` file and specifying the chain id in the CLI command like this:
+or by providing the RPC URL in the `.env` file (or by otherwise setting the `RPC_<chain-id>` environment variable) and specifying the chain ID in the CLI command like this:
 
 ```bash
 cargo run --bin rsp --release -- --block-number 18884864 --chain-id <chain-id>
 ```
 
+#### Using cached client input
+
+The client input (witness) generated by executing a block against an RPC endpoint can be cached by supplying the `--cache-dir` option, which speeds up iteration on the client program:
+
+```bash
+cargo run --bin rsp --release -- --block-number 18884864 --chain-id <chain-id> --cache-dir /path/to/cache
+```
+
+Note that even when using a cached input, the host still needs the chain ID to identify the network type; it is obtained either by querying `--rpc-url` or directly from `--chain-id`. To run the host completely offline, pass `--chain-id` so that the chain ID does not have to be fetched over RPC.
+
 ## Running Tests
 
 End-to-end integration tests are available. To run these tests, utilize the `.env` file (see [example](./.env.example)) or manually set these environment variables:

diff --git a/bin/host/src/cli.rs b/bin/host/src/cli.rs
@@ -0,0 +1,57 @@
+use alloy_provider::{network::AnyNetwork, Provider as _, ReqwestProvider};
+use clap::Parser;
+use url::Url;
+
+/// The arguments for configuring the chain data provider.
+#[derive(Debug, Clone, Parser)]
+pub struct ProviderArgs {
+    /// The rpc url used to fetch data about the block. If not provided, will use the
+    /// RPC_{chain_id} env var.
+    #[clap(long)]
+    rpc_url: Option<Url>,
+    /// The chain ID. If not provided, requires the rpc_url argument to be provided.
+    #[clap(long)]
+    chain_id: Option<u64>,
+}
+
+pub struct ProviderConfig {
+    pub rpc_url: Option<Url>,
+    pub chain_id: u64,
+}
+
+impl ProviderArgs {
+    pub async fn into_provider(self) -> eyre::Result<ProviderConfig> {
+        // An RPC URL isn't needed when the input is cached and the chain ID is known, so keep it
+        // as `Option<Url>` here and let the caller decide later whether its absence is an error.
+        //
+        // On the other hand chain ID is always needed.
+        let (rpc_url, chain_id) = match (self.rpc_url, self.chain_id) {
+            (Some(rpc_url), Some(chain_id)) => (Some(rpc_url), chain_id),
+            (None, Some(chain_id)) => {
+                match std::env::var(format!("RPC_{}", chain_id)) {
+                    Ok(rpc_env_var) => {
+                        // We don't always need it but if the value exists it has to be valid.
+                        (Some(Url::parse(rpc_env_var.as_str()).expect("invalid rpc url")), chain_id)
+                    }
+                    Err(_) => {
+                        // Not having RPC is okay because we know chain ID.
+                        (None, chain_id)
+                    }
+                }
+            }
+            (Some(rpc_url), None) => {
+                // We can find out the chain ID by querying the RPC.
+                let provider: ReqwestProvider<AnyNetwork> =
+                    ReqwestProvider::new_http(rpc_url.clone());
+                let chain_id = provider.get_chain_id().await?;
+
+                (Some(rpc_url), chain_id)
+            }
+            (None, None) => {
+                eyre::bail!("either --rpc-url or --chain-id must be used")
+            }
+        };
+
+        Ok(ProviderConfig { rpc_url, chain_id })
+    }
+}
diff --git a/bin/host/src/main.rs b/bin/host/src/main.rs
@@ -1,34 +1,37 @@
 use alloy_provider::ReqwestProvider;
 use clap::Parser;
 use reth_primitives::B256;
-use rsp_client_executor::ChainVariant;
+use rsp_client_executor::{
+    io::ClientExecutorInput, ChainVariant, CHAIN_ID_ETH_MAINNET, CHAIN_ID_OP_MAINNET,
+};
 use rsp_host_executor::HostExecutor;
 use sp1_sdk::{ProverClient, SP1Stdin};
 use std::path::PathBuf;
 use tracing_subscriber::{
     filter::EnvFilter, fmt, prelude::__tracing_subscriber_SubscriberExt, util::SubscriberInitExt,
 };
-use url::Url;
 
 mod execute;
 use execute::process_execution_report;
 
+mod cli;
+use cli::ProviderArgs;
+
 /// The arguments for the host executable.
 #[derive(Debug, Clone, Parser)]
 struct HostArgs {
     /// The block number of the block to execute.
     #[clap(long)]
     block_number: u64,
-    /// The rpc url used to fetch data about the block. If not provided, will use the
-    /// RPC_{chain_id} env var.
-    #[clap(long)]
-    rpc_url: Option<Url>,
-    /// The chain ID. If not provided, requires the rpc_url argument to be provided.
-    #[clap(long)]
-    chain_id: Option<u64>,
+    #[clap(flatten)]
+    provider: ProviderArgs,
     /// Whether to generate a proof or just execute the block.
     #[clap(long)]
     prove: bool,
+    /// Optional path to the directory containing cached client input. A new cache file will be
+    /// created from RPC data if it doesn't already exist.
+    #[clap(long)]
+    cache_dir: Option<PathBuf>,
     /// The path to the CSV file containing the execution data.
     #[clap(long, default_value = "report.csv")]
     report_path: PathBuf,
@@ -48,26 +51,56 @@ async fn main() -> eyre::Result<()> {
 
     // Parse the command line arguments.
     let args = HostArgs::parse();
+    let provider_config = args.provider.into_provider().await?;
 
-    let rpc_url = if let Some(rpc_url) = args.rpc_url {
-        rpc_url
-    } else {
-        let chain_id = args.chain_id.expect("If rpc_url is not provided, chain_id must be.");
-        let env_var_key =
-            std::env::var(format!("RPC_{}", chain_id)).expect("Could not find RPC_{} in .env");
-        let rpc_url = Url::parse(env_var_key.as_str()).expect("invalid rpc url");
-        rpc_url
+    let variant = match provider_config.chain_id {
+        CHAIN_ID_ETH_MAINNET => ChainVariant::Ethereum,
+        CHAIN_ID_OP_MAINNET => ChainVariant::Optimism,
+        _ => {
+            eyre::bail!("unknown chain ID: {}", provider_config.chain_id);
+        }
     };
 
-    // Setup the provider.
-    let provider = ReqwestProvider::new_http(rpc_url);
-
-    // Setup the host executor.
-    let host_executor = HostExecutor::new(provider);
-
-    // Execute the host.
-    let (client_input, variant) =
-        host_executor.execute(args.block_number).await.expect("failed to execute host");
+    let client_input_from_cache = try_load_input_from_cache(
+        args.cache_dir.as_ref(),
+        provider_config.chain_id,
+        args.block_number,
+    )?;
+
+    let client_input = match (client_input_from_cache, provider_config.rpc_url) {
+        (Some(client_input_from_cache), _) => client_input_from_cache,
+        (None, Some(rpc_url)) => {
+            // Cache not found but we have RPC
+            // Setup the provider.
+            let provider = ReqwestProvider::new_http(rpc_url);
+
+            // Setup the host executor.
+            let host_executor = HostExecutor::new(provider);
+
+            // Execute the host.
+            let client_input = host_executor
+                .execute(args.block_number, variant)
+                .await
+                .expect("failed to execute host");
+
+            if let Some(cache_dir) = args.cache_dir {
+                let input_folder = cache_dir.join(format!("input/{}", provider_config.chain_id));
+                if !input_folder.exists() {
+                    std::fs::create_dir_all(&input_folder)?;
+                }
+
+                let input_path = input_folder.join(format!("{}.bin", args.block_number));
+                let mut cache_file = std::fs::File::create(input_path)?;
+
+                bincode::serialize_into(&mut cache_file, &client_input)?;
+            }
+
+            client_input
+        }
+        (None, None) => {
+            eyre::bail!("cache not found and RPC URL not provided")
+        }
+    };
 
     // Generate the proof.
     let client = ProverClient::new();
@@ -109,3 +142,25 @@ async fn main() -> eyre::Result<()> {
 
     Ok(())
 }
+
+fn try_load_input_from_cache(
+    cache_dir: Option<&PathBuf>,
+    chain_id: u64,
+    block_number: u64,
+) -> eyre::Result<Option<ClientExecutorInput>> {
+    Ok(if let Some(cache_dir) = cache_dir {
+        let cache_path = cache_dir.join(format!("input/{}/{}.bin", chain_id, block_number));
+
+        if cache_path.exists() {
+            // TODO: prune the cache if invalid instead
+            let mut cache_file = std::fs::File::open(cache_path)?;
+            let client_input: ClientExecutorInput = bincode::deserialize_from(&mut cache_file)?;
+
+            Some(client_input)
+        } else {
+            None
+        }
+    } else {
+        None
+    })
+}
diff --git a/crates/executor/host/src/lib.rs b/crates/executor/host/src/lib.rs
@@ -9,7 +9,6 @@ use reth_primitives::{proofs, Block, Bloom, Receipts, B256};
 use revm::db::CacheDB;
 use rsp_client_executor::{
     io::ClientExecutorInput, ChainVariant, EthereumVariant, OptimismVariant, Variant,
-    CHAIN_ID_ETH_MAINNET, CHAIN_ID_OP_MAINNET,
 };
 use rsp_primitives::account_proof::eip1186_proof_to_account_proof;
 use rsp_rpc_db::RpcDb;
@@ -33,23 +32,14 @@ impl<T: Transport + Clone, P: Provider<T> + Clone> HostExecutor<T, P> {
     pub async fn execute(
         &self,
         block_number: u64,
-    ) -> eyre::Result<(ClientExecutorInput, ChainVariant)> {
-        tracing::info!("fetching chain ID to identify chain variant");
-        let chain_id = self.provider.get_chain_id().await?;
-        let variant = match chain_id {
-            CHAIN_ID_ETH_MAINNET => ChainVariant::Ethereum,
-            CHAIN_ID_OP_MAINNET => ChainVariant::Optimism,
-            _ => {
-                eyre::bail!("unknown chain ID: {}", chain_id);
-            }
-        };
-
+        variant: ChainVariant,
+    ) -> eyre::Result<ClientExecutorInput> {
         let client_input = match variant {
             ChainVariant::Ethereum => self.execute_variant::<EthereumVariant>(block_number).await,
             ChainVariant::Optimism => self.execute_variant::<OptimismVariant>(block_number).await,
         }?;
 
-        Ok((client_input, variant))
+        Ok(client_input)
     }
 
     async fn execute_variant<V>(&self, block_number: u64) -> eyre::Result<ClientExecutorInput>

diff --git a/crates/executor/host/tests/integration.rs b/crates/executor/host/tests/integration.rs
@@ -1,6 +1,7 @@
 use alloy_provider::ReqwestProvider;
 use rsp_client_executor::{
-    io::ClientExecutorInput, ClientExecutor, EthereumVariant, OptimismVariant, Variant,
+    io::ClientExecutorInput, ChainVariant, ClientExecutor, EthereumVariant, OptimismVariant,
+    Variant,
 };
 use rsp_host_executor::HostExecutor;
 use tracing_subscriber::{
@@ -10,15 +11,15 @@ use url::Url;
 
 #[tokio::test(flavor = "multi_thread")]
 async fn test_e2e_ethereum() {
-    run_e2e::<EthereumVariant>("RPC_1", 18884864).await;
+    run_e2e::<EthereumVariant>(ChainVariant::Ethereum, "RPC_1", 18884864).await;
 }
 
 #[tokio::test(flavor = "multi_thread")]
 async fn test_e2e_optimism() {
-    run_e2e::<OptimismVariant>("RPC_10", 122853660).await;
+    run_e2e::<OptimismVariant>(ChainVariant::Optimism, "RPC_10", 122853660).await;
 }
 
-async fn run_e2e<V>(env_var_key: &str, block_number: u64)
+async fn run_e2e<V>(variant: ChainVariant, env_var_key: &str, block_number: u64)
 where
     V: Variant,
 {
@@ -40,8 +41,8 @@ where
     let host_executor = HostExecutor::new(provider);
 
     // Execute the host.
-    let (client_input, _) =
-        host_executor.execute(block_number).await.expect("failed to execute host");
+    let client_input =
+        host_executor.execute(block_number, variant).await.expect("failed to execute host");
 
     // Setup the client executor.
     let client_executor = ClientExecutor;