From a935c864a57532b2168ad30cd5299bfbec837c60 Mon Sep 17 00:00:00 2001 From: Ognyan Genev <39865181+ogenev@users.noreply.github.com> Date: Wed, 2 Oct 2024 10:33:05 +0300 Subject: [PATCH] feat(bridge): add CLI flag for total request timeout value (#1499) --- ethportal-peertest/src/scenarios/bridge.rs | 22 ++++++++++----- portal-bridge/src/api/consensus.rs | 26 +++++++++++++----- portal-bridge/src/api/execution.rs | 32 +++++++++++++++------- portal-bridge/src/cli.rs | 14 ++++++++-- portal-bridge/src/constants.rs | 6 ++-- portal-bridge/src/main.rs | 2 ++ src/bin/test_providers.rs | 1 + 7 files changed, 73 insertions(+), 30 deletions(-) diff --git a/ethportal-peertest/src/scenarios/bridge.rs b/ethportal-peertest/src/scenarios/bridge.rs index e71292ea4..3746c1504 100644 --- a/ethportal-peertest/src/scenarios/bridge.rs +++ b/ethportal-peertest/src/scenarios/bridge.rs @@ -9,7 +9,7 @@ use ethportal_api::{ use portal_bridge::{ api::{consensus::ConsensusApi, execution::ExecutionApi}, bridge::{beacon::BeaconBridge, history::HistoryBridge}, - constants::DEFAULT_GOSSIP_LIMIT, + constants::{DEFAULT_GOSSIP_LIMIT, DEFAULT_TOTAL_REQUEST_TIMEOUT}, types::mode::BridgeMode, }; use serde::Deserialize; @@ -24,9 +24,13 @@ pub async fn test_history_bridge(peertest: &Peertest, portal_client: &HttpClient let mode = BridgeMode::Test("./test_assets/portalnet/bridge_data.json".into()); // url doesn't matter, we're not making any requests let client_url = Url::parse("http://www.null.com").unwrap(); - let execution_api = ExecutionApi::new(client_url.clone(), client_url) - .await - .unwrap(); + let execution_api = ExecutionApi::new( + client_url.clone(), + client_url, + DEFAULT_TOTAL_REQUEST_TIMEOUT, + ) + .await + .unwrap(); // Wait for bootnode to start sleep(Duration::from_secs(1)).await; let bridge = HistoryBridge::new( @@ -54,9 +58,13 @@ pub async fn test_beacon_bridge(peertest: &Peertest, portal_client: &HttpClient) sleep(Duration::from_secs(1)).await; // url doesn't matter, we're not making any requests let client_url = Url::parse("http://www.null.com").unwrap(); - let consensus_api = ConsensusApi::new(client_url.clone(), client_url) - .await - .unwrap(); + let consensus_api = ConsensusApi::new( + client_url.clone(), + client_url, + DEFAULT_TOTAL_REQUEST_TIMEOUT, + ) + .await + .unwrap(); let bridge = BeaconBridge::new(consensus_api, mode, portal_client.clone()); bridge.launch().await; diff --git a/portal-bridge/src/api/consensus.rs b/portal-bridge/src/api/consensus.rs index 23a350e4f..6258fa8f1 100644 --- a/portal-bridge/src/api/consensus.rs +++ b/portal-bridge/src/api/consensus.rs @@ -15,20 +15,29 @@ use crate::{ pub struct ConsensusApi { primary: Url, fallback: Url, + request_timeout: u64, } impl ConsensusApi { - pub async fn new(primary: Url, fallback: Url) -> Result { + pub async fn new( + primary: Url, + fallback: Url, + request_timeout: u64, + ) -> Result { debug!( "Starting ConsensusApi with primary provider: {primary} and fallback provider: {fallback}", ); - let client = url_to_client(primary.clone()).map_err(|err| { + let client = url_to_client(primary.clone(), request_timeout).map_err(|err| { anyhow!("Unable to create primary client for consensus data provider: {err:?}") })?; if let Err(err) = check_provider(&client).await { warn!("Primary consensus data provider may be offline: {err:?}"); } - Ok(Self { primary, fallback }) + Ok(Self { + primary, + fallback, + request_timeout, + }) } /// Requests the `BeaconState` structure corresponding to the current head of the beacon chain. @@ -88,7 +97,7 @@ impl ConsensusApi { /// Make a request to the cl provider. async fn request(&self, endpoint: String) -> anyhow::Result { - let client = url_to_client(self.primary.clone()).map_err(|err| { + let client = url_to_client(self.primary.clone(), self.request_timeout).map_err(|err| { anyhow!("Unable to create client for primary consensus data provider: {err:?}") })?; match client.get(&endpoint)?.send().await?.text().await { @@ -96,9 +105,12 @@ impl ConsensusApi { Err(err) => { warn!("Error requesting consensus data from provider, retrying with fallback provider: {err:?}"); sleep(FALLBACK_RETRY_AFTER).await; - let client = url_to_client(self.fallback.clone()).map_err(|err| { - anyhow!("Unable to create client for fallback consensus data provider: {err:?}") - })?; + let client = + url_to_client(self.fallback.clone(), self.request_timeout).map_err(|err| { + anyhow!( + "Unable to create client for fallback consensus data provider: {err:?}" + ) + })?; client .get(endpoint)? .send() diff --git a/portal-bridge/src/api/execution.rs b/portal-bridge/src/api/execution.rs index 4bb3f9b46..f7b1a3833 100644 --- a/portal-bridge/src/api/execution.rs +++ b/portal-bridge/src/api/execution.rs @@ -46,15 +46,20 @@ pub struct ExecutionApi { pub primary: Url, pub fallback: Url, pub header_validator: HeaderValidator, + pub request_timeout: u64, } impl ExecutionApi { - pub async fn new(primary: Url, fallback: Url) -> Result { + pub async fn new( + primary: Url, + fallback: Url, + request_timeout: u64, + ) -> Result { // Only check that provider is connected & available if not using a test provider. debug!( "Starting ExecutionApi with primary provider: {primary} and fallback provider: {fallback}", ); - let client = url_to_client(primary.clone()).map_err(|err| { + let client = url_to_client(primary.clone(), request_timeout).map_err(|err| { anyhow!("Unable to create primary client for execution data provider: {err:?}") })?; if let Err(err) = check_provider(&client).await { @@ -65,6 +70,7 @@ impl ExecutionApi { primary, fallback, header_validator, + request_timeout, }) } @@ -303,7 +309,7 @@ impl ExecutionApi { "Attempting to send requests outnumbering provider request limit of {BATCH_LIMIT}." ) } - let client = url_to_client(self.primary.clone()).map_err(|err| { + let client = url_to_client(self.primary.clone(), self.request_timeout).map_err(|err| { anyhow!("Unable to create primary client for execution data provider: {err:?}") })?; match Self::send_batch_request(&client, &requests).await { @@ -311,9 +317,12 @@ impl ExecutionApi { Err(msg) => { warn!("Failed to send batch request to primary provider: {msg}"); sleep(FALLBACK_RETRY_AFTER).await; - let client = url_to_client(self.fallback.clone()).map_err(|err| { - anyhow!("Unable to create fallback client for execution data provider: {err:?}") - })?; + let client = + url_to_client(self.fallback.clone(), self.request_timeout).map_err(|err| { + anyhow!( + "Unable to create fallback client for execution data provider: {err:?}" + ) + })?; Self::send_batch_request(&client, &requests) .await .map_err(|err| { @@ -340,7 +349,7 @@ impl ExecutionApi { } async fn try_request(&self, request: JsonRequest) -> anyhow::Result { - let client = url_to_client(self.primary.clone()).map_err(|err| { + let client = url_to_client(self.primary.clone(), self.request_timeout).map_err(|err| { anyhow!("Unable to create primary client for execution data provider: {err:?}") })?; match Self::send_request(&client, &request).await { @@ -348,9 +357,12 @@ impl ExecutionApi { Err(msg) => { warn!("Failed to send request to primary provider, retrying with fallback provider: {msg}"); sleep(FALLBACK_RETRY_AFTER).await; - let client = url_to_client(self.fallback.clone()).map_err(|err| { - anyhow!("Unable to create fallback client for execution data provider: {err:?}") - })?; + let client = + url_to_client(self.fallback.clone(), self.request_timeout).map_err(|err| { + anyhow!( + "Unable to create fallback client for execution data provider: {err:?}" + ) + })?; Self::send_request(&client, &request) .await .map_err(|err| anyhow!("Failed to send request to fallback provider: {err:?}")) diff --git a/portal-bridge/src/cli.rs b/portal-bridge/src/cli.rs index 2d0ccdfb3..3fa323d60 100644 --- a/portal-bridge/src/cli.rs +++ b/portal-bridge/src/cli.rs @@ -8,12 +8,13 @@ use reqwest::{ }; use reqwest_middleware::{ClientBuilder, ClientWithMiddleware, RequestBuilder}; use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware}; +use tokio::time::Duration; use tracing::error; use url::Url; use crate::{ census::ENR_OFFER_LIMIT, - constants::{DEFAULT_GOSSIP_LIMIT, DEFAULT_OFFER_LIMIT, HTTP_REQUEST_TIMEOUT}, + constants::{DEFAULT_GOSSIP_LIMIT, DEFAULT_OFFER_LIMIT, DEFAULT_TOTAL_REQUEST_TIMEOUT}, types::mode::BridgeMode, DEFAULT_BASE_CL_ENDPOINT, DEFAULT_BASE_EL_ENDPOINT, FALLBACK_BASE_CL_ENDPOINT, FALLBACK_BASE_EL_ENDPOINT, @@ -173,6 +174,13 @@ pub struct BridgeConfig { value_delimiter = ',' )] pub filter_clients: Vec, + + #[arg( + default_value_t = DEFAULT_TOTAL_REQUEST_TIMEOUT, + long = "request-timeout", + help = "The timeout in seconds is applied from when the request starts connecting until the response body has finished. Also considered a total deadline.", + )] + pub request_timeout: u64, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -222,7 +230,7 @@ impl From<&Enr> for ClientType { } } -pub fn url_to_client(url: Url) -> Result { +pub fn url_to_client(url: Url, request_timeout: u64) -> Result { let mut headers = HeaderMap::new(); headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); @@ -256,7 +264,7 @@ pub fn url_to_client(url: Url) -> Result { // Add retry middleware let reqwest_client = Client::builder() .default_headers(headers) - .timeout(HTTP_REQUEST_TIMEOUT) + .timeout(Duration::from_secs(request_timeout)) .build() .map_err(|_| "Failed to build HTTP client")?; let client = ClientBuilder::new(reqwest_client) diff --git a/portal-bridge/src/constants.rs b/portal-bridge/src/constants.rs index 7ff0d3c90..41c026614 100644 --- a/portal-bridge/src/constants.rs +++ b/portal-bridge/src/constants.rs @@ -14,9 +14,9 @@ pub const HEADER_WITH_PROOF_CONTENT_VALUE: &str = // Beacon chain mainnet genesis time: Tue Dec 01 2020 12:00:23 GMT+0000 pub const BEACON_GENESIS_TIME: u64 = 1606824023; -// This is a very conservative timeout if a provider takes longer than even 1 second it is probably -// overloaded and not performing well -pub const HTTP_REQUEST_TIMEOUT: Duration = Duration::from_secs(20); +/// The timeout in seconds is applied from when the request starts connecting until the response +/// body has finished. Also considered a total deadline. +pub const DEFAULT_TOTAL_REQUEST_TIMEOUT: u64 = 20; // The maximum number of active blocks being gossiped. Note that this doesn't // exactly mean the number of concurrent gossip jsonrpc requests, as the gossip diff --git a/portal-bridge/src/main.rs b/portal-bridge/src/main.rs index c49795acf..5a3b72a72 100644 --- a/portal-bridge/src/main.rs +++ b/portal-bridge/src/main.rs @@ -86,6 +86,7 @@ async fn main() -> Result<(), Box> { let consensus_api = ConsensusApi::new( bridge_config.cl_provider, bridge_config.cl_provider_fallback, + bridge_config.request_timeout, ) .await?; let portal_client_clone = portal_client.clone(); @@ -110,6 +111,7 @@ async fn main() -> Result<(), Box> { let execution_api = ExecutionApi::new( bridge_config.el_provider, bridge_config.el_provider_fallback, + bridge_config.request_timeout, ) .await?; match bridge_config.mode { diff --git a/src/bin/test_providers.rs b/src/bin/test_providers.rs index 375d9fea2..8ce005ac7 100644 --- a/src/bin/test_providers.rs +++ b/src/bin/test_providers.rs @@ -68,6 +68,7 @@ pub async fn main() -> Result<()> { primary: client_url.clone(), fallback: client_url, header_validator: HeaderValidator::default(), + request_timeout: 20, }; for gossip_range in all_ranges.iter_mut() { debug!("Testing range: {gossip_range:?}");