From 548f54e787f6f966c21577cfec2dd255aa0494bc Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Wed, 20 Sep 2023 15:25:51 -0700 Subject: [PATCH] allow disabling ssl verification or checking hostnames --- Cargo.lock | 3 +++ daft/daft.pyi | 2 ++ src/common/io-config/src/python.rs | 6 +++++ src/common/io-config/src/s3.rs | 12 ++++++++-- src/daft-io/Cargo.toml | 3 +++ src/daft-io/src/s3_like.rs | 35 ++++++++++++++++++++++++++++++ 6 files changed, 59 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8d8003f0f9..96cffe15f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1070,6 +1070,7 @@ dependencies = [ "aws-sig-auth", "aws-sigv4", "aws-smithy-async", + "aws-smithy-client", "azure_storage", "azure_storage_blobs", "bytes", @@ -1078,6 +1079,8 @@ dependencies = [ "daft-core", "futures", "google-cloud-storage", + "hyper", + "hyper-tls", "lazy_static", "log", "md5", diff --git a/daft/daft.pyi b/daft/daft.pyi index e96b2eaa6f..3613c2e4d4 100644 --- a/daft/daft.pyi +++ b/daft/daft.pyi @@ -283,6 +283,8 @@ class S3Config: num_tries: int | None = None, retry_mode: str | None = None, anonymous: bool | None = None, + verify_ssl: bool | None = None, + check_hostname_ssl: bool | None = None, ): ... class AzureConfig: diff --git a/src/common/io-config/src/python.rs b/src/common/io-config/src/python.rs index e6785e8137..b74099668f 100644 --- a/src/common/io-config/src/python.rs +++ b/src/common/io-config/src/python.rs @@ -19,6 +19,8 @@ use crate::config; /// num_tries: Number of attempts to make a connection, defaults to 5 /// retry_mode: Retry Mode when a request fails, current supported values are `standard` and `adaptive` /// anonymous: Whether or not to use "anonymous mode", which will access S3 without any credentials +/// verify_ssl: Whether or not to use verify ssl certificates, which will access S3 without checking if the certs are valid +/// check_hostname_ssl: Whether or not to verify the hostname when verifying ssl certificates, this was the legacy behavior for openssl /// /// Example: /// >>> io_config = IOConfig(s3=S3Config(key_id="xxx", access_key="xxx")) @@ -150,6 +152,8 @@ impl S3Config { num_tries: Option, retry_mode: Option, anonymous: Option, + verify_ssl: Option, + check_hostname_ssl: Option, ) -> Self { let def = crate::S3Config::default(); S3Config { @@ -167,6 +171,8 @@ impl S3Config { num_tries: num_tries.unwrap_or(def.num_tries), retry_mode: retry_mode.or(def.retry_mode), anonymous: anonymous.unwrap_or(def.anonymous), + verify_ssl: verify_ssl.unwrap_or(def.verify_ssl), + check_hostname_ssl: check_hostname_ssl.unwrap_or(def.check_hostname_ssl), }, } } diff --git a/src/common/io-config/src/s3.rs b/src/common/io-config/src/s3.rs index 05f9111f9c..9379212173 100644 --- a/src/common/io-config/src/s3.rs +++ b/src/common/io-config/src/s3.rs @@ -18,6 +18,8 @@ pub struct S3Config { pub num_tries: u32, pub retry_mode: Option, pub anonymous: bool, + pub verify_ssl: bool, + pub check_hostname_ssl: bool, } impl Default for S3Config { @@ -35,6 +37,8 @@ impl Default for S3Config { num_tries: 5, retry_mode: Some("standard".to_string()), anonymous: false, + verify_ssl: true, + check_hostname_ssl: true, } } } @@ -55,7 +59,9 @@ impl Display for S3Config { read_timeout_ms: {}, num_tries: {:?}, retry_mode: {:?}, - anonymous: {}", + anonymous: {}, + verify_ssl: {}, + check_hostname_ssl: {}", self.region_name, self.endpoint_url, self.key_id, @@ -67,7 +73,9 @@ impl Display for S3Config { self.read_timeout_ms, self.num_tries, self.retry_mode, - self.anonymous + self.anonymous, + self.verify_ssl, + self.check_hostname_ssl ) } } diff --git a/src/daft-io/Cargo.toml b/src/daft-io/Cargo.toml index ea361bc3e6..cf29f1b892 100644 --- a/src/daft-io/Cargo.toml +++ b/src/daft-io/Cargo.toml @@ -7,6 +7,7 @@ aws-sdk-s3 = {version = "0.28.0", features = ["native-tls", "rt-tokio"], default aws-sig-auth = "0.55.3" aws-sigv4 = "0.55.3" aws-smithy-async = "0.55.3" +aws-smithy-client = "0.55.3" azure_storage = {version = "0.13.0", features = ["enable_reqwest"], default-features = false} azure_storage_blobs = {version = "0.13.1", features = ["enable_reqwest"], default-features = false} bytes = {workspace = true} @@ -15,6 +16,8 @@ common-io-config = {path = "../common/io-config", default-features = false} daft-core = {path = "../daft-core", default-features = false} futures = {workspace = true} google-cloud-storage = {version = "0.13.0", default-features = false, features = ["default-tls", "auth"]} +hyper = "0.14.27" +hyper-tls = "0.5.0" lazy_static = {workspace = true} log = {workspace = true} openssl-sys = {version = "0.9.93", features = ["vendored"]} diff --git a/src/daft-io/src/s3_like.rs b/src/daft-io/src/s3_like.rs index 14a09916d3..5c53c1c4d5 100644 --- a/src/daft-io/src/s3_like.rs +++ b/src/daft-io/src/s3_like.rs @@ -100,6 +100,10 @@ enum Error { "Unable to parse data as Utf8 while reading header for file: {path}. {source}" ))] UnableToParseUtf8 { path: String, source: FromUtf8Error }, + #[snafu(display("Unable to create TlsConnector. {source}"))] + UnableToCreateTlsConnector { + source: hyper_tls::native_tls::Error, + }, } impl From for super::Error { @@ -160,6 +164,35 @@ impl From for super::Error { } } +fn handle_https_client_settings( + builder: aws_sdk_s3::config::Builder, + config: &S3Config, +) -> super::Result { + let tls_connector = hyper_tls::native_tls::TlsConnector::builder() + .danger_accept_invalid_certs(!config.verify_ssl) + .danger_accept_invalid_hostnames((!config.verify_ssl) || (!config.check_hostname_ssl)) + .build() + .context(UnableToCreateTlsConnectorSnafu {})?; + let mut http_connector = hyper::client::HttpConnector::new(); + http_connector.enforce_http(false); + let https_connector = hyper_tls::HttpsConnector::::from(( + http_connector, + tls_connector.into(), + )); + use aws_smithy_client::http_connector::ConnectorSettings; + use aws_smithy_client::hyper_ext; + let smithy_client = hyper_ext::Adapter::builder() + .connector_settings( + ConnectorSettings::builder() + .connect_timeout(Duration::from_millis(config.connect_timeout_ms)) + .read_timeout(Duration::from_millis(config.read_timeout_ms)) + .build(), + ) + .build(https_connector); + let builder = builder.http_connector(smithy_client); + Ok(builder) +} + async fn build_s3_client(config: &S3Config) -> super::Result<(bool, s3::Client)> { const DEFAULT_REGION: Region = Region::from_static("us-east-1"); @@ -207,6 +240,8 @@ async fn build_s3_client(config: &S3Config) -> super::Result<(bool, s3::Client)> let builder = builder.retry_config(retry_config); + let builder = handle_https_client_settings(builder, config)?; + let sleep_impl = Arc::new(TokioSleep::new()); let builder = builder.sleep_impl(sleep_impl); let timeout_config = TimeoutConfig::builder()