Skip to content

Commit

Permalink
[FEAT] Native S3 Client: allow disabling ssl verification or checking…
Browse files Browse the repository at this point in the history
… hostnames (#1395)

* adds `verify_ssl` and `check_hostname_ssl` options for S3Config, which
allows the user to disable verification steps for Tls connections.
  • Loading branch information
samster25 authored Sep 21, 2023
1 parent a0031d5 commit 83b73b2
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 2 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions daft/daft.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,8 @@ class S3Config:
num_tries: int | None = None,
retry_mode: str | None = None,
anonymous: bool | None = None,
verify_ssl: bool | None = None,
check_hostname_ssl: bool | None = None,
): ...

class AzureConfig:
Expand Down
6 changes: 6 additions & 0 deletions src/common/io-config/src/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ use crate::config;
/// num_tries: Number of attempts to make a connection, defaults to 5
/// retry_mode: Retry Mode when a request fails, current supported values are `standard` and `adaptive`
/// anonymous: Whether or not to use "anonymous mode", which will access S3 without any credentials
/// verify_ssl: Whether or not to verify ssl certificates, which will access S3 without checking if the certs are valid, defaults to True
/// check_hostname_ssl: Whether or not to verify the hostname when verifying ssl certificates, this was the legacy behavior for openssl, defaults to True
///
/// Example:
/// >>> io_config = IOConfig(s3=S3Config(key_id="xxx", access_key="xxx"))
Expand Down Expand Up @@ -150,6 +152,8 @@ impl S3Config {
num_tries: Option<u32>,
retry_mode: Option<String>,
anonymous: Option<bool>,
verify_ssl: Option<bool>,
check_hostname_ssl: Option<bool>,
) -> Self {
let def = crate::S3Config::default();
S3Config {
Expand All @@ -167,6 +171,8 @@ impl S3Config {
num_tries: num_tries.unwrap_or(def.num_tries),
retry_mode: retry_mode.or(def.retry_mode),
anonymous: anonymous.unwrap_or(def.anonymous),
verify_ssl: verify_ssl.unwrap_or(def.verify_ssl),
check_hostname_ssl: check_hostname_ssl.unwrap_or(def.check_hostname_ssl),
},
}
}
Expand Down
12 changes: 10 additions & 2 deletions src/common/io-config/src/s3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ pub struct S3Config {
pub num_tries: u32,
pub retry_mode: Option<String>,
pub anonymous: bool,
pub verify_ssl: bool,
pub check_hostname_ssl: bool,
}

impl Default for S3Config {
Expand All @@ -35,6 +37,8 @@ impl Default for S3Config {
num_tries: 5,
retry_mode: Some("standard".to_string()),
anonymous: false,
verify_ssl: true,
check_hostname_ssl: true,
}
}
}
Expand All @@ -55,7 +59,9 @@ impl Display for S3Config {
read_timeout_ms: {},
num_tries: {:?},
retry_mode: {:?},
anonymous: {}",
anonymous: {},
verify_ssl: {},
check_hostname_ssl: {}",
self.region_name,
self.endpoint_url,
self.key_id,
Expand All @@ -67,7 +73,9 @@ impl Display for S3Config {
self.read_timeout_ms,
self.num_tries,
self.retry_mode,
self.anonymous
self.anonymous,
self.verify_ssl,
self.check_hostname_ssl
)
}
}
3 changes: 3 additions & 0 deletions src/daft-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ aws-sdk-s3 = {version = "0.28.0", features = ["native-tls", "rt-tokio"], default
aws-sig-auth = "0.55.3"
aws-sigv4 = "0.55.3"
aws-smithy-async = "0.55.3"
aws-smithy-client = "0.55.3"
azure_storage = {version = "0.13.0", features = ["enable_reqwest"], default-features = false}
azure_storage_blobs = {version = "0.13.1", features = ["enable_reqwest"], default-features = false}
bytes = {workspace = true}
Expand All @@ -16,6 +17,8 @@ common-io-config = {path = "../common/io-config", default-features = false}
daft-core = {path = "../daft-core", default-features = false}
futures = {workspace = true}
google-cloud-storage = {version = "0.13.0", default-features = false, features = ["default-tls", "auth"]}
hyper = "0.14.27"
hyper-tls = "0.5.0"
lazy_static = {workspace = true}
log = {workspace = true}
openssl-sys = {version = "0.9.93", features = ["vendored"]}
Expand Down
35 changes: 35 additions & 0 deletions src/daft-io/src/s3_like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ enum Error {
"Unable to parse data as Utf8 while reading header for file: {path}. {source}"
))]
UnableToParseUtf8 { path: String, source: FromUtf8Error },
#[snafu(display("Unable to create TlsConnector. {source}"))]
UnableToCreateTlsConnector {
source: hyper_tls::native_tls::Error,
},
}

impl From<Error> for super::Error {
Expand Down Expand Up @@ -174,6 +178,35 @@ impl From<Error> for super::Error {
}
}

fn handle_https_client_settings(
builder: aws_sdk_s3::config::Builder,
config: &S3Config,
) -> super::Result<aws_sdk_s3::config::Builder> {
let tls_connector = hyper_tls::native_tls::TlsConnector::builder()
.danger_accept_invalid_certs(!config.verify_ssl)
.danger_accept_invalid_hostnames((!config.verify_ssl) || (!config.check_hostname_ssl))
.build()
.context(UnableToCreateTlsConnectorSnafu {})?;
let mut http_connector = hyper::client::HttpConnector::new();
http_connector.enforce_http(false);
let https_connector = hyper_tls::HttpsConnector::<hyper::client::HttpConnector>::from((
http_connector,
tls_connector.into(),
));
use aws_smithy_client::http_connector::ConnectorSettings;
use aws_smithy_client::hyper_ext;
let smithy_client = hyper_ext::Adapter::builder()
.connector_settings(
ConnectorSettings::builder()
.connect_timeout(Duration::from_millis(config.connect_timeout_ms))
.read_timeout(Duration::from_millis(config.read_timeout_ms))
.build(),
)
.build(https_connector);
let builder = builder.http_connector(smithy_client);
Ok(builder)
}

async fn build_s3_client(config: &S3Config) -> super::Result<(bool, s3::Client)> {
const DEFAULT_REGION: Region = Region::from_static("us-east-1");

Expand Down Expand Up @@ -221,6 +254,8 @@ async fn build_s3_client(config: &S3Config) -> super::Result<(bool, s3::Client)>

let builder = builder.retry_config(retry_config);

let builder = handle_https_client_settings(builder, config)?;

let sleep_impl = Arc::new(TokioSleep::new());
let builder = builder.sleep_impl(sleep_impl);
let timeout_config = TimeoutConfig::builder()
Expand Down

0 comments on commit 83b73b2

Please sign in to comment.