Skip to content

Commit

Permalink
Merge pull request #763 from splitgraph/bump-iceberg
Browse files Browse the repository at this point in the history
Don't pick up S3 config file/env props automatically
  • Loading branch information
gruuya authored Dec 16, 2024
2 parents ddf8144 + 8a3cc6f commit 22ab168
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 51 deletions.
6 changes: 4 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ datafusion-functions-nested = "43.0.0"

futures = "0.3"

iceberg = { git = "https://github.com/splitgraph/iceberg-rust", rev = "734e70dbe6227de7ce621474c38278da1b450f1a" }
iceberg-datafusion = { git = "https://github.com/splitgraph/iceberg-rust", rev = "734e70dbe6227de7ce621474c38278da1b450f1a" }
iceberg = { git = "https://github.com/splitgraph/iceberg-rust", rev = "2be7c389de17e8b5f88e202b2b696b221c9d9adb" }
iceberg-datafusion = { git = "https://github.com/splitgraph/iceberg-rust", rev = "2be7c389de17e8b5f88e202b2b696b221c9d9adb" }

itertools = ">=0.10.0"
object_store = { version = "0.11", features = ["aws", "azure", "gcp"] }
Expand Down
56 changes: 56 additions & 0 deletions object_store_factory/src/aws.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
use iceberg::io::{
S3_ACCESS_KEY_ID, S3_ALLOW_ANONYMOUS, S3_DISABLE_CONFIG_LOAD,
S3_DISABLE_EC2_METADATA, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY,
};
use object_store::aws::{
resolve_bucket_region, AmazonS3Builder, AmazonS3ConfigKey, S3ConditionalPut,
};
Expand Down Expand Up @@ -259,6 +263,38 @@ async fn detect_region(url: &Url) -> Result<String, object_store::Error> {
Ok(region)
}

pub fn s3_opts_to_file_io_props(
key: AmazonS3ConfigKey,
val: &str,
props: &mut HashMap<String, String>,
) {
// If any S3 key is detected at all skip picking up config from config file or env vars
props.insert(S3_DISABLE_CONFIG_LOAD.to_string(), "true".to_string());
// FileIO requires the region prop even when the S3 store doesn't (e.g. MinIO)
props
.entry(S3_REGION.to_string())
.or_insert("dummy-region".to_string());

let key = match key {
AmazonS3ConfigKey::AccessKeyId => S3_ACCESS_KEY_ID,
AmazonS3ConfigKey::SecretAccessKey => S3_SECRET_ACCESS_KEY,
AmazonS3ConfigKey::SkipSignature
if ["true", "t", "1"].contains(&val.to_lowercase().as_str()) =>
{
// We need two options on the opendal client in this case
props.insert(S3_ALLOW_ANONYMOUS.to_string(), val.to_string());
props.insert(S3_DISABLE_EC2_METADATA.to_string(), val.to_string());
return;
}
AmazonS3ConfigKey::Region => S3_REGION,
AmazonS3ConfigKey::Endpoint => S3_ENDPOINT,
// for now just propagate any non-matched keys
_ => key.as_ref(),
};

props.insert(key.to_string(), val.to_string());
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -616,4 +652,24 @@ mod tests {
assert!(config.allow_http); // Default value should be true
assert!(config.skip_signature); // Default value should be true
}

#[test]
fn test_s3_opts_to_file_io_props() {
let mut props = HashMap::new();

// Test SkipSignature with a truthy value
s3_opts_to_file_io_props(AmazonS3ConfigKey::SkipSignature, "true", &mut props);

// We expect both allow_anonymous and disable_ec2_metadata to be set.
// In addition, we expect disable_config_load to be set, as well as a
// region placeholder.
assert_eq!(props.get(S3_ALLOW_ANONYMOUS), Some(&"true".to_string()));
assert_eq!(
props.get(S3_DISABLE_EC2_METADATA),
Some(&"true".to_string())
);
assert_eq!(props.get(S3_DISABLE_CONFIG_LOAD), Some(&"true".to_string()));
assert_eq!(props.get(S3_REGION), Some(&"dummy-region".to_string()));
props.clear();
}
}
27 changes: 26 additions & 1 deletion object_store_factory/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,23 @@ pub mod aws;
pub mod google;
pub mod local;
mod memory;
pub mod utils;

use aws::S3Config;
use google::GCSConfig;
use local::LocalConfig;

use object_store::aws::AmazonS3ConfigKey;
use object_store::{
memory::InMemory, parse_url_opts, path::Path, prefix::PrefixStore, DynObjectStore,
ObjectStore, ObjectStoreScheme,
};
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use tracing::warn;
use url::Url;

use crate::aws::s3_opts_to_file_io_props;
use serde::Deserialize;

#[derive(Deserialize, Debug, PartialEq, Eq, Clone)]
Expand Down Expand Up @@ -197,6 +199,29 @@ pub async fn build_storage_location_info_from_opts(
})
}

// Go through all known keys for object store and convert them to corresponding file_io ones.
//
// For now only converts S3 keys.
// TODO: At some point this should be redundant, since there is an OpenDAL adapter for object_store,
// https://github.com/apache/iceberg-rust/issues/172
pub fn object_store_opts_to_file_io_props(
opts: &HashMap<String, String>,
) -> HashMap<String, String> {
let mut props = HashMap::new();

for (key, val) in opts.iter() {
match AmazonS3ConfigKey::from_str(key) {
Ok(s3_key) => s3_opts_to_file_io_props(s3_key, val, &mut props),
// for now just propagate any non-S3 keys
_ => {
props.insert(key.clone(), val.clone());
}
};
}

props
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
45 changes: 0 additions & 45 deletions object_store_factory/src/utils.rs

This file was deleted.

2 changes: 1 addition & 1 deletion src/catalog/metastore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use iceberg::io::FileIO;
use iceberg::table::StaticTable;
use iceberg::TableIdent;
use iceberg_datafusion::IcebergTableProvider;
use object_store_factory::utils::object_store_opts_to_file_io_props;
use object_store_factory::object_store_opts_to_file_io_props;
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
Expand Down

0 comments on commit 22ab168

Please sign in to comment.