Skip to content

Commit

Permalink
Add support for querying public Iceberg tables
Browse files: browse the repository at this point in the history
  • Loading branch information
gruuya committed Dec 4, 2024
1 parent 0a2de18 commit 41274e0
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 23 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ deltalake = { git = "https://github.com/splitgraph/delta-rs", rev = "eff57356982
futures = "0.3"
hex = ">=0.4.0"

iceberg = { git = "https://github.com/splitgraph/iceberg-rust", rev = "aa54f22f9acf0cc0955ac7b783c7c903a96c4ec6" }
iceberg-datafusion = { git = "https://github.com/splitgraph/iceberg-rust", rev = "aa54f22f9acf0cc0955ac7b783c7c903a96c4ec6" }
iceberg = { git = "https://github.com/splitgraph/iceberg-rust", rev = "adf221357451861ebe403961e9e791c6b4093051" }
iceberg-datafusion = { git = "https://github.com/splitgraph/iceberg-rust", rev = "adf221357451861ebe403961e9e791c6b4093051" }
indexmap = "2.6.0"
itertools = { workspace = true }

Expand Down
8 changes: 2 additions & 6 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,10 @@ services:
/bin/sh -c " /usr/bin/mc config host add test-minio http://minio:9000 minioadmin minioadmin;
/usr/bin/mc rm -r --force test-minio/seafowl-test-bucket; /usr/bin/mc mb
test-minio/seafowl-test-bucket; /usr/bin/mc cp -r test-data test-minio/seafowl-test-bucket;
/usr/bin/mc anonymous set public
test-minio/seafowl-test-bucket/test-data/table_with_ns_column.parquet;
/usr/bin/mc anonymous set public test-minio/seafowl-test-bucket;
/usr/bin/mc admin user add test-minio test-user test-pass; /usr/bin/mc admin policy attach
test-minio readwrite --user test-user;
/usr/bin/mc mb test-minio/seafowl-test-bucket-public; /usr/bin/mc anonymous set public
test-minio/seafowl-test-bucket-public; exit 0; "
test-minio readwrite --user test-user; exit 0; "
fake-gcs:
image: tustvold/fake-gcs-server
Expand Down
13 changes: 12 additions & 1 deletion src/object_store/utils.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use futures::TryFutureExt;
use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY};
use iceberg::io::{
S3_ACCESS_KEY_ID, S3_ALLOW_ANONYMOUS, S3_DISABLE_EC2_METADATA, S3_ENDPOINT,
S3_REGION, S3_SECRET_ACCESS_KEY,
};
use object_store::aws::AmazonS3ConfigKey;
use object_store::Error;
use std::collections::HashMap;
Expand Down Expand Up @@ -63,6 +66,14 @@ pub fn object_store_opts_to_file_io_props(
let key = match AmazonS3ConfigKey::from_str(key) {
Ok(AmazonS3ConfigKey::AccessKeyId) => S3_ACCESS_KEY_ID,
Ok(AmazonS3ConfigKey::SecretAccessKey) => S3_SECRET_ACCESS_KEY,
Ok(AmazonS3ConfigKey::SkipSignature)
if ["true", "t", "1"].contains(&val.to_lowercase().as_str()) =>
{
// We need two options on the opendal client in this case
props.insert(S3_ALLOW_ANONYMOUS.to_string(), val.clone());
props.insert(S3_DISABLE_EC2_METADATA.to_string(), val.clone());
continue;
}
Ok(AmazonS3ConfigKey::Region) => S3_REGION,
Ok(AmazonS3ConfigKey::Endpoint) => S3_ENDPOINT,
_ => key, // for now just propagate any non-matched keys
Expand Down
2 changes: 1 addition & 1 deletion tests/clade/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::clade::*;
)]
#[case("local.file", false)]
#[case("local.file", true)]
#[case("s3.minio", true)]
#[case("s3.delta", true)]
#[case("gcs.fake", true)]
#[tokio::test]
async fn test_basic_select(#[case] table: &str, #[case] object_store: bool) -> () {
Expand Down
40 changes: 32 additions & 8 deletions tests/fixtures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,21 +51,27 @@ pub fn schemas(include_file_without_store: bool) -> ListSchemaResponse {
name: "s3".to_string(),
tables: vec![
TableObject {
name: "minio".to_string(),
name: "delta".to_string(),
path: "test-data/delta".to_string(),
store: Some("minio".to_string()),
format: TableFormat::Delta.into(),
},
TableObject {
name: "minio_prefix".to_string(),
name: "delta_public".to_string(),
path: "delta".to_string(),
store: Some("minio-prefix".to_string()),
store: Some("minio-public".to_string()),
format: TableFormat::Delta.into(),
},
TableObject {
name: "iceberg".to_string(),
path: "test-data/iceberg/default.db/iceberg_table/metadata/00001-f394d7ec-944b-432d-a44f-78b5ec95aae2.metadata.json".to_string(),
store: Some("minio".to_string()),
format: TableFormat::Iceberg.into(),
},
TableObject {
name: "iceberg_public".to_string(),
path: "iceberg/default.db/iceberg_table/metadata/00001-f394d7ec-944b-432d-a44f-78b5ec95aae2.metadata.json".to_string(),
store: Some("minio-prefix".to_string()),
store: Some("minio-public".to_string()),
format: TableFormat::Iceberg.into(),
},
],
Expand All @@ -87,9 +93,9 @@ pub fn schemas(include_file_without_store: bool) -> ListSchemaResponse {
options: minio_options(),
},
StorageLocation {
name: "minio-prefix".to_string(),
name: "minio-public".to_string(),
location: "s3://seafowl-test-bucket/test-data".to_string(),
options: minio_options(),
options: minio_public_options(),
},
StorageLocation {
name: "fake-gcs".to_string(),
Expand Down Expand Up @@ -126,8 +132,26 @@ pub fn minio_options() -> HashMap<String, String> {
"minioadmin".to_string(),
),
(
// This has been removed from the config enum, but it can
// still be picked up via `AmazonS3ConfigKey::from_str`
AmazonS3ConfigKey::Client(ClientConfigKey::AllowHttp)
.as_ref()
.to_string(),
"true".to_string(),
),
])
}

// Used for public bucket/paths
pub fn minio_public_options() -> HashMap<String, String> {
HashMap::from([
(
AmazonS3ConfigKey::Endpoint.as_ref().to_string(),
"http://127.0.0.1:9000".to_string(),
),
(
AmazonS3ConfigKey::SkipSignature.as_ref().to_string(),
"true".to_string(),
),
(
AmazonS3ConfigKey::Client(ClientConfigKey::AllowHttp)
.as_ref()
.to_string(),
Expand Down
5 changes: 3 additions & 2 deletions tests/flight/inline_metastore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ use crate::flight::*;
#[case("local.file_with_store", TestServerType::InlineOnly, true)]
// Testing with properly sent inline metastore
#[case("local.file_with_store", TestServerType::InlineOnly, false)]
#[case("s3.minio", TestServerType::InlineOnly, false)]
#[case("s3.minio_prefix", TestServerType::InlineOnly, false)]
#[case("s3.delta", TestServerType::InlineOnly, false)]
#[case("s3.delta_public", TestServerType::InlineOnly, false)]
#[case("s3.iceberg", TestServerType::InlineOnly, false)]
#[case("s3.iceberg_public", TestServerType::InlineOnly, false)]
#[case("gcs.fake", TestServerType::InlineOnly, false)]
#[tokio::test]
async fn test_inline_query(
Expand Down
2 changes: 1 addition & 1 deletion tests/statements/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ bucket = "seafowl-test-bucket"
ObjectStoreType::S3Public => (
r#"type = "s3"
endpoint = "http://127.0.0.1:9000"
bucket = "seafowl-test-bucket-public"
bucket = "seafowl-test-bucket"
"#
.to_string(),
None,
Expand Down

0 comments on commit 41274e0

Please sign in to comment.