diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs
index 3d999766e03f..a48f7aaa6f0b 100644
--- a/datafusion-cli/src/object_storage.rs
+++ b/datafusion-cli/src/object_storage.rs
@@ -471,12 +471,13 @@ mod tests {
     #[tokio::test]
     async fn s3_object_store_builder() -> Result<()> {
-        let access_key_id = "fake_access_key_id";
-        let secret_access_key = "fake_secret_access_key";
+        // "FAKE" is uppercase to ensure the values are not lowercased when parsed
+        let access_key_id = "FAKE_access_key_id";
+        let secret_access_key = "FAKE_secret_access_key";
         let region = "fake_us-east-2";
         let endpoint = "endpoint33";
-        let session_token = "fake_session_token";
-        let location = "s3://bucket/path/file.parquet";
+        let session_token = "FAKE_session_token";
+        let location = "s3://bucket/path/FAKE/file.parquet";

         let table_url = ListingTableUrl::parse(location)?;
         let scheme = table_url.scheme();
diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index 3a07a238a4c9..37303c92f2cb 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -110,7 +110,7 @@ macro_rules! config_namespace {
         $vis:vis struct $struct_name:ident {
             $(
                 $(#[doc = $d:tt])*
-                $field_vis:vis $field_name:ident : $field_type:ty, default = $default:expr
+                $field_vis:vis $field_name:ident : $field_type:ty, $(transform = $tf:expr,)? default = $default:expr
             )*$(,)*
         }
     ) => {
@@ -127,9 +127,13 @@ macro_rules! config_namespace {
         impl ConfigField for $struct_name {
             fn set(&mut self, key: &str, value: &str) -> Result<()> {
                 let (key, rem) = key.split_once('.').unwrap_or((key, ""));
+
                 match key {
                     $(
-                        stringify!($field_name) => self.$field_name.set(rem, value),
+                        stringify!($field_name) => {
+                            $(let value = $tf(value);)?
+                            self.$field_name.set(rem, value.as_ref())
+                        },
                     )*
                     _ => return _config_err!(
                         "Config value \"{}\" not found on {}", key, stringify!($struct_name)
@@ -212,11 +216,11 @@ config_namespace! {
     pub enable_ident_normalization: bool, default = true

     /// When set to true, SQL parser will normalize options value (convert value to lowercase)
-    pub enable_options_value_normalization: bool, default = true
+    pub enable_options_value_normalization: bool, default = false

     /// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic,
     /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
-    pub dialect: String, default = "generic".to_string()
+    pub dialect: String, default = "generic".to_string() // no need to lowercase because this is handled by [`sqlparser::dialect_from_str`]

     /// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but
     /// ignore the length. If false, error if a `VARCHAR` with a length is
@@ -257,7 +261,7 @@ config_namespace! {
     ///
     /// Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime
     /// according to this time zone, and then extract the hour
-    pub time_zone: Option<String>, default = Some("+00:00".into())
+    pub time_zone: Option<String>, default = Some("+00:00".into()) // todo: to_upper?

     /// Parquet options
     pub parquet: ParquetOptions, default = Default::default()
@@ -431,7 +435,7 @@ config_namespace! {
     ///
     /// Note that this default setting is not the same as
     /// the default parquet writer setting.
-    pub compression: Option<String>, default = Some("zstd(3)".into())
+    pub compression: Option<String>, transform = str::to_lowercase, default = Some("zstd(3)".into())

     /// (writing) Sets if dictionary encoding is enabled. If NULL, uses
     /// default parquet writer setting
@@ -444,7 +448,7 @@ config_namespace! {
     /// Valid values are: "none", "chunk", and "page"
     /// These values are not case sensitive. If NULL, uses
     /// default parquet writer setting
-    pub statistics_enabled: Option<String>, default = Some("page".into())
+    pub statistics_enabled: Option<String>, transform = str::to_lowercase, default = Some("page".into())

     /// (writing) Sets max statistics size for any column. If NULL, uses
     /// default parquet writer setting
@@ -470,7 +474,7 @@ config_namespace! {
     /// delta_byte_array, rle_dictionary, and byte_stream_split.
     /// These values are not case sensitive. If NULL, uses
     /// default parquet writer setting
-    pub encoding: Option<String>, default = None
+    pub encoding: Option<String>, transform = str::to_lowercase, default = None

     /// (writing) Use any available bloom filters when reading parquet files
     pub bloom_filter_on_read: bool, default = true
@@ -973,19 +977,28 @@ impl<F: ConfigField + Default> ConfigField for Option<F> {

 #[macro_export]
 macro_rules! config_field {
-    ($t:ty) => {
+    ($t:ty $(, $transform:expr)?) => {
         impl ConfigField for $t {
             fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) {
                 v.some(key, self, description)
             }

             fn set(&mut self, _: &str, value: &str) -> Result<()> {
+                $(
+                    let value = $transform(&value);
+                )?
+
                 *self = value.parse().map_err(|e| {
                     DataFusionError::Context(
-                        format!(concat!("Error parsing {} as ", stringify!($t),), value),
+                        format!(
+                            "Error parsing '{}' as {}",
+                            value,
+                            stringify!($t),
+                        ),
                         Box::new(DataFusionError::External(Box::new(e))),
                     )
                 })?;
+
                 Ok(())
             }
         }
@@ -993,7 +1006,7 @@ macro_rules! config_field {
 }

 config_field!(String);
-config_field!(bool);
+config_field!(bool, str::to_lowercase);
 config_field!(usize);
 config_field!(f64);
 config_field!(u64);
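To make the macro change concrete, here is a hand-expanded sketch (an approximation written for this review, not literal macro output) of what `config_field!(bool, str::to_lowercase)` now generates. The optional transform runs before `parse()`, which is what makes boolean config values case insensitive:

```rust
impl ConfigField for bool {
    // visit() elided; it is unchanged by this PR.
    fn set(&mut self, _: &str, value: &str) -> Result<()> {
        // Emitted only when a `transform` argument is supplied to the macro.
        let value = str::to_lowercase(&value);

        *self = value.parse().map_err(|e| {
            DataFusionError::Context(
                // The offending value is now quoted, e.g. "Error parsing 'ttruee' as bool".
                format!("Error parsing '{}' as {}", value, "bool"),
                Box::new(DataFusionError::External(Box::new(e))),
            )
        })?;

        Ok(())
    }
}
```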
diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs
index d8fad5b6cd37..2cea37fe17e2 100644
--- a/datafusion/core/src/datasource/stream.rs
+++ b/datafusion/core/src/datasource/stream.rs
@@ -62,7 +62,7 @@ impl TableProviderFactory for StreamTableFactory {
         let header = if let Ok(opt) = cmd
             .options
             .get("format.has_header")
-            .map(|has_header| bool::from_str(has_header))
+            .map(|has_header| bool::from_str(has_header.to_lowercase().as_str()))
             .transpose()
         {
             opt.unwrap_or(false)
diff --git a/datafusion/core/tests/config_from_env.rs b/datafusion/core/tests/config_from_env.rs
index a5a5a4524e60..f172ad724937 100644
--- a/datafusion/core/tests/config_from_env.rs
+++ b/datafusion/core/tests/config_from_env.rs
@@ -21,11 +21,21 @@ use std::env;
 #[test]
 fn from_env() {
     // Note: these must be a single test to avoid interference from concurrent execution
+
     let env_key = "DATAFUSION_OPTIMIZER_FILTER_NULL_JOIN_KEYS";
-    env::set_var(env_key, "true");
-    let config = ConfigOptions::from_env().unwrap();
+    // valid testing
+    for bool_option in ["true", "TRUE", "True"] {
+        env::set_var(env_key, bool_option);
+        let config = ConfigOptions::from_env().unwrap();
+        env::remove_var(env_key);
+        assert!(config.optimizer.filter_null_join_keys);
+    }
+
+    // invalid testing
+    env::set_var(env_key, "ttruee");
+    let err = ConfigOptions::from_env().unwrap_err().strip_backtrace();
+    assert_eq!(err, "Error parsing 'ttruee' as bool\ncaused by\nExternal error: provided string was not `true` or `false`");
     env::remove_var(env_key);
-    assert!(config.optimizer.filter_null_join_keys);

     let env_key = "DATAFUSION_EXECUTION_BATCH_SIZE";

@@ -37,7 +47,7 @@ fn from_env() {
     // for invalid testing
     env::set_var(env_key, "abc");
     let err = ConfigOptions::from_env().unwrap_err().strip_backtrace();
-    assert_eq!(err, "Error parsing abc as usize\ncaused by\nExternal error: invalid digit found in string");
+    assert_eq!(err, "Error parsing 'abc' as usize\ncaused by\nExternal error: invalid digit found in string");
     env::remove_var(env_key);

     let config = ConfigOptions::from_env().unwrap();
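A minimal usage sketch of the behavior the updated test pins down (same `ConfigOptions::from_env` API the test exercises; the import path is assumed to match the test file):

```rust
use datafusion_common::config::ConfigOptions;

fn main() {
    // `config_field!(bool, str::to_lowercase)` lowercases before parsing,
    // so any casing of a boolean env value is now accepted.
    std::env::set_var("DATAFUSION_OPTIMIZER_FILTER_NULL_JOIN_KEYS", "TRUE");
    let config = ConfigOptions::from_env().unwrap();
    assert!(config.optimizer.filter_null_join_keys);
    std::env::remove_var("DATAFUSION_OPTIMIZER_FILTER_NULL_JOIN_KEYS");
}
```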
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 496088b1e171..2dafbd4e7433 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -56,7 +56,7 @@ impl Default for ParserOptions {
             parse_float_as_decimal: false,
             enable_ident_normalization: true,
             support_varchar_with_length: true,
-            enable_options_value_normalization: true,
+            enable_options_value_normalization: false,
         }
     }
 }
diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt
index ed001cf9f84c..6a63ea1cd3e4 100644
--- a/datafusion/sqllogictest/test_files/create_external_table.slt
+++ b/datafusion/sqllogictest/test_files/create_external_table.slt
@@ -226,6 +226,20 @@ OPTIONS (
     has_header false,
     compression gzip);

+# Verify that some options are case insensitive
+statement ok
+CREATE EXTERNAL TABLE IF NOT EXISTS region (
+    r_regionkey BIGINT,
+    r_name VARCHAR,
+    r_comment VARCHAR,
+    r_rev VARCHAR,
+) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
+OPTIONS (
+    format.delimiter '|',
+    has_header FALSE,
+    compression GZIP);
+
+
 # Create an external parquet table and infer schema to order by

 # query should succeed
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt
index 4d51a61c8a52..ce348570c530 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -258,7 +258,7 @@ datafusion.optimizer.skip_failed_rules false
 datafusion.optimizer.top_down_join_key_reordering true
 datafusion.sql_parser.dialect generic
 datafusion.sql_parser.enable_ident_normalization true
-datafusion.sql_parser.enable_options_value_normalization true
+datafusion.sql_parser.enable_options_value_normalization false
 datafusion.sql_parser.parse_float_as_decimal false
 datafusion.sql_parser.support_varchar_with_length true

@@ -351,7 +351,7 @@ datafusion.optimizer.skip_failed_rules false When set to true, the logical plan
 datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys
 datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
 datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
-datafusion.sql_parser.enable_options_value_normalization true When set to true, SQL parser will normalize options value (convert value to lowercase)
+datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase)
 datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
 datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.
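A sketch of the observable parser-side change, assuming `ParserOptions` is reachable at `datafusion_sql::planner::ParserOptions` with public fields (as the patched file suggests):

```rust
use datafusion_sql::planner::ParserOptions;

fn main() {
    let opts = ParserOptions::default();
    // Option values are no longer blanket-lowercased by the SQL parser;
    // case-insensitivity is handled per option via the config transforms above,
    // which is why `has_header FALSE` and `compression GZIP` still work.
    assert!(!opts.enable_options_value_normalization);
}
```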
diff --git a/datafusion/sqllogictest/test_files/set_variable.slt b/datafusion/sqllogictest/test_files/set_variable.slt
index 6f19c9f4d42f..bb4ac920d032 100644
--- a/datafusion/sqllogictest/test_files/set_variable.slt
+++ b/datafusion/sqllogictest/test_files/set_variable.slt
@@ -93,10 +93,10 @@ datafusion.execution.coalesce_batches false
 statement ok
 set datafusion.catalog.information_schema = true

-statement error DataFusion error: Error parsing 1 as bool
+statement error DataFusion error: Error parsing '1' as bool
 SET datafusion.execution.coalesce_batches to 1

-statement error DataFusion error: Error parsing abc as bool
+statement error DataFusion error: Error parsing 'abc' as bool
 SET datafusion.execution.coalesce_batches to abc

 # set u64 variable
@@ -132,10 +132,10 @@ datafusion.execution.batch_size 2
 statement ok
 set datafusion.catalog.information_schema = true

-statement error DataFusion error: Error parsing -1 as usize
+statement error DataFusion error: Error parsing '-1' as usize
 SET datafusion.execution.batch_size to -1

-statement error DataFusion error: Error parsing abc as usize
+statement error DataFusion error: Error parsing 'abc' as usize
 SET datafusion.execution.batch_size to abc

 statement error External error: invalid digit found in string
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md
index 6a49fda668a9..304b0efe5b65 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -122,6 +122,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus
 | datafusion.explain.show_schema | false | When set to true, the explain statement will print schema information |
 | datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type |
 | datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) |
-| datafusion.sql_parser.enable_options_value_normalization | true | When set to true, SQL parser will normalize options value (convert value to lowercase) |
+| datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase) |
 | datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. |
 | datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |
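Finally, a usage sketch of the new quoted error format outside of SQL, via the public `ConfigOptions::set` API (the message text is taken verbatim from the tests above):

```rust
use datafusion_common::config::ConfigOptions;

fn main() {
    let mut config = ConfigOptions::new();
    // A bad value fails to parse, and the message now quotes it.
    let err = config
        .set("datafusion.execution.batch_size", "abc")
        .unwrap_err();
    assert!(err.to_string().contains("Error parsing 'abc' as usize"));
}
```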