From 2887bb91635e1a746bfc803a37bbce59f3e097ff Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 26 Nov 2024 22:32:23 +0000 Subject: [PATCH 1/9] Do not normalize values --- datafusion/common/src/config.rs | 2 +- datafusion/sql/src/planner.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 1ad10d164868..39664af7f208 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -211,7 +211,7 @@ config_namespace! { pub enable_ident_normalization: bool, default = true /// When set to true, SQL parser will normalize options value (convert value to lowercase) - pub enable_options_value_normalization: bool, default = true + pub enable_options_value_normalization: bool, default = false /// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index ccb2ccf7126f..51a71906ae4f 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -56,7 +56,7 @@ impl Default for ParserOptions { parse_float_as_decimal: false, enable_ident_normalization: true, support_varchar_with_length: true, - enable_options_value_normalization: true, + enable_options_value_normalization: false, } } } From 813d6345815440b1d9a94a913e172be537ea8378 Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 26 Nov 2024 23:25:45 +0000 Subject: [PATCH 2/9] Fix tests & update docs --- datafusion/sqllogictest/test_files/information_schema.slt | 4 ++-- docs/source/user-guide/configs.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 4d51a61c8a52..ce348570c530 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -258,7 +258,7 @@ datafusion.optimizer.skip_failed_rules false datafusion.optimizer.top_down_join_key_reordering true datafusion.sql_parser.dialect generic datafusion.sql_parser.enable_ident_normalization true -datafusion.sql_parser.enable_options_value_normalization true +datafusion.sql_parser.enable_options_value_normalization false datafusion.sql_parser.parse_float_as_decimal false datafusion.sql_parser.support_varchar_with_length true @@ -351,7 +351,7 @@ datafusion.optimizer.skip_failed_rules false When set to true, the logical plan datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) -datafusion.sql_parser.enable_options_value_normalization true When set to true, SQL parser will normalize options value (convert value to lowercase) +datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase) datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 6a49fda668a9..a34e1360d6cc 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -36,7 +36,7 @@ If the value in the environment variable cannot be cast to the type of the confi Environment variables are read during `SessionConfig` initialisation so they must be set beforehand and will not affect running sessions. | key | default | description | -| ----------------------------------------------------------------------- | ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| ----------------------------------------------------------------------- |---------------------------| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | datafusion.catalog.create_default_catalog_and_schema | true | Whether the default catalog and schema should be created automatically. | | datafusion.catalog.default_catalog | datafusion | The default catalog name - this impacts what SQL queries use if not specified | | datafusion.catalog.default_schema | public | The default schema name - this impacts what SQL queries use if not specified | @@ -122,6 +122,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.explain.show_schema | false | When set to true, the explain statement will print schema information | | datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type | | datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) | -| datafusion.sql_parser.enable_options_value_normalization | true | When set to true, SQL parser will normalize options value (convert value to lowercase) | +| datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase) | | datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. | | datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. | From c3de620c86643e1cf0c2b2b7d06e77df29e9083e Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 26 Nov 2024 23:29:59 +0000 Subject: [PATCH 3/9] Prettier --- docs/source/user-guide/configs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index a34e1360d6cc..304b0efe5b65 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -36,7 +36,7 @@ If the value in the environment variable cannot be cast to the type of the confi Environment variables are read during `SessionConfig` initialisation so they must be set beforehand and will not affect running sessions. | key | default | description | -| ----------------------------------------------------------------------- |---------------------------| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| ----------------------------------------------------------------------- | ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | datafusion.catalog.create_default_catalog_and_schema | true | Whether the default catalog and schema should be created automatically. | | datafusion.catalog.default_catalog | datafusion | The default catalog name - this impacts what SQL queries use if not specified | | datafusion.catalog.default_schema | public | The default schema name - this impacts what SQL queries use if not specified | From 7c2b3fe7536278df4c9d4d89928020054bdf00d9 Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 3 Dec 2024 18:01:21 +0000 Subject: [PATCH 4/9] Lowercase config params --- datafusion-cli/src/object_storage.rs | 9 ++-- datafusion/common/src/config.rs | 43 +++++++++++++------ datafusion/core/src/datasource/stream.rs | 2 +- datafusion/core/tests/config_from_env.rs | 17 ++++++-- .../test_files/create_external_table.slt | 14 ++++++ .../sqllogictest/test_files/set_variable.slt | 8 ++-- 6 files changed, 66 insertions(+), 27 deletions(-) diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs index 3d999766e03f..a48f7aaa6f0b 100644 --- a/datafusion-cli/src/object_storage.rs +++ b/datafusion-cli/src/object_storage.rs @@ -471,12 +471,13 @@ mod tests { #[tokio::test] async fn s3_object_store_builder() -> Result<()> { - let access_key_id = "fake_access_key_id"; - let secret_access_key = "fake_secret_access_key"; + // "fake" is uppercase to ensure the values are not lowercased when parsed + let access_key_id = "FAKE_access_key_id"; + let secret_access_key = "FAKE_secret_access_key"; let region = "fake_us-east-2"; let endpoint = "endpoint33"; - let session_token = "fake_session_token"; - let location = "s3://bucket/path/file.parquet"; + let session_token = "FAKE_session_token"; + let location = "s3://bucket/path/FAKE/file.parquet"; let table_url = ListingTableUrl::parse(location)?; let scheme = table_url.scheme(); diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 2c9375896446..fa073c939f6d 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -38,7 +38,7 @@ use crate::{DataFusionError, Result}; /// /// Amazing config /// pub struct MyConfig { /// /// Field 1 doc -/// field1: String, default = "".to_string() +/// field1: String, transform = str::to_lowercase, default = "".to_string() /// /// /// Field 2 doc /// field2: usize, default = 232 @@ -67,9 +67,12 @@ use crate::{DataFusionError, Result}; /// fn set(&mut self, key: &str, value: &str) -> Result<()> { /// let (key, rem) = key.split_once('.').unwrap_or((key, "")); /// match key { -/// "field1" => self.field1.set(rem, value), -/// "field2" => self.field2.set(rem, value), -/// "field3" => self.field3.set(rem, value), +/// "field1" => { +/// let value = str::to_lowercase(value); +/// self.field1.set(rem, value.as_ref()) +/// }, +/// "field2" => self.field2.set(rem, value.as_ref()), +/// "field3" => self.field3.set(rem, value.as_ref()), /// _ => _internal_err!( /// "Config value \"{}\" not found on MyConfig", /// key @@ -102,7 +105,6 @@ use crate::{DataFusionError, Result}; /// ``` /// /// NB: Misplaced commas may result in nonsensical errors -/// #[macro_export] macro_rules! config_namespace { ( @@ -110,7 +112,7 @@ macro_rules! config_namespace { $vis:vis struct $struct_name:ident { $( $(#[doc = $d:tt])* - $field_vis:vis $field_name:ident : $field_type:ty, default = $default:expr + $field_vis:vis $field_name:ident : $field_type:ty, $(transform = $transform:expr,)? default = $default:expr )*$(,)* } ) => { @@ -127,9 +129,13 @@ macro_rules! config_namespace { impl ConfigField for $struct_name { fn set(&mut self, key: &str, value: &str) -> Result<()> { let (key, rem) = key.split_once('.').unwrap_or((key, "")); + match key { $( - stringify!($field_name) => self.$field_name.set(rem, value), + stringify!($field_name) => { + $(let value = $transform(value);)? + self.$field_name.set(rem, value.as_ref()) + }, )* _ => return _config_err!( "Config value \"{}\" not found on {}", key, stringify!($struct_name) @@ -216,7 +222,8 @@ config_namespace! { /// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. - pub dialect: String, default = "generic".to_string() + pub dialect: String, default = "generic".to_string() // no need to lowercase because + // [`sqlparser::dialect_from_str`] is case-insensitive /// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but /// ignore the length. If false, error if a `VARCHAR` with a length is @@ -431,7 +438,7 @@ config_namespace! { /// /// Note that this default setting is not the same as /// the default parquet writer setting. - pub compression: Option, default = Some("zstd(3)".into()) + pub compression: Option, transform = str::to_lowercase, default = Some("zstd(3)".into()) /// (writing) Sets if dictionary encoding is enabled. If NULL, uses /// default parquet writer setting @@ -444,7 +451,7 @@ config_namespace! { /// Valid values are: "none", "chunk", and "page" /// These values are not case sensitive. If NULL, uses /// default parquet writer setting - pub statistics_enabled: Option, default = Some("page".into()) + pub statistics_enabled: Option, transform = str::to_lowercase, default = Some("page".into()) /// (writing) Sets max statistics size for any column. If NULL, uses /// default parquet writer setting @@ -470,7 +477,7 @@ config_namespace! { /// delta_byte_array, rle_dictionary, and byte_stream_split. /// These values are not case sensitive. If NULL, uses /// default parquet writer setting - pub encoding: Option, default = None + pub encoding: Option, transform = str::to_lowercase, default = None /// (writing) Use any available bloom filters when reading parquet files pub bloom_filter_on_read: bool, default = true @@ -973,16 +980,24 @@ impl ConfigField for Option { #[macro_export] macro_rules! config_field { - ($t:ty) => { + ($t:ty $(, $transform:expr)?) => { impl ConfigField for $t { fn visit(&self, v: &mut V, key: &str, description: &'static str) { v.some(key, self, description) } fn set(&mut self, _: &str, value: &str) -> Result<()> { + $( + let value = $transform(&value); + )? + *self = value.parse().map_err(|e| { DataFusionError::Context( - format!(concat!("Error parsing {} as ", stringify!($t),), value), + format!( + "Error parsing '{}' as {}", + value, + stringify!($t), + ), Box::new(DataFusionError::External(Box::new(e))), ) })?; @@ -993,7 +1008,7 @@ macro_rules! config_field { } config_field!(String); -config_field!(bool); +config_field!(bool, str::to_lowercase); config_field!(usize); config_field!(f64); config_field!(u64); diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index d8fad5b6cd37..2cea37fe17e2 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -62,7 +62,7 @@ impl TableProviderFactory for StreamTableFactory { let header = if let Ok(opt) = cmd .options .get("format.has_header") - .map(|has_header| bool::from_str(has_header)) + .map(|has_header| bool::from_str(has_header.to_lowercase().as_str())) .transpose() { opt.unwrap_or(false) diff --git a/datafusion/core/tests/config_from_env.rs b/datafusion/core/tests/config_from_env.rs index a5a5a4524e60..76bde4a37d6d 100644 --- a/datafusion/core/tests/config_from_env.rs +++ b/datafusion/core/tests/config_from_env.rs @@ -22,10 +22,19 @@ use std::env; fn from_env() { // Note: these must be a single test to avoid interference from concurrent execution let env_key = "DATAFUSION_OPTIMIZER_FILTER_NULL_JOIN_KEYS"; - env::set_var(env_key, "true"); - let config = ConfigOptions::from_env().unwrap(); + // valid testing in different cases + for bool_option in ["true", "TRUE", "True"] { + env::set_var(env_key, bool_option); + let config = ConfigOptions::from_env().unwrap(); + env::remove_var(env_key); + assert!(config.optimizer.filter_null_join_keys); + } + + // invalid testing + env::set_var(env_key, "ttruee"); + let err = ConfigOptions::from_env().unwrap_err().strip_backtrace(); + assert_eq!(err, "Error parsing 'ttruee' as bool\ncaused by\nExternal error: provided string was not `true` or `false`"); env::remove_var(env_key); - assert!(config.optimizer.filter_null_join_keys); let env_key = "DATAFUSION_EXECUTION_BATCH_SIZE"; @@ -37,7 +46,7 @@ fn from_env() { // for invalid testing env::set_var(env_key, "abc"); let err = ConfigOptions::from_env().unwrap_err().strip_backtrace(); - assert_eq!(err, "Error parsing abc as usize\ncaused by\nExternal error: invalid digit found in string"); + assert_eq!(err, "Error parsing 'abc' as usize\ncaused by\nExternal error: invalid digit found in string"); env::remove_var(env_key); let config = ConfigOptions::from_env().unwrap(); diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt index ed001cf9f84c..6a63ea1cd3e4 100644 --- a/datafusion/sqllogictest/test_files/create_external_table.slt +++ b/datafusion/sqllogictest/test_files/create_external_table.slt @@ -226,6 +226,20 @@ OPTIONS ( has_header false, compression gzip); +# Verify that some options are case insensitive +statement ok +CREATE EXTERNAL TABLE IF NOT EXISTS region ( + r_regionkey BIGINT, + r_name VARCHAR, + r_comment VARCHAR, + r_rev VARCHAR, +) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' +OPTIONS ( + format.delimiter '|', + has_header FALSE, + compression GZIP); + + # Create an external parquet table and infer schema to order by # query should succeed diff --git a/datafusion/sqllogictest/test_files/set_variable.slt b/datafusion/sqllogictest/test_files/set_variable.slt index 6f19c9f4d42f..bb4ac920d032 100644 --- a/datafusion/sqllogictest/test_files/set_variable.slt +++ b/datafusion/sqllogictest/test_files/set_variable.slt @@ -93,10 +93,10 @@ datafusion.execution.coalesce_batches false statement ok set datafusion.catalog.information_schema = true -statement error DataFusion error: Error parsing 1 as bool +statement error DataFusion error: Error parsing '1' as bool SET datafusion.execution.coalesce_batches to 1 -statement error DataFusion error: Error parsing abc as bool +statement error DataFusion error: Error parsing 'abc' as bool SET datafusion.execution.coalesce_batches to abc # set u64 variable @@ -132,10 +132,10 @@ datafusion.execution.batch_size 2 statement ok set datafusion.catalog.information_schema = true -statement error DataFusion error: Error parsing -1 as usize +statement error DataFusion error: Error parsing '-1' as usize SET datafusion.execution.batch_size to -1 -statement error DataFusion error: Error parsing abc as usize +statement error DataFusion error: Error parsing 'abc' as usize SET datafusion.execution.batch_size to abc statement error External error: invalid digit found in string From 0574ab8b449f454f54ae1c3c39bc0a63377ecd5c Mon Sep 17 00:00:00 2001 From: blaginin Date: Mon, 9 Dec 2024 15:29:13 +0000 Subject: [PATCH 5/9] Unify transform and parse --- datafusion/common/src/config.rs | 40 +++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index fa073c939f6d..78323e7da803 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -19,6 +19,7 @@ use std::any::Any; use std::collections::{BTreeMap, HashMap}; +use std::error::Error; use std::fmt::{self, Display}; use std::str::FromStr; @@ -978,29 +979,34 @@ impl ConfigField for Option { } } +fn parse(input: &str) -> Result +where + T: FromStr, + T::Err: Display, + ::Err: Sync + Send + Error + 'static, +{ + input.parse().map_err(|e| { + DataFusionError::Context( + format!("Error parsing '{}' as {}", input, stringify!(T),), + Box::new(DataFusionError::External(Box::new(e))), + ) + }) +} + #[macro_export] macro_rules! config_field { - ($t:ty $(, $transform:expr)?) => { + ($t:ty) => { + config_field!($t, value => parse(value)?); + }; + + ($t:ty, $arg:ident => $transform:expr) => { impl ConfigField for $t { fn visit(&self, v: &mut V, key: &str, description: &'static str) { v.some(key, self, description) } - fn set(&mut self, _: &str, value: &str) -> Result<()> { - $( - let value = $transform(&value); - )? - - *self = value.parse().map_err(|e| { - DataFusionError::Context( - format!( - "Error parsing '{}' as {}", - value, - stringify!($t), - ), - Box::new(DataFusionError::External(Box::new(e))), - ) - })?; + fn set(&mut self, _: &str, $arg: &str) -> Result<()> { + *self = $transform; Ok(()) } } @@ -1008,7 +1014,7 @@ macro_rules! config_field { } config_field!(String); -config_field!(bool, str::to_lowercase); +config_field!(bool, value => parse(value.to_lowercase().as_str())?); config_field!(usize); config_field!(f64); config_field!(u64); From 06c013d7113d8618b12e490fb45533f8e6fdf19e Mon Sep 17 00:00:00 2001 From: blaginin Date: Mon, 9 Dec 2024 15:56:08 +0000 Subject: [PATCH 6/9] Fix tests --- datafusion/common/src/config.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 78323e7da803..289057053785 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -987,7 +987,11 @@ where { input.parse().map_err(|e| { DataFusionError::Context( - format!("Error parsing '{}' as {}", input, stringify!(T),), + format!( + "Error parsing '{}' as {}", + input, + std::any::type_name::() + ), Box::new(DataFusionError::External(Box::new(e))), ) }) From 967f79fec9740f68ae30a67d82140f2953fed1d8 Mon Sep 17 00:00:00 2001 From: blaginin Date: Wed, 11 Dec 2024 18:30:44 +0000 Subject: [PATCH 7/9] Rename `default_transform` and relax boundaries --- datafusion/common/src/config.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 289057053785..f715542647cf 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -979,10 +979,9 @@ impl ConfigField for Option { } } -fn parse(input: &str) -> Result +fn default_transform(input: &str) -> Result where T: FromStr, - T::Err: Display, ::Err: Sync + Send + Error + 'static, { input.parse().map_err(|e| { @@ -1000,7 +999,7 @@ where #[macro_export] macro_rules! config_field { ($t:ty) => { - config_field!($t, value => parse(value)?); + config_field!($t, value => default_transform(value)?); }; ($t:ty, $arg:ident => $transform:expr) => { @@ -1018,7 +1017,7 @@ macro_rules! config_field { } config_field!(String); -config_field!(bool, value => parse(value.to_lowercase().as_str())?); +config_field!(bool, value => default_transform(value.to_lowercase().as_str())?); config_field!(usize); config_field!(f64); config_field!(u64); From aea717d989e637d03fbea29ec1752cde721972ec Mon Sep 17 00:00:00 2001 From: blaginin Date: Wed, 11 Dec 2024 18:39:33 +0000 Subject: [PATCH 8/9] Make `compression` case-insensitive --- datafusion/common/src/config.rs | 9 ++++++--- datafusion/core/tests/config_from_env.rs | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index f715542647cf..bdaf550129c1 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1532,7 +1532,7 @@ macro_rules! config_namespace_with_hashmap { $vis:vis struct $struct_name:ident { $( $(#[doc = $d:tt])* - $field_vis:vis $field_name:ident : $field_type:ty, default = $default:expr + $field_vis:vis $field_name:ident : $field_type:ty, $(transform = $transform:expr,)? default = $default:expr )*$(,)* } ) => { @@ -1551,7 +1551,10 @@ macro_rules! config_namespace_with_hashmap { let (key, rem) = key.split_once('.').unwrap_or((key, "")); match key { $( - stringify!($field_name) => self.$field_name.set(rem, value), + stringify!($field_name) => { + $(let value = $transform(value);)? + self.$field_name.set(rem, value.as_ref()) + }, )* _ => _config_err!( "Config value \"{}\" not found on {}", key, stringify!($struct_name) @@ -1630,7 +1633,7 @@ config_namespace_with_hashmap! { /// lzo, brotli(level), lz4, zstd(level), and lz4_raw. /// These values are not case-sensitive. If NULL, uses /// default parquet options - pub compression: Option, default = None + pub compression: Option, transform = str::to_lowercase, default = None /// Sets if statistics are enabled for the column /// Valid values are: "none", "chunk", and "page" diff --git a/datafusion/core/tests/config_from_env.rs b/datafusion/core/tests/config_from_env.rs index 76bde4a37d6d..976597c8a9ac 100644 --- a/datafusion/core/tests/config_from_env.rs +++ b/datafusion/core/tests/config_from_env.rs @@ -23,7 +23,7 @@ fn from_env() { // Note: these must be a single test to avoid interference from concurrent execution let env_key = "DATAFUSION_OPTIMIZER_FILTER_NULL_JOIN_KEYS"; // valid testing in different cases - for bool_option in ["true", "TRUE", "True"] { + for bool_option in ["true", "TRUE", "True", "tRUe"] { env::set_var(env_key, bool_option); let config = ConfigOptions::from_env().unwrap(); env::remove_var(env_key); From 64ebbd5db81e34bf600afc992e7a3f7ef20a8e32 Mon Sep 17 00:00:00 2001 From: blaginin Date: Wed, 11 Dec 2024 18:55:36 +0000 Subject: [PATCH 9/9] Comment to new line --- datafusion/common/src/config.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index bdaf550129c1..de5ea49771bd 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -223,8 +223,8 @@ config_namespace! { /// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. - pub dialect: String, default = "generic".to_string() // no need to lowercase because - // [`sqlparser::dialect_from_str`] is case-insensitive + pub dialect: String, default = "generic".to_string() + // no need to lowercase because `sqlparser::dialect_from_str`] is case-insensitive /// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but /// ignore the length. If false, error if a `VARCHAR` with a length is