Fix S3 in CLI: Do not normalize options values #13576

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 35 additions & 12 deletions datafusion/common/src/config.rs
@@ -110,7 +110,7 @@ macro_rules! config_namespace {
$vis:vis struct $struct_name:ident {
$(
$(#[doc = $d:tt])*
$field_vis:vis $field_name:ident : $field_type:ty, default = $default:expr
$field_vis:vis $field_name:ident : $field_type:ty, $(transform = $tf:expr,)? default = $default:expr
)*$(,)*
}
) => {
@@ -127,9 +127,13 @@
impl ConfigField for $struct_name {
fn set(&mut self, key: &str, value: &str) -> Result<()> {
let (key, rem) = key.split_once('.').unwrap_or((key, ""));

match key {
$(
stringify!($field_name) => self.$field_name.set(rem, value),
stringify!($field_name) => {
$(let value = $tf(value);)?
self.$field_name.set(rem, value.as_ref())
},
)*
_ => return _config_err!(
"Config value \"{}\" not found on {}", key, stringify!($struct_name)
@@ -212,11 +216,11 @@ config_namespace! {
pub enable_ident_normalization: bool, default = true

/// When set to true, SQL parser will normalize options value (convert value to lowercase)
pub enable_options_value_normalization: bool, default = true
pub enable_options_value_normalization: bool, default = false

/// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic,
/// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
pub dialect: String, default = "generic".to_string()
pub dialect: String, default = "generic".to_string() // no need to lowercase because this is handled by [`sqlparser::dialect_from_str`]

/// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but
/// ignore the length. If false, error if a `VARCHAR` with a length is
@@ -257,7 +261,7 @@ config_namespace! {
///
/// Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime
/// according to this time zone, and then extract the hour
pub time_zone: Option<String>, default = Some("+00:00".into())
pub time_zone: Option<String>, default = Some("+00:00".into()) // todo: to_upper?

/// Parquet options
pub parquet: ParquetOptions, default = Default::default()
@@ -431,7 +435,7 @@ config_namespace! {
///
/// Note that this default setting is not the same as
/// the default parquet writer setting.
pub compression: Option<String>, default = Some("zstd(3)".into())
pub compression: Option<String>, transform = str::to_lowercase, default = Some("zstd(3)".into())

/// (writing) Sets if dictionary encoding is enabled. If NULL, uses
/// default parquet writer setting
@@ -444,7 +448,7 @@
/// Valid values are: "none", "chunk", and "page"
/// These values are not case sensitive. If NULL, uses
/// default parquet writer setting
pub statistics_enabled: Option<String>, default = Some("page".into())
pub statistics_enabled: Option<String>, transform = str::to_lowercase, default = Some("page".into())

/// (writing) Sets max statistics size for any column. If NULL, uses
/// default parquet writer setting
@@ -470,7 +474,7 @@
/// delta_byte_array, rle_dictionary, and byte_stream_split.
/// These values are not case sensitive. If NULL, uses
/// default parquet writer setting
pub encoding: Option<String>, default = None
pub encoding: Option<String>, transform = str::to_lowercase, default = None

/// (writing) Use any available bloom filters when reading parquet files
pub bloom_filter_on_read: bool, default = true
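With `transform = str::to_lowercase` on these writer options, mixed-case values should be accepted and stored lowercased. A hedged sketch of the expected round-trip via `ConfigOptions` (behavior inferred from this diff, not verified against a build):

```rust
use datafusion_common::config::ConfigOptions;

fn main() -> datafusion_common::Result<()> {
    let mut config = ConfigOptions::new();
    // "ZSTD(3)" should be lowercased by the new transform before parsing.
    config.set("datafusion.execution.parquet.compression", "ZSTD(3)")?;
    assert_eq!(
        config.execution.parquet.compression.as_deref(),
        Some("zstd(3)")
    );
    Ok(())
}
```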
@@ -973,27 +977,46 @@ impl<F: ConfigField + Default> ConfigField for Option<F> {

#[macro_export]
macro_rules! config_field {
($t:ty) => {
($t:ty, $($transform:expr)? $(,)?) => {
impl ConfigField for $t {
fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) {
v.some(key, self, description)
v.some(key, self, description);
}

fn set(&mut self, _: &str, value: &str) -> Result<()> {
// Apply the transform, if one was provided
let value = {
let current_value = value.to_string();
$(
let current_value = $transform(&current_value);
)?
current_value
};

// Parse the final value into the target type
*self = value.parse().map_err(|e| {
DataFusionError::Context(
format!(concat!("Error parsing {} as ", stringify!($t),), value),
format!(
"Error parsing '{}' as {}",
value,
stringify!($t),
),
Box::new(DataFusionError::External(Box::new(e))),
)
})?;

Ok(())
}
}
};

($t:ty) => {
config_field!($t,);
};
}

config_field!(String);
config_field!(bool);
config_field!(bool, str::to_lowercase);
config_field!(usize);
config_field!(f64);
config_field!(u64);
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/stream.rs
@@ -62,7 +62,7 @@ impl TableProviderFactory for StreamTableFactory {
let header = if let Ok(opt) = cmd
.options
.get("format.has_header")
.map(|has_header| bool::from_str(has_header))
.map(|has_header| bool::from_str(has_header.to_lowercase().as_str()))
.transpose()
{
opt.unwrap_or(false)
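The extra lowercasing is needed because `bool::from_str` in the standard library is strict about case:

```rust
use std::str::FromStr;

fn main() {
    // bool's FromStr impl accepts only the exact strings "true" and "false",
    // which is why values like "TRUE" must be lowercased before parsing.
    assert!(bool::from_str("TRUE").is_err());
    assert_eq!(bool::from_str("TRUE".to_lowercase().as_str()), Ok(true));
}
```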
18 changes: 14 additions & 4 deletions datafusion/core/tests/config_from_env.rs
@@ -21,11 +21,21 @@ use std::env;
#[test]
fn from_env() {
// Note: these must be a single test to avoid interference from concurrent execution

let env_key = "DATAFUSION_OPTIMIZER_FILTER_NULL_JOIN_KEYS";
env::set_var(env_key, "true");
let config = ConfigOptions::from_env().unwrap();
// valid testing
for bool_option in ["true", "TRUE", "True"] {
env::set_var(env_key, bool_option);
let config = ConfigOptions::from_env().unwrap();
env::remove_var(env_key);
assert!(config.optimizer.filter_null_join_keys);
}

// invalid testing
env::set_var(env_key, "ttruee");
let err = ConfigOptions::from_env().unwrap_err().strip_backtrace();
assert_eq!(err, "Error parsing 'ttruee' as bool\ncaused by\nExternal error: provided string was not `true` or `false`");
env::remove_var(env_key);
assert!(config.optimizer.filter_null_join_keys);

let env_key = "DATAFUSION_EXECUTION_BATCH_SIZE";

@@ -37,7 +47,7 @@ fn from_env() {
// for invalid testing
env::set_var(env_key, "abc");
let err = ConfigOptions::from_env().unwrap_err().strip_backtrace();
assert_eq!(err, "Error parsing abc as usize\ncaused by\nExternal error: invalid digit found in string");
assert_eq!(err, "Error parsing 'abc' as usize\ncaused by\nExternal error: invalid digit found in string");

env::remove_var(env_key);
let config = ConfigOptions::from_env().unwrap();
4 changes: 4 additions & 0 deletions datafusion/core/tests/data/dates.csv
@@ -0,0 +1,4 @@
date
03/12/2024
31/12/2024

2 changes: 1 addition & 1 deletion datafusion/sql/src/planner.rs
@@ -56,7 +56,7 @@ impl Default for ParserOptions {
parse_float_as_decimal: false,
enable_ident_normalization: true,
support_varchar_with_length: true,
enable_options_value_normalization: true,
enable_options_value_normalization: false,
}
}
}
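Callers who still want the old lowercasing behavior can presumably opt back in per parser instance; a hedged sketch, assuming `ParserOptions` keeps public fields and the `Default` impl shown above:

```rust
use datafusion_sql::planner::ParserOptions;

fn main() {
    // Opt back into lowercasing of option values (the pre-PR behavior).
    let opts = ParserOptions {
        enable_options_value_normalization: true,
        ..Default::default()
    };
    assert!(opts.enable_options_value_normalization);
}
```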
14 changes: 14 additions & 0 deletions datafusion/sqllogictest/test_files/create_external_table.slt
@@ -226,6 +226,20 @@ OPTIONS (
has_header false,
compression gzip);

# Verify that some options are case insensitive
statement ok
CREATE EXTERNAL TABLE IF NOT EXISTS region (
r_regionkey BIGINT,
r_name VARCHAR,
r_comment VARCHAR,
r_rev VARCHAR,
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
OPTIONS (
format.delimiter '|',
has_header FALSE,
compression GZIP);


# Create an external parquet table and infer schema to order by

# query should succeed
12 changes: 12 additions & 0 deletions datafusion/sqllogictest/test_files/csv_files.slt
@@ -362,3 +362,15 @@ OPTIONS ('format.terminator' E'\r', 'format.has_header' 'true');

statement ok
drop table stored_table_with_cr_terminator;


# check that datetime_format is case sensitive
(Inline review comment from the contributor author: that doesn't work, reported a separate issue #13633)

statement ok
CREATE EXTERNAL TABLE dates (
date date
) STORED AS CSV
LOCATION '../core/tests/data/dates.csv'
OPTIONS ('format.date_format' 'dd/MM/yyyy', 'format.has_header' 'TRUE');

statement ok
drop table dates;
4 changes: 2 additions & 2 deletions datafusion/sqllogictest/test_files/information_schema.slt
@@ -258,7 +258,7 @@ datafusion.optimizer.skip_failed_rules false
datafusion.optimizer.top_down_join_key_reordering true
datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.enable_options_value_normalization true
datafusion.sql_parser.enable_options_value_normalization false
datafusion.sql_parser.parse_float_as_decimal false
datafusion.sql_parser.support_varchar_with_length true

@@ -351,7 +351,7 @@ datafusion.optimizer.skip_failed_rules false When set to true, the logical plan
datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization true When set to true, SQL parser will normalize options value (convert value to lowercase)
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase)
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits.

8 changes: 4 additions & 4 deletions datafusion/sqllogictest/test_files/set_variable.slt
@@ -93,10 +93,10 @@ datafusion.execution.coalesce_batches false
statement ok
set datafusion.catalog.information_schema = true

statement error DataFusion error: Error parsing 1 as bool
statement error DataFusion error: Error parsing '1' as bool
SET datafusion.execution.coalesce_batches to 1

statement error DataFusion error: Error parsing abc as bool
statement error DataFusion error: Error parsing 'abc' as bool
SET datafusion.execution.coalesce_batches to abc

# set u64 variable
@@ -132,10 +132,10 @@ datafusion.execution.batch_size 2
statement ok
set datafusion.catalog.information_schema = true

statement error DataFusion error: Error parsing -1 as usize
statement error DataFusion error: Error parsing '-1' as usize
SET datafusion.execution.batch_size to -1

statement error DataFusion error: Error parsing abc as usize
statement error DataFusion error: Error parsing 'abc' as usize
SET datafusion.execution.batch_size to abc

statement error External error: invalid digit found in string
2 changes: 1 addition & 1 deletion docs/source/user-guide/configs.md
@@ -122,6 +122,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.explain.show_schema | false | When set to true, the explain statement will print schema information |
| datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type |
| datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) |
| datafusion.sql_parser.enable_options_value_normalization | true | When set to true, SQL parser will normalize options value (convert value to lowercase) |
| datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase) |
| datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. |
| datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. |