From c2dca644379008dc3af17c86161235040c8f466c Mon Sep 17 00:00:00 2001 From: Edmondo Porcu Date: Fri, 19 Apr 2024 20:11:03 +0000 Subject: [PATCH] Adding a failed tests for statistics --- .../physical_plan/parquet/statistics.rs | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs index aac5aff80f16..61c79ff78439 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/statistics.rs @@ -571,6 +571,51 @@ mod test { ); } + #[test] + fn multiple_rows_with_struct() { + let struct_col = struct_array(vec![ + // row group 1 + (Some(true), Some(1)), + (None, None), + (Some(false), Some(3)), + ]); + + let expected_max = struct_array(vec![(Some(true), Some(3))]); + + let expected_min = struct_array(vec![(Some(false), Some(1))]); + + let input_batch = + RecordBatch::try_from_iter([("struct_col", struct_col.clone())]).unwrap(); + + let schema = input_batch.schema(); + + let metadata = parquet_metadata(schema.clone(), input_batch); + let parquet_schema = metadata.file_metadata().schema_descr(); + + // read the struct_col statistics + let (idx, _) = parquet_column(parquet_schema, &schema, "struct_col").unwrap(); + assert_eq!(idx, 1); + + let row_groups = metadata.row_groups(); + let iter = row_groups.iter().map(|x| x.column(idx).statistics()); + + let min = min_statistics(&DataType::Int32, iter.clone()).unwrap(); + assert_eq!( + &min, + &expected_min, + "Min. Statistics\n\n{}\n\n", + DisplayStats(row_groups) + ); + + let max = max_statistics(&DataType::Int32, iter).unwrap(); + assert_eq!( + &max, + &expected_max, + "Max. Statistics\n\n{}\n\n", + DisplayStats(row_groups) + ); + } + #[test] fn nan_in_stats() { // /parquet-testing/data/nan_in_stats.parquet