From 5f662cee0ab65084a5037c352e4630a7160c2fb6 Mon Sep 17 00:00:00 2001 From: wyb Date: Tue, 5 Nov 2024 15:11:39 +0800 Subject: [PATCH] [Enhancement] Support merging invalid columns with null in files() Signed-off-by: wyb --- be/src/exec/orc_scanner.cpp | 3 + be/src/exec/parquet_reader.cpp | 2 +- be/src/exec/parquet_reader.h | 3 + be/src/exec/parquet_scanner.cpp | 26 ++-- be/src/formats/orc/orc_chunk_reader.cpp | 3 + test/sql/test_files/R/test_orc_files_merge | 115 ++++++++++++++++++ .../sql/test_files/R/test_parquet_files_merge | 115 ++++++++++++++++++ test/sql/test_files/T/test_orc_files_merge | 59 +++++++++ .../sql/test_files/T/test_parquet_files_merge | 59 +++++++++ test/sql/test_files/orc_format/basic_type.orc | Bin 0 -> 1027 bytes .../orc_format/basic_type_k2k5k7.orc | Bin 0 -> 434 bytes .../parquet_format/basic_type_k2k5k7.parquet | Bin 0 -> 836 bytes 12 files changed, 376 insertions(+), 9 deletions(-) create mode 100644 test/sql/test_files/R/test_orc_files_merge create mode 100644 test/sql/test_files/R/test_parquet_files_merge create mode 100644 test/sql/test_files/T/test_orc_files_merge create mode 100644 test/sql/test_files/T/test_parquet_files_merge create mode 100644 test/sql/test_files/orc_format/basic_type.orc create mode 100644 test/sql/test_files/orc_format/basic_type_k2k5k7.orc create mode 100644 test/sql/test_files/parquet_format/basic_type_k2k5k7.parquet diff --git a/be/src/exec/orc_scanner.cpp b/be/src/exec/orc_scanner.cpp index 080a1ce860459..c8e82221d9cfd 100644 --- a/be/src/exec/orc_scanner.cpp +++ b/be/src/exec/orc_scanner.cpp @@ -86,6 +86,9 @@ Status ORCScanner::open() { RETURN_IF_ERROR(_orc_reader->set_timezone(_state->timezone())); _orc_reader->set_runtime_state(_state); _orc_reader->set_case_sensitive(_case_sensitive); + if (_scan_range.params.__isset.flexible_column_mapping && _scan_range.params.flexible_column_mapping) { + _orc_reader->set_invalid_as_null(true); + } RETURN_IF_ERROR(_open_next_orc_reader()); return Status::OK(); diff --git a/be/src/exec/parquet_reader.cpp b/be/src/exec/parquet_reader.cpp index ec12c8842cc7d..8eeb01c16ff49 100644 --- a/be/src/exec/parquet_reader.cpp +++ b/be/src/exec/parquet_reader.cpp @@ -248,7 +248,7 @@ Status ParquetReaderWrap::column_indices(const std::vector<SlotDescriptor*>& tup for (auto index : iter->second) { _parquet_column_ids.emplace_back(index); } - } else { + } else if (!_invalid_as_null) { std::stringstream str_error; str_error << "Column: " << slot_desc->col_name() << " is not found in file: " << _filename; LOG(WARNING) << str_error.str(); diff --git a/be/src/exec/parquet_reader.h b/be/src/exec/parquet_reader.h index df23659510cc8..9b480b66b8a4f 100644 --- a/be/src/exec/parquet_reader.h +++ b/be/src/exec/parquet_reader.h @@ -74,6 +74,7 @@ class ParquetReaderWrap { int64_t num_rows() { return _num_rows; } Status get_schema(std::vector<SlotDescriptor>* schema); + void set_invalid_as_null(bool invalid_as_null) { _invalid_as_null = invalid_as_null; } private: Status column_indices(const std::vector<SlotDescriptor*>& tuple_slot_descs); @@ -107,6 +108,8 @@ class ParquetReaderWrap { int64_t _read_size; std::string _filename; + + bool _invalid_as_null{false}; }; // Reader of broker parquet file diff --git a/be/src/exec/parquet_scanner.cpp b/be/src/exec/parquet_scanner.cpp index 500cba44b14b1..9d5580f842046 100644 --- a/be/src/exec/parquet_scanner.cpp +++ b/be/src/exec/parquet_scanner.cpp @@ -77,17 +77,21 @@ Status ParquetScanner::initialize_src_chunk(ChunkPtr* chunk) { SCOPED_RAW_TIMER(&_counter->init_chunk_ns); _pool.clear(); (*chunk) = std::make_shared<Chunk>(); - size_t
column_pos = 0; _chunk_filter.clear(); for (auto i = 0; i < _num_of_columns_from_file; ++i) { SlotDescriptor* slot_desc = _src_slot_descriptors[i]; if (slot_desc == nullptr) { continue; } - auto* array = _batch->column(column_pos++).get(); ColumnPtr column; - RETURN_IF_ERROR(new_column(array->type().get(), slot_desc, &column, _conv_funcs[i].get(), &_cast_exprs[i], - _pool, _strict_mode)); + auto array_ptr = _batch->GetColumnByName(slot_desc->col_name()); + if (array_ptr == nullptr) { + _cast_exprs[i] = _pool.add(new ColumnRef(slot_desc)); + column = ColumnHelper::create_column(slot_desc->type(), slot_desc->is_nullable()); + } else { + RETURN_IF_ERROR(new_column(array_ptr->type().get(), slot_desc, &column, _conv_funcs[i].get(), + &_cast_exprs[i], _pool, _strict_mode)); + } column->reserve(_max_chunk_size); (*chunk)->append_column(column, slot_desc->id()); } @@ -98,7 +102,6 @@ Status ParquetScanner::append_batch_to_src_chunk(ChunkPtr* chunk) { SCOPED_RAW_TIMER(&_counter->fill_ns); size_t num_elements = std::min((_max_chunk_size - _chunk_start_idx), (_batch->num_rows() - _batch_start_idx)); - size_t column_pos = 0; _chunk_filter.resize(_chunk_filter.size() + num_elements, 1); for (auto i = 0; i < _num_of_columns_from_file; ++i) { SlotDescriptor* slot_desc = _src_slot_descriptors[i]; if (slot_desc == nullptr) { continue; } _conv_ctx.current_slot = slot_desc; - auto* array = _batch->column(column_pos++).get(); auto& column = (*chunk)->get_column_by_slot_id(slot_desc->id()); - RETURN_IF_ERROR(convert_array_to_column(_conv_funcs[i].get(), num_elements, array, column, _batch_start_idx, - _chunk_start_idx, &_chunk_filter, &_conv_ctx)); + auto array_ptr = _batch->GetColumnByName(slot_desc->col_name()); + if (array_ptr == nullptr) { + (void)column->append_nulls(num_elements); + } else { + RETURN_IF_ERROR(convert_array_to_column(_conv_funcs[i].get(), num_elements, array_ptr.get(), column, + _batch_start_idx, _chunk_start_idx, &_chunk_filter, &_conv_ctx)); + } } _chunk_start_idx += num_elements; @@ -460,6 +467,9 @@ Status ParquetScanner::open_next_reader() { auto parquet_file = std::make_shared<ParquetChunkFile>(file, 0, _counter); auto parquet_reader = std::make_shared<ParquetReaderWrap>(std::move(parquet_file), _num_of_columns_from_file, range_desc.start_offset, range_desc.size); + if (_scan_range.params.__isset.flexible_column_mapping && _scan_range.params.flexible_column_mapping) { + parquet_reader->set_invalid_as_null(true); + } _next_file++; int64_t file_size; RETURN_IF_ERROR(parquet_reader->size(&file_size)); diff --git a/be/src/formats/orc/orc_chunk_reader.cpp b/be/src/formats/orc/orc_chunk_reader.cpp index 4c3559929311d..8b1832ff20360 100644 --- a/be/src/formats/orc/orc_chunk_reader.cpp +++ b/be/src/formats/orc/orc_chunk_reader.cpp @@ -190,6 +190,9 @@ Status OrcChunkReader::init(std::unique_ptr<orc::Reader> reader, const OrcPredic return Status::InternalError(s); } + // _batch can't be reused because the schema may differ between files + _batch.reset(); + // TODO(SmithCruise) delete _init_position_in_orc() when develop subfield lazy load.
RETURN_IF_ERROR(_init_position_in_orc()); RETURN_IF_ERROR(_init_cast_exprs()); diff --git a/test/sql/test_files/R/test_orc_files_merge b/test/sql/test_files/R/test_orc_files_merge new file mode 100644 index 0000000000000..94ee12f2ecaee --- /dev/null +++ b/test/sql/test_files/R/test_orc_files_merge @@ -0,0 +1,115 @@ +-- name: test_orc_files_merge + +create database db_${uuid0}; +use db_${uuid0}; + +shell: ossutil64 mkdir oss://${oss_bucket}/test_files/orc_format/${uuid0} >/dev/null || echo "exit 0" >/dev/null + +shell: ossutil64 cp --force ./sql/test_files/orc_format/basic_type.orc oss://${oss_bucket}/test_files/orc_format/${uuid0}/ | grep -Pv "(average|elapsed)" +-- result: +0 + +Succeed: Total num: 1, size: 1,027. OK num: 1(upload 1 files). +-- !result + +shell: ossutil64 cp --force ./sql/test_files/orc_format/basic_type_k2k5k7.orc oss://${oss_bucket}/test_files/orc_format/${uuid0}/ | grep -Pv "(average|elapsed)" +-- result: +0 + +Succeed: Total num: 1, size: 434. OK num: 1(upload 1 files). +-- !result + + +desc files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/basic_type.orc", + "format" = "orc", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +k1 boolean YES +k2 int YES +k3 bigint YES +k4 decimal(10,2) YES +k5 date YES +k6 datetime YES +k7 varchar(1048576) YES +k8 double YES +-- !result + +desc files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/basic_type_k2k5k7.orc", + "format" = "orc", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +k2 int YES +k5 date YES +k7 varchar(1048576) YES +-- !result + + +select * from files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/basic_type.orc", + "format" = "orc", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +0 1 2 3.20 2024-10-01 2024-10-01 12:12:12 a 4.3 +1 11 12 13.20 2024-10-02 2024-10-02 13:13:13 b 14.3 +-- !result + +select * from files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/basic_type_k2k5k7.orc", + "format" = "orc", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +21 2024-10-03 c +-- !result + + +select * from files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/*", + "format" = "orc", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +None 21 None None 2024-10-03 None c None +0 1 2 3.20 2024-10-01 2024-10-01 12:12:12 a 4.3 +1 11 12 13.20 2024-10-02 2024-10-02 13:13:13 b 14.3 +-- !result + +select k2, k5, k7 from files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/*", + "format" = "orc", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +21 2024-10-03 c +1 2024-10-01 a +11 2024-10-02 b +-- !result + +select k1, k3, k8 from files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/*", + "format" = "orc", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +None None None +0 2 4.3 +1 12 14.3 +-- !result + + +shell: ossutil64 rm -rf 
oss://${oss_bucket}/test_files/orc_format/${uuid0}/ > /dev/null diff --git a/test/sql/test_files/R/test_parquet_files_merge b/test/sql/test_files/R/test_parquet_files_merge new file mode 100644 index 0000000000000..bdcda52a19c0e --- /dev/null +++ b/test/sql/test_files/R/test_parquet_files_merge @@ -0,0 +1,115 @@ +-- name: test_parquet_files_merge + +create database db_${uuid0}; +use db_${uuid0}; + +shell: ossutil64 mkdir oss://${oss_bucket}/test_files/parquet_format/${uuid0} >/dev/null || echo "exit 0" >/dev/null + +shell: ossutil64 cp --force ./sql/test_files/parquet_format/basic_type.parquet oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ | grep -Pv "(average|elapsed)" +-- result: +0 + +Succeed: Total num: 1, size: 2,281. OK num: 1(upload 1 files). +-- !result + +shell: ossutil64 cp --force ./sql/test_files/parquet_format/basic_type_k2k5k7.parquet oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ | grep -Pv "(average|elapsed)" +-- result: +0 + +Succeed: Total num: 1, size: 836. OK num: 1(upload 1 files). +-- !result + + +desc files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/basic_type.parquet", + "format" = "parquet", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +k1 boolean YES +k2 int YES +k3 bigint YES +k4 decimal(10,2) YES +k5 date YES +k6 datetime YES +k7 varchar(1048576) YES +k8 double YES +-- !result + +desc files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/basic_type_k2k5k7.parquet", + "format" = "parquet", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +k2 int YES +k5 date YES +k7 varchar(1048576) YES +-- !result + + +select * from files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/basic_type.parquet", + "format" = "parquet", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +0 1 2 3.20 2024-10-01 2024-10-01 12:12:12 a 4.3 +1 11 12 13.20 2024-10-02 2024-10-02 13:13:13 b 14.3 +-- !result + +select * from files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/basic_type_k2k5k7.parquet", + "format" = "parquet", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +21 2024-10-03 c +-- !result + + +select * from files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*", + "format" = "parquet", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +None 21 None None 2024-10-03 None c None +0 1 2 3.20 2024-10-01 2024-10-01 12:12:12 a 4.3 +1 11 12 13.20 2024-10-02 2024-10-02 13:13:13 b 14.3 +-- !result + +select k2, k5, k7 from files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*", + "format" = "parquet", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +21 2024-10-03 c +1 2024-10-01 a +11 2024-10-02 b +-- !result + +select k1, k3, k8 from files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*", + "format" = "parquet", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +None None 
None +0 2 4.3 +1 12 14.3 +-- !result + + +shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ > /dev/null diff --git a/test/sql/test_files/T/test_orc_files_merge b/test/sql/test_files/T/test_orc_files_merge new file mode 100644 index 0000000000000..7994c344c5439 --- /dev/null +++ b/test/sql/test_files/T/test_orc_files_merge @@ -0,0 +1,59 @@ +-- name: test_orc_files_merge + +create database db_${uuid0}; +use db_${uuid0}; + +shell: ossutil64 mkdir oss://${oss_bucket}/test_files/orc_format/${uuid0} >/dev/null || echo "exit 0" >/dev/null + +shell: ossutil64 cp --force ./sql/test_files/orc_format/basic_type.orc oss://${oss_bucket}/test_files/orc_format/${uuid0}/ | grep -Pv "(average|elapsed)" +shell: ossutil64 cp --force ./sql/test_files/orc_format/basic_type_k2k5k7.orc oss://${oss_bucket}/test_files/orc_format/${uuid0}/ | grep -Pv "(average|elapsed)" + +desc files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/basic_type.orc", + "format" = "orc", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +desc files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/basic_type_k2k5k7.orc", + "format" = "orc", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); + +select * from files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/basic_type.orc", + "format" = "orc", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +select * from files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/basic_type_k2k5k7.orc", + "format" = "orc", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); + +select * from files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/*", + "format" = "orc", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +select k2, k5, k7 from files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/*", + "format" = "orc", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +select k1, k3, k8 from files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/*", + "format" = "orc", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); + +shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/orc_format/${uuid0}/ > /dev/null diff --git a/test/sql/test_files/T/test_parquet_files_merge b/test/sql/test_files/T/test_parquet_files_merge new file mode 100644 index 0000000000000..c3e9d78ef4a4c --- /dev/null +++ b/test/sql/test_files/T/test_parquet_files_merge @@ -0,0 +1,59 @@ +-- name: test_parquet_files_merge + +create database db_${uuid0}; +use db_${uuid0}; + +shell: ossutil64 mkdir oss://${oss_bucket}/test_files/parquet_format/${uuid0} >/dev/null || echo "exit 0" >/dev/null + +shell: ossutil64 cp --force ./sql/test_files/parquet_format/basic_type.parquet oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ | grep -Pv "(average|elapsed)" +shell: ossutil64 cp --force ./sql/test_files/parquet_format/basic_type_k2k5k7.parquet oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ | grep -Pv "(average|elapsed)" + +desc files( + "path" 
= "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/basic_type.parquet", + "format" = "parquet", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +desc files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/basic_type_k2k5k7.parquet", + "format" = "parquet", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); + +select * from files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/basic_type.parquet", + "format" = "parquet", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +select * from files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/basic_type_k2k5k7.parquet", + "format" = "parquet", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); + +select * from files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*", + "format" = "parquet", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +select k2, k5, k7 from files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*", + "format" = "parquet", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +select k1, k3, k8 from files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*", + "format" = "parquet", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); + +shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ > /dev/null diff --git a/test/sql/test_files/orc_format/basic_type.orc b/test/sql/test_files/orc_format/basic_type.orc new file mode 100644 index 0000000000000000000000000000000000000000..c4dfdc74b1cfb8a3e2cadaa3fe4b11dd29b13830 GIT binary patch literal 1027 zcmah|&ubGw7@c3c`(+GuGQ{{bio{SP6b)(8)OPW(P*D*~OYtnEHiuAbF)8&L{0qEz zP>X+ppr;}pdemDLJb4uH9}w|YzszpdHd;II&3p4^<{Nl#cWGr&`&NNiX4+`U7_%7= z(^7Lziq(RIEC}m^Fa^XhO&P@Vq+xtB2cmvT%SN?s)eOzFlXo2^K!gBqcZGSkJACPa zR`e3H1rWCl?mhi^@P4hd|Lgnb4{NpIyJzBlbgc}Uw=G{qMe-%hnY{?2crMd$BNyeK zKO2tA(PT{O(K0)TT3i%bT;agm&)?wJ@9~{*=h`-(di7?0>h;4<5oZzKYp;%)jCyDv ze8UhNakzO)$PGu~P@r-s6I2d8qfk2CL*Jo~n$R=8 z(}f9={UGTBXecLUSUL;G|3B0DqP%(Mu71IFobsV~j|LmBMv?v_%kdPCOK&{3lM@4J zzR6;i;3LO{(idJ4X?B_m zvo#Sqcf&vKhDY2@R*Ade-|mKE?uN&>J3M#zrwC7WH+s^##o;S`C^#6XvS8WDOJ4II L|B!NNWpVO16%C`3 literal 0 HcmV?d00001 diff --git a/test/sql/test_files/orc_format/basic_type_k2k5k7.orc b/test/sql/test_files/orc_format/basic_type_k2k5k7.orc new file mode 100644 index 0000000000000000000000000000000000000000..389164a16b23d7c9b1f731d3cd42fe9ee7a060bd GIT binary patch literal 434 zcmZ9I!AiqG5Qb-VLmbwi%TjgCspJ#_mLN^V9Hh{Lda#w^RiT6&La~LU=r#BRK7j8a z-aPqKK7eB?#pW>c&HT*aVr-ehP7jm> zKIJ$dg_*(3ay*9U0p2c#P-n^|SA(U7XgdvsJ!~lL5fe39*-%)ADYX3CrA0OZ4cx_c z_vPiyo$9N#@V07OvSmk7f1s}z*LJsiA7XG!o{=b|FXpFQ3D;Q{LWm(ClACFNGwrOW zC{(su&B}6_&8yyXS*-H1_mr1av0NDMaaJr&26>Vv=lvMd#x&hBOzfrKm?pkOCmkFQq6|5G6qtSpX;^ z8U})MVG}ABtb|Hq_n3f@o7{8?PSj4DIyPJx@@pQbf0FUzgX!7Xk%Q`7m{v#XFd{Iub3z^Eju zuoDX-U1NYy))y3cUi3;fd?mRS=WH%eMFK$CJj3O7YKB`fmFGe_GHGA-0vQlt4)r{8 zS?_5-lhpkU6*m|6dfJW857ctt+2S%PTh(_pOn$cLfL|h10%v!l;x~(!Kj&HhI|u&I 
zIUX&_0zV+56BTm+kgpBfqwAYsQuK$zVtX_i++}Wq(YQ0{XUf%5xpb5nPqgfJ$Hl95 Yr(diFZs49&R9)H^K5#EzPecBnA627Zz5oCK literal 0 HcmV?d00001
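
Reviewer note: the behavioral core of this patch is that, when flexible_column_mapping is set on the scan range, a slot with no matching column in the current file no longer fails the scan; the scanner instead builds a nullable column and pads it with one NULL per batch row, and the per-name lookup (GetColumnByName) replaces the old positional column_pos cursor, which is what makes files with differing schemas safe to merge. Below is a minimal, self-contained C++ sketch of that fill-missing-columns-with-NULL idea; the names here (Value, FileBatch, append_batch) are hypothetical stand-ins for illustration, not StarRocks classes.

#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <vector>

// A cell value; std::nullopt models SQL NULL. (Hypothetical stand-in type.)
using Value = std::optional<std::string>;
// A decoded batch from one file: column name -> column values.
using FileBatch = std::map<std::string, std::vector<Value>>;

// Append one batch to the destination columns. Columns missing from this
// file are padded with NULLs, mirroring the append_batch_to_src_chunk()
// change above (a sketch, not the actual StarRocks implementation).
void append_batch(const FileBatch& batch, size_t num_rows,
                  const std::vector<std::string>& target_schema,
                  std::map<std::string, std::vector<Value>>& dest) {
    for (const auto& col : target_schema) {
        auto it = batch.find(col);
        if (it == batch.end()) {
            // Column absent in this file: append one NULL per row.
            dest[col].insert(dest[col].end(), num_rows, Value{});
        } else {
            // Column present: copy its values through.
            dest[col].insert(dest[col].end(), it->second.begin(), it->second.end());
        }
    }
}

int main() {
    const std::vector<std::string> schema = {"k1", "k2", "k5"};
    // First file carries all columns; second file only k2 and k5,
    // like basic_type.orc vs. basic_type_k2k5k7.orc in the tests above.
    FileBatch file1 = {{"k1", {Value{"0"}}}, {"k2", {Value{"1"}}}, {"k5", {Value{"2024-10-01"}}}};
    FileBatch file2 = {{"k2", {Value{"21"}}}, {"k5", {Value{"2024-10-03"}}}};

    std::map<std::string, std::vector<Value>> dest;
    append_batch(file1, /*num_rows=*/1, schema, dest);
    append_batch(file2, /*num_rows=*/1, schema, dest);

    for (const auto& col : schema) {
        std::cout << col << ":";
        for (const auto& v : dest[col]) std::cout << " " << v.value_or("NULL");
        std::cout << "\n";
    }
    // Prints:
    // k1: 0 NULL
    // k2: 1 21
    // k5: 2024-10-01 2024-10-03
}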