diff --git a/be/src/exec/csv_scanner.cpp b/be/src/exec/csv_scanner.cpp index 43edf2e767a8d..0b4f594ccc8c7 100644 --- a/be/src/exec/csv_scanner.cpp +++ b/be/src/exec/csv_scanner.cpp @@ -65,7 +65,7 @@ static std::string make_column_count_not_matched_error_message_for_query(int exp << "Column separator: " << string_2_asc(parse_options.column_delimiter) << ", " << "Row delimiter: " << string_2_asc(parse_options.row_delimiter) << ", " << "Row: '" << row << "', File: " << filename << ". " - << "Consider setting 'fill_mismatch_column_with' = 'null'"; + << "Consider setting 'fill_mismatch_column_with' = 'null' property"; return error_msg.str(); } diff --git a/be/src/exec/orc_scanner.cpp b/be/src/exec/orc_scanner.cpp index c8e82221d9cfd..7a0b7cdb0d919 100644 --- a/be/src/exec/orc_scanner.cpp +++ b/be/src/exec/orc_scanner.cpp @@ -229,6 +229,9 @@ Status ORCScanner::_open_next_orc_reader() { if (st.is_end_of_file()) { LOG(WARNING) << "Failed to init orc reader. filename: " << file_name << ", status: " << st.to_string(); continue; + } else if (st.is_not_found() && + (_file_scan_type == TFileScanType::FILES_INSERT || _file_scan_type == TFileScanType::FILES_QUERY)) { + st = st.clone_and_append("Consider setting 'fill_mismatch_column_with' = 'null' property"); } return st; } diff --git a/be/src/exec/parquet_reader.cpp b/be/src/exec/parquet_reader.cpp index 8eeb01c16ff49..17c4cb51de756 100644 --- a/be/src/exec/parquet_reader.cpp +++ b/be/src/exec/parquet_reader.cpp @@ -252,7 +252,7 @@ Status ParquetReaderWrap::column_indices(const std::vector& tup std::stringstream str_error; str_error << "Column: " << slot_desc->col_name() << " is not found in file: " << _filename; LOG(WARNING) << str_error.str(); - return Status::InvalidArgument(str_error.str()); + return Status::NotFound(str_error.str()); } } return Status::OK(); diff --git a/be/src/exec/parquet_scanner.cpp b/be/src/exec/parquet_scanner.cpp index 9d5580f842046..06f97c7c90f11 100644 --- a/be/src/exec/parquet_scanner.cpp +++ b/be/src/exec/parquet_scanner.cpp @@ -441,6 +441,9 @@ Status ParquetScanner::next_batch() { _last_file_scan_bytes += incr_bytes; _state->update_num_bytes_scan_from_source(incr_bytes); } + } else if (status.is_not_found() && (_file_scan_type == TFileScanType::FILES_INSERT || + _file_scan_type == TFileScanType::FILES_QUERY)) { + status = status.clone_and_append("Consider setting 'fill_mismatch_column_with' = 'null' property"); } return status; } diff --git a/test/sql/test_files/R/test_csv_files_merge b/test/sql/test_files/R/test_csv_files_merge index 9b4d50e01a4ea..3102ad646a828 100644 --- a/test/sql/test_files/R/test_csv_files_merge +++ b/test/sql/test_files/R/test_csv_files_merge @@ -56,7 +56,7 @@ select * from files( "auto_detect_sample_files" = "1", "fill_mismatch_column_with" = "none"); -- result: -[REGEX].*Schema column count: 4 doesn't match source value column count: 3. Column separator: ',', Row delimiter: .*, Row: '4,Tom,30.4', File: .*basic0_column_mismatch.csv. Consider setting 'fill_mismatch_column_with' = 'null'.* +[REGEX].*Schema column count: 4 doesn't match source value column count: 3. Column separator: ',', Row delimiter: .*, Row: '4,Tom,30.4', File: .*basic0_column_mismatch.csv. Consider setting 'fill_mismatch_column_with' = 'null' property.* -- !result diff --git a/test/sql/test_files/R/test_orc_files_merge b/test/sql/test_files/R/test_orc_files_merge index 16ef31cfa8023..6dd41e4e5d703 100644 --- a/test/sql/test_files/R/test_orc_files_merge +++ b/test/sql/test_files/R/test_orc_files_merge @@ -115,4 +115,17 @@ None None None -- !result +select * from files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/*", + "format" = "orc", + "fill_mismatch_column_with" = "none", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +[REGEX].*Column: k1 is not found in file: .*basic_type_k2k5k7.orc.* Consider setting 'fill_mismatch_column_with' = 'null' property.* +-- !result + + shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/orc_format/${uuid0}/ > /dev/null diff --git a/test/sql/test_files/R/test_parquet_files_merge b/test/sql/test_files/R/test_parquet_files_merge index 9f11f6812716b..cbf49852d45b9 100644 --- a/test/sql/test_files/R/test_parquet_files_merge +++ b/test/sql/test_files/R/test_parquet_files_merge @@ -115,4 +115,17 @@ None None None -- !result +select * from files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*", + "format" = "parquet", + "fill_mismatch_column_with" = "none", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); +-- result: +[REGEX].*Column: k1 is not found in file: .*basic_type_k2k5k7.parquet.* Consider setting 'fill_mismatch_column_with' = 'null' property.* +-- !result + + shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ > /dev/null diff --git a/test/sql/test_files/T/test_orc_files_merge b/test/sql/test_files/T/test_orc_files_merge index 9460e8cc9ef19..0b82c6d5e52e2 100644 --- a/test/sql/test_files/T/test_orc_files_merge +++ b/test/sql/test_files/T/test_orc_files_merge @@ -59,4 +59,14 @@ select k1, k3, k8 from files( "aws.s3.secret_key" = "${oss_sk}", "aws.s3.endpoint" = "${oss_endpoint}"); +-- column mismatch +select * from files( + "path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/*", + "format" = "orc", + "fill_mismatch_column_with" = "none", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); + shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/orc_format/${uuid0}/ > /dev/null diff --git a/test/sql/test_files/T/test_parquet_files_merge b/test/sql/test_files/T/test_parquet_files_merge index 7727b6eee0aaa..5787e28d7cadd 100644 --- a/test/sql/test_files/T/test_parquet_files_merge +++ b/test/sql/test_files/T/test_parquet_files_merge @@ -59,4 +59,14 @@ select k1, k3, k8 from files( "aws.s3.secret_key" = "${oss_sk}", "aws.s3.endpoint" = "${oss_endpoint}"); +-- column mismatch +select * from files( + "path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*", + "format" = "parquet", + "fill_mismatch_column_with" = "none", + "auto_detect_sample_files" = "2", + "aws.s3.access_key" = "${oss_ak}", + "aws.s3.secret_key" = "${oss_sk}", + "aws.s3.endpoint" = "${oss_endpoint}"); + shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ > /dev/null