Skip to content

Commit

Permalink
[Enhancement] Improve the files() column mismatch error message for Parquet and ORC
Browse files Browse the repository at this point in the history
Signed-off-by: wyb <[email protected]>
  • Loading branch information
wyb committed Dec 31, 2024
1 parent fbc725c commit 8294d35
Show file tree
Hide file tree
Showing 9 changed files with 55 additions and 3 deletions.
2 changes: 1 addition & 1 deletion be/src/exec/csv_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ static std::string make_column_count_not_matched_error_message_for_query(int exp
<< "Column separator: " << string_2_asc(parse_options.column_delimiter) << ", "
<< "Row delimiter: " << string_2_asc(parse_options.row_delimiter) << ", "
<< "Row: '" << row << "', File: " << filename << ". "
<< "Consider setting 'fill_mismatch_column_with' = 'null'";
<< "Consider setting 'fill_mismatch_column_with' = 'null' property";
return error_msg.str();
}

Expand Down
3 changes: 3 additions & 0 deletions be/src/exec/orc_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,9 @@ Status ORCScanner::_open_next_orc_reader() {
if (st.is_end_of_file()) {
LOG(WARNING) << "Failed to init orc reader. filename: " << file_name << ", status: " << st.to_string();
continue;
} else if (st.is_not_found() &&
(_file_scan_type == TFileScanType::FILES_INSERT || _file_scan_type == TFileScanType::FILES_QUERY)) {
st = st.clone_and_append("Consider setting 'fill_mismatch_column_with' = 'null' property");
}
return st;
}
Expand Down
2 changes: 1 addition & 1 deletion be/src/exec/parquet_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ Status ParquetReaderWrap::column_indices(const std::vector<SlotDescriptor*>& tup
std::stringstream str_error;
str_error << "Column: " << slot_desc->col_name() << " is not found in file: " << _filename;
LOG(WARNING) << str_error.str();
return Status::InvalidArgument(str_error.str());
return Status::NotFound(str_error.str());
}
}
return Status::OK();
Expand Down
3 changes: 3 additions & 0 deletions be/src/exec/parquet_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,9 @@ Status ParquetScanner::next_batch() {
_last_file_scan_bytes += incr_bytes;
_state->update_num_bytes_scan_from_source(incr_bytes);
}
} else if (status.is_not_found() && (_file_scan_type == TFileScanType::FILES_INSERT ||
_file_scan_type == TFileScanType::FILES_QUERY)) {
status = status.clone_and_append("Consider setting 'fill_mismatch_column_with' = 'null' property");
}
return status;
}
Expand Down
2 changes: 1 addition & 1 deletion test/sql/test_files/R/test_csv_files_merge
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ select * from files(
"auto_detect_sample_files" = "1",
"fill_mismatch_column_with" = "none");
-- result:
[REGEX].*Schema column count: 4 doesn't match source value column count: 3. Column separator: ',', Row delimiter: .*, Row: '4,Tom,30.4', File: .*basic0_column_mismatch.csv. Consider setting 'fill_mismatch_column_with' = 'null'.*
[REGEX].*Schema column count: 4 doesn't match source value column count: 3. Column separator: ',', Row delimiter: .*, Row: '4,Tom,30.4', File: .*basic0_column_mismatch.csv. Consider setting 'fill_mismatch_column_with' = 'null' property.*
-- !result


Expand Down
13 changes: 13 additions & 0 deletions test/sql/test_files/R/test_orc_files_merge
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,17 @@ None None None
-- !result


select * from files(
"path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/*",
"format" = "orc",
"fill_mismatch_column_with" = "none",
"auto_detect_sample_files" = "2",
"aws.s3.access_key" = "${oss_ak}",
"aws.s3.secret_key" = "${oss_sk}",
"aws.s3.endpoint" = "${oss_endpoint}");
-- result:
[REGEX].*Column: k1 is not found in file: .*basic_type_k2k5k7.orc.* Consider setting 'fill_mismatch_column_with' = 'null' property.*
-- !result


shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/orc_format/${uuid0}/ > /dev/null
13 changes: 13 additions & 0 deletions test/sql/test_files/R/test_parquet_files_merge
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,17 @@ None None None
-- !result


select * from files(
"path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*",
"format" = "parquet",
"fill_mismatch_column_with" = "none",
"auto_detect_sample_files" = "2",
"aws.s3.access_key" = "${oss_ak}",
"aws.s3.secret_key" = "${oss_sk}",
"aws.s3.endpoint" = "${oss_endpoint}");
-- result:
[REGEX].*Column: k1 is not found in file: .*basic_type_k2k5k7.parquet.* Consider setting 'fill_mismatch_column_with' = 'null' property.*
-- !result


shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ > /dev/null
10 changes: 10 additions & 0 deletions test/sql/test_files/T/test_orc_files_merge
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,14 @@ select k1, k3, k8 from files(
"aws.s3.secret_key" = "${oss_sk}",
"aws.s3.endpoint" = "${oss_endpoint}");

-- column mismatch
select * from files(
"path" = "oss://${oss_bucket}/test_files/orc_format/${uuid0}/*",
"format" = "orc",
"fill_mismatch_column_with" = "none",
"auto_detect_sample_files" = "2",
"aws.s3.access_key" = "${oss_ak}",
"aws.s3.secret_key" = "${oss_sk}",
"aws.s3.endpoint" = "${oss_endpoint}");

shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/orc_format/${uuid0}/ > /dev/null
10 changes: 10 additions & 0 deletions test/sql/test_files/T/test_parquet_files_merge
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,14 @@ select k1, k3, k8 from files(
"aws.s3.secret_key" = "${oss_sk}",
"aws.s3.endpoint" = "${oss_endpoint}");

-- column mismatch
select * from files(
"path" = "oss://${oss_bucket}/test_files/parquet_format/${uuid0}/*",
"format" = "parquet",
"fill_mismatch_column_with" = "none",
"auto_detect_sample_files" = "2",
"aws.s3.access_key" = "${oss_ak}",
"aws.s3.secret_key" = "${oss_sk}",
"aws.s3.endpoint" = "${oss_endpoint}");

shell: ossutil64 rm -rf oss://${oss_bucket}/test_files/parquet_format/${uuid0}/ > /dev/null

0 comments on commit 8294d35

Please sign in to comment.