Skip to content

Commit

Permalink
add more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jinchengchenghh committed Jul 31, 2024
1 parent 6eeda04 commit 90e9a87
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ public class CsvFragmentScanOptions implements FragmentScanOptions {
* CSV scan options, map to CPP struct CsvFragmentScanOptions. The key in config map is the field
* name of mapping cpp struct
*
* If the option type is std::vector in cpp code, only support set one value.
* For example, for convert option null_values, only support set one string as null value.
* <p>If the option type is a std::vector in the CPP code, only support for setting one value. For
* example, for convert option null_values, only support set one string as null value.
*
* @param convertOptions similar to CsvFragmentScanOptions#convert_options in CPP, the ArrowSchema
* represents column_types, convert data option such as null value recognition.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,13 @@

import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import com.google.common.collect.ImmutableMap;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;
import org.apache.arrow.c.ArrowSchema;
import org.apache.arrow.c.CDataDictionaryProvider;
Expand Down Expand Up @@ -205,4 +208,110 @@ public void testCsvReadParseAndReadOptions() throws Exception {
assertEquals(2, rowCount);
}
}

@Test
public void testCsvReadOtherOptions() throws Exception {
String path = "file://" + getClass().getResource("/").getPath() + "/data/student.csv";
BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
Map<String, String> convertOption =
ImmutableMap.of(
"check_utf8",
"true",
"null_values",
"NULL",
"true_values",
"True",
"false_values",
"False",
"quoted_strings_can_be_null",
"true",
"auto_dict_encode",
"false",
"auto_dict_max_cardinality",
"3456",
"decimal_point",
".",
"include_missing_columns",
"false");
Map<String, String> readOption =
ImmutableMap.of(
"use_threads",
"true",
"block_size",
"1024",
"skip_rows",
"12",
"skip_rows_after_names",
"12",
"autogenerate_column_names",
"false");
Map<String, String> parseOption =
ImmutableMap.of(
"delimiter",
".",
"quoting",
"true",
"quote_char",
"'",
"double_quote",
"False",
"escaping",
"true",
"escape_char",
"v",
"newlines_in_values",
"false",
"ignore_empty_lines",
"true");
CsvFragmentScanOptions fragmentScanOptions =
new CsvFragmentScanOptions(new CsvConvertOptions(convertOption), readOption, parseOption);
ScanOptions options =
new ScanOptions.Builder(/*batchSize*/ 32768)
.columns(Optional.empty())
.fragmentScanOptions(fragmentScanOptions)
.build();
try (DatasetFactory datasetFactory =
new FileSystemDatasetFactory(
allocator, NativeMemoryPool.getDefault(), FileFormat.CSV, path);
Dataset dataset = datasetFactory.finish();
Scanner ignored = dataset.newScan(options)) {}
}

@Test
public void testCsvInvalidOption() throws Exception {
String path = "file://" + getClass().getResource("/").getPath() + "/data/student.csv";
BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
Map<String, String> convertOption = ImmutableMap.of("not_exists_key_check_utf8", "true");
CsvFragmentScanOptions fragmentScanOptions =
new CsvFragmentScanOptions(
new CsvConvertOptions(convertOption), ImmutableMap.of(), ImmutableMap.of());
ScanOptions options =
new ScanOptions.Builder(/*batchSize*/ 32768)
.columns(Optional.empty())
.fragmentScanOptions(fragmentScanOptions)
.build();
try (DatasetFactory datasetFactory =
new FileSystemDatasetFactory(
allocator, NativeMemoryPool.getDefault(), FileFormat.CSV, path);
Dataset dataset = datasetFactory.finish()) {
assertThrows(IOException.class, () -> dataset.newScan(options));
}

CsvFragmentScanOptions fragmentScanOptionsFaultValue =
new CsvFragmentScanOptions(
new CsvConvertOptions(ImmutableMap.of()),
ImmutableMap.of("", ""),
ImmutableMap.of("escape_char", "vbvb"));
ScanOptions optionsFault =
new ScanOptions.Builder(/*batchSize*/ 32768)
.columns(Optional.empty())
.fragmentScanOptions(fragmentScanOptionsFaultValue)
.build();
try (DatasetFactory datasetFactory =
new FileSystemDatasetFactory(
allocator, NativeMemoryPool.getDefault(), FileFormat.CSV, path);
Dataset dataset = datasetFactory.finish()) {
assertThrows(Throwable.class, () -> dataset.newScan(optionsFault));
}
}
}

0 comments on commit 90e9a87

Please sign in to comment.