diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvFragmentScanOptions.java b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvFragmentScanOptions.java
index 2ae776e62a862..2fe30d596e0db 100644
--- a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvFragmentScanOptions.java
+++ b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/csv/CsvFragmentScanOptions.java
@@ -32,8 +32,8 @@ public class CsvFragmentScanOptions implements FragmentScanOptions {
* CSV scan options, map to CPP struct CsvFragmentScanOptions. The key in config map is the field
* name of mapping cpp struct
*
- * If the option type is std::vector in cpp code, only support set one value.
- * For example, for convert option null_values, only support set one string as null value.
+ * <p>If the option type is a std::vector in the CPP code, only a single value can be set. For
+ * example, for the convert option null_values, only one string can be set as the null value.
*
* @param convertOptions similar to CsvFragmentScanOptions#convert_options in CPP, the ArrowSchema
* represents column_types, convert data option such as null value recognition.
diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/TestFragmentScanOptions.java b/java/dataset/src/test/java/org/apache/arrow/dataset/TestFragmentScanOptions.java
index 1bf0ea4b38a3c..c4bdc67cd02f7 100644
--- a/java/dataset/src/test/java/org/apache/arrow/dataset/TestFragmentScanOptions.java
+++ b/java/dataset/src/test/java/org/apache/arrow/dataset/TestFragmentScanOptions.java
@@ -18,10 +18,13 @@
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
import com.google.common.collect.ImmutableMap;
+import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
+import java.util.Map;
import java.util.Optional;
import org.apache.arrow.c.ArrowSchema;
import org.apache.arrow.c.CDataDictionaryProvider;
@@ -205,4 +208,110 @@ public void testCsvReadParseAndReadOptions() throws Exception {
assertEquals(2, rowCount);
}
}
+
+  /**
+   * Checks that a CSV scanner can be created when a wide range of convert, read and parse options
+   * is supplied together. The scanner is opened and closed without reading any batch, so the test
+   * passes as long as creating the scan does not throw.
+   */
+  @Test
+  public void testCsvReadOtherOptions() throws Exception {
+    String path = "file://" + getClass().getResource("/").getPath() + "/data/student.csv";
+    Map<String, String> convertOption =
+        ImmutableMap.of(
+            "check_utf8",
+            "true",
+            "null_values",
+            "NULL",
+            "true_values",
+            "True",
+            "false_values",
+            "False",
+            "quoted_strings_can_be_null",
+            "true",
+            "auto_dict_encode",
+            "false",
+            "auto_dict_max_cardinality",
+            "3456",
+            "decimal_point",
+            ".",
+            "include_missing_columns",
+            "false");
+    Map<String, String> readOption =
+        ImmutableMap.of(
+            "use_threads",
+            "true",
+            "block_size",
+            "1024",
+            "skip_rows",
+            "12",
+            "skip_rows_after_names",
+            "12",
+            "autogenerate_column_names",
+            "false");
+    Map<String, String> parseOption =
+        ImmutableMap.of(
+            "delimiter",
+            ".",
+            "quoting",
+            "true",
+            "quote_char",
+            "'",
+            "double_quote",
+            "False",
+            "escaping",
+            "true",
+            "escape_char",
+            "v",
+            "newlines_in_values",
+            "false",
+            "ignore_empty_lines",
+            "true");
+    CsvFragmentScanOptions fragmentScanOptions =
+        new CsvFragmentScanOptions(new CsvConvertOptions(convertOption), readOption, parseOption);
+    ScanOptions options =
+        new ScanOptions.Builder(/*batchSize*/ 32768)
+            .columns(Optional.empty())
+            .fragmentScanOptions(fragmentScanOptions)
+            .build();
+    // The allocator is declared first so that it is closed last, after every resource using it.
+    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+        DatasetFactory datasetFactory =
+            new FileSystemDatasetFactory(
+                allocator, NativeMemoryPool.getDefault(), FileFormat.CSV, path);
+        Dataset dataset = datasetFactory.finish();
+        Scanner ignored = dataset.newScan(options)) {
+      // Nothing to read: opening the scanner without an exception is the whole check.
+    }
+  }
+
+  /**
+   * Checks that creating a scan fails for invalid CSV options: first with an unknown
+   * convert-option key, then with a multi-character {@code escape_char} parse option.
+   */
+  @Test
+  public void testCsvInvalidOption() throws Exception {
+    Map<String, String> convertOption = ImmutableMap.of("not_exists_key_check_utf8", "true");
+    CsvFragmentScanOptions fragmentScanOptions =
+        new CsvFragmentScanOptions(
+            new CsvConvertOptions(convertOption), ImmutableMap.of(), ImmutableMap.of());
+    assertNewScanThrows(IOException.class, fragmentScanOptions);
+
+    // NOTE(review): the read options below also carry an empty key, so the failure may be caused
+    // by that entry rather than by the escape_char value -- confirm which one is intended.
+    CsvFragmentScanOptions fragmentScanOptionsFaultValue =
+        new CsvFragmentScanOptions(
+            new CsvConvertOptions(ImmutableMap.of()),
+            ImmutableMap.of("", ""),
+            ImmutableMap.of("escape_char", "vbvb"));
+    assertNewScanThrows(Throwable.class, fragmentScanOptionsFaultValue);
+  }
+
+  /**
+   * Opens the student CSV test resource as a dataset and asserts that creating a scan with the
+   * given fragment scan options throws the expected exception type.
+   *
+   * @param expectedType exception type that {@code Dataset#newScan} is expected to throw
+   * @param fragmentScanOptions CSV fragment scan options under test
+   * @throws Exception if the dataset cannot be opened or a resource fails to close
+   */
+  private void assertNewScanThrows(
+      Class<? extends Throwable> expectedType, CsvFragmentScanOptions fragmentScanOptions)
+      throws Exception {
+    String path = "file://" + getClass().getResource("/").getPath() + "/data/student.csv";
+    ScanOptions options =
+        new ScanOptions.Builder(/*batchSize*/ 32768)
+            .columns(Optional.empty())
+            .fragmentScanOptions(fragmentScanOptions)
+            .build();
+    // The allocator is declared first so that it is closed last, after every resource using it.
+    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+        DatasetFactory datasetFactory =
+            new FileSystemDatasetFactory(
+                allocator, NativeMemoryPool.getDefault(), FileFormat.CSV, path);
+        Dataset dataset = datasetFactory.finish()) {
+      assertThrows(expectedType, () -> dataset.newScan(options));
+    }
+  }
}