Skip to content

Commit

Permalink
[s] fixed typo and Copyright date
Browse files Browse the repository at this point in the history
  • Loading branch information
psainics committed Jan 8, 2024
1 parent c9b31fd commit e002f14
Show file tree
Hide file tree
Showing 12 changed files with 34 additions and 39 deletions.
2 changes: 1 addition & 1 deletion format-xls/pom.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!--
~ Copyright © 2023 Cask Data, Inc.
~ Copyright © 2024 Cask Data, Inc.
~
~ Licensed under the Apache License, Version 2.0 (the "License"); you may not
~ use this file except in compliance with the License. You may obtain a copy of
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright © 2023 Cask Data, Inc.
* Copyright © 2024 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
Expand All @@ -26,7 +26,7 @@
/**
* Formats the cell value of an Excel file.
*/
public class XlsInputFormatDataFormatter {
public class XlsDataFormatter {
private static final DataFormatter dataFormatter = new DataFormatter();
private final FormulaEvaluator evaluator;

Expand All @@ -35,7 +35,7 @@ public class XlsInputFormatDataFormatter {
*
* @param evaluator the formula evaluator
*/
public XlsInputFormatDataFormatter(FormulaEvaluator evaluator) {
public XlsDataFormatter(FormulaEvaluator evaluator) {
this.evaluator = evaluator;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright © 2023 Cask Data, Inc.
* Copyright © 2024 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
Expand All @@ -17,7 +17,6 @@
package io.cdap.plugin.format.xls.input;

import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.api.data.schema.Schema;
import org.apache.hadoop.conf.Configuration;
Expand All @@ -32,8 +31,6 @@
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
Expand Down Expand Up @@ -67,7 +64,7 @@ public RecordReader<LongWritable, StructuredRecord> createRecordReader(InputSpli
*/
public static class XlsRecordReader extends RecordReader<LongWritable, StructuredRecord> {
// DataFormatter to format and get each cell's value as String
XlsInputFormatDataFormatter formatter;
XlsDataFormatter formatter;
FormulaEvaluator formulaEvaluator;
// Map key that represents the row index.
private LongWritable key;
Expand Down Expand Up @@ -103,15 +100,15 @@ public void initialize(InputSplit genericSplit, TaskAttemptContext context) thro
try (Workbook workbook = WorkbookFactory.create(fileIn)) {
formulaEvaluator = workbook.getCreationHelper().createFormulaEvaluator();
formulaEvaluator.setIgnoreMissingWorkbooks(true);
formatter = new XlsInputFormatDataFormatter(formulaEvaluator);
formatter = new XlsDataFormatter(formulaEvaluator);
// Check if user wants to access with name or number
if (sheet.equals(XlsInputFormatConfig.SHEET_NUMBER)) {
workSheet = workbook.getSheetAt(Integer.parseInt(sheetValue));
} else {
workSheet = workbook.getSheet(sheetValue);
}
} catch (Exception e) {
throw new IllegalArgumentException("Exception while reading excel sheet. " + e.getMessage(), e);
throw new IOException("Exception while reading excel sheet. " + e.getMessage(), e);
}

lastRowNum = workSheet.getLastRowNum();
Expand Down Expand Up @@ -153,7 +150,7 @@ public boolean nextKeyValue() {
for (int cellIndex = 0; cellIndex < row.getLastCellNum(); cellIndex++) {
if (cellIndex >= fields.size()) {
throw new IllegalArgumentException(
String.format("Schema contains less fields than the number of columns in the excel file. " +
String.format("Schema contains fewer fields than the number of columns in the excel file. " +
"Schema fields: %s, Excel columns: %s", fields.size(), row.getLastCellNum()));
}
Cell cell = row.getCell(cellIndex, Row.MissingCellPolicy.RETURN_BLANK_AS_NULL);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright © 2023 Cask Data, Inc.
* Copyright © 2024 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
Expand Down Expand Up @@ -47,7 +47,7 @@ public class XlsInputFormatConfig extends PathTrackingConfig {
public static final String NAME_SAMPLE_SIZE = "sampleSize";

public static final String DESC_SKIP_HEADER =
"Whether to skip the first line of each file. The default value is false.";
"Whether to skip the first line of each sheet. The default value is false.";
public static final String DESC_SHEET = "Select the sheet by name or number. Default is 'Sheet Number'.";
public static final String DESC_SHEET_VALUE = "Specifies the value corresponding to 'sheet' input. " +
"Can be either sheet name or sheet no; for example: 'Sheet1' or '0' in case user selects 'Sheet Name' or " +
Expand Down Expand Up @@ -80,7 +80,6 @@ public class XlsInputFormatConfig extends PathTrackingConfig {
@Description(DESC_SHEET_VALUE)
private String sheetValue;


@Macro
@Nullable
@Name(NAME_SKIP_HEADER)
Expand Down Expand Up @@ -170,6 +169,7 @@ public Map<String, Schema> getOverride() throws IllegalArgumentException {
public static Builder builder() {
return new Builder();
}

/**
* Builder for building a {@link XlsInputFormatConfig}.
*/
Expand All @@ -194,6 +194,7 @@ public Builder setSheetValue(String sheetValue) {
this.sheetValue = sheetValue;
return this;
}

public Builder setSkipHeader(Boolean skipHeader) {
this.skipHeader = skipHeader;
return this;
Expand All @@ -203,6 +204,7 @@ public Builder setTerminateIfEmptyRow(Boolean terminateIfEmptyRow) {
this.terminateIfEmptyRow = terminateIfEmptyRow;
return this;
}

public XlsInputFormatConfig build() {
return new XlsInputFormatConfig(schema, sheet, sheetValue, skipHeader, terminateIfEmptyRow);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright © 2023 Cask Data, Inc.
* Copyright © 2024 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright © 2023 Cask Data, Inc.
* Copyright © 2024 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright © 2023 Cask Data, Inc.
* Copyright © 2024 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
Expand All @@ -16,10 +16,6 @@

package io.cdap.plugin.format.xls.input;

import io.cdap.cdap.api.data.schema.Schema;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright © 2023 Cask Data, Inc.
* Copyright © 2024 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
Expand Down Expand Up @@ -31,9 +31,9 @@
import java.io.IOException;

/**
* Unit tests for {@link XlsInputFormatDataFormatter}
* Unit tests for {@link XlsDataFormatter}
*/
public class XlsInputFormatDataFormatterTest {
public class XlsDataFormatterTest {
Workbook workbook;
Sheet sheet;

Expand Down Expand Up @@ -66,15 +66,15 @@ public void testFormatCellValue() {
Cell errorCell = row.createCell(++testColumn);
errorCell.setCellErrorValue(FormulaError.DIV0.getCode());

XlsInputFormatDataFormatter xlsInputFormatDataFormatter = new XlsInputFormatDataFormatter(
XlsDataFormatter xlsDataFormatter = new XlsDataFormatter(
workbook.getCreationHelper().createFormulaEvaluator());

Assert.assertNull(xlsInputFormatDataFormatter.formatCellValue(blankCell, Schema.Type.STRING));
Assert.assertEquals("TRUE", xlsInputFormatDataFormatter.formatCellValue(booleanCell, Schema.Type.BOOLEAN));
Assert.assertEquals("1.0", xlsInputFormatDataFormatter.formatCellValue(numericCell, Schema.Type.DOUBLE));
Assert.assertEquals("test", xlsInputFormatDataFormatter.formatCellValue(stringCell, Schema.Type.STRING));
Assert.assertNull(xlsInputFormatDataFormatter.formatCellValue(errorCell, Schema.Type.STRING));
Assert.assertNull(xlsInputFormatDataFormatter.formatCellValue(null, Schema.Type.STRING));
Assert.assertNull(xlsDataFormatter.formatCellValue(blankCell, Schema.Type.STRING));
Assert.assertEquals("TRUE", xlsDataFormatter.formatCellValue(booleanCell, Schema.Type.BOOLEAN));
Assert.assertEquals("1.0", xlsDataFormatter.formatCellValue(numericCell, Schema.Type.DOUBLE));
Assert.assertEquals("test", xlsDataFormatter.formatCellValue(stringCell, Schema.Type.STRING));
Assert.assertNull(xlsDataFormatter.formatCellValue(errorCell, Schema.Type.STRING));
Assert.assertNull(xlsDataFormatter.formatCellValue(null, Schema.Type.STRING));
}

@Test
Expand Down Expand Up @@ -105,10 +105,10 @@ public void testFormatCellValueWithCachedFormulaResult() {
// Cache the formula results
evaluator.evaluateAll();

XlsInputFormatDataFormatter xlsInputFormatDataFormatter = new XlsInputFormatDataFormatter(evaluator);
XlsDataFormatter xlsDataFormatter = new XlsDataFormatter(evaluator);
Assert.assertEquals("3.0",
xlsInputFormatDataFormatter.formatCellValue(formulaCell, Schema.Type.DOUBLE));
xlsDataFormatter.formatCellValue(formulaCell, Schema.Type.DOUBLE));
Assert.assertEquals("helloworld",
xlsInputFormatDataFormatter.formatCellValue(formulaCell2, Schema.Type.STRING));
xlsDataFormatter.formatCellValue(formulaCell2, Schema.Type.STRING));
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright © 2023 Cask Data, Inc.
* Copyright © 2024 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright © 2023 Cask Data, Inc.
* Copyright © 2024 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright © 2023 Cask Data, Inc.
* Copyright © 2024 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@
<module>format-orc</module>
<module>format-parquet</module>
<module>format-text</module>
<module>format-xls</module>
<module>hbase-plugins</module>
<module>http-plugins</module>
<module>mongodb-plugins</module>
<module>solrsearch-plugins</module>
<module>spark-plugins</module>
<module>transform-plugins</module>
<module>format-xls</module>
</modules>

<licenses>
Expand Down

0 comments on commit e002f14

Please sign in to comment.