Skip to content

Commit

Permalink
Add varchar to double coercion for hive tables
Browse files Browse the repository at this point in the history
  • Loading branch information
findinpath authored and dain committed Nov 8, 2023
1 parent 881cc39 commit 0a9f81f
Show file tree
Hide file tree
Showing 10 changed files with 180 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ public static Type createTypeFromCoercer(TypeManager typeManager, HiveType fromH
if (fromType instanceof VarcharType fromVarcharType && (toHiveType.equals(HIVE_BYTE) || toHiveType.equals(HIVE_SHORT) || toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG))) {
return Optional.of(new VarcharToIntegerNumberCoercer<>(fromVarcharType, toType));
}
if (fromType instanceof VarcharType varcharType && toHiveType.equals(HIVE_DOUBLE)) {
return Optional.of(new VarcharToDoubleCoercer(varcharType, coercionContext.treatNaNAsNull()));
}
if (fromType instanceof VarcharType varcharType && toType instanceof TimestampType timestampType) {
if (timestampType.isShort()) {
return Optional.of(new VarcharToShortTimestampCoercer(varcharType, timestampType));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.trino.plugin.hive.coercions;

import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.DoubleType;
import io.trino.spi.type.VarcharType;

import static io.trino.spi.type.DoubleType.DOUBLE;

/**
 * Coerces values of a varchar column into {@code DOUBLE}.
 * <p>
 * Text that {@link Double#parseDouble(String)} rejects is written as NULL.
 * A value that parses to NaN is written as NULL as well when the coercer was
 * created with {@code treatNaNAsNull = true}; otherwise NaN is written as is.
 */
public class VarcharToDoubleCoercer
        extends TypeCoercer<VarcharType, DoubleType>
{
    // When set, a parsed NaN is emitted as NULL instead of a NaN double.
    private final boolean treatNaNAsNull;

    /**
     * @param fromType the varchar type of the source column
     * @param treatNaNAsNull whether a value parsing to NaN should be emitted as NULL
     */
    public VarcharToDoubleCoercer(VarcharType fromType, boolean treatNaNAsNull)
    {
        super(fromType, DOUBLE);
        this.treatNaNAsNull = treatNaNAsNull;
    }

    /**
     * Parses the varchar value at {@code position} of {@code block} and appends
     * either the resulting double or NULL to {@code blockBuilder}.
     */
    @Override
    protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position)
    {
        String text = fromType.getSlice(block, position).toStringUtf8();

        double parsed;
        try {
            parsed = Double.parseDouble(text);
        }
        catch (NumberFormatException ignored) {
            // Not a valid double literal: the coerced value is NULL rather than a failure.
            blockBuilder.appendNull();
            return;
        }

        if (treatNaNAsNull && Double.isNaN(parsed)) {
            blockBuilder.appendNull();
        }
        else {
            DOUBLE.writeDouble(blockBuilder, parsed);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
import io.trino.plugin.hive.coercions.TimestampCoercer.VarcharToLongTimestampCoercer;
import io.trino.plugin.hive.coercions.TimestampCoercer.VarcharToShortTimestampCoercer;
import io.trino.plugin.hive.coercions.TypeCoercer;
import io.trino.plugin.hive.coercions.VarcharToDoubleCoercer;
import io.trino.spi.type.DateType;
import io.trino.spi.type.DoubleType;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.Type;
import io.trino.spi.type.VarcharType;
Expand Down Expand Up @@ -55,6 +57,9 @@ private OrcTypeTranslator() {}
if (toTrinoType instanceof DateType toDateType) {
return Optional.of(new VarcharToDateCoercer(createUnboundedVarcharType(), toDateType));
}
if (toTrinoType instanceof DoubleType) {
return Optional.of(new VarcharToDoubleCoercer(createUnboundedVarcharType(), true));
}
return Optional.empty();
}
if (fromOrcType == DOUBLE && toTrinoType instanceof VarcharType varcharType) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ private boolean canCoerce(HiveType fromHiveType, HiveType toHiveType, HiveTimest
toHiveType.equals(HIVE_SHORT) ||
toHiveType.equals(HIVE_INT) ||
toHiveType.equals(HIVE_LONG) ||
toHiveType.equals(HIVE_DOUBLE) ||
toHiveType.equals(HIVE_DATE) ||
toHiveType.equals(HIVE_TIMESTAMP);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2397,7 +2397,7 @@ public void testPartitionSchemaMismatch()
}
})
.isInstanceOf(TrinoException.class)
.hasMessageMatching(".*The column 't_data' in table '.*\\.trino_test_partition_schema_change' is declared as type 'double', but partition 'ds=2012-12-29' declared column 't_data' as type 'string'.");
.hasMessageMatching(".*The column 't_data' in table '.*\\.trino_test_partition_schema_change' is declared as type 'float', but partition 'ds=2012-12-29' declared column 't_data' as type 'string'.");
}

// TODO coercion of non-canonical values should be supported
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.hive.coercions;

import io.trino.plugin.hive.coercions.CoercionUtils.CoercionContext;
import io.trino.spi.block.Block;
import org.junit.jupiter.api.Test;

import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.plugin.hive.HiveTimestampPrecision.DEFAULT_PRECISION;
import static io.trino.plugin.hive.HiveType.toHiveType;
import static io.trino.plugin.hive.coercions.CoercionUtils.createCoercer;
import static io.trino.spi.predicate.Utils.blockToNativeValue;
import static io.trino.spi.predicate.Utils.nativeValueToBlock;
import static io.trino.spi.type.DoubleType.DOUBLE;
import static io.trino.spi.type.VarcharType.createUnboundedVarcharType;
import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER;
import static java.lang.Double.NEGATIVE_INFINITY;
import static java.lang.Double.NaN;
import static java.lang.Double.POSITIVE_INFINITY;
import static org.assertj.core.api.Assertions.assertThat;

/**
 * Unit tests for the varchar to double coercer obtained through
 * {@code CoercionUtils.createCoercer}.
 */
public class TestVarcharToDoubleCoercer
{
    // NOTE(review): the method name says "DoubleToVarchar", but every assertion
    // below exercises the varchar -> double direction.
    @Test
    public void testDoubleToVarcharCoercions()
    {
        // Magnitude too large for a finite double: expected to come back as -Infinity
        assertCoercion("-1.7976931348623157e+310", NEGATIVE_INFINITY);
        assertCoercion("-Infinity", NEGATIVE_INFINITY);

        // Ordinary finite values
        assertCoercion("1.12e+3", 1120.0);
        assertCoercion("123456789.12345678", 123456789.12345678);
        assertCoercion("123", 123.0);

        assertCoercion("Infinity", POSITIVE_INFINITY);
        assertCoercion("+Infinity", POSITIVE_INFINITY);
        // Magnitude too large for a finite double: expected to come back as +Infinity
        assertCoercion("1.7976931348623157e+310", POSITIVE_INFINITY);

        // Text that is not a number at all is expected to coerce to NULL
        assertCoercion("Hello", null);
    }

    // NOTE(review): same naming mismatch as above; the input is the varchar "NaN".
    @Test
    public void testNaNToVarcharCoercions()
    {
        assertCoercion("NaN", true, null);
        assertCoercion("NaN", false, NaN);
    }

    /**
     * Asserts the coercion result with {@code treatNaNAsNull} disabled.
     */
    private static void assertCoercion(String varcharInput, Double expected)
    {
        assertCoercion(varcharInput, false, expected);
    }

    /**
     * Coerces a single-value unbounded varchar block holding {@code varcharInput}
     * to DOUBLE and compares the resulting native value with {@code expected}
     * ({@code null} meaning a NULL result).
     */
    private static void assertCoercion(String varcharInput, boolean treatNaNAsNull, Double expected)
    {
        CoercionContext context = new CoercionContext(DEFAULT_PRECISION, treatNaNAsNull);
        Block input = nativeValueToBlock(createUnboundedVarcharType(), utf8Slice(varcharInput));

        Block coerced = createCoercer(TESTING_TYPE_MANAGER, toHiveType(createUnboundedVarcharType()), toHiveType(DOUBLE), context)
                .orElseThrow()
                .apply(input);

        assertThat(blockToNativeValue(DOUBLE, coerced)).isEqualTo(expected);
    }
}
2 changes: 1 addition & 1 deletion plugin/trino-hive/src/test/sql/create-test.sql
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ DROP TABLE tmp_trino_test;
ALTER TABLE trino_test_partition_schema_change ADD PARTITION (ds='2012-12-29');
INSERT OVERWRITE TABLE trino_test_partition_schema_change PARTITION (ds='2012-12-29')
SELECT '123', '456' FROM trino_test_sequence;
ALTER TABLE trino_test_partition_schema_change REPLACE COLUMNS (t_data DOUBLE);
ALTER TABLE trino_test_partition_schema_change REPLACE COLUMNS (t_data FLOAT);

INSERT OVERWRITE TABLE trino_test_partition_schema_change_non_canonical PARTITION (t_boolean='0')
SELECT 'test' FROM trino_test_sequence LIMIT 100;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ protected void doTestHiveCoercion(HiveTableDefinition tableDefinition)
"varchar_to_smaller_varchar",
"varchar_to_date",
"varchar_to_distant_date",
"varchar_to_double",
"string_to_double",
"varchar_to_double_infinity",
"varchar_to_special_double",
"char_to_bigger_char",
"char_to_smaller_char",
"timestamp_to_string",
Expand Down Expand Up @@ -215,6 +219,10 @@ protected void insertTableRows(String tableName, String floatToDoubleType)
" 'abc', " +
" '2023-09-28', " +
" '8000-04-13', " +
" '1234.567', " +
" '1234.01234', " +
" 'Infinity'," +
" 'NaN'," +
" 'abc', " +
" 'abc', " +
" TIMESTAMP '2121-07-15 15:30:12.123', " +
Expand Down Expand Up @@ -266,6 +274,10 @@ protected void insertTableRows(String tableName, String floatToDoubleType)
" '\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0', " +
" '2023-09-27', " +
" '1900-01-01', " +
" '-12345.6789', " +
" '0', " +
" '-Infinity'," +
" 'Invalid Double'," +
" '\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0', " +
" '\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0', " +
" TIMESTAMP '1970-01-01 00:00:00.123', " +
Expand Down Expand Up @@ -458,6 +470,18 @@ else if (getHiveVersionMajor() == 3 && isFormat.test("orc")) {
.put("varchar_to_distant_date", ImmutableList.of(
java.sql.Date.valueOf("8000-04-13"),
java.sql.Date.valueOf("1900-01-01")))
.put("varchar_to_double", ImmutableList.of(
1234.567,
-12345.6789))
.put("string_to_double", ImmutableList.of(
1234.01234,
0D))
.put("varchar_to_double_infinity", ImmutableList.of(
Double.POSITIVE_INFINITY,
Double.NEGATIVE_INFINITY))
.put("varchar_to_special_double", Arrays.asList(
coercedNaN == null ? null : Double.NaN,
null))
.put("char_to_bigger_char", ImmutableList.of(
"abc ",
"\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0 "))
Expand Down Expand Up @@ -879,6 +903,10 @@ private void assertProperAlteredTableSchema(String tableName)
row("varchar_to_smaller_varchar", "varchar(2)"),
row("varchar_to_date", "date"),
row("varchar_to_distant_date", "date"),
row("varchar_to_double", "double"),
row("string_to_double", "double"),
row("varchar_to_double_infinity", "double"),
row("varchar_to_special_double", "double"),
row("char_to_bigger_char", "char(4)"),
row("char_to_smaller_char", "char(2)"),
row("timestamp_to_string", "varchar"),
Expand Down Expand Up @@ -946,6 +974,10 @@ private void assertColumnTypes(
.put("varchar_to_smaller_varchar", VARCHAR)
.put("varchar_to_date", DATE)
.put("varchar_to_distant_date", DATE)
.put("varchar_to_double", DOUBLE)
.put("string_to_double", DOUBLE)
.put("varchar_to_double_infinity", DOUBLE)
.put("varchar_to_special_double", DOUBLE)
.put("char_to_bigger_char", CHAR)
.put("char_to_smaller_char", CHAR)
.put("id", BIGINT)
Expand Down Expand Up @@ -1012,6 +1044,10 @@ private static void alterTableColumnTypes(String tableName)
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_smaller_varchar varchar_to_smaller_varchar varchar(2)", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_date varchar_to_date date", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_distant_date varchar_to_distant_date date", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_double varchar_to_double double", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN string_to_double string_to_double double", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_double_infinity varchar_to_double_infinity double", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_special_double varchar_to_special_double double", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN char_to_bigger_char char_to_bigger_char char(4)", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN char_to_smaller_char char_to_smaller_char char(2)", tableName));
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_to_string timestamp_to_string string", tableName));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBui
" varchar_to_smaller_varchar VARCHAR(3)," +
" varchar_to_date VARCHAR(10)," +
" varchar_to_distant_date VARCHAR(12)," +
" varchar_to_double VARCHAR(40)," +
" string_to_double STRING," +
" varchar_to_double_infinity VARCHAR(40)," +
" varchar_to_special_double VARCHAR(40)," +
" char_to_bigger_char CHAR(3)," +
" char_to_smaller_char CHAR(3)," +
" timestamp_to_string TIMESTAMP," +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ varchar_to_bigger_varchar VARCHAR(3),
varchar_to_smaller_varchar VARCHAR(3),
varchar_to_date VARCHAR(10),
varchar_to_distant_date VARCHAR(12),
varchar_to_double VARCHAR(40),
string_to_double STRING,
varchar_to_double_infinity VARCHAR(40),
varchar_to_special_double VARCHAR(40),
char_to_bigger_char CHAR(3),
char_to_smaller_char CHAR(3),
timestamp_to_string TIMESTAMP,
Expand Down

0 comments on commit 0a9f81f

Please sign in to comment.