From 8424b369a44ec83f4486631a106ed1278334ca47 Mon Sep 17 00:00:00 2001 From: Dain Sundstrom Date: Tue, 7 Nov 2023 11:20:30 -0800 Subject: [PATCH 1/3] Do not modify properties argument in AvroHiveFileUtils The determineSchemaOrThrowException method was writing the schema back into the properties argument. This behavior is unexpected and unneeded by the current code. --- .../java/io/trino/plugin/hive/avro/AvroHiveFileUtils.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroHiveFileUtils.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroHiveFileUtils.java index 2d81ff5d2921..3a08f13d01d4 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroHiveFileUtils.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroHiveFileUtils.java @@ -94,9 +94,7 @@ public static Schema determineSchemaOrThrowException(TrinoFileSystem fileSystem, throw new IOException("Unable to read avro schema file from given path: " + schemaURL, e); } } - Schema schema = getSchemaFromProperties(properties); - properties.setProperty(SCHEMA_LITERAL, schema.toString()); - return schema; + return getSchemaFromProperties(properties); } private static Schema getSchemaFromProperties(Properties properties) From cb2799d183b04f382bac67fdb1ac8b78fd558591 Mon Sep 17 00:00:00 2001 From: Dain Sundstrom Date: Sat, 11 Nov 2023 12:45:40 -0800 Subject: [PATCH 2/3] Use Hive constants instead of strings where possible --- .../plugin/hive/metastore/MetastoreUtil.java | 3 ++- .../hive/TestBackgroundHiveSplitLoader.java | 25 +++++++++++-------- .../plugin/hive/TestHiveFileFormats.java | 14 ++++++----- .../trino/plugin/hive/TestHivePageSink.java | 6 +++-- .../hive/TestOrcPageSourceMemoryTracking.java | 10 +++++--- .../hive/metastore/TestMetastoreUtil.java | 21 ++++++++++------ .../plugin/hive/orc/TestOrcPredicates.java | 6 +++-- .../plugin/hive/parquet/ParquetTester.java | 6 +++-- .../parquet/TestParquetDecimalScaling.java | 6 +++-- .../product/hive/TestHiveCreateTable.java | 7 +++--- 10 files changed, 64 insertions(+), 40 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/MetastoreUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/MetastoreUtil.java index 405796542872..b32d0eab64da 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/MetastoreUtil.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/MetastoreUtil.java @@ -75,6 +75,7 @@ import static io.trino.plugin.hive.metastore.thrift.ThriftMetastoreUtil.NUM_ROWS; import static io.trino.plugin.hive.util.HiveClassNames.AVRO_SERDE_CLASS; import static io.trino.plugin.hive.util.HiveUtil.makePartName; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_COMMENTS; import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; import static io.trino.spi.predicate.TupleDomain.withColumnDomains; @@ -175,7 +176,7 @@ private static Properties getHiveSchema( String columnTypes = columnTypeBuilder.toString(); schema.setProperty(META_TABLE_COLUMNS, columnNames); schema.setProperty(META_TABLE_COLUMN_TYPES, columnTypes); - schema.setProperty("columns.comments", columnCommentBuilder.toString()); + schema.setProperty(LIST_COLUMN_COMMENTS, columnCommentBuilder.toString()); StringBuilder partString = new StringBuilder(); String partStringSep = ""; diff --git 
a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java index 0d4191f21fc9..6b0e8067c432 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java @@ -82,6 +82,7 @@ import static io.airlift.units.DataSize.Unit.GIGABYTE; import static io.airlift.units.DataSize.Unit.KILOBYTE; import static io.airlift.units.DataSize.Unit.MEGABYTE; +import static io.trino.hive.thrift.metastore.hive_metastoreConstants.FILE_INPUT_FORMAT; import static io.trino.plugin.hive.BackgroundHiveSplitLoader.BucketSplitInfo.createBucketSplitInfo; import static io.trino.plugin.hive.BackgroundHiveSplitLoader.getBucketNumber; import static io.trino.plugin.hive.BackgroundHiveSplitLoader.hasAttemptId; @@ -91,6 +92,7 @@ import static io.trino.plugin.hive.HiveStorageFormat.AVRO; import static io.trino.plugin.hive.HiveStorageFormat.CSV; import static io.trino.plugin.hive.HiveStorageFormat.ORC; +import static io.trino.plugin.hive.HiveTableProperties.TRANSACTIONAL; import static io.trino.plugin.hive.HiveTestUtils.SESSION; import static io.trino.plugin.hive.HiveTestUtils.getHiveSession; import static io.trino.plugin.hive.HiveTimestampPrecision.DEFAULT_PRECISION; @@ -100,6 +102,8 @@ import static io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1; import static io.trino.plugin.hive.util.HiveClassNames.SYMLINK_TEXT_INPUT_FORMAT_CLASS; import static io.trino.plugin.hive.util.HiveUtil.getRegularColumnHandles; +import static io.trino.plugin.hive.util.SerdeConstants.FOOTER_COUNT; +import static io.trino.plugin.hive.util.SerdeConstants.HEADER_COUNT; import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.predicate.TupleDomain.withColumnDomains; import static io.trino.spi.type.IntegerType.INTEGER; @@ -110,7 +114,6 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.concurrent.Executors.newCachedThreadPool; import static java.util.concurrent.TimeUnit.SECONDS; -import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; @@ -165,10 +168,10 @@ public void testCsv() { FileEntry file = new FileEntry(LOCATION, DataSize.of(2, GIGABYTE).toBytes(), Instant.now(), Optional.empty()); assertCsvSplitCount(file, Map.of(), 33); - assertCsvSplitCount(file, Map.of("skip.header.line.count", "1"), 33); - assertCsvSplitCount(file, Map.of("skip.header.line.count", "2"), 1); - assertCsvSplitCount(file, Map.of("skip.footer.line.count", "1"), 1); - assertCsvSplitCount(file, Map.of("skip.header.line.count", "1", "skip.footer.line.count", "1"), 1); + assertCsvSplitCount(file, Map.of(HEADER_COUNT, "1"), 33); + assertCsvSplitCount(file, Map.of(HEADER_COUNT, "2"), 1); + assertCsvSplitCount(file, Map.of(FOOTER_COUNT, "1"), 1); + assertCsvSplitCount(file, Map.of(HEADER_COUNT, "1", FOOTER_COUNT, "1"), 1); } private void assertCsvSplitCount(FileEntry file, Map tableProperties, int expectedSplitCount) @@ -531,7 +534,7 @@ public void testSplitsGenerationWithAbortedTransactions() List.of(), Optional.empty(), Map.of( - "transactional", "true", + TRANSACTIONAL, "true", "transactional_properties", 
"insert_only")); List fileLocations = List.of( @@ -577,7 +580,7 @@ public void testFullAcidTableWithOriginalFiles() tableLocation.toString(), List.of(), Optional.empty(), - Map.of("transactional", "true")); + Map.of(TRANSACTIONAL, "true")); Location originalFile = tableLocation.appendPath("000000_1"); try (OutputStream outputStream = fileSystem.newOutputFile(originalFile).create()) { @@ -620,7 +623,7 @@ public void testVersionValidationNoOrcAcidVersionFile() tableLocation.toString(), List.of(), Optional.empty(), - Map.of("transactional", "true")); + Map.of(TRANSACTIONAL, "true")); List fileLocations = List.of( tableLocation.appendPath("000000_1"), @@ -665,7 +668,7 @@ public void testVersionValidationOrcAcidVersionFileHasVersion2() tableLocation.toString(), List.of(), Optional.empty(), - Map.of("transactional", "true")); + Map.of(TRANSACTIONAL, "true")); List fileLocations = List.of( tableLocation.appendPath("000000_1"), // _orc_acid_version does not exist, so it's assumed to be "ORC ACID version 0" @@ -708,7 +711,7 @@ public void testVersionValidationOrcAcidVersionFileHasVersion1() tableLocation.toString(), List.of(), Optional.empty(), - Map.of("transactional", "true")); + Map.of(TRANSACTIONAL, "true")); List fileLocations = List.of( tableLocation.appendPath("000000_1"), @@ -818,7 +821,7 @@ public void testBuildManifestFileIteratorNestedDirectory() { CachingDirectoryLister directoryLister = new CachingDirectoryLister(new Duration(5, TimeUnit.MINUTES), DataSize.of(100, KILOBYTE), List.of()); Properties schema = new Properties(); - schema.setProperty("file.inputformat", SYMLINK_TEXT_INPUT_FORMAT_CLASS); + schema.setProperty(FILE_INPUT_FORMAT, SYMLINK_TEXT_INPUT_FORMAT_CLASS); schema.setProperty(SERIALIZATION_LIB, AVRO.getSerde()); Location filePath = Location.of("memory:///db_name/table_name/file1"); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java index f84a458cd221..902291918503 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java @@ -152,6 +152,8 @@ import static io.trino.plugin.hive.HiveTestUtils.getHiveSession; import static io.trino.plugin.hive.HiveTestUtils.mapType; import static io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES; import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.BooleanType.BOOLEAN; @@ -956,8 +958,8 @@ private static void testPageSourceFactory( } } - splitProperties.setProperty("columns", String.join(",", splitPropertiesColumnNames.build())); - splitProperties.setProperty("columns.types", String.join(",", splitPropertiesColumnTypes.build())); + splitProperties.setProperty(LIST_COLUMNS, String.join(",", splitPropertiesColumnNames.build())); + splitProperties.setProperty(LIST_COLUMN_TYPES, String.join(",", splitPropertiesColumnTypes.build())); List partitionKeys = testReadColumns.stream() .filter(TestColumn::partitionKey) @@ -1319,13 +1321,13 @@ private static void createTestFileTrino( Properties tableProperties = new Properties(); tableProperties.setProperty( - "columns", + LIST_COLUMNS, testColumns.stream() .map(TestColumn::name) .collect(Collectors.joining(","))); 
tableProperties.setProperty( - "columns.types", + LIST_COLUMN_TYPES, testColumns.stream() .map(TestColumn::type) .map(HiveType::toHiveType) @@ -1470,8 +1472,8 @@ private static void createTestFileHive( .collect(toImmutableList()); Properties tableProperties = new Properties(); - tableProperties.setProperty("columns", testColumns.stream().map(TestColumn::name).collect(Collectors.joining(","))); - tableProperties.setProperty("columns.types", testColumns.stream().map(testColumn -> HiveType.toHiveType(testColumn.type()).toString()).collect(Collectors.joining(","))); + tableProperties.setProperty(LIST_COLUMNS, testColumns.stream().map(TestColumn::name).collect(Collectors.joining(","))); + tableProperties.setProperty(LIST_COLUMN_TYPES, testColumns.stream().map(testColumn -> HiveType.toHiveType(testColumn.type()).toString()).collect(Collectors.joining(","))); serializer.initialize(new Configuration(false), tableProperties); JobConf jobConf = new JobConf(false); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java index 2c4d60a3da93..b530e08accd6 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java @@ -75,6 +75,8 @@ import static io.trino.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY; import static io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION; import static io.trino.plugin.hive.metastore.file.TestingFileHiveMetastore.createTestingFileHiveMetastore; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES; import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.DateType.DATE; @@ -243,8 +245,8 @@ private static ConnectorPageSource createPageSource(TrinoFileSystemFactory fileS Properties splitProperties = new Properties(); splitProperties.setProperty(FILE_INPUT_FORMAT, config.getHiveStorageFormat().getInputFormat()); splitProperties.setProperty(SERIALIZATION_LIB, config.getHiveStorageFormat().getSerde()); - splitProperties.setProperty("columns", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getName).collect(toImmutableList()))); - splitProperties.setProperty("columns.types", Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getHiveType).map(hiveType -> hiveType.getHiveTypeName().toString()).collect(toImmutableList()))); + splitProperties.setProperty(LIST_COLUMNS, Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getName).collect(toImmutableList()))); + splitProperties.setProperty(LIST_COLUMN_TYPES, Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getHiveType).map(hiveType -> hiveType.getHiveTypeName().toString()).collect(toImmutableList()))); HiveSplit split = new HiveSplit( "", location.toString(), diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java index eefff9d7c6a0..399318cfcb80 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java @@ -106,6 +106,8 @@ import static 
io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_FACTORY; import static io.trino.plugin.hive.HiveTestUtils.SESSION; import static io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES; import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; import static io.trino.sql.relational.Expressions.field; @@ -507,11 +510,11 @@ public TestPreparer(String tempFilePath, List testColumns, int numRo { OrcSerde serde = new OrcSerde(); schema = new Properties(); - schema.setProperty("columns", + schema.setProperty(LIST_COLUMNS, testColumns.stream() .map(TestColumn::getName) .collect(Collectors.joining(","))); - schema.setProperty("columns.types", + schema.setProperty(LIST_COLUMN_TYPES, testColumns.stream() .map(TestColumn::getType) .collect(Collectors.joining(","))); @@ -659,13 +662,13 @@ private static FileSplit createTestFile( Properties tableProperties = new Properties(); tableProperties.setProperty( - "columns", + LIST_COLUMNS, testColumns.stream() .map(TestColumn::getName) .collect(Collectors.joining(","))); tableProperties.setProperty( - "columns.types", + LIST_COLUMN_TYPES, testColumns.stream() .map(TestColumn::getType) .collect(Collectors.joining(","))); diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestMetastoreUtil.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestMetastoreUtil.java index bf0a31cd1166..62d1cbe65a61 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestMetastoreUtil.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestMetastoreUtil.java @@ -36,11 +36,18 @@ import java.util.Properties; import static io.airlift.slice.Slices.utf8Slice; +import static io.trino.hive.thrift.metastore.hive_metastoreConstants.FILE_INPUT_FORMAT; +import static io.trino.hive.thrift.metastore.hive_metastoreConstants.FILE_OUTPUT_FORMAT; import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY; import static io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle; +import static io.trino.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY; import static io.trino.plugin.hive.HiveType.HIVE_STRING; import static io.trino.plugin.hive.metastore.MetastoreUtil.computePartitionKeyFilter; import static io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_COMMENTS; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES; +import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.type.IntegerType.INTEGER; import static io.trino.spi.type.VarcharType.VARCHAR; import static org.assertj.core.api.Assertions.assertThat; @@ -137,13 +144,13 @@ public class TestMetastoreUtil // Properties expected = MetaStoreUtils.getTableMetadata(TEST_TABLE_WITH_UNSUPPORTED_FIELDS); // expected.remove(COLUMN_NAME_DELIMITER); private static final Map TEST_TABLE_METADATA = ImmutableMap.builder() - .put("bucket_count", "100") + .put(BUCKET_COUNT_PROPERTY, "100") .put("bucket_field_name", "col2,col3") - .put("columns", "col1,col2,col3") - .put("columns.comments", "comment1\0\0") - 
.put("columns.types", "bigint:binary:string") - .put("file.inputformat", "com.facebook.hive.orc.OrcInputFormat") - .put("file.outputformat", "com.facebook.hive.orc.OrcOutputFormat") + .put(LIST_COLUMNS, "col1,col2,col3") + .put(LIST_COLUMN_COMMENTS, "comment1\0\0") + .put(LIST_COLUMN_TYPES, "bigint:binary:string") + .put(FILE_INPUT_FORMAT, "com.facebook.hive.orc.OrcInputFormat") + .put(FILE_OUTPUT_FORMAT, "com.facebook.hive.orc.OrcOutputFormat") .put("k1", "v1") .put("k2", "v2") .put("k3", "v3") @@ -153,7 +160,7 @@ public class TestMetastoreUtil .put("partition_columns.types", "string:string") .put("sdk1", "sdv1") .put("sdk2", "sdv2") - .put("serialization.lib", "com.facebook.hive.orc.OrcSerde") + .put(SERIALIZATION_LIB, "com.facebook.hive.orc.OrcSerde") .buildOrThrow(); @Test diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java index cad9c391c0a1..416de5dcce8f 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java @@ -61,6 +61,8 @@ import static io.trino.plugin.hive.HiveStorageFormat.ORC; import static io.trino.plugin.hive.HiveTestUtils.getHiveSession; import static io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES; import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.IntegerType.INTEGER; @@ -225,12 +227,12 @@ private static Properties getTableProperties() tableProperties.setProperty(FILE_INPUT_FORMAT, ORC.getInputFormat()); tableProperties.setProperty(SERIALIZATION_LIB, ORC.getSerde()); tableProperties.setProperty( - "columns", + LIST_COLUMNS, COLUMNS.stream() .map(HiveColumnHandle::getName) .collect(Collectors.joining(","))); tableProperties.setProperty( - "columns.types", + LIST_COLUMN_TYPES, COLUMNS.stream() .map(HiveColumnHandle::getHiveType) .map(HiveType::toString) diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetTester.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetTester.java index 5f3ed87812fe..a21ce4788577 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetTester.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetTester.java @@ -116,6 +116,8 @@ import static io.trino.plugin.hive.HiveTestUtils.getHiveSession; import static io.trino.plugin.hive.parquet.ParquetUtil.createPageSource; import static io.trino.plugin.hive.util.HiveUtil.isStructuralType; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.BooleanType.BOOLEAN; import static io.trino.spi.type.Chars.truncateToLengthAndTrimSpaces; @@ -692,8 +694,8 @@ public static void writeParquetColumn( public static Properties createTableProperties(List columnNames, List objectInspectors) { Properties orderTableProperties = new Properties(); - orderTableProperties.setProperty("columns", Joiner.on(',').join(columnNames)); - orderTableProperties.setProperty("columns.types", Joiner.on(',').join(transform(objectInspectors, 
ObjectInspector::getTypeName))); + orderTableProperties.setProperty(LIST_COLUMNS, Joiner.on(',').join(columnNames)); + orderTableProperties.setProperty(LIST_COLUMN_TYPES, Joiner.on(',').join(transform(objectInspectors, ObjectInspector::getTypeName))); return orderTableProperties; } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java index 2c8c73f72c4b..c8e59fc1189c 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java @@ -57,6 +57,8 @@ import static com.google.common.collect.Iterables.transform; import static io.trino.plugin.hive.parquet.TestParquetDecimalScaling.ParquetDecimalInsert.maximumValue; import static io.trino.plugin.hive.parquet.TestParquetDecimalScaling.ParquetDecimalInsert.minimumValue; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS; +import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES; import static io.trino.spi.type.Decimals.overflows; import static io.trino.testing.DataProviders.cartesianProduct; import static io.trino.testing.DataProviders.toDataProvider; @@ -506,8 +508,8 @@ private static void writeParquetDecimalsRecord(Path output, List columnNames, List objectInspectors) { Properties tableProperties = new Properties(); - tableProperties.setProperty("columns", Joiner.on(',').join(columnNames)); - tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(objectInspectors, ObjectInspector::getTypeName))); + tableProperties.setProperty(LIST_COLUMNS, Joiner.on(',').join(columnNames)); + tableProperties.setProperty(LIST_COLUMN_TYPES, Joiner.on(',').join(transform(objectInspectors, ObjectInspector::getTypeName))); return tableProperties; } diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCreateTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCreateTable.java index 90f0c84113d1..1e75070b1613 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCreateTable.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCreateTable.java @@ -22,6 +22,7 @@ import java.sql.Statement; import java.util.Optional; +import static io.trino.plugin.hive.HiveTableProperties.TRANSACTIONAL; import static io.trino.tempto.assertions.QueryAssert.Row.row; import static io.trino.tests.product.TestGroups.HDP3_ONLY; import static io.trino.tests.product.TestGroups.PROFILE_SPECIFIC_TESTS; @@ -51,7 +52,7 @@ public void testCreateTable() row(null, null, null), row(-42, "abc", -127), row(9223372036854775807L, "abcdefghijklmnopqrstuvwxyz", 32767)); - assertThat(getTableProperty("test_create_table", "transactional")) + assertThat(getTableProperty("test_create_table", TRANSACTIONAL)) // Hive 3 removes "transactional" table property when it has value "false" .isIn(Optional.empty(), Optional.of("false")); onTrino().executeQuery("DROP TABLE test_create_table"); @@ -74,7 +75,7 @@ public void testCreateTableAsSelect() row(null, null, null), row(-42, "abc", -127), row(9223372036854775807L, "abcdefghijklmnopqrstuvwxyz", 32767)); - assertThat(getTableProperty("test_create_table_as_select", "transactional")) + assertThat(getTableProperty("test_create_table_as_select", TRANSACTIONAL)) // Hive 3 removes "transactional" 
table property when it has value "false" .isIn(Optional.empty(), Optional.of("false")); onTrino().executeQuery("DROP TABLE test_create_table_as_select"); @@ -85,7 +86,7 @@ public void testVerifyEnvironmentHiveTransactionalByDefault() throws SQLException { onHive().executeQuery("CREATE TABLE test_hive_transactional_by_default(a bigint) STORED AS ORC"); - assertThat(getTableProperty("test_hive_transactional_by_default", "transactional")) + assertThat(getTableProperty("test_hive_transactional_by_default", TRANSACTIONAL)) .contains("true"); onHive().executeQuery("DROP TABLE test_hive_transactional_by_default"); } From 526eaf9c81f2f8fd4f31c27e12337869df5dd223 Mon Sep 17 00:00:00 2001 From: Dain Sundstrom Date: Sat, 11 Nov 2023 12:16:33 -0800 Subject: [PATCH 3/3] Replace Properties with Map in Hive readers and writers --- .../hive/BackgroundHiveSplitLoader.java | 3 +- .../plugin/hive/HiveFileWriterFactory.java | 4 +- .../io/trino/plugin/hive/HiveMetadata.java | 3 +- .../plugin/hive/HivePageSourceFactory.java | 4 +- .../plugin/hive/HivePageSourceProvider.java | 3 +- .../java/io/trino/plugin/hive/HiveSplit.java | 10 ++--- .../trino/plugin/hive/HiveWriterFactory.java | 18 ++++---- .../trino/plugin/hive/InternalHiveSplit.java | 10 ++--- .../io/trino/plugin/hive/MergeFileWriter.java | 12 ++---- .../plugin/hive/RcFileFileWriterFactory.java | 7 ++- .../io/trino/plugin/hive/acid/AcidSchema.java | 23 +++++----- .../hive/avro/AvroFileWriterFactory.java | 4 +- .../plugin/hive/avro/AvroHiveFileUtils.java | 27 ++++++------ .../hive/avro/AvroPageSourceFactory.java | 3 +- .../hive/line/LineFileWriterFactory.java | 13 +++--- .../hive/line/LinePageSourceFactory.java | 9 ++-- .../hive/line/RegexFileWriterFactory.java | 4 +- .../plugin/hive/metastore/MetastoreUtil.java | 43 +++++++++---------- .../plugin/hive/orc/OrcFileWriterFactory.java | 4 +- .../plugin/hive/orc/OrcPageSourceFactory.java | 14 +++--- .../parquet/ParquetFileWriterFactory.java | 4 +- .../parquet/ParquetPageSourceFactory.java | 9 ++-- .../hive/rcfile/RcFilePageSourceFactory.java | 13 +++--- .../io/trino/plugin/hive/util/HiveUtil.java | 33 +++++++------- .../hive/util/InternalHiveSplitFactory.java | 7 ++- .../hive/TestBackgroundHiveSplitLoader.java | 18 ++++---- .../plugin/hive/TestHiveFileFormats.java | 30 +++++-------- .../trino/plugin/hive/TestHivePageSink.java | 13 +++--- .../io/trino/plugin/hive/TestHiveSplit.java | 9 ++-- .../plugin/hive/TestHiveSplitSource.java | 13 ++---- .../TestNodeLocalDynamicSplitPruning.java | 8 ++-- .../hive/TestOrcPageSourceMemoryTracking.java | 21 ++++----- .../hive/avro/TestAvroSchemaGeneration.java | 36 +++++++++++----- .../hive/metastore/TestMetastoreUtil.java | 9 ++-- .../hive/orc/TestOrcPageSourceFactory.java | 15 +++---- .../plugin/hive/orc/TestOrcPredicates.java | 27 ++++-------- .../plugin/hive/orc/TestOrcWriterOptions.java | 22 +++++----- .../plugin/hive/parquet/ParquetUtil.java | 6 +-- .../parquet/TestParquetDecimalScaling.java | 4 +- 39 files changed, 238 insertions(+), 277 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java index 01d252210f40..a79c7a6b2adc 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/BackgroundHiveSplitLoader.java @@ -66,7 +66,6 @@ import java.util.Map.Entry; import java.util.Optional; import java.util.OptionalInt; -import 
java.util.Properties; import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.Executor; import java.util.concurrent.atomic.AtomicInteger; @@ -389,7 +388,7 @@ private ListenableFuture loadPartition(HivePartitionMetadata partition) { HivePartition hivePartition = partition.getHivePartition(); String partitionName = hivePartition.getPartitionId(); - Properties schema = partition.getPartition() + Map schema = partition.getPartition() .map(value -> getHiveSchema(value, table)) .orElseGet(() -> getHiveSchema(table)); List partitionKeys = getPartitionKeys(table, partition.getPartition()); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveFileWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveFileWriterFactory.java index 4efac69d9a8e..05c7feddd4c9 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveFileWriterFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveFileWriterFactory.java @@ -19,9 +19,9 @@ import io.trino.spi.connector.ConnectorSession; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; -import java.util.Properties; public interface HiveFileWriterFactory { @@ -30,7 +30,7 @@ Optional createFileWriter( List inputColumnNames, StorageFormat storageFormat, HiveCompressionCodec compressionCodec, - Properties schema, + Map schema, ConnectorSession session, OptionalInt bucketNumber, AcidTransaction transaction, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java index a9b31c68234f..eaa423967fbb 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveMetadata.java @@ -142,7 +142,6 @@ import java.util.Optional; import java.util.OptionalInt; import java.util.OptionalLong; -import java.util.Properties; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.function.Function; @@ -1934,7 +1933,7 @@ private List computeFileNamesForMissingBuckets( private void createEmptyFiles(ConnectorSession session, Location path, Table table, Optional partition, List fileNames) { - Properties schema; + Map schema; StorageFormat format; if (partition.isPresent()) { schema = getHiveSchema(partition.get(), table); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceFactory.java index 308fcb58b915..344063332a3b 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceFactory.java @@ -19,9 +19,9 @@ import io.trino.spi.predicate.TupleDomain; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; -import java.util.Properties; public interface HivePageSourceFactory { @@ -31,7 +31,7 @@ Optional createPageSource( long start, long length, long estimatedFileSize, - Properties schema, + Map schema, List columns, TupleDomain effectivePredicate, Optional acidInfo, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java index 113c17480f31..ec34a5814c5a 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java +++ 
b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java @@ -49,7 +49,6 @@ import java.util.Objects; import java.util.Optional; import java.util.OptionalInt; -import java.util.Properties; import java.util.Set; import java.util.regex.Pattern; @@ -173,7 +172,7 @@ public static Optional createHivePageSource( long start, long length, long estimatedFileSize, - Properties schema, + Map schema, TupleDomain effectivePredicate, TypeManager typeManager, Optional bucketConversion, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java index 36b850648cf1..f93e8a3d6a2a 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveSplit.java @@ -24,9 +24,9 @@ import io.trino.spi.connector.ConnectorSplit; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; -import java.util.Properties; import static com.google.common.base.MoreObjects.toStringHelper; import static com.google.common.base.Preconditions.checkArgument; @@ -46,7 +46,7 @@ public class HiveSplit private final long length; private final long estimatedFileSize; private final long fileModifiedTime; - private final Properties schema; + private final Map schema; private final List partitionKeys; private final List addresses; private final String partitionName; @@ -67,7 +67,7 @@ public HiveSplit( @JsonProperty("length") long length, @JsonProperty("estimatedFileSize") long estimatedFileSize, @JsonProperty("fileModifiedTime") long fileModifiedTime, - @JsonProperty("schema") Properties schema, + @JsonProperty("schema") Map schema, @JsonProperty("partitionKeys") List partitionKeys, @JsonProperty("readBucketNumber") OptionalInt readBucketNumber, @JsonProperty("tableBucketNumber") OptionalInt tableBucketNumber, @@ -105,7 +105,7 @@ public HiveSplit( long length, long estimatedFileSize, long fileModifiedTime, - Properties schema, + Map schema, List partitionKeys, List addresses, OptionalInt readBucketNumber, @@ -188,7 +188,7 @@ public long getFileModifiedTime() } @JsonProperty - public Properties getSchema() + public Map getSchema() { return schema; } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveWriterFactory.java index 4c07825cddca..3456966dd53a 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveWriterFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveWriterFactory.java @@ -57,7 +57,6 @@ import java.util.Map.Entry; import java.util.Optional; import java.util.OptionalInt; -import java.util.Properties; import java.util.Set; import java.util.UUID; import java.util.function.Consumer; @@ -299,7 +298,7 @@ public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt } UpdateMode updateMode; - Properties schema; + Map schema = new HashMap<>(); WriteInfo writeInfo; StorageFormat outputStorageFormat; HiveCompressionCodec compressionCodec; @@ -308,11 +307,10 @@ public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt // Write to: a new partition in a new partitioned table, // or a new unpartitioned table. 
updateMode = UpdateMode.NEW; - schema = new Properties(); - schema.setProperty(LIST_COLUMNS, dataColumns.stream() + schema.put(LIST_COLUMNS, dataColumns.stream() .map(DataColumn::getName) .collect(joining(","))); - schema.setProperty(LIST_COLUMN_TYPES, dataColumns.stream() + schema.put(LIST_COLUMN_TYPES, dataColumns.stream() .map(DataColumn::getHiveType) .map(HiveType::getHiveTypeName) .map(HiveTypeName::toString) @@ -371,7 +369,7 @@ public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt } } - schema = getHiveSchema(table); + schema.putAll(getHiveSchema(table)); } if (partitionName.isPresent()) { @@ -417,7 +415,7 @@ public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt outputStorageFormat = partition.get().getStorage().getStorageFormat(); compressionCodec = selectCompressionCodec(session, outputStorageFormat); - schema = getHiveSchema(partition.get(), table); + schema.putAll(getHiveSchema(partition.get(), table)); writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get()); break; @@ -431,7 +429,7 @@ public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat); compressionCodec = selectCompressionCodec(session, partitionStorageFormat); - schema = getHiveSchema(table); + schema.putAll(getHiveSchema(table)); writeInfo = locationService.getPartitionWriteInfo(locationHandle, Optional.empty(), partitionName.get()); break; @@ -442,7 +440,7 @@ public HiveWriter createWriter(Page partitionColumns, int position, OptionalInt } } - additionalTableParameters.forEach(schema::setProperty); + schema.putAll(additionalTableParameters); validateSchema(partitionName, schema); @@ -623,7 +621,7 @@ public SortingFileWriter makeRowIdSortingWriter(FileWriter deleteFileWriter, Loc OrcFileWriterFactory::createOrcDataSink); } - private void validateSchema(Optional partitionName, Properties schema) + private void validateSchema(Optional partitionName, Map schema) { // existing tables may have columns in a different order List fileColumnNames = getColumnNames(schema); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java index ae6c28e0c63b..28b11595a7bc 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/InternalHiveSplit.java @@ -20,9 +20,9 @@ import io.trino.spi.HostAddress; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; -import java.util.Properties; import java.util.function.BooleanSupplier; import static com.google.common.base.MoreObjects.toStringHelper; @@ -37,13 +37,13 @@ @NotThreadSafe public class InternalHiveSplit { - private static final int INSTANCE_SIZE = instanceSize(InternalHiveSplit.class) + instanceSize(Properties.class) + instanceSize(OptionalInt.class); + private static final int INSTANCE_SIZE = instanceSize(InternalHiveSplit.class) + instanceSize(OptionalInt.class); private final String path; private final long end; private final long estimatedFileSize; private final long fileModifiedTime; - private final Properties schema; + private final Map schema; private final List partitionKeys; private final List blocks; private final String partitionName; @@ -67,7 +67,7 @@ public InternalHiveSplit( long end, long estimatedFileSize, long fileModifiedTime, - Properties schema, 
+ Map schema, List partitionKeys, List blocks, OptionalInt readBucketNumber, @@ -141,7 +141,7 @@ public long getFileModifiedTime() return fileModifiedTime; } - public Properties getSchema() + public Map getSchema() { return schema; } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/MergeFileWriter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/MergeFileWriter.java index 432c138320bd..48d39436a608 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/MergeFileWriter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/MergeFileWriter.java @@ -31,9 +31,9 @@ import java.io.Closeable; import java.io.IOException; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; -import java.util.Properties; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -81,7 +81,7 @@ public class MergeFileWriter private final RowIdSortingFileWriterMaker sortingFileWriterMaker; private final OrcFileWriterFactory orcFileWriterFactory; private final HiveCompressionCodec compressionCodec; - private final Properties hiveAcidSchema; + private final Map hiveAcidSchema; private final String bucketFilename; private Optional deleteFileWriter = Optional.empty(); private Optional insertFileWriter = Optional.empty(); @@ -247,14 +247,12 @@ private Page buildDeletePage(Block rowIds, long writeId) private FileWriter getOrCreateInsertFileWriter() { if (insertFileWriter.isEmpty()) { - Properties schemaCopy = new Properties(); - schemaCopy.putAll(hiveAcidSchema); insertFileWriter = orcFileWriterFactory.createFileWriter( deltaDirectory.appendPath(bucketFilename), ACID_COLUMN_NAMES, fromHiveStorageFormat(ORC), compressionCodec, - schemaCopy, + hiveAcidSchema, session, bucketNumber, transaction, @@ -267,15 +265,13 @@ private FileWriter getOrCreateInsertFileWriter() private FileWriter getOrCreateDeleteFileWriter() { if (deleteFileWriter.isEmpty()) { - Properties schemaCopy = new Properties(); - schemaCopy.putAll(hiveAcidSchema); Location deletePath = deleteDeltaDirectory.appendPath(bucketFilename); FileWriter writer = getWriter(orcFileWriterFactory.createFileWriter( deletePath, ACID_COLUMN_NAMES, fromHiveStorageFormat(ORC), compressionCodec, - schemaCopy, + hiveAcidSchema, session, bucketNumber, transaction, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java index 98717d361a0c..d35a6a863807 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/RcFileFileWriterFactory.java @@ -14,7 +14,6 @@ package io.trino.plugin.hive; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Maps; import com.google.inject.Inject; import io.trino.filesystem.Location; import io.trino.filesystem.TrinoFileSystem; @@ -36,9 +35,9 @@ import java.io.Closeable; import java.io.OutputStream; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; -import java.util.Properties; import java.util.function.Supplier; import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; @@ -91,7 +90,7 @@ public Optional createFileWriter( List inputColumnNames, StorageFormat storageFormat, HiveCompressionCodec compressionCodec, - Properties schema, + Map schema, ConnectorSession session, OptionalInt bucketNumber, AcidTransaction transaction, 
@@ -107,7 +106,7 @@ public Optional createFileWriter( columnEncodingFactory = new BinaryColumnEncodingFactory(timeZone); } else if (COLUMNAR_SERDE_CLASS.equals(storageFormat.getSerde())) { - columnEncodingFactory = new TextColumnEncodingFactory(TextEncodingOptions.fromSchema(Maps.fromProperties(schema))); + columnEncodingFactory = new TextColumnEncodingFactory(TextEncodingOptions.fromSchema(schema)); } else { return Optional.empty(); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/acid/AcidSchema.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/acid/AcidSchema.java index e63a5b8df801..149cdb2a2c3e 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/acid/AcidSchema.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/acid/AcidSchema.java @@ -14,6 +14,7 @@ package io.trino.plugin.hive.acid; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import io.trino.plugin.hive.HiveType; import io.trino.plugin.hive.HiveTypeName; import io.trino.spi.type.RowType; @@ -21,8 +22,8 @@ import io.trino.spi.type.Type; import java.util.List; +import java.util.Map; import java.util.Optional; -import java.util.Properties; import static com.google.common.base.Preconditions.checkArgument; import static io.trino.plugin.hive.HiveType.HIVE_INT; @@ -61,17 +62,17 @@ public final class AcidSchema private AcidSchema() {} - public static Properties createAcidSchema(HiveType rowType) + public static Map createAcidSchema(HiveType rowType) { - Properties hiveAcidSchema = new Properties(); - hiveAcidSchema.setProperty(LIST_COLUMNS, String.join(",", ACID_COLUMN_NAMES)); - // We must supply an accurate row type, because Apache ORC code we don't control has a consistency - // check that the layout of this "row" must agree with the layout of an inserted row. - hiveAcidSchema.setProperty(LIST_COLUMN_TYPES, createAcidColumnHiveTypes(rowType).stream() - .map(HiveType::getHiveTypeName) - .map(HiveTypeName::toString) - .collect(joining(":"))); - return hiveAcidSchema; + return ImmutableMap.builder() + .put(LIST_COLUMNS, String.join(",", ACID_COLUMN_NAMES)) + // We must supply an accurate row type, because Apache ORC code we don't control has a consistency + // check that the layout of this "row" must agree with the layout of an inserted row. 
+ .put(LIST_COLUMN_TYPES, createAcidColumnHiveTypes(rowType).stream() + .map(HiveType::getHiveTypeName) + .map(HiveTypeName::toString) + .collect(joining(":"))) + .buildOrThrow(); } public static Type createRowType(List names, List types) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroFileWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroFileWriterFactory.java index 9b1ca1db2009..53ff9b636361 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroFileWriterFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroFileWriterFactory.java @@ -38,9 +38,9 @@ import java.io.Closeable; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; -import java.util.Properties; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.collect.ImmutableList.toImmutableList; @@ -78,7 +78,7 @@ public Optional createFileWriter( List inputColumnNames, StorageFormat storageFormat, HiveCompressionCodec compressionCodec, - Properties schema, + Map schema, ConnectorSession session, OptionalInt bucketNumber, AcidTransaction transaction, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroHiveFileUtils.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroHiveFileUtils.java index 3a08f13d01d4..b5ca4bfc242f 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroHiveFileUtils.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroHiveFileUtils.java @@ -42,7 +42,6 @@ import java.util.Locale; import java.util.Map; import java.util.Optional; -import java.util.Properties; import java.util.concurrent.atomic.AtomicInteger; import static com.google.common.collect.ImmutableMap.toImmutableMap; @@ -71,17 +70,17 @@ public final class AvroHiveFileUtils private AvroHiveFileUtils() {} // Lifted and shifted from org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.determineSchemaOrThrowException - public static Schema determineSchemaOrThrowException(TrinoFileSystem fileSystem, Properties properties) + public static Schema determineSchemaOrThrowException(TrinoFileSystem fileSystem, Map properties) throws IOException { // Try pull schema from literal table property - String schemaString = properties.getProperty(SCHEMA_LITERAL, ""); + String schemaString = properties.getOrDefault(SCHEMA_LITERAL, ""); if (!schemaString.isBlank() && !schemaString.equals(SCHEMA_NONE)) { return getSchemaParser().parse(schemaString); } // Try pull schema directly from URL - String schemaURL = properties.getProperty(SCHEMA_URL, ""); + String schemaURL = properties.getOrDefault(SCHEMA_URL, ""); if (!schemaURL.isBlank()) { TrinoInputFile schemaFile = fileSystem.newInputFile(Location.of(schemaURL)); if (!schemaFile.exists()) { @@ -97,32 +96,32 @@ public static Schema determineSchemaOrThrowException(TrinoFileSystem fileSystem, return getSchemaFromProperties(properties); } - private static Schema getSchemaFromProperties(Properties properties) + private static Schema getSchemaFromProperties(Map schema) throws IOException { - List columnNames = getColumnNames(properties); - List columnTypes = getColumnTypes(properties); + List columnNames = getColumnNames(schema); + List columnTypes = getColumnTypes(schema); if (columnNames.isEmpty() || columnTypes.isEmpty()) { - throw new IOException("Unable to parse column names or column types from job properties to create Avro Schema"); + throw new IOException("Unable 
to parse column names or column types from schema to create Avro Schema"); } if (columnNames.size() != columnTypes.size()) { throw new IllegalArgumentException("Avro Schema initialization failed. Number of column name and column type differs. columnNames = %s, columnTypes = %s".formatted(columnNames, columnTypes)); } - List columnComments = Optional.ofNullable(properties.getProperty(LIST_COLUMN_COMMENTS)) + List columnComments = Optional.ofNullable(schema.get(LIST_COLUMN_COMMENTS)) .filter(not(String::isBlank)) .map(Splitter.on('\0')::splitToList) .orElse(emptyList()); - final String tableName = properties.getProperty(TABLE_NAME); - final String tableComment = properties.getProperty(TABLE_COMMENT); + String tableName = schema.get(TABLE_NAME); + String tableComment = schema.get(TABLE_COMMENT); return constructSchemaFromParts( columnNames, columnTypes, columnComments, - Optional.ofNullable(properties.getProperty(SCHEMA_NAMESPACE)), - Optional.ofNullable(properties.getProperty(SCHEMA_NAME, tableName)), - Optional.ofNullable(properties.getProperty(SCHEMA_DOC, tableComment))); + Optional.ofNullable(schema.get(SCHEMA_NAMESPACE)), + Optional.ofNullable(schema.getOrDefault(SCHEMA_NAME, tableName)), + Optional.ofNullable(schema.getOrDefault(SCHEMA_DOC, tableComment))); } private static Schema constructSchemaFromParts(List columnNames, List columnTypes, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroPageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroPageSourceFactory.java index c390680d5985..e3fc466634a7 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroPageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroPageSourceFactory.java @@ -47,7 +47,6 @@ import java.util.Objects; import java.util.Optional; import java.util.OptionalInt; -import java.util.Properties; import java.util.Set; import java.util.UUID; @@ -86,7 +85,7 @@ public Optional createPageSource( long start, long length, long estimatedFileSize, - Properties schema, + Map schema, List columns, TupleDomain effectivePredicate, Optional acidInfo, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LineFileWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LineFileWriterFactory.java index 74afba8e4357..343311340d22 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LineFileWriterFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LineFileWriterFactory.java @@ -41,13 +41,12 @@ import java.io.IOException; import java.io.OutputStream; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; -import java.util.Properties; import java.util.stream.IntStream; import static com.google.common.collect.ImmutableList.toImmutableList; -import static com.google.common.collect.Maps.fromProperties; import static io.trino.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; import static io.trino.plugin.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT; import static io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR; @@ -87,7 +86,7 @@ public Optional createFileWriter( List inputColumnNames, StorageFormat storageFormat, HiveCompressionCodec compressionCodec, - Properties schema, + Map schema, ConnectorSession session, OptionalInt bucketNumber, AcidTransaction transaction, @@ -114,7 +113,7 @@ public Optional createFileWriter( .mapToObj(ordinal -> new Column(fileColumnNames.get(ordinal), 
fileColumnTypes.get(ordinal), ordinal)) .toList(); - LineSerializer lineSerializer = lineSerializerFactory.create(columns, fromProperties(schema)); + LineSerializer lineSerializer = lineSerializerFactory.create(columns, schema); try { TrinoFileSystem fileSystem = fileSystemFactory.create(session); @@ -141,10 +140,10 @@ public Optional createFileWriter( } } - private Optional getFileHeader(Properties schema, List columns) + private Optional getFileHeader(Map schema, List columns) throws IOException { - String skipHeaderCount = schema.getProperty(SKIP_HEADER_COUNT_KEY, "0"); + String skipHeaderCount = schema.getOrDefault(SKIP_HEADER_COUNT_KEY, "0"); if (skipHeaderCount.equals("0")) { return Optional.empty(); } @@ -157,7 +156,7 @@ private Optional getFileHeader(Properties schema, List columns) columns.stream() .map(column -> new Column(column.name(), VARCHAR, column.ordinal())) .collect(toImmutableList()), - fromProperties(schema)); + schema); PageBuilder pageBuilder = new PageBuilder(headerSerializer.getTypes()); pageBuilder.declarePosition(); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LinePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LinePageSourceFactory.java index eae741258e6a..b937f4af9009 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LinePageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LinePageSourceFactory.java @@ -13,7 +13,6 @@ */ package io.trino.plugin.hive.line; -import com.google.common.collect.Maps; import io.airlift.slice.Slices; import io.airlift.units.DataSize; import io.airlift.units.DataSize.Unit; @@ -40,9 +39,9 @@ import java.io.InputStream; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; -import java.util.Properties; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.collect.ImmutableList.toImmutableList; @@ -83,7 +82,7 @@ public Optional createPageSource( long start, long length, long estimatedFileSize, - Properties schema, + Map schema, List columns, TupleDomain effectivePredicate, Optional acidInfo, @@ -91,7 +90,7 @@ public Optional createPageSource( boolean originalFile, AcidTransaction transaction) { - if (!lineReaderFactory.getHiveOutputFormatClassName().equals(schema.getProperty(FILE_INPUT_FORMAT)) || + if (!lineReaderFactory.getHiveOutputFormatClassName().equals(schema.get(FILE_INPUT_FORMAT)) || !lineDeserializerFactory.getHiveSerDeClassNames().contains(getDeserializerClassName(schema))) { return Optional.empty(); } @@ -124,7 +123,7 @@ public Optional createPageSource( projectedReaderColumns.stream() .map(column -> new Column(column.getName(), column.getType(), column.getBaseHiveColumnIndex())) .collect(toImmutableList()), - Maps.fromProperties(schema)); + schema); } // Skip empty inputs diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/RegexFileWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/RegexFileWriterFactory.java index ae3bc7d2a984..6aa8cc6517c0 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/RegexFileWriterFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/RegexFileWriterFactory.java @@ -24,9 +24,9 @@ import io.trino.spi.connector.ConnectorSession; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.OptionalInt; -import java.util.Properties; import static 
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/RegexFileWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/RegexFileWriterFactory.java
index ae3bc7d2a984..6aa8cc6517c0 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/RegexFileWriterFactory.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/RegexFileWriterFactory.java
@@ -24,9 +24,9 @@
 import io.trino.spi.connector.ConnectorSession;
 
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 
 import static io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_OPEN_ERROR;
 import static io.trino.plugin.hive.util.HiveClassNames.REGEX_SERDE_CLASS;
@@ -40,7 +40,7 @@ public Optional<FileWriter> createFileWriter(
             List<String> inputColumnNames,
             StorageFormat storageFormat,
             HiveCompressionCodec compressionCodec,
-            Properties schema,
+            Map<String, String> schema,
             ConnectorSession session,
             OptionalInt bucketNumber,
             AcidTransaction transaction,
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/MetastoreUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/MetastoreUtil.java
index b32d0eab64da..40596d8aef2c 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/MetastoreUtil.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/MetastoreUtil.java
@@ -51,7 +51,6 @@
 import java.util.Map;
 import java.util.Objects;
 import java.util.Optional;
-import java.util.Properties;
 import java.util.concurrent.TimeUnit;
 
 import static com.google.common.base.Preconditions.checkArgument;
@@ -91,7 +90,7 @@ public final class MetastoreUtil
 
     private MetastoreUtil() {}
 
-    public static Properties getHiveSchema(Table table)
+    public static Map<String, String> getHiveSchema(Table table)
     {
         // Mimics function in Hive: MetaStoreUtils.getTableMetadata(Table)
         return getHiveSchema(
@@ -104,7 +103,7 @@ public static Map<String, String> getHiveSchema(Table table)
                 table.getPartitionColumns());
     }
 
-    public static Properties getHiveSchema(Partition partition, Table table)
+    public static Map<String, String> getHiveSchema(Partition partition, Table table)
     {
         // Mimics function in Hive: MetaStoreUtils.getSchema(Partition, Table)
         return getHiveSchema(
@@ -117,7 +116,7 @@ public static Map<String, String> getHiveSchema(Partition partition, Table table)
                 table.getPartitionColumns());
     }
 
-    private static Properties getHiveSchema(
+    private static Map<String, String> getHiveSchema(
             Storage sd,
             Optional<Storage> tableSd,
             List<Column> tableDataColumns,
@@ -129,33 +128,33 @@ private static Map<String, String> getHiveSchema(
         // Mimics function in Hive:
         // MetaStoreUtils.getSchema(StorageDescriptor, StorageDescriptor, Map<String, String>, String, String, List<FieldSchema>)
 
-        Properties schema = new Properties();
+        ImmutableMap.Builder<String, String> schema = ImmutableMap.builder();
 
-        schema.setProperty(FILE_INPUT_FORMAT, sd.getStorageFormat().getInputFormat());
-        schema.setProperty(FILE_OUTPUT_FORMAT, sd.getStorageFormat().getOutputFormat());
+        schema.put(FILE_INPUT_FORMAT, sd.getStorageFormat().getInputFormat());
+        schema.put(FILE_OUTPUT_FORMAT, sd.getStorageFormat().getOutputFormat());
 
-        schema.setProperty(META_TABLE_NAME, databaseName + "." + tableName);
-        schema.setProperty(META_TABLE_LOCATION, sd.getLocation());
+        schema.put(META_TABLE_NAME, databaseName + "." + tableName);
+        schema.put(META_TABLE_LOCATION, sd.getLocation());
 
         if (sd.getBucketProperty().isPresent()) {
-            schema.setProperty(BUCKET_FIELD_NAME, Joiner.on(",").join(sd.getBucketProperty().get().getBucketedBy()));
-            schema.setProperty(BUCKET_COUNT, Integer.toString(sd.getBucketProperty().get().getBucketCount()));
+            schema.put(BUCKET_FIELD_NAME, Joiner.on(",").join(sd.getBucketProperty().get().getBucketedBy()));
+            schema.put(BUCKET_COUNT, Integer.toString(sd.getBucketProperty().get().getBucketCount()));
         }
         else {
-            schema.setProperty(BUCKET_COUNT, "0");
+            schema.put(BUCKET_COUNT, "0");
         }
 
         for (Map.Entry<String, String> param : sd.getSerdeParameters().entrySet()) {
-            schema.setProperty(param.getKey(), (param.getValue() != null) ? param.getValue() : "");
+            schema.put(param.getKey(), (param.getValue() != null) ? param.getValue() : "");
         }
 
         if (sd.getStorageFormat().getSerde().equals(AVRO_SERDE_CLASS) && tableSd.isPresent()) {
             for (Map.Entry<String, String> param : tableSd.get().getSerdeParameters().entrySet()) {
-                schema.setProperty(param.getKey(), nullToEmpty(param.getValue()));
+                schema.put(param.getKey(), nullToEmpty(param.getValue()));
             }
         }
 
-        schema.setProperty(SERIALIZATION_LIB, sd.getStorageFormat().getSerde());
+        schema.put(SERIALIZATION_LIB, sd.getStorageFormat().getSerde());
 
         StringBuilder columnNameBuilder = new StringBuilder();
         StringBuilder columnTypeBuilder = new StringBuilder();
@@ -174,9 +173,9 @@ private static Map<String, String> getHiveSchema(
         }
         String columnNames = columnNameBuilder.toString();
         String columnTypes = columnTypeBuilder.toString();
-        schema.setProperty(META_TABLE_COLUMNS, columnNames);
-        schema.setProperty(META_TABLE_COLUMN_TYPES, columnTypes);
-        schema.setProperty(LIST_COLUMN_COMMENTS, columnCommentBuilder.toString());
+        schema.put(META_TABLE_COLUMNS, columnNames);
+        schema.put(META_TABLE_COLUMN_TYPES, columnTypes);
+        schema.put(LIST_COLUMN_COMMENTS, columnCommentBuilder.toString());
 
         StringBuilder partString = new StringBuilder();
         String partStringSep = "";
@@ -193,20 +192,20 @@ private static Map<String, String> getHiveSchema(
             }
         }
         if (partString.length() > 0) {
-            schema.setProperty(META_TABLE_PARTITION_COLUMNS, partString.toString());
-            schema.setProperty(META_TABLE_PARTITION_COLUMN_TYPES, partTypesString.toString());
+            schema.put(META_TABLE_PARTITION_COLUMNS, partString.toString());
+            schema.put(META_TABLE_PARTITION_COLUMN_TYPES, partTypesString.toString());
         }
 
         if (parameters != null) {
             for (Map.Entry<String, String> entry : parameters.entrySet()) {
                 // add non-null parameters to the schema
                 if (entry.getValue() != null) {
-                    schema.setProperty(entry.getKey(), entry.getValue());
+                    schema.put(entry.getKey(), entry.getValue());
                 }
             }
         }
 
-        return schema;
+        return schema.buildKeepingLast();
     }
 
     public static ProtectMode getProtectMode(Partition partition)
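Aside (not part of the patch): the `buildKeepingLast()` at the end of `getHiveSchema` is load-bearing. `Properties.setProperty` silently overwrites, and the builder above can legitimately see the same key twice, e.g. a serde parameter later overridden by a table parameter, so `buildOrThrow()` would fail where the old code quietly kept the last write. A self-contained sketch of the difference:

    import com.google.common.collect.ImmutableMap;

    public class BuildKeepingLastDemo
    {
        public static void main(String[] args)
        {
            // Keeps the last value for a duplicate key, like Properties.setProperty did
            System.out.println(ImmutableMap.<String, String>builder()
                    .put("transactional", "false") // e.g. from serde parameters
                    .put("transactional", "true")  // e.g. overridden by table parameters
                    .buildKeepingLast());          // {transactional=true}

            // buildOrThrow() rejects the duplicate instead
            try {
                ImmutableMap.<String, String>builder()
                        .put("transactional", "false")
                        .put("transactional", "true")
                        .buildOrThrow();
            }
            catch (IllegalArgumentException e) {
                System.out.println("duplicate key rejected: " + e.getMessage());
            }
        }
    }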
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcFileWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcFileWriterFactory.java
index 8ad0a0bd51c3..35359facdadd 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcFileWriterFactory.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcFileWriterFactory.java
@@ -44,9 +44,9 @@
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 import java.util.function.Supplier;
 
 import static io.trino.orc.metadata.OrcType.createRootOrcType;
@@ -125,7 +125,7 @@ public Optional<FileWriter> createFileWriter(
             List<String> inputColumnNames,
             StorageFormat storageFormat,
             HiveCompressionCodec compressionCodec,
-            Properties schema,
+            Map<String, String> schema,
             ConnectorSession session,
             OptionalInt bucketNumber,
             AcidTransaction transaction,
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcPageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcPageSourceFactory.java
index 19bc4e238ddc..aca3b9c3f715 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcPageSourceFactory.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcPageSourceFactory.java
@@ -15,7 +15,6 @@
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
 import com.google.inject.Inject;
 import io.airlift.slice.Slice;
 import io.trino.filesystem.Location;
@@ -61,7 +60,6 @@
 import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
@@ -160,12 +158,10 @@ public OrcPageSourceFactory(
         this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null");
     }
 
-    public static Properties stripUnnecessaryProperties(Properties schema)
+    public static Map<String, String> stripUnnecessaryProperties(Map<String, String> schema)
     {
-        if (ORC_SERDE_CLASS.equals(getDeserializerClassName(schema)) && !isFullAcidTable(Maps.fromProperties(schema))) {
-            Properties stripped = new Properties();
-            stripped.put(SERIALIZATION_LIB, schema.getProperty(SERIALIZATION_LIB));
-            return stripped;
+        if (ORC_SERDE_CLASS.equals(getDeserializerClassName(schema)) && !isFullAcidTable(schema)) {
+            return ImmutableMap.of(SERIALIZATION_LIB, schema.get(SERIALIZATION_LIB));
         }
         return schema;
     }
@@ -177,7 +173,7 @@ public Optional<ReaderPageSource> createPageSource(
             long start,
             long length,
             long estimatedFileSize,
-            Properties schema,
+            Map<String, String> schema,
             List<HiveColumnHandle> columns,
             TupleDomain<HiveColumnHandle> effectivePredicate,
             Optional<AcidInfo> acidInfo,
@@ -207,7 +203,7 @@ public Optional<ReaderPageSource> createPageSource(
                 readerColumnHandles,
                 columns,
                 isUseOrcColumnNames(session),
-                isFullAcidTable(Maps.fromProperties(schema)),
+                isFullAcidTable(schema),
                 effectivePredicate,
                 legacyTimeZone,
                 orcReaderOptions
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetFileWriterFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetFileWriterFactory.java
index 49faff70afda..5d830e5440c8 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetFileWriterFactory.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetFileWriterFactory.java
@@ -43,9 +43,9 @@
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 import java.util.function.Supplier;
 
 import static io.trino.parquet.writer.ParquetSchemaConverter.HIVE_PARQUET_USE_INT96_TIMESTAMP_ENCODING;
@@ -90,7 +90,7 @@ public Optional<FileWriter> createFileWriter(
             List<String> inputColumnNames,
             StorageFormat storageFormat,
             HiveCompressionCodec compressionCodec,
-            Properties schema,
+            Map<String, String> schema,
             ConnectorSession session,
             OptionalInt bucketNumber,
             AcidTransaction transaction,
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java
index 5dd96adc51d0..6c2ce5cde12a 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java
@@ -69,7 +69,6 @@
 import java.util.Optional;
 import java.util.OptionalInt;
 import java.util.OptionalLong;
-import java.util.Properties;
 import java.util.Set;
 
 import static com.google.common.base.Preconditions.checkArgument;
@@ -148,12 +147,10 @@ public ParquetPageSourceFactory(
         domainCompactionThreshold = hiveConfig.getDomainCompactionThreshold();
     }
 
-    public static Properties stripUnnecessaryProperties(Properties schema)
+    public static Map<String, String> stripUnnecessaryProperties(Map<String, String> schema)
     {
         if (PARQUET_SERDE_CLASS_NAMES.contains(getDeserializerClassName(schema))) {
-            Properties stripped = new Properties();
-            stripped.put(SERIALIZATION_LIB, schema.getProperty(SERIALIZATION_LIB));
-            return stripped;
+            return ImmutableMap.of(SERIALIZATION_LIB, schema.get(SERIALIZATION_LIB));
         }
         return schema;
     }
@@ -165,7 +162,7 @@ public Optional<ReaderPageSource> createPageSource(
             long start,
             long length,
             long estimatedFileSize,
-            Properties schema,
+            Map<String, String> schema,
             List<HiveColumnHandle> columns,
             TupleDomain<HiveColumnHandle> effectivePredicate,
             Optional<AcidInfo> acidInfo,
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java
index 389eb0408c3a..a077d12dccc2 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java
@@ -14,7 +14,6 @@
 package io.trino.plugin.hive.rcfile;
 
 import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
 import com.google.inject.Inject;
 import io.airlift.slice.Slices;
 import io.airlift.units.DataSize;
@@ -47,9 +46,9 @@
 
 import java.io.InputStream;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 
 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.collect.ImmutableList.toImmutableList;
@@ -80,12 +79,10 @@ public RcFilePageSourceFactory(TrinoFileSystemFactory fileSystemFactory, HiveCon
     {
         this.timeZone = hiveConfig.getRcfileDateTimeZone();
     }
 
-    public static Properties stripUnnecessaryProperties(Properties schema)
+    public static Map<String, String> stripUnnecessaryProperties(Map<String, String> schema)
    {
         if (LAZY_BINARY_COLUMNAR_SERDE_CLASS.equals(getDeserializerClassName(schema))) {
-            Properties stripped = new Properties();
-            stripped.put(SERIALIZATION_LIB, schema.getProperty(SERIALIZATION_LIB));
-            return stripped;
+            return ImmutableMap.of(SERIALIZATION_LIB, schema.get(SERIALIZATION_LIB));
         }
         return schema;
     }
@@ -97,7 +94,7 @@ public Optional<ReaderPageSource> createPageSource(
             long start,
             long length,
             long estimatedFileSize,
-            Properties schema,
+            Map<String, String> schema,
             List<HiveColumnHandle> columns,
             TupleDomain<HiveColumnHandle> effectivePredicate,
             Optional<AcidInfo> acidInfo,
@@ -111,7 +108,7 @@ public Optional<ReaderPageSource> createPageSource(
         if (deserializerClassName.equals(LAZY_BINARY_COLUMNAR_SERDE_CLASS)) {
             columnEncodingFactory = new BinaryColumnEncodingFactory(timeZone);
         }
         else if (deserializerClassName.equals(COLUMNAR_SERDE_CLASS)) {
-            columnEncodingFactory = new TextColumnEncodingFactory(TextEncodingOptions.fromSchema(Maps.fromProperties(schema)));
+            columnEncodingFactory = new TextColumnEncodingFactory(TextEncodingOptions.fromSchema(schema));
         }
         else {
             return Optional.empty();
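Aside (not part of the patch): `OrcPageSourceFactory`, `ParquetPageSourceFactory`, and `RcFilePageSourceFactory` now share the same shape of `stripUnnecessaryProperties`: when a split will be read by a native Trino reader, everything except `serialization.lib` is dropped before the schema is serialized into each split. A hypothetical distillation of the pattern (helper name and serde value are placeholders):

    import com.google.common.collect.ImmutableMap;

    import java.util.Map;

    public class StripSchemaDemo
    {
        private static final String SERIALIZATION_LIB = "serialization.lib";

        // Keep only the serde class when the format is handled natively; the
        // schema travels with every split sent to a worker, so size matters.
        static Map<String, String> stripForNativeReader(Map<String, String> schema, String nativeSerde)
        {
            if (nativeSerde.equals(schema.get(SERIALIZATION_LIB))) {
                return ImmutableMap.of(SERIALIZATION_LIB, schema.get(SERIALIZATION_LIB));
            }
            return schema;
        }

        public static void main(String[] args)
        {
            Map<String, String> schema = ImmutableMap.of(
                    SERIALIZATION_LIB, "com.example.NativeSerDe",
                    "columns", "a,b,c",
                    "columns.types", "int,string,bigint");
            System.out.println(stripForNativeReader(schema, "com.example.NativeSerDe"));
            // {serialization.lib=com.example.NativeSerDe}
        }
    }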
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java
index cdd87f1cb87a..8d370af19d55 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveUtil.java
@@ -65,7 +65,6 @@
 import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 import java.util.function.Function;
 
 import static com.google.common.base.Strings.isNullOrEmpty;
@@ -184,9 +183,9 @@ private HiveUtil()
     {
     }
 
-    public static Optional<String> getInputFormatName(Properties schema)
+    public static Optional<String> getInputFormatName(Map<String, String> schema)
     {
-        return Optional.ofNullable(schema.getProperty(FILE_INPUT_FORMAT));
+        return Optional.ofNullable(schema.get(FILE_INPUT_FORMAT));
     }
 
     private static long parseHiveDate(String value)
@@ -203,9 +202,9 @@ public static long parseHiveTimestamp(String value)
     {
         return HIVE_TIMESTAMP_PARSER.parseMillis(value) * MICROSECONDS_PER_MILLISECOND;
     }
 
-    public static String getDeserializerClassName(Properties schema)
+    public static String getDeserializerClassName(Map<String, String> schema)
     {
-        String name = schema.getProperty(SERIALIZATION_LIB);
+        String name = schema.get(SERIALIZATION_LIB);
         checkCondition(name != null, HIVE_INVALID_METADATA, "Table or partition is missing Hive deserializer property: %s", SERIALIZATION_LIB);
         return name;
     }
@@ -710,19 +709,19 @@ public static List<TypeInfo> extractStructFieldTypes(HiveType hiveType)
                 .collect(toImmutableList());
     }
 
-    public static int getHeaderCount(Properties schema)
+    public static int getHeaderCount(Map<String, String> schema)
     {
         return getPositiveIntegerValue(schema, SKIP_HEADER_COUNT_KEY, "0");
     }
 
-    public static int getFooterCount(Properties schema)
+    public static int getFooterCount(Map<String, String> schema)
     {
         return getPositiveIntegerValue(schema, SKIP_FOOTER_COUNT_KEY, "0");
     }
 
-    private static int getPositiveIntegerValue(Properties schema, String key, String defaultValue)
+    private static int getPositiveIntegerValue(Map<String, String> schema, String key, String defaultValue)
     {
-        String value = schema.getProperty(key, defaultValue);
+        String value = schema.getOrDefault(key, defaultValue);
         try {
             int intValue = parseInt(value);
             if (intValue < 0) {
@@ -735,30 +734,30 @@ private static int getPositiveIntegerValue(Properties schema, String key, String
         }
     }
 
-    public static List<String> getColumnNames(Properties schema)
+    public static List<String> getColumnNames(Map<String, String> schema)
     {
-        return COLUMN_NAMES_SPLITTER.splitToList(schema.getProperty(LIST_COLUMNS, ""));
+        return COLUMN_NAMES_SPLITTER.splitToList(schema.getOrDefault(LIST_COLUMNS, ""));
     }
 
-    public static List<HiveType> getColumnTypes(Properties schema)
+    public static List<HiveType> getColumnTypes(Map<String, String> schema)
     {
-        return toHiveTypes(schema.getProperty(LIST_COLUMN_TYPES, ""));
+        return toHiveTypes(schema.getOrDefault(LIST_COLUMN_TYPES, ""));
     }
 
-    public static OrcWriterOptions getOrcWriterOptions(Properties schema, OrcWriterOptions orcWriterOptions)
+    public static OrcWriterOptions getOrcWriterOptions(Map<String, String> schema, OrcWriterOptions orcWriterOptions)
     {
         if (schema.containsKey(ORC_BLOOM_FILTER_COLUMNS_KEY)) {
             if (!schema.containsKey(ORC_BLOOM_FILTER_FPP_KEY)) {
                 throw new TrinoException(HIVE_INVALID_METADATA, "FPP for bloom filter is missing");
             }
             try {
-                double fpp = parseDouble(schema.getProperty(ORC_BLOOM_FILTER_FPP_KEY));
+                double fpp = parseDouble(schema.get(ORC_BLOOM_FILTER_FPP_KEY));
                 return orcWriterOptions
-                        .withBloomFilterColumns(ImmutableSet.copyOf(COLUMN_NAMES_SPLITTER.splitToList(schema.getProperty(ORC_BLOOM_FILTER_COLUMNS_KEY))))
+                        .withBloomFilterColumns(ImmutableSet.copyOf(COLUMN_NAMES_SPLITTER.splitToList(schema.get(ORC_BLOOM_FILTER_COLUMNS_KEY))))
                         .withBloomFilterFpp(fpp);
             }
             catch (NumberFormatException e) {
-                throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Invalid value for %s property: %s", ORC_BLOOM_FILTER_FPP, schema.getProperty(ORC_BLOOM_FILTER_FPP_KEY)));
+                throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Invalid value for %s property: %s", ORC_BLOOM_FILTER_FPP, schema.get(ORC_BLOOM_FILTER_FPP_KEY)));
             }
         }
         return orcWriterOptions;
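Aside (not part of the patch): replacing `Properties.getProperty(key, default)` with `Map.getOrDefault(key, default)` is behavior-preserving here only because these schema maps never hold null values; `getOrDefault` falls back on an absent key but not on a null value, and it has no notion of the chained defaults a `Properties` object can carry. A paraphrase of the patched `getPositiveIntegerValue` contract under that assumption:

    import java.util.Map;

    public class PositiveIntegerValueDemo
    {
        // Absent key -> default; negative or non-numeric values are an error
        // (the real code raises a TrinoException instead)
        static int positiveIntegerValue(Map<String, String> schema, String key, String defaultValue)
        {
            String value = schema.getOrDefault(key, defaultValue);
            int intValue = Integer.parseInt(value);
            if (intValue < 0) {
                throw new IllegalArgumentException("Invalid value for " + key + ": " + value);
            }
            return intValue;
        }

        public static void main(String[] args)
        {
            System.out.println(positiveIntegerValue(Map.of("skip.header.line.count", "2"), "skip.header.line.count", "0")); // 2
            System.out.println(positiveIntegerValue(Map.of(), "skip.header.line.count", "0")); // 0
        }
    }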
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java
index 3395cfd426a9..2e6588d3d918 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/InternalHiveSplitFactory.java
@@ -38,7 +38,6 @@
 import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 import java.util.function.BooleanSupplier;
 
 import static com.google.common.base.Preconditions.checkArgument;
@@ -51,7 +50,7 @@ public class InternalHiveSplitFactory
 {
     private final String partitionName;
     private final HiveStorageFormat storageFormat;
-    private final Properties strippedSchema;
+    private final Map<String, String> strippedSchema;
     private final List<HivePartitionKey> partitionKeys;
     private final Optional<Domain> pathDomain;
     private final TableToPartitionMapping tableToPartitionMapping;
@@ -65,7 +64,7 @@ public class InternalHiveSplitFactory
     public InternalHiveSplitFactory(
             String partitionName,
             HiveStorageFormat storageFormat,
-            Properties schema,
+            Map<String, String> schema,
             List<HivePartitionKey> partitionKeys,
             TupleDomain<HiveColumnHandle> effectivePredicate,
             BooleanSupplier partitionMatchSupplier,
@@ -91,7 +90,7 @@ public InternalHiveSplitFactory(
         checkArgument(minimumTargetSplitSizeInBytes > 0, "minimumTargetSplitSize must be > 0, found: %s", minimumTargetSplitSize);
     }
 
-    private static Properties stripUnnecessaryProperties(Properties schema)
+    private static Map<String, String> stripUnnecessaryProperties(Map<String, String> schema)
     {
         // Sending the full schema with every split is costly and can be avoided for formats supported natively
         schema = OrcPageSourceFactory.stripUnnecessaryProperties(schema);
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java
index 6b0e8067c432..64a89f0076fc 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestBackgroundHiveSplitLoader.java
@@ -16,6 +16,7 @@
 import com.google.common.collect.AbstractIterator;
 import com.google.common.collect.ArrayListMultimap;
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ListMultimap;
 import com.google.common.io.Resources;
 import io.airlift.stats.CounterStat;
@@ -64,7 +65,6 @@
 import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 import java.util.Set;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.CountDownLatch;
@@ -170,7 +170,7 @@ public void testCsv()
         assertCsvSplitCount(file, Map.of(), 33);
         assertCsvSplitCount(file, Map.of(HEADER_COUNT, "1"), 33);
         assertCsvSplitCount(file, Map.of(HEADER_COUNT, "2"), 1);
-        assertCsvSplitCount(file, Map.of(HEADER_COUNT, "1"), 1);
+        assertCsvSplitCount(file, Map.of(FOOTER_COUNT, "1"), 1);
         assertCsvSplitCount(file, Map.of(HEADER_COUNT, "1", FOOTER_COUNT, "1"), 1);
     }
 
@@ -780,9 +780,10 @@ public void testBuildManifestFileIterator()
             throws IOException
     {
         CachingDirectoryLister directoryLister = new CachingDirectoryLister(new Duration(0, TimeUnit.MINUTES), DataSize.ofBytes(0), List.of());
-        Properties schema = new Properties();
-        schema.setProperty(FILE_INPUT_FORMAT, SYMLINK_TEXT_INPUT_FORMAT_CLASS);
-        schema.setProperty(SERIALIZATION_LIB, AVRO.getSerde());
+        Map<String, String> schema = ImmutableMap.<String, String>builder()
+                .put(FILE_INPUT_FORMAT, SYMLINK_TEXT_INPUT_FORMAT_CLASS)
+                .put(SERIALIZATION_LIB, AVRO.getSerde())
+                .buildOrThrow();
 
         Location firstFilePath = Location.of("memory:///db_name/table_name/file1");
         Location secondFilePath = Location.of("memory:///db_name/table_name/file2");
@@ -820,9 +821,10 @@ public void testBuildManifestFileIteratorNestedDirectory()
             throws IOException
     {
         CachingDirectoryLister directoryLister = new CachingDirectoryLister(new Duration(5, TimeUnit.MINUTES), DataSize.of(100, KILOBYTE), List.of());
-        Properties schema = new Properties();
-        schema.setProperty(FILE_INPUT_FORMAT, SYMLINK_TEXT_INPUT_FORMAT_CLASS);
-        schema.setProperty(SERIALIZATION_LIB, AVRO.getSerde());
+        Map<String, String> schema = ImmutableMap.<String, String>builder()
+                .put(FILE_INPUT_FORMAT, SYMLINK_TEXT_INPUT_FORMAT_CLASS)
+                .put(SERIALIZATION_LIB, AVRO.getSerde())
+                .buildOrThrow();
 
         Location filePath = Location.of("memory:///db_name/table_name/file1");
         Location directoryPath = Location.of("memory:///db_name/table_name/dir/file2");
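Aside (not part of the patch): the `testCsv` hunk above also carries a real fix: the fourth assertion previously repeated the `HEADER_COUNT, "1"` case, which leaves the file splittable and yields 33 splits, while the expected count of 1 makes sense only for the footer case. A sketch of the splittability rule those assertions encode (key values mirror the SerdeConstants definitions):

    import java.util.Map;

    public class CsvSplittabilityDemo
    {
        private static final String HEADER_COUNT = "skip.header.line.count";
        private static final String FOOTER_COUNT = "skip.footer.line.count";

        // A text file can be split only if at most one header line and no
        // footer lines must be skipped, matching the expected counts above
        static boolean splittable(Map<String, String> schema)
        {
            int header = Integer.parseInt(schema.getOrDefault(HEADER_COUNT, "0"));
            int footer = Integer.parseInt(schema.getOrDefault(FOOTER_COUNT, "0"));
            return header <= 1 && footer == 0;
        }

        public static void main(String[] args)
        {
            System.out.println(splittable(Map.of()));                  // true  -> 33 splits
            System.out.println(splittable(Map.of(HEADER_COUNT, "1"))); // true  -> 33 splits
            System.out.println(splittable(Map.of(HEADER_COUNT, "2"))); // false -> 1 split
            System.out.println(splittable(Map.of(FOOTER_COUNT, "1"))); // false -> 1 split
        }
    }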
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java
index 902291918503..a598b26c166c 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java
@@ -940,10 +940,6 @@ private static void testPageSourceFactory(
             int rowCount)
             throws IOException
     {
-        Properties splitProperties = new Properties();
-        splitProperties.setProperty(FILE_INPUT_FORMAT, storageFormat.getInputFormat());
-        splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerde());
-
         // Use full columns in split properties
         ImmutableList.Builder<String> splitPropertiesColumnNames = ImmutableList.builder();
         ImmutableList.Builder<String> splitPropertiesColumnTypes = ImmutableList.builder();
@@ -958,8 +954,12 @@ private static void testPageSourceFactory(
             }
         }
 
-        splitProperties.setProperty(LIST_COLUMNS, String.join(",", splitPropertiesColumnNames.build()));
-        splitProperties.setProperty(LIST_COLUMN_TYPES, String.join(",", splitPropertiesColumnTypes.build()));
+        Map<String, String> splitProperties = ImmutableMap.<String, String>builder()
+                .put(FILE_INPUT_FORMAT, storageFormat.getInputFormat())
+                .put(SERIALIZATION_LIB, storageFormat.getSerde())
+                .put(LIST_COLUMNS, String.join(",", splitPropertiesColumnNames.build()))
+                .put(LIST_COLUMN_TYPES, String.join(",", splitPropertiesColumnTypes.build()))
+                .buildOrThrow();
 
         List<HivePartitionKey> partitionKeys = testReadColumns.stream()
                 .filter(TestColumn::partitionKey)
@@ -1319,20 +1319,10 @@ private static void createTestFileTrino(
         }
         Page page = pageBuilder.build();
 
-        Properties tableProperties = new Properties();
-        tableProperties.setProperty(
-                LIST_COLUMNS,
-                testColumns.stream()
-                        .map(TestColumn::name)
-                        .collect(Collectors.joining(",")));
-
-        tableProperties.setProperty(
-                LIST_COLUMN_TYPES,
-                testColumns.stream()
-                        .map(TestColumn::type)
-                        .map(HiveType::toHiveType)
-                        .map(HiveType::toString)
-                        .collect(Collectors.joining(",")));
+        Map<String, String> tableProperties = ImmutableMap.<String, String>builder()
+                .put(LIST_COLUMNS, testColumns.stream().map(TestColumn::name).collect(Collectors.joining(",")))
+                .put(LIST_COLUMN_TYPES, testColumns.stream().map(TestColumn::type).map(HiveType::toHiveType).map(HiveType::toString).collect(Collectors.joining(",")))
+                .buildOrThrow();
 
         Optional<FileWriter> fileWriter = fileWriterFactory.createFileWriter(
                 location,
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java
index b530e08accd6..09974bb0654b 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHivePageSink.java
@@ -53,9 +53,9 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 import java.util.stream.Stream;
 
 import static com.google.common.collect.ImmutableList.toImmutableList;
@@ -242,11 +242,12 @@ private static ConnectorPageSource createPageSource(TrinoFileSystemFactory fileS
             throws IOException
     {
         long length = fileSystemFactory.create(ConnectorIdentity.ofUser("test")).newInputFile(location).length();
-        Properties splitProperties = new Properties();
-        splitProperties.setProperty(FILE_INPUT_FORMAT, config.getHiveStorageFormat().getInputFormat());
-        splitProperties.setProperty(SERIALIZATION_LIB, config.getHiveStorageFormat().getSerde());
-        splitProperties.setProperty(LIST_COLUMNS, Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getName).collect(toImmutableList())));
-        splitProperties.setProperty(LIST_COLUMN_TYPES, Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getHiveType).map(hiveType -> hiveType.getHiveTypeName().toString()).collect(toImmutableList())));
+        Map<String, String> splitProperties = ImmutableMap.<String, String>builder()
+                .put(FILE_INPUT_FORMAT, config.getHiveStorageFormat().getInputFormat())
+                .put(SERIALIZATION_LIB, config.getHiveStorageFormat().getSerde())
+                .put(LIST_COLUMNS, Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getName).collect(toImmutableList())))
+                .put(LIST_COLUMN_TYPES, Joiner.on(',').join(getColumnHandles().stream().map(HiveColumnHandle::getHiveType).map(hiveType -> hiveType.getHiveTypeName().toString()).collect(toImmutableList())))
+                .buildOrThrow();
         HiveSplit split = new HiveSplit(
                 "",
                 location.toString(),
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java
index b8a264a76a38..068aec23941b 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplit.java
@@ -28,9 +28,9 @@
 import org.junit.jupiter.api.Test;
 
 import java.time.Instant;
+import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 
 import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn;
 import static io.trino.plugin.hive.HiveType.HIVE_LONG;
@@ -47,9 +47,10 @@ public void testJsonRoundTrip()
         objectMapperProvider.setJsonDeserializers(ImmutableMap.of(Type.class, new TypeDeserializer(new TestingTypeManager())));
         JsonCodec<HiveSplit> codec = new JsonCodecFactory(objectMapperProvider).jsonCodec(HiveSplit.class);
 
-        Properties schema = new Properties();
-        schema.setProperty("foo", "bar");
-        schema.setProperty("bar", "baz");
+        Map<String, String> schema = ImmutableMap.<String, String>builder()
+                .put("foo", "bar")
+                .put("bar", "baz")
+                .buildOrThrow();
 
         ImmutableList<HivePartitionKey> partitionKeys = ImmutableList.of(new HivePartitionKey("a", "apple"), new HivePartitionKey("b", "42"));
         ImmutableList<HostAddress> addresses = ImmutableList.of(HostAddress.fromParts("127.0.0.1", 44), HostAddress.fromParts("127.0.0.1", 45));
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java
index 9325a2729633..516fa6e1c5cc 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveSplitSource.java
@@ -14,6 +14,7 @@
 package io.trino.plugin.hive;
 
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.util.concurrent.SettableFuture;
 import io.airlift.stats.CounterStat;
 import io.airlift.units.DataSize;
@@ -25,7 +26,6 @@
 import java.util.List;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
@@ -231,7 +231,7 @@ public void testReaderWaitsForSplits()
 
             // wait for thread to get the split
             ConnectorSplit split = splits.get(800, TimeUnit.MILLISECONDS);
-            assertEquals(((HiveSplit) split).getSchema().getProperty("id"), "33");
+            assertEquals(((HiveSplit) split).getSchema().get("id"), "33");
         }
         finally {
             // make sure the thread exits
@@ -325,7 +325,7 @@ private TestSplit(int id, OptionalInt bucketNumber, DataSize fileSize, BooleanSu
                     fileSize.toBytes(),
                     fileSize.toBytes(),
                     Instant.now().toEpochMilli(),
-                    properties("id", String.valueOf(id)),
+                    ImmutableMap.of("id", String.valueOf(id)),
                     ImmutableList.of(),
                     ImmutableList.of(new InternalHiveBlock(0, fileSize.toBytes(), ImmutableList.of())),
                     bucketNumber,
@@ -338,12 +338,5 @@ private TestSplit(int id, OptionalInt bucketNumber, DataSize fileSize, BooleanSu
                     Optional.empty(),
                     partitionMatchSupplier);
         }
-
-        private static Properties properties(String key, String value)
-        {
-            Properties properties = new Properties();
-            properties.setProperty(key, value);
-            return properties;
-        }
     }
 }
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java
index 1122c6ccbff2..805b1217cd16 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestNodeLocalDynamicSplitPruning.java
@@ -40,7 +40,6 @@
 import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 import java.util.Set;
 import java.util.concurrent.CompletableFuture;
 
@@ -118,9 +117,10 @@ private static ConnectorPageSource createTestingPageSource(HiveTransactionHandle
         TrinoFileSystemFactory fileSystemFactory = new MemoryFileSystemFactory();
         fileSystemFactory.create(ConnectorIdentity.ofUser("test")).newOutputFile(location).create().close();
 
-        Properties splitProperties = new Properties();
-        splitProperties.setProperty(FILE_INPUT_FORMAT, hiveConfig.getHiveStorageFormat().getInputFormat());
-        splitProperties.setProperty(SERIALIZATION_LIB, hiveConfig.getHiveStorageFormat().getSerde());
+        Map<String, String> splitProperties = ImmutableMap.<String, String>builder()
+                .put(FILE_INPUT_FORMAT, hiveConfig.getHiveStorageFormat().getInputFormat())
+                .put(SERIALIZATION_LIB, hiveConfig.getHiveStorageFormat().getSerde())
+                .buildOrThrow();
         HiveSplit split = new HiveSplit(
                 "",
                 location.toString(),
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java
index 399318cfcb80..b9138e10f2e0 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java
@@ -14,6 +14,7 @@
 package io.trino.plugin.hive;
 
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
 import io.airlift.slice.Slice;
 import io.airlift.stats.Distribution;
@@ -84,6 +85,7 @@
 import java.time.Instant;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
 import java.util.Properties;
@@ -491,7 +493,7 @@ public void testScanFilterAndProjectOperator()
     private class TestPreparer
     {
         private final FileSplit fileSplit;
-        private final Properties schema;
+        private final Map<String, String> schema;
         private final List<HiveColumnHandle> columns;
         private final List<Type> types;
         private final String partitionName;
@@ -509,17 +511,12 @@ public TestPreparer(String tempFilePath, List<TestColumn> testColumns, int numRo
                 throws Exception
         {
            OrcSerde serde = new OrcSerde();
-            schema = new Properties();
-            schema.setProperty(LIST_COLUMNS,
-                    testColumns.stream()
-                            .map(TestColumn::getName)
-                            .collect(Collectors.joining(",")));
-            schema.setProperty(LIST_COLUMN_TYPES,
-                    testColumns.stream()
-                            .map(TestColumn::getType)
-                            .collect(Collectors.joining(",")));
-            schema.setProperty(FILE_INPUT_FORMAT, OrcInputFormat.class.getName());
-            schema.setProperty(SERIALIZATION_LIB, serde.getClass().getName());
+            schema = ImmutableMap.<String, String>builder()
+                    .put(LIST_COLUMNS, testColumns.stream().map(TestColumn::getName).collect(Collectors.joining(",")))
+                    .put(LIST_COLUMN_TYPES, testColumns.stream().map(TestColumn::getType).collect(Collectors.joining(",")))
+                    .put(FILE_INPUT_FORMAT, OrcInputFormat.class.getName())
+                    .put(SERIALIZATION_LIB, serde.getClass().getName())
+                    .buildOrThrow();
 
             partitionKeys = testColumns.stream()
                     .filter(TestColumn::isPartitionKey)
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/avro/TestAvroSchemaGeneration.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/avro/TestAvroSchemaGeneration.java
index 2e67ab71b940..cdc380572c74 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/avro/TestAvroSchemaGeneration.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/avro/TestAvroSchemaGeneration.java
@@ -13,24 +13,28 @@
  */
 package io.trino.plugin.hive.avro;
 
+import com.google.common.collect.ImmutableMap;
 import io.trino.filesystem.local.LocalFileSystem;
 import io.trino.plugin.hive.HiveType;
 import io.trino.plugin.hive.type.TypeInfo;
 import io.trino.spi.type.RowType;
-import io.trino.spi.type.VarcharType;
 import org.apache.avro.Schema;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
 import org.junit.jupiter.api.Test;
 
 import java.nio.file.Path;
+import java.util.Map;
 import java.util.Properties;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
+import static io.trino.plugin.hive.HiveType.HIVE_STRING;
+import static io.trino.plugin.hive.HiveType.toHiveType;
 import static io.trino.plugin.hive.avro.AvroHiveConstants.TABLE_NAME;
 import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS;
 import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES;
+import static io.trino.spi.type.VarcharType.VARCHAR;
 import static org.assertj.core.api.Assertions.assertThat;
 
 public class TestAvroSchemaGeneration
@@ -39,12 +43,13 @@ public class TestAvroSchemaGeneration
     public void testOldVsNewSchemaGeneration()
             throws Exception
     {
-        Properties properties = new Properties();
-        properties.setProperty(TABLE_NAME, "testingTable");
-        properties.setProperty(LIST_COLUMNS, "a,b");
-        properties.setProperty(LIST_COLUMN_TYPES, Stream.of(HiveType.HIVE_INT, HiveType.HIVE_STRING).map(HiveType::getTypeInfo).map(TypeInfo::toString).collect(Collectors.joining(",")));
+        Map<String, String> properties = ImmutableMap.<String, String>builder()
+                .put(TABLE_NAME, "testingTable")
+                .put(LIST_COLUMNS, "a,b")
+                .put(LIST_COLUMN_TYPES, Stream.of(HiveType.HIVE_INT, HIVE_STRING).map(HiveType::getTypeInfo).map(TypeInfo::toString).collect(Collectors.joining(",")))
+                .buildOrThrow();
         Schema actual = AvroHiveFileUtils.determineSchemaOrThrowException(new LocalFileSystem(Path.of("/")), properties);
-        Schema expected = AvroSerdeUtils.determineSchemaOrThrowException(new Configuration(false), properties);
+        Schema expected = AvroSerdeUtils.determineSchemaOrThrowException(new Configuration(false), toProperties(properties));
         assertThat(actual).isEqualTo(expected);
     }
 
@@ -52,12 +57,21 @@ public void testOldVsNewSchemaGeneration()
     public void testOldVsNewSchemaGenerationWithNested()
             throws Exception
     {
-        Properties properties = new Properties();
-        properties.setProperty(TABLE_NAME, "testingTable");
-        properties.setProperty(LIST_COLUMNS, "a,b");
-        properties.setProperty(LIST_COLUMN_TYPES, Stream.of(HiveType.toHiveType(RowType.rowType(RowType.field("a", VarcharType.VARCHAR))), HiveType.HIVE_STRING).map(HiveType::getTypeInfo).map(TypeInfo::toString).collect(Collectors.joining(",")));
+        Map<String, String> properties = ImmutableMap.<String, String>builder()
+                .put(TABLE_NAME, "testingTable")
+                .put(LIST_COLUMNS, "a,b")
+                .put(LIST_COLUMN_TYPES, toHiveType(RowType.rowType(RowType.field("a", VARCHAR))) + "," + HIVE_STRING)
+                .buildOrThrow();
         Schema actual = AvroHiveFileUtils.determineSchemaOrThrowException(new LocalFileSystem(Path.of("/")), properties);
-        Schema expected = AvroSerdeUtils.determineSchemaOrThrowException(new Configuration(false), properties);
+
+        Schema expected = AvroSerdeUtils.determineSchemaOrThrowException(new Configuration(false), toProperties(properties));
         assertThat(actual).isEqualTo(expected);
     }
+
+    private static Properties toProperties(Map<String, String> properties)
+    {
+        Properties hiveProperties = new Properties();
+        hiveProperties.putAll(properties);
+        return hiveProperties;
+    }
 }
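Aside (not part of the patch): where a legacy Hive code path still genuinely requires `Properties` (here, `AvroSerdeUtils` from hive-serde), the test bridges back explicitly with the `toProperties` helper above. Since `Properties` extends `Hashtable<Object, Object>`, `putAll` from a `Map<String, String>` is safe and lossless; it is the opposite direction that needs the casts this series removes. The helper in isolation:

    import java.util.Map;
    import java.util.Properties;

    public class ToPropertiesDemo
    {
        // Same shape as the test helper in the hunk above
        static Properties toProperties(Map<String, String> map)
        {
            Properties properties = new Properties();
            properties.putAll(map); // Properties is a Hashtable<Object, Object>
            return properties;
        }

        public static void main(String[] args)
        {
            System.out.println(toProperties(Map.of("columns", "a,b")).getProperty("columns")); // a,b
        }
    }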
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestMetastoreUtil.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestMetastoreUtil.java
index 62d1cbe65a61..07cc8fd8b7f5 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestMetastoreUtil.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/TestMetastoreUtil.java
@@ -33,7 +33,6 @@
 import java.util.Locale;
 import java.util.Map;
 import java.util.Optional;
-import java.util.Properties;
 
 import static io.airlift.slice.Slices.utf8Slice;
 import static io.trino.hive.thrift.metastore.hive_metastoreConstants.FILE_INPUT_FORMAT;
@@ -182,14 +181,14 @@ public void testPartitionRoundTrip()
     @Test
     public void testHiveSchemaTable()
     {
-        Properties actual = MetastoreUtil.getHiveSchema(ThriftMetastoreUtil.fromMetastoreApiTable(TEST_TABLE_WITH_UNSUPPORTED_FIELDS, TEST_SCHEMA));
+        Map<String, String> actual = MetastoreUtil.getHiveSchema(ThriftMetastoreUtil.fromMetastoreApiTable(TEST_TABLE_WITH_UNSUPPORTED_FIELDS, TEST_SCHEMA));
         assertEquals(actual, TEST_TABLE_METADATA);
     }
 
     @Test
     public void testHiveSchemaPartition()
     {
-        Properties actual = MetastoreUtil.getHiveSchema(ThriftMetastoreUtil.fromMetastoreApiPartition(TEST_PARTITION_WITH_UNSUPPORTED_FIELDS), ThriftMetastoreUtil.fromMetastoreApiTable(TEST_TABLE_WITH_UNSUPPORTED_FIELDS, TEST_SCHEMA));
+        Map<String, String> actual = MetastoreUtil.getHiveSchema(ThriftMetastoreUtil.fromMetastoreApiPartition(TEST_PARTITION_WITH_UNSUPPORTED_FIELDS), ThriftMetastoreUtil.fromMetastoreApiTable(TEST_TABLE_WITH_UNSUPPORTED_FIELDS, TEST_SCHEMA));
         assertEquals(actual, TEST_TABLE_METADATA);
     }
 
@@ -199,10 +198,10 @@ public void testHiveSchemaCaseInsensitive()
         List<FieldSchema> testSchema = TEST_SCHEMA.stream()
                 .map(fieldSchema -> new FieldSchema(fieldSchema.getName(), fieldSchema.getType().toUpperCase(Locale.ENGLISH), fieldSchema.getComment()))
                 .toList();
-        Properties actualTable = MetastoreUtil.getHiveSchema(ThriftMetastoreUtil.fromMetastoreApiTable(TEST_TABLE_WITH_UNSUPPORTED_FIELDS, testSchema));
+        Map<String, String> actualTable = MetastoreUtil.getHiveSchema(ThriftMetastoreUtil.fromMetastoreApiTable(TEST_TABLE_WITH_UNSUPPORTED_FIELDS, testSchema));
         assertEquals(actualTable, TEST_TABLE_METADATA);
 
-        Properties actualPartition = MetastoreUtil.getHiveSchema(ThriftMetastoreUtil.fromMetastoreApiPartition(TEST_PARTITION_WITH_UNSUPPORTED_FIELDS), ThriftMetastoreUtil.fromMetastoreApiTable(TEST_TABLE_WITH_UNSUPPORTED_FIELDS, testSchema));
+        Map<String, String> actualPartition = MetastoreUtil.getHiveSchema(ThriftMetastoreUtil.fromMetastoreApiPartition(TEST_PARTITION_WITH_UNSUPPORTED_FIELDS), ThriftMetastoreUtil.fromMetastoreApiTable(TEST_TABLE_WITH_UNSUPPORTED_FIELDS, testSchema));
         assertEquals(actualPartition, TEST_TABLE_METADATA);
     }
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPageSourceFactory.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPageSourceFactory.java
index 3ad44049fd8c..2dcdd5e86dda 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPageSourceFactory.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPageSourceFactory.java
@@ -45,13 +45,13 @@
 import java.util.Optional;
 import java.util.OptionalInt;
 import java.util.OptionalLong;
-import java.util.Properties;
 import java.util.Set;
 import java.util.function.LongPredicate;
 
 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.collect.ImmutableList.toImmutableList;
 import static com.google.common.io.Resources.getResource;
+import static io.trino.hive.thrift.metastore.hive_metastoreConstants.FILE_INPUT_FORMAT;
 import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR;
 import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn;
 import static io.trino.plugin.hive.HiveStorageFormat.ORC;
@@ -68,7 +68,6 @@
 import static io.trino.tpch.NationColumn.NATION_KEY;
 import static io.trino.tpch.NationColumn.REGION_KEY;
 import static java.util.Collections.nCopies;
-import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertFalse;
@@ -328,13 +327,13 @@ private static HiveColumnHandle toHiveColumnHandle(NationColumn nationColumn, in
                 Optional.empty());
     }
 
-    private static Properties createSchema()
+    private static Map<String, String> createSchema()
     {
-        Properties schema = new Properties();
-        schema.setProperty(SERIALIZATION_LIB, ORC.getSerde());
-        schema.setProperty(FILE_INPUT_FORMAT, ORC.getInputFormat());
-        schema.setProperty(TRANSACTIONAL, "true");
-        return schema;
+        return ImmutableMap.<String, String>builder()
+                .put(SERIALIZATION_LIB, ORC.getSerde())
+                .put(FILE_INPUT_FORMAT, ORC.getInputFormat())
+                .put(TRANSACTIONAL, "true")
+                .buildOrThrow();
     }
 
     private static void assertEqualsByColumns(Set<Integer> columns, List<Nation> actualRows, List<Nation> expectedRows)
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java
index 416de5dcce8f..bf028866b7ea 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java
@@ -49,9 +49,9 @@
 import java.io.IOException;
 import java.time.Instant;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 import java.util.stream.Collectors;
 
 import static io.trino.hive.thrift.metastore.hive_metastoreConstants.FILE_INPUT_FORMAT;
@@ -62,7 +62,7 @@
 import static io.trino.plugin.hive.HiveTestUtils.getHiveSession;
 import static io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION;
 import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS;
-import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_COMMENTS;
+import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES;
 import static io.trino.plugin.hive.util.SerdeConstants.SERIALIZATION_LIB;
 import static io.trino.spi.type.BigintType.BIGINT;
 import static io.trino.spi.type.IntegerType.INTEGER;
@@ -221,22 +221,13 @@ private static void writeTestFile(ConnectorSession session, TrinoFileSystemFacto
         fileWriter.commit();
     }
 
-    private static Properties getTableProperties()
+    private static Map<String, String> getTableProperties()
     {
-        Properties tableProperties = new Properties();
-        tableProperties.setProperty(FILE_INPUT_FORMAT, ORC.getInputFormat());
-        tableProperties.setProperty(SERIALIZATION_LIB, ORC.getSerde());
-        tableProperties.setProperty(
-                LIST_COLUMNS,
-                COLUMNS.stream()
-                        .map(HiveColumnHandle::getName)
-                        .collect(Collectors.joining(",")));
-        tableProperties.setProperty(
-                LIST_COLUMN_COMMENTS,
-                COLUMNS.stream()
-                        .map(HiveColumnHandle::getHiveType)
-                        .map(HiveType::toString)
-                        .collect(Collectors.joining(",")));
-        return tableProperties;
+        return ImmutableMap.<String, String>builder()
+                .put(FILE_INPUT_FORMAT, ORC.getInputFormat())
+                .put(SERIALIZATION_LIB, ORC.getSerde())
+                .put(LIST_COLUMNS, COLUMNS.stream().map(HiveColumnHandle::getName).collect(Collectors.joining(",")))
+                .put(LIST_COLUMN_TYPES, COLUMNS.stream().map(HiveColumnHandle::getHiveType).map(HiveType::toString).collect(Collectors.joining(",")))
+                .buildOrThrow();
     }
 }
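Aside (not part of the patch): the hunk above also fixes a latent test bug: the comma-joined Hive type names were being stored under `columns.comments` (`LIST_COLUMN_COMMENTS`) rather than `columns.types` (`LIST_COLUMN_TYPES`), so the page source was built without real type metadata. A compact sketch of producing both serde lists from a column list (the nested `Column` record is illustrative, not Trino's type):

    import java.util.List;
    import java.util.Map;
    import java.util.stream.Collectors;

    public class SerdeColumnListsDemo
    {
        // Key values mirror SerdeConstants.LIST_COLUMNS / LIST_COLUMN_TYPES
        private static final String LIST_COLUMNS = "columns";
        private static final String LIST_COLUMN_TYPES = "columns.types";

        record Column(String name, String hiveType) {}

        public static void main(String[] args)
        {
            List<Column> columns = List.of(new Column("id", "bigint"), new Column("name", "string"));
            Map<String, String> schema = Map.of(
                    LIST_COLUMNS, columns.stream().map(Column::name).collect(Collectors.joining(",")),
                    LIST_COLUMN_TYPES, columns.stream().map(Column::hiveType).collect(Collectors.joining(",")));
            System.out.println(schema); // {columns=id,name, columns.types=bigint,string} (order may vary)
        }
    }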
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcWriterOptions.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcWriterOptions.java
index a8b52d6db664..3164d0d68c99 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcWriterOptions.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcWriterOptions.java
@@ -13,11 +13,12 @@
  */
 package io.trino.plugin.hive.orc;
 
+import com.google.common.collect.ImmutableMap;
 import io.airlift.units.DataSize;
 import io.trino.orc.OrcWriterOptions;
 import org.junit.jupiter.api.Test;
 
-import java.util.Properties;
+import java.util.Map;
 
 import static io.airlift.units.DataSize.Unit.KILOBYTE;
 import static io.airlift.units.DataSize.Unit.MEGABYTE;
@@ -76,9 +77,10 @@ public void testOrcWriterOptionsFromOrcWriterConfig()
     @Test
     public void testOrcWriterOptionsFromTableProperties()
     {
-        Properties tableProperties = new Properties();
-        tableProperties.setProperty(ORC_BLOOM_FILTER_COLUMNS_KEY, "column_a, column_b");
-        tableProperties.setProperty(ORC_BLOOM_FILTER_FPP_KEY, "0.5");
+        Map<String, String> tableProperties = ImmutableMap.<String, String>builder()
+                .put(ORC_BLOOM_FILTER_COLUMNS_KEY, "column_a, column_b")
+                .put(ORC_BLOOM_FILTER_FPP_KEY, "0.5")
+                .buildOrThrow();
 
         OrcWriterOptions orcWriterOptions = getOrcWriterOptions(tableProperties, new OrcWriterOptions());
         assertThat(orcWriterOptions.getBloomFilterFpp()).isEqualTo(0.5);
         assertThat(orcWriterOptions.isBloomFilterColumn("column_a")).isTrue();
@@ -89,7 +91,7 @@ public void testOrcWriterOptionsFromTableProperties()
     @Test
     public void testOrcWriterOptionsWithInvalidFPPValue()
     {
-        Properties tableProperties = createTablePropertiesWithFpp("abc");
+        Map<String, String> tableProperties = createTablePropertiesWithFpp("abc");
         assertThatThrownBy(() -> getOrcWriterOptions(tableProperties, new OrcWriterOptions()))
                 .hasMessage("Invalid value for orc_bloom_filter_fpp property: abc");
     }
@@ -107,11 +109,11 @@ public void testOrcBloomFilterWithInvalidRange()
                 .hasMessage("bloomFilterFpp should be > 0.0 & < 1.0");
     }
 
-    private static Properties createTablePropertiesWithFpp(String fpp)
+    private static Map<String, String> createTablePropertiesWithFpp(String fpp)
     {
-        Properties properties = new Properties();
-        properties.setProperty(ORC_BLOOM_FILTER_COLUMNS_KEY, "column_with_bloom_filter");
-        properties.setProperty(ORC_BLOOM_FILTER_FPP_KEY, fpp);
-        return properties;
+        return ImmutableMap.<String, String>builder()
+                .put(ORC_BLOOM_FILTER_COLUMNS_KEY, "column_with_bloom_filter")
+                .put(ORC_BLOOM_FILTER_FPP_KEY, fpp)
+                .buildOrThrow();
     }
 }
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetUtil.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetUtil.java
index dfddba972a4f..30511eb29743 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetUtil.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetUtil.java
@@ -13,6 +13,7 @@
  */
 package io.trino.plugin.hive.parquet;
 
+import com.google.common.collect.ImmutableMap;
 import io.trino.filesystem.Location;
 import io.trino.filesystem.memory.MemoryFileSystemFactory;
 import io.trino.plugin.hive.FileFormatDataSourceStats;
@@ -33,7 +34,6 @@
 import java.util.List;
 import java.util.Optional;
 import java.util.OptionalInt;
-import java.util.Properties;
 import java.util.stream.IntStream;
 
 import static com.google.common.base.Preconditions.checkArgument;
@@ -70,15 +70,13 @@ public static ConnectorPageSource createPageSource(ConnectorSession session, Fil
                 new ParquetReaderConfig(),
                 new HiveConfig());
 
-        Properties schema = new Properties();
-        schema.setProperty(SERIALIZATION_LIB, HiveStorageFormat.PARQUET.getSerde());
         return hivePageSourceFactory.createPageSource(
                 session,
                 location,
                 0,
                 parquetFile.length(),
                 parquetFile.length(),
-                schema,
+                ImmutableMap.of(SERIALIZATION_LIB, HiveStorageFormat.PARQUET.getSerde()),
                 columns,
                 domain,
                 Optional.empty(),
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java
index c8e59fc1189c..f9316bac2646 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestParquetDecimalScaling.java
@@ -58,7 +58,7 @@
 import static io.trino.plugin.hive.parquet.TestParquetDecimalScaling.ParquetDecimalInsert.maximumValue;
 import static io.trino.plugin.hive.parquet.TestParquetDecimalScaling.ParquetDecimalInsert.minimumValue;
 import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMNS;
-import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_COMMENTS;
+import static io.trino.plugin.hive.util.SerdeConstants.LIST_COLUMN_TYPES;
 import static io.trino.spi.type.Decimals.overflows;
 import static io.trino.testing.DataProviders.cartesianProduct;
 import static io.trino.testing.DataProviders.toDataProvider;
@@ -509,7 +509,7 @@ private static Properties createTableProperties(List columnNames, List