From 7b4390824709a0c513ddb921d7846cd6bf0a46bc Mon Sep 17 00:00:00 2001 From: Yuya Ebihara Date: Thu, 24 Oct 2024 07:55:23 +0900 Subject: [PATCH] Add modernizer rule to disallow Glue Table.getStorageDescriptor --- .mvn/modernizer/violations.xml | 6 ++++++ .../glue/v1/converter/GlueInputConverter.java | 3 ++- .../v1/converter/GlueToTrinoConverter.java | 15 +++++++++++---- .../glue/v1/TestGlueToTrinoConverter.java | 19 ++++++++++--------- .../glue/GlueIcebergTableOperations.java | 4 +++- .../iceberg/catalog/glue/GlueIcebergUtil.java | 2 +- .../catalog/glue/TrinoGlueCatalog.java | 11 +++++++---- ...tIcebergGlueCatalogConnectorSmokeTest.java | 13 +++++++------ 8 files changed, 47 insertions(+), 26 deletions(-) diff --git a/.mvn/modernizer/violations.xml b/.mvn/modernizer/violations.xml index bd4590c55611..12045c09097b 100644 --- a/.mvn/modernizer/violations.xml +++ b/.mvn/modernizer/violations.xml @@ -150,6 +150,12 @@ Use AssertJ's assertThatThrownBy, see https://github.com/trinodb/trino/issues/5320 for rationale + + com/amazonaws/services/glue/model/Table.getStorageDescriptor:()Lcom/amazonaws/services/glue/model/StorageDescriptor; + 1.1 + Storage descriptor is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getStorageDescriptor + + com/amazonaws/services/glue/model/Table.getTableType:()Ljava/lang/String; 1.1 diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueInputConverter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueInputConverter.java index 5ed0a2266b31..1d12277e87d4 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueInputConverter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueInputConverter.java @@ -48,6 +48,7 @@ import static io.trino.plugin.hive.metastore.MetastoreUtil.metastoreFunctionName; import static io.trino.plugin.hive.metastore.MetastoreUtil.toResourceUris; import static io.trino.plugin.hive.metastore.MetastoreUtil.updateStatisticsParameters; +import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getStorageDescriptor; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableTypeNullable; @@ -100,7 +101,7 @@ public static TableInput convertGlueTableToTableInput(com.amazonaws.services.glu .withLastAccessTime(glueTable.getLastAccessTime()) .withLastAnalyzedTime(glueTable.getLastAnalyzedTime()) .withRetention(glueTable.getRetention()) - .withStorageDescriptor(glueTable.getStorageDescriptor()) + .withStorageDescriptor(getStorageDescriptor(glueTable).orElse(null)) .withPartitionKeys(glueTable.getPartitionKeys()) .withViewOriginalText(glueTable.getViewOriginalText()) .withViewExpandedText(glueTable.getViewExpandedText()) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueToTrinoConverter.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueToTrinoConverter.java index 2c599a545afe..549ca17beb36 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueToTrinoConverter.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/glue/v1/converter/GlueToTrinoConverter.java @@ -68,6 +68,12 @@ public final class GlueToTrinoConverter private GlueToTrinoConverter() {} + @SuppressModernizer // Usage of `Table.getStorageDescriptor` is not allowed. Only this method can call that. + public static Optional getStorageDescriptor(com.amazonaws.services.glue.model.Table glueTable) + { + return Optional.ofNullable(glueTable.getStorageDescriptor()); + } + @SuppressModernizer // Usage of `Column.getParameters` is not allowed. Only this method can call that. public static Map getColumnParameters(com.amazonaws.services.glue.model.Column glueColumn) { @@ -144,11 +150,11 @@ public static Table convertTable(com.amazonaws.services.glue.model.Table glueTab .setViewOriginalText(Optional.ofNullable(glueTable.getViewOriginalText())) .setViewExpandedText(Optional.ofNullable(glueTable.getViewExpandedText())); - StorageDescriptor sd = glueTable.getStorageDescriptor(); + Optional storageDescriptor = getStorageDescriptor(glueTable); if (isIcebergTable(tableParameters) || - (sd == null && isDeltaLakeTable(tableParameters)) || - (sd == null && isTrinoMaterializedView(tableType, tableParameters))) { + (storageDescriptor.isEmpty() && isDeltaLakeTable(tableParameters)) || + (storageDescriptor.isEmpty() && isTrinoMaterializedView(tableType, tableParameters))) { // Iceberg tables do not need to read the StorageDescriptor field, but we still need to return dummy properties for compatibility // Delta Lake tables only need to provide a dummy properties if a StorageDescriptor was not explicitly configured. // Materialized views do not need to read the StorageDescriptor, but we still need to return dummy properties for compatibility @@ -156,9 +162,10 @@ public static Table convertTable(com.amazonaws.services.glue.model.Table glueTab tableBuilder.getStorageBuilder().setStorageFormat(HiveStorageFormat.PARQUET.toStorageFormat()); } else { - if (sd == null) { + if (storageDescriptor.isEmpty()) { throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, "Table StorageDescriptor is null for table '%s' %s".formatted(table, glueTable)); } + StorageDescriptor sd = storageDescriptor.get(); if (sd.getSerdeInfo() == null) { throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, "Table SerdeInfo is null for table '%s' %s".formatted(table, glueTable)); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestGlueToTrinoConverter.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestGlueToTrinoConverter.java index 914772bfbc9b..345e11007fb1 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestGlueToTrinoConverter.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/glue/v1/TestGlueToTrinoConverter.java @@ -45,6 +45,7 @@ import static io.trino.plugin.hive.metastore.glue.v1.TestingMetastoreObjects.getGlueTestTable; import static io.trino.plugin.hive.metastore.glue.v1.TestingMetastoreObjects.getGlueTestTrinoMaterializedView; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getPartitionParameters; +import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getStorageDescriptor; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableTypeNullable; import static io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER; @@ -97,9 +98,9 @@ public void testConvertTable() assertThat(trinoTable.getTableType()).isEqualTo(getTableTypeNullable(testTable)); assertThat(trinoTable.getOwner().orElse(null)).isEqualTo(testTable.getOwner()); assertThat(trinoTable.getParameters()).isEqualTo(getTableParameters(testTable)); - assertColumnList(trinoTable.getDataColumns(), testTable.getStorageDescriptor().getColumns()); + assertColumnList(trinoTable.getDataColumns(), getStorageDescriptor(testTable).orElseThrow().getColumns()); assertColumnList(trinoTable.getPartitionColumns(), testTable.getPartitionKeys()); - assertStorage(trinoTable.getStorage(), testTable.getStorageDescriptor()); + assertStorage(trinoTable.getStorage(), getStorageDescriptor(testTable).orElseThrow()); assertThat(trinoTable.getViewOriginalText().get()).isEqualTo(testTable.getViewOriginalText()); assertThat(trinoTable.getViewExpandedText().get()).isEqualTo(testTable.getViewExpandedText()); } @@ -122,7 +123,7 @@ public void testConvertTableWithOpenCSVSerDe() assertThat(trinoTable.getDataColumns().get(0).getType()).isEqualTo(HIVE_STRING); assertColumnList(trinoTable.getPartitionColumns(), glueTable.getPartitionKeys()); - assertStorage(trinoTable.getStorage(), glueTable.getStorageDescriptor()); + assertStorage(trinoTable.getStorage(), getStorageDescriptor(glueTable).orElseThrow()); assertThat(trinoTable.getViewOriginalText().get()).isEqualTo(glueTable.getViewOriginalText()); assertThat(trinoTable.getViewExpandedText().get()).isEqualTo(glueTable.getViewExpandedText()); } @@ -148,7 +149,7 @@ public void testConvertTableNullPartitions() public void testConvertTableUppercaseColumnType() { com.amazonaws.services.glue.model.Column uppercaseColumn = getGlueTestColumn().withType("String"); - testTable.getStorageDescriptor().setColumns(ImmutableList.of(uppercaseColumn)); + getStorageDescriptor(testTable).orElseThrow().setColumns(ImmutableList.of(uppercaseColumn)); GlueToTrinoConverter.convertTable(testTable, testDatabase.getName()); } @@ -211,7 +212,7 @@ public void testDatabaseNullParameters() public void testTableNullParameters() { testTable.setParameters(null); - testTable.getStorageDescriptor().getSerdeInfo().setParameters(null); + getStorageDescriptor(testTable).orElseThrow().getSerdeInfo().setParameters(null); io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName()); assertThat(trinoTable.getParameters()).isNotNull(); assertThat(trinoTable.getStorage().getSerdeParameters()).isNotNull(); @@ -230,7 +231,7 @@ public void testIcebergTableNullStorageDescriptor() public void testIcebergTableNonNullStorageDescriptor() { testTable.setParameters(ImmutableMap.of(ICEBERG_TABLE_TYPE_NAME, ICEBERG_TABLE_TYPE_VALUE)); - assertThat(testTable.getStorageDescriptor()).isNotNull(); + assertThat(getStorageDescriptor(testTable)).isPresent(); io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName()); assertThat(trinoTable.getDataColumns().size()).isEqualTo(1); } @@ -248,11 +249,11 @@ public void testDeltaTableNullStorageDescriptor() public void testDeltaTableNonNullStorageDescriptor() { testTable.setParameters(ImmutableMap.of(SPARK_TABLE_PROVIDER_KEY, DELTA_LAKE_PROVIDER)); - assertThat(testTable.getStorageDescriptor()).isNotNull(); + assertThat(getStorageDescriptor(testTable)).isPresent(); io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName()); assertThat(trinoTable.getDataColumns().stream() .map(Column::getName) - .collect(toImmutableSet())).isEqualTo(testTable.getStorageDescriptor().getColumns().stream() + .collect(toImmutableSet())).isEqualTo(getStorageDescriptor(testTable).orElseThrow().getColumns().stream() .map(com.amazonaws.services.glue.model.Column::getName) .collect(toImmutableSet())); } @@ -261,7 +262,7 @@ public void testDeltaTableNonNullStorageDescriptor() public void testIcebergMaterializedViewNullStorageDescriptor() { Table testMaterializedView = getGlueTestTrinoMaterializedView(testDatabase.getName()); - assertThat(testMaterializedView.getStorageDescriptor()).isNull(); + assertThat(getStorageDescriptor(testMaterializedView)).isEmpty(); io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testMaterializedView, testDatabase.getName()); assertThat(trinoTable.getDataColumns().size()).isEqualTo(1); } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperations.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperations.java index 1dc87ae74556..60aef66c65b8 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperations.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergTableOperations.java @@ -21,6 +21,7 @@ import com.amazonaws.services.glue.model.EntityNotFoundException; import com.amazonaws.services.glue.model.InvalidInputException; import com.amazonaws.services.glue.model.ResourceNumberLimitExceededException; +import com.amazonaws.services.glue.model.StorageDescriptor; import com.amazonaws.services.glue.model.Table; import com.amazonaws.services.glue.model.TableInput; import com.amazonaws.services.glue.model.UpdateTableRequest; @@ -47,6 +48,7 @@ import static com.google.common.base.Verify.verify; import static io.trino.plugin.hive.ViewReaderUtil.isTrinoMaterializedView; import static io.trino.plugin.hive.ViewReaderUtil.isTrinoView; +import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getStorageDescriptor; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableType; import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable; @@ -166,7 +168,7 @@ protected void commitToExistingTable(TableMetadata base, TableMetadata metadata) tableName, owner, metadata, - table.getStorageDescriptor().getLocation(), + getStorageDescriptor(table).map(StorageDescriptor::getLocation).orElse(null), newMetadataLocation, ImmutableMap.of(PREVIOUS_METADATA_LOCATION_PROP, currentMetadataLocation), cacheTableMetadata)); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergUtil.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergUtil.java index b48c9d916eaf..e95cb57ee231 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergUtil.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/GlueIcebergUtil.java @@ -69,7 +69,7 @@ public static TableInput getTableInput( String tableName, Optional owner, TableMetadata metadata, - String tableLocation, + @Nullable String tableLocation, String newMetadataLocation, Map parameters, boolean cacheTableMetadata) diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java index 38235c2236bf..e003357edef5 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/catalog/glue/TrinoGlueCatalog.java @@ -31,6 +31,7 @@ import com.amazonaws.services.glue.model.GetTableRequest; import com.amazonaws.services.glue.model.GetTablesRequest; import com.amazonaws.services.glue.model.GetTablesResult; +import com.amazonaws.services.glue.model.StorageDescriptor; import com.amazonaws.services.glue.model.TableInput; import com.amazonaws.services.glue.model.UpdateTableRequest; import com.google.common.cache.Cache; @@ -125,6 +126,7 @@ import static io.trino.plugin.hive.ViewReaderUtil.isTrinoView; import static io.trino.plugin.hive.metastore.glue.v1.AwsSdkUtil.getPaginatedResults; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getColumnParameters; +import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getStorageDescriptor; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableType; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableTypeNullable; @@ -639,13 +641,14 @@ private Optional> getCachedColumnMetadata(com.amazonaws.ser Map tableParameters = getTableParameters(glueTable); String metadataLocation = tableParameters.get(METADATA_LOCATION_PROP); String metadataValidForMetadata = tableParameters.get(TRINO_TABLE_METADATA_INFO_VALID_FOR); + Optional storageDescriptor = getStorageDescriptor(glueTable); if (metadataLocation == null || !metadataLocation.equals(metadataValidForMetadata) || - glueTable.getStorageDescriptor() == null || - glueTable.getStorageDescriptor().getColumns() == null) { + storageDescriptor.isEmpty() || + storageDescriptor.get().getColumns() == null) { return Optional.empty(); } - List glueColumns = glueTable.getStorageDescriptor().getColumns(); + List glueColumns = storageDescriptor.get().getColumns(); if (glueColumns.stream().noneMatch(column -> getColumnParameters(column).containsKey(COLUMN_TRINO_TYPE_ID_PROPERTY))) { // No column has type parameter, maybe the parameters were erased return Optional.empty(); @@ -804,7 +807,7 @@ public void renameTable(ConnectorSession session, SchemaTableName from, SchemaTa to.getTableName(), Optional.ofNullable(table.getOwner()), metadata, - table.getStorageDescriptor().getLocation(), + getStorageDescriptor(table).map(StorageDescriptor::getLocation).orElse(null), metadataLocation, tableParameters, cacheTableMetadata); diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java index 2da7088e11f9..56e13fb9c1e1 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/glue/TestIcebergGlueCatalogConnectorSmokeTest.java @@ -55,6 +55,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static io.trino.plugin.hive.metastore.glue.v1.AwsSdkUtil.getPaginatedResults; +import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getStorageDescriptor; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters; import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableType; import static io.trino.plugin.iceberg.IcebergTestUtils.checkParquetFileSorting; @@ -155,25 +156,25 @@ public void testRenameSchema() void testGlueTableLocation() { try (TestTable table = new TestTable(getQueryRunner()::execute, "test_table_location", "AS SELECT 1 x")) { - String initialLocation = getGlueTable(table.getName()).getStorageDescriptor().getLocation(); - assertThat(getGlueTable(table.getName()).getStorageDescriptor().getLocation()) + String initialLocation = getStorageDescriptor(getGlueTable(table.getName())).orElseThrow().getLocation(); + assertThat(getStorageDescriptor(getGlueTable(table.getName())).orElseThrow().getLocation()) // Using startsWith because the location has UUID suffix .startsWith("%s/%s.db/%s".formatted(schemaPath(), schemaName, table.getName())); assertUpdate("INSERT INTO " + table.getName() + " VALUES 2", 1); Table glueTable = getGlueTable(table.getName()); - assertThat(glueTable.getStorageDescriptor().getLocation()) + assertThat(getStorageDescriptor(glueTable).orElseThrow().getLocation()) .isEqualTo(initialLocation); String newTableLocation = initialLocation + "_new"; updateTableLocation(glueTable, newTableLocation); assertUpdate("INSERT INTO " + table.getName() + " VALUES 3", 1); - assertThat(getGlueTable(table.getName()).getStorageDescriptor().getLocation()) + assertThat(getStorageDescriptor(getGlueTable(table.getName())).orElseThrow().getLocation()) .isEqualTo(newTableLocation); assertUpdate("CALL system.unregister_table(CURRENT_SCHEMA, '" + table.getName() + "')"); assertUpdate("CALL system.register_table(CURRENT_SCHEMA, '" + table.getName() + "', '" + initialLocation + "')"); - assertThat(getGlueTable(table.getName()).getStorageDescriptor().getLocation()) + assertThat(getStorageDescriptor(getGlueTable(table.getName())).orElseThrow().getLocation()) .isEqualTo(initialLocation); } } @@ -189,7 +190,7 @@ private void updateTableLocation(Table table, String newLocation) TableInput tableInput = new TableInput() .withName(table.getName()) .withTableType(getTableType(table)) - .withStorageDescriptor(table.getStorageDescriptor().withLocation(newLocation)) + .withStorageDescriptor(getStorageDescriptor(table).orElseThrow().withLocation(newLocation)) .withParameters(getTableParameters(table)); UpdateTableRequest updateTableRequest = new UpdateTableRequest() .withDatabaseName(schemaName)