Skip to content

Commit

Permalink
Add modernizer rule to disallow Glue Table.getStorageDescriptor
Browse files Browse the repository at this point in the history
  • Loading branch information
ebyhr committed Oct 23, 2024
1 parent 1c1fa15 commit 7b43908
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 26 deletions.
6 changes: 6 additions & 0 deletions .mvn/modernizer/violations.xml
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,12 @@
<comment>Use AssertJ's assertThatThrownBy, see https://github.com/trinodb/trino/issues/5320 for rationale</comment>
</violation>

<violation>
<name>com/amazonaws/services/glue/model/Table.getStorageDescriptor:()Lcom/amazonaws/services/glue/model/StorageDescriptor;</name>
<version>1.1</version>
<comment>Storage descriptor is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getStorageDescriptor</comment>
</violation>

<violation>
<name>com/amazonaws/services/glue/model/Table.getTableType:()Ljava/lang/String;</name>
<version>1.1</version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import static io.trino.plugin.hive.metastore.MetastoreUtil.metastoreFunctionName;
import static io.trino.plugin.hive.metastore.MetastoreUtil.toResourceUris;
import static io.trino.plugin.hive.metastore.MetastoreUtil.updateStatisticsParameters;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getStorageDescriptor;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableTypeNullable;

Expand Down Expand Up @@ -100,7 +101,7 @@ public static TableInput convertGlueTableToTableInput(com.amazonaws.services.glu
.withLastAccessTime(glueTable.getLastAccessTime())
.withLastAnalyzedTime(glueTable.getLastAnalyzedTime())
.withRetention(glueTable.getRetention())
.withStorageDescriptor(glueTable.getStorageDescriptor())
.withStorageDescriptor(getStorageDescriptor(glueTable).orElse(null))
.withPartitionKeys(glueTable.getPartitionKeys())
.withViewOriginalText(glueTable.getViewOriginalText())
.withViewExpandedText(glueTable.getViewExpandedText())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ public final class GlueToTrinoConverter

private GlueToTrinoConverter() {}

@SuppressModernizer // Usage of `Table.getStorageDescriptor` is not allowed. Only this method can call that.
public static Optional<StorageDescriptor> getStorageDescriptor(com.amazonaws.services.glue.model.Table glueTable)
{
return Optional.ofNullable(glueTable.getStorageDescriptor());
}

@SuppressModernizer // Usage of `Column.getParameters` is not allowed. Only this method can call that.
public static Map<String, String> getColumnParameters(com.amazonaws.services.glue.model.Column glueColumn)
{
Expand Down Expand Up @@ -144,21 +150,22 @@ public static Table convertTable(com.amazonaws.services.glue.model.Table glueTab
.setViewOriginalText(Optional.ofNullable(glueTable.getViewOriginalText()))
.setViewExpandedText(Optional.ofNullable(glueTable.getViewExpandedText()));

StorageDescriptor sd = glueTable.getStorageDescriptor();
Optional<StorageDescriptor> storageDescriptor = getStorageDescriptor(glueTable);

if (isIcebergTable(tableParameters) ||
(sd == null && isDeltaLakeTable(tableParameters)) ||
(sd == null && isTrinoMaterializedView(tableType, tableParameters))) {
(storageDescriptor.isEmpty() && isDeltaLakeTable(tableParameters)) ||
(storageDescriptor.isEmpty() && isTrinoMaterializedView(tableType, tableParameters))) {
// Iceberg tables do not need to read the StorageDescriptor field, but we still need to return dummy properties for compatibility
// Delta Lake tables only need to provide a dummy properties if a StorageDescriptor was not explicitly configured.
// Materialized views do not need to read the StorageDescriptor, but we still need to return dummy properties for compatibility
tableBuilder.setDataColumns(ImmutableList.of(new Column("dummy", HIVE_INT, Optional.empty(), ImmutableMap.of())));
tableBuilder.getStorageBuilder().setStorageFormat(HiveStorageFormat.PARQUET.toStorageFormat());
}
else {
if (sd == null) {
if (storageDescriptor.isEmpty()) {
throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, "Table StorageDescriptor is null for table '%s' %s".formatted(table, glueTable));
}
StorageDescriptor sd = storageDescriptor.get();
if (sd.getSerdeInfo() == null) {
throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, "Table SerdeInfo is null for table '%s' %s".formatted(table, glueTable));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import static io.trino.plugin.hive.metastore.glue.v1.TestingMetastoreObjects.getGlueTestTable;
import static io.trino.plugin.hive.metastore.glue.v1.TestingMetastoreObjects.getGlueTestTrinoMaterializedView;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getPartitionParameters;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getStorageDescriptor;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableTypeNullable;
import static io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER;
Expand Down Expand Up @@ -97,9 +98,9 @@ public void testConvertTable()
assertThat(trinoTable.getTableType()).isEqualTo(getTableTypeNullable(testTable));
assertThat(trinoTable.getOwner().orElse(null)).isEqualTo(testTable.getOwner());
assertThat(trinoTable.getParameters()).isEqualTo(getTableParameters(testTable));
assertColumnList(trinoTable.getDataColumns(), testTable.getStorageDescriptor().getColumns());
assertColumnList(trinoTable.getDataColumns(), getStorageDescriptor(testTable).orElseThrow().getColumns());
assertColumnList(trinoTable.getPartitionColumns(), testTable.getPartitionKeys());
assertStorage(trinoTable.getStorage(), testTable.getStorageDescriptor());
assertStorage(trinoTable.getStorage(), getStorageDescriptor(testTable).orElseThrow());
assertThat(trinoTable.getViewOriginalText().get()).isEqualTo(testTable.getViewOriginalText());
assertThat(trinoTable.getViewExpandedText().get()).isEqualTo(testTable.getViewExpandedText());
}
Expand All @@ -122,7 +123,7 @@ public void testConvertTableWithOpenCSVSerDe()
assertThat(trinoTable.getDataColumns().get(0).getType()).isEqualTo(HIVE_STRING);

assertColumnList(trinoTable.getPartitionColumns(), glueTable.getPartitionKeys());
assertStorage(trinoTable.getStorage(), glueTable.getStorageDescriptor());
assertStorage(trinoTable.getStorage(), getStorageDescriptor(glueTable).orElseThrow());
assertThat(trinoTable.getViewOriginalText().get()).isEqualTo(glueTable.getViewOriginalText());
assertThat(trinoTable.getViewExpandedText().get()).isEqualTo(glueTable.getViewExpandedText());
}
Expand All @@ -148,7 +149,7 @@ public void testConvertTableNullPartitions()
public void testConvertTableUppercaseColumnType()
{
com.amazonaws.services.glue.model.Column uppercaseColumn = getGlueTestColumn().withType("String");
testTable.getStorageDescriptor().setColumns(ImmutableList.of(uppercaseColumn));
getStorageDescriptor(testTable).orElseThrow().setColumns(ImmutableList.of(uppercaseColumn));
GlueToTrinoConverter.convertTable(testTable, testDatabase.getName());
}

Expand Down Expand Up @@ -211,7 +212,7 @@ public void testDatabaseNullParameters()
public void testTableNullParameters()
{
testTable.setParameters(null);
testTable.getStorageDescriptor().getSerdeInfo().setParameters(null);
getStorageDescriptor(testTable).orElseThrow().getSerdeInfo().setParameters(null);
io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName());
assertThat(trinoTable.getParameters()).isNotNull();
assertThat(trinoTable.getStorage().getSerdeParameters()).isNotNull();
Expand All @@ -230,7 +231,7 @@ public void testIcebergTableNullStorageDescriptor()
public void testIcebergTableNonNullStorageDescriptor()
{
testTable.setParameters(ImmutableMap.of(ICEBERG_TABLE_TYPE_NAME, ICEBERG_TABLE_TYPE_VALUE));
assertThat(testTable.getStorageDescriptor()).isNotNull();
assertThat(getStorageDescriptor(testTable)).isPresent();
io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName());
assertThat(trinoTable.getDataColumns().size()).isEqualTo(1);
}
Expand All @@ -248,11 +249,11 @@ public void testDeltaTableNullStorageDescriptor()
public void testDeltaTableNonNullStorageDescriptor()
{
testTable.setParameters(ImmutableMap.of(SPARK_TABLE_PROVIDER_KEY, DELTA_LAKE_PROVIDER));
assertThat(testTable.getStorageDescriptor()).isNotNull();
assertThat(getStorageDescriptor(testTable)).isPresent();
io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testTable, testDatabase.getName());
assertThat(trinoTable.getDataColumns().stream()
.map(Column::getName)
.collect(toImmutableSet())).isEqualTo(testTable.getStorageDescriptor().getColumns().stream()
.collect(toImmutableSet())).isEqualTo(getStorageDescriptor(testTable).orElseThrow().getColumns().stream()
.map(com.amazonaws.services.glue.model.Column::getName)
.collect(toImmutableSet()));
}
Expand All @@ -261,7 +262,7 @@ public void testDeltaTableNonNullStorageDescriptor()
public void testIcebergMaterializedViewNullStorageDescriptor()
{
Table testMaterializedView = getGlueTestTrinoMaterializedView(testDatabase.getName());
assertThat(testMaterializedView.getStorageDescriptor()).isNull();
assertThat(getStorageDescriptor(testMaterializedView)).isEmpty();
io.trino.metastore.Table trinoTable = GlueToTrinoConverter.convertTable(testMaterializedView, testDatabase.getName());
assertThat(trinoTable.getDataColumns().size()).isEqualTo(1);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.amazonaws.services.glue.model.EntityNotFoundException;
import com.amazonaws.services.glue.model.InvalidInputException;
import com.amazonaws.services.glue.model.ResourceNumberLimitExceededException;
import com.amazonaws.services.glue.model.StorageDescriptor;
import com.amazonaws.services.glue.model.Table;
import com.amazonaws.services.glue.model.TableInput;
import com.amazonaws.services.glue.model.UpdateTableRequest;
Expand All @@ -47,6 +48,7 @@
import static com.google.common.base.Verify.verify;
import static io.trino.plugin.hive.ViewReaderUtil.isTrinoMaterializedView;
import static io.trino.plugin.hive.ViewReaderUtil.isTrinoView;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getStorageDescriptor;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableType;
import static io.trino.plugin.hive.util.HiveUtil.isIcebergTable;
Expand Down Expand Up @@ -166,7 +168,7 @@ protected void commitToExistingTable(TableMetadata base, TableMetadata metadata)
tableName,
owner,
metadata,
table.getStorageDescriptor().getLocation(),
getStorageDescriptor(table).map(StorageDescriptor::getLocation).orElse(null),
newMetadataLocation,
ImmutableMap.of(PREVIOUS_METADATA_LOCATION_PROP, currentMetadataLocation),
cacheTableMetadata));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public static TableInput getTableInput(
String tableName,
Optional<String> owner,
TableMetadata metadata,
String tableLocation,
@Nullable String tableLocation,
String newMetadataLocation,
Map<String, String> parameters,
boolean cacheTableMetadata)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.amazonaws.services.glue.model.GetTableRequest;
import com.amazonaws.services.glue.model.GetTablesRequest;
import com.amazonaws.services.glue.model.GetTablesResult;
import com.amazonaws.services.glue.model.StorageDescriptor;
import com.amazonaws.services.glue.model.TableInput;
import com.amazonaws.services.glue.model.UpdateTableRequest;
import com.google.common.cache.Cache;
Expand Down Expand Up @@ -125,6 +126,7 @@
import static io.trino.plugin.hive.ViewReaderUtil.isTrinoView;
import static io.trino.plugin.hive.metastore.glue.v1.AwsSdkUtil.getPaginatedResults;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getColumnParameters;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getStorageDescriptor;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableType;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableTypeNullable;
Expand Down Expand Up @@ -639,13 +641,14 @@ private Optional<List<ColumnMetadata>> getCachedColumnMetadata(com.amazonaws.ser
Map<String, String> tableParameters = getTableParameters(glueTable);
String metadataLocation = tableParameters.get(METADATA_LOCATION_PROP);
String metadataValidForMetadata = tableParameters.get(TRINO_TABLE_METADATA_INFO_VALID_FOR);
Optional<StorageDescriptor> storageDescriptor = getStorageDescriptor(glueTable);
if (metadataLocation == null || !metadataLocation.equals(metadataValidForMetadata) ||
glueTable.getStorageDescriptor() == null ||
glueTable.getStorageDescriptor().getColumns() == null) {
storageDescriptor.isEmpty() ||
storageDescriptor.get().getColumns() == null) {
return Optional.empty();
}

List<Column> glueColumns = glueTable.getStorageDescriptor().getColumns();
List<Column> glueColumns = storageDescriptor.get().getColumns();
if (glueColumns.stream().noneMatch(column -> getColumnParameters(column).containsKey(COLUMN_TRINO_TYPE_ID_PROPERTY))) {
// No column has type parameter, maybe the parameters were erased
return Optional.empty();
Expand Down Expand Up @@ -804,7 +807,7 @@ public void renameTable(ConnectorSession session, SchemaTableName from, SchemaTa
to.getTableName(),
Optional.ofNullable(table.getOwner()),
metadata,
table.getStorageDescriptor().getLocation(),
getStorageDescriptor(table).map(StorageDescriptor::getLocation).orElse(null),
metadataLocation,
tableParameters,
cacheTableMetadata);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@

import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.plugin.hive.metastore.glue.v1.AwsSdkUtil.getPaginatedResults;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getStorageDescriptor;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableParameters;
import static io.trino.plugin.hive.metastore.glue.v1.converter.GlueToTrinoConverter.getTableType;
import static io.trino.plugin.iceberg.IcebergTestUtils.checkParquetFileSorting;
Expand Down Expand Up @@ -155,25 +156,25 @@ public void testRenameSchema()
void testGlueTableLocation()
{
try (TestTable table = new TestTable(getQueryRunner()::execute, "test_table_location", "AS SELECT 1 x")) {
String initialLocation = getGlueTable(table.getName()).getStorageDescriptor().getLocation();
assertThat(getGlueTable(table.getName()).getStorageDescriptor().getLocation())
String initialLocation = getStorageDescriptor(getGlueTable(table.getName())).orElseThrow().getLocation();
assertThat(getStorageDescriptor(getGlueTable(table.getName())).orElseThrow().getLocation())
// Using startsWith because the location has UUID suffix
.startsWith("%s/%s.db/%s".formatted(schemaPath(), schemaName, table.getName()));

assertUpdate("INSERT INTO " + table.getName() + " VALUES 2", 1);
Table glueTable = getGlueTable(table.getName());
assertThat(glueTable.getStorageDescriptor().getLocation())
assertThat(getStorageDescriptor(glueTable).orElseThrow().getLocation())
.isEqualTo(initialLocation);

String newTableLocation = initialLocation + "_new";
updateTableLocation(glueTable, newTableLocation);
assertUpdate("INSERT INTO " + table.getName() + " VALUES 3", 1);
assertThat(getGlueTable(table.getName()).getStorageDescriptor().getLocation())
assertThat(getStorageDescriptor(getGlueTable(table.getName())).orElseThrow().getLocation())
.isEqualTo(newTableLocation);

assertUpdate("CALL system.unregister_table(CURRENT_SCHEMA, '" + table.getName() + "')");
assertUpdate("CALL system.register_table(CURRENT_SCHEMA, '" + table.getName() + "', '" + initialLocation + "')");
assertThat(getGlueTable(table.getName()).getStorageDescriptor().getLocation())
assertThat(getStorageDescriptor(getGlueTable(table.getName())).orElseThrow().getLocation())
.isEqualTo(initialLocation);
}
}
Expand All @@ -189,7 +190,7 @@ private void updateTableLocation(Table table, String newLocation)
TableInput tableInput = new TableInput()
.withName(table.getName())
.withTableType(getTableType(table))
.withStorageDescriptor(table.getStorageDescriptor().withLocation(newLocation))
.withStorageDescriptor(getStorageDescriptor(table).orElseThrow().withLocation(newLocation))
.withParameters(getTableParameters(table));
UpdateTableRequest updateTableRequest = new UpdateTableRequest()
.withDatabaseName(schemaName)
Expand Down

0 comments on commit 7b43908

Please sign in to comment.