Skip to content

Commit

Permalink
Hive: Optimize tableExists API in hive catalog (#11597)
Browse files Browse the repository at this point in the history
* Hive: Optimize tableExists API in hive catalog

Skip creating the Hive table operations when checking the existence of an Iceberg table in the Hive catalog

* Add a newline after if/else

* Add current thread interrupt

* Handle metadata tables and separate the tests

* Add comment back

* Address feedback

* Add extra comment for EcsCatalog override method

* Move javadoc around

* Added note if hive table with same name exists

* Added note if hive table with same name exists

* Add test with invalid identifier
  • Loading branch information
dramaticlly authored Dec 12, 2024
1 parent 1e126e2 commit a3dcfd1
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ private Table loadMetadataTable(TableIdentifier identifier) {
}
}

private boolean isValidMetadataIdentifier(TableIdentifier identifier) {
/**
 * Tells whether {@code identifier} can be treated as a metadata table reference: its name must
 * map to a {@link MetadataTableType}, and the identifier rebuilt from its namespace levels must
 * itself pass {@code isValidIdentifier}.
 *
 * @param identifier the identifier to inspect
 * @return true only when both conditions above hold
 */
protected boolean isValidMetadataIdentifier(TableIdentifier identifier) {
  // Bail out first so the namespace-derived identifier is only built when the name matches a
  // metadata table type (same evaluation order as a short-circuiting &&).
  if (MetadataTableType.from(identifier.name()) == null) {
    return false;
  }

  TableIdentifier namespaceAsTable = TableIdentifier.of(identifier.namespace().levels());
  return isValidIdentifier(namespaceAsTable);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,43 @@ private void validateTableIsIcebergTableOrView(
}
}

/**
 * Reports whether the given identifier refers to an existing table, or to a metadata table
 * whose underlying table exists.
 *
 * <p>The check fetches the table through the metastore client and validates that it is an
 * Iceberg table. An identifier that is neither a valid table identifier nor a valid metadata
 * table identifier yields {@code false} without contacting the metastore.
 *
 * <p>Note: when the catalog holds a plain Hive table under the same identifier, the result is
 * {@code false}.
 *
 * @param identifier a table identifier
 * @return true if the table exists, false otherwise
 */
@Override
public boolean tableExists(TableIdentifier identifier) {
  // Resolve which table to look up: the identifier itself, or, for a metadata table
  // reference, the identifier formed by its namespace levels.
  final TableIdentifier target;
  if (isValidIdentifier(identifier)) {
    target = identifier;
  } else if (isValidMetadataIdentifier(identifier)) {
    target = TableIdentifier.of(identifier.namespace().levels());
  } else {
    return false;
  }

  String dbName = target.namespace().level(0);
  String tblName = target.name();
  try {
    Table hmsTable = clients.run(client -> client.getTable(dbName, tblName));
    // Throws when the metastore entry is not an Iceberg table; handled below as "absent".
    HiveOperationsBase.validateTableIsIceberg(hmsTable, fullTableName(name, target));
    return true;
  } catch (NoSuchTableException | NoSuchObjectException e) {
    return false;
  } catch (TException e) {
    throw new RuntimeException("Failed to check table existence of " + target, e);
  } catch (InterruptedException e) {
    // Restore the interrupt flag before surfacing the failure.
    Thread.currentThread().interrupt();
    throw new RuntimeException(
        "Interrupted in call to check table existence of " + target, e);
  }
}

@Override
public void createNamespace(Namespace namespace, Map<String, String> meta) {
Preconditions.checkArgument(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,55 @@ public void testHiveTableAndIcebergTableWithSameName(TableType tableType)
HIVE_METASTORE_EXTENSION.metastoreClient().dropTable(DB_NAME, hiveTableName);
}

/**
 * Exercises {@code catalog.tableExists} across the lifecycle of one table name: before
 * creation, after creation, after drop, while a plain Hive table occupies the name, and while
 * a view occupies the name.
 */
@Test
public void testTableExists() throws TException, IOException {
  String existsTableName = "test_table_exists";
  TableIdentifier tableId = TableIdentifier.of(DB_NAME, existsTableName);
  TableIdentifier partitionsId = TableIdentifier.of(DB_NAME, existsTableName, "partitions");
  TableIdentifier malformedId = TableIdentifier.of(DB_NAME, "invalid", existsTableName);

  // Nothing has been created yet.
  assertThat(catalog.tableExists(malformedId))
      .as("Should return false on invalid identifier")
      .isFalse();
  assertThat(catalog.tableExists(tableId))
      .as("Table should not exist before create")
      .isFalse();

  // Create the Iceberg table: both the table and its metadata table are reported.
  catalog.buildTable(tableId, SCHEMA).create();
  assertThat(catalog.tableExists(tableId)).as("Table should exist after create").isTrue();
  assertThat(catalog.tableExists(partitionsId))
      .as("Metadata table should also exist")
      .isTrue();

  // Drop it again: neither is reported.
  assertThat(catalog.dropTable(tableId)).as("Should drop a table that does exist").isTrue();
  assertThat(catalog.tableExists(tableId)).as("Table should not exist after drop").isFalse();
  assertThat(catalog.tableExists(partitionsId))
      .as("Metadata table should not exist after drop")
      .isFalse();

  // A plain Hive table under the same name must not count as an existing table.
  HIVE_METASTORE_EXTENSION
      .metastoreClient()
      .createTable(createHiveTable(existsTableName, TableType.EXTERNAL_TABLE));
  assertThat(catalog.tableExists(tableId))
      .as("Should return false when a hive table with the same name exists")
      .isFalse();
  assertThat(catalog.tableExists(partitionsId))
      .as("Metadata table should not exist")
      .isFalse();
  HIVE_METASTORE_EXTENSION.metastoreClient().dropTable(DB_NAME, existsTableName);

  // A view under the same name must not count as an existing table either.
  catalog
      .buildView(tableId)
      .withSchema(SCHEMA)
      .withDefaultNamespace(tableId.namespace())
      .withQuery("spark", "select * from ns.tbl")
      .create();
  assertThat(catalog.tableExists(tableId))
      .as("Should return false if identifier refers to a view")
      .isFalse();
  catalog.dropView(tableId);
}

private org.apache.hadoop.hive.metastore.api.Table createHiveTable(
String hiveTableName, TableType type) throws IOException {
Map<String, String> parameters = Maps.newHashMap();
Expand Down

0 comments on commit a3dcfd1

Please sign in to comment.