From 0adcdef3e54d71d3485cba4302424c8621b3a94e Mon Sep 17 00:00:00 2001 From: manuzhang Date: Wed, 11 Dec 2024 20:49:04 +0800 Subject: [PATCH] Hive: Add Hive 4 support and remove Hive 3 --- .github/labeler.yml | 3 - .github/workflows/delta-conversion-ci.yml | 3 - .github/workflows/flink-ci.yml | 3 - .github/workflows/hive-ci.yml | 33 +- .github/workflows/kafka-connect-ci.yml | 3 - .github/workflows/spark-ci.yml | 3 - build.gradle | 10 +- flink/v1.18/build.gradle | 4 +- flink/v1.19/build.gradle | 8 +- flink/v1.20/build.gradle | 8 +- .../apache/iceberg/flink/CatalogTestBase.java | 2 +- .../iceberg/flink/TestFlinkHiveCatalog.java | 3 +- .../TestFlinkInputFormatReaderDeletes.java | 4 +- .../TestIcebergSourceReaderDeletes.java | 4 +- gradle.properties | 2 +- gradle/libs.versions.toml | 11 +- .../apache/iceberg/hive/CachedClientPool.java | 2 +- .../org/apache/iceberg/hive/HiveCatalog.java | 10 +- .../apache/iceberg/hive/TestHiveCatalog.java | 2 +- .../iceberg/hive/TestHiveClientPool.java | 24 +- .../iceberg/hive/TestHiveMetastore.java | 20 +- hive-runtime/LICENSE | 502 ------------------ hive-runtime/NOTICE | 92 ---- hive-runtime/build.gradle | 92 ---- hive3-orc-bundle/build.gradle | 89 ---- hive3/build.gradle | 110 ---- .../hadoop/hive/ql/io/orc/OrcSplit.java | 311 ----------- .../IcebergDateObjectInspectorHive3.java | 82 --- .../IcebergTimestampObjectInspectorHive3.java | 88 --- ...TimestampWithZoneObjectInspectorHive3.java | 82 --- .../vector/CompatibilityHiveVectorUtils.java | 218 -------- .../HiveIcebergVectorizedRecordReader.java | 77 --- .../mr/hive/vector/HiveVectorizedReader.java | 216 -------- .../vector/ParquetSchemaFieldNameVisitor.java | 125 ----- .../vector/VectorizedRowBatchIterator.java | 97 ---- .../iceberg/orc/VectorizedReadUtils.java | 50 -- .../mr/hive/TestHiveSchemaUtilHive3.java | 49 -- .../TestIcebergDateObjectInspectorHive3.java | 64 --- ...tIcebergTimestampObjectInspectorHive3.java | 70 --- ...TimestampWithZoneObjectInspectorHive3.java | 77 --- hive3/src/test/resources/log4j2.properties | 33 -- kafka-connect/build.gradle | 2 +- settings.gradle | 12 +- 43 files changed, 64 insertions(+), 2636 deletions(-) delete mode 100644 hive-runtime/LICENSE delete mode 100644 hive-runtime/NOTICE delete mode 100644 hive-runtime/build.gradle delete mode 100644 hive3-orc-bundle/build.gradle delete mode 100644 hive3/build.gradle delete mode 100644 hive3/src/main/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java delete mode 100644 hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java delete mode 100644 hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java delete mode 100644 hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampWithZoneObjectInspectorHive3.java delete mode 100644 hive3/src/main/java/org/apache/iceberg/mr/hive/vector/CompatibilityHiveVectorUtils.java delete mode 100644 hive3/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java delete mode 100644 hive3/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java delete mode 100644 hive3/src/main/java/org/apache/iceberg/mr/hive/vector/ParquetSchemaFieldNameVisitor.java delete mode 100644 hive3/src/main/java/org/apache/iceberg/mr/hive/vector/VectorizedRowBatchIterator.java delete mode 100644 hive3/src/main/java/org/apache/iceberg/orc/VectorizedReadUtils.java delete mode 100644 
hive3/src/test/java/org/apache/iceberg/mr/hive/TestHiveSchemaUtilHive3.java delete mode 100644 hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java delete mode 100644 hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java delete mode 100644 hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampWithZoneObjectInspectorHive3.java delete mode 100644 hive3/src/test/resources/log4j2.properties diff --git a/.github/labeler.yml b/.github/labeler.yml index 6afc3141ee31..76a1890d8a4f 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -100,10 +100,7 @@ ORC: HIVE: - changed-files: - any-glob-to-any-file: [ - 'hive3/**/*', 'hive-metastore/**/*', - 'hive-runtime/**/*', - 'hive3-orc-bundle/**/*' ] DATA: diff --git a/.github/workflows/delta-conversion-ci.yml b/.github/workflows/delta-conversion-ci.yml index 521d061f6552..58a85475762d 100644 --- a/.github/workflows/delta-conversion-ci.yml +++ b/.github/workflows/delta-conversion-ci.yml @@ -48,9 +48,6 @@ on: - '.asf.yml' - 'dev/**' - 'mr/**' - - 'hive3/**' - - 'hive3-orc-bundle/**' - - 'hive-runtime/**' - 'flink/**' - 'kafka-connect/**' - 'docs/**' diff --git a/.github/workflows/flink-ci.yml b/.github/workflows/flink-ci.yml index 22f4f008a215..ad82e637a508 100644 --- a/.github/workflows/flink-ci.yml +++ b/.github/workflows/flink-ci.yml @@ -48,9 +48,6 @@ on: - '.asf.yml' - 'dev/**' - 'mr/**' - - 'hive3/**' - - 'hive3-orc-bundle/**' - - 'hive-runtime/**' - 'kafka-connect/**' - 'spark/**' - 'docs/**' diff --git a/.github/workflows/hive-ci.yml b/.github/workflows/hive-ci.yml index d95ca1bd5c6a..6aa853d34721 100644 --- a/.github/workflows/hive-ci.yml +++ b/.github/workflows/hive-ci.yml @@ -87,39 +87,10 @@ jobs: key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} restore-keys: ${{ runner.os }}-gradle- - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - run: ./gradlew -DsparkVersions= -DhiveVersions=2 -DflinkVersions= -DkafkaVersions= -Pquick=true :iceberg-mr:check :iceberg-hive-runtime:check -x javadoc + - run: ./gradlew -DsparkVersions= -DhiveVersions=2 -DflinkVersions= -DkafkaVersions= -Pquick=true :iceberg-mr:check -x javadoc - uses: actions/upload-artifact@v4 if: failure() with: name: test logs path: | - **/build/testlogs - - hive3-tests: - runs-on: ubuntu-22.04 - strategy: - matrix: - jvm: [11, 17, 21] - env: - SPARK_LOCAL_IP: localhost - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: ${{ matrix.jvm }} - - uses: actions/cache@v4 - with: - path: | - ~/.gradle/caches - ~/.gradle/wrapper - key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*', '**/gradle-wrapper.properties') }} - restore-keys: ${{ runner.os }}-gradle- - - run: echo -e "$(ip addr show eth0 | grep "inet\b" | awk '{print $2}' | cut -d/ -f1)\t$(hostname -f) $(hostname -s)" | sudo tee -a /etc/hosts - - run: ./gradlew -DsparkVersions= -DhiveVersions=3 -DflinkVersions= -DkafkaVersions= -Pquick=true :iceberg-hive3-orc-bundle:check :iceberg-hive3:check :iceberg-hive-runtime:check -x javadoc - - uses: actions/upload-artifact@v4 - if: failure() - with: - name: test logs - path: | - **/build/testlogs + **/build/testlogs \ No newline at end of file diff --git a/.github/workflows/kafka-connect-ci.yml b/.github/workflows/kafka-connect-ci.yml 
index 60cd9188b61d..9adf44661547 100644 --- a/.github/workflows/kafka-connect-ci.yml +++ b/.github/workflows/kafka-connect-ci.yml @@ -49,9 +49,6 @@ on: - 'dev/**' - 'mr/**' - 'flink/**' - - 'hive3/**' - - 'hive3-orc-bundle/**' - - 'hive-runtime/**' - 'spark/**' - 'docs/**' - 'site/**' diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml index 0d7bd2d3d3e7..4faf7d051483 100644 --- a/.github/workflows/spark-ci.yml +++ b/.github/workflows/spark-ci.yml @@ -49,9 +49,6 @@ on: - 'dev/**' - 'site/**' - 'mr/**' - - 'hive3/**' - - 'hive3-orc-bundle/**' - - 'hive-runtime/**' - 'flink/**' - 'kafka-connect/**' - 'docs/**' diff --git a/build.gradle b/build.gradle index eb63ce138ab2..d02fc2831db9 100644 --- a/build.gradle +++ b/build.gradle @@ -675,7 +675,7 @@ project(':iceberg-hive-metastore') { compileOnly libs.avro.avro - compileOnly(libs.hive2.metastore) { + compileOnly(libs.hive4.metastore) { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.pentaho' // missing dependency @@ -695,7 +695,7 @@ project(':iceberg-hive-metastore') { // that's really old. We use the core classifier to be able to override our guava // version. Luckily, hive-exec seems to work okay so far with this version of guava // See: https://github.com/apache/hive/blob/master/ql/pom.xml#L911 for more context. - testImplementation("${libs.hive2.exec.get().module}:${libs.hive2.exec.get().getVersion()}:core") { + testImplementation("${libs.hive4.exec.get().module}:${libs.hive4.exec.get().getVersion()}:core") { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.pentaho' // missing dependency @@ -707,7 +707,7 @@ project(':iceberg-hive-metastore') { exclude group: 'com.google.code.findbugs', module: 'jsr305' } - testImplementation(libs.hive2.metastore) { + testImplementation(libs.hive4.metastore) { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.pentaho' // missing dependency @@ -723,7 +723,9 @@ project(':iceberg-hive-metastore') { exclude group: 'com.zaxxer', module: 'HikariCP' } - compileOnly(libs.hadoop2.client) { + testImplementation(libs.hive4.standalone.metastore.server) + + compileOnly(libs.hadoop3.client) { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' } diff --git a/flink/v1.18/build.gradle b/flink/v1.18/build.gradle index 83dc07523a3c..597adedc028f 100644 --- a/flink/v1.18/build.gradle +++ b/flink/v1.18/build.gradle @@ -88,7 +88,7 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") { // that's really old. We use the core classifier to be able to override our guava // version. Luckily, hive-exec seems to work okay so far with this version of guava // See: https://github.com/apache/hive/blob/master/ql/pom.xml#L911 for more context. 
- testImplementation("${libs.hive2.exec.get().module}:${libs.hive2.exec.get().getVersion()}:core") { + testImplementation("${libs.hive4.exec.get().module}:${libs.hive4.exec.get().getVersion()}:core") { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.pentaho' // missing dependency @@ -100,7 +100,7 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") { exclude group: 'com.google.code.findbugs', module: 'jsr305' } - testImplementation(libs.hive2.metastore) { + testImplementation(libs.hive4.metastore) { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.pentaho' // missing dependency diff --git a/flink/v1.19/build.gradle b/flink/v1.19/build.gradle index 50bcadb618e4..261e1612a81c 100644 --- a/flink/v1.19/build.gradle +++ b/flink/v1.19/build.gradle @@ -88,7 +88,7 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") { // that's really old. We use the core classifier to be able to override our guava // version. Luckily, hive-exec seems to work okay so far with this version of guava // See: https://github.com/apache/hive/blob/master/ql/pom.xml#L911 for more context. - testImplementation("${libs.hive2.exec.get().module}:${libs.hive2.exec.get().getVersion()}:core") { + testImplementation("${libs.hive4.exec.get().module}:${libs.hive4.exec.get().getVersion()}:core") { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.pentaho' // missing dependency @@ -100,7 +100,7 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") { exclude group: 'com.google.code.findbugs', module: 'jsr305' } - testImplementation(libs.hive2.metastore) { + testImplementation(libs.hive4.metastore) { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.pentaho' // missing dependency @@ -193,7 +193,7 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") { exclude group: 'org.apache.avro', module: 'avro' } - integrationImplementation(libs.hive2.metastore) { + integrationImplementation(libs.hive4.metastore) { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.pentaho' // missing dependency @@ -210,7 +210,7 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") { exclude group: 'org.slf4j' } - integrationImplementation("${libs.hive2.exec.get().module}:${libs.hive2.exec.get().getVersion()}:core") { + integrationImplementation("${libs.hive4.exec.get().module}:${libs.hive4.exec.get().getVersion()}:core") { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.pentaho' // missing dependency diff --git a/flink/v1.20/build.gradle b/flink/v1.20/build.gradle index 4a1bae660bdb..ca69f06658ec 100644 --- a/flink/v1.20/build.gradle +++ b/flink/v1.20/build.gradle @@ -88,7 +88,7 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") { // that's really old. We use the core classifier to be able to override our guava // version. Luckily, hive-exec seems to work okay so far with this version of guava // See: https://github.com/apache/hive/blob/master/ql/pom.xml#L911 for more context. 
- testImplementation("${libs.hive2.exec.get().module}:${libs.hive2.exec.get().getVersion()}:core") { + testImplementation("${libs.hive4.exec.get().module}:${libs.hive4.exec.get().getVersion()}:core") { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.pentaho' // missing dependency @@ -100,7 +100,7 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") { exclude group: 'com.google.code.findbugs', module: 'jsr305' } - testImplementation(libs.hive2.metastore) { + testImplementation(libs.hive4.metastore) { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.pentaho' // missing dependency @@ -193,7 +193,7 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") { exclude group: 'org.apache.avro', module: 'avro' } - integrationImplementation(libs.hive2.metastore) { + integrationImplementation(libs.hive4.metastore) { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.pentaho' // missing dependency @@ -210,7 +210,7 @@ project(":iceberg-flink:iceberg-flink-runtime-${flinkMajorVersion}") { exclude group: 'org.slf4j' } - integrationImplementation("${libs.hive2.exec.get().module}:${libs.hive2.exec.get().getVersion()}:core") { + integrationImplementation("${libs.hive4.exec.get().module}:${libs.hive4.exec.get().getVersion()}:core") { exclude group: 'org.apache.avro', module: 'avro' exclude group: 'org.slf4j', module: 'slf4j-log4j12' exclude group: 'org.pentaho' // missing dependency diff --git a/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/CatalogTestBase.java b/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/CatalogTestBase.java index 062ff68d5d85..49ee1a398cb3 100644 --- a/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/CatalogTestBase.java +++ b/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/CatalogTestBase.java @@ -116,6 +116,6 @@ protected String getFullQualifiedTableName(String tableName) { } static String getURI(HiveConf conf) { - return conf.get(HiveConf.ConfVars.METASTOREURIS.varname); + return conf.get(HiveConf.ConfVars.METASTORE_URIS.varname); } } diff --git a/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/TestFlinkHiveCatalog.java b/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/TestFlinkHiveCatalog.java index 91343ab1ee72..792221ec1e3a 100644 --- a/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/TestFlinkHiveCatalog.java +++ b/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/TestFlinkHiveCatalog.java @@ -57,7 +57,8 @@ public void testCreateCatalogWithHiveConfDir() throws IOException { Configuration newConf = new Configuration(hiveConf); // Set another new directory which is different with the hive metastore's warehouse path. 
newConf.set( - HiveConf.ConfVars.METASTOREWAREHOUSE.varname, "file://" + warehouseDir.getAbsolutePath()); + HiveConf.ConfVars.METASTORE_WAREHOUSE.varname, + "file://" + warehouseDir.getAbsolutePath()); newConf.writeXml(fos); } assertThat(hiveSiteXML.toPath()).exists(); diff --git a/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/source/TestFlinkInputFormatReaderDeletes.java b/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/source/TestFlinkInputFormatReaderDeletes.java index 1b4fc863631f..ec8ffc66c615 100644 --- a/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/source/TestFlinkInputFormatReaderDeletes.java +++ b/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/source/TestFlinkInputFormatReaderDeletes.java @@ -44,8 +44,8 @@ protected StructLikeSet rowSet(String tableName, Table testTable, String... colu Map properties = Maps.newHashMap(); properties.put( CatalogProperties.WAREHOUSE_LOCATION, - hiveConf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname)); - properties.put(CatalogProperties.URI, hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname)); + hiveConf.get(HiveConf.ConfVars.METASTORE_WAREHOUSE.varname)); + properties.put(CatalogProperties.URI, hiveConf.get(HiveConf.ConfVars.METASTORE_URIS.varname)); properties.put( CatalogProperties.CLIENT_POOL_SIZE, Integer.toString(hiveConf.getInt("iceberg.hive.client-pool-size", 5))); diff --git a/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/source/TestIcebergSourceReaderDeletes.java b/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/source/TestIcebergSourceReaderDeletes.java index df148c212ebd..0a18dbfe71af 100644 --- a/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/source/TestIcebergSourceReaderDeletes.java +++ b/flink/v1.20/flink/src/test/java/org/apache/iceberg/flink/source/TestIcebergSourceReaderDeletes.java @@ -62,8 +62,8 @@ protected StructLikeSet rowSet(String tableName, Table testTable, String... 
colu Map properties = Maps.newHashMap(); properties.put( CatalogProperties.WAREHOUSE_LOCATION, - hiveConf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname)); - properties.put(CatalogProperties.URI, hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname)); + hiveConf.get(HiveConf.ConfVars.METASTORE_WAREHOUSE.varname)); + properties.put(CatalogProperties.URI, hiveConf.get(HiveConf.ConfVars.METASTORE_URIS.varname)); properties.put( CatalogProperties.CLIENT_POOL_SIZE, Integer.toString(hiveConf.getInt("iceberg.hive.client-pool-size", 5))); diff --git a/gradle.properties b/gradle.properties index dc1e1a509b01..f860f958b579 100644 --- a/gradle.properties +++ b/gradle.properties @@ -19,7 +19,7 @@ jmhIncludeRegex=.* systemProp.defaultFlinkVersions=1.20 systemProp.knownFlinkVersions=1.18,1.19,1.20 systemProp.defaultHiveVersions=2 -systemProp.knownHiveVersions=2,3 +systemProp.knownHiveVersions=2,4 systemProp.defaultSparkVersions=3.5 systemProp.knownSparkVersions=3.3,3.4,3.5 systemProp.defaultKafkaVersions=3 diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 3981ac05ff11..e389627a6484 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -50,7 +50,7 @@ hadoop2 = "2.7.3" hadoop3 = "3.4.1" httpcomponents-httpclient5 = "5.4.1" hive2 = { strictly = "2.3.9"} # see rich version usage explanation above -hive3 = "3.1.3" +hive4 = "4.0.1" immutables-value = "2.10.1" jackson-bom = "2.18.2" jackson211 = { strictly = "2.11.4"} # see rich version usage explanation above @@ -139,10 +139,11 @@ hive2-exec = { module = "org.apache.hive:hive-exec", version.ref = "hive2" } hive2-metastore = { module = "org.apache.hive:hive-metastore", version.ref = "hive2" } hive2-serde = { module = "org.apache.hive:hive-serde", version.ref = "hive2" } hive2-service = { module = "org.apache.hive:hive-service", version.ref = "hive2" } -hive3-exec = { module = "org.apache.hive:hive-exec", version.ref = "hive3" } -hive3-metastore = { module = "org.apache.hive:hive-metastore", version.ref = "hive3" } -hive3-serde = { module = "org.apache.hive:hive-serde", version.ref = "hive3" } -hive3-service = { module = "org.apache.hive:hive-service", version.ref = "hive3" } +hive4-exec = { module = "org.apache.hive:hive-exec", version.ref = "hive4" } +hive4-metastore = { module = "org.apache.hive:hive-metastore", version.ref = "hive4" } +hive4-standalone-metastore-server = { module = "org.apache.hive:hive-standalone-metastore-server", version.ref = "hive4" } +hive4-serde = { module = "org.apache.hive:hive-serde", version.ref = "hive4" } +hive4-service = { module = "org.apache.hive:hive-service", version.ref = "hive4" } httpcomponents-httpclient5 = { module = "org.apache.httpcomponents.client5:httpclient5", version.ref = "httpcomponents-httpclient5" } immutables-value = { module = "org.immutables:value", version.ref = "immutables-value" } jackson-bom = { module = "com.fasterxml.jackson:jackson-bom", version.ref = "jackson-bom" } diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/CachedClientPool.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/CachedClientPool.java index 9ce123943fca..a3492d10e1a7 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/CachedClientPool.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/CachedClientPool.java @@ -132,7 +132,7 @@ public R run(Action action, boolean retry) static Key extractKey(String cacheKeys, Configuration conf) { // generate key elements in a certain order, so that the Key instances are comparable List elements = 
Lists.newArrayList(); - elements.add(conf.get(HiveConf.ConfVars.METASTOREURIS.varname, "")); + elements.add(conf.get(HiveConf.ConfVars.METASTORE_URIS.varname, "")); elements.add(conf.get(HiveCatalog.HIVE_CONF_CATALOG, "hive")); if (cacheKeys == null || cacheKeys.isEmpty()) { return Key.of(elements); diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java index 1cf738d736cb..8737e9292e5d 100644 --- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java +++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java @@ -106,12 +106,13 @@ public void initialize(String inputName, Map properties) { } if (properties.containsKey(CatalogProperties.URI)) { - this.conf.set(HiveConf.ConfVars.METASTOREURIS.varname, properties.get(CatalogProperties.URI)); + this.conf.set( + HiveConf.ConfVars.METASTORE_URIS.varname, properties.get(CatalogProperties.URI)); } if (properties.containsKey(CatalogProperties.WAREHOUSE_LOCATION)) { this.conf.set( - HiveConf.ConfVars.METASTOREWAREHOUSE.varname, + HiveConf.ConfVars.METASTORE_WAREHOUSE.varname, LocationUtil.stripTrailingSlash(properties.get(CatalogProperties.WAREHOUSE_LOCATION))); } @@ -674,7 +675,7 @@ protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { } private String databaseLocation(String databaseName) { - String warehouseLocation = conf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname); + String warehouseLocation = conf.get(HiveConf.ConfVars.METASTORE_WAREHOUSE.varname); Preconditions.checkNotNull( warehouseLocation, "Warehouse location is not set: hive.metastore.warehouse.dir=null"); warehouseLocation = LocationUtil.stripTrailingSlash(warehouseLocation); @@ -742,7 +743,8 @@ Database convertToDatabase(Namespace namespace, Map meta) { public String toString() { return MoreObjects.toStringHelper(this) .add("name", name) - .add("uri", this.conf == null ? "" : this.conf.get(HiveConf.ConfVars.METASTOREURIS.varname)) + .add( + "uri", this.conf == null ? 
"" : this.conf.get(HiveConf.ConfVars.METASTORE_URIS.varname)) .toString(); } diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 709bb1caaa62..8c83835d5b4e 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -1134,7 +1134,7 @@ public void testConstructorWarehousePathWithEndSlash() { catalogWithSlash.initialize( "hive_catalog", ImmutableMap.of(CatalogProperties.WAREHOUSE_LOCATION, wareHousePath + "/")); - assertThat(catalogWithSlash.getConf().get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname)) + assertThat(catalogWithSlash.getConf().get(HiveConf.ConfVars.METASTORE_WAREHOUSE.varname)) .as("Should have trailing slash stripped") .isEqualTo(wareHousePath); } diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveClientPool.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveClientPool.java index 2fe1bacf9dd1..918f6e4d49ef 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveClientPool.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveClientPool.java @@ -30,12 +30,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.Function; import org.apache.hadoop.hive.metastore.api.FunctionType; import org.apache.hadoop.hive.metastore.api.GetAllFunctionsResponse; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.PrincipalType; +import org.apache.hadoop.hive.metastore.utils.JavaUtils; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.thrift.transport.TTransportException; import org.junit.jupiter.api.AfterEach; @@ -73,13 +73,13 @@ public void after() { @Test public void testConf() { HiveConf conf = createHiveConf(); - conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, "file:/mywarehouse/"); + conf.set(HiveConf.ConfVars.METASTORE_WAREHOUSE.varname, "file:/mywarehouse/"); HiveClientPool clientPool = new HiveClientPool(10, conf); HiveConf clientConf = clientPool.hiveConf(); - assertThat(clientConf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname)) - .isEqualTo(conf.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname)); + assertThat(clientConf.get(HiveConf.ConfVars.METASTORE_WAREHOUSE.varname)) + .isEqualTo(conf.get(HiveConf.ConfVars.METASTORE_WAREHOUSE.varname)); assertThat(clientPool.poolSize()).isEqualTo(10); // 'hive.metastore.sasl.enabled' should be 'true' as defined in xml @@ -121,36 +121,36 @@ public void testGetTablesFailsForNonReconnectableException() throws Exception { @Test public void testExceptionMessages() { - try (MockedStatic mockedStatic = Mockito.mockStatic(MetaStoreUtils.class)) { + try (MockedStatic mockedStatic = Mockito.mockStatic(JavaUtils.class)) { mockedStatic - .when(() -> MetaStoreUtils.newInstance(any(), any(), any())) + .when(() -> JavaUtils.newInstance(any(), any(), any())) .thenThrow(new RuntimeException(new MetaException("Another meta exception"))); assertThatThrownBy(() -> clients.run(client -> client.getTables("default", "t"))) .isInstanceOf(RuntimeMetaException.class) .hasMessage("Failed to connect to Hive Metastore"); } - try (MockedStatic mockedStatic = 
Mockito.mockStatic(MetaStoreUtils.class)) { + try (MockedStatic mockedStatic = Mockito.mockStatic(JavaUtils.class)) { mockedStatic - .when(() -> MetaStoreUtils.newInstance(any(), any(), any())) + .when(() -> JavaUtils.newInstance(any(), any(), any())) .thenThrow(new RuntimeException(new MetaException())); assertThatThrownBy(() -> clients.run(client -> client.getTables("default", "t"))) .isInstanceOf(RuntimeMetaException.class) .hasMessage("Failed to connect to Hive Metastore"); } - try (MockedStatic mockedStatic = Mockito.mockStatic(MetaStoreUtils.class)) { + try (MockedStatic mockedStatic = Mockito.mockStatic(JavaUtils.class)) { mockedStatic - .when(() -> MetaStoreUtils.newInstance(any(), any(), any())) + .when(() -> JavaUtils.newInstance(any(), any(), any())) .thenThrow(new RuntimeException()); assertThatThrownBy(() -> clients.run(client -> client.getTables("default", "t"))) .isInstanceOf(RuntimeMetaException.class) .hasMessage("Failed to connect to Hive Metastore"); } - try (MockedStatic mockedStatic = Mockito.mockStatic(MetaStoreUtils.class)) { + try (MockedStatic mockedStatic = Mockito.mockStatic(JavaUtils.class)) { mockedStatic - .when(() -> MetaStoreUtils.newInstance(any(), any(), any())) + .when(() -> JavaUtils.newInstance(any(), any(), any())) .thenThrow(new RuntimeException("Another instance of Derby may have already booted")); assertThatThrownBy(() -> clients.run(client -> client.getTables("default", "t"))) .isInstanceOf(RuntimeMetaException.class) diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java index ef8bd7ee0ae3..8643cd26c3cc 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java @@ -38,7 +38,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.HiveMetaStore; +import org.apache.hadoop.hive.metastore.HMSHandler; import org.apache.hadoop.hive.metastore.IHMSHandler; import org.apache.hadoop.hive.metastore.RetryingHMSHandler; import org.apache.hadoop.hive.metastore.TSetIpAddressProcessor; @@ -61,10 +61,10 @@ public class TestHiveMetastore { // create the metastore handlers based on whether we're working with Hive2 or Hive3 dependencies // we need to do this because there is a breaking API change between Hive2 and Hive3 - private static final DynConstructors.Ctor HMS_HANDLER_CTOR = + private static final DynConstructors.Ctor HMS_HANDLER_CTOR = DynConstructors.builder() - .impl(HiveMetaStore.HMSHandler.class, String.class, Configuration.class) - .impl(HiveMetaStore.HMSHandler.class, String.class, HiveConf.class) + .impl(HMSHandler.class, String.class, Configuration.class) + .impl(HMSHandler.class, String.class, HiveConf.class) .build(); private static final DynMethods.StaticMethod GET_BASE_HMS_HANDLER = @@ -125,7 +125,7 @@ public class TestHiveMetastore { private HiveConf hiveConf; private ExecutorService executorService; private TServer server; - private HiveMetaStore.HMSHandler baseHandler; + private HMSHandler baseHandler; private HiveClientPool clientPool; /** @@ -165,8 +165,8 @@ public void start(HiveConf conf, int poolSize) { // in Hive3, setting this as a system prop ensures that it will be picked up whenever a new // HiveConf is created System.setProperty( - HiveConf.ConfVars.METASTOREURIS.varname, - 
hiveConf.getVar(HiveConf.ConfVars.METASTOREURIS)); + HiveConf.ConfVars.METASTORE_URIS.varname, + hiveConf.getVar(HiveConf.ConfVars.METASTORE_URIS)); this.clientPool = new HiveClientPool(1, hiveConf); } catch (Exception e) { @@ -244,7 +244,7 @@ private TServer newThriftServer(TServerSocket socket, int poolSize, HiveConf con throws Exception { HiveConf serverConf = new HiveConf(conf); serverConf.set( - HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, + HiveConf.ConfVars.METASTORE_CONNECT_URL_KEY.varname, "jdbc:derby:" + DERBY_PATH + ";create=true"); baseHandler = HMS_HANDLER_CTOR.newInstance("new db based metaserver", serverConf); IHMSHandler handler = GET_BASE_HMS_HANDLER.invoke(serverConf, baseHandler, false); @@ -261,9 +261,9 @@ private TServer newThriftServer(TServerSocket socket, int poolSize, HiveConf con } private void initConf(HiveConf conf, int port) { - conf.set(HiveConf.ConfVars.METASTOREURIS.varname, "thrift://localhost:" + port); + conf.set(HiveConf.ConfVars.METASTORE_URIS.varname, "thrift://localhost:" + port); conf.set( - HiveConf.ConfVars.METASTOREWAREHOUSE.varname, "file:" + HIVE_LOCAL_DIR.getAbsolutePath()); + HiveConf.ConfVars.METASTORE_WAREHOUSE.varname, "file:" + HIVE_LOCAL_DIR.getAbsolutePath()); conf.set(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname, "false"); conf.set(HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname, "false"); conf.set("iceberg.hive.client-pool-size", "2"); diff --git a/hive-runtime/LICENSE b/hive-runtime/LICENSE deleted file mode 100644 index 24cd3612e5e4..000000000000 --- a/hive-runtime/LICENSE +++ /dev/null @@ -1,502 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). 
- - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - --------------------------------------------------------------------------------- - -This binary artifact contains Apache Avro. - -Copyright: 2014-2020 The Apache Software Foundation. -Home page: https://parquet.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This binary artifact contains the Jackson JSON processor. - -Copyright: 2007-2020 Tatu Saloranta and other contributors -Home page: http://jackson.codehaus.org/ -License: http://www.apache.org/licenses/LICENSE-2.0.txt - --------------------------------------------------------------------------------- - -This binary artifact contains Apache Parquet. - -Copyright: 2014-2020 The Apache Software Foundation. -Home page: https://parquet.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This binary artifact contains Apache Thrift. 
- -Copyright: 2006-2010 The Apache Software Foundation. -Home page: https://thrift.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This binary artifact contains fastutil. - -Copyright: 2002-2014 Sebastiano Vigna -Home page: http://fastutil.di.unimi.it/ -License: http://www.apache.org/licenses/LICENSE-2.0.html - --------------------------------------------------------------------------------- - -This binary artifact contains Apache ORC. - -Copyright: 2013-2020 The Apache Software Foundation. -Home page: https://orc.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This binary artifact contains Apache Hive's storage API via ORC. - -Copyright: 2013-2020 The Apache Software Foundation. -Home page: https://hive.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This binary artifact contains Airlift Aircompressor. - -Copyright: 2011-2020 Aircompressor authors. -Home page: https://github.com/airlift/aircompressor -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This binary artifact contains Airlift Slice. - -Copyright: 2013-2020 Slice authors. -Home page: https://github.com/airlift/slice -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This binary artifact contains JetBrains annotations. - -Copyright: 2000-2020 JetBrains s.r.o. -Home page: https://github.com/JetBrains/java-annotations -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This binary artifact contains Google Guava. - -Copyright: 2006-2020 The Guava Authors -Home page: https://github.com/google/guava -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This binary artifact contains Google Error Prone Annotations. - -Copyright: Copyright 2011-2019 The Error Prone Authors -Home page: https://github.com/google/error-prone -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This binary artifact contains checkerframework checker-qual Annotations. - -Copyright: 2004-2020 the Checker Framework developers -Home page: https://github.com/typetools/checker-framework -License: https://github.com/typetools/checker-framework/blob/master/LICENSE.txt (MIT license) - -License text: -| The annotations are licensed under the MIT License. (The text of this -| license appears below.) More specifically, all the parts of the Checker -| Framework that you might want to include with your own program use the -| MIT License. This is the checker-qual.jar file and all the files that -| appear in it: every file in a qual/ directory, plus utility files such -| as NullnessUtil.java, RegexUtil.java, SignednessUtil.java, etc. -| In addition, the cleanroom implementations of third-party annotations, -| which the Checker Framework recognizes as aliases for its own -| annotations, are licensed under the MIT License. 
-| -| Permission is hereby granted, free of charge, to any person obtaining a copy -| of this software and associated documentation files (the "Software"), to deal -| in the Software without restriction, including without limitation the rights -| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -| copies of the Software, and to permit persons to whom the Software is -| furnished to do so, subject to the following conditions: -| -| The above copyright notice and this permission notice shall be included in -| all copies or substantial portions of the Software. -| -| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -| THE SOFTWARE. - --------------------------------------------------------------------------------- - -This binary artifact contains Animal Sniffer Annotations. - -Copyright: 2009-2018 codehaus.org -Home page: https://www.mojohaus.org/animal-sniffer/animal-sniffer-annotations/ -License: https://www.mojohaus.org/animal-sniffer/animal-sniffer-annotations/license.html (MIT license) - -License text: -| The MIT License -| -| Copyright (c) 2009 codehaus.org. -| -| Permission is hereby granted, free of charge, to any person obtaining a copy -| of this software and associated documentation files (the "Software"), to deal -| in the Software without restriction, including without limitation the rights -| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -| copies of the Software, and to permit persons to whom the Software is -| furnished to do so, subject to the following conditions: -| -| The above copyright notice and this permission notice shall be included in -| all copies or substantial portions of the Software. -| -| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -| THE SOFTWARE. - --------------------------------------------------------------------------------- - -This binary artifact contains Caffeine by Ben Manes. - -Copyright: 2014-2020 Ben Manes and contributors -Home page: https://github.com/ben-manes/caffeine -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This binary artifact contains Apache Yetus audience annotations. - -Copyright: 2008-2020 The Apache Software Foundation. -Home page: https://yetus.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This binary artifact contains Google protobuf. - -Copyright: 2008 Google Inc. -Home page: https://developers.google.com/protocol-buffers -License: https://github.com/protocolbuffers/protobuf/blob/master/LICENSE (BSD) - -License text: - -| Copyright 2008 Google Inc. 
All rights reserved. -| -| Redistribution and use in source and binary forms, with or without -| modification, are permitted provided that the following conditions are -| met: -| -| * Redistributions of source code must retain the above copyright -| notice, this list of conditions and the following disclaimer. -| * Redistributions in binary form must reproduce the above -| copyright notice, this list of conditions and the following disclaimer -| in the documentation and/or other materials provided with the -| distribution. -| * Neither the name of Google Inc. nor the names of its -| contributors may be used to endorse or promote products derived from -| this software without specific prior written permission. -| -| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -| OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -| -| Code generated by the Protocol Buffer compiler is owned by the owner -| of the input file used when generating it. This code is not -| standalone and requires a support library to be linked with it. This -| support library is itself covered by the above license. - --------------------------------------------------------------------------------- - -This binary artifact contains ThreeTen. - -Copyright: 2007-present, Stephen Colebourne & Michael Nascimento Santos. -Home page: https://www.threeten.org/threeten-extra/ -License: https://github.com/ThreeTen/threeten-extra/blob/master/LICENSE.txt (BSD 3-clause) - -License text: - -| All rights reserved. -| -| * Redistribution and use in source and binary forms, with or without -| modification, are permitted provided that the following conditions are met: -| -| * Redistributions of source code must retain the above copyright notice, -| this list of conditions and the following disclaimer. -| -| * Redistributions in binary form must reproduce the above copyright notice, -| this list of conditions and the following disclaimer in the documentation -| and/or other materials provided with the distribution. -| -| * Neither the name of JSR-310 nor the names of its contributors -| may be used to endorse or promote products derived from this software -| without specific prior written permission. -| -| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -| A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR -| CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -| EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -| PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -| PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -| LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -| NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -This binary artifact includes Project Nessie with the following in its NOTICE -file: - -| Dremio -| Copyright 2015-2017 Dremio Corporation -| -| This product includes software developed at -| The Apache Software Foundation (http://www.apache.org/). - --------------------------------------------------------------------------------- - -This binary includes code from Apache Commons. - -* Core ArrayUtil. - -Copyright: 2020 The Apache Software Foundation -Home page: https://commons.apache.org/ -License: https://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This binary artifact contains Apache HttpComponents Client. - -Copyright: 1999-2022 The Apache Software Foundation. -Home page: https://hc.apache.org/ -License: http://www.apache.org/licenses/LICENSE-2.0 - --------------------------------------------------------------------------------- - -This product includes code from Apache HttpComponents Client. - -* retry and error handling logic in ExponentialHttpRequestRetryStrategy.java - -Copyright: 1999-2022 The Apache Software Foundation. -Home page: https://hc.apache.org/ -License: https://www.apache.org/licenses/LICENSE-2.0 diff --git a/hive-runtime/NOTICE b/hive-runtime/NOTICE deleted file mode 100644 index 774e87fedf58..000000000000 --- a/hive-runtime/NOTICE +++ /dev/null @@ -1,92 +0,0 @@ - -Apache Iceberg -Copyright 2017-2024 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - --------------------------------------------------------------------------------- - -This binary artifact includes Apache ORC with the following in its NOTICE file: - -| Apache ORC -| Copyright 2013-2019 The Apache Software Foundation -| -| This product includes software developed by The Apache Software -| Foundation (http://www.apache.org/). -| -| This product includes software developed by Hewlett-Packard: -| (c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P - --------------------------------------------------------------------------------- - -This binary artifact includes Airlift Aircompressor with the following in its -NOTICE file: - -| Snappy Copyright Notices -| ========================= -| -| * Copyright 2011 Dain Sundstrom -| * Copyright 2011, Google Inc. -| -| -| Snappy License -| =============== -| Copyright 2011, Google Inc. -| All rights reserved. -| -| Redistribution and use in source and binary forms, with or without -| modification, are permitted provided that the following conditions are -| met: -| -| * Redistributions of source code must retain the above copyright -| notice, this list of conditions and the following disclaimer. 
-| * Redistributions in binary form must reproduce the above -| copyright notice, this list of conditions and the following disclaimer -| in the documentation and/or other materials provided with the -| distribution. -| * Neither the name of Google Inc. nor the names of its -| contributors may be used to endorse or promote products derived from -| this software without specific prior written permission. -| -| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -| OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - --------------------------------------------------------------------------------- - -This binary artifact includes Apache Yetus with the following in its NOTICE -file: - -| Apache Yetus -| Copyright 2008-2020 The Apache Software Foundation -| -| This product includes software developed at -| The Apache Software Foundation (https://www.apache.org/). -| -| --- -| Additional licenses for the Apache Yetus Source/Website: -| --- -| -| -| See LICENSE for terms. - --------------------------------------------------------------------------------- - -This binary artifact includes Project Nessie with the following in its NOTICE -file: - -| Dremio -| Copyright 2015-2017 Dremio Corporation -| -| This product includes software developed at -| The Apache Software Foundation (http://www.apache.org/). - diff --git a/hive-runtime/build.gradle b/hive-runtime/build.gradle deleted file mode 100644 index a107afcb3777..000000000000 --- a/hive-runtime/build.gradle +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -def hiveVersions = (System.getProperty("hiveVersions") != null ? 
System.getProperty("hiveVersions") : System.getProperty("defaultHiveVersions")).split(",") - -project(':iceberg-hive-runtime') { - apply plugin: 'com.gradleup.shadow' - - tasks.jar.dependsOn tasks.shadowJar - - configurations { - implementation { - exclude group: 'com.github.stephenc.findbugs' - exclude group: 'commons-pool' - exclude group: 'javax.annotation' - exclude group: 'javax.xml.bind' - exclude group: 'org.apache.commons' - exclude group: 'org.slf4j' - exclude group: 'org.xerial.snappy' - } - } - - dependencies { - implementation project(':iceberg-mr') - if (hiveVersions.contains("3")) { - implementation project(':iceberg-hive3') - } - implementation(project(':iceberg-nessie')) { - exclude group: 'com.google.code.findbugs', module: 'jsr305' - } - implementation project(':iceberg-aws') - implementation project(':iceberg-azure') - implementation(project(':iceberg-aliyun')) { - exclude group: 'edu.umd.cs.findbugs', module: 'findbugs' - exclude group: 'org.apache.httpcomponents', module: 'httpclient' - exclude group: 'commons-logging', module: 'commons-logging' - } - implementation project(':iceberg-gcp') - } - - shadowJar { - configurations = [project.configurations.runtimeClasspath] - - zip64 true - - // include the LICENSE and NOTICE files for the shaded Jar - from(projectDir) { - include 'LICENSE' - include 'NOTICE' - } - - // Relocate dependencies to avoid conflicts - relocate 'org.apache.avro', 'org.apache.iceberg.shaded.org.apache.avro' - relocate 'org.apache.parquet', 'org.apache.iceberg.shaded.org.apache.parquet' - relocate 'com.google.errorprone', 'org.apache.iceberg.shaded.com.google.errorprone' - relocate 'com.google.flatbuffers', 'org.apache.iceberg.shaded.com.google.flatbuffers' - relocate 'com.fasterxml', 'org.apache.iceberg.shaded.com.fasterxml' - relocate 'com.github.benmanes', 'org.apache.iceberg.shaded.com.github.benmanes' - relocate 'org.checkerframework', 'org.apache.iceberg.shaded.org.checkerframework' - relocate 'shaded.parquet', 'org.apache.iceberg.shaded.org.apache.parquet.shaded' - relocate 'org.apache.orc', 'org.apache.iceberg.shaded.org.apache.orc' - relocate 'io.airlift', 'org.apache.iceberg.shaded.io.airlift' - relocate 'org.threeten.extra', 'org.apache.iceberg.shaded.org.threeten.extra' - relocate 'org.apache.hc.client5', 'org.apache.iceberg.shaded.org.apache.hc.client5' - relocate 'org.apache.hc.core5', 'org.apache.iceberg.shaded.org.apache.hc.core5' - // relocate OrcSplit in order to avoid the conflict from Hive's OrcSplit - relocate 'org.apache.hadoop.hive.ql.io.orc.OrcSplit', 'org.apache.iceberg.shaded.org.apache.hadoop.hive.ql.io.orc.OrcSplit' - - archiveClassifier.set(null) - } - - jar { - enabled = false - } -} - diff --git a/hive3-orc-bundle/build.gradle b/hive3-orc-bundle/build.gradle deleted file mode 100644 index 4e1e9c5dd222..000000000000 --- a/hive3-orc-bundle/build.gradle +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// The purpose of this module is to re-shade org.apache.orc.storage to the original org.apache.hadoop.hive package -// name. This is to be used by Hive3 for features including e.g. vectorization. -project(':iceberg-hive3-orc-bundle') { - - apply plugin: 'com.gradleup.shadow' - - tasks.jar.dependsOn tasks.shadowJar - - configurations { - implementation { - exclude group: 'com.github.luben' - } - } - - dependencies { - implementation project(':iceberg-data') - implementation project(':iceberg-orc') - - testCompileOnly project(path: ':iceberg-data', configuration: 'testArtifacts') - testCompileOnly project(path: ':iceberg-orc', configuration: 'testArtifacts') - } - - shadowJar { - configurations = [project.configurations.compileClasspath, project.configurations.runtimeClasspath] - - zip64 true - - // include the LICENSE and NOTICE files for the shaded Jar - from(projectDir) { - include 'LICENSE' - include 'NOTICE' - } - - dependencies { - exclude(dependency('org.slf4j:slf4j-api')) - } - - // Relocate dependencies to avoid conflicts - relocate 'org.apache.orc.storage', 'org.apache.hadoop.hive' - - // We really only need Iceberg and Orc classes, but with relocated references for storage-api classes (see above) - // Unfortunately the include list feature of this shader plugin doesn't work as expected - dependencies { - exclude 'com/**/*' - exclude 'edu/**/*' - exclude 'io/**' - exclude 'javax/**' - exclude 'org/apache/avro/**/*' - exclude 'org/apache/commons/**/*' - exclude 'org/checkerframework/**/*' - exclude 'org/codehaus/**/*' - exclude 'org/intellij/**/*' - exclude 'org/jetbrains/**/*' - exclude 'org/slf4j/**/*' - exclude 'org/threeten/**/*' - exclude 'org/xerial/**/*' - exclude 'org/apache/parquet/**/*' - exclude 'org/apache/yetus/**/*' - exclude 'shaded/parquet/**/*' - } - - archiveClassifier.set(null) - } - - jar { - enabled = false - } - -} - diff --git a/hive3/build.gradle b/hive3/build.gradle deleted file mode 100644 index f8bb50951fb1..000000000000 --- a/hive3/build.gradle +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -project(':iceberg-hive3') { - - // run the tests in iceberg-mr with Hive3 dependencies - sourceSets { - test { - java.srcDirs = ['../mr/src/test/java', 'src/test/java'] - resources.srcDirs = ['../mr/src/test/resources', 'src/test/resources'] - } - } - - // exclude these Hive2-specific tests from iceberg-mr - test { - exclude '**/TestIcebergDateObjectInspector.class' - exclude '**/TestIcebergTimestampObjectInspector.class' - exclude '**/TestIcebergTimestampWithZoneObjectInspector.class' - } - - dependencies { - compileOnly project(path: ':iceberg-bundled-guava', configuration: 'shadow') - compileOnly project(':iceberg-api') - compileOnly project(':iceberg-core') - compileOnly project(':iceberg-common') - compileOnly project(':iceberg-hive-metastore') - compileOnly project(':iceberg-parquet') - compileOnly project(':iceberg-hive3-orc-bundle') - compileOnly project(':iceberg-mr') - compileOnly project(':iceberg-data') - compileOnly project(':iceberg-orc') - - compileOnly(libs.hadoop3.client) { - exclude group: 'org.apache.avro', module: 'avro' - } - - compileOnly("${libs.hive3.exec.get().module}:${libs.hive3.exec.get().getVersion()}:core") { - exclude group: 'com.google.code.findbugs', module: 'jsr305' - exclude group: 'com.google.guava' - exclude group: 'com.google.protobuf', module: 'protobuf-java' - exclude group: 'org.apache.avro', module: 'avro' - exclude group: 'org.apache.calcite.avatica' - exclude group: 'org.apache.hive', module: 'hive-llap-tez' - exclude group: 'org.apache.logging.log4j' - exclude group: 'org.pentaho' // missing dependency - exclude group: 'org.slf4j', module: 'slf4j-log4j12' - } - - compileOnly("${libs.orc.core.get().module}:${libs.versions.orc.get()}:nohive") { - exclude group: 'org.apache.hadoop' - exclude group: 'commons-lang' - // These artifacts are shaded and included in the orc-core fat jar - exclude group: 'com.google.protobuf', module: 'protobuf-java' - exclude group: 'org.apache.hive', module: 'hive-storage-api' - } - - compileOnly(libs.hive3.metastore) { - exclude group: 'org.apache.orc' - exclude group: 'org.apache.parquet' - } - compileOnly(libs.hive3.serde) { - exclude group: 'org.apache.orc' - exclude group: 'org.apache.parquet' - } - - compileOnly(libs.parquet.avro) { - exclude group: 'org.apache.avro', module: 'avro' - // already shaded by Parquet - exclude group: 'it.unimi.dsi' - exclude group: 'org.codehaus.jackson' - } - - testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts') - testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts') - testImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts') - testImplementation project(path: ':iceberg-data', configuration: 'testArtifacts') - - testImplementation libs.avro.avro - testImplementation libs.calcite.core - testImplementation libs.kryo.shaded - testImplementation platform(libs.jackson.bom) - testImplementation(libs.hive3.service) { - exclude group: 'org.apache.hive', module: 'hive-exec' - exclude group: 'org.apache.orc' - } - testImplementation libs.tez010.dag - testImplementation libs.tez010.mapreduce - } - - test { - // testJoinTables / testScanTable - maxHeapSize '2500m' - } -} diff --git a/hive3/src/main/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java b/hive3/src/main/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java deleted file mode 100644 index 4031bfaa20f6..000000000000 --- a/hive3/src/main/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java +++ /dev/null @@ -1,311 +0,0 @@ -/* - * Licensed to 
the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.hadoop.hive.ql.io.orc; - -import java.io.ByteArrayOutputStream; -import java.io.DataInput; -import java.io.DataOutput; -import java.io.DataOutputStream; -import java.io.IOException; -import java.util.List; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.ql.io.AcidInputFormat; -import org.apache.hadoop.hive.ql.io.AcidUtils; -import org.apache.hadoop.hive.ql.io.ColumnarSplit; -import org.apache.hadoop.hive.ql.io.LlapAwareSplit; -import org.apache.hadoop.hive.ql.io.SyntheticFileId; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableUtils; -import org.apache.hadoop.mapred.FileSplit; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.orc.OrcProto; -import org.apache.orc.impl.OrcTail; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * In order to fix some compatibility issues with ORC support with Hive 3.x and the shaded ORC - * libraries, this class has been copied from Hive 3.x source code. However, this class should be - * removed once Hive 4 is out. - */ -public class OrcSplit extends FileSplit implements ColumnarSplit, LlapAwareSplit { - private static final Logger LOG = LoggerFactory.getLogger(OrcSplit.class); - private OrcTail orcTail; - private boolean hasFooter; - - /** This means {@link AcidUtils.AcidBaseFileType#ORIGINAL_BASE} */ - private boolean isOriginal; - - private boolean hasBase; - // partition root - private Path rootDir; - private final List deltas = Lists.newArrayList(); - private long projColsUncompressedSize; - private transient Object fileKey; - private long fileLen; - - static final int HAS_SYNTHETIC_FILEID_FLAG = 16; - static final int HAS_LONG_FILEID_FLAG = 8; - static final int BASE_FLAG = 4; - static final int ORIGINAL_FLAG = 2; - static final int FOOTER_FLAG = 1; - - protected OrcSplit() { - // The FileSplit() constructor in hadoop 0.20 and 1.x is package private so can't use it. - // This constructor is used to create the object and then call readFields() - // so just pass nulls to this super constructor. - super(null, 0, 0, (String[]) null); - } - - public OrcSplit( - Path path, - Object fileId, - long offset, - long length, - String[] hosts, - OrcTail orcTail, - boolean isOriginal, - boolean hasBase, - List deltas, - long projectedDataSize, - long fileLen, - Path rootDir) { - super(path, offset, length, hosts); - // For HDFS, we could avoid serializing file ID and just replace the path with inode-based - // path. However, that breaks bunch of stuff because Hive later looks up things by split path. 
- this.fileKey = fileId; - this.orcTail = orcTail; - hasFooter = this.orcTail != null; - this.isOriginal = isOriginal; - this.hasBase = hasBase; - this.rootDir = rootDir; - this.deltas.addAll(deltas); - this.projColsUncompressedSize = projectedDataSize <= 0 ? length : projectedDataSize; - // setting file length to Long.MAX_VALUE will let orc reader read file length from file system - this.fileLen = fileLen <= 0 ? Long.MAX_VALUE : fileLen; - } - - @Override - public void write(DataOutput out) throws IOException { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - DataOutputStream dos = new DataOutputStream(bos); - // serialize path, offset, length using FileSplit - super.write(dos); - int required = bos.size(); - - // write addition payload required for orc - writeAdditionalPayload(dos); - int additional = bos.size() - required; - - out.write(bos.toByteArray()); - if (LOG.isTraceEnabled()) { - LOG.trace( - "Writing additional {} bytes to OrcSplit as payload. Required {} bytes.", - additional, - required); - } - } - - private void writeAdditionalPayload(final DataOutputStream out) throws IOException { - boolean isFileIdLong = fileKey instanceof Long; - boolean isFileIdWritable = fileKey instanceof Writable; - int flags = - (hasBase ? BASE_FLAG : 0) - | (isOriginal ? ORIGINAL_FLAG : 0) - | (hasFooter ? FOOTER_FLAG : 0) - | (isFileIdLong ? HAS_LONG_FILEID_FLAG : 0) - | (isFileIdWritable ? HAS_SYNTHETIC_FILEID_FLAG : 0); - out.writeByte(flags); - out.writeInt(deltas.size()); - for (AcidInputFormat.DeltaMetaData delta : deltas) { - delta.write(out); - } - if (hasFooter) { - OrcProto.FileTail fileTail = orcTail.getMinimalFileTail(); - byte[] tailBuffer = fileTail.toByteArray(); - int tailLen = tailBuffer.length; - WritableUtils.writeVInt(out, tailLen); - out.write(tailBuffer); - } - if (isFileIdLong) { - out.writeLong(((Long) fileKey).longValue()); - } else if (isFileIdWritable) { - ((Writable) fileKey).write(out); - } - out.writeLong(fileLen); - out.writeUTF(rootDir.toString()); - } - - @Override - public void readFields(DataInput in) throws IOException { - // deserialize path, offset, length using FileSplit - super.readFields(in); - - byte flags = in.readByte(); - hasFooter = (FOOTER_FLAG & flags) != 0; - isOriginal = (ORIGINAL_FLAG & flags) != 0; - hasBase = (BASE_FLAG & flags) != 0; - boolean hasLongFileId = (HAS_LONG_FILEID_FLAG & flags) != 0; - boolean hasWritableFileId = (HAS_SYNTHETIC_FILEID_FLAG & flags) != 0; - if (hasLongFileId && hasWritableFileId) { - throw new IOException("Invalid split - both file ID types present"); - } - - deltas.clear(); - int numDeltas = in.readInt(); - for (int i = 0; i < numDeltas; i++) { - AcidInputFormat.DeltaMetaData dmd = new AcidInputFormat.DeltaMetaData(); - dmd.readFields(in); - deltas.add(dmd); - } - if (hasFooter) { - int tailLen = WritableUtils.readVInt(in); - byte[] tailBuffer = new byte[tailLen]; - in.readFully(tailBuffer); - OrcProto.FileTail fileTail = OrcProto.FileTail.parseFrom(tailBuffer); - orcTail = new OrcTail(fileTail, null); - } - if (hasLongFileId) { - fileKey = in.readLong(); - } else if (hasWritableFileId) { - SyntheticFileId fileId = new SyntheticFileId(); - fileId.readFields(in); - this.fileKey = fileId; - } - fileLen = in.readLong(); - rootDir = new Path(in.readUTF()); - } - - public OrcTail getOrcTail() { - return orcTail; - } - - public boolean hasFooter() { - return hasFooter; - } - - /** - * Returns {@code true} if file schema doesn't have Acid metadata columns Such file may be in a - * delta_x_y/ or base_x due to 
being added via "load data" command. It could be at partition|table - * root due to table having been converted from non-acid to acid table. It could even be something - * like "warehouse/t/HIVE_UNION_SUBDIR_15/000000_0" if it was written by an "insert into t select - * ... from A union all select ... from B" - * - * @return {@code true} if file schema doesn't have Acid metadata columns - */ - public boolean isOriginal() { - return isOriginal; - } - - public boolean hasBase() { - return hasBase; - } - - public Path getRootDir() { - return rootDir; - } - - public List getDeltas() { - return deltas; - } - - public long getFileLength() { - return fileLen; - } - - /** - * If this method returns true, then for sure it is ACID. However, if it returns false.. it could - * be ACID or non-ACID. - * - * @return true if is ACID - */ - public boolean isAcid() { - return hasBase || !deltas.isEmpty(); - } - - public long getProjectedColumnsUncompressedSize() { - return projColsUncompressedSize; - } - - public Object getFileKey() { - return fileKey; - } - - @Override - public long getColumnarProjectionSize() { - return projColsUncompressedSize; - } - - @Override - public boolean canUseLlapIo(Configuration conf) { - final boolean hasDelta = deltas != null && !deltas.isEmpty(); - final boolean isAcidRead = AcidUtils.isFullAcidScan(conf); - final boolean isVectorized = HiveConf.getBoolVar(conf, ConfVars.HIVE_VECTORIZATION_ENABLED); - Boolean isSplitUpdate = null; - if (isAcidRead) { - final AcidUtils.AcidOperationalProperties acidOperationalProperties = - AcidUtils.getAcidOperationalProperties(conf); - isSplitUpdate = acidOperationalProperties.isSplitUpdate(); - } - - if (isOriginal) { - if (!isAcidRead && !hasDelta) { - // Original scan only - return true; - } - } else { - boolean isAcidEnabled = HiveConf.getBoolVar(conf, ConfVars.LLAP_IO_ACID_ENABLED); - if (isAcidEnabled && isAcidRead && hasBase && isVectorized) { - if (hasDelta) { - if (isSplitUpdate) { // Base with delete deltas - return true; - } - } else { - // Base scan only - return true; - } - } - } - return false; - } - - @Override - public String toString() { - return "OrcSplit [" - + getPath() - + ", start=" - + getStart() - + ", length=" - + getLength() - + ", isOriginal=" - + isOriginal - + ", fileLength=" - + fileLen - + ", hasFooter=" - + hasFooter - + ", hasBase=" - + hasBase - + ", deltas=" - + (deltas == null ? 0 : deltas.size()) - + "]"; - } -} diff --git a/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java b/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java deleted file mode 100644 index 08bb03282bac..000000000000 --- a/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergDateObjectInspectorHive3.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.mr.hive.serde.objectinspector; - -import java.time.LocalDate; -import org.apache.hadoop.hive.common.type.Date; -import org.apache.hadoop.hive.serde2.io.DateWritableV2; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.iceberg.util.DateTimeUtil; - -public final class IcebergDateObjectInspectorHive3 extends AbstractPrimitiveJavaObjectInspector - implements DateObjectInspector, WriteObjectInspector { - - private static final IcebergDateObjectInspectorHive3 INSTANCE = - new IcebergDateObjectInspectorHive3(); - - public static IcebergDateObjectInspectorHive3 get() { - return INSTANCE; - } - - private IcebergDateObjectInspectorHive3() { - super(TypeInfoFactory.dateTypeInfo); - } - - @Override - public Date getPrimitiveJavaObject(Object o) { - if (o == null) { - return null; - } - LocalDate date = (LocalDate) o; - return Date.ofEpochDay(DateTimeUtil.daysFromDate(date)); - } - - @Override - public DateWritableV2 getPrimitiveWritableObject(Object o) { - return o == null ? null : new DateWritableV2(DateTimeUtil.daysFromDate((LocalDate) o)); - } - - @Override - public Object copyObject(Object o) { - if (o == null) { - return null; - } - - if (o instanceof Date) { - return new Date((Date) o); - } else if (o instanceof LocalDate) { - return LocalDate.of( - ((LocalDate) o).getYear(), ((LocalDate) o).getMonth(), ((LocalDate) o).getDayOfMonth()); - } else { - return o; - } - } - - @Override - public LocalDate convert(Object o) { - if (o == null) { - return null; - } - - Date date = (Date) o; - return LocalDate.of(date.getYear(), date.getMonth(), date.getDay()); - } -} diff --git a/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java b/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java deleted file mode 100644 index 3db2940f3cac..000000000000 --- a/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampObjectInspectorHive3.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.mr.hive.serde.objectinspector; - -import java.time.LocalDateTime; -import java.time.ZoneOffset; -import org.apache.hadoop.hive.common.type.Timestamp; -import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; - -public class IcebergTimestampObjectInspectorHive3 extends AbstractPrimitiveJavaObjectInspector - implements TimestampObjectInspector, WriteObjectInspector { - - private static final IcebergTimestampObjectInspectorHive3 INSTANCE = - new IcebergTimestampObjectInspectorHive3(); - - public static IcebergTimestampObjectInspectorHive3 get() { - return INSTANCE; - } - - private IcebergTimestampObjectInspectorHive3() { - super(TypeInfoFactory.timestampTypeInfo); - } - - @Override - public LocalDateTime convert(Object o) { - if (o == null) { - return null; - } - Timestamp timestamp = (Timestamp) o; - return LocalDateTime.ofEpochSecond( - timestamp.toEpochSecond(), timestamp.getNanos(), ZoneOffset.UTC); - } - - @Override - @SuppressWarnings("JavaLocalDateTimeGetNano") - public Timestamp getPrimitiveJavaObject(Object o) { - if (o == null) { - return null; - } - LocalDateTime time = (LocalDateTime) o; - Timestamp timestamp = Timestamp.ofEpochMilli(time.toInstant(ZoneOffset.UTC).toEpochMilli()); - timestamp.setNanos(time.getNano()); - return timestamp; - } - - @Override - public TimestampWritableV2 getPrimitiveWritableObject(Object o) { - Timestamp ts = getPrimitiveJavaObject(o); - return ts == null ? null : new TimestampWritableV2(ts); - } - - @Override - public Object copyObject(Object o) { - if (o == null) { - return null; - } - - if (o instanceof Timestamp) { - Timestamp ts = (Timestamp) o; - Timestamp copy = new Timestamp(ts); - copy.setNanos(ts.getNanos()); - return copy; - } else if (o instanceof LocalDateTime) { - return LocalDateTime.of(((LocalDateTime) o).toLocalDate(), ((LocalDateTime) o).toLocalTime()); - } else { - return o; - } - } -} diff --git a/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampWithZoneObjectInspectorHive3.java b/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampWithZoneObjectInspectorHive3.java deleted file mode 100644 index c3c81f8d7a74..000000000000 --- a/hive3/src/main/java/org/apache/iceberg/mr/hive/serde/objectinspector/IcebergTimestampWithZoneObjectInspectorHive3.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.mr.hive.serde.objectinspector; - -import java.time.OffsetDateTime; -import java.time.ZoneOffset; -import java.time.ZonedDateTime; -import org.apache.hadoop.hive.common.type.TimestampTZ; -import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampLocalTZObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; - -public class IcebergTimestampWithZoneObjectInspectorHive3 - extends AbstractPrimitiveJavaObjectInspector - implements TimestampLocalTZObjectInspector, WriteObjectInspector { - - private static final IcebergTimestampWithZoneObjectInspectorHive3 INSTANCE = - new IcebergTimestampWithZoneObjectInspectorHive3(); - - public static IcebergTimestampWithZoneObjectInspectorHive3 get() { - return INSTANCE; - } - - private IcebergTimestampWithZoneObjectInspectorHive3() { - super(TypeInfoFactory.timestampLocalTZTypeInfo); - } - - @Override - public OffsetDateTime convert(Object o) { - if (o == null) { - return null; - } - ZonedDateTime zdt = ((TimestampTZ) o).getZonedDateTime(); - return OffsetDateTime.of(zdt.toLocalDateTime(), zdt.getOffset()); - } - - @Override - public TimestampTZ getPrimitiveJavaObject(Object o) { - if (o == null) { - return null; - } - OffsetDateTime odt = (OffsetDateTime) o; - ZonedDateTime zdt = odt.atZoneSameInstant(ZoneOffset.UTC); - return new TimestampTZ(zdt); - } - - @Override - public TimestampLocalTZWritable getPrimitiveWritableObject(Object o) { - TimestampTZ tsTz = getPrimitiveJavaObject(o); - return tsTz == null ? null : new TimestampLocalTZWritable(tsTz); - } - - @Override - public Object copyObject(Object o) { - if (o instanceof TimestampTZ) { - TimestampTZ ts = (TimestampTZ) o; - return new TimestampTZ(ts.getZonedDateTime()); - } else if (o instanceof OffsetDateTime) { - OffsetDateTime odt = (OffsetDateTime) o; - return OffsetDateTime.of(odt.toLocalDateTime(), odt.getOffset()); - } else { - return o; - } - } -} diff --git a/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/CompatibilityHiveVectorUtils.java b/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/CompatibilityHiveVectorUtils.java deleted file mode 100644 index 619e858daac9..000000000000 --- a/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/CompatibilityHiveVectorUtils.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.mr.hive.vector; - -import java.nio.charset.StandardCharsets; -import java.sql.Date; -import java.sql.Timestamp; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.exec.tez.DagUtils; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; -import org.apache.hadoop.hive.ql.plan.BaseWork; -import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.mapred.JobConf; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Contains ported code snippets from later Hive sources. We should get rid of this class as soon as - * Hive 4 is released and Iceberg makes a dependency to that version. - */ -public class CompatibilityHiveVectorUtils { - - private static final Logger LOG = LoggerFactory.getLogger(CompatibilityHiveVectorUtils.class); - - private CompatibilityHiveVectorUtils() {} - - /** - * Returns serialized mapwork instance from a job conf - ported from Hive source code - * LlapHiveUtils#findMapWork - * - * @param job JobConf instance - * @return a serialized {@link MapWork} based on the given job conf - */ - public static MapWork findMapWork(JobConf job) { - String inputName = job.get(Utilities.INPUT_NAME, null); - if (LOG.isDebugEnabled()) { - LOG.debug("Initializing for input {}", inputName); - } - String prefixes = job.get(DagUtils.TEZ_MERGE_WORK_FILE_PREFIXES); - if (prefixes != null && !prefixes.trim().isEmpty()) { - // Currently SMB is broken, so we cannot check if it's compatible with IO elevator. - // So, we don't use the below code that would get the correct MapWork. See HIVE-16985. - return null; - } - - BaseWork work = null; - // HIVE-16985: try to find the fake merge work for SMB join, that is really another MapWork. 
- if (inputName != null) { - if (prefixes == null || !Lists.newArrayList(prefixes.split(",")).contains(inputName)) { - inputName = null; - } - } - if (inputName != null) { - work = Utilities.getMergeWork(job, inputName); - } - - if (!(work instanceof MapWork)) { - work = Utilities.getMapWork(job); - } - return (MapWork) work; - } - - /** - * Ported from Hive source code VectorizedRowBatchCtx#addPartitionColsToBatch - * - * @param col ColumnVector to write the partition value into - * @param value partition value - * @param partitionColumnName partition key - * @param rowColumnTypeInfo column type description - */ - // @SuppressWarnings({"AvoidNestedBlocks", "FallThrough", "MethodLength", "CyclomaticComplexity", - // "Indentation"}) - public static void addPartitionColsToBatch( - ColumnVector col, Object value, String partitionColumnName, TypeInfo rowColumnTypeInfo) { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) rowColumnTypeInfo; - - if (value == null) { - col.noNulls = false; - col.isNull[0] = true; - col.isRepeating = true; - return; - } - - switch (primitiveTypeInfo.getPrimitiveCategory()) { - case BOOLEAN: - LongColumnVector booleanColumnVector = (LongColumnVector) col; - booleanColumnVector.fill((Boolean) value ? 1 : 0); - booleanColumnVector.isNull[0] = false; - break; - - case BYTE: - LongColumnVector byteColumnVector = (LongColumnVector) col; - byteColumnVector.fill((Byte) value); - byteColumnVector.isNull[0] = false; - break; - - case SHORT: - LongColumnVector shortColumnVector = (LongColumnVector) col; - shortColumnVector.fill((Short) value); - shortColumnVector.isNull[0] = false; - break; - - case INT: - LongColumnVector intColumnVector = (LongColumnVector) col; - intColumnVector.fill((Integer) value); - intColumnVector.isNull[0] = false; - break; - - case LONG: - LongColumnVector longColumnVector = (LongColumnVector) col; - longColumnVector.fill((Long) value); - longColumnVector.isNull[0] = false; - break; - - case DATE: - LongColumnVector dateColumnVector = (LongColumnVector) col; - dateColumnVector.fill(DateWritable.dateToDays((Date) value)); - dateColumnVector.isNull[0] = false; - break; - - case TIMESTAMP: - TimestampColumnVector timeStampColumnVector = (TimestampColumnVector) col; - timeStampColumnVector.fill((Timestamp) value); - timeStampColumnVector.isNull[0] = false; - break; - - case INTERVAL_YEAR_MONTH: - LongColumnVector intervalYearMonthColumnVector = (LongColumnVector) col; - intervalYearMonthColumnVector.fill(((HiveIntervalYearMonth) value).getTotalMonths()); - intervalYearMonthColumnVector.isNull[0] = false; - break; - - case INTERVAL_DAY_TIME: - IntervalDayTimeColumnVector intervalDayTimeColumnVector = (IntervalDayTimeColumnVector) col; - intervalDayTimeColumnVector.fill((HiveIntervalDayTime) value); - intervalDayTimeColumnVector.isNull[0] = false; - break; - - case FLOAT: - DoubleColumnVector floatColumnVector = (DoubleColumnVector) col; - floatColumnVector.fill((Float) value); - floatColumnVector.isNull[0] = false; - break; - - case DOUBLE: - DoubleColumnVector doubleColumnVector = (DoubleColumnVector) col; - doubleColumnVector.fill((Double) value); - doubleColumnVector.isNull[0] = false; - break; - - case DECIMAL: - DecimalColumnVector decimalColumnVector = (DecimalColumnVector) col; - HiveDecimal hd = (HiveDecimal) value; - decimalColumnVector.set(0, hd); - decimalColumnVector.isRepeating = true; - decimalColumnVector.isNull[0] = false; - break; - - case BINARY: - BytesColumnVector binaryColumnVector = (BytesColumnVector) col; - byte[] bytes 
= (byte[]) value; - binaryColumnVector.fill(bytes); - binaryColumnVector.isNull[0] = false; - break; - - case STRING: - case CHAR: - case VARCHAR: - BytesColumnVector bytesColumnVector = (BytesColumnVector) col; - String sVal = value.toString(); - if (sVal == null) { - bytesColumnVector.noNulls = false; - bytesColumnVector.isNull[0] = true; - bytesColumnVector.isRepeating = true; - } else { - bytesColumnVector.setVal(0, sVal.getBytes(StandardCharsets.UTF_8)); - bytesColumnVector.isRepeating = true; - } - break; - - default: - throw new RuntimeException( - "Unable to recognize the partition type " - + primitiveTypeInfo.getPrimitiveCategory() - + " for column " - + partitionColumnName); - } - } -} diff --git a/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java b/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java deleted file mode 100644 index 70c43cb21f91..000000000000 --- a/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/HiveIcebergVectorizedRecordReader.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.mr.hive.vector; - -import java.io.IOException; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.Reporter; -import org.apache.iceberg.mr.mapred.AbstractMapredIcebergRecordReader; -import org.apache.iceberg.mr.mapreduce.IcebergSplit; - -/** - * Basically an MR1 API implementing wrapper for transferring VectorizedRowBatch's produced by - * IcebergInputformat$IcebergRecordReader which relies on the MR2 API format. 
- */ -public final class HiveIcebergVectorizedRecordReader - extends AbstractMapredIcebergRecordReader { - - private final JobConf job; - - public HiveIcebergVectorizedRecordReader( - org.apache.iceberg.mr.mapreduce.IcebergInputFormat mapreduceInputFormat, - IcebergSplit split, - JobConf job, - Reporter reporter) - throws IOException { - super(mapreduceInputFormat, split, job, reporter); - this.job = job; - } - - @Override - public boolean next(Void key, VectorizedRowBatch value) throws IOException { - try { - if (innerReader.nextKeyValue()) { - VectorizedRowBatch newBatch = (VectorizedRowBatch) innerReader.getCurrentValue(); - value.cols = newBatch.cols; - value.endOfFile = newBatch.endOfFile; - value.selectedInUse = newBatch.selectedInUse; - value.size = newBatch.size; - return true; - } else { - return false; - } - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - throw new RuntimeException(ie); - } - } - - @Override - public VectorizedRowBatch createValue() { - return CompatibilityHiveVectorUtils.findMapWork(job) - .getVectorizedRowBatchCtx() - .createVectorizedRowBatch(); - } - - @Override - public long getPos() { - return -1; - } -} diff --git a/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java b/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java deleted file mode 100644 index 0817060b811b..000000000000 --- a/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/HiveVectorizedReader.java +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.mr.hive.vector; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.io.IOConstants; -import org.apache.hadoop.hive.ql.io.orc.OrcSplit; -import org.apache.hadoop.hive.ql.io.orc.VectorizedOrcInputFormat; -import org.apache.hadoop.hive.ql.io.parquet.VectorizedParquetInputFormat; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapred.FileSplit; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapred.Reporter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.iceberg.FileFormat; -import org.apache.iceberg.FileScanTask; -import org.apache.iceberg.PartitionField; -import org.apache.iceberg.PartitionSpec; -import org.apache.iceberg.Schema; -import org.apache.iceberg.io.CloseableIterable; -import org.apache.iceberg.io.CloseableIterator; -import org.apache.iceberg.io.InputFile; -import org.apache.iceberg.mr.mapred.MapredIcebergInputFormat; -import org.apache.iceberg.orc.VectorizedReadUtils; -import org.apache.iceberg.parquet.ParquetSchemaUtil; -import org.apache.iceberg.parquet.TypeWithSchemaVisitor; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.orc.impl.OrcTail; -import org.apache.parquet.hadoop.ParquetFileReader; -import org.apache.parquet.schema.MessageType; - -/** - * Utility class to create vectorized readers for Hive. As per the file format of the task, it will - * create a matching vectorized record reader that is already implemented in Hive. It will also do - * some tweaks on the produced vectors for Iceberg's use e.g. partition column handling. - */ -public class HiveVectorizedReader { - - private HiveVectorizedReader() {} - - public static CloseableIterable reader( - InputFile inputFile, - FileScanTask task, - Map idToConstant, - TaskAttemptContext context) { - JobConf job = (JobConf) context.getConfiguration(); - Path path = new Path(inputFile.location()); - FileFormat format = task.file().format(); - Reporter reporter = - ((MapredIcebergInputFormat.CompatibilityTaskAttemptContextImpl) context) - .getLegacyReporter(); - - // Hive by default requires partition columns to be read too. This is not required for identity - // partition - // columns, as we will add this as constants later. - - int[] partitionColIndices = null; - Object[] partitionValues = null; - PartitionSpec partitionSpec = task.spec(); - - if (!partitionSpec.isUnpartitioned()) { - List readColumnIds = ColumnProjectionUtils.getReadColumnIDs(job); - - List fields = partitionSpec.fields(); - List partitionColIndicesList = Lists.newLinkedList(); - List partitionValuesList = Lists.newLinkedList(); - - for (PartitionField field : fields) { - if (field.transform().isIdentity()) { - // Skip reading identity partition columns from source file... 
- int hiveColIndex = field.sourceId() - 1; - readColumnIds.remove((Integer) hiveColIndex); - - // ...and use the corresponding constant value instead - partitionColIndicesList.add(hiveColIndex); - partitionValuesList.add(idToConstant.get(field.sourceId())); - } - } - - partitionColIndices = partitionColIndicesList.stream().mapToInt(Integer::intValue).toArray(); - partitionValues = partitionValuesList.toArray(new Object[0]); - - ColumnProjectionUtils.setReadColumns(job, readColumnIds); - } - - try { - - long start = task.start(); - long length = task.length(); - - RecordReader recordReader = null; - - switch (format) { - case ORC: - recordReader = orcRecordReader(job, reporter, task, inputFile, path, start, length); - break; - case PARQUET: - recordReader = parquetRecordReader(job, reporter, task, path, start, length); - break; - - default: - throw new UnsupportedOperationException( - "Vectorized Hive reading unimplemented for format: " + format); - } - - return createVectorizedRowBatchIterable( - recordReader, job, partitionColIndices, partitionValues); - - } catch (IOException ioe) { - throw new RuntimeException("Error creating vectorized record reader for " + inputFile, ioe); - } - } - - private static RecordReader orcRecordReader( - JobConf job, - Reporter reporter, - FileScanTask task, - InputFile inputFile, - Path path, - long start, - long length) - throws IOException { - // Metadata information has to be passed along in the OrcSplit. Without specifying this, the - // vectorized - // reader will assume that the ORC file ends at the task's start + length, and might fail - // reading the tail.. - OrcTail orcTail = VectorizedReadUtils.getOrcTail(inputFile, job); - - InputSplit split = - new OrcSplit( - path, - null, - start, - length, - (String[]) null, - orcTail, - false, - false, - Lists.newArrayList(), - 0, - task.length(), - path.getParent()); - return new VectorizedOrcInputFormat().getRecordReader(split, job, reporter); - } - - private static RecordReader parquetRecordReader( - JobConf job, Reporter reporter, FileScanTask task, Path path, long start, long length) - throws IOException { - InputSplit split = new FileSplit(path, start, length, job); - VectorizedParquetInputFormat inputFormat = new VectorizedParquetInputFormat(); - - MessageType fileSchema = ParquetFileReader.readFooter(job, path).getFileMetaData().getSchema(); - MessageType typeWithIds = null; - Schema expectedSchema = task.spec().schema(); - - if (ParquetSchemaUtil.hasIds(fileSchema)) { - typeWithIds = ParquetSchemaUtil.pruneColumns(fileSchema, expectedSchema); - } else { - typeWithIds = - ParquetSchemaUtil.pruneColumnsFallback( - ParquetSchemaUtil.addFallbackIds(fileSchema), expectedSchema); - } - - ParquetSchemaFieldNameVisitor psv = new ParquetSchemaFieldNameVisitor(fileSchema); - TypeWithSchemaVisitor.visit(expectedSchema.asStruct(), typeWithIds, psv); - job.set(IOConstants.COLUMNS, psv.retrieveColumnNameList()); - - return inputFormat.getRecordReader(split, job, reporter); - } - - private static CloseableIterable createVectorizedRowBatchIterable( - RecordReader hiveRecordReader, - JobConf job, - int[] partitionColIndices, - Object[] partitionValues) { - - VectorizedRowBatchIterator iterator = - new VectorizedRowBatchIterator(hiveRecordReader, job, partitionColIndices, partitionValues); - - return new CloseableIterable() { - - @Override - public CloseableIterator iterator() { - return iterator; - } - - @Override - public void close() throws IOException { - iterator.close(); - } - }; - } -} diff --git 
a/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/ParquetSchemaFieldNameVisitor.java b/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/ParquetSchemaFieldNameVisitor.java deleted file mode 100644 index e6e4ff1e04de..000000000000 --- a/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/ParquetSchemaFieldNameVisitor.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.mr.hive.vector; - -import java.util.List; -import java.util.Map; -import org.apache.iceberg.MetadataColumns; -import org.apache.iceberg.parquet.TypeWithSchemaVisitor; -import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; -import org.apache.iceberg.relocated.com.google.common.collect.Lists; -import org.apache.iceberg.relocated.com.google.common.collect.Maps; -import org.apache.iceberg.types.Types; -import org.apache.parquet.schema.GroupType; -import org.apache.parquet.schema.MessageType; -import org.apache.parquet.schema.Type; - -/** - * Collects the top level field names from Parquet schema. During schema visit it translates the - * expected schema's field names to what fields the visitor can match in the file schema to support - * column renames. - */ -class ParquetSchemaFieldNameVisitor extends TypeWithSchemaVisitor { - private final MessageType originalFileSchema; - private final Map typesById = Maps.newHashMap(); - private StringBuilder sb = new StringBuilder(); - private static final String DUMMY_COL_NAME = "<>"; - - ParquetSchemaFieldNameVisitor(MessageType originalFileSchema) { - this.originalFileSchema = originalFileSchema; - } - - @Override - public Type message(Types.StructType expected, MessageType prunedFileSchema, List fields) { - return this.struct(expected, prunedFileSchema.asGroupType(), fields); - } - - @Override - public Type struct(Types.StructType expected, GroupType struct, List fields) { - boolean isMessageType = struct instanceof MessageType; - - List expectedFields = - expected != null ? expected.fields() : ImmutableList.of(); - List types = Lists.newArrayListWithExpectedSize(expectedFields.size()); - - for (Types.NestedField field : expectedFields) { - int id = field.fieldId(); - if (MetadataColumns.metadataFieldIds().contains(id)) { - continue; - } - - Type fieldInPrunedFileSchema = typesById.get(id); - if (fieldInPrunedFileSchema == null) { - if (!originalFileSchema.containsField(field.name())) { - // Must be a new field - it isn't in this parquet file yet, so add the new field name - // instead of null - appendToColNamesList(isMessageType, field.name()); - } else { - // This field is found in the parquet file with a different ID, so it must have been - // recreated since. 
- // Inserting a dummy col name to force Hive Parquet reader returning null for this column. - appendToColNamesList(isMessageType, DUMMY_COL_NAME); - } - } else { - // Already present column in this parquet file, add the original name - types.add(fieldInPrunedFileSchema); - appendToColNamesList(isMessageType, fieldInPrunedFileSchema.getName()); - } - } - - if (!isMessageType) { - GroupType groupType = new GroupType(Type.Repetition.REPEATED, fieldNames.peek(), types); - typesById.put(struct.getId().intValue(), groupType); - return groupType; - } else { - return new MessageType("table", types); - } - } - - private void appendToColNamesList(boolean isMessageType, String colName) { - if (isMessageType) { - sb.append(colName).append(','); - } - } - - @Override - public Type primitive( - org.apache.iceberg.types.Type.PrimitiveType expected, - org.apache.parquet.schema.PrimitiveType primitive) { - typesById.put(primitive.getId().intValue(), primitive); - return primitive; - } - - @Override - public Type list(Types.ListType iList, GroupType array, Type element) { - typesById.put(array.getId().intValue(), array); - return array; - } - - @Override - public Type map(Types.MapType iMap, GroupType map, Type key, Type value) { - typesById.put(map.getId().intValue(), map); - return map; - } - - public String retrieveColumnNameList() { - sb.setLength(sb.length() - 1); - return sb.toString(); - } -} diff --git a/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/VectorizedRowBatchIterator.java b/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/VectorizedRowBatchIterator.java deleted file mode 100644 index 558a181cb0ad..000000000000 --- a/hive3/src/main/java/org/apache/iceberg/mr/hive/vector/VectorizedRowBatchIterator.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.mr.hive.vector; - -import java.io.IOException; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.iceberg.io.CloseableIterator; - -/** - * Iterator wrapper around Hive's VectorizedRowBatch producer (MRv1 implementing) record readers. 
- */
-public final class VectorizedRowBatchIterator implements CloseableIterator<VectorizedRowBatch> {
-
-  private final RecordReader<NullWritable, VectorizedRowBatch> recordReader;
-  private final NullWritable key;
-  private final VectorizedRowBatch batch;
-  private final VectorizedRowBatchCtx vrbCtx;
-  private final int[] partitionColIndices;
-  private final Object[] partitionValues;
-  private boolean advanced = false;
-
-  VectorizedRowBatchIterator(
-      RecordReader<NullWritable, VectorizedRowBatch> recordReader,
-      JobConf job,
-      int[] partitionColIndices,
-      Object[] partitionValues) {
-    this.recordReader = recordReader;
-    this.key = recordReader.createKey();
-    this.batch = recordReader.createValue();
-    this.vrbCtx = CompatibilityHiveVectorUtils.findMapWork(job).getVectorizedRowBatchCtx();
-    this.partitionColIndices = partitionColIndices;
-    this.partitionValues = partitionValues;
-  }
-
-  @Override
-  public void close() throws IOException {
-    this.recordReader.close();
-  }
-
-  private void advance() {
-    if (!advanced) {
-      try {
-
-        if (!recordReader.next(key, batch)) {
-          batch.size = 0;
-        }
-        // Fill partition values
-        if (partitionColIndices != null) {
-          for (int i = 0; i < partitionColIndices.length; ++i) {
-            int colIdx = partitionColIndices[i];
-            CompatibilityHiveVectorUtils.addPartitionColsToBatch(
-                batch.cols[colIdx],
-                partitionValues[i],
-                vrbCtx.getRowColumnNames()[colIdx],
-                vrbCtx.getRowColumnTypeInfos()[colIdx]);
-          }
-        }
-      } catch (IOException ioe) {
-        throw new RuntimeException(ioe);
-      }
-      advanced = true;
-    }
-  }
-
-  @Override
-  public boolean hasNext() {
-    advance();
-    return batch.size > 0;
-  }
-
-  @Override
-  public VectorizedRowBatch next() {
-    advance();
-    advanced = false;
-    return batch;
-  }
-}
diff --git a/hive3/src/main/java/org/apache/iceberg/orc/VectorizedReadUtils.java b/hive3/src/main/java/org/apache/iceberg/orc/VectorizedReadUtils.java
deleted file mode 100644
index 70e6fc9d5a48..000000000000
--- a/hive3/src/main/java/org/apache/iceberg/orc/VectorizedReadUtils.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.orc;
-
-import java.io.IOException;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.iceberg.io.InputFile;
-import org.apache.orc.impl.OrcTail;
-import org.apache.orc.impl.ReaderImpl;
-
-/**
- * Utilities that rely on Iceberg code from org.apache.iceberg.orc package and are required for ORC
- * vectorization.
- */
-public class VectorizedReadUtils {
-
-  private VectorizedReadUtils() {}
-
-  /**
-   * Opens the ORC inputFile and reads the metadata information to construct the OrcTail content.
-   * Unfortunately the API doesn't allow simple access to OrcTail, so we need the serialization
-   * trick.
-   *
-   * @param inputFile - the ORC file
-   * @param job - JobConf instance for the current task
-   * @throws IOException - errors relating to accessing the ORC file
-   */
-  public static OrcTail getOrcTail(InputFile inputFile, JobConf job) throws IOException {
-
-    try (ReaderImpl orcFileReader = (ReaderImpl) ORC.newFileReader(inputFile, job)) {
-      return ReaderImpl.extractFileTail(orcFileReader.getSerializedFileFooter());
-    }
-  }
-}
diff --git a/hive3/src/test/java/org/apache/iceberg/mr/hive/TestHiveSchemaUtilHive3.java b/hive3/src/test/java/org/apache/iceberg/mr/hive/TestHiveSchemaUtilHive3.java
deleted file mode 100644
index 69a3dd39f8d1..000000000000
--- a/hive3/src/test/java/org/apache/iceberg/mr/hive/TestHiveSchemaUtilHive3.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.mr.hive;
-
-import static org.apache.iceberg.types.Types.NestedField.optional;
-
-import java.util.List;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.iceberg.Schema;
-import org.apache.iceberg.hive.TestHiveSchemaUtil;
-import org.apache.iceberg.relocated.com.google.common.collect.Lists;
-import org.apache.iceberg.types.Types;
-
-public class TestHiveSchemaUtilHive3 extends TestHiveSchemaUtil {
-
-  @Override
-  protected List<FieldSchema> getSupportedFieldSchemas() {
-    List<FieldSchema> fields = Lists.newArrayList(super.getSupportedFieldSchemas());
-    // timestamp local tz only present in Hive3
-    fields.add(new FieldSchema("c_timestamptz", serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME, null));
-    return fields;
-  }
-
-  @Override
-  protected Schema getSchemaWithSupportedTypes() {
-    Schema schema = super.getSchemaWithSupportedTypes();
-    List<Types.NestedField> columns = Lists.newArrayList(schema.columns());
-    // timestamp local tz only present in Hive3
-    columns.add(optional(columns.size(), "c_timestamptz", Types.TimestampType.withZone()));
-    return new Schema(columns);
-  }
-}
diff --git a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java b/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java
deleted file mode 100644
index e3e06c8560e4..000000000000
--- a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergDateObjectInspectorHive3.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.mr.hive.serde.objectinspector;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-import java.time.LocalDate;
-import org.apache.hadoop.hive.common.type.Date;
-import org.apache.hadoop.hive.serde2.io.DateWritableV2;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.junit.jupiter.api.Test;
-
-public class TestIcebergDateObjectInspectorHive3 {
-
-  @Test
-  public void testIcebergDateObjectInspector() {
-    DateObjectInspector oi = IcebergDateObjectInspectorHive3.get();
-
-    assertThat(oi.getCategory()).isEqualTo(ObjectInspector.Category.PRIMITIVE);
-    assertThat(oi.getPrimitiveCategory())
-        .isEqualTo(PrimitiveObjectInspector.PrimitiveCategory.DATE);
-
-    assertThat(oi.getTypeInfo()).isEqualTo(TypeInfoFactory.dateTypeInfo);
-    assertThat(oi.getTypeName()).isEqualTo(TypeInfoFactory.dateTypeInfo.getTypeName());
-
-    assertThat(oi.getJavaPrimitiveClass()).isEqualTo(Date.class);
-    assertThat(oi.getPrimitiveWritableClass()).isEqualTo(DateWritableV2.class);
-
-    assertThat(oi.copyObject(null)).isNull();
-    assertThat(oi.getPrimitiveJavaObject(null)).isNull();
-    assertThat(oi.getPrimitiveWritableObject(null)).isNull();
-
-    int epochDays = 5005;
-    LocalDate local = LocalDate.ofEpochDay(epochDays);
-    Date date = Date.ofEpochDay(epochDays);
-
-    assertThat(oi.getPrimitiveJavaObject(local)).isEqualTo(date);
-    assertThat(oi.getPrimitiveWritableObject(local)).isEqualTo(new DateWritableV2(date));
-
-    Date copy = (Date) oi.copyObject(date);
-
-    assertThat(copy).isEqualTo(date).isNotSameAs(date);
-    assertThat(oi.preferWritable()).isFalse();
-  }
-}
diff --git a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java b/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java
deleted file mode 100644
index 523d530bfd2a..000000000000
--- a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampObjectInspectorHive3.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.mr.hive.serde.objectinspector;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-import java.time.Instant;
-import java.time.LocalDateTime;
-import java.time.ZoneId;
-import org.apache.hadoop.hive.common.type.Timestamp;
-import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.junit.jupiter.api.Test;
-
-public class TestIcebergTimestampObjectInspectorHive3 {
-
-  @Test
-  public void testIcebergTimestampObjectInspector() {
-    IcebergTimestampObjectInspectorHive3 oi = IcebergTimestampObjectInspectorHive3.get();
-
-    assertThat(oi.getCategory()).isEqualTo(ObjectInspector.Category.PRIMITIVE);
-    assertThat(oi.getPrimitiveCategory())
-        .isEqualTo(PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMP);
-
-    assertThat(oi.getTypeInfo()).isEqualTo(TypeInfoFactory.timestampTypeInfo);
-    assertThat(oi.getTypeName()).isEqualTo(TypeInfoFactory.timestampTypeInfo.getTypeName());
-
-    assertThat(oi.getJavaPrimitiveClass()).isEqualTo(Timestamp.class);
-    assertThat(oi.getPrimitiveWritableClass()).isEqualTo(TimestampWritableV2.class);
-
-    assertThat(oi.copyObject(null)).isNull();
-    assertThat(oi.getPrimitiveJavaObject(null)).isNull();
-    assertThat(oi.getPrimitiveWritableObject(null)).isNull();
-    assertThat(oi.convert(null)).isNull();
-
-    long epochMilli = 1601471970000L;
-    LocalDateTime local =
-        LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMilli), ZoneId.of("UTC"))
-            .plusNanos(34000);
-    Timestamp ts = Timestamp.ofEpochMilli(epochMilli);
-    ts.setNanos(34000);
-
-    assertThat(oi.getPrimitiveJavaObject(local)).isEqualTo(ts);
-    assertThat(oi.getPrimitiveWritableObject(local)).isEqualTo(new TimestampWritableV2(ts));
-
-    Timestamp copy = (Timestamp) oi.copyObject(ts);
-
-    assertThat(copy).isEqualTo(ts).isNotSameAs(ts);
-    assertThat(oi.preferWritable()).isFalse();
-    assertThat(oi.convert(ts)).isEqualTo(local);
-  }
-}
diff --git a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampWithZoneObjectInspectorHive3.java b/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampWithZoneObjectInspectorHive3.java
deleted file mode 100644
index d14cb893d82e..000000000000
--- a/hive3/src/test/java/org/apache/iceberg/mr/hive/serde/objectinspector/TestIcebergTimestampWithZoneObjectInspectorHive3.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.iceberg.mr.hive.serde.objectinspector;
-
-import static org.assertj.core.api.Assertions.assertThat;
-
-import java.time.LocalDateTime;
-import java.time.OffsetDateTime;
-import java.time.ZoneId;
-import java.time.ZoneOffset;
-import org.apache.hadoop.hive.common.type.TimestampTZ;
-import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.junit.jupiter.api.Test;
-
-public class TestIcebergTimestampWithZoneObjectInspectorHive3 {
-
-  @Test
-  public void testIcebergTimestampLocalTZObjectInspector() {
-    IcebergTimestampWithZoneObjectInspectorHive3 oi =
-        IcebergTimestampWithZoneObjectInspectorHive3.get();
-
-    assertThat(oi.getCategory()).isEqualTo(ObjectInspector.Category.PRIMITIVE);
-    assertThat(oi.getPrimitiveCategory())
-        .isEqualTo(PrimitiveObjectInspector.PrimitiveCategory.TIMESTAMPLOCALTZ);
-
-    assertThat(oi.getTypeInfo()).isEqualTo(TypeInfoFactory.timestampLocalTZTypeInfo);
-    assertThat(oi.getTypeName()).isEqualTo(TypeInfoFactory.timestampLocalTZTypeInfo.getTypeName());
-
-    assertThat(oi.getJavaPrimitiveClass()).isEqualTo(TimestampTZ.class);
-    assertThat(oi.getPrimitiveWritableClass()).isEqualTo(TimestampLocalTZWritable.class);
-
-    assertThat(oi.copyObject(null)).isNull();
-    assertThat(oi.getPrimitiveJavaObject(null)).isNull();
-    assertThat(oi.getPrimitiveWritableObject(null)).isNull();
-    assertThat(oi.convert(null)).isNull();
-
-    LocalDateTime dateTimeAtUTC = LocalDateTime.of(2020, 12, 10, 15, 55, 20, 30000);
-    OffsetDateTime offsetDateTime =
-        OffsetDateTime.of(dateTimeAtUTC.plusHours(4), ZoneOffset.ofHours(4));
-    TimestampTZ ts = new TimestampTZ(dateTimeAtUTC.atZone(ZoneId.of("UTC")));
-
-    assertThat(oi.getPrimitiveJavaObject(offsetDateTime)).isEqualTo(ts);
-    assertThat(oi.getPrimitiveWritableObject(offsetDateTime))
-        .isEqualTo(new TimestampLocalTZWritable(ts));
-
-    // try with another offset as well
-    offsetDateTime = OffsetDateTime.of(dateTimeAtUTC.plusHours(11), ZoneOffset.ofHours(11));
-    assertThat(oi.getPrimitiveJavaObject(offsetDateTime)).isEqualTo(ts);
-    assertThat(oi.getPrimitiveWritableObject(offsetDateTime))
-        .isEqualTo(new TimestampLocalTZWritable(ts));
-
-    TimestampTZ copy = (TimestampTZ) oi.copyObject(ts);
-
-    assertThat(copy).isEqualTo(ts).isNotSameAs(ts);
-    assertThat(oi.preferWritable()).isFalse();
-    assertThat(oi.convert(ts)).isEqualTo(OffsetDateTime.of(dateTimeAtUTC, ZoneOffset.UTC));
-  }
-}
diff --git a/hive3/src/test/resources/log4j2.properties b/hive3/src/test/resources/log4j2.properties
deleted file mode 100644
index c6be389e66d6..000000000000
--- a/hive3/src/test/resources/log4j2.properties
+++ /dev/null
@@ -1,33 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# Configuration for modules which are using log4j v2 (e.g. Hive3)
-
-appenders = console
-appender.console.type = Console
-appender.console.name = STDOUT
-appender.console.layout.type = PatternLayout
-appender.console.layout.pattern = %d{ISO8601} %-5p [%t] %c{2} (%F:%M(%L)) - %m%n
-
-rootLogger.level = WARN
-rootLogger.appenderRefs = stdout
-rootLogger.appenderRef.stdout.ref = STDOUT
-
-# custom logger to print Counter values after Tez queries
-logger = counters
-logger.counters.name = org.apache.hadoop.hive.ql.exec.Task
-logger.counters.level = INFO
diff --git a/kafka-connect/build.gradle b/kafka-connect/build.gradle
index 15bf013f28b2..6ac9f9bc37d3 100644
--- a/kafka-connect/build.gradle
+++ b/kafka-connect/build.gradle
@@ -134,7 +134,7 @@ project(':iceberg-kafka-connect:iceberg-kafka-connect-runtime') {
         implementation 'com.azure:azure-identity'
 
         hive project(':iceberg-hive-metastore')
-        hive(libs.hive2.metastore) {
+        hive(libs.hive4.metastore) {
           exclude group: 'org.apache.avro', module: 'avro'
          exclude group: 'org.slf4j', module: 'slf4j-log4j12'
           exclude group: 'org.pentaho' // missing dependency
diff --git a/settings.gradle b/settings.gradle
index 103741389a26..8c9ab55116aa 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -180,20 +180,10 @@ if (sparkVersions.contains("3.5")) {
   project(":iceberg-spark:spark-runtime-3.5_${scalaVersion}").name = "iceberg-spark-runtime-3.5_${scalaVersion}"
 }
 
-// hive 3 depends on hive 2, so always add hive 2 if hive3 is enabled
-if (hiveVersions.contains("2") || hiveVersions.contains("3")) {
+if (hiveVersions.contains("2")) {
   include 'mr'
-  include 'hive-runtime'
   project(':mr').name = 'iceberg-mr'
-  project(':hive-runtime').name = 'iceberg-hive-runtime'
-}
-
-if (hiveVersions.contains("3")) {
-  include 'hive3'
-  include 'hive3-orc-bundle'
-  project(':hive3').name = 'iceberg-hive3'
-  project(':hive3-orc-bundle').name = 'iceberg-hive3-orc-bundle'
 }
 
 if (kafkaVersions.contains("3")) {