Bump Iceberg to 1.4.0, support Spark 3.5 (#7539)
---------

Co-authored-by: Alexandre Dutra <[email protected]>
snazy and adutra authored Oct 9, 2023
1 parent 113b349 commit b1e9861
Showing 33 changed files with 1,286 additions and 141 deletions.
7 changes: 6 additions & 1 deletion .github/workflows/ci.yml
@@ -760,7 +760,12 @@ jobs:
       - name: Nessie Spark 3.4 / 2.13 Extensions test
         uses: gradle/gradle-build-action@v2
         with:
-          arguments: :nessie:nessie-iceberg:nessie-spark-extensions-3.3_2.13:test :nessie:nessie-iceberg:nessie-spark-extensions-3.3_2.13:intTest --scan
+          arguments: :nessie:nessie-iceberg:nessie-spark-extensions-3.4_2.13:test :nessie:nessie-iceberg:nessie-spark-extensions-3.4_2.13:intTest --scan
+
+      - name: Nessie Spark 3.5 / 2.13 Extensions test
+        uses: gradle/gradle-build-action@v2
+        with:
+          arguments: :nessie:nessie-iceberg:nessie-spark-extensions-3.5_2.13:test :nessie:nessie-iceberg:nessie-spark-extensions-3.5_2.13:intTest --scan
 
       #- name: Publish Nessie + Iceberg to local Maven repo
       #  uses: gradle/gradle-build-action@v2
8 changes: 4 additions & 4 deletions build-logic/src/main/kotlin/Utilities.kt
@@ -345,16 +345,16 @@ fun Project.useSparkScalaVersionsForProject(
  * Get the newest Java LTS version that is lower than or equal to the currently running Java
  * version.
  *
- * For Spark 3.1 and 3.2, this is always Java 11. For Spark 3.3 and 3.4, this is Java 17 when
- * running the build on Java 17 or newer, otherwise Java 11.
+ * For Spark 3.2, this is always Java 11. For Spark 3.3 and 3.4, this is Java 17 when running the
+ * build on Java 17 or newer, otherwise Java 11.
  */
 fun javaVersionForSpark(sparkMajorVersion: String): Int {
   val currentJavaVersion = JavaVersion.current().majorVersion.toInt()
   return when (sparkMajorVersion) {
-    "3.1",
     "3.2" -> 11
     "3.3",
-    "3.4" ->
+    "3.4",
+    "3.5" ->
       when {
         currentJavaVersion >= 17 -> 17
         else -> 11
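As a quick cross-check of the rule described in the doc comment, here is a minimal, self-contained Kotlin sketch. The standalone function name and the check calls are illustrative only; the real build logic reads the running JVM's version itself:

// Illustrative restatement of javaVersionForSpark, with the current Java
// version passed in explicitly so the mapping is easy to exercise.
fun javaLtsForSpark(sparkMajorVersion: String, currentJavaVersion: Int): Int =
    when (sparkMajorVersion) {
        "3.2" -> 11
        "3.3", "3.4", "3.5" -> if (currentJavaVersion >= 17) 17 else 11
        else -> throw IllegalArgumentException("Unsupported Spark version: $sparkMajorVersion")
    }

fun main() {
    check(javaLtsForSpark("3.2", 21) == 11) // Spark 3.2 always builds with Java 11
    check(javaLtsForSpark("3.5", 21) == 17) // Spark 3.3-3.5 pick Java 17 on a 17+ JVM
    check(javaLtsForSpark("3.5", 11) == 11) // ...and fall back to Java 11 otherwise
}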
@@ -120,13 +120,14 @@ void checkMainPom(String artifactId) throws Exception {
         "iceberg-views",
         "nessie-spark-antlr-runtime",
         "nessie-spark-extensions-grammar",
-        "nessie-spark-extensions-3.1_2.12",
         "nessie-spark-extensions-3.2_2.12",
         "nessie-spark-extensions-3.2_2.13",
         "nessie-spark-extensions-3.3_2.12",
         "nessie-spark-extensions-3.3_2.13",
         "nessie-spark-extensions-3.4_2.12",
-        "nessie-spark-extensions-3.4_2.13"
+        "nessie-spark-extensions-3.4_2.13",
+        "nessie-spark-extensions-3.5_2.12",
+        "nessie-spark-extensions-3.5_2.13"
       })
   void checkIntegrationsPom(String artifactId) throws Exception {
     checkPom("org.projectnessie.nessie-integrations", artifactId);
@@ -27,7 +27,8 @@
 import java.util.stream.IntStream;
 import java.util.stream.Stream;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.iceberg.aws.AwsProperties;
+import org.apache.iceberg.aws.HttpClientProperties;
+import org.apache.iceberg.aws.s3.S3FileIOProperties;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.ValueSource;
@@ -175,10 +176,10 @@ protected static URI icebergBaseUri(String path) {
 
   protected Map<String, String> icebergProperties(S3MockServer server) {
     Map<String, String> props = new HashMap<>();
-    props.put(AwsProperties.S3FILEIO_ACCESS_KEY_ID, "accessKey");
-    props.put(AwsProperties.S3FILEIO_SECRET_ACCESS_KEY, "secretKey");
-    props.put(AwsProperties.S3FILEIO_ENDPOINT, server.getBaseUri().toString());
-    props.put(AwsProperties.HTTP_CLIENT_TYPE, AwsProperties.HTTP_CLIENT_TYPE_URLCONNECTION);
+    props.put(S3FileIOProperties.ACCESS_KEY_ID, "accessKey");
+    props.put(S3FileIOProperties.SECRET_ACCESS_KEY, "secretKey");
+    props.put(S3FileIOProperties.ENDPOINT, server.getBaseUri().toString());
+    props.put(HttpClientProperties.CLIENT_TYPE, HttpClientProperties.CLIENT_TYPE_URLCONNECTION);
     return props;
   }
 
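The change above follows Iceberg's split of the former monolithic AwsProperties constants into per-concern classes; as of 1.4.0 the S3 and HTTP-client settings come from S3FileIOProperties and HttpClientProperties. A hedged Kotlin sketch of the same configuration outside the test harness (the endpoint and credential values are placeholders; the constants are exactly the ones used in the diff):

import org.apache.iceberg.aws.HttpClientProperties
import org.apache.iceberg.aws.s3.S3FileIOProperties

// Assemble the same S3FileIO settings the test builds: static credentials,
// a custom endpoint (e.g. a local S3 mock), and the lightweight
// URLConnection HTTP client.
fun icebergS3Properties(endpoint: String): Map<String, String> =
    mapOf(
        S3FileIOProperties.ACCESS_KEY_ID to "accessKey",
        S3FileIOProperties.SECRET_ACCESS_KEY to "secretKey",
        S3FileIOProperties.ENDPOINT to endpoint,
        HttpClientProperties.CLIENT_TYPE to HttpClientProperties.CLIENT_TYPE_URLCONNECTION,
    )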
@@ -36,7 +36,7 @@ private BridgeToIceberg() {}
   public static DataFile dummyDataFile(String filePath, StructType partitionType) {
     PartitionData partitionData = new PartitionData(partitionType);
     return new GenericDataFile(
-        0, filePath, FileFormat.PARQUET, partitionData, 42L, DUMMY_METRICS, null, null, 0);
+        0, filePath, FileFormat.PARQUET, partitionData, 42L, DUMMY_METRICS, null, null, null, 0);
   }
 
   public static IndexedRecord dummyIndexedDataFile(String filePath, StructType partitionType) {
15 changes: 9 additions & 6 deletions gradle/libs.versions.toml
@@ -9,7 +9,7 @@ errorproneSlf4j = "0.1.20"
 gatling = "3.9.5"
 googleJavaFormat = "1.18.1"
 hadoop = "3.3.6"
-iceberg = "1.3.1" # While bumping this version, 'nessieClientVersion' must also be updated.
+iceberg = "1.4.0" # While bumping this version, 'nessieClientVersion' must also be updated.
 immutables = "2.10.0"
 jacoco = "0.8.10"
 jandex = "3.1.5"
@@ -19,7 +19,7 @@ keycloak = "22.0.4"
 maven = "3.9.5"
 mavenResolver = "1.9.16"
 mockito="4.11.0"
-nessieClientVersion = "0.59.0" # Must be in sync with Nessie version in the Iceberg release.
+nessieClientVersion = "0.71.0" # Must be in sync with Nessie version in the Iceberg release.
 nessieBuildPlugins = "0.2.24"
 opentelemetry = "1.31.0"
 opentelemetryAlpha = "1.25.0-alpha"
@@ -44,7 +44,7 @@ antlr-antlr4-runtime = { module = "org.antlr:antlr4-runtime", version.ref = "ant
 assertj-core = { module = "org.assertj:assertj-core", version = "3.24.2" }
 avro = { module = "org.apache.avro:avro", version = "1.11.3" }
 awaitility = { module = "org.awaitility:awaitility", version = "4.2.0" }
-awssdk-bom = { module = "software.amazon.awssdk:bom", version = "2.20.93" }
+awssdk-bom = { module = "software.amazon.awssdk:bom", version = "2.20.162" }
 bouncycastle-bcpkix = { module = "org.bouncycastle:bcpkix-jdk15on", version.ref = "bouncycastle" }
 bouncycastle-bcprov = { module = "org.bouncycastle:bcprov-jdk15on", version.ref = "bouncycastle" }
 cassandra-driver-bom = { module = "com.datastax.oss:java-driver-bom", version = "4.17.0" }
@@ -73,20 +73,22 @@ iceberg-core = { module = "org.apache.iceberg:iceberg-core", version.ref = "iceb
 iceberg-hive-metastore = { module = "org.apache.iceberg:iceberg-hive-metastore", version.ref = "iceberg" }
 iceberg-nessie = { module = "org.apache.iceberg:iceberg-nessie", version.ref = "iceberg" }
 iceberg-parquet = { module = "org.apache.iceberg:iceberg-parquet", version.ref = "iceberg" }
-iceberg-spark-extensions31-scala212 = { module = "org.apache.iceberg:iceberg-spark-extensions-3.1_2.12", version.ref = "iceberg" }
 iceberg-spark-extensions32-scala212 = { module = "org.apache.iceberg:iceberg-spark-extensions-3.2_2.12", version.ref = "iceberg" }
 iceberg-spark-extensions32-scala213 = { module = "org.apache.iceberg:iceberg-spark-extensions-3.2_2.13", version.ref = "iceberg" }
 iceberg-spark-extensions33-scala212 = { module = "org.apache.iceberg:iceberg-spark-extensions-3.3_2.12", version.ref = "iceberg" }
 iceberg-spark-extensions33-scala213 = { module = "org.apache.iceberg:iceberg-spark-extensions-3.3_2.13", version.ref = "iceberg" }
 iceberg-spark-extensions34-scala212 = { module = "org.apache.iceberg:iceberg-spark-extensions-3.4_2.12", version.ref = "iceberg" }
 iceberg-spark-extensions34-scala213 = { module = "org.apache.iceberg:iceberg-spark-extensions-3.4_2.13", version.ref = "iceberg" }
-iceberg-spark31-scala212 = { module = "org.apache.iceberg:iceberg-spark-3.1_2.12", version.ref = "iceberg" }
+iceberg-spark-extensions35-scala212 = { module = "org.apache.iceberg:iceberg-spark-extensions-3.5_2.12", version.ref = "iceberg" }
+iceberg-spark-extensions35-scala213 = { module = "org.apache.iceberg:iceberg-spark-extensions-3.5_2.13", version.ref = "iceberg" }
 iceberg-spark32-scala212 = { module = "org.apache.iceberg:iceberg-spark-3.2_2.12", version.ref = "iceberg" }
 iceberg-spark32-scala213 = { module = "org.apache.iceberg:iceberg-spark-3.2_2.13", version.ref = "iceberg" }
 iceberg-spark33-scala212 = { module = "org.apache.iceberg:iceberg-spark-3.3_2.12", version.ref = "iceberg" }
 iceberg-spark33-scala213 = { module = "org.apache.iceberg:iceberg-spark-3.3_2.13", version.ref = "iceberg" }
 iceberg-spark34-scala212 = { module = "org.apache.iceberg:iceberg-spark-3.4_2.12", version.ref = "iceberg" }
 iceberg-spark34-scala213 = { module = "org.apache.iceberg:iceberg-spark-3.4_2.13", version.ref = "iceberg" }
+iceberg-spark35-scala212 = { module = "org.apache.iceberg:iceberg-spark-3.5_2.12", version.ref = "iceberg" }
+iceberg-spark35-scala213 = { module = "org.apache.iceberg:iceberg-spark-3.5_2.13", version.ref = "iceberg" }
 immutables-builder = { module = "org.immutables:builder", version.ref = "immutables" }
 immutables-value-annotations = { module = "org.immutables:value-annotations", version.ref = "immutables" }
 immutables-value-fixture = { module = "org.immutables:value-fixture", version = "2.9.3" }
@@ -151,13 +153,14 @@ scala-library-v213 = { module = "org.scala-lang:scala-library", version = { stri
 slf4j-api = { module = "org.slf4j:slf4j-api", version.ref = "slf4j" }
 slf4j-jcl-over-slf4j = { module = "org.slf4j:jcl-over-slf4j", version.ref = "slf4j" }
 slf4j-log4j-over-slf4j = { module = "org.slf4j:log4j-over-slf4j", version.ref = "slf4j" }
-spark-sql-v31-v212 = { module = "org.apache.spark:spark-sql_2.12", version = { strictly = "[3.1, 3.2[", prefer = "3.1.3"}}
 spark-sql-v32-v212 = { module = "org.apache.spark:spark-sql_2.12", version = { strictly = "[3.2, 3.3[", prefer = "3.2.4"}}
 spark-sql-v32-v213 = { module = "org.apache.spark:spark-sql_2.13", version = { strictly = "[3.2, 3.3[", prefer = "3.2.4"}}
 spark-sql-v33-v212 = { module = "org.apache.spark:spark-sql_2.12", version = { strictly = "[3.3, 3.4[", prefer = "3.3.3"}}
 spark-sql-v33-v213 = { module = "org.apache.spark:spark-sql_2.13", version = { strictly = "[3.3, 3.4[", prefer = "3.3.3"}}
 spark-sql-v34-v212 = { module = "org.apache.spark:spark-sql_2.12", version = { strictly = "[3.4, 3.5[", prefer = "3.4.1"}}
 spark-sql-v34-v213 = { module = "org.apache.spark:spark-sql_2.13", version = { strictly = "[3.4, 3.5[", prefer = "3.4.1"}}
+spark-sql-v35-v212 = { module = "org.apache.spark:spark-sql_2.12", version = { strictly = "[3.5, 3.6[", prefer = "3.5.0"}}
+spark-sql-v35-v213 = { module = "org.apache.spark:spark-sql_2.13", version = { strictly = "[3.5, 3.6[", prefer = "3.5.0"}}
 testcontainers-bom = { module = "org.testcontainers:testcontainers-bom", version = "1.19.1" }
 testcontainers-keycloak = { module = "com.github.dasniko:testcontainers-keycloak", version = "2.6.0" }
 threeten-extra = { module = "org.threeten:threeten-extra", version = "1.7.2" }
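Aside: the strictly/prefer pairs above are Gradle rich version constraints. strictly = "[3.5, 3.6[" confines resolution to the 3.5 line (the trailing "[" excludes 3.6), while prefer = "3.5.0" picks the default within that range. A sketch of consuming one of these catalog entries from a Kotlin DSL build script (illustrative only; Gradle turns the dashes of the alias spark-sql-v35-v213 into dotted accessors):

// build.gradle.kts (sketch)
dependencies {
    // Resolves org.apache.spark:spark-sql_2.13 within [3.5, 3.6), preferring 3.5.0.
    implementation(libs.spark.sql.v35.v213)
}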
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2022 Dremio
+ * Copyright (C) 2023 Dremio
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,108 +19,121 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference
 import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType}
 
 object NessieCommandOutputs {
-  def referenceOutput(): Seq[AttributeReference] = new StructType(
-    Array[StructField](
-      StructField(
-        "refType",
-        DataTypes.StringType,
-        nullable = false,
-        Metadata.empty
-      ),
-      StructField(
-        "name",
-        DataTypes.StringType,
-        nullable = false,
-        Metadata.empty
-      ),
-      StructField(
-        "hash",
-        DataTypes.StringType,
-        nullable = false,
-        Metadata.empty
-      )
-    )
-  ).toAttributes
+  private def structToAttributes(struct: StructType): Seq[AttributeReference] =
+    struct.map(f =>
+      AttributeReference(f.name, f.dataType, f.nullable, f.metadata)()
+    )
+
+  def referenceOutput(): Seq[AttributeReference] = structToAttributes(
+    new StructType(
+      Array[StructField](
+        StructField(
+          "refType",
+          DataTypes.StringType,
+          nullable = false,
+          Metadata.empty
+        ),
+        StructField(
+          "name",
+          DataTypes.StringType,
+          nullable = false,
+          Metadata.empty
+        ),
+        StructField(
+          "hash",
+          DataTypes.StringType,
+          nullable = false,
+          Metadata.empty
+        )
+      )
+    )
+  )
 
-  def simpleReferenceOutput(): Seq[AttributeReference] = new StructType(
-    Array[StructField](
-      StructField(
-        "name",
-        DataTypes.StringType,
-        nullable = false,
-        Metadata.empty
-      ),
-      StructField(
-        "hash",
-        DataTypes.StringType,
-        nullable = false,
-        Metadata.empty
-      )
-    )
-  ).toAttributes
+  def simpleReferenceOutput(): Seq[AttributeReference] = structToAttributes(
+    new StructType(
+      Array[StructField](
+        StructField(
+          "name",
+          DataTypes.StringType,
+          nullable = false,
+          Metadata.empty
+        ),
+        StructField(
+          "hash",
+          DataTypes.StringType,
+          nullable = false,
+          Metadata.empty
+        )
+      )
+    )
+  )
 
-  def dropReferenceOutput(): Seq[AttributeReference] = new StructType(
-    Array[StructField](
-      StructField(
-        "status",
-        DataTypes.StringType,
-        nullable = false,
-        Metadata.empty
-      )
-    )
-  ).toAttributes
+  def dropReferenceOutput(): Seq[AttributeReference] = structToAttributes(
+    new StructType(
+      Array[StructField](
+        StructField(
+          "status",
+          DataTypes.StringType,
+          nullable = false,
+          Metadata.empty
+        )
+      )
+    )
+  )
 
-  def showLogOutput(): Seq[AttributeReference] = new StructType(
-    Array[StructField](
-      StructField(
-        "author",
-        DataTypes.StringType,
-        nullable = false,
-        Metadata.empty
-      ),
-      StructField(
-        "committer",
-        DataTypes.StringType,
-        nullable = false,
-        Metadata.empty
-      ),
-      StructField(
-        "hash",
-        DataTypes.StringType,
-        nullable = false,
-        Metadata.empty
-      ),
-      StructField(
-        "message",
-        DataTypes.StringType,
-        nullable = false,
-        Metadata.empty
-      ),
-      StructField(
-        "signedOffBy",
-        DataTypes.StringType,
-        nullable = false,
-        Metadata.empty
-      ),
-      StructField(
-        "authorTime",
-        DataTypes.TimestampType,
-        nullable = false,
-        Metadata.empty
-      ),
-      StructField(
-        "committerTime",
-        DataTypes.TimestampType,
-        nullable = false,
-        Metadata.empty
-      ),
-      StructField(
-        "properties",
-        DataTypes
-          .createMapType(DataTypes.StringType, DataTypes.StringType, false),
-        nullable = false,
-        Metadata.empty
-      )
-    )
-  ).toAttributes
+  def showLogOutput(): Seq[AttributeReference] = structToAttributes(
+    new StructType(
+      Array[StructField](
+        StructField(
+          "author",
+          DataTypes.StringType,
+          nullable = false,
+          Metadata.empty
+        ),
+        StructField(
+          "committer",
+          DataTypes.StringType,
+          nullable = false,
+          Metadata.empty
+        ),
+        StructField(
+          "hash",
+          DataTypes.StringType,
+          nullable = false,
+          Metadata.empty
+        ),
+        StructField(
+          "message",
+          DataTypes.StringType,
+          nullable = false,
+          Metadata.empty
+        ),
+        StructField(
+          "signedOffBy",
+          DataTypes.StringType,
+          nullable = false,
+          Metadata.empty
+        ),
+        StructField(
+          "authorTime",
+          DataTypes.TimestampType,
+          nullable = false,
+          Metadata.empty
+        ),
+        StructField(
+          "committerTime",
+          DataTypes.TimestampType,
+          nullable = false,
+          Metadata.empty
+        ),
+        StructField(
+          "properties",
+          DataTypes
+            .createMapType(DataTypes.StringType, DataTypes.StringType, false),
+          nullable = false,
+          Metadata.empty
+        )
+      )
+    )
+  )
 }
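For context on why every method in this file changed shape: StructType.toAttributes is no longer available as a public method in Spark 3.5 (it appears to have moved into Catalyst's internal DataTypeUtils), so the new private structToAttributes helper derives the AttributeReference instances directly from the struct's fields, and the same source then compiles against Spark 3.2 through 3.5.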