Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test: Add a test utility method to programmatically create expected partition specs #8467

Merged
merged 17 commits into from
Nov 7, 2023
40 changes: 40 additions & 0 deletions api/src/test/java/org/apache/iceberg/TestHelpers.java
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,10 @@ public static void serialize(final Serializable obj, final OutputStream outputSt
}
}

  /**
   * Returns a new {@link ExpectedSpecBuilder} for programmatically constructing an expected
   * {@code PartitionSpec} in tests, as an alternative to parsing a JSON spec string.
   */
  public static ExpectedSpecBuilder newExpectedSpecBuilder() {
    return new ExpectedSpecBuilder();
  }

public static class KryoHelpers {
private KryoHelpers() {}

Expand Down Expand Up @@ -667,4 +671,40 @@ public List<Long> splitOffsets() {
return null;
}
}

public static class ExpectedSpecBuilder {
private final UnboundPartitionSpec.Builder unboundPartitionSpecBuilder;

private Schema schema;

private ExpectedSpecBuilder() {
this.unboundPartitionSpecBuilder = UnboundPartitionSpec.builder();
}

public ExpectedSpecBuilder withSchema(Schema newSchema) {
this.schema = newSchema;
return this;
}

public ExpectedSpecBuilder withSpecId(int newSpecId) {
unboundPartitionSpecBuilder.withSpecId(newSpecId);
return this;
}

public ExpectedSpecBuilder addField(
String transformAsString, int sourceId, int partitionId, String name) {
unboundPartitionSpecBuilder.addField(transformAsString, sourceId, partitionId, name);
return this;
}

public ExpectedSpecBuilder addField(String transformAsString, int sourceId, String name) {
unboundPartitionSpecBuilder.addField(transformAsString, sourceId, name);
return this;
}

public PartitionSpec build() {
Preconditions.checkNotNull(schema, "Field schema is missing");
return unboundPartitionSpecBuilder.build().bind(schema);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
package org.apache.iceberg.spark.extensions;

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.spark.SparkCatalogConfig;
import org.apache.iceberg.spark.source.SparkTable;
import org.apache.spark.sql.connector.catalog.CatalogManager;
Expand Down Expand Up @@ -392,17 +392,11 @@ public void testReplacePartition() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"ts_hour\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "ts_hour")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down Expand Up @@ -431,17 +425,11 @@ public void testReplacePartitionAndRename() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"hour_col\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "hour_col")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down Expand Up @@ -470,17 +458,11 @@ public void testReplaceNamedPartition() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"ts_hour\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "ts_hour")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down Expand Up @@ -509,17 +491,11 @@ public void testReplaceNamedPartitionAndRenameDifferently() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"hour_col\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "hour_col")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import org.apache.iceberg.ManifestFiles;
import org.apache.iceberg.ManifestWriter;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
Expand Down Expand Up @@ -75,14 +74,18 @@ public class TestForwardCompatibility {

// create a spec for the schema that uses a "zero" transform that produces all 0s
private static final PartitionSpec UNKNOWN_SPEC =
PartitionSpecParser.fromJson(
SCHEMA,
"{ \"spec-id\": 0, \"fields\": [ { \"name\": \"id_zero\", \"transform\": \"zero\", \"source-id\": 1 } ] }");
org.apache.iceberg.TestHelpers.newExpectedSpecBuilder()
.withSchema(SCHEMA)
.withSpecId(0)
.addField("zero", 1, "id_zero")
.build();
// create a fake spec to use to write table metadata
private static final PartitionSpec FAKE_SPEC =
PartitionSpecParser.fromJson(
SCHEMA,
"{ \"spec-id\": 0, \"fields\": [ { \"name\": \"id_zero\", \"transform\": \"identity\", \"source-id\": 1 } ] }");
org.apache.iceberg.TestHelpers.newExpectedSpecBuilder()
.withSchema(SCHEMA)
.withSpecId(0)
.addField("identity", 1, "id_zero")
.build();

@Rule public TemporaryFolder temp = new TemporaryFolder();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.MetadataTableType;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -627,9 +627,11 @@ public void testMetadataTablesWithUnknownTransforms() {
Table table = validationCatalog.loadTable(tableIdent);

PartitionSpec unknownSpec =
PartitionSpecParser.fromJson(
table.schema(),
"{ \"spec-id\": 1, \"fields\": [ { \"name\": \"id_zero\", \"transform\": \"zero\", \"source-id\": 1 } ] }");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(1)
.addField("zero", 1, "id_zero")
.build();

// replace the table spec to include an unknown transform
TableOperations ops = ((HasTableOperations) table).operations();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
Expand Down Expand Up @@ -75,9 +75,11 @@ public class TestSparkMetadataColumns extends SparkTestBase {
Types.NestedField.optional(3, "data", Types.StringType.get()));
private static final PartitionSpec SPEC = PartitionSpec.unpartitioned();
private static final PartitionSpec UNKNOWN_SPEC =
PartitionSpecParser.fromJson(
SCHEMA,
"{ \"spec-id\": 1, \"fields\": [ { \"name\": \"id_zero\", \"transform\": \"zero\", \"source-id\": 1 } ] }");
TestHelpers.newExpectedSpecBuilder()
.withSchema(SCHEMA)
.withSpecId(1)
.addField("zero", 1, "id_zero")
.build();

@Parameterized.Parameters(name = "fileFormat = {0}, vectorized = {1}, formatVersion = {2}")
public static Object[][] parameters() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
package org.apache.iceberg.spark.extensions;

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.spark.SparkCatalogConfig;
import org.apache.iceberg.spark.source.SparkTable;
import org.apache.spark.sql.connector.catalog.CatalogManager;
Expand Down Expand Up @@ -392,17 +392,11 @@ public void testReplacePartition() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"ts_hour\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "ts_hour")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down Expand Up @@ -431,17 +425,11 @@ public void testReplacePartitionAndRename() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"hour_col\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "hour_col")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down Expand Up @@ -470,17 +458,11 @@ public void testReplaceNamedPartition() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"ts_hour\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "ts_hour")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down Expand Up @@ -509,17 +491,11 @@ public void testReplaceNamedPartitionAndRenameDifferently() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"hour_col\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "hour_col")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import org.apache.iceberg.ManifestFiles;
import org.apache.iceberg.ManifestWriter;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
Expand Down Expand Up @@ -75,14 +74,19 @@ public class TestForwardCompatibility {

// create a spec for the schema that uses a "zero" transform that produces all 0s
private static final PartitionSpec UNKNOWN_SPEC =
PartitionSpecParser.fromJson(
SCHEMA,
"{ \"spec-id\": 0, \"fields\": [ { \"name\": \"id_zero\", \"transform\": \"zero\", \"source-id\": 1 } ] }");
org.apache.iceberg.TestHelpers.newExpectedSpecBuilder()
.withSchema(SCHEMA)
.withSpecId(0)
.addField("zero", 1, "id_zero")
.build();

// create a fake spec to use to write table metadata
private static final PartitionSpec FAKE_SPEC =
PartitionSpecParser.fromJson(
SCHEMA,
"{ \"spec-id\": 0, \"fields\": [ { \"name\": \"id_zero\", \"transform\": \"identity\", \"source-id\": 1 } ] }");
org.apache.iceberg.TestHelpers.newExpectedSpecBuilder()
.withSchema(SCHEMA)
.withSpecId(0)
.addField("identity", 1, "id_zero")
.build();

@Rule public TemporaryFolder temp = new TemporaryFolder();

Expand Down
Loading