Skip to content

Commit

Permalink
Test: Add a test utility method to programmatically create expected partition specs (#8467)
Browse files Browse the repository at this point in the history
  • Loading branch information
jerqi authored Nov 7, 2023
1 parent 7c4bdaa commit e8bb8b5
Show file tree
Hide file tree
Showing 17 changed files with 214 additions and 240 deletions.
40 changes: 40 additions & 0 deletions api/src/test/java/org/apache/iceberg/TestHelpers.java
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,10 @@ public static void serialize(final Serializable obj, final OutputStream outputSt
}
}

  /**
   * Returns a builder for programmatically constructing an expected {@code PartitionSpec} in
   * tests, replacing hand-written partition-spec JSON.
   */
  public static ExpectedSpecBuilder newExpectedSpecBuilder() {
    return new ExpectedSpecBuilder();
  }

public static class KryoHelpers {
private KryoHelpers() {}

Expand Down Expand Up @@ -667,4 +671,40 @@ public List<Long> splitOffsets() {
return null;
}
}

public static class ExpectedSpecBuilder {
private final UnboundPartitionSpec.Builder unboundPartitionSpecBuilder;

private Schema schema;

private ExpectedSpecBuilder() {
this.unboundPartitionSpecBuilder = UnboundPartitionSpec.builder();
}

public ExpectedSpecBuilder withSchema(Schema newSchema) {
this.schema = newSchema;
return this;
}

public ExpectedSpecBuilder withSpecId(int newSpecId) {
unboundPartitionSpecBuilder.withSpecId(newSpecId);
return this;
}

public ExpectedSpecBuilder addField(
String transformAsString, int sourceId, int partitionId, String name) {
unboundPartitionSpecBuilder.addField(transformAsString, sourceId, partitionId, name);
return this;
}

public ExpectedSpecBuilder addField(String transformAsString, int sourceId, String name) {
unboundPartitionSpecBuilder.addField(transformAsString, sourceId, name);
return this;
}

public PartitionSpec build() {
Preconditions.checkNotNull(schema, "Field schema is missing");
return unboundPartitionSpecBuilder.build().bind(schema);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
package org.apache.iceberg.spark.extensions;

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.spark.SparkCatalogConfig;
import org.apache.iceberg.spark.source.SparkTable;
import org.apache.spark.sql.connector.catalog.CatalogManager;
Expand Down Expand Up @@ -392,17 +392,11 @@ public void testReplacePartition() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"ts_hour\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "ts_hour")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down Expand Up @@ -431,17 +425,11 @@ public void testReplacePartitionAndRename() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"hour_col\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "hour_col")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down Expand Up @@ -470,17 +458,11 @@ public void testReplaceNamedPartition() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"ts_hour\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "ts_hour")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down Expand Up @@ -509,17 +491,11 @@ public void testReplaceNamedPartitionAndRenameDifferently() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"hour_col\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "hour_col")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import org.apache.iceberg.ManifestFiles;
import org.apache.iceberg.ManifestWriter;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
Expand Down Expand Up @@ -75,14 +74,18 @@ public class TestForwardCompatibility {

// create a spec for the schema that uses a "zero" transform that produces all 0s
private static final PartitionSpec UNKNOWN_SPEC =
PartitionSpecParser.fromJson(
SCHEMA,
"{ \"spec-id\": 0, \"fields\": [ { \"name\": \"id_zero\", \"transform\": \"zero\", \"source-id\": 1 } ] }");
org.apache.iceberg.TestHelpers.newExpectedSpecBuilder()
.withSchema(SCHEMA)
.withSpecId(0)
.addField("zero", 1, "id_zero")
.build();
// create a fake spec to use to write table metadata
private static final PartitionSpec FAKE_SPEC =
PartitionSpecParser.fromJson(
SCHEMA,
"{ \"spec-id\": 0, \"fields\": [ { \"name\": \"id_zero\", \"transform\": \"identity\", \"source-id\": 1 } ] }");
org.apache.iceberg.TestHelpers.newExpectedSpecBuilder()
.withSchema(SCHEMA)
.withSpecId(0)
.addField("identity", 1, "id_zero")
.build();

@Rule public TemporaryFolder temp = new TemporaryFolder();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.MetadataTableType;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -627,9 +627,11 @@ public void testMetadataTablesWithUnknownTransforms() {
Table table = validationCatalog.loadTable(tableIdent);

PartitionSpec unknownSpec =
PartitionSpecParser.fromJson(
table.schema(),
"{ \"spec-id\": 1, \"fields\": [ { \"name\": \"id_zero\", \"transform\": \"zero\", \"source-id\": 1 } ] }");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(1)
.addField("zero", 1, "id_zero")
.build();

// replace the table spec to include an unknown transform
TableOperations ops = ((HasTableOperations) table).operations();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
Expand Down Expand Up @@ -75,9 +75,11 @@ public class TestSparkMetadataColumns extends SparkTestBase {
Types.NestedField.optional(3, "data", Types.StringType.get()));
private static final PartitionSpec SPEC = PartitionSpec.unpartitioned();
private static final PartitionSpec UNKNOWN_SPEC =
PartitionSpecParser.fromJson(
SCHEMA,
"{ \"spec-id\": 1, \"fields\": [ { \"name\": \"id_zero\", \"transform\": \"zero\", \"source-id\": 1 } ] }");
TestHelpers.newExpectedSpecBuilder()
.withSchema(SCHEMA)
.withSpecId(1)
.addField("zero", 1, "id_zero")
.build();

@Parameterized.Parameters(name = "fileFormat = {0}, vectorized = {1}, formatVersion = {2}")
public static Object[][] parameters() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
package org.apache.iceberg.spark.extensions;

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.spark.SparkCatalogConfig;
import org.apache.iceberg.spark.source.SparkTable;
import org.apache.spark.sql.connector.catalog.CatalogManager;
Expand Down Expand Up @@ -392,17 +392,11 @@ public void testReplacePartition() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"ts_hour\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "ts_hour")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down Expand Up @@ -431,17 +425,11 @@ public void testReplacePartitionAndRename() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"hour_col\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "hour_col")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down Expand Up @@ -470,17 +458,11 @@ public void testReplaceNamedPartition() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"ts_hour\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "ts_hour")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down Expand Up @@ -509,17 +491,11 @@ public void testReplaceNamedPartitionAndRenameDifferently() {
.build();
} else {
expected =
PartitionSpecParser.fromJson(
table.schema(),
"{\n"
+ " \"spec-id\" : 2,\n"
+ " \"fields\" : [ {\n"
+ " \"name\" : \"hour_col\",\n"
+ " \"transform\" : \"hour\",\n"
+ " \"source-id\" : 3,\n"
+ " \"field-id\" : 1001\n"
+ " } ]\n"
+ "}");
TestHelpers.newExpectedSpecBuilder()
.withSchema(table.schema())
.withSpecId(2)
.addField("hour", 3, 1001, "hour_col")
.build();
}
Assert.assertEquals(
"Should changed from daily to hourly partitioned field", expected, table.spec());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import org.apache.iceberg.ManifestFiles;
import org.apache.iceberg.ManifestWriter;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
Expand Down Expand Up @@ -75,14 +74,19 @@ public class TestForwardCompatibility {

// create a spec for the schema that uses a "zero" transform that produces all 0s
private static final PartitionSpec UNKNOWN_SPEC =
PartitionSpecParser.fromJson(
SCHEMA,
"{ \"spec-id\": 0, \"fields\": [ { \"name\": \"id_zero\", \"transform\": \"zero\", \"source-id\": 1 } ] }");
org.apache.iceberg.TestHelpers.newExpectedSpecBuilder()
.withSchema(SCHEMA)
.withSpecId(0)
.addField("zero", 1, "id_zero")
.build();

// create a fake spec to use to write table metadata
private static final PartitionSpec FAKE_SPEC =
PartitionSpecParser.fromJson(
SCHEMA,
"{ \"spec-id\": 0, \"fields\": [ { \"name\": \"id_zero\", \"transform\": \"identity\", \"source-id\": 1 } ] }");
org.apache.iceberg.TestHelpers.newExpectedSpecBuilder()
.withSchema(SCHEMA)
.withSpecId(0)
.addField("identity", 1, "id_zero")
.build();

@Rule public TemporaryFolder temp = new TemporaryFolder();

Expand Down
Loading

0 comments on commit e8bb8b5

Please sign in to comment.