From 7f6ac921fd401a56a52e150b40fc421077f61af8 Mon Sep 17 00:00:00 2001 From: Oscar Westra van Holthe - Kind Date: Mon, 7 Nov 2022 11:52:31 +0100 Subject: [PATCH] AVRO-3660: Use data generator with RandomData Using `GenericData` (or subclasses) with `RandomData` makes it possible to generate `GenericRecord`, `SpecificRecord` and reflected records as random data. --- .../java/org/apache/avro/util/RandomData.java | 60 +++-- .../org/apache/avro/util/TestRandomData.java | 224 ++++++++++++++++++ 2 files changed, 261 insertions(+), 23 deletions(-) create mode 100644 lang/java/avro/src/test/java/org/apache/avro/util/TestRandomData.java diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java b/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java index 8001e746306..053bc7966a2 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java @@ -17,32 +17,30 @@ */ package org.apache.avro.util; +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; +import org.apache.avro.file.CodecFactory; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericArray; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; + import java.io.File; -import java.nio.Buffer; import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.time.Duration; -import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; import java.util.concurrent.ThreadLocalRandom; -import org.apache.avro.LogicalType; -import org.apache.avro.LogicalTypes; -import org.apache.avro.Schema; -import org.apache.avro.file.CodecFactory; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.generic.GenericArray; -import org.apache.avro.generic.GenericData; -import 
org.apache.avro.generic.GenericDatumWriter; -import org.apache.avro.generic.GenericRecord; - /** Generates schema data as Java objects with random values. */ public class RandomData implements Iterable { public static final String USE_DEFAULT = "use-default"; + private final GenericData genericData; private static final int MILLIS_IN_DAY = (int) Duration.ofDays(1).toMillis(); @@ -64,6 +62,23 @@ public RandomData(Schema schema, int count, boolean utf8ForString) { } public RandomData(Schema schema, int count, long seed, boolean utf8ForString) { + this(GenericData.get(), schema, count, seed, utf8ForString); + } + + public RandomData(GenericData genericData, Schema schema, int count) { + this(genericData, schema, count, false); + } + + public RandomData(GenericData genericData, Schema schema, int count, long seed) { + this(genericData, schema, count, seed, false); + } + + public RandomData(GenericData genericData, Schema schema, int count, boolean utf8ForString) { + this(genericData, schema, count, System.currentTimeMillis(), utf8ForString); + } + + public RandomData(GenericData genericData, Schema schema, int count, long seed, boolean utf8ForString) { + this.genericData = genericData; this.root = schema; this.seed = seed; this.count = count; @@ -74,7 +89,7 @@ public RandomData(Schema schema, int count, long seed, boolean utf8ForString) { public Iterator iterator() { return new Iterator() { private int n; - private Random random = new Random(seed); + private final Random random = new Random(seed); @Override public boolean hasNext() { @@ -98,26 +113,25 @@ public void remove() { private Object generate(Schema schema, Random random, int d) { switch (schema.getType()) { case RECORD: - GenericRecord record = new GenericData.Record(schema); + Object record = genericData.newRecord(null, schema); for (Schema.Field field : schema.getFields()) { Object value = (field.getObjectProp(USE_DEFAULT) == null) ? 
generate(field.schema(), random, d + 1) : GenericData.get().getDefaultValue(field); - record.put(field.name(), value); + genericData.setField(record, field.name(), field.pos(), value); } return record; case ENUM: List symbols = schema.getEnumSymbols(); - return new GenericData.EnumSymbol(schema, symbols.get(random.nextInt(symbols.size()))); + return genericData.createEnum(symbols.get(random.nextInt(symbols.size())), schema); case ARRAY: - int length = (random.nextInt(5) + 2) - d; - @SuppressWarnings("rawtypes") - GenericArray array = new GenericData.Array(length <= 0 ? 0 : length, schema); + int length = Math.max(0, (random.nextInt(5) + 2) - d); + GenericArray array = (GenericArray) genericData.newArray(null, length, schema); for (int i = 0; i < length; i++) array.add(generate(schema.getElementType(), random, d + 1)); return array; case MAP: - length = (random.nextInt(5) + 2) - d; - Map map = new HashMap<>(length <= 0 ? 0 : length); + length = Math.max(0, (random.nextInt(5) + 2) - d); + Map map = (Map) genericData.newMap(null, length); for (int i = 0; i < length; i++) { map.put(randomString(random, 40), generate(schema.getValueType(), random, d + 1)); } @@ -128,7 +142,7 @@ private Object generate(Schema schema, Random random, int d) { case FIXED: byte[] bytes = new byte[schema.getFixedSize()]; random.nextBytes(bytes); - return new GenericData.Fixed(schema, bytes); + return genericData.createFixed(null, bytes, schema); case STRING: return randomString(random, 40); case BYTES: @@ -180,7 +194,7 @@ private Object randomString(Random random, int maxLength) { private static ByteBuffer randomBytes(Random rand, int maxLength) { ByteBuffer bytes = ByteBuffer.allocate(rand.nextInt(maxLength)); - ((Buffer) bytes).limit(bytes.capacity()); + bytes.limit(bytes.capacity()); rand.nextBytes(bytes.array()); return bytes; } diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestRandomData.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestRandomData.java new 
file mode 100644 index 00000000000..fa4ff77b674 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestRandomData.java @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.util; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.Objects; +import java.util.Random; + +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericData; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.specific.SpecificData; +import org.apache.avro.specific.SpecificRecordBase; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class TestRandomData { + private long seed; + + private int count; + + private File file; + private GenericData genericData; + private SpecificData specificData; + private Schema specificSchema; + private ReflectData reflectData; + private Schema reflectedSchema; + + @Before + public void setUp() throws Exception { + file = Files.createTempFile("randomData", ".avro").toFile(); + seed = System.currentTimeMillis(); + count = new 
Random().nextInt(50) + 75; + + genericData = GenericData.get(); + specificData = SpecificData.get(); + specificSchema = specificData.getSchema(SpecificTestRecord.class); + reflectData = ReflectData.get(); + reflectedSchema = reflectData.getSchema(ReflectTestRecord.class); + } + + @Test + public void testRandomDataFromGenericToGeneric() throws IOException { + checkWrite(genericData, TEST_SCHEMA); + checkRead(genericData, TEST_SCHEMA); + } + + @Test + public void testRandomDataFromGenericToSpecific() throws IOException { + checkWrite(genericData, TEST_SCHEMA); + checkRead(specificData, specificSchema); + } + + @Test + public void testRandomDataFromGenericToReflected() throws IOException { + checkWrite(genericData, TEST_SCHEMA); + checkRead(reflectData, reflectedSchema); + } + + @Test + public void testRandomDataFromSpecificToGeneric() throws IOException { + checkWrite(specificData, specificSchema); + checkRead(genericData, TEST_SCHEMA); + } + + @Test + public void testRandomDataFromSpecificToSpecific() throws IOException { + checkWrite(specificData, specificSchema); + checkRead(specificData, specificSchema); + } + + @Test + public void testRandomDataFromSpecificToReflected() throws IOException { + checkWrite(specificData, specificSchema); + checkRead(reflectData, reflectedSchema); + } + + @Test + public void testRandomDataFromReflectedToGeneric() throws IOException { + checkWrite(reflectData, reflectedSchema); + checkRead(genericData, TEST_SCHEMA); + } + + @Test + public void testRandomDataFromReflectedToSpecific() throws IOException { + checkWrite(reflectData, reflectedSchema); + checkRead(specificData, specificSchema); + } + + @Test + public void testRandomDataFromReflectedToReflected() throws IOException { + checkWrite(reflectData, reflectedSchema); + checkRead(reflectData, reflectedSchema); + } + + private void checkWrite(GenericData genericData, Schema schema) throws IOException { + // noinspection unchecked + try (DataFileWriter writer = new 
DataFileWriter(genericData.createDatumWriter(schema))) { + writer.create(schema, file); + for (Object datum : new RandomData(genericData, schema, this.count, seed)) { + writer.append(datum); + } + } + } + + private void checkRead(GenericData genericData, Schema schema) throws IOException { + // noinspection unchecked + try (DataFileReader reader = new DataFileReader(file, genericData.createDatumReader(schema))) { + for (Object expected : new RandomData(genericData, schema, this.count, seed)) { + assertEquals(expected, reader.next()); + } + } + } + + /* + * Test classes: they implement the same schema, but one is a SpecificRecord and + * the other uses a reflected schema. + */ + + public static final String TEST_SCHEMA_JSON = "{\"type\":\"record\",\"name\":\"Record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}}]}"; + + public static final Schema TEST_SCHEMA = new Schema.Parser().parse(TEST_SCHEMA_JSON); + + public static class SpecificTestRecord extends SpecificRecordBase { + public static final Schema SCHEMA$ = new Schema.Parser().parse(TEST_SCHEMA_JSON.replace("\"name\":\"Record\"", + "\"name\":\"" + SpecificTestRecord.class.getCanonicalName() + "\"")); + private int x; + private String y; + + @Override + public Schema getSchema() { + return SCHEMA$; + } + + @Override + public void put(int i, Object v) { + switch (i) { + case 0: + x = (Integer) v; + break; + case 1: + y = (String) v; + break; + default: + throw new RuntimeException(); + } + } + + @Override + public Object get(int i) { + switch (i) { + case 0: + return x; + case 1: + return y; + } + throw new RuntimeException(); + } + } + + public static class ReflectTestRecord { + private int x; + private String y; + + public int getX() { + return x; + } + + public void setX(int x) { + this.x = x; + } + + public String getY() { + return y; + } + + public void setY(String y) { + this.y = y; + } + + @Override + public boolean 
equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ReflectTestRecord that = (ReflectTestRecord) o; + return x == that.x && Objects.equals(y, that.y); + } + + @Override + public int hashCode() { + return Objects.hash(x, y); + } + + @Override + public String toString() { + return String.format("{\"x\": %d, \"y\": \"%s\"}", x, y); + } + } +}