Skip to content

Commit

Permalink
AVRO-3660: Use data generator with RandomData (apache#2526)
Browse files Browse the repository at this point in the history
Using `GenericData` (or one of its subclasses) with `RandomData` makes it possible to generate
`GenericRecord`, `SpecificRecord` and reflected records as random data.
  • Loading branch information
opwvhk authored and Ranbir Kumar committed May 13, 2024
1 parent 51ecc5c commit 0e66c91
Show file tree
Hide file tree
Showing 2 changed files with 261 additions and 23 deletions.
60 changes: 37 additions & 23 deletions lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,30 @@
*/
package org.apache.avro.util;

import org.apache.avro.LogicalType;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;

import java.io.File;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;

import org.apache.avro.LogicalType;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

/** Generates schema data as Java objects with random values. */
public class RandomData implements Iterable<Object> {
public static final String USE_DEFAULT = "use-default";
private final GenericData genericData;

private static final int MILLIS_IN_DAY = (int) Duration.ofDays(1).toMillis();

Expand All @@ -64,6 +62,23 @@ public RandomData(Schema schema, int count, boolean utf8ForString) {
}

/**
 * Creates a generator that uses the shared {@code GenericData.get()} data
 * model, delegating all other arguments unchanged to the constructor that takes
 * an explicit {@link GenericData}.
 */
public RandomData(Schema schema, int count, long seed, boolean utf8ForString) {
this(GenericData.get(), schema, count, seed, utf8ForString);
}

/**
 * Creates a generator for the given data model, passing {@code false} for
 * {@code utf8ForString} to the overload that takes that flag.
 */
public RandomData(GenericData genericData, Schema schema, int count) {
this(genericData, schema, count, false);
}

/**
 * Creates a generator for the given data model with an explicit seed, passing
 * {@code false} for {@code utf8ForString}.
 */
public RandomData(GenericData genericData, Schema schema, int count, long seed) {
this(genericData, schema, count, seed, false);
}

/**
 * Creates a generator for the given data model, using
 * {@link System#currentTimeMillis()} at construction time as the seed.
 */
public RandomData(GenericData genericData, Schema schema, int count, boolean utf8ForString) {
// No seed supplied: fall back to the current wall-clock time.
this(genericData, schema, count, System.currentTimeMillis(), utf8ForString);
}

public RandomData(GenericData genericData, Schema schema, int count, long seed, boolean utf8ForString) {
this.genericData = genericData;
this.root = schema;
this.seed = seed;
this.count = count;
Expand All @@ -74,7 +89,7 @@ public RandomData(Schema schema, int count, long seed, boolean utf8ForString) {
public Iterator<Object> iterator() {
return new Iterator<Object>() {
private int n;
private Random random = new Random(seed);
private final Random random = new Random(seed);

@Override
public boolean hasNext() {
Expand All @@ -98,26 +113,25 @@ public void remove() {
private Object generate(Schema schema, Random random, int d) {
switch (schema.getType()) {
case RECORD:
GenericRecord record = new GenericData.Record(schema);
Object record = genericData.newRecord(null, schema);
for (Schema.Field field : schema.getFields()) {
Object value = (field.getObjectProp(USE_DEFAULT) == null) ? generate(field.schema(), random, d + 1)
: GenericData.get().getDefaultValue(field);
record.put(field.name(), value);
genericData.setField(record, field.name(), field.pos(), value);
}
return record;
case ENUM:
List<String> symbols = schema.getEnumSymbols();
return new GenericData.EnumSymbol(schema, symbols.get(random.nextInt(symbols.size())));
return genericData.createEnum(symbols.get(random.nextInt(symbols.size())), schema);
case ARRAY:
int length = (random.nextInt(5) + 2) - d;
@SuppressWarnings("rawtypes")
GenericArray<Object> array = new GenericData.Array(length <= 0 ? 0 : length, schema);
int length = Math.max(0, (random.nextInt(5) + 2) - d);
GenericArray<Object> array = (GenericArray<Object>) genericData.newArray(null, length, schema);
for (int i = 0; i < length; i++)
array.add(generate(schema.getElementType(), random, d + 1));
return array;
case MAP:
length = (random.nextInt(5) + 2) - d;
Map<Object, Object> map = new HashMap<>(length <= 0 ? 0 : length);
length = Math.max(0, (random.nextInt(5) + 2) - d);
Map<Object, Object> map = (Map<Object, Object>) genericData.newMap(null, length);
for (int i = 0; i < length; i++) {
map.put(randomString(random, 40), generate(schema.getValueType(), random, d + 1));
}
Expand All @@ -128,7 +142,7 @@ private Object generate(Schema schema, Random random, int d) {
case FIXED:
byte[] bytes = new byte[schema.getFixedSize()];
random.nextBytes(bytes);
return new GenericData.Fixed(schema, bytes);
return genericData.createFixed(null, bytes, schema);
case STRING:
return randomString(random, 40);
case BYTES:
Expand Down Expand Up @@ -180,7 +194,7 @@ private Object randomString(Random random, int maxLength) {

private static ByteBuffer randomBytes(Random rand, int maxLength) {
ByteBuffer bytes = ByteBuffer.allocate(rand.nextInt(maxLength));
((Buffer) bytes).limit(bytes.capacity());
bytes.limit(bytes.capacity());
rand.nextBytes(bytes.array());
return bytes;
}
Expand Down
224 changes: 224 additions & 0 deletions lang/java/avro/src/test/java/org/apache/avro/util/TestRandomData.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.util;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Objects;
import java.util.Random;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.specific.SpecificData;
import org.apache.avro.specific.SpecificRecordBase;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;

/**
 * Round-trip tests for {@link RandomData} used with the generic, specific and
 * reflect data models.
 *
 * <p>
 * Each test writes randomly generated records to a temporary Avro data file
 * using one data model, then reads the file back with a (possibly different)
 * data model and compares every record against a second generator created with
 * the same seed and count.
 */
public class TestRandomData {
  /** Seed shared by the writing and the verifying generator of a test. */
  private long seed;

  /** Number of records generated (and expected in the file) per test. */
  private int count;

  /** Temporary data file; created in {@link #setUp()}, removed in {@link #tearDown()}. */
  private File file;
  private GenericData genericData;
  private SpecificData specificData;
  private Schema specificSchema;
  private ReflectData reflectData;
  private Schema reflectedSchema;

  /**
   * Creates the temporary data file, picks a seed and a record count (75 to 124)
   * and resolves the data models and schemas used by the tests.
   */
  @Before
  public void setUp() throws Exception {
    file = Files.createTempFile("randomData", ".avro").toFile();
    seed = System.currentTimeMillis();
    count = new Random().nextInt(50) + 75;

    genericData = GenericData.get();
    specificData = SpecificData.get();
    specificSchema = specificData.getSchema(SpecificTestRecord.class);
    reflectData = ReflectData.get();
    reflectedSchema = reflectData.getSchema(ReflectTestRecord.class);
  }

  /**
   * Removes the temporary data file so test runs do not accumulate files in the
   * temp directory.
   */
  @After
  public void tearDown() throws IOException {
    // setUp may have failed before the file was created.
    if (file != null) {
      Files.deleteIfExists(file.toPath());
    }
  }

  @Test
  public void testRandomDataFromGenericToGeneric() throws IOException {
    checkWrite(genericData, TEST_SCHEMA);
    checkRead(genericData, TEST_SCHEMA);
  }

  @Test
  public void testRandomDataFromGenericToSpecific() throws IOException {
    checkWrite(genericData, TEST_SCHEMA);
    checkRead(specificData, specificSchema);
  }

  @Test
  public void testRandomDataFromGenericToReflected() throws IOException {
    checkWrite(genericData, TEST_SCHEMA);
    checkRead(reflectData, reflectedSchema);
  }

  @Test
  public void testRandomDataFromSpecificToGeneric() throws IOException {
    checkWrite(specificData, specificSchema);
    checkRead(genericData, TEST_SCHEMA);
  }

  @Test
  public void testRandomDataFromSpecificToSpecific() throws IOException {
    checkWrite(specificData, specificSchema);
    checkRead(specificData, specificSchema);
  }

  @Test
  public void testRandomDataFromSpecificToReflected() throws IOException {
    checkWrite(specificData, specificSchema);
    checkRead(reflectData, reflectedSchema);
  }

  @Test
  public void testRandomDataFromReflectedToGeneric() throws IOException {
    checkWrite(reflectData, reflectedSchema);
    checkRead(genericData, TEST_SCHEMA);
  }

  @Test
  public void testRandomDataFromReflectedToSpecific() throws IOException {
    checkWrite(reflectData, reflectedSchema);
    checkRead(specificData, specificSchema);
  }

  @Test
  public void testRandomDataFromReflectedToReflected() throws IOException {
    checkWrite(reflectData, reflectedSchema);
    checkRead(reflectData, reflectedSchema);
  }

  /**
   * Writes {@link #count} random records, generated with the given data model and
   * {@link #seed}, to {@link #file}.
   *
   * @param genericData data model used both to generate and to write the records
   * @param schema      schema of the generated records and of the data file
   * @throws IOException if the data file cannot be written
   */
  private void checkWrite(GenericData genericData, Schema schema) throws IOException {
    // noinspection unchecked
    try (DataFileWriter<Object> writer = new DataFileWriter<Object>(genericData.createDatumWriter(schema))) {
      writer.create(schema, file);
      for (Object datum : new RandomData(genericData, schema, this.count, seed)) {
        writer.append(datum);
      }
    }
  }

  /**
   * Reads {@link #file} with the given data model and verifies that it contains
   * exactly the records a generator with the same {@link #seed} and
   * {@link #count} produces for that data model.
   *
   * @param genericData data model used both to read and to regenerate the records
   * @param schema      reader schema, also used for the regenerated records
   * @throws IOException if the data file cannot be read
   */
  private void checkRead(GenericData genericData, Schema schema) throws IOException {
    // The seed and count are random per run; report them so a failure can be
    // reproduced.
    final String context = " (seed=" + seed + ", count=" + count + ")";
    // noinspection unchecked
    try (DataFileReader<Object> reader = new DataFileReader<Object>(file, genericData.createDatumReader(schema))) {
      int index = 0;
      for (Object expected : new RandomData(genericData, schema, this.count, seed)) {
        assertEquals("Record " + index + " differs" + context, expected, reader.next());
        index++;
      }
      // The loop above only proves the file has at least 'count' records.
      assertFalse("Data file contains more records than were generated" + context, reader.hasNext());
    }
  }

  /*
   * Test classes: they implement the same schema, but one is a SpecificRecord and
   * the other uses a reflected schema.
   */

  public static final String TEST_SCHEMA_JSON = "{\"type\":\"record\",\"name\":\"Record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}}]}";

  public static final Schema TEST_SCHEMA = new Schema.Parser().parse(TEST_SCHEMA_JSON);

  /**
   * Hand-written specific record with fields {@code x} (int, position 0) and
   * {@code y} (String, position 1). Its schema is {@link #TEST_SCHEMA_JSON} with
   * the record name replaced by this class' canonical name.
   */
  public static class SpecificTestRecord extends SpecificRecordBase {
    public static final Schema SCHEMA$ = new Schema.Parser().parse(TEST_SCHEMA_JSON.replace("\"name\":\"Record\"",
        "\"name\":\"" + SpecificTestRecord.class.getCanonicalName() + "\""));
    private int x;
    private String y;

    @Override
    public Schema getSchema() {
      return SCHEMA$;
    }

    /**
     * Sets the field at position {@code i}.
     *
     * @throws IndexOutOfBoundsException if {@code i} is not a field position of
     *                                   this record
     */
    @Override
    public void put(int i, Object v) {
      switch (i) {
      case 0:
        x = (Integer) v;
        break;
      case 1:
        y = (String) v;
        break;
      default:
        throw new IndexOutOfBoundsException("Invalid index: " + i);
      }
    }

    /**
     * Returns the value of the field at position {@code i}.
     *
     * @throws IndexOutOfBoundsException if {@code i} is not a field position of
     *                                   this record
     */
    @Override
    public Object get(int i) {
      switch (i) {
      case 0:
        return x;
      case 1:
        return y;
      default:
        throw new IndexOutOfBoundsException("Invalid index: " + i);
      }
    }
  }

  /**
   * Plain bean with the same two fields as {@link SpecificTestRecord}; its schema
   * is obtained through reflection. Implements value equality so records read
   * from a file can be compared with generated ones.
   */
  public static class ReflectTestRecord {
    private int x;
    private String y;

    public int getX() {
      return x;
    }

    public void setX(int x) {
      this.x = x;
    }

    public String getY() {
      return y;
    }

    public void setY(String y) {
      this.y = y;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }
      ReflectTestRecord that = (ReflectTestRecord) o;
      return x == that.x && Objects.equals(y, that.y);
    }

    @Override
    public int hashCode() {
      return Objects.hash(x, y);
    }

    @Override
    public String toString() {
      return String.format("{\"x\": %d, \"y\": \"%s\"}", x, y);
    }
  }
}

0 comments on commit 0e66c91

Please sign in to comment.