Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVRO-3660: [JAVA] Use data generator with RandomData #2526

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 37 additions & 23 deletions lang/java/avro/src/main/java/org/apache/avro/util/RandomData.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,30 @@
*/
package org.apache.avro.util;

import org.apache.avro.LogicalType;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;

import java.io.File;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;

import org.apache.avro.LogicalType;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

/** Generates schema data as Java objects with random values. */
public class RandomData implements Iterable<Object> {
/**
 * Name of a field property. When a record field carries this property (with any
 * non-null value), the field's schema default is used instead of a random value.
 */
public static final String USE_DEFAULT = "use-default";
// Data model used to create the generated records, enums, arrays, maps and fixed values.
private final GenericData genericData;

// Number of milliseconds in one day, narrowed to an int.
private static final int MILLIS_IN_DAY = (int) Duration.ofDays(1).toMillis();

Expand All @@ -64,6 +62,23 @@ public RandomData(Schema schema, int count, boolean utf8ForString) {
}

/**
 * Creates a generator that uses the shared {@link GenericData#get()} data model.
 * All other arguments are handed unchanged to the five-argument constructor.
 *
 * @param schema        root schema of the generated data
 * @param count         stored as the generator's {@code count}
 * @param seed          seed for the {@link Random} created by each iterator
 * @param utf8ForString passed through as-is. NOTE(review): its effect is not
 *                      visible here; presumably it selects the string
 *                      representation of generated strings. Confirm.
 */
public RandomData(Schema schema, int count, long seed, boolean utf8ForString) {
this(GenericData.get(), schema, count, seed, utf8ForString);
}

/**
 * Creates a generator for the given data model with {@code utf8ForString} set
 * to {@code false}. The constructor it delegates to seeds the generator with
 * the current time.
 *
 * @param genericData data model used to create the generated values
 * @param schema      root schema of the generated data
 * @param count       stored as the generator's {@code count}
 */
public RandomData(GenericData genericData, Schema schema, int count) {
this(genericData, schema, count, false);
}

/**
 * Creates a generator for the given data model with an explicit seed and
 * {@code utf8ForString} set to {@code false}.
 *
 * @param genericData data model used to create the generated values
 * @param schema      root schema of the generated data
 * @param count       stored as the generator's {@code count}
 * @param seed        seed for the {@link Random} created by each iterator
 */
public RandomData(GenericData genericData, Schema schema, int count, long seed) {
this(genericData, schema, count, seed, false);
}

/**
 * Creates a generator for the given data model, seeded with
 * {@link System#currentTimeMillis()}; two instances created at different
 * times therefore generally produce different data.
 *
 * @param genericData   data model used to create the generated values
 * @param schema        root schema of the generated data
 * @param count         stored as the generator's {@code count}
 * @param utf8ForString passed through as-is. NOTE(review): its effect is not
 *                      visible here; confirm against {@code randomString}.
 */
public RandomData(GenericData genericData, Schema schema, int count, boolean utf8ForString) {
this(genericData, schema, count, System.currentTimeMillis(), utf8ForString);
}

public RandomData(GenericData genericData, Schema schema, int count, long seed, boolean utf8ForString) {
this.genericData = genericData;
this.root = schema;
this.seed = seed;
this.count = count;
Expand All @@ -74,7 +89,7 @@ public RandomData(Schema schema, int count, long seed, boolean utf8ForString) {
public Iterator<Object> iterator() {
return new Iterator<Object>() {
private int n;
private Random random = new Random(seed);
private final Random random = new Random(seed);

@Override
public boolean hasNext() {
Expand All @@ -98,26 +113,25 @@ public void remove() {
private Object generate(Schema schema, Random random, int d) {
switch (schema.getType()) {
case RECORD:
GenericRecord record = new GenericData.Record(schema);
Object record = genericData.newRecord(null, schema);
for (Schema.Field field : schema.getFields()) {
Object value = (field.getObjectProp(USE_DEFAULT) == null) ? generate(field.schema(), random, d + 1)
: GenericData.get().getDefaultValue(field);
record.put(field.name(), value);
genericData.setField(record, field.name(), field.pos(), value);
}
return record;
case ENUM:
List<String> symbols = schema.getEnumSymbols();
return new GenericData.EnumSymbol(schema, symbols.get(random.nextInt(symbols.size())));
return genericData.createEnum(symbols.get(random.nextInt(symbols.size())), schema);
case ARRAY:
int length = (random.nextInt(5) + 2) - d;
@SuppressWarnings("rawtypes")
GenericArray<Object> array = new GenericData.Array(length <= 0 ? 0 : length, schema);
int length = Math.max(0, (random.nextInt(5) + 2) - d);
GenericArray<Object> array = (GenericArray<Object>) genericData.newArray(null, length, schema);
for (int i = 0; i < length; i++)
array.add(generate(schema.getElementType(), random, d + 1));
return array;
case MAP:
length = (random.nextInt(5) + 2) - d;
Map<Object, Object> map = new HashMap<>(length <= 0 ? 0 : length);
length = Math.max(0, (random.nextInt(5) + 2) - d);
Map<Object, Object> map = (Map<Object, Object>) genericData.newMap(null, length);
for (int i = 0; i < length; i++) {
map.put(randomString(random, 40), generate(schema.getValueType(), random, d + 1));
}
Expand All @@ -128,7 +142,7 @@ private Object generate(Schema schema, Random random, int d) {
case FIXED:
byte[] bytes = new byte[schema.getFixedSize()];
random.nextBytes(bytes);
return new GenericData.Fixed(schema, bytes);
return genericData.createFixed(null, bytes, schema);
case STRING:
return randomString(random, 40);
case BYTES:
Expand Down Expand Up @@ -180,7 +194,7 @@ private Object randomString(Random random, int maxLength) {

/**
 * Creates a buffer of random length that is completely filled with random bytes.
 *
 * @param rand      source of randomness; drawn from once for the length and
 *                  once for the content
 * @param maxLength exclusive upper bound of the buffer size; must be positive,
 *                  otherwise {@link Random#nextInt(int)} throws
 *                  {@link IllegalArgumentException}
 * @return an array-backed buffer with position 0 and limit equal to its capacity
 */
private static ByteBuffer randomBytes(Random rand, int maxLength) {
  // allocate() already yields position 0 and limit == capacity, so no explicit
  // limit() call is needed. Leaving it out also avoids linking against the
  // covariant ByteBuffer.limit(int) overload that does not exist on Java 8.
  ByteBuffer bytes = ByteBuffer.allocate(rand.nextInt(maxLength));
  rand.nextBytes(bytes.array());
  return bytes;
}
Expand Down
224 changes: 224 additions & 0 deletions lang/java/avro/src/test/java/org/apache/avro/util/TestRandomData.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.util;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Objects;
import java.util.Random;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.specific.SpecificData;
import org.apache.avro.specific.SpecificRecordBase;
import org.junit.Before;
import org.junit.Test;

import static org.junit.Assert.assertEquals;

public class TestRandomData {
// Seed handed to every RandomData instance so that the writer and the verifier
// generate the same sequence.
private long seed;

// Number of records requested from each RandomData instance.
private int count;

// Temporary Avro data file that checkWrite fills and checkRead reads back.
private File file;
// Data models under test, each followed by the schema used with it
// (the generic model uses TEST_SCHEMA).
private GenericData genericData;
private SpecificData specificData;
private Schema specificSchema;
private ReflectData reflectData;
private Schema reflectedSchema;

@Before
public void setUp() throws Exception {
file = Files.createTempFile("randomData", ".avro").toFile();
seed = System.currentTimeMillis();
count = new Random().nextInt(50) + 75;

Check warning

Code scanning / CodeQL

Random used only once

Random object created and used only once.

genericData = GenericData.get();
specificData = SpecificData.get();
specificSchema = specificData.getSchema(SpecificTestRecord.class);
reflectData = ReflectData.get();
reflectedSchema = reflectData.getSchema(ReflectTestRecord.class);
}

/**
 * Writes random records with the generic data model and verifies them after
 * reading the file back with the generic data model.
 */
@Test
public void testRandomDataFromGenericToGeneric() throws IOException {
checkWrite(genericData, TEST_SCHEMA);
checkRead(genericData, TEST_SCHEMA);
}

/**
 * Writes random records with the generic data model and verifies them after
 * reading the file back with the specific data model.
 */
@Test
public void testRandomDataFromGenericToSpecific() throws IOException {
checkWrite(genericData, TEST_SCHEMA);
checkRead(specificData, specificSchema);
}

/**
 * Writes random records with the generic data model and verifies them after
 * reading the file back with the reflect data model.
 */
@Test
public void testRandomDataFromGenericToReflected() throws IOException {
checkWrite(genericData, TEST_SCHEMA);
checkRead(reflectData, reflectedSchema);
}

/**
 * Writes random records with the specific data model and verifies them after
 * reading the file back with the generic data model.
 */
@Test
public void testRandomDataFromSpecificToGeneric() throws IOException {
checkWrite(specificData, specificSchema);
checkRead(genericData, TEST_SCHEMA);
}

/**
 * Writes random records with the specific data model and verifies them after
 * reading the file back with the specific data model.
 */
@Test
public void testRandomDataFromSpecificToSpecific() throws IOException {
checkWrite(specificData, specificSchema);
checkRead(specificData, specificSchema);
}

/**
 * Writes random records with the specific data model and verifies them after
 * reading the file back with the reflect data model.
 */
@Test
public void testRandomDataFromSpecificToReflected() throws IOException {
checkWrite(specificData, specificSchema);
checkRead(reflectData, reflectedSchema);
}

/**
 * Writes random records with the reflect data model and verifies them after
 * reading the file back with the generic data model.
 */
@Test
public void testRandomDataFromReflectedToGeneric() throws IOException {
checkWrite(reflectData, reflectedSchema);
checkRead(genericData, TEST_SCHEMA);
}

/**
 * Writes random records with the reflect data model and verifies them after
 * reading the file back with the specific data model.
 */
@Test
public void testRandomDataFromReflectedToSpecific() throws IOException {
checkWrite(reflectData, reflectedSchema);
checkRead(specificData, specificSchema);
}

/**
 * Writes random records with the reflect data model and verifies them after
 * reading the file back with the reflect data model.
 */
@Test
public void testRandomDataFromReflectedToReflected() throws IOException {
checkWrite(reflectData, reflectedSchema);
checkRead(reflectData, reflectedSchema);
}

/**
 * Fills the test file with randomly generated records.
 *
 * @param genericData data model that supplies the datum writer and creates the
 *                    generated values
 * @param schema      schema the file is created with and the data is generated for
 * @throws IOException if creating the file or appending a record fails
 */
private void checkWrite(GenericData genericData, Schema schema) throws IOException {
  // noinspection unchecked
  try (DataFileWriter<Object> out = new DataFileWriter<Object>(genericData.createDatumWriter(schema))) {
    out.create(schema, file);
    // Same count and seed as checkRead, so both sides see the same sequence.
    final RandomData generated = new RandomData(genericData, schema, this.count, seed);
    for (Object record : generated) {
      out.append(record);
    }
  }
}

/**
 * Reads the test file back and compares every record with a freshly generated
 * one from a generator using the same count and seed.
 *
 * @param genericData data model that supplies the datum reader and creates the
 *                    expected values
 * @param schema      reader schema, also used to generate the expected values
 * @throws IOException if opening or reading the file fails
 */
private void checkRead(GenericData genericData, Schema schema) throws IOException {
  // noinspection unchecked
  try (DataFileReader<Object> in = new DataFileReader<Object>(file, genericData.createDatumReader(schema))) {
    final RandomData regenerated = new RandomData(genericData, schema, this.count, seed);
    for (Object expected : regenerated) {
      final Object actual = in.next();
      assertEquals(expected, actual);
    }
  }
}

/*
* Test classes: they implement the same schema, but one is a SpecificRecord and
* the other uses a reflected schema.
*/

// JSON of the record schema shared by all tests: an int field "x" and a string
// field "y" whose Java representation is declared as String.
public static final String TEST_SCHEMA_JSON = "{\"type\":\"record\",\"name\":\"Record\",\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":{\"type\":\"string\",\"avro.java.string\":\"String\"}}]}";

// Parsed form of TEST_SCHEMA_JSON, used with the generic data model.
public static final Schema TEST_SCHEMA = new Schema.Parser().parse(TEST_SCHEMA_JSON);

/**
 * Hand-written specific record with an int field {@code x} at position 0 and a
 * string field {@code y} at position 1. Its schema is {@link #TEST_SCHEMA_JSON}
 * with the record name replaced by this class's canonical name.
 */
public static class SpecificTestRecord extends SpecificRecordBase {
  public static final Schema SCHEMA$ = new Schema.Parser().parse(TEST_SCHEMA_JSON.replace("\"name\":\"Record\"",
      "\"name\":\"" + SpecificTestRecord.class.getCanonicalName() + "\""));
  private int x;
  private String y;

  @Override
  public Schema getSchema() {
    return SCHEMA$;
  }

  /**
   * Sets the field at the given position.
   *
   * @param i field position: 0 for {@code x}, 1 for {@code y}
   * @param v new value; an {@link Integer} for {@code x}, any object with a
   *          meaningful {@code toString()} (or {@code null}) for {@code y}
   * @throws IndexOutOfBoundsException if {@code i} is neither 0 nor 1
   */
  @Override
  public void put(int i, Object v) {
    switch (i) {
    case 0:
      x = (Integer) v;
      break;
    case 1:
      // String values may arrive as a non-String CharSequence; convert instead
      // of failing with a ClassCastException.
      y = v == null ? null : v.toString();
      break;
    default:
      throw new IndexOutOfBoundsException("Invalid index: " + i);
    }
  }

  /**
   * Returns the field at the given position.
   *
   * @param i field position: 0 for {@code x}, 1 for {@code y}
   * @return the (boxed) field value
   * @throws IndexOutOfBoundsException if {@code i} is neither 0 nor 1
   */
  @Override
  public Object get(int i) {
    switch (i) {
    case 0:
      return x;
    case 1:
      return y;
    default:
      throw new IndexOutOfBoundsException("Invalid index: " + i);
    }
  }
}

/**
 * Plain bean with an int property {@code x} and a String property {@code y},
 * used to derive a schema by reflection. Equality, hash code and string form
 * are based on both properties.
 */
public static class ReflectTestRecord {
  private int x;
  private String y;

  /** Returns the current value of {@code x}. */
  public int getX() {
    return this.x;
  }

  /** Replaces the value of {@code x}. */
  public void setX(int x) {
    this.x = x;
  }

  /** Returns the current value of {@code y}, which may be {@code null}. */
  public String getY() {
    return this.y;
  }

  /** Replaces the value of {@code y}. */
  public void setY(String y) {
    this.y = y;
  }

  /** Two records are equal when they have exactly the same class and equal {@code x} and {@code y}. */
  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o == null) {
      return false;
    }
    if (o.getClass() != getClass()) {
      return false;
    }
    final ReflectTestRecord other = (ReflectTestRecord) o;
    if (other.x != x) {
      return false;
    }
    return Objects.equals(y, other.y);
  }

  /** Hash code consistent with {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    return Objects.hash(x, y);
  }

  /** Renders the record in a JSON-like form. */
  @Override
  public String toString() {
    final String template = "{\"x\": %d, \"y\": \"%s\"}";
    return String.format(template, x, y);
  }
}
}