Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVRO-3966: [Java] Fix default value serialisation for fixed and bytes #2823

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 12 additions & 27 deletions lang/java/avro/src/main/java/org/apache/avro/SchemaBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
package org.apache.avro;

import java.io.IOException;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
Expand All @@ -31,7 +30,6 @@
import java.util.Objects;
import java.util.Set;

import com.fasterxml.jackson.core.io.JsonStringEncoder;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
Expand Down Expand Up @@ -2414,11 +2412,12 @@ public final FieldAssembler<R> bytesDefault(ByteBuffer defaultVal) {

/**
* Completes this field with the default value provided, cannot be null. The
* string is interpreted as a byte[], with each character code point value
* equalling the byte value, as in the Avro spec JSON default.
* string is interpreted as a byte[] (in ISO_8859_1 encoding), with each
* character code point value equalling the byte value, as in the Avro spec JSON
* default.
**/
public final FieldAssembler<R> bytesDefault(String defaultVal) {
return super.usingDefault(defaultVal);
return super.usingDefault(defaultVal.getBytes(StandardCharsets.ISO_8859_1));
}

@Override
Expand Down Expand Up @@ -2496,11 +2495,12 @@ public final FieldAssembler<R> fixedDefault(ByteBuffer defaultVal) {

/**
* Completes this field with the default value provided, cannot be null. The
* string is interpreted as a byte[], with each character code point value
* equalling the byte value, as in the Avro spec JSON default.
* string is interpreted as a byte[] (in ISO_8859_1 encoding), with each
* character code point value equalling the byte value, as in the Avro spec JSON
* default.
**/
public final FieldAssembler<R> fixedDefault(String defaultVal) {
return super.usingDefault(defaultVal);
return this.fixedDefault(defaultVal.getBytes(StandardCharsets.ISO_8859_1));
}

@Override
Expand Down Expand Up @@ -2715,26 +2715,11 @@ public R endUnion() {
// create default value JsonNodes from objects
private static JsonNode toJsonNode(Object o) {
try {
String s;
if (o instanceof ByteBuffer) {
// special case since GenericData.toString() is incorrect for bytes
// note that this does not handle the case of a default value with nested bytes
ByteBuffer bytes = ((ByteBuffer) o);
((Buffer) bytes).mark();
byte[] data = new byte[bytes.remaining()];
bytes.get(data);
((Buffer) bytes).reset(); // put the buffer back the way we got it
s = new String(data, StandardCharsets.ISO_8859_1);
char[] quoted = JsonStringEncoder.getInstance().quoteAsString(s);
s = "\"" + new String(quoted) + "\"";
} else if (o instanceof byte[]) {
s = new String((byte[]) o, StandardCharsets.ISO_8859_1);
char[] quoted = JsonStringEncoder.getInstance().quoteAsString(s);
s = '\"' + new String(quoted) + '\"';
} else {
s = GenericData.get().toString(o);
if (o instanceof byte[]) {
o = ByteBuffer.wrap((byte[]) o);
}
return new ObjectMapper().readTree(s);

return new ObjectMapper().readTree(GenericData.get().toString(o));
} catch (IOException e) {
throw new SchemaBuilderException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import java.io.IOException;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.time.temporal.Temporal;
import java.util.AbstractList;
import java.util.Arrays;
Expand Down Expand Up @@ -744,7 +743,7 @@ protected void toString(Object datum, StringBuilder buffer, IdentityHashMap<Obje
} else if (isBytes(datum)) {
buffer.append("\"");
ByteBuffer bytes = ((ByteBuffer) datum).duplicate();
writeEscapedString(StandardCharsets.ISO_8859_1.decode(bytes), buffer);
writeBytesAsJsonString(bytes, buffer);
buffer.append("\"");
} else if (isNanOrInfinity(datum) || isTemporal(datum) || datum instanceof UUID) {
buffer.append("\"");
Expand Down Expand Up @@ -772,6 +771,20 @@ private boolean isNanOrInfinity(Object datum) {
|| ((datum instanceof Double) && (((Double) datum).isInfinite() || ((Double) datum).isNaN()));
}

private static void writeBytesAsJsonString(ByteBuffer buffer, StringBuilder builder) {
byte[] bytes = new byte[buffer.remaining()];
buffer.get(bytes);

for (byte b : bytes) {
String hex = Integer.toHexString(b);

builder.append("\\u");
for (int j = 0; j < 4 - hex.length(); j++)
builder.append('0');
builder.append(hex.toUpperCase());
}
}

/* Adapted from https://code.google.com/p/json-simple */
private static void writeEscapedString(CharSequence string, StringBuilder builder) {
for (int i = 0; i < string.length(); i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -550,11 +550,10 @@ void mapWithNonStringKeyToStringIsJson() throws Exception {
}

@Test
void toStringEscapesControlCharsInBytes() throws Exception {
void toStringWritesBytesAsUEscapedSequence() throws Exception {
GenericData data = GenericData.get();
ByteBuffer bytes = ByteBuffer.wrap(new byte[] { 'a', '\n', 'b' });
assertEquals("\"a\\nb\"", data.toString(bytes));
assertEquals("\"a\\nb\"", data.toString(bytes));
assertEquals("\"\\u0061\\u000A\\u0062\"", data.toString(bytes));
}

@Test
Expand Down
Loading