From e92eac807968912f846747919851a99fd3850fc5 Mon Sep 17 00:00:00 2001 From: Oscar Westra van Holthe - Kind Date: Thu, 26 Oct 2023 12:28:09 +0200 Subject: [PATCH] AVRO-3666: Refactor for recent changes Includes the use of NameValidator and parsing multiple files with circular references between them. --- .editorconfig | 2 + .../apache/avro/FormattedSchemaParser.java | 57 +-- .../org/apache/avro/JsonSchemaParser.java | 26 +- .../java/org/apache/avro/NameValidator.java | 104 +++++ .../java/org/apache/avro/ParseContext.java | 310 +++++++++++++++ .../src/main/java/org/apache/avro/Schema.java | 99 +---- .../java/org/apache/avro/SchemaParser.java | 16 +- .../org/apache/avro/file/DataFileStream.java | 3 +- .../org/apache/avro/util/SchemaResolver.java | 367 ++++++++++++++++++ .../org/apache/avro/util}/SchemaVisitor.java | 24 +- .../java/org/apache/avro/util}/Schemas.java | 13 +- .../org/apache/avro/DummySchemaParser.java | 14 +- .../org/apache/avro/ParseContextTest.java | 156 ++++++++ .../apache/avro/SchemaNameValidatorTest.java | 44 +-- .../org/apache/avro/TestDataFileReader.java | 6 +- .../test/java/org/apache/avro/TestSchema.java | 6 +- .../org/apache/avro/TestSchemaBuilder.java | 2 +- .../org/apache/avro/reflect/TestReflect.java | 5 +- .../apache/avro/util}/TestSchemaResolver.java | 41 +- .../org/apache/avro/util}/TestSchemas.java | 20 +- .../java/org/apache/avro/idl/IdlFile.java | 27 +- .../java/org/apache/avro/idl/IdlReader.java | 86 ++-- .../org/apache/avro/idl/IdlSchemaParser.java | 15 +- .../avro/idl/IsResolvedSchemaVisitor.java | 60 --- .../org/apache/avro/idl/ResolvingVisitor.java | 192 --------- .../org/apache/avro/idl/SchemaResolver.java | 149 ------- .../apache/avro/idl/SchemaVisitorAction.java | 40 -- .../org/apache/avro/idl/IdlReaderTest.java | 4 +- .../java/org/apache/avro/idl/TestCycle.java | 11 +- .../test/java/org/apache/avro/TestSchema.java | 6 +- .../java/org/apache/avro/mojo/IDLMojo.java | 16 +- .../java/org/apache/avro/tool/IdlTool.java | 1 + 32 
files changed, 1183 insertions(+), 739 deletions(-) create mode 100644 lang/java/avro/src/main/java/org/apache/avro/NameValidator.java create mode 100644 lang/java/avro/src/main/java/org/apache/avro/ParseContext.java create mode 100644 lang/java/avro/src/main/java/org/apache/avro/util/SchemaResolver.java rename lang/java/{idl/src/main/java/org/apache/avro/idl => avro/src/main/java/org/apache/avro/util}/SchemaVisitor.java (77%) rename lang/java/{idl/src/main/java/org/apache/avro/idl => avro/src/main/java/org/apache/avro/util}/Schemas.java (89%) create mode 100644 lang/java/avro/src/test/java/org/apache/avro/ParseContextTest.java rename lang/java/{idl/src/test/java/org/apache/avro/idl => avro/src/test/java/org/apache/avro/util}/TestSchemaResolver.java (62%) rename lang/java/{idl/src/test/java/org/apache/avro/idl => avro/src/test/java/org/apache/avro/util}/TestSchemas.java (90%) delete mode 100644 lang/java/idl/src/main/java/org/apache/avro/idl/IsResolvedSchemaVisitor.java delete mode 100644 lang/java/idl/src/main/java/org/apache/avro/idl/ResolvingVisitor.java delete mode 100644 lang/java/idl/src/main/java/org/apache/avro/idl/SchemaResolver.java delete mode 100644 lang/java/idl/src/main/java/org/apache/avro/idl/SchemaVisitorAction.java diff --git a/.editorconfig b/.editorconfig index a2a93880be0..b96e2b9c6e8 100644 --- a/.editorconfig +++ b/.editorconfig @@ -19,6 +19,8 @@ root = true charset = utf-8 end_of_line = lf insert_final_newline = true +ij_any_block_comment_at_first_column = false +ij_any_line_comment_at_first_column = false [*.{java,xml,sh}] indent_style = space diff --git a/lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java b/lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java index f4dc90ba3db..cd67788fa9e 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java +++ b/lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java @@ -19,7 +19,6 @@ import java.io.IOException; 
import java.net.URI; -import java.util.Collection; /** * Schema parser for a specific schema format. @@ -29,46 +28,50 @@ * schema sources. *

* - *

Note to implementers:

- * - *

- * Implementations are located using a {@link java.util.ServiceLoader}. See that - * class for details. - *

- * - *

- * You can expect that schemas being read are invalid, so you are encouraged to - * return {@code null} upon parsing failure where the input clearly doesn't make - * sense (e.g., reading "/**" when expecting JSON). If the input is likely in - * the correct format, but invalid, throw a {@link SchemaParseException} - * instead. - *

- * *

- * Note that throwing anything other than a {@code SchemaParseException} will - * abort the parsing process, so reserve that for rethrowing exceptions. + * Implementations are located using a {@link java.util.ServiceLoader} and must + * therefore be threadsafe. See the {@code ServiceLoader} class for details on + * loading your implementation. *

* * @see java.util.ServiceLoader */ public interface FormattedSchemaParser { /** - * Parse a schema from a text based source. Can use the base location of the - * schema (e.g., the directory where the schema file lives) if available. - * *

- * Implementations should add all named schemas they parse to the collection. + * Parse schema definitions from a text based source. *

* - * @param types a mutable collection of known types; parsed named - * schemata will be added + *

Notes for implementers:

+ * + * + * + * @param parseContext the current parse context: all parsed schemata should + * be added here to resolve names with; contains all + * previously known types * @param baseUri the base location of the schema, or {@code null} if * not known - * @param formattedSchema the schema as text - * @return the parsed schema, or {@code null} if the format is not supported + * @param formattedSchema the text of the schema definition(s) to parse + * @return the main schema, if any * @throws IOException when the schema cannot be read * @throws SchemaParseException when the schema cannot be parsed */ - Schema parse(Collection types, URI baseUri, CharSequence formattedSchema) + Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema) throws IOException, SchemaParseException; } diff --git a/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java index 9a1da447d15..c7d91878627 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java +++ b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java @@ -19,8 +19,6 @@ import java.io.IOException; import java.net.URI; -import java.util.ArrayList; -import java.util.Collection; /** * Schema parser for JSON formatted schemata. This initial implementation simply @@ -59,32 +57,26 @@ public static Schema parseInternal(String... 
fragments) { for (String fragment : fragments) { buffer.append(fragment); } - return new JsonSchemaParser().parse(new ArrayList<>(), buffer, true); + return new JsonSchemaParser().parse(new ParseContext(NameValidator.NO_VALIDATION), buffer, null); } @Override - public Schema parse(Collection schemas, URI baseUri, CharSequence formattedSchema) + public Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema) throws IOException, SchemaParseException { - return parse(schemas, formattedSchema, false); + return parse(parseContext, formattedSchema, parseContext.nameValidator); } - private Schema parse(Collection schemas, CharSequence formattedSchema, boolean skipValidation) + private Schema parse(ParseContext parseContext, CharSequence formattedSchema, NameValidator nameValidator) throws SchemaParseException { - // TODO: refactor JSON parsing out of the Schema class - Schema.Parser parser; - if (skipValidation) { - parser = new Schema.Parser(Schema.NameValidator.NO_VALIDATION); + Schema.Parser parser = new Schema.Parser(nameValidator); + if (nameValidator == NameValidator.NO_VALIDATION) { parser.setValidateDefaults(false); } else { - parser = new Schema.Parser(); - } - if (schemas != null) { - parser.addTypes(schemas); + parser = new Schema.Parser(nameValidator); } + parser.addTypes(parseContext.typesByName().values()); Schema schema = parser.parse(formattedSchema.toString()); - if (schemas != null) { - schemas.addAll(parser.getTypes().values()); - } + parser.getTypes().values().forEach(parseContext::put); return schema; } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/NameValidator.java b/lang/java/avro/src/main/java/org/apache/avro/NameValidator.java new file mode 100644 index 00000000000..f1262d922cf --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/NameValidator.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +public interface NameValidator { + + class Result { + private final String errors; + + public Result(final String errors) { + this.errors = errors; + } + + public boolean isOK() { + return this == NameValidator.OK; + } + + public String getErrors() { + return errors; + } + } + + Result OK = new Result(null); + + default Result validate(String name) { + return OK; + } + + NameValidator NO_VALIDATION = new NameValidator() { + }; + + NameValidator UTF_VALIDATOR = new NameValidator() { + @Override + public Result validate(final String name) { + if (name == null) { + return new Result("Null name"); + } + int length = name.length(); + if (length == 0) { + return new Result("Empty name"); + } + char first = name.charAt(0); + if (!(Character.isLetter(first) || first == '_')) { + return new Result("Illegal initial character: " + name); + } + for (int i = 1; i < length; i++) { + char c = name.charAt(i); + if (!(Character.isLetterOrDigit(c) || c == '_')) { + return new Result("Illegal character in: " + name); + } + } + return OK; + } + }; + + NameValidator STRICT_VALIDATOR = new NameValidator() { + @Override + public Result validate(final String name) { + if (name == null) { + return new Result("Null name"); + } + int length = name.length(); + if (length 
== 0) { + return new Result("Empty name"); + } + char first = name.charAt(0); + if (!(isLetter(first) || first == '_')) { + return new Result("Illegal initial character: " + name); + } + for (int i = 1; i < length; i++) { + char c = name.charAt(i); + if (!(isLetter(c) || isDigit(c) || c == '_')) { + return new Result("Illegal character in: " + name); + } + } + return OK; + } + + private boolean isLetter(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); + } + + private boolean isDigit(char c) { + return c >= '0' && c <= '9'; + } + + }; + +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/ParseContext.java b/lang/java/avro/src/main/java/org/apache/avro/ParseContext.java new file mode 100644 index 00000000000..02f4129bed2 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/ParseContext.java @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.apache.avro.util.SchemaResolver; + +import java.util.EnumSet; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +/** + * Class to define a name context, useful to reference schemata with. 
This + * allows for the following: + * + *
    + *
  • Provide a default namespace for nested contexts, as found for example in + * JSON based schema definitions.
  • + *
  • Find schemata by name, including primitives.
  • + *
  • Collect new named schemata.
  • + *
+ * + *

+ * Note: this class has no use for most Avro users, but is a key component when + * implementing a schema parser. + *

+ * + * @see JSON based + * schema definition + **/ +public class ParseContext { + private static final Map PRIMITIVES = new HashMap<>(); + + static { + PRIMITIVES.put("string", Schema.Type.STRING); + PRIMITIVES.put("bytes", Schema.Type.BYTES); + PRIMITIVES.put("int", Schema.Type.INT); + PRIMITIVES.put("long", Schema.Type.LONG); + PRIMITIVES.put("float", Schema.Type.FLOAT); + PRIMITIVES.put("double", Schema.Type.DOUBLE); + PRIMITIVES.put("boolean", Schema.Type.BOOLEAN); + PRIMITIVES.put("null", Schema.Type.NULL); + } + + private static final Set NAMED_SCHEMA_TYPES = EnumSet.of(Schema.Type.RECORD, Schema.Type.ENUM, + Schema.Type.FIXED); + private final Map oldSchemas; + private final Map newSchemas; + // Visible for use in JsonSchemaParser + final NameValidator nameValidator; + private final String namespace; + + /** + * Create a {@code ParseContext} for the default/{@code null} namespace, using + * default name validation for new schemata. + */ + public ParseContext() { + this(NameValidator.UTF_VALIDATOR, null); + } + + /** + * Create a {@code ParseContext} for the specified namespace, using default name + * validation for new schemata. + */ + public ParseContext(String namespace) { + this(NameValidator.UTF_VALIDATOR, namespace); + } + + /** + * Create a {@code ParseContext} for the default/{@code null} namespace, using + * the specified name validation for new schemata. + */ + public ParseContext(NameValidator nameValidator) { + this(nameValidator, null); + } + + /** + * Create a {@code ParseContext} for the specified namespace, using the + * specified name validation for new schemata. 
+ */ + public ParseContext(NameValidator nameValidator, String namespace) { + this(nameValidator, new LinkedHashMap<>(), new LinkedHashMap<>(), namespace); + } + + private ParseContext(NameValidator nameValidator, Map oldSchemas, Map newSchemas, + String namespace) { + this.nameValidator = nameValidator; + this.oldSchemas = oldSchemas; + this.newSchemas = newSchemas; + this.namespace = notEmpty(namespace) ? namespace : null; + } + + /** + * Create a derived context using a different fallback namespace. + * + * @param namespace the fallback namespace to resolve names with + * @return a new context + */ + public ParseContext namespace(String namespace) { + return new ParseContext(nameValidator, oldSchemas, newSchemas, namespace); + } + + /** + * Return the fallback namespace. + * + * @return the namespace + */ + public String namespace() { + return namespace; + } + + /** + * Tell whether this context contains the given schema. + * + * @param schema a schema + * @return {@code true} if the context contains the schema, {@code false} + * otherwise + */ + @Deprecated + public boolean contains(Schema schema) { + String fullName = schema.getFullName(); + return schema.equals(oldSchemas.get(fullName)) || schema.equals(newSchemas.get(fullName)); + } + + /** + * Tell whether this context contains a schema with the given name. + * + * @param name a schema name + * @return {@code true} if the context contains a schema with this name, + * {@code false} otherwise + */ + public boolean contains(String name) { + return PRIMITIVES.containsKey(name) || oldSchemas.containsKey(name) || newSchemas.containsKey(name); + } + + /** + * Resolve a schema by name. That is: + * + *
    + *
  • If {@code name} is a primitive name, return a (new) schema for it
  • + *
  • If {@code name} contains a dot, resolve the schema by full name only
  • + *
  • Otherwise: resolve the schema in the current and in the null namespace + * (the former takes precedence)
  • + *
+ * + * Resolving means that the schema is returned if known, and otherwise an + * unresolved schema (a reference) is returned. This method does not fail for + * unknown names. + * + * @param name the schema name to resolve + * @return the schema, or an unresolved schema (a reference) if the name is not + * known + */ + public Schema resolve(String name) { + Schema.Type type = PRIMITIVES.get(name); + if (type != null) { + return Schema.create(type); + } + + String fullName = resolveName(name, namespace); + Schema schema = getSchema(fullName); + if (schema == null) { + schema = getSchema(name); + } + + return schema != null ? schema : SchemaResolver.unresolvedSchema(fullName); + } + + private Schema getSchema(String fullName) { + Schema schema = oldSchemas.get(fullName); + if (schema == null) { + schema = newSchemas.get(fullName); + } + return schema; + } + + // Visible for testing + String resolveName(String name, String space) { + int lastDot = name.lastIndexOf('.'); + if (lastDot < 0) { // short name + if (!notEmpty(space)) { + space = namespace; + } + if (notEmpty(space)) { + return space + "." + name; + } + } + return name; + } + + /** + * Return the simplest name that references the same schema in the current + * namespace. Returns the name without any namespace if it is not a primitive, + * and the namespace is the current namespace. + * + * @param fullName the full schema name + * @return the simplest name within the current namespace + */ + public String simpleName(String fullName) { + int lastDot = fullName.lastIndexOf('.'); + if (lastDot >= 0) { + String name = fullName.substring(lastDot + 1); + String space = fullName.substring(0, lastDot); + if (!PRIMITIVES.containsKey(name) && space.equals(namespace)) { + // The name is a full name in the current namespace, and cannot be + // mistaken for a primitive type. + return name; + } + } + // The special case of the previous comment does not apply.
+ return fullName; + } + + private boolean notEmpty(String str) { + return str != null && !str.isEmpty(); + } + + /** + * Put the schema into this context. This is an idempotent operation: it only + * fails if this context already has a different schema with the same name. + * + *

+ * Note that this method only accepts named schemata (records, enums and fixed + * types): any other schema type is rejected with an {@link AvroTypeException}. + * Primitive types are predefined and are always resolved by their own name. + * This means you cannot redefine a 'long' with a logical timestamp type. + *

+ * + * @param schema the schema to put into the context + */ + public void put(Schema schema) { + if (!(NAMED_SCHEMA_TYPES.contains(schema.getType()))) { + throw new AvroTypeException("You can only put a named schema into the context"); + } + + String fullName = requireValidFullName(schema.getFullName()); + + Schema alreadyKnownSchema = oldSchemas.get(fullName); + if (alreadyKnownSchema != null) { + if (!schema.equals(alreadyKnownSchema)) { + throw new SchemaParseException("Can't redefine: " + fullName); + } + } else { + Schema previouslyAddedSchema = newSchemas.putIfAbsent(fullName, schema); + if (previouslyAddedSchema != null && !previouslyAddedSchema.equals(schema)) { + throw new SchemaParseException("Can't redefine: " + fullName); + } + } + } + + private String requireValidFullName(String fullName) { + String[] names = fullName.split("\\."); + for (int i = 0; i < names.length - 1; i++) { + validateName(names[i], "Namespace part"); + } + validateName(names[names.length - 1], "Name"); + return fullName; + } + + private void validateName(String name, String what) { + NameValidator.Result result = nameValidator.validate(name); + if (!result.isOK()) { + throw new SchemaParseException(what + " \"" + name + "\" is invalid: " + result.getErrors()); + } + } + + public boolean hasNewSchemas() { + return !newSchemas.isEmpty(); + } + + public void commit() { + oldSchemas.putAll(newSchemas); + newSchemas.clear(); + } + + public void rollback() { + newSchemas.clear(); + } + + /** + * Return all known types by their fullname. 
+ * + * @return a map of all types by their name + */ + public Map typesByName() { + LinkedHashMap result = new LinkedHashMap<>(); + result.putAll(oldSchemas); + result.putAll(newSchemas); + return result; + } + + public Protocol resolveSchemata(Protocol protocol) { + protocol.getTypes().forEach(this::put); + return SchemaResolver.resolve(this, protocol); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java b/lang/java/avro/src/main/java/org/apache/avro/Schema.java index 38a6e4a9e42..f312ecfb6f2 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java @@ -25,6 +25,12 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.DoubleNode; import com.fasterxml.jackson.databind.node.NullNode; +import org.apache.avro.util.internal.Accessor; +import org.apache.avro.util.internal.Accessor.FieldAccessor; +import org.apache.avro.util.internal.JacksonUtils; +import org.apache.avro.util.internal.ThreadLocalWithInitial; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.ByteArrayOutputStream; import java.io.File; @@ -51,13 +57,6 @@ import java.util.stream.Collectors; import java.util.stream.StreamSupport; -import org.apache.avro.util.internal.Accessor; -import org.apache.avro.util.internal.Accessor.FieldAccessor; -import org.apache.avro.util.internal.JacksonUtils; -import org.apache.avro.util.internal.ThreadLocalWithInitial; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import static org.apache.avro.LogicalType.LOGICAL_TYPE_PROP; /** @@ -1490,7 +1489,7 @@ public NullSchema() { */ public static class Parser { private Names names = new Names(); - private final Schema.NameValidator validate; + private final NameValidator validate; private boolean validateDefaults = true; public Parser() { @@ -1760,18 +1759,18 @@ public Schema put(Name name, Schema schema) { } } - private static ThreadLocal validateNames = 
ThreadLocalWithInitial + private static ThreadLocal validateNames = ThreadLocalWithInitial .of(() -> NameValidator.UTF_VALIDATOR); private static String validateName(String name) { NameValidator.Result result = validateNames.get().validate(name); if (!result.isOK()) { - throw new SchemaParseException(result.errors); + throw new SchemaParseException(result.getErrors()); } return name; } - public static void setNameValidator(final Schema.NameValidator validator) { + public static void setNameValidator(final NameValidator validator) { Schema.validateNames.set(validator); } @@ -2311,84 +2310,6 @@ private static String getFieldAlias(Name record, String field, Map= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); - } - - private boolean isDigit(char c) { - return c >= '0' && c <= '9'; - } - - }; - - } - /** * No change is permitted on LockableArrayList once lock() has been called on * it. diff --git a/lang/java/avro/src/main/java/org/apache/avro/SchemaParser.java b/lang/java/avro/src/main/java/org/apache/avro/SchemaParser.java index c100f724b8e..dfb3c01f353 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/SchemaParser.java +++ b/lang/java/avro/src/main/java/org/apache/avro/SchemaParser.java @@ -29,10 +29,8 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; -import java.util.LinkedHashSet; import java.util.List; import java.util.ServiceLoader; -import java.util.Set; /** * Avro schema parser for text-based formats like JSON, IDL, etc. @@ -55,7 +53,7 @@ * @see UtfTextUtils */ public class SchemaParser { - private final Set knownSchemata; + private final ParseContext parseContext; private final Collection formattedSchemaParsers; /** @@ -63,7 +61,7 @@ public class SchemaParser { * empty. 
*/ public SchemaParser() { - this.knownSchemata = new LinkedHashSet<>(); + this.parseContext = new ParseContext(); this.formattedSchemaParsers = new ArrayList<>(); for (FormattedSchemaParser formattedSchemaParser : ServiceLoader.load(FormattedSchemaParser.class)) { formattedSchemaParsers.add(formattedSchemaParser); @@ -226,14 +224,14 @@ private Schema parse(URI baseUri, CharSequence formattedSchema) throws IOExcepti List parseExceptions = new ArrayList<>(); for (FormattedSchemaParser formattedSchemaParser : formattedSchemaParsers) { try { - // Ensure we're only changing (adding to) the known types when a parser succeeds - Set schemaSet = new LinkedHashSet<>(knownSchemata); - Schema schema = formattedSchemaParser.parse(schemaSet, baseUri, formattedSchema); - if (schema != null) { - knownSchemata.addAll(schemaSet); + Schema schema = formattedSchemaParser.parse(parseContext, baseUri, formattedSchema); + if (parseContext.hasNewSchemas()) { + // Parsing succeeded: return the result. + parseContext.commit(); return schema; } } catch (SchemaParseException e) { + parseContext.rollback(); parseExceptions.add(e); } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java index 150d2ace9ba..e9b5ed38852 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java @@ -34,6 +34,7 @@ import org.apache.avro.AvroRuntimeException; import org.apache.avro.InvalidAvroMagicException; +import org.apache.avro.NameValidator; import org.apache.avro.Schema; import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.DecoderFactory; @@ -139,7 +140,7 @@ void initialize(InputStream in, byte[] magic) throws IOException { // finalize the header header.metaKeyList = Collections.unmodifiableList(header.metaKeyList); - header.schema = new 
Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false) + header.schema = new Schema.Parser(NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse(getMetaString(DataFileConstants.SCHEMA)); this.codec = resolveCodec(); reader.setSchema(header.schema); diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/SchemaResolver.java b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaResolver.java new file mode 100644 index 00000000000..c3a25a5e577 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaResolver.java @@ -0,0 +1,367 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro.util; + +import org.apache.avro.AvroTypeException; +import org.apache.avro.JsonProperties; +import org.apache.avro.ParseContext; +import org.apache.avro.Protocol; +import org.apache.avro.Schema; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.IdentityHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; +import java.util.stream.Collectors; + +import static org.apache.avro.Schema.Type.ARRAY; +import static org.apache.avro.Schema.Type.ENUM; +import static org.apache.avro.Schema.Type.FIXED; +import static org.apache.avro.Schema.Type.MAP; +import static org.apache.avro.Schema.Type.RECORD; +import static org.apache.avro.Schema.Type.UNION; + +/** + * Utility class to resolve schemas that are unavailable at the point they are + * referenced in the IDL. + */ +public final class SchemaResolver { + + private SchemaResolver() { + } + + private static final String UR_SCHEMA_ATTR = "org.apache.avro.idl.unresolved.name"; + + private static final String UR_SCHEMA_NAME = "UnresolvedSchema"; + + private static final String UR_SCHEMA_NS = "org.apache.avro.compiler"; + + private static final AtomicInteger COUNTER = new AtomicInteger(); + + /** + * Create a schema to represent an "unresolved" schema. (used to represent a + * schema whose definition does not exist, yet). + * + * @param name a schema name + * @return an unresolved schema for the given name + */ + public static Schema unresolvedSchema(final String name) { + Schema schema = Schema.createRecord(UR_SCHEMA_NAME + '_' + COUNTER.getAndIncrement(), "unresolved schema", + UR_SCHEMA_NS, false, Collections.emptyList()); + schema.addProp(UR_SCHEMA_ATTR, name); + return schema; + } + + /** + * Is this an unresolved schema. 
+ * + * @param schema a schema + * @return whether the schema is an unresolved schema + */ + public static boolean isUnresolvedSchema(final Schema schema) { + return (schema.getType() == Schema.Type.RECORD && schema.getProp(UR_SCHEMA_ATTR) != null && schema.getName() != null + && schema.getName().startsWith(UR_SCHEMA_NAME) && UR_SCHEMA_NS.equals(schema.getNamespace())); + } + + /** + * Get the unresolved schema name. + * + * @param schema an unresolved schema + * @return the name of the unresolved schema + * @throws IllegalArgumentException if the schema is not an unresolved schema + */ + public static String getUnresolvedSchemaName(final Schema schema) { + if (!isUnresolvedSchema(schema)) { + throw new IllegalArgumentException("Not a unresolved schema: " + schema); + } + return schema.getProp(UR_SCHEMA_ATTR); + } + + /** + * Is this schema fully resolved? That is: the schema is not an unresolved + * schema itself, and visiting it with an {@link IsResolvedSchemaVisitor} + * reports no unresolved parts. + * + * @param schema a schema + * @return whether the schema is fully resolved + */ + public static boolean isFullyResolvedSchema(final Schema schema) { + if (isUnresolvedSchema(schema)) { + return false; + } else { + return Schemas.visit(schema, new IsResolvedSchemaVisitor()); + } + } + + /** + * Clone the provided schema while resolving all unresolved schemas. + * + * @param parseContext the parse context with known names + * @param schema the schema to resolve; may be {@code null} + * @return a copy of the schema with all schemas resolved, or {@code null} if + * the given schema is {@code null} + */ + public static Schema resolve(final ParseContext parseContext, Schema schema) { + if (schema == null) { + return null; + } + ResolvingVisitor visitor = new ResolvingVisitor(schema, parseContext::resolve); + return Schemas.visit(schema, visitor); + } + + /** + * Clone all provided schemas while resolving all unreferenced schemas.
+ * + * @param parseContext the parse context with known names + * @param schemas the schemas to resolve + * @return a copy of all schemas with all schemas resolved + */ + public static Collection resolve(final ParseContext parseContext, Collection schemas) { + ResolvingVisitor visitor = new ResolvingVisitor(null, parseContext::resolve); + return schemas.stream().map(schema -> Schemas.visit(schema, visitor.withRoot(schema))).collect(Collectors.toList()); + } + + /** + * Will clone the provided protocol while resolving all unreferenced schemas + * + * @param parseContext the parse context with known names + * @param protocol the protocol to resolve + * @return a copy of the protocol with all schemas resolved + */ + public static Protocol resolve(ParseContext parseContext, final Protocol protocol) { + // Create an empty copy of the protocol + Protocol result = new Protocol(protocol.getName(), protocol.getDoc(), protocol.getNamespace()); + protocol.getObjectProps().forEach(((JsonProperties) result)::addProp); + + ResolvingVisitor visitor = new ResolvingVisitor(null, parseContext::resolve); + Function resolver = schema -> Schemas.visit(schema, visitor.withRoot(schema)); + + // Resolve all schemata in the protocol. 
+ result.setTypes(protocol.getTypes().stream().map(resolver).collect(Collectors.toList())); + Map resultMessages = result.getMessages(); + protocol.getMessages().forEach((name, oldValue) -> { + Protocol.Message newValue; + if (oldValue.isOneWay()) { + newValue = result.createMessage(oldValue.getName(), oldValue.getDoc(), oldValue, + resolver.apply(oldValue.getRequest())); + } else { + Schema request = resolver.apply(oldValue.getRequest()); + Schema response = resolver.apply(oldValue.getResponse()); + Schema errors = resolver.apply(oldValue.getErrors()); + newValue = result.createMessage(oldValue.getName(), oldValue.getDoc(), oldValue, request, response, errors); + } + resultMessages.put(name, newValue); + }); + return result; + } + + /** + * This visitor checks if the visited schema is fully resolved; it terminates at the first unresolved schema found. + */ + public static final class IsResolvedSchemaVisitor implements SchemaVisitor { + boolean hasUnresolvedParts; + + IsResolvedSchemaVisitor() { + hasUnresolvedParts = false; + } + + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + hasUnresolvedParts = isUnresolvedSchema(terminal); + return hasUnresolvedParts ? SchemaVisitorAction.TERMINATE : SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction visitNonTerminal(Schema nonTerminal) { + hasUnresolvedParts = isUnresolvedSchema(nonTerminal); + if (hasUnresolvedParts) { + return SchemaVisitorAction.TERMINATE; + } + if (nonTerminal.getType() == Schema.Type.RECORD && !nonTerminal.hasFields()) { + // We're still initializing the type: a record without fields has no subtree to visit yet. + return SchemaVisitorAction.SKIP_SUBTREE; + } + return SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction afterVisitNonTerminal(Schema nonTerminal) { + return SchemaVisitorAction.CONTINUE; + } + + @Override + public Boolean get() { + return !hasUnresolvedParts; + } + } + + /** + * This visitor creates a clone of the visited schemata, minus the specified + * schema properties, and resolves all unresolved schemas.
+ */ + public static final class ResolvingVisitor implements SchemaVisitor { + private static final Set CONTAINER_SCHEMA_TYPES = EnumSet.of(RECORD, ARRAY, MAP, UNION); + private static final Set NAMED_SCHEMA_TYPES = EnumSet.of(RECORD, ENUM, FIXED); + + private final Function symbolTable; + private final Set schemaPropertiesToRemove; + private final IdentityHashMap replace; + + private final Schema root; + + public ResolvingVisitor(final Schema root, final Function symbolTable, + String... schemaPropertiesToRemove) { + this(root, symbolTable, new HashSet<>(Arrays.asList(schemaPropertiesToRemove))); + } + + public ResolvingVisitor(final Schema root, final Function symbolTable, + Set schemaPropertiesToRemove) { + this.replace = new IdentityHashMap<>(); + this.symbolTable = symbolTable; + this.schemaPropertiesToRemove = schemaPropertiesToRemove; + + this.root = root; + } + + public ResolvingVisitor withRoot(Schema root) { + return new ResolvingVisitor(root, symbolTable, schemaPropertiesToRemove); + } + + @Override + public SchemaVisitorAction visitTerminal(final Schema terminal) { + Schema.Type type = terminal.getType(); + Schema newSchema; + if (CONTAINER_SCHEMA_TYPES.contains(type)) { + if (!replace.containsKey(terminal)) { + throw new IllegalStateException("Schema " + terminal + " must be already processed"); + } + return SchemaVisitorAction.CONTINUE; + } else if (type == ENUM) { + newSchema = Schema.createEnum(terminal.getName(), terminal.getDoc(), terminal.getNamespace(), + terminal.getEnumSymbols(), terminal.getEnumDefault()); + } else if (type == FIXED) { + newSchema = Schema.createFixed(terminal.getName(), terminal.getDoc(), terminal.getNamespace(), + terminal.getFixedSize()); + } else { + newSchema = Schema.create(type); + } + copyProperties(terminal, newSchema); + replace.put(terminal, newSchema); + return SchemaVisitorAction.CONTINUE; + } + + public void copyProperties(final Schema first, final Schema second) { + // Logical type + 
Optional.ofNullable(first.getLogicalType()).ifPresent(logicalType -> logicalType.addToSchema(second)); + + // Aliases (if applicable) + if (NAMED_SCHEMA_TYPES.contains(first.getType())) { + first.getAliases().forEach(second::addAlias); + } + + // Other properties + first.getObjectProps().forEach((name, value) -> { + if (!schemaPropertiesToRemove.contains(name)) { + second.addProp(name, value); + } + }); + } + + @Override + public SchemaVisitorAction visitNonTerminal(final Schema nt) { + Schema.Type type = nt.getType(); + if (type == RECORD) { + if (isUnresolvedSchema(nt)) { + // unresolved schema will get a replacement that we already encountered, + // or we will attempt to resolve. + final String unresolvedSchemaName = getUnresolvedSchemaName(nt); + Schema resSchema = symbolTable.apply(unresolvedSchemaName); + if (resSchema == null) { + throw new AvroTypeException("Undefined schema: " + unresolvedSchemaName); + } + Schema replacement = replace.computeIfAbsent(resSchema, schema -> { + Schemas.visit(schema, this); + return replace.get(schema); + }); + replace.put(nt, replacement); + } else { + // Create a clone without fields. Fields will be added in afterVisitNonTerminal. + Schema newSchema = Schema.createRecord(nt.getName(), nt.getDoc(), nt.getNamespace(), nt.isError()); + copyProperties(nt, newSchema); + replace.put(nt, newSchema); + } + } + return SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction afterVisitNonTerminal(final Schema nt) { + Schema.Type type = nt.getType(); + Schema newSchema; + switch (type) { + case RECORD: + if (!isUnresolvedSchema(nt)) { + newSchema = replace.get(nt); + // Check if we've already handled the replacement schema with a + // reentrant call to visit(...) from within the visitor. 
+ if (!newSchema.hasFields()) { + List fields = nt.getFields(); + List newFields = new ArrayList<>(fields.size()); + for (Schema.Field field : fields) { + newFields.add(new Schema.Field(field, replace.get(field.schema()))); + } + newSchema.setFields(newFields); + } + } + return SchemaVisitorAction.CONTINUE; + case UNION: + List types = nt.getTypes(); + List newTypes = new ArrayList<>(types.size()); + for (Schema sch : types) { + newTypes.add(replace.get(sch)); + } + newSchema = Schema.createUnion(newTypes); + break; + case ARRAY: + newSchema = Schema.createArray(replace.get(nt.getElementType())); + break; + case MAP: + newSchema = Schema.createMap(replace.get(nt.getValueType())); + break; + default: + throw new IllegalStateException("Illegal type " + type + ", schema " + nt); + } + copyProperties(nt, newSchema); + replace.put(nt, newSchema); + return SchemaVisitorAction.CONTINUE; + } + + @Override + public Schema get() { + return replace.get(root); + } + + @Override + public String toString() { + return "ResolvingVisitor{symbolTable=" + symbolTable + ", schemaPropertiesToRemove=" + schemaPropertiesToRemove + + ", replace=" + replace + '}'; + } + } +} diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaVisitor.java b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaVisitor.java similarity index 77% rename from lang/java/idl/src/main/java/org/apache/avro/idl/SchemaVisitor.java rename to lang/java/avro/src/main/java/org/apache/avro/util/SchemaVisitor.java index 0f9fcae5b68..1ac35baeda7 100644 --- a/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaVisitor.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/SchemaVisitor.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.avro.idl; +package org.apache.avro.util; import org.apache.avro.Schema; @@ -44,4 +44,26 @@ public interface SchemaVisitor { * @return a value that will be returned by the visit method. */ T get(); + + enum SchemaVisitorAction { + + /** + * Continue the visit. + */ + CONTINUE, + /** + * Terminate the visit. + */ + TERMINATE, + /** + * When returned from the pre non-terminal visit method, the children of the non + * terminal are skipped. afterVisitNonTerminal for the current schema will still + * be invoked. + */ + SKIP_SUBTREE, + /** + * Skip visiting the siblings of this schema. + */ + SKIP_SIBLINGS + } } diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/Schemas.java b/lang/java/avro/src/main/java/org/apache/avro/util/Schemas.java similarity index 89% rename from lang/java/idl/src/main/java/org/apache/avro/idl/Schemas.java rename to lang/java/avro/src/main/java/org/apache/avro/util/Schemas.java index da4b949d2bc..927a0c37b43 100644 --- a/lang/java/idl/src/main/java/org/apache/avro/idl/Schemas.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/Schemas.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License.
- @SuppressWarnings("unchecked") - SchemaVisitorAction action = ((Supplier) current).get(); + SchemaVisitor.SchemaVisitorAction action = ((Supplier) current).get(); switch (action) { case CONTINUE: break; @@ -107,14 +106,14 @@ public static T visit(final Schema start, final SchemaVisitor visitor) { private static boolean visitNonTerminal(final SchemaVisitor visitor, final Schema schema, final Deque dq, final Iterable itSupp) { - SchemaVisitorAction action = visitor.visitNonTerminal(schema); + SchemaVisitor.SchemaVisitorAction action = visitor.visitNonTerminal(schema); switch (action) { case CONTINUE: - dq.push((Supplier) () -> visitor.afterVisitNonTerminal(schema)); + dq.push((Supplier) () -> visitor.afterVisitNonTerminal(schema)); itSupp.forEach(dq::push); break; case SKIP_SUBTREE: - dq.push((Supplier) () -> visitor.afterVisitNonTerminal(schema)); + dq.push((Supplier) () -> visitor.afterVisitNonTerminal(schema)); break; case SKIP_SIBLINGS: while (dq.peek() instanceof Schema) { @@ -130,7 +129,7 @@ private static boolean visitNonTerminal(final SchemaVisitor visitor, final Sc } private static boolean visitTerminal(final SchemaVisitor visitor, final Schema schema, final Deque dq) { - SchemaVisitorAction action = visitor.visitTerminal(schema); + SchemaVisitor.SchemaVisitorAction action = visitor.visitTerminal(schema); switch (action) { case CONTINUE: break; diff --git a/lang/java/avro/src/test/java/org/apache/avro/DummySchemaParser.java b/lang/java/avro/src/test/java/org/apache/avro/DummySchemaParser.java index 0a20beadfa7..4802aea0747 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/DummySchemaParser.java +++ b/lang/java/avro/src/test/java/org/apache/avro/DummySchemaParser.java @@ -17,22 +17,30 @@ */ package org.apache.avro; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.io.IOException; import java.net.URI; -import java.util.Collection; public class DummySchemaParser implements FormattedSchemaParser { + /** + * Logger for this class. 
+ */ + private static final Logger LOGGER = LoggerFactory.getLogger(DummySchemaParser.class); public static final String SCHEMA_TEXT_ONE = "one"; public static final Schema FIXED_SCHEMA = Schema.createFixed("DummyOne", null, "tests", 42); public static final String SCHEMA_TEXT_ERROR = "error"; - public static final String SCHEMA_TEXT_IO_ERROR = "ioerror"; + public static final String SCHEMA_TEXT_IO_ERROR = "io-error"; public static final String ERROR_MESSAGE = "Syntax error"; public static final String IO_ERROR_MESSAGE = "I/O error"; @Override - public Schema parse(Collection schemata, URI baseUri, CharSequence formattedSchema) + public Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema) throws IOException, SchemaParseException { + LOGGER.info("Using DummySchemaParser for {}", formattedSchema); if (SCHEMA_TEXT_ONE.contentEquals(formattedSchema)) { + parseContext.put(FIXED_SCHEMA); return FIXED_SCHEMA; } else if (SCHEMA_TEXT_ERROR.contentEquals(formattedSchema)) { throw new SchemaParseException(ERROR_MESSAGE); diff --git a/lang/java/avro/src/test/java/org/apache/avro/ParseContextTest.java b/lang/java/avro/src/test/java/org/apache/avro/ParseContextTest.java new file mode 100644 index 00000000000..0c3a3148058 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/ParseContextTest.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.EnumSet; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class ParseContextTest { + Schema fooRecord, fooRecordCopy, barEnum, bazFixed, mehRecord; + ParseContext fooBarBaz; + + @BeforeEach + public void setUp() throws Exception { + fooRecord = SchemaBuilder.record("ns.Foo").fields().endRecord(); + fooRecordCopy = SchemaBuilder.record("ns.Foo").fields().endRecord(); + barEnum = SchemaBuilder.enumeration("ns.Bar").symbols(); + bazFixed = SchemaBuilder.fixed("ns.Baz").size(8); + mehRecord = SchemaBuilder.record("ns.Meh").fields().endRecord(); + + fooBarBaz = new ParseContext(); + fooBarBaz.put(fooRecord); + fooBarBaz.put(barEnum); + fooBarBaz.put(bazFixed); + } + + @Test + public void checkNewNameContextContainsPrimitives() { + EnumSet complexTypes = EnumSet.of(Schema.Type.RECORD, Schema.Type.ENUM, Schema.Type.FIXED, + Schema.Type.UNION, Schema.Type.ARRAY, Schema.Type.MAP); + EnumSet primitives = EnumSet.complementOf(complexTypes); + + ParseContext context = new ParseContext(); + for (Schema.Type type : complexTypes) { + 
assertFalse(context.contains(type.getName())); + } + for (Schema.Type type : primitives) { + assertTrue(context.contains(type.getName())); + } + } + + @Test + public void primitivesAreNotCached() { + EnumSet primitives = EnumSet.complementOf(EnumSet.of(Schema.Type.RECORD, Schema.Type.ENUM, + Schema.Type.FIXED, Schema.Type.UNION, Schema.Type.ARRAY, Schema.Type.MAP)); + + ParseContext context = new ParseContext(); + for (Schema.Type type : primitives) { + Schema first = context.resolve(type.getName()); + Schema second = context.resolve(type.getName()); + assertEquals(first, second); + assertNotSame(first, second); + + first.addProp("logicalType", "brick"); + assertNotEquals(first, second); + } + } + + @Test + public void validateSchemaTests() { + assertTrue(fooBarBaz.contains(fooRecord)); + assertTrue(fooBarBaz.contains(barEnum)); + assertTrue(fooBarBaz.contains(bazFixed)); + assertFalse(fooBarBaz.contains(mehRecord)); + + assertTrue(fooBarBaz.contains(fooRecord.getFullName())); + assertTrue(fooBarBaz.contains(barEnum.getFullName())); + assertTrue(fooBarBaz.contains(bazFixed.getFullName())); + assertFalse(fooBarBaz.contains(mehRecord.getFullName())); + } + + @Test + public void validateNameResolvingAgainstDefaultNamespace() { + ParseContext context = new ParseContext(""); + assertEquals("Bar", context.resolveName("Bar", "")); + assertEquals("Bar", context.resolveName("Bar", null)); + assertEquals("foo.Bar", context.resolveName("Bar", "foo")); + } + + @Test + public void validateNameResolvingAgainstSetNamespace() { + ParseContext context = new ParseContext("ns"); + assertEquals("ns.Bar", context.resolveName("Bar", "")); + assertEquals("ns.Bar", context.resolveName("Bar", null)); + assertEquals("foo.Bar", context.resolveName("Bar", "foo")); + } + + @Test + public void validateSchemaRetrievalFailure() { + Schema unknown = Schema.createFixed("unknown", null, null, 0); + + assertThrows(AvroRuntimeException.class, () -> fooBarBaz.resolve("unknown")); + assertSame(unknown, 
fooBarBaz.resolve("unknown")); + } + + @Test + public void validateSchemaRetrievalByFullName() { + assertSame(fooRecord, fooBarBaz.resolve(fooRecord.getFullName())); + } + + @Test + public void validateSchemaRetrievalByNameAndInheritedNamespace() { + assertSame(fooRecord, fooBarBaz.namespace(fooRecord.getNamespace()).resolve(fooRecord.getName())); + } + + @Test + public void verifyPutIsIdempotent() { + ParseContext context = new ParseContext(); + assertFalse(context.contains(fooRecord)); + + context.put(fooRecord); + assertTrue(context.contains(fooRecord)); + + context.put(fooRecord); + assertTrue(context.contains(fooRecord)); + } + + @Test + public void verifyPutOnlyAcceptsNamedSchemas() { + ParseContext context = new ParseContext(); + assertThrows(AvroRuntimeException.class, () -> context.put(Schema.create(Schema.Type.STRING))); + } + + @Test + public void verifyAddDoesNotAllowChangingSchemas() { + Schema fooEnum = SchemaBuilder.enumeration("ns.Foo").symbols(); + + ParseContext context = new ParseContext(); + context.put(fooRecord); + assertThrows(AvroRuntimeException.class, () -> context.put(fooEnum)); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java index 6846c4434cf..871c172875d 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java +++ b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java @@ -28,32 +28,32 @@ class SchemaNameValidatorTest { @ParameterizedTest @MethodSource("data") - void validator(Schema.NameValidator validator, String input, boolean expectedResult) { - Schema.NameValidator.Result result = validator.validate(input); + void validator(NameValidator validator, String input, boolean expectedResult) { + NameValidator.Result result = validator.validate(input); Assertions.assertEquals(expectedResult, result.isOK(), result.getErrors()); } static Stream data() { - return 
Stream.of(Arguments.of(Schema.NameValidator.UTF_VALIDATOR, null, false), // null not accepted - Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, null, false), // null not accepted - Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "", false), // empty not accepted - Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "", false), // empty not accepted - Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "Hello world", false), // space not accepted - Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "Hello world", false), // space not accepted - Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H&", false), // non letter or digit not accepted - Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H&", false), // non letter or digit not accepted - Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H=", false), // non letter or digit not accepted - Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H=", false), // non letter or digit not accepted - Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H]", false), // non letter or digit not accepted - Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H]", false), // non letter or digit not accepted - Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "Hello_world", true), - Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "Hello_world", true), - Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "éàçô", true), // Accept accent - Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "éàçô", false), // Not Accept accent - Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "5éàçô", false), // can't start with number - Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "5éàçô", false), // can't start with number - Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "_Hello_world", true), - Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "_Hello_world", true)); + return Stream.of(Arguments.of(NameValidator.UTF_VALIDATOR, null, false), // null not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, null, false), // null 
not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "", false), // empty not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "", false), // empty not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "Hello world", false), // space not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "Hello world", false), // space not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "H&", false), // non letter or digit not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "H&", false), // non letter or digit not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "H=", false), // non letter or digit not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "H=", false), // non letter or digit not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "H]", false), // non letter or digit not accepted + Arguments.of(NameValidator.STRICT_VALIDATOR, "H]", false), // non letter or digit not accepted + Arguments.of(NameValidator.UTF_VALIDATOR, "Hello_world", true), + Arguments.of(NameValidator.STRICT_VALIDATOR, "Hello_world", true), + Arguments.of(NameValidator.UTF_VALIDATOR, "éàçô", true), // Accept accent + Arguments.of(NameValidator.STRICT_VALIDATOR, "éàçô", false), // Not Accept accent + Arguments.of(NameValidator.UTF_VALIDATOR, "5éàçô", false), // can't start with number + Arguments.of(NameValidator.STRICT_VALIDATOR, "5éàçô", false), // can't start with number + Arguments.of(NameValidator.UTF_VALIDATOR, "_Hello_world", true), + Arguments.of(NameValidator.STRICT_VALIDATOR, "_Hello_world", true)); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java index 4b4f1c2d35a..6ed6b35cc50 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java @@ -90,7 +90,7 @@ void throttledInputStream() throws IOException { // magic header check. 
This happens with throttled input stream, // where we read into buffer less bytes than requested. - Schema legacySchema = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false) + Schema legacySchema = new Schema.Parser(NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": " + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); File f = dataDir.resolve("testThrottledInputStream.avro").toFile(); @@ -149,7 +149,7 @@ void inputStreamEOF() throws IOException { // AVRO-2944 describes hanging/failure in reading Avro file with performing // magic header check. This potentially happens with a defective input stream // where a -1 value is unexpectedly returned from a read. - Schema legacySchema = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false) + Schema legacySchema = new Schema.Parser(NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": " + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); File f = dataDir.resolve("testInputStreamEOF.avro").toFile(); @@ -198,7 +198,7 @@ void ignoreSchemaValidationOnRead() throws IOException { // This schema has an accent in the name and the default for the field doesn't // match the first type in the union. A Java SDK in the past could create a file // containing this schema. 
- Schema legacySchema = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false) + Schema legacySchema = new Schema.Parser(NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse("{\"type\": \"record\", \"name\": \"InvalidAccëntWithInvalidNull\", \"fields\": " + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java index 64748da1364..805f2b80b2a 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java @@ -553,14 +553,14 @@ public void parseAliases() throws JsonProcessingException { @Test void testContentAfterAvsc() { - Schema.Parser parser = new Schema.Parser(Schema.NameValidator.UTF_VALIDATOR); + Schema.Parser parser = new Schema.Parser(NameValidator.UTF_VALIDATOR); parser.setValidateDefaults(true); assertThrows(SchemaParseException.class, () -> parser.parse("{\"type\": \"string\"}; DROP TABLE STUDENTS")); } @Test void testContentAfterAvscInInputStream() throws Exception { - Schema.Parser parser = new Schema.Parser(Schema.NameValidator.UTF_VALIDATOR); + Schema.Parser parser = new Schema.Parser(NameValidator.UTF_VALIDATOR); parser.setValidateDefaults(true); String avsc = "{\"type\": \"string\"}; DROP TABLE STUDENTS"; ByteArrayInputStream is = new ByteArrayInputStream(avsc.getBytes(StandardCharsets.UTF_8)); @@ -576,7 +576,7 @@ void testContentAfterAvscInFile() throws Exception { writer.flush(); } - Schema.Parser parser = new Schema.Parser(Schema.NameValidator.UTF_VALIDATOR); + Schema.Parser parser = new Schema.Parser(NameValidator.UTF_VALIDATOR); parser.setValidateDefaults(true); assertThrows(SchemaParseException.class, () -> parser.parse(avscFile)); } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java 
b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java index 293554e8e4b..fdb18d0f87c 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java @@ -882,7 +882,7 @@ void namespaceDefaulting() { void namesAcceptAll() throws InterruptedException { // Ensure that Schema.setNameValidator won't interfere with others unit tests. Runnable r = () -> { - Schema.setNameValidator(Schema.NameValidator.NO_VALIDATION); + Schema.setNameValidator(NameValidator.NO_VALIDATION); final Schema schema = SchemaBuilder.record("7name").fields().name("123").type(Schema.create(Schema.Type.INT)) .noDefault().endRecord(); Assertions.assertNotNull(schema); diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java index 5f52a2cf789..2a75916169d 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java @@ -38,6 +38,7 @@ import org.apache.avro.AvroRuntimeException; import org.apache.avro.AvroTypeException; import org.apache.avro.JsonProperties; +import org.apache.avro.NameValidator; import org.apache.avro.Protocol; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; @@ -1260,7 +1261,7 @@ private static class Z { @Test void dollarTerminatedNamespaceCompatibility() { ReflectData data = ReflectData.get(); - Schema s = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse( + Schema s = new Schema.Parser(NameValidator.NO_VALIDATION).parse( "{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect$\",\"fields\":[]}"); assertEquals(data.getSchema(data.getClass(s)).toString(), "{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[]}"); @@ -1270,7 +1271,7 @@ void dollarTerminatedNamespaceCompatibility() 
{ void dollarTerminatedNestedStaticClassNamespaceCompatibility() { ReflectData data = ReflectData.get(); // Older versions of Avro generated this namespace on nested records. - Schema s = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse( + Schema s = new Schema.Parser(NameValidator.NO_VALIDATION).parse( "{\"type\":\"record\",\"name\":\"AnotherSampleRecord\",\"namespace\":\"org.apache.avro.reflect.TestReflect$SampleRecord\",\"fields\":[]}"); assertThat(data.getSchema(data.getClass(s)).getFullName(), is("org.apache.avro.reflect.TestReflect.SampleRecord.AnotherSampleRecord")); diff --git a/lang/java/idl/src/test/java/org/apache/avro/idl/TestSchemaResolver.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemaResolver.java similarity index 62% rename from lang/java/idl/src/test/java/org/apache/avro/idl/TestSchemaResolver.java rename to lang/java/avro/src/test/java/org/apache/avro/util/TestSchemaResolver.java index 70488232581..1a340b7fa5d 100644 --- a/lang/java/idl/src/test/java/org/apache/avro/idl/TestSchemaResolver.java +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemaResolver.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + /* * Copyright 2017 The Apache Software Foundation. * @@ -13,32 +31,25 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.avro.idl; +package org.apache.avro.util; -import org.apache.avro.Protocol; import org.apache.avro.Schema; import org.apache.avro.SchemaBuilder; -import org.junit.Assert; import org.junit.Test; import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.io.File; -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; public class TestSchemaResolver { @Test public void testResolving() throws IOException { - Path testIdl = Paths.get(".", "src", "test", "idl", "cycle.avdl").toAbsolutePath(); - IdlReader parser = new IdlReader(); - IdlFile idlFile = parser.parse(testIdl); - Protocol protocol = idlFile.getProtocol(); - System.out.println(protocol); - Assert.assertEquals(5, protocol.getTypes().size()); + // Path testIdl = Paths.get(".", "src", "test", "idl", + // "cycle.avdl").toAbsolutePath(); + // IdlReader parser = new IdlReader(); + // IdlFile idlFile = parser.parse(testIdl); + // Protocol protocol = idlFile.getProtocol(); + // System.out.println(protocol); + // Assert.assertEquals(5, protocol.getTypes().size()); } @Test(expected = IllegalArgumentException.class) diff --git a/lang/java/idl/src/test/java/org/apache/avro/idl/TestSchemas.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemas.java similarity index 90% rename from lang/java/idl/src/test/java/org/apache/avro/idl/TestSchemas.java rename to lang/java/avro/src/test/java/org/apache/avro/util/TestSchemas.java index 000ba20dbcb..6609819299c 100644 --- a/lang/java/idl/src/test/java/org/apache/avro/idl/TestSchemas.java +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestSchemas.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + /* * Copyright 2017 The Apache Software Foundation. * @@ -13,7 +31,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.avro.idl; +package org.apache.avro.util; import org.apache.avro.Schema; import org.junit.Assert; diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/IdlFile.java b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlFile.java index b3777c9f790..75d0f73faaf 100644 --- a/lang/java/idl/src/main/java/org/apache/avro/idl/IdlFile.java +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlFile.java @@ -34,21 +34,18 @@ public class IdlFile { private final Schema mainSchema; private final Protocol protocol; - private final String namespace; private final Map namedSchemas; private final List warnings; IdlFile(Protocol protocol, List warnings) { - this(protocol.getNamespace(), protocol.getTypes(), null, protocol, warnings); + this(protocol.getTypes(), null, protocol, warnings); } - IdlFile(String namespace, Schema mainSchema, Iterable schemas, List warnings) { - this(namespace, schemas, mainSchema, null, warnings); + IdlFile(Schema mainSchema, Iterable schemas, List warnings) { + this(schemas, mainSchema, null, warnings); } - private IdlFile(String namespace, Iterable schemas, 
Schema mainSchema, Protocol protocol, - List warnings) { - this.namespace = namespace; + private IdlFile(Iterable schemas, Schema mainSchema, Protocol protocol, List warnings) { this.namedSchemas = new LinkedHashMap<>(); for (Schema namedSchema : schemas) { this.namedSchemas.put(namedSchema.getFullName(), namedSchema); @@ -82,13 +79,6 @@ public List getWarnings(String importFile) { .collect(Collectors.toList()); } - /** - * The default namespace to resolve schema names against. - */ - public String getNamespace() { - return namespace; - } - /** * The named schemas defined by the IDL file, mapped by their full name. */ @@ -105,14 +95,7 @@ public Map getNamedSchemas() { * @return the schema, or {@code null} if it does not exist */ public Schema getNamedSchema(String name) { - Schema result = namedSchemas.get(name); - if (result != null) { - return result; - } - if (namespace != null && !name.contains(".")) { - result = namedSchemas.get(namespace + '.' + name); - } - return result; + return namedSchemas.get(name); } // Visible for testing diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/IdlReader.java b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlReader.java index ea3f3cff6df..047d162879b 100644 --- a/lang/java/idl/src/main/java/org/apache/avro/idl/IdlReader.java +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlReader.java @@ -38,6 +38,8 @@ import org.apache.avro.JsonSchemaParser; import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; +import org.apache.avro.NameValidator; +import org.apache.avro.ParseContext; import org.apache.avro.Protocol; import org.apache.avro.Schema; import org.apache.avro.SchemaParseException; @@ -69,6 +71,7 @@ import org.apache.avro.idl.IdlParser.SchemaPropertyContext; import org.apache.avro.idl.IdlParser.UnionTypeContext; import org.apache.avro.idl.IdlParser.VariableDeclarationContext; +import org.apache.avro.util.SchemaResolver; import org.apache.avro.util.UtfTextUtils; import 
org.apache.avro.util.internal.Accessor; import org.apache.commons.text.StringEscapeUtils; @@ -85,7 +88,6 @@ import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.Deque; import java.util.HashSet; @@ -102,7 +104,6 @@ import java.util.regex.Pattern; import static java.util.Collections.singleton; -import static java.util.Collections.unmodifiableMap; public class IdlReader { /** @@ -146,42 +147,38 @@ public void syntaxError(Recognizer recognizer, Object offendingSymbol, int private static final String CLASSPATH_SCHEME = "classpath"; private final Set readLocations; - private final Map names; + private final ParseContext parseContext; public IdlReader() { - readLocations = new HashSet<>(); - names = new LinkedHashMap<>(); + this(new ParseContext()); } - public Map getTypes() { - return unmodifiableMap(names); + public IdlReader(NameValidator nameValidator) { + this(new ParseContext(nameValidator)); } - private Schema namedSchemaOrUnresolved(String fullName) { - Schema schema = names.get(fullName); - if (schema == null) { - schema = SchemaResolver.unresolvedSchema(fullName); - } - return schema; + public IdlReader(ParseContext parseContext) { + readLocations = new HashSet<>(); + this.parseContext = parseContext; } - private void setTypes(Collection types) { - names.clear(); - addTypes(types); + private Schema namedSchemaOrUnresolved(String fullName) { + return parseContext.resolve(fullName); } - public void addTypes(Collection types) { - for (Schema schema : types) { - addSchema(schema); - } + private void addSchema(Schema schema) { + parseContext.put(schema); } - private void addSchema(Schema schema) { - String fullName = schema.getFullName(); - if (names.containsKey(fullName)) { - throw new SchemaParseException("Can't redefine: " + fullName); + public IdlFile resolve(IdlFile unresolved) { + Protocol protocol = unresolved.getProtocol(); + if (protocol == null) { + 
Schema mainSchema = SchemaResolver.resolve(parseContext, unresolved.getMainSchema()); + Iterable namedSchemas = SchemaResolver.resolve(parseContext, unresolved.getNamedSchemas().values()); + return new IdlFile(mainSchema, namedSchemas, unresolved.getWarnings()); + } else { + return new IdlFile(SchemaResolver.resolve(parseContext, protocol), unresolved.getWarnings()); } - names.put(fullName, schema); } public IdlFile parse(Path location) throws IOException { @@ -366,13 +363,11 @@ private void popNamespace() { @Override public void exitIdlFile(IdlFileContext ctx) { - IdlFile unresolved; if (protocol == null) { - unresolved = new IdlFile(currentNamespace(), mainSchema, getTypes().values(), warnings); + result = new IdlFile(mainSchema, parseContext.typesByName().values(), warnings); } else { - unresolved = new IdlFile(protocol, warnings); + result = new IdlFile(protocol, warnings); } - result = SchemaResolver.resolve(unresolved, OPTIONAL_NULLABLE_TYPE_PROPERTY); } @Override @@ -396,7 +391,7 @@ public void enterProtocolDeclarationBody(ProtocolDeclarationBodyContext ctx) { @Override public void exitProtocolDeclaration(ProtocolDeclarationContext ctx) { if (protocol != null) - protocol.setTypes(getTypes().values()); + protocol.setTypes(parseContext.typesByName().values()); if (!namespaces.isEmpty()) popNamespace(); } @@ -459,10 +454,7 @@ public void exitImportStatement(ImportStatementContext importContext) { case IdlParser.Schema: try (InputStream stream = importLocation.toURL().openStream()) { JsonSchemaParser parser = new JsonSchemaParser(); - Collection types = new ArrayList<>(names.values()); - parser.parse(types, importLocation.resolve("."), UtfTextUtils.readAllBytes(stream, null)); - // Ensure we're only changing (adding to) the known types when a parser succeeds - types.forEach(IdlReader.this::addSchema); + parser.parse(parseContext, importLocation.resolve("."), UtfTextUtils.readAllBytes(stream, null)); } break; } @@ -690,18 +682,22 @@ private JsonNode 
fixDefaultValue(JsonNode defaultValue, Schema fieldType) { */ private Schema fixOptionalSchema(Schema schema, JsonNode defaultValue) { Object optionalType = schema.getObjectProp(OPTIONAL_NULLABLE_TYPE_PROPERTY); - if (optionalType != null) { - // The schema is a union schema with 2 types: "null" and a non-"null" schema - Schema nullSchema = schema.getTypes().get(0); - Schema nonNullSchema = schema.getTypes().get(1); - boolean nonNullDefault = defaultValue != null && !defaultValue.isNull(); - - // Note: the resolving visitor we'll use later drops the marker property. - if (nonNullDefault) { - return Schema.createUnion(nonNullSchema, nullSchema); - } + if (optionalType == null) { + return schema; + } + + // The schema is a union schema with 2 types: "null" and a non-"null" + // schema. The result of this method must not have the property + // OPTIONAL_NULLABLE_TYPE_PROPERTY. + Schema nullSchema = schema.getTypes().get(0); + Schema nonNullSchema = schema.getTypes().get(1); + boolean nonNullDefault = defaultValue != null && !defaultValue.isNull(); + + if (nonNullDefault) { + return Schema.createUnion(nonNullSchema, nullSchema); + } else { + return Schema.createUnion(nullSchema, nonNullSchema); } - return schema; } @Override diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/IdlSchemaParser.java b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlSchemaParser.java index c6de45bf077..618ac6254a6 100644 --- a/lang/java/idl/src/main/java/org/apache/avro/idl/IdlSchemaParser.java +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlSchemaParser.java @@ -18,34 +18,27 @@ package org.apache.avro.idl; import org.apache.avro.FormattedSchemaParser; +import org.apache.avro.ParseContext; import org.apache.avro.Schema; import org.apache.avro.SchemaParseException; import java.io.IOException; import java.net.URI; -import java.util.Collection; import java.util.regex.Pattern; public class IdlSchemaParser implements FormattedSchemaParser { @Override - public Schema 
parse(Collection existingSchemata, URI baseUri, CharSequence formattedSchema) + public Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema) throws IOException, SchemaParseException { boolean valid = Pattern.compile("^\\A*!" + // Initial whitespace "(?:/\\*(?:[^*]|\\*[^/])*!\\*/\\s*!|//(!=\\R)*!\\R\\s*!)*!" + // Comments "(?:namespace|schema|protocol|record|enum|fixed|import)\\s", // First keyword Pattern.UNICODE_CHARACTER_CLASS | Pattern.MULTILINE).matcher(formattedSchema).find(); if (valid) { - IdlReader idlReader = new IdlReader(); - idlReader.addTypes(existingSchemata); + IdlReader idlReader = new IdlReader(parseContext); IdlFile idlFile = idlReader.parse(baseUri, formattedSchema); - Schema mainSchema = idlFile.getMainSchema(); - if (mainSchema != null) { - return mainSchema; - } - if (!idlFile.getNamedSchemas().isEmpty()) { - return idlFile.getNamedSchemas().values().iterator().next(); - } + return idlFile.getMainSchema(); } return null; } diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/IsResolvedSchemaVisitor.java b/lang/java/idl/src/main/java/org/apache/avro/idl/IsResolvedSchemaVisitor.java deleted file mode 100644 index 12fd5dbff21..00000000000 --- a/lang/java/idl/src/main/java/org/apache/avro/idl/IsResolvedSchemaVisitor.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.avro.idl; - -import org.apache.avro.Schema; - -/** - * This visitor checks if the current schema is fully resolved. - */ -public final class IsResolvedSchemaVisitor implements SchemaVisitor { - boolean hasUnresolvedParts; - - IsResolvedSchemaVisitor() { - hasUnresolvedParts = false; - } - - @Override - public SchemaVisitorAction visitTerminal(Schema terminal) { - hasUnresolvedParts = SchemaResolver.isUnresolvedSchema(terminal); - return hasUnresolvedParts ? SchemaVisitorAction.TERMINATE : SchemaVisitorAction.CONTINUE; - } - - @Override - public SchemaVisitorAction visitNonTerminal(Schema nonTerminal) { - hasUnresolvedParts = SchemaResolver.isUnresolvedSchema(nonTerminal); - if (hasUnresolvedParts) { - return SchemaVisitorAction.TERMINATE; - } - if (nonTerminal.getType() == Schema.Type.RECORD && !nonTerminal.hasFields()) { - // We're still initializing the type... 
- return SchemaVisitorAction.SKIP_SUBTREE; - } - return SchemaVisitorAction.CONTINUE; - } - - @Override - public SchemaVisitorAction afterVisitNonTerminal(Schema nonTerminal) { - return SchemaVisitorAction.CONTINUE; - } - - @Override - public Boolean get() { - return !hasUnresolvedParts; - } -} diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/ResolvingVisitor.java b/lang/java/idl/src/main/java/org/apache/avro/idl/ResolvingVisitor.java deleted file mode 100644 index 04e41f3403a..00000000000 --- a/lang/java/idl/src/main/java/org/apache/avro/idl/ResolvingVisitor.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.avro.idl; - -import org.apache.avro.AvroTypeException; -import org.apache.avro.Schema; -import org.apache.avro.Schema.Field; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.EnumSet; -import java.util.HashSet; -import java.util.IdentityHashMap; -import java.util.List; -import java.util.Optional; -import java.util.Set; -import java.util.function.Function; - -import static org.apache.avro.Schema.Type.ARRAY; -import static org.apache.avro.Schema.Type.ENUM; -import static org.apache.avro.Schema.Type.FIXED; -import static org.apache.avro.Schema.Type.MAP; -import static org.apache.avro.Schema.Type.RECORD; -import static org.apache.avro.Schema.Type.UNION; - -/** - * This visitor creates clone of the visited Schemata, minus the specified - * schema properties, and resolves all unresolved schemas. - */ -public final class ResolvingVisitor implements SchemaVisitor { - private static final Set CONTAINER_SCHEMA_TYPES = EnumSet.of(RECORD, ARRAY, MAP, UNION); - private static final Set NAMED_SCHEMA_TYPES = EnumSet.of(RECORD, ENUM, FIXED); - - private final Function symbolTable; - private final Set schemaPropertiesToRemove; - private final IdentityHashMap replace; - - private final Schema root; - - public ResolvingVisitor(final Schema root, final Function symbolTable, - String... 
schemaPropertiesToRemove) { - this(root, symbolTable, new HashSet<>(Arrays.asList(schemaPropertiesToRemove))); - } - - public ResolvingVisitor(final Schema root, final Function symbolTable, - Set schemaPropertiesToRemove) { - this.replace = new IdentityHashMap<>(); - this.symbolTable = symbolTable; - this.schemaPropertiesToRemove = schemaPropertiesToRemove; - - this.root = root; - } - - public ResolvingVisitor withRoot(Schema root) { - return new ResolvingVisitor(root, symbolTable, schemaPropertiesToRemove); - } - - @Override - public SchemaVisitorAction visitTerminal(final Schema terminal) { - Schema.Type type = terminal.getType(); - Schema newSchema; - if (CONTAINER_SCHEMA_TYPES.contains(type)) { - if (!replace.containsKey(terminal)) { - throw new IllegalStateException("Schema " + terminal + " must be already processed"); - } - return SchemaVisitorAction.CONTINUE; - } else if (type == ENUM) { - newSchema = Schema.createEnum(terminal.getName(), terminal.getDoc(), terminal.getNamespace(), - terminal.getEnumSymbols(), terminal.getEnumDefault()); - } else if (type == FIXED) { - newSchema = Schema.createFixed(terminal.getName(), terminal.getDoc(), terminal.getNamespace(), - terminal.getFixedSize()); - } else { - newSchema = Schema.create(type); - } - copyProperties(terminal, newSchema); - replace.put(terminal, newSchema); - return SchemaVisitorAction.CONTINUE; - } - - public void copyProperties(final Schema first, final Schema second) { - // Logical type - Optional.ofNullable(first.getLogicalType()).ifPresent(logicalType -> logicalType.addToSchema(second)); - - // Aliases (if applicable) - if (NAMED_SCHEMA_TYPES.contains(first.getType())) { - first.getAliases().forEach(second::addAlias); - } - - // Other properties - first.getObjectProps().forEach((name, value) -> { - if (!schemaPropertiesToRemove.contains(name)) { - second.addProp(name, value); - } - }); - } - - @Override - public SchemaVisitorAction visitNonTerminal(final Schema nt) { - Schema.Type type = 
nt.getType(); - if (type == RECORD) { - if (SchemaResolver.isUnresolvedSchema(nt)) { - // unresolved schema will get a replacement that we already encountered, - // or we will attempt to resolve. - final String unresolvedSchemaName = SchemaResolver.getUnresolvedSchemaName(nt); - Schema resSchema = symbolTable.apply(unresolvedSchemaName); - if (resSchema == null) { - throw new AvroTypeException("Unable to resolve " + unresolvedSchemaName); - } - Schema replacement = replace.computeIfAbsent(resSchema, schema -> { - Schemas.visit(schema, this); - return replace.get(schema); - }); - replace.put(nt, replacement); - } else { - // create a fieldless clone. Fields will be added in afterVisitNonTerminal. - Schema newSchema = Schema.createRecord(nt.getName(), nt.getDoc(), nt.getNamespace(), nt.isError()); - copyProperties(nt, newSchema); - replace.put(nt, newSchema); - } - } - return SchemaVisitorAction.CONTINUE; - } - - @Override - public SchemaVisitorAction afterVisitNonTerminal(final Schema nt) { - Schema.Type type = nt.getType(); - Schema newSchema; - switch (type) { - case RECORD: - if (!SchemaResolver.isUnresolvedSchema(nt)) { - newSchema = replace.get(nt); - // Check if we've already handled the replacement schema with a - // reentrant call to visit(...) from within the visitor. 
- if (!newSchema.hasFields()) { - List fields = nt.getFields(); - List newFields = new ArrayList<>(fields.size()); - for (Schema.Field field : fields) { - newFields.add(new Field(field, replace.get(field.schema()))); - } - newSchema.setFields(newFields); - } - } - return SchemaVisitorAction.CONTINUE; - case UNION: - List types = nt.getTypes(); - List newTypes = new ArrayList<>(types.size()); - for (Schema sch : types) { - newTypes.add(replace.get(sch)); - } - newSchema = Schema.createUnion(newTypes); - break; - case ARRAY: - newSchema = Schema.createArray(replace.get(nt.getElementType())); - break; - case MAP: - newSchema = Schema.createMap(replace.get(nt.getValueType())); - break; - default: - throw new IllegalStateException("Illegal type " + type + ", schema " + nt); - } - copyProperties(nt, newSchema); - replace.put(nt, newSchema); - return SchemaVisitorAction.CONTINUE; - } - - @Override - public Schema get() { - return replace.get(root); - } - - @Override - public String toString() { - return "ResolvingVisitor{symbolTable=" + symbolTable + ", schemaPropertiesToRemove=" + schemaPropertiesToRemove - + ", replace=" + replace + '}'; - } -} diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaResolver.java b/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaResolver.java deleted file mode 100644 index 8c9a9c15b99..00000000000 --- a/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaResolver.java +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.avro.idl; - -import org.apache.avro.JsonProperties; -import org.apache.avro.Protocol; -import org.apache.avro.Schema; - -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.Function; -import java.util.stream.Collectors; - -/** - * Utility class to resolve schemas that are unavailable at the point they are - * referenced in the IDL. - */ -final class SchemaResolver { - - private SchemaResolver() { - } - - private static final String UR_SCHEMA_ATTR = "org.apache.avro.idl.unresolved.name"; - - private static final String UR_SCHEMA_NAME = "UnresolvedSchema"; - - private static final String UR_SCHEMA_NS = "org.apache.avro.compiler"; - - private static final AtomicInteger COUNTER = new AtomicInteger(); - - /** - * Create a schema to represent an "unresolved" schema. (used to represent a - * schema whose definition does not exist, yet). - * - * @param name a schema name - * @return an unresolved schema for the given name - */ - static Schema unresolvedSchema(final String name) { - Schema schema = Schema.createRecord(UR_SCHEMA_NAME + '_' + COUNTER.getAndIncrement(), "unresolved schema", - UR_SCHEMA_NS, false, Collections.emptyList()); - schema.addProp(UR_SCHEMA_ATTR, name); - return schema; - } - - /** - * Is this an unresolved schema. 
- * - * @param schema a schema - * @return whether the schema is an unresolved schema - */ - static boolean isUnresolvedSchema(final Schema schema) { - return (schema.getType() == Schema.Type.RECORD && schema.getProp(UR_SCHEMA_ATTR) != null && schema.getName() != null - && schema.getName().startsWith(UR_SCHEMA_NAME) && UR_SCHEMA_NS.equals(schema.getNamespace())); - } - - /** - * Get the unresolved schema name. - * - * @param schema an unresolved schema - * @return the name of the unresolved schema - */ - static String getUnresolvedSchemaName(final Schema schema) { - if (!isUnresolvedSchema(schema)) { - throw new IllegalArgumentException("Not a unresolved schema: " + schema); - } - return schema.getProp(UR_SCHEMA_ATTR); - } - - /** - * Is this an unresolved schema? - */ - static boolean isFullyResolvedSchema(final Schema schema) { - if (isUnresolvedSchema(schema)) { - return false; - } else { - return Schemas.visit(schema, new IsResolvedSchemaVisitor()); - } - } - - /** - * Clone all provided schemas while resolving all unreferenced schemas. - * - * @param idlFile a parsed IDL file - * @return a copy of idlFile with all schemas resolved - */ - static IdlFile resolve(final IdlFile idlFile, String... 
schemaPropertiesToRemove) { - if (idlFile.getProtocol() != null) { - return new IdlFile(resolve(idlFile.getProtocol(), schemaPropertiesToRemove), idlFile.getWarnings()); - } - - ResolvingVisitor visitor = new ResolvingVisitor(null, idlFile::getNamedSchema, schemaPropertiesToRemove); - Function resolver = schema -> Schemas.visit(schema, visitor.withRoot(schema)); - - List namedSchemata = idlFile.getNamedSchemas().values().stream().map(resolver).collect(Collectors.toList()); - Schema mainSchema = Optional.ofNullable(idlFile.getMainSchema()).map(resolver).orElse(null); - return new IdlFile(idlFile.getNamespace(), mainSchema, namedSchemata, idlFile.getWarnings()); - } - - /** - * Will clone the provided protocol while resolving all unreferenced schemas - * - * @param protocol a parsed protocol - * @return a copy of the protocol with all schemas resolved - */ - static Protocol resolve(final Protocol protocol, String... schemaPropertiesToRemove) { - // Create an empty copy of the protocol - Protocol result = new Protocol(protocol.getName(), protocol.getDoc(), protocol.getNamespace()); - protocol.getObjectProps().forEach(((JsonProperties) result)::addProp); - - ResolvingVisitor visitor = new ResolvingVisitor(null, protocol::getType, schemaPropertiesToRemove); - Function resolver = schema -> Schemas.visit(schema, visitor.withRoot(schema)); - - // Resolve all schemata in the protocol. 
- result.setTypes(protocol.getTypes().stream().map(resolver).collect(Collectors.toList())); - Map resultMessages = result.getMessages(); - protocol.getMessages().forEach((name, oldValue) -> { - Protocol.Message newValue; - if (oldValue.isOneWay()) { - newValue = result.createMessage(oldValue.getName(), oldValue.getDoc(), oldValue, - resolver.apply(oldValue.getRequest())); - } else { - Schema request = resolver.apply(oldValue.getRequest()); - Schema response = resolver.apply(oldValue.getResponse()); - Schema errors = resolver.apply(oldValue.getErrors()); - newValue = result.createMessage(oldValue.getName(), oldValue.getDoc(), oldValue, request, response, errors); - } - resultMessages.put(name, newValue); - }); - return result; - } -} diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaVisitorAction.java b/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaVisitorAction.java deleted file mode 100644 index 6aed09b3d32..00000000000 --- a/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaVisitorAction.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.avro.idl; - -public enum SchemaVisitorAction { - - /** - * continue visit. 
- */ - CONTINUE, - /** - * terminate visit. - */ - TERMINATE, - /** - * when returned from pre non terminal visit method the children of the non - * terminal are skipped. afterVisitNonTerminal for the current schema will not - * be invoked. - */ - SKIP_SUBTREE, - /** - * Skip visiting the siblings of this schema. - */ - SKIP_SIBLINGS -} diff --git a/lang/java/idl/src/test/java/org/apache/avro/idl/IdlReaderTest.java b/lang/java/idl/src/test/java/org/apache/avro/idl/IdlReaderTest.java index 8e9f187f4ce..9b503bcb2a5 100644 --- a/lang/java/idl/src/test/java/org/apache/avro/idl/IdlReaderTest.java +++ b/lang/java/idl/src/test/java/org/apache/avro/idl/IdlReaderTest.java @@ -97,7 +97,6 @@ public void validateProtocolParsingResult() throws IOException { assertEquals(1, idlFile.getNamedSchemas().size()); idlFile.getNamedSchemas().keySet().forEach(System.out::println); assertNotNull(idlFile.getNamedSchema("communication.Message")); - assertNotNull(idlFile.getNamedSchema("Message")); assertNotNull(idlFile.getProtocol()); assertNull(idlFile.getMainSchema()); @@ -111,12 +110,11 @@ public void validateSchemaParsingResult() throws IOException { assertEquals(1, idlFile.getNamedSchemas().size()); idlFile.getNamedSchemas().keySet().forEach(System.out::println); assertNotNull(idlFile.getNamedSchema("communication.Message")); - assertNotNull(idlFile.getNamedSchema("Message")); assertNull(idlFile.getProtocol()); Schema mainSchema = idlFile.getMainSchema(); assertEquals(Schema.Type.ARRAY, mainSchema.getType()); - assertEquals(idlFile.getNamedSchema("Message"), mainSchema.getElementType()); + assertEquals(idlFile.getNamedSchema("communication.Message"), mainSchema.getElementType()); } @Test diff --git a/lang/java/idl/src/test/java/org/apache/avro/idl/TestCycle.java b/lang/java/idl/src/test/java/org/apache/avro/idl/TestCycle.java index 427de0957d6..a3c94ff025d 100644 --- a/lang/java/idl/src/test/java/org/apache/avro/idl/TestCycle.java +++ 
b/lang/java/idl/src/test/java/org/apache/avro/idl/TestCycle.java @@ -44,26 +44,27 @@ public class TestCycle { @Test public void testCycleGeneration() throws IOException, URISyntaxException { final ClassLoader cl = Thread.currentThread().getContextClassLoader(); - IdlFile idlFile = new IdlReader().parse(requireNonNull(cl.getResource("input/cycle.avdl")).toURI()); + IdlReader parser = new IdlReader(); + IdlFile idlFile = parser.resolve(parser.parse(requireNonNull(cl.getResource("input/cycle.avdl")).toURI())); String json = idlFile.outputString(); LOG.info(json); - GenericRecordBuilder rb2 = new GenericRecordBuilder(idlFile.getNamedSchema("SampleNode")); + GenericRecordBuilder rb2 = new GenericRecordBuilder(idlFile.getNamedSchema("org.apache.avro.gen.SampleNode")); rb2.set("count", 10); rb2.set("subNodes", Collections.EMPTY_LIST); GenericData.Record node = rb2.build(); - GenericRecordBuilder mb = new GenericRecordBuilder(idlFile.getNamedSchema("Method")); + GenericRecordBuilder mb = new GenericRecordBuilder(idlFile.getNamedSchema("org.apache.avro.gen.Method")); mb.set("declaringClass", "Test"); mb.set("methodName", "test"); GenericData.Record method = mb.build(); - GenericRecordBuilder spb = new GenericRecordBuilder(idlFile.getNamedSchema("SamplePair")); + GenericRecordBuilder spb = new GenericRecordBuilder(idlFile.getNamedSchema("org.apache.avro.gen.SamplePair")); spb.set("method", method); spb.set("node", node); GenericData.Record sp = spb.build(); - GenericRecordBuilder rb = new GenericRecordBuilder(idlFile.getNamedSchema("SampleNode")); + GenericRecordBuilder rb = new GenericRecordBuilder(idlFile.getNamedSchema("org.apache.avro.gen.SampleNode")); rb.set("count", 10); rb.set("subNodes", Collections.singletonList(sp)); GenericData.Record record = rb.build(); diff --git a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java index d85b28effa3..ab35a6d837a 100644 --- 
a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java +++ b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java @@ -205,9 +205,9 @@ void record(TestInfo testInfo) throws Exception { @Test void invalidNameTolerance() { - new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"1X\",\"fields\":[]}"); - new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"X-\",\"fields\":[]}"); - new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"X$\",\"fields\":[]}"); + new Schema.Parser(NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"1X\",\"fields\":[]}"); + new Schema.Parser(NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"X-\",\"fields\":[]}"); + new Schema.Parser(NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"X$\",\"fields\":[]}"); } @Test diff --git a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLMojo.java b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLMojo.java index 15f6a6c0c4e..d6db625d1de 100644 --- a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLMojo.java +++ b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLMojo.java @@ -18,21 +18,20 @@ package org.apache.avro.mojo; -import java.io.File; -import java.io.IOException; -import java.net.URL; -import java.net.URLClassLoader; -import java.util.ArrayList; -import java.util.List; - import org.apache.avro.Protocol; import org.apache.avro.compiler.specific.SpecificCompiler; import org.apache.avro.generic.GenericData; - import org.apache.avro.idl.IdlFile; import org.apache.avro.idl.IdlReader; import org.apache.maven.artifact.DependencyResolutionRequiredException; +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.net.URLClassLoader; +import java.util.ArrayList; +import java.util.List; + /** * Generate Java classes and interfaces from AvroIDL files 
(.avdl) * @@ -89,6 +88,7 @@ protected void doCompile(String filename, File sourceDirectory, File outputDirec for (String warning : idlFile.getWarnings()) { getLog().warn(warning); } + idlFile = parser.resolve(idlFile); final SpecificCompiler compiler; final Protocol protocol = idlFile.getProtocol(); if (protocol != null) { diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/IdlTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/IdlTool.java index d20226b8e77..a0acd520199 100644 --- a/lang/java/tools/src/main/java/org/apache/avro/tool/IdlTool.java +++ b/lang/java/tools/src/main/java/org/apache/avro/tool/IdlTool.java @@ -70,6 +70,7 @@ public int run(InputStream in, PrintStream out, PrintStream err, List ar for (String warning : idlFile.getWarnings()) { err.println("Warning: " + warning); } + idlFile = parser.resolve(idlFile); p = idlFile.getProtocol(); m = idlFile.getMainSchema(); }