AVRO-3666: Refactor for recent changes

Includes the use of NameValidator and parsing multiple files with circular references between them.
apache · Nov 9, 2023 · e92eac8 · e92eac8
1 parent d9e1754
commit e92eac8
Show file tree

Hide file tree

Showing 32 changed files with 1,183 additions and 739 deletions.
diff --git a/.editorconfig b/.editorconfig
@@ -19,6 +19,8 @@ root = true
 charset = utf-8
 end_of_line = lf
 insert_final_newline = true
+ij_any_block_comment_at_first_column = false
+ij_any_line_comment_at_first_column = false
 
 [*.{java,xml,sh}]
 indent_style = space

diff --git a/lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java b/lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java
@@ -19,7 +19,6 @@
 
 import java.io.IOException;
 import java.net.URI;
-import java.util.Collection;
 
 /**
  * Schema parser for a specific schema format.
@@ -29,46 +28,50 @@
  * schema sources.
  * </p>
  *
- * <h2>Note to implementers:</h2>
- *
- * <p>
- * Implementations are located using a {@link java.util.ServiceLoader}. See that
- * class for details.
- * </p>
- *
- * <p>
- * You can expect that schemas being read are invalid, so you are encouraged to
- * return {@code null} upon parsing failure where the input clearly doesn't make
- * sense (e.g., reading "/**" when expecting JSON). If the input is likely in
- * the correct format, but invalid, throw a {@link SchemaParseException}
- * instead.
- * </p>
- *
  * <p>
- * Note that throwing anything other than a {@code SchemaParseException} will
- * abort the parsing process, so reserve that for rethrowing exceptions.
+ * Implementations are located using a {@link java.util.ServiceLoader} and must
+ * therefore be thread-safe. See the {@code ServiceLoader} class for details on
+ * loading your implementation.
  * </p>
  *
  * @see java.util.ServiceLoader
  */
 public interface FormattedSchemaParser {
   /**
-   * Parse a schema from a text based source. Can use the base location of the
-   * schema (e.g., the directory where the schema file lives) if available.
-   *
    * <p>
-   * Implementations should add all named schemas they parse to the collection.
+   * Parse schema definitions from a text based source.
    * </p>
    *
-   * @param types           a mutable collection of known types; parsed named
-   *                        schemata will be added
+   * <h2>Notes for implementers:</h2>
+   *
+   * <ul>
+   * <li>Schema definitions may well not be in the format the parser
+   * expects. So when the input clearly doesn't make sense (e.g., reading "/**"
+   * when expecting JSON), it is a good idea not to do anything (especially
+   * not to call methods on the {@code ParseContext}).</li>
+   * <li>The parameter {@code parseContext} is not thread-safe.</li>
+   * <li>When parsing, all parsed schema definitions should be added to the
+   * provided {@link ParseContext}.</li>
+   * <li>Optionally, you may return a "main" schema. Some schema definitions have
+   * one, for example the schema defined by the root of the JSON document in a
+   * <a href="https://avro.apache.org/docs/current/specification/">standard schema
+   * definition</a>. If unsure, return {@code null}.</li>
+   * <li>If parsing fails, throw a {@link SchemaParseException}. This will let the
+   * parsing process recover and continue.</li>
+   * <li>Throwing anything other than a {@code SchemaParseException} will abort
+   * the parsing process, so reserve that for rethrowing exceptions.</li>
+   * </ul>
+   *
+   * @param parseContext    the current parse context: all parsed schemata should
+   *                        be added here so that names can be resolved against
+   *                        them; contains all previously known types
    * @param baseUri         the base location of the schema, or {@code null} if
    *                        not known
-   * @param formattedSchema the schema as text
-   * @return the parsed schema, or {@code null} if the format is not supported
+   * @param formattedSchema the text of the schema definition(s) to parse
+   * @return the main schema, if any
    * @throws IOException          when the schema cannot be read
    * @throws SchemaParseException when the schema cannot be parsed
    */
-  Schema parse(Collection<Schema> types, URI baseUri, CharSequence formattedSchema)
+  Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema)
       throws IOException, SchemaParseException;
 }
diff --git a/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java
@@ -19,8 +19,6 @@
 
 import java.io.IOException;
 import java.net.URI;
-import java.util.ArrayList;
-import java.util.Collection;
 
 /**
  * Schema parser for JSON formatted schemata. This initial implementation simply
@@ -59,32 +57,26 @@ public static Schema parseInternal(String... fragments) {
     for (String fragment : fragments) {
       buffer.append(fragment);
     }
-    return new JsonSchemaParser().parse(new ArrayList<>(), buffer, true);
+    return new JsonSchemaParser().parse(new ParseContext(NameValidator.NO_VALIDATION), buffer, null);
   }
 
   @Override
-  public Schema parse(Collection<Schema> schemas, URI baseUri, CharSequence formattedSchema)
+  public Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema)
       throws IOException, SchemaParseException {
-    return parse(schemas, formattedSchema, false);
+    return parse(parseContext, formattedSchema, parseContext.nameValidator);
   }
 
-  private Schema parse(Collection<Schema> schemas, CharSequence formattedSchema, boolean skipValidation)
+  private Schema parse(ParseContext parseContext, CharSequence formattedSchema, NameValidator nameValidator)
       throws SchemaParseException {
-    // TODO: refactor JSON parsing out of the Schema class
-    Schema.Parser parser;
-    if (skipValidation) {
-      parser = new Schema.Parser(Schema.NameValidator.NO_VALIDATION);
+    Schema.Parser parser = new Schema.Parser(nameValidator);
+    if (nameValidator == NameValidator.NO_VALIDATION) {
       parser.setValidateDefaults(false);
     } else {
-      parser = new Schema.Parser();
-    }
-    if (schemas != null) {
-      parser.addTypes(schemas);
+      parser = new Schema.Parser(nameValidator);
     }
+    parser.addTypes(parseContext.typesByName().values());
     Schema schema = parser.parse(formattedSchema.toString());
-    if (schemas != null) {
-      schemas.addAll(parser.getTypes().values());
-    }
+    parser.getTypes().values().forEach(parseContext::put);
     return schema;
   }
 }
diff --git a/lang/java/avro/src/main/java/org/apache/avro/NameValidator.java b/lang/java/avro/src/main/java/org/apache/avro/NameValidator.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro;
+
+public interface NameValidator { // Validates a name; three ready-made validators are provided as constants below.
+
+  class Result { // Outcome of a validation: either the shared OK instance or a Result carrying an error text.
+    private final String errors; // error description; null for the shared OK instance
+
+    public Result(final String errors) {
+      this.errors = errors;
+    }
+
+    public boolean isOK() {
+      return this == NameValidator.OK; // identity check: only the shared OK constant counts as success
+    }
+
+    public String getErrors() {
+      return errors;
+    }
+  }
+
+  Result OK = new Result(null); // shared success result; validators return this exact instance on success
+
+  default Result validate(String name) { // default behaviour: accept every name (no checks at all, null included)
+    return OK;
+  }
+
+  NameValidator NO_VALIDATION = new NameValidator() { // inherits the default validate(), so every name passes
+  };
+
+  NameValidator UTF_VALIDATOR = new NameValidator() { // letters/digits as classified by java.lang.Character, plus '_'
+    @Override
+    public Result validate(final String name) {
+      if (name == null) {
+        return new Result("Null name");
+      }
+      int length = name.length();
+      if (length == 0) {
+        return new Result("Empty name");
+      }
+      char first = name.charAt(0); // NOTE(review): checks UTF-16 chars, not code points - confirm this is intended
+      if (!(Character.isLetter(first) || first == '_')) { // the initial character may not be a digit
+        return new Result("Illegal initial character: " + name);
+      }
+      for (int i = 1; i < length; i++) { // index 0 was already checked as the initial character
+        char c = name.charAt(i);
+        if (!(Character.isLetterOrDigit(c) || c == '_')) {
+          return new Result("Illegal character in: " + name);
+        }
+      }
+      return OK;
+    }
+  };
+
+  NameValidator STRICT_VALIDATOR = new NameValidator() { // same structure as UTF_VALIDATOR, limited to [A-Za-z0-9_]
+    @Override
+    public Result validate(final String name) {
+      if (name == null) {
+        return new Result("Null name");
+      }
+      int length = name.length();
+      if (length == 0) {
+        return new Result("Empty name");
+      }
+      char first = name.charAt(0);
+      if (!(isLetter(first) || first == '_')) { // the initial character may not be a digit
+        return new Result("Illegal initial character: " + name);
+      }
+      for (int i = 1; i < length; i++) { // index 0 was already checked as the initial character
+        char c = name.charAt(i);
+        if (!(isLetter(c) || isDigit(c) || c == '_')) {
+          return new Result("Illegal character in: " + name);
+        }
+      }
+      return OK;
+    }
+
+    private boolean isLetter(char c) { // ASCII letters only: a-z and A-Z
+      return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+    }
+
+    private boolean isDigit(char c) { // ASCII digits only: 0-9
+      return c >= '0' && c <= '9';
+    }
+
+  };
+
+}