apache · Fokko · Apr 4, 2024 · Dec 15, 2023 · Dec 18, 2023 · Dec 19, 2023
diff --git a/doc/content/en/docs/++version++/Getting started (Java)/_index.md b/doc/content/en/docs/++version++/Getting started (Java)/_index.md
@@ -212,7 +212,7 @@ Let's go over the same example as in the previous section, but without using cod
 First, we use a SchemaParser to read our schema definition and create a Schema object.
 
 ```java
-Schema schema = new SchemaParser().parse(new File("user.avsc"));
+Schema schema = new SchemaParser().parse(new File("user.avsc")).mainSchema();
 ```
 
 Using this schema, let's create some users.

diff --git a/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java b/lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java
@@ -57,26 +57,34 @@
     for (String fragment : fragments) {
       buffer.append(fragment);
     }
-    return new JsonSchemaParser().parse(new ParseContext(NameValidator.NO_VALIDATION), buffer, null);
+
+    boolean saved = Schema.getValidateDefaults();
+    try {
+      ParseContext context = new ParseContext(NameValidator.NO_VALIDATION);
+
+      Schema schema = new JsonSchemaParser().parse(context, buffer, true);
+      context.commit();
+      Schema.setValidateDefaults(false);
+      context.resolveAllTypes();
+      return context.resolve(schema);
+    } finally {
+      // Unless explicitly disabled when needed, defaults should always be validated.
+      Schema.setValidateDefaults(saved);
+    }
   }
 
   @Override
   public Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema)
       throws IOException, SchemaParseException {
-    return parse(parseContext, formattedSchema, parseContext.nameValidator);
+    return parse(parseContext, formattedSchema, false);
   }
 
-  private Schema parse(ParseContext parseContext, CharSequence formattedSchema, NameValidator nameValidator)
+  private Schema parse(ParseContext parseContext, CharSequence formattedSchema, boolean allowInvalidDefaults)
       throws SchemaParseException {
-    Schema.Parser parser = new Schema.Parser(nameValidator);
-    if (nameValidator == NameValidator.NO_VALIDATION) {
+    Schema.Parser parser = new Schema.Parser(parseContext);
+    if (allowInvalidDefaults) {
       parser.setValidateDefaults(false);
-    } else {
-      parser = new Schema.Parser(nameValidator);
     }
-    parser.addTypes(parseContext.typesByName().values());
-    Schema schema = parser.parse(formattedSchema.toString());
-    parser.getTypes().values().forEach(parseContext::put);
-    return schema;
+    return parser.parse(formattedSchema.toString());
   }
 }
diff --git a/lang/java/avro/src/main/java/org/apache/avro/ParseContext.java b/lang/java/avro/src/main/java/org/apache/avro/ParseContext.java
@@ -18,25 +18,35 @@
 package org.apache.avro;
 
 import org.apache.avro.util.SchemaResolver;
+import org.apache.avro.util.Schemas;
 
+import java.util.ArrayList;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.function.Function;
+
+import static java.util.Objects.requireNonNull;
 
 /**
  * Class to define a name context, useful to reference schemata with. This
  * allows for the following:
  *
  * <ul>
- * <li>Provide a default namespace for nested contexts, as found for example in
- * JSON based schema definitions.</li>
  * <li>Find schemata by name, including primitives.</li>
+ * <li>Reference schemas that are not defined yet (forward references). Call
+ * {@link #resolveAllTypes()} to ensure the resulting schemas are usable.</li>
  * <li>Collect new named schemata.</li>
  * </ul>
  *
  * <p>
+ * This class is NOT thread-safe.
+ * </p>
+ *
+ * <p>
  * Note: this class has no use for most Avro users, but is a key component when
  * implementing a schema parser.
  * </p>
@@ -64,6 +74,7 @@
   private final Map<String, Schema> newSchemas;
   // Visible for use in JsonSchemaParser
   final NameValidator nameValidator;
+  boolean isResolved;
 
   /**
    * Create a {@code ParseContext} for the default/{@code null} namespace, using
@@ -78,22 +89,14 @@
    * schemata.
    */
   public ParseContext(NameValidator nameValidator) {
-    this(nameValidator, new LinkedHashMap<>(), new LinkedHashMap<>());
+    this(requireNonNull(nameValidator), new LinkedHashMap<>(), new LinkedHashMap<>());
   }
 
   private ParseContext(NameValidator nameValidator, Map<String, Schema> oldSchemas, Map<String, Schema> newSchemas) {
     this.nameValidator = nameValidator;
     this.oldSchemas = oldSchemas;
     this.newSchemas = newSchemas;
-  }
-
-  /**
-   * Create a derived context using a different fallback namespace.
-   *
-   * @return a new context
-   */
-  public ParseContext namespace() {
-    return new ParseContext(nameValidator, oldSchemas, newSchemas);
+    isResolved = false;
   }
 
   /**
@@ -109,56 +112,64 @@
 
   /**
    * <p>
-   * Resolve a schema by name.
+   * Find a schema by name and namespace.
    * </p>
    *
    * <p>
    * That is:
    * </p>
    *
-   * <ul>
-   * <li>If {@code fullName} is a primitive name, return a (new) schema for
-   * it</li>
-   * <li>Otherwise: resolve the schema in its own namespace and in the null
-   * namespace (the former takes precedence)</li>
-   * </ul>
+   * <ol>
+   * <li>If {@code name} is a primitive name, return a (new) schema for it</li>
+   * <li>Otherwise, determine the full schema name (using the given
+   * {@code namespace} if necessary), and find it</li>
+   * <li>If no schema was found and {@code name} is a simple name, find the schema
+   * in the default (null) namespace</li>
+   * <li>If still no schema was found, return an unresolved reference for the full
+   * schema name (see step 2)</li>
+   * </ol>
    *
-   * Resolving means that the schema is returned if known, and otherwise an
-   * unresolved schema (a reference) is returned.
-   *
-   * @param fullName the full schema name to resolve
-   * @return the schema
-   * @throws SchemaParseException when the schema does not exist
+   * @param name      the schema name to find
+   * @param namespace the namespace to find the schema against
+   * @return the schema, or an unresolved reference
    */
-  public Schema resolve(String fullName) {
-    Schema.Type type = PRIMITIVES.get(fullName);
+  public Schema find(String name, String namespace) {
+    Schema.Type type = PRIMITIVES.get(name);
     if (type != null) {
       return Schema.create(type);
     }
 
-    Schema schema = getSchema(fullName);
+    String fullName = fullName(name, namespace);
+    Schema schema = getNamedSchema(fullName);
     if (schema == null) {
-      // Not found; attempt to resolve in the default namespace
-      int lastDot = fullName.lastIndexOf('.');
-      String name = fullName.substring(lastDot + 1);
-      schema = getSchema(name);
+      schema = getNamedSchema(name);
     }
 
     return schema != null ? schema : SchemaResolver.unresolvedSchema(fullName);
   }
 
-  private Schema getSchema(String fullName) {
+  private String fullName(String name, String namespace) {
+    if (namespace != null && name.lastIndexOf('.') < 0) {
+      return namespace + "." + name;
+    }
+    return name;
+  }
+
+  /**
+   * Get a schema by name. Note that the schema might not (yet) be resolved/usable
+   * until {@link #resolveAllTypes()} has been called.
+   *
+   * @param fullName a full schema name
+   * @return the schema, if known
+   */
+  public Schema getNamedSchema(String fullName) {
     Schema schema = oldSchemas.get(fullName);
     if (schema == null) {
       schema = newSchemas.get(fullName);
     }
     return schema;
   }
 
-  private boolean notEmpty(String str) {
-    return str != null && !str.isEmpty();
-  }
-
   /**
    * Put the schema into this context. This is an idempotent operation: it only
    * fails if this context already has a different schema with the same name.
@@ -184,6 +195,7 @@
         throw new SchemaParseException("Can't redefine: " + fullName);
       }
     } else {
+      isResolved = false;
       Schema previouslyAddedSchema = newSchemas.putIfAbsent(fullName, schema);
       if (previouslyAddedSchema != null && !previouslyAddedSchema.equals(schema)) {
         throw new SchemaParseException("Can't redefine: " + fullName);
@@ -200,10 +212,10 @@
     return fullName;
   }
 
-  private void validateName(String name, String what) {
+  private void validateName(String name, String typeOfName) {
     NameValidator.Result result = nameValidator.validate(name);
     if (!result.isOK()) {
-      throw new SchemaParseException(what + " \"" + name + "\" is invalid: " + result.getErrors());
+      throw new SchemaParseException(typeOfName + " \"" + name + "\" is invalid: " + result.getErrors());
     }
   }
 
@@ -220,6 +232,77 @@
     newSchemas.clear();
   }
 
+  /**
+   * Resolve all (named) schemas that were parsed. This resolves all forward
+   * references, even if parsed from different files.
+   *
+   * @return all parsed schemas, in the order they were parsed
+   * @throws AvroTypeException if a reference cannot be resolved
+   */
+  public List<Schema> resolveAllTypes() {
+    if (hasNewSchemas()) {
+      throw new IllegalStateException("Types cannot be resolved unless the ParseContext is committed.");
+    }
+
+    if (!isResolved) {
+      NameValidator saved = Schema.getNameValidator();
+      try {
+        Schema.setNameValidator(nameValidator); // Ensure we use the same validation.
+        HashMap<String, Schema> result = new LinkedHashMap<>(oldSchemas);
+        SchemaResolver.ResolvingVisitor visitor = new SchemaResolver.ResolvingVisitor(null, result::get, false);
+        Function<Schema, Schema> resolver = schema -> Schemas.visit(schema, visitor.withRoot(schema));
+        for (Map.Entry<String, Schema> entry : result.entrySet()) {
+          entry.setValue(resolver.apply(entry.getValue()));
+        }
+        oldSchemas.putAll(result);
+        isResolved = true;
+      } finally {
+        Schema.setNameValidator(saved);
+      }
+    }
+
+    return new ArrayList<>(oldSchemas.values());
+  }
+
+  /**
+   * Try to resolve unresolved references in a schema using the types known to
+   * this context. It is advisable to call {@link #resolveAllTypes()} first if you
+   * want the returned types to be stable.
+   *
+   * @param schema the schema to resolve
+   * @return the fully resolved schema if possible, {@code null} otherwise
+   */
+  public Schema tryResolve(Schema schema) {
+    if (schema == null) {
+      return null;
+    }
+    return resolve(schema, true);
+  }
+
+  /**
+   * Resolve unresolved references in a schema using the types known to this
+   * context. It is advisable to call {@link #resolveAllTypes()} first if you want
+   * the returned types to be stable.
+   *
+   * @param schema the schema to resolve
+   * @return the fully resolved schema
+   * @throws AvroTypeException if the schema cannot be resolved
+   */
+  public Schema resolve(Schema schema) {
+    return resolve(schema, false);
+  }
+
+  public Schema resolve(Schema schema, boolean returnNullUponFailure) {
+    NameValidator saved = Schema.getNameValidator();
+    try {
+      Schema.setNameValidator(nameValidator); // Ensure we use the same validation.
+      return Schemas.visit(schema,
+          new SchemaResolver.ResolvingVisitor(schema, this::getNamedSchema, returnNullUponFailure));
+    } finally {
+      Schema.setNameValidator(saved);
+    }
+  }
+
   /**
    * Return all known types by their fullname.
    *