forked from apache/avro
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
AVRO-3666: [JAVA] Separate parsing from Schema class (apache#2513)
This allows using pluggable parser implementations, allowing multiple formats to be parsed with the same code. This includes the use of NameValidator and parsing multiple files with circular references between them.
- Loading branch information
Showing
42 changed files
with
2,088 additions
and
694 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
77 changes: 77 additions & 0 deletions
77
lang/java/avro/src/main/java/org/apache/avro/FormattedSchemaParser.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.avro; | ||
|
||
import java.io.IOException; | ||
import java.net.URI; | ||
|
||
/** | ||
* Schema parser for a specific schema format. | ||
* | ||
* <p> | ||
* The {@link SchemaParser} class uses this interface, supporting text based | ||
* schema sources. | ||
* </p> | ||
* | ||
* <p> | ||
* Implementations are located using a {@link java.util.ServiceLoader} and must | ||
* therefore be threadsafe. See the {@code ServiceLoader} class for details on | ||
* loading your implementation. | ||
* </p> | ||
* | ||
* @see java.util.ServiceLoader | ||
*/ | ||
public interface FormattedSchemaParser { | ||
/** | ||
* <p> | ||
* Parse schema definitions from a text based source. | ||
* </p> | ||
* | ||
* <h2>Notes for implementers:</h2> | ||
* | ||
* <ul> | ||
* <li>Schema definitions are expected not to be in the format the parser | ||
* expects. So when the input clearly doesn't make sense (e.g., reading "/**" | ||
* when expecting JSON), it is a good idea not to do anything (especially | ||
* calling methods on the @code ParseContext}).</li> | ||
* <li>The parameter {@code parseContext} is not thread-safe.</li> | ||
* <li>When parsing, all parsed schema definitions should be added to the | ||
* provided {@link ParseContext}.</li> | ||
* <li>Optionally, you may return a "main" schema. Some schema definitions have | ||
* one, for example the schema defined by the root of the JSON document in a | ||
* <a href="https://avro.apache.org/docs/current/specification/">standard schema | ||
* definition</a>. If unsure, return {@code null}.</li> | ||
* <li>If parsing fails, throw a {@link SchemaParseException}. This will let the | ||
* parsing process recover and continue.</li> | ||
* <li>Throwing anything other than a {@code SchemaParseException} will abort | ||
* the parsing process, so reserve that for rethrowing exceptions.</li> | ||
* </ul> | ||
* | ||
* @param parseContext the current parse context: all parsed schemata should | ||
* be added here to resolve names with; contains all | ||
* previously known types | ||
* @param baseUri the base location of the schema, or {@code null} if | ||
* not known | ||
* @param formattedSchema the text of the schema definition(s) to parse | ||
* @return the main schema, if any | ||
* @throws IOException when the schema cannot be read | ||
* @throws SchemaParseException when the schema cannot be parsed | ||
*/ | ||
Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema) | ||
throws IOException, SchemaParseException; | ||
} |
82 changes: 82 additions & 0 deletions
82
lang/java/avro/src/main/java/org/apache/avro/JsonSchemaParser.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.avro; | ||
|
||
import java.io.IOException; | ||
import java.net.URI; | ||
|
||
/** | ||
* Schema parser for JSON formatted schemata. This initial implementation simply | ||
* delegates to the {@link Schema.Parser} class, though it should be refactored | ||
* out of there. | ||
* | ||
* <p> | ||
* Note: this class is intentionally not available via the Java | ||
* {@link java.util.ServiceLoader}, as its use is hardcoded as fallback when no | ||
* service exists. This enables users to reliably override the standard JSON | ||
* parser as well. | ||
* </p> | ||
*/ | ||
public class JsonSchemaParser implements FormattedSchemaParser { | ||
/** | ||
* <p> | ||
* Parse a schema written in the internal (JSON) format without any validations. | ||
* </p> | ||
* | ||
* <p> | ||
* Using this method is only safe if used to parse a write schema (i.e., a | ||
* schema used to read Avro data). Other usages, for example by generated Avro | ||
* code, can cause interoperability problems. | ||
* </p> | ||
* | ||
* <p> | ||
* Use with care and sufficient testing! | ||
* </p> | ||
* | ||
* @param fragments one or more strings making up the schema (some schemata | ||
* exceed the compiler limits) | ||
* @return the parsed schema | ||
*/ | ||
public static Schema parseInternal(String... fragments) { | ||
StringBuilder buffer = new StringBuilder(); | ||
for (String fragment : fragments) { | ||
buffer.append(fragment); | ||
} | ||
return new JsonSchemaParser().parse(new ParseContext(NameValidator.NO_VALIDATION), buffer, null); | ||
} | ||
|
||
@Override | ||
public Schema parse(ParseContext parseContext, URI baseUri, CharSequence formattedSchema) | ||
throws IOException, SchemaParseException { | ||
return parse(parseContext, formattedSchema, parseContext.nameValidator); | ||
} | ||
|
||
private Schema parse(ParseContext parseContext, CharSequence formattedSchema, NameValidator nameValidator) | ||
throws SchemaParseException { | ||
Schema.Parser parser = new Schema.Parser(nameValidator); | ||
if (nameValidator == NameValidator.NO_VALIDATION) { | ||
parser.setValidateDefaults(false); | ||
} else { | ||
parser = new Schema.Parser(nameValidator); | ||
} | ||
parser.addTypes(parseContext.typesByName().values()); | ||
Schema schema = parser.parse(formattedSchema.toString()); | ||
parser.getTypes().values().forEach(parseContext::put); | ||
return schema; | ||
} | ||
} |
104 changes: 104 additions & 0 deletions
104
lang/java/avro/src/main/java/org/apache/avro/NameValidator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.avro; | ||
|
||
/**
 * Validator for names. The default implementation of
 * {@link #validate(String)} accepts every name; the constants
 * {@link #UTF_VALIDATOR} and {@link #STRICT_VALIDATOR} apply character rules.
 */
public interface NameValidator {

  /**
   * Outcome of validating a name: either OK, or carrying an error message.
   */
  class Result {
    /** The error message; {@code null} when validation succeeded. */
    private final String errors;

    /**
     * Create a validation result.
     *
     * @param errors the error message, or {@code null} if there is none
     */
    public Result(final String errors) {
      this.errors = errors;
    }

    /**
     * Tell whether validation succeeded.
     *
     * @return {@code true} if this result carries no error message
     */
    public boolean isOK() {
      // Decide on the content rather than on identity with the OK singleton, so
      // this method and getErrors() can never disagree with each other.
      return errors == null;
    }

    /**
     * Get the error message.
     *
     * @return the error message, or {@code null} if validation succeeded
     */
    public String getErrors() {
      return errors;
    }
  }

  /** Shared result for successful validations; it has no error message. */
  Result OK = new Result(null);

  /**
   * Validate a name. This default implementation accepts any name, including
   * {@code null}.
   *
   * @param name the name to validate
   * @return {@link #OK} when the name is valid, otherwise a result with an error
   *         message
   */
  default Result validate(String name) {
    return OK;
  }

  /** Validator that accepts every name (it uses the default implementation). */
  NameValidator NO_VALIDATION = new NameValidator() {
  };

  /**
   * Validator accepting names that start with a letter (as determined by
   * {@link Character#isLetter(char)}) or an underscore, followed by letters,
   * digits (as determined by {@link Character#isLetterOrDigit(char)}) or
   * underscores. The checks are done per {@code char}.
   */
  NameValidator UTF_VALIDATOR = new NameValidator() {
    @Override
    public Result validate(final String name) {
      if (name == null) {
        return new Result("Null name");
      }
      int length = name.length();
      if (length == 0) {
        return new Result("Empty name");
      }
      char first = name.charAt(0);
      if (!(Character.isLetter(first) || first == '_')) {
        return new Result("Illegal initial character: " + name);
      }
      for (int i = 1; i < length; i++) {
        char c = name.charAt(i);
        if (!(Character.isLetterOrDigit(c) || c == '_')) {
          return new Result("Illegal character in: " + name);
        }
      }
      return OK;
    }
  };

  /**
   * Validator accepting names that match {@code [A-Za-z_][A-Za-z0-9_]*}: an
   * ASCII letter or underscore, followed by ASCII letters, ASCII digits or
   * underscores.
   */
  NameValidator STRICT_VALIDATOR = new NameValidator() {
    @Override
    public Result validate(final String name) {
      if (name == null) {
        return new Result("Null name");
      }
      int length = name.length();
      if (length == 0) {
        return new Result("Empty name");
      }
      char first = name.charAt(0);
      if (!(isLetter(first) || first == '_')) {
        return new Result("Illegal initial character: " + name);
      }
      for (int i = 1; i < length; i++) {
        char c = name.charAt(i);
        if (!(isLetter(c) || isDigit(c) || c == '_')) {
          return new Result("Illegal character in: " + name);
        }
      }
      return OK;
    }

    /** Tell whether the character is an ASCII letter (a-z or A-Z). */
    private boolean isLetter(char c) {
      return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /** Tell whether the character is an ASCII digit (0-9). */
    private boolean isDigit(char c) {
      return c >= '0' && c <= '9';
    }

  };

}
Oops, something went wrong.