diff --git a/src/main/java/opwvhk/avro/SchemaManipulator.java b/src/main/java/opwvhk/avro/SchemaManipulator.java
index c19b6ad..f462bc6 100644
--- a/src/main/java/opwvhk/avro/SchemaManipulator.java
+++ b/src/main/java/opwvhk/avro/SchemaManipulator.java
@@ -1,5 +1,12 @@
package opwvhk.avro;
+import net.jimblackler.jsonschemafriend.GenerationException;
+import opwvhk.avro.json.SchemaAnalyzer;
+import opwvhk.avro.util.AvroSchemaUtils;
+import opwvhk.avro.util.NamingConvention;
+import opwvhk.avro.xml.XsdAnalyzer;
+import org.apache.avro.Schema;
+
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
@@ -10,12 +17,7 @@
import java.util.List;
import java.util.Objects;
import java.util.Set;
-
-import net.jimblackler.jsonschemafriend.GenerationException;
-import opwvhk.avro.json.SchemaAnalyzer;
-import opwvhk.avro.util.AvroSchemaUtils;
-import opwvhk.avro.xml.XsdAnalyzer;
-import org.apache.avro.Schema;
+import java.util.stream.Stream;
import static java.util.Objects.requireNonNull;
@@ -29,11 +31,18 @@ public class SchemaManipulator {
private boolean renameWithAliases;
private StringBuilder markdownBuffer;
private List<SchemaRenamer> schemaRenamerList;
+ private SchemaRenamer schemaNamingConvention;
private List<FieldRenamer> fieldRenamerList;
+ private FieldRenamer fieldNamingConvention;
private List unwrapTests;
- private SchemaManipulator(Schema initialSchema) {
- reset(initialSchema);
+ /**
+ * Create a schema manipulator for a given schema.
+ *
+ * @param schema the schema to manipulate
+ */
+ public SchemaManipulator(Schema schema) {
+ reset(schema);
}
private void reset(Schema initialSchema) {
@@ -42,7 +51,9 @@ private void reset(Schema initialSchema) {
renameWithAliases = true;
markdownBuffer = null;
schemaRenamerList = new ArrayList<>();
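+ // The default naming conventions (below) are no-ops: returning null keeps the current name.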
+ schemaNamingConvention = (pathToField, fieldSchema) -> null;
fieldRenamerList = new ArrayList<>();
+ fieldNamingConvention = (pathToField, schemaWithField, field) -> null;
unwrapTests = new ArrayList<>();
}
@@ -71,7 +82,8 @@ public static SchemaManipulator startFromAvro(URL schemaLocation) throws IOExcep
}
/**
- * Create a schema manipulator from an XML Schema Definition (XSD). The location of the main {@code .xsd} file is provided, both to provide the XSD content,
+ * Create a schema manipulator from an XML Schema Definition (XSD). The location of the main {@code .xsd} file is provided, both to provide the XSD
+ * content,
* as to provide a way to locate imported/included {@code .xsd} files.
*
* @param schemaLocation the location of the main {@code .xsd} file (it may include/import other {@code .xsd} files)
@@ -210,7 +222,7 @@ private Schema applySchemaChanges(IdentityHashMap changedSchemas
}
private String newSchemaName(String path, Schema schema) {
- return schemaRenamerList.stream()
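+ // Explicit schema renames take precedence; the naming convention (appended last) only applies when no renamer matches.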
+ return Stream.concat(schemaRenamerList.stream(), Stream.of(schemaNamingConvention))
.map(renamer -> renamer.newSchemaName(path, schema))
.filter(Objects::nonNull)
.findAny()
@@ -218,7 +230,7 @@ private String newSchemaName(String path, Schema schema) {
}
private String newFieldName(String path, Schema schemaWithField, Schema.Field field) {
- return fieldRenamerList.stream()
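+ // As for schema names: explicit field renames win, and the naming convention is only a fallback.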
+ return Stream.concat(fieldRenamerList.stream(), Stream.of(fieldNamingConvention))
.map(renamer -> renamer.newFieldName(path, schemaWithField, field))
.filter(Objects::nonNull)
.findAny()
@@ -297,6 +309,35 @@ public SchemaManipulator renameSchemaAtPath(String newSchemaName, String... path
return this;
}
+ /**
+ * Use the specified naming convention for schemas. This naming convention applies to all schemas that have not been explicitly renamed using
+ * {@link #renameSchema(String, String)} or {@link #renameSchemaAtPath(String, String...)}, and leaves the namespace intact.
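+ * <p>
+ * For example, {@code useSchemaNamingConvention(NamingConvention.PASCAL_CASE)} would rename a schema {@code ns.payload_record} to
+ * {@code ns.PayloadRecord}, adding the old name as an alias unless {@link #renameWithoutAliases()} is used.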
+ *
+ * @param schemaNamingConvention the naming convention to use
+ * @return this {@code SchemaManipulator}
+ */
+ public SchemaManipulator useSchemaNamingConvention(NamingConvention schemaNamingConvention) {
+ return useSchemaNamingConvention(NamingConvention.NULL, schemaNamingConvention);
+ }
+
+ /**
+ * Use the specified naming conventions for schemas. These naming conventions apply to all schemas that have not been explicitly renamed using
+ * {@link #renameSchema(String, String)} or {@link #renameSchemaAtPath(String, String...)}.
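+ * <p>
+ * For example, {@code useSchemaNamingConvention(NamingConvention.SNAKE_CASE, NamingConvention.PASCAL_CASE)} would rename a schema
+ * {@code somewhereInTheCode.simple_name} to {@code somewhere_in_the_code.SimpleName}.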
+ *
+ * @param namespaceNamingConvention the naming convention to use for the schema namespace
+ * @param schemaNamingConvention the naming convention to use for the schema (simple) name
+ * @return this {@code SchemaManipulator}
+ */
+ public SchemaManipulator useSchemaNamingConvention(NamingConvention namespaceNamingConvention, NamingConvention schemaNamingConvention) {
+ this.schemaNamingConvention = (path, schema) -> {
+ String namespace = schema.getNamespace();
+ String prefix = namespace == null ? "" : namespaceNamingConvention.convert(namespace) + ".";
+ String newFullName = prefix + schemaNamingConvention.convert(schema.getName());
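+ // Returning null means "no change": schemas already following the convention keep their name (and get no alias).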
+ return schema.getFullName().equals(newFullName) ? null : newFullName;
+ };
+ return this;
+ }
+
/**
* Rename the specified field in the (named) schema.
*
@@ -330,6 +371,22 @@ public SchemaManipulator renameFieldAtPath(String newFieldName, String... pathTo
return this;
}
+ /**
+ * Use the specified naming convention for fields. This naming convention applies to all fields that have not been explicitly renamed using
+ * {@link #renameField(String, String, String)} or {@link #renameFieldAtPath(String, String...)}.
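+ * <p>
+ * For example, {@code useFieldNamingConvention(NamingConvention.SNAKE_CASE)} would rename a field {@code payloadType} to {@code payload_type}.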
+ *
+ * @param namingConvention the naming convention to use for field names
+ * @return this {@code SchemaManipulator}
+ */
+ public SchemaManipulator useFieldNamingConvention(NamingConvention namingConvention) {
+ this.fieldNamingConvention = (pathToField, schemaWithField, field) -> {
+ String oldName = field.name();
+ String newName = namingConvention.convert(oldName);
+ return oldName.equals(newName) ? null : newName;
+ };
+ return this;
+ }
+
/**
* Unwrap all arrays whose field names (except up to the last {@code ignoredMaxSuffixLength} characters) are equal.
*
@@ -384,7 +441,8 @@ public SchemaManipulator unwrapArray(String schemaName, String wrappingField) {
* Unwrap the array whose wrapping field is at the specified path.
*
* Wrapped arrays are an XML construct. They result in array fields without siblings in a record field (optionally in a union with null). In Avro,
- * Parquet, and in fact most/all other formats, they are both not needed and unwanted. This method unwraps them based on the path to the wrapping field.
+ * Parquet, and in fact most/all other formats, they are both not needed and unwanted. This method unwraps them based on the path to the wrapping field.
*
* When unwrapping, wrapped field will replace the wrapping field using the name of the wrapping field. As this is not a renaming action, no alias will
* be added.
diff --git a/src/main/java/opwvhk/avro/util/NamingConvention.java b/src/main/java/opwvhk/avro/util/NamingConvention.java
new file mode 100644
index 0000000..e2a992f
--- /dev/null
+++ b/src/main/java/opwvhk/avro/util/NamingConvention.java
@@ -0,0 +1,238 @@
+/*
+ * Copyright © Oscar Westra van Holthe - Kind
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package opwvhk.avro.util;
+
+import org.jetbrains.annotations.NotNull;
+
+import java.text.Normalizer;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.function.UnaryOperator;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A class to provide naming conventions for multiple-word
+ * identifiers, like camel case, snake case, etc.
+ *
+ * <p>
+ * It is up to the user of the class to ensure the applied naming convention makes sense: using e.g. camel case for a script that has no notion of
+ * upper/lower case letters is not useful.
+ *
+ * <h2>Algorithm</h2>
+ *
+ * <p>
+ * Casing is applied by first sanitising the text, and determining the list of words. Then the words are put together according to the selected style.
+ *
+ * <p>
+ * This algorithm is somewhat opinionated: it does not make any special exceptions for acronyms. This is mostly in line with general guidelines, such as
+ * those from Java/Oracle and Microsoft, but ignores the Microsoft exception for two-letter acronyms (like IO).
+ *
+ * <p>
+ * Sanitation is done by creating a {@link Normalizer.Form#NFD canonical decomposition}, and then removing everything that is not in the Unicode
+ * categories letter (L), number (N), space separator (Zs), connector punctuation (Pc) or dash punctuation (Pd). This also removes accents. Words are
+ * then determined by splitting along spacing and punctuation.
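+ *
+ * <p>
+ * For instance, {@code "parse HTTPResponse_header"} is split (on the space and the underscore) into the words {@code parse}, {@code HTTPResponse}
+ * and {@code header}; applying {@link #PASCAL_CASE} then yields {@code ParseHttpresponseHeader}, illustrating that acronyms get no special treatment.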
+ *
+ * <h2>Defined conventions</h2>
+ *
+ * <p>
+ * There are a number of capitalisation conventions predefined, combining various delimiters and combinations of upper and lower case, as listed below:
+ *
+ * <table>
+ * <caption>Capitalisation Conventions</caption>
+ * <tr><th>Convention</th><th>Example</th></tr>
+ * <tr><td>Pascal Case</td><td>PascalCase</td></tr>
+ * <tr><td>Camel Case</td><td>camelCase</td></tr>
+ * <tr><td>Snake Case</td><td>snake_case</td></tr>
+ * <tr><td>Kebab Case</td><td>kebab-case</td></tr>
+ * <tr><td>Pascal Snake Case</td><td>Pascal_Snake_Case</td></tr>
+ * <tr><td>Camel Snake Case</td><td>camel_Snake_Case</td></tr>
+ * <tr><td>Screaming Snake Case</td><td>SCREAMING_SNAKE_CASE</td></tr>
+ * <tr><td>Train Case</td><td>Train-Case</td></tr>
+ * <tr><td>Cobol Case</td><td>COBOL-CASE</td></tr>
+ * </table>
+ *
+ * <p>
+ * Note that there is no predefined convention for (upper) flat case. The reason is that they are not reversible. The other conventions can be applied
+ * in any sequence, and the last one deterministically determines the result. If any convention in between uses flat case, this is no longer true.
+ *
+ * @see Naming conventions for multiple-word identifiers
+ * @see Java naming conventions
+ * @see Microsoft capitalization conventions
+ * @see Unicode annex #44, General Category Values
+ */
+public class NamingConvention {
+ /**
+ * Pascal Case: capitalized words without delimiter.
+ */
+ public static final NamingConvention PASCAL_CASE = new NamingConvention("", WordCase.CAPITALIZED, WordCase.CAPITALIZED);
+ /**
+ * Camel Case: lowercase first word followed by capitalized words, without delimiter.
+ */
+ public static final NamingConvention CAMEL_CASE = new NamingConvention("", WordCase.LOWER_CASE, WordCase.CAPITALIZED);
+ /**
+ * Snake Case: lowercase words, separated by underscores.
+ */
+ public static final NamingConvention SNAKE_CASE = new NamingConvention("_", WordCase.LOWER_CASE, WordCase.LOWER_CASE);
+ /**
+ * Kebab Case: lowercase words, separated by hyphens.
+ */
+ public static final NamingConvention KEBAB_CASE = new NamingConvention("-", WordCase.LOWER_CASE, WordCase.LOWER_CASE);
+ /**
+ * Pascal Snake Case: capitalized words, separated by underscores.
+ */
+ public static final NamingConvention PASCAL_SNAKE_CASE = new NamingConvention("_", WordCase.CAPITALIZED, WordCase.CAPITALIZED);
+ /**
+ * Camel Snake Case: lowercase first word followed by capitalized words, separated by underscores.
+ */
+ public static final NamingConvention CAMEL_SNAKE_CASE = new NamingConvention("_", WordCase.LOWER_CASE, WordCase.CAPITALIZED);
+ /**
+ * Screaming Snake Case: uppercase words, separated by underscores.
+ */
+ public static final NamingConvention SCREAMING_SNAKE_CASE = new NamingConvention("_", WordCase.UPPER_CASE, WordCase.UPPER_CASE);
+ /**
+ * Train Case: capitalized words, separated by hyphens.
+ */
+ public static final NamingConvention TRAIN_CASE = new NamingConvention("-", WordCase.CAPITALIZED, WordCase.CAPITALIZED);
+ /**
+ * Cobol Case: uppercase words, separated by hyphens.
+ */
+ public static final NamingConvention COBOL_CASE = new NamingConvention("-", WordCase.UPPER_CASE, WordCase.UPPER_CASE);
+ /**
+ * Dummy naming convention that returns the given name as-is.
+ */
+ public static final NamingConvention NULL = new NamingConvention(null, null, null) {
+ @Override
+ public String convert(String name) {
+ return name;
+ }
+ };
+
+ private final String delimiter;
+ private final WordCase firstWord;
+ private final WordCase otherWords;
+
+ /**
+ * Create a naming convention for multiple-word identifiers. Combining an empty delimiter with {@link WordCase#LOWER_CASE} or
+ * {@link WordCase#UPPER_CASE} is discouraged, as the result cannot be converted to another naming convention.
+ *
+ * @param delimiter the word delimiter to use
+ * @param firstWord the capitalization for the first word
+ * @param otherWords the capitalization for the other words
+ */
+ public NamingConvention(String delimiter, WordCase firstWord, WordCase otherWords) {
+ this.delimiter = delimiter;
+ this.firstWord = firstWord;
+ this.otherWords = otherWords;
+ }
+
+ /**
+	 * Convert a text/name to a name following this naming convention.
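+	 * <p>
+	 * For example, {@code SNAKE_CASE.convert("SomeFieldName")} returns {@code "some_field_name"}.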
+ *
+ * @param name the name to convert
+	 * @return the name converted to this naming convention
+	 * @throws IllegalArgumentException if the name contains no letters or numbers
+ */
+ public String convert(String name) {
+ // First remove accents, extra punctuation, etc. Keep only letters, numbers, and dash & combining punctuation.
+ String cleanName = NAME_CHARACTER_FILTER.matcher(Normalizer.normalize(name, Normalizer.Form.NFD)).replaceAll("");
+
+ // Then split by boundary characters, and determine the first non-empty word
+ List<String> words = splitToWords(DELIMITER_BOUNDARY, cleanName);
+ if (words.isEmpty()) {
+ throw new IllegalArgumentException("The name contains no letters or numbers");
+ } else if (words.size() == 1) {
+ // The name contains no boundary characters: maybe it is camel case.
+ words = splitToWords(CAMEL_BOUNDARY, cleanName);
+ }
+
+ StringBuilder buffer = new StringBuilder((int) (name.length() * 1.2f));
+ Iterator<String> iterator = words.iterator();
+ buffer.append(firstWord.apply(iterator.next()));
+ iterator.forEachRemaining(word -> buffer.append(delimiter).append(otherWords.apply(word)));
+ return buffer.toString();
+ }
+
+ /**
+ * Pattern to match anything that's not a letter, number or delimiter boundary.
+ */
+ private static final Pattern NAME_CHARACTER_FILTER = Pattern.compile("[^\\p{L}\\p{N}\\p{Zs}\\p{Pd}\\p{Pc}]+");
+
+ /**
+ * Pattern to match word boundaries using delimiters: any combination of spaces & dash/combining punctuation after a letter or number.
+ */
+ private static final Pattern DELIMITER_BOUNDARY = Pattern.compile("[\\p{Zs}\\p{Pd}\\p{Pc}]+");
+ /**
+ * Pattern to match any word boundary: the (zero-width) point between a lower- and uppercase letter, or any combination of spaces & punctuation.
+ */
+ @SuppressWarnings("RegExpSimplifiable") // bug: the suggestion to remove [] from [\p{L}&&\P{Lu}] is wrong
+ private static final Pattern CAMEL_BOUNDARY = Pattern.compile("(?<=[\\p{L}&&\\P{Lu}])(?=\\p{Lu})");
+
+ @NotNull
+ private List<String> splitToWords(Pattern wordBoundary, String text) {
+ List<String> words = new ArrayList<>();
+ Matcher matcher = wordBoundary.matcher(text);
+ int start = 0;
+ while (matcher.find()) {
+ if (start < matcher.start()) {
+ // Only add non-empty words
+ words.add(text.substring(start, matcher.start()));
+ }
+ start = matcher.end();
+ }
+ if (start < text.length()) {
+ // There's text remaining: add it
+ words.add(text.substring(start));
+ }
+ return words;
+ }
+
+ /**
+ * Operator to apply "proper" casing to a name part.
+ */
+ public enum WordCase implements UnaryOperator<String> {
+ /**
+ * Convert the word to lower case.
+ */
+ LOWER_CASE {
+ @Override
+ public String apply(String word) {
+ return word.toLowerCase(Locale.ROOT);
+ }
+ },
+ /**
+ * Convert the word to upper case.
+ */
+ UPPER_CASE {
+ @Override
+ public String apply(String word) {
+ return word.toUpperCase(Locale.ROOT);
+ }
+ },
+ /**
+ * Convert the word to lower case, except the first character (convert that to upper case).
+ */
+ CAPITALIZED {
+ @Override
+ public String apply(String word) {
+ int firstCodePoint = word.codePointAt(0);
+ int sizeOfFirstCharacter = Character.charCount(firstCodePoint);
+ // Use toTitleCase instead of toUpperCase to properly handle digraphs.
+ return Character.toString(Character.toTitleCase(firstCodePoint)) + word.substring(sizeOfFirstCharacter).toLowerCase(Locale.ROOT);
+ }
+ }
+ }
+}
diff --git a/src/test/java/opwvhk/avro/SchemaManipulatorTest.java b/src/test/java/opwvhk/avro/SchemaManipulatorTest.java
index 2c5a487..9ad285e 100644
--- a/src/test/java/opwvhk/avro/SchemaManipulatorTest.java
+++ b/src/test/java/opwvhk/avro/SchemaManipulatorTest.java
@@ -1,517 +1,569 @@
package opwvhk.avro;
+import net.jimblackler.jsonschemafriend.GenerationException;
+import opwvhk.avro.util.NamingConvention;
+import org.apache.avro.Schema;
+import org.apache.avro.SchemaBuilder;
+import org.junit.jupiter.api.Test;
+
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
-import net.jimblackler.jsonschemafriend.GenerationException;
-import org.apache.avro.Schema;
-import org.junit.jupiter.api.Test;
-
import static java.util.Objects.requireNonNull;
import static org.assertj.core.api.Assertions.assertThat;
class SchemaManipulatorTest {
- @Test
- void testSortedDocumentationViaXsd() throws IOException {
- StringBuilder markdown = new StringBuilder();
+ @Test
+ void testSortedDocumentationViaXsd() throws IOException {
+ StringBuilder markdown = new StringBuilder();
+
+ URL xsdLocation = getClass().getResource("xml/payload.xsd");
+ Schema envelopeSchema = SchemaManipulator
+ .startFromXsd(xsdLocation, "envelope")
+ .sortFields()
+ .alsoDocumentAsMarkdownTable(markdown)
+ .finish();
+
+ assertThat(markdown).isEqualToNormalizingUnicode("""
+ | Field(path) | Type | Documentation |
+ |-------------|------|---------------|
+ | | record | |
+ | payload | record | The payload is either XML, UTF-8 text or base64 encoded binary data.
Type: The payload is either XML, UTF-8 text or base64 encoded binary data. |
+ | payload.type? | enum | |
+ | payload.value? | string | The entire element content, unparsed. |
+ | source | string | |
+ | target | string | |
+ """);
+ assertThat(envelopeSchema.toString(true)).isEqualTo(Schema.createRecord("ns.envelope", null, null, false, List.of(
+ new Schema.Field("payload",
+ Schema.createRecord("ns.payload", "The payload is either XML, UTF-8 text or base64 encoded binary data.", null, false, List.of(
+ new Schema.Field("type", Schema.createUnion(Schema.createEnum("ns.type", null, null, List.of("xml", "text", "binary")),
+ Schema.create(Schema.Type.NULL)), null, "xml"),
+ new Schema.Field("value", Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)),
+ "The entire element content, unparsed.", Schema.Field.NULL_DEFAULT_VALUE)
+ )), "The payload is either XML, UTF-8 text or base64 encoded binary data.", null),
+ new Schema.Field("source", Schema.create(Schema.Type.STRING)),
+ new Schema.Field("target", Schema.create(Schema.Type.STRING))
+ )).toString(true));
+ }
- URL xsdLocation = getClass().getResource("xml/payload.xsd");
- Schema envelopeSchema = SchemaManipulator
- .startFromXsd(xsdLocation, "envelope")
- .sortFields()
- .alsoDocumentAsMarkdownTable(markdown)
- .finish();
+ @Test
+ void testSortedDocumentationViaJsonSchema() throws GenerationException, URISyntaxException, IOException {
+ StringBuilder markdown = new StringBuilder();
- assertThat(markdown).isEqualToNormalizingUnicode("""
- | Field(path) | Type | Documentation |
- |-------------|------|---------------|
- | | record | |
- | payload | record | The payload is either XML, UTF-8 text or base64 encoded binary data.
Type: The payload is either XML, UTF-8 text or base64 encoded binary data. |
- | payload.type? | enum | |
- | payload.value? | string | The entire element content, unparsed. |
- | source | string | |
- | target | string | |
- """);
- assertThat(envelopeSchema.toString(true)).isEqualTo(Schema.createRecord("ns.envelope", null, null, false, List.of(
- new Schema.Field("payload",
- Schema.createRecord("ns.payload", "The payload is either XML, UTF-8 text or base64 encoded binary data.", null, false, List.of(
- new Schema.Field("type", Schema.createUnion(Schema.createEnum("ns.type", null, null, List.of("xml", "text", "binary")),
- Schema.create(Schema.Type.NULL)), null, "xml"),
- new Schema.Field("value", Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)),
- "The entire element content, unparsed.", Schema.Field.NULL_DEFAULT_VALUE)
- )), "The payload is either XML, UTF-8 text or base64 encoded binary data.", null),
- new Schema.Field("source", Schema.create(Schema.Type.STRING)),
- new Schema.Field("target", Schema.create(Schema.Type.STRING))
- )).toString(true));
- }
+ URL schemaLocation = getClass().getResource("json/TestRecord.schema.json");
+ Schema schema = SchemaManipulator
+ .startFromJsonSchema(requireNonNull(schemaLocation))
+ .alsoDocumentAsMarkdownTable(markdown)
+ .finish();
- @Test
- void testSortedDocumentationViaJsonSchema() throws GenerationException, URISyntaxException, IOException {
- StringBuilder markdown = new StringBuilder();
+ assertThat(markdown).isEqualToNormalizingUnicode("""
+ | Field(path) | Type | Documentation |
+ |-------------|------|---------------|
+ | | record | Type: Test schema for parsing records. |
+ | bool | boolean | |
+ | shortInt? | int | |
+ | longInt? | long | |
+ | hugeInt? | decimal(21,0) | |
+ | defaultInt? | long | |
+ | singleFloat? | float | |
+ | doubleFloat? | double | |
+ | fixedPoint? | decimal(17,6) | |
+ | defaultNumber? | double | |
+ | choice | enum | |
+ | date? | date | |
+ | time? | time-millis | |
+ | timestamp? | timestamp-millis | |
+ | binary? | bytes | |
+ | hexBytes? | bytes | |
+ | texts[] | string | |
+ | weirdStuff? | record | |
+ | weirdStuff?.explanation? | string | |
+ | weirdStuff?.fancy? | string | |
+ | weirdStuff?.hatseflats? | string | |
+ | weirdStuff?.rabbitHole? | record | |
+ """);
+ assertThat(schema.toString(true)).isEqualTo(new Schema.Parser().parse(getClass().getResourceAsStream("json/TestRecordAll.avsc")).toString(true));
+ }
- URL schemaLocation = getClass().getResource("json/TestRecord.schema.json");
- Schema schema = SchemaManipulator
- .startFromJsonSchema(requireNonNull(schemaLocation))
- .alsoDocumentAsMarkdownTable(markdown)
- .finish();
+ @Test
+ void testDocumentationViaAvro() throws IOException {
+ URL avroLocation = getClass().getResource("xml/envelope.avsc");
+ String markDownTable = SchemaManipulator.startFromAvro(avroLocation).asMarkdownTable();
- assertThat(markdown).isEqualToNormalizingUnicode("""
- | Field(path) | Type | Documentation |
- |-------------|------|---------------|
- | | record | Type: Test schema for parsing records. |
- | bool | boolean | |
- | shortInt? | int | |
- | longInt? | long | |
- | hugeInt? | decimal(21,0) | |
- | defaultInt? | long | |
- | singleFloat? | float | |
- | doubleFloat? | double | |
- | fixedPoint? | decimal(17,6) | |
- | defaultNumber? | double | |
- | choice | enum | |
- | date? | date | |
- | time? | time-millis | |
- | timestamp? | timestamp-millis | |
- | binary? | bytes | |
- | hexBytes? | bytes | |
- | texts[] | string | |
- | weirdStuff? | record | |
- | weirdStuff?.explanation? | string | |
- | weirdStuff?.fancy? | string | |
- | weirdStuff?.hatseflats? | string | |
- | weirdStuff?.rabbitHole? | record | |
- """);
- assertThat(schema.toString(true)).isEqualTo(new Schema.Parser().parse(getClass().getResourceAsStream("json/TestRecordAll.avsc")).toString(true));
- }
+ assertThat(markDownTable).isEqualTo("""
+ | Field(path) | Type | Documentation |
+ |-------------|------|---------------|
+ | | record | |
+ | source | string | |
+ | target | string | |
+ | payload | record | The payload is either XML, UTF-8 text or base64 encoded binary data.
Type: The payload is either XML, UTF-8 text or base64 encoded binary data. |
+ | payload.type? | enum | |
+ | payload.value? | string | The entire element content, unparsed. |
+ """);
+ }
- @Test
- void testDocumentationViaAvro() throws IOException {
- URL avroLocation = getClass().getResource("xml/envelope.avsc");
- String markDownTable = SchemaManipulator.startFromAvro(avroLocation).asMarkdownTable();
+ @Test
+ void testManipulationsWithAliases() {
+ // Note: manipulating by schema (and field name) also matches on aliases
+ Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA)
+ .renameSchema("ns.envelope", "ns.satchel")
+ .renameSchema("ns.switch", "ns.toggle")
+ .renameSchema("ns.hash", "ns.salted")
+ .renameField("ns.envelope", "target", "destination")
+ .renameField("ns.envelope", "properties", "extraProperties")
+ .renameField("ns.payloadRecord", "digestsByName", "namedHashes")
+ .unwrapArray("ns.envelope", "wrappingField")
+ .finish();
- assertThat(markDownTable).isEqualTo("""
- | Field(path) | Type | Documentation |
- |-------------|------|---------------|
- | | record | |
- | source | string | |
- | target | string | |
- | payload | record | The payload is either XML, UTF-8 text or base64 encoded binary data.
Type: The payload is either XML, UTF-8 text or base64 encoded binary data. |
- | payload.type? | enum | |
- | payload.value? | string | The entire element content, unparsed. |
- """);
- }
+ Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA);
+ assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true));
+ }
- @Test
- void testManipulationsWithAliases() {
- // Note: manipulating by schema (and field name) also matches on aliases
- Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA)
- .renameSchema("ns.envelope", "ns.satchel")
- .renameSchema("ns.switch", "ns.toggle")
- .renameSchema("ns.hash", "ns.salted")
- .renameField("ns.envelope", "target", "destination")
- .renameField("ns.envelope", "properties", "extraProperties")
- .renameField("ns.payloadRecord", "digestsByName", "namedHashes")
- .unwrapArray("ns.envelope", "wrappingField")
- .finish();
+ @Test
+ void testManipulationsWithoutAliasesByPath() {
+ // Note: manipulating by path cannot match on aliases
+ Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA)
+ .renameWithoutAliases()
+ .renameSchemaAtPath("ns.satchel")
+ .renameSchemaAtPath("ns.salted", "payload", "digestsByName")
+ .renameSchemaAtPath("ns.toggle", "payload", "switchList")
+ .renameFieldAtPath("destination", "target")
+ .renameFieldAtPath("namedHashes", "payload", "digestsByName")
+ .unwrapArrayAtPath("nested")
+ .finish();
- Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA);
- assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true));
- }
+ Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA_WITHOUT_ALIASES);
+ assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true));
+ }
- @Test
- void testManipulationsWithoutAliasesByPath() {
- // Note: manipulating by path cannot match on aliases
- Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA)
- .renameWithoutAliases()
- .renameSchemaAtPath("ns.satchel")
- .renameSchemaAtPath("ns.salted", "payload", "digestsByName")
- .renameSchemaAtPath("ns.toggle", "payload", "switchList")
- .renameFieldAtPath("destination", "target")
- .renameFieldAtPath("namedHashes", "payload", "digestsByName")
- .unwrapArrayAtPath("nested")
- .finish();
+ @Test
+ void testUnwrappingArrays1() {
+ Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA_WITH_ARRAYS)
+ .unwrapArrayAtPath("matchByPath")
+ .unwrapArrays(3)
+ .finish();
- Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA_WITHOUT_ALIASES);
- assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true));
- }
+ Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA_WITH_ARRAYS);
+ assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true));
+ }
- @Test
- void testUnwrappingArrays1() {
- Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA_WITH_ARRAYS)
- .unwrapArrayAtPath("matchByPath")
- .unwrapArrays(3)
- .finish();
+ @Test
+ void testUnwrappingArrays2() {
+ Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA_WITH_ARRAYS)
+ .unwrapArray("ns.WithArrays", "matchByName")
+ .unwrapArrays(3)
+ .finish();
- Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA_WITH_ARRAYS);
- assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true));
- }
+ Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA_WITH_ARRAYS);
+ assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true));
+ }
- @Test
- void testUnwrappingArrays2() {
- Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA_WITH_ARRAYS)
- .unwrapArray("ns.WithArrays", "matchByName")
- .unwrapArrays(3)
- .finish();
+ @Test
+ void testManipulatingRecursiveSchemas() {
+ // Note: manipulating by schema (and field name) also matches on aliases
+ Schema schema = SchemaManipulator.startFromAvro(SOURCE_RECURSIVE_SCHEMA)
+ .renameField("ns.recursive", "rabbitHole", "droste")
+ .finish();
- Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA_WITH_ARRAYS);
- assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true));
- }
+ Schema expectedSchema = new Schema.Parser().parse(EXPECTED_RECURSIVE_SCHEMA);
+ assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true));
+ }
- @Test
- void testManipulatingRecursiveSchemas() {
- // Note: manipulating by schema (and field name) also matches on aliases
- Schema schema = SchemaManipulator.startFromAvro(SOURCE_RECURSIVE_SCHEMA)
- .renameField("ns.recursive", "rabbitHole", "droste")
- .finish();
+ @Test
+ void testApplyNamingConventions() {
+ Schema schemaWithoutNamespace = SchemaBuilder.record("simple_name").fields()
+ .name("field_1").type("string").noDefault()
+ .name("field2").type("string").noDefault()
+ .endRecord();
+ Schema schemaResultWithoutNamespace = SchemaBuilder.record("simple_name").fields()
+ .name("field1").type("string").noDefault()
+ .name("field2").type("string").noDefault()
+ .endRecord();
+ assertThat(new SchemaManipulator(schemaWithoutNamespace).renameWithoutAliases()
+ .useSchemaNamingConvention(NamingConvention.SNAKE_CASE)
+ .useFieldNamingConvention(NamingConvention.CAMEL_CASE)
+ .finish().toString(true)
+ ).isEqualTo(schemaResultWithoutNamespace.toString(true));
- Schema expectedSchema = new Schema.Parser().parse(EXPECTED_RECURSIVE_SCHEMA);
- assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true));
- }
+ Schema schemaWithNamespace = SchemaBuilder.record("simple_name").namespace("somewhereInTheCode").fields()
+ .name("field_one").type("string").noDefault()
+ .name("field_two").type("string").noDefault()
+ .endRecord();
+ Schema schemaResultWithNamespace = SchemaBuilder.record("SimpleName").namespace("somewhere_in_the_code").fields()
+ .name("fieldOne").type("string").noDefault()
+ .name("fieldTwo").type("string").noDefault()
+ .endRecord();
+ assertThat(new SchemaManipulator(schemaWithNamespace).renameWithoutAliases()
+ .useSchemaNamingConvention(NamingConvention.SNAKE_CASE, NamingConvention.PASCAL_CASE)
+ .useFieldNamingConvention(NamingConvention.CAMEL_CASE)
+ .finish().toString(true)
+ ).isEqualTo(schemaResultWithNamespace.toString(true));
+ }
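+
+ @Test
+ void testApplyNamingConventionsToRecursiveSchema() {
+ // One possible way to exercise naming conventions together with an explicit rename on a recursive schema.
+ // The chosen conventions (screaming snake case for schema names, Pascal case for field names) are just one
+ // combination that yields EXPECTED_RECURSIVE_SCHEMA_NAMING_CONVENTIONS below.
+ Schema schema = SchemaManipulator.startFromAvro(SOURCE_RECURSIVE_SCHEMA)
+ .useSchemaNamingConvention(NamingConvention.SCREAMING_SNAKE_CASE)
+ .useFieldNamingConvention(NamingConvention.PASCAL_CASE)
+ .renameField("ns.recursive", "rabbitHole", "droste")
+ .finish();
+
+ Schema expectedSchema = new Schema.Parser().parse(EXPECTED_RECURSIVE_SCHEMA_NAMING_CONVENTIONS);
+ assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true));
+ }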
- private static final String SOURCE_SCHEMA = """
- {
- "testCase": "SchemaManipulatorTest",
- "type": "record",
- "name": "envelope",
- "namespace": "ns",
- "fields": [
- {
- "extra": "unused",
- "name": "source",
- "type": "string"
- }, {
- "name": "target",
- "type": "string"
- }, {
- "name": "payload",
- "type": {
- "type": "record",
- "name": "payloadRecord",
- "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data.",
- "fields": [
- {
- "name": "type",
- "type": [
- {
- "type": "enum",
- "name": "type",
- "symbols": ["xml", "text", "binary"]
- }, "null"
- ],
- "default": "xml"
- }, {
- "name": "value",
- "type": ["null", "string"],
- "doc": "The entire element content, unparsed.",
- "default": null
- }, {
- "name": "digestsByName",
- "type": {"type": "map", "values": {
- "type": "fixed",
- "name": "hash",
- "size": 16
- }},
- "default": {}
- }, {
- "name": "hmac",
- "type": {
- "type": "fixed",
- "name": "hmac",
- "size": 32
- }
- }, {
- "name": "switchList",
- "type": {"type": "array", "items": {
- "type": "enum",
- "name": "switch",
- "symbols": ["off", "on"]
- }},
- "default": []
- }, {
- "name": "category",
- "type": {
- "type": "enum",
- "name": "types",
- "symbols": ["good", "bad", "ugly"],
- "default": "ugly"
- },
- "default": "good"
- }
- ]
- },
- "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data."
- }, {
- "name": "nested",
- "aliases": ["wrappingField"],
- "type": {
- "type": "record",
- "name": "WithSingleField",
- "fields": [{
- "name": "ignored",
- "type": {
- "type": "array",
- "items": "string"
- },
- "default": []
- }]
- }
- }
- ]
- }""";
- private static final String EXPECTED_SCHEMA = """
- {
- "testCase": "SchemaManipulatorTest",
- "type": "record",
- "name": "satchel",
- "namespace": "ns",
- "aliases": ["ns.envelope"],
- "fields": [
- {
- "extra": "unused",
- "name": "source",
- "type": "string"
- }, {
- "name": "destination",
- "aliases": ["target"],
- "type": "string"
- }, {
- "name": "payload",
- "type": {
- "type": "record",
- "name": "payloadRecord",
- "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data.",
- "fields": [
- {
- "name": "type",
- "type": [
- {
- "type": "enum",
- "name": "type",
- "symbols": ["xml", "text", "binary"]
- }, "null"
- ],
- "default": "xml"
- }, {
- "name": "value",
- "type": ["null", "string"],
- "doc": "The entire element content, unparsed.",
- "default": null
- }, {
- "name": "namedHashes",
- "aliases": ["digestsByName"],
- "type": {"type": "map", "values": {
- "type": "fixed",
- "name": "salted",
- "size": 16
- }},
- "default": {}
- }, {
- "name": "hmac",
- "type": {
- "type": "fixed",
- "name": "hmac",
- "size": 32
- }
- }, {
- "name": "switchList",
- "type": {"type": "array", "items": {
- "type": "enum",
- "name": "toggle",
- "symbols": ["off", "on"]
- }},
- "default": []
- }, {
- "name": "category",
- "type": {
- "type": "enum",
- "name": "types",
- "symbols": ["good", "bad", "ugly"],
- "default": "ugly"
- },
- "default": "good"
- }
- ]
- },
- "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data."
- }, {
- "name": "nested",
- "aliases": ["wrappingField"],
- "type": {
- "type": "array",
- "items": "string"
- },
- "default": []
- }
- ]
- }""";
- private static final String EXPECTED_SCHEMA_WITHOUT_ALIASES = """
- {
- "testCase": "SchemaManipulatorTest",
- "type": "record",
- "name": "satchel",
- "namespace": "ns",
- "fields": [
- {
- "extra": "unused",
- "name": "source",
- "type": "string"
- }, {
- "name": "destination",
- "type": "string"
- }, {
- "name": "payload",
- "type": {
- "type": "record",
- "name": "payloadRecord",
- "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data.",
- "fields": [
- {
- "name": "type",
- "type": [
- {
- "type": "enum",
- "name": "type",
- "symbols": ["xml", "text", "binary"]
- }, "null"
- ],
- "default": "xml"
- }, {
- "name": "value",
- "type": ["null", "string"],
- "doc": "The entire element content, unparsed.",
- "default": null
- }, {
- "name": "namedHashes",
- "type": {"type": "map", "values": {
- "type": "fixed",
- "name": "salted",
- "size": 16
- }},
- "default": {}
- }, {
- "name": "hmac",
- "type": {
- "type": "fixed",
- "name": "hmac",
- "size": 32
- }
- }, {
- "name": "switchList",
- "type": {"type": "array", "items": {
- "type": "enum",
- "name": "toggle",
- "symbols": ["off", "on"]
- }},
- "default": []
- }, {
- "name": "category",
- "type": {
- "type": "enum",
- "name": "types",
- "symbols": ["good", "bad", "ugly"],
- "default": "ugly"
- },
- "default": "good"
- }
- ]
- },
- "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data."
- }, {
- "name": "nested",
- "aliases": ["wrappingField"],
- "type": {
- "type": "array",
- "items": "string"
- },
- "default": []
- }
- ]
- }""";
+ private static final String SOURCE_SCHEMA = """
+ {
+ "testCase": "SchemaManipulatorTest",
+ "type": "record",
+ "name": "envelope",
+ "namespace": "ns",
+ "fields": [
+ {
+ "extra": "unused",
+ "name": "source",
+ "type": "string"
+ }, {
+ "name": "target",
+ "type": "string"
+ }, {
+ "name": "payload",
+ "type": {
+ "type": "record",
+ "name": "payloadRecord",
+ "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data.",
+ "fields": [
+ {
+ "name": "type",
+ "type": [
+ {
+ "type": "enum",
+ "name": "type",
+ "symbols": ["xml", "text", "binary"]
+ }, "null"
+ ],
+ "default": "xml"
+ }, {
+ "name": "value",
+ "type": ["null", "string"],
+ "doc": "The entire element content, unparsed.",
+ "default": null
+ }, {
+ "name": "digestsByName",
+ "type": {"type": "map", "values": {
+ "type": "fixed",
+ "name": "hash",
+ "size": 16
+ }},
+ "default": {}
+ }, {
+ "name": "hmac",
+ "type": {
+ "type": "fixed",
+ "name": "hmac",
+ "size": 32
+ }
+ }, {
+ "name": "switchList",
+ "type": {"type": "array", "items": {
+ "type": "enum",
+ "name": "switch",
+ "symbols": ["off", "on"]
+ }},
+ "default": []
+ }, {
+ "name": "category",
+ "type": {
+ "type": "enum",
+ "name": "types",
+ "symbols": ["good", "bad", "ugly"],
+ "default": "ugly"
+ },
+ "default": "good"
+ }
+ ]
+ },
+ "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data."
+ }, {
+ "name": "nested",
+ "aliases": ["wrappingField"],
+ "type": {
+ "type": "record",
+ "name": "WithSingleField",
+ "fields": [{
+ "name": "ignored",
+ "type": {
+ "type": "array",
+ "items": "string"
+ },
+ "default": []
+ }]
+ }
+ }
+ ]
+ }""";
+ private static final String EXPECTED_SCHEMA = """
+ {
+ "testCase": "SchemaManipulatorTest",
+ "type": "record",
+ "name": "satchel",
+ "namespace": "ns",
+ "aliases": ["ns.envelope"],
+ "fields": [
+ {
+ "extra": "unused",
+ "name": "source",
+ "type": "string"
+ }, {
+ "name": "destination",
+ "aliases": ["target"],
+ "type": "string"
+ }, {
+ "name": "payload",
+ "type": {
+ "type": "record",
+ "name": "payloadRecord",
+ "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data.",
+ "fields": [
+ {
+ "name": "type",
+ "type": [
+ {
+ "type": "enum",
+ "name": "type",
+ "symbols": ["xml", "text", "binary"]
+ }, "null"
+ ],
+ "default": "xml"
+ }, {
+ "name": "value",
+ "type": ["null", "string"],
+ "doc": "The entire element content, unparsed.",
+ "default": null
+ }, {
+ "name": "namedHashes",
+ "aliases": ["digestsByName"],
+ "type": {"type": "map", "values": {
+ "type": "fixed",
+ "name": "salted",
+ "size": 16
+ }},
+ "default": {}
+ }, {
+ "name": "hmac",
+ "type": {
+ "type": "fixed",
+ "name": "hmac",
+ "size": 32
+ }
+ }, {
+ "name": "switchList",
+ "type": {"type": "array", "items": {
+ "type": "enum",
+ "name": "toggle",
+ "symbols": ["off", "on"]
+ }},
+ "default": []
+ }, {
+ "name": "category",
+ "type": {
+ "type": "enum",
+ "name": "types",
+ "symbols": ["good", "bad", "ugly"],
+ "default": "ugly"
+ },
+ "default": "good"
+ }
+ ]
+ },
+ "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data."
+ }, {
+ "name": "nested",
+ "aliases": ["wrappingField"],
+ "type": {
+ "type": "array",
+ "items": "string"
+ },
+ "default": []
+ }
+ ]
+ }""";
+ private static final String EXPECTED_SCHEMA_WITHOUT_ALIASES = """
+ {
+ "testCase": "SchemaManipulatorTest",
+ "type": "record",
+ "name": "satchel",
+ "namespace": "ns",
+ "fields": [
+ {
+ "extra": "unused",
+ "name": "source",
+ "type": "string"
+ }, {
+ "name": "destination",
+ "type": "string"
+ }, {
+ "name": "payload",
+ "type": {
+ "type": "record",
+ "name": "payloadRecord",
+ "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data.",
+ "fields": [
+ {
+ "name": "type",
+ "type": [
+ {
+ "type": "enum",
+ "name": "type",
+ "symbols": ["xml", "text", "binary"]
+ }, "null"
+ ],
+ "default": "xml"
+ }, {
+ "name": "value",
+ "type": ["null", "string"],
+ "doc": "The entire element content, unparsed.",
+ "default": null
+ }, {
+ "name": "namedHashes",
+ "type": {"type": "map", "values": {
+ "type": "fixed",
+ "name": "salted",
+ "size": 16
+ }},
+ "default": {}
+ }, {
+ "name": "hmac",
+ "type": {
+ "type": "fixed",
+ "name": "hmac",
+ "size": 32
+ }
+ }, {
+ "name": "switchList",
+ "type": {"type": "array", "items": {
+ "type": "enum",
+ "name": "toggle",
+ "symbols": ["off", "on"]
+ }},
+ "default": []
+ }, {
+ "name": "category",
+ "type": {
+ "type": "enum",
+ "name": "types",
+ "symbols": ["good", "bad", "ugly"],
+ "default": "ugly"
+ },
+ "default": "good"
+ }
+ ]
+ },
+ "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data."
+ }, {
+ "name": "nested",
+ "aliases": ["wrappingField"],
+ "type": {
+ "type": "array",
+ "items": "string"
+ },
+ "default": []
+ }
+ ]
+ }""";
- private static final String SOURCE_SCHEMA_WITH_ARRAYS = """
- {
- "type": "record",
- "namespace": "ns",
- "name": "WithArrays",
- "fields": [
- {"name": "notARecord", "type": ["int", "string"], "doc": "this and the nest field also increase coverage on union checks"},
- {"name": "alsoNotARecord", "type": ["int", "string", "null"]},
- {"name": "tooLargeRecord", "type": { "type": "record", "name": "pair", "fields": [
- {"name": "one", "type": "int"}, {"name": "two", "type": "int"}
- ]}},
- {"name": "notAWrappedArray", "type": {"type": "record", "name": "nested", "fields": [
- {"name": "numbers", "type": {"type": "record", "name": "wrappedNumbers",
- "doc": "This wrapped array is nested in a single-field record to increase test coverage twice: numbers is not an array, and to not match on wrapping schema name",
- "fields": [
- {"name": "numberArray", "type": {"type": "array", "items": "int"}, "default": []}
- ]}}
- ]}},
- {"name": "dependencies", "type": ["null", {"type": "record", "name": "whatever", "fields": [
- {"name": "dependency", "type": {"type": "array", "items": "string"}, "default": []}
- ]}], "default": null},
- {"name": "matchByPath", "aliases": ["matchByName"], "type": {"type": "record", "name": "wrappedStrings", "fields": [
- {"name": "textList", "type": {"type": "array", "items": "string"}}
- ]}},
- {"name": "anotherOne", "type": {"type": "record", "name": "wrappedEnums", "fields": [
- {"name": "keptIntact", "type": {"type": "array", "items": {"type": "enum", "name": "switch", "symbols": ["on", "off"]}}}
- ]}}
- ]
- }""";
- private static final String EXPECTED_SCHEMA_WITH_ARRAYS = """
- {
- "type": "record",
- "namespace": "ns",
- "name": "WithArrays",
- "fields": [
- {"name": "notARecord", "type": ["int", "string"], "doc": "this and the nest field also increase coverage on union checks"},
- {"name": "alsoNotARecord", "type": ["int", "string", "null"]},
- {"name": "tooLargeRecord", "type": { "type": "record", "name": "pair", "fields": [
- {"name": "one", "type": "int"}, {"name": "two", "type": "int"}
- ]}},
- {"name": "notAWrappedArray", "type": {"type": "record", "name": "nested", "fields": [
- {"name": "numbers", "type": {"type": "array", "items": "int"}, "default": []}
- ]}},
- {"name": "dependencies", "type": {"type": "array", "items": "string"}, "default": []},
- {"name": "matchByPath", "aliases": ["matchByName"], "type": {"type": "array", "items": "string"}},
- {"name": "anotherOne", "type": {"type": "record", "name": "wrappedEnums", "fields": [
- {"name": "keptIntact", "type": {"type": "array", "items": {"type": "enum", "name": "switch", "symbols": ["on", "off"]}}}
- ]}}
- ]
- }""";
+ private static final String SOURCE_SCHEMA_WITH_ARRAYS = """
+ {
+ "type": "record",
+ "namespace": "ns",
+ "name": "WithArrays",
+ "fields": [
+ {"name": "notARecord", "type": ["int", "string"], "doc": "this and the nest field also increase coverage on union checks"},
+ {"name": "alsoNotARecord", "type": ["int", "string", "null"]},
+ {"name": "tooLargeRecord", "type": { "type": "record", "name": "pair", "fields": [
+ {"name": "one", "type": "int"}, {"name": "two", "type": "int"}
+ ]}},
+ {"name": "notAWrappedArray", "type": {"type": "record", "name": "nested", "fields": [
+ {"name": "numbers", "type": {"type": "record", "name": "wrappedNumbers",
+ "doc": "This wrapped array is nested in a single-field record to increase test coverage twice: numbers is not an array, and to not match on wrapping schema name",
+ "fields": [
+ {"name": "numberArray", "type": {"type": "array", "items": "int"}, "default": []}
+ ]}}
+ ]}},
+ {"name": "dependencies", "type": ["null", {"type": "record", "name": "whatever", "fields": [
+ {"name": "dependency", "type": {"type": "array", "items": "string"}, "default": []}
+ ]}], "default": null},
+ {"name": "matchByPath", "aliases": ["matchByName"], "type": {"type": "record", "name": "wrappedStrings", "fields": [
+ {"name": "textList", "type": {"type": "array", "items": "string"}}
+ ]}},
+ {"name": "anotherOne", "type": {"type": "record", "name": "wrappedEnums", "fields": [
+ {"name": "keptIntact", "type": {"type": "array", "items": {"type": "enum", "name": "switch", "symbols": ["on", "off"]}}}
+ ]}}
+ ]
+ }""";
+ private static final String EXPECTED_SCHEMA_WITH_ARRAYS = """
+ {
+ "type": "record",
+ "namespace": "ns",
+ "name": "WithArrays",
+ "fields": [
+ {"name": "notARecord", "type": ["int", "string"], "doc": "this and the nest field also increase coverage on union checks"},
+ {"name": "alsoNotARecord", "type": ["int", "string", "null"]},
+ {"name": "tooLargeRecord", "type": { "type": "record", "name": "pair", "fields": [
+ {"name": "one", "type": "int"}, {"name": "two", "type": "int"}
+ ]}},
+ {"name": "notAWrappedArray", "type": {"type": "record", "name": "nested", "fields": [
+ {"name": "numbers", "type": {"type": "array", "items": "int"}, "default": []}
+ ]}},
+ {"name": "dependencies", "type": {"type": "array", "items": "string"}, "default": []},
+ {"name": "matchByPath", "aliases": ["matchByName"], "type": {"type": "array", "items": "string"}},
+ {"name": "anotherOne", "type": {"type": "record", "name": "wrappedEnums", "fields": [
+ {"name": "keptIntact", "type": {"type": "array", "items": {"type": "enum", "name": "switch", "symbols": ["on", "off"]}}}
+ ]}}
+ ]
+ }""";
- private static final String SOURCE_RECURSIVE_SCHEMA = """
- {
- "testCase": "SchemaManipulatorTest",
- "type": "record",
- "name": "recursive",
- "namespace": "ns",
- "fields": [
- {
- "name": "name",
- "type": "string"
- }, {
- "name": "rabbitHole",
- "type": "recursive"
- }
- ]
- }""";
- private static final String EXPECTED_RECURSIVE_SCHEMA = """
- {
- "testCase": "SchemaManipulatorTest",
- "type": "record",
- "name": "recursive",
- "namespace": "ns",
- "fields": [
- {
- "name": "name",
- "type": "string"
- }, {
- "name": "droste",
- "aliases": ["rabbitHole"],
- "type": "recursive"
- }
- ]
- }""";
+ private static final String SOURCE_RECURSIVE_SCHEMA = """
+ {
+ "testCase": "SchemaManipulatorTest",
+ "type": "record",
+ "name": "recursive",
+ "namespace": "ns",
+ "fields": [
+ {
+ "name": "name",
+ "type": "string"
+ }, {
+ "name": "rabbitHole",
+ "type": "recursive"
+ }
+ ]
+ }""";
+ private static final String EXPECTED_RECURSIVE_SCHEMA = """
+ {
+ "testCase": "SchemaManipulatorTest",
+ "type": "record",
+ "name": "recursive",
+ "namespace": "ns",
+ "fields": [
+ {
+ "name": "name",
+ "type": "string"
+ }, {
+ "name": "droste",
+ "aliases": ["rabbitHole"],
+ "type": "recursive"
+ }
+ ]
+ }""";
+ private static final String EXPECTED_RECURSIVE_SCHEMA_NAMING_CONVENTIONS = """
+ {
+ "testCase": "SchemaManipulatorTest",
+ "type": "record",
+ "name": "RECURSIVE",
+ "aliases": ["recursive"],
+ "namespace": "ns",
+ "fields": [
+ {
+ "name": "Name",
+ "aliases": ["name"],
+ "type": "string"
+ }, {
+ "name": "droste",
+ "aliases": ["rabbitHole"],
+ "type": "RECURSIVE"
+ }
+ ]
+ }""";
}
diff --git a/src/test/java/opwvhk/avro/util/NamingConventionTest.java b/src/test/java/opwvhk/avro/util/NamingConventionTest.java
new file mode 100644
index 0000000..95c32a8
--- /dev/null
+++ b/src/test/java/opwvhk/avro/util/NamingConventionTest.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright © Oscar Westra van Holthe - Kind
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+package opwvhk.avro.util;
+
+import org.junit.jupiter.api.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+class NamingConventionTest {
+ private static final String EXAMPLE = "multiple word identifier";
+ private static final Map<NamingConvention, String> EXAMPLES = Map.of(
+ NamingConvention.PASCAL_CASE, "MultipleWordIdentifier",
+ NamingConvention.CAMEL_CASE, "multipleWordIdentifier",
+ NamingConvention.SNAKE_CASE, "multiple_word_identifier",
+ NamingConvention.KEBAB_CASE, "multiple-word-identifier",
+ NamingConvention.PASCAL_SNAKE_CASE, "Multiple_Word_Identifier",
+ NamingConvention.CAMEL_SNAKE_CASE, "multiple_Word_Identifier",
+ NamingConvention.SCREAMING_SNAKE_CASE, "MULTIPLE_WORD_IDENTIFIER",
+ NamingConvention.TRAIN_CASE, "Multiple-Word-Identifier",
+ NamingConvention.COBOL_CASE, "MULTIPLE-WORD-IDENTIFIER"
+ );
+
+ @Test
+ void validatePredefinedNamingConventions() {
+ assertThat(NamingConvention.PASCAL_CASE.convert("Pascal Case")).isEqualTo("PascalCase");
+ assertThat(NamingConvention.CAMEL_CASE.convert("Camel Case")).isEqualTo("camelCase");
+ assertThat(NamingConvention.SNAKE_CASE.convert("Snake Case")).isEqualTo("snake_case");
+ assertThat(NamingConvention.KEBAB_CASE.convert("Kebab Case")).isEqualTo("kebab-case");
+ assertThat(NamingConvention.PASCAL_SNAKE_CASE.convert("Pascal Snake Case")).isEqualTo("Pascal_Snake_Case");
+ assertThat(NamingConvention.CAMEL_SNAKE_CASE.convert("Camel Snake Case")).isEqualTo("camel_Snake_Case");
+ assertThat(NamingConvention.SCREAMING_SNAKE_CASE.convert("Screaming Snake Case")).isEqualTo("SCREAMING_SNAKE_CASE");
+ assertThat(NamingConvention.TRAIN_CASE.convert("Train Case")).isEqualTo("Train-Case");
+ assertThat(NamingConvention.COBOL_CASE.convert("Cobol Case")).isEqualTo("COBOL-CASE");
+ }
+
+ @Test
+ void verifyPredefinedNamingConventionsAreDeterministic() {
+ List<NamingConvention> namingConventions = new ArrayList<>(EXAMPLES.keySet());
+ Collections.shuffle(namingConventions);
+
+ String name = EXAMPLE;
+ for (NamingConvention namingConvention : namingConventions) {
+ name = namingConvention.convert(name);
+ assertThat(name).isEqualTo(EXAMPLES.get(namingConvention));
+ }
+ }
+
+ @Test
+ void validateWordCase() {
+ assertThat(NamingConvention.WordCase.LOWER_CASE.apply("MiXeD")).isEqualTo("mixed");
+ assertThat(NamingConvention.WordCase.UPPER_CASE.apply("MiXeD")).isEqualTo("MIXED");
+ assertThat(NamingConvention.WordCase.CAPITALIZED.apply("MiXeD")).isEqualTo("Mixed");
+ }
+
+ @Test
+ @SuppressWarnings("SpellCheckingInspection")
+ void validateWordSplitting() {
+ NamingConvention dummy = new NamingConvention(" ", NamingConvention.WordCase.CAPITALIZED, NamingConvention.WordCase.LOWER_CASE);
+
+ // Text with accents, various dashes & spaces, and nonsense characters
+ assertThat(dummy.convert("th↔︎Ïs—IS–a Sèn🛫ténçE")).isEqualTo("This is a sentence");
+ // Greek text with accents, an underscore and various spaces / space marks ("Αυτή είναι μια πρόταση" translates to "This is a sentence")
+ assertThat(dummy.convert("αυτή είΝαι_μΙα﹏ΠΡόταση")).isEqualTo("Αυτη ειναι μια προταση");
+ // Text without dashes & spaces
+ assertThat(dummy.convert("th↔︎ïsIsAnotherSèn🛫ténçe")).isEqualTo("This is another sentence");
+
+ assertThatThrownBy(() -> dummy.convert("🛫 ﹏_ ↔︎")).isInstanceOf(IllegalArgumentException.class);
+ }
+
+ @Test
+ void ensureTheNullConventionDoesNothing() {
+ byte[] randomBytes = new byte[16];
+ new Random().nextBytes(randomBytes);
+ String randomString = new String(randomBytes, StandardCharsets.UTF_8);
+
+ assertThat(NamingConvention.NULL.convert(randomString)).isEqualTo(randomString);
+ }
+}