diff --git a/src/main/java/opwvhk/avro/SchemaManipulator.java b/src/main/java/opwvhk/avro/SchemaManipulator.java index c19b6ad..f462bc6 100644 --- a/src/main/java/opwvhk/avro/SchemaManipulator.java +++ b/src/main/java/opwvhk/avro/SchemaManipulator.java @@ -1,5 +1,12 @@ package opwvhk.avro; +import net.jimblackler.jsonschemafriend.GenerationException; +import opwvhk.avro.json.SchemaAnalyzer; +import opwvhk.avro.util.AvroSchemaUtils; +import opwvhk.avro.util.NamingConvention; +import opwvhk.avro.xml.XsdAnalyzer; +import org.apache.avro.Schema; + import java.io.IOException; import java.io.InputStream; import java.net.URISyntaxException; @@ -10,12 +17,7 @@ import java.util.List; import java.util.Objects; import java.util.Set; - -import net.jimblackler.jsonschemafriend.GenerationException; -import opwvhk.avro.json.SchemaAnalyzer; -import opwvhk.avro.util.AvroSchemaUtils; -import opwvhk.avro.xml.XsdAnalyzer; -import org.apache.avro.Schema; +import java.util.stream.Stream; import static java.util.Objects.requireNonNull; @@ -29,11 +31,18 @@ public class SchemaManipulator { private boolean renameWithAliases; private StringBuilder markdownBuffer; private List schemaRenamerList; + private SchemaRenamer schemaNamingConvention; private List fieldRenamerList; + private FieldRenamer fieldNamingConvention; private List unwrapTests; - private SchemaManipulator(Schema initialSchema) { - reset(initialSchema); + /** + * Create a schema manipulator for a given schema. + * + * @param schema the schema to manipulate + */ + public SchemaManipulator(Schema schema) { + reset(schema); } private void reset(Schema initialSchema) { @@ -42,7 +51,9 @@ private void reset(Schema initialSchema) { renameWithAliases = true; markdownBuffer = null; schemaRenamerList = new ArrayList<>(); + schemaNamingConvention = (pathToField, fieldSchema) -> null; fieldRenamerList = new ArrayList<>(); + fieldNamingConvention = (pathToField, schemaWithField, field) -> null; unwrapTests = new ArrayList<>(); } @@ -71,7 +82,8 @@ public static SchemaManipulator startFromAvro(URL schemaLocation) throws IOExcep } /** - * Create a schema manipulator from an XML Schema Definition (XSD). The location of the main {@code .xsd} file is provided, both to provide the XSD content, + * Create a schema manipulator from an XML Schema Definition (XSD). The location of the main {@code .xsd} file is provided, both to provide the XSD + * content, * as to provide a way to locate imported/included {@code .xsd} files. * * @param schemaLocation the location of the main {@code .xsd} file (it may include/import other {@code .xsd} files) @@ -210,7 +222,7 @@ private Schema applySchemaChanges(IdentityHashMap changedSchemas } private String newSchemaName(String path, Schema schema) { - return schemaRenamerList.stream() + return Stream.concat(schemaRenamerList.stream(), Stream.of(schemaNamingConvention)) .map(renamer -> renamer.newSchemaName(path, schema)) .filter(Objects::nonNull) .findAny() @@ -218,7 +230,7 @@ private String newSchemaName(String path, Schema schema) { } private String newFieldName(String path, Schema schemaWithField, Schema.Field field) { - return fieldRenamerList.stream() + return Stream.concat(fieldRenamerList.stream(), Stream.of(fieldNamingConvention)) .map(renamer -> renamer.newFieldName(path, schemaWithField, field)) .filter(Objects::nonNull) .findAny() @@ -297,6 +309,35 @@ public SchemaManipulator renameSchemaAtPath(String newSchemaName, String... path return this; } + /** + * Use the specified naming convention for schemas. 
This naming convention applies for all schemas that have not been explicitly renamed using + * {@link #renameSchema(String, String)} or {@link #renameSchemaAtPath(String, String...)}, and leaves the namespace name intact. + * + * @param schemaNamingConvention the naming convention to use + * @return this {@code SchemaManipulator} + */ + public SchemaManipulator useSchemaNamingConvention(NamingConvention schemaNamingConvention) { + return useSchemaNamingConvention(NamingConvention.NULL, schemaNamingConvention); + } + + /** + * Use the specified naming conventions for schemas. These naming conventions apply for all schemas that have not been explicitly renamed using + * {@link #renameSchema(String, String)} or {@link #renameSchemaAtPath(String, String...)}. + * + * @param namespaceNamingConvention the naming convention to use for the schema namespace + * @param schemaNamingConvention the naming convention to use for the schema (simple) name + * @return this {@code SchemaManipulator} + */ + public SchemaManipulator useSchemaNamingConvention(NamingConvention namespaceNamingConvention, NamingConvention schemaNamingConvention) { + this.schemaNamingConvention = (path, schema) -> { + String namespace = schema.getNamespace(); + String prefix = namespace == null ? "" : namespaceNamingConvention.convert(namespace) + "."; + String newFullName = prefix + schemaNamingConvention.convert(schema.getName()); + return schema.getFullName().equals(newFullName) ? null : newFullName; + }; + return this; + } + /** * Rename the specified field in the (named) schema. * @@ -330,6 +371,22 @@ public SchemaManipulator renameFieldAtPath(String newFieldName, String... pathTo return this; } + /** + * Use the specified naming conventions for fields. This naming convention applies for all fields that have not been explicitly renamed using + * * {@link #renameField(String, String, String)} or {@link #renameFieldAtPath(String, String...)}. + * + * @param namingConvention the naming convention to use for the schema (simple) name + * @return this {@code SchemaManipulator} + */ + public SchemaManipulator useFieldNamingConvention(NamingConvention namingConvention) { + this.fieldNamingConvention = (pathToField, schemaWithField, field) -> { + String oldName = field.name(); + String newName = namingConvention.convert(oldName); + return oldName.equals(newName) ? null : newName; + }; + return this; + } + /** *

Unwrap all arrays whose field names (except up to the last {@code ignoredMaxSuffixLength} characters) are equal.

* @@ -384,7 +441,8 @@ public SchemaManipulator unwrapArray(String schemaName, String wrappingField) { *

Unwrap the array whose wrapping field is at the specified path.

* *

Wrapped arrays are an XML construct. They result in array fields without siblings in a record field (optionally in a union with null). In Avro, - * Parquet, and in fact most/all other formats, they are both not needed and unwanted. This method unwraps them based on the path to the wrapping field.

+ * Parquet, and in fact most/all other formats, they are both not needed and unwanted. This method unwraps them based on the path to the wrapping field.

* *

When unwrapping, the wrapped field will replace the wrapping field, using the name of the wrapping field. As this is not a renaming action, no alias will * be added.
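Illustrative only (not part of the diff): a minimal sketch of how the new naming-convention hooks combine with the existing fluent API. Here, inputSchema is a placeholder for any Avro schema, and the explicit rename mirrors one used in the tests further down.

    import opwvhk.avro.SchemaManipulator;
    import opwvhk.avro.util.NamingConvention;
    import org.apache.avro.Schema;

    // Explicit renames take precedence; the conventions only affect schemas and fields
    // that were not renamed explicitly.
    Schema tidied = new SchemaManipulator(inputSchema)
            .renameSchema("ns.envelope", "ns.satchel")
            .useSchemaNamingConvention(NamingConvention.SNAKE_CASE, NamingConvention.PASCAL_CASE)
            .useFieldNamingConvention(NamingConvention.CAMEL_CASE)
            .finish();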

diff --git a/src/main/java/opwvhk/avro/util/NamingConvention.java b/src/main/java/opwvhk/avro/util/NamingConvention.java new file mode 100644 index 0000000..e2a992f --- /dev/null +++ b/src/main/java/opwvhk/avro/util/NamingConvention.java @@ -0,0 +1,238 @@ +/* + * Copyright © Oscar Westra van Holthe - Kind + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package opwvhk.avro.util; + +import org.jetbrains.annotations.NotNull; + +import java.text.Normalizer; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.function.UnaryOperator; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + *

A class to provide naming conventions for multiple-word + * identifiers, like camel case, snake case, etc.

+ * + *

It is up to the user of the class to ensure the applied naming convention makes sense: using e.g. camel case for a script that has no notion of + * upper/lower case letters is not useful.

+ * + *

Algorithm

+ * + *

Casing is applied by first sanitising the text, and determining the list of words. Then the words are put together according to the selected style.

+ * + *

This algorithm is somewhat opinionated: it does not make any special exceptions for acronyms. This is mostly in line with general guidelines, such as + * from Java/Oracle + * and Microsoft, but ignores the Microsoft + * exception for two-letter acronyms (like IO).

+ * + *

Sanitisation is done by creating a {@link Normalizer.Form#NFD canonical decomposition}, and then removing everything that is not in the + * Unicode categories letter (L), number (N), space separator (Zs), connector + * punctuation (Pc) or dash punctuation (Pd). This also removes accents. Words are then determined by splitting along spacing and punctuation.
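Illustrative only (not part of the diff): two conversions that follow from the algorithm just described. The first input is hypothetical; the second value is taken from the unit tests further down.

    // Accents are stripped by the NFD decomposition and the character filter; spaces mark word boundaries.
    NamingConvention.SNAKE_CASE.convert("Crème Brûlée Recipe");     // "creme_brulee_recipe" (hypothetical input)
    // Without delimiters, the fallback split on lower-to-uppercase boundaries is used.
    NamingConvention.PASCAL_CASE.convert("multipleWordIdentifier"); // "MultipleWordIdentifier"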

+ * + *

Defined conventions

+ * + *

There are a number of capitalisation conventions predefined, combining various delimiters and combinations of upper and lower case, as listed below:

+ * + * + * + * + * + * + * + * + * + * + * + * + * + *
Capitalisation Conventions

    Convention             Example
    Pascal Case            PascalCase
    Camel Case             camelCase
    Snake Case             snake_case
    Kebab Case             kebab-case
    Pascal Snake Case      Pascal_Snake_Case
    Camel Snake Case       camel_Snake_Case
    Screaming Snake Case   SCREAMING_SNAKE_CASE
    Train Case             Train-Case
    Cobol Case             COBOL-CASE
+ * + *

Note that there is no predefined convention for (upper) flat case. The reason is that flat case is not reversible. The other conventions can be applied in any + * sequence, and the last one deterministically determines the result. If any convention in between uses flat case, this is no longer true.
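Illustrative only (not part of the diff): a custom convention can also be built with the public constructor defined below. The "dot case" convention shown here is an assumption for illustration; like flat case, it is not reversible, because dots are not kept by the sanitisation filter.

    // Hypothetical "dot case": lowercase words joined with dots.
    NamingConvention dotCase =
            new NamingConvention(".", NamingConvention.WordCase.LOWER_CASE, NamingConvention.WordCase.LOWER_CASE);
    dotCase.convert("MultipleWordIdentifier"); // "multiple.word.identifier"
    // Converting the result back would lose the word boundaries: '.' is removed during sanitisation.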

+ * + * @see Naming conventions for multiple-word identifiers + * @see Java naming convensions + * @see Microsoft capitalization conventions + * @see Unicode annex #44, General Category Values + */ +public class NamingConvention { + /** + * Pascal Case: capitalized words without delimiter. + */ + public static final NamingConvention PASCAL_CASE = new NamingConvention("", WordCase.CAPITALIZED, WordCase.CAPITALIZED); + /** + * Camel Case: lowercase first word followed by capitalized words, without delimiter. + */ + public static final NamingConvention CAMEL_CASE = new NamingConvention("", WordCase.LOWER_CASE, WordCase.CAPITALIZED); + /** + * Snake Case: lowercase words, separated by underscores. + */ + public static final NamingConvention SNAKE_CASE = new NamingConvention("_", WordCase.LOWER_CASE, WordCase.LOWER_CASE); + /** + * Kebab Case: lowercase words, separated by hyphens. + */ + public static final NamingConvention KEBAB_CASE = new NamingConvention("-", WordCase.LOWER_CASE, WordCase.LOWER_CASE); + /** + * Pascal Snake Case: capitalized words, separated by underscores. + */ + public static final NamingConvention PASCAL_SNAKE_CASE = new NamingConvention("_", WordCase.CAPITALIZED, WordCase.CAPITALIZED); + /** + * Camel Snake Case: lowercase first word followed by capitalized words, separated by underscores. + */ + public static final NamingConvention CAMEL_SNAKE_CASE = new NamingConvention("_", WordCase.LOWER_CASE, WordCase.CAPITALIZED); + /** + * Screaming Snake Case: uppercase words, separated by underscores. + */ + public static final NamingConvention SCREAMING_SNAKE_CASE = new NamingConvention("_", WordCase.UPPER_CASE, WordCase.UPPER_CASE); + /** + * Train Case: capitalized words, separated by hyphens. + */ + public static final NamingConvention TRAIN_CASE = new NamingConvention("-", WordCase.CAPITALIZED, WordCase.CAPITALIZED); + /** + * Cobol Case: uppercase words, separated by hyphens. + */ + public static final NamingConvention COBOL_CASE = new NamingConvention("-", WordCase.UPPER_CASE, WordCase.UPPER_CASE); + /** + * Dummy naming convention that returns the given name as-is. + */ + public static final NamingConvention NULL = new NamingConvention(null, null, null) { + @Override + public String convert(String name) { + return name; + } + }; + + private final String delimiter; + private final WordCase firstWord; + private final WordCase otherWords; + + /** + * Create a naming convention for multiple-word identifiers. Combining an empty delimiter with {@link WordCase#LOWER_CASE} or + * {@link WordCase#UPPER_CASE} is discouraged, as the result cannot be converted to another naming convention. + * + * @param delimiter the word delimiter to use + * @param firstWord the capitalization for the first word + * @param otherWords the capitalization for the other words + */ + public NamingConvention(String delimiter, WordCase firstWord, WordCase otherWords) { + this.delimiter = delimiter; + this.firstWord = firstWord; + this.otherWords = otherWords; + } + + /** + * Convert a text/name to a name in this name case. + * + * @param name the name to convert + * @return the name in this name case + */ + public String convert(String name) { + // First remove accents, extra punctuation, etc. Keep only letters, numbers, and dash & combining punctuation. 
+ String cleanName = NAME_CHARACTER_FILTER.matcher(Normalizer.normalize(name, Normalizer.Form.NFD)).replaceAll(""); + // if (cleanName.isEmpty() ) + + // Then split by boundary characters, and determine the first non-empty word + List words = splitToWords(DELIMITER_BOUNDARY, cleanName); + if (words.isEmpty()) { + throw new IllegalArgumentException("The name contains no letters or numbers"); + } else if (words.size() == 1) { + // The name contains no boundary characters: maybe it is camel case. + words = splitToWords(CAMEL_BOUNDARY, cleanName); + } + + StringBuilder buffer = new StringBuilder((int) (name.length() * 1.2f)); + Iterator iterator = words.iterator(); + buffer.append(firstWord.apply(iterator.next())); + iterator.forEachRemaining(word -> buffer.append(delimiter).append(otherWords.apply(word))); + return buffer.toString(); + } + + /** + * Pattern to match anything that's not a letter, number or delimiter boundary. + */ + private static final Pattern NAME_CHARACTER_FILTER = Pattern.compile("[^\\p{L}\\p{N}\\p{Zs}\\p{Pd}\\p{Pc}]+"); + + /** + * Pattern to match word boundaries using delimiters: any combination of spaces & dash/combining punctuation after a letter or number. + */ + private static final Pattern DELIMITER_BOUNDARY = Pattern.compile("[\\p{Zs}\\p{Pd}\\p{Pc}]+"); + /** + * Pattern to match any word boundary: the (zero-width) point between a lower- and uppercase letter, or any combination of spaces & punctuation. + */ + @SuppressWarnings("RegExpSimplifiable") // bug: the suggestion to remove [] from [\p{L}&&\P{Lu}] is wrong + private static final Pattern CAMEL_BOUNDARY = Pattern.compile("(?<=[\\p{L}&&\\P{Lu}])(?=\\p{Lu})"); + + @NotNull + private List splitToWords(Pattern wordBoundary, String text) { + List words = new ArrayList<>(); + Matcher matcher = wordBoundary.matcher(text); + int start = 0; + while (matcher.find()) { + if (start < matcher.start()) { + // Only add non-empty words + words.add(text.substring(start, matcher.start())); + } + start = matcher.end(); + } + if (start < text.length()) { + // There's text remaining: add it + words.add(text.substring(start)); + } + return words; + } + + /** + * Operator to apply "proper" to a name part. + */ + public enum WordCase implements UnaryOperator { + /** + * Convert the word to lower case. + */ + LOWER_CASE { + @Override + public String apply(String word) { + return word.toLowerCase(Locale.ROOT); + } + }, + /** + * Convert the word to upper case. + */ + UPPER_CASE { + @Override + public String apply(String word) { + return word.toUpperCase(Locale.ROOT); + } + }, + /** + * Convert the word to lower case, except the first character (convert that to upper case). + */ + CAPITALIZED { + @Override + public String apply(String word) { + int firstCodePoint = word.codePointAt(0); + int sizeOfFirstCharacter = Character.charCount(firstCodePoint); + // Use toTitleCase instead of toUpperCase to properly handle digraphs. 
+ return Character.toString(Character.toTitleCase(firstCodePoint)) + word.substring(sizeOfFirstCharacter).toLowerCase(Locale.ROOT); + } + } + } +} diff --git a/src/test/java/opwvhk/avro/SchemaManipulatorTest.java b/src/test/java/opwvhk/avro/SchemaManipulatorTest.java index 2c5a487..9ad285e 100644 --- a/src/test/java/opwvhk/avro/SchemaManipulatorTest.java +++ b/src/test/java/opwvhk/avro/SchemaManipulatorTest.java @@ -1,517 +1,569 @@ package opwvhk.avro; +import net.jimblackler.jsonschemafriend.GenerationException; +import opwvhk.avro.util.NamingConvention; +import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.junit.jupiter.api.Test; + import java.io.IOException; import java.net.URISyntaxException; import java.net.URL; import java.util.List; -import net.jimblackler.jsonschemafriend.GenerationException; -import org.apache.avro.Schema; -import org.junit.jupiter.api.Test; - import static java.util.Objects.requireNonNull; import static org.assertj.core.api.Assertions.assertThat; class SchemaManipulatorTest { - @Test - void testSortedDocumentationViaXsd() throws IOException { - StringBuilder markdown = new StringBuilder(); + @Test + void testSortedDocumentationViaXsd() throws IOException { + StringBuilder markdown = new StringBuilder(); + + URL xsdLocation = getClass().getResource("xml/payload.xsd"); + Schema envelopeSchema = SchemaManipulator + .startFromXsd(xsdLocation, "envelope") + .sortFields() + .alsoDocumentAsMarkdownTable(markdown) + .finish(); + + assertThat(markdown).isEqualToNormalizingUnicode(""" + | Field(path) | Type | Documentation | + |-------------|------|---------------| + | | record | | + | payload | record | The payload is either XML, UTF-8 text or base64 encoded binary data.
Type: The payload is either XML, UTF-8 text or base64 encoded binary data. | + | payload.type? | enum | | + | payload.value? | string | The entire element content, unparsed. | + | source | string | | + | target | string | | + """); + assertThat(envelopeSchema.toString(true)).isEqualTo(Schema.createRecord("ns.envelope", null, null, false, List.of( + new Schema.Field("payload", + Schema.createRecord("ns.payload", "The payload is either XML, UTF-8 text or base64 encoded binary data.", null, false, List.of( + new Schema.Field("type", Schema.createUnion(Schema.createEnum("ns.type", null, null, List.of("xml", "text", "binary")), + Schema.create(Schema.Type.NULL)), null, "xml"), + new Schema.Field("value", Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)), + "The entire element content, unparsed.", Schema.Field.NULL_DEFAULT_VALUE) + )), "The payload is either XML, UTF-8 text or base64 encoded binary data.", null), + new Schema.Field("source", Schema.create(Schema.Type.STRING)), + new Schema.Field("target", Schema.create(Schema.Type.STRING)) + )).toString(true)); + } - URL xsdLocation = getClass().getResource("xml/payload.xsd"); - Schema envelopeSchema = SchemaManipulator - .startFromXsd(xsdLocation, "envelope") - .sortFields() - .alsoDocumentAsMarkdownTable(markdown) - .finish(); + @Test + void testSortedDocumentationViaJsonSchema() throws GenerationException, URISyntaxException, IOException { + StringBuilder markdown = new StringBuilder(); - assertThat(markdown).isEqualToNormalizingUnicode(""" - | Field(path) | Type | Documentation | - |-------------|------|---------------| - | | record | | - | payload | record | The payload is either XML, UTF-8 text or base64 encoded binary data.
Type: The payload is either XML, UTF-8 text or base64 encoded binary data. | - | payload.type? | enum | | - | payload.value? | string | The entire element content, unparsed. | - | source | string | | - | target | string | | - """); - assertThat(envelopeSchema.toString(true)).isEqualTo(Schema.createRecord("ns.envelope", null, null, false, List.of( - new Schema.Field("payload", - Schema.createRecord("ns.payload", "The payload is either XML, UTF-8 text or base64 encoded binary data.", null, false, List.of( - new Schema.Field("type", Schema.createUnion(Schema.createEnum("ns.type", null, null, List.of("xml", "text", "binary")), - Schema.create(Schema.Type.NULL)), null, "xml"), - new Schema.Field("value", Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)), - "The entire element content, unparsed.", Schema.Field.NULL_DEFAULT_VALUE) - )), "The payload is either XML, UTF-8 text or base64 encoded binary data.", null), - new Schema.Field("source", Schema.create(Schema.Type.STRING)), - new Schema.Field("target", Schema.create(Schema.Type.STRING)) - )).toString(true)); - } + URL schemaLocation = getClass().getResource("json/TestRecord.schema.json"); + Schema schema = SchemaManipulator + .startFromJsonSchema(requireNonNull(schemaLocation)) + .alsoDocumentAsMarkdownTable(markdown) + .finish(); - @Test - void testSortedDocumentationViaJsonSchema() throws GenerationException, URISyntaxException, IOException { - StringBuilder markdown = new StringBuilder(); + assertThat(markdown).isEqualToNormalizingUnicode(""" + | Field(path) | Type | Documentation | + |-------------|------|---------------| + | | record | Type: Test schema for parsing records. | + | bool | boolean | | + | shortInt? | int | | + | longInt? | long | | + | hugeInt? | decimal(21,0) | | + | defaultInt? | long | | + | singleFloat? | float | | + | doubleFloat? | double | | + | fixedPoint? | decimal(17,6) | | + | defaultNumber? | double | | + | choice | enum | | + | date? | date | | + | time? | time-millis | | + | timestamp? | timestamp-millis | | + | binary? | bytes | | + | hexBytes? | bytes | | + | texts[] | string | | + | weirdStuff? | record | | + | weirdStuff?.explanation? | string | | + | weirdStuff?.fancy? | string | | + | weirdStuff?.hatseflats? | string | | + | weirdStuff?.rabbitHole? | record | | + """); + assertThat(schema.toString(true)).isEqualTo(new Schema.Parser().parse(getClass().getResourceAsStream("json/TestRecordAll.avsc")).toString(true)); + } - URL schemaLocation = getClass().getResource("json/TestRecord.schema.json"); - Schema schema = SchemaManipulator - .startFromJsonSchema(requireNonNull(schemaLocation)) - .alsoDocumentAsMarkdownTable(markdown) - .finish(); + @Test + void testDocumentationViaAvro() throws IOException { + URL avroLocation = getClass().getResource("xml/envelope.avsc"); + String markDownTable = SchemaManipulator.startFromAvro(avroLocation).asMarkdownTable(); - assertThat(markdown).isEqualToNormalizingUnicode(""" - | Field(path) | Type | Documentation | - |-------------|------|---------------| - | | record | Type: Test schema for parsing records. | - | bool | boolean | | - | shortInt? | int | | - | longInt? | long | | - | hugeInt? | decimal(21,0) | | - | defaultInt? | long | | - | singleFloat? | float | | - | doubleFloat? | double | | - | fixedPoint? | decimal(17,6) | | - | defaultNumber? | double | | - | choice | enum | | - | date? | date | | - | time? | time-millis | | - | timestamp? | timestamp-millis | | - | binary? | bytes | | - | hexBytes? 
| bytes | | - | texts[] | string | | - | weirdStuff? | record | | - | weirdStuff?.explanation? | string | | - | weirdStuff?.fancy? | string | | - | weirdStuff?.hatseflats? | string | | - | weirdStuff?.rabbitHole? | record | | - """); - assertThat(schema.toString(true)).isEqualTo(new Schema.Parser().parse(getClass().getResourceAsStream("json/TestRecordAll.avsc")).toString(true)); - } + assertThat(markDownTable).isEqualTo(""" + | Field(path) | Type | Documentation | + |-------------|------|---------------| + | | record | | + | source | string | | + | target | string | | + | payload | record | The payload is either XML, UTF-8 text or base64 encoded binary data.
Type: The payload is either XML, UTF-8 text or base64 encoded binary data. | + | payload.type? | enum | | + | payload.value? | string | The entire element content, unparsed. | + """); + } - @Test - void testDocumentationViaAvro() throws IOException { - URL avroLocation = getClass().getResource("xml/envelope.avsc"); - String markDownTable = SchemaManipulator.startFromAvro(avroLocation).asMarkdownTable(); + @Test + void testManipulationsWithAliases() { + // Note: manipulating by schema (and field name) also matches on aliases + Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA) + .renameSchema("ns.envelope", "ns.satchel") + .renameSchema("ns.switch", "ns.toggle") + .renameSchema("ns.hash", "ns.salted") + .renameField("ns.envelope", "target", "destination") + .renameField("ns.envelope", "properties", "extraProperties") + .renameField("ns.payloadRecord", "digestsByName", "namedHashes") + .unwrapArray("ns.envelope", "wrappingField") + .finish(); - assertThat(markDownTable).isEqualTo(""" - | Field(path) | Type | Documentation | - |-------------|------|---------------| - | | record | | - | source | string | | - | target | string | | - | payload | record | The payload is either XML, UTF-8 text or base64 encoded binary data.
Type: The payload is either XML, UTF-8 text or base64 encoded binary data. | - | payload.type? | enum | | - | payload.value? | string | The entire element content, unparsed. | - """); - } + Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA); + assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true)); + } - @Test - void testManipulationsWithAliases() { - // Note: manipulating by schema (and field name) also matches on aliases - Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA) - .renameSchema("ns.envelope", "ns.satchel") - .renameSchema("ns.switch", "ns.toggle") - .renameSchema("ns.hash", "ns.salted") - .renameField("ns.envelope", "target", "destination") - .renameField("ns.envelope", "properties", "extraProperties") - .renameField("ns.payloadRecord", "digestsByName", "namedHashes") - .unwrapArray("ns.envelope", "wrappingField") - .finish(); + @Test + void testManipulationsWithoutAliasesByPath() { + // Note: manipulating by path cannot match on aliases + Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA) + .renameWithoutAliases() + .renameSchemaAtPath("ns.satchel") + .renameSchemaAtPath("ns.salted", "payload", "digestsByName") + .renameSchemaAtPath("ns.toggle", "payload", "switchList") + .renameFieldAtPath("destination", "target") + .renameFieldAtPath("namedHashes", "payload", "digestsByName") + .unwrapArrayAtPath("nested") + .finish(); - Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA); - assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true)); - } + Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA_WITHOUT_ALIASES); + assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true)); + } - @Test - void testManipulationsWithoutAliasesByPath() { - // Note: manipulating by path cannot match on aliases - Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA) - .renameWithoutAliases() - .renameSchemaAtPath("ns.satchel") - .renameSchemaAtPath("ns.salted", "payload", "digestsByName") - .renameSchemaAtPath("ns.toggle", "payload", "switchList") - .renameFieldAtPath("destination", "target") - .renameFieldAtPath("namedHashes", "payload", "digestsByName") - .unwrapArrayAtPath("nested") - .finish(); + @Test + void testUnwrappingArrays1() { + Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA_WITH_ARRAYS) + .unwrapArrayAtPath("matchByPath") + .unwrapArrays(3) + .finish(); - Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA_WITHOUT_ALIASES); - assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true)); - } + Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA_WITH_ARRAYS); + assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true)); + } - @Test - void testUnwrappingArrays1() { - Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA_WITH_ARRAYS) - .unwrapArrayAtPath("matchByPath") - .unwrapArrays(3) - .finish(); + @Test + void testUnwrappingArrays2() { + Schema schema = SchemaManipulator.startFromAvro(SOURCE_SCHEMA_WITH_ARRAYS) + .unwrapArray("ns.WithArrays", "matchByName") + .unwrapArrays(3) + .finish(); - Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA_WITH_ARRAYS); - assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true)); - } + Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA_WITH_ARRAYS); + assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true)); + } - @Test - void testUnwrappingArrays2() { - Schema schema = 
SchemaManipulator.startFromAvro(SOURCE_SCHEMA_WITH_ARRAYS) - .unwrapArray("ns.WithArrays", "matchByName") - .unwrapArrays(3) - .finish(); + @Test + void testManipulatingRecursiveSchemas() { + // Note: manipulating by schema (and field name) also matches on aliases + Schema schema = SchemaManipulator.startFromAvro(SOURCE_RECURSIVE_SCHEMA) + .renameField("ns.recursive", "rabbitHole", "droste") + .finish(); - Schema expectedSchema = new Schema.Parser().parse(EXPECTED_SCHEMA_WITH_ARRAYS); - assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true)); - } + Schema expectedSchema = new Schema.Parser().parse(EXPECTED_RECURSIVE_SCHEMA); + assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true)); + } - @Test - void testManipulatingRecursiveSchemas() { - // Note: manipulating by schema (and field name) also matches on aliases - Schema schema = SchemaManipulator.startFromAvro(SOURCE_RECURSIVE_SCHEMA) - .renameField("ns.recursive", "rabbitHole", "droste") - .finish(); + @Test + void testApplyNamingConventions() { + Schema schemaWithoutNamespace = SchemaBuilder.record("simple_name").fields() + .name("field_1").type("string").noDefault() + .name("field2").type("string").noDefault() + .endRecord(); + Schema schemaResultWithoutNamespace = SchemaBuilder.record("simple_name").fields() + .name("field1").type("string").noDefault() + .name("field2").type("string").noDefault() + .endRecord(); + assertThat(new SchemaManipulator(schemaWithoutNamespace).renameWithoutAliases() + .useSchemaNamingConvention(NamingConvention.SNAKE_CASE) + .useFieldNamingConvention(NamingConvention.CAMEL_CASE) + .finish().toString(true) + ).isEqualTo(schemaResultWithoutNamespace.toString(true)); - Schema expectedSchema = new Schema.Parser().parse(EXPECTED_RECURSIVE_SCHEMA); - assertThat(schema.toString(true)).isEqualTo(expectedSchema.toString(true)); - } + Schema schemaWithNamespace = SchemaBuilder.record("simple_name").namespace("somewhereInTheCode").fields() + .name("field_one").type("string").noDefault() + .name("field_two").type("string").noDefault() + .endRecord(); + Schema schemaResultWithNamespace = SchemaBuilder.record("SimpleName").namespace("somewhere_in_the_code").fields() + .name("fieldOne").type("string").noDefault() + .name("fieldTwo").type("string").noDefault() + .endRecord(); + assertThat(new SchemaManipulator(schemaWithNamespace).renameWithoutAliases() + .useSchemaNamingConvention(NamingConvention.SNAKE_CASE, NamingConvention.PASCAL_CASE) + .useFieldNamingConvention(NamingConvention.CAMEL_CASE) + .finish().toString(true) + ).isEqualTo(schemaResultWithNamespace.toString(true)); + } - private static final String SOURCE_SCHEMA = """ - { - "testCase": "SchemaManipulatorTest", - "type": "record", - "name": "envelope", - "namespace": "ns", - "fields": [ - { - "extra": "unused", - "name": "source", - "type": "string" - }, { - "name": "target", - "type": "string" - }, { - "name": "payload", - "type": { - "type": "record", - "name": "payloadRecord", - "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data.", - "fields": [ - { - "name": "type", - "type": [ - { - "type": "enum", - "name": "type", - "symbols": ["xml", "text", "binary"] - }, "null" - ], - "default": "xml" - }, { - "name": "value", - "type": ["null", "string"], - "doc": "The entire element content, unparsed.", - "default": null - }, { - "name": "digestsByName", - "type": {"type": "map", "values": { - "type": "fixed", - "name": "hash", - "size": 16 - }}, - "default": {} - }, { - "name": "hmac", - "type": { - 
"type": "fixed", - "name": "hmac", - "size": 32 - } - }, { - "name": "switchList", - "type": {"type": "array", "items": { - "type": "enum", - "name": "switch", - "symbols": ["off", "on"] - }}, - "default": [] - }, { - "name": "category", - "type": { - "type": "enum", - "name": "types", - "symbols": ["good", "bad", "ugly"], - "default": "ugly" - }, - "default": "good" - } - ] - }, - "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data." - }, { - "name": "nested", - "aliases": ["wrappingField"], - "type": { - "type": "record", - "name": "WithSingleField", - "fields": [{ - "name": "ignored", - "type": { - "type": "array", - "items": "string" - }, - "default": [] - }] - } - } - ] - }"""; - private static final String EXPECTED_SCHEMA = """ - { - "testCase": "SchemaManipulatorTest", - "type": "record", - "name": "satchel", - "namespace": "ns", - "aliases": ["ns.envelope"], - "fields": [ - { - "extra": "unused", - "name": "source", - "type": "string" - }, { - "name": "destination", - "aliases": ["target"], - "type": "string" - }, { - "name": "payload", - "type": { - "type": "record", - "name": "payloadRecord", - "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data.", - "fields": [ - { - "name": "type", - "type": [ - { - "type": "enum", - "name": "type", - "symbols": ["xml", "text", "binary"] - }, "null" - ], - "default": "xml" - }, { - "name": "value", - "type": ["null", "string"], - "doc": "The entire element content, unparsed.", - "default": null - }, { - "name": "namedHashes", - "aliases": ["digestsByName"], - "type": {"type": "map", "values": { - "type": "fixed", - "name": "salted", - "size": 16 - }}, - "default": {} - }, { - "name": "hmac", - "type": { - "type": "fixed", - "name": "hmac", - "size": 32 - } - }, { - "name": "switchList", - "type": {"type": "array", "items": { - "type": "enum", - "name": "toggle", - "symbols": ["off", "on"] - }}, - "default": [] - }, { - "name": "category", - "type": { - "type": "enum", - "name": "types", - "symbols": ["good", "bad", "ugly"], - "default": "ugly" - }, - "default": "good" - } - ] - }, - "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data." 
- }, { - "name": "nested", - "aliases": ["wrappingField"], - "type": { - "type": "array", - "items": "string" - }, - "default": [] - } - ] - }"""; - private static final String EXPECTED_SCHEMA_WITHOUT_ALIASES = """ - { - "testCase": "SchemaManipulatorTest", - "type": "record", - "name": "satchel", - "namespace": "ns", - "fields": [ - { - "extra": "unused", - "name": "source", - "type": "string" - }, { - "name": "destination", - "type": "string" - }, { - "name": "payload", - "type": { - "type": "record", - "name": "payloadRecord", - "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data.", - "fields": [ - { - "name": "type", - "type": [ - { - "type": "enum", - "name": "type", - "symbols": ["xml", "text", "binary"] - }, "null" - ], - "default": "xml" - }, { - "name": "value", - "type": ["null", "string"], - "doc": "The entire element content, unparsed.", - "default": null - }, { - "name": "namedHashes", - "type": {"type": "map", "values": { - "type": "fixed", - "name": "salted", - "size": 16 - }}, - "default": {} - }, { - "name": "hmac", - "type": { - "type": "fixed", - "name": "hmac", - "size": 32 - } - }, { - "name": "switchList", - "type": {"type": "array", "items": { - "type": "enum", - "name": "toggle", - "symbols": ["off", "on"] - }}, - "default": [] - }, { - "name": "category", - "type": { - "type": "enum", - "name": "types", - "symbols": ["good", "bad", "ugly"], - "default": "ugly" - }, - "default": "good" - } - ] - }, - "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data." - }, { - "name": "nested", - "aliases": ["wrappingField"], - "type": { - "type": "array", - "items": "string" - }, - "default": [] - } - ] - }"""; + private static final String SOURCE_SCHEMA = """ + { + "testCase": "SchemaManipulatorTest", + "type": "record", + "name": "envelope", + "namespace": "ns", + "fields": [ + { + "extra": "unused", + "name": "source", + "type": "string" + }, { + "name": "target", + "type": "string" + }, { + "name": "payload", + "type": { + "type": "record", + "name": "payloadRecord", + "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data.", + "fields": [ + { + "name": "type", + "type": [ + { + "type": "enum", + "name": "type", + "symbols": ["xml", "text", "binary"] + }, "null" + ], + "default": "xml" + }, { + "name": "value", + "type": ["null", "string"], + "doc": "The entire element content, unparsed.", + "default": null + }, { + "name": "digestsByName", + "type": {"type": "map", "values": { + "type": "fixed", + "name": "hash", + "size": 16 + }}, + "default": {} + }, { + "name": "hmac", + "type": { + "type": "fixed", + "name": "hmac", + "size": 32 + } + }, { + "name": "switchList", + "type": {"type": "array", "items": { + "type": "enum", + "name": "switch", + "symbols": ["off", "on"] + }}, + "default": [] + }, { + "name": "category", + "type": { + "type": "enum", + "name": "types", + "symbols": ["good", "bad", "ugly"], + "default": "ugly" + }, + "default": "good" + } + ] + }, + "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data." 
+ }, { + "name": "nested", + "aliases": ["wrappingField"], + "type": { + "type": "record", + "name": "WithSingleField", + "fields": [{ + "name": "ignored", + "type": { + "type": "array", + "items": "string" + }, + "default": [] + }] + } + } + ] + }"""; + private static final String EXPECTED_SCHEMA = """ + { + "testCase": "SchemaManipulatorTest", + "type": "record", + "name": "satchel", + "namespace": "ns", + "aliases": ["ns.envelope"], + "fields": [ + { + "extra": "unused", + "name": "source", + "type": "string" + }, { + "name": "destination", + "aliases": ["target"], + "type": "string" + }, { + "name": "payload", + "type": { + "type": "record", + "name": "payloadRecord", + "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data.", + "fields": [ + { + "name": "type", + "type": [ + { + "type": "enum", + "name": "type", + "symbols": ["xml", "text", "binary"] + }, "null" + ], + "default": "xml" + }, { + "name": "value", + "type": ["null", "string"], + "doc": "The entire element content, unparsed.", + "default": null + }, { + "name": "namedHashes", + "aliases": ["digestsByName"], + "type": {"type": "map", "values": { + "type": "fixed", + "name": "salted", + "size": 16 + }}, + "default": {} + }, { + "name": "hmac", + "type": { + "type": "fixed", + "name": "hmac", + "size": 32 + } + }, { + "name": "switchList", + "type": {"type": "array", "items": { + "type": "enum", + "name": "toggle", + "symbols": ["off", "on"] + }}, + "default": [] + }, { + "name": "category", + "type": { + "type": "enum", + "name": "types", + "symbols": ["good", "bad", "ugly"], + "default": "ugly" + }, + "default": "good" + } + ] + }, + "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data." + }, { + "name": "nested", + "aliases": ["wrappingField"], + "type": { + "type": "array", + "items": "string" + }, + "default": [] + } + ] + }"""; + private static final String EXPECTED_SCHEMA_WITHOUT_ALIASES = """ + { + "testCase": "SchemaManipulatorTest", + "type": "record", + "name": "satchel", + "namespace": "ns", + "fields": [ + { + "extra": "unused", + "name": "source", + "type": "string" + }, { + "name": "destination", + "type": "string" + }, { + "name": "payload", + "type": { + "type": "record", + "name": "payloadRecord", + "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data.", + "fields": [ + { + "name": "type", + "type": [ + { + "type": "enum", + "name": "type", + "symbols": ["xml", "text", "binary"] + }, "null" + ], + "default": "xml" + }, { + "name": "value", + "type": ["null", "string"], + "doc": "The entire element content, unparsed.", + "default": null + }, { + "name": "namedHashes", + "type": {"type": "map", "values": { + "type": "fixed", + "name": "salted", + "size": 16 + }}, + "default": {} + }, { + "name": "hmac", + "type": { + "type": "fixed", + "name": "hmac", + "size": 32 + } + }, { + "name": "switchList", + "type": {"type": "array", "items": { + "type": "enum", + "name": "toggle", + "symbols": ["off", "on"] + }}, + "default": [] + }, { + "name": "category", + "type": { + "type": "enum", + "name": "types", + "symbols": ["good", "bad", "ugly"], + "default": "ugly" + }, + "default": "good" + } + ] + }, + "doc": "The payload is either XML, UTF-8 text or base64 encoded binary data." 
+ }, { + "name": "nested", + "aliases": ["wrappingField"], + "type": { + "type": "array", + "items": "string" + }, + "default": [] + } + ] + }"""; - private static final String SOURCE_SCHEMA_WITH_ARRAYS = """ - { - "type": "record", - "namespace": "ns", - "name": "WithArrays", - "fields": [ - {"name": "notARecord", "type": ["int", "string"], "doc": "this and the nest field also increase coverage on union checks"}, - {"name": "alsoNotARecord", "type": ["int", "string", "null"]}, - {"name": "tooLargeRecord", "type": { "type": "record", "name": "pair", "fields": [ - {"name": "one", "type": "int"}, {"name": "two", "type": "int"} - ]}}, - {"name": "notAWrappedArray", "type": {"type": "record", "name": "nested", "fields": [ - {"name": "numbers", "type": {"type": "record", "name": "wrappedNumbers", - "doc": "This wrapped array is nested in a single-field record to increase test coverage twice: numbers is not an array, and to not match on wrapping schema name", - "fields": [ - {"name": "numberArray", "type": {"type": "array", "items": "int"}, "default": []} - ]}} - ]}}, - {"name": "dependencies", "type": ["null", {"type": "record", "name": "whatever", "fields": [ - {"name": "dependency", "type": {"type": "array", "items": "string"}, "default": []} - ]}], "default": null}, - {"name": "matchByPath", "aliases": ["matchByName"], "type": {"type": "record", "name": "wrappedStrings", "fields": [ - {"name": "textList", "type": {"type": "array", "items": "string"}} - ]}}, - {"name": "anotherOne", "type": {"type": "record", "name": "wrappedEnums", "fields": [ - {"name": "keptIntact", "type": {"type": "array", "items": {"type": "enum", "name": "switch", "symbols": ["on", "off"]}}} - ]}} - ] - }"""; - private static final String EXPECTED_SCHEMA_WITH_ARRAYS = """ - { - "type": "record", - "namespace": "ns", - "name": "WithArrays", - "fields": [ - {"name": "notARecord", "type": ["int", "string"], "doc": "this and the nest field also increase coverage on union checks"}, - {"name": "alsoNotARecord", "type": ["int", "string", "null"]}, - {"name": "tooLargeRecord", "type": { "type": "record", "name": "pair", "fields": [ - {"name": "one", "type": "int"}, {"name": "two", "type": "int"} - ]}}, - {"name": "notAWrappedArray", "type": {"type": "record", "name": "nested", "fields": [ - {"name": "numbers", "type": {"type": "array", "items": "int"}, "default": []} - ]}}, - {"name": "dependencies", "type": {"type": "array", "items": "string"}, "default": []}, - {"name": "matchByPath", "aliases": ["matchByName"], "type": {"type": "array", "items": "string"}}, - {"name": "anotherOne", "type": {"type": "record", "name": "wrappedEnums", "fields": [ - {"name": "keptIntact", "type": {"type": "array", "items": {"type": "enum", "name": "switch", "symbols": ["on", "off"]}}} - ]}} - ] - }"""; + private static final String SOURCE_SCHEMA_WITH_ARRAYS = """ + { + "type": "record", + "namespace": "ns", + "name": "WithArrays", + "fields": [ + {"name": "notARecord", "type": ["int", "string"], "doc": "this and the nest field also increase coverage on union checks"}, + {"name": "alsoNotARecord", "type": ["int", "string", "null"]}, + {"name": "tooLargeRecord", "type": { "type": "record", "name": "pair", "fields": [ + {"name": "one", "type": "int"}, {"name": "two", "type": "int"} + ]}}, + {"name": "notAWrappedArray", "type": {"type": "record", "name": "nested", "fields": [ + {"name": "numbers", "type": {"type": "record", "name": "wrappedNumbers", + "doc": "This wrapped array is nested in a single-field record to increase test coverage twice: 
numbers is not an array, and to not match on wrapping schema name", + "fields": [ + {"name": "numberArray", "type": {"type": "array", "items": "int"}, "default": []} + ]}} + ]}}, + {"name": "dependencies", "type": ["null", {"type": "record", "name": "whatever", "fields": [ + {"name": "dependency", "type": {"type": "array", "items": "string"}, "default": []} + ]}], "default": null}, + {"name": "matchByPath", "aliases": ["matchByName"], "type": {"type": "record", "name": "wrappedStrings", "fields": [ + {"name": "textList", "type": {"type": "array", "items": "string"}} + ]}}, + {"name": "anotherOne", "type": {"type": "record", "name": "wrappedEnums", "fields": [ + {"name": "keptIntact", "type": {"type": "array", "items": {"type": "enum", "name": "switch", "symbols": ["on", "off"]}}} + ]}} + ] + }"""; + private static final String EXPECTED_SCHEMA_WITH_ARRAYS = """ + { + "type": "record", + "namespace": "ns", + "name": "WithArrays", + "fields": [ + {"name": "notARecord", "type": ["int", "string"], "doc": "this and the nest field also increase coverage on union checks"}, + {"name": "alsoNotARecord", "type": ["int", "string", "null"]}, + {"name": "tooLargeRecord", "type": { "type": "record", "name": "pair", "fields": [ + {"name": "one", "type": "int"}, {"name": "two", "type": "int"} + ]}}, + {"name": "notAWrappedArray", "type": {"type": "record", "name": "nested", "fields": [ + {"name": "numbers", "type": {"type": "array", "items": "int"}, "default": []} + ]}}, + {"name": "dependencies", "type": {"type": "array", "items": "string"}, "default": []}, + {"name": "matchByPath", "aliases": ["matchByName"], "type": {"type": "array", "items": "string"}}, + {"name": "anotherOne", "type": {"type": "record", "name": "wrappedEnums", "fields": [ + {"name": "keptIntact", "type": {"type": "array", "items": {"type": "enum", "name": "switch", "symbols": ["on", "off"]}}} + ]}} + ] + }"""; - private static final String SOURCE_RECURSIVE_SCHEMA = """ - { - "testCase": "SchemaManipulatorTest", - "type": "record", - "name": "recursive", - "namespace": "ns", - "fields": [ - { - "name": "name", - "type": "string" - }, { - "name": "rabbitHole", - "type": "recursive" - } - ] - }"""; - private static final String EXPECTED_RECURSIVE_SCHEMA = """ - { - "testCase": "SchemaManipulatorTest", - "type": "record", - "name": "recursive", - "namespace": "ns", - "fields": [ - { - "name": "name", - "type": "string" - }, { - "name": "droste", - "aliases": ["rabbitHole"], - "type": "recursive" - } - ] - }"""; + private static final String SOURCE_RECURSIVE_SCHEMA = """ + { + "testCase": "SchemaManipulatorTest", + "type": "record", + "name": "recursive", + "namespace": "ns", + "fields": [ + { + "name": "name", + "type": "string" + }, { + "name": "rabbitHole", + "type": "recursive" + } + ] + }"""; + private static final String EXPECTED_RECURSIVE_SCHEMA = """ + { + "testCase": "SchemaManipulatorTest", + "type": "record", + "name": "recursive", + "namespace": "ns", + "fields": [ + { + "name": "name", + "type": "string" + }, { + "name": "droste", + "aliases": ["rabbitHole"], + "type": "recursive" + } + ] + }"""; + private static final String EXPECTED_RECURSIVE_SCHEMA_NAMING_CONVENTIONS = """ + { + "testCase": "SchemaManipulatorTest", + "type": "record", + "name": "RECURSIVE", + "aliases": ["recursive"], + "namespace": "ns", + "fields": [ + { + "name": "Name", + "aliases": ["name"], + "type": "string" + }, { + "name": "droste", + "aliases": ["rabbitHole"], + "type": "RECURSIVE" + } + ] + }"""; } diff --git 
a/src/test/java/opwvhk/avro/util/NamingConventionTest.java b/src/test/java/opwvhk/avro/util/NamingConventionTest.java new file mode 100644 index 0000000..95c32a8 --- /dev/null +++ b/src/test/java/opwvhk/avro/util/NamingConventionTest.java @@ -0,0 +1,101 @@ +/* + * Copyright © Oscar Westra van Holthe - Kind + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package opwvhk.avro.util; + +import org.junit.jupiter.api.Test; + +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class NamingConventionTest { + private static final String EXAMPLE = "multiple word identifier"; + private static final Map EXAMPLES = Map.of( + NamingConvention.PASCAL_CASE, "MultipleWordIdentifier", + NamingConvention.CAMEL_CASE, "multipleWordIdentifier", + NamingConvention.SNAKE_CASE, "multiple_word_identifier", + NamingConvention.KEBAB_CASE, "multiple-word-identifier", + NamingConvention.PASCAL_SNAKE_CASE, "Multiple_Word_Identifier", + NamingConvention.CAMEL_SNAKE_CASE, "multiple_Word_Identifier", + NamingConvention.SCREAMING_SNAKE_CASE, "MULTIPLE_WORD_IDENTIFIER", + NamingConvention.TRAIN_CASE, "Multiple-Word-Identifier", + NamingConvention.COBOL_CASE, "MULTIPLE-WORD-IDENTIFIER" + ); + + @Test + void validatePredefinedNamingConventions() { + assertThat(NamingConvention.PASCAL_CASE.convert("Pascal Case")).isEqualTo("PascalCase"); + assertThat(NamingConvention.CAMEL_CASE.convert("Camel Case")).isEqualTo("camelCase"); + assertThat(NamingConvention.SNAKE_CASE.convert("Snake Case")).isEqualTo("snake_case"); + assertThat(NamingConvention.KEBAB_CASE.convert("Kebab Case")).isEqualTo("kebab-case"); + assertThat(NamingConvention.PASCAL_SNAKE_CASE.convert("Pascal Snake Case")).isEqualTo("Pascal_Snake_Case"); + assertThat(NamingConvention.CAMEL_SNAKE_CASE.convert("Camel Snake Case")).isEqualTo("camel_Snake_Case"); + assertThat(NamingConvention.SCREAMING_SNAKE_CASE.convert("Screaming Snake Case")).isEqualTo("SCREAMING_SNAKE_CASE"); + assertThat(NamingConvention.TRAIN_CASE.convert("Train Case")).isEqualTo("Train-Case"); + assertThat(NamingConvention.COBOL_CASE.convert("Cobol Case")).isEqualTo("COBOL-CASE"); + } + + @Test + void verifyPredefinedNamingConventionsAreDeterministic() { + List namingConventions = new ArrayList<>(EXAMPLES.keySet()); + Collections.shuffle(namingConventions); + + String name = EXAMPLE; + for (NamingConvention namingConvention : namingConventions) { + name = namingConvention.convert(name); + assertThat(name).isEqualTo(EXAMPLES.get(namingConvention)); + } + } + + @Test + void validateWordCase() { + assertThat(NamingConvention.WordCase.LOWER_CASE.apply("MiXeD")).isEqualTo("mixed"); + assertThat(NamingConvention.WordCase.UPPER_CASE.apply("MiXeD")).isEqualTo("MIXED"); + 
assertThat(NamingConvention.WordCase.CAPITALIZED.apply("MiXeD")).isEqualTo("Mixed"); + } + + @Test + @SuppressWarnings("SpellCheckingInspection") + void validateWordSplitting() { + NamingConvention dummy = new NamingConvention(" ", NamingConvention.WordCase.CAPITALIZED, NamingConvention.WordCase.LOWER_CASE); + + // Text with accents, various dashes & spaces, and nonsense characters + assertThat(dummy.convert("th↔︎Ïs—IS–a Sèn🛫ténçE")).isEqualTo("This is a sentence"); + // Greek text with accents, an underscore and various spaces / space marks ("Αυτή είναι μια πρόταση" translates to "This is a sentence") + assertThat(dummy.convert("αυτή είΝαι_μΙα﹏ΠΡόταση")).isEqualTo("Αυτη ειναι μια προταση"); + // Text without dashes & spaces + assertThat(dummy.convert("th↔︎ïsIsAnotherSèn🛫ténçe")).isEqualTo("This is another sentence"); + + assertThatThrownBy(() -> dummy.convert("🛫  ﹏_ ↔︎")).isInstanceOf(IllegalArgumentException.class); + } + + @Test + void ensureTheNullConventionDoesNothing() { + byte[] randomBytes = new byte[16]; + new Random().nextBytes(randomBytes); + String randomString = new String(randomBytes, StandardCharsets.UTF_8); + + assertThat(NamingConvention.NULL.convert(randomString)).isEqualTo(randomString); + } +}