From 5bb887e997d6e3c2996445d4a9b8d63032434974 Mon Sep 17 00:00:00 2001 From: Felix B Date: Wed, 12 Sep 2018 13:31:39 +0200 Subject: [PATCH 1/5] added english character conversion --- .../languagespecifics/ChooseLanguage.java | 16 +++-- .../en/EnglishSpecialCharacterConversion.java | 65 +++++++++++++++++++ .../en/SpecialCharacterConversion.java | 41 ++++++++++++ .../amyassist/amy/natlang/nl/NLLexer.java | 1 + 4 files changed, 119 insertions(+), 4 deletions(-) create mode 100644 natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/EnglishSpecialCharacterConversion.java create mode 100644 natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/SpecialCharacterConversion.java diff --git a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/ChooseLanguage.java b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/ChooseLanguage.java index 480ec82a3..f12954f84 100644 --- a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/ChooseLanguage.java +++ b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/ChooseLanguage.java @@ -23,10 +23,7 @@ package de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics; -import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.EnglishContraction; -import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.EnglishDateTimeUtility; -import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.EnglishNumberConversion; -import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.EnglishStemmer; +import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.*; /** * @@ -37,6 +34,7 @@ public class ChooseLanguage { private final NumberConversion numberConversion; private final DateTimeUtility timeUtility; private final Contraction contraction; + private final SpecialCharacterConversion 
specialCharacterConversion; /** * constructor for choose language @@ -58,6 +56,7 @@ public ChooseLanguage(String language, boolean stemmerEnabled) { this.numberConversion = new EnglishNumberConversion(); this.timeUtility = new EnglishDateTimeUtility(); this.contraction = new EnglishContraction(); + this.specialCharacterConversion = new EnglishSpecialCharacterConversion(); break; } } @@ -97,4 +96,13 @@ public DateTimeUtility getTimeUtility() { public Contraction getContraction() { return this.contraction; } + + /** + * Gets the {@link #specialCharacterConversion special character conversion} + * + * @return the special character conversion + */ + public SpecialCharacterConversion getSpecialCharacterConversion() { + return this.specialCharacterConversion; + } } diff --git a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/EnglishSpecialCharacterConversion.java b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/EnglishSpecialCharacterConversion.java new file mode 100644 index 000000000..9c9c578df --- /dev/null +++ b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/EnglishSpecialCharacterConversion.java @@ -0,0 +1,65 @@ +/* + * This source file is part of the Amy open source project. + * For more information see github.com/AmyAssist + * + * Copyright (c) 2018 the Amy project authors. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For more information see notice.md + */ + +package de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en; + +import java.util.HashMap; +import java.util.Map; + +/** + * Special Character conversion for the english language + * + * @author Felix Burk + */ +public class EnglishSpecialCharacterConversion implements SpecialCharacterConversion { + + private Map conversion; + + public EnglishSpecialCharacterConversion() { + this.conversion = new HashMap<>(); + + this.conversion.put("%", "percent"); + this.conversion.put("$", "dollar"); + this.conversion.put("€", "euro"); + this.conversion.put("¢", "cent"); + this.conversion.put("£", "pound"); + this.conversion.put("¥", "yen"); + this.conversion.put("°", "degree"); + } + + /** + * @see de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.SpecialCharacterConversion#format(java.lang.String) + */ + @Override + public String format(String toFormat) { + //this is kinda ugly buuut regex and special characters is pretty bad + for(String c : this.conversion.keySet()) { + if(toFormat.contains(c)) { + toFormat.replaceAll(c, this.conversion.get(c)); + } + } + + return toFormat; + } + +} diff --git a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/SpecialCharacterConversion.java b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/SpecialCharacterConversion.java new file mode 100644 index 000000000..c6c453156 --- /dev/null +++ b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/SpecialCharacterConversion.java @@ -0,0 +1,41 @@ +/* + * This source file is part of the Amy open source project. + * For more information see github.com/AmyAssist + * + * Copyright (c) 2018 the Amy project authors. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For more information see notice.md + */ + +package de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en; + +/** + * Utility interface to convert special characters + * + * @author Felix Burk + */ +public interface SpecialCharacterConversion { + + /** + * formats special characters to their language counterparts + * e.g. % to percent + * @param toFormat input to format + * @return formatted string + */ + String format(String toFormat); + +} \ No newline at end of file diff --git a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/nl/NLLexer.java b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/nl/NLLexer.java index b248f3984..f1b2fa3cb 100644 --- a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/nl/NLLexer.java +++ b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/nl/NLLexer.java @@ -90,6 +90,7 @@ public List tokenize(String nlInput) { toLex = this.language.getTimeUtility().formatTime(toLex); toLex = this.language.getTimeUtility().formatDate(toLex); toLex = this.language.getContraction().disassemblingContraction(toLex); + toLex = this.language.getSpecialCharacterConversion().format(toLex); StringBuilder currentWord = new StringBuilder(); if (!toLex.isEmpty()) { From d58c3048bd913a682288f881b5ae2ade40ed0e2a Mon Sep 17 00:00:00 2001 From: Felix B Date: Wed, 12 Sep 2018 13:46:02 +0200 Subject: [PATCH 2/5] sonarcloud --- .../en/EnglishSpecialCharacterConversion.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git
a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/EnglishSpecialCharacterConversion.java b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/EnglishSpecialCharacterConversion.java index 9c9c578df..124d8582c 100644 --- a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/EnglishSpecialCharacterConversion.java +++ b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/EnglishSpecialCharacterConversion.java @@ -25,6 +25,7 @@ import java.util.HashMap; import java.util.Map; +import java.util.Map.Entry; /** * Special Character conversion for the english language @@ -52,14 +53,15 @@ public EnglishSpecialCharacterConversion() { */ @Override public String format(String toFormat) { + String result = toFormat; //this is kinda ugly buuut regex and special characters is pretty bad - for(String c : this.conversion.keySet()) { - if(toFormat.contains(c)) { - toFormat.replaceAll(c, this.conversion.get(c)); + for(Entry e : this.conversion.entrySet()) { + if(result.contains(e.getKey())) { + result = result.replace(e.getKey(), e.getValue()); } } - return toFormat; + return result; } } From 5e734af27e054e5f2a12104e4be2157435085901 Mon Sep 17 00:00:00 2001 From: Felix B Date: Wed, 12 Sep 2018 13:56:13 +0200 Subject: [PATCH 3/5] fixed tests --- .../iaas/amyassist/amy/core/natlang/nl/NLLexerTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/natlang/src/test/java/de/unistuttgart/iaas/amyassist/amy/core/natlang/nl/NLLexerTest.java b/natlang/src/test/java/de/unistuttgart/iaas/amyassist/amy/core/natlang/nl/NLLexerTest.java index 7af23f4c0..be73bd60d 100644 --- a/natlang/src/test/java/de/unistuttgart/iaas/amyassist/amy/core/natlang/nl/NLLexerTest.java +++ b/natlang/src/test/java/de/unistuttgart/iaas/amyassist/amy/core/natlang/nl/NLLexerTest.java @@ -73,8 +73,8 @@ public void setup() { public static Stream badCharacters()
{ // every ascii character except numbers of letters return IntStream.range(0, 128).mapToObj(i -> (char) i) - .filter(c -> (0 > c && c > 33) || (34 < c && c != 37 && !(43 < c && c < 47) && c < 48) - || (59 < c && c < 65 && c != 63) || (90 < c && c < 97) || 122 < c); + .filter(c -> ((0 > c && c > 33) || (34 < c && !(43 < c && c < 47) && c != 36 && c != 37 && c < 48) + || (59 < c && c < 65 && c != 63) || (90 < c && c < 97) || 122 < c)); } /** From 0a3d5440afad39353b5cafb4345da4ac4e87cb14 Mon Sep 17 00:00:00 2001 From: Felix B Date: Wed, 12 Sep 2018 14:04:07 +0200 Subject: [PATCH 4/5] newline at end of file --- .../languagespecifics/en/SpecialCharacterConversion.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/SpecialCharacterConversion.java b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/SpecialCharacterConversion.java index c6c453156..c9d63b22c 100644 --- a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/SpecialCharacterConversion.java +++ b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/SpecialCharacterConversion.java @@ -38,4 +38,4 @@ public interface SpecialCharacterConversion { */ String format(String toFormat); -} \ No newline at end of file +} From 2e5f7fdbbbdcecc1909e8bed57fe04ec8bcd6247 Mon Sep 17 00:00:00 2001 From: Felix B Date: Wed, 12 Sep 2018 14:21:28 +0200 Subject: [PATCH 5/5] changed package of SpecialCharacterConversion --- .../{en => }/SpecialCharacterConversion.java | 2 +- .../en/EnglishSpecialCharacterConversion.java | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) rename natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/{en => }/SpecialCharacterConversion.java (99%) diff --git 
a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/SpecialCharacterConversion.java b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/SpecialCharacterConversion.java similarity index 99% rename from natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/SpecialCharacterConversion.java rename to natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/SpecialCharacterConversion.java index c6c453156..802627cb2 100644 --- a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/SpecialCharacterConversion.java +++ b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/SpecialCharacterConversion.java @@ -21,7 +21,7 @@ * For more information see notice.md */ -package de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en; +package de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics; /** * Utility interface to convert special characters diff --git a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/EnglishSpecialCharacterConversion.java b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/EnglishSpecialCharacterConversion.java index 124d8582c..4c8d18f77 100644 --- a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/EnglishSpecialCharacterConversion.java +++ b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/en/EnglishSpecialCharacterConversion.java @@ -27,6 +27,8 @@ import java.util.Map; import java.util.Map.Entry; +import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.SpecialCharacterConversion; + /** * Special Character conversion for the english language * @@ -49,7 +51,7 @@ public EnglishSpecialCharacterConversion() { } /** - * @see 
de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.SpecialCharacterConversion#format(java.lang.String) + * @see de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.SpecialCharacterConversion#format(java.lang.String) */ @Override public String format(String toFormat) {