diff --git a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/ChooseLanguage.java b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/ChooseLanguage.java index 480ec82a3..f12954f84 100644 --- a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/ChooseLanguage.java +++ b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/languagespecifics/ChooseLanguage.java @@ -23,10 +23,7 @@ package de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics; -import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.EnglishContraction; -import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.EnglishDateTimeUtility; -import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.EnglishNumberConversion; -import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.EnglishStemmer; +import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.*; /** * @@ -37,6 +34,7 @@ public class ChooseLanguage { private final NumberConversion numberConversion; private final DateTimeUtility timeUtility; private final Contraction contraction; + private final SpecialCharacterConversion specialCharacterConversion; /** * constructor for choose language @@ -58,6 +56,7 @@ public ChooseLanguage(String language, boolean stemmerEnabled) { this.numberConversion = new EnglishNumberConversion(); this.timeUtility = new EnglishDateTimeUtility(); this.contraction = new EnglishContraction(); + this.specialCharacterConversion = new EnglishSpecialCharacterConversion(); break; } } @@ -97,4 +96,13 @@ public DateTimeUtility getTimeUtility() { public Contraction getContraction() { return this.contraction; } + + /** + * Get's {@link #specialCharacterConversion conversion} + * + * @return contraction + */ + public SpecialCharacterConversion getSpecialCharacterConversion() { + return this.specialCharacterConversion; + } } diff --git 
/**
 * Language-specific conversion of special characters into words.
 *
 * @author Felix Burk
 */
public interface SpecialCharacterConversion {

    /**
     * Replaces special characters in the input with their language
     * counterparts, e.g. {@code %} becomes {@code percent}.
     *
     * @param toFormat
     *            input to format
     * @return formatted string
     */
    String format(String toFormat);

}
+ * + * For more information see notice.md + */ + +package de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en; + +import java.util.HashMap; +import java.util.Map; +import java.util.Map.Entry; + +import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.SpecialCharacterConversion; + +/** + * Special Character conversion for the english language + * + * @author Felix Burk + */ +public class EnglishSpecialCharacterConversion implements SpecialCharacterConversion { + + private Map conversion; + + public EnglishSpecialCharacterConversion() { + this.conversion = new HashMap<>(); + + this.conversion.put("%", "percent"); + this.conversion.put("$", "dollar"); + this.conversion.put("€", "euro"); + this.conversion.put("¢", "cent"); + this.conversion.put("£", "pound"); + this.conversion.put("¥", "yen"); + this.conversion.put("°", "degree"); + } + + /** + * @see de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.SpecialCharacterConversion#format(java.lang.String) + */ + @Override + public String format(String toFormat) { + String result = toFormat; + //this is kinda ugly buuut regex and special characters is pretty bad + for(Entry e : this.conversion.entrySet()) { + if(result.contains(e.getKey())) { + result = toFormat.replaceAll(e.getKey(), e.getValue()); + } + } + + return result; + } + +} diff --git a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/nl/NLLexer.java b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/nl/NLLexer.java index b248f3984..f1b2fa3cb 100644 --- a/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/nl/NLLexer.java +++ b/natlang/src/main/java/de/unistuttgart/iaas/amyassist/amy/natlang/nl/NLLexer.java @@ -90,6 +90,7 @@ public List tokenize(String nlInput) { toLex = this.language.getTimeUtility().formatTime(toLex); toLex = this.language.getTimeUtility().formatDate(toLex); toLex = this.language.getContraction().disassemblingContraction(toLex); + toLex = 
this.language.getSpecialCharacterConversion().format(toLex); StringBuilder currentWord = new StringBuilder(); if (!toLex.isEmpty()) { diff --git a/natlang/src/test/java/de/unistuttgart/iaas/amyassist/amy/core/natlang/nl/NLLexerTest.java b/natlang/src/test/java/de/unistuttgart/iaas/amyassist/amy/core/natlang/nl/NLLexerTest.java index 7af23f4c0..be73bd60d 100644 --- a/natlang/src/test/java/de/unistuttgart/iaas/amyassist/amy/core/natlang/nl/NLLexerTest.java +++ b/natlang/src/test/java/de/unistuttgart/iaas/amyassist/amy/core/natlang/nl/NLLexerTest.java @@ -73,8 +73,8 @@ public void setup() { public static Stream badCharacters() { // every ascii character except numbers of letters return IntStream.range(0, 128).mapToObj(i -> (char) i) - .filter(c -> (0 > c && c > 33) || (34 < c && c != 37 && !(43 < c && c < 47) && c < 48) - || (59 < c && c < 65 && c != 63) || (90 < c && c < 97) || 122 < c); + .filter(c -> ((0 > c && c > 33) || (34 < c && !(43 < c && c < 47) && c != 36 && c != 37 && c < 48) + || (59 < c && c < 65 && c != 63) || (90 < c && c < 97) || 122 < c)); } /**