Skip to content
This repository has been archived by the owner on Mar 5, 2020. It is now read-only.

Commit

Permalink
Merge pull request #454 from AmyAssist/characterConversion
Browse files Browse the repository at this point in the history
added english character conversion
  • Loading branch information
buddy200 authored Sep 12, 2018
2 parents d52ea36 + 35e108d commit a597315
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@

package de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics;

import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.EnglishContraction;
import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.EnglishDateTimeUtility;
import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.EnglishNumberConversion;
import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.EnglishStemmer;
import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en.*;

/**
*
Expand All @@ -37,6 +34,7 @@ public class ChooseLanguage {
private final NumberConversion numberConversion;
private final DateTimeUtility timeUtility;
private final Contraction contraction;
private final SpecialCharacterConversion specialCharacterConversion;

/**
* constructor for choose language
Expand All @@ -58,6 +56,7 @@ public ChooseLanguage(String language, boolean stemmerEnabled) {
this.numberConversion = new EnglishNumberConversion();
this.timeUtility = new EnglishDateTimeUtility();
this.contraction = new EnglishContraction();
this.specialCharacterConversion = new EnglishSpecialCharacterConversion();
break;
}
}
Expand Down Expand Up @@ -97,4 +96,13 @@ public DateTimeUtility getTimeUtility() {
public Contraction getContraction() {
return this.contraction;
}

/**
* Gets the {@link #specialCharacterConversion special character conversion}.
*
* @return the special character conversion held by this instance
*/
public SpecialCharacterConversion getSpecialCharacterConversion() {
return this.specialCharacterConversion;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* This source file is part of the Amy open source project.
* For more information see github.com/AmyAssist
*
* Copyright (c) 2018 the Amy project authors.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information see notice.md
*/

package de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics;

/**
* Utility interface to convert special characters into words of a specific language.
*
* @author Felix Burk
*/
public interface SpecialCharacterConversion {

/**
* Formats special characters to their language counterparts,
* e.g. {@code %} to {@code percent}.
*
* @param toFormat input to format
* @return formatted string
*/
String format(String toFormat);

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* This source file is part of the Amy open source project.
* For more information see github.com/AmyAssist
*
* Copyright (c) 2018 the Amy project authors.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information see notice.md
*/

package de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.en;

import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

import de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.SpecialCharacterConversion;

/**
 * Special character conversion for the English language.
 * <p>
 * Replaces a fixed set of symbols (percent sign, currency signs, degree sign) with their
 * English word, e.g. {@code %} becomes {@code percent}.
 *
 * @author Felix Burk
 */
public class EnglishSpecialCharacterConversion implements SpecialCharacterConversion {

    /** Maps each supported special character to the English word that replaces it. */
    private final Map<String, String> conversion;

    /**
     * Creates the conversion and registers all supported special characters.
     */
    public EnglishSpecialCharacterConversion() {
        this.conversion = new HashMap<>();

        this.conversion.put("%", "percent");
        this.conversion.put("$", "dollar");
        this.conversion.put("€", "euro");
        this.conversion.put("¢", "cent");
        this.conversion.put("£", "pound");
        this.conversion.put("¥", "yen");
        this.conversion.put("°", "degree");
    }

    /**
     * Replaces every occurrence of every supported special character with its English word.
     *
     * @param toFormat input to format
     * @return the input with all supported special characters replaced
     * @see de.unistuttgart.iaas.amyassist.amy.natlang.languagespecifics.SpecialCharacterConversion#format(java.lang.String)
     */
    @Override
    public String format(String toFormat) {
        String result = toFormat;
        for (Entry<String, String> e : this.conversion.entrySet()) {
            // String.replace substitutes the key literally. replaceAll would interpret it as a
            // regular expression, where "$" means end-of-input rather than the dollar sign.
            // Each replacement works on the accumulated result so that inputs containing
            // several different special characters have all of them converted.
            result = result.replace(e.getKey(), e.getValue());
        }

        return result;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ public List<WordToken> tokenize(String nlInput) {
toLex = this.language.getTimeUtility().formatTime(toLex);
toLex = this.language.getTimeUtility().formatDate(toLex);
toLex = this.language.getContraction().disassemblingContraction(toLex);
toLex = this.language.getSpecialCharacterConversion().format(toLex);

StringBuilder currentWord = new StringBuilder();
if (!toLex.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ public void setup() {
public static Stream<Character> badCharacters() {
// every ascii character except numbers or letters
return IntStream.range(0, 128).mapToObj(i -> (char) i)
.filter(c -> (0 > c && c > 33) || (34 < c && c != 37 && !(43 < c && c < 47) && c < 48)
|| (59 < c && c < 65 && c != 63) || (90 < c && c < 97) || 122 < c);
.filter(c -> ((0 > c && c > 33) || (34 < c && !(43 < c && c < 47) && c != 36 && c != 37 && c < 48)
|| (59 < c && c < 65 && c != 63) || (90 < c && c < 97) || 122 < c));
}

/**
Expand Down

0 comments on commit a597315

Please sign in to comment.