Skip to content

Commit

Permalink
Fix for : Laboratuvar'ı kelimesini hatalı olarak işaretliyor. #16
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmetaa committed Feb 22, 2019
1 parent ce875a9 commit 5b83f59
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 5 deletions.
Binary file modified libreoffice-tr-tools/lib/runtime/zemberek-lo.jar
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package lo.tr.tools.spellchecker;

import _zem.org.antlr.v4.runtime.Token;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
Expand All @@ -19,6 +20,7 @@
import zemberek.morphology.generator.WordGenerator;
import zemberek.morphology.lexicon.RootLexicon;
import zemberek.normalization.TurkishSpellChecker;
import zemberek.tokenization.TurkishTokenizer;

public class ZemberekSpellChecker {

Expand Down Expand Up @@ -60,7 +62,7 @@ private ZemberekSpellChecker(TurkishMorphology morphology) {
/**
* This is used for debugging purposes.
*/
static ZemberekSpellChecker getInstance(RootLexicon lexicon) {
static synchronized ZemberekSpellChecker getInstance(RootLexicon lexicon) {
TurkishMorphology morphology = TurkishMorphology.builder()
.setLexicon(lexicon)
.useInformalAnalysis().build();
Expand All @@ -87,7 +89,20 @@ public boolean isCorrect(String w) {
return true;
}
}
return spellChecker.check(input);

boolean passed = spellChecker.check(input);
if (passed) {
return true;
}

List<Token> tokens = TurkishTokenizer.DEFAULT.tokenize(w);
if (tokens.size() != 1) {
return false;
}
Token t = tokens.get(0);
List<SingleAnalysis> analyses = morphology.getUnidentifiedTokenAnalyzer().analyze(t);
return analyses.size() > 0;

}

public List<String> getSuggestions(String s) {
Expand Down Expand Up @@ -132,10 +147,10 @@ private List<String> informalWordSuggestions(String s) {
}

private String getApostrophe(String input) {
String apostrophe;
String apostrophe = null;
if (input.indexOf('’') > 0) {
apostrophe = "’";
} else {
} else if (input.indexOf('\'') > 0) {
apostrophe = "'";
}
return apostrophe;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package lo.tr.tools.spellchecker;

import java.util.List;
import org.junit.Assert;
import org.junit.Test;
import zemberek.morphology.lexicon.RootLexicon;
Expand All @@ -24,7 +25,8 @@ public void testSuggestions1() {
ZemberekSpellChecker instance = ZemberekSpellChecker.getInstance(lexicon);
String[] inputs = {"ou", "ku", "okuu", "oyu"};
for (String input : inputs) {
Assert.assertTrue(instance.getSuggestions(input).contains("oku"));
List<String> suggestions = instance.getSuggestions(input);
Assert.assertTrue(suggestions.contains("oku"));
}
}

Expand All @@ -44,5 +46,40 @@ public void testInformalWords1() {
Assert.assertTrue(instance.getSuggestions("gidicem").contains("gideceğim"));
}

/**
 * Checks that inflected forms of a lexicon word are accepted both with and without an
 * apostrophe before the suffix, and that a malformed suffix after the apostrophe is rejected.
 */
@Test
public void testRegularWordWithApostrophe() {
  ZemberekSpellChecker checker =
      ZemberekSpellChecker.getInstance(RootLexicon.fromLines("kitap"));

  String[] accepted = {"kitapta", "Kitapta", "Kitap'ta"};
  for (String word : accepted) {
    Assert.assertTrue(checker.isCorrect(word));
  }

  // The form with a doubled final vowel must not be accepted.
  Assert.assertFalse(checker.isCorrect("Kitap'taa"));
}

/**
 * Checks that the suggestions for a misspelled form of a lexicon word contain the corrected
 * form, for lower-case, capitalized and apostrophe-separated inputs.
 */
@Test
public void testRegularWordWithApostropheSuggestions() {
  ZemberekSpellChecker checker =
      ZemberekSpellChecker.getInstance(RootLexicon.fromLines("kitap"));

  // Each row is {misspelled input, suggestion that must be offered}.
  String[][] cases = {
      {"ktapta", "kitapta"},
      {"Ktapta", "Kitapta"},
      {"Ktap'ta", "Kitap'ta"},
  };
  for (String[] row : cases) {
    Assert.assertTrue(checker.getSuggestions(row[0]).contains(row[1]));
  }
}

/**
 * Regression test for issue 16: the bare, suffixed, capitalized and apostrophe-separated
 * forms of "laboratuvar" must all be reported as correct.
 */
@Test
public void testRegularWordWithApostrophesIssue16() {
  ZemberekSpellChecker checker =
      ZemberekSpellChecker.getInstance(RootLexicon.fromLines("laboratuvar"));

  String[] accepted = {
      "laboratuvar",
      "laboratuvarda",
      "Laboratuvarda",
      "Laboratuvar'da",
  };
  for (String word : accepted) {
    Assert.assertTrue(checker.isCorrect(word));
  }
}



}

0 comments on commit 5b83f59

Please sign in to comment.