Skip to content

Commit

Permalink
fix to #409
Browse files Browse the repository at this point in the history
  • Loading branch information
dhowe committed Jan 4, 2018
1 parent ccec050 commit 1479f46
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 12 deletions.
10 changes: 3 additions & 7 deletions java/rita/rita_dict.js
Original file line number Diff line number Diff line change
Expand Up @@ -3329,7 +3329,6 @@ function _dict() { return {
'canoe':['k-ah n-uw1','nn'],
'canon':['k-ae1 n-ah-n','nn'],
'canopy':['k-ae1 n-ah p-iy','nn'],
'cant':['k-ae1-n-t','nn'],
'cantaloupe':['k-ae1-n t-ah l-ow-p','nn'],
'canteen':['k-ae-n t-iy1-n','nn'],
'canter':['k-ae1-n t-er','nn vb'],
Expand Down Expand Up @@ -11877,7 +11876,6 @@ function _dict() { return {
'hugged':['hh-ah1-g-d','vbd'],
'hugging':['hh-ah1 g-ih-ng','vbg nn'],
'huh':['hh-ah1','uh'],
'hula':['hh-uw1 l-ah','nn'],
'hulk':['hh-ah1-l-k','nn vb'],
'hulking':['hh-ah1-l k-ih-ng','jj vbg'],
'hull':['hh-ah1-l','nn'],
Expand Down Expand Up @@ -16022,7 +16020,6 @@ function _dict() { return {
'negotiation':['n-ih g-ow sh-iy ey1 sh-ah-n','nn'],
'negotiator':['n-ah g-ow1 sh-iy ey t-er','nn'],
'negro':['n-iy1 g-r-ow','nn'],
'negroes':['n-iy1 g-r-ow-z','nns'],
'neighbor':['n-ey1 b-er','nn vb'],
'neighborhood':['n-ey1 b-er hh-uh-d','nn'],
'neighboring':['n-ey1 b-er ih-ng','vbg jj'],
Expand Down Expand Up @@ -27084,14 +27081,14 @@ function _dict() { return {
'waiver':['w-ey1 v-er','nn'],
'waives':['w-ey1-v-z','vbz'],
'waiving':['w-ey1 v-ih-ng','vbg'],
'wake':['w-ey1-k','nn vbp vb'],
'wake':['w-ey1-k','vbp vb'],
'wakes':['w-ey1-k-s','vbz'],
'waking':['w-ey1 k-ih-ng','vbg jj'],
'wale':['w-ey1-l','nn'],
'walk':['w-ao1-k','vb vbp nn'],
'walked':['w-ao1-k-t','vbd vbn'],
'walker':['w-ao1 k-er','nn'],
'walking':['w-ao1 k-ih-ng','vbg nn jj'],
'walking':['w-ao1 k-ih-ng','vbg jj'],
'walkout':['w-ao1-k aw-t','nn'],
'walkway':['w-ao1-k w-ey','nn'],
'wall':['w-ao1-l','nn vbp vb'],
Expand Down Expand Up @@ -27668,7 +27665,6 @@ function _dict() { return {
'worship':['w-er1 sh-ah-p','nn vb vbp'],
'worshiped':['w-er1 sh-ih-p-t','vbn'],
'worshipful':['w-er1 sh-ah-p f-ah-l','jj'],
'worshipped':['w-er1 sh-ah-p-t','nn'],
'worshipper':['w-er1 sh-ih p-er','nn'],
'worshipping':['w-er1 sh-ah p-ih-ng','vbg'],
'worst':['w-er1-s-t','jjs rbs jj'],
Expand All @@ -27689,7 +27685,7 @@ function _dict() { return {
'wow':['w-aw1','vb'],
'wowed':['w-aw1-d','vbd'],
'wows':['w-aw1-z','vbz'],
'wrack':['r-ae1-k','nn vb vbp'],
'wrack':['r-ae1-k','vb vbp'],
'wracked':['r-ae1-k-t','vbn vbd'],
'wracking':['r-ae1 k-ih-ng','vbg'],
'wrangle':['r-ae1-ng-g-ah-l','vb'],
Expand Down
24 changes: 19 additions & 5 deletions java/rita/support/JSONLexicon.java
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,8 @@ public Iterator<String> randomIterator() {
}

/**
 * Returns a {@code RandomIterator} over the words that
 * {@link #getWordsWithPos(String)} yields for the given tag.
 *
 * @param pos the part-of-speech tag, passed unchanged to
 *            {@code getWordsWithPos}
 * @return a {@code RandomIterator} wrapping the matching words
 */
public Iterator<String> randomPosIterator(String pos) {
  // Keep the lookup in a named local so the set can be inspected while debugging.
  Set<String> wordsWithPos = getWordsWithPos(pos);
  return new RandomIterator(wordsWithPos);
}

public Iterator<String> posIterator(String pos) {
Expand Down Expand Up @@ -415,20 +416,33 @@ public Set<String> getWords(Pattern regex) {
/**
 * Returns all words where 'pos' is the first (or only) tag listed.
 * <p>
 * The single-letter tags "n", "v", "r" and "a" are looked up as "nn", "vb",
 * "rb" and "jj" respectively. For "nns" (fix to #409) the lexicon is searched
 * for "nn" and every match is passed through {@code RiTa.pluralize()} before
 * it is added to the result.
 *
 * @param pos the part-of-speech tag to look up
 * @return a {@code TreeSet} holding the matching words
 * @throws RiTaException if {@code RiPos.isPennTag(pos)} returns false
 */
public Set<String> getWordsWithPos(String pos) {

  // NOTE(review): validation runs before the shorthand tags below are
  // normalized, so "n"/"v"/"r"/"a" only get that far if isPennTag()
  // accepts them as-is -- confirm against RiPos.
  if (!RiPos.isPennTag(pos)) {
    throw new RiTaException("Pos '"+ pos + "' is not a known part-of-speech tag." +
        " Check the list at http://rednoise.org/rita/reference/PennTags.html");
  }

  boolean pluralize = false; // fix to #409
  if (pos.equals("n") || pos.equals("nns")) {
    // Plural nouns are derived from the singular ("nn") entries.
    pluralize = pos.equals("nns");
    pos = "nn";
  }
  else if (pos.equals("v")) {
    pos = "vb";
  }
  else if (pos.equals("r")) {
    pos = "rb";
  }
  else if (pos.equals("a")) {
    pos = "jj";
  }

  Set<String> s = new TreeSet<String>();
  // A trailing space ensures the tag matches as a whole first token,
  // e.g. "nn " does not match a pos string starting with "nns".
  String posSpc = pos + " ";
  for (Iterator<String> iter = iterator(); iter.hasNext();) {
    String word = iter.next();
    String poslist = getPosStr(word);
    if (poslist.startsWith(posSpc) || poslist.equals(pos)) {
      if (pluralize) {
        word = RiTa.pluralize(word); // fix to #409
      }
      s.add(word);
    }
  }
  return s;
}
Expand Down Expand Up @@ -625,7 +639,7 @@ public static void testTiming(int numTests) {
public static void main(String[] args) {
// testTiming(50); if (1==1) return;
JSONLexicon lex = JSONLexicon.getInstance();
System.out.println(lex.getWordsWithPos("vbg"));
System.out.println(lex.getWordsWithPos("nns"));
if (1 == 1)
return;
String test = "swimming";
Expand Down

0 comments on commit 1479f46

Please sign in to comment.