Skip to content

Commit

Permalink
Merge pull request #12 from turbanoff/master
Browse files Browse the repository at this point in the history
#9 add matches and findFirst methods
  • Loading branch information
hankcs authored Apr 1, 2018
2 parents 5b92b3e + 36ce63c commit a3d5a5e
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 6 deletions.
54 changes: 49 additions & 5 deletions src/main/java/com/hankcs/algorithm/AhoCorasickDoubleArrayTrie.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.*;
import java.util.concurrent.LinkedBlockingDeque;

/**
* An implementation of Aho Corasick algorithm based on Double Array Trie
Expand Down Expand Up @@ -74,7 +73,7 @@ public List<Hit<V>> parseText(String text)
{
int position = 1;
int currentState = 0;
List<Hit<V>> collectedEmits = new LinkedList<Hit<V>>();
List<Hit<V>> collectedEmits = new ArrayList<Hit<V>>();
for (int i = 0; i < text.length(); ++i)
{
currentState = getState(currentState, text.charAt(i));
Expand Down Expand Up @@ -183,6 +182,51 @@ public void parseText(char[] text, IHitFull<V> processor)
}
}

/**
 * Tells whether the given text contains at least one occurrence of any keyword.
 * <p>
 * The text is scanned from left to right, one char at a time, and scanning stops
 * at the first state that carries a non-null {@code output} entry, so no hit
 * objects are created.
 *
 * @param text source text to check; an empty string never matches
 * @return {@code true} if some state reached while scanning has an output entry,
 *         {@code false} if the whole text was consumed without one
 */
public boolean matches(String text)
{
    final int length = text.length();
    int state = 0;
    int index = 0;
    while (index < length)
    {
        // advance the automaton by one char
        state = getState(state, text.charAt(index));
        ++index;
        if (output[state] != null)
        {
            // an output entry exists for this state: stop at the first hit
            return true;
        }
    }
    return false;
}

/**
 * Finds the first match in the given text.
 * <p>
 * "First" means the match found at the earliest position at which a state with a
 * non-null {@code output} entry is reached; when that entry lists several hits,
 * the one stored at index 0 is reported.
 *
 * @param text source text to check
 * @return the first match (with {@code end} being the index just past the last
 *         matched char), or {@code null} if there are no matches
 */
public Hit<V> findFirst(String text)
{
    int state = 0;
    for (int index = 0, length = text.length(); index < length; ++index)
    {
        state = getState(state, text.charAt(index));
        int[] hits = output[state];
        if (hits == null)
        {
            // nothing recorded for this state, keep scanning
            continue;
        }
        // index is 0-based, the reported end is one past the current char
        int end = index + 1;
        int first = hits[0];
        // l[first] is subtracted from end to obtain the begin offset
        // (presumably the keyword length -- confirm against the builder)
        return new Hit<V>(end - l[first], end, v[first]);
    }
    return null;
}


/**
* Save
Expand Down Expand Up @@ -233,7 +277,7 @@ public V get(String key)

/**
* Pick the value by index in value array <br>
* Notice that to be more efficiently, this method DONOT check the parameter
* Note that, for efficiency, this method does NOT check the parameter
* @param index The index
* @return The value
*/
Expand Down Expand Up @@ -291,7 +335,7 @@ public interface IHitCancellable<V>
*
* @param <V> the value type
*/
public class Hit<V>
public static class Hit<V>
{
/**
* the beginning index, inclusive.
Expand Down Expand Up @@ -726,7 +770,7 @@ private void constructFailureStates()
fail = new int[size + 1];
fail[1] = base[0];
output = new int[size + 1][];
Queue<State> queue = new LinkedBlockingDeque<State>();
Queue<State> queue = new ArrayDeque<State>();

// Step 1: set the failure of every depth-1 node to the root node
for (State depthOneState : this.rootState.getStates())
Expand Down
46 changes: 45 additions & 1 deletion src/test/java/TestAhoCorasickDoubleArrayTrie.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public void hit(int begin, int end, String value)
}
});
// Or simply use
List<AhoCorasickDoubleArrayTrie<String>.Hit<String>> wordList = acdat.parseText(text);
List<AhoCorasickDoubleArrayTrie.Hit<String>> wordList = acdat.parseText(text);
System.out.println(wordList);
}

Expand Down Expand Up @@ -122,6 +122,50 @@ public boolean hit(int begin, int end, String value) {
}
}

/**
 * Verifies {@code matches}: texts containing any dictionary keyword are accepted,
 * while empty text, keyword prefixes and unrelated text are rejected.
 */
public void testMatches() {
    Map<String, Integer> keywords = new HashMap<String, Integer>();
    keywords.put("space", 1);
    keywords.put("keyword", 2);
    keywords.put("ch", 3);
    AhoCorasickDoubleArrayTrie<Integer> acdat = new AhoCorasickDoubleArrayTrie<Integer>();
    acdat.build(keywords);

    // exact keywords, keywords with leading/trailing noise, adjacent keywords
    String[] containing = {"space", "keyword", "ch", " ch", "chkeyword", "oooospace2"};
    for (String text : containing) {
        assertTrue(acdat.matches(text));
    }

    // strict prefixes of keywords, the empty string, and unrelated text
    String[] notContaining = {"c", "", "spac", "nothing"};
    for (String text : notContaining) {
        assertFalse(acdat.matches(text));
    }
}

/**
 * Verifies {@code findFirst}: the reported hit carries the begin offset
 * (inclusive), the end offset (exclusive) and the keyword's value, and
 * {@code null} is returned when the text contains no keyword.
 */
public void testFirstMatch() {
    Map<String, Integer> keywords = new HashMap<String, Integer>();
    keywords.put("space", 1);
    keywords.put("keyword", 2);
    keywords.put("ch", 3);
    AhoCorasickDoubleArrayTrie<Integer> acdat = new AhoCorasickDoubleArrayTrie<Integer>();
    acdat.build(keywords);

    // the whole text is a keyword
    AhoCorasickDoubleArrayTrie.Hit<Integer> first = acdat.findFirst("space");
    assertEquals(0, first.begin);
    assertEquals(5, first.end);
    assertEquals(1, first.value.intValue());

    // "space" ends before "ch" does, so it is the one reported
    first = acdat.findFirst("a lot of garbage in the space ch");
    assertEquals(24, first.begin);
    assertEquals(29, first.end);
    assertEquals(1, first.value.intValue());

    // empty text, unrelated text and near-misses yield no hit
    String[] withoutKeyword = {"", "value", "keywork", " no pace"};
    for (String text : withoutKeyword) {
        assertNull(acdat.findFirst(text));
    }
}

public void testCancellation() throws Exception {
// Collect test data set
TreeMap<String, String> map = new TreeMap<String, String>();
Expand Down

0 comments on commit a3d5a5e

Please sign in to comment.