-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathAhoCorasick.java
81 lines (70 loc) · 1.94 KB
/
AhoCorasick.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
package Strings;
import java.util.*;
// https://en.wikipedia.org/wiki/Aho–Corasick_algorithm
/**
 * Aho–Corasick automaton over the lowercase Latin alphabet {@code 'a'..'z'}.
 *
 * <p>Patterns are inserted into a trie with {@link #addString(String)}. Suffix links and
 * automaton transitions are computed lazily on first use and memoized inside each node.
 * Inserting a pattern that creates new trie nodes discards those memoized values, so
 * insertions and queries may be interleaved freely.
 *
 * <p>Node 0 is the root. States are plain node indices into {@link #nodes}.
 *
 * <p>Note: the lazy computation in {@link #suffLink(int)} and {@link #transition(int, char)}
 * is recursive, and the recursion can be as deep as the node's depth in the trie.
 */
public class AhoCorasick {

    static final int ALPHABET_SIZE = 26;

    /** Node storage; index 0 is the root. Replaced by a larger array when it fills up. */
    Node[] nodes;

    /** Number of leading slots of {@link #nodes} that hold a node. */
    int nodeCount;

    /** True once a suffix link or transition has been memoized since the last invalidation. */
    private boolean cachesPopulated;

    /** A trie node together with its lazily computed automaton data. */
    public static class Node {
        /** Index of the parent node; -1 for the root. */
        int parent;
        /** Character on the edge leading from the parent to this node. */
        char charFromParent;
        /** Memoized suffix link, or -1 when not computed yet. */
        int suffLink = -1;
        /** Trie edges: child index per letter, or -1 when there is no such child. */
        int[] children = new int[ALPHABET_SIZE];
        /** Memoized automaton transitions per letter, or -1 when not computed yet. */
        int[] transitions = new int[ALPHABET_SIZE];
        /** True when an inserted pattern ends exactly at this node. */
        boolean leaf;

        {
            Arrays.fill(children, -1);
            Arrays.fill(transitions, -1);
        }
    }

    /**
     * Creates an automaton that contains only the root node.
     *
     * @param maxNodes initial node capacity; values below 1 are treated as 1. The storage
     *     grows on demand, so this is a sizing hint rather than a hard limit.
     */
    public AhoCorasick(int maxNodes) {
        nodes = new Node[Math.max(1, maxNodes)];
        Node root = new Node();
        root.suffLink = 0;
        root.parent = -1;
        nodes[0] = root;
        nodeCount = 1;
    }

    /**
     * Inserts a pattern into the trie and marks its final node as a leaf.
     *
     * @param s pattern consisting only of characters {@code 'a'..'z'}; the empty string
     *     marks the root as a leaf
     * @throws NullPointerException if {@code s} is null
     * @throws IllegalArgumentException if {@code s} contains a character outside
     *     {@code 'a'..'z'}; the automaton is left unchanged in that case
     */
    public void addString(String s) {
        Objects.requireNonNull(s, "s");
        // Validate everything first so a rejected pattern never leaves a partial path behind.
        for (int i = 0; i < s.length(); i++) {
            if (!inAlphabet(s.charAt(i))) {
                throw new IllegalArgumentException(
                        "Unsupported character '" + s.charAt(i) + "' at index " + i
                                + " of pattern \"" + s + "\"; expected 'a'..'z'");
            }
        }
        int cur = 0;
        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);
            int c = ch - 'a';
            Node node = nodes[cur];
            int next = node.children[c];
            if (next == -1) {
                next = newNode(cur, ch);
                node.children[c] = next;
            }
            cur = next;
        }
        nodes[cur].leaf = true;
    }

    /**
     * Returns the suffix link of a node, computing and memoizing it on first use.
     *
     * @param nodeIndex index of an existing node
     * @return index of the node the suffix link points to (0 for the root and its children)
     */
    public int suffLink(int nodeIndex) {
        Node node = nodes[nodeIndex];
        if (node.suffLink == -1) {
            node.suffLink = node.parent == 0
                    ? 0
                    : transition(suffLink(node.parent), node.charFromParent);
            cachesPopulated = true;
        }
        return node.suffLink;
    }

    /**
     * Returns the automaton state reached from {@code nodeIndex} on character {@code ch},
     * computing and memoizing it on first use.
     *
     * @param nodeIndex index of the current state
     * @param ch next input character; a character outside {@code 'a'..'z'} cannot occur in
     *     any pattern and therefore leads to the root
     * @return index of the next state
     */
    public int transition(int nodeIndex, char ch) {
        if (!inAlphabet(ch)) {
            return 0;
        }
        int c = ch - 'a';
        Node node = nodes[nodeIndex];
        if (node.transitions[c] == -1) {
            int target;
            if (node.children[c] != -1) {
                target = node.children[c];
            } else if (nodeIndex == 0) {
                target = 0;
            } else {
                target = transition(suffLink(nodeIndex), ch);
            }
            node.transitions[c] = target;
            cachesPopulated = true;
        }
        return node.transitions[c];
    }

    /**
     * Tells whether some inserted pattern ends at the given state, i.e. whether the node
     * itself or any node reachable from it through suffix links is a leaf.
     *
     * <p>Checking only {@code leaf} on the current state misses patterns that are a proper
     * suffix of the path to that state (for example pattern "bc" while standing on "abc").
     *
     * @param nodeIndex index of the current state
     * @return true if at least one pattern ends at this state
     */
    public boolean hasMatch(int nodeIndex) {
        for (int cur = nodeIndex; cur != 0; cur = suffLink(cur)) {
            if (nodes[cur].leaf) {
                return true;
            }
        }
        // Only the empty pattern ends at the root.
        return nodes[0].leaf;
    }

    /** Returns true when {@code ch} is one of the supported letters {@code 'a'..'z'}. */
    private static boolean inAlphabet(char ch) {
        return ch >= 'a' && ch < 'a' + ALPHABET_SIZE;
    }

    /** Appends a fresh child node, growing the storage and dropping stale caches as needed. */
    private int newNode(int parent, char ch) {
        if (cachesPopulated) {
            // A new trie edge can change suffix links and fallback transitions computed earlier.
            invalidateCaches();
        }
        if (nodeCount == nodes.length) {
            nodes = Arrays.copyOf(nodes, nodes.length * 2);
        }
        Node node = new Node();
        node.parent = parent;
        node.charFromParent = ch;
        nodes[nodeCount] = node;
        return nodeCount++;
    }

    /** Resets every memoized suffix link and transition; the root keeps its link to itself. */
    private void invalidateCaches() {
        for (int i = 0; i < nodeCount; i++) {
            Arrays.fill(nodes[i].transitions, -1);
            nodes[i].suffLink = i == 0 ? 0 : -1;
        }
        cachesPopulated = false;
    }

    // Usage example
    public static void main(String[] args) {
        AhoCorasick ahoCorasick = new AhoCorasick(1000);
        ahoCorasick.addString("bc");
        ahoCorasick.addString("abc");

        String s = "tabcbc";
        int node = 0;
        List<Integer> positions = new ArrayList<>();
        for (int i = 0; i < s.length(); i++) {
            node = ahoCorasick.transition(node, s.charAt(i));
            // hasMatch also reports patterns that end here as a suffix of a longer trie path.
            if (ahoCorasick.hasMatch(node)) {
                positions.add(i);
            }
        }
        // Prints the end index of every match: [3, 5]
        System.out.println(positions);
    }
}