Skip to content

Commit

Permalink
move Operations.sameLanguage/subsetOf to AutomatonTestUtil in test-framework (#13708)
Browse files Browse the repository at this point in the history

This code is suitable for tests only and may throw unexpected Exceptions or AssertionErrors for some input.
  • Loading branch information
rmuir authored Sep 5, 2024
1 parent b91b413 commit 87bc827
Show file tree
Hide file tree
Showing 14 changed files with 129 additions and 116 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ API Changes

* GITHUB#13632: CandidateMatcher public matching functions (Bryan Jacobowitz)

* GITHUB#13708: Move Operations.sameLanguage/subsetOf to test-framework. (Robert Muir)


New Features
---------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1490,7 +1490,7 @@ public void testRandomSyns() throws Exception {
}

assertTrue(approxEquals(actual, expected));
assertTrue(Operations.sameLanguage(actual, expected));
assertTrue(AutomatonTestUtil.sameLanguage(actual, expected));
}

a.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import java.util.BitSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -374,17 +373,6 @@ public static Automaton intersection(Automaton a1, Automaton a2) {
return removeDeadStates(c);
}

/**
 * Checks whether two automata accept exactly the same language, i.e. whether each one's
 * language is a subset of the other's. This is an expensive computation. Both automata must
 * be determinized and must not contain dead states.
 *
 * @param a1 first automaton
 * @param a2 second automaton
 * @return true if both arguments are the same instance, or if each language is a subset of
 *     the other
 */
public static boolean sameLanguage(Automaton a1, Automaton a2) {
  // Identity short-circuits the two subset checks; otherwise a2-in-a1 is tested first.
  return a1 == a2 || (subsetOf(a2, a1) && subsetOf(a1, a2));
}

// TODO: move to test-framework?
/**
* Returns true if this automaton has any states that cannot be reached from the initial state or
Expand Down Expand Up @@ -417,73 +405,6 @@ public static boolean hasDeadStatesToAccept(Automaton a) {
return reachableFromAccept.isEmpty() == false;
}

/**
 * Returns true if the language of <code>a1</code> is a subset of the language of <code>a2</code>.
 * Both automata must be determinized and must have no dead states.
 *
 * <p>The check walks pairs of states (one from each automaton) reachable in lockstep from the
 * pair (0, 0). It fails as soon as a pair is found where <code>a1</code> accepts but <code>a2
 * </code> does not, or where some label range leaving the <code>a1</code> state is not fully
 * covered by transitions leaving the <code>a2</code> state.
 *
 * <p>Complexity: quadratic in number of states.
 *
 * @param a1 the candidate subset automaton
 * @param a2 the candidate superset automaton
 * @return true if no reachable state pair shows a string accepted by a1 but not by a2
 * @throws IllegalArgumentException if either automaton is not deterministic
 */
public static boolean subsetOf(Automaton a1, Automaton a2) {
if (a1.isDeterministic() == false) {
throw new IllegalArgumentException("a1 must be deterministic");
}
if (a2.isDeterministic() == false) {
throw new IllegalArgumentException("a2 must be deterministic");
}
// The "no dead states" precondition is only verified when assertions are enabled.
assert hasDeadStatesFromInitial(a1) == false;
assert hasDeadStatesFromInitial(a2) == false;
if (a1.getNumStates() == 0) {
// Empty language is always a subset of any other language
return true;
} else if (a2.getNumStates() == 0) {
// a2 has no states at all, so a1 can only be a subset if it is empty as well.
return isEmpty(a1);
}

// TODO: cutover to iterators instead
// NOTE(review): the sweep below presumably relies on each state's transitions being
// ordered by label range -- confirm against getSortedTransitions.
Transition[][] transitions1 = a1.getSortedTransitions();
Transition[][] transitions2 = a2.getSortedTransitions();
// Queue of state pairs still to examine, plus the set of pairs already enqueued.
ArrayDeque<StatePair> worklist = new ArrayDeque<>();
HashSet<StatePair> visited = new HashSet<>();
StatePair p = new StatePair(0, 0);
worklist.add(p);
visited.add(p);
while (worklist.size() > 0) {
p = worklist.removeFirst();
// a1 accepts here but a2 does not: a string in a1's language is missing from a2's.
if (a1.isAccept(p.s1) && a2.isAccept(p.s2) == false) {
return false;
}
Transition[] t1 = transitions1[p.s1];
Transition[] t2 = transitions2[p.s2];
// b2 is carried across iterations of n1 so a2's transitions are not rescanned from the start.
for (int n1 = 0, b2 = 0; n1 < t1.length; n1++) {
// Skip a2 transitions that end before the current a1 transition begins.
while (b2 < t2.length && t2[b2].max < t1[n1].min) {
b2++;
}
// [min1, max1] is the part of the a1 transition's label range not yet covered by a2.
int min1 = t1[n1].min, max1 = t1[n1].max;

for (int n2 = b2; n2 < t2.length && t1[n1].max >= t2[n2].min; n2++) {
// Gap: a2 has no transition for the labels in [min1, t2[n2].min - 1].
if (t2[n2].min > min1) {
return false;
}
if (t2[n2].max < Character.MAX_CODE_POINT) {
// Everything up to t2[n2].max is covered; continue from the next label.
min1 = t2[n2].max + 1;
} else {
// a2 covers through the maximum code point: force min1 > max1 to mark the range as
// fully covered without overflowing past MAX_CODE_POINT.
min1 = Character.MAX_CODE_POINT;
max1 = Character.MIN_CODE_POINT;
}
// The overlapping labels lead to this destination pair; enqueue it once.
StatePair q = new StatePair(t1[n1].dest, t2[n2].dest);
if (!visited.contains(q)) {
worklist.add(q);
visited.add(q);
}
}
// A non-empty remainder means part of a1's label range has no matching a2 transition.
if (min1 <= max1) {
return false;
}
}
}
return true;
}

/**
* Returns an automaton that accepts the union of the languages of the given automata.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,14 @@
* @lucene.experimental
*/
public class StatePair {
// only mike knows what it does (do not expose)
int s;
int s1;
int s2;

/** first state */
public final int s1;

/** second state */
public final int s2;

StatePair(int s, int s1, int s2) {
this.s = s;
Expand Down Expand Up @@ -81,7 +86,7 @@ public boolean equals(Object obj) {
@Override
public int hashCode() {
// Don't use s1 ^ s2 since it's vulnerable to the case where s1 == s2 always --> hashCode = 0,
// e.g. if you call Operations.sameLanguage,
// e.g. if you call AutomatonTestUtil.sameLanguage,
// passing the same automaton against itself:
return s1 * 31 + s2;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ private void assertSameLanguage(Automaton expected, Automaton actual) {
Operations.removeDeadStates(expected), DEFAULT_DETERMINIZE_WORK_LIMIT);
Automaton actualDet =
Operations.determinize(Operations.removeDeadStates(actual), DEFAULT_DETERMINIZE_WORK_LIMIT);
if (Operations.sameLanguage(expectedDet, actualDet) == false) {
if (AutomatonTestUtil.sameLanguage(expectedDet, actualDet) == false) {
Set<String> expectedPaths = toPathStrings(expectedDet);
Set<String> actualPaths = toPathStrings(actualDet);
StringBuilder b = new StringBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ public void testIntersect() throws Exception {

Automaton actual =
Operations.determinize(Automata.makeStringUnion(found), DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.sameLanguage(expected, actual));
assertTrue(AutomatonTestUtil.sameLanguage(expected, actual));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public void testSameLanguage() throws Exception {
Automaton a2 =
Operations.removeDeadStates(
Operations.concatenate(Automata.makeString("foo"), Automata.makeString("bar")));
assertTrue(Operations.sameLanguage(a1, a2));
assertTrue(AutomatonTestUtil.sameLanguage(a1, a2));
}

public void testCommonPrefixString() throws Exception {
Expand Down Expand Up @@ -257,7 +257,7 @@ public void testMinimizeSimple() throws Exception {
Automaton a = Automata.makeString("foobar");
Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);

assertTrue(Operations.sameLanguage(a, aMin));
assertTrue(AutomatonTestUtil.sameLanguage(a, aMin));
}

public void testMinimize2() throws Exception {
Expand All @@ -266,7 +266,7 @@ public void testMinimize2() throws Exception {
Arrays.asList(Automata.makeString("foobar"), Automata.makeString("boobar")));
Automaton aMin = MinimizationOperations.minimize(a, DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(
Operations.sameLanguage(
AutomatonTestUtil.sameLanguage(
Operations.determinize(Operations.removeDeadStates(a), DEFAULT_DETERMINIZE_WORK_LIMIT),
aMin));
}
Expand All @@ -276,7 +276,7 @@ public void testReverse() throws Exception {
Automaton ra = Operations.reverse(a);
Automaton a2 = Operations.determinize(Operations.reverse(ra), DEFAULT_DETERMINIZE_WORK_LIMIT);

assertTrue(Operations.sameLanguage(a, a2));
assertTrue(AutomatonTestUtil.sameLanguage(a, a2));
}

public void testOptional() throws Exception {
Expand Down Expand Up @@ -401,7 +401,7 @@ public void testReverseRandom1() throws Exception {
Automaton ra = Operations.reverse(a);
Automaton rra = Operations.reverse(ra);
assertTrue(
Operations.sameLanguage(
AutomatonTestUtil.sameLanguage(
Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE),
Operations.determinize(Operations.removeDeadStates(rra), Integer.MAX_VALUE)));
}
Expand Down Expand Up @@ -502,7 +502,7 @@ public void testBuilderRandom() throws Exception {
}

assertTrue(
Operations.sameLanguage(
AutomatonTestUtil.sameLanguage(
Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE),
Operations.determinize(
Operations.removeDeadStates(builder.finish()), Integer.MAX_VALUE)));
Expand Down Expand Up @@ -735,7 +735,8 @@ public void testSameLanguage1() throws Exception {
a2.addTransition(0, state, 'a');
a2.finishState();
assertTrue(
Operations.sameLanguage(Operations.removeDeadStates(a), Operations.removeDeadStates(a2)));
AutomatonTestUtil.sameLanguage(
Operations.removeDeadStates(a), Operations.removeDeadStates(a2)));
}

private Automaton randomNoOp(Automaton a) {
Expand Down Expand Up @@ -1288,7 +1289,7 @@ private void assertSame(Collection<BytesRef> terms, Automaton a) {
Automaton a2 =
Operations.removeDeadStates(Operations.determinize(unionTerms(terms), Integer.MAX_VALUE));
assertTrue(
Operations.sameLanguage(
AutomatonTestUtil.sameLanguage(
a2, Operations.removeDeadStates(Operations.determinize(a, Integer.MAX_VALUE))));

// Do same check, in UTF8 space
Expand Down Expand Up @@ -1613,7 +1614,7 @@ public void testMakeBinaryIntervalOpenBoth() throws Exception {

public void testAcceptAllEmptyStringMin() throws Exception {
Automaton a = Automata.makeBinaryInterval(newBytesRef(), true, null, true);
assertTrue(Operations.sameLanguage(Automata.makeAnyBinary(), a));
assertTrue(AutomatonTestUtil.sameLanguage(Automata.makeAnyBinary(), a));
}

private static IntsRef toIntsRef(String s) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public void testAgainstSimple() throws Exception {
a = AutomatonTestUtil.determinizeSimple(a);
Automaton b = Operations.determinize(a, Integer.MAX_VALUE);
// TODO: more verifications possible?
assertTrue(Operations.sameLanguage(a, b));
assertTrue(AutomatonTestUtil.sameLanguage(a, b));
}
}

Expand All @@ -53,20 +53,20 @@ private static void assertAutomaton(Automaton a) {
Operations.complement(
Operations.complement(a, DEFAULT_DETERMINIZE_WORK_LIMIT),
DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.sameLanguage(a, equivalent));
assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent));

// a union a = a
equivalent =
Operations.determinize(
Operations.removeDeadStates(Operations.union(a, a)), DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.sameLanguage(a, equivalent));
assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent));

// a intersect a = a
equivalent =
Operations.determinize(
Operations.removeDeadStates(Operations.intersection(a, a)),
DEFAULT_DETERMINIZE_WORK_LIMIT);
assertTrue(Operations.sameLanguage(a, equivalent));
assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent));

// a minus a = empty
Automaton empty = Operations.minus(a, a, DEFAULT_DETERMINIZE_WORK_LIMIT);
Expand All @@ -81,7 +81,7 @@ private static void assertAutomaton(Automaton a) {
equivalent =
Operations.minus(optional, Automata.makeEmptyString(), DEFAULT_DETERMINIZE_WORK_LIMIT);
// System.out.println("equiv " + equivalent);
assertTrue(Operations.sameLanguage(a, equivalent));
assertTrue(AutomatonTestUtil.sameLanguage(a, equivalent));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -81,44 +81,46 @@ private void assertLev(String s, int maxDistance) {
// check that the dfa for n-1 accepts a subset of the dfa for n
if (n > 0) {
assertTrue(
Operations.subsetOf(
AutomatonTestUtil.subsetOf(
Operations.removeDeadStates(automata[n - 1]),
Operations.removeDeadStates(automata[n])));
assertTrue(
Operations.subsetOf(
AutomatonTestUtil.subsetOf(
Operations.removeDeadStates(automata[n - 1]),
Operations.removeDeadStates(tautomata[n])));
assertTrue(
Operations.subsetOf(
AutomatonTestUtil.subsetOf(
Operations.removeDeadStates(tautomata[n - 1]),
Operations.removeDeadStates(automata[n])));
assertTrue(
Operations.subsetOf(
AutomatonTestUtil.subsetOf(
Operations.removeDeadStates(tautomata[n - 1]),
Operations.removeDeadStates(tautomata[n])));
assertNotSame(automata[n - 1], automata[n]);
}
// check that Lev(N) is a subset of LevT(N)
assertTrue(
Operations.subsetOf(
AutomatonTestUtil.subsetOf(
Operations.removeDeadStates(automata[n]), Operations.removeDeadStates(tautomata[n])));
// special checks for specific n
switch (n) {
case 0:
// easy, matches the string itself
assertTrue(
Operations.sameLanguage(
AutomatonTestUtil.sameLanguage(
Automata.makeString(s), Operations.removeDeadStates(automata[0])));
assertTrue(
Operations.sameLanguage(
AutomatonTestUtil.sameLanguage(
Automata.makeString(s), Operations.removeDeadStates(tautomata[0])));
break;
case 1:
// generate a lev1 naively, and check the accepted lang is the same.
assertTrue(
Operations.sameLanguage(naiveLev1(s), Operations.removeDeadStates(automata[1])));
AutomatonTestUtil.sameLanguage(
naiveLev1(s), Operations.removeDeadStates(automata[1])));
assertTrue(
Operations.sameLanguage(naiveLev1T(s), Operations.removeDeadStates(tautomata[1])));
AutomatonTestUtil.sameLanguage(
naiveLev1T(s), Operations.removeDeadStates(tautomata[1])));
break;
default:
assertBruteForce(s, automata[n], n);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public void testBasic() {
Automaton a = AutomatonTestUtil.randomAutomaton(random());
Automaton la = Operations.determinize(Operations.removeDeadStates(a), Integer.MAX_VALUE);
Automaton lb = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
assertTrue(Operations.sameLanguage(la, lb));
assertTrue(AutomatonTestUtil.sameLanguage(la, lb));
}
}

Expand All @@ -42,7 +42,7 @@ public void testAgainstBrzozowski() {
Automaton a = AutomatonTestUtil.randomAutomaton(random());
a = AutomatonTestUtil.minimizeSimple(a);
Automaton b = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
assertTrue(Operations.sameLanguage(a, b));
assertTrue(AutomatonTestUtil.sameLanguage(a, b));
assertEquals(a.getNumStates(), b.getNumStates());
int numStates = a.getNumStates();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public void testStringUnion() {
assertTrue(naiveUnion.isDeterministic());
assertFalse(Operations.hasDeadStatesFromInitial(naiveUnion));

assertTrue(Operations.sameLanguage(union, naiveUnion));
assertTrue(AutomatonTestUtil.sameLanguage(union, naiveUnion));
}

private static Automaton naiveUnion(List<BytesRef> strings) {
Expand Down Expand Up @@ -116,13 +116,13 @@ public void testEmptySingletonNFAConcatenate() {
Automaton concat2 = Operations.concatenate(singleton, nfa);
assertFalse(concat2.isDeterministic());
assertTrue(
Operations.sameLanguage(
AutomatonTestUtil.sameLanguage(
Operations.determinize(concat1, 100), Operations.determinize(concat2, 100)));
assertTrue(
Operations.sameLanguage(
AutomatonTestUtil.sameLanguage(
Operations.determinize(nfa, 100), Operations.determinize(concat1, 100)));
assertTrue(
Operations.sameLanguage(
AutomatonTestUtil.sameLanguage(
Operations.determinize(nfa, 100), Operations.determinize(concat2, 100)));
}

Expand Down
Loading

0 comments on commit 87bc827

Please sign in to comment.