Skip to content

Commit

Permalink
Avoid locking for put methods for RE2. Fixes google#46 (google#121)
Browse files Browse the repository at this point in the history
* Avoid locking for put methods for RE2.

Google-wide-profiling indicated that this was a significant source of java lock contention. This new approach uses a Treiber stack to make adding an operation back into the pool a lock-free operation. It uses the existing objects as nodes in the linked stack - the Treiber stack suffers from an ABA problem when popping if nodes are reused, so removing an item from the pool is done by moving the whole stack to a simple linked stack guarded by the existing lock.

The locking could be avoided entirely by allocating wrapper objects for the stack nodes, but I'm not sure if that's desirable, since the goal of the pool was to avoid allocation.

* Address comments

* Address comments

* fix commnt

* Address comments

* Update GWT dependency to support AtomicReference

Co-authored-by: Charles Munger <clm@google.com>
  • Loading branch information
charlesmunger and Charles Munger authored Mar 11, 2021
1 parent e41cfff commit 2bf9ce9
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 20 deletions.
4 changes: 2 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ test {

dependencies {
testCompile 'junit:junit:4.12'
testCompile 'com.google.gwt:gwt-dev:2.8.2'
testCompile 'com.google.gwt:gwt-user:2.8.2'
testCompile 'com.google.gwt:gwt-dev:2.9.0'
testCompile 'com.google.gwt:gwt-user:2.9.0'
testCompile 'com.google.truth:truth:0.36'

// Cobertura requires slf4j at runtime
Expand Down
19 changes: 19 additions & 0 deletions java/com/google/re2j/Machine.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ public String toString() {
private int[] matchcap;
private int ncap;

// Make sure to include new fields in the copy constructor

// Pointer to form a linked stack for the pool of Machines. Not included in copy constructor.
Machine next;

/**
* Constructs a matching Machine for the specified {@code RE2}.
*/
Expand All @@ -110,6 +115,20 @@ public String toString() {
this.matchcap = new int[prog.numCap < 2 ? 2 : prog.numCap];
}

/** Copy constructor, but does not include {@code next} */
Machine(Machine copy) {
// Make sure to include any new fields here
this.re2 = copy.re2;
this.prog = copy.prog;
this.q0 = copy.q0;
this.q1 = copy.q1;
this.pool = copy.pool;
this.poolSize = copy.poolSize;
this.matched = copy.matched;
this.matchcap = copy.matchcap;
this.ncap = copy.ncap;
}

// init() reinitializes an existing Machine for re-use on a new input.
void init(int ncap) {
// length change need new arrays
Expand Down
61 changes: 43 additions & 18 deletions java/com/google/re2j/RE2.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,11 @@

import com.google.re2j.MatcherInput.Encoding;
import java.io.UnsupportedEncodingException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.atomic.AtomicReference;

/**
* An RE2 class instance is a compiled representation of an RE2 regular expression, independent of
Expand Down Expand Up @@ -116,10 +115,9 @@ class RE2 {
boolean prefixComplete; // true iff prefix is the entire regexp
int prefixRune; // first rune in prefix

// Cache of machines for running regexp.
// Accesses must be serialized using |this| monitor.
// @GuardedBy("this")
private final Queue<Machine> machine = new ArrayDeque<Machine>();
// Cache of machines for running regexp. Forms a Treiber stack.
private final AtomicReference<Machine> pooled = new AtomicReference<Machine>();

public Map<String, Integer> namedGroups;

// This is visible for testing.
Expand Down Expand Up @@ -214,25 +212,41 @@ int numberOfCapturingGroups() {
// get() returns a machine to use for matching |this|. It uses |this|'s
// machine cache if possible, to avoid unnecessary allocation.
Machine get() {
synchronized (this) {
if (!machine.isEmpty()) {
return machine.remove();
}
}
return new Machine(this);
// Pop a machine off the stack if available.
Machine head;
do {
head = pooled.get();
} while (head != null && !pooled.compareAndSet(head, head.next));
return head;
}

// Clears the memory associated with this machine.
synchronized void reset() {
machine.clear();
void reset() {
pooled.set(null);
}

// put() returns a machine to |this|'s machine cache. There is no attempt to
// limit the size of the cache, so it will grow to the maximum number of
// simultaneous matches run using |this|. (The cache empties when |this|
// gets garbage collected.)
synchronized void put(Machine m) {
machine.add(m);
// gets garbage collected or reset is called.)
void put(Machine m, boolean isNew) {
// To avoid allocation in the single-thread or uncontended case, reuse a node only if
// it was the only element in the stack when it was popped, and it's the only element
// in the stack when it's pushed back after use.
Machine head;
do {
head = pooled.get();
if (!isNew && head != null) {
// If an element had a null next pointer and it was previously in the stack, another thread
// might be trying to pop it out right now, and if it sees the same node now in the
// stack the pop will succeed, but the new top of the stack will be the stale (null) value
// of next. Allocate a new Machine so that the CAS will not succeed if this node has been
// popped and re-pushed.
m = new Machine(m);
isNew = true;
}
m.next = head;
} while (!pooled.compareAndSet(head, m));
}

@Override
Expand All @@ -245,9 +259,20 @@ public String toString() {
// Derived from exec.go.
private int[] doExecute(MachineInput in, int pos, int anchor, int ncap) {
Machine m = get();
// The Treiber stack cannot reuse nodes, unless the node to be reused has only ever been at
// the bottom of the stack (i.e., next == null).
boolean isNew = false;
if (m == null) {
m = new Machine(this);
isNew = true;
} else if (m.next != null) {
m = new Machine(m);
isNew = true;
}

m.init(ncap);
int[] cap = m.match(in, pos, anchor) ? m.submatches() : null;
put(m);
put(m, isNew);
return cap;
}

Expand Down

0 comments on commit 2bf9ce9

Please sign in to comment.