From 2bf9ce9e64eb5ae95be6485dbe450d5e9134afc5 Mon Sep 17 00:00:00 2001 From: Charles Munger Date: Thu, 11 Mar 2021 14:23:24 -0800 Subject: [PATCH] Avoid locking for put methods for RE2. Fixes #46 (#121) * Avoid locking for put methods for RE2. Google-wide-profiling indicated that this was a significant source of java lock contention. This new approach uses a Treiber stack to make adding an operation back into the pool a lock-free operation. It uses the existing objects as nodes in the linked stack - the Treiber stack suffers from an ABA problem when popping if nodes are reused, so removing an item from the pool is done by moving the whole stack to a simple linked stack guarded by the existing lock. The locking could be avoided entirely by allocating wrapper objects for the stack nodes, but I'm not sure if that's desirable, since the goal of the pool was to avoid allocation. * Address comments * Address comments * fix commnt * Address comments * Update GWT dependency to support AtomicReference Co-authored-by: Charles Munger --- build.gradle | 4 +- java/com/google/re2j/Machine.java | 19 ++++++++++ java/com/google/re2j/RE2.java | 61 ++++++++++++++++++++++--------- 3 files changed, 64 insertions(+), 20 deletions(-) diff --git a/build.gradle b/build.gradle index 07e7d19a..3604a962 100644 --- a/build.gradle +++ b/build.gradle @@ -81,8 +81,8 @@ test { dependencies { testCompile 'junit:junit:4.12' - testCompile 'com.google.gwt:gwt-dev:2.8.2' - testCompile 'com.google.gwt:gwt-user:2.8.2' + testCompile 'com.google.gwt:gwt-dev:2.9.0' + testCompile 'com.google.gwt:gwt-user:2.9.0' testCompile 'com.google.truth:truth:0.36' // Cobertura requires slf4j at runtime diff --git a/java/com/google/re2j/Machine.java b/java/com/google/re2j/Machine.java index 1ca1ad8d..ba785917 100644 --- a/java/com/google/re2j/Machine.java +++ b/java/com/google/re2j/Machine.java @@ -99,6 +99,11 @@ public String toString() { private int[] matchcap; private int ncap; + // Make sure to include new fields in the copy constructor + + // Pointer to form a linked stack for the pool of Machines. Not included in copy constructor. + Machine next; + /** * Constructs a matching Machine for the specified {@code RE2}. */ @@ -110,6 +115,20 @@ public String toString() { this.matchcap = new int[prog.numCap < 2 ? 2 : prog.numCap]; } + /** Copy constructor, but does not include {@code next} */ + Machine(Machine copy) { + // Make sure to include any new fields here + this.re2 = copy.re2; + this.prog = copy.prog; + this.q0 = copy.q0; + this.q1 = copy.q1; + this.pool = copy.pool; + this.poolSize = copy.poolSize; + this.matched = copy.matched; + this.matchcap = copy.matchcap; + this.ncap = copy.ncap; + } + // init() reinitializes an existing Machine for re-use on a new input. void init(int ncap) { // length change need new arrays diff --git a/java/com/google/re2j/RE2.java b/java/com/google/re2j/RE2.java index f5b0cebb..078102f7 100644 --- a/java/com/google/re2j/RE2.java +++ b/java/com/google/re2j/RE2.java @@ -22,12 +22,11 @@ import com.google.re2j.MatcherInput.Encoding; import java.io.UnsupportedEncodingException; -import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; -import java.util.Queue; +import java.util.concurrent.atomic.AtomicReference; /** * An RE2 class instance is a compiled representation of an RE2 regular expression, independent of @@ -116,10 +115,9 @@ class RE2 { boolean prefixComplete; // true iff prefix is the entire regexp int prefixRune; // first rune in prefix - // Cache of machines for running regexp. - // Accesses must be serialized using |this| monitor. - // @GuardedBy("this") - private final Queue machine = new ArrayDeque(); + // Cache of machines for running regexp. Forms a Treiber stack. + private final AtomicReference pooled = new AtomicReference(); + public Map namedGroups; // This is visible for testing. @@ -214,25 +212,41 @@ int numberOfCapturingGroups() { // get() returns a machine to use for matching |this|. It uses |this|'s // machine cache if possible, to avoid unnecessary allocation. Machine get() { - synchronized (this) { - if (!machine.isEmpty()) { - return machine.remove(); - } - } - return new Machine(this); + // Pop a machine off the stack if available. + Machine head; + do { + head = pooled.get(); + } while (head != null && !pooled.compareAndSet(head, head.next)); + return head; } // Clears the memory associated with this machine. - synchronized void reset() { - machine.clear(); + void reset() { + pooled.set(null); } // put() returns a machine to |this|'s machine cache. There is no attempt to // limit the size of the cache, so it will grow to the maximum number of // simultaneous matches run using |this|. (The cache empties when |this| - // gets garbage collected.) - synchronized void put(Machine m) { - machine.add(m); + // gets garbage collected or reset is called.) + void put(Machine m, boolean isNew) { + // To avoid allocation in the single-thread or uncontended case, reuse a node only if + // it was the only element in the stack when it was popped, and it's the only element + // in the stack when it's pushed back after use. + Machine head; + do { + head = pooled.get(); + if (!isNew && head != null) { + // If an element had a null next pointer and it was previously in the stack, another thread + // might be trying to pop it out right now, and if it sees the same node now in the + // stack the pop will succeed, but the new top of the stack will be the stale (null) value + // of next. Allocate a new Machine so that the CAS will not succeed if this node has been + // popped and re-pushed. + m = new Machine(m); + isNew = true; + } + m.next = head; + } while (!pooled.compareAndSet(head, m)); } @Override @@ -245,9 +259,20 @@ public String toString() { // Derived from exec.go. private int[] doExecute(MachineInput in, int pos, int anchor, int ncap) { Machine m = get(); + // The Treiber stack cannot reuse nodes, unless the node to be reused has only ever been at + // the bottom of the stack (i.e., next == null). + boolean isNew = false; + if (m == null) { + m = new Machine(this); + isNew = true; + } else if (m.next != null) { + m = new Machine(m); + isNew = true; + } + m.init(ncap); int[] cap = m.match(in, pos, anchor) ? m.submatches() : null; - put(m); + put(m, isNew); return cap; }