From 861e302011bb3aaf0c8431c121b58a57b78481e3 Mon Sep 17 00:00:00 2001 From: Claes Redestad Date: Wed, 15 Feb 2023 21:52:41 +0000 Subject: [PATCH] 8302163: Speed up various String comparison methods with ArraysSupport.mismatch Reviewed-by: stsypanov, rriggs, alanb --- .../share/classes/java/lang/String.java | 60 ++++++------- .../share/classes/java/lang/StringLatin1.java | 8 +- .../bench/java/lang/StringBuilders.java | 13 +++ .../bench/java/lang/StringComparisons.java | 87 +++++++++++++++++++ .../openjdk/bench/java/lang/StringOther.java | 23 ----- 5 files changed, 127 insertions(+), 64 deletions(-) create mode 100644 test/micro/org/openjdk/bench/java/lang/StringComparisons.java diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 1897a06cd6008..1b5b5efb72b08 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -51,6 +51,7 @@ import java.util.stream.Stream; import java.util.stream.StreamSupport; +import jdk.internal.util.ArraysSupport; import jdk.internal.util.Preconditions; import jdk.internal.vm.annotation.ForceInline; import jdk.internal.vm.annotation.IntrinsicCandidate; @@ -1272,8 +1273,7 @@ private static void throwUnmappable(int off) { } private static void throwUnmappable(byte[] val) { - int dp = 0; - while (dp < val.length && val[dp] >=0) { dp++; } + int dp = StringCoding.countPositives(val, 0, val.length); throwUnmappable(dp); } @@ -1870,23 +1870,17 @@ private boolean nonSyncContentEquals(AbstractStringBuilder sb) { if (len != sb.length()) { return false; } - byte v1[] = value; - byte v2[] = sb.getValue(); + byte[] v1 = value; + byte[] v2 = sb.getValue(); byte coder = coder(); if (coder == sb.getCoder()) { - int n = v1.length; - for (int i = 0; i < n; i++) { - if (v1[i] != v2[i]) { - return false; - } - } + return v1.length <= v2.length && ArraysSupport.mismatch(v1, v2, v1.length) < 0; } else { if (coder != LATIN1) { // utf16 str and latin1 abs can never be "equal" return false; } return StringUTF16.contentEquals(v1, v2, len); } - return true; } /** @@ -2024,8 +2018,8 @@ public boolean equalsIgnoreCase(String anotherString) { * lexicographically greater than the string argument. */ public int compareTo(String anotherString) { - byte v1[] = value; - byte v2[] = anotherString.value; + byte[] v1 = value; + byte[] v2 = anotherString.value; byte coder = coder(); if (coder == anotherString.coder()) { return coder == LATIN1 ? StringLatin1.compareTo(v1, v2) @@ -2060,8 +2054,8 @@ private static class CaseInsensitiveComparator private static final long serialVersionUID = 8575799808933029326L; public int compare(String s1, String s2) { - byte v1[] = s1.value; - byte v2[] = s2.value; + byte[] v1 = s1.value; + byte[] v2 = s2.value; byte coder = s1.coder(); if (coder == s2.coder()) { return coder == LATIN1 ? StringLatin1.compareToCI(v1, v2) @@ -2136,26 +2130,23 @@ public int compareToIgnoreCase(String str) { * {@code false} otherwise. */ public boolean regionMatches(int toffset, String other, int ooffset, int len) { - byte tv[] = value; - byte ov[] = other.value; // Note: toffset, ooffset, or len might be near -1>>>1. if ((ooffset < 0) || (toffset < 0) || (toffset > (long)length() - len) || (ooffset > (long)other.length() - len)) { return false; } + byte[] tv = value; + byte[] ov = other.value; byte coder = coder(); if (coder == other.coder()) { - if (!isLatin1() && (len > 0)) { - toffset = toffset << 1; - ooffset = ooffset << 1; - len = len << 1; - } - while (len-- > 0) { - if (tv[toffset++] != ov[ooffset++]) { - return false; - } + if (coder == UTF16) { + toffset <<= UTF16; + ooffset <<= UTF16; + len <<= UTF16; } + return ArraysSupport.mismatch(tv, toffset, + ov, ooffset, len) < 0; } else { if (coder == LATIN1) { while (len-- > 0) { @@ -2235,8 +2226,8 @@ public boolean regionMatches(boolean ignoreCase, int toffset, || (ooffset > (long)other.length() - len)) { return false; } - byte tv[] = value; - byte ov[] = other.value; + byte[] tv = value; + byte[] ov = other.value; byte coder = coder(); if (coder == other.coder()) { return coder == LATIN1 @@ -2270,18 +2261,17 @@ public boolean startsWith(String prefix, int toffset) { if (toffset < 0 || toffset > length() - prefix.length()) { return false; } - byte ta[] = value; - byte pa[] = prefix.value; + byte[] ta = value; + byte[] pa = prefix.value; int po = 0; int pc = pa.length; byte coder = coder(); if (coder == prefix.coder()) { - int to = (coder == LATIN1) ? toffset : toffset << 1; - while (po < pc) { - if (ta[to++] != pa[po++]) { - return false; - } + if (coder == UTF16) { + toffset <<= UTF16; } + return ArraysSupport.mismatch(ta, toffset, + pa, 0, pc) < 0; } else { if (coder == LATIN1) { // && pcoder == UTF16 return false; diff --git a/src/java.base/share/classes/java/lang/StringLatin1.java b/src/java.base/share/classes/java/lang/StringLatin1.java index 526d752f05833..7749597a24a59 100644 --- a/src/java.base/share/classes/java/lang/StringLatin1.java +++ b/src/java.base/share/classes/java/lang/StringLatin1.java @@ -109,12 +109,8 @@ public static int compareTo(byte[] value, byte[] other) { public static int compareTo(byte[] value, byte[] other, int len1, int len2) { int lim = Math.min(len1, len2); - for (int k = 0; k < lim; k++) { - if (value[k] != other[k]) { - return getChar(value, k) - getChar(other, k); - } - } - return len1 - len2; + int k = ArraysSupport.mismatch(value, other, lim); + return (k < 0) ? len1 - len2 : getChar(value, k) - getChar(other, k); } @IntrinsicCandidate diff --git a/test/micro/org/openjdk/bench/java/lang/StringBuilders.java b/test/micro/org/openjdk/bench/java/lang/StringBuilders.java index e2a72a9ecb89c..40f41659e7c00 100644 --- a/test/micro/org/openjdk/bench/java/lang/StringBuilders.java +++ b/test/micro/org/openjdk/bench/java/lang/StringBuilders.java @@ -51,7 +51,9 @@ public class StringBuilders { private String[] str3p9p8; private String[] str22p40p31; private StringBuilder sbLatin1; + private StringBuilder sbLatin2; private StringBuilder sbUtf16; + private StringBuilder sbUtf17; @Setup public void setup() { @@ -64,7 +66,9 @@ public void setup() { str3p9p8 = new String[]{"123", "123456789", "12345678"}; str22p40p31 = new String[]{"1234567890123456789012", "1234567890123456789012345678901234567890", "1234567890123456789012345678901"}; sbLatin1 = new StringBuilder("Latin1 string"); + sbLatin2 = new StringBuilder("Latin1 string"); sbUtf16 = new StringBuilder("UTF-\uFF11\uFF16 string"); + sbUtf17 = new StringBuilder("UTF-\uFF11\uFF16 string"); } @Benchmark @@ -250,6 +254,15 @@ public String toStringCharWithFloat8() { return result.toString(); } + @Benchmark + public int compareToLatin1() { + return sbLatin1.compareTo(sbLatin2); + } + + @Benchmark + public int compareToUTF16() { + return sbUtf16.compareTo(sbUtf17); + } @Benchmark public String toStringCharWithMixed8() { diff --git a/test/micro/org/openjdk/bench/java/lang/StringComparisons.java b/test/micro/org/openjdk/bench/java/lang/StringComparisons.java new file mode 100644 index 0000000000000..b18b56bee2a67 --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/StringComparisons.java @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.openjdk.bench.java.lang; + +import org.openjdk.jmh.annotations.*; + +import java.util.concurrent.TimeUnit; + +/* + * This benchmark naively explores String::startsWith and other String + * comparison methods + */ +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Benchmark) +@Warmup(iterations = 5, time = 1) +@Measurement(iterations = 5, time = 1) +@Fork(value = 3) +public class StringComparisons { + + @Param({"6", "15", "1024"}) + public int size; + + @Param({"true", "false"}) + public boolean utf16; + + public String string; + public String equalString; + public String endsWithA; + public String endsWithB; + public String startsWithA; + + @Setup + public void setup() { + String c = utf16 ? "\uff11" : "c"; + string = c.repeat(size); + equalString = c.repeat(size); + endsWithA = c.repeat(size).concat("A"); + endsWithB = c.repeat(size).concat("B"); + startsWithA = "A" + (c.repeat(size)); + } + + @Benchmark + public boolean startsWith() { + return endsWithA.startsWith(string); + } + + @Benchmark + public boolean endsWith() { + return startsWithA.endsWith(string); + } + + @Benchmark + public boolean regionMatches() { + return endsWithA.regionMatches(0, endsWithB, 0, endsWithB.length()); + } + + @Benchmark + public boolean regionMatchesRange() { + return startsWithA.regionMatches(1, endsWithB, 0, endsWithB.length() - 1); + } + + @Benchmark + public boolean regionMatchesCI() { + return endsWithA.regionMatches(true, 0, endsWithB, 0, endsWithB.length()); + } +} diff --git a/test/micro/org/openjdk/bench/java/lang/StringOther.java b/test/micro/org/openjdk/bench/java/lang/StringOther.java index e6bee3f037690..99ebca8c3d1d1 100644 --- a/test/micro/org/openjdk/bench/java/lang/StringOther.java +++ b/test/micro/org/openjdk/bench/java/lang/StringOther.java @@ -49,17 +49,9 @@ public class StringOther { private String testString; private Random rnd; - private String str1, str2, str3, str4; - private String str1UP; - @Setup public void setup() { testString = "Idealism is what precedes experience; cynicism is what follows."; - str1 = "vm-guld vm-guld vm-guld"; - str1UP = str1.toUpperCase(Locale.ROOT); - str2 = "vm-guld vm-guld vm-guldx"; - str3 = "vm-guld vm-guld vm-guldx"; - str4 = "adadaskasdjierudks"; rnd = new Random(); } @@ -70,15 +62,6 @@ public void charAt(Blackhole bh) { } } - @Benchmark - public int compareTo() { - int total = 0; - total += str1.compareTo(str2); - total += str2.compareTo(str3); - total += str3.compareTo(str4); - return total; - } - /** * Creates (hopefully) unique Strings and internizes them, creating a zillion forgettable strings in the JVMs string * pool. @@ -94,10 +77,4 @@ public String internUnique() { return String.valueOf(rnd.nextInt()).intern(); } - @Benchmark - public void regionMatchesLatin1(Blackhole bh) { - bh.consume(str1.regionMatches(true, 0, str2, 0, str1.length())); - bh.consume(str2.regionMatches(true, 16, str1UP, 0, 8)); - bh.consume(str3.regionMatches(true, 6, str4, 1, 2)); - } }