Skip to content

Commit 9540a1f

Browse files
committed
performance refine for IOUtils.contentEquals(Reader, Reader)
1 parent f7efc7b commit 9540a1f

File tree

9 files changed

+2017
-31
lines changed

9 files changed

+2017
-31
lines changed

pom.xml

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,18 @@ file comparators, endian transformation classes, and much more.
242242
</dependencyManagement>
243243

244244
<dependencies>
245+
<dependency>
246+
<groupId>org.openjdk.jmh</groupId>
247+
<artifactId>jmh-core</artifactId>
248+
<version>${jmh.version}</version>
249+
<scope>test</scope>
250+
</dependency>
251+
<dependency>
252+
<groupId>org.openjdk.jmh</groupId>
253+
<artifactId>jmh-generator-annprocess</artifactId>
254+
<version>${jmh.version}</version>
255+
<scope>test</scope>
256+
</dependency>
245257
<dependency>
246258
<groupId>org.junit.jupiter</groupId>
247259
<artifactId>junit-jupiter</artifactId>
@@ -312,6 +324,7 @@ file comparators, endian transformation classes, and much more.
312324
<commons.release.isDistModule>true</commons.release.isDistModule>
313325
<commons.releaseManagerName>Gary Gregory</commons.releaseManagerName>
314326
<commons.releaseManagerKey>86fdc7e2a11262cb</commons.releaseManagerKey>
327+
<jmh.version>1.21</jmh.version>
315328
</properties>
316329

317330
<build>
@@ -549,5 +562,44 @@ file comparators, endian transformation classes, and much more.
549562
<coveralls.skip>true</coveralls.skip>
550563
</properties>
551564
</profile>
565+
<profile>
566+
<id>benchmark</id>
567+
<properties>
568+
<skipTests>true</skipTests>
569+
<benchmark>org.apache</benchmark>
570+
</properties>
571+
<build>
572+
<plugins>
573+
<plugin>
574+
<groupId>org.codehaus.mojo</groupId>
575+
<artifactId>exec-maven-plugin</artifactId>
576+
<version>1.6.0</version>
577+
<executions>
578+
<execution>
579+
<id>benchmark</id>
580+
<phase>test</phase>
581+
<goals>
582+
<goal>exec</goal>
583+
</goals>
584+
<configuration>
585+
<classpathScope>test</classpathScope>
586+
<executable>java</executable>
587+
<arguments>
588+
<argument>-classpath</argument>
589+
<classpath/>
590+
<argument>org.openjdk.jmh.Main</argument>
591+
<argument>-rf</argument>
592+
<argument>json</argument>
593+
<argument>-rff</argument>
594+
<argument>target/jmh-result.${benchmark}.json</argument>
595+
<argument>${benchmark}</argument>
596+
</arguments>
597+
</configuration>
598+
</execution>
599+
</executions>
600+
</plugin>
601+
</plugins>
602+
</build>
603+
</profile>
552604
</profiles>
553605
</project>

src/main/java/org/apache/commons/io/IOUtils.java

Lines changed: 191 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
import java.util.function.Consumer;
5151

5252
import org.apache.commons.io.function.IOConsumer;
53+
import org.apache.commons.io.input.buffer.LineEndUnifiedBufferedReader;
5354
import org.apache.commons.io.output.AppendableWriter;
5455
import org.apache.commons.io.output.ByteArrayOutputStream;
5556
import org.apache.commons.io.output.NullOutputStream;
@@ -157,14 +158,14 @@ public class IOUtils {
157158

158159
/**
159160
* The Unix line separator string.
160-
*
161+
*
161162
* @see StandardLineSeparator#LF
162163
*/
163164
public static final String LINE_SEPARATOR_UNIX = StandardLineSeparator.LF.getString();
164165

165166
/**
166167
* The Windows line separator string.
167-
*
168+
*
168169
* @see StandardLineSeparator#CRLF
169170
*/
170171
public static final String LINE_SEPARATOR_WINDOWS = StandardLineSeparator.CRLF.getString();
@@ -745,23 +746,48 @@ public static long consume(final InputStream input)
745746
@SuppressWarnings("resource")
746747
public static boolean contentEquals(final InputStream input1, final InputStream input2)
747748
throws IOException {
749+
// See the comments in contentEquals(final Reader reader1, final Reader reader2);
750+
// this method mirrors that one, using byte buffers instead of char buffers.
748751
if (input1 == input2) {
749752
return true;
750753
}
751754
if (input1 == null ^ input2 == null) {
752755
return false;
753756
}
754-
final BufferedInputStream bufferedInput1 = buffer(input1);
755-
final BufferedInputStream bufferedInput2 = buffer(input2);
756-
int ch = bufferedInput1.read();
757-
while (EOF != ch) {
758-
final int ch2 = bufferedInput2.read();
759-
if (ch != ch2) {
760-
return false;
757+
758+
byte[] byteArray1 = new byte[DEFAULT_BUFFER_SIZE];
759+
byte[] byteArray2 = new byte[DEFAULT_BUFFER_SIZE];
760+
int nowPos1;
761+
int nowPos2;
762+
int nowRead1;
763+
int nowRead2;
764+
while (true) {
765+
nowPos1 = 0;
766+
nowPos2 = 0;
767+
for (int nowCheck = 0; nowCheck < DEFAULT_BUFFER_SIZE; nowCheck++) {
768+
if (nowPos1 == nowCheck) {
769+
do {
770+
nowRead1 = input1.read(byteArray1, nowPos1, DEFAULT_BUFFER_SIZE - nowPos1);
771+
} while (nowRead1 == 0);
772+
if (nowRead1 == EOF) {
773+
return nowPos2 == nowCheck && input2.read() == EOF;
774+
}
775+
nowPos1 += nowRead1;
776+
}
777+
if (nowPos2 == nowCheck) {
778+
do {
779+
nowRead2 = input2.read(byteArray2, nowPos2, DEFAULT_BUFFER_SIZE - nowPos2);
780+
} while (nowRead2 == 0);
781+
if (nowRead2 == EOF) {
782+
return nowPos1 == nowCheck && input1.read() == EOF;
783+
}
784+
nowPos2 += nowRead2;
785+
}
786+
if (byteArray1[nowCheck] != byteArray2[nowCheck]) {
787+
return false;
788+
}
761789
}
762-
ch = bufferedInput1.read();
763790
}
764-
return bufferedInput2.read() == EOF;
765791
}
766792

767793
/**
@@ -789,19 +815,79 @@ public static boolean contentEquals(final Reader reader1, final Reader reader2)
789815
if (reader1 == null ^ reader2 == null) {
790816
return false;
791817
}
792-
final BufferedReader bufferedInput1 = toBufferedReader(reader1);
793-
final BufferedReader bufferedInput2 = toBufferedReader(reader2);
794818

795-
int ch = bufferedInput1.read();
796-
while (EOF != ch) {
797-
final int ch2 = bufferedInput2.read();
798-
if (ch != ch2) {
799-
return false;
819+
// char buffer array for input1
820+
char[] charArray1 = new char[DEFAULT_BUFFER_SIZE];
821+
// char buffer array for input2
822+
char[] charArray2 = new char[DEFAULT_BUFFER_SIZE];
823+
824+
// the number of chars read into charArray1 from reader1 so far (also the next write offset)
825+
int nowPos1;
826+
// the number of chars read into charArray2 from reader2 so far (also the next write offset)
827+
int nowPos2;
828+
// the chars read this time.
829+
int nowRead;
830+
while (true) {
831+
nowPos1 = 0;
832+
nowPos2 = 0;
833+
/*
834+
* For better performance, this loop is specially designed.
835+
* Since the contents of both inputs must be equal for this method to return true,
836+
* the two char buffers share a single index,
837+
* which simply runs from 0 to DEFAULT_BUFFER_SIZE (8192).
838+
* Each read fetches as many chars as possible, limited both by the reader itself
839+
* and by the remaining space in the array.
840+
* The performance of the following loop can be argued simply:
841+
* 1. If the reader returns only a few chars per read() call,
842+
* then the positions are reset only once every 8192 chars, so the overhead is small.
843+
* 2. If the reader returns many chars per read() call,
844+
* then the buffers fill quickly, which is also cheap.
845+
*/
846+
for (int nowCheck = 0; nowCheck < DEFAULT_BUFFER_SIZE; nowCheck++) {
847+
if (nowPos1 == nowCheck) {
848+
// if nowPos1 == nowCheck,
849+
// then means charArray1[nowCheck]
850+
// is empty now, thus we need to invoke read on input1 first.
851+
do {
852+
// read as many chars as possible, using the remaining spaces of charArray1.
853+
nowRead = reader1.read(charArray1, nowPos1, DEFAULT_BUFFER_SIZE - nowPos1);
854+
} while (nowRead == 0);
855+
if (nowRead == EOF) {
856+
// if input1 ends, then we check if input2 ends too.
857+
// if nowPos2 == nowCheck && input2.read() == EOF,
858+
// we think input2 have no more chars,
859+
// and cannot read more either,
860+
// thus return true.
861+
// otherwise return false.
862+
return nowPos2 == nowCheck && reader2.read() == EOF;
863+
}
864+
nowPos1 += nowRead;
865+
}
866+
if (nowPos2 == nowCheck) {
867+
// if nowPos2 == nowCheck,
868+
// then charArray2[nowCheck]
869+
// is empty now, thus we need to invoke read on reader2 first.
870+
do {
871+
// read as many chars as possible, using the remaining spaces of charArray2.
872+
nowRead = reader2.read(charArray2, nowPos2, DEFAULT_BUFFER_SIZE - nowPos2);
873+
} while (nowRead == 0);
874+
if (nowRead == EOF) {
875+
// if input2 ends, then we check if input1 ends too.
876+
// if nowPos1 == nowCheck && input1.read() == EOF,
877+
// we think input1 have no more chars,
878+
// and cannot read more either,
879+
// thus return true.
880+
// otherwise return false.
881+
return nowPos1 == nowCheck && reader1.read() == EOF;
882+
}
883+
nowPos2 += nowRead;
884+
}
885+
// now both charArray1[nowCheck] and charArray2[nowCheck] are filled, so compare them
886+
if (charArray1[nowCheck] != charArray2[nowCheck]) {
887+
return false;
888+
}
800889
}
801-
ch = bufferedInput1.read();
802890
}
803-
804-
return bufferedInput2.read() == EOF;
805891
}
806892

807893
/**
@@ -827,16 +913,90 @@ public static boolean contentEqualsIgnoreEOL(final Reader reader1, final Reader
827913
if (reader1 == null ^ reader2 == null) {
828914
return false;
829915
}
830-
final BufferedReader br1 = toBufferedReader(reader1);
831-
final BufferedReader br2 = toBufferedReader(reader2);
832916

833-
String line1 = br1.readLine();
834-
String line2 = br2.readLine();
835-
while (line1 != null && line1.equals(line2)) {
836-
line1 = br1.readLine();
837-
line2 = br2.readLine();
917+
final LineEndUnifiedBufferedReader bufferedInput1;
918+
if (reader1 instanceof LineEndUnifiedBufferedReader) {
919+
bufferedInput1 = (LineEndUnifiedBufferedReader) reader1;
920+
} else {
921+
bufferedInput1 = new LineEndUnifiedBufferedReader(reader1);
922+
}
923+
924+
final LineEndUnifiedBufferedReader bufferedInput2;
925+
if (reader2 instanceof LineEndUnifiedBufferedReader) {
926+
bufferedInput2 = (LineEndUnifiedBufferedReader) reader2;
927+
} else {
928+
bufferedInput2 = new LineEndUnifiedBufferedReader(reader2);
929+
}
930+
931+
/*
932+
* This flag records whether the last char consumed was '\n'.
933+
* It is needed because "a" and "a\n" are considered equal when ignoring EOL,
934+
* but "\n" and "\n\n" are not.
935+
*/
936+
boolean justNewLine = true;
937+
938+
int currentChar1;
939+
int currentChar2;
940+
941+
while (true) {
942+
currentChar1 = bufferedInput1.peek();
943+
currentChar2 = bufferedInput2.peek();
944+
945+
if (currentChar1 == EOF) {
946+
if (currentChar2 == EOF) {
947+
return true;
948+
} else {
949+
if (!justNewLine) {
950+
return inputOnlyHaveCRLForEOF( bufferedInput2, currentChar2);
951+
}
952+
return false;
953+
}
954+
} else if (currentChar2 == EOF) {
955+
if (!justNewLine) {
956+
return inputOnlyHaveCRLForEOF(bufferedInput1, currentChar1);
957+
}
958+
return false;
959+
}
960+
if (currentChar1 != currentChar2) {
961+
return false;
962+
}
963+
justNewLine = currentChar1 == '\n';
964+
bufferedInput1.eat();
965+
bufferedInput2.eat();
966+
}
967+
}
968+
969+
/**
970+
* Private helper used only by contentEqualsIgnoreEOL.
971+
* Checks whether the remaining input is exactly one '\n' followed by EOF.
972+
* @param input the input reader
973+
* @param currentChar the char currently peeked from the input
974+
* @return {@code true} if currentChar is '\n' and the read after it returns EOF, {@code false} otherwise
975+
* @throws IOException if reading from the input fails
976+
* @see #contentEqualsIgnoreEOL(Reader, Reader)
977+
*/
978+
private static boolean inputOnlyHaveCRLForEOF(LineEndUnifiedBufferedReader input, int currentChar) throws IOException {
979+
980+
/*
981+
* logically there should be some code like
982+
*
983+
* if (currentChar == EOF) {
984+
* return true;
985+
* }
986+
*
987+
* here.
988+
*
989+
* But actually, if this input's read() is EOF, then we will not invoke this function at all.
990+
* So the check is deleted.
991+
*
992+
* You can go contentEqualsIgnoreEOL for details.
993+
*/
994+
995+
if (currentChar == '\n') {
996+
input.eat();
997+
return input.read() == EOF;
838998
}
839-
return Objects.equals(line1, line2);
999+
return false;
8401000
}
8411001

8421002
/**
@@ -1154,7 +1314,7 @@ public static long copyLarge(final InputStream inputStream, final OutputStream o
11541314
* </p>
11551315
*
11561316
* @param inputStream the <code>InputStream</code> to read, may be {@code null}.
1157-
* @param outputStream the <code>OutputStream</code> to write
1317+
* @param outputStream the <code>OutputStream</code> to write
11581318
* @param buffer the buffer to use for the copy
11591319
* @return the number of bytes copied. or {@code 0} if {@code input} is {@code null}.
11601320
* @throws NullPointerException if the OutputStream is {@code null}.
@@ -3382,7 +3542,7 @@ public static Writer writer(final Appendable appendable) {
33823542
* Instances should NOT be constructed in standard programming.
33833543
*/
33843544
public IOUtils() { //NOSONAR
3385-
3545+
33863546
}
33873547

33883548
}

0 commit comments

Comments
 (0)