Skip to content

Commit 3072544

Browse files
committed
Refactor DefaultSplitCharacter#checkDatePattern to increase performance
DEVSIX-4680
1 parent 43bf5b7 commit 3072544

File tree

3 files changed

+5062
-5
lines changed

3 files changed

+5062
-5
lines changed

itext/src/main/java/com/itextpdf/text/pdf/DefaultSplitCharacter.java

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@
6060
*/
6161
public class DefaultSplitCharacter implements SplitCharacter {
6262

63+
private static final Pattern DATE_PATTERN = Pattern.compile("(\\d{2,4}-\\d{2}-\\d{2,4})");
64+
6365
/**
6466
* An instance of the default SplitCharacter.
6567
*/
@@ -154,11 +156,12 @@ protected char getCurrentCharacter(int current, char[] cc, PdfChunk[] ck) {
154156
}
155157

156158
private char[] checkDatePattern(String data) {
157-
String regex = "(\\d{2,4}-\\d{2}-\\d{2,4})";
158-
Matcher m = Pattern.compile(regex).matcher(data);
159-
if (m.find()) {
160-
String tmpData = m.group(1).replace('-', '\u2011');
161-
data = data.replaceAll(m.group(1), tmpData);
159+
if (data.contains("-")) {
160+
Matcher m = DATE_PATTERN.matcher(data);
161+
if (m.find()) {
162+
String tmpData = m.group(1).replace('-', '\u2011');
163+
data = data.replaceAll(m.group(1), tmpData);
164+
}
162165
}
163166
return data.toCharArray();
164167
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package com.itextpdf.text.pdf;
2+
3+
import java.io.BufferedReader;
4+
import java.io.FileReader;
5+
import java.text.MessageFormat;
6+
import org.junit.Assert;
7+
import org.junit.Test;
8+
9+
public class DefaultSplitCharacterProfilingTest {
10+
11+
private static final String INPUT_DIR = "./src/test/resources/com/itextpdf/text/pdf/DefaultSplitCharacterProfilingTest/";
12+
13+
private static final String CHECK_DATE_PATTERN_FAIL_MESSAGE =
14+
"The test verifies the optimization of the checkDatePattern method. This failure indicates that the optimization was broken.";
15+
16+
private static final String READ_FILE_FAIL_MESSAGE = "Failed to read test file {0}. The test could not be completed.";
17+
18+
private static final int TIME_LIMIT = 20000;
19+
20+
@Test(timeout = 30000)
21+
public void checkDatePatternProfilingTest() {
22+
String testFile = INPUT_DIR + "profilingText.txt";
23+
String str = readFile(testFile);
24+
if (str == null) {
25+
Assert.fail(MessageFormat.format(READ_FILE_FAIL_MESSAGE, testFile));
26+
}
27+
long startTime = System.currentTimeMillis();
28+
for (int i = 0; i < 70000; i++) {
29+
isSplitCharacter(str);
30+
}
31+
long time = System.currentTimeMillis() - startTime;
32+
System.out.println("Test run time: " + time);
33+
Assert.assertTrue(CHECK_DATE_PATTERN_FAIL_MESSAGE, time < TIME_LIMIT);
34+
}
35+
36+
private static void isSplitCharacter(String text) {
37+
new DefaultSplitCharacter().isSplitCharacter(0, 0, text.length() + 1, text.toCharArray(), null);
38+
}
39+
40+
private static String readFile(String fileName) {
41+
StringBuilder stringBuilder = new StringBuilder();
42+
try {
43+
BufferedReader reader = new BufferedReader(new FileReader(fileName));
44+
String line;
45+
while ((line = reader.readLine()) != null) {
46+
stringBuilder.append(line);
47+
}
48+
reader.close();
49+
return stringBuilder.toString();
50+
} catch (Exception e) {
51+
return null;
52+
}
53+
}
54+
}

0 commit comments

Comments
 (0)