Skip to content

Commit c25f352

Browse files
cost0much authored and Cesar Soares Lucas committed
8341735: Rewrite the build/AbsPathsInImage.java test to not load the entire file at once
Reviewed-by: erikj
1 parent ebf9c5b commit c25f352

File tree

1 file changed

+145
-58
lines changed

1 file changed

+145
-58
lines changed

test/jdk/build/AbsPathsInImage.java

Lines changed: 145 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
* questions.
2222
*/
2323

24+
import java.io.ByteArrayOutputStream;
2425
import java.io.IOException;
2526
import java.io.InputStream;
2627
import java.nio.file.FileVisitResult;
@@ -35,14 +36,16 @@
3536
import java.util.zip.ZipEntry;
3637
import java.util.zip.ZipInputStream;
3738

39+
import static java.util.Comparator.comparing;
40+
3841
/*
3942
* @test
4043
* @bug 8226346
4144
* @summary Check all output files for absolute path fragments
4245
* @requires !vm.debug
4346
* @comment ASAN keeps the 'unwanted' paths in the binaries because of its build options
4447
* @requires !vm.asan
45-
* @run main/othervm -Xmx900m AbsPathsInImage
48+
* @run main AbsPathsInImage
4649
*/
4750
public class AbsPathsInImage {
4851

@@ -51,9 +54,14 @@ public class AbsPathsInImage {
5154
public static final String DIR_PROPERTY = "jdk.test.build.AbsPathsInImage.dir";
5255
private static final boolean IS_WINDOWS = System.getProperty("os.name").toLowerCase().contains("windows");
5356
private static final boolean IS_LINUX = System.getProperty("os.name").toLowerCase().contains("linux");
57+
private static final int DEFAULT_BUFFER_SIZE = 8192;
58+
private static List<byte[]> searchPatterns = new ArrayList<>();
59+
private static List<int[]> prefixTables = new ArrayList<>();
5460

5561
private boolean matchFound = false;
5662

63+
record Match(int begin, int end) { }
64+
5765
public static void main(String[] args) throws Exception {
5866
String jdkPathString = System.getProperty("test.jdk");
5967
Path jdkHome = Paths.get(jdkPathString);
@@ -107,9 +115,9 @@ public static void main(String[] args) throws Exception {
107115
throw new Error("Output root is not an absolute path: " + buildOutputRoot);
108116
}
109117

110-
List<byte[]> searchPatterns = new ArrayList<>();
111-
expandPatterns(searchPatterns, buildWorkspaceRoot);
112-
expandPatterns(searchPatterns, buildOutputRoot);
118+
expandPatterns(buildWorkspaceRoot);
119+
expandPatterns(buildOutputRoot);
120+
createPrefixTables();
113121

114122
System.out.println("Looking for:");
115123
for (byte[] searchPattern : searchPatterns) {
@@ -118,7 +126,7 @@ public static void main(String[] args) throws Exception {
118126
System.out.println();
119127

120128
AbsPathsInImage absPathsInImage = new AbsPathsInImage();
121-
absPathsInImage.scanFiles(dirToScan, searchPatterns);
129+
absPathsInImage.scanFiles(dirToScan);
122130

123131
if (absPathsInImage.matchFound) {
124132
throw new Exception("Test failed");
@@ -129,7 +137,7 @@ public static void main(String[] args) throws Exception {
129137
* Add path pattern to list of patterns to search for. Create all possible
130138
* variants depending on platform.
131139
*/
132-
private static void expandPatterns(List<byte[]> searchPatterns, String pattern) {
140+
private static void expandPatterns(String pattern) {
133141
if (IS_WINDOWS) {
134142
String forward = pattern.replace('\\', '/');
135143
String back = pattern.replace('/', '\\');
@@ -151,7 +159,42 @@ private static void expandPatterns(List<byte[]> searchPatterns, String pattern)
151159
}
152160
}
153161

154-
private void scanFiles(Path root, List<byte[]> searchPatterns) throws IOException {
162+
/**
163+
* The failure function for KMP. Returns the correct index in the pattern to jump
164+
* back to when encountering a mismatched character. Used in both
165+
* createPrefixTables (pre-processing) and scanBytes (matching).
166+
*/
167+
private static int getPrefixIndex(int patternIdx, int state, byte match) {
168+
if (state == 0) {
169+
return 0;
170+
}
171+
byte[] searchPattern = searchPatterns.get(patternIdx);
172+
int[] prefixTable = prefixTables.get(patternIdx);
173+
int i = prefixTable[state - 1];
174+
while (i > 0 && searchPattern[i] != match) {
175+
i = prefixTable[i - 1];
176+
}
177+
return searchPattern[i] == match ? i + 1 : i;
178+
}
179+
180+
/**
181+
* Pre-processing string patterns for Knuth–Morris–Pratt (KMP) search algorithm.
182+
* Lookup tables of longest prefixes at each given index are created for each
183+
* search pattern string. These tables are later used in scanBytes during matching
184+
* as lookups for failure state transitions.
185+
*/
186+
private static void createPrefixTables() {
187+
for (int patternIdx = 0; patternIdx < searchPatterns.size(); patternIdx++) {
188+
int patternLen = searchPatterns.get(patternIdx).length;
189+
int[] prefixTable = new int[patternLen];
190+
prefixTables.add(prefixTable);
191+
for (int i = 1; i < patternLen; i++) {
192+
prefixTable[i] = getPrefixIndex(patternIdx, i, searchPatterns.get(patternIdx)[i]);
193+
}
194+
}
195+
}
196+
197+
private void scanFiles(Path root) throws IOException {
155198
Files.walkFileTree(root, new SimpleFileVisitor<>() {
156199
@Override
157200
public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
@@ -170,84 +213,128 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
170213
} else if ((fileName.endsWith(".debuginfo") && !IS_LINUX) || fileName.endsWith(".pdb")) {
171214
// Do nothing
172215
} else if (fileName.endsWith(".zip")) {
173-
scanZipFile(file, searchPatterns);
216+
scanZipFile(file);
174217
} else {
175-
scanFile(file, searchPatterns);
218+
scanFile(file);
176219
}
177220
return super.visitFile(file, attrs);
178221
}
179222
});
180223
}
181224

182-
private void scanFile(Path file, List<byte[]> searchPatterns) throws IOException {
183-
List<String> matches = scanBytes(Files.readAllBytes(file), searchPatterns);
184-
if (matches.size() > 0) {
185-
matchFound = true;
186-
System.out.println(file + ":");
187-
for (String match : matches) {
188-
System.out.println(match);
189-
}
190-
System.out.println();
225+
private void scanFile(Path file) throws IOException {
226+
List<Match> matches;
227+
try (InputStream inputStream = Files.newInputStream(file)) {
228+
matches = scanBytes(inputStream);
229+
}
230+
// test succeeds
231+
if (matches.size() == 0) {
232+
return;
233+
}
234+
// test fails; pay penalty and re-scan file for debug output
235+
try (InputStream inputStream = Files.newInputStream(file)) {
236+
printDebugOutput(inputStream, matches, file + ":");
191237
}
192238
}
193239

194-
private void scanZipFile(Path zipFile, List<byte[]> searchPatterns) throws IOException {
240+
private void scanZipFile(Path zipFile) throws IOException {
241+
List<List<Match>> entryMatches = new ArrayList<>();
242+
boolean found = false;
243+
ZipEntry zipEntry;
195244
try (ZipInputStream zipInputStream = new ZipInputStream(Files.newInputStream(zipFile))) {
196-
ZipEntry zipEntry;
197245
while ((zipEntry = zipInputStream.getNextEntry()) != null) {
198-
List<String> matches = scanBytes(zipInputStream.readAllBytes(), searchPatterns);
246+
List<Match> matches = scanBytes(zipInputStream);
199247
if (matches.size() > 0) {
200-
matchFound = true;
201-
System.out.println(zipFile + ", " + zipEntry.getName() + ":");
202-
for (String match : matches) {
203-
System.out.println(match);
204-
}
205-
System.out.println();
248+
entryMatches.add(matches);
249+
found = true;
250+
} else {
251+
entryMatches.add(null);
252+
}
253+
}
254+
}
255+
// test succeeds
256+
if (!found) {
257+
return;
258+
}
259+
// test fails
260+
try (ZipInputStream zipInputStream = new ZipInputStream(Files.newInputStream(zipFile))) {
261+
int i = 0;
262+
while ((zipEntry = zipInputStream.getNextEntry()) != null) {
263+
List<Match> matches = entryMatches.get(i);
264+
i++;
265+
if (matches != null) {
266+
printDebugOutput(zipInputStream, matches, zipFile + ", " + zipEntry.getName() + ":");
206267
}
207268
}
208269
}
209270
}
210271

211-
private List<String> scanBytes(byte[] data, List<byte[]> searchPatterns) {
212-
List<String> matches = new ArrayList<>();
213-
for (int i = 0; i < data.length; i++) {
214-
for (byte[] searchPattern : searchPatterns) {
215-
boolean found = true;
216-
for (int j = 0; j < searchPattern.length; j++) {
217-
if ((i + j >= data.length || data[i + j] != searchPattern[j])) {
218-
found = false;
272+
/**
273+
* Scans the input byte by byte for matches against any of searchPatterns, using KMP
274+
* to perform the matching. Keeps track of the current matched index (state) for each
275+
* search pattern. At each byte, updates the states accordingly (increment on a match,
276+
* failure function transition on a mismatch). Returns a list of Match objects.
277+
*/
278+
private List<Match> scanBytes(InputStream input) throws IOException {
279+
List<Match> matches = new ArrayList<>();
280+
byte[] buf = new byte[DEFAULT_BUFFER_SIZE];
281+
int[] states = new int[searchPatterns.size()];
282+
int fileIdx = 0;
283+
int bytesRead, patternLen;
284+
while ((bytesRead = input.read(buf)) != -1) {
285+
for (int bufIdx = 0; bufIdx < bytesRead; bufIdx++, fileIdx++) {
286+
byte datum = buf[bufIdx];
287+
for (int i = 0; i < searchPatterns.size(); i++) {
288+
patternLen = searchPatterns.get(i).length;
289+
if (datum != searchPatterns.get(i)[states[i]]) {
290+
states[i] = getPrefixIndex(i, states[i], datum);
291+
} else if (++states[i] == patternLen) {
292+
// Technically, after a complete match the state should be reset according to the failure
293+
// function, but the original test did not search the same string for multiple matches either.
294+
states[i] = 0;
295+
matches.add(new Match(fileIdx - patternLen + 1, fileIdx));
219296
break;
220297
}
221298
}
222-
if (found) {
223-
matches.add(new String(data, charsStart(data, i), charsOffset(data, i, searchPattern.length)));
224-
// No need to search the same string for multiple patterns
225-
break;
226-
}
227299
}
228300
}
229301
return matches;
230302
}
231303

232-
private int charsStart(byte[] data, int startIndex) {
233-
int index = startIndex;
234-
while (--index > 0) {
235-
byte datum = data[index];
236-
if (datum < 32 || datum > 126) {
237-
break;
238-
}
239-
}
240-
return index + 1;
241-
}
242-
243-
private int charsOffset(byte[] data, int startIndex, int startOffset) {
244-
int offset = startOffset;
245-
while (startIndex + ++offset < data.length) {
246-
byte datum = data[startIndex + offset];
247-
if (datum < 32 || datum > 126) {
248-
break;
304+
/**
305+
* In the original test, the failure output backtracked from the matched pattern to the
306+
* preceding non-printable byte (outside ASCII 32-126). That is incompatible with the new
307+
* buffered approach (a proper solution would require a second, dynamic buffer). Instead,
308+
* when a match is found, the file is scanned a second time to print debug output. Failing
309+
* runs pay an additional performance/space penalty, but passing runs are faster.
310+
*/
311+
private void printDebugOutput(InputStream input, List<Match> matches, final String HEADER) throws IOException{
312+
matchFound = true;
313+
System.out.println(HEADER);
314+
matches.sort(comparing(Match::begin));
315+
ByteArrayOutputStream output = new ByteArrayOutputStream();
316+
byte[] buf = new byte[DEFAULT_BUFFER_SIZE];
317+
int matchIdx = 0;
318+
int fileIdx = 0;
319+
int bytesRead;
320+
while (matchIdx < matches.size() && (bytesRead = input.read(buf)) != -1) {
321+
for (int i = 0; matchIdx < matches.size() && i < bytesRead; i++, fileIdx++) {
322+
byte datum = buf[i];
323+
if (datum >= 32 && datum <= 126) {
324+
output.write(datum);
325+
} else if (fileIdx < matches.get(matchIdx).begin()) {
326+
output.reset();
327+
} else if (fileIdx > matches.get(matchIdx).end()) {
328+
System.out.println(output.toString());
329+
output.reset();
330+
// This is imperfect: it can be incorrect in edge cases where patterns contain non-ASCII bytes,
331+
// but high accuracy is not a priority here and the output is still legible and useful.
332+
for (; matchIdx < matches.size() && matches.get(matchIdx).end() < fileIdx; matchIdx++);
333+
} else {
334+
output.write(datum);
335+
}
249336
}
250337
}
251-
return offset;
338+
System.out.println();
252339
}
253340
}

0 commit comments

Comments
 (0)