2121 * questions.
2222 */
2323
24+ import java .io .ByteArrayOutputStream ;
2425import java .io .IOException ;
2526import java .io .InputStream ;
2627import java .nio .file .FileVisitResult ;
3536import java .util .zip .ZipEntry ;
3637import java .util .zip .ZipInputStream ;
3738
39+ import static java .util .Comparator .comparing ;
40+
3841/*
3942 * @test
4043 * @bug 8226346
4144 * @summary Check all output files for absolute path fragments
4245 * @requires !vm.debug
4346 * @comment ASAN keeps the 'unwanted' paths in the binaries because of its build options
4447 * @requires !vm.asan
45- * @run main/othervm -Xmx900m AbsPathsInImage
48+ * @run main AbsPathsInImage
4649 */
4750public class AbsPathsInImage {
4851
@@ -51,9 +54,14 @@ public class AbsPathsInImage {
5154 public static final String DIR_PROPERTY = "jdk.test.build.AbsPathsInImage.dir" ;
5255 private static final boolean IS_WINDOWS = System .getProperty ("os.name" ).toLowerCase ().contains ("windows" );
5356 private static final boolean IS_LINUX = System .getProperty ("os.name" ).toLowerCase ().contains ("linux" );
57+ private static final int DEFAULT_BUFFER_SIZE = 8192 ;
58+ private static List <byte []> searchPatterns = new ArrayList <>();
59+ private static List <int []> prefixTables = new ArrayList <>();
5460
5561 private boolean matchFound = false ;
5662
63+ record Match (int begin , int end ) { }
64+
5765 public static void main (String [] args ) throws Exception {
5866 String jdkPathString = System .getProperty ("test.jdk" );
5967 Path jdkHome = Paths .get (jdkPathString );
@@ -107,9 +115,9 @@ public static void main(String[] args) throws Exception {
107115 throw new Error ("Output root is not an absolute path: " + buildOutputRoot );
108116 }
109117
110- List < byte []> searchPatterns = new ArrayList <>( );
111- expandPatterns (searchPatterns , buildWorkspaceRoot );
112- expandPatterns ( searchPatterns , buildOutputRoot );
118+ expandPatterns ( buildWorkspaceRoot );
119+ expandPatterns (buildOutputRoot );
120+ createPrefixTables ( );
113121
114122 System .out .println ("Looking for:" );
115123 for (byte [] searchPattern : searchPatterns ) {
@@ -118,7 +126,7 @@ public static void main(String[] args) throws Exception {
118126 System .out .println ();
119127
120128 AbsPathsInImage absPathsInImage = new AbsPathsInImage ();
121- absPathsInImage .scanFiles (dirToScan , searchPatterns );
129+ absPathsInImage .scanFiles (dirToScan );
122130
123131 if (absPathsInImage .matchFound ) {
124132 throw new Exception ("Test failed" );
@@ -129,7 +137,7 @@ public static void main(String[] args) throws Exception {
129137 * Add path pattern to list of patterns to search for. Create all possible
130138 * variants depending on platform.
131139 */
132- private static void expandPatterns (List < byte []> searchPatterns , String pattern ) {
140+ private static void expandPatterns (String pattern ) {
133141 if (IS_WINDOWS ) {
134142 String forward = pattern .replace ('\\' , '/' );
135143 String back = pattern .replace ('/' , '\\' );
@@ -151,7 +159,42 @@ private static void expandPatterns(List<byte[]> searchPatterns, String pattern)
151159 }
152160 }
153161
154- private void scanFiles (Path root , List <byte []> searchPatterns ) throws IOException {
162+ /**
163+ * The failure function for KMP. Returns the correct index in the pattern to jump
164+ * back to when encountering a mismatched character. Used in both
165+ * createPrefixTables (pre-processing) and scanBytes (matching).
166+ */
167+ private static int getPrefixIndex (int patternIdx , int state , byte match ) {
168+ if (state == 0 ) {
169+ return 0 ;
170+ }
171+ byte [] searchPattern = searchPatterns .get (patternIdx );
172+ int [] prefixTable = prefixTables .get (patternIdx );
173+ int i = prefixTable [state - 1 ];
174+ while (i > 0 && searchPattern [i ] != match ) {
175+ i = prefixTable [i - 1 ];
176+ }
177+ return searchPattern [i ] == match ? i + 1 : i ;
178+ }
179+
180+ /**
181+ * Pre-processing string patterns for Knuth–Morris–Pratt (KMP) search algorithm.
182+ * Lookup tables of longest prefixes at each given index are created for each
183+ * search pattern string. These tables are later used in scanBytes during matching
184+ * as lookups for failure state transitions.
185+ */
186+ private static void createPrefixTables () {
187+ for (int patternIdx = 0 ; patternIdx < searchPatterns .size (); patternIdx ++) {
188+ int patternLen = searchPatterns .get (patternIdx ).length ;
189+ int [] prefixTable = new int [patternLen ];
190+ prefixTables .add (prefixTable );
191+ for (int i = 1 ; i < patternLen ; i ++) {
192+ prefixTable [i ] = getPrefixIndex (patternIdx , i , searchPatterns .get (patternIdx )[i ]);
193+ }
194+ }
195+ }
196+
197+ private void scanFiles (Path root ) throws IOException {
155198 Files .walkFileTree (root , new SimpleFileVisitor <>() {
156199 @ Override
157200 public FileVisitResult preVisitDirectory (Path dir , BasicFileAttributes attrs ) throws IOException {
@@ -170,84 +213,128 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IO
170213 } else if ((fileName .endsWith (".debuginfo" ) && !IS_LINUX ) || fileName .endsWith (".pdb" )) {
171214 // Do nothing
172215 } else if (fileName .endsWith (".zip" )) {
173- scanZipFile (file , searchPatterns );
216+ scanZipFile (file );
174217 } else {
175- scanFile (file , searchPatterns );
218+ scanFile (file );
176219 }
177220 return super .visitFile (file , attrs );
178221 }
179222 });
180223 }
181224
182- private void scanFile (Path file , List <byte []> searchPatterns ) throws IOException {
183- List <String > matches = scanBytes (Files .readAllBytes (file ), searchPatterns );
184- if (matches .size () > 0 ) {
185- matchFound = true ;
186- System .out .println (file + ":" );
187- for (String match : matches ) {
188- System .out .println (match );
189- }
190- System .out .println ();
225+ private void scanFile (Path file ) throws IOException {
226+ List <Match > matches ;
227+ try (InputStream inputStream = Files .newInputStream (file )) {
228+ matches = scanBytes (inputStream );
229+ }
230+ // test succeeds
231+ if (matches .size () == 0 ) {
232+ return ;
233+ }
234+ // test fails; pay penalty and re-scan file for debug output
235+ try (InputStream inputStream = Files .newInputStream (file )) {
236+ printDebugOutput (inputStream , matches , file + ":" );
191237 }
192238 }
193239
194- private void scanZipFile (Path zipFile , List <byte []> searchPatterns ) throws IOException {
240+ private void scanZipFile (Path zipFile ) throws IOException {
241+ List <List <Match >> entryMatches = new ArrayList <>();
242+ boolean found = false ;
243+ ZipEntry zipEntry ;
195244 try (ZipInputStream zipInputStream = new ZipInputStream (Files .newInputStream (zipFile ))) {
196- ZipEntry zipEntry ;
197245 while ((zipEntry = zipInputStream .getNextEntry ()) != null ) {
198- List <String > matches = scanBytes (zipInputStream . readAllBytes (), searchPatterns );
246+ List <Match > matches = scanBytes (zipInputStream );
199247 if (matches .size () > 0 ) {
200- matchFound = true ;
201- System .out .println (zipFile + ", " + zipEntry .getName () + ":" );
202- for (String match : matches ) {
203- System .out .println (match );
204- }
205- System .out .println ();
248+ entryMatches .add (matches );
249+ found = true ;
250+ } else {
251+ entryMatches .add (null );
252+ }
253+ }
254+ }
255+ // test succeeds
256+ if (!found ) {
257+ return ;
258+ }
259+ // test fails
260+ try (ZipInputStream zipInputStream = new ZipInputStream (Files .newInputStream (zipFile ))) {
261+ int i = 0 ;
262+ while ((zipEntry = zipInputStream .getNextEntry ()) != null ) {
263+ List <Match > matches = entryMatches .get (i );
264+ i ++;
265+ if (matches != null ) {
266+ printDebugOutput (zipInputStream , matches , zipFile + ", " + zipEntry .getName () + ":" );
206267 }
207268 }
208269 }
209270 }
210271
211- private List <String > scanBytes (byte [] data , List <byte []> searchPatterns ) {
212- List <String > matches = new ArrayList <>();
213- for (int i = 0 ; i < data .length ; i ++) {
214- for (byte [] searchPattern : searchPatterns ) {
215- boolean found = true ;
216- for (int j = 0 ; j < searchPattern .length ; j ++) {
217- if ((i + j >= data .length || data [i + j ] != searchPattern [j ])) {
218- found = false ;
272+ /**
273+ * Scans each byte until encounters a match with one of searchPatterns. Uses KMP to
274+ * perform matches. Keep track of current matched index (states) for each search
275+ * pattern. At each given byte, update states accordingly (increment if match or
276+ * failure function transition if mismatch). Returns a list of Match objects.
277+ */
278+ private List <Match > scanBytes (InputStream input ) throws IOException {
279+ List <Match > matches = new ArrayList <>();
280+ byte [] buf = new byte [DEFAULT_BUFFER_SIZE ];
281+ int [] states = new int [searchPatterns .size ()];
282+ int fileIdx = 0 ;
283+ int bytesRead , patternLen ;
284+ while ((bytesRead = input .read (buf )) != -1 ) {
285+ for (int bufIdx = 0 ; bufIdx < bytesRead ; bufIdx ++, fileIdx ++) {
286+ byte datum = buf [bufIdx ];
287+ for (int i = 0 ; i < searchPatterns .size (); i ++) {
288+ patternLen = searchPatterns .get (i ).length ;
289+ if (datum != searchPatterns .get (i )[states [i ]]) {
290+ states [i ] = getPrefixIndex (i , states [i ], datum );
291+ } else if (++states [i ] == patternLen ) {
292+ // technically at last match, state should reset according to failure function
293+ // but in original test, matching didn't search same string for multiple matches
294+ states [i ] = 0 ;
295+ matches .add (new Match (fileIdx - patternLen + 1 , fileIdx ));
219296 break ;
220297 }
221298 }
222- if (found ) {
223- matches .add (new String (data , charsStart (data , i ), charsOffset (data , i , searchPattern .length )));
224- // No need to search the same string for multiple patterns
225- break ;
226- }
227299 }
228300 }
229301 return matches ;
230302 }
231303
232- private int charsStart (byte [] data , int startIndex ) {
233- int index = startIndex ;
234- while (--index > 0 ) {
235- byte datum = data [index ];
236- if (datum < 32 || datum > 126 ) {
237- break ;
238- }
239- }
240- return index + 1 ;
241- }
242-
243- private int charsOffset (byte [] data , int startIndex , int startOffset ) {
244- int offset = startOffset ;
245- while (startIndex + ++offset < data .length ) {
246- byte datum = data [startIndex + offset ];
247- if (datum < 32 || datum > 126 ) {
248- break ;
304+ /**
305+ * In original test, failed test output would backtrack to last non-ascii byte on
306+ * matched pattern. This is incompatible with the new buffered approach (and a
307+ * proper solution requires a 2nd dynamic buffer). Instead, on failed test case,
308+ * files are scanned a 2nd time to print debug output. Failed runs will pay
309+ * additional performance/space penalty, but passing runs are faster.
310+ */
311+ private void printDebugOutput (InputStream input , List <Match > matches , final String HEADER ) throws IOException {
312+ matchFound = true ;
313+ System .out .println (HEADER );
314+ matches .sort (comparing (Match ::begin ));
315+ ByteArrayOutputStream output = new ByteArrayOutputStream ();
316+ byte [] buf = new byte [DEFAULT_BUFFER_SIZE ];
317+ int matchIdx = 0 ;
318+ int fileIdx = 0 ;
319+ int bytesRead ;
320+ while (matchIdx < matches .size () && (bytesRead = input .read (buf )) != -1 ) {
321+ for (int i = 0 ; matchIdx < matches .size () && i < bytesRead ; i ++, fileIdx ++) {
322+ byte datum = buf [i ];
323+ if (datum >= 32 && datum <= 126 ) {
324+ output .write (datum );
325+ } else if (fileIdx < matches .get (matchIdx ).begin ()) {
326+ output .reset ();
327+ } else if (fileIdx > matches .get (matchIdx ).end ()) {
328+ System .out .println (output .toString ());
329+ output .reset ();
330+ // This imperfect as incorrect in edge cases with patterns containing non-ascii?
331+ // but high-accuracy not priority + output still legible and useful
332+ for (; matchIdx < matches .size () && matches .get (matchIdx ).end () < fileIdx ; matchIdx ++);
333+ } else {
334+ output .write (datum );
335+ }
249336 }
250337 }
251- return offset ;
338+ System . out . println () ;
252339 }
253340}
0 commit comments