From 4a365aa64e36658a12de0a3148fec4d17349b5ab Mon Sep 17 00:00:00 2001 From: Gabriel Einsdorf Date: Mon, 15 Oct 2018 17:10:41 +0200 Subject: [PATCH 1/2] DataHandle: fix findString(..) and support handles with unknown length The findString(..) method used to require the exact length of the handle, this length can be unknown, e.g. for compressed handles. This is no longer the case. --- .../org/scijava/io/handle/DataHandle.java | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/main/java/org/scijava/io/handle/DataHandle.java b/src/main/java/org/scijava/io/handle/DataHandle.java index ccb93d434..c7cbfb654 100644 --- a/src/main/java/org/scijava/io/handle/DataHandle.java +++ b/src/main/java/org/scijava/io/handle/DataHandle.java @@ -350,10 +350,7 @@ default String findString(final boolean saveString, final int blockSize, final StringBuilder out = new StringBuilder(); final long startPos = offset(); long bytesDropped = 0; - final long inputLen = length(); - long maxLen = inputLen - startPos; - final boolean tooLong = saveString && maxLen > MAX_SEARCH_SIZE; - if (tooLong) maxLen = MAX_SEARCH_SIZE; + final long maxLen = saveString ? MAX_SEARCH_SIZE : Long.MAX_VALUE; boolean match = false; int maxTermLen = 0; for (final String term : terminators) { @@ -366,7 +363,10 @@ default String findString(final boolean saveString, final int blockSize, new DataHandleInputStream<>(this), getEncoding()); final char[] buf = new char[blockSize]; long loc = 0; - while (loc < maxLen && offset() < length() - 1) { + int r = 0; + + // NB: we need at least 2 bytes to read a char + while (loc < maxLen && ((r = in.read(buf, 0, blockSize)) > 1)) { // if we're not saving the string, drop any old, unnecessary output if (!saveString) { final int outLen = out.length(); @@ -378,16 +378,12 @@ default String findString(final boolean saveString, final int blockSize, bytesDropped += dropIndex; } } - - // read block from stream - final int r = in.read(buf, 0, blockSize); - if (r <= 0) throw new IOException("Cannot read from stream: " + r); - // append block to output out.append(buf, 0, r); // check output, returning smallest possible string - int min = Integer.MAX_VALUE, tagLen = 0; + int min = Integer.MAX_VALUE; + int tagLen = 0; for (final String t : terminators) { final int len = t.length(); final int start = (int) (loc - bytesDropped - len); @@ -415,7 +411,9 @@ default String findString(final boolean saveString, final int blockSize, } // no match - if (tooLong) throw new IOException("Maximum search length reached."); + if (loc > MAX_SEARCH_SIZE) { + throw new IOException("Maximum search length reached."); + } return saveString ? out.toString() : null; } From a922e158a31e3a0469cc090cdad5ea67ac7f5cc7 Mon Sep 17 00:00:00 2001 From: Gabriel Einsdorf Date: Tue, 16 Oct 2018 10:59:55 +0200 Subject: [PATCH 2/2] Add test for edge cases in the DataHandle default methods --- .../io/handle/DataHandleEdgeCaseTests.java | 142 ++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 src/test/java/org/scijava/io/handle/DataHandleEdgeCaseTests.java diff --git a/src/test/java/org/scijava/io/handle/DataHandleEdgeCaseTests.java b/src/test/java/org/scijava/io/handle/DataHandleEdgeCaseTests.java new file mode 100644 index 000000000..268faee59 --- /dev/null +++ b/src/test/java/org/scijava/io/handle/DataHandleEdgeCaseTests.java @@ -0,0 +1,142 @@ +/*- + * #%L + * SciJava Common shared library for SciJava software. + * %% + * Copyright (C) 2009 - 2018 Board of Regents of the University of + * Wisconsin-Madison, Broad Institute of MIT and Harvard, Max Planck + * Institute of Molecular Cell Biology and Genetics, University of + * Konstanz, and KNIME GmbH. + * %% + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * #L% + */ + +package org.scijava.io.handle; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + +import java.io.IOException; +import java.util.Arrays; + +import org.junit.Test; +import org.scijava.Context; +import org.scijava.io.location.BytesLocation; +import org.scijava.io.location.Location; + +/** + * Additional Tests for edge case behavior of {@link DataHandle}. + * + * @author Gabriel Einsdorf + */ +public class DataHandleEdgeCaseTests { + + private static final byte[] BYTES = { // + 'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '\n', // + 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, -128, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, // + 125, 127, -127, -125, -3, 'h', 'e' }; + + /** + * Test to ensure {@link DataHandle#findString(String...)} and + * {@link DataHandle#readCString()} work with {@link DataHandle} + * implementations that have unknown length. + * + * @throws IOException + */ + @Test + public void testFindStringsOnUnknonwLengthHandle() throws IOException { + final Context ctx = new Context(); + final DataHandleService dhs = ctx.getService(DataHandleService.class); + final DummyHandle dummy = new DummyHandle(dhs.create(new BytesLocation( + BYTES))); + + assertEquals("Hello,", dummy.findString(",")); + assertEquals(" world\n", dummy.findString("\n")); + + dummy.seek(41); + assertEquals("he", dummy.findString("\n")); + + dummy.seek(16); + assertArrayEquals(Arrays.copyOfRange(BYTES, 16, 23), dummy.readCString() + .getBytes()); + dummy.seek(42); + assertNull(dummy.readCString()); + } + + private class DummyHandle extends AbstractHigherOrderHandle { + + public DummyHandle(final DataHandle handle) { + super(handle); + } + + @Override + public long length() throws IOException { + return -1; + } + + @Override + public long offset() throws IOException { + return handle().offset(); + } + + @Override + public void seek(final long pos) throws IOException { + handle().seek(pos); + } + + @Override + public void setLength(final long length) throws IOException { + handle().setLength(length); + } + + @Override + public int read(final byte[] b, final int off, final int len) + throws IOException + { + return handle().read(b, off, len); + } + + @Override + public byte readByte() throws IOException { + return handle().readByte(); + } + + @Override + public void write(final int b) throws IOException { + handle().write(b); + } + + @Override + public void write(final byte[] b, final int off, final int len) + throws IOException + { + handle().write(b, off, len); + } + + @Override + protected void cleanup() throws IOException { + // + } + } + +}