Skip to content

Commit

Permalink
Added new catch block for character encoding error cases.
Browse files Browse the repository at this point in the history
Added a new catch block in `PathLineIterator` for character encoding
errors, along with a new error message to be given to the user for such
cases.

Added a unit test for malformed XSV locatable files.

Fixes #4006
  • Loading branch information
jonn-smith committed Aug 20, 2018
1 parent 12104fc commit ab38fe9
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ public SimpleKeyXsvFuncotationFactory(final String name,
final Iterator<String> it = pathLineIterator.iterator();

// Get our column names:
annotationColumnNames = createColumnNames( it, numHeaderLinesToIgnore );
annotationColumnNames = createColumnNames(it, numHeaderLinesToIgnore);

// Populate our empty annotation list:
emptyAnnotationList = new ArrayList<>(annotationColumnNames.size());
Expand All @@ -139,7 +139,7 @@ public SimpleKeyXsvFuncotationFactory(final String name,
}

// Populate our annotation map:
populateAnnotationMap( it, permissiveColumns );
populateAnnotationMap(it, permissiveColumns);
}

// Initialize overrides / defaults:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,13 @@ public XsvTableFeature(final int contigColumn, final int startColumn, final int
this.dataSourceName = dataSourceName;

contig = columnValues.get(contigColumn);
start = Integer.valueOf( columnValues.get(startColumn) );
end = Integer.valueOf( columnValues.get(endColumn) );
try {
start = Integer.valueOf(columnValues.get(startColumn));
end = Integer.valueOf(columnValues.get(endColumn));
}
catch ( final NumberFormatException ex ) {
throw new UserException.MalformedFile("Could not convert value (" + ex.getMessage() + ") from input file into a number for Data Source: " + dataSourceName);
}

if ( columnNames.size() != columnValues.size() ) {
throw new UserException.BadInput("Number of columns in given header and data do not match: " + columnNames.size() + " != " + columnValues.size());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
package org.broadinstitute.hellbender.utils.nio;

import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.Utils;

import java.io.IOException;
import java.nio.charset.CharacterCodingException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.Spliterator;
import java.util.function.Consumer;
import java.util.stream.Stream;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.Utils;

/**
* Iterate through the lines of a Path. Works for anything you can point
Expand All @@ -30,10 +32,14 @@ public class PathLineIterator implements AutoCloseable, Iterable<String> {
* @param path path to a text file.
* @throws UserException if we cannot open the file for reading.
*/
public PathLineIterator(Path path) throws UserException {
public PathLineIterator(final Path path) throws UserException {
try {
lines = Files.lines(Utils.nonNull(path, "path shouldn't be null"));
} catch (IOException x) {
}
catch (final CharacterCodingException ex ) {
throw new UserException("Error detected in file character encoding. Possible inconsistent character encodings within the file: " + path.toUri().toString(), ex);
}
catch (final IOException x) {
throw new UserException("Error reading " + path.toUri().toString(), x);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public class FuncotatorTestConstants {
// Data source variables:

public static final String XSV_CSV_FILE_PATH = FUNCOTATOR_TEST_DIR + "xsv_CSV_testFile.csv";

public static final String XSV_TSV_FILE_PATH = FUNCOTATOR_TEST_DIR + "xsv_TSV_testFile.csv";
public static final String XSV_PIPESV_FILE_PATH = FUNCOTATOR_TEST_DIR + "xsv_PIPESV_testFile.xsv";
public static final String XSV_DEADBEEFSV_FILE_PATH = FUNCOTATOR_TEST_DIR + "xsv_DEADBEEFSV_testFile.csv";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ public class XsvLocatableTableCodecUnitTest extends GATKBaseTest {
private static final String TEST_FILE1 = TEST_RESOURCE_DIR + "xsv_locatable_test.csv";
private static final String TEST_FILE2 = TEST_RESOURCE_DIR + "xsv_locatable_test2.tsv";

private static final String TEST_FILE_MIXED_ENCODING = TEST_RESOURCE_DIR + "xsv_locatable_test_mixed_encodings.csv";

/** Uses column names, instead of index */
private static final String TEST_FILE3 = TEST_RESOURCE_DIR + "xsv_locatable_test3.csv";
private static final String TEST_FILE4 = TEST_RESOURCE_DIR + "xsv_locatable_test4.csv";
Expand Down Expand Up @@ -78,6 +80,14 @@ private Object[][] provideForTestCanDecode() {
};
}

/**
 * Data provider for {@code testDecodeCharsetFailure}.
 *
 * @return one row per test case; each row holds a single file path
 *         (currently only {@code TEST_FILE_MIXED_ENCODING}).
 */
@DataProvider
private Object[][] provideForTestDecodeCharsetFailure() {
    final Object[][] testCases = {
            { TEST_FILE_MIXED_ENCODING },
    };
    return testCases;
}

@DataProvider
private Object[][] provideForTestDecode() {

Expand Down Expand Up @@ -109,6 +119,8 @@ private Object[][] provideForTestDecode() {
};
}



@DataProvider
private Object[][] provideForTestReadActualHeader() {
return new Object[][] {
Expand Down Expand Up @@ -164,16 +176,14 @@ public void testCanDecode(final String filePath, final boolean expected) {
Assert.assertEquals(xsvLocatableTableCodec.canDecode(filePath), expected);
}

// decode
@Test(dataProvider = "provideForTestDecode")
public void testDecode(final String filePath, final List<XsvTableFeature> expected) {
private void testDecodeHelper(final String filePath, final List<XsvTableFeature> expected) {
final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec();
if (xsvLocatableTableCodec.canDecode(filePath)) {
try ( final FileInputStream fileInputStream = new FileInputStream(filePath)) {

// Lots of scaffolding to do reading here:
final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream));
final ArrayList<XsvTableFeature> output = new ArrayList<>(expected.size());
final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream));
final ArrayList<XsvTableFeature> output = new ArrayList<>(expected.size());

// Read off the header:
xsvLocatableTableCodec.readActualHeader(lineReaderIterator);
Expand All @@ -198,6 +208,43 @@ public void testDecode(final String filePath, final List<XsvTableFeature> expect
}
}

/**
 * Attempts to decode a malformed file and expects the codec to reject it
 * with a {@link UserException.MalformedFile}.
 *
 * The header is read first, then every remaining line is passed to
 * {@code decode}; the expected exception must surface from one of those calls.
 * Any test-setup problem (file missing, I/O failure, or a path the codec
 * refuses to decode) is reported as a {@link GATKException} so that it is
 * not mistaken for the expected failure.
 *
 * @param filePath path to the malformed input file under test.
 */
@Test(dataProvider = "provideForTestDecodeCharsetFailure",
      expectedExceptions = {UserException.MalformedFile.class})
public void testDecodeCharsetFailure(final String filePath ) {
    final XsvLocatableTableCodec codec = new XsvLocatableTableCodec();

    // Guard: a path the codec will not accept means the test case itself is wrong.
    if ( !codec.canDecode(filePath) ) {
        throw new GATKException("Error - bad test case.");
    }

    try ( final FileInputStream inputStream = new FileInputStream(filePath) ) {

        // Wrap the raw stream in the line iterator that the codec consumes:
        final AsciiLineReaderIterator lineIterator =
                new AsciiLineReaderIterator(AsciiLineReader.from(inputStream));

        // Consume the header before decoding any records:
        codec.readActualHeader(lineIterator);

        // Decode every remaining line; results are discarded because only the exception matters:
        while ( lineIterator.hasNext() ) {
            final String line = lineIterator.next();
            codec.decode(line);
        }
    }
    catch ( final FileNotFoundException ex ) {
        throw new GATKException("Error - could not find test file: " + filePath, ex);
    }
    catch ( final IOException ex ) {
        throw new GATKException("Error - IO problem with file " + filePath, ex);
    }
}

/**
 * Exercises decoding through the shared {@code testDecodeHelper} routine.
 *
 * @param filePath path to the input file to decode.
 * @param expected the features the helper is given as the expected result.
 */
@Test(dataProvider = "provideForTestDecode")
public void testDecode(final String filePath, final List<XsvTableFeature> expected) {
    final String inputPath = filePath;
    final List<XsvTableFeature> expectedFeatures = expected;
    testDecodeHelper(inputPath, expectedFeatures);
}

// readActualHeader
@Test(dataProvider = "provideForTestReadActualHeader")
public void testReadActualHeader(final String filePath, final List<String> expected) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
contig_column = 1
start_column = 3
end_column = 4
xsv_delimiter = ,
name = XSV_LOCATABLE_TEST_NAME
Binary file not shown.

0 comments on commit ab38fe9

Please sign in to comment.