diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/SimpleKeyXsvFuncotationFactory.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/SimpleKeyXsvFuncotationFactory.java
index 2ae5e807d4f..a7cf29792ae 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/SimpleKeyXsvFuncotationFactory.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/xsv/SimpleKeyXsvFuncotationFactory.java
@@ -130,7 +130,7 @@ public SimpleKeyXsvFuncotationFactory(final String name,
             final Iterator it = pathLineIterator.iterator();
 
             // Get our column names:
-            annotationColumnNames = createColumnNames( it, numHeaderLinesToIgnore );
+            annotationColumnNames = createColumnNames(it, numHeaderLinesToIgnore);
 
             // Populate our empty annotation list:
             emptyAnnotationList = new ArrayList<>(annotationColumnNames.size());
@@ -139,7 +139,7 @@ public SimpleKeyXsvFuncotationFactory(final String name,
             }
 
             // Populate our annotation map:
-            populateAnnotationMap( it, permissiveColumns );
+            populateAnnotationMap(it, permissiveColumns);
         }
 
         // Initialize overrides / defaults:
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvTableFeature.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvTableFeature.java
index fe6a4e4ede8..0e6eec2b270 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvTableFeature.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvTableFeature.java
@@ -66,8 +66,13 @@ public XsvTableFeature(final int contigColumn, final int startColumn, final int
         this.dataSourceName = dataSourceName;
 
         contig = columnValues.get(contigColumn);
-        start = Integer.valueOf( columnValues.get(startColumn) );
-        end = Integer.valueOf( columnValues.get(endColumn) );
+        try {
+            start = Integer.valueOf(columnValues.get(startColumn));
+            end = Integer.valueOf(columnValues.get(endColumn));
+        }
+        catch ( final NumberFormatException ex ) {
+            throw new UserException.MalformedFile("Could not convert value (" + ex.getMessage() + ") from input file into a number for Data Source: " + dataSourceName);
+        }
 
         if ( columnNames.size() != columnValues.size() ) {
             throw new UserException.BadInput("Number of columns in given header and data do not match: " + columnNames.size() + " != " + columnValues.size());
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/nio/PathLineIterator.java b/src/main/java/org/broadinstitute/hellbender/utils/nio/PathLineIterator.java
index cd1b1d78194..7d2045639a6 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/nio/PathLineIterator.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/nio/PathLineIterator.java
@@ -1,14 +1,16 @@
 package org.broadinstitute.hellbender.utils.nio;
 
+import org.broadinstitute.hellbender.exceptions.UserException;
+import org.broadinstitute.hellbender.utils.Utils;
+
 import java.io.IOException;
+import java.nio.charset.CharacterCodingException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Iterator;
 import java.util.Spliterator;
 import java.util.function.Consumer;
 import java.util.stream.Stream;
-import org.broadinstitute.hellbender.exceptions.UserException;
-import org.broadinstitute.hellbender.utils.Utils;
 
 /**
  * Iterate through the lines of a Path. Works for anything you can point
@@ -28,12 +30,19 @@ public class PathLineIterator implements AutoCloseable, Iterable {
      * to close it automatically.
      *
      * @param path path to a text file.
      * @throws UserException if we cannot open the file for reading.
      */
-    public PathLineIterator(Path path) throws UserException {
+    public PathLineIterator(final Path path) {
         try {
             lines = Files.lines(Utils.nonNull(path, "path shouldn't be null"));
-        } catch (IOException x) {
+        }
+        catch (final CharacterCodingException ex ) {
+            // NOTE(review): Files.lines() is lazily populated, so decoding problems normally show up
+            // while the stream is consumed (wrapped in an UncheckedIOException) rather than here -
+            // confirm that the iteration path reports those as a UserException too.
+            throw new UserException("Error detected in file character encoding.  Possible inconsistent character encodings within the file: " + path.toUri().toString(), ex);
+        }
+        catch (final IOException x) {
             throw new UserException("Error reading " + path.toUri().toString(), x);
         }
     }
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorTestConstants.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorTestConstants.java
index 99ba4a10cd4..bde7dcaac7e 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorTestConstants.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorTestConstants.java
@@ -46,6 +46,7 @@ public class FuncotatorTestConstants {
 
     // Data source variables:
     public static final String XSV_CSV_FILE_PATH = FUNCOTATOR_TEST_DIR + "xsv_CSV_testFile.csv";
+    public static final String XSV_TSV_FILE_PATH = FUNCOTATOR_TEST_DIR + "xsv_TSV_testFile.csv";
     public static final String XSV_PIPESV_FILE_PATH = FUNCOTATOR_TEST_DIR + "xsv_PIPESV_testFile.xsv";
     public static final String XSV_DEADBEEFSV_FILE_PATH = FUNCOTATOR_TEST_DIR + "xsv_DEADBEEFSV_testFile.csv";
 
diff --git a/src/test/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodecUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodecUnitTest.java
index 1cdbdc94237..6a32fc422f6 100644
--- a/src/test/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodecUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/XsvLocatableTableCodecUnitTest.java
@@ -32,6 +32,8 @@ public class XsvLocatableTableCodecUnitTest extends GATKBaseTest {
 
     private static final String TEST_FILE1 = TEST_RESOURCE_DIR + "xsv_locatable_test.csv";
     private static final String TEST_FILE2 = TEST_RESOURCE_DIR + "xsv_locatable_test2.tsv";
+    private static final String TEST_FILE_MIXED_ENCODING = TEST_RESOURCE_DIR + "xsv_locatable_test_mixed_encodings.csv";
+
     /** Uses column names, instead of index */
     private static final String TEST_FILE3 = TEST_RESOURCE_DIR + "xsv_locatable_test3.csv";
     private static final String TEST_FILE4 = TEST_RESOURCE_DIR + "xsv_locatable_test4.csv";
@@ -78,6 +80,14 @@ private Object[][] provideForTestCanDecode() {
         };
     }
 
+    @DataProvider
+    private Object[][] provideForTestDecodeCharsetFailure() {
+
+        return new Object[][]{
+                { TEST_FILE_MIXED_ENCODING },
+        };
+    }
+
     @DataProvider
     private Object[][] provideForTestDecode() {
 
@@ -109,6 +119,8 @@ private Object[][] provideForTestDecode() {
         };
     }
 
+
+
     @DataProvider
     private Object[][] provideForTestReadActualHeader() {
         return new Object[][] {
@@ -164,16 +176,14 @@ public void testCanDecode(final String filePath, final boolean expected) {
         Assert.assertEquals(xsvLocatableTableCodec.canDecode(filePath), expected);
     }
 
-    // decode
-    @Test(dataProvider = "provideForTestDecode")
-    public void testDecode(final String filePath, final List expected) {
+    private void testDecodeHelper(final String filePath, final List expected) {
         final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec();
         if (xsvLocatableTableCodec.canDecode(filePath)) {
             try ( final FileInputStream fileInputStream = new FileInputStream(filePath)) {
 
                 // Lots of scaffolding to do reading here:
-                final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream));
-                final ArrayList output = new ArrayList<>(expected.size());
+                final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream));
+                final ArrayList output = new ArrayList<>(expected.size());
 
                 // Read off the header:
                 xsvLocatableTableCodec.readActualHeader(lineReaderIterator);
@@ -198,6 +208,43 @@ public void testDecode(final String filePath, final List expect
         }
     }
 
+    // Attempt to decode a malformed file:
+    @Test(dataProvider = "provideForTestDecodeCharsetFailure",
+            expectedExceptions = {UserException.MalformedFile.class})
+    public void testDecodeCharsetFailure(final String filePath ) {
+        final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec();
+        if (xsvLocatableTableCodec.canDecode(filePath)) {
+            try ( final FileInputStream fileInputStream = new FileInputStream(filePath)) {
+
+                // Lots of scaffolding to do reading here:
+                final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream));
+
+                // Read off the header:
+                xsvLocatableTableCodec.readActualHeader(lineReaderIterator);
+
+                // Read and decode the lines:
+                while ( lineReaderIterator.hasNext() ) {
+                    xsvLocatableTableCodec.decode(lineReaderIterator.next());
+                }
+            }
+            catch ( final FileNotFoundException ex ) {
+                throw new GATKException("Error - could not find test file: " + filePath, ex);
+            }
+            catch ( final IOException ex ) {
+                throw new GATKException("Error - IO problem with file " + filePath, ex);
+            }
+        }
+        else {
+            throw new GATKException("Error - bad test case.");
+        }
+    }
+
+    // decode
+    @Test(dataProvider = "provideForTestDecode")
+    public void testDecode(final String filePath, final List expected) {
+        testDecodeHelper(filePath, expected);
+    }
+
     // readActualHeader
     @Test(dataProvider = "provideForTestReadActualHeader")
     public void testReadActualHeader(final String filePath, final List expected) {
diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_mixed_encodings.config b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_mixed_encodings.config
new file mode 100644
index 00000000000..c347ab6a75f
--- /dev/null
+++ b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_mixed_encodings.config
@@ -0,0 +1,5 @@
+contig_column = 1
+start_column = 3
+end_column = 4
+xsv_delimiter = ,
+name = XSV_LOCATABLE_TEST_NAME
\ No newline at end of file
diff --git a/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_mixed_encodings.csv b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_mixed_encodings.csv
new file mode 100644
index 00000000000..1fe37b2cd8a
Binary files /dev/null and b/src/test/resources/org/broadinstitute/hellbender/utils/codecs/xsvLocatableTable/xsv_locatable_test_mixed_encodings.csv differ