Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added new catch block for character encoding error cases. #5124

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ public SimpleKeyXsvFuncotationFactory(final String name,
final Iterator<String> it = pathLineIterator.iterator();

// Get our column names:
annotationColumnNames = createColumnNames( it, numHeaderLinesToIgnore );
annotationColumnNames = createColumnNames(it, numHeaderLinesToIgnore);

// Populate our empty annotation list:
emptyAnnotationList = new ArrayList<>(annotationColumnNames.size());
Expand All @@ -139,7 +139,7 @@ public SimpleKeyXsvFuncotationFactory(final String name,
}

// Populate our annotation map:
populateAnnotationMap( it, permissiveColumns );
populateAnnotationMap(it, permissiveColumns);
}

// Initialize overrides / defaults:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,13 @@ public XsvTableFeature(final int contigColumn, final int startColumn, final int
this.dataSourceName = dataSourceName;

contig = columnValues.get(contigColumn);
start = Integer.valueOf( columnValues.get(startColumn) );
end = Integer.valueOf( columnValues.get(endColumn) );
try {
start = Integer.valueOf(columnValues.get(startColumn));
end = Integer.valueOf(columnValues.get(endColumn));
}
catch ( final NumberFormatException ex ) {
throw new UserException.MalformedFile("Could not convert value (" + ex.getMessage() + ") from input file into a number for Data Source: " + dataSourceName);
}

if ( columnNames.size() != columnValues.size() ) {
throw new UserException.BadInput("Number of columns in given header and data do not match: " + columnNames.size() + " != " + columnValues.size());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
package org.broadinstitute.hellbender.utils.nio;

import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.Utils;

import java.io.IOException;
import java.nio.charset.CharacterCodingException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.Spliterator;
import java.util.function.Consumer;
import java.util.stream.Stream;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.Utils;

/**
* Iterate through the lines of a Path. Works for anything you can point
Expand All @@ -30,10 +32,14 @@ public class PathLineIterator implements AutoCloseable, Iterable<String> {
* @param path path to a text file.
* @throws UserException if we cannot open the file for reading.
*/
public PathLineIterator(Path path) throws UserException {
public PathLineIterator(final Path path) throws UserException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need throws UserException, since that is a runtime exception?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it does not need it. I'll pull it out.

try {
lines = Files.lines(Utils.nonNull(path, "path shouldn't be null"));
} catch (IOException x) {
}
catch (final CharacterCodingException ex ) {
throw new UserException("Error detected in file character encoding. Possible inconsistent character encodings within the file: " + path.toUri().toString(), ex);
}
catch (final IOException x) {
throw new UserException("Error reading " + path.toUri().toString(), x);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ public class FuncotatorTestConstants {
// Data source variables:

public static final String XSV_CSV_FILE_PATH = FUNCOTATOR_TEST_DIR + "xsv_CSV_testFile.csv";

public static final String XSV_TSV_FILE_PATH = FUNCOTATOR_TEST_DIR + "xsv_TSV_testFile.csv";
public static final String XSV_PIPESV_FILE_PATH = FUNCOTATOR_TEST_DIR + "xsv_PIPESV_testFile.xsv";
public static final String XSV_DEADBEEFSV_FILE_PATH = FUNCOTATOR_TEST_DIR + "xsv_DEADBEEFSV_testFile.csv";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ public class XsvLocatableTableCodecUnitTest extends GATKBaseTest {
private static final String TEST_FILE1 = TEST_RESOURCE_DIR + "xsv_locatable_test.csv";
private static final String TEST_FILE2 = TEST_RESOURCE_DIR + "xsv_locatable_test2.tsv";

private static final String TEST_FILE_MIXED_ENCODING = TEST_RESOURCE_DIR + "xsv_locatable_test_mixed_encodings.csv";

/** Uses column names, instead of index */
private static final String TEST_FILE3 = TEST_RESOURCE_DIR + "xsv_locatable_test3.csv";
private static final String TEST_FILE4 = TEST_RESOURCE_DIR + "xsv_locatable_test4.csv";
Expand Down Expand Up @@ -78,6 +80,14 @@ private Object[][] provideForTestCanDecode() {
};
}

/**
 * Supplies the inputs for {@code testDecodeCharsetFailure}: a single case
 * holding the path stored in {@code TEST_FILE_MIXED_ENCODING}.
 *
 * @return one row containing that file path.
 */
@DataProvider
private Object[][] provideForTestDecodeCharsetFailure() {
    final Object[] mixedEncodingCase = { TEST_FILE_MIXED_ENCODING };
    return new Object[][] { mixedEncodingCase };
}

@DataProvider
private Object[][] provideForTestDecode() {

Expand Down Expand Up @@ -109,6 +119,8 @@ private Object[][] provideForTestDecode() {
};
}



@DataProvider
private Object[][] provideForTestReadActualHeader() {
return new Object[][] {
Expand Down Expand Up @@ -164,16 +176,14 @@ public void testCanDecode(final String filePath, final boolean expected) {
Assert.assertEquals(xsvLocatableTableCodec.canDecode(filePath), expected);
}

// decode
@Test(dataProvider = "provideForTestDecode")
public void testDecode(final String filePath, final List<XsvTableFeature> expected) {
private void testDecodeHelper(final String filePath, final List<XsvTableFeature> expected) {
final XsvLocatableTableCodec xsvLocatableTableCodec = new XsvLocatableTableCodec();
if (xsvLocatableTableCodec.canDecode(filePath)) {
try ( final FileInputStream fileInputStream = new FileInputStream(filePath)) {

// Lots of scaffolding to do reading here:
final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream));
final ArrayList<XsvTableFeature> output = new ArrayList<>(expected.size());
final AsciiLineReaderIterator lineReaderIterator = new AsciiLineReaderIterator(AsciiLineReader.from(fileInputStream));
final ArrayList<XsvTableFeature> output = new ArrayList<>(expected.size());

// Read off the header:
xsvLocatableTableCodec.readActualHeader(lineReaderIterator);
Expand All @@ -198,6 +208,43 @@ public void testDecode(final String filePath, final List<XsvTableFeature> expect
}
}

/**
 * Attempts to decode a malformed file and expects the run to end in a
 * {@code UserException.MalformedFile}.
 *
 * @param filePath path of the file to decode, supplied by
 *                 {@code provideForTestDecodeCharsetFailure}.
 */
@Test(dataProvider = "provideForTestDecodeCharsetFailure",
      expectedExceptions = {UserException.MalformedFile.class})
public void testDecodeCharsetFailure(final String filePath ) {
    final XsvLocatableTableCodec codec = new XsvLocatableTableCodec();

    // A file the codec will not accept means the test case itself is wrong.
    if ( !codec.canDecode(filePath) ) {
        throw new GATKException("Error - bad test case.");
    }

    try ( final FileInputStream inputStream = new FileInputStream(filePath) ) {

        // Scaffolding needed to read the file line by line:
        final AsciiLineReaderIterator lineIterator =
                new AsciiLineReaderIterator(AsciiLineReader.from(inputStream));

        // Read off the header first:
        codec.readActualHeader(lineIterator);

        // Decode every remaining line; the decoded features are not inspected here.
        while ( lineIterator.hasNext() ) {
            codec.decode(lineIterator.next());
        }
    }
    catch ( final FileNotFoundException ex ) {
        throw new GATKException("Error - could not find test file: " + filePath, ex);
    }
    catch ( final IOException ex ) {
        throw new GATKException("Error - IO problem with file " + filePath, ex);
    }
}

// decode
/**
 * Runs the decode check for one data-provider case by delegating to
 * {@code testDecodeHelper}.
 *
 * @param filePath path of the file to decode, supplied by {@code provideForTestDecode}.
 * @param expected features handed to the helper alongside {@code filePath}.
 */
@Test(dataProvider = "provideForTestDecode")
public void testDecode(final String filePath, final List<XsvTableFeature> expected) {
testDecodeHelper(filePath, expected);
}

// readActualHeader
@Test(dataProvider = "provideForTestReadActualHeader")
public void testReadActualHeader(final String filePath, final List<String> expected) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
contig_column = 1
start_column = 3
end_column = 4
xsv_delimiter = ,
name = XSV_LOCATABLE_TEST_NAME
Binary file not shown.