From f7c46fb38fd0dce13ead6ac3c37670132dfcf8a9 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Wed, 17 Oct 2018 17:29:59 -0400 Subject: [PATCH] adding more tests and further changes --- .../java/htsjdk/samtools/TextTagCodec.java | 39 +++--- .../htsjdk/samtools/SAMRecordUnitTest.java | 32 +++-- .../htsjdk/samtools/SAMTextReaderTest.java | 20 ++++ .../htsjdk/samtools/SAMTextWriterTest.java | 14 ++- .../htsjdk/samtools/TextTagCodecTest.java | 112 ++++++++++++++++++ 5 files changed, 177 insertions(+), 40 deletions(-) create mode 100644 src/test/java/htsjdk/samtools/TextTagCodecTest.java diff --git a/src/main/java/htsjdk/samtools/TextTagCodec.java b/src/main/java/htsjdk/samtools/TextTagCodec.java index e416aa6dc0..7a9d07c07b 100644 --- a/src/main/java/htsjdk/samtools/TextTagCodec.java +++ b/src/main/java/htsjdk/samtools/TextTagCodec.java @@ -45,8 +45,11 @@ public class TextTagCodec { private static final int NUM_TAG_FIELDS = 3; private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; + private static final TagValueAndUnsignedArrayFlag EMPTY_UNSIGNED_BYTE_ARRAY = new TagValueAndUnsignedArrayFlag(EMPTY_BYTE_ARRAY, true); private static final short[] EMPTY_SHORT_ARRAY = new short[0]; + private static final TagValueAndUnsignedArrayFlag EMPTY_UNSIGNED_SHORT_ARRAY = new TagValueAndUnsignedArrayFlag(EMPTY_SHORT_ARRAY, true); private static final int[] EMPTY_INT_ARRAY = new int[0]; + private static final TagValueAndUnsignedArrayFlag EMPTY_UNSIGNED_INT_ARRAY = new TagValueAndUnsignedArrayFlag(EMPTY_INT_ARRAY, true); private static final float[] EMPTY_FLOAT_ARRAY = new float[0]; /** @@ -57,7 +60,7 @@ public class TextTagCodec { /** * Convert in-memory representation of tag to SAM text representation. * @param tagName Two-character tag name. - * @param value Tag value as approriate Object subclass. + * @param value Tag value as appropriate Object subclass. * @return SAM text String representation, i.e. name:type:value */ public String encode(final String tagName, Object value) { @@ -76,7 +79,7 @@ public String encode(final String tagName, Object value) { // H should never happen anymore. value = StringUtil.bytesToHexString((byte[])value); } else if (tagType == 'B') { - value = getArrayType(value, false) + "," + encodeArrayValue(value); + value = getArrayType(value, false) + encodeArrayValue(value); } else if (tagType == 'i') { final long longVal = ((Number) value).longValue(); // as the spec says: [-2^31, 2^32) @@ -88,7 +91,7 @@ public String encode(final String tagName, Object value) { return sb.toString(); } - private char getArrayType(final Object array, final boolean isUnsigned) { + private static char getArrayType(final Object array, final boolean isUnsigned) { final char type; final Class componentType = array.getClass().getComponentType(); if (componentType == Float.TYPE) { @@ -102,13 +105,10 @@ private char getArrayType(final Object array, final boolean isUnsigned) { return (isUnsigned? Character.toUpperCase(type): type); } - private String encodeArrayValue(final Object value) { + private static String encodeArrayValue(final Object value) { final int length = Array.getLength(value); - if (length == 0){ - return ""; - } - final StringBuilder ret = new StringBuilder(Array.get(value, 0).toString()); - for (int i = 1; i < length; ++i) { + final StringBuilder ret = new StringBuilder(); + for (int i = 0; i < length; ++i) { ret.append(','); ret.append(Array.get(value, i).toString()); } @@ -116,7 +116,7 @@ private String encodeArrayValue(final Object value) { } - private long[] widenToUnsigned(final Object array) { + private static long[] widenToUnsigned(final Object array) { final Class componentType = array.getClass().getComponentType(); final long mask; if (componentType == Byte.TYPE) mask = 0xffL; @@ -135,7 +135,7 @@ String encodeUnsignedArray(final String tagName, final Object array) { throw new IllegalArgumentException("Non-array passed to encodeUnsignedArray: " + array.getClass()); } final long[] widened = widenToUnsigned(array); - return tagName + ":B:" + getArrayType(array, true) + "," + encodeArrayValue(widened); + return tagName + ":B:" + getArrayType(array, true) + encodeArrayValue(widened); } /** @@ -183,7 +183,7 @@ public Object setValue(final Object o) { }; } - private Object convertStringToObject(final String type, final String stringVal) { + private static Object convertStringToObject(final String type, final String stringVal) { if (type.equals("Z")) { return stringVal; } else if (type.equals("A")) { @@ -227,13 +227,11 @@ else if (SAMUtils.isValidUnsignedIntegerAttribute(lValue)) { } } - private Object covertStringArrayToObject(final String stringVal) { + private static Object covertStringArrayToObject(final String stringVal) { final String[] elementTypeAndValue = new String[2]; final int numberOfTokens = StringUtil.splitConcatenateExcessTokens(stringVal, elementTypeAndValue, ','); - if (!(numberOfTokens == 1 || numberOfTokens == 2)) { - throw new SAMFormatException("Tag of type B requires an element type"); - } + if (elementTypeAndValue[0].length() != 1) { throw new SAMFormatException("Unrecognized element type for array tag value: " + elementTypeAndValue[0]); } @@ -334,14 +332,17 @@ private Object covertStringArrayToObject(final String stringVal) { private static Object createEmptyArray(char elementType) { switch ( elementType ) { case 'c': - case 'C': return EMPTY_BYTE_ARRAY; + case 'C': + return EMPTY_UNSIGNED_BYTE_ARRAY; case 's': - case 'S': return EMPTY_SHORT_ARRAY; + case 'S': + return EMPTY_UNSIGNED_SHORT_ARRAY; case 'i': - case 'I': return EMPTY_INT_ARRAY; + case 'I': + return EMPTY_UNSIGNED_INT_ARRAY; case 'f': //note that F is not a valid option since there is no signed/unsigned float distinction return EMPTY_FLOAT_ARRAY; diff --git a/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java b/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java index 5bb236474b..e627e784ec 100644 --- a/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java +++ b/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java @@ -42,6 +42,8 @@ public class SAMRecordUnitTest extends HtsjdkTest { + private static final String ARRAY_TAG = "xa"; + @DataProvider(name = "serializationTestData") public Object[][] getSerializationTestData() { return new Object[][] { @@ -1175,17 +1177,14 @@ public void testHasAttribute(final SAMRecord samRecord, final String tag, final @Test public void test_setAttribute_empty_array() { final SAMFileHeader header = new SAMFileHeader(); - final String arrayTag = "xa"; final SAMRecord record = new SAMRecord(header); - Assert.assertNull(record.getStringAttribute(arrayTag)); - record.setAttribute(arrayTag, new int[0]); - Assert.assertNotNull(record.getSignedIntArrayAttribute(arrayTag)); - Assert.assertEquals(record.getSignedIntArrayAttribute(arrayTag),new int[0]); - record.getSAMString(); - Assert.assertEquals(record.getAttribute(arrayTag), new char[0]); - record.setAttribute(arrayTag, null); - Assert.assertNull(record.getStringAttribute(arrayTag)); - + Assert.assertNull(record.getStringAttribute(ARRAY_TAG)); + record.setAttribute(ARRAY_TAG, new int[0]); + Assert.assertNotNull(record.getSignedIntArrayAttribute(ARRAY_TAG)); + Assert.assertEquals(record.getSignedIntArrayAttribute(ARRAY_TAG), new int[0]); + Assert.assertEquals(record.getAttribute(ARRAY_TAG), new char[0]); + record.setAttribute(ARRAY_TAG, null); + Assert.assertNull(record.getStringAttribute(ARRAY_TAG)); } private static Object[][] getEmptyArrays() { @@ -1197,7 +1196,7 @@ private static Object[][] getEmptyArrays() { }; } - private static Object[][] getFileExtension(){ + private static Object[][] getFileExtensions(){ return new Object[][]{ {BamFileIoUtils.BAM_FILE_EXTENSION}, {IOUtil.SAM_FILE_EXTENSION}, {CramIO.CRAM_FILE_EXTENSION} }; @@ -1205,7 +1204,7 @@ private static Object[][] getFileExtension(){ @DataProvider public Object[][] getEmptyArraysAndExtensions(){ - return TestNGUtils.cartesianProduct(getEmptyArrays(), getFileExtension()); + return TestNGUtils.cartesianProduct(getEmptyArrays(), getFileExtensions()); } @Test(dataProvider = "getEmptyArraysAndExtensions") @@ -1213,12 +1212,11 @@ public void testWriteSamWithEmptyArray(Object emptyArray, Class arrayClass, S Assert.assertEquals(emptyArray.getClass(), arrayClass); Assert.assertEquals(Array.getLength(emptyArray), 0); - final String arrayTag = "xa"; final SAMRecordSetBuilder samRecords = new SAMRecordSetBuilder(); samRecords.addFrag("Read", 0, 100, false); final SAMRecord record = samRecords.getRecords().iterator().next(); - record.setAttribute(arrayTag, emptyArray); - checkArrayIsEmpty(arrayTag, record, arrayClass); + record.setAttribute(ARRAY_TAG, emptyArray); + checkArrayIsEmpty(ARRAY_TAG, record, arrayClass); final Path tmp = Files.createTempFile("tmp", fileExtension); IOUtil.deleteOnExit(tmp); @@ -1237,7 +1235,7 @@ public void testWriteSamWithEmptyArray(Object emptyArray, Class arrayClass, S final SAMRecordIterator iterator = reader.iterator(); Assert.assertTrue(iterator.hasNext()); final SAMRecord recordFromDisk = iterator.next(); - checkArrayIsEmpty(arrayTag, recordFromDisk, arrayClass); + checkArrayIsEmpty(ARRAY_TAG, recordFromDisk, arrayClass); } } @@ -1247,6 +1245,4 @@ private static void checkArrayIsEmpty(String arrayTag, SAMRecord recordFromDisk, Assert.assertEquals(attribute.getClass(), expectedClass); Assert.assertEquals(Array.getLength(attribute), 0); } - - } diff --git a/src/test/java/htsjdk/samtools/SAMTextReaderTest.java b/src/test/java/htsjdk/samtools/SAMTextReaderTest.java index 142eea32c1..36826d0925 100644 --- a/src/test/java/htsjdk/samtools/SAMTextReaderTest.java +++ b/src/test/java/htsjdk/samtools/SAMTextReaderTest.java @@ -27,12 +27,15 @@ import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloserUtil; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; public class SAMTextReaderTest extends HtsjdkTest { + private static final String ARRAY_TAG = "xa"; + // Simple input, spot check that parsed correctly, and make sure nothing blows up. @Test public void testBasic() throws Exception { @@ -135,4 +138,21 @@ public void testTagWithColon() { Assert.assertEquals(recFromText.getAttribute(SAMTag.CQ.name()), valueWithColons); CloserUtil.close(reader); } + + @DataProvider + public Object[][] getRecordsWithArrays(){ + final String recordBase = "Read\t4\tchr1\t1\t0\t*\t*\t0\t0\tG\t%\t"; + return new Object[][]{ + {recordBase + ARRAY_TAG + ":B:i", new int[0]}, + {recordBase + ARRAY_TAG + ":B:i,", new int[0]}, + {recordBase + ARRAY_TAG + ":B:i,1,2,3,", new int[]{1,2,3}}, + }; + } + + @Test(dataProvider = "getRecordsWithArrays") + public void testSamRecordCanHandleArrays(String samRecord, Object array){ + final SAMLineParser samLineParser = new SAMLineParser(new SAMFileHeader()); + final SAMRecord record = samLineParser.parseLine(samRecord); + Assert.assertEquals(record.getAttribute(ARRAY_TAG), array); + } } diff --git a/src/test/java/htsjdk/samtools/SAMTextWriterTest.java b/src/test/java/htsjdk/samtools/SAMTextWriterTest.java index 5c9ff28cde..959d5818f1 100644 --- a/src/test/java/htsjdk/samtools/SAMTextWriterTest.java +++ b/src/test/java/htsjdk/samtools/SAMTextWriterTest.java @@ -34,7 +34,7 @@ public class SAMTextWriterTest extends HtsjdkTest { - private SAMRecordSetBuilder getSAMReader(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) { + private SAMRecordSetBuilder getSamRecordSet(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) { final SAMRecordSetBuilder ret = new SAMRecordSetBuilder(sortForMe, sortOrder); ret.addPair("readB", 20, 200, 300); ret.addPair("readA", 20, 100, 150); @@ -45,7 +45,7 @@ private SAMRecordSetBuilder getSAMReader(final boolean sortForMe, final SAMFileH @Test public void testNullHeader() throws Exception { - final SAMRecordSetBuilder recordSetBuilder = getSAMReader(true, SAMFileHeader.SortOrder.coordinate); + final SAMRecordSetBuilder recordSetBuilder = getSamRecordSet(true, SAMFileHeader.SortOrder.coordinate); for (final SAMRecord rec : recordSetBuilder.getRecords()) { rec.setHeader(null); } @@ -77,7 +77,7 @@ private void doTest(final SAMRecordSetBuilder recordSetBuilder) throws Exception } private void doTest(final SamFlagField samFlagField) throws Exception { - doTest(getSAMReader(true, SAMFileHeader.SortOrder.coordinate), samFlagField); + doTest(getSamRecordSet(true, SAMFileHeader.SortOrder.coordinate), samFlagField); } private void doTest(final SAMRecordSetBuilder recordSetBuilder, final SamFlagField samFlagField) throws Exception { @@ -128,4 +128,12 @@ private void doTest(final SAMRecordSetBuilder recordSetBuilder, final SamFlagFie Assert.assertFalse(newSAMIt.hasNext()); inputSAM.close(); } + + @Test + public void testEmptyArrayAttributeHasNoCommaWhenWrittenToSAM(){ + final SAMFileHeader header = new SAMFileHeader(); + final SAMRecord record = new SAMRecord(header); + record.setAttribute("xa", new int[0]); + Assert.assertTrue(record.getSAMString().endsWith("xa:B:i")); + } } diff --git a/src/test/java/htsjdk/samtools/TextTagCodecTest.java b/src/test/java/htsjdk/samtools/TextTagCodecTest.java new file mode 100644 index 0000000000..83cda20b16 --- /dev/null +++ b/src/test/java/htsjdk/samtools/TextTagCodecTest.java @@ -0,0 +1,112 @@ +package htsjdk.samtools; + +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Map; + +public class TextTagCodecTest { + + @DataProvider + public Object[][] getArraysToEncode(){ + return new Object[][]{ + {new byte[0], true, "xa:B:c"}, + {new byte[0], false, "xa:B:C"}, + {new byte[]{1, 2, 3}, true, "xa:B:c,1,2,3"}, + {new byte[]{1, 2, 3}, false, "xa:B:C,1,2,3"}, + + {new short[0], true, "xa:B:s"}, + {new short[0], false, "xa:B:S"}, + {new short[]{1, 2, 3}, true, "xa:B:s,1,2,3"}, + {new short[]{1, 2, 3}, false, "xa:B:S,1,2,3"}, + + {new int[0], true, "xa:B:i"}, + {new int[0], false, "xa:B:I"}, + {new int[]{1, 2, 3}, true, "xa:B:i,1,2,3"}, + {new int[]{1, 2, 3}, false, "xa:B:I,1,2,3"}, + + {new float[0], true, "xa:B:f"}, + {new float[]{1.0f, 2.0f, 3.0f}, true, "xa:B:f,1.0,2.0,3.0"}, + + }; + } + + @Test(dataProvider = "getArraysToEncode") + public void testEmptyAndNonEmptyArrayEncoding(Object array, boolean isSigned, String expectedTag){ + final TextTagCodec textTagCodec = new TextTagCodec(); + final String tagName = "xa"; + final String encodedTag = isSigned + ? textTagCodec.encode(tagName, array) + : textTagCodec.encodeUnsignedArray(tagName, array); + Assert.assertEquals(encodedTag, expectedTag); + + } + + @DataProvider + public Object[][] getArraysToDecode(){ + return new Object[][]{ + {"xa:B:c", new byte[0]}, + {"xa:B:C", new TagValueAndUnsignedArrayFlag(new byte[0], true)}, + {"xa:B:c,1,2,3", new byte[]{1, 2, 3}}, + {"xa:B:C,1,2,3", new TagValueAndUnsignedArrayFlag(new byte[]{1, 2, 3}, true)}, + {"xa:B:c,", new byte[0]}, + {"xa:B:C,", new TagValueAndUnsignedArrayFlag(new byte[0], true)}, + + {"xa:B:s", new short[0]}, + {"xa:B:S", new TagValueAndUnsignedArrayFlag(new short[0], true)}, + {"xa:B:s,1,2,3", new short[]{1, 2, 3}}, + {"xa:B:S,1,2,3", new TagValueAndUnsignedArrayFlag(new short[]{1, 2, 3}, true)}, + {"xa:B:s,", new short[0]}, + {"xa:B:S,", new TagValueAndUnsignedArrayFlag(new short[0], true)}, + + {"xa:B:i", new int[0]}, + {"xa:B:I", new TagValueAndUnsignedArrayFlag(new int[0], true)}, + {"xa:B:i,1,2,3", new int[]{1, 2, 3}}, + {"xa:B:I,1,2,3", new TagValueAndUnsignedArrayFlag(new int[]{1, 2, 3}, true)}, + {"xa:B:i,", new int[0]}, + {"xa:B:I,", new TagValueAndUnsignedArrayFlag(new int[0], true)}, + + {"xa:B:f", new float[0]}, + {"xa:B:f,", new float[0]}, + {"xa:B:f,1.0,2.0,3.0", new float[]{1.0f, 2.0f, 3.0f}}, + }; + } + + @Test(dataProvider = "getArraysToDecode") + public void testEmptyAndNonEmptyArrayDecoding(String tag, Object expectedValue) { + final TextTagCodec textTagCodec = new TextTagCodec(); + final Map.Entry decoded = textTagCodec.decode(tag); + + Assert.assertEquals(decoded.getKey(), "xa"); + + final Object value = decoded.getValue(); + if( value instanceof TagValueAndUnsignedArrayFlag){ + Assert.assertTrue(expectedValue instanceof TagValueAndUnsignedArrayFlag); + final TagValueAndUnsignedArrayFlag typedValue = (TagValueAndUnsignedArrayFlag) value; + final TagValueAndUnsignedArrayFlag typedExpected = (TagValueAndUnsignedArrayFlag) expectedValue; + Assert.assertEquals(typedValue.value, typedExpected.value); + Assert.assertEquals(typedValue.isUnsignedArray, typedExpected.isUnsignedArray); + } else { + Assert.assertEquals(value, expectedValue); + } + } + + @DataProvider + public Object[][] getBadArrayTags(){ + return new Object[][]{ + {"xa:B"}, // no colon + {"xa:B:F"}, // there is no such thing as unsigned floating point + {"xa:B:F,1.0,2.0"}, // same as above but empty arrays have a special path + {"xa:B:,1,2,3"}, // missing type + {"xa:B:c,700"}, //out of bounds + {"xa:B:C,-10"}, // negative in unsigned array + }; + } + + @Test(dataProvider = "getBadArrayTags", expectedExceptions = SAMFormatException.class) + public void testBadArrayTags(String badTag){ + final TextTagCodec textTagCodec = new TextTagCodec(); + textTagCodec.decode(badTag); + } +} \ No newline at end of file