From 27a451ba75d21d39255e093d9a2643484b2fd919 Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Thu, 19 Sep 2019 10:26:11 -0500 Subject: [PATCH 01/13] Bump bdg-formats dependency version to 0.14.0. --- .../convert/ga4gh/AlignmentRecordToReadAlignment.java | 6 +++--- .../convert/ga4gh/AlignmentRecordToReadAlignmentTest.java | 4 ++-- pom.xml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/convert-ga4gh/src/main/java/org/bdgenomics/convert/ga4gh/AlignmentRecordToReadAlignment.java b/convert-ga4gh/src/main/java/org/bdgenomics/convert/ga4gh/AlignmentRecordToReadAlignment.java index 5887338..5881f79 100644 --- a/convert-ga4gh/src/main/java/org/bdgenomics/convert/ga4gh/AlignmentRecordToReadAlignment.java +++ b/convert-ga4gh/src/main/java/org/bdgenomics/convert/ga4gh/AlignmentRecordToReadAlignment.java @@ -98,9 +98,9 @@ public ReadAlignment convert(final AlignmentRecord alignmentRecord, builder.setNextMatePosition(matePosition); } - if (isNotEmpty(alignmentRecord.getQuality())) { - List alignedQuality = new ArrayList(alignmentRecord.getQuality().length()); - for (char c : alignmentRecord.getQuality().toCharArray()) { + if (isNotEmpty(alignmentRecord.getQualityScores())) { + List alignedQuality = new ArrayList(alignmentRecord.getQualityScores().length()); + for (char c : alignmentRecord.getQualityScores().toCharArray()) { alignedQuality.add(((int) c) - 33); } builder.addAllAlignedQuality(alignedQuality); diff --git a/convert-ga4gh/src/test/java/org/bdgenomics/convert/ga4gh/AlignmentRecordToReadAlignmentTest.java b/convert-ga4gh/src/test/java/org/bdgenomics/convert/ga4gh/AlignmentRecordToReadAlignmentTest.java index 7c6d1c3..f9ad33c 100644 --- a/convert-ga4gh/src/test/java/org/bdgenomics/convert/ga4gh/AlignmentRecordToReadAlignmentTest.java +++ b/convert-ga4gh/src/test/java/org/bdgenomics/convert/ga4gh/AlignmentRecordToReadAlignmentTest.java @@ -72,7 +72,7 @@ public void setUp() { .setReadMapped(true) .setCigar("10M") .setSequence("AAAAAAAAAA") - 
.setQuality("**********") + .setQualityScores("**********") .setReadNegativeStrand(false) .setMappingQuality(60) .setMismatchingPositions("10") @@ -147,7 +147,7 @@ public void testConvertNoMapq() { .setReadMapped(true) .setCigar("10M") .setSequence("AAAAAAAAAA") - .setQuality("**********") + .setQualityScores("**********") .setReadNegativeStrand(false) .setMismatchingPositions("10") .setOriginalStart(12L) diff --git a/pom.xml b/pom.xml index f310e68..aac648f 100644 --- a/pom.xml +++ b/pom.xml @@ -18,7 +18,7 @@ 1.8 - 0.13.0 + 0.14.0 3.8.1 0.6.0a10 27.0-jre From bcb88f2dacfbcd36a413bef060799b3f140046dd Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Thu, 19 Sep 2019 10:30:20 -0500 Subject: [PATCH 02/13] Use openjdk8 for Travis CI. --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9bcf999..b179f30 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,3 @@ language: java jdk: - - oraclejdk8 + - openjdk8 From e46b1225b933347277e62235c0620f17cb6cb83b Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Thu, 19 Sep 2019 10:42:53 -0500 Subject: [PATCH 03/13] Modifying changelog. 
--- CHANGES.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 624750f..f232e44 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,16 @@ # convert Changelog # +### Version 0.8.0 ### + +**Closed issues:** + + - Bump bdg-formats dependency version to 0.14.0 [\#74](https://github.com/bigdatagenomics/convert/issues/74) + +**Merged and closed pull requests:** + + - [CONVERT-74] Bump bdg-formats dependency version to 0.14.0 [\#75](https://github.com/bigdatagenomics/convert/pull/75) ([heuermh](https://github.com/heuermh)) + + ### Version 0.6.0 ### **Closed issues:** From 3cd8b59af232eb006b307b1fba4a2a6f1fb8c090 Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Thu, 19 Sep 2019 10:44:20 -0500 Subject: [PATCH 04/13] [maven-release-plugin] prepare release convert-parent-0.8.0 --- convert-ga4gh/pom.xml | 2 +- convert-htsjdk/pom.xml | 2 +- convert/pom.xml | 2 +- pom.xml | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/convert-ga4gh/pom.xml b/convert-ga4gh/pom.xml index acc97d6..8111882 100644 --- a/convert-ga4gh/pom.xml +++ b/convert-ga4gh/pom.xml @@ -4,7 +4,7 @@ org.bdgenomics.convert convert-parent - 0.8.0-SNAPSHOT + 0.8.0 jar diff --git a/convert-htsjdk/pom.xml b/convert-htsjdk/pom.xml index f125a5c..886600a 100644 --- a/convert-htsjdk/pom.xml +++ b/convert-htsjdk/pom.xml @@ -4,7 +4,7 @@ org.bdgenomics.convert convert-parent - 0.8.0-SNAPSHOT + 0.8.0 jar diff --git a/convert/pom.xml b/convert/pom.xml index a4ba389..d4b20fa 100644 --- a/convert/pom.xml +++ b/convert/pom.xml @@ -4,7 +4,7 @@ org.bdgenomics.convert convert-parent - 0.8.0-SNAPSHOT + 0.8.0 jar diff --git a/pom.xml b/pom.xml index aac648f..ad1fcf7 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ 4.0.0 org.bdgenomics.convert convert-parent - 0.8.0-SNAPSHOT + 0.8.0 pom Big Data Genomics: Convert Parent Convert Parent POM @@ -41,7 +41,7 @@ scm:git:git@github.com:bigdatagenomics/convert.git scm:git:git@github.com:bigdatagenomics/convert.git 
scm:git:git@github.com:bigdatagenomics/convert.git - master + convert-parent-0.8.0 From 57ea80e72b9dbb34c5d50c84fffb9de95653156e Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Thu, 19 Sep 2019 10:44:33 -0500 Subject: [PATCH 05/13] [maven-release-plugin] prepare for next development iteration --- convert-ga4gh/pom.xml | 2 +- convert-htsjdk/pom.xml | 2 +- convert/pom.xml | 2 +- pom.xml | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/convert-ga4gh/pom.xml b/convert-ga4gh/pom.xml index 8111882..ec4d333 100644 --- a/convert-ga4gh/pom.xml +++ b/convert-ga4gh/pom.xml @@ -4,7 +4,7 @@ org.bdgenomics.convert convert-parent - 0.8.0 + 0.9.0-SNAPSHOT jar diff --git a/convert-htsjdk/pom.xml b/convert-htsjdk/pom.xml index 886600a..977d0f9 100644 --- a/convert-htsjdk/pom.xml +++ b/convert-htsjdk/pom.xml @@ -4,7 +4,7 @@ org.bdgenomics.convert convert-parent - 0.8.0 + 0.9.0-SNAPSHOT jar diff --git a/convert/pom.xml b/convert/pom.xml index d4b20fa..3510266 100644 --- a/convert/pom.xml +++ b/convert/pom.xml @@ -4,7 +4,7 @@ org.bdgenomics.convert convert-parent - 0.8.0 + 0.9.0-SNAPSHOT jar diff --git a/pom.xml b/pom.xml index ad1fcf7..43ff212 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ 4.0.0 org.bdgenomics.convert convert-parent - 0.8.0 + 0.9.0-SNAPSHOT pom Big Data Genomics: Convert Parent Convert Parent POM @@ -41,7 +41,7 @@ scm:git:git@github.com:bigdatagenomics/convert.git scm:git:git@github.com:bigdatagenomics/convert.git scm:git:git@github.com:bigdatagenomics/convert.git - convert-parent-0.8.0 + master From 28e081b3a3776dc66d74c98677f4254ddcb858b5 Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Mon, 15 Apr 2019 16:48:47 -0500 Subject: [PATCH 06/13] adding sam record and variant context converters, work in progress --- README.md | 30 +- convert-htsjdk/pom.xml | 5 + .../htsjdk/AlignmentRecordToSamRecord.java | 299 ++++++++++++++++++ .../AlignmentRecordToSamRecordFactory.java | 55 ++++ .../htsjdk/GenotypesToVariantContext.java | 77 +++++ 
.../GenotypesToVariantContextFactory.java | 42 +++ .../convert/htsjdk/HtsjdkModule.java | 37 ++- .../htsjdk/SamRecordToAlignmentRecord.java | 290 +++++++++++++++++ .../htsjdk/VariantContextToGenotypes.java | 73 +++++ .../VariantContextToGenotypesFactory.java | 42 +++ .../htsjdk/VariantContextToVariants.java | 73 +++++ .../VariantContextToVariantsFactory.java | 42 +++ .../htsjdk/VariantsToVariantContext.java | 77 +++++ .../VariantsToVariantContextFactory.java | 42 +++ .../AlignmentRecordToSamRecordTest.java | 120 +++++++ .../htsjdk/GenotypesToVariantContextTest.java | 104 ++++++ .../convert/htsjdk/HtsjdkModuleTest.java | 71 ++++- .../SamRecordToAlignmentRecordTest.java | 154 +++++++++ .../htsjdk/VariantContextToGenotypesTest.java | 87 +++++ .../htsjdk/VariantContextToVariantsTest.java | 87 +++++ .../htsjdk/VariantsToVariantContextTest.java | 104 ++++++ .../bdgenomics/convert/AbstractConverter.java | 13 +- pom.xml | 7 +- 23 files changed, 1924 insertions(+), 7 deletions(-) create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecord.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecordFactory.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/GenotypesToVariantContext.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/GenotypesToVariantContextFactory.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamRecordToAlignmentRecord.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToGenotypes.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToGenotypesFactory.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToVariants.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToVariantsFactory.java 
create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContext.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextFactory.java create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecordTest.java create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/GenotypesToVariantContextTest.java create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamRecordToAlignmentRecordTest.java create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantContextToGenotypesTest.java create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantContextToVariantsTest.java create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextTest.java diff --git a/README.md b/README.md index a6ac17f..55e7b0a 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,11 @@ To build ### About convert -The [`Converter`](https://github.com/bigdatagenomics/convert/blob/master/convert/src/main/java/org/bdgenomics/convert/Converter.java) interface, inspired by Apache [Commons Convert](https://commons.apache.org/sandbox/commons-convert/) (sandbox component, never released), provides for converting from a source type `S` to a target type `T`, with a [conversion stringency](https://github.com/bigdatagenomics/convert/blob/master/convert/src/main/java/org/bdgenomics/convert/ConversionStringency.java) and [SLF4J logger](http://www.slf4j.org/) given as context. 
+The [`Converter`](https://github.com/bigdatagenomics/convert/blob/master/convert/src/main/java/org/bdgenomics/convert/Converter.java) +interface, inspired by Apache [Commons Convert](https://commons.apache.org/sandbox/commons-convert/) +(sandbox component, never released), provides for converting from a source type `S` to a target type `T`, with a +[conversion stringency](https://github.com/bigdatagenomics/convert/blob/master/convert/src/main/java/org/bdgenomics/convert/ConversionStringency.java) +and [SLF4J logger](http://www.slf4j.org/) given as context. ```java public interface Converter { @@ -48,4 +52,26 @@ final class MyClass { } ``` -The Guice injector handles construction of the converter instances, managing nested converter dependencies as necessary (if say, a `SAMRecord` to `AlignmentRecord` converter depends on a `String` to `Strand` converter). +The Guice injector handles construction of the converter instances, managing nested converter dependencies +as necessary (if say, a `Gff3Record` to `Feature` converter depends on a `String` to `Strand` converter). 
 + +Some converters may require late bindings; for those a converter factory is available via injection: +```java +final class MyClass { + private final AlignmentRecordToSamRecordFactory alignmentRecordToSamRecordFactory; + private final ConversionStringency stringency = ConversionStringency.STRICT; + private static final Logger logger = LoggerFactory.getLogger(MyClass.class); + + @Inject + MyClass(final AlignmentRecordToSamRecordFactory alignmentRecordToSamRecordFactory) { + this.alignmentRecordToSamRecordFactory = alignmentRecordToSamRecordFactory; + } + + void doIt() { + AlignmentRecord alignmentRecord = ...; + SAMFileHeader header = ...; + Converter<AlignmentRecord, SAMRecord> converter = alignmentRecordToSamRecordFactory.create(header); + SAMRecord record = converter.convert(alignmentRecord, stringency, logger); + } +} +``` diff --git a/convert-htsjdk/pom.xml b/convert-htsjdk/pom.xml index 977d0f9..506a43d 100644 --- a/convert-htsjdk/pom.xml +++ b/convert-htsjdk/pom.xml @@ -32,6 +32,11 @@ junit test + + com.google.inject.extensions + guice-assistedinject + compile + org.bdgenomics.convert convert diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecord.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecord.java new file mode 100644 index 0000000..470ed81 --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecord.java @@ -0,0 +1,299 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import com.google.inject.Inject; + +import com.google.inject.assistedinject.Assisted; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMReadGroupRecord; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMUtils; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.AlignmentRecord; + +import org.slf4j.Logger; + +/** + * Convert AlignmentRecord to htsjdk SAMRecord. + */ +public final class AlignmentRecordToSamRecord extends AbstractConverter { + + /** Regex to capture attributes. */ + private static final Pattern ATTRIBUTE = Pattern.compile("([^:]{2,4}):([AifZHB]):(.*)"); + + /** Regex to capture array attribute types. */ + private static final Pattern ARRAY_ATTRIBUTE = Pattern.compile("([cCiIsSf]{1},)(.*)"); + + /** Header. */ + private final SAMFileHeader header; + + + /** + * Create a new AlignmentRecord to htsjdk SAMRecord converter with the specified header. 
+ * + * @param header header, must not be null + */ + @Inject + public AlignmentRecordToSamRecord(@Assisted final SAMFileHeader header) { + super(AlignmentRecord.class, SAMRecord.class); + + checkNotNull(header); + this.header = header; + } + + + @Override + public SAMRecord convert(final AlignmentRecord alignmentRecord, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (alignmentRecord == null) { + warnOrThrow(alignmentRecord, "must not be null", null, stringency, logger); + return null; + } + + SAMRecord builder = new SAMRecord(header); + builder.setReadName(alignmentRecord.getReadName()); + builder.setReadString(alignmentRecord.getSequence()); + + if (alignmentRecord.getQuality() == null) { + builder.setBaseQualityString("*"); + } + else { + builder.setBaseQualityString(alignmentRecord.getQuality()); + } + + String readGroupId = alignmentRecord.getReadGroupId(); + if (readGroupId != null) { + builder.setAttribute("RG", readGroupId); + + SAMReadGroupRecord readGroup = header.getReadGroup(readGroupId); + if (readGroup != null && readGroup.getLibrary() != null) { + builder.setAttribute("LB", readGroup.getLibrary()); + } + if (readGroup != null && readGroup.getPlatformUnit() != null) { + builder.setAttribute("PU", readGroup.getPlatformUnit()); + } + } + + if (alignmentRecord.getMateReferenceName() != null) { + builder.setMateReferenceName(alignmentRecord.getMateReferenceName()); + } + if (alignmentRecord.getMateAlignmentStart() != null) { + builder.setMateAlignmentStart(alignmentRecord.getMateAlignmentStart().intValue() + 1); + } + if (alignmentRecord.getInsertSize() != null) { + builder.setInferredInsertSize(alignmentRecord.getInsertSize().intValue()); + } + + if (alignmentRecord.getReadPaired() != null) { + boolean readPaired = alignmentRecord.getReadPaired(); + builder.setReadPairedFlag(readPaired); + + if (readPaired) { + if (alignmentRecord.getMateNegativeStrand() != null) { + 
builder.setMateNegativeStrandFlag(alignmentRecord.getMateNegativeStrand()); + } + if (alignmentRecord.getMateMapped() != null) { + builder.setMateUnmappedFlag(!alignmentRecord.getMateMapped()); + } + if (alignmentRecord.getProperPair() != null) { + builder.setProperPairFlag(alignmentRecord.getProperPair()); + } + if (alignmentRecord.getReadInFragment() != null) { + builder.setFirstOfPairFlag(alignmentRecord.getReadInFragment() == 0); + builder.setSecondOfPairFlag(alignmentRecord.getReadInFragment() == 1); + } + } + } + + if (alignmentRecord.getDuplicateRead() != null) { + builder.setDuplicateReadFlag(alignmentRecord.getDuplicateRead()); + } + + if (alignmentRecord.getReadMapped() != null) { + boolean readMapped = alignmentRecord.getReadMapped(); + builder.setReadUnmappedFlag(!readMapped); + + if (alignmentRecord.getReadNegativeStrand() != null) { + builder.setReadNegativeStrandFlag(alignmentRecord.getReadNegativeStrand()); + } + + if (readMapped) { + if (alignmentRecord.getReferenceName() == null) { + warnOrThrow(alignmentRecord, "referenceName must not be null if read aligned", null, stringency, logger); + } + else { + builder.setReferenceName(alignmentRecord.getReferenceName()); + } + + if (alignmentRecord.getCigar() != null) { + builder.setCigarString(alignmentRecord.getCigar()); + } + if (alignmentRecord.getPrimaryAlignment() != null) { + builder.setNotPrimaryAlignmentFlag(!alignmentRecord.getPrimaryAlignment()); + } + if (alignmentRecord.getSupplementaryAlignment() != null) { + builder.setSupplementaryAlignmentFlag(alignmentRecord.getSupplementaryAlignment()); + } + if (alignmentRecord.getStart() != null) { + builder.setAlignmentStart(alignmentRecord.getStart().intValue() + 1); + } + if (alignmentRecord.getMappingQuality() != null) { + builder.setMappingQuality(alignmentRecord.getMappingQuality()); + } + } + else { + builder.setMappingQuality(0); + } + } + + if (alignmentRecord.getFailedVendorQualityChecks() != null) { + 
builder.setReadFailsVendorQualityCheckFlag(alignmentRecord.getFailedVendorQualityChecks()); + } + if (alignmentRecord.getMismatchingPositions() != null) { + builder.setAttribute("MD", alignmentRecord.getMismatchingPositions()); + } + if (alignmentRecord.getOriginalQuality() != null) { + builder.setOriginalBaseQualities(SAMUtils.fastqToPhred(alignmentRecord.getOriginalQuality())); + } + if (alignmentRecord.getOriginalCigar() != null) { + builder.setAttribute("OC", alignmentRecord.getOriginalCigar()); + } + if (alignmentRecord.getOriginalStart() != null) { + builder.setAttribute("OP", alignmentRecord.getOriginalStart().intValue() + 1); + } + + if (alignmentRecord.getAttributes() != null) { + String[] tokens = alignmentRecord.getAttributes().split("\t"); + for (String token : tokens) { + Matcher m = ATTRIBUTE.matcher(token); + if (m.matches()) { + String tagName = m.group(1); + String tagType = m.group(2); + String value = m.group(3); + + if ("B".equals(tagType)) { + m = ARRAY_ATTRIBUTE.matcher(value); + if (m.matches()) { + tagType = tagType + ":" + m.group(1); + value = m.group(2); + } + } + + switch (tagType) { + case "A": + builder.setAttribute(tagName, value.charAt(0)); + case "i": + builder.setAttribute(tagName, Integer.valueOf(value)); + case "f": + builder.setAttribute(tagName, Float.valueOf(value)); + case "Z": + builder.setAttribute(tagName, value); + case "H": + builder.setAttribute(tagName, value.getBytes()); + case "B:c,": + case "B:C,": + builder.setAttribute(tagName, splitToByteArray(value)); + case "B:i,": + case "B:I,": + builder.setAttribute(tagName, splitToIntegerArray(value)); + case "B:s,": + case "B:S,": + builder.setAttribute(tagName, splitToShortArray(value)); + case "B:f,": + builder.setAttribute(tagName, splitToFloatArray(value)); + default: + warnOrThrow(alignmentRecord, "invalid attribute type " + tagType, null, stringency, logger); + } + } + } + } + + return builder; + } + + /** + * Split the specified value to a byte array. 
+ * + * @param value value + * @return the specified value split to a byte array + */ + static byte[] splitToByteArray(final String value) { + String[] tokens = value.split(","); + byte[] bytes = new byte[tokens.length]; + for (int i = 0; i < tokens.length; i++) { + bytes[i] = Byte.valueOf(tokens[i]); + } + return bytes; + } + + /** + * Split the specified value to an integer array. + * + * @param value value + * @return the specified value split to an integer array + */ + static int[] splitToIntegerArray(final String value) { + String[] tokens = value.split(","); + int[] ints = new int[tokens.length]; + for (int i = 0; i < tokens.length; i++) { + ints[i] = Integer.valueOf(tokens[i]); + } + return ints; + } + + /** + * Split the specified value to a short array. + * + * @param value value + * @return the specified value split to a short array + */ + static short[] splitToShortArray(final String value) { + String[] tokens = value.split(","); + short[] shorts = new short[tokens.length]; + for (int i = 0; i < tokens.length; i++) { + shorts[i] = Short.valueOf(tokens[i]); + } + return shorts; + } + + /** + * Split the specified value to a float array. + * + * @param value value + * @return the specified value split to a float array + */ + static float[] splitToFloatArray(final String value) { + String[] tokens = value.split(","); + float[] floats = new float[tokens.length]; + for (int i = 0; i < tokens.length; i++) { + floats[i] = Float.valueOf(tokens[i]); + } + return floats; + } +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecordFactory.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecordFactory.java new file mode 100644 index 0000000..3f96143 --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecordFactory.java @@ -0,0 +1,55 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecord; + +import org.bdgenomics.convert.Converter; + +import org.bdgenomics.formats.avro.AlignmentRecord; + +/** + * Factory for creating AlignmentRecord to htsjdk SAMRecord converters, which + * require late binding for a SAMFileHeader. + * + * Thus instead of a converter instance, a converter factory is available via injection: + *
+ * final class MyClass {
+ *   private final AlignmentRecordToSamRecordFactory alignmentRecordToSamRecordFactory;
+ *
 + *   {@literal @}Inject
+ *   MyClass(final AlignmentRecordToSamRecordFactory alignmentRecordToSamRecordFactory) {
+ *     this.alignmentRecordToSamRecordFactory = alignmentRecordToSamRecordFactory;
+ *   }
+ *
+ *   void doIt() {
+ *     Converter<AlignmentRecord, SAMRecord> converter = alignmentRecordToSamRecordFactory.create(header);
+ *     SAMRecord record = converter.convert(alignmentRecord, stringency, logger);
+ *     // ...
+ * 
+ */ +public interface AlignmentRecordToSamRecordFactory { + + /** + * Create a new AlignmentRecord to htsjdk SAMRecord converter with the specified header. + * + * @param header header, must not be null + */ + Converter create(SAMFileHeader header); +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/GenotypesToVariantContext.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/GenotypesToVariantContext.java new file mode 100644 index 0000000..cbbfa37 --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/GenotypesToVariantContext.java @@ -0,0 +1,77 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.List; + +import com.google.inject.Inject; + +import com.google.inject.assistedinject.Assisted; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.Genotype; + +import org.slf4j.Logger; + +/** + * Convert a list of Genotypes to a VariantContext. 
+ */ +public final class GenotypesToVariantContext extends AbstractConverter, VariantContext> { + + /** Header. */ + private final VCFHeader header; + + + /** + * Create a new list of Genotypes to VariantContext converter with the specified header. + * + * @param header header, must not be null + */ + @Inject + public GenotypesToVariantContext(@Assisted final VCFHeader header) { + super(List.class, VariantContext.class); + + checkNotNull(header); + this.header = header; + } + + + @Override + public VariantContext convert(final List genotypes, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (genotypes == null) { + warnOrThrow(genotypes, "must not be null", null, stringency, logger); + return null; + } + if (genotypes.isEmpty()) { + warnOrThrow(genotypes, "must not be empty", null, stringency, logger); + return null; + } + + return null; + } +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/GenotypesToVariantContextFactory.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/GenotypesToVariantContextFactory.java new file mode 100644 index 0000000..7eddd7b --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/GenotypesToVariantContextFactory.java @@ -0,0 +1,42 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.List; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.Converter; + +import org.bdgenomics.formats.avro.Genotype; + +/** + * Factory for creating list of Genotypes to VariantContext converters, which + * require late binding for a VCFHeader. + */ +public interface GenotypesToVariantContextFactory { + + /** + * Create a new list of Genotypes to VariantContext converter with the specified header. + * + * @param header header, must not be null + */ + Converter, VariantContext> create(VCFHeader header); +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java index 8f5d158..8964318 100644 --- a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java @@ -17,22 +17,52 @@ */ package org.bdgenomics.convert.htsjdk; +import java.util.List; + +import htsjdk.samtools.SAMRecord; import htsjdk.samtools.ValidationStringency; +import htsjdk.variant.variantcontext.VariantContext; + import com.google.inject.AbstractModule; import com.google.inject.Provides; import com.google.inject.Singleton; +import com.google.inject.TypeLiteral; + +import com.google.inject.assistedinject.FactoryModuleBuilder; import org.bdgenomics.convert.Converter; import org.bdgenomics.convert.ConversionStringency; +import org.bdgenomics.formats.avro.AlignmentRecord; +import org.bdgenomics.formats.avro.Genotype; +import org.bdgenomics.formats.avro.Variant; + /** * Guice module for the org.bdgenomics.convert.htsjdk package. 
*/ public final class HtsjdkModule extends AbstractModule { @Override protected void configure() { - // empty + install(new FactoryModuleBuilder() + .implement(new TypeLiteral>() {}, AlignmentRecordToSamRecord.class) + .build(AlignmentRecordToSamRecordFactory.class)); + + install(new FactoryModuleBuilder() + .implement(new TypeLiteral, VariantContext>>() {}, GenotypesToVariantContext.class) + .build(GenotypesToVariantContextFactory.class)); + + install(new FactoryModuleBuilder() + .implement(new TypeLiteral, VariantContext>>() {}, VariantsToVariantContext.class) + .build(VariantsToVariantContextFactory.class)); + + install(new FactoryModuleBuilder() + .implement(new TypeLiteral>>() {}, VariantContextToGenotypes.class) + .build(VariantContextToGenotypesFactory.class)); + + install(new FactoryModuleBuilder() + .implement(new TypeLiteral>>() {}, VariantContextToVariants.class) + .build(VariantContextToVariantsFactory.class)); } @Provides @Singleton @@ -44,4 +74,9 @@ Converter createConversionStringency Converter createValidationStringencyToConversionStringency() { return new ValidationStringencyToConversionStringency(); } + + @Provides @Singleton + Converter createSamRecordToAlignmentRecord() { + return new SamRecordToAlignmentRecord(); + } } diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamRecordToAlignmentRecord.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamRecordToAlignmentRecord.java new file mode 100644 index 0000000..10f238b --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamRecordToAlignmentRecord.java @@ -0,0 +1,290 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import htsjdk.samtools.SAMBinaryTagAndValue; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMReadGroupRecord; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMTag; +import htsjdk.samtools.SAMUtils; +import htsjdk.samtools.TextTagCodec; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.AlignmentRecord; + +import org.slf4j.Logger; + +/** + * Convert htsjdk SAMRecord to AlignmentRecord. + */ +public final class SamRecordToAlignmentRecord extends AbstractConverter { + + /** Regex to capture the first cigar operation. */ + private static final Pattern FIRST_CIGAR_OPERATION = Pattern.compile("^([0-9]*)([A-Z]).*$"); + + /** Regex to capture the last cigar operation. */ + private static final Pattern LAST_CIGAR_OPERATION = Pattern.compile("^.*([0-9]*)([A-Z])$"); + + /** SAM text tag codec. */ + private static final TextTagCodec TAG_CODEC = new TextTagCodec(); + + + /** + * Create a new htsjdk SAMRecord to AlignmentRecord converter. 
+ */ + public SamRecordToAlignmentRecord() { + super(SAMRecord.class, AlignmentRecord.class); + } + + + @Override + public AlignmentRecord convert(final SAMRecord samRecord, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (samRecord == null) { + warnOrThrow(samRecord, "must not be null", null, stringency, logger); + return null; + } + + AlignmentRecord.Builder builder = AlignmentRecord.newBuilder() + .setReadName(samRecord.getReadName()) + .setSequence(samRecord.getReadString()); + + String cigar = samRecord.getCigarString(); + if (cigar != null && !"*".equals(cigar)) { + builder.setCigar(cigar); + builder.setBasesTrimmedFromStart(startTrim(cigar)); + builder.setBasesTrimmedFromEnd(endTrim(cigar)); + } + + if (samRecord.getBaseQualityString() != "*") { + builder.setQuality(samRecord.getBaseQualityString()); + } + if (samRecord.getOriginalBaseQualities() != null) { + builder.setOriginalQuality(SAMUtils.phredToFastq(samRecord.getOriginalBaseQualities())); + } + + int readReference = samRecord.getReferenceIndex(); + if (readReference != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { + builder.setReferenceName(samRecord.getReferenceName()); + + long start = (long) samRecord.getAlignmentStart(); + if (start < 1L) { + warnOrThrow(samRecord, "alignment start must be greater than zero if read aligned", null, stringency, logger); + } + else { + builder.setStart(start - 1L); + + long end = start - 1L + samRecord.getCigar().getReferenceLength(); + builder.setEnd(end); + } + + int mappingQuality = samRecord.getMappingQuality(); + if (mappingQuality != SAMRecord.UNKNOWN_MAPPING_QUALITY) { + builder.setMappingQuality(mappingQuality); + } + + if (samRecord.getAttribute("OP") != null) { + builder.setOriginalStart(samRecord.getIntegerAttribute("OP") - 1L); + builder.setOriginalCigar(samRecord.getStringAttribute("OC")); + } + } + + builder.setReadMapped(!samRecord.getReadUnmappedFlag()); + 
builder.setReadNegativeStrand(samRecord.getReadNegativeStrandFlag()); + builder.setPrimaryAlignment(!samRecord.getNotPrimaryAlignmentFlag()); + builder.setSupplementaryAlignment(samRecord.getSupplementaryAlignmentFlag()); + + int mateReferenceIndex = samRecord.getMateReferenceIndex(); + if (mateReferenceIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { + builder.setMateReferenceName(samRecord.getMateReferenceName()); + + long mateStart = (long) samRecord.getMateAlignmentStart(); + if (mateStart < 1L) { + warnOrThrow(samRecord, "mate start must be greater than zero if mate aligned", null, stringency, logger); + } + else { + builder.setMateAlignmentStart(mateStart - 1L); + } + } + + if (samRecord.getFlags() != 0) { + if (samRecord.getReadPairedFlag()) { + builder.setReadPaired(true); + if (samRecord.getMateNegativeStrandFlag()) { + builder.setMateNegativeStrand(true); + } + if (!samRecord.getMateUnmappedFlag()) { + builder.setMateMapped(true); + } + if (samRecord.getProperPairFlag()) { + builder.setProperPair(true); + } + if (samRecord.getFirstOfPairFlag()) { + builder.setReadInFragment(0); + } + if (samRecord.getSecondOfPairFlag()) { + builder.setReadInFragment(1); + } + } + if (samRecord.getDuplicateReadFlag()) { + builder.setDuplicateRead(true); + } + if (samRecord.getReadFailsVendorQualityCheckFlag()) { + builder.setFailedVendorQualityChecks(true); + } + } + + long insertSize = (long) samRecord.getInferredInsertSize(); + if (insertSize != 0L) { + builder.setInsertSize(insertSize); + } + + SAMReadGroupRecord readGroup = samRecord.getReadGroup(); + if (readGroup != null) { + builder.setReadGroupId(readGroup.getReadGroupId()); + builder.setReadGroupSampleId(readGroup.getSample()); + } + + String md = samRecord.getStringAttribute("MD"); + if (md != null) { + builder.setMismatchingPositions(md); + } + + String attributes = encodeAttributes(getBinaryAttributes(samRecord)); + if (!attributes.isEmpty()) { + builder.setAttributes(attributes); + } + + return 
builder.build(); + } + + /** + * Calculate the hard clipped trim in bases from the start of the specified cigar string. + * + * @param cigar cigar string + * @return the hard clipped trim in bases from the start of the specified cigar string + */ + static int startTrim(final String cigar) { + int startTrim = 0; + Matcher m = FIRST_CIGAR_OPERATION.matcher(cigar); + if (m.matches()) { + String operator = m.group(2); + if ("H".equals(operator)) { + startTrim = Integer.parseInt(m.group(1)); + } + } + return startTrim; + } + + /** + * Calculate the hard clipped trim in bases from the end of the specified cigar string. + * + * @param cigar cigar string + * @return the hard clipped trim in bases from the end of the specified cigar string + */ + static int endTrim(final String cigar) { + int endTrim = 0; + Matcher m = LAST_CIGAR_OPERATION.matcher(cigar); + if (m.matches()) { + String operator = m.group(2); + if ("H".equals(operator)) { + endTrim = Integer.parseInt(m.group(1)); + } + } + return endTrim; + } + + /** + * Return true if the specified attribute tag should be skipped. + * + * @param tag attribute tag + * @return true if the specified attribute tag should be skipped + */ + static boolean skipTag(final short tag) { + return tag == SAMTag.MD.getBinaryTag() || tag == SAMTag.OC.getBinaryTag() || tag == SAMTag.OP.getBinaryTag() || tag == SAMTag.OQ.getBinaryTag(); + } + + /** + * Return SAMRecord.getBinaryAttributes() via reflection. + * + * @param samRecord SAM record + * @return SAMRecord.getBinaryAttributes() via reflection + */ + static SAMBinaryTagAndValue getBinaryAttributes(final SAMRecord samRecord) { + try { + java.lang.reflect.Field f = samRecord.getClass().getDeclaredField("mAttributes"); + f.setAccessible(true); + return (SAMBinaryTagAndValue) f.get(samRecord); + } + catch (Exception e) { + return null; + } + } + + /** + * Return TextTagCodec.encodeUnsignedArray(String, Object) via reflection. 
+ * + * @param tag tag + * @param value value + * @return TextTagCodec.encodeUnsignedArray(String, Object) via reflection + */ + static String encodeUnsignedArray(final String tag, final Object value) { + try { + java.lang.reflect.Method m = TAG_CODEC.getClass().getDeclaredMethod("encodeUnsignedArray"); + m.setAccessible(true); + return (String) m.invoke(TAG_CODEC, new Object[] { tag, value }, new Class[] { String.class, Object.class }); + } + catch (Exception e) { + return null; + } + } + + /** + * Encode the specified attribute and its child attributes to a string. + * + * @param attribute attribute + * @return the specified attribute and its child attributes to a string + */ + static String encodeAttributes(SAMBinaryTagAndValue attribute) { + StringBuilder sb = new StringBuilder(); + while (attribute != null) { + if (!skipTag(attribute.tag)) { + final String encodedTag; + if (attribute.isUnsignedArray()) { + encodedTag = encodeUnsignedArray(SAMTag.makeStringTag(attribute.tag), attribute.value); + } + else { + encodedTag = TAG_CODEC.encode(SAMTag.makeStringTag(attribute.tag), attribute.value); + } + sb.append(encodedTag); + sb.append("\t"); + } + attribute = attribute.getNext(); + } + return sb.toString().trim(); + } +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToGenotypes.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToGenotypes.java new file mode 100644 index 0000000..dc993a1 --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToGenotypes.java @@ -0,0 +1,73 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.List; + +import com.google.inject.Inject; + +import com.google.inject.assistedinject.Assisted; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.Genotype; + +import org.slf4j.Logger; + +/** + * Convert VariantContext to a list of Genotypes. + */ +public final class VariantContextToGenotypes extends AbstractConverter> { + + /** Header. */ + private final VCFHeader header; + + + /** + * Create a new VariantContext to list of Genotypes converter with the specified header. 
+ * + * @param header header, must not be null + */ + @Inject + public VariantContextToGenotypes(@Assisted final VCFHeader header) { + super(VariantContext.class, List.class); + + checkNotNull(header); + this.header = header; + } + + + @Override + public List convert(final VariantContext variantContext, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (variantContext == null) { + warnOrThrow(variantContext, "must not be null", null, stringency, logger); + return null; + } + + return null; + } +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToGenotypesFactory.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToGenotypesFactory.java new file mode 100644 index 0000000..241b08d --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToGenotypesFactory.java @@ -0,0 +1,42 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.bdgenomics.convert.htsjdk; + +import java.util.List; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.Converter; + +import org.bdgenomics.formats.avro.Genotype; + +/** + * Factory for creating VariantContext to list of Genotypes converters, which + * require late binding for a VCFHeader. + */ +public interface VariantContextToGenotypesFactory { + + /** + * Create a new VariantContext to list of Genotypes converter with the specified header. + * + * @param header header, must not be null + */ + Converter> create(VCFHeader header); +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToVariants.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToVariants.java new file mode 100644 index 0000000..015bdad --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToVariants.java @@ -0,0 +1,73 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.bdgenomics.convert.htsjdk; + +import java.util.List; + +import com.google.inject.Inject; + +import com.google.inject.assistedinject.Assisted; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.Variant; + +import org.slf4j.Logger; + +/** + * Convert VariantContext to a list of Variants. + */ +public final class VariantContextToVariants extends AbstractConverter> { + + /** Header. */ + private final VCFHeader header; + + + /** + * Create a new VariantContext to list of Variants converter with the specified header. + * + * @param header header, must not be null + */ + @Inject + public VariantContextToVariants(@Assisted final VCFHeader header) { + super(VariantContext.class, List.class); + + checkNotNull(header); + this.header = header; + } + + + @Override + public List convert(final VariantContext variantContext, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (variantContext == null) { + warnOrThrow(variantContext, "must not be null", null, stringency, logger); + return null; + } + + return null; + } +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToVariantsFactory.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToVariantsFactory.java new file mode 100644 index 0000000..1bf2eeb --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantContextToVariantsFactory.java @@ -0,0 +1,42 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.List; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.Converter; + +import org.bdgenomics.formats.avro.Variant; + +/** + * Factory for creating VariantContext to list of Variants converters, which + * require late binding for a VCFHeader. + */ +public interface VariantContextToVariantsFactory { + + /** + * Create a new VariantContext to list of Variants converter with the specified header. + * + * @param header header, must not be null + */ + Converter> create(VCFHeader header); +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContext.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContext.java new file mode 100644 index 0000000..a47c4c6 --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContext.java @@ -0,0 +1,77 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.List; + +import com.google.inject.Inject; + +import com.google.inject.assistedinject.Assisted; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.Variant; + +import org.slf4j.Logger; + +/** + * Convert a list of Variants to a VariantContext. + */ +public final class VariantsToVariantContext extends AbstractConverter, VariantContext> { + + /** Header. */ + private final VCFHeader header; + + + /** + * Create a new list of Variants to VariantContext converter with the specified header. 
+ * + * @param header header, must not be null + */ + @Inject + public VariantsToVariantContext(@Assisted final VCFHeader header) { + super(List.class, VariantContext.class); + + checkNotNull(header); + this.header = header; + } + + + @Override + public VariantContext convert(final List variants, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (variants == null) { + warnOrThrow(variants, "must not be null", null, stringency, logger); + return null; + } + if (variants.isEmpty()) { + warnOrThrow(variants, "must not be empty", null, stringency, logger); + return null; + } + + return null; + } +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextFactory.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextFactory.java new file mode 100644 index 0000000..8f4eb48 --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextFactory.java @@ -0,0 +1,42 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.bdgenomics.convert.htsjdk; + +import java.util.List; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.Converter; + +import org.bdgenomics.formats.avro.Variant; + +/** + * Factory for creating list of Variants to VariantContext converters, which + * require late binding for a VCFHeader. + */ +public interface VariantsToVariantContextFactory { + + /** + * Create a new list of Variants to VariantContext converter with the specified header. + * + * @param header header, must not be null + */ + Converter, VariantContext> create(VCFHeader header); +} diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecordTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecordTest.java new file mode 100644 index 0000000..db2c89a --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecordTest.java @@ -0,0 +1,120 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMSequenceRecord; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.bdgenomics.formats.avro.AlignmentRecord; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for AlignmentRecordToSamRecord. + */ +public final class AlignmentRecordToSamRecordTest { + private final Logger logger = LoggerFactory.getLogger(AlignmentRecordToSamRecordTest.class); + + SAMFileHeader header; + Converter converter; + + @Before + public void setUp() { + header = new SAMFileHeader(); + SAMSequenceRecord sequenceRecord = new SAMSequenceRecord("1", 3000000); + header.getSequenceDictionary().addSequence(sequenceRecord); + + converter = new AlignmentRecordToSamRecord(header); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=NullPointerException.class) + public void testConstructorNullHeader() { + new AlignmentRecordToSamRecord(null); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test(expected=ConversionException.class) + public void testConvertNullReferenceNameStrict() { + AlignmentRecord alignmentRecord = AlignmentRecord.newBuilder() + .setReadMapped(true) + .setReferenceName(null) + .build(); + + 
converter.convert(alignmentRecord, STRICT, logger); + } + + @Test + public void testConvertNullReferenceNameLenient() { + AlignmentRecord alignmentRecord = AlignmentRecord.newBuilder() + .setReadMapped(true) + .setReferenceName(null) + .build(); + + SAMRecord samRecord = converter.convert(alignmentRecord, LENIENT, logger); + assertEquals("*", samRecord.getReferenceName()); + } + + @Test + public void testConvertNullReferenceNameSilent() { + AlignmentRecord alignmentRecord = AlignmentRecord.newBuilder() + .setReadMapped(true) + .setReferenceName(null) + .build(); + + SAMRecord samRecord = converter.convert(alignmentRecord, SILENT, logger); + assertEquals("*", samRecord.getReferenceName()); + } + + @Test + public void testConvert() { + // todo + } +} diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/GenotypesToVariantContextTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/GenotypesToVariantContextTest.java new file mode 100644 index 0000000..307e487 --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/GenotypesToVariantContextTest.java @@ -0,0 +1,104 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.util.Collections; +import java.util.List; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.bdgenomics.formats.avro.Genotype; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for GenotypesToVariantContext. + */ +public final class GenotypesToVariantContextTest { + private final Logger logger = LoggerFactory.getLogger(GenotypesToVariantContextTest.class); + + VCFHeader header; + List empty = Collections.emptyList(); + Converter, VariantContext> converter; + + @Before + public void setUp() { + header = new VCFHeader(); + converter = new GenotypesToVariantContext(header); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=NullPointerException.class) + public void testConstructorNullHeader() { + new GenotypesToVariantContext(null); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test(expected=ConversionException.class) + public void testConvertEmptySourceStrict() { + converter.convert(empty, STRICT, logger); + } + + @Test + public void testConvertEmptySourceLenient() { + assertEquals(null, converter.convert(empty, LENIENT, logger)); + } + + @Test + public void 
testConvertEmptySourceSilent() { + assertEquals(null, converter.convert(empty, SILENT, logger)); + } + + @Test + public void testConvert() { + // todo + } +} diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java index 8891a5b..7676c5e 100644 --- a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java @@ -24,14 +24,24 @@ import com.google.inject.Injector; import com.google.inject.Guice; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecord; import htsjdk.samtools.ValidationStringency; +import htsjdk.variant.vcf.VCFHeader; + +import htsjdk.variant.variantcontext.VariantContext; + import org.junit.Before; import org.junit.Test; import org.bdgenomics.convert.Converter; import org.bdgenomics.convert.ConversionStringency; +import org.bdgenomics.formats.avro.AlignmentRecord; +import org.bdgenomics.formats.avro.Genotype; +import org.bdgenomics.formats.avro.Variant; + /** * Unit test for HtsjdkModule. 
*/ @@ -54,6 +64,21 @@ public void testHtsjdkModule() { Target target = injector.getInstance(Target.class); assertNotNull(target.getConversionStringencyToValidationStringency()); assertNotNull(target.getValidationStringencyToConversionStringency()); + assertNotNull(target.getSamRecordToAlignmentRecord()); + + SAMFileHeader samFileHeader = new SAMFileHeader(); + assertNotNull(target.getAlignmentRecordToSamRecordFactory()); + assertNotNull(target.getAlignmentRecordToSamRecordFactory().create(samFileHeader)); + + VCFHeader vcfHeader = new VCFHeader(); + assertNotNull(target.getGenotypesToVariantContextFactory()); + assertNotNull(target.getGenotypesToVariantContextFactory().create(vcfHeader)); + assertNotNull(target.getVariantsToVariantContextFactory()); + assertNotNull(target.getVariantsToVariantContextFactory().create(vcfHeader)); + assertNotNull(target.getVariantContextToGenotypesFactory()); + assertNotNull(target.getVariantContextToGenotypesFactory().create(vcfHeader)); + assertNotNull(target.getVariantContextToVariantsFactory()); + assertNotNull(target.getVariantContextToVariantsFactory().create(vcfHeader)); } /** @@ -62,13 +87,31 @@ public void testHtsjdkModule() { static class Target { final Converter conversionStringencyToValidationStringency; final Converter validationStringencyToConversionStringency; + final Converter samRecordToAlignmentRecord; + final AlignmentRecordToSamRecordFactory alignmentRecordToSamRecordFactory; + final GenotypesToVariantContextFactory genotypesToVariantContextFactory; + final VariantsToVariantContextFactory variantsToVariantContextFactory; + final VariantContextToGenotypesFactory variantContextToGenotypesFactory; + final VariantContextToVariantsFactory variantContextToVariantsFactory; @Inject Target(final Converter conversionStringencyToValidationStringency, - final Converter validationStringencyToConversionStringency) { + final Converter validationStringencyToConversionStringency, + final Converter samRecordToAlignmentRecord, + 
final AlignmentRecordToSamRecordFactory alignmentRecordToSamRecordFactory, + final GenotypesToVariantContextFactory genotypesToVariantContextFactory, + final VariantsToVariantContextFactory variantsToVariantContextFactory, + final VariantContextToGenotypesFactory variantContextToGenotypesFactory, + final VariantContextToVariantsFactory variantContextToVariantsFactory) { + this.conversionStringencyToValidationStringency = conversionStringencyToValidationStringency; this.validationStringencyToConversionStringency = validationStringencyToConversionStringency; - + this.samRecordToAlignmentRecord = samRecordToAlignmentRecord; + this.alignmentRecordToSamRecordFactory = alignmentRecordToSamRecordFactory; + this.genotypesToVariantContextFactory = genotypesToVariantContextFactory; + this.variantsToVariantContextFactory = variantsToVariantContextFactory; + this.variantContextToGenotypesFactory = variantContextToGenotypesFactory; + this.variantContextToVariantsFactory = variantContextToVariantsFactory; } Converter getConversionStringencyToValidationStringency() { @@ -78,6 +121,30 @@ Converter getConversionStringencyToV Converter getValidationStringencyToConversionStringency() { return validationStringencyToConversionStringency; } + + Converter getSamRecordToAlignmentRecord() { + return samRecordToAlignmentRecord; + } + + AlignmentRecordToSamRecordFactory getAlignmentRecordToSamRecordFactory() { + return alignmentRecordToSamRecordFactory; + } + + GenotypesToVariantContextFactory getGenotypesToVariantContextFactory() { + return genotypesToVariantContextFactory; + } + + VariantsToVariantContextFactory getVariantsToVariantContextFactory() { + return variantsToVariantContextFactory; + } + + VariantContextToGenotypesFactory getVariantContextToGenotypesFactory() { + return variantContextToGenotypesFactory; + } + + VariantContextToVariantsFactory getVariantContextToVariantsFactory() { + return variantContextToVariantsFactory; + } } /** diff --git 
a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamRecordToAlignmentRecordTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamRecordToAlignmentRecordTest.java new file mode 100644 index 0000000..5d4a6a8 --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamRecordToAlignmentRecordTest.java @@ -0,0 +1,154 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMSequenceRecord; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.AlignmentRecord; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for SamRecordToAlignmentRecord. 
+ */ +public final class SamRecordToAlignmentRecordTest { + private final Logger logger = LoggerFactory.getLogger(SamRecordToAlignmentRecordTest.class); + + Converter converter; + SAMFileHeader header; + + @Before + public void setUp() { + header = new SAMFileHeader(); + SAMSequenceRecord sequenceRecord = new SAMSequenceRecord("1", 3000000); + header.getSequenceDictionary().addSequence(sequenceRecord); + + converter = new SamRecordToAlignmentRecord(); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test(expected=ConversionException.class) + public void testConvertInvalidAlignmentStartStrict() { + SAMRecord samRecord = new SAMRecord(header); + samRecord.setReferenceIndex(0); + samRecord.setReferenceName("1"); + samRecord.setAlignmentStart(0); + + converter.convert(samRecord, STRICT, logger); + } + + @Test + public void testConvertInvalidAlignmentStartLenient() { + SAMRecord samRecord = new SAMRecord(header); + samRecord.setReferenceIndex(0); + samRecord.setReferenceName("1"); + samRecord.setAlignmentStart(0); + + AlignmentRecord alignmentRecord = converter.convert(samRecord, LENIENT, logger); + assertEquals(samRecord.getReferenceName(), alignmentRecord.getReferenceName()); + assertNull(alignmentRecord.getStart()); + assertNull(alignmentRecord.getEnd()); + } + + @Test + public void testConvertInvalidAlignmentStartSilent() { + SAMRecord samRecord = new SAMRecord(header); + samRecord.setReferenceIndex(0); + samRecord.setReferenceName("1"); + samRecord.setAlignmentStart(0); + + AlignmentRecord alignmentRecord = converter.convert(samRecord, SILENT, 
logger); + assertEquals(samRecord.getReferenceName(), alignmentRecord.getReferenceName()); + assertNull(alignmentRecord.getStart()); + assertNull(alignmentRecord.getEnd()); + } + + @Test(expected=ConversionException.class) + public void testConvertInvalidMateAlignmentStartStrict() { + SAMRecord samRecord = new SAMRecord(header); + samRecord.setMateReferenceIndex(0); + samRecord.setMateReferenceName("1"); + samRecord.setMateAlignmentStart(0); + + converter.convert(samRecord, STRICT, logger); + } + + @Test + public void testConvertInvalidMateAlignmentStartLenient() { + SAMRecord samRecord = new SAMRecord(header); + samRecord.setMateReferenceIndex(0); + samRecord.setMateReferenceName("1"); + samRecord.setMateAlignmentStart(0); + + AlignmentRecord alignmentRecord = converter.convert(samRecord, LENIENT, logger); + assertEquals(samRecord.getMateReferenceName(), alignmentRecord.getMateReferenceName()); + assertNull(alignmentRecord.getMateAlignmentStart()); + } + + @Test + public void testConvertInvalidMateAlignmentStartSilent() { + SAMRecord samRecord = new SAMRecord(header); + samRecord.setMateReferenceIndex(0); + samRecord.setMateReferenceName("1"); + samRecord.setMateAlignmentStart(0); + + AlignmentRecord alignmentRecord = converter.convert(samRecord, SILENT, logger); + assertEquals(samRecord.getMateReferenceName(), alignmentRecord.getMateReferenceName()); + assertNull(alignmentRecord.getMateAlignmentStart()); + } + + @Test + public void testConvert() { + // todo + } +} diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantContextToGenotypesTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantContextToGenotypesTest.java new file mode 100644 index 0000000..955410f --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantContextToGenotypesTest.java @@ -0,0 +1,87 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.util.List; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.bdgenomics.formats.avro.Genotype; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for VariantContextToGenotypes. 
+ */ +public final class VariantContextToGenotypesTest { + private final Logger logger = LoggerFactory.getLogger(VariantContextToGenotypesTest.class); + + VCFHeader header; + Converter<VariantContext, List<Genotype>> converter; + + @Before + public void setUp() { + header = new VCFHeader(); + converter = new VariantContextToGenotypes(header); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=NullPointerException.class) + public void testConstructorNullHeader() { + new VariantContextToGenotypes(null); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test + public void testConvert() { + // todo + } +} diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantContextToVariantsTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantContextToVariantsTest.java new file mode 100644 index 0000000..e289a37 --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantContextToVariantsTest.java @@ -0,0 +1,87 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.util.List; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.bdgenomics.formats.avro.Variant; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for VariantContextToVariants. 
+ */ +public final class VariantContextToVariantsTest { + private final Logger logger = LoggerFactory.getLogger(VariantContextToVariantsTest.class); + + VCFHeader header; + Converter<VariantContext, List<Variant>> converter; + + @Before + public void setUp() { + header = new VCFHeader(); + converter = new VariantContextToVariants(header); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=NullPointerException.class) + public void testConstructorNullHeader() { + new VariantContextToVariants(null); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test + public void testConvert() { + // todo + } +} diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextTest.java new file mode 100644 index 0000000..a21c5cf --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextTest.java @@ -0,0 +1,104 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.util.Collections; +import java.util.List; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.bdgenomics.formats.avro.Variant; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for VariantsToVariantContext. 
+ */ +public final class VariantsToVariantContextTest { + private final Logger logger = LoggerFactory.getLogger(VariantsToVariantContextTest.class); + + VCFHeader header; + List<Variant> empty = Collections.emptyList(); + Converter<List<Variant>, VariantContext> converter; + + @Before + public void setUp() { + header = new VCFHeader(); + converter = new VariantsToVariantContext(header); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=NullPointerException.class) + public void testConstructorNullHeader() { + new VariantsToVariantContext(null); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test(expected=ConversionException.class) + public void testConvertEmptySourceStrict() { + converter.convert(empty, STRICT, logger); + } + + @Test + public void testConvertEmptySourceLenient() { + assertEquals(null, converter.convert(empty, LENIENT, logger)); + } + + @Test + public void testConvertEmptySourceSilent() { + assertEquals(null, converter.convert(empty, SILENT, logger)); + } + + @Test + public void testConvert() { + // todo + } +} diff --git a/convert/src/main/java/org/bdgenomics/convert/AbstractConverter.java b/convert/src/main/java/org/bdgenomics/convert/AbstractConverter.java index e6e25f5..3bde263 100644 --- a/convert/src/main/java/org/bdgenomics/convert/AbstractConverter.java +++ b/convert/src/main/java/org/bdgenomics/convert/AbstractConverter.java @@ -63,10 +63,21 @@ public final Class getTargetClass() { return targetClass; } + /** + * Check the specified value is not null.
+ * + * @param value value, must not be null + */ + protected final void checkNotNull(final Object value) { + if (value == null) { + throw new NullPointerException("value must not be null"); + } + } + /** * Check the specified converter is not null. * - * @param converter converter, must not b null + * @param converter converter, must not be null * @throws NullPointerException if converter is null */ protected final void checkNotNull(final Converter converter) { diff --git a/pom.xml b/pom.xml index 43ff212..27b89b2 100644 --- a/pom.xml +++ b/pom.xml @@ -23,7 +23,7 @@ 0.6.0a10 27.0-jre 4.2.1 - 2.18.2 + 2.19.0 3.0.2 4.12 3.0.0-beta-3 @@ -166,6 +166,11 @@ guice ${guice.version}
+ + com.google.inject.extensions + guice-assistedinject + ${guice.version} + com.google.guava guava From 89348507b7e79775a0921e965c2bf968f3d0e3bf Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Sat, 10 Aug 2019 14:33:26 -0500 Subject: [PATCH 07/13] adding header conversions, wip --- .../convert/htsjdk/HtsjdkModule.java | 38 +++++++ .../htsjdk/SamHeaderToProcessingSteps.java | 57 ++++++++++ .../convert/htsjdk/SamHeaderToReadGroups.java | 57 ++++++++++ .../convert/htsjdk/SamHeaderToReferences.java | 82 ++++++++++++++ .../htsjdk/SamSequenceRecordToReference.java | 80 +++++++++++++ .../convert/htsjdk/VcfHeaderToReferences.java | 85 ++++++++++++++ .../convert/htsjdk/VcfHeaderToSamples.java | 59 ++++++++++ .../convert/htsjdk/HtsjdkModuleTest.java | 58 ++++++++++ .../SamHeaderToProcessingStepsTest.java | 81 ++++++++++++++ .../htsjdk/SamHeaderToReadGroupsTest.java | 81 ++++++++++++++ .../htsjdk/SamHeaderToReferencesTest.java | 104 +++++++++++++++++ .../htsjdk/VcfHeaderToReferencesTest.java | 105 ++++++++++++++++++ .../htsjdk/VcfHeaderToSamplesTest.java | 81 ++++++++++++++ pom.xml | 2 +- 14 files changed, 969 insertions(+), 1 deletion(-) create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingSteps.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroups.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToReferences.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamSequenceRecordToReference.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToReferences.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamples.java create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingStepsTest.java create mode 100644 
convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroupsTest.java create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToReferencesTest.java create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToReferencesTest.java create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamplesTest.java diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java index 8964318..0cf6407 100644 --- a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java @@ -19,11 +19,15 @@ import java.util.List; +import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.ValidationStringency; import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFHeader; + import com.google.inject.AbstractModule; import com.google.inject.Provides; import com.google.inject.Singleton; @@ -36,6 +40,10 @@ import org.bdgenomics.formats.avro.AlignmentRecord; import org.bdgenomics.formats.avro.Genotype; +import org.bdgenomics.formats.avro.ProcessingStep; +import org.bdgenomics.formats.avro.ReadGroup; +import org.bdgenomics.formats.avro.Reference; +import org.bdgenomics.formats.avro.Sample; import org.bdgenomics.formats.avro.Variant; /** @@ -79,4 +87,34 @@ Converter createValidationStringency Converter createSamRecordToAlignmentRecord() { return new SamRecordToAlignmentRecord(); } + + @Provides @Singleton + Converter createSamSequenceRecordToReference() { + return new SamSequenceRecordToReference(); + } + + @Provides @Singleton + Converter> createSamHeaderToReferences(final Converter referenceConverter) { + return new SamHeaderToReferences(referenceConverter); + } + + @Provides 
@Singleton + Converter> createSamHeaderToReadGroups() { + return new SamHeaderToReadGroups(); + } + + @Provides @Singleton + Converter> createSamHeaderToProcessingSteps() { + return new SamHeaderToProcessingSteps(); + } + + @Provides @Singleton + Converter> createVcfHeaderToReferences(final Converter referenceConverter) { + return new VcfHeaderToReferences(referenceConverter); + } + + @Provides @Singleton + Converter> createVcfHeaderToSamples() { + return new VcfHeaderToSamples(); + } } diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingSteps.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingSteps.java new file mode 100644 index 0000000..c69309f --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingSteps.java @@ -0,0 +1,57 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.bdgenomics.convert.htsjdk; + +import java.util.List; + +import htsjdk.samtools.SAMFileHeader; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.ProcessingStep; + +import org.slf4j.Logger; + +/** + * Convert a SAMFileHeader to a list of ProcessingSteps. + */ +public final class SamHeaderToProcessingSteps extends AbstractConverter<SAMFileHeader, List<ProcessingStep>> { + + /** + * Create a new SAMFileHeader to a list of ProcessingSteps converter. + */ + public SamHeaderToProcessingSteps() { + super(SAMFileHeader.class, List.class); + } + + + @Override + public List<ProcessingStep> convert(final SAMFileHeader header, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (header == null) { + warnOrThrow(header, "must not be null", null, stringency, logger); + return null; + } + + return null; + } +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroups.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroups.java new file mode 100644 index 0000000..99cc234 --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroups.java @@ -0,0 +1,57 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.List; + +import htsjdk.samtools.SAMFileHeader; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.ReadGroup; + +import org.slf4j.Logger; + +/** + * Convert a SAMFileHeader to a list of ReadGroups. + */ +public final class SamHeaderToReadGroups extends AbstractConverter<SAMFileHeader, List<ReadGroup>> { + + /** + * Create a new SAMFileHeader to a list of ReadGroups converter. + */ + public SamHeaderToReadGroups() { + super(SAMFileHeader.class, List.class); + } + + + @Override + public List<ReadGroup> convert(final SAMFileHeader header, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (header == null) { + warnOrThrow(header, "must not be null", null, stringency, logger); + return null; + } + + return null; + } +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToReferences.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToReferences.java new file mode 100644 index 0000000..92ed4eb --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToReferences.java @@ -0,0 +1,82 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.ArrayList; +import java.util.List; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMSequenceRecord; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.Reference; + +import org.slf4j.Logger; + +/** + * Convert a SAMFileHeader to a list of References. + */ +public final class SamHeaderToReferences extends AbstractConverter> { + + /** Convert SAMSequenceRecord to Reference. */ + private final Converter referenceConverter; + + + /** + * Create a new SAMFileHeader to a list of References converter. + */ + public SamHeaderToReferences() { + this(new SamSequenceRecordToReference()); + } + + /** + * Create a new SAMFileHeader to a list of References converter. 
+ * + * @param referenceConverter reference converter, must not be null + */ + SamHeaderToReferences(final Converter referenceConverter) { + super(SAMFileHeader.class, List.class); + + checkNotNull(referenceConverter); + this.referenceConverter = referenceConverter; + } + + + @Override + public List convert(final SAMFileHeader header, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (header == null) { + warnOrThrow(header, "must not be null", null, stringency, logger); + return null; + } + + List references = new ArrayList(); + if (header.getSequenceDictionary() != null && header.getSequenceDictionary().getSequences() != null) { + for (SAMSequenceRecord record: header.getSequenceDictionary().getSequences()) { + references.add(referenceConverter.convert(record, stringency, logger)); + } + } + return references; + } +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamSequenceRecordToReference.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamSequenceRecordToReference.java new file mode 100644 index 0000000..84b8f54 --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamSequenceRecordToReference.java @@ -0,0 +1,80 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +import htsjdk.samtools.SAMSequenceRecord; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.Reference; + +import org.slf4j.Logger; + +/** + * Convert a SAMSequenceRecord to a Reference. + */ +public final class SamSequenceRecordToReference extends AbstractConverter { + static final String REFSEQ_TAG = "REFSEQ"; + static final String GENBANK_TAG = "GENBANK"; + + /** + * Create a new SAMSequenceRecord to a Reference converter. + */ + public SamSequenceRecordToReference() { + super(SAMSequenceRecord.class, Reference.class); + } + + + @Override + public Reference convert(final SAMSequenceRecord record, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (record == null) { + warnOrThrow(record, "must not be null", null, stringency, logger); + return null; + } + + Reference.Builder builder = Reference.newBuilder() + .setName(record.getSequenceName()) + .setLength(Long.valueOf(record.getSequenceLength())); + + if (record.getSequenceIndex() > -1) { + builder.setIndex(record.getSequenceIndex()); + } + + Optional.ofNullable(record.getMd5()).ifPresent(md5 -> builder.setMd5(md5)); + Optional.ofNullable(record.getSpecies()).ifPresent(species -> builder.setSpecies(species)); + Optional.ofNullable(record.getAttribute(SAMSequenceRecord.URI_TAG)).ifPresent(uri -> builder.setSourceUri(uri)); + + List accessions = new ArrayList(2); + Optional.ofNullable(record.getAttribute(REFSEQ_TAG)).ifPresent(refseq -> accessions.add(refseq)); + Optional.ofNullable(record.getAttribute(GENBANK_TAG)).ifPresent(genbank -> accessions.add(genbank)); + if (!accessions.isEmpty()) { 
+ builder.setSourceAccessions(accessions); + } + + return builder.build(); + } +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToReferences.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToReferences.java new file mode 100644 index 0000000..e0b67bf --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToReferences.java @@ -0,0 +1,85 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.ArrayList; +import java.util.List; + +import htsjdk.samtools.SAMSequenceRecord; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.Reference; + +import org.slf4j.Logger; + +/** + * Convert a VCFHeader to a list of References. + */ +public final class VcfHeaderToReferences extends AbstractConverter> { + + /** Convert SAMSequenceRecord to Reference. 
*/ + private final Converter referenceConverter; + + + /** + * Create a new VCFHeader to a list of References converter. + */ + public VcfHeaderToReferences() { + this(new SamSequenceRecordToReference()); + } + + /** + * Create a new VCFHeader to a list of References converter. + * + * @param referenceConverter reference converter, must not be null + */ + VcfHeaderToReferences(final Converter referenceConverter) { + super(VCFHeader.class, List.class); + + checkNotNull(referenceConverter); + this.referenceConverter = referenceConverter; + } + + + @Override + public List convert(final VCFHeader header, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (header == null) { + warnOrThrow(header, "must not be null", null, stringency, logger); + return null; + } + + List references = new ArrayList(); + if (header.getSequenceDictionary() != null && header.getSequenceDictionary().getSequences() != null) { + for (SAMSequenceRecord record: header.getSequenceDictionary().getSequences()) { + references.add(referenceConverter.convert(record, stringency, logger)); + } + } + return references; + } +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamples.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamples.java new file mode 100644 index 0000000..11bef7a --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamples.java @@ -0,0 +1,59 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.List; + +import htsjdk.variant.variantcontext.VariantContext; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.Sample; + +import org.slf4j.Logger; + +/** + * Convert a VCFHeader to a list of Samples. + */ +public final class VcfHeaderToSamples extends AbstractConverter> { + + /** + * Create a new VCFHeader to a list of Samples converter. 
+ */ + public VcfHeaderToSamples() { + super(VCFHeader.class, List.class); + } + + + @Override + public List convert(final VCFHeader header, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (header == null) { + warnOrThrow(header, "must not be null", null, stringency, logger); + return null; + } + + return null; + } +} diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java index 7676c5e..8f2e1b6 100644 --- a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java @@ -19,6 +19,8 @@ import static org.junit.Assert.assertNotNull; +import java.util.List; + import com.google.inject.AbstractModule; import com.google.inject.Inject; import com.google.inject.Injector; @@ -26,6 +28,7 @@ import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.ValidationStringency; import htsjdk.variant.vcf.VCFHeader; @@ -40,6 +43,10 @@ import org.bdgenomics.formats.avro.AlignmentRecord; import org.bdgenomics.formats.avro.Genotype; +import org.bdgenomics.formats.avro.ProcessingStep; +import org.bdgenomics.formats.avro.ReadGroup; +import org.bdgenomics.formats.avro.Reference; +import org.bdgenomics.formats.avro.Sample; import org.bdgenomics.formats.avro.Variant; /** @@ -65,6 +72,15 @@ public void testHtsjdkModule() { assertNotNull(target.getConversionStringencyToValidationStringency()); assertNotNull(target.getValidationStringencyToConversionStringency()); assertNotNull(target.getSamRecordToAlignmentRecord()); + assertNotNull(target.getVcfHeaderToReferences()); + assertNotNull(target.getVcfHeaderToSamples()); + + assertNotNull(target.getSamHeaderToProcessingSteps()); + assertNotNull(target.getSamHeaderToReadGroups()); + 
assertNotNull(target.getSamSequenceRecordToReference()); + assertNotNull(target.getSamHeaderToReferences()); + assertNotNull(target.getVcfHeaderToReferences()); + assertNotNull(target.getVcfHeaderToSamples()); SAMFileHeader samFileHeader = new SAMFileHeader(); assertNotNull(target.getAlignmentRecordToSamRecordFactory()); @@ -88,6 +104,12 @@ static class Target { final Converter conversionStringencyToValidationStringency; final Converter validationStringencyToConversionStringency; final Converter samRecordToAlignmentRecord; + final Converter> samHeaderToProcessingSteps; + final Converter> samHeaderToReadGroups; + final Converter samSequenceRecordToReference; + final Converter> samHeaderToReferences; + final Converter> vcfHeaderToReferences; + final Converter> vcfHeaderToSamples; final AlignmentRecordToSamRecordFactory alignmentRecordToSamRecordFactory; final GenotypesToVariantContextFactory genotypesToVariantContextFactory; final VariantsToVariantContextFactory variantsToVariantContextFactory; @@ -98,6 +120,12 @@ static class Target { Target(final Converter conversionStringencyToValidationStringency, final Converter validationStringencyToConversionStringency, final Converter samRecordToAlignmentRecord, + final Converter> samHeaderToProcessingSteps, + final Converter> samHeaderToReadGroups, + final Converter samSequenceRecordToReference, + final Converter> samHeaderToReferences, + final Converter> vcfHeaderToReferences, + final Converter> vcfHeaderToSamples, final AlignmentRecordToSamRecordFactory alignmentRecordToSamRecordFactory, final GenotypesToVariantContextFactory genotypesToVariantContextFactory, final VariantsToVariantContextFactory variantsToVariantContextFactory, @@ -107,6 +135,12 @@ static class Target { this.conversionStringencyToValidationStringency = conversionStringencyToValidationStringency; this.validationStringencyToConversionStringency = validationStringencyToConversionStringency; this.samRecordToAlignmentRecord = samRecordToAlignmentRecord; + 
this.samHeaderToProcessingSteps = samHeaderToProcessingSteps; + this.samHeaderToReadGroups = samHeaderToReadGroups; + this.samSequenceRecordToReference = samSequenceRecordToReference; + this.samHeaderToReferences = samHeaderToReferences; + this.vcfHeaderToReferences = vcfHeaderToReferences; + this.vcfHeaderToSamples = vcfHeaderToSamples; this.alignmentRecordToSamRecordFactory = alignmentRecordToSamRecordFactory; this.genotypesToVariantContextFactory = genotypesToVariantContextFactory; this.variantsToVariantContextFactory = variantsToVariantContextFactory; @@ -126,6 +160,30 @@ Converter getSamRecordToAlignmentRecord() { return samRecordToAlignmentRecord; } + Converter> getSamHeaderToProcessingSteps() { + return samHeaderToProcessingSteps; + } + + Converter> getSamHeaderToReadGroups() { + return samHeaderToReadGroups; + } + + Converter getSamSequenceRecordToReference() { + return samSequenceRecordToReference; + } + + Converter> getSamHeaderToReferences() { + return samHeaderToReferences; + } + + Converter> getVcfHeaderToReferences() { + return vcfHeaderToReferences; + } + + Converter> getVcfHeaderToSamples() { + return vcfHeaderToSamples; + } + AlignmentRecordToSamRecordFactory getAlignmentRecordToSamRecordFactory() { return alignmentRecordToSamRecordFactory; } diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingStepsTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingStepsTest.java new file mode 100644 index 0000000..1bfc240 --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingStepsTest.java @@ -0,0 +1,81 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.util.Collections; +import java.util.List; + +import htsjdk.samtools.SAMFileHeader; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.bdgenomics.formats.avro.ProcessingStep; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for SamHeaderToProcessingSteps. 
+ */ +public final class SamHeaderToProcessingStepsTest { + private final Logger logger = LoggerFactory.getLogger(SamHeaderToProcessingStepsTest.class); + + SAMFileHeader header; + Converter> converter; + + @Before + public void setUp() { + header = new SAMFileHeader(); + converter = new SamHeaderToProcessingSteps(); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test + public void testConvert() { + // todo + } +} diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroupsTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroupsTest.java new file mode 100644 index 0000000..d708199 --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroupsTest.java @@ -0,0 +1,81 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.util.Collections; +import java.util.List; + +import htsjdk.samtools.SAMFileHeader; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.bdgenomics.formats.avro.ReadGroup; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for SamHeaderToReadGroups. + */ +public final class SamHeaderToReadGroupsTest { + private final Logger logger = LoggerFactory.getLogger(SamHeaderToReadGroupsTest.class); + + SAMFileHeader header; + Converter> converter; + + @Before + public void setUp() { + header = new SAMFileHeader(); + converter = new SamHeaderToReadGroups(); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test + public void testConvert() { + // todo + } +} diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToReferencesTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToReferencesTest.java new file mode 100644 index 0000000..8eebc17 --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToReferencesTest.java @@ -0,0 +1,104 @@ +/** + * Licensed to Big Data Genomics (BDG) 
under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.util.Collections; +import java.util.List; + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMSequenceRecord; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.bdgenomics.formats.avro.Reference; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for SamHeaderToReferences. 
+ */ +public final class SamHeaderToReferencesTest { + private final Logger logger = LoggerFactory.getLogger(SamHeaderToReferencesTest.class); + + SAMFileHeader header; + Converter referenceConverter; + Converter> converter; + + @Before + public void setUp() { + header = new SAMFileHeader(); + referenceConverter = new SamSequenceRecordToReference(); + converter = new SamHeaderToReferences(referenceConverter); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=NullPointerException.class) + public void testConstructorNullReferenceConverter() { + new SamHeaderToReferences(null); + } + + @Test + public void testNoargConstructor() { + assertNotNull(new SamHeaderToReferences()); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test + public void testConvert() { + // todo + } + + @Test + public void testConvertNullSequenceDictionary() { + // todo + } + + @Test + public void testConvertNullSequences() { + // todo + } +} diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToReferencesTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToReferencesTest.java new file mode 100644 index 0000000..03adfb0 --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToReferencesTest.java @@ -0,0 +1,105 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.util.Collections; +import java.util.List; + +import htsjdk.samtools.SAMSequenceRecord; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.bdgenomics.formats.avro.Reference; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for VcfHeaderToReferences. 
+ */ +public final class VcfHeaderToReferencesTest { + private final Logger logger = LoggerFactory.getLogger(VcfHeaderToReferencesTest.class); + + VCFHeader header; + Converter referenceConverter; + Converter> converter; + + @Before + public void setUp() { + header = new VCFHeader(); + referenceConverter = new SamSequenceRecordToReference(); + converter = new VcfHeaderToReferences(referenceConverter); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=NullPointerException.class) + public void testConstructorNullReferenceConverter() { + new VcfHeaderToReferences(null); + } + + @Test + public void testNoargConstructor() { + assertNotNull(new VcfHeaderToReferences()); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test + public void testConvert() { + // todo + } + + @Test + public void testConvertNullSequenceDictionary() { + // todo + } + + @Test + public void testConvertNullSequences() { + // todo + } +} diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamplesTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamplesTest.java new file mode 100644 index 0000000..acf5009 --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamplesTest.java @@ -0,0 +1,81 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.util.Collections; +import java.util.List; + +import htsjdk.variant.vcf.VCFHeader; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.bdgenomics.formats.avro.Sample; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for VcfHeaderToSamples. 
+ */ +public final class VcfHeaderToSamplesTest { + private final Logger logger = LoggerFactory.getLogger(VcfHeaderToSamplesTest.class); + + VCFHeader header; + Converter> converter; + + @Before + public void setUp() { + header = new VCFHeader(); + converter = new VcfHeaderToSamples(); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test + public void testConvert() { + // todo + } +} diff --git a/pom.xml b/pom.xml index 27b89b2..d0d08b3 100644 --- a/pom.xml +++ b/pom.xml @@ -23,7 +23,7 @@ 0.6.0a10 27.0-jre 4.2.1 - 2.19.0 + 2.20.1 3.0.2 4.12 3.0.0-beta-3 From 51d6356e8d99d5ed5cc8eeaa79a5800de47edfdd Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Sat, 10 Aug 2019 15:53:52 -0500 Subject: [PATCH 08/13] add'l header conversion work --- .../convert/htsjdk/HtsjdkModule.java | 20 ++- .../htsjdk/SamHeaderToProcessingSteps.java | 29 ++++- .../convert/htsjdk/SamHeaderToReadGroups.java | 27 +++- .../SamProgramRecordToProcessingStep.java | 69 ++++++++++ .../htsjdk/SamReadGroupRecordToReadGroup.java | 82 ++++++++++++ .../htsjdk/SamSequenceRecordToReference.java | 8 +- .../convert/htsjdk/HtsjdkModuleTest.java | 18 +++ .../SamHeaderToProcessingStepsTest.java | 15 ++- .../htsjdk/SamHeaderToReadGroupsTest.java | 15 ++- .../SamProgramRecordToProcessingStepTest.java | 98 ++++++++++++++ .../SamReadGroupRecordToReadGroupTest.java | 121 ++++++++++++++++++ .../SamSequenceRecordToReferenceTest.java | 81 ++++++++++++ 12 files changed, 571 insertions(+), 12 deletions(-) create mode 100644 
convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamProgramRecordToProcessingStep.java create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamReadGroupRecordToReadGroup.java create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamProgramRecordToProcessingStepTest.java create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamReadGroupRecordToReadGroupTest.java create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamSequenceRecordToReferenceTest.java diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java index 0cf6407..3022afb 100644 --- a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java @@ -20,6 +20,8 @@ import java.util.List; import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMProgramRecord; +import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.ValidationStringency; @@ -99,13 +101,23 @@ Converter> createSamHeaderToReferences(final Conv } @Provides @Singleton - Converter> createSamHeaderToReadGroups() { - return new SamHeaderToReadGroups(); + Converter createSamReadGroupRecordToReadGroups() { + return new SamReadGroupRecordToReadGroup(); } @Provides @Singleton - Converter> createSamHeaderToProcessingSteps() { - return new SamHeaderToProcessingSteps(); + Converter> createSamHeaderToReadGroups(final Converter readGroupConverter) { + return new SamHeaderToReadGroups(readGroupConverter); + } + + @Provides @Singleton + Converter createSamProgramRecordToProcessingStep() { + return new SamProgramRecordToProcessingStep(); + } + + @Provides @Singleton + Converter> createSamHeaderToProcessingSteps(final Converter processingStepConverter) { + 
return new SamHeaderToProcessingSteps(processingStepConverter); } @Provides @Singleton diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingSteps.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingSteps.java index c69309f..ffd21bd 100644 --- a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingSteps.java +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingSteps.java @@ -17,11 +17,14 @@ */ package org.bdgenomics.convert.htsjdk; +import java.util.ArrayList; import java.util.List; import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMProgramRecord; import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.Converter; import org.bdgenomics.convert.ConversionException; import org.bdgenomics.convert.ConversionStringency; @@ -34,11 +37,27 @@ */ public final class SamHeaderToProcessingSteps extends AbstractConverter> { + /** Convert a SAMProgramRecord to a ProcessingStep. */ + private final Converter processingStepConverter; + + /** * Create a new SAMFileHeader to a list of ProcessingSteps converter. */ public SamHeaderToProcessingSteps() { - super(SAMFileHeader.class, List.class); + this(new SamProgramRecordToProcessingStep()); + } + + /** + * Create a new SAMFileHeader to a list of ProcessingSteps converter. 
+ * + * @param processingStepConverter processing step converter, must not be null + */ + SamHeaderToProcessingSteps(final Converter processingStepConverter) { + super(SAMFileHeader.class, List.class); + + checkNotNull(processingStepConverter); + this.processingStepConverter = processingStepConverter; } @@ -52,6 +71,12 @@ public List convert(final SAMFileHeader header, return null; } - return null; + List processingSteps = new ArrayList(); + if (header.getProgramRecords() != null) { + for (SAMProgramRecord record : header.getProgramRecords()) { + processingSteps.add(processingStepConverter.convert(record, stringency, logger)); + } + } + return processingSteps; } } diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroups.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroups.java index 99cc234..2ae81ea 100644 --- a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroups.java +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroups.java @@ -17,11 +17,14 @@ */ package org.bdgenomics.convert.htsjdk; +import java.util.ArrayList; import java.util.List; import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMReadGroupRecord; import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.Converter; import org.bdgenomics.convert.ConversionException; import org.bdgenomics.convert.ConversionStringency; @@ -34,11 +37,27 @@ */ public final class SamHeaderToReadGroups extends AbstractConverter> { + /** Convert a SAMReadGroupRecord to a ReadGroup. */ + private final Converter readGroupConverter; + + /** * Create a new SAMFileHeader to a list of ReadGroups converter. */ public SamHeaderToReadGroups() { + this(new SamReadGroupRecordToReadGroup()); + } + + /** + * Create a new SAMFileHeader to a list of ReadGroups converter. 
+ * + * @param readGroupConverter read group converter, must not be null + */ + public SamHeaderToReadGroups(final Converter readGroupConverter) { super(SAMFileHeader.class, List.class); + + checkNotNull(readGroupConverter); + this.readGroupConverter = readGroupConverter; } @@ -52,6 +71,12 @@ public List convert(final SAMFileHeader header, return null; } - return null; + List readGroups = new ArrayList(); + if (header.getReadGroups() != null) { + for (SAMReadGroupRecord record : header.getReadGroups()) { + readGroups.add(readGroupConverter.convert(record, stringency, logger)); + } + } + return readGroups; } } diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamProgramRecordToProcessingStep.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamProgramRecordToProcessingStep.java new file mode 100644 index 0000000..1b77f0c --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamProgramRecordToProcessingStep.java @@ -0,0 +1,69 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.bdgenomics.convert.htsjdk; + +import java.util.Optional; + +import htsjdk.samtools.SAMProgramRecord; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.ProcessingStep; + +import org.slf4j.Logger; + +/** + * Convert a SAMProgramRecord to a ProcessingStep. + */ +public final class SamProgramRecordToProcessingStep extends AbstractConverter { + + /** + * Create a new SAMProgramRecord to a ProcessingStep converter. + */ + public SamProgramRecordToProcessingStep() { + super(SAMProgramRecord.class, ProcessingStep.class); + } + + + @Override + public ProcessingStep convert(final SAMProgramRecord record, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (record == null) { + warnOrThrow(record, "must not be null", null, stringency, logger); + return null; + } + if (record.getId() == null) { + warnOrThrow(record, "id must not be null", null, stringency, logger); + return null; + } + + ProcessingStep.Builder builder = ProcessingStep.newBuilder() + .setId(record.getId()); + + Optional.ofNullable(record.getCommandLine()).ifPresent(commandLine -> builder.setCommandLine(commandLine)); + Optional.ofNullable(record.getPreviousProgramGroupId()).ifPresent(previousId -> builder.setPreviousId(previousId)); + Optional.ofNullable(record.getProgramName()).ifPresent(programName -> builder.setProgramName(programName)); + Optional.ofNullable(record.getProgramVersion()).ifPresent(version -> builder.setVersion(version)); + + return builder.build(); + } +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamReadGroupRecordToReadGroup.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamReadGroupRecordToReadGroup.java new file mode 100644 index 0000000..e7caf08 --- /dev/null +++ 
b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamReadGroupRecordToReadGroup.java @@ -0,0 +1,82 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.Optional; + +import htsjdk.samtools.SAMReadGroupRecord; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.bdgenomics.formats.avro.ReadGroup; + +import org.slf4j.Logger; + +/** + * Convert a SAMReadGroup to a ReadGroup. + */ +public final class SamReadGroupRecordToReadGroup extends AbstractConverter { + + /** + * Create a new SAMReadGroupRecord to a ReadGroup converter. 
+ */ + public SamReadGroupRecordToReadGroup() { + super(SAMReadGroupRecord.class, ReadGroup.class); + } + + + @Override + public ReadGroup convert(final SAMReadGroupRecord record, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (record == null) { + warnOrThrow(record, "must not be null", null, stringency, logger); + return null; + } + if (record.getReadGroupId() == null) { + warnOrThrow(record, "readGroupId must not be null", null, stringency, logger); + return null; + } + if (record.getSample() == null) { + warnOrThrow(record, "sample must not be null", null, stringency, logger); + return null; + } + + ReadGroup.Builder builder = ReadGroup.newBuilder() + .setId(record.getReadGroupId()) + .setSampleId(record.getSample()); + + Optional.ofNullable(record.getDescription()).ifPresent(description -> builder.setDescription(description)); + Optional.ofNullable(record.getFlowOrder()).ifPresent(flowOrder -> builder.setFlowOrder(flowOrder)); + Optional.ofNullable(record.getKeySequence()).ifPresent(keySequence -> builder.setKeySequence(keySequence)); + Optional.ofNullable(record.getLibrary()).ifPresent(library -> builder.setLibrary(library)); + Optional.ofNullable(record.getPlatform()).ifPresent(platform -> builder.setPlatform(platform)); + Optional.ofNullable(record.getPlatformModel()).ifPresent(platformModel -> builder.setPlatformModel(platformModel)); + Optional.ofNullable(record.getPlatformUnit()).ifPresent(platformUnit -> builder.setPlatformUnit(platformUnit)); + Optional.ofNullable(record.getPredictedMedianInsertSize()).ifPresent(insertSize -> builder.setPredictedMedianInsertSize(insertSize)); + Optional.ofNullable(record.getRunDate()).map(runDate -> builder.setRunDateEpoch(runDate.getTime())); + Optional.ofNullable(record.getSequencingCenter()).ifPresent(sequencingCenter -> builder.setSequencingCenter(sequencingCenter)); + + // processing steps? 
+ + return builder.build(); + } +} diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamSequenceRecordToReference.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamSequenceRecordToReference.java index 84b8f54..630c381 100644 --- a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamSequenceRecordToReference.java +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamSequenceRecordToReference.java @@ -56,14 +56,16 @@ public Reference convert(final SAMSequenceRecord record, return null; } - Reference.Builder builder = Reference.newBuilder() - .setName(record.getSequenceName()) - .setLength(Long.valueOf(record.getSequenceLength())); + Reference.Builder builder = Reference.newBuilder(); if (record.getSequenceIndex() > -1) { builder.setIndex(record.getSequenceIndex()); } + if (record.getSequenceLength() > 0) { + builder.setLength((long) record.getSequenceLength()); + } + Optional.ofNullable(record.getSequenceName()).ifPresent(name -> builder.setName(name)); Optional.ofNullable(record.getMd5()).ifPresent(md5 -> builder.setMd5(md5)); Optional.ofNullable(record.getSpecies()).ifPresent(species -> builder.setSpecies(species)); Optional.ofNullable(record.getAttribute(SAMSequenceRecord.URI_TAG)).ifPresent(uri -> builder.setSourceUri(uri)); diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java index 8f2e1b6..fdd48b8 100644 --- a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java @@ -27,6 +27,8 @@ import com.google.inject.Guice; import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMProgramRecord; +import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.ValidationStringency; @@ -75,7 
+77,9 @@ public void testHtsjdkModule() { assertNotNull(target.getVcfHeaderToReferences()); assertNotNull(target.getVcfHeaderToSamples()); + assertNotNull(target.getSamProgramRecordToProcessingStep()); assertNotNull(target.getSamHeaderToProcessingSteps()); + assertNotNull(target.getSamReadGroupRecordToReadGroup()); assertNotNull(target.getSamHeaderToReadGroups()); assertNotNull(target.getSamSequenceRecordToReference()); assertNotNull(target.getSamHeaderToReferences()); @@ -104,7 +108,9 @@ static class Target { final Converter conversionStringencyToValidationStringency; final Converter validationStringencyToConversionStringency; final Converter samRecordToAlignmentRecord; + final Converter samProgramRecordToProcessingStep; final Converter> samHeaderToProcessingSteps; + final Converter samReadGroupRecordToReadGroup; final Converter> samHeaderToReadGroups; final Converter samSequenceRecordToReference; final Converter> samHeaderToReferences; @@ -120,7 +126,9 @@ static class Target { Target(final Converter conversionStringencyToValidationStringency, final Converter validationStringencyToConversionStringency, final Converter samRecordToAlignmentRecord, + final Converter samProgramRecordToProcessingStep, final Converter> samHeaderToProcessingSteps, + final Converter samReadGroupRecordToReadGroup, final Converter> samHeaderToReadGroups, final Converter samSequenceRecordToReference, final Converter> samHeaderToReferences, @@ -135,7 +143,9 @@ static class Target { this.conversionStringencyToValidationStringency = conversionStringencyToValidationStringency; this.validationStringencyToConversionStringency = validationStringencyToConversionStringency; this.samRecordToAlignmentRecord = samRecordToAlignmentRecord; + this.samProgramRecordToProcessingStep = samProgramRecordToProcessingStep; this.samHeaderToProcessingSteps = samHeaderToProcessingSteps; + this.samReadGroupRecordToReadGroup = samReadGroupRecordToReadGroup; this.samHeaderToReadGroups = samHeaderToReadGroups; 
this.samSequenceRecordToReference = samSequenceRecordToReference; this.samHeaderToReferences = samHeaderToReferences; @@ -160,10 +170,18 @@ Converter getSamRecordToAlignmentRecord() { return samRecordToAlignmentRecord; } + Converter getSamProgramRecordToProcessingStep() { + return samProgramRecordToProcessingStep; + } + Converter> getSamHeaderToProcessingSteps() { return samHeaderToProcessingSteps; } + Converter getSamReadGroupRecordToReadGroup() { + return samReadGroupRecordToReadGroup; + } + Converter> getSamHeaderToReadGroups() { return samHeaderToReadGroups; } diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingStepsTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingStepsTest.java index 1bfc240..4867328 100644 --- a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingStepsTest.java +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToProcessingStepsTest.java @@ -27,6 +27,7 @@ import java.util.List; import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMProgramRecord; import org.bdgenomics.convert.Converter; import org.bdgenomics.convert.ConversionException; @@ -46,12 +47,14 @@ public final class SamHeaderToProcessingStepsTest { private final Logger logger = LoggerFactory.getLogger(SamHeaderToProcessingStepsTest.class); SAMFileHeader header; + Converter processingStepConverter; Converter> converter; @Before public void setUp() { header = new SAMFileHeader(); - converter = new SamHeaderToProcessingSteps(); + processingStepConverter = new SamProgramRecordToProcessingStep(); + converter = new SamHeaderToProcessingSteps(processingStepConverter); } @Test @@ -59,6 +62,16 @@ public void testConstructor() { assertNotNull(converter); } + @Test(expected=NullPointerException.class) + public void testConstructorNullProcessingStepConverter() { + new SamHeaderToProcessingSteps(null); + } + + @Test + public void testNoargConstructor() { 
+ assertNotNull(new SamHeaderToProcessingSteps()); + } + @Test(expected=ConversionException.class) public void testConvertNullSourceStrict() { converter.convert(null, STRICT, logger); diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroupsTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroupsTest.java index d708199..951c679 100644 --- a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroupsTest.java +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamHeaderToReadGroupsTest.java @@ -27,6 +27,7 @@ import java.util.List; import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMReadGroupRecord; import org.bdgenomics.convert.Converter; import org.bdgenomics.convert.ConversionException; @@ -46,12 +47,14 @@ public final class SamHeaderToReadGroupsTest { private final Logger logger = LoggerFactory.getLogger(SamHeaderToReadGroupsTest.class); SAMFileHeader header; + Converter readGroupConverter; Converter> converter; @Before public void setUp() { header = new SAMFileHeader(); - converter = new SamHeaderToReadGroups(); + readGroupConverter = new SamReadGroupRecordToReadGroup(); + converter = new SamHeaderToReadGroups(readGroupConverter); } @Test @@ -59,6 +62,16 @@ public void testConstructor() { assertNotNull(converter); } + @Test(expected=NullPointerException.class) + public void testConstructorNullReadGroupConverter() { + new SamHeaderToReadGroups(null); + } + + @Test + public void testNoargConstructor() { + assertNotNull(new SamHeaderToReadGroups()); + } + @Test(expected=ConversionException.class) public void testConvertNullSourceStrict() { converter.convert(null, STRICT, logger); diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamProgramRecordToProcessingStepTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamProgramRecordToProcessingStepTest.java new file mode 100644 index 0000000..ed80bbf --- 
/dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamProgramRecordToProcessingStepTest.java @@ -0,0 +1,98 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.util.Collections; +import java.util.List; + +import htsjdk.samtools.SAMProgramRecord; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.bdgenomics.formats.avro.ProcessingStep; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for SamProgramRecordToProcessingStep. 
+ */ +public final class SamProgramRecordToProcessingStepTest { + private final Logger logger = LoggerFactory.getLogger(SamProgramRecordToProcessingStepTest.class); + + SAMProgramRecord record; + SAMProgramRecord nullId; + Converter converter; + + @Before + public void setUp() { + record = new SAMProgramRecord("bwa"); + nullId = new SAMProgramRecord(null); + converter = new SamProgramRecordToProcessingStep(); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test(expected=ConversionException.class) + public void testConvertNullIdStrict() { + converter.convert(nullId, STRICT, logger); + } + + @Test + public void testConvertNullIdLenient() { + assertEquals(null, converter.convert(nullId, LENIENT, logger)); + } + + @Test + public void testConvertNullIdSilent() { + assertEquals(null, converter.convert(nullId, SILENT, logger)); + } + + @Test + public void testConvert() { + // todo + } +} diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamReadGroupRecordToReadGroupTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamReadGroupRecordToReadGroupTest.java new file mode 100644 index 0000000..b706f22 --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamReadGroupRecordToReadGroupTest.java @@ -0,0 +1,121 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.util.Collections; +import java.util.List; + +import htsjdk.samtools.SAMReadGroupRecord; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.bdgenomics.formats.avro.ReadGroup; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for SamReadGroupRecordToReadGroup. 
+ */ +public final class SamReadGroupRecordToReadGroupTest { + private final Logger logger = LoggerFactory.getLogger(SamReadGroupRecordToReadGroupTest.class); + + SAMReadGroupRecord record; + SAMReadGroupRecord nullReadGroupId; + SAMReadGroupRecord nullSample; + Converter converter; + + @Before + public void setUp() { + record = new SAMReadGroupRecord("SRR062634"); + record.setSample("HG00096"); + + nullReadGroupId = new SAMReadGroupRecord(null); + nullReadGroupId.setSample("HG00096"); + + nullSample = new SAMReadGroupRecord("SRR062634"); + nullSample.setSample(null); + + converter = new SamReadGroupRecordToReadGroup(); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test(expected=ConversionException.class) + public void testConvertNullReadGroupIdStrict() { + converter.convert(nullReadGroupId, STRICT, logger); + } + + @Test + public void testConvertNullReadGroupIdLenient() { + assertEquals(null, converter.convert(nullReadGroupId, LENIENT, logger)); + } + + @Test + public void testConvertNullReadGroupIdSilent() { + assertEquals(null, converter.convert(nullReadGroupId, SILENT, logger)); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSampleStrict() { + converter.convert(nullSample, STRICT, logger); + } + + @Test + public void testConvertNullSampleLenient() { + assertEquals(null, converter.convert(nullSample, LENIENT, logger)); + } + + @Test + public void testConvertNullSampleSilent() { + assertEquals(null, converter.convert(nullSample, SILENT, logger)); + } + + @Test + public void testConvert() { + // todo + } +} diff 
--git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamSequenceRecordToReferenceTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamSequenceRecordToReferenceTest.java new file mode 100644 index 0000000..d2dc5ae --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/SamSequenceRecordToReferenceTest.java @@ -0,0 +1,81 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.util.Collections; +import java.util.List; + +import htsjdk.samtools.SAMSequenceRecord; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.bdgenomics.formats.avro.Reference; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for SamSequenceRecordToReference. 
+ */ +public final class SamSequenceRecordToReferenceTest { + private final Logger logger = LoggerFactory.getLogger(SamSequenceRecordToReferenceTest.class); + + SAMSequenceRecord record; + Converter converter; + + @Before + public void setUp() { + record = new SAMSequenceRecord("hs37d5", 35477943); + converter = new SamSequenceRecordToReference(); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test + public void testConvert() { + // todo + } +} From dc58c3fc2d302d49c9fabac1aa02eadee87cb242 Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Sun, 11 Aug 2019 16:05:33 -0500 Subject: [PATCH 09/13] vcf header to samples --- .../convert/htsjdk/VcfHeaderToSamples.java | 12 +++++++++--- .../convert/htsjdk/VcfHeaderToSamplesTest.java | 16 ++++++++++++++-- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamples.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamples.java index 11bef7a..b9280e9 100644 --- a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamples.java +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamples.java @@ -17,10 +17,9 @@ */ package org.bdgenomics.convert.htsjdk; +import java.util.ArrayList; import java.util.List; -import htsjdk.variant.variantcontext.VariantContext; - import htsjdk.variant.vcf.VCFHeader; import org.bdgenomics.convert.AbstractConverter; @@ -54,6 +53,13 @@ public List convert(final VCFHeader header, return null; } - return null; + List samples = new 
ArrayList(); + if (header.getGenotypeSamples() != null) { + Sample.Builder builder = Sample.newBuilder(); + for (String sampleId : header.getGenotypeSamples()) { + samples.add(builder.setId(sampleId).build()); + } + } + return samples; } } diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamplesTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamplesTest.java index acf5009..3c17c14 100644 --- a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamplesTest.java +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToSamplesTest.java @@ -22,11 +22,13 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; import java.util.Collections; import java.util.List; import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLine; import org.bdgenomics.convert.Converter; import org.bdgenomics.convert.ConversionException; @@ -50,7 +52,7 @@ public final class VcfHeaderToSamplesTest { @Before public void setUp() { - header = new VCFHeader(); + header = new VCFHeader(Collections.emptySet(), Collections.singleton("sampleId")); converter = new VcfHeaderToSamples(); } @@ -76,6 +78,16 @@ public void testConvertNullSourceSilent() { @Test public void testConvert() { - // todo + List samples = converter.convert(header, SILENT, logger); + assertNotNull(samples); + assertEquals(1, samples.size()); + assertEquals("sampleId", samples.get(0).getId()); + } + + @Test + public void testConvertEmpty() { + List samples = converter.convert(new VCFHeader(), SILENT, logger); + assertNotNull(samples); + assertTrue(samples.isEmpty()); } } From 4c6a238a2387f15e12d6e5fe180810dadc8930e9 Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Tue, 3 Sep 2019 11:29:58 -0500 Subject: [PATCH 10/13] update dependency versions --- pom.xml | 20 ++++++++++---------- 1 file 
changed, 10 insertions(+), 10 deletions(-) diff --git a/pom.xml b/pom.xml index d0d08b3..00f66fb 100644 --- a/pom.xml +++ b/pom.xml @@ -23,7 +23,7 @@ 0.6.0a10 27.0-jre 4.2.1 - 2.20.1 + 2.20.3 3.0.2 4.12 3.0.0-beta-3 @@ -64,7 +64,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.0.1 + 3.1.0 org.apache.maven.plugins @@ -98,12 +98,12 @@ org.apache.maven.plugins maven-deploy-plugin - 2.8.2 + 3.0.0-M1 org.apache.maven.plugins maven-enforcer-plugin - 1.4.1 + 3.0.0-M2 enforce-versions @@ -113,8 +113,8 @@ - [3.1.1,) - convert requires Maven 3.1.1 or greater + [3.3.9,) + convert requires Maven 3.3.9 or greater [1.8,) @@ -128,12 +128,12 @@ org.apache.maven.plugins maven-install-plugin - 2.5.2 + 3.0.0-M1 org.apache.maven.plugins maven-jar-plugin - 3.1.0 + 3.1.2 org.apache.maven.plugins @@ -143,7 +143,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.0.0-M1 + 3.0.0-M3 @@ -251,7 +251,7 @@ maven-javadoc-plugin - 3.0.1 + 3.1.0 maven-source-plugin From 93f5466066f5a10fe19980ad33af11410eb7127a Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Tue, 3 Sep 2019 15:02:58 -0500 Subject: [PATCH 11/13] add vcfheader to headerlines --- .../convert/htsjdk/HtsjdkModule.java | 6 ++ .../htsjdk/VcfHeaderToVcfHeaderLines.java | 62 +++++++++++++++++++ .../convert/htsjdk/HtsjdkModuleTest.java | 12 +++- 3 files changed, 77 insertions(+), 3 deletions(-) create mode 100644 convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToVcfHeaderLines.java diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java index 3022afb..3cbe5de 100644 --- a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java @@ -29,6 +29,7 @@ import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLine; import 
com.google.inject.AbstractModule; import com.google.inject.Provides; @@ -129,4 +130,9 @@ Converter> createVcfHeaderToReferences(final Converte Converter> createVcfHeaderToSamples() { return new VcfHeaderToSamples(); } + + @Provides @Singleton + Converter> createVcfHeaderToVcfHeaderLines() { + return new VcfHeaderToVcfHeaderLines(); + } } diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToVcfHeaderLines.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToVcfHeaderLines.java new file mode 100644 index 0000000..66a5037 --- /dev/null +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VcfHeaderToVcfHeaderLines.java @@ -0,0 +1,62 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import java.util.ArrayList; +import java.util.List; + +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLine; + +import org.bdgenomics.convert.AbstractConverter; +import org.bdgenomics.convert.ConversionException; +import org.bdgenomics.convert.ConversionStringency; + +import org.slf4j.Logger; + +/** + * Convert a VCFHeader to a list of VCFHeaderLines. 
+ */ +public final class VcfHeaderToVcfHeaderLines extends AbstractConverter> { + + /** + * Create a new VCFHeader to a list of VCFHeaderLines converter. + */ + public VcfHeaderToVcfHeaderLines() { + super(VCFHeader.class, VCFHeaderLine.class); + } + + + @Override + public List convert(final VCFHeader header, + final ConversionStringency stringency, + final Logger logger) throws ConversionException { + + if (header == null) { + warnOrThrow(header, "must not be null", null, stringency, logger); + return null; + } + + List headerLines = new ArrayList(); + headerLines.addAll(header.getFilterLines()); + headerLines.addAll(header.getFormatHeaderLines()); + headerLines.addAll(header.getInfoHeaderLines()); + headerLines.addAll(header.getOtherHeaderLines()); + return headerLines; + } +} diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java index fdd48b8..01aef31 100644 --- a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java @@ -34,6 +34,7 @@ import htsjdk.samtools.ValidationStringency; import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLine; import htsjdk.variant.variantcontext.VariantContext; @@ -74,9 +75,6 @@ public void testHtsjdkModule() { assertNotNull(target.getConversionStringencyToValidationStringency()); assertNotNull(target.getValidationStringencyToConversionStringency()); assertNotNull(target.getSamRecordToAlignmentRecord()); - assertNotNull(target.getVcfHeaderToReferences()); - assertNotNull(target.getVcfHeaderToSamples()); - assertNotNull(target.getSamProgramRecordToProcessingStep()); assertNotNull(target.getSamHeaderToProcessingSteps()); assertNotNull(target.getSamReadGroupRecordToReadGroup()); @@ -85,6 +83,7 @@ public void testHtsjdkModule() { assertNotNull(target.getSamHeaderToReferences()); 
assertNotNull(target.getVcfHeaderToReferences()); assertNotNull(target.getVcfHeaderToSamples()); + assertNotNull(target.getVcfHeaderToVcfHeaderLines()); SAMFileHeader samFileHeader = new SAMFileHeader(); assertNotNull(target.getAlignmentRecordToSamRecordFactory()); @@ -116,6 +115,7 @@ static class Target { final Converter> samHeaderToReferences; final Converter> vcfHeaderToReferences; final Converter> vcfHeaderToSamples; + final Converter> vcfHeaderToVcfHeaderLines; final AlignmentRecordToSamRecordFactory alignmentRecordToSamRecordFactory; final GenotypesToVariantContextFactory genotypesToVariantContextFactory; final VariantsToVariantContextFactory variantsToVariantContextFactory; @@ -134,6 +134,7 @@ static class Target { final Converter> samHeaderToReferences, final Converter> vcfHeaderToReferences, final Converter> vcfHeaderToSamples, + final Converter> vcfHeaderToVcfHeaderLines, final AlignmentRecordToSamRecordFactory alignmentRecordToSamRecordFactory, final GenotypesToVariantContextFactory genotypesToVariantContextFactory, final VariantsToVariantContextFactory variantsToVariantContextFactory, @@ -151,6 +152,7 @@ static class Target { this.samHeaderToReferences = samHeaderToReferences; this.vcfHeaderToReferences = vcfHeaderToReferences; this.vcfHeaderToSamples = vcfHeaderToSamples; + this.vcfHeaderToVcfHeaderLines = vcfHeaderToVcfHeaderLines; this.alignmentRecordToSamRecordFactory = alignmentRecordToSamRecordFactory; this.genotypesToVariantContextFactory = genotypesToVariantContextFactory; this.variantsToVariantContextFactory = variantsToVariantContextFactory; @@ -202,6 +204,10 @@ Converter> getVcfHeaderToSamples() { return vcfHeaderToSamples; } + Converter> getVcfHeaderToVcfHeaderLines() { + return vcfHeaderToVcfHeaderLines; + } + AlignmentRecordToSamRecordFactory getAlignmentRecordToSamRecordFactory() { return alignmentRecordToSamRecordFactory; } From f750a9cebc99ca363f575280340b7b6e0d6910dd Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Tue, 3 Sep 2019 
15:57:19 -0500 Subject: [PATCH 12/13] rename variantsToVariantContext to variantToVariantContext --- .../convert/htsjdk/HtsjdkModule.java | 4 +- ...text.java => VariantToVariantContext.java} | 22 ++--- ...va => VariantToVariantContextFactory.java} | 10 +-- .../convert/htsjdk/HtsjdkModuleTest.java | 14 +-- ....java => VariantToVariantContextTest.java} | 31 ++----- .../htsjdk/VcfHeaderToVcfHeaderLinesTest.java | 88 +++++++++++++++++++ 6 files changed, 115 insertions(+), 54 deletions(-) rename convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/{VariantsToVariantContext.java => VariantToVariantContext.java} (70%) rename convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/{VariantsToVariantContextFactory.java => VariantToVariantContextFactory.java} (78%) rename convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/{VariantsToVariantContextTest.java => VariantToVariantContextTest.java} (73%) create mode 100644 convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToVcfHeaderLinesTest.java diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java index 3cbe5de..d89694f 100644 --- a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/HtsjdkModule.java @@ -64,8 +64,8 @@ protected void configure() { .build(GenotypesToVariantContextFactory.class)); install(new FactoryModuleBuilder() - .implement(new TypeLiteral, VariantContext>>() {}, VariantsToVariantContext.class) - .build(VariantsToVariantContextFactory.class)); + .implement(new TypeLiteral>() {}, VariantToVariantContext.class) + .build(VariantToVariantContextFactory.class)); install(new FactoryModuleBuilder() .implement(new TypeLiteral>>() {}, VariantContextToGenotypes.class) diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContext.java 
b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantToVariantContext.java similarity index 70% rename from convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContext.java rename to convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantToVariantContext.java index a47c4c6..f6cf1fc 100644 --- a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContext.java +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantToVariantContext.java @@ -17,8 +17,6 @@ */ package org.bdgenomics.convert.htsjdk; -import java.util.List; - import com.google.inject.Inject; import com.google.inject.assistedinject.Assisted; @@ -36,22 +34,22 @@ import org.slf4j.Logger; /** - * Convert a list of Variants to a VariantContext. + * Convert a Variant to a VariantContext. */ -public final class VariantsToVariantContext extends AbstractConverter, VariantContext> { +public final class VariantToVariantContext extends AbstractConverter { /** Header. */ private final VCFHeader header; /** - * Create a new list of Variants to VariantContext converter with the specified header. + * Create a new Variant to VariantContext converter with the specified header. 
* * @param header header, must not be null */ @Inject - public VariantsToVariantContext(@Assisted final VCFHeader header) { - super(List.class, VariantContext.class); + public VariantToVariantContext(@Assisted final VCFHeader header) { + super(Variant.class, VariantContext.class); checkNotNull(header); this.header = header; @@ -59,16 +57,12 @@ public VariantsToVariantContext(@Assisted final VCFHeader header) { @Override - public VariantContext convert(final List variants, + public VariantContext convert(final Variant variant, final ConversionStringency stringency, final Logger logger) throws ConversionException { - if (variants == null) { - warnOrThrow(variants, "must not be null", null, stringency, logger); - return null; - } - if (variants.isEmpty()) { - warnOrThrow(variants, "must not be empty", null, stringency, logger); + if (variant == null) { + warnOrThrow(variant, "must not be null", null, stringency, logger); return null; } diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextFactory.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantToVariantContextFactory.java similarity index 78% rename from convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextFactory.java rename to convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantToVariantContextFactory.java index 8f4eb48..2b3eda9 100644 --- a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextFactory.java +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/VariantToVariantContextFactory.java @@ -17,8 +17,6 @@ */ package org.bdgenomics.convert.htsjdk; -import java.util.List; - import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFHeader; @@ -28,15 +26,15 @@ import org.bdgenomics.formats.avro.Variant; /** - * Factory for creating list of Variants to VariantContext converters, which + * Factory for creating Variant to VariantContext 
converters, which * require late binding for a VCFHeader. */ -public interface VariantsToVariantContextFactory { +public interface VariantToVariantContextFactory { /** - * Create a new list of Variants to VariantContext converter with the specified header. + * Create a new Variant to VariantContext converter with the specified header. * * @param header header, must not be null */ - Converter, VariantContext> create(VCFHeader header); + Converter create(VCFHeader header); } diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java index 01aef31..9ef66e9 100644 --- a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/HtsjdkModuleTest.java @@ -92,8 +92,8 @@ public void testHtsjdkModule() { VCFHeader vcfHeader = new VCFHeader(); assertNotNull(target.getGenotypesToVariantContextFactory()); assertNotNull(target.getGenotypesToVariantContextFactory().create(vcfHeader)); - assertNotNull(target.getVariantsToVariantContextFactory()); - assertNotNull(target.getVariantsToVariantContextFactory().create(vcfHeader)); + assertNotNull(target.getVariantToVariantContextFactory()); + assertNotNull(target.getVariantToVariantContextFactory().create(vcfHeader)); assertNotNull(target.getVariantContextToGenotypesFactory()); assertNotNull(target.getVariantContextToGenotypesFactory().create(vcfHeader)); assertNotNull(target.getVariantContextToVariantsFactory()); @@ -118,7 +118,7 @@ static class Target { final Converter> vcfHeaderToVcfHeaderLines; final AlignmentRecordToSamRecordFactory alignmentRecordToSamRecordFactory; final GenotypesToVariantContextFactory genotypesToVariantContextFactory; - final VariantsToVariantContextFactory variantsToVariantContextFactory; + final VariantToVariantContextFactory variantToVariantContextFactory; final VariantContextToGenotypesFactory 
variantContextToGenotypesFactory; final VariantContextToVariantsFactory variantContextToVariantsFactory; @@ -137,7 +137,7 @@ static class Target { final Converter> vcfHeaderToVcfHeaderLines, final AlignmentRecordToSamRecordFactory alignmentRecordToSamRecordFactory, final GenotypesToVariantContextFactory genotypesToVariantContextFactory, - final VariantsToVariantContextFactory variantsToVariantContextFactory, + final VariantToVariantContextFactory variantToVariantContextFactory, final VariantContextToGenotypesFactory variantContextToGenotypesFactory, final VariantContextToVariantsFactory variantContextToVariantsFactory) { @@ -155,7 +155,7 @@ static class Target { this.vcfHeaderToVcfHeaderLines = vcfHeaderToVcfHeaderLines; this.alignmentRecordToSamRecordFactory = alignmentRecordToSamRecordFactory; this.genotypesToVariantContextFactory = genotypesToVariantContextFactory; - this.variantsToVariantContextFactory = variantsToVariantContextFactory; + this.variantToVariantContextFactory = variantToVariantContextFactory; this.variantContextToGenotypesFactory = variantContextToGenotypesFactory; this.variantContextToVariantsFactory = variantContextToVariantsFactory; } @@ -216,8 +216,8 @@ GenotypesToVariantContextFactory getGenotypesToVariantContextFactory() { return genotypesToVariantContextFactory; } - VariantsToVariantContextFactory getVariantsToVariantContextFactory() { - return variantsToVariantContextFactory; + VariantToVariantContextFactory getVariantToVariantContextFactory() { + return variantToVariantContextFactory; } VariantContextToGenotypesFactory getVariantContextToGenotypesFactory() { diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantToVariantContextTest.java similarity index 73% rename from convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextTest.java rename to 
convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantToVariantContextTest.java index a21c5cf..fc2a25c 100644 --- a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantsToVariantContextTest.java +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VariantToVariantContextTest.java @@ -23,9 +23,6 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; -import java.util.Collections; -import java.util.List; - import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFHeader; @@ -42,19 +39,18 @@ import org.slf4j.LoggerFactory; /** - * Unit test for VariantsToVariantContext. + * Unit test for VariantToVariantContext. */ -public final class VariantsToVariantContextTest { - private final Logger logger = LoggerFactory.getLogger(VariantsToVariantContextTest.class); +public final class VariantToVariantContextTest { + private final Logger logger = LoggerFactory.getLogger(VariantToVariantContextTest.class); VCFHeader header; - List empty = Collections.emptyList(); - Converter, VariantContext> converter; + Converter converter; @Before public void setUp() { header = new VCFHeader(); - converter = new VariantsToVariantContext(header); + converter = new VariantToVariantContext(header); } @Test @@ -64,7 +60,7 @@ public void testConstructor() { @Test(expected=NullPointerException.class) public void testConstructorNullHeader() { - new VariantsToVariantContext(null); + new VariantToVariantContext(null); } @Test(expected=ConversionException.class) @@ -82,21 +78,6 @@ public void testConvertNullSourceSilent() { assertEquals(null, converter.convert(null, SILENT, logger)); } - @Test(expected=ConversionException.class) - public void testConvertEmptySourceStrict() { - converter.convert(empty, STRICT, logger); - } - - @Test - public void testConvertEmptySourceLenient() { - assertEquals(null, converter.convert(empty, LENIENT, logger)); - } - - @Test - public void testConvertEmptySourceSilent() { - 
assertEquals(null, converter.convert(empty, SILENT, logger)); - } - @Test public void testConvert() { // todo diff --git a/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToVcfHeaderLinesTest.java b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToVcfHeaderLinesTest.java new file mode 100644 index 0000000..e90f48f --- /dev/null +++ b/convert-htsjdk/src/test/java/org/bdgenomics/convert/htsjdk/VcfHeaderToVcfHeaderLinesTest.java @@ -0,0 +1,88 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.convert.htsjdk; + +import static org.bdgenomics.convert.ConversionStringency.*; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.Collections; +import java.util.List; + +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLine; + +import org.bdgenomics.convert.Converter; +import org.bdgenomics.convert.ConversionException; + +import org.junit.Before; +import org.junit.Test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Unit test for VcfHeaderToVcfHeaderLines. 
+ */ +public final class VcfHeaderToVcfHeaderLinesTest { + private final Logger logger = LoggerFactory.getLogger(VcfHeaderToVcfHeaderLinesTest.class); + + VCFHeader header; + Converter> converter; + + @Before + public void setUp() { + header = new VCFHeader(Collections.emptySet()); + converter = new VcfHeaderToVcfHeaderLines(); + } + + @Test + public void testConstructor() { + assertNotNull(converter); + } + + @Test(expected=ConversionException.class) + public void testConvertNullSourceStrict() { + converter.convert(null, STRICT, logger); + } + + @Test + public void testConvertNullSourceLenient() { + assertEquals(null, converter.convert(null, LENIENT, logger)); + } + + @Test + public void testConvertNullSourceSilent() { + assertEquals(null, converter.convert(null, SILENT, logger)); + } + + @Test + public void testConvert() { + // todo + } + + @Test + public void testConvertEmpty() { + List headerLines = converter.convert(new VCFHeader(), SILENT, logger); + assertNotNull(headerLines); + assertTrue(headerLines.isEmpty()); + } +} From 1c20d01b5b677afc16234c4144e4b15948598ff7 Mon Sep 17 00:00:00 2001 From: Michael L Heuer Date: Mon, 23 Sep 2019 11:20:52 -0500 Subject: [PATCH 13/13] fixes due to bdg-formats 0.14.0 --- .../convert/htsjdk/AlignmentRecordToSamRecord.java | 8 ++++---- .../convert/htsjdk/SamRecordToAlignmentRecord.java | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecord.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecord.java index 470ed81..aed0caa 100644 --- a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecord.java +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/AlignmentRecordToSamRecord.java @@ -80,11 +80,11 @@ public SAMRecord convert(final AlignmentRecord alignmentRecord, builder.setReadName(alignmentRecord.getReadName()); 
builder.setReadString(alignmentRecord.getSequence()); - if (alignmentRecord.getQuality() == null) { + if (alignmentRecord.getQualityScores() == null) { builder.setBaseQualityString("*"); } else { - builder.setBaseQualityString(alignmentRecord.getQuality()); + builder.setBaseQualityString(alignmentRecord.getQualityScores()); } String readGroupId = alignmentRecord.getReadGroupId(); @@ -178,8 +178,8 @@ public SAMRecord convert(final AlignmentRecord alignmentRecord, if (alignmentRecord.getMismatchingPositions() != null) { builder.setAttribute("MD", alignmentRecord.getMismatchingPositions()); } - if (alignmentRecord.getOriginalQuality() != null) { - builder.setOriginalBaseQualities(SAMUtils.fastqToPhred(alignmentRecord.getOriginalQuality())); + if (alignmentRecord.getOriginalQualityScores() != null) { + builder.setOriginalBaseQualities(SAMUtils.fastqToPhred(alignmentRecord.getOriginalQualityScores())); } if (alignmentRecord.getOriginalCigar() != null) { builder.setAttribute("OC", alignmentRecord.getOriginalCigar()); diff --git a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamRecordToAlignmentRecord.java b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamRecordToAlignmentRecord.java index 10f238b..d9ad421 100644 --- a/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamRecordToAlignmentRecord.java +++ b/convert-htsjdk/src/main/java/org/bdgenomics/convert/htsjdk/SamRecordToAlignmentRecord.java @@ -81,10 +81,10 @@ public AlignmentRecord convert(final SAMRecord samRecord, } if (samRecord.getBaseQualityString() != "*") { - builder.setQuality(samRecord.getBaseQualityString()); + builder.setQualityScores(samRecord.getBaseQualityString()); } if (samRecord.getOriginalBaseQualities() != null) { - builder.setOriginalQuality(SAMUtils.phredToFastq(samRecord.getOriginalBaseQualities())); + builder.setOriginalQualityScores(SAMUtils.phredToFastq(samRecord.getOriginalBaseQualities())); } int readReference = samRecord.getReferenceIndex();