Skip to content

Commit

Permalink
Add python realignIndelsFromKnownIndels method.
Browse files Browse the repository at this point in the history
  • Loading branch information
heuermh committed Dec 14, 2018
1 parent e696b64 commit c4fa300
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,29 @@ object ConsensusGenerator {
}

/**
* Provides a generator to extract consensuses from a known set of INDELs.
* (Java-specific) Provides a generator to extract consensuses from a known set of INDELs.
*
* @param rdd The previously called INDEL variants.
* @return A consensus generator that looks at previously called INDELs.
*/
def fromKnownIndels(rdd: VariantDataset): ConsensusGenerator = {
  // This overload takes no flank size, so the known INDELs are passed
  // through with a flank size of zero.
  val noFlank = 0
  new ConsensusGeneratorFromKnowns(rdd.rdd, noFlank)
}

/**
* (Java-specific) Provides a generator to extract consensuses from a known set of INDELs.
*
* @param rdd The previously called INDEL variants.
* @param flankSize The number of bases to flank each known INDEL by.
* @return A consensus generator that looks at previously called INDELs.
*/
def fromKnownIndels(rdd: VariantDataset,
                    flankSize: java.lang.Integer): ConsensusGenerator = {
  // Pull out the wrapped RDD of variants first, then hand it to the
  // known-INDEL generator together with the caller-supplied flank size.
  val knownVariants = rdd.rdd
  new ConsensusGeneratorFromKnowns(knownVariants, flankSize)
}

/**
* (Scala-specific) Provides a generator to extract consensuses from a known set of INDELs.
*
* @param rdd The previously called INDEL variants.
* @param flankSize The number of bases to flank each known INDEL by. Default
Expand All @@ -98,13 +120,14 @@ object ConsensusGenerator {
* @return A consensus generator that generates consensuses with several
* methods.
*/
@scala.annotation.varargs
def union(generators: ConsensusGenerator*): ConsensusGenerator = {
  // Collect the variadic arguments into a Seq before wrapping them in a
  // single generator.
  val members = generators.toSeq
  UnionConsensusGenerator(members)
}
}

/**
* Trait for generating consensus sequences for INDEL realignment.
* Abstract class for generating consensus sequences for INDEL realignment.
*
* INDEL realignment scores read alignments against the reference genome and
* a set of "consensus" sequences. These consensus sequences represent alternate
Expand All @@ -113,7 +136,7 @@ object ConsensusGenerator {
* abstract class provides an interface that a consensus generation method should
* implement to provide its consensus sequences to the realigner.
*/
trait ConsensusGenerator extends Serializable {
abstract class ConsensusGenerator extends Serializable {

/**
* @param cigar The CIGAR to process.
Expand Down
28 changes: 12 additions & 16 deletions adam-python/bdgenomics/adam/rdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -1027,9 +1027,7 @@ def realignIndels(self,
maxReadsPerTarget = 20000,
unclipReads = False):
"""
Realigns indels using a concensus-based heuristic.
Generates consensuses from reads.
Realigns indels using a consensus-based heuristic from reads.
:param bool isSorted: If the input data is sorted, setting this
parameter to true avoids a second sort.
Expand Down Expand Up @@ -1060,19 +1058,17 @@ def realignIndels(self,
self.sc)


def realignIndels(self,
knownIndels,
isSorted = False,
maxIndelSize = 500,
maxConsensusNumber = 30,
lodThreshold = 5.0,
maxTargetSize = 3000,
maxReadsPerTarget = 20000,
unclipReads = False):
def realignIndelsFromKnownIndels(self,
knownIndels,
isSorted = False,
maxIndelSize = 500,
maxConsensusNumber = 30,
lodThreshold = 5.0,
maxTargetSize = 3000,
maxReadsPerTarget = 20000,
unclipReads = False):
"""
Realigns indels using a concensus-based heuristic.
Generates consensuses from prior called INDELs.
Realigns indels using a consensus-based heuristic from prior called INDELs.
:param bdgenomics.adam.rdd.VariantDataset knownIndels: An RDD of previously
called INDEL variants.
Expand All @@ -1093,7 +1089,7 @@ def realignIndels(self,
:rtype: bdgenomics.adam.rdd.AlignmentRecordDataset
"""

consensusModel = self.sc._jvm.org.bdgenomics.adam.algorithms.consensus.ConsensusGenerator.fromKnowns(knownIndels._jvmRdd)
consensusModel = self.sc._jvm.org.bdgenomics.adam.algorithms.consensus.ConsensusGenerator.fromKnownIndels(knownIndels._jvmRdd, 0)
return AlignmentRecordDataset(self._jvmRdd.realignIndels(consensusModel,
isSorted,
maxIndelSize,
Expand Down
26 changes: 26 additions & 0 deletions adam-python/bdgenomics/adam/test/alignmentRecordDataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,3 +359,29 @@ def test_shuffle_right_outer_join_groupBy_left(self):
jRdd = reads.rightOuterShuffleRegionJoinAndGroupByLeft(targets)

self.assertEqual(jRdd.toDF().count(), 21)


def test_realignIndels_reads(self):
    # Realign INDELs with the default arguments of realignIndels.
    samPath = self.resourceFile("small.1.sam")
    context = ADAMContext(self.ss)

    alignments = context.loadAlignments(samPath)
    realignedCount = alignments.realignIndels().toDF().count()

    # The realigned dataset is expected to hold 20 records.
    self.assertEqual(realignedCount, 20)

def test_realignIndels_known_indels(self):
    # Realign INDELs against a set of known INDEL variants loaded from VCF.
    samPath = self.resourceFile("small.1.sam")
    vcfPath = self.resourceFile("small.vcf")
    context = ADAMContext(self.ss)

    alignments = context.loadAlignments(samPath)
    variants = context.loadVariants(vcfPath)

    realignedCount = alignments.realignIndelsFromKnownIndels(variants).toDF().count()

    # The realigned dataset is expected to hold 20 records.
    self.assertEqual(realignedCount, 20)

0 comments on commit c4fa300

Please sign in to comment.