Skip to content

Commit

Permalink
Merge pull request bigdatagenomics#2 from fnothaft/mo-mutect
Browse files Browse the repository at this point in the history
Adding more pre-processing filters for Mutect.
  • Loading branch information
jstjohn committed May 30, 2015
2 parents 095c2d4 + b3a3f23 commit 504c0a8
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/**
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.avocado.preprocessing

import org.apache.commons.configuration.SubnodeConfiguration
import org.apache.spark.rdd.RDD
import org.bdgenomics.formats.avro.AlignmentRecord

/**
 * Preprocessing stage that keeps only the reads whose mapping quality meets a
 * configurable minimum.
 *
 * The minimum is read from the `mapQualityThreshold` key of the stage
 * configuration and defaults to 0 when the key is absent.
 */
object MappingQualityFilter extends PreprocessingStage {
  override val stageName: String = "mapping_quality_filter"

  /**
   * Filters reads by mapping quality.
   *
   * @param rdd Reads to filter.
   * @param config Stage configuration; `mapQualityThreshold` (default 0) is the
   *   inclusive lower bound on mapping quality.
   * @return The reads whose mapping quality is set and is greater than or equal
   *   to the threshold. Reads without a mapping quality are dropped.
   */
  override def apply(rdd: RDD[AlignmentRecord], config: SubnodeConfiguration): RDD[AlignmentRecord] = {
    // inclusive lower bound on mapping quality
    val mqThreshold = config.getInt("mapQualityThreshold", 0)

    // The mapping quality is a boxed value that may be null (presumably for
    // unmapped reads -- confirm against the Avro schema). Comparing it directly
    // would unbox the null and throw a NullPointerException inside the Spark
    // task, so wrap it in an Option; a read without a mapping quality cannot
    // satisfy the threshold and is filtered out.
    rdd.filter(rec => Option(rec.getMapq).exists(mapq => mapq.intValue >= mqThreshold))
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.avocado.preprocessing

import org.apache.commons.configuration.SubnodeConfiguration
import org.apache.spark.rdd.RDD
import org.bdgenomics.formats.avro.AlignmentRecord
import org.bdgenomics.adam.rich.RichAlignmentRecord._

/**
 * Preprocessing stage that removes reads carrying an `XT` tag whose value is
 * `M`.
 *
 * NOTE(review): the stage name suggests `XT` = `M` marks reads placed by the
 * aligner's mate rescue step (as BWA does) -- confirm against the aligner
 * documentation.
 */
object MateRescueFilter extends PreprocessingStage {
  override val stageName: String = "mate_rescue_filter"

  /**
   * Drops every read that has an `XT` tag equal to `M`.
   *
   * @param rdd Reads to filter.
   * @param config Stage configuration; not used by this stage.
   * @return The reads that do not carry an `XT` tag with value `M`.
   */
  override def apply(rdd: RDD[AlignmentRecord], config: SubnodeConfiguration): RDD[AlignmentRecord] =
    rdd.filter(record => !record.tags.exists(a => a.tag == "XT" && isMateRescueValue(a.value)))

  /**
   * Tests whether a tag value denotes `M`.
   *
   * `XT` is typically written as a single-character (`A`-typed) tag, so the
   * value may arrive as a `Char` rather than a `String`; comparing a `Char`
   * against the string "M" is always false and would make the filter a no-op.
   * Both representations are accepted here.
   */
  private def isMateRescueValue(value: Any): Boolean = value match {
    case c: Char   => c == 'M'
    case s: String => s == "M"
    case _         => false
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ object Preprocessor {
RealignIndels,
BaseQualityFilter,
ClippingFilter,
DuplicateFilter)
DuplicateFilter,
MappingQualityFilter,
MateRescueFilter)

def apply(rdd: RDD[AlignmentRecord],
stageName: String,
Expand Down

0 comments on commit 504c0a8

Please sign in to comment.