Skip to content

Commit

Permalink
Merge pull request #39 from yhoogstrate/bam_extract_less_strict_in_bam
Browse files Browse the repository at this point in the history
Allow non fixed BAM files in bam-extract
  • Loading branch information
yhoogstrate authored Feb 3, 2017
2 parents 14bea2b + 898d47e commit f833cd2
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 36 deletions.
4 changes: 4 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
2017-02-03 Youri Hoogstrate v0.3.2
* Allows running `dr-disco bam-extract` on non-'fixed' BAM files,
plus corresponding test cases

2017-02-01 Youri Hoogstrate v0.3.1
* Fix in bam fixing code for reads annotated with identical SA-tags

Expand Down
2 changes: 1 addition & 1 deletion bin/dr-disco
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def CLI_detect_intronic_break_points(min_e_score, output_file, bam_input_file):
@click.argument('bam_output_file')
@click.argument('bam_input_file', type=click.Path(exists=True))
def CLI_bam_extract_regions(region1, region2, bam_output_file, bam_input_file):
c = BAMExtract(bam_input_file)
c = BAMExtract(bam_input_file, False)
c.extract(region1, region2, bam_output_file)


Expand Down
51 changes: 28 additions & 23 deletions drdisco/IntronDecomposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,34 +990,39 @@ def merge(self, subnet_m):


class BAMExtract(object):
def __init__(self, bam_file):
self.pysam_fh = self.test_disco_alignment(bam_file)
def __init__(self, bam_file, require_fixed_bam_file):
self.pysam_fh = self.test_disco_alignment(bam_file, require_fixed_bam_file)

@staticmethod
def test_disco_alignment(alignment_file):
def test_disco_alignment(alignment_file, require_fixed_bam_file):
"""Ensures by reading the BAM header whether the BAM file was
indeed fixed using Dr. Disco
"""
bam_fh = pysam.AlignmentFile(alignment_file, "rb")
if 'PG' in bam_fh.header:
for pg in bam_fh.header['PG']:
if pg['ID'] == 'drdisco_fix_chimeric':
try: # pragma: no cover
bam_fh.fetch()
except: # pragma: no cover
logging.info('Indexing BAM file with pysam: ' + bam_fh.filename) # create index if it does not exist
pysam.index(bam_fh.filename)
bam_fh = pysam.AlignmentFile(bam_fh.filename)

try:
bam_fh.fetch()
except:
raise Exception('Could not indexing BAM file: ' + bam_fh.filename)

return bam_fh

# @todo write simple test
raise Exception("Invalid STAR BAM File: has to be post processed with 'dr-disco fix-chimeric ...' first")

if require_fixed_bam_file:
proper_tag = False
if 'PG' in bam_fh.header:
for pg in bam_fh.header['PG']:
if pg['ID'] == 'drdisco_fix_chimeric':
proper_tag = True

if not proper_tag:
raise Exception("Invalid STAR BAM File: has to be post processed with 'dr-disco fix-chimeric ...' first")

try: # pragma: no cover
bam_fh.fetch()
except: # pragma: no cover
logging.info('Indexing BAM file with pysam: ' + bam_fh.filename) # create index if it does not exist
pysam.index(bam_fh.filename)
bam_fh = pysam.AlignmentFile(bam_fh.filename)

try:
bam_fh.fetch()
except:
raise Exception('Could not indexing BAM file: ' + bam_fh.filename)

return bam_fh

def extract_junctions(self, fusion_junctions, splice_junctions):
def read_to_junction(read, rg, parsed_SA_tag, specific_type=None):
Expand Down Expand Up @@ -1371,7 +1376,7 @@ def __init__(self, alignment_file):
self.alignment_file = alignment_file

def decompose(self, MIN_SCORE_FOR_EXTRACTING_SUBGRAPHS):
alignment = BAMExtract(self.alignment_file)
alignment = BAMExtract(self.alignment_file, True)

fusion_junctions = Graph()
splice_junctions = Graph()
Expand Down
4 changes: 2 additions & 2 deletions drdisco/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python
# *- coding: utf-8 -*-
# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 textwidth=79:

Expand All @@ -23,7 +23,7 @@
<http://epydoc.sourceforge.net/manual-fields.html#fields-synonyms>
"""

__version_info__ = ('0', '3', '1')
__version_info__ = ('0', '3', '2')
__version__ = '.'.join(__version_info__) if (len(__version_info__) == 3) else '.'.join(__version_info__[0:3]) + "-" + __version_info__[3]
__author__ = 'Youri Hoogstrate'
__homepage__ = 'https://github.com/yhoogstrate/dr-disco'
Expand Down
Binary file added tests/bam-extract/test_terg_01.bam
Binary file not shown.
1 change: 1 addition & 0 deletions tests/detect-intronic/test_20.bam
41 changes: 31 additions & 10 deletions tests/test_bam_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,34 @@


class TestIntronicBreakDetection(unittest.TestCase):
def test_01a(self):
def test_01_a(self):
# Tests a file that has not (yet) been fixed with `dr-disco fix-chimeric`

input_file = TEST_DIR + "test_terg_01.bam"
output_file = T_TEST_DIR + "test_terg_01.filtered.bam"
output_file_s = T_TEST_DIR + "test_terg_01.filtered.sam"
test_file = TEST_DIR + "test_terg_02.filtered.sam"

c = BAMExtract(input_file, False)
c.extract("chr21:39000000-40000000", "chr5:1-2", output_file)

# Bam2Sam
fhq = open(output_file_s, "w")
fhq.write(pysam.view(output_file))
fhq.close()

if not filecmp.cmp(output_file_s, test_file):
print 'diff \'' + output_file_s + '\' \'' + test_file + '\''

self.assertTrue(filecmp.cmp(output_file_s, test_file))

def test_02_a(self):
input_file = TEST_DIR + "test_terg_02.bam"
output_file = T_TEST_DIR + "test_terg_02.filtered.bam"
output_file_s = T_TEST_DIR + "test_terg_02.filtered.sam"
test_file = TEST_DIR + "test_terg_02.filtered.sam"

c = BAMExtract(input_file)
c = BAMExtract(input_file, False)
c.extract("chr21:39000000-40000000", "chr5:1-2", output_file)

# Bam2Sam
Expand All @@ -61,13 +82,13 @@ def test_01a(self):

self.assertTrue(filecmp.cmp(output_file_s, test_file))

def test_01b(self):
def test_02_b(self):
input_file = TEST_DIR + "test_terg_02.bam"
output_file = T_TEST_DIR + "test_terg_02.filtered.bam"
output_file_s = T_TEST_DIR + "test_terg_02.filtered.sam"
test_file = TEST_DIR + "test_terg_02.filtered.sam"

c = BAMExtract(input_file)
c = BAMExtract(input_file, False)
c.extract("chr5:1-2", "chr21:39000000-40000000", output_file)

# Bam2Sam
Expand All @@ -80,13 +101,13 @@ def test_01b(self):

self.assertTrue(filecmp.cmp(output_file_s, test_file))

def test_02a(self):
def test_02_c(self):
input_file = TEST_DIR + "test_terg_02.bam"
output_file = T_TEST_DIR + "test_terg_02.filtered.bam"
output_file_s = T_TEST_DIR + "test_terg_02.filtered.sam"
test_file = TEST_DIR + "test_terg_02.filtered.sam"

c = BAMExtract(input_file)
c = BAMExtract(input_file, False)
c.extract("chr7:151000000-153000000", "chr5:1-2", output_file)

# Bam2Sam
Expand All @@ -99,13 +120,13 @@ def test_02a(self):

self.assertTrue(filecmp.cmp(output_file_s, test_file))

def test_02b(self):
def test_02_d(self):
input_file = TEST_DIR + "test_terg_02.bam"
output_file = T_TEST_DIR + "test_terg_02.filtered.bam"
output_file_s = T_TEST_DIR + "test_terg_02.filtered.sam"
test_file = TEST_DIR + "test_terg_02.filtered.sam"

c = BAMExtract(input_file)
c = BAMExtract(input_file, False)
c.extract("chr5:1-2", "chr7:151000000-153000000", output_file)

# Bam2Sam
Expand All @@ -118,12 +139,12 @@ def test_02b(self):

self.assertTrue(filecmp.cmp(output_file_s, test_file))

def test_03(self):
def test_02_e(self):
input_file = TEST_DIR + "test_terg_02.bam"
output_file = T_TEST_DIR + "test_terg_02.filtered.bam"
output_file_s = T_TEST_DIR + "test_terg_02.filtered.sam"

c = BAMExtract(input_file)
c = BAMExtract(input_file, False)
c.extract("chr12:151000000-153000000", "chr5:1-2", output_file)

# Bam2Sam
Expand Down
6 changes: 6 additions & 0 deletions tests/test_intronic_break_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,12 @@ def test_19_tests_parsing_of_inversed_TERG_from_s55(self):
# Test data not checked, should just not throw an exception
self.assertTrue(filecmp.cmp(test_file, output_file), msg="diff '" + test_file + "' '" + output_file + "':\n" + subprocess.Popen(['diff', test_file, output_file], stdout=subprocess.PIPE).stdout.read())

def test_20_tests_trigger_error_on_non_fixed_file(self):
input_file_a = TEST_DIR + "test_20.bam"

ic = IntronDecomposition(input_file_a)
self.assertRaises(Exception, ic.decompose, 0) # ic.decompose(0) triggers exception


def main():
unittest.main()
Expand Down

0 comments on commit f833cd2

Please sign in to comment.