Skip to content

Commit

Permalink
Merge pull request #45 from yhoogstrate/manual_things
Browse files Browse the repository at this point in the history
Better way of handling stdio / logging for bioconda compatibility
  • Loading branch information
yhoogstrate authored Mar 2, 2017
2 parents bb56f87 + e8ed63d commit deaa916
Show file tree
Hide file tree
Showing 11 changed files with 35 additions and 61 deletions.
3 changes: 3 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
2017-03-02 Youri Hoogstrate v0.3.4
* Better way of handling stdio / logging for bioconda compatibility

2017-02-08 Youri Hoogstrate v0.3.3
* Huge improvement in performance in `extract subnetworks` by translating
a recursion problem into an iterative linear problem.
Expand Down
5 changes: 0 additions & 5 deletions bin/dr-disco
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@


import drdisco
import logging
import sys
import pysam
import click

Expand Down Expand Up @@ -41,9 +39,6 @@ from drdisco.ChimericAlignment import ChimericAlignment
"""


logging.basicConfig(level=logging.DEBUG, format=drdisco.__log_format__, stream=sys.stdout)


def main():
CLI()

Expand Down
22 changes: 11 additions & 11 deletions drdisco/ChimericAlignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
# vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 textwidth=79:

import os
import logging

import pysam

from fuma.Fusion import STRAND_FORWARD
from .CigarAlignment import CigarAlignment

from drdisco import __version__
from drdisco import log

"""[License: GNU General Public License v3 (GPLv3)]
Expand Down Expand Up @@ -282,7 +282,7 @@ def fix_chain(self, alignments, bam_file, mates):
# start = self.get_closest(next_pos, alignments)

# else:
# print("Warning - mates do not correspond? - maybe empty (-1) as well?")
# logger.warn("Warning - mates do not correspond? - maybe empty (-1) as well?")

# next_pos = [alignments[0].reference_id, alignments[0].reference_start]
# last_pos = [mates[0].reference_id, mates[0].reference_start]
Expand Down Expand Up @@ -459,7 +459,7 @@ def reconstruct_alignments(self, alignments, bam_file, fh_out):

else:
if n == 1:
print("Warning: segments of mate are missing: " + alignments[0].query_name)
log.warn("segments of mate are missing: " + alignments[0].query_name)
all_reads_updated.append(alignments[0])
else:
raise Exception("what happens here?")
Expand All @@ -480,10 +480,10 @@ def convert(self, bam_file_discordant_fixed, temp_dir):
# @TODO / consider todo - start straight from sam
# samtools view -bS samples/7046-004-041_discordant.Chimeric.out.sam > samples/7046-004-041_discordant.Chimeric.out.unsorted.bam

logging.info("Convert into a name-sorted bam file, to get all reads with the same name adjacent to each other")
log.info("Convert into a name-sorted bam file, to get all reads with the same name adjacent to each other")
pysam.sort("-o", basename + ".name-sorted.bam", "-n", self.input_alignment_file)

logging.info("Fixing sam file")
log.info("Fixing sam file")
sam_file_discordant = pysam.AlignmentFile(basename + ".name-sorted.bam", "rb")
header = sam_file_discordant.header
header['RG'] = [
Expand Down Expand Up @@ -519,22 +519,22 @@ def convert(self, bam_file_discordant_fixed, temp_dir):
self.reconstruct_alignments(alignments, sam_file_discordant, fh)
fh.close()

logging.info("Converting fixed file into BAM")
log.info("Converting fixed file into BAM")
fhq = open(basename + ".name-sorted.fixed.bam", "wb")
fhq.write(pysam.view('-bS', basename + ".name-sorted.fixed.sam"))
fhq.close()

logging.info("Sorting position based fixed file")
log.info("Sorting position based fixed file")
pysam.sort("-o", basename + ".sorted.fixed.bam", basename + ".name-sorted.fixed.bam")

logging.info("Indexing the position sorted bam file")
log.info("Indexing the position sorted bam file")
pysam.index(basename + ".sorted.fixed.bam")

logging.info("Cleaning up temp files")
log.info("Cleaning up temp files")
for fname in [basename + ".name-sorted.bam", basename + ".name-sorted.fixed.sam", basename + ".name-sorted.fixed.bam"]:
logging.debug("=> " + fname)
log.debug("=> " + fname)
os.remove(fname)

logging.info("Moving to final destination")
log.info("Moving to final destination")
os.rename(basename + ".sorted.fixed.bam", bam_file_discordant_fixed)
os.rename(basename + ".sorted.fixed.bam" + ".bai", bam_file_discordant_fixed + ".bai")
30 changes: 15 additions & 15 deletions drdisco/IntronDecomposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@

from __init__ import MAX_ACCEPTABLE_INSERT_SIZE, MAX_ACCEPTABLE_ALIGNMENT_ERROR, MAX_GENOME_DISTANCE, MIN_SUBNET_ENTROPY, MIN_DISCO_PER_SUBNET_PER_NODE, MIN_SUPPORTING_READS_PER_SUBNET_PER_NODE

import logging
import math
import operator

import pysam
import HTSeq

from drdisco import log
from .CigarAlignment import cigar_to_cigartuple

from fuma.Fusion import STRAND_FORWARD, STRAND_REVERSE, STRAND_UNDETERMINED
Expand Down Expand Up @@ -601,7 +601,7 @@ def print_chain(self): # pragma: no cover
print "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"

def generate_edge_idx(self):
logging.info("Creating edge index before pruning")
log.info("Creating edge index before pruning")

edges = set()
edges_tuple = []
Expand All @@ -624,7 +624,7 @@ def prune(self):
"""Does some 'clever' tricks to merge edges together and reduce data points
"""
self.generate_edge_idx()
logging.info("Finding and merging other edges in close proximity (insert size)")
log.info("Finding and merging other edges in close proximity (insert size)")

self.check_symmetry()

Expand All @@ -639,7 +639,7 @@ def prune(self):

self.remove_edge(candidate) # do not remove if splice junc exists?

logging.info("Pruned into " + str(len(candidates)) + " candidate edge(s)")
log.info("Pruned into " + str(len(candidates)) + " candidate edge(s)")
return candidates

def prune_edge(self, edge):
Expand Down Expand Up @@ -694,7 +694,7 @@ def rejoin_splice_juncs(self, splice_junctions):
the goal is to add the splice juncs between the nodes
"""
logging.debug("Initiated")
log.debug("Initiated")
k = 0

def search(pos1):
Expand Down Expand Up @@ -736,7 +736,7 @@ def search(pos1):
splice_edges_had.add(splice_junction)
splice_edges_had.add(splice_junction.get_complement())

logging.info("Linked " + str(k) + " splice junction(s)")
log.info("Linked " + str(k) + " splice junction(s)")

def extract_subnetworks_by_splice_junctions(self, thicker_edges, MIN_SCORE_FOR_EXTRACTING_SUBGRAPHS):
""" Deze functie haalt recursief per edge een set van edges op die
Expand All @@ -751,7 +751,7 @@ def extract_subnetworks_by_splice_junctions(self, thicker_edges, MIN_SCORE_FOR_E
en 1 met posities die groter zijn dan zichzelf. Hierdoor is een recursief
terugloop probleem redelijk opgelost.
"""
logging.info("Initiated [MIN_SCORE_FOR_EXTRACTING_SUBGRAPHS=%i]" % MIN_SCORE_FOR_EXTRACTING_SUBGRAPHS)
log.info("Initiated [MIN_SCORE_FOR_EXTRACTING_SUBGRAPHS=%i]" % MIN_SCORE_FOR_EXTRACTING_SUBGRAPHS)

thicker_edges.reverse()
q = 0
Expand Down Expand Up @@ -793,7 +793,7 @@ def extract_subnetworks_by_splice_junctions(self, thicker_edges, MIN_SCORE_FOR_E
subnetworks.append(SubGraph(q, subedges, left_splice_junctions, right_splice_junctions))
self.remove_edge(start_point)

logging.info("Extracted %i subnetwork(s)" % len(subnetworks))
log.info("Extracted %i subnetwork(s)" % len(subnetworks))
return subnetworks


Expand Down Expand Up @@ -1014,7 +1014,7 @@ def test_disco_alignment(alignment_file, require_fixed_bam_file):
bam_fh.fetch()
except: # pragma: no cover
fname = bam_fh.filename
logging.info('Indexing BAM file with pysam: ' + fname) # create index if it does not exist
log.info('Indexing BAM file with pysam: ' + fname) # create index if it does not exist
bam_fh.close()

pysam.index(fname)
Expand Down Expand Up @@ -1182,7 +1182,7 @@ def read_to_junction(read, rg, parsed_SA_tag, specific_type=None):

return (None, None, None)

logging.debug("Parsing reads to obtain fusion gene and splice junctions")
log.debug("Parsing reads to obtain fusion gene and splice junctions")
for read in self.pysam_fh.fetch():
sa = self.parse_SA(read.get_tag('SA'))
_chr = self.pysam_fh.get_reference_name(read.reference_id)
Expand Down Expand Up @@ -1237,7 +1237,7 @@ def read_to_junction(read, rg, parsed_SA_tag, specific_type=None):
else:
fusion_junctions.insert_edge(i_pos1, i_pos2, internal_edge[2], None)

logging.debug("alignment data loaded")
log.debug("alignment data loaded")

def parse_pos(self, str_pos):
_chr, _poss = str_pos.split(":", 2)
Expand Down Expand Up @@ -1450,7 +1450,7 @@ def merge_overlapping_subnets(self, subnets):
merge all subnets in M into i, and remove the former subnets
"""
logging.info("initiated")
log.info("initiated")

def sq_dist(vec):
sum_of_squares = sum(pow(x, 2) for x in vec)
Expand Down Expand Up @@ -1579,11 +1579,11 @@ def tree_remove(genometree, subnet):

new_subnets.append(subnet)

logging.info("Merged " + str(k) + " of the " + str(n) + " into " + str(len(new_subnets)) + " merged subnetwork(s)")
log.info("Merged " + str(k) + " of the " + str(n) + " into " + str(len(new_subnets)) + " merged subnetwork(s)")
return new_subnets

def filter_subnets(self, subnets):
logging.debug("init")
log.debug("init")
k = 0
for subnet in subnets:
"""Total of 8 reads is minimum, of which 2 must be
Expand Down Expand Up @@ -1618,5 +1618,5 @@ def filter_subnets(self, subnets):
if len(subnet.discarded) > 0:
k += 1

logging.info("Filtered " + str(k) + " of the " + str(len(subnets)) + " subnetwork(s)")
log.info("Filtered " + str(k) + " of the " + str(len(subnets)) + " subnetwork(s)")
return subnets
7 changes: 6 additions & 1 deletion drdisco/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,18 @@
gmail dot com
"""

__version_info__ = ('0', '3', '3')
__version_info__ = ('0', '3', '4')
__version__ = '.'.join(__version_info__) if (len(__version_info__) == 3) else '.'.join(__version_info__[0:3]) + "-" + __version_info__[3]
__author__ = 'Youri Hoogstrate'
__homepage__ = 'https://github.com/yhoogstrate/dr-disco'
__license__ = 'GNU General Public License v3 (GPLv3)'
__license_notice__ = 'License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>.\nThis is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.'

import logging
import sys
# Format shared by all log records: source location (file, line, function),
# timestamp, level and message.
__log_format__ = "[%(filename)s:%(lineno)s - %(funcName)s()] %(asctime)s - %(levelname)s - %(message)s"
# Configure logging once, at package import, so the executable and the test
# modules do not need their own logging.basicConfig() call. Records go to
# stderr because bioconda seems to crash when stdout is used here.
logging.basicConfig(level=logging.DEBUG, format=__log_format__, stream=sys.stderr)
# Package-level logger; submodules obtain it with `from drdisco import log`.
log = logging.getLogger(__name__)


# parameters
Expand Down
5 changes: 0 additions & 5 deletions tests/test_bam_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,13 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import drdisco
from drdisco.IntronDecomposition import BAMExtract

import unittest
import logging
import sys
import filecmp
import pysam
import os

logging.basicConfig(level=logging.DEBUG, format=drdisco.__log_format__, stream=sys.stdout)

TEST_DIR = "tests/bam-extract/"
T_TEST_DIR = "tmp/" + TEST_DIR

Expand Down
5 changes: 0 additions & 5 deletions tests/test_cigaralignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,11 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import drdisco
from drdisco.CigarAlignment import CigarAlignment, cigar_to_cigartuple

from fuma.Fusion import STRAND_REVERSE

import unittest
import logging
import sys

logging.basicConfig(level=logging.DEBUG, format=drdisco.__log_format__, stream=sys.stdout)


class TestIntronicBreakDetection(unittest.TestCase):
Expand Down
5 changes: 0 additions & 5 deletions tests/test_fix_chimeric_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,14 @@
"""


import drdisco
from drdisco.ChimericAlignment import ChimericAlignment

import unittest
import logging
import sys
import subprocess
import filecmp
import pysam
import os

logging.basicConfig(level=logging.DEBUG, format=drdisco.__log_format__, stream=sys.stdout)

subprocess.call(["bash", "tests/rm_bai_files.sh"])


Expand Down
4 changes: 0 additions & 4 deletions tests/test_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,12 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import drdisco
import unittest
import logging
import sys
import filecmp
import pysam
import os
import subprocess

logging.basicConfig(level=logging.DEBUG, format=drdisco.__log_format__, stream=sys.stdout)
# Nosetests doesn't use main()

subprocess.call(["bash", "tests/rm_bai_files.sh"])
Expand Down
5 changes: 0 additions & 5 deletions tests/test_intronic_break_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,13 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import drdisco
from drdisco.IntronDecomposition import IntronDecomposition

import unittest
import logging
import sys
import subprocess
import filecmp
import os

logging.basicConfig(level=logging.DEBUG, format=drdisco.__log_format__, stream=sys.stdout)

subprocess.call(["bash", "tests/rm_bai_files.sh"])

TEST_DIR = "tests/detect-intronic/"
Expand Down
5 changes: 0 additions & 5 deletions tests/test_logo_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,11 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import drdisco
import logging
import sys
import os
import unittest
import subprocess
import filecmp

logging.basicConfig(level=logging.DEBUG, format=drdisco.__log_format__, stream=sys.stdout)

subprocess.call(["bash", "tests/rm_bai_files.sh"])

TEST_DIR = "tests/logo-sequence/"
Expand Down

0 comments on commit deaa916

Please sign in to comment.