Skip to content

Commit

Permalink
Merge pull request #22 from karel-brinda/rnftools_lift
Browse files Browse the repository at this point in the history
rnftools liftover
  • Loading branch information
karel-brinda committed Sep 17, 2015
2 parents 8a59ef9 + f05c38b commit 79e1801
Show file tree
Hide file tree
Showing 24 changed files with 2,403 additions and 74 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pysam>=0.8.3
pbr>=1.0.1
pbr>=1.4.0
termcolor>=1.1.0
sphinx>=1.3.0
sphinxcontrib-napoleon>=0.3.3
Expand Down
2 changes: 2 additions & 0 deletions rnftools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import rnftools.mishmash
import rnftools.lavender
import rnftools.rnfformat
import rnftools.utils

import os

# version detection
Expand Down
8 changes: 4 additions & 4 deletions rnftools/mishmash/CuReSim.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ def recode_curesim_reads(
"""Recode CuReSim output FASTQ file to the RNF-compatible output FASTQ file.
Args:
curesim_fastq_fo (file): File object of CuReSim FASTQ file.
fastq_rnf_fo (file): File object of RNF FASTQ.
fai_fo (file): File object for FAI file of the reference genome.
curesim_fastq_fo (file object): File object of CuReSim FASTQ file.
fastq_rnf_fo (file object): File object of RNF FASTQ.
fai_fo (file object): File object for FAI file of the reference genome.
genome_id (int): RNF genome ID to be used.
number_of_read_tuples (int): Expected number of read tuples (to estimate number of digits in RNF).
Expand All @@ -149,7 +149,7 @@ def recode_curesim_reads(

max_seq_len=0

fai_index = FaiIndex(fai_fo=fai_fo)
fai_index = rnftools.utils.FaIdx(fai_fo=fai_fo)
read_tuple_id_width=len(format(number_of_read_tuples,'x'))

fq_creator=rnftools.rnfformat.FqCreator(
Expand Down
3 changes: 1 addition & 2 deletions rnftools/mishmash/DwgSim.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import rnftools
from .Source import *
from .FaiIndex import *

import os
import smbl
Expand Down Expand Up @@ -223,7 +222,7 @@ def recode_dwgsim_reads(
14) read number (unique within a given contig/chromosome)
"""

fai_index = FaiIndex(fai_fo=fai_fo)
fai_index = rnftools.utils.FaIdx(fai_fo=fai_fo)
read_tuple_id_width=len(format(number_of_read_tuples,'x'))

# parsing FQ file
Expand Down
46 changes: 0 additions & 46 deletions rnftools/mishmash/FaiIndex.py

This file was deleted.

3 changes: 1 addition & 2 deletions rnftools/mishmash/Source.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import pysam

import rnftools
from .FaiIndex import *

class Source(object):
""" Abstract class for a genome from which read tuples are simulated.
Expand Down Expand Up @@ -166,7 +165,7 @@ def recode_sam_reads(
NotImplementedError
"""

fai_index = FaiIndex(fai_fo)
fai_index = rnftools.utils.FaIdx(fai_fo)
#last_read_tuple_name=[]
read_tuple_id_width=len(format(number_of_read_tuples,'x'))
fq_creator=rnftools.rnfformat.FqCreator(
Expand Down
3 changes: 1 addition & 2 deletions rnftools/mishmash/WgSim.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import rnftools
from .Source import *
from .FaiIndex import *

import os
import smbl
Expand Down Expand Up @@ -208,7 +207,7 @@ def recode_wgsim_reads(
11) pair
"""

fai_index = FaiIndex(fai_fo)
fai_index = rnftools.utils.FaIdx(fai_fo)
read_tuple_id_width=len(format(number_of_read_tuples,'x'))

last_read_tuple_name=None
Expand Down
94 changes: 94 additions & 0 deletions rnftools/rnfformat/RnfLifter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import rnftools
import pysam

import re

class RnfLifter:

def __init__(self,
chain_fn,
fai_fn,
invert=False,
):
self._fai_fn=fai_fn
self._chain_fn=chain_fn
self._invert=invert

if chain_fn is not None:
with open(chain_fn) as chain_fo:
self._chain=rnftools.utils.Chain(chain_fo=chain_fo,invert=invert)
else:
self._chain=None

if fai_fn is not None:
with open(fai_fn) as fai_fo:
self._fai_index=rnftools.utils.FaIdx(fai_fo=fai_fo)
else:
if self._chain is not None:
self._fai_index=self._chain.get_fasta_index()
else:
self._fai_index=None

self._reg_block=re.compile(r"(\(([0-9]+),[0-9]+,[FRN],([0-9]+),([0-9]+)\))")

def lift_rnf_name(self,rnf_name):
if self._chain is None:
return rnf_name
for occur in self._reg_block.finditer(rnf_name):
groups=occur.groups()
chrom_id=int(groups[1])
chrom=self._fai_index.dict_ids_chr[chrom_id]
o_left=groups[2]
o_right=groups[3]
left=int(o_left)
right=int(o_right)
if left!=0:
n_left=self._chain.one_based_transl(chrom,left)
f_new_left=str(n_left).zfill(len(o_left))
rnf_name=rnf_name.replace(",{},".format(o_left),",{},".format(f_new_left))
if right!=0:
new_right=self._chain.one_based_transl(chrom,right)
f_new_right=str(n_right).zfill(len(o_right))
rnf_name=rnf_name.replace(",{})".format(o_right),",{})".format(f_new_right))
return rnf_name

def lift_fastq(self,
fastq_in_fo,
fastq_out_fo
):
for i, line in enumerate(fastq_in_fo,start=0):
if i%4==0:
fastq_out_fo.write(self.lift_rnf_name(line))
else:
fastq_out_fo.write(line)

def lift_sam(self,
sam_in_fn=None,
bam_in_fn=None,
sam_out_fn=None,
bam_out_fn=None,
):
assert sam_in_fn is not None or bam_in_fn is not None
assert sam_in_fn is None or bam_in_fn is None
assert sam_out_fn is not None or bam_out_fn is not None
assert sam_out_fn is None or bam_out_fn is None

if sam_in_fn is None:
in_mode="rb"
in_fn=bam_in_fn
else:
in_mode="r"
in_fn=sam_in_fn

if sam_out_fn is None:
out_mode="wb"
out_fn=bam_out_fn
else:
out_mode="wh"
out_fn=sam_out_fn

infile = pysam.AlignmentFile(in_fn, in_mode)
outfile = pysam.AlignmentFile(out_fn, out_mode, template=infile)
for s in infile:
s.qname=self.lift_rnf_name(s.qname)
outfile.write(s)
2 changes: 2 additions & 0 deletions rnftools/rnfformat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from .FqCreator import *
from .FqMerger import *
from .ReadTuple import *
from .RnfLifter import *
from .RnfProfile import *
from .Segment import *
from .Validator import *

Loading

0 comments on commit 79e1801

Please sign in to comment.