Skip to content

Commit

Permalink
merged for py3 compatible
Browse files Browse the repository at this point in the history
  • Loading branch information
zj-zhang committed Jul 23, 2019
1 parent 66b2996 commit e55fe4f
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 51 deletions.
2 changes: 1 addition & 1 deletion CLAM/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
""" General Version and other info
"""

__version__ = '1.2.0-beta'
__version__ = '1.2.0'
__author__ = 'Zijun Zhang'
__email__ = 'zj.z@ucla.edu'
3 changes: 1 addition & 2 deletions CLAM/download_data.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import os
import sys
import subprocess
import peak_annotator


def parser(args):
Expand Down Expand Up @@ -45,7 +44,7 @@ def download_genome(genome):
cmd.append('rm {genome}.zip'.format(genome=genome))
for item in cmd:
subprocess.call(item, shell=True, executable='/bin/bash')
print 'Download finished'
print('Download finished')
os.chdir(curr_dir)

def check_genome_data(genome):
Expand Down
26 changes: 13 additions & 13 deletions CLAM/peak_annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pybedtools
import argparse as ap
import logging
import download_data
from . import download_data, config

'''
Assign peaks to genomic regions
Expand All @@ -30,8 +30,8 @@ def parser(args):
genome = args.genome
out_file = args.out_file
if 'CLAM_DAT' not in os.environ or not download_data.check_genome_data(genome):
print "Unable to locate CLAM data folder for genomic regions, will try to download."
print "Downloading..."
print("Unable to locate CLAM data folder for genomic regions, will try to download.")
print("Downloading...")
download_data.download_genome(genome)
genome_data = os.environ['CLAM_DAT']
intersect_gtf_regions(
Expand All @@ -51,10 +51,10 @@ def intersect_gtf_regions(peak_fp, outfn, gtf_dir):
# input arguments

# make pybedtools objects
print "Loading peaks..."
print("Loading peaks...")
peaks = pybedtools.BedTool(peak_fp)
print "Peak file loaded."
print "Loading genome annotation..."
print("Peak file loaded.")
print("Loading genome annotation...")
ref_dict = {
'exon': pybedtools.BedTool(os.path.join(gtf_dir, 'exons.bed')),
'3UTR': pybedtools.BedTool(os.path.join(gtf_dir, '3UTRs.bed')),
Expand All @@ -64,7 +64,7 @@ def intersect_gtf_regions(peak_fp, outfn, gtf_dir):
'proximal200': pybedtools.BedTool(os.path.join(gtf_dir, 'proximal200_intron.bed')),
'proximal500': pybedtools.BedTool(os.path.join(gtf_dir, 'proximal500_intron.bed'))
}
print "Genome annotation loaded."
print("Genome annotation loaded.")

# # process reference for use
target = {
Expand All @@ -80,7 +80,7 @@ def intersect_gtf_regions(peak_fp, outfn, gtf_dir):
'other_exon', "px200_intron", "px500_intron", "distal_intron"]
init = True

print "Intersecting peaks with genome annotation..."
print("Intersecting peaks with genome annotation...")
for cat in category_list:
bed_arr = []
for interval in target[cat]:
Expand All @@ -99,10 +99,10 @@ def intersect_gtf_regions(peak_fp, outfn, gtf_dir):
target[cat], wa=True, wb=True), postmerge=False)
result_bed = result_bed.sort()

print "Preparing output..."
print("Preparing output...")
result_bed.saveas(outfn + '_')
prepend = ['## Annotation peaks to genomic regions, all intersected genomic regions are presented.',
'## CLAM version: 1.2.0',
'## CLAM version: %s'%config.__version__,
'## Column 1: Peak chromosome',
'## Column 2: Peak start',
'## Column 3: Peak end',
Expand All @@ -129,16 +129,16 @@ def intersect_gtf_regions(peak_fp, outfn, gtf_dir):
os.system('cat {outtmp} >> {outfn}'.format(
outtmp=outfn + '_', outfn=outfn))
os.remove(outfn+'_')
print "DONE"
print("DONE")


if __name__ == '__main__':
# peak_fp, genome, outfn = sys.argv[1], sys.argv[2], sys.argv[3]
os.chdir('/mnt/h/yi_lab/m6a/src/scripts/peakComposition')
peak_in, genome, out_file = 'narrow_peak.unique.bed', 'mm10', 'annotate_peak.bed'
if 'CLAM_DAT' not in os.environ or not download_data.check_genome_data(genome):
print "Unable to find CLAM data folder for genomic regions, please try to download it using download_genome command."
print "Downloading..."
print("Unable to find CLAM data folder for genomic regions, please try to download it using download_genome command.")
print("Downloading...")
download_data.download_genome(genome)
genome_data = os.environ['CLAM_DAT']
intersect_gtf_regions(
Expand Down
4 changes: 3 additions & 1 deletion CLAM/peakcaller.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ def call_gene_peak(bam_dict, gene, unique_only=False, with_control=False, binsiz
## "narrowPeak" format from
## https://genome.ucsc.edu/FAQ/FAQformat.html#format12
## chr start end name 1000 strand signalValue pVal qVal peak
narrowPeak_formatter = "%s\t%i\t%i\t%s\t1000\t%s\t%.3f\t%.3e\t%.3e\t.\n"
narrowPeak_formatter = "%s\t%i\t%i\t%s\t1000\t%s\t%s\t%.3e\t%.3e\t.\n"
BED = ''
if len(fold_change)==1:
lb = np.log(fold_change[0]) if with_control else fold_change[0]
Expand All @@ -397,6 +397,8 @@ def call_gene_peak(bam_dict, gene, unique_only=False, with_control=False, binsiz
strand = gene[3]
peak_num += 1
peak_name = gene[4] + '-%i'%peak_num
if with_control:
signal = "%.3f"%(float(signal))
BED += narrowPeak_formatter % (chr, binstart, binend, peak_name, strand, signal, pval, qval)
return BED

Expand Down
60 changes: 30 additions & 30 deletions bin/CLAM
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,17 @@ def main():
#print args
peakcaller.parser( args )

elif subcommand == 'permutation_callpeak':
elif subcommand == 'permutation_callpeak':
from CLAM import permutation_peakcaller
permutation_peakcaller.parser( args )
elif subcommand == 'peak_annotator':
from CLAM import peak_annotator
peak_annotator.parser(args)
elif subcommand == 'data_downloader':
from CLAM import download_data
download_data.parser(args)

elif subcommand == 'peak_annotator':
from CLAM import peak_annotator
peak_annotator.parser(args)

elif subcommand == 'data_downloader':
from CLAM import download_data
download_data.parser(args)


def setup_logger():
Expand Down Expand Up @@ -131,12 +131,12 @@ def get_arg_parser():
# permutation_callpeak
add_permutation_callpeak_parser(subparsers)

# peak_annotator
add_peak_annotator_parser(subparsers)
# peak_annotator
add_peak_annotator_parser(subparsers)

# data_downloader
add_data_downloader_parser(subparsers)

# data_downloader
add_data_downloader_parser(subparsers)

return argparser


Expand Down Expand Up @@ -293,31 +293,31 @@ def add_permutation_callpeak_parser( subparsers ):


def add_peak_annotator_parser(subparsers):
    """Register the ``peak_annotator`` subcommand on *subparsers*.

    The subcommand takes three required options:

    * ``-i/--input``    -> stored as ``peak_in``
    * ``-g/--genome``   -> stored as ``genome``; one of ``hg19``, ``hg38``
      or ``mm10``
    * ``-o/--out-file`` -> stored as ``out_file``

    Args:
        subparsers: The object returned by
            ``argparse.ArgumentParser.add_subparsers()``.

    Returns:
        None. The new subparser is attached to *subparsers* in place.
    """
    ag_anno = subparsers.add_parser(
        "peak_annotator",
        help="CLAM peak annotator: assign peaks to genomic regions")

    # input/output
    # Every option is registered exactly once: with argparse's default
    # conflict handling, a second add_argument() call that reuses an option
    # string raises an error.
    ag_anno.add_argument(
        "-i", "--input", dest="peak_in", type=str, required=True,
        help="Input peak file")

    ag_anno.add_argument(
        "-g", "--genome", dest="genome", choices=('hg19', 'hg38', 'mm10'),
        type=str, required=True,
        help="Genome version (hg19, hg38, mm10 available)")

    ag_anno.add_argument(
        "-o", "--out-file", dest="out_file", type=str, required=True,
        help="Output file")

    return


def add_data_downloader_parser(subparsers):
    """Register the ``data_downloader`` subcommand on *subparsers*.

    The subcommand takes a single required option, ``-g/--genome`` (stored
    as ``genome``), restricted to ``hg19``, ``hg38`` or ``mm10``.

    Args:
        subparsers: The object returned by
            ``argparse.ArgumentParser.add_subparsers()``.

    Returns:
        None. The new subparser is attached to *subparsers* in place.
    """
    ag_down = subparsers.add_parser(
        "data_downloader",
        help="CLAM data downloader: download data of genomic regions")

    # input/output
    # Registered exactly once: with argparse's default conflict handling, a
    # second add_argument() call reusing "-g"/"--genome" raises an error.
    ag_down.add_argument(
        "-g", "--genome", dest="genome", choices=('hg19', 'hg38', 'mm10'),
        type=str, required=True,
        help="Genome version (hg19, hg38, mm10 available)")

    return



Expand Down
14 changes: 10 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,26 @@
#!/usr/bin/env python

from setuptools import setup

from CLAM.config import __version__

# Runtime dependencies handed to setuptools, one requirement per line and
# each followed by a comma. Python concatenates adjacent string literals, so
# a missing comma silently fuses two names into one bogus requirement
# (e.g. 'pybedtools' 'mpmath' -> 'pybedtoolsmpmath').
# 'multiprocessing' is deliberately not listed: it ships with the standard
# library, and the PyPI distribution of that name is a backport intended for
# old Python 2 interpreters.
INSTALL_REQUIRES = [
    'scipy',
    'pysam',
    'numpy',
    'statsmodels',
    'tqdm',
    'pybedtools',
    'mpmath',
]


def main():
    """Invoke setuptools' ``setup()`` with the CLAM package metadata.

    The version string is the ``__version__`` imported at the top of this
    file from ``CLAM.config``, so the package version is defined in one
    place only. Dependencies come from ``INSTALL_REQUIRES``.

    Returns:
        None.
    """
    setup(name='CLAM',
          version=__version__,
          description='CLIP-seq Analysis of Multi-mapped reads',
          author='Zijun Zhang',
          author_email='zj.z@ucla.edu',
          url='https://github.com/Xinglab/CLAM',
          packages=['CLAM', 'CLAM.stats'],
          scripts=['bin/CLAM'],
          install_requires=INSTALL_REQUIRES,
          )
    return

Expand Down

0 comments on commit e55fe4f

Please sign in to comment.