diff --git a/CLAM/config.py b/CLAM/config.py index dcc32a5..711e924 100644 --- a/CLAM/config.py +++ b/CLAM/config.py @@ -3,6 +3,6 @@ """ General Version and other info """ -__version__ = '1.2.0-beta' +__version__ = '1.2.0' __author__ = 'Zijun Zhang' __email__ = 'zj.z@ucla.edu' \ No newline at end of file diff --git a/CLAM/download_data.py b/CLAM/download_data.py index e510ce8..3f8cbb8 100644 --- a/CLAM/download_data.py +++ b/CLAM/download_data.py @@ -1,7 +1,6 @@ import os import sys import subprocess -import peak_annotator def parser(args): @@ -45,7 +44,7 @@ def download_genome(genome): cmd.append('rm {genome}.zip'.format(genome=genome)) for item in cmd: subprocess.call(item, shell=True, executable='/bin/bash') - print 'Download finished' + print('Download finished') os.chdir(curr_dir) def check_genome_data(genome): diff --git a/CLAM/peak_annotator.py b/CLAM/peak_annotator.py index 8d5bf36..3907b23 100644 --- a/CLAM/peak_annotator.py +++ b/CLAM/peak_annotator.py @@ -3,7 +3,7 @@ import pybedtools import argparse as ap import logging -import download_data +from . import download_data, config ''' Assign peaks to genomic regions @@ -30,8 +30,8 @@ def parser(args): genome = args.genome out_file = args.out_file if 'CLAM_DAT' not in os.environ or not download_data.check_genome_data(genome): - print "Unable to locate CLAM data folder for genomic regions, will try to download." - print "Downloading..." + print("Unable to locate CLAM data folder for genomic regions, will try to download.") + print("Downloading...") download_data.download_genome(genome) genome_data = os.environ['CLAM_DAT'] intersect_gtf_regions( @@ -51,10 +51,10 @@ def intersect_gtf_regions(peak_fp, outfn, gtf_dir): # input arguments # make pybedtools objects - print "Loading peaks..." + print("Loading peaks...") peaks = pybedtools.BedTool(peak_fp) - print "Peak file loaded." - print "Loading genome annotation..." 
+ print("Peak file loaded.") + print("Loading genome annotation...") ref_dict = { 'exon': pybedtools.BedTool(os.path.join(gtf_dir, 'exons.bed')), '3UTR': pybedtools.BedTool(os.path.join(gtf_dir, '3UTRs.bed')), @@ -64,7 +64,7 @@ def intersect_gtf_regions(peak_fp, outfn, gtf_dir): 'proximal200': pybedtools.BedTool(os.path.join(gtf_dir, 'proximal200_intron.bed')), 'proximal500': pybedtools.BedTool(os.path.join(gtf_dir, 'proximal500_intron.bed')) } - print "Genome annotation loaded." + print("Genome annotation loaded.") # # process reference for use target = { @@ -80,7 +80,7 @@ def intersect_gtf_regions(peak_fp, outfn, gtf_dir): 'other_exon', "px200_intron", "px500_intron", "distal_intron"] init = True - print "Intersecting peaks with genome annotation..." + print("Intersecting peaks with genome annotation...") for cat in category_list: bed_arr = [] for interval in target[cat]: @@ -99,10 +99,10 @@ def intersect_gtf_regions(peak_fp, outfn, gtf_dir): target[cat], wa=True, wb=True), postmerge=False) result_bed = result_bed.sort() - print "Preparing output..." 
+ print("Preparing output...") result_bed.saveas(outfn + '_') prepend = ['## Annotation peaks to genomic regions, all intersected genomic regions are presented.', - '## CLAM version: 1.2.0', + '## CLAM version: %s'%config.__version__, '## Column 1: Peak chromosome', '## Column 2: Peak start', '## Column 3: Peak end', @@ -129,7 +129,7 @@ def intersect_gtf_regions(peak_fp, outfn, gtf_dir): os.system('cat {outtmp} >> {outfn}'.format( outtmp=outfn + '_', outfn=outfn)) os.remove(outfn+'_') - print "DONE" + print("DONE") if __name__ == '__main__': @@ -137,8 +137,8 @@ def intersect_gtf_regions(peak_fp, outfn, gtf_dir): os.chdir('/mnt/h/yi_lab/m6a/src/scripts/peakComposition') peak_in, genome, out_file = 'narrow_peak.unique.bed', 'mm10', 'annotate_peak.bed' if 'CLAM_DAT' not in os.environ or not download_data.check_genome_data(genome): - print "Unable to find CLAM data folder for genomic regions, please try to download it using download_genome command." - print "Downloading..." + print("Unable to find CLAM data folder for genomic regions, please try to download it using download_genome command.") + print("Downloading...") download_data.download_genome(genome) genome_data = os.environ['CLAM_DAT'] intersect_gtf_regions( diff --git a/CLAM/peakcaller.py b/CLAM/peakcaller.py index 0e9d37a..d512b7d 100755 --- a/CLAM/peakcaller.py +++ b/CLAM/peakcaller.py @@ -375,7 +375,7 @@ def call_gene_peak(bam_dict, gene, unique_only=False, with_control=False, binsiz ## "narrowPeak" format from ## https://genome.ucsc.edu/FAQ/FAQformat.html#format12 ## chr start end name 1000 strand signalValue pVal qVal peak - narrowPeak_formatter = "%s\t%i\t%i\t%s\t1000\t%s\t%.3f\t%.3e\t%.3e\t.\n" + narrowPeak_formatter = "%s\t%i\t%i\t%s\t1000\t%s\t%s\t%.3e\t%.3e\t.\n" BED = '' if len(fold_change)==1: lb = np.log(fold_change[0]) if with_control else fold_change[0] @@ -397,6 +397,8 @@ def call_gene_peak(bam_dict, gene, unique_only=False, with_control=False, binsiz strand = gene[3] peak_num += 1 peak_name = 
gene[4] + '-%i'%peak_num + if with_control: + signal = "%.3f"%(float(signal)) BED += narrowPeak_formatter % (chr, binstart, binend, peak_name, strand, signal, pval, qval) return BED diff --git a/bin/CLAM b/bin/CLAM index d93ce03..ef9fe85 100755 --- a/bin/CLAM +++ b/bin/CLAM @@ -65,17 +65,17 @@ def main(): #print args peakcaller.parser( args ) - elif subcommand == 'permutation_callpeak': + elif subcommand == 'permutation_callpeak': from CLAM import permutation_peakcaller permutation_peakcaller.parser( args ) - - elif subcommand == 'peak_annotator': - from CLAM import peak_annotator - peak_annotator.parser(args) - - elif subcommand == 'data_downloader': - from CLAM import download_data - download_data.parser(args) + + elif subcommand == 'peak_annotator': + from CLAM import peak_annotator + peak_annotator.parser(args) + + elif subcommand == 'data_downloader': + from CLAM import download_data + download_data.parser(args) def setup_logger(): @@ -131,12 +131,12 @@ def get_arg_parser(): # permutation_callpeak add_permutation_callpeak_parser(subparsers) - # peak_annotator - add_peak_annotator_parser(subparsers) + # peak_annotator + add_peak_annotator_parser(subparsers) + + # data_downloader + add_data_downloader_parser(subparsers) - # data_downloader - add_data_downloader_parser(subparsers) - return argparser @@ -293,31 +293,31 @@ def add_permutation_callpeak_parser( subparsers ): def add_peak_annotator_parser(subparsers): - ag_anno = subparsers.add_parser( - "peak_annotator", help="CLAM peak annotator: assign peaks to genomic regions") + ag_anno = subparsers.add_parser( + "peak_annotator", help="CLAM peak annotator: assign peaks to genomic regions") - # input/output - ag_anno.add_argument("-i", "--input", dest="peak_in", type=str, required=True, - help="Input peak file") + # input/output + ag_anno.add_argument("-i", "--input", dest="peak_in", type=str, required=True, + help="Input peak file") - ag_anno.add_argument("-g", "--genome", dest="genome", choices=('hg19', 'hg38', 
'mm10'), type=str, required=True, - help="Genome version (hg19, hg38, mm10 avaiable)") + ag_anno.add_argument("-g", "--genome", dest="genome", choices=('hg19', 'hg38', 'mm10'), type=str, required=True, + help="Genome version (hg19, hg38, mm10 avaiable)") - ag_anno.add_argument("-o", "--out-file", dest="out_file", type=str, required=True, - help="Output file") + ag_anno.add_argument("-o", "--out-file", dest="out_file", type=str, required=True, + help="Output file") - return + return def add_data_downloader_parser(subparsers): - ag_down = subparsers.add_parser( - "data_downloader", help="CLAM data downloader: download data of genomic regions") + ag_down = subparsers.add_parser( + "data_downloader", help="CLAM data downloader: download data of genomic regions") - # input/output - ag_down.add_argument("-g", "--genome", dest="genome", choices=('hg19', 'hg38', 'mm10'), type=str, required=True, - help="Genome version (hg19, hg38, mm10 avaiable)") + # input/output + ag_down.add_argument("-g", "--genome", dest="genome", choices=('hg19', 'hg38', 'mm10'), type=str, required=True, + help="Genome version (hg19, hg38, mm10 avaiable)") - return + return diff --git a/setup.py b/setup.py index d67593c..5c3ff2f 100755 --- a/setup.py +++ b/setup.py @@ -1,11 +1,11 @@ #!/usr/bin/env python from setuptools import setup - +from CLAM.config import __version__ def main(): setup(name='CLAM', - version='1.2.0-beta', + version=__version__, description='CLIP-seq Analysis of Multi-mapped reads', author='Zijun Zhang', author_email='zj.z@ucla.edu', @@ -13,8 +13,14 @@ def main(): packages=['CLAM', 'CLAM.stats'], scripts=['bin/CLAM'], install_requires=[ - #'pysam>0.12,<0.2', - 'numpy'] + 'scipy', + 'pysam', + 'numpy', + 'multiprocessing', + 'statsmodels', + 'tqdm', + 'pybedtools', + 'mpmath'] ) return