diff --git a/clipper/__init__.py b/clipper/__init__.py index e69de29..4257eb1 100644 --- a/clipper/__init__.py +++ b/clipper/__init__.py @@ -0,0 +1,32 @@ +import os + +def data_dir(): + """ + Returns the data directory that contains files for data and + documentation. + """ + return os.path.join(os.path.dirname(__file__), 'data') + + +def test_dir(): + + """ + Returns the data directory that contains example files for tests and + documentation. + """ + return os.path.join(os.path.dirname(__file__), 'test') + +def data_file(fn): + fn = os.path.join(data_dir(), fn) + + if not os.path.exists(fn): + raise ValueError("%s does not exist") + return fn + + +def test_file(fn): + fn = os.path.join(data_dir(), fn) + + if not os.path.exists(fn): + raise ValueError("%s does not exist") + return fn \ No newline at end of file diff --git a/clipper/__init__.pyc b/clipper/__init__.pyc index 909a48f..52a055f 100644 Binary files a/clipper/__init__.pyc and b/clipper/__init__.pyc differ diff --git a/clipper/data/hg19.AS.STRUCTURE_genes.BED.gz b/clipper/data/hg19.AS.STRUCTURE_genes.BED.gz index ff0ec2d..1f864ed 100755 Binary files a/clipper/data/hg19.AS.STRUCTURE_genes.BED.gz and b/clipper/data/hg19.AS.STRUCTURE_genes.BED.gz differ diff --git a/clipper/data/test.AS.STRUCTURE_genes.BED.gz b/clipper/data/test.AS.STRUCTURE_genes.BED.gz new file mode 100644 index 0000000..d3714e1 Binary files /dev/null and b/clipper/data/test.AS.STRUCTURE_genes.BED.gz differ diff --git a/clipper/data/test.AS.STRUCTURE_mRNA.lengths b/clipper/data/test.AS.STRUCTURE_mRNA.lengths new file mode 100644 index 0000000..43f783f --- /dev/null +++ b/clipper/data/test.AS.STRUCTURE_mRNA.lengths @@ -0,0 +1,5 @@ +ENSG00000232113 384 +ENSG00000228150 323 +ENSG00000223883 437 +ENSG00000135750 3141 +ENSG00000227280 212 diff --git a/clipper/data/test.AS.STRUCTURE_premRNA.lengths b/clipper/data/test.AS.STRUCTURE_premRNA.lengths new file mode 100644 index 0000000..e84fd59 --- /dev/null +++ b/clipper/data/test.AS.STRUCTURE_premRNA.lengths @@ -0,0 +1,5 @@ +ENSG00000232113 1147 +ENSG00000228150 3088 +ENSG00000223883 46051 +ENSG00000135750 35997 +ENSG00000227280 609 diff --git a/clipper/src/call_peak.py b/clipper/src/call_peak.py index ed9a611..8e0cc5c 100644 --- a/clipper/src/call_peak.py +++ b/clipper/src/call_peak.py @@ -315,7 +315,7 @@ def call_peaks(loc, gene_length, bam_fileobj=None, bam_file=None, """ #setup - chrom, gene_name, tx_start, tx_end, signstrand = loc.split("|") + chrom, gene_name, tx_start, tx_end, signstrand = loc #logic reading bam files @@ -436,7 +436,7 @@ def peaks_from_info(wiggle, pos_counts, lengths, loc, gene_length, #peak_dict['loc'] = loc #data munging - chrom, gene_name, tx_start, tx_end, signstrand = loc.split("|") + chrom, gene_name, tx_start, tx_end, signstrand = loc tx_start, tx_end = map(int, [tx_start, tx_end]) #used for poisson calclulation? diff --git a/clipper/src/peakfinder.py b/clipper/src/peakfinder.py index 25ce559..2d744d2 100755 --- a/clipper/src/peakfinder.py +++ b/clipper/src/peakfinder.py @@ -7,11 +7,10 @@ import time import pybedtools import gzip -import pkg_resources #import pp import math import multiprocessing - +import clipper from clipper.src.call_peak import call_peaks, poissonP import logging @@ -102,22 +101,21 @@ def build_geneinfo(bed): A dictionary with the key being the name position of the bed file and the values being the ordered bed file - TODO: Refactor to used bedtools instead - """ #opens bed file, either zipped or unzipped try: bedfile = gzip.open(bed, "rb") - except: + except IOError: bedfile = open(bed, "r") gene_info = dict() for line in bedfile.readlines(): - chromosome, start, stop, name, score, signstrand = line.strip().split("\t") - chromosome.replace("chr", "") - gene_info[name] = "|".join([chromosome, name, start, stop, str(signstrand)]) + chromosome, start, stop, name, score, signstrand = line.strip().split() + gene_info[name] = [chromosome, name, int(start), + int(stop), str(signstrand)] + bedfile.close() return gene_info @@ -136,15 +134,19 @@ def build_lengths(length_file): """ - handle = open(length_file, "r") - gene_lengths = {} - - for line in handle.readlines(): - name, gene_length = line.strip().split("\t") - gene_lengths[name] = int(gene_length) - - handle.close() - + try: + handle = open(length_file, "r") + gene_lengths = {} + + for line in handle.readlines(): + name, gene_length = line.strip().split("\t") + gene_lengths[name] = int(gene_length) + + handle.close() + + except TypeError: + raise ValueError("file %s not found" % length_file) + return gene_lengths @@ -177,17 +179,99 @@ def add_species(species, chrs, bed, mrna, premrna): par["mRNA"] = mrna par["premRNA"] = premrna return par - + def func_star(varables): """ covert f([1,2]) to f(1,2) """ return call_peaks(*varables) + +def get_acceptable_species(): + + """ + + Finds all species in data directory + + """ + + acceptable_species = set([]) + for fn in os.listdir(clipper.data_dir()): + fn = fn.split(".")[0] + + if fn == "__init__": + continue + + acceptable_species.add(fn) + + return acceptable_species + + +def build_transcript_data(species, gene_bed, gene_mrna, gene_pre_mrna, pre_mrna): + + """ + + Generates transcript data structures to call peaks on + + Allows for either predefined files (from the data directory) + or custom files + + Accepts species, and genebed, genemrnaand genepremrna options + + species - the species to run on + gene_bed - an abribtary bed file of locations to search for peaks (should be gene locations) + gene_mrna - the effective length of the mrna of a gene (unmappable regions removed) + gene_premrna - the effective length of the pre-mrna (unmappable regions removed) + + returns genes and lengths dict + + """ + + #error checking + + acceptable_species = get_acceptable_species() + if (species is None and + gene_bed is None and + (gene_mrna is None or gene_premrna is None)): + + raise ValueError("You must set either \"species\" or \"geneBed\"+\"geneMRNA\"+\"genePREMRNA\"") + + if species is not None and gene_bed is not None: + raise ValueError("You shouldn't set both geneBed and species, defaults exist for %s" % (acceptable_species)) + + #Now actually assign values + if species is not None: + try: + gene_bed = clipper.data_file(species + ".AS.STRUCTURE_genes.BED.gz") + gene_mrna = clipper.data_file(species + ".AS.STRUCTURE_mRNA.lengths") + gene_pre_mrna = clipper.data_file(species + ".AS.STRUCTURE_premRNA.lengths") + + except ValueError: + raise ValueError("Defaults don't exist for your species: %s. Please choose from: %s or supply \"geneBed\"+\"geneMRNA\"+\"genePREMRNA\"" % (species, acceptable_species)) + + #Selects mRNA or preMRNA lengths + if pre_mrna is True: + lenfile = gene_pre_mrna + else: + lenfile = gene_mrna + + if lenfile is None: + raise IOError("""didn't pass correct mRNA length file option + with given length file""") + + #builds dict to do processing on, + genes = build_geneinfo(gene_bed) + lengths = build_lengths(lenfile) + + + return genes, lengths + def main(options): if options.np == 'autodetect': options.np = multiprocessing.cpu_count() pool = multiprocessing.Pool(options.np) + #job_server = pp.Server(ncpus=options.np) #old pp stuff + bamfile = options.bam if os.path.exists(bamfile): @@ -198,57 +282,12 @@ def main(options): sys.stderr.write("Bam file not defined") raise IOError - species = options.species - #geneBed = options.geneBEDfile - #genemRNA = options.geneMRNAfile - #genePREmRNA = options.genePREMRNAfile - - species_parameters = dict() - - if species is None: #and geneBed is None: - print """You must set either \"species\" or - \"geneBed\"+\"geneMRNA\"+\"genePREMRNA\"""" - exit() - - #error checking - #if species is not None and geneBed is not None: - # print """You shouldn't set both geneBed and - #species, defaults exist for %s" % (acceptable_species)""" - # exit() - #if species is not None and species not in species_parameters: - # print "Defaults don't exist for your species: %s. Please choose from: %s or supply \"geneBed\"+\"geneMRNA\"+\"genePREMRNA\"" % (species, acceptable_species) - # exit() - - lenfile = "" - - species_parameters["hg19"] = add_species("hg19", [range(1, 22), "X", "Y"], - pkg_resources.resource_filename(__name__, "../data/hg19.AS.STRUCTURE_genes.BED.gz"), - pkg_resources.resource_filename(__name__, "../data/hg19.AS.STRUCTURE_mRNA.lengths"), - pkg_resources.resource_filename(__name__, "../data/hg19.AS.STRUCTURE_premRNA.lengths")) - species_parameters["hg18"] = add_species("hg18", [range(1, 22), "X", "Y"], - pkg_resources.resource_filename(__name__, "../data/hg18.AS.STRUCTURE_genes.BED.gz"), - pkg_resources.resource_filename(__name__, "../data/hg18.AS.STRUCTURE_mRNA.lengths"), - pkg_resources.resource_filename(__name__, "../data/hg18.AS.STRUCTURE_premRNA.lengths")) - species_parameters["mm9"] = add_species("mm9", [range(1, 19), "X", "Y"], - pkg_resources.resource_filename(__name__, "../data/mm9.AS.STRUCTURE_genes.BED.gz"), - pkg_resources.resource_filename(__name__, "../data/mm9.AS.STRUCTURE_mRNA.lengths"), - pkg_resources.resource_filename(__name__, "../data/mm9.AS.STRUCTURE_premRNA.lengths")) - #acceptable_species = ",".join(species_parameters.keys()) - - - #if species is None: - #species = "custom" - #species_parameters["custom"] = add_species("custom", [range(1,22), "X", "Y"], geneBed, genemRNA, genePREmRNA) ##warning: set to 1..22, for human, fix this in the future - - #Selects mRNA or preMRNA lengths - if options.premRNA is True: - lenfile = species_parameters[species]["premRNA"] - else: - lenfile = species_parameters[species]["mRNA"] + genes, lengths = build_transcript_data(options.species, + options.geneBEDfile, + options.geneMRNAfile, + options.genePREMRNAfile, + options.premRNA) - #builds dict to do processing on, - lengths = build_lengths(lenfile) - genes = build_geneinfo(species_parameters[species]["gene_bed"]) margin = int(options.margin) #this should be fixed, args should initally be ints if passed @@ -258,59 +297,30 @@ def main(options): minreads = int(options.minreads) poisson_cutoff = options.poisson_cutoff - gene = options.gene - - gene_list = list() - #gets all the genes to call peaks on - try: - if len(gene) > 0: - gene_list = gene - except: + if options.gene is not None and len(options.gene ) > 0: + gene_list = options.gene + else: #selects all genes gene_list = genes.keys() - results = [] - transcriptome_size = 0 - - #I think this calls peaks for each gene in the gene list, - #which could be every gene in the genome - running_list = [] - length_list = [] - - for gene_number, gene in enumerate(gene_list): - #again, hacky should be factored to a single if statement, - #need to be more explicit about code paths - if options.maxgenes == None: - pass - else: - if gene_number >= maxgenes: - break - - geneinfo = genes[gene] - - #There is a better way of doing timing. - start_time = time.strftime('%X %x %Z') - verboseprint(geneinfo + " started:" + str(start_time)) - transcriptome_size += lengths[gene] - #TODO make it so transcript size isn't always used - #this is a filter operation, should make it as such - running_list.append(genes[gene]) - length_list.append(lengths[gene]) - - verboseprint(lengths[gene]) - - #for debugging purposes, sometimes - #call_peaks(genes[gene], lengths[gene], None, bamfile, margin, options.FDR_alpha, options.threshold, - # minreads, poisson_cutoff, options.plotit, 10, 1000, options.SloP, False,) - - - combined_list = zip(running_list, length_list) + #Set up peak calling by gene + running_list = [genes[gene] for gene in gene_list] + length_list = [lengths[gene] for gene in gene_list] + + #truncates for max genes + if options.maxgenes is not None: + running_list = running_list[:maxgenes] + length_list = length_list[:maxgenes] + + transcriptome_size = sum(length_list) + + #do the parralization tasks = [(gene, length, None, bamfile, margin, options.FDR_alpha, options.threshold, minreads, poisson_cutoff, options.plotit, 10, 1000, options.SloP, False) - for gene, length in combined_list] + for gene, length in zip(running_list, length_list)] jobs = pool.map(func_star, tasks) #jobs = [job_server.submit(call_peaks, @@ -342,7 +352,8 @@ def main(options): if gene_result is not None: verboseprint("nreads", gene_result['nreads']) transcriptome_reads += gene_result['nreads'] - print "Transcriptome size is %d, transcriptome reads are %d" % (transcriptome_size, transcriptome_reads) + print """Transcriptome size is %d, transcriptome + reads are %d""" % (transcriptome_size, transcriptome_reads) #is this a missed indent? for gener in results: @@ -352,13 +363,25 @@ def main(options): for cluster in gener['clusters'].keys(): try: - transcriptome_p = poissonP(transcriptome_reads, gener['clusters'][cluster]['Nreads'], transcriptome_size, gener['clusters'][cluster]['size']) + transcriptome_p = poissonP(transcriptome_reads, + gener['clusters'][cluster]['Nreads'], + transcriptome_size, + gener['clusters'][cluster]['size']) if math.isnan(transcriptome_p): - print "Transcriptome P is NaN, transcriptome_reads = %d, cluster reads = %d, transcriptome_size = %d, cluster_size = %d" % (transcriptome_reads, gener['clusters'][cluster]['Nreads'], transcriptome_size, gener['clusters'][cluster]['size']) + print """Transcriptome P is NaN, transcriptome_reads = %d, + cluster reads = %d, transcriptome_size = %d, + cluster_size = %d""" % (transcriptome_reads, + gener['clusters'][cluster]['Nreads'], + transcriptome_size, + gener['clusters'][cluster]['size']) + continue if transcriptome_p > poisson_cutoff: - print "%s\n Failed Transcriptome cutoff with %s reads, pval: %s" % (cluster, gener['clusters'][cluster]['Nreads'], transcriptome_p) + print """%s\n Failed Transcriptome cutoff with %s reads, + pval: %s""" % (cluster, + gener['clusters'][cluster]['Nreads'], + transcriptome_p) continue min_pval = 1 @@ -412,13 +435,9 @@ def call_main(): parser.add_option("--species", "-s", dest="species", help="A species for your peak-finding, either hg19 or mm9") #we don't have custom scripts or documentation to support this right now, removing until those get added in - #parser.add_option("--customBED", dest="geneBEDfile", help="bed file to call peaks on, must come withOUT species and with customMRNA and customPREMRNA", metavar="BEDFILE") - #parser.add_option("--customMRNA", dest="geneMRNAfile", help="file with mRNA lengths for your bed file in format: GENENAMELEN", metavar="FILE") - #parser.add_option("--customPREMRNA", dest="genePREMRNAfile", help="file with pre-mRNA lengths for your bed file in format: GENENAMELEN", metavar="FILE") - parser.add_option("--customBED", dest="geneBEDfile", metavar="BEDFILE", help=SUPPRESS_HELP) - parser.add_option("--customMRNA", dest="geneMRNAfile", metavar="FILE", help=SUPPRESS_HELP) - parser.add_option("--customPREMRNA", dest="genePREMRNAfile", metavar="FILE", help=SUPPRESS_HELP) - + parser.add_option("--customBED", dest="geneBEDfile", help="bed file to call peaks on, must come withOUT species and with customMRNA and customPREMRNA", metavar="BEDFILE") + parser.add_option("--customMRNA", dest="geneMRNAfile", help="file with mRNA lengths for your bed file in format: GENENAMELEN", metavar="FILE") + parser.add_option("--customPREMRNA", dest="genePREMRNAfile", help="file with pre-mRNA lengths for your bed file in format: GENENAMELEN", metavar="FILE") parser.add_option("--outfile", "-o", dest="outfile", default="fitted_clusters", help="a bed file output, default:%default") parser.add_option("--gene", "-g", dest="gene", action="append", help="A specific gene you'd like try", metavar="GENENAME") parser.add_option("--minreads", dest="minreads", help="minimum reads required for a section to start the fitting process. Default:%default", default=3, type="int", metavar="NREADS") diff --git a/clipper/test/test_call_peak.py b/clipper/test/test_call_peak.py index 70f67ae..84edfce 100644 --- a/clipper/test/test_call_peak.py +++ b/clipper/test/test_call_peak.py @@ -10,12 +10,6 @@ from numpy.testing import * from scipy import interpolate -def verboseprint(*args): - # Print each argument separately so caller doesn't need to - # stuff everything to be printed into a single string - for arg in args: - print arg, - print class Test(unittest.TestCase): diff --git a/clipper/test/test_call_peak.pyc b/clipper/test/test_call_peak.pyc new file mode 100644 index 0000000..c5d3066 Binary files /dev/null and b/clipper/test/test_call_peak.pyc differ diff --git a/clipper/test/test_peakfinder.py b/clipper/test/test_peakfinder.py index 76a9863..99686a5 100644 --- a/clipper/test/test_peakfinder.py +++ b/clipper/test/test_peakfinder.py @@ -1,10 +1,9 @@ import unittest from clipper.src.peakfinder import * - +import pkg_resources class test_peakfinder(unittest.TestCase): parser = None - def setUp(self): """ @@ -87,36 +86,7 @@ def test_allup(self): #cleanup #os.remove(pkg_resources.resource_filename(__name__, "../src/peak_results.BED")) - - """ - def test_plotting(self): - args = ["-b", pkg_resources.resource_filename(__name__, "../test/allup_test.bam"), - "-s", "hg19", - "-g", "ENSG00000198901", - "--serial", - "--job_name=peak_test", - "--outfile=" + pkg_resources.resource_filename(__name__, "../src/peak_results"), - "-q", - "-p", - ] - - (options,args) = self.parser.parse_args(args) - main(options) - - tested = open(pkg_resources.resource_filename(__name__, "../src/peak_results.BED")) - correct = open(pkg_resources.resource_filename(__name__, "../test/peak_results.BED")) - - #problem with tracks being different - tested.next() - correct.next() - for test, correct in zip(tested, correct): - self.assertEqual(test, correct) - - #cleanup - os.remove(pkg_resources.resource_filename(__name__, "../src/peak_results.BED")) - """ - - + def test_check_overlaps(self): """ @@ -156,17 +126,16 @@ def test_trim_reads(self): Performs unit tests on trim_reads """ - - pass + #does standard test assuming no melformed input - #test_file = pkg_resources.resource_filename(__name__, "../test/allup_test.bam") + test_file = pkg_resources.resource_filename(__name__, "../test/allup_test.bam") #print type(test_file) - #outfile = trim_reads(test_file) - #correct = pysam.Samfile(pkg_resources.resource_filename(__name__, "../test/rmdup_test.bam")) - #test = pysam.Samfile(outfile) + outfile = trim_reads(test_file) + correct = pysam.Samfile(pkg_resources.resource_filename(__name__, "../test/rmdup_test.bam")) + test = pysam.Samfile(outfile) - #for t, c in zip(correct, test): - # assert t == c + for t, c in zip(correct, test): + assert t == c def test_check_for_index(self): @@ -211,7 +180,7 @@ def test_check_for_index(self): def test_build_geneinfo(self): - + self.maxDiff = 10000000 """ Performs unit testing on build_geneinfo @@ -219,9 +188,28 @@ def test_build_geneinfo(self): I'm hopefully going to remove this method soon so no testing for now """ - pass - + + #checks error mode + self.assertRaises(TypeError, build_geneinfo, None) + + self.assertRaises(IOError, build_geneinfo, "foo") + + #checks working mode + geneinfo = build_geneinfo( + clipper.data_file("test.AS.STRUCTURE_genes.BED.gz")) + + true_values = { + "ENSG00000232113" : ["chr1", "ENSG00000232113", 173604911, 173606273, "+"], + "ENSG00000228150" : ["chr1", "ENSG00000228150", 10002980, 10010032, "+"], + "ENSG00000223883" : ["chr1", "ENSG00000223883", 69521580, 69650686, "+"], + "ENSG00000135750" : ["chr1", "ENSG00000135750", 233749749, 233808258, "+"], + "ENSG00000227280" : ["chr1", "ENSG00000227280", 145373053, 145375554 ,"-"], + } + + + self.assertDictEqual(geneinfo, true_values) + def test_build_lengths(self): """ @@ -231,9 +219,25 @@ def test_build_lengths(self): I'm hopefully going to remove this method soon so no unit testing for now """ - pass - + + #Checks error mode + self.assertRaises(ValueError, build_lengths, None) + + self.assertRaises(IOError, build_lengths, "foo") + + #checks working mode + lengths = build_lengths( + clipper.data_file("test.AS.STRUCTURE_mRNA.lengths")) + true = {"ENSG00000232113" : 384, + "ENSG00000228150" : 323, + "ENSG00000223883" : 437, + "ENSG00000135750" : 3141, + "ENSG00000227280" : 212, + } + + self.assertDictEqual(lengths, true) + def test_add_species(self): """ @@ -254,8 +258,64 @@ def test_add_species(self): "gene_bed" : "foo", "mRNA" : "bar", "premRNA" : "baz"}) - - + + def test_get_acceptable_species(self): + + """ + + Test get acceptable species + + """ + + result = get_acceptable_species() + + #just a quick test to make sure it works, probably need to fix this + #later + self.assertSetEqual(result, set(["test", "hg19", "mm9", "hg18"])) + + + def test_build_transcript_data(self): + self.maxDiff = 10000000 + """ + + Tests building transcript data and returning the proper values + + Doesn't assume malformed data + + """ + + #tests error modes + self.assertRaises(ValueError, build_transcript_data, None, None, None, None, True) + + self.assertRaises(ValueError, build_transcript_data, "foo", "bar", "bar", "bar", True) + + self.assertRaises(ValueError, build_transcript_data, "bar", None, None, None, True) + + #tests hg19 to make sure its equal to logic + genes, lengths = build_transcript_data("test", None, None, None, True) + true_genes = build_geneinfo(clipper.data_file("test.AS.STRUCTURE_genes.BED.gz")) + true_lengths = build_lengths(clipper.data_file("test.AS.STRUCTURE_premRNA.lengths")) + + self.assertDictEqual(genes, true_genes) + self.assertDictEqual(lengths, true_lengths) + + #tests hg19 on premrna lengths + genes, lengths = build_transcript_data("test", None, None, None, False) + true_genes = build_geneinfo(clipper.data_file("test.AS.STRUCTURE_genes.BED.gz")) + true_lengths = build_lengths(clipper.data_file("test.AS.STRUCTURE_mRNA.lengths")) + + self.assertDictEqual(genes, true_genes) + self.assertDictEqual(lengths, true_lengths) + + #Test custom files + #this should all work, + self.assertRaises(IOError, build_transcript_data, None, clipper.data_file("test.AS.STRUCTURE_genes.BED.gz"), None, clipper.data_file("test.AS.STRUCTURE_premRNA.lengths"), False) + build_transcript_data(None, clipper.data_file("test.AS.STRUCTURE_genes.BED.gz"), clipper.data_file("test.AS.STRUCTURE_mRNA.lengths"), clipper.data_file("test.AS.STRUCTURE_premRNA.lengths"), True) + build_transcript_data(None, clipper.data_file("test.AS.STRUCTURE_genes.BED.gz"), clipper.data_file("test.AS.STRUCTURE_mRNA.lengths"), clipper.data_file("test.AS.STRUCTURE_premRNA.lengths"), False) + build_transcript_data(None, clipper.data_file("test.AS.STRUCTURE_genes.BED.gz"), clipper.data_file("test.AS.STRUCTURE_mRNA.lengths"), None, False) + build_transcript_data(None, clipper.data_file("test.AS.STRUCTURE_genes.BED.gz"), None, clipper.data_file("test.AS.STRUCTURE_mRNA.lengths"), True) + + def test_main(self): """ diff --git a/clipper/test/test_peakfinder.pyc b/clipper/test/test_peakfinder.pyc new file mode 100644 index 0000000..c7bbf0f Binary files /dev/null and b/clipper/test/test_peakfinder.pyc differ diff --git a/clipper/test/test_peaks.py b/clipper/test/test_peaks.py index 642d57e..e31462b 100644 --- a/clipper/test/test_peaks.py +++ b/clipper/test/test_peaks.py @@ -3,11 +3,14 @@ @author: gabrielp ''' + import unittest from clipper.src.peaks import find_sections, readsToWiggle_pysam, shuffle from numpy import ones import pysam -import pkg_resources +import clipper +import os + class Test(unittest.TestCase): """ @@ -180,7 +183,7 @@ def test_readsToWiggle_pysam_jxnsOnly(self): #assert 1 == 0 def test_readsToWiggle_pysam(self): - reads = pysam.Samfile(pkg_resources.resource_filename(__name__, "../test/allup_test.bam")) + reads = pysam.Samfile(os.path.join(clipper.test_dir(), "allup_test.bam")) reads = reads.fetch(region="chr15:91536649-91537641") wiggle, jxns, pos_counts, lengths, allreads = readsToWiggle_pysam(reads, 91537632, 91537675, '-', 'center', False) @@ -205,7 +208,7 @@ def test_readsToWiggle_pysam(self): assert lengths == [33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33] - reads = pysam.Samfile(pkg_resources.resource_filename(__name__, "../test/allup_test.bam")) + reads = pysam.Samfile(os.path.join(clipper.test_dir(), "allup_test.bam")) reads = reads.fetch(region="chr15:91536649-91537641") wiggle, jxns, pos_counts, lengths, allreads = readsToWiggle_pysam(reads, 91537632, 91537675, '-', 'center', True) wiggle_true = [0.06060606060606061, 0.06060606060606061, 0.06060606060606061, 0.06060606060606061, 0.06060606060606061, 0.06060606060606061, 0.06060606060606061, 0.06060606060606061, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.33333333333333326, 0.2727272727272727, 0.2727272727272727, 0.2727272727272727, 0.2727272727272727, 0.2727272727272727, 0.2727272727272727, 0.2727272727272727, 0.2727272727272727, 0.0, 0.0, 0.0] diff --git a/clipper/test/test_peaks.pyc b/clipper/test/test_peaks.pyc new file mode 100644 index 0000000..599a9db Binary files /dev/null and b/clipper/test/test_peaks.pyc differ diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..e69de29 diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..f6dfefc --- /dev/null +++ b/test.sh @@ -0,0 +1,6 @@ +#!/bin/bash +#My attempt at automating full testing, once clipper is installed will run a few different clipper full runs to see if things don't break + +#This will be mostly on oolite, and oolite specific (so we can run large files) + +clipper -b clipper/test/allup_test.bam -s hg19 \ No newline at end of file