diff --git a/circlator/bamfilter.py b/circlator/bamfilter.py index 971c7d9..35194d6 100644 --- a/circlator/bamfilter.py +++ b/circlator/bamfilter.py @@ -12,6 +12,9 @@ def __init__( bam, outprefix, length_cutoff=100000, + min_read_length=250, + contigs_to_use=None, + discard_unmapped=False, log_prefix='[bamfilter]', ): self.bam = os.path.abspath(bam) @@ -22,6 +25,9 @@ def __init__( self.reads_fa = os.path.abspath(outprefix + '.fasta') self.log = os.path.abspath(outprefix + '.log') self.log_prefix = log_prefix + self.contigs_to_use = self._get_contigs_to_use(contigs_to_use) + self.discard_unmapped = discard_unmapped + self.min_read_length = min_read_length def _get_ref_lengths(self): @@ -30,6 +36,33 @@ def _get_ref_lengths(self): return dict(zip(sam_reader.references, sam_reader.lengths)) + def _get_contigs_to_use(self, contigs_to_use): + '''If contigs_to_use is a set, returns that set. If it's None, returns an empty set. + Otherwise, assumes it's a file name, and gets names from the file''' + if type(contigs_to_use) == set: + return contigs_to_use + elif contigs_to_use is None: + return set() + else: + f = pyfastaq.utils.open_file_read(contigs_to_use) + contigs_to_use = set([line.rstrip() for line in f]) + pyfastaq.utils.close(f) + return contigs_to_use + + + def _check_contigs_to_use(self, ref_dict): + '''Checks that the set of contigs to use are all in the reference + fasta lengths dict made by self._get_ref_lengths()''' + if self.contigs_to_use is None: + return True + + for contig in self.contigs_to_use: + if contig not in ref_dict: + raise Error('Requested to use contig "' + contig + '", but not found in input BAM file "' + self.bam + '"') + + return True + + def _all_reads_from_contig(self, contig, fout): '''Gets all reads from contig called "contig" and writes to fout''' sam_reader = pysam.Samfile(self.bam, "rb") @@ -97,7 +130,7 @@ def _get_region(self, contig, start, end, fout, min_length=250): if read.is_reverse: seq.revcomp() - + if len(seq) >= min_length: print(seq, file=fout) @@ -110,6 +143,10 @@ def run(self): print(self.log_prefix, '#contig', 'length', 'reads_kept', sep='\t', file=f_log) for contig in sorted(ref_lengths): + if len(self.contigs_to_use) > 0 and contig not in self.contigs_to_use: + print(self.log_prefix, contig, ref_lengths[contig], 'skipping', sep='\t', file=f_log) + continue + if ref_lengths[contig] <= self.length_cutoff: self._all_reads_from_contig(contig, f_fa) print(self.log_prefix, contig, ref_lengths[contig], 'all', sep='\t', file=f_log) @@ -117,8 +154,8 @@ def run(self): end_bases_keep = int(0.5 * self.length_cutoff) start = end_bases_keep - 1 end = max(end_bases_keep - 1, ref_lengths[contig] - end_bases_keep) - self._get_region(contig, 0, start, f_fa) - self._get_region(contig, end, ref_lengths[contig], f_fa) + self._get_region(contig, 0, start, f_fa, min_length=self.min_read_length) + self._get_region(contig, end, ref_lengths[contig], f_fa, min_length=self.min_read_length) print( self.log_prefix, contig, @@ -128,6 +165,8 @@ def run(self): file=f_log ) - self._get_all_unmapped_reads(f_fa) + if not self.discard_unmapped: + self._get_all_unmapped_reads(f_fa) + pyfastaq.utils.close(f_fa) pyfastaq.utils.close(f_log) diff --git a/circlator/common.py b/circlator/common.py index 27f5600..224b2d6 100644 --- a/circlator/common.py +++ b/circlator/common.py @@ -1,7 +1,10 @@ import sys +import os import subprocess -version = '0.14.1' +class Error (Exception): pass + +version = '0.15.0' def syscall(cmd, allow_fail=False, verbose=False): if verbose: @@ -36,3 +39,12 @@ def decode(x): except: return x return s + + +def check_files_exist(filenames): + '''Dies if any files in the list of filenames does not exist''' + files_not_found = [x for x in filenames if not os.path.exists(x)] + if len(files_not_found): + for filename in files_not_found: + print('File not found: "', filename, '"', sep='', file=sys.stderr) + raise Error('File(s) not found. Cannot continue') diff --git a/circlator/tasks/all.py b/circlator/tasks/all.py index da4ac36..6f4e0b5 100644 --- a/circlator/tasks/all.py +++ b/circlator/tasks/all.py @@ -26,7 +26,10 @@ def run(): mapreads_group.add_argument('--bwa_opts', help='BWA options, in quotes [%(default)s]', default='-x pacbio', metavar='STRING') bam2reads_group = parser.add_argument_group('bam2reads options') + bam2reads_group.add_argument('--b2r_discard_unmapped', action='store_true', help='Use this to not keep unmapped reads') + bam2reads_group.add_argument('--b2r_only_contigs', help='File of contig names (one per line). Only reads that map to these contigs are kept (and unmapped reads, unless --b2r_discard_unmapped is used). Note: the whole assembly is still used as a reference when mapping', metavar='FILENAME') bam2reads_group.add_argument('--b2r_length_cutoff', type=int, help='All reads mapped to contigs shorter than this will be kept [%(default)s]', default=100000, metavar='INT') + bam2reads_group.add_argument('--b2r_min_read_length', type=int, help='Minimum length of read to output [%(default)s]', default=250, metavar='INT') assemble_group = parser.add_argument_group('assemble options') assemble_group.add_argument('--assemble_spades_k', help='Comma separated list of kmers to use when running SPAdes. Max kmer is 127 and each kmer should be an odd integer [%(default)s]', default='127,121,111,101,95,91,85,81,75,71', metavar='k1,k2,k3,...') @@ -58,9 +61,20 @@ def run(): print_message('{:_^79}'.format(' Checking external programs '), options) circlator.external_progs.check_all_progs(verbose=options.verbose) + files_to_check = [options.assembly, options.reads] + if options.b2r_only_contigs: + files_to_check.append(options.b2r_only_contigs) + options.b2r_only_contigs = os.path.abspath(options.b2r_only_contigs) + + if options.genes_fa: + files_to_check.append(options.genes_fa) + + circlator.common.check_files_exist(files_to_check) + original_assembly = os.path.abspath(options.assembly) original_reads = os.path.abspath(options.reads) + try: os.mkdir(options.outdir) except: @@ -69,6 +83,7 @@ def run(): os.chdir(options.outdir) + original_assembly_renamed = '00.input_assembly.fasta' bam = '01.mapreads.bam' filtered_reads_prefix = '02.bam2reads' filtered_reads = filtered_reads_prefix + '.fasta' @@ -81,11 +96,17 @@ def run(): fixstart_prefix = '06.fixstart' fixstart_fasta = fixstart_prefix + '.fasta' + pyfastaq.tasks.to_fasta( + original_assembly, + original_assembly_renamed, + strip_after_first_whitespace=True, + check_unique=True + ) #-------------------------------- mapreads ------------------------------- print_message('{:_^79}'.format(' Running mapreads '), options) circlator.mapping.bwa_mem( - original_assembly, + original_assembly_renamed, original_reads, bam, threads=options.threads, @@ -99,7 +120,10 @@ def run(): bam_filter = circlator.bamfilter.BamFilter( bam, filtered_reads_prefix, - length_cutoff=options.b2r_length_cutoff + length_cutoff=options.b2r_length_cutoff, + min_read_length=options.b2r_min_read_length, + contigs_to_use=options.b2r_only_contigs, + discard_unmapped=options.b2r_discard_unmapped, ) bam_filter.run() @@ -116,6 +140,15 @@ def run(): a.run() + #------------------------------ filter original assembly ----------------- + if options.b2r_only_contigs: + print_message('{:_^79}'.format(' --b2r_only_contigs used - filering contigs '), options) + assembly_to_use = merge_prefix + '.00.filtered_assembly.fa' + pyfastaq.tasks.filter(original_assembly_renamed, assembly_to_use, ids_file=options.b2r_only_contigs) + else: + assembly_to_use = original_assembly_renamed + + #-------------------------------- merge ---------------------------------- print_message('{:_^79}'.format(' Running merge '), options) if not options.no_pair_merge: @@ -125,7 +158,7 @@ def run(): options.merge_opts.extend(['--reads', filtered_reads]) m = circlator.merge.Merger( - original_assembly, + assembly_to_use, reassembly, merge_prefix, nucmer_diagdiff=options.merge_diagdiff, diff --git a/circlator/tasks/bam2reads.py b/circlator/tasks/bam2reads.py index b30392c..039aa88 100644 --- a/circlator/tasks/bam2reads.py +++ b/circlator/tasks/bam2reads.py @@ -7,7 +7,10 @@ def run(): parser = argparse.ArgumentParser( description = 'Make reads from mapping to be reassembled', usage = 'circlator bam2reads [options] ') + parser.add_argument('--discard_unmapped', action='store_true', help='Use this to not keep unmapped reads') + parser.add_argument('--only_contigs', help='File of contig names (one per line). Only reads that map to these contigs are kept (and unmapped reads, unless --discard_unmapped is used).', metavar='FILENAME') parser.add_argument('--length_cutoff', type=int, help='All reads mapped to contigs shorter than this will be kept [%(default)s]', default=100000, metavar='INT') + parser.add_argument('--min_read_length', type=int, help='Minimum length of read to output [%(default)s]', default=250, metavar='INT') parser.add_argument('bam', help='Name of input bam file', metavar='in.bam') parser.add_argument('outprefix', help='Prefix of output filenames') options = parser.parse_args() @@ -15,7 +18,10 @@ def run(): bam_filter = circlator.bamfilter.BamFilter( options.bam, options.outprefix, - length_cutoff=options.length_cutoff + length_cutoff=options.length_cutoff, + min_read_length=options.min_read_length, + contigs_to_use=options.only_contigs, + discard_unmapped=options.discard_unmapped, ) bam_filter.run() diff --git a/circlator/tests/bamfilter_test.py b/circlator/tests/bamfilter_test.py index 83ba2de..b7336d2 100644 --- a/circlator/tests/bamfilter_test.py +++ b/circlator/tests/bamfilter_test.py @@ -1,7 +1,7 @@ import unittest import filecmp import os -import pyfastaq +import pyfastaq from circlator import bamfilter modules_dir = os.path.dirname(os.path.abspath(bamfilter.__file__)) @@ -20,11 +20,38 @@ def test_get_ref_lengths(self): self.assertEqual(expected, b._get_ref_lengths()) + def test_get_contigs_to_use(self): + '''test _get_contigs_to_use''' + b = bamfilter.BamFilter(os.path.join(data_dir, 'bamfilter_test_get_contigs_to_use.bam'), 'out') + test_file = os.path.join(data_dir, 'bamfilter_test_get_contigs_to_use.infile') + self.assertEqual(b._get_contigs_to_use(test_file), {'contig42', 'contig4444244'}) + self.assertEqual(b._get_contigs_to_use(None), set()) + self.assertEqual(b._get_contigs_to_use({'42', '43'}), {'42', '43'}) + + + def test_check_contigs_to_use(self): + '''test _check_contigs_to_use''' + input_bam = os.path.join(data_dir, 'bamfilter_test_check_contigs_to_use.bam') + b = bamfilter.BamFilter(input_bam, 'out') + ref_lengths = b._get_ref_lengths() + self.assertTrue(b._check_contigs_to_use(ref_lengths)) + + b = bamfilter.BamFilter(input_bam, 'out', contigs_to_use={'1'}) + self.assertTrue(b._check_contigs_to_use(ref_lengths)) + + b = bamfilter.BamFilter(input_bam, 'out', contigs_to_use={'1', '2'}) + self.assertTrue(b._check_contigs_to_use(ref_lengths)) + + with self.assertRaises(bamfilter.Error): + b = bamfilter.BamFilter(input_bam, 'out', contigs_to_use={'42'}) + self.assertTrue(b._check_contigs_to_use(ref_lengths)) + + def test_all_reads_from_contig(self): '''test _all_reads_from_contig''' b = bamfilter.BamFilter(os.path.join(data_dir, 'bamfilter_test_all_reads_from_contig.bam'), 'out') tmp = 'tmp.test_all_reads_from_contig.out.fa' - f = pyfastaq.utils.open_file_write(tmp) + f = pyfastaq.utils.open_file_write(tmp) expected = os.path.join(data_dir, 'bamfilter_test_all_reads_from_contig.reads.fa') b._all_reads_from_contig('1', f) pyfastaq.utils.close(f) @@ -37,7 +64,7 @@ def test_get_all_unmapped_reads(self): b = bamfilter.BamFilter(os.path.join(data_dir, 'bamfilter_test_get_all_unmapped_reads.bam'), 'out') expected = os.path.join(data_dir, 'bamfilter_test_get_all_unmapped_reads.reads.fa') tmp = 'tmp.test_get_all_unmapped_reads.out.fa' - f = pyfastaq.utils.open_file_write(tmp) + f = pyfastaq.utils.open_file_write(tmp) b._get_all_unmapped_reads(f) pyfastaq.utils.close(f) self.assertTrue(filecmp.cmp(expected, tmp, shallow=False)) @@ -49,10 +76,10 @@ def test_break_reads(self): b = bamfilter.BamFilter(os.path.join(data_dir, 'bamfilter_test_break_reads.bam'), 'out') expected = os.path.join(data_dir, 'bamfilter_test_break_reads.broken_reads.fa') tmp = 'tmp.test_break_reads.out.fa' - f = pyfastaq.utils.open_file_write(tmp) + f = pyfastaq.utils.open_file_write(tmp) b._break_reads('contig1', 390, f, min_read_length=5) pyfastaq.utils.close(f) - self.assertTrue(filecmp.cmp(expected, tmp)) + self.assertTrue(filecmp.cmp(expected, tmp, shallow=False)) os.unlink(tmp) @@ -61,10 +88,10 @@ def test_exclude_region(self): b = bamfilter.BamFilter(os.path.join(data_dir, 'bamfilter_test_exclude_region.bam'), 'out') expected = os.path.join(data_dir, 'bamfilter_test_exclude_region.reads.fa') tmp = 'tmp.test_exclude_reads.out.fa' - f = pyfastaq.utils.open_file_write(tmp) + f = pyfastaq.utils.open_file_write(tmp) b._exclude_region('1', 500, 700, f) pyfastaq.utils.close(f) - self.assertTrue(filecmp.cmp(expected, tmp)) + self.assertTrue(filecmp.cmp(expected, tmp, shallow=False)) os.unlink(tmp) @@ -76,7 +103,7 @@ def test_get_region_start(self): f = pyfastaq.utils.open_file_write(tmp) b._get_region('1', 0, 64, f, min_length=20) pyfastaq.utils.close(f) - self.assertTrue(filecmp.cmp(expected, tmp)) + self.assertTrue(filecmp.cmp(expected, tmp, shallow=False)) os.unlink(tmp) @@ -88,5 +115,41 @@ def test_get_region_end(self): f = pyfastaq.utils.open_file_write(tmp) b._get_region('2', 379, 499, f, min_length=20) pyfastaq.utils.close(f) - self.assertTrue(filecmp.cmp(expected, tmp)) + self.assertTrue(filecmp.cmp(expected, tmp, shallow=False)) os.unlink(tmp) + + + def test_run_keep_unmapped(self): + '''test run keep unmapped''' + outprefix = 'tmp.bamfilter_run' + b = bamfilter.BamFilter( + os.path.join(data_dir, 'bamfilter_test_run.bam'), + outprefix, + length_cutoff=600, + min_read_length=100, + contigs_to_use={'contig1', 'contig3', 'contig4'} + ) + b.run() + expected = os.path.join(data_dir, 'bamfilter_test_run_keep_unmapped.out.reads.fa') + self.assertTrue(filecmp.cmp(expected, outprefix + '.fasta', shallow=False)) + os.unlink(outprefix + '.fasta') + os.unlink(outprefix + '.log') + + + def test_run_discard_unmapped(self): + '''test run keep unmapped''' + outprefix = 'tmp.bamfilter_run' + b = bamfilter.BamFilter( + os.path.join(data_dir, 'bamfilter_test_run.bam'), + outprefix, + length_cutoff=600, + min_read_length=100, + contigs_to_use={'contig1', 'contig3', 'contig4'}, + discard_unmapped=True + ) + b.run() + expected = os.path.join(data_dir, 'bamfilter_test_run_discard_unmapped.out.reads.fa') + self.assertTrue(filecmp.cmp(expected, outprefix + '.fasta', shallow=False)) + os.unlink(outprefix + '.fasta') + os.unlink(outprefix + '.log') + diff --git a/circlator/tests/common_test.py b/circlator/tests/common_test.py new file mode 100644 index 0000000..8466a14 --- /dev/null +++ b/circlator/tests/common_test.py @@ -0,0 +1,15 @@ +import unittest +import os +from circlator import common + +modules_dir = os.path.dirname(os.path.abspath(common.__file__)) +data_dir = os.path.join(modules_dir, 'tests', 'data') + + +class TestCommon(unittest.TestCase): + def test_check_files_exist(self): + '''test check_files_exist''' + file_exists = os.path.join(data_dir, 'common_test_file_exists') + common.check_files_exist([file_exists]) + with self.assertRaises(common.Error): + common.check_files_exist([file_exists, 'thisisnotafileandshouldcauseanerror']) diff --git a/circlator/tests/data/bamfilter_test_check_contigs_to_use.bam b/circlator/tests/data/bamfilter_test_check_contigs_to_use.bam new file mode 100644 index 0000000..e71ba3f Binary files /dev/null and b/circlator/tests/data/bamfilter_test_check_contigs_to_use.bam differ diff --git a/circlator/tests/data/bamfilter_test_check_contigs_to_use.bam.bai b/circlator/tests/data/bamfilter_test_check_contigs_to_use.bam.bai new file mode 100644 index 0000000..cbdbb02 Binary files /dev/null and b/circlator/tests/data/bamfilter_test_check_contigs_to_use.bam.bai differ diff --git a/circlator/tests/data/bamfilter_test_get_contigs_to_use.bam b/circlator/tests/data/bamfilter_test_get_contigs_to_use.bam new file mode 100644 index 0000000..e69de29 diff --git a/circlator/tests/data/bamfilter_test_get_contigs_to_use.infile b/circlator/tests/data/bamfilter_test_get_contigs_to_use.infile new file mode 100644 index 0000000..1f47037 --- /dev/null +++ b/circlator/tests/data/bamfilter_test_get_contigs_to_use.infile @@ -0,0 +1,2 @@ +contig42 +contig4444244 diff --git a/circlator/tests/data/bamfilter_test_run.bam b/circlator/tests/data/bamfilter_test_run.bam new file mode 100644 index 0000000..49218cd Binary files /dev/null and b/circlator/tests/data/bamfilter_test_run.bam differ diff --git a/circlator/tests/data/bamfilter_test_run.bam.bai b/circlator/tests/data/bamfilter_test_run.bam.bai new file mode 100644 index 0000000..cdfcf53 Binary files /dev/null and b/circlator/tests/data/bamfilter_test_run.bam.bai differ diff --git a/circlator/tests/data/bamfilter_test_run.make_reads_and_bam.sh b/circlator/tests/data/bamfilter_test_run.make_reads_and_bam.sh new file mode 100644 index 0000000..520818c --- /dev/null +++ b/circlator/tests/data/bamfilter_test_run.make_reads_and_bam.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash -e +reads=bamfilter_test_run.reads.fa +ref=bamfilter_test_run.ref.fa +samtools faidx bamfilter_test_run.ref.fa contig1:1-100 > $reads +samtools faidx bamfilter_test_run.ref.fa contig1:301-400 >> $reads +samtools faidx bamfilter_test_run.ref.fa contig2:1-100 >> $reads +samtools faidx bamfilter_test_run.ref.fa contig4:1-100 >> $reads +samtools faidx bamfilter_test_run.ref.fa contig4:251-350 >> $reads +samtools faidx bamfilter_test_run.ref.fa contig4:201-350 >> $reads +samtools faidx bamfilter_test_run.ref.fa contig4:401-500 >> $reads +samtools faidx bamfilter_test_run.ref.fa contig4:651-750 >> $reads +samtools faidx bamfilter_test_run.ref.fa contig4:651-800 >> $reads +samtools faidx bamfilter_test_run.ref.fa contig4:651-801 >> $reads +samtools faidx bamfilter_test_run.ref.fa contig4:851-950 >> $reads +samtools faidx bamfilter_test_run.ref.fa contig4:901-1000 >> $reads +fastaq make_random_contigs 1 100 - | awk 'BEGIN{getline; print ">unmapped_read"} 1' >> $reads + +smalt index -k 9 -s 1 $ref $ref +smalt map $ref $reads | samtools view -bS - > tmp.$$.bam +samtools sort tmp.$$.bam bamfilter_test_run +samtools index bamfilter_test_run.bam +rm tmp.$$.bam $ref.sm{a,i} + + diff --git a/circlator/tests/data/bamfilter_test_run.reads.fa b/circlator/tests/data/bamfilter_test_run.reads.fa new file mode 100644 index 0000000..2d6ba6c --- /dev/null +++ b/circlator/tests/data/bamfilter_test_run.reads.fa @@ -0,0 +1,42 @@ +>contig1:1-100 +TTTAATTTGTCCGTAAATTGGGAGGTCTTCAACCGGGGGCGAATGTCGATCTCGTCGAGG +CGTTTGTAAAGTGGTAACAGGGGTCATTGATCACGGTGTA +>contig1:301-400 +CGTTGATGATACGAATTACGTAGGGCTCTGGGAGATGCTCGGAACCCCACAGCGTCTATT +TTAGTTGCGACATTACGCGGTATGCGCTTCTGCAAGATGG +>contig2:1-100 +GGAAGCCACCGATGCTGAAAAAGCTCGAACAAGCCTCCACAGTAATACTTCATGCGCCAG +CATTTGCAAACTCGGACCCATTCCCTGCTTATACGTGAGT +>contig4:1-100 +TCGAAAGTACACTTTGAACTCTAAAAGCGGTTACGACCTCTTCCGTTCGATCGATGCGTG +AGTACGTACTCTGGATCCAGCCGTGGCAAACCGGGTAACA +>contig4:251-350 +CGTGTGTTGGATAGACATTTGTGACCGTATAGCGGGATGACGTTTCTCTGTTTGGACTTA +TCAACGGCTCGAGTCTTACGAATTAGGATCCGACCTAGAT +>contig4:201-350 +CCTCCGCCTGCCTTTGACACACCGGACCTCGGGGGTGTCTAAAAGCCGTCCGTGTGTTGG +ATAGACATTTGTGACCGTATAGCGGGATGACGTTTCTCTGTTTGGACTTATCAACGGCTC +GAGTCTTACGAATTAGGATCCGACCTAGAT +>contig4:401-500 +AGCGGTAGTTCGACTACATAGTGTTCGCCTAGAGTCGCCCGTGTCATGCTCTTAGGGCTG +GGTCTCCCTCAGAGTATCTGCGGTTTGAATAGACCAGGCG +>contig4:651-750 +CCTAAACATAACTTCCTTTTATAAATTCGCGCCACTGCTCTCATCACATAGTGAAGGAGG +GGGAGTCGTGCCCGTATCTGGGCCCAGTATATACATTGGG +>contig4:651-800 +CCTAAACATAACTTCCTTTTATAAATTCGCGCCACTGCTCTCATCACATAGTGAAGGAGG +GGGAGTCGTGCCCGTATCTGGGCCCAGTATATACATTGGGCAGGAGGGTTTGTCAAGAAT +TCTATCCTTACTAGTCTATTTTCGATACGC +>contig4:651-801 +CCTAAACATAACTTCCTTTTATAAATTCGCGCCACTGCTCTCATCACATAGTGAAGGAGG +GGGAGTCGTGCCCGTATCTGGGCCCAGTATATACATTGGGCAGGAGGGTTTGTCAAGAAT +TCTATCCTTACTAGTCTATTTTCGATACGCG +>contig4:851-950 +GAGTACGAGGGACAGATGTCTACACTTGAGCGTACACAAGAATGTGGTACCAAAGGTATC +CTCATCGCAACTGGCATTCAAGCCGCTGTTCGACAGTGGG +>contig4:901-1000 +CAAAGGTATCCTCATCGCAACTGGCATTCAAGCCGCTGTTCGACAGTGGGTCTGTTGTAC +CCCTCTGCCCAACTGCTGAGTAGTTGGGTAAGGACCGAGT +>unmapped_read +TTATGGTACTTCGTTGCTCCCAAGGCTGAACTGATACATAGAGTGGGCTTTGTGATAGAA +CCAAACGACAACGAAGCGAATTTCGTCACCATCTCCATAA diff --git a/circlator/tests/data/bamfilter_test_run.ref.fa b/circlator/tests/data/bamfilter_test_run.ref.fa new file mode 100644 index 0000000..fdd4706 --- /dev/null +++ b/circlator/tests/data/bamfilter_test_run.ref.fa @@ -0,0 +1,48 @@ +>contig1 +TTTAATTTGTCCGTAAATTGGGAGGTCTTCAACCGGGGGCGAATGTCGATCTCGTCGAGG +CGTTTGTAAAGTGGTAACAGGGGTCATTGATCACGGTGTAGTGAGCCAAGCACTCACGTC +AGTAAGGTTGGGGAGTCAAGCTCTTCACGTGCAACTGTTTGTCCAATGCGCAGGGTGTTC +AAGGGTAGCGCCCGAAAACCGAATCTCGGATTGTATGTTCAGGGAAGTAGTACCAGCCTT +GCACGGTGTCCGGACCAGGACGCGAAAACCGTAAAGTTGGTTTCCACGCGACTTAACACT +CGTTGATGATACGAATTACGTAGGGCTCTGGGAGATGCTCGGAACCCCACAGCGTCTATT +TTAGTTGCGACATTACGCGGTATGCGCTTCTGCAAGATGGGGACTAAAGTAAGCATAGTA +CCGCATTGGCTTGCACATCTGCGACCTGTCGGATAGGCTTGGTTGGCCGTCCTTGTGCGC +AGATAGAAAAGAGCCTCGAT +>contig2 +GGAAGCCACCGATGCTGAAAAAGCTCGAACAAGCCTCCACAGTAATACTTCATGCGCCAG +CATTTGCAAACTCGGACCCATTCCCTGCTTATACGTGAGTGCATTAGACAGGTACCCCCT +TAAATATGCCAGTATCGGCCTCCTTTGTATAGATATGTTGTGCAGTTCCATATAAGATCG +TATCTAACCCGATGTGTCAGTAAGTATACCTTGCGGGAACACGATGCTCGCGCTCAGAGT +TATCTTGTCTTTCTGTGCTCATTGCGTTATGGGTGGGCGCTTACAAGCAGGTTCCGAATA +CTGCGATGTGCTCAGAATTGATATTTTTGATCCCGAATGCGGTCTTCTAGTGGTGGGCTC +CTTATCTTAGAGACCTTGATCCTATGGTCTACGATCTGATCACGAAAGCAGGTGGCTACT +GTTCGCCGAGCGCGATTATGCACTAGGGCCACTTTGGCCGGTAGAAGGGAGCGTGTACCC +CCCCTGTTCACCGATCCAGA +>contig3 +AATCAGGTTCTCGGTTGCCCGAGGGCTCGACTTGTGACATCTCCTCCGAAATACGACATA +GAGCTCTCTTCGGACACTCCTCATCTTTCGAACGATACGCGGACCAAATCACCATGTCCA +ATCTAATAGTAGATCTAGCAGTTCCTACAGTTGCCAGATCACAGTCCAGTCTGCCGACTA +CTAGTGCGACTTGGCCCGTAAAAGGCCTCCATCCGTAGACAAGAGAACACCGCTTCGGTT +TACCTTGACTCGCGTCCAAAAGTGTCTGCTCGCGTGAATCTTTGGAGAAGAAACGCCACG +TCCGTATGTTGGATGGGCTAGGGATCTCACTAACCGCCGTGAAGGAGGCGGCAATGTACC +GTTAACGCCGTGTGCAGCACAGTGGATCACAGCCATCCTTCGGCCCTAATGGCTGCCGCC +GCGTCTGGTAGCAGTCGAATGGTTGTCTCTTCATGTGGGCAGCCCTCCTCGGAATCAGCC +GCGTCCTTTTTACTAGTTGT +>contig4 +TCGAAAGTACACTTTGAACTCTAAAAGCGGTTACGACCTCTTCCGTTCGATCGATGCGTG +AGTACGTACTCTGGATCCAGCCGTGGCAAACCGGGTAACAGTATGACATAGAACTACCTC +CCACCTTACGCCAATTGCACGAGCCGGCGTGTCCTGTAGGCTAGCTAAAATAAAGTTTCC +GTGTCGAGTCCCTACTGTATCCTCCGCCTGCCTTTGACACACCGGACCTCGGGGGTGTCT +AAAAGCCGTCCGTGTGTTGGATAGACATTTGTGACCGTATAGCGGGATGACGTTTCTCTG +TTTGGACTTATCAACGGCTCGAGTCTTACGAATTAGGATCCGACCTAGATGAAAATACCC +TGTGCACTGATCGAATTACACGTAAGATCTGTGGTTGCGTAGCGGTAGTTCGACTACATA +GTGTTCGCCTAGAGTCGCCCGTGTCATGCTCTTAGGGCTGGGTCTCCCTCAGAGTATCTG +CGGTTTGAATAGACCAGGCGACTGGGAGGCCATAGATAAGTGGCTCTTGACTGTTAACTT +GTGATTTGCCCACTTGAAACGGCAATAGCACCGGGCCTGGTTTCAATATTATTCGATAGA +AGGGACACAACTGGTACTTTCACCTGTCCACCGGTCGAGAAAGAATCCCACCTAAACATA +ACTTCCTTTTATAAATTCGCGCCACTGCTCTCATCACATAGTGAAGGAGGGGGAGTCGTG +CCCGTATCTGGGCCCAGTATATACATTGGGCAGGAGGGTTTGTCAAGAATTCTATCCTTA +CTAGTCTATTTTCGATACGCGGAGGGATCAGACGTGCAAATGCCGTTATTAAAAGAGTGT +CCGGAGTGTTGAGTACGAGGGACAGATGTCTACACTTGAGCGTACACAAGAATGTGGTAC +CAAAGGTATCCTCATCGCAACTGGCATTCAAGCCGCTGTTCGACAGTGGGTCTGTTGTAC +CCCTCTGCCCAACTGCTGAGTAGTTGGGTAAGGACCGAGT diff --git a/circlator/tests/data/bamfilter_test_run.ref.fa.fai b/circlator/tests/data/bamfilter_test_run.ref.fa.fai new file mode 100644 index 0000000..918bc22 --- /dev/null +++ b/circlator/tests/data/bamfilter_test_run.ref.fa.fai @@ -0,0 +1,4 @@ +contig1 500 9 60 61 +contig2 500 527 60 61 +contig3 500 1045 60 61 +contig4 1000 1563 60 61 diff --git a/circlator/tests/data/bamfilter_test_run_discard_unmapped.out.reads.fa b/circlator/tests/data/bamfilter_test_run_discard_unmapped.out.reads.fa new file mode 100644 index 0000000..86365f8 --- /dev/null +++ b/circlator/tests/data/bamfilter_test_run_discard_unmapped.out.reads.fa @@ -0,0 +1,21 @@ +>contig1:1-100 +TTTAATTTGTCCGTAAATTGGGAGGTCTTCAACCGGGGGCGAATGTCGATCTCGTCGAGG +CGTTTGTAAAGTGGTAACAGGGGTCATTGATCACGGTGTA +>contig1:301-400 +CGTTGATGATACGAATTACGTAGGGCTCTGGGAGATGCTCGGAACCCCACAGCGTCTATT +TTAGTTGCGACATTACGCGGTATGCGCTTCTGCAAGATGG +>contig4:1-100 +TCGAAAGTACACTTTGAACTCTAAAAGCGGTTACGACCTCTTCCGTTCGATCGATGCGTG +AGTACGTACTCTGGATCCAGCCGTGGCAAACCGGGTAACA +>contig4:201-350 +CCTCCGCCTGCCTTTGACACACCGGACCTCGGGGGTGTCTAAAAGCCGTCCGTGTGTTGG +ATAGACATTTGTGACCGTATAGCGGGATGACGTTTCTCTG +>contig4:651-801 +TGAAGGAGGGGGAGTCGTGCCCGTATCTGGGCCCAGTATATACATTGGGCAGGAGGGTTT +GTCAAGAATTCTATCCTTACTAGTCTATTTTCGATACGCG +>contig4:851-950 +GAGTACGAGGGACAGATGTCTACACTTGAGCGTACACAAGAATGTGGTACCAAAGGTATC +CTCATCGCAACTGGCATTCAAGCCGCTGTTCGACAGTGGG +>contig4:901-1000 +CAAAGGTATCCTCATCGCAACTGGCATTCAAGCCGCTGTTCGACAGTGGGTCTGTTGTAC +CCCTCTGCCCAACTGCTGAGTAGTTGGGTAAGGACCGAGT diff --git a/circlator/tests/data/bamfilter_test_run_keep_unmapped.out.reads.fa b/circlator/tests/data/bamfilter_test_run_keep_unmapped.out.reads.fa new file mode 100644 index 0000000..fbd8eee --- /dev/null +++ b/circlator/tests/data/bamfilter_test_run_keep_unmapped.out.reads.fa @@ -0,0 +1,24 @@ +>contig1:1-100 +TTTAATTTGTCCGTAAATTGGGAGGTCTTCAACCGGGGGCGAATGTCGATCTCGTCGAGG +CGTTTGTAAAGTGGTAACAGGGGTCATTGATCACGGTGTA +>contig1:301-400 +CGTTGATGATACGAATTACGTAGGGCTCTGGGAGATGCTCGGAACCCCACAGCGTCTATT +TTAGTTGCGACATTACGCGGTATGCGCTTCTGCAAGATGG +>contig4:1-100 +TCGAAAGTACACTTTGAACTCTAAAAGCGGTTACGACCTCTTCCGTTCGATCGATGCGTG +AGTACGTACTCTGGATCCAGCCGTGGCAAACCGGGTAACA +>contig4:201-350 +CCTCCGCCTGCCTTTGACACACCGGACCTCGGGGGTGTCTAAAAGCCGTCCGTGTGTTGG +ATAGACATTTGTGACCGTATAGCGGGATGACGTTTCTCTG +>contig4:651-801 +TGAAGGAGGGGGAGTCGTGCCCGTATCTGGGCCCAGTATATACATTGGGCAGGAGGGTTT +GTCAAGAATTCTATCCTTACTAGTCTATTTTCGATACGCG +>contig4:851-950 +GAGTACGAGGGACAGATGTCTACACTTGAGCGTACACAAGAATGTGGTACCAAAGGTATC +CTCATCGCAACTGGCATTCAAGCCGCTGTTCGACAGTGGG +>contig4:901-1000 +CAAAGGTATCCTCATCGCAACTGGCATTCAAGCCGCTGTTCGACAGTGGGTCTGTTGTAC +CCCTCTGCCCAACTGCTGAGTAGTTGGGTAAGGACCGAGT +>unmapped_read +TTATGGTACTTCGTTGCTCCCAAGGCTGAACTGATACATAGAGTGGGCTTTGTGATAGAA +CCAAACGACAACGAAGCGAATTTCGTCACCATCTCCATAA diff --git a/circlator/tests/data/common_test_file_exists b/circlator/tests/data/common_test_file_exists new file mode 100644 index 0000000..e69de29 diff --git a/setup.py b/setup.py index 62b4bdc..1b8df3b 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name='circlator', - version='0.14.2', + version='0.15.0', description='circlator: a tool to circularise genome assemblies', packages = find_packages(), package_data={'circlator': ['data/*']}, @@ -19,7 +19,7 @@ tests_require=['nose >= 1.3'], install_requires=[ 'openpyxl', - 'pyfastaq >= 3.5.0', + 'pyfastaq >= 3.6.0', 'pysam >= 0.8.1', 'pymummer>=0.4.0', 'bio_assembly_refinement>=0.3.2',