From d56d5db7fe3a7c75683749c2ad8eeccebd3bbd2d Mon Sep 17 00:00:00 2001 From: Jessica Mizzi Date: Tue, 4 Nov 2014 16:40:21 -0500 Subject: [PATCH 1/8] initial work for issue 399 --- scripts/do-partition.py | 5 ++++- scripts/interleave-reads.py | 5 ++++- scripts/load-graph.py | 3 +++ scripts/load-into-counting.py | 3 +++ scripts/normalize-by-median.py | 3 ++- scripts/sample-reads-randomly.py | 5 ++++- 6 files changed, 20 insertions(+), 4 deletions(-) diff --git a/scripts/do-partition.py b/scripts/do-partition.py index 0445a94c0a..ac040e5b73 100755 --- a/scripts/do-partition.py +++ b/scripts/do-partition.py @@ -97,6 +97,8 @@ def get_parser(): parser.add_argument('graphbase', help="base name for output files") parser.add_argument('input_filenames', metavar='input_sequence_filename', nargs='+', help='input FAST[AQ] sequence filenames') + parser.add_argument('-f','-force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -137,7 +139,8 @@ def main(): # pylint: disable=too-many-locals,too-many-statements " this data set. Increase k-mer presence table " "size/num of tables.") print >> sys.stderr, "**" - sys.exit(1) + if not args.force: + sys.exit(1) # partition-graph diff --git a/scripts/interleave-reads.py b/scripts/interleave-reads.py index e73d4fb731..233fc44e88 100755 --- a/scripts/interleave-reads.py +++ b/scripts/interleave-reads.py @@ -63,6 +63,8 @@ def get_parser(): default=sys.stdout) parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) + parser.add_argument('-f','-force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -93,7 +95,8 @@ def main(): fail = True if fail: - sys.exit(1) + if not args.force: + sys.exit(1) print >> sys.stderr, "Interleaving:\n\t%s\n\t%s" % (s1_file, s2_file) diff --git a/scripts/load-graph.py b/scripts/load-graph.py index 8334335193..f51b95bb05 100755 --- a/scripts/load-graph.py +++ b/scripts/load-graph.py @@ -40,6 +40,8 @@ def get_parser(): help="Prints the total number of k-mers to stderr") parser.add_argument('--write-fp-rate', '-w', action='store_true', help="Write false positive rate into .info file") + parser.add_argument('-f','-force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -111,6 +113,7 @@ def main(): print >> sys.stderr, ("** ERROR: the graph structure is too small for " "this data set. Increase table size/# tables.") print >> sys.stderr, "**" + #if not args.force: sys.exit(1) print >> sys.stderr, 'wrote to', base + '.info and', base + '.pt' diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index f06283f74c..59b9012bb8 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -62,6 +62,8 @@ def get_parser(): "summary be in? (json or tsv, disabled by default)") parser.add_argument('--report-total-kmers', '-t', action='store_true', help="Prints the total number of k-mers to stderr") + parser.add_argument('-f','-force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -162,6 +164,7 @@ def main(): print >> sys.stderr, "** ERROR: the k-mer counting table is too small", print >> sys.stderr, "for this data set. Increase tablesize/# tables." print >> sys.stderr, "**" + #if not args.force: sys.exit(1) print >>sys.stderr, 'DONE.' diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py index 7e3a838d6c..9b1ffca6be 100755 --- a/scripts/normalize-by-median.py +++ b/scripts/normalize-by-median.py @@ -313,7 +313,8 @@ def main(): # pylint: disable=too-many-branches,too-many-statements "tables.") print >> sys.stderr, "**" print >> sys.stderr, "** Do not use these results!!" - sys.exit(1) + if not args.force: + sys.exit(1) if __name__ == '__main__': main() diff --git a/scripts/sample-reads-randomly.py b/scripts/sample-reads-randomly.py index 9e6d7517d9..f61feb2319 100755 --- a/scripts/sample-reads-randomly.py +++ b/scripts/sample-reads-randomly.py @@ -67,6 +67,8 @@ def get_parser(): type=argparse.FileType('w'), default=None) parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) + parser.add_argument('-f','-force', default=False, action='store_true', + help='Overwrite output file if it exits') return parser @@ -102,7 +104,8 @@ def main(): if num_samples > 1: sys.stderr.write( "Error: cannot specify -o with more than one sample.") - sys.exit(-1) + if not args.force: + sys.exit(-1) output_filename = output_file.name else: filename = args.filenames[0] From 20ee2705c4d60133226e89d43676e6fa659ed05c Mon Sep 17 00:00:00 2001 From: Jessica Mizzi Date: Tue, 4 Nov 2014 17:00:11 -0500 Subject: [PATCH 2/8] autopep8 --- khmer/load_pe.py | 2 +- scripts/do-partition.py | 8 +-- scripts/interleave-reads.py | 6 +-- scripts/load-graph.py | 6 +-- scripts/load-into-counting.py | 6 +-- scripts/normalize-by-median.py | 2 +- scripts/sample-reads-randomly.py | 6 +-- setup.py | 1 + tests/khmer_tst_utils.py | 6 +-- tests/test_counting_hash.py | 90 ++++++++++++++++---------------- tests/test_counting_single.py | 10 ++-- tests/test_filter.py | 6 +-- tests/test_hashbits.py | 30 +++++------ tests/test_hashbits_obj.py | 6 +-- tests/test_labelhash.py | 32 ++++++------ tests/test_read_parsers.py | 9 ++-- tests/test_script_arguments.py | 4 +- tests/test_subset_graph.py | 18 +++---- 18 files changed, 125 insertions(+), 123 deletions(-) diff --git a/khmer/load_pe.py b/khmer/load_pe.py index 169188b71a..faabd1a314 100644 --- a/khmer/load_pe.py +++ b/khmer/load_pe.py @@ -23,7 +23,7 @@ def load_pe(screed_handle): screed_iter = iter(screed_handle) - while 1: + while True: try: this_record = screed_iter.next() except StopIteration: diff --git a/scripts/do-partition.py b/scripts/do-partition.py index ac040e5b73..87cda2f9fd 100755 --- a/scripts/do-partition.py +++ b/scripts/do-partition.py @@ -45,7 +45,7 @@ def __debug_vm_usage(msg): # pylint: disable=unused-argument def worker(queue, basename, stop_big_traversals): - while 1: + while True: try: (htable, index, start, stop) = queue.get(False) except Queue.Empty: @@ -97,8 +97,8 @@ def get_parser(): parser.add_argument('graphbase', help="base name for output files") parser.add_argument('input_filenames', metavar='input_sequence_filename', nargs='+', help='input FAST[AQ] sequence filenames') - parser.add_argument('-f','-force', default=False, action='store_true', - help='Overwrite output file if it exists') + parser.add_argument('-f', '-force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -139,7 +139,7 @@ def main(): # pylint: disable=too-many-locals,too-many-statements " this data set. Increase k-mer presence table " "size/num of tables.") print >> sys.stderr, "**" - if not args.force: + if not args.force: sys.exit(1) # partition-graph diff --git a/scripts/interleave-reads.py b/scripts/interleave-reads.py index 233fc44e88..000b418cf8 100755 --- a/scripts/interleave-reads.py +++ b/scripts/interleave-reads.py @@ -63,8 +63,8 @@ def get_parser(): default=sys.stdout) parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) - parser.add_argument('-f','-force', default=False, action='store_true', - help='Overwrite output file if it exists') + parser.add_argument('-f', '-force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -95,7 +95,7 @@ def main(): fail = True if fail: - if not args.force: + if not args.force: sys.exit(1) print >> sys.stderr, "Interleaving:\n\t%s\n\t%s" % (s1_file, s2_file) diff --git a/scripts/load-graph.py b/scripts/load-graph.py index f51b95bb05..82bca465cb 100755 --- a/scripts/load-graph.py +++ b/scripts/load-graph.py @@ -40,8 +40,8 @@ def get_parser(): help="Prints the total number of k-mers to stderr") parser.add_argument('--write-fp-rate', '-w', action='store_true', help="Write false positive rate into .info file") - parser.add_argument('-f','-force', default=False, action='store_true', - help='Overwrite output file if it exists') + parser.add_argument('-f', '-force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -113,7 +113,7 @@ def main(): print >> sys.stderr, ("** ERROR: the graph structure is too small for " "this data set. Increase table size/# tables.") print >> sys.stderr, "**" - #if not args.force: + # if not args.force: sys.exit(1) print >> sys.stderr, 'wrote to', base + '.info and', base + '.pt' diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index 59b9012bb8..906e7fe518 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -62,8 +62,8 @@ def get_parser(): "summary be in? (json or tsv, disabled by default)") parser.add_argument('--report-total-kmers', '-t', action='store_true', help="Prints the total number of k-mers to stderr") - parser.add_argument('-f','-force', default=False, action='store_true', - help='Overwrite output file if it exists') + parser.add_argument('-f', '-force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -164,7 +164,7 @@ def main(): print >> sys.stderr, "** ERROR: the k-mer counting table is too small", print >> sys.stderr, "for this data set. Increase tablesize/# tables." print >> sys.stderr, "**" - #if not args.force: + # if not args.force: sys.exit(1) print >>sys.stderr, 'DONE.' diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py index 9b1ffca6be..4a199530b9 100755 --- a/scripts/normalize-by-median.py +++ b/scripts/normalize-by-median.py @@ -313,7 +313,7 @@ def main(): # pylint: disable=too-many-branches,too-many-statements "tables.") print >> sys.stderr, "**" print >> sys.stderr, "** Do not use these results!!" - if not args.force: + if not args.force: sys.exit(1) if __name__ == '__main__': diff --git a/scripts/sample-reads-randomly.py b/scripts/sample-reads-randomly.py index f61feb2319..bb6d6a402b 100755 --- a/scripts/sample-reads-randomly.py +++ b/scripts/sample-reads-randomly.py @@ -67,8 +67,8 @@ def get_parser(): type=argparse.FileType('w'), default=None) parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) - parser.add_argument('-f','-force', default=False, action='store_true', - help='Overwrite output file if it exits') + parser.add_argument('-f', '-force', default=False, action='store_true', + help='Overwrite output file if it exits') return parser @@ -104,7 +104,7 @@ def main(): if num_samples > 1: sys.stderr.write( "Error: cannot specify -o with more than one sample.") - if not args.force: + if not args.force: sys.exit(-1) output_filename = output_file.name else: diff --git a/setup.py b/setup.py index cc8e08af04..8c37e5cdc3 100755 --- a/setup.py +++ b/setup.py @@ -126,6 +126,7 @@ class KhmerBuildExt(_build_ext): # pylint: disable=R0904 + """Specialized Python extension builder for khmer project. Only run the library setup when needed, not on every invocation. diff --git a/tests/khmer_tst_utils.py b/tests/khmer_tst_utils.py index f4e8ca75e8..beb071b449 100644 --- a/tests/khmer_tst_utils.py +++ b/tests/khmer_tst_utils.py @@ -56,7 +56,7 @@ def _runscript(scriptname, sandbox=False): pkg_resources.get_distribution("khmer").run_script( scriptname, ns) return 0 - except pkg_resources.ResolutionError, err: + except pkg_resources.ResolutionError as err: if sandbox: paths = [os.path.join(os.path.dirname(__file__), "../sandbox")] else: @@ -66,7 +66,7 @@ def _runscript(scriptname, sandbox=False): for path in paths: scriptfile = os.path.join(path, scriptname) if os.path.isfile(scriptfile): - execfile(scriptfile, ns) + exec(compile(open(scriptfile).read(), scriptfile, 'exec'), ns) return 0 if sandbox: raise nose.SkipTest("sandbox tests are only run in a repository.") @@ -103,7 +103,7 @@ def runscript(scriptname, args, in_directory=None, status = _runscript(scriptname, sandbox=sandbox) except nose.SkipTest: raise - except SystemExit, e: + except SystemExit as e: status = e.code except: traceback.print_exc(file=sys.stderr) diff --git a/tests/test_counting_hash.py b/tests/test_counting_hash.py index 7e2a1aff03..c7ab475c3a 100644 --- a/tests/test_counting_hash.py +++ b/tests/test_counting_hash.py @@ -44,10 +44,10 @@ def test_collision_1(self): assert khmer.forward_hash(GG, 12) == 11184810 collision_1 = 'AAACGTATGACT' - assert khmer.forward_hash(collision_1, 12) == 184777L + assert khmer.forward_hash(collision_1, 12) == 184777 collision_2 = 'AAATACCGAGCG' - assert khmer.forward_hash(collision_2, 12) == 76603L + assert khmer.forward_hash(collision_2, 12) == 76603 # note, hash(GG) % 1000003 == hash(collision_1) # note, hash(GG) % 1009837 == hash(collision_2) @@ -64,10 +64,10 @@ def test_collision_2(self): assert khmer.forward_hash(GG, 12) == 11184810 collision_1 = 'AAACGTATGACT' - assert khmer.forward_hash(collision_1, 12) == 184777L + assert khmer.forward_hash(collision_1, 12) == 184777 collision_2 = 'AAATACCGAGCG' - assert khmer.forward_hash(collision_2, 12) == 76603L + assert khmer.forward_hash(collision_2, 12) == 76603 # hash(GG) % 1000003 == hash(collision_1) # hash(GG) % 1009837 == hash(collision_2) @@ -84,10 +84,10 @@ def test_collision_3(self): assert khmer.forward_hash(GG, 12) == 11184810 collision_1 = 'AAACGTATGACT' - assert khmer.forward_hash(collision_1, 12) == 184777L + assert khmer.forward_hash(collision_1, 12) == 184777 collision_2 = 'AAATACCGAGCG' - assert khmer.forward_hash(collision_2, 12) == 76603L + assert khmer.forward_hash(collision_2, 12) == 76603 # hash(GG) % 1000003 == hash(collision_1) # hash(GG) % 1009837 == hash(collision_2) @@ -110,13 +110,13 @@ def test_3_tables(): assert khmer.forward_hash(GG, 12) == 11184810 collision_1 = 'AAACGTATGACT' - assert khmer.forward_hash(collision_1, 12) == 184777L + assert khmer.forward_hash(collision_1, 12) == 184777 collision_2 = 'AAATACCGAGCG' - assert khmer.forward_hash(collision_2, 12) == 76603L + assert khmer.forward_hash(collision_2, 12) == 76603 collision_3 = 'AAACGTATCGAG' - assert khmer.forward_hash(collision_3, 12) == 184755L + assert khmer.forward_hash(collision_3, 12) == 184755 # hash(GG) % 1000003 == hash(collision_1) # hash(GG) % 1009837 == hash(collision_2) @@ -622,7 +622,7 @@ def test_get_hashsizes(): # hb = kh.collect_high_abundance_kmers(seqpath, 2, 4) -#### +# def test_load_notexist_should_fail(): @@ -632,7 +632,7 @@ def test_load_notexist_should_fail(): try: hi.load(savepath) assert 0, "load should fail" - except IOError, e: + except IOError as e: print str(e) @@ -656,7 +656,7 @@ def test_load_truncated_should_fail(): try: hi.load(savepath) assert 0, "load should fail" - except IOError, e: + except IOError as e: print str(e) @@ -667,7 +667,7 @@ def test_load_gz_notexist_should_fail(): try: hi.load(savepath) assert 0, "load should fail" - except IOError, e: + except IOError as e: print str(e) @@ -691,7 +691,7 @@ def test_load_gz_truncated_should_fail(): try: hi.load(savepath) assert 0, "load should fail" - except IOError, e: + except IOError as e: print str(e) @@ -703,7 +703,7 @@ def test_counting_file_version_check(): try: ht.load(inpath) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @@ -715,7 +715,7 @@ def test_counting_gz_file_version_check(): try: ht.load(inpath) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @@ -727,7 +727,7 @@ def test_counting_file_type_check(): try: kh.load(inpath) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @@ -741,7 +741,7 @@ def test_counting_gz_file_type_check(): try: kh.load(inpath) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @@ -749,7 +749,7 @@ def test_counting_bad_primes_list(): try: ht = khmer._new_counting_hash(12, ["a", "b", "c"], 1) assert 0, "bad list of primes should fail" - except TypeError, e: + except TypeError as e: print str(e) @@ -760,7 +760,7 @@ def test_bad_use_bigcount(): try: countingtable.get_use_bigcount(True) assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) @@ -769,7 +769,7 @@ def test_consume_absentfasta(): try: countingtable.consume_fasta("absent_file.fa") assert 0, "This should fail" - except IOError, err: + except IOError as err: print str(err) @@ -778,13 +778,13 @@ def test_consume_absentfasta_with_reads_parser(): try: countingtable.consume_fasta_with_reads_parser() assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) try: readparser = ReadParser(utils.get_test_data('empty-file')) countingtable.consume_fasta_with_reads_parser(readparser) assert 0, "this should fail" - except IOError, err: + except IOError as err: print str(err) except ValueError, err: print str(err) @@ -795,12 +795,12 @@ def test_badconsume(): try: countingtable.consume() assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) try: countingtable.consume("AAA") assert 0, "this should fail" - except ValueError, err: + except ValueError as err: print str(err) @@ -809,12 +809,12 @@ def test_get_badmin_count(): try: countingtable.get_min_count() assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) try: countingtable.get_min_count("AAA") assert 0, "this should fail" - except ValueError, err: + except ValueError as err: print str(err) @@ -823,12 +823,12 @@ def test_get_badmax_count(): try: countingtable.get_max_count() assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) try: countingtable.get_max_count("AAA") assert 0, "this should fail" - except ValueError, err: + except ValueError as err: print str(err) @@ -837,12 +837,12 @@ def test_get_badmedian_count(): try: countingtable.get_median_count() assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) try: countingtable.get_median_count("AAA") assert 0, "this should fail" - except ValueError, err: + except ValueError as err: print str(err) @@ -851,12 +851,12 @@ def test_get_badkadian_count(): try: countingtable.get_kadian_count() assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) try: countingtable.get_kadian_count("AAA") assert 0, "this should fail" - except ValueError, err: + except ValueError as err: print str(err) @@ -865,7 +865,7 @@ def test_badget(): try: countingtable.get() assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) @@ -881,7 +881,7 @@ def test_badget_2(): try: countingtable.get("AGCTT") assert 0, "this should fail" - except ValueError, err: + except ValueError as err: print str(err) @@ -892,7 +892,7 @@ def test_badtrim(): try: countingtable.trim_on_abundance() assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) countingtable.trim_on_abundance("AAAAAA", 1) @@ -901,19 +901,19 @@ def test_badfasta_count_kmers_by_position(): countingtable = khmer.new_counting_hash(4, 4 ** 4, 4) try: countingtable.fasta_count_kmers_by_position() - except TypeError, err: + except TypeError as err: print str(err) filename = utils.get_test_data("test-short.fa") try: countingtable.fasta_count_kmers_by_position(filename, -1, 0) assert 0, "this should fail" - except ValueError, err: + except ValueError as err: print str(err) try: countingtable.fasta_count_kmers_by_position(filename, 0, -1) assert 0, "this should fail" - except ValueError, err: + except ValueError as err: print str(err) @@ -922,7 +922,7 @@ def test_badload(): try: countingtable.load() assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) @@ -931,7 +931,7 @@ def test_badsave(): try: countingtable.save() assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) @@ -940,7 +940,7 @@ def test_badksize(): try: countingtable.ksize(True) assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) @@ -949,7 +949,7 @@ def test_badhashsizes(): try: countingtable.hashsizes(True) assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) @@ -958,7 +958,7 @@ def test_badconsume_and_tag(): try: countingtable.consume_and_tag() assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) @@ -967,6 +967,6 @@ def test_consume_fasta_and_tag(): try: countingtable.consume_fasta_and_tag() assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) countingtable.consume_fasta_and_tag(utils.get_test_data("test-graph2.fa")) diff --git a/tests/test_counting_single.py b/tests/test_counting_single.py index 1be6b25b66..d295cd6f5c 100644 --- a/tests/test_counting_single.py +++ b/tests/test_counting_single.py @@ -39,12 +39,12 @@ def test_badcount(): try: countingtable.count() assert 0, "count should require one argument" - except TypeError, err: + except TypeError as err: print str(err) try: countingtable.count('ABCDE') assert 0, "count should require k-mer size to be equal" - except ValueError, err: + except ValueError as err: print str(err) @@ -53,7 +53,7 @@ def test_hashtable_n_entries(): try: countingtable.n_entries("nope") assert 0, "n_entries should accept no arguments" - except TypeError, err: + except TypeError as err: print str(err) @@ -268,7 +268,7 @@ def test_badget(): try: kh.get("AGCTT") assert 0, "this should fail" - except ValueError, err: + except ValueError as err: print str(err) @@ -320,7 +320,7 @@ def test_n_occupied(self): try: self.kh.n_occupied("MU", 1, 3) assert 0, "n_occupied shouldn't accept three arguments" - except TypeError, err: + except TypeError as err: print str(err) @attr('highmem') diff --git a/tests/test_filter.py b/tests/test_filter.py index dbebf6ae47..a416be139b 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -35,18 +35,18 @@ def test_abund(self): try: ht.consume_fasta() assert 0, "should fail" - except TypeError, err: + except TypeError as err: print str(err) try: ht.consume_fasta("nonexistent") assert 0, "should fail" - except IOError, err: + except IOError as err: print str(err) ht.output_fasta_kmer_pos_freq(filename, outname) try: ht.output_fasta_kmer_pos_freq() assert 0, "should fail" - except TypeError, err: + except TypeError as err: print str(err) fd = open(outname, "r") diff --git a/tests/test_hashbits.py b/tests/test_hashbits.py index c343bd4123..d5908f2be1 100644 --- a/tests/test_hashbits.py +++ b/tests/test_hashbits.py @@ -535,11 +535,11 @@ def test_badget(): try: hbts.get("AGCTT") assert 0, "this should fail" - except ValueError, err: + except ValueError as err: print str(err) -#### +# def test_load_notexist_should_fail(): @@ -573,7 +573,7 @@ def test_load_truncated_should_fail(): try: hi.load(savepath) assert 0, "load should fail" - except IOError, e: + except IOError as e: print str(e) @@ -584,7 +584,7 @@ def test_save_load_tagset_notexist(): try: ht.load_tagset(outfile) assert 0, "this test should fail" - except IOError, e: + except IOError as e: print str(e) @@ -666,7 +666,7 @@ def test_hashbits_file_version_check(): try: ht.load(inpath) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @@ -680,7 +680,7 @@ def test_hashbits_file_type_check(): try: ht.load(savepath) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @@ -692,7 +692,7 @@ def test_stoptags_file_version_check(): try: ht.load_stop_tags(inpath) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @@ -703,7 +703,7 @@ def test_stoptags_ksize_check(): try: ht.load_stop_tags(inpath) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @@ -714,7 +714,7 @@ def test_stop_tags_filetype_check(): try: ht.load_stop_tags(inpath) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @@ -726,7 +726,7 @@ def test_tagset_file_version_check(): try: ht.load_tagset(inpath) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @@ -737,7 +737,7 @@ def test_tagset_ksize_check(): try: ht.load_tagset(inpath) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @@ -748,7 +748,7 @@ def test_tagset_filetype_check(): try: ht.load_tagset(inpath) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @@ -756,7 +756,7 @@ def test_bad_primes_list(): try: coutingtable = khmer._new_hashbits(31, ["a", "b", "c"], 1) assert 0, "Bad primes list should fail" - except TypeError, e: + except TypeError as e: print str(e) @@ -765,13 +765,13 @@ def test_consume_absentfasta_with_reads_parser(): try: presencetable.consume_fasta_with_reads_parser() assert 0, "this should fail" - except TypeError, err: + except TypeError as err: print str(err) try: readparser = ReadParser(utils.get_test_data('empty-file')) presencetable.consume_fasta_with_reads_parser(readparser) assert 0, "this should fail" - except IOError, err: + except IOError as err: print str(err) except ValueError, err: print str(err) diff --git a/tests/test_hashbits_obj.py b/tests/test_hashbits_obj.py index 23732e75b1..b56a475266 100644 --- a/tests/test_hashbits_obj.py +++ b/tests/test_hashbits_obj.py @@ -541,7 +541,7 @@ def test_badget(): try: hbts.get("AGCTT") assert 0, "this should fail" - except ValueError, err: + except ValueError as err: print str(err) @@ -550,7 +550,7 @@ def test_bad_primes(): countingtable = khmer._Hashbits.__new__( khmer._Hashbits, 6, ["a", "b", "c"]) assert 0, "this should fail" - except TypeError, e: + except TypeError as e: print str(e) @@ -560,7 +560,7 @@ def test_consume_fasta_and_tag_with_badreads_parser(): readsparser = khmer.ReadParser(utils.get_test_data("test-empty.fa")) presencetable.consume_fasta_and_tag_with_reads_parser(readsparser) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) except ValueError, e: print str(e) diff --git a/tests/test_labelhash.py b/tests/test_labelhash.py index 7fc9ccbdf8..894dda7e4c 100644 --- a/tests/test_labelhash.py +++ b/tests/test_labelhash.py @@ -37,7 +37,7 @@ def test_get_label_dict(): lb.consume_fasta_and_tag_with_labels(filename) labels = lb.get_label_dict() - expected = [0L, 1L, 2L, 3L] + expected = [0, 1, 2, 3] for e_label in expected: assert e_label in labels for a_label in labels: @@ -48,11 +48,11 @@ def test_get_tag_labels(): lb = LabelHash(20, 1e7, 4) filename = utils.get_test_data('single-read.fq') lb.consume_fasta_and_tag_with_labels(filename) - tag = 173473779682L + tag = 173473779682 labels = lb.get_tag_labels(tag) assert len(labels) == 1 - assert labels.pop() == 0L + assert labels.pop() == 0 def test_consume_fasta_and_tag_with_labels(): @@ -93,13 +93,13 @@ def test_consume_partitioned_fasta_and_tag_with_labels(): # print lb.n_labels() # print labels assert len(labels) == 1 - assert labels.pop() == 2L + assert labels.pop() == 2 assert lb.n_labels() == 1 def test_consume_sequence_and_tag_with_labels(): lb = LabelHash(20, 1e6, 4) - label = 0L + label = 0 sequence = 'ATGCATCGATCGATCGATCGATCGATCGATCGATCGATCG' n_consumed = lb.consume_sequence_and_tag_with_labels(sequence, label) @@ -117,7 +117,7 @@ def test_sweep_tag_neighborhood(): tags = lb.sweep_tag_neighborhood('CAGGCGCCCACCACCGTGCCCTCCAACCTGATGGT') assert len(tags) == 1 - assert tags.pop() == 173473779682L + assert tags.pop() == 173473779682 def test_sweep_label_neighborhood(): @@ -127,7 +127,7 @@ def test_sweep_label_neighborhood(): labels = lb.sweep_label_neighborhood('CAGGCGCCCACCACCGTGCCCTCCAACCTGATGGT') assert len(labels) == 1 - assert labels.pop() == 0L + assert labels.pop() == 0 ''' * The test data set as four reads: A, B, C, and D @@ -153,8 +153,8 @@ def test_label_tag_correctness(): print labels print len('ATCGTGTAAGCTATCGTAATCGTAAGCTCTGCCTAGAGCTAGGCTAG') - 19 assert len(labels) == 2 - assert 0L in labels - assert 1L in labels + assert 0 in labels + assert 1 in labels # read B labels = lb.sweep_label_neighborhood( @@ -162,9 +162,9 @@ def test_label_tag_correctness(): 'ATAGATAGATGACCTAGAGCTAGGCTAGGTGTTGGGGATAGATAGATAGATGA') print labels assert len(labels) == 3 - assert 0L in labels - assert 1L in labels - assert 2L in labels + assert 0 in labels + assert 1 in labels + assert 2 in labels # read C labels = lb.sweep_label_neighborhood( @@ -173,15 +173,15 @@ def test_label_tag_correctness(): 'ACAACACATACA') print labels assert len(labels) == 2 - assert 1L in labels - assert 2L in labels + assert 1 in labels + assert 2 in labels # read D labels = lb.sweep_label_neighborhood( 'TATATATATAGCTAGCTAGCTAACTAGCTAGCATCGATCGATCGATC') print labels assert len(labels) == 1 - assert 3L in labels + assert 3 in labels # # Begin Hashbits tests @@ -694,5 +694,5 @@ def test_bad_primes(): try: hi = khmer._LabelHash.__new__(khmer.LabelHash, 6, ["a", "b", "c"]) assert 0, "Non number prime list should fail" - except TypeError, e: + except TypeError as e: print str(e) diff --git a/tests/test_read_parsers.py b/tests/test_read_parsers.py index 7759526ec0..a02d55fcb0 100644 --- a/tests/test_read_parsers.py +++ b/tests/test_read_parsers.py @@ -12,6 +12,7 @@ from khmer import ReadParser import khmer_tst_utils as utils from nose.plugins.attrib import attr +from functools import reduce @attr('highmem') @@ -118,7 +119,7 @@ def test_badbzip2(): for read in rparser: pass assert 0, "this should fail" - except IOError, err: + except IOError as err: print str(err) except ValueError, err: print str(err) @@ -306,12 +307,12 @@ def test_constructor(): rparser = ReadParser(utils.get_test_data("single-read.fq"), "a") assert 0, ("ReadParser's constructor shouldn't accept a character for " "the number of threads") - except TypeError, err: + except TypeError as err: print str(err) try: rparser = ReadParser("non-existent-file-name") assert 0, "ReadParser shouldn't accept a non-existant file name" - except ValueError, err: + except ValueError as err: print str(err) @@ -322,7 +323,7 @@ def test_iternext(): for read_1, read_2 in rparser.iter_read_pairs(): read_pairs.append(read_1, read_2) assert 0, "Shouldn't be able to iterate over non FASTA file" - except IOError, err: + except IOError as err: print str(err) except ValueError, err: print str(err) diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py index 18778d936c..ba850f7e71 100644 --- a/tests/test_script_arguments.py +++ b/tests/test_script_arguments.py @@ -22,7 +22,7 @@ def test_check_space(): try: khmer.file.check_space([fakelump_fa], _testhook_free_space=0) assert 0, "this should fail" - except SystemExit, e: + except SystemExit as e: print str(e) finally: sys.stderr = save_stderr @@ -33,7 +33,7 @@ def test_check_tablespace(): try: khmer.file.check_space_for_hashtable(1e9, _testhook_free_space=0) assert 0, "this should fail" - except SystemExit, e: + except SystemExit as e: print str(e) finally: sys.stderr = save_stderr diff --git a/tests/test_subset_graph.py b/tests/test_subset_graph.py index 7f1a7c08c0..71d5567059 100644 --- a/tests/test_subset_graph.py +++ b/tests/test_subset_graph.py @@ -262,7 +262,7 @@ def test_save_load_merge_nexist(self): try: a = ht.load_subset_partitionmap('this does not exist') assert 0, "this should not succeed" - except IOError, e: + except IOError as e: print str(e) @attr('highmem') @@ -342,7 +342,7 @@ def test_save_merge_from_disk_file_not_exist(self): try: ht.merge_subset_from_disk(outfile1) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @attr('highmem') @@ -353,7 +353,7 @@ def test_merge_from_disk_file_bad_type(self): try: ht.merge_subset_from_disk(infile) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @attr('highmem') @@ -364,7 +364,7 @@ def test_merge_from_disk_file_version(self): try: ht.merge_subset_from_disk(infile) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) def test_save_merge_from_disk_ksize(self): @@ -387,7 +387,7 @@ def test_save_merge_from_disk_ksize(self): try: ht.merge_subset_from_disk(outfile1) assert 0, "this should fail" - except IOError, e: + except IOError as e: print str(e) @@ -579,13 +579,13 @@ def test_partition_overlap_2(): assert x == (2, 0, 6), x x = p1.partition_sizes() - assert x == ([(3L, 8L)], 0), x + assert x == ([(3, 8)], 0), x x = p2.partition_sizes() - assert x == ([(3L, 6L), (5L, 6L)], 2), x + assert x == ([(3, 6), (5, 6)], 2), x x = p1.partition_average_coverages(kh) - assert x == [(3L, 11L)] + assert x == [(3, 11)] x = p2.partition_average_coverages(kh) - assert x == [(3L, 5L), (5L, 10L)], x + assert x == [(3, 5), (5, 10)], x From 1655683a1ccc2813d364c376bff524df7b262331 Mon Sep 17 00:00:00 2001 From: Jessica Mizzi Date: Thu, 6 Nov 2014 10:32:38 -0500 Subject: [PATCH 3/8] fixed syntax errors in force parser and fixed 2 other errors with args.force --- scripts/interleave-reads.py | 2 +- scripts/load-graph.py | 6 +++--- scripts/load-into-counting.py | 6 +++--- scripts/sample-reads-randomly.py | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/interleave-reads.py b/scripts/interleave-reads.py index 000b418cf8..ff77755592 100755 --- a/scripts/interleave-reads.py +++ b/scripts/interleave-reads.py @@ -63,7 +63,7 @@ def get_parser(): default=sys.stdout) parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) - parser.add_argument('-f', '-force', default=False, action='store_true', + parser.add_argument('-f', '--force', default=False, action='store_true', help='Overwrite output file if it exists') return parser diff --git a/scripts/load-graph.py b/scripts/load-graph.py index 82bca465cb..58a95d8b43 100755 --- a/scripts/load-graph.py +++ b/scripts/load-graph.py @@ -40,7 +40,7 @@ def get_parser(): help="Prints the total number of k-mers to stderr") parser.add_argument('--write-fp-rate', '-w', action='store_true', help="Write false positive rate into .info file") - parser.add_argument('-f', '-force', default=False, action='store_true', + parser.add_argument('-f', '--force', default=False, action='store_true', help='Overwrite output file if it exists') return parser @@ -113,8 +113,8 @@ def main(): print >> sys.stderr, ("** ERROR: the graph structure is too small for " "this data set. Increase table size/# tables.") print >> sys.stderr, "**" - # if not args.force: - sys.exit(1) + if not args.force: + sys.exit(1) print >> sys.stderr, 'wrote to', base + '.info and', base + '.pt' if not args.no_build_tagset: diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index 906e7fe518..bc3a535fa5 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -62,7 +62,7 @@ def get_parser(): "summary be in? (json or tsv, disabled by default)") parser.add_argument('--report-total-kmers', '-t', action='store_true', help="Prints the total number of k-mers to stderr") - parser.add_argument('-f', '-force', default=False, action='store_true', + parser.add_argument('-f', '--force', default=False, action='store_true', help='Overwrite output file if it exists') return parser @@ -164,8 +164,8 @@ def main(): print >> sys.stderr, "** ERROR: the k-mer counting table is too small", print >> sys.stderr, "for this data set. Increase tablesize/# tables." print >> sys.stderr, "**" - # if not args.force: - sys.exit(1) + if not args.force: + sys.exit(1) print >>sys.stderr, 'DONE.' print >>sys.stderr, 'wrote to:', base + '.info' diff --git a/scripts/sample-reads-randomly.py b/scripts/sample-reads-randomly.py index bb6d6a402b..364df34705 100755 --- a/scripts/sample-reads-randomly.py +++ b/scripts/sample-reads-randomly.py @@ -67,7 +67,7 @@ def get_parser(): type=argparse.FileType('w'), default=None) parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) - parser.add_argument('-f', '-force', default=False, action='store_true', + parser.add_argument('-f', '--force', default=False, action='store_true', help='Overwrite output file if it exits') return parser From 7b24ad24ce206891a2e9f4a4d4e72a50d5752d28 Mon Sep 17 00:00:00 2001 From: Jessica Mizzi Date: Tue, 18 Nov 2014 16:44:20 -0500 Subject: [PATCH 4/8] fixed some formatting and combined 2 if statements into 1 --- scripts/interleave-reads.py | 5 ++--- scripts/load-into-counting.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/scripts/interleave-reads.py b/scripts/interleave-reads.py index ff77755592..9f79c71a67 100755 --- a/scripts/interleave-reads.py +++ b/scripts/interleave-reads.py @@ -94,9 +94,8 @@ def main(): print >> sys.stderr, "Error! R2 file %s does not exist" % s2_file fail = True - if fail: - if not args.force: - sys.exit(1) + if fail and not args.force: + sys.exit(1) print >> sys.stderr, "Interleaving:\n\t%s\n\t%s" % (s1_file, s2_file) diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index bc3a535fa5..17b2b86909 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -78,7 +78,7 @@ def main(): filenames = args.input_sequence_filename for name in args.input_sequence_filename: - check_file_status(name) + check_file_status(name) check_space(args.input_sequence_filename) check_space_for_hashtable(args.n_tables * args.min_tablesize) @@ -164,8 +164,7 @@ def main(): print >> sys.stderr, "** ERROR: the k-mer counting table is too small", print >> sys.stderr, "for this data set. Increase tablesize/# tables." print >> sys.stderr, "**" - if not args.force: - sys.exit(1) + sys.exit(1) print >>sys.stderr, 'DONE.' print >>sys.stderr, 'wrote to:', base + '.info' From 12b196dd157a70ab26031410b2941fc821311bd0 Mon Sep 17 00:00:00 2001 From: Jessica Mizzi Date: Tue, 18 Nov 2014 16:51:12 -0500 Subject: [PATCH 5/8] Fixed typo --- scripts/do-partition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/do-partition.py b/scripts/do-partition.py index 87cda2f9fd..53a20e4d22 100755 --- a/scripts/do-partition.py +++ b/scripts/do-partition.py @@ -97,7 +97,7 @@ def get_parser(): parser.add_argument('graphbase', help="base name for output files") parser.add_argument('input_filenames', metavar='input_sequence_filename', nargs='+', help='input FAST[AQ] sequence filenames') - parser.add_argument('-f', '-force', default=False, action='store_true', + parser.add_argument('-f', '--force', default=False, action='store_true', help='Overwrite output file if it exists') return parser From ab123c0b0c2084b142a2a7610405781d695cc7a8 Mon Sep 17 00:00:00 2001 From: Jessica Mizzi Date: Tue, 2 Dec 2014 17:23:58 -0500 Subject: [PATCH 6/8] Fixed typos --- khmer/file.py | 3 ++- scripts/sample-reads-randomly.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/khmer/file.py b/khmer/file.py index 0ef2eb1471..0b2ed4b78a 100644 --- a/khmer/file.py +++ b/khmer/file.py @@ -99,7 +99,8 @@ def check_space_for_hashtable(hash_size, _testhook_free_space=None): % (float(hash_size) / 1e9,) print >>sys.stderr, " Free space: %.1f GB" \ % (float(free_space) / 1e9,) - sys.exit(1) + if not force: + sys.exit(1) def check_valid_file_exists(in_files): diff --git a/scripts/sample-reads-randomly.py b/scripts/sample-reads-randomly.py index 364df34705..358d535cb8 100755 --- a/scripts/sample-reads-randomly.py +++ b/scripts/sample-reads-randomly.py @@ -105,7 +105,7 @@ def main(): sys.stderr.write( "Error: cannot specify -o with more than one sample.") if not args.force: - sys.exit(-1) + sys.exit(1) output_filename = output_file.name else: filename = args.filenames[0] From eea8efa3a4b0262af363d00697f82ba5914da199 Mon Sep 17 00:00:00 2001 From: Jessica Mizzi Date: Fri, 5 Dec 2014 13:53:35 -0500 Subject: [PATCH 7/8] Added force option to all scripts to script IO sanity checks and updated tests to match --- ChangeLog | 11 +++++++++++ khmer/file.py | 15 +++++++++------ sandbox/sweep-reads.py | 7 ++++--- scripts/abundance-dist-single.py | 6 ++++-- scripts/abundance-dist.py | 4 +++- scripts/annotate-partitions.py | 8 +++++--- scripts/count-median.py | 6 ++++-- scripts/count-overlap.py | 7 ++++--- scripts/do-partition.py | 4 ++-- scripts/extract-paired-reads.py | 6 ++++-- scripts/extract-partitions.py | 6 ++++-- scripts/filter-abund-single.py | 8 +++++--- scripts/filter-abund.py | 6 ++++-- scripts/filter-stoptags.py | 7 ++++--- scripts/interleave-reads.py | 4 ++-- scripts/load-graph.py | 6 +++--- scripts/load-into-counting.py | 6 +++--- scripts/make-initial-stoptags.py | 6 ++++-- scripts/merge-partitions.py | 6 ++++-- scripts/normalize-by-median.py | 6 ++++-- scripts/partition-graph.py | 6 ++++-- scripts/sample-reads-randomly.py | 4 ++-- scripts/split-paired-reads.py | 6 ++++-- setup.cfg | 2 +- tests/test_script_arguments.py | 28 ++++++++++++++++++++++++++-- tests/test_scripts.py | 4 ++-- 26 files changed, 126 insertions(+), 59 deletions(-) diff --git a/ChangeLog b/ChangeLog index cba74f4675..b1ed942e60 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2014-12-17 Jessica Mizzi + + * khmer/file.py,sandbox/sweep-reads.py,scripts/{abundance-dist-single, + abundance-dist,annotate-partitions,count-median,count-overlap,do-partition, + extract-paired-reads,extract-partitions,filter-abund-single,filter-abund, + filter-stoptags,interleave-reads,load-graph,load-into-counting, + make-initial-stoptags,merge-partitions,normalize-by-median,partition-graph, + sample-reads-randomly,split-paired-reads}.py,setup.cfg, + tests/{test_script_arguments,test_scripts}.py: Added force option to all + scripts to script IO sanity checks and updated tests to match. + 2014-12-17 Michael R. Crusoe * scripts/load-graph.py,khmer/_khmermodule.cc: restore threading to diff --git a/khmer/file.py b/khmer/file.py index 0b2ed4b78a..5e6171cff4 100644 --- a/khmer/file.py +++ b/khmer/file.py @@ -14,7 +14,7 @@ from stat import S_ISBLK, S_ISFIFO -def check_file_status(file_path): +def check_file_status(file_path, force): """Check the status of the file; if the file is empty or doesn't exist AND if the file is NOT a fifo/block/named pipe then a warning is printed and sys.exit(1) is called @@ -30,15 +30,17 @@ def check_file_status(file_path): if not os.path.exists(file_path): print >>sys.stderr, "ERROR: Input file %s does not exist; exiting" % \ file_path - sys.exit(1) + if not force: + sys.exit(1) else: if os.stat(file_path).st_size == 0: print >>sys.stderr, "ERROR: Input file %s is empty; exiting." % \ file_path - sys.exit(1) + if not force: + sys.exit(1) -def check_space(in_files, _testhook_free_space=None): +def check_space(in_files, force, _testhook_free_space=None): """ Estimate size of input files passed, then calculate disk space available. Exit if insufficient disk space, @@ -74,10 +76,11 @@ def check_space(in_files, _testhook_free_space=None): % (float(total_size) / 1e9,) print >>sys.stderr, " Free space: %.1f GB" \ % (float(free_space) / 1e9,) - sys.exit(1) + if not force: + sys.exit(1) -def check_space_for_hashtable(hash_size, _testhook_free_space=None): +def check_space_for_hashtable(hash_size, force, _testhook_free_space=None): """ Check we have enough size to write a hash table """ diff --git a/sandbox/sweep-reads.py b/sandbox/sweep-reads.py index fbdc068875..81770fb5af 100755 --- a/sandbox/sweep-reads.py +++ b/sandbox/sweep-reads.py @@ -191,7 +191,8 @@ def get_parser(): parser.add_argument(dest='input_fastp', help='Reference fasta or fastp') parser.add_argument('input_files', nargs='+', help='Reads to be swept and sorted') - + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -224,13 +225,13 @@ def main(): buf_size = args.buffer_size max_reads = args.max_reads - check_file_status(args.input_fastp) + check_file_status(args.input_fastp, args.force) check_valid_file_exists(args.input_files) all_input_files = [input_fastp] all_input_files.extend(args.input_files) # Check disk space availability - check_space(all_input_files) + check_space(all_input_files, args.force) # figure out input file type (FA/FQ) -- based on first file ix = iter(screed.open(args.input_files[0])) diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py index 77345aea89..5b8d5799fb 100755 --- a/scripts/abundance-dist-single.py +++ b/scripts/abundance-dist-single.py @@ -60,6 +60,8 @@ def get_parser(): "filename.") parser.add_argument('--report-total-kmers', '-t', action='store_true', help="Prints the total number of k-mers to stderr") + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -68,8 +70,8 @@ def main(): # pylint: disable=too-many-locals,too-many-branches args = get_parser().parse_args() report_on_config(args) - check_file_status(args.input_sequence_filename) - check_space([args.input_sequence_filename]) + check_file_status(args.input_sequence_filename, args.force) + check_space([args.input_sequence_filename], args.force) if args.savetable: check_space_for_hashtable(args.n_tables * args.min_tablesize) diff --git a/scripts/abundance-dist.py b/scripts/abundance-dist.py index 5ae61d297d..36edf1786c 100755 --- a/scripts/abundance-dist.py +++ b/scripts/abundance-dist.py @@ -44,6 +44,8 @@ def get_parser(): help='Overwrite output file if it exists') parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -53,7 +55,7 @@ def main(): infiles = [args.input_counting_table_filename, args.input_sequence_filename] for infile in infiles: - check_file_status(infile) + check_file_status(infile, args.force) print ('hashtable from', args.input_counting_table_filename, file=sys.stderr) diff --git a/scripts/annotate-partitions.py b/scripts/annotate-partitions.py index a749a5d6a9..91faa24a97 100755 --- a/scripts/annotate-partitions.py +++ b/scripts/annotate-partitions.py @@ -55,6 +55,8 @@ def get_parser(): 'annotate.') parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -68,11 +70,11 @@ def main(): partitionmap_file = args.graphbase + '.pmap.merged' - check_file_status(partitionmap_file) + check_file_status(partitionmap_file, args.force) for _ in filenames: - check_file_status(_) + check_file_status(_, args.force) - check_space(filenames) + check_space(filenames, args.force) print >>sys.stderr, 'loading partition map from:', partitionmap_file htable.load_partitionmap(partitionmap_file) diff --git a/scripts/count-median.py b/scripts/count-median.py index efb0d466de..c912ebaf73 100755 --- a/scripts/count-median.py +++ b/scripts/count-median.py @@ -51,6 +51,8 @@ def get_parser(): help='output summary filename') parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -64,9 +66,9 @@ def main(): infiles = [htfile, input_filename] for infile in infiles: - check_file_status(infile) + check_file_status(infile, args.force) - check_space(infiles) + check_space(infiles, args.force) print >>sys.stderr, 'loading k-mer counting table from', htfile htable = khmer.load_counting_hash(htfile) diff --git a/scripts/count-overlap.py b/scripts/count-overlap.py index 9d1f131294..aabb895489 100755 --- a/scripts/count-overlap.py +++ b/scripts/count-overlap.py @@ -44,7 +44,8 @@ def get_parser(): help="input sequence filename") parser.add_argument('report_filename', metavar='output_report_filename', help='output report filename') - + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -54,9 +55,9 @@ def main(): report_on_config(args, hashtype='hashbits') for infile in [args.ptfile, args.fafile]: - check_file_status(infile) + check_file_status(infile, args.force) - check_space([args.ptfile, args.fafile]) + check_space([args.ptfile, args.fafile], args.force) print >>sys.stderr, 'loading k-mer presence table from', args.ptfile ht1 = khmer.load_hashbits(args.ptfile) diff --git a/scripts/do-partition.py b/scripts/do-partition.py index 53a20e4d22..9c499715c2 100755 --- a/scripts/do-partition.py +++ b/scripts/do-partition.py @@ -110,9 +110,9 @@ def main(): # pylint: disable=too-many-locals,too-many-statements report_on_config(args, hashtype='hashbits') for infile in args.input_filenames: - check_file_status(infile) + check_file_status(infile, args.force) - check_space(args.input_filenames) + check_space(args.input_filenames, args.force) print >>sys.stderr, 'Saving k-mer presence table to %s' % args.graphbase print >>sys.stderr, 'Loading kmers from sequences in %s' % \ diff --git a/scripts/extract-paired-reads.py b/scripts/extract-paired-reads.py index 96d3c86507..f589deea22 100755 --- a/scripts/extract-paired-reads.py +++ b/scripts/extract-paired-reads.py @@ -74,6 +74,8 @@ def get_parser(): parser.add_argument('infile') parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -81,9 +83,9 @@ def main(): info('extract-paired-reads.py') args = get_parser().parse_args() - check_file_status(args.infile) + check_file_status(args.infile, args.force) infiles = [args.infile] - check_space(infiles) + check_space(infiles, args.force) outfile = os.path.basename(args.infile) if len(sys.argv) > 2: diff --git a/scripts/extract-partitions.py b/scripts/extract-partitions.py index f0e714f7bd..410dd2df06 100755 --- a/scripts/extract-partitions.py +++ b/scripts/extract-partitions.py @@ -81,6 +81,8 @@ def get_parser(): help='Output unassigned sequences, too') parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -94,9 +96,9 @@ def main(): # pylint: disable=too-many-locals,too-many-branches n_unassigned = 0 for infile in args.part_filenames: - check_file_status(infile) + check_file_status(infile, args.force) - check_space(args.part_filenames) + check_space(args.part_filenames, args.force) print >>sys.stderr, '---' print >>sys.stderr, 'reading partitioned files:', repr(args.part_filenames) diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py index 7a290761ce..9471e76899 100755 --- a/scripts/filter-abund-single.py +++ b/scripts/filter-abund-single.py @@ -56,16 +56,18 @@ def get_parser(): help="FAST[AQ] sequence file to trim") parser.add_argument('--report-total-kmers', '-t', action='store_true', help="Prints the total number of k-mers to stderr") + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser def main(): info('filter-abund-single.py', ['counting']) args = get_parser().parse_args() - check_file_status(args.datafile) - check_space([args.datafile]) + check_file_status(args.datafile, args.force) + check_space([args.datafile],args.force) if args.savetable: - check_space_for_hashtable(args.n_tables * args.min_tablesize) + check_space_for_hashtable(args.n_tables * args.min_tablesize, args.force) report_on_config(args) print >>sys.stderr, 'making k-mer counting table' diff --git a/scripts/filter-abund.py b/scripts/filter-abund.py index b5bd08c70e..d9dab6059c 100755 --- a/scripts/filter-abund.py +++ b/scripts/filter-abund.py @@ -67,6 +67,8 @@ def get_parser(): 'file for each input file.') parser.add_argument('--version', action='version', version='khmer {v}'.format(v=__version__)) + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -78,9 +80,9 @@ def main(): infiles = args.input_filename for _ in infiles: - check_file_status(_) + check_file_status(_, args.force) - check_space(infiles) + check_space(infiles, args.force) print >>sys.stderr, 'loading hashtable' htable = khmer.load_counting_hash(counting_ht) diff --git a/scripts/filter-stoptags.py b/scripts/filter-stoptags.py index 681bcfb04c..8340c929b6 100755 --- a/scripts/filter-stoptags.py +++ b/scripts/filter-stoptags.py @@ -45,9 +45,10 @@ def get_parser(): nargs='+') parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser - def main(): info('filter-stoptags.py', ['graph']) args = get_parser().parse_args() @@ -55,9 +56,9 @@ def main(): infiles = args.input_filenames for _ in infiles: - check_file_status(_) + check_file_status(_, args.force) - check_space(infiles) + check_space(infiles, args.force) print >>sys.stderr, 'loading stop tags, with K', args.ksize htable = khmer.new_hashbits(args.ksize, 1, 1) diff --git a/scripts/interleave-reads.py b/scripts/interleave-reads.py index 9f79c71a67..ca33e8c201 100755 --- a/scripts/interleave-reads.py +++ b/scripts/interleave-reads.py @@ -73,9 +73,9 @@ def main(): args = get_parser().parse_args() for _ in args.infiles: - check_file_status(_) + check_file_status(_, args.force) - check_space(args.infiles) + check_space(args.infiles, args.force) s1_file = args.infiles[0] if len(args.infiles) == 2: diff --git a/scripts/load-graph.py b/scripts/load-graph.py index 58a95d8b43..43b429bb7e 100755 --- a/scripts/load-graph.py +++ b/scripts/load-graph.py @@ -54,10 +54,10 @@ def main(): filenames = args.input_filenames for _ in args.input_filenames: - check_file_status(_) + check_file_status(_, args.force) - check_space(args.input_filenames) - check_space_for_hashtable(float(args.n_tables * args.min_tablesize) / 8.) + check_space(args.input_filenames, args.force) + check_space_for_hashtable((float(args.n_tables * args.min_tablesize) / 8.), args.force) print >>sys.stderr, 'Saving k-mer presence table to %s' % base print >>sys.stderr, 'Loading kmers from sequences in %s' % repr(filenames) diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index 17b2b86909..8a728f1279 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -78,10 +78,10 @@ def main(): filenames = args.input_sequence_filename for name in args.input_sequence_filename: - check_file_status(name) + check_file_status(name, args.force) - check_space(args.input_sequence_filename) - check_space_for_hashtable(args.n_tables * args.min_tablesize) + check_space(args.input_sequence_filename, args.force) + check_space_for_hashtable(args.n_tables * args.min_tablesize, args.force) print >>sys.stderr, 'Saving k-mer counting table to %s' % base print >>sys.stderr, 'Loading kmers from sequences in %s' % repr(filenames) diff --git a/scripts/make-initial-stoptags.py b/scripts/make-initial-stoptags.py index 093f9b2acc..35f8524188 100755 --- a/scripts/make-initial-stoptags.py +++ b/scripts/make-initial-stoptags.py @@ -62,6 +62,8 @@ def get_parser(): help="Use stoptags in this file during partitioning") parser.add_argument('graphbase', help='basename for input and output ' 'filenames') + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -77,9 +79,9 @@ def main(): if args.stoptags: infiles.append(args.stoptags) for _ in infiles: - check_file_status(_) + check_file_status(_, args.force) - check_space(infiles) + check_space(infiles, args.force) print >>sys.stderr, 'loading htable %s.pt' % graphbase htable = khmer.load_hashbits(graphbase + '.pt') diff --git a/scripts/merge-partitions.py b/scripts/merge-partitions.py index cc64773627..18aae51966 100755 --- a/scripts/merge-partitions.py +++ b/scripts/merge-partitions.py @@ -44,6 +44,8 @@ def get_parser(): 'files') parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -61,9 +63,9 @@ def main(): htable = khmer.new_hashbits(ksize, 1, 1) for _ in pmap_files: - check_file_status(_) + check_file_status(_, args.force) - check_space(pmap_files) + check_space(pmap_files, args.force) for pmap_file in pmap_files: print >>sys.stderr, 'merging', pmap_file diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py index 4a199530b9..cc079c44e0 100755 --- a/scripts/normalize-by-median.py +++ b/scripts/normalize-by-median.py @@ -208,6 +208,8 @@ def get_parser(): parser.add_argument('--report-total-kmers', '-t', action='store_true', help="Prints the total number of k-mers" " post-normalization to stderr") + parser.add_argument('--force', default=False, action='store_true', + help='Overwrite output file if it exists') add_loadhash_args(parser) return parser @@ -221,9 +223,9 @@ def main(): # pylint: disable=too-many-branches,too-many-statements report_fp = args.report check_valid_file_exists(args.input_filenames) - check_space(args.input_filenames) + check_space(args.input_filenames, args.force) if args.savetable: - check_space_for_hashtable(args.n_tables * args.min_tablesize) + check_space_for_hashtable(args.n_tables * args.min_tablesize, args.force) # list to save error files along with throwing exceptions if args.force: diff --git a/scripts/partition-graph.py b/scripts/partition-graph.py index a37260eabd..250e4044ab 100755 --- a/scripts/partition-graph.py +++ b/scripts/partition-graph.py @@ -90,6 +90,8 @@ def get_parser(): 'traversals') parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') add_threading_args(parser) return parser @@ -101,9 +103,9 @@ def main(): filenames = [basename + '.pt', basename + '.tagset'] for _ in filenames: - check_file_status(_) + check_file_status(_, args.force) - check_space(filenames) + check_space(filenames, args.force) print >>sys.stderr, '--' print >>sys.stderr, 'SUBSET SIZE', args.subset_size diff --git a/scripts/sample-reads-randomly.py b/scripts/sample-reads-randomly.py index 358d535cb8..755c8a63b0 100755 --- a/scripts/sample-reads-randomly.py +++ b/scripts/sample-reads-randomly.py @@ -84,9 +84,9 @@ def main(): args = get_parser().parse_args() for _ in args.filenames: - check_file_status(_) + check_file_status(_, args.force) - check_space(args.filenames) + check_space(args.filenames, args.force) # seed the random number generator? if args.random_seed: diff --git a/scripts/split-paired-reads.py b/scripts/split-paired-reads.py index 09521e5cbe..fbb4ead6ec 100755 --- a/scripts/split-paired-reads.py +++ b/scripts/split-paired-reads.py @@ -42,6 +42,8 @@ def get_parser(): parser.add_argument('infile') parser.add_argument('--version', action='version', version='%(prog)s ' + khmer.__version__) + parser.add_argument('-f', '--force', default=False, action='store_true', + help='Overwrite output file if it exists') return parser @@ -51,9 +53,9 @@ def main(): infile = args.infile - check_file_status(infile) + check_file_status(infile, args.force) filenames = [infile] - check_space(filenames) + check_space(filenames, args.force) out1 = os.path.basename(infile) + '.1' out2 = os.path.basename(infile) + '.2' diff --git a/setup.cfg b/setup.cfg index c1ce118a54..414e272846 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [nosetests] verbosity = 2 -stop = TRUE +#stop = TRUE attr = !known_failing,!jenkins #processes = -1 # breaks xunit output #attr = !known_failing,!highmem diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py index ba850f7e71..ac91f0a4dd 100644 --- a/tests/test_script_arguments.py +++ b/tests/test_script_arguments.py @@ -20,7 +20,7 @@ def test_check_space(): save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO() try: - khmer.file.check_space([fakelump_fa], _testhook_free_space=0) + khmer.file.check_space([fakelump_fa], force=False, _testhook_free_space=0) assert 0, "this should fail" except SystemExit as e: print str(e) @@ -31,9 +31,33 @@ def test_check_space(): def test_check_tablespace(): save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO() try: - khmer.file.check_space_for_hashtable(1e9, _testhook_free_space=0) + khmer.file.check_space_for_hashtable(1e9, force=False, _testhook_free_space=0) assert 0, "this should fail" except SystemExit as e: print str(e) finally: sys.stderr = save_stderr + + +def test_check_space_force(): + fakelump_fa = utils.get_test_data('fakelump.fa') + + save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO() + try: + khmer.file.check_space([fakelump_fa], force=True, _testhook_free_space=0) + assert True, "this should pass" + except SystemExit as e: + print str(e) + finally: + sys.stderr = save_stderr + + +def test_check_tablespace_force(): + save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO() + try: + khmer.file.check_space_for_hashtable(1e9, force=True, _testhook_free_space=0) + assert True, "this should pass" + except SystemExit as e: + print str(e) + finally: + sys.stderr = save_stderr diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 8881274822..c8e3757978 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -36,7 +36,7 @@ def teardown(): def test_check_space(): # @CTB this probably belongs in a new test file, along with other # tests of the file.py module. - khmer.file.check_space(['', utils.get_test_data('test-abund-read-2.fa')]) + khmer.file.check_space(['', utils.get_test_data('test-abund-read-2.fa')], False) def test_load_into_counting(): @@ -1555,7 +1555,7 @@ def test_sample_reads_randomly_S(): badargs = list(args) badargs.extend(['-o', 'test', 'test.fq', 'test.fq']) (status, out, err) = utils.runscript(script, badargs, in_dir, fail_ok=True) - assert status == -1, (status, out, err) + assert status == 1, (status, out, err) args.append('test.fq') From 4bab69a66e4a52dd2b0cff543ff2d87cef017612 Mon Sep 17 00:00:00 2001 From: Jessica Mizzi Date: Fri, 19 Dec 2014 14:17:07 -0500 Subject: [PATCH 8/8] pep8 --- khmer/file.py | 2 +- scripts/filter-abund-single.py | 5 +++-- scripts/filter-stoptags.py | 1 + scripts/load-graph.py | 3 ++- scripts/load-into-counting.py | 2 +- scripts/normalize-by-median.py | 3 ++- tests/test_script_arguments.py | 12 ++++++++---- tests/test_scripts.py | 3 ++- 8 files changed, 20 insertions(+), 11 deletions(-) diff --git a/khmer/file.py b/khmer/file.py index 5e6171cff4..27b51e4a51 100644 --- a/khmer/file.py +++ b/khmer/file.py @@ -37,7 +37,7 @@ def check_file_status(file_path, force): print >>sys.stderr, "ERROR: Input file %s is empty; exiting." % \ file_path if not force: - sys.exit(1) + sys.exit(1) def check_space(in_files, force, _testhook_free_space=None): diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py index 9471e76899..f5ff0bc5cd 100755 --- a/scripts/filter-abund-single.py +++ b/scripts/filter-abund-single.py @@ -65,9 +65,10 @@ def main(): info('filter-abund-single.py', ['counting']) args = get_parser().parse_args() check_file_status(args.datafile, args.force) - check_space([args.datafile],args.force) + check_space([args.datafile], args.force) if args.savetable: - check_space_for_hashtable(args.n_tables * args.min_tablesize, args.force) + check_space_for_hashtable( + args.n_tables * args.min_tablesize, args.force) report_on_config(args) print >>sys.stderr, 'making k-mer counting table' diff --git a/scripts/filter-stoptags.py b/scripts/filter-stoptags.py index 8340c929b6..dde8fb5740 100755 --- a/scripts/filter-stoptags.py +++ b/scripts/filter-stoptags.py @@ -49,6 +49,7 @@ def get_parser(): help='Overwrite output file if it exists') return parser + def main(): info('filter-stoptags.py', ['graph']) args = get_parser().parse_args() diff --git a/scripts/load-graph.py b/scripts/load-graph.py index 43b429bb7e..b0d15a200f 100755 --- a/scripts/load-graph.py +++ b/scripts/load-graph.py @@ -57,7 +57,8 @@ def main(): check_file_status(_, args.force) check_space(args.input_filenames, args.force) - check_space_for_hashtable((float(args.n_tables * args.min_tablesize) / 8.), args.force) + check_space_for_hashtable( + (float(args.n_tables * args.min_tablesize) / 8.), args.force) print >>sys.stderr, 'Saving k-mer presence table to %s' % base print >>sys.stderr, 'Loading kmers from sequences in %s' % repr(filenames) diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index 8a728f1279..501f89008e 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -78,7 +78,7 @@ def main(): filenames = args.input_sequence_filename for name in args.input_sequence_filename: - check_file_status(name, args.force) + check_file_status(name, args.force) check_space(args.input_sequence_filename, args.force) check_space_for_hashtable(args.n_tables * args.min_tablesize, args.force) diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py index cc079c44e0..956336ac99 100755 --- a/scripts/normalize-by-median.py +++ b/scripts/normalize-by-median.py @@ -225,7 +225,8 @@ def main(): # pylint: disable=too-many-branches,too-many-statements check_valid_file_exists(args.input_filenames) check_space(args.input_filenames, args.force) if args.savetable: - check_space_for_hashtable(args.n_tables * args.min_tablesize, args.force) + check_space_for_hashtable( + args.n_tables * args.min_tablesize, args.force) # list to save error files along with throwing exceptions if args.force: diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py index ac91f0a4dd..9af1a753ea 100644 --- a/tests/test_script_arguments.py +++ b/tests/test_script_arguments.py @@ -20,7 +20,8 @@ def test_check_space(): save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO() try: - khmer.file.check_space([fakelump_fa], force=False, _testhook_free_space=0) + khmer.file.check_space( + [fakelump_fa], force=False, _testhook_free_space=0) assert 0, "this should fail" except SystemExit as e: print str(e) @@ -31,7 +32,8 @@ def test_check_space(): def test_check_tablespace(): save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO() try: - khmer.file.check_space_for_hashtable(1e9, force=False, _testhook_free_space=0) + khmer.file.check_space_for_hashtable( + 1e9, force=False, _testhook_free_space=0) assert 0, "this should fail" except SystemExit as e: print str(e) @@ -44,7 +46,8 @@ def test_check_space_force(): save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO() try: - khmer.file.check_space([fakelump_fa], force=True, _testhook_free_space=0) + khmer.file.check_space( + [fakelump_fa], force=True, _testhook_free_space=0) assert True, "this should pass" except SystemExit as e: print str(e) @@ -55,7 +58,8 @@ def test_check_space_force(): def test_check_tablespace_force(): save_stderr, sys.stderr = sys.stderr, cStringIO.StringIO() try: - khmer.file.check_space_for_hashtable(1e9, force=True, _testhook_free_space=0) + khmer.file.check_space_for_hashtable( + 1e9, force=True, _testhook_free_space=0) assert True, "this should pass" except SystemExit as e: print str(e) diff --git a/tests/test_scripts.py b/tests/test_scripts.py index c8e3757978..a111c1b910 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -36,7 +36,8 @@ def teardown(): def test_check_space(): # @CTB this probably belongs in a new test file, along with other # tests of the file.py module. - khmer.file.check_space(['', utils.get_test_data('test-abund-read-2.fa')], False) + khmer.file.check_space( + ['', utils.get_test_data('test-abund-read-2.fa')], False) def test_load_into_counting():