From 7c307822e4e1b1b8ec22a13849c27a9885896ab7 Mon Sep 17 00:00:00 2001 From: Jake Fenton Date: Fri, 10 Jul 2015 16:05:52 -0400 Subject: [PATCH 01/11] initial/working changes --- khmer/kfile.py | 4 ++-- scripts/load-into-counting.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/khmer/kfile.py b/khmer/kfile.py index 9a01f590d9..5e77a7aa9c 100644 --- a/khmer/kfile.py +++ b/khmer/kfile.py @@ -117,8 +117,8 @@ def check_space_for_hashtable(args, hashtype, force, """Check we have enough size to write a hash table.""" hash_size = khmer_args._calculate_tablesize(args, hashtype) - cwd = os.getcwd() - dir_path = os.path.dirname(os.path.realpath(cwd)) + #cwd = os.getcwd() + dir_path = os.path.dirname(os.path.realpath(args.output_filename)) target = os.statvfs(dir_path) if _testhook_free_space is None: free_space = target.f_frsize * target.f_bavail diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index f907c36723..0ad11c8bbe 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -24,7 +24,7 @@ from khmer.khmer_args import build_counting_args, report_on_config, info,\ add_threading_args from khmer.kfile import check_file_writable -from khmer.kfile import check_input_files, check_space +from khmer.kfile import check_input_files from khmer.kfile import check_space_for_hashtable @@ -84,7 +84,6 @@ def main(): for name in args.input_sequence_filename: check_input_files(name, args.force) - check_space(args.input_sequence_filename, args.force) check_space_for_hashtable(args, 'countgraph', args.force) check_file_writable(base) From 9d1e88148ba26e086b4ef0bf1791ae32ff5a192f Mon Sep 17 00:00:00 2001 From: Jake Fenton Date: Thu, 16 Jul 2015 14:43:01 -0400 Subject: [PATCH 02/11] removed additionally unneeded space checks --- scripts/abundance-dist-single.py | 4 +--- scripts/count-overlap.py | 4 +--- scripts/make-initial-stoptags.py | 4 +--- scripts/partition-graph.py | 4 +--- scripts/sample-reads-randomly.py | 4 
+--- 5 files changed, 5 insertions(+), 15 deletions(-) diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py index 10d2109c77..bb85618a7c 100755 --- a/scripts/abundance-dist-single.py +++ b/scripts/abundance-dist-single.py @@ -25,8 +25,7 @@ from khmer import khmer_args from khmer.khmer_args import (build_counting_args, add_threading_args, report_on_config, info) -from khmer.kfile import (check_input_files, check_space, - check_space_for_hashtable) +from khmer.kfile import (check_input_files, check_space_for_hashtable) def get_parser(): @@ -78,7 +77,6 @@ def main(): # pylint: disable=too-many-locals,too-many-branches report_on_config(args) check_input_files(args.input_sequence_filename, args.force) - check_space([args.input_sequence_filename], args.force) if args.savetable: check_space_for_hashtable(args, 'countgraph', args.force) diff --git a/scripts/count-overlap.py b/scripts/count-overlap.py index a8c715d161..1016b10149 100755 --- a/scripts/count-overlap.py +++ b/scripts/count-overlap.py @@ -24,7 +24,7 @@ import khmer import textwrap from khmer import khmer_args -from khmer.kfile import check_input_files, check_space +from khmer.kfile import check_input_files from khmer.khmer_args import (build_hashbits_args, report_on_config, info) @@ -60,8 +60,6 @@ def main(): for infile in [args.ptfile, args.fafile]: check_input_files(infile, args.force) - check_space([args.ptfile, args.fafile], args.force) - print('loading k-mer presence table from', args.ptfile, file=sys.stderr) ht1 = khmer.load_hashbits(args.ptfile) kmer_size = ht1.ksize() diff --git a/scripts/make-initial-stoptags.py b/scripts/make-initial-stoptags.py index 29a08ef7d7..e99a690275 100755 --- a/scripts/make-initial-stoptags.py +++ b/scripts/make-initial-stoptags.py @@ -18,7 +18,7 @@ import khmer from khmer import khmer_args from khmer.khmer_args import (build_counting_args, info) -from khmer.kfile import check_input_files, check_space +from khmer.kfile import check_input_files 
DEFAULT_SUBSET_SIZE = int(1e4) DEFAULT_COUNTING_HT_SIZE = 3e6 # number of bytes @@ -83,8 +83,6 @@ def main(): for _ in infiles: check_input_files(_, args.force) - check_space(infiles, args.force) - print('loading htable %s.pt' % graphbase, file=sys.stderr) htable = khmer.load_hashbits(graphbase + '.pt') diff --git a/scripts/partition-graph.py b/scripts/partition-graph.py index 73666e2040..bb03c0b655 100755 --- a/scripts/partition-graph.py +++ b/scripts/partition-graph.py @@ -24,7 +24,7 @@ import khmer import sys from khmer.khmer_args import (add_threading_args, info) -from khmer.kfile import check_input_files, check_space +from khmer.kfile import check_input_files # Debugging Support import re @@ -111,8 +111,6 @@ def main(): for _ in filenames: check_input_files(_, args.force) - check_space(filenames, args.force) - print('--', file=sys.stderr) print('SUBSET SIZE', args.subset_size, file=sys.stderr) print('N THREADS', args.threads, file=sys.stderr) diff --git a/scripts/sample-reads-randomly.py b/scripts/sample-reads-randomly.py index 79b4777c33..3cdc4260bb 100755 --- a/scripts/sample-reads-randomly.py +++ b/scripts/sample-reads-randomly.py @@ -27,7 +27,7 @@ import sys import khmer -from khmer.kfile import check_input_files, check_space +from khmer.kfile import check_input_files from khmer.khmer_args import info from khmer.utils import write_record, broken_paired_reader @@ -85,8 +85,6 @@ def main(): for _ in args.filenames: check_input_files(_, args.force) - check_space(args.filenames, args.force) - # seed the random number generator? 
if args.random_seed: random.seed(args.random_seed) From 060f2a8742aa70eb317c4723713b9f9e9a4d2da2 Mon Sep 17 00:00:00 2001 From: Jake Fenton Date: Thu, 16 Jul 2015 15:53:46 -0400 Subject: [PATCH 03/11] working changes--refactoring argument structs for utility functions --- khmer/kfile.py | 8 +++++--- khmer/khmer_args.py | 18 +++++++++++------- oxli/build_graph.py | 4 +++- scripts/load-into-counting.py | 4 +++- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/khmer/kfile.py b/khmer/kfile.py index 5e77a7aa9c..81a67ce8ac 100644 --- a/khmer/kfile.py +++ b/khmer/kfile.py @@ -112,13 +112,15 @@ def check_space(in_files, force, _testhook_free_space=None): sys.exit(1) -def check_space_for_hashtable(args, hashtype, force, +def check_space_for_hashtable(outfile_name, hashtype, force, n_tables, + max_tablesize, max_mem=None, _testhook_free_space=None): """Check we have enough size to write a hash table.""" - hash_size = khmer_args._calculate_tablesize(args, hashtype) + hash_size = khmer_args._calculate_tablesize(max_tablesize, n_tables, + hashtype, max_memory_use=max_mem) #cwd = os.getcwd() - dir_path = os.path.dirname(os.path.realpath(args.output_filename)) + dir_path = os.path.dirname(os.path.realpath(outfile_name)) target = os.statvfs(dir_path) if _testhook_free_space is None: free_space = target.f_frsize * target.f_bavail diff --git a/khmer/khmer_args.py b/khmer/khmer_args.py index 7782af7c07..47f3514644 100644 --- a/khmer/khmer_args.py +++ b/khmer/khmer_args.py @@ -133,19 +133,20 @@ def __call__(self, parser, namespace, values, option_string=None): action=LoadAction) -def _calculate_tablesize(args, hashtype, multiplier=1.0): +def _calculate_tablesize(max_tablesize, n_tables, hashtype, multiplier=1.0, + max_memory_use=None): if hashtype not in ('countgraph', 'nodegraph'): raise Exception("unknown graph type: %s" % (hashtype,)) - if args.max_memory_usage: + if max_memory_use: if hashtype == 'countgraph': - tablesize = args.max_memory_usage / args.n_tables / \ 
+ tablesize = max_memory_use / n_tables / \ float(multiplier) elif hashtype == 'nodegraph': - tablesize = 8. * args.max_memory_usage / args.n_tables / \ + tablesize = 8. * max_memory_use / n_tables / \ float(multiplier) else: - tablesize = args.max_tablesize + tablesize = max_tablesize return tablesize @@ -168,7 +169,9 @@ def create_countgraph(args, ksize=None, multiplier=1.0): print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n") sys.exit(1) - tablesize = _calculate_tablesize(args, 'countgraph', multiplier=multiplier) + tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, + 'countgraph', multiplier=multiplier, + max_memory_use=args.max_memory_usage) return khmer.CountingHash(ksize, tablesize, args.n_tables) @@ -185,7 +188,8 @@ def report_on_config(args, hashtype='countgraph'): if args.quiet: return - tablesize = _calculate_tablesize(args, hashtype) + tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, + hashtype, max_memory_use=args.max_memory_usage) print_error("\nPARAMETERS:") print_error(" - kmer size = {0} \t\t(-k)".format(args.ksize)) diff --git a/oxli/build_graph.py b/oxli/build_graph.py index b58d2af080..8e32380eda 100644 --- a/oxli/build_graph.py +++ b/oxli/build_graph.py @@ -55,7 +55,9 @@ def main(args): args = functions.do_sanity_checking(args, 0.01) check_space(args.input_filenames, args.force) - check_space_for_hashtable(args, 'nodegraph', args.force) + check_space_for_hashtable(output_filename, 'nodegraph', args.force, + args.n_tables, ars.max_tablesize, + args.max_memory_usage) print('Saving k-mer presence table to %s' % base, file=sys.stderr) print('Loading kmers from sequences in %s' % diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index 0ad11c8bbe..461de31603 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -84,7 +84,9 @@ def main(): for name in args.input_sequence_filename: check_input_files(name, args.force) - check_space_for_hashtable(args, 
'countgraph', args.force) + check_space_for_hashtable(args.output_countingtable_filename, 'countgraph', + args.force, args.n_tables, args.max_tablesize, + args.max_memory_usage) check_file_writable(base) check_file_writable(base + ".info") From 1e1d3d3c38acbf5c8d6e1f2148aa9ebf65ec5e63 Mon Sep 17 00:00:00 2001 From: Jake Fenton Date: Thu, 16 Jul 2015 16:49:12 -0400 Subject: [PATCH 04/11] finished refactoring everything, made stuff play nice together --- khmer/kfile.py | 7 ++++--- khmer/khmer_args.py | 11 +++++++---- oxli/build_graph.py | 4 ++-- sandbox/collect-reads.py | 4 +++- scripts/abundance-dist-single.py | 4 +++- scripts/filter-abund-single.py | 4 +++- scripts/load-into-counting.py | 6 ++++-- scripts/normalize-by-median.py | 4 +++- scripts/trim-low-abund.py | 4 +++- tests/test_script_arguments.py | 15 ++++++++++++--- 10 files changed, 44 insertions(+), 19 deletions(-) diff --git a/khmer/kfile.py b/khmer/kfile.py index 81a67ce8ac..3e3b4535eb 100644 --- a/khmer/kfile.py +++ b/khmer/kfile.py @@ -116,12 +116,13 @@ def check_space_for_hashtable(outfile_name, hashtype, force, n_tables, max_tablesize, max_mem=None, _testhook_free_space=None): """Check we have enough size to write a hash table.""" - hash_size = khmer_args._calculate_tablesize(max_tablesize, n_tables, - hashtype, max_memory_use=max_mem) + hash_size = khmer_args._calculate_tablesize(max_tablesize, n_tables, + hashtype, + max_memory_use=max_mem) - #cwd = os.getcwd() dir_path = os.path.dirname(os.path.realpath(outfile_name)) target = os.statvfs(dir_path) + if _testhook_free_space is None: free_space = target.f_frsize * target.f_bavail else: diff --git a/khmer/khmer_args.py b/khmer/khmer_args.py index 47f3514644..15373ced64 100644 --- a/khmer/khmer_args.py +++ b/khmer/khmer_args.py @@ -158,7 +158,9 @@ def create_nodegraph(args, ksize=None, multiplier=1.0): print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n") sys.exit(1) - tablesize = _calculate_tablesize(args, 'nodegraph', 
multiplier=multiplier) + tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, + 'nodegraph', multiplier, + args.max_memory_usage) return khmer.Hashbits(ksize, tablesize, args.n_tables) @@ -169,7 +171,7 @@ def create_countgraph(args, ksize=None, multiplier=1.0): print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n") sys.exit(1) - tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, + tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, 'countgraph', multiplier=multiplier, max_memory_use=args.max_memory_usage) return khmer.CountingHash(ksize, tablesize, args.n_tables) @@ -188,8 +190,9 @@ def report_on_config(args, hashtype='countgraph'): if args.quiet: return - tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, - hashtype, max_memory_use=args.max_memory_usage) + tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, + hashtype, + max_memory_use=args.max_memory_usage) print_error("\nPARAMETERS:") print_error(" - kmer size = {0} \t\t(-k)".format(args.ksize)) diff --git a/oxli/build_graph.py b/oxli/build_graph.py index 8e32380eda..fefd24def2 100644 --- a/oxli/build_graph.py +++ b/oxli/build_graph.py @@ -55,8 +55,8 @@ def main(args): args = functions.do_sanity_checking(args, 0.01) check_space(args.input_filenames, args.force) - check_space_for_hashtable(output_filename, 'nodegraph', args.force, - args.n_tables, ars.max_tablesize, + check_space_for_hashtable(args.output_filename, 'nodegraph', args.force, + args.n_tables, args.max_tablesize, args.max_memory_usage) print('Saving k-mer presence table to %s' % base, file=sys.stderr) diff --git a/sandbox/collect-reads.py b/sandbox/collect-reads.py index 48c4a115f1..aad7dfdef0 100755 --- a/sandbox/collect-reads.py +++ b/sandbox/collect-reads.py @@ -77,7 +77,9 @@ def main(): check_input_files(name, False) check_space(args.input_sequence_filename, False) - check_space_for_hashtable(args, 'countgraph', False) + 
check_space_for_hashtable(args.output_countingtable_filename, 'countgraph', + False, args.n_tables, args.max_tablesize, + args.max_memory_usage) print('Saving k-mer counting table to %s' % base) print('Loading sequences from %s' % repr(filenames)) diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py index bb85618a7c..ac0f2528d7 100755 --- a/scripts/abundance-dist-single.py +++ b/scripts/abundance-dist-single.py @@ -78,7 +78,9 @@ def main(): # pylint: disable=too-many-locals,too-many-branches check_input_files(args.input_sequence_filename, args.force) if args.savetable: - check_space_for_hashtable(args, 'countgraph', args.force) + check_space_for_hashtable(args.savetable, 'countgraph', args.force, + args.n_tables, args.max_tablesize, + args.max_memory_usage) if (not args.squash_output and os.path.exists(args.output_histogram_filename)): diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py index b22a4946bd..cb30eae7e0 100755 --- a/scripts/filter-abund-single.py +++ b/scripts/filter-abund-single.py @@ -71,7 +71,9 @@ def main(): check_input_files(args.datafile, args.force) check_space([args.datafile], args.force) if args.savetable: - check_space_for_hashtable(args, 'countgraph', args.force) + check_space_for_hashtable(args.savetable, 'countgraph', args.force, + args.n_tables, args.max_tablesize, + args.max_memory_usage) report_on_config(args) print('making countgraph', file=sys.stderr) diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index 461de31603..3a563bf553 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -84,7 +84,7 @@ def main(): for name in args.input_sequence_filename: check_input_files(name, args.force) - check_space_for_hashtable(args.output_countingtable_filename, 'countgraph', + check_space_for_hashtable(args.output_countingtable_filename, 'countgraph', args.force, args.n_tables, args.max_tablesize, args.max_memory_usage) @@ -125,7 +125,9 @@ def 
main(): thread.join() if index > 0 and index % 10 == 0: - check_space_for_hashtable(args, 'countgraph', args.force) + check_space_for_hashtable(base, 'countgraph', args.force, + args.n_tables, args.max_tablesize, + args.max_memory_usage) print('mid-save', base, file=sys.stderr) htable.save(base) diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py index e58cb6dc9e..fea78416eb 100755 --- a/scripts/normalize-by-median.py +++ b/scripts/normalize-by-median.py @@ -277,7 +277,9 @@ def main(): # pylint: disable=too-many-branches,too-many-statements check_valid_file_exists(args.input_filenames) check_space(args.input_filenames, args.force) if args.savetable: - check_space_for_hashtable(args, 'countgraph', args.force) + check_space_for_hashtable(args.savetable, 'countgraph', args.force, + args.n_tables, args.max_tablesize, + args.max_memory_usage) # load or create counting table. if args.loadtable: diff --git a/scripts/trim-low-abund.py b/scripts/trim-low-abund.py index 741b181775..694eeb8a24 100755 --- a/scripts/trim-low-abund.py +++ b/scripts/trim-low-abund.py @@ -126,7 +126,9 @@ def main(): check_valid_file_exists(args.input_filenames) check_space(args.input_filenames, args.force) if args.savetable: - check_space_for_hashtable(args, 'countgraph', args.force) + check_space_for_hashtable(args.savetable, 'countgraph', args.force, + args.n_tables, args.max_tablesize, + args.max_memory_usage) if args.loadtable: print('loading countgraph from', args.loadtable, file=sys.stderr) diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py index f523d28c70..11310b8c52 100644 --- a/tests/test_script_arguments.py +++ b/tests/test_script_arguments.py @@ -46,13 +46,16 @@ def test_check_space(): def test_check_tablespace(): + outfile = utils.get_test_data('truncated.fq') save_stderr, sys.stderr = sys.stderr, io.StringIO() parser = khmer_args.build_counting_args() args = parser.parse_args(['-M', '1e9']) try: - 
khmer.kfile.check_space_for_hashtable(args, 'countgraph', force=False, + khmer.kfile.check_space_for_hashtable(outfile, 'countgraph', + False, args.n_tables, + args.max_tablesize, _testhook_free_space=0) assert 0, "this should fail" except SystemExit as e: @@ -78,11 +81,15 @@ def test_check_space_force(): def test_check_tablespace_force(): save_stderr, sys.stderr = sys.stderr, io.StringIO() + outfile = utils.get_test_data('truncated') + parser = khmer_args.build_counting_args() args = parser.parse_args(['-M', '1e9']) try: - khmer.kfile.check_space_for_hashtable(args, 'countgraph', True, + khmer.kfile.check_space_for_hashtable(outfile, 'countgraph', True, + args.n_tables, + args.max_tablesize, _testhook_free_space=0) assert True, "this should pass" except SystemExit as e: @@ -265,7 +272,9 @@ def test_fail_calculate_foograph_size(): args = FakeArgparseObject(ksize, n_tables, max_tablesize, max_mem) try: - nodegraph = khmer_args._calculate_tablesize(args, 'foograph') + nodegraph = khmer_args._calculate_tablesize(max_tablesize, n_tables, + 'foograph', + max_memory_use=max_mem) assert 0, "previous statement should fail" except AssertionError: raise From a948d5147533e1d16356d270d140518183f7861d Mon Sep 17 00:00:00 2001 From: Jake Fenton Date: Thu, 16 Jul 2015 16:56:42 -0400 Subject: [PATCH 05/11] changelog --- ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index 4b5cfa05cf..6382a20418 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2015-07-16 Jacob Fenton + + * khmer/{kfile,khmer_args}.py: refactored information passing, made it so + space checks happen in the right directory + * oxli/build_graph.py,sandbox/collect-reads.py,scripts/{count-overlap, + abundance-dist-single,filter-abund-single,load-into-counting, + make-initial-stoptags,normalize-by-median,partition-graph, + sample-reads-randomly,trim-low-abund}.py,tests/test_script_arguments.py: + changed to use new arg structure + 2015-07-10 Jacob Fenton * 
oxli/functions.py: changed estimate functions to use correct letter From af2bdc45767a2ee74ea706d440f8efe7182eb6c8 Mon Sep 17 00:00:00 2001 From: Jake Fenton Date: Fri, 17 Jul 2015 20:06:09 -0400 Subject: [PATCH 06/11] refactored order of finding stuff out and args --- khmer/kfile.py | 6 +----- khmer/khmer_args.py | 22 +++++++++++----------- oxli/build_graph.py | 11 ++++++----- sandbox/collect-reads.py | 11 +++++++---- scripts/abundance-dist-single.py | 10 +++++----- scripts/filter-abund-single.py | 11 +++++++---- scripts/load-into-counting.py | 18 +++++++++++------- scripts/normalize-by-median.py | 10 ++++++---- scripts/trim-low-abund.py | 9 +++++---- tests/test_script_arguments.py | 26 +++++++++++++++----------- 10 files changed, 74 insertions(+), 60 deletions(-) diff --git a/khmer/kfile.py b/khmer/kfile.py index 3e3b4535eb..a643064b82 100644 --- a/khmer/kfile.py +++ b/khmer/kfile.py @@ -112,13 +112,9 @@ def check_space(in_files, force, _testhook_free_space=None): sys.exit(1) -def check_space_for_hashtable(outfile_name, hashtype, force, n_tables, - max_tablesize, max_mem=None, +def check_space_for_hashtable(outfile_name, hash_size, force, _testhook_free_space=None): """Check we have enough size to write a hash table.""" - hash_size = khmer_args._calculate_tablesize(max_tablesize, n_tables, - hashtype, - max_memory_use=max_mem) dir_path = os.path.dirname(os.path.realpath(outfile_name)) target = os.statvfs(dir_path) diff --git a/khmer/khmer_args.py b/khmer/khmer_args.py index 15373ced64..5753831970 100644 --- a/khmer/khmer_args.py +++ b/khmer/khmer_args.py @@ -133,8 +133,8 @@ def __call__(self, parser, namespace, values, option_string=None): action=LoadAction) -def _calculate_tablesize(max_tablesize, n_tables, hashtype, multiplier=1.0, - max_memory_use=None): +def calculate_tablesize(max_tablesize, n_tables, hashtype, multiplier=1.0, + max_memory_use=None): if hashtype not in ('countgraph', 'nodegraph'): raise Exception("unknown graph type: %s" % (hashtype,)) @@ 
-158,9 +158,9 @@ def create_nodegraph(args, ksize=None, multiplier=1.0): print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n") sys.exit(1) - tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, - 'nodegraph', multiplier, - args.max_memory_usage) + tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, + 'nodegraph', multiplier, + args.max_memory_usage) return khmer.Hashbits(ksize, tablesize, args.n_tables) @@ -171,9 +171,9 @@ def create_countgraph(args, ksize=None, multiplier=1.0): print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n") sys.exit(1) - tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, - 'countgraph', multiplier=multiplier, - max_memory_use=args.max_memory_usage) + tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, + 'countgraph', multiplier=multiplier, + max_memory_use=args.max_memory_usage) return khmer.CountingHash(ksize, tablesize, args.n_tables) @@ -190,9 +190,9 @@ def report_on_config(args, hashtype='countgraph'): if args.quiet: return - tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, - hashtype, - max_memory_use=args.max_memory_usage) + tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, + hashtype, + max_memory_use=args.max_memory_usage) print_error("\nPARAMETERS:") print_error(" - kmer size = {0} \t\t(-k)".format(args.ksize)) diff --git a/oxli/build_graph.py b/oxli/build_graph.py index fefd24def2..802604262d 100644 --- a/oxli/build_graph.py +++ b/oxli/build_graph.py @@ -20,7 +20,8 @@ import khmer from khmer import khmer_args -from khmer.khmer_args import (report_on_config, info, add_threading_args) +from khmer.khmer_args import (report_on_config, info, add_threading_args, + calculate_tablesize) from khmer.kfile import check_input_files, check_space from khmer.kfile import check_space_for_hashtable from oxli import functions @@ -54,10 +55,10 @@ def main(args): # if optimization args are given, do optimization args = 
functions.do_sanity_checking(args, 0.01) - check_space(args.input_filenames, args.force) - check_space_for_hashtable(args.output_filename, 'nodegraph', args.force, - args.n_tables, args.max_tablesize, - args.max_memory_usage) + tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, + 'nodegraph', + max_memory_use=args.max_memory_usage) + check_space_for_hashtable(args.output_filename, tablesize, args.force) print('Saving k-mer presence table to %s' % base, file=sys.stderr) print('Loading kmers from sequences in %s' % diff --git a/sandbox/collect-reads.py b/sandbox/collect-reads.py index aad7dfdef0..3b79394d5f 100755 --- a/sandbox/collect-reads.py +++ b/sandbox/collect-reads.py @@ -21,7 +21,8 @@ import textwrap import khmer from khmer import khmer_args -from khmer.khmer_args import build_counting_args, report_on_config, info +from khmer.khmer_args import (build_counting_args, report_on_config, info, + calculate_tablesize) from khmer.kfile import check_input_files, check_space from khmer.kfile import check_space_for_hashtable import argparse @@ -77,9 +78,11 @@ def main(): check_input_files(name, False) check_space(args.input_sequence_filename, False) - check_space_for_hashtable(args.output_countingtable_filename, 'countgraph', - False, args.n_tables, args.max_tablesize, - args.max_memory_usage) + tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, + 'countgraph', + max_memory_use=args.max_memory_usage) + check_space_for_hashtable(args.output_countingtable_filename, tablesize, + False) print('Saving k-mer counting table to %s' % base) print('Loading sequences from %s' % repr(filenames)) diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py index ac0f2528d7..d84c0c1d5a 100755 --- a/scripts/abundance-dist-single.py +++ b/scripts/abundance-dist-single.py @@ -24,7 +24,7 @@ import textwrap from khmer import khmer_args from khmer.khmer_args import (build_counting_args, add_threading_args, - report_on_config, info) + 
report_on_config, info, calculate_tablesize) from khmer.kfile import (check_input_files, check_space_for_hashtable) @@ -78,10 +78,10 @@ def main(): # pylint: disable=too-many-locals,too-many-branches check_input_files(args.input_sequence_filename, args.force) if args.savetable: - check_space_for_hashtable(args.savetable, 'countgraph', args.force, - args.n_tables, args.max_tablesize, - args.max_memory_usage) - + tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, + 'countgraph', + max_memory_use=args.max_memory_usage) + check_space_for_hashtable(args.savetable, tablesize, args.force) if (not args.squash_output and os.path.exists(args.output_histogram_filename)): print('ERROR: %s exists; not squashing.' % diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py index cb30eae7e0..6109130d1b 100755 --- a/scripts/filter-abund-single.py +++ b/scripts/filter-abund-single.py @@ -26,7 +26,7 @@ from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader from khmer import khmer_args from khmer.khmer_args import (build_counting_args, report_on_config, - add_threading_args, info) + add_threading_args, info, calculate_tablesize) from khmer.kfile import (check_input_files, check_space, check_space_for_hashtable) # @@ -71,9 +71,12 @@ def main(): check_input_files(args.datafile, args.force) check_space([args.datafile], args.force) if args.savetable: - check_space_for_hashtable(args.savetable, 'countgraph', args.force, - args.n_tables, args.max_tablesize, - args.max_memory_usage) + mem_args = args.max_memory_usage + tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, + 'countgraph', + max_memory_use=mem_args) + check_space_for_hashtable(args.savetable, tablesize, args.force) + report_on_config(args) print('making countgraph', file=sys.stderr) diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index 3a563bf553..c21d655953 100755 --- a/scripts/load-into-counting.py +++ 
b/scripts/load-into-counting.py @@ -22,7 +22,7 @@ import khmer from khmer import khmer_args from khmer.khmer_args import build_counting_args, report_on_config, info,\ - add_threading_args + add_threading_args, calculate_tablesize from khmer.kfile import check_file_writable from khmer.kfile import check_input_files from khmer.kfile import check_space_for_hashtable @@ -84,9 +84,11 @@ def main(): for name in args.input_sequence_filename: check_input_files(name, args.force) - check_space_for_hashtable(args.output_countingtable_filename, 'countgraph', - args.force, args.n_tables, args.max_tablesize, - args.max_memory_usage) + tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, + 'countgraph', + max_memory_use=args.max_memory_usage) + check_space_for_hashtable(args.output_countingtable_filename, tablesize, + args.force) check_file_writable(base) check_file_writable(base + ".info") @@ -125,9 +127,11 @@ def main(): thread.join() if index > 0 and index % 10 == 0: - check_space_for_hashtable(base, 'countgraph', args.force, - args.n_tables, args.max_tablesize, - args.max_memory_usage) + mem_args = args.max_memory_usage + tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, + 'countgraph', + max_memory_use=mem_args) + check_space_for_hashtable(base, tablesize, args.force) print('mid-save', base, file=sys.stderr) htable.save(base) diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py index fea78416eb..8a308c2204 100755 --- a/scripts/normalize-by-median.py +++ b/scripts/normalize-by-median.py @@ -28,7 +28,7 @@ from contextlib import contextmanager from oxli import functions as oxutils from khmer.khmer_args import (build_counting_args, add_loadhash_args, - report_on_config, info) + report_on_config, info, calculate_tablesize) import argparse from khmer.kfile import (check_space, check_space_for_hashtable, check_valid_file_exists) @@ -277,9 +277,11 @@ def main(): # pylint: disable=too-many-branches,too-many-statements 
check_valid_file_exists(args.input_filenames) check_space(args.input_filenames, args.force) if args.savetable: - check_space_for_hashtable(args.savetable, 'countgraph', args.force, - args.n_tables, args.max_tablesize, - args.max_memory_usage) + tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, + 'countgraph', + max_memory_use=args.max_memory_usage) + + check_space_for_hashtable(args.savetable, tablesize, args.force) # load or create counting table. if args.loadtable: diff --git a/scripts/trim-low-abund.py b/scripts/trim-low-abund.py index 694eeb8a24..c093e18732 100755 --- a/scripts/trim-low-abund.py +++ b/scripts/trim-low-abund.py @@ -28,7 +28,7 @@ from khmer import khmer_args from khmer.khmer_args import (build_counting_args, info, add_loadhash_args, - report_on_config) + report_on_config, calculate_tablesize) from khmer.utils import write_record, write_record_pair, broken_paired_reader from khmer.kfile import (check_space, check_space_for_hashtable, check_valid_file_exists) @@ -126,9 +126,10 @@ def main(): check_valid_file_exists(args.input_filenames) check_space(args.input_filenames, args.force) if args.savetable: - check_space_for_hashtable(args.savetable, 'countgraph', args.force, - args.n_tables, args.max_tablesize, - args.max_memory_usage) + tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, + 'countgraph', + max_memory_use=args.max_memory_usage) + check_space_for_hashtable(args.savetable, tablesize, args.force) if args.loadtable: print('loading countgraph from', args.loadtable, file=sys.stderr) diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py index 11310b8c52..6dced2d18c 100644 --- a/tests/test_script_arguments.py +++ b/tests/test_script_arguments.py @@ -53,10 +53,12 @@ def test_check_tablespace(): args = parser.parse_args(['-M', '1e9']) try: - khmer.kfile.check_space_for_hashtable(outfile, 'countgraph', - False, args.n_tables, - args.max_tablesize, - _testhook_free_space=0) + mem_args = 
args.max_memory_usage + tablesize = khmer_args.calculate_tablesize(args.max_tablesize, + args.n_tables, 'countgraph', + max_memory_use=mem_args) + khmer.kfile.check_space_for_hashtable(outfile, tablesize, + False, _testhook_free_space=0) assert 0, "this should fail" except SystemExit as e: print(str(e)) @@ -87,10 +89,12 @@ def test_check_tablespace_force(): args = parser.parse_args(['-M', '1e9']) try: - khmer.kfile.check_space_for_hashtable(outfile, 'countgraph', True, - args.n_tables, - args.max_tablesize, - _testhook_free_space=0) + mem_args = args.max_memory_usage + tablesize = khmer_args.calculate_tablesize(args.max_tablesize, + args.n_tables, 'countgraph', + max_memory_use=mem_args) + khmer.kfile.check_space_for_hashtable(outfile, tablesize, + True, _testhook_free_space=0) assert True, "this should pass" except SystemExit as e: print(str(e)) @@ -272,9 +276,9 @@ def test_fail_calculate_foograph_size(): args = FakeArgparseObject(ksize, n_tables, max_tablesize, max_mem) try: - nodegraph = khmer_args._calculate_tablesize(max_tablesize, n_tables, - 'foograph', - max_memory_use=max_mem) + nodegraph = khmer_args.calculate_tablesize(max_tablesize, n_tables, + 'foograph', + max_memory_use=max_mem) assert 0, "previous statement should fail" except AssertionError: raise From 2dacd904861cd8e5b86d9b1cdda542efe901cb5c Mon Sep 17 00:00:00 2001 From: Jake Fenton Date: Sat, 18 Jul 2015 13:57:47 -0400 Subject: [PATCH 07/11] added note on the existence of "--force" arg to where it's checked for --- khmer/kfile.py | 10 ++++++++++ oxli/functions.py | 2 ++ sandbox/saturate-by-median.py | 2 ++ scripts/sample-reads-randomly.py | 2 ++ 4 files changed, 16 insertions(+) diff --git a/khmer/kfile.py b/khmer/kfile.py index 8af8432c6c..990eeab0af 100644 --- a/khmer/kfile.py +++ b/khmer/kfile.py @@ -34,6 +34,8 @@ def check_input_files(file_path, force): file_path, file=sys.stderr) if not force: + print("NOTE: This can be overridden using the --force argument", + file=sys.stderr) 
print("Exiting", file=sys.stderr) sys.exit(1) else: @@ -47,12 +49,16 @@ def check_input_files(file_path, force): print("ERROR: Input file %s does not exist; exiting" % file_path, file=sys.stderr) if not force: + print("NOTE: This can be overridden using the --force argument", + file=sys.stderr) sys.exit(1) else: if os.stat(file_path).st_size == 0: print("ERROR: Input file %s is empty; exiting." % file_path, file=sys.stderr) if not force: + print("NOTE: This can be overridden using the --force" + " argument", file=sys.stderr) sys.exit(1) @@ -109,6 +115,8 @@ def check_space(in_files, force, _testhook_free_space=None): print(" Free space: %.1f GB" % (float(free_space) / 1e9,), file=sys.stderr) if not force: + print("NOTE: This can be overridden using the --force argument", + file=sys.stderr) sys.exit(1) @@ -135,6 +143,8 @@ def check_space_for_hashtable(outfile_name, hash_size, force, print(" Free space: %.1f GB" % (float(free_space) / 1e9,), file=sys.stderr) if not force: + print("NOTE: This can be overridden using the --force argument", + file=sys.stderr) sys.exit(1) diff --git a/oxli/functions.py b/oxli/functions.py index b5ffe1ed86..19ccaa41d7 100644 --- a/oxli/functions.py +++ b/oxli/functions.py @@ -131,6 +131,8 @@ def do_sanity_checking(args, desired_max_fp): *** which is above the recommended false positive ceiling of {1}!""" .format(res.fp_rate, desired_max_fp), file=sys.stderr) if not args.force: + print("NOTE: This can be overridden using the --force" + " argument", file=sys.stderr) print("*** Aborting...!", file=sys.stderr) sys.exit(1) else: diff --git a/sandbox/saturate-by-median.py b/sandbox/saturate-by-median.py index 7a11d519ee..a47cde43fc 100755 --- a/sandbox/saturate-by-median.py +++ b/sandbox/saturate-by-median.py @@ -215,6 +215,8 @@ def main(): # pylint: disable=too-many-branches,too-many-statements except IOError as err: handle_error(err, input_filename) if not args.force: + print("NOTE: This can be overridden using the --force" + " argument", 
file=sys.stderr) print('** Exiting!', file=sys.stderr) sys.exit(1) else: diff --git a/scripts/sample-reads-randomly.py b/scripts/sample-reads-randomly.py index 3cdc4260bb..21ad095126 100755 --- a/scripts/sample-reads-randomly.py +++ b/scripts/sample-reads-randomly.py @@ -102,6 +102,8 @@ def main(): sys.stderr.write( "Error: cannot specify -o with more than one sample.") if not args.force: + print("NOTE: This can be overridden using the --force" + " argument", file=sys.stderr) sys.exit(1) output_filename = output_file.name else: From c11d25f2bcb1bb22eb030dcd80f8d5c5c357d8af Mon Sep 17 00:00:00 2001 From: Jake Fenton Date: Sun, 19 Jul 2015 12:18:26 -0400 Subject: [PATCH 08/11] "It's dangerous to go alone--Take some args!" --- khmer/khmer_args.py | 23 ++++++++--------------- oxli/build_graph.py | 4 +--- sandbox/collect-reads.py | 4 +--- scripts/abundance-dist-single.py | 4 +--- scripts/filter-abund-single.py | 4 +--- scripts/load-into-counting.py | 8 ++------ scripts/normalize-by-median.py | 4 +--- scripts/trim-low-abund.py | 4 +--- tests/test_script_arguments.py | 12 +++--------- 9 files changed, 19 insertions(+), 48 deletions(-) diff --git a/khmer/khmer_args.py b/khmer/khmer_args.py index 962b756988..24e87471e5 100644 --- a/khmer/khmer_args.py +++ b/khmer/khmer_args.py @@ -132,20 +132,19 @@ def __call__(self, parser, namespace, values, option_string=None): action=LoadAction) -def calculate_tablesize(max_tablesize, n_tables, hashtype, multiplier=1.0, - max_memory_use=None): +def calculate_tablesize(args, hashtype, multiplier=1.0): if hashtype not in ('countgraph', 'nodegraph'): raise Exception("unknown graph type: %s" % (hashtype,)) - if max_memory_use: + if args.max_memory_usage: if hashtype == 'countgraph': - tablesize = max_memory_use / n_tables / \ + tablesize = args.max_memory_usage / args.n_tables / \ float(multiplier) elif hashtype == 'nodegraph': - tablesize = 8. * max_memory_use / n_tables / \ + tablesize = 8. 
* args.max_memory_usage / args.n_tables / \ float(multiplier) else: - tablesize = max_tablesize + tablesize = args.max_tablesize return tablesize @@ -157,9 +156,7 @@ def create_nodegraph(args, ksize=None, multiplier=1.0): print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n") sys.exit(1) - tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, - 'nodegraph', multiplier, - args.max_memory_usage) + tablesize = calculate_tablesize(args, 'nodegraph', multiplier) return khmer.Hashbits(ksize, tablesize, args.n_tables) @@ -170,9 +167,7 @@ def create_countgraph(args, ksize=None, multiplier=1.0): print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n") sys.exit(1) - tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, - 'countgraph', multiplier=multiplier, - max_memory_use=args.max_memory_usage) + tablesize = calculate_tablesize(args, 'countgraph', multiplier=multiplier) return khmer.CountingHash(ksize, tablesize, args.n_tables) @@ -189,9 +184,7 @@ def report_on_config(args, hashtype='countgraph'): if args.quiet: return - tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, - hashtype, - max_memory_use=args.max_memory_usage) + tablesize = calculate_tablesize(args, hashtype) print_error("\nPARAMETERS:") print_error(" - kmer size = {0} \t\t(-k)".format(args.ksize)) diff --git a/oxli/build_graph.py b/oxli/build_graph.py index 44b9109b2e..adf4de622a 100644 --- a/oxli/build_graph.py +++ b/oxli/build_graph.py @@ -55,9 +55,7 @@ def main(args): # if optimization args are given, do optimization args = functions.do_sanity_checking(args, 0.01) - tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, - 'nodegraph', - max_memory_use=args.max_memory_usage) + tablesize = calculate_tablesize(args, 'nodegraph') check_space_for_hashtable(args.output_filename, tablesize, args.force) print('Saving k-mer presence table to %s' % base, file=sys.stderr) diff --git a/sandbox/collect-reads.py b/sandbox/collect-reads.py index 
3b79394d5f..ca297272a5 100755 --- a/sandbox/collect-reads.py +++ b/sandbox/collect-reads.py @@ -78,9 +78,7 @@ def main(): check_input_files(name, False) check_space(args.input_sequence_filename, False) - tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, - 'countgraph', - max_memory_use=args.max_memory_usage) + tablesize = calculate_tablesize(args, 'countgraph') check_space_for_hashtable(args.output_countingtable_filename, tablesize, False) diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py index d84c0c1d5a..4976a8ba63 100755 --- a/scripts/abundance-dist-single.py +++ b/scripts/abundance-dist-single.py @@ -78,9 +78,7 @@ def main(): # pylint: disable=too-many-locals,too-many-branches check_input_files(args.input_sequence_filename, args.force) if args.savetable: - tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, - 'countgraph', - max_memory_use=args.max_memory_usage) + tablesize = calculate_tablesize(args, 'countgraph') check_space_for_hashtable(args.savetable, tablesize, args.force) if (not args.squash_output and os.path.exists(args.output_histogram_filename)): diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py index 6109130d1b..9b5f9b71b5 100755 --- a/scripts/filter-abund-single.py +++ b/scripts/filter-abund-single.py @@ -72,9 +72,7 @@ def main(): check_space([args.datafile], args.force) if args.savetable: mem_args = args.max_memory_usage - tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, - 'countgraph', - max_memory_use=mem_args) + tablesize = calculate_tablesize(args, 'countgraph') check_space_for_hashtable(args.savetable, tablesize, args.force) report_on_config(args) diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index c21d655953..0d5b11eddc 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -84,9 +84,7 @@ def main(): for name in args.input_sequence_filename: check_input_files(name, args.force) - 
tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, - 'countgraph', - max_memory_use=args.max_memory_usage) + tablesize = calculate_tablesize(args, 'countgraph') check_space_for_hashtable(args.output_countingtable_filename, tablesize, args.force) @@ -128,9 +126,7 @@ def main(): if index > 0 and index % 10 == 0: mem_args = args.max_memory_usage - tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, - 'countgraph', - max_memory_use=mem_args) + tablesize = calculate_tablesize(args, 'countgraph') check_space_for_hashtable(base, tablesize, args.force) print('mid-save', base, file=sys.stderr) diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py index 2828b5b680..614d3d2e46 100755 --- a/scripts/normalize-by-median.py +++ b/scripts/normalize-by-median.py @@ -272,9 +272,7 @@ def main(): # pylint: disable=too-many-branches,too-many-statements check_valid_file_exists(args.input_filenames) check_space(args.input_filenames, args.force) if args.savetable: - tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, - 'countgraph', - max_memory_use=args.max_memory_usage) + tablesize = calculate_tablesize(args, 'countgraph') check_space_for_hashtable(args.savetable, tablesize, args.force) diff --git a/scripts/trim-low-abund.py b/scripts/trim-low-abund.py index c093e18732..1254947c41 100755 --- a/scripts/trim-low-abund.py +++ b/scripts/trim-low-abund.py @@ -126,9 +126,7 @@ def main(): check_valid_file_exists(args.input_filenames) check_space(args.input_filenames, args.force) if args.savetable: - tablesize = calculate_tablesize(args.max_tablesize, args.n_tables, - 'countgraph', - max_memory_use=args.max_memory_usage) + tablesize = calculate_tablesize(args, 'countgraph') check_space_for_hashtable(args.savetable, tablesize, args.force) if args.loadtable: diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py index 6dced2d18c..4e2f16a140 100644 --- a/tests/test_script_arguments.py +++ 
b/tests/test_script_arguments.py @@ -54,9 +54,7 @@ def test_check_tablespace(): try: mem_args = args.max_memory_usage - tablesize = khmer_args.calculate_tablesize(args.max_tablesize, - args.n_tables, 'countgraph', - max_memory_use=mem_args) + tablesize = khmer_args.calculate_tablesize(args, 'countgraph') khmer.kfile.check_space_for_hashtable(outfile, tablesize, False, _testhook_free_space=0) assert 0, "this should fail" @@ -90,9 +88,7 @@ def test_check_tablespace_force(): try: mem_args = args.max_memory_usage - tablesize = khmer_args.calculate_tablesize(args.max_tablesize, - args.n_tables, 'countgraph', - max_memory_use=mem_args) + tablesize = khmer_args.calculate_tablesize(args, 'countgraph') khmer.kfile.check_space_for_hashtable(outfile, tablesize, True, _testhook_free_space=0) assert True, "this should pass" @@ -276,9 +272,7 @@ def test_fail_calculate_foograph_size(): args = FakeArgparseObject(ksize, n_tables, max_tablesize, max_mem) try: - nodegraph = khmer_args.calculate_tablesize(max_tablesize, n_tables, - 'foograph', - max_memory_use=max_mem) + nodegraph = khmer_args.calculate_tablesize(args, 'foograph') assert 0, "previous statement should fail" except AssertionError: raise From 9fd52c9cfcd2a695c5e6e79ae3771387e57906ee Mon Sep 17 00:00:00 2001 From: Jake Fenton Date: Mon, 20 Jul 2015 12:08:31 -0400 Subject: [PATCH 09/11] Gigabyte floats are my favorite --- khmer/kfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/khmer/kfile.py b/khmer/kfile.py index 990eeab0af..c434fae1bb 100644 --- a/khmer/kfile.py +++ b/khmer/kfile.py @@ -136,7 +136,7 @@ def check_space_for_hashtable(outfile_name, hash_size, force, if size_diff > 0: print("ERROR: Not enough free space on disk " "for saved table files;" - " Need at least %s GB more." + " Need at least %.1f GB more." 
% (float(size_diff) / 1e9,), file=sys.stderr) print(" Table size: %.1f GB" % (float(hash_size) / 1e9,), file=sys.stderr) From 766885c230810da31c84f826e74dc189af3ba985 Mon Sep 17 00:00:00 2001 From: Jake Fenton Date: Tue, 21 Jul 2015 13:36:10 -0400 Subject: [PATCH 10/11] formatting --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index 43a95f5bf7..21ed4f849e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -17,6 +17,7 @@ * tests/test_normalize_by_median.py: updated/added tests for reporting. 2015-07-17 Jacob Fenton + * oxli/{functions,build_graph}.py,scripts/{load-graph,normalize-by-median, abundance-dist}.py,tests/test_{normalize_by_median,subset_graph,hashbits, oxli_function}.py: pylint cleanup From 4ecd60be1d78980fea10759db1515f5ee3e9eaa0 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Tue, 21 Jul 2015 18:55:02 -0400 Subject: [PATCH 11/11] minor cleanup --- ChangeLog | 59 ++++++++++++++++++---------------- khmer/kfile.py | 2 +- scripts/filter-abund-single.py | 1 - scripts/load-into-counting.py | 1 - scripts/normalize-by-median.py | 1 - tests/test_script_arguments.py | 2 -- 6 files changed, 33 insertions(+), 33 deletions(-) diff --git a/ChangeLog b/ChangeLog index 21ed4f849e..39d206cad0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,12 +1,16 @@ -2015-07-20 Jacob Fenton +2015-07-21 Jacob Fenton * khmer/{kfile,khmer_args}.py: refactored information passing, made it so - space checks happen in the right directory - * oxli/build_graph.py,sandbox/collect-reads.py,scripts/{count-overlap, + space checks happen in the right directory. 
+ * oxli/build_graph.py,sandbox/collect-reads.py,scripts/{ abundance-dist-single,filter-abund-single,load-into-counting, - make-initial-stoptags,normalize-by-median,partition-graph, - sample-reads-randomly,trim-low-abund}.py,tests/test_script_arguments.py: - changed to use new arg structure + normalize-by-median,trim-low-abund}.py,tests/test_script_arguments.py: + changed to use new arg structure for checking hashtable save space. + * oxli/functions.py,sandbox/saturate-by-median.py: updated error message + to mention --force option. + * scripts/{count-overlap,load-into-counting,make-initial-stoptags, + partition-graph,sample-reads-randomly}.py: removed unnecessary call to + check_space. 2015-07-20 Titus Brown @@ -20,43 +24,44 @@ * oxli/{functions,build_graph}.py,scripts/{load-graph,normalize-by-median, abundance-dist}.py,tests/test_{normalize_by_median,subset_graph,hashbits, - oxli_function}.py: pylint cleanup + oxli_function}.py: pylint cleanup. 2015-07-17 Michael R. Crusoe - * Makefile, tests/test_read_aligner.py: import khmer when pylinting + * Makefile, tests/test_read_aligner.py: import khmer when pylinting. 2015-07-17 Michael R. Crusoe * lib/read_parser.{cc,hh}: use std::string everywhere to match existing - exceptions + exceptions. 2015-07-10 Jacob Fenton * khmer/kfile.py: changed check_valid_file_exists to recognize fifos as - non-empty - * tests/test_normalize_by_median.py: added test + non-empty. + * tests/test_normalize_by_median.py: added test. 2015-07-10 Jacob Fenton * oxli/functions.py: changed estimate functions to use correct letter - abbreviations - * sandbox/estimate_optimal_hash.py: changed to use renamed estimate - functions + abbreviations. + * sandbox/estimate_optimal_hash.py: changed to use renamed estimate + functions. * sandbox/unique-kmers.py: changed to not output recommended HT args by - default + default. 
+ * tests/test_oxli_functions.py: changed to use renamed estimate functions. 2015-07-10 Jacob Fenton - * oxli/functions.py: added '--force' check to sanity check + * oxli/functions.py: added '--force' check to sanity check. 2015-07-10 Jacob Fenton - * oxli/functions.py: moved optimization/sanity check func to oxli + * oxli/functions.py: moved optimization/sanity check func to oxli. * scripts/normalize-by-median.py,oxli/build_graph.py: added - optimization/sanity checking via oxli estimation funcs - * tests/test_normalize_by_median.py: updated tests to cover estimation funcs + optimization/sanity checking via oxli estimation funcs. + * tests/test_normalize_by_median.py: updated tests to cover estimation + functions. 2015-07-08 Luiz Irber @@ -74,22 +79,22 @@ 2015-07-05 Jacob Fenton * doc/whats-new-2.0.rst: added in normalize-by-median.py broken paired - updates + updates. 2015-07-05 Michael R. Crusoe - * Makefile: fix cppcheck invocation + * Makefile: fix cppcheck invocation. * khmer/_khmer.cc: switch to prefix increment for non-primitive objects, - use a C++ cast, adjust scope + use a C++ cast, adjust scope. * lib/hashtable.{hh,cc}: make copy constructor no-op explicit. adjust scope - * lib/{ht-diff,test-HashTables,test-Parser}.cc: remove unused test code - * lib/labelhash.cc,hllcounter.cc: astyle reformatting - * lib/read_parsers.hh: more explicit constructors + * lib/{ht-diff,test-HashTables,test-Parser}.cc: remove unused test code. + * lib/labelhash.cc,hllcounter.cc: astyle reformatting. + * lib/read_parsers.hh: more explicit constructors. 2015-07-05 Michael R. Crusoe * sandbox/{collect-variants,optimal_args_hashbits,sweep-files}.py: - update API usage + update API usage. 
2015-07-05 Titus Brown diff --git a/khmer/kfile.py b/khmer/kfile.py index c434fae1bb..430f8c64cd 100644 --- a/khmer/kfile.py +++ b/khmer/kfile.py @@ -122,7 +122,7 @@ def check_space(in_files, force, _testhook_free_space=None): def check_space_for_hashtable(outfile_name, hash_size, force, _testhook_free_space=None): - """Check we have enough size to write a hash table.""" + """Check that we have enough size to write the specified hash table.""" dir_path = os.path.dirname(os.path.realpath(outfile_name)) target = os.statvfs(dir_path) diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py index 9b5f9b71b5..1cb9947ad7 100755 --- a/scripts/filter-abund-single.py +++ b/scripts/filter-abund-single.py @@ -71,7 +71,6 @@ def main(): check_input_files(args.datafile, args.force) check_space([args.datafile], args.force) if args.savetable: - mem_args = args.max_memory_usage tablesize = calculate_tablesize(args, 'countgraph') check_space_for_hashtable(args.savetable, tablesize, args.force) diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index 0d5b11eddc..fcf8fbb109 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -125,7 +125,6 @@ def main(): thread.join() if index > 0 and index % 10 == 0: - mem_args = args.max_memory_usage tablesize = calculate_tablesize(args, 'countgraph') check_space_for_hashtable(base, tablesize, args.force) print('mid-save', base, file=sys.stderr) diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py index 60a74770d2..68601d663d 100755 --- a/scripts/normalize-by-median.py +++ b/scripts/normalize-by-median.py @@ -306,7 +306,6 @@ def main(): # pylint: disable=too-many-branches,too-many-statements check_space(args.input_filenames, args.force) if args.savetable: tablesize = calculate_tablesize(args, 'countgraph') - check_space_for_hashtable(args.savetable, tablesize, args.force) # load or create counting table. 
diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py index 4e2f16a140..bf2148dc02 100644 --- a/tests/test_script_arguments.py +++ b/tests/test_script_arguments.py @@ -53,7 +53,6 @@ def test_check_tablespace(): args = parser.parse_args(['-M', '1e9']) try: - mem_args = args.max_memory_usage tablesize = khmer_args.calculate_tablesize(args, 'countgraph') khmer.kfile.check_space_for_hashtable(outfile, tablesize, False, _testhook_free_space=0) @@ -87,7 +86,6 @@ def test_check_tablespace_force(): args = parser.parse_args(['-M', '1e9']) try: - mem_args = args.max_memory_usage tablesize = khmer_args.calculate_tablesize(args, 'countgraph') khmer.kfile.check_space_for_hashtable(outfile, tablesize, True, _testhook_free_space=0)