diff --git a/ChangeLog b/ChangeLog index 47168e8cec..39d206cad0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2015-07-21 Jacob Fenton + + * khmer/{kfile,khmer_args}.py: refactored information passing, made it so + space checks happen in the right directory. + * oxli/build_graph.py,sandbox/collect-reads.py,scripts/{ + abundance-dist-single,filter-abund-single,load-into-counting, + normalize-by-median,trim-low-abund}.py,tests/test_script_arguments.py: + changed to use new arg structure for checking hashtable save space. + * oxli/functions.py,sandbox/saturate-by-median.py: updated error message + to mention --force option. + * scripts/{count-overlap,load-into-counting,make-initial-stoptags, + partition-graph,sample-reads-randomly}.py: removed unnecessary call to + check_space. + 2015-07-20 Titus Brown * khmer/__init__.py: cleaned up FP rate reporting. @@ -10,43 +24,44 @@ * oxli/{functions,build_graph}.py,scripts/{load-graph,normalize-by-median, abundance-dist}.py,tests/test_{normalize_by_median,subset_graph,hashbits, - oxli_function}.py: pylint cleanup + oxli_function}.py: pylint cleanup. 2015-07-17 Michael R. Crusoe - * Makefile, tests/test_read_aligner.py: import khmer when pylinting + * Makefile, tests/test_read_aligner.py: import khmer when pylinting. 2015-07-17 Michael R. Crusoe * lib/read_parser.{cc,hh}: use std::string everywhere to match existing - exceptions + exceptions. 2015-07-10 Jacob Fenton * khmer/kfile.py: changed check_valid_file_exists to recognize fifos as - non-empty - * tests/test_normalize_by_median.py: added test + non-empty. + * tests/test_normalize_by_median.py: added test. 2015-07-10 Jacob Fenton * oxli/functions.py: changed estimate functions to use correct letter - abbreviations - * sandbox/estimate_optimal_hash.py: changed to use renamed estimate - functions + abbreviations. + * sandbox/estimate_optimal_hash.py: changed to use renamed estimate + functions. 
* sandbox/unique-kmers.py: changed to not output recommended HT args by - default - * tests/test_oxli_functions.py: changed to use renamed estimate functions + default. + * tests/test_oxli_functions.py: changed to use renamed estimate functions. 2015-07-10 Jacob Fenton - * oxli/functions.py: added '--force' check to sanity check + * oxli/functions.py: added '--force' check to sanity check. 2015-07-10 Jacob Fenton - * oxli/functions.py: moved optimization/sanity check func to oxli + * oxli/functions.py: moved optimization/sanity check func to oxli. * scripts/normalize-by-median.py,oxli/build_graph.py: added - optimization/sanity checking via oxli estimation funcs - * tests/test_normalize_by_median.py: updated tests to cover estimation funcs + optimization/sanity checking via oxli estimation funcs. + * tests/test_normalize_by_median.py: updated tests to cover estimation + functions. 2015-07-08 Luiz Irber @@ -64,22 +79,22 @@ 2015-07-05 Jacob Fenton * doc/whats-new-2.0.rst: added in normalize-by-median.py broken paired - updates + updates. 2015-07-05 Michael R. Crusoe - * Makefile: fix cppcheck invocation + * Makefile: fix cppcheck invocation. * khmer/_khmer.cc: switch to prefix increment for non-primitive objects, - use a C++ cast, adjust scope + use a C++ cast, adjust scope. * lib/hashtable.{hh,cc}: make copy constructor no-op explicit. adjust scope - * lib/{ht-diff,test-HashTables,test-Parser}.cc: remove unused test code - * lib/labelhash.cc,hllcounter.cc: astyle reformatting - * lib/read_parsers.hh: more explicit constructors + * lib/{ht-diff,test-HashTables,test-Parser}.cc: remove unused test code. + * lib/labelhash.cc,hllcounter.cc: astyle reformatting. + * lib/read_parsers.hh: more explicit constructors. 2015-07-05 Michael R. Crusoe * sandbox/{collect-variants,optimal_args_hashbits,sweep-files}.py: - update API usage + update API usage. 
2015-07-05 Titus Brown diff --git a/khmer/kfile.py b/khmer/kfile.py index 0924009874..430f8c64cd 100644 --- a/khmer/kfile.py +++ b/khmer/kfile.py @@ -34,6 +34,8 @@ def check_input_files(file_path, force): file_path, file=sys.stderr) if not force: + print("NOTE: This can be overridden using the --force argument", + file=sys.stderr) print("Exiting", file=sys.stderr) sys.exit(1) else: @@ -47,12 +49,16 @@ def check_input_files(file_path, force): print("ERROR: Input file %s does not exist; exiting" % file_path, file=sys.stderr) if not force: + print("NOTE: This can be overridden using the --force argument", + file=sys.stderr) sys.exit(1) else: if os.stat(file_path).st_size == 0: print("ERROR: Input file %s is empty; exiting." % file_path, file=sys.stderr) if not force: + print("NOTE: This can be overridden using the --force" + " argument", file=sys.stderr) sys.exit(1) @@ -109,17 +115,18 @@ def check_space(in_files, force, _testhook_free_space=None): print(" Free space: %.1f GB" % (float(free_space) / 1e9,), file=sys.stderr) if not force: + print("NOTE: This can be overridden using the --force argument", + file=sys.stderr) sys.exit(1) -def check_space_for_hashtable(args, hashtype, force, +def check_space_for_hashtable(outfile_name, hash_size, force, _testhook_free_space=None): - """Check we have enough size to write a hash table.""" - hash_size = khmer_args._calculate_tablesize(args, hashtype) + """Check that we have enough size to write the specified hash table.""" - cwd = os.getcwd() - dir_path = os.path.dirname(os.path.realpath(cwd)) + dir_path = os.path.dirname(os.path.realpath(outfile_name)) target = os.statvfs(dir_path) + if _testhook_free_space is None: free_space = target.f_frsize * target.f_bavail else: @@ -129,13 +136,15 @@ def check_space_for_hashtable(args, hashtype, force, if size_diff > 0: print("ERROR: Not enough free space on disk " "for saved table files;" - " Need at least %s GB more." + " Need at least %.1f GB more." 
% (float(size_diff) / 1e9,), file=sys.stderr) print(" Table size: %.1f GB" % (float(hash_size) / 1e9,), file=sys.stderr) print(" Free space: %.1f GB" % (float(free_space) / 1e9,), file=sys.stderr) if not force: + print("NOTE: This can be overridden using the --force argument", + file=sys.stderr) sys.exit(1) diff --git a/khmer/khmer_args.py b/khmer/khmer_args.py index 3a9a3da0fa..24e87471e5 100644 --- a/khmer/khmer_args.py +++ b/khmer/khmer_args.py @@ -132,7 +132,7 @@ def __call__(self, parser, namespace, values, option_string=None): action=LoadAction) -def _calculate_tablesize(args, hashtype, multiplier=1.0): +def calculate_tablesize(args, hashtype, multiplier=1.0): if hashtype not in ('countgraph', 'nodegraph'): raise Exception("unknown graph type: %s" % (hashtype,)) @@ -156,7 +156,7 @@ def create_nodegraph(args, ksize=None, multiplier=1.0): print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n") sys.exit(1) - tablesize = _calculate_tablesize(args, 'nodegraph', multiplier=multiplier) + tablesize = calculate_tablesize(args, 'nodegraph', multiplier) return khmer.Hashbits(ksize, tablesize, args.n_tables) @@ -167,7 +167,7 @@ def create_countgraph(args, ksize=None, multiplier=1.0): print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n") sys.exit(1) - tablesize = _calculate_tablesize(args, 'countgraph', multiplier=multiplier) + tablesize = calculate_tablesize(args, 'countgraph', multiplier=multiplier) return khmer.CountingHash(ksize, tablesize, args.n_tables) @@ -184,7 +184,7 @@ def report_on_config(args, hashtype='countgraph'): if args.quiet: return - tablesize = _calculate_tablesize(args, hashtype) + tablesize = calculate_tablesize(args, hashtype) print_error("\nPARAMETERS:") print_error(" - kmer size = {0} \t\t(-k)".format(args.ksize)) diff --git a/oxli/build_graph.py b/oxli/build_graph.py index 1d7dd096ef..adf4de622a 100644 --- a/oxli/build_graph.py +++ b/oxli/build_graph.py @@ -20,7 +20,8 @@ import khmer from khmer import khmer_args -from 
khmer.khmer_args import (report_on_config, info, add_threading_args) +from khmer.khmer_args import (report_on_config, info, add_threading_args, + calculate_tablesize) from khmer.kfile import check_input_files, check_space from khmer.kfile import check_space_for_hashtable from oxli import functions @@ -54,8 +55,8 @@ def main(args): # if optimization args are given, do optimization args = functions.do_sanity_checking(args, 0.01) - check_space(args.input_filenames, args.force) - check_space_for_hashtable(args, 'nodegraph', args.force) + tablesize = calculate_tablesize(args, 'nodegraph') + check_space_for_hashtable(args.output_filename, tablesize, args.force) print('Saving k-mer presence table to %s' % base, file=sys.stderr) print('Loading kmers from sequences in %s' % diff --git a/oxli/functions.py b/oxli/functions.py index b5ffe1ed86..19ccaa41d7 100644 --- a/oxli/functions.py +++ b/oxli/functions.py @@ -131,6 +131,8 @@ def do_sanity_checking(args, desired_max_fp): *** which is above the recommended false positive ceiling of {1}!""" .format(res.fp_rate, desired_max_fp), file=sys.stderr) if not args.force: + print("NOTE: This can be overridden using the --force" + " argument", file=sys.stderr) print("*** Aborting...!", file=sys.stderr) sys.exit(1) else: diff --git a/sandbox/collect-reads.py b/sandbox/collect-reads.py index 48c4a115f1..ca297272a5 100755 --- a/sandbox/collect-reads.py +++ b/sandbox/collect-reads.py @@ -21,7 +21,8 @@ import textwrap import khmer from khmer import khmer_args -from khmer.khmer_args import build_counting_args, report_on_config, info +from khmer.khmer_args import (build_counting_args, report_on_config, info, + calculate_tablesize) from khmer.kfile import check_input_files, check_space from khmer.kfile import check_space_for_hashtable import argparse @@ -77,7 +78,9 @@ def main(): check_input_files(name, False) check_space(args.input_sequence_filename, False) - check_space_for_hashtable(args, 'countgraph', False) + tablesize = 
calculate_tablesize(args, 'countgraph') + check_space_for_hashtable(args.output_countingtable_filename, tablesize, + False) print('Saving k-mer counting table to %s' % base) print('Loading sequences from %s' % repr(filenames)) diff --git a/sandbox/saturate-by-median.py b/sandbox/saturate-by-median.py index 7a11d519ee..a47cde43fc 100755 --- a/sandbox/saturate-by-median.py +++ b/sandbox/saturate-by-median.py @@ -215,6 +215,8 @@ def main(): # pylint: disable=too-many-branches,too-many-statements except IOError as err: handle_error(err, input_filename) if not args.force: + print("NOTE: This can be overridden using the --force" + " argument", file=sys.stderr) print('** Exiting!', file=sys.stderr) sys.exit(1) else: diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py index 10d2109c77..4976a8ba63 100755 --- a/scripts/abundance-dist-single.py +++ b/scripts/abundance-dist-single.py @@ -24,9 +24,8 @@ import textwrap from khmer import khmer_args from khmer.khmer_args import (build_counting_args, add_threading_args, - report_on_config, info) -from khmer.kfile import (check_input_files, check_space, - check_space_for_hashtable) + report_on_config, info, calculate_tablesize) +from khmer.kfile import (check_input_files, check_space_for_hashtable) def get_parser(): @@ -78,10 +77,9 @@ def main(): # pylint: disable=too-many-locals,too-many-branches report_on_config(args) check_input_files(args.input_sequence_filename, args.force) - check_space([args.input_sequence_filename], args.force) if args.savetable: - check_space_for_hashtable(args, 'countgraph', args.force) - + tablesize = calculate_tablesize(args, 'countgraph') + check_space_for_hashtable(args.savetable, tablesize, args.force) if (not args.squash_output and os.path.exists(args.output_histogram_filename)): print('ERROR: %s exists; not squashing.' 
% diff --git a/scripts/count-overlap.py b/scripts/count-overlap.py index a8c715d161..1016b10149 100755 --- a/scripts/count-overlap.py +++ b/scripts/count-overlap.py @@ -24,7 +24,7 @@ import khmer import textwrap from khmer import khmer_args -from khmer.kfile import check_input_files, check_space +from khmer.kfile import check_input_files from khmer.khmer_args import (build_hashbits_args, report_on_config, info) @@ -60,8 +60,6 @@ def main(): for infile in [args.ptfile, args.fafile]: check_input_files(infile, args.force) - check_space([args.ptfile, args.fafile], args.force) - print('loading k-mer presence table from', args.ptfile, file=sys.stderr) ht1 = khmer.load_hashbits(args.ptfile) kmer_size = ht1.ksize() diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py index b22a4946bd..1cb9947ad7 100755 --- a/scripts/filter-abund-single.py +++ b/scripts/filter-abund-single.py @@ -26,7 +26,7 @@ from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader from khmer import khmer_args from khmer.khmer_args import (build_counting_args, report_on_config, - add_threading_args, info) + add_threading_args, info, calculate_tablesize) from khmer.kfile import (check_input_files, check_space, check_space_for_hashtable) # @@ -71,7 +71,9 @@ def main(): check_input_files(args.datafile, args.force) check_space([args.datafile], args.force) if args.savetable: - check_space_for_hashtable(args, 'countgraph', args.force) + tablesize = calculate_tablesize(args, 'countgraph') + check_space_for_hashtable(args.savetable, tablesize, args.force) + report_on_config(args) print('making countgraph', file=sys.stderr) diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py index f907c36723..fcf8fbb109 100755 --- a/scripts/load-into-counting.py +++ b/scripts/load-into-counting.py @@ -22,9 +22,9 @@ import khmer from khmer import khmer_args from khmer.khmer_args import build_counting_args, report_on_config, info,\ - add_threading_args + 
add_threading_args, calculate_tablesize from khmer.kfile import check_file_writable -from khmer.kfile import check_input_files, check_space +from khmer.kfile import check_input_files from khmer.kfile import check_space_for_hashtable @@ -84,8 +84,9 @@ def main(): for name in args.input_sequence_filename: check_input_files(name, args.force) - check_space(args.input_sequence_filename, args.force) - check_space_for_hashtable(args, 'countgraph', args.force) + tablesize = calculate_tablesize(args, 'countgraph') + check_space_for_hashtable(args.output_countingtable_filename, tablesize, + args.force) check_file_writable(base) check_file_writable(base + ".info") @@ -124,7 +125,8 @@ def main(): thread.join() if index > 0 and index % 10 == 0: - check_space_for_hashtable(args, 'countgraph', args.force) + tablesize = calculate_tablesize(args, 'countgraph') + check_space_for_hashtable(base, tablesize, args.force) print('mid-save', base, file=sys.stderr) htable.save(base) diff --git a/scripts/make-initial-stoptags.py b/scripts/make-initial-stoptags.py index 29a08ef7d7..e99a690275 100755 --- a/scripts/make-initial-stoptags.py +++ b/scripts/make-initial-stoptags.py @@ -18,7 +18,7 @@ import khmer from khmer import khmer_args from khmer.khmer_args import (build_counting_args, info) -from khmer.kfile import check_input_files, check_space +from khmer.kfile import check_input_files DEFAULT_SUBSET_SIZE = int(1e4) DEFAULT_COUNTING_HT_SIZE = 3e6 # number of bytes @@ -83,8 +83,6 @@ def main(): for _ in infiles: check_input_files(_, args.force) - check_space(infiles, args.force) - print('loading htable %s.pt' % graphbase, file=sys.stderr) htable = khmer.load_hashbits(graphbase + '.pt') diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py index dd2b91b0ed..68601d663d 100755 --- a/scripts/normalize-by-median.py +++ b/scripts/normalize-by-median.py @@ -28,7 +28,7 @@ from contextlib import contextmanager from oxli import functions as oxutils from khmer.khmer_args import 
(build_counting_args, add_loadhash_args, - report_on_config, info) + report_on_config, info, calculate_tablesize) import argparse from khmer.kfile import (check_space, check_space_for_hashtable, check_valid_file_exists) @@ -305,7 +305,8 @@ def main(): # pylint: disable=too-many-branches,too-many-statements check_valid_file_exists(args.input_filenames) check_space(args.input_filenames, args.force) if args.savetable: - check_space_for_hashtable(args, 'countgraph', args.force) + tablesize = calculate_tablesize(args, 'countgraph') + check_space_for_hashtable(args.savetable, tablesize, args.force) # load or create counting table. if args.loadtable: diff --git a/scripts/partition-graph.py b/scripts/partition-graph.py index 73666e2040..bb03c0b655 100755 --- a/scripts/partition-graph.py +++ b/scripts/partition-graph.py @@ -24,7 +24,7 @@ import khmer import sys from khmer.khmer_args import (add_threading_args, info) -from khmer.kfile import check_input_files, check_space +from khmer.kfile import check_input_files # Debugging Support import re @@ -111,8 +111,6 @@ def main(): for _ in filenames: check_input_files(_, args.force) - check_space(filenames, args.force) - print('--', file=sys.stderr) print('SUBSET SIZE', args.subset_size, file=sys.stderr) print('N THREADS', args.threads, file=sys.stderr) diff --git a/scripts/sample-reads-randomly.py b/scripts/sample-reads-randomly.py index 79b4777c33..21ad095126 100755 --- a/scripts/sample-reads-randomly.py +++ b/scripts/sample-reads-randomly.py @@ -27,7 +27,7 @@ import sys import khmer -from khmer.kfile import check_input_files, check_space +from khmer.kfile import check_input_files from khmer.khmer_args import info from khmer.utils import write_record, broken_paired_reader @@ -85,8 +85,6 @@ def main(): for _ in args.filenames: check_input_files(_, args.force) - check_space(args.filenames, args.force) - # seed the random number generator? 
if args.random_seed: random.seed(args.random_seed) @@ -104,6 +102,8 @@ def main(): sys.stderr.write( "Error: cannot specify -o with more than one sample.") if not args.force: + print("NOTE: This can be overridden using the --force" + " argument", file=sys.stderr) sys.exit(1) output_filename = output_file.name else: diff --git a/scripts/trim-low-abund.py b/scripts/trim-low-abund.py index 741b181775..1254947c41 100755 --- a/scripts/trim-low-abund.py +++ b/scripts/trim-low-abund.py @@ -28,7 +28,7 @@ from khmer import khmer_args from khmer.khmer_args import (build_counting_args, info, add_loadhash_args, - report_on_config) + report_on_config, calculate_tablesize) from khmer.utils import write_record, write_record_pair, broken_paired_reader from khmer.kfile import (check_space, check_space_for_hashtable, check_valid_file_exists) @@ -126,7 +126,8 @@ def main(): check_valid_file_exists(args.input_filenames) check_space(args.input_filenames, args.force) if args.savetable: - check_space_for_hashtable(args, 'countgraph', args.force) + tablesize = calculate_tablesize(args, 'countgraph') + check_space_for_hashtable(args.savetable, tablesize, args.force) if args.loadtable: print('loading countgraph from', args.loadtable, file=sys.stderr) diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py index f523d28c70..bf2148dc02 100644 --- a/tests/test_script_arguments.py +++ b/tests/test_script_arguments.py @@ -46,14 +46,16 @@ def test_check_space(): def test_check_tablespace(): + outfile = utils.get_test_data('truncated.fq') save_stderr, sys.stderr = sys.stderr, io.StringIO() parser = khmer_args.build_counting_args() args = parser.parse_args(['-M', '1e9']) try: - khmer.kfile.check_space_for_hashtable(args, 'countgraph', force=False, - _testhook_free_space=0) + tablesize = khmer_args.calculate_tablesize(args, 'countgraph') + khmer.kfile.check_space_for_hashtable(outfile, tablesize, + False, _testhook_free_space=0) assert 0, "this should fail" except SystemExit as 
e: print(str(e)) @@ -78,12 +80,15 @@ def test_check_space_force(): def test_check_tablespace_force(): save_stderr, sys.stderr = sys.stderr, io.StringIO() + outfile = utils.get_test_data('truncated') + parser = khmer_args.build_counting_args() args = parser.parse_args(['-M', '1e9']) try: - khmer.kfile.check_space_for_hashtable(args, 'countgraph', True, - _testhook_free_space=0) + tablesize = khmer_args.calculate_tablesize(args, 'countgraph') + khmer.kfile.check_space_for_hashtable(outfile, tablesize, + True, _testhook_free_space=0) assert True, "this should pass" except SystemExit as e: print(str(e)) @@ -265,7 +270,7 @@ def test_fail_calculate_foograph_size(): args = FakeArgparseObject(ksize, n_tables, max_tablesize, max_mem) try: - nodegraph = khmer_args._calculate_tablesize(args, 'foograph') + nodegraph = khmer_args.calculate_tablesize(args, 'foograph') assert 0, "previous statement should fail" except AssertionError: raise