From 7c307822e4e1b1b8ec22a13849c27a9885896ab7 Mon Sep 17 00:00:00 2001
From: Jake Fenton <bocajnotnef@gmail.com>
Date: Fri, 10 Jul 2015 16:05:52 -0400
Subject: [PATCH 01/11] made hashtable space check use the output file's dir

---
 khmer/kfile.py                | 4 ++--
 scripts/load-into-counting.py | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/khmer/kfile.py b/khmer/kfile.py
index 9a01f590d9..5e77a7aa9c 100644
--- a/khmer/kfile.py
+++ b/khmer/kfile.py
@@ -117,8 +117,8 @@ def check_space_for_hashtable(args, hashtype, force,
     """Check we have enough size to write a hash table."""
     hash_size = khmer_args._calculate_tablesize(args, hashtype)
 
-    cwd = os.getcwd()
-    dir_path = os.path.dirname(os.path.realpath(cwd))
+    #cwd = os.getcwd()
+    dir_path = os.path.dirname(os.path.realpath(args.output_filename))
     target = os.statvfs(dir_path)
     if _testhook_free_space is None:
         free_space = target.f_frsize * target.f_bavail
diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py
index f907c36723..0ad11c8bbe 100755
--- a/scripts/load-into-counting.py
+++ b/scripts/load-into-counting.py
@@ -24,7 +24,7 @@
 from khmer.khmer_args import build_counting_args, report_on_config, info,\
     add_threading_args
 from khmer.kfile import check_file_writable
-from khmer.kfile import check_input_files, check_space
+from khmer.kfile import check_input_files
 from khmer.kfile import check_space_for_hashtable
 
 
@@ -84,7 +84,6 @@ def main():
     for name in args.input_sequence_filename:
         check_input_files(name, args.force)
 
-    check_space(args.input_sequence_filename, args.force)
     check_space_for_hashtable(args, 'countgraph', args.force)
 
     check_file_writable(base)

From 9d1e88148ba26e086b4ef0bf1791ae32ff5a192f Mon Sep 17 00:00:00 2001
From: Jake Fenton <bocajnotnef@gmail.com>
Date: Thu, 16 Jul 2015 14:43:01 -0400
Subject: [PATCH 02/11] removed additional unneeded space checks

---
 scripts/abundance-dist-single.py | 4 +---
 scripts/count-overlap.py         | 4 +---
 scripts/make-initial-stoptags.py | 4 +---
 scripts/partition-graph.py       | 4 +---
 scripts/sample-reads-randomly.py | 4 +---
 5 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py
index 10d2109c77..bb85618a7c 100755
--- a/scripts/abundance-dist-single.py
+++ b/scripts/abundance-dist-single.py
@@ -25,8 +25,7 @@
 from khmer import khmer_args
 from khmer.khmer_args import (build_counting_args, add_threading_args,
                               report_on_config, info)
-from khmer.kfile import (check_input_files, check_space,
-                         check_space_for_hashtable)
+from khmer.kfile import (check_input_files, check_space_for_hashtable)
 
 
 def get_parser():
@@ -78,7 +77,6 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
     report_on_config(args)
 
     check_input_files(args.input_sequence_filename, args.force)
-    check_space([args.input_sequence_filename], args.force)
     if args.savetable:
         check_space_for_hashtable(args, 'countgraph', args.force)
 
diff --git a/scripts/count-overlap.py b/scripts/count-overlap.py
index a8c715d161..1016b10149 100755
--- a/scripts/count-overlap.py
+++ b/scripts/count-overlap.py
@@ -24,7 +24,7 @@
 import khmer
 import textwrap
 from khmer import khmer_args
-from khmer.kfile import check_input_files, check_space
+from khmer.kfile import check_input_files
 from khmer.khmer_args import (build_hashbits_args, report_on_config, info)
 
 
@@ -60,8 +60,6 @@ def main():
     for infile in [args.ptfile, args.fafile]:
         check_input_files(infile, args.force)
 
-    check_space([args.ptfile, args.fafile], args.force)
-
     print('loading k-mer presence table from', args.ptfile, file=sys.stderr)
     ht1 = khmer.load_hashbits(args.ptfile)
     kmer_size = ht1.ksize()
diff --git a/scripts/make-initial-stoptags.py b/scripts/make-initial-stoptags.py
index 29a08ef7d7..e99a690275 100755
--- a/scripts/make-initial-stoptags.py
+++ b/scripts/make-initial-stoptags.py
@@ -18,7 +18,7 @@
 import khmer
 from khmer import khmer_args
 from khmer.khmer_args import (build_counting_args, info)
-from khmer.kfile import check_input_files, check_space
+from khmer.kfile import check_input_files
 
 DEFAULT_SUBSET_SIZE = int(1e4)
 DEFAULT_COUNTING_HT_SIZE = 3e6                # number of bytes
@@ -83,8 +83,6 @@ def main():
     for _ in infiles:
         check_input_files(_, args.force)
 
-    check_space(infiles, args.force)
-
     print('loading htable %s.pt' % graphbase, file=sys.stderr)
     htable = khmer.load_hashbits(graphbase + '.pt')
 
diff --git a/scripts/partition-graph.py b/scripts/partition-graph.py
index 73666e2040..bb03c0b655 100755
--- a/scripts/partition-graph.py
+++ b/scripts/partition-graph.py
@@ -24,7 +24,7 @@
 import khmer
 import sys
 from khmer.khmer_args import (add_threading_args, info)
-from khmer.kfile import check_input_files, check_space
+from khmer.kfile import check_input_files
 
 # Debugging Support
 import re
@@ -111,8 +111,6 @@ def main():
     for _ in filenames:
         check_input_files(_, args.force)
 
-    check_space(filenames, args.force)
-
     print('--', file=sys.stderr)
     print('SUBSET SIZE', args.subset_size, file=sys.stderr)
     print('N THREADS', args.threads, file=sys.stderr)
diff --git a/scripts/sample-reads-randomly.py b/scripts/sample-reads-randomly.py
index 79b4777c33..3cdc4260bb 100755
--- a/scripts/sample-reads-randomly.py
+++ b/scripts/sample-reads-randomly.py
@@ -27,7 +27,7 @@
 import sys
 
 import khmer
-from khmer.kfile import check_input_files, check_space
+from khmer.kfile import check_input_files
 from khmer.khmer_args import info
 from khmer.utils import write_record, broken_paired_reader
 
@@ -85,8 +85,6 @@ def main():
     for _ in args.filenames:
         check_input_files(_, args.force)
 
-    check_space(args.filenames, args.force)
-
     # seed the random number generator?
     if args.random_seed:
         random.seed(args.random_seed)

From 060f2a8742aa70eb317c4723713b9f9e9a4d2da2 Mon Sep 17 00:00:00 2001
From: Jake Fenton <bocajnotnef@gmail.com>
Date: Thu, 16 Jul 2015 15:53:46 -0400
Subject: [PATCH 03/11] working changes--refactoring argument structs for
 utility functions

---
 khmer/kfile.py                |  8 +++++---
 khmer/khmer_args.py           | 18 +++++++++++-------
 oxli/build_graph.py           |  4 +++-
 scripts/load-into-counting.py |  4 +++-
 4 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/khmer/kfile.py b/khmer/kfile.py
index 5e77a7aa9c..81a67ce8ac 100644
--- a/khmer/kfile.py
+++ b/khmer/kfile.py
@@ -112,13 +112,15 @@ def check_space(in_files, force, _testhook_free_space=None):
             sys.exit(1)
 
 
-def check_space_for_hashtable(args, hashtype, force,
+def check_space_for_hashtable(outfile_name, hashtype, force, n_tables,
+                              max_tablesize, max_mem=None,
                               _testhook_free_space=None):
     """Check we have enough size to write a hash table."""
-    hash_size = khmer_args._calculate_tablesize(args, hashtype)
+    hash_size = khmer_args._calculate_tablesize(max_tablesize, n_tables, 
+            hashtype, max_memory_use=max_mem)
 
     #cwd = os.getcwd()
-    dir_path = os.path.dirname(os.path.realpath(args.output_filename))
+    dir_path = os.path.dirname(os.path.realpath(outfile_name))
     target = os.statvfs(dir_path)
     if _testhook_free_space is None:
         free_space = target.f_frsize * target.f_bavail
diff --git a/khmer/khmer_args.py b/khmer/khmer_args.py
index 7782af7c07..47f3514644 100644
--- a/khmer/khmer_args.py
+++ b/khmer/khmer_args.py
@@ -133,19 +133,20 @@ def __call__(self, parser, namespace, values, option_string=None):
                         action=LoadAction)
 
 
-def _calculate_tablesize(args, hashtype, multiplier=1.0):
+def _calculate_tablesize(max_tablesize, n_tables, hashtype, multiplier=1.0,
+                         max_memory_use=None):
     if hashtype not in ('countgraph', 'nodegraph'):
         raise Exception("unknown graph type: %s" % (hashtype,))
 
-    if args.max_memory_usage:
+    if max_memory_use:
         if hashtype == 'countgraph':
-            tablesize = args.max_memory_usage / args.n_tables / \
+            tablesize = max_memory_use / n_tables / \
                 float(multiplier)
         elif hashtype == 'nodegraph':
-            tablesize = 8. * args.max_memory_usage / args.n_tables / \
+            tablesize = 8. * max_memory_use / n_tables / \
                 float(multiplier)
     else:
-        tablesize = args.max_tablesize
+        tablesize = max_tablesize
 
     return tablesize
 
@@ -168,7 +169,9 @@ def create_countgraph(args, ksize=None, multiplier=1.0):
         print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n")
         sys.exit(1)
 
-    tablesize = _calculate_tablesize(args, 'countgraph', multiplier=multiplier)
+    tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, 
+                                     'countgraph', multiplier=multiplier,
+                                     max_memory_use=args.max_memory_usage)
     return khmer.CountingHash(ksize, tablesize, args.n_tables)
 
 
@@ -185,7 +188,8 @@ def report_on_config(args, hashtype='countgraph'):
     if args.quiet:
         return
 
-    tablesize = _calculate_tablesize(args, hashtype)
+    tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, 
+            hashtype, max_memory_use=args.max_memory_usage)
 
     print_error("\nPARAMETERS:")
     print_error(" - kmer size =    {0} \t\t(-k)".format(args.ksize))
diff --git a/oxli/build_graph.py b/oxli/build_graph.py
index b58d2af080..8e32380eda 100644
--- a/oxli/build_graph.py
+++ b/oxli/build_graph.py
@@ -55,7 +55,9 @@ def main(args):
     args = functions.do_sanity_checking(args, 0.01)
 
     check_space(args.input_filenames, args.force)
-    check_space_for_hashtable(args, 'nodegraph', args.force)
+    check_space_for_hashtable(output_filename, 'nodegraph', args.force,
+                              args.n_tables, ars.max_tablesize,
+                              args.max_memory_usage)
 
     print('Saving k-mer presence table to %s' % base, file=sys.stderr)
     print('Loading kmers from sequences in %s' %
diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py
index 0ad11c8bbe..461de31603 100755
--- a/scripts/load-into-counting.py
+++ b/scripts/load-into-counting.py
@@ -84,7 +84,9 @@ def main():
     for name in args.input_sequence_filename:
         check_input_files(name, args.force)
 
-    check_space_for_hashtable(args, 'countgraph', args.force)
+    check_space_for_hashtable(args.output_countingtable_filename, 'countgraph', 
+                              args.force, args.n_tables, args.max_tablesize,
+                              args.max_memory_usage)
 
     check_file_writable(base)
     check_file_writable(base + ".info")

From 1e1d3d3c38acbf5c8d6e1f2148aa9ebf65ec5e63 Mon Sep 17 00:00:00 2001
From: Jake Fenton <bocajnotnef@gmail.com>
Date: Thu, 16 Jul 2015 16:49:12 -0400
Subject: [PATCH 04/11] finished refactoring; updated all callers and tests
 to the new check_space_for_hashtable arguments

---
 khmer/kfile.py                   |  7 ++++---
 khmer/khmer_args.py              | 11 +++++++----
 oxli/build_graph.py              |  4 ++--
 sandbox/collect-reads.py         |  4 +++-
 scripts/abundance-dist-single.py |  4 +++-
 scripts/filter-abund-single.py   |  4 +++-
 scripts/load-into-counting.py    |  6 ++++--
 scripts/normalize-by-median.py   |  4 +++-
 scripts/trim-low-abund.py        |  4 +++-
 tests/test_script_arguments.py   | 15 ++++++++++++---
 10 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/khmer/kfile.py b/khmer/kfile.py
index 81a67ce8ac..3e3b4535eb 100644
--- a/khmer/kfile.py
+++ b/khmer/kfile.py
@@ -116,12 +116,13 @@ def check_space_for_hashtable(outfile_name, hashtype, force, n_tables,
                               max_tablesize, max_mem=None,
                               _testhook_free_space=None):
     """Check we have enough size to write a hash table."""
-    hash_size = khmer_args._calculate_tablesize(max_tablesize, n_tables, 
-            hashtype, max_memory_use=max_mem)
+    hash_size = khmer_args._calculate_tablesize(max_tablesize, n_tables,
+                                                hashtype,
+                                                max_memory_use=max_mem)
 
-    #cwd = os.getcwd()
     dir_path = os.path.dirname(os.path.realpath(outfile_name))
     target = os.statvfs(dir_path)
+
     if _testhook_free_space is None:
         free_space = target.f_frsize * target.f_bavail
     else:
diff --git a/khmer/khmer_args.py b/khmer/khmer_args.py
index 47f3514644..15373ced64 100644
--- a/khmer/khmer_args.py
+++ b/khmer/khmer_args.py
@@ -158,7 +158,9 @@ def create_nodegraph(args, ksize=None, multiplier=1.0):
         print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n")
         sys.exit(1)
 
-    tablesize = _calculate_tablesize(args, 'nodegraph', multiplier=multiplier)
+    tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables,
+                                     'nodegraph', multiplier,
+                                     args.max_memory_usage)
     return khmer.Hashbits(ksize, tablesize, args.n_tables)
 
 
@@ -169,7 +171,7 @@ def create_countgraph(args, ksize=None, multiplier=1.0):
         print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n")
         sys.exit(1)
 
-    tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, 
+    tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables,
                                      'countgraph', multiplier=multiplier,
                                      max_memory_use=args.max_memory_usage)
     return khmer.CountingHash(ksize, tablesize, args.n_tables)
@@ -188,8 +190,9 @@ def report_on_config(args, hashtype='countgraph'):
     if args.quiet:
         return
 
-    tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables, 
-            hashtype, max_memory_use=args.max_memory_usage)
+    tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables,
+                                     hashtype,
+                                     max_memory_use=args.max_memory_usage)
 
     print_error("\nPARAMETERS:")
     print_error(" - kmer size =    {0} \t\t(-k)".format(args.ksize))
diff --git a/oxli/build_graph.py b/oxli/build_graph.py
index 8e32380eda..fefd24def2 100644
--- a/oxli/build_graph.py
+++ b/oxli/build_graph.py
@@ -55,8 +55,8 @@ def main(args):
     args = functions.do_sanity_checking(args, 0.01)
 
     check_space(args.input_filenames, args.force)
-    check_space_for_hashtable(output_filename, 'nodegraph', args.force,
-                              args.n_tables, ars.max_tablesize,
+    check_space_for_hashtable(args.output_filename, 'nodegraph', args.force,
+                              args.n_tables, args.max_tablesize,
                               args.max_memory_usage)
 
     print('Saving k-mer presence table to %s' % base, file=sys.stderr)
diff --git a/sandbox/collect-reads.py b/sandbox/collect-reads.py
index 48c4a115f1..aad7dfdef0 100755
--- a/sandbox/collect-reads.py
+++ b/sandbox/collect-reads.py
@@ -77,7 +77,9 @@ def main():
         check_input_files(name, False)
 
     check_space(args.input_sequence_filename, False)
-    check_space_for_hashtable(args, 'countgraph', False)
+    check_space_for_hashtable(args.output_countingtable_filename, 'countgraph',
+                              False, args.n_tables, args.max_tablesize,
+                              args.max_memory_usage)
 
     print('Saving k-mer counting table to %s' % base)
     print('Loading sequences from %s' % repr(filenames))
diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py
index bb85618a7c..ac0f2528d7 100755
--- a/scripts/abundance-dist-single.py
+++ b/scripts/abundance-dist-single.py
@@ -78,7 +78,9 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
 
     check_input_files(args.input_sequence_filename, args.force)
     if args.savetable:
-        check_space_for_hashtable(args, 'countgraph', args.force)
+        check_space_for_hashtable(args.savetable, 'countgraph', args.force,
+                                  args.n_tables, args.max_tablesize,
+                                  args.max_memory_usage)
 
     if (not args.squash_output and
             os.path.exists(args.output_histogram_filename)):
diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py
index b22a4946bd..cb30eae7e0 100755
--- a/scripts/filter-abund-single.py
+++ b/scripts/filter-abund-single.py
@@ -71,7 +71,9 @@ def main():
     check_input_files(args.datafile, args.force)
     check_space([args.datafile], args.force)
     if args.savetable:
-        check_space_for_hashtable(args, 'countgraph', args.force)
+        check_space_for_hashtable(args.savetable, 'countgraph', args.force,
+                                  args.n_tables, args.max_tablesize,
+                                  args.max_memory_usage)
     report_on_config(args)
 
     print('making countgraph', file=sys.stderr)
diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py
index 461de31603..3a563bf553 100755
--- a/scripts/load-into-counting.py
+++ b/scripts/load-into-counting.py
@@ -84,7 +84,7 @@ def main():
     for name in args.input_sequence_filename:
         check_input_files(name, args.force)
 
-    check_space_for_hashtable(args.output_countingtable_filename, 'countgraph', 
+    check_space_for_hashtable(args.output_countingtable_filename, 'countgraph',
                               args.force, args.n_tables, args.max_tablesize,
                               args.max_memory_usage)
 
@@ -125,7 +125,9 @@ def main():
             thread.join()
 
         if index > 0 and index % 10 == 0:
-            check_space_for_hashtable(args, 'countgraph', args.force)
+            check_space_for_hashtable(base, 'countgraph', args.force,
+                                      args.n_tables, args.max_tablesize,
+                                      args.max_memory_usage)
             print('mid-save', base, file=sys.stderr)
 
             htable.save(base)
diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py
index e58cb6dc9e..fea78416eb 100755
--- a/scripts/normalize-by-median.py
+++ b/scripts/normalize-by-median.py
@@ -277,7 +277,9 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
     check_valid_file_exists(args.input_filenames)
     check_space(args.input_filenames, args.force)
     if args.savetable:
-        check_space_for_hashtable(args, 'countgraph', args.force)
+        check_space_for_hashtable(args.savetable, 'countgraph', args.force,
+                                  args.n_tables, args.max_tablesize,
+                                  args.max_memory_usage)
 
     # load or create counting table.
     if args.loadtable:
diff --git a/scripts/trim-low-abund.py b/scripts/trim-low-abund.py
index 741b181775..694eeb8a24 100755
--- a/scripts/trim-low-abund.py
+++ b/scripts/trim-low-abund.py
@@ -126,7 +126,9 @@ def main():
     check_valid_file_exists(args.input_filenames)
     check_space(args.input_filenames, args.force)
     if args.savetable:
-        check_space_for_hashtable(args, 'countgraph', args.force)
+        check_space_for_hashtable(args.savetable, 'countgraph', args.force,
+                                  args.n_tables, args.max_tablesize,
+                                  args.max_memory_usage)
 
     if args.loadtable:
         print('loading countgraph from', args.loadtable, file=sys.stderr)
diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py
index f523d28c70..11310b8c52 100644
--- a/tests/test_script_arguments.py
+++ b/tests/test_script_arguments.py
@@ -46,13 +46,16 @@ def test_check_space():
 
 
 def test_check_tablespace():
+    outfile = utils.get_test_data('truncated.fq')
     save_stderr, sys.stderr = sys.stderr, io.StringIO()
 
     parser = khmer_args.build_counting_args()
     args = parser.parse_args(['-M', '1e9'])
 
     try:
-        khmer.kfile.check_space_for_hashtable(args, 'countgraph', force=False,
+        khmer.kfile.check_space_for_hashtable(outfile, 'countgraph',
+                                              False, args.n_tables,
+                                              args.max_tablesize,
                                               _testhook_free_space=0)
         assert 0, "this should fail"
     except SystemExit as e:
@@ -78,11 +81,15 @@ def test_check_space_force():
 def test_check_tablespace_force():
     save_stderr, sys.stderr = sys.stderr, io.StringIO()
 
+    outfile = utils.get_test_data('truncated')
+
     parser = khmer_args.build_counting_args()
     args = parser.parse_args(['-M', '1e9'])
 
     try:
-        khmer.kfile.check_space_for_hashtable(args, 'countgraph', True,
+        khmer.kfile.check_space_for_hashtable(outfile, 'countgraph', True,
+                                              args.n_tables,
+                                              args.max_tablesize,
                                               _testhook_free_space=0)
         assert True, "this should pass"
     except SystemExit as e:
@@ -265,7 +272,9 @@ def test_fail_calculate_foograph_size():
     args = FakeArgparseObject(ksize, n_tables, max_tablesize, max_mem)
 
     try:
-        nodegraph = khmer_args._calculate_tablesize(args, 'foograph')
+        nodegraph = khmer_args._calculate_tablesize(max_tablesize, n_tables,
+                                                    'foograph',
+                                                    max_memory_use=max_mem)
         assert 0, "previous statement should fail"
     except AssertionError:
         raise

From a948d5147533e1d16356d270d140518183f7861d Mon Sep 17 00:00:00 2001
From: Jake Fenton <bocajnotnef@gmail.com>
Date: Thu, 16 Jul 2015 16:56:42 -0400
Subject: [PATCH 05/11] changelog

---
 ChangeLog | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 4b5cfa05cf..6382a20418 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2015-07-16  Jacob Fenton  <bocajnotnef@gmail.com>
+
+   * khmer/{kfile,khmer_args}.py: refactored information passing, made it so
+   space checks happen in the right directory
+   * oxli/build_graph.py,sandbox/collect-reads.py,scripts/{count-overlap,
+   abundance-dist-single,filter-abund-single,load-into-counting,
+   make-initial-stoptags,normalize-by-median,partition-graph,
+   sample-reads-randomly,trim-low-abund}.py,tests/test_script_arguments.py:
+   changed to use new arg structure
+
 2015-07-10  Jacob Fenton  <bocajnotnef@gmail.com>
 
    * oxli/functions.py: changed estimate functions to use correct letter

From af2bdc45767a2ee74ea706d440f8efe7182eb6c8 Mon Sep 17 00:00:00 2001
From: Jake Fenton <bocajnotnef@gmail.com>
Date: Fri, 17 Jul 2015 20:06:09 -0400
Subject: [PATCH 06/11] callers now compute the table size and pass it to
 check_space_for_hashtable

---
 khmer/kfile.py                   |  6 +-----
 khmer/khmer_args.py              | 22 +++++++++++-----------
 oxli/build_graph.py              | 11 ++++++-----
 sandbox/collect-reads.py         | 11 +++++++----
 scripts/abundance-dist-single.py | 10 +++++-----
 scripts/filter-abund-single.py   | 11 +++++++----
 scripts/load-into-counting.py    | 18 +++++++++++-------
 scripts/normalize-by-median.py   | 10 ++++++----
 scripts/trim-low-abund.py        |  9 +++++----
 tests/test_script_arguments.py   | 26 +++++++++++++++-----------
 10 files changed, 74 insertions(+), 60 deletions(-)

diff --git a/khmer/kfile.py b/khmer/kfile.py
index 3e3b4535eb..a643064b82 100644
--- a/khmer/kfile.py
+++ b/khmer/kfile.py
@@ -112,13 +112,9 @@ def check_space(in_files, force, _testhook_free_space=None):
             sys.exit(1)
 
 
-def check_space_for_hashtable(outfile_name, hashtype, force, n_tables,
-                              max_tablesize, max_mem=None,
+def check_space_for_hashtable(outfile_name, hash_size, force,
                               _testhook_free_space=None):
     """Check we have enough size to write a hash table."""
-    hash_size = khmer_args._calculate_tablesize(max_tablesize, n_tables,
-                                                hashtype,
-                                                max_memory_use=max_mem)
 
     dir_path = os.path.dirname(os.path.realpath(outfile_name))
     target = os.statvfs(dir_path)
diff --git a/khmer/khmer_args.py b/khmer/khmer_args.py
index 15373ced64..5753831970 100644
--- a/khmer/khmer_args.py
+++ b/khmer/khmer_args.py
@@ -133,8 +133,8 @@ def __call__(self, parser, namespace, values, option_string=None):
                         action=LoadAction)
 
 
-def _calculate_tablesize(max_tablesize, n_tables, hashtype, multiplier=1.0,
-                         max_memory_use=None):
+def calculate_tablesize(max_tablesize, n_tables, hashtype, multiplier=1.0,
+                        max_memory_use=None):
     if hashtype not in ('countgraph', 'nodegraph'):
         raise Exception("unknown graph type: %s" % (hashtype,))
 
@@ -158,9 +158,9 @@ def create_nodegraph(args, ksize=None, multiplier=1.0):
         print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n")
         sys.exit(1)
 
-    tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables,
-                                     'nodegraph', multiplier,
-                                     args.max_memory_usage)
+    tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
+                                    'nodegraph', multiplier,
+                                    args.max_memory_usage)
     return khmer.Hashbits(ksize, tablesize, args.n_tables)
 
 
@@ -171,9 +171,9 @@ def create_countgraph(args, ksize=None, multiplier=1.0):
         print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n")
         sys.exit(1)
 
-    tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables,
-                                     'countgraph', multiplier=multiplier,
-                                     max_memory_use=args.max_memory_usage)
+    tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
+                                    'countgraph', multiplier=multiplier,
+                                    max_memory_use=args.max_memory_usage)
     return khmer.CountingHash(ksize, tablesize, args.n_tables)
 
 
@@ -190,9 +190,9 @@ def report_on_config(args, hashtype='countgraph'):
     if args.quiet:
         return
 
-    tablesize = _calculate_tablesize(args.max_tablesize, args.n_tables,
-                                     hashtype,
-                                     max_memory_use=args.max_memory_usage)
+    tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
+                                    hashtype,
+                                    max_memory_use=args.max_memory_usage)
 
     print_error("\nPARAMETERS:")
     print_error(" - kmer size =    {0} \t\t(-k)".format(args.ksize))
diff --git a/oxli/build_graph.py b/oxli/build_graph.py
index fefd24def2..802604262d 100644
--- a/oxli/build_graph.py
+++ b/oxli/build_graph.py
@@ -20,7 +20,8 @@
 
 import khmer
 from khmer import khmer_args
-from khmer.khmer_args import (report_on_config, info, add_threading_args)
+from khmer.khmer_args import (report_on_config, info, add_threading_args,
+                              calculate_tablesize)
 from khmer.kfile import check_input_files, check_space
 from khmer.kfile import check_space_for_hashtable
 from oxli import functions
@@ -54,10 +55,10 @@ def main(args):
     # if optimization args are given, do optimization
     args = functions.do_sanity_checking(args, 0.01)
 
-    check_space(args.input_filenames, args.force)
-    check_space_for_hashtable(args.output_filename, 'nodegraph', args.force,
-                              args.n_tables, args.max_tablesize,
-                              args.max_memory_usage)
+    tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
+                                    'nodegraph',
+                                    max_memory_use=args.max_memory_usage)
+    check_space_for_hashtable(args.output_filename, tablesize, args.force)
 
     print('Saving k-mer presence table to %s' % base, file=sys.stderr)
     print('Loading kmers from sequences in %s' %
diff --git a/sandbox/collect-reads.py b/sandbox/collect-reads.py
index aad7dfdef0..3b79394d5f 100755
--- a/sandbox/collect-reads.py
+++ b/sandbox/collect-reads.py
@@ -21,7 +21,8 @@
 import textwrap
 import khmer
 from khmer import khmer_args
-from khmer.khmer_args import build_counting_args, report_on_config, info
+from khmer.khmer_args import (build_counting_args, report_on_config, info,
+                              calculate_tablesize)
 from khmer.kfile import check_input_files, check_space
 from khmer.kfile import check_space_for_hashtable
 import argparse
@@ -77,9 +78,11 @@ def main():
         check_input_files(name, False)
 
     check_space(args.input_sequence_filename, False)
-    check_space_for_hashtable(args.output_countingtable_filename, 'countgraph',
-                              False, args.n_tables, args.max_tablesize,
-                              args.max_memory_usage)
+    tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
+                                    'countgraph',
+                                    max_memory_use=args.max_memory_usage)
+    check_space_for_hashtable(args.output_countingtable_filename, tablesize,
+                              False)
 
     print('Saving k-mer counting table to %s' % base)
     print('Loading sequences from %s' % repr(filenames))
diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py
index ac0f2528d7..d84c0c1d5a 100755
--- a/scripts/abundance-dist-single.py
+++ b/scripts/abundance-dist-single.py
@@ -24,7 +24,7 @@
 import textwrap
 from khmer import khmer_args
 from khmer.khmer_args import (build_counting_args, add_threading_args,
-                              report_on_config, info)
+                              report_on_config, info, calculate_tablesize)
 from khmer.kfile import (check_input_files, check_space_for_hashtable)
 
 
@@ -78,10 +78,10 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
 
     check_input_files(args.input_sequence_filename, args.force)
     if args.savetable:
-        check_space_for_hashtable(args.savetable, 'countgraph', args.force,
-                                  args.n_tables, args.max_tablesize,
-                                  args.max_memory_usage)
-
+        tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
+                                        'countgraph',
+                                        max_memory_use=args.max_memory_usage)
+        check_space_for_hashtable(args.savetable, tablesize, args.force)
     if (not args.squash_output and
             os.path.exists(args.output_histogram_filename)):
         print('ERROR: %s exists; not squashing.' %
diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py
index cb30eae7e0..6109130d1b 100755
--- a/scripts/filter-abund-single.py
+++ b/scripts/filter-abund-single.py
@@ -26,7 +26,7 @@
 from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader
 from khmer import khmer_args
 from khmer.khmer_args import (build_counting_args, report_on_config,
-                              add_threading_args, info)
+                              add_threading_args, info, calculate_tablesize)
 from khmer.kfile import (check_input_files, check_space,
                          check_space_for_hashtable)
 #
@@ -71,9 +71,12 @@ def main():
     check_input_files(args.datafile, args.force)
     check_space([args.datafile], args.force)
     if args.savetable:
-        check_space_for_hashtable(args.savetable, 'countgraph', args.force,
-                                  args.n_tables, args.max_tablesize,
-                                  args.max_memory_usage)
+        mem_args = args.max_memory_usage
+        tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
+                                        'countgraph',
+                                        max_memory_use=mem_args)
+        check_space_for_hashtable(args.savetable, tablesize, args.force)
+
     report_on_config(args)
 
     print('making countgraph', file=sys.stderr)
diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py
index 3a563bf553..c21d655953 100755
--- a/scripts/load-into-counting.py
+++ b/scripts/load-into-counting.py
@@ -22,7 +22,7 @@
 import khmer
 from khmer import khmer_args
 from khmer.khmer_args import build_counting_args, report_on_config, info,\
-    add_threading_args
+    add_threading_args, calculate_tablesize
 from khmer.kfile import check_file_writable
 from khmer.kfile import check_input_files
 from khmer.kfile import check_space_for_hashtable
@@ -84,9 +84,11 @@ def main():
     for name in args.input_sequence_filename:
         check_input_files(name, args.force)
 
-    check_space_for_hashtable(args.output_countingtable_filename, 'countgraph',
-                              args.force, args.n_tables, args.max_tablesize,
-                              args.max_memory_usage)
+    tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
+                                    'countgraph',
+                                    max_memory_use=args.max_memory_usage)
+    check_space_for_hashtable(args.output_countingtable_filename, tablesize,
+                              args.force)
 
     check_file_writable(base)
     check_file_writable(base + ".info")
@@ -125,9 +127,11 @@ def main():
             thread.join()
 
         if index > 0 and index % 10 == 0:
-            check_space_for_hashtable(base, 'countgraph', args.force,
-                                      args.n_tables, args.max_tablesize,
-                                      args.max_memory_usage)
+            mem_args = args.max_memory_usage
+            tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
+                                            'countgraph',
+                                            max_memory_use=mem_args)
+            check_space_for_hashtable(base, tablesize, args.force)
             print('mid-save', base, file=sys.stderr)
 
             htable.save(base)
diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py
index fea78416eb..8a308c2204 100755
--- a/scripts/normalize-by-median.py
+++ b/scripts/normalize-by-median.py
@@ -28,7 +28,7 @@
 from contextlib import contextmanager
 from oxli import functions as oxutils
 from khmer.khmer_args import (build_counting_args, add_loadhash_args,
-                              report_on_config, info)
+                              report_on_config, info, calculate_tablesize)
 import argparse
 from khmer.kfile import (check_space, check_space_for_hashtable,
                          check_valid_file_exists)
@@ -277,9 +277,11 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
     check_valid_file_exists(args.input_filenames)
     check_space(args.input_filenames, args.force)
     if args.savetable:
-        check_space_for_hashtable(args.savetable, 'countgraph', args.force,
-                                  args.n_tables, args.max_tablesize,
-                                  args.max_memory_usage)
+        tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
+                                        'countgraph',
+                                        max_memory_use=args.max_memory_usage)
+
+        check_space_for_hashtable(args.savetable, tablesize, args.force)
 
     # load or create counting table.
     if args.loadtable:
diff --git a/scripts/trim-low-abund.py b/scripts/trim-low-abund.py
index 694eeb8a24..c093e18732 100755
--- a/scripts/trim-low-abund.py
+++ b/scripts/trim-low-abund.py
@@ -28,7 +28,7 @@
 from khmer import khmer_args
 
 from khmer.khmer_args import (build_counting_args, info, add_loadhash_args,
-                              report_on_config)
+                              report_on_config, calculate_tablesize)
 from khmer.utils import write_record, write_record_pair, broken_paired_reader
 from khmer.kfile import (check_space, check_space_for_hashtable,
                          check_valid_file_exists)
@@ -126,9 +126,10 @@ def main():
     check_valid_file_exists(args.input_filenames)
     check_space(args.input_filenames, args.force)
     if args.savetable:
-        check_space_for_hashtable(args.savetable, 'countgraph', args.force,
-                                  args.n_tables, args.max_tablesize,
-                                  args.max_memory_usage)
+        tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
+                                        'countgraph',
+                                        max_memory_use=args.max_memory_usage)
+        check_space_for_hashtable(args.savetable, tablesize, args.force)
 
     if args.loadtable:
         print('loading countgraph from', args.loadtable, file=sys.stderr)
diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py
index 11310b8c52..6dced2d18c 100644
--- a/tests/test_script_arguments.py
+++ b/tests/test_script_arguments.py
@@ -53,10 +53,12 @@ def test_check_tablespace():
     args = parser.parse_args(['-M', '1e9'])
 
     try:
-        khmer.kfile.check_space_for_hashtable(outfile, 'countgraph',
-                                              False, args.n_tables,
-                                              args.max_tablesize,
-                                              _testhook_free_space=0)
+        mem_args = args.max_memory_usage
+        tablesize = khmer_args.calculate_tablesize(args.max_tablesize,
+                                                   args.n_tables, 'countgraph',
+                                                   max_memory_use=mem_args)
+        khmer.kfile.check_space_for_hashtable(outfile, tablesize,
+                                              False, _testhook_free_space=0)
         assert 0, "this should fail"
     except SystemExit as e:
         print(str(e))
@@ -87,10 +89,12 @@ def test_check_tablespace_force():
     args = parser.parse_args(['-M', '1e9'])
 
     try:
-        khmer.kfile.check_space_for_hashtable(outfile, 'countgraph', True,
-                                              args.n_tables,
-                                              args.max_tablesize,
-                                              _testhook_free_space=0)
+        mem_args = args.max_memory_usage
+        tablesize = khmer_args.calculate_tablesize(args.max_tablesize,
+                                                   args.n_tables, 'countgraph',
+                                                   max_memory_use=mem_args)
+        khmer.kfile.check_space_for_hashtable(outfile, tablesize,
+                                              True, _testhook_free_space=0)
         assert True, "this should pass"
     except SystemExit as e:
         print(str(e))
@@ -272,9 +276,9 @@ def test_fail_calculate_foograph_size():
     args = FakeArgparseObject(ksize, n_tables, max_tablesize, max_mem)
 
     try:
-        nodegraph = khmer_args._calculate_tablesize(max_tablesize, n_tables,
-                                                    'foograph',
-                                                    max_memory_use=max_mem)
+        nodegraph = khmer_args.calculate_tablesize(max_tablesize, n_tables,
+                                                   'foograph',
+                                                   max_memory_use=max_mem)
         assert 0, "previous statement should fail"
     except AssertionError:
         raise

From 2dacd904861cd8e5b86d9b1cdda542efe901cb5c Mon Sep 17 00:00:00 2001
From: Jake Fenton <bocajnotnef@gmail.com>
Date: Sat, 18 Jul 2015 13:57:47 -0400
Subject: [PATCH 07/11] added note on the existence of "--force" arg to where
 it's checked for

---
 khmer/kfile.py                   | 10 ++++++++++
 oxli/functions.py                |  2 ++
 sandbox/saturate-by-median.py    |  2 ++
 scripts/sample-reads-randomly.py |  2 ++
 4 files changed, 16 insertions(+)

diff --git a/khmer/kfile.py b/khmer/kfile.py
index 8af8432c6c..990eeab0af 100644
--- a/khmer/kfile.py
+++ b/khmer/kfile.py
@@ -34,6 +34,8 @@ def check_input_files(file_path, force):
               file_path, file=sys.stderr)
 
         if not force:
+            print("NOTE: This can be overridden using the --force argument",
+                  file=sys.stderr)
             print("Exiting", file=sys.stderr)
             sys.exit(1)
         else:
@@ -47,12 +49,16 @@ def check_input_files(file_path, force):
         print("ERROR: Input file %s does not exist; exiting" %
               file_path, file=sys.stderr)
         if not force:
+            print("NOTE: This can be overridden using the --force argument",
+                  file=sys.stderr)
             sys.exit(1)
     else:
         if os.stat(file_path).st_size == 0:
             print("ERROR: Input file %s is empty; exiting." %
                   file_path, file=sys.stderr)
             if not force:
+                print("NOTE: This can be overridden using the --force"
+                      " argument", file=sys.stderr)
                 sys.exit(1)
 
 
@@ -109,6 +115,8 @@ def check_space(in_files, force, _testhook_free_space=None):
         print("       Free space: %.1f GB"
               % (float(free_space) / 1e9,), file=sys.stderr)
         if not force:
+            print("NOTE: This can be overridden using the --force argument",
+                  file=sys.stderr)
             sys.exit(1)
 
 
@@ -135,6 +143,8 @@ def check_space_for_hashtable(outfile_name, hash_size, force,
         print("       Free space: %.1f GB"
               % (float(free_space) / 1e9,), file=sys.stderr)
         if not force:
+            print("NOTE: This can be overridden using the --force argument",
+                  file=sys.stderr)
             sys.exit(1)
 
 
diff --git a/oxli/functions.py b/oxli/functions.py
index b5ffe1ed86..19ccaa41d7 100644
--- a/oxli/functions.py
+++ b/oxli/functions.py
@@ -131,6 +131,8 @@ def do_sanity_checking(args, desired_max_fp):
 *** which is above the recommended false positive ceiling of {1}!"""
                       .format(res.fp_rate, desired_max_fp), file=sys.stderr)
                 if not args.force:
+                    print("NOTE: This can be overridden using the --force"
+                          " argument", file=sys.stderr)
                     print("*** Aborting...!", file=sys.stderr)
                     sys.exit(1)
         else:
diff --git a/sandbox/saturate-by-median.py b/sandbox/saturate-by-median.py
index 7a11d519ee..a47cde43fc 100755
--- a/sandbox/saturate-by-median.py
+++ b/sandbox/saturate-by-median.py
@@ -215,6 +215,8 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
         except IOError as err:
             handle_error(err, input_filename)
             if not args.force:
+                print("NOTE: This can be overridden using the --force"
+                      " argument", file=sys.stderr)
                 print('** Exiting!', file=sys.stderr)
                 sys.exit(1)
             else:
diff --git a/scripts/sample-reads-randomly.py b/scripts/sample-reads-randomly.py
index 3cdc4260bb..21ad095126 100755
--- a/scripts/sample-reads-randomly.py
+++ b/scripts/sample-reads-randomly.py
@@ -102,6 +102,8 @@ def main():
             sys.stderr.write(
                 "Error: cannot specify -o with more than one sample.")
             if not args.force:
+                print("NOTE: This can be overridden using the --force"
+                      " argument", file=sys.stderr)
                 sys.exit(1)
         output_filename = output_file.name
     else:

From c11d25f2bcb1bb22eb030dcd80f8d5c5c357d8af Mon Sep 17 00:00:00 2001
From: Jake Fenton <bocajnotnef@gmail.com>
Date: Sun, 19 Jul 2015 12:18:26 -0400
Subject: [PATCH 08/11] "It's dangerous to go alone--Take some args!"

---
 khmer/khmer_args.py              | 23 ++++++++---------------
 oxli/build_graph.py              |  4 +---
 sandbox/collect-reads.py         |  4 +---
 scripts/abundance-dist-single.py |  4 +---
 scripts/filter-abund-single.py   |  4 +---
 scripts/load-into-counting.py    |  8 ++------
 scripts/normalize-by-median.py   |  4 +---
 scripts/trim-low-abund.py        |  4 +---
 tests/test_script_arguments.py   | 12 +++---------
 9 files changed, 19 insertions(+), 48 deletions(-)

diff --git a/khmer/khmer_args.py b/khmer/khmer_args.py
index 962b756988..24e87471e5 100644
--- a/khmer/khmer_args.py
+++ b/khmer/khmer_args.py
@@ -132,20 +132,19 @@ def __call__(self, parser, namespace, values, option_string=None):
                         action=LoadAction)
 
 
-def calculate_tablesize(max_tablesize, n_tables, hashtype, multiplier=1.0,
-                        max_memory_use=None):
+def calculate_tablesize(args, hashtype, multiplier=1.0):
     if hashtype not in ('countgraph', 'nodegraph'):
         raise Exception("unknown graph type: %s" % (hashtype,))
 
-    if max_memory_use:
+    if args.max_memory_usage:
         if hashtype == 'countgraph':
-            tablesize = max_memory_use / n_tables / \
+            tablesize = args.max_memory_usage / args.n_tables / \
                 float(multiplier)
         elif hashtype == 'nodegraph':
-            tablesize = 8. * max_memory_use / n_tables / \
+            tablesize = 8. * args.max_memory_usage / args.n_tables / \
                 float(multiplier)
     else:
-        tablesize = max_tablesize
+        tablesize = args.max_tablesize
 
     return tablesize
 
@@ -157,9 +156,7 @@ def create_nodegraph(args, ksize=None, multiplier=1.0):
         print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n")
         sys.exit(1)
 
-    tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
-                                    'nodegraph', multiplier,
-                                    args.max_memory_usage)
+    tablesize = calculate_tablesize(args, 'nodegraph', multiplier)
     return khmer.Hashbits(ksize, tablesize, args.n_tables)
 
 
@@ -170,9 +167,7 @@ def create_countgraph(args, ksize=None, multiplier=1.0):
         print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n")
         sys.exit(1)
 
-    tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
-                                    'countgraph', multiplier=multiplier,
-                                    max_memory_use=args.max_memory_usage)
+    tablesize = calculate_tablesize(args, 'countgraph', multiplier=multiplier)
     return khmer.CountingHash(ksize, tablesize, args.n_tables)
 
 
@@ -189,9 +184,7 @@ def report_on_config(args, hashtype='countgraph'):
     if args.quiet:
         return
 
-    tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
-                                    hashtype,
-                                    max_memory_use=args.max_memory_usage)
+    tablesize = calculate_tablesize(args, hashtype)
 
     print_error("\nPARAMETERS:")
     print_error(" - kmer size =    {0} \t\t(-k)".format(args.ksize))
diff --git a/oxli/build_graph.py b/oxli/build_graph.py
index 44b9109b2e..adf4de622a 100644
--- a/oxli/build_graph.py
+++ b/oxli/build_graph.py
@@ -55,9 +55,7 @@ def main(args):
     # if optimization args are given, do optimization
     args = functions.do_sanity_checking(args, 0.01)
 
-    tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
-                                    'nodegraph',
-                                    max_memory_use=args.max_memory_usage)
+    tablesize = calculate_tablesize(args, 'nodegraph')
     check_space_for_hashtable(args.output_filename, tablesize, args.force)
 
     print('Saving k-mer presence table to %s' % base, file=sys.stderr)
diff --git a/sandbox/collect-reads.py b/sandbox/collect-reads.py
index 3b79394d5f..ca297272a5 100755
--- a/sandbox/collect-reads.py
+++ b/sandbox/collect-reads.py
@@ -78,9 +78,7 @@ def main():
         check_input_files(name, False)
 
     check_space(args.input_sequence_filename, False)
-    tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
-                                    'countgraph',
-                                    max_memory_use=args.max_memory_usage)
+    tablesize = calculate_tablesize(args, 'countgraph')
     check_space_for_hashtable(args.output_countingtable_filename, tablesize,
                               False)
 
diff --git a/scripts/abundance-dist-single.py b/scripts/abundance-dist-single.py
index d84c0c1d5a..4976a8ba63 100755
--- a/scripts/abundance-dist-single.py
+++ b/scripts/abundance-dist-single.py
@@ -78,9 +78,7 @@ def main():  # pylint: disable=too-many-locals,too-many-branches
 
     check_input_files(args.input_sequence_filename, args.force)
     if args.savetable:
-        tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
-                                        'countgraph',
-                                        max_memory_use=args.max_memory_usage)
+        tablesize = calculate_tablesize(args, 'countgraph')
         check_space_for_hashtable(args.savetable, tablesize, args.force)
     if (not args.squash_output and
             os.path.exists(args.output_histogram_filename)):
diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py
index 6109130d1b..9b5f9b71b5 100755
--- a/scripts/filter-abund-single.py
+++ b/scripts/filter-abund-single.py
@@ -72,9 +72,7 @@ def main():
     check_space([args.datafile], args.force)
     if args.savetable:
         mem_args = args.max_memory_usage
-        tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
-                                        'countgraph',
-                                        max_memory_use=mem_args)
+        tablesize = calculate_tablesize(args, 'countgraph')
         check_space_for_hashtable(args.savetable, tablesize, args.force)
 
     report_on_config(args)
diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py
index c21d655953..0d5b11eddc 100755
--- a/scripts/load-into-counting.py
+++ b/scripts/load-into-counting.py
@@ -84,9 +84,7 @@ def main():
     for name in args.input_sequence_filename:
         check_input_files(name, args.force)
 
-    tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
-                                    'countgraph',
-                                    max_memory_use=args.max_memory_usage)
+    tablesize = calculate_tablesize(args, 'countgraph')
     check_space_for_hashtable(args.output_countingtable_filename, tablesize,
                               args.force)
 
@@ -128,9 +126,7 @@ def main():
 
         if index > 0 and index % 10 == 0:
             mem_args = args.max_memory_usage
-            tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
-                                            'countgraph',
-                                            max_memory_use=mem_args)
+            tablesize = calculate_tablesize(args, 'countgraph')
             check_space_for_hashtable(base, tablesize, args.force)
             print('mid-save', base, file=sys.stderr)
 
diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py
index 2828b5b680..614d3d2e46 100755
--- a/scripts/normalize-by-median.py
+++ b/scripts/normalize-by-median.py
@@ -272,9 +272,7 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
     check_valid_file_exists(args.input_filenames)
     check_space(args.input_filenames, args.force)
     if args.savetable:
-        tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
-                                        'countgraph',
-                                        max_memory_use=args.max_memory_usage)
+        tablesize = calculate_tablesize(args, 'countgraph')
 
         check_space_for_hashtable(args.savetable, tablesize, args.force)
 
diff --git a/scripts/trim-low-abund.py b/scripts/trim-low-abund.py
index c093e18732..1254947c41 100755
--- a/scripts/trim-low-abund.py
+++ b/scripts/trim-low-abund.py
@@ -126,9 +126,7 @@ def main():
     check_valid_file_exists(args.input_filenames)
     check_space(args.input_filenames, args.force)
     if args.savetable:
-        tablesize = calculate_tablesize(args.max_tablesize, args.n_tables,
-                                        'countgraph',
-                                        max_memory_use=args.max_memory_usage)
+        tablesize = calculate_tablesize(args, 'countgraph')
         check_space_for_hashtable(args.savetable, tablesize, args.force)
 
     if args.loadtable:
diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py
index 6dced2d18c..4e2f16a140 100644
--- a/tests/test_script_arguments.py
+++ b/tests/test_script_arguments.py
@@ -54,9 +54,7 @@ def test_check_tablespace():
 
     try:
         mem_args = args.max_memory_usage
-        tablesize = khmer_args.calculate_tablesize(args.max_tablesize,
-                                                   args.n_tables, 'countgraph',
-                                                   max_memory_use=mem_args)
+        tablesize = khmer_args.calculate_tablesize(args, 'countgraph')
         khmer.kfile.check_space_for_hashtable(outfile, tablesize,
                                               False, _testhook_free_space=0)
         assert 0, "this should fail"
@@ -90,9 +88,7 @@ def test_check_tablespace_force():
 
     try:
         mem_args = args.max_memory_usage
-        tablesize = khmer_args.calculate_tablesize(args.max_tablesize,
-                                                   args.n_tables, 'countgraph',
-                                                   max_memory_use=mem_args)
+        tablesize = khmer_args.calculate_tablesize(args, 'countgraph')
         khmer.kfile.check_space_for_hashtable(outfile, tablesize,
                                               True, _testhook_free_space=0)
         assert True, "this should pass"
@@ -276,9 +272,7 @@ def test_fail_calculate_foograph_size():
     args = FakeArgparseObject(ksize, n_tables, max_tablesize, max_mem)
 
     try:
-        nodegraph = khmer_args.calculate_tablesize(max_tablesize, n_tables,
-                                                   'foograph',
-                                                   max_memory_use=max_mem)
+        nodegraph = khmer_args.calculate_tablesize(args, 'foograph')
         assert 0, "previous statement should fail"
     except AssertionError:
         raise

From 9fd52c9cfcd2a695c5e6e79ae3771387e57906ee Mon Sep 17 00:00:00 2001
From: Jake Fenton <bocajnotnef@gmail.com>
Date: Mon, 20 Jul 2015 12:08:31 -0400
Subject: [PATCH 09/11] Gigabyte floats are my favorite

---
 khmer/kfile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/khmer/kfile.py b/khmer/kfile.py
index 990eeab0af..c434fae1bb 100644
--- a/khmer/kfile.py
+++ b/khmer/kfile.py
@@ -136,7 +136,7 @@ def check_space_for_hashtable(outfile_name, hash_size, force,
     if size_diff > 0:
         print("ERROR: Not enough free space on disk "
               "for saved table files;"
-              "       Need at least %s GB more."
+              "       Need at least %.1f GB more."
               % (float(size_diff) / 1e9,), file=sys.stderr)
         print("       Table size: %.1f GB"
               % (float(hash_size) / 1e9,), file=sys.stderr)

From 766885c230810da31c84f826e74dc189af3ba985 Mon Sep 17 00:00:00 2001
From: Jake Fenton <bocajnotnef@gmail.com>
Date: Tue, 21 Jul 2015 13:36:10 -0400
Subject: [PATCH 10/11] formatting

---
 ChangeLog | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ChangeLog b/ChangeLog
index 43a95f5bf7..21ed4f849e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -17,6 +17,7 @@
    * tests/test_normalize_by_median.py: updated/added tests for reporting.
 
 2015-07-17  Jacob Fenton  <bocajnotnef@gmail.com>
+
    * oxli/{functions,build_graph}.py,scripts/{load-graph,normalize-by-median,
    abundance-dist}.py,tests/test_{normalize_by_median,subset_graph,hashbits,
    oxli_function}.py: pylint cleanup

From 4ecd60be1d78980fea10759db1515f5ee3e9eaa0 Mon Sep 17 00:00:00 2001
From: "C. Titus Brown" <titus@idyll.org>
Date: Tue, 21 Jul 2015 18:55:02 -0400
Subject: [PATCH 11/11] minor cleanup

---
 ChangeLog                      | 59 ++++++++++++++++++----------------
 khmer/kfile.py                 |  2 +-
 scripts/filter-abund-single.py |  1 -
 scripts/load-into-counting.py  |  1 -
 scripts/normalize-by-median.py |  1 -
 tests/test_script_arguments.py |  2 --
 6 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 21ed4f849e..39d206cad0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,12 +1,16 @@
-2015-07-20  Jacob Fenton  <bocajnotnef@gmail.com>
+2015-07-21  Jacob Fenton  <bocajnotnef@gmail.com>
 
    * khmer/{kfile,khmer_args}.py: refactored information passing, made it so
-   space checks happen in the right directory
-   * oxli/build_graph.py,sandbox/collect-reads.py,scripts/{count-overlap,
+   space checks happen in the right directory.
+   * oxli/build_graph.py,sandbox/collect-reads.py,scripts/{
    abundance-dist-single,filter-abund-single,load-into-counting,
-   make-initial-stoptags,normalize-by-median,partition-graph,
-   sample-reads-randomly,trim-low-abund}.py,tests/test_script_arguments.py:
-   changed to use new arg structure
+   normalize-by-median,trim-low-abund}.py,tests/test_script_arguments.py:
+   changed to use new arg structure for checking hashtable save space.
+   * oxli/functions.py,sandbox/saturate-by-median.py: updated error message
+   to mention --force option.
+   * scripts/{count-overlap,load-into-counting,make-initial-stoptags,
+   partition-graph,sample-reads-randomly}.py: removed unnecessary call to
+   check_space.
 
 2015-07-20  Titus Brown  <titus@idyll.org>
 
@@ -20,43 +24,44 @@
 
    * oxli/{functions,build_graph}.py,scripts/{load-graph,normalize-by-median,
    abundance-dist}.py,tests/test_{normalize_by_median,subset_graph,hashbits,
-   oxli_function}.py: pylint cleanup
+   oxli_function}.py: pylint cleanup.
 
 2015-07-17  Michael R. Crusoe  <crusoe@ucdavis.edu>  
 
-   * Makefile, tests/test_read_aligner.py: import khmer when pylinting
+   * Makefile, tests/test_read_aligner.py: import khmer when pylinting.
 
 2015-07-17  Michael R. Crusoe  <crusoe@ucdavis.edu>
 
    * lib/read_parser.{cc,hh}: use std::string everywhere to match existing
-   exceptions
+   exceptions.
 
 2015-07-10  Jacob Fenton  <bocajnotnef@gmail.com>
 
    * khmer/kfile.py: changed check_valid_file_exists to recognize fifos as
-   non-empty
-   * tests/test_normalize_by_median.py: added test
+   non-empty.
+   * tests/test_normalize_by_median.py: added test.
 
 2015-07-10  Jacob Fenton  <bocajnotnef@gmail.com>
 
    * oxli/functions.py: changed estimate functions to use correct letter
-   abbreviations
-   * sandbox/estimate_optimal_hash.py: changed to use renamed estimate 
-   functions
+   abbreviations.
+   * sandbox/estimate_optimal_hash.py: changed to use renamed estimate
+   functions.
    * sandbox/unique-kmers.py: changed to not output recommended HT args by
-   default
-   * tests/test_oxli_functions.py: changed to use renamed estimate functions
+   default.
+   * tests/test_oxli_functions.py: changed to use renamed estimate functions.
 
 2015-07-10  Jacob Fenton  <bocajnotnef@gmail.com>
 
-   * oxli/functions.py: added '--force' check to sanity check
+   * oxli/functions.py: added '--force' check to sanity check.
 
 2015-07-10  Jacob Fenton  <bocajnotnef@gmail.com>
 
-   * oxli/functions.py: moved optimization/sanity check func to oxli
+   * oxli/functions.py: moved optimization/sanity check func to oxli.
    * scripts/normalize-by-median.py,oxli/build_graph.py: added
-   optimization/sanity checking via oxli estimation funcs
-   * tests/test_normalize_by_median.py: updated tests to cover estimation funcs
+   optimization/sanity checking via oxli estimation funcs.
+   * tests/test_normalize_by_median.py: updated tests to cover estimation
+   functions.
 
 2015-07-08  Luiz Irber  <khmer@luizirber.org>
 
@@ -74,22 +79,22 @@
 2015-07-05  Jacob Fenton  <bocajnotnef@gmail.com>
 
    * doc/whats-new-2.0.rst: added in normalize-by-median.py broken paired 
-   updates
+   updates.
 
 2015-07-05  Michael R. Crusoe  <crusoe@ucdavis.edu>
 
-   * Makefile: fix cppcheck invocation
+   * Makefile: fix cppcheck invocation.
    * khmer/_khmer.cc: switch to prefix increment for non-primitive objects,
-   use a C++ cast, adjust scope
+   use a C++ cast, adjust scope.
    * lib/hashtable.{hh,cc}: make copy constructor no-op explicit. adjust scope
-   * lib/{ht-diff,test-HashTables,test-Parser}.cc: remove unused test code
-   * lib/labelhash.cc,hllcounter.cc: astyle reformatting
-   * lib/read_parsers.hh: more explicit constructors
+   * lib/{ht-diff,test-HashTables,test-Parser}.cc: remove unused test code.
+   * lib/labelhash.cc,hllcounter.cc: astyle reformatting.
+   * lib/read_parsers.hh: more explicit constructors.
 
 2015-07-05  Michael R. Crusoe  <crusoe@ucdavis.edu>
 
    * sandbox/{collect-variants,optimal_args_hashbits,sweep-files}.py:
-   update API usage
+   update API usage.
 
 2015-07-05  Titus Brown  <titus@idyll.org>
 
diff --git a/khmer/kfile.py b/khmer/kfile.py
index c434fae1bb..430f8c64cd 100644
--- a/khmer/kfile.py
+++ b/khmer/kfile.py
@@ -122,7 +122,7 @@ def check_space(in_files, force, _testhook_free_space=None):
 
 def check_space_for_hashtable(outfile_name, hash_size, force,
                               _testhook_free_space=None):
-    """Check we have enough size to write a hash table."""
+    """Check that we have enough space to write the specified hash table."""
 
     dir_path = os.path.dirname(os.path.realpath(outfile_name))
     target = os.statvfs(dir_path)
diff --git a/scripts/filter-abund-single.py b/scripts/filter-abund-single.py
index 9b5f9b71b5..1cb9947ad7 100755
--- a/scripts/filter-abund-single.py
+++ b/scripts/filter-abund-single.py
@@ -71,7 +71,6 @@ def main():
     check_input_files(args.datafile, args.force)
     check_space([args.datafile], args.force)
     if args.savetable:
-        mem_args = args.max_memory_usage
         tablesize = calculate_tablesize(args, 'countgraph')
         check_space_for_hashtable(args.savetable, tablesize, args.force)
 
diff --git a/scripts/load-into-counting.py b/scripts/load-into-counting.py
index 0d5b11eddc..fcf8fbb109 100755
--- a/scripts/load-into-counting.py
+++ b/scripts/load-into-counting.py
@@ -125,7 +125,6 @@ def main():
             thread.join()
 
         if index > 0 and index % 10 == 0:
-            mem_args = args.max_memory_usage
             tablesize = calculate_tablesize(args, 'countgraph')
             check_space_for_hashtable(base, tablesize, args.force)
             print('mid-save', base, file=sys.stderr)
diff --git a/scripts/normalize-by-median.py b/scripts/normalize-by-median.py
index 60a74770d2..68601d663d 100755
--- a/scripts/normalize-by-median.py
+++ b/scripts/normalize-by-median.py
@@ -306,7 +306,6 @@ def main():  # pylint: disable=too-many-branches,too-many-statements
     check_space(args.input_filenames, args.force)
     if args.savetable:
         tablesize = calculate_tablesize(args, 'countgraph')
-
         check_space_for_hashtable(args.savetable, tablesize, args.force)
 
     # load or create counting table.
diff --git a/tests/test_script_arguments.py b/tests/test_script_arguments.py
index 4e2f16a140..bf2148dc02 100644
--- a/tests/test_script_arguments.py
+++ b/tests/test_script_arguments.py
@@ -53,7 +53,6 @@ def test_check_tablespace():
     args = parser.parse_args(['-M', '1e9'])
 
     try:
-        mem_args = args.max_memory_usage
         tablesize = khmer_args.calculate_tablesize(args, 'countgraph')
         khmer.kfile.check_space_for_hashtable(outfile, tablesize,
                                               False, _testhook_free_space=0)
@@ -87,7 +86,6 @@ def test_check_tablespace_force():
     args = parser.parse_args(['-M', '1e9'])
 
     try:
-        mem_args = args.max_memory_usage
         tablesize = khmer_args.calculate_tablesize(args, 'countgraph')
         khmer.kfile.check_space_for_hashtable(outfile, tablesize,
                                               True, _testhook_free_space=0)