Skip to content

Commit

Permalink
Merge pull request #1170 from dib-lab/fix/spacechecks
Browse files Browse the repository at this point in the history
Fix weird things with space checks in some files (see #1167 #1166)
  • Loading branch information
ctb committed Jul 21, 2015
2 parents 5695b9f + 4ecd60b commit 526faf6
Show file tree
Hide file tree
Showing 17 changed files with 105 additions and 70 deletions.
57 changes: 36 additions & 21 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
2015-07-21 Jacob Fenton <bocajnotnef@gmail.com>

* khmer/{kfile,khmer_args}.py: refactored information passing, made it so
space checks happen in the right directory.
* oxli/build_graph.py,sandbox/collect-reads.py,scripts/{
abundance-dist-single,filter-abund-single,load-into-counting,
normalize-by-median,trim-low-abund}.py,tests/test_script_arguments.py:
changed to use new arg structure for checking hashtable save space.
* oxli/functions.py,scripts/saturate-by-median.py: updated error message
to mention --force option.
* scripts/{count-overlap,load-into-counting,make-initial-stoptags,
partition-graph,sample-reads-randomly}.py: removed unnecessary call to
check_space.

2015-07-20 Titus Brown <titus@idyll.org>

* khmer/__init__.py: cleaned up FP rate reporting.
Expand All @@ -10,43 +24,44 @@

* oxli/{functions,build_graph}.py,scripts/{load-graph,normalize-by-median,
abundance-dist}.py,tests/test_{normalize_by_median,subset_graph,hashbits,
oxli_function}.py: pylint cleanup
oxli_function}.py: pylint cleanup.

2015-07-17 Michael R. Crusoe <crusoe@ucdavis.edu>

* Makefile, tests/test_read_aligner.py: import khmer when pylinting
* Makefile, tests/test_read_aligner.py: import khmer when pylinting.

2015-07-17 Michael R. Crusoe <crusoe@ucdavis.edu>

* lib/read_parser.{cc,hh}: use std::string everywhere to match existing
exceptions
exceptions.

2015-07-10 Jacob Fenton <bocajnotnef@gmail.com>

* khmer/kfile.py: changed check_valid_file_exists to recognize fifos as
non-empty
* tests/test_normalize_by_median.py: added test
non-empty.
* tests/test_normalize_by_median.py: added test.

2015-07-10 Jacob Fenton <bocajnotnef@gmail.com>

* oxli/functions.py: changed estimate functions to use correct letter
abbreviations
* sandbox/estimate_optimal_hash.py: changed to use renamed estimate
functions
abbreviations.
* sandbox/estimate_optimal_hash.py: changed to use renamed estimate
functions.
* sandbox/unique-kmers.py: changed to not output recommended HT args by
default
* tests/test_oxli_functions.py: changed to use renamed estimate functions
default.
* tests/test_oxli_functions.py: changed to use renamed estimate functions.

2015-07-10 Jacob Fenton <bocajnotnef@gmail.com>

* oxli/functions.py: added '--force' check to sanity check
* oxli/functions.py: added '--force' check to sanity check.

2015-07-10 Jacob Fenton <bocajnotnef@gmail.com>

* oxli/functions.py: moved optimization/sanity check func to oxli
* oxli/functions.py: moved optimization/sanity check func to oxli.
* scripts/normalize-by-median.py,oxli/build_graph.py: added
optimization/sanity checking via oxli estimation funcs
* tests/test_normalize_by_median.py: updated tests to cover estimation funcs
optimization/sanity checking via oxli estimation funcs.
* tests/test_normalize_by_median.py: updated tests to cover estimation
functions.

2015-07-08 Luiz Irber <khmer@luizirber.org>

Expand All @@ -64,22 +79,22 @@
2015-07-05 Jacob Fenton <bocajnotnef@gmail.com>

* doc/whats-new-2.0.rst: added in normalize-by-median.py broken paired
updates
updates.

2015-07-05 Michael R. Crusoe <crusoe@ucdavis.edu>

* Makefile: fix cppcheck invocation
* Makefile: fix cppcheck invocation.
* khmer/_khmer.cc: switch to prefix increment for non-primitive objects,
use a C++ cast, adjust scope
use a C++ cast, adjust scope.
* lib/hashtable.{hh,cc}: make copy constructor no-op explicit; adjust scope.
* lib/{ht-diff,test-HashTables,test-Parser}.cc: remove unused test code
* lib/labelhash.cc,hllcounter.cc: astyle reformatting
* lib/read_parsers.hh: more explicit constructors
* lib/{ht-diff,test-HashTables,test-Parser}.cc: remove unused test code.
* lib/labelhash.cc,hllcounter.cc: astyle reformatting.
* lib/read_parsers.hh: more explicit constructors.

2015-07-05 Michael R. Crusoe <crusoe@ucdavis.edu>

* sandbox/{collect-variants,optimal_args_hashbits,sweep-files}.py:
update API usage
update API usage.

2015-07-05 Titus Brown <titus@idyll.org>

Expand Down
21 changes: 15 additions & 6 deletions khmer/kfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def check_input_files(file_path, force):
file_path, file=sys.stderr)

if not force:
print("NOTE: This can be overridden using the --force argument",
file=sys.stderr)
print("Exiting", file=sys.stderr)
sys.exit(1)
else:
Expand All @@ -47,12 +49,16 @@ def check_input_files(file_path, force):
print("ERROR: Input file %s does not exist; exiting" %
file_path, file=sys.stderr)
if not force:
print("NOTE: This can be overridden using the --force argument",
file=sys.stderr)
sys.exit(1)
else:
if os.stat(file_path).st_size == 0:
print("ERROR: Input file %s is empty; exiting." %
file_path, file=sys.stderr)
if not force:
print("NOTE: This can be overridden using the --force"
" argument", file=sys.stderr)
sys.exit(1)


Expand Down Expand Up @@ -109,17 +115,18 @@ def check_space(in_files, force, _testhook_free_space=None):
print(" Free space: %.1f GB"
% (float(free_space) / 1e9,), file=sys.stderr)
if not force:
print("NOTE: This can be overridden using the --force argument",
file=sys.stderr)
sys.exit(1)


def check_space_for_hashtable(args, hashtype, force,
def check_space_for_hashtable(outfile_name, hash_size, force,
_testhook_free_space=None):
"""Check we have enough size to write a hash table."""
hash_size = khmer_args._calculate_tablesize(args, hashtype)
"""Check that we have enough size to write the specified hash table."""

cwd = os.getcwd()
dir_path = os.path.dirname(os.path.realpath(cwd))
dir_path = os.path.dirname(os.path.realpath(outfile_name))
target = os.statvfs(dir_path)

if _testhook_free_space is None:
free_space = target.f_frsize * target.f_bavail
else:
Expand All @@ -129,13 +136,15 @@ def check_space_for_hashtable(args, hashtype, force,
if size_diff > 0:
print("ERROR: Not enough free space on disk "
"for saved table files;"
" Need at least %s GB more."
" Need at least %.1f GB more."
% (float(size_diff) / 1e9,), file=sys.stderr)
print(" Table size: %.1f GB"
% (float(hash_size) / 1e9,), file=sys.stderr)
print(" Free space: %.1f GB"
% (float(free_space) / 1e9,), file=sys.stderr)
if not force:
print("NOTE: This can be overridden using the --force argument",
file=sys.stderr)
sys.exit(1)


Expand Down
8 changes: 4 additions & 4 deletions khmer/khmer_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def __call__(self, parser, namespace, values, option_string=None):
action=LoadAction)


def _calculate_tablesize(args, hashtype, multiplier=1.0):
def calculate_tablesize(args, hashtype, multiplier=1.0):
if hashtype not in ('countgraph', 'nodegraph'):
raise Exception("unknown graph type: %s" % (hashtype,))

Expand All @@ -156,7 +156,7 @@ def create_nodegraph(args, ksize=None, multiplier=1.0):
print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n")
sys.exit(1)

tablesize = _calculate_tablesize(args, 'nodegraph', multiplier=multiplier)
tablesize = calculate_tablesize(args, 'nodegraph', multiplier)
return khmer.Hashbits(ksize, tablesize, args.n_tables)


Expand All @@ -167,7 +167,7 @@ def create_countgraph(args, ksize=None, multiplier=1.0):
print_error("\n** ERROR: khmer only supports k-mer sizes <= 32.\n")
sys.exit(1)

tablesize = _calculate_tablesize(args, 'countgraph', multiplier=multiplier)
tablesize = calculate_tablesize(args, 'countgraph', multiplier=multiplier)
return khmer.CountingHash(ksize, tablesize, args.n_tables)


Expand All @@ -184,7 +184,7 @@ def report_on_config(args, hashtype='countgraph'):
if args.quiet:
return

tablesize = _calculate_tablesize(args, hashtype)
tablesize = calculate_tablesize(args, hashtype)

print_error("\nPARAMETERS:")
print_error(" - kmer size = {0} \t\t(-k)".format(args.ksize))
Expand Down
7 changes: 4 additions & 3 deletions oxli/build_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@

import khmer
from khmer import khmer_args
from khmer.khmer_args import (report_on_config, info, add_threading_args)
from khmer.khmer_args import (report_on_config, info, add_threading_args,
calculate_tablesize)
from khmer.kfile import check_input_files, check_space
from khmer.kfile import check_space_for_hashtable
from oxli import functions
Expand Down Expand Up @@ -54,8 +55,8 @@ def main(args):
# if optimization args are given, do optimization
args = functions.do_sanity_checking(args, 0.01)

check_space(args.input_filenames, args.force)
check_space_for_hashtable(args, 'nodegraph', args.force)
tablesize = calculate_tablesize(args, 'nodegraph')
check_space_for_hashtable(args.output_filename, tablesize, args.force)

print('Saving k-mer presence table to %s' % base, file=sys.stderr)
print('Loading kmers from sequences in %s' %
Expand Down
2 changes: 2 additions & 0 deletions oxli/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ def do_sanity_checking(args, desired_max_fp):
*** which is above the recommended false positive ceiling of {1}!"""
.format(res.fp_rate, desired_max_fp), file=sys.stderr)
if not args.force:
print("NOTE: This can be overridden using the --force"
" argument", file=sys.stderr)
print("*** Aborting...!", file=sys.stderr)
sys.exit(1)
else:
Expand Down
7 changes: 5 additions & 2 deletions sandbox/collect-reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
import textwrap
import khmer
from khmer import khmer_args
from khmer.khmer_args import build_counting_args, report_on_config, info
from khmer.khmer_args import (build_counting_args, report_on_config, info,
calculate_tablesize)
from khmer.kfile import check_input_files, check_space
from khmer.kfile import check_space_for_hashtable
import argparse
Expand Down Expand Up @@ -77,7 +78,9 @@ def main():
check_input_files(name, False)

check_space(args.input_sequence_filename, False)
check_space_for_hashtable(args, 'countgraph', False)
tablesize = calculate_tablesize(args, 'countgraph')
check_space_for_hashtable(args.output_countingtable_filename, tablesize,
False)

print('Saving k-mer counting table to %s' % base)
print('Loading sequences from %s' % repr(filenames))
Expand Down
2 changes: 2 additions & 0 deletions sandbox/saturate-by-median.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,8 @@ def main(): # pylint: disable=too-many-branches,too-many-statements
except IOError as err:
handle_error(err, input_filename)
if not args.force:
print("NOTE: This can be overridden using the --force"
" argument", file=sys.stderr)
print('** Exiting!', file=sys.stderr)
sys.exit(1)
else:
Expand Down
10 changes: 4 additions & 6 deletions scripts/abundance-dist-single.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@
import textwrap
from khmer import khmer_args
from khmer.khmer_args import (build_counting_args, add_threading_args,
report_on_config, info)
from khmer.kfile import (check_input_files, check_space,
check_space_for_hashtable)
report_on_config, info, calculate_tablesize)
from khmer.kfile import (check_input_files, check_space_for_hashtable)


def get_parser():
Expand Down Expand Up @@ -78,10 +77,9 @@ def main(): # pylint: disable=too-many-locals,too-many-branches
report_on_config(args)

check_input_files(args.input_sequence_filename, args.force)
check_space([args.input_sequence_filename], args.force)
if args.savetable:
check_space_for_hashtable(args, 'countgraph', args.force)

tablesize = calculate_tablesize(args, 'countgraph')
check_space_for_hashtable(args.savetable, tablesize, args.force)
if (not args.squash_output and
os.path.exists(args.output_histogram_filename)):
print('ERROR: %s exists; not squashing.' %
Expand Down
4 changes: 1 addition & 3 deletions scripts/count-overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import khmer
import textwrap
from khmer import khmer_args
from khmer.kfile import check_input_files, check_space
from khmer.kfile import check_input_files
from khmer.khmer_args import (build_hashbits_args, report_on_config, info)


Expand Down Expand Up @@ -60,8 +60,6 @@ def main():
for infile in [args.ptfile, args.fafile]:
check_input_files(infile, args.force)

check_space([args.ptfile, args.fafile], args.force)

print('loading k-mer presence table from', args.ptfile, file=sys.stderr)
ht1 = khmer.load_hashbits(args.ptfile)
kmer_size = ht1.ksize()
Expand Down
6 changes: 4 additions & 2 deletions scripts/filter-abund-single.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from khmer.thread_utils import ThreadedSequenceProcessor, verbose_loader
from khmer import khmer_args
from khmer.khmer_args import (build_counting_args, report_on_config,
add_threading_args, info)
add_threading_args, info, calculate_tablesize)
from khmer.kfile import (check_input_files, check_space,
check_space_for_hashtable)
#
Expand Down Expand Up @@ -71,7 +71,9 @@ def main():
check_input_files(args.datafile, args.force)
check_space([args.datafile], args.force)
if args.savetable:
check_space_for_hashtable(args, 'countgraph', args.force)
tablesize = calculate_tablesize(args, 'countgraph')
check_space_for_hashtable(args.savetable, tablesize, args.force)

report_on_config(args)

print('making countgraph', file=sys.stderr)
Expand Down
12 changes: 7 additions & 5 deletions scripts/load-into-counting.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
import khmer
from khmer import khmer_args
from khmer.khmer_args import build_counting_args, report_on_config, info,\
add_threading_args
add_threading_args, calculate_tablesize
from khmer.kfile import check_file_writable
from khmer.kfile import check_input_files, check_space
from khmer.kfile import check_input_files
from khmer.kfile import check_space_for_hashtable


Expand Down Expand Up @@ -84,8 +84,9 @@ def main():
for name in args.input_sequence_filename:
check_input_files(name, args.force)

check_space(args.input_sequence_filename, args.force)
check_space_for_hashtable(args, 'countgraph', args.force)
tablesize = calculate_tablesize(args, 'countgraph')
check_space_for_hashtable(args.output_countingtable_filename, tablesize,
args.force)

check_file_writable(base)
check_file_writable(base + ".info")
Expand Down Expand Up @@ -124,7 +125,8 @@ def main():
thread.join()

if index > 0 and index % 10 == 0:
check_space_for_hashtable(args, 'countgraph', args.force)
tablesize = calculate_tablesize(args, 'countgraph')
check_space_for_hashtable(base, tablesize, args.force)
print('mid-save', base, file=sys.stderr)

htable.save(base)
Expand Down
4 changes: 1 addition & 3 deletions scripts/make-initial-stoptags.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import khmer
from khmer import khmer_args
from khmer.khmer_args import (build_counting_args, info)
from khmer.kfile import check_input_files, check_space
from khmer.kfile import check_input_files

DEFAULT_SUBSET_SIZE = int(1e4)
DEFAULT_COUNTING_HT_SIZE = 3e6 # number of bytes
Expand Down Expand Up @@ -83,8 +83,6 @@ def main():
for _ in infiles:
check_input_files(_, args.force)

check_space(infiles, args.force)

print('loading htable %s.pt' % graphbase, file=sys.stderr)
htable = khmer.load_hashbits(graphbase + '.pt')

Expand Down
Loading

0 comments on commit 526faf6

Please sign in to comment.