Skip to content

Commit

Permalink
Added force option to all scripts to override IO sanity checks and upda…
Browse files Browse the repository at this point in the history
…ted tests to match
  • Loading branch information
Jessica Mizzi committed Dec 5, 2014
1 parent f9add4f commit 47e8890
Show file tree
Hide file tree
Showing 26 changed files with 117 additions and 62 deletions.
5 changes: 2 additions & 3 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
2014-10-06 Michael R. Crusoe <mcrusoe@msu.edu>

* Doxyfile.in: add links to the stdc++ docs
2014-12-05 Jessica Mizzi <mizzijes@msu.edu>
* khmer/file.py,sandbox/sweep-reads.py,scripts/{abundance-dist-single,abundance-dist,annotate-partitions,count-median,count-overlap,do-partition,extract-paired-reads,extract-partitions,filter-abund-single,filter-abund,filter-stoptags,interleave-reads,load-graph,load-into-counting,make-initial-stoptags,merge-partitions,normalize-by-median,partition-graph,sample-reads-randomly,split-paired-reads}.py,setup.cfg,tests/{test_script_arguments,test_scripts}.py: Added force option to all scripts to override IO sanity checks and updated tests to match.

2014-10-01 Ben Taylor <taylo886@msu.edu>

Expand Down
15 changes: 9 additions & 6 deletions khmer/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,25 @@
import sys


def check_file_status(file_path):
def check_file_status(file_path, force):
"""
Check status of file - return if file exists; warn and exit
if empty, or does not exist
"""
if not os.path.exists(file_path):
print >>sys.stderr, "ERROR: Input file %s does not exist; exiting" % \
file_path
sys.exit(1)
if not force:
sys.exit(1)
else:
if os.stat(file_path).st_size == 0:
print >>sys.stderr, "ERROR: Input file %s is empty; exiting." % \
file_path
sys.exit(1)
if not force:
sys.exit(1)


def check_space(in_files, _testhook_free_space=None):
def check_space(in_files, force, _testhook_free_space=None):
"""
Estimate size of input files passed, then calculate
disk space available. Exit if insufficient disk space,
Expand Down Expand Up @@ -65,10 +67,11 @@ def check_space(in_files, _testhook_free_space=None):
% (float(total_size) / 1e9,)
print >>sys.stderr, " Free space: %.1f GB" \
% (float(free_space) / 1e9,)
sys.exit(1)
if not force:
sys.exit(1)


def check_space_for_hashtable(hash_size, _testhook_free_space=None):
def check_space_for_hashtable(hash_size, force, _testhook_free_space=None):
"""
Check we have enough size to write a hash table
"""
Expand Down
7 changes: 4 additions & 3 deletions sandbox/sweep-reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ def get_parser():
parser.add_argument(dest='input_fastp', help='Reference fasta or fastp')
parser.add_argument('input_files', nargs='+',
help='Reads to be swept and sorted')

parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand Down Expand Up @@ -224,13 +225,13 @@ def main():
buf_size = args.buffer_size
max_reads = args.max_reads

check_file_status(args.input_fastp)
check_file_status(args.input_fastp, args.force)
check_valid_file_exists(args.input_files)
all_input_files = [input_fastp]
all_input_files.extend(args.input_files)

# Check disk space availability
check_space(all_input_files)
check_space(all_input_files, args.force)

# figure out input file type (FA/FQ) -- based on first file
ix = iter(screed.open(args.input_files[0]))
Expand Down
6 changes: 4 additions & 2 deletions scripts/abundance-dist-single.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ def get_parser():
"filename.")
parser.add_argument('--report-total-kmers', '-t', action='store_true',
help="Prints the total number of k-mers to stderr")
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -68,8 +70,8 @@ def main(): # pylint: disable=too-many-locals,too-many-branches
args = get_parser().parse_args()
report_on_config(args)

check_file_status(args.input_sequence_filename)
check_space([args.input_sequence_filename])
check_file_status(args.input_sequence_filename, args.force)
check_space([args.input_sequence_filename], args.force)
if args.savetable:
check_space_for_hashtable(args.n_tables * args.min_tablesize)

Expand Down
4 changes: 3 additions & 1 deletion scripts/abundance-dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ def get_parser():
help='Overwrite output file if it exists')
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -53,7 +55,7 @@ def main():
infiles = [args.input_counting_table_filename,
args.input_sequence_filename]
for infile in infiles:
check_file_status(infile)
check_file_status(infile, args.force)

print('hashtable from', args.input_counting_table_filename)
counting_hash = khmer.load_counting_hash(
Expand Down
8 changes: 5 additions & 3 deletions scripts/annotate-partitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ def get_parser():
'annotate.')
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -67,11 +69,11 @@ def main():

partitionmap_file = args.graphbase + '.pmap.merged'

check_file_status(partitionmap_file)
check_file_status(partitionmap_file, args.force)
for _ in filenames:
check_file_status(_)
check_file_status(_, args.force)

check_space(filenames)
check_space(filenames, args.force)

print 'loading partition map from:', partitionmap_file
htable.load_partitionmap(partitionmap_file)
Expand Down
6 changes: 4 additions & 2 deletions scripts/count-median.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ def get_parser():
help='output summary filename')
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -63,9 +65,9 @@ def main():

infiles = [htfile, input_filename]
for infile in infiles:
check_file_status(infile)
check_file_status(infile, args.force)

check_space(infiles)
check_space(infiles, args.force)

print 'loading k-mer counting table from', htfile
htable = khmer.load_counting_hash(htfile)
Expand Down
7 changes: 4 additions & 3 deletions scripts/count-overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ def get_parser():
help="input sequence filename")
parser.add_argument('report_filename', metavar='output_report_filename',
help='output report filename')

parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -54,9 +55,9 @@ def main():
report_on_config(args, hashtype='hashbits')

for infile in [args.ptfile, args.fafile]:
check_file_status(infile)
check_file_status(infile, args.force)

check_space([args.ptfile, args.fafile])
check_space([args.ptfile, args.fafile], args.force)

print 'loading k-mer presence table from', args.ptfile
ht1 = khmer.load_hashbits(args.ptfile)
Expand Down
4 changes: 2 additions & 2 deletions scripts/do-partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,9 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
report_on_config(args, hashtype='hashbits')

for infile in args.input_filenames:
check_file_status(infile)
check_file_status(infile, args.force)

check_space(args.input_filenames)
check_space(args.input_filenames, args.force)

print 'Saving k-mer presence table to %s' % args.graphbase
print 'Loading kmers from sequences in %s' % repr(args.input_filenames)
Expand Down
6 changes: 4 additions & 2 deletions scripts/extract-paired-reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,18 @@ def get_parser():
parser.add_argument('infile')
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


def main():
info('extract-paired-reads.py')
args = get_parser().parse_args()

check_file_status(args.infile)
check_file_status(args.infile, args.force)
infiles = [args.infile]
check_space(infiles)
check_space(infiles, args.force)

outfile = os.path.basename(args.infile)
if len(sys.argv) > 2:
Expand Down
6 changes: 4 additions & 2 deletions scripts/extract-partitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ def get_parser():
help='Output unassigned sequences, too')
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -94,9 +96,9 @@ def main(): # pylint: disable=too-many-locals,too-many-branches
n_unassigned = 0

for infile in args.part_filenames:
check_file_status(infile)
check_file_status(infile, args.force)

check_space(args.part_filenames)
check_space(args.part_filenames, args.force)

print '---'
print 'reading partitioned files:', repr(args.part_filenames)
Expand Down
8 changes: 5 additions & 3 deletions scripts/filter-abund-single.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,18 @@ def get_parser():
help="FAST[AQ] sequence file to trim")
parser.add_argument('--report-total-kmers', '-t', action='store_true',
help="Prints the total number of k-mers to stderr")
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


def main():
info('filter-abund-single.py', ['counting'])
args = get_parser().parse_args()
check_file_status(args.datafile)
check_space([args.datafile])
check_file_status(args.datafile, args.force)
check_space([args.datafile],args.force)
if args.savetable:
check_space_for_hashtable(args.n_tables * args.min_tablesize)
check_space_for_hashtable(args.n_tables * args.min_tablesize, args.force)
report_on_config(args)

config = khmer.get_config()
Expand Down
6 changes: 4 additions & 2 deletions scripts/filter-abund.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ def get_parser():
'file for each input file.')
parser.add_argument('--version', action='version',
version='khmer {v}'.format(v=__version__))
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -77,9 +79,9 @@ def main():
infiles = args.input_filename

for _ in infiles:
check_file_status(_)
check_file_status(_, args.force)

check_space(infiles)
check_space(infiles, args.force)

print 'loading hashtable'
htable = khmer.load_counting_hash(counting_ht)
Expand Down
7 changes: 4 additions & 3 deletions scripts/filter-stoptags.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,20 @@ def get_parser():
nargs='+')
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


def main():
info('filter-stoptags.py', ['graph'])
args = get_parser().parse_args()
stoptags = args.stoptags_file
infiles = args.input_filenames

for _ in infiles:
check_file_status(_)
check_file_status(_, args.force)

check_space(infiles)
check_space(infiles, args.force)

print 'loading stop tags, with K', args.ksize
htable = khmer.new_hashbits(args.ksize, 1, 1)
Expand Down
4 changes: 2 additions & 2 deletions scripts/interleave-reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ def main():
args = get_parser().parse_args()

for _ in args.infiles:
check_file_status(_)
check_file_status(_, args.force)

check_space(args.infiles)
check_space(args.infiles, args.force)

s1_file = args.infiles[0]
if len(args.infiles) == 2:
Expand Down
6 changes: 3 additions & 3 deletions scripts/load-graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ def main():
filenames = args.input_filenames

for _ in args.input_filenames:
check_file_status(_)
check_file_status(_, args.force)

check_space(args.input_filenames)
check_space_for_hashtable(float(args.n_tables * args.min_tablesize) / 8.)
check_space(args.input_filenames, args.force)
check_space_for_hashtable((float(args.n_tables * args.min_tablesize) / 8.), args.force)

print 'Saving k-mer presence table to %s' % base
print 'Loading kmers from sequences in %s' % repr(filenames)
Expand Down
6 changes: 3 additions & 3 deletions scripts/load-into-counting.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,10 @@ def main():
filenames = args.input_sequence_filename

for name in args.input_sequence_filename:
check_file_status(name)
check_file_status(name, args.force)

check_space(args.input_sequence_filename)
check_space_for_hashtable(args.n_tables * args.min_tablesize)
check_space(args.input_sequence_filename, args.force)
check_space_for_hashtable(args.n_tables * args.min_tablesize, args.force)

print 'Saving k-mer counting table to %s' % base
print 'Loading kmers from sequences in %s' % repr(filenames)
Expand Down
6 changes: 4 additions & 2 deletions scripts/make-initial-stoptags.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ def get_parser():
help="Use stoptags in this file during partitioning")
parser.add_argument('graphbase', help='basename for input and output '
'filenames')
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -77,9 +79,9 @@ def main():
if args.stoptags:
infiles.append(args.stoptags)
for _ in infiles:
check_file_status(_)
check_file_status(_, args.force)

check_space(infiles)
check_space(infiles, args.force)

print 'loading htable %s.pt' % graphbase
htable = khmer.load_hashbits(graphbase + '.pt')
Expand Down
6 changes: 4 additions & 2 deletions scripts/merge-partitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ def get_parser():
'files')
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -60,9 +62,9 @@ def main():
htable = khmer.new_hashbits(ksize, 1, 1)

for _ in pmap_files:
check_file_status(_)
check_file_status(_, args.force)

check_space(pmap_files)
check_space(pmap_files, args.force)

for pmap_file in pmap_files:
print 'merging', pmap_file
Expand Down
Loading

0 comments on commit 47e8890

Please sign in to comment.