Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial work for issue 399: add --force flag #647

Merged
merged 8 commits into from
Dec 19, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
2014-12-17 Jessica Mizzi <mizzijes@msu.edu>

* khmer/file.py,sandbox/sweep-reads.py,scripts/{abundance-dist-single,
abundance-dist,annotate-partitions,count-median,count-overlap,do-partition,
extract-paired-reads,extract-partitions,filter-abund-single,filter-abund,
filter-stoptags,interleave-reads,load-graph,load-into-counting,
make-initial-stoptags,merge-partitions,normalize-by-median,partition-graph,
sample-reads-randomly,split-paired-reads}.py,setup.cfg,
tests/{test_script_arguments,test_scripts}.py: Added force option to all
scripts to override script IO sanity checks and updated tests to match.

2014-12-17 Michael R. Crusoe <mcrusoe@msu.edu>

* scripts/load-graph.py,khmer/_khmermodule.cc: restore threading to
Expand Down
18 changes: 11 additions & 7 deletions khmer/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from stat import S_ISBLK, S_ISFIFO


def check_file_status(file_path):
def check_file_status(file_path, force):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like force should have a default value set explicitly:
def check_file_status(file_path, force=0)

Or is 'force' something that's always passed?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use True/False/None for boolean parameters instead of 0 or 1.

@jessicamizzi using default parameters is a good way to avoid breaking code that is already using check_file_status,
but since all uses are currently internal (they don't change the public API), tracking all occurrences in the codebase and refactoring is still feasible.

"""Check the status of the file; if the file is empty or doesn't exist
AND if the file is NOT a fifo/block/named pipe then a warning is printed
and sys.exit(1) is called
Expand All @@ -30,15 +30,17 @@ def check_file_status(file_path):
if not os.path.exists(file_path):
print >>sys.stderr, "ERROR: Input file %s does not exist; exiting" % \
file_path
sys.exit(1)
if not force:
sys.exit(1)
else:
if os.stat(file_path).st_size == 0:
print >>sys.stderr, "ERROR: Input file %s is empty; exiting." % \
file_path
sys.exit(1)
if not force:
sys.exit(1)


def check_space(in_files, _testhook_free_space=None):
def check_space(in_files, force, _testhook_free_space=None):
"""
Estimate size of input files passed, then calculate
disk space available. Exit if insufficient disk space,
Expand Down Expand Up @@ -74,10 +76,11 @@ def check_space(in_files, _testhook_free_space=None):
% (float(total_size) / 1e9,)
print >>sys.stderr, " Free space: %.1f GB" \
% (float(free_space) / 1e9,)
sys.exit(1)
if not force:
sys.exit(1)


def check_space_for_hashtable(hash_size, _testhook_free_space=None):
def check_space_for_hashtable(hash_size, force, _testhook_free_space=None):
"""
Check we have enough size to write a hash table
"""
Expand All @@ -99,7 +102,8 @@ def check_space_for_hashtable(hash_size, _testhook_free_space=None):
% (float(hash_size) / 1e9,)
print >>sys.stderr, " Free space: %.1f GB" \
% (float(free_space) / 1e9,)
sys.exit(1)
if not force:
sys.exit(1)


def check_valid_file_exists(in_files):
Expand Down
2 changes: 1 addition & 1 deletion khmer/load_pe.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def load_pe(screed_handle):

screed_iter = iter(screed_handle)

while 1:
while True:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change? 'while 1' is perfectly idiomatic in Python...?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may be an 'autopep8'-ism

try:
this_record = screed_iter.next()
except StopIteration:
Expand Down
7 changes: 4 additions & 3 deletions sandbox/sweep-reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ def get_parser():
parser.add_argument(dest='input_fastp', help='Reference fasta or fastp')
parser.add_argument('input_files', nargs='+',
help='Reads to be swept and sorted')

parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand Down Expand Up @@ -224,13 +225,13 @@ def main():
buf_size = args.buffer_size
max_reads = args.max_reads

check_file_status(args.input_fastp)
check_file_status(args.input_fastp, args.force)
check_valid_file_exists(args.input_files)
all_input_files = [input_fastp]
all_input_files.extend(args.input_files)

# Check disk space availability
check_space(all_input_files)
check_space(all_input_files, args.force)

# figure out input file type (FA/FQ) -- based on first file
ix = iter(screed.open(args.input_files[0]))
Expand Down
6 changes: 4 additions & 2 deletions scripts/abundance-dist-single.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ def get_parser():
"filename.")
parser.add_argument('--report-total-kmers', '-t', action='store_true',
help="Prints the total number of k-mers to stderr")
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -68,8 +70,8 @@ def main(): # pylint: disable=too-many-locals,too-many-branches
args = get_parser().parse_args()
report_on_config(args)

check_file_status(args.input_sequence_filename)
check_space([args.input_sequence_filename])
check_file_status(args.input_sequence_filename, args.force)
check_space([args.input_sequence_filename], args.force)
if args.savetable:
check_space_for_hashtable(args.n_tables * args.min_tablesize)

Expand Down
4 changes: 3 additions & 1 deletion scripts/abundance-dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ def get_parser():
help='Overwrite output file if it exists')
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -53,7 +55,7 @@ def main():
infiles = [args.input_counting_table_filename,
args.input_sequence_filename]
for infile in infiles:
check_file_status(infile)
check_file_status(infile, args.force)

print ('hashtable from', args.input_counting_table_filename,
file=sys.stderr)
Expand Down
8 changes: 5 additions & 3 deletions scripts/annotate-partitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ def get_parser():
'annotate.')
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -68,11 +70,11 @@ def main():

partitionmap_file = args.graphbase + '.pmap.merged'

check_file_status(partitionmap_file)
check_file_status(partitionmap_file, args.force)
for _ in filenames:
check_file_status(_)
check_file_status(_, args.force)

check_space(filenames)
check_space(filenames, args.force)

print >>sys.stderr, 'loading partition map from:', partitionmap_file
htable.load_partitionmap(partitionmap_file)
Expand Down
6 changes: 4 additions & 2 deletions scripts/count-median.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ def get_parser():
help='output summary filename')
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -64,9 +66,9 @@ def main():

infiles = [htfile, input_filename]
for infile in infiles:
check_file_status(infile)
check_file_status(infile, args.force)

check_space(infiles)
check_space(infiles, args.force)

print >>sys.stderr, 'loading k-mer counting table from', htfile
htable = khmer.load_counting_hash(htfile)
Expand Down
7 changes: 4 additions & 3 deletions scripts/count-overlap.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ def get_parser():
help="input sequence filename")
parser.add_argument('report_filename', metavar='output_report_filename',
help='output report filename')

parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -54,9 +55,9 @@ def main():
report_on_config(args, hashtype='hashbits')

for infile in [args.ptfile, args.fafile]:
check_file_status(infile)
check_file_status(infile, args.force)

check_space([args.ptfile, args.fafile])
check_space([args.ptfile, args.fafile], args.force)

print >>sys.stderr, 'loading k-mer presence table from', args.ptfile
ht1 = khmer.load_hashbits(args.ptfile)
Expand Down
11 changes: 7 additions & 4 deletions scripts/do-partition.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def __debug_vm_usage(msg): # pylint: disable=unused-argument


def worker(queue, basename, stop_big_traversals):
while 1:
while True:
try:
(htable, index, start, stop) = queue.get(False)
except Queue.Empty:
Expand Down Expand Up @@ -97,6 +97,8 @@ def get_parser():
parser.add_argument('graphbase', help="base name for output files")
parser.add_argument('input_filenames', metavar='input_sequence_filename',
nargs='+', help='input FAST[AQ] sequence filenames')
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -108,9 +110,9 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
report_on_config(args, hashtype='hashbits')

for infile in args.input_filenames:
check_file_status(infile)
check_file_status(infile, args.force)

check_space(args.input_filenames)
check_space(args.input_filenames, args.force)

print >>sys.stderr, 'Saving k-mer presence table to %s' % args.graphbase
print >>sys.stderr, 'Loading kmers from sequences in %s' % \
Expand All @@ -137,7 +139,8 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
" this data set. Increase k-mer presence table "
"size/num of tables.")
print >> sys.stderr, "**"
sys.exit(1)
if not args.force:
sys.exit(1)

# partition-graph

Expand Down
6 changes: 4 additions & 2 deletions scripts/extract-paired-reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,18 @@ def get_parser():
parser.add_argument('infile')
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


def main():
info('extract-paired-reads.py')
args = get_parser().parse_args()

check_file_status(args.infile)
check_file_status(args.infile, args.force)
infiles = [args.infile]
check_space(infiles)
check_space(infiles, args.force)

outfile = os.path.basename(args.infile)
if len(sys.argv) > 2:
Expand Down
6 changes: 4 additions & 2 deletions scripts/extract-partitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ def get_parser():
help='Output unassigned sequences, too')
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -94,9 +96,9 @@ def main(): # pylint: disable=too-many-locals,too-many-branches
n_unassigned = 0

for infile in args.part_filenames:
check_file_status(infile)
check_file_status(infile, args.force)

check_space(args.part_filenames)
check_space(args.part_filenames, args.force)

print >>sys.stderr, '---'
print >>sys.stderr, 'reading partitioned files:', repr(args.part_filenames)
Expand Down
9 changes: 6 additions & 3 deletions scripts/filter-abund-single.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,19 @@ def get_parser():
help="FAST[AQ] sequence file to trim")
parser.add_argument('--report-total-kmers', '-t', action='store_true',
help="Prints the total number of k-mers to stderr")
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


def main():
info('filter-abund-single.py', ['counting'])
args = get_parser().parse_args()
check_file_status(args.datafile)
check_space([args.datafile])
check_file_status(args.datafile, args.force)
check_space([args.datafile], args.force)
if args.savetable:
check_space_for_hashtable(args.n_tables * args.min_tablesize)
check_space_for_hashtable(
args.n_tables * args.min_tablesize, args.force)
report_on_config(args)

print >>sys.stderr, 'making k-mer counting table'
Expand Down
6 changes: 4 additions & 2 deletions scripts/filter-abund.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ def get_parser():
'file for each input file.')
parser.add_argument('--version', action='version',
version='khmer {v}'.format(v=__version__))
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -78,9 +80,9 @@ def main():
infiles = args.input_filename

for _ in infiles:
check_file_status(_)
check_file_status(_, args.force)

check_space(infiles)
check_space(infiles, args.force)

print >>sys.stderr, 'loading hashtable'
htable = khmer.load_counting_hash(counting_ht)
Expand Down
6 changes: 4 additions & 2 deletions scripts/filter-stoptags.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ def get_parser():
nargs='+')
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -55,9 +57,9 @@ def main():
infiles = args.input_filenames

for _ in infiles:
check_file_status(_)
check_file_status(_, args.force)

check_space(infiles)
check_space(infiles, args.force)

print >>sys.stderr, 'loading stop tags, with K', args.ksize
htable = khmer.new_hashbits(args.ksize, 1, 1)
Expand Down
8 changes: 5 additions & 3 deletions scripts/interleave-reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ def get_parser():
default=sys.stdout)
parser.add_argument('--version', action='version', version='%(prog)s '
+ khmer.__version__)
parser.add_argument('-f', '--force', default=False, action='store_true',
help='Overwrite output file if it exists')
return parser


Expand All @@ -71,9 +73,9 @@ def main():
args = get_parser().parse_args()

for _ in args.infiles:
check_file_status(_)
check_file_status(_, args.force)

check_space(args.infiles)
check_space(args.infiles, args.force)

s1_file = args.infiles[0]
if len(args.infiles) == 2:
Expand All @@ -92,7 +94,7 @@ def main():
print >> sys.stderr, "Error! R2 file %s does not exist" % s2_file
fail = True

if fail:
if fail and not args.force:
sys.exit(1)

print >> sys.stderr, "Interleaving:\n\t%s\n\t%s" % (s1_file, s2_file)
Expand Down
Loading