Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the append behavior in normalize-by-median.py (#745) #843

Merged
merged 19 commits into from
Feb 25, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
2015-02-25 Tamer A. Mansour <drtamermansour@gmail.com>

* scripts/normalize-by-median.py: change the default behavior to
overwrite the sequences output file. Also add a new argument --append to
append new reads to the output file.
* tests/test_scripts.py: add a test for the --append option in
normalize-by-median.py

2015-02-25 Hussien Alameldin <hussien@msu.edu>

* khmer/khmer_args.py: add 'hll' citation entry "Irber and Brown,
Expand Down
13 changes: 10 additions & 3 deletions scripts/normalize-by-median.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,9 @@ def get_parser():
dest='single_output_filename',
default='', help='only output a single'
' file with the specified filename')
parser.add_argument('--append', default=False, action='store_true',
help='append reads to the outputfile. '
'Only with -o specified')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would word the help text this way: "appends reads instead of replacing the output file (only if -o/--out is specified)"

parser.add_argument('input_filenames', metavar='input_sequence_filename',
help='Input FAST[AQ] sequence filename.', nargs='+')
parser.add_argument('--report-total-kmers', '-t', action='store_true',
Expand Down Expand Up @@ -233,11 +236,15 @@ def main(): # pylint: disable=too-many-branches,too-many-statements
discarded = 0
input_filename = None

for index, input_filename in enumerate(args.input_filenames):
if args.single_output_filename != '':
output_name = args.single_output_filename
if args.single_output_filename:
output_name = args.single_output_filename
if args.append:
outfp = open(args.single_output_filename, 'a')
else:
outfp = open(args.single_output_filename, 'w')

for index, input_filename in enumerate(args.input_filenames):
if not args.single_output_filename:
output_name = os.path.basename(input_filename) + '.keep'
outfp = open(output_name, 'w')

Expand Down
36 changes: 36 additions & 0 deletions tests/test_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,42 @@ def test_normalize_by_median():
assert seqs[0].startswith('GGTTGACGGGGCTCAGGGGG'), seqs


def test_normalize_by_median_append():
    """Run normalize-by-median.py with ``-o`` and ``--append``.

    The output file is seeded with a copy of ``test-abund-read.fa`` before
    the script is run on a copy of ``test-abund-read-3.fa``.  Afterwards the
    output is expected to hold two records, the second of which contains
    ``GACAGCgtgCCGCA``.
    """
    # Seed the output file first so the script has existing content to
    # append to.
    keep_path = utils.get_temp_filename('test.fa.keep')
    shutil.copyfile(utils.get_test_data('test-abund-read.fa'), keep_path)
    work_dir = os.path.dirname(keep_path)

    cutoff = '1'
    reads_path = utils.get_temp_filename('test.fa', work_dir)
    shutil.copyfile(utils.get_test_data('test-abund-read-3.fa'), reads_path)
    script = scriptpath('normalize-by-median.py')

    cmdline = ['-C', cutoff, '-k', '17', '-t']
    cmdline += ['-o', keep_path, '--append', reads_path]
    (_status, _out, _err) = utils.runscript(script, cmdline, work_dir)
    assert os.path.exists(keep_path), keep_path

    sequences = []
    for record in screed.open(keep_path):
        sequences.append(record.sequence)
    assert len(sequences) == 2, sequences
    assert 'GACAGCgtgCCGCA' in sequences[1], sequences


def test_normalize_by_median_overwrite():
    """Run normalize-by-median.py with ``-o`` but without ``--append``.

    The output file is seeded with a copy of ``test-abund-read.fa`` before
    the script is run on a copy of ``test-abund-read-3.fa``.  Afterwards the
    output is expected to hold exactly one record, which contains
    ``GACAGCgtgCCGCA``.
    """
    # Seed the output file first so there is existing content that the
    # script could either keep or replace.
    keep_path = utils.get_temp_filename('test.fa.keep')
    shutil.copyfile(utils.get_test_data('test-abund-read.fa'), keep_path)
    work_dir = os.path.dirname(keep_path)

    cutoff = '1'
    reads_path = utils.get_temp_filename('test.fa', work_dir)
    shutil.copyfile(utils.get_test_data('test-abund-read-3.fa'), reads_path)
    script = scriptpath('normalize-by-median.py')

    cmdline = ['-C', cutoff, '-k', '17', '-t']
    cmdline += ['-o', keep_path, reads_path]
    (_status, _out, _err) = utils.runscript(script, cmdline, work_dir)
    assert os.path.exists(keep_path), keep_path

    sequences = []
    for record in screed.open(keep_path):
        sequences.append(record.sequence)
    assert len(sequences) == 1, sequences
    assert 'GACAGCgtgCCGCA' in sequences[0], sequences


def test_normalize_by_median_version():
script = scriptpath('normalize-by-median.py')
args = ['--version']
Expand Down