Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

check for errors while writing to disk #856

Merged
merged 3 commits into from
Apr 14, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
2015-04-14 Michael R. Crusoe <mcrusoe@msu.edu>

* khmer/{__init__.py,_khmermodule.cc},lib/{counting,hashbits,hashtable,
subset}.cc: catch IO errors and report them.
* tests/test_hashbits.py: remove write to fixed path in /tmp
* tests/test_scripts.py: added test for empty counting table file

2015-04-13 Elmar Bucher <buchere@ohsu.edu>

* scripts/normalize-by-median.py (main): introduced warning for when at least
Expand Down
32 changes: 19 additions & 13 deletions khmer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,15 @@ def extract_hashbits_info(filename):
uchar_size = len(pack('B', 0))
ulonglong_size = len(pack('Q', 0))

with open(filename, 'rb') as hashbits:
version, = unpack('B', hashbits.read(1))
ht_type, = unpack('B', hashbits.read(1))
ksize, = unpack('I', hashbits.read(uint_size))
n_tables, = unpack('B', hashbits.read(uchar_size))
table_size, = unpack('Q', hashbits.read(ulonglong_size))
try:
with open(filename, 'rb') as hashbits:
version, = unpack('B', hashbits.read(1))
ht_type, = unpack('B', hashbits.read(1))
ksize, = unpack('I', hashbits.read(uint_size))
n_tables, = unpack('B', hashbits.read(uchar_size))
table_size, = unpack('Q', hashbits.read(ulonglong_size))
except:
raise ValueError("Presence table '{}' is corrupt ".format(filename))

return ksize, round(table_size, -2), n_tables, version, ht_type

Expand All @@ -146,13 +149,16 @@ def extract_countinghash_info(filename):
uint_size = len(pack('I', 0))
ulonglong_size = len(pack('Q', 0))

with open(filename, 'rb') as countinghash:
version, = unpack('B', countinghash.read(1))
ht_type, = unpack('B', countinghash.read(1))
use_bigcount, = unpack('B', countinghash.read(1))
ksize, = unpack('I', countinghash.read(uint_size))
n_tables, = unpack('B', countinghash.read(1))
table_size, = unpack('Q', countinghash.read(ulonglong_size))
try:
with open(filename, 'rb') as countinghash:
version, = unpack('B', countinghash.read(1))
ht_type, = unpack('B', countinghash.read(1))
use_bigcount, = unpack('B', countinghash.read(1))
ksize, = unpack('I', countinghash.read(uint_size))
n_tables, = unpack('B', countinghash.read(1))
table_size, = unpack('Q', countinghash.read(ulonglong_size))
except:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this check be used on extract_hashbits_info too?

(at first I thought this could be more strict and catch only struct.error, but there might be IOError too and in the end they are all a kind of corrupted input, so I think that's fine)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks for the suggestion.

raise ValueError("Counting table '{}' is corrupt ".format(filename))

return ksize, round(table_size, -2), n_tables, use_bigcount, version, \
ht_type
Expand Down
42 changes: 36 additions & 6 deletions khmer/_khmermodule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1258,7 +1258,12 @@ hash_save(khmer_KCountingHash_Object * me, PyObject * args)
return NULL;
}

counting->save(filename);
try {
counting->save(filename);
} catch (khmer_file_exception &e) {
PyErr_SetString(PyExc_IOError, e.what());
return NULL;
}

Py_RETURN_NONE;
}
Expand Down Expand Up @@ -2032,7 +2037,12 @@ hashbits_save_stop_tags(khmer_KHashbits_Object * me, PyObject * args)
return NULL;
}

hashbits->save_stop_tags(filename);
try {
hashbits->save_stop_tags(filename);
} catch (khmer_file_exception &e) {
PyErr_SetString(PyExc_IOError, e.what());
return NULL;
}

Py_RETURN_NONE;
}
Expand Down Expand Up @@ -2808,7 +2818,12 @@ hashbits_save_partitionmap(khmer_KHashbits_Object * me, PyObject * args)
return NULL;
}

hashbits->partition->save_partitionmap(filename);
try {
hashbits->partition->save_partitionmap(filename);
} catch (khmer_file_exception &e) {
PyErr_SetString(PyExc_IOError, e.what());
return NULL;
}

Py_RETURN_NONE;
}
Expand Down Expand Up @@ -2961,7 +2976,12 @@ hashbits_save(khmer_KHashbits_Object * me, PyObject * args)
return NULL;
}

hashbits->save(filename);
try {
hashbits->save(filename);
} catch (khmer_file_exception &e) {
PyErr_SetString(PyExc_IOError, e.what());
return NULL;
}

Py_RETURN_NONE;
}
Expand Down Expand Up @@ -3006,7 +3026,12 @@ hashbits_save_tagset(khmer_KHashbits_Object * me, PyObject * args)
return NULL;
}

hashbits->save_tagset(filename);
try {
hashbits->save_tagset(filename);
} catch (khmer_file_exception &e) {
PyErr_SetString(PyExc_IOError, e.what());
return NULL;
}

Py_RETURN_NONE;
}
Expand All @@ -3027,7 +3052,12 @@ hashbits_save_subset_partitionmap(khmer_KHashbits_Object * me, PyObject * args)

Py_BEGIN_ALLOW_THREADS

subset_p->save_partitionmap(filename);
try {
subset_p->save_partitionmap(filename);
} catch (khmer_file_exception &e) {
PyErr_SetString(PyExc_IOError, e.what());
return NULL;
}

Py_END_ALLOW_THREADS

Expand Down
61 changes: 53 additions & 8 deletions lib/counting.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <math.h>
#include <algorithm>
#include <sstream>
#include <errno.h>

using namespace std;
using namespace khmer;
Expand Down Expand Up @@ -47,6 +48,9 @@ void CountingHash::output_fasta_kmer_pos_freq(
}

delete parser;
if (outfile.fail()) {
throw khmer_file_exception(strerror(errno));
}

outfile.close();
}
Expand Down Expand Up @@ -487,7 +491,7 @@ CountingHashFileReader::CountingHashFileReader(
} else {
err = "Unknown error in opening file: " + infilename;
}
throw khmer_file_exception(err.c_str());
throw khmer_file_exception(err + " " + strerror(errno));
}

if (ht._counts) {
Expand Down Expand Up @@ -575,9 +579,10 @@ CountingHashFileReader::CountingHashFileReader(
if (infile.eof()) {
err = "Unexpected end of k-mer count file: " + infilename;
} else {
err = "Error reading from k-mer count file: " + infilename;
err = "Error reading from k-mer count file: " + infilename + " "
+ strerror(errno);
}
throw khmer_file_exception(err.c_str());
throw khmer_file_exception(err);
}
}

Expand Down Expand Up @@ -610,7 +615,8 @@ CountingHashGzFileReader::CountingHashGzFileReader(
int read_t = gzread(infile, (char *) &ht_type, 1);

if (read_v <= 0 || read_t <= 0) {
std::string err = "K-mer count file read error: " + infilename;
std::string err = "K-mer count file read error: " + infilename + " "
+ strerror(errno);
gzclose(infile);
throw khmer_file_exception(err.c_str());
} else if (!(version == SAVED_FORMAT_VERSION)
Expand All @@ -637,7 +643,8 @@ CountingHashGzFileReader::CountingHashGzFileReader(
sizeof(save_n_tables));

if (read_b <= 0 || read_k <= 0 || read_nt <= 0) {
std::string err = "K-mer count file header read error: " + infilename;
std::string err = "K-mer count file header read error: " + infilename
+ " " + strerror(errno);
gzclose(infile);
throw khmer_file_exception(err.c_str());
}
Expand All @@ -656,8 +663,14 @@ CountingHashGzFileReader::CountingHashGzFileReader(
sizeof(save_tablesize));

if (read_b <= 0) {
std::string err = "K-mer count file header read error: " \
std::string gzerr = gzerror(infile, &read_b);
std::string err = "K-mer count file header read error: "
+ infilename;
if (read_b == Z_ERRNO) {
err = err + " " + strerror(errno);
} else {
err = err + " " + gzerr;
}
gzclose(infile);
throw khmer_file_exception(err.c_str());
}
Expand All @@ -673,7 +686,13 @@ CountingHashGzFileReader::CountingHashGzFileReader(
(unsigned) (tablesize - loaded));

if (read_b <= 0) {
std::string gzerr = gzerror(infile, &read_b);
std::string err = "K-mer count file read error: " + infilename;
if (read_b == Z_ERRNO) {
err = err + " " + strerror(errno);
} else {
err = err + " " + gzerr;
}
gzclose(infile);
throw khmer_file_exception(err.c_str());
}
Expand All @@ -685,7 +704,13 @@ CountingHashGzFileReader::CountingHashGzFileReader(
HashIntoType n_counts = 0;
read_b = gzread(infile, (char *) &n_counts, sizeof(n_counts));
if (read_b <= 0) {
std::string gzerr = gzerror(infile, &read_b);
std::string err = "K-mer count header read error: " + infilename;
if (read_b == Z_ERRNO) {
err = err + " " + strerror(errno);
} else {
err = err + " " + gzerr;
}
gzclose(infile);
throw khmer_file_exception(err.c_str());
}
Expand All @@ -701,7 +726,13 @@ CountingHashGzFileReader::CountingHashGzFileReader(
int read_c = gzread(infile, (char *) &count, sizeof(count));

if (read_k <= 0 || read_c <= 0) {
std::string gzerr = gzerror(infile, &read_b);
std::string err = "K-mer count read error: " + infilename;
if (read_b == Z_ERRNO) {
err = err + " " + strerror(errno);
} else {
err = err + " " + gzerr;
}
gzclose(infile);
throw khmer_file_exception(err.c_str());
}
Expand Down Expand Up @@ -761,7 +792,7 @@ CountingHashFileWriter::CountingHashFileWriter(
}
}
if (outfile.fail()) {
perror("Hash writing file access failure:");
throw khmer_file_exception(strerror(errno));
}
outfile.close();
}
Expand All @@ -774,11 +805,20 @@ CountingHashGzFileWriter::CountingHashGzFileWriter(
throw khmer_exception();
}

int errnum = 0;
unsigned int save_ksize = ht._ksize;
unsigned char save_n_tables = ht._n_tables;
unsigned long long save_tablesize;

gzFile outfile = gzopen(outfilename.c_str(), "wb");
if (outfile == NULL) {
const char * error = gzerror(outfile, &errnum);
if (errnum == Z_ERRNO) {
throw khmer_file_exception(strerror(errno));
} else {
throw khmer_file_exception(error);
}
}

unsigned char version = SAVED_FORMAT_VERSION;
gzwrite(outfile, (const char *) &version, 1);
Expand Down Expand Up @@ -818,7 +858,12 @@ CountingHashGzFileWriter::CountingHashGzFileWriter(
gzwrite(outfile, (const char *) &it->second, sizeof(it->second));
}
}

const char * error = gzerror(outfile, &errnum);
if (errnum == Z_ERRNO) {
throw khmer_file_exception(strerror(errno));
} else if (errnum != Z_OK) {
throw khmer_file_exception(error);
}
gzclose(outfile);
}

Expand Down
4 changes: 4 additions & 0 deletions lib/hashbits.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "read_parsers.hh"

#include <sstream>
#include <errno.h>

using namespace std;
using namespace khmer;
Expand Down Expand Up @@ -45,6 +46,9 @@ void Hashbits::save(std::string outfilename)

outfile.write((const char *) _counts[i], tablebytes);
}
if (outfile.fail()) {
throw khmer_file_exception(strerror(errno));
}
outfile.close();
}

Expand Down
4 changes: 4 additions & 0 deletions lib/hashtable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include <algorithm>
#include <sstream>
#include <errno.h>

using namespace std;
using namespace khmer;
Expand Down Expand Up @@ -256,6 +257,9 @@ void Hashtable::save_tagset(std::string outfilename)
}

outfile.write((const char *) buf, sizeof(HashIntoType) * tagset_size);
if (outfile.fail()) {
throw khmer_file_exception(strerror(errno));
}
outfile.close();

delete[] buf;
Expand Down
5 changes: 5 additions & 0 deletions lib/subset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "read_parsers.hh"

#include <sstream>
#include <errno.h>

#define IO_BUF_SIZE 250*1000*1000
#define BIG_TRAVERSALS_ARE 200
Expand Down Expand Up @@ -1418,6 +1419,10 @@ void SubsetPartition::save_partitionmap(string pmap_filename)
if (n_bytes) {
outfile.write(buf, n_bytes);
}
if (outfile.fail()) {
delete[] buf;
throw khmer_file_exception(strerror(errno));
}
outfile.close();

delete[] buf;
Expand Down
1 change: 0 additions & 1 deletion tests/test_hashbits.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,6 @@ def test_save_load_tagset_trunc():
ht.add_tag('A' * 32)
ht.add_tag('G' * 32)
ht.save_tagset(outfile)
ht.save_tagset('/tmp/goodversion-k32.tagset')

# truncate tagset file...
fp = open(outfile, 'rb')
Expand Down
14 changes: 14 additions & 0 deletions tests/test_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,20 @@ def test_normalize_by_median_empty():
assert os.path.exists(outfile), outfile


def test_normalize_by_median_emptycountingtable():
CUTOFF = '1'

infile = utils.get_temp_filename('test.fa')
in_dir = os.path.dirname(infile)

shutil.copyfile(utils.get_test_data('test-empty.fa'), infile)

script = scriptpath('normalize-by-median.py')
args = ['-C', CUTOFF, '--loadtable', infile, infile]
(status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
assert 'ValueError' in err, (status, out, err)


def test_normalize_by_median_fpr():
MIN_TABLESIZE_PARAM = 1

Expand Down