diff --git a/ChangeLog b/ChangeLog index 6546c1e7c2..cb64e15c88 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2015-04-14 Michael R. Crusoe + + * khmer/{__init__.py,_khmermodule.cc},lib/{counting,hashbits,hashtable, + subset}.cc: catch IO errors and report them. + * tests/test_hashbits.py: remove write to fixed path in /tmp + * tests/test_scripts.py: added test for empty counting table file + 2015-04-13 Elmar Bucher * scripts/normalize-by-median.py (main): introduced warning for when at least diff --git a/khmer/__init__.py b/khmer/__init__.py index 0d6c644d4a..0832096556 100644 --- a/khmer/__init__.py +++ b/khmer/__init__.py @@ -117,12 +117,15 @@ def extract_hashbits_info(filename): uchar_size = len(pack('B', 0)) ulonglong_size = len(pack('Q', 0)) - with open(filename, 'rb') as hashbits: - version, = unpack('B', hashbits.read(1)) - ht_type, = unpack('B', hashbits.read(1)) - ksize, = unpack('I', hashbits.read(uint_size)) - n_tables, = unpack('B', hashbits.read(uchar_size)) - table_size, = unpack('Q', hashbits.read(ulonglong_size)) + try: + with open(filename, 'rb') as hashbits: + version, = unpack('B', hashbits.read(1)) + ht_type, = unpack('B', hashbits.read(1)) + ksize, = unpack('I', hashbits.read(uint_size)) + n_tables, = unpack('B', hashbits.read(uchar_size)) + table_size, = unpack('Q', hashbits.read(ulonglong_size)) + except: + raise ValueError("Presence table '{}' is corrupt ".format(filename)) return ksize, round(table_size, -2), n_tables, version, ht_type @@ -146,13 +149,16 @@ def extract_countinghash_info(filename): uint_size = len(pack('I', 0)) ulonglong_size = len(pack('Q', 0)) - with open(filename, 'rb') as countinghash: - version, = unpack('B', countinghash.read(1)) - ht_type, = unpack('B', countinghash.read(1)) - use_bigcount, = unpack('B', countinghash.read(1)) - ksize, = unpack('I', countinghash.read(uint_size)) - n_tables, = unpack('B', countinghash.read(1)) - table_size, = unpack('Q', countinghash.read(ulonglong_size)) + try: + with open(filename, 'rb') as countinghash: + version, = unpack('B', countinghash.read(1)) + ht_type, = unpack('B', countinghash.read(1)) + use_bigcount, = unpack('B', countinghash.read(1)) + ksize, = unpack('I', countinghash.read(uint_size)) + n_tables, = unpack('B', countinghash.read(1)) + table_size, = unpack('Q', countinghash.read(ulonglong_size)) + except: + raise ValueError("Counting table '{}' is corrupt ".format(filename)) return ksize, round(table_size, -2), n_tables, use_bigcount, version, \ ht_type diff --git a/khmer/_khmermodule.cc b/khmer/_khmermodule.cc index ad1fe2f500..b00e266a62 100644 --- a/khmer/_khmermodule.cc +++ b/khmer/_khmermodule.cc @@ -1258,7 +1258,12 @@ hash_save(khmer_KCountingHash_Object * me, PyObject * args) return NULL; } - counting->save(filename); + try { + counting->save(filename); + } catch (khmer_file_exception &e) { + PyErr_SetString(PyExc_IOError, e.what()); + return NULL; + } Py_RETURN_NONE; } @@ -2032,7 +2037,12 @@ hashbits_save_stop_tags(khmer_KHashbits_Object * me, PyObject * args) return NULL; } - hashbits->save_stop_tags(filename); + try { + hashbits->save_stop_tags(filename); + } catch (khmer_file_exception &e) { + PyErr_SetString(PyExc_IOError, e.what()); + return NULL; + } Py_RETURN_NONE; } @@ -2808,7 +2818,12 @@ hashbits_save_partitionmap(khmer_KHashbits_Object * me, PyObject * args) return NULL; } - hashbits->partition->save_partitionmap(filename); + try { + hashbits->partition->save_partitionmap(filename); + } catch (khmer_file_exception &e) { + PyErr_SetString(PyExc_IOError, e.what()); + return NULL; + } Py_RETURN_NONE; } @@ -2961,7 +2976,12 @@ hashbits_save(khmer_KHashbits_Object * me, PyObject * args) return NULL; } - hashbits->save(filename); + try { + hashbits->save(filename); + } catch (khmer_file_exception &e) { + PyErr_SetString(PyExc_IOError, e.what()); + return NULL; + } Py_RETURN_NONE; } @@ -3006,7 +3026,12 @@ hashbits_save_tagset(khmer_KHashbits_Object * me, PyObject * args) return NULL; } - hashbits->save_tagset(filename); + try { + hashbits->save_tagset(filename); + } catch (khmer_file_exception &e) { + PyErr_SetString(PyExc_IOError, e.what()); + return NULL; + } Py_RETURN_NONE; } @@ -3027,7 +3052,12 @@ hashbits_save_subset_partitionmap(khmer_KHashbits_Object * me, PyObject * args) Py_BEGIN_ALLOW_THREADS - subset_p->save_partitionmap(filename); + try { + subset_p->save_partitionmap(filename); + } catch (khmer_file_exception &e) { + PyErr_SetString(PyExc_IOError, e.what()); + return NULL; + } Py_END_ALLOW_THREADS diff --git a/lib/counting.cc b/lib/counting.cc index 1c7b41f110..817f2982b3 100644 --- a/lib/counting.cc +++ b/lib/counting.cc @@ -14,6 +14,7 @@ #include #include #include +#include using namespace std; using namespace khmer; @@ -47,6 +48,9 @@ void CountingHash::output_fasta_kmer_pos_freq( } delete parser; + if (outfile.fail()) { + throw khmer_file_exception(strerror(errno)); + } outfile.close(); } @@ -487,7 +491,7 @@ CountingHashFileReader::CountingHashFileReader( } else { err = "Unknown error in opening file: " + infilename; } - throw khmer_file_exception(err.c_str()); + throw khmer_file_exception(err + " " + strerror(errno)); } if (ht._counts) { @@ -575,9 +579,10 @@ CountingHashFileReader::CountingHashFileReader( if (infile.eof()) { err = "Unexpected end of k-mer count file: " + infilename; } else { - err = "Error reading from k-mer count file: " + infilename; + err = "Error reading from k-mer count file: " + infilename + " " + + strerror(errno); } - throw khmer_file_exception(err.c_str()); + throw khmer_file_exception(err); } } @@ -610,7 +615,8 @@ CountingHashGzFileReader::CountingHashGzFileReader( int read_t = gzread(infile, (char *) &ht_type, 1); if (read_v <= 0 || read_t <= 0) { - std::string err = "K-mer count file read error: " + infilename; + std::string err = "K-mer count file read error: " + infilename + " " + + strerror(errno); gzclose(infile); throw khmer_file_exception(err.c_str()); } else if (!(version == SAVED_FORMAT_VERSION) @@ -637,7 +643,8 @@ CountingHashGzFileReader::CountingHashGzFileReader( sizeof(save_n_tables)); if (read_b <= 0 || read_k <= 0 || read_nt <= 0) { - std::string err = "K-mer count file header read error: " + infilename; + std::string err = "K-mer count file header read error: " + infilename + + " " + strerror(errno); gzclose(infile); throw khmer_file_exception(err.c_str()); } @@ -656,8 +663,14 @@ CountingHashGzFileReader::CountingHashGzFileReader( sizeof(save_tablesize)); if (read_b <= 0) { - std::string err = "K-mer count file header read error: " \ + std::string gzerr = gzerror(infile, &read_b); + std::string err = "K-mer count file header read error: " + infilename; + if (read_b == Z_ERRNO) { + err = err + " " + strerror(errno); + } else { + err = err + " " + gzerr; + } gzclose(infile); throw khmer_file_exception(err.c_str()); } @@ -673,7 +686,13 @@ CountingHashGzFileReader::CountingHashGzFileReader( (unsigned) (tablesize - loaded)); if (read_b <= 0) { + std::string gzerr = gzerror(infile, &read_b); std::string err = "K-mer count file read error: " + infilename; + if (read_b == Z_ERRNO) { + err = err + " " + strerror(errno); + } else { + err = err + " " + gzerr; + } gzclose(infile); throw khmer_file_exception(err.c_str()); } @@ -685,7 +704,13 @@ CountingHashGzFileReader::CountingHashGzFileReader( HashIntoType n_counts = 0; read_b = gzread(infile, (char *) &n_counts, sizeof(n_counts)); if (read_b <= 0) { + std::string gzerr = gzerror(infile, &read_b); std::string err = "K-mer count header read error: " + infilename; + if (read_b == Z_ERRNO) { + err = err + " " + strerror(errno); + } else { + err = err + " " + gzerr; + } gzclose(infile); throw khmer_file_exception(err.c_str()); } @@ -701,7 +726,13 @@ CountingHashGzFileReader::CountingHashGzFileReader( int read_c = gzread(infile, (char *) &count, sizeof(count)); if (read_k <= 0 || read_c <= 0) { + std::string gzerr = gzerror(infile, &read_b); std::string err = "K-mer count read error: " + infilename; + if (read_b == Z_ERRNO) { + err = err + " " + strerror(errno); + } else { + err = err + " " + gzerr; + } gzclose(infile); throw khmer_file_exception(err.c_str()); } @@ -761,7 +792,7 @@ CountingHashFileWriter::CountingHashFileWriter( } } if (outfile.fail()) { - perror("Hash writing file access failure:"); + throw khmer_file_exception(strerror(errno)); } outfile.close(); } @@ -774,11 +805,20 @@ CountingHashGzFileWriter::CountingHashGzFileWriter( throw khmer_exception(); } + int errnum = 0; unsigned int save_ksize = ht._ksize; unsigned char save_n_tables = ht._n_tables; unsigned long long save_tablesize; gzFile outfile = gzopen(outfilename.c_str(), "wb"); + if (outfile == NULL) { + const char * error = gzerror(outfile, &errnum); + if (errnum == Z_ERRNO) { + throw khmer_file_exception(strerror(errno)); + } else { + throw khmer_file_exception(error); + } + } unsigned char version = SAVED_FORMAT_VERSION; gzwrite(outfile, (const char *) &version, 1); @@ -818,7 +858,12 @@ CountingHashGzFileWriter::CountingHashGzFileWriter( gzwrite(outfile, (const char *) &it->second, sizeof(it->second)); } } - + const char * error = gzerror(outfile, &errnum); + if (errnum == Z_ERRNO) { + throw khmer_file_exception(strerror(errno)); + } else if (errnum != Z_OK) { + throw khmer_file_exception(error); + } gzclose(outfile); } diff --git a/lib/hashbits.cc b/lib/hashbits.cc index 045de3b8e8..eda1db2821 100644 --- a/lib/hashbits.cc +++ b/lib/hashbits.cc @@ -11,6 +11,7 @@ #include "read_parsers.hh" #include +#include using namespace std; using namespace khmer; @@ -45,6 +46,9 @@ void Hashbits::save(std::string outfilename) outfile.write((const char *) _counts[i], tablebytes); } + if (outfile.fail()) { + throw khmer_file_exception(strerror(errno)); + } outfile.close(); } diff --git a/lib/hashtable.cc b/lib/hashtable.cc index 5bfa5c51b6..b88e71b287 100644 --- a/lib/hashtable.cc +++ b/lib/hashtable.cc @@ -12,6 +12,7 @@ #include #include +#include using namespace std; using namespace khmer; @@ -256,6 +257,9 @@ void Hashtable::save_tagset(std::string outfilename) } outfile.write((const char *) buf, sizeof(HashIntoType) * tagset_size); + if (outfile.fail()) { + throw khmer_file_exception(strerror(errno)); + } outfile.close(); delete[] buf; diff --git a/lib/subset.cc b/lib/subset.cc index 17008979cf..237864d1d9 100644 --- a/lib/subset.cc +++ b/lib/subset.cc @@ -10,6 +10,7 @@ #include "read_parsers.hh" #include +#include #define IO_BUF_SIZE 250*1000*1000 #define BIG_TRAVERSALS_ARE 200 @@ -1418,6 +1419,10 @@ void SubsetPartition::save_partitionmap(string pmap_filename) if (n_bytes) { outfile.write(buf, n_bytes); } + if (outfile.fail()) { + delete[] buf; + throw khmer_file_exception(strerror(errno)); + } outfile.close(); delete[] buf; diff --git a/tests/test_hashbits.py b/tests/test_hashbits.py index 5ab6401fe6..c47000f895 100644 --- a/tests/test_hashbits.py +++ b/tests/test_hashbits.py @@ -584,7 +584,6 @@ def test_save_load_tagset_trunc(): ht.add_tag('A' * 32) ht.add_tag('G' * 32) ht.save_tagset(outfile) - ht.save_tagset('/tmp/goodversion-k32.tagset') # truncate tagset file... fp = open(outfile, 'rb') diff --git a/tests/test_scripts.py b/tests/test_scripts.py index b1499565eb..0561daa28f 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -761,6 +761,20 @@ def test_normalize_by_median_empty(): assert os.path.exists(outfile), outfile +def test_normalize_by_median_emptycountingtable(): + CUTOFF = '1' + + infile = utils.get_temp_filename('test.fa') + in_dir = os.path.dirname(infile) + + shutil.copyfile(utils.get_test_data('test-empty.fa'), infile) + + script = scriptpath('normalize-by-median.py') + args = ['-C', CUTOFF, '--loadtable', infile, infile] + (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True) + assert 'ValueError' in err, (status, out, err) + + def test_normalize_by_median_fpr(): MIN_TABLESIZE_PARAM = 1