dib-lab · luizirber · Apr 14, 2015 · Feb 26, 2015 · Mar 30, 2015 · Apr 6, 2015
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,10 @@
+2015-04-14  Michael R. Crusoe  <mcrusoe@msu.edu>
+
+   * khmer/{__init__.py,_khmermodule.cc},lib/{counting,hashbits,hashtable,
+   subset}.cc: catch IO errors and report them.
+   * tests/test_hashbits.py: remove write to fixed path in /tmp
+   * tests/test_scripts.py: added test for empty counting table file
+
 2015-04-13 Elmar Bucher <buchere@ohsu.edu>
 
    * scripts/normalize-by-median.py (main): introduced warning for when at least

diff --git a/khmer/__init__.py b/khmer/__init__.py
@@ -117,12 +117,15 @@ def extract_hashbits_info(filename):
     uchar_size = len(pack('B', 0))
     ulonglong_size = len(pack('Q', 0))
 
-    with open(filename, 'rb') as hashbits:
-        version, = unpack('B', hashbits.read(1))
-        ht_type, = unpack('B', hashbits.read(1))
-        ksize, = unpack('I', hashbits.read(uint_size))
-        n_tables, = unpack('B', hashbits.read(uchar_size))
-        table_size, = unpack('Q', hashbits.read(ulonglong_size))
+    try:
+        with open(filename, 'rb') as hashbits:
+            version, = unpack('B', hashbits.read(1))
+            ht_type, = unpack('B', hashbits.read(1))
+            ksize, = unpack('I', hashbits.read(uint_size))
+            n_tables, = unpack('B', hashbits.read(uchar_size))
+            table_size, = unpack('Q', hashbits.read(ulonglong_size))
+    except:
+        raise ValueError("Presence table '{}' is corrupt ".format(filename))
 
     return ksize, round(table_size, -2), n_tables, version, ht_type
 
@@ -146,13 +149,16 @@ def extract_countinghash_info(filename):
     uint_size = len(pack('I', 0))
     ulonglong_size = len(pack('Q', 0))
 
-    with open(filename, 'rb') as countinghash:
-        version, = unpack('B', countinghash.read(1))
-        ht_type, = unpack('B', countinghash.read(1))
-        use_bigcount, = unpack('B', countinghash.read(1))
-        ksize, = unpack('I', countinghash.read(uint_size))
-        n_tables, = unpack('B', countinghash.read(1))
-        table_size, = unpack('Q', countinghash.read(ulonglong_size))
+    try:
+        with open(filename, 'rb') as countinghash:
+            version, = unpack('B', countinghash.read(1))
+            ht_type, = unpack('B', countinghash.read(1))
+            use_bigcount, = unpack('B', countinghash.read(1))
+            ksize, = unpack('I', countinghash.read(uint_size))
+            n_tables, = unpack('B', countinghash.read(1))
+            table_size, = unpack('Q', countinghash.read(ulonglong_size))
+    except:
+        raise ValueError("Counting table '{}' is corrupt ".format(filename))
 
     return ksize, round(table_size, -2), n_tables, use_bigcount, version, \
         ht_type

diff --git a/khmer/_khmermodule.cc b/khmer/_khmermodule.cc
@@ -1258,7 +1258,12 @@ hash_save(khmer_KCountingHash_Object * me, PyObject * args)
         return NULL;
     }
 
-    counting->save(filename);
+    try {
+        counting->save(filename);
+    } catch (khmer_file_exception &e) {
+        PyErr_SetString(PyExc_IOError, e.what());
+        return NULL;
+    }
 
     Py_RETURN_NONE;
 }
@@ -2032,7 +2037,12 @@ hashbits_save_stop_tags(khmer_KHashbits_Object * me, PyObject * args)
         return NULL;
     }
 
-    hashbits->save_stop_tags(filename);
+    try {
+        hashbits->save_stop_tags(filename);
+    } catch (khmer_file_exception &e) {
+        PyErr_SetString(PyExc_IOError, e.what());
+        return NULL;
+    }
 
     Py_RETURN_NONE;
 }
@@ -2808,7 +2818,12 @@ hashbits_save_partitionmap(khmer_KHashbits_Object * me, PyObject * args)
         return NULL;
     }
 
-    hashbits->partition->save_partitionmap(filename);
+    try {
+        hashbits->partition->save_partitionmap(filename);
+    } catch (khmer_file_exception &e) {
+        PyErr_SetString(PyExc_IOError, e.what());
+        return NULL;
+    }
 
     Py_RETURN_NONE;
 }
@@ -2961,7 +2976,12 @@ hashbits_save(khmer_KHashbits_Object * me, PyObject * args)
         return NULL;
     }
 
-    hashbits->save(filename);
+    try {
+        hashbits->save(filename);
+    } catch (khmer_file_exception &e) {
+        PyErr_SetString(PyExc_IOError, e.what());
+        return NULL;
+    }
 
     Py_RETURN_NONE;
 }
@@ -3006,7 +3026,12 @@ hashbits_save_tagset(khmer_KHashbits_Object * me, PyObject * args)
         return NULL;
     }
 
-    hashbits->save_tagset(filename);
+    try {
+        hashbits->save_tagset(filename);
+    } catch (khmer_file_exception &e) {
+        PyErr_SetString(PyExc_IOError, e.what());
+        return NULL;
+    }
 
     Py_RETURN_NONE;
 }
@@ -3027,7 +3052,12 @@ hashbits_save_subset_partitionmap(khmer_KHashbits_Object * me, PyObject * args)
 
     Py_BEGIN_ALLOW_THREADS
 
-    subset_p->save_partitionmap(filename);
+    try {
+        subset_p->save_partitionmap(filename);
+    } catch (khmer_file_exception &e) {
+        PyErr_SetString(PyExc_IOError, e.what());
+        return NULL;
+    }
 
     Py_END_ALLOW_THREADS
 

diff --git a/lib/counting.cc b/lib/counting.cc
@@ -14,6 +14,7 @@
 #include <math.h>
 #include <algorithm>
 #include <sstream>
+#include <errno.h>
 
 using namespace std;
 using namespace khmer;
@@ -47,6 +48,9 @@ void CountingHash::output_fasta_kmer_pos_freq(
     }
 
     delete parser;
+    if (outfile.fail()) {
+        throw khmer_file_exception(strerror(errno));
+    }
 
     outfile.close();
 }
@@ -487,7 +491,7 @@ CountingHashFileReader::CountingHashFileReader(
         } else {
             err = "Unknown error in opening file: " + infilename;
         }
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err + " " + strerror(errno));
     }
 
     if (ht._counts) {
@@ -575,9 +579,10 @@ CountingHashFileReader::CountingHashFileReader(
         if (infile.eof()) {
             err = "Unexpected end of k-mer count file: " + infilename;
         } else {
-            err = "Error reading from k-mer count file: " + infilename;
+            err = "Error reading from k-mer count file: " + infilename + " "
+                  + strerror(errno);
         }
-        throw khmer_file_exception(err.c_str());
+        throw khmer_file_exception(err);
     }
 }
 
@@ -610,7 +615,8 @@ CountingHashGzFileReader::CountingHashGzFileReader(
     int read_t = gzread(infile, (char *) &ht_type, 1);
 
     if (read_v <= 0 || read_t <= 0) {
-        std::string err = "K-mer count file read error: " + infilename;
+        std::string err = "K-mer count file read error: " + infilename + " "
+                          + strerror(errno);
         gzclose(infile);
         throw khmer_file_exception(err.c_str());
     } else if (!(version == SAVED_FORMAT_VERSION)
@@ -637,7 +643,8 @@ CountingHashGzFileReader::CountingHashGzFileReader(
                          sizeof(save_n_tables));
 
     if (read_b <= 0 || read_k <= 0 || read_nt <= 0) {
-        std::string err = "K-mer count file header read error: " + infilename;
+        std::string err = "K-mer count file header read error: " + infilename
+                          + " " + strerror(errno);
         gzclose(infile);
         throw khmer_file_exception(err.c_str());
     }
@@ -656,8 +663,14 @@ CountingHashGzFileReader::CountingHashGzFileReader(
                         sizeof(save_tablesize));
 
         if (read_b <= 0) {
-            std::string err = "K-mer count file header read error: " \
+            std::string gzerr = gzerror(infile, &read_b);
+            std::string err = "K-mer count file header read error: "
                               + infilename;
+            if (read_b == Z_ERRNO) {
+                err = err + " " + strerror(errno);
+            } else {
+                err = err + " " + gzerr;
+            }
             gzclose(infile);
             throw khmer_file_exception(err.c_str());
         }
@@ -673,7 +686,13 @@ CountingHashGzFileReader::CountingHashGzFileReader(
                             (unsigned) (tablesize - loaded));
 
             if (read_b <= 0) {
+                std::string gzerr = gzerror(infile, &read_b);
                 std::string err = "K-mer count file read error: " + infilename;
+                if (read_b == Z_ERRNO) {
+                    err = err + " " + strerror(errno);
+                } else {
+                    err = err + " " + gzerr;
+                }
                 gzclose(infile);
                 throw khmer_file_exception(err.c_str());
             }
@@ -685,7 +704,13 @@ CountingHashGzFileReader::CountingHashGzFileReader(
     HashIntoType n_counts = 0;
     read_b = gzread(infile, (char *) &n_counts, sizeof(n_counts));
     if (read_b <= 0) {
+        std::string gzerr = gzerror(infile, &read_b);
         std::string err = "K-mer count header read error: " + infilename;
+        if (read_b == Z_ERRNO) {
+            err = err + " " + strerror(errno);
+        } else {
+            err = err + " " + gzerr;
+        }
         gzclose(infile);
         throw khmer_file_exception(err.c_str());
     }
@@ -701,7 +726,13 @@ CountingHashGzFileReader::CountingHashGzFileReader(
             int read_c = gzread(infile, (char *) &count, sizeof(count));
 
             if (read_k <= 0 || read_c <= 0) {
+                std::string gzerr = gzerror(infile, &read_b);
                 std::string err = "K-mer count read error: " + infilename;
+                if (read_b == Z_ERRNO) {
+                    err = err + " " + strerror(errno);
+                } else {
+                    err = err + " " + gzerr;
+                }
                 gzclose(infile);
                 throw khmer_file_exception(err.c_str());
             }
@@ -761,7 +792,7 @@ CountingHashFileWriter::CountingHashFileWriter(
         }
     }
     if (outfile.fail()) {
-        perror("Hash writing file access failure:");
+        throw khmer_file_exception(strerror(errno));
     }
     outfile.close();
 }
@@ -774,11 +805,20 @@ CountingHashGzFileWriter::CountingHashGzFileWriter(
         throw khmer_exception();
     }
 
+    int errnum = 0;
     unsigned int save_ksize = ht._ksize;
     unsigned char save_n_tables = ht._n_tables;
     unsigned long long save_tablesize;
 
     gzFile outfile = gzopen(outfilename.c_str(), "wb");
+    if (outfile == NULL) {
+        const char * error = gzerror(outfile, &errnum);
+        if (errnum == Z_ERRNO) {
+            throw khmer_file_exception(strerror(errno));
+        } else {
+            throw khmer_file_exception(error);
+        }
+    }
 
     unsigned char version = SAVED_FORMAT_VERSION;
     gzwrite(outfile, (const char *) &version, 1);
@@ -818,7 +858,12 @@ CountingHashGzFileWriter::CountingHashGzFileWriter(
             gzwrite(outfile, (const char *) &it->second, sizeof(it->second));
         }
     }
-
+    const char * error = gzerror(outfile, &errnum);
+    if (errnum == Z_ERRNO) {
+        throw khmer_file_exception(strerror(errno));
+    } else if (errnum != Z_OK) {
+        throw khmer_file_exception(error);
+    }
     gzclose(outfile);
 }
 

diff --git a/lib/hashbits.cc b/lib/hashbits.cc
@@ -11,6 +11,7 @@
 #include "read_parsers.hh"
 
 #include <sstream>
+#include <errno.h>
 
 using namespace std;
 using namespace khmer;
@@ -45,6 +46,9 @@ void Hashbits::save(std::string outfilename)
 
         outfile.write((const char *) _counts[i], tablebytes);
     }
+    if (outfile.fail()) {
+        throw khmer_file_exception(strerror(errno));
+    }
     outfile.close();
 }
 

diff --git a/lib/hashtable.cc b/lib/hashtable.cc
@@ -12,6 +12,7 @@
 
 #include <algorithm>
 #include <sstream>
+#include <errno.h>
 
 using namespace std;
 using namespace khmer;
@@ -256,6 +257,9 @@ void Hashtable::save_tagset(std::string outfilename)
     }
 
     outfile.write((const char *) buf, sizeof(HashIntoType) * tagset_size);
+    if (outfile.fail()) {
+        throw khmer_file_exception(strerror(errno));
+    }
     outfile.close();
 
     delete[] buf;

diff --git a/lib/subset.cc b/lib/subset.cc
@@ -10,6 +10,7 @@
 #include "read_parsers.hh"
 
 #include <sstream>
+#include <errno.h>
 
 #define IO_BUF_SIZE 250*1000*1000
 #define BIG_TRAVERSALS_ARE 200
@@ -1418,6 +1419,10 @@ void SubsetPartition::save_partitionmap(string pmap_filename)
     if (n_bytes) {
         outfile.write(buf, n_bytes);
     }
+    if (outfile.fail()) {
+        delete[] buf;
+        throw khmer_file_exception(strerror(errno));
+    }
     outfile.close();
 
     delete[] buf;

diff --git a/tests/test_hashbits.py b/tests/test_hashbits.py
@@ -584,7 +584,6 @@ def test_save_load_tagset_trunc():
     ht.add_tag('A' * 32)
     ht.add_tag('G' * 32)
     ht.save_tagset(outfile)
-    ht.save_tagset('/tmp/goodversion-k32.tagset')
 
     # truncate tagset file...
     fp = open(outfile, 'rb')

diff --git a/tests/test_scripts.py b/tests/test_scripts.py
@@ -761,6 +761,20 @@ def test_normalize_by_median_empty():
     assert os.path.exists(outfile), outfile
 
 
+def test_normalize_by_median_emptycountingtable():
+    CUTOFF = '1'
+
+    infile = utils.get_temp_filename('test.fa')
+    in_dir = os.path.dirname(infile)
+
+    shutil.copyfile(utils.get_test_data('test-empty.fa'), infile)
+
+    script = scriptpath('normalize-by-median.py')
+    args = ['-C', CUTOFF, '--loadtable', infile, infile]
+    (status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
+    assert 'ValueError' in err, (status, out, err)
+
+
 def test_normalize_by_median_fpr():
     MIN_TABLESIZE_PARAM = 1