Skip to content

Commit

Permalink
work in progress
Browse files Browse the repository at this point in the history
  • Loading branch information
mr-c committed Apr 3, 2015
1 parent f492f63 commit 47c29dd
Show file tree
Hide file tree
Showing 8 changed files with 126 additions and 22 deletions.
17 changes: 10 additions & 7 deletions khmer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,16 @@ def extract_countinghash_info(filename):
uint_size = len(pack('I', 0))
ulonglong_size = len(pack('Q', 0))

with open(filename, 'rb') as countinghash:
version, = unpack('B', countinghash.read(1))
ht_type, = unpack('B', countinghash.read(1))
use_bigcount, = unpack('B', countinghash.read(1))
ksize, = unpack('I', countinghash.read(uint_size))
n_tables, = unpack('B', countinghash.read(1))
table_size, = unpack('Q', countinghash.read(ulonglong_size))
try:
with open(filename, 'rb') as countinghash:
version, = unpack('B', countinghash.read(1))
ht_type, = unpack('B', countinghash.read(1))
use_bigcount, = unpack('B', countinghash.read(1))
ksize, = unpack('I', countinghash.read(uint_size))
n_tables, = unpack('B', countinghash.read(1))
table_size, = unpack('Q', countinghash.read(ulonglong_size))
except:
raise ValueError("Counting table '{}' is corrupt ".format(filename))

return ksize, round(table_size, -2), n_tables, use_bigcount, version, \
ht_type
Expand Down
42 changes: 36 additions & 6 deletions khmer/_khmermodule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1262,7 +1262,12 @@ hash_save(khmer_KCountingHash_Object * me, PyObject * args)
return NULL;
}

counting->save(filename);
try {
counting->save(filename);
} catch (khmer_file_exception &e) {
PyErr_SetString(PyExc_IOError, e.what());
return NULL;
}

Py_RETURN_NONE;
}
Expand Down Expand Up @@ -2082,7 +2087,12 @@ hashbits_save_stop_tags(khmer_KHashbits_Object * me, PyObject * args)
return NULL;
}

hashbits->save_stop_tags(filename);
try {
hashbits->save_stop_tags(filename);
} catch (khmer_file_exception &e) {
PyErr_SetString(PyExc_IOError, e.what());
return NULL;
}

Py_RETURN_NONE;
}
Expand Down Expand Up @@ -2858,7 +2868,12 @@ hashbits_save_partitionmap(khmer_KHashbits_Object * me, PyObject * args)
return NULL;
}

hashbits->partition->save_partitionmap(filename);
try {
hashbits->partition->save_partitionmap(filename);
} catch (khmer_file_exception &e) {
PyErr_SetString(PyExc_IOError, e.what());
return NULL;
}

Py_RETURN_NONE;
}
Expand Down Expand Up @@ -3011,7 +3026,12 @@ hashbits_save(khmer_KHashbits_Object * me, PyObject * args)
return NULL;
}

hashbits->save(filename);
try {
hashbits->save(filename);
} catch (khmer_file_exception &e) {
PyErr_SetString(PyExc_IOError, e.what());
return NULL;
}

Py_RETURN_NONE;
}
Expand Down Expand Up @@ -3056,7 +3076,12 @@ hashbits_save_tagset(khmer_KHashbits_Object * me, PyObject * args)
return NULL;
}

hashbits->save_tagset(filename);
try {
hashbits->save_tagset(filename);
} catch (khmer_file_exception &e) {
PyErr_SetString(PyExc_IOError, e.what());
return NULL;
}

Py_RETURN_NONE;
}
Expand All @@ -3077,7 +3102,12 @@ hashbits_save_subset_partitionmap(khmer_KHashbits_Object * me, PyObject * args)

Py_BEGIN_ALLOW_THREADS

subset_p->save_partitionmap(filename);
try {
subset_p->save_partitionmap(filename);
} catch (khmer_file_exception &e) {
PyErr_SetString(PyExc_IOError, e.what());
return NULL;
}

Py_END_ALLOW_THREADS

Expand Down
61 changes: 53 additions & 8 deletions lib/counting.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <math.h>
#include <algorithm>
#include <sstream>
#include <errno.h>

using namespace std;
using namespace khmer;
Expand Down Expand Up @@ -47,6 +48,9 @@ void CountingHash::output_fasta_kmer_pos_freq(
}

delete parser;
if (outfile.fail()) {
throw khmer_file_exception(strerror(errno));
}

outfile.close();
}
Expand Down Expand Up @@ -487,7 +491,7 @@ CountingHashFileReader::CountingHashFileReader(
} else {
err = "Unknown error in opening file: " + infilename;
}
throw khmer_file_exception(err.c_str());
throw khmer_file_exception(err + " " + strerror(errno));
}

if (ht._counts) {
Expand Down Expand Up @@ -575,9 +579,10 @@ CountingHashFileReader::CountingHashFileReader(
if (infile.eof()) {
err = "Unexpected end of k-mer count file: " + infilename;
} else {
err = "Error reading from k-mer count file: " + infilename;
err = "Error reading from k-mer count file: " + infilename + " "
+ strerror(errno);
}
throw khmer_file_exception(err.c_str());
throw khmer_file_exception(err);
}
}

Expand Down Expand Up @@ -610,7 +615,8 @@ CountingHashGzFileReader::CountingHashGzFileReader(
int read_t = gzread(infile, (char *) &ht_type, 1);

if (read_v <= 0 || read_t <= 0) {
std::string err = "K-mer count file read error: " + infilename;
std::string err = "K-mer count file read error: " + infilename + " "
+ strerror(errno);
gzclose(infile);
throw khmer_file_exception(err.c_str());
} else if (!(version == SAVED_FORMAT_VERSION)
Expand All @@ -637,7 +643,8 @@ CountingHashGzFileReader::CountingHashGzFileReader(
sizeof(save_n_tables));

if (read_b <= 0 || read_k <= 0 || read_nt <= 0) {
std::string err = "K-mer count file header read error: " + infilename;
std::string err = "K-mer count file header read error: " + infilename
+ " " + strerror(errno);
gzclose(infile);
throw khmer_file_exception(err.c_str());
}
Expand All @@ -656,8 +663,14 @@ CountingHashGzFileReader::CountingHashGzFileReader(
sizeof(save_tablesize));

if (read_b <= 0) {
std::string err = "K-mer count file header read error: " \
std::string gzerr = gzerror(infile, &read_b);
std::string err = "K-mer count file header read error: "
+ infilename;
if (read_b == Z_ERRNO) {
err = err + " " + strerror(errno);
} else {
err = err + " " + gzerr;
}
gzclose(infile);
throw khmer_file_exception(err.c_str());
}
Expand All @@ -673,7 +686,13 @@ CountingHashGzFileReader::CountingHashGzFileReader(
(unsigned) (tablesize - loaded));

if (read_b <= 0) {
std::string gzerr = gzerror(infile, &read_b);
std::string err = "K-mer count file read error: " + infilename;
if (read_b == Z_ERRNO) {
err = err + " " + strerror(errno);
} else {
err = err + " " + gzerr;
}
gzclose(infile);
throw khmer_file_exception(err.c_str());
}
Expand All @@ -685,7 +704,13 @@ CountingHashGzFileReader::CountingHashGzFileReader(
HashIntoType n_counts = 0;
read_b = gzread(infile, (char *) &n_counts, sizeof(n_counts));
if (read_b <= 0) {
std::string gzerr = gzerror(infile, &read_b);
std::string err = "K-mer count header read error: " + infilename;
if (read_b == Z_ERRNO) {
err = err + " " + strerror(errno);
} else {
err = err + " " + gzerr;
}
gzclose(infile);
throw khmer_file_exception(err.c_str());
}
Expand All @@ -701,7 +726,13 @@ CountingHashGzFileReader::CountingHashGzFileReader(
int read_c = gzread(infile, (char *) &count, sizeof(count));

if (read_k <= 0 || read_c <= 0) {
std::string gzerr = gzerror(infile, &read_b);
std::string err = "K-mer count read error: " + infilename;
if (read_b == Z_ERRNO) {
err = err + " " + strerror(errno);
} else {
err = err + " " + gzerr;
}
gzclose(infile);
throw khmer_file_exception(err.c_str());
}
Expand Down Expand Up @@ -761,7 +792,7 @@ CountingHashFileWriter::CountingHashFileWriter(
}
}
if (outfile.fail()) {
perror("Hash writing file access failure:");
throw khmer_file_exception(strerror(errno));
}
outfile.close();
}
Expand All @@ -774,11 +805,20 @@ CountingHashGzFileWriter::CountingHashGzFileWriter(
throw khmer_exception();
}

int errnum = 0;
unsigned int save_ksize = ht._ksize;
unsigned char save_n_tables = ht._n_tables;
unsigned long long save_tablesize;

gzFile outfile = gzopen(outfilename.c_str(), "wb");
if (outfile == NULL) {
const char * error = gzerror(outfile, &errnum);
if (errnum == Z_ERRNO) {
throw khmer_file_exception(strerror(errno));
} else {
throw khmer_file_exception(error);
}
}

unsigned char version = SAVED_FORMAT_VERSION;
gzwrite(outfile, (const char *) &version, 1);
Expand Down Expand Up @@ -818,7 +858,12 @@ CountingHashGzFileWriter::CountingHashGzFileWriter(
gzwrite(outfile, (const char *) &it->second, sizeof(it->second));
}
}

const char * error = gzerror(outfile, &errnum);
if (errnum == Z_ERRNO) {
throw khmer_file_exception(strerror(errno));
} else if (errnum != Z_OK) {
throw khmer_file_exception(error);
}
gzclose(outfile);
}

Expand Down
4 changes: 4 additions & 0 deletions lib/hashbits.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "read_parsers.hh"

#include <sstream>
#include <errno.h>

using namespace std;
using namespace khmer;
Expand Down Expand Up @@ -45,6 +46,9 @@ void Hashbits::save(std::string outfilename)

outfile.write((const char *) _counts[i], tablebytes);
}
if (outfile.fail()) {
throw khmer_file_exception(strerror(errno));
}
outfile.close();
}

Expand Down
4 changes: 4 additions & 0 deletions lib/hashtable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include <algorithm>
#include <sstream>
#include <errno.h>

using namespace std;
using namespace khmer;
Expand Down Expand Up @@ -256,6 +257,9 @@ void Hashtable::save_tagset(std::string outfilename)
}

outfile.write((const char *) buf, sizeof(HashIntoType) * tagset_size);
if (outfile.fail()) {
throw khmer_file_exception(strerror(errno));
}
outfile.close();

delete[] buf;
Expand Down
5 changes: 5 additions & 0 deletions lib/subset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "read_parsers.hh"

#include <sstream>
#include <errno.h>

#define IO_BUF_SIZE 250*1000*1000
#define BIG_TRAVERSALS_ARE 200
Expand Down Expand Up @@ -1418,6 +1419,10 @@ void SubsetPartition::save_partitionmap(string pmap_filename)
if (n_bytes) {
outfile.write(buf, n_bytes);
}
if (outfile.fail()) {
delete[] buf;
throw khmer_file_exception(strerror(errno));
}
outfile.close();

delete[] buf;
Expand Down
1 change: 0 additions & 1 deletion tests/test_hashbits.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,6 @@ def test_save_load_tagset_trunc():
ht.add_tag('A' * 32)
ht.add_tag('G' * 32)
ht.save_tagset(outfile)
ht.save_tagset('/tmp/goodversion-k32.tagset')

# truncate tagset file...
fp = open(outfile, 'rb')
Expand Down
14 changes: 14 additions & 0 deletions tests/test_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,20 @@ def test_normalize_by_median_empty():
assert os.path.exists(outfile), outfile


def test_normalize_by_median_emptycountingtable():
CUTOFF = '1'

infile = utils.get_temp_filename('test.fa')
in_dir = os.path.dirname(infile)

shutil.copyfile(utils.get_test_data('test-empty.fa'), infile)

script = scriptpath('normalize-by-median.py')
args = ['-C', CUTOFF, '--loadtable', infile, infile]
(status, out, err) = utils.runscript(script, args, in_dir, fail_ok=True)
assert 'ValueError' in err, (status, out, err)


def test_normalize_by_median_fpr():
MIN_TABLESIZE_PARAM = 1

Expand Down

0 comments on commit 47c29dd

Please sign in to comment.