diff --git a/ChangeLog b/ChangeLog index c1988dc9f2..84b159ae62 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2015-06-14 Kevin Murray + + * lib/counting.cc: Fix infinite loop in gzipped CountingHash I/O + * tests/test_counting_hash.py: Add test of large CountingHash I/O + * setup.cfg: Skip tests with the 'huge' label by default + 2015-06-13 Michael R. Crusoe * Makefile, build-jenkins.sh: unify sphinx dependencies diff --git a/Makefile b/Makefile index cadd146f73..ed2d435255 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ UNAME := $(shell uname) ifeq ($(UNAME),Linux) TESTATTR='!known_failing,!jenkins' else - TESTATTR='!known_failing,!jenkins,!linux' + TESTATTR='!known_failing,!jenkins,!huge' endif ## all : default task; compile C++ code, build shared object library diff --git a/lib/counting.cc b/lib/counting.cc index c7e6faf346..9f87df0f91 100644 --- a/lib/counting.cc +++ b/lib/counting.cc @@ -685,8 +685,15 @@ CountingHashGzFileReader::CountingHashGzFileReader( HashIntoType loaded = 0; while (loaded != tablesize) { - read_b = gzread(infile, (char *) ht._counts[i], - (unsigned) (tablesize - loaded)); + unsigned long long to_read_ll = tablesize - loaded; + unsigned int to_read_int; + // Zlib can only read chunks of at most INT_MAX bytes. + if (to_read_ll > INT_MAX) { + to_read_int = INT_MAX; + } else { + to_read_int = to_read_ll; + } + read_b = gzread(infile, (char *) ht._counts[i] + loaded, to_read_int); if (read_b <= 0) { std::string gzerr = gzerror(infile, &read_b); @@ -845,8 +852,38 @@ CountingHashGzFileWriter::CountingHashGzFileWriter( sizeof(save_tablesize)); unsigned long long written = 0; while (written != save_tablesize) { - written += gzwrite(outfile, (const char *) ht._counts[i], - (int) (save_tablesize - written)); + unsigned long long to_write_ll = save_tablesize - written; + unsigned int to_write_int; + int gz_result; + // Zlib can only write chunks of at most INT_MAX bytes. 
+ if (to_write_ll > INT_MAX) { + to_write_int = INT_MAX; + } else { + to_write_int = to_write_ll; + } + gz_result = gzwrite(outfile, (const char *) ht._counts[i] + written, + to_write_int); + // Zlib returns 0 on error + if (gz_result == 0) { + int errcode = 0; + const char *err_msg; + std::ostringstream msg; + + msg << "gzwrite failed while writing counting hash: "; + // Get zlib error + err_msg = gzerror(outfile, &errcode); + if (errcode != Z_ERRNO) { + // Zlib error, not stdlib + msg << err_msg; + gzclearerr(outfile); + } else { + // stdlib error + msg << strerror(errno); + } + gzclose(outfile); + throw khmer_file_exception(msg.str().c_str()); + } + written += gz_result; } } diff --git a/setup.cfg b/setup.cfg index d5e5068ebc..7291943ff4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,7 @@ [nosetests] verbosity = 2 stop = TRUE -attr = !known_failing,!jenkins,!linux +attr = !known_failing,!jenkins,!huge #processes = -1 # breaks xunit output [build_ext] diff --git a/tests/test_counting_hash.py b/tests/test_counting_hash.py index 97a1b35f80..595c5c283b 100644 --- a/tests/test_counting_hash.py +++ b/tests/test_counting_hash.py @@ -142,7 +142,7 @@ def test_get_raw_tables_view(): assert sum(tab.tolist()) == 1 -@attr('linux') +@attr('huge') def test_toobig(): try: ct = khmer.new_counting_hash(30, 1e13, 1) @@ -568,6 +568,29 @@ def test_get_kmers(): assert kmers == ["AAAAAA", "AAAAAT"] +@attr("huge") +def test_save_load_large(): + def do_test(ctfile): + inpath = utils.get_test_data('random-20-a.fa') + savepath = utils.get_temp_filename(ctfile) + + sizes = khmer.get_n_primes_above_x(1, 2**31) + + orig = khmer.CountingHash(12, sizes) + orig.consume_fasta(inpath) + orig.save(savepath) + + loaded = khmer.load_counting_hash(savepath) + + orig_count = orig.n_occupied() + loaded_count = loaded.n_occupied() + assert orig_count == 3966, orig_count + assert loaded_count == orig_count, loaded_count + + for ctfile in ['temp.ct.gz', 'temp.ct']: + do_test(ctfile) + + def test_save_load(): 
inpath = utils.get_test_data('random-20-a.fa') savepath = utils.get_temp_filename('tempcountingsave0.ht') diff --git a/tests/test_counting_single.py b/tests/test_counting_single.py index 73c4dc77c3..3848690d14 100644 --- a/tests/test_counting_single.py +++ b/tests/test_counting_single.py @@ -26,7 +26,7 @@ def test_no_collision(): assert kh.get('TTTT') == 2 -@attr('linux') +@attr('huge') def test_toobig(): try: ct = khmer.new_hashtable(4, 1000000000000) diff --git a/tests/test_hashbits_obj.py b/tests/test_hashbits_obj.py index 9de8e799ec..9dc6f78ec7 100644 --- a/tests/test_hashbits_obj.py +++ b/tests/test_hashbits_obj.py @@ -27,7 +27,7 @@ def teardown(): utils.cleanup() -@attr('linux') +@attr('huge') def test_toobig(): try: pt = khmer.Hashbits(32, 1e13, 1) diff --git a/tests/test_labelhash.py b/tests/test_labelhash.py index b7e8813527..48346b025c 100644 --- a/tests/test_labelhash.py +++ b/tests/test_labelhash.py @@ -25,7 +25,7 @@ def teardown(): # * thread-safety -@attr('linux') +@attr('huge') def test_toobig(): try: lh = LabelHash(20, 1e13, 1) diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 2f49e2fc9c..26204f72c6 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -72,7 +72,7 @@ def test_load_into_counting_nonwritable(): assert status == 1, status -@attr('linux') +@attr('huge') def test_load_into_counting_toobig(): script = 'load-into-counting.py' args = ['-x', '1e12', '-N', '2', '-k', '20', '-t', '--force']