From 40799f0b3a742d0ed443dbb5efb605aaf58819c8 Mon Sep 17 00:00:00 2001 From: Jouni Siren Date: Tue, 7 Apr 2020 14:50:40 -0700 Subject: [PATCH] Remove weighted_minimizers() --- include/gbwtgraph/minimizer.h | 78 ----------------------------------- tests/test_minimizer.cpp | 29 ------------- 2 files changed, 107 deletions(-) diff --git a/include/gbwtgraph/minimizer.h b/include/gbwtgraph/minimizer.h index 415bd34..9146f84 100644 --- a/include/gbwtgraph/minimizer.h +++ b/include/gbwtgraph/minimizer.h @@ -731,84 +731,6 @@ class MinimizerIndex return this->minimizers(str.begin(), str.end()); } - /* - Returns all minimizers in the string specified by the iterators, together - with the weight of how many windows they arise from. The return value is a - vector of pairs of minimizers and window counts sorted by their offsets. If - there are multiple occurrences of one or more minimizer keys with the same - hash in a window, they are all returned, but the window's weight is all - assigned to an arbitrary minimizer that it contains. - */ - std::vector> weighted_minimizers(std::string::const_iterator begin, std::string::const_iterator end) const - { - std::vector> result; - size_t window_length = this->k() + this->w() - 1, total_length = end - begin; - if(total_length < window_length) { return result; } - - // Find the minimizers. - CircularBuffer buffer(this->w()); - size_t valid_chars = 0, start_pos = 0; - size_t next_read_offset = 0; // The first read offset that may contain a new minimizer. - key_type forward_key, reverse_key; - std::string::const_iterator iter = begin; - while(iter != end) - { - // Get the forward and reverse strand minimizer candidates - forward_key.forward(this->k(), *iter, valid_chars); - reverse_key.reverse(this->k(), *iter); - // If they don't have any Ns or anything in them, throw them into the sliding window tracked by buffer. - // Otherwise just slide it along. - if(valid_chars >= this->k()) { buffer.advance(start_pos, forward_key, reverse_key); } - else { buffer.advance(start_pos); } - ++iter; - // If we have passed at least k characters, we must advance the starting position of the next kmer. - if(static_cast(iter - begin) >= this->k()) { start_pos++; } - // We have a full window with a minimizer. - if(static_cast(iter - begin) >= window_length && !buffer.empty()) - { - // Insert the candidates if: - // 1) this is the first minimizer we encounter; - // 2) the last reported minimizer had the same hash (we may have new occurrences); or - // 3) the first candidate is located after the last reported minimizer. - if(result.empty() || result.back().first.hash == buffer.front().hash || result.back().first.offset < buffer.front().offset) - { - // Insert all new occurrences of the minimizer in the window. - for(size_t i = buffer.begin(); i < buffer.end() && buffer.at(i).hash == buffer.front().hash; i++) - { - if(buffer.at(i).offset >= next_read_offset) - { - result.emplace_back(buffer.at(i), 0); - next_read_offset = buffer.at(i).offset + 1; - } - } - } - - // Assign the window's weight to an arbitrary minimizer that occured in it. - // Whatever is last in result right now will work. - result.back().second++; - } - } - - // It was more convenient to use the first offset of the kmer, regardless of the orientation. - // If the minimizer is a reverse complement, we must return the last offset instead. - for(auto& weighted_minimizer : result) - { - if(weighted_minimizer.first.is_reverse) { weighted_minimizer.first.offset += this->k() - 1; } - } - std::sort(result.begin(), result.end()); - - return result; - } - - /* - Returns all minimizers in the string. The return value is a vector of - minimizers and window counts sorted by their offsets. - */ - std::vector> weighted_minimizers(const std::string& str) const - { - return this->weighted_minimizers(str.begin(), str.end()); - } - /* Returns all minimizers in the string specified by the iterators, together with the start and length of the run of windows they arise from. The return diff --git a/tests/test_minimizer.cpp b/tests/test_minimizer.cpp index 440df3b..0bf388a 100644 --- a/tests/test_minimizer.cpp +++ b/tests/test_minimizer.cpp @@ -157,29 +157,6 @@ class MinimizerExtraction : public ::testing::Test this->rev = reverse_complement(this->str); this->repetitive = "TATATA"; } - - void check_weighted_minimizers(const std::string& query, size_t k, size_t w) - { - MinimizerIndex index(k, w); - std::vector::minimizer_type> result = index.minimizers(query); - std::vector::minimizer_type, size_t>> weighted = index.weighted_minimizers(query); - - std::stringstream ss; - ss << "(" << k << ", " << w << ")-minimizers in " << query; - std::string test_description = ss.str(); - size_t correct_weight = query.length() + 2 - k - w; - - ASSERT_EQ(weighted.size(), result.size()) << "Wrong number of weighted " << test_description; - size_t total_weight = 0; - bool same_minimizers = true; - for(size_t i = 0; i < result.size(); i++) - { - if(weighted[i].first != result[i]) { same_minimizers = false; } - total_weight += weighted[i].second; - } - EXPECT_TRUE(same_minimizers) << "Incorrect weighted " << test_description; - EXPECT_EQ(total_weight, correct_weight) << "Incorrect total weight for " << test_description; - } }; TYPED_TEST_CASE(MinimizerExtraction, KeyTypes); @@ -424,12 +401,6 @@ TYPED_TEST(MinimizerExtraction, AllOccurrences) EXPECT_EQ(result, correct) << "Did not find the correct minimizers"; } -TYPED_TEST(MinimizerExtraction, WeightedMinimizers) -{ - this->check_weighted_minimizers(this->str, 3, 2); - this->check_weighted_minimizers(this->repetitive, 3, 3); -} - TYPED_TEST(MinimizerExtraction, InvalidCharacters) { std::string weird = "CGAATAxAATACT";