Skip to content

Commit

Permalink
Remove weighted_minimizers()
Browse files Browse the repository at this point in the history
  • Loading branch information
jltsiren committed Apr 7, 2020
1 parent 9ef26a1 commit 40799f0
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 107 deletions.
78 changes: 0 additions & 78 deletions include/gbwtgraph/minimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -731,84 +731,6 @@ class MinimizerIndex
return this->minimizers(str.begin(), str.end());
}

/*
Returns all minimizers in the string specified by the iterators, together
with the weight of how many windows they arise from. The return value is a
vector of pairs of minimizers and window counts sorted by their offsets. If
there are multiple occurrences of one or more minimizer keys with the same
hash in a window, they are all returned, but the window's weight is all
assigned to an arbitrary minimizer that it contains.
*/
std::vector<std::pair<minimizer_type, size_t>> weighted_minimizers(std::string::const_iterator begin, std::string::const_iterator end) const
{
std::vector<std::pair<minimizer_type, size_t>> result;
size_t window_length = this->k() + this->w() - 1, total_length = end - begin;
if(total_length < window_length) { return result; }

// Find the minimizers.
CircularBuffer buffer(this->w());
size_t valid_chars = 0, start_pos = 0;
size_t next_read_offset = 0; // The first read offset that may contain a new minimizer.
key_type forward_key, reverse_key;
std::string::const_iterator iter = begin;
while(iter != end)
{
// Get the forward and reverse strand minimizer candidates
forward_key.forward(this->k(), *iter, valid_chars);
reverse_key.reverse(this->k(), *iter);
// If they don't have any Ns or anything in them, throw them into the sliding window tracked by buffer.
// Otherwise just slide it along.
if(valid_chars >= this->k()) { buffer.advance(start_pos, forward_key, reverse_key); }
else { buffer.advance(start_pos); }
++iter;
// If we have passed at least k characters, we must advance the starting position of the next kmer.
if(static_cast<size_t>(iter - begin) >= this->k()) { start_pos++; }
// We have a full window with a minimizer.
if(static_cast<size_t>(iter - begin) >= window_length && !buffer.empty())
{
// Insert the candidates if:
// 1) this is the first minimizer we encounter;
// 2) the last reported minimizer had the same hash (we may have new occurrences); or
// 3) the first candidate is located after the last reported minimizer.
if(result.empty() || result.back().first.hash == buffer.front().hash || result.back().first.offset < buffer.front().offset)
{
// Insert all new occurrences of the minimizer in the window.
for(size_t i = buffer.begin(); i < buffer.end() && buffer.at(i).hash == buffer.front().hash; i++)
{
if(buffer.at(i).offset >= next_read_offset)
{
result.emplace_back(buffer.at(i), 0);
next_read_offset = buffer.at(i).offset + 1;
}
}
}

// Assign the window's weight to an arbitrary minimizer that occured in it.
// Whatever is last in result right now will work.
result.back().second++;
}
}

// It was more convenient to use the first offset of the kmer, regardless of the orientation.
// If the minimizer is a reverse complement, we must return the last offset instead.
for(auto& weighted_minimizer : result)
{
if(weighted_minimizer.first.is_reverse) { weighted_minimizer.first.offset += this->k() - 1; }
}
std::sort(result.begin(), result.end());

return result;
}

/*
Returns all minimizers in the string. The return value is a vector of
minimizers and window counts sorted by their offsets.
*/
std::vector<std::pair<minimizer_type, size_t>> weighted_minimizers(const std::string& str) const
{
  // Convenience overload: delegate to the iterator version over the entire string.
  std::string::const_iterator first = str.begin();
  std::string::const_iterator last = str.end();
  return this->weighted_minimizers(first, last);
}

/*
Returns all minimizers in the string specified by the iterators, together
with the start and length of the run of windows they arise from. The return
Expand Down
29 changes: 0 additions & 29 deletions tests/test_minimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,29 +157,6 @@ class MinimizerExtraction : public ::testing::Test
this->rev = reverse_complement(this->str);
this->repetitive = "TATATA";
}

/*
  Checks that weighted_minimizers() agrees with minimizers() on the query for
  the given k and w: the same minimizers in the same order, and window counts
  that add up to the number of windows in the query.
*/
void check_weighted_minimizers(const std::string& query, size_t k, size_t w)
{
  MinimizerIndex<KeyType> index(k, w);
  std::vector<typename MinimizerIndex<KeyType>::minimizer_type> result = index.minimizers(query);
  std::vector<std::pair<typename MinimizerIndex<KeyType>::minimizer_type, size_t>> weighted = index.weighted_minimizers(query);

  std::stringstream ss;
  ss << "(" << k << ", " << w << ")-minimizers in " << query;
  std::string test_description = ss.str();

  // A query of length n contains n - (k + w - 1) + 1 = n + 2 - k - w windows.
  // The arithmetic is unsigned, so a query shorter than one window must be
  // handled separately: it has no windows, and the subtraction would wrap around.
  // This assumes that every window contains at least one valid kmer, as windows
  // without candidates are not expected to contribute any weight.
  size_t correct_weight = 0;
  if(query.length() + 2 > k + w) { correct_weight = query.length() + 2 - k - w; }

  // The element-wise comparison below requires equal sizes, so stop here on a mismatch.
  ASSERT_EQ(weighted.size(), result.size()) << "Wrong number of weighted " << test_description;
  size_t total_weight = 0;
  bool same_minimizers = true;
  for(size_t i = 0; i < result.size(); i++)
  {
    if(weighted[i].first != result[i]) { same_minimizers = false; }
    total_weight += weighted[i].second;
  }
  EXPECT_TRUE(same_minimizers) << "Incorrect weighted " << test_description;
  EXPECT_EQ(total_weight, correct_weight) << "Incorrect total weight for " << test_description;
}
};

TYPED_TEST_CASE(MinimizerExtraction, KeyTypes);
Expand Down Expand Up @@ -424,12 +401,6 @@ TYPED_TEST(MinimizerExtraction, AllOccurrences)
EXPECT_EQ(result, correct) << "Did not find the correct minimizers";
}

TYPED_TEST(MinimizerExtraction, WeightedMinimizers)
{
  // Both fixture strings are checked with 3-mers; only the window length differs.
  const size_t kmer_length = 3;
  this->check_weighted_minimizers(this->str, kmer_length, 2);
  this->check_weighted_minimizers(this->repetitive, kmer_length, 3);
}

TYPED_TEST(MinimizerExtraction, InvalidCharacters)
{
std::string weird = "CGAATAxAATACT";
Expand Down

0 comments on commit 40799f0

Please sign in to comment.