diff --git a/src/commons/KmerMatcher.cpp b/src/commons/KmerMatcher.cpp index 9f9a2e9a..e0ea45a0 100644 --- a/src/commons/KmerMatcher.cpp +++ b/src/commons/KmerMatcher.cpp @@ -170,9 +170,9 @@ bool KmerMatcher::matchKmers(QueryKmerBuffer * queryKmerBuffer, remainder--; } bool needLastTargetBlock = true; - queryAA = AminoAcidPart(queryKmerList[startIdx].ADkmer); + queryAA = AMINO_ACID_PART(queryKmerList[startIdx].ADkmer); for (size_t j = 0; j < numOfDiffIdxSplits_use; j ++) { - if (queryAA <= AminoAcidPart(diffIdxSplits.data[j].ADkmer)) { + if (queryAA <= AMINO_ACID_PART(diffIdxSplits.data[j].ADkmer)) { j = j - (j != 0); querySplits.emplace_back(startIdx, endIdx, endIdx - startIdx + 1, diffIdxSplits.data[j]); needLastTargetBlock = false; @@ -231,9 +231,14 @@ querySplits, queryKmerList, matchBuffer, cout, targetDiffIdxFileName, numOfDiffI std::vector selectedMatches; std::vector selectedHammings; std::vector selectedDnaEncodings; + selectedHammingSum.resize(1024); + selectedMatches.resize(1024); + selectedHammings.resize(1024); + selectedDnaEncodings.resize(1024); + size_t selectedMatchCnt = 0; + size_t posToWrite; - int currMatchNum; size_t idx; #pragma omp for schedule(dynamic, 1) for (size_t i = 0; i < querySplits.size(); i++) { @@ -268,9 +273,8 @@ querySplits, queryKmerList, matchBuffer, cout, targetDiffIdxFileName, numOfDiffI // Reuse the comparison data if queries are exactly identical if (currentQuery == queryKmerList[j].ADkmer && (currentQueryInfo.frame/3 == queryKmerList[j].info.frame/3)) { - currMatchNum = selectedMatches.size(); // If local buffer is full, copy them to the shared buffer. - if (matchCnt + currMatchNum > localBufferSize) { + if (matchCnt + selectedMatchCnt > localBufferSize) { // Check if the shared buffer is full. posToWrite = matchBuffer->reserveMemory(matchCnt); if (posToWrite + matchCnt >= matchBuffer->bufferSize) { @@ -283,7 +287,7 @@ querySplits, queryKmerList, matchBuffer, cout, targetDiffIdxFileName, numOfDiffI lastMovedQueryIdx = j; } } - for (int k = 0; k < currMatchNum; k++) { + for (int k = 0; k < selectedMatchCnt; k++) { idx = selectedMatches[k]; // Check if candidateKmerInfos[idx].sequenceID is valid // if (taxId2genusId.find(candidateKmerInfos[idx].sequenceID) == taxId2genusId.end() || @@ -301,19 +305,15 @@ querySplits, queryKmerList, matchBuffer, cout, targetDiffIdxFileName, numOfDiffI } continue; } - selectedMatches.clear(); - selectedHammingSum.clear(); - selectedHammings.clear(); - selectedDnaEncodings.clear(); + selectedMatchCnt = 0; // Reuse the candidate target k-mers to compare in DNA level if queries are the same at amino acid level but not at DNA level - if (currentQueryAA == AminoAcidPart(queryKmerList[j].ADkmer)) { + if (currentQueryAA == AMINO_ACID_PART(queryKmerList[j].ADkmer)) { compareDna(queryKmerList[j].ADkmer, candidateTargetKmers, selectedMatches, - selectedHammingSum, selectedHammings, selectedDnaEncodings, queryKmerList[j].info.frame); - currMatchNum = selectedMatches.size(); + selectedHammingSum, selectedHammings, selectedDnaEncodings, selectedMatchCnt, queryKmerList[j].info.frame); // If local buffer is full, copy them to the shared buffer. - if (matchCnt + currMatchNum > localBufferSize) { + if (matchCnt + selectedMatchCnt > localBufferSize) { // Check if the shared buffer is full. posToWrite = matchBuffer->reserveMemory(matchCnt); if (posToWrite + matchCnt >= matchBuffer->bufferSize) { @@ -326,7 +326,7 @@ querySplits, queryKmerList, matchBuffer, cout, targetDiffIdxFileName, numOfDiffI lastMovedQueryIdx = j; } } - for (int k = 0; k < currMatchNum; k++) { + for (int k = 0; k < selectedMatchCnt; k++) { idx = selectedMatches[k]; matches[matchCnt] = {queryKmerList[j].info, candidateKmerInfos[idx].sequenceID, @@ -338,7 +338,7 @@ querySplits, queryKmerList, matchBuffer, cout, targetDiffIdxFileName, numOfDiffI matchCnt++; } currentQuery = queryKmerList[j].ADkmer; - currentQueryAA = AminoAcidPart(currentQuery); + currentQueryAA = AMINO_ACID_PART(currentQuery); currentQueryInfo = queryKmerList[j].info; continue; } @@ -347,12 +347,12 @@ querySplits, queryKmerList, matchBuffer, cout, targetDiffIdxFileName, numOfDiffI // Get next query, and start to find currentQuery = queryKmerList[j].ADkmer; - currentQueryAA = AminoAcidPart(currentQuery); + currentQueryAA = AMINO_ACID_PART(currentQuery); currentQueryInfo = queryKmerList[j].info; // Skip target k-mers that are not matched in amino acid level while (diffIdxPos != numOfDiffIdx - && (currentQueryAA > AminoAcidPart(currentTargetKmer))) { + && (currentQueryAA > AMINO_ACID_PART(currentTargetKmer))) { if (unlikely(BufferSize < diffIdxBufferIdx + 7)){ loadBuffer(diffIdxFp, diffIdxBuffer, diffIdxBufferIdx, BufferSize, ((int)(BufferSize - diffIdxBufferIdx)) * -1 ); } @@ -361,12 +361,12 @@ querySplits, queryKmerList, matchBuffer, cout, targetDiffIdxFileName, numOfDiffI kmerInfoBufferIdx ++; } - if (currentQueryAA != AminoAcidPart(currentTargetKmer)) // Move to next query k-mer if there isn't any match. + if (currentQueryAA != AMINO_ACID_PART(currentTargetKmer)) // Move to next query k-mer if there isn't any match. continue; // Load target k-mers that are matched in amino acid level while (diffIdxPos != numOfDiffIdx && - currentQueryAA == AminoAcidPart(currentTargetKmer)) { + currentQueryAA == AMINO_ACID_PART(currentTargetKmer)) { candidateTargetKmers.push_back(currentTargetKmer); candidateKmerInfos.push_back(getKmerInfo(BufferSize, kmerInfoFp, kmerInfoBuffer, kmerInfoBufferIdx)); // Print the target k-mer @@ -397,13 +397,19 @@ querySplits, queryKmerList, matchBuffer, cout, targetDiffIdxFileName, numOfDiffI kmerInfoBufferIdx ++; } + if (candidateTargetKmers.size() > selectedMatches.size()) { + selectedMatches.resize(candidateTargetKmers.size()); + selectedHammingSum.resize(candidateTargetKmers.size()); + selectedHammings.resize(candidateTargetKmers.size()); + selectedDnaEncodings.resize(candidateTargetKmers.size()); + } + // Compare the current query and the loaded target k-mers and select compareDna(currentQuery, candidateTargetKmers, selectedMatches, selectedHammingSum, - selectedHammings, selectedDnaEncodings, queryKmerList[j].info.frame); + selectedHammings, selectedDnaEncodings, selectedMatchCnt, queryKmerList[j].info.frame); // If local buffer is full, copy them to the shared buffer. - currMatchNum = selectedMatches.size(); - if (matchCnt + currMatchNum > localBufferSize) { + if (matchCnt + selectedMatchCnt > localBufferSize) { // Check if the shared buffer is full. posToWrite = matchBuffer->reserveMemory(matchCnt); if (posToWrite + matchCnt >= matchBuffer->bufferSize) { // full -> write matches to file first @@ -417,7 +423,7 @@ querySplits, queryKmerList, matchBuffer, cout, targetDiffIdxFileName, numOfDiffI } } - for (int k = 0; k < currMatchNum; k++) { + for (int k = 0; k < selectedMatchCnt; k++) { idx = selectedMatches[k]; // Check if candidateKmerInfos[idx].sequenceID is valid // if (taxId2genusId.find(candidateKmerInfos[idx].sequenceID) == taxId2genusId.end() || @@ -493,6 +499,7 @@ void KmerMatcher::compareDna(uint64_t query, std::vector &selectedHammingSum, std::vector &selectedHammings, std::vector &selectedDnaEncodings, + size_t & selectedMatchIdx, uint8_t frame) { size_t size = targetKmersToCompare.size(); auto *hammingSums = new uint8_t[size + 1]; @@ -509,18 +516,20 @@ void KmerMatcher::compareDna(uint64_t query, } // Select target k-mers that passed hamming criteria + selectedMatchIdx = 0; uint8_t maxHamming = min(minHammingSum * 2, 7); for (size_t h = 0; h < size; h++) { if (hammingSums[h] <= maxHamming) { - selectedMatches.push_back(h); - selectedHammingSum.push_back(hammingSums[h]); + selectedMatches[selectedMatchIdx] = h; + selectedHammingSum[selectedMatchIdx] = hammingSums[h]; if (frame < 3) { // Frame of query k-mer - selectedHammings.push_back(getHammings(query, targetKmersToCompare[h])); + selectedHammings[selectedMatchIdx] = getHammings(query, targetKmersToCompare[h]); } else { - selectedHammings.push_back(getHammings_reverse(query, targetKmersToCompare[h])); + selectedHammings[selectedMatchIdx] = getHammings_reverse(query, targetKmersToCompare[h]); } // Store right 24 bits of the target k-mer in selectedDnaEncodings - selectedDnaEncodings.push_back(GET_24_BITS_UINT(targetKmersToCompare[h])); + selectedDnaEncodings[selectedMatchIdx] = GET_24_BITS_UINT(targetKmersToCompare[h]); + selectedMatchIdx++; } } delete[] hammingSums; diff --git a/src/commons/KmerMatcher.h b/src/commons/KmerMatcher.h index dfc29172..282a6053 100644 --- a/src/commons/KmerMatcher.h +++ b/src/commons/KmerMatcher.h @@ -14,6 +14,8 @@ #define BufferSize 16'777'216 // 16 * 1024 * 1024 // 16 M +#define AMINO_ACID_PART(kmer) ((kmer) & MARKER) + // Input // 1. Query K-mers // 2. Reference K-mers @@ -58,7 +60,7 @@ class KmerMatcher { // search begins. }; - size_t AminoAcidPart(size_t kmer) const { return (kmer)&MARKER; } + inline size_t AminoAcidPart(size_t kmer) const { return (kmer)&MARKER; } template static void loadBuffer(FILE *fp, T *buffer, size_t &bufferIdx, size_t size) { @@ -89,6 +91,7 @@ class KmerMatcher { std::vector &selectedHammingSum, std::vector &rightEndHammings, std::vector &selectedDnaEncodings, + size_t & selectedMatchIdx, uint8_t frame); virtual uint8_t getHammingDistanceSum(uint64_t kmer1, uint64_t kmer2); diff --git a/src/commons/Taxonomer.cpp b/src/commons/Taxonomer.cpp index 85e537e0..26c6f319 100644 --- a/src/commons/Taxonomer.cpp +++ b/src/commons/Taxonomer.cpp @@ -126,6 +126,10 @@ void Taxonomer::chooseBestTaxon(uint32_t currentQuery, const Match *matchList, vector & queryList, const LocalParameters &par) { + // cout << "Current query: " << currentQuery << endl; + // for (size_t i = offset; i < end+1; i ++) { + // matchList[i].printMatch(); + // } // Get the best species and its matches for the current query TaxonScore speciesScore(0, 0, 0, 0, 0); std::pair bestSpeciesRange; @@ -324,11 +328,16 @@ TaxonScore Taxonomer::getBestSpeciesMatches(std::pair & bestSpec i ++; } if (i - start >= minConsCnt) { - remainConsecutiveMatches(matchList, start, i, matchPaths, currentSpecies); + // remainConsecutiveMatches(matchList, start, i, matchPaths, currentSpecies); + getMatchPaths(matchList, start, i, matchPaths, currentSpecies); } } size_t pathSize = matchPaths.size(); // Combine MatchPaths + // cout << "Current species: " << currentSpecies << endl; + // for (size_t kk = previousPathSize; kk < matchPaths.size(); kk++) { + // matchPaths[kk].printMatchPath(); + // } if (pathSize > previousPathSize) { speciesList.push_back(currentSpecies); speciesPathIdx.push_back(previousPathSize); @@ -390,6 +399,7 @@ float Taxonomer::combineMatchPaths(vector & matchPaths, vector & combinedMatchPaths, size_t combMatchPathStart, int readLength) { + // cout << "combineMatchPaths" << endl; // Sort matchPaths by the their score sort(matchPaths.begin() + matchPathStart, matchPaths.end(), [](const MatchPath &a, const MatchPath &b) { @@ -410,6 +420,8 @@ float Taxonomer::combineMatchPaths(vector & matchPaths, for (size_t i = matchPathStart; i < matchPaths.size(); i++) { if (combMatchPathStart == combinedMatchPaths.size()) { combinedMatchPaths.push_back(matchPaths[i]); + // cout << "Path added "; + // matchPaths[i].printMatchPath(); score += matchPaths[i].score; } else { bool isOverlapped = false; @@ -417,9 +429,14 @@ float Taxonomer::combineMatchPaths(vector & matchPaths, if (isMatchPathOverlapped(matchPaths[i], combinedMatchPaths[j])) { // overlap! int overlappedLength = min(matchPaths[i].end, combinedMatchPaths[j].end) - max(matchPaths[i].start, combinedMatchPaths[j].start) + 1; + if (overlappedLength == matchPaths[i].end - matchPaths[i].start + 1) { // Current path is completely overlapped + isOverlapped = true; + break; + } + if (overlappedLength < 24) { trimMatchPath(matchPaths[i], combinedMatchPaths[j], overlappedLength); - + // cout << "Path trimmed "; matchPaths[i].printMatchPath(); continue; } else { isOverlapped = true; @@ -433,6 +450,7 @@ float Taxonomer::combineMatchPaths(vector & matchPaths, } if (!isOverlapped) { combinedMatchPaths.push_back(matchPaths[i]); + // cout << "Path added "; matchPaths[i].printMatchPath(); score += matchPaths[i].score; } } @@ -458,6 +476,156 @@ void Taxonomer::trimMatchPath(MatchPath & path1, const MatchPath & path2, int ov } } +void Taxonomer::getMatchPaths(const Match * matchList, + size_t start, + size_t end, + vector & filteredMatchPaths, + TaxID speciesId) { + // cout << "getMatchPaths2" << endl; + // for (size_t i = start; i < end; i ++) { + // matchList[i].printMatch(); + // } + size_t i = start; + size_t currPos = matchList[start].qInfo.pos; + uint64_t frame = matchList[start].qInfo.frame; + size_t MIN_DEPTH = minConsCnt - 1; + + if (taxonomy->IsAncestor(eukaryotaTaxId, speciesId)) { + MIN_DEPTH = minConsCntEuk - 1; + } + + connectedToNext.resize(end - start + 1); + fill(connectedToNext.begin(), connectedToNext.end(), false); + localMatchPaths.clear(); + localMatchPaths.resize(end - start + 1); + + if (frame < 3) { // Forward frame + size_t curPosMatchStart = i; + while (i < end && matchList[i].qInfo.pos == currPos) { + localMatchPaths[i - start] = MatchPath(matchList + i); + ++ i; + } + size_t curPosMatchEnd = i; // exclusive + + while (i < end) { + uint32_t nextPos = matchList[i].qInfo.pos; + size_t nextPosMatchStart = i; + while (i < end && nextPos == matchList[i].qInfo.pos) { + localMatchPaths[i - start] = MatchPath(matchList + i); + ++ i; + } + size_t nextPosMatchEnd = i; // exclusive + + // Check if current position and next position are consecutive + if (currPos + 3 == nextPos) { + // Compare curPosMatches and nextPosMatches. + for (size_t nextIdx = nextPosMatchStart; nextIdx < nextPosMatchEnd; nextIdx++) { + uint8_t lastEndHamming = (matchList[nextIdx].rightEndHamming >> 14); + float scoreIncrement = (lastEndHamming == 0) ? 3.0f : 2.0f - 0.5f * lastEndHamming; + const MatchPath * bestPath = nullptr; + float bestScore = 0; + for (size_t curIdx = curPosMatchStart; curIdx < curPosMatchEnd; ++curIdx) { + if (isConsecutive(matchList + curIdx, matchList + nextIdx)) { + connectedToNext[curIdx - start] = true; + if (localMatchPaths[curIdx - start].score > bestScore) { + bestPath = &localMatchPaths[curIdx - start]; + bestScore = localMatchPaths[curIdx - start].score; + } + } + } + if (bestPath != nullptr) { + localMatchPaths[nextIdx - start].start = bestPath->start; + localMatchPaths[nextIdx - start].score = bestPath->score + scoreIncrement; + localMatchPaths[nextIdx - start].hammingDist = bestPath->hammingDist + lastEndHamming; + localMatchPaths[nextIdx - start].depth = bestPath->depth + 1; + localMatchPaths[nextIdx - start].startMatch = bestPath->startMatch; + } + } + } + for (size_t curIdx = curPosMatchStart; curIdx < curPosMatchEnd; ++curIdx) { + if (!connectedToNext[curIdx - start] && localMatchPaths[curIdx - start].depth > MIN_DEPTH) { + filteredMatchPaths.push_back(localMatchPaths[curIdx - start]); + // cout << localMatchPaths[curIdx - start].score << endl; + } + } + if (i == end) { + for (size_t nextIdx = nextPosMatchStart; nextIdx < nextPosMatchEnd; ++nextIdx) { + if (localMatchPaths[nextIdx - start].depth > MIN_DEPTH) { + filteredMatchPaths.push_back(localMatchPaths[nextIdx - start]); + } + // cout << localMatchPaths[nextIdx - start].score << endl; + } + } + // Update curPosMatches and nextPosMatches + curPosMatchStart = nextPosMatchStart; + curPosMatchEnd = nextPosMatchEnd; + currPos = nextPos; + } + } else { + size_t curPosMatchStart = i; + while (i < end && matchList[i].qInfo.pos == currPos) { + localMatchPaths[i - start] = MatchPath(matchList + i); + ++ i; + } + size_t curPosMatchEnd = i; // exclusive + + while (i < end) { + uint32_t nextPos = matchList[i].qInfo.pos; + size_t nextPosMatchStart = i; + while (i < end && nextPos == matchList[i].qInfo.pos) { + localMatchPaths[i - start] = MatchPath(matchList + i); + ++ i; + } + size_t nextPosMatchEnd = i; // exclusive + + // Check if current position and next position are consecutive + if (currPos + 3 == nextPos) { + // Compare curPosMatches and nextPosMatches. + for (size_t nextIdx = nextPosMatchStart; nextIdx < nextPosMatchEnd; nextIdx++) { + uint8_t lastEndHamming = (matchList[nextIdx].rightEndHamming >> 14); + float scoreIncrement = (lastEndHamming == 0) ? 3.0f : 2.0f - 0.5f * lastEndHamming; + const MatchPath * bestPath = nullptr; + float bestScore = 0; + for (size_t curIdx = curPosMatchStart; curIdx < curPosMatchEnd; ++curIdx) { + if (isConsecutive(matchList + nextIdx, matchList + curIdx)) { + connectedToNext[curIdx - start] = true; + if (localMatchPaths[curIdx - start].score > bestScore) { + bestPath = &localMatchPaths[curIdx - start]; + bestScore = localMatchPaths[curIdx - start].score; + } + } + } + if (bestPath != nullptr) { + localMatchPaths[nextIdx - start].start = bestPath->start; + localMatchPaths[nextIdx - start].score = bestPath->score + scoreIncrement; + localMatchPaths[nextIdx - start].hammingDist = bestPath->hammingDist + lastEndHamming; + localMatchPaths[nextIdx - start].depth = bestPath->depth + 1; + localMatchPaths[nextIdx - start].startMatch = bestPath->startMatch; + } + } + } + for (size_t curIdx = curPosMatchStart; curIdx < curPosMatchEnd; ++curIdx) { + if (!connectedToNext[curIdx - start] && localMatchPaths[curIdx - start].depth > MIN_DEPTH) { + filteredMatchPaths.push_back(localMatchPaths[curIdx - start]); + // cout << localMatchPaths[curIdx - start].score << endl; + } + } + if (i == end) { + for (size_t nextIdx = nextPosMatchStart; nextIdx < nextPosMatchEnd; ++nextIdx) { + if (localMatchPaths[nextIdx - start].depth > MIN_DEPTH) { + filteredMatchPaths.push_back(localMatchPaths[nextIdx - start]); + } + // cout << localMatchPaths[nextIdx - start].score << endl; + } + } + // Update curPosMatches and nextPosMatches + curPosMatchStart = nextPosMatchStart; + curPosMatchEnd = nextPosMatchEnd; + currPos = nextPos; + } + } +} + void Taxonomer::remainConsecutiveMatches(const Match * matchList, size_t start, size_t end, @@ -472,8 +640,6 @@ void Taxonomer::remainConsecutiveMatches(const Match * matchList, linkedMatchValuesIdx.resize(end - start); size_t linkedMatchKeysIdx = 0; - - size_t currPos = matchList[start].qInfo.pos; uint64_t frame = matchList[start].qInfo.frame; if (frame < 3) { // Forward frame @@ -590,7 +756,7 @@ void Taxonomer::remainConsecutiveMatches(const Match * matchList, if (bestPath.depth > MIN_DEPTH) { matchPaths.emplace_back(key->qInfo.pos, // start coordinate on query key->qInfo.pos + bestPath.depth * 3 + 20, // end coordinate on query - bestPath.score, bestPath.hammingDist, + bestPath.score, bestPath.hammingDist, bestPath.depth, key, bestPath.endMatch); } diff --git a/src/commons/Taxonomer.h b/src/commons/Taxonomer.h index 5c84c8c1..c36f7406 100644 --- a/src/commons/Taxonomer.h +++ b/src/commons/Taxonomer.h @@ -33,15 +33,29 @@ struct depthScore { }; struct MatchPath { - MatchPath(int start, int end, float score, int hammingDist, const Match * startMatch, const Match * endMatch) : - start(start), end(end), score(score), hammingDist(hammingDist), startMatch(startMatch), endMatch(endMatch) {} - MatchPath() : start(0), end(0), score(0.f), hammingDist(0), startMatch(nullptr), endMatch(nullptr) {} - int start; - int end; + MatchPath(int start, int end, float score, int hammingDist, int depth, const Match * startMatch, const Match * endMatch) : + start(start), end(end), score(score), hammingDist(hammingDist), depth(depth), startMatch(startMatch), endMatch(endMatch) {} + MatchPath() : start(0), end(0), score(0.f), hammingDist(0), depth(0), startMatch(nullptr), endMatch(nullptr) {} + MatchPath(const Match * startMatch) + : start(startMatch->qInfo.pos), + end(startMatch->qInfo.pos + 23), + score(startMatch->getScore()), + hammingDist(startMatch->hamming), + depth(1), + startMatch(startMatch), + endMatch(startMatch) {} + + int start; // query coordinate + int end; // query coordinate float score; int hammingDist; + int depth; const Match * startMatch; const Match * endMatch; + + void printMatchPath() { + std::cout << start << " " << end << " " << score << " " << hammingDist << " " << depth << std::endl; + } }; struct MatchBlock { @@ -92,6 +106,10 @@ class Taxonomer { vector linkedMatchValuesIdx; unordered_map match2depthScore; + // getMatchPaths + vector connectedToNext; + vector localMatchPaths; + // lowerRankClassification unordered_map cladeCnt; @@ -138,7 +156,13 @@ class Taxonomer { size_t end, vector & matchPaths, TaxID speciesId); - + + void getMatchPaths(const Match * matchList, + size_t start, + size_t end, + vector & matchPaths, + TaxID speciesId); + float combineMatchPaths(vector & matchPaths, size_t matchPathStart, vector & combMatchPaths, @@ -147,10 +171,6 @@ class Taxonomer { bool isMatchPathOverlapped(const MatchPath & matchPath1, const MatchPath & matchPath2); - bool isMatchPathLinked(const MatchPath & matchPath1, const MatchPath & matchPath2); - - void mergeMatchPaths(const MatchPath & source, MatchPath & target); - void trimMatchPath(MatchPath & path1, const MatchPath & path2, int overlapLength); void filterRedundantMatches(const Match *matchList, @@ -165,47 +185,6 @@ class Taxonomer { TaxonScore getBestSpeciesMatches(std::pair & bestSpeciesRange, const Match *matchList, size_t end, size_t offset, int queryLength); - - // TaxonScore getBestGenusMatches_spaced(vector &matchesForMajorityLCA, const Match *matchList, size_t end, size_t offset, - // int readLength1, int readLength2); - // TaxonScore getBestGenusMatches_spaced(vector &matchesForMajorityLCA, const Match *matchList, size_t end, size_t offset, - // int readLength1); - - TaxonScore scoreTaxon(vector &filteredMatches, - TaxID taxId, - int queryLength); - - TaxonScore scoreTaxon(vector &filteredMatches, - TaxID taxId, - int readLength1, - int readLength2); - - void scoreGenus_ExtensionScore(vector &filteredMatches, - vector> &matchesForEachGenus, - vector &scoreOfEachGenus, - int readLength1, int readLength2); - - TaxonScore chooseSpecies(const std::vector &matches, - int queryLength, - vector &species, - unordered_map> & speciesMatchRange); - - TaxonScore chooseSpecies(const std::vector &matches, - int read1Length, - int read2Length, - vector &species, - unordered_map> & speciesMatchRange); - - TaxonScore scoreSpecies(const vector &matches, - size_t begin, - size_t end, - int queryLength); - - TaxonScore scoreSpecies(const vector &matches, - size_t begin, - size_t end, - int queryLength, - int queryLength2); TaxID lowerRankClassification(const unordered_map & taxCnt, TaxID speciesID, int queryLength);