Skip to content

Commit

Permalink
fix an error reported in issue #68
Browse files Browse the repository at this point in the history
  • Loading branch information
jaebeom-kim committed Jun 21, 2024
1 parent 74b8ff7 commit 4df4b03
Show file tree
Hide file tree
Showing 9 changed files with 51 additions and 27 deletions.
8 changes: 5 additions & 3 deletions src/commons/KmerExtractor.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#include "KmerExtractor.h"
#include "common.h"
#include <unordered_map>

KmerExtractor::KmerExtractor(const LocalParameters &par) {
spaceNum = par.spaceMask.length() - 8;
Expand Down Expand Up @@ -76,7 +78,7 @@ void KmerExtractor::fillQueryKmerBufferParallel(KSeqWrapper *kseq1,
processedQueryNum ++;
count ++;
}
#pragma omp parallel default(none), shared(par, kmerBuffer, cout, processedQueryNum, queryList, currentQueryNum, currentSplit, count, reads1)
#pragma omp parallel default(none), shared(par, kmerBuffer, cout, processedQueryNum, queryList, currentQueryNum, count, reads1)
{
SeqIterator seqIterator(par);
size_t posToWrite;
Expand Down Expand Up @@ -165,8 +167,8 @@ void KmerExtractor::fillQueryKmerBufferParallel_paired(KSeqWrapper *kseq1,
for (size_t i = 0; i < currentQueryNum; i ++) {
size_t queryIdx = processedQueryNum - currentQueryNum + i;
// Get k-mer count
auto kmerCnt = LocalUtil::getQueryKmerNumber<size_t>(reads1[i].length(), spaceNum);
auto kmerCnt2 = LocalUtil::getQueryKmerNumber<size_t>(reads2[i].length(), spaceNum);
int kmerCnt = LocalUtil::getQueryKmerNumber<int>(reads1[i].length(), spaceNum);
int kmerCnt2 = LocalUtil::getQueryKmerNumber<int>(reads2[i].length(), spaceNum);

// Ignore short read
if (kmerCnt2 < 1 || kmerCnt < 1) { continue; }
Expand Down
2 changes: 2 additions & 0 deletions src/commons/KmerExtractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include "SeqIterator.h"
#include "QueryIndexer.h"
#include "KSeqWrapper.h"
#include "common.h"
#include <unordered_map>

class KmerExtractor {
private:
Expand Down
10 changes: 1 addition & 9 deletions src/commons/LocalUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,7 @@ void LocalUtil::splitQueryFile(std::vector<SequenceBlock> & sequences, const std
delete kseq;
}

// Trims a query length down by a fixed amount chosen from its remainder
// modulo 3:
//   remainder 2 -> length - 2
//   remainder 1 -> length - 4
//   otherwise   -> length - 3
// For a non-negative input the result is therefore a multiple of 3 that is
// strictly smaller than the input. It can be negative for very short inputs.
// A negative input never yields remainder 1 or 2 in C++, so it takes the
// "otherwise" branch.
int LocalUtil::getMaxCoveredLength(int queryLength) {
    switch (queryLength % 3) {
        case 2:
            return queryLength - 2;
        case 1:
            return queryLength - 4;
        default:
            return queryLength - 3;
    }
}


int LocalUtil::getFirstWhiteSpacePos(const std::string &str) {
for (size_t i = 0; i < str.size(); ++i) {
Expand Down
13 changes: 12 additions & 1 deletion src/commons/LocalUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ class LocalUtil : public Util {

static void splitQueryFile(std::vector<SequenceBlock> & seqSegments, const std::string & queryPath);

static int getMaxCoveredLength(int queryLength) ;
template<typename T>
static T getMaxCoveredLength(T queryLength);

static int getFirstWhiteSpacePos(const std::string & str);

Expand All @@ -30,5 +31,15 @@ T LocalUtil::getQueryKmerNumber(T queryLength, int spaceNum) {
return (getMaxCoveredLength(queryLength) / 3 - kmerLength - spaceNum + 1) * 6;
}

// Generic version of the length-trimming helper.
//
// The amount removed depends on queryLength modulo 3:
//   remainder 2 -> 2, remainder 1 -> 4, anything else -> 3.
// For a non-negative input the result is a multiple of 3 strictly below the
// input.
//
// NOTE: when T is an unsigned type and queryLength is smaller than the amount
// removed (e.g. 0, 1 or 3), the subtraction wraps around to a huge value
// instead of going negative. Instantiate with a signed type if the caller
// needs to detect too-short inputs by checking for a non-positive result.
template<typename T>
T LocalUtil::getMaxCoveredLength(T queryLength) {
    const auto remainder = queryLength % 3;
    const int trim = (remainder == 2) ? 2
                   : (remainder == 1) ? 4
                   : 3;
    return queryLength - trim;
}

#endif //METABULI_LOCALUTIL_H
1 change: 1 addition & 0 deletions src/commons/QueryFilter.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "QueryFilter.h"
#include "common.h"
#include <unordered_map>

QueryFilter::QueryFilter(LocalParameters & par) {
// Load parameters
Expand Down
32 changes: 20 additions & 12 deletions src/commons/QueryIndexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,15 @@ void QueryIndexer::indexQueryFile(size_t processedQueryNum) {
readNum_1++;
seqCnt++;
totalReadLength += kseq->entry.sequence.l;
size_t currentKmerCnt = LocalUtil::getQueryKmerNumber<size_t>(kseq->entry.sequence.l, spaceNum);
kmerCnt += currentKmerCnt;

int kmerCnt_int = LocalUtil::getQueryKmerNumber<int>(kseq->entry.sequence.l, spaceNum);
if (kmerCnt_int > 0) {
kmerCnt += (size_t) kmerCnt_int;
} else {
continue;
}
if (bytesPerKmer * kmerCnt + ((size_t) 200 * seqCnt) > availableRam) {
querySplits.emplace_back(start, readNum_1 - 1, kmerCnt - currentKmerCnt, seqCnt - 1);
kmerCnt = currentKmerCnt;
querySplits.emplace_back(start, readNum_1 - 1, kmerCnt - (size_t) kmerCnt_int, seqCnt - 1);
kmerCnt = (size_t) kmerCnt_int;
start = readNum_1 - 1;
seqCnt = 1;
}
Expand All @@ -77,14 +80,15 @@ void QueryIndexer::indexQueryFile(size_t processedQueryNum) {
size_t seqCnt_1 = 0;
size_t seqCnt_2 = 0;
size_t start = 0;
size_t currentKmerCnt;
bool end = false;
int kmerCnt_int_1;
int kmerCnt_int_2;
while(true) {
if (kseq_1->ReadEntry()) {
readNum_1++;
seqCnt_1++;
totalReadLength += kseq_1->entry.sequence.l;
currentKmerCnt = LocalUtil::getQueryKmerNumber<size_t>(kseq_1->entry.sequence.l, spaceNum);
kmerCnt_int_1 = LocalUtil::getQueryKmerNumber<int>(kseq_1->entry.sequence.l, spaceNum);
} else {
end = true;
}
Expand All @@ -93,8 +97,7 @@ void QueryIndexer::indexQueryFile(size_t processedQueryNum) {
readNum_2++;
seqCnt_2++;
totalReadLength += kseq_2->entry.sequence.l;
currentKmerCnt += LocalUtil::getQueryKmerNumber<size_t>(kseq_2->entry.sequence.l, spaceNum);

kmerCnt_int_2 = LocalUtil::getQueryKmerNumber<int>(kseq_1->entry.sequence.l, spaceNum);
} else {
end = true;
}
Expand All @@ -104,10 +107,15 @@ void QueryIndexer::indexQueryFile(size_t processedQueryNum) {
EXIT(EXIT_FAILURE);
}

kmerCnt += currentKmerCnt;
if (kmerCnt_int_1 > 0 && kmerCnt_int_2 > 0) {
kmerCnt += (size_t) kmerCnt_int_1 + (size_t) kmerCnt_int_2;
} else {
continue;
}

if (bytesPerKmer * kmerCnt + ((size_t) 200 * seqCnt_1) > availableRam) {
querySplits.emplace_back(start, readNum_1 - 1, kmerCnt - currentKmerCnt, seqCnt_1 - 1);
kmerCnt = currentKmerCnt;
querySplits.emplace_back(start, readNum_1 - 1, kmerCnt - ((size_t) kmerCnt_int_1 + (size_t) kmerCnt_int_2), seqCnt_1 - 1);
kmerCnt = (size_t) kmerCnt_int_1 + (size_t) kmerCnt_int_2;
start = readNum_1 - 1;
seqCnt_1 = 1;
}
Expand Down
2 changes: 2 additions & 0 deletions src/commons/QueryIndexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include "KSeqWrapper.h"
#include "LocalUtil.h"
#include "Debug.h"
#include "common.h"
#include <unordered_map>

struct QuerySplit {
size_t start;
Expand Down
2 changes: 0 additions & 2 deletions src/commons/SeqIterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,6 @@ void SeqIterator::fillQueryKmerBuffer(const char *seq, int seqLen, QueryKmerBuff
int forOrRev;
uint64_t tempKmer = 0;
int checkN;

for (uint8_t frame = 0; frame < 6; frame++) {
uint32_t len = aaFrames[frame].size();
forOrRev = frame / 3;
Expand Down Expand Up @@ -355,7 +354,6 @@ void SeqIterator::fillQueryKmerBuffer(const char *seq, int seqLen, QueryKmerBuff
posToWrite++;
}
}
// cout << "posToWrite: " << posToWrite << endl;
}

void
Expand Down
8 changes: 8 additions & 0 deletions src/commons/common.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#ifndef ADCLASSIFIER2_COMMON_H
#define ADCLASSIFIER2_COMMON_H
#include <cstddef>
#include <utility>
#include "LocalParameters.h"
#include "NcbiTaxonomy.h"
Expand All @@ -9,6 +10,13 @@
#define unlikely(x) __builtin_expect((x),0)
#define kmerLength 8

// Plain record of three size_t counters. What each counter measures is decided
// by the code that fills the record in, which is not visible from this header.
struct KmerCnt {
    // Builds a record with every counter set to zero.
    KmerCnt() : KmerCnt(0, 0, 0) {}

    // Builds a record with every counter supplied by the caller.
    KmerCnt(size_t length, size_t kmerCnt, size_t totalCnt)
        : length(length),
          kmerCnt(kmerCnt),
          totalCnt(totalCnt) {}

    size_t length;
    size_t kmerCnt;
    size_t totalCnt;
};
struct SequenceBlock{
SequenceBlock(size_t start, size_t end, size_t length, size_t seqLength = 0)
: start(start), end(end), length(length), seqLength(seqLength) {}
Expand Down

0 comments on commit 4df4b03

Please sign in to comment.