Skip to content

Commit

Permalink
Expose Smith-Waterman-based prefilter
Browse files Browse the repository at this point in the history
  • Loading branch information
milot-mirdita committed Aug 26, 2023
1 parent 1d62fa0 commit df77d9e
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/CommandDeclarations.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ extern int profile2repseq(int argc, const char **argv, const Command& command);
extern int proteinaln2nucl(int argc, const char **argv, const Command& command);
extern int rescorediagonal(int argc, const char **argv, const Command& command);
extern int ungappedprefilter(int argc, const char **argv, const Command& command);
extern int gappedprefilter(int argc, const char **argv, const Command& command);
extern int unpackdb(int argc, const char **argv, const Command& command);
extern int rbh(int argc, const char **argv, const Command& command);
extern int result2flat(int argc, const char **argv, const Command& command);
Expand Down
8 changes: 8 additions & 0 deletions src/MMseqsBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,14 @@ std::vector<Command> baseCommands = {
CITATION_MMSEQS2, {{"queryDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb },
{"targetDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb },
{"prefilterDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::prefilterDb }}},
{"gappedprefilter", gappedprefilter, &par.gappedprefilter, COMMAND_PREFILTER,
"Optimal Smith-Waterman-based prefiltering (slow)",
NULL,
"Martin Steinegger <martin.steinegger@snu.ac.kr>",
"<i:queryDB> <i:targetDB> <o:prefilterDB>",
CITATION_MMSEQS2, {{"queryDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb },
{"targetDB", DbType::ACCESS_MODE_INPUT, DbType::NEED_DATA, &DbValidator::sequenceDb },
{"prefilterDB", DbType::ACCESS_MODE_OUTPUT, DbType::NEED_DATA, &DbValidator::prefilterDb }}},
{"kmermatcher", kmermatcher, &par.kmermatcher, COMMAND_PREFILTER,
"Find bottom-m-hashed k-mer matches within sequence DB",
NULL,
Expand Down
17 changes: 17 additions & 0 deletions src/commons/Parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,23 @@ Parameters::Parameters():
ungappedprefilter.push_back(&PARAM_COMPRESSED);
ungappedprefilter.push_back(&PARAM_V);
// gappedprefilter
gappedprefilter.push_back(&PARAM_SUB_MAT);
gappedprefilter.push_back(&PARAM_GAP_OPEN);
gappedprefilter.push_back(&PARAM_GAP_EXTEND);
gappedprefilter.push_back(&PARAM_E);
gappedprefilter.push_back(&PARAM_C);
gappedprefilter.push_back(&PARAM_COV_MODE);
gappedprefilter.push_back(&PARAM_NO_COMP_BIAS_CORR);
gappedprefilter.push_back(&PARAM_NO_COMP_BIAS_CORR_SCALE);
gappedprefilter.push_back(&PARAM_MIN_DIAG_SCORE);
gappedprefilter.push_back(&PARAM_MAX_SEQS);
gappedprefilter.push_back(&PARAM_TAXON_LIST);
gappedprefilter.push_back(&PARAM_PRELOAD_MODE);
gappedprefilter.push_back(&PARAM_THREADS);
gappedprefilter.push_back(&PARAM_COMPRESSED);
gappedprefilter.push_back(&PARAM_V);
// clustering
clust.push_back(&PARAM_CLUSTER_MODE);
clust.push_back(&PARAM_MAXITERATIONS);
Expand Down
1 change: 1 addition & 0 deletions src/commons/Parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,7 @@ class Parameters {
PARAMETER(PARAM_LOCAL_TMP)
std::vector<MMseqsParameter*> prefilter;
std::vector<MMseqsParameter*> ungappedprefilter;
std::vector<MMseqsParameter*> gappedprefilter;

// alignment
PARAMETER(PARAM_ALIGNMENT_MODE)
Expand Down
41 changes: 38 additions & 3 deletions src/prefiltering/ungappedprefilter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
#include <omp.h>
#endif


int ungappedprefilter(int argc, const char **argv, const Command &command) {
int prefilterInternal(int argc, const char **argv, const Command &command, int mode) {
Parameters &par = Parameters::getInstance();
par.parseParameters(argc, argv, command, true, 0, 0);
DBWriter resultWriter(par.db3.c_str(), par.db3Index.c_str(), 1, par.compressed, Parameters::DBTYPE_PREFILTER_RES);
Expand Down Expand Up @@ -141,7 +140,37 @@ int ungappedprefilter(int argc, const char **argv, const Command &command) {
continue;
}

int score = aligner.ungapped_alignment(tSeq.numSequence, tSeq.L);
int score;
if (mode == 0) {
score = aligner.ungapped_alignment(tSeq.numSequence, tSeq.L);
} else {
std::string backtrace;
s_align res;
if (isIdentity) {
res = aligner.scoreIdentical(
tSeq.numSequence, tSeq.L, evaluer, Matcher::SCORE_ONLY, backtrace
);
} else {
res = aligner.ssw_align(
tSeq.numSequence,
tSeq.numConsensusSequence,
tSeq.getAlignmentProfile(),
tSeq.L,
backtrace,
par.gapOpen.values.aminoacid(),
par.gapExtend.values.aminoacid(),
Matcher::SCORE_ONLY,
par.evalThr,
evaluer,
par.covMode,
par.covThr,
par.correlationScoreWeight,
qSeq.L / 2,
tId
);
}
score = res.score1;
}
bool hasDiagScore = (score > par.minDiagScoreThr);
double evalue = 0.0;
// check if evalThr != inf
Expand Down Expand Up @@ -199,4 +228,10 @@ int ungappedprefilter(int argc, const char **argv, const Command &command) {
return 0;
}

int ungappedprefilter(int argc, const char **argv, const Command &command) {
return prefilterInternal(argc, argv, command, 0);
}

int gappedprefilter(int argc, const char **argv, const Command &command) {
return prefilterInternal(argc, argv, command, 1);
}

0 comments on commit df77d9e

Please sign in to comment.