Skip to content

Commit

Permalink
Allow different prefilter modes
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-steinegger committed Oct 31, 2023
1 parent 5edc79b commit 5e119e9
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 15 deletions.
46 changes: 41 additions & 5 deletions data/workflow/blastp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,33 @@ fail() {
exit 1
}

abspath() {
if [ -d "$1" ]; then
(cd "$1"; pwd)
elif [ -f "$1" ]; then
if [ -z "${1##*/*}" ]; then
echo "$(cd "${1%/*}"; pwd)/${1##*/}"
else
echo "$(pwd)/$1"
fi
elif [ -d "$(dirname "$1")" ]; then
echo "$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
fi
}

fake_pref() {
QDB="$1"
TDB="$2"
RES="$3"
# create link to data file which contains a list of all targets that should be aligned
ln -s "$(abspath "${TDB}.index")" "${RES}"
# create new index repeatedly pointing to same entry
INDEX_SIZE="$(wc -c < "${TDB}.index")"
awk -v size="$INDEX_SIZE" '{ print $1"\t0\t"size; }' "${QDB}.index" > "${RES}.index"
# create dbtype (7)
awk 'BEGIN { printf("%c%c%c%c",7,0,0,0); exit; }' > "${RES}.dbtype"
}

notExists() {
[ ! -f "$1" ]
}
Expand All @@ -27,14 +54,23 @@ ALN_RES_MERGE="$TMP_PATH/aln_0"
while [ "$STEP" -lt "$STEPS" ]; do
SENS_PARAM=SENSE_${STEP}
eval SENS="\$$SENS_PARAM"
# call prefilter module

# 1. Prefilter hits
if notExists "$TMP_PATH/pref_$STEP.dbtype"; then
# shellcheck disable=SC2086
$RUNNER "$MMSEQS" prefilter "$INPUT" "$TARGET" "$TMP_PATH/pref_$STEP" $PREFILTER_PAR -s "$SENS" \
|| fail "Prefilter died"
if [ "$PREFMODE" = "EXHAUSTIVE" ]; then
fake_pref "${INPUT}" "${TARGET}" "$TMP_PATH/pref_$STEP"
elif [ "$PREFMODE" = "UNGAPPED" ]; then
# shellcheck disable=SC2086
$RUNNER "$MMSEQS" ungappedprefilter "$INPUT" "$TARGET" "$TMP_PATH/pref_$STEP" $UNGAPPEDPREFILTER_PAR \
|| fail "Ungapped prefilter died"
else
# shellcheck disable=SC2086
$RUNNER "$MMSEQS" prefilter "$INPUT" "$TARGET" "$TMP_PATH/pref_$STEP" $PREFILTER_PAR -s "$SENS" \
|| fail "Prefilter died"
fi
fi

# call alignment module
# 2. alignment module
if [ "$STEPS" -eq 1 ]; then
if notExists "$3.dbtype"; then
# shellcheck disable=SC2086
Expand Down
46 changes: 42 additions & 4 deletions data/workflow/blastpgp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,33 @@ fail() {
exit 1
}

abspath() {
if [ -d "$1" ]; then
(cd "$1"; pwd)
elif [ -f "$1" ]; then
if [ -z "${1##*/*}" ]; then
echo "$(cd "${1%/*}"; pwd)/${1##*/}"
else
echo "$(pwd)/$1"
fi
elif [ -d "$(dirname "$1")" ]; then
echo "$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
fi
}

fake_pref() {
QDB="$1"
TDB="$2"
RES="$3"
# create link to data file which contains a list of all targets that should be aligned
ln -s "$(abspath "${TDB}.index")" "${RES}"
# create new index repeatedly pointing to same entry
INDEX_SIZE="$(wc -c < "${TDB}.index")"
awk -v size="$INDEX_SIZE" '{ print $1"\t0\t"size; }' "${QDB}.index" > "${RES}.index"
# create dbtype (7)
awk 'BEGIN { printf("%c%c%c%c",7,0,0,0); exit; }' > "${RES}.dbtype"
}

notExists() {
[ ! -f "$1" ]
}
Expand All @@ -28,15 +55,26 @@ STEP=0
while [ "$STEP" -lt "$NUM_IT" ]; do
# call prefilter module
if notExists "$TMP_PATH/pref_tmp_${STEP}.done"; then
PARAM="PREFILTER_PAR_$STEP"
eval TMP="\$$PARAM"
if [ "$PREFMODE" = "EXHAUSTIVE" ]; then
TMP=""
PREF="fake_pref"
elif [ "$PREFMODE" = "UNGAPPED" ]; then
PARAM="UNGAPPEDPREFILTER_PAR_$STEP"
eval TMP="\$$PARAM"
PREF="${MMSEQS} ungappedprefilter"
else
PARAM="PREFILTER_PAR_$STEP"
eval TMP="\$$PARAM"
PREF="${MMSEQS} prefilter"
fi

if [ "$STEP" -eq 0 ]; then
# shellcheck disable=SC2086
$RUNNER "$MMSEQS" prefilter "$QUERYDB" "$2" "$TMP_PATH/pref_$STEP" ${TMP} \
$RUNNER $PREF "$QUERYDB" "$2" "$TMP_PATH/pref_$STEP" ${TMP} \
|| fail "Prefilter died"
else
# shellcheck disable=SC2086
$RUNNER "$MMSEQS" prefilter "$QUERYDB" "$2" "$TMP_PATH/pref_tmp_$STEP" ${TMP} \
$RUNNER $PREF "$QUERYDB" "$2" "$TMP_PATH/pref_tmp_$STEP" ${TMP} \
|| fail "Prefilter died"
fi
touch "$TMP_PATH/pref_tmp_${STEP}.done"
Expand Down
3 changes: 3 additions & 0 deletions src/commons/Parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ Parameters::Parameters():
PARAM_NUM_ITERATIONS(PARAM_NUM_ITERATIONS_ID, "--num-iterations", "Search iterations", "Number of iterative profile search iterations", typeid(int), (void *) &numIterations, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_PROFILE),
PARAM_START_SENS(PARAM_START_SENS_ID, "--start-sens", "Start sensitivity", "Start sensitivity", typeid(float), (void *) &startSens, "^[0-9]*(\\.[0-9]+)?$"),
PARAM_SENS_STEPS(PARAM_SENS_STEPS_ID, "--sens-steps", "Search steps", "Number of search steps performed from --start-sens to -s", typeid(int), (void *) &sensSteps, "^[1-9]{1}$"),
PARAM_PREF_MODE(PARAM_PREF_MODE_ID,"--prefilter-mode", "Prefilter mode", "prefilter mode: 0: kmer/ungapped 1: ungapped, 2: nofilter",typeid(int), (void *) &prefMode, "^[0-2]{1}$"),
PARAM_EXHAUSTIVE_SEARCH(PARAM_EXHAUSTIVE_SEARCH_ID, "--exhaustive-search", "Exhaustive search mode", "For bigger profile DB, run iteratively the search by greedily swapping the search results", typeid(bool), (void *) &exhaustiveSearch, "", MMseqsParameter::COMMAND_PROFILE | MMseqsParameter::COMMAND_EXPERT),
PARAM_EXHAUSTIVE_SEARCH_FILTER(PARAM_EXHAUSTIVE_SEARCH_FILTER_ID, "--exhaustive-search-filter", "Filter results during exhaustive search", "Filter result during search: 0: do not filter, 1: filter", typeid(int), (void *) &exhaustiveFilterMsa, "^[0-1]{1}$", MMseqsParameter::COMMAND_ALIGN | MMseqsParameter::COMMAND_EXPERT),
Expand Down Expand Up @@ -1253,6 +1254,7 @@ Parameters::Parameters():
searchworkflow.push_back(&PARAM_NUM_ITERATIONS);
searchworkflow.push_back(&PARAM_START_SENS);
searchworkflow.push_back(&PARAM_SENS_STEPS);
searchworkflow.push_back(&PARAM_PREF_MODE);
searchworkflow.push_back(&PARAM_EXHAUSTIVE_SEARCH);
searchworkflow.push_back(&PARAM_EXHAUSTIVE_SEARCH_FILTER);
searchworkflow.push_back(&PARAM_STRAND);
Expand Down Expand Up @@ -2260,6 +2262,7 @@ void Parameters::setDefaults() {

// search workflow
numIterations = 1;
prefMode = PREF_MODE_KMER;
startSens = 4;
sensSteps = 1;
exhaustiveSearch = false;
Expand Down
7 changes: 7 additions & 0 deletions src/commons/Parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,11 @@ class Parameters {
static const int ID_MODE_KEYS = 0;
static const int ID_MODE_LOOKUP = 1;

// prefilter mode
static const int PREF_MODE_KMER = 0;
static const int PREF_MODE_UNGAPPED = 1;
static const int PREF_MODE_EXHAUSTIVE = 2;

// unpackdb
static const int UNPACK_NAME_KEY = 0;
static const int UNPACK_NAME_ACCESSION = 1;
Expand Down Expand Up @@ -450,6 +455,7 @@ class Parameters {

// SEARCH WORKFLOW
int numIterations;
int prefMode;
float startSens;
int sensSteps;
bool exhaustiveSearch;
Expand Down Expand Up @@ -872,6 +878,7 @@ class Parameters {
PARAMETER(PARAM_NUM_ITERATIONS)
PARAMETER(PARAM_START_SENS)
PARAMETER(PARAM_SENS_STEPS)
PARAMETER(PARAM_PREF_MODE)
PARAMETER(PARAM_EXHAUSTIVE_SEARCH)
PARAMETER(PARAM_EXHAUSTIVE_SEARCH_FILTER)
PARAMETER(PARAM_STRAND)
Expand Down
45 changes: 39 additions & 6 deletions src/workflow/Search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,8 @@ int search(int argc, const char **argv, const Command& command) {
}
}



const bool isUngappedMode = par.alignmentMode == Parameters::ALIGNMENT_MODE_UNGAPPED;
if (isUngappedMode && (searchMode & (Parameters::SEARCH_MODE_FLAG_QUERY_PROFILE |Parameters::SEARCH_MODE_FLAG_TARGET_PROFILE ))) {
par.printUsageMessage(command, MMseqsParameter::COMMAND_ALIGN | MMseqsParameter::COMMAND_PREFILTER);
Expand Down Expand Up @@ -316,6 +318,19 @@ int search(int argc, const char **argv, const Command& command) {
} else {
cmd.addVariable("ALIGN_MODULE", "align");
}

switch(par.prefMode){
case Parameters::PREF_MODE_KMER:
cmd.addVariable("PREFMODE", "KMER");
break;
case Parameters::PREF_MODE_UNGAPPED:
cmd.addVariable("PREFMODE", "UNGAPPED");
break;
case Parameters::PREF_MODE_EXHAUSTIVE:
cmd.addVariable("PREFMODE", "EXHAUSTIVE");
break;
}

cmd.addVariable("REMOVE_TMP", par.removeTmpFiles ? "TRUE" : NULL);
std::string program;
cmd.addVariable("RUNNER", par.runner.c_str());
Expand All @@ -334,7 +349,11 @@ int search(int argc, const char **argv, const Command& command) {
par.covMode = Util::swapCoverageMode(par.covMode);
size_t maxResListLen = par.maxResListLen;
par.maxResListLen = std::max((size_t)300, queryDbSize);
cmd.addVariable("PREFILTER_PAR", par.createParameterString(par.prefilter).c_str());
if(par.prefMode == Parameters::PREF_MODE_KMER){
cmd.addVariable("PREFILTER_PAR", par.createParameterString(par.prefilter).c_str());
} else if (par.prefMode == Parameters::PREF_MODE_UNGAPPED) {
cmd.addVariable("UNGAPPEDPREFILTER_PAR", par.createParameterString(par.ungappedprefilter).c_str());
}
par.maxResListLen = maxResListLen;
double originalEvalThr = par.evalThr;
par.evalThr = std::numeric_limits<double>::max();
Expand Down Expand Up @@ -385,8 +404,13 @@ int search(int argc, const char **argv, const Command& command) {
if (i == (par.numIterations - 1)) {
par.evalThr = originalEval;
}
cmd.addVariable(std::string("PREFILTER_PAR_" + SSTR(i)).c_str(),
par.createParameterString(par.prefilter).c_str());
if (par.prefMode == Parameters::PREF_MODE_KMER) {
cmd.addVariable(std::string("PREFILTER_PAR_" + SSTR(i)).c_str(),
par.createParameterString(par.prefilter).c_str());
} else if (par.prefMode == Parameters::PREF_MODE_UNGAPPED) {
cmd.addVariable(std::string("UNGAPPEDPREFILTER_PAR_" + SSTR(i)).c_str(),
par.createParameterString(par.ungappedprefilter).c_str());
}
if (isUngappedMode) {
par.rescoreMode = Parameters::RESCORE_MODE_ALIGNMENT;
cmd.addVariable(std::string("ALIGNMENT_PAR_" + SSTR(i)).c_str(),
Expand Down Expand Up @@ -439,8 +463,13 @@ int search(int argc, const char **argv, const Command& command) {
par.evalThr = originalEval;
}

cmd.addVariable(std::string("PREFILTER_PAR_" + SSTR(i)).c_str(),
par.createParameterString(par.prefilter).c_str());
if (par.prefMode == Parameters::PREF_MODE_KMER) {
cmd.addVariable(std::string("PREFILTER_PAR_" + SSTR(i)).c_str(),
par.createParameterString(par.prefilter).c_str());
} else if (par.prefMode == Parameters::PREF_MODE_UNGAPPED) {
cmd.addVariable(std::string("UNGAPPEDPREFILTER_PAR_" + SSTR(i)).c_str(),
par.createParameterString(par.ungappedprefilter).c_str());
}
if (isUngappedMode) {
par.rescoreMode = Parameters::RESCORE_MODE_ALIGNMENT;
cmd.addVariable(std::string("ALIGNMENT_PAR_" + SSTR(i)).c_str(),
Expand Down Expand Up @@ -487,7 +516,11 @@ int search(int argc, const char **argv, const Command& command) {
prefilterWithoutS.push_back(par.prefilter[i]);
}
}
cmd.addVariable("PREFILTER_PAR", par.createParameterString(prefilterWithoutS).c_str());
if (par.prefMode == Parameters::PREF_MODE_KMER) {
cmd.addVariable("PREFILTER_PAR", par.createParameterString(prefilterWithoutS).c_str());
} else if (par.prefMode == Parameters::PREF_MODE_UNGAPPED) {
cmd.addVariable("UNGAPPEDPREFILTER_PAR", par.createParameterString(par.ungappedprefilter).c_str());
}
if (isUngappedMode) {
par.rescoreMode = Parameters::RESCORE_MODE_ALIGNMENT;
cmd.addVariable("ALIGNMENT_PAR", par.createParameterString(par.rescorediagonal).c_str());
Expand Down

0 comments on commit 5e119e9

Please sign in to comment.