From 8fe3bf9bbec401bb43390e909b143adc660df74e Mon Sep 17 00:00:00 2001 From: Milot Mirdita Date: Wed, 2 Aug 2023 16:39:15 +0200 Subject: [PATCH] Allow combining various index subset modes for createindex --- src/commons/Parameters.cpp | 2 +- src/prefiltering/PrefilteringIndexReader.cpp | 5 +++-- src/util/indexdb.cpp | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/commons/Parameters.cpp b/src/commons/Parameters.cpp index 0acab122a..38ccb41ee 100644 --- a/src/commons/Parameters.cpp +++ b/src/commons/Parameters.cpp @@ -188,7 +188,7 @@ Parameters::Parameters(): // indexdb PARAM_CHECK_COMPATIBLE(PARAM_CHECK_COMPATIBLE_ID, "--check-compatible", "Check compatible", "0: Always recreate index, 1: Check if recreating index is needed, 2: Fail if index is incompatible", typeid(int), (void *) &checkCompatible, "^[0-2]{1}$", MMseqsParameter::COMMAND_MISC), PARAM_SEARCH_TYPE(PARAM_SEARCH_TYPE_ID, "--search-type", "Search type", "Search type 0: auto 1: amino acid, 2: translated, 3: nucleotide, 4: translated nucleotide alignment", typeid(int), (void *) &searchType, "^[0-4]{1}"), - PARAM_INDEX_SUBSET(PARAM_INDEX_SUBSET_ID, "--index-subset", "Index subset", "Create specialized index with subset of entries 0: normal index 1: index without headers 1: index without prefiltering data", typeid(int), (void *) &indexSubset, "^[0-2]{1}", MMseqsParameter::COMMAND_EXPERT), + PARAM_INDEX_SUBSET(PARAM_INDEX_SUBSET_ID, "--index-subset", "Index subset", "Create specialized index with subset of entries\n0: normal index\n1: index without headers\n2: index without prefiltering data\nFlags can be combined bit wise", typeid(int), (void *) &indexSubset, "^[0-3]{1}", MMseqsParameter::COMMAND_EXPERT), // createdb PARAM_USE_HEADER(PARAM_USE_HEADER_ID, "--use-fasta-header", "Use fasta header", "Use the id parsed from the fasta header as the index key instead of using incrementing numeric identifiers", typeid(bool), (void *) &useHeader, ""), PARAM_ID_OFFSET(PARAM_ID_OFFSET_ID, 
"--id-offset", "Offset of numeric ids", "Numeric ids in index file are offset by this value", typeid(int), (void *) &identifierOffset, "^(0|[1-9]{1}[0-9]*)$"), diff --git a/src/prefiltering/PrefilteringIndexReader.cpp b/src/prefiltering/PrefilteringIndexReader.cpp index 9182a3d03..e5df17dcc 100644 --- a/src/prefiltering/PrefilteringIndexReader.cpp +++ b/src/prefiltering/PrefilteringIndexReader.cpp @@ -190,13 +190,14 @@ void PrefilteringIndexReader::createIndexFile(const std::string &outDB, (Parameters::isEqualDbtype(seqType, Parameters::DBTYPE_NUCLEOTIDES) || Parameters::isEqualDbtype(seqType, Parameters::DBTYPE_AMINO_ACIDS)) ? alphabetSize -1: alphabetSize; - if (indexSubset == Parameters::INDEX_SUBSET_NO_PREFILTER) { + const bool noPrefilter = (indexSubset & Parameters::INDEX_SUBSET_NO_PREFILTER) != 0; + if (noPrefilter) { splits = 0; } ScoreMatrix s3; ScoreMatrix s2; - if (Parameters::isEqualDbtype(seqType, Parameters::DBTYPE_HMM_PROFILE) == false && indexSubset != Parameters::INDEX_SUBSET_NO_PREFILTER) { + if (Parameters::isEqualDbtype(seqType, Parameters::DBTYPE_HMM_PROFILE) == false && noPrefilter == false) { int alphabetSize = subMat->alphabetSize; subMat->alphabetSize = subMat->alphabetSize-1; s3 = ExtendedSubstitutionMatrix::calcScoreMatrix(*subMat, 3); diff --git a/src/util/indexdb.cpp b/src/util/indexdb.cpp index 1916b3fe9..906695445 100644 --- a/src/util/indexdb.cpp +++ b/src/util/indexdb.cpp @@ -135,15 +135,16 @@ int indexdb(int argc, const char **argv, const Command &command) { } } + const bool noHeaders = (par.indexSubset & Parameters::INDEX_SUBSET_NO_HEADERS) != 0; if (recreate) { DBReader *hdbr1 = NULL; - if (par.indexSubset != Parameters::INDEX_SUBSET_NO_HEADERS) { + if (noHeaders == false) { hdbr1 = new DBReader(hdr1.c_str(), hdr1Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); hdbr1->open(DBReader::NOSORT); } DBReader *hdbr2 = NULL; - if (sameDB == false && ppDB == false && par.indexSubset != 
Parameters::INDEX_SUBSET_NO_HEADERS) { + if (sameDB == false && ppDB == false && noHeaders == false) { hdbr2 = new DBReader(par.hdr2.c_str(), par.hdr2Index.c_str(), par.threads, DBReader::USE_INDEX | DBReader::USE_DATA); hdbr2->open(DBReader::NOSORT); }