From 3310337471fc46880c245508af6a23adcb192cee Mon Sep 17 00:00:00 2001 From: Jaebeom Kim <68528165+jaebeom-kim@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:27:35 +0900 Subject: [PATCH] Add parameter for taxonomy report in easy-search (#389) --- data/easystructuresearch.sh | 6 ++++++ src/commons/LocalParameters.cpp | 1 + src/workflow/EasyStructureSearch.cpp | 10 ++++++++++ 3 files changed, 17 insertions(+) diff --git a/data/easystructuresearch.sh b/data/easystructuresearch.sh index ba482f01..30a06e7a 100644 --- a/data/easystructuresearch.sh +++ b/data/easystructuresearch.sh @@ -51,6 +51,12 @@ if notExists "${TMP_PATH}/alis.dbtype"; then || fail "Convert Alignments died" fi +if [ -n "${TAXONOMY}" ]; then + # shellcheck disable=SC2086 + "$MMSEQS" taxonomyreport "${TARGET}${INDEXEXT}" "${INTERMEDIATE}" "${RESULTS}_report" ${TAXONOMYREPORT_PAR} \ + || fail "taxonomyreport died" +fi + if [ -n "${REMOVE_TMP}" ]; then if [ -n "${GREEDY_BEST_HITS}" ]; then # shellcheck disable=SC2086 diff --git a/src/commons/LocalParameters.cpp b/src/commons/LocalParameters.cpp index 645ae0b2..cf85e356 100644 --- a/src/commons/LocalParameters.cpp +++ b/src/commons/LocalParameters.cpp @@ -158,6 +158,7 @@ LocalParameters::LocalParameters() : easystructuresearchworkflow = combineList(structuresearchworkflow, structurecreatedb); easystructuresearchworkflow = combineList(easystructuresearchworkflow, convertalignments); + easystructuresearchworkflow = combineList(easystructuresearchworkflow, taxonomyreport); easystructuresearchworkflow.push_back(&PARAM_GREEDY_BEST_HITS); structureclusterworkflow = combineList(prefilter, structurealign); diff --git a/src/workflow/EasyStructureSearch.cpp b/src/workflow/EasyStructureSearch.cpp index c59cc578..49b69b19 100644 --- a/src/workflow/EasyStructureSearch.cpp +++ b/src/workflow/EasyStructureSearch.cpp @@ -20,6 +20,7 @@ void setEasyStructureSearchDefaults(Parameters *p) { p->gapExtend = 1; p->alignmentMode = Parameters::ALIGNMENT_MODE_SCORE_COV_SEQID; p->removeTmpFiles = true; + p->reportMode = 2; } void setEasyStructureSearchMustPassAlong(Parameters *p) { p->PARAM_K.wasSet = true; @@ -50,6 +51,8 @@ int easystructuresearch(int argc, const char **argv, const Command &command) { par.PARAM_THREADS.removeCategory(MMseqsParameter::COMMAND_EXPERT); par.PARAM_V.removeCategory(MMseqsParameter::COMMAND_EXPERT); + par.overrideParameterDescription(par.PARAM_REPORT_MODE, "Taxonomy report mode 0: Kraken 1: Krona 2: Skip taxonomy report", "^[0-2]{1}$", 0); + setEasyStructureSearchDefaults(&par); par.parseParameters(argc, argv, command, true, Parameters::PARSE_VARIADIC, 0); setEasyStructureSearchMustPassAlong(&par); @@ -69,6 +72,10 @@ int easystructuresearch(int argc, const char **argv, const Command &command) { bool needLDDT = false; LocalParameters::getOutputFormat(par.formatAlignmentMode, par.outfmt, needSequenceDB, needBacktrace, needFullHeaders, needLookup, needSource, needTaxonomyMapping, needTaxonomy, needQCA, needTCA, needTMalign, needLDDT); + if (par.reportMode != 2) { + needTaxonomy = true; + needTaxonomyMapping = true; + } } if (par.formatAlignmentMode == Parameters::FORMAT_ALIGNMENT_SAM || @@ -129,6 +136,9 @@ int easystructuresearch(int argc, const char **argv, const Command &command) { cmd.addVariable("CONVERT_PAR", par.createParameterString(par.convertalignments).c_str()); cmd.addVariable("SUMMARIZE_PAR", par.createParameterString(par.summarizeresult).c_str()); + cmd.addVariable("TAXONOMY", needTaxonomy && needTaxonomyMapping && par.reportMode != 2 ? "TRUE" : NULL); + cmd.addVariable("TAXONOMYREPORT_PAR", par.createParameterString(par.taxonomyreport).c_str()); + std::string program = tmpDir + "/easystructuresearch.sh"; FileUtil::writeFile(program, easystructuresearch_sh, easystructuresearch_sh_len); cmd.execProgram(program.c_str(), par.filenames);