From 5edc79bc35f04bfebbe4f7d0a5dd746b48a5aab6 Mon Sep 17 00:00:00 2001 From: Dohyun Kim <62280486+Dohyun-s@users.noreply.github.com> Date: Tue, 31 Oct 2023 15:26:08 +0900 Subject: [PATCH] Add ppos format-output to convertalis for count of positive substitution scores (#723) Co-authored-by: dohyun-s --- src/commons/Parameters.cpp | 3 ++- src/commons/Parameters.h | 1 + src/util/convertalignments.cpp | 30 ++++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/commons/Parameters.cpp b/src/commons/Parameters.cpp index 8a925693a..bab4330f8 100644 --- a/src/commons/Parameters.cpp +++ b/src/commons/Parameters.cpp @@ -102,7 +102,7 @@ Parameters::Parameters(): PARAM_V(PARAM_V_ID, "-v", "Verbosity", "Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info", typeid(int), (void *) &verbosity, "^[0-3]{1}$", MMseqsParameter::COMMAND_COMMON), // convertalignments PARAM_FORMAT_MODE(PARAM_FORMAT_MODE_ID, "--format-mode", "Alignment format", "Output format:\n0: BLAST-TAB\n1: SAM\n2: BLAST-TAB + query/db length\n3: Pretty HTML\n4: BLAST-TAB + column headers\nBLAST-TAB (0) and BLAST-TAB + column headers (4) support custom output formats (--format-output)", typeid(int), (void *) &formatAlignmentMode, "^[0-4]{1}$"), - PARAM_FORMAT_OUTPUT(PARAM_FORMAT_OUTPUT_ID, "--format-output", "Format alignment output", "Choose comma separated list of output columns from: query,target,evalue,gapopen,pident,fident,nident,qstart,qend,qlen\ntstart,tend,tlen,alnlen,raw,bits,cigar,qseq,tseq,qheader,theader,qaln,taln,qframe,tframe,mismatch,qcov,tcov\nqset,qsetid,tset,tsetid,taxid,taxname,taxlineage,qorfstart,qorfend,torfstart,torfend", typeid(std::string), (void *) &outfmt, ""), + PARAM_FORMAT_OUTPUT(PARAM_FORMAT_OUTPUT_ID, "--format-output", "Format alignment output", "Choose comma separated list of output columns from: query,target,evalue,gapopen,pident,fident,nident,qstart,qend,qlen\ntstart,tend,tlen,alnlen,raw,bits,cigar,qseq,tseq,qheader,theader,qaln,taln,qframe,tframe,mismatch,qcov,tcov\nqset,qsetid,tset,tsetid,taxid,taxname,taxlineage,qorfstart,qorfend,torfstart,torfend,ppos", typeid(std::string), (void *) &outfmt, ""), PARAM_DB_OUTPUT(PARAM_DB_OUTPUT_ID, "--db-output", "Database output", "Return a result DB instead of a text file", typeid(bool), (void *) &dbOut, "", MMseqsParameter::COMMAND_EXPERT), // --include-only-extendablediagonal PARAM_RESCORE_MODE(PARAM_RESCORE_MODE_ID, "--rescore-mode", "Rescore mode", "Rescore diagonals with:\n0: Hamming distance\n1: local alignment (score only)\n2: local alignment\n3: global alignment\n4: longest alignment fulfilling window quality criterion", typeid(int), (void *) &rescoreMode, "^[0-4]{1}$"), @@ -2828,6 +2828,7 @@ std::vector Parameters::getOutputFormat(int formatMode, const std::string & else if (outformatSplit[i].compare("qorfend") == 0){ code = Parameters::OUTFMT_QORFEND;} else if (outformatSplit[i].compare("torfstart") == 0){ code = Parameters::OUTFMT_TORFSTART;} else if (outformatSplit[i].compare("torfend") == 0){ code = Parameters::OUTFMT_TORFEND;} + else if (outformatSplit[i].compare("ppos") == 0){ needSequences = true; needBacktrace = true; code = Parameters::OUTFMT_PPOS;} else if (outformatSplit[i].compare("empty") == 0){ code = Parameters::OUTFMT_EMPTY;} else { Debug(Debug::ERROR) << "Format code " << outformatSplit[i] << " does not exist."; diff --git a/src/commons/Parameters.h b/src/commons/Parameters.h index 131ac5861..4f9e742cd 100644 --- a/src/commons/Parameters.h +++ b/src/commons/Parameters.h @@ -182,6 +182,7 @@ class Parameters { static const int OUTFMT_TORFSTART = 37; static const int OUTFMT_TORFEND = 38; static const int OUTFMT_FIDENT = 39; + static const int OUTFMT_PPOS = 40; static const int INDEX_SUBSET_NORMAL = 0; static const int INDEX_SUBSET_NO_HEADERS = 1; diff --git a/src/util/convertalignments.cpp b/src/util/convertalignments.cpp index f0d889309..ffa542544 100644 --- a/src/util/convertalignments.cpp +++ b/src/util/convertalignments.cpp @@ -636,6 +636,36 @@ int convertalignments(int argc, const char **argv, const Command &command) { case Parameters::OUTFMT_TORFEND: result.append(SSTR(res.dbOrfEndPos)); break; + case Parameters::OUTFMT_PPOS: { + float pPositive = 0; + int matchCount = 0; + if (res.backtrace.empty() == false) { + int qPos = 0; + int tPos = 0; + for (size_t pos = 0; pos < res.backtrace.size(); pos++) { + switch (res.backtrace[pos]) { + case 'M': { + char qRes = queryProfile ? queryProfData[qPos] : querySeqData[qPos]; + char tRes = targetProfile ? targetProfData[tPos] : targetSeqData[tPos]; + pPositive += (subMat->subMatrix[subMat->aa2num[(int)qRes]][subMat->aa2num[(int)tRes]] > 0); + matchCount += 1; + qPos++; + tPos++; + break; + } + case 'D': + qPos++; + break; + case 'I': + tPos++; + break; + } + } + pPositive /= matchCount; + } + result.append(SSTR(pPositive)); + break; + } } if (i < outcodes.size() - 1) { result.push_back('\t');