diff --git a/command_line/image.cpp b/command_line/image.cpp index fd02c1e6..3563245d 100644 --- a/command_line/image.cpp +++ b/command_line/image.cpp @@ -123,6 +123,7 @@ int bandageImage(QStringList arguments) QString blastError = g_blastSearch->doAutoGraphSearch(*g_assemblyGraph, g_settings->blastQueryFilename, + false, /* include paths */ g_settings->blastSearchParameters); if (!blastError.isEmpty()) { err << blastError << Qt::endl; diff --git a/command_line/querypaths.cpp b/command_line/querypaths.cpp index e0ea90dd..653bca9a 100644 --- a/command_line/querypaths.cpp +++ b/command_line/querypaths.cpp @@ -128,6 +128,7 @@ int bandageQueryPaths(QStringList arguments) out << "(" << QDateTime::currentDateTime().toString("dd MMM yyyy hh:mm:ss") << ") Running BLAST search... " << Qt::flush; QString blastError = g_blastSearch->doAutoGraphSearch(*g_assemblyGraph, g_settings->blastQueryFilename, + false, /* include paths */ g_settings->blastSearchParameters); if (!blastError.isEmpty()) { err << Qt::endl << blastError << Qt::endl; diff --git a/command_line/reduce.cpp b/command_line/reduce.cpp index 3e5cc386..d36fe940 100644 --- a/command_line/reduce.cpp +++ b/command_line/reduce.cpp @@ -92,6 +92,7 @@ int bandageReduce(QStringList arguments) QString blastError = g_blastSearch->doAutoGraphSearch(*g_assemblyGraph, g_settings->blastQueryFilename, + false, /* include paths */ g_settings->blastSearchParameters); if (!blastError.isEmpty()) { err << blastError << Qt::endl; diff --git a/graph/annotationsmanager.cpp b/graph/annotationsmanager.cpp index a16a6a0f..674fe2c3 100644 --- a/graph/annotationsmanager.cpp +++ b/graph/annotationsmanager.cpp @@ -96,14 +96,14 @@ void AnnotationsManager::updateGroupFromHits(const QString &name, const std::vec auto &group = createAnnotationGroup(name); for (auto *query: queries) { for (const auto &hit: query->getHits()) { - auto &annotation = group.annotationMap[hit.m_node].emplace_back( + auto &annotation = group.annotationMap[hit->m_node].emplace_back( std::make_unique( - hit.m_nodeStart, - hit.m_nodeEnd, + hit->m_nodeStart, + hit->m_nodeEnd, query->getName().toStdString())); annotation->addView(std::make_unique(1.0, query->getColour())); - annotation->addView(std::make_unique(hit.queryStartFraction(), - hit.queryEndFraction())); + annotation->addView(std::make_unique(hit->queryStartFraction(), + hit->queryEndFraction())); } } diff --git a/graph/path.cpp b/graph/path.cpp index e1bb19dd..65512a87 100644 --- a/graph/path.cpp +++ b/graph/path.cpp @@ -157,7 +157,7 @@ Path Path::makeFromString(const QString& pathString, const AssemblyGraph &graph, return path; } - //Find which node names are and are not actually in the graph. + //Find which node names are and are not actually in the graph. std::vector nodesInGraph; QStringList nodesNotInGraph; for (auto & i : nodeNameList) { @@ -433,33 +433,32 @@ int Path::getLength() const { } -QString Path::getFasta() const { - //The description line is a comma-delimited list of the nodes in the path - QString fasta = ">" + getString(false); +QByteArray Path::getFasta(QString name) const { + // The description line is a comma-delimited list of the nodes in the path + QByteArray fasta = ">" + (name.isEmpty() ? getString(false).toLatin1() : name.toLatin1()); if (isCircular()) fasta += " (circular)"; fasta += "\n"; + fasta += utils::addNewlinesToSequence(getPathSequence()); - QString pathSequence = getPathSequence(); - int charactersOnLine = 0; - for (auto i : pathSequence) - { - fasta += i; - ++charactersOnLine; - if (charactersOnLine >= 70) - { - fasta += "\n"; - charactersOnLine = 0; - } - } + return fasta; +} + +QByteArray Path::getAAFasta(unsigned shift, QString name) const { + // The description line is a comma-delimited list of the nodes in the path + QByteArray fasta = ">" + (name.isEmpty() ? getString(false).toLatin1() : name.toLatin1()); + + if (isCircular()) + fasta += " (circular)"; + fasta += "/" + std::to_string(shift); fasta += "\n"; + fasta += utils::addNewlinesToSequence(getPathSequence()); return fasta; } - QString Path::getString(bool spaces) const { QString output; for (int i = 0; i < m_nodes.size(); ++i) { @@ -773,11 +772,11 @@ std::vector Path::getPosition(const DeBruijnNode *node) const { return res; } -// Checks whether [leftx, rightx) intersects [lefty, righty) +// Checks whether [leftx, rightx] intersects [lefty, righty] static bool intersects(int leftx, int rightx, int lefty, int righty) { - return (leftx <= lefty && lefty < rightx) || - (lefty <= leftx && leftx < righty); + return (leftx <= lefty && lefty <= rightx) || + (lefty <= leftx && leftx <= righty); } @@ -791,7 +790,7 @@ std::vector Path::getNodesAt(int startPosition, int endPosition) if (i > 0) pos -= m_edges[i-1]->getOverlap(); int nodeLen = m_nodes[i]->getLength(); - if (intersects(pos, pos+nodeLen, + if (intersects(pos, pos + nodeLen - 1, startPosition, endPosition)) res.push_back(m_nodes[i]); @@ -800,3 +799,43 @@ std::vector Path::getNodesAt(int startPosition, int endPosition) return res; } + +Path::MappingPath Path::getNodeCovering(int startPosition, int endPosition) const { + int pos = -m_startLocation.getPosition() + 1; // all UI positions are 1-based, convert to 0-based + + startPosition -= 1; endPosition -= 1; // all UI positions are 1-based, convert to 0-based + + MappingPath res; + for (size_t i = 0; i < m_nodes.size(); ++i) { + if (i > 0) + pos -= m_edges[i-1]->getOverlap(); + int nodeLen = m_nodes[i]->getLength(); + if (intersects(pos, pos + nodeLen - 1, + startPosition, endPosition)) { + MappingRange range; + // Determing the mapping range for a node + // Note that MappingRange is 1-based. + if (pos < startPosition) { + range.initial_range.from = startPosition + 1; + range.mapped_range.from = startPosition - pos + 1; + } else { + range.initial_range.from = pos + 1; + range.mapped_range.from = 1; + } + + if (pos + nodeLen - 1 > endPosition) { + range.initial_range.to = endPosition + 1; + range.mapped_range.to = endPosition - pos + 1; + } else { + range.initial_range.to = pos + nodeLen; + range.mapped_range.to = nodeLen; + } + + res.emplace_back(m_nodes[i], range); + } + + pos += nodeLen; + } + + return res; +} diff --git a/graph/path.h b/graph/path.h index a491c246..ce46325d 100644 --- a/graph/path.h +++ b/graph/path.h @@ -31,9 +31,22 @@ class DeBruijnNode; class DeBruijnEdge; class AssemblyGraph; -class Path -{ +class Path { public: + // [from, to] since UI does this + struct Range { + int from; int to; + }; + + struct MappingRange { + // on genome/contig/whatever + Range initial_range; + // on node + Range mapped_range; + }; + + using MappingPath = std::vector>; + //CREATORS Path() {} Path(GraphLocation location); @@ -55,7 +68,8 @@ class Path bool haveSameNodes(const Path& other) const; bool hasNodeSubset(const Path& other) const; [[nodiscard]] QByteArray getPathSequence() const; - [[nodiscard]] QString getFasta() const; + [[nodiscard]] QByteArray getFasta(QString name = "") const; + [[nodiscard]] QByteArray getAAFasta(unsigned shift, QString name = "") const; [[nodiscard]] QString getString(bool spaces) const; int getLength() const; QList extendPathInAllPossibleWays() const; @@ -64,6 +78,8 @@ class Path std::vector getPosition(const DeBruijnNode *node) const; std::vector getNodesAt(int startPosition, int endPosition) const; + MappingPath getNodeCovering(int startPosition, int endPosition) const; + bool containsNode(const DeBruijnNode * node) const; bool containsEntireNode(const DeBruijnNode * node) const; bool isInMiddleOfPath(const DeBruijnNode * node) const; diff --git a/graphsearch/blast/blastsearch.cpp b/graphsearch/blast/blastsearch.cpp index 7c66b91e..14e91ac0 100644 --- a/graphsearch/blast/blastsearch.cpp +++ b/graphsearch/blast/blastsearch.cpp @@ -52,9 +52,9 @@ bool BlastSearch::findTools() { return true; } -QString BlastSearch::buildDatabase(const AssemblyGraph &graph) { +QString BlastSearch::buildDatabase(const AssemblyGraph &graph, bool includePaths) { DbBuildFinishedRAII watcher(this); - + m_lastError = ""; if (!findTools()) return m_lastError; @@ -68,14 +68,24 @@ QString BlastSearch::buildDatabase(const AssemblyGraph &graph) { if (!file.open(QIODevice::WriteOnly | QIODevice::Text)) return (m_lastError = "Failed to open: " + file.fileName()); - QTextStream out(&file); - for (const auto *node : graph.m_deBruijnGraphNodes) { - if (m_cancelBuildDatabase) - return (m_lastError = "Build cancelled."); + { + QTextStream out(&file); + for (const auto *node : graph.m_deBruijnGraphNodes) { + if (m_cancelBuildDatabase) + return (m_lastError = "Build cancelled."); + + out << node->getFasta(true, false, false); + } - out << node->getFasta(true, false, false); + if (includePaths) { + for (auto it = graph.m_deBruijnGraphPaths.begin(); it != graph.m_deBruijnGraphPaths.end(); ++it) { + if (m_cancelBuildDatabase) + return (m_lastError = "Build cancelled."); + + out << it.value()->getFasta(it.key().c_str()); + } + } } - file.close(); // Make sure the graph has sequences bool atLeastOneSequence = false; @@ -178,11 +188,13 @@ QString BlastSearch::runOneBlastSearch(QuerySequenceType sequenceType, return blastOutput; } -static void buildHitsFromBlastOutput(QString blastOutput, Queries &queries); +static std::pair +buildHitsFromBlastOutput(QString blastOutput, + Queries &queries); QString BlastSearch::doSearch(Queries &queries, QString extraParameters) { GraphSearchFinishedRAII watcher(this); - + m_lastError = ""; if (!findTools()) return m_lastError; @@ -211,8 +223,10 @@ QString BlastSearch::doSearch(Queries &queries, QString extraParameters) { return (m_lastError = "BLAST search cancelled"); // If the code got here, then the search completed successfully. - buildHitsFromBlastOutput(blastOutput, queries); + auto [nodeHits, pathHits] = buildHitsFromBlastOutput(blastOutput, queries); + queries.addNodeHits(nodeHits); queries.findQueryPaths(); + queries.addPathHits(pathHits); queries.searchOccurred(); m_lastError = ""; @@ -222,10 +236,11 @@ QString BlastSearch::doSearch(Queries &queries, QString extraParameters) { //This function carries out the entire BLAST search procedure automatically, without user input. //It returns an error string which is empty if all goes well. QString BlastSearch::doAutoGraphSearch(const AssemblyGraph &graph, QString queriesFilename, + bool includePaths, QString extraParameters) { cleanUp(); - QString maybeError = buildDatabase(graph); // It is expected that buildDatabase will setup last error as well + QString maybeError = buildDatabase(graph, includePaths); // It is expected that buildDatabase will setup last error as well if (!maybeError.isEmpty()) return maybeError; @@ -315,8 +330,11 @@ static QString getNodeNameFromString(const QString &nodeString) { // BLAST search) to construct the Hit objects. // It looks at the filters to possibly exclude hits which fail to meet user- // defined thresholds. -static void buildHitsFromBlastOutput(QString blastOutput, - Queries &queries) { +static std::pair +buildHitsFromBlastOutput(QString blastOutput, + Queries &queries) { + NodeHits nodeHits; PathHits pathHits; + QStringList blastHitList = blastOutput.split("\n", Qt::SkipEmptyParts); for (const auto &hitString : blastHitList) { @@ -338,27 +356,11 @@ static void buildHitsFromBlastOutput(QString blastOutput, SciNot eValue(alignmentParts[10]); double bitScore = alignmentParts[11].toDouble(); - //Only save BLAST hits that are on forward strands. - if (nodeStart > nodeEnd) - continue; - - QString nodeName = getNodeNameFromString(nodeLabel); - DeBruijnNode *node = nullptr; - auto it = g_assemblyGraph->m_deBruijnGraphNodes.find(nodeName.toStdString()); - if (it == g_assemblyGraph->m_deBruijnGraphNodes.end()) - continue; - - node = it.value(); - Query *query = queries.getQueryFromName(queryName); if (query == nullptr) continue; // Check the user-defined filters. - if (g_settings->blastAlignmentLengthFilter.on && - alignmentLength < g_settings->blastAlignmentLengthFilter) - continue; - if (g_settings->blastIdentityFilter.on && percentIdentity < g_settings->blastIdentityFilter) continue; @@ -371,16 +373,39 @@ static void buildHitsFromBlastOutput(QString blastOutput, bitScore < g_settings->blastBitScoreFilter) continue; - Hit hit(query, node, percentIdentity, alignmentLength, - numberMismatches, numberGapOpens, queryStart, queryEnd, - nodeStart, nodeEnd, eValue, bitScore); + if (g_settings->blastAlignmentLengthFilter.on && + alignmentLength < g_settings->blastAlignmentLengthFilter) + continue; if (g_settings->blastQueryCoverageFilter.on) { - double hitCoveragePercentage = 100.0 * hit.getQueryCoverageFraction(); + double hitCoveragePercentage = 100.0 * Hit::getQueryCoverageFraction(query, + queryStart, queryEnd); if (hitCoveragePercentage < g_settings->blastQueryCoverageFilter) continue; } - query->addHit(hit); + auto nodeIt = g_assemblyGraph->m_deBruijnGraphNodes.find(getNodeNameFromString(nodeLabel).toStdString()); + if (nodeIt != g_assemblyGraph->m_deBruijnGraphNodes.end()) { + // Only save BLAST hits that are on forward strands. + if (nodeStart > nodeEnd) + continue; + + nodeHits.emplace_back(query, + new Hit(query, nodeIt.value(), + percentIdentity, alignmentLength, + numberMismatches, numberGapOpens, + queryStart, queryEnd, + nodeStart, nodeEnd, eValue, bitScore)); + } + + auto pathIt = g_assemblyGraph->m_deBruijnGraphPaths.find(nodeLabel.toStdString()); + if (pathIt != g_assemblyGraph->m_deBruijnGraphPaths.end()) { + pathHits.emplace_back(query, pathIt.value(), + Path::MappingRange{queryStart, queryEnd, + nodeStart, nodeEnd}); + } + } + + return { nodeHits, pathHits }; } diff --git a/graphsearch/blast/blastsearch.h b/graphsearch/blast/blastsearch.h index a25279ee..d9928ebf 100644 --- a/graphsearch/blast/blastsearch.h +++ b/graphsearch/blast/blastsearch.h @@ -29,6 +29,9 @@ class QProcess; +namespace search { +class Queries; + class BlastSearch : public search::GraphSearch { Q_OBJECT public: @@ -37,9 +40,11 @@ class BlastSearch : public search::GraphSearch { QString doAutoGraphSearch(const AssemblyGraph &graph, QString queriesFilename, + bool includePaths = false, QString extraParameters = "") override; int loadQueriesFromFile(QString fullFileName) override; - QString buildDatabase(const AssemblyGraph &graph) override; + QString buildDatabase(const AssemblyGraph &graph, + bool includePaths = true) override; QString doSearch(QString extraParameters) override; QString doSearch(search::Queries &queries, QString extraParameters) override; @@ -63,3 +68,5 @@ public slots: QProcess *m_buildDb = nullptr, *m_doSearch = nullptr; QString m_makeblastdbCommand, m_blastnCommand, m_tblastnCommand; }; + +} diff --git a/graphsearch/graphsearch.cpp b/graphsearch/graphsearch.cpp index b0ce9655..d5f20ce9 100644 --- a/graphsearch/graphsearch.cpp +++ b/graphsearch/graphsearch.cpp @@ -17,6 +17,8 @@ #include "graphsearch.h" #include "graph/annotationsmanager.h" + +#include "graph/assemblygraph.h" #include "program/globals.h" #include @@ -131,3 +133,30 @@ GraphSearch::DbBuildFinishedRAII::~DbBuildFinishedRAII() { GraphSearch::GraphSearchFinishedRAII::~GraphSearchFinishedRAII() { emit m_search->finishedSearch(m_search->lastError()); } + + +void GraphSearch::addPathHit(Query *query, Path *path, + int queryStart, int queryEnd, + int pathStart, int pathEnd) { + bool invert = pathStart > pathEnd; + if (invert) + std::swap(pathStart, pathEnd); + + for (auto entry: path->getNodeCovering(pathStart, pathEnd)) { + DeBruijnNode *node = entry.first; + int nodeStart = entry.second.mapped_range.from, nodeEnd = entry.second.mapped_range.to; + if (invert) { + int nodeLength = node->getLength(); + node = node->getReverseComplement(); + nodeEnd = nodeLength - nodeEnd + 1; + nodeStart = nodeLength - nodeStart + 1; + std::swap(nodeEnd, nodeStart); + } + + query->emplaceHit(query, node, -1, nodeEnd - nodeStart + 1, + -1, -1, + queryStart, queryEnd, + nodeStart, nodeEnd, + 0, 0); + } +} diff --git a/graphsearch/graphsearch.h b/graphsearch/graphsearch.h index 4d17e46d..80a7dcac 100644 --- a/graphsearch/graphsearch.h +++ b/graphsearch/graphsearch.h @@ -17,7 +17,7 @@ #pragma once -#include "graphsearch/queries.h" +#include "queries.h" #include #include @@ -82,10 +82,11 @@ class GraphSearch : public QObject { void emptyTempDirectory() const; virtual int loadQueriesFromFile(QString fullFileName) = 0; - virtual QString buildDatabase(const AssemblyGraph &graph) = 0; + virtual QString buildDatabase(const AssemblyGraph &graph, bool includePaths = true) = 0; virtual QString doSearch(QString extraParameters) = 0; virtual QString doSearch(search::Queries &queries, QString extraParameters) = 0; virtual QString doAutoGraphSearch(const AssemblyGraph &graph, QString queriesFilename, + bool includePaths = false, QString extraParameters = "") = 0; [[nodiscard]] virtual QString name() const = 0; [[nodiscard]] virtual QString queryFormat() const = 0; @@ -95,6 +96,11 @@ class GraphSearch : public QObject { static std::unique_ptr get(GraphSearchKind kind, const QDir &workDir = QDir::temp(), QObject *parent = nullptr); +protected: + static void addPathHit(Query *query, Path *path, + int queryStart, int queryEnd, + int pathStart, int pathEnd); + public slots: virtual void cancelDatabaseBuild() {}; virtual void cancelSearch() {}; diff --git a/graphsearch/hit.cpp b/graphsearch/hit.cpp index 421997ca..9479e856 100644 --- a/graphsearch/hit.cpp +++ b/graphsearch/hit.cpp @@ -53,9 +53,10 @@ double Hit::queryEndFraction() const { return double(m_queryEnd - 1) / m_query->getLength(); } -double Hit::getQueryCoverageFraction() const { - int queryRegionSize = m_queryEnd - m_queryStart + 1; - int queryLength = m_query->getLength(); +double Hit::getQueryCoverageFraction(Query *query, + int queryStart, int queryEnd) { + int queryRegionSize = queryEnd - queryStart + 1; + int queryLength = query->getLength(); if (queryLength == 0) return 0.0; @@ -64,6 +65,12 @@ double Hit::getQueryCoverageFraction() const { } +double Hit::getQueryCoverageFraction() const { + return getQueryCoverageFraction(m_query, + m_queryStart, m_queryEnd); +} + + GraphLocation Hit::getHitStart() const { return {m_node, m_nodeStart}; } @@ -75,4 +82,4 @@ GraphLocation Hit::getHitEnd() const { //This function returns the node sequence for this hit. QByteArray Hit::getNodeSequence() const { return utils::sequenceToQByteArray(m_node->getSequence().Subseq(m_nodeStart-1, m_nodeEnd)); -} \ No newline at end of file +} diff --git a/graphsearch/hit.h b/graphsearch/hit.h index 1d34cd1b..a3220ac6 100644 --- a/graphsearch/hit.h +++ b/graphsearch/hit.h @@ -51,6 +51,8 @@ namespace search { double m_bitScore; double getQueryCoverageFraction() const; + static double getQueryCoverageFraction(Query *query, + int queryStart, int queryEnd); GraphLocation getHitStart() const; GraphLocation getHitEnd() const; @@ -63,4 +65,4 @@ namespace search { double queryStartFraction() const; double queryEndFraction() const; }; -} \ No newline at end of file +} diff --git a/graphsearch/hits.h b/graphsearch/hits.h new file mode 100644 index 00000000..08e9c1b9 --- /dev/null +++ b/graphsearch/hits.h @@ -0,0 +1,30 @@ +// Copyright 2022 Anton Korobeynikov + +// This file is part of Bandage + +// Bandage is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Bandage is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Bandage. If not, see . + +#pragma once + +#include "graph/path.h" +#include +#include + +namespace search { +class Query; +class Hit; + +using NodeHits = std::vector>; +using PathHits = std::vector>; +} diff --git a/graphsearch/hmmer/hmmersearch.cpp b/graphsearch/hmmer/hmmersearch.cpp index c58e45f5..914beca8 100644 --- a/graphsearch/hmmer/hmmersearch.cpp +++ b/graphsearch/hmmer/hmmersearch.cpp @@ -20,6 +20,7 @@ #include "graph/debruijnnode.h" #include "graphsearch/query.h" +#include "program/globals.h" #include "program/settings.h" #include "graph/assemblygraph.h" @@ -52,7 +53,7 @@ bool HmmerSearch::findTools() { return true; } -QString HmmerSearch::buildDatabase(const AssemblyGraph &graph) { +QString HmmerSearch::buildDatabase(const AssemblyGraph &graph, bool includePaths) { DbBuildFinishedRAII watcher(this); m_lastError = ""; @@ -93,6 +94,15 @@ QString HmmerSearch::buildDatabase(const AssemblyGraph &graph) { out << node->getFasta(true, false, false); } + + if (includePaths) { + for (auto it = graph.m_deBruijnGraphPaths.begin(); it != graph.m_deBruijnGraphPaths.end(); ++it) { + if (m_cancelBuildDatabase) + return (m_lastError = "Build cancelled."); + + out << it.value()->getFasta(it.key().c_str()); + } + } } // No need to perform empty checks for AAs as they all are handled above @@ -158,10 +168,10 @@ static QString getNodeNameFromString(const QString &nodeString) { return nodeName; } -static void buildHitsFromTblOut(QString hmmerOutput, - Queries &queries); -static void buildHitsFromDomTblOut(QString hmmerOutput, - Queries &queries); +static std::pair +buildHitsFromTblOut(QString hmmerOutput, Queries &queries); +static std::pair +buildHitsFromDomTblOut(QString hmmerOutput, Queries &queries); QString HmmerSearch::doSearch(Queries &queries, QString extraParameters) { GraphSearchFinishedRAII watcher(this); @@ -169,21 +179,27 @@ QString HmmerSearch::doSearch(Queries &queries, QString extraParameters) { if (!findTools()) return m_lastError; + NodeHits nnodeHits; PathHits npathHits; if (queries.getQueryCount(NUCLEOTIDE) > 0 && !m_cancelSearch) { QString hmmerOutput = doOneSearch(NUCLEOTIDE, queries, extraParameters); if (!m_lastError.isEmpty()) return m_lastError; - buildHitsFromTblOut(hmmerOutput, queries); + std::tie(nnodeHits, npathHits) = buildHitsFromTblOut(hmmerOutput, queries); + queries.addNodeHits(nnodeHits); } + NodeHits pnodeHits; PathHits ppathHits; if (queries.getQueryCount(PROTEIN) > 0 && !m_cancelSearch) { QString hmmerOutput = doOneSearch(PROTEIN, queries, extraParameters); if (!m_lastError.isEmpty()) return m_lastError; - buildHitsFromDomTblOut(hmmerOutput, queries); + std::tie(pnodeHits, ppathHits) = buildHitsFromDomTblOut(hmmerOutput, queries); + queries.addNodeHits(pnodeHits); } queries.findQueryPaths(); + queries.addPathHits(npathHits); + queries.addPathHits(ppathHits); queries.searchOccurred(); return m_lastError; @@ -254,10 +270,11 @@ QString HmmerSearch::doOneSearch(search::QuerySequenceType sequenceType, } QString HmmerSearch::doAutoGraphSearch(const AssemblyGraph &graph, QString queriesFilename, - QString extraParameters) { + bool includePaths, + QString extraParameters) { cleanUp(); - QString maybeError = buildDatabase(graph); // It is expected that buildDatabase will setup last error as well + QString maybeError = buildDatabase(graph, includePaths); // It is expected that buildDatabase will setup last error as well if (!maybeError.isEmpty()) return maybeError; @@ -316,11 +333,12 @@ void HmmerSearch::cancelSearch() { m_doSearch->kill(); } -static void buildHitsFromTblOut(QString hmmerOutput, - Queries &queries) { - QStringList hmmerHitList = hmmerOutput.split("\n", Qt::SkipEmptyParts); +static std::pair +buildHitsFromTblOut(QString hmmerOutput, + Queries &queries) { + NodeHits nodeHits; PathHits pathHits; - for (const auto &hitString : hmmerHitList) { + for (const auto &hitString : hmmerOutput.split('\n', Qt::SkipEmptyParts)) { if (hitString.startsWith('#')) continue; @@ -331,10 +349,6 @@ static void buildHitsFromTblOut(QString hmmerOutput, QString nodeLabel = alignmentParts[0]; QString queryName = alignmentParts[2]; - double percentIdentity = -1; - int numberMismatches = -1; - int numberGapOpens = -1; - int queryStart = alignmentParts[4].toInt(); int queryEnd = alignmentParts[5].toInt(); @@ -346,27 +360,11 @@ static void buildHitsFromTblOut(QString hmmerOutput, SciNot eValue(alignmentParts[12]); double bitScore = alignmentParts[13].toDouble(); - // Only save hits that are on forward strands. - if (nodeStart > nodeEnd) - continue; - - QString nodeName = getNodeNameFromString(nodeLabel); - DeBruijnNode *node = nullptr; - auto it = g_assemblyGraph->m_deBruijnGraphNodes.find(nodeName.toStdString()); - if (it == g_assemblyGraph->m_deBruijnGraphNodes.end()) - continue; - - node = it.value(); - Query *query = queries.getQueryFromName(queryName); if (query == nullptr) continue; // Check the user-defined filters. - if (g_settings->blastAlignmentLengthFilter.on && - alignmentLength < g_settings->blastAlignmentLengthFilter) - continue; - if (g_settings->blastEValueFilter.on && eValue > g_settings->blastEValueFilter) continue; @@ -375,25 +373,49 @@ static void buildHitsFromTblOut(QString hmmerOutput, bitScore < g_settings->blastBitScoreFilter) continue; - Hit hit(query, node, percentIdentity, alignmentLength, - numberMismatches, numberGapOpens, queryStart, queryEnd, - nodeStart, nodeEnd, eValue, bitScore); + if (g_settings->blastAlignmentLengthFilter.on && + alignmentLength < g_settings->blastAlignmentLengthFilter) + continue; if (g_settings->blastQueryCoverageFilter.on) { - double hitCoveragePercentage = 100.0 * hit.getQueryCoverageFraction(); + double hitCoveragePercentage = 100.0 * Hit::getQueryCoverageFraction(query, + queryStart, queryEnd); if (hitCoveragePercentage < g_settings->blastQueryCoverageFilter) continue; } - query->addHit(hit); + auto nodeIt = g_assemblyGraph->m_deBruijnGraphNodes.find(getNodeNameFromString(nodeLabel).toStdString()); + if (nodeIt != g_assemblyGraph->m_deBruijnGraphNodes.end()) { + // Only save hits that are on forward strands. + if (nodeStart > nodeEnd) + continue; + + nodeHits.emplace_back(query, + new Hit(query, nodeIt.value(), + -1, alignmentLength, + -1, -1, + queryStart, queryEnd, + nodeStart, nodeEnd, + eValue, bitScore)); + } + + auto pathIt = g_assemblyGraph->m_deBruijnGraphPaths.find(nodeLabel.toStdString()); + if (pathIt != g_assemblyGraph->m_deBruijnGraphPaths.end()) { + pathHits.emplace_back(query, pathIt.value(), + Path::MappingRange{queryStart, queryEnd, + nodeStart, nodeEnd}); + } } + + return { nodeHits, pathHits }; } -static void buildHitsFromDomTblOut(QString hmmerOutput, - Queries &queries) { - QStringList hmmerHitList = hmmerOutput.split("\n", Qt::SkipEmptyParts); +static std::pair +buildHitsFromDomTblOut(QString hmmerOutput, + Queries &queries) { + NodeHits nodeHits; PathHits pathHits; - for (const auto &hitString : hmmerHitList) { + for (const auto &hitString : hmmerOutput.split("\n", Qt::SkipEmptyParts)) { if (hitString.startsWith('#')) continue; @@ -419,35 +441,11 @@ static void buildHitsFromDomTblOut(QString hmmerOutput, SciNot eValue(alignmentParts[6]); double bitScore = alignmentParts[7].toDouble(); - // Only save hits that are on forward strands. - if (nodeStart > nodeEnd) - continue; - - QString nodeName = getNodeNameFromString(nodeLabel); - DeBruijnNode *node = nullptr; - auto it = g_assemblyGraph->m_deBruijnGraphNodes.find(nodeName.toStdString()); - if (it == g_assemblyGraph->m_deBruijnGraphNodes.end()) - continue; - - node = it.value(); - bool ok = false; - unsigned shift = nodeLabel.last(1).toInt(&ok); - if (!ok || shift > 2) - continue; - - nodeStart = (nodeStart - 1) * 3 + shift + 1; - nodeEnd = (nodeEnd - 1) * 3 + shift + 1; - - Query *query = queries.getQueryFromName(queryName); if (query == nullptr) continue; // Check the user-defined filters. - if (g_settings->blastAlignmentLengthFilter.on && - alignmentLength < g_settings->blastAlignmentLengthFilter) - continue; - if (g_settings->blastEValueFilter.on && eValue > g_settings->blastEValueFilter) continue; @@ -456,16 +454,40 @@ static void buildHitsFromDomTblOut(QString hmmerOutput, bitScore < g_settings->blastBitScoreFilter) continue; - Hit hit(query, node, percentIdentity, alignmentLength, - numberMismatches, numberGapOpens, queryStart, queryEnd, - nodeStart, nodeEnd, eValue, bitScore); + if (g_settings->blastAlignmentLengthFilter.on && + alignmentLength < g_settings->blastAlignmentLengthFilter) + continue; if (g_settings->blastQueryCoverageFilter.on) { - double hitCoveragePercentage = 100.0 * hit.getQueryCoverageFraction(); + double hitCoveragePercentage = 100.0 * Hit::getQueryCoverageFraction(query, + queryStart, queryEnd); if (hitCoveragePercentage < g_settings->blastQueryCoverageFilter) continue; } - query->addHit(hit); + auto nodeIt = g_assemblyGraph->m_deBruijnGraphNodes.find(getNodeNameFromString(nodeLabel).toStdString()); + if (nodeIt != g_assemblyGraph->m_deBruijnGraphNodes.end()) { + // Only save hits that are on forward strands. + if (nodeStart > nodeEnd) + continue; + + bool ok = false; + unsigned shift = nodeLabel.last(1).toInt(&ok); + if (!ok || shift > 2) + continue; + + nodeStart = (nodeStart - 1) * 3 + shift + 1; + nodeEnd = (nodeEnd - 1) * 3 + shift + 1; + + nodeHits.emplace_back(query, + new Hit(query, nodeIt.value(), + -1, alignmentLength, + -1, -1, + queryStart, queryEnd, + nodeStart, nodeEnd, + eValue, bitScore)); + } } + + return { nodeHits, pathHits }; } diff --git a/graphsearch/hmmer/hmmersearch.h b/graphsearch/hmmer/hmmersearch.h index 07b38300..8e30fec6 100644 --- a/graphsearch/hmmer/hmmersearch.h +++ b/graphsearch/hmmer/hmmersearch.h @@ -24,7 +24,11 @@ class QProcess; -class HmmerSearch : public search::GraphSearch { +namespace search { + +class Queries; + +class HmmerSearch : public GraphSearch { Q_OBJECT public: explicit HmmerSearch(const QDir &workDir = QDir::temp(), QObject *parent = nullptr); @@ -32,9 +36,11 @@ class HmmerSearch : public search::GraphSearch { QString doAutoGraphSearch(const AssemblyGraph &graph, QString queriesFilename, + bool includePaths = false, QString extraParameters = "") override; int loadQueriesFromFile(QString fullFileName) override; - QString buildDatabase(const AssemblyGraph &graph) override; + QString buildDatabase(const AssemblyGraph &graph, + bool includePaths = true) override; QString doSearch(QString extraParameters) override; QString doSearch(search::Queries &queries, QString extraParameters) override; @@ -58,3 +64,5 @@ public slots: QProcess *m_buildDb = nullptr, *m_doSearch = nullptr; QString m_nhmmerCommand, m_hmmerCommand; }; + +} diff --git a/graphsearch/minimap2/minimap2search.cpp b/graphsearch/minimap2/minimap2search.cpp index 6553c50e..16ec3688 100644 --- a/graphsearch/minimap2/minimap2search.cpp +++ b/graphsearch/minimap2/minimap2search.cpp @@ -18,6 +18,7 @@ #include "minimap2search.h" +#include "graphsearch/graphsearch.h" #include "program/settings.h" #include "graph/assemblygraph.h" @@ -42,7 +43,7 @@ bool Minimap2Search::findTools() { return true; } -QString Minimap2Search::buildDatabase(const AssemblyGraph &graph) { +QString Minimap2Search::buildDatabase(const AssemblyGraph &graph, bool includePaths) { DbBuildFinishedRAII watcher(this); m_lastError = ""; if (!findTools()) @@ -57,14 +58,24 @@ QString Minimap2Search::buildDatabase(const AssemblyGraph &graph) { if (!file.open(QIODevice::WriteOnly | QIODevice::Text)) return (m_lastError = "Failed to open: " + file.fileName()); - QTextStream out(&file); - for (const auto *node : graph.m_deBruijnGraphNodes) { - if (m_cancelBuildDatabase) - return (m_lastError = "Build cancelled."); + { + QTextStream out(&file); + for (const auto *node : graph.m_deBruijnGraphNodes) { + if (m_cancelBuildDatabase) + return (m_lastError = "Build cancelled."); + + out << node->getFasta(true, false, false); + } + + if (includePaths) { + for (auto it = graph.m_deBruijnGraphPaths.begin(); it != graph.m_deBruijnGraphPaths.end(); ++it) { + if (m_cancelBuildDatabase) + return (m_lastError = "Build cancelled."); - out << node->getFasta(true, false, false); + out << it.value()->getFasta(it.key().c_str()); + } + } } - file.close(); // Make sure the graph has sequences bool atLeastOneSequence = false; @@ -78,26 +89,6 @@ QString Minimap2Search::buildDatabase(const AssemblyGraph &graph) { if (!atLeastOneSequence) return (m_lastError = "Cannot build the Minimap2 database as this graph contains no sequences"); - QStringList minimap2Options; - minimap2Options << "-d" << temporaryDir().filePath("all_nodes.idx") - << temporaryDir().filePath("all_nodes.fasta"); - - m_buildDb = new QProcess(); - m_buildDb->start(m_minimap2Command, minimap2Options); - - bool finished = m_buildDb->waitForFinished(-1); - if (m_buildDb->exitCode() != 0 || !finished) { - m_lastError = "There was a problem building minimap2 database"; - QString stdErr = m_buildDb->readAllStandardError(); - m_lastError += stdErr.isEmpty() ? "." : ":\n\n" + stdErr; - } else if (m_cancelBuildDatabase) - m_lastError = "Build cancelled."; - else - m_lastError = ""; - - m_buildDb->deleteLater(); - m_buildDb = nullptr; - return m_lastError; } @@ -141,11 +132,12 @@ static QString getNodeNameFromString(const QString &nodeString) { return nodeName; } -static void buildHitsFromPAF(const QString &PAF, - Queries &queries) { - QStringList hitList = PAF.split("\n", Qt::SkipEmptyParts); +static std::pair +buildHitsFromPAF(const QString &PAF, + Queries &queries) { + NodeHits nodeHits; PathHits pathHits; - for (const auto &hitString : hitList) { + for (const auto &hitString : PAF.split("\n", Qt::SkipEmptyParts)) { QStringList alignmentParts = hitString.split('\t'); if (alignmentParts.size() < 12) continue; @@ -161,50 +153,49 @@ static void buildHitsFromPAF(const QString &PAF, int alignmentLength = alignmentParts[10].toInt(); - double percentIdentity = -1; - int numberMismatches = -1; - int numberGapOpens = -1; - - SciNot eValue(0); - double bitScore = 0; - - if (!strand) - continue; - - QString nodeName = getNodeNameFromString(nodeLabel); - DeBruijnNode *node = nullptr; - auto it = g_assemblyGraph->m_deBruijnGraphNodes.find(nodeName.toStdString()); - if (it == g_assemblyGraph->m_deBruijnGraphNodes.end()) - continue; - - node = it.value(); - Query *query = queries.getQueryFromName(queryName); if (query == nullptr) continue; - // Check the user-defined filters. if (g_settings->blastAlignmentLengthFilter.on && alignmentLength < g_settings->blastAlignmentLengthFilter) continue; - Hit hit(query, node, percentIdentity, alignmentLength, - numberMismatches, numberGapOpens, queryStart, queryEnd, - nodeStart, nodeEnd, eValue, bitScore); - if (g_settings->blastQueryCoverageFilter.on) { - double hitCoveragePercentage = 100.0 * hit.getQueryCoverageFraction(); + double hitCoveragePercentage = 100.0 * Hit::getQueryCoverageFraction(query, + queryStart, queryEnd); if (hitCoveragePercentage < g_settings->blastQueryCoverageFilter) continue; } - query->addHit(hit); + auto nodeIt = g_assemblyGraph->m_deBruijnGraphNodes.find(getNodeNameFromString(nodeLabel).toStdString()); + if (nodeIt != g_assemblyGraph->m_deBruijnGraphNodes.end()) { + if (!strand) + continue; + + nodeHits.emplace_back(query, + new Hit(query, nodeIt.value(), + -1, alignmentLength, + -1, -1, + queryStart, queryEnd, + nodeStart, nodeEnd, 0, 0)); + } + + auto pathIt = g_assemblyGraph->m_deBruijnGraphPaths.find(nodeLabel.toStdString()); + if (pathIt != g_assemblyGraph->m_deBruijnGraphPaths.end()) { + pathHits.emplace_back(query, pathIt.value(), + Path::MappingRange{queryStart, queryEnd, + nodeStart, nodeEnd}); + } + } + + return { nodeHits, pathHits }; } QString Minimap2Search::doSearch(Queries &queries, QString extraParameters) { GraphSearchFinishedRAII watcher(this); - + m_lastError = ""; if (!findTools()) return m_lastError; @@ -226,7 +217,7 @@ QString Minimap2Search::doSearch(Queries &queries, QString extraParameters) { QStringList minimap2Options; minimap2Options << extraParameters.split(" ", Qt::SkipEmptyParts) - << temporaryDir().filePath("all_nodes.idx") + << temporaryDir().filePath("all_nodes.fasta") << tmpFile.fileName(); m_cancelSearch = false; @@ -257,8 +248,10 @@ QString Minimap2Search::doSearch(Queries &queries, QString extraParameters) { if (m_cancelSearch) return (m_lastError = "Minimap2 search cancelled"); - buildHitsFromPAF(minimap2Output, queries); + auto [nodeHits, pathHits] = buildHitsFromPAF(minimap2Output, queries); + queries.addNodeHits(nodeHits); queries.findQueryPaths(); + queries.addPathHits(pathHits); queries.searchOccurred(); m_lastError = ""; @@ -267,10 +260,11 @@ QString Minimap2Search::doSearch(Queries &queries, QString extraParameters) { } QString Minimap2Search::doAutoGraphSearch(const AssemblyGraph &graph, QString queriesFilename, + bool includePaths, QString extraParameters) { cleanUp(); - QString maybeError = buildDatabase(graph); // It is expected that buildDatabase will setup last error as well + QString maybeError = buildDatabase(graph, includePaths); // It is expected that buildDatabase will setup last error as well if (!maybeError.isEmpty()) return maybeError; diff --git a/graphsearch/minimap2/minimap2search.h b/graphsearch/minimap2/minimap2search.h index 9c6f2414..f4c54882 100644 --- a/graphsearch/minimap2/minimap2search.h +++ b/graphsearch/minimap2/minimap2search.h @@ -24,6 +24,10 @@ class QProcess; +namespace search { + +class Queries; + class Minimap2Search : public search::GraphSearch { Q_OBJECT public: @@ -32,9 +36,11 @@ class Minimap2Search : public search::GraphSearch { QString doAutoGraphSearch(const AssemblyGraph &graph, QString queriesFilename, + bool includePaths = false, QString extraParameters = "") override; int loadQueriesFromFile(QString fullFileName) override; - QString buildDatabase(const AssemblyGraph &graph) override; + QString buildDatabase(const AssemblyGraph &graph, + bool includePaths = true) override; QString doSearch(QString extraParameters) override; QString doSearch(search::Queries &queries, QString extraParameters) override; @@ -54,3 +60,5 @@ public slots: QProcess *m_buildDb = nullptr, *m_doSearch = nullptr; QString m_minimap2Command; }; + +} diff --git a/graphsearch/queries.cpp b/graphsearch/queries.cpp index f37bba62..7f163557 100644 --- a/graphsearch/queries.cpp +++ b/graphsearch/queries.cpp @@ -17,6 +17,9 @@ #include "queries.h" +#include "hits.h" + +#include "graph/debruijnnode.h" #include "program/globals.h" #include "program/settings.h" @@ -149,12 +152,12 @@ void Queries::findQueryPaths() { query->findQueryPaths(); } -std::vector Queries::allHits() const { - std::vector res; +Query::Hits Queries::allHits() const { + Query::Hits res; for (const auto *query : m_queries) { - const auto &hits = query->getHits(); - res.insert(res.end(), hits.begin(), hits.end()); + for (const auto &hit : query->getHits()) + res.push_back(hit.get()); } return res; @@ -171,14 +174,57 @@ std::vector Queries::getNodesFromHits(const QString& queryName) // Add pointers to nodes that have a hit for the selected target(s). for (auto *currentQuery: m_queries) { for (const auto &hit: currentQuery->getHits()) - returnVector.push_back(hit.m_node); + returnVector.push_back(hit->m_node); } } else { if (Query *query = getQueryFromName(queryName)) { for (const auto &hit: query->getHits()) - returnVector.push_back(hit.m_node); + returnVector.push_back(hit->m_node); } } return returnVector; } + +void Queries::addNodeHits(const NodeHits &nodeHits) { + // Simply glue hits to queries. Now query owns hit. + for (auto &entry : nodeHits) + entry.first->addHit(entry.second); +} + +void Queries::addPathHits(const PathHits &hits) { + for (const auto &hit : hits) { + Query *query; + const Path *path; + Path::MappingRange range; + std::tie(query, path, range) = hit; + std::vector pathHits; + + int pathStart = range.mapped_range.from, pathEnd = range.mapped_range.to; + int queryStart = range.initial_range.from, queryEnd = range.initial_range.to; + + bool invert = pathStart > pathEnd; + if (invert) + std::swap(pathStart, pathEnd); + + for (auto entry: path->getNodeCovering(pathStart, pathEnd)) { + DeBruijnNode *node = entry.first; + int nodeStart = entry.second.mapped_range.from, nodeEnd = entry.second.mapped_range.to; + if (invert) { + int nodeLength = node->getLength(); + node = node->getReverseComplement(); + nodeEnd = nodeLength - nodeEnd + 1; + nodeStart = nodeLength - nodeStart + 1; + std::swap(nodeEnd, nodeStart); + } + + pathHits.push_back(query->emplaceHit(query, node, -1, nodeEnd - nodeStart + 1, + -1, -1, + queryStart, queryEnd, + nodeStart, nodeEnd, + 0, 0)); + } + + query->emplaceQueryPath(*path, query, pathHits); + } +} diff --git a/graphsearch/queries.h b/graphsearch/queries.h index b5dcb284..e0f8b84d 100644 --- a/graphsearch/queries.h +++ b/graphsearch/queries.h @@ -19,6 +19,7 @@ #pragma once #include "query.h" +#include "hits.h" #include namespace search { @@ -51,13 +52,15 @@ class Queries { size_t getQueryPathCount() const ; size_t getQueryCount(QuerySequenceType sequenceType) const; bool isQueryPresent(const Query * query) const; - std::vector allHits() const; + Query::Hits allHits() const; const auto &queries() const { return m_queries; } auto &queries() { return m_queries; } Query *query(size_t idx) { return m_queries[idx]; } const Query *query(size_t idx) const { return m_queries[idx]; } std::vector getNodesFromHits(const QString& queryName = "") const; + void addNodeHits(const NodeHits &hits); + void addPathHits(const PathHits &hits); void findQueryPaths(); private: QString getUniqueName(QString name); @@ -67,4 +70,4 @@ class Queries { std::vector m_presetColours; }; -} \ No newline at end of file +} diff --git a/graphsearch/query.cpp b/graphsearch/query.cpp index bddaf1af..36e50718 100644 --- a/graphsearch/query.cpp +++ b/graphsearch/query.cpp @@ -88,24 +88,24 @@ void Query::findQueryPaths() { if (m_sequenceType == PROTEIN) queryLength *= 3; - //Find all possible path starts within an acceptable distance from the query - //start. - std::vector possibleStarts; + // Find all possible path starts within an acceptable distance from the query + // start. + Hits possibleStarts; double acceptableStartFraction = 1.0 - g_settings->minQueryCoveredByPath; - for (auto &hit : m_hits) { - if (hit.queryStartFraction() <= acceptableStartFraction) - possibleStarts.push_back(&hit); + for (const auto &hit : m_hits) { + if (hit->queryStartFraction() <= acceptableStartFraction) + possibleStarts.push_back(hit.get()); } - //Find all possible path ends. + // Find all possible path ends. std::vector possibleEnds; double acceptableEndFraction = g_settings->minQueryCoveredByPath; - for (auto &hit : m_hits) { - if (hit.queryEndFraction() >= acceptableEndFraction) - possibleEnds.push_back(&hit); + for (const auto &hit : m_hits) { + if (hit->queryEndFraction() >= acceptableEndFraction) + possibleEnds.push_back(hit.get()); } - //For each possible start, find paths to each possible end. + // For each possible start, find paths to each possible end. QList possiblePaths; for (auto start : possibleStarts) { GraphLocation startLocation = start->getHitStart(); @@ -213,7 +213,7 @@ void Query::findQueryPaths() { //If a list of BLAST hits is passed to the function, it only looks in those //hits. If no such list is passed, it looks in all hits for this query. // http://stackoverflow.com/questions/5276686/merging-ranges-in-c -double Query::fractionCoveredByHits(const std::vector &hitsToCheck) const { +double Query::fractionCoveredByHits(const Hits &hitsToCheck) const { int hitBases = 0; int queryLength = getLength(); if (queryLength == 0) @@ -222,7 +222,7 @@ double Query::fractionCoveredByHits(const std::vector &hitsToCheck) std::vector > ranges; if (hitsToCheck.empty()) { for (const auto &hit : m_hits) - ranges.emplace_back(hit.m_queryStart - 1, hit.m_queryEnd); + ranges.emplace_back(hit->m_queryStart - 1, hit->m_queryEnd); } else { for (auto *hit : hitsToCheck) { ranges.emplace_back(hit->m_queryStart - 1, hit->m_queryEnd); diff --git a/graphsearch/query.h b/graphsearch/query.h index 2580133e..39269ad6 100644 --- a/graphsearch/query.h +++ b/graphsearch/query.h @@ -19,10 +19,11 @@ #pragma once #include "querypath.h" -#include "graphsearch/hit.h" +#include "hit.h" #include #include +#include #include namespace search { @@ -33,6 +34,8 @@ namespace search { class Query { public: + using Hits = std::vector; + //CREATORS Query(QString name, QString sequence, QByteArray aux = QByteArray()); @@ -62,12 +65,22 @@ namespace search { // MODIFIERS void setName(QString newName) { m_name = std::move(newName); } - void addHit(Hit newHit) { m_hits.emplace_back(newHit); } + template + const Hit* emplaceHit(Args&&... args) { + m_hits.emplace_back(new Hit(std::forward(args)...)); + return m_hits.back().get(); + } + void addHit(Hit *newHit) { m_hits.emplace_back(newHit); } void clearSearchResults(); void setAsSearchedFor() { m_searchedFor = true; } void findQueryPaths(); + void addQueryPath(QueryPath path) { m_paths.emplace_back(path); } + template + void emplaceQueryPath(Args&... args) { + m_paths.emplace_back(std::forward(args)...); + } void setColour(QColor newColour) { m_colour = newColour; } void setShown(bool newShown) { m_shown = newShown; } @@ -75,7 +88,7 @@ namespace search { QString m_name; QString m_sequence; QByteArray m_aux; - std::vector m_hits; + std::vector> m_hits; QuerySequenceType m_sequenceType; bool m_searchedFor = false; bool m_shown = true; diff --git a/graphsearch/querypath.cpp b/graphsearch/querypath.cpp index f42958b5..0373e372 100644 --- a/graphsearch/querypath.cpp +++ b/graphsearch/querypath.cpp @@ -29,21 +29,21 @@ using namespace search; -QueryPath::QueryPath(Path path, Query *query) : - m_path(std::move(path)), m_query(query) { +QueryPath::QueryPath(Path path, Query *query) + : m_path(std::move(path)), m_query(query) { //This function follows the path, returning the BLAST hits it finds for the //query. It requires that the hits occur in order, i.e. that each hit in //the path begins later in the query than the previous hit. - const Hit * previousHit = nullptr; + const Hit *previousHit = nullptr; const auto &pathNodes = m_path.nodes(); for (int i = 0; i < pathNodes.size(); ++i) { DeBruijnNode * node = pathNodes[i]; - std::vector hitsThisNode; + Query::Hits hitsThisNode; for (auto &queryHit : query->getHits()) { - if (queryHit.m_node == node) - hitsThisNode.push_back(&queryHit); + if (queryHit->m_node == node) + hitsThisNode.push_back(queryHit.get()); } std::sort(hitsThisNode.begin(), hitsThisNode.end(), @@ -68,6 +68,9 @@ QueryPath::QueryPath(Path path, Query *query) : } } +QueryPath::QueryPath(Path path, Query *query, std::vector hits) + : m_path(std::move(path)), m_query(query), m_hits(std::move(hits)) {} + double QueryPath::getMeanHitPercIdentity() const { int totalHitLength = 0; double sum = 0.0; @@ -83,9 +86,6 @@ double QueryPath::getMeanHitPercIdentity() const { return totalHitLength == 0 ? 0.0 : sum / totalHitLength; } - - - //This function looks at all of the hits in the path for this query and //multiplies the e-values together. If the hits overlap each other, then //this function reduces the e-values accoringly (effectively to prevent @@ -124,8 +124,7 @@ SciNot QueryPath::getEvalueProduct() const { } -int QueryPath::getHitOverlap(const Hit * hit1, const Hit * hit2) const -{ +int QueryPath::getHitOverlap(const Hit * hit1, const Hit * hit2) const { int hit1Start, hit1End, hit2Start, hit2End; QPair possibleEdge(hit1->m_node, hit2->m_node); diff --git a/graphsearch/querypath.h b/graphsearch/querypath.h index 8042d475..7bd719b9 100644 --- a/graphsearch/querypath.h +++ b/graphsearch/querypath.h @@ -18,7 +18,7 @@ #pragma once -#include "graphsearch/hit.h" +#include "hit.h" #include "graph/path.h" #include "program/scinot.h" @@ -28,12 +28,12 @@ namespace search { class QueryPath { public: - //CREATORS + // CREATORS QueryPath(Path path, Query *query); + QueryPath(Path path, Query *query, std::vector hits); //ACCESSORS - Path getPath() const { return m_path; } - + const Path &getPath() const { return m_path; } const auto &getHits() const { return m_hits; } SciNot getEvalueProduct() const; @@ -51,9 +51,9 @@ namespace search { private: Path m_path; Query *m_query; - std::vector m_hits; + std::vector m_hits; int getHitQueryLength() const; int getHitOverlap(const Hit *hit1, const Hit *hit2) const; }; -} \ No newline at end of file +} diff --git a/program/globals.cpp b/program/globals.cpp index 6567c863..46ecae1f 100644 --- a/program/globals.cpp +++ b/program/globals.cpp @@ -24,7 +24,7 @@ QSharedPointer g_settings; QSharedPointer g_memory; BandageGraphicsView * g_graphicsView; double g_absoluteZoom; -QSharedPointer g_blastSearch; +QSharedPointer g_blastSearch; QSharedPointer g_assemblyGraph; std::shared_ptr g_annotationsManager; @@ -82,4 +82,4 @@ QString formatDepthForDisplay(double depth) { decimals += 1; } return formatDoubleForDisplay(depth, decimals) + "x"; -} \ No newline at end of file +} diff --git a/program/globals.h b/program/globals.h index 7732421f..b15600b2 100644 --- a/program/globals.h +++ b/program/globals.h @@ -27,10 +27,13 @@ class Settings; class Memory; class BandageGraphicsView; -class BlastSearch; class AssemblyGraph; class AnnotationsManager; +namespace search { +class BlastSearch; +}; + // Some of the program's common components are made global, so they don't have // to be passed around as parameters. // FIXME: This is bad. Factor this out when possible @@ -38,7 +41,7 @@ extern QSharedPointer g_settings; extern QSharedPointer g_memory; extern BandageGraphicsView * g_graphicsView; extern double g_absoluteZoom; -extern QSharedPointer g_blastSearch; +extern QSharedPointer g_blastSearch; extern QSharedPointer g_assemblyGraph; extern std::shared_ptr g_annotationsManager; diff --git a/program/main.cpp b/program/main.cpp index 8666e959..d3a8335b 100644 --- a/program/main.cpp +++ b/program/main.cpp @@ -111,7 +111,7 @@ int main(int argc, char *argv[]) //Create the important global objects. g_settings.reset(new Settings()); g_memory.reset(new Memory()); - g_blastSearch.reset(new BlastSearch()); + g_blastSearch.reset(new search::BlastSearch()); g_assemblyGraph.reset(new AssemblyGraph()); g_graphicsView = new BandageGraphicsView(); g_annotationsManager = std::make_shared(); diff --git a/tests/bandagetests.cpp b/tests/bandagetests.cpp index 69267f03..3e7a02c5 100644 --- a/tests/bandagetests.cpp +++ b/tests/bandagetests.cpp @@ -89,7 +89,7 @@ private slots: void init() { g_settings.reset(new Settings()); g_memory.reset(new Memory()); - g_blastSearch.reset(new BlastSearch(QDir("."))); + g_blastSearch.reset(new search::BlastSearch(QDir("."))); g_assemblyGraph.reset(new AssemblyGraph()); g_annotationsManager = std::make_shared(); } @@ -548,19 +548,19 @@ void BandageTests::blastSearch() const auto &one_insertionHit = one_insertion->getHits().at(0); const auto &one_deletionHit = one_deletion->getHits().at(0); - QCOMPARE(exactHit.m_numberMismatches, 0); - QCOMPARE(exactHit.m_numberGapOpens, 0); - QCOMPARE(one_mismatchHit.m_numberMismatches, 1); - QCOMPARE(one_mismatchHit.m_numberGapOpens, 0); - QCOMPARE(one_insertionHit.m_numberMismatches, 0); - QCOMPARE(one_insertionHit.m_numberGapOpens, 1); - QCOMPARE(one_deletionHit.m_numberMismatches, 0); - QCOMPARE(one_deletionHit.m_numberGapOpens, 1); - - QCOMPARE(exactHit.m_percentIdentity < 100.0, false); - QCOMPARE(one_mismatchHit.m_percentIdentity < 100.0, true); - QCOMPARE(one_insertionHit.m_percentIdentity < 100.0, true); - QCOMPARE(one_deletionHit.m_percentIdentity < 100.0, true); + QCOMPARE(exactHit->m_numberMismatches, 0); + QCOMPARE(exactHit->m_numberGapOpens, 0); + QCOMPARE(one_mismatchHit->m_numberMismatches, 1); + QCOMPARE(one_mismatchHit->m_numberGapOpens, 0); + QCOMPARE(one_insertionHit->m_numberMismatches, 0); + QCOMPARE(one_insertionHit->m_numberGapOpens, 1); + QCOMPARE(one_deletionHit->m_numberMismatches, 0); + QCOMPARE(one_deletionHit->m_numberGapOpens, 1); + + QCOMPARE(exactHit->m_percentIdentity < 100.0, false); + QCOMPARE(one_mismatchHit->m_percentIdentity < 100.0, true); + QCOMPARE(one_insertionHit->m_percentIdentity < 100.0, true); + QCOMPARE(one_deletionHit->m_percentIdentity < 100.0, true); } diff --git a/ui/dialogs/graphsearchdialog.cpp b/ui/dialogs/graphsearchdialog.cpp index a648e672..33372c96 100644 --- a/ui/dialogs/graphsearchdialog.cpp +++ b/ui/dialogs/graphsearchdialog.cpp @@ -821,7 +821,7 @@ QVariant HitsListModel::data(const QModelIndex &index, int role) const { auto column = HitsColumns(index.column()); const auto &hit = m_hits[index.row()]; - const auto &hitQuery = *hit.m_query; + const auto &hitQuery = *hit->m_query; if (role == Qt::BackgroundRole) { if (hitQuery.isHidden()) // Hide disabled queries @@ -839,35 +839,35 @@ QVariant HitsListModel::data(const QModelIndex &index, int role) const { case HitsColumns::QueryName: return hitQuery.getName(); case HitsColumns::NodeName: - return hit.m_node->getName(); + return hit->m_node->getName(); case HitsColumns::PercentIdentity: - if (hit.m_percentIdentity > 0) - return formatDoubleForDisplay(hit.m_percentIdentity, 2) + "%"; + if (hit->m_percentIdentity > 0) + return formatDoubleForDisplay(hit->m_percentIdentity, 2) + "%"; return "N/A"; case HitsColumns::AlignmentLength: - return hit.m_alignmentLength; + return hit->m_alignmentLength; case HitsColumns::QueryCover: - return formatDoubleForDisplay(100.0 * hit.getQueryCoverageFraction(), 2) + "%"; + return formatDoubleForDisplay(100.0 * hit->getQueryCoverageFraction(), 2) + "%"; case HitsColumns::Mismatches: - if (hit.m_numberMismatches < 0) + if (hit->m_numberMismatches < 0) return "N/A"; - return hit.m_numberMismatches; + return hit->m_numberMismatches; case HitsColumns::GapOpens: - if (hit.m_numberGapOpens < 0) + if (hit->m_numberGapOpens < 0) return "N/A"; - return hit.m_numberGapOpens; + return hit->m_numberGapOpens; case HitsColumns::QueryStart: - return hit.m_queryStart; + return hit->m_queryStart; case HitsColumns::QueryEnd: - return hit.m_queryEnd; + return hit->m_queryEnd; case HitsColumns::NodeStart: - return hit.m_nodeStart; + return hit->m_nodeStart; case HitsColumns::NodeEnd: - return hit.m_nodeEnd; + return hit->m_nodeEnd; case HitsColumns::Evalue: - return hit.m_eValue.asString(false); + return hit->m_eValue.asString(false); case HitsColumns::BitScore: - return hit.m_bitScore; + return hit->m_bitScore; } return {}; diff --git a/ui/dialogs/graphsearchdialog.h b/ui/dialogs/graphsearchdialog.h index c9811827..5faecfd5 100644 --- a/ui/dialogs/graphsearchdialog.h +++ b/ui/dialogs/graphsearchdialog.h @@ -19,6 +19,8 @@ #pragma once +#include "graphsearch/query.h" + #include #include #include @@ -30,8 +32,6 @@ namespace search { class GraphSearch; } -using Hits = std::vector; - namespace Ui { class GraphSearchDialog; } @@ -100,7 +100,7 @@ class HitsListModel : public QAbstractTableModel { void endUpdate() { endResetModel(); } bool empty() const { return m_hits.empty(); } - Hits m_hits; + search::Query::Hits m_hits; }; class GraphSearchDialog : public QDialog { @@ -147,4 +147,4 @@ private slots: signals: void changed(); void queryPathSelectionChanged(); -}; \ No newline at end of file +};