Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Align to the paths, if present #114

Merged
merged 6 commits into from
Aug 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions command_line/image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ int bandageImage(QStringList arguments)

QString blastError = g_blastSearch->doAutoGraphSearch(*g_assemblyGraph,
g_settings->blastQueryFilename,
false, /* include paths */
g_settings->blastSearchParameters);
if (!blastError.isEmpty()) {
err << blastError << Qt::endl;
Expand Down
1 change: 1 addition & 0 deletions command_line/querypaths.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ int bandageQueryPaths(QStringList arguments)
out << "(" << QDateTime::currentDateTime().toString("dd MMM yyyy hh:mm:ss") << ") Running BLAST search... " << Qt::flush;
QString blastError = g_blastSearch->doAutoGraphSearch(*g_assemblyGraph,
g_settings->blastQueryFilename,
false, /* include paths */
g_settings->blastSearchParameters);
if (!blastError.isEmpty()) {
err << Qt::endl << blastError << Qt::endl;
Expand Down
1 change: 1 addition & 0 deletions command_line/reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ int bandageReduce(QStringList arguments)

QString blastError = g_blastSearch->doAutoGraphSearch(*g_assemblyGraph,
g_settings->blastQueryFilename,
false, /* include paths */
g_settings->blastSearchParameters);
if (!blastError.isEmpty()) {
err << blastError << Qt::endl;
Expand Down
10 changes: 5 additions & 5 deletions graph/annotationsmanager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,14 @@ void AnnotationsManager::updateGroupFromHits(const QString &name, const std::vec
auto &group = createAnnotationGroup(name);
for (auto *query: queries) {
for (const auto &hit: query->getHits()) {
auto &annotation = group.annotationMap[hit.m_node].emplace_back(
auto &annotation = group.annotationMap[hit->m_node].emplace_back(
std::make_unique<Annotation>(
hit.m_nodeStart,
hit.m_nodeEnd,
hit->m_nodeStart,
hit->m_nodeEnd,
query->getName().toStdString()));
annotation->addView(std::make_unique<SolidView>(1.0, query->getColour()));
annotation->addView(std::make_unique<RainbowBlastHitView>(hit.queryStartFraction(),
hit.queryEndFraction()));
annotation->addView(std::make_unique<RainbowBlastHitView>(hit->queryStartFraction(),
hit->queryEndFraction()));
}
}

Expand Down
81 changes: 60 additions & 21 deletions graph/path.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ Path Path::makeFromString(const QString& pathString, const AssemblyGraph &graph,
return path;
}

//Find which node names are and are not actually in the graph.
//Find which node names are and are not actually in the graph.
std::vector<DeBruijnNode *> nodesInGraph;
QStringList nodesNotInGraph;
for (auto & i : nodeNameList) {
Expand Down Expand Up @@ -433,33 +433,32 @@ int Path::getLength() const {
}


QString Path::getFasta() const {
//The description line is a comma-delimited list of the nodes in the path
QString fasta = ">" + getString(false);
QByteArray Path::getFasta(QString name) const {
// The description line is a comma-delimited list of the nodes in the path
QByteArray fasta = ">" + (name.isEmpty() ? getString(false).toLatin1() : name.toLatin1());

if (isCircular())
fasta += " (circular)";
fasta += "\n";
fasta += utils::addNewlinesToSequence(getPathSequence());

QString pathSequence = getPathSequence();
int charactersOnLine = 0;
for (auto i : pathSequence)
{
fasta += i;
++charactersOnLine;
if (charactersOnLine >= 70)
{
fasta += "\n";
charactersOnLine = 0;
}
}
return fasta;
}

QByteArray Path::getAAFasta(unsigned shift, QString name) const {
// The description line is a comma-delimited list of the nodes in the path
QByteArray fasta = ">" + (name.isEmpty() ? getString(false).toLatin1() : name.toLatin1());

if (isCircular())
fasta += " (circular)";
fasta += "/" + std::to_string(shift);
fasta += "\n";
fasta += utils::addNewlinesToSequence(getPathSequence());

return fasta;
}



QString Path::getString(bool spaces) const {
QString output;
for (int i = 0; i < m_nodes.size(); ++i) {
Expand Down Expand Up @@ -773,11 +772,11 @@ std::vector<int> Path::getPosition(const DeBruijnNode *node) const {
return res;
}

// Checks whether [leftx, rightx) intersects [lefty, righty)
// Checks whether [leftx, rightx] intersects [lefty, righty]
static bool intersects(int leftx, int rightx,
int lefty, int righty) {
return (leftx <= lefty && lefty < rightx) ||
(lefty <= leftx && leftx < righty);
return (leftx <= lefty && lefty <= rightx) ||
(lefty <= leftx && leftx <= righty);

}

Expand All @@ -791,7 +790,7 @@ std::vector<DeBruijnNode *> Path::getNodesAt(int startPosition, int endPosition)
if (i > 0)
pos -= m_edges[i-1]->getOverlap();
int nodeLen = m_nodes[i]->getLength();
if (intersects(pos, pos+nodeLen,
if (intersects(pos, pos + nodeLen - 1,
startPosition, endPosition))
res.push_back(m_nodes[i]);

Expand All @@ -800,3 +799,43 @@ std::vector<DeBruijnNode *> Path::getNodesAt(int startPosition, int endPosition)

return res;
}

Path::MappingPath Path::getNodeCovering(int startPosition, int endPosition) const {
int pos = -m_startLocation.getPosition() + 1; // all UI positions are 1-based, convert to 0-based

startPosition -= 1; endPosition -= 1; // all UI positions are 1-based, convert to 0-based

MappingPath res;
for (size_t i = 0; i < m_nodes.size(); ++i) {
if (i > 0)
pos -= m_edges[i-1]->getOverlap();
int nodeLen = m_nodes[i]->getLength();
if (intersects(pos, pos + nodeLen - 1,
startPosition, endPosition)) {
MappingRange range;
// Determing the mapping range for a node
// Note that MappingRange is 1-based.
if (pos < startPosition) {
range.initial_range.from = startPosition + 1;
range.mapped_range.from = startPosition - pos + 1;
} else {
range.initial_range.from = pos + 1;
range.mapped_range.from = 1;
}

if (pos + nodeLen - 1 > endPosition) {
range.initial_range.to = endPosition + 1;
range.mapped_range.to = endPosition - pos + 1;
} else {
range.initial_range.to = pos + nodeLen;
range.mapped_range.to = nodeLen;
}

res.emplace_back(m_nodes[i], range);
}

pos += nodeLen;
}

return res;
}
22 changes: 19 additions & 3 deletions graph/path.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,22 @@ class DeBruijnNode;
class DeBruijnEdge;
class AssemblyGraph;

class Path
{
class Path {
public:
// [from, to] since UI does this
struct Range {
int from; int to;
};

struct MappingRange {
// on genome/contig/whatever
Range initial_range;
// on node
Range mapped_range;
};

using MappingPath = std::vector<std::pair<DeBruijnNode*, MappingRange>>;

//CREATORS
Path() {}
Path(GraphLocation location);
Expand All @@ -55,7 +68,8 @@ class Path
bool haveSameNodes(const Path& other) const;
bool hasNodeSubset(const Path& other) const;
[[nodiscard]] QByteArray getPathSequence() const;
[[nodiscard]] QString getFasta() const;
[[nodiscard]] QByteArray getFasta(QString name = "") const;
[[nodiscard]] QByteArray getAAFasta(unsigned shift, QString name = "") const;
[[nodiscard]] QString getString(bool spaces) const;
int getLength() const;
QList<Path> extendPathInAllPossibleWays() const;
Expand All @@ -64,6 +78,8 @@ class Path

std::vector<int> getPosition(const DeBruijnNode *node) const;
std::vector<DeBruijnNode *> getNodesAt(int startPosition, int endPosition) const;
MappingPath getNodeCovering(int startPosition, int endPosition) const;

bool containsNode(const DeBruijnNode * node) const;
bool containsEntireNode(const DeBruijnNode * node) const;
bool isInMiddleOfPath(const DeBruijnNode * node) const;
Expand Down
95 changes: 60 additions & 35 deletions graphsearch/blast/blastsearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ bool BlastSearch::findTools() {
return true;
}

QString BlastSearch::buildDatabase(const AssemblyGraph &graph) {
QString BlastSearch::buildDatabase(const AssemblyGraph &graph, bool includePaths) {
DbBuildFinishedRAII watcher(this);

m_lastError = "";
if (!findTools())
return m_lastError;
Expand All @@ -68,14 +68,24 @@ QString BlastSearch::buildDatabase(const AssemblyGraph &graph) {
if (!file.open(QIODevice::WriteOnly | QIODevice::Text))
return (m_lastError = "Failed to open: " + file.fileName());

QTextStream out(&file);
for (const auto *node : graph.m_deBruijnGraphNodes) {
if (m_cancelBuildDatabase)
return (m_lastError = "Build cancelled.");
{
QTextStream out(&file);
for (const auto *node : graph.m_deBruijnGraphNodes) {
if (m_cancelBuildDatabase)
return (m_lastError = "Build cancelled.");

out << node->getFasta(true, false, false);
}

out << node->getFasta(true, false, false);
if (includePaths) {
for (auto it = graph.m_deBruijnGraphPaths.begin(); it != graph.m_deBruijnGraphPaths.end(); ++it) {
if (m_cancelBuildDatabase)
return (m_lastError = "Build cancelled.");

out << it.value()->getFasta(it.key().c_str());
}
}
}
file.close();

// Make sure the graph has sequences
bool atLeastOneSequence = false;
Expand Down Expand Up @@ -178,11 +188,13 @@ QString BlastSearch::runOneBlastSearch(QuerySequenceType sequenceType,
return blastOutput;
}

static void buildHitsFromBlastOutput(QString blastOutput, Queries &queries);
static std::pair<NodeHits, PathHits>
buildHitsFromBlastOutput(QString blastOutput,
Queries &queries);

QString BlastSearch::doSearch(Queries &queries, QString extraParameters) {
GraphSearchFinishedRAII watcher(this);

m_lastError = "";
if (!findTools())
return m_lastError;
Expand Down Expand Up @@ -211,8 +223,10 @@ QString BlastSearch::doSearch(Queries &queries, QString extraParameters) {
return (m_lastError = "BLAST search cancelled");

// If the code got here, then the search completed successfully.
buildHitsFromBlastOutput(blastOutput, queries);
auto [nodeHits, pathHits] = buildHitsFromBlastOutput(blastOutput, queries);
queries.addNodeHits(nodeHits);
queries.findQueryPaths();
queries.addPathHits(pathHits);
queries.searchOccurred();

m_lastError = "";
Expand All @@ -222,10 +236,11 @@ QString BlastSearch::doSearch(Queries &queries, QString extraParameters) {
//This function carries out the entire BLAST search procedure automatically, without user input.
//It returns an error string which is empty if all goes well.
QString BlastSearch::doAutoGraphSearch(const AssemblyGraph &graph, QString queriesFilename,
bool includePaths,
QString extraParameters) {
cleanUp();

QString maybeError = buildDatabase(graph); // It is expected that buildDatabase will setup last error as well
QString maybeError = buildDatabase(graph, includePaths); // It is expected that buildDatabase will setup last error as well
if (!maybeError.isEmpty())
return maybeError;

Expand Down Expand Up @@ -315,8 +330,11 @@ static QString getNodeNameFromString(const QString &nodeString) {
// BLAST search) to construct the Hit objects.
// It looks at the filters to possibly exclude hits which fail to meet user-
// defined thresholds.
static void buildHitsFromBlastOutput(QString blastOutput,
Queries &queries) {
static std::pair<NodeHits, PathHits>
buildHitsFromBlastOutput(QString blastOutput,
Queries &queries) {
NodeHits nodeHits; PathHits pathHits;

QStringList blastHitList = blastOutput.split("\n", Qt::SkipEmptyParts);

for (const auto &hitString : blastHitList) {
Expand All @@ -338,27 +356,11 @@ static void buildHitsFromBlastOutput(QString blastOutput,
SciNot eValue(alignmentParts[10]);
double bitScore = alignmentParts[11].toDouble();

//Only save BLAST hits that are on forward strands.
if (nodeStart > nodeEnd)
continue;

QString nodeName = getNodeNameFromString(nodeLabel);
DeBruijnNode *node = nullptr;
auto it = g_assemblyGraph->m_deBruijnGraphNodes.find(nodeName.toStdString());
if (it == g_assemblyGraph->m_deBruijnGraphNodes.end())
continue;

node = it.value();

Query *query = queries.getQueryFromName(queryName);
if (query == nullptr)
continue;

// Check the user-defined filters.
if (g_settings->blastAlignmentLengthFilter.on &&
alignmentLength < g_settings->blastAlignmentLengthFilter)
continue;

if (g_settings->blastIdentityFilter.on &&
percentIdentity < g_settings->blastIdentityFilter)
continue;
Expand All @@ -371,16 +373,39 @@ static void buildHitsFromBlastOutput(QString blastOutput,
bitScore < g_settings->blastBitScoreFilter)
continue;

Hit hit(query, node, percentIdentity, alignmentLength,
numberMismatches, numberGapOpens, queryStart, queryEnd,
nodeStart, nodeEnd, eValue, bitScore);
if (g_settings->blastAlignmentLengthFilter.on &&
alignmentLength < g_settings->blastAlignmentLengthFilter)
continue;

if (g_settings->blastQueryCoverageFilter.on) {
double hitCoveragePercentage = 100.0 * hit.getQueryCoverageFraction();
double hitCoveragePercentage = 100.0 * Hit::getQueryCoverageFraction(query,
queryStart, queryEnd);
if (hitCoveragePercentage < g_settings->blastQueryCoverageFilter)
continue;
}

query->addHit(hit);
auto nodeIt = g_assemblyGraph->m_deBruijnGraphNodes.find(getNodeNameFromString(nodeLabel).toStdString());
if (nodeIt != g_assemblyGraph->m_deBruijnGraphNodes.end()) {
// Only save BLAST hits that are on forward strands.
if (nodeStart > nodeEnd)
continue;

nodeHits.emplace_back(query,
new Hit(query, nodeIt.value(),
percentIdentity, alignmentLength,
numberMismatches, numberGapOpens,
queryStart, queryEnd,
nodeStart, nodeEnd, eValue, bitScore));
}

auto pathIt = g_assemblyGraph->m_deBruijnGraphPaths.find(nodeLabel.toStdString());
if (pathIt != g_assemblyGraph->m_deBruijnGraphPaths.end()) {
pathHits.emplace_back(query, pathIt.value(),
Path::MappingRange{queryStart, queryEnd,
nodeStart, nodeEnd});
}

}

return { nodeHits, pathHits };
}
Loading