From 1c4ae3fb67f1bd145bd7d901e6ecb539478c774d Mon Sep 17 00:00:00 2001 From: Paolo Carnevali Date: Fri, 31 Jan 2025 16:28:50 -0800 Subject: [PATCH] In alignment free assembly, enforce a minimum coverage for AnchorGraph edges. --- src/Mode3Assembler.cpp | 10 +++++++++- src/mode3-AnchorGraph.cpp | 4 ++-- src/mode3-AnchorGraph.hpp | 2 +- src/mode3-AssemblyGraphDetangle4.cpp | 9 ++++----- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/Mode3Assembler.cpp b/src/Mode3Assembler.cpp index 23816abf..147c0c05 100644 --- a/src/Mode3Assembler.cpp +++ b/src/Mode3Assembler.cpp @@ -386,6 +386,11 @@ shared_ptr Mode3Assembler::assembleConnectedComponent( ostream& orientedReadsCsv, bool debug) { + // EXPOSE WHEN CODE STABILIZES. + // Minimum anchor graph edge coverage, only for alignment-free assembly. + const uint64_t minEdgeCoverageAlignmentFree = 3; + + performanceLog << timestamp << "Assembling connected component " << componentId << " of " << componentOrientedReadIds.size() << endl; cout << timestamp << "Assembling connected component " << @@ -427,7 +432,10 @@ shared_ptr Mode3Assembler::assembleConnectedComponent( // Now we can create the AnchorGraph for this connected component. // The constructor generates the vertices and edges. - AnchorGraph anchorGraph(anchors(), anchorIds); + const uint64_t minEdgeCoverage = ( + options.anchorCreationMethod == "FromMarkerKmers" ? minEdgeCoverageAlignmentFree : + 0); + AnchorGraph anchorGraph(anchors(), anchorIds, minEdgeCoverage); cout << "The AnchorGraph for this connected component has " << diff --git a/src/mode3-AnchorGraph.cpp b/src/mode3-AnchorGraph.cpp index 6a7bf6dd..02eeb977 100644 --- a/src/mode3-AnchorGraph.cpp +++ b/src/mode3-AnchorGraph.cpp @@ -25,7 +25,7 @@ using namespace mode3; // Create the AnchorGraph and its vertices and edges given a vector of AnchorIds. 
-AnchorGraph::AnchorGraph(const Anchors& anchors, span anchorIds) : +AnchorGraph::AnchorGraph(const Anchors& anchors, span anchorIds, uint64_t minEdgeCoverage) : anchorIds(anchorIds) { @@ -47,7 +47,7 @@ AnchorGraph::AnchorGraph(const Anchors& anchors, span anchorIds) vector counts; for(uint64_t localAnchorId0=0; localAnchorId0 anchorIds); + AnchorGraph(const Anchors&, span anchorIds, uint64_t minEdgeCoverage); // The AnchorIds of this AnchorGraph. Each of these corresponds to a vertex. // An index in this vector is called a local anchor id. diff --git a/src/mode3-AssemblyGraphDetangle4.cpp b/src/mode3-AssemblyGraphDetangle4.cpp index f8a2e598..f1ea8de9 100644 --- a/src/mode3-AssemblyGraphDetangle4.cpp +++ b/src/mode3-AssemblyGraphDetangle4.cpp @@ -9,7 +9,7 @@ using namespace mode3; void AssemblyGraph::run4( - uint64_t /* threadCount */, + uint64_t threadCount, bool /* assembleSequence */, bool debug) { @@ -22,10 +22,6 @@ void AssemblyGraph::run4( write("A"); - detangleVertices4(); - write("B"); - -#if 0 // Bubble cleanup. compress(); for(uint64_t iteration=0; ; iteration ++) { @@ -44,6 +40,9 @@ void AssemblyGraph::run4( compress(); } + write("B"); + +#if 0 // Clean up short superbubbles. cleanupSuperbubbles(false, options.assemblyGraphOptions.superbubbleLengthThreshold1,