Skip to content

Commit

Permalink
In alignment free assembly, enforce a minimum coverage for AnchorGraph edges.
Browse files Browse the repository at this point in the history
  • Loading branch information
paoloshasta committed Feb 1, 2025
1 parent ce5a43a commit 1c4ae3f
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 9 deletions.
10 changes: 9 additions & 1 deletion src/Mode3Assembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,11 @@ shared_ptr<AssemblyGraph> Mode3Assembler::assembleConnectedComponent(
ostream& orientedReadsCsv,
bool debug)
{
// EXPOSE WHEN CODE STABILIZES.
// Minimum AnchorGraph edge coverage, used only for alignment-free assembly.
const uint64_t minEdgeCoverageAlignmentFree = 3;


performanceLog << timestamp << "Assembling connected component " <<
componentId << " of " << componentOrientedReadIds.size() << endl;
cout << timestamp << "Assembling connected component " <<
Expand Down Expand Up @@ -427,7 +432,10 @@ shared_ptr<AssemblyGraph> Mode3Assembler::assembleConnectedComponent(

// Now we can create the AnchorGraph for this connected component.
// The constructor generates the vertices and edges.
AnchorGraph anchorGraph(anchors(), anchorIds);
const uint64_t minEdgeCoverage = (
options.anchorCreationMethod == "FromMarkerKmers" ? minEdgeCoverageAlignmentFree :
0);
AnchorGraph anchorGraph(anchors(), anchorIds, minEdgeCoverage);


cout << "The AnchorGraph for this connected component has " <<
Expand Down
4 changes: 2 additions & 2 deletions src/mode3-AnchorGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ using namespace mode3;


// Create the AnchorGraph and its vertices and edges given a vector of AnchorIds.
AnchorGraph::AnchorGraph(const Anchors& anchors, span<const AnchorId> anchorIds) :
AnchorGraph::AnchorGraph(const Anchors& anchors, span<const AnchorId> anchorIds, uint64_t minEdgeCoverage) :
anchorIds(anchorIds)
{

Expand All @@ -47,7 +47,7 @@ AnchorGraph::AnchorGraph(const Anchors& anchors, span<const AnchorId> anchorIds)
vector<uint64_t> counts;
for(uint64_t localAnchorId0=0; localAnchorId0<anchorIds.size(); localAnchorId0++) {
const AnchorId anchorId0 = anchorIds[localAnchorId0];
anchors.findChildren(anchorId0, children, counts);
anchors.findChildren(anchorId0, children, counts, minEdgeCoverage);
const uint64_t n = children.size();
SHASTA_ASSERT(n == counts.size());
for(uint64_t i=0; i<n; i++) {
Expand Down
2 changes: 1 addition & 1 deletion src/mode3-AnchorGraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class shasta::mode3::AnchorGraph : public AnchorGraphBaseClass {

// Create the AnchorGraph and its vertices and edges given the AnchorIds
// for a connected component of the global anchor graph.
AnchorGraph(const Anchors&, span<const AnchorId> anchorIds);
AnchorGraph(const Anchors&, span<const AnchorId> anchorIds, uint64_t minEdgeCoverage);

// The AnchorIds of this AnchorGraph. Each of these corresponds to a vertex.
// An index in this vector is called a local anchor id.
Expand Down
9 changes: 4 additions & 5 deletions src/mode3-AssemblyGraphDetangle4.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ using namespace mode3;


void AssemblyGraph::run4(
uint64_t /* threadCount */,
uint64_t threadCount,
bool /* assembleSequence */,
bool debug)
{
Expand All @@ -22,10 +22,6 @@ void AssemblyGraph::run4(

write("A");

detangleVertices4();
write("B");

#if 0
// Bubble cleanup.
compress();
for(uint64_t iteration=0; ; iteration ++) {
Expand All @@ -44,6 +40,9 @@ void AssemblyGraph::run4(
compress();
}

write("B");

#if 0
// Clean up short superbubbles.
cleanupSuperbubbles(false,
options.assemblyGraphOptions.superbubbleLengthThreshold1,
Expand Down

0 comments on commit 1c4ae3f

Please sign in to comment.