Skip to content

Commit

Permalink
Finalize the implementation for lineage tree construction.
Browse files Browse the repository at this point in the history
  • Loading branch information
mourisl committed Dec 6, 2019
1 parent 06e2183 commit b404abd
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 34 deletions.
89 changes: 56 additions & 33 deletions CloneEvolution.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,32 +44,50 @@ void InsertAdj( int u, int v, struct _adj *adj, int &adjUsed )
}

// Prim algorithm for minimum spanning tree
void Prim( int **dist, int n, struct _adj *adj, int &adjUsed )
void Prim( int **dist, int n, struct _adj *adj, int &adjUsed, int offset, std::vector<struct _cdr3> &cdr3s )
{
int i, j ;
struct _pair *minDist = new struct _pair[n] ; // a-dist, b-the node used to connect
bool *used = new bool[n] ;
memset( used, false, sizeof(bool) * n ) ;

minDist[0].a = 0 ;
minDist[0].b = 0 ;
for ( i = 1 ; i < n ; ++i )
int max = -1 ;
int maxTag = -1 ;
for ( i = 0 ; i < n ; ++i )
if ( cdr3s[i + offset].abund > max )
{
max = cdr3s[i + offset].abund ;
maxTag = i ;
}

minDist[maxTag].a = 0 ;
minDist[maxTag].b = 0 ;
for ( i = 0 ; i < n ; ++i )
{
minDist[0].a = dist[0][i] ;
minDist[0].b = 0 ;
if ( i == maxTag )
continue ;
minDist[i].a = dist[maxTag][i] ;
minDist[i].b = maxTag ;
}
used[0] = true ;
used[maxTag] = true ;

for ( i = 1 ; i < n ; ++i )
{
int min = LINE_WIDTH ;
int minTag = -1 ;
for ( j = 0 ; j < n ; ++j )
{
if ( !used[j] && minDist[j].a < min )
if ( !used[j] )
{
min = minDist[j].a ;
minTag = j ;
if ( minDist[j].a < min )
{
min = minDist[j].a ;
minTag = j ;
}
else if ( minDist[j].a == min && cdr3s[j + offset].abund > cdr3s[minTag + offset].abund )
{
minTag = j ;
}
}
}

Expand All @@ -79,10 +97,18 @@ void Prim( int **dist, int n, struct _adj *adj, int &adjUsed )

for ( j = 0 ; j < n ; ++j )
{
if ( !used[j] && dist[minTag][j] < minDist[j].a )
if ( !used[j] )
{
minDist[j].a = dist[minTag][j] ;
minDist[j].b = minTag ;
if ( dist[minTag][j] < minDist[j].a )
{
minDist[j].a = dist[minTag][j] ;
minDist[j].b = minTag ;
}
else if ( dist[minTag][j] == minDist[j].a
&& cdr3s[minTag + offset].abund > cdr3s[ minDist[j].b + offset ].abund )
{
minDist[j].b = minTag ;
}
}
}
}
Expand All @@ -92,22 +118,22 @@ void Prim( int **dist, int n, struct _adj *adj, int &adjUsed )
}

// Return: whether it is a leaf.
int OrderMST( int tag, struct _adj *adj, int n, int *parent, std::vector<int> &leaves )
int LongestRootLeafDistMST( int tag, struct _adj *adj, int n, bool *visited, int **dist )
{
visited[tag] = true ;

int p = adj[tag].next ;
int ret = 1 ;
int ret = 0 ;
while ( p != -1 )
{
if ( parent[adj[tag].v] == -1 )
if ( visited[ adj[p].v ] == false )
{
parent[ adj[tag].v ] = tag ;
OrderMST( adj[tag].v, adj, n, parent, leaves ) ;
ret = 0 ;
int tmp = dist[tag][ adj[p].v ] + LongestRootLeafDistMST( adj[p].v, adj, n, visited, dist ) ;
if ( tmp > ret )
ret = tmp ;
}
p = adj[tag].next ;
p = adj[p].next ;
}
if ( ret == 1 )
leaves.push_back( tag ) ;
return ret ;
}

Expand All @@ -134,10 +160,11 @@ void PrintMST( FILE *fp, int tag, int parent, bool *visited, int **dist, int off
if ( childCnt > 0 )
fprintf( fp, ")" ) ;
if ( parent == tag ) // root
fprintf( fp, "%s_%d;\n", clusterIdToName[ cdr3[tag + offset].clusterId ].c_str(), cdr3[tag + offset].subId ) ;
fprintf( fp, "%s_%d_%.2lf;\n", clusterIdToName[ cdr3[tag + offset].clusterId ].c_str(), cdr3[tag + offset].subId,
cdr3[tag + offset].abund ) ;
else
fprintf( fp, "%s_%d:%d", clusterIdToName[ cdr3[tag + offset].clusterId ].c_str(),
cdr3[tag + offset].subId, dist[parent][tag] ) ;
fprintf( fp, "%s_%d_%.2lf:%d", clusterIdToName[ cdr3[tag + offset].clusterId ].c_str(),
cdr3[tag + offset].subId, cdr3[tag + offset].abund, dist[parent][tag] ) ;
}


Expand Down Expand Up @@ -224,7 +251,7 @@ int main( int argc, char *argv[] )
adjUsed = k ;

// Build the tree
Prim( dist, n, adj, adjUsed ) ;
Prim( dist, n, adj, adjUsed, i, cdr3s ) ;

// Find the root for the tree
std::vector<int> leaves ;
Expand All @@ -236,14 +263,11 @@ int main( int argc, char *argv[] )

int bestRoot = -1 ;
int minMaxRootLeafDist = LINE_WIDTH ;
bool *visited = new bool[n] ;
for ( k = 0 ; k < n ; ++k )
{
int lcnt = leaves.size() ;
int max = -1 ;
for ( l = 0 ; l < lcnt ; ++l )
if ( leaves[l] != k && dist[k][leaves[l]] > max )
max = dist[k][leaves[l]] ;

memset( visited, false, sizeof( bool ) * n ) ;
int max = LongestRootLeafDistMST( k, adj, n, visited, dist ) ;
if ( max < minMaxRootLeafDist )
{
bestRoot = k ;
Expand All @@ -257,7 +281,6 @@ int main( int argc, char *argv[] )
}

// Output the tree
bool *visited = new bool[n] ;
memset( visited, false, sizeof( bool ) * n ) ;
PrintMST( stdout, bestRoot, bestRoot, visited, dist, i, adj, cdr3s, clusterIdToName ) ;

Expand Down
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ LINKFLAGS = -lpthread -lz
DEBUG=
OBJECTS = main.o #BaseReads.o Alignment.o

all: trust4 bam-extractor annotator
all: trust4 bam-extractor annotator clone-evo

trust4: main.o
$(CXX) -o $@ $(LINKPATH) $(CXXFLAGS) $< $(LINKFLAGS)
Expand All @@ -20,10 +20,14 @@ bam-extractor: BamExtractor.o
annotator: Annotator.o
$(CXX) -o $@ $(LINKPATH) $(CXXFLAGS) $< $(LINKFLAGS)

clone-evo: CloneEvolution.o
$(CXX) -o $@ $(LINKPATH) $(CXXFLAGS) $< $(LINKFLAGS)

main.o: main.cpp AlignAlgo.hpp ReadFiles.hpp kseq.h SeqSet.hpp KmerIndex.hpp SimpleVector.hpp defs.h KmerCode.hpp KmerCount.hpp
BamExtractor.o: BamExtractor.cpp alignments.hpp defs.h SeqSet.hpp
Annotator.o: Annotator.cpp AlignAlgo.hpp ReadFiles.hpp kseq.h SeqSet.hpp KmerIndex.hpp SimpleVector.hpp defs.h KmerCode.hpp KmerCount.hpp
#Alignment.o: Alignment.cpp Alignment.h SimpleVector.h defs.h StatsTests.h KmerTree.h ReadSet.h KmerIndex.h poa.h
CloneEvolution.o: CloneEvolution.cpp defs.h

clean:
rm -f *.o *.gch trust4 bam-extractor annotator

0 comments on commit b404abd

Please sign in to comment.