Skip to content

Commit

Permalink
added transcript and genomic position info to clusters output
Browse files Browse the repository at this point in the history
  • Loading branch information
AdamDS committed Sep 26, 2016
1 parent f673b80 commit 0ac2f78
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 29 deletions.
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
Cluster Gene/Drug Mutation/Gene Degree_Connectivity Closeness_Centrality Geodesic_From_Centroid Recurrence
1.1 SMAD4 p.R361S 1 17.18811319794 0 1
1.1 SMAD4 p.R361H 1 17.18811319794 0 10
1.1 SMAD3 p.Q405L 2 0.263658841817618 9.042 1
1.1 SMAD4 p.R361P 1 17.18811319794 0 1
1.1 SMAD2 p.D450N 6 3.16150864161809 2.561 1
1.1 SMAD4 p.D355G 2 0.525640839784387 5.899 1
1.1 SMAD4 p.R361C 1 17.18811319794 0 5
1.1 SMAD4 p.RFCLG361in_frame_del 1 17.18811319794 0 1
0 SMAD2 p.R321Q 1 1.16132031130647 0 2
0 SMAD4 p.D493N 1 0.322640622612945 2.632 1
2 SMAD2 p.D304G 4 0.436816059849492 3.360 1
2 SMAD3 p.R268C 4 2.66236905595016 2.629 1
2 SMAD4 p.D537G 3 3.58236392191537 0 1
2 SMAD4 p.D537E 3 3.58236392191537 0 1
2 SMAD3 p.R268H 4 2.66236905595016 2.629 2
2 SMAD4 p.D537Y 3 3.58236392191537 0 1
2 SMAD4 p.D537V 3 3.58236392191537 0 1
Cluster Gene/Drug Mutation/Gene Degree_Connectivity Closeness_Centrality Geodesic_From_Centroid Recurrence Chromosome Start Stop Transcript Alternative_Transcripts
1.1 SMAD4 p.R361S 1 17.18811319794 0 1 18 48591918 48591918 ENST00000342988|ENST00000398417 NA
1.1 SMAD4 p.R361P 1 17.18811319794 0 1 18 48591919 48591919 ENST00000342988|ENST00000398417 NA
1.1 SMAD4 p.R361H 1 17.18811319794 0 10 18 48591919 48591919 ENST00000342988|ENST00000398417 NA
1.1 SMAD4 p.RFCLG361in_frame_del 1 17.18811319794 0 1 18 48591917 48591931 ENST00000342988|ENST00000398417 NA
1.1 SMAD4 p.D355G 2 0.525640839784387 5.899 1 18 48591901 48591901 ENST00000342988|ENST00000398417 NA
1.1 SMAD2 p.D450N 6 3.16150864161809 2.561 1 18 45368254 45368254 ENST00000262160|ENST00000402690 NA
1.1 SMAD3 p.Q405L 2 0.263658841817618 9.042 1 15 67482810 67482810 ENST00000327367 NA
1.1 SMAD4 p.R361C 1 17.18811319794 0 5 18 48591918 48591918 ENST00000342988|ENST00000398417 NA
0 SMAD4 p.D493N 1 0.322640622612945 2.632 1 18 48604655 48604655 ENST00000342988|ENST00000398417 NA
0 SMAD2 p.R321Q 1 1.16132031130647 0 2 18 45374881 45374881 ENST00000262160|ENST00000402690 NA
2 SMAD4 p.D537V 3 3.58236392191537 0 1 18 48604788 48604788 ENST00000342988|ENST00000398417 NA
2 SMAD3 p.R268C 4 2.66236905595016 2.629 1 15 67473722 67473722 ENST00000327367 NA
2 SMAD4 p.D537Y 3 3.58236392191537 0 1 18 48604787 48604787 ENST00000342988|ENST00000398417 NA
2 SMAD2 p.D304G 4 0.436816059849492 3.360 1 18 45374932 45374932 ENST00000262160|ENST00000402690 NA
2 SMAD3 p.R268H 4 2.66236905595016 2.629 2 15 67473723 67473723 ENST00000327367 NA
2 SMAD4 p.D537G 3 3.58236392191537 0 1 18 48604788 48604788 ENST00000342988|ENST00000398417 NA
2 SMAD4 p.D537E 3 3.58236392191537 0 1 18 48604789 48604789 ENST00000342988|ENST00000398417 NA
Binary file modified HotSpot3D-0.5.4.tar.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Usage
-----

Program: HotSpot3D - 3D mutation proximity analysis program.
Version: V0.5.4
Version: V0.5.5
Author: Beifang Niu, John Wallis, Adam D Scott, & Sohini Sengupta

Usage: hotspot3d <command> [options]
Expand Down
2 changes: 1 addition & 1 deletion bin/hotspot3d
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use strict;
use warnings;

our $VERSION = 'V0.5.4';
our $VERSION = 'V0.5.5';

use Carp;
use FileHandle;
Expand Down
2 changes: 1 addition & 1 deletion dist.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = HotSpot3D
author = Beifang Niu, John Wallis, Adam D Scott, & Sohini Sengupta from McDonnell Genome Institute of Washington University at St. Louis
version = 0.5.4
version = 0.5.5
license = Perl_5
copyright_holder = McDonnell Genome Institute at Washington University
copyright_year = 2013
Expand Down
69 changes: 61 additions & 8 deletions lib/TGI/Mutpro/Main/Cluster.pm
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ sub process {
unless( -e $this->{'maf_file'} ) { warn "The input .maf file )".$this->{'maf_file'}.") does not exist! ", "\n"; die $this->help_text(); }
}
## processing procedure
my ( %clusterings , %distance_matrix , %pdb_loc, %aa_map, %master, %mut_chrpos , %locations , %Variants , $WEIGHT );
my ( %clusterings , %distance_matrix , %pdb_loc, %aa_map, %master, %mut_chrpos , %locations , %Variants , $WEIGHT , %transcripts );
$WEIGHT = "weight";
#####
# drug-mutation pairs
Expand Down Expand Up @@ -317,7 +317,6 @@ sub process {
}
}
##Mutation recurrence or weight from MAF
my %mutations;
die "Could not open .maf file\n" unless( $fh->open( $this->{'maf_file'} , "r" ) );
print STDOUT "\nReading in .maf ...\n";
my $mafi = 0;
Expand Down Expand Up @@ -347,6 +346,7 @@ sub process {
unless( defined( $mafcols{$this->{"weight_header"}} ) ) { die "\n"; };
push @mafcols , $mafcols{$this->{"weight_header"}};
}
my %mutations;
map {
chomp;
my @line = split /\t/;
Expand All @@ -361,6 +361,7 @@ sub process {
my $variant = join( "_" , ( $gene , $aachange , $chr , $start , $stop ) );
if ( exists $variants_from_pairs{$variant} ) {
my $gene_aachange = $gene.":".$aachange;
$transcripts{$gene_aachange}{$transcript_name.":".$aachange} = $chr.":".$start.":".$stop;
if ( exists $Variants{$gene_aachange} ) {
if ( $this->{'vertex_type'} ne $WEIGHT ) {
if ( not exists $mutations{$variant}{$barID} ) {
Expand Down Expand Up @@ -404,11 +405,17 @@ sub process {
push @outFilename , "clusters";
my $outFilename = join( "." , @outFilename );
die "Could not create clustering output file\n" unless( $fh->open( $outFilename , "w" ) );
$fh->print( "Cluster\tGene/Drug\tMutation/Gene\tDegree_Connectivity\tCloseness_Centrality\tGeodesic_From_Centroid\tRecurrence\n" );
$fh->print( join( "\t" , ( "Cluster" , "Gene/Drug" , "Mutation/Gene" ,
"Degree_Connectivity" , "Closeness_Centrality" ,
"Geodesic_From_Centroid" , "Recurrence" ,
"Chromosome" , "Start" , "Stop" , "Transcript" , "Alternative_Transcripts"
)
)."\n"
);
print STDOUT "Getting Cluster ID's & Centroids\n";
foreach my $clus_num ( keys %clusterings ) {
my @clus_mut = @{$clusterings{$clus_num}};
$this->centroid(\%Variants,\%distance_matrix,\%degree_connectivity,$clus_num,\@clus_mut,$fh,0, 1);
$this->centroid(\%Variants,\%distance_matrix,\%degree_connectivity,$clus_num,\@clus_mut,$fh,0, 1 , \%transcripts);
} #foreach cluster
my $numclusters = scalar keys %clusterings;
print STDOUT "Found $numclusters clusters\n";
Expand All @@ -419,7 +426,7 @@ sub process {
# sub functions
#####
sub centroid{
my ($this, $Variants,$distance_matrix,$degree_connectivity,$clus_num,$clus_mut,$fh,$recluster,$counter)=@_;
my ($this, $Variants,$distance_matrix,$degree_connectivity,$clus_num,$clus_mut,$fh,$recluster,$counter , $transcripts )=@_;
my %dist = ();
foreach my $mut1 ( @{$clus_mut} ) { #initialize geodesics
my @mu1 = split( ":" , $mut1 );
Expand Down Expand Up @@ -484,6 +491,8 @@ sub centroid{
my $degrees = $degree_connectivity->{$other};
my $closenesscentrality = $centrality{$clus_num}{$other};
my ( $gene , $mutation ) = split /\:/ , $other;
my ( $reportedTranscript , $altTranscript ,
$chromosome , $start , $stop ) = $this->getTranscriptInfo( $transcripts , $other );
$weight = 1;

if ( $geodesic <= $this->{'max_radius'} ) {
Expand All @@ -492,13 +501,26 @@ sub centroid{
}
$count+=1;
if ($recluster==1){
$fh->print( join( "\t" , ( "$clus_num.$counter" , $gene , $mutation , $degrees , $closenesscentrality , $geodesic , $weight ) )."\n" );
$fh->print( join( "\t" , ( "$clus_num.$counter" , $gene , $mutation ,
$degrees , $closenesscentrality ,
$geodesic , $weight ,
$chromosome , $start , $stop ,
$reportedTranscript , $altTranscript
)
)."\n"
);
my $index=0;
$index++ until $clus_mut->[$index] eq $other;
splice(@{$clus_mut}, $index, 1);
}
else {
$fh->print( join( "\t" , ( $clus_num , $gene , $mutation , $degrees , $closenesscentrality , $geodesic , $weight ) )."\n" );
$fh->print( join( "\t" , ( $clus_num , $gene , $mutation , $degrees ,
$closenesscentrality , $geodesic , $weight ,
$chromosome , $start , $stop ,
$reportedTranscript , $altTranscript
)
)."\n"
);
}
}
} #foreach other vertex in network
Expand All @@ -512,7 +534,7 @@ sub centroid{
if ($recluster==1) {
$counter+=1;
#print STDOUT "$counter\n";
$this->centroid($Variants,$distance_matrix,$degree_connectivity,$clus_num,$clus_mut,$fh,$recluster, $counter);
$this->centroid($Variants,$distance_matrix,$degree_connectivity,$clus_num,$clus_mut,$fh,$recluster, $counter , $transcripts);
}
}

Expand Down Expand Up @@ -607,6 +629,37 @@ sub floydwarshall {
}
}

# Look up transcript and genomic-position annotation for one cluster member.
#
# Arguments:
#   $this        - invocant; not used by this method
#   $transcripts - hash ref of the form
#                  { "gene:mutation" => { "transcript:mutation" => "chr:start:stop" } }
#   $other       - "gene:mutation" key of the vertex being written out
#
# Returns the list
#   ( $reportedTranscript , $altTranscript , $chromosome , $start , $stop )
# where
#   $reportedTranscript - "|"-joined transcripts whose mutation equals the
#                         mutation part of $other, or "NA" if there are none
#   $altTranscript      - "|"-joined "transcript:mutation" entries whose
#                         mutation differs from $other's, or "NA" if none
#   $chromosome/$start/$stop - position of the last annotation visited (keys
#                         are visited in sorted order), or "NA" each when
#                         $other has no annotation at all
sub getTranscriptInfo {
	my ( $this , $transcripts , $other ) = @_;
	my ( @reported , @alternative );
	# Default the position columns so callers never join() undef values
	# for vertices that have no entry in the transcript table.
	my ( $chromosome , $start , $stop ) = ( "NA" , "NA" , "NA" );
	my ( undef , $mu ) = split /\:/ , $other;
	# Guard with exists so a missing key is not autovivified into the
	# caller's hash just by iterating over it.
	my $annotations = {};
	if ( ref( $transcripts ) eq 'HASH' and exists $transcripts->{$other} ) {
		$annotations = $transcripts->{$other};
	}
	foreach my $tranmu ( sort keys %{$annotations} ) {
		my ( $transcript , $mutation ) = split /\:/ , $tranmu;
		( $chromosome , $start , $stop ) = split /\:/ , $annotations->{$tranmu};
		if ( $mu eq $mutation ) {
			push @reported , $transcript;
		} else {
			push @alternative , $transcript.":".$mutation;
		}
	} #foreach tranmu

	my $reportedTranscript = join( "|" , @reported );
	my $altTranscript = join( "|" , @alternative );
	if ( not length $reportedTranscript ) { $reportedTranscript = "NA"; }
	if ( not length $altTranscript ) { $altTranscript = "NA"; }
	return ( $reportedTranscript , $altTranscript , $chromosome , $start , $stop );
}



Expand Down

0 comments on commit 0ac2f78

Please sign in to comment.