Skip to content

Commit

Permalink
updated some distance cutoff defaults. added pair filtering by 3D dis…
Browse files Browse the repository at this point in the history
…tance. added average distance measure for preprocessed data
  • Loading branch information
AdamDS committed Sep 30, 2016
1 parent 67a72a6 commit 74020ed
Show file tree
Hide file tree
Showing 9 changed files with 112 additions and 41 deletions.
Binary file renamed HotSpot3D-0.5.5.tar.gz → HotSpot3D-0.6.0.tar.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Usage
-----

Program: HotSpot3D - 3D mutation proximity analysis program.
Version: V0.5.5
Version: V0.6.0
Author: Beifang Niu, John Wallis, Adam D Scott, & Sohini Sengupta

Usage: hotspot3d <command> [options]
Expand Down
41 changes: 22 additions & 19 deletions bin/hotspot3d
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use strict;
use warnings;

our $VERSION = 'V0.5.5';
our $VERSION = 'V0.6.0';

use Carp;
use FileHandle;
Expand All @@ -32,34 +32,36 @@ use TGI::Mutpro::Preprocess::Anno;
use TGI::Mutpro::Preprocess::Cosmic;
use TGI::Mutpro::Preprocess::Prior;
use TGI::Mutpro::Preprocess::Homolog;
#use TGI::Mutpro::Preprocess::AllPreprocess;

my $subCmd = shift;
## Add module option here
my %cmds = map{ ($_, 1) } qw( search post visual cluster sigclus summary drugport uppro calpro calroi statis anno trans homo cosmic prior help );
my %cmds = map{ ($_, 1) } qw( search post visual cluster sigclus summary drugport uppro calpro calroi statis anno trans homo cosmic prior prep help );
unless (defined $subCmd) { die help_text(); };
unless (exists $cmds{$subCmd}) {
warn ' Please give valid sub command ! ', "\n";
die help_text();
}
SWITCH:{
## Add module action here
$subCmd eq 'search' && do { TGI::Mutpro::Main::Proximity->new(); last SWITCH; };
$subCmd eq 'post' && do { TGI::Mutpro::Main::Post->new(); last SWITCH; };
$subCmd eq 'visual' && do { TGI::Mutpro::Main::Visual->new(); last SWITCH; };
$subCmd eq 'cluster' && do { TGI::Mutpro::Main::Cluster->new(); last SWITCH; };
$subCmd eq 'sigclus' && do { TGI::Mutpro::Main::Significance->new(); last SWITCH; };
$subCmd eq 'summary' && do { TGI::Mutpro::Main::Summary->new(); last SWITCH; };
$subCmd eq 'drugport' && do { TGI::Mutpro::Preprocess::Drugport->new(); last SWITCH; };
$subCmd eq 'uppro' && do { TGI::Mutpro::Preprocess::Uppro->new(); last SWITCH; };
$subCmd eq 'calpro' && do { TGI::Mutpro::Preprocess::Calpro->new(); last SWITCH; };
$subCmd eq 'calroi' && do { TGI::Mutpro::Preprocess::Calroi->new(); last SWITCH; };
$subCmd eq 'statis' && do { TGI::Mutpro::Preprocess::Statis->new(); last SWITCH; };
$subCmd eq 'anno' && do { TGI::Mutpro::Preprocess::Anno->new(); last SWITCH; };
$subCmd eq 'trans' && do { TGI::Mutpro::Preprocess::Trans->new(); last SWITCH; };
$subCmd eq 'homo' && do { TGI::Mutpro::Preprocess::Homolog->new(); last SWITCH; };
$subCmd eq 'cosmic' && do { TGI::Mutpro::Preprocess::Cosmic->new(); last SWITCH; };
$subCmd eq 'prior' && do { TGI::Mutpro::Preprocess::Prior->new(); last SWITCH; };
$subCmd eq 'help' && do { die help_text(); last SWITCH; };
$subCmd eq 'search' && do { TGI::Mutpro::Main::Proximity->new(); last SWITCH; };
$subCmd eq 'post' && do { TGI::Mutpro::Main::Post->new(); last SWITCH; };
$subCmd eq 'visual' && do { TGI::Mutpro::Main::Visual->new(); last SWITCH; };
$subCmd eq 'cluster' && do { TGI::Mutpro::Main::Cluster->new(); last SWITCH; };
$subCmd eq 'sigclus' && do { TGI::Mutpro::Main::Significance->new(); last SWITCH; };
$subCmd eq 'summary' && do { TGI::Mutpro::Main::Summary->new(); last SWITCH; };
$subCmd eq 'drugport' && do { TGI::Mutpro::Preprocess::Drugport->new(); last SWITCH; };
$subCmd eq 'uppro' && do { TGI::Mutpro::Preprocess::Uppro->new(); last SWITCH; };
$subCmd eq 'calpro' && do { TGI::Mutpro::Preprocess::Calpro->new(); last SWITCH; };
$subCmd eq 'calroi' && do { TGI::Mutpro::Preprocess::Calroi->new(); last SWITCH; };
$subCmd eq 'statis' && do { TGI::Mutpro::Preprocess::Statis->new(); last SWITCH; };
$subCmd eq 'anno' && do { TGI::Mutpro::Preprocess::Anno->new(); last SWITCH; };
$subCmd eq 'trans' && do { TGI::Mutpro::Preprocess::Trans->new(); last SWITCH; };
$subCmd eq 'homo' && do { TGI::Mutpro::Preprocess::Homolog->new(); last SWITCH; };
$subCmd eq 'cosmic' && do { TGI::Mutpro::Preprocess::Cosmic->new(); last SWITCH; };
$subCmd eq 'prior' && do { TGI::Mutpro::Preprocess::Prior->new(); last SWITCH; };
# $subCmd eq 'prep' && do { TGI::Mutpro::Preprocess::AllPreprocess->new(); last SWITCH; };
$subCmd eq 'help' && do { die help_text(); last SWITCH; };
}
sub help_text {
## Add module help here
Expand All @@ -71,6 +73,7 @@ Version: $VERSION
Usage: hotspot3d <command> [options]
Preprocessing
prep -- Preprocessing steps 1-7
drugport -- 0) Parse drugport database (OPTIONAL)
uppro -- 1) Update proximity files
Expand Down
2 changes: 1 addition & 1 deletion dist.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = HotSpot3D
author = Beifang Niu, John Wallis, Adam D Scott, & Sohini Sengupta from McDonnell Genome Institute of Washington University at St. Louis
version = 0.5.5
version = 0.6.0
license = Perl_5
copyright_holder = McDonnell Genome Institute at Washington University
copyright_year = 2013
Expand Down
63 changes: 55 additions & 8 deletions lib/TGI/Mutpro/Main/Cluster.pm
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ use Data::Dumper;
my $WEIGHT = "weight";
my $RECURRENCE = "recurrence";
my $UNIQUE = "unique";
my $PVALUEDEFAULT = 0.05;
my $DISTANCEDEFAULT = 10;
my $MAXDISTANCE = 100;

sub new {
my $class = shift;
Expand All @@ -35,7 +38,8 @@ sub new {
$this->{'collapsed_file'} = '3D_Proximity.pairwise.singleprotein.collapsed';
$this->{'drug_clean_file'} = undef;
$this->{'output_prefix'} = undef;
$this->{'p_value_cutoff'} = 0.05;
$this->{'p_value_cutoff'} = undef;
$this->{'3d_distance_cutoff'} = undef;
$this->{'linear_cutoff'} = 0;
$this->{'max_radius'} = 10;
$this->{'vertex_type'} = $RECURRENCE;
Expand All @@ -58,6 +62,7 @@ sub process {
'collapsed-file=s' => \$this->{'collapsed_file'},
'drug-clean-file=s' => \$this->{'drug_clean_file'},
'p-value-cutoff=f' => \$this->{'p_value_cutoff'},
'3d-distance-cutoff=f' => \$this->{'3d_distance_cutoff'},
'linear-cutoff=f' => \$this->{'linear_cutoff'},
'max-radius=f' => \$this->{'max_radius'},
'vertex-type=s' => \$this->{'vertex_type'},
Expand All @@ -69,6 +74,19 @@ sub process {
);
if ( $help ) { print STDERR help_text(); exit 0; }
unless( $options ) { die $this->help_text(); }
if ( not defined $this->{'p_value_cutoff'} ) {
if ( not defined $this->{'3d_distance_cutoff'} ) {
warn "HotSpot3D::Cluster warning: no pair distance limit given, setting to default p-value cutoff = 0.05\n";
$this->{'p_value_cutoff'} = $PVALUEDEFAULT;
$this->{'3d_distance_cutoff'} = $MAXDISTANCE;
} else {
$this->{'p_value_cutoff'} = 1;
}
} else {
if ( not defined $this->{'3d_distance_cutoff'} ) {
$this->{'3d_distance_cutoff'} = $MAXDISTANCE;
}
}
if ( ( not defined $this->{'collapsed_file'} ) and ( not defined $this->{'drug_clean_file'} ) ) {
warn 'You must provide a collapsed pairs file or drug pairs file! ', "\n";
die $this->help_text();
Expand Down Expand Up @@ -184,8 +202,8 @@ sub process {
$second = $gene2.":".$m2;
push @mutations , $second; #@mus2;
my ( $dist , $pval ) = split ":" , $master{$first}{$second};
$this->AHC( $pval , $this->{'p_value_cutoff'} , \%clusterings , \@mutations );
if ( $pval < $this->{'p_value_cutoff'} ) {
$this->AHC( $pval , $dist , \%clusterings , \@mutations );
if ( $pval < $this->{'p_value_cutoff'} or $dist < $this->{'3d_distance_cutoff'} ) {
$distance_matrix{$first}{$second} = $dist;
$distance_matrix{$second}{$first} = $dist;
}
Expand Down Expand Up @@ -274,7 +292,7 @@ sub process {
} #foreach transcript representation of mutations
my @mutations = @gm1;
push @mutations , @gm2;
$this->AHC( $pval , $this->{'p_value_cutoff'} , \%clusterings , \@mutations );
$this->AHC( $pval , $dist , \%clusterings , \@mutations );
} $fh->getlines;
$fh->close();
} #if using collapsed pairs file
Expand Down Expand Up @@ -399,7 +417,18 @@ sub process {
}
}
push @outFilename , $this->{'linear_cutoff'};
push @outFilename , $this->{'p_value_cutoff'};
if ( $this->{'3d_distance_cutoff'} != $MAXDISTANCE ) {
if ( $this->{'p_value_cutoff'} != 1 ) {
push @outFilename , $this->{'p_value_cutoff'};
push @outFilename , $this->{'3d_distance_cutoff'};
} else {
push @outFilename , $this->{'3d_distance_cutoff'};
}
} else {
if ( $this->{'p_value_cutoff'} != 1 ) {
push @outFilename , $this->{'p_value_cutoff'};
}
}
push @outFilename , $this->{'max_radius'};
}
push @outFilename , "clusters";
Expand Down Expand Up @@ -541,8 +570,8 @@ sub centroid{

## CLUSTERING FUNCTION - AGGLOMERATIVE HIERARCHICAL CLUSTERING (AHC)
sub AHC {
my ( $this, $pval , $pthreshold , $clusterings , $mutations ) = @_;
if ( $pval < $pthreshold ) { #meets desired significance
my ( $this, $pval , $dist , $clusterings , $mutations ) = @_;
if ( $pval < $this->{'p_value_cutoff'} or $dist < $this->{'3d_distance_cutoff'} ) { #meets desired significance
my ( @temp, @found, @combine );
my ( @uniq, $c );
foreach $c ( keys %{$clusterings} ) { #each cluster
Expand Down Expand Up @@ -661,6 +690,23 @@ sub getTranscriptInfo {
return ( $reportedTranscript , $altTranscript , $chromosome , $start , $stop );
}

## PAIR FILTER - decide whether a mutation pair passes the configured cutoffs
# Arguments: $dist - 3D distance of the pair, $pval - p-value of the pair.
# Returns 1 when the pair passes, 0 otherwise. Which cutoff(s) apply:
#   * 3d_distance_cutoff equal to $MAXDISTANCE -> only the p-value is tested
#   * otherwise p_value_cutoff equal to 1       -> only the distance is tested
#   * otherwise                                 -> both must pass
# All comparisons are strict (<).
sub checkPair {
	my ( $this , $dist , $pval ) = @_;
	my $distanceLimit = $this->{'3d_distance_cutoff'};
	my $pvalueLimit = $this->{'p_value_cutoff'};

	# distance cutoff left at the maximum: filter on significance alone
	if ( $distanceLimit == $MAXDISTANCE ) {
		return $pval < $pvalueLimit ? 1 : 0;
	}

	# p-value cutoff of 1: filter on distance alone
	if ( $pvalueLimit == 1 ) {
		return $dist < $distanceLimit ? 1 : 0;
	}

	# both cutoffs are in play: the pair has to satisfy each of them
	return 0 unless ( $dist < $distanceLimit );
	return 0 unless ( $pval < $pvalueLimit );
	return 1;
}


sub help_text{
Expand All @@ -678,7 +724,8 @@ Usage: hotspot3d cluster [options]
OPTIONAL
--output-prefix Output prefix, default: 3D_Proximity
--p-value-cutoff P_value cutoff (<), default: 0.05
--p-value-cutoff P_value cutoff (<), default: 0.05 (if 3d-distance-cutoff also not set)
--3d-distance-cutoff 3D distance cutoff (<), default: 100 (if p-value-cutoff also not set)
--linear-cutoff Linear distance cutoff (> peptides), default: 20
--max-radius Maximum cluster radius (max network geodesic from centroid, <= Angstroms), default: 10
--vertex-type Graph vertex type (recurrence, unique, or weight), default: recurrence
Expand Down
30 changes: 24 additions & 6 deletions lib/TGI/Mutpro/Main/Proximity.pm
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ use FileHandle;

use TGI::Mutpro::Preprocess::AminoAcid;

my $PVALUEDEFAULT = 0.05;
my $DISTANCEDEFAULT = 10;
my $MAXDISTANCE = 100;

sub new {
my $class = shift;
my $this = {};
Expand All @@ -27,9 +31,9 @@ sub new {
$this->{'data_dir'} = undef;
$this->{'drugport_file'} = undef;
$this->{'output_prefix'} = '3D_Proximity';
$this->{'pvalue_cutoff'} = 0.05;
$this->{'3d_distance_cutoff'} = 10;
$this->{'linear_cutoff'} = 20;
$this->{'pvalue_cutoff'} = undef;
$this->{'3d_distance_cutoff'} = undef;
$this->{'linear_cutoff'} = 0;
$this->{'stat'} = undef;
$this->{'acceptable_types'} = undef;
$this->{'amino_acid_header'} = "amino_acid_change";
Expand Down Expand Up @@ -59,6 +63,19 @@ sub process {
);
if ( $help ) { print STDERR help_text(); exit 0; }
unless( $options ) { die $this->help_text(); }
if ( not defined $this->{'p_value_cutoff'} ) {
if ( not defined $this->{'3d_distance_cutoff'} ) {
warn "HotSpot3D::Cluster warning: no pair distance limit given, setting to default p-value cutoff = 0.05\n";
$this->{'p_value_cutoff'} = $PVALUEDEFAULT;
$this->{'3d_distance_cutoff'} = $MAXDISTANCE;
} else {
$this->{'p_value_cutoff'} = 1;
}
} else {
if ( not defined $this->{'3d_distance_cutoff'} ) {
$this->{'3d_distance_cutoff'} = $MAXDISTANCE;
}
}
unless( $this->{'data_dir'} ) { warn 'You must provide a output directory ! ', "\n"; die help_text(); }
unless( -d $this->{'data_dir'} ) { warn 'You must provide a valid data directory ! ', "\n"; die help_text(); }
unless( $this->{'maf'} and (-e $this->{'maf'}) ) { warn 'You must provide a MAF format file ! ', "\n"; die $this->help_text(); }
Expand Down Expand Up @@ -299,9 +316,10 @@ sub proximitySearching {
$uid2, $chain2, $pdbcor2, $offset2, $residue2, $domain2, $cosmic2,
$proximityinfor ) = @ta;
if ( $drugportref ) {
if ( $AA->filterWater( $residue1 ) and $AA->filterWater( $residue2 ) ) { next; }
unless ( $AA->filterWater( $residue1 ) and $AA->filterWater( $residue2 ) ) { next; }
} else {
if ( $AA->checkAA( $residue1 ) and $AA->checkAA( $residue2 ) ) { next; }
unless ( $AA->filterNonAA( $residue1 ) and $AA->filterNonAA( $residue2 ) ) {
print "bad AA pair: ".$residue1." - ".$residue2."\n"; next; }
}
my $uniprotcor1 = $pdbcor1 + $offset1;
my $uniprotcor2 = $pdbcor2 + $offset2;
Expand Down Expand Up @@ -524,7 +542,7 @@ Usage: hotspot3d search [options]
--missense-only missense mutation only, default: no
--p-value-cutoff p_value cutoff(<=), default: 0.05
--3d-distance-cutoff 3D distance cutoff (<=), default: 10
--linear-cutoff Linear distance cutoff (>= peptides), default: 20
--linear-cutoff Linear distance cutoff (>= peptides), default: 0
--transcript-id-header MAF file column header for transcript id's, default: transcript_name
--amino-acid-header MAF file column header for amino acid changes, default: amino_acid_change
Expand Down
2 changes: 1 addition & 1 deletion lib/TGI/Mutpro/Preprocess/AminoAcid.pm
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ sub minDistance {

sub filterWater {
my ( $this , $residue ) = @_;
if ( $residue eq "HOH" ) {
if ( $residue ne "HOH" ) {
return 1;
}
return 0;
Expand Down
7 changes: 4 additions & 3 deletions lib/TGI/Mutpro/Preprocess/Calpro.pm
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ sub process {
unless(defined $this->{'uniprot_id'}) { warn 'You must provide a Uniprot ID !', "\n"; die $this->help_text(); }
unless( $this->{'output_dir'} and (-e $this->{'output_dir'} ) ) { warn 'You must provide a output directory ! ', "\n"; die $this->help_text(); }
unless( $this->{'pdb_file_dir'} and (-e $this->{'pdb_file_dir'}) ) { warn 'You must provide a PDB file directory ! ', "\n"; die $this->help_text(); }
if ( $this->{'distance_measure'} eq $MINDISTANCE or $this->{'distance_measure'} eq $AVGDISTANCE ) {
if ( $this->{'distance_measure'} ne $MINDISTANCE and $this->{'distance_measure'} ne $AVGDISTANCE ) {
warn "HotSpot3D::Calpro warning: measure not recognized, resetting to default = averageDistance\n";
$this->{'distance_measure'} = $AVGDISTANCE;
}
Expand Down Expand Up @@ -323,9 +323,9 @@ sub writeProximityFile {
# is not close to the amino acid at '$residuePosition'
# of peptide chain '$uniprotChain'
$aaObjRef = $$peptideRef{$chain}->getAminoAcidObject($position);
if ( $this->{'distance_measure'} == $MINDISTANCE ) {
if ( $this->{'distance_measure'} eq $MINDISTANCE ) {
$distanceBetweenResidues = $$aaObjRef->minDistance($uniprotAminoAcidRef);
} elsif ( $this->{'distance_measure'} == $AVGDISTANCE ) {
} elsif ( $this->{'distance_measure'} eq $AVGDISTANCE ) {
$distanceBetweenResidues = $$aaObjRef->averageDistance($uniprotAminoAcidRef);
} else {
$distanceBetweenResidues = $$aaObjRef->averageDistance($uniprotAminoAcidRef);
Expand Down Expand Up @@ -456,6 +456,7 @@ sub checkOffsets {
$aminoAcidA = TGI::Mutpro::Preprocess::PdbStructure::convertAA( $aminoAcidA );
$aminoAcidB = TGI::Mutpro::Preprocess::PdbStructure::convertAA( $aminoAcidB );
next if ( !defined $aminoAcidA || !defined $aminoAcidB );
#next unless ( TGI::Mutpro::Preprocess::AminoAcid::checkAA( $aminoAcidA ) and TGI::Mutpro::Preprocess::AminoAcid::checkAA( $aminoAcidB )
if ( defined $pdbUniprotPosition{$pdbId}{$uniprotA}{$positionA+$offsetA} && $pdbUniprotPosition{$pdbId}{$uniprotA}{$positionA+$offsetA} ne $aminoAcidA ) {
print $coorfh "Inconsistent amino acids for $uniprotA position $positionA+$offsetA in $pdbId: '$pdbUniprotPosition{$pdbId}{$uniprotA}{$positionA+$offsetA}' and $aminoAcidA \n";
}
Expand Down
6 changes: 4 additions & 2 deletions lib/TGI/Mutpro/Preprocess/Peptide.pm
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,9 @@ sub addAminoAcid {
# ref to AminoAcid object
my $self = shift;
my ($position, $aaRef) = @_;
${$self->{AA}}{$position} = $aaRef;
if ( $aaRef->filterNonAA( $aaRef->name() ) ) {
${$self->{AA}}{$position} = $aaRef;
}
}

sub getAminoAcidObject {
Expand Down Expand Up @@ -106,5 +108,5 @@ sub aminoAcidPositionNumbers {
return \@positions;
}

return 1;
1;

0 comments on commit 74020ed

Please sign in to comment.