Skip to content

Commit

Permalink
Output junction information in simple report.
Browse files Browse the repository at this point in the history
  • Loading branch information
mourisl committed Oct 23, 2019
1 parent ed79bf0 commit b21f984
Showing 1 changed file with 127 additions and 6 deletions.
133 changes: 127 additions & 6 deletions trust-simplerep.pl
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,13 @@ sub GetChainType

my $i ;
my $annotFile = "" ;
my $reportJunctionInfo = 0 ;
for ( $i = 1 ; $i < @ARGV ; ++$i )
{
if ( $ARGV[$i] == "--junction" )
if ( $ARGV[$i] eq "--junction" )
{
$annotFile = $ARGV[$i + 1] ;
$reportJunctionInfo = 1 ;
++$i ;
}
else
Expand All @@ -109,34 +111,141 @@ sub GetChainType
}
}

my %junctionInfo ; # V-end, V-del, VD(J)-Ins, D-Ldel, D-start, D-end, D-Rdel, DJ-Ins, J-del, J-start
my %junctionInfo ; # key: assembly id. Values: V-end, V-del, VD(J)-Ins, D-Ldel, D-start, D-end, D-Rdel, DJ-Ins, J-del, J-start

if ( $reportJunctionInfo == 1 )
{
open FP1, $annotFile ;
while ( <FP1> )
{
next if ( !/^>/ ) ;
my @cols = split /\s/, $_ ;

my @vCoord ;
my @dCoord ;
my @jCoord ;
my @cCoord ;
my @cdr3Coord ;

if ( $cols[3] =~ /\(([0-9]+?)\):\(([0-9]+?)-([0-9]+?)\):\(([0-9]+?)-([0-9]+?)\)/ )
{
@vCoord = ($1, $2, $3, $4, $5) ;
}
else
{
#die "Wrong format $header\n" ;
next ;
}
if ( $cols[4] =~ /\(([0-9]+?)\):\(([0-9]+?)-([0-9]+?)\):\(([0-9]+?)-([0-9]+?)\)/ )
{
@dCoord = ($1, $2, $3, $4, $5) ;
}
else
{
@dCoord = (-1, -1, -1, -1, -1) ;
}
if ( $cols[5] =~ /\(([0-9]+?)\):\(([0-9]+?)-([0-9]+?)\):\(([0-9]+?)-([0-9]+?)\)/ )
{
@jCoord = ($1, $2, $3, $4, $5) ;
}
else
{
#die "Wrong format $header\n" ;
next ;
}
if ( $cols[6] =~ /\(([0-9]+?)\):\(([0-9]+?)-([0-9]+?)\):\(([0-9]+?)-([0-9]+?)\)/ )
{
@cCoord = ($1, $2, $3, $4, $5) ;
}
else
{
#die "Wrong format $header\n" ;
next ;
}
next if ( $vCoord[2] >= $jCoord[1] ) ;
next if ( $cols[9] =~ /:0\.00/ ) ;
if ( $cols[9] =~ /CDR3\(([0-9]+?)-([0-9]+?)\)/ )
{
@cdr3Coord = ($1, $2) ;
}
else
{
next ;
}

next if ( $vCoord[2] < $cdr3Coord[0] || $jCoord[1] > $cdr3Coord[1] ) ;

my $chainType = substr( $cols[3], 0, 3 ) ;
my @info ;
# v end, vdelete
push @info, $vCoord[2] - $cdr3Coord[0], $vCoord[0] - $vCoord[4] - 1 ;

# v insert, d-left-delete, d-start, d-end, d-right-delete, j insert
if ( $chainType eq "IGH" || $chainType eq "TRB" || $chainType eq "TRD" )
{
if ( $dCoord[0] == -1 || $dCoord[1] <= $vCoord[2] || $dCoord[2] >= $jCoord[1])
{
push @info, "*", "*", "*", "*", "*", "*" ;
}
else
{
push @info, $dCoord[1] - $vCoord[2] - 1, $dCoord[3],
$dCoord[1] - $cdr3Coord[0], $dCoord[2] - $cdr3Coord[0],
$dCoord[0] - $dCoord[4] - 1,
$jCoord[1] - $dCoord[2] - 1 ;
}
}
else
{
# already make sure J gene is after V gene in this case.
push @info, $jCoord[1] - $vCoord[2] - 1, "*", "*", "*", "*", "*" ;
}

# j-left-delete, j-start
push @info, $jCoord[3], $jCoord[1] - $cdr3Coord[0] ;

#print(substr($cols[0], 1), ":", join(",", @info), "\n") ;
@{$junctionInfo{ substr( $cols[0], 1 )}} = @info ;
}
close FP1 ;
}
# read in the input
open FP1, $ARGV[0] ;
my @totalCnt = (0, 0, 0) ;
my %cdr3AssemblyId ; # Record which assembly is representative for the cdr3
while ( <FP1> )
{
chomp ;
my @cols = split ;
my $key = join( "\t", ( $cols[2], $cols[3], $cols[4], $cols[5], $cols[8] ) ) ;
my $assemblyId = $cols[0] ;
if ( defined $cdr3{ $key } )
{
my $val = \@{ $cdr3{ $key } } ;
$val->[0] = $cols[9] if ( $cols[9] > $val->[0] ) ;
if ( $cols[9] > $val->[0] )
{
$val->[0] = $cols[9];
$val->[2] = $assemblyId ;
}
$val->[1] += $cols[10] ;
}
else
{
@{ $cdr3{ $key } } = ( $cols[9], $cols[10] ) ;
@{ $cdr3{ $key } } = ( $cols[9], $cols[10], $assemblyId ) ;
}
my $type = GetChainType( $cols[2], $cols[4], $cols[5] ) ;
$totalCnt[ $type ] += $cols[10] if ( $type != -1 ) ;
}
close FP1 ;

# Output what we collected.
print( "#count\tfrequency\tCDR3nt\tCDR3aa\tV\tD\tJ\tC\n" ) ;
print( "#count\tfrequency\tCDR3nt\tCDR3aa\tV\tD\tJ\tC" ) ;
if ( $reportJunctionInfo == 1 )
{
print( "\tjunction" )
}
print( "\n" ) ;

foreach my $key ( sort { $cdr3{$b}[1] <=> $cdr3{$a}[1] } keys %cdr3 )
{
my @val = @{ $cdr3{ $key } } ;
Expand Down Expand Up @@ -172,5 +281,17 @@ sub GetChainType
my $freq = 0 ;
my $type = GetChainType( $info[0], $info[2], $info[3] ) ;
$freq = $val[1] / $totalCnt[ $type ] if ( $type != -1 ) ;
printf( "%.2f\t%e\t%s\t%s\t%s\t%s\t%s\t%s\n", $val[1], $freq, $info[4], $aa, $info[0], $info[1], $info[2], $info[3] ) ;
printf( "%.2f\t%e\t%s\t%s\t%s\t%s\t%s\t%s", $val[1], $freq, $info[4], $aa, $info[0], $info[1], $info[2], $info[3] ) ;
if ( $reportJunctionInfo == 1 )
{
if ( defined $junctionInfo{$val[2]} )
{
print( "\t", join( ",", @{$junctionInfo{$val[2]}}) ) ;
}
else
{
print( "\t*" ) ;
}
}
print( "\n" ) ;
}

0 comments on commit b21f984

Please sign in to comment.