Skip to content

Commit

Permalink
Support barcode count in simplerep
Browse files Browse the repository at this point in the history
  • Loading branch information
mourisl committed Jan 21, 2020
1 parent e7b7123 commit ff50e8f
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 1 deletion.
4 changes: 3 additions & 1 deletion run-trust4
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ my $vdjcRef = "" ;
my $bamExtractorArgs = "" ;
my $mainArgs = "" ;
my $annotatorArgs = "" ;
my $simpleRepArgs = "" ;
my $threadCnt = 1 ;
my $stage = 0 ;
my $noExtraction = 0 ;
Expand Down Expand Up @@ -129,6 +130,7 @@ for ( $i = 0 ; $i < @ARGV ; ++$i )
{
$hasBarcode = 1 ;
$bamExtractorArgs .= " --barcode ".$ARGV[$i + 1] ;
$simpleRepArgs .= " --barcodeCnt --noPartial" ;
++$i ;
}
elsif ( $ARGV[$i] eq "--stage" )
Expand Down Expand Up @@ -263,7 +265,7 @@ if ( $stage <= 2 )
# Generate the report table
if ( $stage <= 3 )
{
system_call( "perl $WD/trust-simplerep.pl ${prefix}_cdr3.out > ${prefix}_report.tsv" ) ;
system_call( "perl $WD/trust-simplerep.pl ${prefix}_cdr3.out $simpleRepArgs > ${prefix}_report.tsv" ) ;
}

print STDERR "[".localtime()."] TRUST4 finishes.\n" ;
Expand Down
31 changes: 31 additions & 0 deletions trust-simplerep.pl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
die "Usage: ./trust-simplerep.pl xxx_cdr3.out [OPTIONS] > trust_report.out\n".
"OPTIONS:\n".
"\t--decimalCnt: the count column uses decimal instead of truncated integer. (default: not used)\n".
"\t--barcodeCnt: the count column is the number of barcode instead of read support. (default: not used)\n".
"\t--junction trust_annot.fa: output junction information for the CDR3 (default: not used)\n".
"\t--noPartial: do not including partial CDR3 in report. (default: include partial)\n".
"\t--filterTcrError FLOAT: filter TCR CDR3s less than the fraction of representative CDR3 in the consensus. (default: 0.05)\n"
if ( @ARGV == 0 ) ;

Expand Down Expand Up @@ -176,6 +178,8 @@ sub InferConstantGene
my $reportJunctionInfo = 0 ;
my $tcrErrorFilter = 0.05 ;
my $roundDownCount = 1 ;
my $useBarcodeCnt = 0 ;
my $reportPartial = 1 ;
for ( $i = 1 ; $i < @ARGV ; ++$i )
{
if ( $ARGV[$i] eq "--junction" )
Expand All @@ -193,6 +197,14 @@ sub InferConstantGene
{
$roundDownCount = 0 ;
}
elsif ( $ARGV[$i] eq "--barcodeCnt" )
{
$useBarcodeCnt = 1 ;
}
elsif ( $ARGV[$i] eq "--noPartial" )
{
$reportPartial = 0 ;
}
else
{
die "Unknown option ", $ARGV[$i], "\n" ;
Expand Down Expand Up @@ -312,6 +324,7 @@ sub InferConstantGene
open FP1, $ARGV[0] ;
my @totalCnt = (0, 0, 0) ;
my %cdr3AssemblyId ; # Record which assembly is representative for the cdr3
my %cdr3Barcode ; # record whether a "chain_cdr3_barcode" has showed up before or not
while ( <FP1> )
{
chomp ;
Expand All @@ -326,6 +339,8 @@ sub InferConstantGene
my $key = join( "\t", ( $vgene, $dgene, $jgene, $cgene, $cols[8] ) ) ;
my $type = GetChainType( $vgene, $jgene, $cgene ) ;

next if ( $reportPartial == 0 && $cols[9] == 0 ) ;

if ($type == 2) # TCR
{
if ($cols[10] < $assemblyMostReads{$assemblyId} * $tcrErrorFilter )
Expand All @@ -334,6 +349,22 @@ sub InferConstantGene
}
}

if ( $useBarcodeCnt )
{
my @cols2 = split/_/, $assemblyId ;
my $barcode = join( "_", @cols2[0..scalar(@cols2)-2] ) ;
my $tmp = $type."_".$cols[8]."_".$barcode ;
if ( defined $cdr3Barcode{$tmp} )
{
next ;
}
else
{
$cdr3Barcode{$tmp} = 1 ;
$cols[10] = 1 ;
}
}

if ( defined $cdr3{ $key } )
{
my $val = \@{ $cdr3{ $key } } ;
Expand Down

0 comments on commit ff50e8f

Please sign in to comment.