Skip to content

Commit

Permalink
differentiate preliminary vs. high conf fusion preds
Browse files Browse the repository at this point in the history
  • Loading branch information
brianjohnhaas committed Apr 18, 2019
1 parent 18e4969 commit 1eeb568
Show file tree
Hide file tree
Showing 4 changed files with 329 additions and 5 deletions.
2 changes: 1 addition & 1 deletion GMAP-fusion
Submodule GMAP-fusion updated 1 files
+1 −1 Docker/Dockerfile
291 changes: 291 additions & 0 deletions PerlLib/DelimParser.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
#!/usr/bin/env perl

# classes for DelimParser::Reader and DelimParser::Writer

package DelimParser;
use strict;
use warnings;
use Carp;

####
sub new {
my ($packagename, $fh, $delimiter) = @_;

unless ($fh && $delimiter) {
confess "Error, need filehandle and delimiter params";
}


my $self = { _delim => $delimiter,
_fh => $fh,

# set below in _init()
_column_headers => [],
};


bless ($self, $packagename);

return($self);
}


####
sub get_fh {
my $self = shift;
return($self->{_fh});
}

####
sub get_delim {
my $self = shift;
return($self->{_delim});
}

####
sub get_column_headers {
my $self = shift;
return(@{$self->{_column_headers}});
}

####
sub set_column_headers {
my $self = shift;
my (@columns) = @_;

$self->{_column_headers} = \@columns;

return;
}

####
sub get_num_columns {
my $self = shift;
return(length($self->get_column_headers()));
}


###
sub reconstruct_header_line {
my $self = shift;
my @column_headers = $self->get_column_headers();

my $header_line = join("\t", @column_headers);
return($header_line);
}

###
sub reconstruct_line_from_row {
my $self = shift;
my $row_href = shift;
unless ($row_href && ref $row_href) {
confess "Error, must set row_href as param";
}

my @column_headers = $self->get_column_headers();

my @vals;
foreach my $col_header (@column_headers) {
my $val = $row_href->{$col_header};
push (@vals, $val);
}

my $row_text = join("\t", @vals);

return($row_text);

}


##################################################
package DelimParser::Reader;
use strict;
use warnings;
use Carp;
use Data::Dumper;

our @ISA;
push (@ISA, 'DelimParser');

sub new {
my ($packagename) = shift;
my $self = $packagename->DelimParser::new(@_);

$self->_init();

return($self);
}


####
sub _init {
my $self = shift;

my $fh = $self->get_fh();
my $delim = $self->get_delim();

my $header_row = <$fh>;
chomp $header_row;

unless ($header_row) {
confess "Error, no header row read.";
}

my @fields = split(/$delim/, $header_row);

$self->set_column_headers(@fields);


return;
}

####
sub get_row {
my $self = shift;

my $fh = $self->get_fh();
my $line = <$fh>;
unless ($line) {
return(undef); # eof
}

my $delim = $self->get_delim();
my @fields = split(/$delim/, $line);
chomp $fields[$#fields]; ## it's important that this is done after the delimiter splitting in case the last field is actually empty.


my @column_headers = $self->get_column_headers();

my $num_col = scalar (@column_headers);
my $num_fields = scalar(@fields);

if ($num_col != $num_fields) {
confess "Error, line: [$line] " . Dumper(\@fields) . " is lacking $num_col fields: " . Dumper(\@column_headers);
}

my %dict;
foreach my $colname (@column_headers) {
my $field = shift @fields;
$dict{$colname} = $field;
}

return(\%dict);
}


####
sub get_row_val {
my ($self, $row_href, $key) = @_;

if (! exists $row_href->{$key}) {
confess "Error, row: " . Dumper($row_href) > " doesn't include key: [$key]";
}

return($row_href->{$key});
}



##################################################

package DelimParser::Writer;
use strict;
use warnings;
use Carp;

our @ISA;
push (@ISA, 'DelimParser');

sub new {
my ($packagename) = shift;
my ($ofh, $delim, $column_fields_aref, $FLAGS) = @_;

## FLAGS can be:
# NO_WRITE_HEADER|...

unless (ref $column_fields_aref eq 'ARRAY') {
confess "Error, need constructor params: ofh, delim, column_fields_aref";
}

my $self = $packagename->DelimParser::new($ofh, $delim);

$self->_initialize($column_fields_aref, $FLAGS);

return($self);
}


####
sub _initialize {
my $self = shift;
my $column_fields_aref = shift;
my $FLAGS = shift;

unless (ref $column_fields_aref eq 'ARRAY') {
confess "Error, require column_fields_aref as param";
}


my $ofh = $self->get_fh();
my $delim = $self->get_delim();



$self->set_column_headers(@$column_fields_aref);

unless (defined($FLAGS) && $FLAGS =~ /NO_WRITE_HEADER/) {
my $output_line = join($delim, @$column_fields_aref);
print $ofh "$output_line\n";
}


return;
}


####
sub write_row {
my $self = shift;
my $dict_href = shift;

unless (ref $dict_href eq "HASH") {
confess "Error, need dict_href as param";
}

my $num_dict_fields = scalar(keys %$dict_href);

my @column_headers = $self->get_column_headers();


my $delim = $self->get_delim();

my @out_fields;
for my $column_header (@column_headers) {
my $field = $dict_href->{$column_header};
unless (defined $field) {
confess "Error, missing value for required column field: $column_header";
}
if ($field =~ /$delim/) {
# don't allow any delimiters to contaminate the field value, otherwise it'll introduce offsets.
$field =~ s/$delim/ /g;
}
# also avoid newlines, which will also break the output formatting.
if ($field =~ /\n/) {
$field =~ s/\n/ /g;
}

push (@out_fields, $field);
}

my $outline = join("\t", @out_fields);

my $ofh = $self->get_fh();

print $ofh "$outline\n";

return;
}


1; #EOM

10 changes: 6 additions & 4 deletions TrinityFusion
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use lib("$FindBin::Bin/PerlLib");
use Pipeliner;


my $__VERSION__ = "0.2.0";
my $__VERSION__ = "0.3.0";

my $help_flag;

Expand Down Expand Up @@ -40,7 +40,7 @@ my $MAX_FUZZY_OVERLAP = 12;

my $SHOW_VERSION = 0;


my $UTILDIR="$FindBin::Bin/util";

my $usage = <<__EOUSAGE;
Expand Down Expand Up @@ -267,8 +267,10 @@ main: {

$pipeliner->add_commands(new Command($cmd, "gmap-fusion.ok"));

$pipeliner->add_commands(new Command("ln -sf $gmap_outdir/GMAP-fusion.fusion_predictions.tsv TrinityFusion-${TrinityFusionMode}.fusion_predictions.tsv",
"sym_final.ok"));
$pipeliner->add_commands(new Command("cp $gmap_outdir/GMAP-fusion.fusion_predictions.tsv TrinityFusion-${TrinityFusionMode}.fusion_predictions.preliminary.tsv",
"define_prelim.ok"));

$pipeliner->add_commands(new Command("$UTILDIR/extract_ref_spliced_fusions.pl TrinityFusion-${TrinityFusionMode}.fusion_predictions.preliminary.tsv > TrinityFusion-${TrinityFusionMode}.fusion_predictions.tsv", "write_final.ok"));

$pipeliner->run();

Expand Down
31 changes: 31 additions & 0 deletions util/extract_ref_spliced_fusions.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env perl

use strict;
use warnings;
use FindBin;
use lib "$FindBin::Bin/../PerlLib";
use DelimParser;

my $usage = "\n\tusage: $0 fusions.preliminary > fusions.final\n\n";

my $prelim_fusions_file = $ARGV[0] or die $usage;


main: {
open(my $fh, $prelim_fusions_file) or die "Error, cannot open file: $prelim_fusions_file";
my $delim_reader = new DelimParser::Reader($fh, "\t");
my @column_headers = $delim_reader->get_column_headers();

my $delim_writer = new DelimParser::Writer(*STDOUT, "\t", \@column_headers);

while (my $row = $delim_reader->get_row()) {
if ($delim_reader->get_row_val($row, "SpliceType") eq "ONLY_REF_SPLICE") {
$delim_writer->write_row($row);
}
}

exit(0);
}



0 comments on commit 1eeb568

Please sign in to comment.