-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
differentiate preliminary vs. high conf fusion preds
- Loading branch information
1 parent
18e4969
commit 1eeb568
Showing
4 changed files
with
329 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,291 @@ | ||
#!/usr/bin/env perl | ||
|
||
# classes for DelimParser::Reader and DelimParser::Writer | ||
|
||
package DelimParser; | ||
use strict; | ||
use warnings; | ||
use Carp; | ||
|
||
#### | ||
sub new { | ||
my ($packagename, $fh, $delimiter) = @_; | ||
|
||
unless ($fh && $delimiter) { | ||
confess "Error, need filehandle and delimiter params"; | ||
} | ||
|
||
|
||
my $self = { _delim => $delimiter, | ||
_fh => $fh, | ||
|
||
# set below in _init() | ||
_column_headers => [], | ||
}; | ||
|
||
|
||
bless ($self, $packagename); | ||
|
||
return($self); | ||
} | ||
|
||
|
||
#### | ||
sub get_fh { | ||
my $self = shift; | ||
return($self->{_fh}); | ||
} | ||
|
||
#### | ||
sub get_delim { | ||
my $self = shift; | ||
return($self->{_delim}); | ||
} | ||
|
||
#### | ||
sub get_column_headers { | ||
my $self = shift; | ||
return(@{$self->{_column_headers}}); | ||
} | ||
|
||
#### | ||
sub set_column_headers { | ||
my $self = shift; | ||
my (@columns) = @_; | ||
|
||
$self->{_column_headers} = \@columns; | ||
|
||
return; | ||
} | ||
|
||
#### | ||
sub get_num_columns { | ||
my $self = shift; | ||
return(length($self->get_column_headers())); | ||
} | ||
|
||
|
||
### | ||
sub reconstruct_header_line { | ||
my $self = shift; | ||
my @column_headers = $self->get_column_headers(); | ||
|
||
my $header_line = join("\t", @column_headers); | ||
return($header_line); | ||
} | ||
|
||
### | ||
sub reconstruct_line_from_row { | ||
my $self = shift; | ||
my $row_href = shift; | ||
unless ($row_href && ref $row_href) { | ||
confess "Error, must set row_href as param"; | ||
} | ||
|
||
my @column_headers = $self->get_column_headers(); | ||
|
||
my @vals; | ||
foreach my $col_header (@column_headers) { | ||
my $val = $row_href->{$col_header}; | ||
push (@vals, $val); | ||
} | ||
|
||
my $row_text = join("\t", @vals); | ||
|
||
return($row_text); | ||
|
||
} | ||
|
||
|
||
################################################## | ||
package DelimParser::Reader; | ||
use strict; | ||
use warnings; | ||
use Carp; | ||
use Data::Dumper; | ||
|
||
our @ISA; | ||
push (@ISA, 'DelimParser'); | ||
|
||
sub new { | ||
my ($packagename) = shift; | ||
my $self = $packagename->DelimParser::new(@_); | ||
|
||
$self->_init(); | ||
|
||
return($self); | ||
} | ||
|
||
|
||
#### | ||
sub _init { | ||
my $self = shift; | ||
|
||
my $fh = $self->get_fh(); | ||
my $delim = $self->get_delim(); | ||
|
||
my $header_row = <$fh>; | ||
chomp $header_row; | ||
|
||
unless ($header_row) { | ||
confess "Error, no header row read."; | ||
} | ||
|
||
my @fields = split(/$delim/, $header_row); | ||
|
||
$self->set_column_headers(@fields); | ||
|
||
|
||
return; | ||
} | ||
|
||
#### | ||
sub get_row { | ||
my $self = shift; | ||
|
||
my $fh = $self->get_fh(); | ||
my $line = <$fh>; | ||
unless ($line) { | ||
return(undef); # eof | ||
} | ||
|
||
my $delim = $self->get_delim(); | ||
my @fields = split(/$delim/, $line); | ||
chomp $fields[$#fields]; ## it's important that this is done after the delimiter splitting in case the last field is actually empty. | ||
|
||
|
||
my @column_headers = $self->get_column_headers(); | ||
|
||
my $num_col = scalar (@column_headers); | ||
my $num_fields = scalar(@fields); | ||
|
||
if ($num_col != $num_fields) { | ||
confess "Error, line: [$line] " . Dumper(\@fields) . " is lacking $num_col fields: " . Dumper(\@column_headers); | ||
} | ||
|
||
my %dict; | ||
foreach my $colname (@column_headers) { | ||
my $field = shift @fields; | ||
$dict{$colname} = $field; | ||
} | ||
|
||
return(\%dict); | ||
} | ||
|
||
|
||
#### | ||
sub get_row_val { | ||
my ($self, $row_href, $key) = @_; | ||
|
||
if (! exists $row_href->{$key}) { | ||
confess "Error, row: " . Dumper($row_href) > " doesn't include key: [$key]"; | ||
} | ||
|
||
return($row_href->{$key}); | ||
} | ||
|
||
|
||
|
||
################################################## | ||
|
||
package DelimParser::Writer; | ||
use strict; | ||
use warnings; | ||
use Carp; | ||
|
||
our @ISA; | ||
push (@ISA, 'DelimParser'); | ||
|
||
sub new { | ||
my ($packagename) = shift; | ||
my ($ofh, $delim, $column_fields_aref, $FLAGS) = @_; | ||
|
||
## FLAGS can be: | ||
# NO_WRITE_HEADER|... | ||
|
||
unless (ref $column_fields_aref eq 'ARRAY') { | ||
confess "Error, need constructor params: ofh, delim, column_fields_aref"; | ||
} | ||
|
||
my $self = $packagename->DelimParser::new($ofh, $delim); | ||
|
||
$self->_initialize($column_fields_aref, $FLAGS); | ||
|
||
return($self); | ||
} | ||
|
||
|
||
#### | ||
sub _initialize { | ||
my $self = shift; | ||
my $column_fields_aref = shift; | ||
my $FLAGS = shift; | ||
|
||
unless (ref $column_fields_aref eq 'ARRAY') { | ||
confess "Error, require column_fields_aref as param"; | ||
} | ||
|
||
|
||
my $ofh = $self->get_fh(); | ||
my $delim = $self->get_delim(); | ||
|
||
|
||
|
||
$self->set_column_headers(@$column_fields_aref); | ||
|
||
unless (defined($FLAGS) && $FLAGS =~ /NO_WRITE_HEADER/) { | ||
my $output_line = join($delim, @$column_fields_aref); | ||
print $ofh "$output_line\n"; | ||
} | ||
|
||
|
||
return; | ||
} | ||
|
||
|
||
#### | ||
sub write_row { | ||
my $self = shift; | ||
my $dict_href = shift; | ||
|
||
unless (ref $dict_href eq "HASH") { | ||
confess "Error, need dict_href as param"; | ||
} | ||
|
||
my $num_dict_fields = scalar(keys %$dict_href); | ||
|
||
my @column_headers = $self->get_column_headers(); | ||
|
||
|
||
my $delim = $self->get_delim(); | ||
|
||
my @out_fields; | ||
for my $column_header (@column_headers) { | ||
my $field = $dict_href->{$column_header}; | ||
unless (defined $field) { | ||
confess "Error, missing value for required column field: $column_header"; | ||
} | ||
if ($field =~ /$delim/) { | ||
# don't allow any delimiters to contaminate the field value, otherwise it'll introduce offsets. | ||
$field =~ s/$delim/ /g; | ||
} | ||
# also avoid newlines, which will also break the output formatting. | ||
if ($field =~ /\n/) { | ||
$field =~ s/\n/ /g; | ||
} | ||
|
||
push (@out_fields, $field); | ||
} | ||
|
||
my $outline = join("\t", @out_fields); | ||
|
||
my $ofh = $self->get_fh(); | ||
|
||
print $ofh "$outline\n"; | ||
|
||
return; | ||
} | ||
|
||
|
||
1; #EOM | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#!/usr/bin/env perl | ||
|
||
use strict; | ||
use warnings; | ||
use FindBin; | ||
use lib "$FindBin::Bin/../PerlLib"; | ||
use DelimParser; | ||
|
||
my $usage = "\n\tusage: $0 fusions.preliminary > fusions.final\n\n"; | ||
|
||
my $prelim_fusions_file = $ARGV[0] or die $usage; | ||
|
||
|
||
main: { | ||
open(my $fh, $prelim_fusions_file) or die "Error, cannot open file: $prelim_fusions_file"; | ||
my $delim_reader = new DelimParser::Reader($fh, "\t"); | ||
my @column_headers = $delim_reader->get_column_headers(); | ||
|
||
my $delim_writer = new DelimParser::Writer(*STDOUT, "\t", \@column_headers); | ||
|
||
while (my $row = $delim_reader->get_row()) { | ||
if ($delim_reader->get_row_val($row, "SpliceType") eq "ONLY_REF_SPLICE") { | ||
$delim_writer->write_row($row); | ||
} | ||
} | ||
|
||
exit(0); | ||
} | ||
|
||
|
||
|