Skip to content

Commit

Permalink
Merge pull request #1477 from nuno-agostinho/fix/web-vep-input-format
Browse files Browse the repository at this point in the history
Fix web VEP input format detection
  • Loading branch information
nakib103 authored Sep 5, 2023
2 parents 611114e + 6382a27 commit 36e1aa6
Show file tree
Hide file tree
Showing 8 changed files with 4 additions and 185 deletions.
36 changes: 2 additions & 34 deletions modules/Bio/EnsEMBL/VEP/Parser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::VEP::Utils qw(get_compressed_filehandle);
use Bio::EnsEMBL::Variation::Utils::Sequence qw(trim_sequences);
use Bio::EnsEMBL::Variation::Utils::VEP qw(&check_format);

use Bio::EnsEMBL::VEP::Parser::VCF;
use Bio::EnsEMBL::VEP::Parser::VEP_input;
Expand Down Expand Up @@ -415,40 +416,7 @@ sub detect_format {
my @data = split $delimiter, $_;
next unless @data;

# region chr21:10-10:1/A
if ( $self->Bio::EnsEMBL::VEP::Parser::Region::validate_line(@data) ) {
$format = 'region';
}

# SPDI: NC_000016.10:68684738:G:A
elsif ($self->Bio::EnsEMBL::VEP::Parser::SPDI::validate_line(@data) ) {
$format = 'spdi';
}

# CAID: CA9985736
elsif ( $self->Bio::EnsEMBL::VEP::Parser::CAID::validate_line(@data) ) {
$format = 'caid';
}

# HGVS: ENST00000285667.3:c.1047_1048insC
elsif ( $self->Bio::EnsEMBL::VEP::Parser::HGVS::validate_line(@data) ) {
$format = 'hgvs';
}

# variant identifier: rs123456
elsif ( $self->Bio::EnsEMBL::VEP::Parser::ID::validate_line(@data) ) {
$format = 'id';
}

# VCF: 20 14370 rs6054257 G A 29 0 NS=58;DP=258;AF=0.786;DB;H2 GT:GQ:DP:HQ
elsif ( $self->Bio::EnsEMBL::VEP::Parser::VCF::validate_line(@data) ) {
$format = 'vcf';
}

# ensembl: 20 14370 14370 A/G +
elsif ( $self->Bio::EnsEMBL::VEP::Parser::VEP_input::validate_line(@data) ) {
$format = 'ensembl';
}
$format = &check_format(@data);

# reset file handle if it was a handle
eval {
Expand Down
19 changes: 0 additions & 19 deletions modules/Bio/EnsEMBL/VEP/Parser/CAID.pm
Original file line number Diff line number Diff line change
Expand Up @@ -108,25 +108,6 @@ sub new {
}


=head2 validate_line

  Arg 1      : list @fields - the fields of one input line
  Example    : $valid = $self->validate_line(@fields);
  Description: Check if input line can be read using this format: the line
               must consist of exactly one field made of "CA" followed by
               one or more digits (case-insensitive), e.g. CA9985736.
  Returntype : bool
  Exceptions : none
  Caller     : Bio::EnsEMBL::VEP::Parser::detect_format()
  Status     : Stable

=cut

sub validate_line {
  my ($self, @fields) = @_;

  # a CAID line carries the identifier and nothing else
  my $is_single_field = ( @fields == 1 );

  return ( $is_single_field && $fields[0] =~ /^CA\d{1,}$/i );
}


=head2 parser
Example : $io_parser = $parser->parser();
Expand Down
22 changes: 0 additions & 22 deletions modules/Bio/EnsEMBL/VEP/Parser/HGVS.pm
Original file line number Diff line number Diff line change
Expand Up @@ -109,28 +109,6 @@ sub new {
}


=head2 validate_line

  Arg 1      : list @fields - the fields of one input line
  Example    : $valid = $self->validate_line(@fields);
  Description: Check if input line can be read using this format: the line
               must consist of exactly one field shaped like
               <reference>:<notation>, e.g.
               ENST00000285667.3:c.1047_1048insC
  Returntype : bool
  Exceptions : none
  Caller     : Bio::EnsEMBL::VEP::Parser::detect_format()
  Status     : Stable

=cut

sub validate_line {
  my ($self, @fields) = @_;

  # HGVS notations are written as one token, so more (or fewer) fields
  # than one can never be HGVS
  my $is_single_field = ( @fields == 1 );

  return (
    $is_single_field &&
    $fields[0] =~ /^([^\:]+)\:.*?([cgmrp]?)\.?([\*\-0-9]+.*)$/i
  );
}


=head2 parser
Example : $io_parser = $parser->parser();
Expand Down
18 changes: 0 additions & 18 deletions modules/Bio/EnsEMBL/VEP/Parser/ID.pm
Original file line number Diff line number Diff line change
Expand Up @@ -105,24 +105,6 @@ sub new {
}


=head2 validate_line

  Arg 1      : list @fields - the fields of one input line
  Example    : $valid = $self->validate_line(@fields);
  Description: Check if input line can be read using this format. Any line
               made of exactly one field is accepted (e.g. rs123456); the
               content of that field is not inspected.
  Returntype : bool
  Exceptions : none
  Caller     : Bio::EnsEMBL::VEP::Parser::detect_format()
  Status     : Stable

=cut

sub validate_line {
  my ($self, @fields) = @_;

  my $field_count = @fields;

  return ( $field_count == 1 );
}

=head2 parser
Example : $io_parser = $parser->parser();
Expand Down
27 changes: 2 additions & 25 deletions modules/Bio/EnsEMBL/VEP/Parser/Region.pm
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::IO::ListBasedParser;

use Bio::EnsEMBL::Variation::Utils::VEP qw(&_valid_region_regex);

=head2 new
Expand Down Expand Up @@ -124,30 +125,6 @@ sub new {
}


# Compiled, case-insensitive pattern for one region line, e.g.
# chr21:10-10:1/A. Capture groups, in order: sequence name, start, end,
# optional strand (including its leading colon) and the allele, which is
# either a run of three or more letters or a string of ACGTN- characters.
sub _valid_line_regex {
  my $region_pattern = qr/^([^:]+):(\d+)-(\d+)(:[-\+]?1)?[\/:]([a-z]{3,}|[ACGTN-]+)$/i;

  return $region_pattern;
}


=head2 validate_line

  Arg 1      : list @fields - the fields of one input line
  Example    : $valid = $self->validate_line(@fields);
  Description: Check if input line can be read using this format: the line
               must consist of exactly one field that matches the pattern
               returned by _valid_line_regex(), e.g. chr21:10-10:1/A
  Returntype : bool
  Exceptions : none
  Caller     : Bio::EnsEMBL::VEP::Parser::detect_format()
  Status     : Stable

=cut

sub validate_line {
  my ($self, @fields) = @_;

  # a region is written as one token; anything else is another format
  my $is_single_field = ( @fields == 1 );

  return ( $is_single_field && $fields[0] =~ _valid_line_regex() );
}


=head2 parser
Example : $io_parser = $parser->parser();
Expand Down Expand Up @@ -191,7 +168,7 @@ sub create_VariationFeatures {

my $region = $parser->get_value();

return [] unless $region =~ _valid_line_regex();
return [] unless $region =~ &_valid_region_regex();
my ($chr, $start, $end) = ($1, $2, $3);

my ($strand, $allele);
Expand Down
19 changes: 0 additions & 19 deletions modules/Bio/EnsEMBL/VEP/Parser/SPDI.pm
Original file line number Diff line number Diff line change
Expand Up @@ -101,25 +101,6 @@ sub new {
}


=head2 validate_line

  Arg 1      : list @fields - the fields of one input line
  Example    : $valid = $self->validate_line(@fields);
  Description: Check if input line can be read using this format: the line
               must consist of exactly one field containing at least two
               colons, with the text after the last matched colon free of
               colons (it may be empty), e.g. NC_000016.10:68684738:G:A
  Returntype : bool
  Exceptions : none
  Caller     : Bio::EnsEMBL::VEP::Parser::detect_format()
  Status     : Stable

=cut

sub validate_line {
  my ($self, @fields) = @_;

  # SPDI is a single colon-separated token
  my $is_single_field = ( @fields == 1 );

  return ( $is_single_field && $fields[0] =~ /^(.*?\:){2}([^\:]+|)$/i );
}


=head2 parser
Example : $io_parser = $parser->parser();
Expand Down
24 changes: 0 additions & 24 deletions modules/Bio/EnsEMBL/VEP/Parser/VCF.pm
Original file line number Diff line number Diff line change
Expand Up @@ -108,30 +108,6 @@ sub new {
}


=head2 validate_line

  Arg 1      : list @line - the fields of one input line
  Example    : $valid = $self->validate_line(@line);
  Description: Check if input line can be read using this format. A line
               is accepted when its first field contains a word character,
               its second field is all digits, its fourth field is made
               only of A, C, G, T, N, "-" or "." (case-insensitive) and its
               fifth field is true, e.g.
               20 14370 rs6054257 G A 29 0 NS=58;DP=258
               Lines with fewer than two fields are rejected without
               touching the missing fields, so no "uninitialized value"
               warnings are raised for short or empty lines.
  Returntype : bool
  Exceptions : none
  Caller     : Bio::EnsEMBL::VEP::Parser::detect_format()
  Status     : Stable

=cut

sub validate_line {
  my ($self, @line) = @_;

  # the third field is not inspected
  my ($chrom, $pos, undef, $ref, $alt) = @line;

  return (
    # defined() guards keep the regex matches away from missing fields
    defined($chrom) && $chrom =~ /(chr)?\w+/ &&
    defined($pos)   && $pos   =~ /^\d+$/ &&
    $ref && $ref =~ /^[ACGTN\-\.]+$/i &&
    $alt
  );
}


=head2 parser
Example : $io_parser = $parser->parser();
Expand Down
24 changes: 0 additions & 24 deletions modules/Bio/EnsEMBL/VEP/Parser/VEP_input.pm
Original file line number Diff line number Diff line change
Expand Up @@ -71,30 +71,6 @@ use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::IO::Parser::VEP_input;


=head2 validate_line

  Arg 1      : list @line - the fields of one input line
  Example    : $valid = $self->validate_line(@line);
  Description: Check if input line can be read using this format. A line
               is accepted when its first field contains a word character,
               its second and third fields are all digits and its fourth
               field contains either two or more consecutive letters or a
               pair of ACGTN- strings separated by "/", e.g.
               20 14370 14370 A/G +
               Lines with fewer than two fields are rejected without
               touching the missing fields, so no "uninitialized value"
               warnings are raised for short or empty lines.
  Returntype : bool
  Exceptions : none
  Caller     : Bio::EnsEMBL::VEP::Parser::detect_format()
  Status     : Stable

=cut

sub validate_line {
  my ($self, @line) = @_;

  my ($chrom, $start, $end, $allele) = @line;

  return (
    # defined() guards keep the regex matches away from missing fields
    defined($chrom) && $chrom =~ /\w+/ &&
    defined($start) && $start =~ /^\d+$/ &&
    $end && $end =~ /^\d+$/ &&
    $allele && $allele =~ /([a-z]{2,})|([ACGTN-]+\/[ACGTN-]+)/i
  );
}


=head2 parser
Example : $io_parser = $parser->parser();
Expand Down

0 comments on commit 36e1aa6

Please sign in to comment.