Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix web VEP input format detection #1477

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 2 additions & 34 deletions modules/Bio/EnsEMBL/VEP/Parser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::VEP::Utils qw(get_compressed_filehandle);
use Bio::EnsEMBL::Variation::Utils::Sequence qw(trim_sequences);
use Bio::EnsEMBL::Variation::Utils::VEP qw(&check_format);

use Bio::EnsEMBL::VEP::Parser::VCF;
use Bio::EnsEMBL::VEP::Parser::VEP_input;
Expand Down Expand Up @@ -415,40 +416,7 @@ sub detect_format {
my @data = split $delimiter, $_;
next unless @data;

# region chr21:10-10:1/A
if ( $self->Bio::EnsEMBL::VEP::Parser::Region::validate_line(@data) ) {
$format = 'region';
}

# SPDI: NC_000016.10:68684738:G:A
elsif ($self->Bio::EnsEMBL::VEP::Parser::SPDI::validate_line(@data) ) {
$format = 'spdi';
}

# CAID: CA9985736
elsif ( $self->Bio::EnsEMBL::VEP::Parser::CAID::validate_line(@data) ) {
$format = 'caid';
}

# HGVS: ENST00000285667.3:c.1047_1048insC
elsif ( $self->Bio::EnsEMBL::VEP::Parser::HGVS::validate_line(@data) ) {
$format = 'hgvs';
}

# variant identifier: rs123456
elsif ( $self->Bio::EnsEMBL::VEP::Parser::ID::validate_line(@data) ) {
$format = 'id';
}

# VCF: 20 14370 rs6054257 G A 29 0 NS=58;DP=258;AF=0.786;DB;H2 GT:GQ:DP:HQ
elsif ( $self->Bio::EnsEMBL::VEP::Parser::VCF::validate_line(@data) ) {
$format = 'vcf';
}

# ensembl: 20 14370 14370 A/G +
elsif ( $self->Bio::EnsEMBL::VEP::Parser::VEP_input::validate_line(@data) ) {
$format = 'ensembl';
}
$format = &check_format(@data);

# reset file handle if it was a handle
eval {
Expand Down
19 changes: 0 additions & 19 deletions modules/Bio/EnsEMBL/VEP/Parser/CAID.pm
Original file line number Diff line number Diff line change
Expand Up @@ -108,25 +108,6 @@ sub new {
}


=head2 validate_line

  Arg 1      : list of strings: the columns of a single input line
  Example    : $valid = $self->validate_line(@columns);
  Description: Check if an input line can be read using this format: the
               line must have exactly one column, made of "CA" followed by
               one or more digits (case-insensitive), e.g. CA9985736.
  Returntype : bool
  Exceptions : none
  Caller     : detect_format() in Bio::EnsEMBL::VEP::Parser
  Status     : Stable

=cut

sub validate_line {
  my ($self, @columns) = @_;

  # the identifier has to be the only column on the line
  my $is_single_column = ( @columns == 1 );

  return ( $is_single_column && $columns[0] =~ /^CA\d{1,}$/i );
}


=head2 parser

Example : $io_parser = $parser->parser();
Expand Down
22 changes: 0 additions & 22 deletions modules/Bio/EnsEMBL/VEP/Parser/HGVS.pm
Original file line number Diff line number Diff line change
Expand Up @@ -109,28 +109,6 @@ sub new {
}


=head2 validate_line

  Arg 1      : list of strings: the columns of a single input line
  Example    : $valid = $self->validate_line(@columns);
  Description: Check if an input line can be read using this format: the
               line must have exactly one column holding a reference name,
               a colon and, somewhere after the colon, a part starting with
               a digit, "*" or "-" (optionally preceded by one of the type
               letters c, g, m, r, p and a dot),
               e.g. ENST00000285667.3:c.1047_1048insC.
  Returntype : bool
  Exceptions : none
  Caller     : detect_format() in Bio::EnsEMBL::VEP::Parser
  Status     : Stable

=cut

sub validate_line {
  my ($self, @columns) = @_;

  # HGVS notations are written as one column
  my $is_single_column = ( @columns == 1 );

  return (
    $is_single_column &&
    $columns[0] =~ /^([^\:]+)\:.*?([cgmrp]?)\.?([\*\-0-9]+.*)$/i
  );
}


=head2 parser

Example : $io_parser = $parser->parser();
Expand Down
18 changes: 0 additions & 18 deletions modules/Bio/EnsEMBL/VEP/Parser/ID.pm
Original file line number Diff line number Diff line change
Expand Up @@ -105,24 +105,6 @@ sub new {
}


=head2 validate_line

  Arg 1      : list of strings: the columns of a single input line
  Example    : $valid = $self->validate_line(@columns);
  Description: Check if an input line can be read using this format. Any
               line with exactly one column is accepted; the content of
               that column is not inspected.
  Returntype : bool
  Exceptions : none
  Caller     : detect_format() in Bio::EnsEMBL::VEP::Parser
  Status     : Stable

=cut

sub validate_line {
  my ($self, @columns) = @_;

  # only the number of columns matters here
  return ( @columns == 1 );
}

=head2 parser

Example : $io_parser = $parser->parser();
Expand Down
27 changes: 2 additions & 25 deletions modules/Bio/EnsEMBL/VEP/Parser/Region.pm
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ use Bio::EnsEMBL::Utils::Scalar qw(assert_ref);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::IO::ListBasedParser;

use Bio::EnsEMBL::Variation::Utils::VEP qw(&_valid_region_regex);

=head2 new

Expand Down Expand Up @@ -124,30 +125,6 @@ sub new {
}


# Returns the compiled pattern describing one region line, e.g.
# chr21:10-10:1/A. Capture groups 1-3 hold the sequence name, start and
# end; group 4 is the optional ":1" / ":-1" / ":+1" part (presumably the
# strand) and group 5 the trailing letters (presumably allele or variant
# type).
sub _valid_line_regex {
  my $region_pattern = qr/^([^:]+):(\d+)-(\d+)(:[-\+]?1)?[\/:]([a-z]{3,}|[ACGTN-]+)$/i;

  return $region_pattern;
}


=head2 validate_line

  Arg 1      : list of strings: the columns of a single input line
  Example    : $valid = $self->validate_line(@columns);
  Description: Check if an input line can be read using this format: the
               line must have exactly one column and that column must
               match the pattern returned by _valid_line_regex(),
               e.g. chr21:10-10:1/A.
  Returntype : bool
  Exceptions : none
  Caller     : detect_format() in Bio::EnsEMBL::VEP::Parser
  Status     : Stable

=cut

sub validate_line {
  my ($self, @columns) = @_;

  # a region is written as one column
  my $is_single_column = ( @columns == 1 );

  return ( $is_single_column && $columns[0] =~ _valid_line_regex() );
}


=head2 parser

Example : $io_parser = $parser->parser();
Expand Down Expand Up @@ -191,7 +168,7 @@ sub create_VariationFeatures {

my $region = $parser->get_value();

return [] unless $region =~ _valid_line_regex();
return [] unless $region =~ &_valid_region_regex();
my ($chr, $start, $end) = ($1, $2, $3);

my ($strand, $allele);
Expand Down
19 changes: 0 additions & 19 deletions modules/Bio/EnsEMBL/VEP/Parser/SPDI.pm
Original file line number Diff line number Diff line change
Expand Up @@ -101,25 +101,6 @@ sub new {
}


=head2 validate_line

  Arg 1      : list of strings: the columns of a single input line
  Example    : $valid = $self->validate_line(@columns);
  Description: Check if an input line can be read using this format: the
               line must have exactly one column containing at least two
               colons, with no colon after the final separator (the last
               part may be empty), e.g. NC_000016.10:68684738:G:A.
  Returntype : bool
  Exceptions : none
  Caller     : detect_format() in Bio::EnsEMBL::VEP::Parser
  Status     : Stable

=cut

sub validate_line {
  my ($self, @columns) = @_;

  # SPDI notations are written as one column
  my $is_single_column = ( @columns == 1 );

  return ( $is_single_column && $columns[0] =~ /^(.*?\:){2}([^\:]+|)$/i );
}


=head2 parser

Example : $io_parser = $parser->parser();
Expand Down
24 changes: 0 additions & 24 deletions modules/Bio/EnsEMBL/VEP/Parser/VCF.pm
Original file line number Diff line number Diff line change
Expand Up @@ -108,30 +108,6 @@ sub new {
}


=head2 validate_line

  Arg 1      : list of strings: the columns of a single input line
  Example    : $valid = $self->validate_line(@columns);
  Description: Check if an input line can be read using this format:
               column 1 must contain a word character, column 2 must be
               an unsigned integer, column 4 must consist only of A, C, G,
               T, N, "-" or "." (case-insensitive) and column 5 must be a
               true value,
               e.g. 20 14370 rs6054257 G A.
               Lines with fewer than two columns are rejected without
               emitting "uninitialized value" warnings.
  Returntype : bool
  Exceptions : none
  Caller     : detect_format() in Bio::EnsEMBL::VEP::Parser
  Status     : Stable

=cut

sub validate_line {
  my ($self, @line) = @_;

  return (
    # columns 1 and 2 are pattern-matched unconditionally below, so make
    # sure they exist first; columns 4 and 5 are already guarded by their
    # own truth tests
    defined $line[0] && defined $line[1] &&
    $line[0] =~ /(chr)?\w+/ &&
    $line[1] =~ /^\d+$/ &&
    $line[3] && $line[3] =~ /^[ACGTN\-\.]+$/i &&
    $line[4]
  );
}


=head2 parser

Example : $io_parser = $parser->parser();
Expand Down
24 changes: 0 additions & 24 deletions modules/Bio/EnsEMBL/VEP/Parser/VEP_input.pm
Original file line number Diff line number Diff line change
Expand Up @@ -71,30 +71,6 @@ use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::IO::Parser::VEP_input;


=head2 validate_line

  Arg 1      : list of strings: the columns of a single input line
  Example    : $valid = $self->validate_line(@columns);
  Description: Check if an input line can be read using this format:
               column 1 must contain a word character, columns 2 and 3
               must be unsigned integers and column 4 must contain either
               two or more consecutive letters or a pair of allele strings
               (A, C, G, T, N, "-") separated by "/",
               e.g. 20 14370 14370 A/G +.
               Lines with fewer than two columns are rejected without
               emitting "uninitialized value" warnings.
  Returntype : bool
  Exceptions : none
  Caller     : detect_format() in Bio::EnsEMBL::VEP::Parser
  Status     : Stable

=cut

sub validate_line {
  my ($self, @line) = @_;

  return (
    # columns 1 and 2 are pattern-matched unconditionally below, so make
    # sure they exist first; columns 3 and 4 are already guarded by their
    # own truth tests
    defined $line[0] && defined $line[1] &&
    $line[0] =~ /\w+/ &&
    $line[1] =~ /^\d+$/ &&
    $line[2] && $line[2] =~ /^\d+$/ &&
    $line[3] && $line[3] =~ /([a-z]{2,})|([ACGTN-]+\/[ACGTN-]+)/i
  );
}


=head2 parser

Example : $io_parser = $parser->parser();
Expand Down