diff --git a/modules/Bio/EnsEMBL/VEP/Parser.pm b/modules/Bio/EnsEMBL/VEP/Parser.pm index 11ec106af..6f8a76af8 100755 --- a/modules/Bio/EnsEMBL/VEP/Parser.pm +++ b/modules/Bio/EnsEMBL/VEP/Parser.pm @@ -67,6 +67,7 @@ use Bio::EnsEMBL::Utils::Scalar qw(assert_ref); use Bio::EnsEMBL::Utils::Exception qw(throw warning); use Bio::EnsEMBL::VEP::Utils qw(get_compressed_filehandle); use Bio::EnsEMBL::Variation::Utils::Sequence qw(trim_sequences); +use Bio::EnsEMBL::Variation::Utils::VEP qw(&check_format); use Bio::EnsEMBL::VEP::Parser::VCF; use Bio::EnsEMBL::VEP::Parser::VEP_input; @@ -415,40 +416,7 @@ sub detect_format { my @data = split $delimiter, $_; next unless @data; - # region chr21:10-10:1/A - if ( $self->Bio::EnsEMBL::VEP::Parser::Region::validate_line(@data) ) { - $format = 'region'; - } - - # SPDI: NC_000016.10:68684738:G:A - elsif ($self->Bio::EnsEMBL::VEP::Parser::SPDI::validate_line(@data) ) { - $format = 'spdi'; - } - - # CAID: CA9985736 - elsif ( $self->Bio::EnsEMBL::VEP::Parser::CAID::validate_line(@data) ) { - $format = 'caid'; - } - - # HGVS: ENST00000285667.3:c.1047_1048insC - elsif ( $self->Bio::EnsEMBL::VEP::Parser::HGVS::validate_line(@data) ) { - $format = 'hgvs'; - } - - # variant identifier: rs123456 - elsif ( $self->Bio::EnsEMBL::VEP::Parser::ID::validate_line(@data) ) { - $format = 'id'; - } - - # VCF: 20 14370 rs6054257 G A 29 0 NS=58;DP=258;AF=0.786;DB;H2 GT:GQ:DP:HQ - elsif ( $self->Bio::EnsEMBL::VEP::Parser::VCF::validate_line(@data) ) { - $format = 'vcf'; - } - - # ensembl: 20 14370 14370 A/G + - elsif ( $self->Bio::EnsEMBL::VEP::Parser::VEP_input::validate_line(@data) ) { - $format = 'ensembl'; - } + $format = &check_format(@data); # reset file handle if it was a handle eval { diff --git a/modules/Bio/EnsEMBL/VEP/Parser/CAID.pm b/modules/Bio/EnsEMBL/VEP/Parser/CAID.pm index d709bbd4d..e051880a1 100755 --- a/modules/Bio/EnsEMBL/VEP/Parser/CAID.pm +++ b/modules/Bio/EnsEMBL/VEP/Parser/CAID.pm @@ -108,25 +108,6 @@ sub new { } -=head2 validate_line - - Example : $valid = $self->validate_line(); - Description: Check if input line can be read using this format. - Returntype : bool - Exceptions : none - Caller : $self->SUPER::detect_format() - Status : Stable - -=cut - -sub validate_line { - my $self = shift; - my @line = @_; - - return ( scalar @line == 1 && $line[0] =~ /^CA\d{1,}$/i ); -} - - =head2 parser Example : $io_parser = $parser->parser(); diff --git a/modules/Bio/EnsEMBL/VEP/Parser/HGVS.pm b/modules/Bio/EnsEMBL/VEP/Parser/HGVS.pm index 391bfc8ae..dedde26d7 100644 --- a/modules/Bio/EnsEMBL/VEP/Parser/HGVS.pm +++ b/modules/Bio/EnsEMBL/VEP/Parser/HGVS.pm @@ -109,28 +109,6 @@ sub new { } -=head2 validate_line - - Example : $valid = $self->validate_line(); - Description: Check if input line can be read using this format. - Returntype : bool - Exceptions : none - Caller : $self->SUPER::detect_format() - Status : Stable - -=cut - -sub validate_line { - my $self = shift; - my @line = @_; - - return ( - scalar @line == 1 && - $line[0] =~ /^([^\:]+)\:.*?([cgmrp]?)\.?([\*\-0-9]+.*)$/i - ); -} - - =head2 parser Example : $io_parser = $parser->parser(); diff --git a/modules/Bio/EnsEMBL/VEP/Parser/ID.pm b/modules/Bio/EnsEMBL/VEP/Parser/ID.pm index ebce17995..1f5b43ec6 100644 --- a/modules/Bio/EnsEMBL/VEP/Parser/ID.pm +++ b/modules/Bio/EnsEMBL/VEP/Parser/ID.pm @@ -105,24 +105,6 @@ sub new { } -=head2 validate_line - - Example : $valid = $self->validate_line(); - Description: Check if input line can be read using this format. - Returntype : bool - Exceptions : none - Caller : $self->SUPER::detect_format() - Status : Stable - -=cut - -sub validate_line { - my $self = shift; - my @line = @_; - - return ( scalar @line == 1 ); -} - =head2 parser Example : $io_parser = $parser->parser(); diff --git a/modules/Bio/EnsEMBL/VEP/Parser/Region.pm b/modules/Bio/EnsEMBL/VEP/Parser/Region.pm index a1af5d5e2..4540b14e6 100644 --- a/modules/Bio/EnsEMBL/VEP/Parser/Region.pm +++ b/modules/Bio/EnsEMBL/VEP/Parser/Region.pm @@ -86,6 +86,7 @@ use Bio::EnsEMBL::Utils::Scalar qw(assert_ref); use Bio::EnsEMBL::Utils::Exception qw(throw warning); use Bio::EnsEMBL::IO::ListBasedParser; +use Bio::EnsEMBL::Variation::Utils::VEP qw(&_valid_region_regex); =head2 new @@ -124,30 +125,6 @@ sub new { } -sub _valid_line_regex { - return qr/^([^:]+):(\d+)-(\d+)(:[-\+]?1)?[\/:]([a-z]{3,}|[ACGTN-]+)$/i; -} - - -=head2 validate_line - - Example : $valid = $self->validate_line(); - Description: Check if input line can be read using this format. - Returntype : bool - Exceptions : none - Caller : $self->SUPER::detect_format() - Status : Stable - -=cut - -sub validate_line { - my $self = shift; - my @line = @_; - - return ( scalar @line == 1 && $line[0] =~ _valid_line_regex() ); -} - - =head2 parser Example : $io_parser = $parser->parser(); @@ -191,7 +168,7 @@ sub create_VariationFeatures { my $region = $parser->get_value(); - return [] unless $region =~ _valid_line_regex(); + return [] unless $region =~ &_valid_region_regex(); my ($chr, $start, $end) = ($1, $2, $3); my ($strand, $allele); diff --git a/modules/Bio/EnsEMBL/VEP/Parser/SPDI.pm b/modules/Bio/EnsEMBL/VEP/Parser/SPDI.pm index 612ce9b61..a314f58fb 100644 --- a/modules/Bio/EnsEMBL/VEP/Parser/SPDI.pm +++ b/modules/Bio/EnsEMBL/VEP/Parser/SPDI.pm @@ -101,25 +101,6 @@ sub new { } -=head2 validate_line - - Example : $valid = $self->validate_line(); - Description: Check if input line can be read using this format. - Returntype : bool - Exceptions : none - Caller : $self->SUPER::detect_format() - Status : Stable - -=cut - -sub validate_line { - my $self = shift; - my @line = @_; - - return ( scalar @line == 1 && $line[0] =~ /^(.*?\:){2}([^\:]+|)$/i ); -} - - =head2 parser Example : $io_parser = $parser->parser(); diff --git a/modules/Bio/EnsEMBL/VEP/Parser/VCF.pm b/modules/Bio/EnsEMBL/VEP/Parser/VCF.pm index 8f605c4bd..8b80e5caa 100644 --- a/modules/Bio/EnsEMBL/VEP/Parser/VCF.pm +++ b/modules/Bio/EnsEMBL/VEP/Parser/VCF.pm @@ -108,30 +108,6 @@ sub new { } -=head2 validate_line - - Example : $valid = $self->validate_line(); - Description: Check if input line can be read using this format. - Returntype : bool - Exceptions : none - Caller : $self->SUPER::detect_format() - Status : Stable - -=cut - -sub validate_line { - my $self = shift; - my @line = @_; - - return ( - $line[0] =~ /(chr)?\w+/ && - $line[1] =~ /^\d+$/ && - $line[3] && $line[3] =~ /^[ACGTN\-\.]+$/i && - $line[4] - ); -} - - =head2 parser Example : $io_parser = $parser->parser(); diff --git a/modules/Bio/EnsEMBL/VEP/Parser/VEP_input.pm b/modules/Bio/EnsEMBL/VEP/Parser/VEP_input.pm index 2d66ecee1..708df0e95 100644 --- a/modules/Bio/EnsEMBL/VEP/Parser/VEP_input.pm +++ b/modules/Bio/EnsEMBL/VEP/Parser/VEP_input.pm @@ -71,30 +71,6 @@ use Bio::EnsEMBL::Utils::Exception qw(throw warning); use Bio::EnsEMBL::IO::Parser::VEP_input; -=head2 validate_line - - Example : $valid = $self->validate_line(); - Description: Check if input line can be read using this format. - Returntype : bool - Exceptions : none - Caller : $self->SUPER::detect_format() - Status : Stable - -=cut - -sub validate_line { - my $self = shift; - my @line = @_; - - return ( - $line[0] =~ /\w+/ && - $line[1] =~ /^\d+$/ && - $line[2] && $line[2] =~ /^\d+$/ && - $line[3] && $line[3] =~ /([a-z]{2,})|([ACGTN-]+\/[ACGTN-]+)/i - ); -} - - =head2 parser Example : $io_parser = $parser->parser();