diff --git a/.travis.yml b/.travis.yml index a68d3ec5c..33c2fa8e3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,69 +1,81 @@ language: perl -perl: - - "5.14" - - "5.26.2" -services: - - mysql -env: - - COVERALLS=true DB=mysql -addons: - apt: - update: true - packages: - - unzip - - sendmail - - graphviz - - emboss -before_install: - - git clone --depth 1 https://github.com/Ensembl/ensembl-git-tools.git - - export PATH=$PATH:$PWD/ensembl-git-tools/bin - - export ENSEMBL_BRANCH=master - - export SECONDARY_BRANCH=main - - echo "TRAVIS_BRANCH=$TRAVIS_BRANCH" - - if [[ $TRAVIS_BRANCH =~ ^release\/[0-9]+$ ]]; then export ENSEMBL_BRANCH=$TRAVIS_BRANCH; export SECONDARY_BRANCH=$TRAVIS_BRANCH; fi - - echo "ENSEMBL_BRANCH=$ENSEMBL_BRANCH" - - echo "SECONDARY_BRANCH=$SECONDARY_BRANCH" - - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl-test - - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl - - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl-compara - - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl-datacheck - - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl-variation - - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl-metadata - - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl-funcgen - - git-ensembl --clone --branch master --secondary_branch main --depth 1 ensembl-hive - - git-ensembl --clone --branch master --secondary_branch main --depth 1 ensembl-orm - - git-ensembl --clone --branch master --secondary_branch main --depth 1 ensembl-taxonomy - - git clone --branch 1.9 --depth 1 https://github.com/samtools/htslib.git - - git clone --branch release-1-6-924 --depth 1 https://github.com/bioperl/bioperl-live.git - - cd htslib - - make - - export HTSLIB_DIR=$(pwd -P) - - cd .. -install: - - cpanm --sudo -v --installdeps --with-recommends --notest --cpanfile ensembl/cpanfile . - - cpanm --sudo -v --installdeps --notest --cpanfile ensembl-hive/cpanfile . - - cpanm --sudo -v --installdeps --notest --cpanfile ensembl-datacheck/cpanfile . - - export PERL5LIB=$PERL5LIB:$PWD/bioperl-live - - cpanm --sudo -v --installdeps --notest . 
- - cpanm --sudo -n Devel::Cover::Report::Coveralls - - cp travisci/MultiTestDB.conf.travisci modules/t/MultiTestDB.conf - - mysql -u root -h localhost -e 'GRANT ALL PRIVILEGES ON *.* TO "travis"@"%"' -script: - - ./travisci/harness.sh +os: linux jobs: include: - - language: python - python: 3.8 + - name: "Perl Job" + perl: "5.26.2" + services: + - mysql + env: + - COVERALLS=true DB=mysql + addons: + apt: + update: true + packages: + - unzip + - sendmail + - graphviz + - emboss + - libkyotocabinet-dev + before_install: + - git clone --depth 1 https://github.com/Ensembl/ensembl-git-tools.git + - export PATH=$PATH:$PWD/ensembl-git-tools/bin + - export ENSEMBL_BRANCH=master + - export SECONDARY_BRANCH=main + - echo "TRAVIS_BRANCH=$TRAVIS_BRANCH" + - if [[ $TRAVIS_BRANCH =~ ^release\/[0-9]+$ ]]; then export ENSEMBL_BRANCH=$TRAVIS_BRANCH; export SECONDARY_BRANCH=$TRAVIS_BRANCH; fi + - echo "ENSEMBL_BRANCH=$ENSEMBL_BRANCH" + - echo "SECONDARY_BRANCH=$SECONDARY_BRANCH" + - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl-test + - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl + - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl-compara + - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl-datacheck + - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl-variation + - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl-metadata + - git-ensembl --clone --branch $ENSEMBL_BRANCH --secondary_branch $SECONDARY_BRANCH --depth 1 ensembl-funcgen + - git-ensembl --clone --branch master --secondary_branch main --depth 1 ensembl-hive + - git-ensembl --clone --branch master --secondary_branch main --depth 1 ensembl-orm + - git-ensembl --clone --branch master --secondary_branch main --depth 1 ensembl-taxonomy + - git clone --branch 1.9 --depth 1 https://github.com/samtools/htslib.git + - git clone --branch release-1-6-924 --depth 1 https://github.com/bioperl/bioperl-live.git + - cd htslib + - make + - export HTSLIB_DIR=$(pwd -P) + - mysql -e "SET GLOBAL local_infile=1;" + - cd .. + install: + - cpanm --sudo -v --installdeps --with-recommends --notest --cpanfile ensembl/cpanfile . + - cpanm --sudo -v --installdeps --notest --cpanfile ensembl-hive/cpanfile . + - cpanm --sudo -v --installdeps --notest --cpanfile ensembl-datacheck/cpanfile . + - export PERL5LIB=$PERL5LIB:$PWD/bioperl-live + - cpanm travisci/kyotocabinet-perl-1.20.tar.gz + - cpanm --sudo -v --installdeps --notest . + - cpanm --sudo -n Devel::Cover::Report::Coveralls + - cp travisci/MultiTestDB.conf.travisci modules/t/MultiTestDB.conf + - mysql -u root -h localhost -e 'GRANT ALL PRIVILEGES ON *.* TO "travis"@"%"' + script: + - ./travisci/harness.sh + + - name: "Python Job" + language: python + python: + - "3.10" + - "3.11" + services: + - mysql + env: + - COVERALLS=true DB=mysql install: - - pip install -e . - pip install -r requirements-test.txt + - pip install -e . 
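+      # MySQL's server-side local_infile flag is off by default; before_script switches it on so tests can use LOAD DATA LOCAL INFILE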
+ before_script: + - mysql -e "SET GLOBAL local_infile=1;" script: - pytest src/python/test - notifications: email: on_success: always on_failure: always slack: - secure: BkrSPAkOM5aTOpeyO9vZnHdZ0LF1PLk0r2HtcXN2eTMyiHoGXkl6VUjdAL8EkzI4gunW2GProdSIjHpf60WdiEmKAulMdJRI+xyUbuxnY31mwiikS9HYwqmPBbMTf0Mh2pMBngZRFs+gaFZDUMTfLfp+8MQfU1R54yb6hPuVt5I= + secure: BkrSPAkOM5aTOpeyO9vZnHdZ0LF1PLk0r2HtcXN2eTMyiHoGXkl6VUjdAL8EkzI4gunW2GProdSIjHpf60WdiEmKAulMdJRI+xyUbuxnY31mwiikS9HYwqmPBbMTf0Mh2pMBngZRFs+gaFZDUMTfLfp+8MQfU1R54yb6hPuVt5I= \ No newline at end of file diff --git a/cpanfile b/cpanfile index ffd9edb0c..6939b8444 100644 --- a/cpanfile +++ b/cpanfile @@ -14,7 +14,7 @@ requires 'File::Slurp'; requires 'Log::Log4perl'; requires 'XML::Simple'; requires 'Time::Duration'; -requires 'Tie::LevelDB'; requires 'IO::Zlib'; requires 'File::Temp'; requires 'Fcntl'; +requires 'KyotoCabinet'; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/AlphaFold/CreateAlphaDB.pm b/modules/Bio/EnsEMBL/Production/Pipeline/AlphaFold/CreateAlphaDB.pm index b7ec2f278..905ed4fca 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/AlphaFold/CreateAlphaDB.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/AlphaFold/CreateAlphaDB.pm @@ -33,12 +33,12 @@ This module prepares a DB with a mapping from Uniprot accession to related Alphafold data (Alphafold accession, protein start, end). The DB is created on - disk in LevelDB format. + disk in KyotoCabinet format. =head1 DESCRIPTION - We expect the file accession_ids.csv to be available - - We go through the file and build a LevelDB mapping the Uniprot accession to the Alphafold data + - We go through the file and build a DB mapping the Uniprot accession to the Alphafold data =cut @@ -49,7 +49,7 @@ use strict; use parent 'Bio::EnsEMBL::Production::Pipeline::Common::Base'; use Bio::EnsEMBL::Utils::Exception qw(throw info); -use Tie::LevelDB; +use KyotoCabinet; use File::Temp 'tempdir'; @@ -66,7 +66,7 @@ sub run { throw ("Data file not found: '$map_file' on host " . `hostname`) unless -f $map_file; - my $idx_dir = $self->param_required('alphafold_db_dir') . '/uniprot-to-alpha.leveldb'; + my $idx_dir = $self->param_required('alphafold_db_dir') . '/uniprot-to-alphafold'; if (-d $idx_dir) { system(qw(rm -rf), $idx_dir); } @@ -78,33 +78,42 @@ sub run { $copy_to = $idx_dir; $idx_dir = tempdir(DIR => '/dev/shm/'); } - - tie(my %idx, 'Tie::LevelDB', $idx_dir) - or die "Error trying to tie Tie::LevelDB $idx_dir: $!"; + + my $db = new KyotoCabinet::DB; + + # Set 4 GB mmap size + my $mapsize_gb = 4 << 30; + + # Open the DB + # Open as the exclusive writer, truncate if it exists, otherwise create the DB + # Open the database as a file hash DB, 600M buckets, 4GB mmap, linear option for + # hash collision handling. These are tuned for write speed and for approx. 300M entries. + # As with a regular Perl hash, a duplicate entry will overwrite the previous + # value. + $db->open("$idx_dir/uniprot-to-alphafold.kch#bnum=600000000#msiz=$mapsize_gb#opts=l", + $db->OWRITER | $db->OCREATE | $db->OTRUNCATE + ) or die "Error opening DB: " . 
$db->error(); my $map; open($map, '<', $map_file) or die "Opening map file $map_file failed: $!"; - # A line from accession_ids.csv looks like this: - # Uniprot accession, hit start, hit end, Alphafold accession, Alphafold version - # A0A2I1PIX0,1,200,AF-A0A2I1PIX0-F1,4 - # Currently, all entries in this file have a unique uniprot accession and - # have a hit starting at 1 - while (my $line = <$map>) { + chomp $line; + # A line from accession_ids.csv looks like this: + # Uniprot accession, hit start, hit end, Alphafold accession, Alphafold version + # A0A2I1PIX0,1,200,AF-A0A2I1PIX0-F1,4 + # Currently, all entries in this file have a unique uniprot accession and + # have a hit starting at 1 unless ($line =~ /^\w+,\d+,\d+,[\w_-]+,\d+$/) { - chomp $line; - warn "Data error. Line is not what we expect: '$line'"; - next; + die "Data error. Line is not what we expect: '$line'"; } my @x = split(",", $line, 2); - # This is the DB write operation. Tie::LevelDB will croak on errors (e.g. disk full) - $idx{$x[0]} = $x[1]; + # This is the DB write operation. + $db->set($x[0], $x[1]) or die "Error inserting data: " . $db->error(); } - close($map); - untie %idx; + $db->close() or die "Error closing DB: " . $db->error(); if ($copy_back) { system (qw(cp -r), $idx_dir, $copy_to); diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/AlphaFold/CreateUniparcDB.pm b/modules/Bio/EnsEMBL/Production/Pipeline/AlphaFold/CreateUniparcDB.pm index 58c01117a..da6064f2e 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/AlphaFold/CreateUniparcDB.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/AlphaFold/CreateUniparcDB.pm @@ -32,12 +32,12 @@ =head1 SYNOPSIS This module prepares a DB with a mapping from Uniparc accession to Uniprot - accession. The DB is created on disk in LevelDB format. + accession. The DB is created on disk in KyotoCabinet format. =head1 DESCRIPTION - We expect the file idmapping_selected.tab.gz to be available - - We go through the file and build a LevelDB mapping the Uniparc accessions to Uniprot accessions + - We go through the file and build a DB mapping the Uniparc accessions to Uniprot accessions =cut @@ -49,7 +49,7 @@ use strict; use parent 'Bio::EnsEMBL::Production::Pipeline::Common::Base'; use Bio::EnsEMBL::Utils::Exception qw(throw info); -use Tie::LevelDB; +use KyotoCabinet; use IO::Zlib; use File::Temp 'tempdir'; @@ -66,7 +66,7 @@ sub run { throw ("Data file not found: '$map_file' on host " . `hostname`) unless -f $map_file; - my $idx_dir = $self->param_required('uniparc_db_dir') . '/uniparc-to-uniprot.leveldb'; + my $idx_dir = $self->param_required('uniparc_db_dir') . '/uniparc-to-uniprot'; if (-d $idx_dir) { system(qw(rm -rf), $idx_dir); } @@ -79,8 +79,21 @@ sub run { $idx_dir = tempdir(DIR => '/dev/shm/'); } - tie(my %idx, 'Tie::LevelDB', $idx_dir) - or die "Error trying to tie Tie::LevelDB $idx_dir: $!"; + my $db = new KyotoCabinet::DB; + + # Set 4 GB mmap size + my $mapsize_gb = 4 << 30; + + # Open the DB + # Open as the exclusive writer, truncate if it exists, otherwise create the DB + # Open the database as a file hash DB, 600M buckets, 4GB mmap, linear option for + # hash collision handling. These are tuned for write speed and for approx. 300M entries. + # Uniparc has 251M entries at the moment. + # As with a regular Perl hash, a duplicate entry will overwrite the previous + # value. + $db->open("$idx_dir/uniparc-to-uniprot.kch#bnum=600000000#msiz=$mapsize_gb#opts=l", + $db->OWRITER | $db->OCREATE | $db->OTRUNCATE + ) or die "Error opening DB: " . 
$db->error(); my $map = new IO::Zlib; $map->open($map_file, 'rb') or die "Opening map file $map_file with IO::Zlib failed: $!"; @@ -90,22 +103,27 @@ sub run { # We pick out the Uniparc accession and Uniprot accession # index[10] (Uniparc): UPI00003B0FD4; index[0] (Uniprot): Q6GZX4 my $line; + while ($line = <$map>) { + chomp $line; unless ($line =~ /^\w+\t[[:print:]\t]+$/) { - warn "Data error: Line is not what we expect: '$line'"; - next; + die "Data error: Line is not what we expect: '$line'"; } my @x = split("\t", $line, 12); unless ($x[10] and $x[10] =~ /^UPI\w+$/) { - warn "Data error: Uniparc accession is not what we expect: '$line'"; - next; + die "Data error: Uniparc accession is not what we expect: '$line'"; + } + # This is the DB write operation. + my $oldval; + if ($oldval = $db->get($x[10])) { + $db->set($x[10], "$oldval\t" . $x[0]) or die "Error inserting data: " . $db->error(); + } else { + $db->set($x[10], $x[0]) or die "Error inserting data: " . $db->error(); } - # This is the DB write operation. Tie::LevelDB will croak on errors (e.g. disk full) - $idx{$x[10]} = $x[0]; } $map->close; - untie %idx; + $db->close() or die "Error closing DB: " . $db->error(); if ($copy_back) { system (qw(cp -r), $idx_dir, $copy_to); diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/AlphaFold/InsertProteinFeatures.pm b/modules/Bio/EnsEMBL/Production/Pipeline/AlphaFold/InsertProteinFeatures.pm index 059ac4a3e..0178fc21e 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/AlphaFold/InsertProteinFeatures.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/AlphaFold/InsertProteinFeatures.pm @@ -164,7 +164,7 @@ sub run { -db => 'alphafold', -db_version => $alpha_version, -db_file => $self->param('db_dir') . '/accession_ids.csv', - -display_label => 'AlphaFold DB import', + -display_label => 'AFDB-ENSP mapping', -displayable => '1', -description => 'Protein features based on AlphaFold predictions, mapped with GIFTS or UniParc' ); diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/FileDump/Geneset_GFF3.pm b/modules/Bio/EnsEMBL/Production/Pipeline/FileDump/Geneset_GFF3.pm index 49258e302..dff0d0adc 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/FileDump/Geneset_GFF3.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/FileDump/Geneset_GFF3.pm @@ -266,7 +266,8 @@ sub Bio::EnsEMBL::Transcript::summary_as_hash { $summary{'transcript_support_level'} = $self->tsl if $self->tsl; my @tags; - push(@tags, 'basic') if $self->gencode_basic(); + push(@tags, 'gencode_basic') if $self->gencode_basic(); + push(@tags, 'gencode_primary') if $self->gencode_primary(); push(@tags, 'Ensembl_canonical') if $self->is_canonical(); # A transcript can have different types of MANE-related attributes (MANE_Select, MANE_Plus_Clinical) diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/GFF3/DumpFile.pm b/modules/Bio/EnsEMBL/Production/Pipeline/GFF3/DumpFile.pm index 6d88ab72a..9efa955a6 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/GFF3/DumpFile.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/GFF3/DumpFile.pm @@ -279,7 +279,8 @@ sub Bio::EnsEMBL::Transcript::summary_as_hash { $summary{'transcript_support_level'} = $self->tsl if $self->tsl; my @tags; - push(@tags, 'basic') if $self->gencode_basic(); + push(@tags, 'gencode_basic') if $self->gencode_basic(); + push(@tags, 'gencode_primary') if $self->gencode_primary(); push(@tags, 'Ensembl_canonical') if $self->is_canonical(); # A transcript can have different types of MANE-related attributes (MANE_Select, MANE_Plus_Clinical) diff --git 
a/modules/Bio/EnsEMBL/Production/Pipeline/GTF/DumpFile.pm b/modules/Bio/EnsEMBL/Production/Pipeline/GTF/DumpFile.pm index 79358a04d..d95763721 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/GTF/DumpFile.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/GTF/DumpFile.pm @@ -383,7 +383,8 @@ feature for the position of this on the genome - cds_start_NF: the coding region start could not be confirmed - mRNA_end_NF: the mRNA end could not be confirmed - mRNA_start_NF: the mRNA start could not be confirmed. -- basic: the transcript is part of the gencode basic geneset +- gencode_basic: the transcript is part of the gencode basic geneset +- gencode_primary: the transcript is part of the gencode primary geneset Comments diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/Ga4ghChecksum/ChecksumGenerator.pm b/modules/Bio/EnsEMBL/Production/Pipeline/Ga4ghChecksum/ChecksumGenerator.pm index 28aa1ba06..d5d8e3a8a 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/Ga4ghChecksum/ChecksumGenerator.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/Ga4ghChecksum/ChecksumGenerator.pm @@ -218,7 +218,7 @@ sub all_hashes { } ## end foreach my $slice (@slices) for my $seq_type (keys %$batch) { - for my $attrib_table (keys $batch->{$seq_type}) { + for my $attrib_table (keys %{$batch->{$seq_type}}) { $attribute_adaptor->store_batch_on_Object($attrib_table, $batch->{$seq_type}->{$attrib_table}, 1000); } } diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/JSON/JsonRemodeller.pm b/modules/Bio/EnsEMBL/Production/Pipeline/JSON/JsonRemodeller.pm index d8e8328da..a6738edc0 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/JSON/JsonRemodeller.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/JSON/JsonRemodeller.pm @@ -292,7 +292,10 @@ sub merge_xrefs { $obj->{$dbname} = []; } for my $ann ( @{ $subobj->{$dbname} } ) { - push $obj->{$dbname}, $self->copy_hash($ann); + if (ref($obj->{$dbname}) ne 'ARRAY') { + $obj->{$dbname} = []; + } + push @{ $obj->{$dbname} }, $self->copy_hash($ann); } } } diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/LoadFamily/AddFamilyMembers.pm b/modules/Bio/EnsEMBL/Production/Pipeline/LoadFamily/AddFamilyMembers.pm index 37da4aabe..043c98ebc 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/LoadFamily/AddFamilyMembers.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/LoadFamily/AddFamilyMembers.pm @@ -78,11 +78,13 @@ sub run { # create a hash first though, which can then be processed # gene_id as key, then sets of protein-family pairs my $gene_families = {}; + # retrieve family data for canonical translations my $sql = qq/select t.gene_id, t.transcript_id, pf.hit_name from coord_system c join seq_region s using (coord_system_id) - join transcript t using (seq_region_id) - join translation tl using (transcript_id) + join gene g using(seq_region_id) + join transcript t on t.transcript_id = g.canonical_transcript_id + join translation tl on tl.translation_id = t.canonical_translation_id join protein_feature pf using (translation_id) join analysis pfa ON (pf.analysis_id=pfa.analysis_id) where pfa.logic_name in ($logic_names) @@ -104,37 +106,52 @@ sub run { my $family_members = {}; while ( my ( $gene_id, $hits ) = each %$gene_families ) { my $gene = $gene_adaptor->fetch_by_dbID($gene_id); - # create and store gene member - my $gene_member = - Bio::EnsEMBL::Compara::GeneMember->new_from_Gene( - -GENE => $gene, - -GENOME_DB => $genome_db, - -BIOTYPE_GROUP => $gene->get_Biotype->biotype_group() - ); - # If there are duplicate stable IDs, trap fatal error from compara - # method, so we can 
skip it and carry on with others. - eval { - $gene_member_dba->store($gene_member); - }; - if ($@) { - my ($msg) = $@ =~ /MSG:\s+([^\n]+)/m; - $self->warning('Duplicate stable ID: '.$msg); + + # retrieve or create-and-store gene_member from the current genome_db + my $gene_member = $gene_member_dba->fetch_by_stable_id_GenomeDB($gene->stable_id, + $genome_db); + my $existing_canonical; + if (defined $gene_member) { + $existing_canonical = $seq_member_dba->fetch_by_dbID( $gene_member->canonical_member_id ); } else { - for my $hit (@$hits) { - my $transcript = - $transcript_adaptor->fetch_by_dbID( $hit->[0] ); - my $seq_member = - Bio::EnsEMBL::Compara::SeqMember->new_from_Transcript( - -TRANSCRIPT => $transcript, - -TRANSLATE => 'yes', - -GENOME_DB => $genome_db - ); + $gene_member = + Bio::EnsEMBL::Compara::GeneMember->new_from_Gene( + -GENE => $gene, + -GENOME_DB => $genome_db, + -BIOTYPE_GROUP => $gene->get_Biotype->biotype_group() + ); + $gene_member_dba->store($gene_member); + } + + for my $hit (@$hits) { + my $transcript = + $transcript_adaptor->fetch_by_dbID( $hit->[0] ); + my $translation_stable_id = $transcript->translation->stable_id; + + if (defined $existing_canonical && $translation_stable_id ne $existing_canonical->stable_id) { + $self->warning(sprintf('skipping translation %s because stable ID does not match canonical member %s', + $translation_stable_id, $existing_canonical->stable_id)); + next; + } + + # retrieve or create-and-store seq_member from the current genome_db + my $seq_member = + $seq_member_dba->fetch_by_stable_id_GenomeDB($translation_stable_id, + $genome_db); + if (!defined $seq_member) { + $seq_member = + Bio::EnsEMBL::Compara::SeqMember->new_from_Transcript( + -TRANSCRIPT => $transcript, + -TRANSLATE => 'yes', + -GENOME_DB => $genome_db + ); # TODO store CDS too? $seq_member->gene_member_id( $gene_member->dbID ); $seq_member_dba->store($seq_member); $seq_member_dba->_set_member_as_canonical($seq_member); - push @{ $family_members->{ $hit->[1] } }, $seq_member->dbID(); } + + push @{ $family_members->{ $hit->[1] } }, $seq_member->dbID(); } } ## end while ( my ( $gene_id, $hits...)) print "Saving familes for ".$dba->species()."\n"; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/LoadFamily/CreateFamilies.pm b/modules/Bio/EnsEMBL/Production/Pipeline/LoadFamily/CreateFamilies.pm index e5cd4784c..f505d87fd 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/LoadFamily/CreateFamilies.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/LoadFamily/CreateFamilies.pm @@ -66,7 +66,9 @@ sub run { my $compara_dba = $compara_dbas[0]; $self->param('compara_url', $compara_dba->url); my $schema_version = $compara_dba->get_MetaContainer->get_schema_version(); - $compara_dba->dbc()->sql_helper()->execute_update(-SQL=>'delete family.*,family_member.* from family left join family_member using (family_id)'); + my $compara_sql_helper = $compara_dba->dbc()->sql_helper(); + $compara_sql_helper->execute_update(-SQL=>'delete family.*,family_member.* from family left join family_member using (family_id)'); + # get compara my $genome_dba = $compara_dba->get_GenomeDBAdaptor(); @@ -143,25 +145,36 @@ sub run { print "Found ".scalar(keys(%{$families}))." 
familes\n"; # create and store MLSS - my $sso = Bio::EnsEMBL::Compara::SpeciesSet->new( - -GENOME_DBS => $genome_dbs, - -NAME => "collection-all_division", - ); - $sso->first_release($schema_version); - $compara_dba->get_SpeciesSetAdaptor()->store($sso); - - my $mlss = - Bio::EnsEMBL::Compara::MethodLinkSpeciesSet->new( - -method => - Bio::EnsEMBL::Compara::Method->new( - -type => 'FAMILY', - -class => 'Family.family', - -display_name => 'families' - ), - -species_set => $sso ); - $mlss->first_release($schema_version); - - $compara_dba->get_MethodLinkSpeciesSetAdaptor()->store($mlss); + my $sql = q/ + insert ignore into method_link (method_link_id, type, class, display_name) + values (301, 'FAMILY', 'Family.family', 'families')/; + $compara_sql_helper->execute_update( -SQL => $sql ); + my $method_dba = $compara_dba->get_MethodAdaptor(); + my $method = $method_dba->fetch_by_type('FAMILY'); + + my $species_set_dba = $compara_dba->get_SpeciesSetAdaptor(); + my $sso = $species_set_dba->fetch_by_GenomeDBs($genome_dbs); + if (!defined $sso) { + $sso = Bio::EnsEMBL::Compara::SpeciesSet->new( + -GENOME_DBS => $genome_dbs, + -NAME => "collection-all_division", + ); + $sso->first_release($schema_version); + $species_set_dba->store($sso); + } + + my $mlss_dba = $compara_dba->get_MethodLinkSpeciesSetAdaptor(); + my $mlss = $mlss_dba->fetch_by_method_link_type_GenomeDBs($method->type, $genome_dbs); + if (!defined $mlss) { + $mlss = + Bio::EnsEMBL::Compara::MethodLinkSpeciesSet->new( + -method => $method, + -name => 'all_division families', + -species_set => $sso ); + $mlss->first_release($schema_version); + $mlss_dba->store($mlss); + } + my $family_dba = $compara_dba->get_FamilyAdaptor(); while ( my ( $id, $name ) = each %$families ) { print "Storing family $id $name\n"; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/Ortholog/SourceFactory.pm b/modules/Bio/EnsEMBL/Production/Pipeline/Ortholog/SourceFactory.pm index 4599d8636..37cd8b09e 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/Ortholog/SourceFactory.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/Ortholog/SourceFactory.pm @@ -59,7 +59,7 @@ sub write_output { my $compara_param = $self->param('compara'); my $cleanup_dir = $self->param('cleanup_dir'); - foreach my $pair (keys $sp_config) { + foreach my $pair (keys %{$sp_config}) { my $compara = $sp_config->{$pair}->{'compara'}; if (defined $compara_param && $compara ne $compara_param) { print STDERR "Skipping $compara\n"; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/AlphaDBImport_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/AlphaDBImport_conf.pm index b05614659..f16b1dfbe 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/AlphaDBImport_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/AlphaDBImport_conf.pm @@ -45,7 +45,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.6; +use Bio::EnsEMBL::Hive::Version 2.7; =head2 default_options diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/BasePython_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/BasePython_conf.pm index 2e51ffe33..54906c466 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/BasePython_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/BasePython_conf.pm @@ -25,7 +25,7 @@ use strict; use warnings; use Data::Dumper; use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use 
Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use base ('Bio::EnsEMBL::Hive::PipeConfig::EnsemblGeneric_conf'); diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/Base_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/Base_conf.pm index 157809e9d..4f0c0a558 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/Base_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/Base_conf.pm @@ -24,7 +24,7 @@ use warnings; use base ('Bio::EnsEMBL::Hive::PipeConfig::EnsemblGeneric_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use File::Spec::Functions qw(catdir); @@ -61,7 +61,7 @@ sub default_options { 'species' => [], 'antispecies' => [], 'batch_size' => 50, - 'meta_filters' => {}, + 'meta_filters' => {}, 'update_dataset_status' => 'Processing', #updates dataset status in new metadata db #param to connect to old pipeline analysis name 'genome_factory_dynamic_output_flow' => { @@ -83,7 +83,7 @@ sub factory_analyses { -input_ids => [{}], -flow_into => { '1' => ['GenomeFactory'], - + }, -rc_name => 'default', }, @@ -98,12 +98,12 @@ sub factory_analyses { 'dataset_type' => $self->o('dataset_type'), 'dataset_status' => $self->o('dataset_status'), 'division' => $self->o('division'), - 'organism_group_type' => $self->o('organism_group_type'), + 'organism_group_type' => $self->o('organism_group_type'), 'species' => $self->o('species'), - 'antispecies' => $self->o('antispecies'), + 'antispecies' => $self->o('antispecies'), 'batch_size' => $self->o('batch_size'), - 'update_dataset_status' => $self->o('update_dataset_status'), - }, + 'update_dataset_status' => $self->o('update_dataset_status'), + }, -flow_into => $self->o('genome_factory_dynamic_output_flow'), }, @@ -111,7 +111,7 @@ sub factory_analyses { -logic_name => 'UpdateDatasetStatus', -module => 'ensembl.production.hive.HiveDatasetFactory', -language => 'python3', - -rc_name => 'default', + -rc_name => 'default', -parameters => { 'metadata_db_uri' => $self->o('metadata_db_uri'), 'update_dataset_status' => $self->o('update_dataset_status'), @@ -143,6 +143,8 @@ sub resource_classes { ); my %memory = ( + '100M' => '100', + '200M' => '200', '500M' => '500', '1GB' => '1000', '2GB' => '2000', diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/BulkSQL_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/BulkSQL_conf.pm index 3c2bab042..b0503c433 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/BulkSQL_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/BulkSQL_conf.pm @@ -38,7 +38,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ChecksumGenerator_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ChecksumGenerator_conf.pm index c7198cc3d..781f60416 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ChecksumGenerator_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ChecksumGenerator_conf.pm @@ -25,7 +25,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use File::Spec; sub 
default_options { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ChecksumLoader_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ChecksumLoader_conf.pm index a7e110a93..977ae0568 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ChecksumLoader_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ChecksumLoader_conf.pm @@ -26,7 +26,7 @@ use warnings; use Data::Dumper; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/CoreStatistics_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/CoreStatistics_conf.pm index 236ad1365..1cf6a365a 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/CoreStatistics_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/CoreStatistics_conf.pm @@ -38,7 +38,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::ApiVersion qw/software_version/; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; sub default_options { @@ -130,7 +130,7 @@ sub pipeline_analyses { '3->A' => ['CheckStatistics_Chromosome'], 'A->1' => ['SpeciesFactory_All'], }, - -rc_name => '2GB', + -rc_name => '2GB_D', }, @@ -211,7 +211,7 @@ sub pipeline_analyses { }, -max_retry_count => 1, -hive_capacity => 50, - -rc_name => 'default_W' + -rc_name => '2GB_W' }, { @@ -262,7 +262,7 @@ sub pipeline_analyses { -max_retry_count => 1, -hive_capacity => 50, -flow_into => ['GeneGC_Datacheck'], - -rc_name => 'default_D', + -rc_name => '2GB_D', }, { @@ -327,7 +327,7 @@ sub pipeline_analyses { -max_retry_count => 1, -hive_capacity => 50, -batch_size => 10, - -rc_name => 'default', + -rc_name => '1GB', }, # { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DataChecksNonCore_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DataChecksNonCore_conf.pm index 71d3b7655..de8aaffd0 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DataChecksNonCore_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DataChecksNonCore_conf.pm @@ -31,7 +31,7 @@ use warnings; use base ('Bio::EnsEMBL::DataCheck::Pipeline::DbDataChecks_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DbCopy_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DbCopy_conf.pm index 11f63e43c..627e0ab05 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DbCopy_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DbCopy_conf.pm @@ -25,7 +25,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use File::Spec::Functions qw(catdir); sub default_options { @@ -39,6 +39,7 @@ sub default_options { # Database type factory groups => 1, group => [], + delete_group => [], # Named database factory dbname => [], @@ -58,7 +59,6 @@ sub default_options { # Drop databases from target, by default the same set that will be copied delete_db => 0, delete_release => $self->o('ensembl_release'), - delete_group => 
$self->o('group'), delete_dbname => $self->o('dbname'), delete_marts => $self->o('marts'), delete_compara => $self->o('compara'), @@ -200,7 +200,7 @@ sub pipeline_analyses { -max_retry_count => 1, -parameters => { ensembl_release => $self->o('delete_release'), - group => $self->o('delete_group'), + group => (ref($self->o('delete_group')) eq 'ARRAY' && @{$self->o('delete_group')}) ? $self->o('delete_group') : $self->o('group'), groups => $self->o('groups'), }, -flow_into => { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpCore_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpCore_conf.pm index 89f38d607..02bbb9cc9 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpCore_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpCore_conf.pm @@ -25,7 +25,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use File::Spec; sub default_options { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpOrtholog_conf_strains.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpOrtholog_conf_strains.pm index 41a9ca74d..b0bd2e3a9 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpOrtholog_conf_strains.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpOrtholog_conf_strains.pm @@ -24,7 +24,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::DumpOrtholog_conf'); -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpOrtholog_eg_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpOrtholog_eg_conf.pm index 5cbdea73f..fd730fc3e 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpOrtholog_eg_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpOrtholog_eg_conf.pm @@ -24,7 +24,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::DumpOrtholog_conf'); -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; @@ -92,17 +92,17 @@ sub default_options { compara => 'metazoa', source => 'drosophila_melanogaster', species => [ - 'drosophila_ananassae', - 'drosophila_erecta', - 'drosophila_grimshawi', - 'drosophila_mojavensis', - 'drosophila_persimilis', - 'drosophila_pseudoobscura', - 'drosophila_sechellia', - 'drosophila_simulans', - 'drosophila_virilis', - 'drosophila_willistoni', - 'drosophila_yakuba' + 'drosophila_ananassae_gca017639315v2rs', + 'drosophila_erecta_gca003286155v2rs', + 'drosophila_grimshawi_gca018153295v1rs', + 'drosophila_mojavensis_gca018153725v1rs', + 'drosophila_persimilis_gca003286085v2rs', + 'drosophila_pseudoobscura_gca009870125v2rs', + 'drosophila_sechellia_gca004382195v2rs', + 'drosophila_simulans_gca016746395v2rs', + 'drosophila_virilis_gca003285735v2rs', + 'drosophila_willistoni_gca018902025v2rs', + 'drosophila_yakuba_gca016746365v2rs' ], antispecies => 'drosophila_melanogaster', division => 'EnsemblMetazoa', diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpOrtholog_ensembl_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpOrtholog_ensembl_conf.pm index a0e6ab38b..46f9513cf 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpOrtholog_ensembl_conf.pm +++ 
b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpOrtholog_ensembl_conf.pm @@ -24,7 +24,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::DumpOrtholog_conf'); -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpSpeciesForGOA_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpSpeciesForGOA_conf.pm index cd3b8e9a1..171494407 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpSpeciesForGOA_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/DumpSpeciesForGOA_conf.pm @@ -34,7 +34,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use File::Spec::Functions qw(catdir); sub default_options { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/EarlyDumps_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/EarlyDumps_conf.pm index 0637f1043..9d809f563 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/EarlyDumps_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/EarlyDumps_conf.pm @@ -26,7 +26,7 @@ use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Production::Pipeline::PipeConfig::FileDumpMySQL_conf; use Bio::EnsEMBL::Production::Pipeline::PipeConfig::DumpCore_conf; use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use File::Spec::Functions qw(catdir); @@ -56,7 +56,7 @@ sub default_options { metadata_base_dir => catdir($self->o('ENV', 'NOBACKUP_DIR'), $self->o('username'), 'genome_reports_'.$self->o('ensembl_version')), metadata_script => catdir($self->o('base_dir'), '/ensembl-metadata/misc_scripts/report_genomes.pl'), division_pattern_nonvert => '.fungi,.metazoa,.plants,.protists', - early_dump_base_path => catdir($self->o('ENV', 'NOBACKUP_DIR'), '/release_dumps/'), + early_dump_base_path => catdir($self->o('ENV', 'NOBACKUP_DIR'), '/release_dumps/', '/release-'.$self->o('ensembl_version').'/ftp_dumps/'), nfs_early_dump_path => '/nfs/production/flicek/ensembl/production/ensemblftp/', early_dumps_private_ftp => catdir('/nfs/ftp/private/ensembl/pre-releases','/release-'.$self->o('ensembl_version').'_'.$self->o('eg_version')), #flags to restrict division @@ -131,37 +131,14 @@ sub pipeline_analyses { -module => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd', -max_retry_count => 1, -parameters => { - early_dump_path_vert => catdir($self->o('early_dump_base_path'), '/release-'.$self->o('ensembl_version')), - nfs_early_dump_path_vert => catdir($self->o('nfs_early_dump_path'), '/release-'.$self->o('ensembl_version')), - early_dump_path_nonvert => catdir($self->o('early_dump_base_path'), '/release-'.$self->o('eg_version')), - nfs_early_dump_path_nonvert => catdir($self->o('nfs_early_dump_path'), '/release-'.$self->o('eg_version')), + nfs_early_dump_path_vert => catdir($self->o('nfs_early_dump_path'), '/release-'.$self->o('ensembl_version')), + nfs_early_dump_path_nonvert => catdir($self->o('nfs_early_dump_path'), '/release-'.$self->o('eg_version')), cmd => q{ - rsync -avW #early_dump_path_vert# #nfs_early_dump_path_vert# - rsync -avW #early_dump_path_nonvert# #nfs_early_dump_path_nonvert# - + rsync -avW --include=vertebrates --exclude=plants --exclude=protists --exclude=fungi --exclude=bacteria --exclude=metazoa #early_dump_base_path# 
#nfs_early_dump_path_vert# + rsync -avW --include=plants --include=protists --include=fungi --include=bacteria --include=metazoa --exclude=vertebrates #early_dump_base_path# #nfs_early_dump_path_nonvert# }, }, - -flow_into => { '1' => 'CopyToPublicFtp' }, - - - }, - { - -logic_name => 'CopyToPublicFtp', - -module => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd', - -max_retry_count => 1, - -parameters => { - nfs_early_dump_path_vert => catdir($self->o('nfs_early_dump_path'), '/release-'.$self->o('ensembl_version')), - nfs_early_dump_path_nonvert => catdir($self->o('nfs_early_dump_path'), '/release-'.$self->o('eg_version')), - early_dumps_private_ftp => $self->o('early_dumps_private_ftp'), - cmd => q{ - rsync -avW #nfs_early_dump_path_vert#/verterates/ #early_dumps_private_ftp# - rsync -avW #nfs_early_dump_path_nonvert#/ #early_dumps_private_ftp# - - }, - }, - - -flow_into => { '1' => 'Email' }, - + -flow_into => { '1' => 'Email' }, }, { -logic_name => 'MetaDataReport', diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/EnsemblSearchDumps_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/EnsemblSearchDumps_conf.pm index 8910c2531..7bd38728e 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/EnsemblSearchDumps_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/EnsemblSearchDumps_conf.pm @@ -27,7 +27,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/EnsemblThoasDumps_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/EnsemblThoasDumps_conf.pm index e5153532e..61361da4c 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/EnsemblThoasDumps_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/EnsemblThoasDumps_conf.pm @@ -27,7 +27,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/FactoryTest_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/FactoryTest_conf.pm index 5c8590d06..af53f5fd7 100755 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/FactoryTest_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/FactoryTest_conf.pm @@ -26,7 +26,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/FileDumpMySQL_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/FileDumpMySQL_conf.pm index a6f5f3229..aff1a5ba6 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/FileDumpMySQL_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/FileDumpMySQL_conf.pm @@ -25,7 +25,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use File::Spec::Functions qw(catdir); sub default_options { diff --git 
a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GPAD_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GPAD_conf.pm index d5346d9f7..0f594487d 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GPAD_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GPAD_conf.pm @@ -24,7 +24,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use File::Spec::Functions qw(catdir); sub default_options { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GeneAutoComplete_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GeneAutoComplete_conf.pm index 59eeae96e..b31a3f0eb 100755 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GeneAutoComplete_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GeneAutoComplete_conf.pm @@ -37,7 +37,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GeneTreeHighlighting_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GeneTreeHighlighting_conf.pm index 075faf886..24d418067 100755 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GeneTreeHighlighting_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GeneTreeHighlighting_conf.pm @@ -32,7 +32,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; sub default_options { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GrantMySQL_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GrantMySQL_conf.pm index a81a2c03a..0a9d9fe4c 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GrantMySQL_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/GrantMySQL_conf.pm @@ -25,7 +25,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/LoadAppris_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/LoadAppris_conf.pm index 15c3a2486..793ec0d50 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/LoadAppris_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/LoadAppris_conf.pm @@ -32,7 +32,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; sub default_options { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/LoadRefget_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/LoadRefget_conf.pm index faab42885..c10601583 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/LoadRefget_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/LoadRefget_conf.pm @@ -20,7 +20,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use 
Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use File::Spec; sub default_options { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/LoadTSL_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/LoadTSL_conf.pm index 670594711..184d5a82f 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/LoadTSL_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/LoadTSL_conf.pm @@ -32,7 +32,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; sub default_options { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/MVP_XrefProcess_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/MVP_XrefProcess_conf.pm index 7c800054b..9826b1478 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/MVP_XrefProcess_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/MVP_XrefProcess_conf.pm @@ -24,7 +24,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; sub default_options { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/OLSLoad_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/OLSLoad_conf.pm index 0b83cf992..66549db1e 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/OLSLoad_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/OLSLoad_conf.pm @@ -31,7 +31,7 @@ use warnings FATAL => 'all'; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ProductionDBSync_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ProductionDBSync_conf.pm index 3b58fe283..002f8f271 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ProductionDBSync_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ProductionDBSync_conf.pm @@ -32,7 +32,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use File::Spec::Functions qw(catdir); sub default_options { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ProteinFeatures_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ProteinFeatures_conf.pm index a140af5a7..55b1cea2e 100755 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ProteinFeatures_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/ProteinFeatures_conf.pm @@ -25,7 +25,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use File::Spec::Functions qw(catdir); @@ -75,7 +75,7 @@ sub default_options { interpro_file => 'names.dat', interpro2go_file => 'interpro2go', - uniparc_file => 'upidump.lis', + uniparc_file => 'upidump.lis.gz', mapping_file => 'idmapping_selected.tab.gz', # Files are retrieved and stored locally with the same name. 
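Note on the hunk above: uniparc_file is now fetched gzipped (upidump.lis.gz). The LoadUniParc runnable itself is not part of this diff, but a minimal sketch of streaming a gzipped dump line by line with IO::Zlib (the same module CreateUniparcDB.pm uses, and already a cpanfile requirement) could look like this; the local path and per-record handling are illustrative assumptions, not the actual implementation:

    use strict;
    use warnings;
    use IO::Zlib;

    my $uniparc_file = 'upidump.lis.gz';   # illustrative local path
    my $fh = IO::Zlib->new;
    $fh->open($uniparc_file, 'rb')
        or die "Opening $uniparc_file with IO::Zlib failed: $!";
    my $n = 0;
    while (my $line = <$fh>) {
        chomp $line;
        $n++;   # real code would parse and load the record here
    }
    $fh->close;
    print "Read $n UniParc records\n";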
@@ -228,11 +228,35 @@ sub default_options { ipscan_lookup => 0, }, { - logic_name => 'seg', - db => 'Seg', + db => 'Phobius', + ipscan_lookup => 1, + ipscan_name => 'Phobius', + ipscan_xml => 'PHOBIUS', + logic_name => 'phobius', + program => 'InterProScan', }, - ], - + { + db => 'SignalP_GRAM_POSITIVE', + ipscan_lookup => 1, + ipscan_name => 'SignalP_GRAM_POSITIVE', + ipscan_xml => 'SIGNALP_GRAM_POSITIVE', + logic_name => 'signalp_gram_positive', + program => 'InterProScan', + }, + { + db => 'SignalP_GRAM_NEGATIVE', + ipscan_lookup => 1, + ipscan_name => 'SignalP_GRAM_NEGATIVE', + ipscan_xml => 'SIGNALP_GRAM_NEGATIVE', + logic_name => 'signalp_gram_negative', + program => 'InterProScan', + }, + # seg replaces low-complexity regions in protein sequences with X characters (https://rothlab.ucdavis.edu/genhelp/seg.html) + { + logic_name => 'seg', + db => 'Seg', + }, + ], xref_analyses => [ { @@ -336,17 +360,19 @@ sub pipeline_analyses { -logic_name => 'InterProScanVersionCheck', -module => 'Bio::EnsEMBL::Production::Pipeline::ProteinFeatures::InterProScanVersionCheck', -max_retry_count => 0, -# -input_ids => [ {} ], - -parameters => { - interproscan_path => $self->o('interproscan_path'), - interproscan_version => $self->o('interproscan_version'), - local_computation => $self->o('local_computation'), - }, - -flow_into => { - '3->A' => [ 'FetchFiles' ], - 'A->3' => [ 'AnnotateProteinFeatures' ], - }, - }, +# -input_ids => [ {} ], + -parameters => { + interproscan_path => $self->o('interproscan_path'), + interproscan_version => $self->o('interproscan_version'), + local_computation => $self->o('local_computation'), + }, + -flow_into => { + '3->A' => ['FetchFiles'], + 'A->3' => ['AnnotateProteinFeatures'], + }, + -rc_name => '2GB', + + }, { -logic_name => 'FetchFiles', @@ -421,14 +447,16 @@ sub pipeline_analyses { -rc_name => 'dm', }, - { - -logic_name => 'LoadUniParc', - -module => 'Bio::EnsEMBL::Production::Pipeline::ProteinFeatures::LoadUniParc', - -max_retry_count => 1, - -parameters => { - uniparc_file_local => $self->o('uniparc_file_local'), - }, - }, + { + -logic_name => 'LoadUniParc', + -module => 'Bio::EnsEMBL::Production::Pipeline::ProteinFeatures::LoadUniParc', + -max_retry_count => 1, + -parameters => { + uniparc_file_local => $self->o('uniparc_file_local'), + }, + -rc_name => '2GB_W', + + }, { -logic_name => 'LoadUniProt', @@ -485,20 +513,23 @@ sub pipeline_analyses { ], output_file => catdir('#pipeline_dir#', '#dbname#', 'pre_pipeline_bkp.sql.gz'), }, - -flow_into => [ 'AnalysisConfiguration' ], + -rc_name => '1GB', + -flow_into => [ 'AnalysisConfiguration' ], }, - { - -logic_name => 'AnalysisConfiguration', - -module => 'Bio::EnsEMBL::Production::Pipeline::ProteinFeatures::AnalysisConfiguration', - -max_retry_count => 0, - -parameters => { - protein_feature_analyses => $self->o('protein_feature_analyses'), - check_interpro_db_version => $self->o('check_interpro_db_version'), - run_seg => $self->o('run_seg'), - xref_analyses => $self->o('xref_analyses'), - }, - -flow_into => { + { + -logic_name => 'AnalysisConfiguration', + -module => 'Bio::EnsEMBL::Production::Pipeline::ProteinFeatures::AnalysisConfiguration', + -max_retry_count => 0, + -parameters => { + protein_feature_analyses => $self->o('protein_feature_analyses'), + check_interpro_db_version => $self->o('check_interpro_db_version'), + run_seg => $self->o('run_seg'), + xref_analyses => $self->o('xref_analyses'), + }, + -rc_name => '2GB', + + -flow_into => { '2->A' => [ 'AnalysisSetup' ], 'A->3' => [ 'RemoveOrphans' ], } @@ -535,7 
+566,8 @@ sub pipeline_analyses { 'WHERE ox.object_xref_id IS NULL', ] }, - -flow_into => [ 'DeleteInterPro' ] + -rc_name => 'default_D', + -flow_into => [ 'DeleteInterPro' ] }, { @@ -573,7 +605,8 @@ sub pipeline_analyses { -max_retry_count => 1, -analysis_capacity => 20, -parameters => {}, - -flow_into => { + -rc_name => '1GB', + -flow_into => { '2' => [ 'DumpProteome' ], } }, @@ -588,7 +621,7 @@ sub pipeline_analyses { header_style => 'dbID', overwrite => 1, }, - -rc_name => '4GB', + -rc_name => '4GB_W', -flow_into => { '-1' => [ 'DumpProteome_HighMem' ], '1' => WHEN('#run_seg#' => @@ -631,7 +664,8 @@ sub pipeline_analyses { max_files_per_directory => $self->o('max_files_per_directory'), max_dirs_per_directory => $self->o('max_dirs_per_directory'), }, - -flow_into => { + -rc_name => '1GB', + -flow_into => { '2' => [ 'RunSeg' ], }, }, @@ -646,7 +680,8 @@ sub pipeline_analyses { { cmd => $self->o('seg_exe') . ' #split_file# ' . $self->o('seg_params') . ' > #split_file#.seg.txt', }, - -flow_into => [ 'StoreSegFeatures' ], + -rc_name => '1GB', + -flow_into => [ 'StoreSegFeatures' ], }, { @@ -673,7 +708,8 @@ sub pipeline_analyses { uniparc_logic_name => $self->o('uniparc_logic_name'), uniprot_logic_name => $self->o('uniprot_logic_name'), }, - -flow_into => { + -rc_name => '1GB_D', + -flow_into => { '3' => [ 'SplitChecksumFile' ], '4' => [ 'SplitNoChecksumFile' ], }, @@ -764,7 +800,7 @@ sub pipeline_analyses { interproscan_applications => '#interproscan_nolookup_applications#', run_interproscan => $self->o('run_interproscan'), }, - -rc_name => '4GB_8CPU', + -rc_name => '16GB_8CPU', -flow_into => { '3' => [ 'StoreProteinFeatures' ], '-1' => [ 'InterProScanNoLookup_HighMem' ], @@ -801,7 +837,7 @@ sub pipeline_analyses { interproscan_applications => '#interproscan_local_applications#', run_interproscan => $self->o('run_interproscan'), }, - -rc_name => '4GB_8CPU', + -rc_name => '16GB_8CPU', -flow_into => { '3' => [ 'StoreProteinFeatures' ], '0' => [ 'InterProScanLocal_HighMem' ], @@ -835,7 +871,8 @@ sub pipeline_analyses { -parameters => { analyses => $self->o('protein_feature_analyses') }, - -flow_into => { + -rc_name => '1GB_D', + -flow_into => { '-1' => [ 'StoreProteinFeatures_HighMem' ], }, }, @@ -861,7 +898,8 @@ sub pipeline_analyses { interpro2go_file => $self->o('interpro2go_file_local'), logic_name => $self->o('interpro2go_logic_name') }, - -flow_into => [ 'StoreInterProXrefs' ], + -rc_name => '1GB', + -flow_into => [ 'StoreInterProXrefs' ], }, { @@ -892,7 +930,8 @@ sub pipeline_analyses { history_file => $self->o('history_file'), failures_fatal => 1, }, - -flow_into => WHEN('#email_report#' => [ 'EmailReport' ]), + -rc_name => '2GB_D', + -flow_into => WHEN('#email_report#' => [ 'EmailReport' ]), }, { @@ -906,14 +945,23 @@ sub pipeline_analyses { }, }, - { - -logic_name => 'TidyScratch', - -module => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd', - -max_retry_count => 1, - -parameters => { - cmd => 'rm -rf #scratch_dir#', - }, + { + -logic_name => 'TidyScratch', + -module => 'Bio::EnsEMBL::Hive::RunnableDB::SystemCmd', + -max_retry_count => 1, + -parameters => { + cmd => 'rm -rf #scratch_dir#', + }, + -flow_into => 'CleanTables', + }, + + { + -logic_name => 'CleanTables', + -module => 'Bio::EnsEMBL::Hive::RunnableDB::SqlCmd', + -parameters => { + sql => [ 'DROP TABLE uniparc', 'DROP TABLE uniprot' ], }, + }, ]; } @@ -921,12 +969,13 @@ sub pipeline_analyses { sub resource_classes { my ($self) = @_; - return { - %{$self->SUPER::resource_classes}, - '4GB_8CPU' => { 'LSF' => '-q ' . 
$self->o('production_queue') . ' -n 8 -M 4000 -R "rusage[mem=4000]"' }, - '16GB_8CPU' => { 'LSF' => '-q ' . $self->o('production_queue') . ' -n 8 -M 16000 -R "rusage[mem=16000]"' }, - '32GB_8CPU' => { 'LSF' => '-q ' . $self->o('production_queue') . ' -n 8 -M 32000 -R "rusage[mem=32000]"' }, - } + return { + %{$self->SUPER::resource_classes}, + '16GB_8CPU' => { 'LSF' => '-q ' . $self->o('production_queue') . ' -n 8 -M 16000 -R "rusage[mem=16000]"' , + 'SLURM' => ' --partition=standard --time=1-00:00:00 --mem=16000m -n 8 -N 1'}, + '32GB_8CPU' => { 'LSF' => '-q ' . $self->o('production_queue') . ' -n 8 -M 32000 -R "rusage[mem=32000]"' , + 'SLURM' => ' --partition=standard --time=1-00:00:00 --mem=32000m -n 8 -N 1'}, + } } 1; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/RNAGeneXref_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/RNAGeneXref_conf.pm index 40c6c9b78..6d8fa3fc8 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/RNAGeneXref_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/RNAGeneXref_conf.pm @@ -25,7 +25,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use File::Spec::Functions qw(catdir); diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/SampleDataCopy_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/SampleDataCopy_conf.pm index d02de7472..82778ac57 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/SampleDataCopy_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/SampleDataCopy_conf.pm @@ -41,7 +41,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/SampleData_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/SampleData_conf.pm index 79a860494..110f532dd 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/SampleData_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/SampleData_conf.pm @@ -43,7 +43,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/SearchDumps_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/SearchDumps_conf.pm index 6b16ce2e4..adb3bcbbd 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/SearchDumps_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/SearchDumps_conf.pm @@ -37,7 +37,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/StableIDs_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/StableIDs_conf.pm index 4df737ac6..dc30908ef 100755 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/StableIDs_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/StableIDs_conf.pm @@ 
-38,7 +38,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/TaxonomyInfoCore_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/TaxonomyInfoCore_conf.pm index cb010d7ef..ec6141542 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/TaxonomyInfoCore_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/TaxonomyInfoCore_conf.pm @@ -25,7 +25,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use File::Spec::Functions qw(catdir); sub default_options { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/TranscriptomeDomains_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/TranscriptomeDomains_conf.pm index b9426d30c..fdad0bde8 100755 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/TranscriptomeDomains_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/TranscriptomeDomains_conf.pm @@ -25,7 +25,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::ProteinFeatures_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/UpdatePackedStatus_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/UpdatePackedStatus_conf.pm index 8b02ae00d..b0a6c1460 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/UpdatePackedStatus_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/UpdatePackedStatus_conf.pm @@ -26,7 +26,7 @@ use warnings; use base ('Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/VariationStatistics_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/VariationStatistics_conf.pm index 0cb424cbf..a709f5b44 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/VariationStatistics_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/VariationStatistics_conf.pm @@ -37,7 +37,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::CoreStatistics_conf'); -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/XrefDownload_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/XrefDownload_conf.pm index 4c17d22d3..6bbe3fa04 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/XrefDownload_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/XrefDownload_conf.pm @@ -24,7 +24,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; sub default_options { @@ -76,7 +76,7 @@ sub pipeline_analyses { '2->A' => 'download_source', 'A->1' => 'schedule_cleanup' }, - 
-rc_name => 'small' + -rc_name => 'default' }, { -logic_name => 'download_source', @@ -85,7 +85,30 @@ -parameters => { base_path => $self->o('base_path') }, - -rc_name => 'dm', + -rc_name => 'dm_D', + -max_retry_count => 3, + -flow_into => { '-1' => 'download_source_32'} + }, + { + -logic_name => 'download_source_32', + -module => 'Bio::EnsEMBL::Production::Pipeline::Xrefs::DownloadSource', + -comment => 'Downloads the source files and stores them in -base_path.', + -parameters => { + base_path => $self->o('base_path') + }, + -rc_name => 'dm32_D', + -max_retry_count => 3, + -flow_into => { '-1' => 'download_source_MAX'} + }, + #THIS STEP IS THE RESULT OF A BUG AND SHOULD BE REMOVED AS SOON AS THE PIPELINE IS FIXED + { + -logic_name => 'download_source_MAX', + -module => 'Bio::EnsEMBL::Production::Pipeline::Xrefs::DownloadSource', + -comment => 'Downloads the source files and stores them in -base_path.', + -parameters => { + base_path => $self->o('base_path') + }, + -rc_name => 'dmMAX_D', -max_retry_count => 3 }, { @@ -102,7 +125,7 @@ '4->A' => 'cleanup_uniprot', 'A->1' => 'schedule_pre_parse' }, - -rc_name => 'small' + -rc_name => 'default' }, { -logic_name => 'checksum', @@ -112,7 +135,7 @@ base_path => $self->o('base_path'), skip_download => $self->o('skip_download') }, - -rc_name => 'normal' + -rc_name => '100M_W' }, { -logic_name => 'cleanup_refseq_dna', @@ -124,7 +147,7 @@ skip_download => $self->o('skip_download'), clean_dir => $self->o('clean_dir') }, - -rc_name => 'small' + -rc_name => '100M_D' }, { -logic_name => 'cleanup_refseq_peptide', @@ -136,7 +159,7 @@ skip_download => $self->o('skip_download'), clean_dir => $self->o('clean_dir') }, - -rc_name => 'small' + -rc_name => 'default' }, { -logic_name => 'cleanup_uniprot', @@ -148,7 +171,7 @@ skip_download => $self->o('skip_download'), clean_dir => $self->o('clean_dir') }, - -rc_name => 'small' + -rc_name => '200M_D' }, { -logic_name => 'schedule_pre_parse', @@ -167,13 +190,23 @@ '4' => 'pre_parse_source_tertiary', '-1' => 'notify_by_email' }, - -rc_name => 'small' + -rc_name => 'default' }, + { -logic_name => 'pre_parse_source', -module => 'Bio::EnsEMBL::Production::Pipeline::Xrefs::PreParse', -comment => 'Store data for faster species parsing', - -rc_name => '2GB', + -rc_name => '2GB_D', + -hive_capacity => 100, + -can_be_empty => 1, + -flow_into => {'-1' => 'pre_parse_source_long_HM'} + }, + { + -logic_name => 'pre_parse_source_long_HM', + -module => 'Bio::EnsEMBL::Production::Pipeline::Xrefs::PreParse', + -comment => 'Store data for faster species parsing', + -rc_name => '4GB_W', + -hive_capacity => 100, + -can_be_empty => 1, }, @@ -181,7 +214,7 @@ -logic_name => 'pre_parse_source_dependent', -module => 'Bio::EnsEMBL::Production::Pipeline::Xrefs::PreParse', -comment => 'Store data for faster species parsing', - -rc_name => '2GB', + -rc_name => '16GB_D', -hive_capacity => 100, -can_be_empty => 1, -wait_for => 'pre_parse_source' @@ -190,7 +223,7 @@ -logic_name => 'pre_parse_source_tertiary', -module => 'Bio::EnsEMBL::Production::Pipeline::Xrefs::PreParse', -comment => 'Store data for faster species parsing', - -rc_name => '2GB', + -rc_name => '2GB_D', -hive_capacity => 100, -can_be_empty => 1, -wait_for => 'pre_parse_source_dependent', @@ -208,21 +241,11 @@ skip_preparse => 
$self->o('skip_preparse') }, -wait_for => 'pre_parse_source_tertiary', - -rc_name => 'small' + -rc_name => 'default' } ]; } -sub resource_classes { - my ($self) = @_; - - return { - %{$self->SUPER::resource_classes}, - 'small' => { 'LSF' => '-q production -M 200 -R "rusage[mem=200]"' }, # Change 'production' to 'production-rh74' if running on noah - 'normal' => { 'LSF' => '-q production -M 1000 -R "rusage[mem=1000]"' } - }; -} - sub pipeline_wide_parameters { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/XrefProcess_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/XrefProcess_conf.pm index c737c2a77..d478fff24 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/XrefProcess_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/XrefProcess_conf.pm @@ -24,7 +24,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; sub default_options { @@ -76,7 +76,7 @@ sub pipeline_analyses { '1->A' => 'schedule_species', 'A->1' => 'EmailAdvisoryXrefReport' }, - -rc_name => 'small', + -rc_name => 'default', }, { -logic_name => 'schedule_species', @@ -93,7 +93,7 @@ sub pipeline_analyses { '2->A' => 'schedule_source', 'A->2' => 'schedule_dependent_source' }, - -rc_name => 'small', + -rc_name => 'default', }, { -logic_name => 'schedule_source', @@ -112,7 +112,7 @@ sub pipeline_analyses { xref_pass => $self->o('xref_pass'), }, -flow_into => { '2' => 'parse_source' }, - -rc_name => 'small', + -rc_name => '1GB_D', -analysis_capacity => 10, }, { @@ -133,7 +133,7 @@ sub pipeline_analyses { '2->A' => 'parse_source', 'A->1' => 'schedule_tertiary_source', }, - -rc_name => 'small', + -rc_name => '4GB_D', }, { -logic_name => 'schedule_tertiary_source', @@ -153,12 +153,12 @@ sub pipeline_analyses { '2->A' => 'parse_source', 'A->1' => 'dump_ensembl', }, - -rc_name => 'small', + -rc_name => 'default', }, { -logic_name => 'parse_source', -module => 'Bio::EnsEMBL::Production::Pipeline::Xrefs::ParseSource', - -rc_name => 'large', + -rc_name => '16GB_D', -hive_capacity => 300, -analysis_capacity => 50, -batch_size => 30, @@ -173,11 +173,12 @@ sub pipeline_analyses { base_path => $self->o('base_path'), release => $self->o('release') }, + -max_retry_count => 0, -flow_into => { '2->A' => 'dump_xref', 'A->1' => 'schedule_mapping' }, - -rc_name => 'mem', + -rc_name => '16GB_D', }, { -logic_name => 'dump_xref', @@ -187,8 +188,9 @@ sub pipeline_analyses { release => $self->o('release'), config_file => $self->o('config_file') }, + -max_retry_count => 0, -flow_into => { 2 => 'align_factory' }, - -rc_name => 'normal', + -rc_name => '1GB', }, { -logic_name => 'align_factory', @@ -197,7 +199,7 @@ sub pipeline_analyses { base_path => $self->o('base_path'), release => $self->o('release')}, -flow_into => { 2 => 'align' }, - -rc_name => 'small', + -rc_name => 'default', }, { -logic_name => 'align', @@ -205,7 +207,7 @@ sub pipeline_analyses { -parameters => { base_path => $self->o('base_path') }, - -rc_name => 'large', + -rc_name => '16GB_D', -hive_capacity => 300, -analysis_capacity => 300, -batch_size => 5, @@ -220,9 +222,9 @@ sub pipeline_analyses { }, -flow_into => { '2->A' => ['direct_xrefs', 'rnacentral_mapping'], - 'A->1' => 'mapping' + 'A->1' => 'object_xref_check' }, - -rc_name => 'small', + -rc_name => '1GB', }, { -logic_name => 'direct_xrefs', @@ -232,7 +234,7 @@ sub pipeline_analyses { release => 
$self->o('release') }, -flow_into => { 1 => 'process_alignment' }, - -rc_name => 'normal', + -rc_name => '1GB_D', -analysis_capacity => 30 }, { @@ -242,7 +244,7 @@ sub pipeline_analyses { base_path => $self->o('base_path'), release => $self->o('release') }, - -rc_name => 'normal', + -rc_name => '1GB_D', -analysis_capacity => 30 }, { @@ -253,7 +255,7 @@ sub pipeline_analyses { release => $self->o('release') }, -flow_into => { 1 => 'uniparc_mapping' }, - -rc_name => 'normal', + -rc_name => 'default', -hive_capacity => 300, -analysis_capacity => 30 }, @@ -265,7 +267,7 @@ sub pipeline_analyses { release => $self->o('release') }, -flow_into => { 1 => 'coordinate_mapping' }, - -rc_name => 'normal', + -rc_name => '1GB', -hive_capacity => 300, -analysis_capacity => 30 }, @@ -276,9 +278,21 @@ sub pipeline_analyses { base_path => $self->o('base_path'), release => $self->o('release') }, - -rc_name => 'mem', + -rc_name => '16GB', -analysis_capacity => 30 }, + { + -logic_name => 'object_xref_check', + -module => 'Bio::EnsEMBL::Hive::RunnableDB::SqlHealthcheck', + -parameters => { + db_conn => '#xref_url#', + description => 'Check that the object_xref table has rows', + query => 'SELECT object_xref_id FROM object_xref', + expected_size => '> 0' + }, + -flow_into => { 1 => 'mapping' }, + -rc_name => 'default', + }, { -logic_name => 'mapping', -module => 'Bio::EnsEMBL::Production::Pipeline::Xrefs::Mapping', @@ -290,25 +304,26 @@ sub pipeline_analyses { '1->A' => 'RunXrefCriticalDatacheck', 'A->1' => 'RunXrefAdvisoryDatacheck' }, - -rc_name => 'mem', + -rc_name => '16GB_D', -analysis_capacity => 30, }, { - -logic_name => 'RunXrefCriticalDatacheck', - -module => 'Bio::EnsEMBL::DataCheck::Pipeline::RunDataChecks', - -max_retry_count => 1, - -analysis_capacity => 10, - -batch_size => 10, - -parameters => { - datacheck_names => ['ForeignKeys'], - datacheck_groups => ['xref_mapping'], - datacheck_types => ['critical'], - registry_file => $self->o('registry'), - config_file => $self->o('dc_config_file'), - history_file => $self->o('history_file'), - old_server_uri => $self->o('old_server_uri'), - failures_fatal => 1, - }, + -logic_name => 'RunXrefCriticalDatacheck', + -module => 'Bio::EnsEMBL::DataCheck::Pipeline::RunDataChecks', + -max_retry_count => 1, + -analysis_capacity => 10, + -batch_size => 10, + -parameters => { + datacheck_names => [ 'ForeignKeys' ], + datacheck_groups => [ 'xref_mapping' ], + datacheck_types => [ 'critical' ], + registry_file => $self->o('registry'), + config_file => $self->o('dc_config_file'), + history_file => $self->o('history_file'), + old_server_uri => $self->o('old_server_uri'), + failures_fatal => 1, + }, + -rc_name => '1GB', }, { -logic_name => 'RunXrefAdvisoryDatacheck', @@ -325,12 +340,14 @@ sub pipeline_analyses { old_server_uri => $self->o('old_server_uri'), failures_fatal => 0, }, - -flow_into => { 4 => 'AdvisoryXrefReport' } + -flow_into => { 4 => 'AdvisoryXrefReport' }, + -rc_name => '1GB', + }, { -logic_name => 'AdvisoryXrefReport', -module => 'Bio::EnsEMBL::Production::Pipeline::Xrefs::AdvisoryXrefReport', - -rc_name => 'small' + -rc_name => 'default' }, { -logic_name => 'EmailAdvisoryXrefReport', @@ -340,7 +357,7 @@ sub pipeline_analyses { pipeline_name => $self->o('pipeline_name'), base_path => $self->o('base_path') }, - -rc_name => 'small', + -rc_name => 'default', -flow_into => { 1 => 'notify_by_email' } }, { @@ -350,22 +367,11 @@ sub pipeline_analyses { email => $self->o('email'), pipeline_name => $self->o('pipeline_name') }, - -rc_name => 'small' + -rc_name => 
'default' } ]; } -sub resource_classes { - my ($self) = @_; - - return { - %{$self->SUPER::resource_classes}, - 'small' => { 'LSF' => '-q production -M 200 -R "rusage[mem=200]"' }, - 'normal' => { 'LSF' => '-q production -M 500 -R "rusage[mem=500]"' }, - 'mem' => { 'LSF' => '-q production -M 3000 -R "rusage[mem=3000]"' }, - 'large' => { 'LSF' => '-q production -M 10000 -R "rusage[mem=10000]"' }, - } -} sub pipeline_wide_parameters { my ($self) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/Xref_update_conf.pm b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/Xref_update_conf.pm index b72c803c2..69947d710 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/Xref_update_conf.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/PipeConfig/Xref_update_conf.pm @@ -25,7 +25,7 @@ use warnings; use base ('Bio::EnsEMBL::Production::Pipeline::PipeConfig::Base_conf'); use Bio::EnsEMBL::Hive::PipeConfig::HiveGeneric_conf; -use Bio::EnsEMBL::Hive::Version 2.5; +use Bio::EnsEMBL::Hive::Version 2.7; sub default_options { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/ProteinFeatures/LoadUniParc.pm b/modules/Bio/EnsEMBL/Production/Pipeline/ProteinFeatures/LoadUniParc.pm index 40eddb1b4..444b7a2bb 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/ProteinFeatures/LoadUniParc.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/ProteinFeatures/LoadUniParc.pm @@ -21,16 +21,27 @@ package Bio::EnsEMBL::Production::Pipeline::ProteinFeatures::LoadUniParc; use strict; use warnings; - +use IO::Uncompress::Gunzip qw(gunzip $GunzipError); use File::Basename; - use base ('Bio::EnsEMBL::Production::Pipeline::Common::Base'); sub run { my ($self) = @_; my $uniparc_file = $self->param_required('uniparc_file_local'); + if (-e $uniparc_file) { + + #check if uniparc file is compressed + if ($uniparc_file =~ /\.gz$/){ + my $uniparc_file_decompress = $uniparc_file; + $uniparc_file_decompress =~ s/\.gz$//; + gunzip $uniparc_file => $uniparc_file_decompress or $self->throw("gunzip failed: $GunzipError"); + #delete compressed file .gz + unlink $uniparc_file or $self->throw("unable to delete $uniparc_file: $!"); + $uniparc_file = $uniparc_file_decompress; + } + my $dbh = $self->hive_dbh; my $sql = "LOAD DATA LOCAL INFILE '$uniparc_file' INTO TABLE uniparc FIELDS TERMINATED BY ' '"; $dbh->do($sql) or $self->throw($dbh->errstr); @@ -41,9 +52,14 @@ sub run { my $index_2 = 'ALTER TABLE uniparc ADD KEY md5sum_idx (md5sum) USING HASH'; $dbh->do($index_2) or $self->throw($dbh->errstr); + #delete upidump file from pipeline directory after loading into hive db + unlink $uniparc_file or $self->throw("unable to delete $uniparc_file: $!"); + } else { $self->throw("Checksum file '$uniparc_file' does not exist"); } + + } 1; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/StableID/sql/index.sql b/modules/Bio/EnsEMBL/Production/Pipeline/StableID/sql/index.sql index a807f370a..d05766336 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/StableID/sql/index.sql +++ b/modules/Bio/EnsEMBL/Production/Pipeline/StableID/sql/index.sql @@ -13,5 +13,4 @@ -- See the License for the specific language governing permissions and -- limitations under the License.
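-- A single composite index with column order (stable_id, object_type, db_type), as created below, can serve
-- leftmost-prefix lookups, which is what makes the two narrower indexes redundant. Sketch queries
-- (hypothetical stable ID) that the one index can satisfy:
--   SELECT species_id FROM stable_id_lookup WHERE stable_id = 'ENSTEST00000000001';
--   SELECT species_id FROM stable_id_lookup WHERE stable_id = 'ENSTEST00000000001' AND object_type = 'Gene';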
-CREATE INDEX stable_id_db_type ON stable_id_lookup(stable_id, db_type, object_type); -CREATE INDEX stable_id_object_type ON stable_id_lookup(stable_id, object_type); +CREATE INDEX stable_id_db_type ON stable_id_lookup(stable_id, object_type, db_type); diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/StableID/sql/table.sql b/modules/Bio/EnsEMBL/Production/Pipeline/StableID/sql/table.sql index 4ae7ac6ec..6319894a5 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/StableID/sql/table.sql +++ b/modules/Bio/EnsEMBL/Production/Pipeline/StableID/sql/table.sql @@ -14,15 +14,12 @@ -- limitations under the License. CREATE TABLE archive_id_lookup ( - archive_id VARCHAR(128) NOT NULL, + archive_id VARCHAR(100) NOT NULL, species_id INTEGER UNSIGNED NOT NULL, - db_type VARCHAR(255) NOT NULL, - object_type VARCHAR(255) NOT NULL, - - UNIQUE INDEX archive_id_lookup_idx (archive_id, species_id, db_type, object_type), - KEY archive_id_db_type (archive_id, db_type, object_type), - KEY archive_id_object_type (archive_id, object_type) + db_type VARCHAR(20) NOT NULL, + object_type VARCHAR(20) NOT NULL, + UNIQUE INDEX archive_id_lookup_idx (archive_id, object_type, db_type, species_id) ) COLLATE=latin1_swedish_ci ENGINE=MyISAM; CREATE TABLE meta ( @@ -48,9 +45,9 @@ CREATE TABLE species ( ) COLLATE=latin1_swedish_ci ENGINE=MyISAM; CREATE TABLE stable_id_lookup ( - stable_id VARCHAR(128) NOT NULL, + stable_id VARCHAR(100) NOT NULL, species_id INTEGER UNSIGNED NOT NULL, - db_type VARCHAR(255) NOT NULL, - object_type VARCHAR(255) NOT NULL + db_type VARCHAR(20) NOT NULL, + object_type VARCHAR(20) NOT NULL ) COLLATE=latin1_swedish_ci ENGINE=MyISAM; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFile.pm b/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFile.pm index dde019e2f..b713fa3b8 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFile.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFile.pm @@ -55,7 +55,6 @@ return; sub run { my ($self) = @_; - $self->info( "Starting tsv dump for " . $self->param('species')); $self->_write_tsv(); $self->_create_README(); @@ -115,7 +114,7 @@ sub _write_tsv { }#transcript }#gene }#slice - close $fh; + close $fh; $self->core_dbc()->disconnect_if_idle(); return; } diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFileEna.pm b/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFileEna.pm index 3e80b6fef..75768cb7c 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFileEna.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFileEna.pm @@ -107,8 +107,8 @@ sub _write_tsv { if(!defined $row->[5]){ $row->[5] = $self->_find_contig($ta, $contig_ids, $row->[3] ); } elsif( !defined $row->[6] && defined $row->[4]){ - $row->[6] = $cds2acc->{$row->[4]}; - } + $row->[6] = $cds2acc->{$row->[4]}; + } if (defined $row->[5]) { $row->[5] =~ s/\.[0-9]+$//; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFileMetadata.pm b/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFileMetadata.pm index d63261607..ba7cde4b0 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFileMetadata.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFileMetadata.pm @@ -70,9 +70,9 @@ return; sub run { my ($self) = @_; - + $self->_make_karyotype_file(); - + return; } @@ -81,7 +81,7 @@ sub _make_karyotype_file { my $sp = $self->param_required('species'); my $sa = Bio::EnsEMBL::Registry->get_adaptor($sp, 'core', 'slice'); - + if(! 
$sa) { $self->info("Cannot continue as we cannot find a core:slice DBAdaptor for %s", $sp); return; } @@ -92,7 +92,7 @@ my $slices = $sa->fetch_all_karyotype(); # If we don't have any slices (ie. chromosomes), don't make the file return unless(scalar(@$slices)); - + my $file = $self->_generate_file_name(); work_with_file($file, 'w', sub { diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFileXref.pm b/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFileXref.pm index fffe56ded..df33c0d76 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFileXref.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/TSV/DumpFileXref.pm @@ -66,7 +66,9 @@ sub run { return; } +sub write { +} ############# ##SUBROUTINES ############# @@ -74,7 +76,7 @@ sub _write_tsv { my ($self) = @_; my $out_file = $self->_generate_file_name(); - my $header = $self->_build_headers(); + my $header = $self->_build_headers(); open my $fh, '>', $out_file or die "cannot open $out_file for writing!"; print $fh join ("\t", @$header); @@ -112,9 +114,9 @@ my $xref_db = $dbentry->dbname(); my $xref_info_type= $dbentry->info_type(); - if ($dbentry->isa('Bio::EnsEMBL::IdentityXref')){ - $src_identity = $dbentry->ensembl_identity(); - $xref_identity = $dbentry->xref_identity(); + if ($dbentry->isa('Bio::EnsEMBL::IdentityXref')){ + $src_identity = $dbentry->ensembl_identity(); + $xref_identity = $dbentry->xref_identity(); } $linkage_type = join(' ', @{$dbentry->get_all_linkage_types()})if($dbentry->isa('Bio::EnsEMBL::OntologyXref')); print $fh "$g_id\t$tr_id\t$tl_id\t$xref_id\t$xref_db\t$xref_info_type\t$src_identity\t$xref_identity\t$linkage_type\n"; @@ -122,8 +124,9 @@ }#dbentry }#transcript }#gene - }#slice - close $fh; + }#slice + close $fh; + if ($xrefs_exist == 1) { $self->dataflow_output_id( @@ -132,7 +135,6 @@ # If we have no xrefs, delete the file (which will just have a header). unlink $out_file or die "failed to delete $out_file!"; } - return; } diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/TaxonomyUpdate/QueryMetadata.pm b/modules/Bio/EnsEMBL/Production/Pipeline/TaxonomyUpdate/QueryMetadata.pm index 3438dd571..025cc3a65 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/TaxonomyUpdate/QueryMetadata.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/TaxonomyUpdate/QueryMetadata.pm @@ -82,7 +82,24 @@ sub _meta { $self->warning('Querying Taxonomy'); #my $tdba = Bio::EnsEMBL::Registry->get_DBAdaptor( "multi", "taxonomy" ); my $taxonomy = $self->_taxonomy( $tdba, $metadata->{'species.taxonomy_id'}); - $metadata->{'species.classification'} = $taxonomy; + + if (scalar(@{$taxonomy}) == 0) { + my $dbname = $self->param('dbname'); + my $species = $self->param('species'); + my $taxon_id = $metadata->{'species.taxonomy_id'}; + my $msg = "Cannot retrieve taxonomy classification for species $species with taxonomy_id $taxon_id in database $dbname;"; + + my $primary_taxon_id = $self->_fetch_primary_taxon_id($tdba, $taxon_id); + if (defined $primary_taxon_id) { + $msg = $msg + ." taxon_id $taxon_id has been merged into primary taxon_id $primary_taxon_id." + ." Please set the 'species.taxonomy_id' meta entry for this species to the" + ." primary taxon_id and ensure the change is propagated to other relevant databases."; + } + throw $msg; + } + + $metadata->{'species.classification'} = $taxonomy; $self->warning('Updating meta'); foreach my $key ( keys %{$metadata} ) { my $array = wrap_array( $metadata->{$key} ); @@ -150,6 +167,34 @@ } +sub _fetch_primary_taxon_id { + my ( $self, $tdba, $taxon_id ) = @_; + $self->warning('Querying taxonomy to fetch primary taxon_id'); + my $sql = <<'SQL'; +select taxon_id +from ncbi_taxa_name +where name = ? +and name_class = 'merged_taxon_id' +SQL + my $dbc = $tdba->dbc(); + my $res = $dbc->sql_helper()->execute_simple( + -SQL => $sql, + -PARAMS => [ $taxon_id ] + ); + $self->debug( 'Result is [%s]', join( q{, }, @{$res} ) ); + my $result_count = scalar(@{$res}); + + my $primary_taxon_id; + if ($result_count == 1) { + $primary_taxon_id = $res->[0]; + } elsif ($result_count > 1) { + throw "Expected at most 1 primary taxon_id for $taxon_id but got $result_count"; + } + + return $primary_taxon_id; +} + + sub _remove_deprecated { my ($self, $meta_container) = @_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/Alignment.pm b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/Alignment.pm index b3233ea9d..ef4868850 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/Alignment.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/Alignment.pm @@ -84,10 +84,18 @@ sub run { $exe =~ s/\n//g; my $command_string = sprintf ("%s --showalignment FALSE --showvulgar FALSE --ryo '%s' --gappedextension FALSE --model 'affine:local' %s --subopt no --query %s --target %s --querychunktotal %s --querychunkid %s", $exe, $ryo, $method, $source, $target, $max_chunks, $chunk); my $output = `$command_string`; - my @hits = grep {$_ =~ /^xref/} split "\n", $output; # not all lines in output are alignments - while (my $hit = shift @hits) { - print $fh $hit . "\n"; + if ($? == 0) { + my @hits = grep {$_ =~ /^xref/} split "\n", $output; # not all lines in output are alignments + + while (my $hit = shift @hits) { + print $fh $hit . 
"\n"; + } + } else { + my $job = $self->input_job(); + $job->adaptor()->db()->get_LogMessageAdaptor()->store_job_message($job->dbID(), $output, 'WORKER_ERROR'); + + throw("Exonerate failed with exit_code: $?\n"); } $fh->close(); diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/CleanupRefseqPeptide.pm b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/CleanupRefseqPeptide.pm index 313773f1f..3dd8e2aff 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/CleanupRefseqPeptide.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/CleanupRefseqPeptide.pm @@ -89,7 +89,7 @@ sub run { while (<$in_fh>) { if ($_ =~ /^REFERENCE/ || $_ =~ /^COMMENT/ || $_ =~ /^\s{5}Protein/) { $skip_data = 1; - } elsif ($_ =~ /^\s{5}source/ || $_ =~ /^\s{5}CDS/) { + } elsif ($_ =~ /^\s{5}source/ || $_ =~ /^\s{5}CDS/ || $_ =~ /^ORIGIN/) { $skip_data = 0; } if (!$skip_data) {print $out_fh $_;} diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/CleanupUniprot.pm b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/CleanupUniprot.pm index 522a74080..df744e250 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/CleanupUniprot.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/CleanupUniprot.pm @@ -116,7 +116,7 @@ sub run { $_ =~ s/\nCC\s{3}.*//g; # Remove comments $_ =~ s/\nCT(\s{3}.*)CAUTION: The sequence shown here is derived from an Ensembl(.*)/\nCC$1CAUTION: The sequence shown here is derived from an Ensembl$2/g; # Set temp line back to comment $_ =~ s/\nFT\s{3}.*//g; # Remove feature coordinates - $_ =~ s/\nDR\s{3}($sources_to_remove);.*\n//g; # Remove sources skipped at processing + $_ =~ s/\nDR\s{3}($sources_to_remove);.*//g; # Remove sources skipped at processing # Added lines that we do need into output print $out_fh $_; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/DumpXref.pm b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/DumpXref.pm index 3a184d14c..635e6fd8d 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/DumpXref.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/DumpXref.pm @@ -99,7 +99,7 @@ sub run { # Ambiguous peptides must be cleaned out to protect Exonerate from J,O and U codes $row[1] = uc($row[1]); $row[1] =~ s/(.{60})/$1\n/g; - if ($seq_type eq 'pep') { $row[1] =~ tr/JOU/X/ } + if ($seq_type eq 'peptide') { $row[1] =~ tr/JOU/X/ } print $DH ">".$row[0]."\n".$row[1]."\n"; } $mapping_source_sth->execute($source_id, $seq_type); diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/ParseSource.pm b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/ParseSource.pm index 068bf107f..e9d807beb 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/ParseSource.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/ParseSource.pm @@ -79,13 +79,17 @@ sub run { file => $file_name}) ; $self->cleanup_DBAdaptor($db); } else { - $failure += $xref_run->run( { source_id => $source_id, - species_id => $species_id, - species => $species, - rel_file => $release_file, - dbi => $dbi, - xref_source => $source_dbi, - files => [@files] }) ; + my $run_params = { + source_id => $source_id, + species_id => $species_id, + species => $species, + rel_file => $release_file, + dbi => $dbi, + xref_source => $source_dbi, + files => [@files] + }; + $run_params->{hgnc_file} = $self->param('hgnc_file') if ($parser =~ /^UniProt/); + $failure += $xref_run->run( $run_params ) ; } if ($failure) { die; } diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/Parser/UniProtDatabaseParser.pm b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/Parser/UniProtDatabaseParser.pm index 
e47bcc2de..e8db14138 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/Parser/UniProtDatabaseParser.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/Parser/UniProtDatabaseParser.pm @@ -242,7 +242,7 @@ sub run { # Make sure these are still lines with Name or Synonyms if (($gn !~ /^GN/ || $gn !~ /Name=/) && $gn !~ /Synonyms=/) { last; } - if ($gn =~ / Name=([A-Za-z0-9_\-\.\s]+)/s) { #/s for multi-line entries ; is the delimiter + if ($gn =~ / Name=([A-Za-z0-9_\-\.\s:]+)/s) { #/s for multi-line entries ; is the delimiter # Example line # GN Name=ctrc {ECO:0000313|Xenbase:XB-GENE-5790348}; my $name = $1; diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/ScheduleSource.pm b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/ScheduleSource.pm index 48f20cbc3..fd56a7d5a 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/ScheduleSource.pm +++ b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/ScheduleSource.pm @@ -83,12 +83,16 @@ sub run { # Retrieve list of sources from versioning database my ($source_user, $source_pass, $source_host, $source_port, $source_db) = $self->parse_url($source_url); my $dbi = $self->get_dbi($source_host, $source_port, $source_user, $source_pass, $source_db); - my $select_source_sth = $dbi->prepare("SELECT distinct name, parser, uri, clean_uri, index_uri, count_seen, preparse, revision FROM source s, version v WHERE s.source_id = v.source_id"); + my $select_source_sth = $dbi->prepare("SELECT distinct name, parser, uri, clean_uri, index_uri, count_seen, preparse, revision FROM source s, version v WHERE s.source_id = v.source_id order by name"); my ($name, $parser, $file_name, $clean_file_name, $dataflow_params, $db, $priority, $release_file); $select_source_sth->execute(); $select_source_sth->bind_columns(\$name, \$parser, \$file_name, \$clean_file_name, \$db, \$priority, \$preparse, \$release_file); + my $hgnc_path; + while ($select_source_sth->fetch()) { + $hgnc_path = $file_name if ($name eq 'HGNC'); + if (defined $db && $db eq 'checksum') { next; } if ($priority != $order_priority) { next; } if (defined $clean_file_name) { $file_name = $clean_file_name; } @@ -126,11 +130,34 @@ sub run { $self->dataflow_output_id($dataflow_params, 2); } else { # Create list of files - my @list_files = `ls $file_name`; + opendir(my $dir_handle, $file_name); + my @temp_list_files = readdir($dir_handle); + closedir($dir_handle); + + my @list_files; + foreach my $file (@temp_list_files) { + next if ($file =~ /^\./); + push(@list_files, $file_name . "/" . $file); + } if ($preparse) { @list_files = $preparse; } + + # For Uniprot and Refseq, files might have been split by species + if (!$preparse && ($name =~ /^Uniprot/ || $name =~ /^RefSeq_peptide/ || $name =~ /^RefSeq_dna/)) { + my $file_prefix = ($name =~ /SPTREMBL/ ? 'uniprot_trembl' : ($name =~ /SWISSPROT/ ? 'uniprot_sprot' : ($name =~ /_dna/ ? 'refseq_rna' : 'refseq_protein'))); + my @species_list_files = glob($file_name . "/**/**/**/**/" . $file_prefix . "-" . $species_id); + if (scalar(@species_list_files) > 0) { + @list_files = @species_list_files; + } + } + + # For ZFIN, we only need 1 job (parser handles all the files) + if ($name eq 'ZFIN_ID') { + @list_files = $list_files[0]; + } + foreach my $file (@list_files) { $file =~ s/\n//; - $file = $file_name . "/" . 
$file; + if (!-f $file) { next; } if (defined $release_file and $file eq $release_file) { next; } $dataflow_params = { @@ -144,6 +171,10 @@ sub run { priority => $priority, file_name => $file }; + if ($name =~ /^Uniprot/) { + my @hgnc_files = glob( $hgnc_path . '/*' ); + $dataflow_params->{hgnc_file} = $hgnc_files[0]; + } $self->dataflow_output_id($dataflow_params, 2); } } diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/gencode_sources.json b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/gencode_sources.json index 140082a34..f34e6f977 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/gencode_sources.json +++ b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/gencode_sources.json @@ -16,7 +16,7 @@ { "name" : "UniParc", "parser" : "ChecksumParser", - "file" : "https://ftp.ebi.ac.uk/pub/contrib/uniparc/upidump.lis", + "file" : "https://ftp.ebi.ac.uk/pub/contrib/uniparc/upidump.lis.gz", "db" : "checksum", "priority" : 1 }, @@ -194,7 +194,7 @@ { "name" : "miRBase", "parser" : "miRBaseParser", - "file" : "https://mirbase.org/ftp/CURRENT/miRNA.dat.gz", + "file" : "https://mirbase.org/download/miRNA.dat", "method" : "--bestn 1", "query_cutoff" : 90, "target_cutoff" : 90, @@ -203,7 +203,7 @@ { "name" : "HGNC", "parser" : "HGNCParser", - "file" : "https://www.genenames.org/cgi-bin/download?col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_eg_id&col=gd_pub_ensembl_id&col=gd_pub_refseq_ids&col=gd_ccds_ids&col=gd_lsdb_links&status=Approved&status_opt=2&where=&order_by=gd_app_sym_sort&format=text&limit=&hgnc_dbtag=on&submit=submit", + "file" : "https://www.genenames.org/cgi-bin/download/custom?col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_eg_id&col=gd_pub_ensembl_id&col=gd_pub_refseq_ids&col=gd_ccds_ids&col=gd_lsdb_links&status=Approved&status_opt=2&where=&order_by=gd_app_sym_sort&format=text&limit=&hgnc_dbtag=on&submit=submit", "db" : "ccds", "priority" : 3 } diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/xref_all_sources.json b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/xref_all_sources.json index 7ef812874..07c48e29a 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/xref_all_sources.json +++ b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/xref_all_sources.json @@ -6,17 +6,10 @@ "db" : "core", "priority" : 1 }, - { - "name" : "CCDS", - "parser" : "CCDSParser", - "file" : "Database", - "db" : "ccds", - "priority" : 1 - }, { "name" : "UniParc", "parser" : "ChecksumParser", - "file" : "https://ftp.ebi.ac.uk/pub/contrib/uniparc/upidump.lis", + "file" : "https://ftp.ebi.ac.uk/pub/contrib/uniparc/upidump.lis.gz", "db" : "checksum", "priority" : 1 }, @@ -197,19 +190,19 @@ "name" : "ZFIN_ID", "parser" : "ZFINParser", "file" : "http://zfin.org/data_transfer/Downloads/uniprot.txt", - "priority" : 2 + "priority" : 3 }, { "name" : "ZFIN_ID", "parser" : "ZFINParser", "file" : "http://zfin.org/data_transfer/Downloads/aliases.txt", - "priority" : 2 + "priority" : 3 }, { "name" : "ZFIN_ID", "parser" : "ZFINParser", - "file" : "http://zfin.org/data_transfer/Downloads/gene_seq.txt", - "priority" : 1 + "file" : "https://zfin.org/downloads/ensembl_1_to_1.txt", + "priority" : 3 }, { "name" : "ZFIN_desc", @@ -226,13 +219,13 @@ { "name" : "Xenbase", "parser" : "XenopusJamboreeParser", - "file" : "http://ftp.xenbase.org/pub/GenePageReports/GenePageEnsemblModelMapping.txt", + "file" : "http://ftp.xenbase.org/pub/GenePageReports/GenePageEnsemblModelMapping_4.1.txt", "priority" : 1 }, { "name" : "miRBase", 
"parser" : "miRBaseParser", - "file" : "https://mirbase.org/ftp/CURRENT/miRNA.dat.gz", + "file" : "https://mirbase.org/download/miRNA.dat", "method" : "--bestn 1", "query_cutoff" : 90, "target_cutoff" : 90, @@ -241,7 +234,7 @@ { "name" : "HGNC", "parser" : "HGNCParser", - "file" : "https://www.genenames.org/cgi-bin/download?col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_eg_id&col=gd_pub_ensembl_id&col=gd_pub_refseq_ids&col=gd_ccds_ids&col=gd_lsdb_links&status=Approved&status_opt=2&where=&order_by=gd_app_sym_sort&format=text&limit=&hgnc_dbtag=on&submit=submit", + "file" : "https://www.genenames.org/cgi-bin/download/custom?col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_eg_id&col=gd_pub_ensembl_id&col=gd_pub_refseq_ids&col=gd_ccds_ids&col=gd_lsdb_links&status=Approved&status_opt=2&where=&order_by=gd_app_sym_sort&format=text&limit=&hgnc_dbtag=on&submit=submit", "db" : "ccds", "priority" : 3 } diff --git a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/xref_sources.json b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/xref_sources.json index 17fa985f1..0070a69d4 100644 --- a/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/xref_sources.json +++ b/modules/Bio/EnsEMBL/Production/Pipeline/Xrefs/xref_sources.json @@ -16,7 +16,7 @@ { "name" : "UniParc", "parser" : "ChecksumParser", - "file" : "https://ftp.ebi.ac.uk/pub/contrib/uniparc/upidump.lis", + "file" : "https://ftp.ebi.ac.uk/pub/contrib/uniparc/upidump.lis.gz", "db" : "checksum", "priority" : 1 }, @@ -254,13 +254,13 @@ { "name" : "Xenbase", "parser" : "XenopusJamboreeParser", - "file" : "http://ftp.xenbase.org/pub/GenePageReports/GenePageEnsemblModelMapping.txt", + "file" : "http://ftp.xenbase.org/pub/GenePageReports/GenePageEnsemblModelMapping_4.1.txt", "priority" : 1 }, { "name" : "miRBase", "parser" : "miRBaseParser", - "file" : "https://mirbase.org/ftp/CURRENT/miRNA.dat.gz", + "file" : "https://mirbase.org/download/miRNA.dat", "method" : "--bestn 1", "query_cutoff" : 90, "target_cutoff" : 90, @@ -269,7 +269,7 @@ { "name" : "HGNC", "parser" : "HGNCParser", - "file" : "https://www.genenames.org/cgi-bin/download?col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_eg_id&col=gd_pub_ensembl_id&col=gd_pub_refseq_ids&col=gd_ccds_ids&col=gd_lsdb_links&status=Approved&status_opt=2&where=&order_by=gd_app_sym_sort&format=text&limit=&hgnc_dbtag=on&submit=submit", + "file" : "https://www.genenames.org/cgi-bin/download/custom?col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_eg_id&col=gd_pub_ensembl_id&col=gd_pub_refseq_ids&col=gd_ccds_ids&col=gd_lsdb_links&status=Approved&status_opt=2&where=&order_by=gd_app_sym_sort&format=text&limit=&hgnc_dbtag=on&submit=submit", "db" : "ccds", "priority" : 3 } diff --git a/modules/t/test-genome-DBs/homo_sapiens/core/meta.txt b/modules/t/test-genome-DBs/homo_sapiens/core/meta.txt index 811bab016..1840fcdb1 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/core/meta.txt +++ b/modules/t/test-genome-DBs/homo_sapiens/core/meta.txt @@ -1,5 +1,5 @@ 1 \N schema_type core -2 \N schema_version 111 +2 \N schema_version 114 3 \N patch patch_98_99_a.sql|schema_version 2124 1 xref.timestamp 2013-07-22 11:20:10 4 \N patch patch_52_53_c.sql|identity_xref_rename @@ -261,3 +261,9 @@ 2217 \N patch patch_109_110_b.sql|Add IS_PAR relationship to link X- and Y-PAR genes 2218 \N patch patch_109_110_c.sql|Allow gene id to belong to multiple alt allele groups 2219 \N patch 
patch_110_111_a.sql|schema_version +2220 \N patch patch_111_112_a.sql|schema_version +2221 \N patch patch_111_112_b.sql|Allow meta_value to be null +2222 \N patch patch_111_112_c.sql|Extend meta_key length to 64 +2223 \N patch patch_112_113_a.sql|schema_version +2224 \N patch patch_112_113_b.sql|Ensure meta_value is not null +2225 \N patch patch_113_114_a.sql|schema_version diff --git a/modules/t/test-genome-DBs/homo_sapiens/core/table.sql b/modules/t/test-genome-DBs/homo_sapiens/core/table.sql index 70bd53327..76867bd9b 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/core/table.sql +++ b/modules/t/test-genome-DBs/homo_sapiens/core/table.sql @@ -485,12 +485,12 @@ CREATE TABLE `marker_synonym` ( CREATE TABLE `meta` ( `meta_id` int(11) NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned DEFAULT '1', - `meta_key` varchar(40) NOT NULL, + `meta_key` varchar(64) NOT NULL, `meta_value` varchar(255) NOT NULL, PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=MyISAM AUTO_INCREMENT=2220 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=2226 DEFAULT CHARSET=latin1; CREATE TABLE `meta_coord` ( `table_name` varchar(40) NOT NULL, diff --git a/modules/t/test-genome-DBs/homo_sapiens/empty/meta.txt b/modules/t/test-genome-DBs/homo_sapiens/empty/meta.txt index 87b939fd5..e3a5b80c8 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/empty/meta.txt +++ b/modules/t/test-genome-DBs/homo_sapiens/empty/meta.txt @@ -1,4 +1,4 @@ -1 \N schema_version 111 +1 \N schema_version 114 2 1 assembly.default NCBI34 33 1 species.classification Chordata 32 1 species.classification Vertebrata @@ -126,3 +126,9 @@ 176 \N patch patch_109_110_b.sql|Add IS_PAR relationship to link X- and Y-PAR genes 177 \N patch patch_109_110_c.sql|Allow gene id to belong to multiple alt allele groups 178 \N patch patch_110_111_a.sql|schema_version +179 \N patch patch_111_112_a.sql|schema_version +180 \N patch patch_111_112_b.sql|Allow meta_value to be null +181 \N patch patch_111_112_c.sql|Extend meta_key length to 64 +182 \N patch patch_112_113_a.sql|schema_version +183 \N patch patch_112_113_b.sql|Ensure meta_value is not null +184 \N patch patch_113_114_a.sql|schema_version diff --git a/modules/t/test-genome-DBs/homo_sapiens/empty/table.sql b/modules/t/test-genome-DBs/homo_sapiens/empty/table.sql index 4c32c6caf..e5db71a2e 100644 --- a/modules/t/test-genome-DBs/homo_sapiens/empty/table.sql +++ b/modules/t/test-genome-DBs/homo_sapiens/empty/table.sql @@ -490,12 +490,12 @@ CREATE TABLE `marker_synonym` ( CREATE TABLE `meta` ( `meta_id` int(11) NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned DEFAULT '1', - `meta_key` varchar(40) NOT NULL, + `meta_key` varchar(64) NOT NULL, `meta_value` varchar(255) NOT NULL, PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=MyISAM AUTO_INCREMENT=179 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=185 DEFAULT CHARSET=latin1; CREATE TABLE `meta_coord` ( `table_name` varchar(40) NOT NULL DEFAULT '', diff --git a/modules/t/test-genome-DBs/hp_dump/core/meta.txt b/modules/t/test-genome-DBs/hp_dump/core/meta.txt index d401f5666..9a3a430a0 100644 --- a/modules/t/test-genome-DBs/hp_dump/core/meta.txt +++ b/modules/t/test-genome-DBs/hp_dump/core/meta.txt @@ -1,5 +1,5 @@ 1 \N schema_type core -2 \N schema_version 111 +2 \N schema_version 114 3 \N patch 
patch_98_99_a.sql|schema_version 2124 1 xref.timestamp 2013-07-22 11:20:10 4 \N patch patch_52_53_c.sql|identity_xref_rename @@ -265,3 +265,9 @@ 2221 \N patch patch_109_110_b.sql|Add IS_PAR relationship to link X- and Y-PAR genes 2222 \N patch patch_109_110_c.sql|Allow gene id to belong to multiple alt allele groups 2223 \N patch patch_110_111_a.sql|schema_version +2224 \N patch patch_111_112_a.sql|schema_version +2225 \N patch patch_111_112_b.sql|Allow meta_value to be null +2226 \N patch patch_111_112_c.sql|Extend meta_key length to 64 +2227 \N patch patch_112_113_a.sql|schema_version +2228 \N patch patch_112_113_b.sql|Ensure meta_value is not null +2229 \N patch patch_113_114_a.sql|schema_version diff --git a/modules/t/test-genome-DBs/hp_dump/core/table.sql b/modules/t/test-genome-DBs/hp_dump/core/table.sql index df7288b40..f6e687e50 100644 --- a/modules/t/test-genome-DBs/hp_dump/core/table.sql +++ b/modules/t/test-genome-DBs/hp_dump/core/table.sql @@ -485,12 +485,12 @@ CREATE TABLE `marker_synonym` ( CREATE TABLE `meta` ( `meta_id` int(11) NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned DEFAULT '1', - `meta_key` varchar(40) NOT NULL, + `meta_key` varchar(64) NOT NULL, `meta_value` varchar(255) NOT NULL, PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=MyISAM AUTO_INCREMENT=2224 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=2230 DEFAULT CHARSET=latin1; CREATE TABLE `meta_coord` ( `table_name` varchar(40) NOT NULL, diff --git a/modules/t/test-genome-DBs/multi/compara/meta.txt b/modules/t/test-genome-DBs/multi/compara/meta.txt index d16a1c75c..69cf87d7d 100644 --- a/modules/t/test-genome-DBs/multi/compara/meta.txt +++ b/modules/t/test-genome-DBs/multi/compara/meta.txt @@ -1,6 +1,6 @@ 2 \N schema_type compara 3 \N patch patch_98_99_a.sql|schema_version -165 \N schema_version 111 +172 \N schema_version 114 4 \N patch patch_72_73_b.sql|homology_genetree_links 6 \N patch patch_73_74_a.sql|schema_version 7 \N patch patch_73_74_b.sql|hmm_profile @@ -126,3 +126,7 @@ 163 \N patch patch_109_110_b.sql|case_insensitive_stable_id_again 164 \N patch patch_109_110_c.sql|ncbi_taxa_name_varchar500 166 \N patch patch_110_111_a.sql|schema_version +168 \N patch patch_111_112_a.sql|schema_version +170 \N patch patch_112_113_a.sql|schema_version +171 \N patch patch_112_113_b.sql|meta_key_64 +173 \N patch patch_113_114_a.sql|schema_version diff --git a/modules/t/test-genome-DBs/multi/compara/table.sql b/modules/t/test-genome-DBs/multi/compara/table.sql index 49de3d28b..01d92fd28 100644 --- a/modules/t/test-genome-DBs/multi/compara/table.sql +++ b/modules/t/test-genome-DBs/multi/compara/table.sql @@ -436,12 +436,12 @@ CREATE TABLE `member_xref` ( CREATE TABLE `meta` ( `meta_id` int(11) NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned DEFAULT '1', - `meta_key` varchar(40) NOT NULL, + `meta_key` varchar(64) NOT NULL, `meta_value` text NOT NULL, PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`(255)), KEY `species_value_idx` (`species_id`,`meta_value`(255)) -) ENGINE=MyISAM AUTO_INCREMENT=167 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=174 DEFAULT CHARSET=latin1; CREATE TABLE `method_link` ( `method_link_id` int(10) unsigned NOT NULL AUTO_INCREMENT, diff --git a/modules/t/test-genome-DBs/s_cerevisiae/core/meta.txt b/modules/t/test-genome-DBs/s_cerevisiae/core/meta.txt index fe9f9a707..2f62db00f 100644 --- 
a/modules/t/test-genome-DBs/s_cerevisiae/core/meta.txt +++ b/modules/t/test-genome-DBs/s_cerevisiae/core/meta.txt @@ -1,5 +1,5 @@ 1 \N schema_type core -2 \N schema_version 111 +2 \N schema_version 112 3 \N patch patch_98_99_a.sql|schema_version 4 \N patch patch_60_61_b.sql|create_seq_region_synonym_table 5 \N patch patch_60_61_c.sql|rejig_object_xref_indexes @@ -190,3 +190,5 @@ 679 \N patch patch_109_110_b.sql|Add IS_PAR relationship to link X- and Y-PAR genes 680 \N patch patch_109_110_c.sql|Allow gene id to belong to multiple alt allele groups 681 \N patch patch_110_111_a.sql|schema_version +682 \N patch patch_111_112_a.sql|schema_version +683 \N patch patch_111_112_b.sql|Allow meta_value to be null diff --git a/modules/t/test-genome-DBs/s_cerevisiae/core/table.sql b/modules/t/test-genome-DBs/s_cerevisiae/core/table.sql index 7ade412de..2bfa40eba 100644 --- a/modules/t/test-genome-DBs/s_cerevisiae/core/table.sql +++ b/modules/t/test-genome-DBs/s_cerevisiae/core/table.sql @@ -486,11 +486,11 @@ CREATE TABLE `meta` ( `meta_id` int(11) NOT NULL AUTO_INCREMENT, `species_id` int(10) unsigned DEFAULT '1', `meta_key` varchar(40) NOT NULL, - `meta_value` varchar(255) NOT NULL, + `meta_value` varchar(255) DEFAULT NULL, PRIMARY KEY (`meta_id`), UNIQUE KEY `species_key_value_idx` (`species_id`,`meta_key`,`meta_value`), KEY `species_value_idx` (`species_id`,`meta_value`) -) ENGINE=MyISAM AUTO_INCREMENT=682 DEFAULT CHARSET=latin1; +) ENGINE=MyISAM AUTO_INCREMENT=684 DEFAULT CHARSET=latin1; CREATE TABLE `meta_coord` ( `table_name` varchar(40) NOT NULL, diff --git a/nextflow/config/xref.config b/nextflow/config/xref.config new file mode 100644 index 000000000..024f80e68 --- /dev/null +++ b/nextflow/config/xref.config @@ -0,0 +1,85 @@ +includeConfig './base.config' + +params.pipeline_dir = "$PWD" +params.user = "$USER" +params.email = "${params.user}@ebi.ac.uk" +params.email_server = "hh-smtp.ebi.ac.uk:25" + +params.work_dir = "$BASE_DIR" +params.scripts_dir = "${params.work_dir}/ensembl-production/src/python/scripts/" +params.perl_scripts_dir = "${params.work_dir}/ensembl-production/scripts/xrefs/" + +params.config_file = "${params.work_dir}/ensembl-production/src/python/ensembl/xrefs/config/xref_all_sources.json" +params.sources_config_file = "${params.work_dir}/ensembl-production/src/python/ensembl/xrefs/config/xref_config.ini" +params.source_db_url = '' +params.skip_download = 0 +params.reuse_db = 0 +params.skip_preparse = 1 +params.split_files_by_species = 1 +params.tax_ids_file = '' +params.update_mode = 0 + +params.base_path = '' +params.clean_files = 1 +params.clean_dir = "${params.base_path}/clean_files" + +trace { + enabled = true + file = "trace" + overwrite = true +} + +report { + overwrite = true + file = "report.html" + enabled = true +} + +profiles { + + lsf { + process { + errorStrategy = { task.attempt <= process.maxRetries ? 'retry' : 'finish' } + executor = 'lsf' + queue = 'production' + queueSize = 100 + maxRetries = 3 + withLabel:small_process { + memory = 200.MB + //very specific to lsf + executor.perTaskReserve = 200.MB + } + withLabel: dm { + queue = 'datamover' + time = '2h' + } + } + } + + slurm { + process { + errorStrategy = { task.attempt <= process.maxRetries ? 
'retry' : 'finish' } + executor = 'slurm' + queue = 'production' + queueSize = 100 + maxRetries = 3 + time = '1d' + + withLabel:small_process { + memory = 200.MB + } + + withLabel: dm { + queue = 'datamover' + time = '3h' + memory = 2.GB + } + withLabel:mem4GB { + time = '5d' + memory = 4.GB + } + } + } +} + + diff --git a/nextflow/workflows/xrefDownload.nf b/nextflow/workflows/xrefDownload.nf new file mode 100644 index 000000000..65e255fda --- /dev/null +++ b/nextflow/workflows/xrefDownload.nf @@ -0,0 +1,243 @@ +#!/usr/bin/env nextflow + +// Parameter default values +params.pipeline_name = 'Xref Download Pipeline' +params.help = false + +println """\ + XREF DOWNLOAD PIPELINE + ====================== + source_db_url : ${params.source_db_url} + base_path : ${params.base_path} + reuse_db : ${params.reuse_db} + skip_download : ${params.skip_download} + skip_preparse : ${params.skip_preparse} + clean_files : ${params.clean_files} + split_files_by_species : ${params.split_files_by_species} + config_file : ${params.config_file} + sources_config_file : ${params.sources_config_file} + clean_dir : ${params.clean_dir} + tax_ids_file : ${params.tax_ids_file} + update_mode : ${params.update_mode} + """ + .stripIndent() + +def helpMessage() { + log.info""" + Usage: + nextflow run ensembl-production/xrefDownload.nf + --source_db_url (mandatory) Database URL to store information about xref sources. + Syntax: 'mysql://user:password@host:port/dbname' + + --base_path (mandatory) Path where log and source files will be stored, + a scratch space with sufficient storage is recommended. + + --reuse_db (optional) If set to 1, an existing source database (specified in --source_db_url) will be reused. + Default: 0 + + --skip_download (optional) If set to 1, source files will only be downloaded if they don't already exist in --base_path. + Default: 0 + + --skip_preparse (optional) If set to 1, the pre-parse step will be skipped (no central DB). + Default: 1 + + --clean_files (optional) If set to 1, the Cleanup analysis will be run for RefSeq and UniProt files. + Default: 1 + + --split_files_by_species (optional) If set to 1, UniProt and RefSeq files will be split according to taxonomy ID. + Default: 1 + + --config_file (optional) Path to the json file containing information about xref sources to download. + Default: $BASE_DIR/ensembl_nf/src/python/ensembl/xrefs/config/xref_all_sources.json + + --sources_config_file (optional) Path to the ini file containing information about all xref sources and species/divisions. + Default: $BASE_DIR/ensembl_nf/src/python/ensembl/xrefs/config/xref_config.ini + + --clean_dir (optional) Path where the cleaned-up files will be saved. + Default: [--base_path]/clean_files + + --tax_ids_file (optional) Path to the file containing the taxonomy IDs of the species to extract data for. + Used to update the data for the provided species. + + --update_mode (optional) If set to 1, the pipeline is in update mode, refreshing/updating its data for new taxonomy IDs. + Only used if --tax_ids_file is set. 
Default: 0 + """.stripIndent() +} + +workflow { + if (params.help || !params.source_db_url || !params.base_path) { + helpMessage() + + if (!params.source_db_url) { + println """ + Missing required param source_db_url + """.stripIndent() + } + if (!params.base_path) { + println """ + Missing required param base_path + """.stripIndent() + } + + exit 1 + } + + ScheduleDownload() + timestamp = ScheduleDownload.out[0] + + DownloadSource(ScheduleDownload.out[1].splitText(), timestamp) + + CleanupTmpFiles(DownloadSource.out.collect()) + ScheduleCleanup(CleanupTmpFiles.out, timestamp) + + Checksum(ScheduleCleanup.out[0], timestamp) + if (params.split_files_by_species) { + CleanupSplitSource(ScheduleCleanup.out[1].ifEmpty([]).splitText(), timestamp) + NotifyByEmail(Checksum.out.concat(CleanupSplitSource.out.collect()).collect(), timestamp) + } else { + CleanupSource(ScheduleCleanup.out[1].ifEmpty([]).splitText(), timestamp) + NotifyByEmail(Checksum.out.concat(CleanupSource.out.collect()).collect(), timestamp) + } +} + +process ScheduleDownload { + label 'small_process' + + output: + val timestamp + path 'dataflow_sources.json' + + script: + timestamp = new java.util.Date().format("yyyyMMdd_HHmmss") + + """ + python ${params.scripts_dir}/run_module.py --module ensembl.production.xrefs.ScheduleDownload --config_file ${params.config_file} --source_db_url ${params.source_db_url} --reuse_db ${params.reuse_db} --skip_preparse ${params.skip_preparse} --base_path ${params.base_path} --log_timestamp $timestamp + """ +} + +process DownloadSource { + label 'dm' + tag "$src_name" + + input: + val x + val timestamp + + output: + val 'DownloadSourceDone' + + shell: + src_name = (x =~ /"name":\s*"([A-Za-z0-9_.-\/]+)"/)[0][1] + + """ + python ${params.scripts_dir}/run_module.py --module ensembl.production.xrefs.DownloadSource --dataflow '$x' --base_path ${params.base_path} --log_timestamp $timestamp --source_db_url ${params.source_db_url} --skip_download ${params.skip_download} + """ +} + +process CleanupTmpFiles { + label 'small_process' + + input: + val x + + output: + val 'TmpCleanupDone' + + """ + find ${params.base_path} -type f -name "*.tmp" -delete + """ +} + +process ScheduleCleanup { + label 'small_process' + + input: + val x + val timestamp + + output: + val 'ScheduleCleanupDone' + path 'dataflow_cleanup_sources.json' + + """ + python ${params.scripts_dir}/run_module.py --module ensembl.production.xrefs.ScheduleCleanup --base_path ${params.base_path} --source_db_url ${params.source_db_url} --clean_files ${params.clean_files} --clean_dir ${params.clean_dir} --split_files_by_species ${params.split_files_by_species} --log_timestamp $timestamp + """ +} + +process Checksum { + label 'default_process' + + input: + val x + val timestamp + + output: + val 'ChecksumDone' + + """ + python ${params.scripts_dir}/run_module.py --module ensembl.production.xrefs.Checksum --base_path ${params.base_path} --source_db_url ${params.source_db_url} --skip_download ${params.skip_download} --log_timestamp $timestamp + """ +} + +process CleanupSplitSource { + label 'mem4GB' + tag "$src_name" + + input: + each x + val timestamp + + output: + val 'CleanupDone' + + shell: + cmd_params = "" + src_name = (x =~ /"name":\s*"([A-Za-z0-9_.-\/]+)"/)[0][1] + if (x =~ /"version_file":/) { + version_file = (x =~ /"version_file":\s*"(.*?)"/)[0][1] + cmd_params = "${cmd_params} --version_file '${version_file}'" + } + if (params.tax_ids_file) { + cmd_params = "${cmd_params} --tax_ids_file ${params.tax_ids_file}" + } + + """ + perl 
${params.perl_scripts_dir}/cleanup_and_split_source.pl --base_path ${params.base_path} --log_timestamp $timestamp --source_db_url ${params.source_db_url} --name $src_name --clean_dir ${params.clean_dir} --clean_files ${params.clean_files} --update_mode ${params.update_mode} $cmd_params + """ +} + +process CleanupSource { + label 'mem4GB' + tag "$src_name" + + input: + val x + val timestamp + + output: + val 'CleanupDone' + + shell: + cmd_params = "" + src_name = (x =~ /"name":\s*"([A-Za-z0-9_.-\/]+)"/)[0][1] + if (x =~ /"version_file":/) { + version_file = (x =~ /"version_file":\s*"(.*?)"/)[0][1] + cmd_params = "${cmd_params} --version_file '${version_file}'" + } + + """ + perl ${params.perl_scripts_dir}/cleanup_source.pl --base_path ${params.base_path} --log_timestamp $timestamp --source_db_url ${params.source_db_url} --name $src_name --clean_dir ${params.clean_dir} --skip_download ${params.skip_download} --clean_files ${params.clean_files} $cmd_params + """ +} + +process NotifyByEmail { + label 'small_process' + + input: + val x + val timestamp + + """ + python ${params.scripts_dir}/run_module.py --module ensembl.production.xrefs.EmailNotification --pipeline_name '${params.pipeline_name}' --base_path ${params.base_path} --email ${params.email} --email_server ${params.email_server} --log_timestamp $timestamp + """ +} diff --git a/requirements.in b/requirements.in index 0a96ae592..ff57a0d74 100755 --- a/requirements.in +++ b/requirements.in @@ -1,5 +1,5 @@ requests>=2.25.0,<3 pydantic~=1.10.5 -graphene~=2.1.9 -graphene-sqlalchemy~=2.3.0 -git+https://github.com/Ensembl/ensembl-metadata-api.git@2.1.0a1#egg=ensembl-metadata-api +ensembl-py>=2.1.0 +ensembl-utils>=0.4.4 +git+https://github.com/Ensembl/ensembl-metadata-api.git@3.3.0a1#egg=ensembl-metadata-api \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ba89f7a44..54aeade70 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,144 +1,55 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile requirements.in +# pip-compile --output-file=requirements.txt requirements.in # -aniso8601==7.0.0 - # via graphene certifi==2024.2.2 - # via - # ensembl-metadata-api - # requests + # via requests charset-normalizer==3.3.2 - # via - # ensembl-metadata-api - # requests -ensembl-hive @ git+https://github.com/Ensembl/ensembl-hive.git - # via - # ensembl-metadata-api - # ensembl-py -ensembl-metadata-api @ git+https://github.com/Ensembl/ensembl-metadata-api.git@2.1.0a1 + # via requests +ensembl-metadata-api @ git+https://github.com/Ensembl/ensembl-metadata-api.git@3.3.0a1 # via -r requirements.in -ensembl-py @ git+https://github.com/Ensembl/ensembl-py.git@1.2.2 - # via ensembl-metadata-api -exceptiongroup==1.2.0 - # via - # ensembl-metadata-api - # pytest -graphene==2.1.9 - # via - # -r requirements.in - # graphene-sqlalchemy -graphene-sqlalchemy==2.3.0 +ensembl-py==2.1.3 # via -r requirements.in -graphql-core==2.3.2 - # via - # graphene - # graphql-relay -graphql-relay==2.0.1 - # via graphene -greenlet==3.0.3 - # via - # ensembl-metadata-api - # sqlalchemy -grpcio==1.62.0 - # via - # ensembl-metadata-api - # grpcio-reflection - # grpcio-tools -grpcio-reflection==1.62.0 - # via ensembl-metadata-api -grpcio-tools==1.62.0 - # via ensembl-metadata-api +ensembl-utils==0.5.0 + # via -r requirements.in +exceptiongroup==1.2.2 + # via pytest +greenlet==3.1.1 + # via sqlalchemy idna==3.6 - # via - # 
ensembl-metadata-api - # requests + # via requests iniconfig==2.0.0 - # via - # ensembl-metadata-api - # pytest -mysqlclient==2.1.1 - # via - # ensembl-metadata-api - # ensembl-py -packaging==23.2 - # via - # ensembl-metadata-api - # pytest -pluggy==1.4.0 - # via - # ensembl-metadata-api - # pytest -promise==2.3 - # via - # graphene-sqlalchemy - # graphql-core - # graphql-relay -protobuf==4.25.3 - # via - # ensembl-metadata-api - # grpcio-reflection - # grpcio-tools -pydantic==1.10.9 + # via pytest +packaging==24.1 + # via pytest +pluggy==1.5.0 + # via pytest +pydantic==1.10.14 # via -r requirements.in -pytest==8.0.2 - # via - # ensembl-metadata-api - # ensembl-py - # pytest-dependency -pytest-dependency==0.5.1 - # via - # ensembl-metadata-api - # ensembl-py -python-dotenv==0.19.2 - # via - # ensembl-metadata-api - # ensembl-py -pyyaml==6.0.1 - # via - # ensembl-metadata-api - # ensembl-py +pytest==8.3.3 + # via ensembl-utils +python-dotenv==1.0.1 + # via ensembl-utils +pyyaml==6.0.2 + # via ensembl-utils requests==2.31.0 # via # -r requirements.in - # ensembl-metadata-api - # ensembl-py -rx==1.6.3 - # via graphql-core -singledispatch==3.7.0 - # via graphene-sqlalchemy -six==1.16.0 - # via - # graphene - # graphene-sqlalchemy - # graphql-core - # graphql-relay - # promise - # singledispatch -sqlalchemy==1.4.52 + # ensembl-utils +sqlalchemy==2.0.35 # via - # ensembl-metadata-api # ensembl-py - # graphene-sqlalchemy + # ensembl-utils # sqlalchemy-utils -sqlalchemy-utils==0.38.3 - # via - # ensembl-metadata-api - # ensembl-py +sqlalchemy-utils==0.41.2 + # via ensembl-utils tomli==2.0.1 + # via pytest +typing-extensions==4.10.0 # via - # ensembl-metadata-api - # pytest -types-pymysql==1.1.0.1 - # via ensembl-metadata-api -typing-extensions==4.6.3 - # via pydantic + # pydantic + # sqlalchemy urllib3==1.26.18 - # via - # ensembl-metadata-api - # requests - -# The following packages are considered to be unsafe in a requirements file: -# setuptools + # via requests diff --git a/scripts/copyrights/update_copyrights.sh b/scripts/copyrights/update_copyrights.sh index bf18476e1..490acc32b 100755 --- a/scripts/copyrights/update_copyrights.sh +++ b/scripts/copyrights/update_copyrights.sh @@ -50,6 +50,7 @@ for repo in $repositories; do git clone --depth 1 --branch main git@github.com:${repo} ${tmp_dir}/${repo} if [ $? 
-eq 0 ]; then cd ${tmp_dir}/${repo} + git push origin --delete bau/copyright-${year} git checkout -b bau/copyright-${year} perl ${ENSEMBL_ROOT_DIR}/ensembl/misc-scripts/annual_copyright_updater.sh git commit -a -m "${year} copyright update" @@ -65,6 +66,7 @@ for repo in $repositories; do fi else echo 'failed to push commits and open a pull request.'; + git push origin --delete bau/copyright-${year} fi else echo 'failed to commit updates.'; diff --git a/scripts/py/regulation_ftp_symlinks.py b/scripts/py/regulation_ftp_symlinks.py index be8c30d9e..3ded73f0c 100644 --- a/scripts/py/regulation_ftp_symlinks.py +++ b/scripts/py/regulation_ftp_symlinks.py @@ -35,15 +35,14 @@ """ +import logging from argparse import ArgumentParser from collections import defaultdict -import logging -from os import walk, path, listdir, makedirs +from os import listdir, makedirs, path, walk from pathlib import Path - # Human and Mouse follow a different dir structure -SPECIES_TO_NOT_INCLUDE = ["homo_sapiens", "mus_musculus"] +SPECIES_TO_NOT_INCLUDE = [] # GENE-SWITCH species GENE_SWITCH_SPECIES = [ @@ -140,8 +139,15 @@ def get_species_with_analysis_type_folder(analysis_type, ftp_path): def get_most_recent_release_data_file_path(data_file_path): validator.is_dir(Path(data_file_path)) available_releases = listdir(data_file_path) + releases = [] + for release in available_releases: + try: + releases.append(int(release)) + except ValueError: + continue + return Path(data_file_path) / str( - max([int(release) for release in available_releases]) + max(releases) ) @@ -181,12 +187,12 @@ def __init__(self, **path_specifics): def get(self, key): return self.path_specifics.get(key) - def symlink2rf(self, only_remove=False, relative=True): + def symlink2rf(self, analysis_type, only_remove=False, relative=True): target = ( Path(path.relpath(self.target, self.sources["release_folder"])) - / "peaks" + / analysis_type if relative - else self.target / "peaks" + else self.target / analysis_type ) source = self.sources["release_folder"] / self.get("analysis_type") @@ -221,7 +227,9 @@ def _symlink(self, source, target, only_remove): ) else: if not validator.is_symlink(source, check=True): - logger.info("{source} -> {target} -- was successfully removed") + logger.info( + f"{source} -> {target} -- was successfully removed" + ) def aliased_paths(self, **kwargs): return { @@ -243,7 +251,7 @@ def search(analysis_type, ftp_path, release): release=release, ) for species, assemblies in result.items() - for assembly in assemblies + for assembly in assemblies if assembly not in ["GRCh37", "GRCm38", "NCBIM37"] ] @@ -299,7 +307,7 @@ def parse_arguments(): ANALYSIS_TYPE_PEAKS, ftp_path, args.release_version ) for peak in peaks: - peak.symlink2rf(only_remove=args.delete_symlinks) + peak.symlink2rf("peaks", only_remove=args.delete_symlinks) peak.symlink2misc("peaks", only_remove=args.delete_symlinks) logger.info("Searching for signals in data_files ...") @@ -307,8 +315,7 @@ def parse_arguments(): ANALYSIS_TYPE_SIGNAL, ftp_path, args.release_version ) for signal in signals: - signal.symlink2rf(only_remove=args.release_version) - + signal.symlink2rf("signal", only_remove=args.delete_symlinks) signal.symlink2misc("signal", only_remove=args.delete_symlinks) logger.info("Process Completed") diff --git a/scripts/xrefs/cleanup_and_split_source.pl b/scripts/xrefs/cleanup_and_split_source.pl new file mode 100644 index 000000000..3beabbcd6 --- /dev/null +++ b/scripts/xrefs/cleanup_and_split_source.pl @@ -0,0 +1,291 @@ +#!/usr/bin/env perl +# Copyright [1999-2015]
Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +# Copyright [2016-2024] EMBL-European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +use strict; +use warnings; +use Data::Dumper; +use Getopt::Long; +use Carp; +use DBI; +use File::Path qw/make_path rmtree/; +use File::Spec::Functions; +use HTTP::Tiny; +use JSON; +use File::Basename; +use POSIX qw(strftime); + +use Nextflow::Utils; + +my ($base_path, $source_db_url, $source_name, $clean_dir, $clean_files, $version_file, $tax_ids_file, $update_mode, $log_timestamp); +GetOptions( + 'base_path=s' => \$base_path, + 'source_db_url=s' => \$source_db_url, + 'name=s' => \$source_name, + 'clean_dir=s' => \$clean_dir, + 'clean_files=i' => \$clean_files, + 'version_file:s' => \$version_file, + 'tax_ids_file:s' => \$tax_ids_file, + 'update_mode:i' => \$update_mode, + 'log_timestamp:s' => \$log_timestamp +); + +# Check that all mandatory parameters are passed +if (!defined($base_path) || !defined($source_db_url) || !defined($source_name) || !defined($clean_dir) || !defined($clean_files)) { + croak "Usage: cleanup_and_split_source.pl --base_path --source_db_url --name --clean_dir --clean_files [--version_file ] [--tax_ids_file ] [--update_mode ] [--log_timestamp ]"; +} + +if (!defined($update_mode)) {$update_mode = 0;} + +my $log_file; +if (defined($log_timestamp)) { + my $log_path = catdir($base_path, 'logs', $log_timestamp); + make_path($log_path); + $log_file = catfile($log_path, "tmp_logfile_CleanupSplitSource_".int(rand(500))); + + add_to_log_file($log_file, "CleanupSplitSource starting for source $source_name"); + add_to_log_file($log_file, "Param: tax_ids_file = " . ($tax_ids_file // '')); +} + +# Do nothing if not a uniprot or refseq source +if ($source_name !~ /^Uniprot/ && $source_name !~ /^RefSeq_/) { + add_to_log_file($log_file, "Provided source name is invalid. Can only clean up and split Uniprot or RefSeq files."); + exit; +} + +# Remove last '/' character if it exists +if ($base_path =~ /\/$/) {chop($base_path);} + +# Remove / char from source name to access directory +my $clean_name = $source_name; +$clean_name =~ s/\///g; + +my $output_path = $clean_dir."/".$clean_name; + +# Create needed directories +if (!$update_mode) { + rmtree($output_path); +} +make_path($output_path); + +my $sources_to_remove; +my ($is_uniprot, $is_refseq_dna, $is_refseq_peptide) = (0, 0, 0); + +# Decide which files are being processed +my $output_file_name = ''; +if ($source_name =~ /^Uniprot/) { + $is_uniprot = 1;
+ $output_file_name = ($source_name =~ /SPTREMBL/ ? 'uniprot_trembl' : 'uniprot_sprot'); + + # Set sources to skip in parsing step + my @source_names = ( + 'GO', 'UniGene', 'RGD', 'CCDS', 'IPI', 'UCSC', 'SGD', 'HGNC', 'MGI', 'VGNC', 'Orphanet', + 'ArrayExpress', 'GenomeRNAi', 'EPD', 'Xenbase', 'Reactome', 'MIM_GENE', 'MIM_MORBID', 'MIM', + 'Interpro' + ); + $sources_to_remove = join("|", @source_names); +} elsif ($source_name =~ /^RefSeq_dna/) { + $is_refseq_dna = 1; + $output_file_name = 'refseq_rna'; +} elsif ($source_name =~ /^RefSeq_peptide/) { + $is_refseq_peptide = 1; + $output_file_name = 'refseq_protein'; +} else { + croak "Unknown file type $source_name"; +} + +# Extract taxonomy IDs +my %tax_ids; +my ($skipped_species, $added_species) = (0, 0); +if ($tax_ids_file && $update_mode) { + open my $fh, '<', $tax_ids_file; + chomp(my @lines = <$fh>); + close $fh; + %tax_ids = map { $_ => 1 } @lines; + + # Check if any taxonomy IDs already have files + foreach my $tax_id (keys(%tax_ids)) { + my @tax_files = glob($output_path . "/**/**/**/**/" . $output_file_name . "-" . $tax_id); + if (scalar(@tax_files) > 0) { + $tax_ids{$tax_id} = 0; + $skipped_species++; + } + } + + # Do nothing if all taxonomy IDs already have files + if ($skipped_species == scalar(keys(%tax_ids))) { + add_to_log_file($log_file, "All provided tax IDs already have files. Doing nothing."); + exit; + } +} + +# Get all files for source +my $files_path = $base_path."/".$clean_name; +my @files = glob($files_path."/*"); +my $out_fh; +my $current_species_id; + +# Process each file +foreach my $input_file_name (@files) { + local $/ = "//\n"; + + add_to_log_file($log_file, "Splitting up file $input_file_name"); + + $input_file_name = basename($input_file_name); + my $input_file = $files_path."/".$input_file_name; + my $in_fh; + + # Skip the release file + if (defined($version_file) && $input_file eq $version_file) {next;} + + # Open file normally or with zcat for zipped files + if ($input_file_name =~ /\.(gz|Z)$/x) { + open($in_fh, "zcat $input_file |") or die "Couldn't call 'zcat' to open input file '$input_file' $!"; + } else { + open($in_fh, '<', $input_file) or die "Couldn't open file input '$input_file' $!"; + } + + # Only start processing if we could get a filehandle + if (defined($in_fh)) { + my ($write_path, $write_file); + + # Read full records + while (my $record = $in_fh->getline()) { + # Extract the species id from record + my $species_id; + if ($is_uniprot) { + ($species_id) = $record =~ /OX\s+[a-zA-Z_]+=([0-9 ,]+).*;/; + $species_id =~ s/\s+//g if $species_id; + } else { + ($species_id) = $record =~ /db_xref=.taxon:(\d+)/; + } + + # Only continue with wanted species + next if (!$species_id); + next if ($tax_ids_file && (!defined($tax_ids{$species_id}) || !$tax_ids{$species_id})); + + # Clean up data + if ($clean_files) { + if ($is_uniprot) { + $record =~ s/\nR(N|P|X|A|T|R|L|C|G)\s{3}.*//g; # Remove references lines + $record =~ s/\nCC(\s{3}.*)CAUTION: The sequence shown here is derived from an Ensembl(.*)/\nCT$1CAUTION: The sequence shown here is derived from an Ensembl$2/g; # Set specific caution comment to temporary + $record =~ s/\nCC\s{3}.*//g; # Remove comments + $record =~ s/\nCT(\s{3}.*)CAUTION: The sequence shown here is derived from an Ensembl(.*)/\nCC$1CAUTION: The sequence shown here is derived from an Ensembl$2/g; # Set temp line back to comment + $record =~ s/\nFT\s{3}.*//g; # Remove feature coordinates + $record =~ s/\nDR\s{3}($sources_to_remove);.*//g; # Remove sources skipped at processing + } else {
my $skip_data = 0; + my @lines = split("\n", $record); + my @new_record; + + for my $line (@lines) { + if ($is_refseq_dna) { + if ($line =~ /^REFERENCE/ || $line =~ /^COMMENT/ || $line =~ /^\s{5}exon/ || $line =~ /^\s{5}misc_feature/ || $line =~ /^\s{5}variation/) { + $skip_data = 1; + } elsif ($line =~ /^\s{5}source/ || $line =~ /^ORIGIN/) { + $skip_data = 0; + } + } elsif ($is_refseq_peptide) { + if ($line =~ /^REFERENCE/ || $line =~ /^COMMENT/ || $line =~ /^\s{5}Protein/) { + $skip_data = 1; + } elsif ($line =~ /^\s{5}source/ || $line =~ /^\s{5}CDS/ || $line =~ /^ORIGIN/) { + $skip_data = 0; + } + } + + if (!$skip_data) { + push(@new_record, $line); + } + } + + $record = join("\n", @new_record); + } + } + + # Write the record in the appropriate file + if (!defined($current_species_id) || $species_id ne $current_species_id) { + close($out_fh) if (defined($current_species_id)); + + my $species_id_str = sprintf("%04d", $species_id); + my @digits = split('', $species_id_str); + + $write_path = catdir($output_path, $digits[0], $digits[1], $digits[2], $digits[3]); + make_path($write_path); + + $write_file = $write_path."/".$output_file_name."-".$species_id; + + # Check if creating new file + if (!-e $write_file) { + $added_species++; + } + + open($out_fh, '>>', $write_file) or die "Couldn't open output file '$write_file' $!"; + + $current_species_id = $species_id; + } + + print $out_fh $record.($is_uniprot ? "" : "\n"); + } + + close($in_fh); + close($out_fh) if $out_fh; + } +} + +add_to_log_file($log_file, "Source $source_name cleaned up"); +add_to_log_file($log_file, "$source_name skipped species = $skipped_species"); +add_to_log_file($log_file, "$source_name species files created = $added_species"); + +# Save the clean files directory in source db +my ($user, $pass, $host, $port, $source_db) = parse_url($source_db_url); +my $dbi = get_dbi($host, $port, $user, $pass, $source_db); +my $update_version_sth = $dbi->prepare("UPDATE IGNORE version set clean_uri=? where source_id=(SELECT source_id FROM source WHERE name=?)"); +$update_version_sth->execute($output_path, $source_name); +$update_version_sth->finish(); + +sub get_dbi { + my ($host, $port, $user, $pass, $dbname) = @_; + my $dbconn; + if (defined $dbname) { + $dbconn = sprintf("dbi:mysql:host=%s;port=%s;database=%s", $host, $port, $dbname); + } else { + $dbconn = sprintf("dbi:mysql:host=%s;port=%s", $host, $port); + } + my $dbi = DBI->connect( $dbconn, $user, $pass, { 'RaiseError' => 1 } ) or croak( "Can't connect to database: " . $DBI::errstr );
+ return $dbi; +} + +sub parse_url { + my ($url) = @_; + my $parsed_url = Nextflow::Utils::parse($url); + my $user = $parsed_url->{'user'}; + my $pass = $parsed_url->{'pass'}; + my $host = $parsed_url->{'host'}; + my $port = $parsed_url->{'port'}; + my $db = $parsed_url->{'dbname'}; + return ($user, $pass, $host, $port, $db); +} + +sub add_to_log_file { + my ($log_file, $message) = @_; + + if (defined($log_file)) { + my $current_timestamp = strftime "%d-%b-%Y %H:%M:%S", localtime; + + open(my $fh, '>>', $log_file); + print $fh "$current_timestamp | INFO | $message\n"; + close($fh); + } +} diff --git a/scripts/xrefs/cleanup_source.pl b/scripts/xrefs/cleanup_source.pl new file mode 100644 index 000000000..5ce29a0f5 --- /dev/null +++ b/scripts/xrefs/cleanup_source.pl @@ -0,0 +1,235 @@ +#!/usr/bin/env perl +# Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +# Copyright [2016-2024] EMBL-European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +use strict; +use warnings; +use Data::Dumper; +use Getopt::Long; +use Carp; +use DBI; +use File::Path qw/make_path/; +use File::Spec::Functions; +use POSIX qw(strftime); + +use Nextflow::Utils; + +my ($base_path, $source_db_url, $source_name, $clean_dir, $skip_download, $clean_files, $version_file, $log_timestamp); +GetOptions( + 'base_path=s' => \$base_path, + 'source_db_url=s' => \$source_db_url, + 'name=s' => \$source_name, + 'clean_dir=s' => \$clean_dir, + 'skip_download=i' => \$skip_download, + 'clean_files=i' => \$clean_files, + 'version_file:s' => \$version_file, + 'log_timestamp:s' => \$log_timestamp +); + +# Check that all mandatory parameters are passed +if (!defined($base_path) || !defined($source_db_url) || !defined($source_name) || !defined($clean_dir) || !defined($skip_download) || !defined($clean_files)) { + croak "Usage: cleanup_source.pl --base_path --source_db_url --name --clean_dir --skip_download --clean_files [--version_file ] [--log_timestamp ]"; +} + +my $log_file; +if (defined($log_timestamp)) { + my $log_path = catdir($base_path, 'logs', $log_timestamp); + make_path($log_path); + $log_file = catfile($log_path, "tmp_logfile_CleanupSource_".int(rand(500))); + + add_to_log_file($log_file, "CleanupSource starting for source $source_name"); +} + +# Do nothing if not cleaning files, not a uniprot or refseq source, or no new download +if ($clean_files && ($source_name =~ /^Uniprot/ || $source_name =~ /^RefSeq_/)) { + # Remove last '/' character if it exists + if ($base_path =~ /\/$/) {chop($base_path);} + + # Remove / char from source name to access directory + my $clean_name = $source_name; + $clean_name =~ s/\///g; + + my $output_path = $clean_dir."/".$clean_name; + my $update_clean_uri = 0; + + # If not a new download, check if clean files exist + if ($skip_download) { + if (-d $output_path) { + $update_clean_uri = 1; + } + } else { + # Create needed directories + make_path($output_path); + + $update_clean_uri = 1; + + my $sources_to_remove;
+ my ($is_uniprot, $is_refseq_dna, $is_refseq_peptide) = (0, 0, 0); + my $file_size = 0; + + # Set sources to skip in parsing step (uniprot only) + if ($source_name =~ /^Uniprot/) { + $is_uniprot = 1; + my @source_names = ( + 'GO', 'UniGene', 'RGD', 'CCDS', 'IPI', 'UCSC', 'SGD', 'HGNC', 'MGI', 'VGNC', 'Orphanet', + 'ArrayExpress', 'GenomeRNAi', 'EPD', 'Xenbase', 'Reactome', 'MIM_GENE', 'MIM_MORBID', 'MIM', + 'Interpro' + ); + $sources_to_remove = join("|", @source_names); + $file_size = 200000; + } elsif ($source_name =~ /^RefSeq_dna/) { + $is_refseq_dna = 1; + } elsif ($source_name =~ /^RefSeq_peptide/) { + $is_refseq_peptide = 1; + } else { + croak "Unknown file type $source_name"; + } + + # Get all files for source + my $files_path = $base_path."/".$clean_name; + my @files = `ls $files_path`; + foreach my $file_name (@files) { + $file_name =~ s/\n//; + my $file = $files_path."/".$file_name; + + # Skip the release file + if (defined($version_file) && $file eq $version_file) {next;} + + my ($in_fh, $out_fh); + my $output_file = $file_name; + + # Open file normally or with zcat for zipped files + if ($file_name =~ /\.(gz|Z)$/x) { + open($in_fh, "zcat $file |") + or die "Couldn't call 'zcat' to open input file '$file' $!"; + + $output_file =~ s/\.[^.]+$//; + } else { + open($in_fh, '<', $file) + or die "Couldn't open file input '$file' $!"; + } + + # Only start cleaning up if we could get a filehandle + my $count = 0; + my $file_count = 1; + if (defined($in_fh)) { + if ($is_uniprot) { + local $/ = "//\n"; + + my $write_file = $output_path."/".$output_file . "-$file_count"; + open($out_fh, '>', $write_file) or die "Couldn't open output file '$write_file' $!"; + + # Read full records + while ($_ = $in_fh->getline()) { + # Remove unused data + $_ =~ s/\nR(N|P|X|A|T|R|L|C|G)\s{3}.*//g; # Remove references lines + $_ =~ s/\nCC(\s{3}.*)CAUTION: The sequence shown here is derived from an Ensembl(.*)/\nCT$1CAUTION: The sequence shown here is derived from an Ensembl$2/g; # Set specific caution comment to temporary + $_ =~ s/\nCC\s{3}.*//g; # Remove comments + $_ =~ s/\nCT(\s{3}.*)CAUTION: The sequence shown here is derived from an Ensembl(.*)/\nCC$1CAUTION: The sequence shown here is derived from an Ensembl$2/g; # Set temp line back to comment + $_ =~ s/\nFT\s{3}.*//g; # Remove feature coordinates + $_ =~ s/\nDR\s{3}($sources_to_remove);.*//g; # Remove sources skipped at processing + + # Add the lines we need to the output + print $out_fh $_; + + # Check how many lines have been processed and write to new file if size exceeded + $count++; + if ($count > $file_size) { + close($out_fh); + $file_count++;
"-$file_count"; + open($out_fh, '>', $write_file) + or die "Couldn't open output file '$write_file' $!"; + $count = 0; + } + } + + close($in_fh); + close($out_fh); + } else { + $output_file = $output_path."/".$output_file; + open($out_fh, '>', $output_file) or die "Couldn't open output file '$output_file' $!"; + + # Remove unuused data + my $skip_data = 0; + while (<$in_fh>) { + if ($is_refseq_dna) { + if ($_ =~ /^REFERENCE/ || $_ =~ /^COMMENT/ || $_ =~ /^\s{5}exon/ || $_ =~ /^\s{5}misc_feature/ || $_ =~ /^\s{5}variation/) { + $skip_data = 1; + } elsif ($_ =~ /^\s{5}source/ || $_ =~ /^ORIGIN/) { + $skip_data = 0; + } + } elsif ($is_refseq_peptide) { + if ($_ =~ /^REFERENCE/ || $_ =~ /^COMMENT/ || $_ =~ /^\s{5}Protein/) { + $skip_data = 1; + } elsif ($_ =~ /^\s{5}source/ || $_ =~ /^\s{5}CDS/ || $_ =~ /^ORIGIN/) { + $skip_data = 0; + } + } + + if (!$skip_data) {print $out_fh $_;} + } + + close($in_fh); + close($out_fh); + } + } + } + + add_to_log_file($log_file, "Source $source_name cleaned up"); + } + + # Save the clean files directory in source db + if ($update_clean_uri) { + my ($user, $pass, $host, $port, $source_db) = parse_url($source_db_url); + my $dbi = get_dbi($host, $port, $user, $pass, $source_db); + my $update_version_sth = $dbi->prepare("UPDATE IGNORE version set clean_uri=? where source_id=(SELECT source_id FROM source WHERE name=?)"); + $update_version_sth->execute($output_path, $source_name); + $update_version_sth->finish(); + } +} + +sub get_dbi { + my ($host, $port, $user, $pass, $dbname) = @_; + my $dbconn; + if (defined $dbname) { + $dbconn = sprintf("dbi:mysql:host=%s;port=%s;database=%s", $host, $port, $dbname); + } else { + $dbconn = sprintf("dbi:mysql:host=%s;port=%s", $host, $port); + } + my $dbi = DBI->connect( $dbconn, $user, $pass, { 'RaiseError' => 1 } ) or croak( "Can't connect to database: " . $DBI::errstr ); + return $dbi; +} + +sub parse_url { + my ($url) = @_; + my $parsed_url = Nextflow::Utils::parse($url); + my $user = $parsed_url->{'user'}; + my $pass = $parsed_url->{'pass'}; + my $host = $parsed_url->{'host'}; + my $port = $parsed_url->{'port'}; + my $db = $parsed_url->{'dbname'}; + return ($user, $pass, $host, $port, $db); +} + +sub add_to_log_file { + my ($log_file, $message) = @_; + + if (defined($log_file)) { + my $current_timestamp = strftime "%d-%b-%Y %H:%M:%S", localtime; + + open(my $fh, '>>', $log_file); + print $fh "$current_timestamp | INFO | $message\n"; + close($fh); + } +} diff --git a/sql/patch_109_110_a.sql b/sql/patch_109_110_a.sql index 427981eec..27bc2d892 100644 --- a/sql/patch_109_110_a.sql +++ b/sql/patch_109_110_a.sql @@ -13,7 +13,7 @@ -- See the License for the specific language governing permissions and -- limitations under the License. -# patch_108_109_a.sql +# patch_109_110_a.sql # # Title: Update schema version. # diff --git a/sql/patch_110_111_a.sql b/sql/patch_110_111_a.sql index 91ba1c712..e8725ff82 100644 --- a/sql/patch_110_111_a.sql +++ b/sql/patch_110_111_a.sql @@ -13,7 +13,7 @@ -- See the License for the specific language governing permissions and -- limitations under the License. -# patch_108_109_a.sql +# patch_110_111_a.sql # # Title: Update schema version. 
# diff --git a/sql/patch_111_112_a.sql b/sql/patch_111_112_a.sql new file mode 100644 index 000000000..4394d65ed --- /dev/null +++ b/sql/patch_111_112_a.sql @@ -0,0 +1,27 @@ +-- Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +-- Copyright [2016-2024] EMBL-European Bioinformatics Institute +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. + +# patch_111_112_a.sql +# +# Title: Update schema version. +# +# Description: +# Update schema_version in meta table to 112. + +UPDATE meta SET meta_value='112' WHERE meta_key='schema_version'; + +# Patch identifier +INSERT INTO meta (species_id, meta_key, meta_value) +VALUES (NULL, 'patch', 'patch_111_112_a.sql|schema_version'); diff --git a/sql/patch_112_113_a.sql b/sql/patch_112_113_a.sql new file mode 100644 index 000000000..f89e3f0b9 --- /dev/null +++ b/sql/patch_112_113_a.sql @@ -0,0 +1,27 @@ +-- Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +-- Copyright [2016-2024] EMBL-European Bioinformatics Institute +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. + +# patch_112_113_a.sql +# +# Title: Update schema version. +# +# Description: +# Update schema_version in meta table to 113. + +UPDATE meta SET meta_value='113' WHERE meta_key='schema_version'; + +# Patch identifier +INSERT INTO meta (species_id, meta_key, meta_value) +VALUES (NULL, 'patch', 'patch_112_113_a.sql|schema_version'); diff --git a/sql/patch_113_114_a.sql b/sql/patch_113_114_a.sql new file mode 100644 index 000000000..5637cb307 --- /dev/null +++ b/sql/patch_113_114_a.sql @@ -0,0 +1,27 @@ +-- Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +-- Copyright [2016-2024] EMBL-European Bioinformatics Institute +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. + +# patch_113_114_a.sql +# +# Title: Update schema version. +# +# Description: +# Update schema_version in meta table to 114.
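+# +# Illustrative note (invocation hypothetical): a patch like this is applied directly to the production database with the mysql client, e.g.: +#   mysql -h my_host -u my_user -p ensembl_production < sql/patch_113_114_a.sql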
+ +UPDATE meta SET meta_value='114' WHERE meta_key='schema_version'; + +# Patch identifier +INSERT INTO meta (species_id, meta_key, meta_value) +VALUES (NULL, 'patch', 'patch_113_114_a.sql|schema_version'); diff --git a/sql/table.sql b/sql/table.sql index a72e11ee5..19ce5692b 100644 --- a/sql/table.sql +++ b/sql/table.sql @@ -32,11 +32,11 @@ CREATE TABLE IF NOT EXISTS meta ( # Add schema type and schema version to the meta table INSERT INTO meta (species_id, meta_key, meta_value) VALUES (NULL, 'schema_type', 'production'), - (NULL, 'schema_version', 111); + (NULL, 'schema_version', 114); # Patches included in this schema file INSERT INTO meta (species_id, meta_key, meta_value) - VALUES (NULL, 'patch', 'patch_110_111_a.sql|schema version'); + VALUES (NULL, 'patch', 'patch_113_114_a.sql|schema_version'); -- The 'master_biotype' table. -- Contains all the valid biotypes used for genes and transcripts. diff --git a/src/python/ensembl/common/Params.py b/src/python/ensembl/common/Params.py new file mode 100644 index 000000000..ef9371f99 --- /dev/null +++ b/src/python/ensembl/common/Params.py @@ -0,0 +1,233 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Params module to handle parameter manipulation between pipeline processes.""" + +import sys +import re +import json +import argparse + +sys.tracebacklimit = 0 + +class Params: + def __init__(self, params: dict=None, parse_dataflow_json: bool=True) -> None: + """ + Parameters + ---------- + params: dict, optional + The parameters to start the object with. If defined, command-line parameters won't be parsed (default is None) + parse_dataflow_json: bool, optional + Specifies whether to parse an option called 'dataflow' in the provided options (default is True) + """ + if params: + self._params = params + else: + self._params = {} + self.parse_argv_params(parse_dataflow_json) + + def parse_argv_params(self, parse_dataflow_json: bool=True): + """Parses command-line arguments and extracts them into the Params object. + Command-line arguments need to be passed in the format "--name value". + + Parameters + ---------- + parse_dataflow_json: bool, optional + Specifies whether to parse an option called 'dataflow' in the provided options (default is True) + """ + args = sys.argv[1:] + + # Extract param names from command line + r = re.compile(r"^--") + param_names = list(filter(r.match, args)) + + parser = argparse.ArgumentParser() + for name in param_names: + parser.add_argument(name) + + params = parser.parse_args() + for param_name in vars(params): + if param_name == 'dataflow' and parse_dataflow_json: + dataflow_params = json.loads(getattr(params, param_name)) + for name,value in dataflow_params.items(): + self.param(name, value) + else: + self.param(param_name, getattr(params, param_name)) + + def param(self, name: str, new_value=None, options: dict={}): + """ Gets or sets a parameter value.
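+ For example (illustrative): self.param('base_path', '/data') sets the value, self.param('base_path') returns it, and self.param('retries', None, {'default': 3, 'type': 'int'}) applies a default and a type check.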
+ + Parameters + ---------- + name: str + The name of the parameter + new_value: any, optional + The value to set the parameter to (default is None) + options: dict, optional + Extra options, including: + - default: The default value to use if parameter has no value (sets the parameter value to this) + - type: The type of the parameter value, used to check if value is valid + + Returns + ------- + The value of the parameter with provided name. + + Raises + ------ + AttributeError + If no parameter name was passed. + """ + if not name: + raise AttributeError('You must supply a parameter name') + + value = None + + if new_value is not None: + self._params[name] = new_value + value = new_value + else: + value = self._params.get(name) + if value is None and options.get('default') is not None: + default = options['default'] + self._params[name] = default + value = default + + if options.get('type'): + return self.check_type(name, value, options['type']) + + return value + + def param_required(self, name: str, options: dict={}): + """ Gets a parameter value, raising an error if no value is found. + + Parameters + ---------- + name: str + The name of the parameter + options: dict, optional + Extra options, including: + - default: The default value to use if parameter has no value (sets the parameter value to this) + - type: The type of the parameter value, used to check if value is valid + + Returns + ------- + The value of the parameter with provided name. + + Raises + ------ + AttributeError + If no value is found for the required parameter. + """ + value = self.param(name, None, options) + + if value is None: + raise AttributeError(f'Parameter \'{name}\' is required but has no value') + + return value + + def check_type(self, name: str, value, value_type: str): + """ Checks if the parameter value provided is valid. + For specific types, this function can change the parameter value. + + Parameters + ---------- + name: str + The name of the parameter + value: any + The value of the parameter + value_type: str + The type of the parameter value. Accepted types: + - hash, dict, or dictionary + - array or list + - int or integer + - bool or boolean + - str or string + + Returns + ------- + None if no value is found, otherwise the validated (and possibly converted) value of the parameter. + + Raises + ------ + AttributeError + If no parameter name is provided. + If parameter value is not valid. + """ + if not name: + raise AttributeError('You must supply a parameter name') + if value is None: + return + + value_type = value_type.lower() + error = 0 + new_value = value + + if value_type in ['hash', 'dict', 'dictionary'] and not isinstance(value, dict): + error = 1 + elif value_type in ['array', 'list'] and not isinstance(value, list): + # Try to split by commas + if isinstance(value, str) and re.search(",", value): + new_value = value.split(",") + else: + new_value = [value] + elif value_type in ['integer', 'int'] and not isinstance(value, int): + # Try to make it an integer + try: + new_value = int(value) + except ValueError: + error = 1 + elif value_type in ['bool', 'boolean'] and not isinstance(value, bool): + # Try to make it a boolean + if isinstance(value, int): + new_value = bool(value) + elif value in ['0', '1']: + new_value = bool(int(value)) + else: + error = 1 + elif value_type in ['str', 'string'] and not isinstance(value, str): + new_value = str(value) + + if error:
raise AttributeError(f'Parameter \'{name}\' has an invalid value \'{value}\'. Must be of type {value_type}') + + self.param(name, new_value) + return new_value + + def write_output(self, suffix: str, params: dict): + """ Appends data to the dataflow json file (passed into next pipeline process). + + Parameters + ---------- + suffix: str + The file suffix to add to the output file name (dataflow_[suffix].json) + params: dict + The data to append into the file + """ + # Remove null params + params = {k: v for k, v in params.items() if v is not None} + + with open(f'dataflow_{suffix}.json', 'a') as fh: + json.dump(params, fh) + fh.write("\n") + + def write_all_output(self, suffix: str): + """ Appends all of the parameters in the object into the dataflow json file. + This calls the write_output function. + + Parameters + ---------- + suffix: str + The file suffix to add to the output file name (dataflow_[suffix].json) + """ + self.write_output(suffix, self._params) diff --git a/src/python/ensembl/production/hive/ensembl_genome_metadata/MetadataUpdaterHiveCore.py b/src/python/ensembl/production/hive/ensembl_genome_metadata/MetadataUpdaterHiveCore.py index 6a37efffc..e6c2a9a9c 100644 --- a/src/python/ensembl/production/hive/ensembl_genome_metadata/MetadataUpdaterHiveCore.py +++ b/src/python/ensembl/production/hive/ensembl_genome_metadata/MetadataUpdaterHiveCore.py @@ -10,7 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - +import json from ensembl.production.hive.BaseProdRunnable import BaseProdRunnable from ensembl.production.metadata.updater.core import CoreMetaUpdater @@ -18,11 +18,18 @@ class MetadataUpdaterHiveCore(BaseProdRunnable): def run(self): - if self.param("force") == 0 or self.param("force") is None: - run = CoreMetaUpdater(self.param("database_uri"), self.param("genome_metadata_uri"), self.param("taxonomy_uri")) - elif self.param("force") == 1: - run = CoreMetaUpdater(self.param("database_uri"), self.param("genome_metadata_uri"), self.param("taxonomy_uri"), - force=1) - else: - raise ValueError(f"Unable to figure out param {self.param('force')}") - run.process_core() + try: + run = CoreMetaUpdater(self.param("database_uri"), self.param("genome_metadata_uri")) + run.process_core() + output = { 'metadata_uri' : self.param("genome_metadata_uri"), + 'database_uri' : self.param("database_uri"), + 'email': self.param("email") + } + + self.dataflow({ + 'job_id' : self.input_job.dbID, + 'output' : json.dumps(output) + }, 2) + + except Exception as e: + raise ValueError(str(e)) from e diff --git a/src/python/ensembl/production/xrefs/Base.py b/src/python/ensembl/production/xrefs/Base.py new file mode 100644 index 000000000..fcf94dc42 --- /dev/null +++ b/src/python/ensembl/production/xrefs/Base.py @@ -0,0 +1,781 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
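+# +# Illustrative usage sketch (hypothetical subclass; concrete pipeline modules such as +# ensembl.production.xrefs.ScheduleDownload presumably follow this pattern): +# +#   class MyStep(Base): +#       def run(self): +#           base_path = self.param_required('base_path') +#           self.write_output('sources', {'name': 'RefSeq_dna'}) +# +#   MyStep().run()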
+ +"""Base xref module to include all common functions used by xref modules.""" + +import re +import os +import shutil +import requests +import fnmatch +import gzip +import importlib +import wget +import threading +import json +import logging +import time +import random +import csv +import subprocess + +from sqlalchemy import create_engine, select, insert, update, text, func, and_ +from sqlalchemy.engine.url import make_url, URL +from sqlalchemy.engine import Connection +from sqlalchemy.orm import aliased +from sqlalchemy_utils import database_exists, create_database, drop_database +from urllib.parse import urlparse +from ftplib import FTP +from itertools import groupby +from configparser import ConfigParser +from datetime import datetime + +from ensembl.xrefs.xref_source_db_model import Base as XrefSourceDB, Source as SourceSORM, Version as VersionORM, ChecksumXref as ChecksumXrefSORM + +from ensembl.xrefs.xref_update_db_model import Base as XrefUpdateDB, Source as SourceUORM, SourceURL as SourceURLORM, Xref as XrefUORM, \ + PrimaryXref as PrimaryXrefORM, DependentXref as DependentXrefUORM, GeneDirectXref as GeneDirectXrefORM, TranscriptDirectXref as TranscriptDirectXrefORM, \ + TranslationDirectXref as TranslationDirectXrefORM, Synonym as SynonymORM, Pairs as PairsORM, Species as SpeciesORM, \ + SourceMappingMethod as SourceMappingMethodORM, MappingJobs as MappingJobsORM, Mapping as MappingORM + +from ensembl.core.models import Meta as MetaCORM, Gene as GeneORM, Transcript as TranscriptORM, Analysis as AnalysisORM, \ + ExonTranscript as ExonTranscriptORM, SupportingFeature as SupportingFeatureORM, DnaAlignFeature as DnaAlignFeatureORM, \ + TranscriptAttrib as TranscriptAttribORM, AttribType as AttribTypeORM, AnalysisDescription as AnalysisDescriptionORM, \ + SeqRegion as SeqRegionORM, SeqRegionAttrib as SeqRegionAttribORM, CoordSystem as CoordSystemORM, Translation as TranslationORM, \ + Exon as ExonORM, Xref as XrefCORM, DependentXref as DependentXrefCORM, ExternalDb as ExternalDbORM, Dna as DnaORM, ObjectXref as ObjectXrefCORM + +from ensembl.common.Params import Params + +class Base(Params): + """ Class to represent the base of xref modules. Inherits the Params class. + """ + def __init__(self, params: dict=None, parse_dataflow_json: bool=True) -> None: + """ Calls the parent __init__ then sets some specific parameters. + + Parameters + ---------- + params: dict, optional + The parameters to start the object with. 
If defined, command-line parameters won't be parsed (default is None) + parse_dataflow_json: bool, optional + Specifies whether to parse an option called 'dataflow' in the provided options (default is True) + """ + super().__init__(params, parse_dataflow_json) + + self.param('metasearch_url', "http://registry-grpc.ebi.ac.uk:8080/registry/metaSearch") + + # Initialize the logfile for this run + if self.param('log_timestamp'): + current_timestamp = self.param('log_timestamp') + else: + current_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + + log_path = os.path.join(self.param_required('base_path'), 'logs', current_timestamp) + os.makedirs(log_path, exist_ok=True) + + log_file = os.path.join(log_path, 'tmp_logfile_'+self.__class__.__name__+'_'+str(random.randint(0, 5000))) + self._log_file = log_file + + console_handler = logging.StreamHandler() + file_handler = logging.FileHandler(log_file, mode='a') + console_handler.setLevel(logging.WARNING) + file_handler.setLevel(logging.DEBUG) + + logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s | %(levelname)s | %(message)s', + datefmt='%d-%b-%Y %H:%M:%S', + handlers=[console_handler, file_handler] + ) + + def create_source_db(self, source_url: str, reuse_db_if_present: bool): + """ Creates the xref source database from model. + + Parameters + ---------- + source_url: str + The source database URL with format: [driver]://[user]:[password]@[host]:[port]/[dbname] + reuse_db_if_present: bool + If set to False, the database defined by provided URL will be dropped before creating a new one + """ + url = make_url(source_url) + engine = create_engine(url, isolation_level="AUTOCOMMIT") + + if url.database and reuse_db_if_present: + return + + if database_exists(engine.url): + drop_database(engine.url) + create_database(engine.url) + XrefSourceDB.metadata.create_all(engine) + + def download_file(self, file: str, base_path: str, source_name: str, extra_args: dict): + """ Downloads an xref file and saves into provided space. + + Parameters + ---------- + file: str + The URL of the file to download. Acceptable URL schemes: ftp, http, and https + base_path: str + The path to save the downloaded file into + source_name: str + The xref source name + extra_args: dict + Extra options, including: + - skip_download_if_file_present: If set to True, file is only downloaded if does not exist + - db: The type of external db for the xref source (only relevant here if equal to 'checksum') + - release: If set to 'version', then this is a version file download + - rel_number: The URL used to retrieve the release number (only for RefSeq) + - catalog: The URL used to retrieve the release catalog (only for RefSeq) + + Returns + ------- + The path of the downloaded file. + + Raises + ------ + LookupError + If rel_number is provided but no release number was found in URL. + AttributeError + If file URL scheme is invalid.
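+ + Example + ------- + Illustrative only; the URL and paths are hypothetical: + file_dir = self.download_file('https://example.org/pub/source.txt.gz', '/scratch/xrefs', 'MySource', {'skip_download_if_file_present': True})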
+ """ + # Create uri object and get scheme + uri = urlparse(file) + if not uri.scheme: + return file + + # Get extra parameters + skip_download_if_file_present = extra_args.get('skip_download_if_file_present') or False + db = extra_args.get('db') + release = extra_args.get('release') + rel_number = extra_args.get('rel_number') + catalog = extra_args.get('catalog') + + # Create file download path + orig_source_name = source_name + source_name = re.sub(r"\/", "", source_name) + dest_dir = os.path.join(base_path, source_name) + if db and db == 'checksum': + dest_dir = os.path.join(base_path, 'Checksum') + if not os.path.exists(dest_dir): os.makedirs(dest_dir, exist_ok = True) + + file_path = "" + + # If file is in local ftp, copy from there + if re.search("ftp.ebi.ac.uk", file): + # Construct local path + local_file = file + local_file = re.sub("https://ftp.ebi.ac.uk/pub/", "/nfs/ftp/public/", local_file) + + # Check if local file exists + if os.path.exists(local_file): + file_path = os.path.join(dest_dir, os.path.basename(uri.path)) + if db and db == 'checksum': + file_path = os.path.join(dest_dir, f'{source_name}-{os.path.basename(uri.path)}') + + if not (skip_download_if_file_present and os.path.exists(file_path)): + shutil.copy(local_file, file_path) + + # Check if copy was successful + if os.path.exists(file_path): + logging.info(f'{orig_source_name} file copied from local FTP: {file_path}') + if release: + return file_path + return os.path.dirname(file_path) + else: + logging.info(f'{orig_source_name} file already exists, skipping download ({file_path})') + + # Handle Refseq files + if re.search("RefSeq", source_name) and rel_number and catalog and not release: + # Get current release number + release_number = requests.get(rel_number).json() + if not release_number: + raise LookupError(f'No release number in {rel_number}') + + # Get list of files in release catalog + catalog = re.sub(r"\*", str(release_number), catalog) + files_list = requests.get(catalog).text + refseq_files = files_list.split("\n") + files_to_download = [] + + # Download each refseq file + for refseq_file in refseq_files: + if not refseq_file: continue + checksum, filename = refseq_file.split("\t") + + # Only interested in files matching pattern + if not fnmatch.fnmatch(filename, os.path.basename(uri.path)): continue + if re.search("nonredundant_protein", filename) or re.search("wp_protein", filename): continue + + file_path = os.path.join(dest_dir, os.path.basename(filename)) + if os.path.exists(file_path): + if skip_download_if_file_present: + logging.info(f'{orig_source_name} file already exists, skipping download ({file_path})') + continue + os.remove(file_path) + + file_url = os.path.join(os.path.dirname(file), filename) + files_to_download.append({'url': file_url, 'path': file_path}) + logging.info(f'{orig_source_name} file downloaded via HTTP: {file_path}') + + self.refseq_multithreading(files_to_download) + elif uri.scheme == 'ftp': + ftp = FTP(uri.netloc) + ftp.login('anonymous', '-anonymous@') + ftp.cwd(os.path.dirname(uri.path)) + remote_files = ftp.nlst() + + # Download files in ftp server + for remote_file in remote_files: + # Only interested in files matching pattern + if not fnmatch.fnmatch(remote_file, os.path.basename(uri.path)): continue + + remote_file = re.sub(r"\n", "", remote_file) + file_path = os.path.join(dest_dir, os.path.basename(remote_file)) + if db and db == 'checksum': + file_path = os.path.join(dest_dir, f'{source_name}-{os.path.basename(remote_file)}') + + if not 
+ if not (skip_download_if_file_present and os.path.exists(file_path)): + with open(file_path, 'wb') as out_fh: + ftp.retrbinary("RETR " + remote_file, out_fh.write) + logging.info(f'{orig_source_name} file downloaded via FTP: {file_path}') + else: + logging.info(f'{orig_source_name} file already exists, skipping download ({file_path})') + ftp.close() + elif uri.scheme == 'http' or uri.scheme == 'https': + # This is the case for the release file + if re.search("RefSeq", source_name) and rel_number and release: + # Get current release number + release_number = requests.get(rel_number).json() + if not release_number: + raise LookupError(f'No release number in {rel_number}') + + file = re.sub(r"\*", str(release_number), file) + uri = urlparse(file) + + file_path = os.path.join(dest_dir, os.path.basename(uri.path)) + if db and db == 'checksum': + file_path = os.path.join(dest_dir, f'{source_name}-{os.path.basename(uri.path)}') + + if not os.path.exists(file_path) or not skip_download_if_file_present: + if not skip_download_if_file_present and os.path.exists(file_path): + os.remove(file_path) + wget.download(file, file_path) + logging.info(f'{orig_source_name} file downloaded via HTTP: {file_path}') + else: + logging.info(f'{orig_source_name} file already exists, skipping download ({file_path})') + else: + raise AttributeError(f'Invalid URL scheme {uri.scheme}') + + if release: + return file_path + return os.path.dirname(file_path) + + def refseq_multithreading(self, files): + """ Creates multiple threads to download RefSeq files in parallel. + + Parameters + ---------- + files: list + The list of file URLs and paths to download. + """ + number_of_threads = 20 + chunk_size = int(len(files) / number_of_threads) + threads = [] + + for thread_index in range(number_of_threads): + array_start = thread_index * chunk_size + array_end = len(files) if thread_index+1 == number_of_threads else (thread_index+1) * chunk_size + + thread = threading.Thread(target=self.download_refseq_files, args=(files, array_start, array_end)) + threads.append(thread) + threads[thread_index].start() + + for thread in threads: + thread.join() + + def download_refseq_files(self, files, start: int, end: int): + """ Downloads RefSeq files from a subset of files. + + Parameters + ---------- + files: list + The list of file URLs and paths to download. + start: int + The start index of the files list. + end: int + The end index of the files list. + + Raises + ------ + Exception + If file download fails all attempts. + """ + for index in range(start, end): + failed = 0 + file_url = files[index]['url'] + local_path = files[index]['path'] + + for retry in range(0, 3): + try: + wget.download(file_url, local_path) + failed = 0 + break + except Exception: + failed += 1 + + if failed > 0: + raise Exception(f'Failed to download file {file_url}') + + def get_dbi(self, url: str): + """ Returns a DB connection for a provided URL. + + Parameters + ---------- + url: str + The database URL to connect to + + Returns + ------- + An sqlalchemy engine connection. + """ + connect_url = make_url(url) + engine = create_engine(connect_url, isolation_level="AUTOCOMMIT") + + return engine.connect() + + def get_db_engine(self, url: str): + """ Returns a DB engine for a provided URL. + + Parameters + ---------- + url: str + The database URL to create an engine for + + Returns + ------- + An sqlalchemy engine.
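+ + For example (illustrative URL): engine = self.get_db_engine('mysql://user:pass@host:3306/xref_source_db')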
+ """ + connect_url = make_url(url) + engine = create_engine(connect_url, isolation_level="AUTOCOMMIT") + + return engine + + def load_checksum(self, path: str, url: str): + """ Loads the xref checksum files into a provided database. + This first combines the checksum data from different xref sources into 1 file called checksum.txt before loading into the DB. + + Parameters + ---------- + path: str + The path where the checksum files can be found + url: str + The database URL to load the checksum data into + """ + checksum_dir = os.path.join(path, 'Checksum') + if not os.path.exists(checksum_dir): os.makedirs(checksum_dir, exist_ok = True) + + # Connect to db + url = url + "?local_infile=1" + db_engine = self.get_db_engine(url) + with db_engine.connect() as dbi: + counter = 1 + source_id = 1 + + # Open the checksum output file + files = os.listdir(checksum_dir) + checksum_file = os.path.join(checksum_dir, 'checksum.txt') + with open(checksum_file, 'w') as output_fh: + # Go through all available checksum files + for file in files: + if re.search("checksum", file): continue + + input_file = os.path.join(checksum_dir, file) + match = re.search(r"\/([A-Za-z]*)-.*$", input_file) + source_name = match.group(1) + source_id = self.get_source_id_from_name(dbi, source_name) + + input_fh = self.get_filehandle(input_file) + for line in input_fh: + line = line.rstrip() + (id, checksum) = re.split(r"\s+", line) + + counter += 1 + output = [str(counter), str(source_id), id, checksum] + output_str = "\t".join(output) + output_fh.write(f'{output_str}\n') + + input_fh.close() + + query = f'load data local infile \'{checksum_file}\' into table checksum_xref' + dbi.execute(text(query)) + + def get_filehandle(self, filename: str): + """ Opens an appropriate read filehandle for a file based on its type. + + Parameters + ---------- + filename: str + The name and path of the file to read + + Returns + ------- + A read filehandle. + + Raises + ------ + FileNotFoundError + If no file name was provided. + If provided file could not be found. + """ + if not filename or filename == '': + raise FileNotFoundError('No file name') + + alt_filename = filename + alt_filename = re.sub(r"\.(gz|Z)$", "", alt_filename) + if alt_filename == filename: + alt_filename = alt_filename + ".gz" + + if not os.path.exists(filename): + if not os.path.exists(alt_filename): + raise FileNotFoundError(f'Could not find either {filename} or {alt_filename}') + filename = alt_filename + + if re.search(r"\.(gz|Z)$", filename): + fh = gzip.open(filename, 'rt') + else: + fh = open(filename, 'r') + + return fh + + def get_source_id_from_name(self, dbi, source_name: str): + """ Retrieves a source ID from its name from a database. + + Parameters + ---------- + dbi: db connection + The database connection to query in + source_name: str + The name of the source + + Returns + ------- + The source ID. + """ + query = select(SourceSORM.source_id).where(SourceSORM.name==source_name) + source_id = dbi.execute(query).scalar() + + return source_id + + def get_file_sections(self, file: str, delimiter: str): + """ Reads a provided file by sections, separated by a provided delimiter. + This function uses 'yield' to provide the file sections one by one. + + Parameters + ---------- + file: str + The name and path of the file to read + delimiter: str + The character or string separating the file sections + + Returns + ------- + A yield of file sections. 
+ """ + if re.search(r"\.(gz|Z)$", file): + with gzip.open(file, 'rt') as fh: + groups = groupby(fh, key=lambda x: x.lstrip().startswith(delimiter)) + for key,group in groups: + yield list(group) + else: + with open(file, 'r') as fh: + groups = groupby(fh, key=lambda x: x.lstrip().startswith(delimiter)) + for key,group in groups: + yield list(group) + + def create_xref_db(self, url: str, config_file: str, preparse:bool): + """ Creates the xref database from model. + This function always drops the database defined by the provided URL (if it exists) before creating a new one. + + Parameters + ---------- + url: str + The database URL with format: [driver]://[user]:[password]@[host]:[port]/[dbname] + config_file: str + The name and path of the .ini file that has information about xref sources and species + preparse: bool + Specifies whether source preparsing will be done or not + """ + engine = create_engine(url, isolation_level="AUTOCOMMIT") + + # Drop database and create again + if database_exists(engine.url): + drop_database(engine.url) + create_database(engine.url) + XrefUpdateDB.metadata.create_all(engine) + + xref_dbi = engine.connect() + self.populate_xref_db(xref_dbi, config_file, preparse) + + def populate_xref_db(self, dbi, config_file:str, preparse:bool): + """ Populates the xref database with configuration data. + + Parameters + ---------- + dbi: db connection + The xref database connection + config_file: str + The name and path of the .ini file that has information about xref sources and species to populate the database with + preparse: bool + Specifies whether source preparsing will be done or not (needed to decide if to use old parsers) + + Raises + ------ + KeyError + If a source exists in a species section in the configuration file, but has no source section of its own. 
+ """ + source_ids = {} + source_parsers = {} + species_sources = {} + + config = ConfigParser() + config.read(config_file) + + species_sections, sources_sections = {}, {} + + for section_name in config.sections(): + section = config[section_name] + (keyword, name) = re.split(r"\s+", section_name) + + if keyword == 'source': + sources_sections[name] = section + elif keyword == 'species': + species_sections[name] = section + + # Parse species sections + for species_name, section in species_sections.items(): + taxonomy_ids = section.get('taxonomy_id').split(",") + sources = section.get('sources') + aliases = section.get('aliases', species_name) + + species_id = taxonomy_ids[0] + + for tax_id in taxonomy_ids: + # Add new species + query = insert(SpeciesORM).values(species_id=species_id, taxonomy_id=tax_id, name=species_name, aliases=aliases) + dbi.execute(query) + + species_sources[species_id] = sources + + source_id = 0 + # Parse source sections + for source_name, section in sorted(sources_sections.items()): + source_id += 1 + source_name = section.get('name') + order = section.get('order') + priority = section.get('priority') + priority_description = section.get('prio_descr', '') + status = section.get('status', 'NOIDEA') + + old_parser = section.get('old_parser') + if old_parser and not preparse: + parser = old_parser + else: + parser = section.get('parser') + + # Add new source + query = insert(SourceUORM).values(name=source_name, source_release='1', ordered=order, priority=priority, priority_description=priority_description, status=status) + dbi.execute(query) + + source_ids[source_name] = source_id + source_parsers[source_id] = parser + + # Add source url rows + for species_id, sources in species_sources.items(): + source_names = sources.split(",") + + for source_name in source_names: + if not source_ids.get(source_name): + raise KeyError(f'No source section found for {source_name} in config file') + + source_id = source_ids[source_name] + parser = source_parsers[source_id] + query = insert(SourceURLORM).values(source_id=source_id, species_id=species_id, parser=parser) + dbi.execute(query) + + def get_source_id(self, dbi, parser: str, species_id: int, name: str, division_id: int): + """ Retrieves a source ID from its parser, species ID, name or division ID. + + Parameters + ---------- + dbi: db connection + The database connection to query in + parser: str + The source parser + species_id: int + The ID of the species related to the source + name: str + The source name + division_id: int + The ID of the division related to the source + + Returns + ------- + The source ID. 
+ """ + name = "%"+name+"%" + source_id = None + + query = select(SourceURLORM.source_id).where(SourceUORM.source_id==SourceURLORM.source_id, SourceURLORM.parser==parser, SourceURLORM.species_id==species_id) + result = dbi.execute(query) + if result.rowcount == 1: + source_id = result.scalar() + + query = select(SourceURLORM.source_id).where(SourceUORM.source_id==SourceURLORM.source_id, SourceURLORM.parser==parser, SourceURLORM.species_id==species_id).filter(SourceUORM.name.like(name)) + result = dbi.execute(query) + if result.rowcount == 1: + source_id = result.scalar() + + if not source_id: + query = select(SourceURLORM.source_id).where(SourceUORM.source_id==SourceURLORM.source_id, SourceURLORM.parser==parser, SourceURLORM.species_id==division_id).filter(SourceUORM.name.like(name)) + result = dbi.execute(query).first() + if result: + source_id = result[0] + + return source_id + + def get_taxon_id(self, dbi): + """ Retrieves the species.taxonomy_id value of the meta table in a database. + + Parameters + ---------- + dbi: db connection + The database connection to query in + + Returns + ------- + The taxonomy ID in the database or 1 if not found. + """ + query = select(MetaCORM.meta_value).where(MetaCORM.meta_key=='species.taxonomy_id') + result = dbi.execute(query) + if result.rowcount > 0: + return result.scalar() + + return 1 + + def get_division_id(self, dbi): + """ Retrives the division ID from a database based on the species.division value of the meta table. + + Parameters + ---------- + dbi: db connection + The database connection to query in + + Returns + ------- + The division ID in the database or 1 if not found + """ + query = select(MetaCORM.meta_value).where(MetaCORM.meta_key=='species.division') + result = dbi.execute(query) + + if result.rowcount > 0: + division = result.scalar() + + division_taxon = { + 'Ensembl' : 7742, + 'EnsemblVertebrates' : 7742, + 'Vertebrates' : 7742, + 'EnsemblMetazoa' : 33208, + 'Metazoa' : 33208, + 'Plants' : 33090, + 'EnsemblPlants' : 33090, + } + + division_id = division_taxon.get(division) + if division_id: + return division_id + + return 1 + + def get_path(self, base_path: str, species: str, release: int, category: str, file_name: str=None): + """ Creates directories based on provided data. + + Parameters + ---------- + base_path: str + The base file path + species: str + The species name + release: int + The ensEMBL release number + category: str + The file category + file_name: str, optional + The file name + + Returns + ------- + A file path. + """ + full_path = os.path.join(base_path, species, release, category) + if not os.path.exists(full_path): + os.makedirs(full_path, exist_ok = True) + + if file_name: + return os.path.join(full_path, file_name) + else: + return full_path + + def get_db_from_registry(self, species: str, group: str, release: int, registry: str): + """ Looks up a db in the registry and returns an sqlaclehmy angine for it. + + Parameters + ---------- + species: str + The species name + group: str + The db group (core, ccds, otherfeatures, etc...) + release: int + The ensEMBL release number + registry: str + The registry url + + Returns + ------- + A db engine or 0 if no db is found. 
+ """ + # Fix registry url, if needed + match = re.search(r"^(.*)://(.*)", registry) + if match: registry = match.group(2) + match = re.search(r"(.*)/(.*)", registry) + if match: registry = match.group(1) + + metasearch_url = self.param_required('metasearch_url') + metasearch_body = { + "name_pattern":f'{species}_{group}%', + "filters":[ + { + "meta_key":"schema_version", + "meta_value":release + }, + ], + "servers":[registry] + } + + dbs = requests.post(metasearch_url, json=metasearch_body).json() + dbs = dbs[registry] + + if len(dbs) > 0: + db_url = 'mysql://' + dbs[0] + return db_url + else: + return 0 + diff --git a/src/python/ensembl/production/xrefs/Checksum.py b/src/python/ensembl/production/xrefs/Checksum.py new file mode 100644 index 000000000..7ccb401a7 --- /dev/null +++ b/src/python/ensembl/production/xrefs/Checksum.py @@ -0,0 +1,46 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Checksum module for the Xref Download pipeline.""" + +from ensembl.production.xrefs.Base import * + +class Checksum(Base): + def run(self): + base_path = self.param_required('base_path') + source_db_url = self.param_required('source_db_url') + skip_download = self.param_required('skip_download', {'type': 'bool'}) + + logging.info('Checksum starting with parameters:') + logging.info(f'Param: base_path = {base_path}') + logging.info(f'Param: source_db_url = {source_db_url}') + logging.info(f'Param: skip_download = {skip_download}') + + # Connect to source db + db_engine = self.get_db_engine(source_db_url) + + # Check if checksums already exist + table_nonempty = 0 + if skip_download: + with db_engine.connect() as dbi: + query = select(func.count(ChecksumXrefSORM.checksum_xref_id)) + table_nonempty = dbi.execute(query).scalar() + + # Load checksums from files into db + if not table_nonempty: + self.load_checksum(base_path, source_db_url) + logging.info('Checksum data loaded') + else: + logging.info('Checksum data already exists, skipping loading') + diff --git a/src/python/ensembl/production/xrefs/DownloadSource.py b/src/python/ensembl/production/xrefs/DownloadSource.py new file mode 100644 index 000000000..060fcb116 --- /dev/null +++ b/src/python/ensembl/production/xrefs/DownloadSource.py @@ -0,0 +1,63 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Download module to download xref and version files.""" + +from ensembl.production.xrefs.Base import * + +class DownloadSource(Base): + def run(self): + base_path = self.param_required('base_path') + parser = self.param_required('parser') + name = self.param_required('name') + priority = self.param_required('priority') + source_db_url = self.param_required('source_db_url') + file = self.param_required('file') + skip_download = self.param_required('skip_download', {'type': 'bool'}) + db = self.param('db') + version_file = self.param('version_file') + preparse = self.param('preparse', None, {'type': 'bool'}) + rel_number = self.param('rel_number') + catalog = self.param('catalog') + + logging.info(f'DownloadSource starting for source {name}') + + # Download the main xref file + extra_args = {} + extra_args['skip_download_if_file_present'] = skip_download + extra_args['db'] = db + if rel_number and catalog: + extra_args['rel_number'] = rel_number + extra_args['catalog'] = catalog + file_name = self.download_file(file, base_path, name, extra_args) + + # Download the version file + version = "" + if version_file: + extra_args['release'] = 'version' + version = self.download_file(version_file, base_path, name, extra_args) + + # Update source db + db_engine = self.get_db_engine(source_db_url) + with db_engine.connect() as dbi: + query = insert(SourceSORM).values(name=name, parser=parser).prefix_with('IGNORE') + dbi.execute(query) + + query = select(SourceSORM.source_id).where(SourceSORM.name==name) + source_id = dbi.execute(query).scalar() + + if preparse is None: preparse = False + query = insert(VersionORM).values(source_id=source_id, uri=file_name, index_uri=db, count_seen=priority, revision=version, preparse=preparse).prefix_with('IGNORE') + dbi.execute(query) + diff --git a/src/python/ensembl/production/xrefs/EmailNotification.py b/src/python/ensembl/production/xrefs/EmailNotification.py new file mode 100644 index 000000000..22738d990 --- /dev/null +++ b/src/python/ensembl/production/xrefs/EmailNotification.py @@ -0,0 +1,136 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Email module to send user emails notifying of xref pipelines end, with important information and statistics.""" + +from ensembl.production.xrefs.Base import * + +from smtplib import SMTP +from email.message import EmailMessage + +class EmailNotification(Base): + def run(self): + pipeline_name = self.param_required('pipeline_name') + base_path = self.param_required('base_path') + email_address = self.param_required('email') + email_server = self.param_required('email_server') + log_timestamp = self.param('log_timestamp') + + email_message = f'The {pipeline_name} has completed its run.
<br>' + + if log_timestamp: + # Get the path of the log files + log_path = os.path.join(base_path, 'logs', log_timestamp) + + # Read the log file + if os.path.exists(log_path): + log_files = os.listdir(log_path) + + parameters, sources, added_species, skipped_species = {}, {}, {}, {} + + main_log_file = os.path.join(base_path, 'logs', log_timestamp, 'logfile_'+log_timestamp) + + # Copy different log files into a main one + with open(main_log_file, 'a') as out_fh: + for log_file in log_files: + if not re.search(r"^tmp_", log_file): continue + log_file = os.path.join(log_path, log_file) + with open(log_file) as in_fh: + log_data = in_fh.read() + out_fh.write(log_data) + os.remove(log_file) + + # Read the full logs + with open(main_log_file) as fh: + data = fh.read() + + # Extract parameter data + parameters_list = re.findall(r"^\d{2}-\w{3}-\d{4} \| INFO \| Param: (\w+) = (.*)", data, re.MULTILINE) + parameters = {param[0]: param[1] for param in parameters_list} + + email_message += '<br>The pipeline was run with the following parameters:<br>'
+ for param_name,param_value in parameters.items(): + email_message += f'{param_name} = {param_value}<br>'
+ + if re.search('Download', pipeline_name): + # Extract data from logs + sources_list = re.findall(r"^\d{2}-\w{3}-\d{4} \| INFO \| Source to download: ([\w\/]+)", data, re.MULTILINE) + sources = {source : {'to_download' : 1} for source in sources_list} + + sources_list = re.findall(r"^\d{2}-\w{3}-\d{4} \| INFO \| Source to cleanup: ([\w\/]+)", data, re.MULTILINE) + for source in sources_list: sources[source].update({'to_cleanup' : 1}) + + sources_list = re.findall(r"^\d{2}-\w{3}-\d{4} \| INFO \| Source to preparse: ([\w\/]+)", data, re.MULTILINE) + for source in sources_list: sources[source].update({'to_preparse' : 1}) + + sources_list = re.findall(r"^\d{2}-\w{3}-\d{4} \| INFO \| Source ([\w\/]+) cleaned up", data, re.MULTILINE) + for source in sources_list: sources[source].update({'cleaned_up' : 1}) + + sources_list = re.findall(r"^\d{2}-\w{3}-\d{4} \| INFO \| Source ([\w\/]+) preparsed", data, re.MULTILINE) + for source in sources_list: sources[source].update({'preparsed' : 1}) + + sources_list = re.findall(r"^\d{2}-\w{3}-\d{4} \| INFO \| ([\w\/]+) file already exists, skipping download \((.*)\)", data, re.MULTILINE) + for source in sources_list: sources[source[0]].update({'skipped' : os.path.dirname(source[1])}) + + sources_list = re.findall(r"^\d{2}-\w{3}-\d{4} \| INFO \| ([\w\/]+) file downloaded via (HTTP|FTP): (.*)", data, re.MULTILINE) + for source in sources_list: sources[source[0]].update({'downloaded' : source[1]+"|"+os.path.dirname(source[2])}) + + sources_list = re.findall(r"^\d{2}-\w{3}-\d{4} \| INFO \| ([\w\/]+) file copied from local FTP: (.*)", data, re.MULTILINE) + for source in sources_list: sources[source[0]].update({'copied' : os.path.dirname(source[1])}) + + skipped_species_list = re.findall(r"^\d{2}-\w{3}-\d{4} \| INFO \| (\w+) skipped species = (\d+)", data, re.MULTILINE) + skipped_species = {source[0]: source[1] for source in skipped_species_list} + + added_species_list = re.findall(r"^\d{2}-\w{3}-\d{4} \| INFO \| (\w+) species files created = (\d+)", data, re.MULTILINE) + added_species = {source[0]: source[1] for source in added_species_list} + + # Include source statistics + email_message += '<br>--Source Statistics--<br>'
+ for source_name,source_values in sources.items(): + email_message += f'{source_name}:<br>'
+ if source_values.get('to_download'): email_message += '   Scheduled for download ✔<br>'
+ + if source_values.get('downloaded'): + (download_type, file_path) = source_values['downloaded'].split("|") + email_message += f'   File downloaded via {download_type} into {file_path}<br>'
+ elif source_values.get('copied'): email_message += '   File(s) copied from local FTP into %s<br>' % (source_values['copied'])
+ elif source_values.get('skipped'): email_message += '   File(s) download skipped, already exists in %s<br>' % (source_values['skipped'])
+ + if source_values.get('to_cleanup'): email_message += '   Scheduled for cleanup ✔<br>'
+ if source_values.get('cleaned_up'): email_message += '   Cleaned up ✔<br>'
+ + if source_values.get('to_preparse'): email_message += '   Scheduled for pre-parse ✔<br>'
+ if source_values.get('preparsed'): email_message += '   Pre-parsed ✔<br>'
+ + # Include species statistics + email_message += '<br>--Species Statistics--<br>'
+ email_message += 'Skipped Species (files already exist):<br>'
+ for source_name, count in skipped_species.items(): + email_message += f'   {source_name}: {count}<br>'
+ email_message += 'Added Species (files created):<br>'
+ for source_name, count in added_species.items(): + email_message += f'   {source_name}: {count}<br>'
+ + email_message += '<br>
To run the Xref Process Pipeline based on the data from this pipeline, use the same --base_path, --source_db_url, and --central_db_url (if preparse was run) values provided to this pipeline.' + + # Send email + message = EmailMessage() + message['Subject'] = f'{pipeline_name} Finished' + message['From'] = email_address + message['To'] = email_address + message.set_content(email_message, 'html') + + smtp = SMTP(email_server) + smtp.send_message(message) + diff --git a/src/python/ensembl/production/xrefs/ScheduleCleanup.py b/src/python/ensembl/production/xrefs/ScheduleCleanup.py new file mode 100644 index 000000000..58396b33a --- /dev/null +++ b/src/python/ensembl/production/xrefs/ScheduleCleanup.py @@ -0,0 +1,57 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Scheduling module to create cleanup jobs for specific xref sources.""" + +from ensembl.production.xrefs.Base import * + +class ScheduleCleanup(Base): + def run(self): + base_path = self.param_required('base_path') + source_db_url = self.param_required('source_db_url') + clean_files = self.param('clean_files') + clean_dir = self.param('clean_dir') + split_files_by_species = self.param('split_files_by_species') + + logging.info('ScheduleCleanup starting with parameters:') + logging.info(f'Param: base_path = {base_path}') + logging.info(f'Param: source_db_url = {source_db_url}') + logging.info(f'Param: clean_files = {clean_files}') + logging.info(f'Param: clean_dir = {clean_dir}') + logging.info(f'Param: split_files_by_species = {split_files_by_species}') + + # Connect to source db + db_engine = self.get_db_engine(source_db_url) + with db_engine.connect() as dbi: + # Get name and version file for each source + query = select(SourceSORM.name, VersionORM.revision).where(SourceSORM.source_id==VersionORM.source_id).distinct() + sources = dbi.execute(query).mappings().all() + + for source in sources: + # Only cleaning RefSeq and UniProt for now + if not (re.search(r"^RefSeq_(dna|peptide)", source.name) or re.search(r"^Uniprot", source.name)): continue + + # Remove / char from source name to access directory + clean_name = source.name + clean_name = re.sub(r"\/", "", clean_name) + + # Send parameters into cleanup jobs for each source + if os.path.exists(os.path.join(base_path, clean_name)): + logging.info(f'Source to cleanup: {source.name}') + + self.write_output('cleanup_sources', { + 'name' : source.name, + 'version_file' : source.revision + }) + diff --git a/src/python/ensembl/production/xrefs/ScheduleDownload.py b/src/python/ensembl/production/xrefs/ScheduleDownload.py new file mode 100644 index 000000000..8001bccc8 --- /dev/null +++ b/src/python/ensembl/production/xrefs/ScheduleDownload.py @@ -0,0 +1,73 @@ +# See the NOTICE file distributed with this work for additional information +# regarding copyright ownership. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Scheduling module to create download jobs for all xref sources in config file.""" + +from ensembl.production.xrefs.Base import * + +class ScheduleDownload(Base): + def run(self): + config_file = self.param_required('config_file') + source_db_url = self.param_required('source_db_url') + reuse_db = self.param_required('reuse_db', {'type': 'bool'}) + skip_preparse = self.param('skip_preparse', None, {'type': 'bool', 'default' : False}) + + logging.info('ScheduleDownload starting with parameters:') + logging.info(f'Param: config_file = {config_file}') + logging.info(f'Param: source_db_url = {source_db_url}') + logging.info(f'Param: reuse_db = {reuse_db}') + logging.info(f'Param: skip_preparse = {skip_preparse}') + + # Create the source db from url + self.create_source_db(source_db_url, reuse_db) + + # Extract sources to download from config file + sources = [] + with open(config_file) as conf_file: + sources = json.load(conf_file) + + if len(sources) < 1: + raise IOError(f'No sources found in config file {config_file}. Need sources to run pipeline') + + for source_data in sources: + name = source_data['name'] + parser = source_data['parser'] + priority = source_data['priority'] + file = source_data['file'] + db = source_data.get('db') + version_file = source_data.get('release') + preparse = source_data.get('preparse') + rel_number = source_data.get('release_number') + catalog = source_data.get('catalog') + + logging.info(f'Source to download: {name}') + + # Revert to the old parser if not pre-parsing + if preparse and skip_preparse: + parser = source_data['old_parser'] + preparse = 0 + + # Pass the source parameters into download jobs + self.write_output('sources', { + 'parser' : parser, + 'name' : name, + 'priority' : priority, + 'db' : db, + 'version_file' : version_file, + 'preparse' : preparse, + 'file' : file, + 'rel_number' : rel_number, + 'catalog' : catalog + }) + diff --git a/src/python/ensembl/production/xrefs/config/xref_all_sources.json b/src/python/ensembl/production/xrefs/config/xref_all_sources.json new file mode 100644 index 000000000..a89f40d4a --- /dev/null +++ b/src/python/ensembl/production/xrefs/config/xref_all_sources.json @@ -0,0 +1,248 @@ +[ + { + "name" : "ArrayExpress", + "parser" : "ArrayExpressParser", + "file" : "Database", + "db" : "core", + "priority" : 1 + }, + { + "name" : "CCDS", + "parser" : "CCDSParser", + "file" : "Database", + "db" : "ccds", + "priority" : 1 + }, + { + "name" : "UniParc", + "parser" : "ChecksumParser", + "file" : "https://ftp.ebi.ac.uk/pub/contrib/uniparc/upidump.lis.gz", + "db" : "checksum", + "priority" : 1 + }, + { + "name" : "RNACentral", + "parser" : "ChecksumParser", + "file" : "https://ftp.ebi.ac.uk/pub/databases/RNAcentral/current_release/md5/md5.tsv.gz", + "db" : "checksum", + "priority" : 1 + }, + { + "name" : "DBASS3", + "parser" : "DBASSParser", + "file" : "https://www.dbass.soton.ac.uk/Dbass3/DownloadCsv", + "priority" : 1 + }, + { + "name" : "DBASS5", + "parser" : "DBASSParser", 
+ "file" : "https://www.dbass.soton.ac.uk/Dbass5/DownloadCsv", + "priority" : 1 + }, + { + "name" : "EntrezGene", + "parser" : "EntrezGeneParser", + "file" : "https://ftp.ncbi.nlm.nih.gov/gene/DATA/gene_info.gz", + "priority" : 1 + }, + { + "name" : "HPA", + "parser" : "HPAParser", + "file" : "https://www.proteinatlas.org/download/xref.php", + "priority" : 1 + }, + { + "name" : "MGI", + "parser" : "MGIParser", + "file" : "https://www.informatics.jax.org/downloads/reports/MRK_ENSEMBL.rpt", + "priority" : 2 + }, + { + "name" : "MGI_desc", + "parser" : "MGI_Desc_Parser", + "file" : "https://www.informatics.jax.org/downloads/reports/MRK_List2.rpt", + "priority" : 1 + }, + { + "name" : "MGI_ccds", + "parser" : "MGI_CCDS_Parser", + "file" : "https://ftp.ncbi.nlm.nih.gov/pub/CCDS/current_mouse/CCDS.current.txt", + "priority" : 2 + }, + { + "name" : "MIM2GENE", + "parser" : "Mim2GeneParser", + "file" : "https://ftp.ncbi.nlm.nih.gov/gene/DATA/mim2gene_medgen", + "priority" : 3 + }, + { + "name" : "MIM", + "parser" : "MIMParser", + "file" : "https://data.omim.org/downloads/ZpPlmgwjuTBK9T5vf2sFjA/omim.txt.gz", + "priority" : 2 + }, + { + "name" : "RFAM", + "parser" : "RFAMParser", + "file" : "https://ftp.ebi.ac.uk/pub/databases/Rfam/CURRENT/Rfam.seed.gz", + "db" : "core", + "priority" : 1 + }, + { + "name" : "RGD", + "parser" : "RGDParser", + "file" : "https://download.rgd.mcw.edu/pub/data_release/GENES_RAT.txt", + "priority" : 2 + }, + { + "name" : "Reactome", + "parser" : "ReactomeParser", + "file" : "https://www.reactome.org/download/current/Ensembl2Reactome_All_Levels.txt", + "release" : "https://www.reactome.org/ReactomeRESTfulAPI/RESTfulWS/version", + "priority" : 1 + }, + { + "name" : "Reactome", + "parser" : "ReactomeParser", + "file" : "https://www.reactome.org/download/current/UniProt2Reactome_All_Levels.txt", + "release" : "https://www.reactome.org/ReactomeRESTfulAPI/RESTfulWS/version", + "priority" : 2 + }, + { + "name" : "RefSeq_dna", + "parser" : "RefSeqDatabaseParser", + "old_parser" : "RefSeqGPFFParser", + "file" : "https://ftp.ncbi.nlm.nih.gov/refseq/release/complete/complete.*rna.gbff.gz", + "method" : "--bestn 5", + "query_cutoff" : 90, + "target_cutoff" : 90, + "release" : "https://ftp.ncbi.nlm.nih.gov/refseq/release/release-notes/RefSeq-release*.txt", + "preparse" : 1, + "priority" : 2, + "release_number" : "https://ftp.ncbi.nlm.nih.gov/refseq/release/RELEASE_NUMBER", + "catalog" : "https://ftp.ncbi.nlm.nih.gov/refseq/release/release-catalog/release*.files.installed" + }, + { + "name" : "RefSeq_peptide", + "parser" : "RefSeqDatabaseParser", + "old_parser" : "RefSeqGPFFParser", + "file" : "https://ftp.ncbi.nlm.nih.gov/refseq/release/complete/complete.*.protein.gpff.gz", + "method" : "--bestn 1", + "query_cutoff" : 100, + "target_cutoff" : 100, + "release" : "https://ftp.ncbi.nlm.nih.gov/refseq/release/release-notes/RefSeq-release*.txt", + "preparse" : 1, + "priority" : 3, + "release_number" : "https://ftp.ncbi.nlm.nih.gov/refseq/release/RELEASE_NUMBER", + "catalog" : "https://ftp.ncbi.nlm.nih.gov/refseq/release/release-catalog/release*.files.installed" + }, + { + "name" : "Refseq_import", + "parser" : "RefSeqCoordinateParser", + "file" : "Database", + "db" : "otherfeatures", + "priority" : 2 + }, + { + "name" : "UCSC_hg38", + "parser" : "UCSCParser", + "file" : "https://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/knownGene.txt.gz", + "release" : "https://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/README.txt", + "priority" : 1 + }, + { + "name" : "UCSC_mm10", + "parser" 
: "UCSCParser", + "file" : "https://hgdownload.cse.ucsc.edu/goldenPath/mm10/database/knownGene.txt.gz", + "release" : "https://hgdownload.cse.ucsc.edu/goldenPath/mm10/database/README.txt", + "priority" : 1 + }, + { + "name" : "Uniprot/SWISSPROT", + "parser" : "UniProtDatabaseParser", + "old_parser" : "UniProtParser", + "file" : "https://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/uniprot_sprot.dat.gz", + "method" : "--bestn 1", + "query_cutoff" : 100, + "target_cutoff" : 100, + "preparse" : 1, + "release" : "https://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/reldate.txt", + "priority" : 1 + }, + { + "name" : "Uniprot/SPTREMBL", + "parser" : "UniProtDatabaseParser", + "old_parser" : "UniProtParser", + "file" : "https://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/uniprot_trembl.dat.gz", + "method" : "--bestn 1", + "query_cutoff" : 100, + "target_cutoff" : 100, + "preparse" : 1, + "release" : "https://ftp.ebi.ac.uk/pub/databases/uniprot/knowledgebase/reldate.txt", + "priority" : 1 + }, + { + "name" : "VGNC", + "parser" : "VGNCParser", + "file" : "https://ftp.ebi.ac.uk/pub/databases/genenames/vgnc/tsv/vgnc_gene_set_All.txt.gz", + "priority" : 1 + }, + { + "name" : "ZFIN_ID", + "parser" : "ZFINParser", + "file" : "https://zfin.org/data_transfer/Downloads/refseq.txt", + "priority" : 3 + }, + { + "name" : "ZFIN_ID", + "parser" : "ZFINParser", + "file" : "https://zfin.org/data_transfer/Downloads/uniprot.txt", + "priority" : 2 + }, + { + "name" : "ZFIN_ID", + "parser" : "ZFINParser", + "file" : "https://zfin.org/data_transfer/Downloads/aliases.txt", + "priority" : 2 + }, + { + "name" : "ZFIN_ID", + "parser" : "ZFINParser", + "file" : "https://zfin.org/data_transfer/Downloads/gene_seq.txt", + "priority" : 1 + }, + { + "name" : "ZFIN_desc", + "parser" : "ZFINDescParser", + "file" : "ftp://zfin.org/pub/transfer/MEOW/zfin_genes.txt", + "priority" : 1 + }, + { + "name" : "cint_jgi_v1", + "parser" : "JGI_ProteinParser", + "file" : "https://ftp.ensembl.org/pub/misc/cint_jgi/v1/ciona.prot.fasta.gz", + "priority" : 1 + }, + { + "name" : "Xenbase", + "parser" : "XenopusJamboreeParser", + "file" : "http://ftp.xenbase.org/pub/GenePageReports/GenePageEnsemblModelMapping.txt", + "priority" : 1 + }, + { + "name" : "miRBase", + "parser" : "miRBaseParser", + "file" : "https://mirbase.org/download/miRNA.dat", + "method" : "--bestn 1", + "query_cutoff" : 90, + "target_cutoff" : 90, + "priority" : 1 + }, + { + "name" : "HGNC", + "parser" : "HGNCParser", + "file" : "https://www.genenames.org/cgi-bin/download/custom?col=gd_hgnc_id&col=gd_app_sym&col=gd_app_name&col=gd_prev_sym&col=gd_aliases&col=gd_pub_eg_id&col=gd_pub_ensembl_id&col=gd_pub_refseq_ids&col=gd_ccds_ids&col=gd_lsdb_links&status=Approved&status_opt=2&where=&order_by=gd_app_sym_sort&format=text&limit=&hgnc_dbtag=on&submit=submit", + "db" : "ccds", + "priority" : 3 + } +] diff --git a/src/python/ensembl/production/xrefs/config/xref_config.ini b/src/python/ensembl/production/xrefs/config/xref_config.ini new file mode 100644 index 000000000..5a4830d52 --- /dev/null +++ b/src/python/ensembl/production/xrefs/config/xref_config.ini @@ -0,0 +1,1680 @@ +########################################################################## +# SOURCES # +# # +# Keys: # +# name - name of this source (required) # +# order - parsing order for this source (required) # +# priority - priority of these data files when more files belong # +# to the same source 'name' (required) # +# prio_descr - label for the 'priority' (optional) # +# parser - the parser to be used 
(required) # +# dependent_on - Comma separated list of sources which must be loaded # +# first (optional) # +# Note that if species does not have xrefs from a # +# master source specified in this list then the # +# dependency is ignored # +# # +########################################################################## + +[source EC_NUMBER::saccharomyces_cerevisiae] +# Used by S.cerevisiae +name = EC_NUMBER +order = 50 +priority = 70 +parser = UniProtParser + +[source BioGRID::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = BioGRID +order = 50 +priority = 1 +parser = FlybaseParser + +[source EPD::drosophila_melanogaster] +# Used by the 12 drosophila genomes +name = EPD +order = 50 +priority = 1 +parser = FlybaseParser + +[source FlyExpress::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = FlyExpress +order = 50 +priority = 1 +parser = FlybaseParser + +[source FlyReactome::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = FlyReactome +order = 50 +priority = 1 +parser = FlybaseParser + +[source GenomeRNAi::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = GenomeRNAi +order = 50 +priority = 1 +parser = FlybaseParser + +[source InteractiveFly::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = InteractiveFly +order = 50 +priority = 1 +parser = FlybaseParser + +[source miRBase::drosophila_melanogaster] +# Used by the 12 drosophila genomes +name = miRBase +order = 50 +priority = 1 +parser = FlybaseParser + +[source MitoDrome::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = MitoDrome +order = 50 +priority = 1 +parser = FlybaseParser + +[source TransFac::drosophila_melanogaster] +# Used by the 12 drosophila genomes +name = TransFac +order = 50 +priority = 1 +parser = FlybaseParser + +[source TransFac::drosophila_pseudoobscura] +# Used by the 12 drosophila genomes +name = TransFac +order = 50 +priority = 1 +parser = FlybaseParser + +[source flybase_annotation_id::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = flybase_annotation_id +order = 50 +priority = 1 +prio_descr = Annotation ID assigned by FlyBase +parser = FlybaseParser + +[source flybase_gene_id::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = flybase_gene_id +order = 75 +priority = 1 +prio_descr = ID assigned by FlyBase +parser = FlybaseParser +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT + +[source flybase_gene_id::drosophila_pseudoobscura] +# Used by drosophila_pseudoobscura +name = flybase_gene_id +order = 100 +priority = 1 +prio_descr = ID assigned by FlyBase +parser = FlybaseParser +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT + +[source flybase_gene_id::drosophila_grimshawi] +# Used by drosophila_grimshawi +name = flybase_gene_id +order = 100 +priority = 1 +prio_descr = ID assigned by FlyBase +parser = FlybaseParser +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT + +[source flybase_gene_id::drosophila_willistoni] +# Used by drosophila_willistoni +name = flybase_gene_id +order = 100 +priority = 1 +prio_descr = ID assigned by FlyBase +parser = FlybaseParser +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT + +[source flybase_gene_id::drosophila_ananassae] +# Used by drosophila_ananassae +name = flybase_gene_id +order = 100 +priority = 1 +prio_descr = ID assigned by FlyBase +parser = FlybaseParser +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT + +[source flybase_gene_id::drosophila_yakuba] +# Used by drosophila_yakuba +name = flybase_gene_id +order = 100 +priority = 1
+prio_descr = ID assigned by FlyBase +parser = FlybaseParser +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT + +[source flybase_gene_id::drosophila_simulans] +# Used by drosophila_simulans +name = flybase_gene_id +order = 100 +priority = 1 +prio_descr = ID assigned by FlyBase +parser = FlybaseParser +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT + +[source flybase_gene_id::drosophila_sechellia] +# Used by drosophila_sechellia +name = flybase_gene_id +order = 100 +priority = 1 +prio_descr = ID assigned by FlyBase +parser = FlybaseParser +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT + +[source flybase_gene_id::drosophila_erecta] +# Used by drosophila_erecta +name = flybase_gene_id +order = 100 +priority = 1 +prio_descr = ID assigned by FlyBase +parser = FlybaseParser +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT + +[source flybase_gene_id::drosophila_persimilis] +# Used by drosophila_persimilis +name = flybase_gene_id +order = 100 +priority = 1 +prio_descr = ID assigned by FlyBase +parser = FlybaseParser +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT + +[source flybase_gene_id::drosophila_mojavensis] +# Used by drosophila_mojavensis +name = flybase_gene_id +order = 100 +priority = 1 +prio_descr = ID assigned by FlyBase +parser = FlybaseParser +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT + +[source flybase_gene_id::drosophila_virilis] +# Used by drosophila_virilis +name = flybase_gene_id +order = 100 +priority = 1 +prio_descr = ID assigned by FlyBase +parser = FlybaseParser +dependent_on = Uniprot/SPTREMBL,Uniprot/SWISSPROT + +[source flybase_transcript_id::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = flybase_transcript_id +order = 50 +priority = 1 +prio_descr = ID assigned by FlyBase +parser = FlybaseParser + +[source flybase_translation_id::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = flybase_translation_id +order = 50 +priority = 1 +prio_descr = ID assigned by FlyBase +parser = FlybaseParser + +[source FlyBaseCGID_gene::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = FlyBaseCGID_gene +order = 50 +priority = 1 +prio_descr = FlyBase_Annotation_IDs +parser = FlybaseParser + +[source FlyBaseCGID_transcript::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = FlyBaseCGID_transcript +order = 50 +priority = 1 +prio_descr = FlyBase_Annotation_IDs +parser = FlybaseParser + +[source FlyBaseCGID_translation::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = FlyBaseCGID_translation +order = 50 +priority = 1 +prio_descr = FlyBase_Annotation_IDs +parser = FlybaseParser + +[source FlyBaseName_gene::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = FlyBaseName_gene +order = 50 +priority = 1 +prio_descr = Name assigned to gene in FlyBase gff +parser = FlybaseParser + +[source FlyBaseName_transcript::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = FlyBaseName_transcript +order = 50 +priority = 1 +prio_descr = Transcript name in FlyBase gff +parser = FlybaseParser + +[source FlyBaseName_translations::drosophila_melanogaster] +# Used by drosophila_melanogaster +name = FlyBaseName_translation +order = 50 +priority = 1 +prio_descr = Translation name in FlyBase gff +parser = FlybaseParser + +[source PHIbase::MULTI] +name = PHIbase +order = 50 +priority = 1 +parser = PHIbaseParser +dependent_on = Uniprot/SWISSPROT,Uniprot/SPTREMBL + +[source ArrayExpress::MULTI] +# Used by all ensembl species +name = ArrayExpress +order = 50 +priority = 1 +parser = 
ArrayExpressParser + +[source ArrayExpress::EG] +name = ArrayExpress +order = 50 +priority = 1 +parser = ArrayExpressParser + +[source CCDS::homo_sapiens] +# Used by homo_sapiens +name = CCDS +order = 10 +priority = 1 +parser = CCDSParser + +[source CCDS::mus_musculus] +# Used by mus_musculus +name = CCDS +order = 10 +priority = 1 +parser = CCDSParser + +[source DBASS5::homo_sapiens] +# Used by homo_sapiens +name = DBASS5 +order = 50 +priority = 1 +prio_descr = Database of aberrant 5\' splice sites. +parser = DBASSParser + +[source DBASS3::homo_sapiens] +# Used by homo_sapiens +name = DBASS3 +order = 50 +priority = 1 +prio_descr = Database of aberrant 3\' splice sites. +parser = DBASSParser + +[source EntrezGene::MULTI] +# Used by aedes_aegypti, anolis_carolinensis, anopheles_gambiae, acyrthosiphon_pisum, apis_mellifera, bos_taurus, caenorhabditis_elegans, canis_familiaris, cavia_porcellus, ciona_intestinalis, ciona_savignyi, danio_rerio, dasypus_novemcinctus, drosophila_melanogaster, drosophila_pseudoobscura, echinops_telfairi, erinaceus_europaeus, equus_caballus, felis_catus, ficedula_albicollis, gallus_gallus, gasterosteus_aculeatus, gorilla_gorilla, homo_sapiens, ixodes_scappularis, loxodonta_africana, macaca_mulatta, monodelphis_domestica, mus_musculus, myotis_lucifugus, ochotona_princeps, oryctolagus_cuniculus, oryzias_latipes, pan_troglodytes, pongo_abelii, rattus_norvegicus, saccharomyces_cerevisiae, ictidomys_tridecemlineatus, sus_scrofa, taeniopygia_guttata, takifugu_rubripes, tupaia_belangeri, xenopus_tropicalis,phaeodactylum_tricornutum,thalassiosira_pseudonana, lepisosteus_oculatus +name = EntrezGene +order = 10 +priority = 1 +parser = EntrezGeneParser + +[source EntrezGene_trans_name] +# Used by homo_sapiens,mus_musculus,danio_rerio,sus_scrofa +name = EntrezGene_trans_name +order = 70 +priority = 1 +parser = comes via official naming + +[source WikiGene::MULTI] +# used via the EntrezGeneParser, for all species +name = WikiGene +order = 100 +priority = 1 +parser = EntrezGeneParser + +[source HPA::homo_sapiens] +# Used by homo_sapiens +name = HPA +order = 50 +priority = 1 +prio_descr = Human Protein Atlas (HPA) database +parser = HPAParser + +[source LRG_HGNC_notransfer] +name = LRG_HGNC_notransfer +order = 30 +priority = 5 +parser = HGNCParser + +[source VGNC::vertebrate] +name = VGNC +order = 29 +priority = 1 +parser = VGNCParser + +[source HGNC::homo_sapiens#07] +# used by #02 +name = HGNC +order = 30 +priority = 1 +prio_descr = ensembl_manual +parser = HGNCParser + +[source HGNC::homo_sapiens#01] +# Used by homo_sapiens +name = HGNC +order = 30 +priority = 2 +prio_descr = ccds +parser = HGNCParser + + +[source HGNC::homo_sapiens#02] +# Used by homo_sapiens +name = HGNC +order = 29 +priority = 4 +prio_descr = entrezgene_manual +parser = HGNCParser +dependent_on = EntrezGene,Uniprot/SWISSPROT,RefSeq_dna,RefSeq_peptide + + +[source HGNC::homo_sapiens#03] +# Used by #02 +name = HGNC +order = 30 +priority = 5 +prio_descr = refseq_manual +parser = HGNCParser + +[source HGNC::homo_sapiens#08] +# used by #02 +name = HGNC +order = 30 +priority = 100 +prio_descr = desc_only +parser = HGNCParser + +[source MIM_GENE::homo_sapiens] +# MIM parse loads data as MIM_GENE or MIM_MORBID not as MIM +name = MIM_GENE +order = 40 +priority = 1 +parser = MIMParser + +[source MIM_MORBID::homo_sapiens] +# MIM parse loads data as MIM_GENE or MIM_MORBID not as MIM +name = MIM_MORBID +order = 40 +priority = 1 +parser = MIMParser + +[source MIM::homo_sapiens] +# Used by homo_sapiens +name = MIM +order = 
10 +priority = 1 +parser = MIMParser + +[source MIM2GENE::homo_sapiens] +# Used by homo_sapiens +name = MIM2GENE +order = 60 +priority = 1 +parser = Mim2GeneParser +dependent_on = MIM,EntrezGene + +[source GeneCards::homo_sapiens] +# used via the HGNCParser, for homo_sapiens +name = GeneCards +order = 100 +priority = 1 +parser = HGNCParser + +[source MGI::mus_musculus#01] +# Used by mus_musculus +name = MGI +order = 30 +priority = 1 +prio_descr = official +parser = MGIParser + +[source MGI::mus_musculus#05] +# Used by mus_musculus +name = MGI +order = 1 +priority = 10 +prio_descr = descriptions +parser = MGI_Desc_Parser + +[source Reactome::MULTI] +# Used by all species +name = Reactome +order = 80 +priority = 1 +prio_descr = direct +parser = ReactomeParser + +[source Reactome_transcript::MULTI] +# Species source used in ReactomeParser. No species uses this source +name = Reactome_transcript +order = 20 +priority = 1 +prio_descr = transcript +parser = ReactomeParser + +[source Reactome_gene::MULTI] +# Species source used in ReactomeParser. No species uses this source +name = Reactome_gene +order = 20 +priority = 1 +prio_descr = gene +parser = ReactomeParser + +[source Reactome::MULTI-Uniprot] +# Special source used in ReactomeParser. No species uses this source. +name = Reactome +order = 20 +priority = 1 +prio_descr = uniprot +parser = ReactomeParser + +[source RGD::rattus_norvegicus] +# Used by rattus_norvegicus +name = RGD +order = 30 +priority = 2 +parser = RGDParser +dependent_on = RefSeq_dna,RefSeq_peptide + +[source RGD::rattus_norvegicus#02] +# Used by rattus_norvegicus +name = RGD +order = 30 +priority = 1 +prio_descr = direct_xref +parser = done_in_RGDParser + +[source RGD_trans_name] +name = RGD_trans_name +order = 49 +priority = 1 +parser = done_in_official_naming + +[source RefSeq_dna::MULTI-vertebrate] +# Used by vertebrates +name = RefSeq_dna +order = 15 +priority = 2 +prio_descr = refseq +parser = RefSeqDatabaseParser +old_parser = RefSeqGPFFParser + +[source RefSeq_dna::gencode] +# Used by human and mouse +name = RefSeq_dna +order = 15 +priority = 2 +prio_descr = refseq +parser = RefSeqGPFFParser + +[source RefSeq_dna::MULTI-fungi] +# Used by saccharomyces_cerevisiae +name = RefSeq_dna +order = 15 +priority = 2 +prio_descr = refseq +parser = RefSeqParser + +[source RefSeq_dna::MULTI-Plants] +name = RefSeq_dna +order = 15 +priority = 2 +prio_descr = refseq +parser = RefSeqDatabaseParser +old_parser = RefSeqGPFFParser + +[source RefSeq_dna::MULTI-complete] +# Used by phaeodactylum_tricornutum +name = RefSeq_dna +order = 15 +priority = 2 +prio_descr = refseq +parser = RefSeqParser + +[source RefSeq_dna::MULTI-protozoa] +# Used by dictyostelium_discoideum +name = RefSeq_dna +order = 15 +priority = 2 +prio_descr = refseq +parser = RefSeqParser + +[source RefSeq_dna::MULTI-invertebrate] +# Used by drosophila_melanogaster +name = RefSeq_dna +order = 15 +priority = 1 +prio_descr = refseq +parser = RefSeqParser + +[source RefSeq_dna::MULTI-predicted] +# Special source used in RefSeqParser. No species uses this source. +name = RefSeq_dna_predicted +order = 20 +priority = 1 +prio_descr = refseq +parser = RefSeqParser + +[source RefSeq_mRNA::MULTI] +# Special source used in RefSeqParser. No species uses this source. +# dependent source is used to provide a link between RefSeq_dna source in SubmitMapper +name = RefSeq_mRNA +order = 15 +priority = 3 +prio_descr = refseq +parser = RefSeqParser + +[source RefSeq_mRNA::otherfeatures] +# Special source used in RefSeqCoordinateParser. 
No species uses this source. +name = RefSeq_mRNA +order = 15 +priority = 1 +prio_descr = otherfeatures +parser = RefSeqCoordinateParser + +[source RefSeq_peptide::otherfeatures] +# Special source used in RefSeqCoordinateParser. No species uses this source. +name = RefSeq_peptide +order = 15 +priority = 1 +prio_descr = otherfeatures +parser = RefSeqCoordinateParser + +[source RefSeq_mRNA_predicted::otherfeatures] +# Special source used in RefSeqCoordinateParser. No species uses this source. +name = RefSeq_mRNA_predicted +order = 20 +priority = 1 +prio_descr = otherfeatures +parser = RefSeqCoordinateParser + +[source RefSeq_peptide_predicted::otherfeatures] +# Special source used in RefSeqCoordinateParser. No species uses this source. +name = RefSeq_peptide_predicted +order = 20 +priority = 1 +prio_descr = otherfeatures +parser = RefSeqCoordinateParser + +[source RefSeq_import::otherfeatures] +# Import RefSeq models from otherfeatures database +# Used for human and mouse +name = RefSeq_import +order = 20 +priority = 1 +prio_descr = otherfeatures +parser = RefSeqCoordinateParser + +[source RefSeq_ncRNA::MULTI] +# Special source used in RefSeqParser. No species uses this source. +name = RefSeq_ncRNA +order = 15 +priority = 2 +prio_descr = refseq +parser = RefSeqParser + +[source RefSeq_ncRNA::otherfeatures] +# Special source used in RefSeqCoordinateParser. No species uses this source. +name = RefSeq_ncRNA +order = 15 +priority = 1 +prio_descr = otherfeatures +parser = RefSeqCoordinateParser + +[source RefSeq_ncRNA_predicted::otherfeatures] +# Special source used in RefSeqCoordinateParser. No species uses this source. +name = RefSeq_ncRNA_predicted +order = 20 +priority = 1 +prio_descr = otherfeatures +parser = RefSeqCoordinateParser + +[source RefSeq_mRNA_predicted::MULTI] +# Special source used in RefSeqParser. No species uses this source. +name = RefSeq_mRNA_predicted +order = 20 +priority = 2 +prio_descr = refseq +parser = RefSeqParser + +[source RefSeq_ncRNA_predicted::MULTI] +# Special source used in RefSeqParser. No species uses this source. +name = RefSeq_ncRNA_predicted +order = 20 +priority = 1 +prio_descr = refseq +parser = RefSeqParser + +[source RefSeq_peptide::MULTI] +name = RefSeq_peptide +order = 30 +priority = 2 +parser = RefSeqGPFFParser + +[source RefSeq_peptide::gencode] +name = RefSeq_peptide +order = 30 +priority = 2 +parser = RefSeqGPFFParser + +[source RefSeq_peptide::MULTI-fungi] +# Used by saccharomyces_cerevisiae +name = RefSeq_peptide +order = 25 +priority = 2 +parser = RefSeqGPFFParser + +[source RefSeq_peptide::MULTI-Plants] +name = RefSeq_peptide +order = 25 +priority = 2 +parser = RefSeqGPFFParser + +[source RefSeq_peptide::MULTI-complete] +# Used by phaeodactylum_tricornutum +name = RefSeq_peptide +order = 25 +priority = 2 +parser = RefSeqGPFFParser + +[source RefSeq_peptide::MULTI-protozoa] +# Used by dictyostelium_discoideum +name = RefSeq_peptide +order = 25 +priority = 2 +parser = RefSeqGPFFParser + +[source RefSeq_peptide::MULTI-invertebrate] +# Used by caenorhabditis_elegans, ciona_savignyi, drosophila_melanogaster +name = RefSeq_peptide +order = 25 +priority = 2 +parser = RefSeqGPFFParser + +[source RefSeq_peptide_predicted::MULTI] +# Special source used in RefSeqGPFFParser. No species uses this source. 
+name = RefSeq_peptide_predicted +order = 30 +priority = 2 +prio_descr = refseq +parser = RefSeqGPFFParser + +[source RefSeq_peptide::MULTI-vertebrate] +# Used by vertebrates +name = RefSeq_peptide +order = 25 +priority = 2 +prio_descr = refseq +parser = RefSeqDatabaseParser +old_parser = RefSeqGPFFParser + +[source SGD_GENE::saccharomyces_cerevisiae] +# Used by saccharomyces_cerevisiae +name = SGD_GENE +order = 10 +priority = 1 +parser = SGDParser + +[source SGD_TRANSLATION::saccharomyces_cerevisiae] +# Used by saccharomyces_cerevisiae +name = SGD_TRANSLATION +order = 10 +priority = 1 +parser = SGDParser + +[source SGD::saccharomyces_cerevisiae] +# Used by saccharomyces_cerevisiae +name = SGD +order = 10 +priority = 1 +parser = SGDParser + +[source PomBase_GENE::schizosaccharomyces_pombe] +# Used by schizosaccharomyces_pombe +name = PomBase_GENE +order = 10 +priority = 1 +parser = PomBaseParser + +[source PomBase_TRANSCRIPT::schizosaccharomyces_pombe] +# Used by schizosaccharomyces_pombe +name = PomBase_TRANSCRIPT +order = 10 +priority = 1 +parser = PomBaseParser + +[source PomBase::schizosaccharomyces_pombe] +# Used by schizosaccharomyces_pombe +name = PomBase +order = 10 +priority = 1 +parser = PomBaseParser + +[source PGSC_GENE::solanum_tuberosum] +# Used by solanum_tuberosum +name = PGSC_GENE +order = 10 +priority = 1 +parser = PGSCParser + +[source PHYTOZOME_GMAX_GENE::glycine_max] +# Used by glycine_max +name = PHYTOZOME_GMAX_GENE +order = 10 +priority = 1 +parser = PhytozomeGmaxParser + +[source UCSC::MULTI] +# Special source used in UCSCParser. No species uses this source. +name = UCSC +order = 70 +priority = 1 +prio_descr = +parser = UCSCParser + +[source UCSC::homo_sapiens] +# Used by homo_sapiens +name = UCSC_hg38 +order = 70 +priority = 1 +parser = UCSC_human_parser + +[source UCSC::mus_musculus] +# Used by mus_musculus +name = UCSC_mm10 +order = 70 +priority = 1 +parser = UCSC_mouse_parser + +[source Uniprot/SPTREMBL::MULTI-invertebrate] +name = Uniprot/SPTREMBL +order = 20 +priority = 3 +parser = UniProtParser +dependent_on = MIM + +[source Uniprot/SPTREMBL::MULTI] +# Used by vertebrates +name = Uniprot/SPTREMBL +order = 20 +priority = 3 +prio_descr = sequence_mapped +parser = UniProtDatabaseParser +old_parser = UniProtParser +dependent_on = MIM + +[source Uniprot/SPTREMBL::gencode] +# Used by human and mouse +name = Uniprot/SPTREMBL +order = 20 +priority = 3 +prio_descr = sequence_mapped +parser = UniProtParser +dependent_on = MIM + +[source Uniprot/SPTREMBL::MULTI-evidence_gt_2] +# Additional source for entries with evidence at protein level > 2 (numerically) for Uniprot/SPTREMBL::MULTI +# These are not taken into account when deriving display xrefs or assigning gene status +name = Uniprot/SPTREMBL +order = 20 +priority = 10 +prio_descr = protein_evidence_gt_2 +parser = UniProtParser +status = LOWEVIDENCE + +[source Uniprot/SWISSPROT::MULTI] +# Used by vertebrates +name = Uniprot/SWISSPROT +order = 20 +priority = 3 +prio_descr = sequence_mapped +parser = UniProtDatabaseParser +old_parser = UniProtParser +dependent_on = MIM + +[source Uniprot/SWISSPROT::gencode] +# Used by human and mouse +name = Uniprot/SWISSPROT +order = 20 +priority = 3 +prio_descr = sequence_mapped +parser = UniProtParser +dependent_on = MIM + +[source Uniprot/SWISSPROT::MULTI-invertebrate] +name = Uniprot/SWISSPROT +order = 20 +priority = 3 +prio_descr = sequence_mapped +parser = UniProtParser +dependent_on = MIM + +[source Uniprot/SWISSPROT::DIRECT] +# Special source used in UniProtParser for direct
mappings from Uniprot +name = Uniprot/SWISSPROT +order = 22 +priority = 1 +prio_descr = direct +parser = UniProtParser + +[source Uniprot/SPTREMBL::DIRECT] +# Special source used in UniProtParser for direct mappings from Uniprot +name = Uniprot/SPTREMBL +order = 22 +priority = 1 +prio_descr = direct +parser = UniProtParser + +[source Uniprot_gn] +# Special source used in UniProtParser for gene names. +name = Uniprot_gn +order = 20 +priority = 1 +parser = UniProtParser + +[source Uniprot_isoform] +# Special source used in UniProtParser for protein isoforms +name = Uniprot_isoform +order = 30 +priority = 1 +parser = UniProtParser + +[source UniProt::protein_id] +# Special source used in UniProtParser. No species uses this source. +name = protein_id +order = 20 +priority = 1 +parser = UniProtParser + +[source UniProt::PDB] +# Special source used in UniProtParser. No species uses this source. +name = PDB +order = 20 +priority = 1 +parser = UniProtParser + +[source UniProt::MEROPS] +# Special source used in UniProtParser. No species uses this source. +name = MEROPS +order = 20 +priority = 1 +parser = UniProtParser + +[source UniProt::EMBL] +# Special source used in UniProtParser. No species uses this source. +name = EMBL +order = 20 +priority = 1 +parser = UniProtParser + +[source UniProt::ChEMBL] +# Special source used in UniProtParser. No species uses this source. +name = ChEMBL +order = 20 +priority = 1 +parser = UniProtParser + +[source UniParc::MULTI] +name = UniParc +order = 20 +priority = 1 +parser = ChecksumParser + +[source RNACentral::MULTI] +name = RNAcentral +order = 1 +priority = 1 +parser = ChecksumParser + +[source PIGGY_trans_name] +name = PIGGY_trans_name +order = 49 +priority = 1 +parser = done_in_official_naming + +[source HGNC_trans_name] +name = HGNC_trans_name +order = 49 +priority = 1 +parser = done_in_official_naming + +[source VGNC_trans_name] +name = VGNC_trans_name +order = 49 +priority = 1 +parser = done_in_official_naming + +[source MGI_automatic_transcript::mus_musculus] +name = MGI_automatic_transcript_notransfer +order = 49 +priority = 1 +parser = done_in_official_naming + +[source MGI_trans_name] +# Used by homo_sapiens, mus_musculus +name = MGI_trans_name +order = 70 +priority = 1 +parser = comes via official naming + +[source Clone_based_ensembl_transcript::homo_sapiens] +name = Clone_based_ensembl_transcript +order = 50 +priority = 1 +parser = done_in_official_naming + +[source Clone_based_ensembl_gene::homo_sapiens] +name = Clone_based_ensembl_gene +order = 50 +priority = 1 +parser = done_in_official_naming + +[source Xenopus_Jamboree::xenopus_tropicalis] +# Used by xenopus_tropicalis +name = Xenbase +order = 20 +priority = 1 +parser = XenopusJamboreeParser + +[source ZFIN_ID::danio_rerio#01] +# Used by danio_rerio +name = ZFIN_ID +order = 31 +priority = 1 +prio_descr = uniprot/refseq +parser = ZFINParser + +[source ZFIN_ID::danio_rerio#03] +# Used by danio_rerio +name = ZFIN_ID +order = 1 +priority = 10 +prio_descr = description_only +parser = ZFINDescParser + +[source ZFIN_ID_trans_name] +name = ZFIN_ID_trans_name +order = 49 +priority = 1 +parser = done_in_official_naming + +[source cint_jgi_v1::ciona_intestinalis] +# Used by ciona_intestinalis +name = cint_jgi_v1 +order = 50 +priority = 1 +parser = JGI_ProteinParser + +[source RFAM::MULTI] +# Used by bos_taurus, canis_familiaris, ciona_intestinalis, ciona_savignyi, danio_rerio, dasypus_novemcinctus, drosophila_pseudoobscura, erinaceus_europaeus, ficedula_albicollis, gallus_gallus, gasterosteus_aculeatus,
homo_sapiens, loxodonta_africana, macaca_mulatta, monodelphis_domestica, mus_musculus, myotis_lucifugus, oryctolagus_cuniculus, oryzias_latipes, pan_troglodytes, pongo_abelii, rattus_norvegicus, ictidomys_tridecemlineatus, takifugu_rubripes, tupaia_belangeri, xenopus_tropicalis, ornithorhynchus_anatinus +name = RFAM +order = 70 +priority = 1 +parser = RFAMParser + +[source RFAM::EG] +name = RFAM +order = 70 +priority = 1 +parser = CoreXrefParser + + +[source miRBase::MULTI] +# Used by bos_taurus, canis_familiaris, ciona_intestinalis, ciona_savignyi, danio_rerio, dasypus_novemcinctus, erinaceus_europaeus, ficedula_albicollis, gallus_gallus, gasterosteus_aculeatus, homo_sapiens, loxodonta_africana, macaca_mulatta, monodelphis_domestica, mus_musculus, myotis_lucifugus, oryctolagus_cuniculus, oryzias_latipes, pan_troglodytes, pongo_abelii, rattus_norvegicus, ictidomys_tridecemlineatus, takifugu_rubripes, tupaia_belangeri, xenopus_tropicalis, ornithorhynchus_anatinus +name = miRBase +order = 70 +priority = 1 +parser = miRBaseParser + +[source miRBase_trans_name] +# Used by homo_sapiens, mus_musculus +name = miRBase_trans_name +order = 70 +priority = 1 +parser = comes via official naming + +[source RFAM_trans_name] +# Used by homo_sapiens, mus_musculus +name = RFAM_trans_name +order = 70 +priority = 1 +parser = comes via official naming + +[source Uniprot_gn_trans_name] +# Used by merged species: homo_sapiens, mus_musculus, danio_rerio and sus_scrofa +name = Uniprot_gn_trans_name +order = 70 +priority = 1 +parser = comes via official naming + +[source RNAMMER::MULTI] +# Used by EnsemblGenomes, e.g. aspergillus_clavatus, aspergillus_flavus, aspergillus_fumigatus, aspergillus_nidulans, aspergillus_niger, aspergillus_oryzae, aspergillus_terreus, neosartorya_fischeri +name = RNAMMER +order = 70 +priority = 1 +parser = CoreXrefParser + +[source TRNASCAN_SE::MULTI] +# Used by EnsemblGenomes, e.g. aspergillus_clavatus, aspergillus_flavus, aspergillus_fumigatus, aspergillus_nidulans, aspergillus_niger, aspergillus_oryzae, aspergillus_terreus, neosartorya_fischeri +name = TRNASCAN_SE +order = 70 +priority = 1 +parser = CoreXrefParser + +[source ncRNA_EG::EG] +# Used by EnsemblGenomes, e.g. aspergillus_clavatus, aspergillus_flavus, aspergillus_fumigatus, aspergillus_nidulans, aspergillus_niger, aspergillus_oryzae, aspergillus_terreus, neosartorya_fischeri +# replaces TRNASCAN_SE, RNAMMER and RFAM::EG +name = ncRNA_EG +order = 70 +priority = 1 +parser = CoreXrefParser + +[source misc_EG::EG] +# Used by EnsemblGenomes to maintain sources of xrefs which don't have a proper parser yet.
+name = misc_EG +order = 70 +priority = 1 +parser = EG_DBParser + +[source ENA_GENE::MULTI] +# Used by aspergillus_niger +name = ENA_GENE +order = 70 +priority = 1 + +[source CADRE::MULTI] +# Used by aspergillus_clavatus, aspergillus_flavus, aspergillus_fumigatus, aspergillus_nidulans, aspergillus_niger, aspergillus_oryzae, aspergillus_terreus, neosartorya_fischeri +name = CADRE +order = 70 +priority = 1 + +[source CADRE_AFum_A1163::MULTI] +# Used by aspergillus_fumigatusa1163 +name = CADRE_Afum_A1163 +order = 70 +priority = 1 + +[source AspGD::MULTI] +# Used by aspergillus_nidulans +name = AspGD +order = 70 +priority = 1 + +[source GeneDB::MULTI] +# Used by plasmodium_falciparum, trypanosoma_brucei, leishmania_major, schistosoma_mansoni +name = GeneDB +order = 70 +priority = 1 + +[source phatr_jgi_v2::MULTI] +# Used by phaeodactylum_tricornutum (Pt) +name = phatr_jgi_v2 +order = 70 +priority = 1 + +[source phatr_jgi_v2_bd::MULTI] +# Used by phaeodactylum_tricornutum (Pt) +name = phatr_jgi_v2_bd +order = 70 +priority = 1 + +[source thaps_jgi_v2::MULTI] +# Used by thalassiosira_pseudonana (Tp) +name = thaps_jgi_v2 +order = 70 +priority = 1 + +[source physo1_jgi_v1.1_gene::MULTI] +# Used by phytophthora_sojae +name = physo1_jgi_v1.1_gene +order = 70 +priority = 1 + +[source PGD_GENE::MULTI] +# Used by Tp +name = PGD_GENE +order = 70 +priority = 1 + +[source physo1_jgi_v1.1::MULTI] +# Used by phytophthora_sojae +name = physo1_jgi_v1.1 +order = 70 +priority = 1 + +[source phyra_jgi_v1.1::MULTI] +# Used by phytophthora_ramorum +name = phyra_jgi_v1.1 +order = 70 +priority = 1 + +[source BROAD_P_infestans::MULTI] +# Used by phytophthora_infestans +name = BROAD_P_infestans +order = 70 +priority = 1 + +[source thaps_jgi_v2_bd::MULTI] +# Used by thalassiosira_pseudonana (Tp) +name = thaps_jgi_v2_bd +order = 70 +priority = 1 + + +[source BROAD_U_maydis::MULTI] +# Used by ustilago_maydis +name = BROAD_U_maydis +order = 70 +priority = 1 + +[source BROAD_F_oxysporum::MULTI] +# Used by fusarium_oxysporum +name = BROAD_F_oxysporum +order = 70 +priority = 1 + +[source BROAD_g_zeae::MULTI] +# Used by gibberella_zeae +name = BROAD_G_zeae +order = 70 +priority = 1 + +[source BROAD_G_moniliformis::MULTI] +# Used by gibberella_moniliformis +name = BROAD_G_moniliformis +order = 70 +priority = 1 + +[source SCHISTODB::MULTI] +# Used by schistosoma_mansoni +name = SCHISTODB +order = 70 +priority = 1 + +[source triad_jgi_v1.0::MULTI] +# Used by trichoplax_adhaerens +name = triad_jgi_v1.0 +order = 70 +priority = 1 + +[source wormbase::celegans] +name = wormbase_all +order = 50 +priority = 1 +parser = WormbaseDirectParser + +[source wormbase::cbriggsae] +name = wormbase_all +order = 50 +priority = 1 +parser = WormbaseDirectParser + +[source wormbase::cbrenneri] +name = wormbase_all +order = 50 +priority = 1 +parser = WormbaseDirectParser + +[source wormbase::cremanei] +name = wormbase_all +order = 50 +priority = 1 +parser = WormbaseDirectParser + +[source wormbase::cjaponica] +name = wormbase_all +order = 50 +priority = 1 +parser = WormbaseDirectParser + +[source wormbase::ppacificus] +name = wormbase_all +order = 50 +priority = 1 +parser = WormbaseDirectParser + +[source wormbase::sratti] +name = wormbase_all +order = 50 +priority = 1 +parser = WormbaseDirectParser + + +[source wormbase::bmalayi] +name = wormbase_all +order = 50 +priority = 1 +parser = WormbaseDirectParser + +[source wormbase::ovolvulus] +name = wormbase_all +order = 50 +priority = 1 +parser = WormbaseDirectParser + +[source wormbase::tmuris] +name = wormbase_all +order = 50 +priority = 1 +parser = WormbaseDirectParser + +[source wormpep_id::wormbase] +# Used by wormbase core species +name = wormpep_id
+order = 50 +priority = 1 +parser = comes from WormbaseDirectParser + +[source wormbase_gene::wormbase] +# Used by wormbase core species +name = wormbase_gene +order = 50 +priority = 1 +parser = comes from WormbaseDirectParser + +[source wormbase_locus::wormbase] +# Used by wormbase core species +name = wormbase_locus +order = 50 +priority = 1 +parser = comes from WormbaseDirectParser + +[source wormbase_gseqname::wormbase] +# Used by wormbase core species +name = wormbase_gseqname +order = 50 +priority = 1 +parser = comes from WormbaseDirectParser + +[source wormbase_transcript::wormbase] +# Used by wormbase core species +name = wormbase_transcript +order = 50 +priority = 1 +parser = comes from WormbaseDirectParser + +[source wormbase_cds::wormbase] +# Used by wormbase core species +name = wormbase_cds +order = 50 +priority = 1 +parser = comes from WormbaseDirectParser + +[source Gramene_Pathway::arabidopsis_thaliana] +# Used by Arabidopsis thaliana, Gramene-specific +name = Gramene_Pathway +order = 50 +priority = 1 +parser = GramenePathwayParser + +[source Gramene_Pathway::brachypodium_distachyon] +# Used by Brachypodium distachyon, Gramene-specific +name = Gramene_Pathway +order = 50 +priority = 1 +parser = GramenePathwayParser + +[source Gramene_Pathway::solanum_lycopersicum] +# Used by Tomato, Gramene-specific +name = Gramene_Pathway +order = 50 +priority = 1 +parser = GramenePathwayParser + +[source Gramene_Pathway::zea_mays] +# Used by Zea mays, Gramene-specific +name = Gramene_Pathway +order = 50 +priority = 1 +parser = GramenePathwayParser + +[source Gramene_Pathway::populus_trichocarpa] +# Used by Poplar, Gramene-specific +name = Gramene_Pathway +order = 50 +priority = 1 +parser = GramenePathwayParser + +[source Gramene_Pathway::solanum_tuberosum] +# Used by Solanum tuberosum, Gramene-specific +name = Gramene_Pathway +order = 50 +priority = 1 +parser = GramenePathwayParser + +[source Gramene_Pathway::oryza_sativa] +# Used by Oryza sativa, Gramene-specific +name = Gramene_Pathway +order = 50 +priority = 1 +parser = GramenePathwayParser + +[source Gramene_Pathway::sorghum_bicolor] +# Used by Sorghum bicolor, Gramene-specific +name = Gramene_Pathway +order = 50 +priority = 1 +parser = GramenePathwayParser + + +[source PO_GROW::arabidopsis_thaliana] +# Used by Arabidopsis thaliana, Gramene-specific +name = PO +order = 85 +priority = 1 +prio_descr = main +dependent_on = TAIR_TRANSLATION +parser = TAIROntologyParser + +[source PO_STRU::arabidopsis_thaliana] +# Used by Arabidopsis thaliana, Gramene-specific +name = PO +order = 85 +priority = 1 +prio_descr = main +dependent_on = TAIR_TRANSLATION +parser = TAIROntologyParser + +[source TAIR_LOCUS::arabidopsis_thaliana] +# Used by arabidopsis_thaliana +name = TAIR_LOCUS +order = 1 +priority = 1 +parser = TAIRIDParser + +[source TAIR_LOCUS_MODEL::arabidopsis_thaliana] +# Used by arabidopsis_thaliana +name = TAIR_LOCUS_MODEL +order = 1 +priority = 1 +parser = TAIRIDParser + +[source TAIR_SYMBOL::arabidopsis_thaliana] +# Used by arabidopsis_thaliana +name = TAIR_SYMBOL +order = 1 +priority = 1 +parser = TAIRIDParser + +[source TAIR_TRANSLATION::arabidopsis_thaliana] +# Used by arabidopsis_thaliana +name = TAIR_TRANSLATION +order = 1 +priority = 1 + +[source NASC_GENE_ID::arabidopsis_thaliana] +# Used by arabidopsis_thaliana +name = NASC_GENE_ID +order = 1 +priority = 1 +parser = TAIRIDParser + +[source CommunityGO::arabidopsis_thaliana] +# Used by Arabidopsis thaliana, Gramene-specific +name = GO +order = 85 +priority = 1 +prio_descr = main 
+dependent_on = TAIR_TRANSLATION +parser = TAIROntologyParser + + + + +######################################################################## +# SPECIES # +# # +# Keys: # +# taxonomy_id - taxonomy ID of species/strain # +# (multiple comma separated, required) # +# sources - sources used for this species/strain # +# (multiple comma separated, required) # +# # +######################################################################## + +######################################################################## +# VERTEBRATES # +# # +# Default sources for vertebrates # +# Additional configuration for species-specific sources # +# # +######################################################################## + + +[species sars_cov_2] +taxonomy_id = 2697049 +sources = RefSeq_peptide::MULTI,EntrezGene::MULTI,Uniprot/SWISSPROT::MULTI + +[species vertebrates] +taxonomy_id = 7742 +sources = EntrezGene::MULTI,Reactome::MULTI,RNACentral::MULTI,RefSeq_dna::MULTI-vertebrate,RefSeq_peptide::MULTI-vertebrate,RefSeq_import::otherfeatures,Uniprot/SPTREMBL::MULTI,Uniprot/SWISSPROT::MULTI,UniParc::MULTI,RFAM::MULTI,miRBase::MULTI,ArrayExpress::MULTI,VGNC::vertebrate + +[species homo_sapiens] +taxonomy_id = 9606 +sources = CCDS::homo_sapiens,DBASS3::homo_sapiens,DBASS5::homo_sapiens,HPA::homo_sapiens,HGNC::homo_sapiens#02,MIM::homo_sapiens,MIM2GENE::homo_sapiens,UCSC::homo_sapiens,RefSeq_dna::gencode,RefSeq_peptide::gencode,Uniprot/SPTREMBL::gencode,Uniprot/SWISSPROT::gencode + +[species mus_musculus] +taxonomy_id = 10090 +sources = CCDS::mus_musculus,EntrezGene::MULTI,MGI::mus_musculus#01,MGI::mus_musculus#05,UCSC::mus_musculus,RefSeq_dna::gencode,RefSeq_peptide::gencode,Uniprot/SPTREMBL::gencode,Uniprot/SWISSPROT::gencode + +[species danio_rerio] +taxonomy_id = 7955 +sources = ZFIN_ID::danio_rerio#01,ZFIN_ID::danio_rerio#03 + +[species rattus_norvegicus] +taxonomy_id = 10116 +sources = RGD::rattus_norvegicus,RGD::rattus_norvegicus#02 + + + +[species ciona_intestinalis] +taxonomy_id = 7719 +sources = cint_jgi_v1::ciona_intestinalis + +[species xenopus_tropicalis] +taxonomy_id = 8364 +sources = Xenopus_Jamboree::xenopus_tropicalis + +######################################################################## +# METAZOA # +# # +# Default sources for metazoa # +# Additional configuration for species-specific sources # +# # +######################################################################## + +[species metazoa] +taxonomy_id = 33208 +sources = EntrezGene::MULTI,RefSeq_dna::MULTI-invertebrate,RefSeq_peptide::MULTI-invertebrate,Uniprot/SPTREMBL::MULTI-invertebrate,Uniprot/SWISSPROT::MULTI-invertebrate,UniParc::MULTI,ArrayExpress::EG,ncRNA_EG::EG,RNAMMER::MULTI,miRBase::MULTI,RFAM::EG,TRNASCAN_SE::MULTI,misc_EG::EG + +[species drosophila_melanogaster] +taxonomy_id = 7227 +sources = flybase_gene_id::drosophila_melanogaster + +[species drosophila_pseudoobscura] +taxonomy_id = 46245 +sources = flybase_gene_id::drosophila_pseudoobscura + +[species drosophila_ananassae] +taxonomy_id = 7217 +sources = flybase_gene_id::drosophila_ananassae + +[species drosophila_erecta] +taxonomy_id = 7220 +sources = flybase_gene_id::drosophila_erecta + +[species drosophila_grimshawi] +taxonomy_id = 7222 +sources = flybase_gene_id::drosophila_grimshawi + +[species drosophila_mojavensis] +taxonomy_id = 7230 +sources = flybase_gene_id::drosophila_mojavensis + +[species drosophila_persimilis] +taxonomy_id = 7234 +sources = flybase_gene_id::drosophila_persimilis + +[species drosophila_sechellia] +taxonomy_id = 7238 +sources = 
flybase_gene_id::drosophila_sechellia + +[species drosophila_simulans] +taxonomy_id = 7240 +sources = flybase_gene_id::drosophila_simulans + +[species drosophila_virilis] +taxonomy_id = 7244 +sources = flybase_gene_id::drosophila_virilis + +[species drosophila_willistoni] +taxonomy_id = 7260 +sources = flybase_gene_id::drosophila_willistoni + +[species drosophila_yakuba] +taxonomy_id = 7245 +sources = flybase_gene_id::drosophila_yakuba + +######################################################################## +# FUNGI # +# # +# Default sources for fungi # +# Additional configuration for species-specific sources # +# # +######################################################################## + +[species fungi] +taxonomy_id = 4751 +sources = EntrezGene::MULTI,RefSeq_dna::MULTI-fungi,RefSeq_peptide::MULTI-fungi,Uniprot/SPTREMBL::MULTI,Uniprot/SWISSPROT::MULTI,PHIbase::MULTI,ArrayExpress::EG,RFAM::EG,miRBase::MULTI,RNAMMER::MULTI,TRNASCAN_SE::MULTI,misc_EG::EG + +[species saccharomyces_cerevisiae] +taxonomy_id = 4932,559292 +sources = SGD::saccharomyces_cerevisiae + +[species schizosaccharomyces_pombe] +taxonomy_id = 4896,284812 +sources = PomBase::schizosaccharomyces_pombe + +######################################################################## +# PLANTS # +# # +# Default sources for plants # +# Additional configuration for species-specific sources # +# # +######################################################################## + +[species plants] +taxonomy_id = 33090 +sources = EntrezGene::MULTI,Reactome::MULTI,RNACentral::MULTI,RefSeq_dna::MULTI-Plants,RefSeq_import::otherfeatures,Uniprot/SPTREMBL::MULTI,Uniprot/SWISSPROT::MULTI,UniParc::MULTI,RFAM::MULTI,miRBase::MULTI,ArrayExpress::MULTI,ncRNA_EG::EG,misc_EG::EG + +[species glycine_max] +taxonomy_id = 3847 +sources = PHYTOZOME_GMAX_GENE::glycine_max + +[species solanum_lycopersicum] +taxonomy_id = 4081 +sources = Gramene_Pathway::solanum_lycopersicum + +[species solanum_tuberosum] +taxonomy_id = 4113 +sources = PGSC_GENE::solanum_tuberosum,Gramene_Pathway::solanum_tuberosum + +[species arabidopsis_thaliana] +taxonomy_id = 3702 +sources = Gramene_Pathway::arabidopsis_thaliana,TAIR_LOCUS::arabidopsis_thaliana,CommunityGO::arabidopsis_thaliana,PO_GROW::arabidopsis_thaliana,PO_STRU::arabidopsis_thaliana + +[species brachypodium_distachyon] +taxonomy_id = 15368 +sources = Gramene_Pathway::brachypodium_distachyon + +[species oryza_sativa] +taxonomy_id = 39947 +sources = Gramene_Pathway::oryza_sativa + +[species populus_trichocarpa] +taxonomy_id = 3694 +sources = Gramene_Pathway::populus_trichocarpa + +[species sorghum_bicolor] +taxonomy_id = 4558,91525,171959 +sources = Gramene_Pathway::sorghum_bicolor + +[species zea_mays] +taxonomy_id = 4577,112001,381124,334825,4579,76912 +sources = Gramene_Pathway::zea_mays + +[species caenorhabditis_elegans] +taxonomy_id = 6239 +sources = wormbase::celegans + +[species caenorhabditis_briggsae] +taxonomy_id = 6238 +sources = wormbase::cbriggsae + +[species caenorhabditis_remanei] +taxonomy_id = 31234 +sources = wormbase::cremanei + +[species caenorhabditis_brenneri] +taxonomy_id = 135651 +sources = wormbase::cbrenneri + +[species caenorhabditis_japonica] +taxonomy_id = 281687 +sources = wormbase::cjaponica + +[species brugia_malayi] +taxonomy_id = 6279 +sources = wormbase::bmalayi + +[species onchocerca_volvulus] +taxonomy_id = 6282 +sources = wormbase::ovolvulus + +[species pristionchus_pacificus] +taxonomy_id = 54126 +sources = wormbase::ppacificus + +[species strongyloides_ratti] +taxonomy_id = 34506 
+sources = wormbase::sratti + +[species trichuris_muris] +taxonomy_id = 70415 +sources = wormbase::tmuris + +######################################################################## +# PROTISTS                                                             # +#                                                                      # +# Default sources for protists                                         # +# Additional configuration for species-specific sources                # +#                                                                      # +######################################################################## + +[species protist] +taxonomy_id = 2759 +sources = EntrezGene::MULTI,RefSeq_dna::MULTI-complete,RefSeq_peptide::MULTI-complete,Uniprot/SPTREMBL::MULTI,Uniprot/SWISSPROT::MULTI,TRNASCAN_SE::MULTI,RNAMMER::MULTI,ArrayExpress::EG,PHIbase::MULTI,miRBase::MULTI,misc_EG::EG,RFAM::EG + diff --git a/src/python/scripts/genome_info.py b/src/python/scripts/genome_info.py new file mode 100755 index 000000000..48ade41b2 --- /dev/null +++ b/src/python/scripts/genome_info.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python +""" +Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +Copyright [2016-2024] EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +''' +Fetch Genome Info from the new metadata API +''' + +import argparse +import logging +import sys +import json +import configparser +from os import getenv +from os.path import isdir +from os.path import join, isfile, realpath +from ensembl.production.metadata.api.genome import GenomeAdaptor + +logging.basicConfig(level=logging.INFO, format='%(message)s') +logger = logging.getLogger(__name__) + +def main(): + parser = argparse.ArgumentParser( + prog='genome_info.py', + description='Fetch Ensembl genome info from new metadata API' + ) + parser.add_argument('-g', '--genome_uuid', type=str, nargs='*', required=False, default=None, help='genome UUID, ex: a23663571,b236571') + parser.add_argument('-s', '--species', type=str, nargs='*', required=False, default=None, help='Ensembl species names, ex: homo_sapiens,mus_musculus') + parser.add_argument('-d', '--organism_group', type=str, nargs='*', required=False, default=None, help='organism group, ex: EnsemblVertebrates,EnsemblPlants') + parser.add_argument('-p', '--organism_group_type', type=str, nargs='*', required=False, default=None, help='organism group type, ex: Division') + parser.add_argument('-u', '--unreleased_genomes', help='Fetch only unreleased genome and datasets', action='store_true') + parser.add_argument('-n', '--dataset_name', type=str, nargs='*', required=False, default=None, help='ensembl dataset type to fetch unique genomes, ex: assembly, genebuild') + parser.add_argument('-r', '--dataset_source', type=str, nargs='*', required=False, default=None, help='ensembl dataset source, ex: homo_sapiens_core_111_38') + parser.add_argument('-m', '--metadata_db_uri', type=str, required=True, help='metadata db mysql uri, ex: mysql://ensro@localhost:3366/ensembl_genome_metadata') + parser.add_argument('-t', '--taxonomy_db_uri', type=str, required=True, help='taxonomy db mysql uri, ex: mysql://ensro@localhost:3366/ncbi_taxonomy') + parser.add_argument('-o', '--output',
type=str, required=True, help='output file, ex: genome_info.json') + + args = parser.parse_args() + + logger.info(args) + # default values + genome_uuid = args.genome_uuid + species = args.species + organism_group = args.organism_group + organism_group_type = args.organism_group_type + dataset_name = args.dataset_name + dataset_source = args.dataset_source + + + # required values + unreleased_genomes = args.unreleased_genomes + metadata_db_uri = args.metadata_db_uri + taxonomy_db_uri = args.taxonomy_db_uri + output_file_name = args.output + + genome_info_obj = GenomeAdaptor(metadata_uri=metadata_db_uri, taxonomy_uri=taxonomy_db_uri) + with open(output_file_name, 'w') as json_output: + for genome in genome_info_obj.fetch_genomes_info(genome_uuid=genome_uuid, + ensembl_name=species, + group=organism_group, + group_type=organism_group_type, + dataset_name=dataset_name, + dataset_source=dataset_source, + unreleased_genomes=unreleased_genomes) or []: + + genome_info = { + "genome_id" : genome[0]['genome'][0].genome_uuid, + "species" : genome[0]['genome'][1].ensembl_name, + "assembly" : genome[0]['genome'][2].assembly_default, + "assembly_name" : genome[0]['genome'][2].ensembl_name, + "assembly_accession" : genome[0]['genome'][2].accession, + "assembly_level" : genome[0]['genome'][2].level, + "division" : genome[0]['genome'][-1].name, + "database" : genome[0]['datasets'][-1][-1].name, + "database_type" : genome[0]['datasets'][-1][-1].type + } + json.dump(genome_info, json_output) + json_output.write("\n") + +if __name__ == '__main__': + main() diff --git a/src/python/scripts/run_module.py b/src/python/scripts/run_module.py new file mode 100644 index 000000000..874f02dd8 --- /dev/null +++ b/src/python/scripts/run_module.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python + +# Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +# Copyright [2016-2024] EMBL-European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib + +from ensembl.common.Params import Params + +def main(): + params = Params() + + module_name = params.param_required('module') + class_name = module_name.split(".")[-1] + + module = importlib.import_module(module_name) + module_class = getattr(module, class_name) + module_instance = module_class() + + module_instance.run() + +if __name__ == '__main__': + main() diff --git a/travisci/kyotocabinet-perl-1.20.tar.gz b/travisci/kyotocabinet-perl-1.20.tar.gz new file mode 100644 index 000000000..77200c05b Binary files /dev/null and b/travisci/kyotocabinet-perl-1.20.tar.gz differ
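A note on the [source ...] stanzas in the config above: every stanza carries the same small key set (name, order, priority, prio_descr, parser), and several stanzas deliberately share one name so that alternative variants of the same external database can coexist. The sketch below shows one plausible way a consumer could pick a preferred variant per name. It is only an illustration: the file name xref_config.ini is assumed, the file is assumed to parse as plain INI, and lower priority numbers are assumed to take precedence (the priority = 10 on the LOWEVIDENCE and description_only variants points that way).

import configparser

def preferred_sources(path="xref_config.ini"):  # hypothetical file name
    # Values never contain "=", so a single delimiter keeps the "::" in
    # section names and source lists from being misread as key/value splits.
    cfg = configparser.ConfigParser(delimiters=("=",), strict=False)
    cfg.read(path)
    best = {}
    for section in cfg.sections():
        if not section.startswith("source "):
            continue  # skip the [species ...] stanzas
        name = cfg.get(section, "name", fallback=section)
        priority = cfg.getint(section, "priority", fallback=1)
        if name not in best or priority < best[name][0]:
            best[name] = (priority, section)  # assumed: lower number wins
    return {name: section for name, (priority, section) in best.items()}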
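Similarly, each [species ...] stanza maps one or more NCBI taxonomy IDs to the comma-separated source list that applies. Reusing the ConfigParser object from the sketch above, an exact-match lookup might look like the following; how the pipeline layers division-wide defaults (e.g. [species vertebrates] with taxonomy_id 7742) on top of species-specific stanzas is not visible in this file, so it is deliberately not modelled.

def sources_for_taxonomy(cfg, taxonomy_id):
    # Return the source list of every [species ...] stanza whose
    # taxonomy_id field names the given ID (exact match only).
    matches = []
    for section in cfg.sections():
        if not section.startswith("species "):
            continue
        ids = [t.strip() for t in cfg.get(section, "taxonomy_id", fallback="").split(",")]
        if str(taxonomy_id) in ids:
            matches.append([s.strip() for s in cfg.get(section, "sources", fallback="").split(",")])
    return matches

# e.g. sources_for_taxonomy(cfg, 559292) should match [species saccharomyces_cerevisiae]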
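On the output format of genome_info.py: the script calls json.dump once per genome and appends a newline after each object, so the output file is JSON Lines rather than a single JSON document, and consumers must read it line by line. A minimal reader sketch; the keys are the ones the script itself emits, and the file name just mirrors the --output help example.

import json

with open("genome_info.json") as fh:  # name taken from the --output example
    for line in fh:
        info = json.loads(line)
        print(info["genome_id"], info["species"], info["assembly_accession"])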
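run_module.py encodes a simple dispatch convention: the required 'module' parameter is a dotted path whose last component must also be the name of a class inside that module, and the class must be constructible with no arguments and expose a run() method. A minimal module satisfying that contract could look like the sketch below; the package path and class name are made up for illustration, and how ensembl.common.Params sources the 'module' value (command line or otherwise) is assumed rather than shown by this diff.

# src/python/ensembl/xrefs/DumpXrefs.py -- hypothetical path and class name
class DumpXrefs:
    # run_module.py instantiates this class with no arguments.
    def run(self):
        # A real module would do its work here.
        print("DumpXrefs.run() invoked via run_module.py")

# run_module.py would resolve 'module' = "ensembl.xrefs.DumpXrefs" (assumed to be
# supplied through Params), import it, and call DumpXrefs().run().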